Commit eb816a9d authored by Volker Krause
Browse files

Unify the singular and plural airport name detection code paths

The only actual semantic difference is how possible IATA codes in the text
are interpreted for disambiguation. Since that is now done only if the
code found in the text is also among the candidates derived from the rest
of the text, we don't actually need the separation at all anymore.
parent 917bb8ab
Pipeline #77402 passed with stage
in 14 minutes and 14 seconds
......@@ -15,6 +15,8 @@
#include <cmath>
Q_DECLARE_METATYPE(KItinerary::KnowledgeDb::IataCode)
using namespace KItinerary;
using namespace KItinerary::KnowledgeDb;
......@@ -135,55 +137,57 @@ private Q_SLOTS:
QCOMPARE(tz.id(), QByteArray("Asia/Shanghai"));
}
void iataLookupTest()
void iataLookupTest_data()
{
QTest::addColumn<QString>("name");
QTest::addColumn<KnowledgeDb::IataCode>("iataCode");
// via unique fragment lookup
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Flughafen Berlin-Tegel")), KnowledgeDb::IataCode{"TXL"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("TEGEL")), KnowledgeDb::IataCode{"TXL"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Paris Charles de Gaulle")), KnowledgeDb::IataCode{"CDG"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Zürich")), KnowledgeDb::IataCode{"ZRH"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("AMSTERDAM, NL (SCHIPHOL AIRPORT)")), KnowledgeDb::IataCode{"AMS"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("London Heathrow")), KnowledgeDb::IataCode{"LHR"});
QTest::newRow("TXL1") << s("Flughafen Berlin-Tegel") << KnowledgeDb::IataCode{"TXL"};
QTest::newRow("TXL2") << s("TEGEL") << KnowledgeDb::IataCode{"TXL"};
QTest::newRow("CDG") << s("Paris Charles de Gaulle") << KnowledgeDb::IataCode{"CDG"};
QTest::newRow("ZRH") << s("Zürich") << KnowledgeDb::IataCode{"ZRH"};
QTest::newRow("AMS") << s("AMSTERDAM, NL (SCHIPHOL AIRPORT)") << KnowledgeDb::IataCode{"AMS"};
QTest::newRow("LHR") << s("London Heathrow") << KnowledgeDb::IataCode{"LHR"};
// via non-unique fragment lookup
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("John F. Kennedy International Airport")), KnowledgeDb::IataCode{"JFK"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("San Francisco International")), KnowledgeDb::IataCode{"SFO"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Düsseldorf International")), KnowledgeDb::IataCode{"DUS"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("London City")), KnowledgeDb::IataCode{"LCY"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("DETROIT, MI (METROPOLITAN WAYNE CO)")), KnowledgeDb::IataCode{"DTW"});
// not unique
QVERIFY(!KnowledgeDb::iataCodeFromName(QStringLiteral("Flughafen Berlin")).isValid());
QVERIFY(!KnowledgeDb::iataCodeFromName(QStringLiteral("Charles de Gaulle Orly")).isValid());
QVERIFY(!KnowledgeDb::iataCodeFromName(QStringLiteral("Brussels Airport, BE")).isValid());
QVERIFY(!KnowledgeDb::iataCodeFromName(QStringLiteral("Frankfurt")).isValid());
QTest::newRow("JFK") << s("John F. Kennedy International Airport") << KnowledgeDb::IataCode{"JFK"};
QTest::newRow("SFO") << s("San Francisco International") << KnowledgeDb::IataCode{"SFO"};
QTest::newRow("DUS") << s("Düsseldorf International") << KnowledgeDb::IataCode{"DUS"};
QTest::newRow("LCY") << s("London City") << KnowledgeDb::IataCode{"LCY"};
QTest::newRow("DTW") << s("DETROIT, MI (METROPOLITAN WAYNE CO)") << KnowledgeDb::IataCode{"DTW"};
// string normalization
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Sao Paulo-Guarulhos International")), KnowledgeDb::IataCode{"GRU"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("São Paulo-Guarulhos International")), KnowledgeDb::IataCode{"GRU"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Zurich")), KnowledgeDb::IataCode{"ZRH"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Dusseldorf International")), KnowledgeDb::IataCode{"DUS"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Almeria")), KnowledgeDb::IataCode{"LEI"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("ALMERÍA")), KnowledgeDb::IataCode{"LEI"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Keflavík")), KnowledgeDb::IataCode{"KEF"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Keflavik")), KnowledgeDb::IataCode{"KEF"});
QTest::newRow("GRU1") << s("Sao Paulo-Guarulhos International") << KnowledgeDb::IataCode{"GRU"};
QTest::newRow("GRU2") << s("São Paulo-Guarulhos International") << KnowledgeDb::IataCode{"GRU"};
QTest::newRow("ZRH2") << s("Zurich") << KnowledgeDb::IataCode{"ZRH"};
QTest::newRow("DUS2") << s("Dusseldorf International") << KnowledgeDb::IataCode{"DUS"};
QTest::newRow("LEI1") << s("Almeria") << KnowledgeDb::IataCode{"LEI"};
QTest::newRow("LEI2") << s("ALMERÍA") << KnowledgeDb::IataCode{"LEI"};
QTest::newRow("KEF1") << s("Keflavík") << KnowledgeDb::IataCode{"KEF"};
QTest::newRow("KEF2") << s("Keflavik") << KnowledgeDb::IataCode{"KEF"};
// alternative transliterations
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Duesseldorf International")), KnowledgeDb::IataCode{"DUS"});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Zuerich")), KnowledgeDb::IataCode{"ZRH"});
QTest::newRow("DUS3") << s("Duesseldorf International") << KnowledgeDb::IataCode{"DUS"};
QTest::newRow("ZRH3") << s("Zuerich") << KnowledgeDb::IataCode{"ZRH"};
// IATA code contained in name
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Frankfurt")), KnowledgeDb::IataCode{});
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("Frankfurt FRA")), KnowledgeDb::IataCode{"FRA"});
QTest::newRow("FRA") << s("Frankfurt FRA") << KnowledgeDb::IataCode{"FRA"};
// multiple unique hits / unique hit on valid (but wrong) IATA code
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("GIMPO INTERNATIONAL TERMINAL I - SKY CITY INTERNATIONAL TERMINAL")), KnowledgeDb::IataCode{"GMP"});
QTest::newRow("GMP") << s("GIMPO INTERNATIONAL TERMINAL I - SKY CITY INTERNATIONAL TERMINAL") << KnowledgeDb::IataCode{"GMP"};
// Amadeus/BCD airport names containing city/country data too, and using "INTL" abbreviation
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("SAN FRANCISCO CA SAN FRANCISCO INTL")), KnowledgeDb::IataCode{"SFO"});
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("BEIJING CN CAPITAL INTL")), (std::vector<IataCode>{IataCode{"PEK"}, IataCode{"PKX"}}));
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("FRANKFURT DE - FRANKFURT INTL")), KnowledgeDb::IataCode{}); // ambiguous with Frankfurt Hahn
QCOMPARE(KnowledgeDb::iataCodeFromName(QStringLiteral("SEATTLE US - SEATTLE TACOMA INTL")), KnowledgeDb::IataCode{"SEA"});
QTest::newRow("SFO2") << s("SAN FRANCISCO CA SAN FRANCISCO INTL") << KnowledgeDb::IataCode{"SFO"};
QTest::newRow("SEA") << s("SEATTLE US - SEATTLE TACOMA INTL") << KnowledgeDb::IataCode{"SEA"};
}
// Data-driven test body, executed once per row of the "name"/"iataCode" data table.
// Each row is expected to resolve to exactly one IATA code candidate.
void iataLookupTest()
{
// QFETCH declares a local variable named after the data column it reads.
QFETCH(QString, name);
QFETCH(KnowledgeDb::IataCode, iataCode);
// Comparing against a one-element vector also fails if extra candidates are returned.
QCOMPARE(KnowledgeDb::iataCodesFromName(name), (std::vector<IataCode>{iataCode}));
}
void iataCodeMultiLookupTest()
......@@ -199,8 +203,14 @@ private Q_SLOTS:
// multiple unique hits / unique hit on valid (but wrong) IATA code
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("SEOUL KR GIMPO INTERNATIONAL TERMINAL I - SKY CITY INTERNATIONAL TERMINAL")), (std::vector<IataCode>{IataCode{"GMP"}, IataCode{"ICN"}}));
// "wrong" us of "international"
// "wrong" use of "international"
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("FRANKFURT DE - FRANKFURT INTL")), (std::vector<IataCode>{IataCode{"FRA"}, IataCode{"HHN"}}));
// not unique or conflicting
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("Flughafen Berlin")), (std::vector<IataCode>{IataCode{"BER"}, IataCode{"BML"}, IataCode{"TXL"}}));
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("Charles de Gaulle Orly")), (std::vector<IataCode>{IataCode{"CDG"}, IataCode{"ORY"}}));
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("Brussels Airport, BE")), (std::vector<IataCode>{IataCode{"BRU"}, IataCode{"CRL"}}));
QCOMPARE(KnowledgeDb::iataCodesFromName(QStringLiteral("BEIJING CN CAPITAL INTL")), (std::vector<IataCode>{IataCode{"PEK"}, IataCode{"PKX"}}));
}
void countryDataTest()
......
......@@ -132,13 +132,7 @@ void FlightPostProcessor::lookupAirportCodes(const Airport &airport, std::vector
return;
}
// TODO if we don't need this elsewhere, maybe merge those two methods and do this logic internally more efficiently?
const auto code = KnowledgeDb::iataCodeFromName(airport.name());
if (code.isValid()) {
codes.push_back(code);
} else {
codes = KnowledgeDb::iataCodesFromName(airport.name());
}
codes = KnowledgeDb::iataCodesFromName(airport.name());
}
void FlightPostProcessor::pickAirportByDistance(int duration, const std::vector<KnowledgeDb::IataCode>& startCodes, std::vector<KnowledgeDb::IataCode>& codes) const
......
......@@ -224,7 +224,7 @@ std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(QStringView na
// check if the name contained the IATA code as disambiguation already
const auto code = iataCodeForIataCodeFragment(fragments);
if (code.isValid()) {
if (code.isValid() && std::find(codes.begin(), codes.end(), code) != codes.end()) {
return {code};
}
......@@ -241,44 +241,4 @@ std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(QStringView na
return codes;
}
/** Attempts to find the unique IATA code for the given airport name.
 *  Returns a default-constructed IataCode when no single candidate can be determined.
 */
KnowledgeDb::IataCode KnowledgeDb::iataCodeFromName(QStringView name)
{
    // split the name into fragments and keep a normalized copy of each for the lookups
    const auto rawFragments = splitToFragments(name);
    QStringList lookupFragments;
    lookupFragments.reserve(rawFragments.size());
    for (const auto &fragment : rawFragments) {
        lookupFragments.push_back(normalizeFragment(fragment));
    }

    // runs one lookup pass on the current fragments, starting from an empty candidate list,
    // and reports whether it produced exactly one candidate
    std::vector<IataCode> candidates;
    const auto hasUniqueCandidate = [&candidates, &lookupFragments]() {
        candidates.clear();
        iataCodeForNameFragments(lookupFragments, candidates);
        return candidates.size() == 1;
    };

    // first pass: normalized fragments as-is
    if (hasUniqueCandidate()) {
        return candidates[0];
    }

    // second pass: alternative transliterations of e.g. umlauts replaced
    applyTransliterations(lookupFragments);
    if (hasUniqueCandidate()) {
        return candidates[0];
    }

    // the name may already contain the IATA code itself as disambiguation
    const auto embeddedCode = iataCodeForIataCodeFragment(rawFragments);
    if (embeddedCode.isValid()) {
        return embeddedCode;
    }

    // last resort: cut off possibly confusing fancy terminal names and retry
    const auto terminalIt = std::find(lookupFragments.begin(), lookupFragments.end(), QStringLiteral("terminal"));
    if (terminalIt == lookupFragments.end()) {
        return {};
    }
    lookupFragments.erase(terminalIt, lookupFragments.end());
    if (hasUniqueCandidate()) {
        return candidates[0];
    }
    return {};
}
}
......@@ -36,8 +36,6 @@ KITINERARY_EXPORT QTimeZone timezoneForAirport(IataCode iataCode);
/** Returns the country the airport with IATA code @p iataCode is in. */
KITINERARY_EXPORT CountryId countryForAirport(IataCode iataCode);
/** Attempts to find the unique IATA code for the given airport name. */
KITINERARY_EXPORT IataCode iataCodeFromName(QStringView name);
/** Returns all possible IATA code candidates for the given airport name.
 *  A name that identifies a single airport yields a one-element vector,
 *  an ambiguous name yields the codes of all matching airports.
 */
KITINERARY_EXPORT std::vector<IataCode> iataCodesFromName(QStringView name);
}
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment