Commit debbe6c2 authored by Volker Krause
Browse files

Switch from Gares & Connexions (P3104) to SNCF station codes (P8181)

SNCF station codes are a strict superset and also cover e.g. foreign stations
not managed by Gares & Connexions, and thus are what we actually want here
to make international tickets work as well. They only recently became
available in Wikidata though.

This does not contain the actual db update yet; that will happen as part
of the removal of the explicit timezone information.
parent 2584d251
......@@ -33,5 +33,6 @@ Extracted data can be augmented by static knowledge obtained from Wikidata:
Via KItinerary::KnowledgeDb:
* Airport IATA codes, countries, timezones and geo coordinates.
* Train station countries, timezones and geo coordinates.
* Train station lookup by IBNR or Gares & Connexion IDs.
* Train station lookup by UIC, IBNR, SNCF, VR or Indian Railway station identifiers.
* Country ISO codes, driving side and used power plugs.
* Timezone and country lookup from a geo coordinate.
......@@ -142,27 +142,27 @@ private Q_SLOTS:
QCOMPARE(station.country, CountryId{"FI"});
}
void testGaresConnexionsIdLookup()
void testSncfStationIdLookup()
{
auto station = KnowledgeDb::stationForGaresConnexionsId({});
auto station = KnowledgeDb::stationForSncfStationId({});
QVERIFY(!station.coordinate.isValid());
QCOMPARE(toQTimeZone(station.timezone), QTimeZone());
station = KnowledgeDb::stationForGaresConnexionsId(GaresConnexionsId{"XXXXX"});
station = KnowledgeDb::stationForSncfStationId(SncfStationId{"XXXXX"});
QVERIFY(!station.coordinate.isValid());
QCOMPARE(toQTimeZone(station.timezone), QTimeZone());
station = KnowledgeDb::stationForGaresConnexionsId(GaresConnexionsId{"FRAES"});
station = KnowledgeDb::stationForSncfStationId(SncfStationId{"FRAES"});
QVERIFY(station.coordinate.isValid());
QCOMPARE(toQTimeZone(station.timezone), QTimeZone("Europe/Paris"));
QCOMPARE(station.country, CountryId{"FR"});
station = KnowledgeDb::stationForGaresConnexionsId(GaresConnexionsId{QStringLiteral("FRXYT")});
station = KnowledgeDb::stationForSncfStationId(SncfStationId{QStringLiteral("FRXYT")});
QVERIFY(station.coordinate.isValid());
QCOMPARE(toQTimeZone(station.timezone), QTimeZone("Europe/Paris"));
QCOMPARE(station.country, CountryId{"FR"});
station = KnowledgeDb::stationForGaresConnexionsId(GaresConnexionsId{"CHGVA"});
station = KnowledgeDb::stationForSncfStationId(SncfStationId{"CHGVA"});
QEXPECT_FAIL("", "Wikidata does not supply ids for non-French stations yet", Continue);
QVERIFY(station.coordinate.isValid());
QEXPECT_FAIL("", "Wikidata does not supply ids for non-French stations yet", Continue);
......
......@@ -87,8 +87,11 @@ class KITINERARY_EXPORT Place
/** Identifier.
* We use the following schemas currently:
* - 'uic:', UIC station code (see https://www.wikidata.org/wiki/Property:P722)
* - 'sncf:', Gares & Connexions ID, (see https://www.wikidata.org/wiki/Property:P3104), French train station identifier.
* - 'ibnr:', Internationale Bahnhofsnummer, (see https://www.wikidata.org/wiki/Property:P954), German train station identifier.
* - 'sncf:', SNCF station id (see https://www.wikidata.org/wiki/Property:P8181), French train station identifier.
* - 'ibnr:', Internationale Bahnhofsnummer (see https://www.wikidata.org/wiki/Property:P954), German train station identifier.
* - 'ir:', Indian Railways station code (see https://www.wikidata.org/wiki/Property:P5696).
* - 'vrfi:', Finnish railway station codes.
* - 'benerail:', Belgian railway station codes.
* @see http://schema.org/docs/datamodel.html#identifierBg
*/
KITINERARY_PROPERTY(QString, identifier, setIdentifier)
......
......@@ -245,7 +245,7 @@ TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation sta
if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
station.setIdentifier(QString());
} else if (id.startsWith(QLatin1String("sncf:")) && id.size() == 10) {
const auto record = KnowledgeDb::stationForGaresConnexionsId(KnowledgeDb::GaresConnexionsId{id.mid(5)});
const auto record = KnowledgeDb::stationForSncfStationId(KnowledgeDb::SncfStationId{id.mid(5)});
applyStationData(record, station);
applyStationCountry(id.mid(5, 2).toUpper(), station);
} else if (id.startsWith(QLatin1String("ibnr:")) && id.size() == 12) {
......@@ -283,7 +283,7 @@ QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, cons
QTimeZone tz;
if (station.identifier().startsWith(QLatin1String("sncf:"))) {
const auto record = KnowledgeDb::stationForGaresConnexionsId(KnowledgeDb::GaresConnexionsId{station.identifier().mid(5)});
const auto record = KnowledgeDb::stationForSncfStationId(KnowledgeDb::SncfStationId{station.identifier().mid(5)});
tz = KnowledgeDb::toQTimeZone(record.timezone);
} else if (station.identifier().startsWith(QLatin1String("ibnr:"))) {
const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{station.identifier().mid(5).toUInt()});
......
......@@ -46,7 +46,7 @@ static bool operator<(const TrainStationDbGenerator::Station &lhs, const QUrl &r
bool TrainStationDbGenerator::generate(QIODevice *out)
{
// retrieve content from Wikidata
if (!fetchIBNR() || !fetchUIC() || !fetchGaresConnexions() || !fetchIndianRailwaysStationCode() || !fetchFinishStationCodes()) {
if (!fetchIBNR() || !fetchUIC() || !fetchSncf() || !fetchIndianRailwaysStationCode() || !fetchFinishStationCodes()) {
return false;
}
if (!fetchCountryInformation()) {
......@@ -70,7 +70,7 @@ namespace KnowledgeDb {
writeStationData(out);
writeIBNRMap(out);
writeUICMap(out);
writeGareConnexionMap(out);
writeSncfMap(out);
writeIndianRailwaysMap(out);
writeVRMap(out);
out->write(R"(
......@@ -164,12 +164,12 @@ bool TrainStationDbGenerator::fetchUIC()
return true;
}
bool TrainStationDbGenerator::fetchGaresConnexions()
bool TrainStationDbGenerator::fetchSncf()
{
const auto stationArray = WikiData::query(R"(
SELECT DISTINCT ?station ?stationLabel ?gareConnexionId ?coord WHERE {
SELECT DISTINCT ?station ?stationLabel ?sncfId ?coord WHERE {
?station (wdt:P31/wdt:P279*) wd:Q55488.
?station wdt:P3104 ?gareConnexionId.
?station wdt:P8181 ?sncfId.
OPTIONAL { ?station wdt:P625 ?coord. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} ORDER BY (?station))", "wikidata_trainstation_gare_connexion.json");
......@@ -185,16 +185,16 @@ bool TrainStationDbGenerator::fetchGaresConnexions()
const auto id = stationObj.value(QLatin1String("gareConnexionId")).toObject().value(QLatin1String("value")).toString().toUpper();
if (id.size() != 5 || !Util::containsOnlyLetters(id)) {
++m_idFormatViolations;
qWarning() << "Gares & Connexions ID format violation" << id << uri;
qWarning() << "SNCF ID format violation" << id << uri;
continue;
}
const auto it = m_garesConnexionsIdMap.find(id);
if (it != m_garesConnexionsIdMap.end() && (*it).second != uri) {
const auto it = m_sncfIdMap.find(id);
if (it != m_sncfIdMap.end() && (*it).second != uri) {
++m_idConflicts;
qWarning() << "Conflict on Gares & Connexions ID" << id << uri << m_garesConnexionsIdMap[id];
qWarning() << "Conflict on SNCF ID" << id << uri << m_sncfIdMap[id];
} else {
m_garesConnexionsIdMap[id] = uri;
m_sncfIdMap[id] = uri;
}
}
......@@ -419,15 +419,15 @@ void TrainStationDbGenerator::writeUICMap(QIODevice* out)
out->write("};\n\n");
}
void TrainStationDbGenerator::writeGareConnexionMap(QIODevice *out)
void TrainStationDbGenerator::writeSncfMap(QIODevice *out)
{
out->write("static constexpr const TrainStationIdIndex<GaresConnexionsId> garesConnexionsId_table[] = {\n");
for (const auto &it : m_garesConnexionsIdMap) {
out->write("static constexpr const TrainStationIdIndex<SncfStationId> sncfStationId_table[] = {\n");
for (const auto &it : m_sncfIdMap) {
const auto station = std::lower_bound(m_stations.begin(), m_stations.end(), it.second);
if (station == m_stations.end() || (*station).uri != it.second) {
continue;
}
out->write(" { GaresConnexionsId{\"");
out->write(" { SncfStationId{\"");
out->write(it.first.toUtf8());
out->write("\"}, TrainStationIndex{");
out->write(QByteArray::number((int)std::distance(m_stations.begin(), station)));
......@@ -442,7 +442,7 @@ void TrainStationDbGenerator::writeIndianRailwaysMap(QIODevice *out)
{
// variable length identifiers, so we need a string table
std::vector<uint16_t> offsets;
offsets.reserve(m_garesConnexionsIdMap.size());
offsets.reserve(m_sncfIdMap.size());
uint16_t offset = 0;
out->write("static constexpr const char indianRailwaysSationCode_stringtable[] =\n");
......@@ -514,7 +514,7 @@ void TrainStationDbGenerator::printSummary()
qDebug() << "Generated database containing" << m_stations.size() << "train stations";
qDebug() << "IBNR index:" << m_ibnrMap.size() << "elements";
qDebug() << "UIC index:" << m_uicMap.size() << "elements";
qDebug() << "Gares & Connexions ID index:" << m_garesConnexionsIdMap.size() << "elements";
qDebug() << "SNCF station code index:" << m_sncfIdMap.size() << "elements";
qDebug() << "Indian Railwaiys station code index:" << m_indianRailwaysMap.size() << "elements";
qDebug() << "VR (Finland) station code index:" << m_vrfiMap.size() << "elements";
qDebug() << "Identifier collisions:" << m_idConflicts;
......
......@@ -52,7 +52,7 @@ public:
private:
bool fetchIBNR();
bool fetchUIC();
bool fetchGaresConnexions();
bool fetchSncf();
bool fetchIndianRailwaysStationCode();
bool fetchFinishStationCodes();
bool fetchCountryInformation();
......@@ -61,7 +61,7 @@ private:
void writeStationData(QIODevice *out);
void writeIBNRMap(QIODevice *out);
void writeUICMap(QIODevice *out);
void writeGareConnexionMap(QIODevice *out);
void writeSncfMap(QIODevice *out);
void writeIndianRailwaysMap(QIODevice *out);
void writeVRMap(QIODevice *out);
void printSummary();
......@@ -69,7 +69,7 @@ private:
std::vector<Station> m_stations;
std::map<uint32_t, QUrl> m_ibnrMap;
std::map<uint32_t, QUrl> m_uicMap;
std::map<QString, QUrl> m_garesConnexionsIdMap;
std::map<QString, QUrl> m_sncfIdMap;
std::map<QString, QUrl> m_indianRailwaysMap;
std::map<QString, QUrl> m_vrfiMap;
Timezones m_tzDb;
......
......@@ -35,7 +35,7 @@ static_assert(trainstation_table_size < (1 << (sizeof(TrainStationIndex) * 8)),
}
}
GaresConnexionsId::GaresConnexionsId(const QString& id)
SncfStationId::SncfStationId(const QString& id)
{
if (id.size() != 5) {
return;
......@@ -73,14 +73,14 @@ TrainStation KnowledgeDb::stationForUic(UICStation uic)
return trainstation_table[(*it).stationIndex.value()];
}
TrainStation KnowledgeDb::stationForGaresConnexionsId(GaresConnexionsId garesConnexionsId)
TrainStation KnowledgeDb::stationForSncfStationId(SncfStationId sncfId)
{
const auto gcIt = std::lower_bound(std::begin(garesConnexionsId_table), std::end(garesConnexionsId_table), garesConnexionsId);
if (gcIt == std::end(garesConnexionsId_table) || (*gcIt).stationId != garesConnexionsId) {
const auto it = std::lower_bound(std::begin(sncfStationId_table), std::end(sncfStationId_table), sncfId);
if (it == std::end(sncfStationId_table) || (*it).stationId != sncfId) {
return {Coordinate{}, Tz::Undefined, CountryId{}};
}
return trainstation_table[(*gcIt).stationIndex.value()];
return trainstation_table[(*it).stationIndex.value()];
}
TrainStation KnowledgeDb::stationForIndianRailwaysStationCode(const QString &code)
......
......@@ -77,19 +77,19 @@ class UICStation : public UnalignedNumber<3> {
using UnalignedNumber<3>::UnalignedNumber;
};
/** Gares & Connexion ID.
* 2 letters ISO country code, 5 letters station id, expected to be in upper case.
/** SNCF station id.
* 2 letters ISO country code, 3 letters station id, expected to be in upper case.
*/
class GaresConnexionsId : public UnalignedNumber<3>
class SncfStationId : public UnalignedNumber<3>
{
public:
inline constexpr GaresConnexionsId() = default;
inline explicit constexpr GaresConnexionsId(const char s[5])
inline constexpr SncfStationId() = default;
inline explicit constexpr SncfStationId(const char s[5])
: UnalignedNumber<3>(fromChars(s))
{
}
KITINERARY_EXPORT explicit GaresConnexionsId(const QString &id);
KITINERARY_EXPORT explicit SncfStationId(const QString &id);
private:
static inline constexpr uint32_t fromChars(const char s[5])
......@@ -128,8 +128,8 @@ KITINERARY_EXPORT TrainStation stationForIbnr(IBNR ibnr);
/** Lookup train station data by UIC station id. */
KITINERARY_EXPORT TrainStation stationForUic(UICStation uic);
/** Lookup train station data by Gares & Connexions ID. */
KITINERARY_EXPORT TrainStation stationForGaresConnexionsId(GaresConnexionsId garesConnexionsId);
/** Lookup train station data by SNCF station id. */
KITINERARY_EXPORT TrainStation stationForSncfStationId(SncfStationId sncfId);
/** Lookup train station data by Indian Railways station code. */
KITINERARY_EXPORT TrainStation stationForIndianRailwaysStationCode(const QString &code);
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment