airportdb.cpp 10.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
   Copyright (c) 2017 Volker Krause <vkrause@kde.org>

   This library is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published by
   the Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This library is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
   License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB.  If not, write to the
   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.
*/

#include "airportdb.h"
#include "airportdb_p.h"
22
#include "airportdb_data.cpp"
23
#include "stringutil.h"
24
#include "timezonedb.h"
25
26
27
28
29
30
31
32

#include <QDebug>
#include <QRegularExpression>
#include <QTimeZone>

#include <algorithm>
#include <cstring>

33
namespace KItinerary {
34
namespace KnowledgeDb {
35

36
37
38
39
// Compile-time sanity check on the Airport record: its alignment requirement
// must not be larger than the record itself.
static_assert(alignof(Airport) <= sizeof(Airport), "Airport struct alignment too big!");

// Size of airport_table expressed in Airport-sized entries.
static constexpr auto airport_table_size = sizeof(airport_table) / sizeof(Airport);
// coordinate_table is indexed with positions taken from airport_table (see
// coordinateForAirport()), so both tables must hold the same number of entries.
static_assert(airport_table_size == sizeof(coordinate_table) / sizeof(KnowledgeDb::Coordinate), "Airport coordinate table size mismatch!");
40

41
static bool operator<(const Airport &lhs, IataCode rhs)
42
{
43
    return lhs.iataCode < rhs;
44
45
}

46
// Returns the coordinate_table entry belonging to the airport with the given
// IATA code, or a default-constructed Coordinate if airport_table has no such code.
Coordinate coordinateForAirport(IataCode iataCode)
{
    const auto tableBegin = std::begin(airport_table);
    const auto tableEnd = std::end(airport_table);

    // binary search; lower_bound only yields the insertion point, so the hit is verified below
    const auto entry = std::lower_bound(tableBegin, tableEnd, iataCode);
    if (entry != tableEnd && entry->iataCode == iataCode) {
        // coordinate_table is addressed by the airport's position in airport_table
        const auto index = std::distance(tableBegin, entry);
        return coordinate_table[index];
    }
    return {};
}

56
// Returns the timezone recorded for the airport with the given IATA code,
// converted via KnowledgeDb::toQTimeZone(), or a default-constructed QTimeZone
// if airport_table has no such code.
QTimeZone timezoneForAirport(IataCode iataCode)
{
    const auto tableEnd = std::end(airport_table);

    // binary search; lower_bound only yields the insertion point, so the hit is verified below
    const auto entry = std::lower_bound(std::begin(airport_table), tableEnd, iataCode);
    if (entry != tableEnd && entry->iataCode == iataCode) {
        return KnowledgeDb::toQTimeZone(entry->timezone);
    }
    return {};
}

66
// Returns the country recorded for the airport with the given IATA code,
// or a default-constructed CountryId if airport_table has no such code.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
{
    const auto tableEnd = std::end(airport_table);

    // binary search; lower_bound only yields the insertion point, so the hit is verified below
    const auto entry = std::lower_bound(std::begin(airport_table), tableEnd, iataCode);
    if (entry != tableEnd && entry->iataCode == iataCode) {
        return entry->country;
    }
    return {};
}

76
77
78
79
80
81
82
83
84
// Runs a single name fragment through StringUtil::normalize() and expands the
// abbreviation "intl" to "international".
static QString normalizeFragment(const QString &s)
{
    const QString normalized = StringUtil::normalize(s);
    if (normalized != QLatin1String("intl")) {
        return normalized;
    }

    // resolve abbreviations
    return QStringLiteral("international");
}

85
86
87
88
89
90
91
92
93
94
95
96
// Rewrites every fragment in place, replacing the two-letter sequences
// "ae", "oe" and "ue" (in that order) by their single base vowel.
static void applyTransliterations(QStringList &fragments)
{
    // The replacement side carries no diacritics because StringUtil::normalize
    // has already been applied to the fragments; for the same reason the
    // fragments are already case-folded, so lower-case patterns suffice.
    struct Rule {
        const char *from;
        const char *to;
    };
    static constexpr Rule rules[] = {
        {"ae", "a"},
        {"oe", "o"},
        {"ue", "u"},
    };

    for (auto &fragment : fragments) {
        for (const auto &rule : rules) {
            fragment.replace(QLatin1String(rule.from), QLatin1String(rule.to));
        }
    }
}

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// Looks up a single (normalized) name fragment in the unique-name index.
//
// Returns the IATA code of the airport referenced by the index entry whose
// string is byte-for-byte equal to the UTF-8 encoding of @p s, or a
// default-constructed IataCode if there is no such entry.
// The binary search assumes name1_string_index is sorted consistently with
// the comparator below (strncmp on the common prefix, shorter string first on a tie).
static IataCode iataCodeForUniqueFragment(const QString &s)
{
    // encode once: both the binary search and the exact-match verification need the UTF-8 form
    const QByteArray needle = s.toUtf8();

    const auto it = std::lower_bound(std::begin(name1_string_index), std::end(name1_string_index), needle, [](const Name1Index &lhs, const QByteArray &rhs) {
        const auto cmp = strncmp(name1_string_table + lhs.offset(), rhs.constData(), std::min<int>(lhs.length, rhs.size()));
        if (cmp == 0) {
            // common prefix is equal, the shorter string sorts first
            return lhs.length < rhs.size();
        }
        return cmp < 0;
    });

    // lower_bound only yields the insertion point, verify that it is an exact match
    if (it == std::end(name1_string_index) || it->length != needle.size() || strncmp(name1_string_table + it->offset(), needle.constData(), it->length) != 0) {
        return {};
    }
    return airport_table[it->iataIndex].iataCode;
}

112
static void iataCodeForUniqueFragment(const QStringList &fragments, std::vector<IataCode> &codes)
113
{
114
    for (const auto &s : fragments) {
115
116
        const auto foundCode = iataCodeForUniqueFragment(s);
        if (!foundCode.isValid()) {
117
            continue;
Laurent Montel's avatar
Laurent Montel committed
118
        }
119

120
121
122
        auto it = std::lower_bound(codes.begin(), codes.end(), foundCode);
        if (it == codes.end() || (*it) != foundCode) {
            codes.insert(it, foundCode);
Laurent Montel's avatar
Laurent Montel committed
123
        }
124
    }
125
126
}

127
static void iataCodeForNonUniqueFragments(const QStringList &fragments, std::vector<IataCode> &codes)
128
{
129
    // we didn't find a unique name fragment, try the non-unique index
130
131
    QSet<uint16_t> iataIdxs;
    for (const auto &s : fragments) {
132
        const auto it = std::lower_bound(std::begin(nameN_string_index), std::end(nameN_string_index), s.toUtf8(), [](const NameNIndex &lhs, const QByteArray &rhs) {
Laurent Montel's avatar
Laurent Montel committed
133
134
135
136
137
138
                const auto cmp = strncmp(nameN_string_table + lhs.strOffset, rhs.constData(), std::min<int>(lhs.strLength, rhs.size()));
                if (cmp == 0) {
                    return lhs.strLength < rhs.size();
                }
                return cmp < 0;
            });
139
        if (it == std::end(nameN_string_index) || it->strLength != s.toUtf8().size() || strncmp(nameN_string_table + it->strOffset, s.toUtf8().constData(), it->strLength) != 0) {
140
141
142
            continue;
        }

143
        // TODO we can do this in-place in codes
144
145
146
147
148
149
150
151
152
153
        QSet<uint16_t> candidates;
        candidates.reserve(it->iataCount);
        for (auto i = 0; i < it->iataCount; ++i) {
            candidates.insert(nameN_iata_table[it->iataOffset + i]);
        }
        if (iataIdxs.isEmpty()) { // first round
            iataIdxs = candidates;
            continue;
        }

154
155
156
157
158
        // ignore the imprecisely used "international" if it results in an empty set here
        if (s == QLatin1String("international") && !iataIdxs.intersects(candidates)) {
            continue;
        }

159
160
161
162
163
164
        iataIdxs &= candidates;
        if (iataIdxs.isEmpty()) {
            break;
        }
    }

165
166
    std::transform(iataIdxs.begin(), iataIdxs.end(), std::back_inserter(codes), [](const auto idx) { return airport_table[idx].iataCode; });
    std::sort(codes.begin(), codes.end());
167
}
168

169
170
171
172
173
174
175
// Searches the (not normalized) fragments for an embedded IATA code.
//
// Only fragments of exactly three upper-case characters are considered.
// Returns the airport code found among them, or a default-constructed
// IataCode if there is none, or if the result is ambiguous: a later candidate
// differs from the code already found, or a candidate is also a unique name
// fragment of a different airport.
static IataCode iataCodeForIataCodeFragment(const QStringList &fragments)
{
    const auto looksLikeIataCode = [](const QString &fragment) {
        return fragment.size() == 3 && std::all_of(fragment.begin(), fragment.end(), [](const auto c) { return c.isUpper(); });
    };

    IataCode result;
    for (const auto &fragment : fragments) {
        if (!looksLikeIataCode(fragment)) {
            continue;
        }

        const IataCode candidate{fragment};
        // a second, different candidate makes the result ambiguous
        if (result.isValid() && candidate != result) {
            return {};
        }

        // only accept candidates that are known airports
        const auto tableEnd = std::end(airport_table);
        const auto entry = std::lower_bound(std::begin(airport_table), tableEnd, candidate);
        if (entry != tableEnd && entry->iataCode == candidate) {
            result = candidate;
        }

        // check that this is only a IATA code, not also a (conflicting) name fragment
        const auto nameCode = iataCodeForUniqueFragment(normalizeFragment(fragment));
        if (nameCode.isValid() && result.isValid() && nameCode != result) {
            return {};
        }
    }
    return result;
}

196
static void iataCodeForNameFragments(const QStringList &fragments, std::vector<IataCode> &codes)
197
{
198
199
200
    iataCodeForUniqueFragment(fragments, codes);
    if (!codes.empty()) {
        return;
201
    }
202
203
204
205
206
    iataCodeForNonUniqueFragments(fragments, codes);
}

// Splits an airport name into fragments at spaces, digits and the
// punctuation characters listed in the character class below.
// Empty parts are dropped.
static QStringList splitToFragments(const QString &name)
{
    // Compile the pattern once and reuse it, rather than constructing and
    // compiling a new QRegularExpression on every call.
    static const QRegularExpression separators(QStringLiteral("[ 0-9/'\"\\(\\)&\\,.–„-]"));

#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
    return name.split(separators, QString::SkipEmptyParts);
#else
    return name.split(separators, Qt::SkipEmptyParts);
#endif
}

215
216
}

217
std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(const QString &name)
218
{
219
    const auto fragments = splitToFragments(name);
220
221
    QStringList normalizedFragments;
    normalizedFragments.reserve(fragments.size());
222
    std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });
223

224
    std::vector<IataCode> codes, candidates;
225
    iataCodeForNameFragments(normalizedFragments, codes);
226
227

    // try again, with alternative translitarations of e.g. umlauts replaced
228
    applyTransliterations(normalizedFragments);
229
230
231
    iataCodeForNameFragments(normalizedFragments, candidates);
    if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
        codes = std::move(candidates);
232
233
234
    }

    // check if the name contained the IATA code as disambiguation already
235
    const auto code = iataCodeForIataCodeFragment(fragments);
236
    if (code.isValid()) {
237
        return {code};
238
239
240
241
242
243
    }

    // attempt to cut off possibly confusing fancy terminal names
    auto it = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
    if (it != normalizedFragments.end()) {
        normalizedFragments.erase(it, normalizedFragments.end());
244
245
246
247
248
        candidates.clear();
        iataCodeForNameFragments(normalizedFragments, candidates);
        if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
            codes = std::move(candidates);
        }
249
    }
250
    return codes;
251
}
252

253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
// Returns the IATA code of the airport the given name refers to, or a
// default-constructed IataCode if the name cannot be resolved to exactly one airport.
//
// The following attempts are made in order, the first one that yields a
// result wins: a name index lookup as-is, a name index lookup with
// alternative transliterations applied, an IATA code embedded in the name,
// and a name index lookup with everything from a "terminal" fragment onwards cut off.
KnowledgeDb::IataCode KnowledgeDb::iataCodeFromName(const QString &name)
{
    const auto fragments = splitToFragments(name);
    QStringList normalizedFragments;
    normalizedFragments.reserve(fragments.size());
    std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });

    std::vector<IataCode> codes;
    // runs a fresh name lookup and reports whether it identified exactly one airport
    const auto resolvesUniquely = [&codes](const QStringList &nameFragments) {
        codes.clear();
        iataCodeForNameFragments(nameFragments, codes);
        return codes.size() == 1;
    };

    if (resolvesUniquely(normalizedFragments)) {
        return codes[0];
    }

    // try again, with alternative transliterations of e.g. umlauts replaced
    applyTransliterations(normalizedFragments);
    if (resolvesUniquely(normalizedFragments)) {
        return codes[0];
    }

    // check if the name contained the IATA code as disambiguation already
    const auto code = iataCodeForIataCodeFragment(fragments);
    if (code.isValid()) {
        return code;
    }

    // attempt to cut off possibly confusing fancy terminal names
    auto terminalPos = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
    if (terminalPos == normalizedFragments.end()) {
        return {};
    }
    normalizedFragments.erase(terminalPos, normalizedFragments.end());
    if (resolvesUniquely(normalizedFragments)) {
        return codes[0];
    }
    return {};
}
292

293
}