Commit 02483128 authored by Volker Krause
Browse files

Support 4 digit times to the extent possible

The problem with 4 digit times is their ambiguity with year numbers. To solve
this, we now treat any 4 digit number that would be a valid time and lies
outside of the [2001-2099] range as a time. That leaves all practically
relevant years valid and still covers the vast majority of 4 digit times
found in the OSM corpus.

This is worth it given how common the 4 digit time mistake is in OSM data.

BUG: 446137
parent 33b00e21
Pipeline #105573 passed with stage
in 1 minute and 26 seconds
......@@ -76,7 +76,7 @@ private Q_SLOTS:
QTest::newRow("year open end") << QByteArray("2010+") << QDateTime({2010, 1, 1}, {0, 0}) << QDateTime();
QTest::newRow("year interval odd") << QByteArray("2011-2031/2") << QDateTime({2021, 1, 1}, {0, 0}) << QDateTime({2022, 1, 1}, {0, 0});
QTest::newRow("year interval even") << QByteArray("2010-2030/2") << QDateTime({2020, 1, 1}, {0, 0}) << QDateTime({2021, 1, 1}, {0, 0});
QTest::newRow("year interval 4") << QByteArray("2000-2100/4") << QDateTime({2020, 1, 1}, {0, 0}) << QDateTime({2021, 1, 1}, {0, 0});
QTest::newRow("year interval 4") << QByteArray("2004-2096/4") << QDateTime({2020, 1, 1}, {0, 0}) << QDateTime({2021, 1, 1}, {0, 0});
QTest::newRow("year time") << QByteArray("2021 10:00-20:00") << QDateTime({2021, 1, 1}, {10, 0}) << QDateTime({2021, 1, 1}, {20, 0});
QTest::newRow("year open end interval") << QByteArray("2016/5") << QDateTime({2021, 1, 1}, {0, 0}) << QDateTime({2022, 1, 1}, {0, 0});
......@@ -195,7 +195,7 @@ private Q_SLOTS:
void testNoMatch_data()
{
QTest::addColumn<QByteArray>("expression");
QTest::newRow("year range") << QByteArray("1980-2000");
QTest::newRow("year range") << QByteArray("1980-1999");
QTest::newRow("full date") << QByteArray("2020 Nov 6");
QTest::newRow("date range") << QByteArray("1980 Jan 1-2020 Nov 6");
QTest::newRow("year/month") << QByteArray("2020 Oct");
......
......@@ -363,6 +363,11 @@ private Q_SLOTS:
T2("Friday and Saturday 24/7 Sunday-Thursday 4:00 am to 12:00 am", "Fr,Sa 00:00-24:00; Su-Th 04:00-24:00"); // bug 445962
// 24/7 as standalone small range selector not supported yet
//T2("Apr-Oct: 24/7; Nov-Mar: Mo-Su 06:00-22:00", "Apr-Oct: 00:00-24:00; Nov-Mar: Mo-Su 06:00-22:00");
// 4 digit times
T2("0600-1800", "06:00-18:00");
T2("0700-2000", "07:00-20:00");
T2("Tu-Th 8:30-17:30, Fr 8:30-1700", "Tu-Th 08:30-17:30, Fr 08:30-17:00");
#undef T
#undef T2
#undef T3
......@@ -417,7 +422,6 @@ private Q_SLOTS:
// from https://wiki.openstreetmap.org/wiki/Key:opening_hours#Common_mistakes
T("7/8-23");
T("0600-1800");
T("07;00-2;00pm");
T("08.00-16.00, public room till 03.00 a.m");
T("09:00-21:00 TEL/072(360)3200");
......
......@@ -25,16 +25,24 @@
SPACE ([ \t\r\n]| | | )+
INTEGER [0-9]+
CYRILLIC (а|б|в|г|д|е|ё|ж|з|и|й|к|л|м|н|о|п|р|с|т|у|ф|х|ц|ч|ш|щ|ъ|ы|ь|э|ю|я)
YEAR [1-2][019][0-9][0-9]
%%
{SPACE} {}
{YEAR} { yylval->num = std::strtol(yytext, nullptr, 10); return T_YEAR; }
[0-9]+ {
/* Generic digit-sequence rule: classifies the match as a year, a 4 digit
 * time written without a separator (hhmm), or a plain integer.
 * The matched text is parsed as a decimal number into yylval->num in all cases. */
yylval->num = std::strtol(yytext, nullptr, 10);
if (yyleng == 4) {
// Exactly four digits are ambiguous between a year and an hhmm time.
// Year if the value is in 2001..2099, or if it is >= 1000 and its last
// two digits are >= 60 and therefore cannot be a minute value.
if ((yylval->num > 2000 && yylval->num < 2100) || (yylval->num >= 1000 && (yylval->num % 100) >= 60)) {
return T_YEAR;
}
// Not a year: accept as hhmm when the value is at most 2400 and the last
// two digits are a valid minute value (< 60). This check must stay after
// the year check, as values in 2001..2099 would otherwise match here too.
if (yylval->num <= 2400 && (yylval->num % 100) < 60) {
return T_4DIGIT_TIME;
}
}
// Any other length, or a four digit value that is neither year nor time.
return T_INTEGER;
}
;/. { return T_NORMAL_RULE_SEPARATOR; } // technically this should have space after the semicolon, but that is not always followed in OSM data
", " { return T_ADDITIONAL_RULE_SEPARATOR; }
......@@ -78,8 +86,6 @@ YEAR [1-2][019][0-9][0-9]
[0-5]?[0-9](\ ?a\.?m\.?|a) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_AM; }
[0-5]?[0-9](\ ?p\.?m\.?|p) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_PM; }
{INTEGER} { yylval->num = std::strtol(yytext, nullptr, 10); return T_INTEGER; }
/* technically weekday names should be two letter English abbreviations, but reality is more creative */
Mondays? { yylval->num = 1; return T_WEEKDAY; }
Tuesdays? { yylval->num = 2; return T_WEEKDAY; }
......
......@@ -141,6 +141,7 @@ typedef void* yyscan_t;
%token T_ALT_TIME_SEP_OR_SUFFIX
%token <num> T_ALT_TIME_AM
%token <num> T_ALT_TIME_PM
%token <num> T_4DIGIT_TIME
%token T_ALT_RANGE_SEP
......@@ -857,6 +858,9 @@ ExtendedHourMinute:
Time::convertFromPm($$);
if (!Time::isValid($$)) { YYABORT; }
}
| T_4DIGIT_TIME[T] {
$$ = { Time::NoEvent, $T / 100, $T % 100 }; // lexer ensures this is always a valid time
}
;
RangeSeparator:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment