%{
/*
    SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
    SPDX-License-Identifier: LGPL-2.0-or-later
*/

#include "openinghoursparser_p.h"

#include <cstdlib>
#include <cstring>
#include <initializer_list>

/* Column tracking: flex expands YY_USER_ACTION ahead of every rule action, so each
   token starts where the previous one ended and extends over yyleng bytes. */
#define YY_USER_ACTION yylloc->first_column = yylloc->last_column; yylloc->last_column += yyleng;

%}

%option warn nodefault
/* technically the case of all tokens is clearly defined, but reality ignores that in parts, so we do the same */
%option caseless
/* reentrant scanner using the bison calling convention: actions receive yylval and yylloc as pointers */
%option reentrant
%option noyywrap
%option nounput
%option never-interactive
%option bison-bridge
%option bison-locations
%option yylineno

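/* One or more whitespace characters. NOTE(review): the single-character alternatives after the bracket expression look like non-ASCII space variants - confirm their code points. */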
SPACE       ([ \t\r\n]| | | )+
/* Unsigned decimal number of any length. */
INTEGER     [0-9]+

/* Four digits: the first is 1 or 2, the second is 0, 1 or 9 (10xx, 11xx, 19xx, 20xx, 21xx, 29xx). */
YEAR        [1-2][019][0-9][0-9]

%%

 /* whitespace between tokens produces no token */
{SPACE} {}

 /* listed ahead of the generic number rule: for an equally long match the earlier rule wins, so year-like numbers become T_YEAR */
{YEAR} { yylval->num = std::strtol(yytext, nullptr, 10); return T_YEAR; }

;/. { return T_NORMAL_RULE_SEPARATOR; } // technically this should have space after the semicolon, but that is not always followed in OSM data
 /* the trailing context above needs one more character after the semicolon, so a semicolon at the very end of the input does not match that rule */
", " { return T_ADDITIONAL_RULE_SEPARATOR; }
"||" { return T_FALLBACK_SEPARATOR; } // technically this should have a space on either side, but that is not always followed in OSM data

"open"    { yylval->state = State::Open;    return T_STATE; } // state keywords: the matching State value is handed over in yylval->state
"closed"  { yylval->state = State::Closed;  return T_STATE; }
"off"     { yylval->state = State::Off;  return T_STATE; }
"unknown" { yylval->state = State::Unknown; return T_STATE; }

"24/7" { return T_24_7; }

"+" { return T_PLUS; }
-|‒|–|‑|—|―|－|−|ー { return T_MINUS; } // ASCII hyphen-minus plus look-alike dashes found in real data; the seventh alternative is the fullwidth form U+FF0D (it was a second copy of the ASCII hyphen before)
"/" { return T_SLASH; }
":" { return T_COLON; }
 /* a comma only becomes T_COMMA when at least one more character follows it (trailing context) */
,/. { return T_COMMA; }

[,;] {} // eat trailing commas/semicolons, while invalid those occur commonly in OSM data. Practically this is done indirectly in combination with the above rules as we cannot lookahead to EOF

"dawn"    { yylval->time = { Time::Dawn,    0, 0 }; return T_EVENT; } // named events: yylval->time gets the event kind, the two remaining fields are set to zero
"sunrise" { yylval->time = { Time::Sunrise, 0, 0 }; return T_EVENT; }
"sunset"  { yylval->time = { Time::Sunset , 0, 0 }; return T_EVENT; }
"dusk"    { yylval->time = { Time::Dusk,    0, 0 }; return T_EVENT; }
"[" { return T_LBRACKET; } // square brackets and parentheses are passed on as individual tokens

"]" { return T_RBRACKET; }
"(" { return T_LPAREN; }
")" { return T_RPAREN; }

"PH" { return T_PH; }
"SH" { return T_SH; }

 /* note that the day keyword patterns include the leading space */
" day" { return T_KEYWORD_DAY; }
" days" { return T_KEYWORD_DAY; }
"week" { return T_KEYWORD_WEEK; }
"easter" { return T_EASTER; }
"whitsun" { return T_WHITSUN; } // non-standard, will be turned into "easter +49 days"
  /* am/pm time format support, non-standard and has to appear before the generic number token. */
  /* strtol converts the leading digits and stops at the am/pm suffix, so yylval->num is the bare number */
[0-5]?[0-9](\ ?a\.?m\.?|a) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_AM; }
[0-5]?[0-9](\ ?p\.?m\.?|p) { yylval->num = std::strtol(yytext, nullptr, 10); return T_ALT_TIME_PM; }

 /* any other run of digits */
{INTEGER} { yylval->num = std::strtol(yytext, nullptr, 10); return T_INTEGER; }
 /* technically weekday names should be two letter English abbreviations, but reality is more creative */
 /* yylval->num carries the weekday number, Monday = 1 through Sunday = 7 */
Mondays?    { yylval->num = 1; return T_WEEKDAY; }
Tuesdays?   { yylval->num = 2; return T_WEEKDAY; }
Wednesdays? { yylval->num = 3; return T_WEEKDAY; }
Thursdays?  { yylval->num = 4; return T_WEEKDAY; }
Fridays?    { yylval->num = 5; return T_WEEKDAY; }
Saturdays?  { yylval->num = 6; return T_WEEKDAY; }
Sundays?    { yylval->num = 7; return T_WEEKDAY; }

Tues  { yylval->num = 2; return T_WEEKDAY; }
Thurs? { yylval->num = 4; return T_WEEKDAY; }

 /* the optional last letter covers both the two and the three letter abbreviation */
Mon? { yylval->num = 1; return T_WEEKDAY; }
Tue? { yylval->num = 2; return T_WEEKDAY; }
Wed? { yylval->num = 3; return T_WEEKDAY; }
Thu? { yylval->num = 4; return T_WEEKDAY; }
Fri? { yylval->num = 5; return T_WEEKDAY; }
Sat? { yylval->num = 6; return T_WEEKDAY; }
Sun? { yylval->num = 7; return T_WEEKDAY; }

 /* two letter abbreviation followed by a dot, the dot is consumed as part of the token */
Mo\. { yylval->num = 1; return T_WEEKDAY; }
Tu\. { yylval->num = 2; return T_WEEKDAY; }
We\. { yylval->num = 3; return T_WEEKDAY; }
Th\. { yylval->num = 4; return T_WEEKDAY; }
Fr\. { yylval->num = 5; return T_WEEKDAY; }
Sa\. { yylval->num = 6; return T_WEEKDAY; }
Su\. { yylval->num = 7; return T_WEEKDAY; }
 /* same for month names, technically those should be three letter English abbreviations */
 /* yylval->num carries the month number, 1 to 12; "May" is handled by the abbreviation rule below */
"January" { yylval->num = 1; return T_MONTH; }
"February" { yylval->num = 2; return T_MONTH; }
"March" { yylval->num = 3; return T_MONTH; }
"April" { yylval->num = 4; return T_MONTH; }
"June" { yylval->num = 6; return T_MONTH; }
"July" { yylval->num = 7; return T_MONTH; }
"August" { yylval->num = 8; return T_MONTH; }
"September" { yylval->num = 9; return T_MONTH; }
"October" { yylval->num = 10; return T_MONTH; }
"November" { yylval->num = 11; return T_MONTH; }
"December" { yylval->num = 12; return T_MONTH; }

 /* abbreviations, with an optional trailing dot that is consumed as part of the token */
Jan\.? { yylval->num = 1; return T_MONTH; }
Feb\.? { yylval->num = 2; return T_MONTH; }
Mar\.? { yylval->num = 3; return T_MONTH; }
Apr\.? { yylval->num = 4; return T_MONTH; }
May\.? { yylval->num = 5; return T_MONTH; }
Jun\.? { yylval->num = 6; return T_MONTH; }
Jul\.? { yylval->num = 7; return T_MONTH; }
Aug\.? { yylval->num = 8; return T_MONTH; }
Sept?\.? { yylval->num = 9; return T_MONTH; }
Oct\.? { yylval->num = 10; return T_MONTH; }
Nov\.? { yylval->num = 11; return T_MONTH; }
Dec\.? { yylval->num = 12; return T_MONTH; }
 /* Month names in French */
 /* full names only; yylval->num carries the month number, 1 to 12 */
"Janvier" { yylval->num = 1; return T_MONTH; }
"Février" { yylval->num = 2; return T_MONTH; }
"Mars" { yylval->num = 3; return T_MONTH; }
"Avril" { yylval->num = 4; return T_MONTH; }
"Mai" { yylval->num = 5; return T_MONTH; }
"Juin" { yylval->num = 6; return T_MONTH; }
"Juillet" { yylval->num = 7; return T_MONTH; }
"Août" { yylval->num = 8; return T_MONTH; }
"Septembre" { yylval->num = 9; return T_MONTH; }
"Octobre" { yylval->num = 10; return T_MONTH; }
"Novembre" { yylval->num = 11; return T_MONTH; }
"Décembre" { yylval->num = 12; return T_MONTH; }
 /* different quote types are sometimes mixed and/or used nested, so this is a compromise to catch most of them */
["][^"]*["] {
    /* Both delimiters are one byte each: hand over a reference to the text between
       them. It points into yytext, nothing is copied here. */
    yylval->strRef.str = yytext + 1;
    yylval->strRef.len = yyleng - 2;
    return T_COMMENT;
}
(“|”|„)[^(\"|“|”|„)]*(\"|“|”|„) {
    /* Comment opened by a typographic quote and closed by any supported quote.
       The typographic quotes are multi-byte string literals here, so the width of
       each delimiter is measured instead of assuming a single byte.
       NOTE(review): inside the bracket expression of the pattern the parentheses
       and the pipe are ordinary members and the quotes contribute their individual
       bytes, so those also end the comment body - confirm that this is intended. */
    int leadingBytes = 1;
    int trailingBytes = 1;
    for (const char *candidate : { "\"", "“", "”", "„" }) {
        const int candidateLen = std::strlen(candidate);
        /* only consider delimiters that leave at least one more byte in the match */
        if (yyleng > candidateLen) {
            if (std::strncmp(yytext, candidate, candidateLen) == 0) {
                leadingBytes = candidateLen;
            }
            if (std::strncmp(yytext + yyleng - candidateLen, candidate, candidateLen) == 0) {
                trailingBytes = candidateLen;
            }
        }
    }
    /* reference into yytext covering the text between the two delimiters */
    yylval->strRef.str = yytext + leadingBytes;
    yylval->strRef.len = yyleng - leadingBytes - trailingBytes;
    return T_COMMENT;
}
  /* various alternative formats, none of this is remotely compliant with the specification, but appears in reality nevertheless */

  /* alternative time formats */
  /* the plain ASCII colon is already claimed by the T_COLON rule further up, so it is not repeated here; the first alternative is the fullwidth colon U+FF1A */
：|︓|ː|\. { return T_ALT_TIME_SEP; }
h|時 { return T_ALT_TIME_SEP_OR_SUFFIX; }
  /* alternative range separators */
  /* the second alternative is the fullwidth tilde U+FF5E (it was a second copy of the ASCII tilde before) */
~|～|〜|to|à|bis|a|ás|às|as|au|al|→|до { return T_ALT_RANGE_SEP; }
  /* localized state names */
  /* all of these are reported as State::Closed */
ferm(e|é)|geschlossen|ruhetag|encerrado|chiuso { yylval->state = State::Closed;  return T_STATE; }

 /* German localized day names. */
 /* yylval->num carries the weekday number, Monday = 1 through Sunday = 7 */
Montags?     { yylval->num = 1; return T_WEEKDAY; }
Die(nstags?)?   { yylval->num = 2; return T_WEEKDAY; }
Mittwochs?   { yylval->num = 3; return T_WEEKDAY; }
Donnerstags? { yylval->num = 4; return T_WEEKDAY; }
Freitags?    { yylval->num = 5; return T_WEEKDAY; }
Samstags?    { yylval->num = 6; return T_WEEKDAY; }
Sonntags?    { yylval->num = 7; return T_WEEKDAY; }
 /* reported with the same token as "PH" */
Feiertage?   { return T_PH; }
  /* French, Spanish, Italian */
Lu { yylval->num = 1; return T_WEEKDAY; }
Ma { yylval->num = 2; return T_WEEKDAY; }
  /* French, Italian */
Me { yylval->num = 3; return T_WEEKDAY; }
  /* German, Spanish, Italian */
Mi { yylval->num = 3; return T_WEEKDAY; }
  /* French */
Je { yylval->num = 4; return T_WEEKDAY; }
  /* Italian */
Gi { yylval->num = 4; return T_WEEKDAY; }
  /* French, Italian */
Ve { yylval->num = 5; return T_WEEKDAY; }
  /* German */
So { yylval->num = 7; return T_WEEKDAY; }

 /* "Di" conflicts between German and French...
    "Do" conflicts between German and Spanish+Italian... */
  /* French localized day names. */
  /* three letter abbreviation or full name; yylval->num is the weekday number, Monday = 1 through Sunday = 7 */
Lun(di)?    { yylval->num = 1; return T_WEEKDAY; }
Mardi       { yylval->num = 2; return T_WEEKDAY; }
Mer(credi)? { yylval->num = 3; return T_WEEKDAY; }
Jeu(di)?    { yylval->num = 4; return T_WEEKDAY; }
Ven(dredi)? { yylval->num = 5; return T_WEEKDAY; }
Sam(edi)?   { yylval->num = 6; return T_WEEKDAY; }
Dim(anche)? { yylval->num = 7; return T_WEEKDAY; }
  /* reported with the same token as "PH" */
"jours fériés" { return T_PH; }
  /* Spanish localized day names */
  /* accented and unaccented spellings are both accepted where an alternation is given */
Lunes         { yylval->num = 1; return T_WEEKDAY; }
Martes        { yylval->num = 2; return T_WEEKDAY; }
Mi(é|e)rcoles { yylval->num = 3; return T_WEEKDAY; }
Jueves        { yylval->num = 4; return T_WEEKDAY; }
Vie(rnes)?    { yylval->num = 5; return T_WEEKDAY; }
S(á|a)b(ado)? { yylval->num = 6; return T_WEEKDAY; }
Dom(ingo)?    { yylval->num = 7; return T_WEEKDAY; }
  /* Italian localized day names */
  /* the final vowel is accepted with or without the grave accent */
Luned(ì|i)     { yylval->num = 1; return T_WEEKDAY; }
Marted(ì|i)    { yylval->num = 2; return T_WEEKDAY; }
Mercoled(ì|i)  { yylval->num = 3; return T_WEEKDAY; }
Gio(ved(ì|i))? { yylval->num = 4; return T_WEEKDAY; }
Venerd(ì|i)    { yylval->num = 5; return T_WEEKDAY; }
Sabato         { yylval->num = 6; return T_WEEKDAY; }
Domenica       { yylval->num = 7; return T_WEEKDAY; }
  /* Portuguese localized day names */
  /* NOTE(review): "feira" on its own is reported as weekday 1, the same as "segunda" - confirm how the parser is expected to treat it */
feira   { yylval->num = 1; return T_WEEKDAY; }
segunda { yylval->num = 1; return T_WEEKDAY; }
ter(ç|c)a   { yylval->num = 2; return T_WEEKDAY; }
quarta  { yylval->num = 3; return T_WEEKDAY; }
quinta  { yylval->num = 4; return T_WEEKDAY; }
sexta   { yylval->num = 5; return T_WEEKDAY; }
  /* Japanese localized day names */
  /* one, two and three character forms are listed; the scanner takes the longest one that matches */
月|月曜|月曜日 { yylval->num = 1; return T_WEEKDAY; }
火|火曜|火曜日 { yylval->num = 2; return T_WEEKDAY; }
水|水曜|水曜日 { yylval->num = 3; return T_WEEKDAY; }
木|木曜|木曜日 { yylval->num = 4; return T_WEEKDAY; }
金|金曜|金曜日 { yylval->num = 5; return T_WEEKDAY; }
土|土曜|土曜日 { yylval->num = 6; return T_WEEKDAY; }
日|日曜|日曜日 { yylval->num = 7; return T_WEEKDAY; }
  /* reported with the same token as "PH" */
祝日 { return T_PH; }
  /* Indonesian localized day names */
  /* yylval->num carries the weekday number, Monday = 1 through Sunday = 7 */
Senin   { yylval->num = 1; return T_WEEKDAY; }
Selasa  { yylval->num = 2; return T_WEEKDAY; }
Rabu    { yylval->num = 3; return T_WEEKDAY; }
Kamis   { yylval->num = 4; return T_WEEKDAY; }
Jumat   { yylval->num = 5; return T_WEEKDAY; }
Sabtu   { yylval->num = 6; return T_WEEKDAY; }
Minggu  { yylval->num = 7; return T_WEEKDAY; }
  /* creative rule separators */
  /* conjunction words and symbols, reported with the same token as ", " */
、|and|et|e|y|und|& { return T_ADDITIONAL_RULE_SEPARATOR; }
  /* skip filler words */
  /* matched and dropped, no token is produced for these */
from|von|du|de|le|das|分|uhr|"en continu" {}
. {
    /* Fallback for any single character that no rule above accepted; this explicit rule
       stands in for the default rule that the nodefault option disables. */
    //printf("unexpected character: %s at %d:%d\n", yytext, yylloc->first_line, yylloc->first_column);
    return T_INVALID;
}

%%