Members of the KDE Community are encouraged to subscribe to the kde-community mailing list at https://mail.kde.org/mailman/listinfo/kde-community so that they can participate in important discussions and receive other important announcements.

Commit a631be96 authored by Volker Krause

Allow to specify the locale used for parsing dates

This fixes parsing e.g. English booking emails in a French locale.
parent d6274fd7
......@@ -32,9 +32,11 @@ class UnstructuredDataExtractorTest : public QObject
{
Q_OBJECT
private Q_SLOTS:
void init()
void initTestCase()
{
Q_INIT_RESOURCE(rules);
// use some exotic locale to ensure the date/time parsing doesn't just work by luck
QLocale::setDefault(QLocale(QStringLiteral("fr_FR")));
}
void testExtractText_data()
......
......@@ -71,6 +71,11 @@ QString ExtractorRule::format() const
return m_format;
}
// Returns the locale stored for this rule (m_locale).
// load() overwrites m_locale only when the rule element has a "locale"
// attribute; otherwise the member keeps its default-constructed value.
// ExtractorPropertyRule::match() uses it to parse "dateTime" values.
QLocale ExtractorRule::locale() const
{
return m_locale;
}
bool ExtractorRule::load(QXmlStreamReader &reader)
{
m_name = reader.attributes().value(QLatin1String("name")).toString();
......@@ -82,6 +87,8 @@ bool ExtractorRule::load(QXmlStreamReader &reader)
if (!m_regexp.isValid()) {
qCWarning(SEMANTIC_LOG) << m_regexp.errorString() << m_regexp.pattern() << "at offset" << m_regexp.patternErrorOffset();
}
if (reader.attributes().hasAttribute(QLatin1String("locale")))
m_locale = QLocale(reader.attributes().value(QLatin1String("locale")).toString());
return true;
}
......@@ -163,7 +170,7 @@ bool ExtractorPropertyRule::match(ExtractorContext *context) const
if (res.hasMatch()) {
auto val = value(res, context);
if (type() == QLatin1String("dateTime") && !format().isEmpty()) {
const auto dt = QDateTime::fromString(val, format());
const auto dt = locale().toDateTime(val, format());
val = dt.toString(Qt::ISODate);
}
......
......@@ -20,6 +20,7 @@
#ifndef EXTRACTORRULE_H
#define EXTRACTORRULE_H
#include <QLocale>
#include <QRegularExpression>
#include <QString>
#include <QVector>
......@@ -42,6 +43,7 @@ public:
protected:
QString value(const QRegularExpressionMatch &match, ExtractorContext *context) const;
QString format() const;
QLocale locale() const;
QRegularExpression m_regexp;
......@@ -50,6 +52,7 @@ private:
QString m_type;
QString m_value;
QString m_format;
QLocale m_locale;
bool m_repeat = false;
};
......
......@@ -16,11 +16,11 @@
<class type="Airport" name="departureAirport">
<property name="name" match="DEPARTURE: (.*?) [ -]" value="${1}"/>
</class>
<property name="departureTime" match="([0-9]{2}) ([A-Z]{3}) ([0-9]{2}:[0-9]{2})" value="${1} ${2} ${year} ${3}" type="dateTime" format="dd MMM yyyy hh:mm"/>
<property name="departureTime" match="([0-9]{2}) ([A-Z]{3}) ([0-9]{2}:[0-9]{2})" value="${1} ${2} ${year} ${3}" type="dateTime" format="dd MMM yyyy hh:mm" locale="en"/>
<class type="Airport" name="arrivalAirport">
<property name="name" match="ARRIVAL: *(.*?) [ -]" value="${1}"/>
</class>
<property name="arrivalTime" match="([0-9]{2}) ([A-Z]{3}) ([0-9]{2}:[0-9]{2})" value="${1} ${2} ${year} ${3}" type="dateTime" format="dd MMM yyyy hh:mm"/>
<property name="arrivalTime" match="([0-9]{2}) ([A-Z]{3}) ([0-9]{2}:[0-9]{2})" value="${1} ${2} ${year} ${3}" type="dateTime" format="dd MMM yyyy hh:mm" locale="en"/>
</class>
</class>
</extractor>
......@@ -9,11 +9,11 @@
<class type="Airport" name="departureAirport">
<property name="name" match="([A-Z][\S ]*)\n" value="${1}"/>
</class>
<property name="departureTime" match="([0-9]{2} [A-Za-z]{3} [0-9]{4}),\s*([0-9]{2}:[0-9]{2})" value="${1} ${2}" type="dateTime" format="dd MMM yyyy hh:mm"/>
<property name="departureTime" match="([0-9]{2} [A-Za-z]{3} [0-9]{4}),\s*([0-9]{2}:[0-9]{2})" value="${1} ${2}" type="dateTime" format="dd MMM yyyy hh:mm" locale="en"/>
<class type="Airport" name="arrivalAirport">
<property name="name" match="([A-Z][\S ]*)\n" value="${1}"/>
</class>
<property name="arrivalTime" match="([0-9]{2} [A-Za-z]{3} [0-9]{4}),\s*([0-9]{2}:[0-9]{2})" value="${1} ${2}" type="dateTime" format="dd MMM yyyy hh:mm"/>
<property name="arrivalTime" match="([0-9]{2} [A-Za-z]{3} [0-9]{4}),\s*([0-9]{2}:[0-9]{2})" value="${1} ${2}" type="dateTime" format="dd MMM yyyy hh:mm" locale="en"/>
<variable name="airlineCode" match="[A-Z0-9]{2}" value="${0}"/>
<property name="flightNumber" match="[0-9]{3,4}" value="${0}"/>
......
......@@ -13,11 +13,11 @@
<property name="iataCode" value="${airlineCode}"/>
<property name="name" match="operated by (.*)\n" value="${1}"/>
</class>
<property name="departureTime" match="Departure:\s*([0-9]{2}:[0-9]{2})" value="${date} ${1}" type="dateTime" format="dd.MM.yyyy hh:mm"/>
<property name="departureTime" match="Departure:\s*([0-9]{2}:[0-9]{2})" value="${date} ${1}" type="dateTime" format="dd.MM.yyyy hh:mm" locale="en"/>
<class type="Airport" name="departureAirport">
<property name="name" match="\s*(.*)\n" value="${1}"/>
</class>
<property name="arrivalTime" match="Arrival:\s*([0-9]{2}:[0-9]{2})" value="${date} ${1}" type="dateTime" format="dd.MM.yyyy hh:mm"/>
<property name="arrivalTime" match="Arrival:\s*([0-9]{2}:[0-9]{2})" value="${date} ${1}" type="dateTime" format="dd.MM.yyyy hh:mm" locale="en"/>
<class type="Airport" name="arrivalAirport">
<property name="name" match="\s*(.*)\n" value="${1}"/>
</class>
......
......@@ -14,13 +14,13 @@
<property name="name" match=", ([A-Za-z0-9 ]*)\n" value="${1}"/>
</class>
<variable name="depDate" match="Date: *[A-Z][a-z]{2} (.*)\n" value="${1}"/>
<property name="departureTime" match="Departure: *([0-9]+:[0-9]+)" value="${depDate} ${1}" type="dateTime" format="d MMM yyyy hh:mm"/>
<property name="departureTime" match="Departure: *([0-9]+:[0-9]+)" value="${depDate} ${1}" type="dateTime" format="d MMM yyyy hh:mm" locale="en"/>
<class type="Airport" name="departureAirport">
<property name="name" match=" *(.*)\(" value="${1}"/>
<property name="iataCode" match="[A-Z]{3}" value="${0}"/>
</class>
<variable name="arrDate" match="Date: *[A-Z][a-z]{2} (.*)\n" value="${1}"/>
<property name="arrivalTime" match="Arrival: *([0-9]+:[0-9]+)" value="${arrDate} ${1}" type="dateTime" format="d MMM yyyy hh:mm"/>
<property name="arrivalTime" match="Arrival: *([0-9]+:[0-9]+)" value="${arrDate} ${1}" type="dateTime" format="d MMM yyyy hh:mm" locale="en"/>
<class type="Airport" name="arrivalAirport">
<property name="name" match=" *(.*)\(" value="${1}"/>
<property name="iataCode" match="[A-Z]{3}" value="${0}"/>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment