Commit 1193c875 authored by Volker Krause
Browse files

Remove the remaining parts of the extractor engine

parent 6f4136ae
......@@ -14,7 +14,6 @@ ecm_add_test(airportdbtest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorresulttest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractordocumentnodetest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorfiltertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorinputtest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorrepositorytest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorscriptenginetest.cpp extractorscriptenginetest.qrc TEST_NAME extractorscriptenginetest LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(berdecodertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
......
/*
SPDX-FileCopyrightText: 2019 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#include <KItinerary/ExtractorInput>
#include <QDebug>
#include <QObject>
#include <QTest>
#include <QVariant>
using namespace KItinerary;
class ExtractorInputTest : public QObject
{
Q_OBJECT
private Q_SLOTS:
void testTypeFromContent_data()
{
QTest::addColumn<QByteArray>("data");
QTest::addColumn<ExtractorInput::Type>("type");
QTest::newRow("empty") << QByteArray() << ExtractorInput::Unknown;
QTest::newRow("html") << QByteArray("<html>") << ExtractorInput::Html;
QTest::newRow("html padded") << QByteArray(" <!DOCTYPE html>") << ExtractorInput::Html;
QTest::newRow("json") << QByteArray(R"({"@type": "Foo"})") << ExtractorInput::JsonLd;
QTest::newRow("json padded") << QByteArray(" []") << ExtractorInput::JsonLd;
QTest::newRow("pkpass") << QByteArray("PK\x03\x04") << ExtractorInput::PkPass;
QTest::newRow("pdf") << QByteArray("%PDF") << ExtractorInput::Pdf;
QTest::newRow("text") << QByteArray("abc def") << ExtractorInput::Unknown;
QTest::newRow("ical") << QByteArray("BEGIN:VCALENDAR\nEND:VCALENDAR") << ExtractorInput::ICal;
QTest::newRow("email") << QByteArray("From: null@kde.org\nTo: foo@localhost\n\n") << ExtractorInput::Email;
QTest::newRow("mbox") << QByteArray("From null@kde.org Mon Jan 01 12:34:56 1970\n") << ExtractorInput::Email;
}
void testTypeFromContent()
{
QFETCH(QByteArray, data);
QFETCH(ExtractorInput::Type, type);
QCOMPARE(ExtractorInput::typeFromContent(data), type);
}
void testTypeFromMimeType_data()
{
QTest::addColumn<QString>("mimeType");
QTest::addColumn<ExtractorInput::Type>("type");
QTest::newRow("empty") << QString() << ExtractorInput::Unknown;
QTest::newRow("html") << QStringLiteral("text/html") << ExtractorInput::Html;
QTest::newRow("json") << QStringLiteral("application/json") << ExtractorInput::JsonLd;
QTest::newRow("json+ld") << QStringLiteral("application/ld+json") << ExtractorInput::JsonLd;
QTest::newRow("pkpass") << QStringLiteral("application/vnd.apple.pkpass") << ExtractorInput::PkPass;
QTest::newRow("pdf") << QStringLiteral("application/pdf") << ExtractorInput::Pdf;
QTest::newRow("text") << QStringLiteral("text/plain") << ExtractorInput::Text;
QTest::newRow("ical") << QStringLiteral("text/calendar") << ExtractorInput::ICal;
QTest::newRow("email") << QStringLiteral("message/rfc822") << ExtractorInput::Email;
}
void testTypeFromMimeType()
{
QFETCH(QString, mimeType);
QFETCH(ExtractorInput::Type, type);
QCOMPARE(ExtractorInput::typeFromMimeType(mimeType), type);
}
void testTypeFromFileName_data()
{
QTest::addColumn<QString>("fileName");
QTest::addColumn<ExtractorInput::Type>("type");
QTest::newRow("empty") << QString() << ExtractorInput::Unknown;
QTest::newRow("html") << QStringLiteral("foo.html") << ExtractorInput::Html;
QTest::newRow("html 2") << QStringLiteral("FOO.HTM") << ExtractorInput::Html;
QTest::newRow("json") << QStringLiteral("foo.json") << ExtractorInput::JsonLd;
QTest::newRow("json+ld") << QStringLiteral("foo.JSONLD") << ExtractorInput::JsonLd;
QTest::newRow("pkpass") << QStringLiteral("foo.pkpass") << ExtractorInput::PkPass;
QTest::newRow("pdf") << QStringLiteral("foo.pdf") << ExtractorInput::Pdf;
QTest::newRow("text") << QStringLiteral("foo.txt") << ExtractorInput::Text;
QTest::newRow("ical") << QStringLiteral("foo.ics") << ExtractorInput::ICal;
QTest::newRow("email") << QStringLiteral("foo.mbox") << ExtractorInput::Email;
}
void testTypeFromFileName()
{
QFETCH(QString, fileName);
QFETCH(ExtractorInput::Type, type);
QCOMPARE(ExtractorInput::typeFromFileName(fileName), type);
}
void testTypeEnumToString()
{
QCOMPARE(ExtractorInput::typeToString(ExtractorInput::Unknown), QString());
QCOMPARE(ExtractorInput::typeToString(ExtractorInput::Pdf), QLatin1String("Pdf"));
}
void testTypeEnumFromString()
{
QCOMPARE(ExtractorInput::typeFromName(QStringLiteral("HTML")), ExtractorInput::Html);
QCOMPARE(ExtractorInput::typeFromName(QStringLiteral("text")), ExtractorInput::Text);
QCOMPARE(ExtractorInput::typeFromName(QStringLiteral("Pdf")), ExtractorInput::Pdf);
QCOMPARE(ExtractorInput::typeFromName(QStringLiteral("something else")), ExtractorInput::Unknown);
QCOMPARE(ExtractorInput::typeFromName(QString()), ExtractorInput::Unknown);
}
};
QTEST_APPLESS_MAIN(ExtractorInputTest)
#include "extractorinputtest.moc"
......@@ -36,14 +36,6 @@ set(kitinerary_lib_srcs
extractors/iatabcbpextractor.cpp
generic/genericextractor.cpp
generic/genericicalextractor.cpp
generic/genericpdfextractor.cpp
generic/genericpkpassextractor.cpp
generic/genericuic918extractor.cpp
generic/genericvdvextractor.cpp
generic/structureddataextractor.cpp
jsapi/barcode.cpp
jsapi/bitarray.cpp
jsapi/context.cpp
......@@ -103,9 +95,7 @@ set(kitinerary_lib_srcs
barcodedecoder.cpp
calendarhandler.cpp
documentutil.cpp
extractor.cpp
extractorcapabilities.cpp
extractorinput.cpp
extractorpostprocessor.cpp
extractorutil.cpp
extractorvalidator.cpp
......@@ -182,9 +172,7 @@ ecm_generate_headers(KItinerary_FORWARDING_HEADERS
BarcodeDecoder
CalendarHandler
DocumentUtil
Extractor
ExtractorCapabilities
ExtractorInput
ExtractorPostprocessor
ExtractorValidator
File
......
......@@ -39,16 +39,6 @@ ExtractorFilter::~ExtractorFilter() = default;
ExtractorFilter& ExtractorFilter::operator=(const ExtractorFilter&) = default;
ExtractorFilter& ExtractorFilter::operator=(ExtractorFilter&&) = default;
/** Returns the filter type, derived from the stored MIME type via ExtractorInput::typeFromMimeType(). */
ExtractorInput::Type ExtractorFilter::type() const
{
return ExtractorInput::typeFromMimeType(d->m_mimeType);
}
/** Sets the filter type by converting @p type to a MIME type and forwarding it to setMimeType(). */
void ExtractorFilter::setType(ExtractorInput::Type type)
{
setMimeType(ExtractorInput::typeToMimeType(type));
}
QString ExtractorFilter::mimeType() const
{
return d->m_mimeType;
......@@ -102,10 +92,7 @@ bool ExtractorFilter::load(const QJsonObject &obj)
d.detach();
d->m_mimeType = obj.value(QLatin1String("mimeType")).toString();
if (d->m_mimeType.isEmpty()) {
setType(ExtractorInput::typeFromName(obj.value(QLatin1String("type")).toString()));
}
if (d->m_mimeType.isEmpty()) {
qCDebug(Log) << "unspecified filter type";
qCDebug(Log) << "unspecified filter MIME type";
}
d->m_fieldName = obj.value(QLatin1String("field")).toString();
d->m_exp.setPattern(obj.value(QLatin1String("match")).toString());
......
......@@ -8,8 +8,6 @@
#include "kitinerary_export.h"
#include "extractorinput.h"
#include <QExplicitlySharedDataPointer>
#include <qobjectdefs.h>
......@@ -33,8 +31,6 @@ public:
ExtractorFilter& operator=(const ExtractorFilter&);
ExtractorFilter& operator=(ExtractorFilter&&);
/** The filter type. */
[[deprecated("use mimeType()")]] ExtractorInput::Type type() const;
/** MIME type of the document part this filter can match. */
QString mimeType() const;
/** The field to filter on. */
......@@ -74,7 +70,6 @@ public:
/** Create a filter from a JS object value. */
static ExtractorFilter fromJSValue(const QJSValue &js);
[[deprecated("use setMimeType()")]] void setType(ExtractorInput::Type type);
void setMimeType(const QString &mimeType);
void setFieldName(const QString &fieldName);
void setPattern(const QString &pattern);
......
......@@ -7,7 +7,6 @@
#include "config-kitinerary.h"
#include "extractorrepository.h"
#include "extractor.h"
#include "logging.h"
#include "extractors/iatabcbpextractor.h"
......@@ -16,15 +15,6 @@
#include <KItinerary/ExtractorFilter>
#include <KItinerary/ScriptExtractor>
#ifdef HAVE_KCAL
#include <KCalendarCore/Calendar>
#include <KCalendarCore/Event>
#endif
#include <KMime/Content>
#include <KPkPass/Pass>
#include <QDirIterator>
#include <QJsonArray>
#include <QJsonDocument>
......@@ -47,12 +37,8 @@ public:
void loadAll();
void initBuiltInExtractors();
void loadScriptExtractors();
void addExtractor(Extractor &&e);
void addExtractor(std::unique_ptr<AbstractExtractor> &&e);
void extractorForTypeAndContent(ExtractorInput::Type type, const QString &content, std::vector<Extractor> &extractors) const;
static void insertExtractor(const Extractor &ext, std::vector<Extractor> &extractors);
std::vector<Extractor> m_extractors;
std::vector<std::unique_ptr<AbstractExtractor>> m_extractorsNew;
QStringList m_extraSearchPaths;
};
......@@ -75,31 +61,6 @@ void ExtractorRepositoryPrivate::initBuiltInExtractors()
addExtractor(std::make_unique<IataBcbpExtractor>());
}
/**
 * Adds to @p extractors every known extractor that has at least one filter
 * of the given @p type matching @p content.
 */
void ExtractorRepositoryPrivate::extractorForTypeAndContent(ExtractorInput::Type type, const QString &content, std::vector<Extractor> &extractors) const
{
    for (const auto &candidate : m_extractors) {
        for (const auto &filter : candidate.filters()) {
            // the content match is only evaluated for filters of the requested type
            if (filter.type() != type || !filter.matches(content)) {
                continue;
            }
            // one matching filter is enough, move on to the next extractor
            insertExtractor(candidate, extractors);
            break;
        }
    }
}
// Set-like insertion into @p extractors: the vector is kept ordered by the address of
// each extractor's d pointer, and an extractor sharing its d pointer with an existing
// entry is not added a second time.
void ExtractorRepositoryPrivate::insertExtractor(const Extractor &ext, std::vector<Extractor> &extractors)
{
    const auto byPrivatePointer = [](const auto &a, const auto &b) {
        return a.d.constData() < b.d.constData();
    };
    const auto pos = std::lower_bound(extractors.begin(), extractors.end(), ext, byPrivatePointer);

    const bool alreadyPresent = pos != extractors.end() && (*pos).d.constData() == ext.d.constData();
    if (!alreadyPresent) {
        extractors.insert(pos, ext);
    }
}
ExtractorRepository::ExtractorRepository()
{
static ExtractorRepositoryPrivate repo;
......@@ -111,7 +72,6 @@ ExtractorRepository::ExtractorRepository(KItinerary::ExtractorRepository &&) noe
/** Clears both extractor containers and then repopulates the repository through loadAll(). */
void ExtractorRepository::reload()
{
d->m_extractors.clear();
d->m_extractorsNew.clear();
d->loadAll();
}
......@@ -121,149 +81,6 @@ const std::vector<std::unique_ptr<AbstractExtractor>>& ExtractorRepository::extr
return d->m_extractorsNew;
}
/** Returns a reference to the repository's vector of Extractor instances (no copy is made). */
const std::vector<Extractor>& ExtractorRepository::allExtractors() const
{
return d->m_extractors;
}
/**
 * Adds to @p extractors every extractor with an email filter matching a header of @p part.
 *
 * The header named by the filter's field is looked up on @p part first and, when absent
 * there, on each parent in turn until one provides it. Does nothing when @p part is null.
 */
void ExtractorRepository::extractorsForMessage(KMime::Content *part, std::vector<Extractor> &extractors) const
{
    if (part == nullptr) {
        return;
    }

    for (const auto &candidate : d->m_extractors) {
        for (const auto &filter : candidate.filters()) {
            if (filter.type() != ExtractorInput::Email) {
                continue;
            }

            // convert the field name once, it is reused for every ancestor lookup
            const auto headerName = filter.fieldName().toUtf8();
            auto header = part->headerByType(headerName.constData());
            for (auto node = part->parent(); !header && node; node = node->parent()) {
                header = node->headerByType(headerName.constData());
            }
            if (!header) {
                continue;
            }

            if (filter.matches(header->asUnicodeString())) {
                ExtractorRepositoryPrivate::insertExtractor(candidate, extractors);
                break;
            }
        }
    }
}
/**
 * Adds to @p extractors every pkpass extractor with a filter matching @p pass.
 *
 * Only boarding passes and event tickets are considered, and the only filter field
 * evaluated is "passTypeIdentifier"; filters on any other field are ignored.
 * Does nothing when @p pass is null.
 */
void ExtractorRepository::extractorsForPass(KPkPass::Pass *pass, std::vector<Extractor> &extractors) const
{
    // Guard against a missing pass, as extractorsForMessage() does for its input,
    // instead of dereferencing a null pointer below.
    if (!pass) {
        return;
    }
    if (pass->type() != KPkPass::Pass::BoardingPass && pass->type() != KPkPass::Pass::EventTicket) {
        return;
    }

    for (const auto &candidate : d->m_extractors) {
        if (candidate.type() != ExtractorInput::PkPass) {
            continue;
        }

        for (const auto &filter : candidate.filters()) {
            if (filter.type() != ExtractorInput::PkPass) {
                continue;
            }
            // the pass type identifier is the only field that can be filtered on
            if (filter.fieldName() != QLatin1String("passTypeIdentifier")) {
                continue;
            }

            if (filter.matches(pass->passTypeIdentifier())) {
                // one matching filter is enough for this extractor
                ExtractorRepositoryPrivate::insertExtractor(candidate, extractors);
                break;
            }
        }
    }
}
/**
 * Resolves the dot-separated @p path inside @p obj and returns the string value found there.
 * An empty string is returned as soon as a path section has to be looked up in something
 * that is not a JSON object.
 */
static QString valueForJsonPath(const QJsonObject &obj, const QString &path)
{
    QJsonValue current(obj);
    const auto keys = path.splitRef(QLatin1Char('.'));
    for (const auto &key : keys) {
        if (!current.isObject()) {
            return {};
        }
        // descend one level
        const auto parent = current.toObject();
        current = parent.value(key.toString());
    }
    return current.toString();
}
/**
 * Adds to @p extractors every extractor with a JSON-LD filter matching one of the
 * elements of @p data. The filter's field name is treated as a dot-separated path
 * into the element; elements without a (non-empty) value at that path are skipped.
 */
void ExtractorRepository::extractorsForJsonLd(const QJsonArray &data, std::vector<Extractor> &extractors) const
{
    for (const auto &val : data) {
        const auto element = val.toObject();
        for (const auto &candidate : d->m_extractors) {
            for (const auto &filter : candidate.filters()) {
                if (filter.type() != ExtractorInput::JsonLd) {
                    continue;
                }

                const auto fieldValue = valueForJsonPath(element, filter.fieldName());
                if (fieldValue.isEmpty() || !filter.matches(fieldValue)) {
                    continue;
                }

                // one matching filter is enough for this extractor
                ExtractorRepositoryPrivate::insertExtractor(candidate, extractors);
                break;
            }
        }
    }
}
/** Adds to @p extractors all extractors having a Barcode-type filter that matches @p code. */
void ExtractorRepository::extractorsForBarcode(const QString &code, std::vector<Extractor> &extractors) const
{
d->extractorForTypeAndContent(ExtractorInput::Barcode, code, extractors);
}
#ifdef HAVE_KCAL
/**
 * Adds to @p extractors every extractor with an iCal filter matching @p cal.
 *
 * The filter's field name is read as a property of the calendar object and its
 * string representation is matched against the filter.
 * Does nothing when @p cal is null.
 */
void ExtractorRepository::extractorsForCalendar(const KCalendarCore::Calendar *cal, std::vector<Extractor> &extractors) const
{
    // Guard against a missing calendar, as extractorsForMessage() does for its input,
    // instead of dereferencing a null pointer below.
    if (!cal) {
        return;
    }

    for (const auto &candidate : d->m_extractors) {
        for (const auto &filter : candidate.filters()) {
            if (filter.type() != ExtractorInput::ICal) {
                continue;
            }

            const auto value = cal->property(filter.fieldName().toUtf8().constData());
            if (filter.matches(value.toString())) {
                // one matching filter is enough for this extractor
                ExtractorRepositoryPrivate::insertExtractor(candidate, extractors);
                break;
            }
        }
    }
}
/**
 * Adds to @p extractors every extractor with an iCal filter matching @p event.
 *
 * The filter's field name is resolved to a property in the static meta object of
 * KCalendarCore::Event and read from @p event with readOnGadget(); filters naming
 * a property that does not exist there are skipped.
 */
void ExtractorRepository::extractorsForEvent(const KCalendarCore::Event *event, std::vector<Extractor> &extractors) const
{
    const auto &eventMeta = KCalendarCore::Event::staticMetaObject;

    for (const auto &candidate : d->m_extractors) {
        for (const auto &filter : candidate.filters()) {
            if (filter.type() != ExtractorInput::ICal) {
                continue;
            }

            const auto propertyIndex = eventMeta.indexOfProperty(filter.fieldName().toUtf8().constData());
            if (propertyIndex < 0) {
                continue;
            }

            const auto propertyValue = eventMeta.property(propertyIndex).readOnGadget(event);
            if (!filter.matches(propertyValue.toString())) {
                continue;
            }

            // one matching filter is enough for this extractor
            ExtractorRepositoryPrivate::insertExtractor(candidate, extractors);
            break;
        }
    }
}
#endif
void ExtractorRepository::extractorsForNode(const ExtractorDocumentNode &node, std::vector<const AbstractExtractor*> &extractors) const
{
if (node.isNull()) {
......@@ -283,11 +100,6 @@ void ExtractorRepository::extractorsForNode(const ExtractorDocumentNode &node, s
}
}
/** Adds to @p extractors all extractors having a Text-type filter that matches @p content. */
void ExtractorRepository::extractorsForContent(const QString &content, std::vector<Extractor> &extractors) const
{
d->extractorForTypeAndContent(ExtractorInput::Text, content, extractors);
}
const AbstractExtractor* ExtractorRepository::extractorByName(QStringView name) const
{
auto it = std::lower_bound(d->m_extractorsNew.begin(), d->m_extractorsNew.end(), name, [](const auto &lhs, auto rhs) {
......@@ -299,17 +111,6 @@ const AbstractExtractor* ExtractorRepository::extractorByName(QStringView name)
return {};
}
/**
 * Looks up the extractor called @p name by binary search over the extractor list.
 * Returns a default-constructed Extractor when no extractor has that name.
 */
Extractor ExtractorRepository::extractor(const QString &name) const
{
    const auto nameLess = [](const auto &lhs, const auto &rhs) {
        return lhs.name() < rhs;
    };
    const auto pos = std::lower_bound(d->m_extractors.begin(), d->m_extractors.end(), name, nameLess);

    // lower_bound only yields the first entry not ordered before name, so verify the hit
    if (pos == d->m_extractors.end() || !((*pos).name() == name)) {
        return {};
    }
    return *pos;
}
void ExtractorRepositoryPrivate::loadScriptExtractors()
{
auto searchDirs = m_extraSearchPaths;
......@@ -344,11 +145,6 @@ void ExtractorRepositoryPrivate::loadScriptExtractors()
if (doc.isObject()) {
const auto obj = doc.object();
Extractor e;
if (e.load(obj, fi.canonicalFilePath())) {
addExtractor(std::move(e));
}
auto ext = std::make_unique<ScriptExtractor>();
if (ext->load(obj, fi.canonicalFilePath())) {
addExtractor(std::move(ext));
......@@ -359,11 +155,6 @@ void ExtractorRepositoryPrivate::loadScriptExtractors()
const auto extractorArray = doc.array();
int i = 0;
for (const auto &v : extractorArray) {
Extractor e;
if (e.load(v.toObject(), fi.canonicalFilePath(), extractorArray.size() == 1 ? -1 : i)) {
addExtractor(std::move(e));
}
auto ext = std::make_unique<ScriptExtractor>();
if (ext->load(v.toObject(), fi.canonicalFilePath(), extractorArray.size() == 1 ? -1 : i)) {
addExtractor(std::move(ext));
......@@ -380,16 +171,6 @@ void ExtractorRepositoryPrivate::loadScriptExtractors()
}
}
/**
 * Inserts @p e at its name-ordered position in m_extractors.
 * An extractor whose name is already present is dropped, the existing entry wins.
 */
void ExtractorRepositoryPrivate::addExtractor(Extractor &&e)
{
    const auto byName = [](const auto &a, const auto &b) {
        return a.name() < b.name();
    };
    const auto pos = std::lower_bound(m_extractors.begin(), m_extractors.end(), e, byName);

    const bool isNewName = pos == m_extractors.end() || (*pos).name() != e.name();
    if (!isNewName) {
        return;
    }
    m_extractors.insert(pos, std::move(e));
}
void ExtractorRepositoryPrivate::addExtractor(std::unique_ptr<AbstractExtractor> &&e)
{
auto it = std::lower_bound(m_extractorsNew.begin(), m_extractorsNew.end(), e, [](const auto &lhs, const auto &rhs) {
......@@ -410,31 +191,6 @@ void ExtractorRepository::setAdditionalSearchPaths(const QStringList& searchPath
d->m_extraSearchPaths = searchPaths;
}
/**
 * Serializes @p extractor together with all known extractors from the same file.
 *
 * Known extractors sharing the file name are emitted in repository order, with the
 * entry of the same name replaced by @p extractor itself; if no entry has that name,
 * @p extractor is appended. A single resulting element is returned as-is, several
 * are returned as an array.
 */
QJsonValue ExtractorRepository::extractorToJson(const Extractor &extractor) const
{
    QJsonArray siblings;
    bool emitted = false;

    for (const auto &known : d->m_extractors) {
        if (known.fileName() != extractor.fileName()) {
            continue;
        }

        if (extractor.name() == known.name()) {
            // prefer the (possibly modified) extractor passed in over the stored one
            siblings.push_back(extractor.toJson());
            emitted = true;
            continue;
        }
        siblings.push_back(known.toJson());
    }

    if (!emitted) {
        siblings.push_back(extractor.toJson());
    }

    if (siblings.size() == 1) {
        return siblings.at(0);
    }
    return siblings;
}
QJsonValue ExtractorRepository::extractorToJson(const ScriptExtractor *extractor) const
{
QJsonArray a;
......
......@@ -12,19 +12,6 @@
#include <memory>
#include <vector>
namespace KCalendarCore {
class Calendar;
class Event;
}
namespace KMime {
class Content;
}
namespace KPkPass {
class Pass;
}
class QJsonArray;
class QJsonValue;
class QString;
......@@ -32,7 +19,6 @@ class QString;
namespace KItinerary {
class AbstractExtractor;
class Extractor;
class ExtractorDocumentNode;
class ExtractorRepositoryPrivate;
class ScriptExtractor;
......@@ -60,28 +46,12 @@ public:
/** All known extractors. */
const std::vector<std::unique_ptr<AbstractExtractor>>& extractors() const;
[[deprecated("use extractors()")]] const std::vector<Extractor>& allExtractors() const;
/** Finds matching extractors for the given document node. */
void extractorsForNode(const ExtractorDocumentNode &node, std::vector<const AbstractExtractor*> &extractors) const;
/** Finds matching extractors for the given message part. */
[[deprecated("use extractorsForNode")]] void extractorsForMessage(KMime::Content *part, std::vector<Extractor> &extractors) const;
/** Finds matching extractors for the given pkpass boarding pass. */
[[deprecated("use extractorsForNode")]] void extractorsForPass(KPkPass::Pass *pass, std::vector<Extractor> &extractors) const;
/** Finds matching extractors for the given JSON-LD data provided by generic extractors. */
[[deprecated("use extractorsForNode")]] void extractorsForJsonLd(const QJsonArray &data, std::vector<Extractor> &extractors) const;
/** Finds matching extractors for the given barcode string. */
[[deprecated("use extractorsForNode")]] void extractorsForBarcode(const QString &code, std::vector<Extractor> &extractors) const;
/** Find matching extractors for the given iCal calendar. */
[[deprecated("use extractorsForNode")]] void extractorsForCalendar(const KCalendarCore::Calendar *cal, std::vector<Extractor> &extractors) const;
/** Find matching extractors for the given iCal event. */
[[deprecated("use extractorsForNode")]] void extractorsForEvent(const KCalendarCore::Event *event, std::vector<Extractor> &extractors) const;
/** Find matching extractors for the given content. */
[[deprecated("use extractorsForNode")]] void extractorsForContent(const QString &content, std::vector<Extractor> &extractors) const;
/** Returns the extractor with the given identifier. */
const AbstractExtractor* extractorByName(QStringView name) const;
[[deprecated("use extractorByName")]] Extractor extractor(const QString &name) const;
/** Returns the list of additional search paths for extractor scripts. */
QStringList additionalSearchPaths() const;
......@@ -92,7 +62,6 @@ public:
/** JSON serialization of @p extractor, including all other Extractor definitions in the same file, if any.
* Only for tooling, do not use otherwise.