Commit 3153388a authored by Volker Krause
Browse files

Add automatic test for the extractor script environment

This validates that the right arguments are passed to the extractor scripts
for all the different input types, something we previously could only test
implicitly by relying on the big (non-public) extractor test suite.
parent ccf65027
Pipeline #55129 canceled with stages
......@@ -3,7 +3,7 @@ Upstream-Name: KItinerary
Upstream-Contact: Volker Krause <vkrause@kde.org>
Source: https://invent.kde.org/pim/kitinerary
Files: autotests/barcodes/* autotests/bcbpdata/* autotests/calendarhandlerdata/* autotests/extractordata/* autotests/jsonlddata/* autotests/mergedata/* autotests/misc/* autotests/pkpassdata/* autotests/postprocessordata/* autotests/rct2/* autotests/structureddata/* autotests/uic918-3/* autotests/validationdata/*
Files: autotests/barcodes/* autotests/bcbpdata/* autotests/calendarhandlerdata/* autotests/extractordata/* autotests/jsonlddata/* autotests/mergedata/* autotests/misc/* autotests/pkpassdata/* autotests/postprocessordata/* autotests/rct2/* autotests/scriptenginedata/* autotests/structureddata/* autotests/uic918-3/* autotests/validationdata/*
Copyright: none
License: CC0-1.0
......
......@@ -16,6 +16,7 @@ ecm_add_test(extractordocumentnodetest.cpp LINK_LIBRARIES Qt5::Test KPim::Itiner
ecm_add_test(extractorfiltertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorinputtest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorrepositorytest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(extractorscriptenginetest.cpp extractorscriptenginetest.qrc TEST_NAME extractorscriptenginetest LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(berdecodertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(berencodertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
ecm_add_test(bcbpparsertest.cpp LINK_LIBRARIES Qt5::Test KPim::Itinerary)
......
/*
SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#include <KItinerary/ExtractorDocumentNode>
#include <KItinerary/ExtractorDocumentNodeFactory>
#include <KItinerary/ExtractorDocumentProcessor>
#include <KItinerary/ExtractorEngine>
#include <KItinerary/ExtractorResult>
#include <KItinerary/ScriptExtractor>
#include <QDebug>
#include <QDirIterator>
#include <QFile>
#include <QJsonArray>
#include <QJsonDocument>
#include <QJsonObject>
#include <QTest>
using namespace KItinerary;
#define s(x) QStringLiteral(x)
/** Expands @p node and, depth-first, all of its child nodes.
 *
 *  For a non-null node the node's processor first expands it, then every child
 *  is handled recursively, and finally preExtract() is called on the node
 *  itself. Null nodes are left untouched.
 */
static void expandRecursive(ExtractorDocumentNode &node, const ExtractorEngine *engine)
{
    if (!node.isNull()) {
        node.processor()->expandNode(node, engine);

        // Each child is taken by value on purpose: the recursive call wants a
        // non-const reference. NOTE(review): presumably childNodes() only
        // hands out const elements - confirm before changing this to a reference.
        for (auto childNode : node.childNodes()) {
            expandRecursive(childNode, engine);
        }

        node.processor()->preExtract(node, engine);
    }
}
class ExtractorScriptEngineTest : public QObject
{
Q_OBJECT
private Q_SLOTS:
void testArguments_data()
{
QTest::addColumn<QString>("inputFile");
QTest::addColumn<QString>("refFile");
QTest::newRow("text/plain") << s(SOURCE_DIR "/scriptenginedata/plain-text.txt") << s(SOURCE_DIR "/scriptenginedata/plain-text.txt.json");
QTest::newRow("pkpass") << s(SOURCE_DIR "/pkpassdata/swiss.pkpass") << s(SOURCE_DIR "/scriptenginedata/swiss.pkpass.json");
QTest::newRow("IATA BCBP PDF") << s(SOURCE_DIR "/extractordata/synthetic/iata-bcbp-demo.pdf") << s(SOURCE_DIR "/scriptenginedata/iata-bcbp-demo.pdf.json");
QTest::newRow("ical") << s(SOURCE_DIR "/extractordata/ical/eventreservation.ics") << s(SOURCE_DIR "/scriptenginedata/eventreservation.ics.json");
QTest::newRow("uic9183") << s(SOURCE_DIR "/uic918-3/valid/Testw_VOW8XG6_A9E8DXL_0.bin") << s(SOURCE_DIR "/scriptenginedata/Testw_VOW8XG6_A9E8DXL_0.bin.json");
QTest::newRow("html") << s(SOURCE_DIR "/structureddata/google-flight-reservation-json-ld.html") << s(SOURCE_DIR "/scriptenginedata/google-flight-reservation-json-ld.html.json");
}
void testArguments()
{
QFETCH(QString, inputFile);
QFETCH(QString, refFile);
QFile in(inputFile);
QVERIFY(in.open(QFile::ReadOnly));
ExtractorEngine engine;
auto root = engine.documentNodeFactory()->createNode(in.readAll(), inputFile);
QVERIFY(!root.isNull());
expandRecursive(root, &engine);
ScriptExtractor extractor;
extractor.setScriptFileName(s(":/reflector.js"));
extractor.setScriptFunction(s("dumpArgs"));
const auto result = extractor.extract(root, &engine).jsonLdResult();
QFile ref(refFile);
QVERIFY(ref.open(QFile::ReadOnly));
const auto refResult = QJsonDocument::fromJson(ref.readAll()).array();
if (result != refResult) {
qDebug().noquote() << QJsonDocument(result).toJson();
}
QCOMPARE(result, refResult);
}
};
QTEST_GUILESS_MAIN(ExtractorScriptEngineTest)
#include "extractorscriptenginetest.moc"
<!--
SPDX-FileCopyrightText: none
SPDX-License-Identifier: CC0-1.0
-->
<RCC>
<!-- Registers reflector.js under the resource root, i.e. as ":/reflector.js". -->
<qresource prefix="/">
<file>reflector.js</file>
</qresource>
</RCC>
/*
SPDX-FileCopyrightText: 2021 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
// Converts value into a plain structure made of primitives, arrays and objects.
// depth limits how many object/array levels are descended into; whatever sits
// below that limit is replaced by the string "[...]".
function dumpValue(value, depth)
{
    // absent values and primitives are passed through unchanged
    if (value === undefined || value === null) {
        return value;
    }
    var kind = typeof value;
    if (kind == "boolean" || kind == "number" || kind == "string") {
        return value;
    }
    if (kind != "object") {
        // anything else is reported and handed back as-is
        console.log("unhandled value type", kind);
        return value;
    }

    // recursion budget used up
    if (depth == 0) {
        return "[...]";
    }

    // everything exposing a length is dumped element by element
    if (value.length != undefined) {
        var items = [];
        for (var idx = 0; idx < value.length; ++idx) {
            items.push(dumpValue(value[idx], depth - 1));
        }
        return items;
    }

    // copy the enumerable properties, leaving out the parent/firstChild/nextSibling
    // links as well as unset and function-valued members
    var copy = {};
    var copiedAny = false;
    for (var key in value) {
        var isLink = key == "parent" || key == "firstChild" || key == "nextSibling";
        if (!isLink && value[key] !== undefined && value[key] !== null && typeof value[key] != "function") {
            copy[key] = dumpValue(value[key], depth - 1);
            copiedAny = true;
        }
    }

    // nothing was copied: fall back to the string form, for implicitly
    // convertible values (color, date, etc)
    return copiedAny ? copy : value.toString();
}
// Script entry point: returns a three element array holding dumps (six levels
// deep each) of the content argument wrapped in an object under the "content"
// key, of the node argument, and of the global Context object.
function dumpArgs(content, node)
{
    return [
        { content: dumpValue(content, 6) },
        dumpValue(node, 6),
        dumpValue(Context, 6)
    ];
}
[
{
"content": {
"carrierId": "0080",
"outboundArrivalStationId": "",
"outboundDepartureStationId": "",
"person": {
"className": "Person",
"email": "",
"familyName": "häöür Räß",
"givenName": "kíck_-_:.,stû R",
"name": ""
},
"pnr": "9E8DXL",
"seatingType": "2"
}
},
{
"childNodes": [
],
"content": {
"carrierId": "0080",
"outboundArrivalStationId": "",
"outboundDepartureStationId": "",
"person": {
"className": "Person",
"email": "",
"familyName": "häöür Räß",
"givenName": "kíck_-_:.,stû R",
"name": ""
},
"pnr": "9E8DXL",
"seatingType": "2"
},
"contextDateTime": "Fri Feb 17 15:18:00 2012 GMT+0100",
"isNull": false,
"mimeType": "internal/uic9183",
"result": [
{
"@type": "TrainReservation",
"reservationFor": {
"@type": "TrainTrip",
"provider": {
"@type": "Organization",
"identifier": "uic:0080"
}
},
"reservationNumber": "9E8DXL",
"reservedTicket": {
"@type": "Ticket",
"ticketToken": "aztecbin:I1VUMDEwMDgwMDAwMDIwLAIUGue7pQWksx4NCGj9cJzioDvHG2ICFGnc04h8A1eHmpP6hrOkqvuyoaofAAAAADAzNzR4nH2Rv07CUBTGkUFdXHyCJi5QqZ57+p8NbCsNFJMWCMhAiGhsQDSlkuiTsLk4ysqig0lfwEE3H8Zz5Y+YGDvcfvc7v/vd09N6p2QXLGAAqgxggGkbVrMi6am1h+mACAyZygywbMvmoGsBApBhMpQV7hQr5CjMBIVoX3Fa1qnaWg/CRQ4a87eBqmpq5KNvOdis1sveX7QG8oqmOKYFwPtlrHodXXUHN9F5OCKLuiGKhMxFgYSyFCYJDSUmMTMghttAgtcxoCNMJaWWk9lgSBuNZzlRd9i/uI3ijNcNh9n9ozC+o5r+T81YC0HG+2i4hbxQ809cMXCPi6JXEyuemHFKh47jOVmiqFVUL5PHZJa8RIJP4kHoJ9Ozfkfq5A8I0HiL/Kvw+2bqG5mp82krNBONj55qdLOMP+dyozh5Fvy938H8UKPOJ2fMf0o6Tcv7JN56m4xz7dRGrv26GP3T9ufmzmR8vxvS5mNB9IjoLYnpivgC6qKSSw==",
"ticketedSeat": {
"@type": "Seat",
"seatingType": "2"
}
},
"underName": {
"@context": "http://schema.org",
"@type": "Person",
"familyName": "häöür Räß",
"givenName": "kíck_-_:.,stû R"
}
}
]
},
{
"data": [
{
"@type": "TrainReservation",
"reservationFor": {
"@type": "TrainTrip",
"provider": {
"@type": "Organization",
"identifier": "uic:0080"
}
},
"reservationNumber": "9E8DXL",
"reservedTicket": {
"@type": "Ticket",
"ticketToken": "aztecbin:I1VUMDEwMDgwMDAwMDIwLAIUGue7pQWksx4NCGj9cJzioDvHG2ICFGnc04h8A1eHmpP6hrOkqvuyoaofAAAAADAzNzR4nH2Rv07CUBTGkUFdXHyCJi5QqZ57+p8NbCsNFJMWCMhAiGhsQDSlkuiTsLk4ysqig0lfwEE3H8Zz5Y+YGDvcfvc7v/vd09N6p2QXLGAAqgxggGkbVrMi6am1h+mACAyZygywbMvmoGsBApBhMpQV7hQr5CjMBIVoX3Fa1qnaWg/CRQ4a87eBqmpq5KNvOdis1sveX7QG8oqmOKYFwPtlrHodXXUHN9F5OCKLuiGKhMxFgYSyFCYJDSUmMTMghttAgtcxoCNMJaWWk9lgSBuNZzlRd9i/uI3ijNcNh9n9ozC+o5r+T81YC0HG+2i4hbxQ809cMXCPi6JXEyuemHFKh47jOVmiqFVUL5PHZJa8RIJP4kHoJ9Ozfkfq5A8I0HiL/Kvw+2bqG5mp82krNBONj55qdLOMP+dyozh5Fvy938H8UKPOJ2fMf0o6Tcv7JN56m4xz7dRGrv26GP3T9ufmzmR8vxvS5mNB9IjoLYnpivgC6qKSSw==",
"ticketedSeat": {
"@type": "Seat",
"seatingType": "2"
}
},
"underName": {
"@context": "http://schema.org",
"@type": "Person",
"familyName": "häöür Räß",
"givenName": "kíck_-_:.,stû R"
}
}
],
"objectName": "",
"pdfPageNumber": 0,
"senderDate": "Fri Feb 17 15:18:00 2012 GMT+0100"
}
]
[
{
"content": "QVariant(KCalendarCore::Calendar::Ptr, )"
},
{
"childNodes": [
{
"childNodes": [
],
"content": {
"allDay": false,
"attachments": [
],
"attendees": [
],
"categories": [
],
"conferences": [
],
"created": "Wed Dec 27 12:16:49 2017 GMT+0100",
"description": "Dr Konqui\nBooking reference: 42\nKatie Dragon\nBooking reference: 42",
"dtEnd": "Sat Jul 13 16:30:00 2019 GMT+0200",
"dtStart": "Sat Sep 7 09:30:00 2019 GMT+0200",
"geoLatitude": 255,
"geoLongitude": 255,
"hasGeo": false,
"lastModified": "Wed Dec 27 12:16:49 2017 GMT+0100",
"location": "University of Milano-Bicocca",
"organizer": {
"email": "",
"fullName": "",
"isEmpty": true,
"name": ""
},
"priority": 0,
"secrecy": 0,
"status": 0,
"summary": "Akademy 2019",
"transparency": 0,
"uid": "KIT-1234567890-1b22236a-21ff-4885-8c99-b3b2bbca062c"
},
"contextDateTime": "Invalid Date",
"isNull": false,
"mimeType": "internal/event",
"result": [
{
"@context": "http://schema.org",
"@type": "EventReservation",
"reservationFor": {
"@type": "Event",
"endDate": "[...]",
"location": "[...]",
"name": "Akademy 2019",
"startDate": "[...]"
},
"reservationNumber": "42",
"underName": {
"@type": "Person",
"name": "Dr Konqui"
}
},
{
"@context": "http://schema.org",
"@type": "EventReservation",
"reservationFor": {
"@type": "Event",
"endDate": "[...]",
"location": "[...]",
"name": "Akademy 2019",
"startDate": "[...]"
},
"reservationNumber": "42",
"underName": {
"@type": "Person",
"name": "Katie Dragon"
}
}
]
}
],
"content": "QVariant(KCalendarCore::Calendar::Ptr, )",
"contextDateTime": "Invalid Date",
"isNull": false,
"mimeType": "text/calendar",
"result": [
]
},
{
"data": [
],
"objectName": "",
"pdfPageNumber": 0,
"senderDate": "Invalid Date"
}
]
[
{
"content": {
"objectName": "",
"root": {
"content": "",
"isNull": false,
"name": "html",
"recursiveContent": "{\n \"@context\": \"http://schema.org\",\n \"@type\": \"FlightReservation\",\n \"reservationNumber\": \"RXJ34P\",\n \"reservationStatus\": \"http://schema.org/Confirmed\",\n \"underName\": {\n \"@type\": \"Person\",\n \"name\": \"Eva Green\"\n },\n \"reservationFor\": {\n \"@type\": \"Flight\",\n \"flightNumber\": \"110\",\n \"airline\": {\n \"@type\": \"Airline\",\n \"name\": \"United\",\n \"iataCode\": \"UA\"\n },\n \"departureAirport\": {\n \"@type\": \"Airport\",\n \"name\": \"San Francisco Airport\",\n \"iataCode\": \"SFO\"\n },\n \"departureTime\": \"2027-03-04T20:15:00-08:00\",\n \"arrivalAirport\": {\n \"@type\": \"Airport\",\n \"name\": \"John F. Kennedy International Airport\",\n \"iataCode\": \"JFK\"\n },\n \"arrivalTime\": \"2027-03-05T06:30:00-05:00\"\n }\n}"
}
}
},
{
"childNodes": [
],
"content": {
"objectName": "",
"root": {
"content": "",
"isNull": false,
"name": "html",
"recursiveContent": "{\n \"@context\": \"http://schema.org\",\n \"@type\": \"FlightReservation\",\n \"reservationNumber\": \"RXJ34P\",\n \"reservationStatus\": \"http://schema.org/Confirmed\",\n \"underName\": {\n \"@type\": \"Person\",\n \"name\": \"Eva Green\"\n },\n \"reservationFor\": {\n \"@type\": \"Flight\",\n \"flightNumber\": \"110\",\n \"airline\": {\n \"@type\": \"Airline\",\n \"name\": \"United\",\n \"iataCode\": \"UA\"\n },\n \"departureAirport\": {\n \"@type\": \"Airport\",\n \"name\": \"San Francisco Airport\",\n \"iataCode\": \"SFO\"\n },\n \"departureTime\": \"2027-03-04T20:15:00-08:00\",\n \"arrivalAirport\": {\n \"@type\": \"Airport\",\n \"name\": \"John F. Kennedy International Airport\",\n \"iataCode\": \"JFK\"\n },\n \"arrivalTime\": \"2027-03-05T06:30:00-05:00\"\n }\n}"
}
},
"contextDateTime": "Invalid Date",
"isNull": false,
"mimeType": "text/html",
"result": [
{
"@context": "http://schema.org",
"@type": "FlightReservation",
"reservationFor": {
"@type": "Flight",
"airline": {
"@type": "Airline",
"iataCode": "UA",
"name": "United"
},
"arrivalAirport": {
"@type": "Airport",
"iataCode": "JFK",
"name": "John F. Kennedy International Airport"
},
"arrivalTime": "2027-03-05T06:30:00-05:00",
"departureAirport": {
"@type": "Airport",
"iataCode": "SFO",
"name": "San Francisco Airport"
},
"departureTime": "2027-03-04T20:15:00-08:00",
"flightNumber": "110"
},
"reservationNumber": "RXJ34P",
"reservationStatus": "http://schema.org/Confirmed",
"underName": {
"@type": "Person",
"name": "Eva Green"
}
}
]
},
{
"data": [
{
"@context": "http://schema.org",
"@type": "FlightReservation",
"reservationFor": {
"@type": "Flight",
"airline": {
"@type": "Airline",
"iataCode": "UA",
"name": "United"
},
"arrivalAirport": {
"@type": "Airport",
"iataCode": "JFK",
"name": "John F. Kennedy International Airport"
},
"arrivalTime": "2027-03-05T06:30:00-05:00",
"departureAirport": {
"@type": "Airport",
"iataCode": "SFO",
"name": "San Francisco Airport"
},
"departureTime": "2027-03-04T20:15:00-08:00",
"flightNumber": "110"
},
"reservationNumber": "RXJ34P",
"reservationStatus": "http://schema.org/Confirmed",
"underName": {
"@type": "Person",
"name": "Eva Green"
}
}
],
"objectName": "",
"pdfPageNumber": 0,
"senderDate": "Invalid Date"
}
]
[
{
"content": {
"creationTime": "Mon Aug 19 20:23:28 2019 GMT+0200",
"modificationTime": "Invalid Date",
"objectName": "",
"pageCount": 1,
"pages": [
{
"images": [
{
"height": 139,
"width": 139
},
{
"height": 79,
"width": 79
}
],
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n"
}
],
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n"
}
},
{
"childNodes": [
{
"childNodes": [
{
"childNodes": [
],
"content": "M1DRAGON/KONQUI EXXX007 VIEMXPAK 1996 249J013C0042 167>5321WW8325BAK 0014123456002001414246700100141234789012A0141234567890 1AK AK 1234567890123 4PCYLX58Z^108ABCDEFGH",
"contextDateTime": "Mon Aug 19 20:23:28 2019 GMT+0200",
"isNull": false,
"location": 0,
"mimeType": "text/plain",
"result": [
]
}
],
"content": "QVariant(QImage, QImage(QSize(164, 164),format=QImage::Format_Grayscale8,depth=8,devicePixelRatio=1,bytesPerLine=164,sizeInBytes=26896))",
"contextDateTime": "Mon Aug 19 20:23:28 2019 GMT+0200",
"isNull": false,
"location": 0,
"mimeType": "internal/qimage",
"result": [
]
}
],
"content": {
"creationTime": "Mon Aug 19 20:23:28 2019 GMT+0200",
"modificationTime": "Invalid Date",
"objectName": "",
"pageCount": 1,
"pages": [
{
"images": [
{
"height": 139,
"width": 139
},
{
"height": 79,
"width": 79
}
],
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n"
}
],
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n"
},
"contextDateTime": "Mon Aug 19 20:23:28 2019 GMT+0200",
"isNull": false,
"mimeType": "application/pdf",
"result": [
]
},
{
"data": [
],
"objectName": "",
"pdfPageNumber": 0,
"senderDate": "Mon Aug 19 20:23:28 2019 GMT+0200"
}
]
[
{
"content": "Hello World!\n"
},
{
"childNodes": [
],
"content": "Hello World!\n",
"contextDateTime": "Invalid Date",
"isNull": false,
"mimeType": "text/plain",
"result": [
]
},
{