Commit 9ef2a675 authored by Volker Krause
Browse files

Add the actual extraction loop

This ties all the new parts together now, shrinking the extractor
engine considerably once the old code is removed.
parent da7c447c
Pipeline #55148 passed with stages
in 12 minutes and 49 seconds
......@@ -25,6 +25,7 @@ set(kitinerary_lib_srcs
engine/extractordocumentnode.cpp
engine/extractordocumentnodefactory.cpp
engine/extractordocumentprocessor.cpp
engine/extractorengine.cpp
engine/extractorfilter.cpp
engine/extractorrepository.cpp
engine/extractorresult.cpp
......@@ -103,7 +104,6 @@ set(kitinerary_lib_srcs
documentutil.cpp
extractor.cpp
extractorcapabilities.cpp
extractorengine.cpp
extractorinput.cpp
extractorpostprocessor.cpp
extractorutil.cpp
......@@ -183,7 +183,6 @@ ecm_generate_headers(KItinerary_FORWARDING_HEADERS
DocumentUtil
Extractor
ExtractorCapabilities
ExtractorEngine
ExtractorInput
ExtractorPostprocessor
ExtractorValidator
......@@ -234,6 +233,7 @@ ecm_generate_headers(KItinerary_Engine_FORWARDING_HEADERS
ExtractorDocumentNode
ExtractorDocumentNodeFactory
ExtractorDocumentProcessor
ExtractorEngine
ExtractorFilter
ExtractorRepository
ExtractorResult
......
......@@ -5,10 +5,17 @@
*/
#include "config-kitinerary.h"
#include "barcodedecoder.h"
#include "extractorengine.h"
#include "barcodedecoder.h"
#include "extractor.h"
#include "engine/extractorrepository.h"
#include "abstractextractor.h"
#include "extractordocumentnode.h"
#include "extractordocumentnodefactory.h"
#include "extractordocumentprocessor.h"
#include "extractorresult.h"
#include "extractorrepository.h"
#include "extractorscriptengine_p.h"
#include "generic/genericpdfextractor_p.h"
#include "generic/genericicalextractor_p.h"
#include "generic/genericpkpassextractor_p.h"
......@@ -23,8 +30,6 @@
#include "uic9183/uic9183parser.h"
#include "vdv/vdvticketparser.h"
#include "engine/extractordocumentnodefactory.h"
#include "engine/extractorscriptengine_p.h"
#include "jsapi/barcode.h"
#include "jsapi/context.h"
#include "jsapi/jsonld.h"
......@@ -80,6 +85,8 @@ public:
void setContext(PdfDocument *pdf);
void processNode(ExtractorDocumentNode &node);
ExtractorEngine *q = nullptr;
std::vector<Extractor> m_extractors;
std::vector<Extractor> m_additionalExtractors;
......@@ -182,6 +189,44 @@ void ExtractorEnginePrivate::extractExternal()
std::copy(res.begin(), res.end(), std::back_inserter(m_result));
}
/**
 * Runs the extraction steps on @p node and, recursively, on its child nodes.
 *
 * Order of operations for a non-null node:
 *  - the node's processor expands the node,
 *  - every child node is processed by a recursive call,
 *  - the node's processor reduces the node,
 *  - preExtract(), then every extractor the repository reports for the node
 *    is run and its result added to the node, then postExtract(),
 *  - if the node has a valid context date/time, each JSON-LD result is
 *    rewritten with a "modifiedTime" property unless it already has one.
 *
 * Null nodes are ignored.
 */
void ExtractorEnginePrivate::processNode(ExtractorDocumentNode& node)
{
    // nothing to do for null nodes
    if (node.isNull()) {
        return;
    }

    // expand first, so that the children exist before we descend into them
    node.processor()->expandNode(node, q);
    // children are iterated by value, as processNode() needs a mutable node
    // NOTE(review): presumably node copies share their state with the original - confirm
    for (auto childNode : node.childNodes()) {
        processNode(childNode);
    }
    node.processor()->reduceNode(node);

    // run all extractors the repository selects for this node
    node.processor()->preExtract(node, q);
    std::vector<const AbstractExtractor*> candidates;
    m_repo.extractorsForNode(node, candidates);
    for (const auto &candidate : candidates) {
        auto extracted = candidate->extract(node, q);
        node.addResult(std::move(extracted));
        // TODO store result sources
    }
    node.processor()->postExtract(node);

    // without a valid context time there is no modification time to apply
    if (!node.contextDateTime().isValid()) {
        return;
    }

    // set modification time for all results that don't have it yet
    auto jsonResults = node.result().jsonLdResult();
    for (int idx = 0; idx < jsonResults.size(); ++idx) {
        auto entry = jsonResults.at(idx).toObject();
        if (!entry.contains(QLatin1String("modifiedTime"))) {
            entry.insert(QStringLiteral("modifiedTime"), node.contextDateTime().toString(Qt::ISODate));
        }
        // written back unconditionally, exactly one assignment per element
        jsonResults[idx] = entry;
    }
    node.setResult(jsonResults);
}
ExtractorEngine::ExtractorEngine()
: d(new ExtractorEnginePrivate)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment