Commit da7c447c authored by Volker Krause's avatar Volker Krause
Browse files

Implement out-of-process extraction for the new engine

We no longer need to special-case this as in the old system; we can just
inject it as a document processor that intercepts extraction at the
types we want to extract externally.
parent a1dfa9e2
......@@ -65,6 +65,7 @@ set(kitinerary_lib_srcs
pdf/popplerutils.cpp
processors/binarydocumentprocessor.cpp
processors/externalprocessor.cpp
processors/htmldocumentprocessor.cpp
processors/icaldocumentprocessor.cpp
processors/imagedocumentprocessor.cpp
......
......@@ -10,7 +10,7 @@
#include "logging.h"
#include "processors/binarydocumentprocessor.h"
// #include "processors/externalprocessor.h"
#include "processors/externalprocessor.h"
#include "processors/htmldocumentprocessor.h"
#include "processors/icaldocumentprocessor.h"
#include "processors/imagedocumentprocessor.h"
......@@ -237,7 +237,7 @@ void ExtractorDocumentNodeFactory::registerProcessor(std::unique_ptr<ExtractorDo
void ExtractorDocumentNodeFactory::setUseSeparateProcess(bool separateProcess)
{
if (separateProcess && !d->interceptProcessor) {
// d->interceptProcessor = std::make_unique<ExternalProcessor>();
d->interceptProcessor = std::make_unique<ExternalProcessor>();
} else {
d->interceptProcessor.reset();
}
......
/*
SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#include <config-kitinerary.h>
#include "externalprocessor.h"
#include "logging.h"
#include <KItinerary/AbstractExtractor>
#include <KItinerary/ExtractorEngine>
#include <KItinerary/ExtractorRepository>
#include <KItinerary/ExtractorResult>
#include <KItinerary/PdfDocument>
#include <QFileInfo>
#include <QJsonArray>
#include <QJsonDocument>
#include <QProcess>
using namespace KItinerary;
/**
 * Locates the external extractor executable in the libexec install directory.
 *
 * On success m_externalExtractor holds the canonical path of the executable.
 * Otherwise a critical message is logged and m_externalExtractor stays empty.
 */
ExternalProcessor::ExternalProcessor()
{
    // find external extractor
    const QFileInfo fi(QLatin1String(CMAKE_INSTALL_FULL_LIBEXECDIR_KF5) + QLatin1String("/kitinerary-extractor"));
    // Any single failed check disqualifies the candidate, so the checks are OR-ed:
    // an existing directory or a non-executable file must be rejected as well.
    if (!fi.exists() || !fi.isFile() || !fi.isExecutable()) {
        // log the full path that was probed, not just the file name
        qCCritical(Log) << "Cannot find external extractor:" << fi.filePath();
        return;
    }
    m_externalExtractor = fi.canonicalFilePath();
}
// Defaulted destructor, defined out-of-line in this source file.
ExternalProcessor::~ExternalProcessor() = default;
/**
 * Decides whether the given data should be handed to the external extractor.
 *
 * @param encodedData raw document data, probed with PdfDocument::maybePdf().
 * @param fileName file name associated with the data.
 * @return true if the data looks like a PDF, or the file name ends in ".pdf"
 *         (compared case-insensitively).
 */
bool ExternalProcessor::canHandleData(const QByteArray &encodedData, QStringView fileName) const
{
    // The case sensitivity flag belongs to endsWith(), not to the QLatin1String
    // constructor, where it would be interpreted as a string length of 0.
    return PdfDocument::maybePdf(encodedData) || fileName.endsWith(QLatin1String(".pdf"), Qt::CaseInsensitive);
}
/**
 * Builds a document node that carries the encoded data unchanged as its content.
 *
 * @param encodedData raw document data to store in the node.
 * @return a node whose content is set to @p encodedData.
 */
ExtractorDocumentNode ExternalProcessor::createNodeFromData(const QByteArray &encodedData) const
{
    // no decoding happens here, the raw bytes are what preExtract() later
    // writes to the external process
    ExtractorDocumentNode passThrough;
    passThrough.setContent(encodedData);
    return passThrough;
}
/**
 * Runs the external extractor process on the node content and adds its output
 * to the node as result.
 *
 * The names of the extractors applicable to @p node, the node's context date
 * and any additional extractor search paths are passed on the command line.
 * The node content is written to the process' stdin, and the JSON array read
 * from its stdout is added as the node result.
 *
 * @param node the document node to process; receives the extraction result.
 * @param engine the engine providing the extractor repository.
 */
void ExternalProcessor::preExtract(ExtractorDocumentNode &node, const ExtractorEngine *engine) const
{
    // the constructor leaves the path empty when the executable could not be found
    if (m_externalExtractor.isEmpty()) {
        qCWarning(Log) << "external extractor not available, skipping out-of-process extraction";
        return;
    }

    // determine the extractors applicable to this node, the external process
    // is told to use exactly those
    std::vector<const AbstractExtractor*> extractors;
    engine->extractorRepository()->extractorsForNode(node, extractors);
    QStringList extNames;
    extNames.reserve(extractors.size());
    std::transform(extractors.begin(), extractors.end(), std::back_inserter(extNames), [](auto ext) { return ext->name(); });

    QProcess proc;
    proc.setProgram(m_externalExtractor);
    QStringList args({QLatin1String("--context-date"), node.contextDateTime().toString(Qt::ISODate),
                      QLatin1String("--extractors"), extNames.join(QLatin1Char(';')),
                      QLatin1String("--no-validation")});
    const auto extraPaths = engine->extractorRepository()->additionalSearchPaths();
    for (const auto &p : extraPaths) {
        args.push_back(QStringLiteral("--additional-search-path"));
        args.push_back(p);
    }
    proc.setArguments(args);

    // the channel mode only applies to processes started after it has been set,
    // so it has to be configured before start()
    proc.setProcessChannelMode(QProcess::ForwardedErrorChannel);
    proc.start(QProcess::ReadWrite);
    if (!proc.waitForStarted(1000)) {
        qCWarning(Log) << "could not start external extractor" << m_externalExtractor << proc.errorString();
        return;
    }

    // feed the document via stdin and signal end of input
    proc.write(node.content<QByteArray>());
    proc.closeWriteChannel();
    if (!proc.waitForFinished(15000)) {
        qCWarning(Log) << "external extractor did not exit cleanly" << m_externalExtractor << proc.errorString();
        return;
    }

    // whatever stdout yields is parsed as a JSON array and added as the node result
    const auto res = QJsonDocument::fromJson(proc.readAllStandardOutput()).array();
    node.addResult(res);
}
/*
SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#ifndef KITINERARY_EXTERNALPROCESSOR_H
#define KITINERARY_EXTERNALPROCESSOR_H
#include <KItinerary/ExtractorDocumentProcessor>
#include <QString>
namespace KItinerary {
/** Document processor that delegates extraction to an external extractor process.
 *  Nodes created by it carry the raw input data, which preExtract() passes to
 *  the external executable.
 */
class ExternalProcessor : public ExtractorDocumentProcessor
{
public:
/** Looks up the external extractor executable and remembers its path. */
ExternalProcessor();
~ExternalProcessor();
/** Decides, based on the data and the file name, whether this processor handles the input. */
bool canHandleData(const QByteArray &encodedData, QStringView fileName) const override;
/** Creates a node holding @p encodedData as its content. */
ExtractorDocumentNode createNodeFromData(const QByteArray &encodedData) const override;
/** Runs the external extractor on the content of @p node and adds its output as the node result. */
void preExtract(ExtractorDocumentNode &node, const ExtractorEngine *engine) const override;
private:
// canonical path of the external extractor executable, empty if it was not found
QString m_externalExtractor;
};
}
#endif // KITINERARY_EXTERNALPROCESSOR_H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment