Commit 42b9d23c authored by Volker Krause
Browse files

Call extractor scripts for each filter match

This splits filter matching into a variant that checks for any match, and
one that collects all matches. The latter is then used as input for the
extractor script, restoring the previous behavior.
parent 9873fe82
......@@ -43,29 +43,33 @@ private Q_SLOTS:
filter.setPattern(s("KDE"));
filter.setScope(ExtractorFilter::Current);
auto triggerNode = filter.matches(root);
QVERIFY(triggerNode.isNull());
QVERIFY(!filter.matches(root));
filter.setPattern(s("libkcal"));
triggerNode = filter.matches(root);
QVERIFY(!triggerNode.isNull());
QCOMPARE(triggerNode.mimeType(), root.mimeType());
QVERIFY(filter.matches(root));
std::vector<ExtractorDocumentNode> matches;
filter.allMatches(root, matches);
QCOMPARE(matches.size(), 1);
QCOMPARE(matches[0].mimeType(), root.mimeType());
filter.setScope(ExtractorFilter::Children);
triggerNode = filter.matches(root);
QVERIFY(triggerNode.isNull());
QVERIFY(!filter.matches(root));
filter.setMimeType(s("internal/event"));
filter.setPattern(s("Akademy"));
triggerNode = filter.matches(root);
QVERIFY(triggerNode.isNull());
QVERIFY(!filter.matches(root));
filter.setFieldName(s("summary"));
triggerNode = filter.matches(root);
QVERIFY(!triggerNode.isNull());
QCOMPARE(triggerNode.mimeType(), QLatin1String("internal/event"));
QVERIFY(filter.matches(root));
matches.clear();
filter.allMatches(root, matches);
QCOMPARE(matches.size(), 1);
QCOMPARE(matches[0].mimeType(), QLatin1String("internal/event"));
filter.setScope(ExtractorFilter::Descendants);
triggerNode = filter.matches(root);
QVERIFY(!triggerNode.isNull());
QCOMPARE(triggerNode.mimeType(), QLatin1String("internal/event"));
QVERIFY(filter.matches(root));
matches.clear();
filter.allMatches(root, matches);
QCOMPARE(matches.size(), 1);
QCOMPARE(matches[0].mimeType(), QLatin1String("internal/event"));
}
void testPkPassFilter()
......@@ -88,15 +92,15 @@ private Q_SLOTS:
filter.setPattern(s("pass.booking.swiss.com"));
filter.setScope(ExtractorFilter::Current);
QVERIFY(filter.matches(bcbp).isNull());
QVERIFY(!filter.matches(root).isNull());
QVERIFY(!filter.matches(bcbp));
QVERIFY(filter.matches(root));
filter.setScope(ExtractorFilter::Parent);
QVERIFY(!filter.matches(bcbp).isNull());
QVERIFY(filter.matches(root).isNull());
QVERIFY(filter.matches(bcbp));
QVERIFY(!filter.matches(root));
filter.setScope(ExtractorFilter::Ancestors);
QVERIFY(!filter.matches(bcbp).isNull());
QVERIFY(filter.matches(root).isNull());
QVERIFY(filter.matches(bcbp));
QVERIFY(!filter.matches(root));
}
void testResultFilter()
......@@ -116,9 +120,9 @@ private Q_SLOTS:
filter.setFieldName(s("location.address.addressLocality"));
filter.setPattern(s("Berlin"));
filter.setScope(ExtractorFilter::Current);
QVERIFY(filter.matches(root).isNull());
QVERIFY(!filter.matches(root));
filter.setPattern(s("Milan"));
QVERIFY(!filter.matches(root).isNull());
QVERIFY(filter.matches(root));
}
};
......
......@@ -159,14 +159,20 @@ static QString valueForJsonPath(const QJsonObject &obj, const QString &path)
return v.toString();
}
static ExtractorDocumentNode filterMachesNode(const ExtractorFilter &filter, ExtractorFilter::Scope scope, const ExtractorDocumentNode &node)
enum MatchMode { Any, All };
static bool filterMachesNode(const ExtractorFilter &filter, ExtractorFilter::Scope scope, const ExtractorDocumentNode &node,
std::vector<ExtractorDocumentNode> &matches, MatchMode matchMode)
{
if (node.isNull()) {
return {};
return false;
}
if (filter.mimeType() == node.mimeType() && node.processor()->matches(filter, node)) {
return node;
if (matchMode == All) {
matches.push_back(node);
}
return true;
}
if (scope != ExtractorFilter::Ancestors && filter.mimeType() == QLatin1String("application/ld+json") && !node.result().isEmpty()) {
......@@ -174,43 +180,68 @@ static ExtractorDocumentNode filterMachesNode(const ExtractorFilter &filter, Ext
for (const auto &elem : res) {
const auto property = valueForJsonPath(elem.toObject(), filter.fieldName());
if (filter.matches(property)) {
return node;
if (matchMode == All) {
matches.push_back(node);
} else {
return true;
}
}
}
}
if (scope == ExtractorFilter::Ancestors) {
return filterMachesNode(filter, scope, node.parent());
return filterMachesNode(filter, scope, node.parent(), matches, matchMode);
}
if (scope == ExtractorFilter::Descendants) {
for (const auto &child : node.childNodes()) {
const auto m = filterMachesNode(filter, ExtractorFilter::Descendants, child);
if (!m.isNull()) {
return m;
const auto m = filterMachesNode(filter, ExtractorFilter::Descendants, child, matches, matchMode);
if (m && matchMode == Any) {
return true;
}
}
}
return {};
return !matches.empty();
}
ExtractorDocumentNode ExtractorFilter::matches(const ExtractorDocumentNode &node) const
bool ExtractorFilter::matches(const ExtractorDocumentNode &node) const
{
std::vector<ExtractorDocumentNode> matches;
switch (d->m_scope) {
case ExtractorFilter::Current:
return filterMachesNode(*this, ExtractorFilter::Current, node);
return filterMachesNode(*this, ExtractorFilter::Current, node, matches, Any);
case ExtractorFilter::Parent:
return filterMachesNode(*this, ExtractorFilter::Current, node.parent());
return filterMachesNode(*this, ExtractorFilter::Current, node.parent(), matches, Any);
case ExtractorFilter::Ancestors:
return filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent());
return filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent(), matches, Any);
case ExtractorFilter::Children:
case ExtractorFilter::Descendants:
for (const auto &child : node.childNodes()) {
const auto m = filterMachesNode(*this, d->m_scope == ExtractorFilter::Descendants ? d->m_scope : ExtractorFilter::Current, child);
if (!m.isNull()) {
return m;
if (filterMachesNode(*this, d->m_scope == ExtractorFilter::Descendants ? d->m_scope : ExtractorFilter::Current, child, matches, Any)) {
return true;
}
}
}
return {};
return false;
}
/** Appends every node that triggers this filter, evaluated relative to @p node, to @p matches.
 *  Which nodes get inspected depends on the scope configured on this filter:
 *  the node itself, its parent, its chain of ancestors, or its children/descendants.
 *  @p matches is only appended to, existing entries are left untouched.
 */
void ExtractorFilter::allMatches(const ExtractorDocumentNode &node, std::vector<ExtractorDocumentNode>& matches) const
{
    const auto filterScope = d->m_scope;

    if (filterScope == ExtractorFilter::Current) {
        filterMachesNode(*this, ExtractorFilter::Current, node, matches, All);
    } else if (filterScope == ExtractorFilter::Parent) {
        // the parent is checked on its own, without walking further up
        filterMachesNode(*this, ExtractorFilter::Current, node.parent(), matches, All);
    } else if (filterScope == ExtractorFilter::Ancestors) {
        filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent(), matches, All);
    } else if (filterScope == ExtractorFilter::Children || filterScope == ExtractorFilter::Descendants) {
        // direct children are evaluated in "Current" mode, only Descendants recurses further down
        const auto perChildScope = (filterScope == ExtractorFilter::Descendants) ? filterScope : ExtractorFilter::Current;
        for (const auto &childNode : node.childNodes()) {
            filterMachesNode(*this, perChildScope, childNode, matches, All);
        }
    }
}
......@@ -56,11 +56,15 @@ public:
/** Evaluation scope of this filter, in relation to the node being extracted. */
Scope scope() const;
/** Checks whether this filter applies to @p node. */
bool matches(const ExtractorDocumentNode &node) const;
/** Checks whether this filter applies to @p node.
* @returns An invalid node if the filter doesn't match, or the node that ended up matching the filter.
* This can differ from @p node depending on scope().
* Unlike matches() this returns all nodes triggering this filter.
* This matters in particular for matching child nodes, where multiple
* ones can match the filter.
*/
ExtractorDocumentNode matches(const ExtractorDocumentNode &node) const;
void allMatches(const ExtractorDocumentNode &node, std::vector<ExtractorDocumentNode> &matches) const;
///@cond internal
/** Load filter from @p obj. */
......
......@@ -159,18 +159,26 @@ bool ScriptExtractor::canHandle(const ExtractorDocumentNode &node) const
}
return std::any_of(d->m_filters.begin(), d->m_filters.end(), [&node](const auto &filter) {
return !filter.matches(node).isNull();
return filter.matches(node);
});
}
ExtractorResult ScriptExtractor::extract(const ExtractorDocumentNode &node, const ExtractorEngine *engine) const
{
ExtractorDocumentNode triggerNode;
std::vector<ExtractorDocumentNode> triggerNodes;
for (const auto &filter : d->m_filters) {
triggerNode = filter.matches(node);
if (!triggerNode.isNull()) {
break;
if (filter.scope() == ExtractorFilter::Children || filter.scope() == ExtractorFilter::Descendants) {
filter.allMatches(node, triggerNodes);
}
}
return engine->scriptEngine()->execute(this, node, triggerNode);
if (triggerNodes.empty()) {
return engine->scriptEngine()->execute(this, node, {});
} else {
ExtractorResult result;
for (const auto &triggerNode : triggerNodes) {
result.append(engine->scriptEngine()->execute(this, node, triggerNode));
}
return result;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment