Members of the KDE Community are encouraged to subscribe to the kde-community mailing list at https://mail.kde.org/mailman/listinfo/kde-community so that they can participate in important discussions and receive other important announcements.

Commit b39a40fc authored by Volker Krause

Refactor processor plugin and memento to support the fallback extractor

For this we need to look for plain text parts too, and we can't stop after
the first data we found, in case there's higher quality structured data
in a later part.
parent 1f090fde
......@@ -36,7 +36,7 @@ public:
/**
 * Returns the body part formatter for the given plugin-local index.
 *
 * A new SemanticProcessor is returned for indices 0 and 1; every other
 * index yields nullptr.
 * NOTE(review): the two slots presumably correspond to the "text/html" and
 * "text/plain" formatter entries of the plugin metadata -- confirm.
 */
const MimeTreeParser::Interface::BodyPartFormatter *bodyPartFormatter(int idx) const override
{
    // Both supported indices are served by the same processor type. A single
    // range check is used on purpose: stacking an `idx == 0` test in front of
    // it would silently limit the plugin to its first formatter again.
    if (idx == 0 || idx == 1)
        return new SemanticProcessor();
    return nullptr;
}
......
{
"formatter": [
{ "mimetype": "text/html" }
{ "mimetype": "text/html" },
{ "mimetype": "text/plain" }
],
"renderer": [
{ "type": "MimeTreeParser::MessagePartList" }
......
......@@ -19,6 +19,8 @@
#include "semanticmemento.h"
#include <KMime/ContentIndex>
SemanticMemento::~SemanticMemento() = default;
void SemanticMemento::detach()
......@@ -30,6 +32,16 @@ bool SemanticMemento::isEmpty() const
return m_data.isEmpty();
}
/** Returns true if @p index has previously been recorded with setParsed(). */
bool SemanticMemento::isParsed(const KMime::ContentIndex& index) const
{
    const auto hit = m_parsedParts.constFind(index);
    return hit != m_parsedParts.constEnd();
}
/** Records @p index in the set of parsed parts, so isParsed() reports it. */
void SemanticMemento::setParsed(const KMime::ContentIndex& index)
{
    m_parsedParts << index;
}
QVector<QVariant> SemanticMemento::data() const
{
return m_data;
......@@ -39,3 +51,13 @@ void SemanticMemento::setData(const QVector<QVariant> &data)
{
m_data = data;
}
/**
 * Returns true only when the structured-data flag has been set and the
 * memento is not empty.
 */
bool SemanticMemento::hasStructuredData() const
{
    // Without the flag the stored content (if any) does not count.
    if (!m_foundStructuredData) {
        return false;
    }
    return !isEmpty();
}
void SemanticMemento::setStructuredDataFound(bool f)
{
m_foundStructuredData = f;
}
......@@ -22,9 +22,12 @@
#include <MimeTreeParser/BodyPart>
#include <QSet>
#include <QVariant>
#include <QVector>
namespace KMime { class ContentIndex; }
/** Memento holding the semantic information extracted for an email. */
class SemanticMemento : public MimeTreeParser::Interface::BodyPartMemento
{
......@@ -33,11 +36,19 @@ public:
void detach() override;
/** Returns @c true if no extracted data is stored. */
bool isEmpty() const;
/** Returns @c true if @p index was previously passed to setParsed(). */
bool isParsed(const KMime::ContentIndex &index) const;
/** Remembers @p index as parsed. */
void setParsed(const KMime::ContentIndex &index);
/** Returns the stored extraction results. */
QVector<QVariant> data() const;
/** Replaces the stored extraction results with @p data. */
void setData(const QVector<QVariant> &data);
/** Returns @c true if the structured-data flag is set and the memento is not empty. */
bool hasStructuredData() const;
/** Sets the structured-data flag to @p f. */
void setStructuredDataFound(bool f);
private:
// Extraction results, as handed to setData().
QVector<QVariant> m_data;
// Content indexes recorded through setParsed().
QSet<KMime::ContentIndex> m_parsedParts;
// Set through setStructuredDataFound(); false until then.
bool m_foundStructuredData = false;
};
#endif // SEMANTICMEMENTO_H
......@@ -32,21 +32,38 @@ MimeTreeParser::MessagePart::Ptr SemanticProcessor::process(MimeTreeParser::Inte
if (!nodeHelper)
return {};
auto memento = dynamic_cast<SemanticMemento*>(nodeHelper->bodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData"));
if (memento)
if (!memento) {
memento = new SemanticMemento;
nodeHelper->setBodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData", memento);
}
// check if we still have to do anything at all
if (memento->hasStructuredData())
return {};
if (memento->isParsed(part.content()->index()))
return {};
memento->setParsed(part.content()->index());
qCDebug(SEMANTIC_LOG) << "-------------------------------------------- BEGIN SEMANTIC PARSING";
StructuredDataExtractor extractor;
extractor.parse(part.content()->decodedText());
memento = new SemanticMemento;
nodeHelper->setBodyPartMemento(part.topLevelContent(), "org.kde.messageviewer.semanticData", memento);
const auto data = extractor.data();
const auto decodedData = JsonLdDocument::fromJson(data);
if (data.size() != decodedData.size()) {
qCDebug(SEMANTIC_LOG) << "Unhandled content:" << QJsonDocument(data).toJson();
qCDebug(SEMANTIC_LOG()) << part.content()->contentType()->mimeType();
// look for structured data first, cheaper and better quality
if (part.content()->contentType()->mimeType() == "text/html") {
StructuredDataExtractor extractor;
extractor.parse(part.content()->decodedText());
const auto data = extractor.data();
const auto decodedData = JsonLdDocument::fromJson(data);
if (data.size() != decodedData.size()) {
qCDebug(SEMANTIC_LOG) << "Unhandled content:" << QJsonDocument(data).toJson();
}
if (!decodedData.isEmpty()) {
memento->setData(decodedData);
memento->setStructuredDataFound(true);
qCDebug(SEMANTIC_LOG) << "Found structured data:" << decodedData;
}
}
memento->setData(decodedData);
qCDebug(SEMANTIC_LOG) << "-------------------------------------------- END SEMANTIC PARSING";
return {};
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment