Commit ac9e7f94 authored by Volker Krause's avatar Volker Krause
Browse files

Make PDF title accessible to extractor scripts and filters

This also implements proper decoding of PDF metadata strings; the previous
approach only worked by accident for non-Unicode content.
parent b10c0aa3
......@@ -23,7 +23,8 @@
}
],
"producer": "cairo 1.16.0 (https://cairographics.org)",
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n"
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n",
"title": ""
}
},
{
......@@ -85,7 +86,8 @@
}
],
"producer": "cairo 1.16.0 (https://cairographics.org)",
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n"
"text": " Akademy Airways\n Boarding Pass\nFrom: Vienna International, Terminal 2\nTo: Milano Malpensa, Terminal 1\nFlight: AK 1996\nGate: A36\nBoarding Time: 15:20\nDeparture Time: 15:45\nArrival Time: 17:20\nPassenger: Dragon, Dr. Konqi\n",
"title": ""
},
"contextDateTime": "Mon Aug 19 20:23:28 2019 GMT+0200",
"isNull": false,
......
......@@ -19,6 +19,7 @@
#include <DateInfo.h>
#include <PDFDoc.h>
#include <PDFDocEncoding.h>
#include <Stream.h>
#include <cmath>
......@@ -296,15 +297,34 @@ QDateTime PdfDocument::modificationTime() const
#endif
}
/**
 * Decodes a PDF document information string into a QString.
 *
 * Strings carrying a UTF-16 byte order mark are decoded directly as UTF-16,
 * everything else is converted from PDFDocEncoding first.
 *
 * @param s The string as handed out by Poppler, may be null.
 * @return The decoded text, or an empty QString if @p s is null.
 */
QString gooStringToUnicode(const std::unique_ptr<GooString> &s)
{
    if (!s) {
        return {};
    }

    if (s->hasUnicodeMarker() || s->hasUnicodeMarkerLE()) {
        // The BOM is passed along on purpose: QString::fromUtf16() uses it to pick the byte order.
        const auto &raw = s->toStr();
        return QString::fromUtf16(reinterpret_cast<const ushort*>(raw.c_str()), raw.size() / 2);
    }

    // No BOM, so the content is treated as PDFDocEncoding and converted to UTF-16.
    // len is set by pdfDocEncodingToUTF16() and used as a byte count here, hence the division by two.
    // NOTE(review): the returned buffer is assumed to be allocated with new[] - confirm against Poppler.
    int len = 0;
    std::unique_ptr<const char[]> utf16Data(pdfDocEncodingToUTF16(s->toStr(), &len));
    return QString::fromUtf16(reinterpret_cast<const ushort*>(utf16Data.get()), len / 2);
}
/** Returns the title from the PDF document information, decoded via gooStringToUnicode(). */
QString PdfDocument::title() const
{
    const std::unique_ptr<GooString> rawTitle(d->m_popplerDoc->getDocInfoTitle());
    return gooStringToUnicode(rawTitle);
}
/** Returns the producer from the PDF document information, decoded via gooStringToUnicode(). */
QString PdfDocument::producer() const
{
    const std::unique_ptr<GooString> rawProducer(d->m_popplerDoc->getDocInfoProducer());
    return gooStringToUnicode(rawProducer);
}
QVariantList PdfDocument::pagesVariant() const
{
QVariantList l;
......
......@@ -92,6 +92,7 @@ class KITINERARY_EXPORT PdfDocument : public QObject
Q_PROPERTY(QVariantList pages READ pagesVariant CONSTANT)
Q_PROPERTY(QDateTime creationTime READ creationTime CONSTANT)
Q_PROPERTY(QDateTime modificationTime READ modificationTime CONSTANT)
Q_PROPERTY(QString title READ title CONSTANT)
Q_PROPERTY(QString producer READ producer CONSTANT)
public:
......@@ -115,6 +116,8 @@ public:
/** Modification time as specified in the PDF file. */
QDateTime modificationTime() const;
/** The document title as specified in the PDF file, decoded to Unicode.
 *  Empty if Poppler provides no title string for the document.
 */
QString title() const;
/** The document producer as specified in the PDF file, decoded to Unicode.
 *  Empty if Poppler provides no producer string for the document.
 */
QString producer() const;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment