pdfdocument.cpp 8.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
    Copyright (C) 2018 Volker Krause <vkrause@kde.org>

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU Library General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
    License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
*/

#include "config-kitinerary.h"
#include "pdfdocument.h"
20 21 22
#include "pdfdocument_p.h"
#include "pdfextractoroutputdevice_p.h"
#include "pdfimage_p.h"
23
#include "popplerglobalparams_p.h"
24
#include "popplerutils_p.h"
25
#include "logging.h"
26 27 28 29 30 31

#include <QDebug>
#include <QImage>
#include <QScopedValueRollback>

#ifdef HAVE_POPPLER
32
#include <DateInfo.h>
33 34 35 36
#include <PDFDoc.h>
#include <Stream.h>
#endif

37 38
#include <cmath>

39 40
using namespace KItinerary;

41 42 43 44 45 46 47
// Extracts the text and the images of this page on first use.
// Subsequent calls return immediately. Without Poppler support only the
// loaded flag is set.
void PdfPagePrivate::load()
{
    if (m_loaded) {
        return;
    }

#ifdef HAVE_POPPLER
    PopplerGlobalParams gp;

    // Poppler is addressed with m_pageNum + 1 throughout this file.
    const auto popplerPageNum = m_pageNum + 1;

    // Run the page through the extractor device, which yields both the text
    // and the image list used below.
    PdfExtractorOutputDevice device;
    m_doc->m_popplerDoc->displayPageSlice(&device, popplerPageNum, 72, 72, 0, false, true, false, -1, -1, -1, -1);
    device.finalize();

    // The page text is whatever lies inside the crop box.
    const auto cropBox = m_doc->m_popplerDoc->getPage(popplerPageNum)->getCropBox();
    const std::unique_ptr<GooString> pageText(device.getText(cropBox->x1, cropBox->y1, cropBox->x2, cropBox->y2));
#if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0)
    m_text = QString::fromUtf8(pageText->c_str());
#else
    m_text = QString::fromUtf8(pageText->getCString());
#endif

    // Take over the images found by the device and point each one back at this page.
    m_images = std::move(device.m_images);
    for (auto &image : m_images) {
        image.d->m_page = this;
    }
#endif
    m_loaded = true;
}

68 69 70 71 72
PdfPage::PdfPage()
    : d(new PdfPagePrivate)
{
}

73
// Copy construction, destruction and copy assignment all use the
// compiler-generated implementations.
PdfPage::PdfPage(const PdfPage &other) = default;
PdfPage::~PdfPage() = default;
PdfPage& PdfPage::operator=(const PdfPage &other) = default;
76 77 78

// Returns the text of this page, loading the page content first if that
// has not happened yet.
QString PdfPage::text() const
{
    d->load();
    const QString &pageText = d->m_text;
    return pageText;
}

83
#ifdef HAVE_POPPLER
84 85 86 87
// Linear interpolation between begin and end: a ratio of 0 yields begin,
// a ratio of 1 yields end.
static double ratio(double begin, double end, double ratio)
{
    const double span = end - begin;
    return begin + span * ratio;
}
88
#endif
89 90 91 92

// Returns the text found inside the given rectangle of this page.
// left/top/right/bottom are interpolation factors applied to the page's crop
// box (see ratio()). Only page rotations of 0 and 90 are handled; any other
// rotation logs a warning and yields an empty string. Without Poppler
// support the result is always empty.
QString PdfPage::textInRect(double left, double top, double right, double bottom) const
{
#ifdef HAVE_POPPLER
    PopplerGlobalParams gp;

    const auto popplerPage = d->m_doc->m_popplerDoc->getPage(d->m_pageNum + 1);
    const auto cropBox = popplerPage->getCropBox();
    const auto rotation = popplerPage->getRotate();

    double x1 = 0.0;
    double y1 = 0.0;
    double x2 = 0.0;
    double y2 = 0.0;
    if (rotation == 0) {
        x1 = ratio(cropBox->x1, cropBox->x2, left);
        y1 = ratio(cropBox->y1, cropBox->y2, top);
        x2 = ratio(cropBox->x1, cropBox->x2, right);
        y2 = ratio(cropBox->y1, cropBox->y2, bottom);
    } else if (rotation == 90) {
        // For a quarter turn the horizontal factors are applied to the y
        // extent of the crop box and the vertical factors to its x extent.
        x1 = ratio(cropBox->y1, cropBox->y2, left);
        y1 = ratio(cropBox->x1, cropBox->x2, top);
        x2 = ratio(cropBox->y1, cropBox->y2, right);
        y2 = ratio(cropBox->x1, cropBox->x2, bottom);
    } else {
        qCWarning(Log) << "Unsupported page rotation!" << rotation;
        return {};
    }

    // Render the page into a plain text device and read back the requested area.
    TextOutputDev device(nullptr, false, 0, false, false);
    d->m_doc->m_popplerDoc->displayPageSlice(&device, d->m_pageNum + 1, 72, 72, 0, false, true, false, -1, -1, -1, -1);
    const std::unique_ptr<GooString> areaText(device.getText(x1, y1, x2, y2));
#if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0)
    return QString::fromUtf8(areaText->c_str());
#else
    return QString::fromUtf8(areaText->getCString());
#endif
#else
    Q_UNUSED(left);
    Q_UNUSED(top);
    Q_UNUSED(right);
    Q_UNUSED(bottom);
    return {};
#endif
}

134 135
int PdfPage::imageCount() const
{
136
    d->load();
137 138 139 140 141
    return d->m_images.size();
}

// Returns a copy of the image at position index, loading the page content
// first if necessary.
// NOTE(review): index is not range-checked here; callers presumably stay
// within [0, imageCount()) - confirm.
PdfImage PdfPage::image(int index) const
{
    d->load();
    const auto &requested = d->m_images[index];
    return requested;
}

// Returns all images of this page, each wrapped in a QVariant.
QVariantList PdfPage::imagesVariant() const
{
    d->load();

    QVariantList variants;
    variants.reserve(imageCount());
    for (const PdfImage &image : d->m_images) {
        variants.push_back(QVariant::fromValue(image));
    }
    return variants;
}

155 156
// Returns the images whose transform translation (dx, dy) lies inside the
// given rectangle, each wrapped in a QVariant.
// left/top/right/bottom are interpolation factors applied to the page's crop
// box (see ratio()); the bounds are inclusive. Without Poppler support the
// result is always empty.
QVariantList PdfPage::imagesInRect(double left, double top, double right, double bottom) const
{
    d->load();
    QVariantList matches;
#ifdef HAVE_POPPLER
    PopplerGlobalParams gp;
    const auto cropBox = d->m_doc->m_popplerDoc->getPage(d->m_pageNum + 1)->getCropBox();

    // Rectangle bounds in crop box coordinates.
    const double minX = ratio(cropBox->x1, cropBox->x2, left);
    const double maxX = ratio(cropBox->x1, cropBox->x2, right);
    const double minY = ratio(cropBox->y1, cropBox->y2, top);
    const double maxY = ratio(cropBox->y1, cropBox->y2, bottom);

    for (const auto &image : d->m_images) {
        const auto x = image.d->m_transform.dx();
        const auto y = image.d->m_transform.dy();
        const bool insideX = x >= minX && x <= maxX;
        const bool insideY = y >= minY && y <= maxY;
        if (insideX && insideY) {
            matches.push_back(QVariant::fromValue(image));
        }
    }
#else
    Q_UNUSED(left);
    Q_UNUSED(top);
    Q_UNUSED(right);
    Q_UNUSED(bottom);
#endif
    return matches;
}

179

180 181 182 183 184 185 186 187 188 189
PdfDocument::PdfDocument(QObject *parent)
    : QObject(parent)
    , d(new PdfDocumentPrivate)
{
}

PdfDocument::~PdfDocument() = default;

// Returns the text of all pages, concatenated in page order without any
// separator.
QString PdfDocument::text() const
{
    QString fullText;
    for (const PdfPage &page : d->m_pages) {
        fullText += page.text();
    }
    return fullText;
}

195 196
// Returns the number of pages Poppler reports for this document.
// Returns 0 when no Poppler document is attached or when built without
// Poppler support.
int PdfDocument::pageCount() const
{
#ifdef HAVE_POPPLER
    // m_popplerDoc is only assigned in fromData(); an object created through
    // the constructor alone must not be dereferenced here.
    if (!d->m_popplerDoc) {
        return 0;
    }
    return d->m_popplerDoc->getNumPages();
#else
    return 0;
#endif
}

// Returns a copy of the page at position index.
// NOTE(review): index is not range-checked here; callers presumably stay
// within [0, pageCount()) - confirm.
PdfPage PdfDocument::page(int index) const
{
    const auto &requested = d->m_pages[index];
    return requested;
}

209 210 211 212 213
// Returns the size of the raw PDF data held by this document.
int PdfDocument::fileSize() const
{
    const auto byteCount = d->m_pdfData.size();
    return byteCount;
}

Volker Krause's avatar
Volker Krause committed
214
#ifdef HAVE_POPPLER
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
// Converts a PDF date string into a QDateTime.
// Returns an invalid QDateTime when parseDateString() rejects the input or
// when the parsed date or time is not valid. A '+' or '-' timezone marker
// yields a fixed offset from UTC; anything else is treated as UTC.
static QDateTime parsePdfDateTime(const char *str)
{
    int year, month, day;
    int hour, min, sec;
    int tzHours, tzMins;
    char tz;

    const bool parsed = parseDateString(str, &year, &month, &day, &hour, &min, &sec, &tz, &tzHours, &tzMins);
    if (!parsed) {
        return {};
    }

    const QDate date(year, month, day);
    const QTime time(hour, min, sec);
    if (!(date.isValid() && time.isValid())) {
        return {};
    }

    // Unsigned offset in seconds; the sign comes from the timezone marker.
    const int offsetSecs = tzHours * 3600 + tzMins * 60;
    switch (tz) {
        case '+':
            return QDateTime(date, time, Qt::OffsetFromUTC, offsetSecs);
        case '-':
            return QDateTime(date, time, Qt::OffsetFromUTC, -offsetSecs);
        default:
            return QDateTime(date, time, Qt::UTC);
    }
}
Volker Krause's avatar
Volker Krause committed
238
#endif
239 240 241

// Returns the creation date from the PDF document info.
// Returns an invalid QDateTime when no Poppler document is attached, when the
// document carries no creation date, when the date cannot be parsed, or when
// built without Poppler support.
QDateTime PdfDocument::creationTime() const
{
#ifdef HAVE_POPPLER
    // m_popplerDoc is only assigned in fromData(); an object created through
    // the constructor alone must not be dereferenced here.
    if (!d->m_popplerDoc) {
        return {};
    }

    const std::unique_ptr<GooString> rawDate(d->m_popplerDoc->getDocInfoCreatDate());
    if (!rawDate) {
        return {};
    }
#if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0)
    return parsePdfDateTime(rawDate->c_str());
#else
    return parsePdfDateTime(rawDate->getCString());
#endif
#else
    return {};
#endif
}

// Returns the modification date from the PDF document info.
// Returns an invalid QDateTime when no Poppler document is attached, when the
// document carries no modification date, when the date cannot be parsed, or
// when built without Poppler support.
QDateTime PdfDocument::modificationTime() const
{
#ifdef HAVE_POPPLER
    // m_popplerDoc is only assigned in fromData(); an object created through
    // the constructor alone must not be dereferenced here.
    if (!d->m_popplerDoc) {
        return {};
    }

    const std::unique_ptr<GooString> rawDate(d->m_popplerDoc->getDocInfoModDate());
    if (!rawDate) {
        return {};
    }
#if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 72, 0)
    return parsePdfDateTime(rawDate->c_str());
#else
    return parsePdfDateTime(rawDate->getCString());
#endif
#else
    return {};
#endif
}

274 275 276 277 278 279 280 281
// Returns all pages of this document, each wrapped in a QVariant.
QVariantList PdfDocument::pagesVariant() const
{
    QVariantList variants;
    variants.reserve(pageCount());
    for (const PdfPage &page : d->m_pages) {
        variants.push_back(QVariant::fromValue(page));
    }
    return variants;
}

282 283 284
// Creates a PdfDocument from raw PDF data.
// data is stored in the new document, and parent becomes its QObject parent.
// Returns a heap-allocated document released to the caller, or nullptr when
// Poppler reports the document as not OK or when built without Poppler
// support.
PdfDocument* PdfDocument::fromData(const QByteArray &data, QObject *parent)
{
#ifdef HAVE_POPPLER
    PopplerGlobalParams gp;

    std::unique_ptr<PdfDocument> result(new PdfDocument(parent));
    auto *priv = result->d.get();
    priv->m_pdfData = data;

    // The stream is built on the buffer stored in the document itself, not
    // on the caller's argument.
    const auto rawData = const_cast<char*>(priv->m_pdfData.constData());
    const auto rawSize = priv->m_pdfData.size();

    // PDFDoc takes ownership of stream
#if KPOPPLER_VERSION >= QT_VERSION_CHECK(0, 58, 0)
    auto stream = new MemStream(rawData, 0, rawSize, Object());
#else
    Object obj;
    obj.initNull();
    auto stream = new MemStream(rawData, 0, rawSize, &obj);
#endif
    std::unique_ptr<PDFDoc> popplerDoc(new PDFDoc(stream, nullptr, nullptr));
    if (!popplerDoc->isOk()) {
        qCWarning(Log) << "Got invalid PDF document!" << popplerDoc->getErrorCode();
        return nullptr;
    }

    // Create one page entry per PDF page; page content is extracted later,
    // on first access.
    const auto pageTotal = popplerDoc->getNumPages();
    priv->m_pages.reserve(pageTotal);
    for (int pageIndex = 0; pageIndex < pageTotal; ++pageIndex) {
        PdfPage page;
        page.d->m_pageNum = pageIndex;
        page.d->m_doc = priv;
        priv->m_pages.push_back(page);
    }

    priv->m_popplerDoc = std::move(popplerDoc);
    return result.release();
#else
    Q_UNUSED(data);
    Q_UNUSED(parent);
    return nullptr;
#endif
}