Commit 2d9e1de6 authored by Volker Krause's avatar Volker Krause
Browse files

Add a flag to abort loading PDF images when encountering colored pixels

This allows us to shortcut barcode decoding in quite a few cases, saving
about 30% on the entire test suite. This is a bit non-intuitive, as the
obvious checks don't work. Some vendors have anti-aliased graphics for
the barcodes rather than leaving the proper scaling to PDF, which means
we have to expect any shade between black and white as well. Some other
vendors tint the barcode in their corporate colors, so we can't assume
red == green == blue either. Therefore we check for large differences
between the color components, which seems to be almost as effective as
a strict black/white check in terms of performance, while producing no
test failures.
parent 90c33ad7
......@@ -70,7 +70,8 @@ std::vector<GenericExtractor::Result> GenericPdfExtractor::extract(PdfDocument *
const auto page = doc->page(i);
for (int j = 0; j < page.imageCount(); ++j) {
const auto img = page.image(j);
auto img = page.image(j);
img.setLoadingHints(PdfImage::AbortOnColorHint); // we only care about b/w-ish images for barcode detection
if (img.hasObjectId() && m_imageIds.find(img.objectId()) != m_imageIds.end()) {
continue;
}
......@@ -95,7 +96,12 @@ std::vector<GenericExtractor::Result> GenericPdfExtractor::extract(PdfDocument *
GenericExtractor::Result GenericPdfExtractor::extractImage(const PdfImage &img)
{
const auto b = m_barcodeDecoder->decodeBinary(img.image());
const auto imgData = img.image();
if (imgData.isNull()) { // can happen due to AbortOnColorHint
return {};
}
const auto b = m_barcodeDecoder->decodeBinary(imgData);
if (Uic9183Parser::maybeUic9183(b)) {
QJsonArray result;
GenericUic918Extractor::extract(b, result, m_contextDate);
......@@ -106,7 +112,7 @@ GenericExtractor::Result GenericPdfExtractor::extractImage(const PdfImage &img)
}
if (b.isEmpty()) {
return extractBarcode(m_barcodeDecoder->decodeString(img.image()));
return extractBarcode(m_barcodeDecoder->decodeString(imgData));
} else {
return extractBarcode(QString::fromUtf8(b));
}
......
......@@ -20,6 +20,7 @@
#include "pdfdocument_p.h"
#include "popplerutils_p.h"
#include <QDebug>
#include <QScopedValueRollback>
#ifdef HAVE_POPPLER
......@@ -81,6 +82,15 @@ void ImageLoaderOutputDevice::drawImage(GfxState *state, Object *ref, Stream *st
}
#endif
/**
 * Decides whether a pixel counts as "colored" for the AbortOnColorHint shortcut.
 * @param rgb the pixel color as produced by the image color map.
 * @return true if any two of the red, green and blue components differ by more than Threshold.
 */
static inline bool isColor(GfxRGB rgb)
{
    // A strict black-or-white test is too narrow: barcode images for SNCF and Renfe for example
    // are anti-aliased and thus contain intermediate shades. Requiring R == G == B does not work
    // either, as KLM/AF use tinted barcodes. So only a large spread between components counts.
    // NOTE(review): the previous comment stated that GfxComp is stored as "color value * 255",
    // while the constant below uses a factor of 256 - confirm the scaling against poppler.
    enum { Threshold = 72 * 256 };
    using Component = decltype(rgb.r);
    const auto farApart = [](Component lhs, Component rhs) {
        return std::abs(lhs - rhs) > Threshold;
    };

    if (farApart(rgb.r, rgb.g)) {
        return true;
    }
    if (farApart(rgb.r, rgb.b)) {
        return true;
    }
    return farApart(rgb.g, rgb.b);
}
QImage PdfImagePrivate::load(Stream* str, GfxImageColorMap* colorMap)
{
auto img = QImage(m_sourceWidth, m_sourceHeight, m_format);
......@@ -96,6 +106,9 @@ QImage PdfImagePrivate::load(Stream* str, GfxImageColorMap* colorMap)
GfxRGB rgb;
for (int j = 0; j < m_sourceWidth; ++j) {
colorMap->getRGB(row + (j * bytesPerPixel), &rgb);
if ((m_loadingHints & PdfImage::AbortOnColorHint) && isColor(rgb)) {
return {};
}
*imgData++ = colToByte(rgb.r);
*imgData++ = colToByte(rgb.g);
*imgData++ = colToByte(rgb.b);
......@@ -189,6 +202,11 @@ QTransform PdfImage::transform() const
return d->m_transform;
}
/**
 * Replaces the loading hints of this image with @p hints.
 * The value is only stored in the private data here; PdfImagePrivate::load() checks it
 * per pixel (AbortOnColorHint) when the image data is actually decoded.
 */
void PdfImage::setLoadingHints(LoadingHints hints)
{
// overwrites any previously set hints rather than merging with them
d->m_loadingHints = hints;
}
QImage PdfImage::image() const
{
if (d->m_format == QImage::Format_Invalid) {
......
......@@ -58,6 +58,16 @@ public:
*/
QTransform transform() const;
/** Hints for loading image data. */
enum LoadingHint {
NoHint = 0, ///< Load image data as-is. The default.
AbortOnColorHint = 1, ///< Abort loading when encountering a clearly colored pixel, as a shortcut for barcode detection.
                      ///< A pixel is considered colored when its color components differ by a large amount, so
                      ///< gray shades (anti-aliasing) and slightly tinted pixels do not abort loading.
                      ///< Callers have to expect a null image when loading was aborted.
};
Q_DECLARE_FLAGS(LoadingHints, LoadingHint)
/** Sets image loading hints. */
void setLoadingHints(LoadingHints hints);
/** The source image with display transformations applied. */
QImage image() const;
......@@ -82,5 +92,6 @@ private:
}
Q_DECLARE_METATYPE(KItinerary::PdfImage)
Q_DECLARE_OPERATORS_FOR_FLAGS(KItinerary::PdfImage::LoadingHints)
#endif // KITINERARY_PDFIMAGE_H
......@@ -20,6 +20,7 @@
#include <config-kitinerary.h>
#include "pdfimage.h"
#include "pdfvectorpicture_p.h"
#include <QImage>
......@@ -60,6 +61,7 @@ public:
int m_height = 0;
int m_sourceWidth = 0;
int m_sourceHeight = 0;
PdfImage::LoadingHints m_loadingHints = PdfImage::NoHint;
};
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment