Members of the KDE Community are encouraged to subscribe to the kde-community mailing list at https://mail.kde.org/mailman/listinfo/kde-community so that they can participate in important discussions and receive other important announcements.

Commit 81f027ec authored by Jakub Stachowski's avatar Jakub Stachowski

Support for files compressed with Huffman encoding. This means you can finally

read some files that FBReader cannot handle


svn path=/trunk/playground/graphics/okular/mobipocket/; revision=882322
parent 8452972d
......@@ -18,6 +18,7 @@ find_package(SharedMimeInfo REQUIRED)
set(okularGenerator_mobi_PART_SRCS
converter.cpp
mobipocket.cpp
decompressor.cpp
mobidocument.cpp
generator_mobi.cpp
)
......@@ -28,7 +29,6 @@ target_link_libraries(okularGenerator_mobi okularcore ${mobi_LIBRARIES} ${KDE4_K
install(TARGETS okularGenerator_mobi DESTINATION ${PLUGIN_INSTALL_DIR})
########### install files ###############
install( FILES libokularGenerator_mobi.desktop okularMobi.desktop DESTINATION ${SERVICES_INSTALL_DIR} )
......
- better error handling
- tests for Mobipocket classes
- anchors (a filepos=)
- handle files compression with Huffman encoding
- decryption for DRMed files
- metadata
......@@ -15,7 +15,7 @@
#include <QTextDocumentFragment>
#include <QtCore/QDebug>
#include <QtCore/QFile>
#include <qmobi.h>
#include <mobipocket.h>
#include <klocale.h>
#include <okular/core/action.h>
......@@ -36,7 +36,7 @@ QTextDocument* Converter::convert( const QString &fileName )
{
MobiDocument* newDocument=new MobiDocument(fileName);
if (!newDocument->isValid()) {
emit error(i18n("Error while opening the EPub document."), -1);
emit error(i18n("Error while opening the Mobipocket document."), -1);
delete newDocument;
return NULL;
}
......
/***************************************************************************
* Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> *
* *
* RLE decompressor based on FBReader *
* Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com> *
* *
* Huffdic decompressor based on Python code by Igor Skochinsky *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
#include "mobipocket.h"
#include "decompressor.h"
#include <QtCore/QList>
// Classifies every possible input byte of the RLE (PalmDoc-style) stream
// into one of four token kinds used by the decoder's switch statement:
//   0 - byte is copied to the output unchanged (0x00, 0x09..0x7f)
//   1 - byte is a count of literal bytes that follow (0x01..0x08)
//   2 - byte expands to a space plus (byte ^ 0x80) (0xc0..0xff)
//   3 - byte is the first half of a two-byte back-reference (0x80..0xbf)
// The table is only ever read, so it is const.
static const unsigned char TOKEN_CODE[256] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
namespace Mobipocket {
// Decompressor for records that are stored without compression:
// the input bytes are handed back untouched.
class NOOPDecompressor : public Decompressor
{
public:
    NOOPDecompressor(const PDB& p)
        : Decompressor(p)
    {
    }

    // Returns the record data as-is.
    virtual QByteArray decompress(const QByteArray& data)
    {
        return data;
    }
};
// Decompressor for records packed with the byte-oriented scheme that is
// driven by the TOKEN_CODE table (literals, literal runs, space+char
// pairs and back-references).
class RLEDecompressor : public Decompressor
{
public:
    RLEDecompressor(const PDB& p)
        : Decompressor(p)
    {
    }

    // Expands one compressed record; defined out of line.
    virtual QByteArray decompress(const QByteArray& data);
};
class BitReader
{
public:
BitReader(const QByteArray& d) : pos(0), data(d)
{
data.append("\000\000\000\000");
len=data.size()*8;
}
quint32 read() {
quint32 g=0;
quint64 r=0;
while (g<32) {
r=(r << 8) | (quint8)data[(pos+g)>>3];
g=g+8 - ((pos+g) & 7);
}
return (r >> (g-32));
}
bool eat(int n) {
pos+=n;
return pos <= len;
}
int left() {
return len - pos;
}
private:
int pos;
int len;
QByteArray data;
};
// Decompressor for records encoded with a Huffman code whose symbols
// expand to entries of one or more dictionary records.
class HuffdicDecompressor : public Decompressor
{
public:
    // Loads the code tables and dictionary records from the database;
    // marks the object invalid when they cannot be used.
    HuffdicDecompressor(const PDB& p);

    // Expands one compressed record.
    virtual QByteArray decompress(const QByteArray& data);

private:
    // Decodes the bit stream in 'reader' into buf. Dictionary entries may
    // themselves be compressed, in which case unpack() calls itself with
    // depth+1.
    void unpack(BitReader reader, int depth = 0);

    QList<QByteArray> dicts;   // dictionary ("CDIC") records
    quint32 entry_bits;        // bit count used to split a symbol into dictionary number and entry index
    quint32 dict1[256];        // table indexed by the top 8 bits of the stream
    quint32 dict2[64];         // pairs of values indexed by code length
    QByteArray buf;            // output accumulated by unpack()
};
// Expands one record of the byte-oriented compression scheme.
//
// Each input byte is classified through TOKEN_CODE:
//   0 - copied to the output unchanged
//   1 - a count; that many following bytes are copied verbatim
//   2 - expands to a space followed by (byte ^ 0x80)
//   3 - together with the next byte forms a 16-bit value whose low 3 bits
//       (+3) give a copy length and whose bits 3..13 give a distance back
//       into the already produced output
//
// Decoding stops at the first truncated or inconsistent token and whatever
// has been produced up to that point is returned.
QByteArray RLEDecompressor::decompress(const QByteArray& data)
{
    QByteArray ret;
    ret.reserve(8192);

    const int size=data.size();
    int i=0;
    while (i<size) {
        const unsigned char token = data.at(i++);
        switch (TOKEN_CODE[token]) {
        case 0:
            ret.append(token);
            break;
        case 1:
            // A literal run may end exactly on the last input byte. The
            // previous check compared against size-1 and dropped such a
            // run.
            if (i + token > size) return ret;
            ret.append(data.mid(i,token));
            i+=token;
            break;
        case 2:
            ret.append(' ');
            ret.append(token ^ 0x80);
            break;
        case 3: {
            // The second byte of the pair may be the last input byte. The
            // previous check rejected that case as well.
            if (i >= size) return ret;
            const int N = (token << 8) | (unsigned char)data.at(i++);
            const int copyLength = (N & 7) + 3;
            const int shift = (N & 0x3fff) >> 3;
            // Plain ints are used so the start offset cannot wrap around
            // when the output grows beyond 65535 bytes.
            const int from = ret.size()-shift;
            if (shift == 0 || from < 0) return ret;
            // Copy byte by byte: source and destination may overlap, so
            // freshly appended bytes can themselves be copied again.
            for (int k=0;k<copyLength;k++) ret.append(ret.at(from+k));
            break;
        }
        }
    }
    return ret;
}
// Assembles the four bytes at data[offset..offset+3] into a 32-bit value,
// first byte most significant. The caller must ensure those four bytes
// exist.
quint32 readBELong(const QByteArray& data, int offset)
{
    const quint32 b0=(unsigned char)data[offset];
    const quint32 b1=(unsigned char)data[offset+1];
    const quint32 b2=(unsigned char)data[offset+2];
    const quint32 b3=(unsigned char)data[offset+3];
    return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
// Loads everything needed for Huffman/dictionary decoding from the
// database:
//   - record 0 supplies, at offsets 0x70 and 0x74, the index of the first
//     Huffman record and the number of Huffman-related records;
//   - the first of those records must start with "HUFF" and holds the
//     offsets (at 16 and 20) of the two code tables;
//   - the remaining records are the dictionaries; the first must start
//     with "CDIC" and holds entry_bits at offset 12.
// On any inconsistency the object is marked invalid and the tables stay
// zeroed. A zero table entry has code length 0, which unpack() rejects.
HuffdicDecompressor::HuffdicDecompressor(const PDB& p) : Decompressor(p), entry_bits(0)
{
    for (int i=0;i<256;i++) dict1[i]=0;
    for (int i=0;i<64;i++) dict2[i]=0;

    const QByteArray header=p.getRecord(0);
    // Both header fields read below must lie inside the record.
    if (header.size() < 0x78) {
        valid=false;
        return;
    }
    const quint32 huff_ofs=readBELong(header,0x70);
    const quint32 huff_num=readBELong(header,0x74);
    // One HUFF record plus at least one dictionary is required; without
    // this check dicts[0] below would index an empty list.
    if (huff_num < 2) {
        valid=false;
        return;
    }

    const QByteArray huff1=p.getRecord(huff_ofs);
    if (huff1.size() < 24 || !huff1.startsWith("HUFF")) {
        valid=false;
        return;
    }

    for (quint32 i=1;i<huff_num;i++) {
        const QByteArray cdic=p.getRecord(huff_ofs+i);
        // An empty dictionary record can never be decoded from. Bailing
        // out here also bounds the loop when huff_num is garbage,
        // presumably because getRecord() yields an empty array for a
        // missing record - TODO confirm against PDB::getRecord.
        if (cdic.isEmpty()) {
            valid=false;
            return;
        }
        dicts.append(cdic);
    }
    if (dicts.at(0).size() < 16 || !dicts.at(0).startsWith("CDIC")) {
        valid=false;
        return;
    }

    // entry_bits is later used as a shift count on a 32-bit value.
    const quint32 bits=readBELong(dicts.at(0),12);
    if (bits > 31) {
        valid=false;
        return;
    }

    const quint32 off1=readBELong(huff1,16);
    const quint32 off2=readBELong(huff1,20);
    const quint64 avail=huff1.size();
    // Both tables must lie completely inside the HUFF record.
    if (quint64(off1)+sizeof(dict1) > avail || quint64(off2)+sizeof(dict2) > avail) {
        valid=false;
        return;
    }

    entry_bits=bits;
    // NOTE(review): the tables are copied in file byte order, which is
    // only correct when it matches the host byte order - confirm for
    // big-endian targets.
    memcpy(dict1,huff1.constData()+off1, sizeof(dict1));
    memcpy(dict2,huff1.constData()+off2, sizeof(dict2));
}
// Expands one compressed record. The output buffer is reset first, then
// unpack() fills it from a bit reader positioned at the start of 'data'.
QByteArray HuffdicDecompressor::decompress(const QByteArray& data)
{
    buf.clear();
    BitReader bits(data);
    unpack(bits);
    return buf;
}
// Decodes the bit stream held by 'reader' and appends the result to buf.
//
// For every code: the top 8 bits of the stream select an entry of dict1,
// which either resolves the code directly or gives a minimum length from
// which the code is extended using dict2. The resulting symbol is split
// into a dictionary number and an entry index (entry_bits wide). The
// dictionary entry is either literal bytes (flag 0x8000 set in its length
// word) or another compressed stream that is unpacked recursively.
//
// 'depth' is the current recursion level. Any inconsistency marks the
// decompressor invalid and stops decoding; running out of input bits in
// the middle of a code ends decoding silently.
void HuffdicDecompressor::unpack(BitReader reader,int depth)
{
    // Cap the nesting so a self-referencing dictionary cannot recurse
    // without bound. entry_bits is a shift count for 32-bit values.
    if (depth>32 || entry_bits>31) {
        valid=false;
        return;
    }

    while (reader.left()) {
        const quint32 dw=reader.read();
        const quint32 v=dict1[dw>>24];
        quint32 codelen = v & 0x1F;
        if (!codelen) {
            valid=false;
            return;
        }
        quint32 code = dw >> (32 - codelen);
        quint32 r=(v >> 8);
        if (!( v & 0x80)) {
            // Code is longer than the table entry says: extend it bit by
            // bit until it reaches the lower bound stored for its length.
            while (code < dict2[(codelen-1)*2]) {
                // dict2 has entries for lengths 1..32 only; beyond that
                // the index would leave the table and the shift count
                // would go negative.
                if (++codelen > 32) {
                    valid=false;
                    return;
                }
                code = dw >> (32 - codelen);
            }
            r = dict2[(codelen-1)*2+1];
        }
        r-=code;
        if (!reader.eat((int)codelen)) return;

        const quint32 dict_no = r >> entry_bits;
        if (dict_no >= (quint32)dicts.size()) {
            valid=false;
            return;
        }
        const QByteArray dict=dicts.at((int)dict_no);
        const qint64 dictSize=dict.size();

        // Position of the 16-bit entry offset inside the dictionary's
        // index, which starts 16 bytes into the record.
        const qint64 off1 = 16 + (qint64)(r - (dict_no << entry_bits))*2;
        if (off1+2 > dictSize) {
            valid=false;
            return;
        }
        const qint64 off2 = 16 + (unsigned char)dict.at((int)off1)*256 + (unsigned char)dict.at((int)off1+1);
        if (off2+2 > dictSize) {
            valid=false;
            return;
        }
        const quint32 blen = (unsigned char)dict.at((int)off2)*256 + (unsigned char)dict.at((int)off2+1);
        const QByteArray slice=dict.mid((int)off2+2,(blen & 0x7fff));
        if (blen & 0x8000) {
            buf+=slice;
        } else {
            unpack(BitReader(slice),depth+1);
            // Do not keep decoding after a nested failure.
            if (!valid) return;
        }
    }
}
// Factory: maps a compression type byte to a newly allocated decompressor
// bound to 'pdb'. The caller owns the returned object. Returns 0 for a
// type that is not supported.
Decompressor* Decompressor::create(quint8 type, const PDB& pdb)
{
    if (type == 1) {
        return new NOOPDecompressor(pdb);
    }
    if (type == 2) {
        return new RLEDecompressor(pdb);
    }
    if (type == 'H') {
        return new HuffdicDecompressor(pdb);
    }
    return 0;
}
}
/***************************************************************************
* Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
#include <QtCore/QByteArray>
namespace Mobipocket {
class PDB;
// Abstract base for the record decompressors. Concrete instances are
// obtained through create().
class Decompressor {
public:
// Binds the decompressor to a database; it starts out valid.
Decompressor(const PDB& p) : pdb(p), valid(true) {}
// Expands one compressed record and returns the resulting bytes.
virtual QByteArray decompress(const QByteArray& data) = 0;
virtual ~Decompressor() {}
// False once a subclass has flagged the input as unusable.
bool isValid() const { return valid; }
// Returns a new decompressor for the given compression type byte.
static Decompressor* create(quint8 type, const PDB& pdb);
protected:
// Held by reference: the PDB must outlive the decompressor.
const PDB& pdb;
// Subclasses clear this flag when they hit bad data.
bool valid;
};
}
\ No newline at end of file
/***************************************************************************
* Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl> *
* *
* RLE decompressor based on FBReader *
* Copyright (C) 2004-2008 Geometer Plus <contact@geometerplus.com> *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
#include <mobipocket.h>
#include "mobipocket.h"
#include "decompressor.h"
#include <QtCore/QIODevice>
#include <QtCore/QtEndian>
#include <QtCore/QBuffer>
#include <QtGui/QImageReader>
#include <kdebug.h>
// Classifies every possible input byte of the RLE (PalmDoc-style) stream
// into one of four token kinds used by the decoder's switch statement:
//   0 - byte is copied to the output unchanged (0x00, 0x09..0x7f)
//   1 - byte is a count of literal bytes that follow (0x01..0x08)
//   2 - byte expands to a space plus (byte ^ 0x80) (0xc0..0xff)
//   3 - byte is the first half of a two-byte back-reference (0x80..0xbf)
// The table is only ever read, so it is const.
static const unsigned char TOKEN_CODE[256] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
namespace Mobipocket {
// Decompressor for records that are stored without compression:
// the input bytes are handed back untouched.
class NOOPDecompressor : public Decompressor
{
public:
    // Returns the record data as-is.
    virtual QByteArray decompress(const QByteArray& data)
    {
        return data;
    }
};
// Decompressor for records packed with the byte-oriented scheme that is
// driven by the TOKEN_CODE table (literals, literal runs, space+char
// pairs and back-references).
class RLEDecompressor : public Decompressor
{
public:
    // Expands one compressed record; defined out of line.
    virtual QByteArray decompress(const QByteArray& data);
};
// Expands one record of the byte-oriented compression scheme.
//
// Each input byte is classified through TOKEN_CODE:
//   0 - copied to the output unchanged
//   1 - a count; that many following bytes are copied verbatim
//   2 - expands to a space followed by (byte ^ 0x80)
//   3 - together with the next byte forms a 16-bit value whose low 3 bits
//       (+3) give a copy length and whose bits 3..13 give a distance back
//       into the already produced output
//
// Decoding stops at the first truncated or inconsistent token and whatever
// has been produced up to that point is returned.
QByteArray RLEDecompressor::decompress(const QByteArray& data)
{
    QByteArray ret;
    ret.reserve(8192);

    const int size=data.size();
    int i=0;
    while (i<size) {
        const unsigned char token = data.at(i++);
        switch (TOKEN_CODE[token]) {
        case 0:
            ret.append(token);
            break;
        case 1:
            // A literal run may end exactly on the last input byte. The
            // previous check compared against size-1 and dropped such a
            // run.
            if (i + token > size) return ret;
            ret.append(data.mid(i,token));
            i+=token;
            break;
        case 2:
            ret.append(' ');
            ret.append(token ^ 0x80);
            break;
        case 3: {
            // The second byte of the pair may be the last input byte. The
            // previous check rejected that case as well.
            if (i >= size) return ret;
            const int N = (token << 8) | (unsigned char)data.at(i++);
            const int copyLength = (N & 7) + 3;
            const int shift = (N & 0x3fff) >> 3;
            // Plain ints are used so the start offset cannot wrap around
            // when the output grows beyond 65535 bytes.
            const int from = ret.size()-shift;
            if (shift == 0 || from < 0) return ret;
            // Copy byte by byte: source and destination may overlap, so
            // freshly appended bytes can themselves be copied again.
            for (int k=0;k<copyLength;k++) ret.append(ret.at(from+k));
            break;
        }
        }
    }
    return ret;
}
/////////////////////////////////////////////
struct PDBPrivate {
QList<quint32> recordOffsets;
QIODevice* device;
......@@ -184,16 +97,19 @@ struct DocumentPrivate
valid=pdb.isValid();
if (!valid) return;
QByteArray mhead=pdb.getRecord(0);
if (mhead[0]!=(char)0) {}
switch (mhead[1]) {
case 1 : dec = new NOOPDecompressor(); break;
case 2 : dec = new RLEDecompressor(); break;
default : dec=0; {}
}
kDebug() << "MHEAD" << (int)mhead[0];
// if (mhead[0]!=(char)0) goto fail;
kDebug() << "MHEAD" << (int)mhead[1];
dec = Decompressor::create(mhead[1], pdb);
if (!dec) goto fail;
ntextrecords=(unsigned char)mhead[8];
ntextrecords<<=8;
ntextrecords+=(unsigned char)mhead[9];
return;
fail:
valid=false;
}
void findFirstImage() {
firstImageRecord=ntextrecords+1;
......@@ -217,8 +133,13 @@ Document::Document(QIODevice* dev) : d(new DocumentPrivate(dev))
// Decompresses all text records and returns them as one UTF-8 decoded
// string. Returns a null string, and marks the document invalid, as soon
// as the decompressor reports bad data.
QString Document::text() const
{
    // No decompressor means the document could not be set up.
    if (!d->dec) return QString::null;

    QByteArray whole;
    // Record 0 is the header; text lives in records 1..ntextrecords
    // inclusive. The stale pre-change loop header that preceded this one
    // turned it into a nested loop and is removed.
    for (int i=1;i<d->ntextrecords+1;i++) {
        whole+=d->dec->decompress(d->pdb.getRecord(i));
        if (!d->dec->isValid()) {
            d->valid=false;
            return QString::null;
        }
    }
    return QString::fromUtf8(whole);
}
......
......@@ -30,12 +30,6 @@ private:
PDBPrivate* const d;
};
// Abstract interface for record decompressors.
class Decompressor {
public:
// Expands one compressed record and returns the resulting bytes.
virtual QByteArray decompress(const QByteArray& data) = 0;
// Virtual so that instances can be deleted through a base pointer.
virtual ~Decompressor() {}
};
struct DocumentPrivate;
class Document {
public:
......
#include "mobipocket.h"
#include <qfile.h>
#include <kdebug.h>
// Small manual test driver: opens the file named on the command line as a
// Mobipocket document, prints whether it is valid and, if so, extracts
// its text to exercise the decompressor.
// Returns 1 when no file was given or it cannot be opened, 0 otherwise.
int main(int argc, char ** argv)
{
    if (argc < 2) {
        kDebug() << "Missing argument: path to a Mobipocket file";
        return 1;
    }

    QFile f(argv[1]);
    if (!f.open(QIODevice::ReadOnly)) {
        kDebug() << "Cannot open" << f.fileName();
        return 1;
    }

    // Stack allocation: the document is released on return and is
    // destroyed before the file it reads from.
    Mobipocket::Document d(&f);
    kDebug() << d.isValid();
    // An invalid document may have no decompressor, so only extract text
    // from a valid one.
    if (d.isValid()) d.text();
    return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment