mobipocket.cpp 8.87 KB
Newer Older
1
2
3
4
5
6
7
8
9
/***************************************************************************
 *   Copyright (C) 2008 by Jakub Stachowski <qbast@go2.pl>                 *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 ***************************************************************************/

10
11
12
#include "mobipocket.h"
#include "decompressor.h"

Laurent Montel's avatar
Laurent Montel committed
13
14
15
16
#include <QIODevice>
#include <QtEndian>
#include <QBuffer>
#include <QTextCodec>
17
#include <QtGui/QImageReader>
18
19
20

namespace Mobipocket {

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/**
 * Reads up to @p len bytes through the low-level read(char*, int) overload
 * and returns them as a byte array. The array is shrunk to the byte count
 * that overload reports; a negative count yields an empty array.
 */
QByteArray Stream::read(int len)
{
    QByteArray buffer;
    buffer.resize(len);
    len = read(buffer.data(), len);
    buffer.resize(len < 0 ? 0 : len);
    return buffer;
}

/**
 * Drains the stream from its current position, pulling 4096-byte chunks
 * until a chunk comes back empty, and returns everything that was read.
 */
QByteArray Stream::readAll()
{
    QByteArray collected;
    for (;;) {
        const QByteArray chunk = read(4096);
        if (chunk.isEmpty()) break;
        collected += chunk;
    }
    return collected;
}



40
41
struct PDBPrivate {
    QList<quint32> recordOffsets;
42
    Stream* device;
43
44
45
46
47
48
49
50
51
52
53
54
    QString fileType;
    quint16 nrecords;
    bool valid;
    
    void init();
};

void PDBPrivate::init() 
{
        valid=true;
        quint16 word;
        quint32 dword;
Jakub Stachowski's avatar
Jakub Stachowski committed
55
        if (!device->seek(0x3c)) goto fail;
56
57
        fileType=QString::fromLatin1(device->read(8));
        
Jakub Stachowski's avatar
Jakub Stachowski committed
58
        if (!device->seek(0x4c)) goto fail;
59
60
61
62
63
64
65
66
        device->read((char*)&word,2);
        nrecords=qFromBigEndian(word);
        
        for (int i=0;i<nrecords;i++) {
            device->read((char*)&dword,4);
            recordOffsets.append(qFromBigEndian(dword)); 
            device->read((char*)&dword,4);
        }
Jakub Stachowski's avatar
Jakub Stachowski committed
67
68
69
        return;
    fail:
        valid=false;
70
71
}

72
/**
 * Binds the database to @p dev and parses its header right away;
 * the outcome is available through isValid().
 */
PDB::PDB(Stream* dev) : d(new PDBPrivate)
{
    d->device = dev;
    d->init();
}

Laurent Montel's avatar
Laurent Montel committed
78
79
80
81
82
/** Releases the private data allocated by the constructor. */
PDB::~PDB()
{
    delete d;
}

83
84
85
86
87
/**
 * Returns the raw bytes of record @p i.
 *
 * A record spans from its own offset to the offset of the next record;
 * the last record extends to the end of the stream. A null array is
 * returned for an out-of-range index, a failed seek, or an offset table
 * whose next entry lies before the current one.
 */
QByteArray PDB::getRecord(int i) const
{
    // never trust nrecords beyond what the offset table really holds
    const int count=qMin<int>(d->nrecords, d->recordOffsets.size());
    if (i<0 || i>=count) return QByteArray();

    const quint32 offset=d->recordOffsets.at(i);
    if (!d->device->seek(offset)) return QByteArray();
    if (i==count-1) return d->device->readAll();

    const quint32 next=d->recordOffsets.at(i+1);
    // decreasing offsets would underflow the unsigned length below
    if (next<offset) return QByteArray();
    const quint32 len=next-offset;
    // Stream::read() takes an int; refuse lengths that do not fit
    if (len>0x7fffffffU) return QByteArray();
    return d->device->read(int(len));
}

/** Reports whether the header could be read when the object was built. */
bool PDB::isValid() const
{
    const bool ok = d->valid;
    return ok;
}

/** Number of records announced by the file header. */
int PDB::recordCount() const
{
    const int count = d->nrecords;
    return count;
}

////////////////////////////////////////////
struct DocumentPrivate 
{
106
    DocumentPrivate(Stream* d) : pdb(d), valid(true), firstImageRecord(0), 
Jakub Stachowski's avatar
Jakub Stachowski committed
107
        drm(false), thumbnailIndex(0) {}
108
109
110
    PDB pdb;
    Decompressor* dec;
    quint16 ntextrecords;
111
    quint16 maxRecordSize;
112
    bool valid;
Jakub Stachowski's avatar
Jakub Stachowski committed
113
114
    
    // number of first record holding image. Usually it is directly after end of text, but not always
115
    quint16 firstImageRecord;
116
    QMap<Document::MetaKey, QString> metadata;
117
    QTextCodec* codec;
118
    bool drm;
119
    
Jakub Stachowski's avatar
Jakub Stachowski committed
120
121
122
123
    // index of thumbnail in image list. May be specified in EXTH. 
    // If not then just use first image and hope for the best
    int thumbnailIndex;
    
124
125
126
    void init();
    void findFirstImage();
    void parseEXTH(const QByteArray& data);
127
    void parseHtmlHead(const QString& data);
128
    QString readEXTHRecord(const QByteArray& data, quint32& offset);
Jakub Stachowski's avatar
Jakub Stachowski committed
129
    QImage getImageFromRecord(int recnum);
130
131
}; 

Jakub Stachowski's avatar
Jakub Stachowski committed
132

133
134
void DocumentPrivate::parseHtmlHead(const QString& data)
{
135
136
137
138
139
    static QRegExp title("<dc:title.*>(.*)</dc:title>", Qt::CaseInsensitive);
    static QRegExp author("<dc:creator.*>(.*)</dc:creator>", Qt::CaseInsensitive);
    static QRegExp copyright("<dc:rights.*>(.*)</dc:rights>", Qt::CaseInsensitive);
    static QRegExp subject("<dc:subject.*>(.*)</dc:subject>", Qt::CaseInsensitive);
    static QRegExp description("<dc:description.*>(.*)</dc:description>", Qt::CaseInsensitive);
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
    title.setMinimal(true);
    author.setMinimal(true);
    copyright.setMinimal(true);
    subject.setMinimal(true);
    description.setMinimal(true);
    
    // title could have been already taken from MOBI record
    if (!metadata.contains(Document::Title) && title.indexIn(data)!=-1) metadata[Document::Title]=title.capturedTexts()[1];
    if (author.indexIn(data)!=-1) metadata[Document::Author]=author.capturedTexts()[1];
    if (copyright.indexIn(data)!=-1) metadata[Document::Copyright]=copyright.capturedTexts()[1];
    if (subject.indexIn(data)!=-1) metadata[Document::Subject]=subject.capturedTexts()[1];
    if (description.indexIn(data)!=-1) metadata[Document::Description]=description.capturedTexts()[1];
    
}

155
156
void DocumentPrivate::init()
{
157
    quint32 encoding=0;
Jakub Stachowski's avatar
Jakub Stachowski committed
158

159
160
161
    valid=pdb.isValid();
    if (!valid) return;
    QByteArray mhead=pdb.getRecord(0);
162
    if (mhead.isNull() || mhead.size() <14 ) goto fail;
163
    dec = Decompressor::create(mhead[1], pdb);
164
    if ((int)mhead[12]!=0 || (int)mhead[13]!=0) drm=true;
Jakub Stachowski's avatar
Jakub Stachowski committed
165
166
    if (!dec) goto fail;

167
168
169
    ntextrecords=(unsigned char)mhead[8];
    ntextrecords<<=8;
    ntextrecords+=(unsigned char)mhead[9];
170
171
172
    maxRecordSize=(unsigned char)mhead[10];
    maxRecordSize<<=8;
    maxRecordSize+=(unsigned char)mhead[11];
173
174
175
    if (mhead.size() > 31 ) encoding=readBELong(mhead, 28);
    if (encoding==65001) codec=QTextCodec::codecForName("UTF-8");
    else codec=QTextCodec::codecForName("CP1252");
176
    if (mhead.size()>176) parseEXTH(mhead);
177
178
    
    // try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records
179
    if (metadata.size()<2 && !drm) parseHtmlHead(codec->toUnicode(dec->decompress(pdb.getRecord(1))));
Jakub Stachowski's avatar
Jakub Stachowski committed
180
181
182
    return;
fail:
    valid=false;
183
}
184

185
186
187
188
189
190
191
192
193
194
void DocumentPrivate::findFirstImage() {
    firstImageRecord=ntextrecords+1;
    while (firstImageRecord<pdb.recordCount()) {
        QByteArray rec=pdb.getRecord(firstImageRecord);
        if (rec.isNull()) return;
        QBuffer buf(&rec);
        buf.open(QIODevice::ReadOnly);
        QImageReader r(&buf);
        if (r.canRead()) return;
        firstImageRecord++;
195
    }
196
197
198
199
200
201
202
}

/**
 * Decodes the payload of one EXTH record.
 *
 * @param data   buffer holding the record
 * @param offset position of the record's 4-byte length field; advanced past
 *               the payload on return
 * @return the payload decoded with the document codec, or an empty string
 *         for a malformed record
 *
 * The stored length includes 8 bytes of type/length prefix. When the length
 * field is missing or smaller than 8, @p offset is moved to the end of
 * @p data so that the caller's bounds check ends its loop.
 */
QString DocumentPrivate::readEXTHRecord(const QByteArray& data, quint32& offset)
{
    const quint32 size=quint32(data.size());

    // the length field itself must be inside the buffer
    if (offset>size || size-offset<4) {
        offset=size;
        return QString();
    }
    quint32 len=readBELong(data,offset);
    offset+=4;

    // a length below 8 would underflow the unsigned subtraction
    if (len<8) {
        offset=size;
        return QString();
    }
    len-=8;

    // truncated record: keep whatever part of the payload is present
    if (len>size-offset) len=size-offset;

    QString ret=codec->toUnicode(data.mid(offset,len));
    offset+=len;
    return ret;
}

Jakub Stachowski's avatar
Jakub Stachowski committed
208
209
210
/**
 * Loads record @p i and decodes it as an image. Returns a null QImage when
 * the record itself is null.
 */
QImage DocumentPrivate::getImageFromRecord(int i) 
{
    const QByteArray raw = pdb.getRecord(i);
    if (raw.isNull()) {
        return QImage();
    }
    return QImage::fromData(raw);
}


215
216
void DocumentPrivate::parseEXTH(const QByteArray& data) 
{
217
218
    // try to get name 
    if (data.size()>=92) {
Jakub Stachowski's avatar
Jakub Stachowski committed
219
220
        qint32 nameoffset=readBELong(data,84);
        qint32 namelen=readBELong(data,88);
221
        if ( (nameoffset + namelen) < data.size() ) {
222
            metadata[Document::Title]=codec->toUnicode(data.mid(nameoffset, namelen));
223
224
225
        }
    }

226
227
228
229
230
231
    quint32 exthoffs=readBELong(data,20)+16;

    if (data.mid(exthoffs,4)!="EXTH") return;
    quint32 records=readBELong(data,exthoffs+8);
    quint32 offset=exthoffs+12;
    for (unsigned int i=0;i<records;i++) {
232
        if (offset+4 > quint32(data.size())) break;
233
234
235
236
237
238
239
        quint32 type=readBELong(data,offset);
        offset+=4;
        switch (type) {
            case 100: metadata[Document::Author]=readEXTHRecord(data,offset); break;
            case 103: metadata[Document::Description]=readEXTHRecord(data,offset); break;
            case 105: metadata[Document::Subject]=readEXTHRecord(data,offset); break;
            case 109: metadata[Document::Copyright]=readEXTHRecord(data,offset); break;
240
            case 202: offset += 4; thumbnailIndex = readBELong(data,offset); offset+=4; break;
241
            default: readEXTHRecord(data,offset);
242
243
        }
    }
244
245
246
            
    
}
247

248
/**
 * Opens a document on top of @p dev and parses its headers immediately;
 * check isValid() afterwards.
 */
Document::Document(Stream* dev)
    : d(new DocumentPrivate(dev))
{
    d->init();
}

Laurent Montel's avatar
Laurent Montel committed
253
254
255
256
257
258
/** Releases the private data allocated by the constructor. */
Document::~Document()
{
    delete d;
}


259
/**
 * Decompresses the text records and returns them decoded with the
 * document codec.
 *
 * @param size stop after the accumulated byte count exceeds this value;
 *             -1 reads all text records
 * @return the decoded text, or a null string when the document is invalid
 *         or a record fails to decompress (the document is then marked
 *         invalid)
 */
QString Document::text(int size) const 
{
    // after a failed init() dec/codec are not usable; valid is checked
    // first so they are never even read in that case
    if (!d->valid || !d->dec || !d->codec) return QString();

    QByteArray whole;
    // text lives in records 1..ntextrecords
    for (int i=1;i<=d->ntextrecords;i++) {
        QByteArray decompressedRecord = d->dec->decompress(d->pdb.getRecord(i));
        // check the decompressor before using its output
        if (!d->dec->isValid()) {
            d->valid=false;
            return QString();
        }
        if (decompressedRecord.size() > d->maxRecordSize)
            decompressedRecord.resize(d->maxRecordSize);
        whole+=decompressedRecord;
        if (size!=-1 && whole.size()>size) break;
    }
    return d->codec->toUnicode(whole);
}

/** Number of records that are not text records. */
int Document::imageCount() const 
{
    //FIXME: don't count FLIS and FCIS records
    const int total = d->pdb.recordCount();
    return total - d->ntextrecords;
}

bool Document::isValid() const
{
284
    return d->valid;
285
286
287
288
}

/**
 * Returns image number @p i, counted from the first image record. The first
 * image record is located on demand.
 */
QImage Document::getImage(int i) const 
{
    if (d->firstImageRecord == 0) {
        d->findFirstImage();
    }
    const int record = d->firstImageRecord + i;
    return d->getImageFromRecord(record);
}
292
293
294
295
296
297

/** Returns a copy of the metadata collected while the document was opened. */
QMap<Document::MetaKey,QString> Document::metadata() const
{
    QMap<Document::MetaKey,QString> collected = d->metadata;
    return collected;
}

298
299
300
301
302
/** True when the header parsed at construction flagged the document as DRM-protected. */
bool Document::hasDRM() const
{
    const bool flagged = d->drm;
    return flagged;
}

Jakub Stachowski's avatar
Jakub Stachowski committed
303
304
305
/**
 * Returns the image at the stored thumbnail index. When that image cannot
 * be loaded and the index was non-zero, the index is reset to 0 and the
 * first image is returned instead.
 */
QImage Document::thumbnail() const 
{
    if (d->firstImageRecord == 0) {
        d->findFirstImage();
    }

    QImage preferred = d->getImageFromRecord(d->thumbnailIndex + d->firstImageRecord);
    if (!preferred.isNull() || d->thumbnailIndex == 0) {
        return preferred;
    }

    // does not work, try first image
    d->thumbnailIndex = 0;
    return d->getImageFromRecord(d->firstImageRecord);
}

315
}