textpage.cpp 63.7 KB
Newer Older
1
2
3
4
5
6
7
8
/***************************************************************************
 *   Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com>             *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 ***************************************************************************/
9

Albert Astals Cid's avatar
Albert Astals Cid committed
10
#include "textpage.h"
11
#include "textpage_p.h"
Pino Toscano's avatar
Pino Toscano committed
12

13
14
#include <kdebug.h>

15
#include "area.h"
16
#include "debug_p.h"
17
#include "misc.h"
18
#include "page.h"
19
#include "page_p.h"
20

21
22
#include <cstring>

23
#include <QtAlgorithms>
24
#include <QVarLengthArray>
Myreal Name's avatar
Myreal Name committed
25

26
using namespace Okular;
27

28
class SearchPoint
29
{
30
31
    public:
        SearchPoint()
32
            : offset_begin( -1 ), offset_end( -1 )
33
34
35
        {
        }

36
37
        TextList::ConstIterator it_begin;
        TextList::ConstIterator it_end;
38
39
40
41
        int offset_begin;
        int offset_end;
};

42
43
44
45
46
static int qHash(const QRect &r)
{
    return r.left() * r.top() + r.right() * r.bottom();
}

47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/* text comparison functions */

bool CaseInsensitiveCmpFn( const QStringRef & from, const QStringRef & to,
                           int *fromLength, int *toLength )
{
    *fromLength = from.length();
    *toLength = to.length();
    return from.compare( to, Qt::CaseInsensitive ) == 0;
}

bool CaseSensitiveCmpFn( const QStringRef & from, const QStringRef & to,
                           int *fromLength, int *toLength )
{
    *fromLength = from.length();
    *toLength = to.length();
    return from.compare( to, Qt::CaseSensitive ) == 0;
}

65
66
67
68
69
70
71
72

/*
  Rationale behind TinyTextEntity:

  instead of storing directly a QString for the text of an entity,
  we store the UTF-16 data and their length. This way, we save about
  4 int's wrt a QString, and we can create a new string from that
  raw data (that's the only penalty of that).
73
74
75
  Even better, if the string we need to store has at most
  MaxStaticChars characters, then we store those in place of the QChar*
  that would be used (with new[] + free[]) for the data.
76
77
78
 */
class TinyTextEntity
{
79
80
    static const int MaxStaticChars = sizeof( QChar * ) / sizeof( QChar );

81
82
83
84
85
    public:
        TinyTextEntity( const QString &text, const NormalizedRect &rect )
            : area( rect )
        {
            Q_ASSERT_X( !text.isEmpty(), "TinyTextEntity", "empty string" );
86
87
            Q_ASSERT_X( sizeof( d ) == sizeof( QChar * ), "TinyTextEntity",
                        "internal storage is wider than QChar*, fix it!" );
88
            length = text.length();
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
            switch ( length )
            {
#if QT_POINTER_SIZE >= 8
                case 4:
                    d.qc[3] = text.at( 3 ).unicode();
                    // fall through
                case 3:
                    d.qc[2] = text.at( 2 ).unicode();
                    // fall through
#endif
                case 2:
                    d.qc[1] = text.at( 1 ).unicode();
                    // fall through
                case 1:
                    d.qc[0] = text.at( 0 ).unicode();
                    break;
                default:
                    d.data = new QChar[ length ];
                    std::memcpy( d.data, text.constData(), length * sizeof( QChar ) );
            }
109
110
111
112
        }

        ~TinyTextEntity()
        {
113
114
115
116
            if ( length > MaxStaticChars )
            {
                delete [] d.data;
            }
117
118
119
120
        }

        inline QString text() const
        {
121
122
            return length <= MaxStaticChars ? QString::fromRawData( ( const QChar * )&d.qc[0], length )
                                            : QString::fromRawData( d.data, length );
123
124
125
126
127
128
129
130
131
132
133
134
135
136
        }

        inline NormalizedRect transformedArea( const QMatrix &matrix ) const
        {
            NormalizedRect transformed_area = area;
            transformed_area.transform( matrix );
            return transformed_area;
        }

        NormalizedRect area;

    private:
        Q_DISABLE_COPY( TinyTextEntity )

137
138
139
140
141
        union
        {
            QChar *data;
            ushort qc[MaxStaticChars];
        } d;
142
143
144
145
        int length;
};


146
TextEntity::TextEntity( const QString &text, NormalizedRect *area )
147
    : m_text( text ), m_area( area ), d( 0 )
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
{
}

TextEntity::~TextEntity()
{
    delete m_area;
}

QString TextEntity::text() const
{
    return m_text;
}

NormalizedRect* TextEntity::area() const
{
    return m_area;
}

166
NormalizedRect TextEntity::transformedArea(const QMatrix &matrix) const
167
{
168
169
170
    NormalizedRect transformed_area = *m_area;
    transformed_area.transform( matrix );
    return transformed_area;
171
172
}

173

174
175
176
177
178
TextPagePrivate::TextPagePrivate()
    : m_page( 0 )
{
}

179
180
181
182
183
TextPagePrivate::~TextPagePrivate()
{
    qDeleteAll( m_searchPoints );
    qDeleteAll( m_words );
}
184
185


186
TextPage::TextPage()
187
    : d( new TextPagePrivate() )
188
189
190
191
{
}

TextPage::TextPage( const TextEntity::List &words )
192
    : d( new TextPagePrivate() )
193
{
Laurent Montel's avatar
Laurent Montel committed
194
    TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
195
196
197
198
199
200
201
    for ( ; it != itEnd; ++it )
    {
        TextEntity *e = *it;
        if ( !e->text().isEmpty() )
            d->m_words.append( new TinyTextEntity( e->text(), *e->area() ) );
        delete e;
    }
202
203
}

204
TextPage::~TextPage()
205
{
206
207
208
209
210
    delete d;
}

void TextPage::append( const QString &text, NormalizedRect *area )
{
211
    if ( !text.isEmpty() )
212
        d->m_words.append( new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) );
213
    delete area;
214
215
}

216
217
218
219
220
/**
 * We will divide the whole page in some regions depending on the horizontal and
 * vertical spacing among different regions. Each region will have an area and an
 * associated TextList in sorted order.
*/
221
222
class RegionText
{
223
224

public:
225
226
227
    RegionText()
    {
    };
228

229
    RegionText(const TextList &list,const QRect &area)
230
231
232
        : m_region_text(list) ,m_area(area)
    {
    }
233
234
235
236
237
238
239
240
    
    inline QString string() const
    {
        QString res;
        foreach(TinyTextEntity *te, m_region_text)
            res += te->text();
        return res;
    }
241

242
243
    inline TextList text() const
    {
244
245
246
        return m_region_text;
    }

247
248
    inline QRect area() const
    {
249
250
251
        return m_area;
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
252
    inline void setArea(const QRect &area)
253
    {
254
255
256
        m_area = area;
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
257
    inline void setText(const TextList &text)
258
    {
259
260
261
262
263
264
265
266
        m_region_text = text;
    }

private:
    TextList m_region_text;
    QRect m_area;
};

267
RegularAreaRect * TextPage::textArea ( TextSelection * sel) const
268
{
269
270
271
    if ( d->m_words.isEmpty() )
        return new RegularAreaRect();

272
/**
Tobias Koenig's avatar
Tobias Koenig committed
273
274
275
276
277
278
279
280
281
282
283
284
    It works like this:
    There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0)
    rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those 
    points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search
    for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both 
    of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a 
    1. (rx,ry)x(1,ty)
    2. (0,ty)x(1,uy)
    3. (0,uy)x(vx,vy)

    To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor
    or that has a left border >= cx and bottom border >= cy. 
285
*/
Tobias Koenig's avatar
Tobias Koenig committed
286
    RegularAreaRect * ret= new RegularAreaRect;
287

288
    const QMatrix matrix = d->m_page ? d->m_page->rotationMatrix() : QMatrix();
289
#if 0
Tobias Koenig's avatar
Tobias Koenig committed
290
291
292
293
    int it = -1;
    int itB = -1;
    int itE = -1;

Frederik Schwarzer's avatar
typos    
Frederik Schwarzer committed
294
    // ending cursor is higher than start cursor, we need to find positions in reverse
295
296
297
    NormalizedRect tmp;
    NormalizedRect start;
    NormalizedRect end;
Tobias Koenig's avatar
Tobias Koenig committed
298
299
300
301
302
303
304
305
306
307
308

    NormalizedPoint startC = sel->start();
    double startCx = startC.x;
    double startCy = startC.y;

    NormalizedPoint endC = sel->end();
    double endCx = endC.x;
    double endCy = endC.y;

    if ( sel->direction() == 1 || ( sel->itB() == -1 && sel->direction() == 0 ) )
    {
309
#ifdef DEBUG_TEXTPAGE
310
        kWarning() << "running first loop";
311
#endif
Tobias Koenig's avatar
Tobias Koenig committed
312
313
314
        const int count = d->m_words.count();
        for ( it = 0; it < count; it++ )
        {
315
            tmp = *d->m_words[ it ]->area();
316
317
318
            if ( tmp.contains( startCx, startCy )
                 || ( tmp.top <= startCy && tmp.bottom >= startCy && tmp.left >= startCx )
                 || ( tmp.top >= startCy))
Tobias Koenig's avatar
Tobias Koenig committed
319
320
321
            {
                /// we have found the (rx,ry)x(tx,ty)
                itB = it;
322
#ifdef DEBUG_TEXTPAGE
323
                kWarning() << "start is" << itB << "count is" << d->m_words.count();
324
#endif
Tobias Koenig's avatar
Tobias Koenig committed
325
326
                break;
            }
327
        }
Tobias Koenig's avatar
Tobias Koenig committed
328
329
330
        sel->itB( itB );
    }
    itB = sel->itB();
331
#ifdef DEBUG_TEXTPAGE
332
333
    kWarning() << "direction is" << sel->direction();
    kWarning() << "reloaded start is" << itB << "against" << sel->itB();
334
#endif
Tobias Koenig's avatar
Tobias Koenig committed
335
336
    if ( sel->direction() == 0 || ( sel->itE() == -1 && sel->direction() == 1 ) )
    {
337
#ifdef DEBUG_TEXTPAGE
338
        kWarning() << "running second loop";
339
#endif
Tobias Koenig's avatar
Tobias Koenig committed
340
341
        for ( it = d->m_words.count() - 1; it >= itB; it-- )
        {
342
            tmp = *d->m_words[ it ]->area();
343
344
345
            if ( tmp.contains( endCx, endCy )
                 || ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx )
                 || ( tmp.bottom <= endCy ) )
Tobias Koenig's avatar
Tobias Koenig committed
346
347
348
            {
                /// we have found the (ux,uy)x(vx,vy)
                itE = it;
349
#ifdef DEBUG_TEXTPAGE
350
                kWarning() << "ending is" << itE << "count is" << d->m_words.count();
351
352
353
                kWarning() << "conditions" << tmp.contains( endCx, endCy ) << " " 
                  << ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx ) << " " <<
                  ( tmp.top >= endCy);
354
#endif
Tobias Koenig's avatar
Tobias Koenig committed
355
356
                break;
            }
357
        }
Tobias Koenig's avatar
Tobias Koenig committed
358
359
        sel->itE( itE );
    }
360
#ifdef DEBUG_TEXTPAGE
361
    kWarning() << "reloaded ending is" << itE << "against" << sel->itE();
362
#endif
363

Tobias Koenig's avatar
Tobias Koenig committed
364
365
    if ( sel->itB() != -1 && sel->itE() != -1 )
    {
366
367
        start = *d->m_words[ sel->itB() ]->area();
        end = *d->m_words[ sel->itE() ]->area();
Tobias Koenig's avatar
Tobias Koenig committed
368
369

        NormalizedRect first, second, third;
Frederik Schwarzer's avatar
Frederik Schwarzer committed
370
        /// finding out if there is more than one baseline between them is a hard and discussable task
Tobias Koenig's avatar
Tobias Koenig committed
371
372
        /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the 
        /// areas, if more than one -> we have a three or over line selection
373
374
        first = start;
        second.top = start.bottom;
Tobias Koenig's avatar
Tobias Koenig committed
375
        first.right = second.right = 1;
376
        third = end;
Tobias Koenig's avatar
Tobias Koenig committed
377
        third.left = second.left = 0;
378
        second.bottom = end.top;
Tobias Koenig's avatar
Tobias Koenig committed
379
380
        int selMax = qMax( sel->itB(), sel->itE() );
        for ( it = qMin( sel->itB(), sel->itE() ); it <= selMax; ++it )
381
        {
382
            tmp = *d->m_words[ it ]->area();
383
            if ( tmp.intersects( &first ) || tmp.intersects( &second ) || tmp.intersects( &third ) )
384
                ret->appendShape( d->m_words.at( it )->transformedArea( matrix ) );
385
        }
Tobias Koenig's avatar
Tobias Koenig committed
386
    }
387
#else
388
389
    const double scaleX = d->m_page->m_page->width();
    const double scaleY = d->m_page->m_page->height();
390

391
392
    NormalizedPoint startC = sel->start();
    NormalizedPoint endC = sel->end();
393
    NormalizedPoint temp;
394

395
    // if startPoint is right to endPoint swap them
396
397
    if(startC.x > endC.x)
    {
398
399
400
401
402
        temp = startC;
        startC = endC;
        endC = temp;
    }

403
    // minX,maxX,minY,maxY gives the bounding rectangle coordinates of the document
404
405
406
407
408
409
    const NormalizedRect boundingRect = d->m_page->m_page->boundingBox();
    const QRect content = boundingRect.geometry(scaleX,scaleY);
    const double minX = content.left();
    const double maxX = content.right();
    const double minY = content.top();
    const double maxY = content.bottom();
Myreal Name's avatar
Myreal Name committed
410

411
    /**
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
     * We will now find out the TinyTextEntity for the startRectangle and TinyTextEntity for
     * the endRectangle. We have four cases:
     *
     * Case 1(a): both startpoint and endpoint are out of the bounding Rectangle and at one side, so the rectangle made of start
     * and endPoint are outof the bounding rect (do not intersect)
     *
     * Case 1(b): both startpoint and endpoint are out of bounding rect, but they are in different side, so is their rectangle
     *
     * Case 2(a): find the rectangle which contains start and endpoint and having some
     * TextEntity
     *
     * Case 2(b): if 2(a) fails (if startPoint and endPoint both are unchanged), then we check whether there is any
     * TextEntity within the rect made by startPoint and endPoint
     *
     * Case 3: Now, we may have two type of selection.
     * 1. startpoint is left-top of start_end and endpoint is right-bottom
     * 2. startpoint is left-bottom of start_end and endpoint is top-right
     *
     * Also, as 2(b) is passed, we might have it,itEnd or both unchanged, but the fact is that we have
     * text within them. so, we need to search for the best suitable textposition for start and end.
     *
     * Case 3(a): We search the nearest rectangle consisting of some
     * TinyTextEntity right to or bottom of the startPoint for selection 01.
     * And, for selection 02, we have to search for right and top
     *
     * Case 3(b): For endpoint, we have to find the point top of or left to
     * endpoint if we have selection 01.
     * Otherwise, the search will be left and bottom
440
     */
441

442
    // we know that startC.x > endC.x, we need to decide which is top and which is bottom
443
444
    const NormalizedRect start_end = (startC.y < endC.y) ? NormalizedRect(startC.x, startC.y, endC.x, endC.y)
                                                         : NormalizedRect(startC.x, endC.y, endC.x, startC.y);
445

446
    // Case 1(a)
447
    if(!boundingRect.intersects(start_end)) return ret;
448

449
    // case 1(b)
450
451
452
453
454
    /**
        note that, after swapping of start and end, we know that,
        start is always left to end. but, we cannot say start is
        positioned upper than end.
    **/
455
456
    else
    {
457
        // if start is left to content rect take it to content rect boundary
458
459
        if(startC.x * scaleX < minX) startC.x = minX/scaleX;
        if(endC.x * scaleX > maxX) endC.x = maxX/scaleX;
460

461
        // if start is top to end (selection type 01)
462
463
        if(startC.y * scaleY < minY) startC.y = minY/scaleY;
        if(endC.y * scaleY > maxY) endC.y = maxY/scaleY;
464

465
466
467
        // if start is bottom to end (selection type 02)
        if(startC.y * scaleY > maxY) startC.y = maxY/scaleY;
        if(endC.y * scaleY < minY) endC.y = minY/scaleY;
468
    }
469

Laurent Montel's avatar
Laurent Montel committed
470
    TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
471
    TextList::ConstIterator start = it, end = itEnd, tmpIt = it; //, tmpItEnd = itEnd;
472
    const MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight;
Myreal Name's avatar
Myreal Name committed
473

474
    NormalizedRect tmp;
475
    //case 2(a)
476
477
    for ( ; it != itEnd; ++it )
    {
478
        tmp = (*it)->area;
479
480
481
482
483
484
        if(tmp.contains(startC.x,startC.y)){
            start = it;
        }
        if(tmp.contains(endC.x,endC.y)){
            end = it;
        }
485
    }
486

487
    //case 2(b)
488
    it = tmpIt;
489
490
    if(start == it && end == itEnd)
    {
491
        for ( ; it != itEnd; ++it )
492
        {
493
            // is there any text reactangle within the start_end rect
494
            tmp = (*it)->area;
495
496
            if(start_end.intersects(tmp))
                break;
497
        }
498

499
500
        // we have searched every text entities, but none is within the rectangle created by start and end
        // so, no selection should be done
501
502
        if(it == itEnd)
        {
503
            return ret;
504
505
        }
    }
506
507
    it = tmpIt;
    bool selection_two_start = false;
508

509
    //case 3.a
510
511
    if(start == it)
    {
512
513
        bool flagV = false;
        NormalizedRect rect;
514

515
        // selection type 01
516
517
518
519
        if(startC.y <= endC.y)
        {
            for ( ; it != itEnd; ++it )
            {
520
521
522
                rect= (*it)->area;
                rect.isBottom(startC) ? flagV = false: flagV = true;

523
524
                if(flagV && rect.isRight(startC))
                {
525
526
527
528
529
530
                    start = it;
                    break;
                }
            }
        }

531
        //selection type 02
532
533
        else
        {
534
            selection_two_start = true;
535
            int distance = scaleX + scaleY + 100;
536
537
            int count = 0;

538
539
            for ( ; it != itEnd; ++it )
            {
540
                rect= (*it)->area;
541

542
543
                if(rect.isBottomOrLevel(startC) && rect.isRight(startC))
                {
544
                    count++;
545
546
547
548
                    QRect entRect = rect.geometry(scaleX,scaleY);
                    int xdist, ydist;
                    xdist = entRect.center().x() - startC.x * scaleX;
                    ydist = entRect.center().y() - startC.y * scaleY;
549

550
551
552
                    //make them positive
                    if(xdist < 0) xdist = -xdist;
                    if(ydist < 0) ydist = -ydist;
553

554
555
                    if( (xdist + ydist) < distance)
                    {
556
557
558
559
560
561
562
                        distance = xdist+ ydist;
                        start = it;
                    }
                }
            }
        }
    }
563

564
    //case 3.b
565
566
    if(end == itEnd)
    {
567
        it = tmpIt;
568
        itEnd = itEnd-1;
569

570
571
        bool flagV = false;
        NormalizedRect rect;
572

573
574
575
576
        if(startC.y <= endC.y)
        {
            for ( ; itEnd >= it; itEnd-- )
            {
577
578
                rect= (*itEnd)->area;
                rect.isTop(endC) ? flagV = false: flagV = true;
579

580
581
                if(flagV && rect.isLeft(endC))
                {
582
583
                    end = itEnd;
                    break;
584
                }
585
586
            }
        }
587

588
589
        else
        {
590
            int distance = scaleX + scaleY + 100;
591
592
            for ( ; itEnd >= it; itEnd-- )
            {
593
                rect= (*itEnd)->area;
594

595
596
                if(rect.isTopOrLevel(endC) && rect.isLeft(endC))
                {
597
598
599
600
                    QRect entRect = rect.geometry(scaleX,scaleY);
                    int xdist, ydist;
                    xdist = entRect.center().x() - endC.x * scaleX;
                    ydist = entRect.center().y() - endC.y * scaleY;
601

602
603
604
605
                    //make them positive
                    if(xdist < 0) xdist = -xdist;
                    if(ydist < 0) ydist = -ydist;

606
607
                    if( (xdist + ydist) < distance)
                    {
608
609
610
611
612
                        distance = xdist+ ydist;
                        end = itEnd;
                    }

                }
613
614
            }
        }
615
616
    }

617
618
619
620
    /* if start and end in selection 02 are in the same column, and we
     start at an empty space we have to remove the selection of last
     character
    */
621
622
623
624
    if(selection_two_start)
    {
        if(start > end)
        {
625
626
627
628
            start = start - 1;
        }
    }

629
    // if start is less than end swap them
630
631
    if(start > end)
    {
Myreal Name's avatar
Myreal Name committed
632
633
634
635
636
        it = start;
        start = end;
        end = it;
    }

637
    // removes the possibility of crash, in case none of 1 to 3 is true
638
    if(end == d->m_words.constEnd()) end--;
639

640
641
    for( ;start <= end ; start++)
    {
Myreal Name's avatar
Myreal Name committed
642
        ret->appendShape( (*start)->transformedArea( matrix ), side );
643
     }
Myreal Name's avatar
Myreal Name committed
644

645
#endif
646

Tobias Koenig's avatar
Tobias Koenig committed
647
    return ret;
648
}
649
650


651
RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchDirection direct,
652
                                     Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area )
653
{
654
    SearchDirection dir=direct;
655
    // invalid search request
656
    if ( d->m_words.isEmpty() || query.isEmpty() || ( area && area->isNull() ) )
657
        return 0;
658
659
    TextList::ConstIterator start;
    TextList::ConstIterator end;
660
    const QMap< int, SearchPoint* >::const_iterator sIt = d->m_searchPoints.constFind( searchID );
Laurent Montel's avatar
Laurent Montel committed
661
    if ( sIt == d->m_searchPoints.constEnd() )
662
    {
663
664
        // if no previous run of this search is found, then set it to start
        // from the beginning (respecting the search direction)
665
        if ( dir == NextResult )
666
            dir = FromTop;
667
        else if ( dir == PreviousResult )
668
            dir = FromBottom;
669
    }
670
671
    bool forward = true;
    switch ( dir )
672
    {
673
        case FromTop:
Laurent Montel's avatar
Laurent Montel committed
674
675
            start = d->m_words.constBegin();
            end = d->m_words.constEnd();
676
677
            break;
        case FromBottom:
Laurent Montel's avatar
Laurent Montel committed
678
679
            start = d->m_words.constEnd();
            end = d->m_words.constBegin();
680
681
682
683
            Q_ASSERT( start != end );
            // we can safely go one step back, as we already checked
            // that the list is not empty
            --start;
684
            forward = false;
685
            break;
686
        case NextResult:
687
            start = (*sIt)->it_end;
Laurent Montel's avatar
Laurent Montel committed
688
            end = d->m_words.constEnd();
689
690
            if ( ( start + 1 ) != end )
                ++start;
691
            break;
692
        case PreviousResult:
693
            start = (*sIt)->it_begin;
Laurent Montel's avatar
Laurent Montel committed
694
            end = d->m_words.constBegin();
695
696
            if ( start != end )
                --start;
697
            forward = false;
698
            break;
699
700
    };
    RegularAreaRect* ret = 0;
701
702
    const TextComparisonFunction cmpFn = caseSensitivity == Qt::CaseSensitive
                                       ? CaseSensitiveCmpFn : CaseInsensitiveCmpFn;
703
704
    if ( forward )
    {
705
        ret = d->findTextInternalForward( searchID, query, caseSensitivity, cmpFn, start, end );
706
    }
707
708
    else
    {
709
        ret = d->findTextInternalBackward( searchID, query, caseSensitivity, cmpFn, start, end );
710
711
    }
    return ret;
712
713
714
}


715
RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const QString &_query,
716
                                                             Qt::CaseSensitivity caseSensitivity,
717
                                                             TextComparisonFunction comparer,
718
719
                                                             const TextList::ConstIterator &start,
                                                             const TextList::ConstIterator &end )
720
{
721
    const QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
722

723
    RegularAreaRect* ret=new RegularAreaRect;
724
725
726
727
728

    // normalize query search all unicode (including glyphs)
    const QString query = (caseSensitivity == Qt::CaseSensitive)
                            ? _query.normalized(QString::NormalizationForm_KC) 
                            : _query.toLower().normalized(QString::NormalizationForm_KC);
729

730
    // j is the current position in our query
731
    // len is the length of the string in TextEntity
732
    // queryLeft is the length of the query we have left
733
    const TinyTextEntity* curEntity = 0;
734
    int j=0, len=0, queryLeft=query.length();
735
    int offset = 0;
736
    bool haveMatch=false;
737
    bool offsetMoved = false;
738
739
    TextList::ConstIterator it = start;
    TextList::ConstIterator it_begin;
740
    for ( ; it != end; ++it )
741
    {
742
        curEntity = *it;
743
        const QString &str = curEntity->text();
744
	kDebug() << str;
745
746
747
748
749
750
751
752
        if ( !offsetMoved && ( it == start ) )
        {
            if ( m_searchPoints.contains( searchID ) )
            {
                offset = qMax( m_searchPoints[ searchID ]->offset_end, 0 );
            }
            offsetMoved = true;
        }
753
754
        {
            len=str.length();
755
            int min=qMin(queryLeft,len);
756
#ifdef DEBUG_TEXTPAGE
757
            kDebug(OkularDebug) << str.mid(offset,min) << ":" << _query.mid(j,min);
758
#endif
Frederik Schwarzer's avatar
typos    
Frederik Schwarzer committed
759
            // we have equal (or less than) area of the query left as the length of the current 
760
            // entity
761

762
763
764
            int resStrLen = 0, resQueryLen = 0;
            if ( !comparer( str.midRef( offset, min ), query.midRef( j, min ),
                            &resStrLen, &resQueryLen ) )
765
766
            {
                    // we not have matched
Pino Toscano's avatar
Pino Toscano committed
767
                    // this means we do not have a complete match
768
769
770
                    // we need to get back to query start
                    // and continue the search from this place
                    haveMatch=false;
771
                    ret->clear();
772
#ifdef DEBUG_TEXTPAGE
773
            kDebug(OkularDebug) << "\tnot matched";
774
#endif
775
                    j=0;
776
                    offset = 0;
777
                    queryLeft=query.length();
778
                    it_begin = TextList::ConstIterator();
779
780
781
782
783
784
785
            }
            else
            {
                    // we have a match
                    // move the current position in the query
                    // to the position after the length of this string
                    // we matched
786
                    // subtract the length of the current entity from 
787
                    // the left length of the query
788
#ifdef DEBUG_TEXTPAGE
789
            kDebug(OkularDebug) << "\tmatched";
790
#endif
791
                    haveMatch=true;
792
                    ret->append( curEntity->transformedArea( matrix ) );
793
794
                    j += resStrLen;
                    queryLeft -= resQueryLen;
795
                    if ( it_begin == TextList::ConstIterator() )
796
797
798
                    {
                        it_begin = it;
                    }
799
800
            }
        }
801

802
        if (haveMatch && queryLeft==0 && j==query.length())
803
        {
804
            // save or update the search point for the current searchID
805
806
            QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
            if ( sIt == m_searchPoints.end() )
807
            {
808
                sIt = m_searchPoints.insert( searchID, new SearchPoint );
809
            }
810
            SearchPoint* sp = *sIt;
811
            sp->it_begin = it_begin;
812
            sp->it_end = it - 1;
813
814
            sp->offset_begin = j;
            sp->offset_end = j + qMin( queryLeft, len );
815
            ret->simplify();
816
817
            return ret;
        }
818
    }
819
    // end of loop - it means that we've ended the textentities
820
821
    const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
    if ( sIt != m_searchPoints.end() )
822
    {
823
824
        SearchPoint* sp = *sIt;
        m_searchPoints.erase( sIt );
825
826
827
        delete sp;
    }
    delete ret;
828
829
830
    return 0;
}

831
832
RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query,
                                                            Qt::CaseSensitivity caseSensitivity,
833
                                                            TextComparisonFunction comparer,
834
835
                                                            const TextList::ConstIterator &start,
                                                            const TextList::ConstIterator &end )
836
{
837
    const QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
838
839

    RegularAreaRect* ret=new RegularAreaRect;
840
841
842
843
844

    // normalize query to search all unicode (including glyphs)
    const QString query = (caseSensitivity == Qt::CaseSensitive) 
                            ? _query.normalized(QString::NormalizationForm_KC) 
                            : _query.toLower().normalized(QString::NormalizationForm_KC);
845
846
847
848

    // j is the current position in our query
    // len is the length of the string in TextEntity
    // queryLeft is the length of the query we have left
849
    const TinyTextEntity* curEntity = 0;
850
851
852
    int j=query.length() - 1, len=0, queryLeft=query.length();
    bool haveMatch=false;
    bool offsetMoved = false;
853
854
    TextList::ConstIterator it = start;
    TextList::ConstIterator it_begin;
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
    while ( true )
    {
        curEntity = *it;
        const QString &str = curEntity->text();
        if ( !offsetMoved && ( it == start ) )
        {
            offsetMoved = true;
        }
        if ( query.at(j).isSpace() )
        {
            // lets match newline as a space
#ifdef DEBUG_TEXTPAGE
            kDebug(OkularDebug) << "newline or space";
#endif
            j--;
            queryLeft--;
        }
        else
        {
            len=str.length();
            int min=qMin(queryLeft,len);
#ifdef DEBUG_TEXTPAGE
            kDebug(OkularDebug) << str.right(min) << " : " << _query.mid(j-min+1,min);
#endif
Frederik Schwarzer's avatar
typos    
Frederik Schwarzer committed
879
            // we have equal (or less than) area of the query left as the length of the current 
880
881
            // entity

882
883
884
            int resStrLen = 0, resQueryLen = 0;
            if ( !comparer( str.rightRef( min ), query.midRef( j - min + 1, min ),
                            &resStrLen, &resQueryLen ) )
885
886
887
888
889
890
891
892
893
894
895
896
            {
                    // we not have matched
                    // this means we do not have a complete match
                    // we need to get back to query start
                    // and continue the search from this place
                    haveMatch=false;
                    ret->clear();
#ifdef DEBUG_TEXTPAGE
                    kDebug(OkularDebug) << "\tnot matched";
#endif
                    j=query.length() - 1;
                    queryLeft=query.length();
897
                    it_begin = TextList::ConstIterator();
898
899
900
901
902
903
904
            }
            else
            {
                    // we have a match
                    // move the current position in the query
                    // to the position after the length of this string
                    // we matched
905
                    // subtract the length of the current entity from 
906
907
908
909
910
911
                    // the left length of the query
#ifdef DEBUG_TEXTPAGE
                    kDebug(OkularDebug) << "\tmatched";
#endif
                    haveMatch=true;
                    ret->append( curEntity->transformedArea( matrix ) );
912
913
                    j -= resStrLen;
                    queryLeft -= resQueryLen;
914
                    if ( it_begin == TextList::ConstIterator() )
915
916
917
                    {
                        it_begin = it;
                    }
918
919
920
921
922
923
924
925
926
927
928
929
            }
        }

        if (haveMatch && queryLeft==0 && j<0)
        {
            // save or update the search point for the current searchID
            QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
            if ( sIt == m_searchPoints.end() )
            {
                sIt = m_searchPoints.insert( searchID, new SearchPoint );
            }
            SearchPoint* sp = *sIt;
930
931
            sp->it_begin = it;
            sp->it_end = it_begin;
932
933
934
935
936
937
938
939
940
941
942
            sp->offset_begin = j;
            sp->offset_end = j + qMin( queryLeft, len );
            ret->simplify();
            return ret;
        }
        if ( it == end )
            break;
        else
            --it;
    }
    // end of loop - it means that we've ended the textentities
943
    const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
944
945
946
947
948
949
950
951
952
953
    if ( sIt != m_searchPoints.end() )
    {
        SearchPoint* sp = *sIt;
        m_searchPoints.erase( sIt );
        delete sp;
    }
    delete ret;
    return 0;
}

954
QString TextPage::text(const RegularAreaRect *area) const
955
956
957
958
959
{
    return text(area, AnyPixelTextAreaInclusionBehaviour);
}

QString TextPage::text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const
960
{
961
    if ( area && area->isNull() )
962
        return QString();
963

Laurent Montel's avatar
Laurent Montel committed
964
    TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
965
966
    QString ret;
    if ( area )
Pino Toscano's avatar
Pino Toscano committed
967
    {
968
        for ( ; it != itEnd; ++it )
969
        {
970
971
972
973
974
975
976
977
            if (b == AnyPixelTextAreaInclusionBehaviour)
            {
                if ( area->intersects( (*it)->area ) )
                {
                    ret += (*it)->text();
                }
            }
            else
978
            {
979
980
981
982
983
                Norma