textpage.cpp 63 KB
Newer Older
1
2
3
4
5
6
7
8
/***************************************************************************
 *   Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com>             *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 ***************************************************************************/
9

Albert Astals Cid's avatar
Albert Astals Cid committed
10
#include "textpage.h"
11
#include "textpage_p.h"
Pino Toscano's avatar
Pino Toscano committed
12

13
14
#include <kdebug.h>

15
#include "area.h"
16
#include "debug_p.h"
17
#include "misc.h"
18
#include "page.h"
19
#include "page_p.h"
20

21
22
#include <cstring>

23
#include <QtAlgorithms>
24
#include <QVarLengthArray>
Myreal Name's avatar
Myreal Name committed
25

26
using namespace Okular;
27

28
class SearchPoint
29
{
30
31
    public:
        SearchPoint()
32
            : offset_begin( -1 ), offset_end( -1 )
33
34
35
        {
        }

36
37
        TextList::ConstIterator it_begin;
        TextList::ConstIterator it_end;
38
39
40
41
        int offset_begin;
        int offset_end;
};

42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/* text comparison functions */

bool CaseInsensitiveCmpFn( const QStringRef & from, const QStringRef & to,
                           int *fromLength, int *toLength )
{
    *fromLength = from.length();
    *toLength = to.length();
    return from.compare( to, Qt::CaseInsensitive ) == 0;
}

bool CaseSensitiveCmpFn( const QStringRef & from, const QStringRef & to,
                           int *fromLength, int *toLength )
{
    *fromLength = from.length();
    *toLength = to.length();
    return from.compare( to, Qt::CaseSensitive ) == 0;
}

60
61
62
63
64
65
66
67

/*
  Rationale behind TinyTextEntity:

  instead of storing directly a QString for the text of an entity,
  we store the UTF-16 data and their length. This way, we save about
  4 int's wrt a QString, and we can create a new string from that
  raw data (that's the only penalty of that).
68
69
70
  Even better, if the string we need to store has at most
  MaxStaticChars characters, then we store those in place of the QChar*
  that would be used (with new[] + free[]) for the data.
71
72
73
 */
class TinyTextEntity
{
74
75
    static const int MaxStaticChars = sizeof( QChar * ) / sizeof( QChar );

76
77
78
79
80
    public:
        TinyTextEntity( const QString &text, const NormalizedRect &rect )
            : area( rect )
        {
            Q_ASSERT_X( !text.isEmpty(), "TinyTextEntity", "empty string" );
81
82
            Q_ASSERT_X( sizeof( d ) == sizeof( QChar * ), "TinyTextEntity",
                        "internal storage is wider than QChar*, fix it!" );
83
            length = text.length();
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
            switch ( length )
            {
#if QT_POINTER_SIZE >= 8
                case 4:
                    d.qc[3] = text.at( 3 ).unicode();
                    // fall through
                case 3:
                    d.qc[2] = text.at( 2 ).unicode();
                    // fall through
#endif
                case 2:
                    d.qc[1] = text.at( 1 ).unicode();
                    // fall through
                case 1:
                    d.qc[0] = text.at( 0 ).unicode();
                    break;
                default:
                    d.data = new QChar[ length ];
                    std::memcpy( d.data, text.constData(), length * sizeof( QChar ) );
            }
104
105
106
107
        }

        ~TinyTextEntity()
        {
108
109
110
111
            if ( length > MaxStaticChars )
            {
                delete [] d.data;
            }
112
113
114
115
        }

        inline QString text() const
        {
116
117
            return length <= MaxStaticChars ? QString::fromRawData( ( const QChar * )&d.qc[0], length )
                                            : QString::fromRawData( d.data, length );
118
119
120
121
122
123
124
125
126
127
128
129
130
131
        }

        inline NormalizedRect transformedArea( const QMatrix &matrix ) const
        {
            NormalizedRect transformed_area = area;
            transformed_area.transform( matrix );
            return transformed_area;
        }

        NormalizedRect area;

    private:
        Q_DISABLE_COPY( TinyTextEntity )

132
133
134
135
136
        union
        {
            QChar *data;
            ushort qc[MaxStaticChars];
        } d;
137
138
139
140
        int length;
};


141
TextEntity::TextEntity( const QString &text, NormalizedRect *area )
142
    : m_text( text ), m_area( area ), d( 0 )
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
{
}

TextEntity::~TextEntity()
{
    delete m_area;
}

QString TextEntity::text() const
{
    return m_text;
}

NormalizedRect* TextEntity::area() const
{
    return m_area;
}

161
NormalizedRect TextEntity::transformedArea(const QMatrix &matrix) const
162
{
163
164
165
    NormalizedRect transformed_area = *m_area;
    transformed_area.transform( matrix );
    return transformed_area;
166
167
}

168

169
170
171
172
173
TextPagePrivate::TextPagePrivate()
    : m_page( 0 )
{
}

174
175
176
177
178
TextPagePrivate::~TextPagePrivate()
{
    qDeleteAll( m_searchPoints );
    qDeleteAll( m_words );
}
179
180


181
TextPage::TextPage()
182
    : d( new TextPagePrivate() )
183
184
185
186
{
}

TextPage::TextPage( const TextEntity::List &words )
187
    : d( new TextPagePrivate() )
188
{
Laurent Montel's avatar
Laurent Montel committed
189
    TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
190
191
192
193
194
195
196
    for ( ; it != itEnd; ++it )
    {
        TextEntity *e = *it;
        if ( !e->text().isEmpty() )
            d->m_words.append( new TinyTextEntity( e->text(), *e->area() ) );
        delete e;
    }
197
198
}

199
TextPage::~TextPage()
200
{
201
202
203
204
205
    delete d;
}

void TextPage::append( const QString &text, NormalizedRect *area )
{
206
    if ( !text.isEmpty() )
207
        d->m_words.append( new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) );
208
    delete area;
209
210
}

211
212
213
214
215
/**
 * We will divide the whole page in some regions depending on the horizontal and
 * vertical spacing among different regions. Each region will have an area and an
 * associated TextList in sorted order.
*/
216
217
class RegionText
{
218
219

public:
220
221
222
    RegionText()
    {
    };
223

224
    RegionText(const TextList &list,const QRect &area)
225
226
227
228
        : m_region_text(list) ,m_area(area)
    {
    }

229
    // We assume text will be set only once at the time of object creation
230
231
    inline TextList text() const
    {
232
233
234
        return m_region_text;
    }

235
236
    inline QRect area() const
    {
237
238
239
        return m_area;
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
240
    inline void setArea(const QRect &area)
241
    {
242
243
244
        m_area = area;
    }

Albert Astals Cid's avatar
Albert Astals Cid committed
245
    inline void setText(const TextList &text)
246
    {
247
248
249
250
251
252
253
254
        m_region_text = text;
    }

private:
    TextList m_region_text;
    QRect m_area;
};

255
RegularAreaRect * TextPage::textArea ( TextSelection * sel) const
256
{
257
258
259
    if ( d->m_words.isEmpty() )
        return new RegularAreaRect();

260
/**
Tobias Koenig's avatar
Tobias Koenig committed
261
262
263
264
265
266
267
268
269
270
271
272
    It works like this:
    There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0)
    rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those 
    points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search
    for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both 
    of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a 
    1. (rx,ry)x(1,ty)
    2. (0,ty)x(1,uy)
    3. (0,uy)x(vx,vy)

    To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor
    or that has a left border >= cx and bottom border >= cy. 
273
*/
Tobias Koenig's avatar
Tobias Koenig committed
274
    RegularAreaRect * ret= new RegularAreaRect;
275

276
    const QMatrix matrix = d->m_page ? d->m_page->rotationMatrix() : QMatrix();
277
#if 0
Tobias Koenig's avatar
Tobias Koenig committed
278
279
280
281
    int it = -1;
    int itB = -1;
    int itE = -1;

Frederik Schwarzer's avatar
typos    
Frederik Schwarzer committed
282
    // ending cursor is higher than start cursor, we need to find positions in reverse
283
284
285
    NormalizedRect tmp;
    NormalizedRect start;
    NormalizedRect end;
Tobias Koenig's avatar
Tobias Koenig committed
286
287
288
289
290
291
292
293
294
295
296

    NormalizedPoint startC = sel->start();
    double startCx = startC.x;
    double startCy = startC.y;

    NormalizedPoint endC = sel->end();
    double endCx = endC.x;
    double endCy = endC.y;

    if ( sel->direction() == 1 || ( sel->itB() == -1 && sel->direction() == 0 ) )
    {
297
#ifdef DEBUG_TEXTPAGE
298
        kWarning() << "running first loop";
299
#endif
Tobias Koenig's avatar
Tobias Koenig committed
300
301
302
        const int count = d->m_words.count();
        for ( it = 0; it < count; it++ )
        {
303
            tmp = *d->m_words[ it ]->area();
304
305
306
            if ( tmp.contains( startCx, startCy )
                 || ( tmp.top <= startCy && tmp.bottom >= startCy && tmp.left >= startCx )
                 || ( tmp.top >= startCy))
Tobias Koenig's avatar
Tobias Koenig committed
307
308
309
            {
                /// we have found the (rx,ry)x(tx,ty)
                itB = it;
310
#ifdef DEBUG_TEXTPAGE
311
                kWarning() << "start is" << itB << "count is" << d->m_words.count();
312
#endif
Tobias Koenig's avatar
Tobias Koenig committed
313
314
                break;
            }
315
        }
Tobias Koenig's avatar
Tobias Koenig committed
316
317
318
        sel->itB( itB );
    }
    itB = sel->itB();
319
#ifdef DEBUG_TEXTPAGE
320
321
    kWarning() << "direction is" << sel->direction();
    kWarning() << "reloaded start is" << itB << "against" << sel->itB();
322
#endif
Tobias Koenig's avatar
Tobias Koenig committed
323
324
    if ( sel->direction() == 0 || ( sel->itE() == -1 && sel->direction() == 1 ) )
    {
325
#ifdef DEBUG_TEXTPAGE
326
        kWarning() << "running second loop";
327
#endif
Tobias Koenig's avatar
Tobias Koenig committed
328
329
        for ( it = d->m_words.count() - 1; it >= itB; it-- )
        {
330
            tmp = *d->m_words[ it ]->area();
331
332
333
            if ( tmp.contains( endCx, endCy )
                 || ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx )
                 || ( tmp.bottom <= endCy ) )
Tobias Koenig's avatar
Tobias Koenig committed
334
335
336
            {
                /// we have found the (ux,uy)x(vx,vy)
                itE = it;
337
#ifdef DEBUG_TEXTPAGE
338
                kWarning() << "ending is" << itE << "count is" << d->m_words.count();
339
340
341
                kWarning() << "conditions" << tmp.contains( endCx, endCy ) << " " 
                  << ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx ) << " " <<
                  ( tmp.top >= endCy);
342
#endif
Tobias Koenig's avatar
Tobias Koenig committed
343
344
                break;
            }
345
        }
Tobias Koenig's avatar
Tobias Koenig committed
346
347
        sel->itE( itE );
    }
348
#ifdef DEBUG_TEXTPAGE
349
    kWarning() << "reloaded ending is" << itE << "against" << sel->itE();
350
#endif
351

Tobias Koenig's avatar
Tobias Koenig committed
352
353
    if ( sel->itB() != -1 && sel->itE() != -1 )
    {
354
355
        start = *d->m_words[ sel->itB() ]->area();
        end = *d->m_words[ sel->itE() ]->area();
Tobias Koenig's avatar
Tobias Koenig committed
356
357

        NormalizedRect first, second, third;
Frederik Schwarzer's avatar
Frederik Schwarzer committed
358
        /// finding out if there is more than one baseline between them is a hard and discussable task
Tobias Koenig's avatar
Tobias Koenig committed
359
360
        /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the 
        /// areas, if more than one -> we have a three or over line selection
361
362
        first = start;
        second.top = start.bottom;
Tobias Koenig's avatar
Tobias Koenig committed
363
        first.right = second.right = 1;
364
        third = end;
Tobias Koenig's avatar
Tobias Koenig committed
365
        third.left = second.left = 0;
366
        second.bottom = end.top;
Tobias Koenig's avatar
Tobias Koenig committed
367
368
        int selMax = qMax( sel->itB(), sel->itE() );
        for ( it = qMin( sel->itB(), sel->itE() ); it <= selMax; ++it )
369
        {
370
            tmp = *d->m_words[ it ]->area();
371
            if ( tmp.intersects( &first ) || tmp.intersects( &second ) || tmp.intersects( &third ) )
372
                ret->appendShape( d->m_words.at( it )->transformedArea( matrix ) );
373
        }
Tobias Koenig's avatar
Tobias Koenig committed
374
    }
375
#else
376
377
    const double scaleX = d->m_page->m_page->width();
    const double scaleY = d->m_page->m_page->height();
378

379
380
    NormalizedPoint startC = sel->start();
    NormalizedPoint endC = sel->end();
381
    NormalizedPoint temp;
382

383
    // if startPoint is right to endPoint swap them
384
385
    if(startC.x > endC.x)
    {
386
387
388
389
390
        temp = startC;
        startC = endC;
        endC = temp;
    }

391
    // minX,maxX,minY,maxY gives the bounding rectangle coordinates of the document
392
393
394
395
396
397
    const NormalizedRect boundingRect = d->m_page->m_page->boundingBox();
    const QRect content = boundingRect.geometry(scaleX,scaleY);
    const double minX = content.left();
    const double maxX = content.right();
    const double minY = content.top();
    const double maxY = content.bottom();
Myreal Name's avatar
Myreal Name committed
398

399
    /**
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
     * We will now find out the TinyTextEntity for the startRectangle and TinyTextEntity for
     * the endRectangle. We have four cases:
     *
     * Case 1(a): both startpoint and endpoint are out of the bounding Rectangle and at one side, so the rectangle made of start
     * and endPoint are outof the bounding rect (do not intersect)
     *
     * Case 1(b): both startpoint and endpoint are out of bounding rect, but they are in different side, so is their rectangle
     *
     * Case 2(a): find the rectangle which contains start and endpoint and having some
     * TextEntity
     *
     * Case 2(b): if 2(a) fails (if startPoint and endPoint both are unchanged), then we check whether there is any
     * TextEntity within the rect made by startPoint and endPoint
     *
     * Case 3: Now, we may have two type of selection.
     * 1. startpoint is left-top of start_end and endpoint is right-bottom
     * 2. startpoint is left-bottom of start_end and endpoint is top-right
     *
     * Also, as 2(b) is passed, we might have it,itEnd or both unchanged, but the fact is that we have
     * text within them. so, we need to search for the best suitable textposition for start and end.
     *
     * Case 3(a): We search the nearest rectangle consisting of some
     * TinyTextEntity right to or bottom of the startPoint for selection 01.
     * And, for selection 02, we have to search for right and top
     *
     * Case 3(b): For endpoint, we have to find the point top of or left to
     * endpoint if we have selection 01.
     * Otherwise, the search will be left and bottom
428
     */
429

430
    // we know that startC.x > endC.x, we need to decide which is top and which is bottom
431
432
    const NormalizedRect start_end = (startC.y < endC.y) ? NormalizedRect(startC.x, startC.y, endC.x, endC.y)
                                                         : NormalizedRect(startC.x, endC.y, endC.x, startC.y);
433

434
    // Case 1(a)
435
    if(!boundingRect.intersects(start_end)) return ret;
436

437
    // case 1(b)
438
439
440
441
442
    /**
        note that, after swapping of start and end, we know that,
        start is always left to end. but, we cannot say start is
        positioned upper than end.
    **/
443
444
    else
    {
445
        // if start is left to content rect take it to content rect boundary
446
447
        if(startC.x * scaleX < minX) startC.x = minX/scaleX;
        if(endC.x * scaleX > maxX) endC.x = maxX/scaleX;
448

449
        // if start is top to end (selection type 01)
450
451
        if(startC.y * scaleY < minY) startC.y = minY/scaleY;
        if(endC.y * scaleY > maxY) endC.y = maxY/scaleY;
452

453
454
455
        // if start is bottom to end (selection type 02)
        if(startC.y * scaleY > maxY) startC.y = maxY/scaleY;
        if(endC.y * scaleY < minY) endC.y = minY/scaleY;
456
    }
457

Laurent Montel's avatar
Laurent Montel committed
458
    TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
459
    TextList::ConstIterator start = it, end = itEnd, tmpIt = it; //, tmpItEnd = itEnd;
460
    const MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight;
Myreal Name's avatar
Myreal Name committed
461

462
    NormalizedRect tmp;
463
    //case 2(a)
464
465
    for ( ; it != itEnd; ++it )
    {
466
        tmp = (*it)->area;
467
468
469
470
471
472
        if(tmp.contains(startC.x,startC.y)){
            start = it;
        }
        if(tmp.contains(endC.x,endC.y)){
            end = it;
        }
473
    }
474

475
    //case 2(b)
476
    it = tmpIt;
477
478
    if(start == it && end == itEnd)
    {
479
        for ( ; it != itEnd; ++it )
480
        {
481
            // is there any text reactangle within the start_end rect
482
            tmp = (*it)->area;
483
484
            if(start_end.intersects(tmp))
                break;
485
        }
486

487
488
        // we have searched every text entities, but none is within the rectangle created by start and end
        // so, no selection should be done
489
490
        if(it == itEnd)
        {
491
            return ret;
492
493
        }
    }
494
495
    it = tmpIt;
    bool selection_two_start = false;
496

497
    //case 3.a
498
499
    if(start == it)
    {
500
501
        bool flagV = false;
        NormalizedRect rect;
502

503
        // selection type 01
504
505
506
507
        if(startC.y <= endC.y)
        {
            for ( ; it != itEnd; ++it )
            {
508
509
510
                rect= (*it)->area;
                rect.isBottom(startC) ? flagV = false: flagV = true;

511
512
                if(flagV && rect.isRight(startC))
                {
513
514
515
516
517
518
                    start = it;
                    break;
                }
            }
        }

519
        //selection type 02
520
521
        else
        {
522
            selection_two_start = true;
523
            int distance = scaleX + scaleY + 100;
524
525
            int count = 0;

526
527
            for ( ; it != itEnd; ++it )
            {
528
                rect= (*it)->area;
529

530
531
                if(rect.isBottomOrLevel(startC) && rect.isRight(startC))
                {
532
                    count++;
533
534
535
536
                    QRect entRect = rect.geometry(scaleX,scaleY);
                    int xdist, ydist;
                    xdist = entRect.center().x() - startC.x * scaleX;
                    ydist = entRect.center().y() - startC.y * scaleY;
537

538
539
540
                    //make them positive
                    if(xdist < 0) xdist = -xdist;
                    if(ydist < 0) ydist = -ydist;
541

542
543
                    if( (xdist + ydist) < distance)
                    {
544
545
546
547
548
549
550
                        distance = xdist+ ydist;
                        start = it;
                    }
                }
            }
        }
    }
551

552
    //case 3.b
553
554
    if(end == itEnd)
    {
555
        it = tmpIt;
556
        itEnd = itEnd-1;
557

558
559
        bool flagV = false;
        NormalizedRect rect;
560

561
562
563
564
        if(startC.y <= endC.y)
        {
            for ( ; itEnd >= it; itEnd-- )
            {
565
566
                rect= (*itEnd)->area;
                rect.isTop(endC) ? flagV = false: flagV = true;
567

568
569
                if(flagV && rect.isLeft(endC))
                {
570
571
                    end = itEnd;
                    break;
572
                }
573
574
            }
        }
575

576
577
        else
        {
578
            int distance = scaleX + scaleY + 100;
579
580
            for ( ; itEnd >= it; itEnd-- )
            {
581
                rect= (*itEnd)->area;
582

583
584
                if(rect.isTopOrLevel(endC) && rect.isLeft(endC))
                {
585
586
587
588
                    QRect entRect = rect.geometry(scaleX,scaleY);
                    int xdist, ydist;
                    xdist = entRect.center().x() - endC.x * scaleX;
                    ydist = entRect.center().y() - endC.y * scaleY;
589

590
591
592
593
                    //make them positive
                    if(xdist < 0) xdist = -xdist;
                    if(ydist < 0) ydist = -ydist;

594
595
                    if( (xdist + ydist) < distance)
                    {
596
597
598
599
600
                        distance = xdist+ ydist;
                        end = itEnd;
                    }

                }
601
602
            }
        }
603
604
    }

605
606
607
608
    /* if start and end in selection 02 are in the same column, and we
     start at an empty space we have to remove the selection of last
     character
    */
609
610
611
612
    if(selection_two_start)
    {
        if(start > end)
        {
613
614
615
616
            start = start - 1;
        }
    }

617
    // if start is less than end swap them
618
619
    if(start > end)
    {
Myreal Name's avatar
Myreal Name committed
620
621
622
623
624
        it = start;
        start = end;
        end = it;
    }

625
    // removes the possibility of crash, in case none of 1 to 3 is true
626
    if(end == d->m_words.constEnd()) end--;
627

628
629
    for( ;start <= end ; start++)
    {
Myreal Name's avatar
Myreal Name committed
630
        ret->appendShape( (*start)->transformedArea( matrix ), side );
631
     }
Myreal Name's avatar
Myreal Name committed
632

633
#endif
634

Tobias Koenig's avatar
Tobias Koenig committed
635
    return ret;
636
}
637
638


639
RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchDirection direct,
640
                                     Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area )
641
{
642
    SearchDirection dir=direct;
643
    // invalid search request
644
    if ( d->m_words.isEmpty() || query.isEmpty() || ( area && area->isNull() ) )
645
        return 0;
646
647
    TextList::ConstIterator start;
    TextList::ConstIterator end;
648
    const QMap< int, SearchPoint* >::const_iterator sIt = d->m_searchPoints.constFind( searchID );
Laurent Montel's avatar
Laurent Montel committed
649
    if ( sIt == d->m_searchPoints.constEnd() )
650
    {
651
652
        // if no previous run of this search is found, then set it to start
        // from the beginning (respecting the search direction)
653
        if ( dir == NextResult )
654
            dir = FromTop;
655
        else if ( dir == PreviousResult )
656
            dir = FromBottom;
657
    }
658
659
    bool forward = true;
    switch ( dir )
660
    {
661
        case FromTop:
Laurent Montel's avatar
Laurent Montel committed
662
663
            start = d->m_words.constBegin();
            end = d->m_words.constEnd();
664
665
            break;
        case FromBottom:
Laurent Montel's avatar
Laurent Montel committed
666
667
            start = d->m_words.constEnd();
            end = d->m_words.constBegin();
668
669
670
671
            Q_ASSERT( start != end );
            // we can safely go one step back, as we already checked
            // that the list is not empty
            --start;
672
            forward = false;
673
            break;
674
        case NextResult:
675
            start = (*sIt)->it_end;
Laurent Montel's avatar
Laurent Montel committed
676
            end = d->m_words.constEnd();
677
678
            if ( ( start + 1 ) != end )
                ++start;
679
            break;
680
        case PreviousResult:
681
            start = (*sIt)->it_begin;
Laurent Montel's avatar
Laurent Montel committed
682
            end = d->m_words.constBegin();
683
684
            if ( start != end )
                --start;
685
            forward = false;
686
            break;
687
688
    };
    RegularAreaRect* ret = 0;
689
690
    const TextComparisonFunction cmpFn = caseSensitivity == Qt::CaseSensitive
                                       ? CaseSensitiveCmpFn : CaseInsensitiveCmpFn;
691
692
    if ( forward )
    {
693
        ret = d->findTextInternalForward( searchID, query, caseSensitivity, cmpFn, start, end );
694
    }
695
696
    else
    {
697
        ret = d->findTextInternalBackward( searchID, query, caseSensitivity, cmpFn, start, end );
698
699
    }
    return ret;
700
701
702
}


703
RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const QString &_query,
704
                                                             Qt::CaseSensitivity caseSensitivity,
705
                                                             TextComparisonFunction comparer,
706
707
                                                             const TextList::ConstIterator &start,
                                                             const TextList::ConstIterator &end )
708
{
709
    const QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
710

711
    RegularAreaRect* ret=new RegularAreaRect;
712
713
714
715
716

    // normalize query search all unicode (including glyphs)
    const QString query = (caseSensitivity == Qt::CaseSensitive)
                            ? _query.normalized(QString::NormalizationForm_KC) 
                            : _query.toLower().normalized(QString::NormalizationForm_KC);
717

718
    // j is the current position in our query
719
    // len is the length of the string in TextEntity
720
    // queryLeft is the length of the query we have left
721
    const TinyTextEntity* curEntity = 0;
722
    int j=0, len=0, queryLeft=query.length();
723
    int offset = 0;
724
    bool haveMatch=false;
725
    bool offsetMoved = false;
726
727
    TextList::ConstIterator it = start;
    TextList::ConstIterator it_begin;
728
    for ( ; it != end; ++it )
729
    {
730
        curEntity = *it;
731
        const QString &str = curEntity->text();
732
	kDebug() << str;
733
734
735
736
737
738
739
740
        if ( !offsetMoved && ( it == start ) )
        {
            if ( m_searchPoints.contains( searchID ) )
            {
                offset = qMax( m_searchPoints[ searchID ]->offset_end, 0 );
            }
            offsetMoved = true;
        }
741
742
        {
            len=str.length();
743
            int min=qMin(queryLeft,len);
744
#ifdef DEBUG_TEXTPAGE
745
            kDebug(OkularDebug) << str.mid(offset,min) << ":" << _query.mid(j,min);
746
#endif
Frederik Schwarzer's avatar
typos    
Frederik Schwarzer committed
747
            // we have equal (or less than) area of the query left as the length of the current 
748
            // entity
749

750
751
752
            int resStrLen = 0, resQueryLen = 0;
            if ( !comparer( str.midRef( offset, min ), query.midRef( j, min ),
                            &resStrLen, &resQueryLen ) )
753
754
            {
                    // we not have matched
Pino Toscano's avatar
Pino Toscano committed
755
                    // this means we do not have a complete match
756
757
758
                    // we need to get back to query start
                    // and continue the search from this place
                    haveMatch=false;
759
                    ret->clear();
760
#ifdef DEBUG_TEXTPAGE
761
            kDebug(OkularDebug) << "\tnot matched";
762
#endif
763
                    j=0;
764
                    offset = 0;
765
                    queryLeft=query.length();
766
                    it_begin = TextList::ConstIterator();
767
768
769
770
771
772
773
            }
            else
            {
                    // we have a match
                    // move the current position in the query
                    // to the position after the length of this string
                    // we matched
774
                    // subtract the length of the current entity from 
775
                    // the left length of the query
776
#ifdef DEBUG_TEXTPAGE
777
            kDebug(OkularDebug) << "\tmatched";
778
#endif
779
                    haveMatch=true;
780
                    ret->append( curEntity->transformedArea( matrix ) );
781
782
                    j += resStrLen;
                    queryLeft -= resQueryLen;
783
                    if ( it_begin == TextList::ConstIterator() )
784
785
786
                    {
                        it_begin = it;
                    }
787
788
            }
        }
789

790
        if (haveMatch && queryLeft==0 && j==query.length())
791
        {
792
            // save or update the search point for the current searchID
793
794
            QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
            if ( sIt == m_searchPoints.end() )
795
            {
796
                sIt = m_searchPoints.insert( searchID, new SearchPoint );
797
            }
798
            SearchPoint* sp = *sIt;
799
            sp->it_begin = it_begin;
800
            sp->it_end = it - 1;
801
802
            sp->offset_begin = j;
            sp->offset_end = j + qMin( queryLeft, len );
803
            ret->simplify();
804
805
            return ret;
        }
806
    }
807
    // end of loop - it means that we've ended the textentities
808
809
    const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
    if ( sIt != m_searchPoints.end() )
810
    {
811
812
        SearchPoint* sp = *sIt;
        m_searchPoints.erase( sIt );
813
814
815
        delete sp;
    }
    delete ret;
816
817
818
    return 0;
}

819
820
RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query,
                                                            Qt::CaseSensitivity caseSensitivity,
821
                                                            TextComparisonFunction comparer,
822
823
                                                            const TextList::ConstIterator &start,
                                                            const TextList::ConstIterator &end )
824
{
825
    const QMatrix matrix = m_page ? m_page->rotationMatrix() : QMatrix();
826
827

    RegularAreaRect* ret=new RegularAreaRect;
828
829
830
831
832

    // normalize query to search all unicode (including glyphs)
    const QString query = (caseSensitivity == Qt::CaseSensitive) 
                            ? _query.normalized(QString::NormalizationForm_KC) 
                            : _query.toLower().normalized(QString::NormalizationForm_KC);
833
834
835
836

    // j is the current position in our query
    // len is the length of the string in TextEntity
    // queryLeft is the length of the query we have left
837
    const TinyTextEntity* curEntity = 0;
838
839
840
    int j=query.length() - 1, len=0, queryLeft=query.length();
    bool haveMatch=false;
    bool offsetMoved = false;
841
842
    TextList::ConstIterator it = start;
    TextList::ConstIterator it_begin;
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
    while ( true )
    {
        curEntity = *it;
        const QString &str = curEntity->text();
        if ( !offsetMoved && ( it == start ) )
        {
            offsetMoved = true;
        }
        if ( query.at(j).isSpace() )
        {
            // lets match newline as a space
#ifdef DEBUG_TEXTPAGE
            kDebug(OkularDebug) << "newline or space";
#endif
            j--;
            queryLeft--;
        }
        else
        {
            len=str.length();
            int min=qMin(queryLeft,len);
#ifdef DEBUG_TEXTPAGE
            kDebug(OkularDebug) << str.right(min) << " : " << _query.mid(j-min+1,min);
#endif
Frederik Schwarzer's avatar
typos    
Frederik Schwarzer committed
867
            // we have equal (or less than) area of the query left as the length of the current 
868
869
            // entity

870
871
872
            int resStrLen = 0, resQueryLen = 0;
            if ( !comparer( str.rightRef( min ), query.midRef( j - min + 1, min ),
                            &resStrLen, &resQueryLen ) )
873
874
875
876
877
878
879
880
881
882
883
884
            {
                    // we not have matched
                    // this means we do not have a complete match
                    // we need to get back to query start
                    // and continue the search from this place
                    haveMatch=false;
                    ret->clear();
#ifdef DEBUG_TEXTPAGE
                    kDebug(OkularDebug) << "\tnot matched";
#endif
                    j=query.length() - 1;
                    queryLeft=query.length();
885
                    it_begin = TextList::ConstIterator();
886
887
888
889
890
891
892
            }
            else
            {
                    // we have a match
                    // move the current position in the query
                    // to the position after the length of this string
                    // we matched
893
                    // subtract the length of the current entity from 
894
895
896
897
898
899
                    // the left length of the query
#ifdef DEBUG_TEXTPAGE
                    kDebug(OkularDebug) << "\tmatched";
#endif
                    haveMatch=true;
                    ret->append( curEntity->transformedArea( matrix ) );
900
901
                    j -= resStrLen;
                    queryLeft -= resQueryLen;
902
                    if ( it_begin == TextList::ConstIterator() )
903
904
905
                    {
                        it_begin = it;
                    }
906
907
908
909
910
911
912
913
914
915
916
917
            }
        }

        if (haveMatch && queryLeft==0 && j<0)
        {
            // save or update the search point for the current searchID
            QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
            if ( sIt == m_searchPoints.end() )
            {
                sIt = m_searchPoints.insert( searchID, new SearchPoint );
            }
            SearchPoint* sp = *sIt;
918
919
            sp->it_begin = it;
            sp->it_end = it_begin;
920
921
922
923
924
925
926
927
928
929
930
            sp->offset_begin = j;
            sp->offset_end = j + qMin( queryLeft, len );
            ret->simplify();
            return ret;
        }
        if ( it == end )
            break;
        else
            --it;
    }
    // end of loop - it means that we've ended the textentities
931
    const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
932
933
934
935
936
937
938
939
940
941
    if ( sIt != m_searchPoints.end() )
    {
        SearchPoint* sp = *sIt;
        m_searchPoints.erase( sIt );
        delete sp;
    }
    delete ret;
    return 0;
}

942
QString TextPage::text(const RegularAreaRect *area) const
943
944
945
946
947
{
    return text(area, AnyPixelTextAreaInclusionBehaviour);
}

QString TextPage::text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const
948
{
Pino Toscano's avatar