SearchDiskFiles.cpp 6.74 KB
Newer Older
1
2
3
4
5
6
/*
    SPDX-FileCopyrightText: 2011-21 Kåre Särs <kare.sars@iki.fi>
    SPDX-FileCopyrightText: 2021 Christoph Cullmann <cullmann@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
7

8
#include "SearchDiskFiles.h"
9
10

#include <QDir>
11
#include <QElapsedTimer>
12
#include <QTextStream>
13
#include <QUrl>
14

Christoph Cullmann's avatar
Christoph Cullmann committed
15
16
17
/**
 * Set up one search worker.
 *
 * @param worklist shared work list this worker pulls file names from (kept by reference)
 * @param regexp expression to search for
 * @param includeBinaryFiles if false, files containing NUL characters are skipped by the search routines
 */
SearchDiskFiles::SearchDiskFiles(SearchDiskFilesWorkList &worklist, const QRegularExpression &regexp, const bool includeBinaryFiles)
    : m_worklist(worklist)
    // Deliberately rebuilt from pattern + options instead of copy-constructed:
    // the worker must not share the caller's expression object, as the original
    // author notes that sharing it ends in a deadlock.
    , m_regExp(regexp.pattern(), regexp.patternOptions())
    , m_includeBinaryFiles(includeBinaryFiles)
{
    // a readable object name helps to identify this worker, e.g. during perf profiling
    setObjectName(QStringLiteral("SearchDiskFiles"));
}
23

24
void SearchDiskFiles::run()
25
{
26
27
28
29
30
31
32
33
34
    // do we need to search multiple lines?
    const bool multiLineSearch = m_regExp.pattern().contains(QLatin1String("\\n"));

    // timer to emit matchesFound once in a time even for files without matches
    // this triggers process in the UI
    QElapsedTimer emitTimer;
    emitTimer.start();

    // search, pulls work from the shared work list for all workers
Christoph Cullmann's avatar
Christoph Cullmann committed
35
36
37
38
    while (true) {
        // get next file, we get empty string if all done or search canceled!
        const auto fileName = m_worklist.nextFileToSearch();
        if (fileName.isEmpty()) {
39
            break;
40
        }
41

42
43
44
45
46
47
        // open file early, this allows mime-type detection & search to use same io device
        QFile file(fileName);
        if (!file.open(QFile::ReadOnly)) {
            continue;
        }

48
49
50
51
        // let the right search algorithm compute the matches for this file
        QVector<KateSearchMatch> matches;
        if (multiLineSearch) {
            matches = searchMultiLineRegExp(file);
52
        } else {
53
54
55
56
57
58
59
60
            matches = searchSingleLineRegExp(file);
        }

        // if we have matches or didn't emit something long enough, do so
        // we don't emit for all file to not stall get GUI and lock us a lot ;)
        if (!matches.isEmpty() || emitTimer.hasExpired(100)) {
            Q_EMIT matchesFound(QUrl::fromLocalFile(file.fileName()), matches);
            emitTimer.restart();
61
62
63
64
        }
    }
}

65
/**
 * Search the given file line by line with m_regExp.
 *
 * @param file already opened file to read from
 * @return all matches found; empty if the file looks binary (and binary files
 *         are excluded), only the matches collected so far if the search got canceled
 */
QVector<KateSearchMatch> SearchDiskFiles::searchSingleLineRegExp(QFile &file)
{
    QVector<KateSearchMatch> results;
    QTextStream input(&file);
    QString text;

    for (int lineNo = 0; input.readLineInto(&text); ++lineNo) {
        // Cheap binary detection: a NUL character in the text. Not perfect, but
        // much better than asking QMimeDatabase, which is a performance &
        // threading disaster. Everything found so far in this file is dropped.
        if (!m_includeBinaryFiles && text.contains(QLatin1Char('\0'))) {
            return {};
        }

        // collect all occurrences in this line, left to right
        int searchFrom = 0;
        for (;;) {
            // on cancel hand back what we have right away
            if (m_worklist.isCanceled()) {
                return results;
            }

            const QRegularExpressionMatch hit = m_regExp.match(text, searchFrom);
            const int startCol = hit.capturedStart();
            const int hitLength = hit.capturedLength();

            // no (further) match, or an empty one: we are done with this line
            if (startCol == -1 || hitLength == 0) {
                break;
            }

            // cut out a bit of surrounding text for display purposes
            const int stopCol = startCol + hitLength;
            const int contextFrom = qMax(0, startCol - MatchModel::PreContextLen);
            const QString before = text.mid(contextFrom, startCol - contextFrom);
            const QString after = text.mid(stopCol, MatchModel::PostContextLen);

            results.push_back(KateSearchMatch{before, hit.captured(), after, QString(), KTextEditor::Range{lineNo, startCol, lineNo, stopCol}, true});

            // continue right behind this match
            searchFrom = stopCol;
        }
    }

    return results;
}

119
/**
 * Search the given file as one big string, for patterns that can match
 * across line boundaries.
 *
 * The whole file is read into memory, carriage returns are stripped and the
 * offsets of all line starts are recorded so that match offsets can be
 * translated back into line/column positions.
 *
 * @param file already opened file to read from
 * @return all matches found; empty if the file looks binary (and binary files
 *         are excluded), only the matches collected so far if the search got canceled
 */
QVector<KateSearchMatch> SearchDiskFiles::searchMultiLineRegExp(QFile &file)
{
    QVector<KateSearchMatch> matches;

    QTextStream stream(&file);
    QString fullDoc = stream.readAll();

    // Cheap binary detection: a NUL character in the text. Not perfect, but
    // much better than asking QMimeDatabase, which is a performance &
    // threading disaster.
    if (!m_includeBinaryFiles && fullDoc.contains(QLatin1Char('\0'))) {
        return matches;
    }

    // normalize line endings, only '\n' remains as separator
    fullDoc.remove(QLatin1Char('\r'));

    // lineStart[n] is the offset of the first character of line n.
    // The very last character is skipped on purpose: a trailing newline
    // does not start another line.
    QVector<int> lineStart;
    lineStart << 0;
    for (int i = 0; i < fullDoc.size() - 1; i++) {
        if (fullDoc[i] == QLatin1Char('\n')) {
            lineStart << i + 1;
        }
    }

    // A pattern ending in '$' shall match at line ends: make sure the document
    // ends with a newline and look ahead for the newline instead.
    // NOTE(review): this replaces every '$' in the pattern, including an
    // escaped "\$" - kept as is, but worth a second look.
    QRegularExpression tmpRegExp = m_regExp;
    if (tmpRegExp.pattern().endsWith(QLatin1Char('$'))) {
        fullDoc += QLatin1Char('\n');
        QString newPattern = tmpRegExp.pattern();
        newPattern.replace(QStringLiteral("$"), QStringLiteral("(?=\\n)"));
        tmpRegExp.setPattern(newPattern);
    }

    // Line containing the current match. Matches are found with strictly
    // increasing start offsets, so this index only ever moves forward.
    int line = 0;

    QRegularExpressionMatch match = tmpRegExp.match(fullDoc);
    int column = match.capturedStart();
    while (column != -1 && match.capturedLength() > 0) {
        if (m_worklist.isCanceled()) {
            break;
        }

        // Advance to the last line that starts at or before the match.
        // Unlike a search for the first line start *behind* the match, this
        // also resolves matches starting in the last line of the document
        // (those used to abort the search and got lost).
        while (line + 1 < lineStart.size() && lineStart[line + 1] <= column) {
            ++line;
        }

        // fetch the matched text once, it is needed several times below
        const QString captured = match.captured();
        const int capturedLength = captured.length();

        // translate offsets into a line/column range
        const int startColumn = column - lineStart[line];
        const int endLine = line + captured.count(QLatin1Char('\n'));
        const int lastNL = captured.lastIndexOf(QLatin1Char('\n'));
        // single line match: end is relative to the start column,
        // otherwise it is the length of the text behind the last newline
        const int endColumn = lastNL == -1 ? startColumn + capturedLength : capturedLength - lastNL - 1;

        // the context before the match never reaches into the previous line
        const int preContextStart = qMax(lineStart[line], column - MatchModel::PreContextLen);
        const QString preContext = fullDoc.mid(preContextStart, column - preContextStart);
        const QString postContext = fullDoc.mid(column + capturedLength, MatchModel::PostContextLen);

        matches.push_back(KateSearchMatch{preContext, captured, postContext, QString(), KTextEditor::Range{line, startColumn, endLine, endColumn}, true});

        // continue right behind this match
        match = tmpRegExp.match(fullDoc, column + capturedLength);
        column = match.capturedStart();
    }

    return matches;
}