Commit f2d2ede7 authored by Christoph Cullmann's avatar Christoph Cullmann 🐮
Browse files

improve threading

1) use a worklist approach => better spreading of search work
2) use ideal thread count for number of runnables => higher parallelism
3) avoid that the QRegularExpression is shared between threads (without this, e.g. 16 vs. 2 threads is no real difference)
4) deactivate the QMimeDatabase check for now => it creates a QRegularExpression internally => leads to the same locking hell :/

with this, in a linux.git clone, searching for Linus takes:

1) with 2 threads: ~4.5 seconds
2) with 16 threads: ~0.5 seconds :P
parent f602dd28
......@@ -22,10 +22,9 @@
#include <QTextStream>
#include <QUrl>
SearchDiskFiles::SearchDiskFiles(const QStringList &files, const QRegularExpression &regexp, const bool includeBinaryFiles)
: QObject(nullptr)
, m_files(files)
, m_regExp(regexp)
SearchDiskFiles::SearchDiskFiles(SearchDiskFilesWorkList &worklist, const QRegularExpression &regexp, const bool includeBinaryFiles)
: m_worklist(worklist)
, m_regExp(regexp.pattern(), regexp.patternOptions()) // we WANT to kill the sharing, ELSE WE LOCK US DEAD!
, m_includeBinaryFiles(includeBinaryFiles)
{
// ensure we have a proper thread name during e.g. perf profiling
......@@ -35,8 +34,10 @@ SearchDiskFiles::SearchDiskFiles(const QStringList &files, const QRegularExpress
void SearchDiskFiles::run()
{
const QMimeDatabase db;
for (const QString &fileName : qAsConst(m_files)) {
if (m_cancelSearch) {
while (true) {
// get next file, we get empty string if all done or search canceled!
const auto fileName = m_worklist.nextFileToSearch();
if (fileName.isEmpty()) {
break;
}
......@@ -48,10 +49,10 @@ void SearchDiskFiles::run()
// exclude binary files?
if (!m_includeBinaryFiles) {
const auto mimeType = db.mimeTypeForFileNameAndData(fileName, &file);
if (!mimeType.inherits(QStringLiteral("text/plain"))) {
continue;
}
/* const auto mimeType = db.mimeTypeForFileNameAndData(fileName, &file);
if (!mimeType.inherits(QStringLiteral("text/plain"))) {
continue;
}*/
}
if (m_regExp.pattern().contains(QLatin1String("\\n"))) {
......@@ -62,11 +63,6 @@ void SearchDiskFiles::run()
}
}
/**
 * Request cancellation of the search done by this runnable.
 * Only sets the m_cancelSearch flag; the search loops poll that flag and
 * break out once they see it set.
 */
void SearchDiskFiles::cancelSearch()
{
m_cancelSearch = true;
}
void SearchDiskFiles::searchSingleLineRegExp(QFile &file)
{
QTextStream stream(&file);
......@@ -76,13 +72,13 @@ void SearchDiskFiles::searchSingleLineRegExp(QFile &file)
QRegularExpressionMatch match;
QVector<KateSearchMatch> matches;
while (!(line = stream.readLine()).isNull()) {
if (m_cancelSearch) {
if (m_worklist.isCanceled()) {
break;
}
match = m_regExp.match(line);
column = match.capturedStart();
while (column != -1 && !match.captured().isEmpty()) {
if (m_cancelSearch) {
if (m_worklist.isCanceled()) {
break;
}
......@@ -136,7 +132,7 @@ void SearchDiskFiles::searchMultiLineRegExp(QFile &file)
column = match.capturedStart();
QVector<KateSearchMatch> matches;
while (column != -1 && !match.captured().isEmpty()) {
if (m_cancelSearch) {
if (m_worklist.isCanceled()) {
break;
}
// search for the line number of the match
......
......@@ -18,6 +18,8 @@
#ifndef SearchDiskFiles_h
#define SearchDiskFiles_h
#include <atomic>

#include <QMutex>
#include <QMutexLocker>
#include <QObject>
#include <QRegularExpression>
#include <QRunnable>
......@@ -29,18 +31,152 @@ class QString;
class QUrl;
class QFile;
/**
 * Thread-safe worklist to feed the SearchDiskFiles runnables.
 *
 * The list of files, the index of the next file and the number of still
 * running runnables are guarded by m_mutex. The cancel flag is an atomic so
 * that isCanceled() can be polled from the search loops without locking.
 */
class SearchDiskFilesWorkList
{
public:
    /**
     * Default constructor => nothing to be done
     */
    SearchDiskFilesWorkList() = default;

    /**
     * Any workers running?
     * @return true as long as not every runnable announced via init() was
     *         marked as done with markOnRunnableAsDone()
     */
    bool isRunning()
    {
        QMutexLocker lock(&m_mutex);
        return m_currentRunningRunnables > 0;
    }

    /**
     * Search canceled?
     * @return canceled?
     */
    bool isCanceled()
    {
        // we don't lock here, doesn't matter if we see the value a bit later
        // we call this "OFTEN"
        // the flag is atomic, so the unlocked read is no data race; relaxed
        // ordering is enough as no other data is published through this flag
        return m_canceled.load(std::memory_order_relaxed);
    }

    /**
     * Init the search, shall only be done if not running.
     * @param files files to search, must not be empty
     * @param numberOfWorkers number of workers we will spawn, must be > 0;
     *        markOnRunnableAsDone() has to be called exactly that often
     */
    void init(const QStringList &files, int numberOfWorkers)
    {
        /**
         * ensure sane initial state: last search is done!
         * should hold even if canceled, the last markOnRunnableAsDone clears all fields!
         */
        QMutexLocker lock(&m_mutex);
        Q_ASSERT(m_currentRunningRunnables == 0);
        Q_ASSERT(m_filesToSearch.isEmpty());
        Q_ASSERT(m_filesToSearchIndex == 0);

        /**
         * we shall not be called without any work!
         */
        Q_ASSERT(!files.isEmpty());
        Q_ASSERT(numberOfWorkers > 0);

        /**
         * init work
         */
        m_currentRunningRunnables = numberOfWorkers;
        m_filesToSearch = files;
        m_filesToSearchIndex = 0;
        m_canceled = false;
    }

    /**
     * Get one file to search if still some there.
     * Will return empty string if no further work (or canceled)
     * @return file to search next or empty string
     */
    QString nextFileToSearch()
    {
        QMutexLocker lock(&m_mutex);

        // nothing left; cancel() empties the list, so this covers the canceled case, too
        if (m_filesToSearchIndex >= m_filesToSearch.size()) {
            return QString();
        }

        // else return file, shall not be empty and advance one file
        const QString &file = m_filesToSearch.at(m_filesToSearchIndex);
        Q_ASSERT(!file.isEmpty());
        ++m_filesToSearchIndex;
        return file;
    }

    /**
     * Mark one runnable as done.
     * Once the last runnable is marked as done, the worklist is reset so
     * that init() can be called again.
     */
    void markOnRunnableAsDone()
    {
        QMutexLocker lock(&m_mutex);
        Q_ASSERT(m_currentRunningRunnables > 0);
        --m_currentRunningRunnables;

        // if we are done, cleanup
        if (m_currentRunningRunnables == 0) {
            m_filesToSearch.clear();
            m_filesToSearchIndex = 0;
        }
    }

    /**
     * Cancel the work.
     * Sets the cancel flag and drops all files not yet handed out, the
     * count of running runnables is not touched.
     */
    void cancel()
    {
        QMutexLocker lock(&m_mutex);
        m_canceled = true;
        m_filesToSearch.clear();
        m_filesToSearchIndex = 0;
    }

private:
    /**
     * non-recursive mutex to lock internals, only locked a very short time
     */
    QMutex m_mutex;

    /**
     * current number of still active runnables, if == 0 => nothing running
     */
    int m_currentRunningRunnables = 0;

    /**
     * worklist => files to search in on the disk
     */
    QStringList m_filesToSearch;

    /**
     * current index into the worklist => next file to search
     * we don't modify the stringlist, we just move the index
     */
    int m_filesToSearchIndex = 0;

    /**
     * was the search canceled?
     * atomic: written with m_mutex held, but read without it in isCanceled()
     */
    std::atomic<bool> m_canceled{false};
};
class SearchDiskFiles : public QObject, public QRunnable
{
Q_OBJECT
public:
SearchDiskFiles(const QStringList &iles, const QRegularExpression &regexp, const bool includeBinaryFiles);
SearchDiskFiles(SearchDiskFilesWorkList &worklist, const QRegularExpression &regexp, const bool includeBinaryFiles);
void run() override;
public Q_SLOTS:
void cancelSearch();
Q_SIGNALS:
void matchesFound(const QUrl &url, const QVector<KateSearchMatch> &searchMatches);
......@@ -49,10 +185,9 @@ private:
void searchMultiLineRegExp(QFile &file);
private:
QStringList m_files;
QRegularExpression m_regExp;
SearchDiskFilesWorkList &m_worklist;
const QRegularExpression m_regExp;
bool m_includeBinaryFiles = false;
bool m_cancelSearch = false;
};
#endif
......@@ -768,44 +768,58 @@ void KatePluginSearchView::startDiskFileSearch(const QStringList &fileList, cons
return;
}
// Note: Experimented with different thread counts. 1 to QThread::idealThreadCount()
// The optimum was two threads.... My theory is that the disk is the main bottleneck.
int chunkSize = (fileList.size() / 2) + 1;
chunkSize = qMax(chunkSize, 1);
int nextChunk = 0;
while (nextChunk < fileList.size()) {
QStringList chunckList = fileList.mid(nextChunk, chunkSize);
SearchDiskFiles *runner = new SearchDiskFiles(chunckList, reg, includeBinaryFiles);
// spread work to X threads => default to ideal thread count
const int threadCount = m_searchDiskFilePool.maxThreadCount();
// init worklist for these number of threads
m_worklistForDiskFiles.init(fileList, threadCount);
// spawn enough runnables, they will pull the files themself from our worklist
// this must exactly match the count we used to init the worklist above, as this is used to finalize stuff!
for (int i = 0; i < threadCount; ++i) {
// new runnable, will pull work from the worklist itself!
// worklist is used to drive if we need to stop the work, too!
SearchDiskFiles *runner = new SearchDiskFiles(m_worklistForDiskFiles, reg, includeBinaryFiles);
// queued connection for the results, this is emitted by a different thread than the runnable object and this one!
connect(runner, &SearchDiskFiles::matchesFound, this, &KatePluginSearchView::matchesFound, Qt::QueuedConnection);
connect(this, &KatePluginSearchView::cancelSearch, runner, &SearchDiskFiles::cancelSearch, Qt::QueuedConnection);
// queued connection for the finalization (runnable destroyed), this is emitted by a different thread than the runnable object and this one!
connect(
runner,
&SearchDiskFiles::destroyed,
this,
[this]() {
if (m_searchDiskFilePool.activeThreadCount() == 0) {
// signal the worklist one runnable more is done
m_worklistForDiskFiles.markOnRunnableAsDone();
// if no longer anything running, signal finished!
if (!m_worklistForDiskFiles.isRunning()) {
if (!m_diskSearchDoneTimer.isActive()) {
m_diskSearchDoneTimer.start();
}
}
},
Qt::QueuedConnection);
// launch the runnable
m_searchDiskFilePool.start(runner);
nextChunk += chunkSize;
}
}
/**
 * Cancel the disk file search and block until the thread pool has finished.
 */
void KatePluginSearchView::cancelDiskFileSearch()
{
// notify whatever is connected to the cancelSearch signal
Q_EMIT cancelSearch();
// signal canceling to runnables
m_worklistForDiskFiles.cancel();
// drop the runnables that are still queued in the pool, then
// wait for finalization of the ones that already run
m_searchDiskFilePool.clear();
m_searchDiskFilePool.waitForDone();
}
/**
 * Is a disk file search in progress?
 * @return true if the search thread pool still has active threads
 */
bool KatePluginSearchView::searchingDiskFiles()
{
return m_searchDiskFilePool.activeThreadCount() > 0;
// NOTE(review): never reached as written, the return above always exits first.
// Looks like the intended replacement that asks the worklist instead of the
// thread pool; confirm which of the two statements is meant to stay.
return m_worklistForDiskFiles.isRunning();
}
void KatePluginSearchView::searchPlaceChanged()
......@@ -910,10 +924,10 @@ void KatePluginSearchView::updateViewColors()
}
}
// static QElapsedTimer s_timer;
static QElapsedTimer s_timer;
void KatePluginSearchView::startSearch()
{
// s_timer.start();
s_timer.start();
// Forcefully stop any ongoing search or replace
m_folderFilesList.terminateSearch();
......@@ -1237,7 +1251,7 @@ void KatePluginSearchView::searchDone()
m_searchJustOpened = false;
updateMatchMarks();
// qDebug() << "done:" << s_timer.elapsed();
qDebug() << "done:" << s_timer.elapsed();
}
void KatePluginSearchView::searchWhileTypingDone()
......
......@@ -175,9 +175,6 @@ private Q_SLOTS:
void copySearchToClipboard(CopyResultType type);
void customResMenuRequested(const QPoint &pos);
Q_SIGNALS:
void cancelSearch();
protected:
bool eventFilter(QObject *obj, QEvent *ev) override;
......@@ -196,7 +193,17 @@ private:
KTextEditor::Application *m_kateApp;
SearchOpenFiles m_searchOpenFiles;
FolderFilesList m_folderFilesList;
/**
* worklist for runnables, must survive thread pool below!
*/
SearchDiskFilesWorkList m_worklistForDiskFiles;
/**
* threadpool for multi-threaded disk search
*/
QThreadPool m_searchDiskFilePool;
QTimer m_diskSearchDoneTimer;
QAction *m_matchCase = nullptr;
QAction *m_useRegExp = nullptr;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment