Commit 49c41c2b authored by Igor Kushnir
Browse files

Optimize LanguageController's MimeTypeCache: QMultiHash suffixes

The performance gains are not very significant, but the code becomes
shorter as well, so there seem to be no drawbacks to this change.

Average BenchLanguageController results before and at this commit in
milliseconds per iteration:
        Data row                    Before      At
1. benchLanguagesForUrlNoCache()
    CMakeLists                      0.00045     0.00046
    cmakelists wrong case           0.00045     0.00046
    lower-case                      0.00045     0.00040
    upper-case                      0.00045     0.00039
    mixed-case                      0.00045     0.00040
    .C                              0.00037     0.00040
    .cl                             0.00038     0.00038
    existent C with extension       0.00043     0.00045
    .cc                             0.00040     0.00039
    .cmake                          0.00041     0.00044
    .diff                           0.00038     0.00040
    .qml                            0.00037     0.00039
    existent C w/o extension        0.16        0.16
    existent patch w/o extension    0.20        0.20
2. benchLanguagesForUrlFilledCache()
    CMakeLists                      0.00048     0.00050
    cmakelists wrong case           0.00049     0.00050
    lower-case                      0.00048     0.00041
    upper-case                      0.00049     0.00041
    mixed-case                      0.00049     0.00042
    .C                              0.00041     0.00042
    .cl                             0.00045     0.00042
    existent C with extension       0.00047     0.00047
    .cc                             0.00044     0.00041
    .cmake                          0.00042     0.00043
    .diff                           0.00041     0.00041
    .qml                            0.00045     0.00043
    existent C w/o extension        0.16        0.16
    existent patch w/o extension    0.20        0.20

Note: benchLanguagesForUrlNoMatch*() benchmark results are practically
unaffected by this commit because the QMimeDatabase::mimeTypeForFile()
call is orders of magnitude slower than the
MimeTypeCache::languagesForFileName() call optimized here.

The benchmarks run slightly faster on average now.
benchLanguagesForUrlFilledCache(), which is closest to the real-life
LanguageController usage in KDevelop, got a 14% speedup for the common
*.cpp pattern data rows ("lower-case", "upper-case" and "mixed-case").
parent 59270583
Pipeline #96960 passed with stage
in 33 minutes and 46 seconds
......@@ -13,6 +13,7 @@
#include <QHash>
#include <QMimeDatabase>
#include <QMultiHash>
#include <QMutexLocker>
#include <QRegularExpression>
#include <QThread>
......@@ -47,24 +48,6 @@ bool containsWildcardCharacter(QString::const_iterator first, QString::const_ite
}
using KDevelop::ILanguageSupport;
using StringLanguagePair = std::pair<QString, ILanguageSupport*>;
struct LastCharacterLess {
bool operator()(const StringLanguagePair& a, const StringLanguagePair& b) const {
return checkedLess(a.first.back(), b.first.back());
}
bool operator()(const StringLanguagePair& a, QChar b) const {
return checkedLess(a.first.back(), b);
}
bool operator()(QChar a, const StringLanguagePair& b) const {
return checkedLess(a, b.first.back());
}
private:
static bool checkedLess(QChar a, QChar b) {
Q_ASSERT(!a.isUpper() && !b.isUpper());
return a < b;
}
};
class MimeTypeCache
{
......@@ -75,9 +58,13 @@ public:
private:
void addGlobPattern(const QString& pattern, ILanguageSupport* language);
// Suffixes are stored lower-case and sorted with LastCharacterLess to speed up matching.
std::vector<StringLanguagePair> m_suffixes; ///< contains e.g. ".cpp"
std::vector<StringLanguagePair> m_literalPatterns; ///< contains e.g. "CMakeLists.txt"
using StringLanguagePair = std::pair<QString, ILanguageSupport*>;
/// key() is the lower-cased last character of the suffix; value().first is e.g. ".cpp".
/// Keying by the last character performs better than keying by the entire lower-cased (last) extension,
/// perhaps because no temporary lower-cased copy of fileName's extension is created in languagesForFileName().
QMultiHash<QChar, StringLanguagePair> m_suffixes;
std::vector<StringLanguagePair> m_literalPatterns; ///< contains e.g. "CMakeLists.txt" as front().first
/// fallback, hopefully empty in practice
std::vector<std::pair<QRegularExpression, ILanguageSupport*>> m_regularExpressions;
};
......@@ -90,17 +77,10 @@ void MimeTypeCache::addMimeType(const QString& mimeTypeName, ILanguageSupport* l
return;
}
const auto suffixCount = m_suffixes.size();
const auto globPatterns = mime.globPatterns();
for (const QString& pattern : globPatterns) {
addGlobPattern(pattern, language);
}
if (m_suffixes.size() != suffixCount) {
Q_ASSERT(std::none_of(m_suffixes.cbegin(), m_suffixes.cend(), [](const StringLanguagePair& p) {
return p.first.back().isUpper();
}));
std::sort(m_suffixes.begin(), m_suffixes.end(), LastCharacterLess{});
}
}
QList<ILanguageSupport*> MimeTypeCache::languagesForFileName(const QString& fileName) const
......@@ -108,21 +88,19 @@ QList<ILanguageSupport*> MimeTypeCache::languagesForFileName(const QString& file
Q_ASSERT(!fileName.isEmpty());
QList<ILanguageSupport*> languages;
// lastLanguageEquals() helps to improve performance by skipping checks for an already
// added language. It also prevents duplicate elements of languages when there are
// multiple equivalent glob patterns for a given language (for example, clangsupport
// plugin supports *.c from text/x-csrc and *.C from text/x-c++src).
// lastLanguageEquals() helps to improve performance by skipping checks for an already added language.
const auto lastLanguageEquals = [&languages](const ILanguageSupport* lang) {
return !languages.empty() && languages.constLast() == lang;
};
const auto possibleSuffixes = std::equal_range(m_suffixes.cbegin(), m_suffixes.cend(),
fileName.back().toLower(), LastCharacterLess{});
for (auto it = possibleSuffixes.first; it != possibleSuffixes.second; ++it) {
if (!lastLanguageEquals(it->second) && fileName.endsWith(it->first, Qt::CaseInsensitive)) {
languages.push_back(it->second);
const auto lastChar = fileName.back().toLower();
for (auto it = m_suffixes.constFind(lastChar); it != m_suffixes.cend() && it.key() == lastChar; ++it) {
const auto lang = it.value().second;
if (!lastLanguageEquals(lang) && fileName.endsWith(it.value().first, Qt::CaseInsensitive)) {
languages.push_back(lang);
}
}
for (const auto& p : m_literalPatterns) {
if (fileName.compare(p.first, Qt::CaseInsensitive) == 0 && !lastLanguageEquals(p.second)) {
languages.push_back(p.second);
......@@ -146,10 +124,12 @@ void MimeTypeCache::addGlobPattern(const QString& pattern, ILanguageSupport* lan
}
if (pattern.front() == QLatin1Char{'*'}) {
// Check `pattern.size() > 1` because storing an empty string in m_suffixes
// would break LastCharacterLess comparison.
if (pattern.size() > 1 && !containsWildcardCharacter(pattern.cbegin() + 1, pattern.cend())) {
m_suffixes.emplace_back(pattern.mid(1).toLower(), language);
const auto lastChar = pattern.back().toLower();
StringLanguagePair suffix{pattern.mid(1).toLower(), language};
if (!m_suffixes.contains(lastChar, suffix)) {
m_suffixes.insert(lastChar, std::move(suffix));
}
return;
}
} else if (!containsWildcardCharacter(pattern.cbegin(), pattern.cend())) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment