Commit 8d3c7108 authored by Waqar Ahmed's avatar Waqar Ahmed Committed by Christoph Cullmann
Browse files

Update fuzzy matching code

parent 49fcaeb1
......@@ -57,7 +57,7 @@ protected:
const auto idx = sourceModel()->index(sourceRow, 0, sourceParent);
const QString string = idx.data().toString();
const QStringView actionName = string.splitRef(QLatin1Char(':')).at(1);
const bool res = kfts::fuzzy_match_sequential(m_pattern, actionName, score);
const bool res = kfts::fuzzy_match(m_pattern, actionName.trimmed(), score);
sourceModel()->setData(idx, score, CommandModel::Score);
return res;
}
......@@ -86,7 +86,7 @@ public:
const auto strs = index.data().toString().split(QLatin1Char(':'));
QString str = strs.at(1);
const QString nameColor = option.palette.color(QPalette::Link).name();
kfts::to_fuzzy_matched_display_string(m_filterString, str, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
kfts::to_scored_fuzzy_matched_display_string(m_filterString, str, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
const QString component = QStringLiteral("<span style=\"color: gray;\">") + strs.at(0) + QStringLiteral(": </span>");
......
......@@ -144,9 +144,10 @@ public:
const QString nameColor = option.palette.color(QPalette::Link).name();
if (mode == FilterMode::FilterByName) {
kfts::to_fuzzy_matched_display_string(m_filterString, name, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
kfts::to_scored_fuzzy_matched_display_string(
m_filterString, name, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
} else if (mode == FilterMode::FilterByPath) {
kfts::to_fuzzy_matched_display_string(m_filterString, path, QStringLiteral("<b>"), QStringLiteral("</b>"));
kfts::to_scored_fuzzy_matched_display_string(m_filterString, path, QStringLiteral("<b>"), QStringLiteral("</b>"));
} else {
// check if there's a / separtion in filter string
// if there is, we use the last part to highlight the
......@@ -155,11 +156,12 @@ public:
if (pos > -1) {
++pos;
auto pattern = m_filterString.midRef(pos);
kfts::to_fuzzy_matched_display_string(pattern, name, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
kfts::to_scored_fuzzy_matched_display_string(pattern, name, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
} else {
kfts::to_fuzzy_matched_display_string(m_filterString, name, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
kfts::to_scored_fuzzy_matched_display_string(
m_filterString, name, QStringLiteral("<b style=\"color:%1;\">").arg(nameColor), QStringLiteral("</b>"));
}
kfts::to_fuzzy_matched_display_string(m_filterString, path, QStringLiteral("<b>"), QStringLiteral("</b>"));
kfts::to_scored_fuzzy_matched_display_string(m_filterString, path, QStringLiteral("<b>"), QStringLiteral("</b>"));
}
const auto pathFontsize = option.font.pointSize();
......
......@@ -30,13 +30,8 @@ Q_DECL_UNUSED static bool fuzzy_match_simple(const QStringView pattern, const QS
* results this function won't be as effective.
*/
Q_DECL_UNUSED static bool fuzzy_match(const QStringView pattern, const QStringView str, int &outScore);
Q_DECL_UNUSED static bool fuzzy_match(const QStringView pattern, const QStringView str, int &outScore, uint8_t *matches, int maxMatches);
Q_DECL_UNUSED static bool fuzzy_match(const QStringView pattern, const QStringView str, int &outScore, uint8_t *matches);
/**
* @brief This is a special case function which doesn't score separator matches higher than sequential matches.
* This is currently used in Kate's command bar where the string items are usually space separated names.
*/
Q_DECL_UNUSED static bool fuzzy_match_sequential(const QStringView pattern, const QStringView str, int &outScore);
/**
* @brief get string for display in treeview / listview. This should be used from style delegate.
* For example: with @a pattern = "kate", @a str = "kateapp" and @htmlTag = "<b>
......@@ -47,6 +42,8 @@ Q_DECL_UNUSED static bool fuzzy_match_sequential(const QStringView pattern, cons
* of interval container
*/
Q_DECL_UNUSED static QString to_fuzzy_matched_display_string(const QStringView pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose);
Q_DECL_UNUSED static QString
to_scored_fuzzy_matched_display_string(const QStringView pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose);
}
namespace kfts
......@@ -54,6 +51,11 @@ namespace kfts
// Forward declarations for "private" implementation
namespace fuzzy_internal
{
static inline constexpr QChar toLower(QChar c)
{
return c.isLower() ? c : c.toLower();
}
static bool fuzzy_match_recursive(QStringView::const_iterator pattern,
QStringView::const_iterator str,
int &outScore,
......@@ -62,10 +64,9 @@ static bool fuzzy_match_recursive(QStringView::const_iterator pattern,
const QStringView::const_iterator patternEnd,
uint8_t const *srcMatches,
uint8_t *newMatches,
int maxMatches,
int nextMatch,
int &recursionCount,
int seqBonus = 15);
int &totalMatches,
int &recursionCount);
}
// Public interface
......@@ -83,10 +84,10 @@ static bool fuzzy_match_simple(const QStringView pattern, const QStringView str)
static bool fuzzy_match(const QStringView pattern, const QStringView str, int &outScore)
{
uint8_t matches[256];
return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
return fuzzy_match(pattern, str, outScore, matches);
}
static bool fuzzy_match(const QStringView pattern, const QStringView str, int &outScore, uint8_t *matches, int maxMatches)
static bool fuzzy_match(const QStringView pattern, const QStringView str, int &outScore, uint8_t *matches)
{
int recursionCount = 0;
......@@ -94,21 +95,9 @@ static bool fuzzy_match(const QStringView pattern, const QStringView str, int &o
auto patternIt = pattern.cbegin();
const auto patternEnd = pattern.cend();
const auto strEnd = str.cend();
int totalMatches = 0;
return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount);
}
static bool fuzzy_match_sequential(const QStringView pattern, const QStringView str, int &outScore)
{
int recursionCount = 0;
uint8_t matches[256];
auto maxMatches = sizeof(matches);
auto strIt = str.cbegin();
auto patternIt = pattern.cbegin();
const auto patternEnd = pattern.cend();
const auto strEnd = str.cend();
return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, maxMatches, 0, recursionCount, 40);
return fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, outScore, strIt, strEnd, patternEnd, nullptr, matches, 0, totalMatches, recursionCount);
}
// Private implementation
......@@ -120,13 +109,15 @@ static bool fuzzy_internal::fuzzy_match_recursive(QStringView::const_iterator pa
const QStringView::const_iterator patternEnd,
const uint8_t *srcMatches,
uint8_t *matches,
int maxMatches,
int nextMatch,
int &recursionCount,
int seqBonus)
int &totalMatches,
int &recursionCount)
{
// Count recursions
static constexpr int recursionLimit = 10;
// max number of matches allowed, this should be enough
static constexpr int maxMatches = 256;
// Count recursions
++recursionCount;
if (recursionCount >= recursionLimit) {
return false;
......@@ -139,43 +130,38 @@ static bool fuzzy_internal::fuzzy_match_recursive(QStringView::const_iterator pa
// Recursion params
bool recursiveMatch = false;
uint8_t bestRecursiveMatches[256];
uint8_t bestRecursiveMatches[maxMatches];
int bestRecursiveScore = 0;
// Loop through pattern and str looking for a match
bool first_match = true;
bool firstMatch = true;
QChar currentPatternChar = toLower(*pattern);
// Are we matching in sequence start from start?
bool matchingInSequence = true;
while (pattern != patternEnd && str != strEnd) {
// Found match
if (pattern->toLower() == str->toLower()) {
if (currentPatternChar == toLower(*str)) {
// Supplied matches buffer was too short
if (nextMatch >= maxMatches) {
return false;
}
// "Copy-on-Write" srcMatches into matches
if (first_match && srcMatches) {
if (firstMatch && srcMatches) {
memcpy(matches, srcMatches, nextMatch);
first_match = false;
firstMatch = false;
}
// Recursive call that "skips" this match
uint8_t recursiveMatches[256];
int recursiveScore;
auto strNextChar = std::next(str);
if (fuzzy_match_recursive(pattern,
strNextChar,
recursiveScore,
strBegin,
strEnd,
patternEnd,
matches,
recursiveMatches,
sizeof(recursiveMatches),
nextMatch,
recursionCount)) {
uint8_t recursiveMatches[maxMatches];
int recursiveScore = 0;
const auto strNextChar = std::next(str);
if (!matchingInSequence
&& fuzzy_match_recursive(
pattern, strNextChar, recursiveScore, strBegin, strEnd, patternEnd, matches, recursiveMatches, nextMatch, totalMatches, recursionCount)) {
// Pick best recursive score
if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
memcpy(bestRecursiveMatches, recursiveMatches, 256);
memcpy(bestRecursiveMatches, recursiveMatches, maxMatches);
bestRecursiveScore = recursiveScore;
}
recursiveMatch = true;
......@@ -184,77 +170,89 @@ static bool fuzzy_internal::fuzzy_match_recursive(QStringView::const_iterator pa
// Advance
matches[nextMatch++] = (uint8_t)(std::distance(strBegin, str));
++pattern;
currentPatternChar = toLower(*pattern);
} else {
matchingInSequence = false;
}
++str;
}
// Determine if full pattern was matched
bool matched = pattern == patternEnd ? true : false;
const bool matched = pattern == patternEnd;
// Calculate score
if (matched) {
int sequential_bonus = seqBonus; // bonus for adjacent matches
static constexpr int separator_bonus = 30; // bonus if match occurs after a separator
static constexpr int camel_bonus = 30; // bonus if match is uppercase and prev is lower
static constexpr int first_letter_bonus = 15; // bonus if the first letter is matched
static constexpr int leading_letter_penalty = -5; // penalty applied for every letter in str before the first match
static constexpr int max_leading_letter_penalty = -15; // maximum penalty for leading letters
static constexpr int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
// Iterate str to end
while (str != strEnd) {
++str;
}
static constexpr int sequentialBonus = 25;
static constexpr int separatorBonus = 25; // bonus if match occurs after a separator
static constexpr int camelBonus = 25; // bonus if match is uppercase and prev is lower
static constexpr int firstLetterBonus = 15; // bonus if the first letter is matched
static constexpr int leadingLetterPenalty = -5; // penalty applied for every letter in str before the first match
static constexpr int maxLeadingLetterPenalty = -15; // maximum penalty for leading letters
static constexpr int unmatchedLetterPenalty = -1; // penalty for every letter that doesn't matter
static constexpr int nonBeginSequenceBonus = 10;
// Initialize score
outScore = 100;
// Apply leading letter penalty
int penalty = leading_letter_penalty * matches[0];
if (penalty < max_leading_letter_penalty) {
penalty = max_leading_letter_penalty;
}
const int penalty = std::max(leadingLetterPenalty * matches[0], maxLeadingLetterPenalty);
outScore += penalty;
// Apply unmatched penalty
const int unmatched = (int)(std::distance(strBegin, str)) - nextMatch;
outScore += unmatched_letter_penalty * unmatched;
const int unmatched = (int)(std::distance(strBegin, strEnd)) - nextMatch;
outScore += unmatchedLetterPenalty * unmatched;
uint8_t seqs[maxMatches] = {0};
// Apply ordering bonuses
int j = 0;
for (int i = 0; i < nextMatch; ++i) {
uint8_t currIdx = matches[i];
const uint8_t currIdx = matches[i];
if (i > 0) {
uint8_t prevIdx = matches[i - 1];
const uint8_t prevIdx = matches[i - 1];
// Sequential
if (currIdx == (prevIdx + 1)) {
outScore += sequential_bonus;
if (j > 0 && seqs[j - 1] == i - 1) {
outScore += sequentialBonus;
seqs[j++] = i;
} else {
// In sequence, but from first char
outScore += nonBeginSequenceBonus;
}
}
}
// Check for bonuses based on neighbor character value
if (currIdx > 0) {
// Camel case
QChar neighbor = *(strBegin + currIdx - 1);
QChar curr = *(strBegin + currIdx);
if (neighbor.isLower() && curr.isUpper()) {
outScore += camel_bonus;
const QChar neighbor = *(strBegin + currIdx - 1);
const QChar curr = *(strBegin + currIdx);
// if camel case bonus, then not snake / separator.
// This prevents double bonuses
const bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
if (!neighborSeparator && neighbor.isLower() && curr.isUpper()) {
outScore += camelBonus - currIdx;
continue;
}
// Separator
bool neighborSeparator = neighbor == QLatin1Char('_') || neighbor == QLatin1Char(' ');
if (neighborSeparator) {
outScore += separator_bonus;
outScore += separatorBonus - currIdx;
}
} else {
// First letter
outScore += first_letter_bonus;
// First letter match has the highest score
outScore += firstLetterBonus + separatorBonus;
seqs[j++] = i;
}
}
}
totalMatches = nextMatch;
// Return best result
if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
// Recursive score is better than "this"
......@@ -278,7 +276,7 @@ static QString to_fuzzy_matched_display_string(const QStringView pattern, QStrin
*/
int j = 0;
for (int i = 0; i < str.size() && j < pattern.size(); ++i) {
if (str.at(i).toLower() == pattern.at(j).toLower()) {
if (fuzzy_internal::toLower(str.at(i)) == fuzzy_internal::toLower(pattern.at(j))) {
str.replace(i, 1, htmlTag + str.at(i) + htmlTagClose);
i += htmlTag.size() + htmlTagClose.size();
++j;
......@@ -286,6 +284,39 @@ static QString to_fuzzy_matched_display_string(const QStringView pattern, QStrin
}
return str;
}
static QString to_scored_fuzzy_matched_display_string(const QStringView pattern, QString &str, const QString &htmlTag, const QString &htmlTagClose)
{
if (pattern.isEmpty()) {
return str;
}
uint8_t matches[256];
bool matched = false;
int totalMatches = 0;
{
int score = 0;
int recursionCount = 0;
auto strIt = str.cbegin();
auto patternIt = pattern.cbegin();
const auto patternEnd = pattern.cend();
const auto strEnd = str.cend();
matched = fuzzy_internal::fuzzy_match_recursive(patternIt, strIt, score, strIt, strEnd, patternEnd, nullptr, matches, 0, totalMatches, recursionCount);
}
int offset = 0;
for (int i = 0; i < totalMatches; ++i) {
str.insert(matches[i] + offset, htmlTag);
offset += htmlTag.size();
str.insert(matches[i] + offset + 1, htmlTagClose);
offset += htmlTagClose.size();
}
return str;
}
} // namespace kfts
#endif // KFTS_FUZZY_MATCH_H
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment