Commit cc3274db authored by Ilia Kats's avatar Ilia Kats Committed by Christoph Cullmann
Browse files

use a binary search instead of a trie to avoid the dependency

parent 7ba6e27b
include(ECMQtDeclareLoggingCategory)
add_subdirectory(hat-trie)
find_package(Qt5Test ${QT_MIN_VERSION} QUIET REQUIRED)
add_library(latexcompletionplugin MODULE)
target_compile_definitions(latexcompletionplugin PRIVATE TRANSLATION_DOMAIN="latexcompletionplugin")
ecm_qt_declare_logging_category(
latexcompletionplugin
HEADER logging.h
IDENTIFIER LATEXCOMPLETION
CATEGORY_NAME "katelatexcompletionplugin"
)
kde_target_enable_exceptions(latexcompletionplugin PRIVATE)
target_link_libraries(latexcompletionplugin PRIVATE KF5::I18n KF5::TextEditor tsl::hat_trie)
target_link_libraries(latexcompletionplugin PRIVATE KF5::I18n KF5::TextEditor)
target_sources(
latexcompletionplugin
......@@ -23,3 +15,7 @@ target_sources(
# ensure we are able to load plugins pre-install, too, directories must match!
set_target_properties(latexcompletionplugin PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/ktexteditor")
install(TARGETS latexcompletionplugin DESTINATION ${KDE_INSTALL_PLUGINDIR}/ktexteditor)
if (BUILD_TESTING)
add_subdirectory(autotests)
endif()
include(ECMMarkAsTest)

# Unit test that checks the ordering of the generated completion table.
add_executable(latexcompletion_test testcompletiontable.cpp)

# The test includes "completiontable.h"; the parent directory is added to the
# include path so that header can be found (presumably it sits next to the
# plugin sources -- confirm).
target_include_directories(latexcompletion_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)

add_test(NAME plugin-latexunicodecompletion_test COMMAND latexcompletion_test)

# Use the keyword signature: an executable has no consumers, so PRIVATE is the
# right scope, and it avoids mixing plain and keyword signatures later on.
target_link_libraries(latexcompletion_test PRIVATE Qt5::Core Qt5::Test)
ecm_mark_as_test(latexcompletion_test)
/*
SPDX-FileCopyrightText: 2021 Ilia Kats <ilia-kats@gmx.net>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
// this is just to confirm that string sorting in Python is the same as sorting with Qt
#include "completiontable.h"
#include <QObject>
#include <QTest>
/**
 * Checks that the generated completion table is sorted in a way Qt agrees with.
 */
class LatexCompletionTableTest : public QObject
{
    Q_OBJECT
private Q_SLOTS:
    /**
     * Verifies that every entry's `completion` string is strictly less than
     * the one of the entry that follows it (QString::operator<).
     *
     * On failure the message names both indices and both strings, so the
     * offending pair can be located in the table without a debugger.
     */
    void testSorting()
    {
        for (int next = 1; next < completiontable.size(); ++next) {
            const QString &previous = completiontable[next - 1].completion;
            const QString &current = completiontable[next].completion;
            // Multi-argument arg() substitutes in a single pass, so a '%' that
            // happens to be part of a completion string is not re-interpreted.
            QVERIFY2(previous < current,
                     qPrintable(QStringLiteral("entries %1 (\"%2\") and %3 (\"%4\") are not in strictly ascending order")
                                    .arg(QString::number(next - 1), previous, QString::number(next), current)));
        }
    }
};
QTEST_MAIN(LatexCompletionTableTest)
#include "testcompletiontable.moc"
......@@ -5,9 +5,12 @@
*/
#include "completionmodel.h"
#include "completiontrie.h"
#include "completiontable.h"
#include "logging.h"
#include <algorithm>
#include <iterator>
#include <QIcon>
#include <QRegularExpression>
......@@ -20,26 +23,26 @@ LatexCompletionModel::LatexCompletionModel(QObject *parent)
{
setHasGroups(false);
}
// Recomputes the completion matches for the text that `range` covers in
// `view`'s document. The whole update is wrapped in a model reset.
//
// NOTE(review): this span is a rendered diff without +/- markers, so the
// removed trie-based lookup (try/catch over completiontrie) and the added
// table-based lookup (lower_bound/upper_bound over completiontable) both
// appear below.
void LatexCompletionModel::completionInvoked(KTextEditor::View *view,
const KTextEditor::Range &range,
KTextEditor::CodeCompletionModel::InvocationType invocationType)
{
Q_UNUSED(invocationType);
beginResetModel();
m_matches.clear();
// Both ends equal => second - first == 0, i.e. no matches until a lookup succeeds.
m_matches.first = m_matches.second = -1;
auto word = view->document()->text(range);
// Only text that starts with a backslash is looked up.
if (!word.isEmpty() && word[0] == QLatin1Char('\\')) {
try {
auto prefixrange = completiontrie.equal_prefix_range(word.toStdString());
for (auto it = prefixrange.first; it != prefixrange.second; ++it) {
m_matches.push_back(QPair(QString::fromStdString(it.key()), &(*it)));
}
} catch (const std::exception &e) {
qCCritical(LATEXCOMPLETION) << "caught exception while generating completions for " << word;
qCCritical(LATEXCOMPLETION) << e.what();
} catch (...) {
qCCritical(LATEXCOMPLETION) << "caught exception while generating completions for " << word;
}
// Binary search for the run of table entries that start with `word`.
// This relies on completiontable being sorted by `completion` -- presumably
// guaranteed by the generator script; the autotest checks the ordering.
auto beginit = completiontable.constBegin();
auto endit = completiontable.constEnd();
// An entry that starts with `word` is never reported as "less than" it, so
// the lower bound stops at the first entry that is not ordered before `word`.
auto prefixrangestart = std::lower_bound(beginit, endit, word, [](const Completion &a, const QString &b) -> bool {
return a.completion.startsWith(b) ? false : a.completion < b;
});
// `word` is never reported as "less than" an entry it is a prefix of, so the
// upper bound stops at the first greater entry that does not start with `word`.
auto prefixrangeend = std::upper_bound(beginit, endit, word, [](const QString &a, const Completion &b) -> bool {
return b.completion.startsWith(a) ? false : a < b.completion;
});
// Store the matches as a half-open index range [first, second) into completiontable.
m_matches.first = std::distance(beginit, prefixrangestart);
m_matches.second = std::distance(beginit, prefixrangeend);
}
endResetModel();
}
......@@ -80,7 +83,7 @@ int LatexCompletionModel::rowCount(const QModelIndex &parent) const
} else if (parent.parent().isValid()) {
return 0; // Completion-items have no children
} else {
return m_matches.size();
return m_matches.second - m_matches.first;
}
}
......@@ -97,7 +100,7 @@ QModelIndex LatexCompletionModel::index(int row, int column, const QModelIndex &
return QModelIndex();
}
if (row < 0 || row >= m_matches.size() || column < 0 || column >= ColumnCount) {
if (row < 0 || row >= m_matches.second - m_matches.first || column < 0 || column >= ColumnCount) {
return QModelIndex();
}
......@@ -129,20 +132,20 @@ QVariant LatexCompletionModel::data(const QModelIndex &index, int role) const
}
}
if (index.isValid() && m_matches.size()) {
auto symbol = m_matches[index.row()];
if (index.isValid() && m_matches.second - m_matches.first > 0) {
const Completion &completion = completiontable[m_matches.first + index.row()];
if (role == IsExpandable)
return true; // if it's not expandable, the description will often be cut off
// because apparently the ItemSelected role is not taken into account
// when determining the completion widget width. So expanding is
// the only way to make sure that the complete description is available.
else if (role == ItemSelected || role == ExpandingWidget)
return QStringLiteral("<table><tr><td>%1</td><td>%2</td></tr></table>").arg(symbol.second->codepoint, symbol.second->name);
return QStringLiteral("<table><tr><td>%1</td><td>%2</td></tr></table>").arg(completion.codepoint, completion.name);
else if (role == Qt::DisplayRole) {
if (index.column() == Name)
return symbol.first;
return completion.completion;
else if (index.column() == Postfix)
return symbol.second->chars;
return completion.chars;
} else if (index.column() == Icon && role == Qt::DecorationRole) {
static const QIcon icon(QIcon::fromTheme(QStringLiteral("texcompiler")).pixmap(QSize(16, 16)));
return icon;
......
......@@ -41,6 +41,6 @@ public:
QVariant data(const QModelIndex &index, int role) const override;
private:
QVector<QPair<QString, const Completion *>> m_matches;
QPair<int, int> m_matches = {0, 0};
};
#endif
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -6,7 +6,7 @@
JULIA_UNICODE_DOCUMENTATION_URL = "https://docs.julialang.org/en/v1/manual/unicode-input/"
CONTAINER_ID = "documenter-page"
OUTFNAME = "completiontrie.h"
OUTFNAME = "completiontable.h"
from urllib import request
from html.parser import HTMLParser
......@@ -74,16 +74,16 @@ completionchars = set()
wordchars = set(list(ascii_letters) + list(digits) + ["_"])
with open(OUTFNAME, "w") as out:
out.write("""\
#include <tsl/htrie_map.h>
#include <QString>
#include <QRegularExpression>
struct Completion {
QString completion;
QString codepoint;
QString chars;
QString name;
};
static const tsl::htrie_map<char, Completion> completiontrie({
static const QVector<Completion> completiontable({
""")
for i, completion in enumerate(parser.table):
......@@ -93,10 +93,10 @@ static const tsl::htrie_map<char, Completion> completiontrie({
latexsym = completion[2].replace("\\", "\\\\")
if i > 0:
out.write(",")
out.write(f"{{\n\"{latexsym}\",\n{{\n"
out.write(f"{{\n QStringLiteral(\"{latexsym}\"),\n"
f" QStringLiteral(\"{completion[0]}\"),\n"
f" QStringLiteral(u\"{completion[1]}\"),\n"
f" QStringLiteral(\"{completion[3]}\")\n}}\n}}\n")
f" QStringLiteral(\"{completion[3]}\")\n}}\n")
out.write("""\
});
""")
......
language: cpp
before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get update -y -qq; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get install -y -qq libboost-test-dev; fi
compiler:
- clang
- gcc
dist: trusty
os:
- linux
- osx
script:
- cd tests
- mkdir build
- cd build
- cmake ..
- make
- ./tsl_hat_trie_tests
cmake_minimum_required(VERSION 3.1)
project(tsl_hat_trie)

# Header-only library: an INTERFACE target carries only usage requirements.
add_library(tsl_hat_trie INTERFACE)
# Use tsl::hat_trie as target, more consistent with other libraries conventions (Boost, Qt, ...)
add_library(tsl::hat_trie ALIAS tsl_hat_trie)

target_include_directories(tsl_hat_trie INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include")

# Listing the headers as INTERFACE sources makes them show up in consumers' projects.
target_sources(tsl_hat_trie INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/array-hash/array_growth_policy.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/array-hash/array_hash.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/array-hash/array_map.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/array-hash/array_set.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/htrie_hash.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/htrie_map.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/tsl/htrie_set.h")

# The cxx_std_11 meta-feature is only available since CMake 3.8.
# `VERSION_GREATER "3.7"` would also match 3.7.x patch releases, which do not
# know the feature, so test for "not older than 3.8" instead
# (VERSION_GREATER_EQUAL itself needs CMake 3.7, above our 3.1 minimum).
if(NOT CMAKE_VERSION VERSION_LESS "3.8")
target_compile_features(tsl_hat_trie INTERFACE cxx_std_11)
endif()
MIT License
Copyright (c) 2017 Thibaut
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
This diff is collapsed.
os: Visual Studio 2015
platform:
- Win32
- x64
configuration:
- Debug
- Release
build_script:
- set BOOST_ROOT=C:\Libraries\boost_1_62_0
- if %PLATFORM% == Win32 set BOOST_LIBRARYDIR=C:\Libraries\boost_1_62_0\lib32-msvc-14.0
- if %PLATFORM% == x64 set BOOST_LIBRARYDIR=C:\Libraries\boost_1_62_0\lib64-msvc-14.0
- cd tests
- mkdir build
- cd build
- if %PLATFORM% == Win32 cmake .. -G"Visual Studio 14 2015"
- if %PLATFORM% == x64 cmake .. -G"Visual Studio 14 2015 Win64"
- cmake --build . --config %CONFIGURATION%
test_script:
- set PATH=%PATH%;%BOOST_LIBRARYDIR%
- .\%CONFIGURATION%\tsl_hat_trie_tests.exe
This diff is collapsed.
/**
* MIT License
*
* Copyright (c) 2017 Tessil
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSL_ARRAY_GROWTH_POLICY_H
#define TSL_ARRAY_GROWTH_POLICY_H
#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstddef>
#include <iterator>
#include <limits>
#include <ratio>
#include <stdexcept>
namespace tsl {
namespace ah {
/**
* Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two. It allows
* the table to use a mask operation instead of a modulo operation to map a hash to a bucket.
*
* GrowthFactor must be a power of two >= 2.
*/
template<std::size_t GrowthFactor>
class power_of_two_growth_policy {
public:
    /**
     * Build the policy for a table that needs at least `min_bucket_count_in_out` buckets.
     *
     * A non-zero request is rounded up to the next power of two and written back through
     * the reference; a request of 0 stays 0 and makes bucket_for_hash always return 0.
     *
     * Throws std::length_error if the request is larger than max_bucket_count().
     */
    explicit power_of_two_growth_policy(std::size_t& min_bucket_count_in_out) {
        if(min_bucket_count_in_out > max_bucket_count()) {
            throw std::length_error("The hash table exceeds its maxmimum size.");
        }

        if(min_bucket_count_in_out == 0) {
            m_mask = 0;
            return;
        }

        min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
        // With a power-of-two bucket count, (count - 1) is an all-ones mask of the low bits.
        m_mask = min_bucket_count_in_out - 1;
    }

    /**
     * Map a hash to a bucket index by masking its low bits.
     * Yields 0 for every hash while the mask is 0.
     */
    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return hash & m_mask;
    }

    /**
     * Bucket count to use for the next growth: the current count times GrowthFactor.
     *
     * Throws std::length_error if that would exceed max_bucket_count().
     */
    std::size_t next_bucket_count() const {
        const std::size_t current_count = m_mask + 1;
        if(current_count > max_bucket_count() / GrowthFactor) {
            throw std::length_error("The hash table exceeds its maxmimum size.");
        }

        return current_count * GrowthFactor;
    }

    /**
     * Largest bucket count this policy accepts: the largest power of two a std::size_t can hold.
     */
    std::size_t max_bucket_count() const {
        return (std::numeric_limits<std::size_t>::max() / 2) + 1;
    }

    /**
     * Put the policy back in the state of a 0-bucket table, so that
     * bucket_for_hash returns 0 for every hash.
     */
    void clear() noexcept {
        m_mask = 0;
    }

private:
    // Smallest power of two that is >= value (1 for an input of 0).
    static std::size_t round_up_to_power_of_two(std::size_t value) {
        if(value == 0) {
            return 1;
        }
        if(is_power_of_two(value)) {
            return value;
        }

        // Smear the highest set bit of (value - 1) into every lower position,
        // then add one to reach the next power of two.
        std::size_t filled = value - 1;
        for(std::size_t shift = 1; shift < sizeof(std::size_t) * CHAR_BIT; shift <<= 1) {
            filled |= filled >> shift;
        }

        return filled + 1;
    }

    // True when exactly one bit of value is set.
    static constexpr bool is_power_of_two(std::size_t value) {
        return value != 0 && (value & (value - 1)) == 0;
    }

protected:
    static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2, "GrowthFactor must be a power of two >= 2.");

    // bucket_count - 1, or 0 when there are no buckets.
    std::size_t m_mask;
};
/**
* Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
* to a bucket. Slower but it can be useful if you want a slower growth.
*/
template<class GrowthFactor = std::ratio<3, 2>>
class mod_growth_policy {
public:
    /**
     * Build the policy for a table with `min_bucket_count_in_out` buckets.
     * The requested count is used as the modulus unchanged; a request of 0 uses a
     * modulus of 1, so bucket_for_hash returns 0 for every hash.
     *
     * Throws std::length_error if the request is larger than max_bucket_count().
     */
    explicit mod_growth_policy(std::size_t& min_bucket_count_in_out) {
        if(min_bucket_count_in_out > max_bucket_count()) {
            throw std::length_error("The hash table exceeds its maxmimum size.");
        }

        m_mod = (min_bucket_count_in_out > 0) ? min_bucket_count_in_out : 1;
    }

    /** Map a hash to a bucket index with a plain modulo. */
    std::size_t bucket_for_hash(std::size_t hash) const noexcept {
        return hash % m_mod;
    }

    /**
     * Bucket count to use for the next growth: the current modulus times the growth
     * factor, rounded up and capped at max_bucket_count().
     *
     * Throws std::length_error if the table is already at max_bucket_count() or the
     * scaled value is not a normal floating point number.
     */
    std::size_t next_bucket_count() const {
        if(m_mod == max_bucket_count()) {
            throw std::length_error("The hash table exceeds its maxmimum size.");
        }

        const double grown = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
        if(!std::isnormal(grown)) {
            throw std::length_error("The hash table exceeds its maxmimum size.");
        }

        return (grown > double(max_bucket_count())) ? max_bucket_count() : std::size_t(grown);
    }

    /** Largest bucket count this policy accepts. */
    std::size_t max_bucket_count() const {
        return MAX_BUCKET_COUNT;
    }

    /** Put the policy back in the state of a 0-bucket table (modulus 1). */
    void clear() noexcept {
        m_mod = 1;
    }

private:
    // Growth ratio as a floating point value, e.g. 1.5 for std::ratio<3, 2>.
    static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR = 1.0 * GrowthFactor::num / GrowthFactor::den;
    // Largest std::size_t divided by the growth factor.
    static const std::size_t MAX_BUCKET_COUNT =
        std::size_t(double(
            std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR
        ));

    static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");

    // Current modulus; always >= 1.
    std::size_t m_mod;
};
// Lookup tables shared by the prime-based growth policy.
namespace detail {
// Bucket counts the prime-based policy can choose from, in ascending order.
// prime_growth_policy picks an entry with std::lower_bound, so the ordering matters.
static constexpr const std::array<std::size_t, 40> PRIMES = {{
1ul, 5ul, 17ul, 29ul, 37ul, 53ul, 67ul, 79ul, 97ul, 131ul, 193ul, 257ul, 389ul, 521ul, 769ul, 1031ul,
1543ul, 2053ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul,
402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
}};
// Returns hash % PRIMES[IPrime]; the divisor is fixed per instantiation.
template<unsigned int IPrime>
static constexpr std::size_t mod(std::size_t hash) { return hash % PRIMES[IPrime]; }
// MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo as the
// compiler can optimize the modulo code better with a constant known at the compilation.
// Entry i must be &mod<i>, so this table has to stay the same length as PRIMES.
static constexpr const std::array<std::size_t(*)(std::size_t), 40> MOD_PRIME = {{
&mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>, &mod<9>, &mod<10>,
&mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>,
&mod<21>, &mod<22>, &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>,
&mod<31>, &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37> , &mod<38>, &mod<39>
}};
}
/**
* Grow the hash table by using prime numbers as bucket count. Slower than tsl::ah::power_of_two_growth_policy in
* general but will probably distribute the values around better in the buckets with a poor hash function.
*
* To allow the compiler to optimize the modulo operation, a lookup table is used with constant prime numbers.
*
* With a switch the code would look like:
* \code
* switch(iprime) { // iprime is the index of the current prime of the hash table
* case 0: hash % 1ul;
* break;
* case 1: hash % 5ul;
* break;
* case 2: hash % 17ul;
* break;
* ...
* }
* \endcode
*
* Due to the constant variable in the modulo the compiler is able to optimize the operation
* by a series of multiplications, subtractions and shifts.
*
* The 'hash % 5' could become something like 'hash - ((hash * 0xCCCCCCCD) >> 34) * 5' in a 64-bit environment.
*/
class prime_growth_policy {
public:
explicit prime_growth_policy(std::size_t& min_bucket_count_in_out) {
auto it_prime = std::lower_bound(detail::PRIMES.begin(),
detail::PRIMES.end(), min_bucket_count_in_out);
if(it_prime == detail::PRIMES.end()) {
throw std::length_error("The hash table exceeds its maxmimum size.");
}
m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
if(min_bucket_count_in_out > 0) {
min_bucket_count_in_out = *it_prime;
}
else {
min_bucket_count_in_out = 0;
}
}