Commit 5d7263ff authored by Pinak Ahuja
Browse files

Add various iters and fetchTermsStartingWith to PositionDB

parent 6eeb1980
......@@ -23,6 +23,8 @@
#include "postingiterator.h"
#include "singledbtest.h"
#include <QRegularExpression>
using namespace Baloo;
class PositionDBTest : public SingleDBTest
......@@ -80,6 +82,86 @@ private Q_SLOTS:
QCOMPARE(it->docId(), static_cast<quint64>(0));
QVERIFY(it->positions().isEmpty());
}
void testPrefixIter() {
PositionDB db(PositionDB::create(m_txn), m_txn);
db.put("abc", {PositionInfo(1), PositionInfo(4), PositionInfo(5), PositionInfo(9), PositionInfo(11)});
db.put("fir", {PositionInfo(1), PositionInfo(3), PositionInfo(5)});
db.put("fire", {PositionInfo(1), PositionInfo(8), PositionInfo(9)});
db.put("fore", {PositionInfo(2), PositionInfo(3), PositionInfo(5)});
PostingIterator* it = db.prefixIter("fi");
QVERIFY(it);
QVector<quint64> result = {1, 3, 5, 8, 9};
for (quint64 val : result) {
QCOMPARE(it->next(), static_cast<quint64>(val));
QCOMPARE(it->docId(), static_cast<quint64>(val));
QVERIFY(it->positions().isEmpty());
}
}
void testRegExpIter() {
PositionDB db(PositionDB::create(m_txn), m_txn);
db.put("abc", {PositionInfo(1), PositionInfo(4), PositionInfo(5), PositionInfo(9), PositionInfo(11)});
db.put("fir", {PositionInfo(1), PositionInfo(3), PositionInfo(5), PositionInfo(7)});
db.put("fire", {PositionInfo(1), PositionInfo(8)});
db.put("fore", {PositionInfo(2), PositionInfo(3), PositionInfo(5)});
db.put("zib", {PositionInfo(4), PositionInfo(5), PositionInfo(6)});
PostingIterator* it = db.regexpIter(QRegularExpression(".re"), QByteArray("f"));
QVERIFY(it);
QVector<quint64> result = {1, 2, 3, 5, 8};
for (quint64 val : result) {
QCOMPARE(it->next(), static_cast<quint64>(val));
QCOMPARE(it->docId(), static_cast<quint64>(val));
}
// Non existing
it = db.regexpIter(QRegularExpression("dub"), QByteArray("f"));
QVERIFY(it == 0);
}
void testCompIter() {
PositionDB db(PositionDB::create(m_txn), m_txn);
db.put("abc", {PositionInfo(1), PositionInfo(4), PositionInfo(5), PositionInfo(9), PositionInfo(11)});
db.put("R1", {PositionInfo(1), PositionInfo(3), PositionInfo(5), PositionInfo(7)});
db.put("R2", {PositionInfo(1), PositionInfo(8)});
PostingIterator* it = db.compIter("R", "2", PositionDB::GreaterEqual);
QVERIFY(it);
QVector<quint64> result = {1, 2, 3, 5, 8};
for (quint64 val : result) {
QCOMPARE(it->next(), static_cast<quint64>(val));
QCOMPARE(it->docId(), static_cast<quint64>(val));
}
it = db.compIter("R", "2", PositionDB::LessEqual);
QVERIFY(it);
result = {1, 3, 5, 7, 8};
for (quint64 val : result) {
QCOMPARE(it->next(), static_cast<quint64>(val));
QCOMPARE(it->docId(), static_cast<quint64>(val));
}
}
void testFetchTermsStartingWith() {
PositionDB db(PositionDB::create(m_txn), m_txn);
db.put("abc", {PositionInfo(1), PositionInfo(4), PositionInfo(5), PositionInfo(9), PositionInfo(11)});
db.put("fir", {PositionInfo(1), PositionInfo(3), PositionInfo(5), PositionInfo(7)});
db.put("fire", {PositionInfo(1), PositionInfo(8)});
db.put("fore", {PositionInfo(2), PositionInfo(3), PositionInfo(5)});
db.put("zib", {PositionInfo(4), PositionInfo(5), PositionInfo(6)});
QVector<QByteArray> list = {"fir", "fire", "fore"};
QCOMPARE(db.fetchTermsStartingWith("f"), list);
}
};
QTEST_MAIN(PositionDBTest)
......
......@@ -22,8 +22,10 @@
#include "positioncodec.h"
#include "positioninfo.h"
#include "postingiterator.h"
#include "orpostingiterator.h"
#include <QDebug>
#include <QRegularExpression>
using namespace Baloo;
......@@ -116,13 +118,56 @@ void PositionDB::del(const QByteArray& term)
Q_ASSERT_X(rc == 0, "PositionDB::del", mdb_strerror(rc));
}
/**
 * Collects every key of the database that starts with \p term.
 *
 * The cursor is positioned on the first key that is greater than or equal
 * to \p term and then advanced until a key no longer carries the prefix or
 * the end of the database is reached.
 *
 * \param term the prefix to look for
 * \return the matching keys in cursor order; empty if there is none
 */
QVector<QByteArray> PositionDB::fetchTermsStartingWith(const QByteArray& term)
{
    MDB_val key;
    key.mv_size = term.size();
    key.mv_data = static_cast<void*>(const_cast<char*>(term.constData()));

    MDB_cursor* cursor = nullptr;
    int rc = mdb_cursor_open(m_txn, m_dbi, &cursor);
    Q_ASSERT_X(rc == 0, "PositionDB::fetchTermsStartingWith", mdb_strerror(rc));
    if (rc != 0) {
        // Without a cursor there is nothing to iterate (and nothing to close).
        return QVector<QByteArray>();
    }

    QVector<QByteArray> terms;

    // One loop serves both the initial seek (MDB_SET_RANGE) and the
    // following steps (MDB_NEXT). Any non-zero code ends the scan, so a
    // failed lookup is never treated as a hit when asserts are compiled out.
    rc = mdb_cursor_get(cursor, &key, nullptr, MDB_SET_RANGE);
    while (rc == 0) {
        // Cheap, non-owning view used only for the prefix check.
        const QByteArray view = QByteArray::fromRawData(static_cast<char*>(key.mv_data), key.mv_size);
        if (!view.startsWith(term)) {
            break;
        }

        // Deep copy: the key memory belongs to LMDB, while the result
        // outlives this function and must not keep pointing into it.
        terms << QByteArray(view.constData(), view.size());

        rc = mdb_cursor_get(cursor, &key, nullptr, MDB_NEXT);
    }
    Q_ASSERT_X(rc == 0 || rc == MDB_NOTFOUND, "PositionDB::fetchTermsStartingWith", mdb_strerror(rc));

    mdb_cursor_close(cursor);
    return terms;
}
//
// Query
//
class DBPositionIterator : public PostingIterator {
public:
DBPositionIterator(char* data, uint size)
DBPositionIterator(void* data, uint size)
: m_pos(-1)
{
PositionCodec codec;
......@@ -175,6 +220,94 @@ PostingIterator* PositionDB::iter(const QByteArray& term)
return new DBPositionIterator(static_cast<char*>(val.mv_data), val.mv_size);
}
/**
 * Builds an iterator over the union of all terms that start with \p prefix
 * and are accepted by \p validate.
 *
 * \param prefix   non-empty prefix every candidate key must start with
 * \param validate callable taking the full key (prefix included) as a
 *                 const QByteArray& and returning true to include the term
 * \return an OrPostingIterator over the accepted terms, or a null pointer
 *         when no term was accepted
 */
template <typename Validator>
PostingIterator* PositionDB::iter(const QByteArray& prefix, Validator validate)
{
    Q_ASSERT(!prefix.isEmpty());

    MDB_val key;
    key.mv_size = prefix.size();
    key.mv_data = static_cast<void*>(const_cast<char*>(prefix.constData()));

    MDB_cursor* cursor = nullptr;
    int rc = mdb_cursor_open(m_txn, m_dbi, &cursor);
    Q_ASSERT_X(rc == 0, "PositionDB::iter", mdb_strerror(rc));
    if (rc != 0) {
        // Without a cursor there is nothing to iterate (and nothing to close).
        return nullptr;
    }

    QVector<PostingIterator*> termIterators;
    MDB_val val;

    // One loop serves both the initial seek (MDB_SET_RANGE) and the
    // following steps (MDB_NEXT). Any non-zero code ends the scan, so key
    // and val are only read after a successful lookup, even when asserts
    // are compiled out.
    rc = mdb_cursor_get(cursor, &key, &val, MDB_SET_RANGE);
    while (rc == 0) {
        // Non-owning view of the current key; it does not leave this loop.
        const QByteArray arr = QByteArray::fromRawData(static_cast<char*>(key.mv_data), key.mv_size);
        if (!arr.startsWith(prefix)) {
            break;
        }
        if (validate(arr)) {
            termIterators << new DBPositionIterator(val.mv_data, val.mv_size);
        }

        rc = mdb_cursor_get(cursor, &key, &val, MDB_NEXT);
    }
    Q_ASSERT_X(rc == 0 || rc == MDB_NOTFOUND, "PositionDB::iter", mdb_strerror(rc));

    mdb_cursor_close(cursor);

    if (termIterators.isEmpty()) {
        return nullptr;
    }
    return new OrPostingIterator(termIterators);
}
/**
 * Returns an iterator over every term that starts with \p prefix, or a
 * null pointer when there is no such term.
 */
PostingIterator* PositionDB::prefixIter(const QByteArray& prefix)
{
    // No filtering beyond the prefix itself: accept every candidate.
    return iter(prefix, [] (const QByteArray&) {
        return true;
    });
}
/**
 * Returns an iterator over the terms that start with \p prefix and whose
 * remainder (the term with the prefix removed, decoded as UTF-8) matches
 * \p regexp. Returns a null pointer when no term qualifies.
 */
PostingIterator* PositionDB::regexpIter(const QRegularExpression& regexp, const QByteArray& prefix)
{
    const int skip = prefix.length();

    return iter(prefix, [&regexp, skip] (const QByteArray& fullTerm) {
        // The expression is applied to the part after the prefix only.
        const QByteArray remainder = fullTerm.mid(skip);
        return regexp.match(QString::fromUtf8(remainder)).hasMatch();
    });
}
/**
 * Returns an iterator over the terms that start with \p prefix and whose
 * remainder (the term with the prefix removed) compares to \p comVal as
 * requested by \p com. Returns a null pointer when no term qualifies.
 *
 * \param prefix prefix shared by all candidate terms
 * \param comVal non-empty value the remainder is compared against
 * \param com    LessEqual or GreaterEqual
 */
PostingIterator* PositionDB::compIter(const QByteArray& prefix, const QByteArray& comVal, PositionDB::Comparator com)
{
    Q_ASSERT(!comVal.isEmpty());

    const int offset = prefix.length();

    return iter(prefix, [offset, &comVal, com] (const QByteArray& fullTerm) -> bool {
        // Non-owning view of the bytes that follow the prefix.
        const QByteArray remainder = QByteArray::fromRawData(fullTerm.constData() + offset, fullTerm.length() - offset);

        if (com == LessEqual) {
            return remainder <= comVal;
        }
        if (com == GreaterEqual) {
            return remainder >= comVal;
        }
        return false;
    });
}
QMap<QByteArray, QVector<PositionInfo>> PositionDB::toTestMap() const
{
MDB_cursor* cursor;
......
......@@ -46,10 +46,24 @@ public:
QVector<PositionInfo> get(const QByteArray& term);
void del(const QByteArray& term);
QVector<QByteArray> fetchTermsStartingWith(const QByteArray& term);
PostingIterator* iter(const QByteArray& term);
PostingIterator* prefixIter(const QByteArray& term);
PostingIterator* regexpIter(const QRegularExpression& regexp, const QByteArray& prefix);
/**
 * Comparison mode passed to compIter(). The comparison is made between the
 * part of a term that follows the prefix and the value given to compIter().
 */
enum Comparator {
LessEqual, ///< accept terms whose remainder is <= the given value
GreaterEqual ///< accept terms whose remainder is >= the given value
};
PostingIterator* compIter(const QByteArray& prefix, const QByteArray& val, Comparator com);
QMap<QByteArray, QVector<PositionInfo>> toTestMap() const;
private:
template <typename Validator>
PostingIterator* iter(const QByteArray& prefix, Validator validate);
MDB_txn* m_txn;
MDB_dbi m_dbi;
};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment