Verified Commit 68facf36 authored by Daniel Vrátil's avatar Daniel Vrátil 🤖
Browse files

ImapSet: optimize the set before serialization

Summary:
Client code usually generates the ImapSet by simply passing it a vector of
IDs or just by calling ImapSet::add() in a for loop for individual IDs. This
creates potentially massive amount of intervals. This change ads a method
that will optimize the ImapSet by merging adjacent or overlapping intervals.

I ran into this trying to move about 28'000 emails to Trash in KMail. The IMAP
resource calls ImapSet::add() in a for loop to add each Item to be moved into
the set. This just created 28'000 single-UID intervals and the resulting request
string was so long that my Courier IMAP server just dropped the connection, so
the IMAP resource reconnected and tried to send the same massive request again
and again and again...

By optimizing the set the string size is reduced from nearly 156 kilobytes
down to less than 4 kilobytes.

Reviewers: #kde_pim, vkrause

Reviewed By: #kde_pim, vkrause

Subscribers: kde-pim

Tags: #kde_pim

Differential Revision: https://phabricator.kde.org/D28944
parent 4da8833c
......@@ -25,6 +25,11 @@
using namespace KIMAP;
QByteArray operator""_ba(const char *str, std::size_t len)
{
return QByteArray{str, static_cast<int>(len)};
}
class ImapSetTest : public QObject
{
Q_OBJECT
......@@ -69,6 +74,100 @@ private Q_SLOTS:
//qDebug() << "Expects" << imapSet << "got" << ImapSet::fromImapSequenceSet( byteArray );
QCOMPARE(ImapSet::fromImapSequenceSet(byteArray), imapSet);
}
void testOptimize_data()
{
QTest::addColumn<ImapSet>("imapSet");
QTest::addColumn<QByteArray>("originalString");
QTest::addColumn<QByteArray>("expectedString");
{
ImapSet imapSet;
for (int i = 1; i <= 10; ++i) {
imapSet.add(i);
}
QTest::newRow("Neighbouring numbers") << imapSet << "1,2,3,4,5,6,7,8,9,10"_ba << "1:10"_ba;
}
{
ImapSet imapSet;
imapSet.add(ImapInterval{1, 3});
imapSet.add(ImapInterval{5, 7});
QTest::newRow("Neighbouring intervals with a gap") << imapSet << "1:3,5:7"_ba << "1:3,5:7"_ba;
}
{
ImapSet imapSet;
for (int i : { 5, 8, 3, 1, 9, 2, 7, 4, 6 }) {
imapSet.add(i);
}
QTest::newRow("Random order") << imapSet << "5,8,3,1,9,2,7,4,6"_ba << "1:9"_ba;
}
{
ImapSet imapSet;
imapSet.add(ImapInterval{1, 3});
imapSet.add(ImapInterval{2, 4});
QTest::newRow("Overlapping") << imapSet << "1:3,2:4"_ba << "1:4"_ba;
}
{
ImapSet imapSet;
imapSet.add(ImapInterval{2, 4});
imapSet.add(ImapInterval{1, 3});
imapSet.add(4);
imapSet.add(ImapInterval{7, 8});
imapSet.add(ImapInterval{8, 9});
QTest::newRow("Multiple overlapping with a gap") << imapSet << "2:4,1:3,4,7:8,8:9"_ba << "1:4,7:9"_ba;
}
{
ImapSet imapSet;
imapSet.add(5);
imapSet.add(8);
imapSet.add(10);
imapSet.add(ImapInterval{0, 20});
QTest::newRow("Overlapping multiple intervals") << imapSet << "5,8,10,0:20"_ba << "0:20"_ba;
}
{
ImapSet imapSet;
imapSet.add(1);
imapSet.add(ImapInterval{3, 5});
imapSet.add(ImapInterval{4, 0});
QTest::newRow("Open end overlap") << imapSet << "1,3:5,4:*"_ba << "1,3:*"_ba;
}
{
ImapSet imapSet;
imapSet.add(ImapInterval{1, 4});
imapSet.add(3);
QTest::newRow("Value within interval") << imapSet << "1:4,3"_ba << "1:4"_ba;
}
{
ImapSet imapSet;
imapSet.add(ImapInterval{1, 0});
imapSet.add(ImapInterval{3, 0});
imapSet.add(5);
QTest::newRow("Multiple open end intervals") << imapSet << "1:*,3:*,5"_ba << "1:*"_ba;
}
{
ImapSet imapSet;
for (ImapSet::Id id : {1, 2, 3, 5, 6, 8, 9, 10, 15, 16, 19, 20, 21, 23}) {
imapSet.add(id);
}
QTest::newRow("Merge single values") << imapSet << "1,2,3,5,6,8,9,10,15,16,19,20,21,23"_ba
<< "1:3,5:6,8:10,15:16,19:21,23"_ba;
}
}
void testOptimize()
{
QFETCH(ImapSet, imapSet);
QFETCH(QByteArray, originalString);
QFETCH(QByteArray, expectedString);
QCOMPARE(imapSet.intervals().size(), originalString.count(",") + 1);
QCOMPARE(imapSet.toImapSequenceSet(), originalString);
imapSet.optimize();
QCOMPARE(imapSet.intervals().size(), expectedString.count(",") + 1);
QCOMPARE(imapSet.toImapSequenceSet(), expectedString);
}
};
QTEST_GUILESS_MAIN(ImapSetTest)
......
......@@ -102,6 +102,7 @@ void CopyJob::doStart()
{
Q_D(CopyJob);
d->set.optimize();
QByteArray parameters = d->set.toImapSequenceSet() + ' ';
parameters += '\"' + KIMAP::encodeImapFolderName(d->mailBox.toUtf8()) + '\"';
......
......@@ -133,7 +133,7 @@ FetchJob::~FetchJob()
void FetchJob::setSequenceSet(const ImapSet &set)
{
Q_D(FetchJob);
Q_ASSERT(!set.toImapSequenceSet().trimmed().isEmpty());
Q_ASSERT(!set.isEmpty());
d->set = set;
}
......@@ -189,6 +189,7 @@ void FetchJob::doStart()
{
Q_D(FetchJob);
d->set.optimize();
QByteArray parameters = d->set.toImapSequenceSet() + ' ';
Q_ASSERT(!parameters.trimmed().isEmpty());
......
......@@ -316,6 +316,38 @@ bool ImapSet::isEmpty() const
return d->intervals.isEmpty();
}
void ImapSet::optimize()
{
// There's nothing to optimize if we have fewer than 2 intervals
if (d->intervals.size() < 2) {
return;
}
// Sort the intervals in ascending order by their beginning value
std::sort(d->intervals.begin(), d->intervals.end(),
[](const ImapInterval &lhs, const ImapInterval &rhs) {
return lhs.begin() < rhs.begin();
});
auto it = d->intervals.begin();
while (it != d->intervals.end() && it != std::prev(d->intervals.end())) {
auto next = std::next(it);
// +1 so that we also merge neighbouring intervals, e.g. 1:2,3:4 -> 1:4
if (it->hasDefinedEnd() && it->end() + 1 >= next->begin()) {
next->setBegin(it->begin());
if (next->hasDefinedEnd() && it->end() > next->end()) {
next->setEnd(it->end());
}
it = d->intervals.erase(it);
} else if (!it->hasDefinedEnd()) {
// We can eat up all the remaining intervals
it = d->intervals.erase(next, d->intervals.end());
} else {
++it;
}
}
}
QDebug &operator<<(QDebug &d, const ImapInterval &interval)
{
d << interval.toImapSequence();
......
......@@ -224,6 +224,15 @@ public:
*/
bool isEmpty() const;
/**
* Optimizes the ImapSet by sorting and merging overlapping intervals.
*
* Normally you shouldn't need to call this method. KIMAP will make sure
* to opimize the ImapSet before serializing it to string and sending it
* to the IMAP server.
*/
void optimize();
private:
class Private;
QSharedDataPointer<Private> d;
......
......@@ -107,6 +107,7 @@ void MoveJob::doStart()
{
Q_D(MoveJob);
d->set.optimize();
QByteArray parameters = d->set.toImapSequenceSet() + ' ';
parameters += '\"' + KIMAP::encodeImapFolderName(d->mailBox.toUtf8()) + '\"';
......
......@@ -231,7 +231,9 @@ Term::Term(Term::SequenceSearchKey key, const ImapSet &set)
case SequenceNumber:
break;
}
d->command += " " + set.toImapSequenceSet();
auto optimizedSet = set;
optimizedSet.optimize();
d->command += " " + optimizedSet.toImapSequenceSet();
}
Term::Term(const Term &other)
......
......@@ -163,6 +163,7 @@ void StoreJob::doStart()
return;
}
d->set.optimize();
QByteArray parameters = d->set.toImapSequenceSet() + ' ';
if (!d->flags.isEmpty()) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment