Commit c8d8abd1 authored by Ahmad Samir
Browse files

UrlFilter::FullUrlRegExp matches more valid urls

Add unittest.

This is based on:
https://datatracker.ietf.org/doc/html/rfc3986

BUG: 452978
FIXED-IN: 22.08
parent 89d69221
......@@ -129,3 +129,8 @@ ecm_mark_nongui_executable(Vt102EmulationTest)
add_test(NAME Vt102EmulationTest COMMAND Vt102EmulationTest)
target_link_libraries(Vt102EmulationTest ${KONSOLE_TEST_LIBS})
include(ECMAddTests)
# Register the HotSpotFilterTest unit test: it is built from
# HotSpotFilterTest.cpp and linked against KONSOLE_TEST_LIBS.
ecm_add_test(
HotSpotFilterTest.cpp
LINK_LIBRARIES ${KONSOLE_TEST_LIBS}
)
/*
SPDX-FileCopyrightText: 2022 Ahmad Samir <a.samirh78@gmail.com>
SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "HotSpotFilterTest.h"
#include <QTest>
QTEST_GUILESS_MAIN(HotSpotFilterTest)
void HotSpotFilterTest::testUrlFilterRegex_data()
{
QTest::addColumn<QString>("url");
QTest::addColumn<bool>("matchResult");
// A space, \n, or \t before the url to match what happens at runtime,
// i.e. to match "http" but not "foohttp"
QTest::newRow("url_simple") << " https://api.kde.org" << true;
QTest::newRow("url_with_port") << "\nhttps://api.kde.org:2098" << true;
QTest::newRow("url_with_path") << "https://api.kde.org/path/to/somewhere" << true;
QTest::newRow("url_with_query") << "https://user:pass@api.kde.org?somequery=foo" << true;
QTest::newRow("url_with_port_path") << " https://api.kde.org:2098/path/to/somewhere" << true;
QTest::newRow("url_with_user_password") << "\thttps://user:blah@api.kde.org" << true;
QTest::newRow("url_with_user_password_port_fragment") << " https://user:blah@api.kde.org:2098#fragment" << true;
QTest::newRow("url_all_bells") << " https://user:pass@api.kde.org:2098/path/to/somewhere?somequery=foo#fragment" << true;
QTest::newRow("uppercase") << " https://invent.kde.org/frameworks/ktexteditor/-/blob/master/README.md" << true;
QTest::newRow("markup") << " [https://foobar](https://foobar)" << true;
QTest::newRow("bad_url_no_scheme") << QStringLiteral(" www.kde.org") << false;
}
/**
 * Runs Konsole::UrlFilter::FullUrlRegExp against the current data row.
 *
 * First checks that a match is (or is not) found as the row expects; for
 * rows that match, the full capture is then compared with the expected URL.
 */
void HotSpotFilterTest::testUrlFilterRegex()
{
    QFETCH(QString, url);
    QFETCH(bool, matchResult);

    const QRegularExpressionMatch match = Konsole::UrlFilter::FullUrlRegExp.match(url);
    QCOMPARE(match.hasMatch(), matchResult);

    // The "markup" row holds the URL twice inside markdown link syntax; the
    // first match must be just the URL between the square brackets.
    const bool isMarkupRow = strcmp(QTest::currentDataTag(), "markup") == 0;
    if (isMarkupRow) {
        QCOMPARE(match.capturedView(0), u"https://foobar");
        return;
    }

    if (!matchResult) {
        return; // nothing captured, nothing further to compare
    }

    // Every other matching row is the URL plus optional surrounding whitespace.
    QCOMPARE(match.capturedView(0), url.trimmed());
}
/*
SPDX-FileCopyrightText: 2022 Ahmad Samir <a.samirh78@gmail.com>
SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef HOTSPOTFILTERTEST_H
#define HOTSPOTFILTERTEST_H
#include "filterHotSpots/UrlFilter.h"
/**
 * Unit test for the URL hotspot filter: checks which strings
 * Konsole::UrlFilter::FullUrlRegExp matches and what it captures.
 */
class HotSpotFilterTest : public QObject
{
Q_OBJECT
private Q_SLOTS:
// Provides the rows ("url", "matchResult") consumed by testUrlFilterRegex()
void testUrlFilterRegex_data();
// Matches each row's "url" against FullUrlRegExp and verifies the outcome
void testUrlFilterRegex();
};
#endif // HOTSPOTFILTERTEST_H
......@@ -15,9 +15,51 @@ using namespace Konsole;
// used for finding URLs in the text, especially if they are very general and could match very long
// pieces of text.
// Please be careful when altering them.
// Matches text starting with either "www." (not directly followed by another
// dot) or "protocolname://", where the protocol name is a lowercase letter
// followed by any of a-z, 0-9, '+', '.' and '-'.
// That prefix is followed by one or more characters other than whitespace,
// <, >, ', ", ], !, ), : and comma.
// The last character obeys the same exclusions and additionally must not be a
// dot, i.e. the URL can end with anything that is allowed inside it except '.'
const QRegularExpression UrlFilter::FullUrlRegExp(QStringLiteral("(www\\.(?!\\.)|[a-z][a-z0-9+.-]*://)[^!,\\s<>'\"\\]\\)\\:]+[^!,\\.\\s<>'\"\\]\\)\\:]"));
// FullUrlRegExp is implemented based on:
// https://datatracker.ietf.org/doc/html/rfc3986
// See above URL for what "unreserved", "pct-encoded" ...etc mean, also
// for the regex used for each part of the url being matched against

// unreserved / pct-encoded / sub-delims
// [a-zA-Z0-9\\-._~%!$&'()*+,;=]
// The above string is used in various char[] below.
// Both lowercase and uppercase ASCII letters are listed explicitly in every
// character class, so each sub-pattern is usable without relying on a
// case-insensitive match option.

// All () groups are non-capturing (by using "(?:)" notation)
// less bookkeeping on the PCRE engine side

// scheme://
// - Must start with an ASCII letter, preceded by any non-word character
//   (or the start of the text), so "http" but not "mhttp"
static const char scheme[] = "(?<=^|\\s|\\W)(?:[a-zA-Z][a-zA-Z0-9+\\-.]*://)";

// user:password@ (the whole group is optional, as is the ":password" part)
static const char userInfo[] =
    "(?:"
    "[a-zA-Z0-9\\-._~%!$&'()*+,;=]+?:?"
    "[a-zA-Z0-9\\-._~%!$&'()*+,;=]+@"
    ")?";

static const char host[] = "(?:[a-zA-Z0-9\\-._~%!$&'()*+,;=]+)"; // www.foo.bar
static const char port[] = "(?::[0-9]+)?"; // :1234
static const char path[] = "(?:[a-zA-Z0-9\\-._~%!$&'()*+,;=:@/]+)?"; // /path/to/some/place
static const char query[] = "(?:\\?[a-zA-Z0-9\\-._~%!$&'()*+,;=:@/]+)?"; // "?somequery=bar"

// "#fragment"; per rfc3986 a fragment is made of pchar / "/" / "?", i.e. the
// same characters as a path segment plus '?', so that anchors such as
// "#section-3.5" are matched in full
static const char fragment[] = "(?:#[a-zA-Z0-9\\-._~%!$&'()*+,;=:@/?]+)?";
using LS1 = QLatin1String;
/* clang-format off */
const QRegularExpression UrlFilter::FullUrlRegExp(
LS1(scheme)
+ LS1(userInfo)
+ LS1(host)
+ LS1(port)
+ LS1(path)
+ LS1(query)
+ LS1(fragment)
);
/* clang-format on */
/////////////////////////////////////////////
// email address:
// [word chars, dots or dashes]@[word chars, dots or dashes].[word chars]
......
......@@ -9,11 +9,14 @@
#define URLFILTER_H
#include "RegExpFilter.h"
#include "konsoleprivate_export.h"
namespace Konsole
{
/** A filter which matches URLs in blocks of text */
class UrlFilter : public RegExpFilter
// Exported for unittests
class KONSOLEPRIVATE_EXPORT UrlFilter : public RegExpFilter
{
public:
UrlFilter();
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment