Commit e7a170f3 authored by Laurent Montel 😁
Browse files

Test excludeExtraHeader. Need to improve regexp.

CCBUG: 415254
parent 556e1219
......@@ -242,57 +242,6 @@ QString sigStatusToString(const QGpgME::Protocol *cryptProto, int status_code, G
return result;
}
/** Checks whether @p str contains external references. To be precise,
we only check whether @p str contains 'xxx="http[s]:' where xxx is
not href. Obfuscated external references are ignored on purpose.
*/
bool containsExternalReferences(const QString &str, const QString &extraHead)
{
const bool hasBaseInHeader = extraHead.contains(QLatin1String(
"<base href=\""), Qt::CaseInsensitive);
if (hasBaseInHeader && (str.contains(QLatin1String("href=\"/"), Qt::CaseInsensitive)
|| str.contains(QLatin1String("<img src=\"/"), Qt::CaseInsensitive))) {
return true;
}
int httpPos = str.indexOf(QLatin1String("\"http:"), Qt::CaseInsensitive);
int httpsPos = str.indexOf(QLatin1String("\"https:"), Qt::CaseInsensitive);
while (httpPos >= 0 || httpsPos >= 0) {
// pos = index of next occurrence of "http: or "https: whichever comes first
int pos = (httpPos < httpsPos)
? ((httpPos >= 0) ? httpPos : httpsPos)
: ((httpsPos >= 0) ? httpsPos : httpPos);
// look backwards for "href"
if (pos > 5) {
int hrefPos = str.lastIndexOf(QLatin1String("href"), pos - 5, Qt::CaseInsensitive);
// if no 'href' is found or the distance between 'href' and '"http[s]:'
// is larger than 7 (7 is the distance in 'href = "http[s]:') then
// we assume that we have found an external reference
if ((hrefPos == -1) || (pos - hrefPos > 7)) {
// HTML messages created by KMail itself for now contain the following:
// <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
// Make sure not to show an external references warning for this string
int dtdPos = str.indexOf(QLatin1String(
"http://www.w3.org/TR/html4/loose.dtd"), pos + 1);
if (dtdPos != (pos + 1)) {
return true;
}
}
}
// find next occurrence of "http: or "https:
if (pos == httpPos) {
httpPos = str.indexOf(QLatin1String("\"http:"), httpPos + 6, Qt::CaseInsensitive);
} else {
httpsPos = str.indexOf(QLatin1String("\"https:"), httpsPos + 7, Qt::CaseInsensitive);
}
}
if (str.contains(QRegularExpression(QLatin1String("<img.*src=http:/"), QRegularExpression::CaseInsensitiveOption))
|| str.contains(QRegularExpression(QLatin1String("<img.*src=https:/"), QRegularExpression::CaseInsensitiveOption))) {
return true;
}
return false;
}
// FIXME this used to go through the full webkit parser to extract the body and head blocks
// until we have that back, at least attempt to fix some of the damage
// yes, "parsing" HTML with regexps is very very wrong, but it's still better than not filtering
......@@ -439,7 +388,7 @@ void DefaultRendererPrivate::render(const HtmlMessagePart::Ptr &mp, HtmlWriter *
}
block.setProperty("containsExternalReferences",
containsExternalReferences(bodyText, extraHead));
Util::containsExternalReferences(bodyText, extraHead));
c.insert(QStringLiteral("content"), bodyText);
}
......
add_definitions( -DMESSAGEVIEWER_UTIL_DATA_DIR="${CMAKE_CURRENT_SOURCE_DIR}/data" )
macro(add_messageviewer_utils_unittest _source)
get_filename_component(_name ${_source} NAME_WE)
ecm_add_test(${_source}
......
<html><head><meta name="qrichtext" content="1" /><style type="text/css">
p, li { white-space: pre-wrap; }
</style></head><body style=" font-family:'Sans Serif'; font-size:15pt; font-weight:400; font-style:normal;">
<div style="position: relative"><img width="1" height="1" src="https://gridinbound.blob.core.windows.net/gic/g.png?h=1206d90b19dd492fa6a41594dabc9fe7" >
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; -qt-user-state:0;">sdfsdf<span style=" font-weight:600;">sdfsdfs</span><span style=" font-weight:600; font-style:italic;">dfsdfs</span><span style=" font-weight:600; font-style:italic; text-decoration: underline;">dfsdf</span></p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; ">&nbsp;</p></body></html>
<html><head><meta name="qrichtext" content="1" /><style type="text/css">
p, li { white-space: pre-wrap; }
</style></head><body style=" font-family:'Sans Serif'; font-size:15pt; font-weight:400; font-style:normal;">
<div style="position: relative"><img width="1" height="1" src=https://gridinbound.blob.core.windows.net/gic/g.png?h=1206d90b19dd492fa6a41594dabc9fe >
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; -qt-user-state:0;">sdfsdf<span style=" font-weight:600;">sdfsdfs</span><span style=" font-weight:600; font-style:italic;">dfsdfs</span><span style=" font-weight:600; font-style:italic; text-decoration: underline;">dfsdf</span></p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; ">&nbsp;</p></body></html>
<html><head><meta name="qrichtext" content="1" /><style type="text/css">
p, li { white-space: pre-wrap; }
</style></head><body style=" font-family:'Sans Serif'; font-size:15pt; font-weight:400; font-style:normal;">
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; -qt-user-state:0;">sdfsdf<span style=" font-weight:600;">sdfsdfs</span><span style=" font-weight:600; font-style:italic;">dfsdfs</span><span style=" font-weight:600; font-style:italic; text-decoration: underline;">dfsdf</span></p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; ">&nbsp;</p></body></html>
<html><head><meta name="qrichtext" content="1" />
<img src="cid:parn-num">Some text goes here blah blah src=http://example.com</img>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; ">&nbsp;</p></body></html>
<html><head><meta name="qrichtext" content="1" />
<img src="cid:parn-num">Some text goes here blah blah src=https://example.com</img>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px; ">&nbsp;</p></body></html>
......@@ -44,3 +44,32 @@ void MessageViewerUtilsTest::shouldExcludeHeader()
QFETCH(bool, exclude);
QCOMPARE(MessageViewer::Util::excludeExtraHeader(header), exclude);
}
void MessageViewerUtilsTest::shouldContainsExternalReferences_data()
{
QTest::addColumn<QString>("filename");
QTest::addColumn<QString>("extraHead");
QTest::addColumn<bool>("hasExternalReference");
QTest::newRow("noimage.txt") << QStringLiteral("noimage.txt") << QString() << false;
QTest::newRow("image.txt") << QStringLiteral("image.txt") << QString() << true;
QTest::newRow("image2.txt") << QStringLiteral("image2.txt") << QString() << true;
QTest::newRow("noimage2.txt") << QStringLiteral("noimage2.txt") << QString() << false;
QTest::newRow("noimage3.txt") << QStringLiteral("noimage3.txt") << QString() << false;
}
void MessageViewerUtilsTest::shouldContainsExternalReferences()
{
QFETCH(QString, filename);
QFETCH(QString, extraHead);
QFETCH(bool, hasExternalReference);
const QString curPath = QStringLiteral(MESSAGEVIEWER_UTIL_DATA_DIR "/");
QFile file(curPath + filename);
QVERIFY(file.open(QIODevice::ReadOnly));
const QString html = QString::fromLatin1(file.readAll());
QEXPECT_FAIL("noimage3.txt", "Need to Investigate it", Continue);
QEXPECT_FAIL("noimage2.txt", "Need to Investigate it", Continue);
QCOMPARE(MessageViewer::Util::containsExternalReferences(html, extraHead), hasExternalReference);
}
......@@ -29,6 +29,9 @@ public:
private Q_SLOTS:
void shouldExcludeHeader_data();
void shouldExcludeHeader();
void shouldContainsExternalReferences_data();
void shouldContainsExternalReferences();
};
#endif // MESSAGEVIEWERUTILSTEST_H
......@@ -75,6 +75,55 @@
#include <QRegularExpression>
using namespace MessageViewer;
/** Checks whether @p str contains external references. To be precise,
we only check whether @p str contains 'xxx="http[s]:' where xxx is
not href. Obfuscated external references are ignored on purpose.
*/
bool Util::containsExternalReferences(const QString &str, const QString &extraHead)
{
const bool hasBaseInHeader = extraHead.contains(QLatin1String(
"<base href=\""), Qt::CaseInsensitive);
if (hasBaseInHeader && (str.contains(QLatin1String("href=\"/"), Qt::CaseInsensitive)
|| str.contains(QLatin1String("<img src=\"/"), Qt::CaseInsensitive))) {
return true;
}
int httpPos = str.indexOf(QLatin1String("\"http:"), Qt::CaseInsensitive);
int httpsPos = str.indexOf(QLatin1String("\"https:"), Qt::CaseInsensitive);
while (httpPos >= 0 || httpsPos >= 0) {
// pos = index of next occurrence of "http: or "https: whichever comes first
int pos = (httpPos < httpsPos)
? ((httpPos >= 0) ? httpPos : httpsPos)
: ((httpsPos >= 0) ? httpsPos : httpPos);
// look backwards for "href"
if (pos > 5) {
int hrefPos = str.lastIndexOf(QLatin1String("href"), pos - 5, Qt::CaseInsensitive);
// if no 'href' is found or the distance between 'href' and '"http[s]:'
// is larger than 7 (7 is the distance in 'href = "http[s]:') then
// we assume that we have found an external reference
if ((hrefPos == -1) || (pos - hrefPos > 7)) {
// HTML messages created by KMail itself for now contain the following:
// <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
// Make sure not to show an external references warning for this string
int dtdPos = str.indexOf(QLatin1String(
"http://www.w3.org/TR/html4/loose.dtd"), pos + 1);
if (dtdPos != (pos + 1)) {
return true;
}
}
}
// find next occurrence of "http: or "https:
if (pos == httpPos) {
httpPos = str.indexOf(QLatin1String("\"http:"), httpPos + 6, Qt::CaseInsensitive);
} else {
httpsPos = str.indexOf(QLatin1String("\"https:"), httpsPos + 7, Qt::CaseInsensitive);
}
}
const bool containsReg = str.contains(QRegularExpression(QLatin1String("<img.*src=\"?https?:/.*>"), QRegularExpression::CaseInsensitiveOption));
//qDebug() << " str.contains " << containsReg;
return containsReg;
}
bool Util::checkOverwrite(const QUrl &url, QWidget *w)
{
......@@ -608,3 +657,4 @@ void Util::readGravatarConfig()
Gravatar::GravatarCache::self()->clear();
}
}
......@@ -58,6 +58,7 @@ namespace MessageViewer {
*/
namespace Util {
/**
 * Returns true if @p str contains external references: a quoted http[s] URL
 * in an attribute other than href, an <img> with an http[s] src, or, when
 * @p extraHead contains a <base href="...">, a root-relative href/img src.
 */
Q_REQUIRED_RESULT MESSAGEVIEWER_EXPORT bool containsExternalReferences(const QString &str, const QString &extraHead);
// return true if we should proceed, false if we should abort
Q_REQUIRED_RESULT bool MESSAGEVIEWER_EXPORT checkOverwrite(const QUrl &url, QWidget *w);
Q_REQUIRED_RESULT MESSAGEVIEWER_EXPORT bool saveMessageInMboxAndGetUrl(QUrl &url, const Akonadi::Item::List &retrievedMsgs, QWidget *parent, bool appendMessages = false);
Q_REQUIRED_RESULT MESSAGEVIEWER_EXPORT bool saveMessageInMbox(const Akonadi::Item::List &retrievedMsgs, QWidget *parent, bool appendMessages = false);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment