Commit 5bc696b5 authored by David Edmundson's avatar David Edmundson
Browse files

Sanitise notification HTML

Summary:
Qt labels support a HTML subset, using a completely internal parser in
QTextDocument.

The Notification spec support an even smaller subset of notification
elements.

It's important to strip out irrelevant tags that could potentially load
remote information without user interaction, such as img
src or even <b style="background:url...

But we want to maintain the basic rich text formatting of bold and
italics and links.

This parser iterates reads the XML, copying only permissable tags and
attributes.

A future obvious improvement would be to merge the original regular
expressions into this stream parser, but I'm trying to minimise
breakages to get this into 5.12.

Test Plan:
Moved code into it's own class for easy unit testing
Tried a bunch of things, including what the old regexes were doing

Also ran notify send with a few options to make sure things worked

Reviewers: #plasma, fvogt

Reviewed By: fvogt

Subscribers: aacid, fvogt, plasma-devel

Tags: #plasma

Differential Revision: https://phabricator.kde.org/D10188
parent 265ab959
......@@ -4,6 +4,7 @@ set(notifications_engine_SRCS
notificationsengine.cpp
notificationservice.cpp
notificationaction.cpp
notificationsanitizer.cpp
)
qt5_add_dbus_adaptor( notifications_engine_SRCS org.freedesktop.Notifications.xml notificationsengine.h NotificationsEngine )
......@@ -26,3 +27,10 @@ kcoreaddons_desktop_to_json(plasma_engine_notifications plasma-dataengine-notifi
install(TARGETS plasma_engine_notifications DESTINATION ${KDE_INSTALL_PLUGINDIR}/plasma/dataengine)
install(FILES plasma-dataengine-notifications.desktop DESTINATION ${KDE_INSTALL_KSERVICES5DIR} )
install(FILES notifications.operations DESTINATION ${PLASMA_DATA_INSTALL_DIR}/services)
#unit test
add_executable(notification_test notificationsanitizer.cpp notifications_test.cpp)
target_link_libraries(notification_test Qt5::Test Qt5::Core)
ecm_mark_as_test(notification_test)
#include <QtTest>
#include <QObject>
#include <QDebug>
#include "notificationsanitizer.h"
class NotificationTest : public QObject
{
Q_OBJECT
public:
NotificationTest() {}
private Q_SLOTS:
void parse_data();
void parse();
};
void NotificationTest::parse_data()
{
QTest::addColumn<QString>("messageIn");
QTest::addColumn<QString>("expectedOut");
QTest::newRow("basic no HTML") << "I am a notification" << "I am a notification";
QTest::newRow("whitespace") << " I am a notification " << "I am a notification";
QTest::newRow("basic html") << "I am <b>the</b> notification" << "I am <b>the</b> notification";
QTest::newRow("nested html") << "I am <i><b>the</b></i> notification" << "I am <i><b>the</b></i> notification";
QTest::newRow("no extra tags") << "I am <blink>the</blink> notification" << "I am the notification";
QTest::newRow("no extra attrs") << "I am <b style=\"font-weight:20\">the</b> notification" << "I am <b>the</b> notification";
QTest::newRow("newlines") << "I am\nthe\nnotification" << "I am<br/>the<br/>notification";
QTest::newRow("multinewlines") << "I am\n\nthe\n\n\nnotification" << "I am<br/>the<br/>notification";
QTest::newRow("amp") << "me&you" << "me&amp;you";
QTest::newRow("double escape") << "foo &amp; &lt;bar&gt;" << "foo &amp; &lt;bar&gt;";
QTest::newRow("quotes") << "&apos;foo&apos;" << "'foo'";//as label can't handle this normally valid entity
QTest::newRow("image normal") << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"/> and more text" << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"/> and more text";
//this input is technically wrong, so the output is also wrong, but QTextHtmlParser does the "right" thing
QTest::newRow("image normal no close") << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"> and more text" << "This is <img src=\"file:://foo/boo.png\" alt=\"cheese\"> and more text</img>";
QTest::newRow("image remote URL") << "This is <img src=\"http://foo.com/boo.png\" alt=\"cheese\" /> and more text" << "This is <img alt=\"cheese\"/> and more text";
//more bad formatted options. To some extent actual output doesn't matter. Garbage in, garbabe out.
//the important thing is that it doesn't contain anything that could be parsed as the remote URL
QTest::newRow("image remote URL no close") << "This is <img src=\"http://foo.com/boo.png>\" alt=\"cheese\"> and more text" << "This is <img alt=\"cheese\"> and more text</img>";
QTest::newRow("image remote URL double open") << "This is <<img src=\"http://foo.com/boo.png>\" and more text" << "This is ";
QTest::newRow("image remote URL no entitiy close") << "This is <img src=\"http://foo.com/boo.png\" and more text" << "This is ";
QTest::newRow("image remote URL space in element name") << "This is < img src=\"http://foo.com/boo.png\" alt=\"cheese\" /> and more text" << "This is ";
QTest::newRow("link") << "This is a link <a href=\"http://foo.com/boo\"/> and more text" << "This is a link <a href=\"http://foo.com/boo\"/> and more text";
}
void NotificationTest::parse()
{
QFETCH(QString, messageIn);
QFETCH(QString, expectedOut);
const QString out = NotificationSanitizer::parse(messageIn);
expectedOut = "<?xml version=\"1.0\"?><html>" + expectedOut + "</html>\n";
QCOMPARE(out, expectedOut);
}
QTEST_GUILESS_MAIN(NotificationTest)
#include "notifications_test.moc"
/*
* Copyright (C) 2017 David Edmundson <davidedmundson@kde.org>
*
* This program is free software you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "notificationsanitizer.h"
#include <QXmlStreamReader>
#include <QXmlStreamWriter>
#include <QRegularExpression>
#include <QDebug>
#include <QUrl>
QString NotificationSanitizer::parse(const QString &text)
{
// replace all \ns with <br/>
QString t = text;
t.replace(QLatin1String("\n"), QStringLiteral("<br/>"));
// Now remove all inner whitespace (\ns are already <br/>s)
t = t.simplified();
// Finally, check if we don't have multiple <br/>s following,
// can happen for example when "\n \n" is sent, this replaces
// all <br/>s in succsession with just one
t.replace(QRegularExpression(QStringLiteral("<br/>\\s*<br/>(\\s|<br/>)*")), QLatin1String("<br/>"));
// This fancy RegExp escapes every occurence of & since QtQuick Text will blatantly cut off
// text where it finds a stray ampersand.
// Only &{apos, quot, gt, lt, amp}; as well as &#123 character references will be allowed
t.replace(QRegularExpression(QStringLiteral("&(?!(?:apos|quot|[gl]t|amp);|#)")), QLatin1String("&amp;"));
QXmlStreamReader r(QStringLiteral("<html>") + t + QStringLiteral("</html>"));
QString result;
QXmlStreamWriter out(&result);
const QVector<QString> allowedTags = {"b", "i", "u", "img", "a", "html", "br"};
out.writeStartDocument();
while (!r.atEnd()) {
r.readNext();
if (r.tokenType() == QXmlStreamReader::StartElement) {
const QString name = r.name().toString();
if (!allowedTags.contains(name)) {
continue;
}
out.writeStartElement(name);
if (name == QLatin1String("img")) {
auto src = r.attributes().value("src").toString();
auto alt = r.attributes().value("alt").toString();
const QUrl url(src);
if (url.isLocalFile()) {
out.writeAttribute(QStringLiteral("src"), src);
} else {
//image denied for security reasons! Do not copy the image src here!
}
out.writeAttribute(QStringLiteral("alt"), alt);
}
if (name == QLatin1String("a")) {
out.writeAttribute(QStringLiteral("href"), r.attributes().value("href").toString());
}
}
if (r.tokenType() == QXmlStreamReader::EndElement) {
const QString name = r.name().toString();
if (!allowedTags.contains(name)) {
continue;
}
out.writeEndElement();
}
if (r.tokenType() == QXmlStreamReader::Characters) {
const auto text = r.text().toString();
out.writeCharacters(text); //this auto escapes chars -> HTML entities
}
}
out.writeEndDocument();
if (r.hasError()) {
qWarning() << "Notification to send to backend contains invalid XML: "
<< r.errorString() << "line" << r.lineNumber()
<< "col" << r.columnNumber();
}
// The Text.StyledText format handles only html3.2 stuff and &apos; is html4 stuff
// so we need to replace it here otherwise it will not render at all.
result = result.replace(QLatin1String("&apos;"), QChar('\''));
return result;
}
/*
* Copyright (C) 2017 David Edmundson <davidedmundson@kde.org>
*
* This program is free software you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include <QString>
namespace NotificationSanitizer
{
/*
* This turns generic random text of either plain text of any degree of faux-HTML into HTML allowed
* in the notification spec namely:
* a, img, b, i, u and br
* All other tags and attributes are stripped
* Whitespace is stripped and converted to <br/>
* Double newlines are compressed
*
* Image src is only copied when referring to a local file
*/
QString parse(const QString &in);
}
......@@ -20,6 +20,7 @@
#include "notificationsengine.h"
#include "notificationservice.h"
#include "notificationsadaptor.h"
#include "notificationsanitizer.h"
#include <QDebug>
#include <KConfigGroup>
......@@ -261,23 +262,7 @@ uint NotificationsEngine::Notify(const QString &app_name, uint replaces_id,
const QString source = QStringLiteral("notification %1").arg(id);
QString bodyFinal = (partOf == 0 ? body : _body);
// First trim whitespace from beginning and end
bodyFinal = bodyFinal.trimmed();
// Now replace all \ns with <br/>
bodyFinal = bodyFinal.replace(QLatin1String("\n"), QLatin1String("<br/>"));
// Now remove all inner whitespace (\ns are already <br/>s
bodyFinal = bodyFinal.simplified();
// Finally, check if we don't have multiple <br/>s following,
// can happen for example when "\n \n" is sent, this replaces
// all <br/>s in succsession with just one
bodyFinal.replace(QRegularExpression(QStringLiteral("<br/>\\s*<br/>(\\s|<br/>)*")), QLatin1String("<br/>"));
// This fancy RegExp escapes every occurence of & since QtQuick Text will blatantly cut off
// text where it finds a stray ampersand.
// Only &{apos, quot, gt, lt, amp}; as well as &#123 character references will be allowed
bodyFinal.replace(QRegularExpression(QStringLiteral("&(?!(?:apos|quot|[gl]t|amp);|#)")), QLatin1String("&amp;"));
// The Text.StyledText format handles only html3.2 stuff and &apos; is html4 stuff
// so we need to replace it here otherwise it will not render at all.
bodyFinal.replace(QLatin1String("&apos;"), QChar('\''));
bodyFinal = NotificationSanitizer::parse(bodyFinal);
Plasma::DataEngine::Data notificationData;
notificationData.insert(QStringLiteral("id"), QString::number(id));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment