Commit 19ce5add authored by Laurent Montel

Add class for adblock

parent f3bf54e2
set(adblocklibprivate_SRCS
adblockmanager.cpp)
adblockmanager.cpp
adblocksearchtree.cpp
adblockrule.cpp
qzregexp.cpp
qztools.cpp
)
add_library(adblocklibprivate ${adblocklibprivate_SRCS})
generate_export_header(adblocklibprivate BASE_NAME adblocklib)
......@@ -7,6 +12,8 @@ generate_export_header(adblocklibprivate BASE_NAME adblocklib)
target_link_libraries(adblocklibprivate
PRIVATE
Qt5::Core
Qt5::WebEngine
Qt5::WebEngineWidgets
)
set_target_properties(adblocklibprivate
......
/*
Copyright (c) 2016 Montel Laurent <montel@kde.org>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2014 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
#include "adblockmatcher.h"
#include "adblockmanager.h"
#include "adblockrule.h"
#include "adblocksubscription.h"
/**
 * Constructs a matcher attached to @p manager.
 *
 * @p manager is passed to QObject as the parent and is also stored in
 * m_manager, from which update() later reads the subscription list.
 * The constructor does not build any caches itself; they are only
 * populated when update() runs.
 */
AdBlockMatcher::AdBlockMatcher(AdBlockManager* manager)
: QObject(manager)
, m_manager(manager)
{
// React to the manager's enabledChanged(bool) notification: rebuild the
// caches when ad blocking is switched on, drop them when it is switched off.
// NOTE(review): string-based connection -- the signal is assumed to be
// declared by AdBlockManager, which is not visible in this file.
connect(manager, SIGNAL(enabledChanged(bool)), this, SLOT(enabledChanged(bool)));
}
/**
 * Destroys the matcher.
 *
 * Delegates to clear(), which empties every cache and deletes the rule
 * copies held in m_createdRules.
 */
AdBlockMatcher::~AdBlockMatcher()
{
clear();
}
/**
 * Looks up the rule that blocks a network request, if any.
 *
 * Exception rules are consulted first (search tree, then the linear
 * fallback list); a hit on any of them means the request is allowed and
 * a null pointer is returned. Only then are the block rules consulted in
 * the same tree-then-list order.
 *
 * @param request    the request being inspected
 * @param urlDomain  domain handed through to the rule matchers
 * @param urlString  URL string handed through to the rule matchers
 * @return the first matching block rule, or 0 when the request is either
 *         covered by an exception rule or not matched by any block rule
 */
const AdBlockRule* AdBlockMatcher::match(const QWebEngineUrlRequestInfo &request, const QString &urlDomain, const QString &urlString) const
{
    // --- Exceptions win over everything else ---
    if (m_networkExceptionTree.find(request, urlDomain, urlString)) {
        return 0;
    }

    const int exceptionTotal = m_networkExceptionRules.count();
    for (int idx = 0; idx < exceptionTotal; ++idx) {
        if (m_networkExceptionRules.at(idx)->networkMatch(request, urlDomain, urlString)) {
            return 0;
        }
    }

    // --- Blocking rules ---
    const AdBlockRule* treeHit = m_networkBlockTree.find(request, urlDomain, urlString);
    if (treeHit) {
        return treeHit;
    }

    const int blockTotal = m_networkBlockRules.count();
    for (int idx = 0; idx < blockTotal; ++idx) {
        const AdBlockRule* candidate = m_networkBlockRules.at(idx);
        if (candidate->networkMatch(request, urlDomain, urlString)) {
            return candidate;
        }
    }

    // Nothing blocks this request
    return 0;
}
/**
 * Tells whether a cached "document" rule matches @p url.
 *
 * @param url  URL to test against every rule in m_documentRules
 * @return true as soon as one rule's urlMatch() accepts the URL,
 *         false when none does (including when the list is empty)
 */
bool AdBlockMatcher::adBlockDisabledForUrl(const QUrl &url) const
{
    const int total = m_documentRules.count();
    int idx = 0;
    while (idx < total) {
        if (m_documentRules.at(idx)->urlMatch(url)) {
            return true;
        }
        ++idx;
    }
    return false;
}
/**
 * Tells whether element hiding is switched off for @p url.
 *
 * That is the case when ad blocking as a whole is disabled for the URL
 * (see adBlockDisabledForUrl()) or when one of the cached "elemhide"
 * rules matches it.
 *
 * @param url  URL to test
 * @return true if either check succeeds, false otherwise
 */
bool AdBlockMatcher::elemHideDisabledForUrl(const QUrl &url) const
{
    bool disabled = adBlockDisabledForUrl(url);

    // Stop scanning as soon as one rule has matched
    const int total = m_elemhideRules.count();
    for (int idx = 0; !disabled && idx < total; ++idx) {
        disabled = m_elemhideRules.at(idx)->urlMatch(url);
    }

    return disabled;
}
/**
 * Returns the element-hiding CSS that is not tied to a particular domain.
 *
 * The string is assembled by update(): comma-separated selectors followed
 * by a "{display:none !important;}" declaration block. It is empty after
 * clear() and until update() has run.
 */
QString AdBlockMatcher::elementHidingRules() const
{
return m_elementHidingRules;
}
/**
 * Builds the element-hiding CSS for the domain-restricted rules that
 * apply to @p domain.
 *
 * Selectors of all rules whose matchDomain() accepts the domain are
 * joined with commas and terminated by a "{display:none !important;}"
 * declaration block. A selector list is closed and a new one started
 * once the running counter reaches 1000, so that no single CSS rule
 * grows without bound.
 *
 * @param domain  domain the page belongs to
 * @return the generated CSS, or an empty string when no rule applies
 */
QString AdBlockMatcher::elementHidingRulesForDomain(const QString &domain) const
{
    QString css;
    // Number of selectors written since the last declaration block was emitted
    int openSelectors = 0;

    const int total = m_domainRestrictedCssRules.count();
    for (int idx = 0; idx < total; ++idx) {
        const AdBlockRule* candidate = m_domainRestrictedCssRules.at(idx);

        if (candidate->matchDomain(domain)) {
            if (Q_UNLIKELY(openSelectors == 1000)) {
                // Close the current selector list with this selector
                css.append(candidate->cssSelector());
                css.append(QL1S("{display:none !important;}\n"));
                openSelectors = 0;
            } else {
                css.append(candidate->cssSelector());
                css.append(QLatin1Char(','));
                ++openSelectors;
            }
        }
    }

    if (openSelectors != 0) {
        // Replace the dangling ',' after the last selector with the declaration block
        css.chop(1);
        css.append(QL1S("{display:none !important;}\n"));
    }

    return css;
}
/**
 * Rebuilds every lookup cache from the manager's current subscriptions.
 *
 * All caches are dropped first (clear()), then each rule of each
 * subscription is sorted into one of:
 *  - element-hiding (CSS) rules: exceptions, domain-restricted rules and
 *    global rules are kept apart (see below),
 *  - m_documentRules / m_elemhideRules,
 *  - the network exception / block search trees, with the matching
 *    linear list as a fallback when the tree refuses the rule.
 *
 * Rules flagged as internally disabled are skipped entirely; CSS rules
 * are additionally skipped when not enabled, because the generated CSS is
 * embedded into pages without any later enabled/disabled check.
 *
 * Global CSS rules are de-duplicated by selector. Domain-restricted CSS
 * rules are NOT: several of them may legitimately share one selector
 * while targeting different domains, so each one is kept.
 *
 * NOTE(review): element-hiding exceptions are applied to global rules
 * only; an exception whose selector exists solely in domain-restricted
 * rules has no effect here.
 */
void AdBlockMatcher::update()
{
    // Apparently, excessive amount of selectors for one CSS rule is not what WebKit likes.
    // (In my testings, 4931 is the number that makes it crash)
    // So let's split it by 1000 selectors...
    const int maxSelectorsPerCssRule = 1000;

    clear();

    // Global (not domain-restricted) hiding rules, keyed by selector
    QHash<QString, const AdBlockRule*> cssRulesHash;
    QVector<const AdBlockRule*> exceptionCssRules;

    foreach (AdBlockSubscription* subscription, m_manager->subscriptions()) {
        foreach (const AdBlockRule* rule, subscription->allRules()) {
            // Don't add internally disabled rules to cache
            if (rule->isInternalDisabled())
                continue;

            if (rule->isCssRule()) {
                // We will add only enabled css rules to cache, because there is no enabled/disabled
                // check on match. They are directly embedded to pages.
                if (!rule->isEnabled())
                    continue;

                if (rule->isException()) {
                    exceptionCssRules.append(rule);
                }
                else if (rule->isDomainRestricted()) {
                    // Keep these out of the selector-keyed hash: QHash::insert() replaces the
                    // value stored for an existing key, so rules sharing a selector but
                    // restricted to different domains would silently overwrite each other
                    // (and could even replace a global rule with the same selector).
                    m_domainRestrictedCssRules.append(rule);
                }
                else {
                    cssRulesHash.insert(rule->cssSelector(), rule);
                }
            }
            else if (rule->isDocument()) {
                m_documentRules.append(rule);
            }
            else if (rule->isElemhide()) {
                m_elemhideRules.append(rule);
            }
            else if (rule->isException()) {
                // Rules the tree cannot index fall back to the linear list
                if (!m_networkExceptionTree.add(rule))
                    m_networkExceptionRules.append(rule);
            }
            else {
                if (!m_networkBlockTree.add(rule))
                    m_networkBlockRules.append(rule);
            }
        }
    }

    // Turn "global rule + exception" into a domain-restricted copy of the global rule
    // that is blocked on the exception's domains.
    foreach (const AdBlockRule* rule, exceptionCssRules) {
        const AdBlockRule* originalRule = cssRulesHash.value(rule->cssSelector());

        // If we don't have this selector, the exception does nothing
        if (!originalRule)
            continue;

        AdBlockRule* copiedRule = originalRule->copy();
        copiedRule->m_options |= AdBlockRule::DomainRestrictedOption;
        copiedRule->m_blockedDomains.append(rule->m_allowedDomains);

        cssRulesHash[rule->cssSelector()] = copiedRule;
        // The copy is owned by the matcher and released in clear()
        m_createdRules.append(copiedRule);
    }

    int hidingRulesCount = 0;

    QHashIterator<QString, const AdBlockRule*> it(cssRulesHash);
    while (it.hasNext()) {
        it.next();
        const AdBlockRule* rule = it.value();

        if (rule->isDomainRestricted()) {
            // Reached by the copies created in the exception pass above
            // (presumably isDomainRestricted() reflects DomainRestrictedOption -- confirm)
            m_domainRestrictedCssRules.append(rule);
        }
        else if (Q_UNLIKELY(hidingRulesCount == maxSelectorsPerCssRule)) {
            // Close the current selector list and start a new CSS rule
            m_elementHidingRules.append(rule->cssSelector());
            m_elementHidingRules.append(QL1S("{display:none !important;} "));
            hidingRulesCount = 0;
        }
        else {
            m_elementHidingRules.append(rule->cssSelector() + QLatin1Char(','));
            hidingRulesCount++;
        }
    }

    if (hidingRulesCount != 0) {
        // Replace the dangling ',' after the last selector with the declaration block
        m_elementHidingRules.chop(1);
        m_elementHidingRules.append(QL1S("{display:none !important;} "));
    }
}
/**
 * Empties every cache built by update().
 *
 * The rule pointers stored in the caches are not owned by the matcher and
 * are merely forgotten. The only exception is m_createdRules, which holds
 * the copies the matcher allocated itself; those are deleted here.
 */
void AdBlockMatcher::clear()
{
    // Network request matching: search trees and their linear fallbacks
    m_networkBlockTree.clear();
    m_networkExceptionTree.clear();
    m_networkBlockRules.clear();
    m_networkExceptionRules.clear();

    // Per-URL switches
    m_documentRules.clear();
    m_elemhideRules.clear();

    // Element hiding
    m_elementHidingRules.clear();
    m_domainRestrictedCssRules.clear();

    // Rule copies owned by the matcher; released only after every cache
    // that may point at them has been emptied
    qDeleteAll(m_createdRules);
    m_createdRules.clear();
}
/**
 * Slot connected to the manager's enabledChanged(bool) signal.
 *
 * @param enabled  true rebuilds the caches via update(),
 *                 false drops them via clear()
 */
void AdBlockMatcher::enabledChanged(bool enabled)
{
    if (!enabled) {
        clear();
        return;
    }

    update();
}
/*
Copyright (c) 2016 Montel Laurent <montel@kde.org>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2014 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
#ifndef ADBLOCKMATCHER_H
#define ADBLOCKMATCHER_H
#include <QUrl>
#include <QObject>
#include "adblocksearchtree.h"
class QWebEngineUrlRequestInfo;
class AdBlockManager;
/**
 * Caches the rules of all ad-block subscriptions in a form suited for
 * fast lookups: network requests are matched through match(), per-URL
 * switches through adBlockDisabledForUrl() / elemHideDisabledForUrl(),
 * and element-hiding CSS is served pre-assembled.
 *
 * The caches are filled by update() and emptied by clear().
 */
class AdBlockMatcher : public QObject
{
Q_OBJECT
public:
// The manager becomes the QObject parent and the source of subscriptions.
explicit AdBlockMatcher(AdBlockManager* manager);
~AdBlockMatcher();
// Returns the rule blocking the request, or null when the request is
// allowed (an exception rule matched, or no block rule matched).
const AdBlockRule* match(const QWebEngineUrlRequestInfo &request, const QString &urlDomain, const QString &urlString) const;
// True when a cached document rule matches the URL.
bool adBlockDisabledForUrl(const QUrl &url) const;
// True when adBlockDisabledForUrl() holds or a cached elemhide rule matches.
bool elemHideDisabledForUrl(const QUrl &url) const;
// Element-hiding CSS that is not restricted to particular domains.
QString elementHidingRules() const;
// Element-hiding CSS built from the domain-restricted rules matching the domain.
QString elementHidingRulesForDomain(const QString &domain) const;
public Q_SLOTS:
// Rebuilds all caches from the manager's subscriptions.
void update();
// Empties all caches and deletes the rules in m_createdRules.
void clear();
private Q_SLOTS:
// Calls update() when enabled, clear() otherwise.
void enabledChanged(bool enabled);
private:
AdBlockManager* m_manager;
// Rule copies allocated by update(); the only rules this class deletes.
QVector<AdBlockRule*> m_createdRules;
// Linear fallbacks for rules the corresponding search tree did not accept.
QVector<const AdBlockRule*> m_networkExceptionRules;
QVector<const AdBlockRule*> m_networkBlockRules;
// CSS rules that apply to specific domains only.
QVector<const AdBlockRule*> m_domainRestrictedCssRules;
// Rules consulted by adBlockDisabledForUrl() / elemHideDisabledForUrl().
QVector<const AdBlockRule*> m_documentRules;
QVector<const AdBlockRule*> m_elemhideRules;
// Pre-assembled CSS returned by elementHidingRules().
QString m_elementHidingRules;
// Search trees tried before the linear fallback lists in match().
AdBlockSearchTree m_networkBlockTree;
AdBlockSearchTree m_networkExceptionTree;
};
#endif // ADBLOCKMATCHER_H
/*
Copyright (c) 2016 Montel Laurent <montel@kde.org>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2010-2014 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
/**
* Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the Benjamin Meyer nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef ADBLOCKRULE_H
#define ADBLOCKRULE_H
#include <QObject>
#include <QStringList>
#include <QStringMatcher>
#include "qzregexp.h"
class QUrl;
class QWebEngineUrlRequestInfo;
class AdBlockSubscription;
/**
 * A single ad-block filter rule.
 *
 * A rule is created from a textual filter and optionally tied to the
 * subscription it came from. Instances cannot be copied implicitly
 * (Q_DISABLE_COPY); copy() is the explicit way to duplicate one.
 *
 * NOTE(review): only the declaration is visible here; the comments below
 * describe what the declarations show, not how the members are implemented.
 */
class AdBlockRule
{
Q_DISABLE_COPY(AdBlockRule)
public:
AdBlockRule(const QString &filter = QString(), AdBlockSubscription* subscription = Q_NULLPTR);
~AdBlockRule();
// Returns a separately allocated duplicate as a raw pointer; the caller
// is presumably responsible for deleting it -- confirm against the implementation.
AdBlockRule* copy() const;
AdBlockSubscription* subscription() const;
void setSubscription(AdBlockSubscription* subscription);
QString filter() const;
void setFilter(const QString &filter);
// Rule classification queries
bool isCssRule() const;
QString cssSelector() const;
bool isDocument() const;
bool isElemhide() const;
bool isDomainRestricted() const;
bool isException() const;
bool isComment() const;
bool isEnabled() const;
void setEnabled(bool enabled);
bool isSlow() const;
bool isInternalDisabled() const;
// Matching entry points
bool urlMatch(const QUrl &url) const;
bool networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const;
bool matchDomain(const QString &domain) const;
// Per-option matchers; the names parallel the RuleOption values below
bool matchThirdParty(const QWebEngineUrlRequestInfo &request) const;
bool matchObject(const QWebEngineUrlRequestInfo &request) const;
bool matchSubdocument(const QWebEngineUrlRequestInfo &request) const;
bool matchXmlHttpRequest(const QWebEngineUrlRequestInfo &request) const;
bool matchImage(const QWebEngineUrlRequestInfo &request) const;
bool matchScript(const QWebEngineUrlRequestInfo &request) const;
bool matchStyleSheet(const QWebEngineUrlRequestInfo &request) const;
bool matchObjectSubrequest(const QWebEngineUrlRequestInfo &request) const;
protected:
bool stringMatch(const QString &domain, const QString &encodedUrl) const;
bool isMatchingDomain(const QString &domain, const QString &filter) const;
bool isMatchingRegExpStrings(const QString &url) const;
QStringList parseRegExpFilter(const QString &filter) const;
private:
// Kind of rule, stored in m_type
enum RuleType {
CssRule = 0,
DomainMatchRule = 1,
RegExpMatchRule = 2,
StringEndsMatchRule = 3,
StringContainsMatchRule = 4,
Invalid = 5
};
// Bit flags combined in RuleOptions (m_options / m_exceptions).
// Each value is a distinct power of two; 512 is not assigned.
enum RuleOption {
DomainRestrictedOption = 1,
ThirdPartyOption = 2,
ObjectOption = 4,
SubdocumentOption = 8,
XMLHttpRequestOption = 16,
ImageOption = 32,
ScriptOption = 64,
StyleSheetOption = 128,
ObjectSubrequestOption = 256,
// Exception only options
DocumentOption = 1024,
ElementHideOption = 2048
};
Q_DECLARE_FLAGS(RuleOptions, RuleOption)
// Flag helpers; declared inline, defined outside this header
inline bool hasOption(const RuleOption &opt) const;
inline bool hasException(const RuleOption &opt) const;
inline void setOption(const RuleOption &opt);
inline void setException(const RuleOption &opt, bool on);
// Filter parsing helpers
void parseFilter();
void parseDomains(const QString &domains, const QChar &separator);
bool filterIsOnlyDomain(const QString &filter) const;
bool filterIsOnlyEndsMatch(const QString &filter) const;
QString createRegExpFromFilter(const QString &filter) const;
QList<QStringMatcher> createStringMatchers(const QStringList &filters) const;
AdBlockSubscription* m_subscription;
RuleType m_type;
RuleOptions m_options;
RuleOptions m_exceptions;
// Original rule filter
QString m_filter;
// Parsed rule for string matching (CSS Selector for CSS rules)
QString m_matchString;
// Case sensitivity for string matching
Qt::CaseSensitivity m_caseSensitivity;
bool m_isEnabled;
bool m_isException;
bool m_isInternalDisabled;
// Domain lists filled while parsing the filter; AdBlockMatcher also
// appends to m_blockedDomains on the copies it creates
QStringList m_allowedDomains;
QStringList m_blockedDomains;
// Regular expression plus the string matchers stored alongside it
struct RegExp {
QzRegExp regExp;
QList<QStringMatcher> matchers;
};
// Use dynamic allocation to save memory
RegExp* m_regExp;
// These classes access the private members directly
friend class AdBlockMatcher;
friend class AdBlockSearchTree;
friend class AdBlockSubscription;
};
#endif // ADBLOCKRULE_H
/*
Copyright (c) 2016 Montel Laurent <montel@kde.org>
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License, version 2, as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2013-2014 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License