Commit dda15b0a authored by Laurent Montel
Browse files

Extract code so we can fix parsing code

CCBUG: 383381
parent 8adb11e7
......@@ -91,6 +91,7 @@ set(libsyndicationlocal_SRCS
specificitem.cpp
specificitemvisitor.cpp
tools.cpp
loaderutil.cpp
)
set(syndication_SRCS
......
......@@ -13,12 +13,15 @@
#include "documentsource.h"
#include "feed.h"
#include "global.h"
#include "loaderutil_p.h"
#include "parsercollection.h"
#include <QUrl>
#include <QRegExp>
#include <QStringList>
#include <QDebug>
#include <QFile>
#include <syndication_debug.h>
......@@ -26,8 +29,7 @@ namespace Syndication
{
struct Loader::LoaderPrivate {
LoaderPrivate() : retriever(nullptr), lastError(Success),
retrieverError(0)
LoaderPrivate()
{
}
......@@ -36,9 +38,9 @@ struct Loader::LoaderPrivate {
delete retriever;
}
DataRetriever *retriever;
Syndication::ErrorCode lastError;
int retrieverError;
DataRetriever *retriever = nullptr;
Syndication::ErrorCode lastError = Success;
int retrieverError = 0;
QUrl discoveredFeedURL;
QUrl url;
};
......@@ -137,69 +139,10 @@ void Loader::slotRetrieverDone(const QByteArray &data, bool success)
void Loader::discoverFeeds(const QByteArray &data)
{
QString str = QString::fromLatin1(data.constData()).simplified();
QString s2;
//QTextStream ts( &str, QIODevice::WriteOnly );
//ts << data.data();
// "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>"
// "type[\\s]=[\\s]\\\"application/rss+xml\\\""
// "href[\\s]=[\\s]\\\"application/rss+xml\\\""
QRegExp rx(QStringLiteral("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"), Qt::CaseInsensitive);
if (rx.indexIn(str) != -1) {
s2 = rx.cap(1);
} else {
// does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
int pos = 0;
QStringList feeds;
QString host = d->url.host();
rx.setPattern(QStringLiteral("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)"));
while (pos >= 0) {
pos = rx.indexIn(str, pos);
s2 = rx.cap(1);
if (s2.endsWith(QLatin1String(".rdf")) ||
s2.endsWith(QLatin1String(".rss")) ||
s2.endsWith(QLatin1String(".xml"))) {
feeds.append(s2);
}
if (pos >= 0) {
pos += rx.matchedLength();
}
}
QUrl testURL;
// loop through, prefer feeds on same host
QStringList::const_iterator end(feeds.constEnd());
for (QStringList::const_iterator it = feeds.constBegin(); it != end; ++it) {
testURL = QUrl(*it);
if (testURL.host() == host) {
s2 = *it;
break;
}
}
}
if (s2.isNull()) {
return;
}
if (QUrl(s2).isRelative()) {
if (s2.startsWith(QLatin1String("//"))) {
s2.prepend(d->url.scheme() + QLatin1Char(':'));
d->discoveredFeedURL = QUrl(s2);
} else if (s2.startsWith(QLatin1Char('/'))) {
d->discoveredFeedURL = d->url;
d->discoveredFeedURL.setPath(s2);
} else {
d->discoveredFeedURL = d->url;
d->discoveredFeedURL.setPath(d->discoveredFeedURL.path() + QLatin1Char('/') + s2);
}
//QT5 d->discoveredFeedURL.cleanPath();
} else {
d->discoveredFeedURL = QUrl(s2);
const QUrl url = LoaderUtil::parseFeed(data, d->url);
if (!url.isEmpty()) {
d->discoveredFeedURL = url;
}
//QT5 d->discoveredFeedURL.cleanPath();
}
} // namespace Syndication
/*
* This file is part of the syndication library
*
* Copyright (C) 2019 Laurent Montel <montel@kde.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#include "loaderutil_p.h"
/**
 * Searches the markup in @p data for the address of a feed it points to.
 *
 * The content is decoded as Latin-1 and whitespace-simplified, then two
 * strategies are tried in order:
 *  - a rel=... attribute naming "alternate" or "service.feed" that is
 *    followed by an href=... attribute; the href value is taken;
 *  - otherwise every "<a ... href=...>" target ending in .rdf, .rss or .xml
 *    is collected, and the first one whose host equals the host of @p url
 *    is taken. Candidates on other hosts are not used.
 *
 * A relative result is turned into a full URL based on @p url:
 *  - "//host/path" gets the scheme of @p url prepended;
 *  - "/path" replaces the path of @p url;
 *  - anything else is appended to the path of @p url after a '/'.
 *
 * @param data content of the page to scan
 * @param url  address the page was fetched from
 * @return the feed URL that was found, or a default-constructed QUrl when
 *         neither strategy produced a candidate
 */
QUrl Syndication::LoaderUtil::parseFeed(const QByteArray &data, const QUrl &url)
{
    const QString str = QString::fromLatin1(data.constData()).simplified();
    QString s2;

    // Autodiscovery: rel="alternate" / rel="service.feed" followed by href="...".
    QRegExp rx(QStringLiteral("(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)"), Qt::CaseInsensitive);
    if (rx.indexIn(str) != -1) {
        s2 = rx.cap(1);
    } else {
        // does not support Atom/RSS autodiscovery.. try finding feeds by brute force....
        QStringList feeds;
        // The group in front of "[^=]*=" has to be "(?:HREF)", the same anchor
        // pattern the autodiscovery expression above uses.
        rx.setPattern(QStringLiteral("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)"));
        int pos = rx.indexIn(str);
        while (pos >= 0) {
            const QString link = rx.cap(1);
            if (link.endsWith(QLatin1String(".rdf")) ||
                link.endsWith(QLatin1String(".rss")) ||
                link.endsWith(QLatin1String(".xml"))) {
                feeds.append(link);
            }
            // Continue right behind the current match.
            pos = rx.indexIn(str, pos + rx.matchedLength());
        }

        // loop through, prefer feeds on same host
        // s2 is still null here; it is only set when a same-host link exists.
        const QString host = url.host();
        const QStringList::const_iterator end(feeds.constEnd());
        for (QStringList::const_iterator it = feeds.constBegin(); it != end; ++it) {
            if (QUrl(*it).host() == host) {
                s2 = *it;
                break;
            }
        }
    }

    if (s2.isNull()) {
        return QUrl();
    }

    QUrl discoveredFeedURL;
    if (QUrl(s2).isRelative()) {
        if (s2.startsWith(QLatin1String("//"))) {
            // Scheme-relative reference: borrow the scheme of the page.
            s2.prepend(url.scheme() + QLatin1Char(':'));
            discoveredFeedURL = QUrl(s2);
        } else if (s2.startsWith(QLatin1Char('/'))) {
            // Host-relative reference: keep everything of the page URL but the path.
            discoveredFeedURL = url;
            discoveredFeedURL.setPath(s2);
        } else {
            // Path-relative reference: appended below the page's path.
            discoveredFeedURL = url;
            discoveredFeedURL.setPath(discoveredFeedURL.path() + QLatin1Char('/') + s2);
        }
    } else {
        discoveredFeedURL = QUrl(s2);
    }
    return discoveredFeedURL;
}
/*
* This file is part of the syndication library
*
* Copyright (C) 2019 Laurent Montel <montel@kde.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
*/
#ifndef LOADERUTIL_H
#define LOADERUTIL_H
#include <QUrl>
namespace Syndication {
namespace LoaderUtil
{
/**
 * Looks for a feed address referenced by the markup in @p data.
 *
 * @param data content of the page to scan
 * @param url  address the page was fetched from; used to complete relative
 *             references and to pick same-host links
 * @return the feed URL that was found, or a default-constructed QUrl if none
 *         was found. The result is marked Q_REQUIRED_RESULT.
 */
Q_REQUIRED_RESULT QUrl parseFeed(const QByteArray &data, const QUrl &url);
}
}
#endif // LOADERUTIL_H
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment