Commit 745fe33a authored by Michael Pyne's avatar Michael Pyne

Fix bug 108297 (JuK crashes when getting cover for an album with quotes)

I was actually able to reproduce this crash for album without a quotation mark.
In fact, I was able to reproduce this for each and every single track I tried.
Apparently Google changed the output slightly on their image search, which the
current code was quite unable to cope with.

I fixed the crash issue by enabling exceptions and catching the KHTML-sent
exceptions.

I think I've fixed the underlying issue by changing the code to adapt a
little better to upcoming changes in the Google layout.  For example there
should no longer be hardcoded node position checks.

wheels: The change is non-trivial but it looks like JuK's Google Cover Art
Downloader is now completely broken without it.  So I will backport this to
branch.

CCBUG:108297

svn path=/trunk/KDE/kdemultimedia/juk/; revision=433198
parent ba2e9626
......@@ -86,6 +86,8 @@ endif
juk_LDADD = -lm $(LDADD_GST) $(mblibs) $(LIB_KIO) $(taglib_libs) $(LIB_KHTML) $(LIB_ARTS) ../akode/lib/libakode.la
juk_LDFLAGS = $(all_libraries) $(KDE_RPATH) $(LDFLAGS_GST)
KDE_CXXFLAGS = $(USE_EXCEPTIONS)
tagguessertest_LDADD = $(LIB_KDECORE)
tagguessertest_LDFLAGS = $(all_libraries)
......
......@@ -14,6 +14,9 @@
#include <dom/html_document.h>
#include <dom/html_misc.h>
#include <dom/html_table.h>
#include <dom/dom_exception.h>
#include <dom/dom2_traversal.h>
#include <kapplication.h>
#include <kstatusbar.h>
......@@ -77,62 +80,99 @@ void GoogleFetcher::slotLoadImageURLs(GoogleFetcher::ImageSize size)
m_loadedQuery = m_searchString;
m_loadedSize = size;
// We don't normally like exceptions but missing DOMException will kill
// JuK rather abruptly whether we like it or not so we don't really have a
// choice if we're going to screen-scrape Google.
try {
DOM::HTMLDocument search;
search.setAsync(false);
search.setAsync(false); // Grab the document before proceeding.
search.load(url.url());
DOM::Node body = search.body();
DOM::NodeList topLevelNodes = body.childNodes();
// On google results, if the fifth (0-based) node is a "Script" node,
// then there are no results. Otherwise, there are results.
DOM::Node fourthNode = topLevelNodes.item(4);
DOM::HTMLElement body = search.body();
DOM::NodeList topLevelNodes = body.getElementsByTagName("table");
if(topLevelNodes.length() <= 5 ||
topLevelNodes.item(4).nodeName().string() == "script")
if(!hasImageResults(search))
{
kdDebug(65432) << "Search returned no results.\n";
emit signalNewSearch(m_imageList);
return;
}
// Go through each of the top (table) nodes
for(uint i = 5; i < topLevelNodes.length(); i++) {
for(uint i = 0; i < topLevelNodes.length(); i++) {
DOM::Node thisTopNode = topLevelNodes.item(i);
if(thisTopNode.nodeName().string() == "table") {
uint imageIndex = 0;
if(!thisTopNode.firstChild().firstChild().firstChild()
.attributes().getNamedItem("colspan").isNull())
{
imageIndex = 1;
}
DOM::NodeList images = thisTopNode.firstChild().childNodes().item(imageIndex).childNodes();
// For each table node, pull the images out of the first row
for(uint j = 0; j < images.length(); j++) {
QString imageURL = "http://images.google.com" +
images.item(j).firstChild().firstChild().attributes()
.getNamedItem("src").nodeValue().string();
DOM::Node topFont = thisTopNode.firstChild().childNodes()
.item(imageIndex + 1).childNodes().item(j).firstChild();
// And pull the size out of the second row
for(uint k = 0; k < topFont.childNodes().length(); k++) {
if(topFont.childNodes().item(k).nodeName().string() == "font") {
m_imageList.append(GoogleImage(imageURL, topFont.childNodes().item(k)
.firstChild().nodeValue().string()));
}
}
}
}
// The get named item test seems to accurately determine whether a
// <TABLE> tag contains the actual images or is just layout filler.
// The parent node check is due to the fact that we only want top-level
// tables, but the getElementsByTagName returns all tables in the
// tree.
DOM::HTMLTableElement table = thisTopNode;
if(table.isNull() || table.parentNode() != body || table.getAttribute("align").isEmpty())
continue;
DOM::HTMLCollection rows = table.rows();
uint imageIndex = 0;
// Some tables will have an extra row saying "Displaying only foo-size
// images". These tables have three rows, so we need to have
// increment imageIndex for these.
if(rows.length() > 2)
imageIndex = 1;
// A list of <TDs> containing the hyperlink to the site, with image.
DOM::NodeList images = rows.item(imageIndex).childNodes();
// For each table node, pull the images out of the first row
for(uint j = 0; j < images.length(); j++) {
DOM::Element tdElement = images.item(j);
if(tdElement.isNull()) {
// Whoops....
kdError(65432) << "Expecting a <TD> in a <TR> parsing Google Images!\n";
continue;
}
// Grab first item out of list of images. There should only be
// one anyways.
DOM::Element imgElement = tdElement.getElementsByTagName("img").item(0);
if(imgElement.isNull()) {
kdError(65432) << "Expecting a <IMG> in a <TD> parsing Google Images!\n";
continue;
}
QString imageURL = "http://images.google.com" +
imgElement.getAttribute("src").string();
// Pull the matching <TD> node for the row under the one we've
// got.
tdElement = rows.item(imageIndex + 1).childNodes().item(j);
// Iterate over it until we find a string with "pixels".
unsigned long whatToShow = DOM::NodeFilter::SHOW_TEXT;
DOM::NodeIterator it = search.createNodeIterator(tdElement, whatToShow, 0, false);
DOM::Node node;
for(node = it.nextNode(); !node.isNull(); node = it.nextNode()) {
if(node.nodeValue().string().contains("pixels")) {
m_imageList.append(GoogleImage(imageURL, node.nodeValue().string()));
break;
}
}
}
}
} // try
catch (DOM::DOMException &e)
{
kdError(65432) << "Caught DOM Exception: " << e.code << endl;
}
catch (...)
{
kdError(65432) << "Caught unknown exception.\n";
}
emit signalNewSearch(m_imageList);
}
......@@ -190,4 +230,20 @@ bool GoogleFetcher::requestNewSearchTerms(bool noResults)
return ok;
}
bool GoogleFetcher::hasImageResults(DOM::HTMLDocument &search)
{
unsigned long typesToShow = DOM::NodeFilter::SHOW_TEXT;
DOM::NodeIterator it = search.createNodeIterator(search.body(), typesToShow, 0, false);
DOM::Node node;
for(node = it.nextNode(); !node.isNull(); node = it.nextNode()) {
// node should be a text node.
if(node.nodeValue().string().contains("did not match any"))
return false;
}
return true;
}
#include "googlefetcher.moc"
......@@ -23,6 +23,10 @@
#include "filehandle.h"
namespace DOM {
class HTMLDocument;
}
class KURL;
class GoogleImage
......@@ -59,6 +63,9 @@ private:
void displayWaitMessage();
bool requestNewSearchTerms(bool noResults = false);
// Returns true if there are results in the search, otherwise returns false.
bool hasImageResults(DOM::HTMLDocument &search);
private slots:
void slotLoadImageURLs(GoogleFetcher::ImageSize size = All);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment