From 8a4fd1c5229abf4d248654a8b996bddf168694da Mon Sep 17 00:00:00 2001 From: Volker Krause Date: Sat, 21 Nov 2020 16:16:08 +0100 Subject: [PATCH] Work around another variant of invalid JSON This time it's extra commas at the end of objects, brought to you by Eventbrite. So far their emails contained valid JSON, but the affected one does not contain a location, so possibly breaking their template. --- .../structureddata/eventbrite-broken-json.html | 16 ++++++++++++++++ .../structureddata/eventbrite-broken-json.json | 14 ++++++++++++++ src/generic/structureddataextractor.cpp | 12 ++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 autotests/structureddata/eventbrite-broken-json.html create mode 100644 autotests/structureddata/eventbrite-broken-json.json diff --git a/autotests/structureddata/eventbrite-broken-json.html b/autotests/structureddata/eventbrite-broken-json.html new file mode 100644 index 0000000..993c9e2 --- /dev/null +++ b/autotests/structureddata/eventbrite-broken-json.html @@ -0,0 +1,16 @@ + + + diff --git a/autotests/structureddata/eventbrite-broken-json.json b/autotests/structureddata/eventbrite-broken-json.json new file mode 100644 index 0000000..bbe78cc --- /dev/null +++ b/autotests/structureddata/eventbrite-broken-json.json @@ -0,0 +1,14 @@ +[ + { + "@context": "http://schema.org", + "@type": "EventReservation", + "reservationFor": { + "@type": "Event", + "endDate": "2020-12-12T17:00:00+01:00", + "name": "XMAS Open Mobility Data Community Remote Meetup 2020 #xomdcrm20", + "startDate": "2020-12-11T17:00:00+01:00" + }, + "reservationNumber": "XXX007", + "reservationStatus": "http://schema.org/Confirmed" + } +] diff --git a/src/generic/structureddataextractor.cpp b/src/generic/structureddataextractor.cpp index fbd70d0..40b75cf 100644 --- a/src/generic/structureddataextractor.cpp +++ b/src/generic/structureddataextractor.cpp @@ -37,6 +37,18 @@ static QByteArray fixupJson(const QByteArray &data) output.append("]"); } + // Eventbrite adds commas where there shouldn't be one... + for (int idx = output.indexOf("\",\n"); idx > 0 && idx + 3 < output.size(); idx = output.indexOf("\",\n", idx)) { + const auto comma = idx + 1; + idx += 3; + while (idx < output.size() && std::isspace(output[idx])) { + ++idx; + } + if (idx < output.size() && output[idx] == '}') { + output[comma] = ' '; + } + } + return output; } -- GitLab