Commit 7abd1c6e authored by Volker Krause's avatar Volker Krause
Browse files

Add OSM PBF parser

Mainly needed for the Marble tile generator for now, but much easier to
test and measure here.
parent be4b222f
......@@ -38,6 +38,9 @@ set_package_properties(FLEX PROPERTIES TYPE RECOMMENDED PURPOSE "MapCSS parser f
find_package(BISON)
set_package_properties(BISON PROPERTIES TYPE RECOMMENDED PURPOSE "MapCSS parser for indoor map rendering.")
find_package(Protobuf)
set_package_properties(Protobuf PROPERTIES TYPE OPTIONAL PURPOSE "Parsing of OSM PBF files.")
if (EXISTS "${CMAKE_SOURCE_DIR}/.git")
add_definitions(-DQT_DISABLE_DEPRECATED_BEFORE=0x050d00)
add_definitions(-DKF_DISABLE_DEPRECATED_BEFORE_AND_AT=0x054400)
......
......@@ -21,6 +21,7 @@
#include <osm/element.h>
#include <osm/o5mparser.h>
#include <osm/osmpbfparser.h>
#include <QElapsedTimer>
#include <QFile>
......@@ -62,8 +63,13 @@ void MapLoader::loadFromO5m(const QString &fileName)
const auto data = f.map(0, f.size());
OSM::DataSet ds;
OSM::O5mParser p(&ds);
p.parse(data, f.size());
if (fileName.endsWith(QLatin1String(".osm.pbf"))) {
OSM::OsmPbfParser p(&ds);
p.parse(data, f.size());
} else {
OSM::O5mParser p(&ds);
p.parse(data, f.size());
}
m_data.setDataSet(std::move(ds));
qCDebug(Log) << "o5m loading took" << loadTime.elapsed() << "ms";
Q_EMIT done();
......
/*
SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#cmakedefine HAVE_PROTOBUF
/*
SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#include "config-kosm.h"
#include "osmpbfparser.h"
#ifdef HAVE_PROTOBUF
#include "fileformat.pb.h"
#include "osmformat.pb.h"
#endif
#include <QByteArray>
#include <QDebug>
#include <QtEndian>
#include <zlib.h>
using namespace OSM;
OsmPbfParser::OsmPbfParser(DataSet *dataSet)
: m_dataSet(dataSet)
{
}
/** Parses a complete in-memory OSM PBF file.
 *
 *  The input is consumed one file block at a time until parseBlob() reports
 *  that no further block could be read.
 *  When built without Protobuf support this only logs a warning.
 *
 *  @param data Start of the file content; a null pointer (such as the result
 *              of a failed QFile::map() call) is treated as empty input.
 *  @param len  Number of bytes available at @p data.
 */
void OsmPbfParser::parse(const uint8_t *data, std::size_t len)
{
#ifdef HAVE_PROTOBUF
    // Guard against a failed memory mapping: no pointer arithmetic on nullptr.
    if (!data || len == 0) {
        return;
    }

    const uint8_t *it = data;
    const uint8_t *end = data + len;
    while (parseBlob(it, end)) {
    }
#else
    qWarning() << "OSM PBF file format not available!";
#endif
}
#ifdef HAVE_PROTOBUF
/** Parses one file block: a length-prefixed BlobHeader followed by its Blob.
 *
 *  Blobs of type "OSMData" are decoded (inflating zlib payloads when needed)
 *  and handed to parsePrimitiveBlock(); blobs of any other type are skipped.
 *
 *  @param it  Read cursor; advanced past the block on success. On failure it
 *             may already have been moved past the parts that were read.
 *  @param end One past the last readable input byte.
 *  @return @c true if a complete block was consumed, @c false at the end of
 *          the input, on truncated or malformed data, on decompression failure
 *          or when the blob is neither raw nor zlib-compressed.
 */
bool OsmPbfParser::parseBlob(const uint8_t *&it, const uint8_t *end)
{
    // Every block starts with the BlobHeader size as a big-endian 32 bit integer.
    if (std::distance(it, end) < static_cast<int>(sizeof(int32_t))) {
        return false;
    }
    int32_t blobHeaderSize = 0;
    std::memcpy(&blobHeaderSize, it, sizeof(int32_t));
    blobHeaderSize = qFromBigEndian(blobHeaderSize);
    it += sizeof(int32_t);

    if (blobHeaderSize < 0 || std::distance(it, end) < blobHeaderSize) {
        return false;
    }

    OSMPBF::BlobHeader blobHeader;
    if (!blobHeader.ParseFromArray(it, blobHeaderSize)) {
        return false;
    }
    it += blobHeaderSize;

    // datasize comes straight from the file: reject negative values before
    // using it as a length.
    const auto blobSize = blobHeader.datasize();
    if (blobSize < 0 || std::distance(it, end) < blobSize) {
        return false;
    }
    OSMPBF::Blob blob;
    if (!blob.ParseFromArray(it, blobSize)) {
        return false;
    }

    const uint8_t *dataBegin = nullptr;
    std::size_t dataSize = 0;
    if (blob.has_raw()) {
        // raw_size is optional for uncompressed blobs, so use the actual payload size.
        dataBegin = reinterpret_cast<const uint8_t*>(blob.raw().data());
        dataSize = blob.raw().size();
    } else if (blob.has_zlib_data()) {
        const auto rawSize = blob.raw_size();
        if (rawSize < 0) {
            return false;
        }
        m_zlibBuffer.resize(rawSize);

        z_stream zStream = {};
        // zlib's input pointer is not const-qualified in all versions; the input is only read.
        zStream.next_in = reinterpret_cast<uint8_t*>(const_cast<char*>(blob.zlib_data().data()));
        zStream.avail_in = static_cast<unsigned int>(blob.zlib_data().size());
        zStream.next_out = reinterpret_cast<uint8_t*>(m_zlibBuffer.data());
        zStream.avail_out = static_cast<unsigned int>(rawSize);

        if (inflateInit(&zStream) != Z_OK) {
            m_zlibBuffer.clear();
            return false;
        }
        const auto result = inflate(&zStream, Z_FINISH);
        // Only the bytes actually produced are valid; the rest of the buffer is uninitialized.
        dataSize = static_cast<std::size_t>(zStream.total_out);
        // Always release the zlib state, also when inflating failed.
        inflateEnd(&zStream);
        if (result != Z_STREAM_END) {
            m_zlibBuffer.clear();
            return false;
        }
        dataBegin = reinterpret_cast<const uint8_t*>(m_zlibBuffer.constData());
    } else {
        return false;
    }

    if (blobHeader.type() == "OSMData") {
        parsePrimitiveBlock(dataBegin, dataSize);
    }

    m_zlibBuffer.clear();
    it += blobSize;
    return true;
}
void OsmPbfParser::parsePrimitiveBlock(const uint8_t *data, std::size_t len)
{
OSMPBF::PrimitiveBlock block;
if (!block.ParseFromArray(data, len)) {
return;
}
for (int i = 0; i < block.primitivegroup_size(); ++i) {
const auto &group = block.primitivegroup(i);
if (group.nodes_size()) {
qWarning() << "non-dense nodes - not implemented yet!";
} else if (group.has_dense()) {
parseDenseNodes(block, group);
} else if (group.ways_size()) {
parseWays(block, group);
} else if (group.relations_size()) {
parseRelations(block, group);
}
}
}
/** Decodes the DenseNodes message of @p group and adds the nodes to the data set.
 *
 *  Ids, latitudes and longitudes are delta coded. Tags of all nodes share one
 *  array of (key index, value index) pairs in which a 0 ends the tags of the
 *  current node.
 *
 *  Input is treated as untrusted: only as many nodes as have an id, a latitude
 *  and a longitude are read, a key without a value at the end of the tag array
 *  is dropped, and tags referring to strings outside the string table are skipped.
 *
 *  @param block Enclosing primitive block, provides the string table.
 *  @param group Primitive group containing the dense nodes.
 */
void OsmPbfParser::parseDenseNodes(const OSMPBF::PrimitiveBlock &block, const OSMPBF::PrimitiveGroup &group)
{
    // Bind by reference: copying the message would duplicate all of its arrays.
    const auto &dense = group.dense();
    const auto &strings = block.stringtable();
    const auto isValidString = [&strings](int idx) {
        return idx >= 0 && idx < strings.s_size();
    };

    // Never index past the shortest of the three parallel arrays.
    int nodeCount = dense.id_size();
    if (dense.lat_size() < nodeCount) {
        nodeCount = dense.lat_size();
    }
    if (dense.lon_size() < nodeCount) {
        nodeCount = dense.lon_size();
    }
    const int tagCount = dense.keys_vals_size();

    int64_t idDelta = 0;
    int64_t latDelta = 0;
    int64_t lonDelta = 0;
    int tagIdx = 0;

    for (int i = 0; i < nodeCount; ++i) {
        idDelta += dense.id(i);
        latDelta += dense.lat(i);
        lonDelta += dense.lon(i);

        OSM::Node node;
        node.id = idDelta;
        // NOTE(review): the block's granularity, lat_offset and lon_offset are not
        // applied here, i.e. this assumes their defaults (100, 0, 0) - confirm.
        node.coordinate.latitude = latDelta + 900'000'000ll;
        node.coordinate.longitude = lonDelta + 1'800'000'000ll;

        while (tagIdx < tagCount) {
            const auto keyIdx = dense.keys_vals(tagIdx++);
            if (keyIdx == 0) {
                break; // end of this node's tags
            }
            if (tagIdx >= tagCount) {
                break; // truncated input: key without a value
            }
            const auto valIdx = dense.keys_vals(tagIdx++);
            if (!isValidString(keyIdx) || !isValidString(valIdx)) {
                continue;
            }

            const auto &value = strings.s(valIdx);
            OSM::Tag tag;
            tag.key = m_dataSet->makeTagKey(strings.s(keyIdx).data());
            // Pass the length explicitly so that the value is not cut at an embedded NUL.
            tag.value = QByteArray(value.data(), static_cast<int>(value.size()));
            OSM::setTag(node, std::move(tag));
        }

        m_dataSet->addNode(std::move(node));
    }
}
/** Decodes all ways of @p group and adds them to the data set.
 *
 *  Node references are delta coded per way. Tags are stored as two parallel
 *  arrays of string table indices; only as many tags as have both a key and a
 *  value are read, and tags referring to strings outside the string table are
 *  skipped.
 *
 *  @param block Enclosing primitive block, provides the string table.
 *  @param group Primitive group containing the ways.
 */
void OsmPbfParser::parseWays(const OSMPBF::PrimitiveBlock &block, const OSMPBF::PrimitiveGroup &group)
{
    const auto &strings = block.stringtable();
    const auto isValidString = [&strings](int64_t idx) {
        return idx >= 0 && idx < strings.s_size();
    };

    for (int i = 0; i < group.ways_size(); ++i) {
        const auto &w = group.ways(i);

        OSM::Way way;
        way.id = w.id();
        way.nodes.reserve(w.refs_size());

        int64_t idDelta = 0;
        for (int j = 0; j < w.refs_size(); ++j) {
            idDelta += w.refs(j);
            way.nodes.push_back(idDelta);
        }

        // keys and vals are parallel arrays: never index past the shorter one.
        const int tagCount = w.keys_size() < w.vals_size() ? w.keys_size() : w.vals_size();
        for (int j = 0; j < tagCount; ++j) {
            const auto keyIdx = w.keys(j);
            const auto valIdx = w.vals(j);
            if (!isValidString(keyIdx) || !isValidString(valIdx)) {
                continue;
            }

            const auto &value = strings.s(static_cast<int>(valIdx));
            OSM::Tag tag;
            tag.key = m_dataSet->makeTagKey(strings.s(static_cast<int>(keyIdx)).data());
            // Pass the length explicitly so that the value is not cut at an embedded NUL.
            tag.value = QByteArray(value.data(), static_cast<int>(value.size()));
            OSM::setTag(way, std::move(tag));
        }

        m_dataSet->addWay(std::move(way));
    }
}
/** Decodes all relations of @p group and adds them to the data set.
 *
 *  Members are stored as three parallel arrays (delta coded ids, role string
 *  indices, member types), tags as two parallel arrays of string table indices.
 *  Only as many entries as are present in all arrays of a set are read, and
 *  members or tags referring to strings outside the string table are skipped.
 *
 *  @param block Enclosing primitive block, provides the string table.
 *  @param group Primitive group containing the relations.
 */
void OsmPbfParser::parseRelations(const OSMPBF::PrimitiveBlock &block, const OSMPBF::PrimitiveGroup &group)
{
    const auto &strings = block.stringtable();
    const auto isValidString = [&strings](int64_t idx) {
        return idx >= 0 && idx < strings.s_size();
    };

    for (int i = 0; i < group.relations_size(); ++i) {
        const auto &r = group.relations(i);

        OSM::Relation rel;
        rel.id = r.id();

        // memids, roles_sid and types are parallel arrays: never index past the shortest one.
        int memberCount = r.memids_size();
        if (r.roles_sid_size() < memberCount) {
            memberCount = r.roles_sid_size();
        }
        if (r.types_size() < memberCount) {
            memberCount = r.types_size();
        }
        rel.members.reserve(memberCount);

        int64_t idDelta = 0;
        for (int j = 0; j < memberCount; ++j) {
            // Accumulate first: the delta chain has to stay intact for skipped members as well.
            idDelta += r.memids(j);

            const auto roleIdx = r.roles_sid(j);
            if (!isValidString(roleIdx)) {
                continue;
            }

            OSM::Member mem;
            mem.id = idDelta;
            mem.role = m_dataSet->makeRole(strings.s(roleIdx).data());
            const auto type = r.types(j);
            switch (type) {
                case OSMPBF::Relation_MemberType_NODE: mem.type = OSM::Type::Node; break;
                case OSMPBF::Relation_MemberType_WAY: mem.type = OSM::Type::Way; break;
                case OSMPBF::Relation_MemberType_RELATION: mem.type = OSM::Type::Relation; break;
            }
            rel.members.push_back(std::move(mem));
        }

        // keys and vals are parallel arrays: never index past the shorter one.
        const int tagCount = r.keys_size() < r.vals_size() ? r.keys_size() : r.vals_size();
        for (int j = 0; j < tagCount; ++j) {
            const auto keyIdx = r.keys(j);
            const auto valIdx = r.vals(j);
            if (!isValidString(keyIdx) || !isValidString(valIdx)) {
                continue;
            }

            const auto &value = strings.s(static_cast<int>(valIdx));
            OSM::Tag tag;
            tag.key = m_dataSet->makeTagKey(strings.s(static_cast<int>(keyIdx)).data());
            // Pass the length explicitly so that the value is not cut at an embedded NUL.
            tag.value = QByteArray(value.data(), static_cast<int>(value.size()));
            OSM::setTag(rel, std::move(tag));
        }

        m_dataSet->addRelation(std::move(rel));
    }
}
#endif
/*
SPDX-FileCopyrightText: 2020 Volker Krause <vkrause@kde.org>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#ifndef KOSM_OSMPBFPARSER_H
#define KOSM_OSMPBFPARSER_H
#include "datatypes.h"
namespace OSMPBF {
class PrimitiveBlock;
class PrimitiveGroup;
}
namespace OSM {
/** Parser of .osm.pbf files.
* Decoded nodes, ways and relations are added to the DataSet given to the
* constructor. When built without Protobuf support, parse() only logs a warning.
* @see https://wiki.openstreetmap.org/wiki/PBF_Format
*/
class OsmPbfParser
{
public:
/** Create a parser that fills @p dataSet. */
explicit OsmPbfParser(DataSet *dataSet);
/** Parse the given binary content.
* Feed this with QFile::map() for example.
* @param data Start of the OSM PBF content.
* @param len Size of the content in bytes.
*/
void parse(const uint8_t *data, std::size_t len);
private:
/** Parse a single file block at @p it and advance @p it behind it.
* @return @c false when no further block could be read.
*/
bool parseBlob(const uint8_t *&it, const uint8_t *end);
/** Decode a serialized PrimitiveBlock and process its primitive groups. */
void parsePrimitiveBlock(const uint8_t *data, std::size_t len);
/** Add the dense nodes of @p group to the data set. */
void parseDenseNodes(const OSMPBF::PrimitiveBlock &block, const OSMPBF::PrimitiveGroup &group);
/** Add the ways of @p group to the data set. */
void parseWays(const OSMPBF::PrimitiveBlock &block, const OSMPBF::PrimitiveGroup &group);
/** Add the relations of @p group to the data set. */
void parseRelations(const OSMPBF::PrimitiveBlock &block, const OSMPBF::PrimitiveGroup &group);
/** Destination for all parsed elements, as passed to the constructor. */
DataSet *m_dataSet = nullptr;
/** Buffer holding the decompressed content of the blob currently being parsed. */
QByteArray m_zlibBuffer;
};
}
#endif // KOSM_OSMPBFPARSER_H
/*
SPDX-FileCopyrightText: 2010 Scott A. Crosby. <scott@sacrosby.com>
SPDX-License-Identifier: MIT
*/
option optimize_for = LITE_RUNTIME;
option java_package = "crosby.binary";
package OSMPBF;
//protoc --java_out=../.. fileformat.proto
//
// STORAGE LAYER: Storing primitives.
//
// Payload of one file block, stored either uncompressed (raw) or in one of
// the compressed representations below.
message Blob {
optional bytes raw = 1; // No compression
optional int32 raw_size = 2; // When compressed, the uncompressed size
// Possible compressed versions of the data.
optional bytes zlib_data = 3;
// PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED.
optional bytes lzma_data = 4;
// Formerly used for bzip2 compressed data. Deprecated in 2010.
optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number.
}
/* A file contains a sequence of fileblock headers, each prefixed by
their length in network byte order, followed by a data block
containing the actual data. Types starting with a "_" are reserved.
*/
// Header preceding each data block in the file.
message BlobHeader {
// Type of the data block that follows.
required string type = 1;
optional bytes indexdata = 2;
// Size in bytes of the data block that follows this header.
required int32 datasize = 3;
}
/*
SPDX-FileCopyrightText: 2010 Scott A. Crosby. <scott@sacrosby.com>
SPDX-License-Identifier: MIT
*/
option optimize_for = LITE_RUNTIME;
option java_package = "crosby.binary";
package OSMPBF;
/* OSM Binary file format
This is the master schema file of the OSM binary file format. This
file is designed to support limited random-access and future
extendability.
A binary OSM file consists of a sequence of FileBlocks (please see
fileformat.proto). The first fileblock contains a serialized instance
of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that
contain the primitives.
Each primitiveblock is designed to be independently parsable. It
contains a string table storing all strings in that block (keys and
values in tags, roles in relations, usernames, etc.) as well as
metadata containing the precision of coordinates or timestamps in that
block.
A primitiveblock contains a sequence of primitive groups, each
containing primitives of the same type (nodes, densenodes, ways,
relations). Coordinates are stored in signed 64-bit integers. Lat&lon
are measured in units of <granularity> nanodegrees. The default
granularity of 100 nanodegrees corresponds to about 1cm on the ground,
and a full lat or lon fits into 32 bits.
Converting an integer to a latitude or longitude uses the formula:
$OUT = IN * granularity / 10**9$. Many encoding schemes use delta
coding when representing nodes and relations.
*/
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
/* Contains the file header. */
// File-level metadata: optional bounding box, feature lists, information
// about the writing program and Osmosis replication state.
message HeaderBlock {
optional HeaderBBox bbox = 1;
/* Additional tags to aid in parsing this dataset */
repeated string required_features = 4;
repeated string optional_features = 5;
optional string writingprogram = 16;
optional string source = 17; // From the bbox field.
/* Tags that allow continuing an Osmosis replication */
// replication timestamp, expressed in seconds since the epoch,
// otherwise the same value as in the "timestamp=..." field
// in the state.txt file used by Osmosis
optional int64 osmosis_replication_timestamp = 32;
// replication sequence number (sequenceNumber in state.txt)
optional int64 osmosis_replication_sequence_number = 33;
// replication base URL (from Osmosis' configuration.txt file)
optional string osmosis_replication_base_url = 34;
}
/** The bounding box field in the OSM header. BBOX, as used in the OSM
header. Units are always in nanodegrees -- they do not obey
granularity rules. */
// Bounding box edges, all in nanodegrees.
message HeaderBBox {
required sint64 left = 1;
required sint64 right = 2;
required sint64 top = 3;
required sint64 bottom = 4;
}
///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
// Independently parsable block of primitives, consisting of a string table,
// the primitive groups and the coordinate/date encoding parameters.
message PrimitiveBlock {
required StringTable stringtable = 1;
repeated PrimitiveGroup primitivegroup = 2;
// Granularity, units of nanodegrees, used to store coordinates in this block
optional int32 granularity = 17 [default=100];
// Offset value between the output coordinates and the granularity grid, in units of nanodegrees.
optional int64 lat_offset = 19 [default=0];
optional int64 lon_offset = 20 [default=0];
// Granularity of dates, normally represented in units of milliseconds since the 1970 epoch.
optional int32 date_granularity = 18 [default=1000];
// Proposed extension:
//optional BBox bbox = XX;
}
// Group of OSMPrimitives. All primitives in a group must be the same type.
// Holds primitives of exactly one kind: nodes, dense nodes, ways,
// relations or changesets.
message PrimitiveGroup {
repeated Node nodes = 1;
optional DenseNodes dense = 2;
repeated Way ways = 3;
repeated Relation relations = 4;
repeated ChangeSet changesets = 5;
}
/** String table, contains the common strings in each block.
Note that we reserve index '0' as a delimiter, so the entry at that
index in the table is ALWAYS blank and unused.
*/
// Strings of a block, referenced by their index from the primitives.
message StringTable {
// Entry 0 is reserved as a delimiter and always blank.
repeated bytes s = 1;
}
/* Optional metadata that may be included into each primitive. */
// Per-primitive metadata: version, timestamp, changeset, user and visibility.
message Info {
optional int32 version = 1 [default = -1];
optional int64 timestamp = 2;
optional int64 changeset = 3;
optional int32 uid = 4;
optional uint32 user_sid = 5; // String IDs
// The visible flag is used to store history information. It indicates that
// the current object version has been created by a delete operation on the
// OSM API.
// When a writer sets this flag, it MUST add a required_features tag with
// value "HistoricalInformation" to the HeaderBlock.
// If this flag is not available for some object it MUST be assumed to be
// true if the file has the required_features tag "HistoricalInformation"
// set.
optional bool visible = 6;
}
/** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */
// Column-wise variant of Info for DenseNodes: one entry per node in each
// array, mostly delta coded.
message DenseInfo {
repeated int32 version = 1 [packed = true];
repeated sint64 timestamp = 2 [packed = true]; // DELTA coded
repeated sint64 changeset = 3 [packed = true]; // DELTA coded
repeated sint32 uid = 4 [packed = true]; // DELTA coded
repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded
// The visible flag is used to store history information. It indicates that
// the current object version has been created by a delete operation on the
// OSM API.
// When a writer sets this flag, it MUST add a required_features tag with
// value "HistoricalInformation" to the HeaderBlock.
// If this flag is not available for some object it MUST be assumed to be
// true if the file has the required_features tag "HistoricalInformation"
// set.
repeated bool visible = 6 [packed = true];
}
// THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW.
// TODO: REMOVE THIS?
// Changeset stub: only the id is defined, the remaining fields below are
// commented out.
message ChangeSet {
required int64 id = 1;
//
// // Parallel arrays.
// repeated uint32 keys = 2 [packed = true]; // String IDs.
// repeated uint32 vals = 3 [packed = true]; // String IDs.
//
// optional Info info = 4;
// optional int64 created_at = 8;
// optional int64 closetime_delta = 9;
// optional bool open = 10;
// optional HeaderBBox bbox = 11;
}
// A single node in non-dense encoding: id, tags, optional metadata and position.
message Node {
required sint64 id = 1;
// Parallel arrays.
repeated uint32 keys = 2 [packed = true]; // String IDs.
repeated uint32 vals = 3 [packed = true]; // String IDs.
optional Info info = 4; // May be omitted in omitmeta
required sint64 lat = 8;
required sint64 lon = 9;
}
/* Used to densely represent a sequence of nodes that do not have any tags.
We represent these nodes columnwise as five columns: IDs, lats, and
lons, all delta coded. When metadata is not omitted,
We encode keys & vals for all nodes as a single array of integers
containing key-stringid and val-stringid, using a stringid of 0 as a
delimiter between nodes.
( (<keyid> <valid>)* '0' )*
*/
// Column-wise encoding of many nodes: entry i of id, lat and lon describes
// node i, each array being delta coded.
message DenseNodes {
repeated sint64 id = 1 [packed = true]; // DELTA coded
//repeated Info info = 4;
optional DenseInfo denseinfo = 5;
repeated sint64 lat = 8 [packed = true]; // DELTA coded
repeated sint64 lon = 9 [packed = true]; // DELTA coded
// Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless.
// Layout: (key string id, value string id) pairs, with a 0 ending the tags of each node.
repeated int32 keys_vals = 10 [packed = true];
}
// A way: id, tags, optional metadata and the delta coded references in refs.
message Way {
required int64 id = 1;
// Parallel arrays.
repeated uint32 keys = 2 [packed = true];
repeated uint32 vals = 3 [packed = true];
optional Info info = 4;
repeated sint64 refs = 8 [packed = true]; // DELTA coded
}
// A relation: id, tags, optional metadata and its members, which are stored
// as three parallel arrays (role, member id, member type).
message Relation {
// Kind of element a relation member refers to.
enum MemberType {
NODE = 0;
WAY = 1;
RELATION = 2;
}
required int64 id = 1;
// Parallel arrays.
repeated uint32 keys = 2 [packed = true];
repeated uint32 vals = 3 [packed = true];
optional Info info = 4;
// Parallel arrays
repeated int32 roles_sid = 8 [packed = true]; // This should have been defined as uint32 for consistency, but it is now too late to change it
repeated sint64 memids = 9 [packed = true]; // DELTA encoded
repeated MemberType types = 10 [packed = true];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment