Commit 6863aa70 authored by Akshat Tandon

Making script more general so that it runs for datasets other than Natural Earth.

The script uses some metadata key values which are specific only to the Natural Earth dataset. This change enables checking of specific keys before usage to prevent crashes at runtime in case the SHP dataset is not from Natural Earth.
parent c8503e1d
#!/usr/bin/python3 #!/usr/bin/python3
""" """
This script is designed to act as assistance in converting shapefiles This script is designed to act as assistance in converting shapefiles
to OpenStreetMap data. This file is optimized and tested with MassGIS to OpenStreetMap data. This file is optimized and tested with MassGIS
shapefiles, converted to EPSG:4326 before being passed to the script. shapefiles, converted to EPSG:4326 before being passed to the script.
You can perform this conversion with You can perform this conversion with
ogr2ogr -t_srs EPSG:4326 new_file.shp old_file.shp ogr2ogr -t_srs EPSG:4326 new_file.shp old_file.shp
It is expected that you will modify the fixed_tags, tag_mapping, and It is expected that you will modify the fixed_tags, tag_mapping, and
boring_tags attributes of this script before running. You should read, boring_tags attributes of this script before running. You should read,
or at least skim, the code up until it says: or at least skim, the code up until it says:
DO NOT CHANGE AFTER THIS LINE. DO NOT CHANGE AFTER THIS LINE.
to accommodate your own data. to accommodate your own data.
""" """
__author__ = "Christopher Schmidt <crschmidt@crschmidt.net>" __author__ = "Christopher Schmidt <crschmidt@crschmidt.net>"
__version__ = "$Id$" __version__ = "$Id$"
gdal_install = """ gdal_install = """
Installing GDAL depends on your platform. Information is available at: Installing GDAL depends on your platform. Information is available at:
http://trac.osgeo.org/gdal/wiki/DownloadingGdalBinaries http://trac.osgeo.org/gdal/wiki/DownloadingGdalBinaries
For Debian-based systems: For Debian-based systems:
apt-get install python-gdal apt-get install python-gdal
will usually suffice. will usually suffice.
""" """
# These tags are attached to all exterior ways. You can put any key/value pairs # These tags are attached to all exterior ways. You can put any key/value pairs
# in this dictionary. # in this dictionary.
fixed_tags = { fixed_tags = {
'source': 'Natural Earth (http://www.naturalearthdata.com/)', 'source': 'Natural Earth (http://www.naturalearthdata.com/)',
'created_by': 'polyshp2osm' 'created_by': 'polyshp2osm'
} }
feat_dict = {} feat_dict = {}
node_dict = {} node_dict = {}
non_geom = 0 non_geom = 0
eflag = False eflag = False
nodes = [] #(id, lon, lat, tags) nodes = [] #(id, lon, lat, tags)
ways = [] #(id, node_refs, tags) ways = [] #(id, node_refs, tags)
relations = [] #(id, ways) relations = [] #(id, ways)
non_polygons = ['Admin-1 aggregation', 'Admin-1 minor island', 'Admin-1 scale rank'] non_polygons = ['Admin-1 aggregation', 'Admin-1 minor island', 'Admin-1 scale rank']
# Here are a number of functions: These functions define tag mappings. The API # Here are a number of functions: These functions define tag mappings. The API
# For these functions is that they are passed the attributes from a feature, # For these functions is that they are passed the attributes from a feature,
# and they return a list of two-tuples which match to key/value pairs. # and they return a list of two-tuples which match to key/value pairs.
def access(data): def access(data):
"""Access restrictions.""" """Access restrictions."""
keys = { keys = {
...@@ -66,7 +66,7 @@ def access(data): ...@@ -66,7 +66,7 @@ def access(data):
if data['pub_access'] in keys: if data['pub_access'] in keys:
return [('access', keys[data['pub_access']])] return [('access', keys[data['pub_access']])]
return None return None
def protection(data): def protection(data):
keys = { keys = {
'P': 'perpetuity', 'P': 'perpetuity',
...@@ -77,7 +77,7 @@ def protection(data): ...@@ -77,7 +77,7 @@ def protection(data):
if data['lev_prot'] in keys: if data['lev_prot'] in keys:
return [('protected', keys[data['lev_prot']])] return [('protected', keys[data['lev_prot']])]
return None return None
def owner_type(data): def owner_type(data):
"""See wiki:Key:ownership""" """See wiki:Key:ownership"""
keys = { keys = {
...@@ -95,7 +95,7 @@ def owner_type(data): ...@@ -95,7 +95,7 @@ def owner_type(data):
if 'owner_type' in data: if 'owner_type' in data:
if data['owner_type'] in keys: if data['owner_type'] in keys:
return [['ownership', keys[data['owner_type']]]] return [['ownership', keys[data['owner_type']]]]
def purpose(data): def purpose(data):
"""Based on a discussion on IRC""" """Based on a discussion on IRC"""
keys = { keys = {
...@@ -113,8 +113,8 @@ def purpose(data): ...@@ -113,8 +113,8 @@ def purpose(data):
if 'prim_purp' in data: if 'prim_purp' in data:
if data['prim_purp'] in keys: if data['prim_purp'] in keys:
return keys[data['prim_purp']] return keys[data['prim_purp']]
def road_map(data): def road_map(data):
keys = { keys = {
#'Ferry Route': [('route','ferry')], #'Ferry Route': [('route','ferry')],
...@@ -129,7 +129,7 @@ def road_map(data): ...@@ -129,7 +129,7 @@ def road_map(data):
if 'type' in data: if 'type' in data:
if data['type'] in keys: if data['type'] in keys:
return keys[data['type']] return keys[data['type']]
def city_map(data): def city_map(data):
population = 0 population = 0
capital = 'no' capital = 'no'
...@@ -159,7 +159,7 @@ def mountain_map(data): ...@@ -159,7 +159,7 @@ def mountain_map(data):
elevation = data['elevation'] elevation = data['elevation']
temp = [('natural', 'peak'), ('ele', elevation)] temp = [('natural', 'peak'), ('ele', elevation)]
return temp return temp
def feature_class(data): def feature_class(data):
global non_fcla_dict global non_fcla_dict
keys = { keys = {
...@@ -297,14 +297,14 @@ def feature_class(data): ...@@ -297,14 +297,14 @@ def feature_class(data):
non_fcla_dict[data['featurecla']] += 1 non_fcla_dict[data['featurecla']] += 1
else: else:
non_fcla_dict[data['featurecla']] = 1 non_fcla_dict[data['featurecla']] = 1
def name_map(data):
    """Map the feature's 'name' attribute to an OSM 'name' tag.

    Returns a one-element list holding the ('name', value) pair when the
    attribute is present, otherwise None so that no tag is emitted.
    """
    if 'name' not in data:
        return None
    return [('name', data['name'])]
def name_tags(data): def name_tags(data):
"""This function returns two things: a 'pretty' name to use, and """This function returns two things: a 'pretty' name to use, and
may return a landuse of either 'cemetery' or 'forest' if the name may return a landuse of either 'cemetery' or 'forest' if the name
...@@ -319,10 +319,10 @@ def name_tags(data): ...@@ -319,10 +319,10 @@ def name_tags(data):
tags.append(['landuse', 'cemetery']) tags.append(['landuse', 'cemetery'])
elif "forest" in name.lower(): elif "forest" in name.lower():
tags.append(['landuse', 'forest']) tags.append(['landuse', 'forest'])
tags.append(['name', name]) tags.append(['name', name])
return tags return tags
def cal_date(data): def cal_date(data):
"""Return YYYY-MM-DD or YYYY formatted dates, based on """Return YYYY-MM-DD or YYYY formatted dates, based on
(m)m/(d)d/yyyy dates""" (m)m/(d)d/yyyy dates"""
...@@ -337,19 +337,19 @@ def cal_date(data): ...@@ -337,19 +337,19 @@ def cal_date(data):
print("Invalid date: %s" % date) print("Invalid date: %s" % date)
eflag = True eflag = True
return None return None
# The most important part of the code: define a set of key/value pairs # The most important part of the code: define a set of key/value pairs
# to iterate over to generate keys. This is a list of two-tuples: first # to iterate over to generate keys. This is a list of two-tuples: first
# is a 'key', which is only used if the second value is a string. In # is a 'key', which is only used if the second value is a string. In
# that case, it is a map of lowercased field names to OSM tag names: so # that case, it is a map of lowercased field names to OSM tag names: so
# fee_owner maps to 'owner' in the OSM output. # fee_owner maps to 'owner' in the OSM output.
# if the latter is callable (has a __call__; is a function), then that # if the latter is callable (has a __call__; is a function), then that
# method is called, passing in a dict of feature attributes with # method is called, passing in a dict of feature attributes with
# lowercased key names. Those functions can then return a list of # lowercased key names. Those functions can then return a list of
# two-tuples to be used as tags, or nothin' to skip the tags. # two-tuples to be used as tags, or nothin' to skip the tags.
tag_mapping = [ tag_mapping = [
('fee_owner', 'owner'), ('fee_owner', 'owner'),
('cal_date', cal_date), ('cal_date', cal_date),
...@@ -361,32 +361,32 @@ tag_mapping = [ ...@@ -361,32 +361,32 @@ tag_mapping = [
('featurecla', feature_class), ('featurecla', feature_class),
('name', name_map) ('name', name_map)
] ]
# These tags are not exported, even with the source data; this should be # These tags are not exported, even with the source data; this should be
# used for tags which are usually calculated in a GIS. AREA and LEN are # used for tags which are usually calculated in a GIS. AREA and LEN are
# common. # common.
boring_tags = [ 'AREA', 'LEN', 'GIS_ACRES'] boring_tags = [ 'AREA', 'LEN', 'GIS_ACRES']
# Namespace is used to prefix existing data attributes. If 'None', or # Namespace is used to prefix existing data attributes. If 'None', or
# '--no-source' is set, then source attributes are not exported, only # '--no-source' is set, then source attributes are not exported, only
# attributes in tag_mapping. # attributes in tag_mapping.
namespace = "natural_earth" namespace = "natural_earth"
#namespace = None #namespace = None
# Uncomment the "DONT_RUN = False" line to get started. # Uncomment the "DONT_RUN = False" line to get started.
#DONT_RUN = True #DONT_RUN = True
DONT_RUN = False DONT_RUN = False
# =========== DO NOT CHANGE AFTER THIS LINE. =========================== # =========== DO NOT CHANGE AFTER THIS LINE. ===========================
# Below here is regular code, part of the file. This is not designed to # Below here is regular code, part of the file. This is not designed to
# be modified by users. # be modified by users.
# ====================================================================== # ======================================================================
import sys import sys
try: try:
try: try:
from osgeo import ogr from osgeo import ogr
...@@ -401,14 +401,14 @@ except ImportError: ...@@ -401,14 +401,14 @@ except ImportError:
print("OGR Python Bindings not installed.\n%s" % gdal_install) print("OGR Python Bindings not installed.\n%s" % gdal_install)
sys.exit(1) sys.exit(1)
eflag = True eflag = True
def close_file():
    """Internal. Finish and close the current output file.

    Writes the closing </osm> tag and closes the module-level open_file.
    Does nothing if that file is already closed.
    """
    global open_file
    if open_file.closed:
        return
    open_file.write("</osm>")
    open_file.close()
def start_new_file(): def start_new_file():
""" Internal. Open a new file, closing existing file if neccesary.""" """ Internal. Open a new file, closing existing file if neccesary."""
global open_file, file_counter, node_dict global open_file, file_counter, node_dict
...@@ -419,21 +419,32 @@ def start_new_file(): ...@@ -419,21 +419,32 @@ def start_new_file():
print("<?xml version='1.0' encoding='UTF-8'?>" , end = '\n', file = open_file) print("<?xml version='1.0' encoding='UTF-8'?>" , end = '\n', file = open_file)
print("<osm version='0.5'>" , end = '\n', file = open_file) print("<osm version='0.5'>" , end = '\n', file = open_file)
node_dict = {} node_dict = {}
def clean_attr(val):
    """Internal. Make a value safe to embed in a single-quoted XML attribute.

    The value is converted with str(), the characters &, ', < and > are
    replaced by their predefined XML entities, and surrounding whitespace
    is stripped.

    The apostrophe is escaped as &apos; (not &quot;, which denotes a double
    quote) so that the original character survives a round trip through an
    XML parser. Double quotes need no escaping because the attributes this
    script writes are delimited by single quotes.
    """
    text = str(val)
    # '&' must be handled first so the entities inserted below are not
    # themselves re-escaped.
    replacements = (
        ("&", "&amp;"),
        ("'", "&apos;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for raw, entity in replacements:
        text = text.replace(raw, entity)
    return text.strip()
def check_featurecla(f):
    """Report whether the feature f carries a 'featurecla' field.

    f must expose a keys() method listing its field names. The presence of
    'featurecla' is taken to mean the shapefile comes from the Natural
    Earth dataset; callers use this to guard f['featurecla'] lookups.

    Returns True if the field is present, False otherwise.
    """
    return 'featurecla' in f.keys()
def add_point(f): def add_point(f):
"""Adds a point geometry to the OSM file""" """Adds a point geometry to the OSM file"""
global id_counter global id_counter
airport_metadata = None airport_metadata = None
pt = f.GetGeometryRef() pt = f.GetGeometryRef()
if f['featurecla'] == 'Airport': if check_featurecla(f):
airport_metadata = f if f['featurecla'] == 'Airport':
f = None airport_metadata = f
f = None
node_id = add_node(id_counter, pt.GetX(0), pt.GetY(0), 'POINT', f) node_id = add_node(id_counter, pt.GetX(0), pt.GetY(0), 'POINT', f)
if node_id == id_counter: if node_id == id_counter:
id_counter += 1 id_counter += 1
...@@ -441,7 +452,7 @@ def add_point(f): ...@@ -441,7 +452,7 @@ def add_point(f):
add_way_around_node(airport_metadata) add_way_around_node(airport_metadata)
def add_relation_multipolygon(geom, f): def add_relation_multipolygon(geom, f):
""" Writes the multipolygon relation to the OSM file, returns 0 if no relation is formed""" """ Writes the multipolygon relation to the OSM file, returns 0 if no relation is formed"""
global id_counter, file_counter, counter, file_name, open_file, namespace global id_counter, file_counter, counter, file_name, open_file, namespace
...@@ -452,7 +463,7 @@ def add_relation_multipolygon(geom, f): ...@@ -452,7 +463,7 @@ def add_relation_multipolygon(geom, f):
print('Error in writing relation') print('Error in writing relation')
return None return None
rel_ways.append(way_id) rel_ways.append(way_id)
if geom.GetGeometryCount() > 1: if geom.GetGeometryCount() > 1:
for i in range(1, geom.GetGeometryCount()): for i in range(1, geom.GetGeometryCount()):
way_id = add_way(geom.GetGeometryRef(i), f, False) way_id = add_way(geom.GetGeometryRef(i), f, False)
...@@ -461,13 +472,14 @@ def add_relation_multipolygon(geom, f): ...@@ -461,13 +472,14 @@ def add_relation_multipolygon(geom, f):
return None return None
rel_ways.append(way_id) rel_ways.append(way_id)
rel_id = id_counter rel_id = id_counter
if f['featurecla'] in non_polygons: if check_featurecla(f):
return 0 #means no relation is there if f['featurecla'] in non_polygons:
return 0 #means no relation is there
relations.append((rel_id, rel_ways)) relations.append((rel_id, rel_ways))
id_counter += 1 id_counter += 1
return rel_id #if rel_id return 0, means no relations is there return rel_id #if rel_id return 0, means no relations is there
def write_relation_multipolygon(relation): def write_relation_multipolygon(relation):
global open_file global open_file
print("<relation id='-%s'><tag k='type' v='multipolygon' />" % relation[0] , end = '\n', file = open_file) print("<relation id='-%s'><tag k='type' v='multipolygon' />" % relation[0] , end = '\n', file = open_file)
...@@ -475,7 +487,7 @@ def write_relation_multipolygon(relation): ...@@ -475,7 +487,7 @@ def write_relation_multipolygon(relation):
for way in relation[1][1:]: for way in relation[1][1:]:
print('<member type="way" ref="-%s" role="inner" />' % way , end = '\n', file = open_file) print('<member type="way" ref="-%s" role="inner" />' % way , end = '\n', file = open_file)
print("</relation>" , end = '\n', file = open_file) print("</relation>" , end = '\n', file = open_file)
def write_tags(f): def write_tags(f):
"""Writes the tags associated with a way or a relation""" """Writes the tags associated with a way or a relation"""
global id_counter, file_counter, counter, file_name, open_file, namespace global id_counter, file_counter, counter, file_name, open_file, namespace
...@@ -503,9 +515,10 @@ def write_tags(f): ...@@ -503,9 +515,10 @@ def write_tags(f):
for name, value in fixed_tags.items(): for name, value in fixed_tags.items():
print(" <tag k='%s' v='%s' />" % (name, clean_attr(value)) , end = '\n', file = open_file) print(" <tag k='%s' v='%s' />" % (name, clean_attr(value)) , end = '\n', file = open_file)
if f.GetGeometryRef().GetGeometryName() == 'POLYGON' or f.GetGeometryRef().GetGeometryName() == 'MULTIPOLYGON': if f.GetGeometryRef().GetGeometryName() == 'POLYGON' or f.GetGeometryRef().GetGeometryName() == 'MULTIPOLYGON':
if f['featurecla'] not in non_polygons: if check_featurecla(f):
print(" <tag k='area' v='yes' />" , end = '\n', file = open_file) if f['featurecla'] not in non_polygons:
print(" <tag k='area' v='yes' />" , end = '\n', file = open_file)
def add_way(geom, f, tag_flag): def add_way(geom, f, tag_flag):
""" Writes the way of a particular geometry to the OSM file""" """ Writes the way of a particular geometry to the OSM file"""
global open_file, id_counter, ways global open_file, id_counter, ways
...@@ -522,7 +535,7 @@ def add_way(geom, f, tag_flag): ...@@ -522,7 +535,7 @@ def add_way(geom, f, tag_flag):
tags = None tags = None
ways.append((way_id, node_refs, tags)) ways.append((way_id, node_refs, tags))
return way_id return way_id
def write_way(way): def write_way(way):
global open_file global open_file
print("<way id='-%s'>" % way[0] , end = '\n', file = open_file) print("<way id='-%s'>" % way[0] , end = '\n', file = open_file)
...@@ -531,7 +544,7 @@ def write_way(way): ...@@ -531,7 +544,7 @@ def write_way(way):
if way[2]: if way[2]:
write_tags(way[2]) write_tags(way[2])
print("</way>", end = '\n', file = open_file) print("</way>", end = '\n', file = open_file)
def add_way_nodes(geom, f): def add_way_nodes(geom, f):
"""Writes the nodes of a particular way""" """Writes the nodes of a particular way"""
global open_file, id_counter global open_file, id_counter
...@@ -553,8 +566,8 @@ def add_way_nodes(geom, f): ...@@ -553,8 +566,8 @@ def add_way_nodes(geom, f):
id_counter += 1 id_counter += 1
ids.append(node_id) ids.append(node_id)
return ids return ids
def add_node(num_id, lon, lat, geom_name, f): def add_node(num_id, lon, lat, geom_name, f):
""" Writes the node to the OSM file""" """ Writes the node to the OSM file"""
global open_file, node_dict global open_file, node_dict
...@@ -569,7 +582,7 @@ def add_node(num_id, lon, lat, geom_name, f): ...@@ -569,7 +582,7 @@ def add_node(num_id, lon, lat, geom_name, f):
nodes.append((num_id, lon, lat, None)) nodes.append((num_id, lon, lat, None))
node_dict[key] = num_id node_dict[key] = num_id
return num_id return num_id
def write_node(node): def write_node(node):
global open_file global open_file
if node[3] == None: if node[3] == None:
...@@ -578,7 +591,7 @@ def write_node(node): ...@@ -578,7 +591,7 @@ def write_node(node):
print("<node id='-%s' visible='true' lon='%s' lat='%s' >" % (node[0], node[1], node[2]), end = '\n', file = open_file) print("<node id='-%s' visible='true' lon='%s' lat='%s' >" % (node[0], node[1], node[2]), end = '\n', file = open_file)
write_tags(node[3]) write_tags(node[3])
print("</node>", end = '\n', file = open_file) print("</node>", end = '\n', file = open_file)
def add_way_around_node(f): def add_way_around_node(f):
""" Writes a way around a single point""" """ Writes a way around a single point"""
global id_counter, ways global id_counter, ways
...@@ -586,34 +599,34 @@ def add_way_around_node(f): ...@@ -586,34 +599,34 @@ def add_way_around_node(f):
ways.append((id_counter, [nid], f)) ways.append((id_counter, [nid], f))
id_counter += 1 id_counter += 1
open_file = None open_file = None
file_name = None file_name = None
id_counter = 1 id_counter = 1
file_counter = 0 file_counter = 0
counter = 0 counter = 0
geom_counter = {} geom_counter = {}
class AppError(Exception):
    """Error raised by the converter, e.g. when OGR cannot open an input file."""
def run(filenames, slice_count=1, obj_count=5000000, output_location=None, no_source=False): def run(filenames, slice_count=1, obj_count=5000000, output_location=None, no_source=False):
"""Run the converter. Requires open_file, file_name, id_counter, """Run the converter. Requires open_file, file_name, id_counter,
file_counter, counter to be defined in global space; not really a very good file_counter, counter to be defined in global space; not really a very good
singleton.""" singleton."""
global id_counter, file_counter, counter, file_name, open_file, namespace, non_geom, non_fcla_dict, nodes, ways, relations global id_counter, file_counter, counter, file_name, open_file, namespace, non_geom, non_fcla_dict, nodes, ways, relations
open_file = None open_file = None
file_name = None file_name = None
id_counter = 1 id_counter = 1
file_counter = 0 file_counter = 0
counter = 0 counter = 0
geom_counter = {} geom_counter = {}
if output_location: if output_location:
file_name = output_location file_name = output_location
...@@ -624,7 +637,7 @@ def run(filenames, slice_count=1, obj_count=5000000, output_location=None, no_so ...@@ -624,7 +637,7 @@ def run(filenames, slice_count=1, obj_count=5000000, output_location=None, no_so
if no_source: if no_source:
namespace=None namespace=None
ds = ogr.Open(filename) ds = ogr.Open(filename)
if not ds: if not ds:
raise AppError("OGR Could not open the file %s" % filename) raise AppError("OGR Could not open the file %s" % filename)
...@@ -632,28 +645,28 @@ def run(filenames, slice_count=1, obj_count=5000000, output_location=None, no_so ...@@ -632,28 +645,28 @@ def run(filenames, slice_count=1, obj_count=5000000, output_location=None, no_so
l = ds.GetLayer(0) l = ds.GetLayer(0)
max_objs_per_file = obj_count max_objs_per_file = obj_count
extent = l.GetExtent() extent = l.GetExtent()
#if extent[0] < -180 or extent[0] > 180 or extent[2] < -90 or extent[2] > 90: #if extent[0] < -180 or extent[0] > 180 or extent[2] < -90 or extent[2] > 90:
# raise AppError("Extent does not look like degrees; are you sure it is? \n(%s, %s, %s, %s)" % (extent[0], extent[2], extent[1], extent[3])) # raise AppError("Extent does not look like degrees; are you sure it is? \n(%s, %s, %s, %s)" % (extent[0], extent[2], extent[1], extent[3]))
slice_width = (extent[1] - extent[0]) / slice_count slice_width = (extent[1] - extent[0]) / slice_count
seen = {} seen = {}
print("Running %s slices with %s base filename against shapefile %s" % ( print(