Source code for stompy.spatial.wkb2shp

"""
Functions for reading and writing shapefiles.

These are simple wrappers around OGR and Shapely
libraries.

"""
from __future__ import print_function
import six

try:
    from osgeo import ogr,osr
except ImportError:
    import ogr,osr

import glob,os,re

from shapely import wkb,wkt
from shapely.geometry import Polygon,LineString,Point,MultiPolygon,MultiLineString,MultiPoint
import shapely.geos
from shapely import ops # for transform()
from .geom_types import ogr2text,text2ogr
from . import proj_utils
import uuid

import numpy as np

[docs]def wkb2shp(shp_name, input_wkbs, srs_text='EPSG:26910', field_gen = lambda f: {}, fields = None, overwrite=False, geom_type=None, driver=None, layer_name=None): """ Save data to a shapefile. shp_name: filename.shp for writing the result or 'MEMORY' to return an in-memory ogr layer. input_wkbs: list of shapely geometry objects for each feature. They must all be the same geometry type (no mixing lines and polygons, etc.) There are three ways of specifying fields: field_gen: a function which will be called once for each feature, with the geometry as its argument, and returns a dict of fields. fields: a numpy structure array with named fields, or fields: a dict of field names, with each dictionary entry holding a sequence of field values. srs_text: sets the projection information when writing the shapefile. Expects a string, for example 'EPSG:3095' or 'WGS84'. driver: Directly specify an alternative driver, such as GPKG. if None, assumed shapefile, unless shp_name is 'memory' in which case create an in-Memory layer. for GPKG, the optional layer_name argument can be used to name the layer which would default to the basename of the shp_name otherwise """ if layer_name is None: layer_name=shp_name if driver=='GPKG': drv = ogr.GetDriverByName('GPKG') new_ds = drv.CreateDataSource(shp_name) if shp_name.lower()=='memory': drv = ogr.GetDriverByName('Memory') new_ds = drv.CreateDataSource("mem_" + uuid.uuid1().hex) else: if os.path.exists(shp_name): if shp_name[-4:] == '.shp': if overwrite: # remove any matching files: print("Removing the old to make way for the new") os.unlink(shp_name) else: raise Exception("Shapefile exists, but overwrite is False") # open the output shapefile: drv = ogr.GetDriverByName('ESRI Shapefile') new_ds = drv.CreateDataSource(shp_name) if isinstance(srs_text,osr.SpatialReference): srs = srs_text else: srs = osr.SpatialReference() srs.SetFromUserInput(srs_text) ## Depending on the inputs, populate # geoms - a list or array of shapely geometries # field_names - ordered list of field names # field_values - list of lists of field values # Case 1: all the data is packed into a numpy struct array geoms = input_wkbs if fields is not None and type(fields) == list: # sub case - fields is a list of dicts field_iter = iter(fields) field_gen = lambda x: six.next(field_iter) fields = None if fields is not None and isinstance(fields,dict): field_names=list(fields.keys()) N=len(fields[field_names[0]]) field_values=[None]*N for n in range(N): row=[fields[fname][n] for fname in field_names] field_values[n]=row elif fields is not None and isinstance(fields,np.ndarray): dt = fields.dtype # Note that each field may itself have some shape - so we need to enumerate those # dimensions, too. field_names = [] for name in dt.names: # ndindex iterates over tuples which index successive elements of the field for index in np.ndindex( dt[name].shape ): name_idx = name + "_".join([str(i) for i in index]) field_names.append(name_idx) field_values = [] for i in range(len(fields)): fields_onerow = [] for name in dt.names: for index in np.ndindex( dt[name].shape ): if index != (): fields_onerow.append( fields[i][name][index] ) else: fields_onerow.append( fields[i][name] ) field_values.append( fields_onerow ) else: # Case 2: geometries and a field generator are specified field_dicts = [] for g in geoms: field_dicts.append( field_gen(g) ) # py3k: .keys() is a dict_keys object, not 100% compatible with a list. field_names = list(field_dicts[0].keys()) field_values = [] for i in range(len(input_wkbs)): field_values.append( [field_dicts[i][k] for k in field_names] ) for n in field_names: if len(n)>10: raise Exception("Cannot have field names longer than 10 characters") if geom_type is None: # find it by querying the features - minor bug - this only # works when shapely geometries were passed in. types = np.array( [text2ogr[g.type] for g in geoms] ) geom_type = int(types.max()) # print "Chose geometry type to be %s"%ogr2text[geom_type] new_layer = new_ds.CreateLayer(layer_name, srs=srs, geom_type=geom_type) # setup the feature definition: # create fields based on the field key/value pairs # return for the first wkb file casters = [] for field_i,key in enumerate(field_names): val = field_values[0][field_i] if type(val) == int or isinstance(val,np.integer): field_def = ogr.FieldDefn(key,ogr.OFTInteger) casters.append( int ) elif isinstance(val,np.floating): # above: use np.float, as it seems to be more compatible with # 32-bit and 64-bit floats. # This is an old bug - seems to work without this in the modern # era, and in turn, asscalar does *not* work with list of lists # # a numpy array of float32 ends up with # # a type here of <type 'numpy.float32'>, # # which doesn't match isinstance(...,float) # # asscalar helps out with that # 2018-07: np.floating is maybe the proper solution. print( "float valued key is %s"%key) field_def = ogr.FieldDefn(key,ogr.OFTReal) field_def.SetWidth(64) field_def.SetPrecision(10) casters.append( float ) else: field_def = ogr.FieldDefn(key,ogr.OFTString) casters.append( str ) # print "Field name is %s"%key new_layer.CreateField( field_def ) for i,geom in enumerate(geoms): feature_fields = field_values[i] # print "Processing: ",feature_fields if type(geom) == str: fp = open(wkb_file,'rb') geom_wkbs = [fp.read()] fp.close() elif type(geom) in (Polygon,LineString,Point): geom_wkbs = [geom.wkb] elif type(geom) in (MultiPolygon,MultiLineString,MultiPoint): geom_wkbs = [g.wkb for g in geom.geoms] for geom_wkb in geom_wkbs: feat_geom = ogr.CreateGeometryFromWkb(geom_wkb) feat = ogr.Feature( new_layer.GetLayerDefn() ) feat.SetGeometryDirectly(feat_geom) for i,val in enumerate(feature_fields): feat.SetField(str(field_names[i]),casters[i](feature_fields[i])) new_layer.CreateFeature(feat) feat.Destroy() if shp_name!="Memory": new_layer.SyncToDisk() else: return new_ds
# kind of the reverse of the above
[docs]def shp2geom(shp_fn,use_wkt=False,target_srs=None, source_srs=None,return_srs=False, query=None, fold_to_lower=False): """ Read a shapefile into memory as a numpy array. Data is returned as a record array, with geometry as a shapely geometry object in the 'geom' field. target_srs: input suitable for osgeo.osr.SetFromUserInput(), or an existing osr.SpatialReference, to specify a projection to which the data should be projected. If this is specified but the shapefile does not specify a projection, and source_srs is not given, then an exception is raised. source_srs will override the projection in the shapefile if specified. fold_to_lower: fold field names to lower case. return_srs: return a tuple, second item being the text representation of the project, or None if no projection information was found. """ ods = ogr.Open(shp_fn) if ods is None: raise ValueError("File '%s' corrupt or not found"%shp_fn) layer = ods.GetLayer(0) if query is not None: layer.SetAttributeFilter(query) if target_srs is not None: # potentially transform on the fly if source_srs is None: source_srs=layer.GetSpatialRef() if source_srs is None: raise Exception("Reprojection requested, but no source reference available") mapper=proj_utils.mapper(source_srs,target_srs) # have to massage it a bit to suit shapely's calling convention def xform(x,y,z=None): # x,y,z may be scalar or array # ugly code... annoying code... if z is None: xy=np.moveaxis( np.array([x,y]), 0, -1 ) xyp=mapper(xy) return xyp[...,0], xyp[...,1] else: xyz=np.moveaxis( np.array([x,y,z]), 0, -1 ) xyzp=mapper(xyz) return xyzp[...,0],xyzp[...,1],xyzp[...,2] def geom_xform(g): return ops.transform(xform,g) else: target_srs=layer.GetSpatialRef() def geom_xform(g): return g feat = layer.GetNextFeature() defn = feat.GetDefnRef() fld_count = defn.GetFieldCount() fields = [] for i in range(fld_count): fdef =defn.GetFieldDefn(i) name = fdef.name if fold_to_lower: name=name.lower() ogr_type = fdef.GetTypeName() if ogr_type == 'String': np_type = object getter = lambda f,i=i: f.GetFieldAsString(i) elif ogr_type =='Integer': np_type = np.int32 getter = lambda f,i=i: f.GetFieldAsInteger(i) elif ogr_type == 'Date': np_type = '<M8[s]' # np.datetime64 # this handles null and real dates, whereas GetFieldAsDateTime # would take more finagling to deal with nulls. getter = lambda f,i=i: np.datetime64(f.GetFieldAsString(i).replace('/','-')) else: np_type = np.float64 getter = lambda f,i=i: f.GetFieldAsDouble(i) fields.append( (i,name,np_type,getter) ) # And one for the geometry def rdr(f): # The try..except block is from olden days when OGR was not stable # to weird geometries. geo_ref=f.GetGeometryRef() if geo_ref is None: # this is possible, for example, in QGIS delete all nodes of a # line, but don't delete the actual feature. return None #try: if use_wkt: geo=wkt.loads( f.GetGeometryRef().ExportToWkt() ) else: geo=wkb.loads( f.GetGeometryRef().ExportToWkb() ) geo=geom_xform(geo) #except: # geo=None return geo fields.append( (None,'geom',object,rdr) ) layer_dtype = [ (name,np_type) for i,name,np_type,getter in fields] recs = [] layer.ResetReading() while 1: feat = layer.GetNextFeature() if feat is None: break try: field_vals = [getter(feat) for i,name,np_type,getter in fields] except shapely.geos.ReadingError: print("Failed to load geometry for feature") continue field_array = tuple(field_vals) recs.append(field_array) recs = np.array( recs, dtype=layer_dtype) if return_srs: return recs, target_srs.ExportToWkt() else: return recs