Source code for stompy.spatial.join_features

from __future__ import print_function

from optparse import OptionParser

# try to connect features that really ought to be connected:
import numpy as np

import shapely.wkb,shapely.geometry
try:
    from osgeo import ogr
except ImportError:
    import ogr
import sys
import os.path
import six

from numpy.linalg import norm

from . import wkb2shp
from .. import utils

import logging
logging.basicConfig(level=logging.INFO)
log=logging.getLogger('join_features')

try:
    from shapely.prepared import prep as prepare_geometry
except ImportError:
    prepare_geometry=lambda x:x
    log.warning("Prepared geometries not available - tests will be slow")

try:
    from shapely.strtree import STRtree
except ImportError:
    # this will make complex inputs horrifically slow, but might
    # complete
    class STRtree(object):
        def __init__(self,geoms):
            self.geoms=geoms
        def query(self,g):
            return self.geoms

[docs]def progress_printer(str,steps_done=None,steps_total=None): if steps_done is not None and steps_total is not None: log.info( "%s -- %d%%"%(str,100.0*steps_done/steps_total) ) elif steps_done: log.info( "%s -- %d"%(str,steps_done) ) else: log.info(str)
progress_message = progress_printer # in a few cases (<0.01% ??) the prepared geometry will report a different answer # than the real geometry. But the prepared geomtries are an order of magnitude # faster... trust_prepared = True
[docs]def merge_lines(layer=None,segments=None): """ Given an ogr LineString layer, merge linestrings by matching endpoints, and return a list of arrays of points. if layer is given, it should be an ogr LineString layer if segments is given, it should be a list of numpy arrays, where each array is [N,2] giving points along a path. this version only handles *exact* matches between endpoints """ # a hash that maps x,y values to feature ids endpoints = {} features = {} # map feature id to a list of points # map old feature ids to new ones: remapper = {} progress_message("Reading features") def seg_iter(): if not layer: for fid,seg in enumerate(segments): yield fid,seg else: layer.ResetReading() while 1: feat = layer.GetNextFeature() if not feat: return fid = feat.GetFID() geo = feat.GetGeometryRef() # should be a linestring if geo is None: log.warning("Missing geometry - will skip") continue geom=shapely.wkb.loads(geo.ExportToWkb()) if geom.type == 'MultiLineString': geolist=geom.geoms else: geolist=[geom] for sub_idx,one_geom in enumerate(geolist): if one_geom.type != 'LineString': raise Exception("All (sub)features must be linestrings (fid=%s, %s)"%(fid,one_geom.type)) # read the points into a numpy array: points = np.array(one_geom.coords) # use tuples to keep sub-features distinct yield (fid,sub_idx),points for fid,points in seg_iter(): features[fid] = points start_point = tuple(points[0]) end_point = tuple(points[-1]) if start_point == end_point: continue if start_point not in endpoints: endpoints[start_point] = [] endpoints[start_point].append(fid) if end_point not in endpoints: endpoints[end_point] = [] endpoints[end_point].append(fid) remapper[fid] = fid # check on how many things match up: # almost every point has exactly two features - perfect! progress_message("%i possible matched features"%len(endpoints)) # toss out endpoints that don't have exactly two matches: endpoint_list = [] for k in endpoints: if len(endpoints[k]) == 2: endpoint_list.append(endpoints[k]) total_pairs = len(endpoint_list) pairs_processed = 0 # iterate over the end points, merging all exact matches: for matched_pair in endpoint_list: fidA,fidB = [remapper[fid] for fid in matched_pair] if fidA==fidB: continue pairs_processed += 1 if pairs_processed%1000==0: progress_message("Merge lines exact",pairs_processed,total_pairs) # coordsA = features[fidA] coordsB = features[fidB] # figure out how they go together, and figure out what point needs # to be redirected from featB: # also be sure to skip the repeated poin if all(coordsA[0]==coordsB[0]): coordsC = np.concatenate((coordsA[::-1],coordsB[1:])) redirect = coordsB[-1] elif all(coordsA[-1]==coordsB[0]): coordsC = np.concatenate((coordsA,coordsB[1:])) redirect = coordsB[-1] elif all(coordsA[0]==coordsB[-1]): coordsC = np.concatenate((coordsB,coordsA[1:])) redirect = coordsB[0] elif all(coordsA[-1]==coordsB[-1]): coordsC = np.concatenate((coordsA[:-1],coordsB[::-1])) redirect = coordsB[0] else: log.error( "No match:" ) log.error( "%s %s"%( fidA,fidB) ) log.error( "%s"%( coordsA[0]) ) log.error( "%s"%( coordsA[-1]) ) log.error( "%s"%( coordsB[0] ) ) log.error( "%s"%( coordsB[-1] ) ) raise Exception("hash says we have a match, but no good match found") # replace the geometry of featA features[fidA] = coordsC for k in remapper.keys(): if remapper[k] == fidB: remapper[k] = fidA # and delete featB del features[fidB] progress_message("merge completed") # cast to list for python 3 return list(features.values())
[docs]def tolerant_merge_lines(features,tolerance): """ expects features to be formatted like the output of merge_lines, i.e. a list of numpy arrays """ NO_MATCH =0 FIRST_FIRST=1 FIRST_LAST =2 LAST_FIRST =3 LAST_LAST =4 INIT_MATCH =5 # dummy value to kick-start the loop closed_already = [ all(feat[0]==feat[-1]) for feat in features] def check_match(pntsA,pntsB): if norm(pntsA[0]-pntsB[0]) <= tolerance: return FIRST_FIRST elif norm(pntsA[0]-pntsB[-1]) <= tolerance: return FIRST_LAST elif norm(pntsA[-1]-pntsB[0]) <= tolerance: return LAST_FIRST elif norm(pntsA[-1]-pntsB[-1]) <= tolerance: return LAST_LAST else: return NO_MATCH # how to do the matching: # nested loops? match the i-th feature against each jth other feature # if they match, merge j onto i, set j-th to None, and start scanning # again to match more features against i-th for i in range(len(features)): if features[i] is None: continue if closed_already[i]: continue progress_message("Merge lines tolerant",i,len(features)) # once we've tried to match the i-th feature against everybody # after i, there's no reason to look at it again, so the inner # loop starts at i+1 match = INIT_MATCH while match: match = NO_MATCH # check each subsequent feature for j in range(i+1,len(features)): if features[j] is None: continue # check next j-th if closed_already[j]: continue match = check_match(features[i], features[j]) # When merging, drop one point from the merge location # otherwise if they are very close we'll end up with numerical issues # related to repeated points. if match==FIRST_FIRST: features[i] = np.concatenate((features[i][::-1],features[j][1:])) elif match==FIRST_LAST: features[i] = np.concatenate((features[j],features[i][1:])) elif match==LAST_FIRST: features[i] = np.concatenate((features[i],features[j][1:])) elif match==LAST_LAST: features[i] = np.concatenate((features[i][:-1],features[j][::-1])) # if we get a match, we just merged the features and can # remove the j-th feature. if match != NO_MATCH: features[j] = None # at this point, though, our i-th feature has changed and # requires that we re-process matches against it, so # with match set non-zero, escape out of the j-loop # and the while loop will restart the j-loop. break # if we fall out of this loop and didn't have a match, we're done # with the i-th feature, so let the next iteration of the i-loop # run # this just eliminates None elements features = [f for f in features if f is not None] # Make an additional loop to see if there are rings that we need to close: for feat in features: delta = norm(feat[0] - feat[-1]) if delta > 0.0 and delta <= tolerance: log.info("tolerant_merge: joining a loop - dist = %f"%delta) feat[-1] = feat[0] return features
# how many of the features are closed, and return the one that isn't # since it will define the exterior ring in the output # if all the rings are closed, return the ring with the greatest area # and closed_p=True
[docs]def clean_degenerate_rings(point_lists,degen_shpname='degenerate_rings.shp'): """ Given a list of lists of points - filter out point lists which represent degenerate rings, writing the invalid rings to a shapefile degen_shpname, and returning a list of only the valid rings. Unclosed linestrings are passed through. set degen_shpname to None to disable that output. """ degen_lines = [] valid_lists = [] for i in range(len(point_lists)): point_list = point_lists[i] if all(point_list[0]!=point_list[-1]): valid_lists.append(point_list) else: # closed - check it's area poly = shapely.geometry.Polygon(point_list) try: a=poly.area valid_lists.append(point_list) except ValueError: log.error( "degenerate feature: %s"%i ) degen_line = shapely.geometry.LineString(point_list) degen_lines.append(degen_line) if degen_shpname is not None and len(degen_lines)>0: wkb2shp.wkb2shp(degen_shpname,degen_lines,srs_text='EPSG:26910',overwrite=True) return valid_lists
[docs]def find_exterior_ring(point_lists): open_strings = [] max_area = 0 max_area_id = None for i in range(len(point_lists)): point_list = point_lists[i] if all(point_list[0]!=point_list[-1]): open_strings.append(i) else: # closed - check it's area poly = shapely.geometry.Polygon(point_list) a = poly.area if a > max_area: max_area = a max_area_id = i if len(open_strings) > 1: log.error( "Wanted exactly 0 or 1 open strings, got %i"%len(open_strings) ) for i in open_strings: log.error(" Open string: %s"%( point_lists[i] ) ) raise Exception("Can't figure out who is the exterior ring") if len(open_strings) == 1: log.error("Choosing exterior ring based on it being the only open ring") log.error( "Endpoints: %s"%( point_lists[open_strings[0]][0],point_lists[open_strings[0]][-1] ) ) return open_strings[0],False else: log.info( "No open linestrings, resorting to choosing exterior ring by area" ) print("Selected exterior ring with area %.0f"%max_area) return max_area_id,True
[docs]def arc_to_close_line(points,n_arc_points=40): """ Given a list of points, return an arc that closes the linestring, and faces away from the centroid of the points """ # Find the centroid of the original points. geo = shapely.geometry.Polygon(points) centroid = np.array(geo.centroid) # for now, assume a 180 degree arc: arc_center = (points[0]+points[-1])/2.0 # the arc will get appended to the linestring, so find the initial vector from # the last point in the linestring: start_vector = points[-1] - arc_center arc_center_to_centroid = centroid - arc_center # if we are going CCW, then if cross(arc_center_to_centroid, start_vector) > 0: arc_dir = +1 else: arc_dir = -1 # how many steps in the arc? ultimately could be tied to the desired spatial # resolution, or at least that great (since it will get filtered down to the # desired resolution, but not filtered up) angles = np.linspace(0,arc_dir*np.pi,n_arc_points) arc_points = np.zeros((n_arc_points,2),np.float64) # rotate the start vector for i in range(n_arc_points): angle = angles[i] xx = np.cos(angle) xy = -np.sin(angle) yx = np.sin(angle) yy = np.cos(angle) new_x = start_vector[0]*xx + start_vector[1]*xy new_y = start_vector[0]*yx + start_vector[1]*yy arc_points[i] = arc_center + [new_x,new_y] return arc_points
[docs]def lines_to_polygons_slow(new_features,close_arc=False,single_feature=True,force_orientation=True): """ single_feature: False is not yet implemented! returns a list of Polygons and a list of features which were not part of a polygon force_orientation: ensure that interior rings have negative signed area """ assert single_feature ### Remove non-polygons - still not smart enough to handle duplicate points new_features = [f for f in new_features if len(f) > 2] ### Find exterior ring log.info("Finding exterior ring from %d linestrings"%len(new_features)) new_features = clean_degenerate_rings(new_features) exterior_id,closed_p = find_exterior_ring(new_features) if close_arc and not closed_p: ### Add an arc to close the exterior ring: # really this out to test whether or not it's necessary # to add the arc. closing_arc = arc_to_close_line(new_features[exterior_id]) new_features[exterior_id] = np.concatenate((new_features[exterior_id],closing_arc)) exterior = new_features[exterior_id] interiors = [new_features[i] for i in range(len(new_features)) if i!=exterior_id] ### Remove features that are not contained by the exterior ring: ext_poly = shapely.geometry.Polygon(exterior) if prepared is not None: prep_ext_poly = prepared.prep(ext_poly) else: prep_ext_poly = None new_interiors = [] extras = [] # features which were not inside exterior, but otherwise valid for i in range(len(interiors)): interior = interiors[i] if i%300==0: progress_message("Checking for orphan interior features",i,len(interiors)) if force_orientation and (utils.signed_area(interior) > 0): interior=interior[::-1] int_poly = shapely.geometry.Polygon(interior) # spaghetti logic if prep_ext_poly is None or prep_ext_poly.contains(int_poly): if prep_ext_poly and trust_prepared: new_interiors.append(interior) else: if ext_poly.contains(int_poly): new_interiors.append(interior) else: if prep_ext_poly is not None: log.warning( "A feature got through the prepared query, but the real query says it's outside the exterior") else: log.debug("Removing a feature that was outside the exterior ring" ) extras.append(interior) else: log.debug("Removing a feature that the fast query said was outside the exterior ring") extras.append(interior) # create a single polygon feature from all of the rings: poly_geom = shapely.geometry.Polygon(exterior,new_interiors) return [poly_geom],extras
# updated version, hopefully faster in the usual case of no open loops, but # multiple polygons
[docs]def lines_to_polygons(new_features,close_arc=False,single_feature=True,force_orientation=True, return_open=False,min_area=0.0): """ returns a list of Polygons and a list of features which were not part of a polygon force_orientation: ensure that interior rings have negative signed area return_open: if True, allow open linestrings, but they will be returned in a 3rd item. min_area: prune polygons with area below this threshold """ ### Remove non-polygons - still not smart enough to handle duplicate points new_features = [f for f in new_features if len(f) > 2] new_features = clean_degenerate_rings(new_features) open_strings=[] simple_polys=[] for i,point_list in enumerate(new_features): if np.any(point_list[0]!=point_list[-1]): open_strings.append(i) else: # closed - check it's area simple_polys.append(shapely.geometry.Polygon(point_list)) log.info("%d open strings, %d simple polygons"%(len(open_strings), len(simple_polys))) if len(open_strings): if not return_open: log.error("New version of lines_to_polygons is faster but intolerant. Cannot handle ") log.error("%d open strings"%len(open_strings)) log.error("First open string starts at %s"%(new_features[open_strings[0]][0])) raise Exception("No longer can handle open line strings") polys=[] # output polygons areas=np.array([p.area for p in simple_polys]) if min_area>0: select=areas>=min_area simple_polys=[p for p,a in zip(simple_polys,areas) if a>=min_area] areas=areas[select] # sort big to small ordering=np.argsort(-areas) simple_polys=[simple_polys[i] for i in ordering] areas=areas[ordering] log.info("Building index") # Because the index only hands back the poly, not an index. for i,p in enumerate(simple_polys): p.join_id=i index=STRtree(simple_polys) log.info("done building index") poly_geoms=[] # accumulate results assigned_p=[False]*len(simple_polys) unassigned_idxs=list(range(len(simple_polys))) while len(unassigned_idxs): ext_idx=unassigned_idxs.pop(0) # get first/biggest one if assigned_p[ext_idx]: continue # was included as a sub-feature already assigned_p[ext_idx]=True ext_poly=simple_polys[ext_idx] ### Find exterior ring log.info("Examining largest poly left with area=%f, %d potential interiors"% (ext_poly.area,len(unassigned_idxs))) prep_ext_poly = prepare_geometry(ext_poly) hits=index.query(ext_poly) hit_indexes=[p.join_id for p in hits] # this keeps us comparing large->small, needed to avoid # confusing islands in lake with lakes hit_indexes.sort() for i in utils.progress(hit_indexes): if assigned_p[i]: continue int_poly=simple_polys[i] if prep_ext_poly.contains(int_poly): # include that poly as an interior ext_poly=shapely.geometry.Polygon(ext_poly.exterior, list(ext_poly.interiors)+[int_poly.exterior]) prep_ext_poly=prepare_geometry(ext_poly) assigned_p[i]=True # lazily remove from unassigned_idxs at top of loop poly_geoms.append(ext_poly) if single_feature: break extras=[p for p,is_assigned in zip(simple_polys,assigned_p) if not is_assigned] if return_open: return poly_geoms,extras,open_strings else: return poly_geoms,extras
####### Running the actual steps ########
[docs]def vector_mag(vectors): """ vectors: xy vectors, shape [...,2] return magnitude (L2 norm) """ # equivalent to np.linalg.norm(axis=-1), but older numpy doesn't # allow axis keyword return np.sqrt(np.sum(vectors**2,axis=-1))
[docs]def process_layer(orig_layer,output_name,tolerance=0.0, create_polygons=False,close_arc=False, single_feature=True, remove_duplicates=True): """ remove_duplicates: if true, exactly duplicated nodes along a single path will be removed, i.e. the linestring A-B-B-C will become A-B-C. single_feature: only save the biggest feature """ orig_srs=None orig_srs_text=None if isinstance(orig_layer,str): ods = ogr.Open(orig_layer) orig_layer = ods.GetLayer(0) try: orig_srs=orig_layer.GetSpatialRef() orig_srs_text=orig_srs.ExportToWkt() except Exception as exc: log.error("Attempted to get spatial ref: %s"%exc) ### The actual geometry processing: ### ### <processing> new_features = merge_lines(orig_layer) if remove_duplicates: log.info("Checking the merged features for duplicate points" ) # possibly important here to have the duplicate test more stringent than # the tolerant_merge_lines. # also have to be careful about a string of points closely spaced - don't # want to remove all of them, just enough to keep the minimal spacing above # tolerance. short_tol = 0.5*tolerance for fi in range(len(new_features)): pnts = new_features[fi] valid = np.ones( len(pnts), np.bool8) # go with a slower but safer loop here - last_valid=0 for i in range(1,len(pnts)): if vector_mag( pnts[last_valid]-pnts[i] ) < short_tol: if i==len(pnts)-1: # special case to avoid moving the last vertex valid[last_valid] = False last_valid = i else: valid[i] = False else: last_valid = i # print "Ring %d: # invalid=%d / %d"%(i,sum(~valid),len(new_features[i])) new_features[fi] = new_features[fi][valid,:] if tolerance > 0.0: new_features = tolerant_merge_lines(new_features,tolerance) ### </processing> ### <output> if create_polygons: if single_feature: geoms,extras = lines_to_polygons(new_features,close_arc=close_arc,single_feature=True) else: # geoms=[] # unmatched=new_features # while len(unmatched): # one_poly,unmatched=lines_to_polygons(unmatched,close_arc=close_arc,single_feature=True) # geoms.append(one_poly[0]) geoms,extras=lines_to_polygons(new_features,close_arc=close_arc,single_feature=False) else: # Line output geoms = [shapely.geometry.LineString(pnts) for pnts in new_features] # Write it all out to a shapefile: progress_message("Writing output") kws={} if orig_srs_text is not None: kws['srs_text']=orig_srs_text wkb2shp.wkb2shp(output_name,geoms, overwrite=True,**kws) return output_name
if __name__ == '__main__': parser = OptionParser(usage="usage: %prog [options] input.shp output.shp") parser.add_option("-p", "--poly", help="create polygons from closed linestrings", action="store_true", dest='create_polygons',default=False) parser.add_option("-a", "--arc", dest="close_arc", default=False, action="store_true", help="close the largest open linestring with a circular arc") parser.add_option("-t","--tolerance", dest="tolerance", type="float", default=0.0, metavar="DISTANCE", help="Tolerance for joining two endpoints, in geographic units") parser.add_option("-m","--multiple", dest="single_feature", default=True, action="store_false",metavar="SINGLE_FEATURE") (options, args) = parser.parse_args() input_shp,output_shp = args ods = ogr.Open(input_shp) layer = ods.GetLayer(0) process_layer(layer,output_shp, create_polygons=options.create_polygons,close_arc=options.close_arc, tolerance=options.tolerance,single_feature=options.single_feature)