def merge_lines(df, by):
    """Aggregate lines by ``by`` and line-merge each group with GEOS.

    Lines are first grouped into MultiLineStrings by ``aggregate_lines`` and
    then passed through ``pg.line_merge``, which joins them into LineStrings
    where possible.

    WARNING: this can be a bit slow.

    Parameters
    ----------
    df : GeoDataFrame
    by : string or list-like
        field(s) to aggregate by

    Returns
    -------
    GeoDataFrame
        Contains LineStrings only, or MultiLineStrings only if at least one
        merged geometry is still a MultiLineString.
    """
    merged = aggregate_lines(df, by)
    merged["geometry"] = pg.line_merge(merged.geometry.values.data)

    # type id 5 is MultiLineString in the pygeos geometry type enumeration
    type_ids = pg.get_type_id(merged["geometry"].values.data)
    is_multi = type_ids == 5

    if is_multi.any():
        # Keep a single geometry type in the column: wrap every remaining
        # LineString in its own MultiLineString (one distinct index per line).
        is_single = ~is_multi
        single_lines = merged.loc[is_single].geometry.values.data
        merged.loc[is_single, "geometry"] = pg.multilinestrings(
            single_lines, np.arange(is_single.sum())
        )

    return merged
def test_line_merge_geom_array():
    """line_merge on a list of geometries returns one result per input.

    For the two module-level fixtures used here, each result is expected to
    be equal to its input geometry.
    """
    inputs = [line_string, multi_line_string]
    merged = pygeos.line_merge(inputs)
    for position, expected in enumerate(inputs):
        assert pygeos.equals(merged[position], expected)
def remove_false_nodes(gdf):
    """
    Clean topology of existing LineString geometry by removal of nodes of degree 2.

    End points shared by exactly two line geometries are treated as false
    nodes; the lines meeting there are unioned and line-merged into a single
    geometry, which replaces the original pieces.

    Parameters
    ----------
    gdf : GeoDataFrame, GeoSeries, array of pygeos geometries
        (Multi)LineString data of street network

    Returns
    -------
    gdf : GeoDataFrame, GeoSeries
        If nothing needs fixing, a GeoDataFrame/GeoSeries input is returned
        unchanged. Otherwise the result is the exploded input (fresh integer
        index) with the affected lines dropped and the merged lines appended
        at the end. Array input is always returned as a GeoSeries.

    See also
    --------
    momepy.extend_lines
    momepy.close_gaps
    """
    # Local import keeps this block self-contained; ``pd.concat`` replaces
    # ``DataFrame.append``/``Series.append``, which were removed in pandas 2.0.
    import pandas as pd

    is_geopandas = isinstance(gdf, (gpd.GeoDataFrame, gpd.GeoSeries))
    if is_geopandas:
        # explode to avoid MultiLineStrings
        # double reset index due to the bug in GeoPandas explode
        df = gdf.reset_index(drop=True).explode().reset_index(drop=True)

        # get underlying pygeos geometry
        geom = df.geometry.values.data
    else:
        geom = np.asarray(gdf)
        # Wrap array input so the drop/concat logic below has an object to
        # work on (previously ``df`` was undefined on this path).
        df = gpd.GeoSeries(geom)

    # extract array of coordinates and number per geometry
    coords = pygeos.get_coordinates(geom)
    n_coords = pygeos.get_num_coordinates(geom)

    # positions of the first and last coordinate of every geometry within
    # the flat ``coords`` array; order is irrelevant because of np.unique
    ends = np.cumsum(n_coords)
    starts = ends - n_coords
    endpoint_ix = np.concatenate([starts, ends - 1])
    points = pygeos.points(np.unique(coords[endpoint_ix], axis=0))

    # query LineString geometry to identify points intersecting 2 geometries
    tree = pygeos.STRtree(geom)
    inp, res = tree.query_bulk(points, predicate="intersects")
    unique, counts = np.unique(inp, return_counts=True)
    merge = res[np.isin(inp, unique[counts == 2])]

    if len(merge) == 0:
        # nothing to fix (previously fell through and returned None)
        return gdf if is_geopandas else df

    # geometries touched by more than one false node chain into longer
    # components and must share a label
    dups = {
        item for item, count in collections.Counter(merge).items() if count > 1
    }

    # ``merge`` holds two geometry indices per false node, stored pairwise
    components = {}
    for label, (first, second) in enumerate(merge.reshape(-1, 2)):
        if first in dups or second in dups:
            if first in components:
                label = components[first]
            elif second in components:
                label = components[second]
        components[first] = label
        components[second] = label

    # group geometry indices by component label in a single pass
    members = collections.defaultdict(list)
    for geom_ix, label in components.items():
        members[label].append(geom_ix)

    # iterate through components and create new geometries
    new = [
        pygeos.line_merge(pygeos.union_all(geom[members[label]]))
        for label in set(components.values())
    ]

    # remove incorrect geometries and append fixed versions
    df = df.drop(merge)
    final = gpd.GeoSeries(new, crs=df.crs).explode().reset_index(drop=True)
    if isinstance(gdf, gpd.GeoDataFrame):
        fixed = gpd.GeoDataFrame(
            {df.geometry.name: final}, geometry=df.geometry.name
        )
        return pd.concat([df, fixed], ignore_index=True)
    return pd.concat([df, final], ignore_index=True)