def test_simplify_nan():
    """Missing geometries or NaN tolerances simplify to empty; valid pairs survive."""
    geoms = np.array([point, np.nan, np.nan, None, point])
    tolerances = np.array([np.nan, 1.0, np.nan, 1.0, 1.0])
    result = pygeos.simplify(geoms, tolerances)
    # only the final element pairs a real geometry with a real tolerance
    assert pygeos.equals(result[-1], point)
    assert pygeos.is_empty(result[:-1]).all()
def simplify(data, tolerance, preserve_topology=True):
    """Vectorized simplify over an array of geometries.

    Dispatches to pygeos when available, otherwise applies shapely's
    ``simplify`` method element-wise.  ``preserve_topology`` is always
    passed explicitly because pygeos uses a different default.
    """
    if not compat.USE_PYGEOS:
        # shapely: simplify is a method (not a property), so the generic
        # _unary_geo helper cannot be used -- build the object array by hand
        simplified = [
            geom.simplify(tolerance, preserve_topology=preserve_topology)
            for geom in data
        ]
        result = np.empty(len(data), dtype=object)
        result[:] = simplified
        return result
    # pygeos path; note preserve_topology has a different default in pygeos
    return pygeos.simplify(data, tolerance, preserve_topology=preserve_topology)
def constructive(arr, operation, *args, **kwargs):
    """Apply a named pygeos constructive operation to an array of WKB geometries.

    Replaces a 19-branch if/elif chain (which repeated ``pg.from_wkb(arr)``
    in every branch) with two dispatch tables; behavior is unchanged.

    Parameters
    ----------
    arr : array-like of bytes
        WKB-encoded geometries.
    operation : str
        Name of the pygeos constructive operation (e.g. 'buffer', 'centroid').
    *args
        Positional arguments, forwarded only to operations that take them
        ('buffer', 'clip_by_rect', 'offset_curve', 'simplify', 'snap');
        silently ignored for the unary operations, as before.
    **kwargs
        Keyword arguments forwarded to the operation.

    Returns
    -------
    array of bytes or None
        WKB-encoded results, or None (with a warning) for an unsupported
        operation name.
    """
    # operations that take extra positional arguments (radius, tolerance, ...)
    positional_ops = {
        'buffer': pg.buffer,
        'clip_by_rect': pg.clip_by_rect,
        'offset_curve': pg.offset_curve,
        'simplify': pg.simplify,
        'snap': pg.snap,
    }
    # purely unary operations (keyword arguments only)
    unary_ops = {
        'boundary': pg.boundary,
        'build_area': pg.build_area,
        'centroid': pg.centroid,
        'convex_hull': pg.convex_hull,
        'delaunay_triangles': pg.delaunay_triangles,
        'envelope': pg.envelope,
        'extract_unique_points': pg.extract_unique_points,
        'make_valid': pg.make_valid,
        'normalize': pg.normalize,
        'point_on_surface': pg.point_on_surface,
        'reverse': pg.reverse,
        'voronoi_polygons': pg.voronoi_polygons,
    }

    if operation in positional_ops:
        geometries = positional_ops[operation](pg.from_wkb(arr), *args, **kwargs)
    elif operation in unary_ops:
        geometries = unary_ops[operation](pg.from_wkb(arr), **kwargs)
    else:
        warnings.warn(f'Operation {operation} not supported.')
        return None
    return pg.to_wkb(geometries)
def test_simplify():
    """Tolerance broadcasts: 0 keeps all three vertices, 1.0 drops the middle one."""
    geom = pygeos.linestrings([[0, 0], [0.1, 1], [0, 2]])
    simplified = pygeos.simplify(geom, [0, 1.0])
    vertex_counts = pygeos.get_num_points(simplified).tolist()
    assert vertex_counts == [3, 2]
def global_shapefiles(data_path, regionalized=False, assigned_level=1):
    """Clean and simplify GADM shapes and add columns needed downstream.

    Uses the latest GADM data, split by administrative level:
    https://gadm.org/download_world.html

    Parameters
    ----------
    data_path : str
        Root data directory; must contain ``GADM36/gadm36_levels.gpkg``.
        Output is written under ``<data_path>/cleaned_shapes``.
    regionalized : bool, optional
        Default **False** creates the ``global_countries.gpkg`` file
        (level 0).  Set to **True** to also create the global regions file
        for ``assigned_level`` (requires the country file to exist first).
    assigned_level : int, optional
        GADM level (1-5) to process when ``regionalized`` is True.
    """
    gadm_path = os.path.join(data_path, 'GADM36', 'gadm36_levels.gpkg')
    cleaned_shapes_path = os.path.join(data_path, 'cleaned_shapes')
    if not os.path.exists(cleaned_shapes_path):
        os.makedirs(cleaned_shapes_path)

    # path to country GADM file
    if regionalized == False:
        # load country file
        gadm_level0 = pandas.DataFrame(
            geopandas.read_file(gadm_path, layer='level0'))

        # convert to pygeos for fast vectorized geometry operations
        tqdm.pandas(desc='Convert geometries to pygeos')
        gadm_level0['geometry'] = gadm_level0.geometry.progress_apply(
            lambda x: pygeos.from_shapely(x))

        # remove antarctica, no roads there anyways
        gadm_level0 = gadm_level0.loc[~gadm_level0['NAME_0'].
                                      isin(['Antarctica'])]

        # remove tiny shapes to reduce size substantially
        tqdm.pandas(desc='Remove tiny shapes')
        gadm_level0['geometry'] = gadm_level0.progress_apply(
            remove_tiny_shapes, axis=1)

        # simplify geometry: simplify -> buffer(0.01) -> simplify again;
        # the buffer presumably heals slivers/self-intersections created by
        # the first pass -- TODO confirm intent
        tqdm.pandas(desc='Simplify geometry')
        gadm_level0.geometry = gadm_level0.geometry.progress_apply(
            lambda x: pygeos.simplify(pygeos.buffer(
                pygeos.simplify(x, tolerance=0.005, preserve_topology=True),
                0.01),
                tolerance=0.005,
                preserve_topology=True))

        # save to new country file
        glob_ctry_path = os.path.join(cleaned_shapes_path,
                                      'global_countries.gpkg')
        tqdm.pandas(desc='Convert geometries back to shapely')
        # round-trip via WKB; `loads` is presumably shapely.wkb.loads --
        # verify against the file's imports
        gadm_level0.geometry = gadm_level0.geometry.progress_apply(
            lambda x: loads(pygeos.to_wkb(x)))
        geopandas.GeoDataFrame(gadm_level0).to_file(glob_ctry_path,
                                                    layer='level0',
                                                    driver="GPKG")
    else:
        # this is dependent on the country file, so check whether that one is already created:
        glob_ctry_path = os.path.join(cleaned_shapes_path,
                                      'global_countries.gpkg')
        if os.path.exists(glob_ctry_path):
            gadm_level0 = geopandas.read_file(os.path.join(glob_ctry_path),
                                              layer='level0')
        else:
            print('ERROR: You need to create the country file first')
            return None

        # load region file
        gadm_level_x = pandas.DataFrame(
            geopandas.read_file(gadm_path,
                                layer='level{}'.format(assigned_level)))

        # convert to pygeos
        tqdm.pandas(desc='Convert geometries to pygeos')
        gadm_level_x['geometry'] = gadm_level_x.geometry.progress_apply(
            lambda x: pygeos.from_shapely(x))

        # remove tiny shapes to reduce size substantially
        tqdm.pandas(desc='Remove tiny shapes')
        gadm_level_x['geometry'] = gadm_level_x.progress_apply(
            remove_tiny_shapes, axis=1)

        # simplify geometry (same simplify -> buffer -> simplify pipeline
        # as the country branch above)
        tqdm.pandas(desc='Simplify geometry')
        gadm_level_x.geometry = gadm_level_x.geometry.progress_apply(
            lambda x: pygeos.simplify(pygeos.buffer(
                pygeos.simplify(x, tolerance=0.005, preserve_topology=True),
                0.01),
                tolerance=0.005,
                preserve_topology=True))

        # add some missing geometries from countries with no subregions
        get_missing_countries = list(
            set(list(gadm_level0.GID_0.unique())).difference(
                list(gadm_level_x.GID_0.unique())))

        # TO DO: GID_2 and lower tiers should first be filled by a tier above, rather then by the country file
        mis_country = gadm_level0.loc[gadm_level0['GID_0'].isin(
            get_missing_countries)]
        # NOTE(review): mis_country is a .loc slice of gadm_level0; the GID_*
        # assignments below may trigger SettingWithCopyWarning -- consider
        # adding .copy() here.
        # fabricate a synthetic GID for the assigned level, e.g. 'ABC.0_1'
        if assigned_level == 1:
            mis_country['GID_1'] = mis_country['GID_0'] + '.' + str(
                0) + '_' + str(1)
        elif assigned_level == 2:
            mis_country['GID_2'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '_' + str(1)
        elif assigned_level == 3:
            mis_country['GID_3'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '.' + str(0) + '_' + str(1)
        elif assigned_level == 4:
            mis_country['GID_4'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '.' + str(0) + '.' + str(0) + '_' + str(1)
        elif assigned_level == 5:
            mis_country['GID_5'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '.' + str(0) + '.' + str(0) + '.' + str(
                    0) + '_' + str(1)

        # back to shapely before concatenating with the (shapely) country rows
        tqdm.pandas(desc='Convert geometries back to shapely')
        gadm_level_x.geometry = gadm_level_x.geometry.progress_apply(
            lambda x: loads(pygeos.to_wkb(x)))

        # concat missing country to gadm levels
        gadm_level_x = geopandas.GeoDataFrame(
            pandas.concat([gadm_level_x, mis_country], ignore_index=True))
        gadm_level_x.reset_index(drop=True, inplace=True)

        # save to new regions file
        gadm_level_x.to_file(os.path.join(cleaned_shapes_path,
                                          'global_regions.gpkg'),
                             layer='level{}'.format(assigned_level),
                             driver="GPKG")
def dissolve_waterbodies(df, joins):
    """Dissolve waterbodies that overlap, duplicate, or otherwise touch each other.

    WARNING: some adjacent waterbodies are divided by dams, etc.  These
    will need to be accounted for later when snapping dams.

    Parameters
    ----------
    df : GeoDataFrame
        waterbodies, indexed by wbID
    joins : DataFrame
        waterbody / flowline joins

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (waterbodies, waterbody joins)
    """

    ### Join waterbodies to themselves to find overlaps
    start = time()
    to_agg = pd.DataFrame(sjoin(df.geometry, df.geometry))

    # drop the self-intersections (every geometry matches itself)
    to_agg = to_agg.loc[to_agg.index != to_agg.index_right].copy()
    print("Found {:,} waterbodies that touch or overlap".format(
        len(to_agg.index.unique())))

    if len(to_agg):
        # Use network (mathematical, not aquatic) adjacency analysis
        # to identify all sets of waterbodies that touch.
        # Construct an identity map from all wbIDs to their newID (will be new wbID after dissolve)
        grouped = to_agg.groupby(level=0).index_right.unique()
        network = nx.from_pandas_edgelist(
            grouped.explode().reset_index().rename(columns={
                "wbID": "index",
                "index_right": "wbID"
            }),
            "index",
            "wbID",
        )
        # each connected component becomes one dissolve group
        components = pd.Series(nx.connected_components(network)).apply(list)
        groups = pd.DataFrame(components.explode().rename("wbID"))
        # new group ids start above the current max wbID to avoid collisions
        next_id = df.index.max() + 1
        groups["group"] = (next_id + groups.index).astype("uint32")
        groups = groups.set_index("wbID")

        # assign group to polygons to aggregate
        to_agg = (to_agg.join(groups).reset_index().drop(
            columns=["index_right"]).drop_duplicates().set_index("wbID").join(
                df[["geometry", "FType"]]))

        ### Dissolve groups
        # Buffer geometries slightly to make sure that any which intersect actually overlap
        print("Buffering {:,} unique waterbodies before dissolving...".format(
            len(to_agg)))
        buffer_start = time()
        # TODO: use pg, and simplify since this creates a large number of vertices by default
        to_agg["geometry"] = pg.simplify(
            pg.buffer(to_agg.geometry, 0.1, quadsegs=1), 0.1)
        print("Buffer completed in {:.2f}s".format(time() - buffer_start))

        print("Dissolving...")
        dissolve_start = time()

        # NOTE: automatically takes the first FType
        # dissolved = to_agg.dissolve(by="group").reset_index(drop=True)
        dissolved = dissolve(to_agg, by="group")

        # flag any groups that failed to merge into a single polygon
        errors = (pg.get_type_id(
            dissolved.geometry) == pg.GeometryType.MULTIPOLYGON.value)
        if errors.max():
            print(
                "WARNING: Dissolve created {:,} multipolygons, these will cause errors later!"
                .format(errors.sum()))

        # this may create multipolygons if polygons that are dissolved don't sufficiently share overlapping geometries.
        # for these, we want to retain them as individual polygons
        # dissolved = dissolved.explode().reset_index(drop=True)
        # WARNING: this doesn't work with our logic below for figuring out groups associated with original wbIDs
        # since after exploding, we don't know what wbID went into what group

        # assign new IDs and update fields
        next_id = df.index.max() + 1
        dissolved["wbID"] = (next_id + dissolved.index).astype("uint32")
        dissolved["AreaSqKm"] = (pg.area(dissolved.geometry) *
                                 1e-6).astype("float32")
        # dissolved polygons no longer correspond to a single NHD feature
        dissolved["NHDPlusID"] = 0
        dissolved.NHDPlusID = dissolved.NHDPlusID.astype("uint64")
        dissolved.wbID = dissolved.wbID.astype("uint32")

        print(
            "Dissolved {:,} adjacent polygons into {:,} new polygons in {:.2f}s"
            .format(len(to_agg), len(dissolved), time() - dissolve_start))

        # remove waterbodies that were dissolved, and append the result
        # of the dissolve
        # NOTE(review): DataFrame.append is deprecated in pandas >= 1.4;
        # consider pd.concat when upgrading.
        df = (df.loc[~df.index.isin(to_agg.index)].reset_index().append(
            dissolved, ignore_index=True, sort=False).set_index("wbID"))

        # update joins to point at the new dissolved group ids
        ix = joins.loc[joins.wbID.isin(groups.index)].index
        # NOTE: this mapping will not work if explode() is used above
        joins.loc[ix, "wbID"] = joins.loc[ix].wbID.map(groups.group)

        # Group together ones that were dissolved above
        joins = joins.drop_duplicates().reset_index(drop=True)

    print("Done resolving overlapping waterbodies in {:.2f}s".format(time() -
                                                                     start))

    return df, joins
# NOTE(review): this chunk runs inside an enclosing loop not visible here;
# `level`, `flowlines`, `huc2`, `tmp_dir`, `mbtiles_files`, and GEO_CRS all
# come from that outer scope -- presumably one iteration per zoom-level
# config dict; confirm against the full file.
minzoom, maxzoom = level["zoom"]
simplification = level["simplification"]
size_threshold = level["size"]

print(
    f"Extracting size class >= {size_threshold} for zooms {minzoom} - {maxzoom} (simplifying to {simplification})"
)

# keep only flowlines large enough for this zoom range
subset = flowlines.loc[flowlines.sizeclass >= size_threshold].copy()

if maxzoom < 8:
    # exclude altered flowlines at low zooms
    subset = subset.loc[subset.mapcode < 2].copy()

# a simplification of 0 / None means "no simplification" at this level
if simplification:
    subset["geometry"] = pg.simplify(subset.geometry.values.data,
                                     simplification)

json_filename = tmp_dir / f"region{huc2}_{minzoom}_{maxzoom}_flowlines.json"
mbtiles_filename = (
    tmp_dir / f"region{huc2}_{minzoom}_{maxzoom}_flowlines.mbtiles")
mbtiles_files.append(mbtiles_filename)

# write as newline-delimited GeoJSON (input format for tippecanoe et al.)
write_dataframe(
    subset.to_crs(GEO_CRS),
    json_filename,
    driver="GeoJSONSeq",
)

del subset