def simp(x):
    """Simplify the main-road network of dataset ``x`` and store it as feather.

    Prints ``x``. When the ``<x>-edges.feather`` output does not exist yet,
    the roads obtained from ``roads(filename(x))`` are filtered to the highway
    classes listed below, simplified, and the resulting edges and nodes are
    written (geometries converted to WKB) to ``<x>-edges.feather`` and
    ``<x>-nodes.feather``. Nothing is returned.
    """
    print(x)

    roads_to_keep = [
        'primary', 'primary_link',
        'secondary', 'secondary_link',
        'tertiary', 'tertiary_link',
        'trunk', 'trunk_link',
        'motorway', 'motorway_link'
    ]

    output_stem = "/scistor/ivm/data_catalogue/open_street_map/road_networks/" + x
    edges_path = output_stem + "-edges.feather"
    nodes_path = output_stem + "-nodes.feather"

    # Only the edges file is used as the "already processed" marker.
    if not os.path.exists(edges_path):
        all_roads = roads(filename(x))
        keep_mask = all_roads.highway.isin(roads_to_keep)
        main_roads = all_roads.loc[keep_mask].reset_index(drop=True)

        network = simply.simplified_network(main_roads)
        edges, nodes = network.edges, network.nodes

        # Geometries are stored as WKB in the feather files.
        edges['geometry'] = pyg.to_wkb(edges['geometry'])
        nodes['geometry'] = pyg.to_wkb(nodes['geometry'])

        feather.write_dataframe(edges, edges_path)
        feather.write_dataframe(nodes, nodes_path)
        print(x + " is done")
def test_to_wkb_3D():
    """Check ``to_wkb`` for a 3D point, with and without forcing 2D output."""
    geom = pygeos.points(1, 1, 1)

    # fmt: off
    expected_3d = b"\x01\x01\x00\x00\x80\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?"  # noqa
    # fmt: on
    wkb_3d = pygeos.to_wkb(geom, byte_order=1)
    assert wkb_3d == expected_3d

    wkb_2d = pygeos.to_wkb(geom, output_dimension=2, byte_order=1)
    assert wkb_2d == POINT11_WKB
def test_to_wkb_byte_order():
    """Check that ``byte_order`` switches between LE (1) and BE (0) output."""
    geom = pygeos.points(1.0, 1.0)

    type_le = b"\x01\x00\x00\x00"  # 1 as 32-bit uint (LE)
    one_le = b"\x00\x00\x00\x00\x00\x00\xf0?"  # 1.0 as double (LE)

    # The big-endian form is the byte-reversed little-endian form.
    expected_le = b"\x01" + type_le + 2 * one_le
    expected_be = b"\x00" + type_le[::-1] + 2 * one_le[::-1]

    assert pygeos.to_wkb(geom, byte_order=1) == expected_le
    assert pygeos.to_wkb(geom, byte_order=0) == expected_be
def test_to_wkb_srid():
    """Check ``to_wkb`` output with and without ``include_srid``."""
    # hex representation of POINT (0 0), without and with SRID=4
    plain_hex = "010100000000000000000000000000000000000000"
    srid_hex = "01010000200400000000000000000000000000000000000000"

    geom = pygeos.from_wkb(srid_hex)
    assert pygeos.to_wkt(geom, trim=True) == "POINT (0 0)"
    assert pygeos.to_wkb(geom, hex=True, byte_order=1) == plain_hex
    assert pygeos.to_wkb(geom, hex=True, include_srid=True, byte_order=1) == srid_hex

    # Bytes 5..8 of the output are read back as a little-endian uint32 SRID.
    tagged = pygeos.set_srid(pygeos.points(1, 1), np.int32(4326))
    encoded = pygeos.to_wkb(tagged, include_srid=True, byte_order=1)
    srid_bytes = encoded[5:9]
    assert np.frombuffer(srid_bytes, "<u4").item() == 4326
def embed_geometries(self, uuid):
    """Embeds geometries with additional points, equivalent to the original.

    Retrieves (multi)lines and (multi)polygons, and increase their vertices
    with some vertices collinear with their neighbours. The resulted
    geometries could be detected, but they do not alter the original shapes.

    Arguments:
        uuid (str): UUID, used to seed the random state.

    Returns:
        (geovaex.GeoDataFrame): A new dataframe.

    Raises:
        TypeError: If ``self._has_geometry`` is falsy.
    """
    if not self._has_geometry:
        raise TypeError('Dataframe does not contain geometries.')
    df = self._df
    rs = self._random_state(uuid)
    indices, rand_geoms = self.get_random_geometries(rs)

    offset = 0
    dfs = []
    for i, idx in enumerate(indices):
        # Untouched rows between the previous selected row and this one.
        dfs.append(df[offset:idx].trim())
        geom = self.transform_geometry(rand_geoms[i], rs)
        # Rebuild the selected row around the transformed geometry.
        row = df[idx:idx + 1].to_vaex_df()
        row = geovaex.from_df(
            row,
            pa.chunked_array([pa.array([pg.to_wkb(geom)])]),
            crs=df.geometry.crs)
        dfs.append(row)
        offset = idx + 1

    # Append the remaining rows. Testing `offset` instead of the loop
    # variable keeps this valid when `indices` is empty (the loop variable
    # would be unbound) and skips the tail when no rows are left.
    if offset < len(df):
        dfs.append(df[offset:].trim())
    return GeoDataFrameConcatenated(dfs)
def test_to_wkb_hex():
    """Check the hex-encoded little-endian WKB of POINT (1 1)."""
    byte_order_le = "01"
    type_le = "01000000"
    one_le = "000000000000F03F"  # 1.0 as double (LE)
    expected = byte_order_le + type_le + 2 * one_le

    geom = pygeos.points(1, 1)
    assert pygeos.to_wkb(geom, hex=True, byte_order=1) == expected
def create_final_od_grid(df,height_div):
    """Build a grid of populated cell centroids clipped to the shape in ``df``.

    The cell height is ``sqrt(area / height_div)`` of the first geometry in
    ``df``. A grid over the bounding box is created with ``create_grid`` /
    ``create_bbox``, intersected with ``df.geometry``, and empty cells are
    dropped. For every remaining cell the raster sum from ``zonal_stats`` is
    stored in ``tot_pop``; cells with a missing sum or a sum of 100 or less
    are removed and each cell geometry is replaced by its centroid.

    NOTE(review): ``world_pop`` and ``GID_0`` are not defined in this
    function; presumably module-level globals - confirm against the caller.
    NOTE(review): ``loads`` is presumably ``shapely.wkb.loads`` - confirm.

    Args:
        df: table with a ``geometry`` column of pygeos geometries.
        height_div: divisor applied to the area before taking the square root.

    Returns:
        GeoDataFrame with columns ``geometry`` (centroids), ``tot_pop``,
        ``GID_0`` and ``grid_height``.
    """
    # only the first row's area determines the cell height
    height = numpy.sqrt(pygeos.area(df.geometry)/height_div).values[0]
    grid = pd.DataFrame(create_grid(create_bbox(df),height),columns=['geometry'])

    #clip grid of bbox to grid of the actual spatial exterior of the country
    clip_grid = pygeos.intersection(grid,df.geometry)
    clip_grid = clip_grid.loc[~pygeos.is_empty(clip_grid.geometry)]

    # turn to shapely geometries again for zonal stats
    clip_grid.geometry = pygeos.to_wkb(clip_grid.geometry)
    clip_grid.geometry = clip_grid.geometry.apply(loads)
    clip_grid = gpd.GeoDataFrame(clip_grid)

    # get total population per grid cell
    clip_grid['tot_pop'] = clip_grid.geometry.apply(lambda x: zonal_stats(x,world_pop,stats="sum"))
    # zonal_stats returns a list per geometry; keep the 'sum' of its first entry
    clip_grid['tot_pop'] = clip_grid['tot_pop'].apply(lambda x: x[0]['sum'])

    # remove cells in the grid that have no population data
    clip_grid = clip_grid.loc[~pd.isna(clip_grid.tot_pop)]
    # keep only cells whose summed value exceeds 100
    clip_grid = clip_grid.loc[clip_grid.tot_pop > 100]
    clip_grid.reset_index(inplace=True,drop=True)
    # represent every remaining cell by its centroid
    clip_grid.geometry = clip_grid.geometry.centroid
    clip_grid['GID_0'] = GID_0
    clip_grid['grid_height'] = height
    return clip_grid
def plot_results(gdf_in):
    """Plot the flows on the network next to a per-road-type bar chart.

    The left panel maps all edges with ``flow > 1`` coloured by ``flow`` on
    top of a basemap. The right panel shows, per ``infra_type``, the summed
    ``distance`` of edges with ``max_flow > 1`` divided by the summed
    ``distance`` of all edges.

    Args:
        gdf_in (pandas.DataFrame): Edge table. The columns ``geometry``
            (pygeos geometries), ``flow``, ``max_flow``, ``infra_type`` and
            ``distance`` are read. The ``geometry`` column is overwritten in
            place with the geometries returned by ``loads``.
    """
    gdf_in['geometry'] = gdf_in.geometry.apply(
        lambda x: loads(pygeos.to_wkb(x)))

    # Coordinates are declared as EPSG:4326 and reprojected to EPSG:3857
    # before the basemap is added.
    gdf_plot = gpd.GeoDataFrame(gdf_in)
    gdf_plot.crs = 4326
    gdf_plot = gdf_plot.to_crs(3857)

    plt.rcParams['figure.figsize'] = [20, 10]
    _, axes = plt.subplots(1, 2)

    for iter_, ax in enumerate(axes.flatten()):
        if iter_ == 0:
            gdf_plot.loc[gdf_plot.flow > 1].plot(
                ax=ax, column='flow', legend=False, cmap='Reds', linewidth=3)
            ctx.add_basemap(ax, source=ctx.providers.Stamen.TonerLite, zoom=15)
            ax.set_axis_off()
            ax.set_title('Flows along the network')
        else:
            # Select 'distance' before summing so non-numeric columns (e.g.
            # the geometries) are never aggregated.
            over_capacity = gdf_in.loc[gdf_in.max_flow > 1].groupby(
                'infra_type')['distance'].sum()
            total = gdf_in.groupby('infra_type')['distance'].sum()
            share = pd.DataFrame(over_capacity / total).dropna().sort_values(
                by='distance', ascending=False)
            # pandas selects the chart type through `kind`; `type` is not a
            # plot option and was forwarded to matplotlib as an invalid kwarg.
            share.plot(kind='bar', color='red', ax=ax)
            ax.set_ylabel('Percentage of edges > max flow')
            ax.set_xlabel('Road type')
def to_geofeather(df, path, crs=None):
    """Write a pandas DataFrame holding pygeos geometries to a feather file.

    IMPORTANT: feather format does not support a non-default index; call
    reset_index() before using this function.

    The ``geometry`` column of a copy of ``df`` is converted to WKB and the
    result is handed to ``_to_geofeather`` together with the CRS.

    Parameters
    ----------
    df : pandas.DataFrame
    path : str
        path to feather file to write
    crs : str or dict, optional (default: None)
        GeoPandas CRS object; when falsy, ``df.crs`` is used if present.
    """
    import_optional_dependency("pygeos", extra="pygeos is required for pygeos support.")
    from pygeos import to_wkb

    # fall back to a crs attribute previously attached to the DataFrame
    if not crs:
        crs = getattr(df, "crs", None)

    serializable = DataFrame(df.copy())
    serializable["geometry"] = to_wkb(serializable.geometry)
    _to_geofeather(serializable, path, crs=crs)
def transform(arr, src_crs, tgt_crs):
    """Reproject WKB geometries from ``src_crs`` to ``tgt_crs``.

    Only the first two coordinate columns are transformed. The result is
    returned as WKB again.
    """
    reprojector = pyproj.Transformer.from_crs(src_crs, tgt_crs, always_xy=True)

    geoms = pg.from_wkb(arr)
    xy = pg.get_coordinates(geoms)
    xs, ys = xy[:, 0], xy[:, 1]

    # transform() returns one sequence per axis; stack them back to (n, 2)
    reprojected_xy = np.array(reprojector.transform(xs, ys)).T
    return pg.to_wkb(pg.set_coordinates(geoms, reprojected_xy))
def drop_duplicate_geometries(gdf, keep='first'):
    """Drop rows of ``gdf`` whose geometry duplicates that of another row.

    Geometries are compared through their WKB representation.

    Args:
        gdf: table with a ``geometry`` column of pygeos geometries.
        keep: which duplicate to keep; passed to ``Series.duplicated``
            (``'first'``, ``'last'`` or ``False`` to drop all duplicates).

    Returns:
        The rows of ``gdf`` that remain, in their original order.
    """
    # convert to wkb so duplicate detection will work
    # discussed in https://github.com/geopandas/geopandas/issues/521
    wkb = gdf.geometry.apply(lambda geom: pygeos.to_wkb(geom))

    # `keep` must be passed by keyword (keyword-only in pandas >= 2.0).
    # A positional boolean mask is used instead of feeding index labels to
    # `.iloc`, which selected the wrong rows for any non-default index.
    is_duplicate = wkb.duplicated(keep=keep).values
    return gdf.loc[~is_duplicate]
def to_wkb(data, hex=False, **kwargs):
    """Serialise geometries to WKB (or hex-encoded WKB when ``hex`` is true).

    With pygeos enabled the call is forwarded to ``pygeos.to_wkb`` including
    ``kwargs``. Otherwise each geometry's ``wkb`` / ``wkb_hex`` attribute is
    collected into an object array (``None`` stays ``None``); ``kwargs`` are
    not used on that path.
    """
    if compat.USE_PYGEOS:
        return pygeos.to_wkb(data, hex=hex, **kwargs)

    attribute = "wkb_hex" if hex else "wkb"
    encoded = [
        None if geom is None else getattr(geom, attribute) for geom in data
    ]
    return np.array(encoded, dtype=object)
def within(self, geometry, chunksize=1000000, max_workers=None):
    """Evaluate ``within`` against ``geometry``, chunk by chunk.

    The geometry is converted to WKB first. A single chunk is evaluated
    directly via ``_within_single``; several chunks are dispatched through
    ``_multiprocess`` and their results concatenated.
    """
    chunks = self.chunked(chunksize)
    geometry_wkb = pg.to_wkb(geometry)

    if len(chunks) != 1:
        # NOTE(review): `within` passed here is looked up by name at call
        # time; presumably a module-level worker function - confirm.
        pieces = self._multiprocess(
            within, chunks, geometry_wkb, max_workers=max_workers)
        return np.concatenate(pieces)

    return self._within_single(geometry_wkb)
def _pygeos_to_shapely(geom):
    """Convert a single pygeos geometry to a shapely geometry.

    ``None`` is passed through unchanged.
    """
    if geom is None:
        return None

    if compat.PYGEOS_SHAPELY_COMPAT:
        # clone the GEOS pointer and wrap the clone in a shapely object
        cloned = shapely.geos.lgeos.GEOSGeom_clone(geom._ptr)
        return shapely.geometry.base.geom_factory(cloned)

    # fallback going through WKB
    is_empty_point = pygeos.is_empty(geom) and pygeos.get_type_id(geom) == 0
    if not is_empty_point:
        return shapely.wkb.loads(pygeos.to_wkb(geom))

    # empty point does not roundtrip through WKB
    return shapely.wkt.loads("POINT EMPTY")
def get_hash(series):
    """Calculate a hash per geometry for easy equality checks.

    The hash is computed from the WKB of each geometry.

    Parameters
    ----------
    series : Series
        contains pygeos geometries

    Returns
    -------
    Series
        hash codes for each geometry
    """

    def _hash_wkb(wkb):
        return hash(wkb)

    wkb_series = to_wkb(series)
    return wkb_series.apply(_hash_wkb)
def get_gdp_values(gdf, data_path):
    """Sum the GDP raster under each geometry of ``gdf``.

    Args:
        gdf: table with a ``geometry`` column of pygeos geometries. The
            column is temporarily replaced by the output of ``loads`` for the
            zonal statistics and converted back with ``pygeos.from_shapely``
            before returning.
        data_path: directory that contains ``global_gdp/GDP_2015.tif``.

    Returns:
        list: the ``'sum'`` statistic per geometry, with ``None`` replaced
        by 0.
    """
    raster_path = os.path.join(data_path, 'global_gdp', 'GDP_2015.tif')

    gdf['geometry'] = gdf.geometry.apply(lambda x: loads(pygeos.to_wkb(x)))

    raw_sums = [
        item['sum']
        for item in zonal_stats(gdf.geometry, raster_path, stats="sum")
    ]
    gdp = [0 if value is None else value for value in raw_sums]

    gdf['geometry'] = pygeos.from_shapely(gdf.geometry)
    return gdp
def _convert_to_ewkb(gdf, geom_name, srid):
    """Replace the geometry column by hex-encoded EWKB carrying ``srid``.

    The column ``geom_name`` of ``gdf`` is overwritten and ``gdf`` itself is
    returned.
    """
    if not compat.USE_PYGEOS:
        from shapely.wkb import dumps

        ewkb = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]
    else:
        from pygeos import set_srid, to_wkb

        with_srid = set_srid(gdf[geom_name].values.data, srid=srid)
        ewkb = to_wkb(with_srid, hex=True, include_srid=True)

    gdf[geom_name] = ewkb
    return gdf
def trans2wkb4series(s, index=range(0, 0)):
    """Convert a Series of geometry inputs to a Series of WKB values.

    Every truthy element ``s[i]`` is passed to ``pygeos.Geometry`` and
    serialised with ``pygeos.to_wkb``; falsy elements become ``None``.

    Parameters
    ----------
    s : pandas.Series
        Elements accepted by ``pygeos.Geometry``. Elements are read as
        ``s[i]`` for ``i`` in ``range(s.size)``.
    index : optional
        Index of the returned Series. Any ``range`` (including the default)
        is replaced by ``range(0, s.size)``.

    Returns
    -------
    pandas.Series or None
        ``None`` when ``s`` is not a Series or when any element fails to
        convert.
    """
    # Guard first: the original read `s.size` before this check, so
    # non-Series input raised AttributeError instead of returning None.
    if not isinstance(s, pd.Series):
        return None
    if isinstance(index, range):
        index = range(0, s.size)

    import pygeos

    try:
        s_arr = []
        for i in range(0, s.size):
            if not s[i]:
                s_arr.append(None)
            else:
                s_arr.append(pygeos.to_wkb(pygeos.Geometry(s[i])))
        return pd.Series(s_arr, index=index)
    except Exception:
        # Best-effort contract is kept (any conversion failure yields None),
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def _geometry_from_latlon(table, lat_field, lon_field, crs):
    """Turns an arrow table with lat/lon columns into a spatial arrow table.

    Builds point geometries from the two coordinate columns, appends them as
    a WKB ``geometry`` column (the CRS is stored in the field metadata) and
    drops the original coordinate columns.

    Parameters:
        table (object): The arrow table.
        lat_field (string): The latitude field name.
        lon_field (string): The longitude field name.
        crs (string): The lat, lon CRS.

    Returns:
        (object): The arrow spatial table.
    """
    lat = table.column(lat_field)
    lon = table.column(lon_field)

    # points are built as (x, y) = (lon, lat)
    points_wkb = pg.to_wkb(pg.points(lon, lat))

    geometry_field = pa.field('geometry', 'binary', metadata={'crs': crs})
    with_geometry = table.append_column(geometry_field, [points_wkb])
    return with_geometry.drop([lat_field, lon_field])
def _convert_to_ewkb(gdf, geom_name, srid):
    """Return a plain DataFrame whose geometry column is hex-encoded EWKB.

    The geometries in column ``geom_name`` are serialised together with
    ``srid`` and stored in a ``pd.DataFrame`` built from ``gdf``.
    """
    if not compat.USE_PYGEOS:
        from shapely.wkb import dumps

        ewkb = [dumps(geom, srid=srid, hex=True) for geom in gdf[geom_name]]
    else:
        from pygeos import set_srid, to_wkb

        with_srid = set_srid(gdf[geom_name].values.data, srid=srid)
        ewkb = to_wkb(with_srid, hex=True, include_srid=True)

    # The gdf will warn that the geometry column doesn't hold in-memory
    # geometries now that they are EWKB, so convert back to a regular
    # dataframe to avoid warning the user that the dtypes are unexpected.
    plain = pd.DataFrame(gdf, copy=False)
    plain[geom_name] = ewkb
    return plain
def from_pygeos(geometries):
    """Converts a Series or ndarray of pygeos geometry objects to a GeoSeries.

    The geometries are serialised to WKB and each WKB value is parsed again
    with ``loads``.

    Parameters
    ----------
    geometries : Series or ndarray of pygeos geometry objects

    Returns
    -------
    GeoSeries
    """

    def load_wkb(wkb):
        return loads(wkb)

    wkb = pg.to_wkb(geometries)

    if isinstance(geometries, pd.Series):
        return gp.GeoSeries(wkb.apply(load_wkb))

    # Use the builtin `object` dtype: the `np.object` alias was deprecated
    # in NumPy 1.20 and removed in 1.24.
    return gp.GeoSeries(np.vectorize(load_wkb, otypes=[object])(wkb))
def _export_table_from_df(df, geometry_col):
    """Exports an arrow table from a dataframe.

    The WKT values of ``geometry_col`` become the first column, named
    ``geometry`` and stored as WKB; all other columns follow in their
    original order.

    Parameters:
        df (object): A vaex DataFrame.
        geometry_col (string): The column name containing the geometry.

    Returns:
        (object): An arrow spatial table.
    """
    column_names = df.get_column_names(strings=True)

    geometry_wkb = pa.array(pg.to_wkb(pg.from_wkt(df[geometry_col].values)))

    attribute_names = [name for name in column_names if name != geometry_col]
    arrays = [geometry_wkb] + [df[name].evaluate() for name in attribute_names]
    names = ['geometry'] + attribute_names

    return pa.Table.from_arrays(arrays, names)
def constructive(arr, operation, *args, **kwargs):
    """Apply a pygeos constructive operation to WKB geometries.

    ``arr`` is decoded with ``pg.from_wkb``, the pygeos function named by
    ``operation`` is applied and the result is encoded to WKB again. Only
    the operations listed below are accepted; any other name emits a warning
    and yields ``None``.
    """
    # operations that also receive the extra positional arguments
    positional_ops = (
        'buffer', 'clip_by_rect', 'offset_curve', 'simplify', 'snap',
    )
    # operations called with keyword arguments only (*args are not passed)
    keyword_only_ops = (
        'boundary', 'build_area', 'centroid', 'convex_hull',
        'delaunay_triangles', 'envelope', 'extract_unique_points',
        'make_valid', 'normalize', 'point_on_surface', 'reverse',
        'voronoi_polygons',
    )

    if operation in positional_ops:
        func = getattr(pg, operation)
        geometries = func(pg.from_wkb(arr), *args, **kwargs)
    elif operation in keyword_only_ops:
        func = getattr(pg, operation)
        geometries = func(pg.from_wkb(arr), **kwargs)
    else:
        warnings.warn(f'Operation {operation} not supported.')
        return None

    return pg.to_wkb(geometries)
def _geometry_from_wkt(table, geom, crs):
    """Turns an arrow table with a WKT column into a spatial arrow table.

    Parses the WKT column, appends the result as a WKB ``geometry`` column
    (the CRS is stored in the field metadata) and drops the WKT column.

    Parameters:
        table (object): The arrow table.
        geom (string): The geometry field name.
        crs (string): The geometry CRS; ``'EPSG:4326'`` is used when None.

    Returns:
        (object): The arrow spatial table.
    """
    source_col = geom
    if source_col == 'geometry':
        # Free the 'geometry' name for the WKB column appended below.
        names = table.column_names
        position = names.index('geometry')
        names[position] = 'geometry_'
        table = table.rename_columns(names)
        source_col = 'geometry_'

    geometry_wkb = pg.to_wkb(pg.from_wkt(table.column(source_col)))

    effective_crs = 'EPSG:4326' if crs is None else crs
    geometry_field = pa.field(
        'geometry', 'binary', metadata={'crs': effective_crs})

    with_geometry = table.append_column(geometry_field, [geometry_wkb])
    return with_geometry.drop([source_col])
def __getstate__(self):
    """Return the pickle state: the geometries as WKB plus the CRS."""
    geometries_wkb = pygeos.to_wkb(self.data)
    return geometries_wkb, self._crs
def time_write_to_wkb(self):
    """Serialise ``self.to_write`` to WKB; the result is discarded.

    NOTE(review): the ``time_`` prefix suggests a timing benchmark - confirm.
    """
    geometries = self.to_write
    pygeos.to_wkb(geometries)
def setup(self):
    """Create random polygons and keep them with their WKT and WKB forms."""
    # 10000 polygons with 100 random (x, y) vertices each
    vertices = np.random.random((10000, 100, 2))
    self.to_write = pygeos.polygons(vertices)

    self.to_read_wkt = pygeos.to_wkt(self.to_write)
    self.to_read_wkb = pygeos.to_wkb(self.to_write)
def global_shapefiles(data_path, regionalized=False, assigned_level=1):
    """Clean and simplify the GADM shapes and write them to GeoPackage files.

    Reads ``<data_path>/GADM36/gadm36_levels.gpkg`` (GADM data split by
    level, see https://gadm.org/download_world.html) and writes into
    ``<data_path>/cleaned_shapes``, which is created when missing.

    With ``regionalized=False`` the ``level0`` layer is loaded, Antarctica is
    removed, ``remove_tiny_shapes`` is applied per row, the geometries are
    simplified and the result is saved as ``global_countries.gpkg``.

    With ``regionalized=True`` the layer ``level<assigned_level>`` is cleaned
    the same way (Antarctica is not removed on this path), countries absent
    from that layer are added from the country file with a generated
    ``GID_<assigned_level>`` value, and the result is saved as
    ``global_regions.gpkg``. This requires ``global_countries.gpkg`` to exist
    already; otherwise an error is printed and nothing is written.

    Arguments:
        *data_path* : Directory holding the ``GADM36`` folder.

    Optional Arguments:
        *regionalized* : Default is **False**. Set to **True** to create the
        regional file instead of the country file.

        *assigned_level* : Default is **1**. GADM level used when
        *regionalized* is **True**; a ``GID_<level>`` value is generated for
        levels 1 to 5 only.

    Returns:
        None in all cases.
    """

    gadm_path = os.path.join(data_path, 'GADM36', 'gadm36_levels.gpkg')
    cleaned_shapes_path = os.path.join(data_path, 'cleaned_shapes')

    if not os.path.exists(cleaned_shapes_path):
        os.makedirs(cleaned_shapes_path)

    # path to country GADM file
    if regionalized == False:

        # load country file
        gadm_level0 = pandas.DataFrame(
            geopandas.read_file(gadm_path, layer='level0'))

        #convert to pygeos
        tqdm.pandas(desc='Convert geometries to pygeos')
        gadm_level0['geometry'] = gadm_level0.geometry.progress_apply(
            lambda x: pygeos.from_shapely(x))

        # remove antarctica, no roads there anyways
        gadm_level0 = gadm_level0.loc[~gadm_level0['NAME_0'].
                                      isin(['Antarctica'])]

        # remove tiny shapes to reduce size substantially
        tqdm.pandas(desc='Remove tiny shapes')
        gadm_level0['geometry'] = gadm_level0.progress_apply(
            remove_tiny_shapes, axis=1)

        #simplify geometry: simplify, buffer by 0.01, then simplify again
        tqdm.pandas(desc='Simplify geometry')
        gadm_level0.geometry = gadm_level0.geometry.progress_apply(
            lambda x: pygeos.simplify(pygeos.buffer(
                pygeos.simplify(x, tolerance=0.005, preserve_topology=True),
                0.01),
                                      tolerance=0.005,
                                      preserve_topology=True))

        #save to new country file
        glob_ctry_path = os.path.join(cleaned_shapes_path,
                                      'global_countries.gpkg')

        # the geometries are turned back into shapely objects before writing
        tqdm.pandas(desc='Convert geometries back to shapely')
        gadm_level0.geometry = gadm_level0.geometry.progress_apply(
            lambda x: loads(pygeos.to_wkb(x)))

        geopandas.GeoDataFrame(gadm_level0).to_file(glob_ctry_path,
                                                    layer='level0',
                                                    driver="GPKG")

    else:
        # this is dependent on the country file, so check whether that one is already created:
        glob_ctry_path = os.path.join(cleaned_shapes_path,
                                      'global_countries.gpkg')

        if os.path.exists(glob_ctry_path):
            gadm_level0 = geopandas.read_file(os.path.join(glob_ctry_path),
                                              layer='level0')
        else:
            print('ERROR: You need to create the country file first')
            return None

        # load region file
        gadm_level_x = pandas.DataFrame(
            geopandas.read_file(gadm_path,
                                layer='level{}'.format(assigned_level)))

        #convert to pygeos
        tqdm.pandas(desc='Convert geometries to pygeos')
        gadm_level_x['geometry'] = gadm_level_x.geometry.progress_apply(
            lambda x: pygeos.from_shapely(x))

        # remove tiny shapes to reduce size substantially
        tqdm.pandas(desc='Remove tiny shapes')
        gadm_level_x['geometry'] = gadm_level_x.progress_apply(
            remove_tiny_shapes, axis=1)

        #simplify geometry: simplify, buffer by 0.01, then simplify again
        tqdm.pandas(desc='Simplify geometry')
        gadm_level_x.geometry = gadm_level_x.geometry.progress_apply(
            lambda x: pygeos.simplify(pygeos.buffer(
                pygeos.simplify(x, tolerance=0.005, preserve_topology=True),
                0.01),
                                      tolerance=0.005,
                                      preserve_topology=True))

        # add some missing geometries from countries with no subregions
        get_missing_countries = list(
            set(list(gadm_level0.GID_0.unique())).difference(
                list(gadm_level_x.GID_0.unique())))

        #TO DO: GID_2 and lower tiers should first be filled by a tier above, rather then by the country file
        mis_country = gadm_level0.loc[gadm_level0['GID_0'].isin(
            get_missing_countries)]

        # generate a region id of the form <GID_0>.0[.0 ...]_1 for the
        # requested level; other levels get no generated id
        if assigned_level == 1:
            mis_country['GID_1'] = mis_country['GID_0'] + '.' + str(
                0) + '_' + str(1)
        elif assigned_level == 2:
            mis_country['GID_2'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '_' + str(1)
        elif assigned_level == 3:
            mis_country['GID_3'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '.' + str(0) + '_' + str(1)
        elif assigned_level == 4:
            mis_country['GID_4'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '.' + str(0) + '.' + str(0) + '_' + str(1)
        elif assigned_level == 5:
            mis_country['GID_5'] = mis_country['GID_0'] + '.' + str(
                0) + '.' + str(0) + '.' + str(0) + '.' + str(0) + '.' + str(
                    0) + '_' + str(1)

        # the geometries are turned back into shapely objects before writing
        tqdm.pandas(desc='Convert geometries back to shapely')
        gadm_level_x.geometry = gadm_level_x.geometry.progress_apply(
            lambda x: loads(pygeos.to_wkb(x)))

        # concat missing country to gadm levels
        gadm_level_x = geopandas.GeoDataFrame(
            pandas.concat([gadm_level_x, mis_country], ignore_index=True))
        gadm_level_x.reset_index(drop=True, inplace=True)

        #save to new region file
        gadm_level_x.to_file(os.path.join(cleaned_shapes_path,
                                          'global_regions.gpkg'),
                             layer='level{}'.format(assigned_level),
                             driver="GPKG")
def test_to_wkb_point_empty_srid():
    """The SRID of an empty point survives a WKB round trip."""
    srid = 4236
    tagged = pygeos.set_srid(empty_point, srid)

    encoded = pygeos.to_wkb(tagged, include_srid=True)
    decoded = pygeos.from_wkb(encoded)

    assert pygeos.get_srid(decoded) == srid
def test_to_wkb_point_empty_post_geos38(geom, dims, expected):
    """Compare the WKB of an empty point with ``expected``, NaN-aware."""
    # Post GEOS 3.8: empty point is 2D
    actual = pygeos.to_wkb(geom, output_dimension=dims, byte_order=1)

    # The last two doubles (2 * 8 bytes) hold the coordinates.
    coord_bytes = 2 * 8
    assert actual[:-coord_bytes] == expected[:-coord_bytes]

    # Use numpy.isnan; there are many byte representations for NaN
    coords = struct.unpack("<2d", actual[-coord_bytes:])
    assert np.isnan(coords).all()