def set_default_and_na_edge_noises(graph: ig.Graph, data_extent: Polygon, log: Logger) -> None:
    """Initialize the noise attributes of all edges of the graph.

    Every edge first gets None for both noise attributes (no data). Edges
    whose geometry is within `data_extent` are then reset to the default
    "no noise" values: an empty dict of noises and an empty noise source.

    Args:
        graph: Graph whose edge attributes are updated in place.
        data_extent: Polygon covering the area for which noise data exists.
        log: Logger used for reporting how many edges fall outside the extent.
    """
    noises_key = E.noises.value
    source_key = E.noise_source.value

    # mark every edge as nodata before filling in the covered ones
    graph.es[noises_key] = None
    graph.es[source_key] = None

    edges = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig])
    extent = gpd.GeoDataFrame(
        data=[{'has_noise_data': 1}],
        geometry=[data_extent],
        crs=CRS.from_epsg(3879),
    )
    # NOTE(review): newer geopandas releases call this argument `predicate`
    # instead of `op` - confirm against the geopandas version in use
    extent_join = gpd.sjoin(edges, extent, how='left', op='within')
    extent_join = extent_join.drop(['index_right'], axis=1)
    covered = extent_join[extent_join['has_noise_data'] == 1]

    # only edges that have a LineString geometry are counted in the log message
    line_count = sum(1 for geom in edges['geometry'] if isinstance(geom, LineString))
    log.info(
        f'found {line_count - len(covered)} edges of {line_count} outside noise data extent'
    )

    # each covered edge gets its own fresh dict as the default (no noise) value
    for row in covered.itertuples():
        ig_edge = graph.es[getattr(row, E.id_ig.name)]
        ig_edge[noises_key] = {}
        ig_edge[source_key] = ''
def __get_edge_gdf(self):
    """Return a geometry-only GeoDataFrame of the graph's edges, one row per way id.

    Edges are read with `drop_na_geoms=True`, de-duplicated by way id and
    reduced to the geometry column. The number of remaining edges is logged.
    """
    all_edges = ig_utils.get_edge_gdf(self.graph, attrs=[E.id_way], drop_na_geoms=True)
    # one row per way id; presumably edges sharing a way id share a geometry
    unique_edges = all_edges.drop_duplicates(E.id_way.name)
    geometry_only = unique_edges[[E.geometry.name]]
    self.log.info(f'Added {len(geometry_only)} edges to edge_gdf')
    return geometry_only
def test_add_sampling_points(self):
    """Test adding sampling points to the edges of the test graph.

    Checks the number of edges with and without sampling points, that every
    sampling point lies on the geometry of its edge, the size of the exploded
    sampling point GeoDataFrame and that the representative lengths of the
    sampling points of an edge sum up to the length of the edge.
    """
    graph = ig_utils.read_graphml('data/test_graph.graphml')
    gdf = ig_utils.get_edge_gdf(graph)
    gdf = utils.add_sampling_points_to_gdf(gdf, 2)

    sampling_points_list = list(gdf['sampling_points'])
    # compare to None by identity: equality can be overridden by the compared objects
    self.assertEqual(
        len([sps for sps in sampling_points_list if sps is not None]), 3522)
    self.assertEqual(
        len([sps for sps in sampling_points_list if sps is None]), 180)

    # test that all sample points are on the line geometries
    for edge in gdf.itertuples():
        sampling_points = getattr(edge, 'sampling_points')
        if sampling_points is None:
            continue
        line_geom = getattr(edge, 'geometry')
        for sp in sampling_points:
            self.assertAlmostEqual(sp.distance(line_geom), 0, 5)

    # validate sampling point gdf (exploded from edge gdf with sampling points)
    sampling_gdf = utils.explode_sampling_point_gdf(gdf, 'sampling_points')
    self.assertGreater(len(sampling_gdf), len(gdf))
    self.assertEqual(len(sampling_gdf), 58554)

    # check that the total representative length of each set of sampling points
    # equals the length of the respective edge
    sps_by_edge = sampling_gdf.groupby('edge_id')
    for edge in gdf.itertuples():
        if edge.sampling_points is not None:
            edge_sps = sps_by_edge.get_group(edge.Index)
            sampling_length_sum = edge_sps['sample_len'].sum()
            self.assertAlmostEqual(sampling_length_sum, edge.geometry.length, 5)
def test_joins_noises_to_graph_edges():
    """Test joining noise data to the edges of the test graph.

    Runs the noise join with a sampling interval of 3 and validates the number
    of edges that got noises, that the total noise length of an edge does not
    exceed its length, the mean total noise length and the frequencies of the
    main noise sources.
    """
    noise_gpkg = f'{base_dir}/data/noise_data_processed.gpkg'

    graph = ig_utils.read_graphml(f'{base_dir}/data/test_graph.graphml')
    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig, E.length])
    edge_gdf[E.id_ig.name] = edge_gdf.index

    # read noise data: one layer per name, with the db_low column renamed after the layer
    noise_layers = {
        name: gpd.read_file(noise_gpkg, layer=name).rename(columns={'db_low': name})
        for name in fiona.listlayers(noise_gpkg)
    }

    # read nodata zone: narrow area between noise surfaces of different municipalities
    nodata_layer = gpd.read_file(f'{base_dir}/data/extents.gpkg', layer='municipal_boundaries')

    edge_noises = noise_graph_join.noise_graph_join(
        edge_gdf=edge_gdf,
        sampling_interval=3,
        noise_layers=noise_layers,
        nodata_layer=nodata_layer
    )
    assert edge_noises[E.id_ig.name].nunique() == 3522

    merged = pd.merge(edge_gdf, edge_noises, how='inner', on=E.id_ig.name)
    merged['total_noise_len'] = [
        round(sum(noises.values()), 4) for noises in merged['noises']
    ]

    # noise exposure of an edge can never be longer than the edge itself
    for _, row in merged.iterrows():
        assert round(row['total_noise_len'], 1) <= round(row['length'], 1)

    assert round(merged['total_noise_len'].mean(), 2) == 33.20

    # test frequency of different main noise sources
    source_counts = dict(Counter(merged[E.noise_source.name]))
    assert source_counts == {'road': 2322, 'train': 1198, '': 2}
def test_adds_sampling_points_to_edge_gdf():
    """Test adding sampling points to the edge GeoDataFrame of the test graph.

    Checks the number of edges with and without sampling points, that every
    sampling point lies on the geometry of its edge, the size of the exploded
    sampling point GeoDataFrame and that the representative lengths of the
    sampling points of an edge sum up to the length of the edge.
    """
    graph = ig_utils.read_graphml(f'{base_dir}/data/test_graph.graphml')
    gdf = ig_utils.get_edge_gdf(graph)
    gdf = noise_join_utils.add_sampling_points_to_gdf(gdf, 2)

    sampling_points_list = list(gdf['sampling_points'])
    # compare to None by identity: equality can be overridden by the compared objects
    assert len([sps for sps in sampling_points_list if sps is not None]) == 3522
    assert len([sps for sps in sampling_points_list if sps is None]) == 180

    # test that all sample points are on the line geometries
    for edge in gdf.itertuples():
        sampling_points = getattr(edge, 'sampling_points')
        if not sampling_points:
            continue
        line_geom = getattr(edge, 'geometry')
        for sp in sampling_points:
            distance = sp.distance(line_geom)
            # the second operand of assert is the failure message, so it has to
            # describe the failure (a bare number there is not a precision)
            assert round(distance, 1) == 0, (
                f'sampling point is {distance} away from the geometry of its edge'
            )

    # validate sampling point gdf (exploded from edge gdf with sampling points)
    sampling_gdf = noise_join_utils.explode_sampling_point_gdf(gdf, 'sampling_points')
    assert len(sampling_gdf) > len(gdf)
    assert len(sampling_gdf) == 58554

    # check that the total representative length of each set of sampling points
    # equals the length of the respective edge
    sps_by_edge = sampling_gdf.groupby('edge_id')
    for edge in gdf.itertuples():
        if edge.sampling_points is not None:
            edge_sps = sps_by_edge.get_group(edge.Index)
            sampling_length_sum = edge_sps['sample_len'].sum()
            assert round(sampling_length_sum, 2) == round(edge.geometry.length, 2)
def test_graph_to_gdf(self):
    """Test reading the edges of the test graph to a GeoDataFrame.

    The edges are read once with the WGS geometry and once with the projected
    geometry, and the mean length of the geometries is checked in both cases.
    """
    graph = ig_utils.read_graphml('data/test_graph.graphml', log=Logger(printing=True))

    # (geometry attribute, expected mean geometry length, compared decimal places)
    cases = (
        (Edge.geom_wgs, 0.000429, 6),  # read graph to wgs gdf
        (Edge.geometry, 31.65, 2),  # read graph to projected gdf
    )
    for geom_attr, expected_mean, places in cases:
        gdf = ig_utils.get_edge_gdf(
            graph, id_attr=Edge.id_ig, attrs=[Edge.length], geom_attr=geom_attr)
        gdf['geom_length'] = [geom.length for geom in gdf[geom_attr.name]]
        self.assertAlmostEqual(gdf['geom_length'].mean(), expected_mean, places)
def test_gets_graph_data_as_gdf():
    """Test reading the edges of the imported graph to a GeoDataFrame.

    The edges (without null geometries) are read once with the WGS geometry
    and once with the projected geometry, and the rounded mean length of the
    geometries is checked in both cases.
    """
    graph = ig_utils.read_graphml(conf.igraph_out_file)

    # (geometry attribute, rounding precision, expected rounded mean length)
    cases = (
        (Edge.geom_wgs, 6, 0.000451),  # read graph to wgs gdf
        (Edge.geometry, 2, 33.27),  # read graph to projected gdf
    )
    for geom_attr, decimals, expected_mean in cases:
        gdf = ig_utils.get_edge_gdf(
            graph,
            id_attr=Edge.id_ig,
            attrs=[Edge.length],
            geom_attr=geom_attr,
            drop_na_geoms=True,
        )
        gdf['geom_length'] = [geom.length for geom in gdf[geom_attr.name]]
        assert round(gdf['geom_length'].mean(), decimals) == expected_mean
def main(conf: GraphNoiseJoinConf):
    """Join noise data to the edges of a graph in chunks and export the results as CSV.

    The edges of the graph are sorted by their igraph id and split into chunks
    of (at most about) 50000 edges. Chunks whose maximum edge id is not greater
    than the maximum id found from previously exported CSV files are skipped,
    so that an interrupted run can be continued.

    Args:
        conf: Configuration providing the file paths of the graph
            (`graph_in_fp`), the noise data (`noise_data_fp`), the nodata
            layer (`nodata_fp`, `nodata_layer_name`) and the directory of
            the exported CSV files (`noise_data_csv_dir`).
    """
    graph = ig_utils.read_graphml(conf.graph_in_fp)
    log.info(f'read graph of {graph.ecount()} edges')

    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig])
    edge_gdf = edge_gdf.sort_values(E.id_ig.name)

    # without edges the number of chunks would be zero, which np.array_split
    # rejects with a ValueError - there is nothing to join in that case
    if len(edge_gdf) == 0:
        log.info('graph has no edges, skipping noise join')
        return

    # read noise data: one layer per name, with the db_low column renamed after the layer
    noise_layers = {
        name: gpd.read_file(conf.noise_data_fp, layer=name).rename(columns={'db_low': name})
        for name in fiona.listlayers(conf.noise_data_fp)
    }
    log.info(f'read {len(noise_layers)} noise layers')

    # read nodata zone: narrow area between noise surfaces of different municipalities
    nodata_layer = gpd.read_file(conf.nodata_fp, layer=conf.nodata_layer_name)

    # process chunks of edges together by dividing gdf to parts
    processing_size = 50000
    split_gdf_count = math.ceil(len(edge_gdf) / processing_size)
    gdfs = np.array_split(edge_gdf, split_gdf_count)

    # get max id of previously processed edges
    max_processed_id = get_previously_processed_max_id(conf.noise_data_csv_dir)
    if max_processed_id > 0:
        log.info(
            f'found previously processed edges up to edge id {max_processed_id}'
        )

    for idx, gdf in enumerate(gdfs):
        if gdf[E.id_ig.name].max() <= max_processed_id:
            log.info(
                f'skipping {idx+1} of {len(gdfs)} edge gdfs (processed before)'
            )
            continue

        log.info(f'processing {idx+1} of {len(gdfs)} edge gdfs')
        edge_noises = noise_graph_join(
            edge_gdf=gdf,
            sampling_interval=3,
            noise_layers=noise_layers,
            nodata_layer=nodata_layer,
            b_debug=False,
            debug_gpkg='debug/noise_join_debug.gpkg')
        export_edge_noise_csv(edge_noises, conf.noise_data_csv_dir)
def get_sampling_point_gdf_from_graph(graph) -> GeoDataFrame:
    """Creates GeoDataFrame of edges of the graph.

    Filters out null geometries and adds point geometries to be used as
    sampling points. The point of an edge is interpolated at the halfway
    (normalized distance 0.5) of its WGS line geometry and stored in the
    column `point_geom`.

    Args:
        graph: Graph from which the edges are read.

    Returns:
        GeoDataFrame of the edges that have a LineString as their WGS geometry.
    """
    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig, E.id_way], geom_attr=E.geom_wgs)

    # filter out edges with null geometry; the explicit copy makes the result
    # independent of the unfiltered frame, so that adding a column below does
    # not trigger pandas' SettingWithCopyWarning
    has_line_geom = edge_gdf[E.geom_wgs.name].apply(lambda x: isinstance(x, LineString))
    edge_gdf = edge_gdf[has_line_geom].copy()

    edge_gdf['point_geom'] = [
        geom.interpolate(0.5, normalized=True) for geom in edge_gdf[E.geom_wgs.name]
    ]
    return edge_gdf
def create_geojson(graph: ig.Graph) -> dict:
    """Build a GeoJSON feature collection from the edges of the graph.

    Edges without a LineString geometry and edges with a duplicate way id are
    left out. For the remaining edges the noises are updated, a noise range is
    derived from the mean noise level, the WGS geometry is simplified and
    converted to a coordinate list.

    Args:
        graph: Graph from which the edges are read.

    Returns:
        The value returned by `__as_geojson_feature_collection` for the
        records of the way id, coordinates, noise range and GVI of the edges.
    """
    geom_col = E.geom_wgs.name
    noises_col = E.noises.name
    length_col = E.length.name

    def with_db_40_exp(row):
        return __update_db_40_exp(row[noises_col], row[length_col])

    def mean_noise_level(row):
        return __get_mean_noise_level(row[noises_col], row[length_col])

    edges = ig_utils.get_edge_gdf(
        graph, attrs=[E.id_way, E.length, E.noises, E.gvi], geom_attr=E.geom_wgs)

    # keep only edges that have a line geometry
    is_line = edges[geom_col].apply(lambda geom: isinstance(geom, LineString))
    edges = edges[is_line]

    # keep one edge per way id (i.e. drop edges with duplicate geometry)
    edges = edges.drop_duplicates(E.id_way.name)

    edges[noises_col] = edges.apply(with_db_40_exp, axis=1)
    edges['db'] = edges.apply(mean_noise_level, axis=1)
    edges['db'] = [__get_noise_range(mean_db) for mean_db in edges['db']]

    # simplify geometries for vector tiles
    edges[geom_col] = [
        line.simplify(0.00005, preserve_topology=True) for line in edges[geom_col]
    ]
    edges['coords'] = [__get_coord_list(line) for line in edges[geom_col]]

    feature_columns = [E.id_way.name, 'coords', 'db', E.gvi.name]
    records = edges[feature_columns].to_dict('records')
    return __as_geojson_feature_collection(records)
graph_file_in = r'graph_in/kumpula.graphml' if subset else r'graph_in/hma.graphml' graph_file_out = r'graph_out/kumpula.graphml' if subset else r'graph_out/hma.graphml' edge_table_db_name = 'edge_buffers_subset' if subset else 'edge_buffers' execute_sql = db.get_sql_executor(log) db_tables = db.get_db_table_names(execute_sql) # load GSV GVI points from GPKG gsv_gvi_gdf = load_gsv_gvi_gdf(r'data/greenery_points.gpkg') # load street network graph from GraphML graph = ig_utils.read_graphml(graph_file_in) log.info(f'Read graph of {graph.ecount()} edges') # load edge_gdf edge_gdf: GeoDataFrame = ig_utils.get_edge_gdf( graph, attrs=[E.id_ig, E.length, E.id_way]) edge_gdf = edge_gdf.drop_duplicates(E.id_way.name, keep='first') # drop edges without geometry edge_gdf = edge_gdf[edge_gdf['geometry'].apply( lambda geom: isinstance(geom, LineString))] log.info(f'Subset edge_gdf to {len(edge_gdf)} unique geometries') # export edges to db if not there yet for land cover overlay analysis if edge_table_db_name not in db_tables: # add simplified buffers to edge_gdf edges_2_db = edge_gdf.copy() log.info(f'Calculating 30m buffers from edge geometries') edges_2_db['b30'] = [ geom.buffer(30, resolution=3) for geom in edges_2_db['geometry'] ] edges_2_db = edges_2_db.rename(columns={
edge_gdf['uv'] = edge_gdf.apply(lambda x: (x['source'], x['target']), axis=1) graph.es[E.uv.value] = list(edge_gdf['uv']) def set_way_ids(graph, edge_gdf): edge_gdf['way_id'] = edge_gdf.apply( lambda x: str(round(x['length'], 1)) + str(sorted(x['uv'])), axis=1) way_ids = list(edge_gdf['way_id'].unique()) way_ids_d = {way_id: idx for idx, way_id in enumerate(way_ids)} edge_gdf['way_id'] = [way_ids_d[way_id] for way_id in edge_gdf['way_id']] graph.es[E.id_way.value] = list(edge_gdf['way_id']) edge_gdf = ig_utils.get_edge_gdf( graph, attrs=[E.id_ig, E.length, E.bike_safety_factor], ig_attrs=['source', 'target']) set_biking_lengths(graph, edge_gdf) set_uv(graph, edge_gdf) set_way_ids(graph, edge_gdf) # set combined GVI to GVI attribute & export graph graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_veg.value]) ig_utils.export_to_graphml(graph, out_graph, n_attrs=out_node_attrs, e_attrs=out_edge_attrs) # create GeoJSON files for vector tiles geojson = utils.create_geojson(graph)
def convert_otp_graph_to_igraph(
    node_csv_file: str,
    edge_csv_file: str,
    hma_poly_file: str,
    igraph_out_file: str,
    b_export_otp_data_to_gpkg: bool = False,
    b_export_decomposed_igraphs_to_gpkg: bool = False,
    b_export_final_graph_to_gpkg: bool = False,
    debug_otp_graph_gpkg: str = 'debug/otp_graph_features.gpkg',
    debug_igraph_gpkg: str = 'debug/otp2igraph_features.gpkg',
) -> ig.Graph:
    """Build a directed igraph graph from node and edge CSV files.

    The nodes and edges are read from semicolon separated CSV files, their WKT
    geometries are parsed (as EPSG:4326) and reprojected, and edges are
    filtered by walking/biking and thru-traffic attributes. The graph is then
    pruned: edges that do not intersect the (100 unit buffered) HMA polygon,
    edges of strongly connected subgraphs of at most 15 edges and isolated
    nodes are deleted. After the deletions the igraph indexes are written to
    the id attributes of the edges and nodes.

    Args:
        node_csv_file: Path to the CSV file of the nodes.
        edge_csv_file: Path to the CSV file of the edges.
        hma_poly_file: Path to a file readable by geopandas; the first
            geometry of it is used as the HMA polygon.
        igraph_out_file: Path to which the graph is exported as GraphML.
            Nothing is exported if the value is falsy.
        b_export_otp_data_to_gpkg: Write the imported nodes and edges to
            `debug_otp_graph_gpkg`.
        b_export_decomposed_igraphs_to_gpkg: Write the edges of the small,
            medium and big subgraphs to `debug_igraph_gpkg`.
        b_export_final_graph_to_gpkg: Write the edges and nodes of the final
            graph to `debug_igraph_gpkg`.
        debug_otp_graph_gpkg: Path of the GeoPackage of the imported data.
        debug_igraph_gpkg: Path of the GeoPackage of the subgraphs and the
            final graph.

    Returns:
        The created graph.
    """
    hma_poly = geom_utils.project_geom(gpd.read_file(hma_poly_file)['geometry'][0])

    # 1) read nodes from CSV
    n = pd.read_csv(node_csv_file, sep=';')
    log.info(f'read {len(n.index)} nodes')
    log.debug(f'node column types: {n.dtypes}')
    log.debug(f'nodes head: {n.head()}')
    log.info('creating node gdf')
    # values that are not strings (e.g. missing) are replaced with an empty point
    n[Node.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else Point()
        for geom in n[Node.geometry.name]
    ]
    # keep the unprojected geometry in a separate column before reprojecting
    n[Node.geom_wgs.name] = n[Node.geometry.name]
    n = gpd.GeoDataFrame(n, geometry=Node.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting nodes to etrs')
    n = n.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'nodes head: {n.head()}')

    # 2) read edges from CSV
    e = pd.read_csv(edge_csv_file, sep=';')
    log.info(f'read {len(e.index)} edges')
    log.debug(f'edge column types: {e.dtypes}')
    log.debug(f'edges head: {e.head()}')
    log.info('creating edge gdf')
    # values that are not strings (e.g. missing) are replaced with an empty line
    e[Edge.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else LineString()
        for geom in e[Edge.geometry.name]
    ]
    # keep the unprojected geometry in a separate column before reprojecting
    e[Edge.geom_wgs.name] = e[Edge.geometry.name]
    e = gpd.GeoDataFrame(e, geometry=Edge.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting edges to etrs')
    e = e.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'edges head: {e.head()}')

    # 3) export graph data to gpkg
    if b_export_otp_data_to_gpkg:
        log.info('writing otp graph data to gpkg')
        e.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='edges', driver='GPKG')
        log.info(f'exported edges to {debug_otp_graph_gpkg} (layer=edges)')
        # NOTE(review): the node column is dropped by the name of the Edge attribute;
        # presumably Edge.geom_wgs and Node.geom_wgs share the same name - confirm
        n.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='nodes', driver='GPKG')
        log.info(f'exported nodes to {debug_otp_graph_gpkg} (layer=nodes)')

    # 4) filter out edges that are unsuitable for both walking and cycling
    def filter_df_by_query(df: pd.DataFrame, query: str, name: str = 'rows'):
        # returns a copy of the rows matching the query and logs the share of dropped rows
        # NOTE(review): divides by the row count, so an empty df raises ZeroDivisionError
        count_before = len(df.index)
        df_filt = df.query(query).copy()
        filt_ratio = (count_before-len(df_filt.index)) / count_before
        log.info(f'filtered out {count_before-len(df_filt.index)} {name} ({round(filt_ratio * 100, 1)} %) by {query}')
        return df_filt

    e_filt = filter_df_by_query(e, f'{Edge.allows_walking.name} == True or {Edge.allows_biking.name} == True', name='edges')
    e_filt = filter_df_by_query(e_filt, f'{Edge.is_no_thru_traffic.name} == False', name='edges')

    # 5) create dictionaries for converting otp ids to ig ids and vice versa
    log.debug('create maps for converting otp ids to ig ids')
    n[Node.id_ig.name] = np.arange(len(n.index))
    ids_otp_ig = {}
    # ids_ig_otp is filled here but not read again within this function
    ids_ig_otp = {}
    for node in n.itertuples():
        ids_otp_ig[getattr(node, Node.id_otp.name)] = getattr(node, Node.id_ig.name)
        ids_ig_otp[getattr(node, Node.id_ig.name)] = getattr(node, Node.id_otp.name)

    # 6) add nodes to graph
    log.info('adding nodes to graph')
    G = ig.Graph(directed=True)
    G.add_vertices(len(n.index))
    # copy every Node attribute that exists as a column to the vertices
    for attr in Node:
        if attr.name in n.columns:
            G.vs[attr.value] = list(n[attr.name])
        else:
            log.warning(f'node column {attr.name} not present in dataframe')

    # 7) add edges to graph
    log.info('adding edges to graph')

    # get edge lengths by projected geometry
    e_filt[Edge.length.name] = [
        round(geom.length, 4) if isinstance(geom, LineString) else 0.0
        for geom in e_filt[Edge.geometry.name]
    ]

    def get_ig_uv(edge):
        # (source, target) of the edge as igraph node ids
        return (ids_otp_ig[edge['node_orig_id']], ids_otp_ig[edge['node_dest_id']])

    e_filt['uv_ig'] = e_filt.apply(lambda row: get_ig_uv(row), axis=1)
    # edges are added in row order, hence the running number equals the igraph edge index
    e_filt[Edge.id_ig.name] = np.arange(len(e_filt.index))
    G.add_edges(list(e_filt['uv_ig']))
    # copy every Edge attribute that exists as a column to the edges
    for attr in Edge:
        if attr.name in e_filt.columns:
            G.es[attr.value] = list(e_filt[attr.name])
        else:
            log.warning(f'edge column {attr.name} not present in dataframe')

    # 8) delete edges outside Helsinki Metropolitan Area (HMA)
    hma_buffered = hma_poly.buffer(100)

    def intersects_hma(geom: Union[LineString, None]):
        # edges without a (non-empty) geometry are treated as being inside HMA, i.e. kept
        if not geom or geom.is_empty:
            return True
        return geom.intersects(hma_buffered)

    e_gdf = ig_utils.get_edge_gdf(G)
    log.info('finding edges that intersect with HMA')
    e_gdf['in_hma'] = [intersects_hma(line) for line in e_gdf[Edge.geometry.name]]
    e_gdf_del = e_gdf.query('in_hma == False').copy()
    out_ratio = round(100 * len(e_gdf_del.index)/len(e_gdf.index), 1)
    log.info(f'found {len(e_gdf_del.index)} ({out_ratio} %) edges outside HMA')

    log.info('deleting edges')
    before_count = G.ecount()
    # presumably the index of e_gdf holds the igraph ids of the edges - verify in get_edge_gdf
    G.delete_edges(e_gdf_del.index.tolist())
    after_count = G.ecount()
    log.info(f'deleted {before_count-after_count} edges')

    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    # (the loop variable e is local to the comprehension, the edge gdf e is not affected)
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 9) find and inspect subgraphs by decomposing the graph
    sub_graphs = G.decompose(mode='STRONG')
    log.info(f'found {len(sub_graphs)} subgraphs')

    graph_sizes = [graph.ecount() for graph in sub_graphs]
    log.info(f'subgraphs with more than 10 edges: {len([s for s in graph_sizes if s > 10])}')
    log.info(f'subgraphs with more than 50 edges: {len([s for s in graph_sizes if s > 50])}')
    log.info(f'subgraphs with more than 100 edges: {len([s for s in graph_sizes if s > 100])}')
    log.info(f'subgraphs with more than 500 edges: {len([s for s in graph_sizes if s > 500])}')
    log.info(f'subgraphs with more than 10000 edges: {len([s for s in graph_sizes if s > 10000])}')

    # group the subgraphs by their edge count: <= 15, 16-500 and > 500 edges
    small_graphs = [graph for graph in sub_graphs if graph.ecount() <= 15]
    medium_graphs = [graph for graph in sub_graphs if (graph.ecount() > 15 and graph.ecount() <= 500)]
    big_graphs = [graph for graph in sub_graphs if graph.ecount() > 500]

    # collect the edges of each group as dicts, tagged with the running number of their subgraph
    small_graph_edges = []
    for graph_id, graph in enumerate(small_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        small_graph_edges.extend(edges)

    medium_graph_edges = []
    for graph_id, graph in enumerate(medium_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        medium_graph_edges.extend(edges)

    big_graph_edges = []
    for graph_id, graph in enumerate(big_graphs):
        edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
        for edge in edges:
            edge['graph_id'] = graph_id
        big_graph_edges.extend(edges)

    if b_export_decomposed_igraphs_to_gpkg:
        log.info('exporting subgraphs to gpkg')
        # graphs with <= 15 edges
        small_graph_edges_gdf = gpd.GeoDataFrame(small_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        small_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='small_graph_edges', driver='GPKG')
        # graphs with 15–500 edges
        medium_graph_edges_gdf = gpd.GeoDataFrame(medium_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        medium_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='medium_graph_edges', driver='GPKG')
        # graphs with > 500 edges
        big_graph_edges_gdf = gpd.GeoDataFrame(big_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        big_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='big_graph_edges', driver='GPKG')
        log.info('graphs exported')

    # 10) delete smallest subgraphs from the graph
    # the id_ig values were reassigned to the igraph indexes of G right before the
    # decomposition, so they are used here as the indexes of the edges to delete
    del_edge_ids = [edge[Edge.id_ig.name] for edge in small_graph_edges]
    log.info(f'deleting {len(del_edge_ids)} isolated edges')
    before_count = G.ecount()
    G.delete_edges(del_edge_ids)
    after_count = G.ecount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) edges')

    # 11) delete isolated nodes from the graph
    del_node_ids = [v.index for v in G.vs.select(_degree_eq=0)]
    log.info(f'deleting {len(del_node_ids)} isolated nodes')
    before_count = G.vcount()
    G.delete_vertices(del_node_ids)
    after_count = G.vcount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) nodes')

    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # check if id_ig:s need to be updated to edge attributes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 12) export graph data to GeoDataFrames for debugging
    if b_export_final_graph_to_gpkg:
        log.info(f'exporting final graph to {debug_igraph_gpkg} for debugging')
        e_gdf = ig_utils.get_edge_gdf(G, attrs=[Edge.id_otp, Edge.id_ig], ig_attrs=['source', 'target'])
        n_gdf = ig_utils.get_node_gdf(G, ig_attrs=['index'])
        e_gdf.to_file(debug_igraph_gpkg, layer='final_graph_edges', driver='GPKG')
        n_gdf.to_file(debug_igraph_gpkg, layer='final_graph_nodes', driver='GPKG')

    if igraph_out_file:
        ig_utils.export_to_graphml(G, igraph_out_file)

    return G
def edge_gdf(graph) -> GeoDataFrame:
    """Yield a GeoDataFrame of the edges of the graph.

    The edges are read with their way id and length attributes, and edges
    without a geometry are dropped (`drop_na_geoms=True`).
    """
    gdf = ig_utils.get_edge_gdf(
        graph,
        attrs=[E.id_way, E.length],
        drop_na_geoms=True,
    )
    yield gdf
csv_files = os.listdir(csv_dir) max_ids = [int(name.split('_')[0]) for name in csv_files] return max(max_ids) if max_ids else 0 def export_edge_noise_csv(edge_noises: pd.DataFrame, out_dir: str): max_id = edge_noises[E.id_ig.name].max() csv_name = f'{max_id}_edge_noises.csv' edge_noises.to_csv(out_dir + csv_name) if (__name__ == '__main__'): log = Logger(printing=True, log_file='noise_graph_join.log', level='debug') graph = ig_utils.read_graphml('data/hma.graphml') log.info(f'read graph of {graph.ecount()} edges') edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig]) edge_gdf = edge_gdf.sort_values(E.id_ig.name) # read noise data noise_layer_names = [ layer for layer in fiona.listlayers('data/noise_data_processed.gpkg') ] noise_layers = { name: gpd.read_file('data/noise_data_processed.gpkg', layer=name) for name in noise_layer_names } noise_layers = { name: gdf.rename(columns={'db_low': name}) for name, gdf in noise_layers.items() } log.info(f'read {len(noise_layers)} noise layers')
# Script: read the edges of a graph and prepare their attributes for a CSV export.

# directory and name (without extension) of the GraphML file to read
graph_dir = r'graphs'
graph_id = r'kumpula'
# graph_id = r'hma_r_hel-clip'

# input CSV of the AQI values and the path of the output CSV, both named after the graph
aqi_update_fp = fr'aqi_updates/yearly_2019_aqi_avg_sum_{graph_id}.csv'
out_csv_fp = fr'examples/{graph_id}_edges.csv'

edge_attrs_in = [E.id_ig, E.id_way, E.length, E.gvi, E.aqi, E.noises]  # geometry is read by default
edge_attrs_out = [
    E.id_ig.name, E.length.name, E.gvi.name, E.aqi.name, E.noises.name, 'mdB'
]  # only these are exported to CSV

graph = ig_utils.read_graphml(fr'{graph_dir}/{graph_id}.graphml')
# edges without a geometry are dropped
edges = ig_utils.get_edge_gdf(graph, attrs=edge_attrs_in, drop_na_geoms=True)
# edges = edges.drop_duplicates(E.id_way.name) # keep only edges unique by geometry

# ensure sum of noise exposure is length by adding missing exposures to 40dB
edges[E.noises.name] = edges.apply(
    lambda row: noise_exps.add_db_40_exp_to_noises(row[E.noises.name], row[
        E.length.name]),
    axis=1)

# mean noise level of each edge, computed from its noises and length
edges['mdB'] = edges.apply(lambda row: noise_exps.get_mean_noise_level(
    row[E.noises.name], row[E.length.name]),
                           axis=1)

# stringify noises dict
edges[E.noises.name] = [str(noises) for noises in edges[E.noises.name]]

# join AQI to edge data
edge_aqis = pd.read_csv(aqi_update_fp)
def main(conf: GraphGreenViewJoinConf):
    """Join green view index (GVI) data to the edges of a graph and export the graph.

    The function works in two phases. If the edge table (`conf.db_edge_table`)
    is not yet in the database, 30 m buffers of the edges (one per way id) are
    written to it and the program exits, since the land cover overlay analysis
    has to be run before the GVI values can be joined. Otherwise the mean GSV
    GVI values and the low and high vegetation shares are collected per way
    id, updated to the graph and the graph is exported as GraphML.

    Args:
        conf: Configuration providing the file paths (`greenery_points_fp`,
            `graph_file_in`, `graph_file_out`) and the names of the database
            tables (`db_edge_table`, `db_low_veg_share_table`,
            `db_high_veg_share_table`).

    Raises:
        SystemExit: With exit code 0 after the edges have been written to the
            database.
    """
    edge_table_db_name = conf.db_edge_table

    execute_sql = db.get_sql_executor(log)
    db_tables = db.get_db_table_names(execute_sql)

    # load GSV GVI points from GPKG
    gsv_gvi_gdf = load_gsv_gvi_gdf(conf.greenery_points_fp)

    # load street network graph from GraphML
    graph = ig_utils.read_graphml(conf.graph_file_in)
    log.info(f'Read graph of {graph.ecount()} edges')

    # load edge_gdf
    edge_gdf: GeoDataFrame = ig_utils.get_edge_gdf(
        graph, attrs=[E.id_ig, E.length, E.id_way])
    edge_gdf = edge_gdf.drop_duplicates(E.id_way.name, keep='first')
    # drop edges without geometry
    edge_gdf = edge_gdf[edge_gdf['geometry'].apply(
        lambda geom: isinstance(geom, LineString))]
    log.info(f'Subset edge_gdf to {len(edge_gdf)} unique geometries')

    # export edges to db if not there yet for land cover overlay analysis
    if edge_table_db_name not in db_tables:
        # add simplified buffers to edge_gdf
        edges_2_db = edge_gdf.copy()
        log.info('Calculating 30m buffers from edge geometries')
        edges_2_db['b30'] = [
            geom.buffer(30, resolution=3) for geom in edges_2_db['geometry']
        ]
        # make the buffer the active geometry and keep the line in its own column
        edges_2_db = edges_2_db.rename(columns={
            'geometry': 'line_geom',
            'b30': 'geometry'
        })
        edges_2_db = edges_2_db.set_geometry('geometry')

        log.info('Writing edges to PostGIS')
        write_to_postgis = db.get_db_writer(log)
        write_to_postgis(edges_2_db[[E.id_way.name, 'geometry']],
                         edge_table_db_name)

        log.info(
            'Wrote graph edges to db, run land_cover_overlay_analysis.py next')
        # the exit() helper is added by the site module for interactive use and
        # is not guaranteed to exist in programs; raise SystemExit directly
        raise SystemExit(0)

    log.info(
        f'Edges were already exported to db table: {edge_table_db_name}')

    # get mean GSV GVI per edge
    gsv_gvi_list_by_way_id = get_gsv_gvi_list_by_way_id(edge_gdf, gsv_gvi_gdf)
    mean_gsv_gvi_by_way_id = get_mean_gsv_gvi_by_way_id(
        gsv_gvi_list_by_way_id, edge_gdf)

    # fetch low and high vegetation shares from db per edge buffer (way ID)
    low_veg_share_by_way_id = lc_analysis.get_low_veg_share_by_way_id(
        conf.db_low_veg_share_table)
    high_veg_share_by_way_id = lc_analysis.get_high_veg_share_by_way_id(
        conf.db_high_veg_share_table)

    graph = update_gvi_attributes_to_graph(graph, mean_gsv_gvi_by_way_id,
                                           low_veg_share_by_way_id,
                                           high_veg_share_by_way_id)

    ig_utils.export_to_graphml(graph, conf.graph_file_out)
    log.info(f'Exported graph to file {conf.graph_file_out}')
def __create_updater_edge_df(self, G: GraphHandler):
    """Return a frame of the edges of the graph with their id and cost base attributes.

    The result has the igraph id of the edge (taken from the index of the
    edge frame) and the length of the edge as columns, and additionally the
    bike time cost if cycling is enabled in the configuration.
    """
    attrs = [E.length]
    if conf.cycling_enabled:
        attrs.append(E.bike_time_cost)

    edge_df = ig_utils.get_edge_gdf(G.graph, attrs=attrs)
    edge_df[E.id_ig.name] = edge_df.index

    columns = [E.id_ig.name, *(attr.name for attr in attrs)]
    return edge_df[columns]
def graph_export(conf: GraphExportConf):
    """Export the graph to GraphML and GeoJSON files for the different use cases.

    Reads the graph `graph_in/{graph_id}.graphml` under `conf.base_dir` and
    writes the following files to `graph_out/`:
      - {graph_id}.graphml: graph with the combined GVI (gvi_comb_gsv_veg)
      - {graph_id}.geojson and {graph_id}_noise_gvi.geojson: GeoJSON files
        for vector tiles
      - {graph_id}_r.graphml: graph for research use, with the combined GVI
        that omits low vegetation (gvi_comb_gsv_high_veg)
      - {graph_id}_r_hel-clip.graphml: the research graph clipped by the
        extent of Helsinki (buffered by 500 m)

    Args:
        conf: Configuration providing `base_dir`, `graph_id`,
            `hel_extent_fp`, `with_noise_data` and `with_greenery_data`.
    """
    in_graph = fr'{conf.base_dir}/graph_in/{conf.graph_id}.graphml'
    out_graph = fr'{conf.base_dir}/graph_out/{conf.graph_id}.graphml'
    out_graph_research = fr'{conf.base_dir}/graph_out/{conf.graph_id}_r.graphml'
    out_graph_research_hel = fr'{conf.base_dir}/graph_out/{conf.graph_id}_r_hel-clip.graphml'
    out_geojson_noise_gvi = fr'{conf.base_dir}/graph_out/{conf.graph_id}_noise_gvi.geojson'
    out_geojson = fr'{conf.base_dir}/graph_out/{conf.graph_id}.geojson'

    hel_extent = gpd.read_file(conf.hel_extent_fp)

    out_node_attrs = [N.geometry]
    out_edge_attrs = [
        E.id_ig, E.uv, E.id_way, E.geometry, E.geom_wgs, E.length,
        E.allows_biking, E.is_stairs, E.bike_safety_factor, E.noises, E.gvi
    ]
    # leave out the attributes of the data sets that are not included
    if not conf.with_noise_data:
        out_edge_attrs.remove(E.noises)
    if not conf.with_greenery_data:
        out_edge_attrs.remove(E.gvi)

    log.info(f'Reading graph file: {in_graph}')
    graph = ig_utils.read_graphml(in_graph)

    edge_gdf = ig_utils.get_edge_gdf(graph,
                                     attrs=[E.id_ig, E.length],
                                     ig_attrs=['source', 'target'])
    set_uv(graph, edge_gdf)
    set_way_ids(graph, edge_gdf)

    # round the bike safety factors; missing, zero and non-finite values become 1
    graph.es[E.bike_safety_factor.value] = [
        round(v, 2) if (v and np.isfinite(v)) else 1
        for v in graph.es[E.bike_safety_factor.value]
    ]

    # set combined GVI to GVI attribute & export graph
    graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_veg.value])
    ig_utils.export_to_graphml(graph,
                               out_graph,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)

    # create GeoJSON files for vector tiles
    geojson = utils.create_geojson(graph)
    utils.write_geojson(geojson, out_geojson, overwrite=True, id_attr=True)
    utils.write_geojson(geojson,
                        out_geojson_noise_gvi,
                        overwrite=True,
                        db_prop=True,
                        gvi_prop=True)

    # for research use, set combined GVI that omits low vegetation to GVI attribute and export graph
    graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_high_veg.value])
    ig_utils.export_to_graphml(graph,
                               out_graph_research,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)

    # export clip of the graph by the extent of Helsinki
    node_gdf = ig_utils.get_node_gdf(graph, attrs=[N.id_ig])
    # replace geometry with buffered one (500 m)
    hel_extent['geometry'] = [
        geom.buffer(500) for geom in hel_extent['geometry']
    ]
    inside_hel = gpd.sjoin(node_gdf, hel_extent)
    # a set makes the membership test below constant time per node,
    # instead of scanning a list of all the inside ids for every node
    inside_hel_ids = set(inside_hel[N.id_ig.name])
    outside_hel_ids = [
        id_ig for id_ig in node_gdf[N.id_ig.name]
        if id_ig not in inside_hel_ids
    ]

    graph.delete_vertices(outside_hel_ids)
    # delete isolated nodes
    del_node_ids = [v.index for v in graph.vs.select(_degree_eq=0)]
    graph.delete_vertices(del_node_ids)
    # reassign igraph indexes to edge and node attributes
    graph.es[E.id_ig.value] = [e.index for e in graph.es]
    graph.vs[N.id_ig.value] = [v.index for v in graph.vs]
    # recalculate uv_id edge attributes
    edge_gdf = ig_utils.get_edge_gdf(graph, ig_attrs=['source', 'target'])
    set_uv(graph, edge_gdf)

    # export clipped graph
    ig_utils.export_to_graphml(graph,
                               out_graph_research_hel,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)