def set_default_and_na_edge_noises(graph: ig.Graph, data_extent: Polygon,
                                   log: Logger) -> None:
    """Sets noise attributes of edges to their default values and None outside the extent of the noise data.
    """

    # first set noise attributes of all edges as nodata
    graph.es[E.noises.value] = None
    graph.es[E.noise_source.value] = None

    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig])
    data_extent_gdf = gpd.GeoDataFrame(
        data=[{'has_noise_data': 1}],
        geometry=[data_extent],
        crs=CRS.from_epsg(3879))
    joined = gpd.sjoin(edge_gdf, data_extent_gdf, how='left',
                       op='within').drop(['index_right'], axis=1)
    edges_within = joined[joined['has_noise_data'] == 1]

    real_edge_count = len([
        geom for geom in list(edge_gdf['geometry'])
        if isinstance(geom, LineString)
    ])
    log.info(
        f'found {real_edge_count - len(edges_within)} edges of {real_edge_count} outside noise data extent'
    )

    # set noise attributes of edges within the data extent to default values (no noise)
    for edge in edges_within.itertuples():
        graph.es[getattr(edge, E.id_ig.name)][E.noises.value] = {}
        graph.es[getattr(edge, E.id_ig.name)][E.noise_source.value] = ''
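A note on the spatial join above: edges get the has_noise_data flag only if their geometry lies entirely within the noise data extent. A minimal sketch of the same pattern on toy data (assuming GeoPandas >= 0.10, where the op argument used above was renamed to predicate):

import geopandas as gpd
from shapely.geometry import LineString, Polygon

extent = gpd.GeoDataFrame({'has_noise_data': [1]},
                          geometry=[Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])])
edges = gpd.GeoDataFrame(geometry=[LineString([(1, 1), (2, 2)]),     # inside the extent
                                   LineString([(9, 9), (12, 12)])])  # crosses the boundary
joined = gpd.sjoin(edges, extent, how='left', predicate='within')
assert list(joined['has_noise_data'].notna()) == [True, False]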
 def __get_edge_gdf(self):
     edge_gdf = ig_utils.get_edge_gdf(self.graph, attrs=[E.id_way], drop_na_geoms=True)
     # drop edges with identical geometry
     edge_gdf = edge_gdf.drop_duplicates(E.id_way.name)
     edge_gdf = edge_gdf[[E.geometry.name]]
     self.log.info(f'Created edge_gdf of {len(edge_gdf)} unique edges')
     return edge_gdf
    def test_add_sampling_points(self):
        graph = ig_utils.read_graphml('data/test_graph.graphml')
        gdf = ig_utils.get_edge_gdf(graph)
        # start_time = time.time()
        gdf = utils.add_sampling_points_to_gdf(gdf, 2)
        # log.duration(start_time, 'added sampling points')
        sampling_points_list = list(gdf['sampling_points'])
        self.assertEqual(
            len([sps for sps in sampling_points_list if sps is not None]), 3522)
        self.assertEqual(
            len([sps for sps in sampling_points_list if sps is None]), 180)
        # test that all sample points are on the line geometries
        for edge in gdf.itertuples():
            sampling_points = getattr(edge, 'sampling_points')
            if sampling_points is None:
                continue
            line_geom = getattr(edge, 'geometry')
            for sp in sampling_points:
                self.assertAlmostEqual(sp.distance(line_geom), 0, 5)

        # validate sampling point gdf (exploded from the edge gdf with sampling points)
        sampling_gdf = utils.explode_sampling_point_gdf(gdf, 'sampling_points')
        self.assertGreater(len(sampling_gdf), len(gdf))
        self.assertEqual(len(sampling_gdf), 58554)
        # check that the total representative length of each set of sampling points equals the length of the respective edge
        sps_by_edge = sampling_gdf.groupby('edge_id')
        for edge in gdf.itertuples():
            if edge.sampling_points is not None:
                edge_sps = sps_by_edge.get_group(edge.Index)
                sampling_length_sum = edge_sps['sample_len'].sum()
                self.assertAlmostEqual(sampling_length_sum,
                                       edge.geometry.length, 5)
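The assertions above only pin down the contract of add_sampling_points_to_gdf: every sampling point lies on its edge geometry, and the per-point representative lengths (sample_len) sum to the edge length. A hedged sketch of logic that would satisfy that contract (the actual implementation may differ):

from shapely.geometry import LineString

def get_sampling_points(geom: LineString, sampling_interval: float):
    # one point per roughly sampling_interval meters, each representing
    # an equal share (sample_len) of the line's length
    count = max(1, round(geom.length / sampling_interval))
    sample_len = geom.length / count
    points = [geom.interpolate((i + 0.5) * sample_len) for i in range(count)]
    return points, sample_len

line = LineString([(0, 0), (10, 0)])
points, sample_len = get_sampling_points(line, 2)
assert len(points) == 5 and sample_len == 2.0
assert all(round(p.distance(line), 5) == 0 for p in points)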
def test_joins_noises_to_graph_edges():
    graph = ig_utils.read_graphml(f'{base_dir}/data/test_graph.graphml')
    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig, E.length])
    edge_gdf[E.id_ig.name] = edge_gdf.index
    # read noise data
    noise_layer_names = fiona.listlayers(f'{base_dir}/data/noise_data_processed.gpkg')
    noise_layers = {name: gpd.read_file(f'{base_dir}/data/noise_data_processed.gpkg', layer=name) for name in noise_layer_names}
    noise_layers = {name: gdf.rename(columns={'db_low': name}) for name, gdf in noise_layers.items()}

    # read nodata zone: narrow area between noise surfaces of different municipalities
    nodata_layer = gpd.read_file(f'{base_dir}/data/extents.gpkg', layer='municipal_boundaries')

    edge_noises = noise_graph_join.noise_graph_join(
        edge_gdf=edge_gdf,
        sampling_interval=3,
        noise_layers=noise_layers,
        nodata_layer=nodata_layer
    )

    assert edge_noises[E.id_ig.name].nunique() == 3522

    edge_noises_df = pd.merge(edge_gdf, edge_noises, how='inner', on=E.id_ig.name)
    edge_noises_df['total_noise_len'] = [round(sum(noises.values()), 4) for noises in edge_noises_df['noises']]

    def validate_edge_noises(row):
        assert round(row['total_noise_len'], 1) <= round(row['length'], 1)

    edge_noises_df.apply(validate_edge_noises, axis=1)

    assert round(edge_noises_df['total_noise_len'].mean(), 2) == 33.20

    # test frequency of different main noise sources
    noise_sources = dict(Counter(list(edge_noises_df[E.noise_source.name])))
    assert noise_sources == {'road': 2322, 'train': 1198, '': 2}
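For context, a noises dict maps a noise level (dB) to the edge length exposed to it, so total_noise_len above is the summed exposure in meters. A length-weighted mean level per edge can be derived from the same dict; a hedged sketch (the project's noise_exps.get_mean_noise_level, used later on this page, may differ in details):

def get_mean_noise_level(noises: dict, length: float) -> float:
    # count the share of the edge without mapped exposure as 40 dB,
    # mirroring the add_db_40_exp_to_noises step shown further below
    quiet_len = max(0.0, length - sum(noises.values()))
    weighted_sum = 40 * quiet_len + sum(db * exp_len for db, exp_len in noises.items())
    return round(weighted_sum / length, 2)

assert get_mean_noise_level({65: 10.0, 55: 5.0}, 20.0) == 56.25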
def test_adds_sampling_points_to_edge_gdf():
    graph = ig_utils.read_graphml(f'{base_dir}/data/test_graph.graphml')
    gdf = ig_utils.get_edge_gdf(graph)
    # start_time = time.time()
    gdf = noise_join_utils.add_sampling_points_to_gdf(gdf, 2)
    # log.duration(start_time, 'added sampling points')
    sampling_points_list = list(gdf['sampling_points'])
    assert len([sps for sps in sampling_points_list if sps is not None]) == 3522
    assert len([sps for sps in sampling_points_list if sps is None]) == 180
    # test that all sample points are on the line geometries
    for edge in gdf.itertuples():
        sampling_points = getattr(edge, 'sampling_points')
        if not sampling_points:
            continue
        line_geom = getattr(edge, 'geometry')
        for sp in sampling_points:
            assert round(sp.distance(line_geom), 5) == 0

    # validate sampling point gdf (exploded from the edge gdf with sampling points)
    sampling_gdf = noise_join_utils.explode_sampling_point_gdf(gdf, 'sampling_points')
    assert len(sampling_gdf) > len(gdf)
    assert len(sampling_gdf) == 58554
    # check that the total representative length of each set of sampling points equals the length of the respective edge
    sps_by_edge = sampling_gdf.groupby('edge_id')
    for edge in gdf.itertuples():
        if edge.sampling_points is not None:
            edge_sps = sps_by_edge.get_group(edge.Index)
            sampling_length_sum = edge_sps['sample_len'].sum()
            assert round(sampling_length_sum, 2) == round(edge.geometry.length, 2)
 def test_graph_to_gdf(self):
     graph = ig_utils.read_graphml('data/test_graph.graphml',
                                   log=Logger(printing=True))
     # test read graph to wgs gdf
     gdf = ig_utils.get_edge_gdf(graph,
                                 id_attr=Edge.id_ig,
                                 attrs=[Edge.length],
                                 geom_attr=Edge.geom_wgs)
     gdf['geom_length'] = [geom.length for geom in gdf[Edge.geom_wgs.name]]
     self.assertAlmostEqual(gdf['geom_length'].mean(), 0.000429, 6)
     # test read to projected gdf
     gdf = ig_utils.get_edge_gdf(graph,
                                 id_attr=Edge.id_ig,
                                 attrs=[Edge.length],
                                 geom_attr=Edge.geometry)
     gdf['geom_length'] = [geom.length for geom in gdf[Edge.geometry.name]]
     self.assertAlmostEqual(gdf['geom_length'].mean(), 31.65, 2)
Example #7
def test_gets_graph_data_as_gdf():
    graph = ig_utils.read_graphml(conf.igraph_out_file)
    # test read graph to wgs gdf
    gdf = ig_utils.get_edge_gdf(graph,
                                id_attr=Edge.id_ig,
                                attrs=[Edge.length],
                                geom_attr=Edge.geom_wgs,
                                drop_na_geoms=True)
    gdf['geom_length'] = [geom.length for geom in gdf[Edge.geom_wgs.name]]
    assert round(gdf['geom_length'].mean(), 6) == 0.000451
    # test read to projected gdf
    gdf = ig_utils.get_edge_gdf(graph,
                                id_attr=Edge.id_ig,
                                attrs=[Edge.length],
                                geom_attr=Edge.geometry,
                                drop_na_geoms=True)
    gdf['geom_length'] = [geom.length for geom in gdf[Edge.geometry.name]]
    assert round(gdf['geom_length'].mean(), 2) == 33.27
Example #8
def main(conf: GraphNoiseJoinConf):
    graph = ig_utils.read_graphml(conf.graph_in_fp)
    log.info(f'read graph of {graph.ecount()} edges')
    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig])
    edge_gdf = edge_gdf.sort_values(E.id_ig.name)

    # read noise data
    noise_layer_names = [
        layer for layer in fiona.listlayers(conf.noise_data_fp)
    ]
    noise_layers = {
        name: gpd.read_file(conf.noise_data_fp, layer=name)
        for name in noise_layer_names
    }
    noise_layers = {
        name: gdf.rename(columns={'db_low': name})
        for name, gdf in noise_layers.items()
    }
    log.info(f'read {len(noise_layers)} noise layers')

    # read nodata zone: narrow area between noise surfaces of different municipalities
    nodata_layer = gpd.read_file(conf.nodata_fp, layer=conf.nodata_layer_name)

    # process chunks of edges together by dividing gdf to parts
    processing_size = 50000
    split_gdf_count = math.ceil(len(edge_gdf) / processing_size)
    gdfs = np.array_split(edge_gdf, split_gdf_count)

    # get max id of previously processed edges
    max_processed_id = get_previously_processed_max_id(conf.noise_data_csv_dir)
    if max_processed_id > 0:
        log.info(
            f'found previously processed edges up to edge id {max_processed_id}'
        )

    for idx, gdf in enumerate(gdfs):

        if gdf[E.id_ig.name].max() <= max_processed_id:
            log.info(
                f'skipping {idx+1} of {len(gdfs)} edge gdfs (processed before)'
            )
            continue
        else:
            log.info(f'processing {idx+1} of {len(gdfs)} edge gdfs')

        edge_noises = noise_graph_join(
            edge_gdf=gdf,
            sampling_interval=3,
            noise_layers=noise_layers,
            nodata_layer=nodata_layer,
            b_debug=False,
            debug_gpkg='debug/noise_join_debug.gpkg')
        export_edge_noise_csv(edge_noises, conf.noise_data_csv_dir)
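The resume logic above leans on the CSV naming scheme of export_edge_noise_csv (shown later on this page): each file is named after the highest edge id it contains, so the maximum over the filenames marks the already processed edges. A minimal sketch of the chunking itself, assuming edge ids are sorted ascending so whole chunks can be skipped:

import math
import numpy as np
import pandas as pd

edge_df = pd.DataFrame({'id_ig': range(120_000)})
chunks = np.array_split(edge_df, math.ceil(len(edge_df) / 50_000))
max_processed_id = 49_999  # e.g. parsed from '49999_edge_noises.csv'
todo = [gdf for gdf in chunks if gdf['id_ig'].max() > max_processed_id]
assert len(chunks) == 3 and len(todo) == 2  # the first chunk (ids 0-39999) is skipped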
def get_sampling_point_gdf_from_graph(graph) -> GeoDataFrame:
    """Creates GeoDataFrame of edges of the graph. Filters out null geometries and
    adds point geometries to be used as sampling points.
    """
    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig, E.id_way], geom_attr=E.geom_wgs)
    # filter out edges with null geometry
    edge_gdf = edge_gdf[edge_gdf[E.geom_wgs.name].apply(lambda x: isinstance(x, LineString))]
    edge_gdf['point_geom'] = [
        geom.interpolate(0.5, normalized=True)
        for geom in edge_gdf[E.geom_wgs.name]
    ]
    return edge_gdf
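Shapely's normalized interpolation used above returns the point at the given fraction of the line's length, so interpolate(0.5, normalized=True) is the midpoint:

from shapely.geometry import LineString

line = LineString([(0, 0), (10, 0)])
midpoint = line.interpolate(0.5, normalized=True)
assert (midpoint.x, midpoint.y) == (5.0, 0.0)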
Example #10
def create_geojson(graph: ig.Graph) -> dict:
    df = ig_utils.get_edge_gdf(graph,
                               attrs=[E.id_way, E.length, E.noises, E.gvi],
                               geom_attr=E.geom_wgs)
    # drop edges without geometry
    df = df[df[E.geom_wgs.name].apply(
        lambda geom: isinstance(geom, LineString))]
    # drop edges with duplicate geometry
    df = df.drop_duplicates(E.id_way.name)
    df[E.noises.name] = df.apply(
        lambda x: __update_db_40_exp(x[E.noises.name], x[E.length.name]),
        axis=1)
    df['db'] = df.apply(
        lambda x: __get_mean_noise_level(x[E.noises.name], x[E.length.name]),
        axis=1)
    df['db'] = [__get_noise_range(db) for db in df['db']]
    # simplify geometries for vector tiles
    df[E.geom_wgs.name] = [
        geom.simplify(0.00005, preserve_topology=True)
        for geom in df[E.geom_wgs.name]
    ]
    df['coords'] = [__get_coord_list(geom) for geom in df[E.geom_wgs.name]]
    return __as_geojson_feature_collection(
        df[[E.id_way.name, 'coords', 'db', E.gvi.name]].to_dict('records'))
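__as_geojson_feature_collection is private to the module; a hedged sketch of what such a helper could look like for the record dicts produced above (the actual implementation may shape the properties differently):

def as_geojson_feature_collection(records: list) -> dict:
    return {
        'type': 'FeatureCollection',
        'features': [{
            'type': 'Feature',
            'geometry': {'type': 'LineString', 'coordinates': rec['coords']},
            'properties': {k: v for k, v in rec.items() if k != 'coords'},
        } for rec in records],
    }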
Example #11
    graph_file_in = r'graph_in/kumpula.graphml' if subset else r'graph_in/hma.graphml'
    graph_file_out = r'graph_out/kumpula.graphml' if subset else r'graph_out/hma.graphml'
    edge_table_db_name = 'edge_buffers_subset' if subset else 'edge_buffers'

    execute_sql = db.get_sql_executor(log)
    db_tables = db.get_db_table_names(execute_sql)

    # load GSV GVI points from GPKG
    gsv_gvi_gdf = load_gsv_gvi_gdf(r'data/greenery_points.gpkg')

    # load street network graph from GraphML
    graph = ig_utils.read_graphml(graph_file_in)
    log.info(f'Read graph of {graph.ecount()} edges')

    # load edge_gdf
    edge_gdf: GeoDataFrame = ig_utils.get_edge_gdf(
        graph, attrs=[E.id_ig, E.length, E.id_way])
    edge_gdf = edge_gdf.drop_duplicates(E.id_way.name, keep='first')
    # drop edges without geometry
    edge_gdf = edge_gdf[edge_gdf['geometry'].apply(
        lambda geom: isinstance(geom, LineString))]
    log.info(f'Subset edge_gdf to {len(edge_gdf)} unique geometries')

    # export edges to db if not there yet for land cover overlay analysis
    if edge_table_db_name not in db_tables:
        # add simplified buffers to edge_gdf
        edges_2_db = edge_gdf.copy()
        log.info('Calculating 30m buffers from edge geometries')
        edges_2_db['b30'] = [
            geom.buffer(30, resolution=3) for geom in edges_2_db['geometry']
        ]
        edges_2_db = edges_2_db.rename(columns={
            'geometry': 'line_geom',
            'b30': 'geometry'
        })
Example #12
def set_uv(graph, edge_gdf):
    edge_gdf['uv'] = edge_gdf.apply(lambda x: (x['source'], x['target']),
                                    axis=1)
    graph.es[E.uv.value] = list(edge_gdf['uv'])


def set_way_ids(graph, edge_gdf):
    edge_gdf['way_id'] = edge_gdf.apply(
        lambda x: str(round(x['length'], 1)) + str(sorted(x['uv'])), axis=1)
    way_ids = list(edge_gdf['way_id'].unique())
    way_ids_d = {way_id: idx for idx, way_id in enumerate(way_ids)}
    edge_gdf['way_id'] = [way_ids_d[way_id] for way_id in edge_gdf['way_id']]
    graph.es[E.id_way.value] = list(edge_gdf['way_id'])
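Since the way id above combines the rounded edge length with the sorted node pair, the two directed edges of a two-way street map to the same id (which is why drop_duplicates by id_way elsewhere on this page keeps one geometry per street segment). A quick illustration of the key construction:

length = 12.34
forward_key = str(round(length, 1)) + str(sorted((7, 3)))
backward_key = str(round(length, 1)) + str(sorted((3, 7)))
assert forward_key == backward_key == '12.3[3, 7]'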


edge_gdf = ig_utils.get_edge_gdf(
    graph,
    attrs=[E.id_ig, E.length, E.bike_safety_factor],
    ig_attrs=['source', 'target'])

set_biking_lengths(graph, edge_gdf)
set_uv(graph, edge_gdf)
set_way_ids(graph, edge_gdf)

# set combined GVI to GVI attribute & export graph
graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_veg.value])
ig_utils.export_to_graphml(graph,
                           out_graph,
                           n_attrs=out_node_attrs,
                           e_attrs=out_edge_attrs)

# create GeoJSON files for vector tiles
geojson = utils.create_geojson(graph)
def convert_otp_graph_to_igraph(
    node_csv_file: str,
    edge_csv_file: str,
    hma_poly_file: str,
    igraph_out_file: str,
    b_export_otp_data_to_gpkg: bool = False,
    b_export_decomposed_igraphs_to_gpkg: bool = False,
    b_export_final_graph_to_gpkg: bool = False,
    debug_otp_graph_gpkg: str = 'debug/otp_graph_features.gpkg',
    debug_igraph_gpkg: str = 'debug/otp2igraph_features.gpkg',
) -> ig.Graph:

    hma_poly = geom_utils.project_geom(gpd.read_file(hma_poly_file)['geometry'][0])

    # 1) read nodes from CSV
    n = pd.read_csv(node_csv_file, sep=';')
    log.info(f'read {len(n.index)} nodes')
    log.debug(f'node column types: {n.dtypes}')
    log.debug(f'nodes head: {n.head()}')
    log.info('creating node gdf')
    n[Node.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else Point() for geom in n[Node.geometry.name]
    ]
    n[Node.geom_wgs.name] = n[Node.geometry.name]
    n = gpd.GeoDataFrame(n, geometry=Node.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting nodes to etrs')
    n = n.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'nodes head: {n.head()}')

    # 2) read edges from CSV
    e = pd.read_csv(edge_csv_file, sep=';')
    log.info(f'read {len(e.index)} edges')
    log.debug(f'edge column types: {e.dtypes}')
    log.debug(f'edges head: {e.head()}')
    log.info('creating edge gdf')
    e[Edge.geometry.name] = [
        shapely.wkt.loads(geom) if isinstance(geom, str) else LineString() for geom in e[Edge.geometry.name]
    ]
    e[Edge.geom_wgs.name] = e[Edge.geometry.name]
    e = gpd.GeoDataFrame(e, geometry=Edge.geometry.name, crs=CRS.from_epsg(4326))
    log.info('reprojecting edges to etrs')
    e = e.to_crs(epsg=gp_conf.proj_crs_epsg)
    log.debug(f'edges head: {e.head()}')

    # 3) export graph data to gpkg
    if b_export_otp_data_to_gpkg:
        log.info('writing otp graph data to gpkg')
        e.drop(columns=[Edge.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='edges', driver='GPKG')
        log.info(f'exported edges to {debug_otp_graph_gpkg} (layer=edges)')
        n.drop(columns=[Node.geom_wgs.name]).to_file(debug_otp_graph_gpkg, layer='nodes', driver='GPKG')
        log.info(f'exported nodes to {debug_otp_graph_gpkg} (layer=nodes)')

    # 4) filter out edges that are unsuitable for both walking and cycling
    def filter_df_by_query(df: pd.DataFrame, query: str, name: str = 'rows'):
        count_before = len(df.index)
        df_filt = df.query(query).copy()
        filt_ratio = (count_before-len(df_filt.index)) / count_before
        log.info(f'filtered out {count_before-len(df_filt.index)} {name} ({round(filt_ratio * 100, 1)} %) by {query}')
        return df_filt

    e_filt = filter_df_by_query(e, f'{Edge.allows_walking.name} == True or {Edge.allows_biking.name} == True', name='edges')
    e_filt = filter_df_by_query(e_filt, f'{Edge.is_no_thru_traffic.name} == False', name='edges')

    # 5) create dictionaries for converting OTP ids to igraph ids and vice versa
    log.debug('create maps for converting otp ids to ig ids')
    n[Node.id_ig.name] = np.arange(len(n.index))
    ids_otp_ig = {}
    ids_ig_otp = {}
    for node in n.itertuples():
        ids_otp_ig[getattr(node, Node.id_otp.name)] = getattr(node, Node.id_ig.name)
        ids_ig_otp[getattr(node, Node.id_ig.name)] = getattr(node, Node.id_otp.name)

    # 6) add nodes to graph
    log.info('adding nodes to graph')
    G = ig.Graph(directed=True)
    G.add_vertices(len(n.index))
    for attr in Node:
        if attr.name in n.columns:
            G.vs[attr.value] = list(n[attr.name])
        else:
            log.warning(f'node column {attr.name} not present in dataframe')

    # 7) add edges to graph
    log.info('adding edges to graph')

    # get edge lengths by projected geometry
    e_filt[Edge.length.name] = [
        round(geom.length, 4) if isinstance(geom, LineString) else 0.0 for geom in e_filt[Edge.geometry.name]
    ]

    def get_ig_uv(edge):
        return (ids_otp_ig[edge['node_orig_id']], ids_otp_ig[edge['node_dest_id']])

    e_filt['uv_ig'] = e_filt.apply(lambda row: get_ig_uv(row), axis=1)
    e_filt[Edge.id_ig.name] = np.arange(len(e_filt.index))
    G.add_edges(list(e_filt['uv_ig']))
    for attr in Edge:
        if attr.name in e_filt.columns:
            G.es[attr.value] = list(e_filt[attr.name])
        else:
            log.warning(f'edge column {attr.name} not present in dataframe')

    # 8) delete edges outside Helsinki Metropolitan Area (HMA)
    hma_buffered = hma_poly.buffer(100)

    def intersects_hma(geom: Union[LineString, None]):
        if not geom or geom.is_empty:
            return True
        return geom.intersects(hma_buffered)

    e_gdf = ig_utils.get_edge_gdf(G)
    log.info('finding edges that intersect with HMA')
    e_gdf['in_hma'] = [intersects_hma(line) for line in e_gdf[Edge.geometry.name]]
    e_gdf_del = e_gdf.query('in_hma == False').copy()
    out_ratio = round(100 * len(e_gdf_del.index)/len(e_gdf.index), 1)
    log.info(f'found {len(e_gdf_del.index)} ({out_ratio} %) edges outside HMA')

    log.info('deleting edges')
    before_count = G.ecount()
    G.delete_edges(e_gdf_del.index.tolist())
    after_count = G.ecount()
    log.info(f'deleted {before_count-after_count} edges')

    # check whether id_ig attributes need to be updated to match igraph edge indexes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # verify that id_ig attributes now match igraph edge indexes
    mismatch_count = len(
        [edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index]
    )
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 9) find and inspect subgraphs by decomposing the graph
    sub_graphs = G.decompose(mode='STRONG')
    log.info(f'found {len(sub_graphs)} subgraphs')

    graph_sizes = [graph.ecount() for graph in sub_graphs]
    log.info(f'subgraphs with more than 10 edges: {len([s for s in graph_sizes if s > 10])}')
    log.info(f'subgraphs with more than 50 edges: {len([s for s in graph_sizes if s > 50])}')
    log.info(f'subgraphs with more than 100 edges: {len([s for s in graph_sizes if s > 100])}')
    log.info(f'subgraphs with more than 500 edges: {len([s for s in graph_sizes if s > 500])}')
    log.info(f'subgraphs with more than 10000 edges: {len([s for s in graph_sizes if s > 10000])}')

    small_graphs = [graph for graph in sub_graphs if graph.ecount() <= 15]
    medium_graphs = [graph for graph in sub_graphs if (graph.ecount() > 15 and graph.ecount() <= 500)]
    big_graphs = [graph for graph in sub_graphs if graph.ecount() > 500]

    def collect_subgraph_edges(graphs: list) -> list:
        edges_all = []
        for graph_id, graph in enumerate(graphs):
            edges = ig_utils.get_edge_dicts(graph, attrs=[Edge.id_otp, Edge.id_ig, Edge.geometry])
            for edge in edges:
                edge['graph_id'] = graph_id
            edges_all.extend(edges)
        return edges_all

    small_graph_edges = collect_subgraph_edges(small_graphs)
    medium_graph_edges = collect_subgraph_edges(medium_graphs)
    big_graph_edges = collect_subgraph_edges(big_graphs)

    if b_export_decomposed_igraphs_to_gpkg:
        log.info('exporting subgraphs to gpkg')
        # graphs with <= 15 edges
        small_graph_edges_gdf = gpd.GeoDataFrame(small_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        small_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='small_graph_edges', driver='GPKG')
        # graphs with 16–500 edges
        medium_graph_edges_gdf = gpd.GeoDataFrame(medium_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        medium_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='medium_graph_edges', driver='GPKG')
        # graphs with > 500 edges
        big_graph_edges_gdf = gpd.GeoDataFrame(big_graph_edges, crs=CRS.from_epsg(gp_conf.proj_crs_epsg))
        big_graph_edges_gdf.to_file(debug_igraph_gpkg, layer='big_graph_edges', driver='GPKG')
        log.info('graphs exported')

    # 10) delete smallest subgraphs from the graph
    del_edge_ids = [edge[Edge.id_ig.name] for edge in small_graph_edges]
    log.info(f'deleting {len(del_edge_ids)} isolated edges')
    before_count = G.ecount()
    G.delete_edges(del_edge_ids)
    after_count = G.ecount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) edges')

    # 11) delete isolated nodes from the graph
    del_node_ids = [v.index for v in G.vs.select(_degree_eq=0)]
    log.info(f'deleting {len(del_node_ids)} isolated nodes')
    before_count = G.vcount()
    G.delete_vertices(del_node_ids)
    after_count = G.vcount()
    del_ratio = round(100 * (before_count-after_count) / before_count, 1)
    log.info(f'deleted {before_count-after_count} ({del_ratio} %) nodes')

    # check whether id_ig attributes need to be updated to match igraph edge indexes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count}')
    # reassign igraph indexes to edge and node attributes
    G.es[Edge.id_ig.value] = [e.index for e in G.es]
    G.vs[Node.id_ig.value] = [v.index for v in G.vs]
    # verify that id_ig attributes now match igraph edge indexes
    mismatch_count = len([edge.index for edge in G.es if edge.attributes()[Edge.id_ig.value] != edge.index])
    log.info(f'invalid edge ids: {mismatch_count} (after re-indexing)')

    # 12) export graph data to GeoDataFrames for debugging

    if b_export_final_graph_to_gpkg:
        log.info(f'exporting final graph to {debug_igraph_gpkg} for debugging')
        e_gdf = ig_utils.get_edge_gdf(G, attrs=[Edge.id_otp, Edge.id_ig], ig_attrs=['source', 'target'])
        n_gdf = ig_utils.get_node_gdf(G, ig_attrs=['index'])
        e_gdf.to_file(debug_igraph_gpkg, layer='final_graph_edges', driver='GPKG')
        n_gdf.to_file(debug_igraph_gpkg, layer='final_graph_nodes', driver='GPKG')

    if igraph_out_file:
        ig_utils.export_to_graphml(G, igraph_out_file)

    return G
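On the repeated id checks and re-indexing above: igraph compacts edge and vertex indexes after deletions, so id_ig attributes stored on the remaining elements go stale and must be rewritten from the live indexes. A minimal demonstration with python-igraph:

import igraph as ig

g = ig.Graph(edges=[(0, 1), (1, 2), (2, 3)])
g.es['id_ig'] = [e.index for e in g.es]
g.delete_edges([0])
# remaining edges were re-indexed to 0 and 1, but the attribute still says 1 and 2
assert g.es['id_ig'] == [1, 2]
assert [e.index for e in g.es] == [0, 1]
g.es['id_ig'] = [e.index for e in g.es]  # reassign, as done above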
Example #14
@pytest.fixture
def edge_gdf(graph) -> GeoDataFrame:
    yield ig_utils.get_edge_gdf(graph, attrs=[E.id_way, E.length], drop_na_geoms=True)
Example #15
def get_previously_processed_max_id(csv_dir: str) -> int:
    csv_files = os.listdir(csv_dir)
    max_ids = [int(name.split('_')[0]) for name in csv_files]
    return max(max_ids) if max_ids else 0


def export_edge_noise_csv(edge_noises: pd.DataFrame, out_dir: str):
    max_id = edge_noises[E.id_ig.name].max()
    csv_name = f'{max_id}_edge_noises.csv'
    edge_noises.to_csv(out_dir + csv_name)


if __name__ == '__main__':
    log = Logger(printing=True, log_file='noise_graph_join.log', level='debug')
    graph = ig_utils.read_graphml('data/hma.graphml')
    log.info(f'read graph of {graph.ecount()} edges')
    edge_gdf = ig_utils.get_edge_gdf(graph, attrs=[E.id_ig])
    edge_gdf = edge_gdf.sort_values(E.id_ig.name)

    # read noise data
    noise_layer_names = [
        layer for layer in fiona.listlayers('data/noise_data_processed.gpkg')
    ]
    noise_layers = {
        name: gpd.read_file('data/noise_data_processed.gpkg', layer=name)
        for name in noise_layer_names
    }
    noise_layers = {
        name: gdf.rename(columns={'db_low': name})
        for name, gdf in noise_layers.items()
    }
    log.info(f'read {len(noise_layers)} noise layers')
graph_dir = r'graphs'
graph_id = r'kumpula'
# graph_id = r'hma_r_hel-clip'
aqi_update_fp = fr'aqi_updates/yearly_2019_aqi_avg_sum_{graph_id}.csv'
out_csv_fp = fr'examples/{graph_id}_edges.csv'

edge_attrs_in = [E.id_ig, E.id_way, E.length, E.gvi, E.aqi,
                 E.noises]  # geometry is read by default

edge_attrs_out = [
    E.id_ig.name, E.length.name, E.gvi.name, E.aqi.name, E.noises.name, 'mdB'
]  # only these are exported to CSV

graph = ig_utils.read_graphml(fr'{graph_dir}/{graph_id}.graphml')
edges = ig_utils.get_edge_gdf(graph, attrs=edge_attrs_in, drop_na_geoms=True)
# edges = edges.drop_duplicates(E.id_way.name)  # keep only edges unique by geometry

# ensure noise exposures sum up to the edge length by assigning the missing share to 40 dB
edges[E.noises.name] = edges.apply(
    lambda row: noise_exps.add_db_40_exp_to_noises(
        row[E.noises.name], row[E.length.name]),
    axis=1)
edges['mdB'] = edges.apply(
    lambda row: noise_exps.get_mean_noise_level(
        row[E.noises.name], row[E.length.name]),
    axis=1)
# stringify noises dict
edges[E.noises.name] = [str(noises) for noises in edges[E.noises.name]]

# join AQI to edge data
edge_aqis = pd.read_csv(aqi_update_fp)
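Since the noises dicts are stringified before export, a consumer of the CSV has to parse them back; a small sketch of the assumed round-trip using the standard library:

import ast

noises = {55: 12.5, 60: 3.2}
serialized = str(noises)  # as written to the edge CSV above
assert ast.literal_eval(serialized) == noises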
Example #17
def main(conf: GraphGreenViewJoinConf):
    edge_table_db_name = conf.db_edge_table

    execute_sql = db.get_sql_executor(log)
    db_tables = db.get_db_table_names(execute_sql)

    # load GSV GVI points from GPKG
    gsv_gvi_gdf = load_gsv_gvi_gdf(conf.greenery_points_fp)

    # load street network graph from GraphML
    graph = ig_utils.read_graphml(conf.graph_file_in)
    log.info(f'Read graph of {graph.ecount()} edges')

    # load edge_gdf
    edge_gdf: GeoDataFrame = ig_utils.get_edge_gdf(
        graph, attrs=[E.id_ig, E.length, E.id_way])
    edge_gdf = edge_gdf.drop_duplicates(E.id_way.name, keep='first')
    # drop edges without geometry
    edge_gdf = edge_gdf[edge_gdf['geometry'].apply(
        lambda geom: isinstance(geom, LineString))]
    log.info(f'Subset edge_gdf to {len(edge_gdf)} unique geometries')

    # export edges to db if not there yet for land cover overlay analysis
    if edge_table_db_name not in db_tables:
        # add simplified buffers to edge_gdf
        edges_2_db = edge_gdf.copy()
        log.info('Calculating 30m buffers from edge geometries')
        edges_2_db['b30'] = [
            geom.buffer(30, resolution=3) for geom in edges_2_db['geometry']
        ]
        edges_2_db = edges_2_db.rename(columns={
            'geometry': 'line_geom',
            'b30': 'geometry'
        })
        edges_2_db = edges_2_db.set_geometry('geometry')

        log.info('Writing edges to PostGIS')
        write_to_postgis = db.get_db_writer(log)
        write_to_postgis(edges_2_db[[E.id_way.name, 'geometry']],
                         edge_table_db_name)

        log.info(
            'Wrote graph edges to db, run land_cover_overlay_analysis.py next')
        exit()

    else:
        log.info(
            f'Edges were already exported to db table: {edge_table_db_name}')

    # get mean GSV GVI per edge
    gsv_gvi_list_by_way_id = get_gsv_gvi_list_by_way_id(edge_gdf, gsv_gvi_gdf)
    mean_gsv_gvi_by_way_id = get_mean_gsv_gvi_by_way_id(
        gsv_gvi_list_by_way_id, edge_gdf)

    # fetch low and high vegetation shares from db per edge buffer (way ID)
    low_veg_share_by_way_id = lc_analysis.get_low_veg_share_by_way_id(
        conf.db_low_veg_share_table)
    high_veg_share_by_way_id = lc_analysis.get_high_veg_share_by_way_id(
        conf.db_high_veg_share_table)

    graph = update_gvi_attributes_to_graph(graph, mean_gsv_gvi_by_way_id,
                                           low_veg_share_by_way_id,
                                           high_veg_share_by_way_id)

    ig_utils.export_to_graphml(graph, conf.graph_file_out)

    log.info(f'Exported graph to file {conf.graph_file_out}')
Example #18
 def __create_updater_edge_df(self, G: GraphHandler):
     attrs = [E.length, E.bike_time_cost] if conf.cycling_enabled else [E.length]
     edge_df = ig_utils.get_edge_gdf(G.graph, attrs=attrs)
     edge_df[E.id_ig.name] = edge_df.index
     return edge_df[[E.id_ig.name] + [attr.name for attr in attrs]]
def graph_export(conf: GraphExportConf):
    in_graph = fr'{conf.base_dir}/graph_in/{conf.graph_id}.graphml'
    out_graph = fr'{conf.base_dir}/graph_out/{conf.graph_id}.graphml'
    out_graph_research = fr'{conf.base_dir}/graph_out/{conf.graph_id}_r.graphml'
    out_graph_research_hel = fr'{conf.base_dir}/graph_out/{conf.graph_id}_r_hel-clip.graphml'
    out_geojson_noise_gvi = fr'{conf.base_dir}/graph_out/{conf.graph_id}_noise_gvi.geojson'
    out_geojson = fr'{conf.base_dir}/graph_out/{conf.graph_id}.geojson'

    hel_extent = gpd.read_file(conf.hel_extent_fp)

    out_node_attrs = [N.geometry]
    out_edge_attrs = [
        E.id_ig, E.uv, E.id_way, E.geometry, E.geom_wgs, E.length,
        E.allows_biking, E.is_stairs, E.bike_safety_factor, E.noises, E.gvi
    ]

    if not conf.with_noise_data:
        out_edge_attrs.remove(E.noises)

    if not conf.with_greenery_data:
        out_edge_attrs.remove(E.gvi)

    log.info(f'Reading graph file: {in_graph}')
    graph = ig_utils.read_graphml(in_graph)

    edge_gdf = ig_utils.get_edge_gdf(graph,
                                     attrs=[E.id_ig, E.length],
                                     ig_attrs=['source', 'target'])

    set_uv(graph, edge_gdf)
    set_way_ids(graph, edge_gdf)

    graph.es[E.bike_safety_factor.value] = [
        round(v, 2) if (v and np.isfinite(v)) else 1
        for v in graph.es[E.bike_safety_factor.value]
    ]

    # set combined GVI to GVI attribute & export graph
    graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_veg.value])
    ig_utils.export_to_graphml(graph,
                               out_graph,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)

    # create GeoJSON files for vector tiles
    geojson = utils.create_geojson(graph)
    utils.write_geojson(geojson, out_geojson, overwrite=True, id_attr=True)
    utils.write_geojson(geojson,
                        out_geojson_noise_gvi,
                        overwrite=True,
                        db_prop=True,
                        gvi_prop=True)

    # for research use, set combined GVI that omits low vegetation to GVI attribute and export graph
    graph.es[E.gvi.value] = list(graph.es[E.gvi_comb_gsv_high_veg.value])
    ig_utils.export_to_graphml(graph,
                               out_graph_research,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)

    # export clip of the graph by the extent of Helsinki

    node_gdf = ig_utils.get_node_gdf(graph, attrs=[N.id_ig])
    # replace geometry with buffered one (500 m)
    hel_extent['geometry'] = [
        geom.buffer(500) for geom in hel_extent['geometry']
    ]
    inside_hel = gpd.sjoin(node_gdf, hel_extent)
    inside_hel_ids = list(inside_hel[N.id_ig.name])
    outside_hel_ids = [
        id_ig for id_ig in list(node_gdf[N.id_ig.name])
        if id_ig not in inside_hel_ids
    ]

    graph.delete_vertices(outside_hel_ids)
    # delete isolated nodes
    del_node_ids = [v.index for v in graph.vs.select(_degree_eq=0)]
    graph.delete_vertices(del_node_ids)
    # reassign igraph indexes to edge and node attributes
    graph.es[E.id_ig.value] = [e.index for e in graph.es]
    graph.vs[N.id_ig.value] = [v.index for v in graph.vs]
    # recalculate uv_id edge attributes
    edge_gdf = ig_utils.get_edge_gdf(graph, ig_attrs=['source', 'target'])
    set_uv(graph, edge_gdf)

    # export clipped graph
    ig_utils.export_to_graphml(graph,
                               out_graph_research_hel,
                               n_attrs=out_node_attrs,
                               e_attrs=out_edge_attrs)
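A note on the clip above: deleting the nodes outside the buffered Helsinki extent is enough to clip the edges too, since igraph drops the incident edges of deleted vertices automatically. A minimal check of that behavior:

import igraph as ig

g = ig.Graph(edges=[(0, 1), (1, 2)])
g.delete_vertices([2])
assert g.vcount() == 2 and g.ecount() == 1  # edge (1, 2) went with vertex 2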