def test_creating_building_geometries(test_pbf):
    """Check that building ways are converted into one geometry per way.

    The ways matching the ``building`` filter are read from ``test_pbf`` and
    passed to ``create_way_geometries`` with ``parse_network=False``.
    """
    from pyrosm import OSM
    from pyrosm.data_manager import get_osm_data
    from pyrosm.geometry import create_way_geometries
    from pygeos import Geometry

    osm = OSM(filepath=test_pbf)
    osm._read_pbf()
    custom_filter = {"building": True}

    nodes, ways, relation_ways, relations = get_osm_data(
        None,
        osm._way_records,
        osm._relations,
        osm.conf.tags.building,
        custom_filter,
        filter_type="keep",
    )
    assert isinstance(ways, dict)

    geometries, lengths, from_ids, to_ids = create_way_geometries(
        osm._node_coordinates, ways, parse_network=False
    )
    assert isinstance(geometries, list), f"Type should be list, got {type(geometries)}."
    assert isinstance(geometries[0], Geometry)
    # One geometry is expected for every way id.
    assert len(geometries) == len(ways["id"])
def test_directed_edge_generator(test_pbf):
    """Check the number of rows produced by ``generate_directed_edges``.

    With ``force_bidirectional=True`` every edge must be duplicated; without
    it only the edges not flagged as oneway are duplicated.
    """
    from pyrosm.graphs import generate_directed_edges
    from pyrosm import OSM

    osm = OSM(test_pbf)
    nodes, edges = osm.get_network(nodes=True)

    # Calculate the number of edges that should be oneway + bidirectional.
    # NOTE(review): ``oneway_col`` and ``oneway_values`` are not defined in
    # this function; they are expected to come from the enclosing module.
    mask = edges[oneway_col].isin(oneway_values)
    oneway_edge_cnt = len(edges.loc[mask])
    twoway_edge_cnt = len(edges.loc[~mask])

    # Bidirectional edges
    bidir_edges = generate_directed_edges(
        edges,
        direction="oneway",
        from_id_col="u",
        to_id_col="v",
        force_bidirectional=True,
    )
    assert len(bidir_edges) == 2 * len(edges)

    # Directed edges according to the rules in the "oneway" column
    dir_edges = generate_directed_edges(
        edges,
        direction="oneway",
        from_id_col="u",
        to_id_col="v",
        force_bidirectional=False,
    )
    assert len(dir_edges) == oneway_edge_cnt + twoway_edge_cnt * 2
def test_exclude_filtering_nodes_and_relations(helsinki_pbf):
    """Compare ``exclude`` and ``keep`` filtering with the same amenity filter."""
    from pyrosm import OSM

    # Initialize the reader
    reader = OSM(helsinki_pbf)
    library_filter = {"amenity": ["library"]}

    # Everything but libraries
    without_libraries = reader.get_data_by_custom_criteria(
        library_filter, filter_type="exclude"
    )
    assert without_libraries.shape == (1081, 37)
    remaining_amenities = without_libraries["amenity"].unique().tolist()
    assert "library" not in remaining_amenities

    # There should be nodes, ways and relations
    excluded_types = without_libraries["osm_type"].unique().tolist()
    assert excluded_types == ["node", "way", "relation"]

    # Test the other way around: libraries only
    only_libraries = reader.get_data_by_custom_criteria(
        library_filter, filter_type="keep"
    )
    assert only_libraries.shape == (7, 23)
    assert only_libraries["amenity"].unique().tolist() == ["library"]

    # There should be nodes and ways (no relations)
    kept_types = only_libraries["osm_type"].unique().tolist()
    assert kept_types == ["node", "way"]
def bike_nodes_and_edges():
    """Return ``get_network(nodes=True, network_type="cycling")`` for "ulanbator"."""
    from pyrosm import OSM

    # UlanBator is a good small dataset for testing
    # (unmodified, i.e. not cropped)
    reader = OSM(get_data("ulanbator"))
    return reader.get_network(nodes=True, network_type="cycling")
def test_passing_custom_filter_without_element_key(test_pbf):
    """``get_landuse`` accepts a custom filter that has no ``landuse`` key."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    reader = OSM(filepath=test_pbf)
    leisure_only = {"leisure": True}
    result = reader.get_landuse(custom_filter=leisure_only)
    assert isinstance(result, GeoDataFrame)
def test_parsing_pois_with_defaults(helsinki_pbf, default_filter):
    """Run ``get_poi_data`` with the default filter and check the result."""
    from pyrosm import OSM
    from pyrosm.pois import get_poi_data
    from geopandas import GeoDataFrame
    import pyproj
    from pyrosm._arrays import concatenate_dicts_of_arrays

    reader = OSM(filepath=helsinki_pbf)
    reader._read_pbf()

    # Collect the tag columns configured for every key of the filter.
    tags_as_columns = []
    for key in default_filter.keys():
        tags_as_columns.extend(getattr(reader.conf.tags, key))

    all_nodes = concatenate_dicts_of_arrays(reader._nodes)
    pois = get_poi_data(
        all_nodes,
        reader._node_coordinates,
        reader._way_records,
        reader._relations,
        tags_as_columns,
        default_filter,
        None,
    )
    assert isinstance(pois, GeoDataFrame)

    # Required keys
    for column in ('id', 'geometry'):
        assert column in pois.columns

    # Test shape
    assert len(pois) == 1782
    assert pois.crs == pyproj.CRS.from_epsg(4326)
def test_parsing_landuse_with_defaults(test_pbf):
    """Run ``get_landuse_data`` without a custom filter and check the result."""
    from pyrosm import OSM
    from pyrosm.landuse import get_landuse_data
    from geopandas import GeoDataFrame
    import pyproj
    from pyrosm._arrays import concatenate_dicts_of_arrays

    reader = OSM(filepath=test_pbf)
    reader._read_pbf()

    landuse_tags = reader.conf.tags.landuse
    all_nodes = concatenate_dicts_of_arrays(reader._nodes)
    landuse = get_landuse_data(
        all_nodes,
        reader._node_coordinates,
        reader._way_records,
        reader._relations,
        landuse_tags,
        None,
        None,
    )
    assert isinstance(landuse, GeoDataFrame)

    # Required keys
    for column in ('id', 'geometry'):
        assert column in landuse.columns

    # Test shape
    assert len(landuse) == 50
    assert landuse.crs == pyproj.CRS.from_epsg(4326)
def test_parse_buildings_with_bbox(test_pbf):
    """Read buildings through a list-based bounding box and check the result."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame
    from shapely.geometry import Polygon

    bbox = [26.94, 60.525, 26.96, 60.535]

    # Init with bounding box
    reader = OSM(filepath=test_pbf, bounding_box=bbox)
    buildings = reader.get_buildings()

    first_geometry = buildings.loc[0, 'geometry']
    assert isinstance(first_geometry, Polygon)
    assert isinstance(buildings, GeoDataFrame)

    # Test shape
    assert buildings.shape == (569, 15)

    expected_columns = (
        'building', 'addr:street', 'addr:postcode', 'addr:housenumber',
        'opening_hours', 'id', 'timestamp', 'version', 'geometry', 'tags',
    )
    for column in expected_columns:
        assert column in buildings.columns

    # The total bounds of the result should not be larger than the filter
    # (allow some rounding error)
    # NOTE(review): the same ">=" comparison is applied to the max
    # coordinates as well as the min ones - confirm this is intended.
    for filter_coord, result_coord in zip(bbox, buildings.total_bounds):
        assert round(result_coord, 3) >= round(filter_coord, 3)
def test_getting_nodes_and_edges_with_bbox(test_pbf):
    """Read nodes and edges through a bounding box and check types and shapes."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame
    from shapely.geometry import Point, LineString

    bounds = [26.94, 60.525, 26.96, 60.535]

    # Init with bounding box
    osm = OSM(filepath=test_pbf, bounding_box=bounds)
    nodes, edges = osm.get_network(nodes=True)
    # Reset the index so that label 0 can be used for the lookup below.
    nodes = nodes.reset_index(drop=True)

    assert isinstance(edges, GeoDataFrame)
    assert isinstance(edges.loc[0, 'geometry'], LineString)

    assert isinstance(nodes, GeoDataFrame)
    assert isinstance(nodes.loc[0, 'geometry'], Point)

    # Test shape
    assert edges.shape == (321, 23)
    assert nodes.shape == (317, 8)

    # Edges should have "u" and "v" columns
    for col in ("u", "v", "length"):
        assert col in edges.columns

    # Nodes should have (at least) "id", "lat", and "lon" columns
    for col in ("id", "lat", "lon"):
        assert col in nodes.columns
def test_getting_nodes_and_edges(test_pbf):
    """Read nodes and edges of the whole file and check types and shapes."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame
    from shapely.geometry import Point, LineString

    reader = OSM(filepath=test_pbf)
    node_gdf, edge_gdf = reader.get_network(nodes=True)
    # Reset the index so that label 0 can be used for the lookup below.
    node_gdf = node_gdf.reset_index(drop=True)

    assert isinstance(edge_gdf, GeoDataFrame)
    first_edge = edge_gdf.loc[0, 'geometry']
    assert isinstance(first_edge, LineString)

    assert isinstance(node_gdf, GeoDataFrame)
    first_node = node_gdf.loc[0, 'geometry']
    assert isinstance(first_node, Point)

    # Test shape
    assert edge_gdf.shape == (1215, 23)
    assert node_gdf.shape == (1147, 8)

    # Edges should have "u" and "v" columns
    edge_columns = edge_gdf.columns
    for name in ["u", "v", "length"]:
        assert name in edge_columns

    # Nodes should have (at least) "id", "lat", and "lon" columns
    node_columns = node_gdf.columns
    for name in ["id", "lat", "lon"]:
        assert name in node_columns
def test_parse_network_with_shapely_bbox(test_pbf):
    """Read the network through a shapely ``box`` bounding box."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame
    from shapely.geometry import MultiLineString, box

    bbox_polygon = box(26.94, 60.525, 26.96, 60.535)

    # Init with bounding box
    reader = OSM(filepath=test_pbf, bounding_box=bbox_polygon)
    network = reader.get_network()

    first_geometry = network.loc[0, 'geometry']
    assert isinstance(first_geometry, MultiLineString)
    assert isinstance(network, GeoDataFrame)

    # Test shape
    assert network.shape == (74, 21)

    expected_columns = (
        'access', 'bridge', 'foot', 'highway', 'lanes', 'lit', 'maxspeed',
        'name', 'oneway', 'ref', 'service', 'surface', 'id', 'geometry',
        'tags', 'osm_type', 'length',
    )
    for column in expected_columns:
        assert column in network.columns

    # Should not include 'motorway' ways by default
    highway_values = network["highway"].unique()
    assert "motorway" not in highway_values

    # The total bounds of the result should not be larger than the filter
    # (allow some rounding error)
    # NOTE(review): the same ">=" comparison is applied to the max
    # coordinates as well as the min ones - confirm this is intended.
    for filter_coord, result_coord in zip(bbox_polygon.bounds, network.total_bounds):
        assert round(result_coord, 3) >= round(filter_coord, 3)
def test_saving_network_to_shapefile(test_pbf, test_output_dir):
    """Write the cycling network to a shapefile and read it back.

    All non-geometry columns must survive the round trip unchanged. The
    output directory is removed afterwards, also when the test fails.
    """
    import os
    from pyrosm import OSM
    import geopandas as gpd
    import shutil

    os.makedirs(test_output_dir, exist_ok=True)
    temp_path = os.path.join(test_output_dir, "pyrosm_test.shp")

    try:
        osm = OSM(filepath=test_pbf)
        gdf = osm.get_network(network_type="cycling")
        gdf.to_file(temp_path)

        # Ensure it can be read and matches with original one
        gdf2 = gpd.read_file(temp_path)

        for col in gdf.columns:
            # Geometry col might contain different types of geoms
            # (due to saving MultiLineGeometries which might be read as a "single")
            if col == "geometry":
                continue
            assert gdf[col].tolist() == gdf2[col].tolist()
    finally:
        # Clean up even if an assertion or an I/O call above failed, so a
        # failing run does not leave files behind.
        shutil.rmtree(test_output_dir)
def test_reading_with_custom_filters_with_excluding(test_pbf):
    """Exclude each ``building`` value in turn and check the remaining rows."""
    from pyrosm import OSM
    from shapely.geometry import Polygon
    from geopandas import GeoDataFrame

    # Get first all data
    reader = OSM(filepath=test_pbf)
    all_buildings = reader.get_buildings()

    # Find out all 'building' tags
    value_counts = all_buildings['building'].value_counts()
    total = len(all_buildings)
    must_have = ('building', 'id', 'timestamp', 'version', 'geometry')

    for building_value, occurrences in value_counts.items():
        # Use the custom filter
        remaining = reader.get_data_by_custom_criteria(
            custom_filter={'building': [building_value]},
            filter_type="exclude",
        )

        assert isinstance(remaining, GeoDataFrame)
        assert isinstance(remaining.loc[0, "geometry"], Polygon)
        assert len(remaining) == total - occurrences

        # Now should not have the excluded value in buildings
        assert building_value not in remaining["building"].unique()

        for column in must_have:
            assert column in remaining.columns
def test_parsing_basic_elements_from_pbf(test_pbf):
    """Check the raw node and way records available after ``_read_pbf``."""
    from pyrosm import OSM
    import numpy as np

    reader = OSM(filepath=test_pbf)
    reader._read_pbf()
    node_sets = reader._nodes
    way_records = reader._way_records

    assert isinstance(node_sets, list)
    assert isinstance(way_records, list)

    # Required node columns
    required_node_keys = (
        'id', 'version', 'changeset', 'timestamp', 'lon', 'lat', 'tags',
    )
    for key in required_node_keys:
        for node_set in node_sets:
            assert key in node_set.keys()
            values = node_set[key]
            # Nodes should be in numpy arrays
            assert isinstance(values, np.ndarray)
            # Check shape
            assert len(values) in [6222, 8000]

    # Check ways shape
    assert len(way_records) == 2653
    for record in way_records:
        assert isinstance(record, dict)

    # Required way columns
    required_way_keys = ('id', 'version', 'timestamp', 'nodes')
    for record in way_records:
        for key in required_way_keys:
            assert key in record
def test_using_rare_tag(helsinki_pbf):
    """``get_pois`` returns ``None`` (and does not raise) when nothing matches."""
    from pyrosm import OSM

    osm = OSM(filepath=helsinki_pbf)
    # There aren't any but should not raise an error still (#47)
    gdf = osm.get_pois({"park_ride": ["yes"]})
    assert gdf is None
def test_invalid_filter_type(test_pbf):
    """``get_network`` must raise ``ValueError`` for the argument "MyNetwork"."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    try:
        osm.get_network("MyNetwork")
    except ValueError:
        pass
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError(
            "get_network('MyNetwork') should have raised ValueError"
        )
def compute_geometry(self, bbox, filename=None):
    """
    Parse OSM file (area in bbox) to retrieve information about geometry.

    Polygons of the "natural" and "landuse" layers are appended to
    ``self.polygons`` (multipolygons are split into their member polygons),
    the dissolved road network is stored in ``self.multilinestrings`` when a
    network is returned, and ``self.tag_value.eval`` is called at the end.

    :param Sequence[float] bbox: area to be parsed in format (min_lon, min_lat, max_lon, max_lat)
    :param Optional[str] filename: map file in .osm.pbf format or None
        (map will be downloaded; the name is taken from ``OsmConverter(bbox)``)
    """
    assert len(bbox) == 4
    lon_span = fabs(bbox[2] - bbox[0])
    lat_span = fabs(bbox[3] - bbox[1])
    self.bbox_size = (lon_span, lat_span)

    if filename is None:
        converter = OsmConverter(bbox)
        filename = converter.filename

    osm = OSM(filename, bounding_box=bbox)
    multipolygons = GeoDataFrame(columns=['tag', 'geometry'])

    # NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed
    # in pandas 2.0; this method relies on it throughout.
    layers = (('natural', osm.get_natural), ('landuse', osm.get_landuse))
    for key, fetch_layer in layers:
        layer = fetch_layer()
        if layer is None:
            continue
        # Keep only the tag column and the geometry, under a common name.
        layer = layer.loc[:, [key, 'geometry']].rename(columns={key: 'tag'})
        self.polygons = self.polygons.append(
            layer.loc[layer.geometry.type == 'Polygon'])
        multipolygons = multipolygons.append(
            layer.loc[layer.geometry.type == 'MultiPolygon'])
        # Empty the frame in place once its rows have been copied.
        layer.drop(layer.index, inplace=True)

    # splitting multipolygons to polygons
    for row in range(len(multipolygons)):
        tag = multipolygons.tag.iloc[row]
        for polygon in multipolygons.geometry.iloc[row].geoms:
            record = {'tag': tag, 'geometry': polygon}
            self.polygons = self.polygons.append(record, ignore_index=True)

    roads = osm.get_network()
    if roads is not None:
        roads = self.__dissolve(roads[["highway", "geometry"]])
        multi_lines = roads.loc[roads.geometry.type == 'MultiLineString']
        self.multilinestrings = GeoDataFrame(multi_lines).rename(
            columns={'highway': 'tag'})

    self.tag_value.eval(self.polygons, self.multilinestrings, "tag")
def test_pdgraph_connectivity():
    """Exercise a pandana graph built from the "helsinki_pbf" network.

    Covers nearest-POI queries, node lookup, aggregation and shortest path
    lengths.
    """
    from pyrosm.graphs import to_pandana
    import pandas as pd
    from pyrosm import OSM

    reader = OSM(get_data("helsinki_pbf"))
    nodes, edges = reader.get_network(nodes=True)

    # Prepare some test data for aggregations
    restaurants = reader.get_pois(custom_filter={"amenity": ["restaurant"]})
    restaurants = restaurants.loc[restaurants["osm_type"] == "node"]
    restaurants["employee_cnt"] = 1
    lons = restaurants["lon"]
    lats = restaurants["lat"]

    graph = to_pandana(nodes, edges, retain_all=False)

    # Nodes and edges should be in DataFrames
    assert isinstance(graph.nodes_df, pd.DataFrame)
    assert isinstance(graph.edges_df, pd.DataFrame)

    # Precompute up to 1000 meters
    graph.precompute(1000)

    # Link restaurants to graph
    graph.set_pois("restaurants", 1000, 5, lons, lats)

    # Find the distance to nearest 5 restaurants from each node
    nearest = graph.nearest_pois(1000, "restaurants", num_pois=5)
    assert isinstance(nearest, pd.DataFrame)
    assert nearest.shape == (5750, 5)

    # Get closest node_ids for each restaurant
    node_ids = graph.get_node_ids(lons, lats)
    assert isinstance(node_ids, pd.Series)
    assert node_ids.min() > 0
    restaurants["node_id"] = node_ids

    # Attach employee counts to the graph
    graph.set(node_ids, variable=restaurants.employee_cnt, name="employee_cnt")

    # Aggregate the number of employees within 500 meters from each node
    access = graph.aggregate(500, type="sum", decay="linear", name="employee_cnt")
    assert isinstance(access, pd.Series)
    assert len(access) == 5750

    # Test shortest path calculations
    origins = node_ids[0:100]
    destinations = node_ids[100:200]
    distances = graph.shortest_path_lengths(
        origins, destinations, imp_name="length"
    )
    assert isinstance(distances, list)
    assert len(distances) == 100

    distances = pd.Series(distances)
    assert distances.min().round(0) == 22
    assert distances.max().round(0) == 2453
    assert distances.mean().round(0) == 856
def test_passing_incorrect_custom_filter(test_pbf):
    """A non-dict ``custom_filter`` must raise a ``ValueError`` mentioning
    "dictionary"."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)
    try:
        osm.get_landuse(custom_filter="wrong")
    except ValueError as err:
        # A ValueError with an unrelated message is a failure, in line with
        # the other error-message tests in this file.
        assert "dictionary" in str(err)
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError(
            "get_landuse(custom_filter='wrong') should have raised ValueError"
        )
def test_reading_with_custom_filters_selecting_specific_osm_element(
        helsinki_pbf):
    """Request relations, ways and nodes one at a time via the keep_* flags."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    reader = OSM(filepath=helsinki_pbf)

    # (expected osm_type, expected row count, element switches), evaluated
    # in this order: relations, ways, nodes.
    scenarios = (
        ('relation', 66,
         dict(keep_nodes=False, keep_ways=False, keep_relations=True)),
        ('way', 422,
         dict(keep_nodes=False, keep_ways=True, keep_relations=False)),
        ('node', 36,
         dict(keep_nodes=True, keep_ways=False, keep_relations=False)),
    )

    for expected_type, expected_rows, switches in scenarios:
        filtered = reader.get_data_by_custom_criteria(
            custom_filter={'building': True},
            filter_type="keep",
            **switches,
        )
        assert isinstance(filtered, GeoDataFrame)

        # Now should only have the requested osm_type
        found_types = filtered['osm_type'].unique()
        assert len(found_types) == 1
        assert found_types[0] == expected_type
        assert len(filtered) == expected_rows
def test_reading_boundaries_with_defaults(helsinki_pbf):
    """Check shape, columns and osm_type of ``get_boundaries()`` defaults."""
    from pyrosm import OSM

    reader = OSM(helsinki_pbf)
    boundaries = reader.get_boundaries()

    # Test shape
    assert boundaries.shape == (8, 11)

    expected_columns = (
        'name', 'admin_level', 'boundary', 'id', 'timestamp', 'version',
        'changeset', 'geometry', 'tags', 'osm_type',
    )
    for column in expected_columns:
        assert column in boundaries.columns

    # osm_type should be 'relation'
    first_type = boundaries.osm_type.unique()[0]
    assert first_type == 'relation'
def test_nxgraph_immutable_counts(test_pbf):
    """Check node and edge counts of the NetworkX graph built with
    ``retain_all=True``."""
    from pyrosm.graphs import to_networkx
    import networkx as nx
    from pyrosm import OSM

    osm = OSM(test_pbf)
    nodes, edges = osm.get_network(nodes=True)
    g = to_networkx(nodes, edges, retain_all=True)
    n_nodes = len(nodes)

    assert isinstance(g, nx.MultiDiGraph)

    # Check that the edge count matches
    assert nx.number_of_edges(g) == 2430
    # Every input node must be present in the graph.
    assert nx.number_of_nodes(g) == n_nodes
def test_using_two_level_custom_filter(helsinki_region_pbf):
    """Combine ``osm_keys_to_keep`` with a custom amenity filter."""
    from pyrosm import OSM

    reader = OSM(filepath=helsinki_region_pbf)
    keys_to_keep = ["building"]
    school_filter = {"amenity": ["school"]}

    result = reader.get_data_by_custom_criteria(
        custom_filter=school_filter,
        osm_keys_to_keep=keys_to_keep,
    )
    assert result.shape == (72, 25)

    # Now 'building' and 'amenity' should not have NaNs
    for column in ("building", "amenity"):
        assert not result[column].hasnans
def test_graph_exports_correct_number_of_nodes(test_pbf):
    """
    Check issue: #97

    The NetworkX graph exported with ``retain_all=True`` must contain as many
    nodes as the node table it was built from.
    """
    from pyrosm import OSM

    reader = OSM(test_pbf)

    # NetworkX
    node_gdf, edge_gdf = reader.get_network(nodes=True)
    expected_nodes = len(node_gdf)
    graph = reader.to_graph(
        node_gdf,
        edge_gdf,
        graph_type="networkx",
        osmnx_compatible=False,
        retain_all=True,
    )
    assert expected_nodes == graph.number_of_nodes()
def test_using_incorrect_filter_type(test_pbf):
    """An unknown ``filter_type`` must raise a ``ValueError`` with the
    expected message."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)

    custom_filter = {"building": ["retail"]}
    filter_type = "incorrect_test"

    # Test that passing incorrect data works as should
    try:
        osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                        filter_type=filter_type)
    except ValueError as err:
        # A ValueError with any other message is propagated unchanged.
        if "should be either 'keep' or 'exclude'" not in str(err):
            raise
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError(
            "an invalid 'filter_type' should have raised ValueError"
        )
def test_adding_extra_attribute(helsinki_pbf):
    """``extra_attributes`` adds exactly one column and keeps the row count."""
    from pyrosm import OSM
    from geopandas import GeoDataFrame

    reader = OSM(filepath=helsinki_pbf)
    baseline = reader.get_boundaries()
    extra_col = "wikidata"
    extended = reader.get_boundaries(extra_attributes=[extra_col])

    base_rows, base_cols = baseline.shape[0], baseline.shape[1]

    # The extended frame should have one additional column compared to the
    # original one
    assert extended.shape[1] == base_cols + 1

    # Should have same number of rows
    assert extended.shape[0] == base_rows

    assert extra_col in extended.columns
    distinct_values = extended[extra_col].dropna().unique()
    assert len(distinct_values) > 0

    assert isinstance(baseline, GeoDataFrame)
def test_using_incorrect_osm_keys(test_pbf):
    """A non str/list ``osm_keys_to_keep`` must raise a ``ValueError`` with
    the expected message."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)

    osm_keys = 1
    custom_filter = {"building": ["retail"]}

    # Test that passing incorrect data works as should
    try:
        osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                        osm_keys_to_keep=osm_keys)
    except ValueError as err:
        expected = "'osm_keys_to_keep' -parameter should be of type str or list."
        # A ValueError with any other message is propagated unchanged.
        if expected not in str(err):
            raise
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError(
            "an invalid 'osm_keys_to_keep' should have raised ValueError"
        )
def test_passing_incorrect_filepath():
    """``OSM(11)`` must raise ``ValueError``."""
    from pyrosm import OSM

    try:
        OSM(11)
    except ValueError:
        pass
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError("OSM(11) should have raised ValueError")
def test_passing_wrong_file_format():
    """``OSM("test.osm")`` must raise ``ValueError``."""
    from pyrosm import OSM

    try:
        OSM("test.osm")
    except ValueError:
        pass
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError("OSM('test.osm') should have raised ValueError")
def test_using_incorrect_tags(test_pbf):
    """Non-string entries in ``tags_as_columns`` must raise a ``ValueError``
    with the expected message."""
    from pyrosm import OSM

    osm = OSM(filepath=test_pbf)

    # Incorrect tags
    # --------------
    tags_as_columns = [1]
    custom_filter = {"building": ["retail"]}

    # Test that passing incorrect data works as should
    try:
        osm.get_data_by_custom_criteria(custom_filter=custom_filter,
                                        tags_as_columns=tags_as_columns)
    except ValueError as err:
        # A ValueError with any other message is propagated unchanged.
        if "All tags listed in 'tags_as_columns' should be strings" not in str(err):
            raise
    else:
        # Without this branch the test would pass when nothing is raised.
        raise AssertionError(
            "non-string 'tags_as_columns' should have raised ValueError"
        )