def download_osm_features(place, osm_type, tag, values=None, by_poly=True, timeout=180):
    """
    Query Overpass for the OSM features located inside a geocoded place.

    :param place: single place name query (e.g: "London", "Bonn", etc.)
    :param osm_type: OSM geometry type str ('node', 'way', 'relation')
    :param tag: OSM tag to query
    :param values: str/list of possible values for the provided OSM tag
    :param by_poly: if True, send one request per coordinate string returned by
        ``get_polygons_coordinates``; otherwise send a single request that
        uses the bounds of the place geometry
    :param timeout: value forwarded to ``ql_query`` as ``timeout``
    :return: list with one ``overpass_request`` result per query, or None when
        reading the first geometry raises ``AttributeError``
    """
    place_gdf = geocode_to_gdf(place)
    try:
        place_geom = place_gdf.geometry[0]
    except AttributeError:  # Empty GeoDataFrame
        return None

    if not by_poly:
        # Single request limited to the bounding box of the geometry.
        bounds_query = ql_query(osm_type, tag, values,
                                bounds=place_geom.bounds, timeout=timeout)
        return [overpass_request(data={'data': bounds_query})]

    # One request per polygon; each query is built right before it is sent.
    return [
        overpass_request(data={
            'data': ql_query(osm_type, tag, values,
                             polygon_coord=coord_str, timeout=timeout)
        })
        for coord_str in get_polygons_coordinates(place_geom)
    ]
def build_index(input_filename="../data.yaml", index_filename="rtree"):
    """Build a spatial index of the registrars listed in a YAML file.

    Every entry is cleaned with ``clean_registrar_dict`` and geocoded by its
    ``osm_name``. The entry is then stored in the index under its position in
    the file, together with its bounding box (west, south, east, north).

    Args:
        input_filename: path of the YAML file to read.
        index_filename: base name passed to ``BetterPicklingIndex``. Existing
            ``<name>.idx`` / ``<name>.dat`` files are deleted first. When
            None, the index is created from the entries alone.

    Returns:
        The ``BetterPicklingIndex`` built from the entries.

    Raises:
        Exception: if geocoding an entry yields an empty GeoDataFrame.
    """
    with open(input_filename) as f:
        data = yaml.safe_load(f)

    entry_count = len(data)
    rect_list = []
    for i in range(entry_count):
        registrar_dict = clean_registrar_dict(data[i])
        osm_name = registrar_dict["osm_name"]
        gdf = osmnx.geocode_to_gdf(osm_name)
        if len(gdf) == 0:
            raise Exception(f"Couldn't find geometry for {osm_name}")

        geometry = gdf.geometry[0]
        north = gdf.bbox_north[0]
        south = gdf.bbox_south[0]
        east = gdf.bbox_east[0]
        west = gdf.bbox_west[0]

        registrar_dict["id"] = i
        registrar_dict["geometry"] = geometry
        rect_list.append((i, (west, south, east, north), registrar_dict))

    if index_filename is None:
        return BetterPicklingIndex(rect_list)

    # Remove stale index files so the new index starts from scratch.
    for suffix in ("idx", "dat"):
        stale_file = "{}.{}".format(index_filename, suffix)
        if os.path.isfile(stale_file):
            os.remove(stale_file)
    return BetterPicklingIndex(index_filename, rect_list)
def geography():
    """Install the data required for working with geography.

    - store the municipalities of the configured region in the database;
    - save the region map as GeoJSON;
    - download the graph of geographic names (address graph).

    All existing ``District`` and ``ArticleDistrict`` rows are deleted first.

    Returns:
        bool: True when every step completed. False when the configured
        region name is absent from the federal-subjects index, or when no
        address-graph link contains ``fias_<region code>``.
    """
    District.objects.all().delete()
    ArticleDistrict.objects.all().delete()

    # Download the file with the regions
    logger.info('Скачиваем данные по регионам России')
    file_path = os.path.join(settings.ML_MODELS, 'federal_subjects_index.csv')
    wget.download(federal_subjects_index_path, file_path)
    config = configparser.ConfigParser()
    config.read(settings.CONFIG_INI_PATH)
    federal_subjects_index = pd.read_csv(file_path)
    region = config['REGION']['NAME']
    if region not in federal_subjects_index.name.to_list():
        # Adjacent literals instead of a backslash continuation inside the
        # string, so the source indentation does not end up in the message.
        logger.error(
            'Данных для региона с данным кодом не найдено. '
            'Проверьте правильность написания региона, либо обратитесь за '
            'помощью к разработчику: https://tlgg.ru/blanchefort')
        return False

    # Extract the scalar explicitly: int() on a one-element Series is
    # deprecated in pandas.
    matching_idx = federal_subjects_index[
        federal_subjects_index.name == region].idx
    idx = int(matching_idx.iloc[0])

    _, district_ids = osm_get_info(idx)
    district_names = []
    extended_names = []
    for district_id in district_ids:
        dist_name, _ = osm_get_info(district_id)
        district_names.append(dist_name)
        extended_names.append(dist_name + ', ' + region + ', Россия')

    logger.info('Сохраняем муниципалитеты в базу данных')
    District.objects.create(name='region')
    for dist_name in district_names:
        District.objects.create(name=dist_name)

    logger.info('Формируем карту всего региона')
    region_geo = ox.geocode_to_gdf(extended_names)
    region_geo['russian_name'] = district_names
    file_path = os.path.join(settings.ML_MODELS, 'region.geojson')
    region_geo.to_file(file_path, driver='GeoJSON')

    # Address graph of the region
    # TODO: rebuild the graph
    fname = f"fias_{config['REGION']['CODE']}"
    # First link whose text contains the expected file name, if any.
    selected_file = next(
        (flink for flink in freyr_region_graphs if fname in flink), None)
    if selected_file is None:
        logger.error(
            'Данных для региона с данным кодом не найдено. '
            'Проверьте правильность написания кода региона, '
            'либо обратитесь за помощью к разработчику: '
            'https://tlgg.ru/blanchefort')
        return False

    logger.info('Сохраняем граф адресов региона')
    file_path = os.path.join(settings.ML_MODELS, 'geograph.edgelist')
    wget.download(selected_file, file_path)
    logger.info('Геоданные для региона полностью собраны!')
    return True
def get_boundries_osmnx(loc_name):
    """Geocode ``loc_name`` and return its outline together with its bbox.

    Returns:
        tuple: ``(boundry_line, bbox)``. ``boundry_line`` is a red ``Line`` of
        type "boundry" built from the transposed array of the first exterior
        ring. ``bbox`` is the first row of the ``bbox_north``, ``bbox_south``,
        ``bbox_east``, ``bbox_west`` columns, in that order.
    """
    place_gdf = ox.geocode_to_gdf(loc_name)

    first_exterior = place_gdf.geometry.exterior[0]
    outline = np.array(first_exterior).T

    bbox_columns = ["bbox_north", "bbox_south", "bbox_east", "bbox_west"]
    bbox = np.array(place_gdf[bbox_columns])[0]

    return Line(loc_name, outline, color="r", type="boundry"), bbox
def get_osm_polygon(self, name, select=1, buffer_dist=20):
    """Geocode ``name`` with osmnx and return the first geometry found.

    ``select`` is forwarded as ``which_result`` and ``buffer_dist`` as
    ``buffer_dist`` to ``osmnx.geocode_to_gdf``. The query and the resulting
    GeoDataFrame are both logged at INFO level.
    """
    log.info('searching for query=%s, which_result=%s', name, select)
    geocode_options = {'buffer_dist': buffer_dist, 'which_result': select}
    result_gdf = osmnx.geocode_to_gdf(name, **geocode_options)
    log.info('gdf=%s', result_gdf)
    return result_gdf.geometry.values[0]
def main(tiles_shp_filepath, nominatim_query, output_filepath, op):
    """Write the "location" of every tile matching a Nominatim boundary.

    The boundary returned for ``nominatim_query`` is reprojected to the CRS of
    the tiles. ``op`` names the method of the tiles' geometry column (looked
    up with ``getattr``) that is called with the boundary to select rows. The
    selected "location" values are written to ``output_filepath`` as CSV
    without a header.
    """
    logger = logging.getLogger(__name__)
    tiles_gdf = gpd.read_file(tiles_shp_filepath)

    # Boundary geometry, expressed in the CRS of the tiles.
    logger.info("Querying Nominatim for boundaries for `%s`", nominatim_query)
    boundary_ser = ox.geocode_to_gdf(nominatim_query)["geometry"]
    geom = boundary_ser.to_crs(tiles_gdf.crs).iloc[0]

    # Apply the requested spatial predicate and keep the matching file names.
    predicate = getattr(tiles_gdf["geometry"], op)
    selection_mask = predicate(geom)
    tile_filename_ser = tiles_gdf[selection_mask]["location"]
    logger.info("Found %d intersecting tiles", len(tile_filename_ser))

    tile_filename_ser.to_csv(output_filepath, header=False)
    logger.info("Dumped list of intersecting tiles to %s", output_filepath)
def main():
    """Plot the geocoded registrars from ``data.yaml`` over the US states."""
    with open("data.yaml") as f:
        data = yaml.safe_load(f)

    names = []
    for registrar_dict in data:
        names.append(registrar_dict["osm_name"])
    registrars_gdf = osmnx.geocode_to_gdf(names)

    _, ax = plt.subplots()
    ax.set_aspect("equal")

    # State outlines, downloaded from TIGER
    states = geopandas.read_file("tiger_files/tl_2019_us_state.shp")
    states.plot(ax=ax, color="white", edgecolor="gray")
    registrars_gdf.plot(ax=ax)

    plt.show()
def setup(db_name: str = DB_NAME):
    """Load the Downingtown, PA boundary and surrounding OSM roads into a database.

    Imports four tables: ``boundary``, ``boundary_5mi_buffer``, ``osm_edges``
    and ``osm_nodes``. The two OSM tables are then reprojected from EPSG:4326
    to EPSG:26918, and ``osm_edges`` gets a generated ``osmuuid`` column.
    """
    db = db_connection(db_name)

    # Boundary of Downingtown PA, projected to EPSG:26918
    gdf_bounds = ox.geocode_to_gdf({"city": "Downingtown", "state": "PA"})
    gdf_bounds = gdf_bounds.to_crs("EPSG:26918")

    one_mile_in_meters = 1609.34
    five_miles_in_meters = one_mile_in_meters * 5

    buffered_geometry = gdf_bounds.geometry.buffer(five_miles_in_meters)
    gdf_bounds_buffer = gdf_bounds.copy()
    gdf_bounds_buffer["geometry"] = buffered_geometry

    # All OSM road features inside the buffered boundary (queried in EPSG:4326)
    polygon = gdf_bounds_buffer.to_crs("EPSG:4326").geometry[0]
    road_graph = ox.graph_from_polygon(polygon).to_undirected()
    nodes, edges = ox.graph_to_gdfs(road_graph)

    tables = (
        (gdf_bounds, "boundary"),
        (gdf_bounds_buffer, "boundary_5mi_buffer"),
        (edges, "osm_edges"),
        (nodes, "osm_nodes"),
    )
    for frame, table_name in tables:
        db.import_geodataframe(frame, table_name)

    # Reproject from 4326 to 26918 to facilitate analysis queries
    for table_name, geom_type in (("osm_edges", "LINESTRING"),
                                  ("osm_nodes", "POINT")):
        db.table_reproject_spatial_data(table_name, 4326, 26918, geom_type)

    # Add a uuid column to the edges
    make_id_query = """ CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; alter table osm_edges add column osmuuid uuid; update osm_edges set osmuuid = uuid_generate_v4(); """
    db.execute(make_id_query)
def clip_by_nominatim(ldf, query, **geocode_to_gdf_kws):
    """Clip ``ldf`` to the geometry Nominatim returns for ``query``.

    Args:
        ldf: the data frame to clip; passed through to ``clip_by_geometry``.
        query: Nominatim query forwarded to ``ox.geocode_to_gdf``.
        **geocode_to_gdf_kws: extra keyword arguments for
            ``ox.geocode_to_gdf``.

    Returns:
        The result of ``clip_by_geometry`` on success. An empty slice of
        ``ldf`` (``ldf[0:0]``) when a ``KeyError`` is raised while geocoding
        or clipping. None, after logging a warning, when ``ox`` is falsy
        (osmnx is unavailable).
    """
    if not ox:
        # Warn about the missing dependency. Each sentence sits on its own
        # line so the install command is not glued to the text after it.
        logging.warning(
            "The clip_by_nominatim module requires the osmnx package. "
            "The easiest way to install it is as in\n"
            "conda install -c conda-forge osmnx\n"
            "See https://github.com/gboeing/osmnx for more information "
            "about installing osmnx")
        return None

    try:
        geometry = ox.geocode_to_gdf(
            query, **geocode_to_gdf_kws)["geometry"].iloc[0]
        return clip_by_geometry(ldf, geometry,
                                geometry_crs=ox.settings.default_crs)
    except KeyError:
        logging.warning(
            "OSM returned no results (or fewer than which_result) for "
            'query "{}".\n Returning empty SLSDataFrame'.format(query))
        return ldf[0:0]
def load_data_from_city(self, city_name: str, city_elements: dict = None, city_limits: bool = True):
    """
    Load the plotting data for a city that is looked up by name.

    The city area is geocoded with ``ox.geocode_to_gdf`` and its total bounds
    are stored as ``west``, ``south``, ``east`` and ``north``. The data is
    then loaded within the city limits when ``city_limits`` is exactly True,
    and within the bounding rectangle otherwise.

    ``city_elements`` entries are merged into ``self.city_dict`` for the
    duration of the load. The previous dictionary is put back once loading
    has completed (it is not restored if loading raises).
    """
    # print(f"Loading data for {city_name}. May take a few minutes.")
    self.city_name = city_name
    self.city_limits = city_limits
    self.query_type = 'city'
    self._reset_data()

    overrides = {} if city_elements is None else city_elements
    default_city_dict = self.city_dict.copy()
    self.city_dict.update(overrides)

    self.city_area = ox.geocode_to_gdf(self.city_name)
    bounds = self.city_area.total_bounds
    self.west, self.south, self.east, self.north = bounds

    if self.city_limits is not True:
        self._get_city_data_within_rectangle()
    else:
        self._get_city_data_within_city_limits()

    # reset self.city_dict to default
    self.city_dict = default_city_dict.copy()
def osm_gdf_from_geocode(
    query,
    which_result=None,
    by_osmid=False,
    buffer_dist=None,
):
    """Retrieves place(s) by name or ID from the Nominatim API as a GeoDataFrame.

    Args:
        query (str | dict | list): Query string(s) or structured dict(s) to geocode.
        which_result (int, optional): Which geocoding result to use. If None,
            auto-select the first (Multi)Polygon or raise an error if OSM
            doesn't return one. To get the top match regardless of geometry
            type, set which_result=1. Defaults to None.
        by_osmid (bool, optional): If True, handle query as an OSM ID for
            lookup rather than text search. Defaults to False.
        buffer_dist (float, optional): Distance to buffer around the place
            geometry, in meters. Only forwarded to osmnx when it is not None.
            Defaults to None.

    Returns:
        GeoDataFrame: A GeoPandas GeoDataFrame.
    """
    check_package("osmnx", "https://osmnx.readthedocs.io/en/stable/")

    import osmnx as ox

    # Forward by keyword: positional forwarding breaks on osmnx releases
    # where these parameters are keyword-only.
    geocode_kwargs = {"which_result": which_result, "by_osmid": by_osmid}
    # Pass buffer_dist only when requested, so the default call keeps working
    # on osmnx releases that no longer accept that parameter.
    if buffer_dist is not None:
        geocode_kwargs["buffer_dist"] = buffer_dist

    return ox.geocode_to_gdf(query, **geocode_kwargs)
def poly_from_osm_cityid(osmid):
    """Look up the OSM relation ``osmid`` and return ``(geometry, display_name)``.

    The id is prefixed with "R" and resolved through ``ox.geocode_to_gdf``
    with ``by_osmid=True``. The first row's geometry and display name are
    returned.
    """
    relation_id = f"R{osmid!s}"
    admin_area = ox.geocode_to_gdf(relation_id, by_osmid=True)
    return admin_area.geometry[0], admin_area.display_name[0]
from pandas import read_csv
from sklearn.impute import SimpleImputer
import gdal
import math
import inspect
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
import rasterio
import rasterio.mask
import geopandas as gpd

# Hard-coded directories used by this script.
filedir = '/data/projects/mobiair'
ras_rootdir = '/data/gghdc/gap/2021/output/areas/'
import osmnx as ox

# Geocode the two study areas (each call queries Nominatim through osmnx).
wuhan = ox.geocode_to_gdf('武汉, China')
utrecht = ox.geocode_to_gdf('Utrecht')
#os.getcwd()
# If we just do random sampling in space-time, we get a quite high R2.
# But that is because if location 1 at t1 is in the training set, then the
# error for location 1 at t2 is going to be small, so it is not a reliable
# accuracy assessment.
# Most importantly, for the location we want to predict, we don't know the
# entire time series.
spreadurl = 'https://raw.githubusercontent.com/mengluchu/mobiair/master/mapping_data/DENL17_hr_spread.csv'
res = 25
# NOTE(review): `pd` is not imported in the visible part of this script (only
# `read_csv` is) — confirm it is imported elsewhere.
ap = pd.read_csv(spreadurl)
# When working at 100 only, drop the columns whose names end in _25 or _50.
if res == 100:
    ap = ap.drop(ap.filter(regex='_25$|_50$').columns, axis=1)
ap.shape
def test_geocode_to_gdf():
    """Geocode ``place1`` with a buffer and reproject the result."""
    # loading spatial boundaries, then projecting them
    geocode_options = {"which_result": 1, "buffer_dist": 100}
    city = ox.geocode_to_gdf(place1, **geocode_options)
    ox.project_gdf(city, to_crs="epsg:3395")
def main(intersecting_tiles_csv_filepath, tiles_dir, output_filepath,
         resample_factor, keep_raw, raw_dir, nominatim_query,
         exclude_nominatim_query, crs):
    """Download, downscale and subdivide raster tiles, then list the results.

    For every tile name in the first data column of
    ``intersecting_tiles_csv_filepath``:

    1. the raw tile is downloaded from ``BASE_URI`` into ``raw_dir`` unless
       it is already there;
    2. it is read at ``1 / resample_factor`` of its size with average
       resampling;
    3. it is split into the windows yielded by ``_get_window_transform`` and
       each window is written to ``tiles_dir``;
    4. the raw tile is deleted unless ``keep_raw`` is truthy.

    When ``nominatim_query`` is given, output tiles whose bounding box does
    not intersect the queried boundary (minus the boundary of
    ``exclude_nominatim_query``, if given) are deleted again. The paths of the
    remaining tiles are written to ``output_filepath`` as CSV without header.

    Args:
        intersecting_tiles_csv_filepath: header-less CSV whose first column is
            the index and whose next column holds the tile file names.
        tiles_dir: directory passed to ``_get_output_tile_filepath``.
        output_filepath: CSV file receiving the list of output tiles.
        resample_factor: integer downscaling factor; ``RESAMPLE_FACTOR`` when
            None.
        keep_raw: whether to keep the downloaded raw tiles.
        raw_dir: directory for raw tiles; ``'data/raw/tiles'`` when None. It
            is created (with parents) when missing.
        nominatim_query: optional query selecting the extent to keep.
        exclude_nominatim_query: optional query whose extent is subtracted.
        crs: CRS used for the intersection test; ``settings.CRS`` when falsy.
    """
    logger = logging.getLogger(__name__)

    if resample_factor is None:
        resample_factor = RESAMPLE_FACTOR

    if raw_dir is None:
        raw_dir = 'data/raw/tiles'
    # makedirs also creates missing parents (the default is a nested path)
    # and tolerates the directory appearing between a check and the creation.
    os.makedirs(raw_dir, exist_ok=True)

    tile_filenames = pd.read_csv(intersecting_tiles_csv_filepath,
                                 index_col=0,
                                 header=None).iloc[:, 0]

    output_tiles = []
    for tile_filename in tqdm.tqdm(tile_filenames):
        raw_tile_filepath = path.join(raw_dir, tile_filename)
        tile_basename, tile_ext = path.splitext(tile_filename)
        if not path.exists(raw_tile_filepath):
            request.urlretrieve(BASE_URI + tile_filename, raw_tile_filepath)

        with rio.open(raw_tile_filepath) as src:
            interim_width = src.width // resample_factor
            interim_height = src.height // resample_factor
            # Reading into a smaller out_shape performs the downscaling.
            data = src.read(out_shape=(src.count, interim_height,
                                       interim_width),
                            resampling=Resampling.average)
            t = src.transform
            # Same origin, pixel size multiplied by the resample factor.
            interim_transform = affine.Affine(t.a * resample_factor, t.b,
                                              t.c, t.d,
                                              t.e * resample_factor, t.f)

            for i, (dst_window, dst_transform) in enumerate(
                    _get_window_transform(interim_width, interim_height,
                                          interim_transform,
                                          NUM_TILE_SUBDIVISIONS)):
                profile = src.profile.copy()
                profile.update(width=dst_window.width,
                               height=dst_window.height,
                               transform=dst_transform,
                               crs=settings.CRS)
                row_off, col_off = dst_window.row_off, dst_window.col_off
                tile_filepath = _get_output_tile_filepath(
                    tiles_dir, tile_basename, i, tile_ext)
                with rio.open(tile_filepath, 'w', **profile) as dst:
                    for channel in range(src.count):
                        dst.write(
                            data[channel][row_off:row_off + dst_window.height,
                                          col_off:col_off + dst_window.width],
                            channel + 1)
                output_tiles.append(tile_filepath)

        # if raw tiles are not to be preserved, remove them at the end of each
        # iteration (with original 10cm resolution, they can take a lot of
        # local storage)
        if not keep_raw:
            os.remove(raw_tile_filepath)

    # if raw tiles are not to be preserved, remove the folder at the end
    # if not keep_raw:
    #     shutil.rmtree(raw_dir)

    if nominatim_query:
        # get only the tiles that intersect the extent of the result of the
        # nominatim query
        logger.info("Querying Nominatim for boundaries for `%s`",
                    nominatim_query)
        gser = ox.geocode_to_gdf(nominatim_query)['geometry']
        if not crs:
            crs = settings.CRS
        geom = gser.to_crs(crs).iloc[0]

        if exclude_nominatim_query:
            logger.info("Querying Nominatim for boundaries for `%s`",
                        exclude_nominatim_query)
            exclude_geom = ox.geocode_to_gdf(
                exclude_nominatim_query)['geometry'].to_crs(crs).iloc[0]
            geom = geom.difference(exclude_geom)

        def bbox_geom_from_tile(tile_filepath):
            """Return the bounding box of a written tile as a geometry."""
            with rio.open(tile_filepath) as src:
                return geometry.box(*src.bounds)

        tiles_gdf = gpd.GeoDataFrame(output_tiles,
                                     columns=['img_filepath'],
                                     geometry=list(
                                         map(bbox_geom_from_tile,
                                             output_tiles)),
                                     crs=crs)

        # NOTE(review): newer geopandas releases appear to name this sjoin
        # argument `predicate` instead of `op` — confirm against the pinned
        # geopandas version before changing it.
        output_tiles_ser = gpd.sjoin(tiles_gdf,
                                     gpd.GeoDataFrame(geometry=[geom],
                                                      crs=crs),
                                     op='intersects',
                                     how='inner')['img_filepath']

        # Delete the tiles that were written but lie outside the extent.
        tiles_to_rm_ser = tiles_gdf['img_filepath'].loc[
            ~tiles_gdf.index.isin(output_tiles_ser.index)]
        for img_filepath in tiles_to_rm_ser:
            os.remove(img_filepath)

        logger.info(
            "removed %d tiles that do not intersect with the extent of %s",
            len(tiles_to_rm_ser), nominatim_query)
    else:
        # just create a pandas series anyway to use the `to_csv` method below
        output_tiles_ser = pd.Series(output_tiles)

    # logger.info("Successfully dumped downscaled tiles to %s", tiles_dir)
    output_tiles_ser.to_csv(output_filepath, header=False)
    logger.info("Dumped list of downscaled tiles to %s", output_filepath)
# Ask which category of data to work with: a key name, "options", "network"
# or "complex".
input_key = str(
    input(
        'Input category key:\n- for list of keys input "options"\n- for networks input "network"\n- for complex input, input "complex"\n'
    ))
if input_key == 'complex':
    # A complex input is typed as JSON and parsed into Python objects.
    input_c = str(
        input(
            'Complex input example:\n{"amenity":["pub","restaurant","hospital"],"tourism":"hotel"}\nFor complex inputs for network categories, use network option for category key input\n'
        ))
    input_c = json.loads(input_c)
# NOTE(review): `input_choice` is set outside the visible fragment, and the
# nesting of this `if` relative to the one above is a best guess — confirm.
if input_choice == 1:
    # Area given by place name: geocode it to a GeoDataFrame.
    place_name = (str(
        input('Input place name: \n (for example Winchester USA):\n')))
    place = ox.geocode_to_gdf(place_name)
    #place.plot()
else:
    # Area given by a bounding box: parse the comma-separated floats.
    bbox = []
    put = input(
        'input coordinates of a bbox in format [left,top,right,bottom]:\n')
    spliter = put.split(',')
    for i in spliter:
        bbox.append(float(i))
# network section
if input_key == 'network':
    ntype = str(input('Input network type:\nfor example "drive"'))
    customf = str(
        input(
            'Input custom filter parameters\nfor example ["highway"="motorway"] or\n["highway"~"cycleway"]["bicycle"!~"no"]\n'
        ))
def bikeability(place, scale='city', data=False):
    ''' Calculate a bikeability index for a place from its OSM street network.

    Every street segment (graph edge) gets a score for each of: highway type,
    cycleway, surface, maxspeed, lanes, oneway, width and centrality. The
    scores are multiplied by fixed weights, and the NaN-ignoring mean of the
    weighted scores times 80 is stored as the segment's ``index``. The
    reported average is weighted by segment length.

    Parameters
    place: the place of interest e.g "Freiburg, Germany", datatype = string
    scale: "grid" splits the place into cells of 0.041667 x 0.041667 (in the
    units of the geocoded bounds) and processes one 'bike' network per cell.
    Any other value (default "city") processes one 'all' network for the
    whole place. datatype = string
    data: if truthy, a dataframe is returned along with the result
    dictionary, datatype = boolean

    Returns
    ``result`` if ``data == False``, otherwise the tuple ``(d_frame, result)``.
    ``result`` holds 'place', 'average_index', 'max_index', 'min_index' and
    'std_index'; in grid scale it also holds 'grids', 'nsegments' and
    'unused_grids'.

    NOTE(review): in grid scale the returned ``d_frame`` is the frame of the
    last processed cell, not the concatenation of all cells (``df_indexes``)
    — confirm this is intended.
    NOTE(review): in city scale, a failure in the two try/except blocks is
    only printed, so the following statements then hit an undefined
    ``edges`` / ``df`` — confirm.
    NOTE(review): the city and grid branches use different highway and
    surface score tables, and only the grid branch rounds maxspeed to the
    nearest 10 — confirm the differences are intended.

    Usage example
    a = bikeability('Freiburg, Germany', scale ='grid', data = False) ... for grid scale approach
    a,b = bikeability('Freiburg, Germany', scale ='grid', data = True)
    a =bikeability('Freiburg, Germany', scale = 'city')... for city scale approach
    a,b =bikeability('Freiburg, Germany', scale = 'city', data = True)
    '''
    if scale != 'grid':
        place = place
        # Tell osmnx which way tags to keep on the graph edges
        useful_tags_way = [
            'bridge', 'length', 'oneway', 'lanes', 'ref', 'name', 'highway',
            'maxspeed', 'service', 'access', 'area', 'cycleway', 'landuse',
            'width', 'est_width', 'junction', 'surface'
        ]
        ox.utils.config(useful_tags_way=useful_tags_way
                        )  # = useful_tags_path change here1
        # Create basic city graph
        place_name = place
        graph = ox.graph_from_place(place_name,
                                    network_type='all',
                                    retain_all=True)
        # Degree centrality of the line graph, stored on each edge as
        # 'centrality'
        centrality = nx.degree_centrality(nx.line_graph(graph))
        nx.set_edge_attributes(graph, centrality, 'centrality')
        # Extract edges to geopandas from graph
        #edges = ox.graph_to_gdfs(graph, nodes=False)
        try:
            edges = ox.graph_to_gdfs(graph, nodes=False)
            pass
        except Exception as e:
            print('{} at {}'.format(e, place))
        # Keep only the columns needed for scoring
        cols = [
            'highway', 'cycleway', 'surface', 'maxspeed', 'length', 'lanes',
            'oneway', 'width', 'centrality', 'geometry'
        ]
        try:
            df = edges.loc[:, cols]
        except KeyError as e:
            print(e)
        # Set appropriate data types (unparseable numbers become NaN)
        df['maxspeed'] = pd.to_numeric(df['maxspeed'],
                                       errors='coerce',
                                       downcast='integer')
        df['lanes'] = pd.to_numeric(df['lanes'],
                                    errors='coerce',
                                    downcast='integer')
        df['width'] = pd.to_numeric(df['width'],
                                    errors='coerce',
                                    downcast='unsigned')
        df['highway'] = df['highway'].astype(str)
        df['surface'] = df['surface'].astype(str)
        df['oneway'] = df['oneway'].astype(int)
        df['cycleway'] = df['cycleway'].astype(str)
        # Dataframe cleaning and preprocessing
        # highway column: strip punctuation, split on spaces, score each
        # token and keep the rounded mean of the scores
        df['highway'] = df['highway'].str.replace(r'[^\w\s-]', '', regex=True)
        highway_cols = (pd.DataFrame(df.highway.str.split(' ', expand=True)))
        highway_map = ({
            'service': 6,
            'None': np.nan,
            'residential': 8,
            'unclassified': 7,
            'footway': 7,
            'track': 5,
            'tertiary': 6,
            'living_street': 9,
            'path': 5,
            'pedestrian': 7,
            'secondary': 5,
            'primary': 2,
            'steps': 2,
            'cycleway': 10,
            'rest_area': 5,
            'primary_link': 2,
            'ferry': 1,
            'construction': 2,
            'byway': 8,
            'bridleway': 6,
            'trunk': 2,
            'trunk_link': 2,
            'motorway': 1,
            'motorway_link': 1
        })
        for column in highway_cols:
            highway_cols[column] = highway_cols[column].map(highway_map)
        highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
        df['highway'] = round(highway_cols['mean'])
        # cycleway column: same token-scoring scheme as highway
        df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]', '', regex=True)
        cycleway_cols = (pd.DataFrame(df.cycleway.str.split(' ', expand=True)))
        cycleway_map = ({
            'opposite': 9,
            'lane': 9,
            'share_busway': 8,
            'shared_lane': 8,
            'segregated': 10,
            'no': 1,
            'opposite_lane': 9,
            'crossing': 10,
            'track': 10,
            'designated': 10,
            'opposite_share_busway': 8,
            'seperate': 10,
            'shoulder': 8
        })
        for column in cycleway_cols:
            cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
        cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
        df['cycleway'] = round(cycleway_cols['mean'])
        # surface column: same token-scoring scheme as highway
        df['surface'] = df['surface'].str.replace(r'[^\w\s-]', '', regex=True)
        surface_cols = (pd.DataFrame(df.surface.str.split(' ', expand=True)))
        surface_map = ({
            'asphalt': 10,
            'paved': 10,
            'cobblestone': 5,
            'fine_gravel': 9,
            'ground': 7,
            'sett': 6,
            'gravel': 7,
            'metal': 6,
            'compacted': 10,
            'dirt': 6,
            'paving_stones': 7,
            'grass_paver': 5,
            'unpaved': 8,
            'pebblestone': 9,
            'concrete': 10,
            'grass': 5,
            'mud': 1
        })
        for column in surface_cols:
            surface_cols[column] = surface_cols[column].map(surface_map)
        surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
        df['surface'] = round(surface_cols['mean'])
        # maxspeed column: clamp to [20, 110], then score; values that are
        # not keys of the map become NaN
        df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
        df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
        maxspeed_map = ({
            20: 10,
            30: 9,
            40: 8,
            50: 7,
            60: 6,
            70: 5,
            80: 4,
            90: 3,
            100: 2,
            110: 1
        })
        df['maxspeed'] = df['maxspeed'].map(maxspeed_map)
        # lanes column: cap at 8, then score
        df.loc[df['lanes'] > 8, 'lanes'] = 8
        lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
        df['lanes'] = df['lanes'].map(lanes_map)
        # oneway column
        oneway_map = {0: 5, 1: 10, -1: 5}
        df['oneway'] = df['oneway'].map(oneway_map)
        # width column: clamp to [1, 6], round, then score
        df.loc[df['width'] < 2, 'width'] = 1
        df.loc[df['width'] > 6, 'width'] = 6
        df['width'] = round(df['width'])
        width_map = ({1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10})
        df['width'] = df['width'].map(width_map)
        # normalize centrality column (min-max, between 0 and 10)
        df['centrality'] = (
            (df['centrality'] - np.min(df['centrality'])) /
            (np.max(df['centrality']) - np.min(df['centrality']))) * 10
        # Switch to new df for calculation
        d_frame = df.copy(deep=True)
        # Multiply variables by weights
        d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
        d_frame['surface'] = d_frame['surface'] * 0.108695652
        d_frame['highway'] = d_frame['highway'] * 0.167701863
        d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
        d_frame['lanes'] = d_frame['lanes'] * 0.108695652
        d_frame['centrality'] = d_frame['centrality'] * 0.071428571
        d_frame['width'] = d_frame['width'] * 0.086956522
        d_frame['oneway'] = d_frame['oneway'] * 0.059006211
        # Segment index: NaN-ignoring mean of the weighted scores, times 80
        d_frame['index'] = (np.nanmean(d_frame[[
            'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
            'oneway', 'centrality'
        ]],
                                       axis=1,
                                       dtype='float64')) * 80
        # Final statistics index of city (average is weighted by length)
        mean_index = np.average(d_frame['index'], weights=d_frame['length'])
        max_index = d_frame['index'].max()
        min_index = d_frame['index'].min()
        std_index = d_frame['index'].std()
        # Plot result
        #d_frame.plot(column = 'index',legend = True)
        # Result dictionary
        result = ({
            'place': place,
            'average_index': mean_index,
            'max_index': max_index,
            'min_index': min_index,
            'std_index': std_index
        })
    else:
        # Get bounding box for place
        place_name = place
        area = ox.geocode_to_gdf(place_name)  # graph first
        xmin, ymin, xmax, ymax = area.total_bounds
        # divide into grids x = lon, y = lat
        height = 0.041667
        width = 0.041667
        rows = int(np.ceil((ymax - ymin) / height))
        cols = int(np.ceil((xmax - xmin) / width))
        XleftOrigin = xmin
        XrightOrigin = xmin + width
        YtopOrigin = ymax
        YbottomOrigin = ymax - height
        # Build the cells column by column, each column from top to bottom
        polygons = []
        for i in range(cols):
            Ytop = YtopOrigin
            Ybottom = YbottomOrigin
            for j in range(rows):
                polygons.append(
                    Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                             (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
                Ytop = Ytop - height
                Ybottom = Ybottom - height
            XleftOrigin = XleftOrigin + width
            XrightOrigin = XrightOrigin + width
        # Keep cells fully inside the place as they are; clip the place to
        # cells that only intersect it; drop the rest
        grid_list = []
        for i in range(len(polygons)):
            p = Point(polygons[i].centroid.x, polygons[i].centroid.y)
            geome = shape(polygons[i])
            q = gpd.GeoDataFrame({'geometry': geome}, index=[0])
            q = q.set_crs("EPSG:4326")
            if area.geometry.iloc[0].contains(polygons[i]) == True:
                grid_list.append(q)
            #elif p.within(area.geometry.iloc[0]) == True and area.geometry.iloc[0].contains(polygons[i])== False:
            elif area.geometry.iloc[0].intersects(polygons[i]):
                #grid_list.append(polygons[i])
                clip = gpd.clip(area, q)
                grid_list.append(clip)
        # Initialize important variables
        dflist = []  # weighted frame of every successfully processed cell
        exception_grids = []  # 1-based numbers of the skipped cells
        dfs = []  # unweighted frame of every successfully processed cell
        for i in tqdm(range(len(grid_list))):
            # graph
            useful_tags_way = [
                'bridge', 'length', 'oneway', 'lanes', 'ref', 'name',
                'highway', 'maxspeed', 'surface', 'area', 'landuse', 'width',
                'est_width', 'junction', 'cycleway'
            ]
            ox.utils.config(useful_tags_way=useful_tags_way
                            )  # = =useful_tags_path change 2
            try:
                box_graph = ox.graph_from_polygon(
                    grid_list[i].geometry.iloc[0],
                    network_type='bike',
                    retain_all=True)
                pass
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue
            # Degree centrality of the line graph, stored on each edge as
            # 'centrality'
            centrality = nx.degree_centrality(nx.line_graph(box_graph))
            nx.set_edge_attributes(box_graph, centrality, 'centrality')
            # Extract edges to geopandas from graph
            try:
                edges = ox.graph_to_gdfs(box_graph, nodes=False)
                pass
            except Exception as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue
            # Select only the important variables
            cols = [
                'highway', 'cycleway', 'surface', 'maxspeed', 'length',
                'lanes', 'oneway', 'width', 'centrality', 'geometry'
            ]
            try:
                df = edges.loc[:, cols]
                pass
            except KeyError as e:
                print('{} at grid {}, skip grid'.format(e, i + 1))
                exception_grids.append(i + 1)
                continue
            # Set appropriate data types (unparseable numbers become NaN)
            df['maxspeed'] = pd.to_numeric(df['maxspeed'],
                                           errors='coerce',
                                           downcast='integer')
            df['lanes'] = pd.to_numeric(df['lanes'],
                                        errors='coerce',
                                        downcast='integer')
            df['width'] = pd.to_numeric(df['width'],
                                        errors='coerce',
                                        downcast='unsigned')
            df['highway'] = df['highway'].astype(str)
            df['surface'] = df['surface'].astype(str)
            df['oneway'] = df['oneway'].astype(int)
            df['cycleway'] = df['cycleway'].astype(str)
            # Dataframe cleaning and preprocessing
            # highway column: strip punctuation, split on spaces, score each
            # token and keep the rounded mean of the scores
            df['highway'] = df['highway'].str.replace(r'[^\w\s-]',
                                                      '',
                                                      regex=True)
            highway_cols = (pd.DataFrame(
                df.highway.str.split(' ', expand=True)))
            highway_map = ({
                'service': 6,
                'None': np.nan,
                'residential': 8,
                'unclassified': 7,
                'footway': 7,
                'track': 5,
                'tertiary_link': 6,
                'tertiary': 6,
                'living_street': 9,
                'path': 5,
                'pedestrian': 7,
                'secondary': 5,
                'secondary_link': 5,
                'primary': 2,
                'steps': 2,
                'cycleway': 10,
                'rest_area': 5,
                'primary_link': 2,
                'ferry': 1,
                'construction': 2,
                'byway': 8,
                'bridleway': 6,
                'trunk': 2,
                'trunk_link': 2,
                'motorway': 1,
                'motorway_link': 1
            })
            for column in highway_cols:
                highway_cols[column] = highway_cols[column].map(highway_map)
            highway_cols['mean'] = np.nanmean(highway_cols, axis=1)
            df['highway'] = round(highway_cols['mean'])
            # cycleway column: same token-scoring scheme as highway
            df['cycleway'] = df['cycleway'].str.replace(r'[^\w\s-]',
                                                        '',
                                                        regex=True)
            cycleway_cols = (pd.DataFrame(
                df.cycleway.str.split(' ', expand=True)))
            cycleway_map = ({
                'opposite': 9,
                'lane': 9,
                'share_busway': 8,
                'shared_lane': 8,
                'segregated': 10,
                'no': 1,
                'opposite_lane': 9,
                'crossing': 10,
                'track': 10,
                'designated': 10,
                'opposite_share_busway': 8,
                'seperate': 10,
                'shoulder': 8
            })
            for column in cycleway_cols:
                cycleway_cols[column] = cycleway_cols[column].map(cycleway_map)
            cycleway_cols['mean'] = np.nanmean(cycleway_cols, axis=1)
            df['cycleway'] = round(cycleway_cols['mean'])
            # surface column: same token-scoring scheme as highway
            df['surface'] = df['surface'].str.replace(r'[^\w\s-]',
                                                      '',
                                                      regex=True)  #''
            surface_cols = (pd.DataFrame(
                df.surface.str.split(' ', expand=True)))
            surface_map = ({
                'asphalt': 10,
                'paved': 10,
                'cobblestone': 3,
                'fine_gravel': 9,
                'ground': 6,
                'sett': 4,
                'gravel': 7,
                'metal': 7,
                'compacted': 9,
                'dirt': 6,
                'paving_stones': 7,
                'grass_paver': 4,
                'unpaved': 7,
                'pebblestone': 7,
                'concrete': 10,
                'grass': 5,
                'mud': 2,
                'sand': 5,
                'wood': 4,
                'earth': 6,
                'woodchips': 3,
                'snow': 2,
                'ice': 2,
                'salt': 2
            })
            for column in surface_cols:
                surface_cols[column] = surface_cols[column].map(surface_map)
            surface_cols['mean'] = np.nanmean(surface_cols, axis=1)
            df['surface'] = round(surface_cols['mean'])
            # maxspeed column: clamp to [20, 110], round to the nearest 10,
            # then score
            df.loc[df['maxspeed'] > 110, 'maxspeed'] = 110
            df.loc[df['maxspeed'] < 20, 'maxspeed'] = 20
            df['maxspeed'] = round(df['maxspeed'], -1)
            maxspeed_map = ({
                20: 10,
                30: 9,
                40: 8,
                50: 7,
                60: 6,
                70: 5,
                80: 4,
                90: 3,
                100: 2,
                110: 1
            })
            df['maxspeed'] = df['maxspeed'].map(maxspeed_map)
            # lanes column: cap at 8, then score
            df.loc[df['lanes'] > 8, 'lanes'] = 8
            lanes_map = {1: 10, 2: 9, 3: 5, 4: 5, 5: 3, 6: 3, 7: 2, 8: 1}
            df['lanes'] = df['lanes'].map(lanes_map)
            # oneway column
            oneway_map = {0: 5, 1: 10, -1: 5}
            df['oneway'] = df['oneway'].map(oneway_map)
            # width column: clamp to [1, 6], round, then score
            df.loc[df['width'] < 2, 'width'] = 1
            df.loc[df['width'] > 6, 'width'] = 6
            df['width'] = round(df['width'])
            width_map = ({1: 1, 2: 2, 3: 5, 4: 7, 5: 9, 6: 10})
            df['width'] = df['width'].map(width_map)
            # normalize centrality column (min-max, between 0 and 10)
            df['centrality'] = (
                (df['centrality'] - np.min(df['centrality'])) /
                (np.max(df['centrality']) - np.min(df['centrality']))) * 10
            # Switch to new df for calculation
            d_frame = df.copy(deep=True)
            # Multiply variables by weights
            d_frame['cycleway'] = d_frame['cycleway'] * 0.208074534
            d_frame['surface'] = d_frame['surface'] * 0.108695652
            d_frame['highway'] = d_frame['highway'] * 0.167701863
            d_frame['maxspeed'] = d_frame['maxspeed'] * 0.189440994
            d_frame['lanes'] = d_frame['lanes'] * 0.108695652
            d_frame['centrality'] = d_frame['centrality'] * 0.071428571
            d_frame['width'] = d_frame['width'] * 0.086956522
            d_frame['oneway'] = d_frame['oneway'] * 0.059006211
            # Segment index: NaN-ignoring mean of the weighted scores,
            # times 80
            d_frame['index'] = (np.nanmean(d_frame[[
                'cycleway', 'highway', 'surface', 'maxspeed', 'lanes', 'width',
                'oneway', 'centrality'
            ]],
                                           axis=1,
                                           dtype='float64')) * 80
            # Length-weighted average index of this cell, repeated on every
            # row of the cell
            d_frame['grid_index'] = np.average(d_frame['index'],
                                               weights=d_frame['length'])
            dflist.append(d_frame)
            dfs.append(df)
        # Final statistics index of city in dictionary, computed over the
        # segments of all processed cells
        df_indexes = pd.concat(dflist)
        result = ({
            'place': place_name,
            'average_index': np.average(df_indexes['index'],
                                        weights=df_indexes['length']),
            'max_index': df_indexes['index'].max(),
            'min_index': df_indexes['index'].min(),
            'std_index': df_indexes['index'].std(),
            'grids': len(grid_list),
            'nsegments': len(df_indexes),
            'unused_grids': len(exception_grids)
        })
    if data == False:
        return (result)
    else:
        return (d_frame, result)
Remember, our variogram defines the spatial autocorrelation of the data (i.e., how the locations in our region affect one another). Once we have a variogram model, we can use it to estimate the weights in our kriging model. I won't go into detail on how this is done, but there is a neat walkthrough in the [scikit-gstat docs here](https://scikit-gstat.readthedocs.io/en/latest/userguide/kriging.html). Anyway, I'll briefly use the [pykrige](https://github.com/GeoStat-Framework/PyKrige) library to do some kriging so you can get an idea of what it looks like: krig = OrdinaryKriging(x=gpm25["Easting"], y=gpm25["Northing"], z=gpm25["PM_25"], variogram_model="spherical") z, ss = krig.execute("grid", gridx, gridy) plt.imshow(z); Now let's convert our raster back to polygons so we can map it. I'm also going to load in a polygon of BC using `osmnx` to clip my data so it fits nicely on my map this time: polygons, values = pixel2poly(gridx, gridy, z, resolution) pm25_model = (gpd.GeoDataFrame({"PM_25_modelled": values}, geometry=polygons, crs="EPSG:3347") .to_crs("EPSG:4326") ) bc = ox.geocode_to_gdf("British Columbia, Canada") pm25_model = gpd.clip(pm25_model, bc) fig = px.choropleth_mapbox(pm25_model, geojson=pm25_model.geometry, locations=pm25_model.index, color="PM_25_modelled", color_continuous_scale="RdYlGn_r", center={"lat": 52.261, "lon": -123.246}, zoom=3.5, mapbox_style="carto-positron") fig.update_layout(margin=dict(l=0, r=0, t=30, b=10)) fig.update_traces(marker_line_width=0) I used an "ordinary kriging" interpolation above which is the simplest implementation of kriging. There are many other forms of kriging too that can account for underlying trends in the data ("universal kriging"), or even use a regression or classification model to make use of additional explanatory variables. `pykrige` [supports most variations](https://geostat-framework.readthedocs.io/projects/pykrige/en/stable/examples/index.html). 
In particular for the latter, `pykrige` can accept `sklearn` models which is useful! ### 2.3. Areal interpolation Areal interpolation is concerned with mapping data from one polygonal representation to another. Imagine I want to map the air pollution polygons I just made to FSA polygons (recall FSA is "forward sortation area", which are groups of postcodes). The most intuitive way to do this is to distribute values based on area proportions, hence "areal interpolation".
def get_perimeter(query, by_osmid=False):
    """Geocode ``query`` with osmnx and return the resulting GeoDataFrame.

    ``by_osmid`` is forwarded unchanged to ``ox.geocode_to_gdf``.
    """
    perimeter_gdf = ox.geocode_to_gdf(query, by_osmid=by_osmid)
    return perimeter_gdf
import pandas as pd import geopandas as gpd import numpy as np import os from shapely.geometry import Point from rasterstats import zonal_stats, point_query import pyproj from shapely.ops import transform import modelutils as m import rasterio from matplotlib import pyplot as plt from rasterio.plot import show_hist from math import modf import osmnx as ox from scipy import signal wuhan = ox.geocode_to_gdf('武汉, China') utrecht = ox.geocode_to_gdf('Utrecht province') utrecht.plot() filedir = "/Users/menglu/Documents/GitHub/mobiair/" preddir =f"{filedir}prediction/" savedir = "/Volumes/Meng_Mac/mobi_result/Uni/" # each profile a savedir. def wgs2laea (p): wgs84 = pyproj.CRS('EPSG:4326') rd= pyproj.CRS('+proj=laea +lat_0=51 +lon_0=9.5 +x_0=0 +y_0=0 +ellps=GRS80 +units=m +no_defs') project = pyproj.Transformer.from_crs(wgs84, rd, always_xy=True) p=transform(project.transform, p) return (p) def plot_raster ():
cities.plot(ax=ax, markersize=180, edgecolor="0.2") plt.title("Big cities in B.C."); ### 2.4. Loading from Open Street Map So we can read vector data from a file and we can create our own, but let's see what real power feels like! ![](img/real-power.gif) Often it will be helpful to obtain data from an online source using an API. The most relevant "online source" here is [OpenStreetMap (OSM)](https://www.openstreetmap.org/), which is like the Wikipedia of geospatial data (think world map, road networks, bike networks, building heights, sandy coastlines, you name it). There are plenty of Python APIs for getting data from OSM but by far the best I've come across is [osmnx](https://github.com/gboeing/osmnx/tree/master): ```console conda install -c conda-forge osmnx ``` `osmnx` provides an easy-to-use API to query OSM data. I usually import it with the alias `ox`. Let's get a polygon of Vancouver now using the function `ox.geocode_to_gdf()`: >By default `osmnx` caches responses locally in a folder `cache` so that you can quickly access data again without needing to call the API. You can turn this behaviour off if you wish. import osmnx as ox vancouver = ox.geocode_to_gdf("Vancouver, Canada") vancouver.plot(edgecolor="0.2") plt.title("Vancouver"); It's certainly Vancouver, but it looks a bit blocky. It might be a bit low resolution, or someone just decided this was the best way to encapsulate "Vancouver" on OSM. Either way, let's use this polygon to "clip" a section of our higher-resolution provinces data which we downloaded earlier (and which is the official shapefile downloaded from [statcan](https://www12.statcan.gc.ca/census-recensement/2011/geo/bound-limit/bound-limit-2016-eng.cfm)). This is the first geometric wrangling operation we'll see.
I'll show some more later, but think of "clipping" as passing a top layer cookiecutter (the map above) over a bottom layer of cookie dough (our high-resolution provinces data) to get a shape out: van_bc = gpd.clip(bc, vancouver) van_bc.plot(edgecolor="0.2")
import osmnx as ox

# Geocode the place query to a GeoDataFrame.
munich = ox.geocode_to_gdf("Munich, Germany")

# Run the GeoDataFrame through ox.project_gdf before plotting it.
munich_projected = ox.project_gdf(munich)
ax = munich_projected.plot()

# Turn the axes off; the return value is deliberately discarded.
_ = ax.axis("off")
def setup_osm(place_name):
    """Download the OSM street graph, buildings and boundary for a place.

    :param place_name: place query passed to ``ox.graph_from_place``, the
        building feature download and ``ox.geocode_to_gdf``
    :return: tuple ``(graph, buildings, area, edges)`` where ``graph`` is the
        result of ``ox.graph_from_place``, ``buildings`` the features tagged
        ``building``, ``area`` the geocoded place GeoDataFrame and ``edges``
        the edge GeoDataFrame of ``graph``
    """
    graph = ox.graph_from_place(place_name)

    # ``geometries_from_place`` was deprecated in OSMnx 1.3 and removed in 2.0
    # in favour of ``features_from_place``. Prefer the new name, but fall back
    # so older installations keep working.
    features_from_place = getattr(ox, "features_from_place", None)
    if features_from_place is None:
        features_from_place = ox.geometries_from_place
    buildings = features_from_place(place_name, tags={'building': True})

    area = ox.geocode_to_gdf(place_name)

    # Only the edges are returned, so skip building the nodes GeoDataFrame.
    edges = ox.graph_to_gdfs(graph, nodes=False)
    return graph, buildings, area, edges
# Earlier experiments, kept for reference:
# test.to_file("/home/benjamin/srtm_corse.tif")
# test = DigitalElevationModel("/home/benjamin/dem_test.tif").clip((13, 42, 15, 45)).to_file(
#     "/home/benjamin/dem_clip_test.tif")
# pop = Raster("/home/benjamin/Documents/PRO/PRODUITS/POPULATION_DENSITY/001_DONNEES/COTE_D_IVOIRE"
#              "/population_civ_2019-07-01_geotiff/population_civ_2019-07-01.tif")
from pyrasta.tools.stats import _zonal_stats

# Load the DEM, reproject it to EPSG:32630, compute the slope in degrees and
# write the result to disk.
dem = DigitalElevationModel(
    "/home/benjamin/Documents/PRO/PRODUITS/TESTS/dem_ci.tif")
dem_projected = dem.to_crs(32630)
test = dem_projected.slope("degree")
test.to_file("/home/benjamin/dem_slope_ci.tif")

# Geocode the country boundary with a structured query, wrap it in a
# PolygonLayer, reproject to EPSG:32630 and clean the geometry.
country_query = {"country": "Cote d'Ivoire", "admin_level": 2, "type": "boundary"}
country_gdf = ox.geocode_to_gdf(country_query)
country = PolygonLayer.from_gpd(country_gdf).to_crs(32630).clean_geometry()
# dem = from_cgiar_online_database(country.total_bounds)
# dem.to_file("/home/benjamin/dem_ci.tif")

# Split the country layer with the "hexana" method; the first argument is one
# hundredth of the area of the layer's first polygon.
cell_area = country.area[0] / 100
honeycomb = country.split(cell_area, method="hexana", show_progressbar=True)

# Bring the split layer back to the DEM's CRS before saving it.
honeycomb = honeycomb.to_crs(dem.crs)
honeycomb.to_file("/home/benjamin/Documents/PRO/PRODUITS/TESTS/honeycomb.shp")

# NOTE: the ID column is assigned after the shapefile has been written, so it
# only exists on the in-memory layer.
honeycomb["ID"] = honeycomb.index
# test = dem.clip(mask=honeycomb[[15]], all_touched=True)
# test.to_file("/home/benjamin/pop.tif")