Example #1
def get_region_road_stats(x):
    """
    Extract road statistics for a single region: the total road length per 
    aggregated road type. The results are saved to a .csv file.

    Arguments:
        *x* : a row from the shapefile with all regions, holding the unique 
        region code (GID_2) plus country and continent information.
    """
    try:
        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # if the statistics are already calculated for this region, skip it.
        if os.path.exists(
                os.path.join(data_path, 'road_stats',
                             '{}_stats.csv'.format(x[3]))):
            print('{} already finished!'.format(x[3]))
            return None

        print('{} started!'.format(x[3]))

        # extract the roads for the region from OpenStreetMap and estimate their length.
        road_dict = map_roads()
        road_gpd = roads(data_path, x[3], regional=True)
        road_gpd['length'] = road_gpd.geometry.apply(line_length)

        # map the detailed OSM road classes to aggregated road types.
        road_gpd['road_type'] = road_gpd.infra_type.apply(
            lambda y: road_dict[y])

        # sum the length per road type and add the region identifiers.
        road_gpd = road_gpd.groupby('road_type').sum()
        road_gpd['continent'] = x[10]
        road_gpd['country'] = x[1]
        road_gpd['region'] = x[3]

        # save the statistics to .csv.
        road_gpd.to_csv(
            os.path.join(data_path, 'road_stats',
                         '{}_stats.csv'.format(x.GID_2)))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(x[3], e))
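
# Usage sketch (an assumption, not part of the original code): get_region_road_stats
# expects a single row of the global regions shapefile, so it can be mapped over
# all rows, for example in parallel with multiprocessing.
import os
import geopandas
from multiprocessing import Pool

if __name__ == '__main__':
    data_path = load_config()['paths']['data']
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # the process count is illustrative; pick one that fits your machine.
    with Pool(processes=4) as pool:
        pool.map(get_region_road_stats,
                 [row for _, row in global_regions.iterrows()])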
Example #2
def region_bridges(n):
    """
    This function will extract all bridges from OpenStreetMap for the specified region.
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
            
    Returns:
        *GeoDataFrame* : A geopandas GeoDataFrame with all bridges in a region. Will also save this to a .csv file.
                      
    """
    # specify the file path where all data is located.
    data_path = load_config()['paths']['data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get name of the region
    region = x.GID_2

    # extract bridges from OpenStreetMap
    bridges_osm = bridges(data_path, region, regional=True)

    # estimate the length of each bridge and convert it to meters
    bridges_osm['length'] = bridges_osm.geometry.apply(line_length)
    bridges_osm['length'] = bridges_osm['length'] * 1000
    road_dict = map_roads()

    # map roads to primary, secondary, tertiary and other roads.
    bridges_osm['road_type'] = bridges_osm.road_type.apply(
        lambda y: road_dict[y])
    bridges_osm['region'] = region
    bridges_osm['country'] = region[:3]

    # save to .csv
    bridges_osm.to_csv(
        os.path.join(data_path, 'bridges_osm', '{}.csv'.format(region)))

    print('{} finished!'.format(region))

    return bridges_osm
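
# Usage sketch (an assumption): region_bridges takes the row index of a region,
# so a plain loop over all indices collects the bridges of every region into a
# single DataFrame.
import os
import pandas
import geopandas

data_path = load_config()['paths']['data']
global_regions = geopandas.read_file(
    os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

all_bridges = pandas.concat(
    [region_bridges(n) for n in range(len(global_regions))],
    ignore_index=True)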
Example #3
def get_liquefaction_region(n, rail=False):
    """
    Function to intersect all return periods of a particualar hazard with all 
    road or railway assets in the specific region. 
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    Returns:
        *output* : a GeoDataFrame with all intersections between the 
        infrastructure assets and the liquefaction map. Will be saved as .feather file.
    """

    try:

        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # if the intersection is already done for this region, stop and move on to the next region.
        if (not rail) and os.path.exists(
                os.path.join(data_path, 'liquefaction_road',
                             '{}_liq.ft'.format(region))):
            print('{} already finished!'.format(region))
            return None
        if rail and os.path.exists(
                os.path.join(data_path, 'liquefaction_rail',
                             '{}_liq.ft'.format(region))):
            print('{} already finished!'.format(region))
            return None

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['length'] = road_gpd.geometry.apply(line_length)
            road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()

        else:
            rail_gpd = railway(data_path, region, regional=True)
            rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
            rail_gpd.geometry = rail_gpd.geometry.simplify(tolerance=0.5)
            infra_gpd = rail_gpd.copy()

        # create a GeoJSON-like geometry to do the rasterio masking
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(data_path, 'Hazards', 'Liquefaction', 'Global',
                             'liquefaction_v1_deg.tif')) as src:
            out_image, out_transform = mask(src, geoms, crop=True)
            out_image = out_image[0, :, :]

            # change array to integers, to reduce the size of the polygonized GeoDataFrame.
            out_image[out_image <= 0] = -1
            out_image = numpy.array(out_image, dtype='int32')

            # the actual polygonization of the raster map
            results = ({
                'properties': {
                    'raster_val': v
                },
                'geometry': s
            } for i, (s, v) in enumerate(
                shapes(out_image[:, :], mask=None, transform=out_transform)))

            # and save to a geodataframe
            gdf = geopandas.GeoDataFrame.from_features(list(results),
                                                       crs='epsg:4326')
            gdf['geometry'] = gdf.buffer(0)

        # now let's intersect the liquefaction map with the infrastructure assets.
        tqdm.pandas(desc=region)
        inb = infra_gpd.progress_apply(
            lambda y: intersect_hazard(y, gdf.sindex, gdf, liquefaction=True),
            axis=1).copy()
        inb = inb.apply(pandas.Series)
        inb.columns = ['geometry', 'liquefaction']
        inb['length_liq'] = inb.geometry.apply(line_length)
        infra_gpd[['length_liq',
                   'liquefaction']] = inb[['length_liq', 'liquefaction']]
        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = region[:3]
        output['continent'] = x.continent
        output['region'] = region

        # and save the output to the designated folders.
        if not rail:
            output.to_feather(
                os.path.join(data_path, 'liquefaction_road',
                             '{}_liq.ft'.format(region)))
        else:
            output.to_feather(
                os.path.join(data_path, 'liquefaction_rail',
                             '{}_liq.ft'.format(region)))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
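
# Usage sketch (an assumption): the rail flag of get_liquefaction_region can be
# fixed with functools.partial, so the railway intersection can be mapped over
# all region indices in parallel.
import os
import geopandas
from functools import partial
from multiprocessing import Pool

if __name__ == '__main__':
    data_path = load_config()['paths']['data']
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    with Pool(processes=4) as pool:  # illustrative process count
        pool.map(partial(get_liquefaction_region, rail=True),
                 range(len(global_regions)))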
Example #4
def get_tree_density(n, rail=False):
    """
    Function to intersect all return periods of a particualar hazard with all 
    road or railway assets in the specific region. 
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    Returns:
        *output* : a GeoDataFrame with all intersections between the 
        infrastructure assets and the liquefaction map. Will be saved as .feather file.
    """
    try:

        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()

        else:
            rail_gpd = railway(data_path, region, regional=True)
            infra_gpd = rail_gpd.copy()

        # create a GeoJSON-like geometry to do the rasterio masking
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(
                    data_path, 'input_data',
                    'Crowther_Nature_Biome_Revision_01_WGS84_GeoTiff.tif')
        ) as src:
            out_image, out_transform = mask(src, geoms, crop=True)
            out_image = out_image[0, :, :]

            # grab the tree density value for each asset by querying the raster at its centroid
            tqdm.pandas(desc='Tree Density ' + region)
            infra_gpd['Tree_Dens'] = infra_gpd.centroid.progress_apply(
                lambda pt: get_raster_value(pt, out_image, out_transform))

        infra_gpd['Tree_Dens'] = infra_gpd['Tree_Dens'].astype(float)
        infra_gpd['region'] = region
        infra_gpd = infra_gpd.drop('geometry', axis=1)

        # and save the output to the designated folders.
        if not rail:
            pandas.DataFrame(infra_gpd).to_feather(
                os.path.join(data_path, 'tree_cover_road',
                             '{}.ft'.format(region)))
        else:
            pandas.DataFrame(infra_gpd).to_feather(
                os.path.join(data_path, 'tree_cover_rail',
                             '{}.ft'.format(region)))

        print('{} finished!'.format(region))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
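
# Usage sketch (an assumption): a simple sequential loop over all region indices;
# get_tree_density writes one .feather file per region as a side effect.
import os
import geopandas

data_path = load_config()['paths']['data']
global_regions = geopandas.read_file(
    os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

for n in range(len(global_regions)):
    get_tree_density(n, rail=False)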
Example #5
def region_intersection(n, hzd, rail=False):
    """
    Function to intersect all return periods of a particualar hazard with all 
    road or railway assets in the specific region. 
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
        *hzd* : abbreviation of the hazard we want to intersect. **EQ** for earthquakes,
        **Cyc** for cyclones, **FU** for river flooding, **PU** for surface flooding
        and **CF** for coastal flooding.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    Returns:
        *output* : a GeoDataFrame with all intersections between the 
        infrastructure assets and the specified hazard. Will also be saved as .feather file.
            
    """

    # get path where all hazards and data are located
    data_path = load_config()['paths']['data']
    hazard_path = load_config()['paths']['hazard_data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get the name of the region
    region = x.GID_2

    try:
        # check if we already did the hazard intersection for this region. If so, we don't do it again!
        if (not rail) and os.path.exists(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd))):
            print('{} already finished!'.format(region))
            return pandas.read_feather(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        elif rail and os.path.exists(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd))):
            print('{} already finished!'.format(region))
            return pandas.read_feather(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        # load specifics for the hazard we want to run.
        if hzd == 'EQ':
            hzd_name_dir = 'Earthquake'
            hzd_names = [
                'EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500', 'EQ_rp2475'
            ]
        elif hzd == 'Cyc':
            hzd_name_dir = 'Cyclones'
            hzd_names = [
                'Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500', 'Cyc_rp1000'
            ]
        elif hzd == 'FU':
            hzd_name_dir = 'FluvialFlooding'
            hzd_names = [
                'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100', 'FU-200',
                'FU-250', 'FU-500', 'FU-1000'
            ]
        elif hzd == 'PU':
            hzd_name_dir = 'PluvialFlooding'
            hzd_names = [
                'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100', 'PU-200',
                'PU-250', 'PU-500', 'PU-1000'
            ]
        elif hzd == 'CF':
            hzd_name_dir = 'CoastalFlooding'
            hzd_names = [
                'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500',
                'CF-1000'
            ]

        # extract data from OpenStreetMap, either the roads or the railway data.
        try:
            if not rail:
                road_gpd = roads(data_path, region, regional=True)
                road_dict = map_roads()
                road_gpd['length'] = road_gpd.geometry.apply(line_length)
                road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
                road_gpd['road_type'] = road_gpd.infra_type.apply(
                    lambda y: road_dict[y])

                infra_gpd = road_gpd.copy()

            elif rail:
                rail_gpd = railway(data_path, region, regional=True)
                rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
                rail_gpd['geometry'] = rail_gpd.geometry.simplify(
                    tolerance=0.5)

                infra_gpd = rail_gpd.copy()

            print('{} osm data loaded!'.format(region))
        except:
            print('{} osm data not properly loaded!'.format(region))
            return None

        # for the global datasets, we can just create a big dataframe with all the hazard polygons
        # (because the resolution is relatively coarse)
        if hzd in ['EQ', 'Cyc', 'CF']:
            hazard_path = load_config()['paths']['hazard_data']
            hazard_path = os.path.join(hazard_path, hzd_name_dir, 'Global')
            hzd_list = [
                os.path.join(hazard_path, x) for x in os.listdir(hazard_path)
            ]
            try:
                hzds_data = multiple_polygonized(region, x.geometry, hzd_list,
                                                 hzd_names)
            except:
                hzds_data = pandas.DataFrame(columns=['hazard'])

        for iter_, hzd_name in enumerate(hzd_names):
            # for the country-level datasets, we need to load the hazard files inside the loop,
            # else we run into memory (and time) problems.
            if hzd in ['PU', 'FU']:
                try:
                    hzds_data = single_polygonized(hzd_name, region,
                                                   x.geometry, x.ISO_3digit,
                                                   hzd)
                    hzd_region = hzds_data.loc[hzds_data.hazard == hzd_name]
                    hzd_region.reset_index(inplace=True, drop=True)
                except:
                    hzd_region = pandas.DataFrame(columns=['hazard'])

            # for the global datasets, we just extract the individual hazard maps from the DataFrame we created before this loop.
            elif hzd in ['EQ', 'Cyc', 'CF']:
                try:
                    hzd_region = hzds_data.loc[hzds_data.hazard == hzd_name]
                    hzd_region.reset_index(inplace=True, drop=True)
                except:
                    hzd_region = pandas.DataFrame(columns=['hazard'])

            # if there are no hazard values in the region for the specific return period, just give everything zeros.
            if len(hzd_region) == 0:
                infra_gpd['length_{}'.format(hzd_name)] = 0
                infra_gpd['val_{}'.format(hzd_name)] = 0
                continue

            # now let's intersect the hazard with the infrastructure assets and
            # get the hazard values and intersection lengths for each asset.
            hzd_reg_sindex = hzd_region.sindex
            tqdm.pandas(desc=hzd_name + '_' + region)
            inb = infra_gpd.progress_apply(
                lambda y: intersect_hazard(y, hzd_reg_sindex, hzd_region),
                axis=1).copy()
            inb = inb.apply(pandas.Series)
            inb.columns = ['geometry', 'val_{}'.format(hzd_name)]
            inb['length_{}'.format(hzd_name)] = inb.geometry.apply(line_length)

            # and add the results to the DataFrame with all the infrastructure assets.
            infra_gpd[[
                'length_{}'.format(hzd_name), 'val_{}'.format(hzd_name)
            ]] = inb[['length_{}'.format(hzd_name), 'val_{}'.format(hzd_name)]]
        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = global_regions.loc[global_regions['GID_2'] ==
                                               region]['ISO_3digit'].values[0]
        output['continent'] = global_regions.loc[global_regions['GID_2'] ==
                                                 region]['continent'].values[0]
        output['region'] = region

        # and save output to the designated folder for the hazard.
        if not rail:
            output.to_feather(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))
        else:
            output.to_feather(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        print('Finished {}!'.format(region))
        return output

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
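
# Usage sketch (an assumption): loop over the hazard abbreviations accepted by
# region_intersection and concatenate the per-region outputs into one DataFrame
# per hazard, skipping regions that failed and returned None.
import os
import pandas
import geopandas

data_path = load_config()['paths']['data']
global_regions = geopandas.read_file(
    os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

for hzd in ['EQ', 'Cyc', 'FU', 'PU', 'CF']:
    outputs = [region_intersection(n, hzd) for n in range(len(global_regions))]
    hazard_output = pandas.concat(
        [out for out in outputs if out is not None], ignore_index=True)
    print('{}: {} asset intersections collected'.format(hzd, len(hazard_output)))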