Example #1
def get_region_rail_stats(n):
    try:
        data_path = load_config()['paths']['data']
        global_data = gpd.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
        global_data = global_data.loc[global_data.GID_2.isin([
            (x.split('.')[0])
            for x in os.listdir(os.path.join(data_path, 'region_osm'))
        ])]

        x = global_data.iloc[n]

        if os.path.exists(
                os.path.join(data_path, 'railway_stats',
                             '{}_stats.csv'.format(x.GID_2))):
            print('{} already finished!'.format(x.GID_2))
            return None

        print('{} started!'.format(x.GID_2))

        rail_gpd = railway(data_path, x.GID_2, regional=True)
        rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
        rail_gpd = rail_gpd.groupby('infra_type').sum()
        rail_gpd['continent'] = x.continent
        rail_gpd['country'] = x.ISO_3digit
        rail_gpd['region'] = x.GID_2

        rail_gpd.to_csv(
            os.path.join(data_path, 'railway_stats',
                         '{}_stats.csv'.format(x.GID_2)))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(n, e))
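The snippet above only processes a single region index; a driver in the same style as all_region_stats (Example #7) could map it over every region in parallel. The wrapper name all_region_rail_stats below is hypothetical, a sketch that simply mirrors the Pool/cpu_count pattern used throughout these examples:

def all_region_rail_stats():
    # sketch of a parallel driver; assumes load_config, gpd, os, Pool and
    # cpu_count are available exactly as in the surrounding examples
    data_path = load_config()['paths']['data']
    global_data = gpd.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # keep only the regions for which OSM extracts exist, as in Example #1
    global_data = global_data.loc[global_data.GID_2.isin([
        x.split('.')[0]
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    # get_region_rail_stats looks the region up itself, so we only pass indices
    with Pool(cpu_count() - 1) as pool:
        pool.map(get_region_rail_stats, list(global_data.index), chunksize=1)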
Example #2
def get_region_road_stats(x):
    try:
        data_path = load_config()['paths']['data']

        if os.path.exists(
                os.path.join(data_path, 'road_stats',
                             '{}_stats.csv'.format(x[3]))):
            print('{} already finished!'.format(x[3]))
            return None

        print('{} started!'.format(x[3]))

        road_dict = map_roads()
        road_gpd = roads(data_path, x[3], regional=True)
        road_gpd['length'] = road_gpd.geometry.apply(line_length)
        road_gpd['road_type'] = road_gpd.infra_type.apply(
            lambda y: road_dict[y])
        road_gpd = road_gpd.groupby('road_type').sum()
        # x is a numpy record from global_data.to_records() (see all_region_stats),
        # so continent, ISO3 country code and GID_2 region code are read by position
        road_gpd['continent'] = x[10]
        road_gpd['country'] = x[1]
        road_gpd['region'] = x[3]

        road_gpd.to_csv(
            os.path.join(data_path, 'road_stats',
                         '{}_stats.csv'.format(x[3])))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(x[3], e))
Example #3
def bridge_extraction(save_all=False):
    """
    Function to extract all bridges from OpenStreetMap.
    
    Optional Arguments:
        *save_all* : Default is **False**. Set to **True** if you would like to 
        save all bridges of the world in one csv file. Will become a big csv!
    
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    global_regions = global_regions.loc[(global_regions.GID_2.isin([
        (x.split('.')[0])
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ]))]

    # run the bridge extraction for all regions
    with Pool(cpu_count() - 1) as pool:
        collect_bridges = pool.map(region_bridges,
                                   list(global_regions.index),
                                   chunksize=1)

    # save all bridges in one file.
    if save_all:

        # concat them to one file
        all_bridges = pandas.concat(collect_bridges)
        all_bridges.reset_index(inplace=True, drop=True)

        all_bridges.to_csv(
            os.path.join(data_path, 'output_data', 'osm_bridges.csv'))
Example #4
def tree_values(rail=False):
    """
    Function to run intersection with global tree density map for all regions parallel.
    
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    if not rail:
        global_regions = global_regions.loc[(global_regions.GID_2.isin([
            x[:-10] for x in os.listdir(os.path.join(data_path, 'road_stats'))
        ]))]
    else:
        global_regions = global_regions.loc[(global_regions.GID_2.isin([
            x[:-10]
            for x in os.listdir(os.path.join(data_path, 'railway_stats'))
        ]))]

    # run tree value extraction for all regions parallel
    with Pool(cpu_count() - 1) as pool:
        pool.map(get_tree_density, list(global_regions.index), chunksize=1)
Example #5
def exposure_analysis(rail=False):
    """
    Get exposure statistics for all road or railway assets in all regions.
    
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
    
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # load csv with income group data and assign income group to regions
    incomegroups = pandas.read_csv(os.path.join(data_path, 'input_data',
                                                'incomegroups_2018.csv'),
                                   index_col=[0])
    income_dict = dict(zip(incomegroups.index, incomegroups.GroupCode))
    global_regions['wbincome'] = global_regions.GID_0.apply(
        lambda x: income_dict[x])

    # only keep regions for which we have data
    global_regions = global_regions.loc[global_regions.GID_2.isin([
        (x.split('.')[0])
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    # create dictionary with information on protection standards
    prot_lookup = dict(
        zip(global_regions['GID_2'], global_regions['prot_stand']))

    # create lists for the parallelization
    regions = list(global_regions.index)
    prot_lookups = [prot_lookup] * len(regions)
    data_paths = [data_path] * len(regions)

    # run exposure analysis parallel
    if not rail:
        with Pool(cpu_count() - 1) as pool:
            collect_output = pool.starmap(regional_roads,
                                          zip(regions, prot_lookups,
                                              data_paths),
                                          chunksize=1)

        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'total_exposure_road.csv'))

    else:
        with Pool(cpu_count() - 1) as pool:
            collect_output = pool.starmap(regional_railway,
                                          zip(regions, prot_lookups,
                                              data_paths),
                                          chunksize=1)

        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized',
                         'total_exposure_railway.csv'))
Example #6
def liquefaction_overlays(rail=False):
    """
    Function to run intersection with global liquefaction map for all regions parallel.
    
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all regions and their information
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # keep only the regions for which we have OSM data
    # (the filter is the same for roads and railways)
    global_regions = global_regions.loc[(global_regions.GID_2.isin([
        (x.split('.')[0])
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ]))]

    # run liquefaction intersection for all regions parallel
    with Pool(cpu_count() - 1) as pool:
        pool.map(get_liquefaction_region,
                 list(global_regions.index),
                 chunksize=1)
Example #7
def all_region_stats():
    data_path = load_config()['paths']['data']
    global_data = gpd.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    global_data = global_data.loc[global_data.GID_2.isin([
        (x.split('.')[0])
        for x in os.listdir(os.path.join(data_path, 'region_osm'))
    ])]

    with Pool(cpu_count() - 1) as pool:
        pool.map(get_region_road_stats,
                 list(global_data.to_records()),
                 chunksize=1)
Example #8
def all_country_stats():
    data_path = load_config()['paths']['data']

    global_countries = gpd.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    list_iso3 = [x.split('_')[0] for x in global_countries.ISO_3digit]

    with Pool(cpu_count() - 1) as pool:
        collect_countries = pool.map(get_country_road_stats,
                                     list_iso3,
                                     chunksize=1)

    pd.concat(collect_countries).to_csv(
        os.path.join(data_path, 'summarized', 'country_road_stats.csv'))
Example #9
def region_bridges(n):
    """
    This function will extract all bridges from OpenStreetMap for the specified region.
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
            
    Returns:
        *GeoDataFrame* : A geopandas GeoDataFrame with all bridges in a region. Will also save this to a .csv file.
                      
    """
    # specify the file path where all data is located.
    data_path = load_config()['paths']['data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get name of the region
    region = x.GID_2

    # extract bridges from OpenStreetMap
    bridges_osm = bridges(data_path, region, regional=True)

    # estimate the length of each bridge and convert it to meters
    bridges_osm['length'] = bridges_osm.geometry.apply(line_length)
    bridges_osm['length'] = bridges_osm['length'] * 1000
    road_dict = map_roads()

    # map roads to primary, secondary, tertiary and other roads.
    bridges_osm['road_type'] = bridges_osm.road_type.apply(
        lambda y: road_dict[y])
    bridges_osm['region'] = region
    bridges_osm['country'] = region[:3]

    # save to .csv
    bridges_osm.to_csv(
        os.path.join(data_path, 'bridges_osm', '{}.csv'.format(region)))

    print('{} finished!'.format(region))

    return bridges_osm
Example #10
def get_country_road_stats(iso3):
    try:
        print(iso3)
        data_path = load_config()['paths']['data']

        list_files = [
            os.path.join(data_path, 'road_stats', x)
            for x in os.listdir(os.path.join(data_path, 'road_stats'))
            if (iso3 in x)
        ]

        collect_regions = []
        for file in list_files:
            collect_regions.append(pd.read_csv(file))
        return pd.concat(collect_regions).groupby(
            ['road_type', 'continent', 'country']).sum()
    except Exception as e:
        print('Failed to finish {} because of {}!'.format(iso3, e))
Example #11
def planet_osm():
    """
    This function will download the planet file from the OSM servers. 
    """
    data_path = load_config()['paths']['data']
    osm_path_in = os.path.join(data_path, 'planet_osm')

    # create directory to save planet osm file if that directory does not exist yet.
    if not os.path.exists(osm_path_in):
        os.makedirs(osm_path_in)

    # if planet file is not downloaded yet, download it.
    if 'planet-latest.osm.pbf' not in os.listdir(osm_path_in):

        url = 'https://planet.openstreetmap.org/pbf/planet-latest.osm.pbf'
        urllib.request.urlretrieve(
            url, os.path.join(osm_path_in, 'planet-latest.osm.pbf'))

    else:
        print('Planet file is already downloaded')
Example #12
def SSBN_merge(from_=0, to_=235):
    """
    Merge all countries parallel.
    
    Optional Arguments:
        *from_* : Default is **0**. Set to a different value if you would 
        like to select a different subset.
        
        *to_* : Default is **235**. Set to a different value if you would 
        like to select a different subset.
    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with all countries
    global_data = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    global_data = global_data[int(from_):int(to_)]

    # run SSBN merge for all countries parallel
    with Pool(cpu_count() - 1) as pool:
        pool.map(merge_SSBN_maps, (global_data['ISO_3digit']), chunksize=1)
Example #13
def cyclone_sensitivity(rail=False, region_count=1000):
    """
    Function to perform the calculations for a sensitivity analysis related
    to cyclone damage to road or railway assets for all regions.
    
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
        *region_count* : Default is **1000**. Change this number if you want to include a different number of regions.
        
    """
    # set data path
    data_path = load_config()['paths']['data']

    # set list of events
    events = ['Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500', 'Cyc_rp1000']

    # get list of all files for which we have hazard intersection information

    # do this for roads
    if not rail:
        all_files = [
            os.path.join(data_path, 'output_Cyc_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_Cyc_full'))
        ][:region_count]

        # set parameter values
        problem = {
            'num_vars': 4,
            'names': ['x1', 'x2', 'x3', 'x4'],
            'bounds': [[5000, 50000], [1000, 10000], [500, 5000], [0, 500]]
        }

        # Generate samples and save them, to be used in uncertainty and sensitivity analysis of results
        param_values = morris.sample(problem,
                                     10,
                                     num_levels=4,
                                     grid_jump=2,
                                     local_optimization=True)
        param_values.tofile(
            os.path.join(data_path, 'input_data', 'param_values_cyc_road.pkl'))

    # and for railways
    else:
        all_files = [
            os.path.join(data_path, 'output_Cyc_rail_full',
                         x) for x in os.listdir(
                             os.path.join(data_path, 'output_Cyc_rail_full'))
        ][:region_count]

        # set parameter values
        problem = {
            'num_vars': 3,
            'names': ['x1', 'x2', 'x3'],
            'bounds': [[5000, 50000], [1000, 10000], [0, 1]]
        }

        # Generate samples and save them, to be used in uncertainty and sensitivity analysis of results
        param_values = morris.sample(problem,
                                     10,
                                     num_levels=4,
                                     grid_jump=2,
                                     local_optimization=True)
        param_values.tofile(
            os.path.join(data_path, 'input_data', 'param_values_cyc_rail.pkl'))

    # prepare multiprocessing
    data_paths = [data_path] * len(all_files)
    event_list = [events] * len(all_files)
    param_list = [param_values] * len(all_files)
    rail_list = [rail] * len(all_files)

    # run cyclone sensitivity analysis parallel and save outputs
    with Pool(cpu_count() - 1) as pool:
        if not rail:
            collect_output = pool.starmap(sensitivity.regional_cyclone,
                                          zip(all_files, data_paths,
                                              event_list, param_list,
                                              rail_list),
                                          chunksize=1)
            pandas.concat(collect_output).to_csv(
                os.path.join(data_path, 'summarized', 'sa_cyc_road.csv'))

        else:
            collect_output = pool.starmap(sensitivity.regional_cyclone,
                                          zip(all_files, data_paths,
                                              event_list, param_list,
                                              rail_list),
                                          chunksize=1)
            pandas.concat(collect_output).to_csv(
                os.path.join(data_path, 'summarized', 'sa_cyc_rail.csv'))
Example #14
def flood_sensitivity(hazard, rail=False, region_count=1000):
    """
    Function to perform the calculations for a sensitivity analysis related
    to flood damage to road or railway assets for all regions.

    Arguments:
        *hazard* : abbreviation of the flood hazard: **FU** for river flooding,
        **PU** for surface flooding and **CF** for coastal flooding.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
        *region_count* : Default is **1000**. Change this number if you want to include a different number of regions.

    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with country level information
    global_countries = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    global_countries.wbregion = global_countries.wbregion.str.replace(
        'LCA', 'LAC')
    global_countries['wbregion'].loc[
        global_countries.wbregion.isnull()] = 'YHI'
    wbreg_lookup = dict(
        zip(global_countries['ISO_3digit'], global_countries['wbregion']))

    # import curves, costs and paved vs unpaved ratios
    if not rail:
        # import curves
        flood_curve_paved = pandas.read_excel(os.path.join(
            data_path, 'input_data', 'Costs_curves.xlsx'),
                                              usecols=[1, 2, 3, 4, 5, 6, 7, 8],
                                              sheet_name='Flooding',
                                              index_col=[0],
                                              skiprows=1)

        flood_curve_unpaved = pandas.read_excel(
            os.path.join(data_path, 'input_data', 'Costs_curves.xlsx'),
            usecols=[11, 12, 13, 14, 15, 16, 17, 18],
            sheet_name='Flooding',
            index_col=[0],
            skiprows=1)
        flood_curve_unpaved.columns = flood_curve_paved.columns

        global_costs = pandas.read_excel(os.path.join(data_path, 'input_data',
                                                      'Costs_curves.xlsx'),
                                         usecols=[0, 1, 2, 3, 4, 5, 6, 7],
                                         header=0,
                                         index_col=0,
                                         skipfooter=45)
        global_costs.columns = [
            'SAS', 'SSA', 'MNA', 'EAP', 'LAC', 'ECA', 'YHI'
        ]

        paved_ratios = pandas.read_csv(os.path.join(data_path, 'input_data',
                                                    'paved_ratios.csv'),
                                       index_col=[0, 1])
        paved_ratios.index.names = ['ISO3', 'road_type']
        paved_ratios = paved_ratios.reset_index()

    else:
        curve = pandas.read_excel(os.path.join(data_path, 'input_data',
                                               'Costs_curves.xlsx'),
                                  usecols=[1, 2, 3, 4, 5, 6, 7, 8],
                                  sheet_name='Flooding',
                                  index_col=[0],
                                  skipfooter=9,
                                  skiprows=1)
        curve.columns = curve.columns.droplevel(0)

    # Load all files for which we have intersection data
    if not rail:
        all_files = [
            os.path.join(data_path, 'output_{}_full'.format(hazard), x)
            for x in os.listdir(
                os.path.join(data_path, 'output_{}_full'.format(hazard)))
        ][:region_count]
    else:
        all_files = [
            os.path.join(data_path, 'output_{}_rail_full'.format(hazard), x)
            for x in os.listdir(
                os.path.join(data_path, 'output_{}_rail_full'.format(hazard)))
        ][:region_count]

    # create list with all hazard events
    if hazard == 'FU':
        events = [
            'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100', 'FU-200',
            'FU-250', 'FU-500', 'FU-1000'
        ]
    elif hazard == 'PU':
        events = [
            'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100', 'PU-200',
            'PU-250', 'PU-500', 'PU-1000'
        ]
    elif hazard == 'CF':
        events = [
            'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500', 'CF-1000'
        ]

    # prepare multiprocessing
    data_paths = [data_path] * len(all_files)
    events_list = [events] * len(all_files)
    wbreg_list = [wbreg_lookup] * len(all_files)
    pav_cost_list = [global_costs] * len(all_files)
    pav_rat_list = [paved_ratios] * len(all_files)
    cur_pav_list = [flood_curve_paved] * len(all_files)
    cur_unpav_list = [flood_curve_unpaved] * len(all_files)
    hzd_list = [hazard] * len(all_files)

    # run flood damage sensitivity analysis parallel and save outputs
    with Pool(cpu_count() - 1) as pool:
        if not rail:
            collect_output = pool.starmap(sensitivity.regional_flood,
                                          zip(all_files, hzd_list, data_paths,
                                              pav_cost_list, pav_rat_list,
                                              cur_pav_list, cur_unpav_list,
                                              events_list, wbreg_list),
                                          chunksize=1)
            pandas.concat(collect_output).to_csv(
                os.path.join(data_path, 'summarized',
                             'sa_{}_road.csv'.format(hazard)))

        else:
            collect_output = pool.starmap(sensitivity.regional_flood,
                                          zip(all_files, hzd_list, data_paths,
                                              pav_cost_list, pav_rat_list,
                                              cur_pav_list, cur_unpav_list,
                                              events_list, wbreg_list,
                                              hzd_list),
                                          chunksize=1)
            pandas.concat(collect_output).to_csv(
                os.path.join(data_path, 'summarized',
                             'sa_{}_rail.csv'.format(hazard)))
Example #15
def earthquake_sensitivity(rail=False, region_count=1000):
    """
    Function to perform the calculations for a sensitivity analysis related
    to earthquake damage to road or railway assets for all regions.

    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
        *region_count* : Default is **1000**. Change this number if you want to include a different number of regions.


    """
    # set data path
    data_path = load_config()['paths']['data']

    # load shapefile with country level information
    global_countries = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_countries.shp'))
    global_countries.wbregion = global_countries.wbregion.str.replace(
        'LCA', 'LAC')
    global_countries['wbregion'].loc[
        global_countries.wbregion.isnull()] = 'YHI'
    wbreg_lookup = dict(
        zip(global_countries['ISO_3digit'], global_countries['wbregion']))

    # import cost values for different World Bank regions
    global_costs = pandas.read_excel(os.path.join(data_path, 'input_data',
                                                  'Costs_curves.xlsx'),
                                     usecols=[0, 1, 2, 3, 4, 5, 6, 7],
                                     header=0,
                                     index_col=0,
                                     skipfooter=45)
    global_costs.columns = ['SAS', 'SSA', 'MNA', 'EAP', 'LAC', 'ECA', 'YHI']

    # read csv file with information on paved and unpaved roads.
    paved_ratios = pandas.read_csv(os.path.join(data_path, 'input_data',
                                                'paved_ratios.csv'),
                                   index_col=[0, 1])
    paved_ratios.index.names = ['ISO3', 'road_type']
    paved_ratios = paved_ratios.reset_index()

    # Load all files for which we have intersection data
    if not rail:
        all_files = [
            os.path.join(data_path, 'output_EQ_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_EQ_full'))
        ][:region_count]
    else:
        all_files = [
            os.path.join(data_path, 'output_EQ_rail_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_EQ_rail_full'))
        ][:region_count]

    # set list of hazard events
    events = ['EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500', 'EQ_rp2475']

    # prepare multiprocessing
    data_paths = [data_path] * len(all_files)
    pav_cost_list = [global_costs] * len(all_files)
    pav_rat_list = [paved_ratios] * len(all_files)
    events_list = [events] * len(all_files)
    wbreg_list = [wbreg_lookup] * len(all_files)
    rail_list = [rail] * len(all_files)

    # run earthquake sensitivity analysis parallel and save outputs
    with Pool(cpu_count() - 1) as pool:
        collect_output = pool.starmap(sensitivity.regional_earthquake,
                                      zip(all_files, data_paths, pav_cost_list,
                                          pav_rat_list, events_list,
                                          wbreg_list, rail_list),
                                      chunksize=1)

    if not rail:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'sa_eq_road.csv'))
    else:
        pandas.concat(collect_output).to_csv(
            os.path.join(data_path, 'summarized', 'sa_eq_rail.csv'))
Example #16
def region_intersection(n, hzd, rail=False):
    """
    Function to intersect all return periods of a particular hazard with all 
    road or railway assets in the specific region. 
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
        *hzd* : abbreviation of the hazard we want to intersect. **EQ** for earthquakes,
        **Cyc** for cyclones, **FU** for river flooding, **PU** for surface flooding
        and **CF** for coastal flooding.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    Returns:
        *output* : a GeoDataFrame with all intersections between the 
        infrastructure assets and the specified hazard. Will also be saved as .feather file.
            
    """

    # get path where all hazards and data are located
    data_path = load_config()['paths']['data']
    hazard_path = load_config()['paths']['hazard_data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    x = global_regions.iloc[n]

    # get the name of the region
    region = x.GID_2

    try:
        # check if we already did the hazard intersection for this region. If so, we don't do it again!
        if (not rail) & os.path.exists(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd))):
            print('{} already finished!'.format(region))
            return pandas.read_feather(
                os.path.join(
                    os.path.join(data_path, 'output_{}_full'.format(hzd),
                                 '{}_{}.ft'.format(region, hzd))))

        elif (rail) & os.path.exists(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd))):
            print('{} already finished!'.format(region))
            return pandas.read_feather(
                os.path.join(
                    os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                                 '{}_{}.ft'.format(region, hzd))))

        # load specifics for the hazard we want to run.
        if hzd == 'EQ':
            hzd_name_dir = 'Earthquake'
            hzd_names = [
                'EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500', 'EQ_rp2475'
            ]
        elif hzd == 'Cyc':
            hzd_name_dir = 'Cyclones'
            hzd_names = [
                'Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500', 'Cyc_rp1000'
            ]
        elif hzd == 'FU':
            hzd_name_dir = 'FluvialFlooding'
            hzd_names = [
                'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100', 'FU-200',
                'FU-250', 'FU-500', 'FU-1000'
            ]
        elif hzd == 'PU':
            hzd_name_dir = 'PluvialFlooding'
            hzd_names = [
                'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100', 'PU-200',
                'PU-250', 'PU-500', 'PU-1000'
            ]
        elif hzd == 'CF':
            hzd_name_dir = 'CoastalFlooding'
            hzd_names = [
                'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500',
                'CF-1000'
            ]

        # extract data from OpenStreetMap, either the roads or the railway data.
        try:
            if not rail:
                road_gpd = roads(data_path, region, regional=True)
                road_dict = map_roads()
                road_gpd['length'] = road_gpd.geometry.apply(line_length)
                road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
                road_gpd['road_type'] = road_gpd.infra_type.apply(
                    lambda x: road_dict[x])

                infra_gpd = road_gpd.copy()

            elif rail:
                rail_gpd = railway(data_path, region, regional=True)
                rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
                rail_gpd['geometry'] = rail_gpd.geometry.simplify(
                    tolerance=0.5)

                infra_gpd = rail_gpd.copy()

            print('{} osm data loaded!'.format(region))
        except:
            print('{} osm data not properly loaded!'.format(region))
            return None

        # for the global datasets, we can just create a big dataframe with all the hazard polygons
        # (because the resolution is relatively coarse)
        if (hzd == 'EQ') | (hzd == 'Cyc') | (hzd == 'CF'):
            hazard_path = load_config()['paths']['hazard_data']
            hazard_path = os.path.join(hazard_path, hzd_name_dir, 'Global')
            hzd_list = [
                os.path.join(hazard_path, x) for x in os.listdir(hazard_path)
            ]
            try:
                hzds_data = multiple_polygonized(region, x.geometry, hzd_list,
                                                 hzd_names)
            except:
                hzds_data = pandas.DataFrame(columns=['hazard'])

        for iter_, hzd_name in enumerate(hzd_names):
            # for the country level datasets, we need to load the hazard files inside the loop, otherwise we run into RAM (and time) problems.
            if (hzd == 'PU') | (hzd == 'FU'):
                try:
                    hzds_data = single_polygonized(hzd_name, region,
                                                   x.geometry, x.ISO_3digit,
                                                   hzd)
                    hzd_region = hzds_data.loc[hzds_data.hazard == hzd_name]
                    hzd_region.reset_index(inplace=True, drop=True)
                except:
                    hzd_region = pandas.DataFrame(columns=['hazard'])

            # for the global datasets, we just extract the individual hazard maps from the DataFrame we created before this loop.
            elif (hzd == 'EQ') | (hzd == 'Cyc') | (hzd == 'CF'):
                try:
                    hzd_region = hzds_data.loc[hzds_data.hazard == hzd_name]
                    hzd_region.reset_index(inplace=True, drop=True)
                except:
                    hzd_region = pandas.DataFrame(columns=['hazard'])

            # if there are no hazard values in the region for the specific return period, just give everything zeros.
            if len(hzd_region) == 0:
                infra_gpd['length_{}'.format(hzd_name)] = 0
                infra_gpd['val_{}'.format(hzd_name)] = 0
                continue

            # now let's intersect the hazard with the infrastructure assets and
            # get the hazard values and intersection lengths for each asset.
            hzd_reg_sindex = hzd_region.sindex
            tqdm.pandas(desc=hzd_name + '_' + region)
            inb = infra_gpd.progress_apply(
                lambda asset: intersect_hazard(asset, hzd_reg_sindex, hzd_region),
                axis=1).copy()
            inb = inb.apply(pandas.Series)
            inb.columns = ['geometry', 'val_{}'.format(hzd_name)]
            inb['length_{}'.format(hzd_name)] = inb.geometry.apply(line_length)

            # and add the results to the dataframe with all the infrastructure assets.
            infra_gpd[[
                'length_{}'.format(hzd_name), 'val_{}'.format(hzd_name)
            ]] = inb[['length_{}'.format(hzd_name), 'val_{}'.format(hzd_name)]]
        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = global_regions.loc[global_regions['GID_2'] ==
                                               region]['ISO_3digit'].values[0]
        output['continent'] = global_regions.loc[global_regions['GID_2'] ==
                                                 region]['continent'].values[0]
        output['region'] = region

        # and save output to the designated folder for the hazard.
        if not rail:
            output.to_feather(
                os.path.join(data_path, 'output_{}_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))
        else:
            output.to_feather(
                os.path.join(data_path, 'output_{}_rail_full'.format(hzd),
                             '{}_{}.ft'.format(region, hzd)))

        print('Finished {}!'.format(region))
        return output

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region, e))
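region_intersection handles one region and one hazard at a time; a parallel driver would follow the same Pool/starmap pattern used elsewhere on this page. The wrapper name all_region_intersections below is hypothetical, a sketch rather than part of the original code:

def all_region_intersections(hzd='EQ', rail=False):
    # sketch: run region_intersection for every region of one hazard in parallel,
    # assuming load_config, geopandas, os, Pool and cpu_count as in the examples above
    data_path = load_config()['paths']['data']
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))
    regions = list(global_regions.index)

    with Pool(cpu_count() - 1) as pool:
        pool.starmap(region_intersection,
                     zip(regions, [hzd] * len(regions), [rail] * len(regions)),
                     chunksize=1)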
Example #17
def all_outputs():
    """
    Summarize all outputs into .csv files per hazard and asset type.
    """
    data_path = load_config()['paths']['data']

    # Fluvial Flooding
    get_files = os.listdir(os.path.join(data_path, 'FU_impacts'))
    with Pool(40) as pool:
        tot_road_FU = list(
            tqdm(pool.imap(load_FU_csv, get_files), total=len(get_files)))
    pd.concat(tot_road_FU, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'FU_road_losses.csv'))

    get_files = os.listdir(os.path.join(data_path, 'FU_impacts_rail'))
    with Pool(40) as pool:
        tot_road_FU = list(
            tqdm(pool.imap(load_FU_csv_rail, get_files), total=len(get_files)))
    pd.concat(tot_road_FU, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'FU_rail_losses.csv'))

    # Pluvial Flooding
    get_files = os.listdir(os.path.join(data_path, 'PU_impacts'))
    with Pool(40) as pool:
        tot_road_PU = list(
            tqdm(pool.imap(load_PU_csv, get_files), total=len(get_files)))
    pd.concat(tot_road_PU, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'PU_road_losses.csv'))

    get_files = os.listdir(os.path.join(data_path, 'PU_impacts_rail'))
    with Pool(40) as pool:
        tot_road_PU = list(
            tqdm(pool.imap(load_PU_csv_rail, get_files), total=len(get_files)))
    pd.concat(tot_road_PU, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'PU_rail_losses.csv'))

    # Earthquakes
    get_files = os.listdir(os.path.join(data_path, 'EQ_impacts'))
    with Pool(40) as pool:
        tot_road_EQ = list(
            tqdm(pool.imap(load_EQ_csv, get_files), total=len(get_files)))
    pd.concat(tot_road_EQ, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'EQ_road_losses.csv'))

    get_files = os.listdir(os.path.join(data_path, 'EQ_impacts_rail'))
    with Pool(40) as pool:
        tot_road_EQ = list(
            tqdm(pool.imap(load_EQ_csv_rail, get_files), total=len(get_files)))
    pd.concat(tot_road_EQ, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'EQ_rail_losses.csv'))

    # Coastal Flooding
    get_files = os.listdir(os.path.join(data_path, 'CF_impacts'))
    with Pool(40) as pool:
        tot_road_CF = list(
            tqdm(pool.imap(load_CF_csv, get_files), total=len(get_files)))
    pd.concat(tot_road_CF, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'CF_road_losses.csv'))

    get_files = os.listdir(os.path.join(data_path, 'CF_impacts_rail'))
    with Pool(40) as pool:
        tot_road_CF = list(
            tqdm(pool.imap(load_CF_csv_rail, get_files), total=len(get_files)))
    pd.concat(tot_road_CF, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'CF_rail_losses.csv'))

    # Cyclones
    get_files = os.listdir(os.path.join(data_path, 'Cyc_impacts'))
    with Pool(40) as pool:
        tot_road_Cyc = list(
            tqdm(pool.imap(load_Cyc_csv, get_files), total=len(get_files)))
    pd.concat(tot_road_Cyc, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'Cyc_road_losses.csv'))

    get_files = os.listdir(os.path.join(data_path, 'Cyc_impacts_rail'))
    with Pool(40) as pool:
        tot_road_Cyc = list(
            tqdm(pool.imap(load_Cyc_csv_rail, get_files),
                 total=len(get_files)))
    pd.concat(tot_road_Cyc, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'Cyc_rail_losses.csv'))

    # Fluvial Flooding (sensitivity)
    get_files = os.listdir(os.path.join(data_path, 'FU_sensitivity'))
    with Pool(40) as pool:
        tot_road_FU_sens = list(
            tqdm(pool.imap(load_FU_csv_sens, get_files), total=len(get_files)))
    pd.concat(tot_road_FU_sens, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'sa_FU_road_losses.csv'))

    # Pluvial Flooding (sensitivity)
    get_files = os.listdir(os.path.join(data_path, 'PU_sensitivity'))
    with Pool(40) as pool:
        tot_road_PU_sens = list(
            tqdm(pool.imap(load_PU_csv_sens, get_files), total=len(get_files)))
    pd.concat(tot_road_PU_sens, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'sa_PU_road_losses.csv'))

    # Coastal Flooding (sensitivity)
    get_files = os.listdir(os.path.join(data_path, 'CF_sensitivity'))
    with Pool(40) as pool:
        tot_road_CF_sens = list(
            tqdm(pool.imap(load_CF_csv_sens, get_files), total=len(get_files)))
    pd.concat(tot_road_CF_sens, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'sa_CF_road_losses.csv'))

    # Earthquakes (sensitivity)
    get_files = os.listdir(os.path.join(data_path, 'EQ_sensitivity'))
    with Pool(40) as pool:
        tot_road_EQ_sens = list(
            tqdm(pool.imap(load_EQ_csv_sens, get_files), total=len(get_files)))
    pd.concat(tot_road_EQ_sens, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'sa_EQ_road_losses.csv'))

    # Cyclones (sensitivity)
    get_files = os.listdir(os.path.join(data_path, 'Cyc_sensitivity'))
    with Pool(40) as pool:
        tot_road_Cyc_sens = list(
            tqdm(pool.imap(load_Cyc_csv_sens, get_files),
                 total=len(get_files)))
    pd.concat(tot_road_Cyc_sens, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'sa_Cyc_road_losses.csv'))

    # Bridges (rail)
    get_files = os.listdir(os.path.join(data_path, 'bridge_rail_risk'))
    with Pool(40) as pool:
        tot_bridges = list(
            tqdm(pool.imap(load_bridge_rail_csv, get_files),
                 total=len(get_files)))
    pd.concat(tot_bridges, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'bridge_rail_risk_.csv'))

    # Bridges (road)
    get_files = os.listdir(os.path.join(data_path, 'bridge_road_risk'))
    with Pool(40) as pool:
        tot_bridges = list(
            tqdm(pool.imap(load_bridge_road_csv, get_files),
                 total=len(get_files)))
    pd.concat(tot_bridges, sort=True).reset_index(drop=True).to_csv(
        os.path.join(data_path, 'summarized', 'bridges_road_risk_.csv'))
Example #18
def all_countries(subset=[], regionalized=False, reversed_order=False):
    """    
    Clip all countries from the planet osm file and save them to individual osm.pbf files
    
    Optional Arguments:
        *subset* : allow for a pre-defined subset of countries. Requires ISO3 codes. Will run all countries if left empty.
        
        *regionalized* : Default is **False**. Set to **True** if you want to have the regions of a country as well.
        
        *reversed_order* : Default is **False**. Set to **True** to work backwards for a second process of the same country set to prevent overlapping calculations.
    
    Returns:
        clipped osm.pbf files for the defined set of countries (either the whole world by default or the specified subset)
    
    """

    # set data path
    data_path = load_config()['paths']['data']

    # path to planet file
    planet_path = os.path.join(data_path, 'planet_osm',
                               'planet-latest.osm.pbf')

    # global shapefile path
    if regionalized == True:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_regions.shp')
    else:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_countries.shp')

    # create poly files for all countries
    poly_files(data_path,
               world_path,
               save_shapefile=False,
               regionalized=regionalized)

    # prepare lists for multiprocessing
    if not os.path.exists(os.path.join(data_path, 'country_poly_files')):
        os.makedirs(os.path.join(data_path, 'country_poly_files'))

    if not os.path.exists(os.path.join(data_path, 'country_osm')):
        os.makedirs(os.path.join(data_path, 'country_osm'))

    if regionalized == False:

        get_poly_files = os.listdir(
            os.path.join(data_path, 'country_poly_files'))
        if len(subset) > 0:
            polyPaths = [
                os.path.join(data_path, 'country_poly_files', x)
                for x in get_poly_files if x[:3] in subset
            ]
            area_pbfs = [
                os.path.join(data_path, 'region_osm_admin1',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files if x[:3] in subset
            ]
        else:
            polyPaths = [
                os.path.join(data_path, 'country_poly_files', x)
                for x in get_poly_files
            ]
            area_pbfs = [
                os.path.join(data_path, 'region_osm_admin1',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files
            ]

        big_osm_paths = [planet_path] * len(polyPaths)

    elif regionalized == True:

        if not os.path.exists(os.path.join(data_path, 'regional_poly_files')):
            os.makedirs(os.path.join(data_path, 'regional_poly_files'))

        if not os.path.exists(os.path.join(data_path, 'region_osm_admin1')):
            os.makedirs(os.path.join(data_path, 'region_osm_admin1'))

        get_poly_files = os.listdir(
            os.path.join(data_path, 'regional_poly_files'))
        if len(subset) > 0:
            polyPaths = [
                os.path.join(data_path, 'regional_poly_files', x)
                for x in get_poly_files if x[:3] in subset
            ]
            area_pbfs = [
                os.path.join(data_path, 'region_osm_admin1',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files if x[:3] in subset
            ]
            big_osm_paths = [
                os.path.join(data_path, 'country_osm', x[:3] + '.osm.pbf')
                for x in get_poly_files if x[:3] in subset
            ]
        else:
            polyPaths = [
                os.path.join(data_path, 'regional_poly_files', x)
                for x in get_poly_files
            ]
            area_pbfs = [
                os.path.join(data_path, 'region_osm_admin1',
                             x.split('.')[0] + '.osm.pbf')
                for x in get_poly_files
            ]
            big_osm_paths = [
                os.path.join(data_path, 'country_osm', x[:3] + '.osm.pbf')
                for x in get_poly_files
            ]

    data_paths = [data_path] * len(polyPaths)

    # allow for reversed order if you want to run two at the same time (convenient to work backwards for the second process, to prevent overlapping calculation)
    if reversed_order == True:
        polyPaths = polyPaths[::-1]
        area_pbfs = area_pbfs[::-1]
        big_osm_paths = big_osm_paths[::-1]

    # extract all country osm files through multiprocesing
    pool = Pool(cpu_count() - 1)
    pool.starmap(clip_osm, zip(data_paths, big_osm_paths, polyPaths,
                               area_pbfs))
Example #19
def bridge_intersection(file, rail=False):
    """
    Function to obtain all bridge intersection values from the regional intersection data.
    
    To be able to do this, we require all other hazard intersection files to be finished.
    
    Arguments:
        *file* : file with all unique road bridges in a region.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** to use the railway hazard 
        intersection files and save the output for railway bridges instead.
        
    Returns:
        *.feather file* : a geopandas GeoDataframe, saved as .feather file with all intersection values. 
    
    """
    # specify the file path where all data is located.
    data_path = load_config()['paths']['data']

    # obtain the paths for all intersected data for all hazards
    if not rail:
        all_EQ_files = [
            os.path.join(data_path, 'output_EQ_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_EQ_full'))
        ]
        all_Cyc_files = [
            os.path.join(data_path, 'output_Cyc_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_Cyc_full'))
        ]
        all_PU_files = [
            os.path.join(data_path, 'output_PU_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_PU_full'))
        ]
        all_FU_files = [
            os.path.join(data_path, 'output_FU_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_FU_full'))
        ]
        all_CF_files = [
            os.path.join(data_path, 'output_CF_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_CF_full'))
        ]

    else:
        all_EQ_files = [
            os.path.join(data_path, 'output_EQ_rail_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_EQ_rail_full'))
        ]
        all_Cyc_files = [
            os.path.join(data_path, 'output_Cyc_rail_full',
                         x) for x in os.listdir(
                             os.path.join(data_path, 'output_Cyc_rail_full'))
        ]
        all_PU_files = [
            os.path.join(data_path, 'output_PU_rail_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_PU_rail_full'))
        ]
        all_FU_files = [
            os.path.join(data_path, 'output_FU_rail_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_FU_rail_full'))
        ]
        all_CF_files = [
            os.path.join(data_path, 'output_CF_rail_full', x)
            for x in os.listdir(os.path.join(data_path, 'output_CF_rail_full'))
        ]

    # read the datafile with all bridges in the region we are interested in.
    df_bridge = pandas.read_csv(file, index_col=[0])
    df_bridge['osm_id'] = df_bridge.osm_id.astype(str)

    # load earthquake intersection file for this region
    df_EQ = pandas.read_feather(
        [x for x in all_EQ_files if os.path.split(file)[1][:-6] in x][0])
    df_EQ['osm_id'] = df_EQ.osm_id.astype(str)

    # load cyclone intersection file for this region
    df_Cyc = pandas.read_feather(
        [x for x in all_Cyc_files if os.path.split(file)[1][:-6] in x][0])
    df_Cyc['osm_id'] = df_Cyc.osm_id.astype(str)

    # load surface flooding intersection file for this region
    df_PU = pandas.read_feather(
        [x for x in all_PU_files if os.path.split(file)[1][:-6] in x][0])
    df_PU['osm_id'] = df_PU.osm_id.astype(str)

    # load river flooding intersection file for this region
    df_FU = pandas.read_feather(
        [x for x in all_FU_files if os.path.split(file)[1][:-6] in x][0])
    df_FU['osm_id'] = df_FU.osm_id.astype(str)

    # load coastal flooding intersection file for this region
    df_CF = pandas.read_feather(
        [x for x in all_CF_files if os.path.split(file)[1][:-6] in x][0])
    df_CF['osm_id'] = df_CF.osm_id.astype(str)

    # grab all bridges from each of the datasets
    if len(df_bridge.loc[df_bridge.osm_id.isin(list(df_EQ.osm_id))]) == 0:
        df_output = pandas.DataFrame(columns=list(df_EQ[[
            x for x in list(df_EQ.columns) if ('val' in x) | ('length_' in x)
        ]].columns),
                                     index=df_bridge.index).fillna(0)
        df_bridge = pandas.concat([df_bridge, df_output], axis=1)
    else:
        region_bridges = df_bridge.loc[df_bridge.osm_id.isin(list(
            df_EQ.osm_id))]
        df_reg_bridges = df_EQ.loc[df_EQ.osm_id.isin(
            [str(x) for x in list(region_bridges.osm_id)])]
        df_bridge = df_bridge.merge(df_reg_bridges[[
            x for x in list(df_EQ.columns) if ('val' in x) | ('length_' in x)
        ] + ['osm_id']],
                                    left_on='osm_id',
                                    right_on='osm_id',
                                    how='left')

    if len(df_bridge.loc[df_bridge.osm_id.isin(list(df_Cyc.osm_id))]) == 0:
        df_output = pandas.DataFrame(columns=list(df_Cyc[[
            x for x in list(df_Cyc.columns) if ('val' in x) | ('length_' in x)
        ]].columns),
                                     index=df_bridge.index).fillna(0)
        df_bridge = pandas.concat([df_bridge, df_output], axis=1)
    else:
        region_bridges = df_bridge.loc[df_bridge.osm_id.isin(
            list(df_Cyc.osm_id))]
        df_reg_bridges = df_Cyc.loc[df_Cyc.osm_id.isin(
            [str(x) for x in list(region_bridges.osm_id)])]
        df_bridge = df_bridge.merge(df_reg_bridges[[
            x for x in list(df_Cyc.columns) if ('val' in x) | ('length_' in x)
        ] + ['osm_id']],
                                    left_on='osm_id',
                                    right_on='osm_id',
                                    how='left')

    if len(df_bridge.loc[df_bridge.osm_id.isin(list(df_FU.osm_id))]) == 0:
        df_output = pandas.DataFrame(columns=list(df_FU[[
            x for x in list(df_FU.columns) if ('val' in x) | ('length_' in x)
        ]].columns),
                                     index=df_bridge.index).fillna(0)
        df_bridge = pandas.concat([df_bridge, df_output], axis=1)
    else:
        region_bridges = df_bridge.loc[df_bridge.osm_id.isin(list(
            df_FU.osm_id))]
        df_reg_bridges = df_FU.loc[df_FU.osm_id.isin(
            [str(x) for x in list(region_bridges.osm_id)])]
        df_bridge = df_bridge.merge(df_reg_bridges[[
            x for x in list(df_FU.columns) if ('val' in x) | ('length_' in x)
        ] + ['osm_id']],
                                    left_on='osm_id',
                                    right_on='osm_id',
                                    how='left')

    if len(df_bridge.loc[df_bridge.osm_id.isin(list(df_PU.osm_id))]) == 0:
        df_output = pandas.DataFrame(columns=list(df_PU[[
            x for x in list(df_PU.columns) if ('val' in x) | ('length_' in x)
        ]].columns),
                                     index=df_bridge.index).fillna(0)
        df_bridge = pandas.concat([df_bridge, df_output], axis=1)
    else:
        region_bridges = df_bridge.loc[df_bridge.osm_id.isin(list(
            df_PU.osm_id))]
        df_reg_bridges = df_PU.loc[df_PU.osm_id.isin(
            [str(x) for x in list(region_bridges.osm_id)])]
        df_bridge = df_bridge.merge(df_reg_bridges[[
            x for x in list(df_PU.columns) if ('val' in x) | ('length_' in x)
        ] + ['osm_id']],
                                    left_on='osm_id',
                                    right_on='osm_id',
                                    how='left')

    if len(df_bridge.loc[df_bridge.osm_id.isin(list(df_CF.osm_id))]) == 0:
        df_output = pandas.DataFrame(columns=list(df_CF[[
            x for x in list(df_CF.columns) if ('val' in x) | ('length_' in x)
        ]].columns),
                                     index=df_bridge.index).fillna(0)
        df_bridge = pandas.concat([df_bridge, df_output], axis=1)
    else:
        region_bridges = df_bridge.loc[df_bridge.osm_id.isin(list(
            df_CF.osm_id))]
        df_reg_bridges = df_CF.loc[df_CF.osm_id.isin(
            [str(x) for x in list(region_bridges.osm_id)])]
        df_bridge = df_bridge.merge(df_reg_bridges[[
            x for x in list(df_CF.columns) if ('val' in x) | ('length_' in x)
        ] + ['osm_id']],
                                    left_on='osm_id',
                                    right_on='osm_id',
                                    how='left')

    df_bridge.drop('geometry', inplace=True, axis=1)

    # save the intersected bridges to a new file with all hazard intersections.
    if not rail:
        df_bridge.to_feather(
            os.path.join(data_path, 'bridges_osm_roads',
                         '{}.ft'.format(list(df_bridge.region.unique())[0])))
    else:
        df_bridge.to_feather(
            os.path.join(data_path, 'bridges_osm_rail',
                         '{}.ft'.format(list(df_bridge.region.unique())[0])))
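The five hazard blocks in bridge_intersection are identical apart from the hazard DataFrame they read from. A possible refactor (a sketch, not part of the original code) collapses them into a small helper and a loop, reusing only pandas operations already present above:

def _merge_hazard(df_bridge, df_hzd):
    # sketch: attach the val_*/length_* columns of one hazard DataFrame to the
    # bridge table, filling zeros when none of the bridges appear in it
    # (assumes pandas is imported as in the example above)
    cols = [c for c in df_hzd.columns if ('val' in c) | ('length_' in c)]
    if not df_bridge.osm_id.isin(df_hzd.osm_id).any():
        filler = pandas.DataFrame(0, index=df_bridge.index, columns=cols)
        return pandas.concat([df_bridge, filler], axis=1)
    return df_bridge.merge(df_hzd[cols + ['osm_id']], on='osm_id', how='left')

# for df_hzd in (df_EQ, df_Cyc, df_FU, df_PU, df_CF):
#     df_bridge = _merge_hazard(df_bridge, df_hzd)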
Example #20
def single_polygonized(flood_scen, region, geometry, country_ISO3, hzd='FU'):
    """
    Function to overlay a surface or river flood hazard map with the infrastructure assets. 
    
    Arguments:
        *flood_scen* : Unique ID for the flood scenario to be used.
        
        *region* : Unique ID of the region that is intersected.
        
        *geometry* : Shapely geometry of the region that is being intersected.
        
        *country_ISO3* : ISO3 code of the country in which the region is situated. 
        Required to get the FATHOM flood maps.
    
    Optional Arguments:
        *hzd* : Default is **FU**. Can be changed to **PU** for surface flooding.
        
    Returns:
        *gdf* : A GeoDataFrame where each row is a polygon with the same flood depth.
    """

    # get path where all hazards are located
    hazard_path = load_config()['paths']['hazard_data']

    # get dictionary in which we can look up the name of the country used in the FATHOM flood files.
    folder_dict = create_folder_lookup()

    # fix a few things that are still wrong in the data
    if (country_ISO3 == 'SDN') | (country_ISO3 == 'SSD'):
        country_full = 'sudan'
        country_ISO2 = 'SD'
    else:
        country_full = folder_dict[country_ISO3]
        country_ISO2 = coco.convert(names=[country_ISO3], to='ISO2')

    # create geojson-like geometry to do the rasterio masking
    geoms = [mapping(geometry)]

    # get the full path name of fluvial or pluvial flooding
    if hzd == 'FU':
        flood_type = 'fluvial_undefended'
    else:
        flood_type = 'pluvial_undefended'

    # specify path to the hazard map
    flood_path = os.path.join(hazard_path, 'InlandFlooding', country_full,
                              '{}_{}_merged'.format(country_ISO2, flood_type),
                              '{}-{}.tif'.format(country_ISO2, flood_scen))

    # load hazard map with rasterio and clip it to the area we are interested in.
    with rasterio.open(flood_path) as src:
        out_image, out_transform = mask(src, geoms, crop=True)

        # change points in waterbodies and zeros to -1, so we can easily remove them from the dataset
        out_image[out_image == 999] = -1
        out_image[out_image <= 0] = -1
        out_image = numpy.round(out_image, 1)

        # change to centimeters and integers, substantially reduces the size.
        out_image = numpy.array(out_image * 100, dtype='int32')

        # the actual polygonization of the raster map
        results = ({
            'properties': {
                'raster_val': v
            },
            'geometry': s
        } for i, (s, v) in enumerate(
            shapes(out_image[0, :, :], mask=None, transform=out_transform)))

        # and save to a new geopandas GeoDataFrame
        gdf = geopandas.GeoDataFrame.from_features(list(results),
                                                   crs='epsg:4326')
        gdf = gdf.loc[gdf.raster_val > 0]
        gdf = gdf.loc[gdf.raster_val < 5000]
        gdf['geometry'] = gdf.buffer(0)
        gdf['hazard'] = flood_scen

    return gdf
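A possible way to call single_polygonized for one region; the row lookup and the 'FU-100' scenario ID are illustrative assumptions, and the module-level imports (os, geopandas, load_config) follow the other examples:

# usage sketch: polygonize the 1-in-100 year fluvial flood map for one region
global_regions = geopandas.read_file(
    os.path.join(load_config()['paths']['data'], 'input_data',
                 'global_regions_v2.shp'))
row = global_regions.iloc[0]

flood_polygons = single_polygonized('FU-100', row.GID_2, row.geometry,
                                    row.ISO_3digit, hzd='FU')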
Example #21
def single_country(country, regionalized=False, create_poly_files=False):
    """    
    Clip a country from the planet osm file and save to individual osm.pbf files
    
    This function has the option to extract individual regions
    
    Arguments:
        *country* : The country for which we want to extract the data.
    
    Keyword Arguments:
        *regionalized* : Default is **False**. Set to **True** to parallelize the extraction over all regions within a country.
        
        *create_poly_files* : Default is **False**. Set to **True** to create new .poly files.
        
    """

    # set data path
    data_path = load_config()['paths']['data']

    # path to planet file
    planet_path = os.path.join(data_path, 'planet_osm',
                               'planet-latest.osm.pbf')

    # global shapefile path
    if regionalized == True:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_regions.shp')
    else:
        world_path = os.path.join(data_path, 'input_data',
                                  'global_countries.shp')

    # create poly files for all countries
    if create_poly_files == True:
        poly_files(data_path,
                   world_path,
                   save_shapefile=False,
                   regionalized=regionalized)

    if not os.path.exists(os.path.join(data_path, 'country_poly_files')):
        os.makedirs(os.path.join(data_path, 'country_poly_files'))

    if not os.path.exists(os.path.join(data_path, 'country_osm')):
        os.makedirs(os.path.join(data_path, 'country_osm'))

    ctry_poly = os.path.join(data_path, 'country_poly_files',
                             '{}.poly'.format(country))
    ctry_pbf = os.path.join(data_path, 'country_osm',
                            '{}.osm.pbf'.format(country))

    if regionalized == False:
        clip_osm(data_path, planet_path, ctry_poly, ctry_pbf)

    elif regionalized == True:

        if not os.path.exists(ctry_pbf):
            clip_osm(data_path, planet_path, ctry_poly, ctry_pbf)

        if not os.path.exists(os.path.join(data_path, 'regional_poly_files')):
            os.makedirs(os.path.join(data_path, 'regional_poly_files'))

        if not os.path.exists(os.path.join(data_path, 'region_osm_admin1')):
            os.makedirs(os.path.join(data_path, 'region_osm_admin1'))

        get_poly_files = [
            x
            for x in os.listdir(os.path.join(data_path, 'regional_poly_files'))
            if x.startswith(country)
        ]
        polyPaths = [
            os.path.join(data_path, 'regional_poly_files', x)
            for x in get_poly_files
        ]
        area_pbfs = [
            os.path.join(data_path, 'region_osm_admin1',
                         x.split('.')[0] + '.osm.pbf') for x in get_poly_files
        ]
        data_paths = [data_path] * len(polyPaths)
        planet_paths = [ctry_pbf] * len(polyPaths)

        # and run all regions in parallel
        with Pool(cpu_count() - 1) as pool:
            pool.starmap(clip_osm,
                         zip(data_paths, planet_paths, polyPaths, area_pbfs))
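A short usage sketch, assuming the planet file and shapefiles described above are in place; the country code is only an example:

# usage sketch: clip one country and all of its regions in parallel
single_country('KEN', regionalized=True, create_poly_files=True)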
Example #22
def get_liquefaction_region(n, rail=False):
    """
    Function to intersect the global liquefaction susceptibility map with all 
    road or railway assets in the specified region. 
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        intersect the railway assets in a region.
        
    Returns:
        *output* : a GeoDataFrame with all intersections between the 
        infrastructure assets and the liquefaction map. Will be saved as .feather file.
    """

    try:

        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # if intersection is already done for this region, stop and move on to the next region.
        if (not rail) & os.path.exists(
                os.path.join(data_path, 'liquefaction_road',
                             '{}_liq.ft'.format(region))):
            print('{} already finished!'.format(region))
            return None
        if (rail) & os.path.exists(
                os.path.join(data_path, 'liquefaction_rail',
                             '{}_liq.ft'.format(region))):
            print('{} already finished!'.format(region))
            return None

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['length'] = road_gpd.geometry.apply(line_length)
            road_gpd.geometry = road_gpd.geometry.simplify(tolerance=0.5)
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()

        else:
            rail_gpd = railway(data_path, region, regional=True)
            rail_gpd['length'] = rail_gpd.geometry.apply(line_length)
            rail_gpd.geometry = rail_gpd.geometry.simplify(tolerance=0.5)
            infra_gpd = rail_gpd.copy()

        # create GeoJSON geometry to do the rasterio masking
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(data_path, 'Hazards', 'Liquefaction', 'Global',
                             'liquefaction_v1_deg.tif')) as src:
            out_image, out_transform = mask(src, geoms, crop=True)
            out_image = out_image[0, :, :]

            # change array to integers, to reduce the size of the polygonized GeoDataFrame.
            out_image[out_image <= 0] = -1
            out_image = numpy.array(out_image, dtype='int32')

            # the actual polygonization of the raster map
            results = ({
                'properties': {
                    'raster_val': v
                },
                'geometry': s
            } for i, (s, v) in enumerate(
                shapes(out_image[:, :], mask=None, transform=out_transform)))

            # and save to a geodataframe
            gdf = geopandas.GeoDataFrame.from_features(list(results),
                                                       crs='epsg:4326')
            gdf['geometry'] = gdf.buffer(0)

        # now let's intersect the liquefaction map with the infrastructure assets.
        tqdm.pandas(desc=region)
        inb = infra_gpd.progress_apply(
            lambda x: intersect_hazard(x, gdf.sindex, gdf, liquefaction=True),
            axis=1).copy()
        inb = inb.apply(pandas.Series)
        inb.columns = ['geometry', 'liquefaction']
        inb['length_liq'] = inb.geometry.apply(line_length)
        infra_gpd[['length_liq',
                   'liquefaction']] = inb[['length_liq', 'liquefaction']]
        output = infra_gpd.drop(['geometry'], axis=1)
        output['country'] = region[:3]
        output['continent'] = x.continent
        output['region'] = region

        # and save the output to the designated folders.
        if not rail:
            output.to_feather(
                os.path.join(data_path, 'liquefaction_road',
                             '{}_liq.ft'.format(region)))
        else:
            output.to_feather(
                os.path.join(data_path, 'liquefaction_rail',
                             '{}_liq.ft'.format(region)))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(n, e))
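Because the function takes a region index, it can be run over all regions in parallel in the same way as the other extraction steps. A minimal sketch, assuming the module-level imports (os, geopandas, Pool, cpu_count, load_config) used by the surrounding examples:

def run_liquefaction(rail=False):
    data_path = load_config()['paths']['data']
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # one task per region index; each worker writes its own feather file
    with Pool(cpu_count() - 1) as pool:
        pool.starmap(get_liquefaction_region,
                     [(n, rail) for n in global_regions.index],
                     chunksize=1)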
Example #23
def load_Cyc_csv_sens(x):
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'Cyc_sensitivity', x))
Example #24
def regional_railway(n, prot_lookup, data_path):
    """
    Function to get summarized exposure values for each region for all railway assets.
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
        *prot_lookup* : dictionary with dike design standards for a region.
        
        *data_path* : file path to location of all data.
    
    Returns:
        *dataframe* : a pandas DataFrame with exposure statistics.
    """
    # specify the file path where all data is located.
    data_path = load_config()['paths']['data']

    # load shapefile with unique information for each region
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # grab the row of the region from the global region shapefile
    region = global_regions.iloc[n]

    print('{} started!'.format(region.GID_2))

    try:
        # load ID and income group for the region
        ID = region.GID_2
        wbincome = region.wbincome

        # specify all unique hazard abbreviations
        hazards = ['EQ', 'Cyc', 'PU', 'FU', 'CF']
        collect_risks = []

        # load regional statistics
        reg_stats = pandas.read_csv(
            os.path.join(data_path, 'railway_stats',
                         '{}_stats.csv'.format(ID)))

        # loop over all hazards
        for hazard in hazards:
            try:
                # read exposure data
                df = pandas.read_feather(
                    os.path.join(data_path,
                                 'output_{}_rail_full'.format(hazard),
                                 '{}_{}.ft'.format(ID, hazard)))
            except:
                continue

            # correct for protection standards for fluvial and coastal flooding
            if (hazard == 'FU') | (hazard == 'CF'):
                prot_stand = prot_lookup[ID]
                no_floods = [
                    x for x in [x for x in df.columns if ('val' in x)]
                    if prot_stand > int(x.split('-')[1])
                ]
                df[no_floods] = 0

            # correct for (assumed) design standards for surface flooding
            if (hazard == 'PU'):
                if wbincome == 'HIC':
                    df.loc[:,
                           ['val_PU-5', 'val_PU-10', 'val_PU-20', 'val_PU-50'
                            ]] = 0
                elif wbincome == 'UMC':
                    df.loc[:, ['val_PU-5', 'val_PU-10', 'val_PU-20']] = 0
                else:
                    df.loc[:, [
                        'val_PU-5',
                        'val_PU-10',
                    ]] = 0

            # correct for (assumed) design standards for river flooding
            if (hazard == 'FU'):
                if wbincome == 'HIC':
                    df.loc[:,
                           ['val_FU-5', 'val_FU-10', 'val_FU-20', 'val_FU-50'
                            ]] = 0
                elif wbincome == 'UMC':
                    df.loc[:, [
                        'val_FU-5',
                        'val_FU-10',
                        'val_FU-20',
                    ]] = 0
                else:
                    df.loc[:, ['val_FU-5', 'val_FU-10']] = 0

            # correct for (assumed) design standards for coastal flooding
            if (hazard == 'CF'):
                if wbincome == 'HIC':
                    df.loc[:, [
                        'val_CF-10',
                        'val_CF-20',
                        'val_CF-50',
                    ]] = 0
                elif wbincome == 'UMC':
                    df.loc[:, ['val_CF-10', 'val_CF-20']] = 0
                else:
                    df.loc[:, ['val_CF-10']] = 0

            if hazard == 'EQ':
                reg_df = df.copy()
            elif hazard != 'EQ':
                reg_df = reg_df.merge(df[[
                    x for x in df.columns if ('val_' in x) | ('length_' in x)
                ] + ['osm_id']],
                                      left_on='osm_id',
                                      right_on='osm_id')

            # something went wrong in the order of the hazard maps, correct that here.
            if hazard == 'EQ':
                event_list = [
                    'EQ_rp250', 'EQ_rp475', 'EQ_rp975', 'EQ_rp1500',
                    'EQ_rp2475'
                ]
                RPS = [1 / 250, 1 / 475, 1 / 975, 1 / 1500, 1 / 2475]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 92, 180, 340, 650, 2000]
                df = df.rename(
                    {
                        'val_EQ_rp250': 'val_EQ_rp475',
                        'val_EQ_rp475': 'val_EQ_rp1500',
                        'val_EQ_rp975': 'val_EQ_rp250',
                        'val_EQ_rp1500': 'val_EQ_rp2475',
                        'val_EQ_rp2475': 'val_EQ_rp975',
                        'length_EQ_rp250': 'length_EQ_rp475',
                        'length_EQ_rp475': 'length_EQ_rp1500',
                        'length_EQ_rp975': 'length_EQ_rp250',
                        'length_EQ_rp1500': 'length_EQ_rp2475',
                        'length_EQ_rp2475': 'length_EQ_rp975',
                    },
                    axis='columns')

            elif hazard == 'Cyc':
                event_list = [
                    'Cyc_rp50', 'Cyc_rp100', 'Cyc_rp250', 'Cyc_rp500',
                    'Cyc_rp1000'
                ]
                RPS = [1 / 50, 1 / 100, 1 / 250, 1 / 500, 1 / 1000]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 154, 178, 209, 252, 1000]
                df = df.rename(
                    {
                        'val_Cyc_rp100': 'val_Cyc_rp1000',
                        'val_Cyc_rp500': 'val_Cyc_rp100',
                        'val_Cyc_rp1000': 'val_Cyc_rp500',
                        'length_Cyc_rp100': 'length_Cyc_rp1000',
                        'length_Cyc_rp500': 'length_Cyc_rp100',
                        'length_Cyc_rp1000': 'length_Cyc_rp500'
                    },
                    axis='columns')
            elif hazard == 'FU':
                event_list = [
                    'FU-5', 'FU-10', 'FU-20', 'FU-50', 'FU-75', 'FU-100',
                    'FU-200', 'FU-250', 'FU-500', 'FU-1000'
                ]
                RPS = [
                    1 / 5, 1 / 10, 1 / 20, 1 / 50, 1 / 75, 1 / 100, 1 / 200,
                    1 / 250, 1 / 500, 1 / 1000
                ]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 25, 50, 100, 200, 2000]
            elif hazard == 'PU':
                event_list = [
                    'PU-5', 'PU-10', 'PU-20', 'PU-50', 'PU-75', 'PU-100',
                    'PU-200', 'PU-250', 'PU-500', 'PU-1000'
                ]
                RPS = [
                    1 / 5, 1 / 10, 1 / 20, 1 / 50, 1 / 75, 1 / 100, 1 / 200,
                    1 / 250, 1 / 500, 1 / 1000
                ]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 25, 50, 100, 200, 2000]
            elif hazard == 'CF':
                event_list = [
                    'CF-10', 'CF-20', 'CF-50', 'CF-100', 'CF-200', 'CF-500',
                    'CF-1000'
                ]
                RPS = [
                    1 / 10, 1 / 20, 1 / 50, 1 / 100, 1 / 200, 1 / 500, 1 / 1000
                ]
                cat_list = [1, 2, 3, 4]
                bins = [-1, 25, 50, 100, 200, 2000]

            # calculate the annual kilometers of total possible roads for each asset
            reg_stats[hazard] = reg_stats.apply(
                lambda x: total_length_risk(x, RPS), axis=1)

            # bin this into the four risk categories, as specified in the Supplementary Materials of Koks et al. (2019)
            for event in event_list:
                reg_df['binned_{}'.format(event)] = pandas.cut(
                    reg_df['val_{}'.format(event)],
                    bins=bins,
                    labels=[0] + cat_list)

            get_all_cats = []
            # calculate the annual exposed kilometers of road per risk category per asset type
            for cat in cat_list[:]:
                get_all_events = []
                for event in event_list:
                    event_sep = reg_df.loc[reg_df['binned_{}'.format(
                        event)] == cat][[
                            'length_{}'.format(event), 'country', 'region',
                            'continent', 'infra_type'
                        ]]
                    cont_out = pandas.DataFrame(
                        event_sep.groupby(
                            ['continent', 'country', 'region',
                             'infra_type'])['length_{}'.format(event)].sum())
                    get_all_events.append(cont_out)

                cat_df = pandas.concat(get_all_events, axis=1)
                cat_df = cat_df.fillna(0)

                if len(cat_df) == 0:
                    cat_df = pandas.DataFrame(
                        columns=list(cat_df.columns) +
                        ['risk_{}_{}'.format(cat, hazard)],
                        index=df.groupby(
                            ['continent', 'country', 'region',
                             'infra_type']).sum().index).fillna(0)
                else:
                    cat_df['risk_{}_{}'.format(cat, hazard)] = cat_df.apply(
                        lambda x: exposed_length_risk(x, hazard, RPS), axis=1)
                    cat_df.loc[
                        cat_df['risk_{}_{}'.format(cat, hazard)] < 0] = 0
                    cat_df.reset_index(inplace=True)

                get_all_cats.append(
                    cat_df.groupby([
                        'continent', 'country', 'region', 'infra_type'
                    ]).sum()['risk_{}_{}'.format(cat, hazard)])

            collect_risks.append(pandas.concat(get_all_cats, axis=1).fillna(0))

        # return results to be saved in one big file for all regions combined
        return (pandas.concat(collect_risks, axis=1).fillna(0))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(region.GID_2, e))
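regional_railway returns one DataFrame per region, so the per-region results can be collected and concatenated into a global overview. A hedged sketch; the prot_stand column used to build prot_lookup is an assumption, the function only requires a mapping from GID_2 to a flood protection return period:

def run_regional_railway():
    data_path = load_config()['paths']['data']
    global_regions = geopandas.read_file(
        os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

    # assumed lookup from GID_2 to a flood protection design return period
    prot_lookup = dict(zip(global_regions.GID_2, global_regions.prot_stand))

    results = [regional_railway(n, prot_lookup, data_path)
               for n in global_regions.index]
    return pandas.concat([x for x in results if x is not None], sort=False)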
Example #25
def load_EQ_csv(x):
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'EQ_impacts', x))
Example #26
def merge_SSBN_maps(country):
    """
    Function to merge SSBN maps to a country level.
    
    Arguments:
        *country* : ISO3 code of the country for which we want to merge the river and surface flood maps to country level.
   
    """
    try:
        print('{} started!'.format(country))

        # get path where all hazard data is located
        hazard_path = load_config()['paths']['hazard_data']

        # get dictionary in which we can look up the name of the country used in the FATHOM flood files.
        folder_lookup = create_folder_lookup()

        # get ISO2 and full country names for each country
        country_ISO2 = coco.convert(names=[country], to='ISO2')
        country_full = folder_lookup[country]

        rps = ['5', '10', '20', '50', '75', '100', '200', '250', '1000']

        flood_types = ['fluvial_undefended', 'pluvial_undefended']
        flood_types_abb = ['FU', 'PU']

        flood_mapping = dict(zip(flood_types, flood_types_abb))

        # merge all subcountry files into one country file for each hazard.
        for flood_type in flood_types:
            new_folder = os.path.join(
                hazard_path, 'InlandFlooding', country_full,
                '{}_{}_merged'.format(country_ISO2, flood_type))
            os.makedirs(new_folder, exist_ok=True)
            path_to_all_files = os.path.join(
                hazard_path, 'InlandFlooding', country_full,
                '{}_{}'.format(country_ISO2, flood_type))
            full_paths = [
                os.path.join(path_to_all_files, x)
                for x in os.listdir(path_to_all_files) if x.endswith('.tif')
            ]
            for rp in tqdm(rps,
                           desc=flood_type + '_' + country,
                           leave=False,
                           total=len(rps),
                           unit='rp'):
                get_one_rp = [x for x in full_paths if '-{}-'.format(rp) in x]
                stringlist_rp = ' '.join(get_one_rp)
                rp_out = os.path.join(
                    new_folder,
                    '{}-{}-{}.tif'.format(country_ISO2,
                                          flood_mapping[flood_type], rp))
                os.system(
                    'gdal_merge.py -q -o {} {} -co COMPRESS=LZW -co BIGTIFF=YES -co PREDICTOR=2 -co TILED=YES'
                    .format(rp_out, stringlist_rp))
        print('{} finished!'.format(country))

    except:
        print(
            '{} failed! It seems we do not have proper flood data for this country.'
            .format(country))
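Merging is independent per country, so it can be parallelized as well. A small sketch with an illustrative country list, assuming the module-level Pool and cpu_count imports used elsewhere:

# usage sketch: merge the FATHOM/SSBN tiles for a few countries in parallel
countries = ['KEN', 'TZA', 'UGA']  # illustrative ISO3 codes
with Pool(cpu_count() - 1) as pool:
    pool.map(merge_SSBN_maps, countries, chunksize=1)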
Example #27
def load_Cyc_csv_rail(x):
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'Cyc_impacts_rail', x))
Example #28
def get_tree_density(n, rail=False):
    """
    Function to extract the tree density value for all road or railway assets 
    in the specified region, using the global tree density map.
    
    Arguments:
        *n* : the index ID of a region in the specified shapefile with all the regions.
        
    Optional Arguments:
        *rail* : Default is **False**. Set to **True** if you would like to 
        extract tree density values for the railway assets in a region.
        
    Returns:
        *output* : a DataFrame with the tree density value for each 
        infrastructure asset. Will be saved as a .feather file.
    """
    try:

        # specify the file path where all data is located.
        data_path = load_config()['paths']['data']

        # load shapefile with unique information for each region
        global_regions = geopandas.read_file(
            os.path.join(data_path, 'input_data', 'global_regions_v2.shp'))

        # grab the row of the region from the global region shapefile
        x = global_regions.iloc[n]

        # get name of the region and the geometry
        region = x.GID_2
        reg_geom = x.geometry

        # load OpenStreetMap data.
        if not rail:
            road_gpd = roads(data_path, region, regional=True)
            road_dict = map_roads()
            road_gpd['road_type'] = road_gpd.infra_type.apply(
                lambda y: road_dict[y])
            infra_gpd = road_gpd.copy()

        else:
            rail_gpd = railway(data_path, region, regional=True)
            infra_gpd = rail_gpd.copy()

        # create GeoJSON geometry to do the rasterio masking
        geoms = [mapping(reg_geom.envelope.buffer(1))]

        # extract the raster values within the polygon
        with rasterio.open(
                os.path.join(
                    data_path, 'input_data',
                    'Crowther_Nature_Biome_Revision_01_WGS84_GeoTiff.tif')
        ) as src:
            out_image, out_transform = mask(src, geoms, crop=True)
            out_image = out_image[0, :, :]

            # grab the tree density value for the road by using a point query
            tqdm.pandas(desc='Tree Density ' + region)
            infra_gpd['Tree_Dens'] = infra_gpd.centroid.progress_apply(
                lambda x: get_raster_value(x, out_image, out_transform))

        infra_gpd['Tree_Dens'] = infra_gpd['Tree_Dens'].astype(float)
        infra_gpd['region'] = region
        infra_gpd = infra_gpd.drop('geometry', axis=1)

        # and save the output to the designated folders.
        if not rail:
            pandas.DataFrame(infra_gpd).to_feather(
                os.path.join(data_path, 'tree_cover_road',
                             '{}.ft'.format(region)))
        else:
            pandas.DataFrame(infra_gpd).to_feather(
                os.path.join(data_path, 'tree_cover_rail',
                             '{}.ft'.format(region)))

        print('{} finished!'.format(region))

    except Exception as e:
        print('Failed to finish {} because of {}!'.format(n, e))
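get_raster_value itself is not shown in these examples; conceptually it only needs to translate a point into a row/column index of the clipped raster and return that cell value. A hedged sketch of such a helper, not necessarily identical to the one used in the repository:

def get_raster_value(point, out_image, out_transform):
    # illustrative sketch: look up the raster cell value at a shapely Point
    # invert the affine transform to go from coordinates to row/col indices
    col, row = ~out_transform * (point.x, point.y)
    row, col = int(row), int(col)
    # guard against points that fall just outside the clipped window
    if (0 <= row < out_image.shape[0]) and (0 <= col < out_image.shape[1]):
        return out_image[row, col]
    return numpy.nan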
Example #29
def load_bridge_road_csv(file):
    data_path = load_config()['paths']['data']
    return pd.read_csv(os.path.join(data_path, 'bridge_road_risk', file))
Example #30
def global_shapefiles(regionalized=False):
    """ 
    This function will simplify shapes and add necessary columns, to make further processing quicker.
    
    For now, we will make use of the latest GADM data: https://gadm.org/download_world.html

    Optional Arguments:
        *regionalized* : Default is **False**. Set to **True** to also create the global_regions.shp file.
    """

    data_path = load_config()['paths']['data']

    # path to country GADM file
    if regionalized == False:

        # load country file
        country_gadm_path = os.path.join(data_path, 'GADM36', 'gadm36_0.shp')
        gadm_level0 = geopandas.read_file(country_gadm_path)

        # remove Antarctica, no roads there anyway
        gadm_level0 = gadm_level0.loc[~gadm_level0['NAME_0'].
                                      isin(['Antarctica'])]

        # remove tiny shapes to reduce size substantially
        gadm_level0['geometry'] = gadm_level0.apply(remove_tiny_shapes, axis=1)

        # simplify geometries
        gadm_level0['geometry'] = gadm_level0.simplify(
            tolerance=0.005, preserve_topology=True).buffer(0.01).simplify(
                tolerance=0.005, preserve_topology=True)

        # add additional info
        glob_info_path = os.path.join(data_path, 'input_data',
                                      'global_information.xlsx')
        load_glob_info = pandas.read_excel(glob_info_path)

        gadm_level0 = gadm_level0.merge(load_glob_info,
                                        left_on='GID_0',
                                        right_on='ISO_3digit')

        # save to new country file
        glob_ctry_path = os.path.join(data_path, 'input_data',
                                      'global_countries.shp')
        gadm_level0.to_file(glob_ctry_path)

    else:

        # this is dependent on the country file, so check whether that one is already created:
        glob_ctry_path = os.path.join(data_path, 'input_data',
                                      'global_countries.shp')
        if os.path.exists(glob_ctry_path):
            gadm_level0 = geopandas.read_file(
                os.path.join(data_path, 'input_data', 'global_countries.shp'))
        else:
            print('ERROR: You need to create the country file first')
            return None

        # load region file
        region_gadm_path = os.path.join(data_path, 'GADM36', 'gadm36_2.shp')
        gadm_level1 = geopandas.read_file(region_gadm_path)

        # remove tiny shapes to reduce size substantially
        gadm_level1['geometry'] = gadm_level1.apply(remove_tiny_shapes, axis=1)

        # simplify geometries
        gadm_level1['geometry'] = gadm_level1.simplify(
            tolerance=0.005, preserve_topology=True).buffer(0.01).simplify(
                tolerance=0.005, preserve_topology=True)

        # add additional info
        glob_info_path = os.path.join(data_path, 'input_data',
                                      'global_information.xlsx')
        load_glob_info = pandas.read_excel(glob_info_path)

        gadm_level1 = gadm_level1.merge(load_glob_info,
                                        left_on='GID_0',
                                        right_on='ISO_3digit')
        gadm_level1.rename(columns={'coordinates': 'coordinate'}, inplace=True)

        # add some missing geometries from countries with no subregions
        get_missing_countries = list(
            set(list(gadm_level0.GID_0.unique())).difference(
                list(gadm_level1.GID_0.unique())))

        mis_country = gadm_level0.loc[gadm_level0['GID_0'].isin(
            get_missing_countries)].copy()
        mis_country['GID_1'] = mis_country['GID_0'] + '_' + str(0) + '_' + str(
            1)

        gadm_level1 = geopandas.GeoDataFrame(
            pandas.concat([gadm_level1, mis_country], ignore_index=True))
        gadm_level1.reset_index(drop=True, inplace=True)

        # save to new regions file
        gadm_level1.to_file(
            os.path.join(data_path, 'input_data', 'global_regions.shp'))
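The regionalized run depends on the country layer, so a typical sequence is to build the country file first and only then the region file:

# usage sketch: build the simplified country layer, then the region layer
global_shapefiles(regionalized=False)
global_shapefiles(regionalized=True)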