Example #1
def compute_storage_capacities(sto_capacity_ds: pd.Series) -> pd.Series:
    """
     Computing STO energy capacities (TWh) per unit region.

     Parameters
     ----------
     sto_capacity_ds: pd.Series
         DataFrame containing STO installed capacities per unit region (e.g., "countries", "NUTS3")

     Returns
     -------
     hydro_storage_energy_cap_ds: pd.Series
         DataFrame containing STO energy storage ratings.

    """
    source_dir = f"{data_path}generation/hydro/source/"
    # Initially read modelled data from Hartel et al. (2017)
    hydro_storage_capacities_fn = f"{source_dir}Hartel_2017_EU_hydro_storage_capacities.xlsx"
    hydro_storage_energy_cap_ds = pd.read_excel(hydro_storage_capacities_fn, skiprows=1,
                                                usecols=['ISO2', 'Eq. Storage'], index_col='ISO2', squeeze=True) * 1e3

    # Get storage capacities for countries which are not in the Hartel study
    iso2_codes = sorted(replace_iso2_codes(list(set([region_code[:2] for region_code in sto_capacity_ds.index]))))
    hydro_storage_energy_cap_ds = hydro_storage_energy_cap_ds.reindex(iso2_codes)
    for iso2_code in iso2_codes:
        # If the country is not covered in the Hartel study...
        if np.isnan(hydro_storage_energy_cap_ds[iso2_code]):
            country_name = revert_old_country_names(convert_country_codes([iso2_code], 'alpha_2', 'name', True)[0])
            try:
                # ...look up ENTSO-E reservoir data...
                hydro_storage_capacities_entsoe_fn = f"{source_dir}ENTSOE/Water Reservoirs and Hydro Storage Plants" \
                    f"_201412290000-201912300000_{iso2_code}.csv"
                hydro_storage_energy_cap = pd.read_csv(hydro_storage_capacities_entsoe_fn, index_col=0)
                max_storage = np.nanmax(np.nan_to_num(hydro_storage_energy_cap.values.flatten()))
                if max_storage > 0.:
                    hydro_storage_energy_cap_ds.loc[iso2_code] = max_storage * 1e-3
                else:
                    # ...if ENTSO-E data is missing (NaNs replaced by 0s), approximate storage via GRanD v1.3
                    hydro_storage_energy_cap_ds.loc[iso2_code] = get_country_storage_from_grand(country_name)
            except FileNotFoundError:
                # ...if ENTSO-E file is missing altogether, approximate storage via GRanD v1.3
                hydro_storage_energy_cap_ds.loc[iso2_code] = get_country_storage_from_grand(country_name)

    # If topology unit is "countries", return series directly
    if len(sto_capacity_ds.index[0]) == 2:
        return hydro_storage_energy_cap_ds.round(3)
    else:
        # If a NUTS-based topology is in place, storage is distributed among regions via GRanD v1.3
        storage_distribution_by_nuts = get_nuts_storage_distribution_from_grand(sto_capacity_ds.index)
        for nuts in storage_distribution_by_nuts.index:
            storage_distribution_by_nuts.loc[nuts] *= hydro_storage_energy_cap_ds.loc[replace_iso2_codes([nuts[:2]])[0]]
        hydro_storage_energy_cap_ds = storage_distribution_by_nuts.copy()
        return hydro_storage_energy_cap_ds.round(3)
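
A minimal usage sketch, assuming the Hartel, ENTSO-E and GRanD source files are present under `data_path`; the index and values below are illustrative, not real data:

import pandas as pd

# Hypothetical STO capacities (GW) indexed by country code ("countries" topology).
sto_capacity_ds = pd.Series([2.5, 1.2], index=["AT", "CH"])

# Two-letter indices take the "countries" branch, so energy capacities (GWh)
# are returned directly, one value per country.
energy_cap_ds = compute_storage_capacities(sto_capacity_ds)
print(energy_cap_ds)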
Example #2
def build_ror_data(ror_capacity_ds: pd.Series, timestamps: pd.DatetimeIndex,
                   runoff_dataset: xr.Dataset, runoff_points_region_ds: pd.Series) -> Tuple[pd.Series, pd.DataFrame]:
    """
    Compute total ROR capacities (in GW) and inflow (p.u. of capacity) for a series of regions.

    Parameters
    ----------
    ror_capacity_ds: pd.Series
        Series containing ROR power capacity (MW) per plant, indexed by the region in which the plant is located.
    timestamps: pd.DatetimeIndex
        Time stamps over which the inflows must be computed.
    runoff_dataset: xr.Dataset
        ERA5 runoff dataset
    runoff_points_region_ds: pd.Series
        Indicates in which region each ERA5 point falls.

    Returns
    -------
    ror_capacity_ds: pd.Series
        Series containing ROR power (GW) capacity per region.
    ror_inflows_df: pd.DataFrame
        ROR inflow time-series (p.u. of power capacity) for each region.
    """

    ror_thresholds_fn = f"{data_path}generation/hydro/source/ror_flood_event_thresholds.csv"
    ror_thresholds = pd.read_csv(ror_thresholds_fn, index_col=0)

    # Aggregate plant capacities per region and convert from MW to GW.
    ror_capacity_ds = ror_capacity_ds.groupby(ror_capacity_ds.index).sum() * 1e-3

    ror_inflows_df = pd.DataFrame(index=timestamps, columns=ror_capacity_ds.index)
    for region in ror_capacity_ds.index:
        points = runoff_points_region_ds[runoff_points_region_ds == region].index.to_list()
        flood_event_threshold = ror_thresholds.loc[replace_iso2_codes([region[:2]])[0], 'value']
        if points:
            ror_inflows_df[region] = compute_ror_series(runoff_dataset, points, flood_event_threshold)
    ror_inflows_df.dropna(axis=1, inplace=True)
    missing_inflows_indexes = ~ror_capacity_ds.index.isin(ror_inflows_df.columns)
    missing_ror = ror_capacity_ds.loc[missing_inflows_indexes].dropna().sum()
    ror_capacity_ds = ror_capacity_ds[ror_inflows_df.columns]
    logger.info(f'ROR capacity factors computed. '
                f'{missing_ror} GW removed because of ERA5 point unavailability in regions.')

    return ror_capacity_ds, ror_inflows_df
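
A hedged call sketch, mirroring how generate_eu_hydro_files (Example #5) wires this function together with read_runoff_data and match_points_to_regions; the year, resolution and "countries" topology are illustrative assumptions:

import pandas as pd

timestamps = pd.date_range("2015-01-01", "2015-12-31 23:00", freq="H")
runoff_dataset = read_runoff_data(0.5, timestamps)  # assumes ERA5 runoff files exist
shapes = get_natural_earth_shapes()                 # "countries" topology
runoff_points_region_ds = \
    match_points_to_regions(runoff_dataset.locations.values, shapes, keep_outside=False).dropna()

# ror_plants_df as returned by get_powerplants('ror', countries) in Example #5,
# with a 'region_code' column attached.
ror_capacity_ds, ror_inflows_df = build_ror_data(
    ror_plants_df.set_index(["region_code"])["Capacity"], timestamps,
    runoff_dataset, runoff_points_region_ds)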
Example #3
def get_topology(network: pypsa.Network,
                 countries: List[str] = None,
                 add_offshore: bool = True,
                 extend_line_cap: bool = True,
                 use_ex_line_cap: bool = True,
                 plot: bool = False) -> pypsa.Network:
    """
    Load the e-highway network topology (buses and links) using PyPSA.

    Parameters
    ----------
    network: pypsa.Network
        Network instance
    countries: List[str] (default: None)
        List of ISO codes of countries for which we want the e-highway topology
    add_offshore: bool (default: True)
        Whether to include offshore nodes
    extend_line_cap: bool (default True)
        Whether line capacity is allowed to be expanded
    use_ex_line_cap: bool (default True)
        Whether to use existing line capacity
    plot: bool (default: False)
        Whether to show loaded topology or not

    Returns
    -------
    network: pypsa.Network
        Updated network
    """

    assert countries is None or len(countries) != 0, "Error: Countries list must not be empty. If you want to " \
                                                     "obtain the full topology, don't pass any argument."

    topology_dir = f"{data_path}topologies/e-highways/generated/"
    buses_fn = f"{topology_dir}buses.csv"
    assert isfile(buses_fn), "Error: Buses are undefined. Please run 'preprocess'."
    buses = pd.read_csv(buses_fn, index_col='id')
    lines_fn = f"{topology_dir}lines.csv"
    assert isfile(lines_fn), "Error: Lines are undefined. Please run 'preprocess'."
    lines = pd.read_csv(lines_fn, index_col='id')

    # Remove offshore buses if not considered
    if not add_offshore:
        buses = buses.dropna(subset=["onshore_region"])

    if countries is not None:
        # In e-highway, GB is referenced as UK
        iso_to_ehighway = {"GB": "UK"}
        ehighway_countries = [
            iso_to_ehighway[c] if c in iso_to_ehighway else c
            for c in countries
        ]

        # Remove onshore buses that are not in the considered countries;
        # keep offshore buses (i.e., those whose country field is not a string).
        def filter_buses(bus):
            return not isinstance(bus.country, str) or bus.name[2:] in ehighway_countries

        buses = buses.loc[buses.apply(filter_buses, axis=1)]
    else:
        countries = replace_iso2_codes(
            list(set([idx[2:] for idx in buses.dropna(subset=["onshore_region"]).index])))

    # Convert polygon strings to Polygon objects
    for region_type in ["onshore_region", "offshore_region"]:
        regions = buses[region_type].values
        # Convert strings
        for i, region in enumerate(regions):
            if isinstance(region, str):
                regions[i] = shapely.wkt.loads(region)

    # Remove lines for which one of the two end buses has been removed
    lines = pd.DataFrame(lines.loc[lines.bus0.isin(buses.index)
                                   & lines.bus1.isin(buses.index)])

    # Remove offshore buses that are no longer connected
    connected_buses = sorted(set(lines["bus0"]).union(set(lines["bus1"])))
    buses = buses.loc[connected_buses]
    assert len(buses) != 0, "Error: No buses are located in the given list of countries."

    # Add offshore polygons to remaining offshore buses
    if add_offshore:
        offshore_shapes = get_shapes(countries, which='offshore', save=True)["geometry"]
        if len(offshore_shapes) != 0:
            offshore_zones_shape = unary_union(offshore_shapes.values)
            offshore_bus_indexes = buses[buses["onshore_region"].isnull()].index
            offshore_buses = buses.loc[offshore_bus_indexes]
            # Use a home-made 'voronoi' partition to assign a region to each offshore bus
            buses.loc[offshore_bus_indexes, "offshore_region"] = \
                voronoi_special(offshore_zones_shape, offshore_buses[["x", "y"]])

    # Setting line parameters
    """ For DC-opf
    lines['s_nom'] *= 1000.0  # PyPSA uses MW
    lines['s_nom_min'] = lines['s_nom']
    # Define reactance   # TODO: do sth more clever
    lines['x'] = pd.Series(0.00001, index=lines.index)
    lines['s_nom_extendable'] = pd.Series(True, index=lines.index) # TODO: parametrize
    lines['capital_cost'] = pd.Series(index=lines.index)
    for idx in lines.index:
        carrier = lines.loc[idx].carrier
        cap_cost, _ = get_costs(carrier, sum(network.snapshot_weightings['objective']))
        lines.loc[idx, ('capital_cost', )] = cap_cost * lines.length.loc[idx]
    """

    lines['p_nom'] = lines["s_nom"]
    if not use_ex_line_cap:
        lines['p_nom'] = 0
    lines['p_nom_min'] = lines['p_nom']
    lines['p_min_pu'] = -1.  # Making the link bi-directional
    lines = lines.drop('s_nom', axis=1)
    lines['p_nom_extendable'] = extend_line_cap
    lines['capital_cost'] = pd.Series(index=lines.index)
    for idx in lines.index:
        carrier = lines.loc[idx].carrier
        cap_cost, _ = get_costs(carrier, sum(network.snapshot_weightings['objective']))
        lines.loc[idx, 'capital_cost'] = cap_cost * lines.length.loc[idx]

    network.import_components_from_dataframe(buses, "Bus")
    network.import_components_from_dataframe(lines, "Link")
    # network.import_components_from_dataframe(lines, "Line") for dc-opf

    if plot:
        from epippy.topologies.core.plot import plot_topology
        plot_topology(buses, lines)
        plt.show()

    return network
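
A minimal usage sketch, assuming the e-highway buses.csv/lines.csv files have been generated by 'preprocess'; snapshots are set beforehand because line capital costs are weighted by the snapshot weightings:

import pandas as pd
import pypsa

network = pypsa.Network()
network.set_snapshots(pd.date_range("2018-01-01", "2018-12-31 23:00", freq="H"))
# Restrict the topology to three countries, keeping offshore nodes,
# existing line capacities and the option to expand them.
network = get_topology(network, countries=["BE", "NL", "LU"],
                       add_offshore=True, extend_line_cap=True,
                       use_ex_line_cap=True, plot=False)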
Example #4
def get_powerplants(tech_name: str, country_codes: List[str]) -> pd.DataFrame:
    """
    Return power plants filtered by technology and country list.

    Parameters
    ----------
    tech_name: str
        Name of one of the technologies defined in the system.
    country_codes: List[str]
        List of target ISO2 country codes.

    Returns
    -------
    pp_df: pd.DataFrame
        List of powerplants with the following attributes: name, capacity (in MW), ISO2 code, longitude and latitude.

    """

    assert len(country_codes) != 0, "Error: List of countries must be non-empty."
    assert all([len(c) == 2 for c in country_codes]), "Error: Countries must be identified with ISO2 codes, " \
                                                      "which are two characters long."

    tech_config = get_config_dict([tech_name])[tech_name]

    assert 'jrc_type' in tech_config, "Error: Capacities cannot be retrieved for this technology."

    jrc_dir = f"{data_path}generation/misc/source/JRC/"
    if tech_name in ['ror', 'sto', 'phs']:
        # Hydro entries read from richer hydro-only database.
        pp_fn = f"{jrc_dir}hydro-power-database-master/data/jrc-hydro-power-plant-database.csv"
        pp_df = pd.read_csv(pp_fn, index_col=0)
        pp_df.rename(columns={'installed_capacity_MW': 'Capacity', 'name': 'Name', 'country_code': 'ISO2'},
                     inplace=True)
        # Replace ISO2 codes.
        pp_df["ISO2"] = pp_df["ISO2"].map(lambda x: replace_iso2_codes([x])[0])

        # Keep only plants in target countries, of the target technology, with a non-missing capacity.
        pp_df = pp_df.loc[pp_df["ISO2"].isin(country_codes)
                          & (pp_df['type'] == tech_config['jrc_type'])
                          & ~pp_df['Capacity'].isnull()]

    else:
        # All other technologies read from JRC's PPDB.
        pp_fn = f"{jrc_dir}JRC-PPDB-OPEN.ver1.0/JRC_OPEN_UNITS.csv"
        pp_df = pd.read_csv(pp_fn, sep=';')

        pp_df["ISO2"] = convert_country_codes(pp_df['country'], 'name',
                                              'alpha_2', True)

        # Plants in the PPDB are listed per generator (often several per plant); keep one entry per plant.
        pp_df = pp_df.drop_duplicates(subset='eic_p', keep='first').set_index('eic_p')
        # Keep only plants in target countries, of the target technology, that are still commissioned.
        pp_df = pp_df.loc[pp_df["ISO2"].isin(country_codes)
                          & (pp_df['type_g'] == tech_config['jrc_type'])
                          & (pp_df["status_g"] == 'COMMISSIONED')]
        # Remove plants commissioned before the technology-specific threshold year.
        if 'comm_year_threshold' in tech_config:
            pp_df = pp_df[~(pp_df['year_commissioned'] < tech_config['comm_year_threshold'])]

        # Rename columns for consistency across datasets.
        pp_df.rename(columns={'capacity_p': 'Capacity', 'name_p': 'Name'}, inplace=True)

    # Filter out plants in countries with additional constraints (e.g., nuclear decommissioning in DE)
    if 'countries_out' in tech_config:
        pp_df = pp_df[~pp_df['ISO2'].isin(tech_config['countries_out'])]
    pp_df['Name'] = pp_df['Name'].apply(unidecode)

    return pp_df[['Name', 'Capacity', 'ISO2', 'lon', 'lat']]
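
A short usage sketch, assuming the JRC source files are available under `data_path`:

# Run-of-river plants in Switzerland and Austria (read from the JRC hydro database).
ror_plants_df = get_powerplants('ror', ['CH', 'AT'])
print(ror_plants_df[['Name', 'Capacity', 'ISO2']].head())
# Any non-hydro technology with a 'jrc_type' in its config would instead
# be read through the JRC PPDB branch.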
Example #5
def generate_eu_hydro_files(resolution: float, topology_unit: str,
                            timestamps: pd.DatetimeIndex):
    """
     Generating hydro files, i.e., capacities and inflows.

     Parameters
     ----------
     resolution: float
         Runoff data spatial resolution.
     topology_unit: str
         Topology in use ('countries', 'NUTS2', 'NUTS3').
     timestamps: pd.DatetimeIndex
         Time horizon for which inflows are computed.

     """

    assert topology_unit in ["countries", "NUTS2", "NUTS3"], "Error: requested topology_unit not available."

    # Load shapes based on topology
    if topology_unit == 'countries':
        shapes = get_natural_earth_shapes()
    else:  # topology in ['NUTS2', 'NUTS3']
        shapes = get_nuts_shapes(topology_unit[-1:])
    shapes_countries = replace_iso2_codes([code[:2] for code in shapes.index])
    countries = sorted(list(set(shapes_countries)))

    tech_dir = f"{data_path}technologies/"
    tech_config = yaml.load(open(join(tech_dir, 'tech_config.yml')), Loader=yaml.FullLoader)

    # Runoff data
    runoff_dataset = read_runoff_data(resolution, timestamps)

    # Find which region each runoff point belongs to
    runoff_points_region_ds = \
        match_points_to_regions(runoff_dataset.locations.values, shapes, keep_outside=False).dropna()
    logger.info('Runoff measurement points mapped to regions shapes.')

    def add_region_code(pp_df: pd.DataFrame):
        if topology_unit == "countries":
            pp_df['region_code'] = pp_df["ISO2"]
        else:
            pp_df['region_code'] = match_powerplants_to_regions(pp_df, shapes, shapes_countries)
            pp_df = pp_df[~pp_df['region_code'].isnull()]
        return pp_df

    # Build ROR data
    # Get all ROR powerplants in the countries of interest and add region name
    logger.info('Building ROR data')
    ror_plants_df = get_powerplants('ror', countries)
    ror_plants_df = add_region_code(ror_plants_df)
    # Get capacity and inflow per region (for which inflow data exists)
    ror_capacity_ds, ror_inflows_df = build_ror_data(ror_plants_df.set_index(["region_code"])["Capacity"], timestamps,
                                                     runoff_dataset, runoff_points_region_ds)

    # Build STO data
    logger.info('Building STO data')
    sto_plants_df = get_powerplants('sto', countries)
    sto_plants_df = add_region_code(sto_plants_df)
    sto_capacity_df, sto_inflows_df, sto_multipliers_ds = \
        build_sto_data(sto_plants_df.set_index(["region_code"])["Capacity"], timestamps,
                       runoff_dataset, runoff_points_region_ds, ror_capacity_ds, ror_inflows_df)

    # Build PHS data
    logger.info('Building PHS data')
    default_phs_duration = tech_config['phs']['default_duration']

    phs_plants_df = get_powerplants('phs', countries)
    phs_plants_df = add_region_code(phs_plants_df)
    phs_capacity_df = build_phs_data(phs_plants_df, default_phs_duration)

    # Merge capacities into a single DataFrame.
    capacities_df = pd.concat([ror_capacity_ds, sto_capacity_df, phs_capacity_df], axis=1, sort=True).round(3)
    capacities_df.columns = ['ROR_CAP [GW]', 'STO_CAP [GW]', 'STO_EN_CAP [GWh]', 'PSP_CAP [GW]', 'PSP_EN_CAP [GWh]']
    capacities_df.replace(0., np.nan, inplace=True)
    capacities_df.dropna(how='all', inplace=True)
    ror_inflows_df = ror_inflows_df[capacities_df['ROR_CAP [GW]'].dropna().index]
    sto_inflows_df = sto_inflows_df[capacities_df['STO_CAP [GW]'].dropna().index]

    # Saving files
    save_dir = f"{data_path}generation/hydro/generated/"
    capacities_df.to_csv(f"{save_dir}hydro_capacities_per_{topology_unit}.csv")
    ror_inflows_df.to_csv(f"{save_dir}hydro_ror_time_series_per_{topology_unit}_pu.csv")
    sto_inflows_df.to_csv(f"{save_dir}hydro_sto_inflow_time_series_per_{topology_unit}_GWh.csv")
    sto_multipliers_ds.to_csv(f"{save_dir}hydro_sto_multipliers_per_{topology_unit}.csv", header=['multiplier'])
    logger.info('Files saved to disk.')
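
An end-to-end invocation sketch; the year, resolution and topology are illustrative, and the ERA5, JRC and shapes source files are assumed present:

import pandas as pd

timestamps = pd.date_range("2015-01-01", "2015-12-31 23:00", freq="H")
# Writes the capacity and ROR/STO inflow time-series CSVs
# under {data_path}generation/hydro/generated/.
generate_eu_hydro_files(resolution=0.5, topology_unit="NUTS2", timestamps=timestamps)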
Example #6
def build_sto_data(sto_capacity_ds: pd.Series, timestamps: pd.DatetimeIndex,
                   runoff_dataset: xr.Dataset, runoff_points_region_ds: pd.Series,
                   ror_capacity_ds: pd.Series, ror_inflows_df: pd.DataFrame) \
        -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series]:
    """
    Compute total STO power (GW) and energy (GWh) capacities and inflow (GWh) for a series of regions.

    Parameters
    ----------
    sto_capacity_ds: pd.Series
        Series containing STO power capacity (MW) per plant, indexed by the region in which the plant is located.
    timestamps: pd.DatetimeIndex
        Time stamps over which the inflows must be computed.
    runoff_dataset: xr.Dataset
        ERA5 runoff dataset with area per dataset point.
    runoff_points_region_ds: pd.Series
        Indicates in which region each ERA5 point falls.
    ror_capacity_ds: pd.Series
        Series with ROR hydro capacities (GW) for each geographical unit considered.
    ror_inflows_df: pd.DataFrame
        Data frame with ROR capacity factors (p.u.) for each geographical unit across the time horizon considered.

    Returns
    -------
    sto_capacity_df: pd.DataFrame
        DataFrame containing STO power (GW) and energy (GWh) capacities per region.
    sto_inflows_df: pd.DataFrame
        STO inflow time-series (GWh) for each region.
    sto_multipliers_ds: pd.Series
        STO multipliers per country.
    """
    # Aggregate plant capacities per region and convert from MW to GW.
    sto_capacity_ds = sto_capacity_ds.groupby(sto_capacity_ds.index).sum() * 1e-3

    # Compute energy capacity of STO plants by regions
    storage_capacities = compute_storage_capacities(sto_capacity_ds)
    sto_capacity_df = pd.concat([sto_capacity_ds, storage_capacities], axis=1, ignore_index=True, sort=True)
    sto_capacity_df.columns = ['Capacity', 'Energy']
    # Some regions can end up without any storage capacities
    sto_capacity_df = sto_capacity_df.dropna()

    # STO inflow (in GWh)
    sto_inflows_df = pd.DataFrame(index=timestamps, columns=sto_capacity_df.index)
    for region in sto_capacity_df.index:
        points = runoff_points_region_ds[runoff_points_region_ds == region].index.to_list()
        if points:
            sto_inflows_df[region] = compute_sto_inflows(runoff_dataset, points).values
    sto_inflows_df.dropna(axis=1, inplace=True)
    missing_inflows_indexes = ~sto_capacity_df.index.isin(sto_inflows_df.columns)
    missing_sto_gw = sto_capacity_df.loc[missing_inflows_indexes]['Capacity'].dropna().sum()
    missing_sto_gwh = sto_capacity_df.loc[missing_inflows_indexes]['Energy'].dropna().sum()
    sto_capacity_df = sto_capacity_df.loc[sto_inflows_df.columns]
    logger.info(f'STO inflows computed. '
                f'{missing_sto_gw} GW / {missing_sto_gwh} GWh removed because '
                f'of ERA5 point unavailability in regions.')

    # Compute STO multipliers
    years = list(timestamps.year.unique())
    countries = replace_iso2_codes(list(set([code[:2] for code in sto_inflows_df.columns])))
    sto_multipliers_ds = compute_countries_sto_multipliers(years, countries, sto_inflows_df,
                                                           ror_inflows_df, ror_capacity_ds)

    # Apply multipliers to STO inflows
    for nuts in sto_inflows_df.columns:
        sto_inflows_df[nuts] *= sto_multipliers_ds[nuts[:2]]

    return sto_capacity_df, sto_inflows_df, sto_multipliers_ds
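
A hedged call sketch following the chaining in generate_eu_hydro_files (Example #5): the ROR outputs of build_ror_data feed the STO multiplier computation, and sto_plants_df is assumed to come from get_powerplants('sto', countries) with a 'region_code' column attached:

sto_capacity_df, sto_inflows_df, sto_multipliers_ds = \
    build_sto_data(sto_plants_df.set_index(["region_code"])["Capacity"], timestamps,
                   runoff_dataset, runoff_points_region_ds, ror_capacity_ds, ror_inflows_df)
# sto_capacity_df holds 'Capacity' (GW) and 'Energy' (GWh) per region;
# sto_inflows_df holds inflows in GWh, already scaled by the per-country multipliers.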
Example #7
def get_nuts_storage_distribution_from_grand(nuts_codes: List[str]) -> pd.Series:
    """
     Estimating STO energy storage distribution per NUTS sub-divisions.

     Parameters
     ----------
     nuts_codes: List[str]
         List of NUTS (e.g., "NUTS2", "NUTS3") codes for which data is retrieved.

     Returns
     -------
     storage_distribution_ds: pd.DataFrame
         DataFrame containing STO energy storage distribution keys per NUTS regions.

    """

    assert len(nuts_codes) != 0, "Error: Empty list of NUTS codes."

    # Read GRanD database
    source_dir = f"{data_path}generation/hydro/source/GDW/GRanD_Version_1_3/"
    grand_reservoirs_fn = f"{source_dir}GRanD_reservoirs_v1_3.shp"
    reservoirs_df = pd.DataFrame(gpd.read_file(grand_reservoirs_fn)).set_index('GRAND_ID')
    # A particular reservoir is manually removed (others could follow). The Vanern lake (SE) is labeled as a reservoir
    # with hydro power activities, though information online suggests otherwise. Its presence in the associated NUTS
    # region leads to inconsistencies in the distribution of Swedish storage potential across the country.
    reservoirs_df = reservoirs_df[reservoirs_df['RES_NAME'] != 'Vanern']

    # Get the NUTS0 codes, ISO2 codes and names of the countries the NUTS regions belong to
    nuts0_codes = list(set([nuts[:2] for nuts in nuts_codes]))
    iso2_codes = replace_iso2_codes(nuts0_codes)
    countries_names = convert_country_codes(iso2_codes, 'alpha_2', 'name', True)

    # Get NUTS region shapes
    shapes = get_nuts_shapes(str(len(nuts_codes[0]) - 2), nuts_codes)
    shapes_countries = replace_iso2_codes([c[:2] for c in shapes.index])

    # Filter out reservoirs whose purpose is not hydro power generation.
    reservoirs_df["COUNTRY"] = reservoirs_df["COUNTRY"].apply(convert_old_country_names)
    reservoirs_hydropower_df = reservoirs_df[(reservoirs_df['USE_ELEC'].isin(['Main', 'Sec', 'Major'])) &
                                             (reservoirs_df['COUNTRY'].isin(countries_names))].copy()

    # Associating each plant to the corresponding NUTS region
    reservoirs_hydropower_df["ISO2"] = \
        convert_country_codes(reservoirs_hydropower_df['COUNTRY'], 'name', 'alpha_2', True)
    reservoirs_hydropower_df["region_code"] = \
        match_powerplants_to_regions(reservoirs_hydropower_df.rename(columns={'LONG_DD': 'lon', 'LAT_DD': 'lat'}),
                                     shapes, shapes_countries)
    reservoirs_hydropower_df = reservoirs_hydropower_df[~reservoirs_hydropower_df['region_code'].isnull()]

    # Aggregating storage capacity per NUTS region
    storage_by_nuts_ds = reservoirs_hydropower_df.groupby(by=reservoirs_hydropower_df['region_code'])['CAP_MCM'].sum()

    # Compute storage distribution keys per NUTS region by dividing the capacity
    # per NUTS region by the total capacity of all NUTS regions in the same country.
    storage_distribution_list = []
    for nuts0_code, iso2_code in zip(nuts0_codes, iso2_codes):
        storage_sum_per_country = \
            reservoirs_hydropower_df[reservoirs_hydropower_df['ISO2'] == iso2_code]['CAP_MCM'].sum()
        storage_ds_temp = storage_by_nuts_ds[storage_by_nuts_ds.index.str.contains(nuts0_code)]
        storage_ds_temp /= storage_sum_per_country
        storage_distribution_list.append(storage_ds_temp)
    storage_distribution_ds = pd.concat(storage_distribution_list)

    return storage_distribution_ds
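
A minimal usage sketch, assuming the GRanD v1.3 shapefile is present; the NUTS2 codes are illustrative:

# Distribution keys for four Swedish NUTS2 regions; within a country,
# the keys of matched regions sum to (at most) 1.
nuts_codes = ["SE11", "SE12", "SE21", "SE22"]
storage_distribution_ds = get_nuts_storage_distribution_from_grand(nuts_codes)
print(storage_distribution_ds)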