def read_worldbank(model="World Bank", scenario="WDI", **kwargs):
    """Download World Bank indicator data and wrap it in an IamDataFrame

    Thin convenience layer around :func:`pandas_datareader.wb.download`
    (see also :class:`pandas_datareader.wb.WorldBankReader`). To look up
    indicator ids, countries, etc., import the underlying module directly:

    .. code-block:: python

        from pandas_datareader import wb

    Parameters
    ----------
    model : str, optional
        Value of the `model` column assigned to all returned timeseries.
    scenario : str, optional
        Value of the `scenario` column assigned to all returned timeseries.
    kwargs
        Forwarded unchanged to :func:`pandas_datareader.wb.download`.

    Returns
    -------
    :class:`IamDataFrame`

    Raises
    ------
    ImportError
        If the optional dependency `pandas-datareader` is not available.

    Notes
    -----
    :func:`pandas_datareader.wb.download` accepts an `indicator` argument
    (a string or a list of strings). If a dictionary mapping World Bank ids
    to names is passed as `indicator` instead, that mapping is used to rename
    the variables of the returned IamDataFrame.

    No unit is returned by :func:`pandas_datareader.wb.download`; for some
    indicators it could be obtained via
    :func:`pandas_datareader.wb.get_indicators`. For now, the unit is set to
    `n/a` for all data; this can be enhanced later (if there is interest
    from users).
    """
    if not HAS_DATAREADER:  # pragma: no cover
        raise ImportError("Required package `pandas-datareader` not found!")

    raw = wb.download(**kwargs)

    # every column of the downloaded frame is a value column; the index levels
    # become regular columns, with "country" serving as the region
    result = IamDataFrame(
        raw.reset_index(),
        model=model,
        scenario=scenario,
        value=raw.columns,
        unit="n/a",
        region="country",
    )
    # TODO use wb.get_indicators to retrieve correct units (where available)

    # a dict-valued `indicator` doubles as the variable-renaming map
    indicator = kwargs.get("indicator")
    if isinstance(indicator, dict):
        result.rename(variable=indicator, inplace=True)

    return result
def test_init_from_iamdf(test_df_year):
    """Check the aliasing semantics of casting an IamDataFrame to itself

    In-place operations on the re-cast object are expected to show up on the
    original, whereas operations returning a new object are not.
    """
    # passing an existing IamDataFrame to the constructor must work
    recast = IamDataFrame(test_df_year)

    # an in-place rename through the re-cast object is visible on the original
    recast.rename(scenario={"scen_a": "scen_foo"}, inplace=True)
    assert test_df_year.scenario == ["scen_b", "scen_foo"]

    # a non-inplace rename yields a separate object; the original is untouched
    renamed = recast.rename(scenario={"scen_foo": "scen_bar"})
    assert renamed.scenario == ["scen_b", "scen_bar"]
    assert test_df_year.scenario == ["scen_b", "scen_foo"]
def _validate(df: pyam.IamDataFrame) -> pyam.IamDataFrame:
    """Validate variables, regions and subannual time resolution of `df`

    Steps, in order:

    1. Load the definitions from the `definitions` folder next to this module
       (including "subannual" only when the data has a "subannual" dimension
       or a "time" column).
    2. Rename regions given by their "abbr" or "iso3" synonym to the region
       name; this modifies the passed `df` in place.
    3. Validate regions and variables against the definitions.
    4. If the data uses a "time" column, re-cast it to the year/"subannual"
       format.
    5. Check every "subannual" item that is not a defined timeslice: it must
       parse as "MM-DD HH:MM" plus a UTC offset, and the offset must match
       the expected timezone.

    Parameters
    ----------
    df : pyam.IamDataFrame
        The scenario data to be validated.

    Returns
    -------
    pyam.IamDataFrame
        The validated data; a new object if the time domain was re-cast.

    Raises
    ------
    ValueError
        If a non-defined subannual item is not a valid datetime
        ("Invalid subannual timeslice"), has no UTC offset
        ("Missing timezone"), or has an unexpected one ("Invalid timezone").
    """
    # load definitions (including 'subannual' if included in the scenario data)
    dimensions = ["region", "variable"]
    if "subannual" in df.dimensions or df.time_col == "time":
        dimensions.append("subannual")
    definition = DataStructureDefinition(here / "definitions", dimensions=dimensions)

    # map each region synonym (abbreviation, ISO3 code) to its region name
    synonym_to_region = {
        attributes[key]: region
        for region, attributes in definition.region.items()
        for key in ("abbr", "iso3")
        if key in attributes
    }
    df.rename(region=synonym_to_region, inplace=True)

    # check variables and regions
    definition.validate(df, dimensions=["region", "variable"])

    # convert to subannual format if data provided in datetime format
    if df.time_col == "time":
        logger.info('Re-casting from "time" column to categorical "subannual" format')
        df = df.swap_time_for_year(subannual=OE_SUBANNUAL_FORMAT)

    # nothing more to check without a subannual dimension
    if "subannual" not in df.dimensions:
        return df

    # items that are not defined timeslices must be datetime-like strings
    # carrying the expected timezone
    unlisted = [item for item in df.subannual if item not in definition.subannual]
    for item in unlisted:
        # a year is prepended for parsing (presumably 2020 because it is a leap year)
        stamp = f"2020-{item}"
        try:
            parsed = datetime.strptime(stamp, "%Y-%m-%d %H:%M%z")
        except ValueError:
            # distinguish "not a datetime at all" from "datetime without offset"
            try:
                datetime.strptime(stamp, "%Y-%m-%d %H:%M")
            except ValueError:
                raise ValueError(f"Invalid subannual timeslice: {item}")

            raise ValueError(f"Missing timezone: {item}")

        # parsing with timezone succeeded: neither the name nor the offset matches
        if parsed.tzname() != EXP_TZ and parsed.utcoffset() != EXP_TIME_OFFSET:
            raise ValueError(f"Invalid timezone: {item}")

    return df
def test_rename():
    """Renaming two variables to the same name merges their timeseries

    For region "SST", "test_1" and "test_3" are both mapped to "test"; the
    expected values are the sums of the two original rows.
    """
    columns = ["model", "scenario", "region", "variable", "unit", 2005, 2010]

    source = pd.DataFrame(
        [
            ["model", "scen", "SST", "test_1", "unit", 1, 5],
            ["model", "scen", "SDN", "test_2", "unit", 2, 6],
            ["model", "scen", "SST", "test_3", "unit", 3, 7],
        ],
        columns=columns,
    )
    df = IamDataFrame(source)

    mapping = {"variable": {"test_1": "test", "test_3": "test"}}
    obs = df.rename(mapping).data.reset_index(drop=True)

    expected = pd.DataFrame(
        [
            ["model", "scen", "SST", "test", "unit", 4, 12],
            ["model", "scen", "SDN", "test_2", "unit", 2, 6],
        ],
        columns=columns,
    )
    # sort the expected long-format data by region before comparing
    exp = IamDataFrame(expected).data
    exp = exp.sort_values(by="region").reset_index(drop=True)

    pd.testing.assert_frame_equal(obs, exp, check_index_type=False)
def recursive_df(request):
    """Yield an IamDataFrame holding RECURSIVE_DF for two scenarios

    The data is used as-is when `request.param` is "year"; otherwise its
    columns are renamed via DTS_MAPPING. Scenario "scen_b" is a copy of
    "scen_a" with all values doubled.
    """
    if request.param == "year":
        data = RECURSIVE_DF
    else:
        data = RECURSIVE_DF.rename(DTS_MAPPING, axis="columns")

    base = IamDataFrame(data, model="model_a", scenario="scen_a", region="World")

    # second scenario: same timeseries, values scaled by 2
    doubled = base.rename(scenario={"scen_a": "scen_b"})
    doubled._data *= 2
    base.append(doubled, inplace=True)

    yield base
def test_aggregate_recursive(time_col):
    """Check recursive aggregation along the variable tree (`recursive=True`)

    The parent variables are removed from the test data and must be
    reconstructed by the recursive aggregation, both as a new object and
    when appended to the filtered object itself.
    """
    if time_col == "year":
        data = RECURSIVE_DF
    else:
        data = RECURSIVE_DF.rename(DTS_MAPPING, axis="columns")

    df = IamDataFrame(data, model="model_a", scenario="scen_a", region="World")

    # add a second scenario intended to hold doubled values
    # NOTE(review): if `data` returns a copy, this scaling never reaches `df2`
    # -- confirm against the IamDataFrame implementation
    df2 = df.rename(scenario={"scen_a": "scen_b"})
    df2.data.value *= 2
    df.append(df2, inplace=True)

    # create object without variables to be aggregated
    v = "Secondary Energy|Electricity"
    agg_vars = [v, f"{v}|Wind"]
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # return recursively aggregated data as new object
    expected = df.filter(variable=agg_vars)
    obs = df_minimal.aggregate(variable=v, recursive=True)
    assert_iamframe_equal(obs, expected)

    # append to `self`
    df_minimal.aggregate(variable=v, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)
def test_aggregate_recursive(time_col):
    """Check that `aggregate(recursive=True)` rebuilds removed parent variables

    Covers both return modes: the aggregate returned as a new object, and
    the aggregate appended to the object it was computed from.
    """
    # use the year-based data directly or switch its columns via DTS_MAPPING
    if time_col == 'year':
        data = RECURSIVE_DF
    else:
        data = RECURSIVE_DF.rename(DTS_MAPPING, axis='columns')

    df = IamDataFrame(data, model='model_a', scenario='scen_a', region='World')

    # NOTE(review): scaling goes through `df2.data`; whether it affects `df2`
    # depends on `data` not being a copy -- confirm
    df2 = df.rename(scenario={'scen_a': 'scen_b'})
    df2.data.value *= 2
    df.append(df2, inplace=True)

    # drop the variables that the aggregation is expected to re-create
    v = 'Secondary Energy|Electricity'
    agg_vars = [v + suffix for suffix in ('', '|Wind')]
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # the recursively aggregated data is returned as a new object
    obs = df_minimal.aggregate(variable=v, recursive=True)
    exp = df.filter(variable=agg_vars)
    assert_iamframe_equal(obs, exp)

    # with `append=True`, the result is added to `df_minimal` itself
    df_minimal.aggregate(variable=v, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)