def test_rename_duplicates():
    mapping = {'variable': {'test_1': 'test_3'}}
    pytest.raises(ValueError, RENAME_DF.rename, **mapping)

    obs = RENAME_DF.rename(check_duplicates=False, **mapping)

    exp = IamDataFrame(
        pd.DataFrame(
            [
                ['model', 'scen', 'region_a', 'test_2', 'unit', 2, 6],
                ['model', 'scen', 'region_a', 'test_3', 'unit', 4, 12],
                ['model', 'scen', 'region_b', 'test_3', 'unit', 4, 8],
            ],
            columns=IAMC_IDX + [2005, 2010],
        ))

    assert compare(obs, exp).empty
    pd.testing.assert_frame_equal(obs.data, exp.data)
def test_swap_time_to_year_errors(test_df):
    """Assert that swapping time column for year (int) raises the expected errors"""

    # swapping time to year raises when the IamDataFrame has time domain `year`
    if test_df.time_col == "year":
        match = "Time domain must be datetime to use this method"
        with pytest.raises(ValueError, match=match):
            test_df.swap_time_for_year()

    else:
        # set time column to same year so that dropping month/day leads to duplicates
        tdf = test_df.data
        tdf["time"] = tdf["time"].apply(lambda x: datetime(2005, x.month, x.day))

        with pytest.raises(ValueError, match="Swapping time for year causes duplicate"):
            IamDataFrame(tdf).swap_time_for_year()
def test_cast_from_value_col_and_args(meta_df):
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    df_with_value_cols = pd.DataFrame([
        ['scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
        ['scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
        ['scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
        ['scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None]
    ],
        columns=['scenario', 'iso', 'unit', 'time',
                 'Primary Energy', 'Primary Energy|Coal'],
    )
    df = IamDataFrame(df_with_value_cols, model='model_a', region='iso',
                      value=['Primary Energy', 'Primary Energy|Coal'])
    if "year" in meta_df.data.columns:
        df = df.swap_time_for_year()

    assert compare(meta_df, df).empty
    pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)
def test_rename_duplicates(): mapping = {"variable": {"test_1": "test_3"}} pytest.raises(ValueError, RENAME_DF.rename, **mapping) obs = RENAME_DF.rename(check_duplicates=False, **mapping) exp = IamDataFrame( pd.DataFrame( [ ["model", "scen", "region_a", "test_2", "unit", 2, 6], ["model", "scen", "region_a", "test_3", "unit", 4, 12], ["model", "scen", "region_b", "test_3", "unit", 4, 8], ], columns=IAMC_IDX + [2005, 2010], )) assert compare(obs, exp).empty pd.testing.assert_frame_equal(obs.data, exp.data)
def test_derive_relationship_int_years(self):
    # The code should be able to use either int or int64 year values
    times = [2010, 2020, 2030]
    regular_db = IamDataFrame(
        pd.DataFrame(
            [[_ma, _sa + str(val), "World", _eco2, _gtc, val, val, val]
             for val in range(4)],
            columns=_msrvu + times,
        ))
    tcruncher = self.tclass(regular_db)
    quantile_dict = {times[0]: 0.4, times[1]: 0.9, times[2]: 0.01}
    res = tcruncher.derive_relationship(
        "Emissions|CO2",
        ["Emissions|CO2"],
        quantile_dict,
    )
    crunched = res(regular_db)
    assert len(crunched["value"]) == len(
        regular_db.filter(variable="Emissions|CO2")["value"])
def test_relationship_usage(self, test_db, simple_df, add_col):
    tcruncher = self.tclass(test_db)
    lead = ["Emissions|CO2"]
    follow = "Emissions|CH4"
    simple_df = self._adjust_time_style_to_match(simple_df, test_db)
    res = tcruncher.derive_relationship(follow, lead)
    if add_col:
        add_col_val = "blah"
        simple_df[add_col] = add_col_val
        simple_df = IamDataFrame(simple_df.data)
        assert simple_df.extra_cols[0] == add_col

    infilled = res(simple_df)

    # Compare the results with the expected results: at the first timestep the first
    # scenario is below the lower limit and the second is in the middle of the range.
    # At later times we are always above the highest value.
    time_filter = {infilled.time_col: [infilled[infilled.time_col][0]]}
    sorted_follow_t0 = np.sort(
        test_db.filter(variable=follow, **time_filter)["value"].values)
    assert np.allclose(
        infilled.filter(**time_filter)["value"].values,
        [
            sorted_follow_t0[0],
            sorted_follow_t0[1],
        ],
    )
    for time_ind in range(1, 3):
        time_filter = {
            infilled.time_col: [infilled[infilled.time_col][time_ind]]
        }
        assert np.allclose(
            infilled.filter(**time_filter)["value"].values,
            max(
                test_db.filter(variable=follow).filter(
                    **time_filter)["value"].values),
        )

    # Test we can append our answer
    append_df = simple_df.filter(variable=lead).append(infilled)
    assert append_df.filter(variable=follow).equals(infilled)
    if add_col:
        assert all(append_df[add_col] == add_col_val)
def test_cast_from_value_col_and_args(meta_df):
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    df_with_value_cols = pd.DataFrame(
        [['scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
         ['scen_a', 'World', 'EJ/y', 2010, 6., 3],
         ['scen_b', 'World', 'EJ/y', 2005, 2, None],
         ['scen_b', 'World', 'EJ/y', 2010, 7, None]],
        columns=[
            'scenario', 'iso', 'unit', 'year',
            'Primary Energy', 'Primary Energy|Coal'
        ],
    )
    df = IamDataFrame(df_with_value_cols, model='model_a', region='iso',
                      value=['Primary Energy', 'Primary Energy|Coal'])

    assert compare(meta_df, df).empty
    pd.testing.assert_frame_equal(df.data, meta_df.data)
def test_swap_time_to_year(test_df, inplace):
    if "year" in test_df.data:
        return  # year df not relevant for this test

    exp = test_df.data.copy()
    exp["year"] = exp["time"].apply(lambda x: x.year)
    exp = exp.drop("time", axis="columns")
    exp = IamDataFrame(exp)

    obs = test_df.swap_time_for_year(inplace=inplace)
    if inplace:
        assert obs is None
        obs = test_df

    assert compare(obs, exp).empty
    assert obs.year == [2005, 2010]
    with pytest.raises(AttributeError):
        obs.time
def test_swap_time_to_year(test_df, inplace):
    """Swap time column for year (int) dropping subannual time resolution (default)"""

    if test_df.time_col == "year":
        pytest.skip("IamDataFrame with time domain `year` not relevant for this test.")

    exp = test_df.data
    exp["year"] = exp["time"].apply(lambda x: x.year)
    exp = exp.drop("time", axis="columns")
    exp = IamDataFrame(exp, meta=test_df.meta)

    obs = test_df.swap_time_for_year(inplace=inplace)
    if inplace:
        assert obs is None
        obs = test_df

    assert_iamframe_equal(obs, exp)
    pdt.assert_index_equal(obs.time, pd.Index([2005, 2010], name="time"))
def test_divide_scenario(test_df_year, append):
    """Verify that in-dataframe division works on a custom axis (`scenario`)"""
    v = ("scen_a", "scen_b", "scen_ratio")
    exp = IamDataFrame(
        pd.DataFrame([1 / 2, 6 / 7], index=[2005, 2010]).T,
        model="model_a",
        scenario=v[2],
        region="World",
        variable="Primary Energy",
        unit="",
    )

    if append:
        obs = test_df_year.copy()
        obs.divide(*v, axis="scenario", append=True)
        assert_iamframe_equal(test_df_year.append(exp), obs)
    else:
        obs = test_df_year.divide(*v, axis="scenario")
        assert_iamframe_equal(exp, obs)
def test_relationship_usage_wrong_time_col(self, test_db, test_downscale_df):
    test_db = test_db.filter(variable=["Emissions|HFC|C5F12", "Emissions|HFC|C2F6"])
    tcruncher = self.tclass(test_db)
    filler = tcruncher.derive_relationship("Emissions|HFC|C5F12",
                                           ["Emissions|HFC|C2F6"])

    if test_db.time_col == "year":
        test_downscale_df = test_downscale_df.timeseries()
        test_downscale_df.columns = test_downscale_df.columns.map(
            lambda x: dt.datetime(x, 1, 1))
        test_downscale_df = IamDataFrame(test_downscale_df)

    error_msg = re.escape(
        "`in_iamdf` time column must be the same as the time column used "
        "to generate this filler function (`{}`)".format(test_db.time_col))
    with pytest.raises(ValueError, match=error_msg):
        filler(test_downscale_df)
def test_48a():
    # tests fix for #48 mapping many->few
    df = IamDataFrame(pd.DataFrame([
        ['model', 'scen', 'SSD', 'var', 'unit', 1, 6],
        ['model', 'scen', 'SDN', 'var', 'unit', 2, 7],
        ['model', 'scen1', 'SSD', 'var', 'unit', 2, 7],
        ['model', 'scen1', 'SDN', 'var', 'unit', 2, 7],
    ],
        columns=['model', 'scenario', 'region',
                 'variable', 'unit', 2005, 2010],
    ))

    exp = _r5_regions_exp(df)
    columns = df.data.columns
    grp = list(columns)
    grp.remove('value')
    exp = exp.groupby(grp).sum().reset_index()
    exp = exp[columns]

    obs = df.map_regions('r5_region', region_col='iso', agg='sum').data

    pd.testing.assert_frame_equal(obs, exp, check_index_type=False)
def test_aggregate_recursive(time_col):
    # use the feature `recursive=True`
    data = RECURSIVE_DF if time_col == 'year' \
        else RECURSIVE_DF.rename(DTS_MAPPING, axis='columns')
    df = IamDataFrame(data, model='model_a', scenario='scen_a', region='World')
    df2 = df.rename(scenario={'scen_a': 'scen_b'})
    df2.data.value *= 2
    df.append(df2, inplace=True)

    # create object without variables to be aggregated
    v = 'Secondary Energy|Electricity'
    agg_vars = [f'{v}{i}' for i in ['', '|Wind']]
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # return recursively aggregated data as new object
    obs = df_minimal.aggregate(variable=v, recursive=True)
    assert_iamframe_equal(obs, df.filter(variable=agg_vars))

    # append to `self`
    df_minimal.aggregate(variable=v, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)
def test_extreme_values_relationship(self):
    # Our cruncher has a closest-point extrapolation algorithm and therefore
    # should return the same values when filling for data outside the limits of
    # its cruncher.
    # Calculate the values using the cruncher for a fairly detailed dataset
    large_db_int = IamDataFrame(self.large_db)
    tcruncher = self.tclass(large_db_int)
    follow = "Emissions|CH4"
    lead = ["Emissions|CO2"]
    res = tcruncher.derive_relationship(follow, lead)
    crunched = res(large_db_int)

    # Increase the maximum values
    modify_extreme_db = large_db_int.filter(variable="Emissions|CO2").copy()
    max_scen = modify_extreme_db["scenario"].loc[
        modify_extreme_db["value"] == max(modify_extreme_db["value"])]
    ind = modify_extreme_db["value"].idxmax()
    modify_extreme_db["value"].loc[ind] += 10
    extreme_crunched = res(modify_extreme_db)

    # Check results are the same
    assert crunched.equals(extreme_crunched)
    # Also check that the results are correct
    assert crunched.filter(scenario=max_scen)["value"].iloc[0] == max(
        large_db_int.filter(variable=follow)["value"].values)

    # Repeat with reducing the minimum value. This works differently because the
    # minimum point is doubled. This modification causes the cruncher to pick the
    # lower value.
    min_scen = modify_extreme_db["scenario"].loc[
        modify_extreme_db["value"] == min(modify_extreme_db["value"])]
    ind = modify_extreme_db["value"].idxmin()
    modify_extreme_db["value"].loc[ind] -= 10
    extreme_crunched = res(modify_extreme_db)
    assert crunched.filter(scenario=min_scen)["value"].iloc[0] != min(
        large_db_int.filter(variable=follow)["value"].values)
    assert extreme_crunched.filter(scenario=min_scen)["value"].iloc[0] == min(
        large_db_int.filter(variable=follow)["value"].values)
def test_relationship_usage(self, test_downscale_df, add_col):
    units = "new units"
    tcruncher = self.tclass()
    test_downscale_df = test_downscale_df.filter(year=[2010, 2015])
    if add_col:
        # what should happen if there's more than one value in the `add_col`?
        add_col_val = "blah"
        test_downscale_df[add_col] = add_col_val
        test_downscale_df = IamDataFrame(test_downscale_df.data)
        assert test_downscale_df.extra_cols[0] == add_col

    lead = ["Emissions|HFC|C2F6"]
    follow = "Emissions|HFC|C5F12"
    filler = tcruncher.derive_relationship(follow, lead, ratio=2, units=units)
    res = filler(test_downscale_df)

    exp = test_downscale_df.filter(variable=lead)
    exp.data["variable"] = follow
    exp.data["value"] = exp.data["value"] * 2
    exp.data["unit"] = units

    pd.testing.assert_frame_equal(res.timeseries(), exp.timeseries(), check_like=True)

    # comes back on input timepoints
    np.testing.assert_array_equal(
        res.timeseries().columns.values.squeeze(),
        test_downscale_df.timeseries().columns.values.squeeze(),
    )

    # Test we can append the results correctly
    append_df = test_downscale_df.append(res)
    assert append_df.filter(variable=follow).equals(res)
    if add_col:
        assert all(append_df.filter(variable=lead)[add_col] == add_col_val)
def test_interpolate_extra_cols():
    # check that interpolation with non-matching extra_cols has no effect (#351)
    EXTRA_COL_DF = pd.DataFrame(
        [
            ['foo', 2005, 1],
            ['bar', 2010, 3],
        ],
        columns=['extra_col', 'year', 'value'],
    )
    df = IamDataFrame(EXTRA_COL_DF, model='model_a', scenario='scen_a',
                      region='World', variable='Primary Energy', unit='EJ/yr')

    # create a copy and interpolate
    df2 = df.copy()
    df2.interpolate(2007)

    # assert that interpolation didn't change any data
    assert_iamframe_equal(df, df2)
def test_relationship_usage(self, simple_df, add_col):
    tcruncher = self.tclass(simple_df)
    lead = ["Emissions|CH4"]
    follow = "Emissions|CO2"
    res = tcruncher.derive_relationship(follow, lead, required_scenario="scen_a")
    if add_col:
        add_col_val = "blah"
        simple_df[add_col] = add_col_val
        simple_df = IamDataFrame(simple_df.data)
        assert simple_df.extra_cols[0] == add_col

    expect_00 = res(simple_df)
    assert expect_00.filter(scenario="scen_a", year=2010)["value"].iloc[0] == 0
    assert expect_00.filter(scenario="scen_b", year=2010)["value"].iloc[0] == 0
    assert all(expect_00.filter(year=2030)["value"] == 1000)
    assert all(expect_00.filter(year=2050)["value"] == 5000)

    # If we include data from scen_b, we then get a slightly different answer
    res = tcruncher.derive_relationship(
        "Emissions|CO2", ["Emissions|CH4"], required_scenario=["scen_a", "scen_b"])
    expect_01 = res(simple_df)
    assert expect_01.filter(scenario="scen_a", year=2010)["value"].iloc[0] == 0
    assert expect_01.filter(scenario="scen_b", year=2010)["value"].iloc[0] == 1
    assert all(expect_01.filter(year=2030)["value"] == 1000)
    assert all(expect_01.filter(year=2050)["value"] == 5000)

    # Test we can append our answer
    append_df = simple_df.filter(variable=lead).append(expect_01)
    assert append_df.filter(variable=follow).equals(expect_01)
    if add_col:
        assert all(append_df[add_col] == add_col_val)
def test_swap_time_to_year(test_df, inplace):
    """Swap time column for year (int) dropping subannual time resolution (default)"""

    if test_df.time_col == "year":
        pytest.skip("IamDataFrame with time domain `year` not relevant for this test.")

    exp = test_df.data
    exp["year"] = exp["time"].apply(lambda x: x.year)
    exp = exp.drop("time", axis="columns")
    exp = IamDataFrame(exp, meta=test_df.meta)

    obs = test_df.swap_time_for_year(inplace=inplace)
    if inplace:
        assert obs is None
        obs = test_df

    assert_iamframe_equal(obs, exp)
    match = "'IamDataFrame' object has no attribute 'time'"
    with pytest.raises(AttributeError, match=match):
        obs.time
def test_derive_relationship_averaging_info(self, test_db, extra_info):
    # test that crunching uses average values if there's more than a single point
    # in the latest year for the lead gas in the database
    variable_follower = "Emissions|HFC|C5F12"
    variable_leader = ["Emissions|HFC|C2F6"]
    tdb = test_db.filter(variable=variable_follower, keep=False)
    tcruncher = self.tclass(
        self._join_iamdfs_time_wrangle(tdb, IamDataFrame(extra_info)))
    cruncher = tcruncher.derive_relationship(variable_follower, variable_leader)
    lead_db = test_db.filter(variable=variable_leader)
    infilled = cruncher(lead_db)

    # In both cases, the average follower value at the latest time is 2. We divide
    # by the value in 2015, which we have data for in both cases.
    lead_db_time = lead_db.data[lead_db.time_col]
    latest_time = lead_db_time == max(lead_db_time)
    expected = (2 * lead_db.data["value"]
                / lead_db.data["value"].loc[latest_time].values)
    assert np.allclose(infilled.data["value"], expected)

    # Test that the result can be appended without problems.
    lead_db.append(infilled, inplace=True)
    assert lead_db.filter(variable=variable_follower).equals(infilled)
def test_cast_from_value_col(test_df_year):
    df_with_value_cols = pd.DataFrame(
        [
            ["model_a", "scen_a", "World", "EJ/yr", 2005, 1, 0.5],
            ["model_a", "scen_a", "World", "EJ/yr", 2010, 6.0, 3],
            ["model_a", "scen_b", "World", "EJ/yr", 2005, 2, None],
            ["model_a", "scen_b", "World", "EJ/yr", 2010, 7, None],
        ],
        columns=[
            "model",
            "scenario",
            "region",
            "unit",
            "year",
            "Primary Energy",
            "Primary Energy|Coal",
        ],
    )
    df = IamDataFrame(df_with_value_cols,
                      value=["Primary Energy", "Primary Energy|Coal"])

    assert compare(test_df_year, df).empty
    pd.testing.assert_frame_equal(df.data, test_df_year.data, check_like=True)
def test_filter_index_with_custom_index(test_pd_df):
    # rename 'model' column and add a version column to the dataframe
    test_pd_df.rename(columns={"model": "source"}, inplace=True)
    test_pd_df["version"] = [1, 2, 3]
    index = ["source", "scenario", "version"]
    df = IamDataFrame(test_pd_df, index=index)

    obs = df.filter(index=[("model_a", "scen_a", 1), ("model_a", "scen_a", 2)])
    assert (
        obs.source == ["model_a"]
        and obs.scenario == ["scen_a"]
        and obs.version == [1, 2]
    )

    # a sub-set of levels is also supported
    index = pd.MultiIndex.from_tuples(
        [("model_a", "scen_a")], names=["source", "scenario"]
    )
    obs = df.filter(index=index)
    assert (
        obs.source == ["model_a"]
        and obs.scenario == ["scen_a"]
        and obs.version == [1, 2]
    )
def test_load_rcp_database_downloaded_file(test_df_year):
    exp = test_df_year.filter(**FILTER_ARGS).as_pandas()
    obs_df = IamDataFrame(
        os.path.join(TEST_DATA_DIR, 'test_RCP_database_raw_download.xlsx'))
    pd.testing.assert_frame_equal(obs_df.as_pandas(), exp)
def plot_stackplot_df():
    df = IamDataFrame(TEST_STACKPLOT_DF)
    yield df
def plot_df():
    df = IamDataFrame(data=os.path.join(TEST_DATA_DIR, "plot_data.csv"))
    yield df
def reg_df():
    df = IamDataFrame(data=REG_DF)
    yield df
def test_df_year():
    df = IamDataFrame(data=TEST_DF)
    for i in META_COLS:
        df.set_meta(META_DF[i])
    yield df
def test_df(request):
    df = IamDataFrame(data=TEST_DF.rename(request.param, axis="columns"))
    for i in META_COLS:
        df.set_meta(META_DF[i])
    yield df
def test_load_RCP_database_downloaded_file(test_df):
    obs_df = IamDataFrame(
        os.path.join(TEST_DATA_DIR, 'test_RCP_database_raw_download.xlsx'))
    pd.testing.assert_frame_equal(obs_df.as_pandas(), test_df.as_pandas())