Пример #1
0
def test_rename_duplicates():
    """Renaming onto an existing variable fails unless duplicate-checking is off."""
    mapping = {'variable': {'test_1': 'test_3'}}

    # with default settings, the rename must be rejected
    pytest.raises(ValueError, RENAME_DF.rename, **mapping)

    # disabling the duplicate check merges the overlapping rows instead
    obs = RENAME_DF.rename(check_duplicates=False, **mapping)

    expected_rows = [
        ['model', 'scen', 'region_a', 'test_2', 'unit', 2, 6],
        ['model', 'scen', 'region_a', 'test_3', 'unit', 4, 12],
        ['model', 'scen', 'region_b', 'test_3', 'unit', 4, 8],
    ]
    exp = IamDataFrame(pd.DataFrame(expected_rows, columns=IAMC_IDX + [2005, 2010]))

    assert compare(obs, exp).empty
    pd.testing.assert_frame_equal(obs.data, exp.data)
Пример #2
0
def test_swap_time_to_year_errors(test_df):
    """Assert that swapping time column for year (int) raises the expected errors"""

    if test_df.time_col == "year":
        # a year-based IamDataFrame cannot swap time for year
        with pytest.raises(
                ValueError,
                match="Time domain must be datetime to use this method"):
            test_df.swap_time_for_year()
    else:
        # force all timestamps into the same year so that dropping the
        # sub-annual resolution produces duplicate rows
        data = test_df.data
        data["time"] = data["time"].apply(lambda t: datetime(2005, t.month, t.day))

        with pytest.raises(ValueError,
                           match="Swapping time for year causes duplicate"):
            IamDataFrame(data).swap_time_for_year()
Пример #3
0
def test_cast_from_value_col_and_args(meta_df):
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    value_cols = ['Primary Energy', 'Primary Energy|Coal']
    rows = [
        ['scen_a', 'World', 'EJ/y', TEST_DTS[0], 1, 0.5],
        ['scen_a', 'World', 'EJ/y', TEST_DTS[1], 6., 3],
        ['scen_b', 'World', 'EJ/y', TEST_DTS[0], 2, None],
        ['scen_b', 'World', 'EJ/y', TEST_DTS[1], 7, None],
    ]
    wide = pd.DataFrame(rows, columns=['scenario', 'iso', 'unit', 'time'] + value_cols)

    # cast with custom `region` column and one value column per variable
    df = IamDataFrame(wide, model='model_a', region='iso', value=value_cols)
    if "year" in meta_df.data.columns:
        df = df.swap_time_for_year()

    assert compare(meta_df, df).empty
    pd.testing.assert_frame_equal(df.data, meta_df.data, check_like=True)
Пример #4
0
def test_rename_duplicates():
    """Renaming that would create duplicate rows needs `check_duplicates=False`."""
    kwargs = dict(variable={"test_1": "test_3"})

    # default settings reject the rename
    with pytest.raises(ValueError):
        RENAME_DF.rename(**kwargs)

    # without the duplicate check, overlapping rows are merged
    obs = RENAME_DF.rename(check_duplicates=False, **kwargs)

    data = pd.DataFrame(
        [
            ["model", "scen", "region_a", "test_2", "unit", 2, 6],
            ["model", "scen", "region_a", "test_3", "unit", 4, 12],
            ["model", "scen", "region_b", "test_3", "unit", 4, 8],
        ],
        columns=IAMC_IDX + [2005, 2010],
    )
    exp = IamDataFrame(data)

    assert compare(obs, exp).empty
    pd.testing.assert_frame_equal(obs.data, exp.data)
 def test_derive_relationship_int_years(self):
     """Crunching should accept plain ``int`` years as well as ``int64``."""
     years = [2010, 2020, 2030]
     rows = [
         [_ma, _sa + str(val), "World", _eco2, _gtc, val, val, val]
         for val in range(4)
     ]
     regular_db = IamDataFrame(pd.DataFrame(rows, columns=_msrvu + years))
     tcruncher = self.tclass(regular_db)
     quantiles = {years[0]: 0.4, years[1]: 0.9, years[2]: 0.01}
     res = tcruncher.derive_relationship(
         "Emissions|CO2",
         ["Emissions|CO2"],
         quantiles,
     )
     crunched = res(regular_db)
     # one crunched value per lead-variable data point
     assert len(crunched["value"]) == len(
         regular_db.filter(variable="Emissions|CO2")["value"])
Пример #6
0
    def test_relationship_usage(self, test_db, simple_df, add_col):
        """Infilled values match the expected quantile results and can be appended."""
        cruncher = self.tclass(test_db)
        lead = ["Emissions|CO2"]
        follow = "Emissions|CH4"
        simple_df = self._adjust_time_style_to_match(simple_df, test_db)
        res = cruncher.derive_relationship(follow, lead)
        if add_col:
            add_col_val = "blah"
            simple_df[add_col] = add_col_val
            simple_df = IamDataFrame(simple_df.data)
            assert simple_df.extra_cols[0] == add_col

        infilled = res(simple_df)

        # At the first time step we are below the lower limit for the first
        # scenario and in the middle for the second; at later times we are
        # always above the highest database value.
        t0_filter = {infilled.time_col: [infilled[infilled.time_col][0]]}
        follow_t0 = np.sort(
            test_db.filter(variable=follow, **t0_filter)["value"].values)
        assert np.allclose(
            infilled.filter(**t0_filter)["value"].values,
            [follow_t0[0], follow_t0[1]],
        )
        for time_ind in range(1, 3):
            t_filter = {
                infilled.time_col: [infilled[infilled.time_col][time_ind]]
            }
            expected = max(
                test_db.filter(variable=follow).filter(
                    **t_filter)["value"].values)
            assert np.allclose(
                infilled.filter(**t_filter)["value"].values, expected)

        # the infilled data can be appended to the input
        append_df = simple_df.filter(variable=lead).append(infilled)
        assert append_df.filter(variable=follow).equals(infilled)

        if add_col:
            assert all(append_df[add_col] == add_col_val)
Пример #7
0
def test_cast_from_value_col_and_args(meta_df):
    # checks for issue [#210](https://github.com/IAMconsortium/pyam/issues/210)
    cols = [
        'scenario', 'iso', 'unit', 'year', 'Primary Energy',
        'Primary Energy|Coal'
    ]
    wide_df = pd.DataFrame(
        [
            ['scen_a', 'World', 'EJ/y', 2005, 1, 0.5],
            ['scen_a', 'World', 'EJ/y', 2010, 6., 3],
            ['scen_b', 'World', 'EJ/y', 2005, 2, None],
            ['scen_b', 'World', 'EJ/y', 2010, 7, None],
        ],
        columns=cols,
    )
    # cast with custom `region` column and one value column per variable
    df = IamDataFrame(
        wide_df,
        model='model_a',
        region='iso',
        value=['Primary Energy', 'Primary Energy|Coal'],
    )

    assert compare(meta_df, df).empty
    pd.testing.assert_frame_equal(df.data, meta_df.data)
Пример #8
0
def test_swap_time_to_year(test_df, inplace):
    """Swapping time for year yields the expected year-domain IamDataFrame."""
    if "year" in test_df.data:
        return  # year df not relevant for this test

    # build the expected year-based object from the datetime data
    data = test_df.data.copy()
    data["year"] = data["time"].apply(lambda t: t.year)
    exp = IamDataFrame(data.drop("time", axis="columns"))

    obs = test_df.swap_time_for_year(inplace=inplace)

    if inplace:
        # in-place operations return None and modify the fixture object
        assert obs is None
        obs = test_df

    assert compare(obs, exp).empty
    assert obs.year == [2005, 2010]
    # the `time` attribute is gone after the swap
    with pytest.raises(AttributeError):
        obs.time
Пример #9
0
def test_swap_time_to_year(test_df, inplace):
    """Swap time column for year (int) dropping subannual time resolution (default)"""

    if test_df.time_col == "year":
        pytest.skip(
            "IamDataFrame with time domain `year` not relevant for this test.")

    # construct the expected year-based object (keeping `meta`)
    data = test_df.data
    data["year"] = data["time"].apply(lambda t: t.year)
    data = data.drop("time", axis="columns")
    exp = IamDataFrame(data, meta=test_df.meta)

    obs = test_df.swap_time_for_year(inplace=inplace)

    if inplace:
        assert obs is None  # in-place operations return None
        obs = test_df

    assert_iamframe_equal(obs, exp)
    pdt.assert_index_equal(obs.time, pd.Index([2005, 2010], name="time"))
Пример #10
0
def test_divide_scenario(test_df_year, append):
    """Verify that in-dataframe division works on a custom axis (`scenario`)"""

    # divide scen_a by scen_b, storing the result as scen_ratio
    v = ("scen_a", "scen_b", "scen_ratio")
    # expected result: element-wise ratios of the fixture values
    # (1/2 in 2005, 6/7 in 2010)
    exp = IamDataFrame(
        pd.DataFrame([1 / 2, 6 / 7], index=[2005, 2010]).T,
        model="model_a",
        scenario=v[2],
        region="World",
        variable="Primary Energy",
        unit="",
    )

    if append:
        # `append=True` modifies the object in place and returns None
        obs = test_df_year.copy()
        obs.divide(*v, axis="scenario", append=True)
        assert_iamframe_equal(test_df_year.append(exp), obs)
    else:
        obs = test_df_year.divide(*v, axis="scenario")
        assert_iamframe_equal(exp, obs)
Пример #11
0
    def test_relationship_usage_wrong_time_col(self, test_db,
                                               test_downscale_df):
        """A filler applied to a df with a mismatched time column raises ValueError."""
        test_db = test_db.filter(
            variable=["Emissions|HFC|C5F12", "Emissions|HFC|C2F6"])
        filler = self.tclass(test_db).derive_relationship(
            "Emissions|HFC|C5F12", ["Emissions|HFC|C2F6"])

        if test_db.time_col == "year":
            # convert the downscale df to datetime so the time domains differ
            timeseries = test_downscale_df.timeseries()
            timeseries.columns = timeseries.columns.map(
                lambda year: dt.datetime(year, 1, 1))
            test_downscale_df = IamDataFrame(timeseries)

        error_msg = re.escape(
            "`in_iamdf` time column must be the same as the time column used "
            "to generate this filler function (`{}`)".format(test_db.time_col))
        with pytest.raises(ValueError, match=error_msg):
            filler(test_downscale_df)
Пример #12
0
def test_48a():
    # tests fix for #48 mapping many->few
    rows = [
        ['model', 'scen', 'SSD', 'var', 'unit', 1, 6],
        ['model', 'scen', 'SDN', 'var', 'unit', 2, 7],
        ['model', 'scen1', 'SSD', 'var', 'unit', 2, 7],
        ['model', 'scen1', 'SDN', 'var', 'unit', 2, 7],
    ]
    df = IamDataFrame(pd.DataFrame(
        rows,
        columns=['model', 'scenario', 'region', 'variable', 'unit', 2005, 2010],
    ))

    # expected data: grouped over all columns except `value`, then summed
    exp = _r5_regions_exp(df)
    columns = df.data.columns
    group_cols = [col for col in columns if col != 'value']
    exp = exp.groupby(group_cols).sum().reset_index()[columns]

    obs = df.map_regions('r5_region', region_col='iso', agg='sum').data

    pd.testing.assert_frame_equal(obs, exp, check_index_type=False)
Пример #13
0
def test_aggregate_recursive(time_col):
    # use the feature `recursive=True`
    if time_col == 'year':
        data = RECURSIVE_DF
    else:
        data = RECURSIVE_DF.rename(DTS_MAPPING, axis='columns')
    df = IamDataFrame(data, model='model_a', scenario='scen_a', region='World')

    # append a second scenario with doubled values
    df2 = df.rename(scenario={'scen_a': 'scen_b'})
    df2.data.value *= 2
    df.append(df2, inplace=True)

    # drop the variables that the aggregation should re-create
    v = 'Secondary Energy|Electricity'
    agg_vars = [v, v + '|Wind']
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # recursively aggregated data is returned as a new object
    obs = df_minimal.aggregate(variable=v, recursive=True)
    assert_iamframe_equal(obs, df.filter(variable=agg_vars))

    # recursively aggregated data can be appended to `self`
    df_minimal.aggregate(variable=v, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)
Пример #14
0
def test_aggregate_recursive(time_col):
    """Check the `recursive=True` feature of variable aggregation."""
    data = RECURSIVE_DF
    if time_col != "year":
        data = RECURSIVE_DF.rename(DTS_MAPPING, axis="columns")
    df = IamDataFrame(data, model="model_a", scenario="scen_a", region="World")

    # add a second scenario with doubled values
    scen_b = df.rename(scenario={"scen_a": "scen_b"})
    scen_b.data.value *= 2
    df.append(scen_b, inplace=True)

    # remove the variables that aggregation should re-create
    variable = "Secondary Energy|Electricity"
    agg_vars = [variable + suffix for suffix in ("", "|Wind")]
    df_minimal = df.filter(variable=agg_vars, keep=False)

    # aggregate recursively, returning a new object
    obs = df_minimal.aggregate(variable=variable, recursive=True)
    assert_iamframe_equal(obs, df.filter(variable=agg_vars))

    # aggregate recursively, appending to `self`
    df_minimal.aggregate(variable=variable, recursive=True, append=True)
    assert_iamframe_equal(df_minimal, df)
Пример #15
0
    def test_extreme_values_relationship(self):
        """Closest-point extrapolation returns unchanged results out of range.

        Our cruncher has a closest-point extrapolation algorithm and therefore
        should return the same values when filling for data outside the limits
        of its cruncher database.
        """
        # Calculate the values using the cruncher for a fairly detailed dataset
        large_db_int = IamDataFrame(self.large_db)
        tcruncher = self.tclass(large_db_int)
        follow = "Emissions|CH4"
        lead = ["Emissions|CO2"]
        res = tcruncher.derive_relationship(follow, lead)
        crunched = res(large_db_int)

        # Increase the maximum values
        modify_extreme_db = large_db_int.filter(
            variable="Emissions|CO2").copy()
        max_scen = modify_extreme_db["scenario"].loc[
            modify_extreme_db["value"] == max(modify_extreme_db["value"])]
        # BUG FIX: `idxmax`/`idxmin` were referenced without calling them, so
        # `ind` held a bound method instead of the row label and the intended
        # `.loc[ind] ± 10` modification never targeted the extreme row.
        ind = modify_extreme_db["value"].idxmax()
        modify_extreme_db["value"].loc[ind] += 10
        extreme_crunched = res(modify_extreme_db)
        # Check results are the same
        assert crunched.equals(extreme_crunched)
        # Also check that the results are correct
        assert crunched.filter(scenario=max_scen)["value"].iloc[0] == max(
            large_db_int.filter(variable=follow)["value"].values)

        # Repeat with reducing the minimum value. This works differently because
        # the minimum point is doubled. This modification causes the cruncher to
        # pick the lower value.
        min_scen = modify_extreme_db["scenario"].loc[
            modify_extreme_db["value"] == min(modify_extreme_db["value"])]
        ind = modify_extreme_db["value"].idxmin()
        modify_extreme_db["value"].loc[ind] -= 10
        extreme_crunched = res(modify_extreme_db)
        assert crunched.filter(scenario=min_scen)["value"].iloc[0] != min(
            large_db_int.filter(variable=follow)["value"].values)
        assert extreme_crunched.filter(
            scenario=min_scen)["value"].iloc[0] == min(
                large_db_int.filter(variable=follow)["value"].values)
    def test_relationship_usage(self, test_downscale_df, add_col):
        """A constant-ratio filler scales the lead data and relabels it."""
        units = "new units"
        tcruncher = self.tclass()
        test_downscale_df = test_downscale_df.filter(year=[2010, 2015])
        if add_col:
            # what should happen if there's more than one value in the `add_col`?
            add_col_val = "blah"
            test_downscale_df[add_col] = add_col_val
            test_downscale_df = IamDataFrame(test_downscale_df.data)
            assert test_downscale_df.extra_cols[0] == add_col

        lead = ["Emissions|HFC|C2F6"]
        follow = "Emissions|HFC|C5F12"
        filler = tcruncher.derive_relationship(
            follow, lead, ratio=2, units=units)
        res = filler(test_downscale_df)

        # expected output: lead data doubled and relabelled to the follower
        exp = test_downscale_df.filter(variable=lead)
        exp.data["variable"] = follow
        exp.data["value"] = exp.data["value"] * 2
        exp.data["unit"] = units

        pd.testing.assert_frame_equal(
            res.timeseries(), exp.timeseries(), check_like=True)

        # the result comes back on the input timepoints
        np.testing.assert_array_equal(
            res.timeseries().columns.values.squeeze(),
            test_downscale_df.timeseries().columns.values.squeeze(),
        )

        # the result can be appended to the input correctly
        append_df = test_downscale_df.append(res)
        assert append_df.filter(variable=follow).equals(res)

        if add_col:
            assert all(append_df.filter(variable=lead)[add_col] == add_col_val)
Пример #17
0
def test_interpolate_extra_cols():
    # check that interpolation with non-matching extra_cols has no effect (#351)
    extra_col_data = pd.DataFrame(
        [
            ['foo', 2005, 1],
            ['bar', 2010, 3],
        ],
        columns=['extra_col', 'year', 'value'],
    )
    df = IamDataFrame(
        extra_col_data,
        model='model_a',
        scenario='scen_a',
        region='World',
        variable='Primary Energy',
        unit='EJ/yr',
    )

    # interpolating a copy must not change any data
    df2 = df.copy()
    df2.interpolate(2007)
    assert_iamframe_equal(df, df2)
Пример #18
0
    def test_relationship_usage(self, simple_df, add_col):
        """Check infilling restricted to one required scenario, then with both."""
        tcruncher = self.tclass(simple_df)
        lead = ["Emissions|CH4"]
        follow = "Emissions|CO2"
        res = tcruncher.derive_relationship(
            follow, lead, required_scenario="scen_a")
        if add_col:
            add_col_val = "blah"
            simple_df[add_col] = add_col_val
            simple_df = IamDataFrame(simple_df.data)
            assert simple_df.extra_cols[0] == add_col

        # crunching on scen_a only: both scenarios infill to 0 in 2010
        expect_00 = res(simple_df)
        assert expect_00.filter(
            scenario="scen_a", year=2010)["value"].iloc[0] == 0
        assert expect_00.filter(
            scenario="scen_b", year=2010)["value"].iloc[0] == 0
        assert all(expect_00.filter(year=2030)["value"] == 1000)
        assert all(expect_00.filter(year=2050)["value"] == 5000)

        # If we include data from scen_b, we then get a slightly different answer
        res = tcruncher.derive_relationship(
            "Emissions|CO2", ["Emissions|CH4"],
            required_scenario=["scen_a", "scen_b"])
        expect_01 = res(simple_df)
        assert expect_01.filter(
            scenario="scen_a", year=2010)["value"].iloc[0] == 0
        assert expect_01.filter(
            scenario="scen_b", year=2010)["value"].iloc[0] == 1
        assert all(expect_01.filter(year=2030)["value"] == 1000)
        assert all(expect_01.filter(year=2050)["value"] == 5000)

        # the answer can be appended to the lead data
        append_df = simple_df.filter(variable=lead).append(expect_01)
        assert append_df.filter(variable=follow).equals(expect_01)

        if add_col:
            assert all(append_df[add_col] == add_col_val)
Пример #19
0
def test_swap_time_to_year(test_df, inplace):
    """Swap time column for year (int) dropping subannual time resolution (default)"""

    if test_df.time_col == "year":
        pytest.skip(
            "IamDataFrame with time domain `year` not relevant for this test.")

    # build the expected year-based object from the datetime data
    exp_data = test_df.data
    exp_data["year"] = exp_data["time"].apply(lambda t: t.year)
    exp = IamDataFrame(
        exp_data.drop("time", axis="columns"), meta=test_df.meta)

    obs = test_df.swap_time_for_year(inplace=inplace)

    if inplace:
        assert obs is None  # in-place operations return None
        obs = test_df

    assert_iamframe_equal(obs, exp)

    # the `time` attribute is gone after the swap
    match = "'IamDataFrame' object has no attribute 'time'"
    with pytest.raises(AttributeError, match=match):
        obs.time
 def test_derive_relationship_averaging_info(self, test_db, extra_info):
     """Check that crunching averages multiple lead-gas points at the latest year.

     NOTE(review): the expected numbers (average follower value of 2, division
     by the 2015 lead value) come from the `test_db`/`extra_info` fixtures —
     confirm against the fixture definitions.
     """
     # test that crunching uses average values if there's more than a single point
     # in the latest year for the lead gas in the database
     variable_follower = "Emissions|HFC|C5F12"
     variable_leader = ["Emissions|HFC|C2F6"]
     # drop the follower from the database, then merge in the extra info rows
     tdb = test_db.filter(variable=variable_follower, keep=False)
     tcruncher = self.tclass(
         self._join_iamdfs_time_wrangle(tdb, IamDataFrame(extra_info)))
     cruncher = tcruncher.derive_relationship(variable_follower,
                                              variable_leader)
     lead_db = test_db.filter(variable=variable_leader)
     infilled = cruncher(lead_db)
     # In both cases, the average follower value at the latest time is 2. We divide
     # by the value in 2015, which we have data for in both cases.
     lead_db_time = lead_db.data[lead_db.time_col]
     latest_time = lead_db_time == max(lead_db_time)
     expected = (2 * lead_db.data["value"] /
                 lead_db.data["value"].loc[latest_time].values)
     assert np.allclose(infilled.data["value"], expected)
     # Test that the result can be appended without problems.
     lead_db.append(infilled, inplace=True)
     assert lead_db.filter(variable=variable_follower).equals(infilled)
Пример #21
0
def test_cast_from_value_col(test_df_year):
    """Wide data with one column per variable casts to the expected IamDataFrame."""
    value_cols = ["Primary Energy", "Primary Energy|Coal"]
    wide = pd.DataFrame(
        [
            ["model_a", "scen_a", "World", "EJ/yr", 2005, 1, 0.5],
            ["model_a", "scen_a", "World", "EJ/yr", 2010, 6.0, 3],
            ["model_a", "scen_b", "World", "EJ/yr", 2005, 2, None],
            ["model_a", "scen_b", "World", "EJ/yr", 2010, 7, None],
        ],
        columns=["model", "scenario", "region", "unit", "year"] + value_cols,
    )
    df = IamDataFrame(wide, value=value_cols)

    assert compare(test_df_year, df).empty
    pd.testing.assert_frame_equal(df.data, test_df_year.data, check_like=True)
Пример #22
0
def test_48a():
    """Region mapping many->few aggregates correctly (fix for #48)."""
    data = pd.DataFrame(
        [
            ["model", "scen", "SSD", "var", "unit", 1, 6],
            ["model", "scen", "SDN", "var", "unit", 2, 7],
            ["model", "scen1", "SSD", "var", "unit", 2, 7],
            ["model", "scen1", "SDN", "var", "unit", 2, 7],
        ],
        columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
    )
    df = IamDataFrame(data)

    # expected data: grouped over all columns except `value`, then summed
    exp = _r5_regions_exp(df)
    columns = df.data.columns
    group_cols = [col for col in columns if col != "value"]
    exp = exp.groupby(group_cols).sum().reset_index()
    exp = exp[columns]

    obs = df.map_regions("r5_region", region_col="iso", agg="sum").data

    pd.testing.assert_frame_equal(obs, exp, check_index_type=False)
Пример #23
0
def test_filter_index_with_custom_index(test_pd_df):
    """Filtering by index tuples works with a custom (non-default) index."""
    # rename 'model' column and add a version column to the dataframe
    test_pd_df.rename(columns={"model": "source"}, inplace=True)
    test_pd_df["version"] = [1, 2, 3]
    df = IamDataFrame(test_pd_df, index=["source", "scenario", "version"])

    # filter on full index tuples
    obs = df.filter(index=[("model_a", "scen_a", 1), ("model_a", "scen_a", 2)])
    assert obs.source == ["model_a"]
    assert obs.scenario == ["scen_a"]
    assert obs.version == [1, 2]

    # filtering on a sub-set of index levels is also supported
    subset = pd.MultiIndex.from_tuples(
        [("model_a", "scen_a")], names=["source", "scenario"])
    obs = df.filter(index=subset)
    assert obs.source == ["model_a"]
    assert obs.scenario == ["scen_a"]
    assert obs.version == [1, 2]
Пример #24
0
def test_load_rcp_database_downloaded_file(test_df_year):
    """A raw RCP-database download reads into the expected IamDataFrame."""
    exp = test_df_year.filter(**FILTER_ARGS).as_pandas()
    path = os.path.join(TEST_DATA_DIR, 'test_RCP_database_raw_download.xlsx')
    obs_df = IamDataFrame(path)
    pd.testing.assert_frame_equal(obs_df.as_pandas(), exp)
Пример #25
0
def plot_stackplot_df():
    """Fixture: IamDataFrame built from the stackplot test data."""
    yield IamDataFrame(TEST_STACKPLOT_DF)
Пример #26
0
def plot_df():
    """Fixture: IamDataFrame loaded from the plot-data CSV file."""
    yield IamDataFrame(data=os.path.join(TEST_DATA_DIR, "plot_data.csv"))
Пример #27
0
def reg_df():
    """Fixture: IamDataFrame built from the regional test data."""
    yield IamDataFrame(data=REG_DF)
Пример #28
0
def test_df_year():
    """Fixture: year-based IamDataFrame with all meta columns set."""
    df = IamDataFrame(data=TEST_DF)
    for col in META_COLS:
        df.set_meta(META_DF[col])
    yield df
Пример #29
0
def test_df(request):
    """Fixture: IamDataFrame in the parametrized time format, with meta set."""
    df = IamDataFrame(data=TEST_DF.rename(request.param, axis="columns"))
    for col in META_COLS:
        df.set_meta(META_DF[col])
    yield df
Пример #30
0
def test_load_RCP_database_downloaded_file(test_df):
    """Reading a raw RCP-database download matches the fixture data."""
    path = os.path.join(TEST_DATA_DIR, 'test_RCP_database_raw_download.xlsx')
    obs_df = IamDataFrame(path)
    pd.testing.assert_frame_equal(obs_df.as_pandas(), test_df.as_pandas())