def _validate(df: pyam.IamDataFrame) -> pyam.IamDataFrame:
    """Validate variables, regions and the subannual time resolution of `df`.

    Region synonyms (the "abbr" and "iso3" attributes of the region
    definitions) are renamed to the defined region names, then regions and
    variables are validated against the definitions. Data provided with a
    "time" column is re-cast to the categorical "subannual" format. Every
    "subannual" item that is not listed in the definitions must parse as a
    "%m-%d %H:%M%z" timestamp in the expected timezone.

    Parameters
    ----------
    df : pyam.IamDataFrame
        Scenario data to be validated; its regions are renamed in place.

    Returns
    -------
    pyam.IamDataFrame
        `df` itself, or the object returned by `swap_time_for_year` if the
        data was provided with a "time" column.

    Raises
    ------
    ValueError
        If a datetime-like "subannual" item cannot be parsed
        ("Invalid subannual timeslice"), parses only without a UTC offset
        ("Missing timezone"), or matches neither `EXP_TZ` nor
        `EXP_TIME_OFFSET` ("Invalid timezone").
        Errors raised by `definition.validate` propagate unchanged.
    """
    # load definitions (including 'subannual' if included in the scenario data)
    dimensions = ["region", "variable"]
    if "subannual" in df.dimensions or df.time_col == "time":
        dimensions.append("subannual")
    definition = DataStructureDefinition(here / "definitions", dimensions=dimensions)

    # apply a renaming from region-synonyms to region-names
    rename_dict = {
        attributes[synonym]: region
        for region, attributes in definition.region.items()
        for synonym in ("abbr", "iso3")
        if synonym in attributes
    }
    df.rename(region=rename_dict, inplace=True)

    # check variables and regions
    definition.validate(df, dimensions=["region", "variable"])

    # convert to subannual format if data provided in datetime format
    if df.time_col == "time":
        logger.info('Re-casting from "time" column to categorical "subannual" format')
        df = df.swap_time_for_year(subannual=OE_SUBANNUAL_FORMAT)

    # check that any datetime-like items in "subannual" are valid datetime and UTC+01:00
    if "subannual" in df.dimensions:
        _datetime = [s for s in df.subannual if s not in definition.subannual]

        for d in _datetime:
            # the year "2020" is prepended only to obtain a complete date for parsing
            # NOTE(review): presumably 2020 was chosen because it is a leap year,
            # so that "02-29" is accepted - confirm
            try:
                _dt = datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M%z")
            except ValueError:
                # distinguish a timestamp without timezone from an invalid item;
                # `from None` drops the internal parsing error from the traceback
                # so that only the descriptive message is reported
                try:
                    datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M")
                except ValueError:
                    raise ValueError(f"Invalid subannual timeslice: {d}") from None

                raise ValueError(f"Missing timezone: {d}") from None

            # casting to datetime with timezone was successful
            if not (_dt.tzname() == EXP_TZ or _dt.utcoffset() == EXP_TIME_OFFSET):
                raise ValueError(f"Invalid timezone: {d}")

    return df
def test_cast_by_column_concat(meta_df):
    """Cast wide-format data where two columns are passed as `variable`."""
    index_cols = ['scenario', 'region', 'var_1', 'var_2', 'unit']
    rows = [
        ['scen_a', 'World', 'Primary Energy', None, 'EJ/y', 1, 6.],
        ['scen_a', 'World', 'Primary Energy', 'Coal', 'EJ/y', 0.5, 3],
        ['scen_b', 'World', 'Primary Energy', None, 'EJ/y', 2, 7],
    ]
    raw = pd.DataFrame(rows, columns=index_cols + TEST_DTS)

    # the model is given as keyword argument, the variable as a list of columns
    obs = IamDataFrame(raw, model='model_a', variable=['var_1', 'var_2'])

    # align the time domain with the fixture before comparing
    if "year" in meta_df.data.columns:
        obs = obs.swap_time_for_year()

    assert compare(meta_df, obs).empty
    pd.testing.assert_frame_equal(obs.data, meta_df.data, check_like=True)
def test_cast_from_value_col(meta_df):
    """Cast data where each variable is given as its own value column."""
    value_cols = ['Primary Energy', 'Primary Energy|Coal']
    header = ['model', 'scenario', 'region', 'unit', 'time'] + value_cols
    first_dt, second_dt = TEST_DTS[0], TEST_DTS[1]
    rows = [
        ['model_a', 'scen_a', 'World', 'EJ/y', first_dt, 1, 0.5],
        ['model_a', 'scen_a', 'World', 'EJ/y', second_dt, 6., 3],
        ['model_a', 'scen_b', 'World', 'EJ/y', first_dt, 2, None],
        ['model_a', 'scen_b', 'World', 'EJ/y', second_dt, 7, None],
    ]
    df_with_value_cols = pd.DataFrame(rows, columns=header)

    obs = IamDataFrame(
        df_with_value_cols, value=['Primary Energy', 'Primary Energy|Coal']
    )

    # align the time domain with the fixture before comparing
    if "year" in meta_df.data.columns:
        obs = obs.swap_time_for_year()

    assert compare(meta_df, obs).empty
    pd.testing.assert_frame_equal(obs.data, meta_df.data, check_like=True)
def test_cast_from_value_col_and_args(meta_df):
    """Cast data from value columns combined with extra keyword arguments.

    Checks for issue #210 (https://github.com/IAMconsortium/pyam/issues/210).
    """
    value_cols = ['Primary Energy', 'Primary Energy|Coal']
    header = ['scenario', 'iso', 'unit', 'time'] + value_cols
    first_dt, second_dt = TEST_DTS[0], TEST_DTS[1]
    rows = [
        ['scen_a', 'World', 'EJ/y', first_dt, 1, 0.5],
        ['scen_a', 'World', 'EJ/y', second_dt, 6., 3],
        ['scen_b', 'World', 'EJ/y', first_dt, 2, None],
        ['scen_b', 'World', 'EJ/y', second_dt, 7, None],
    ]
    df_with_value_cols = pd.DataFrame(rows, columns=header)

    # the model is given as a keyword argument, the region is read from column "iso"
    obs = IamDataFrame(
        df_with_value_cols,
        model='model_a',
        region='iso',
        value=['Primary Energy', 'Primary Energy|Coal'],
    )

    # align the time domain with the fixture before comparing
    if "year" in meta_df.data.columns:
        obs = obs.swap_time_for_year()

    assert compare(meta_df, obs).empty
    pd.testing.assert_frame_equal(obs.data, meta_df.data, check_like=True)
def test_swap_time_to_year_subannual(test_pd_df, columns, subannual, dates, inplace):
    """Swap time column for year (int) keeping subannual resolution as extra-column"""
    # replace the year columns of the fixture by the given time columns
    column_map = {2005: columns[0], 2010: columns[1]}
    test_pd_df.rename(columns=column_map, inplace=True)

    # check swapping time for year
    df = IamDataFrame(test_pd_df)
    result = df.swap_time_for_year(subannual=subannual, inplace=inplace)
    if inplace:
        # an in-place operation returns nothing and modifies `df` itself
        assert result is None
        obs = df
    else:
        obs = result

    exp = get_subannual_df(dates[0], dates[1])
    assert_iamframe_equal(obs, exp)

    # check that reverting using `swap_year_for_time` yields the original data
    reverted = obs.swap_year_for_time()
    assert_iamframe_equal(reverted, IamDataFrame(test_pd_df))