def test_pd_join_by_meta_nonmatching_index(meta_df):
    """Join the 'string' meta column onto data without filtering on it.

    Expects every row of the input to be retained, with `string` set to
    NaN for the first two rows and 'b' for the third.
    """
    source = df_filter_by_meta_nonmatching_idx
    meta_df.set_meta(['a', 'b'], 'string')

    column_order = ['scenario', 2010, 2020, 'string']
    joined = filter_by_meta(source, meta_df, join_meta=True, string=None)
    joined = joined.reindex(columns=column_order)

    # `assign` works on a copy, so the module-level frame is left untouched
    expected = source.assign(string=[np.nan, np.nan, 'b'])

    pd.testing.assert_frame_equal(joined.sort_index(level=1), expected)
def test_pd_join_by_meta_nonmatching_index(test_df):
    """Check that joining meta with `string=None` keeps all rows of the data.

    The expected frame is the input data plus a `string` column holding
    NaN, NaN and "b".
    """
    frame = df_filter_by_meta_nonmatching_idx
    test_df.set_meta(["a", "b"], "string")

    # join (no filter value given) and bring columns into a fixed order
    result = filter_by_meta(
        frame, test_df, join_meta=True, string=None
    ).reindex(columns=["scenario", 2010, 2020, "string"])

    wanted = frame.copy()
    wanted["string"] = [np.nan, np.nan, "b"]

    # sort by the second index level before comparing
    result_sorted = result.sort_index(level=1)
    pd.testing.assert_frame_equal(result_sorted, wanted)
def test_pd_filter_by_meta_nonmatching_index(meta_df):
    """Filter data on the meta value `string='b'` and join that column.

    Expects only the third row of the input (positional slice 2:3) to
    remain, carrying `string == 'b'`.
    """
    source = df_filter_by_meta_nonmatching_idx
    meta_df.set_meta(['a', 'b'], 'string')

    filtered = filter_by_meta(source, meta_df, join_meta=True, string='b')
    filtered = filtered.reindex(columns=['scenario', 2010, 2020, 'string'])

    # `assign` returns a new frame, so the slice of `source` is not modified
    expected = source.iloc[2:3].assign(string='b')

    pd.testing.assert_frame_equal(filtered, expected)
def test_pd_filter_by_meta_nonmatching_index(test_df):
    """Check filtering by `string="b"` with the meta column joined.

    The expected frame is the single row at position 2 of the input
    data with an added `string` column equal to "b".
    """
    frame = df_filter_by_meta_nonmatching_idx
    test_df.set_meta(["a", "b"], "string")

    columns = ["scenario", 2010, 2020, "string"]
    result = filter_by_meta(
        frame, test_df, join_meta=True, string="b"
    ).reindex(columns=columns)

    third_row = frame.iloc[2:3]
    wanted = third_row.copy()
    wanted["string"] = "b"

    pd.testing.assert_frame_equal(result, wanted)
def test_pd_filter_by_meta_no_index(meta_df):
    """Filter un-indexed data by `boolean=True` and also join `int`.

    Expects the first two rows of the input with `boolean` set to True
    and `int` set to 0.
    """
    source = df_filter_by_meta_matching_idx
    meta_df.set_meta([True, False], 'boolean')
    meta_df.set_meta(0, 'int')

    column_order = META_IDX + ['region', 'col', 'boolean', 'int']
    filtered = filter_by_meta(
        source, meta_df, join_meta=True, boolean=True, int=None
    )
    filtered = filtered.reindex(columns=column_order)

    # columns are added in the given order: 'boolean' first, then 'int'
    expected = source.iloc[0:2].assign(**{'boolean': True, 'int': 0})

    pd.testing.assert_frame_equal(filtered, expected)
def test_pd_filter_by_meta_no_index(test_df):
    """Check filtering of data with model/scenario as regular columns.

    Filters on `boolean=True`, joins `int` without a filter value, and
    compares against the first two input rows extended by both columns.
    """
    frame = df_filter_by_meta_matching_idx
    test_df.set_meta([True, False], "boolean")
    test_df.set_meta(0, "int")

    result = filter_by_meta(
        frame, test_df, join_meta=True, boolean=True, int=None
    ).reindex(columns=META_IDX + ["region", "col", "boolean", "int"])

    wanted = frame.iloc[0:2].copy()
    for column, value in (("boolean", True), ("int", 0)):
        wanted[column] = value

    pd.testing.assert_frame_equal(result, wanted)
def test_pd_filter_by_meta(test_df):
    """Check filtering of data indexed by model and region.

    Filters on `boolean=True`, joins `integer` without a filter value,
    and compares against the first two input rows extended by both
    columns.
    """
    indexed = df_filter_by_meta_matching_idx.set_index(["model", "region"])
    test_df.set_meta([True, False], "boolean")
    test_df.set_meta(0, "integer")

    columns = ["scenario", "col", "boolean", "integer"]
    result = filter_by_meta(
        indexed, test_df, join_meta=True, boolean=True, integer=None
    ).reindex(columns=columns)

    # `assign` adds the columns to a copy, in keyword order
    wanted = indexed.iloc[0:2].assign(boolean=True, integer=0)

    pd.testing.assert_frame_equal(result, wanted)
def test_pd_filter_by_meta(test_df):
    """Filter (model, region)-indexed data by `boolean=True`, joining meta.

    Expects the first two rows of the indexed input with `boolean` set
    to True and `integer` set to 0.
    """
    source = df_filter_by_meta_matching_idx.set_index(['model', 'region'])
    test_df.set_meta([True, False], 'boolean')
    test_df.set_meta(0, 'integer')

    filtered = filter_by_meta(
        source, test_df, join_meta=True, boolean=True, integer=None
    )
    filtered = filtered.reindex(
        columns=['scenario', 'col', 'boolean', 'integer']
    )

    expected = source.iloc[0:2].copy()
    for column, value in (('boolean', True), ('integer', 0)):
        expected[column] = value

    pd.testing.assert_frame_equal(filtered, expected)
def test_pd_filter_by_meta_no_index(meta_df):
    """Filter a locally built, un-indexed frame by `boolean=True`.

    The input has three rows (two for 'a_scenario', one for
    'a_scenario2'); the first two rows are expected back with `boolean`
    set to True and `int` set to 0.
    """
    rows = [
        ['a_model', 'a_scenario', 'a_region1', 1],
        ['a_model', 'a_scenario', 'a_region2', 2],
        ['a_model', 'a_scenario2', 'a_region3', 3],
    ]
    source = pd.DataFrame(
        rows, columns=['model', 'scenario', 'region', 'col']
    )

    meta_df.set_meta([True, False], 'boolean')
    meta_df.set_meta(0, 'int')

    column_order = META_IDX + ['region', 'col', 'boolean', 'int']
    filtered = filter_by_meta(
        source, meta_df, join_meta=True, boolean=True, int=None
    ).reindex(columns=column_order)

    # columns are added in the given order: 'boolean' first, then 'int'
    expected = source.iloc[0:2].assign(**{'boolean': True, 'int': 0})

    pd.testing.assert_frame_equal(filtered, expected)
def add(self, data, header, row=None, subheader=None):
    """Filter `data` by arguments of this SummaryStats instance,
    then apply `pd.describe()` and format the statistics

    The descriptive statistics are computed once for the `groupby`
    specification (if any) and once per entry of `self.filters`, stacked
    row-wise, put under the column header `header` and merged into
    `self.stats`.

    Parameters
    ----------
    data : pd.DataFrame or pd.Series
        data for which summary statistics should be computed; a
        `pd.Series` passed here is not modified
    header : str
        column name for descriptive statistics
    row : str
        row name for descriptive statistics
        (required if `pyam.Statistics(rows=True)`)
    subheader : str, optional
        column name (level=1) if data is a unnamed `pd.Series`

    Raises
    ------
    ValueError
        If `row` is missing although this instance uses rows, if `row` is
        given although it does not, or if `data` is an unnamed `pd.Series`
        and no `subheader` is provided.
    """
    # verify validity of specifications
    if self.rows is not None and row is None:
        raise ValueError('row specification required')
    if self.rows is None and row is not None:
        raise ValueError('row arg illegal for this `Statistics` instance')
    if isinstance(data, pd.Series):
        if subheader is not None:
            # build the frame under the new column name instead of
            # renaming the caller's Series in place
            data = data.to_frame(name=subheader)
        elif data.name is None:
            msg = '`data` must be named `pd.Series` or provide `subheader`'
            raise ValueError(msg)
        else:
            data = data.to_frame()

    if self.rows is not None and row not in self.rows:
        self.rows.append(row)

    _stats = None

    # describe with groupby feature
    if self.groupby is not None:
        filter_args = dict(data=data, df=self.df, join_meta=True)
        filter_args.update(self.groupby)
        _stats = (
            filter_by_meta(**filter_args)
            .groupby(self.col)
            .describe(percentiles=self.percentiles)
        )
        # prepend the groupby column name as outermost index level
        _stats = pd.concat([_stats], keys=[self.col], names=[''], axis=0)
        if self.rows:
            _stats['row'] = row
            _stats.set_index('row', append=True, inplace=True)
        _stats.index.names = [''] * 3 if self.rows else [''] * 2

    # describe with filter feature
    for (idx, _filter) in self.filters:
        filter_args = dict(data=data, df=self.df)
        filter_args.update(_filter)
        _stats_f = (
            filter_by_meta(**filter_args)
            .describe(percentiles=self.percentiles)
        )
        # reshape the statistics into a single row
        _stats_f = pd.DataFrame(_stats_f.unstack()).T
        if self.idx_depth == 1:
            levels = [[idx]]
        else:
            levels = [[idx[0]], [idx[1]]]
        if self.rows:
            lvls = levels + [[row]]
            lbls = [[0]] * (self.idx_depth + 1)
        else:
            lvls = levels
            lbls = [[0]] * self.idx_depth
        _stats_f.index = pd.MultiIndex(levels=lvls, codes=lbls)
        # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
        # supported way to stack the rows
        if _stats is None:
            _stats = _stats_f
        else:
            _stats = pd.concat([_stats, _stats_f])

    # add header
    _stats = pd.concat([_stats], keys=[header], names=[''], axis=1)
    subheader = _stats.columns.get_level_values(1).unique()
    self._add_to_header(header, subheader)

    # set statistics
    if self.stats is None:
        self.stats = _stats
    else:
        self.stats = _stats.combine_first(self.stats)