def test_nan_melt_unmelt(self):
    """Checks that a single NaN value survives melt() and unmelt()."""
    wide_df = pd.DataFrame({'sum(x)': [np.nan]})
    long_df = pd.DataFrame({'Value': [np.nan]}, index=['sum(x)'])
    long_df.index.name = 'Metric'

    # Wide -> long.
    testing.assert_frame_equal(long_df, utils.melt(wide_df))

    # Long -> wide. The unmelted frame is expected to carry 'Metric' as the
    # name of its column axis, so name it only after the melt check above.
    wide_df.columns.name = 'Metric'
    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def manipulate(self,
               res: pd.Series,
               melted: bool = False,
               return_dataframe: bool = True):
    """Applies the shared post-processing steps to a computed result.

    In order, this
      1. names `res` after this instance (`self.name`) when it is a
         pd.Series; the Series is modified in place,
      2. converts `res` with `self.to_dataframe` when `return_dataframe`
         is set,
      3. melts or unmelts `res` so its layout matches the `melted` flag,
      4. strips redundant index levels via `utils.remove_empty_level`.

    Args:
      res: The value produced by compute_through(). Typically a DataFrame,
        but it may also be a pd.Series or a base type.
      melted: Whether the caller wants the result in long format.
      return_dataframe: Whether to convert a non-DataFrame result to a
        DataFrame. When False a DataFrame can still be returned if `res`
        already is one.

    Returns:
      The final result handed back to the user. With split_by it is a
      pd.Series or pd.DataFrame; otherwise it may be a base type.
    """
    if isinstance(res, pd.Series):
        res.name = self.name
    if return_dataframe:
        res = self.to_dataframe(res)

    # Only reshape when the layout we hold differs from the one requested.
    current_layout = self.manipulate_input_type
    if current_layout == 'unmelted' and melted:
        res = utils.melt(res)
    elif current_layout == 'melted' and not melted:
        res = utils.unmelt(res)

    return utils.remove_empty_level(res)
def test_one_level_not_value_column_and_no_splitby_unmelt(self):
    """Unmelts a frame whose only column is not named 'Value'.

    The expected wide frame keeps the column label as a second, unnamed
    column level under 'Metric'.
    """
    long_df = pd.DataFrame({'Baz': [1, 2]}, index=['foo', 'bar'])
    long_df.index.name = 'Metric'

    wide_columns = pd.MultiIndex.from_product(
        [['foo', 'bar'], ['Baz']], names=['Metric', None])
    wide_df = pd.DataFrame([[1, 2]], columns=wide_columns)

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def test_one_level_value_column_and_no_splitby_unmelt(self):
    """Unmelts a frame whose only column is 'Value'.

    The expected wide frame has a single column level named 'Metric'.
    """
    long_df = pd.DataFrame({'Value': [1, 2]}, index=['foo', 'bar'])
    long_df.index.name = 'Metric'

    wide_df = pd.DataFrame(
        {'foo': [1], 'bar': [2]},
        columns=['foo', 'bar'],
    )
    wide_df.columns.names = ['Metric']

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def get_stderrs_or_ci_half_width(self, replicates):
    """Returns confidence interval information in an unmelted DataFrame.

    Args:
      replicates: Passed straight through to `self.get_stderrs`.

    Returns:
      The result of `utils.unmelt` applied to a frame that holds either
      - one '<prefix> SE' column built from the standard errors, when
        `self.confidence` is falsy, or
      - '<prefix> CI-lower' and '<prefix> CI-upper' columns built from
        `self.get_ci_width(stderrs, dof)`, otherwise.
    """
    stderrs, dof = self.get_stderrs(replicates)

    if not self.confidence:
        table = pd.DataFrame(stderrs, columns=[self.prefix + ' SE'])
    else:
        # get_ci_width's output is transposed so that the two bounds end
        # up as the two columns relabelled below.
        table = pd.DataFrame(self.get_ci_width(stderrs, dof)).T
        table.columns = [
            self.prefix + ' CI-lower',
            self.prefix + ' CI-upper',
        ]

    return utils.unmelt(table)
def test_multiple_index_columns_and_no_splitby_unmelt(self):
    """Unmelts a two-column ('Value', 'SE') frame that has no split-by.

    The expected wide frame is a single row with (metric, column) pairs as
    a two-level column index.
    """
    long_df = pd.DataFrame(
        data={'Value': [1, 3], 'SE': [2, 4]},
        index=['foo', 'bar'],
        columns=['Value', 'SE'],
    )
    long_df.index.name = 'Metric'

    wide_columns = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['Value', 'SE']))
    wide_df = pd.DataFrame([[1, 2, 3, 4]], columns=wide_columns)
    wide_df.columns.names = ['Metric', None]

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def test_one_level_column_and_single_splitby_unmelt(self):
    """Unmelts a 'Value'-only frame indexed by ('Metric', 'grp').

    The expected wide frame keeps 'grp' as the row index and has one
    column per metric.
    """
    wide_df = pd.DataFrame(
        data={'foo': [0, 1], 'bar': [2, 3]},
        columns=['foo', 'bar'],
        index=['B', 'A'],
    )
    wide_df.index.name = 'grp'
    wide_df.columns.name = 'Metric'

    long_index = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['B', 'A']), names=['Metric', 'grp'])
    long_df = pd.DataFrame({'Value': range(4)}, index=long_index)
    # NOTE(review): the index is a MultiIndex whose level names are already
    # given above, so this assignment looks redundant -- confirm before
    # removing.
    long_df.index.name = 'Metric'

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def test_multiple_index_column_and_single_splitby_unmelt(self):
    """Unmelts a ('Value', 'SE') frame indexed by ('Metric', 'grp').

    The expected wide frame keeps 'grp' as the row index and uses
    (metric, column) pairs as a two-level column index.
    """
    long_index = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['B', 'A']), names=['Metric', 'grp'])
    long_df = pd.DataFrame(
        data={'Value': [1, 5, 3, 7], 'SE': [2, 6, 4, 8]},
        index=long_index,
        columns=['Value', 'SE'],
    )

    wide_columns = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['Value', 'SE']))
    wide_df = pd.DataFrame(
        [[1, 2, 3, 4], [5, 6, 7, 8]],
        columns=wide_columns,
        index=['B', 'A'],
    )
    wide_df.index.name = 'grp'
    wide_df.columns.names = ['Metric', None]

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def test_one_level_column_and_multiple_splitby_unmelt(self):
    """Unmelts a 'Value'-only frame indexed by metric plus two split-bys.

    The expected wide frame keeps ('grp', 'country') as the row index and
    has one column per metric.
    """
    long_index = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['B', 'A'], ['US', 'non-US']),
        names=['Metric', 'grp', 'country'])
    long_df = pd.DataFrame({'Value': range(8)}, index=long_index)

    wide_index = pd.MultiIndex.from_product(
        (['B', 'A'], ['US', 'non-US']), names=['grp', 'country'])
    wide_df = pd.DataFrame(
        data={'foo': range(4), 'bar': range(4, 8)},
        columns=['foo', 'bar'],
        index=wide_index,
    )
    wide_df.columns.name = 'Metric'

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def compute_slices(self, df, split_by=None):
    """Computes the quantile(s) of `self.var` over the groups of `df`.

    Args:
      df: The data to compute on; it is grouped with `self.group`.
      split_by: The grouping columns handed to `self.group`, or None.

    Returns:
      - The parent class's `compute_slices` result when `self.weight` is
        set.
      - The raw output of the grouped `quantile` call when
        `self.one_quantile` is set.
      - Otherwise a DataFrame with one column per quantile, each labelled
        with `self.name_tmpl.format(self.var, <quantile>)`.
    """
    if self.weight:
        # When there is weight, just loop through slices.
        return super(Quantile, self).compute_slices(df, split_by)

    values = self.group(df, split_by)[self.var]
    quantiles = values.quantile(
        self.quantile, interpolation=self.interpolation, **self.kwargs)
    if self.one_quantile:
        return quantiles

    if split_by:
        # Pivot the innermost index level into columns; presumably that
        # level holds the quantile values -- TODO confirm.
        wide = quantiles.unstack()
        wide.columns = [self.name_tmpl.format(self.var, q) for q in wide]
    else:
        wide = utils.unmelt(pd.DataFrame(quantiles))
        # Here each column label is indexed, and only its first element is
        # put into the name.
        wide.columns = [
            self.name_tmpl.format(self.var, col[0]) for col in wide
        ]
    return wide
def test_multiple_index_column_and_multiple_splitby_unmelt(self):
    """Unmelts a ('Value', 'SE') frame indexed by metric plus two split-bys.

    The expected wide frame keeps ('grp', 'country') as the row index and
    uses (metric, column) pairs as a two-level column index.
    """
    long_index = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['B', 'A'], ['US', 'non-US']),
        names=['Metric', 'grp', 'country'])
    long_df = pd.DataFrame(
        data={
            'Value': [0, 4, 8, 12, 2, 6, 10, 14],
            'SE': [1, 5, 9, 13, 3, 7, 11, 15],
        },
        index=long_index,
        columns=['Value', 'SE'],
    )

    wide_columns = pd.MultiIndex.from_product(
        (['foo', 'bar'], ['Value', 'SE']))
    wide_index = pd.MultiIndex.from_product(
        (['B', 'A'], ['US', 'non-US']), names=['grp', 'country'])
    wide_df = pd.DataFrame(
        [range(4), range(4, 8), range(8, 12), range(12, 16)],
        columns=wide_columns,
        index=wide_index,
    )
    wide_df.columns.names = ['Metric', None]

    testing.assert_frame_equal(wide_df, utils.unmelt(long_df))
def final_compute(self,
                  std,
                  melted: bool = False,
                  return_dataframe: bool = True,
                  split_by: Optional[List[Text]] = None,
                  df=None):
    """Computes point estimates and returns them with stderrs or CI bounds.

    Args:
      std: A frame that is joined onto the melted point estimates. When
        `self.confidence` is set it must provide the '<prefix> CI-lower'
        and '<prefix> CI-upper' columns, which are read as offsets from
        the point estimate.
      melted: Whether to return the result in long format. When False the
        result is passed through `utils.unmelt`.
      return_dataframe: Not used by this method body.
      split_by: The split-by columns forwarded to `self.compute_child`.
      df: The data to compute on; filtered with `self.where` when that is
        set.

    Returns:
      The point estimates joined with `std`. When `self.confidence` is set
      the two CI columns hold absolute lower and upper bounds.
    """
    if self.where:
        df = df.query(self.where)

    res = self.compute_child(df, split_by, melted=True).join(std)

    if self.confidence:
        lower_col = self.prefix + ' CI-lower'
        upper_col = self.prefix + ' CI-upper'
        # The first column holds the point estimate. Turn the offsets in
        # the CI columns into absolute bounds around it.
        res[lower_col] = res.iloc[:, 0] - res[lower_col]
        res[upper_col] += res.iloc[:, 0]

    if melted:
        return res
    return utils.unmelt(res)