Пример #1
0
 def test_nan_melt_unmelt(self):
     """Checks that a lone NaN metric value round-trips through melt/unmelt."""
     metric = 'sum(x)'
     wide = pd.DataFrame({metric: [np.nan]})
     long_form = pd.DataFrame({'Value': [np.nan]},
                              index=pd.Index([metric], name='Metric'))
     testing.assert_frame_equal(long_form, utils.melt(wide))
     # The unmelted frame is expected to carry 'Metric' as its columns name.
     wide.columns.name = 'Metric'
     testing.assert_frame_equal(wide, utils.unmelt(long_form))
Пример #2
0
  def manipulate(self,
                 res: pd.Series,
                 melted: bool = False,
                 return_dataframe: bool = True):
    """Applies the shared post-processing steps to a computed result.

    Steps, in order:
    1. If res is a pd.Series, its name is set (in place) to self.name.
    2. If return_dataframe is set, res is passed through self.to_dataframe().
    3. res is melted or unmelted when self.manipulate_input_type disagrees
       with the layout requested through `melted`.
    4. res is passed through utils.remove_empty_level().

    Args:
      res: Returned by compute_through(). Usually a DataFrame, but could be a
        pd.Series or a base type.
      melted: Whether the result should be in long format.
      return_dataframe: Whether to convert the result to a DataFrame if it is
        not one already. When False, a DataFrame input is still returned as a
        DataFrame.

    Returns:
      Final result returned to user. If split_by, it's a pd.Series or a
      pd.DataFrame, otherwise it could be a base type.
    """
    if isinstance(res, pd.Series):
      res.name = self.name
    if return_dataframe:
      res = self.to_dataframe(res)

    input_type = self.manipulate_input_type
    if melted and input_type == 'unmelted':
      res = utils.melt(res)
    elif not melted and input_type == 'melted':
      res = utils.unmelt(res)
    return utils.remove_empty_level(res)
Пример #3
0
 def test_one_level_not_value_column_and_no_splitby_unmelt(self):
     """A single column not named 'Value' should stay as a column level."""
     metrics = ['foo', 'bar']
     melted = pd.DataFrame({'Baz': [1, 2]},
                           index=pd.Index(metrics, name='Metric'))
     expected_columns = pd.MultiIndex.from_product(
         [metrics, ['Baz']], names=['Metric', None])
     expected = pd.DataFrame([[1, 2]], columns=expected_columns)
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #4
0
 def test_one_level_value_column_and_no_splitby_unmelt(self):
     """A single 'Value' column should unmelt to one flat row of metrics."""
     metrics = ['foo', 'bar']
     melted = pd.DataFrame({'Value': [1, 2]},
                           index=pd.Index(metrics, name='Metric'))
     expected = pd.DataFrame([[1, 2]], columns=metrics)
     expected.columns.name = 'Metric'
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #5
0
 def get_stderrs_or_ci_half_width(self, replicates):
   """Returns confidence interval information in an unmelted DataFrame.

   Standard errors and degrees of freedom come from self.get_stderrs(). When
   self.confidence is truthy, the output of self.get_ci_width() is transposed
   and labelled '<prefix> CI-lower' / '<prefix> CI-upper'; otherwise the
   standard errors are labelled '<prefix> SE'. Either way the frame is passed
   through utils.unmelt() before being returned.

   Args:
     replicates: Forwarded unchanged to self.get_stderrs().

   Returns:
     The result of utils.unmelt() on the frame described above.
   """
   stderrs, dof = self.get_stderrs(replicates)
   if not self.confidence:
     frame = pd.DataFrame(stderrs, columns=[self.prefix + ' SE'])
     return utils.unmelt(frame)
   frame = pd.DataFrame(self.get_ci_width(stderrs, dof)).T
   frame.columns = [self.prefix + ' CI-lower', self.prefix + ' CI-upper']
   return utils.unmelt(frame)
Пример #6
0
 def test_multiple_index_columns_and_no_splitby_unmelt(self):
     """Several value columns should unmelt to (Metric, column) pairs."""
     metrics = ['foo', 'bar']
     value_cols = ['Value', 'SE']
     melted = pd.DataFrame(data={'Value': [1, 3], 'SE': [2, 4]},
                           index=pd.Index(metrics, name='Metric'),
                           columns=value_cols)
     expected_columns = pd.MultiIndex.from_product(
         (metrics, value_cols), names=['Metric', None])
     expected = pd.DataFrame([[1, 2, 3, 4]], columns=expected_columns)
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #7
0
 def test_one_level_column_and_single_splitby_unmelt(self):
     """One split-by level stays in the index; metrics become the columns."""
     metrics = ['foo', 'bar']
     groups = ['B', 'A']
     melted = pd.DataFrame({'Value': range(4)},
                           index=pd.MultiIndex.from_product(
                               (metrics, groups), names=['Metric', 'grp']))
     # NOTE(review): the level names are already set via `names=` above, so
     # assigning `.name` on the MultiIndex looks redundant -- confirm before
     # removing.
     melted.index.name = 'Metric'
     expected = pd.DataFrame(data={'foo': [0, 1], 'bar': [2, 3]},
                             columns=metrics,
                             index=pd.Index(groups, name='grp'))
     expected.columns.name = 'Metric'
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #8
0
 def test_multiple_index_column_and_single_splitby_unmelt(self):
     """Several value columns plus one split-by level unmelt correctly."""
     metrics = ['foo', 'bar']
     groups = ['B', 'A']
     value_cols = ['Value', 'SE']
     melted = pd.DataFrame(
         data={'Value': [1, 5, 3, 7], 'SE': [2, 6, 4, 8]},
         index=pd.MultiIndex.from_product(
             (metrics, groups), names=['Metric', 'grp']),
         columns=value_cols)
     expected = pd.DataFrame(
         [[1, 2, 3, 4], [5, 6, 7, 8]],
         columns=pd.MultiIndex.from_product(
             (metrics, value_cols), names=['Metric', None]),
         index=pd.Index(groups, name='grp'))
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #9
0
 def test_one_level_column_and_multiple_splitby_unmelt(self):
     """Two split-by levels stay in the index; metrics become the columns."""
     metrics = ['foo', 'bar']
     groups = ['B', 'A']
     countries = ['US', 'non-US']
     melted = pd.DataFrame(
         {'Value': range(8)},
         index=pd.MultiIndex.from_product(
             (metrics, groups, countries),
             names=['Metric', 'grp', 'country']))
     expected = pd.DataFrame(
         data={'foo': range(4), 'bar': range(4, 8)},
         columns=metrics,
         index=pd.MultiIndex.from_product(
             (groups, countries), names=['grp', 'country']))
     expected.columns.name = 'Metric'
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #10
0
    def compute_slices(self, df, split_by=None):
        """Computes the quantile(s) of self.var, optionally per slice.

        When self.weight is truthy the work is delegated to the parent class.
        Otherwise the quantile is taken on self.group(df, split_by)[self.var].
        A result for a single quantile (self.one_quantile) is returned as is;
        otherwise the quantiles are spread into columns named through
        self.name_tmpl.format(self.var, <quantile>).

        Args:
          df: Data handed to self.group().
          split_by: Slicing columns handed to self.group(); falsy for no
            slicing.

        Returns:
          The quantile result as described above.
        """
        if self.weight:
            # When there is weight, just loop through slices.
            return super(Quantile, self).compute_slices(df, split_by)

        grouped = self.group(df, split_by)
        quantiles = grouped[self.var].quantile(
            self.quantile, interpolation=self.interpolation, **self.kwargs)
        if self.one_quantile:
            return quantiles

        if split_by:
            # The innermost index level is unstacked into the columns.
            wide = quantiles.unstack()
            wide.columns = [
                self.name_tmpl.format(self.var, q) for q in wide
            ]
        else:
            wide = utils.unmelt(pd.DataFrame(quantiles))
            # Column labels are indexable here; only their first element is
            # used in the name.
            wide.columns = [
                self.name_tmpl.format(self.var, label[0]) for label in wide
            ]
        return wide
Пример #11
0
 def test_multiple_index_column_and_multiple_splitby_unmelt(self):
     """Several value columns plus two split-by levels unmelt correctly."""
     metrics = ['foo', 'bar']
     groups = ['B', 'A']
     countries = ['US', 'non-US']
     value_cols = ['Value', 'SE']
     melted = pd.DataFrame(
         data={
             'Value': [0, 4, 8, 12, 2, 6, 10, 14],
             'SE': [1, 5, 9, 13, 3, 7, 11, 15]
         },
         index=pd.MultiIndex.from_product(
             (metrics, groups, countries),
             names=['Metric', 'grp', 'country']),
         columns=value_cols)
     # Four rows of four consecutive integers: 0-3, 4-7, 8-11, 12-15.
     rows = [range(start, start + 4) for start in range(0, 16, 4)]
     expected = pd.DataFrame(
         rows,
         columns=pd.MultiIndex.from_product(
             (metrics, value_cols), names=['Metric', None]),
         index=pd.MultiIndex.from_product(
             (groups, countries), names=['grp', 'country']))
     testing.assert_frame_equal(expected, utils.unmelt(melted))
Пример #12
0
    def final_compute(self,
                      std,
                      melted: bool = False,
                      return_dataframe: bool = True,
                      split_by: Optional[List[Text]] = None,
                      df=None):
        """Computes point estimates and returns them with stderrs or CI range.

        Args:
          std: Frame joined onto the melted point estimates. When
            self.confidence is truthy it must hold the '<prefix> CI-lower' and
            '<prefix> CI-upper' columns, which are treated as widths around
            the first column of the joined result.
          melted: If False, the result is passed through utils.unmelt().
          return_dataframe: Not read by this method.
          split_by: Forwarded to self.compute_child().
          df: Input data; filtered with self.where first when that is set.

        Returns:
          The joined frame, with the CI columns turned into bounds when
          self.confidence is truthy, in long or unmelted layout.
        """
        data = df.query(self.where) if self.where else df
        res = self.compute_child(data, split_by, melted=True).join(std)

        if self.confidence:
            lower = self.prefix + ' CI-lower'
            upper = self.prefix + ' CI-upper'
            # Column 0 holds the point estimate; widths become bounds.
            res[lower] = res.iloc[:, 0] - res[lower]
            res[upper] += res.iloc[:, 0]

        return res if melted else utils.unmelt(res)