def replace_totals(data_frame):
    """
    Reset the index of ``data_frame`` and replace the totals markers in the former index columns with NaN.

    Every index level is turned into a regular column. For each of those columns, the marker returned by
    ``get_totals_marker_for_dtype`` for the column's dtype is replaced with ``np.nan``.

    :param data_frame:
        The data frame to process. Its index level names are used to look up the columns produced by
        ``reset_index``.
    :return:
        A new data frame with the index reset and the markers replaced. ``data_frame`` is not modified.
    """
    index_names = data_frame.index.names
    raw = data_frame.reset_index()

    for name in index_names:
        marker = get_totals_marker_for_dtype(raw[name].dtype)
        # Assign the result back instead of calling ``replace(..., inplace=True)`` on the selected column.
        # The in-place form acts on an intermediate Series and is not guaranteed to write through to ``raw``
        # (under pandas Copy-on-Write it never does).
        raw[name] = raw[name].replace(marker, np.nan)

    return raw
def _replace_nans_for_totals_values(data_frame, dtypes):
    """
    Replace NaN values in the dimension columns with the totals marker for each dimension's dtype.

    :param data_frame:
        The data frame to process. After its index is reset, every key of ``dtypes`` must be one of its columns.
    :param dtypes:
        A mapping of dimension key to dtype. The dtype is passed to ``get_totals_marker_for_dtype`` to obtain
        the fill value for that dimension.
    :return:
        A new data frame indexed by the original index level names, with the NaN values filled.
        ``data_frame`` itself is left untouched.
    """
    # Some things are just easier to do without an index. Reset it temporarily to replace NaN values with the
    # rollup marker values. The reset is done on a copy so that the caller's frame keeps its index.
    index_names = data_frame.index.names
    flat = data_frame.reset_index()

    for dimension_key, dtype in dtypes.items():
        flat[dimension_key] = flat[dimension_key].fillna(get_totals_marker_for_dtype(dtype))

    return flat.set_index(index_names)
def _replace_rollup_constants_for_totals_markers(data_frame, dtypes):
    """
    Replace ``RollupValue.CONSTANT`` in the dimension columns with the totals marker for each dimension's dtype.

    :param data_frame:
        The data frame to process. After its index is reset, every key of ``dtypes`` must be one of its columns.
    :param dtypes:
        A mapping of dimension key to dtype. The dtype is passed to ``get_totals_marker_for_dtype`` to obtain
        the replacement value for that dimension.
    :return:
        A new data frame indexed by the original index level names, with the constants replaced.
        ``data_frame`` itself is left untouched.
    """
    # Some things are just easier to do without an index. Reset it temporarily to replace Rollup constants with
    # the rollup marker values. The reset is done on a copy so that the caller's frame keeps its index.
    index_names = data_frame.index.names
    flat = data_frame.reset_index()

    for dimension_key, dtype in dtypes.items():
        flat[dimension_key] = flat[dimension_key].replace(
            RollupValue.CONSTANT, get_totals_marker_for_dtype(dtype)
        )

    return flat.set_index(index_names)
def _totals(df): if isinstance(df, pd.Series): return df.sum() totals_index_value = get_totals_marker_for_dtype(df.index.levels[-1].dtype) return pd.DataFrame( [df.sum()], columns=columns, index=pd.Index([totals_index_value], name=df.index.names[-1]), )
def apply(self, data_frame, reference):
    """
    Express a metric column as a percentage of its totals.

    ``self.args`` is unpacked into ``(metric, over)``. The metric column is looked up under the key produced by
    ``alias_selector(reference_alias(metric, reference))``.

    - When ``over`` is None, every value is divided by itself and multiplied by 100.
    - When the index has a single level, the totals value is the row labelled with the totals marker for the
      index dtype. ``np.nan`` is returned if that value is 0.
    - Otherwise the totals rows are selected at the level of the ``over`` dimension, passed through
      ``reduce_data_frame_levels``, and each group of the metric column is divided by the matching totals.

    :param data_frame: The data frame holding the metric column.
    :param reference: Passed to ``reference_alias`` together with the metric.
    :return: The share values, or ``np.nan`` in the zero-totals single-level case.
    """
    metric, over = self.args
    metric_column = alias_selector(reference_alias(metric, reference))

    if over is None:
        values = data_frame[metric_column]
        return 100 * values / values

    if not isinstance(data_frame.index, pd.MultiIndex):
        totals_marker = get_totals_marker_for_dtype(data_frame.index.dtype)
        totals = data_frame.loc[totals_marker, metric_column]
        if totals == 0:
            return np.nan
        return 100 * data_frame[metric_column] / totals

    # Locate the "over" dimension in the index; it and every level after it form the grouping levels.
    over_position = data_frame.index.names.index(alias_selector(over.alias))
    group_levels = data_frame.index.names[over_position:]
    over_marker = get_totals_marker_for_dtype(data_frame.index.levels[over_position].dtype)

    # Select everything on the levels before the "over" dimension, and only the totals marker on that level.
    leading_levels = (slice(None),) * over_position
    totals_selector = leading_levels + (slice(over_marker, over_marker),)
    totals_rows = data_frame.loc[totals_selector, metric_column]
    totals = reduce_data_frame_levels(totals_rows, group_levels)

    def share_of_totals(group):
        if isinstance(totals, pd.Series):
            # Drop the index levels that the totals do not have so the two can be aligned by label.
            depth = len(totals.index.names)
            trailing_names = group.index.names[depth:]
            group = group.reset_index(trailing_names, drop=True)
            percentages = 100 * group / totals[group.index]
            return pd.Series(percentages.values, index=group.index)

        return 100 * group / totals

    grouped = data_frame[metric_column].groupby(level=group_levels)
    shares = grouped.apply(share_of_totals)
    reordered = shares.reorder_levels(order=data_frame.index.names)
    return reordered.sort_index()
def totals(data_frame, dimensions, columns):
    """
    Computes the totals across a dimension and adds the total as an extra row.

    For a single-level index, one row holding ``data_frame.sum()`` is added under the totals marker for the
    index dtype. In that case every column of ``data_frame`` is summed and ``columns`` is not used.

    For a multi-level index, one set of totals rows is computed per entry in ``dimensions``: on each pass one
    more trailing index level is dropped from the grouping levels, ``columns`` is summed per group, and the
    index levels missing from the result are filled with their totals markers. When no grouping levels remain,
    a single grand-total row is produced. The totals rows are added to the original rows and the result is
    sorted by index. With an empty ``dimensions`` no totals rows are added.

    :param data_frame: The data frame to add totals to.
    :param dimensions: Only its length is used; it sets how many levels of totals are computed.
    :param columns: The columns to total in the multi-level case.
    :return: A new data frame containing the original rows plus the totals rows.
    """
    if not isinstance(data_frame.index, pd.MultiIndex):
        totals_marker = get_totals_marker_for_dtype(data_frame.index.dtype)
        totals_df = pd.DataFrame(
            [data_frame.sum()],
            index=pd.Index([totals_marker], name=data_frame.index.name),
        )
        # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
        return pd.concat([data_frame, totals_df])

    def _totals(df):
        if isinstance(df, pd.Series):
            return df.sum()

        totals_index_value = get_totals_marker_for_dtype(df.index.levels[-1].dtype)
        return pd.DataFrame(
            [df.sum()],
            columns=columns,
            index=pd.Index([totals_index_value], name=df.index.names[-1]),
        )

    index_names = data_frame.index.names

    # Collect every frame and concatenate once at the end instead of appending repeatedly.
    frames = [data_frame]

    for i in range(-1, -1 - len(dimensions), -1):
        groupby_levels = index_names[:i]

        if groupby_levels:
            level_totals_df = data_frame[columns].groupby(level=groupby_levels).apply(_totals)

            missing_dims = set(index_names) - set(level_totals_df.index.names)
            if missing_dims:
                for dim in missing_dims:
                    dtype = data_frame.index.levels[index_names.index(dim)].dtype
                    level_totals_df[dim] = get_totals_marker_for_dtype(dtype)
                    level_totals_df = level_totals_df.set_index(dim, append=True)

                # The missing levels were appended at the end; restore the original level order.
                level_totals_df = level_totals_df.reorder_levels(index_names)

        else:
            # No levels left to group by: build a single grand-total row marked on every index level.
            totals_index_values = tuple(
                get_totals_marker_for_dtype(level.dtype) for level in data_frame.index.levels
            )
            level_totals_df = pd.DataFrame(
                [data_frame[columns].apply(_totals)],
                columns=columns,
                index=pd.MultiIndex.from_tuples([totals_index_values], names=index_names),
            )

        frames.append(level_totals_df)

    return pd.concat(frames).sort_index()