Пример #1
0
def replace_totals(data_frame):
    """
    Flattens the index of a data frame into columns and replaces the totals markers in those columns with NaN.

    :param data_frame:
        The data frame whose index levels should be scanned for totals markers. It is not modified; the work is
        done on the copy returned by `reset_index()`.
    :return:
        A new data frame with the former index levels as regular columns, where every value equal to the marker
        returned by `get_totals_marker_for_dtype` for that column's dtype has been replaced by `np.nan`.
    """
    index_names = data_frame.index.names

    raw = data_frame.reset_index()
    for name in index_names:
        marker = get_totals_marker_for_dtype(raw[name].dtype)
        # Assign the result back instead of calling `replace(..., inplace=True)` on `raw[name]`: the in-place form
        # operates on a temporary selection, which pandas warns about and which does not update `raw` when
        # Copy-on-Write is enabled.
        raw[name] = raw[name].replace(marker, np.nan)

    return raw
Пример #2
0
def _replace_nans_for_totals_values(data_frame, dtypes):
    """
    Replaces NaN values in the given dimension columns with the totals marker for each column's dtype.

    :param data_frame:
        The data frame to process. The dimension keys in `dtypes` must be available as columns once the index has
        been reset. The frame passed in is left untouched.
    :param dtypes:
        A mapping of dimension key to the dtype used to look up the totals marker for that dimension.
    :return:
        A new data frame, indexed by the same index names as the input, with the NaN values filled.
    """
    # some things are just easier to do without an index. Reset it temporarily to replace NaN values with the rollup
    # marker values. This is done on a copy (no `inplace=True`) so the caller's data frame keeps its index and its
    # original values.
    index_names = data_frame.index.names
    flat_df = data_frame.reset_index()

    for dimension_key, dtype in dtypes.items():
        flat_df[dimension_key] = flat_df[dimension_key].fillna(
            get_totals_marker_for_dtype(dtype))

    return flat_df.set_index(index_names)
Пример #3
0
def _replace_rollup_constants_for_totals_markers(data_frame, dtypes):
    """
    Replaces `RollupValue.CONSTANT` in the given dimension columns with the totals marker for each column's dtype.

    :param data_frame:
        The data frame to process. The dimension keys in `dtypes` must be available as columns once the index has
        been reset. The frame passed in is left untouched.
    :param dtypes:
        A mapping of dimension key to the dtype used to look up the totals marker for that dimension.
    :return:
        A new data frame, indexed by the same index names as the input, with the rollup constants replaced.
    """
    # some things are just easier to do without an index. Reset it temporarily to replace Rollup constants with the
    # rollup marker values. This is done on a copy (no `inplace=True`) so the caller's data frame keeps its index
    # and its original values.
    index_names = data_frame.index.names
    flat_df = data_frame.reset_index()

    for dimension_key, dtype in dtypes.items():
        flat_df[dimension_key] = flat_df[dimension_key].replace(
            RollupValue.CONSTANT, get_totals_marker_for_dtype(dtype))

    return flat_df.set_index(index_names)
Пример #4
0
    def _totals(df):
        """
        Sums a series to a scalar, or sums a data frame into a single row labelled with the totals marker.

        For a data frame, the resulting row is indexed by the totals marker matching the dtype of the last index
        level and the index keeps that level's name. `columns` is taken from the enclosing scope.
        """
        if not isinstance(df, pd.Series):
            innermost_level = df.index.levels[-1]
            marker = get_totals_marker_for_dtype(innermost_level.dtype)
            summed_row = df.sum()
            marker_index = pd.Index([marker], name=df.index.names[-1])
            return pd.DataFrame([summed_row], columns=columns, index=marker_index)

        return df.sum()
Пример #5
0
    def apply(self, data_frame, reference):
        """
        Expresses a metric column as a percentage (x100) of its totals.

        `self.args` is unpacked into `(metric, over)`:

        - When `over` is None, each value is divided by itself.
        - When the index is not a MultiIndex, each value is divided by the value stored in the totals-marker row.
          If that value is zero, a scalar `np.nan` is returned instead of a series.
        - Otherwise the totals rows are selected at the index level named after `over`, reduced with
          `reduce_data_frame_levels`, and every group is divided by the matching total.

        :param data_frame:
            The data frame holding the metric column; totals rows are expected to be marked in its index.
        :param reference:
            Passed to `reference_alias` together with the metric to build the column key.
        :return:
            The share values; in the MultiIndex case a series with the index levels in the original order, sorted.
        """
        metric, over = self.args
        metric_key = alias_selector(reference_alias(metric, reference))

        if over is None:
            values = data_frame[metric_key]
            return 100 * values / values

        if not isinstance(data_frame.index, pd.MultiIndex):
            totals_marker = get_totals_marker_for_dtype(data_frame.index.dtype)
            grand_total = data_frame.loc[totals_marker, metric_key]
            if grand_total == 0:
                return np.nan
            return 100 * data_frame[metric_key] / grand_total

        over_key = alias_selector(over.alias)
        over_position = data_frame.index.names.index(over_key)
        group_levels = data_frame.index.names[over_position:]
        over_marker = get_totals_marker_for_dtype(
            data_frame.index.levels[over_position].dtype)

        # Select every value on the levels before `over`, and only the totals marker on the `over` level.
        leading_levels = (slice(None), ) * over_position
        totals_selector = leading_levels + (slice(over_marker, over_marker), )

        totals = reduce_data_frame_levels(
            data_frame.loc[totals_selector, metric_key], group_levels)

        def share_of_totals(group):
            if isinstance(totals, pd.Series):
                # Drop the index levels that `totals` does not have so the group can be aligned with it.
                n_total_levels = len(totals.index.names)
                trailing_level_names = group.index.names[n_total_levels:]
                group = group.reset_index(trailing_level_names, drop=True)
                share = 100 * group / totals[group.index]
                return pd.Series(share.values, index=group.index)

            return 100 * group / totals

        grouped_shares = (data_frame[metric_key]
                          .groupby(level=group_levels)
                          .apply(share_of_totals))
        reordered = grouped_shares.reorder_levels(order=data_frame.index.names)
        return reordered.sort_index()
Пример #6
0
def totals(data_frame, dimensions, columns):
    """
    Computes the totals across a dimension and adds the total as an extra row.

    :param data_frame:
        The data frame to total. With a single-level index one totals row (the sum of all columns) is appended.
        With a MultiIndex, totals rows are computed per index level, starting from the innermost level.
    :param dimensions:
        Only its length is used: it determines how many index levels, counted from the innermost one, get totals.
    :param columns:
        The columns to total in the MultiIndex case.
    :return:
        A new data frame containing the original rows plus the totals rows, labelled with the marker returned by
        `get_totals_marker_for_dtype`. In the MultiIndex case the result is sorted by index.
    """
    # NOTE: `DataFrame.append` was removed in pandas 2.0, so rows are combined with `pd.concat`, which is what
    # `append` delegated to.
    if not isinstance(data_frame.index, pd.MultiIndex):
        totals_marker = get_totals_marker_for_dtype(data_frame.index.dtype)
        totals_df = pd.DataFrame(
            [data_frame.sum()],
            index=pd.Index([totals_marker], name=data_frame.index.name),
        )

        return pd.concat([data_frame, totals_df])

    def _totals(df):
        if isinstance(df, pd.Series):
            return df.sum()

        totals_index_value = get_totals_marker_for_dtype(
            df.index.levels[-1].dtype)

        return pd.DataFrame(
            [df.sum()],
            columns=columns,
            index=pd.Index([totals_index_value], name=df.index.names[-1]),
        )

    # One totals frame per level, collected in order and concatenated once at the end.
    level_totals_dfs = []
    for i in range(-1, -1 - len(dimensions), -1):
        groupby_levels = data_frame.index.names[:i]

        if groupby_levels:
            level_totals_df = (data_frame[columns].groupby(
                level=groupby_levels).apply(_totals))

            # Index levels that were summed away are re-added, filled with the totals marker for their dtype.
            missing_dims = set(data_frame.index.names) - set(
                level_totals_df.index.names)
            if missing_dims:
                for dim in missing_dims:
                    dtype = data_frame.index.levels[
                        data_frame.index.names.index(dim)].dtype
                    level_totals_df[dim] = get_totals_marker_for_dtype(dtype)
                    level_totals_df.set_index(dim, append=True, inplace=True)

                # Set iteration order is arbitrary, so restore the original level order explicitly.
                level_totals_df = level_totals_df.reorder_levels(
                    data_frame.index.names)

        else:
            # No levels left to group by: a single grand-total row with the totals marker on every level.
            totals_index_values = [
                get_totals_marker_for_dtype(level.dtype)
                for level in data_frame.index.levels
            ]
            level_totals_df = pd.DataFrame(
                [data_frame[columns].apply(_totals)],
                columns=columns,
                index=pd.MultiIndex.from_tuples([totals_index_values],
                                                names=data_frame.index.names),
            )

        level_totals_dfs.append(level_totals_df)

    return pd.concat([data_frame] + level_totals_dfs).sort_index()