Example #1
    def map_display_values(df, dimensions):
        """
        Creates a mapping for dimension values to their display values.

        :param df:
            The result data set that is being transformed.
        :param dimensions:
            The list of dimensions included in the query that created the result data set df.
        :return:
            A tree-structure dict with two levels of depth. The top level dict has keys for each dimension's display
            key. The lower level dict has keys for each raw dimension value and values which are the display value.
        """
        dimension_display_values = {}

        for dimension in dimensions:
            f_dimension_key = format_dimension_key(dimension.key)

            if dimension.has_display_field:
                f_display_key = format_dimension_key(dimension.display_key)

                dimension_display_values[f_dimension_key] = \
                    df[f_display_key].groupby(f_dimension_key).first().to_dict()

                del df[f_display_key]

            if hasattr(dimension, 'display_values'):
                dimension_display_values[
                    f_dimension_key] = dimension.display_values

        return dimension_display_values
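
A rough standalone illustration of the mapping this builds, using plain pandas and hypothetical keys ('party', 'party_display') in place of the formatted dimension keys:

import pandas as pd

# 'party' stands in for the formatted dimension key, 'party_display' for its display key.
df = pd.DataFrame({'party': ['d', 'r', 'd'],
                   'party_display': ['Democrat', 'Republican', 'Democrat'],
                   'votes': [10, 12, 7]}).set_index('party')

# Same chain as above: group the display column by the dimension key and keep the first value.
display_values = {'party': df['party_display'].groupby('party').first().to_dict()}
# {'party': {'d': 'Democrat', 'r': 'Republican'}}
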
Example #2
def extract_display_values(dimensions, data_frame):
    """
    Retrieves the display values for each dimension.

    For UniqueDimension, this will retrieve the display values from the data frame containing the data from the slicer
    query. For CategoricalDimension, the values are retrieved from the set of display values configured in the slicer.

    :param dimensions:
        A list of dimensions present in a slicer query.
    :param data_frame:
        The data frame containing the data result of the slicer query.
    :return:
        A dict containing keys for dimensions with display values (if there are no display values then the
        dimension's key will not be present). The values are either a dict or a pandas Series in which the display
        value can be accessed using the raw dimension value as the key.
    """
    display_values = {}

    for dimension in dimensions:
        key = utils.format_dimension_key(dimension.key)

        if hasattr(dimension, 'display_values'):
            display_values[key] = dimension.display_values

        elif dimension.has_display_field:
            f_display_key = utils.format_dimension_key(dimension.display_key)

            display_values[key] = data_frame[f_display_key] \
                .groupby(level=key) \
                .first() \
                .fillna(value=NULL_VALUE) \
                .replace([np.inf, -np.inf], INF_VALUE)

    return display_values
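
A minimal plain-pandas sketch of the has_display_field branch above, with hypothetical 'timestamp'/'party' index levels and a 'party_display' column standing in for the formatted keys:

import pandas as pd

index = pd.MultiIndex.from_tuples([('2020-01-01', 'd'), ('2020-01-01', 'r'), ('2020-01-02', 'd')],
                                  names=['timestamp', 'party'])
frame = pd.DataFrame({'party_display': ['Democrat', 'Republican', 'Democrat'],
                      'votes': [5, 7, 6]},
                     index=index)

# One display value per raw dimension value, keyed by the raw value.
display = frame['party_display'].groupby(level='party').first()
# 'd' -> 'Democrat', 'r' -> 'Republican'
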
Example #3
def make_terms_for_dimension(dimension, window=None):
    """
    Makes a list of pypika terms for a given slicer definition.

    :param dimension:
        A slicer dimension.
    :param window:
        A window function to apply to the dimension definition if it is a continuous dimension.
    :return:
        a list of terms required to select and group by in a SQL query given a slicer dimension. This list will contain
        either one or two elements. A second element will be included if the dimension has a definition for its display
        field.
    """

    # Apply the window function to continuous dimensions only
    dimension_definition = (window(dimension.definition, dimension.interval)
                            if window and hasattr(dimension, 'interval') else
                            dimension.definition).as_(
                                format_dimension_key(dimension.key))

    # Include the display definition if there is one
    return [
        dimension_definition,
        dimension.display_definition.as_(
            format_dimension_key(dimension.display_key))
    ] if dimension.has_display_field else [dimension_definition]
Example #4
def make_terms_for_dimension(dimension, window=None):
    """
    Makes a list of pypika terms for a given slicer definition.

    :param dimension:
        A slicer dimension.
    :param window:
        A window function to apply to the dimension definition if it is a continuous dimension.
    :return:
        a list of terms required to select and group by in a SQL query given a slicer dimension. This list will contain
        either one or two elements. A second element will be included if the dimension has a definition for its display
        field.
    """

    # Apply the window function to continuous dimensions only
    dimension_definition = (
        window(dimension.definition, dimension.interval)
        if window and hasattr(dimension, 'interval')
        else dimension.definition
    ).as_(format_dimension_key(dimension.key))

    # Include the display definition if there is one
    return [
        dimension_definition,
        dimension.display_definition.as_(format_dimension_key(dimension.display_key))
    ] if dimension.has_display_field else [
        dimension_definition
    ]
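
For intuition, a hypothetical pypika sketch of the two terms produced for a dimension with a display field; the table, columns, and aliases are made up, and the real aliases would be the formatted dimension keys:

from pypika import Table

customers = Table('customers')

# The dimension definition aliased to the dimension key, plus the display definition.
# A continuous dimension's definition would first be wrapped by the window function.
terms = [customers.customer_id.as_('customer'),
         customers.customer_name.as_('customer_display')]
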
Example #5
    def transform(self, data_frame, slicer, dimensions, references):
        """
        Transforms a data frame into a format for ReactTable. This is an object containing attributes `columns` and
        `data` which align with the props in ReactTable with the same name.

        :param data_frame:
            The result set data frame
        :param slicer:
            The slicer that generated the data query
        :param dimensions:
            A list of dimensions that were selected in the data query
        :param references:
            A list of references that were selected in the data query
        :return:
            A dict containing attributes `columns` and `data` which align with the props in ReactTable with the same
            names.
        """
        df_dimension_columns = [
            format_dimension_key(d.display_key) for d in dimensions
            if d.has_display_field
        ]
        item_map = OrderedDict([(format_metric_key(reference_key(i,
                                                                 reference)),
                                 ReferenceItem(i, reference))
                                for i in self.items
                                for reference in [None] + references])
        df_metric_columns = list(item_map.keys())

        # Add an extra item to map the totals markers to its label
        item_map[MAX_NUMBER] = TotalsItem
        item_map[MAX_STRING] = TotalsItem
        item_map[TOTALS_LABEL] = TotalsItem

        df = data_frame[df_dimension_columns + df_metric_columns].copy()

        dimension_display_values = self.map_display_values(df, dimensions)

        self.format_data_frame(df, dimensions)

        dimension_keys = [
            format_dimension_key(dimension.key) for dimension in self.pivot
        ]
        df = self.pivot_data_frame(df, dimension_keys, self.transpose) \
            .fillna(value=NULL_VALUE) \
            .replace([np.inf, -np.inf], INF_VALUE)

        dimension_hyperlink_templates = self.map_hyperlink_templates(
            df, dimensions)

        dimension_columns = self.transform_dimension_column_headers(
            df, dimensions)
        metric_columns = self.transform_metric_column_headers(
            df, item_map, dimension_display_values)
        data = self.transform_data(df, item_map, dimension_display_values,
                                   dimension_hyperlink_templates)

        return {
            'columns': dimension_columns + metric_columns,
            'data': data,
        }
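
A simplified, hypothetical shape of the return value; in the real output the accessors are the formatted dimension and metric keys and the cell values are produced by transform_data:

reacttable_props = {
    'columns': [
        {'Header': 'Timestamp', 'accessor': 'timestamp'},
        {'Header': 'Votes', 'accessor': 'votes'},
    ],
    'data': [
        {'timestamp': '2020-01-01', 'votes': 5000},
        {'timestamp': '2020-01-02', 'votes': 4800},
    ],
}
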
Example #6
def reduce_result_set(results: Iterable[pd.DataFrame], reference_groups,
                      dimensions: Iterable[Dimension],
                      share_dimensions: Iterable[Dimension]):
    """
    Reduces the result sets from individual queries into a single data frame. This effectively joins sets of references
    and concats the sets of totals.

    :param results: A list of data frames, one per query.
    :param reference_groups: A list of groups of references (grouped by interval such as WoW, etc)
    :param dimensions: A list of dimensions, used for setting the index on the result data frame.
    :param share_dimensions: A list of dimensions from which the totals are used for calculating share operations.
    :return: A single data frame containing the reduced results of all of the queries.
    """

    # One result group for each rolled up dimension. Groups contain one member plus one for each reference type used.
    result_groups = chunks(results, 1 + len(reference_groups))

    dimension_keys = [format_dimension_key(d.key) for d in dimensions]
    totals_dimension_keys = [
        format_dimension_key(d.key)
        for d in find_totals_dimensions(dimensions, share_dimensions)
    ]
    dimension_dtypes = result_groups[0][0][dimension_keys].dtypes

    # Reduce each group to one data frame per rolled up dimension
    group_data_frames = []
    for i, result_group in enumerate(result_groups):
        if dimension_keys:
            result_group = [
                result.set_index(dimension_keys) for result in result_group
            ]

        base_df = result_group[0]
        reference_dfs = [
            _make_reference_data_frame(base_df, result, reference) for result,
            reference_group in zip(result_group[1:], reference_groups)
            for reference in reference_group
        ]

        reduced = reduce(
            lambda left, right: pd.merge(
                left, right, how='outer', left_index=True, right_index=True),
            [base_df] + reference_dfs)

        # If there are rolled up dimensions in this result set then replace the NaNs for that dimension value with a
        # marker to indicate totals.
        # The data frames will be ordered so that the first group will contain the data without any rolled up
        # dimensions, then followed by the groups with them, ordered by the last rollup dimension first.
        if totals_dimension_keys[:i]:
            reduced = _replace_nans_for_totals_values(reduced,
                                                      dimension_dtypes)

        group_data_frames.append(reduced)

    return pd.concat(group_data_frames, sort=False) \
        .sort_index(na_position='first')
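
The heart of the reduction is the pairwise outer join on the shared index; a standalone plain-pandas sketch with a base result and one made-up reference column:

from functools import reduce

import pandas as pd

base = pd.DataFrame({'votes': [100, 120]},
                    index=pd.Index(['2019', '2020'], name='year'))
reference = pd.DataFrame({'votes_yoy': [90, 100]},
                         index=pd.Index(['2019', '2020'], name='year'))

reduced = reduce(lambda left, right: pd.merge(left, right, how='outer',
                                              left_index=True, right_index=True),
                 [base, reference])
# One frame with columns 'votes' and 'votes_yoy', indexed by 'year'.
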
Example #7
    def map_hyperlink_templates(df, dimensions):
        """
        Creates a mapping for each dimension to its hyperlink template if it is possible to create the hyperlink
        template for it.

        The hyperlink template is a URL-like string containing curly braces enclosing dimension keys: `{dimension}`.
        While rendering this widget, the dimension key placeholders need to be replaced with the dimension values for
        that row.

        :param df:
            The result data set that is being transformed. The data frame SHOULD be pivoted/transposed if that step is
            required, before calling this function, in order to prevent the template from being included for the
            dimension if one of the required dimensions is pivoted.
        :param dimensions:
            The list of dimensions included in the query that created the result data set df.
        :return:
            A dict with the dimension key as the key and the hyperlink template as the value. Templates will only be
            included if it will be possible to fill in the required parameters.
        """
        hyperlink_templates = {}
        pattern = re.compile(r'{[^{}]+}')

        for dimension in dimensions:
            hyperlink_template = dimension.hyperlink_template
            if hyperlink_template is None:
                continue

            required_hyperlink_parameters = [
                format_dimension_key(argument[1:-1])
                for argument in pattern.findall(hyperlink_template)
            ]

            # Check that all of the required dimensions are in the result data set. Only include the hyperlink template
            # in the return value of this function if all are present.
            unavailable_hyperlink_parameters = set(
                required_hyperlink_parameters) - set(df.index.names)
            if unavailable_hyperlink_parameters:
                continue

            # replace the dimension keys with the formatted values. This will come in handy later when replacing the
            # actual values
            hyperlink_template = hyperlink_template.format(
                **{
                    argument[3:]: '{' + argument + '}'
                    for argument in required_hyperlink_parameters
                })

            f_dimension_key = format_dimension_key(dimension.key)
            hyperlink_templates[f_dimension_key] = hyperlink_template

        return hyperlink_templates
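
A standalone sketch of the placeholder re-keying, assuming format_dimension_key prepends a three-character prefix such as '$d$' (which is what the argument[3:] slice implies):

import re

pattern = re.compile(r'{[^{}]+}')
template = 'http://example.com/parties/{political_party}'

required = ['$d$' + argument[1:-1] for argument in pattern.findall(template)]
rekeyed = template.format(**{argument[3:]: '{' + argument + '}' for argument in required})
# 'http://example.com/parties/{$d$political_party}'
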
Example #8
def reduce_result_set(results: Iterable[pd.DataFrame],
                      reference_groups,
                      dimensions: Iterable[Dimension],
                      share_dimensions: Iterable[Dimension]):
    """
    Reduces the result sets from individual queries into a single data frame. This effectively joins sets of references
    and concats the sets of totals.

    :param results: A list of data frames, one per query.
    :param reference_groups: A list of groups of references (grouped by interval such as WoW, etc)
    :param dimensions: A list of dimensions, used for setting the index on the result data frame.
    :param share_dimensions: A list of dimensions from which the totals are used for calculating share operations.
    :return: A single data frame containing the reduced results of all of the queries.
    """

    # One result group for each rolled up dimension. Groups contain one member plus one for each reference type used.
    result_groups = chunks(results, 1 + len(reference_groups))

    dimension_keys = [format_dimension_key(d.key)
                      for d in dimensions]
    totals_dimension_keys = [format_dimension_key(d.key)
                             for d in find_totals_dimensions(dimensions, share_dimensions)]
    dimension_dtypes = result_groups[0][0][dimension_keys].dtypes

    # Reduce each group to one data frame per rolled up dimension
    group_data_frames = []
    for i, result_group in enumerate(result_groups):
        if dimension_keys:
            result_group = [result.set_index(dimension_keys)
                            for result in result_group]

        base_df = result_group[0]
        reference_dfs = [_make_reference_data_frame(base_df, result, reference)
                         for result, reference_group in zip(result_group[1:], reference_groups)
                         for reference in reference_group]

        reduced = reduce(lambda left, right: pd.merge(left, right, how='outer', left_index=True, right_index=True),
                         [base_df] + reference_dfs)

        # If there are rolled up dimensions in this result set then replace the NaNs for that dimension value with a
        # marker to indicate totals.
        # The data frames will be ordered so that the first group will contain the data without any rolled up
        # dimensions, then followed by the groups with them, ordered by the last rollup dimension first.
        if totals_dimension_keys[:i]:
            reduced = _replace_nans_for_totals_values(reduced, dimension_dtypes)

        group_data_frames.append(reduced)

    return pd.concat(group_data_frames, sort=False) \
        .sort_index(na_position='first')
Example #9
    def _data_row(self, dimensions, dimension_values, dimension_display_values, references, row_data):
        """
        Renders a single row of table data, one rendered cell per dimension and per metric/reference combination.

        :param dimensions:
            A list of dimensions present in the result set.
        :param dimension_values:
            The index values for this row, one per dimension.
        :param dimension_display_values:
            A map of dimension key to raw-value/display-value mappings.
        :param references:
            A list of references selected in the data query.
        :param row_data:
            The series of data for this row.
        :return:
            A dict of rendered cell values keyed by dimension key and reference-qualified metric key.
        """
        row = {}

        for dimension, dimension_value in zip(dimensions, utils.wrap_list(dimension_values)):
            df_key = format_dimension_key(dimension.key)
            row[dimension.key] = _render_dimension_cell(dimension_value, dimension_display_values.get(df_key))

        for metric in self.items:
            for reference in [None] + references:
                key = reference_key(metric, reference)
                df_key = format_metric_key(key)

                row[key] = _render_dimensional_metric_cell(row_data, metric) \
                    if isinstance(row_data.index, pd.MultiIndex) \
                    else _format_metric_cell(row_data[df_key], metric)

        return row
Example #10
    def apply(self, data_frame, reference):
        f_metric_key = format_metric_key(reference_key(self.metric, reference))

        if self.over is None:
            df = data_frame[f_metric_key]
            return 100 * df / df

        if not isinstance(data_frame.index, pd.MultiIndex):
            marker = get_totals_marker_for_dtype(data_frame.index.dtype)
            totals = data_frame.loc[marker, f_metric_key]
            return 100 * data_frame[f_metric_key] / totals

        f_over_key = format_dimension_key(self.over.key)
        idx = data_frame.index.names.index(f_over_key)
        group_levels = data_frame.index.names[idx:]
        over_dim_value = get_totals_marker_for_dtype(data_frame.index.levels[idx].dtype)
        totals_key = (slice(None),) * idx + (slice(over_dim_value, over_dim_value),)

        totals = reduce_data_frame_levels(data_frame.loc[totals_key, f_metric_key], group_levels)

        def apply_totals(group_df):
            if not isinstance(totals, pd.Series):
                return 100 * group_df / totals

            n_index_levels = len(totals.index.names)
            extra_level_names = group_df.index.names[n_index_levels:]
            group_df = group_df.reset_index(extra_level_names, drop=True)
            share = 100 * group_df / totals[group_df.index]
            return pd.Series(share.values, index=group_df.index)

        return data_frame[f_metric_key] \
            .groupby(level=group_levels) \
            .apply(apply_totals) \
            .reorder_levels(order=data_frame.index.names) \
            .sort_index()
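
The simplest case above (a single-level index) reduces to dividing each value by the value stored at the totals marker row; a plain-pandas sketch with a made-up string marker standing in for get_totals_marker_for_dtype:

import pandas as pd

frame = pd.DataFrame({'votes': [60, 40, 100]},
                     index=pd.Index(['d', 'r', '~totals~'], name='party'))

totals = frame.loc['~totals~', 'votes']  # 100
share = 100 * frame['votes'] / totals    # 60.0, 40.0, 100.0
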
Example #11
File: builder.py Project: kayak/fireant
    def fetch(self, hint=None, force_include=()) -> pd.Series:
        """
        Fetch the available values for the dimension in this query.

        :param hint:
            For database vendors that support it, add a query hint to collect analytics on the queries triggered by
            fireant.
        :param force_include:
            A list of dimension values to include in the result set. This can be used to avoid having necessary results
            cut off due to the pagination.  These results will be returned at the head of the results.
        :return:
            A pandas Series of the available dimension values, using display values where available.
        """
        query = add_hints(self.queries, hint)[0]

        dimension = self._dimensions[0]
        definition = dimension.display_definition.as_(format_dimension_key(dimension.display_key)) \
            if dimension.has_display_field \
            else dimension.definition.as_(format_dimension_key(dimension.key))

        if force_include:
            include = self.slicer.database.to_char(dimension.definition) \
                .isin([str(x) for x in force_include])

            # Ensure that these values are included
            query = query.orderby(include, order=Order.desc)

        # Order by the dimension definition that the choices are for
        query = query.orderby(definition)

        data = fetch_data(self.slicer.database, [query], self._dimensions)

        df_key = format_dimension_key(getattr(dimension, 'display_key', None))
        if df_key is not None:
            return data[df_key]

        display_key = 'display'
        if hasattr(dimension, 'display_values'):
            # Include provided display values
            data[display_key] = pd.Series(dimension.display_values)
        else:
            data[display_key] = data.index.tolist()

        return data[display_key]
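
A small pypika sketch of the force_include trick: ordering by the "is in the forced set" expression in descending order floats the forced values to the head of the result set (table and column names are hypothetical, and the real code first casts the definition with database.to_char):

from pypika import Order, Query, Table

accounts = Table('accounts')

include = accounts.account_name.isin(['alice', 'bob'])
query = Query.from_(accounts) \
    .select(accounts.account_name) \
    .orderby(include, order=Order.desc) \
    .orderby(accounts.account_name)
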
Example #13
def make_latest_query(database: Database,
                      base_table: Table,
                      joins: Iterable[Join] = (),
                      dimensions: Iterable[Dimension] = ()):
    query = database.query_cls.from_(base_table)

    # Add joins
    join_tables_needed_for_query = find_required_tables_to_join(dimensions, base_table)
    for join in find_joins_for_tables(joins, base_table, join_tables_needed_for_query):
        query = query.join(join.table, how=join.join_type).on(join.criterion)

    for dimension in dimensions:
        f_dimension_key = format_dimension_key(dimension.key)
        query = query.select(fn.Max(dimension.definition).as_(f_dimension_key))

    return query
Example #14
def make_latest_query(database: Database,
                      base_table: Table,
                      joins: Iterable[Join] = (),
                      dimensions: Iterable[Dimension] = ()):
    query = database.query_cls.from_(base_table)

    # Add joins
    join_tables_needed_for_query = find_required_tables_to_join(
        dimensions, base_table)
    for join in find_joins_for_tables(joins, base_table,
                                      join_tables_needed_for_query):
        query = query.join(join.table, how=join.join_type).on(join.criterion)

    for dimension in dimensions:
        f_dimension_key = format_dimension_key(dimension.key)
        query = query.select(fn.Max(dimension.definition).as_(f_dimension_key))

    return query
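
Roughly, for a single hypothetical 'timestamp' dimension the generated query selects the maximum of its definition aliased to the formatted key:

from pypika import Query, Table, functions as fn

events = Table('events')
query = Query.from_(events).select(fn.Max(events.timestamp).as_('timestamp'))
# e.g. SELECT MAX("timestamp") "timestamp" FROM "events"
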
Example #15
    def _replace_display_values_in_index(self, dimension, result):
        """
        Replaces the raw values of a (categorical) dimension in the index with their corresponding display values.
        """
        if len(result) == 0:
            return result

        if isinstance(result.index, pd.MultiIndex):
            df_key = format_dimension_key(dimension.key)
            level = result.index.names.index(df_key)
            values = [
                dimension.display_values.get(x, x)
                for x in result.index.levels[level]
            ]
            result.index.set_levels(level=df_key, levels=values, inplace=True)
            return result

        values = [dimension.display_values.get(x, x) for x in result.index]
        result.index = pd.Index(values, name=result.index.name)
        return result
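
A plain-pandas sketch of the MultiIndex branch with hypothetical levels and display values; the sketch reassigns the index instead of using inplace=True, but the effect is the same:

import pandas as pd

index = pd.MultiIndex.from_tuples([('2020', 'd'), ('2020', 'r')], names=['year', 'party'])
result = pd.DataFrame({'votes': [60, 40]}, index=index)

display_values = {'d': 'Democrat', 'r': 'Republican'}
level = result.index.names.index('party')
values = [display_values.get(x, x) for x in result.index.levels[level]]
result.index = result.index.set_levels(values, level='party')
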
Example #16
    def transform_dimension_column_headers(data_frame, dimensions):
        """
        Convert the un-pivoted dimensions into ReactTable column header definitions.

        :param data_frame:
            The result set data frame
        :param dimensions:
            A list of dimensions in the data frame that are part of the index
        :return:
            A list of column header definitions with the following structure.


        .. code-block:: jsx

            columns = [{
              Header: 'Column A',
              accessor: 'a',
            }, {
              Header: 'Column B',
              accessor: 'b',
            }]
        """
        dimension_map = {
            format_dimension_key(d.key): d
            for d in dimensions + [metrics]
        }

        columns = []
        if not isinstance(data_frame.index,
                          pd.MultiIndex) and data_frame.index.name is None:
            return columns

        for f_dimension_key in data_frame.index.names:
            dimension = dimension_map[f_dimension_key]

            columns.append({
                'Header': getattr(dimension, 'label', dimension.key),
                'accessor': f_dimension_key,
            })

        return columns
Example #17
File: pandas.py Project: kayak/fireant
    def _replace_display_values_in_index(self, dimension, result):
        """
        Replaces the raw values of a (categorical) dimension in the index with their corresponding display values.
        """
        if len(result) == 0:
            return result

        if isinstance(result.index, pd.MultiIndex):
            df_key = format_dimension_key(dimension.key)
            level = result.index.names.index(df_key)
            values = [dimension.display_values.get(x, x)
                      for x in result.index.levels[level]]
            result.index.set_levels(level=df_key,
                                    levels=values,
                                    inplace=True)
            return result

        values = [dimension.display_values.get(x, x)
                  for x in result.index]
        result.index = pd.Index(values, name=result.index.name)
        return result
Example #18
    def render_series_label(dimension_values, metric=None, reference=None):
        """
        Returns a string label for a metric, reference, and set of values for zero or more dimensions.

        :param metric:
            an instance of fireant.Metric
        :param reference:
            an instance of fireant.Reference
        :param dimension_values:
            a tuple of dimension values. Can be zero-length or longer.
        :return:
        """
        # normalize the dimension values, as we expect them to be an iterable, in
        # order to calculate the number of used dimensions safely
        dimension_values = utils.wrap_list(dimension_values)

        num_used_dimensions = len(dimensions) - len(dimension_values)
        used_dimensions = dimensions[num_used_dimensions:]

        dimension_labels = [utils.getdeepattr(dimension_display_values,
                                              (utils.format_dimension_key(dimension.key), dimension_value),
                                              dimension_value)
                            if dimension_value not in TOTALS_MARKERS
                            else 'Totals'
                            for dimension, dimension_value in zip(used_dimensions, dimension_values)]

        label = ", ".join([str(label) for label in dimension_labels])

        if metric is None:
            if reference is not None:
                return '{} ({})'.format(label, reference.label)
            return label

        if dimension_labels:
            return '{} ({})'.format(reference_label(metric, reference),
                                    label)

        return reference_label(metric, reference)
Example #19
    def apply(self, data_frame, reference):
        f_metric_key = format_metric_key(reference_key(self.metric, reference))

        if self.over is None:
            df = data_frame[f_metric_key]
            return 100 * df / df

        if not isinstance(data_frame.index, pd.MultiIndex):
            marker = get_totals_marker_for_dtype(data_frame.index.dtype)
            totals = data_frame.loc[marker, f_metric_key]
            return 100 * data_frame[f_metric_key] / totals

        f_over_key = format_dimension_key(self.over.key)
        idx = data_frame.index.names.index(f_over_key)
        group_levels = data_frame.index.names[idx:]
        over_dim_value = get_totals_marker_for_dtype(
            data_frame.index.levels[idx].dtype)
        totals_key = (slice(None), ) * idx + (slice(over_dim_value,
                                                    over_dim_value), )

        totals = reduce_data_frame_levels(
            data_frame.loc[totals_key, f_metric_key], group_levels)

        def apply_totals(group_df):
            if not isinstance(totals, pd.Series):
                return 100 * group_df / totals

            n_index_levels = len(totals.index.names)
            extra_level_names = group_df.index.names[n_index_levels:]
            group_df = group_df.reset_index(extra_level_names, drop=True)
            share = 100 * group_df / totals[group_df.index]
            return pd.Series(share.values, index=group_df.index)

        return data_frame[f_metric_key] \
            .groupby(level=group_levels) \
            .apply(apply_totals) \
            .reorder_levels(order=data_frame.index.names) \
            .sort_index()
Example #20
    annually: '%Y',
}
from fireant.utils import (
    format_dimension_key,
    wrap_list,
)
from fireant.slicer.totals import TOTALS_MARKERS
from fireant.slicer.totals import (
    MAX_NUMBER,
    MAX_STRING,
    MAX_TIMESTAMP,
)

TOTALS_LABEL = 'Totals'
metrics = Dimension('metrics', '')
metrics_dimension_key = format_dimension_key(metrics.key)


def map_index_level(index, level, func):
    # If the index is empty, do not do anything
    if 0 == index.size:
        return index

    if isinstance(index, pd.MultiIndex):
        values = index.levels[level]
        return index.set_levels(values.map(func), level)

    assert level == 0

    return index.map(func)
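
A quick usage sketch of map_index_level on a flat index and on level 0 of a MultiIndex (values are hypothetical):

import pandas as pd

flat = pd.Index(['a', 'b'], name='letter')
map_index_level(flat, 0, str.upper)     # Index(['A', 'B'], ...)

nested = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)], names=['letter', 'number'])
map_index_level(nested, 0, str.upper)   # level 0 becomes ['A', 'B']
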
Example #21
File: pandas.py Project: kayak/fireant
    def transform(self, data_frame, slicer, dimensions, references):
        """
        Transforms a data frame into a pandas.DataFrame formatted for display: metric values are formatted with their
        prefix/suffix/precision, raw dimension values are replaced with display values, and the result is pivoted
        and/or transposed as configured on the widget.

        :param data_frame:
            The result set data frame
        :param slicer:
            The slicer that generated the data query
        :param dimensions:
            A list of dimensions that were selected in the data query
        :param references:
            A list of references that were selected in the data query
        :return:
            A pandas.DataFrame formatted for display.
        """
        result = data_frame.copy()

        for metric in self.items:
            if any([metric.precision is not None,
                    metric.prefix is not None,
                    metric.suffix is not None]):
                df_key = format_metric_key(metric.key)

                result[df_key] = result[df_key] \
                    .apply(lambda x: formats.metric_display(x, metric.prefix, metric.suffix, metric.precision))

            for reference in references:
                df_ref_key = format_metric_key(reference_key(metric, reference))

                if reference.delta_percent:
                    result[df_ref_key] = result[df_ref_key].apply(lambda x: formats.metric_display(
                        x,
                        reference_prefix(metric, reference),
                        reference_suffix(metric, reference),
                        metric.precision))

        for dimension in dimensions:
            if dimension.has_display_field:
                result = result.set_index(format_dimension_key(dimension.display_key), append=True)
                result = result.reset_index(format_dimension_key(dimension.key), drop=True)

            if hasattr(dimension, 'display_values'):
                self._replace_display_values_in_index(dimension, result)

        if isinstance(data_frame.index, pd.MultiIndex):
            index_levels = [dimension.display_key
                            if dimension.has_display_field
                            else dimension.key
                            for dimension in dimensions]

            result = result.reorder_levels([format_dimension_key(level)
                                            for level in index_levels])

        result = result[[format_metric_key(reference_key(item, reference))
                         for reference in [None] + references
                         for item in self.items]]

        if dimensions:
            result.index.names = [dimension.label or dimension.key
                                  for dimension in dimensions]

        result.columns = pd.Index([reference_label(item, reference)
                                   for item in self.items
                                   for reference in [None] + references],
                                  name='Metrics')

        return self.pivot_data_frame(result, [d.label or d.key for d in self.pivot], self.transpose)
Example #22
    def transform(self, data_frame, slicer, dimensions, references):
        """
        Transforms a data frame into a pandas.DataFrame formatted for display: metric values are formatted with their
        prefix/suffix/precision, raw dimension values are replaced with display values, and the result is pivoted
        and/or transposed as configured on the widget.

        :param data_frame:
            The result set data frame
        :param slicer:
            The slicer that generated the data query
        :param dimensions:
            A list of dimensions that were selected in the data query
        :param references:
            A list of references that were selected in the data query
        :return:
            A pandas.DataFrame formatted for display.
        """
        result = data_frame.copy()

        for metric in self.items:
            if any([
                    metric.precision is not None, metric.prefix is not None,
                    metric.suffix is not None
            ]):
                df_key = format_metric_key(metric.key)

                result[df_key] = result[df_key] \
                    .apply(lambda x: formats.metric_display(x, metric.prefix, metric.suffix, metric.precision))

            for reference in references:
                df_ref_key = format_metric_key(reference_key(
                    metric, reference))

                if reference.delta_percent:
                    result[df_ref_key] = result[df_ref_key].apply(
                        lambda x: formats.metric_display(
                            x, reference_prefix(metric, reference),
                            reference_suffix(metric, reference), metric.
                            precision))

        for dimension in dimensions:
            if dimension.has_display_field:
                result = result.set_index(format_dimension_key(
                    dimension.display_key),
                                          append=True)
                result = result.reset_index(format_dimension_key(
                    dimension.key),
                                            drop=True)

            if hasattr(dimension, 'display_values'):
                self._replace_display_values_in_index(dimension, result)

        if isinstance(data_frame.index, pd.MultiIndex):
            index_levels = [
                dimension.display_key
                if dimension.has_display_field else dimension.key
                for dimension in dimensions
            ]

            result = result.reorder_levels(
                [format_dimension_key(level) for level in index_levels])

        result = result[[
            format_metric_key(reference_key(item, reference))
            for reference in [None] + references for item in self.items
        ]]

        if dimensions:
            result.index.names = [
                dimension.label or dimension.key for dimension in dimensions
            ]

        result.columns = pd.Index([
            reference_label(item, reference) for item in self.items
            for reference in [None] + references
        ],
                                  name='Metrics')

        return self.pivot_data_frame(result,
                                     [d.label or d.key for d in self.pivot],
                                     self.transpose)
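
For intuition, a hypothetical sketch of the kind of frame this produces before pivoting: dimension labels as index names and a column index named 'Metrics' holding the metric (and reference) labels, with metric values already formatted as display strings:

import pandas as pd

columns = pd.Index(['Votes', 'Votes WoW'], name='Metrics')
formatted = pd.DataFrame([['6,000', '5,800'], ['4,000', '4,100']],
                         index=pd.Index(['Democrat', 'Republican'], name='Party'),
                         columns=columns)
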