def transform(self, data_frame, slicer, dimensions, references):
    """
    Renders the widget's axes as matplotlib subplots, one subplot per axis,
    with one plotted line/area per (series, reference) pair.

    :param data_frame: The result set data frame.
    :param slicer: The slicer that generated the data query.
    :param dimensions: A list of dimensions that were selected in the data query.
    :param references: A list of references that were selected in the data query.
    :return: The tuple/array of matplotlib axes that were drawn on.
    """
    # Deferred import so matplotlib is only required when this widget is actually used
    import matplotlib.pyplot as plt

    data_frame = data_frame.copy()
    n_axes = len(self.items)
    figsize = (14, 5 * n_axes)
    fig, plt_axes = plt.subplots(n_axes, sharex='row', figsize=figsize)
    fig.suptitle(self.title)

    # plt.subplots returns a bare Axes object (not iterable) when n_axes == 1
    if not hasattr(plt_axes, '__iter__'):
        plt_axes = (plt_axes, )

    colors = itertools.cycle('bgrcmyk')

    for axis, plt_axis in zip(self.items, plt_axes):
        for series in axis:
            # One color per series; linestyle distinguishes the references within it
            series_color = next(colors)

            linestyles = itertools.cycle(['-', '--', '-.', ':'])
            for reference in [None] + references:
                metric = series.metric
                f_metric_key = utils.format_metric_key(
                    reference_key(metric, reference))
                f_metric_label = reference_label(metric, reference)

                plot = self.get_plot_func_for_series_type(
                    data_frame[f_metric_key], f_metric_label, series)
                # Draw the series, then attach a legend placed to the right of the plot
                plot(ax=plt_axis,
                     title=axis.label,
                     color=series_color,
                     stacked=series.stacking is not None,
                     linestyle=next(linestyles)) \
                    .legend(loc='center left', bbox_to_anchor=(1, 0.5))

    return plt_axes
def transform(self, data_frame, slicer, dimensions, references):
    """
    Transforms a data frame into a format for ReactTable. This is an object
    containing attributes `columns` and `data` which align with the props in
    ReactTable with the same name.

    :param data_frame: The result set data frame
    :param slicer: The slicer that generated the data query
    :param dimensions: A list of dimensions that were selected in the data query
    :param references: A list of references that were selected in the data query
    :return: An dict containing attributes `columns` and `data` which align
        with the props in ReactTable with the same names.
    """
    # Display columns exist only for dimensions that carry a separate display field
    df_dimension_columns = [
        format_dimension_key(d.display_key) for d in dimensions
        if d.has_display_field
    ]
    # Map each (metric, reference) data frame column key to the item used when rendering headers/cells
    item_map = OrderedDict([(format_metric_key(reference_key(i, reference)),
                             ReferenceItem(i, reference))
                            for i in self.items
                            for reference in [None] + references])
    df_metric_columns = list(item_map.keys())

    # Add an extra item to map the totals markers to it's label
    item_map[MAX_NUMBER] = TotalsItem
    item_map[MAX_STRING] = TotalsItem
    item_map[TOTALS_LABEL] = TotalsItem

    df = data_frame[df_dimension_columns + df_metric_columns].copy()

    dimension_display_values = self.map_display_values(df, dimensions)
    self.format_data_frame(df, dimensions)

    dimension_keys = [
        format_dimension_key(dimension.key) for dimension in self.pivot
    ]
    # Pivot/transpose first, then normalize NaN and +/-inf to sentinel values for serialization
    df = self.pivot_data_frame(df, dimension_keys, self.transpose) \
        .fillna(value=NULL_VALUE) \
        .replace([np.inf, -np.inf], INF_VALUE)

    dimension_hyperlink_templates = self.map_hyperlink_templates(
        df, dimensions)

    dimension_columns = self.transform_dimension_column_headers(
        df, dimensions)
    metric_columns = self.transform_metric_column_headers(
        df, item_map, dimension_display_values)
    data = self.transform_data(df, item_map, dimension_display_values,
                               dimension_hyperlink_templates)

    return {
        'columns': dimension_columns + metric_columns,
        'data': data,
    }
def transform(self, data_frame, slicer, dimensions, references):
    """
    Draws each axis of this widget as a matplotlib subplot and plots every
    (series, reference) combination onto its subplot.

    :param data_frame: The result set data frame.
    :param slicer: The slicer that generated the data query.
    :param dimensions: A list of dimensions that were selected in the data query.
    :param references: A list of references that were selected in the data query.
    :return: The matplotlib axes that were drawn on.
    """
    # Imported lazily so matplotlib is an optional dependency
    import matplotlib.pyplot as plt

    data_frame = data_frame.copy()
    n_axes = len(self.items)
    figsize = (14, 5 * n_axes)
    fig, plt_axes = plt.subplots(n_axes, sharex='row', figsize=figsize)
    fig.suptitle(self.title)

    # A single subplot comes back as a scalar Axes; normalize to a tuple
    if not hasattr(plt_axes, '__iter__'):
        plt_axes = (plt_axes,)

    colors = itertools.cycle('bgrcmyk')

    for axis, plt_axis in zip(self.items, plt_axes):
        for series in axis:
            series_color = next(colors)

            # References of a series share its color and differ by linestyle
            linestyles = itertools.cycle(['-', '--', '-.', ':'])
            for reference in [None] + references:
                metric = series.metric
                f_metric_key = utils.format_metric_key(reference_key(metric, reference))
                f_metric_label = reference_label(metric, reference)

                plot = self.get_plot_func_for_series_type(data_frame[f_metric_key], f_metric_label, series)
                plot(ax=plt_axis,
                     title=axis.label,
                     color=series_color,
                     stacked=series.stacking is not None,
                     linestyle=next(linestyles)) \
                    .legend(loc='center left', bbox_to_anchor=(1, 0.5))

    return plt_axes
def apply(self, data_frame, reference):
    """
    Computes the share (percentage, 0-100) of the selected metric relative to
    the totals over the `over` dimension.

    :param data_frame: The result set data frame. When `over` is set, it must
        contain totals rows marked with the totals marker in the `over`
        dimension's index level.
    :param reference: The reference (or None) selecting which column of the
        metric to operate on.
    :return: A series of share values aligned with the data frame's index.
    """
    f_metric_key = format_metric_key(reference_key(self.metric, reference))

    # No `over` dimension: every value is its own total, so shares are 100% (NaN stays NaN)
    if self.over is None:
        df = data_frame[f_metric_key]
        return 100 * df / df

    # Single index level: the totals row is found via the totals marker for the index dtype
    if not isinstance(data_frame.index, pd.MultiIndex):
        marker = get_totals_marker_for_dtype(data_frame.index.dtype)
        totals = data_frame.loc[marker, f_metric_key]
        return 100 * data_frame[f_metric_key] / totals

    f_over_key = format_dimension_key(self.over.key)
    idx = data_frame.index.names.index(f_over_key)
    group_levels = data_frame.index.names[idx:]
    over_dim_value = get_totals_marker_for_dtype(data_frame.index.levels[idx].dtype)
    # Select all rows whose `over` level equals its totals marker, keeping preceding levels
    totals_key = (slice(None),) * idx + (slice(over_dim_value, over_dim_value),)

    totals = reduce_data_frame_levels(data_frame.loc[totals_key, f_metric_key], group_levels)

    def apply_totals(group_df):
        # Scalar totals: divide the whole group directly
        if not isinstance(totals, pd.Series):
            return 100 * group_df / totals

        n_index_levels = len(totals.index.names)
        extra_level_names = group_df.index.names[n_index_levels:]
        # Drop trailing index levels so the group aligns with the reduced totals series
        group_df = group_df.reset_index(extra_level_names, drop=True)
        share = 100 * group_df / totals[group_df.index]
        return pd.Series(share.values, index=group_df.index)

    return data_frame[f_metric_key] \
        .groupby(level=group_levels) \
        .apply(apply_totals) \
        .reorder_levels(order=data_frame.index.names) \
        .sort_index()
def _render_dimensional_metric_cell(row_data: pd.Series, metric: Metric):
    """
    Renders a table cell in a metric column for pivoted tables where there are two or more dimensions. This function
    is recursive to traverse multi-dimensional indices.

    :param row_data: A series containing the value for the metric and it's index (for the dimension values).
    :param metric: A reference to the slicer metric to access the display formatting.
    :return: A deep dict in a tree structure with keys matching each dimension level. The top level will have keys
        matching the first level of dimension values, and the next level will contain the next level of dimension
        values, for as many index levels as there are. The last level will contain the return value of
        `_format_metric_cell`.
    """
    level = {}

    # The metric's data frame column key does not change per row, so compute it once outside the loop
    df_key = format_metric_key(metric.key)

    # Group by the last dimension, drop it, and fill the dict with either the raw metric values or the next level of
    # dicts.
    for key, next_row in row_data.groupby(level=1):
        next_row.reset_index(level=1, drop=True, inplace=True)

        # Recurse while more than one index level remains; otherwise format the leaf value
        level[key] = _render_dimensional_metric_cell(next_row, metric) \
            if isinstance(next_row.index, pd.MultiIndex) \
            else _format_metric_cell(next_row[df_key], metric)

    return level
def _data_row(self, dimensions, dimension_values, dimension_display_values, references, row_data):
    """
    Builds a single row dict for the rendered table, keyed by dimension keys and
    metric/reference keys.

    :param dimensions: The dimensions selected in the data query.
    :param dimension_values: The index values for this row, one per dimension.
    :param dimension_display_values: A mapping from data frame keys to display value lookups.
    :param dimension_display_values:
    :param references: The references selected in the data query.
    :param row_data: The series of metric values for this row.
    :return: A dict mapping dimension/metric keys to rendered cell values.
    """
    cells = {}

    # One cell per dimension, rendered with its display value when one exists
    for dimension, value in zip(dimensions, utils.wrap_list(dimension_values)):
        display_lookup = dimension_display_values.get(format_dimension_key(dimension.key))
        cells[dimension.key] = _render_dimension_cell(value, display_lookup)

    is_pivoted = isinstance(row_data.index, pd.MultiIndex)

    # One cell per metric and per reference (None stands for the base metric)
    for metric in self.items:
        for reference in [None] + references:
            key = reference_key(metric, reference)
            if is_pivoted:
                cells[key] = _render_dimensional_metric_cell(row_data, metric)
            else:
                cells[key] = _format_metric_cell(row_data[format_metric_key(key)], metric)

    return cells
def test_apply_to_zero_dims(self):
    # With no `over` dimension, the single value is 100% of itself
    f_metric_key = format_metric_key(slicer.metrics.votes.key)
    expected = pd.Series([100.], name=f_metric_key)

    share = Share(slicer.metrics.votes)
    result = share.apply(single_metric_df, None)

    pandas.testing.assert_series_equal(expected, result)
def apply(self, data_frame, reference):
    """
    Applies `self.rolling_mean` to the selected metric column, rolling within
    each dimension group when the index has multiple levels.

    :param data_frame: The result set data frame.
    :param reference: The reference (or None) selecting which column of the metric to use.
    :return: A series with the rolling mean applied.
    """
    df_key = format_metric_key(reference_key(self.arg, reference))
    metric_series = data_frame[df_key]

    # Flat index: roll over the whole column at once
    if not isinstance(data_frame.index, pd.MultiIndex):
        return self.rolling_mean(metric_series)

    # Multi-level index: roll within each group so windows never cross group boundaries
    group_levels = self._group_levels(data_frame.index)
    return metric_series.groupby(level=group_levels).apply(self.rolling_mean)
def apply(self, data_frame, reference):
    """
    Applies `self.rolling_mean` to the selected metric column.

    :param data_frame: The result set data frame.
    :param reference: The reference (or None) selecting which column of the metric to use.
    :return: A series with the rolling mean applied, computed per dimension
        group when the index has multiple levels.
    """
    df_key = format_metric_key(reference_key(self.arg, reference))

    if isinstance(data_frame.index, pd.MultiIndex):
        # Roll within each group so windows never span group boundaries
        levels = self._group_levels(data_frame.index)

        return data_frame[df_key] \
            .groupby(level=levels) \
            .apply(self.rolling_mean)

    return self.rolling_mean(data_frame[df_key])
def test_apply_to_two_dims_over_first(self):
    f_metric_key = format_metric_key(slicer.metrics.votes.key)
    metric_series = cont_uni_dim_all_totals_df[f_metric_key]
    # The final row of the fixture holds the grand total over the timestamp dimension
    expected = 100 * metric_series / metric_series.iloc[-1]

    share = Share(slicer.metrics.votes, over=slicer.dimensions.timestamp)
    result = share.apply(cont_uni_dim_all_totals_df, None)

    pandas.testing.assert_series_equal(expected, result, check_less_precise=True)
def test_apply_to_one_dim_over_none(self):
    # Without an `over` dimension every row is 100% of itself
    share = Share(slicer.metrics.votes)

    result = share.apply(cat_dim_df, None)

    f_metric_key = format_metric_key(slicer.metrics.votes.key)
    expected = pd.Series([100.] * 3,
                         name=f_metric_key,
                         index=cat_dim_df.index)
    pandas.testing.assert_series_equal(expected, result, check_less_precise=True)
def test_apply_to_one_dim_over_first(self):
    f_metric_key = format_metric_key(slicer.metrics.votes.key)
    # The last entry is the totals row, whose share of itself is 100%
    expected = pd.Series([48.849, 0.964, 50.187, 100.0],
                         name=f_metric_key,
                         index=cat_dim_totals_df.index)

    share = Share(slicer.metrics.votes, over=slicer.dimensions.political_party)
    result = share.apply(cat_dim_totals_df, None)

    pandas.testing.assert_series_equal(expected, result, check_less_precise=True)
def test_apply_to_two_dims_over_second_with_one_row_per_group(self):
    # Keep one non-totals row plus the totals row per group to exercise single-row groups
    raw_df = cont_uni_dim_totals_df.iloc[[0, 2, 3, 5]]

    share = Share(slicer.metrics.votes, over=slicer.dimensions.state)
    result = share.apply(raw_df, None)

    f_metric_key = format_metric_key(slicer.metrics.votes.key)
    expected = pd.Series([36.624, 100., 37.411, 100.],
                         name=f_metric_key,
                         index=raw_df.index)
    pandas.testing.assert_series_equal(expected, result, check_less_precise=True)
def test_apply_to_two_dims_over_second(self):
    share = Share(slicer.metrics.votes, over=slicer.dimensions.state)

    result = share.apply(cont_uni_dim_totals_df, None)

    f_metric_key = format_metric_key(slicer.metrics.votes.key)
    # Every third value (100.) corresponds to a totals row over the state dimension
    expected = pd.Series([
        36.624, 63.376, 100.,
        37.411, 62.589, 100.,
        37.521, 62.479, 100.,
        37.606, 62.394, 100.,
        38.294, 61.706, 100.,
        27.705, 72.295, 100.
    ],
                         name=f_metric_key,
                         index=cont_uni_dim_totals_df.index)
    pandas.testing.assert_series_equal(expected, result, check_less_precise=True)
def transform(self, data_frame, slicer, dimensions, references):
    """
    Transforms a result data frame into a dict with `columns` (column
    configurations) and `data` (row dicts), optionally unstacking the trailing
    dimension levels into pivoted metric columns.

    :param data_frame: The result set data frame.
    :param slicer: The slicer that generated the data query.
    :param dimensions: A list of dimensions that were selected in the data query.
    :param references: A list of references that were selected in the data query.
    :return: A dict with keys `columns` and `data`.
    """
    dimension_display_values = extract_display_values(dimensions, data_frame)

    # Restrict the frame to the metric/reference columns this widget renders
    metric_keys = [format_metric_key(reference_key(metric, reference))
                   for metric in self.items
                   for reference in [None] + references]
    data_frame = data_frame[metric_keys]

    pivot_index_to_columns = self.pivot and isinstance(data_frame.index, pd.MultiIndex)
    if pivot_index_to_columns:
        # Unstack every index level after the first into the columns
        levels = data_frame.index.names[1:]
        data_frame = data_frame \
            .unstack(level=levels) \
            .fillna(value=0)

        # Only the first dimension remains in the index after unstacking
        dimension_columns = self._dimension_columns(dimensions[:1])

        render_column_label = dimensional_metric_label(dimensions, dimension_display_values)
        metric_columns = self._metric_columns_pivoted(references,
                                                      data_frame.columns,
                                                      render_column_label)

    else:
        dimension_columns = self._dimension_columns(dimensions)
        metric_columns = self._metric_columns(references)

    # Truncate the column list to the configured maximum
    columns = (dimension_columns + metric_columns)[:self.max_columns]
    data = [self._data_row(dimensions,
                           dimension_values,
                           dimension_display_values,
                           references,
                           row_data)
            for dimension_values, row_data in data_frame.iterrows()]

    return dict(columns=columns, data=data)
def test_operations_results_stored_in_data_frame(self, mock_fetch_data: Mock, *mocks):
    """Verifies that each operation's result is written back into the fetched data frame."""
    mock_df = {}
    mock_fetch_data.return_value = mock_df

    mock_operation = Mock(name='mock_operation ', spec=f.Operation)
    mock_operation.key, mock_operation.definition = 'mock_operation', slicer.table.abc
    mock_operation.metrics = []

    mock_widget = f.Widget(mock_operation)
    mock_widget.transform = Mock()

    # Need to keep widget the last call in the chain otherwise the object gets cloned and the assertion won't work
    slicer.data \
        .dimension(slicer.dimensions.timestamp) \
        .widget(mock_widget) \
        .fetch()

    f_op_key = format_metric_key(mock_operation.key)
    self.assertIn(f_op_key, mock_df)
    self.assertEqual(mock_df[f_op_key], mock_operation.apply.return_value)
def test_operations_results_stored_in_data_frame(self, mock_fetch_data: Mock, *mocks):
    # NOTE(review): the Mock name 'mock_operation ' carries a trailing space — looks unintentional; verify
    mock_operation = Mock(name='mock_operation ', spec=f.Operation)
    mock_operation.key, mock_operation.definition = 'mock_operation', slicer.table.abc
    mock_operation.metrics = []

    mock_widget = f.Widget(mock_operation)
    mock_widget.transform = Mock()

    # fetch_data is patched to return this dict so we can observe column writes
    mock_df = {}
    mock_fetch_data.return_value = mock_df

    # Need to keep widget the last call in the chain otherwise the object gets cloned and the assertion won't work
    slicer.data \
        .dimension(slicer.dimensions.timestamp) \
        .widget(mock_widget) \
        .fetch()

    f_op_key = format_metric_key(mock_operation.key)
    self.assertIn(f_op_key, mock_df)
    self.assertEqual(mock_df[f_op_key], mock_operation.apply.return_value)
def fetch(self, hint=None) -> Iterable[Dict]:
    """
    Fetch the data for this query and transform it into the widgets.

    :param hint: A query hint label used with database vendors which support it. Adds a label comment to the query.
    :return: A list of dict (JSON) objects containing the widget configurations.
    """
    queries = add_hints(self.queries, hint)

    operations = find_operations_for_widgets(self._widgets)
    # Share operations require the totals of their `over` dimension to be fetched alongside the data
    share_dimensions = find_share_dimensions(self._dimensions, operations)

    data_frame = fetch_data(self.slicer.database, queries, self._dimensions,
                            share_dimensions, self.reference_groups)

    # Apply operations
    for operation in operations:
        for reference in [None] + self._references:
            # Each operation result is stored as an additional column, one per reference
            df_key = format_metric_key(reference_key(operation, reference))
            data_frame[df_key] = operation.apply(data_frame, reference)

    # Remove totals rows that were only fetched to support share calculations
    data_frame = scrub_totals_from_share_results(data_frame,
                                                 self._dimensions)
    data_frame = special_cases.apply_operations_to_data_frame(
        operations, data_frame)

    # Pagination runs last so ordering/limit/offset act on the final computed values
    data_frame = paginate(data_frame,
                          self._widgets,
                          orders=self._orders,
                          limit=self._limit,
                          offset=self._offset)

    # Apply transformations
    return [
        widget.transform(data_frame, self.slicer, self._dimensions,
                         self._references) for widget in self._widgets
    ]
def apply(self, data_frame, reference):
    """
    Computes the share (percentage) of the metric over the totals of the
    `over` dimension.

    :param data_frame: The result set data frame; when `over` is set it must
        include totals rows marked with the totals marker.
    :param reference: The reference (or None) selecting the metric column.
    :return: A series of share values (0-100) aligned with the index.
    """
    f_metric_key = format_metric_key(reference_key(self.metric, reference))

    if self.over is None:
        # Each value divided by itself: 100% everywhere, NaN preserved
        df = data_frame[f_metric_key]
        return 100 * df / df

    if not isinstance(data_frame.index, pd.MultiIndex):
        # Flat index: the totals value sits at the marker row
        marker = get_totals_marker_for_dtype(data_frame.index.dtype)
        totals = data_frame.loc[marker, f_metric_key]
        return 100 * data_frame[f_metric_key] / totals

    f_over_key = format_dimension_key(self.over.key)
    idx = data_frame.index.names.index(f_over_key)
    group_levels = data_frame.index.names[idx:]
    over_dim_value = get_totals_marker_for_dtype(
        data_frame.index.levels[idx].dtype)
    # Slice out the totals rows at the `over` level, keeping all preceding levels
    totals_key = (slice(None), ) * idx + (slice(over_dim_value,
                                                over_dim_value), )

    totals = reduce_data_frame_levels(
        data_frame.loc[totals_key, f_metric_key], group_levels)

    def apply_totals(group_df):
        if not isinstance(totals, pd.Series):
            return 100 * group_df / totals

        n_index_levels = len(totals.index.names)
        extra_level_names = group_df.index.names[n_index_levels:]
        # Align the group's index with the reduced totals series
        group_df = group_df.reset_index(extra_level_names, drop=True)
        share = 100 * group_df / totals[group_df.index]
        return pd.Series(share.values, index=group_df.index)

    return data_frame[f_metric_key] \
        .groupby(level=group_levels) \
        .apply(apply_totals) \
        .reorder_levels(order=data_frame.index.names) \
        .sort_index()
def fetch(self, hint=None) -> Iterable[Dict]:
    """
    Fetch the data for this query and transform it into the widgets.

    :param hint: A query hint label used with database vendors which support it. Adds a label comment to the query.
    :return: A list of dict (JSON) objects containing the widget configurations.
    """
    queries = add_hints(self.queries, hint)

    operations = find_operations_for_widgets(self._widgets)
    # Share operations need totals for their `over` dimension in the result set
    share_dimensions = find_share_dimensions(self._dimensions, operations)

    data_frame = fetch_data(self.slicer.database,
                            queries,
                            self._dimensions,
                            share_dimensions,
                            self.reference_groups)

    # Apply operations
    for operation in operations:
        for reference in [None] + self._references:
            # Store each operation result as an extra column keyed per reference
            df_key = format_metric_key(reference_key(operation, reference))
            data_frame[df_key] = operation.apply(data_frame, reference)

    data_frame = scrub_totals_from_share_results(data_frame, self._dimensions)
    data_frame = special_cases.apply_operations_to_data_frame(operations, data_frame)

    # Pagination happens after operations so it orders/limits the final values
    data_frame = paginate(data_frame,
                          self._widgets,
                          orders=self._orders,
                          limit=self._limit,
                          offset=self._offset)

    # Apply transformations
    return [widget.transform(data_frame, self.slicer, self._dimensions, self._references)
            for widget in self._widgets]
def _render_pie_series(self, series, reference, data_frame, render_series_label):
    """
    Renders the Highcharts series config for a pie series.

    :param series: The pie series to render.
    :param reference: The reference (or None for the base metric) to render the data for.
    :param data_frame: The data frame containing the metric values for this series group.
    :param render_series_label: A callable rendering a slice label from its dimension values.
    :return: A Highcharts pie series config dict.
    """
    metric = series.metric
    name = reference_label(metric, reference)
    # BUGFIX: select the column for the requested reference. Previously this always used
    # `series.metric.key`, so reference pies plotted the base metric's values even though
    # the name and tooltip were rendered for the reference.
    df_key = utils.format_metric_key(reference_key(metric, reference))

    data = []
    # Slices are rendered largest-first
    for dimension_values, y in data_frame[df_key].sort_values(ascending=False).iteritems():
        data.append({
            "name": render_series_label(dimension_values) if dimension_values else name,
            "y": formats.metric_value(y),
        })

    return {
        "name": name,
        "type": series.type,
        "data": data,
        'tooltip': {
            'pointFormat': '<span style="color:{point.color}">\u25CF</span> {series.name}: '
                           '<b>{point.y} ({point.percentage:.1f}%)</b><br/>',
            'valueDecimals': metric.precision,
            'valuePrefix': reference_prefix(metric, reference),
            'valueSuffix': reference_suffix(metric, reference),
        },
    }
def original_field(metric):
    """Returns the field on the original query matching the metric's data frame key."""
    f_metric_key = utils.format_metric_key(metric.key)
    return original_query.field(f_metric_key)
def transform(self, data_frame, slicer, dimensions, references):
    """
    Transforms a result data frame for display: formats metric values with
    prefix/suffix/precision, replaces dimension index keys with display values
    and labels, and optionally pivots/transposes the result.

    :param data_frame: The result set data frame.
    :param slicer: The slicer that generated the data query.
    :param dimensions: A list of dimensions that were selected in the data query.
    :param references: A list of references that were selected in the data query.
    :return: A formatted, optionally pivoted, pandas data frame.
    """
    result = data_frame.copy()

    for metric in self.items:
        if any([metric.precision is not None,
                metric.prefix is not None,
                metric.suffix is not None]):
            df_key = format_metric_key(metric.key)
            # Format raw values into display strings using the metric's prefix/suffix/precision
            result[df_key] = result[df_key] \
                .apply(lambda x: formats.metric_display(x, metric.prefix, metric.suffix, metric.precision))

            for reference in references:
                df_ref_key = format_metric_key(reference_key(metric, reference))

                # Delta-percent references use reference-specific prefix/suffix
                if reference.delta_percent:
                    result[df_ref_key] = result[df_ref_key].apply(lambda x: formats.metric_display(
                        x,
                        reference_prefix(metric, reference),
                        reference_suffix(metric, reference),
                        metric.precision))

    for dimension in dimensions:
        if dimension.has_display_field:
            # Swap the key level for the display level in the index
            result = result.set_index(format_dimension_key(dimension.display_key), append=True)
            result = result.reset_index(format_dimension_key(dimension.key), drop=True)

        if hasattr(dimension, 'display_values'):
            self._replace_display_values_in_index(dimension, result)

    if isinstance(data_frame.index, pd.MultiIndex):
        # Restore the dimension order after the set_index/reset_index shuffling above
        index_levels = [dimension.display_key
                        if dimension.has_display_field
                        else dimension.key
                        for dimension in dimensions]
        result = result.reorder_levels([format_dimension_key(level)
                                        for level in index_levels])

    # Keep only the widget's metric/reference columns, in widget order
    result = result[[format_metric_key(reference_key(item, reference))
                     for reference in [None] + references
                     for item in self.items]]

    if dimensions:
        result.index.names = [dimension.label or dimension.key
                              for dimension in dimensions]

    result.columns = pd.Index([reference_label(item, reference)
                               for item in self.items
                               for reference in [None] + references],
                              name='Metrics')

    return self.pivot_data_frame(result, [d.label or d.key for d in self.pivot], self.transpose)
def ref_field(metric):
    """Returns the field on the reference query matching the metric's data frame key."""
    f_metric_key = utils.format_metric_key(metric.key)
    return ref_query.field(f_metric_key)
def make_terms_for_metrics(metrics):
    """Aliases each metric's definition with its formatted data frame key."""
    return [m.definition.as_(format_metric_key(m.key))
            for m in metrics]
def make_terms_for_metrics(metrics):
    """Aliases each metric's definition with its formatted data frame key."""
    def as_term(metric):
        # Alias the query term so the result set column matches the data frame key
        return metric.definition.as_(format_metric_key(metric.key))

    return [as_term(metric) for metric in metrics]
def transform(self, data_frame, slicer, dimensions, references):
    """
    Transforms a result data frame for display by formatting metric values,
    replacing dimension keys with display values/labels, and optionally
    pivoting/transposing the result.

    :param data_frame: The result set data frame.
    :param slicer: The slicer that generated the data query.
    :param dimensions: A list of dimensions that were selected in the data query.
    :param references: A list of references that were selected in the data query.
    :return: A formatted, optionally pivoted, pandas data frame.
    """
    result = data_frame.copy()

    for metric in self.items:
        if any([
                metric.precision is not None, metric.prefix is not None,
                metric.suffix is not None
        ]):
            df_key = format_metric_key(metric.key)
            # Apply the metric's display formatting to the raw values
            result[df_key] = result[df_key] \
                .apply(lambda x: formats.metric_display(x, metric.prefix, metric.suffix, metric.precision))

            for reference in references:
                df_ref_key = format_metric_key(reference_key(
                    metric, reference))

                # Delta-percent references carry their own prefix/suffix
                if reference.delta_percent:
                    result[df_ref_key] = result[df_ref_key].apply(
                        lambda x: formats.metric_display(
                            x, reference_prefix(metric, reference),
                            reference_suffix(metric, reference), metric.
                            precision))

    for dimension in dimensions:
        if dimension.has_display_field:
            # Replace the key index level with the display index level
            result = result.set_index(format_dimension_key(
                dimension.display_key),
                                      append=True)
            result = result.reset_index(format_dimension_key(
                dimension.key),
                                        drop=True)

        if hasattr(dimension, 'display_values'):
            self._replace_display_values_in_index(dimension, result)

    if isinstance(data_frame.index, pd.MultiIndex):
        # Restore the dimension order after index level replacements
        index_levels = [
            dimension.display_key
            if dimension.has_display_field else dimension.key
            for dimension in dimensions
        ]
        result = result.reorder_levels(
            [format_dimension_key(level) for level in index_levels])

    # Keep only this widget's metric/reference columns, in widget order
    result = result[[
        format_metric_key(reference_key(item, reference))
        for reference in [None] + references for item in self.items
    ]]

    if dimensions:
        result.index.names = [
            dimension.label or dimension.key for dimension in dimensions
        ]

    result.columns = pd.Index([
        reference_label(item, reference) for item in self.items
        for reference in [None] + references
    ],
                              name='Metrics')

    return self.pivot_data_frame(result,
                                 [d.label or d.key for d in self.pivot],
                                 self.transpose)
def _render_series(self, axis, axis_idx, axis_color, colors, series_data_frames, render_series_label,
                   references, is_timeseries=False):
    """
    Renders the series configuration.

    https://api.highcharts.com/highcharts/series

    :param axis: The axis whose series are rendered.
    :param axis_idx: The index of the axis; used to link each series to its y-axis id.
    :param axis_color: The color shared by the whole axis, or None to color each series individually.
    :param colors: An iterator yielding a color per series.
    :param series_data_frames: Pairs of (dimension values, group data frame), one per series group.
    :param render_series_label: A callable rendering a series label from dimension values, metric, and reference.
    :param references: A list of references that were selected in the data query.
    :param is_timeseries: Whether the x-axis is a timeseries; the group data is then sorted by the first index level.
    :return: A list of Highcharts series config dicts.
    """
    hc_series = []
    for series in axis:
        symbols = itertools.cycle(MARKER_SYMBOLS)
        for (dimension_values, group_df), symbol in zip(series_data_frames, symbols):
            if is_timeseries:
                group_df = group_df.sort_index(level=0)

            dimension_values = utils.wrap_list(dimension_values)

            if isinstance(series, self.PieSeries):
                # pie charts suck
                for reference in [None] + references:
                    hc_series.append(self._render_pie_series(series, reference, group_df, render_series_label))
                continue

            # With a single axis, use different colors for each series
            # With multiple axes, use the same color for the entire axis and only change the dash style
            series_color = next(colors)

            for reference, dash_style in zip([None] + references, itertools.cycle(DASH_STYLES)):
                metric_key = utils.format_metric_key(reference_key(series.metric, reference))

                hc_series.append({
                    "type": series.type,
                    "name": render_series_label(dimension_values, series.metric, reference),
                    "data": (
                        self._render_timeseries_data(group_df, metric_key)
                        if is_timeseries
                        else self._render_category_data(group_df, metric_key)
                    ),
                    "tooltip": self._render_tooltip(series.metric, reference),
                    # Delta references are attached to a dedicated "<axis>_<reference>" y-axis id
                    "yAxis": ("{}_{}".format(axis_idx, reference.key)
                              if reference is not None and reference.delta
                              else str(axis_idx)),
                    "marker": ({"symbol": symbol, "fillColor": axis_color or series_color}
                               if isinstance(series, SERIES_NEEDING_MARKER)
                               else {}),
                    "stacking": series.stacking,
                })

                if isinstance(series, ContinuousAxisSeries):
                    # Set each series in a continuous series to a specific color
                    hc_series[-1]["color"] = series_color
                    hc_series[-1]["dashStyle"] = dash_style

    return hc_series