Пример #1
0
    def map_labels(self, mapping: StringMapper) -> 'LegendFormatter':
        """
        Rewrite the text of every legend label and rebuild the legend.

        :param mapping: Dictionary or function used to translate each
                        existing label into its replacement text.
        :return: This formatter, so that calls can be chained.
        """
        # only the label texts are needed; the handles are discarded
        _, current_labels = self._legend.axes.get_legend_handles_labels()
        replaced_labels = []
        for current_label in current_labels:
            replaced_labels.append(
                map_text(text=current_label, mapping=mapping)
            )
        self.recreate_legend(labels=replaced_labels)
        return self
Пример #2
0
    def map_label_text(self, mapping: StringMapper) -> 'TicksFormatter':
        """
        Map the tick label text using a dictionary or function.

        For every (axis, minor) pair produced by `_iter_axis_minor`, the
        current label texts are read, passed through `map_text` and written
        back to the same (major or minor) set of ticks.

        :param mapping: Dictionary or a function mapping old text to new text.
        :return: This formatter, so that calls can be chained.
        """
        for axis, minor in self._iter_axis_minor():
            old_texts = [
                label.get_text() for label in axis.get_ticklabels(minor=minor)
            ]
            # Write back to the same tick set that was read: without
            # `minor=minor` the texts of minor ticks would overwrite the
            # major tick labels.
            axis.set_ticklabels(map_text(old_texts, mapping), minor=minor)
        return self
Пример #3
0
    def plot_distribution(self,
                          data: Optional[Series] = None,
                          drop_na: bool = True,
                          transpose: bool = True,
                          color: str = 'C0',
                          pct_size: int = None,
                          significance: bool = False,
                          sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                          label_mappings: Optional[Dict[str, str]] = None,
                          grid: bool = False,
                          max_axis_label_chars: Optional[int] = None,
                          title: Optional[str] = None,
                          x_label: Optional[str] = None,
                          y_label: Optional[str] = None,
                          ax: Optional[Axes] = None,
                          **kwargs) -> Axes:
        """
        Draw a bar chart of the number of times each choice was selected.

        :param data: The answers given by Respondents to the Question.
                     Falls back to the data stored on the Question.
        :param drop_na: Whether to drop null responses from the dataset
                        (affects % calculations).
        :param transpose: Whether to transpose the labels to the y-axis
                          (i.e. draw horizontal bars).
        :param color: Color or list of colors for the bars.
        :param pct_size: Font size for the percent markers.
        :param significance: Whether to highlight significant categories.
        :param sig_colors: Tuple of (high, low) colors for highlighting
                           significance.
        :param label_mappings: Optional dict of replacements for labels.
        :param grid: Whether to show a plot grid or not.
        :param max_axis_label_chars: Maximum number of characters in axis
                                     labels before wrapping.
        :param title: Axes title. Defaults to the text of the Question.
        :param x_label: Label for the x-axis.
        :param y_label: Label for the y-axis.
        :param ax: Optional matplotlib axes to plot on.
        :param kwargs: Extra keyword arguments forwarded to the pandas
                       plot call.
        :return: The axes that were plotted on. When `data` is empty the
                 `ax` argument is returned as given, without plotting.
        :raises ValueError: If no data is passed and none is stored.
        """
        if data is None:
            data = self._data
        if data is None:
            raise ValueError('No data!')
        if len(data) == 0:
            return ax

        # resolve default texts
        if title is None:
            title = self.text
        if x_label is None:
            x_label = '# Respondents' if transpose else self.name
        if y_label is None:
            y_label = self.name if transpose else '# Respondents'

        # count the selections of each choice
        features = self.make_features(answers=data,
                                      drop_na=drop_na,
                                      naming='{{choice}}')
        item_counts: Series = features.sum()
        n_responses = len(features)
        ax = ax or new_axes()

        # relabel (optionally) and wrap the category labels
        category_labels = item_counts.index
        if label_mappings is not None:
            category_labels = map_text(category_labels,
                                       mapping=label_mappings or {})
        item_counts.index = wrap_text(category_labels,
                                      max_width=max_axis_label_chars)

        # outline bars of categories that are significantly high or low
        edge_color = None
        line_width = None
        if significance:
            one_vs_any = self.significance_one_vs_any()
            edge_color = []
            for category in self.category_names:
                sig_value = one_vs_any[category]
                if sig_value >= 0.945:
                    edge_color.append(sig_colors[0])
                elif sig_value < 0.055:
                    edge_color.append(sig_colors[1])
                else:
                    edge_color.append(color)
            line_width = []
            for bar_edge in edge_color:
                if bar_edge != color:
                    line_width.append(2)
                else:
                    line_width.append(None)

        item_counts.plot(kind='barh' if transpose else 'bar',
                         ax=ax,
                         color=color,
                         edgecolor=edge_color,
                         linewidth=line_width,
                         **kwargs)

        # annotate each bar with its percentage of responses
        label_bar_plot_pcts(item_counts=item_counts,
                            item_pcts=100 * item_counts.div(n_responses),
                            ax=ax,
                            transpose=transpose,
                            font_size=pct_size)

        # titles, axis labels and grid
        formatter = AxesFormatter(ax).set_text(title=title,
                                               x_label=x_label,
                                               y_label=y_label)
        formatter.set_axis_below(True).grid(grid)
        # fall back to the count label when the count axis label is empty
        if transpose:
            if not x_label:
                ax.set_xlabel('# Respondents')
        elif not y_label:
            ax.set_ylabel('# Respondents')

        return ax
Пример #4
0
    def test_map_text(self):
        """
        Mapped keys are replaced; text without a mapping is left unchanged.
        """
        mapping = {'a': 'A', 'b': 'B'}
        # (input text, expected output) - 'c' has no entry in the mapping
        cases = (('a', 'A'), ('b', 'B'), ('c', 'c'))
        for original, expected in cases:
            self.assertEqual(expected,
                             map_text(text=original, mapping=mapping))
Пример #5
0
def plot_pt(pt: DataFrame, transpose: bool = True,
            set_title: bool = True, cbar: bool = True,
            x_label: Union[bool, str] = True,
            y_label: Union[bool, str] = True,
            x_tick_labels: bool = True, y_tick_labels: bool = True,
            dividers: bool = True,
            as_percent: bool = True, precision: Optional[int] = None,
            p_max: float = 1.0, var_sep: str = '|', ax: Optional[Axes] = None,
            pct_size: Optional[int] = None, cmap: str = 'Blues',
            min_pct: Optional[int] = None,
            label_mappings: Optional[Dict[str, str]] = None,
            max_axis_label_chars: Optional[int] = None) -> Axes:
    """
    Plot a probability table as an annotated heatmap.

    The table passed in is not modified; relabelling is applied to a copy.

    :param pt: DataFrame with condition as index and probability as
               columns (for cpts) or 2 probs (for jpts).
    :param transpose: Set to True to put `condition` on x-axis.
    :param cbar: Whether to show the color-bar.
    :param set_title: Whether to add a title to the plot.
    :param x_label: Whether to show the default label on the x-axis or not,
                    or string of text for the label.
    :param y_label: Whether to show the default label on the y-axis or not,
                    or string of text for the label.
    :param x_tick_labels: Whether to show labels on the ticks on the x-axis.
    :param y_tick_labels: Whether to show labels on the ticks on the y-axis.
    :param dividers: Whether to show dividing lines between each condition.
    :param as_percent: Whether to show probabilities as a percentage.
    :param precision: Number of decimal places to display values.
                      Defaults to 1 for percentages, 2 for proportions.
    :param p_max: The value for the highest probability (0 to 1).
    :param var_sep: The separator to use between variables in the title
                    e.g. '|' for conditional, ',' for joint
    :param ax: Optional matplotlib axes to plot on.
    :param cmap: Name of the colormap.
    :param pct_size: Size of the font for the percent labels.
    :param min_pct: Minimum sum of percentages across rows / columns
                    to keep those rows / columns.
    :param label_mappings: Optional dict of replacements for labels.
    :param max_axis_label_chars: Maximum number of characters before wrapping
                                 axis labels.
    :return: The axes that were plotted on.
    """
    # remember the variable names before the axes are relabelled
    p1 = pt.index.name
    p2 = pt.columns.name
    # Always work on a separate object: assigning new index / columns to the
    # caller's DataFrame would silently relabel it when transpose is False.
    pt = pt.T if transpose else pt.copy()
    index_labels = pt.index
    column_labels = pt.columns
    # only map labels when a mapping was actually given
    if label_mappings is not None:
        index_labels = map_text(index_labels, label_mappings)
        column_labels = map_text(column_labels, label_mappings)
    pt.index = wrap_text(index_labels, max_axis_label_chars)
    pt.columns = wrap_text(column_labels, max_axis_label_chars)
    if as_percent:
        precision = precision if precision is not None else 1
        fmt = f'.{precision}%'
        cbar_kws = {'format': FuncFormatter(lambda x, pos: f'{x:.0%}')}
    else:
        precision = precision if precision is not None else 2
        fmt = f'.{precision}f'
        cbar_kws = {}
    ax = ax or new_axes()
    # drop rows / columns whose total does not exceed the threshold
    if min_pct is not None:
        kept_rows = pt.loc[pt.sum(axis=1) > min_pct].index
        kept_columns = pt.loc[:, pt.sum(axis=0) > min_pct].columns
        pt = pt.reindex(index=kept_rows, columns=kept_columns)
    # plot
    heatmap(pt, annot=True, cbar=cbar,
            vmin=0, vmax=p_max, fmt=fmt, cmap=cmap,
            linewidths=1, linecolor='#bbbbbb', cbar_kws=cbar_kws,
            annot_kws={'fontsize': pct_size} if pct_size is not None else {},
            ax=ax)
    ax.invert_yaxis()
    if dividers:
        if transpose:
            draw_vertical_dividers(ax)
        else:
            draw_horizontal_dividers(ax)
    # set labels
    set_cpt_axes_labels(ax=ax, x_label=x_label, y_label=y_label,
                        cond_name=p1, prob_name=p2,
                        transpose=transpose)
    set_cp_tick_labels(ax, x_tick_labels, y_tick_labels)
    if set_title:
        if as_percent:
            ax.set_title(f'p({p2}{var_sep}{p1})')
        else:
            ax.set_title(f'|{p2}{var_sep}{p1}|')

    return ax
Пример #6
0
def plot_categorical_distribution(
        categorical_data: Series, title: str = '',
        order: list = None,
        transpose: bool = False,
        x_label: bool = True, y_label: bool = True,
        x_tick_labels: bool = True, y_tick_labels: bool = True,
        color: Union[str, List[str]] = 'C0',
        edge_color: Union[Optional[str], Optional[List[str]]] = None,
        line_style: Optional[str] = None,
        line_width: Optional[Union[float, List[float]]] = None,
        ax: Optional[Axes] = None, pct_size: Optional[int] = None,
        label_mappings: Optional[Dict[str, str]] = None,
        drop_na: bool = True,
        max_axis_label_chars: Optional[int] = None,
        grid: bool = False,
        y_lim: Optional[Tuple[int, int]] = None,
        alpha: float = 1.0
) -> Axes:
    """
    Create a bar-plot of the counts of a categorical variable.

    :param categorical_data: The data to plot e.g. a sequence of answers
                             or attribute values.
    :param title: The title for the plot.
    :param order: Optional list of labels to order the plotted categories by.
                  Labels that do not occur in the data are plotted with a
                  count of zero; categories not listed are left out.
    :param transpose: True to plot horizontal bars.
    :param x_label: Whether to add a label to the x-axis.
    :param y_label: Whether to add a label to the y-axis.
    :param x_tick_labels: Whether to show labels on the ticks on the x-axis.
    :param y_tick_labels: Whether to show labels on the ticks on the y-axis.
    :param color: Single color or list of colors for bars.
    :param edge_color: Single color or list of colors for bar edges.
    :param line_style: Line style for bar edges.
    :param line_width: Single width or list of widths for bar edges.
    :param pct_size: Font size for percentage labels.
    :param ax: Optional matplotlib axes to plot on.
    :param label_mappings: Optional mappings to modify axis labels.
    :param drop_na: Whether to exclude null values from the percentage counts.
    :param max_axis_label_chars: Maximum number of characters before wrapping
                                 axis labels.
    :param grid: Whether to show a grid.
    :param y_lim: Optional limits for the y-axis.
    :param alpha: Opacity passed to the bar plot.
    :return: The axes that were plotted on.
    """
    plot_type = 'barh' if transpose else 'bar'
    item_counts = categorical_data.value_counts()
    if order:
        # Sort the categories for the plot and give categories missing from
        # the data a zero count in one step. This replaces a per-item
        # `Series.append` loop (`Series.append` was removed in pandas 2.0).
        item_counts = item_counts.reindex(order, fill_value=0)
    ax = ax or new_axes()
    # relabel (optionally) and wrap the category labels
    category_labels = item_counts.index
    if label_mappings is not None:
        category_labels = map_text(category_labels, label_mappings)
    item_counts.index = wrap_text(category_labels, max_axis_label_chars)
    item_counts.plot(kind=plot_type, ax=ax, color=color, edgecolor=edge_color,
                     linestyle=line_style, linewidth=line_width, alpha=alpha)
    # add percentages, relative to all values or only the non-null ones
    if drop_na:
        n_values = len(categorical_data.dropna())
    else:
        n_values = len(categorical_data)
    item_pcts = 100 * item_counts.div(n_values)
    label_bar_plot_pcts(item_counts=item_counts, item_pcts=item_pcts,
                        ax=ax, transpose=transpose, font_size=pct_size)
    # add titles
    ax.set_title(title)
    if transpose:
        x_label_value = '# Respondents'
        y_label_value = categorical_data.name
    else:
        x_label_value = categorical_data.name
        y_label_value = '# Respondents'
    ax.set_xlabel(x_label_value if x_label else '')
    ax.set_ylabel(y_label_value if y_label else '')
    if not x_tick_labels:
        ax.set_xticklabels([])
    if not y_tick_labels:
        ax.set_yticklabels([])
    if grid:
        ax.grid(True)
        ax.set_axisbelow(True)
    if y_lim is not None:
        ax.set_ylim(y_lim)

    return ax
Пример #7
0
    def plot_distribution(self,
                          data: Optional[Series] = None,
                          transpose: bool = False,
                          normalize: bool = False,
                          significance: bool = False,
                          sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                          sig_values: Tuple[float, float] = (0.945, 0.055),
                          label_mappings: Optional[Dict[str, str]] = None,
                          ax: Optional[Axes] = None) -> Axes:
        """
        Plot the distribution of answers to the Question as a heatmap of
        choice against rank.

        :param data: The answers given by Respondents to the Question.
                     Falls back to the data stored on the Question.
        :param transpose: Whether to transpose the labels to the y-axis.
        :param normalize: Whether to normalize number of responses in each
                          position to total number of responses.
        :param significance: Whether to highlight significant choices.
        :param sig_colors: Tuple of (high, low) colors for highlighting
                           significance.
        :param sig_values: Tuple of (high, low) values for assessing
                           significance.
        :param label_mappings: Optional dict of replacements for labels.
        :param ax: Optional matplotlib axes to plot on.
        :return: The axes that were plotted on.
        :raises ValueError: If no data is passed and none is stored.
        """
        data = data if data is not None else self._data
        if data is None:
            raise ValueError('No data!')
        # one record per (choice, rank) for every non-null response
        order_counts = []
        # `Series.items` replaces `Series.iteritems` (removed in pandas 2.0)
        for _, str_user_order in data.items():
            if isnull(str_user_order):
                continue
            user_order = str_user_order.split(CATEGORY_SPLITTER)
            for rank, choice in enumerate(user_order, start=1):
                order_counts.append({'choice': choice, 'rank': rank})
        counts = DataFrame(order_counts).groupby(
            ['choice',
             'rank']).size().reset_index().rename(columns={0: 'count'})
        pivot = pivot_table(data=counts,
                            index='choice',
                            columns='rank',
                            values='count').reindex(self.categories)
        pivot.index = wrap_text(
            map_text(pivot.index, mapping=label_mappings or {}))
        if normalize:
            fmt = '.2f'
            pivot = pivot / len(data)
        else:
            fmt = '.0f'
        if transpose:
            pivot = pivot.T
        axf = AxesFormatter(axes=ax)
        ax = axf.axes
        heatmap(data=pivot, annot=True, fmt=fmt, cmap='Blues', ax=ax)

        if significance:
            # NOTE(review): double underscore in the method name - confirm
            # that the method really exists under this spelling.
            cat_sigs = self.significance__one_vs_any()
            n_categories = len(self.categories)
            for category, sig_value in cat_sigs.items():
                if sig_value <= sig_values[1]:
                    color = sig_colors[1]
                elif sig_value >= sig_values[0]:
                    color = sig_colors[0]
                else:
                    # Not significant. NaN values also end up here, so a
                    # rectangle is never drawn with an unset or stale color.
                    continue
                position = self.categories.index(category)
                # the rectangle spans the whole heatmap in one direction and
                # the cells of this category in the other
                full_span = (0.1, n_categories - 0.1)
                category_band = (position + 0.1, position + 0.9)
                if not transpose:
                    (x_min, x_max), (y_min, y_max) = full_span, category_band
                else:
                    (x_min, x_max), (y_min, y_max) = category_band, full_span
                ax.plot([x_min, x_max, x_max, x_min, x_min],
                        [y_min, y_min, y_max, y_max, y_min],
                        color=color,
                        linewidth=2)

        axf.x_axis.tick_labels.set_ha_center()
        axf.y_axis.tick_labels.set_va_center()
        if transpose:
            draw_vertical_dividers(ax)
        else:
            draw_horizontal_dividers(ax)
        axf.set_title_text(self.text)
        return ax