Exemplo n.º 1
0
    def draw(self, points, target=None, **kwargs):
        """
        Called from the fit method, this method draws the dispersion plot
        on the visualizer's axes.

        Parameters
        ----------
        points : iterable of (x, y) pairs
            Coordinates to plot; each item is read as ``point[0]`` and
            ``point[1]``.

        target : iterable, default: None
            Class of each point, aligned with ``points``. Every value must
            be a member of ``self.classes_``. If None, all points are
            assigned to the first class.

        kwargs : generic keyword arguments (not used by this method).

        Returns
        -------
        ax : the axes the plot was drawn on (``self.ax``).

        Raises
        ------
        YellowbrickValueError
            If the number of labels differs from the number of classes.
        """
        # Resolve the labels with the classes
        labels = self.labels if self.labels is not None else self.classes_
        if len(labels) != len(self.classes_):
            raise YellowbrickValueError(
                ("number of supplied labels ({}) does not "
                 "match the number of classes ({})").format(
                     len(labels), len(self.classes_)))

        # Colors are keyed by display label, not by raw class value.
        color_values = resolve_colors(n_colors=len(labels),
                                      colormap=self.colormap,
                                      colors=self.color)
        colors = dict(zip(labels, color_values))

        # Map each class to its display label
        labels = dict(zip(self.classes_, labels))

        # Define boundaries with a vertical line
        if self.annotate_docs:
            for xcoords in self.boundaries_:
                self.ax.axvline(x=xcoords,
                                color="lightgray",
                                linestyle="dashed")

        series = defaultdict(lambda: {"x": [], "y": []})

        if target is not None:
            for point, t in zip(points, target):
                label = labels[t]
                series[label]["x"].append(point[0])
                series[label]["y"].append(point[1])
        else:
            # Go through the class -> label map so the series key matches the
            # keys of ``colors``; using the raw class here raised a KeyError
            # whenever custom labels were supplied.
            label = labels[self.classes_[0]]
            for x, y in points:
                series[label]["x"].append(x)
                series[label]["y"].append(y)

        # Use a distinct loop name so the ``points`` argument is not shadowed.
        for label, coords in series.items():
            self.ax.scatter(
                coords["x"],
                coords["y"],
                marker="|",
                c=colors[label],
                zorder=100,
                label=label,
            )

        self.ax.set_yticks(list(range(len(self.indexed_words_))))
        self.ax.set_yticklabels(self.indexed_words_)

        return self.ax
Exemplo n.º 2
0
    def draw(self):
        """
        Render the class balance bar chart on ``self.ax`` from
        ``self.support_``.

        In balance mode a single centered bar series is drawn. Otherwise
        (compare mode) one bar series is drawn per entry of
        ``self.support_``, labelled "train" then "test", with every series
        after the first shifted right by the bar width.

        Returns
        -------
        ax : the axes the chart was drawn on (``self.ax``).
        """
        # One color per entry of support_
        palette = resolve_colors(
            len(self.support_), colormap=self.colormap, colors=self.colors
        )

        if self._mode == BALANCE:
            positions = np.arange(len(self.support_))
            self.ax.bar(
                positions, self.support_, color=palette, align="center", width=0.5
            )
            return self.ax

        # Compare mode
        width = 0.35
        names = ["train", "test"]

        for series_no, counts in enumerate(self.support_):
            positions = np.arange(len(self.classes_))
            # Offset every series but the first so the bars sit side by side
            if series_no > 0:
                positions = positions + width

            self.ax.bar(
                positions,
                counts,
                width,
                color=palette[series_no],
                label=names[series_no],
            )

        return self.ax
Exemplo n.º 3
0
def generate_tsne(title, X, labels):
    """
    Fit a TSNEVisualizer on ``X``/``labels`` and save the plot to disk.

    The image is written to ``os.path.join(OUTPUT, title)`` and the figure
    is closed afterwards, so repeated calls do not accumulate open pyplot
    figures.

    Parameters
    ----------
    title : str
        Used both as the axes title and as the output file name.
    X : passed unchanged to ``TSNEVisualizer.fit``.
    labels : passed unchanged to ``TSNEVisualizer.fit`` as the target.
    """
    fig, ax1 = plt.subplots(1, 1, figsize=(4, 2))
    title_dic = {'fontsize': 7, 'fontweight': 'bold'}

    try:
        # Two palettes are concatenated to build the color list
        # (presumably 11 + 10 entries -- confirm against resolve_colors).
        colors = resolve_colors(11, 'Spectral_r')
        colors2 = resolve_colors(10, 'BrBG_r')
        tsne = TSNEVisualizer(ax1, colors=colors + colors2, decompose=None)
        tsne.fit(X, labels)
        tsne.finalize()
        ax1 = tsne.ax
        ax1.set_title(title, title_dic)

        filename = os.path.join(OUTPUT, title)
        plt.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
Exemplo n.º 4
0
    def draw(self):
        """
        Draws the cv scores as a line chart on the current axes.

        For every metric in ``METRICS`` that is present in
        ``self.cv_scores_`` this plots the score against
        ``self.thresholds_``, shades the band between the metric's
        ``"<metric>_lower"`` and ``"<metric>_upper"`` entries, and, for the
        metric selected by ``self._check_argmax``, adds a dashed vertical
        line at the threshold where that metric is largest.

        Returns
        -------
        ax : the axes the chart was drawn on (``self.ax``).
        """
        # Set the colors from the supplied values or reasonable defaults
        color_values = resolve_colors(n_colors=4, colors=self.color)

        # Get the metric used to annotate the graph with its maximizing value
        argmax = self._check_argmax(self.argmax, self.exclude)

        for idx, metric in enumerate(METRICS):
            # Skip any excluded labels
            if metric not in self.cv_scores_:
                continue

            # Index by position in METRICS so every metric has a static color
            color = color_values[idx]

            # Make the label pretty
            if metric == "fscore":
                if self.fbeta == 1.0:
                    label = "$f_1$"
                else:
                    # Raw string: in a normal literal "\b" is a backspace
                    # character, which corrupted "\beta". The closing "$" is
                    # required to terminate the mathtext expression.
                    label = r"$f_{{\beta={:0.1f}}}$".format(self.fbeta)
            else:
                label = metric.replace("_", " ")

            # Draw the metric values
            self.ax.plot(self.thresholds_,
                         self.cv_scores_[metric],
                         color=color,
                         label=label)

            # Draw the upper and lower bounds
            lower = self.cv_scores_["{}_lower".format(metric)]
            upper = self.cv_scores_["{}_upper".format(metric)]

            self.ax.fill_between(self.thresholds_,
                                 upper,
                                 lower,
                                 alpha=0.35,
                                 linewidth=0,
                                 color=color)

            # Annotate the graph with the maximizing value. The index is kept
            # in its own name instead of overwriting the metric name held in
            # ``argmax``.
            if argmax and argmax == metric:
                best_idx = self.cv_scores_[metric].argmax()
                threshold = self.thresholds_[best_idx]
                self.ax.axvline(
                    threshold,
                    ls="--",
                    c="k",
                    lw=1,
                    label="$t_{}={:0.2f}$".format(metric[0], threshold),
                )

        return self.ax
Exemplo n.º 5
0
    def draw(self, **kwargs):
        """
        Render the feature importances as a horizontal bar chart.

        When ``self.stack`` is truthy the importances are handed to
        ``bar_stack`` with one color per class; otherwise a plain ``barh``
        chart with one color per feature is drawn and the y ticks are
        labelled with the feature names.

        Returns
        -------
        ax : the axes the chart was drawn on (``self.ax``).

        Raises
        ------
        NotFitted
            If ``feature_importances_`` or ``features_`` is missing.
        """
        # Quick validation: report the first required attribute not yet set
        required = ("feature_importances_", "features_")
        absent = next((name for name in required if not hasattr(self, name)), None)
        if absent is not None:
            raise NotFitted("missing required param '{}'".format(absent))

        # Bar centers sit half a unit above each integer position
        pos = np.arange(self.features_.shape[0]) + 0.5

        if not self.stack:
            bar_colors = resolve_colors(
                len(self.features_), colormap=self.colormap, colors=self.colors
            )
            self.ax.barh(
                pos, self.feature_importances_, color=bar_colors, align="center"
            )

            # Set the labels for the bars
            self.ax.set_yticks(pos)
            self.ax.set_yticklabels(self.features_)
            return self.ax

        # Stacked chart: one color per class, legend anchored outside the plot
        class_colors = resolve_colors(len(self.classes_), colormap=self.colormap)
        bar_stack(
            self.feature_importances_,
            ax=self.ax,
            labels=list(self.classes_),
            ticks=self.features_,
            orientation="h",
            colors=class_colors,
            legend_kws={"bbox_to_anchor": (1.04, 0.5), "loc": "center left"},
        )
        return self.ax
Exemplo n.º 6
0
    def draw(self, points, target=None, **kwargs):
        """
        Called from the fit method, this method draws the TSNE scatter plot,
        from a set of decomposed points in 2 dimensions. This method also
        accepts a third dimension, target, which is used to specify the colors
        of each of the points. If the target is not specified, then the points
        are plotted as a single cloud to show similar documents.

        Parameters
        ----------
        points : iterable of (x, y) pairs
            The 2D coordinates to plot.

        target : iterable, default: None
            Class of each point, aligned with ``points``; every value must be
            a member of ``self.classes_``.

        kwargs : generic keyword arguments (not used by this method).

        Returns
        -------
        ax : the axes the plot was drawn on (``self.ax``).

        Raises
        ------
        YellowbrickValueError
            If the number of labels differs from the number of classes.
        """
        # Resolve the labels with the classes
        labels = self.labels if self.labels is not None else self.classes_
        if len(labels) != len(self.classes_):
            raise YellowbrickValueError(
                (
                    "number of supplied labels ({}) does not "
                    "match the number of classes ({})"
                ).format(len(labels), len(self.classes_))
            )

        # Create the color mapping for the labels (keyed by display label).
        self.color_values_ = resolve_colors(
            n_colors=len(labels), colormap=self.colormap, colors=self.colors
        )
        colors = dict(zip(labels, self.color_values_))

        # Transform labels into a map of class to label
        labels = dict(zip(self.classes_, labels))

        # Expand the points into vectors of x and y for scatter plotting,
        # assigning them to their label if the label has been passed in.
        series = defaultdict(lambda: {"x": [], "y": []})

        if target is not None:
            for t, point in zip(target, points):
                label = labels[t]
                series[label]["x"].append(point[0])
                series[label]["y"].append(point[1])
        else:
            # Look the label up through the class map so it matches the keys
            # of ``colors``; the raw class raised a KeyError when custom
            # labels were supplied.
            label = labels[self.classes_[0]]
            for x, y in points:
                series[label]["x"].append(x)
                series[label]["y"].append(y)

        # Plot the points (distinct loop name: do not shadow ``points``)
        for label, coords in series.items():
            self.ax.scatter(
                coords["x"], coords["y"], c=colors[label], alpha=self.alpha, label=label
            )

        return self.ax
Exemplo n.º 7
0
    def draw(self):
        """
        Draws the cv scores as a line chart on the current axes.

        Each metric of ``METRICS`` found in ``self.cv_scores_`` is plotted
        against ``self.thresholds_`` together with a shaded band between its
        ``"<metric>_lower"`` and ``"<metric>_upper"`` scores. The metric
        named by ``self.argmax`` (compared case-insensitively) is annotated
        with a dashed vertical line at its maximizing threshold.

        Returns
        -------
        ax : the axes the chart was drawn on (``self.ax``).
        """
        # Set the colors from the supplied values or reasonable defaults
        color_values = resolve_colors(n_colors=4, colors=self.color)

        for idx, metric in enumerate(METRICS):
            # Skip any excluded labels
            if metric not in self.cv_scores_:
                continue

            # Get the color ensuring every metric has a static color
            color = color_values[idx]

            # Make the label pretty
            if metric == "fscore":
                if self.fbeta == 1.0:
                    label = "$f_1$"
                else:
                    # Raw string keeps "\beta" intact ("\b" is a backspace in
                    # a normal literal); the trailing "$" closes the mathtext.
                    label = r"$f_{{\beta={:0.1f}}}$".format(self.fbeta)
            else:
                label = metric.replace("_", " ")

            # Draw the metric values
            self.ax.plot(
                self.thresholds_, self.cv_scores_[metric],
                color=color, label=label
            )

            # Draw the upper and lower bounds
            lower = self.cv_scores_["{}_lower".format(metric)]
            upper = self.cv_scores_["{}_upper".format(metric)]

            self.ax.fill_between(
                self.thresholds_, upper, lower,
                alpha=0.35, linewidth=0, color=color
            )

            # Annotate the graph with the maximizing value
            if self.argmax.lower() == metric:
                best_idx = self.cv_scores_[metric].argmax()
                threshold = self.thresholds_[best_idx]
                self.ax.axvline(
                    threshold, ls='--', c='k', lw=1,
                    label="$t_{}={:0.2f}$".format(metric[0], threshold)
                )

        return self.ax
Exemplo n.º 8
0
    def draw(self, points, target=None, **kwargs):
        """
        Called from the fit method, this method draws the dispersion plot
        on the visualizer's axes.

        Parameters
        ----------
        points : iterable of (x, y) pairs
            Coordinates to plot.

        target : iterable, default: None
            Class of each point, aligned with ``points``; every value must be
            a member of ``self.classes_``. If None, all points go to the
            first class.

        kwargs : generic keyword arguments (not used by this method).

        Raises
        ------
        YellowbrickValueError
            If the number of labels differs from the number of classes.
        """

        # Resolve the labels with the classes
        labels = self.labels if self.labels is not None else self.classes_
        if len(labels) != len(self.classes_):
            raise YellowbrickValueError((
                "number of supplied labels ({}) does not "
                "match the number of classes ({})"
            ).format(len(labels), len(self.classes_)))

        # Create the color mapping for the labels (keyed by display label).
        color_values = resolve_colors(
            n_colors=len(labels), colormap=self.colormap, colors=self.color)
        colors = dict(zip(labels, color_values))

        # Transform labels into a map of class to label
        labels = dict(zip(self.classes_, labels))

        # Define boundaries with a vertical line
        if self.annotate_docs:
            for xcoords in self.boundaries_:
                self.ax.axvline(x=xcoords, color='lightgray', linestyle='dashed')

        series = defaultdict(lambda: {'x': [], 'y': []})

        if target is not None:
            for point, t in zip(points, target):
                label = labels[t]
                series[label]['x'].append(point[0])
                series[label]['y'].append(point[1])
        else:
            # Resolve the display label so the key exists in ``colors``; the
            # raw class raised a KeyError when custom labels were supplied.
            label = labels[self.classes_[0]]
            for x, y in points:
                series[label]['x'].append(x)
                series[label]['y'].append(y)

        # Distinct loop name so the ``points`` argument is not shadowed
        for label, coords in series.items():
            self.ax.scatter(coords['x'], coords['y'], marker='|',
                            c=colors[label], zorder=100, label=label)

        self.ax.set_yticks(list(range(len(self.indexed_words_))))
        self.ax.set_yticklabels(self.indexed_words_)
Exemplo n.º 9
0
    def draw(self, *args, **kwargs):
        """
        Renders the visualization

        Every entry of ``self.plot_data`` is resampled onto 101 evenly spaced
        thresholds in [0, 1]; the lower, median and upper quantiles
        (``self.quantiles``) across entries are then drawn for precision,
        recall and queue rate.

        Parameters
        ----------
        args: tuple
            ignored; still accepted because the previous signature took
            arbitrary positional arguments.

        kwargs: dict
            keyword arguments (not used by this method).

        Returns
        -------
        self.ax : AxesSubplot of the visualizer
            Returns the AxesSubplot instance of the visualizer
        """
        # Set the colors from the supplied values or reasonable defaults
        color_values = resolve_colors(n_colors=3, colors=self.color)

        uniform_thresholds = np.linspace(0, 1, num=101)
        series_names = ('precision', 'recall', 'queue_rate')
        uniform_plots = {name: [] for name in series_names}

        for data in self.plot_data:
            # Position of each uniform threshold within this trial's
            # thresholds (assumes they are sorted ascending, as bisect
            # requires -- confirm against the code that builds plot_data).
            indices = [
                bisect.bisect_left(data['thresholds'], ut)
                for ut in uniform_thresholds
            ]
            for name in series_names:
                values = data[name]
                uniform_plots[name].append([values[i] for i in indices])

        for name, color in zip(series_names, color_values):
            # Compute the lower, median, and upper plots
            lower, median, upper = mstats.mquantiles(
                uniform_plots[name], prob=self.quantiles, axis=0
            )

            # Draw the median line
            self.ax.plot(uniform_thresholds, median, color=color)

            # Draw the fill between the lower and upper bounds
            self.ax.fill_between(
                uniform_thresholds, upper, lower,
                alpha=0.5, linewidth=0, color=color
            )

        return self.ax
Exemplo n.º 10
0
    def draw(self, X, y, **kwargs):
        """
        Scatter every instance of ``X`` on ``self.ax``, grouped by class.

        Called from the fit method. ``y[i]`` is used as an index into
        ``self.classes_`` to find the class of row ``i``; each class gets its
        own color and the next marker from ``self.markers``. Extra keyword
        arguments are forwarded to ``ax.scatter``.
        """
        # Set the axes limits
        for set_limits in (self.ax.set_xlim, self.ax.set_ylim):
            set_limits([-1, 1])

        # Fall back to the default color cycle only when the user supplied
        # neither a colormap nor an explicit color.
        if self.colormap is None and self.color is None:
            palette = get_color_cycle()
        else:
            palette = resolve_colors(
                num_colors=len(self.classes_),
                colormap=self.colormap,
                color=self.color)

        colors = dict(zip(self.classes_, palette))

        # Per-class pair of coordinate lists: [first values, second values]
        to_plot = {kls: [[], []] for kls in self.classes_}

        # Add each row of the data set to to_plot for plotting
        # TODO: make this an independent function for override
        for idx, instance in enumerate(X):
            doubled = np.repeat(np.expand_dims(instance, axis=1), 2, axis=1)
            first, second = doubled[0], doubled[1]
            kls = self.classes_[y[idx]]

            first_values, second_values = to_plot[kls]
            first_values.append(first)
            second_values.append(second)

        # Add the scatter plots from the to_plot function
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            first_values, second_values = to_plot[kls]
            self.ax.scatter(
                first_values,
                second_values,
                marker=next(self.markers),
                color=colors[kls],
                label=str(kls),
                **kwargs)

        self.ax.axis('equal')
Exemplo n.º 11
0
    def draw(self, points, target=None, **kwargs):
        """
        Called from the fit method, this method draws the TSNE scatter plot,
        from a set of decomposed points in 2 dimensions. This method also
        accepts a third dimension, target, which is used to specify the colors
        of each of the points. If the target is not specified, then the points
        are plotted as a single cloud to show similar documents.

        Parameters
        ----------
        points : iterable of (x, y) pairs
            The 2D coordinates to plot.

        target : iterable, default: None
            Class of each point, aligned with ``points``; every value must be
            a member of ``self.classes_``.

        kwargs : generic keyword arguments (not used by this method).

        Raises
        ------
        YellowbrickValueError
            If the number of labels differs from the number of classes.
        """
        # Resolve the labels with the classes
        labels = self.labels if self.labels is not None else self.classes_
        if len(labels) != len(self.classes_):
            raise YellowbrickValueError((
                "number of supplied labels ({}) does not "
                "match the number of classes ({})"
            ).format(len(labels), len(self.classes_)))

        # Create the color mapping for the labels (keyed by display label).
        self.color_values_ = resolve_colors(
            n_colors=len(labels), colormap=self.colormap, colors=self.color)
        colors = dict(zip(labels, self.color_values_))

        # Transform labels into a map of class to label
        labels = dict(zip(self.classes_, labels))

        # Expand the points into vectors of x and y for scatter plotting,
        # assigning them to their label if the label has been passed in.
        series = defaultdict(lambda: {'x': [], 'y': []})

        if target is not None:
            for t, point in zip(target, points):
                label = labels[t]
                series[label]['x'].append(point[0])
                series[label]['y'].append(point[1])
        else:
            # Use the display label of the first class so the key exists in
            # ``colors``; the raw class raised a KeyError when custom labels
            # were supplied.
            label = labels[self.classes_[0]]
            for x, y in points:
                series[label]['x'].append(x)
                series[label]['y'].append(y)

        # Plot the points (distinct loop name: do not shadow ``points``)
        for label, coords in series.items():
            self.ax.scatter(
                coords['x'], coords['y'], c=colors[label],
                alpha=self.alpha, label=label
            )
Exemplo n.º 12
0
    def draw(self, X, y, **kwargs):
        """
        Plot each instance of ``X`` as a point colored by its class.

        Called from the fit method. Row ``i`` belongs to the class
        ``self.classes_[y[i]]``. One scatter series is drawn per class with
        its resolved color, the next marker from ``self.markers`` and
        ``self.alpha``; extra keyword arguments go to ``ax.scatter``.
        """
        # Set the axes limits
        limits = [-1, 1]
        self.ax.set_xlim(list(limits))
        self.ax.set_ylim(list(limits))

        # set the colors
        palette = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color
        )
        colors = dict(zip(self.classes_, palette))

        # Create a data structure to hold the scatter plot representations
        to_plot = {kls: [[], []] for kls in self.classes_}

        # Add each row of the data set to to_plot for plotting
        # TODO: make this an independent function for override
        for position, record in enumerate(X):
            expanded = np.repeat(np.expand_dims(record, axis=1), 2, axis=1)
            first, second = expanded[0], expanded[1]
            bucket = to_plot[self.classes_[y[position]]]

            bucket[0].append(first)
            bucket[1].append(second)

        # Add the scatter plots from the to_plot function
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            bucket = to_plot[kls]
            self.ax.scatter(
                bucket[0],
                bucket[1],
                marker=next(self.markers),
                color=colors[kls],
                label=str(kls),
                alpha=self.alpha,
                **kwargs)

        self.ax.axis('equal')
Exemplo n.º 13
0
    def draw(self, **kwargs):
        """
        Draw the part-of-speech tag counts as a bar chart on ``self.ax``.

        Called from the fit method. When ``self.frequency`` is set, the tags
        (and their counts) are reordered by descending total count; this
        also rewrites ``self._pos_tags``. With ``self.stack`` the counts are
        passed to ``bar_stack``; otherwise only the first row of counts is
        plotted.

        Parameters
        ----------
        kwargs: dict
            generic keyword arguments.

        Returns
        -------
        ax : matplotlib axes
            Axes on which the PosTagVisualizer was drawn.
        """
        # Nested dict -> 2D array, one row per outer key
        rows = [list(per_label.values())
                for per_label in self.pos_tag_counts_.values()]
        counts = np.array(rows)
        # Column-wise totals across all rows
        totals = np.sum(counts, axis=0)

        if self.frequency:
            # Descending order of total count
            order = totals.argsort()[::-1]
            self._pos_tags = np.array(self._pos_tags)[order]
            counts = counts[:, order]

        if self.stack:
            bar_stack(
                counts,
                ax=self.ax,
                labels=list(self.labels_),
                ticks=self._pos_tags,
                colors=self.colors,
                colormap=self.colormap,
            )
            return self.ax

        n_tags = len(self._pos_tags)
        xidx = np.arange(n_tags)
        bar_colors = resolve_colors(
            n_colors=len(self._pos_tags),
            colormap=self.colormap,
            colors=self.colors,
        )
        self.ax.bar(xidx, counts[0], color=bar_colors)

        return self.ax
Exemplo n.º 14
0
    def draw(self):
        """
        Draw the precision-recall curves computed in score on the axes.

        Resolves one color per class into ``self._colors``, optionally draws
        the iso-F1 guide curves, then delegates to ``_draw_binary`` or
        ``_draw_multiclass`` depending on ``self.target_type_`` and returns
        that method's result.
        """
        # set the colors
        self._colors = resolve_colors(
            n_colors=len(self.classes_), colormap=self.cmap, colors=self.colors
        )

        if self.iso_f1_curves:
            # The same x grid is used for every curve
            x = np.linspace(0.01, 1)
            for f1 in self.iso_f1_values:
                y = f1 * x / (2 * x - f1)
                # Only the non-negative part of the curve is plotted
                visible = y >= 0
                self.ax.plot(x[visible], y[visible], color="#333333", alpha=0.2)

                # Label the curve just above its value at grid index 45
                text = "$f_1={:0.1f}$".format(f1)
                self.ax.annotate(text, xy=(0.9, y[45] + 0.02))

        if self.target_type_ == BINARY:
            renderer = self._draw_binary
        else:
            renderer = self._draw_multiclass
        return renderer()
Exemplo n.º 15
0
    def draw(self, X, y=None, **kwargs):
        """
        Called from the fit method, this method creates a decision boundary
        plot, and if self.show_scatter is True, it also scatter plots each
        instance as a class colored point, whose location is determined by
        the feature data set.

        Parameters
        ----------
        X : the feature data; passed through ``_select_feature_columns``
            before plotting.

        y : indexable, default: None
            Target of each row of ``X``. Only read when
            ``self.show_scatter`` is True.

        kwargs : dict
            Forwarded to ``ax.scatter`` when the scatter is drawn.
        """
        # ensure that if someone is passing in another X such as X_test, that
        # features will be properly handled
        X = self._select_feature_columns(X)

        color_cycle = iter(
            resolve_colors(colors=self.colors, n_colors=len(self.classes_)))
        colors = OrderedDict([(c, next(color_cycle))
                              for c in self.classes_.keys()])

        # ListedColormap is documented to take a list or array of colors, so
        # materialize the dict view instead of passing it directly.
        self.ax.pcolormesh(
            self.xx,
            self.yy,
            self.Z_shape,
            alpha=self.pcolormesh_alpha,
            cmap=ListedColormap(list(colors.values())))

        if not self.show_scatter:
            # No points are drawn, so ``y`` is not needed; the legend is
            # built from one color patch per class instead.
            labels = [
                Patch(color=colors[kls], label=kls)
                for kls in self.classes_.keys()
            ]
            self.ax.legend(handles=labels)
            return

        # Create a data structure to hold the scatter plot representations,
        # keyed by the values of the ``self.classes_`` mapping
        to_plot = OrderedDict()
        for index in self.classes_.values():
            to_plot[index] = [[], []]

        # Add each row of the data set to to_plot for plotting
        for i, row in enumerate(X):
            row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
            x_, y_ = row_[0], row_[1]
            # look up the y class name if given in init
            if self.class_labels is not None:
                target = self.class_labels[y[i]]
            else:
                target = y[i]
            index = self.classes_[target]
            to_plot[index][0].append(x_)
            to_plot[index][1].append(y_)

        # Add the scatter plots from the to_plot function
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls, index in self.classes_.items():
            self.ax.scatter(
                to_plot[index][0],
                to_plot[index][1],
                marker=next(self.markers),
                color=colors[kls],
                alpha=self.scatter_alpha,
                s=30,
                edgecolors='black',
                label=str(kls),
                **kwargs)
Exemplo n.º 16
0
    def draw(self, X, y, **kwargs):
        """
        Draw the radviz plot on ``self.ax``.

        Called from the fit method. Each feature is placed at an evenly
        spaced point on the unit circle; every normalized instance is
        plotted at the average of those points weighted by its feature
        values, colored by ``self.classes_[y[i]]``. Extra keyword arguments
        are forwarded to ``ax.scatter``.
        """
        # Convert from dataframe
        if is_dataframe(X):
            X = X.values

        # Clean out nans and warn the user that they aren't plotted
        nan_warnings.warn_if_nans_exist(X)
        X, y = nan_warnings.filter_missing(X, y)

        # Only the number of columns is needed below
        _, ncols = X.shape

        # Set the axes limits
        self.ax.set_xlim([-1, 1])
        self.ax.set_ylim([-1, 1])

        # Create the colors
        # TODO: Allow both colormap, listed colors, and palette definition
        # TODO: Make this an independent function or property for override!
        palette = resolve_colors(
            n_colors=len(self.classes_), colormap=self.colormap, colors=self.color
        )
        self._colors = dict(zip(self.classes_, palette))

        # Create a data structure to hold scatter plot representations
        to_plot = {kls: [[], []] for kls in self.classes_}

        # Compute the anchor of each feature axis on the circumference
        # TODO: make this an independent function for override
        angles = [2.0 * np.pi * (i / float(ncols)) for i in range(ncols)]
        s = np.array([(np.cos(t), np.sin(t)) for t in angles])

        # Compute the locations of the scatter plot for each class
        # Normalize the data first to plot along the 0, 1 axis
        for i, row in enumerate(self.normalize(X)):
            weights = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
            xy = (s * weights).sum(axis=0) / row.sum()
            xs, ys = to_plot[self.classes_[y[i]]]

            xs.append(xy[0])
            ys.append(xy[1])

        # Add the scatter plots from the to_plot function
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            xs, ys = to_plot[kls]
            self.ax.scatter(
                xs, ys, color=self._colors[kls],
                label=str(kls), alpha=self.alpha, **kwargs
            )

        # Add the circular axis path
        # TODO: Make this a separate function (along with labeling)
        outline = patches.Circle(
            (0.0, 0.0), radius=1.0, facecolor='none', edgecolor='grey', linewidth=.5
        )
        self.ax.add_patch(outline)

        # Add the feature names
        for xy, name in zip(s, self.features_):
            # Add the patch indicating the location of the axis
            self.ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='#777777'))

            # Push the label away from the circle center: left/right by the
            # sign of x, below/above by the sign of y
            on_left = xy[0] < 0.0
            below = xy[1] < 0.0
            self.ax.text(
                xy[0] - 0.025 if on_left else xy[0] + 0.025,
                xy[1] - 0.025 if below else xy[1] + 0.025,
                name,
                ha='right' if on_left else 'left',
                va='top' if below else 'bottom',
                size='small',
            )

        self.ax.axis('equal')
Exemplo n.º 17
0
    def draw(self, X, y, **kwargs):
        """
        Render the radviz canvas on ``self.ax``.

        Called from the fit method. Feature axes are anchored at evenly
        spaced angles on the unit circle. Each normalized row of ``X`` is
        drawn at the sum of the anchors scaled by the row's values, divided
        by the row sum, in the color of ``self.classes_[y[i]]``. Feature
        names are written next to their anchors. Extra keyword arguments are
        passed to ``ax.scatter``.
        """
        # Convert from dataframe
        if is_dataframe(X):
            X = X.values

        # Clean out nans and warn the user that they aren't plotted
        nan_warnings.warn_if_nans_exist(X)
        X, y = nan_warnings.filter_missing(X, y)

        # Get the number of feature columns
        _, ncols = X.shape

        # Set the axes limits
        self.ax.set_xlim([-1, 1])
        self.ax.set_ylim([-1, 1])

        # Create the colors
        # TODO: Allow both colormap, listed colors, and palette definition
        # TODO: Make this an independent function or property for override!
        class_colors = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color,
        )
        self._colors = dict(zip(self.classes_, class_colors))

        # Create a data structure to hold scatter plot representations
        to_plot = {}
        for kls in self.classes_:
            to_plot[kls] = [[], []]

        # Compute the arcs around the circumference for each feature axis
        # TODO: make this an independent function for override
        anchors = []
        for i in range(ncols):
            t = 2.0 * np.pi * (i / float(ncols))
            anchors.append((np.cos(t), np.sin(t)))
        s = np.array(anchors)

        # Compute the locations of the scatter plot for each class
        # Normalize the data first to plot along the 0, 1 axis
        for i, row in enumerate(self.normalize(X)):
            row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
            xy = (s * row_).sum(axis=0) / row.sum()
            kls = self.classes_[y[i]]

            for axis in (0, 1):
                to_plot[kls][axis].append(xy[axis])

        # Add the scatter plots from the to_plot function
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            self.ax.scatter(
                to_plot[kls][0],
                to_plot[kls][1],
                color=self._colors[kls],
                label=str(kls),
                alpha=self.alpha,
                **kwargs
            )

        # Add the circular axis path
        # TODO: Make this a separate function (along with labeling)
        boundary = patches.Circle(
            (0.0, 0.0),
            radius=1.0,
            facecolor='none',
            edgecolor='grey',
            linewidth=.5,
        )
        self.ax.add_patch(boundary)

        # Label placement per quadrant, keyed by (x < 0, y < 0):
        # (x offset, y offset, horizontal alignment, vertical alignment)
        placement = {
            (True, True): (-0.025, -0.025, 'right', 'top'),
            (True, False): (-0.025, 0.025, 'right', 'bottom'),
            (False, True): (0.025, -0.025, 'left', 'top'),
            (False, False): (0.025, 0.025, 'left', 'bottom'),
        }

        # Add the feature names
        for xy, name in zip(s, self.features_):
            # Add the patch indicating the location of the axis
            marker = patches.Circle(xy, radius=0.025, facecolor='#777777')
            self.ax.add_patch(marker)

            # Add the feature names offset around the axis marker
            quadrant = (bool(xy[0] < 0.0), bool(xy[1] < 0.0))
            dx, dy, ha, va = placement[quadrant]
            self.ax.text(xy[0] + dx, xy[1] + dy, name, ha=ha, va=va, size='small')

        self.ax.axis('equal')
Exemplo n.º 18
0
    def draw(self, X, y=None, **kwargs):
        """
        Called from the fit method, this method creates a decision boundary
        plot, and if self.show_scatter is True, it also scatter plots each
        instance as a class or target colored point, whose location is
        determined by the selected feature columns.

        Parameters
        ----------
        X : array-like
            Instance data. It is passed through ``_select_feature_columns``
            first; afterwards the first two values of every row are used as
            the x and y coordinates of the instance.

        y : array-like, default: None
            Target value for every row of X. Although it defaults to None it
            is indexed once per row, so it must be supplied whenever X is not
            empty.

        kwargs : dict
            Keyword arguments forwarded to every ``ax.scatter`` call.
        """
        # ensure that if someone is passing in another X such as X_test, that
        # features will be properly handled
        X = self._select_feature_columns(X)

        # Assign one color per class, in the iteration order of classes_
        color_cycle = iter(
            resolve_colors(color=self.colors, num_colors=len(self.classes_)))
        colors = OrderedDict([(c, next(color_cycle))
                              for c in self.classes_.keys()])

        # Materialize the dict view so the colormap receives a real list
        self.ax.pcolormesh(self.xx,
                           self.yy,
                           self.Z_shape,
                           alpha=self.pcolormesh_alpha,
                           cmap=ListedColormap(list(colors.values())))

        # Create a data structure to hold the scatter plot representations:
        # class index -> [x coordinates, y coordinates]
        to_plot = OrderedDict()
        for index in self.classes_.values():
            to_plot[index] = [[], []]

        # Add each row of the data set to to_plot for plotting
        for i, row in enumerate(X):
            # look up the y class name if given in init
            if self.class_labels is not None:
                target = self.class_labels[y[i]]
            else:
                target = y[i]
            index = self.classes_[target]

            # Each instance contributes exactly one (x, y) pair. Repeating
            # the row before unpacking would scatter every point twice and
            # make it more opaque than scatter_alpha requests.
            to_plot[index][0].append(row[0])
            to_plot[index][1].append(row[1])

        # Add the scatter plots from the to_plot mapping
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function

        if self.show_scatter:
            for kls, index in self.classes_.items():
                self.ax.scatter(to_plot[index][0],
                                to_plot[index][1],
                                marker=next(self.markers),
                                color=colors[kls],
                                alpha=self.scatter_alpha,
                                s=30,
                                edgecolors='black',
                                label=str(kls),
                                **kwargs)
        else:
            # Without scatter points there are no labeled artists, so build
            # the legend entries by hand from colored patches
            labels = [
                Patch(color=colors[kls], label=kls)
                for kls in self.classes_.keys()
            ]
            self.ax.legend(handles=labels)
Exemplo n.º 19
0
    def draw(self, X, y, **kwargs):
        """
        Called from the fit method, this method creates the parallel
        coordinates canvas and draws each instance and vertical lines on it.

        Parameters
        ----------
        X : ndarray or DataFrame
            Two dimensional instance data; a DataFrame is converted to its
            underlying array. Only the first ``self.n_samples`` rows are
            drawn.

        y : array-like
            Per-row value used to index ``self.classes_`` in order to find
            the label (and therefore the color) of the row.

        kwargs : dict
            Keyword arguments forwarded to every ``ax.plot`` call.
        """
        # Convert from dataframe. ``DataFrame.as_matrix`` was removed in
        # pandas 1.0; ``.values`` returns the same array on every version.
        if is_dataframe(X):
            X = X.values

        # Choose a subset of samples
        # TODO: allow selection of a random subset of samples instead of head

        # n_samples is only recomputed for int (absolute count) or float
        # (fraction of rows) settings; otherwise its current value is reused
        if isinstance(self.sample, int):
            self.n_samples = min([self.sample, len(X)])
        elif isinstance(self.sample, float):
            self.n_samples = int(len(X) * self.sample)
        X = X[:self.n_samples, :]

        # Normalize
        if self.normalize is not None:
            X = self.normalizers[self.normalize].fit_transform(X)

        # Get the shape of the data
        nrows, ncols = X.shape

        # Create the xticks for each column
        # TODO: Allow the user to specify this feature
        x = list(range(ncols))

        # Create the colors
        # TODO: Allow both colormap, listed colors, and palette definition
        # TODO: Make this an independent function or property for override!
        color_values = resolve_colors(
            n_colors=len(self.classes_), colormap=self.colormap, colors=self.color
        )
        colors = dict(zip(self.classes_, color_values))

        # Track which labels are already in the legend
        used_legends = set()

        # TODO: Make this function compatible with DataFrames!
        # TODO: Make an independent function to allow addition of instances!
        for idx, row in enumerate(X):
            # TODO: How to map classmap to labels?
            label = y[idx] # Get the label for the row
            label = self.classes_[label]

            # Only the first line drawn for a class carries the label so
            # that the class is not repeated among the labeled artists
            if label not in used_legends:
                used_legends.add(label)
                self.ax.plot(x, row, color=colors[label], alpha=0.25, label=label, **kwargs)
            else:
                self.ax.plot(x, row, color=colors[label], alpha=0.25, **kwargs)

        # Add the vertical lines
        # TODO: Make an independent function for override!
        if self.show_vlines:
            for idx in x:
                self.ax.axvline(idx, **self.vlines_kwds)

        # Set the limits
        self.ax.set_xticks(x)
        self.ax.set_xticklabels(self.features_)
        self.ax.set_xlim(x[0], x[-1])
Exemplo n.º 20
0
    def fit(self, X, y=None, **kwargs):
        """
        Prepare the visualizer state from the data and hand off to the
        parent ``fit``. Drawing happens here because fit receives both X
        and y, which the transform method does not.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        kwargs : dict
            Pass generic arguments to the drawing method

        Returns
        -------
        self : instance
            Returns the instance of the transformer/visualizer
        """

        # Unwrap pandas containers, remembering the column names so they can
        # serve as feature names when none were supplied
        if is_dataframe(X):
            column_names, X = X.columns, X.values
            if self.features_ is None:
                self.features_ = np.array(column_names)
        if is_series(y):
            y = y.values

        # Without any names, fall back to the integer position of each column
        if self.features_ is None:
            self.features_ = np.arange(0, X.shape[1])

        # Discover the classes before subsampling so that every class gets a
        # color; np.unique also fixes the ordering of the classes
        if self.classes_ is None:
            self.classes_ = list(map(str, np.unique(y)))

        # Build the class -> color lookup
        # TODO: Allow both colormap, listed colors, and palette definition
        # TODO: Make this an independent function or property for override!
        palette = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color,
        )
        self._colors = {kls: shade for kls, shade in zip(self.classes_, palette)}

        # One tick position per feature
        n_features = len(self.features_)
        self._increments = np.arange(n_features)

        # Reduce the number of instances, then optionally rescale them
        X, y = self._subsample(X, y)
        if self.normalize is not None:
            scaler = self.NORMALIZERS[self.normalize]
            X = scaler.fit_transform(X)

        # the super method calls draw and returns self
        return super(ParallelCoordinates, self).fit(X, y, **kwargs)