Пример #1
0
    def transform(self, X, y=None, **kwargs):
        """
        Project ``X`` with the internal PCA transformer and draw the result.

        The input is passed to ``self.pca_transformer.transform``; the
        projected features are then handed to ``self.draw`` together with
        ``y`` and finally returned to the caller.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features.

        y : ndarray or Series of length n, optional
            An array or series of target or class values, forwarded to
            ``draw``.

        kwargs : dict
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        Xp : array-like
            The transformed features produced by the PCA transformer,
            documented as having shape ``(len(X), projection)``.

        Raises
        ------
        NotFitted
            If a ``NotFittedError`` is raised while transforming or drawing.
        """
        try:
            projected = self.pca_transformer.transform(X)
            self.draw(projected, y)
        except NotFittedError:
            # Re-raise using the library's own "not fitted" exception type
            raise NotFitted.from_estimator(self, "transform")
        else:
            return projected
Пример #2
0
 def test_not_fitted_from_estimator(self, method):
     """
     Check that ``NotFitted.from_estimator`` builds an exception that can be
     raised directly and whose message tells the user to call fit.
     """
     expected = "instance is not fitted yet, please call fit"
     with pytest.raises(NotFitted, match=expected):
         error = NotFitted.from_estimator(self, method)
         raise error
Пример #3
0
    def score(self, X, y):
        """
        Computes the precision-recall curve(s) for the given test data, stores
        them on the visualizer and draws the figure.

        Parameters
        ----------
        X : array-like
            Test features; passed to the parent ``score`` and to
            ``self._get_y_scores``.

        y : array-like
            Test targets; binarized with ``label_binarize`` when the target
            type is multiclass.

        Returns
        -------
        score_ : float
            Average precision, a summary of the plot as a weighted mean of
            precision at each threshold, weighted by the increase in recall
            from the previous threshold. For non-binary targets the
            micro-averaged value is returned.

        Raises
        ------
        NotFitted
            If ``target_type_`` has not been set on the visualizer.
        """
        # Without this guard a OneVsRestClassifier that was never correctly
        # fitted for multi-class targets could reach the computations below.
        if not hasattr(self, "target_type_"):
            raise NotFitted.from_estimator(self, "score")

        # Labels must be binarized before delegating to the parent class so
        # that OneVsRestClassifier receives multi-label output.
        if self.target_type_ == MULTICLASS:
            y = label_binarize(y, classes=self._target_labels)

        # The parent checks the fitted state and computes classes_; the
        # score_ it stores is replaced further down.
        super(PrecisionRecallCurve, self).score(X, y)

        # Prediction/threshold scores for the test data
        y_scores = self._get_y_scores(X)

        if self.target_type_ == BINARY:
            # Single curve: attributes hold plain values
            precision, recall, _ = sk_precision_recall_curve(y, y_scores)
            self.precision_ = precision
            self.recall_ = recall
            self.score_ = average_precision_score(y, y_scores)
        else:
            # One curve per class plus a micro average: attributes are dicts
            self.precision_ = {}
            self.recall_ = {}
            self.score_ = {}

            for col, label in enumerate(self.classes_):
                y_col = y[:, col]
                scores_col = y_scores[:, col]
                precision, recall, _ = sk_precision_recall_curve(
                    y_col, scores_col
                )
                self.precision_[label] = precision
                self.recall_[label] = recall
                self.score_[label] = average_precision_score(y_col, scores_col)

            # Micro average over the flattened label/score matrices
            precision, recall, _ = sk_precision_recall_curve(
                y.ravel(), y_scores.ravel()
            )
            self.precision_[MICRO] = precision
            self.recall_[MICRO] = recall
            self.score_[MICRO] = average_precision_score(
                y, y_scores, average=MICRO
            )

        # Render the figure
        self.draw()

        # Always hand back a single float between 0 and 1
        if self.target_type_ == BINARY:
            return self.score_
        return self.score_[MICRO]
Пример #4
0
 def get_target_color_type(self):
     """
     Return the target color type stored on the visualizer.

     Raises ``NotFitted`` when ``_target_color_type`` is still ``None``,
     i.e. it was neither computed during fit nor specified by the user.
     """
     color_type = self._target_color_type
     if color_type is not None:
         return color_type
     raise NotFitted("unknown target color type on unfitted visualizer")
Пример #5
0
    def draw(self, X, y=None):
        """
        Scatter the first two columns of ``X`` on the axes, coloring points
        according to the visualizer's target color type. May be called several
        times before finalize to add more scatter plots to the axes, however
        ``fit()`` must be called before use.

        Parameters
        ----------
        X : array-like of shape (n, 2)
            The matrix produced by the ``transform()`` method.

        y : array-like of shape (n,), optional
            The target, used to specify the colors of the points. Required
            for discrete and continuous target color types.

        Returns
        -------
        self.ax : matplotlib Axes object
            Returns the axes that the scatter plot was drawn on.

        Raises
        ------
        YellowbrickValueError
            If ``y`` is missing for a discrete or continuous target.
        NotFitted
            If the target color type is none of the known types.
        """
        plot_args = {"alpha": self.alpha}
        color_type = self._target_color_type

        if color_type == SINGLE:
            # Every point gets the same color
            plot_args["c"] = "b"

        elif color_type == DISCRETE:
            if y is None:
                raise YellowbrickValueError("y is required for discrete target")

            # Look up each label's position in classes_ and use it to pick
            # the matching color.
            point_colors = []
            for label in y:
                position = np.searchsorted(self.classes_, label)
                point_colors.append(self._colors[position])
            plot_args["c"] = point_colors

        elif color_type == CONTINUOUS:
            if y is None:
                raise YellowbrickValueError("y is required for continuous target")

            # TODO manually make colorbar so we can draw it in finalize
            plot_args["c"] = y
            plot_args["cmap"] = self.colors or palettes.DEFAULT_SEQUENCE

        else:
            # Should be unreachable once the visualizer has been fitted
            raise NotFitted("could not determine target color type")

        # Keep a handle on the scatter artist and return the axes
        self._scatter = self.ax.scatter(X[:, 0], X[:, 1], **plot_args)
        return self.ax
Пример #6
0
    def score(self, X, y):
        """
        Scores the wrapped estimator on test data; this is the hook for visual
        interaction used by score visualizers.

        If the visualizer itself was never fit (no ``class_counts_``) but the
        estimator already exposes ``classes_``, the classes are taken from the
        estimator, ``class_counts_`` is set to ``None`` and a warning is issued.

        Parameters
        ----------
        X : array-like
            X (also X_test) is the feature data of the test set to predict on.

        y : array-like
            y (also y_test) holds the actual target values to score against.

        Returns
        -------
        score : float
            Returns the score of the underlying model, usually accuracy for
            classification models. Refer to the specific model for more details.

        Raises
        ------
        NotFitted
            If neither the visualizer nor the estimator has been fitted.
        """
        # NOTE: hasattr(self, "classes_") cannot be used as the fitted check
        # because that attribute lookup is proxied to the estimator.
        visualizer_fitted = hasattr(self, "class_counts_")

        if not visualizer_fitted:
            estimator_fitted = hasattr(self.estimator, "classes_")
            if not estimator_fitted:
                message = (
                    "could not determine required property classes_; "
                    "the visualizer must either be fit or instantiated with a "
                    "fitted classifier before calling score()"
                )
                raise NotFitted(message)

            # Class counts cannot be recovered from a pre-fitted estimator, so
            # mark them as unknown and let the user know.
            self.class_counts_ = None
            self.classes_ = self._decode_labels(self.estimator.classes_)
            warnings.warn(
                "could not determine class_counts_ from previously fitted classifier",
                YellowbrickWarning,
            )

        # This method implements ScoreVisualizer (do not call super).
        result = self.estimator.score(X, y)
        self.score_ = result
        return self.score_
Пример #7
0
    def _determine_scatter_kwargs(self, y=None):
        """
        Builds the keyword arguments to pass into ``plt.scatter()``.

        For single and discrete targets only the colors are determined. For a
        continuous target the colors and colormap are determined and a
        normalizer over ``self.range_`` is stored on ``self._norm``.

        Parameters
        ----------
        y : array-like of shape (n,), optional
            The target, used to specify the colors of the points. Required
            for discrete and continuous targets.

        Returns
        -------
        scatter_kwargs : dict
            Always contains ``alpha`` and ``c``; also ``cmap`` for a
            continuous target.

        Raises
        ------
        YellowbrickValueError
            If ``y`` is missing when required, or if a discrete lookup fails
            with an ``IndexError`` (target not label encoded).
        NotFitted
            If the target color type is none of the known types.
        """
        options = {"alpha": self.alpha}
        color_type = self._target_color_type

        if color_type == TargetType.SINGLE:
            options["c"] = self._colors
            return options

        if color_type == TargetType.DISCRETE:
            if y is None:
                raise YellowbrickValueError("y is required for discrete target")

            try:
                # Each target value indexes classes_, whose entry in turn
                # indexes the color list.
                options["c"] = [
                    self._colors[self.classes_[label]] for label in y
                ]
            except IndexError:
                raise YellowbrickValueError("Target needs to be label encoded.")
            return options

        if color_type == TargetType.CONTINUOUS:
            if y is None:
                raise YellowbrickValueError("y is required for continuous target")

            options["c"] = y
            options["cmap"] = self._colors
            # Remember the normalization over the learned target range
            self._norm = mpl.colors.Normalize(
                vmin=self.range_[0], vmax=self.range_[1]
            )
            return options

        # Technically this should never be raised
        raise NotFitted("could not determine target color type")
Пример #8
0
    def get_colors(self, y):
        """
        Maps the value(s) of ``y`` to colors using the learned ``_colors``
        property, whatever the target color type is.

        Parameters
        ----------
        y : array-like
            The values of y to get the associated colors for.

        Returns
        -------
        colors : list
            Returns a list of colors for each value in y. For a continuous
            target this is whatever the stored colormap returns.

        Raises
        ------
        NotFitted
            If ``_colors`` is ``None``.
        YellowbrickKeyError
            If a discrete value has no associated color.
        YellowbrickValueError
            If the target color type is not one of the known types.
        """
        if self._colors is None:
            raise NotFitted("cannot determine colors on unfitted visualizer")

        color_type = self._target_color_type

        if color_type == TargetType.SINGLE:
            # The same color repeated once per value
            return [self._colors] * len(y)

        if color_type == TargetType.DISCRETE:
            encoder = self._label_encoder
            try:
                # Translate each value through the label encoder (falling
                # back to the value itself when it is not mapped) and use the
                # result as the key into the color map.
                return [self._colors[encoder.get(label, label)] for label in y]
            except KeyError:
                missing = set(y) - set(encoder.keys())
                listing = ", ".join("'{}'".format(name) for name in missing)
                raise YellowbrickKeyError(
                    "could not determine color for classes {}".format(listing)
                )

        if color_type == TargetType.CONTINUOUS:
            # Scale the values into the learned range, then ask the colormap
            scaler = Normalize(*self.range_)
            return self._colors(scaler(y))

        # This is a developer error, we should never get here!
        raise YellowbrickValueError(
            "unknown target color type '{}'".format(color_type)
        )
Пример #9
0
    def draw(self, **kwargs):
        """
        Renders the feature importances as a horizontal bar chart on
        ``self.ax``; called from fit.

        Returns
        -------
        ax : matplotlib Axes
            The axes the bars were drawn on.

        Raises
        ------
        NotFitted
            If ``feature_importances_`` or ``features_`` is missing.
        """
        # Both fitted attributes must exist before anything is drawn
        for required in ("feature_importances_", "features_"):
            if hasattr(self, required):
                continue
            raise NotFitted("missing required param '{}'".format(required))

        # One bar per feature, centered on half-integer positions
        n_features = self.features_.shape[0]
        bar_positions = np.arange(n_features) + 0.5

        axes = self.ax
        axes.barh(bar_positions, self.feature_importances_, align="center")

        # Label every bar with its feature name
        axes.set_yticks(bar_positions)
        axes.set_yticklabels(self.features_)

        return self.ax
Пример #10
0
    def class_colors_(self):
        """
        Returns the cached ``_colors`` when present; otherwise computes one
        categorical color per class via ``color_palette``, caches the result
        on ``_colors`` and returns it. Raises ``NotFitted`` when neither
        ``_colors`` nor ``classes_`` is available.

        Subclasses that require users to choose colors or that have
        specialized color handling should set ``_colors`` on init or during
        fit.

        Notes
        -----
        Because this is a property, this docstring is for developers only.
        """
        if hasattr(self, "_colors"):
            return self._colors

        if not hasattr(self, "classes_"):
            raise NotFitted("cannot determine colors before fit")

        # TODO: replace with resolve_colors
        n_classes = len(self.classes_)
        self._colors = color_palette(None, n_classes)
        return self._colors
Пример #11
0
    def draw(self, **kwargs):
        """
        Renders the feature importances as a bar chart; called from fit.

        When ``self.stack`` is truthy the importances are drawn with
        ``bar_stack`` using one color per class; otherwise a plain horizontal
        bar chart with one color per feature is drawn on ``self.ax``.

        Returns
        -------
        ax : matplotlib Axes
            The axes the chart was drawn on.

        Raises
        ------
        NotFitted
            If ``feature_importances_`` or ``features_`` is missing.
        """
        # Both fitted attributes must exist before anything is drawn
        for required in ("feature_importances_", "features_"):
            if hasattr(self, required):
                continue
            raise NotFitted("missing required param '{}'".format(required))

        # Bars are centered on half-integer positions, one per feature
        bar_positions = np.arange(self.features_.shape[0]) + 0.5

        if self.stack:
            # Stacked variant: one color per class, legend outside the axes
            class_colors = resolve_colors(
                len(self.classes_), colormap=self.colormap
            )
            legend_options = {"bbox_to_anchor": (1.04, 0.5), "loc": "center left"}
            bar_stack(
                self.feature_importances_,
                ax=self.ax,
                labels=list(self.classes_),
                ticks=self.features_,
                orientation="h",
                colors=class_colors,
                legend_kws=legend_options,
            )
            return self.ax

        # Plain variant: one color per feature
        feature_colors = resolve_colors(
            len(self.features_), colormap=self.colormap, colors=self.colors
        )
        self.ax.barh(
            bar_positions,
            self.feature_importances_,
            color=feature_colors,
            align="center",
        )

        # Label every bar with its feature name
        self.ax.set_yticks(bar_positions)
        self.ax.set_yticklabels(self.features_)

        return self.ax
    def draw(self):
        """
        Draws the class prediction error as a stacked bar chart on the axes.

        Returns
        -------
        ax : Matplotlib Axes
            The axes on which the figure is plotted

        Raises
        ------
        NotFitted
            If ``predictions_`` or ``classes_`` has not been set.
        """
        is_ready = hasattr(self, "predictions_") and hasattr(self, "classes_")
        if not is_ready:
            raise NotFitted.from_estimator(self, "draw")

        # Classes serve both as legend labels and as tick labels; the legend
        # is anchored outside the plot area.
        legend_options = {"bbox_to_anchor": (1.04, 0.5), "loc": "center left"}
        bar_stack(
            self.predictions_,
            self.ax,
            labels=list(self.classes_),
            ticks=self.classes_,
            colors=self.class_colors_,
            legend_kws=legend_options,
        )
        return self.ax
Пример #13
0
    def transform(self, X, y=None, **kwargs):
        """
        Returns the transformed data points from the manifold embedding and
        draws them on the axes.

        Parameters
        ----------
        X : array-like of shape (n, m)
            A matrix or data frame with n instances and m features

        y : array-like of shape (n,), optional
            The target, used to specify the colors of the points.

        kwargs : dict
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        Xprime : array-like of shape (n, 2)
            Returns the 2-dimensional embedding of the instances.

        Raises
        ------
        NotFitted
            If the manifold raises ``NotFittedError`` when transforming.
        ModelError
            If the manifold raises ``AttributeError`` when transforming,
            which is taken to mean that it has no usable ``transform`` and
            ``fit_transform`` must be used instead.

        Note
        ----
        This method does not work with MDS, TSNE and SpectralEmbedding because
        it is yet to be implemented in sklearn.
        """
        # Because some manifolds do not have transform we cannot call super.
        # Only the manifold call is guarded: an AttributeError raised while
        # drawing is a genuine error and must not be reported as a manifold
        # that lacks ``transform``.
        try:
            Xp = self.manifold.transform(X)
        except NotFittedError:
            raise NotFitted.from_estimator(self, "transform")
        except AttributeError:
            name = self.manifold.__class__.__name__
            raise ModelError(
                ("{} requires data to be simultaneously fit and transformed, "
                 "use fit_transform instead").format(name))

        self.draw(Xp, y)
        return Xp