Exemplo n.º 1
0
    def draw(self, **kwargs):
        """
        Renders the training and cross validation score curves.

        For each curve, a band spanning one standard deviation either side of
        the mean is shaded first, and the mean curves are then plotted on top
        so that they stay visible. Everything is drawn against
        ``self.param_range``; when ``self.logx`` is truthy the x-axis is
        switched to a log scale.

        Parameters
        ----------
        kwargs : dict
            Accepted but not used by this method.

        Returns
        -------
        ax : object
            The axes stored on ``self.ax`` that were drawn on.
        """
        # Pair every legend label with its (mean, std) arrays
        train_curve = (self.train_scores_mean_, self.train_scores_std_)
        test_curve = (self.test_scores_mean_, self.test_scores_std_)
        series = (
            ("Training Score", train_curve),
            ("Cross Validation Score", test_curve),
        )

        # One color per curve, shared by the band and the line
        palette = resolve_colors(n_colors=2)
        axes = self.ax
        xs = self.param_range

        # Shade the variance bands first so the lines are drawn over them
        for pos, (_, (center, spread)) in enumerate(series):
            lower, upper = center - spread, center + spread
            axes.fill_between(xs, lower, upper, alpha=0.25, color=palette[pos])

        # Mean curves go last so they sit in front of the shaded bands
        for pos, (legend, (center, _)) in enumerate(series):
            axes.plot(xs, center, "d-", color=palette[pos], label=legend)

        if self.logx:
            axes.set_xscale("log")

        return axes
    def draw(self, **kwargs):
        """
        Renders the training and cross validation learning curves.

        The curves are drawn against ``self.train_sizes_``. A band of one
        standard deviation around each mean is filled in first, then the mean
        curves are plotted so that they appear in front of the bands.

        Parameters
        ----------
        kwargs : dict
            Accepted but not used by this method.

        Returns
        -------
        ax : object
            The axes stored on ``self.ax`` that were drawn on.
        """
        # (mean, std) pairs, in the same order as the legend names below
        score_stats = (
            (self.train_scores_mean_, self.train_scores_std_),
            (self.test_scores_mean_, self.test_scores_std_),
        )
        legend_names = ("Training Score", "Cross Validation Score")

        # The band and the line of a curve share the same color
        palette = resolve_colors(n_colors=2)

        # Bands first: anything drawn later ends up on top of them
        for position, (avg, dev) in enumerate(score_stats):
            band_low = avg - dev
            band_high = avg + dev
            self.ax.fill_between(
                self.train_sizes_,
                band_low,
                band_high,
                alpha=0.25,
                color=palette[position],
            )

        # Now the mean curves, with their legend entries
        for position, (avg, _) in enumerate(score_stats):
            self.ax.plot(
                self.train_sizes_,
                avg,
                'o-',
                color=palette[position],
                label=legend_names[position],
            )

        return self.ax
Exemplo n.º 3
0
    def draw(self, x, y, c):
        """
        Draws one scatter series per blob, each with its own color and label.

        Parameters
        ----------
        x : array-like
            Horizontal coordinates; indexed with a boolean mask, so presumably
            a numpy array (confirm against the caller).

        y : array-like
            Vertical coordinates, indexed with the same mask as ``x``.

        c : array-like
            Blob index of every point. Points are selected with ``c == i``
            for ``i`` in ``0 .. n_blobs - 1``.

        Returns
        -------
        ax : object
            The axes stored on ``self.ax`` that were drawn on.
        """
        colors = resolve_colors(self.n_blobs)

        for i in np.arange(self.n_blobs):
            mask = c == i
            label = "c{}".format(i)
            # Use ``color=`` rather than ``c=``: matplotlib cannot tell a
            # single numeric RGB(A) tuple passed as ``c`` apart from an array
            # of values to colormap, and picks the latter whenever the blob
            # happens to contain exactly 3 or 4 points.
            self.ax.scatter(x[mask], y[mask], label=label, color=colors[i])

        return self.ax
Exemplo n.º 4
0
    def fit(self, X, y=None):
        """
        Fits the manifold on X, transforms the data and draws the result.

        The target color type is determined from y first. For a discrete
        target the unique classes and one color per class are computed; for a
        continuous target the (min, max) range of y is recorded. The time
        spent in the manifold's ``fit_transform`` is stored on ``fit_time_``.

        Parameters
        ----------
        X : array-like of shape (n, m)
            A matrix or data frame with n instances and m features where m > 2.

        y : array-like of shape (n,), optional
            A vector or series with target values for each instance in X. This
            vector is used to determine the color of the points in X.

        Returns
        -------
        self : Manifold
            Returns the visualizer object.
        """
        self._determine_target_color_type(y)

        if self._target_color_type == DISCRETE:
            # One color per distinct target value
            self.classes_ = np.unique(y)

            # A string names a colormap; anything else is handed over as an
            # explicit set of colors.
            palette_arg = (
                'colormap' if isinstance(self.colors, string_types) else 'colors'
            )
            self._colors = resolve_colors(
                n_colors=len(self.classes_), **{palette_arg: self.colors}
            )

        elif self._target_color_type == CONTINUOUS:
            # Remember the span of the target values
            y = np.asarray(y)
            self.range_ = (y.min(), y.max())

        # Time only the manifold embedding itself
        began = time.time()
        Xp = self.manifold.fit_transform(X)
        self.fit_time_ = time.time() - began

        self.draw(Xp, y)
        return self
Exemplo n.º 5
0
    def fit_transform(self, X, y=None):
        """
        Fits the manifold on X, draws the embedding and returns it.

        The target color type is determined from y first. For a discrete
        target the unique classes and one color per class are computed; for a
        continuous target the (min, max) range of y is recorded. The manifold's
        ``fit_transform`` call runs inside a ``Timer`` context that is bound to
        ``fit_time_``.

        Parameters
        ----------
        X : array-like of shape (n, m)
            A matrix or data frame with n instances and m features where m > 2.

        y : array-like of shape (n,), optional
            A vector or series with target values for each instance in X. This
            vector is used to determine the color of the points in X.

        Returns
        -------
        Xp : object
            The value returned by ``self.manifold.fit_transform(X)``, i.e. the
            transformed data that was passed to ``draw``.
        """
        self._determine_target_color_type(y)

        if self._target_color_type == DISCRETE:
            self.classes_ = np.unique(y)

            # A string is treated as a colormap name, anything else as colors
            palette_opts = dict(n_colors=len(self.classes_))
            uses_colormap = isinstance(self.colors, string_types)
            palette_opts['colormap' if uses_colormap else 'colors'] = self.colors

            self._colors = resolve_colors(**palette_opts)

        elif self._target_color_type == CONTINUOUS:
            # Remember the span of the target values
            y = np.asarray(y)
            self.range_ = (y.min(), y.max())

        # Only the embedding is timed; the timer is kept on the visualizer
        with Timer() as self.fit_time_:
            Xp = self.manifold.fit_transform(X)

        self.draw(Xp, y)
        return Xp
Exemplo n.º 6
0
    def draw(self, labels):
        """
        Draw the silhouettes for each sample and the average score.

        Parameters
        ----------

        labels : array-like
            An array with the cluster label for each silhouette sample,
            usually computed with ``predict()``. Labels are not stored on the
            visualizer so that the figure can be redrawn with new data.
            Samples of cluster ``idx`` are selected with ``labels == idx`` for
            ``idx`` in ``range(self.n_clusters_)``.

        Returns
        -------
        ax : object
            The axes stored on ``self.ax`` that were drawn on.
        """
        # Coerce to an ndarray so that ``labels == idx`` below is an
        # elementwise mask; on a plain list the comparison is just ``False``
        # and every cluster would silently select no samples.
        labels = np.asarray(labels)

        # Track the positions of the lines being drawn
        y_lower = 10  # The bottom of the silhouette

        # Get the colors from the various properties
        color_kwargs = {"n_colors": self.n_clusters_}

        if self.colors is None:
            color_kwargs["colormap"] = "Set1"
        elif isinstance(self.colors, str):
            color_kwargs["colormap"] = self.colors
        else:
            color_kwargs["colors"] = self.colors

        colors = resolve_colors(**color_kwargs)

        # For each cluster, plot the silhouette scores
        self.y_tick_pos_ = []
        for idx in range(self.n_clusters_):

            # Collect silhouette scores for samples in the current cluster.
            # Boolean indexing returns a copy, so the in-place sort does not
            # reorder ``self.silhouette_samples_``.
            values = self.silhouette_samples_[labels == idx]
            values.sort()

            # Compute the size of the cluster and find upper limit
            size = values.shape[0]
            y_upper = y_lower + size

            color = colors[idx]
            self.ax.fill_betweenx(
                np.arange(y_lower, y_upper),
                0,
                values,
                facecolor=color,
                edgecolor=color,
                alpha=0.5,
            )

            # Collect the tick position for each cluster (middle of its band)
            self.y_tick_pos_.append(y_lower + 0.5 * size)

            # Compute the new y_lower for next plot, leaving a gap of 10
            y_lower = y_upper + 10

        # The vertical line for average silhouette score of all the values
        self.ax.axvline(
            x=self.silhouette_score_,
            color="red",
            linestyle="--",
            label="Average Silhouette Score",
        )

        return self.ax
Exemplo n.º 7
0
    def fit(self, X, y=None):
        """
        Fits the visualizer to the training data by working out the target
        color type and, from it, the colors, classes, or value range needed
        to draw the instances.

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        Returns
        -------
        self : DataVisualizer
            Returns the instance of the transformer/visualizer

        Raises
        ------
        YellowbrickValueError
            If user supplied classes do not match the number of unique values
            in y, or if the target color type is not one of the known types.
        """
        # Let the parent class process the data first
        super(DataVisualizer, self).fit(X, y)

        # Everything below depends on the kind of target being colored
        self._determine_target_color_type(y)
        kind = self._target_color_type

        if kind == TargetType.SINGLE:
            # User supplied color, or else the first color of the color cycle
            self._colors = self.colors or "C0"

        elif kind == TargetType.DISCRETE:
            # Unique labels drive both the validation and the color mapping
            labels = np.unique(y)

            if self.classes is None:
                # No names given: use the string form of the unique values
                self.classes_ = np.asarray(list(map(str, labels)))
            else:
                self.classes_ = np.asarray(list(map(str, self.classes)))

                # User supplied names must cover every unique value exactly
                if len(self.classes_) != len(labels):
                    raise YellowbrickValueError(
                        ("number of specified classes does not match "
                         "number of unique values in target"))

            # Map class names to colors and raw labels to class names
            color_values = resolve_colors(
                n_colors=len(self.classes_),
                colormap=self.colormap,
                colors=self.colors,
            )
            self._colors = dict(zip(self.classes_, color_values))
            self._label_encoder = dict(zip(labels, self.classes_))

        elif kind == TargetType.CONTINUOUS:
            # Record the span of the target values
            y = np.asarray(y)
            self.range_ = (y.min(), y.max())

            # Fall back to the default sequential palette when none is set
            if self.colormap is None:
                self.colormap = palettes.DEFAULT_SEQUENCE
            # TODO: allow for Yellowbrick palettes here as well
            self._colors = mpl.cm.get_cmap(self.colormap)

        else:
            # Reaching this branch is a developer error: unknown types should
            # already have errored when the type was determined.
            raise YellowbrickValueError(
                "unknown target color type '{}'".format(kind))

        # NOTE: cannot call draw in fit to support data transformers
        return self