def draw(self, **kwargs):
    """
    Renders the training and cross validation score curves against
    ``self.param_range``.

    Each curve is drawn as a line through its mean scores, with a
    translucent band spanning mean - std to mean + std behind it. When
    ``self.logx`` is truthy the x-axis is switched to a log scale.

    Parameters
    ----------
    kwargs : dict
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    ax : the axes the curves were drawn on (``self.ax``).
    """
    # One (label, mean, std) record per curve, in drawing order
    series = (
        ("Training Score", self.train_scores_mean_, self.train_scores_std_),
        (
            "Cross Validation Score",
            self.test_scores_mean_,
            self.test_scores_std_,
        ),
    )

    # One color per curve, shared by the band and the line
    palette = resolve_colors(n_colors=2)

    # Bands go first so the lines end up on top of them
    for position, (_, center, spread) in enumerate(series):
        self.ax.fill_between(
            self.param_range,
            center - spread,
            center + spread,
            alpha=0.25,
            color=palette[position],
        )

    # Mean curves are drawn after (in front of) the bands
    for position, (name, center, _) in enumerate(series):
        self.ax.plot(
            self.param_range,
            center,
            "d-",
            color=palette[position],
            label=name,
        )

    if self.logx:
        self.ax.set_xscale("log")

    return self.ax
def draw(self, **kwargs):
    """
    Renders the training and cross validation learning curves against
    ``self.train_sizes_``.

    Each curve is drawn as a line through its mean scores, with a
    translucent band spanning mean - std to mean + std behind it.

    Parameters
    ----------
    kwargs : dict
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    ax : the axes the curves were drawn on (``self.ax``).
    """
    # One (label, mean, std) record per curve, in drawing order
    series = (
        ("Training Score", self.train_scores_mean_, self.train_scores_std_),
        (
            "Cross Validation Score",
            self.test_scores_mean_,
            self.test_scores_std_,
        ),
    )

    # One color per curve, shared by the band and the line
    palette = resolve_colors(n_colors=2)

    # Bands go first so the lines end up on top of them
    for position, (_, center, spread) in enumerate(series):
        lower, upper = center - spread, center + spread
        self.ax.fill_between(
            self.train_sizes_, lower, upper, alpha=0.25, color=palette[position]
        )

    # Mean curves are drawn after (in front of) the bands
    for position, (name, center, _) in enumerate(series):
        self.ax.plot(
            self.train_sizes_, center, "o-", color=palette[position], label=name
        )

    return self.ax
def draw(self, x, y, c):
    """
    Draws one scatter series per blob, each with its own color and label.

    Parameters
    ----------
    x : array-like
        Horizontal coordinates of the points. It is indexed with the
        boolean mask ``c == i``, so it must support mask indexing
        (presumably a numpy array; confirm against callers).

    y : array-like
        Vertical coordinates of the points, aligned with ``x``.

    c : array-like
        Blob index of each point; points where ``c == i`` are drawn as
        series ``i`` with the label ``"c<i>"``.

    Returns
    -------
    ax : the axes the points were drawn on (``self.ax``).
    """
    colors = resolve_colors(self.n_blobs)

    for i in range(self.n_blobs):
        mask = c == i
        # Use ``color=`` rather than ``c=``: a numeric RGB/RGBA sequence
        # passed as ``c`` is treated by matplotlib as values to colormap
        # whenever its length matches the number of points, so a blob with
        # exactly 3 or 4 points could be colored incorrectly.
        self.ax.scatter(
            x[mask], y[mask], label="c{}".format(i), color=colors[i]
        )

    return self.ax
def fit(self, X, y=None):
    """
    Fits the manifold on X, transforms the data and draws the result.

    The target color type is determined from y first. For a discrete
    target the unique values of y are stored as ``classes_`` and one color
    per class is resolved into ``_colors``; for a continuous target the
    (min, max) of y is stored as ``range_``. The wall-clock duration of the
    manifold's ``fit_transform`` call, measured with ``time.time()``, is
    stored in ``fit_time_``.

    Parameters
    ----------
    X : array-like of shape (n, m)
        A matrix or data frame with n instances and m features where m > 2.

    y : array-like of shape (n,), optional
        A vector or series with target values for each instance in X. This
        vector is used to determine the color of the points in X.

    Returns
    -------
    self : Manifold
        Returns the visualizer object.
    """
    # Work out how the target should be mapped to colors
    self._determine_target_color_type(y)

    if self._target_color_type == DISCRETE:
        # One color per distinct target value
        self.classes_ = np.unique(y)

        # A string is passed on as a colormap, anything else as colors
        if isinstance(self.colors, string_types):
            palette_key = 'colormap'
        else:
            palette_key = 'colors'

        self._colors = resolve_colors(
            **{'n_colors': len(self.classes_), palette_key: self.colors}
        )

    elif self._target_color_type == CONTINUOUS:
        # Continuous targets only need their value range
        y = np.asarray(y)
        self.range_ = (y.min(), y.max())

    # Time only the manifold embedding itself, not the drawing
    began_at = time.time()
    projected = self.manifold.fit_transform(X)
    self.fit_time_ = time.time() - began_at

    self.draw(projected, y)
    return self
def fit_transform(self, X, y=None):
    """
    Fits the manifold on X, draws the transformed data and returns it.

    The target color type is determined from y first. For a discrete
    target the unique values of y are stored as ``classes_`` and one color
    per class is resolved into ``_colors``; for a continuous target the
    (min, max) of y is stored as ``range_``. The manifold's
    ``fit_transform`` call runs inside a ``Timer`` context whose ``as``
    target is ``fit_time_``.

    Parameters
    ----------
    X : array-like of shape (n, m)
        A matrix or data frame with n instances and m features where m > 2.

    y : array-like of shape (n,), optional
        A vector or series with target values for each instance in X. This
        vector is used to determine the color of the points in X.

    Returns
    -------
    Xp : the value returned by ``self.manifold.fit_transform(X)``
        The transformed data that was passed to ``draw``. Note that this
        method does not return the visualizer itself.
    """
    # Work out how the target should be mapped to colors
    self._determine_target_color_type(y)

    if self._target_color_type == DISCRETE:
        # One color per distinct target value
        self.classes_ = np.unique(y)

        # A string is passed on as a colormap, anything else as colors
        if isinstance(self.colors, string_types):
            palette_key = 'colormap'
        else:
            palette_key = 'colors'

        self._colors = resolve_colors(
            **{'n_colors': len(self.classes_), palette_key: self.colors}
        )

    elif self._target_color_type == CONTINUOUS:
        # Continuous targets only need their value range
        y = np.asarray(y)
        self.range_ = (y.min(), y.max())

    # Only the manifold embedding is timed, not the drawing
    with Timer() as self.fit_time_:
        projected = self.manifold.fit_transform(X)

    self.draw(projected, y)
    return projected
def draw(self, labels):
    """
    Draw the silhouettes for each sample and the average score.

    For every cluster index in ``range(self.n_clusters_)`` the silhouette
    values of the samples carrying that label are sorted and drawn as a
    horizontal filled band. Bands are stacked vertically with a gap of 10
    between them, and the vertical midpoint of each band is recorded in
    ``self.y_tick_pos_``. A dashed red vertical line marks
    ``self.silhouette_score_``.

    Parameters
    ----------
    labels : array-like
        An array with the cluster label for each silhouette sample,
        usually computed with ``predict()``. Labels are not stored on the
        visualizer so that the figure can be redrawn with new data.

    Returns
    -------
    ax : the axes the silhouettes were drawn on (``self.ax``).
    """
    # Vertical spacing below the first band and between consecutive bands
    gap = 10
    band_bottom = gap

    # None falls back to the "Set1" colormap, a string is passed on as a
    # colormap, and anything else is passed on as explicit colors.
    requested = self.colors
    palette_args = {"n_colors": self.n_clusters_}
    if requested is None:
        palette_args["colormap"] = "Set1"
    elif isinstance(requested, str):
        palette_args["colormap"] = requested
    else:
        palette_args["colors"] = requested
    palette = resolve_colors(**palette_args)

    self.y_tick_pos_ = []
    for cluster in range(self.n_clusters_):
        # Silhouette values of this cluster's samples, in ascending order
        scores = self.silhouette_samples_[labels == cluster]
        scores.sort()

        # The band is as tall as the number of samples in the cluster
        count = scores.shape[0]
        band_top = band_bottom + count

        shade = palette[cluster]
        self.ax.fill_betweenx(
            np.arange(band_bottom, band_top),
            0,
            scores,
            facecolor=shade,
            edgecolor=shade,
            alpha=0.5,
        )

        # Remember the band's midpoint as this cluster's tick position
        self.y_tick_pos_.append(band_bottom + 0.5 * count)

        # The next band starts one gap above this one
        band_bottom = band_top + gap

    # Mark the average silhouette score over all samples
    self.ax.axvline(
        x=self.silhouette_score_,
        color="red",
        linestyle="--",
        label="Average Silhouette Score",
    )

    return self.ax
def fit(self, X, y=None):
    """
    Fits the visualizer to the training data set by determining the target
    type, colors, classes, and range of the data to ensure that the
    visualizer can accurately portray the instances in data space.

    Depending on the target color type this sets:

    - single: ``_colors`` to the user supplied color, or "C0" if unset;
    - discrete: ``classes_`` (string labels), ``_colors`` (label to color
      map) and ``_label_encoder`` (unique target value to label map);
    - continuous: ``range_`` (min and max of y), a default ``colormap``
      when none was given, and ``_colors`` (the resolved colormap).

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    Returns
    -------
    self : DataVisualizer
        Returns the instance of the transformer/visualizer

    Raises
    ------
    YellowbrickValueError
        If user supplied classes do not match the number of unique values
        in y, or if the target color type is not recognized.
    """
    # Let the parent class process the data first
    super(DataVisualizer, self).fit(X, y)

    # Decide how the target maps to colors
    self._determine_target_color_type(y)

    if self._target_color_type == TargetType.SINGLE:
        # User supplied color, else the first color in the color cycle
        self._colors = self.colors or "C0"
        return self

    if self._target_color_type == TargetType.DISCRETE:
        # Unique values drive both validation and the color mapping
        unique_targets = np.unique(y)

        if self.classes is None:
            # No user labels: use the string form of the unique values
            self.classes_ = np.asarray([str(t) for t in unique_targets])
        else:
            self.classes_ = np.asarray([str(t) for t in self.classes])

            # User labels must line up one-to-one with the unique values
            if len(self.classes_) != len(unique_targets):
                raise YellowbrickValueError(
                    "number of specified classes does not match "
                    "number of unique values in target"
                )

        # Map each class label to a color, and each raw value to its label
        palette = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.colors,
        )
        self._colors = dict(zip(self.classes_, palette))
        self._label_encoder = dict(zip(unique_targets, self.classes_))
        return self

    if self._target_color_type == TargetType.CONTINUOUS:
        y = np.asarray(y)
        self.range_ = (y.min(), y.max())

        if self.colormap is None:
            self.colormap = palettes.DEFAULT_SEQUENCE

        # TODO: allow for Yellowbrick palettes here as well
        self._colors = mpl.cm.get_cmap(self.colormap)
        return self

    # Reaching this point is a developer error: unknown types should have
    # errored when the type was determined.
    # NOTE: cannot call draw in fit to support data transformers
    raise YellowbrickValueError(
        "unknown target color type '{}'".format(self._target_color_type)
    )