def get_model_name(model):
    """
    Detects the model name for a Scikit-Learn model or pipeline.

    Parameters
    ----------
    model: class or instance
        The object to determine the name for. For an estimator the class
        name is returned; for a Pipeline the class name of the last step
        is returned, unwrapping nested Pipelines along the way.

    Returns
    -------
    name : string
        The name of the model or pipeline.

    Raises
    ------
    YellowbrickTypeError
        If the object (or the final step reached while unwrapping a
        Pipeline) does not pass the ``is_estimator`` check.
    """
    current = model
    while True:
        # Every object inspected, including unwrapped final steps, must
        # be an estimator.
        if not is_estimator(current):
            message = (
                "Cannot detect the model name for non estimator: '{}'"
            ).format(type(current))
            raise YellowbrickTypeError(message)

        if not isinstance(current, Pipeline):
            return current.__class__.__name__

        # Steps are (name, estimator) pairs; descend into the last one.
        current = current.steps[-1][-1]
def __init__(
    self,
    estimator,
    ax=None,
    fig=None,
    classes=None,
    encoder=None,
    is_fitted="auto",
    force_model=False,
    **kwargs
):
    """
    Validate the estimator and initialize the classification visualizer.

    Parameters
    ----------
    estimator : object
        The model to wrap. Unless ``force_model`` is truthy it must pass
        the ``isclassifier`` check.
    ax : object, default: None
        Forwarded to the parent initializer.
    fig : object, default: None
        Forwarded to the parent initializer.
    classes : object, default: None
        Stored as ``self.classes``.
    encoder : object, default: None
        Stored as ``self.encoder``.
    is_fitted : default: "auto"
        Forwarded to the parent initializer.
    force_model : bool, default: False
        When truthy, the classifier type check is skipped.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If the type check is active and the estimator is not a classifier.
    """
    # Type check first so that a bad estimator fails before any setup.
    if not (force_model or isclassifier(estimator)):
        raise YellowbrickTypeError(
            "This estimator is not a classifier; "
            "try a regression or clustering score visualizer instead!"
        )

    super(ClassificationScoreVisualizer, self).__init__(
        estimator, ax=ax, fig=fig, is_fitted=is_fitted, **kwargs
    )

    # Keep the constructor arguments on the instance.
    self.classes = classes
    self.encoder = encoder
    self.force_model = force_model
def __init__(self, model, ax=None, alphas=None, cv=None, scoring=None,
             **kwargs):
    """
    Initialize alpha selection for a model that is not a "CV" regressor.

    Parameters
    ----------
    model : object
        The model to wrap. Its class name must not end with "CV".
    ax : object, default: None
        Forwarded to the parent initializer.
    alphas : object, default: None
        The alpha values to use; when None, ``np.logspace(-10, -2, 200)``
        is used instead.
    cv : object, default: None
        Bound as the ``cv`` argument of ``cross_val_score``.
    scoring : object, default: None
        Bound as the ``scoring`` argument of ``cross_val_score``.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If the class name of ``model`` ends with "CV".
    """
    # Models named "*CV" are rejected here and pointed at AlphaSelection.
    model_name = model.__class__.__name__
    if model_name.endswith("CV"):
        template = (
            "'{}' is a CV regularization model;"
            " try AlphaSelection instead."
        )
        raise YellowbrickTypeError(template.format(model_name))

    # NOTE(review): super(AlphaSelection, self) resolves the initializer
    # that follows AlphaSelection in the MRO -- presumably deliberate, so
    # that AlphaSelection's own initializer is bypassed; confirm.
    super(AlphaSelection, self).__init__(model, ax=ax, **kwargs)

    # Manual alpha selection parameters.
    self.alphas = alphas if alphas is not None else np.logspace(-10, -2, 200)
    self.errors = None
    self.score_method = partial(cross_val_score, cv=cv, scoring=scoring)
def __init__(self, estimator, ax=None, fig=None, force_model=False,
             **kwargs):
    """
    Validate the estimator and initialize the clustering visualizer.

    Parameters
    ----------
    estimator : object
        The model to wrap. Unless ``force_model`` is truthy it must pass
        the ``isclusterer`` check.
    ax : object, default: None
        Forwarded to the parent initializer.
    fig : object, default: None
        Forwarded to the parent initializer.
    force_model : bool, default: False
        When truthy, the clusterer type check is skipped.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If the type check is active and the estimator is not a clusterer.
    """
    if not (force_model or isclusterer(estimator)):
        raise YellowbrickTypeError(
            "The supplied model is not a clustering estimator; try a "
            "classifier or regression score visualizer instead!"
        )

    # Stored before the parent initializer runs, as in the original order.
    self.force_model = force_model

    super(ClusteringScoreVisualizer, self).__init__(
        estimator, ax=ax, fig=fig, **kwargs
    )
def _find_importances_param(self): """ Searches the wrapped model for the feature importances parameter. """ for attr in ("feature_importances_", "coef_"): try: return getattr(self.estimator, attr) except AttributeError: continue raise YellowbrickTypeError( "could not find feature importances param on {}".format( self.estimator.__class__.__name__))
def _find_classes_param(self): """ Searches the wrapped model for the classes_ parameter. """ for attr in ["classes_"]: try: return getattr(self.estimator, attr) except AttributeError: continue raise YellowbrickTypeError( "could not find classes_ param on {}".format( self.estimator.__class__.__name__))
def get_model_name(model):
    """
    Detects the model name for a Scikit-Learn model or pipeline.

    Parameters
    ----------
    model: class or instance
        The object to determine the name for. For an estimator the class
        name is returned; for a Pipeline the name of its final step is
        returned. A final step that is itself a Pipeline is unwrapped
        recursively, so the innermost final estimator is reported
        rather than "Pipeline".

    Returns
    -------
    name : string
        The name of the model or pipeline.

    Raises
    ------
    YellowbrickTypeError
        If the object, or the final step reached while unwrapping a
        Pipeline, does not pass the ``is_estimator`` check.
    """
    if not is_estimator(model):
        raise YellowbrickTypeError(
            "Cannot detect the model name for non estimator: '{}'".format(
                type(model)
            )
        )

    if isinstance(model, Pipeline):
        # Steps are (name, estimator) pairs; recurse into the last one so
        # nested pipelines resolve to their final estimator.
        return get_model_name(model.steps[-1][-1])

    return model.__class__.__name__
def __init__(self, model, ax=None, is_fitted="auto", **kwargs):
    """
    Initialize alpha selection for a "CV" regularization model.

    Parameters
    ----------
    model : object
        The model to wrap. Its class name must end with "CV". If its
        parameters include ``store_cv_values``, that parameter is set to
        True on the model passed in.
    ax : object, default: None
        Forwarded to the parent initializer.
    is_fitted : default: "auto"
        Forwarded to the parent initializer.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If the class name of ``model`` does not end with "CV".
    """
    # Check to make sure this is a "RegressorCV"
    name = model.__class__.__name__
    if not name.endswith("CV"):
        raise YellowbrickTypeError(
            (
                "'{}' is not a CV regularization model;"
                " try ManualAlphaSelection instead."
            ).format(name)
        )

    # Set the store_cv_values parameter on RidgeCV
    if "store_cv_values" in model.get_params():
        model.set_params(store_cv_values=True)

    # Forward is_fitted as well; previously it was accepted and then
    # silently dropped, so the caller's choice never reached the parent.
    super(AlphaSelection, self).__init__(
        model, ax=ax, is_fitted=is_fitted, **kwargs
    )
def __init__(
    self,
    estimator,
    ax=None,
    n_trials=50,
    cv=0.1,
    fbeta=1.0,
    argmax="fscore",
    exclude=None,
    quantiles=QUANTILES_MEDIAN_80,
    random_state=None,
    is_fitted="auto",
    force_model=False,
    **kwargs
):
    """
    Validate the inputs and initialize the discrimination threshold
    visualizer.

    Parameters
    ----------
    estimator : object
        The model to wrap. Unless ``force_model`` is truthy it must pass
        both the ``is_classifier`` and ``is_probabilistic`` checks.
    ax : object, default: None
        Forwarded to the parent initializer.
    n_trials : default: 50
        Stored as ``self.n_trials``.
    cv : default: 0.1
        Validated with ``self._check_cv`` and stored as ``self.cv``.
    fbeta : default: 1.0
        Stored as ``self.fbeta``.
    argmax : default: "fscore"
        Stored as ``self.argmax``.
    exclude : default: None
        Validated with ``self._check_exclude`` and stored as
        ``self.exclude``.
    quantiles : default: QUANTILES_MEDIAN_80
        Validated with ``self._check_quantiles`` and stored as
        ``self.quantiles``.
    random_state : default: None
        Stored as ``self.random_state``.
    is_fitted : default: "auto"
        Forwarded to the parent initializer.
    force_model : bool, default: False
        When truthy, the estimator type check is skipped. Stored as
        ``self.force_model``.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If the type check is active and the estimator is not a
        probabilistic classifier.
    """
    # Perform some quick type checking to help users avoid error.
    if not force_model and not (
        is_classifier(estimator) and is_probabilistic(estimator)
    ):
        raise YellowbrickTypeError(
            "{} requires a probabilistic binary classifier".format(
                self.__class__.__name__
            )
        )

    # Check the various inputs
    self._check_quantiles(quantiles)
    self._check_cv(cv)
    self._check_exclude(exclude)

    # Initialize the ModelVisualizer
    super(DiscriminationThreshold, self).__init__(
        estimator, ax=ax, is_fitted=is_fitted, **kwargs
    )

    # Set params
    self.n_trials = n_trials
    self.cv = cv
    self.fbeta = fbeta
    self.argmax = argmax
    self.exclude = exclude
    self.quantiles = quantiles
    self.random_state = random_state
    # Keep force_model on the instance like every other constructor
    # argument, so it can be read back later.
    self.force_model = force_model
def __init__(self, ax=None, features=None, classes=None, normalize=None,
             sample=1.0, color=None, colormap=None, vlines=True,
             vlines_kwds=None, **kwargs):
    """
    Validate the arguments and initialize the parallel coordinates
    visualizer.

    Parameters
    ----------
    ax, features, classes, color, colormap :
        Passed positionally, in that order, to the parent initializer.
    normalize : default: None
        Must be None or a member of ``self.normalizers``.
    sample : int or float, default: 1.0
        An int must be at least 1; a float must satisfy
        ``0 < sample <= 1``.
    vlines : default: True
        Stored as ``self.show_vlines``.
    vlines_kwds : dict, default: None
        Stored as ``self.vlines_kwds``; a falsy value is replaced by
        ``{'linewidth': 1, 'color': 'black'}``.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickValueError
        If ``normalize`` is unrecognized or ``sample`` is out of range.
    YellowbrickTypeError
        If ``sample`` is neither an int nor a float.
    """
    super(ParallelCoordinates, self).__init__(
        ax, features, classes, color, colormap, **kwargs
    )

    # Validate 'normalize' argument
    if normalize in self.normalizers or normalize is None:
        self.normalize = normalize
    else:
        raise YellowbrickValueError(
            "'{}' is an unrecognized normalization method".format(
                normalize
            )
        )

    # Validate 'sample' argument
    if isinstance(sample, int):
        if sample < 1:
            # The check accepts 1, so the message states that bound.
            raise YellowbrickValueError(
                "`sample` parameter of type `int` must be greater than "
                "or equal to 1"
            )
    elif isinstance(sample, float):
        if sample <= 0 or sample > 1:
            raise YellowbrickValueError(
                "`sample` parameter of type `float` must be between 0 and 1"
            )
    else:
        raise YellowbrickTypeError(
            "`sample` parameter must be int or float"
        )
    self.sample = sample

    # Visual Parameters
    self.show_vlines = vlines
    self.vlines_kwds = vlines_kwds or {'linewidth': 1, 'color': 'black'}
def __init__(self, model, n_trials=50, test_size_percent=0.1,
             quantiles=(0.1, 0.5, 0.9), random_state=None, **kwargs):
    """
    Validate the model and initialize the threshold visualizer.

    Parameters
    ----------
    model : object
        The model to wrap; it must pass the ``isclassifier`` check.
    n_trials : default: 50
        Stored as ``self.n_trials``.
    test_size_percent : default: 0.1
        Stored as ``self.test_size_percent``.
    quantiles : default: (0.1, 0.5, 0.9)
        Stored as ``self.quantiles``.
    random_state : default: None
        Stored as ``self.random_state``.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If ``model`` is not a classifier.
    """
    # Only classifiers are supported; fail before any other setup.
    if not isclassifier(model):
        message = (
            "This estimator is not a classifier; try a regression or "
            "clustering score visualizer instead!"
        )
        raise YellowbrickTypeError(message)

    super(ThresholdVisualizer, self).__init__(model, **kwargs)

    self.estimator = model
    self.n_trials = n_trials
    self.test_size_percent = test_size_percent
    self.quantiles = quantiles
    self.random_state = random_state

    # Placeholder; assigned elsewhere after initialization.
    self.plot_data = None
def __init__(self, model, ax=None, n_trials=50, cv=0.1, fbeta=1.0,
             argmax='fscore', exclude=None, quantiles=QUANTILES_MEDIAN_80,
             random_state=None, **kwargs):
    """
    Validate the inputs and initialize the discrimination threshold
    visualizer.

    Parameters
    ----------
    model : object
        The model to wrap; it must pass both the ``is_classifier`` and
        ``is_probabilistic`` checks.
    ax : object, default: None
        Forwarded to the parent initializer.
    n_trials, fbeta, argmax, random_state :
        Passed to ``self.set_params`` under the same names.
    cv : default: 0.1
        Validated with ``self._check_cv``, then passed to
        ``self.set_params``.
    exclude : default: None
        Validated with ``self._check_exclude``, then passed to
        ``self.set_params``.
    quantiles : default: QUANTILES_MEDIAN_80
        Validated with ``self._check_quantiles``, then passed to
        ``self.set_params``.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.

    Raises
    ------
    YellowbrickTypeError
        If ``model`` is not a probabilistic classifier.
    """
    # Quick type check to help users avoid errors later on.
    if not (is_classifier(model) and is_probabilistic(model)):
        visualizer_name = self.__class__.__name__
        raise YellowbrickTypeError(
            "{} requires a probabilistic binary classifier".format(
                visualizer_name
            )
        )

    # Input validation, in the same order as the checks are declared.
    self._check_quantiles(quantiles)
    self._check_cv(cv)
    self._check_exclude(exclude)

    super(DiscriminationThreshold, self).__init__(model, ax=ax, **kwargs)

    # Hand every visualizer parameter to set_params in a single call.
    params = {
        "n_trials": n_trials,
        "cv": cv,
        "fbeta": fbeta,
        "argmax": argmax,
        "exclude": exclude,
        "quantiles": quantiles,
        "random_state": random_state,
    }
    self.set_params(**params)
def fit(self, X, y=None, **kwargs):
    """
    The fit method is the primary drawing input for the decision
    boundaries visualization since it has both the X and y data required
    for the viz and the transform method does not.

    It fits the wrapped estimator on the selected feature columns, sets
    the axes limits to the data range padded by 1%, builds a mesh over
    that range and stores the estimator's predictions on the mesh.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features. After
        ``self._select_feature_columns`` it is indexed as ``X[:, 0]`` and
        ``X[:, 1]``.

    y : ndarray or Series of length n
        An array or series of target or class values

    kwargs : dict
        Accepted for API compatibility; not used by this method.

    Returns
    -------
    self : instance
        Returns the instance of the visualizer

    Raises
    ------
    YellowbrickTypeError
        If ``self.classes_`` is given and its length differs from the
        number of unique values in ``y``.
    """
    X = self._select_feature_columns(X)

    # Sorted unique target values. Using np.unique instead of set(y)
    # keeps the pairing of target values with class names deterministic.
    targets = np.unique(y)

    # Assign each class a unique number for drawing
    if self.classes_ is None:
        self.classes_ = {
            label: str(kls_num) for kls_num, label in enumerate(targets)
        }
        self.class_labels = None
    elif len(targets) == len(self.classes_):
        self.classes_ = {
            label: str(kls_num)
            for kls_num, label in enumerate(self.classes_)
        }
        # Pair sorted target values with class names in the given order.
        self.class_labels = dict(zip(targets, self.classes_))
    else:
        raise YellowbrickTypeError(
            "Number of classes must be the same length of number of "
            "target y"
        )

    # Fit the wrapped estimator on the selected feature columns.
    self.estimator.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_low, x_high = X[:, 0].min(), X[:, 0].max()
    y_low, y_high = X[:, 1].min(), X[:, 1].max()

    # Pad each bound outward by 1% of its magnitude. abs() keeps the
    # padding outward for negative bounds, where subtracting 1% of the
    # signed value would move the limit inside the data range.
    x_min = x_low - abs(x_low) * .01
    x_max = x_high + abs(x_high) * .01
    y_min = y_low - abs(y_low) * .01
    y_max = y_high + abs(y_high) * .01

    self.ax.set_xlim([x_min, x_max])
    self.ax.set_ylim([y_min, y_max])

    # set the step increment for drawing the boundary graph
    x_step = (x_max - x_min) * self.step_size
    y_step = (y_max - y_min) * self.step_size

    self.xx, self.yy = np.meshgrid(
        np.arange(x_min, x_max, x_step),
        np.arange(y_min, y_max, y_step),
    )

    # Predict on every mesh point and restore the mesh shape.
    Z = self.estimator.predict(np.c_[self.xx.ravel(), self.yy.ravel()])
    self.Z_shape = Z.reshape(self.xx.shape)
    return self
def __init__(self, ax=None, features=None, classes=None, normalize=None,
             sample=1.0, random_state=None, shuffle=False, colors=None,
             colormap=None, alpha=None, fast=False, vlines=True,
             vlines_kwds=None, **kwargs):
    """
    Validate the arguments and initialize the parallel coordinates
    visualizer.

    Parameters
    ----------
    ax, features, classes, colors, colormap :
        Forwarded by keyword to the parent initializer.
    normalize : default: None
        Must be None or a member of ``self.NORMALIZERS``.
    sample : int or float, default: 1.0
        An int must be at least 1; a float must satisfy
        ``0 < sample <= 1``.
    random_state : None, int or RandomState, default: None
        Only consulted when ``shuffle`` is True; used to build or supply
        the internal random number generator.
    shuffle : bool, default: False
        Must be a boolean. When False no random number generator is
        created.
    alpha : default: None
        Stored as ``self.alpha``.
    fast : default: False
        Stored as ``self.fast``.
    vlines : default: True
        Stored as ``self.show_vlines``.
    vlines_kwds : dict, default: None
        Stored as ``self.vlines_kwds``; a falsy value is replaced by
        ``{"linewidth": 1, "color": "black"}``.
    kwargs : dict
        Additional keyword arguments forwarded to the parent initializer.
        ``target_type`` defaults to "discrete" when not supplied.

    Raises
    ------
    YellowbrickValueError
        If ``normalize`` is unrecognized or ``sample`` is out of range.
    YellowbrickTypeError
        If ``sample``, ``shuffle`` or ``random_state`` has an unsupported
        type.
    """
    if "target_type" not in kwargs:
        kwargs["target_type"] = "discrete"
    super(ParallelCoordinates, self).__init__(
        ax=ax,
        features=features,
        classes=classes,
        colors=colors,
        colormap=colormap,
        **kwargs
    )

    # Validate 'normalize' argument
    if normalize in self.NORMALIZERS or normalize is None:
        self.normalize = normalize
    else:
        raise YellowbrickValueError(
            "'{}' is an unrecognized normalization method".format(
                normalize
            )
        )

    # Validate 'sample' argument
    if isinstance(sample, int):
        if sample < 1:
            # The check accepts 1, so the message states that bound.
            raise YellowbrickValueError(
                "`sample` parameter of type `int` must be greater than "
                "or equal to 1"
            )
    elif isinstance(sample, float):
        if sample <= 0 or sample > 1:
            raise YellowbrickValueError(
                "`sample` parameter of type `float` must be between 0 and 1"
            )
    else:
        raise YellowbrickTypeError(
            "`sample` parameter must be int or float"
        )
    self.sample = sample

    # Set sample parameters
    if isinstance(shuffle, bool):
        self.shuffle = shuffle
    else:
        raise YellowbrickTypeError("`shuffle` parameter must be boolean")

    # A random number generator is only needed when shuffling.
    if self.shuffle:
        if (random_state is None) or isinstance(random_state, int):
            self._rng = RandomState(random_state)
        elif isinstance(random_state, RandomState):
            self._rng = random_state
        else:
            raise YellowbrickTypeError(
                "`random_state` must be None, int, or np.random.RandomState"
            )
    else:
        self._rng = None

    # Visual and drawing parameters
    self.fast = fast
    self.alpha = alpha
    self.show_vlines = vlines
    self.vlines_kwds = vlines_kwds or {"linewidth": 1, "color": "black"}

    # Internal properties
    self._increments = None
    self._colors = None
def ax(self, ax):
    """
    Reject any attempt to assign a new axes object.

    Parameters
    ----------
    ax : object
        The axes object being assigned; it is never used.

    Raises
    ------
    YellowbrickTypeError
        Always.
    """
    message = "cannot set new axes objects on multiple visualizers"
    raise YellowbrickTypeError(message)