# Imports assumed by the snippets below (a sketch; the actual modules may
# organize these differently):
import pytest
import yellowbrick as yb
from sklearn.base import ClassifierMixin
from sklearn.linear_model import Ridge
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.svm import LinearSVC
from yellowbrick.classifier import DiscriminationThreshold
from yellowbrick.exceptions import YellowbrickTypeError
from yellowbrick.utils import is_classifier, is_probabilistic


def test_requires_classifier(self):
    """
    Assert requires a classifier
    """
    message = "requires a probabilistic binary classifier"
    assert not is_classifier(Ridge)

    with pytest.raises(yb.exceptions.YellowbrickError, match=message):
        DiscriminationThreshold(Ridge())
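# For contrast, a hypothetical passing case (not part of the suite above):
# LogisticRegression exposes predict_proba, so the visualizer's type check
# accepts it. Assumes: from sklearn.linear_model import LogisticRegression
def test_accepts_probabilistic_classifier(self):
    """
    Sketch: will accept a probabilistic classifier
    """
    model = LogisticRegression
    assert is_classifier(model)
    assert is_probabilistic(model)

    try:
        DiscriminationThreshold(model())
    except YellowbrickTypeError:
        pytest.fail("did not accept probabilistic classifier")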
def test_requires_probabilistic_classifier(self):
    """
    Assert requires probabilistic classifier
    """
    message = "requires a probabilistic binary classifier"
    assert is_classifier(RadiusNeighborsClassifier)
    assert not is_probabilistic(RadiusNeighborsClassifier)

    with pytest.raises(yb.exceptions.YellowbrickError, match=message):
        DiscriminationThreshold(RadiusNeighborsClassifier())
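# For reference, a rough sketch of the check behind is_probabilistic as used
# above: yellowbrick treats a model as probabilistic if it exposes either
# predict_proba or decision_function. (An approximation, not a verbatim copy
# of yellowbrick.utils.types.is_probabilistic.)
def is_probabilistic_sketch(model):
    return any(
        hasattr(model, attr) for attr in ("predict_proba", "decision_function")
    )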
def test_accepts_decision_function(self):
    """
    Will accept classifiers with decision function
    """
    model = LinearSVC
    assert is_classifier(model)
    assert is_probabilistic(model)
    assert hasattr(model, "decision_function")
    assert not hasattr(model, "predict_proba")

    try:
        DiscriminationThreshold(model())
    except YellowbrickTypeError:
        pytest.fail("did not accept decision function model")
def __init__(
    self,
    estimator,
    ax=None,
    n_trials=50,
    cv=0.1,
    fbeta=1.0,
    argmax="fscore",
    exclude=None,
    quantiles=QUANTILES_MEDIAN_80,
    random_state=None,
    is_fitted="auto",
    force_model=False,
    **kwargs
):
    # Perform some quick type checking to help users avoid error.
    if not force_model and (
        not is_classifier(estimator) or not is_probabilistic(estimator)
    ):
        raise YellowbrickTypeError(
            "{} requires a probabilistic binary classifier".format(
                self.__class__.__name__
            )
        )

    # Check the various inputs
    self._check_quantiles(quantiles)
    self._check_cv(cv)
    self._check_exclude(exclude)

    # Initialize the ModelVisualizer
    super(DiscriminationThreshold, self).__init__(
        estimator, ax=ax, is_fitted=is_fitted, **kwargs
    )

    # Set params
    self.n_trials = n_trials
    self.cv = cv
    self.fbeta = fbeta
    self.argmax = argmax
    self.exclude = exclude
    self.quantiles = quantiles
    self.random_state = random_state
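# End-to-end usage sketch for the visualizer whose __init__ appears above
# (assumes a binary dataset; make_classification is a convenient stand-in):
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=1000, n_classes=2, random_state=42)
viz = DiscriminationThreshold(LogisticRegression(), random_state=42)
viz.fit(X, y)  # runs n_trials shuffle-split trials across thresholds
viz.show()     # draws precision, recall, f1, and queue rate vs. threshold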
def test_requires_probabilistic_classifier(self):
    """
    Assert requires probabilistic classifier
    """
    message = "requires a probabilistic binary classifier"

    class RoboClassifier(ClassifierMixin):
        """
        Dummy Non-Probabilistic Classifier
        """

        def fit(self, X, y):
            self.classes_ = [0, 1]
            return self

    assert is_classifier(RoboClassifier)
    assert not is_probabilistic(RoboClassifier)

    with pytest.raises(yb.exceptions.YellowbrickError, match=message):
        DiscriminationThreshold(RoboClassifier())
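# The newer __init__ above also accepts force_model=True, which skips this
# constructor type check entirely; an escape hatch for wrappers that do not
# advertise predict_proba/decision_function but still behave like
# probabilistic classifiers. Sketch (a dummy like RoboClassifier above would
# be accepted at construction but would likely still fail later, at fit
# time, when scores are computed):
viz = DiscriminationThreshold(RoboClassifier(), force_model=True)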
def __init__(self, model, ax=None, n_trials=50, cv=0.1, fbeta=1.0,
             argmax='fscore', exclude=None, quantiles=QUANTILES_MEDIAN_80,
             random_state=None, **kwargs):

    # Perform some quick type checking to help users avoid error.
    if not is_classifier(model) or not is_probabilistic(model):
        raise YellowbrickTypeError(
            "{} requires a probabilistic binary classifier".format(
                self.__class__.__name__
            )
        )

    # Check the various inputs
    self._check_quantiles(quantiles)
    self._check_cv(cv)
    self._check_exclude(exclude)

    # Initialize the ModelVisualizer
    super(DiscriminationThreshold, self).__init__(model, ax=ax, **kwargs)

    # Set params
    self.set_params(
        n_trials=n_trials,
        cv=cv,
        fbeta=fbeta,
        argmax=argmax,
        exclude=exclude,
        quantiles=quantiles,
        random_state=random_state,
    )
def fit(self, X, y=None, **kwargs):
    """
    Fits the estimator to discover the feature importances described by
    the data, then draws those importances as a bar plot.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    kwargs : dict
        Keyword arguments passed to the fit method of the estimator.

    Returns
    -------
    self : visualizer
        The fit method must always return self to support pipelines.
    """
    # Super call fits the underlying estimator if it's not already fitted
    super(FeatureImportances, self).fit(X, y, **kwargs)

    # Get the feature importances from the model
    self.feature_importances_ = self._find_importances_param()

    # Get the classes from the model
    if is_classifier(self):
        self.classes_ = self._find_classes_param()
    else:
        self.classes_ = None
        self.stack = False

    # When stack=True a multidim importances array of shape
    # (n_classes, n_features) is expected and kept as-is; when stack=False
    # but the importances are still multidim, flatten by taking the mean
    # over classes to get shape (n_features,) (see LogisticRegression)
    if not self.stack and self.feature_importances_.ndim > 1:
        self.feature_importances_ = np.mean(self.feature_importances_, axis=0)
        warnings.warn(
            (
                "detected multi-dimensional feature importances but stack=False, "
                "using mean to aggregate them."
            ),
            YellowbrickWarning,
        )

    # Apply absolute value filter before normalization
    if self.absolute:
        self.feature_importances_ = np.abs(self.feature_importances_)

    # Normalize features relative to the maximum
    if self.relative:
        maxv = np.abs(self.feature_importances_).max()
        self.feature_importances_ /= maxv
        self.feature_importances_ *= 100.0

    # Create labels for the feature importances
    # NOTE: this code is duplicated from MultiFeatureVisualizer
    if self.labels is None:
        # Use column names if a dataframe
        if is_dataframe(X):
            self.features_ = np.array(X.columns)
        # Otherwise use the column index as the labels
        else:
            _, ncols = X.shape
            self.features_ = np.arange(0, ncols)
    else:
        self.features_ = np.array(self.labels)

    # Sort the features and their importances
    if self.stack:
        sort_idx = np.argsort(np.mean(self.feature_importances_, 0))
        self.features_ = self.features_[sort_idx]
        self.feature_importances_ = self.feature_importances_[:, sort_idx]
    else:
        sort_idx = np.argsort(self.feature_importances_)
        self.features_ = self.features_[sort_idx]
        self.feature_importances_ = self.feature_importances_[sort_idx]

    # Draw the feature importances
    self.draw()
    return self
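# Usage sketch for the fit() above, assuming FeatureImportances is imported
# from yellowbrick.model_selection and reusing X, y from the earlier
# make_classification sketch:
from sklearn.ensemble import RandomForestClassifier
from yellowbrick.model_selection import FeatureImportances

viz = FeatureImportances(RandomForestClassifier(n_estimators=10))
viz.fit(X, y)  # fits the forest, then extracts and draws feature_importances_
viz.show()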