def test_quick_method(self):
        """
        Test the classification_report quick method with a synthetic dataset

        Generates a seeded 400-sample, 20-feature binary classification
        problem, draws the report for a decision tree onto a fresh axes and
        checks the rendered axes with ``assert_images_similar``.
        """
        X, y = make_classification(
            n_samples=400, n_features=20, n_informative=8, n_redundant=8,
            n_classes=2, n_clusters_per_class=4, random_state=27
        )

        _, ax = plt.subplots()
        # Seed the estimator: an unseeded tree may break ties between equally
        # good splits differently on each run, which makes an image
        # comparison flaky.
        model = DecisionTreeClassifier(random_state=19)
        # NOTE(review): if this version of the quick method splits the data
        # internally, that split may also need a seed -- confirm.
        classification_report(model, X, y, ax=ax)

        self.assert_images_similar(ax=ax)
    def test_quick_method(self):
        """
        Exercise the classification_report quick method on synthetic data
        """
        # Seeded binary classification problem: 400 samples, 20 features
        dataset_params = dict(
            n_samples=400, n_features=20, n_informative=8, n_redundant=8,
            n_classes=2, n_clusters_per_class=4, random_state=27,
        )
        X, y = make_classification(**dataset_params)

        # Only the axes is needed; the figure handle is discarded
        ax = plt.subplots()[1]
        classification_report(
            DecisionTreeClassifier(random_state=19), X, y, ax=ax, random_state=42
        )

        self.assert_images_similar(ax=ax, tol=25.0)
示例#3
0
    def test_quick_method(self):
        """
        Exercise the quick method with explicit train and test splits
        """
        features, target = make_classification(
            n_samples=400, n_features=20, n_informative=8, n_redundant=8,
            n_classes=2, n_clusters_per_class=4, random_state=27,
        )

        # Hold out 20% of the samples as the test split
        X_train, X_test, y_train, y_test = tts(
            features, target, test_size=0.2, random_state=42
        )

        # Only the axes is needed; the figure handle is discarded
        ax = plt.subplots()[1]
        oz = classification_report(
            DecisionTreeClassifier(random_state=19),
            X_train, y_train, X_test, y_test,
            ax=ax, show=False,
        )

        # The quick method is expected to hand back the visualizer it drew
        assert isinstance(oz, ClassificationReport)
        self.assert_images_similar(oz, tol=12)
示例#4
0
    def test_catboost_classifier(self):
        """
        Check that a wrapped CatBoost classifier works with the quick method
        """
        dataset_params = dict(
            n_samples=400, n_features=10, n_informative=2, n_redundant=3,
            n_classes=2, n_clusters_per_class=2, random_state=8311982,
        )
        X, y = make_classification(**dataset_params)
        X_train, X_test, y_train, y_test = tts(X, y)

        booster = catboost.CatBoostClassifier(
            iterations=2, depth=2, learning_rate=1, loss_function='Logloss'
        )
        model = classifier(booster)

        # Fit the wrapped model up front and tell the visualizer it is already
        # fitted: this path works, whereas letting the visualizer fit the model
        # does not. It is unclear whether that is a catboost or a Yellowbrick
        # issue.
        model.fit(X_train, y_train)

        oz = classification_report(
            model, X_train, y_train, X_test, y_test, is_fitted=True, show=False
        )
        assert is_fitted(oz)
示例#5
0
    def test_cuml_classifier(self):
        """
        Check that a wrapped cuML classifier works with the quick method
        """
        # NOTE: this test has not actually been run; cuML could not be installed
        dataset_params = dict(
            n_samples=400, n_features=10, n_informative=2, n_redundant=3,
            n_classes=2, n_clusters_per_class=2, random_state=8311982,
        )
        X, y = make_classification(**dataset_params)
        X_train, X_test, y_train, y_test = tts(X, y)

        # Move every split into cudf containers (frames for X, series for y)
        X_train = cudf.DataFrame(X_train)
        y_train = cudf.Series(y_train)
        X_test = cudf.DataFrame(X_test)
        y_test = cudf.Series(y_test)

        forest = curfc(n_estimators=40, max_depth=8, max_features=1)
        oz = classification_report(
            classifier(forest), X_train, y_train, X_test, y_test, show=False
        )
        assert is_fitted(oz)
示例#6
0
    verbose_feature_names = lines
# Integer indices 0..N-1, one per entry of verbose_feature_names.
feature_names = np.arange(len(verbose_feature_names))

# %%
# Short and descriptive labels for the two target classes.
# NOTE(review): presumably the two lists are aligned ('+' -> 'Activator',
# '-' -> 'Repressor') -- confirm.
class_names = ['+', '-']
verbose_class_names = ['Activator', 'Repressor']

# %%
# Load the features and the target from files given by relative path.
# NOTE(review): `load` is presumably joblib.load -- confirm against the imports.
X = load('X.joblib')
y = load('y.joblib')

# %%
# Render a tree of clf as a graph.
# NOTE(review): presumably xgboost's to_graphviz, where num_trees=0 selects the
# first tree and rankdir='LR' lays the graph out left to right -- confirm.
to_graphviz(clf, num_trees=0, rankdir='LR')

# %%
# Classification report for clf on X, y via the quick method.
classification_report(clf, X, y)

# %%
# ROC/AUC plot labelled with the short class names.
# score() is called without a prior fit() here.
# NOTE(review): this assumes clf was already fitted earlier in the file --
# confirm.
# NOTE(review): poof() was renamed to show() in Yellowbrick 1.0 -- check the
# installed version before changing.
visualizer = ROCAUC(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
# Class prediction error plot, scored on the same data as above.
visualizer = ClassPredictionError(clf, classes=class_names)
visualizer.score(X, y)
visualizer.poof()

# %%
# Discrimination threshold plot; unlike the cells above this one calls fit().
visualizer = DiscriminationThreshold(clf)
visualizer.fit(X, y)
visualizer.poof()