def test_curve_diffs(self):
    """Plot with curves='micro' and curves='each_class' and require that
    the two returned axes do not compare equal."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # Evaluated in order: 'micro' first, then 'each_class'.
    micro_axes, per_class_axes = [
        model.plot_precision_recall_curve(self.X, self.y, curves=mode)
        for mode in ('micro', 'each_class')
    ]
    self.assertNotEqual(micro_axes, per_class_axes)
def test_do_cv(self):
    """Call plot_precision_recall_curve with defaults, then require an
    AttributeError when the same call is made with do_cv=False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # The default call is expected to complete without raising.
    model.plot_precision_recall_curve(self.X, self.y)
    # NOTE(review): presumably fails because the estimator was never
    # fitted explicitly -- confirm against the plotting implementation.
    with self.assertRaises(AttributeError):
        model.plot_precision_recall_curve(self.X, self.y, do_cv=False)
def test_do_cv(self):
    """Call plot_precision_recall_curve with defaults, then require an
    AttributeError when the same call is made with do_cv=False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # The default call is expected to complete without raising.
    model.plot_precision_recall_curve(self.X, self.y)
    # NOTE(review): presumably fails because the estimator was never
    # fitted explicitly -- confirm against the plotting implementation.
    with self.assertRaises(AttributeError):
        model.plot_precision_recall_curve(self.X, self.y, do_cv=False)
def test_train_sizes(self):
    """Smoke-test plot_learning_curve with an explicit ``train_sizes``
    array of 8 evenly spaced values from 0.1 to 1.0."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.plot_learning_curve(
        self.X,
        self.y,
        train_sizes=np.linspace(0.1, 1.0, 8),
    )
def test_curve_diffs(self):
    """Plot with curves='micro' and curves='each_class' and require that
    the two returned axes do not compare equal."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # Evaluated in order: 'micro' first, then 'each_class'.
    micro_axes, per_class_axes = [
        model.plot_precision_recall_curve(self.X, self.y, curves=mode)
        for mode in ('micro', 'each_class')
    ]
    self.assertNotEqual(micro_axes, per_class_axes)
def test_order(self):
    """Smoke-test plot_feature_importances with each ``order`` value:
    'ascending', 'descending' and None."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    for ordering in ('ascending', 'descending', None):
        forest.plot_feature_importances(order=ordering)
def test_cmap(self):
    """Smoke-test plot_confusion_matrix with ``cmap`` given first as a
    colormap name and then as a matplotlib colormap object."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.plot_confusion_matrix(self.X, self.y, cmap='nipy_spectral')
    colormap = plt.cm.nipy_spectral
    model.plot_confusion_matrix(self.X, self.y, cmap=colormap)
def test_max_num_features(self):
    """Smoke-test plot_feature_importances with ``max_num_features`` set
    to 2, 4 and 6 in turn."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    for limit in (2, 4, 6):
        forest.plot_feature_importances(max_num_features=limit)
def test_order(self):
    """Smoke-test plot_feature_importances with each ``order`` value:
    'ascending', 'descending' and None."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    for ordering in ('ascending', 'descending', None):
        forest.plot_feature_importances(order=ordering)
def test_max_num_features(self):
    """Smoke-test plot_feature_importances with ``max_num_features`` set
    to 2, 4 and 6 in turn."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    for limit in (2, 4, 6):
        forest.plot_feature_importances(max_num_features=limit)
def test_ax(self):
    """Check how plot_precision_recall_curve treats the ``ax`` argument.

    Without ``ax`` the returned axes must be a different object from the
    pre-made one; with ``ax`` supplied, that same object must come back.
    """
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    _, supplied_ax = plt.subplots(1, 1)

    returned_ax = model.plot_precision_recall_curve(self.X, self.y)
    assert supplied_ax is not returned_ax

    returned_ax = model.plot_precision_recall_curve(self.X, self.y,
                                                    ax=supplied_ax)
    assert supplied_ax is returned_ax
def test_ax(self):
    """Check how plot_precision_recall_curve treats the ``ax`` argument.

    Without ``ax`` the returned axes must be a different object from the
    pre-made one; with ``ax`` supplied, that same object must come back.
    """
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    _, supplied_ax = plt.subplots(1, 1)

    returned_ax = model.plot_precision_recall_curve(self.X, self.y)
    assert supplied_ax is not returned_ax

    returned_ax = model.plot_precision_recall_curve(self.X, self.y,
                                                    ax=supplied_ax)
    assert supplied_ax is returned_ax
def test_invalid_curve_arg(self):
    """Require a ValueError when plot_precision_recall_curve is given
    the unrecognized ``curves`` value 'zzz'."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    with self.assertRaises(ValueError):
        model.plot_precision_recall_curve(self.X, self.y, curves='zzz')
def test_do_split(self):
    """Call plot_ks_statistic with defaults, then require an
    AttributeError when the same call is made with do_split=False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # The default call is expected to complete without raising.
    model.plot_ks_statistic(self.X, self.y)
    # NOTE(review): presumably fails because the estimator was never
    # fitted explicitly -- confirm against the plotting implementation.
    with self.assertRaises(AttributeError):
        model.plot_ks_statistic(self.X, self.y, do_split=False)
def test_ax(self):
    """Check how plot_feature_importances treats the ``ax`` argument.

    Without ``ax`` the returned axes must be a different object from the
    pre-made one; with ``ax`` supplied, that same object must come back.
    """
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    _, supplied_ax = plt.subplots(1, 1)

    returned_ax = forest.plot_feature_importances()
    assert supplied_ax is not returned_ax

    returned_ax = forest.plot_feature_importances(ax=supplied_ax)
    assert supplied_ax is returned_ax
def test_do_cv(self):
    """Call plot_confusion_matrix with defaults, then require a
    NotFittedError when the same call is made with do_cv=False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # The default call is expected to complete without raising.
    model.plot_confusion_matrix(self.X, self.y)
    with self.assertRaises(NotFittedError):
        model.plot_confusion_matrix(self.X, self.y, do_cv=False)
def test_cmap(self):
    """Smoke-test plot_precision_recall_curve with ``cmap`` given first
    as a colormap name and then as a matplotlib colormap object."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.plot_precision_recall_curve(self.X, self.y,
                                      cmap='nipy_spectral')
    colormap = plt.cm.nipy_spectral
    model.plot_precision_recall_curve(self.X, self.y, cmap=colormap)
def test_true_pred_labels(self):
    """Smoke-test plot_confusion_matrix with ``true_labels=[0, 1]`` and
    ``pred_labels=[0, 2]`` supplied together."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    label_filters = {'true_labels': [0, 1], 'pred_labels': [0, 2]}
    model.plot_confusion_matrix(self.X, self.y, **label_filters)
def test_ax(self):
    """Check how plot_feature_importances treats the ``ax`` argument.

    Without ``ax`` the returned axes must be a different object from the
    pre-made one; with ``ax`` supplied, that same object must come back.
    """
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    _, supplied_ax = plt.subplots(1, 1)

    returned_ax = forest.plot_feature_importances()
    assert supplied_ax is not returned_ax

    returned_ax = forest.plot_feature_importances(ax=supplied_ax)
    assert supplied_ax is returned_ax
def test_true_pred_labels(self):
    """Smoke-test plot_confusion_matrix with ``true_labels=[0, 1]`` and
    ``pred_labels=[0, 2]`` supplied together."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    label_filters = {'true_labels': [0, 1], 'pred_labels': [0, 2]}
    model.plot_confusion_matrix(self.X, self.y, **label_filters)
def test_instance_validation(self):
    """Exercise classifier_factory against three fixture classes.

    A ``Classifier`` instance is accepted, a ``NotClassifier`` instance
    must raise TypeError, and a ``PartialClassifier`` instance must
    produce exactly two recorded warnings, the last being a UserWarning
    about a missing method.
    """
    scikitplot.classifier_factory(self.Classifier())

    rejected = self.NotClassifier()
    with self.assertRaises(TypeError):
        scikitplot.classifier_factory(rejected)

    incomplete = self.PartialClassifier()
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including repeats of earlier ones.
        warnings.simplefilter('always')
        scikitplot.classifier_factory(incomplete)
        assert len(caught) == 2
        latest = caught[-1]
        assert issubclass(latest.category, UserWarning)
        assert (" not in clf. Some plots may not be possible to generate."
                in str(latest.message))
def test_instance_validation(self):
    """Exercise classifier_factory against three fixture classes.

    A ``Classifier`` instance is accepted, a ``NotClassifier`` instance
    must raise TypeError, and a ``PartialClassifier`` instance must
    produce exactly one recorded warning, a UserWarning about a missing
    method.
    """
    scikitplot.classifier_factory(self.Classifier())

    rejected = self.NotClassifier()
    with self.assertRaises(TypeError):
        scikitplot.classifier_factory(rejected)

    incomplete = self.PartialClassifier()
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including repeats of earlier ones.
        warnings.simplefilter('always')
        scikitplot.classifier_factory(incomplete)
        assert len(caught) == 1
        latest = caught[-1]
        assert issubclass(latest.category, UserWarning)
        assert (" not in clf. Some plots may not be possible to generate."
                in str(latest.message))
def test_predict_proba(self):
    """Require a TypeError from plot_ks_statistic when the classifier
    defines fit/predict/score but no ``predict_proba``."""
    np.random.seed(0)

    class DummyClassifier:
        # Minimal stand-in: deliberately has no predict_proba method.
        def __init__(self):
            pass

        def fit(self):
            pass

        def predict(self):
            pass

        def score(self):
            pass

    stub = DummyClassifier()
    scikitplot.classifier_factory(stub)
    with self.assertRaises(TypeError):
        stub.plot_ks_statistic(self.X, self.y)
def test_predict_proba(self):
    """Require a TypeError from plot_precision_recall_curve when the
    classifier defines fit/predict/score but no ``predict_proba``."""
    np.random.seed(0)

    class DummyClassifier:
        # Minimal stand-in: deliberately has no predict_proba method.
        def __init__(self):
            pass

        def fit(self):
            pass

        def predict(self):
            pass

        def score(self):
            pass

    stub = DummyClassifier()
    scikitplot.classifier_factory(stub)
    with self.assertRaises(TypeError):
        stub.plot_precision_recall_curve(self.X, self.y)
def test_method_insertion(self):
    """Check that classifier_factory attaches the six plot methods and
    warns when it is applied to the same instance a second time."""
    clf = self.Classifier()
    scikitplot.classifier_factory(clf)

    injected_methods = (
        'plot_learning_curve',
        'plot_confusion_matrix',
        'plot_roc_curve',
        'plot_ks_statistic',
        'plot_precision_recall_curve',
        'plot_feature_importances',
    )
    for method_name in injected_methods:
        assert hasattr(clf, method_name)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        # Second application to the same, already-augmented instance.
        scikitplot.classifier_factory(clf)
        assert len(caught) == 7
        override_text = (' method already in clf. '
                         'Overriding anyway. This may '
                         'result in unintended behavior.')
        # NOTE(review): the first recorded warning is not checked;
        # presumably it is of a different kind -- confirm.
        for entry in caught[1:]:
            assert issubclass(entry.category, UserWarning)
            assert override_text in str(entry.message)
def test_method_insertion(self):
    """Check that classifier_factory attaches the six plot methods and
    warns when it is applied to the same instance a second time."""
    clf = self.Classifier()
    scikitplot.classifier_factory(clf)

    injected_methods = (
        'plot_learning_curve',
        'plot_confusion_matrix',
        'plot_roc_curve',
        'plot_ks_statistic',
        'plot_precision_recall_curve',
        'plot_feature_importances',
    )
    for method_name in injected_methods:
        assert hasattr(clf, method_name)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        # Second application to the same, already-augmented instance.
        scikitplot.classifier_factory(clf)
        assert len(caught) == 7
        override_text = (' method already in clf. '
                         'Overriding anyway. This may '
                         'result in unintended behavior.')
        # NOTE(review): the first recorded warning is not checked;
        # presumably it is of a different kind -- confirm.
        for entry in caught[1:]:
            assert issubclass(entry.category, UserWarning)
            assert override_text in str(entry.message)
def test_shuffle(self):
    """Smoke-test plot_confusion_matrix with ``shuffle`` set to True and
    then to False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    for shuffle_flag in (True, False):
        model.plot_confusion_matrix(self.X, self.y, shuffle=shuffle_flag)
def test_shuffle(self):
    """Smoke-test plot_confusion_matrix with ``shuffle`` set to True and
    then to False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    for shuffle_flag in (True, False):
        model.plot_confusion_matrix(self.X, self.y, shuffle=shuffle_flag)
def test_two_classes(self):
    """Require a ValueError from plot_ks_statistic on the dataset
    returned by ``load_data``.

    NOTE(review): ``load_data`` is bound outside this method; presumably
    it yields more than two classes -- confirm at the import site.
    """
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    features, targets = load_data(return_X_y=True)
    with self.assertRaises(ValueError):
        model.plot_ks_statistic(features, targets)
def test_invalid_curve_arg(self):
    """Require a ValueError when plot_precision_recall_curve is given
    the unrecognized ``curves`` value 'zzz'."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    with self.assertRaises(ValueError):
        model.plot_precision_recall_curve(self.X, self.y, curves='zzz')
def test_n_jobs(self):
    """Smoke-test plot_learning_curve with ``n_jobs=-1``."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    parallel_options = {'n_jobs': -1}
    model.plot_learning_curve(self.X, self.y, **parallel_options)
'Please specify a valid dataset to use. Options: default, compare')
# NOTE(review): the line above is the tail of a call that opens before the
# visible part of this script; its context cannot be confirmed from here.

# Alternative preprocessing steps, currently disabled:
# X = normalize(X, norm='l2', axis=1)
# X = VarianceThreshold(threshold=0.00000005).fit_transform(X)
# X = SelectKBest(f_classif, k=50).fit_transform(X, Y)

# Rescale X with StandardScaler, then report the column count of the first
# row next to x_len_before (bound outside the visible code).
X = StandardScaler().fit_transform(X)
print('# features used: %d / %d' % (len(X[0]), x_len_before))

# Earlier hand-built ensemble, currently disabled:
# clf1 = LogisticRegression(random_state=1, verbose=VERBOSE)
# clf2 = RandomForestClassifier(random_state=1, verbose=VERBOSE)
# clf3 = GaussianNB()
#
# vc1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
#                        voting='hard')
# scores = cross_val_score(vc1, X, Y, cv=CROSS_VAL)

# Build the hard-voting ensemble from CLASSIFIERS / NAMES.
# NOTE(review): generate_tuple_lists is defined elsewhere; presumably it
# pairs names with estimators as VotingClassifier expects -- confirm.
estimators = generate_tuple_lists(CLASSIFIERS, NAMES)
vc = VotingClassifier(estimators, voting='hard')
# `nb` is not used again in the visible code.
nb = classifier_factory(vc)

# Shuffled K-fold cross-validation, timed with wall-clock time.
kfold = KFold(n_splits=CROSS_VAL, shuffle=True)
start_time = time.time()
scores = cross_val_score(vc, X, Y, cv=kfold, verbose=VERBOSE)
end_time = time.time()
print(scores)
# The mean score and the 95% confidence interval
# (reported as +/- two standard deviations of the per-fold scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("--- %s seconds ---" % (end_time - start_time))
def test_string_classes(self):
    """Smoke-test plot_precision_recall_curve with the labels passed
    through ``convert_labels_into_string`` first."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    text_labels = convert_labels_into_string(self.y)
    model.plot_precision_recall_curve(self.X, text_labels)
def test_cmap(self):
    """Smoke-test plot_confusion_matrix with ``cmap`` given first as a
    colormap name and then as a matplotlib colormap object."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.plot_confusion_matrix(self.X, self.y, cmap='nipy_spectral')
    colormap = plt.cm.nipy_spectral
    model.plot_confusion_matrix(self.X, self.y, cmap=colormap)
"""An example showing the plot_feature_importances method used by a scikit-learn classifier""" from sklearn.ensemble import RandomForestClassifier from sklearn.datasets import load_iris as load_data import matplotlib.pyplot as plt from scikitplot import classifier_factory X, y = load_data(return_X_y=True) rf = classifier_factory(RandomForestClassifier(random_state=1)) rf.fit(X, y) rf.plot_feature_importances(feature_names=[ 'petal length', 'petal width', 'sepal length', 'sepal width' ]) plt.show() # Using the more flexible functions API from scikitplot import plotters as skplt rf = RandomForestClassifier() rf = rf.fit(X, y) skplt.plot_feature_importances(rf, feature_names=[ 'petal length', 'petal width', 'sepal length', 'sepal width' ]) plt.show()
"""An example showing the plot_ks_statistic method used by a scikit-learn classifier""" from __future__ import absolute_import import matplotlib.pyplot as plt from scikitplot import classifier_factory from sklearn.linear_model import LogisticRegression from sklearn.datasets import load_breast_cancer as load_data X, y = load_data(return_X_y=True) lr = classifier_factory(LogisticRegression()) lr.plot_ks_statistic(X, y, random_state=1) plt.show() # Using the more flexible functions API from scikitplot import plotters as skplt lr = LogisticRegression() lr = lr.fit(X, y) probas = lr.predict_proba(X) skplt.plot_ks_statistic(y_true=y, y_probas=probas) plt.show()
def test_train_sizes(self):
    """Smoke-test plot_learning_curve with an explicit ``train_sizes``
    array of 8 evenly spaced values from 0.1 to 1.0."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.plot_learning_curve(
        self.X,
        self.y,
        train_sizes=np.linspace(0.1, 1.0, 8),
    )
def test_feature_importances_in_clf(self):
    """Require a TypeError from plot_feature_importances on a fitted
    LogisticRegression.

    NOTE(review): presumably raised because the estimator exposes no
    feature importances -- confirm against the plotting implementation.
    """
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.fit(self.X, self.y)
    with self.assertRaises(TypeError):
        model.plot_feature_importances()
# kjør clustering på t-sne #sjekk feature importance opp mot de 50 andre #feature importance from sklearn.ensemble import RandomForestClassifier from sklearn.datasets import load_iris as load_data import matplotlib.pyplot as plt from scikitplot import classifier_factory X1, y1 = load_data(return_X_y=True) X1.shape y1.shape X.shape y.shape rf = classifier_factory(RandomForestClassifier(random_state=1)) rf.fit(X, Y) rf.plot_feature_importances(feature_names=["feature"+str(i)for i in range(50)]) plt.show() # Using the more flexible functions API from scikitplot import plotters as skplt rf = RandomForestClassifier() rf = rf.fit(X, y) skplt.plot_feature_importances(rf, feature_names=['petal length', 'petal width', 'sepal length', 'sepal width']) plt.show() #kjøre PCA med 50 variable og se hvor mye dimensjonene forklarer varians from sklearn.decomposition import PCA from sklearn.datasets import load_digits as load_data
def test_string_classes(self):
    """Smoke-test plot_feature_importances on a forest fitted with the
    labels passed through ``convert_labels_into_string`` first."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    text_labels = convert_labels_into_string(self.y)
    forest.fit(self.X, text_labels)
    forest.plot_feature_importances()
def test_string_classes(self):
    """Smoke-test plot_feature_importances on a forest fitted with the
    labels passed through ``convert_labels_into_string`` first."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    text_labels = convert_labels_into_string(self.y)
    forest.fit(self.X, text_labels)
    forest.plot_feature_importances()
def test_two_classes(self):
    """Require a ValueError from plot_ks_statistic on the dataset
    returned by ``load_data``.

    NOTE(review): ``load_data`` is bound outside this method; presumably
    it yields more than two classes -- confirm at the import site.
    """
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    features, targets = load_data(return_X_y=True)
    with self.assertRaises(ValueError):
        model.plot_ks_statistic(features, targets)
def test_feature_names(self):
    """Smoke-test plot_feature_importances with four explicit
    ``feature_names``."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    names = ["a", "b", "c", "d"]
    forest.plot_feature_importances(feature_names=names)
def test_do_cv(self):
    """Call plot_confusion_matrix with defaults, then require a
    NotFittedError when the same call is made with do_cv=False."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # The default call is expected to complete without raising.
    model.plot_confusion_matrix(self.X, self.y)
    with self.assertRaises(NotFittedError):
        model.plot_confusion_matrix(self.X, self.y, do_cv=False)
def test_feature_names(self):
    """Smoke-test plot_feature_importances with four explicit
    ``feature_names``."""
    np.random.seed(0)
    forest = RandomForestClassifier()
    scikitplot.classifier_factory(forest)
    forest.fit(self.X, self.y)
    names = ["a", "b", "c", "d"]
    forest.plot_feature_importances(feature_names=names)
def test_labels(self):
    """Smoke-test plot_confusion_matrix with ``labels=[0, 1, 2]``."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    class_labels = [0, 1, 2]
    model.plot_confusion_matrix(self.X, self.y, labels=class_labels)
def test_n_jobs(self):
    """Smoke-test plot_learning_curve with ``n_jobs=-1``."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    parallel_options = {'n_jobs': -1}
    model.plot_learning_curve(self.X, self.y, **parallel_options)
def test_feature_importances_in_clf(self):
    """Require a TypeError from plot_feature_importances on a fitted
    LogisticRegression.

    NOTE(review): presumably raised because the estimator exposes no
    feature importances -- confirm against the plotting implementation.
    """
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.fit(self.X, self.y)
    with self.assertRaises(TypeError):
        model.plot_feature_importances()
def test_cv(self):
    """Smoke-test plot_confusion_matrix with the default ``cv`` and then
    with ``cv=5``."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # First with the default, then with an explicit fold count.
    for extra_kwargs in ({}, {'cv': 5}):
        model.plot_confusion_matrix(self.X, self.y, **extra_kwargs)
def test_cv(self):
    """Smoke-test plot_confusion_matrix with the default ``cv`` and then
    with ``cv=5``."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    # First with the default, then with an explicit fold count.
    for extra_kwargs in ({}, {'cv': 5}):
        model.plot_confusion_matrix(self.X, self.y, **extra_kwargs)
def test_labels(self):
    """Smoke-test plot_confusion_matrix with ``labels=[0, 1, 2]``."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    class_labels = [0, 1, 2]
    model.plot_confusion_matrix(self.X, self.y, labels=class_labels)
def test_string_classes(self):
    """Smoke-test plot_precision_recall_curve with the labels passed
    through ``convert_labels_into_string`` first."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    text_labels = convert_labels_into_string(self.y)
    model.plot_precision_recall_curve(self.X, text_labels)
def test_cmap(self):
    """Smoke-test plot_precision_recall_curve with ``cmap`` given first
    as a colormap name and then as a matplotlib colormap object."""
    np.random.seed(0)
    model = LogisticRegression()
    scikitplot.classifier_factory(model)
    model.plot_precision_recall_curve(self.X, self.y,
                                      cmap='nipy_spectral')
    colormap = plt.cm.nipy_spectral
    model.plot_precision_recall_curve(self.X, self.y, cmap=colormap)
"""An example showing the plot_roc_curve method used by a scikit-learn classifier""" from __future__ import absolute_import import matplotlib.pyplot as plt from scikitplot import classifier_factory from sklearn.naive_bayes import GaussianNB from sklearn.datasets import load_digits as load_data X, y = load_data(return_X_y=True) nb = classifier_factory(GaussianNB()) nb.plot_roc_curve(X, y, random_state=1) plt.show()
"""An example showing the plot_learning_curve method used by a scikit-learn classifier""" from __future__ import absolute_import import matplotlib.pyplot as plt from scikitplot import classifier_factory from sklearn.ensemble import RandomForestClassifier from sklearn.datasets import load_breast_cancer as load_data X, y = load_data(return_X_y=True) rf = classifier_factory(RandomForestClassifier()) rf.plot_learning_curve(X, y) plt.show() # Using the more flexible functions API from scikitplot import plotters as skplt rf = RandomForestClassifier() skplt.plot_learning_curve(rf, X, y) plt.show()