def test_632plus():
    tree = DecisionTreeClassifier(random_state=123)
    scores = bootstrap_point632_score(tree, X, y, random_seed=123,
                                      method='.632+')
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.96528, np.round(acc, 5)

    tree2 = DecisionTreeClassifier(random_state=123, max_depth=1)
    scores = bootstrap_point632_score(tree2, X, y, random_seed=123,
                                      method='.632+')
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.65034, np.round(acc, 5)
def bootstrap_632(self):
    """Performs bootstrap validation."""
    print(self.problem_name)
    for k in range(1, len(self.grid)):
        start_time = time.time()
        scores = bootstrap_point632_score(
            self.grid[k].best_estimator_, self.X.values, self.y.values,
            n_splits=1000, method='.632', random_seed=42
        )
        acc = np.mean(scores)
        lower = np.percentile(scores, 2.5)
        upper = np.percentile(scores, 97.5)
        end_time = np.round(time.time() - start_time, 2)
        self.bootstrap_results.append([np.round(100 * acc, 2),
                                       [np.round(100 * lower, 2),
                                        np.round(100 * upper, 2)],
                                       end_time])
        print(
            self.classifiers[k].upper(),
            ' acc: %.2f%%' % (100 * acc),
            ' 95%% Confidence interval: [%.2f, %.2f]' % (100 * lower, 100 * upper),
            ' time', end_time
        )
def bootstrap_632(self, n_splits=None):
    """Performs bootstrap validation.

    1000 iterations are good for small datasets; for large datasets
    bootstrapping can be skipped.
    """
    if n_splits is None:
        n_splits = self.n_bootstrap_splits
    print(self.problem_name)
    self.bootstrap_results = []
    # for k in range(1, len(self.grid)):
    for key in self.grid.keys():
        start_time = time.time()
        if isinstance(self.X, np.ndarray):
            scores = bootstrap_point632_score(
                # self.grid[k].best_estimator_, self.X,
                self.grid[key].best_estimator_, self.X,
                self.y, n_splits=n_splits, method='.632',
                random_seed=self.random_state
            )
        else:
            scores = bootstrap_point632_score(
                # self.grid[k].best_estimator_, self.X.values,
                self.grid[key].best_estimator_, self.X.values,
                self.y.values, n_splits=n_splits, method='.632',
                random_seed=self.random_state
            )
        acc = np.mean(scores)
        lower = np.percentile(scores, 2.5)
        upper = np.percentile(scores, 97.5)
        end_time = np.round(time.time() - start_time, 2)
        self.bootstrap_results.append([np.round(100 * acc, 2),
                                       [np.round(100 * lower, 2),
                                        np.round(100 * upper, 2)],
                                       end_time])
        print(
            # self.classifiers[k].upper(),
            key,
            ' acc: %.2f%%' % (100 * acc),
            ' 95%% Confidence interval: [%.2f, %.2f]' % (100 * lower, 100 * upper),
            ' time', end_time
        )
def test_scoring():
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring='f1',
                                      random_seed=123)
    f1 = np.mean(scores)
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
def test_scoring():
    lr = LogisticRegression()
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring='f1',
                                      random_seed=123)
    f1 = np.mean(scores)
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
def test_scoring():
    from sklearn.metrics import f1_score
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring_func=f1_score,
                                      random_seed=123)
    f1 = np.mean(scores)
    assert len(scores) == 200
    assert np.round(f1, 2) == 1.0, f1
def test_oob():
    tree = DecisionTreeClassifier(random_state=123)
    scores = bootstrap_point632_score(tree, X, y, random_seed=123,
                                      method='oob')
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.94667, np.round(acc, 5)
def test_custom_accuracy():
    def accuracy2(targets, predictions):
        return sum([i == j for i, j in
                    zip(targets, predictions)]) / len(targets)

    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X, y, random_seed=123,
                                      scoring_func=accuracy2)
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.95306, np.round(acc, 5)
def test_scoring_proba():
    from sklearn.metrics import roc_auc_score
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')

    # test predict_proba
    scores = bootstrap_point632_score(lr, X[:100], y[:100],
                                      scoring_func=roc_auc_score,
                                      predict_proba=True,
                                      random_seed=123)
    roc_auc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(roc_auc, 2) == 1.0, roc_auc

    with pytest.raises(RuntimeError):
        clf = FakeClassifier()
        scores = bootstrap_point632_score(clf, X[:100], y[:100],
                                          scoring_func=roc_auc_score,
                                          predict_proba=True,
                                          random_seed=123)
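# The FakeClassifier used in the test above is not defined in this snippet.
# A minimal sketch of what such a stub could look like -- an estimator that
# deliberately exposes no predict_proba, so that calling
# bootstrap_point632_score with predict_proba=True presumably raises the
# RuntimeError the test expects -- is given below. The class body here is an
# assumption for illustration, not the original definition.
class FakeClassifier:
    """Hypothetical stub: no predict_proba, only fit() and predict()."""

    def fit(self, X, y):
        return self

    def predict(self, X):
        # constant predictions are enough for the error path being tested
        return np.zeros(X.shape[0], dtype=int)

    def get_params(self, deep=True):
        # required so the estimator can be cloned inside the scorer
        return {}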
def compute_acc_with_ci(clf, X_test, y_test):
    from mlxtend.evaluate import bootstrap_point632_score
    from sklearn.metrics import balanced_accuracy_score

    scores = bootstrap_point632_score(
        clf,
        X=X_test,
        y=y_test,
        n_splits=500,
        method=".632+",
        clone_estimator=True,
        scoring_func=balanced_accuracy_score,
    )
    return scores
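# A minimal usage sketch for compute_acc_with_ci, assuming a classifier and a
# held-out split; the dataset, estimator, and variable names below are
# illustrative and not taken from the original code.
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X_all, y_all = load_breast_cancer(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X_all, y_all, random_state=0)

clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
scores = compute_acc_with_ci(clf, X_te, y_te)

# point estimate plus a 95% percentile interval over the 500 bootstrap splits
print("balanced acc: %.3f [%.3f, %.3f]"
      % (np.mean(scores), np.percentile(scores, 2.5), np.percentile(scores, 97.5)))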
def test_pandas_pass():
    tree = DecisionTreeClassifier(random_state=123)
    X_df = pd.DataFrame(X)
    y_ser = pd.Series(y)
    bootstrap_point632_score(tree, X_df, y_ser, random_seed=123, method='oob')
    bootstrap_point632_score(tree, X_df, y_ser, random_seed=123, method='.632')
    bootstrap_point632_score(tree, X_df, y_ser, random_seed=123, method='.632+')
def test_defaults():
    lr = LogisticRegression()
    scores = bootstrap_point632_score(lr, X, y, random_seed=123)
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 2) == 0.95
def test_defaults():
    lr = LogisticRegression(solver='liblinear', multi_class='ovr')
    scores = bootstrap_point632_score(lr, X, y, random_seed=123)
    acc = np.mean(scores)
    assert len(scores) == 200
    assert np.round(acc, 5) == 0.95306, np.round(acc, 5)
    s = np.random.randint(X.shape[0], size=X.shape[0])
    B.append(X[s].mean())

se = np.sqrt(np.var(B))
Cn = (p - z * se, p + z * se)
_ = ["%0.4f" % x for x in Cn]
print(_)

Cn = (np.percentile(B, alpha * 100), np.percentile(B, (1 - alpha) * 100))
_ = ["%0.4f" % x for x in Cn]
print(_)

from mlxtend.evaluate import bootstrap_point632_score

X, y = load_iris(return_X_y=True)
cl = GaussianNB()
B = bootstrap_point632_score(cl, X, y, n_splits=500)
Cn = (np.percentile(B, alpha * 100), np.percentile(B, (1 - alpha) * 100))
_ = ["%0.4f" % x for x in Cn]
print(_)

### macro Recall

from scipy.stats import norm
import numpy as np
from sklearn.datasets import load_iris
from sklearn import datasets
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import recall_score

alpha = 0.05
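# The resampling loop above is cut off at the top (the loop header and the
# initialization of B, p, and z are missing). A self-contained sketch of the
# same percentile-bootstrap idea for a sample mean, assuming B is filled
# inside a plain for-loop, is shown here; the data, seed, and split count are
# illustrative assumptions, not from the original code.
import numpy as np

rng = np.random.RandomState(42)
data = rng.normal(loc=5.0, scale=2.0, size=300)

alpha = 0.05
B = []
for _ in range(1000):
    # resample indices with replacement and record the resampled mean
    s = rng.randint(data.shape[0], size=data.shape[0])
    B.append(data[s].mean())

# (1 - alpha) percentile interval from the bootstrap distribution of the mean
ci = (np.percentile(B, 100 * alpha / 2), np.percentile(B, 100 * (1 - alpha / 2)))
print(["%0.4f" % x for x in ci])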