def test_classification_toy(algorithm): # Check classification on a toy dataset. clf = AdaBoostClassifier(algorithm=algorithm, random_state=0) clf.fit(X, y_class) assert_array_equal(clf.predict(T), y_t_class) assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_) assert clf.predict_proba(T).shape == (len(T), 2) assert clf.decision_function(T).shape == (len(T), )
def test_staged_predict(algorithm): # Check staged predictions. rng = np.random.RandomState(0) iris_weights = rng.randint(10, size=iris.target.shape) boston_weights = rng.randint(10, size=boston.target.shape) clf = AdaBoostClassifier(algorithm=algorithm, n_estimators=10) clf.fit(iris.data, iris.target, sample_weight=iris_weights) predictions = clf.predict(iris.data) staged_predictions = [p for p in clf.staged_predict(iris.data)] proba = clf.predict_proba(iris.data) staged_probas = [p for p in clf.staged_predict_proba(iris.data)] score = clf.score(iris.data, iris.target, sample_weight=iris_weights) staged_scores = [ s for s in clf.staged_score( iris.data, iris.target, sample_weight=iris_weights) ] assert len(staged_predictions) == 10 assert_array_almost_equal(predictions, staged_predictions[-1]) assert len(staged_probas) == 10 assert_array_almost_equal(proba, staged_probas[-1]) assert len(staged_scores) == 10 assert_array_almost_equal(score, staged_scores[-1]) # AdaBoost regression clf = AdaBoostRegressor(n_estimators=10, random_state=0) clf.fit(boston.data, boston.target, sample_weight=boston_weights) predictions = clf.predict(boston.data) staged_predictions = [p for p in clf.staged_predict(boston.data)] score = clf.score(boston.data, boston.target, sample_weight=boston_weights) staged_scores = [ s for s in clf.staged_score( boston.data, boston.target, sample_weight=boston_weights) ] assert len(staged_predictions) == 10 assert_array_almost_equal(predictions, staged_predictions[-1]) assert len(staged_scores) == 10 assert_array_almost_equal(score, staged_scores[-1])
def test_adaboost_consistent_predict(algorithm): # check that predict_proba and predict give consistent results # regression test for: # https://github.com/scikit-learn/scikit-learn/issues/14084 X_train, X_test, y_train, y_test = train_test_split( *datasets.load_digits(return_X_y=True), random_state=42) model = AdaBoostClassifier(algorithm=algorithm, random_state=42) model.fit(X_train, y_train) assert_array_equal(np.argmax(model.predict_proba(X_test), axis=1), model.predict(X_test))
def test_multidimensional_X(): """ Check that the AdaBoost estimators can work with n-dimensional data matrix """ from mrex.dummy import DummyClassifier, DummyRegressor rng = np.random.RandomState(0) X = rng.randn(50, 3, 3) yc = rng.choice([0, 1], 50) yr = rng.randn(50) boost = AdaBoostClassifier(DummyClassifier(strategy='most_frequent')) boost.fit(X, yc) boost.predict(X) boost.predict_proba(X) boost = AdaBoostRegressor(DummyRegressor()) boost.fit(X, yr) boost.predict(X)
bdt.fit(X, y) plot_colors = "br" plot_step = 0.02 class_names = "AB" plt.figure(figsize=(10, 5)) # Plot the decision boundaries plt.subplot(121) x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step), np.arange(y_min, y_max, plot_step)) Z = bdt.predict(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired) plt.axis("tight") # Plot the training points for i, n, c in zip(range(2), class_names, plot_colors): idx = np.where(y == i) plt.scatter(X[idx, 0], X[idx, 1], c=c, cmap=plt.cm.Paired, s=20, edgecolor='k', label="Class %s" % n) plt.xlim(x_min, x_max)
def test_sparse_classification(): # Check classification with sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set.""" def fit(self, X, y, sample_weight=None): """Modification on fit caries data type for later verification.""" super().fit(X, y, sample_weight=sample_weight) self.data_type_ = type(X) return self X, y = datasets.make_multilabel_classification(n_classes=1, n_samples=15, n_features=5, random_state=42) # Flatten y to a 1d array y = np.ravel(y) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for sparse_format in [ csc_matrix, csr_matrix, lil_matrix, coo_matrix, dok_matrix ]: X_train_sparse = sparse_format(X_train) X_test_sparse = sparse_format(X_test) # Trained on sparse format sparse_classifier = AdaBoostClassifier( base_estimator=CustomSVC(probability=True), random_state=1, algorithm="SAMME").fit(X_train_sparse, y_train) # Trained on dense format dense_classifier = AdaBoostClassifier( base_estimator=CustomSVC(probability=True), random_state=1, algorithm="SAMME").fit(X_train, y_train) # predict sparse_results = sparse_classifier.predict(X_test_sparse) dense_results = dense_classifier.predict(X_test) assert_array_equal(sparse_results, dense_results) # decision_function sparse_results = sparse_classifier.decision_function(X_test_sparse) dense_results = dense_classifier.decision_function(X_test) assert_array_almost_equal(sparse_results, dense_results) # predict_log_proba sparse_results = sparse_classifier.predict_log_proba(X_test_sparse) dense_results = dense_classifier.predict_log_proba(X_test) assert_array_almost_equal(sparse_results, dense_results) # predict_proba sparse_results = sparse_classifier.predict_proba(X_test_sparse) dense_results = dense_classifier.predict_proba(X_test) assert_array_almost_equal(sparse_results, dense_results) # score sparse_results = sparse_classifier.score(X_test_sparse, y_test) dense_results = dense_classifier.score(X_test, y_test) assert_array_almost_equal(sparse_results, dense_results) # staged_decision_function sparse_results = sparse_classifier.staged_decision_function( X_test_sparse) dense_results = dense_classifier.staged_decision_function(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_almost_equal(sprase_res, dense_res) # staged_predict sparse_results = sparse_classifier.staged_predict(X_test_sparse) dense_results = dense_classifier.staged_predict(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_predict_proba sparse_results = sparse_classifier.staged_predict_proba(X_test_sparse) dense_results = dense_classifier.staged_predict_proba(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_almost_equal(sprase_res, dense_res) # staged_score sparse_results = sparse_classifier.staged_score(X_test_sparse, y_test) dense_results = dense_classifier.staged_score(X_test, y_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # Verify sparsity of data is maintained during training types = [i.data_type_ for i in sparse_classifier.estimators_] assert all([(t == csc_matrix or t == csr_matrix) for t in types])