def test_mem_layout(): # Test with different memory layouts of X and y X_ = np.asfortranarray(X) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_)) X_ = np.ascontiguousarray(X) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_)) y_ = np.asarray(y, dtype=np.int32) y_ = np.ascontiguousarray(y_) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y_) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_)) y_ = np.asarray(y, dtype=np.int32) y_ = np.asfortranarray(y_) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y_) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_))
def test_degenerate_targets(): # Check if we can fit even though all targets are equal. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) # classifier should raise exception assert_raises(ValueError, clf.fit, X, np.ones(len(X))) clf = GradientBoostingRegressor(n_estimators=100, random_state=1) clf.fit(X, np.ones(len(X))) clf.predict([rng.rand(2)]) assert_array_equal(np.ones((1, ), dtype=np.float64), clf.predict([rng.rand(2)]))
def test_staged_predict_proba(): # Test whether staged predict proba eventually gives # the same prediction. X, y = datasets.make_hastie_10_2(n_samples=1200, random_state=1) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] clf = GradientBoostingClassifier(n_estimators=20) # test raise NotFittedError if not fitted assert_raises( NotFittedError, lambda X: np.fromiter(clf.staged_predict_proba(X), dtype=np.float64), X_test) clf.fit(X_train, y_train) # test if prediction for last stage equals ``predict`` for y_pred in clf.staged_predict(X_test): assert_equal(y_test.shape, y_pred.shape) assert_array_equal(clf.predict(X_test), y_pred) # test if prediction for last stage equals ``predict_proba`` for staged_proba in clf.staged_predict_proba(X_test): assert_equal(y_test.shape[0], staged_proba.shape[0]) assert_equal(2, staged_proba.shape[1]) assert_array_equal(clf.predict_proba(X_test), staged_proba)
def test_symbol_labels(): # Test with non-integer class labels. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) symbol_y = tosequence(map(str, y)) clf.fit(X, symbol_y) assert_array_equal(clf.predict(T), tosequence(map(str, true_result))) assert_equal(100, len(clf.estimators_))
def test_serialization(): # Check model serialization. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_)) try: import cPickle as pickle except ImportError: import pickle serialized_clf = pickle.dumps(clf, protocol=pickle.HIGHEST_PROTOCOL) clf = None clf = pickle.loads(serialized_clf) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_))
def test_non_uniform_weights_toy_edge_case_clf(): X = [[1, 0], [1, 0], [1, 0], [0, 1]] y = [0, 0, 1, 0] # ignore the first 2 training samples by setting their weight to 0 sample_weight = [0, 0, 1, 1] for loss in ('deviance', 'exponential'): gb = GradientBoostingClassifier(n_estimators=5) gb.fit(X, y, sample_weight=sample_weight) assert_array_equal(gb.predict([[1, 0]]), [1])
def test_float_class_labels(): # Test with float class labels. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) float_y = np.asarray(y, dtype=np.float32) clf.fit(X, float_y) assert_array_equal(clf.predict(T), np.asarray(true_result, dtype=np.float32)) assert_equal(100, len(clf.estimators_))
def test_shape_y(): # Test with float class labels. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) y_ = np.asarray(y, dtype=np.int32) y_ = y_[:, np.newaxis] # This will raise a DataConversionWarning that we want to # "always" raise, elsewhere the warnings gets ignored in the # later tests, and the tests that check for this warning fail assert_warns(DataConversionWarning, clf.fit, X, y_) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_))
def test_probability_log(): # Predict probabilities. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) # check if probabilities are in [0, 1]. y_proba = clf.predict_proba(T) assert_true(np.all(y_proba >= 0.0)) assert_true(np.all(y_proba <= 1.0)) # derive predictions from probabilities y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0) assert_array_equal(y_pred, true_result)
def check_classification_toy(presort, loss): # Check classification on a toy dataset. clf = GradientBoostingClassifier(loss=loss, n_estimators=10, random_state=1, presort=presort) assert_raises(ValueError, clf.predict, T) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) assert_equal(10, len(clf.estimators_)) deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:]) assert_true(np.any(deviance_decrease >= 0.0)) leaves = clf.apply(X) assert_equal(leaves.shape, (6, 10, 1))
def test_probability_exponential(): # Predict probabilities. clf = GradientBoostingClassifier(loss='exponential', n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) # check if probabilities are in [0, 1]. y_proba = clf.predict_proba(T) assert_true(np.all(y_proba >= 0.0)) assert_true(np.all(y_proba <= 1.0)) score = clf.decision_function(T).ravel() assert_array_almost_equal(y_proba[:, 1], 1.0 / (1.0 + np.exp(-2 * score))) # derive predictions from probabilities y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0) assert_array_equal(y_pred, true_result)