def check_classification_synthetic(presort, loss): # Test GradientBoostingClassifier on synthetic dataset used by # Hastie et al. in ESLII Example 12.7. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] y_train, y_test = y[:2000], y[2000:] gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, random_state=0) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.09) gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, subsample=0.5, random_state=0, presort=presort) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.08)
def test_oob_multilcass_iris(): # Check OOB improvement on multi-class dataset. clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', random_state=1, subsample=0.5) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) assert_greater(score, 0.9) assert_equal(clf.oob_improvement_.shape[0], clf.n_estimators)
def check_iris(presort, subsample, sample_weight): # Check consistency on dataset iris. clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', random_state=1, subsample=subsample, presort=presort) clf.fit(iris.data, iris.target, sample_weight=sample_weight) score = clf.score(iris.data, iris.target) assert_greater(score, 0.9) leaves = clf.apply(iris.data) assert_equal(leaves.shape, (150, 100, 3))
def test_zero_estimator_clf(): # Test if ZeroEstimator works for classification. X = iris.data y = np.array(iris.target) est = GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init=ZeroEstimator()) est.fit(X, y) assert_greater(est.score(X, y), 0.96) est = GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='zero') est.fit(X, y) assert_greater(est.score(X, y), 0.96) # binary clf mask = y != 0 y[mask] = 1 y[~mask] = 0 est = GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='zero') est.fit(X, y) assert_greater(est.score(X, y), 0.96) est = GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='foobar') assert_raises(ValueError, est.fit, X, y)