def check_classification_synthetic(presort, loss): # Test GradientBoostingClassifier on synthetic dataset used by # Hastie et al. in ESLII Example 12.7. X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1) X_train, X_test = X[:2000], X[2000:] y_train, y_test = y[:2000], y[2000:] gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, random_state=0) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.09) gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=1, loss=loss, learning_rate=1.0, subsample=0.5, random_state=0, presort=presort) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) assert_less(error_rate, 0.08)
def test_non_uniform_weights_toy_edge_case_clf(): X = [[1, 0], [1, 0], [1, 0], [0, 1]] y = [0, 0, 1, 0] # ignore the first 2 training samples by setting their weight to 0 sample_weight = [0, 0, 1, 1] for loss in ('deviance', 'exponential'): gb = GradientBoostingClassifier(n_estimators=5) gb.fit(X, y, sample_weight=sample_weight) assert_array_equal(gb.predict([[1, 0]]), [1])
def test_check_inputs(): # Test input checks (shape and type of X and y). clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.fit, X, y + [0, 1]) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.fit, X, y, sample_weight=([1] * len(y)) + [0, 1])
def test_loss_function(): assert_raises(ValueError, GradientBoostingClassifier(loss='ls').fit, X, y) assert_raises(ValueError, GradientBoostingClassifier(loss='lad').fit, X, y) assert_raises(ValueError, GradientBoostingClassifier(loss='quantile').fit, X, y) assert_raises(ValueError, GradientBoostingClassifier(loss='huber').fit, X, y) assert_raises(ValueError, GradientBoostingRegressor(loss='deviance').fit, X, y) assert_raises(ValueError, GradientBoostingRegressor(loss='exponential').fit, X, y)
def test_check_inputs_predict(): # X has wrong shape clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y) x = np.array([1.0, 2.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x) x = np.array([[]]) assert_raises(ValueError, clf.predict, x) x = np.array([1.0, 2.0, 3.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x) clf = GradientBoostingRegressor(n_estimators=100, random_state=1) clf.fit(X, rng.rand(len(X))) x = np.array([1.0, 2.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x) x = np.array([[]]) assert_raises(ValueError, clf.predict, x) x = np.array([1.0, 2.0, 3.0])[:, np.newaxis] assert_raises(ValueError, clf.predict, x)
def test_friedman_mse_in_graphviz(): clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0) clf.fit(X, y) dot_data = StringIO() export_graphviz(clf, out_file=dot_data) clf = GradientBoostingClassifier(n_estimators=2, random_state=0) clf.fit(X, y) for estimator in clf.estimators_: export_graphviz(estimator[0], out_file=dot_data) for finding in finditer("\[.*?samples.*?\]", dot_data.getvalue()): assert_in("friedman_mse", finding.group())
def test_warm_start_wo_nestimators_change(): # Test if warm_start does nothing if n_estimators is not changed. # Regression test for #3513. clf = GradientBoostingClassifier(n_estimators=10, warm_start=True) clf.fit([[0, 1], [2, 3]], [0, 1]) assert_equal(clf.estimators_.shape[0], 10) clf.fit([[0, 1], [2, 3]], [0, 1]) assert_equal(clf.estimators_.shape[0], 10)
def check_iris(presort, subsample, sample_weight): # Check consistency on dataset iris. clf = GradientBoostingClassifier(n_estimators=100, loss='deviance', random_state=1, subsample=subsample, presort=presort) clf.fit(iris.data, iris.target, sample_weight=sample_weight) score = clf.score(iris.data, iris.target) assert_greater(score, 0.9) leaves = clf.apply(iris.data) assert_equal(leaves.shape, (150, 100, 3))
def test_probability_log(): # Predict probabilities. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) # check if probabilities are in [0, 1]. y_proba = clf.predict_proba(T) assert_true(np.all(y_proba >= 0.0)) assert_true(np.all(y_proba <= 1.0)) # derive predictions from probabilities y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0) assert_array_equal(y_pred, true_result)
def test_probability_exponential(): # Predict probabilities. clf = GradientBoostingClassifier(loss='exponential', n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) # check if probabilities are in [0, 1]. y_proba = clf.predict_proba(T) assert_true(np.all(y_proba >= 0.0)) assert_true(np.all(y_proba <= 1.0)) score = clf.decision_function(T).ravel() assert_array_almost_equal(y_proba[:, 1], 1.0 / (1.0 + np.exp(-2 * score))) # derive predictions from probabilities y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0) assert_array_equal(y_pred, true_result)