def test_zero_estimator_reg(): # Test if ZeroEstimator works for regression. est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init=ZeroEstimator()) est.fit(boston.data, boston.target) y_pred = est.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_almost_equal(mse, 33.0, decimal=0) est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init='zero') est.fit(boston.data, boston.target) y_pred = est.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_almost_equal(mse, 33.0, decimal=0) est = GradientBoostingRegressor(n_estimators=20, max_depth=1, random_state=1, init='foobar') assert_raises(ValueError, est.fit, boston.data, boston.target)
def test_plot_partial_dependence(): # Test partial dependence plot function. clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(boston.data, boston.target) grid_resolution = 25 fig, axs = plot_partial_dependence(clf, boston.data, [0, 1, (0, 1)], grid_resolution=grid_resolution, feature_names=boston.feature_names) assert len(axs) == 3 assert all(ax.has_data for ax in axs) # check with str features and array feature names fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN', ('CRIM', 'ZN')], grid_resolution=grid_resolution, feature_names=boston.feature_names) assert len(axs) == 3 assert all(ax.has_data for ax in axs) # check with list feature_names feature_names = boston.feature_names.tolist() fig, axs = plot_partial_dependence(clf, boston.data, ['CRIM', 'ZN', ('CRIM', 'ZN')], grid_resolution=grid_resolution, feature_names=feature_names) assert len(axs) == 3 assert all(ax.has_data for ax in axs)
def check_boston(presort, loss, subsample): # Check consistency on dataset boston house prices with least squares # and least absolute deviation. ones = np.ones(len(boston.target)) last_y_pred = None for sample_weight in None, ones, 2 * ones: clf = GradientBoostingRegressor(n_estimators=100, loss=loss, max_depth=4, subsample=subsample, min_samples_split=2, random_state=1, presort=presort) assert_raises(ValueError, clf.predict, boston.data) clf.fit(boston.data, boston.target, sample_weight=sample_weight) leaves = clf.apply(boston.data) assert_equal(leaves.shape, (506, 100)) y_pred = clf.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_less(mse, 6.0) if last_y_pred is not None: assert_array_almost_equal(last_y_pred, y_pred) last_y_pred = y_pred
def test_non_uniform_weights_toy_edge_case_reg(): X = [[1, 0], [1, 0], [1, 0], [0, 1]] y = [0, 0, 1, 0] # ignore the first 2 training samples by setting their weight to 0 sample_weight = [0, 0, 1, 1] for loss in ('huber', 'ls', 'lad', 'quantile'): gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss) gb.fit(X, y, sample_weight=sample_weight) assert_greater(gb.predict([[1, 0]])[0], 0.5)
def test_partial_dependence_regressor(): # Test partial dependence for regressor clf = GradientBoostingRegressor(n_estimators=10, random_state=1) clf.fit(boston.data, boston.target) grid_resolution = 25 pdp, axes = partial_dependence(clf, [0], X=boston.data, grid_resolution=grid_resolution) assert pdp.shape == (1, grid_resolution) assert axes[0].shape[0] == grid_resolution
def test_complete_regression(): # Test greedy trees with max_depth + 1 leafs. from sklearn.tree._tree import TREE_LEAF k = 4 est = GradientBoostingRegressor(n_estimators=20, max_depth=None, random_state=1, max_leaf_nodes=k + 1) est.fit(boston.data, boston.target) tree = est.estimators_[-1, 0].tree_ assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0], k + 1)
def test_quantile_loss(): # Check if quantile loss with alpha=0.5 equals lad. clf_quantile = GradientBoostingRegressor(n_estimators=100, loss='quantile', max_depth=4, alpha=0.5, random_state=7) clf_quantile.fit(boston.data, boston.target) y_quantile = clf_quantile.predict(boston.data) clf_lad = GradientBoostingRegressor(n_estimators=100, loss='lad', max_depth=4, random_state=7) clf_lad.fit(boston.data, boston.target) y_lad = clf_lad.predict(boston.data) assert_array_almost_equal(y_quantile, y_lad, decimal=4)
def test_staged_predict(): # Test whether staged decision function eventually gives # the same prediction. X, y = datasets.make_friedman1(n_samples=1200, random_state=1, noise=1.0) X_train, y_train = X[:200], y[:200] X_test = X[200:] clf = GradientBoostingRegressor() # test raise ValueError if not fitted assert_raises( ValueError, lambda X: np.fromiter(clf.staged_predict(X), dtype=np.float64), X_test) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) # test if prediction for last stage equals ``predict`` for y in clf.staged_predict(X_test): assert_equal(y.shape, y_pred.shape) assert_array_equal(y_pred, y)
def test_feature_importances(): X = np.array(boston.data, dtype=np.float32) y = np.array(boston.target, dtype=np.float32) for presort in True, False: clf = GradientBoostingRegressor(n_estimators=100, max_depth=5, min_samples_split=2, random_state=1, presort=presort) clf.fit(X, y) assert_true(hasattr(clf, 'feature_importances_')) # XXX: Remove this test in 0.19 after transform support to estimators # is removed. X_new = assert_warns(DeprecationWarning, clf.transform, X, threshold="mean") assert_less(X_new.shape[1], X.shape[1]) feature_mask = (clf.feature_importances_ > clf.feature_importances_.mean()) assert_array_almost_equal(X_new, X[:, feature_mask])
def test_regression_synthetic(): # Test on synthetic regression datasets used in Leo Breiman, # `Bagging Predictors?. Machine Learning 24(2): 123-140 (1996). random_state = check_random_state(1) regression_params = { 'n_estimators': 100, 'max_depth': 4, 'min_samples_split': 2, 'learning_rate': 0.1, 'loss': 'ls' } # Friedman1 X, y = datasets.make_friedman1(n_samples=1200, random_state=random_state, noise=1.0) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] for presort in True, False: clf = GradientBoostingRegressor(presort=presort) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 5.0) # Friedman2 X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] for presort in True, False: regression_params['presort'] = presort clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 1700.0) # Friedman3 X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state) X_train, y_train = X[:200], y[:200] X_test, y_test = X[200:], y[200:] for presort in True, False: regression_params['presort'] = presort clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) assert_less(mse, 0.015)