def test_partial_dependence_unknown_feature(estimator, features):
    # partial_dependence must reject the requested feature with a
    # ValueError whose message matches 'all features must be in'.
    data, target = make_classification(random_state=0)
    estimator.fit(data, target)

    expected_message = 'all features must be in'
    with pytest.raises(ValueError, match=expected_message):
        partial_dependence(estimator, data, [features])
def test_output_shape(Estimator, method, data, grid_resolution, features):
    # partial_dependence must return consistently shaped outputs whatever
    # the kind of estimator:
    # - classifiers with binary and multiclass settings
    # - regressors
    # - multi-task regressors
    model = Estimator()

    # ``data`` bundles the dataset with n_targets, i.e. the number of classes
    # (1 for binary classif) or the number of tasks / outputs in multi task
    # settings. It's equal to 1 for classical regression.
    (X, y), n_targets = data
    model.fit(X, y)

    pdp, axes = partial_dependence(
        model, X=X, features=features, method=method,
        grid_resolution=grid_resolution)

    n_features = len(features)
    # one leading axis for the targets, then one grid axis per feature
    expected_pdp_shape = (n_targets,) + (grid_resolution,) * n_features
    # one row of grid values per requested feature
    expected_axes_shape = (n_features, grid_resolution)

    assert pdp.shape == expected_pdp_shape
    assert axes is not None
    assert np.asarray(axes).shape == expected_axes_shape
def test_warning_recursion_non_constant_init():
    # A GBDT built with a non-constant init predictor must emit a
    # UserWarning when partial dependence uses the recursion method.
    warning_pattern = (
        'Using recursion method with a non-constant init predictor')

    model = GradientBoostingClassifier(init=DummyClassifier(),
                                       random_state=0)
    model.fit(X, y)

    # The same check is performed twice in a row; presumably this guards
    # against the warning being emitted on the first call only
    # (TODO confirm the intent).
    for _ in range(2):
        with pytest.warns(UserWarning, match=warning_pattern):
            partial_dependence(model, X, [0], method='recursion')
def test_multiclass_multioutput(Estimator):
    # Multiclass-multioutput classifiers are not supported, so
    # partial_dependence has to raise an error for them.

    # Build a multiclass-multioutput dataset: the 3-class target is
    # duplicated into two identical output columns.
    X, labels = make_classification(n_classes=3, n_clusters_per_class=1,
                                    random_state=0)
    y = np.vstack((labels, labels)).T

    clf = Estimator()
    clf.fit(X, y)

    expected_message = "Multiclass-multioutput estimators are not supported"
    with pytest.raises(ValueError, match=expected_message):
        partial_dependence(clf, X, [0])
def test_partial_dependence_easy_target(est, power):
    # When the target y is an obvious function (linear or quadratic) of a
    # single feature, the partial dependence for that feature should
    # reflect it.
    # A linear regression (on polynomial features if needed) is fitted to
    # the partial dependence curve and its r_squared is checked.
    n_samples = 200
    target_variable = 2
    rng = np.random.RandomState(0)

    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable] ** power
    est.fit(X, y)

    averaged_predictions, values = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000)

    grid = values[0].reshape(-1, 1)
    curve = averaged_predictions[0]

    # expand the grid into polynomial features so that a linear model can
    # reproduce a power-law curve
    design = PolynomialFeatures(degree=power).fit_transform(grid)

    linear_model = LinearRegression()
    linear_model.fit(design, curve)
    r2 = r2_score(curve, linear_model.predict(design))

    assert r2 > .99
def test_partial_dependence_pipeline():
    # partial_dependence must support pipelines
    iris = load_iris()
    data, target = iris.data, iris.target

    scaler = StandardScaler()
    clf = DummyClassifier(random_state=42)
    pipe = make_pipeline(scaler, clf)

    # Fit the classifier on manually scaled data first, then fit the
    # pipeline end to end (the pipeline is built from the same scaler and
    # classifier objects, so the order of these fits is kept as is).
    scaled = scaler.fit_transform(data)
    clf.fit(scaled, target)
    pipe.fit(data, target)

    features = 0
    pdp_pipe, values_pipe = partial_dependence(
        pipe, data, features=[features])
    pdp_clf, values_clf = partial_dependence(
        clf, scaler.transform(data), features=[features])

    assert_allclose(pdp_pipe, pdp_clf)

    # The pipeline grid is compared against the classifier grid mapped back
    # through the scaler (multiply by scale_, add mean_).
    rescaled_grid = (values_clf[0] * scaler.scale_[features]
                     + scaler.mean_[features])
    assert_allclose(values_pipe[0], rescaled_grid)
def test_recursion_decision_function(est, target_feature):
    # The recursion method (which implicitly uses decision_function) must
    # give the same result as the brute method called with
    # response_method=decision_function
    X, y = make_classification(n_classes=2, n_clusters_per_class=1,
                               random_state=1)
    # balanced classes: make sure the init estimator predicts 0 anyway
    assert np.mean(y) == .5

    est.fit(X, y)

    def averaged_predictions(method):
        # partial dependence of ``target_feature`` computed with ``method``
        preds, _ = partial_dependence(est, X, [target_feature],
                                      response_method='decision_function',
                                      method=method)
        return preds

    preds_recursion = averaged_predictions('recursion')
    preds_brute = averaged_predictions('brute')

    assert_allclose(preds_recursion, preds_brute, atol=1e-7)
def test_partial_dependence_sample_weight():
    # Test near perfect correlation between partial dependence and diagonal
    # when sample weights emphasize y = x predictions
    # non-regression test for #13193
    # TODO: extend to HistGradientBoosting once sample_weight is supported
    N = 1000
    rng = np.random.RandomState(123456)
    # the mask is drawn before x so the random stream is consumed in a
    # fixed order
    mask = rng.randint(2, size=N, dtype=bool)
    x = rng.rand(N)

    # y = x on the mask and y = -x outside of it
    y = np.where(mask, x, -x)
    X = np.c_[mask, x]

    # weight the samples where y = x a thousand times more than the others
    sample_weight = np.where(mask, 1000., 1.)

    reg = GradientBoostingRegressor(n_estimators=10, random_state=1)
    reg.fit(X, y, sample_weight=sample_weight)

    pdp, values = partial_dependence(reg, X, features=[1])

    correlation = np.corrcoef(pdp, values)[0, 1]
    assert correlation > 0.99
def test_partial_dependence_X_list(estimator):
    # array-like inputs (here a plain Python list built from X) must be
    # accepted by partial_dependence
    data, target = make_classification(random_state=0)
    estimator.fit(data, target)

    data_as_list = list(data)
    partial_dependence(estimator, data_as_list, [0])
def test_partial_dependence_unfitted_estimator(estimator):
    # The estimator is passed without being fitted in this test:
    # partial_dependence must raise a ValueError saying so.
    expected_message = "'estimator' parameter must be a fitted estimator"
    with pytest.raises(ValueError, match=expected_message):
        partial_dependence(estimator, X, [0])
def test_partial_dependence_error(estimator, params, err_msg):
    # Calling partial_dependence on a fitted estimator with the keyword
    # arguments in ``params`` must raise a ValueError matching ``err_msg``.
    X_train, y_train = make_classification(random_state=0)
    estimator.fit(X_train, y_train)

    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X_train, **params)
# two features: for an average occupancy greater than two, the house price is
# nearly independent of the house age, whereas for values less than two there
# is a strong dependence on age.

##############################################################################
# 3D interaction plots
# --------------------
#
# Let's make the same partial dependence plot for the 2 features interaction,
# this time in 3 dimensions.

fig = plt.figure()

# pair of feature indices whose joint partial dependence is plotted
target_feature = (1, 5)
pdp, axes = partial_dependence(est, X_train, target_feature,
                               grid_resolution=20)

# Combine the two per-feature grids into 2D coordinate arrays; pdp[0] is
# transposed so that it lines up with the arrays returned by np.meshgrid.
XX, YY = np.meshgrid(axes[0], axes[1])
Z = pdp[0].T

# Create the 3D axes through the figure so that they are registered with it.
# A bare ``Axes3D(fig)`` no longer adds itself to the figure in recent
# Matplotlib releases, which leaves the figure empty.
# NOTE(review): on older Matplotlib the '3d' projection is only available
# once mpl_toolkits.mplot3d has been imported, so keep the file's existing
# Axes3D import.
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
                       cmap=plt.cm.BuPu, edgecolor='k')
ax.set_xlabel(names[target_feature[0]])
ax.set_ylabel(names[target_feature[1]])
ax.set_zlabel('Partial dependence')
# pretty init view