def test_grid_from_X():
    """Sanity-check the values and the shapes returned by ``_grid_from_X``."""
    percentiles = (.05, .95)

    # Each column holds only two distinct values, far fewer than the requested
    # resolution, so the unique values are used instead of the percentiles
    # and the grid must be the cartesian product of the input columns.
    X = np.asarray([[1, 2], [3, 4]])
    grid, axes = _grid_from_X(X, percentiles, 100)
    expected_grid = [[1, 2], [1, 4], [3, 2], [3, 4]]
    assert_array_equal(grid, expected_grid)
    assert_array_equal(axes, X.T)

    # The shapes of the returned objects depend on how many unique values
    # each feature has compared to the grid resolution.
    rng = np.random.RandomState(0)
    grid_resolution = 15

    # Case 1: n_unique_values > grid_resolution for both features.
    X = rng.normal(size=(20, 2))
    grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
    n_features = X.shape[1]
    assert grid.shape == (grid_resolution ** 2, n_features)
    assert np.asarray(axes).shape == (2, grid_resolution)

    # Case 2: n_unique_values < grid_resolution for the first feature, whose
    # actual values are then used as its axis.
    n_unique_values = 12
    X[n_unique_values - 1:, 0] = 12345
    rng.shuffle(X)  # the row order must not matter
    grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
    n_grid_points = n_unique_values * grid_resolution
    assert grid.shape == (n_grid_points, X.shape[1])
    # Here axes is a list of arrays with different lengths.
    assert axes[0].shape == (n_unique_values,)
    assert axes[1].shape == (grid_resolution,)
def _grid(self, X, features, percentiles, grid_resolution):
    """Build the evaluation grid for the selected features of ``X``.

    Parameters
    ----------
    X : array-like or dataframe
        Data from which the feature columns are taken.
    features : column selector(s)
        Resolved to column indices through ``_get_column_indices``.
    percentiles : forwarded unchanged to ``_grid_from_X``.
    grid_resolution : forwarded unchanged to ``_grid_from_X``.

    Returns
    -------
    grid, values
        The two objects returned by ``_grid_from_X`` for the selected
        columns.
    """
    # Resolve the selectors into a flat, C-ordered int32 index array.
    column_indices = _get_column_indices(X, features)
    features_indices = np.asarray(
        column_indices, dtype=np.int32, order='C'
    ).ravel()

    # Restrict X to the requested columns before building the grid.
    X_selected = _safe_indexing(X, features_indices, axis=1)
    grid, axes_values = _grid_from_X(X_selected, percentiles, grid_resolution)
    return grid, axes_values
def partial_dependence(estimator, X, features):
    """Compute the partial dependence of one or more features.

    The partial dependence of a feature (or a set of features) is the
    average response of an estimator for each possible value of that
    feature.

    Code from sklearn's _partial_dependence.py. This implementation
    always uses method="brute", grid_resolution=100 and
    percentiles=(0.05, 0.95).

    Parameters
    ----------
    estimator : class
        Model estimator to use.

    X : pd.DataFrame
        Feature set used to generate a grid of values for the target
        features (where the partial dependence is evaluated), and also
        to generate values for the complement features.

    features : int, str or sequence
        The feature or pair of interacting features for which the
        partial dependency should be computed.

    Returns
    -------
    averaged_predictions: np.ndarray
        Average of the predictions.

    values: list
        Values used for the predictions.

    """
    # Fixed settings of this implementation (see the docstring).
    percentiles = (0.05, 0.95)
    grid_resolution = 100

    # Build the grid from the target feature columns only.
    X_target = _safe_indexing(X, features, axis=1)
    grid, values = _grid_from_X(X_target, percentiles, grid_resolution)

    # Only the averaged predictions are kept; the second output is unused.
    brute_output = _partial_dependence_brute(estimator, grid, features, X, "auto")
    averaged_predictions, _ = brute_output

    # Reshape averaged_predictions to (n_outputs, n_values_feature_0, ...)
    axis_lengths = [val.shape[0] for val in values]
    averaged_predictions = averaged_predictions.reshape(-1, *axis_lengths)

    return averaged_predictions, values
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
    """Check that ``_grid_from_X`` raises ``ValueError`` matching ``err_msg``."""
    X = np.asarray([[1, 2], [3, 4]])
    call_kwargs = {
        "grid_resolution": grid_resolution,
        "percentiles": percentiles,
    }
    with pytest.raises(ValueError, match=err_msg):
        _grid_from_X(X, **call_kwargs)