def partial_dependency(
    trials,
    space,
    params=None,
    model="RandomForestRegressor",
    n_grid_points=10,
    n_samples=50,
    **kwargs,
):
    """
    Calculates the partial dependency of parameters in a collection of :class:`Trial`.

    Parameters
    ----------
    trials: DataFrame or dict
        A dataframe of trials containing, at least, the columns 'objective' and 'id'. Or a dict
        equivalent. A dict is converted with ``pandas.DataFrame(trials)`` before use.

    space: Space object
        A space object from an experiment.

    params: list of str, optional
        The parameters to include in the computation. All parameters are included by default.

    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)

    n_grid_points: int
        Number of points in the grid to compute partial dependency. Default is 10.

    n_samples: int
        Number of samples to randomly generate the grid used to compute the partial dependency.
        Default is 50.

    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    dict
        One entry per individual parameter (key ``dim1.name``) and one per unordered pair of
        parameters (key ``(dim1.name, dim2.name)``). Each value is the tuple
        ``(grid, averages, stds)`` returned by ``partial_dependency_grid``, with ``grid``
        passed through ``reverse(flattened_space, grid)``. An empty dict is returned when
        there are no trials.

    """
    params = flatten_params(space, params)

    flattened_space = build_required_space(
        space,
        dist_requirement="linear",
        type_requirement="numerical",
        shape_requirement="flattened",
    )

    # The documented contract accepts a plain dict, which has no `.empty`
    # attribute; normalise it to a DataFrame so the rest of the function works.
    if isinstance(trials, dict):
        trials = pandas.DataFrame(trials)

    # `DataFrame.empty` is already True when there are zero rows, so no
    # separate shape check is needed.
    if trials.empty:
        return {}

    # Fit the surrogate regressor on the observed trials.
    observed = to_numpy(trials, space)
    observed = flatten_numpy(observed, flattened_space)
    regressor = train_regressor(model, observed, **kwargs)

    # Random points from the flattened space on which the regressor is evaluated.
    samples = flattened_space.sample(n_samples)
    samples = pandas.DataFrame(samples, columns=flattened_space.keys())

    def _dependency(names):
        """Compute (grid, averages, stds) for the given parameter names."""
        grid, averages, stds = partial_dependency_grid(
            flattened_space, regressor, names, samples, n_grid_points
        )
        return reverse(flattened_space, grid), averages, stds

    partial_dependencies = {}
    for x_i, x_name in enumerate(params):
        partial_dependencies[x_name] = _dependency([x_name])
        # Only pair with the parameters that follow, so each pair appears once.
        for y_name in params[x_i + 1:]:
            partial_dependencies[(x_name, y_name)] = _dependency([x_name, y_name])

    return partial_dependencies
def test_flattened_params(self, hspace):
    """Already-flattened parameter names must be returned as given"""
    requested = ["x[0]", "x[2]", "y[0,2]", "y[1,1]", "z"]
    flattened = flatten_params(hspace, requested)
    assert flattened == requested
def test_top_params(self, hspace):
    """A top-level name must expand to all of its flattened keys"""
    requested = ["x", "y[0,2]", "y[1,1]", "z"]
    flattened = flatten_params(hspace, requested)
    # "x" is expected to become x[0], x[1], x[2]; the remaining names are kept as they are.
    expected = [*(f"x[{index}]" for index in range(3)), *requested[1:]]
    assert flattened == expected
def test_no_flatten(self, space, hspace):
    """Names that need no flattening must come back unchanged"""
    cases = (
        (space, ("x", "y")),
        (hspace, ("z",)),
    )
    for search_space, names in cases:
        # Fresh lists on both sides, so the expectation never aliases the argument.
        assert flatten_params(search_space, list(names)) == list(names)
def test_unexisting_params(self, space):
    """Test that ValueError is raised if passing unexisting params"""
    # `match` applies `re.search` to the exception message. Checking it through
    # `pytest.raises` keeps the assertion from becoming unreachable code after
    # the raising call inside the `with` body.
    with pytest.raises(
        ValueError,
        match="Parameter idoexistbelieveme!!! not contained in space: ",
    ):
        flatten_params(space, ["idoexistbelieveme!!!"])
def test_params_unchanged(self, space):
    """The caller's params list must not be mutated by flatten_params"""
    expected = ["x", "y"]
    requested = list(expected)
    flatten_params(space, requested)
    assert requested == expected
def test_flat_no_params(self, hspace, flat_params):
    """Omitting params must return every flattened param"""
    flattened = flatten_params(hspace)
    assert flattened == flat_params
def test_no_params(self, space, params):
    """Omitting params must return every param of the space"""
    selected = flatten_params(space)
    assert selected == params