def _initialise_gp(self, gp, hyper_grid):
    """
    Build the surrogate model and store it on `self._surrogate_model`.

    The surrogate is a two-step `sklearn.pipeline.Pipeline`: a
    `sklearn.preprocessing.StandardScaler` followed by a
    `sklearn.gaussian_process.GaussianProcessRegressor`.

    When `gp` is `None` a default regressor is created:

        `kernel = sklearn.gaussian_process.kernels.Matern(nu=2.5)`
        `gp = sklearn.gaussian_process.GaussianProcessRegressor(
                  kernel, alpha=1e-6, normalize_y=True,
                  n_restarts_optimizer=5, random_state=self.generator)`

    When a regressor is supplied, its `random_state` attribute is
    overwritten in place with `self.generator`, so the class generator is
    used in both cases. Note that this mutates the caller's object.

    When `hyper_grid` is not `None` the pipeline is wrapped in
    `sklearn.model_selection.GridSearchCV` (with `n_jobs=self.nthreads`
    and otherwise default settings, i.e. the default cross-validation
    splitting: 5-fold in scikit-learn >= 0.22).

    Parameters
    ----------
    gp : None or `sklearn.gaussian_process.GaussianProcessRegressor`
        Gaussian process used as the surrogate model. `None` selects the
        default described above.
    hyper_grid : None or dict
        Hyperparameter grid, forwarded unchanged as the parameter grid of
        `GridSearchCV`. `None` disables the grid search.
        NOTE(review): since the searched estimator is the pipeline, keys
        presumably need the step prefix (e.g. `gp__alpha`) -- confirm
        against callers.

    Raises
    ------
    TypeError
        If `gp` is neither `None` nor a `GaussianProcessRegressor`.
    """
    # Reject anything that is neither "use the default" nor a real GPR
    if gp is not None and not isinstance(gp, GaussianProcessRegressor):
        raise TypeError(
            "`gp` must be of {} type.".format(GaussianProcessRegressor))

    if gp is None:
        # Default surrogate: Matern-5/2 kernel seeded by the class generator
        gp = GaussianProcessRegressor(
            kernels.Matern(nu=2.5),
            alpha=1e-6,
            normalize_y=True,
            n_restarts_optimizer=5,
            random_state=self.generator)
    else:
        # A user-supplied regressor must share the class generator too
        gp.random_state = self.generator

    # Inputs are standardised before they reach the Gaussian process
    steps = [('scaler', StandardScaler()), ('gp', gp)]
    scaled_gp = Pipeline(steps)

    # Without a grid the bare pipeline is the surrogate; otherwise the
    # pipeline is handed to a grid search over `hyper_grid`
    self._surrogate_model = (
        scaled_gp
        if hyper_grid is None
        else GridSearchCV(scaled_gp, hyper_grid, n_jobs=self.nthreads)
    )