def bay_opt(self):
    # Initialize fresh surrogate models for the objective and the constraint
    for i in range(self.niter):
        m52_1 = ConstantKernel(1) * RBF(np.array([100] * 12))
        self.gpr_obj = GaussianProcessRegressor(kernel=m52_1, alpha=10, noise="gaussian")
        m52_2 = ConstantKernel(1) * RBF(np.array([100] * 12))
        self.gpr_constraint = GaussianProcessRegressor(kernel=m52_2, alpha=10, noise="gaussian")

        # Update the Gaussian processes with the existing samples
        self.gpr_obj.fit(self.x, self.y_obj)
        self.gpr_constraint.fit(self.x, self.y_constraint)

        # Obtain the next sampling point from the acquisition function (expected improvement)
        X_next = self.propose_location()

        # Obtain the next noisy samples from the objective and constraint functions
        Y_next1 = np.array([self.obj_func(X_next)]).reshape(-1, 1)
        Y_next2 = np.array([self.constraint_func(X_next)]).reshape(-1, 1)

        # Add the new sample to the previous samples
        self.x = np.vstack((self.x, X_next))
        self.y_obj = np.vstack((self.y_obj, Y_next1))
        self.y_constraint = np.vstack((self.y_constraint, Y_next2))

    # Best feasible point: minimal objective among samples with a positive constraint value
    idx = np.where(self.y_constraint > 0)[0]
    t = idx[np.argmin(self.y_obj[idx])]
    self.f_best = self.y_obj[t]
    self.min_x = self.x[t]
    return self.f_best, self.min_x
def mediator_triage(mediator_id, base_params_dict):
    # Mediator id 1 is the optimal mediator.
    if mediator_id == 1:
        return OptimalMediator(**base_params_dict)
    elif 2 <= mediator_id <= 4:
        # Polynomial mediator, individual. Needs an additional parameter for
        # the degree of the polynomial.
        poly_mediator_params = base_params_dict.copy()
        poly_mediator_params['degree'] = mediator_id
        return PolyMediatorIndividual(**poly_mediator_params)
    elif mediator_id == 5:
        # Default Bayes mediator. Does not take any additional parameters.
        return BayesMediatorSocial(**base_params_dict)
    elif 6 <= mediator_id <= 8:
        # Polynomial Bayes mediator, social.
        base_mediator_params = base_params_dict.copy()
        base_mediator_params['base_estimator'] = \
            GaussianProcessRegressor(
                kernel=Exponentiation(
                    Sum(Product(ConstantKernel(), DotProduct()),
                        ConstantKernel(1.0, (0.01, 1000.0))),
                    float(mediator_id) - 4.0),
                normalize_y=True,
                noise="gaussian",
                n_restarts_optimizer=2)
        return BayesMediatorSocial(**base_mediator_params)
    elif mediator_id == 9:
        # GP mediator, individual, with the default kernel, i.e. the same
        # Matérn kernel used by BayesMediator.
        return GPMediatorIndividual(**base_params_dict)
    elif mediator_id == 10:
        # GP mediator, social, with the default kernel, i.e. the same
        # Matérn kernel used by BayesMediator.
        return GPMediatorSocial(**base_params_dict)
    elif 11 <= mediator_id <= 13:
        poly_mediator_params = base_params_dict.copy()
        poly_mediator_params['degree'] = mediator_id - 9
        return PolyMediatorSocial(**poly_mediator_params)
    else:
        raise ValueError('Unknown mediator with id = %d' % mediator_id)
def interpolate(thetas, z_thetas, xx, yy, method='linear',
                z_uncertainties_thetas=None, matern_exponent=0.5,
                length_scale_min=0.001, length_scale_default=1.,
                length_scale_max=1000., noise_level=0.001,
                subtract_min=False):
    if method == 'cubic':
        interpolator = CloughTocher2DInterpolator(thetas[:], z_thetas)
        zz = interpolator(np.dstack((xx.flatten(), yy.flatten())))
        zi = zz.reshape(xx.shape)

    elif method == 'gp':
        if z_uncertainties_thetas is not None:
            gp = GaussianProcessRegressor(
                normalize_y=True,
                kernel=ConstantKernel(1.0, (1.e-9, 1.e9)) * Matern(
                    length_scale=[length_scale_default],
                    length_scale_bounds=[(length_scale_min, length_scale_max)],
                    nu=matern_exponent) + WhiteKernel(noise_level),
                n_restarts_optimizer=10,
                alpha=z_uncertainties_thetas)
        else:
            gp = GaussianProcessRegressor(
                normalize_y=True,
                kernel=ConstantKernel(1.0, (1.e-9, 1.e9)) * Matern(
                    length_scale=length_scale_default,
                    length_scale_bounds=(length_scale_min, length_scale_max),
                    nu=matern_exponent) + WhiteKernel(noise_level),
                n_restarts_optimizer=10)

        gp.fit(thetas[:], z_thetas[:])
        zz, _ = gp.predict(np.c_[xx.ravel(), yy.ravel()], return_std=True)
        zi = zz.reshape(xx.shape)

    elif method == 'linear':
        interpolator = LinearNDInterpolator(thetas[:], z_thetas)
        zz = interpolator(np.dstack((xx.flatten(), yy.flatten())))
        zi = zz.reshape(xx.shape)

    else:
        raise ValueError("Unknown interpolation method: %s" % method)

    mle = np.unravel_index(zi.argmin(), zi.shape)

    if subtract_min:
        zi -= zi[mle]

    return zi, mle
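# A minimal usage sketch for interpolate() above, assuming scattered 2-D samples and a
# np.meshgrid grid; the names below (thetas, z_thetas) are illustrative stand-ins. The
# 'linear' branch only needs scipy's LinearNDInterpolator, imported by the surrounding module.
import numpy as np

thetas = np.random.uniform(-1.0, 1.0, size=(30, 2))  # hypothetical sample locations
z_thetas = np.sum(thetas ** 2, axis=1)               # hypothetical surface values

xx, yy = np.meshgrid(np.linspace(-1, 1, 50), np.linspace(-1, 1, 50))
zi, mle = interpolate(thetas, z_thetas, xx, yy, method='linear', subtract_min=True)
print(zi.shape, mle)  # (50, 50) grid and the grid index of its minimum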
def init_model(self):
    """Initializes the surrogate model of the Gaussian process.

    The model gets created with the right parameters, but is not fit with any
    data yet. The `base_model` will be cloned in `update_model` and fit with
    observation data.
    """
    # n_dims == n_hparams
    n_dims = len(self.searchspace.keys())

    if self.interim_results:
        n_dims += 1  # add one dim for the augmented budget

    cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
    other_kernel = Matern(
        length_scale=np.ones(n_dims),
        length_scale_bounds=[(0.01, 100)] * n_dims,
        nu=2.5,
    )
    base_model = GaussianProcessRegressor(
        kernel=cov_amplitude * other_kernel,
        normalize_y=True,
        noise="gaussian",
        n_restarts_optimizer=2,
    )
    self.base_model = base_model
def setup_tuner(self):
    self.tunecfg = self.experiment["tuner"]
    self.parameters = list(self.tunecfg["parameters"].keys())
    self.dimensions = self.parse_dimensions(self.tunecfg["parameters"])
    self.space = normalize_dimensions(self.dimensions)
    self.priors = self.parse_priors(self.tunecfg["priors"])

    self.kernel = ConstantKernel(
        constant_value=self.tunecfg.get("variance_value", 0.1 ** 2),
        constant_value_bounds=tuple(
            self.tunecfg.get("variance_bounds", (0.01 ** 2, 0.5 ** 2))),
    ) * Matern(
        length_scale=self.tunecfg.get("length_scale_value", 0.3),
        length_scale_bounds=tuple(
            self.tunecfg.get("length_scale_bounds", (0.2, 0.8))),
        nu=2.5,
    )
    self.opt = Optimizer(
        dimensions=self.dimensions,
        n_points=self.tunecfg.get("n_points", 1000),
        n_initial_points=self.tunecfg.get("n_initial_points",
                                          5 * len(self.dimensions)),
        gp_kernel=self.kernel,
        gp_kwargs=dict(normalize_y=True),
        gp_priors=self.priors,
        acq_func=self.tunecfg.get("acq_func", "ts"),
        acq_func_kwargs=self.tunecfg.get("acq_func_kwargs", None),  # TODO: Check if this works for all parameters
        random_state=self.rng.randint(0, np.iinfo(np.int32).max),
    )
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator.

    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need for
    an estimator.

    Parameters
    ----------
    base_estimator : "GP", "RF", "ET", "GBRT", "DUMMY" or sklearn regressor
        Should inherit from `sklearn.base.RegressorMixin`. In addition, the
        `predict` method should have an optional `return_std` argument, which
        returns `std(Y | x)` along with `E[Y | x]`. If base_estimator is one
        of ["GP", "RF", "ET", "GBRT", "DUMMY"], a surrogate model
        corresponding to the relevant `X_minimize` function is created.

    space : Space instance
        Has to be provided if the base_estimator is a Gaussian process.
        Ignored otherwise.

    kwargs : dict
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY"]:
            raise ValueError("Valid strings for the base_estimator parameter "
                             "are: 'RF', 'ET', 'GP', 'GBRT' or 'DUMMY', not "
                             "%s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude * other_kernel,
                                                  normalize_y=True,
                                                  noise="gaussian",
                                                  n_restarts_optimizer=2)

    if ('n_jobs' in kwargs.keys()) and not hasattr(base_estimator, 'n_jobs'):
        del kwargs['n_jobs']

    base_estimator.set_params(**kwargs)
    return base_estimator
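# A quick sketch of cooking the default GP surrogate with the function above, assuming
# the skopt imports it relies on (Space, normalize_dimensions, kernels) are in scope.
# The 2-D box space is illustrative; extra kwargs such as random_state are forwarded
# to the estimator's set_params().
from skopt.space import Real

space = [Real(-2.0, 2.0), Real(-2.0, 2.0)]
gp = cook_estimator("GP", space=space, random_state=0)
print(gp.kernel)  # ConstantKernel(1.0) * Matern(nu=2.5) over the normalized space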
def minimal_gp(request):
    kernel = ConstantKernel(
        constant_value=1 ** 2, constant_value_bounds=(0.01 ** 2, 1 ** 2)
    ) * RBF(length_scale=1.0, length_scale_bounds=(0.5, 1.5))
    gp = BayesGPR(
        random_state=1,
        normalize_y=False,
        kernel=kernel,
        warp_inputs=request.param,
    )
    return gp
def __init__(self, dimensions_file: str, min_num_results_to_fit: int = 8,
             lease_timeout='2 days'):
    self.__all_experiments = pd.DataFrame()
    self.__all_experiments['status'] = [self.WAITING] * len(self.__all_experiments)
    self.__all_experiments['last_update'] = pd.Series(pd.Timestamp(float('NaN')))
    self.__all_experiments['client'] = [""] * len(self.__all_experiments)
    self.__lease_duration = pd.to_timedelta(lease_timeout)
    self.__leased_experiments = []
    dims = self.__load_dimensions(dimensions_file)
    self.__dimension_names = list(dims.keys())
    self.__dimensions = list(dims.values())
    self.__min_num_results_to_fit = min_num_results_to_fit

    # Initialize the optimizer
    dim_types = [check_dimension(d) for d in self.__dimensions]
    is_cat = all(isinstance(d, Categorical) for d in dim_types)
    if is_cat:
        transformed_dims = [check_dimension(d, transform="identity")
                            for d in self.__dimensions]
    else:
        transformed_dims = []
        for dim_type, dim in zip(dim_types, self.__dimensions):
            if isinstance(dim_type, Categorical):
                transformed_dims.append(check_dimension(dim, transform="onehot"))
            else:
                # To make sure that the GP operates in the [0, 1] space
                transformed_dims.append(check_dimension(dim, transform="normalize"))
    space = Space(transformed_dims)

    # Default GP
    cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
    if is_cat:
        other_kernel = HammingKernel(length_scale=np.ones(space.transformed_n_dims))
        acq_optimizer = "lbfgs"
    else:
        other_kernel = Matern(
            length_scale=np.ones(space.transformed_n_dims),
            length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
            nu=2.5)

    base_estimator = GaussianProcessRegressor(
        kernel=cov_amplitude * other_kernel, normalize_y=True,
        random_state=None, alpha=0.0, noise='gaussian',
        n_restarts_optimizer=2)
    self.__opt = Optimizer(self.__dimensions, base_estimator,
                           acq_optimizer="lbfgs", n_random_starts=100,
                           acq_optimizer_kwargs=dict(n_points=10000))
def __init__(self, seq_len, embedder, Xinit, yinit, noise_std=None):
    self.seq_len = seq_len
    self.noise_std = noise_std
    self.current_best_seq = None
    self.current_best_val = np.inf
    self.X_sample = []
    self.y_sample = []
    self.nqueries = 0

    m52 = ConstantKernel(1.0) * StringEmbedKernel(seq_len=seq_len,
                                                  embedder=embedder)
    if noise_std is None:
        noise_std = np.std(yinit)
    gpr = GaussianProcessRegressor(kernel=m52, alpha=noise_std ** 2)
    gpr.fit(Xinit, yinit)
    self.gpr = gpr
def _get_gp_regressor(length_scale=1., nu=2.5, noise=0.1):
    """Creates the GaussianProcessRegressor model.

    Args:
        length_scale (Union[float, list]): Length scale of the GP kernel. If float, it is the
            same for all dimensions; if array, each element defines the length scale of its
            dimension.
        nu (float): Controls the smoothness of the approximation.
            See https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.Matern.html
        noise (float): Standard deviation of the observation noise; `alpha` is set to `noise ** 2`.

    Returns:
        A skopt.learning.GaussianProcessRegressor with the given parameters.
    """
    kernel = ConstantKernel(1.0) * Matern(length_scale=length_scale, nu=nu)
    return GaussianProcessRegressor(kernel=kernel, alpha=noise ** 2)
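# A minimal usage sketch for _get_gp_regressor(); the helper returns an unfit model, so
# fit/predict below follow the standard sklearn API. The 1-D data here is illustrative.
import numpy as np

gpr = _get_gp_regressor(length_scale=0.5, nu=2.5, noise=0.05)

X = np.linspace(0, 1, 8).reshape(-1, 1)
y = np.sin(2 * np.pi * X).ravel() + 0.05 * np.random.randn(8)
gpr.fit(X, y)
mean, std = gpr.predict(np.array([[0.25]]), return_std=True)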
def cook_estimator(base_estimator, space=None, **kwargs):
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        allowed_estimators = ['GP', 'ET', 'RF', 'GBRT', 'DUMMY']
        if base_estimator not in allowed_estimators:
            raise ValueError(
                'invalid estimator, should be in {}, got {}'.format(
                    allowed_estimators, base_estimator))
    elif not is_regressor(base_estimator):
        raise ValueError('base estimator should be a regressor, got {}'.format(
            base_estimator))

    if base_estimator == 'GP':
        if space is not None:
            # space = Space(space)
            space = Space(normalize_param_space(space))
            n_params = space.transformed_n_params
            is_cat = space.is_categorical
        else:
            raise ValueError('expected a space instance, got None')

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_params))
        else:
            other_kernel = Matern(length_scale=np.ones(n_params),
                                  length_scale_bounds=[(0.01, 100)] * n_params,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude * other_kernel,
                                                  normalize_y=True,
                                                  noise='gaussian',
                                                  n_restarts_optimizer=2)
    elif base_estimator == 'RF':
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == 'ET':
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == 'GBRT':  # fixed: was 'GRBT', which could never match the allowed list
        gbrt = GradientBoostingRegressor(n_estimators=30, loss='quantile')
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == 'DUMMY':
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
def __init__(self, num_issues, X, y):
    BaseSurrogate.__init__(self, num_issues, X, y)
    # Instantiate a Gaussian process model.
    # TODO: Investigate which kernel we should be using.
    # TODO: For now, to have a valid comparison, use the same kernel as the
    #   Bayesian optimization, i.e. the skopt kernels with the same parameters.
    cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
    other_kernel = Matern(length_scale=np.ones(num_issues),
                          length_scale_bounds=[(0.01, 100)] * num_issues,
                          nu=2.5)
    self.gp = GaussianProcessRegressor(
        kernel=cov_amplitude * other_kernel,
        normalize_y=True,
        n_restarts_optimizer=2,
        noise=1e-9,
        random_state=np.random.mtrand._rand.randint(0, np.iinfo(np.int32).max))
def test_guess_priors():
    """Construct a complicated kernel and check if priors are constructed
    correctly."""
    kernel = Exponentiation(
        ConstantKernel(constant_value_bounds="fixed") * Matern()
        + WhiteKernel()
        + RBF(length_scale=(1.0, 1.0)),
        2.0,
    )

    priors = guess_priors(kernel)

    assert len(priors) == 4
    expected = [
        -0.02116327824572739,
        -2.112906921232193,
        -0.02116327824572739,
        -0.02116327824572739,
    ]
    for p, v in zip(priors, expected):
        assert_almost_equal(p(-0.9), v)
def test_guess_priors():
    """Construct a complicated kernel and check if priors are constructed
    correctly."""
    kernel = Exponentiation(
        ConstantKernel(constant_value_bounds="fixed") * Matern()
        + WhiteKernel()
        + CompoundKernel([RBF(), Matern()]),
        2.0,
    )

    priors = guess_priors(kernel)

    assert len(priors) == 4
    expected = [
        -1.737085713764618,
        -4.107091211892862,
        -1.737085713764618,
        -1.737085713764618,
    ]
    for p, v in zip(priors, expected):
        assert_almost_equal(p(0.0), v)
def test_bayes_opt_base_estimator():
    from skopt.learning import GaussianProcessRegressor
    from skopt.learning.gaussian_process.kernels import ConstantKernel
    from skopt.learning.gaussian_process.kernels import Matern

    cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
    matern = Matern(length_scale=np.ones(2),
                    length_scale_bounds=[(0.01, 100)] * 2,
                    nu=2.5)
    base_estimator = GaussianProcessRegressor(
        kernel=cov_amplitude * matern,
        normalize_y=True, random_state=0, alpha=0.0,
        noise="gaussian", n_restarts_optimizer=2)

    opt = SkOptOptimizer(
        dimensions=[(-1.0, 1.0), (-1.0, 1.0)],
        base_estimator=base_estimator,
        random_state=0)
    opt.init(2)
    params = np.empty(2)
    for _ in range(10):
        opt.get_next_parameters(params)
        feedback = [-np.linalg.norm(params - 0.5384 * np.ones(2))]
        opt.set_evaluation_feedback(feedback)
    assert_greater(opt.get_best_fitness(), -0.3)
def construct_default_kernel(dimensions):
    """Construct a Matern kernel as the default kernel to be used in the optimizer.

    Parameters
    ----------
    dimensions : list of dimensions
        Elements are skopt.space.Dimension instances (Real, Integer or
        Categorical) or any other valid value that defines a skopt dimension
        (see the skopt.Optimizer docs).

    Returns
    -------
    kernel : kernel object
        The kernel specifying the covariance function of the GP used in the
        optimization.
    """
    n_parameters = len(dimensions)
    kernel = ConstantKernel(
        constant_value=1.0, constant_value_bounds=(0.1, 2.0)
    ) * Matern(
        length_scale=[0.3] * n_parameters,
        length_scale_bounds=(0.05, 1.0),
        nu=2.5,
    )
    return kernel
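# A short sketch of construct_default_kernel(); only len(dimensions) is used, so plain
# bound tuples work as stand-in dimensions here. Each dimension gets its own length
# scale, initialized at 0.3 on skopt's normalized [0, 1] scale.
kernel = construct_default_kernel([(0.0, 1.0), (0.0, 1.0)])
print(kernel)  # 1**2 * Matern(length_scale=[0.3, 0.3], nu=2.5)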
import numpy as np
from sklearn.base import clone
from skopt import gp_minimize
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern
from bayesian_optimization_util import plot_approximation, plot_acquisition

noise = 0.2
bounds = np.array([[-1.0, 2.0]])


def f(X, noise=noise):
    return -np.sin(3 * X) - X ** 2 + 0.7 * X + noise * np.random.randn(*X.shape)


# np.arange needs scalars, not length-1 arrays
X = np.arange(bounds[0, 0], bounds[0, 1], 0.01).reshape(-1, 1)

# Noise-free objective function values at X
Y = f(X, 0)

# Use a custom kernel and estimator to match the previous example
m52 = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
gpr = GaussianProcessRegressor(kernel=m52, alpha=noise ** 2)

X_init = np.array([[-0.9], [1.1]])
Y_init = f(X_init)

r = gp_minimize(lambda x: -f(np.array(x))[0],
                bounds.tolist(),
                base_estimator=gpr,
                acq_func='EI',       # expected improvement
                xi=0.01,             # exploitation-exploration trade-off
                n_calls=10,          # number of iterations
                n_random_starts=0,   # initial samples are provided
                x0=X_init.tolist(),  # initial samples
                y0=-Y_init.ravel())
import numpy as np

from skopt.learning.gaussian_process.kernels import ConstantKernel
from skopt.learning.gaussian_process.kernels import DotProduct
from skopt.learning.gaussian_process.kernels import ExpSineSquared
from skopt.learning.gaussian_process.kernels import Matern
from skopt.learning.gaussian_process.kernels import RationalQuadratic
from skopt.learning.gaussian_process.kernels import RBF
from skopt.learning.gaussian_process.kernels import WhiteKernel

KERNELS = []
for length_scale in [np.arange(1, 6), [0.2, 0.3, 0.5, 0.6, 0.1]]:
    KERNELS.extend([
        RBF(length_scale=length_scale),
        Matern(length_scale=length_scale, nu=0.5),
        Matern(length_scale=length_scale, nu=1.5),
        Matern(length_scale=length_scale, nu=2.5),
        RationalQuadratic(alpha=2.0, length_scale=2.0),
        ExpSineSquared(length_scale=2.0, periodicity=3.0),
        ConstantKernel(constant_value=1.0),
        WhiteKernel(noise_level=2.0),
        Matern(length_scale=length_scale, nu=2.5) ** 3.0,
        RBF(length_scale=length_scale) + Matern(length_scale=length_scale, nu=1.5),
        RBF(length_scale=length_scale) * Matern(length_scale=length_scale, nu=1.5),
        DotProduct(sigma_0=2.0),
    ])


# Copied (shamelessly) from sklearn.gaussian_process.kernels
def _approx_fprime(xk, f, epsilon, args=()):
    # Forward-difference approximation of the gradient of a matrix-valued function
    f0 = f(*((xk,) + args))
    grad = np.zeros((f0.shape[0], f0.shape[1], len(xk)), float)
    ei = np.zeros((len(xk),), float)
    for k in range(len(xk)):
        ei[k] = 1.0
        d = epsilon * ei
        grad[:, :, k] = (f(*((xk + d,) + args)) - f0) / d[k]
        ei[k] = 0.0
    return grad
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator

    For the special `base_estimator` called "DUMMY", the return value is None. This
    corresponds to sampling points at random, hence there is no need for an estimator

    Parameters
    ----------
    base_estimator: {SKLearn Regressor, "GP", "RF", "ET", "GBRT", "DUMMY"}, default="GP"
        If not string, should inherit from `sklearn.base.RegressorMixin`. In addition, the
        `predict` method should have an optional `return_std` argument, which returns
        `std(Y | x)`, along with `E[Y | x]`. If `base_estimator` is a string in {"GP", "RF",
        "ET", "GBRT", "DUMMY"}, a surrogate model corresponding to the relevant `X_minimize`
        function is created
    space: `hyperparameter_hunter.space.space_core.Space`
        Required only if the `base_estimator` is a Gaussian Process. Ignored otherwise
    **kwargs: Dict
        Extra parameters provided to the `base_estimator` at initialization time

    Returns
    -------
    SKLearn Regressor
        Regressor instance cooked up according to `base_estimator` and `kwargs`"""
    #################### Validate `base_estimator` ####################
    str_estimators = ["GP", "ET", "RF", "GBRT", "DUMMY"]
    if isinstance(base_estimator, str):
        if base_estimator.upper() not in str_estimators:
            raise ValueError(f"Expected `base_estimator` in {str_estimators}. Got {base_estimator}")
        # Convert to upper after error check, so above error shows actual given `base_estimator`
        base_estimator = base_estimator.upper()
    elif not is_regressor(base_estimator):
        raise ValueError("`base_estimator` must be a regressor")

    #################### Get Cooking ####################
    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            # NOTE: Below `normalize_dimensions` is NOT an unnecessary duplicate of the call in
            #   `Optimizer` - `Optimizer` calls `cook_estimator` before its `dimensions` have been
            #   normalized, so `normalize_dimensions` must also be called here
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a `Space` instance, not None")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # Only special if *all* dimensions are `Categorical`
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims,
                nu=2.5,
            )
        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True,
            noise="gaussian",
            n_restarts_optimizer=2,
        )
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100, min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100, min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
def bayesian(X, Y):
    bds = [
        {'name': 'learning_rate', 'type': 'continuous', 'domain': (0.0, 1.0)},
        {'name': 'gamma', 'type': 'continuous', 'domain': (0.0, 5.0)},
        {'name': 'max_depth', 'type': 'discrete', 'domain': (1, 50)},
        {'name': 'n_estimators', 'type': 'discrete', 'domain': (1, 300)},
        {'name': 'min_child_weight', 'type': 'continuous', 'domain': (1.0, 100.)},
        {'name': 'colsample_bytree', 'type': 'continuous', 'domain': (0.1, 0.8)},
        {'name': 'subsample', 'type': 'continuous', 'domain': (0.1, 0.8)},
    ]

    noise = 0
    m52 = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
    gpr = GaussianProcessRegressor(kernel=m52, alpha=noise ** 2)

    def XGBRegressorFake(learning_rate, gamma, max_depth, n_estimators,
                         min_child_weight, colsample_bytree, subsample):
        return learning_rate ** 2

    def cv_score_fake(parameters):
        return (parameters[0] - 0.5) ** 2

    # Optimization objective
    def cv_score(parameters):
        score = cross_val_score(
            XGBRegressor(learning_rate=parameters[0],
                         gamma=int(parameters[1]),
                         max_depth=int(parameters[2]),
                         n_estimators=int(parameters[3]),
                         min_child_weight=parameters[4],
                         colsample_bytree=parameters[5],
                         subsample=parameters[6]),
            X, Y, scoring='neg_mean_squared_error').mean()
        print(score)
        return score

    boundaries_of_parameters = list(map(lambda x: x['domain'], bds))
    print(boundaries_of_parameters)

    opti_obj = gp_minimize(
        lambda x: cv_score(x),
        boundaries_of_parameters,
        base_estimator=gpr,
        acq_func='EI',       # expected improvement
        xi=0.01,             # exploitation-exploration trade-off
        n_calls=10,          # number of iterations
        n_random_starts=4,   # initial random samples
    )
    return opti_obj
def minimal_gp():
    kernel = (
        ConstantKernel(constant_value=1 ** 2,
                       constant_value_bounds=(0.01 ** 2, 1 ** 2))
        * RBF(length_scale=1.0, length_scale_bounds=(0.5, 1.5))
    )
    gp = BayesGPR(random_state=1, normalize_y=False, kernel=kernel)
    return gp
def cook_estimator(base_estimator, space=None, **kwargs):
    """
    Cook a default estimator.

    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need for
    an estimator.

    Parameters
    ----------
    * `base_estimator` ["GP", "RF", "ET", "GBRT", "DUMMY" or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`. In addition the
        `predict` method should have an optional `return_std` argument, which
        returns `std(Y | x)` along with `E[Y | x]`. If base_estimator is one
        of ["GP", "RF", "ET", "GBRT", "DUMMY"], a surrogate model
        corresponding to the relevant `X_minimize` function is created.

    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.

    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    gp_estimators = ["GP", "GPM32", "GPM1", "RBF", "RQ"]
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["ET", "RF", "GBRT", "DUMMY"] + gp_estimators:
            raise ValueError("Valid strings for the base_estimator parameter "
                             "are: 'GP', 'RF', 'ET', 'GBRT', 'DUMMY', "
                             "'GPM32', 'GPM1', 'RBF' or 'RQ', not %s."
                             % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    # All GP variants share the same space normalization and amplitude kernel;
    # they differ only in the correlation kernel.
    if base_estimator in gp_estimators:
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if base_estimator in ("GP", "GPM32", "GPM1") and is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        elif base_estimator == "GP":
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims,
                nu=2.5)
        elif base_estimator == "GPM32":
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims,
                nu=1.5)
        elif base_estimator == "GPM1":
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims,
                nu=0.5)  # assumed: Matern-1/2 for "GPM1" (the flattened source repeated nu=1.5 here)
        elif base_estimator == "RBF":
            other_kernel = RBF(length_scale=np.ones(n_dims))
        elif base_estimator == "RQ":
            other_kernel = RationalQuadratic(length_scale=np.ones(n_dims),
                                             alpha=0.1)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
from skopt import Optimizer
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern

# Gaussian process with Matérn kernel as surrogate model
from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
                                              ExpSineSquared, DotProduct,
                                              ConstantKernel)

kernels = [
    1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 10.0)),
    1.0 * RationalQuadratic(length_scale=1.0, alpha=0.1),
    1.0 * ExpSineSquared(length_scale=1.0, periodicity=3.0,
                         length_scale_bounds=(0.1, 10.0),
                         periodicity_bounds=(1.0, 10.0)),
    ConstantKernel(0.1, (0.01, 10.0))
    * (DotProduct(sigma_0=1.0, sigma_0_bounds=(0.1, 10.0)) ** 2),
    1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 10.0),
                 nu=2.5),
]

#############################################################################

for kernel in kernels:
    gpr = GaussianProcessRegressor(kernel=kernel, alpha=noise_level ** 2,
                                   normalize_y=True, noise="gaussian",
                                   n_restarts_optimizer=2)

    opt = Optimizer([(-2.0, 2.0)], base_estimator=gpr, n_initial_points=5,
def __init__(self, hyper_param_conf, command, expdir, exp_recipe_dir, recipe,
             computing, exp_proposal_watch_dir=None):
    base_estimator = 'GP'  # set to 'boundedGP' to use the custom estimator below

    self.hyper_param_conf = hyper_param_conf
    self.command = command
    self.expdir = expdir
    self.exp_recipe_dir = exp_recipe_dir
    self.recipe = recipe
    self.computing = computing

    # read the hyper parameter file
    hyper_param_cfg = configparser.ConfigParser()
    hyper_param_cfg.read(hyper_param_conf)

    hyper_info = dict(hyper_param_cfg.items('info'))
    self.hyper_param_names = hyper_info['hyper_params'].split(' ')
    self.num_iters = int(hyper_info['num_iters'])
    self.n_initial_points = int(hyper_info['n_initial_points'])
    self.n_initial_points_to_start = int(hyper_info['n_initial_points_to_start'])
    self.max_parallel_jobs = int(hyper_info['max_parallel_jobs'])
    self.selected_segment_length = hyper_info['segment_length']
    self.selected_task = hyper_info['task']

    if 'adapt_hyper_param' in hyper_info:
        self.adapt_param = {
            'param_name': hyper_info['adapt_hyper_param'],
            'param_thr': int(hyper_info['param_thr']),
            'par_cnt_scheme': hyper_info['par_cnt_scheme'],
        }
    else:
        self.adapt_param = None

    hyper_param_dict = dict()
    skopt_dims = []
    for par_name in self.hyper_param_names:
        par_dict = dict(hyper_param_cfg.items(par_name))
        par_type = par_dict['type']
        if par_type == 'Integer':
            skopt_dim = skopt_space.Integer(low=int(par_dict['min']),
                                            high=int(par_dict['max']),
                                            name=par_name)
        elif par_type == 'Real':
            skopt_dim = skopt_space.Real(low=float(par_dict['min']),
                                         high=float(par_dict['max']),
                                         name=par_name)
        elif par_type == 'Categorical':
            skopt_dim = skopt_space.Categorical(
                categories=par_dict['categories'].split(' '),
                name=par_name)
        else:
            raise ValueError('Type %s is not a valid parameter type' % par_type)

        hyper_param_dict[par_name] = par_dict
        skopt_dims.append(skopt_dim)

    self.hyper_param_dict = hyper_param_dict
    self.skopt_dims = skopt_dims

    self.last_result = None
    # self.all_results = []

    self.start_new_run_flag = True
    self.iter_ind = 0
    self.watch_list = dict()
    self.all_dim_values = []
    self.all_losses = dict()
    self.n_job_running = 0
    self.n_initial_points_started = 0
    self.n_unsuitable_points_for_estimator = 0
    self.max_n_unsuitable_points_for_estimator = 10000
    self.unsuitable_runs = []
    self.lost_runs = []

    self.exp_proposal_watch_dir = exp_proposal_watch_dir
    self.use_proposal_run = False
    self.proposed_loss_runs = []

    # Only 0.25% of the points sampled in the hyperparameter space are wanted,
    # since they lead to roughly the wanted amount of trainable parameters.
    self.acq_optimizer_kwargs = {'n_points': 4000000}
    if 'debug' in expdir:
        self.acq_optimizer_kwargs = {'n_points': 40000}

    if base_estimator == 'boundedGP':
        # Make our own estimator based on GaussianProcessRegressor.
        if skopt_dims is not None:
            space = Space(skopt_dims)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)

        base_estimator = BoundedGaussianProcessRegressor(
            space, self.hyper_param_names, self.adapt_param,
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    super(HyperParamOptimizer, self).__init__(
        skopt_dims, base_estimator=base_estimator,
        n_initial_points=self.n_initial_points,
        acq_optimizer_kwargs=self.acq_optimizer_kwargs)
def __init__(self, dimensions, base_estimator="GP", maximize=True,
             n_random_starts=10, acq_func="LCB", acq_optimizer="lbfgs",
             random_state=None, n_points=10000, n_restarts_optimizer=5,
             xi=0.01, kappa=1.96, n_jobs=1):
    if not skopt_available:
        raise ImportError("skopt is not installed correctly")
    self.maximize = maximize
    self.n_params = len(dimensions)

    rng = check_random_state(random_state)
    if isinstance(base_estimator, str):
        if base_estimator == "RF":
            base_estimator = RandomForestRegressor(n_estimators=100,
                                                   min_samples_leaf=3,
                                                   n_jobs=n_jobs,
                                                   random_state=rng)
        elif base_estimator == "ET":
            base_estimator = ExtraTreesRegressor(n_estimators=100,
                                                 min_samples_leaf=3,
                                                 n_jobs=n_jobs,
                                                 random_state=rng)
        elif base_estimator == "GP":
            cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
            matern = Matern(length_scale=np.ones(len(dimensions)),
                            length_scale_bounds=[(0.01, 100)] * len(dimensions),
                            nu=2.5)
            base_estimator = GaussianProcessRegressor(
                kernel=cov_amplitude * matern,
                normalize_y=True, random_state=rng, alpha=0.0,
                noise="gaussian", n_restarts_optimizer=2)
        elif base_estimator == "GBRT":
            gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
            base_estimator = GradientBoostingQuantileRegressor(
                base_estimator=gbrt, n_jobs=n_jobs, random_state=rng)
        else:
            raise ValueError("Valid strings for the base_estimator parameter "
                             "are: 'GP', 'RF', 'ET', or 'GBRT', not '%s'"
                             % base_estimator)

    acq_func_kwargs = {"xi": xi, "kappa": kappa}
    acq_optimizer_kwargs = {
        "n_points": n_points,
        "n_restarts_optimizer": n_restarts_optimizer,
        "n_jobs": n_jobs,
    }
    self.optimizer = _SkOptOptimizer(
        dimensions=dimensions,
        base_estimator=base_estimator,
        n_initial_points=n_random_starts,
        acq_func=acq_func,
        acq_optimizer=acq_optimizer,
        random_state=random_state,
        acq_func_kwargs=acq_func_kwargs,
        acq_optimizer_kwargs=acq_optimizer_kwargs)
# from skopt.learning.gaussian_process.kernels import ConstantKernel, RBF
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import ConstantKernel, Matern
# from skopt.plots import plot_convergence
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skopt.sampler import Lhs
from skopt.space import Space
from skopt import dump, load

from QTransport_cls import OpenQT, NCohQT
from QTdata import BO_data_write

job = 'w_config'

# Optimization parameters
# rbf = ConstantKernel(1.0) * RBF(length_scale_bounds=(0.01, 0.5))  # .107
m52 = ConstantKernel(1.0) * Matern(nu=2.5, length_scale_bounds=(0.01, 0.5))
gpr = GaussianProcessRegressor(kernel=m52, n_restarts_optimizer=2)
num_init = 50
num_itr = 200

T = np.pi / (2 * 0.125)
dim = (7, 3)  # dimension of the problem
s, d = dim
w_bound = [(0.125, 12.5)] * (s - 2)  # for w in units of V = 0.125
x_bound = [(-0.9999, 0.9999)] * ((s - 2) * d)
bound = x_bound + w_bound

inv_G_ls = [0.35938137 * T]  # [2.58*10**(-3)*T]
repeat = 1
for G, inv_G in enumerate(inv_G_ls):
    y = lambda site_w: OpenQT(s, d,
# TODO: The data structures for bounds are inconsistent across the mediators;
#   they should share the same structure everywhere.
lower_bounds = [
    float(scenario.getroot()[0][i].attrib['lowerbound'])
    for i in range(0, num_issues)
]
upper_bounds = [
    float(scenario.getroot()[0][i].attrib['upperbound'])
    for i in range(0, num_issues)
]

# Run the mediator
num_init_random_points = 1
num_random_restarts = 5
base_estimator = GaussianProcessRegressor(
    kernel=Exponentiation(
        Sum(Product(ConstantKernel(), DotProduct()),
            ConstantKernel(1.0, (0.01, 1000.0))),
        2.0),
    normalize_y=True,
    noise="gaussian",
    n_restarts_optimizer=2)
base_estimator = None  # NOTE: overrides the GP above; presumably the mediator falls back to its default estimator
bayes_mediator_social = BayesMediatorSocial(
    num_issues=num_issues,
    num_agents=2,
    u_funcs=u_funcs,
    lower_bounds=lower_bounds,
    upper_bounds=upper_bounds,
    num_init_random_points=num_init_random_points,
    num_random_restarts=num_random_restarts,
    base_estimator=base_estimator,
    plot_mediator=num_issues == 1,
first_loop_legal_upper_bounds = [i for i in range(3, 301, 3)]
# second_loop_legal_upper_bounds = [i // 3 * 4 for i in first_loop_legal_upper_bounds]
# space_size = len(first_loop_legal_upper_bounds)
# total_amount_of_inner_part = [first_loop_legal_upper_bounds[i] * second_loop_legal_upper_bounds[i]
#                               for i in range(space_size)]


def crop_number(n):
    # Snap n to the closest legal upper bound
    return min(first_loop_legal_upper_bounds, key=lambda t: abs(t - n))


kernel = Product(ConstantKernel(1), RBF(1)) + ConstantKernel(1)
model = GaussianProcessRegressor(alpha=0, normalize_y=True, noise='gaussian',
                                 n_restarts_optimizer=10, kernel=kernel)
optimizer = Optimizer([[low_constraint, high_constraint]] * dim, model,
                      n_initial_points=n_initial_points, acq_func='EI',
                      acq_optimizer='lbfgs', random_state=None)
class GaussianProcessRegressor(sk_GaussianProcessRegressor):
    """
    GaussianProcessRegressor that allows noise tunability.

    The implementation is based on Algorithm 2.1 of Gaussian Processes
    for Machine Learning (GPML) by Rasmussen and Williams.

    In addition to the standard scikit-learn estimator API,
    GaussianProcessRegressor:

    * allows prediction without prior fitting (based on the GP prior);
    * provides an additional method sample_y(X), which evaluates samples
      drawn from the GPR (prior or posterior) at given inputs;
    * exposes a method log_marginal_likelihood(theta), which can be used
      externally for other ways of selecting hyperparameters, e.g., via
      Markov chain Monte Carlo.

    Parameters
    ----------
    kernel : kernel object
        The kernel specifying the covariance function of the GP. If None is
        passed, the kernel "1.0 * RBF(1.0)" is used as default. Note that
        the kernel's hyperparameters are optimized during fitting.

    alpha : float or array-like, optional (default: 1e-10)
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to increased noise level in the observations
        and reduce potential numerical issues during fitting. If an array is
        passed, it must have the same number of entries as the data used for
        fitting and is used as datapoint-dependent noise level. Note that this
        is equivalent to adding a WhiteKernel with c=alpha. Allowing to
        specify the noise level directly as a parameter is mainly for
        convenience and for consistency with Ridge.

    optimizer : string or callable, optional (default: "fmin_l_bfgs_b")
        Can either be one of the internally supported optimizers for
        optimizing the kernel's parameters, specified by a string, or an
        externally defined optimizer passed as a callable. If a callable is
        passed, it must have the signature::

            def optimizer(obj_func, initial_theta, bounds):
                # * 'obj_func' is the objective function to be maximized, which
                #   takes the hyperparameters theta as parameter and an
                #   optional flag eval_gradient, which determines if the
                #   gradient is returned additionally to the function value
                # * 'initial_theta': the initial value for theta, which can be
                #   used by local optimizers
                # * 'bounds': the bounds on the values of theta
                ....
                # Returned are the best found hyperparameters theta and
                # the corresponding value of the target function.
                return theta_opt, func_min

        Per default, the 'fmin_l_bfgs_b' algorithm from scipy.optimize
        is used. If None is passed, the kernel's parameters are kept fixed.
        Available internal optimizers are::

            'fmin_l_bfgs_b'

    n_restarts_optimizer : int, optional (default: 0)
        The number of restarts of the optimizer for finding the kernel's
        parameters which maximize the log-marginal likelihood. The first run
        of the optimizer is performed from the kernel's initial parameters,
        the remaining ones (if any) from thetas sampled log-uniform randomly
        from the space of allowed theta-values. If greater than 0, all bounds
        must be finite. Note that n_restarts_optimizer == 0 implies that one
        run is performed.

    normalize_y : boolean, optional (default: False)
        Whether the target values y are normalized, i.e., the mean of the
        observed target values becomes zero. This parameter should be set to
        True if the target values' mean is expected to differ considerably
        from zero. When enabled, the normalization effectively modifies the
        GP's prior based on the data, which contradicts the likelihood
        principle; normalization is thus disabled per default.

    copy_X_train : bool, optional (default: True)
        If True, a persistent copy of the training data is stored in the
        object. Otherwise, just a reference to the training data is stored,
        which might cause predictions to change if the data is modified
        externally.

    random_state : integer or numpy.RandomState, optional
        The generator used to initialize the centers. If an integer is given,
        it fixes the seed. Defaults to the global numpy random number
        generator.

    noise : string, "gaussian", optional
        If set to "gaussian", then it is assumed that `y` is a noisy estimate
        of `f(x)` where the noise is gaussian.

    Attributes
    ----------
    X_train_ : array-like, shape = (n_samples, n_features)
        Feature values in training data (also required for prediction)

    y_train_ : array-like, shape = (n_samples, [n_output_dims])
        Target values in training data (also required for prediction)

    kernel_ : kernel object
        The kernel used for prediction. The structure of the kernel is the
        same as the one passed as parameter but with optimized hyperparameters

    L_ : array-like, shape = (n_samples, n_samples)
        Lower-triangular Cholesky decomposition of the kernel in ``X_train_``

    alpha_ : array-like, shape = (n_samples,)
        Dual coefficients of training data points in kernel space

    log_marginal_likelihood_value_ : float
        The log-marginal-likelihood of ``self.kernel_.theta``

    noise_ : float
        Estimate of the gaussian noise. Useful only when noise is set to
        "gaussian".
    """

    def __init__(self, kernel=None, alpha=1e-10,
                 optimizer="fmin_l_bfgs_b",
                 n_restarts_optimizer=0, normalize_y=False,
                 copy_X_train=True, random_state=None, noise=None):
        self.noise = noise
        if isinstance(self.noise, str) and self.noise != "gaussian":
            raise ValueError("expected noise to be 'gaussian', got %s"
                             % self.noise)
        super(GaussianProcessRegressor, self).__init__(
            kernel=kernel, alpha=alpha, optimizer=optimizer,
            n_restarts_optimizer=n_restarts_optimizer,
            normalize_y=normalize_y, copy_X_train=copy_X_train,
            random_state=random_state)

    def fit(self, X, y):
        """Fit Gaussian process regression model.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Training data

        y : array-like, shape = (n_samples, [n_output_dims])
            Target values

        Returns
        -------
        self
            Returns an instance of self.
        """
        if self.kernel is None:
            self.kernel = ConstantKernel(1.0, constant_value_bounds="fixed") \
                * RBF(1.0, length_scale_bounds="fixed")
        if self.noise and not _param_for_white_kernel_in_Sum(self.kernel)[0]:
            if self.noise == "gaussian":
                self.kernel = self.kernel + WhiteKernel()
            else:
                self.kernel = self.kernel + WhiteKernel(
                    noise_level=self.noise, noise_level_bounds="fixed")
        super(GaussianProcessRegressor, self).fit(X, y)

        self.noise_ = None

        if self.noise:
            # The noise component of this kernel should be set to zero
            # while estimating K(X_test, X_test).
            # Note that the term K(X, X) should include the noise, but
            # (K(X, X))^-1 y is precomputed as the attribute `alpha_`
            # (notice the underscore).
            # This has been described in Eq 2.24 of
            # http://www.gaussianprocess.org/gpml/chapters/RW2.pdf
            # Hence this hack.
            if isinstance(self.kernel_, WhiteKernel):
                self.kernel_.set_params(noise_level=0.0)
            else:
                white_present, white_param = _param_for_white_kernel_in_Sum(
                    self.kernel_)
                # This should always be true. Just in case.
                if white_present:
                    noise_kernel = self.kernel_.get_params()[white_param]
                    self.noise_ = noise_kernel.noise_level
                    self.kernel_.set_params(
                        **{white_param: WhiteKernel(noise_level=0.0)})

        # Precompute arrays needed at prediction
        L_inv = solve_triangular(self.L_.T, np.eye(self.L_.shape[0]))
        self.K_inv_ = L_inv.dot(L_inv.T)

        # Fix deprecation warning #462
        if int(sklearn.__version__[2:4]) >= 23:
            self.y_train_std_ = self._y_train_std
            self.y_train_mean_ = self._y_train_mean
        elif int(sklearn.__version__[2:4]) >= 19:
            self.y_train_mean_ = self._y_train_mean
            self.y_train_std_ = 1
        else:
            self.y_train_mean_ = self.y_train_mean
            self.y_train_std_ = 1

        return self

    def predict(self, X, return_std=False, return_cov=False,
                return_mean_grad=False, return_std_grad=False):
        """
        Predict output for X.

        In addition to the mean of the predictive distribution, also its
        standard deviation (return_std=True) or covariance (return_cov=True),
        the gradient of the mean and the standard-deviation with respect to X
        can be optionally provided.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Query points where the GP is evaluated.

        return_std : bool, default: False
            If True, the standard-deviation of the predictive distribution at
            the query points is returned along with the mean.

        return_cov : bool, default: False
            If True, the covariance of the joint predictive distribution at
            the query points is returned along with the mean.

        return_mean_grad : bool, default: False
            Whether or not to return the gradient of the mean.
            Only valid when X is a single point.

        return_std_grad : bool, default: False
            Whether or not to return the gradient of the std.
            Only valid when X is a single point.

        Returns
        -------
        y_mean : array, shape = (n_samples, [n_output_dims])
            Mean of predictive distribution at query points

        y_std : array, shape = (n_samples,), optional
            Standard deviation of predictive distribution at query points.
            Only returned when return_std is True.

        y_cov : array, shape = (n_samples, n_samples), optional
            Covariance of joint predictive distribution at query points.
            Only returned when return_cov is True.

        y_mean_grad : shape = (n_samples, n_features)
            The gradient of the predicted mean

        y_std_grad : shape = (n_samples, n_features)
            The gradient of the predicted std.
        """
        if return_std and return_cov:
            raise RuntimeError(
                "Not returning standard deviation of predictions when "
                "returning full covariance.")

        if return_std_grad and not return_std:
            raise ValueError(
                "Not returning std_gradient without returning the std.")

        X = check_array(X)
        if X.shape[0] != 1 and (return_mean_grad or return_std_grad):
            raise ValueError("Not implemented for n_samples > 1")

        if not hasattr(self, "X_train_"):  # Not fit; predict based on GP prior
            y_mean = np.zeros(X.shape[0])
            if return_cov:
                y_cov = self.kernel(X)
                return y_mean, y_cov
            elif return_std:
                y_var = self.kernel.diag(X)
                return y_mean, np.sqrt(y_var)
            else:
                return y_mean

        else:  # Predict based on GP posterior
            K_trans = self.kernel_(X, self.X_train_)
            y_mean = K_trans.dot(self.alpha_)  # Line 4 (y_mean = f_star)
            # undo normalisation
            y_mean = self.y_train_std_ * y_mean + self.y_train_mean_

            if return_cov:
                v = cho_solve((self.L_, True), K_trans.T)  # Line 5
                y_cov = self.kernel_(X) - K_trans.dot(v)   # Line 6
                # undo normalisation
                y_cov = y_cov * self.y_train_std_ ** 2
                return y_mean, y_cov

            elif return_std:
                K_inv = self.K_inv_

                # Compute variance of predictive distribution
                y_var = self.kernel_.diag(X)
                y_var -= np.einsum("ki,kj,ij->k", K_trans, K_trans, K_inv)

                # Check if any of the variances is negative because of
                # numerical issues. If yes: set the variance to 0.
                y_var_negative = y_var < 0
                if np.any(y_var_negative):
                    warnings.warn("Predicted variances smaller than 0. "
                                  "Setting those variances to 0.")
                    y_var[y_var_negative] = 0.0
                # undo normalisation
                y_var = y_var * self.y_train_std_ ** 2
                y_std = np.sqrt(y_var)

            if return_mean_grad:
                grad = self.kernel_.gradient_x(X[0], self.X_train_)
                grad_mean = np.dot(grad.T, self.alpha_)
                # undo normalisation
                grad_mean = grad_mean * self.y_train_std_

                if return_std_grad:
                    grad_std = np.zeros(X.shape[1])
                    if not np.allclose(y_std, grad_std):
                        grad_std = -np.dot(K_trans,
                                           np.dot(K_inv, grad))[0] / y_std
                        # undo normalisation
                        grad_std = grad_std * self.y_train_std_ ** 2
                    return y_mean, y_std, grad_mean, grad_std

                if return_std:
                    return y_mean, y_std, grad_mean
                else:
                    return y_mean, grad_mean

            else:
                if return_std:
                    return y_mean, y_std
                else:
                    return y_mean
import numpy as np

from skopt import Optimizer
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import ConstantKernel
from skopt.learning.gaussian_process.kernels import Matern, WhiteKernel

# cov_amplitude = ConstantKernel(1.0, (0.01, 5.0))
cov_amplitude = ConstantKernel(1.0, "fixed")
other_kernel = Matern(length_scale=np.ones(1),
                      length_scale_bounds=[(0.3, 10)],
                      nu=2.5)
white_kernel = WhiteKernel()

gp = GaussianProcessRegressor(
    kernel=cov_amplitude * other_kernel + white_kernel,
    normalize_y=True, alpha=0.0, noise=10e-7,
    n_restarts_optimizer=2)


def get_optimizer(range, nrandom):
    return Optimizer(dimensions=[range],
                     base_estimator=gp,
                     n_random_starts=nrandom)
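# A short ask/tell sketch using get_optimizer() above; the 1-D quadratic objective is
# a hypothetical stand-in for a real evaluation.
opt = get_optimizer((-2.0, 2.0), nrandom=5)

for _ in range(15):
    x = opt.ask()              # next candidate suggested by the surrogate
    y = (x[0] - 0.3) ** 2      # hypothetical objective value
    opt.tell(x, y)

print(min(opt.yi))             # best observed objective value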