import sys

import numpy as np


def test_BO(dim, obj_fun, ftarget, max_FEs, lb, ub, logfile):
    sys.path.insert(0, "../")
    sys.path.insert(0, "../../GaussianProcess")

    from BayesOpt import BO, DiscreteSpace, IntegerSpace, RandomForest, RealSpace
    from GaussianProcess import GaussianProcess
    from GaussianProcess.trend import constant_trend

    space = RealSpace([lb, ub]) * dim
    # kernel = 1.0 * Matern(length_scale=(1, 1), length_scale_bounds=(1e-10, 1e2))
    # model = _GaussianProcessRegressor(kernel=kernel, alpha=0, n_restarts_optimizer=30, normalize_y=False)

    mean = constant_trend(dim, beta=0)  # equivalent to Simple Kriging
    thetaL = 1e-5 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean,
        corr="matern",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        noise_estim=False,
        nugget=0,
        optimizer="BFGS",
        wait_iter=5,
        random_start=10 * dim,
        eval_budget=200 * dim,
    )
    return BO(
        search_space=space,
        obj_fun=obj_fun,
        model=model,
        DoE_size=dim * 10,
        max_FEs=max_FEs,
        verbose=True,
        n_point=1,
        minimize=True,
        acquisition_fun="EI",
        ftarget=ftarget,
        logger=None,
    )
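# Minimal usage sketch for test_BO. The sphere objective and the call to
# opt.run() are assumptions for illustration; run() is assumed to behave as in
# the BayesOpt package and return (xopt, fopt, stop_dict).
import numpy as np


def sphere(x):
    # hypothetical objective used only for this example
    return float(np.sum(np.asarray(x) ** 2))


opt = test_BO(
    dim=5, obj_fun=sphere, ftarget=1e-8, max_FEs=500,
    lb=-5.0, ub=5.0, logfile="sphere_BO.log",
)
xopt, fopt, stop_dict = opt.run()  # assumed return signature
print(fopt)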
def create_optimizer(dim, fitness, n_step, n_init_sample, model_type):
    x1 = {'name': "x1", 'type': 'R', 'bounds': [-6, 6]}
    x2 = {'name': "x2", 'type': 'R', 'bounds': [-6, 6]}
    search_space = [x1, x2]

    # bounds of the search space, used below for the GP length-scale ranges
    lb, ub = np.array([-6] * dim), np.array([6] * dim)

    if model_type == 'GP':
        thetaL = 1e-3 * (ub - lb) * np.ones(dim)
        thetaU = 10 * (ub - lb) * np.ones(dim)
        theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

        mean = constant_trend(dim, beta=None)
        model = GaussianProcess(
            mean=mean, corr='matern',
            theta0=theta0, thetaL=thetaL, thetaU=thetaU,
            nugget=1e-5, noise_estim=False,
            random_start=15 * dim, likelihood='concentrated',
            random_state=None, eval_budget=100 * dim
        )
    elif model_type == 'sklearn-RF':
        min_samples_leaf = max(1, int(n_init_sample / 20.))
        max_features = int(np.ceil(dim * 5 / 6.))
        model = RandomForest(
            n_estimators=100,
            max_features=max_features,
            min_samples_leaf=min_samples_leaf
        )
    elif model_type == 'R-RF':
        model = RrandomForest()

    opt = mipego(
        search_space, fitness, model,
        max_iter=n_step, random_seed=None,
        n_init_sample=n_init_sample,
        minimize=True, optimizer='BFGS'
    )
    return opt
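# Minimal usage sketch for create_optimizer. The Himmelblau objective below is
# written out by hand for illustration, and opt.run() is assumed to behave as
# in the mipego package and return (xopt, fopt, stop_dict).
def himmelblau(x):
    x1, x2 = x[0], x[1]
    return (x1 ** 2 + x2 - 11) ** 2 + (x1 + x2 ** 2 - 7) ** 2


opt = create_optimizer(dim=2, fitness=himmelblau, n_step=20,
                       n_init_sample=10, model_type='GP')
xopt, fopt, stop_dict = opt.run()  # assumed return signature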
np.random.seed(1)

dim = 2
n_step = 20
n_init_sample = 10
obj_func = lambda x: benchmarks.himmelblau(x)[0]
lb = np.array([-6] * dim)
ub = np.array([6] * dim)

search_space = ContinuousSpace(['x1', 'x2'], zip(lb, ub))

thetaL = 1e-3 * (ub - lb) * np.ones(dim)
thetaU = 10 * (ub - lb) * np.ones(dim)
theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

mean = constant_trend(dim, beta=None)
model = GaussianProcess(
    mean=mean, corr='matern',
    theta0=theta0, thetaL=thetaL, thetaU=thetaU,
    nugget=None, noise_estim=False,
    optimizer='BFGS', wait_iter=5, random_start=15 * dim,
    likelihood='concentrated', eval_budget=100 * dim
)

# search_space = [x1, x2]
opt = BayesOpt(
    search_space, obj_func, model,
    max_iter=n_step, random_seed=None,
    # the snippet was truncated here; the remaining keywords below are assumed,
    # mirroring the mipego call in the previous example
    n_init_sample=n_init_sample, minimize=True
)
def init_with_rh(self, data, **kwargs):
    X = np.atleast_2d([
        Configuration(values=_[0], configuration_space=self.cs).get_array()
        for _ in data
    ])
    y = np.array([_[1] for _ in data])
    dim = X.shape[1]
    fopt = np.min(y)
    xopt = X[np.where(y == fopt)[0][0]]

    mean = constant_trend(dim, beta=None)  # Simple Kriging
    thetaL = 1e-10 * np.ones(dim)
    thetaU = 10 * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean, corr='squared_exponential',
        theta0=theta0, thetaL=thetaL, thetaU=thetaU,
        nugget=1e-6, noise_estim=False,
        optimizer='BFGS', wait_iter=5, random_start=5 * dim,
        eval_budget=100 * dim
    )
    model.fit(X, y)

    # obtain the Hessian and gradient from the GP mean surface
    H = model.Hessian(xopt)
    g = model.gradient(xopt)[0]

    w, B = np.linalg.eigh(H)
    w[w <= 0] = 1e-6  # replace the negative eigenvalues by a very small value
    w_min, w_max = np.min(w), np.max(w)

    # to avoid the condition number getting too high
    cond_upper = 1e3
    delta = (cond_upper * w_min - w_max) / (1 - cond_upper)
    w += delta

    # compute the upper bound for step-size
    M = np.diag(1 / np.sqrt(w)).dot(B.T)
    H_inv = B.dot(np.diag(1 / w)).dot(B.T)
    p = -1 * H_inv.dot(g).ravel()
    alpha = np.linalg.norm(p)

    if np.isnan(alpha):
        alpha = 1
        H_inv = np.eye(dim)

    # use a backtracking line search to determine the initial step-size
    tau, c = 0.9, 1e-4
    slope = np.inner(g.ravel(), p.ravel())

    if slope > 0:  # this should not happen..
        p *= -1
        slope *= -1

    f = lambda x: model.predict(x)
    while True:
        _x = (xopt + alpha * p).reshape(1, -1)
        if f(_x) <= f(xopt.reshape(1, -1)) + c * alpha * slope:
            break
        alpha *= tau

    sigma0 = np.linalg.norm(M.dot(alpha * p)) / np.sqrt(dim - 0.5)

    self.Cov = H_inv
    self.sigma = self.sigma0 = sigma0
    self._set_x0(xopt)

    self.mean = self.gp.geno(
        np.array(self.x0, copy=True),
        from_bounds=self.boundary_handler.inverse,
        copy=False
    )
    self.mean0 = np.array(self.mean, copy=True)
    self.best = BestSolution(x=self.mean, f=fopt)
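# Note on the eigenvalue shift used above: the scalar
#     delta = (cond_upper * w_min - w_max) / (1 - cond_upper)
# is chosen so that after w += delta the repaired spectrum satisfies
#     (w_max + delta) / (w_min + delta) == cond_upper,
# i.e. the condition number of the reconstructed Hessian becomes exactly 1e3.
# Standalone numeric check (example eigenvalues are assumptions for illustration):
import numpy as np

w = np.array([1e-6, 0.2, 5.0])  # example eigenvalues after clipping the negatives
w_min, w_max = w.min(), w.max()
cond_upper = 1e3
delta = (cond_upper * w_min - w_max) / (1 - cond_upper)
w_shifted = w + delta
print(w_shifted.max() / w_shifted.min())  # ~1000.0, the intended condition number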
def __init__(self,
             max_iter=None,
             n_init_sample=None,
             eval_budget=None,
             n_random_start=None,
             n_point=None):
    """
    Creates a hyperparameter optimizer for the (c_1, c_c, c_mu) CMA-ES parameters.

    :param max_iter: the maximum number of iterations (default: 1000)
    :param n_init_sample: the number of initial design points (default: 250)
    :param eval_budget: the evaluation budget (default: 20)
    :param n_random_start: the number of random starts (default: 5)
    :param n_point: the number of points proposed per iteration (default: 1)
    """
    self.params = ['c_1', 'c_c', 'c_mu']
    # , 'm1', 'm2', 'm3', 'm4', 'm5', 'm6', 'm7', 'm8', 'm9', 'm10', 'm11']
    self.dim_hyperparams = len(self.params)

    if max_iter is None:
        self.max_iter = 1000
    else:
        self.max_iter = max_iter
    if n_init_sample is None:
        self.n_init_sample = 250
    else:
        self.n_init_sample = n_init_sample
    if eval_budget is None:
        self.eval_budget = 20
    else:
        self.eval_budget = eval_budget
    if n_random_start is None:
        self.n_random_start = 5
    else:
        self.n_random_start = n_random_start
    if n_point is None:
        self.n_point = 1
    else:
        self.n_point = n_point

    # bounds as 1-D arrays so that (ub - lb) broadcasts correctly below
    self.lb = np.zeros(self.dim_hyperparams)
    self.ub = np.array([0.35, 1, 0.35])

    p1 = [[0, 1]] * 9
    p1.append([0, 1, 2])
    p1.append([0, 1, 2])
    search_space_nominal = NominalSpace(p1)
    # search_space_discrete = OrdinalSpace(list(zip([0,0,0,0,0,0,0,0,0,0,0], [2,2,2,2,2,2,2,2,2,3,3])))
    search_space_cont = ContinuousSpace(list(zip([0, 0, 0], [0.35, 1, 0.35])))
    self.search_space = search_space_cont + search_space_nominal

    # trend function of GPR
    self.mean = constant_trend(self.dim_hyperparams, beta=0)

    # autocorrelation parameters of GPR
    self.thetaL = 1e-10 * (self.ub - self.lb) * np.ones(self.dim_hyperparams)
    self.thetaU = 2 * (self.ub - self.lb) * np.ones(self.dim_hyperparams)
    np.random.seed(0)
    self.theta0 = np.random.rand(self.dim_hyperparams) * (self.thetaU - self.thetaL) + self.thetaL
def __init__(self,
             params=None,
             max_iter=None,
             n_init_sample=None,
             ub=None,
             lb=None,
             eval_budget=None,
             n_random_start=None,
             part_to_optimize=1,
             param_vals=None,
             n_point=None):
    """
    Creates a hyperparameter optimizer over the given CMA-ES parameters.

    :param params: names of the hyperparameters to optimize (default: ['c_1', 'c_c', 'c_mu'])
    :param max_iter: the maximum number of iterations (default: 100)
    :param n_init_sample: the number of initial design points (default: 20)
    :param ub: the upper bounds of the hyperparameters (forwarded to set_bounds)
    :param lb: the lower bounds of the hyperparameters (forwarded to set_bounds)
    :param eval_budget: the evaluation budget (default: 20)
    :param n_random_start: the number of random starts (default: 5)
    :param part_to_optimize: which part of the adaptive configuration to optimize.
        Can be 1, 2 or -1; the -1 option optimizes both parts with the same parameter
        values. To better optimize a complete configuration, first optimize part 1,
        then part 2, using the optimal values for part 1 in the param_vals argument.
    :param param_vals: the parameter values for the part of the configuration that is
        not optimized here
    :param n_point: the number of points proposed per iteration (default: 1)
    """
    if params is None:
        self.params = ['c_1', 'c_c', 'c_mu']
    else:
        self.params = params

    if param_vals is not None:
        self.param_vals = param_vals
    else:
        self.param_vals = None

    self.part_to_optimize = part_to_optimize
    self.dim_hyperparams = len(self.params)

    if max_iter is None:
        self.max_iter = 100
    else:
        self.max_iter = max_iter
    if n_init_sample is None:
        self.n_init_sample = 20
    else:
        self.n_init_sample = n_init_sample
    if eval_budget is None:
        self.eval_budget = 20
    else:
        self.eval_budget = eval_budget
    if n_random_start is None:
        self.n_random_start = 5
    else:
        self.n_random_start = n_random_start
    if n_point is None:
        self.n_point = 1
    else:
        self.n_point = n_point

    self.set_bounds(lb, ub, params)

    if "lambda_" in self.params:
        print("Contains discrete variable (lambda_)")
        self.contains_discrete = True
        search_space_discrete = OrdinalSpace(list(zip([4], [250])))
        search_space_cont = ContinuousSpace(list(zip(self.lb, self.ub)))
        self.search_space = search_space_cont + search_space_discrete
        self.lb = np.append(self.lb, [4])
        self.ub = np.append(self.ub, [250])
    else:
        self.contains_discrete = False
        self.search_space = ContinuousSpace(list(zip(self.lb, self.ub)), var_name=params)

    # trend function of GPR
    # this is a standard setting. no need to change
    self.mean = constant_trend(self.dim_hyperparams, beta=0)

    # autocorrelation parameters of GPR
    self.thetaL = 1e-10 * (self.ub - self.lb) * np.ones(self.dim_hyperparams)
    self.thetaU = 2 * (self.ub - self.lb) * np.ones(self.dim_hyperparams)
    np.random.seed(0)
    self.theta0 = np.random.rand(self.dim_hyperparams) * (self.thetaU - self.thetaL) + self.thetaL