# Assumed import paths for the labwatch / RoBO packages used below; adjust them
# to match your installation.
import numpy as np

from ConfigSpace import Configuration

from labwatch.optimizers.base import Optimizer
from labwatch.converters.convert_configspace import sacred_space_to_configspace, \
    configspace_config_to_sacred

from robo.models.bnn import BayesianNeuralNetwork
from robo.acquisition_functions.log_ei import LogEI
from robo.maximizers.direct import Direct
from robo.initial_design.init_random_uniform import init_random_uniform


class Bohamiann(Optimizer):

    def __init__(self, config_space, burnin=3000, n_iters=10000):
        super(Bohamiann, self).__init__(sacred_space_to_configspace(config_space))
        self.rng = np.random.RandomState()  # seeded from OS entropy
        self.n_dims = len(self.config_space.get_hyperparameters())

        # All inputs are mapped to be in [0, 1]^D
        self.lower = np.zeros([self.n_dims])
        self.upper = np.ones([self.n_dims])
        self.incumbents = []
        self.X = None
        self.y = None

        # Bayesian neural network trained with stochastic gradient HMC
        self.model = BayesianNeuralNetwork(sampling_method="sghmc",
                                           l_rate=np.sqrt(1e-4),
                                           mdecay=0.05,
                                           burn_in=burnin,
                                           n_iters=n_iters,
                                           precondition=True,
                                           normalize_input=True,
                                           normalize_output=True)

        self.acquisition_func = LogEI(self.model)

        self.maximizer = Direct(self.acquisition_func, self.lower, self.upper,
                                verbose=False)

    def suggest_configuration(self):
        if self.X is None and self.y is None:
            # No data points yet to train a model, just return a random configuration instead
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]
        else:
            # Train the model on all finished runs
            self.model.train(self.X, self.y)
            self.acquisition_func.update(self.model)

            # Maximize the acquisition function
            new_x = self.maximizer.maximize()

        # Map from [0, 1]^D back to the original space
        next_config = Configuration(self.config_space, vector=new_x)

        # Transform to a sacred configuration
        result = configspace_config_to_sacred(next_config)

        return result
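# Illustrative sketch (not part of the original module): how a point from the
# [0, 1]^D unit cube, e.g. one returned by the acquisition maximizer above, is
# mapped back to the original hyperparameter ranges via ConfigSpace. The search
# space below is made up purely for demonstration.
def _demo_unit_cube_mapping():
    import numpy as np
    from ConfigSpace import ConfigurationSpace, Configuration
    from ConfigSpace.hyperparameters import UniformFloatHyperparameter

    cs = ConfigurationSpace()
    cs.add_hyperparameter(UniformFloatHyperparameter("learning_rate", 1e-4, 1e-1, log=True))
    cs.add_hyperparameter(UniformFloatHyperparameter("momentum", 0.0, 0.99))

    # A candidate in the normalized [0, 1]^D representation
    new_x = np.array([0.5, 0.25])

    # Configuration(..., vector=...) interprets the array as the normalized
    # representation and transforms it back to the original ranges
    config = Configuration(cs, vector=new_x)
    return config.get_dictionary()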
# Assumed RoBO module paths for the imports used by warmstart_mtbo; adjust them
# to match your installation.
import json
import logging
import os
import time
from copy import deepcopy

import george
import numpy as np

from robo.acquisition_functions.log_ei import LogEI
from robo.maximizers.differential_evolution import DifferentialEvolution
from robo.models.mtbo_gp import MTBOGPMCMC
from robo.models.wrapper_bohamiann import WrapperBohamiannMultiTask
from robo.priors.env_priors import MTBOPrior

logger = logging.getLogger(__name__)


def warmstart_mtbo(objective_function, lower, upper, observed_X, observed_y,
                   n_tasks=2, num_iterations=30, model_type="gp_mcmc",
                   target_task_id=1, burnin=100, chain_length=200,
                   n_hypers=20, output_path=None, rng=None):
    """
    Interface to MTBO [1], which uses an auxiliary, cheaper task to warm start
    the optimization of a new but similar task. Note that here we only warm
    start the optimization process; if you want to speed up Bayesian
    optimization by also evaluating the auxiliary task during the optimization,
    check out mtbo() or fabolas().

    [1] Multi-Task Bayesian Optimization
        K. Swersky and J. Snoek and R. Adams
        Proceedings of the 27th International Conference on Advances in
        Neural Information Processing Systems (NIPS'13)

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    observed_X: np.array(N, D + 1)
        Observed points from the auxiliary task. Make sure that the last
        dimension identifies the auxiliary task (default=0). We assume the
        main task to have the task id = 1.
    observed_y: np.array(N,)
        Corresponding target values
    n_tasks: int
        Number of tasks
    target_task_id: int
        The id of the target task
    num_iterations: int
        Number of iterations
    model_type: {"gp_mcmc", "bohamiann"}
        Surrogate model for the objective function
    chain_length: int
        The length of the MCMC chain for each walker.
    burnin: int
        The number of burnin steps before the actual MCMC sampling starts.
    n_hypers: int
        Number of hyperparameter samples drawn for the GP model
    output_path: string
        Specifies the path where the intermediate output after each iteration
        will be saved. If None, no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
    dict with all results
    """
    assert lower.shape[0] == upper.shape[0], \
        "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    incumbent_values = []
    runtime = []

    X = deepcopy(observed_X)
    y = deepcopy(observed_y)

    if model_type == "gp_mcmc":
        # Define the model for the objective function
        cov_amp = 1  # Covariance amplitude
        kernel = cov_amp

        # ARD kernel for the configuration space
        for d in range(n_dims):
            kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                    ndim=n_dims + 1, axes=d)

        task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
        kernel *= task_kernel

        # If necessary, take 3 times more MCMC samples than we have kernel
        # hyperparameters (rounded up to an even number of walkers)
        if n_hypers < 2 * len(kernel):
            n_hypers = 3 * len(kernel)
            if n_hypers % 2 == 1:
                n_hypers += 1

        prior = MTBOPrior(len(kernel) + 1,
                          n_ls=n_dims,
                          n_kt=len(task_kernel),
                          rng=rng)

        model_objective = MTBOGPMCMC(kernel,
                                     prior=prior,
                                     burnin_steps=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers,
                                     lower=lower,
                                     upper=upper,
                                     rng=rng)
    elif model_type == "bohamiann":
        model_objective = WrapperBohamiannMultiTask(n_tasks=n_tasks)

    acquisition_func = LogEI(model_objective)

    # Optimize the acquisition function only on the target task
    def wrapper(x):
        x_ = np.append(x, np.ones([x.shape[0], 1]) * target_task_id, axis=1)

        if y.shape[0] == init_points:
            eta = 0
        else:
            eta = np.min(y[init_points:])
        a = acquisition_func(x_, eta=eta)
        return a

    maximizer = DifferentialEvolution(wrapper, lower, upper)

    X = np.array(X)
    y = np.array(y)

    init_points = y.shape[0]

    for it in range(num_iterations):
        logger.info("Start iteration %d ... ", it)

        start_time = time.time()

        # Train the model
        model_objective.train(X, y, do_optimize=True)

        # Maximize the acquisition function
        acquisition_func.update(model_objective)

        new_x = maximizer.maximize()
        new_x = np.append(new_x, np.array([target_task_id]))

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s", str(new_x))
        start_time = time.time()
        new_y = objective_function(new_x[:-1], int(new_x[-1]))
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f", new_y)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Add the new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate((y, np.array([new_y])), axis=0)

        # Estimate the incumbent as the best value observed on the target task so far
        best_idx = np.argmin(y[init_points:]) + init_points
        incumbent = X[best_idx][:-1]
        incumbent_value = y[best_idx]

        incumbents.append(incumbent)
        incumbent_values.append(incumbent_value)

        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), incumbent_value)

        runtime.append(time.time() - time_start)

        if output_path is not None:
            data = dict()
            data["optimization_overhead"] = time_overhead[it]
            data["runtime"] = runtime[it]
            data["incumbent"] = incumbents[it].tolist()
            data["time_func_eval"] = time_func_eval[it]
            data["iteration"] = it

            json.dump(data,
                      open(os.path.join(output_path, "mtbo_iter_%d.json" % it), "w"))

    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    results = dict()
    results["x_opt"] = incumbent.tolist()
    results["incumbents"] = [inc.tolist() for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval
    results["incumbent_values"] = incumbent_values
    results["X"] = X
    results["y"] = y

    return results