def test_constrained_q_expected_hypervolume_improvement(self):
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        ref_point = [0.0, 0.0]
        t_ref_point = torch.tensor(ref_point, **tkwargs)
        pareto_Y = torch.tensor(
            [[4.0, 5.0], [5.0, 5.0], [8.5, 3.5], [8.5, 3.0], [9.0, 1.0]],
            **tkwargs)
        partitioning = NondominatedPartitioning(ref_point=t_ref_point)
        partitioning.update(Y=pareto_Y)
        # test q=1
        # the event shape is `b x q x m` = 1 x 1 x 2
        samples = torch.tensor([[[6.5, 4.5]]], **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        sampler = IIDNormalSampler(num_samples=1)
        X = torch.zeros(1, 1, **tkwargs)
        # test zero slack
        for eta in (1e-1, 1e-2):
            acqf = qExpectedHypervolumeImprovement(
                model=mm,
                ref_point=ref_point,
                partitioning=partitioning,
                sampler=sampler,
                constraints=[lambda Z: torch.zeros_like(Z[..., -1])],
                eta=eta,
            )
            res = acqf(X)
            self.assertAlmostEqual(res.item(), 0.5 * 1.5, places=4)
        # test feasible
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            constraints=[lambda Z: -100.0 * torch.ones_like(Z[..., -1])],
            eta=1e-3,
        )
        res = acqf(X)
        self.assertAlmostEqual(res.item(), 1.5, places=4)
        # test infeasible
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            constraints=[lambda Z: 100.0 * torch.ones_like(Z[..., -1])],
            eta=1e-3,
        )
        res = acqf(X)
        self.assertAlmostEqual(res.item(), 0.0, places=4)
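# Why the zero-slack case above expects 0.5 * 1.5: qEHVI weights each sample's
# hypervolume improvement by a smoothed feasibility indicator of the form
# sigmoid(-constraint_value / eta), and sigmoid(0) = 0.5, so zero slack halves
# the 1.5 improvement. A minimal sketch of that weighting (an illustration of
# the convention, not BoTorch's internal code):
def soft_feasibility_weight(constraint_value, eta=1e-3):
    """~1 when feasible (value << 0), ~0 when infeasible (value >> 0),
    exactly 0.5 at zero slack."""
    return torch.sigmoid(-constraint_value / eta)

# soft_feasibility_weight(torch.tensor(0.0)) -> 0.5, hence 0.5 * 1.5 above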
def _torch_optimize_qehvi_and_get_observation(self):
    torch_anti_ideal_point = torch.tensor(
        self._transformed_anti_ideal_point, dtype=torch.double)
    qehvi_partitioning = NondominatedPartitioning(
        ref_point=torch_anti_ideal_point,
        Y=torch.stack(self._torch_model.train_targets, dim=1))
    qehvi_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
    self._acquisition = qExpectedHypervolumeImprovement(
        model=self._torch_model,
        ref_point=self._transformed_anti_ideal_point,
        partitioning=qehvi_partitioning,
        sampler=qehvi_sampler)
    # these options all come from the tutorial and likely need a serious review
    candidates, _ = optimize_acqf(
        acq_function=self._acquisition,
        bounds=self._botorch_domain,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )
    # is unnormalize necessary here? we are providing the same bounds
    # here and in the optimizer
    new_x = unnormalize(candidates.detach(), bounds=self._botorch_domain)
    transformed_eps, transformed_err = self._optimization_handler(new_x)
    return new_x, transformed_eps, transformed_err
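# Regarding the "is unnormalize necessary?" question above: BoTorch's
# unnormalize maps points from the unit cube to `bounds`, i.e. roughly
#     unnormalize(X, bounds) == bounds[0] + X * (bounds[1] - bounds[0])
# Since optimize_acqf already searches within self._botorch_domain, the extra
# unnormalize is a no-op only if that domain is the unit cube; otherwise it
# rescales already-scaled candidates. A quick self-contained check (sketch):
from botorch.utils.transforms import unnormalize

unit_cube = torch.tensor([[0.0], [1.0]], dtype=torch.double)
x = torch.tensor([[0.5]], dtype=torch.double)
assert torch.equal(unnormalize(x, bounds=unit_cube), x)  # identity on [0, 1]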
def optimize_qehvi_and_get_observation(model, train_obj, sampler):
    """Optimizes the qEHVI acquisition function and returns a new candidate
    and observation."""
    # partition non-dominated space into disjoint rectangles
    partitioning = NondominatedPartitioning(ref_point=problem.ref_point,
                                            Y=train_obj)
    acq_func = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=problem.ref_point.tolist(),  # use known reference point
        partitioning=partitioning,
        sampler=sampler,
    )
    # optimize
    candidates, _ = optimize_acqf(
        acq_function=acq_func,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj = problem(new_x)
    return new_x, new_obj
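# A minimal sketch of the outer loop that would drive the helper above,
# mirroring the BoTorch multi-objective tutorial this function comes from.
# `initialize_model` is an assumed tutorial helper (not defined here), and
# the batch/sample counts are illustrative.
def run_qehvi_loop(train_x, train_obj, n_batch=20, mc_samples=128):
    mll, model = initialize_model(train_x, train_obj)  # assumed helper
    for _ in range(n_batch):
        fit_gpytorch_model(mll)  # refit GP hyperparameters
        sampler = SobolQMCNormalSampler(num_samples=mc_samples)
        new_x, new_obj = optimize_qehvi_and_get_observation(
            model, train_obj, sampler)
        # append new observations and reinitialize the model
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        mll, model = initialize_model(train_x, train_obj)
    return train_x, train_obj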
def test_q_expected_hypervolume_improvement(self):
    tkwargs = {"device": self.device}
    for dtype in (torch.float, torch.double):
        ref_point = [0.0, 0.0]
        tkwargs["dtype"] = dtype
        pareto_Y = torch.tensor(
            [[4.0, 5.0], [5.0, 5.0], [8.5, 3.5], [8.5, 3.0], [9.0, 1.0]],
            **tkwargs)
        partitioning = NondominatedPartitioning(num_outcomes=2)
        # the event shape is `b x q x m` = 1 x 1 x 2
        samples = torch.zeros(1, 1, 2, **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        # test error if no pareto_Y has been initialized in the partitioning
        with self.assertRaises(BotorchError):
            qExpectedHypervolumeImprovement(model=mm,
                                            ref_point=ref_point,
                                            partitioning=partitioning)
        partitioning.update(Y=pareto_Y)
        # test error if ref point has wrong shape
        with self.assertRaises(ValueError):
            qExpectedHypervolumeImprovement(model=mm,
                                            ref_point=ref_point[:1],
                                            partitioning=partitioning)
        X = torch.zeros(1, 1, **tkwargs)
        # basic test
        sampler = IIDNormalSampler(num_samples=1)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # check ref point
        self.assertTrue(
            torch.equal(acqf.ref_point, torch.tensor(ref_point, **tkwargs)))
        # check cached indices
        self.assertTrue(hasattr(acqf, "q_subset_indices"))
        self.assertIn("q_choose_1", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_1"],
                torch.tensor([[0]], device=self.device),
            ))
        # test q=2
        X2 = torch.zeros(2, 1, **tkwargs)
        samples2 = torch.zeros(1, 2, 2, **tkwargs)
        mm2 = MockModel(MockPosterior(samples=samples2))
        acqf.model = mm2
        res = acqf(X2)
        self.assertEqual(res.item(), 0.0)
        # check cached indices
        self.assertTrue(hasattr(acqf, "q_subset_indices"))
        self.assertIn("q_choose_1", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_1"],
                torch.tensor([[0], [1]], device=self.device),
            ))
        self.assertIn("q_choose_2", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_2"],
                torch.tensor([[0, 1]], device=self.device),
            ))
        self.assertNotIn("q_choose_3", acqf.q_subset_indices)
        # now back to q=1 and make sure all caches were cleared
        acqf.model = mm
        res = acqf(X)
        self.assertNotIn("q_choose_2", acqf.q_subset_indices)
        self.assertIn("q_choose_1", acqf.q_subset_indices)
        self.assertTrue(
            torch.equal(
                acqf.q_subset_indices["q_choose_1"],
                torch.tensor([[0]], device=self.device),
            ))

        X = torch.zeros(1, 1, **tkwargs)
        samples = torch.zeros(1, 1, 2, **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        # basic test, no resample
        sampler = IIDNormalSampler(num_samples=2, seed=12345)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape,
                         torch.Size([2, 1, 1, 2]))
        bs = acqf.sampler.base_samples.clone()
        res = acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))

        # basic test, qmc, no resample
        sampler = SobolQMCNormalSampler(num_samples=2)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape,
                         torch.Size([2, 1, 1, 2]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertTrue(torch.equal(acqf.sampler.base_samples, bs))

        # basic test, qmc, resample
        sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        self.assertEqual(acqf.sampler.base_samples.shape,
                         torch.Size([2, 1, 1, 2]))
        bs = acqf.sampler.base_samples.clone()
        acqf(X)
        self.assertFalse(torch.equal(acqf.sampler.base_samples, bs))

        # basic test for X_pending and warning
        acqf.set_X_pending()
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(None)
        self.assertIsNone(acqf.X_pending)
        acqf.set_X_pending(X)
        self.assertEqual(acqf.X_pending, X)
        res = acqf(X)
        X2 = torch.zeros(1, 1, 1, requires_grad=True, **tkwargs)
        with warnings.catch_warnings(record=True) as ws, settings.debug(True):
            acqf.set_X_pending(X2)
            self.assertEqual(acqf.X_pending, X2)
            self.assertEqual(len(ws), 1)
            self.assertTrue(issubclass(ws[-1].category, BotorchWarning))

        # test objective
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=IdentityMCMultiOutputObjective(),
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)

        # Test that the hypervolume improvement is correct for a given sample
        # test q = 1
        X = torch.zeros(1, 1, **tkwargs)
        # basic test
        samples = torch.tensor([[[6.5, 4.5]]], **tkwargs)
        mm = MockModel(MockPosterior(samples=samples))
        sampler = IIDNormalSampler(num_samples=1)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 1.5)
        # test q = 1, does not contribute
        samples = torch.tensor([0.0, 1.0], **tkwargs).view(1, 1, 2)
        sampler = IIDNormalSampler(1)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)

        # test q = 2, both points contribute
        X = torch.zeros(2, 1, **tkwargs)
        samples = torch.tensor([[6.5, 4.5], [7.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 1.75)
        # test q = 2, only 1 point contributes
        samples = torch.tensor([[6.5, 4.5], [6.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 1.5)
        # test q = 2, neither contributes
        samples = torch.tensor([[2.0, 2.0], [0.0, 0.1]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)
        # test q = 2, test point better than current best second objective
        samples = torch.tensor([[6.5, 4.5], [6.0, 6.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf.model = mm
        res = acqf(X)
        self.assertEqual(res.item(), 8.0)
        # test q = 2, test point better than current-best first objective
        samples = torch.tensor([[6.5, 4.5], [9.0, 2.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 2.0)

        # test q = 3, all contribute
        X = torch.zeros(3, 1, **tkwargs)
        samples = torch.tensor([[6.5, 4.5], [9.0, 2.0], [7.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 2.25)
        # test q = 3, not all contribute
        samples = torch.tensor([[6.5, 4.5], [9.0, 2.0], [7.0, 5.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 3.5)
        # test q = 3, none contribute
        samples = torch.tensor([[0.0, 4.5], [1.0, 2.0], [3.0, 0.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)

        # test m = 3, q = 1
        pareto_Y = torch.tensor(
            [[4.0, 2.0, 3.0], [3.0, 5.0, 1.0], [2.0, 4.0, 2.0],
             [1.0, 3.0, 4.0]],
            **tkwargs,
        )
        partitioning = NondominatedPartitioning(num_outcomes=3, Y=pareto_Y)
        samples = torch.tensor([[1.0, 2.0, 6.0]], **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        ref_point = [-1.0] * 3
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        X = torch.zeros(1, 2, **tkwargs)
        res = acqf(X)
        self.assertEqual(res.item(), 12.0)
        # change reference point
        ref_point = [0.0] * 3
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 4.0)
        # test m = 3, no contribution
        ref_point = [1.0] * 3
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        res = acqf(X)
        self.assertEqual(res.item(), 0.0)

        # test m = 3, q = 2
        pareto_Y = torch.tensor(
            [[4.0, 2.0, 3.0], [3.0, 5.0, 1.0], [2.0, 4.0, 2.0]], **tkwargs)
        samples = torch.tensor([[1.0, 2.0, 6.0], [1.0, 3.0, 4.0]],
                               **tkwargs).unsqueeze(0)
        mm = MockModel(MockPosterior(samples=samples))
        ref_point = [-1.0] * 3
        partitioning = NondominatedPartitioning(num_outcomes=3, Y=pareto_Y)
        acqf = qExpectedHypervolumeImprovement(
            model=mm,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
        )
        X = torch.zeros(2, 2, **tkwargs)
        res = acqf(X)
        self.assertEqual(res.item(), 22.0)
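# Sanity check of the q=1 value above (1.5), recomputing the improvement
# directly as HV(front U {sample}) - HV(front). A sketch assuming BoTorch's
# Hypervolume and is_non_dominated utilities:
from botorch.utils.multi_objective.hypervolume import Hypervolume
from botorch.utils.multi_objective.pareto import is_non_dominated

hv2 = Hypervolume(ref_point=torch.zeros(2, dtype=torch.double))
front = torch.tensor(
    [[4.0, 5.0], [5.0, 5.0], [8.5, 3.5], [8.5, 3.0], [9.0, 1.0]],
    dtype=torch.double)
base = hv2.compute(front[is_non_dominated(front)])
augmented = torch.cat([front, torch.tensor([[6.5, 4.5]], dtype=torch.double)])
new = hv2.compute(augmented[is_non_dominated(augmented)])
assert abs((new - base) - 1.5) < 1e-6  # matches the expected qEHVI value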
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in
    :class:`~optuna.integration.BoTorchSampler` with multi-objective
    optimization when the number of objectives is three or less.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument
        and return value descriptions.
    """
    n_objectives = train_obj.size(-1)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)
        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        additional_qehvi_kwargs = {
            "objective": IdentityMCMultiOutputObjective(
                outcomes=list(range(n_objectives))),
            "constraints": constraints,
        }
    else:
        train_y = train_obj
        train_obj_feas = train_obj
        additional_qehvi_kwargs = {}

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y,
                         outcome_transform=Standardize(m=train_y.shape[-1]))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    # Approximate box decomposition similar to Ax when the number of
    # objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    if n_objectives > 2:
        alpha = 10 ** (-8 + n_objectives)
    else:
        alpha = 0.0
    partitioning = NondominatedPartitioning(num_outcomes=n_objectives,
                                            Y=train_obj_feas, alpha=alpha)

    ref_point = train_obj.min(dim=0).values - 1e-8
    ref_point_list = ref_point.tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **additional_qehvi_kwargs,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
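# Hedged usage sketch: plugging the function above into Optuna. The
# `candidates_func` parameter of optuna.integration.BoTorchSampler accepts
# exactly this signature; the two-objective study below is illustrative.
import optuna

def _two_objectives(trial):
    x = trial.suggest_float("x", 0.0, 1.0)
    y = trial.suggest_float("y", 0.0, 1.0)
    return x ** 2 + y, x + y ** 2

sampler = optuna.integration.BoTorchSampler(
    candidates_func=qehvi_candidates_func)
study = optuna.create_study(directions=["minimize", "minimize"],
                            sampler=sampler)
# study.optimize(_two_objectives, n_trials=30)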
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Convenience function for initializing botorch acquisition functions.

    Args:
        acquisition_function_name: Name of the acquisition function.
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose
            evaluation is pending.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility. Used when constraint_transforms are not passed as
            part of the objective.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).

    Returns:
        The requested acquisition function.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # initialize the sampler
    if qmc:
        sampler = SobolQMCNormalSampler(num_samples=mc_samples, seed=seed)
    else:
        sampler = IIDNormalSampler(num_samples=mc_samples, seed=seed)
    # instantiate and return the requested acquisition function
    if acquisition_function_name == "qEI":
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qExpectedImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qPI":
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qProbabilityOfImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            tau=kwargs.get("tau", 1e-3),
        )
    elif acquisition_function_name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_observed,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            prune_baseline=kwargs.get("prune_baseline", False),
        )
    elif acquisition_function_name == "qSR":
        return monte_carlo.qSimpleRegret(model=model,
                                         sampler=sampler,
                                         objective=objective,
                                         X_pending=X_pending)
    elif acquisition_function_name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(
            model=model,
            beta=kwargs["beta"],
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qEHVI":
        # pyre-fixme [16]: `Model` has no attribute `train_targets`
        try:
            ref_point = kwargs["ref_point"]
        except KeyError:
            raise ValueError("`ref_point` must be specified in kwargs for qEHVI")
        try:
            Y = kwargs["Y"]
        except KeyError:
            raise ValueError("`Y` must be specified in kwargs for qEHVI")
        # get feasible points
        if constraints is not None:
            feas = torch.stack([c(Y) <= 0 for c in constraints],
                               dim=-1).all(dim=-1)
            Y = Y[feas]
        obj = objective(Y)
        partitioning = NondominatedPartitioning(
            ref_point=torch.as_tensor(ref_point, dtype=Y.dtype,
                                      device=Y.device),
            Y=obj,
            alpha=kwargs.get("alpha", 0.0),
        )
        return moo_monte_carlo.qExpectedHypervolumeImprovement(
            model=model,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=objective,
            constraints=constraints,
            X_pending=X_pending,
        )
    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}")
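# Hedged usage sketch for the qEHVI branch above: `ref_point` and `Y` are
# required kwargs for that branch. The random training data is illustrative.
train_X = torch.rand(10, 2)
train_Y = torch.rand(10, 2)
moo_gp = SingleTaskGP(train_X, train_Y)
qehvi_acqf = get_acquisition_function(
    "qEHVI",
    model=moo_gp,
    objective=IdentityMCMultiOutputObjective(),
    X_observed=train_X,
    ref_point=[0.0, 0.0],
    Y=train_Y,
)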
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        res['objs'] = [-y for y in res['objs']]
        return res  # Caution: negative values imply feasibility in botorch

    hv_diffs = []
    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj, train_con = generate_initial_data(
        initial_runs, objective_function, time_list, global_start_time)
    # fix: expand the initial data until a feasible point better than the
    # reference point is found
    real_initial_runs = initial_runs
    while real_initial_runs < max_runs:
        # compute feasible observations
        is_feas = (train_con <= 0).all(dim=-1)
        # compute points that are better than the known reference point
        better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
        if (is_feas & better_than_ref).any():
            break
        train_x, train_obj, train_con = expand_initial_data(
            train_x, train_obj, train_con, objective_function, time_list,
            global_start_time)
        real_initial_runs += 1
        print('=== Expand initial data to find feasible. Iter =',
              real_initial_runs)
    mll, model = initialize_model(train_x, train_obj, train_con)

    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)
    # calculate hypervolume of init data
    for i in range(real_initial_runs):
        train_obj_i = train_obj[:i + 1]
        train_con_i = train_con[:i + 1]
        # compute pareto front
        is_feas_i = (train_con_i <= 0).all(dim=-1)
        feas_train_obj_i = train_obj_i[is_feas_i]
        pareto_mask = is_non_dominated(feas_train_obj_i)
        pareto_y = feas_train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

    # run (max_runs - real_initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(real_initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition module using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj > problem.ref_point).all(dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use feasible observations that are better than the reference point
                Y=train_obj[better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraints are on the last outcomes
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:  # handle numeric problem
            step = 2
            print('===== Exception in optimization loop, restart with 1/%d of training data: %s'
                  % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step],
                                              train_obj[::step],
                                              train_con[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    train_con[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition module using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # compute feasible observations
            is_feas = (train_con[::step] <= 0).all(dim=-1)
            # compute points that are better than the known reference point
            better_than_ref = (train_obj[::step] > problem.ref_point).all(dim=-1)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs,
                # use feasible observations that are better than the reference point
                Y=train_obj[::step][better_than_ref & is_feas],
            )
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
                # define an objective that specifies which outcomes are the objectives
                objective=IdentityMCMultiOutputObjective(
                    outcomes=list(range(problem.num_objs))),
                # specify that the constraints are on the last outcomes
                constraints=constraint_callable_list(
                    problem.num_constraints, num_objs=problem.num_objs),
            )
            # optimize and get new observation
            new_x, new_obj, new_con = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)

        assert len(time_list) == iteration

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        train_con = torch.cat([train_con, new_con])

        # update progress: compute pareto front
        is_feas = (train_con <= 0).all(dim=-1)
        feas_train_obj = train_obj[is_feas]
        pareto_mask = is_non_dominated(feas_train_obj)
        pareto_y = feas_train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

        # reinitialize the models so they are ready for fitting on the next
        # iteration; use the current state dict to speed up fitting.
        # Note: the BoTorch tutorial authors report improved performance from
        # *not* warm starting the model hyperparameters with those from the
        # previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj, train_con)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                train_con,
                model.state_dict(),
            )
        t1 = time.time()
        print("Iter %d: x=%s, perf=%s, con=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
              % (iteration, unnormalize(new_x, bounds=problem_bounds),
                 -new_obj, new_con, hv_diff, t1 - t0, time_list[-1]),
              flush=True)

    # compute pareto front
    is_feas = (train_con <= 0).all(dim=-1)
    feas_train_obj = train_obj[is_feas]
    pareto_mask = is_non_dominated(feas_train_obj)
    pareto_y = feas_train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)

    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(
        np.float64)  # caution
    train_obj[~is_feas] = -INFEASIBLE_OBJ_VALUE  # set infeasible
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)
    return hv_diffs, pf, X, Y, time_list
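# `constraint_callable_list` is a project helper that is not shown here. A
# plausible sketch, assuming constraints occupy the last `num_constraints`
# outcomes of the model output (the same convention as the Optuna integration
# above); negative values imply feasibility:
def constraint_callable_list(num_constraints, num_objs):
    # each callable picks out one constraint outcome from the joint output
    return [
        lambda Z, i=i: Z[..., num_objs + i] for i in range(num_constraints)
    ]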
def evaluate(mth, run_i, seed):
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Caution: unnormalize and maximize
        x = unnormalize(x, bounds=problem_bounds)
        x = x.cpu().numpy().astype(np.float64)  # caution
        res = problem.evaluate(x)
        objs = [-y for y in res['objs']]
        return objs

    hv_diffs = []
    time_list = []
    global_start_time = time.time()

    # random seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    # call helper functions to generate initial training data and initialize model
    train_x, train_obj = generate_initial_data(initial_runs,
                                               objective_function, time_list,
                                               global_start_time)
    mll, model = initialize_model(train_x, train_obj)

    # for plot
    X_init = train_x.cpu().numpy().astype(np.float64)
    Y_init = -1 * train_obj.cpu().numpy().astype(np.float64)
    # calculate hypervolume of init data
    for i in range(initial_runs):
        train_obj_i = train_obj[:i + 1]
        # compute pareto front
        pareto_mask = is_non_dominated(train_obj_i)
        pareto_y = train_obj_i[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

    # run (max_runs - initial_runs) rounds of BayesOpt after the initial random batch
    for iteration in range(initial_runs + 1, max_runs + 1):
        t0 = time.time()
        try:
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition module using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj)
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)
        except Exception as e:  # handle numeric problem
            step = 2
            print('===== Exception in optimization loop, restart with 1/%d of training data: %s'
                  % (step, str(e)))
            if refit == 1:
                mll, model = initialize_model(train_x[::step],
                                              train_obj[::step])
            else:
                mll, model = initialize_model(
                    train_x[::step],
                    train_obj[::step],
                    model.state_dict(),
                )
            # fit the models
            fit_gpytorch_model(mll)
            # define the qEHVI acquisition module using a QMC sampler
            sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
            # partition non-dominated space into disjoint rectangles
            partitioning = NondominatedPartitioning(
                num_outcomes=problem.num_objs, Y=train_obj[::step])
            qEHVI = qExpectedHypervolumeImprovement(
                model=model,
                ref_point=problem.ref_point.tolist(),  # use known reference point
                partitioning=partitioning,
                sampler=sampler,
            )
            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(
                qEHVI, objective_function, time_list, global_start_time)

        assert len(time_list) == iteration

        # update training points
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])

        # update progress: compute pareto front
        pareto_mask = is_non_dominated(train_obj)
        pareto_y = train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
        hv_diff = problem.max_hv - volume
        hv_diffs.append(hv_diff)

        # reinitialize the models so they are ready for fitting on the next
        # iteration; use the current state dict to speed up fitting.
        # Note: the BoTorch tutorial authors report improved performance from
        # *not* warm starting the model hyperparameters with those from the
        # previous iteration
        if refit == 1:
            mll, model = initialize_model(train_x, train_obj)
        else:
            mll, model = initialize_model(
                train_x,
                train_obj,
                model.state_dict(),
            )
        t1 = time.time()
        print("Iter %d: x=%s, perf=%s, hv_diff=%f, time=%.2f, global_time=%.2f"
              % (iteration, unnormalize(new_x, bounds=problem_bounds),
                 -new_obj, hv_diff, t1 - t0, time_list[-1]), flush=True)

    # Save result
    X = unnormalize(train_x, bounds=problem_bounds).cpu().numpy().astype(
        np.float64)  # caution
    Y = -1 * train_obj.cpu().numpy().astype(np.float64)
    # compute pareto front
    pareto_mask = is_non_dominated(train_obj)
    pareto_y = train_obj[pareto_mask]
    pf = -1 * pareto_y.cpu().numpy().astype(np.float64)

    # plot for debugging
    if plot_mode == 1:
        plot_pf(problem, problem_str, mth, pf, Y_init)
    return hv_diffs, pf, X, Y, time_list
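# `hv` above is a module-level hypervolume calculator defined outside these
# functions. A plausible sketch of its construction with BoTorch's utility
# (an assumption about the surrounding module, consistent with the
# maximization convention used here):
from botorch.utils.multi_objective.hypervolume import Hypervolume

hv = Hypervolume(ref_point=problem.ref_point)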
def test_get_X_baseline(self):
    tkwargs = {"device": self.device}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        X_train = torch.rand(20, 2, **tkwargs)
        model = MockModel(
            MockPosterior(mean=(2 * X_train + 1).sum(dim=-1, keepdim=True)))
        # test NEI with X_baseline
        acqf = qNoisyExpectedImprovement(model, X_baseline=X_train[:2])
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, acqf.X_baseline))
        # test EI without X_baseline
        acqf = qExpectedImprovement(model, best_f=0.0)
        with warnings.catch_warnings(record=True) as w, settings.debug(True):
            X_rnd = get_X_baseline(acq_function=acqf)
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, BotorchWarning))
            self.assertIsNone(X_rnd)
        # set train inputs
        model.train_inputs = (X_train,)
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, X_train))
        # test that we fall back to train_inputs if X_baseline is an empty tensor
        acqf.register_buffer("X_baseline", X_train[:0])
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, X_train))
        # test acquisition function without X_baseline or model
        acqf = FixedFeatureAcquisitionFunction(acqf, d=2, columns=[0],
                                               values=[0])
        with warnings.catch_warnings(record=True) as w, settings.debug(True):
            X_rnd = get_X_baseline(acq_function=acqf)
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, BotorchWarning))
            self.assertIsNone(X_rnd)
        Y_train = 2 * X_train[:2] + 1
        moo_model = MockModel(MockPosterior(mean=Y_train, samples=Y_train))
        ref_point = torch.zeros(2, **tkwargs)
        # test NEHVI with X_baseline
        acqf = qNoisyExpectedHypervolumeImprovement(
            moo_model,
            ref_point=ref_point,
            X_baseline=X_train[:2],
            cache_root=False,
        )
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, acqf.X_baseline))
        # test qEHVI without train_inputs
        acqf = qExpectedHypervolumeImprovement(
            moo_model,
            ref_point=ref_point,
            partitioning=FastNondominatedPartitioning(
                ref_point=ref_point,
                Y=Y_train,
            ),
        )
        # test extracting train_inputs from a model list GP
        model_list = ModelListGP(
            SingleTaskGP(X_train, Y_train[:, :1]),
            SingleTaskGP(X_train, Y_train[:, 1:]),
        )
        acqf = qExpectedHypervolumeImprovement(
            model_list,
            ref_point=ref_point,
            partitioning=FastNondominatedPartitioning(
                ref_point=ref_point,
                Y=Y_train,
            ),
        )
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, X_train))
        # test MESMO, for which we need to use `acqf.mo_model`
        batched_mo_model = SingleTaskGP(X_train, Y_train)
        acqf = qMultiObjectiveMaxValueEntropy(
            batched_mo_model,
            sample_pareto_frontiers=lambda model: torch.rand(10, 2, **tkwargs),
        )
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, X_train))
        # test that if there is an input transform that is applied to the
        # train_inputs when the model is in eval mode, we extract the
        # untransformed train_inputs
        model = SingleTaskGP(X_train, Y_train[:, :1],
                             input_transform=Warp(indices=[0, 1]))
        model.eval()
        self.assertFalse(torch.equal(model.train_inputs[0], X_train))
        acqf = qExpectedImprovement(model, best_f=0.0)
        X = get_X_baseline(acq_function=acqf)
        self.assertTrue(torch.equal(X, X_train))