def test_roundtrip(self):
    """Check that batched -> model list -> batched conversion is lossless.

    For each dtype and each supported model class, the recovered batched
    model must expose the same state dict keys as the original, and every
    entry must be exactly equal (`torch.equal`).
    """

    def check_roundtrip(batched_model):
        # Convert first, then read both state dicts, mirroring the order of
        # operations used by each inline check.
        recovered = model_list_to_batched(batched_to_model_list(batched_model))
        expected_sd = batched_model.state_dict()
        actual_sd = recovered.state_dict()
        self.assertTrue(set(expected_sd) == set(actual_sd))
        self.assertTrue(
            all(torch.equal(expected_sd[key], actual_sd[key]) for key in expected_sd)
        )

    for dtype in (torch.float, torch.double):
        X = torch.rand(10, 2, device=self.device, dtype=dtype)
        # two outcomes: sum and difference of the two input columns
        Y = torch.stack([X.sum(dim=-1), X[:, 0] - X[:, 1]], dim=-1)
        # SingleTaskGP
        check_roundtrip(SingleTaskGP(X, Y))
        # FixedNoiseGP
        check_roundtrip(FixedNoiseGP(X, Y, torch.rand_like(Y)))
        # SingleTaskMultiFidelityGP
        for lin_trunc in (False, True):
            check_roundtrip(
                SingleTaskMultiFidelityGP(
                    X, Y, iteration_fidelity=1, linear_truncated=lin_trunc
                )
            )
def test_roundtrip(self, cuda=False):
    """Check that batched -> model list -> batched conversion is lossless.

    Args:
        cuda: If True, tensors are created on the "cuda" device, otherwise
            on "cpu".
    """
    device = torch.device("cuda" if cuda else "cpu")

    def check_roundtrip(batched_model):
        # Convert first, then read both state dicts, mirroring the order of
        # operations used by each inline check.
        recovered = model_list_to_batched(batched_to_model_list(batched_model))
        expected_sd = batched_model.state_dict()
        actual_sd = recovered.state_dict()
        self.assertTrue(set(expected_sd) == set(actual_sd))
        self.assertTrue(
            all(torch.equal(expected_sd[key], actual_sd[key]) for key in expected_sd)
        )

    for dtype in (torch.float, torch.double):
        X = torch.rand(10, 2, device=device, dtype=dtype)
        # two outcomes: sum and difference of the two input columns
        Y = torch.stack([X.sum(dim=-1), X[:, 0] - X[:, 1]], dim=-1)
        # SingleTaskGP
        check_roundtrip(SingleTaskGP(X, Y))
        # FixedNoiseGP
        check_roundtrip(FixedNoiseGP(X, Y, torch.rand_like(Y)))
def __init__(
    self,
    model: Model,
    sample_pareto_frontiers: Callable[[Model], Tensor],
    num_fantasies: int = 16,
    X_pending: Optional[Tensor] = None,
    sampler: Optional[MCSampler] = None,
    **kwargs: Any,
) -> None:
    r"""Multi-objective max-value entropy search acquisition function.

    Args:
        model: A fitted multi-output model. A `ModelListGP` is converted to
            a batched multi-output model via `model_list_to_batched`.
        sample_pareto_frontiers: A callable that takes a model and returns a
            `num_samples x n' x m`-dim tensor of outcomes to use for
            constructing `num_samples` sampled Pareto frontiers.
        num_fantasies: Number of fantasies to generate. The higher this
            number the more accurate the model (at the expense of model
            complexity, wall time and memory). Ignored if `X_pending` is
            `None`.
        X_pending: A `m x d`-dim Tensor of `m` design points that have been
            submitted for function evaluation but have not yet been
            evaluated.
        sampler: Forwarded unchanged to
            `MultiObjectiveMCAcquisitionFunction.__init__`.
        kwargs: Accepted for signature compatibility; not read in this
            constructor.

    Raises:
        NotImplementedError: If the training inputs of the model (or of the
            first sub-model of a `ModelListGP`) have more than 3 dimensions.
    """
    MultiObjectiveMCAcquisitionFunction.__init__(self, model=model, sampler=sampler)

    is_model_list = isinstance(model, ModelListGP)

    # Batch GP models (e.g. fantasized models) are not currently supported;
    # for a model list, the first sub-model's inputs are inspected.
    inputs_owner = model.models[0] if is_model_list else model
    if inputs_owner.train_inputs[0].ndim > 3:
        raise NotImplementedError(
            "Batch GP models (e.g. fantasized models) "
            "are not yet supported by qMultiObjectiveMaxValueEntropy"
        )

    # convert to batched MO model
    batched_mo_model = model_list_to_batched(model) if is_model_list else model
    self._init_model = batched_mo_model
    self.mo_model = batched_mo_model
    self.model = batched_multi_output_to_single_output(
        batch_mo_model=batched_mo_model
    )
    self.fantasies_sampler = SobolQMCNormalSampler(num_fantasies)
    self.num_fantasies = num_fantasies
    # weight is used in _compute_information_gain
    self.maximize = True
    self.weight = 1.0
    self.sample_pareto_frontiers = sample_pareto_frontiers

    # sampling max values directly avoids unnecessary model conversion when
    # there are no pending points
    if X_pending is not None:
        self.set_X_pending(X_pending)
    else:
        self._sample_max_values()
def test_model_list_to_batched(self):
    """Exercise `model_list_to_batched` on convertible and rejected inputs.

    Covers the basic and single-model conversions, the combinations that
    must raise `UnsupportedError` or `NotImplementedError`, and the
    FixedNoiseGP, SingleTaskMultiFidelityGP, input-transform and
    outcome-transform cases.
    """

    def expect_error(error_cls, *models):
        # The ModelListGP is built inside the context manager, so an error
        # raised during its construction would also satisfy the assertion.
        with self.assertRaises(error_cls):
            model_list_to_batched(ModelListGP(*models))

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        # basic test
        X = torch.rand(10, 2, **tkwargs)
        Y_sum = X.sum(dim=-1, keepdim=True)
        Y_diff = (X[:, 0] - X[:, 1]).unsqueeze(-1)
        gp_sum = SingleTaskGP(X, Y_sum)
        gp_diff = SingleTaskGP(X, Y_diff)
        converted = model_list_to_batched(ModelListGP(gp_sum, gp_diff))
        self.assertIsInstance(converted, SingleTaskGP)
        # test degenerate (single model)
        converted = model_list_to_batched(ModelListGP(gp_sum))
        self.assertEqual(converted._num_outputs, 1)
        # test different model classes
        other = FixedNoiseGP(X, Y_sum, torch.ones_like(Y_sum))
        expect_error(UnsupportedError, gp_sum, other)
        # test non-batched models
        simple_sum = SimpleGPyTorchModel(X, Y_sum)
        simple_diff = SimpleGPyTorchModel(X, Y_diff)
        expect_error(UnsupportedError, simple_sum, simple_diff)
        # test list of multi-output models
        Y_both = torch.cat([Y_sum, Y_diff], dim=-1)
        other = SingleTaskGP(X, Y_both)
        expect_error(UnsupportedError, gp_sum, other)
        # test different training inputs
        other = SingleTaskGP(2 * X, Y_diff)
        expect_error(UnsupportedError, gp_sum, other)
        # check scalar agreement
        other = SingleTaskGP(X, Y_diff)
        other.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
        expect_error(UnsupportedError, gp_sum, other)
        # check tensor shape agreement
        other = SingleTaskGP(X, Y_diff)
        other.covar_module.raw_outputscale = torch.nn.Parameter(
            torch.tensor([0.0], **tkwargs)
        )
        expect_error(UnsupportedError, gp_sum, other)
        # test HeteroskedasticSingleTaskGP
        other = HeteroskedasticSingleTaskGP(X, Y_sum, torch.ones_like(Y_sum))
        expect_error(NotImplementedError, other)
        # test custom likelihood
        other = SingleTaskGP(X, Y_diff, likelihood=GaussianLikelihood())
        expect_error(NotImplementedError, other)
        # test FixedNoiseGP
        X = torch.rand(10, 2, **tkwargs)
        Y_sum = X.sum(dim=-1, keepdim=True)
        Y_diff = (X[:, 0] - X[:, 1]).unsqueeze(-1)
        first = FixedNoiseGP(X, Y_sum, torch.rand_like(Y_sum))
        second = FixedNoiseGP(X, Y_diff, torch.rand_like(Y_diff))
        model_list_to_batched(ModelListGP(first, second))
        # test SingleTaskMultiFidelityGP
        first = SingleTaskMultiFidelityGP(X, Y_sum, iteration_fidelity=1)
        second = SingleTaskMultiFidelityGP(X, Y_diff, iteration_fidelity=1)
        model_list_to_batched(ModelListGP(first, second))
        second = SingleTaskMultiFidelityGP(X, Y_diff, iteration_fidelity=2)
        model_list = ModelListGP(first, second)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)
        # test input transform
        unit_tf = Normalize(
            d=2,
            bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], **tkwargs),
        )
        first = SingleTaskGP(X, Y_sum, input_transform=unit_tf)
        second = SingleTaskGP(X, Y_diff, input_transform=unit_tf)
        converted = model_list_to_batched(ModelListGP(first, second))
        self.assertIsInstance(converted.input_transform, Normalize)
        self.assertTrue(
            torch.equal(converted.input_transform.bounds, unit_tf.bounds)
        )
        # test different input transforms
        wide_tf = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], **tkwargs),
        )
        first = SingleTaskGP(X, Y_sum, input_transform=unit_tf)
        second = SingleTaskGP(X, Y_diff, input_transform=wide_tf)
        model_list = ModelListGP(first, second)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)
        # test batched input transform
        wide_tf = Normalize(
            d=2,
            bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]], **tkwargs),
            batch_shape=torch.Size([3]),
        )
        first = SingleTaskGP(X, Y_sum, input_transform=wide_tf)
        second = SingleTaskGP(X, Y_diff, input_transform=wide_tf)
        model_list = ModelListGP(first, second)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)
        # test outcome transform
        standardize = Standardize(m=1)
        first = SingleTaskGP(X, Y_sum, outcome_transform=standardize)
        second = SingleTaskGP(X, Y_diff, outcome_transform=standardize)
        model_list = ModelListGP(first, second)
        with self.assertRaises(UnsupportedError):
            model_list_to_batched(model_list)
def fit_gpytorch_model(
    mll: MarginalLogLikelihood, optimizer: Callable = fit_gpytorch_scipy, **kwargs: Any
) -> MarginalLogLikelihood:
    r"""Fit hyperparameters of a GPyTorch model.

    On optimizer failures, a new initial condition is sampled from the
    hyperparameter priors and optimization is retried. The maximum number of
    retries can be passed in as a `max_retries` kwarg (default is 5).

    Optimizer functions are in botorch.optim.fit.

    Args:
        mll: MarginalLogLikelihood to be maximized.
        optimizer: The optimizer function.
        kwargs: Arguments passed along to the optimizer function, including
            `max_retries` and `sequential` (controls the fitting of `ModelListGP`
            and `BatchedMultiOutputGPyTorchModel` models) or `approx_mll`
            (whether to use gpytorch's approximate MLL computation).
            `max_retries` and `sequential` are consumed here; everything else
            reaches the optimizer on every code path, including the
            non-sequential fallback taken when the batched model cannot be
            converted to a model list.

    Returns:
        MarginalLogLikelihood with optimized parameters.

    Example:
        >>> gp = SingleTaskGP(train_X, train_Y)
        >>> mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        >>> fit_gpytorch_model(mll)
    """
    sequential = kwargs.pop("sequential", True)
    max_retries = kwargs.pop("max_retries", 5)
    if isinstance(mll, SumMarginalLogLikelihood) and sequential:
        # fit each sub-model's MLL independently
        for mll_ in mll.mlls:
            fit_gpytorch_model(
                mll=mll_, optimizer=optimizer, max_retries=max_retries, **kwargs
            )
        return mll
    elif (
        isinstance(mll.model, BatchedMultiOutputGPyTorchModel)
        and mll.model._num_outputs > 1
        and sequential
    ):
        tf = None
        try:  # check if backwards-conversion is possible
            # remove the outcome transform since the training targets are already
            # transformed and the outcome transform cannot currently be split.
            # TODO: support splitting outcome transforms.
            if hasattr(mll.model, "outcome_transform"):
                tf = mll.model.outcome_transform
                mll.model.outcome_transform = None
            model_list = batched_to_model_list(mll.model)
            mll_ = SumMarginalLogLikelihood(model_list.likelihood, model_list)
            fit_gpytorch_model(
                mll=mll_,
                optimizer=optimizer,
                sequential=True,
                max_retries=max_retries,
                **kwargs,
            )
            model_ = model_list_to_batched(mll_.model)
            mll.model.load_state_dict(model_.state_dict())
            # setting the transformed inputs is necessary because gpytorch
            # stores the raw training inputs on the ExactGP in the
            # ExactGP.__init__ call. At evaluation time, the test inputs will
            # already be in the transformed space if some transforms have
            # transform_on_eval set to False. ExactGP.__call__ will
            # concatenate the test points with the training inputs. Therefore,
            # it is important to set the ExactGP's train_inputs to also be
            # transformed data using all transforms (including the transforms
            # with transform_on_train set to True).
            mll.train()
            # re-attach the transform before switching to eval mode so that
            # `mll.eval()` also applies to it
            if tf is not None:
                mll.model.outcome_transform = tf
            return mll.eval()
        # NotImplementedError is omitted since it derives from RuntimeError
        except (UnsupportedError, RuntimeError, AttributeError):
            warnings.warn(FAILED_CONVERSION_MSG, BotorchWarning)
            if tf is not None:
                mll.model.outcome_transform = tf
            # Fall back to joint (non-sequential) fitting. The optimizer kwargs
            # are forwarded so that user-supplied options are honored here too.
            return fit_gpytorch_model(
                mll=mll,
                optimizer=optimizer,
                sequential=False,
                max_retries=max_retries,
                **kwargs,
            )
    # retry with random samples from the priors upon failure
    mll.train()
    original_state_dict = deepcopy(mll.model.state_dict())
    retry = 0
    while retry < max_retries:
        with warnings.catch_warnings(record=True) as ws:
            if retry > 0:  # use normal initial conditions on first try
                mll.model.load_state_dict(original_state_dict)
                sample_all_priors(mll.model)
            try:
                mll, _ = optimizer(mll, track_iterations=False, **kwargs)
            except NotPSDError:
                retry += 1
                logging.log(
                    logging.DEBUG,
                    f"Fitting failed on try {retry} due to a NotPSDError.",
                )
                continue
        # The recorded warnings are replayed outside the recording context so
        # that they reach the caller instead of being appended back to `ws`.
        has_optwarning = False
        for w in ws:
            # Do not count reaching `maxiter` as an optimization failure.
            if "ITERATIONS REACHED LIMIT" in str(w.message):
                logging.log(
                    logging.DEBUG,
                    "Fitting ended early due to reaching the iteration limit.",
                )
                continue
            has_optwarning |= issubclass(w.category, OptimizationWarning)
            warnings.warn(w.message, w.category)
        if not has_optwarning:
            mll.eval()
            return mll
        retry += 1
        logging.log(logging.DEBUG, f"Fitting failed on try {retry}.")

    warnings.warn("Fitting failed on all retries.", OptimizationWarning)
    return mll.eval()
def test_model_list_to_batched(self):
    """Exercise `model_list_to_batched` on convertible and rejected inputs.

    Covers the basic and single-model conversions, the combinations that
    must raise `UnsupportedError` or `NotImplementedError`, and the
    FixedNoiseGP conversion.
    """

    def expect_error(error_cls, *models):
        # The ModelListGP is built inside the context manager, so an error
        # raised during its construction would also satisfy the assertion.
        with self.assertRaises(error_cls):
            model_list_to_batched(ModelListGP(*models))

    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        # basic test
        X = torch.rand(10, 2, **tkwargs)
        Y_sum = X.sum(dim=-1, keepdim=True)
        Y_diff = (X[:, 0] - X[:, 1]).unsqueeze(-1)
        gp_sum = SingleTaskGP(X, Y_sum)
        gp_diff = SingleTaskGP(X, Y_diff)
        converted = model_list_to_batched(ModelListGP(gp_sum, gp_diff))
        self.assertIsInstance(converted, SingleTaskGP)
        # test degenerate (single model)
        converted = model_list_to_batched(ModelListGP(gp_sum))
        self.assertEqual(converted._num_outputs, 1)
        # test different model classes
        other = FixedNoiseGP(X, Y_sum, torch.ones_like(Y_sum))
        expect_error(UnsupportedError, gp_sum, other)
        # test non-batched models
        simple_sum = SimpleGPyTorchModel(X, Y_sum)
        simple_diff = SimpleGPyTorchModel(X, Y_diff)
        expect_error(UnsupportedError, simple_sum, simple_diff)
        # test list of multi-output models
        Y_both = torch.cat([Y_sum, Y_diff], dim=-1)
        other = SingleTaskGP(X, Y_both)
        expect_error(UnsupportedError, gp_sum, other)
        # test different training inputs
        other = SingleTaskGP(2 * X, Y_diff)
        expect_error(UnsupportedError, gp_sum, other)
        # check scalar agreement
        other = SingleTaskGP(X, Y_diff)
        other.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
        expect_error(UnsupportedError, gp_sum, other)
        # check tensor shape agreement
        other = SingleTaskGP(X, Y_diff)
        other.covar_module.raw_outputscale = torch.nn.Parameter(
            torch.tensor([0.0], **tkwargs)
        )
        expect_error(UnsupportedError, gp_sum, other)
        # test HeteroskedasticSingleTaskGP
        other = HeteroskedasticSingleTaskGP(X, Y_sum, torch.ones_like(Y_sum))
        expect_error(NotImplementedError, other)
        # test custom likelihood
        other = SingleTaskGP(X, Y_diff, likelihood=GaussianLikelihood())
        expect_error(NotImplementedError, other)
        # test FixedNoiseGP
        X = torch.rand(10, 2, **tkwargs)
        Y_sum = X.sum(dim=-1, keepdim=True)
        Y_diff = (X[:, 0] - X[:, 1]).unsqueeze(-1)
        first = FixedNoiseGP(X, Y_sum, torch.rand_like(Y_sum))
        second = FixedNoiseGP(X, Y_diff, torch.rand_like(Y_diff))
        model_list_to_batched(ModelListGP(first, second))
def fit_gpytorch_model(
    mll: MarginalLogLikelihood, optimizer: Callable = fit_gpytorch_scipy, **kwargs: Any
) -> MarginalLogLikelihood:
    r"""Fit hyperparameters of a GPyTorch model.

    On optimizer failures, a new initial condition is sampled from the
    hyperparameter priors and optimization is retried. The maximum number of
    retries can be passed in as a `max_retries` kwarg (default is 5).

    Optimizer functions are in botorch.optim.fit.

    Args:
        mll: MarginalLogLikelihood to be maximized.
        optimizer: The optimizer function.
        kwargs: Arguments passed along to the optimizer function, including
            `max_retries` and `sequential` (controls the fitting of `ModelListGP`
            and `BatchedMultiOutputGPyTorchModel` models) or `approx_mll`
            (whether to use gpytorch's approximate MLL computation).
            `max_retries` and `sequential` are consumed here; everything else
            reaches the optimizer on every code path, including the
            non-sequential fallback taken when the batched model cannot be
            converted to a model list.

    Returns:
        MarginalLogLikelihood with optimized parameters.

    Example:
        >>> gp = SingleTaskGP(train_X, train_Y)
        >>> mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        >>> fit_gpytorch_model(mll)
    """
    sequential = kwargs.pop("sequential", True)
    max_retries = kwargs.pop("max_retries", 5)
    if isinstance(mll, SumMarginalLogLikelihood) and sequential:
        # fit each sub-model's MLL independently
        for mll_ in mll.mlls:
            fit_gpytorch_model(
                mll=mll_, optimizer=optimizer, max_retries=max_retries, **kwargs
            )
        return mll
    elif (
        isinstance(mll.model, BatchedMultiOutputGPyTorchModel)
        and mll.model._num_outputs > 1
        and sequential
    ):
        try:  # check if backwards-conversion is possible
            model_list = batched_to_model_list(mll.model)
            # Feasibility check only: the result is discarded, but a failure
            # here triggers the fallback before any fitting is done.
            model_list_to_batched(model_list)
            mll_ = SumMarginalLogLikelihood(model_list.likelihood, model_list)
            fit_gpytorch_model(
                mll=mll_,
                optimizer=optimizer,
                sequential=True,
                max_retries=max_retries,
                **kwargs,
            )
            model_ = model_list_to_batched(mll_.model)
            mll.model.load_state_dict(model_.state_dict())
            return mll.eval()
        # NotImplementedError is omitted since it derives from RuntimeError
        except (UnsupportedError, RuntimeError, AttributeError):
            warnings.warn(FAILED_CONVERSION_MSG, BotorchWarning)
            # Fall back to joint (non-sequential) fitting. The optimizer kwargs
            # are forwarded so that user-supplied options are honored here too.
            return fit_gpytorch_model(
                mll=mll,
                optimizer=optimizer,
                sequential=False,
                max_retries=max_retries,
                **kwargs,
            )
    # retry with random samples from the priors upon failure
    mll.train()
    original_state_dict = deepcopy(mll.model.state_dict())
    retry = 0
    while retry < max_retries:
        with warnings.catch_warnings(record=True) as ws:
            if retry > 0:  # use normal initial conditions on first try
                mll.model.load_state_dict(original_state_dict)
                sample_all_priors(mll.model)
            mll, _ = optimizer(mll, track_iterations=False, **kwargs)
            # a try counts as successful if no OptimizationWarning was recorded
            if not any(issubclass(w.category, OptimizationWarning) for w in ws):
                mll.eval()
                return mll
            retry += 1
            logging.log(logging.DEBUG, f"Fitting failed on try {retry}.")

    warnings.warn("Fitting failed on all retries.", OptimizationWarning)
    return mll.eval()