def test_lcm_icm_equivalence(self):
    # Training points are 100 evenly spaced points in [0, 1] (the same for both tasks)
    train_x = torch.linspace(0, 1, 100)
    # y1 function is sin(2*pi*x) with noise N(0, 0.04)
    train_y1 = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
    # y2 function is cos(2*pi*x) with noise N(0, 0.04)
    train_y2 = torch.cos(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2
    # Stack the two targets into an (n, 2) multitask tensor
    train_y = torch.stack([train_y1, train_y2], -1)

    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model = MultitaskGPModel(train_x, train_y, likelihood)

    # Use the adam optimizer (model.parameters() includes the GaussianLikelihood parameters)
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    model.train()
    likelihood.train()
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    n_iter = 50
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()
    model.eval()
    likelihood.eval()

    # Make predictions for LCM
    with torch.no_grad():
        test_x = torch.linspace(0, 1, 51)
        observed_pred = likelihood(model(test_x))
        mean = observed_pred.mean

    # Train an ICM model on the same data, with a fresh likelihood created before the model
    # so that the likelihood being optimized is the one attached to model_icm
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model_icm = MultitaskGPModel_ICM(train_x, train_y, likelihood)
    model_icm.train()
    likelihood.train()
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model_icm)
    optimizer = torch.optim.Adam(
        [{"params": model_icm.parameters()}],  # Includes GaussianLikelihood parameters
        lr=0.1,
    )
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = model_icm(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()
    model_icm.eval()
    likelihood.eval()

    # Make predictions for ICM
    with torch.no_grad():
        test_x = torch.linspace(0, 1, 51)
        observed_pred_icm = likelihood(model_icm(test_x))
        mean_icm = observed_pred_icm.mean

    # Make sure predictions from LCM with one base kernel and ICM are the same.
    self.assertLess((mean - mean_icm).pow(2).mean(), 1e-2)
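# The test above references two model classes that are not defined in this excerpt.
# A minimal sketch of what they might look like, assuming GPyTorch's LCMKernel and
# MultitaskKernel APIs; the class bodies below are illustrative assumptions, not the
# original definitions.
class MultitaskGPModel(gpytorch.models.ExactGP):
    """LCM model: a sum of coregionalized base kernels (here, a single RBF)."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        self.covar_module = gpytorch.kernels.LCMKernel(
            [gpytorch.kernels.RBFKernel()], num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)


class MultitaskGPModel_ICM(gpytorch.models.ExactGP):
    """ICM model: a single base kernel with a rank-1 coregionalization matrix."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(gpytorch.means.ConstantMean(), num_tasks=2)
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)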
def test_train_and_eval(self):
    # Set up a 4-task likelihood and the LMC variational model
    likelihood = MultitaskGaussianLikelihood(num_tasks=4)
    model = LMCModel()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ], lr=0.01)

    # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

    # Full-batch training for 400 iterations
    for _ in range(400):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    for param in model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    model.eval()
    likelihood.eval()

    # Make predictions for all four tasks, and check MAEs.
    with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
        batch_predictions = likelihood(model(train_x))
        preds1 = batch_predictions.mean[:, 0]
        preds2 = batch_predictions.mean[:, 1]
        preds3 = batch_predictions.mean[:, 2]
        preds4 = batch_predictions.mean[:, 3]
        mean_abs_error1 = torch.mean(torch.abs(train_y[..., 0] - preds1))
        mean_abs_error2 = torch.mean(torch.abs(train_y[..., 1] - preds2))
        mean_abs_error3 = torch.mean(torch.abs(train_y[..., 2] - preds3))
        mean_abs_error4 = torch.mean(torch.abs(train_y[..., 3] - preds4))
        self.assertLess(mean_abs_error1.squeeze().item(), 0.15)
        self.assertLess(mean_abs_error2.squeeze().item(), 0.15)
        self.assertLess(mean_abs_error3.squeeze().item(), 0.15)
        self.assertLess(mean_abs_error4.squeeze().item(), 0.15)

        # Smoke test for getting predictive uncertainties
        lower, upper = batch_predictions.confidence_region()
        self.assertEqual(lower.shape, train_y.shape)
        self.assertEqual(upper.shape, train_y.shape)
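# `LMCModel`, `train_x`, and `train_y` are defined elsewhere in the original file
# (train_x is assumed to be an (n,) input tensor and train_y an (n, 4) target tensor).
# A plausible sketch of the variational LMC model this test exercises, assuming
# GPyTorch's LMCVariationalStrategy; the number of latents and inducing points below
# are illustrative assumptions.
num_latents = 3

class LMCModel(gpytorch.models.ApproximateGP):
    def __init__(self):
        # One set of inducing points per latent GP
        inducing_points = torch.rand(num_latents, 16, 1)
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )
        # Wrap a batch VariationalStrategy so the latent GPs are mixed into 4 output tasks
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=4,
            num_latents=num_latents,
            latent_dim=-1,
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])),
            batch_shape=torch.Size([num_latents]),
        )

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))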
def test_multitask_gp_mean_abs_error(self, cuda=False):
    train_x, train_y = self._get_data(cuda=cuda)
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    model = MultitaskGPModel(train_x, train_y, likelihood)
    if cuda:
        model.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer (model.parameters() includes the GaussianLikelihood parameters)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 50
    for _ in range(n_iter):
        # Zero previously backpropped gradients
        optimizer.zero_grad()
        # Compute the multitask output on the training inputs
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Test the model
    model.eval()
    likelihood.eval()

    test_x = torch.linspace(0, 1, 51, device=torch.device("cuda") if cuda else torch.device("cpu"))
    test_y1 = torch.sin(test_x * (2 * pi))
    test_y2 = torch.cos(test_x * (2 * pi))
    test_preds = likelihood(model(test_x)).mean
    mean_abs_error_task_1 = torch.mean(torch.abs(test_y1 - test_preds[:, 0]))
    mean_abs_error_task_2 = torch.mean(torch.abs(test_y2 - test_preds[:, 1]))

    self.assertLess(mean_abs_error_task_1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error_task_2.squeeze().item(), 0.05)
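# `self._get_data` is not shown in this excerpt. A minimal sketch of a helper that
# produces noiseless sine/cosine training data (so the 0.05 MAE thresholds above are
# attainable); the exact number of points is an assumption.
def _get_data(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    train_x = torch.linspace(0, 1, 100, device=device)
    train_y1 = torch.sin(train_x * (2 * pi))
    train_y2 = torch.cos(train_x * (2 * pi))
    # Stack into an (n, num_tasks) target tensor
    train_y = torch.stack([train_y1, train_y2], -1)
    return train_x, train_y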
def test_train_on_single_set_test_on_batch(self):
    # Set up a 2-task likelihood with a prior on the (log) noise
    likelihood = MultitaskGaussianLikelihood(
        log_noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1), scale=torch.ones(1), log_transform=True),
        num_tasks=2,
    )
    gp_model = ExactGPModel(train_x1, train_y1, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    # (gp_model.parameters() already includes the likelihood parameters)
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(50):
        optimizer.zero_grad()
        output = gp_model(train_x1)
        loss = -mll(output, train_y1).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    batch_predictions = likelihood(gp_model(test_x12))
    preds1 = batch_predictions.mean[0]
    preds2 = batch_predictions.mean[1]
    mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
    mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
    self.assertLess(mean_abs_error1.squeeze().item(), 0.05)
    self.assertLess(mean_abs_error2.squeeze().item(), 0.05)
def test_train_and_eval(self):
    # Set up a 2-task likelihood and the exact multitask GP
    likelihood = MultitaskGaussianLikelihood(num_tasks=2)
    gp_model = ExactGPModel(train_x, train_y12, likelihood)
    mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)

    # Find optimal model hyperparameters
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(gp_model.parameters(), lr=0.1)
    for _ in range(75):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y12).sum()
        loss.backward()
        optimizer.step()

    for param in gp_model.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Test the model
    gp_model.eval()
    likelihood.eval()

    # Make predictions for both sets of test points, and check MAEs.
    with torch.no_grad(), gpytorch.settings.max_eager_kernel_size(1):
        batch_predictions = likelihood(gp_model(test_x))
        preds1 = batch_predictions.mean[:, 0]
        preds2 = batch_predictions.mean[:, 1]
        mean_abs_error1 = torch.mean(torch.abs(test_y1 - preds1))
        mean_abs_error2 = torch.mean(torch.abs(test_y2 - preds2))
        self.assertLess(mean_abs_error1.squeeze().item(), 0.01)
        self.assertLess(mean_abs_error2.squeeze().item(), 0.01)

        # Smoke test for getting predictive uncertainties
        lower, upper = batch_predictions.confidence_region()
        self.assertEqual(lower.shape, test_y12.shape)
        self.assertEqual(upper.shape, test_y12.shape)
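# The module-level fixtures used by the test above (train_x, train_y12, test_x,
# test_y1, test_y2, test_y12) are defined elsewhere in the original file. A plausible
# sketch, assuming noiseless sine/cosine targets stacked into two-task tensors
# (sizes and noise level are assumptions, not the original values):
train_x = torch.linspace(0, 1, 100)
train_y12 = torch.stack([
    torch.sin(train_x * (2 * math.pi)),
    torch.cos(train_x * (2 * math.pi)),
], -1)

test_x = torch.linspace(0, 1, 51)
test_y1 = torch.sin(test_x * (2 * math.pi))
test_y2 = torch.cos(test_x * (2 * math.pi))
test_y12 = torch.stack([test_y1, test_y2], -1)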
def test_multitask_low_rank_noise_covar(self):
    likelihood = MultitaskGaussianLikelihood(n_tasks=2, rank=1)
    model = MultitaskGPModel(train_x, train_y, likelihood)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer (model.parameters() includes the GaussianLikelihood parameters)
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    n_iter = 50
    for _ in range(n_iter):
        # Zero previously backpropped gradients
        optimizer.zero_grad()
        # Compute the multitask output on the training inputs
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Test the model
    model.eval()
    likelihood.eval()

    # Reconstruct the learned low-rank-plus-diagonal inter-task noise covariance
    n_tasks = 2
    task_noise_covar_factor = likelihood.task_noise_covar_factor
    log_noise = likelihood.log_noise
    task_noise_covar = task_noise_covar_factor.matmul(
        task_noise_covar_factor.transpose(-1, -2)
    ) + log_noise.exp() * torch.eye(n_tasks)

    # The learned off-diagonal (inter-task) noise covariance should be clearly positive
    self.assertGreater(task_noise_covar[0, 1].data.squeeze().item(), 0.05)
class GP(BaseModel):
    support_grad = True
    support_multi_output = True

    def __init__(self, num_cont, num_enum, num_out, **conf):
        super().__init__(num_cont, num_enum, num_out, **conf)
        self.lr = conf.get('lr', 3e-2)
        self.num_epochs = conf.get('num_epochs', 100)
        self.verbose = conf.get('verbose', False)
        self.print_every = conf.get('print_every', 10)
        self.noise_free = conf.get('noise_free', False)
        self.pred_likeli = conf.get('pred_likeli', True)
        self.noise_lb = conf.get('noise_lb', 1e-5)
        self.xscaler = TorchMinMaxScaler((-1, 1))
        self.yscaler = TorchStandardScaler()

    def fit_scaler(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        if Xc is not None and Xc.shape[1] > 0:
            self.xscaler.fit(Xc)
        self.yscaler.fit(y)

    def xtrans(self, Xc: Tensor, Xe: Tensor, y: Tensor = None):
        if Xc is not None and Xc.shape[1] > 0:
            Xc_t = self.xscaler.transform(Xc)
        else:
            Xc_t = torch.zeros(Xe.shape[0], 0)

        if Xe is None:
            Xe_t = torch.zeros(Xc.shape[0], 0).long()
        else:
            Xe_t = Xe.long()

        if y is not None:
            y_t = self.yscaler.transform(y)
            return Xc_t, Xe_t, y_t
        else:
            return Xc_t, Xe_t

    def fit(self, Xc: Tensor, Xe: Tensor, y: Tensor):
        Xc, Xe, y = filter_nan(Xc, Xe, y, 'all')
        self.fit_scaler(Xc, Xe, y)
        Xc, Xe, y = self.xtrans(Xc, Xe, y)

        assert Xc.shape[1] == self.num_cont
        assert Xe.shape[1] == self.num_enum
        assert y.shape[1] == self.num_out

        self.Xc = Xc
        self.Xe = Xe
        self.y = y

        n_constr = GreaterThan(self.noise_lb)
        n_prior = LogNormalPrior(-4.63, 0.5)
        if self.num_out == 1:
            self.lik = GaussianLikelihood(noise_constraint=n_constr, noise_prior=n_prior)
        else:
            self.lik = MultitaskGaussianLikelihood(
                num_tasks=self.num_out, noise_constraint=n_constr, noise_prior=n_prior
            )
        self.gp = GPyTorchModel(self.Xc, self.Xe, self.y, self.lik, **self.conf)

        if self.num_out == 1:  # XXX: only tuned for single-output BO
            if self.num_cont > 0:
                self.gp.kern.outputscale = self.y.var()
                lscales = self.gp.kern.base_kernel.lengthscale.detach().clone().view(1, -1)
                for i in range(self.num_cont):
                    lscales[0, i] = torch.pdist(self.Xc[:, i].view(-1, 1)).median().clamp(min=0.02)
                self.gp.kern.base_kernel.lengthscale = lscales
            if self.noise_free:
                self.gp.likelihood.noise = self.noise_lb * 1.1
                self.gp.likelihood.raw_noise.requires_grad = False
            else:
                self.gp.likelihood.noise = max(1e-2, self.noise_lb)

        self.gp.train()
        self.lik.train()

        opt = torch.optim.LBFGS(self.gp.parameters(), lr=self.lr, max_iter=5, line_search_fn='strong_wolfe')
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.lik, self.gp)
        for epoch in range(self.num_epochs):
            def closure():
                dist = self.gp(self.Xc, self.Xe)
                loss = -1 * mll(dist, self.y.squeeze())
                opt.zero_grad()
                loss.backward()
                return loss
            opt.step(closure)
            if self.verbose and ((epoch + 1) % self.print_every == 0 or epoch == 0):
                print('After %d epochs, loss = %g' % (epoch + 1, closure().item()), flush=True)
        self.gp.eval()
        self.lik.eval()

    def predict(self, Xc, Xe):
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.fast_pred_var(), gpytorch.settings.debug(False):
            pred = self.gp(Xc, Xe)
            if self.pred_likeli:
                pred = self.lik(pred)
            mu_ = pred.mean.reshape(-1, self.num_out)
            var_ = pred.variance.reshape(-1, self.num_out)
            mu = self.yscaler.inverse_transform(mu_)
            var = var_ * self.yscaler.std**2
            return mu, var

    def sample_y(self, Xc, Xe, n_samples=1) -> FloatTensor:
        """Should return (n_samples, Xc.shape[0], self.num_out)"""
        Xc, Xe = self.xtrans(Xc, Xe)
        with gpytorch.settings.debug(False):
            if self.pred_likeli:
                pred = self.lik(self.gp(Xc, Xe))
            else:
                pred = self.gp(Xc, Xe)
            samp = pred.rsample(torch.Size((n_samples,))).view(n_samples, Xc.shape[0], self.num_out)
            return self.yscaler.inverse_transform(samp)

    def sample_f(self):
        raise NotImplementedError('Thompson sampling is not supported for GP, use `sample_y` instead')

    @property
    def noise(self):
        if self.num_out == 1:
            return (self.gp.likelihood.noise * self.yscaler.std**2).view(self.num_out).detach()
        else:
            return (self.gp.likelihood.noise_covar.noise * self.yscaler.std**2).view(self.num_out).detach()
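# Illustrative usage of the GP wrapper above, wrapped in a helper so it does not run on
# import. This is a sketch: the data is synthetic, and `GPyTorchModel`, `filter_nan`, and
# the scalers come from the surrounding package.
def _example_gp_usage():
    Xc = torch.rand(64, 3)                 # continuous inputs
    Xe = torch.zeros(64, 0).long()         # no enumerated inputs in this example
    y = Xc.sum(dim=1, keepdim=True) + 0.01 * torch.randn(64, 1)

    model = GP(num_cont=3, num_enum=0, num_out=1, num_epochs=50, verbose=False)
    model.fit(Xc, Xe, y)
    mu, var = model.predict(Xc, Xe)                 # posterior mean/variance on the original scale
    samples = model.sample_y(Xc, Xe, n_samples=8)   # shape (8, 64, 1)
    return mu, var, samples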
def train_gp(train_x, train_y, use_ard, num_steps, hypers={}):
    """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized."""
    assert train_x.ndim == 2
    assert train_y.ndim == 2
    assert train_x.shape[0] == train_y.shape[0]

    # Create hyperparameter bounds
    noise_constraint = Interval(5e-4, 0.2)
    if use_ard:
        lengthscale_constraint = Interval(0.005, 2.0)
    else:
        lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1]))  # [0.005, sqrt(dim)]
    outputscale_constraint = Interval(0.05, 20.0)

    # Create models
    likelihood = MultitaskGaussianLikelihood(
        num_tasks=train_y.size(-1),
        noise_constraint=noise_constraint,
    ).to(device=train_x.device, dtype=train_y.dtype)
    ard_dims = train_x.shape[1] if use_ard else None
    model = GP(
        train_x=train_x,
        train_y=train_y,
        likelihood=likelihood,
        lengthscale_constraint=lengthscale_constraint,
        outputscale_constraint=outputscale_constraint,
        ard_dims=ard_dims,
    ).to(device=train_x.device, dtype=train_x.dtype)

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # "Loss" for GPs - the marginal log likelihood
    mll = ExactMarginalLogLikelihood(likelihood, model)

    # Initialize model hypers
    if hypers:
        model.load_state_dict(hypers)
    else:
        hypers = {}
        hypers["covar_module.outputscale"] = 1.0
        hypers["covar_module.base_kernel.lengthscale"] = 0.5
        hypers["likelihood.noise"] = 0.005
        model.initialize(**hypers)

    # Use the adam optimizer
    optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1)

    for _ in range(num_steps):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    # Switch to eval mode
    model.eval()
    likelihood.eval()

    return model
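# Illustrative call to train_gp, wrapped in a helper so it does not run on import. This
# is a sketch: the data sizes and two-task targets are assumptions, and it assumes the
# `GP` class above is a gpytorch ExactGP so the fitted likelihood is reachable as
# `model.likelihood`.
def _example_train_gp_call():
    train_x = torch.rand(50, 3)                                    # 50 points in [0, 1]^3
    raw_y = torch.stack([train_x.sum(-1), train_x.prod(-1)], -1)   # two correlated tasks
    train_y = (raw_y - raw_y.mean(0)) / raw_y.std(0)               # standardize each task

    model = train_gp(train_x, train_y, use_ard=True, num_steps=50)
    test_x = torch.rand(20, 3)
    with torch.no_grad():
        preds = model.likelihood(model(test_x))                    # multitask predictive
    return preds.mean                                              # expected shape: (20, 2)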