def test_classification_error_cuda(self):
    if torch.cuda.is_available():
        train_x, train_y = train_data(cuda=True)
        likelihood = BernoulliLikelihood().cuda()
        model = GPClassificationModel(train_x).cuda()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(50):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Set back to eval mode
        model.eval()
        test_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-5)
def test_classification_error(self, cuda=False, mll_cls=gpytorch.mlls.VariationalELBO):
    train_x, train_y = train_data(cuda=cuda)
    likelihood = BernoulliLikelihood()
    model = SVGPClassificationModel(torch.linspace(0, 1, 25))
    mll = mll_cls(likelihood, model, num_data=len(train_y))
    if cuda:
        likelihood = likelihood.cuda()
        model = model.cuda()
        mll = mll.cuda()

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.Adam(
        [{"params": model.parameters()}, {"params": likelihood.parameters()}],
        lr=0.1,
    )

    _wrapped_cg = MagicMock(wraps=gpytorch.utils.linear_cg)
    _cg_mock = patch("gpytorch.utils.linear_cg", new=_wrapped_cg)
    with warnings.catch_warnings(record=True) as ws, _cg_mock as cg_mock:
        for _ in range(400):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.squeeze().round().float()
        mean_abs_error = torch.mean(torch.ne(train_y, test_preds).float())
        self.assertLess(mean_abs_error.item(), 2e-1)

        # Make sure CG was called (or not), and no warnings were thrown
        self.assertFalse(cg_mock.called)
        self.assertFalse(any(issubclass(w.category, ExtraComputationWarning) for w in ws))
def test_kissgp_classification_error_cuda():
    if torch.cuda.is_available():
        train_x, train_y = train_data(cuda=True)
        likelihood = BernoulliLikelihood().cuda()
        model = GPClassificationModel(train_x.data).cuda()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, n_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for i in range(50):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        # Set back to eval mode
        model.eval()
        test_preds = likelihood(model(train_x)).mean().ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        assert mean_abs_error.data.squeeze()[0] < 1e-5
def __init__(self):
    super(GPClassificationModel, self).__init__(BernoulliLikelihood())
    self.mean_module = ConstantMean(constant_bounds=[-1e-5, 1e-5])
    self.covar_module = RBFKernel(log_lengthscale_bounds=(-5, 6))
    self.register_parameter(
        'log_outputscale', nn.Parameter(torch.Tensor([0])), bounds=(-5, 6)
    )
def __init__(self, stem, init_x, num_inducing, lr, streaming=False, beta=1.0,
             learn_inducing_locations=True, num_update_steps=1, **kwargs):
    super().__init__()
    likelihood = BernoulliLikelihood()
    inducing_points = torch.empty(num_inducing, stem.output_dim)
    inducing_points.uniform_(-1, 1)
    mean_module = ZeroMean()
    covar_module = ScaleKernel(RBFKernel(ard_num_dims=stem.output_dim))
    self.gp = VariationalGPModel(
        inducing_points,
        mean_module,
        covar_module,
        streaming,
        likelihood,
        beta=beta,
        learn_inducing_locations=learn_inducing_locations,
    )
    self.mll = None
    self.stem = stem
    self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    self.num_update_steps = num_update_steps
    self._raw_inputs = [init_x]
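# The constructor above leaves `self.mll` unset; a hypothetical update method for
# this wrapper might build it lazily and take `num_update_steps` Adam steps on the
# variational ELBO. This is only a sketch: it assumes the BernoulliLikelihood passed
# to VariationalGPModel is reachable as `self.gp.likelihood`, which is an assumption
# about that (project-specific) class.
def fit_step(self, inputs, targets):
    self.train()
    if self.mll is None:
        self.mll = gpytorch.mlls.VariationalELBO(
            self.gp.likelihood, self.gp, num_data=targets.numel()
        )
    for _ in range(self.num_update_steps):
        self.optimizer.zero_grad()
        features = self.stem(inputs)       # featurize raw inputs with the stem
        output = self.gp(features)         # latent GP posterior over the features
        loss = -self.mll(output, targets)  # negative ELBO
        loss.backward()
        self.optimizer.step()
    return loss.item()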
def test_kissgp_classification_error(self):
    model = GPClassificationModel()
    likelihood = BernoulliLikelihood()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    optimizer.n_iter = 0
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    for _, param in model.named_parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)
    for param in likelihood.parameters():
        self.assertTrue(param.grad is not None)
        self.assertGreater(param.grad.norm().item(), 0)

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean.ge(0.5).float()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.squeeze().item(), 1e-5)
def test_classification_fast_pred_var(self):
    with gpytorch.settings.fast_pred_var():
        train_x, train_y = train_data()
        likelihood = BernoulliLikelihood()
        model = GPClassificationModel(train_x)
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(model.parameters(), lr=0.1)
        optimizer.n_iter = 0
        for _ in range(75):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        optimizer.step()

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = likelihood(model(train_x)).mean.round()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.item(), 1e-5)
def test_kissgp_classification_error(self):
    model = GPClassificationModel()
    likelihood = BernoulliLikelihood()
    mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, n_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    with gpytorch.settings.max_preconditioner_size(5):
        optimizer = optim.Adam(model.parameters(), lr=0.15)
        optimizer.n_iter = 0
        for _ in range(20):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)
        for param in likelihood.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.squeeze().item(), 1e-5)
def testClassification(self):
    # Init
    target = 0.75
    model_gen_options = {"num_restarts": 1, "raw_samples": 3, "epochs": 5}
    lb = torch.tensor([0, 0])
    ub = torch.tensor([4, 4])
    m = MonotonicRejectionGP(
        lb=lb,
        ub=ub,
        likelihood=BernoulliLikelihood(),
        fixed_prior_mean=target,
        monotonic_idxs=[1],
        num_induc=2,
        num_samples=3,
        num_rejection_samples=4,
    )
    strat = Strategy(
        lb=lb,
        ub=ub,
        model=m,
        generator=MonotonicRejectionGenerator(
            MonotonicMCLSE,
            acqf_kwargs={"target": target, "objective": ProbitObjective()},
            model_gen_options=model_gen_options,
        ),
        n_trials=1,
    )

    # Fit
    train_x = torch.tensor([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
    train_y = torch.tensor([1.0, 1.0, 0.0])
    m.fit(train_x=train_x, train_y=train_y)
    self.assertEqual(m.inducing_points.shape, torch.Size([2, 2]))
    self.assertAlmostEqual(m.mean_module.constant.item(), norm.ppf(0.75))

    # Predict
    f, var = m.predict(train_x)
    self.assertEqual(f.shape, torch.Size([3]))
    self.assertEqual(var.shape, torch.Size([3]))

    # Gen
    strat.add_data(train_x, train_y)
    Xopt = strat.gen()
    self.assertEqual(Xopt.shape, torch.Size([1, 2]))

    # Acquisition function
    acq = strat.generator._instantiate_acquisition_fn(m)
    self.assertEqual(acq.deriv_constraint_points.shape, torch.Size([2, 3]))
    self.assertTrue(torch.equal(acq.deriv_constraint_points[:, -1], 2 * torch.ones(2)))
    self.assertEqual(acq.target, 0.75)
    self.assertTrue(isinstance(acq.objective, ProbitObjective))

    # Update
    m.update(train_x=train_x[:2, :2], train_y=train_y[:2], warmstart=True)
    self.assertEqual(m.train_inputs[0].shape, torch.Size([2, 3]))
def test_kissgp_classification_error(self):
    with gpytorch.settings.use_toeplitz(False), gpytorch.settings.max_preconditioner_size(5):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(model.parameters(), lr=0.14455771335700404)
        optimizer.n_iter = 0
        for _ in range(10):
            optimizer.zero_grad()
            # Get predictive output
            output = model(train_x)
            # Calc loss and backprop gradients
            loss = -mll(output, train_y).sum()
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

        for param in model.parameters():
            self.assertTrue(param.grad is not None)
            self.assertGreater(param.grad.norm().item(), 0)

        # Set back to eval mode
        model.eval()
        likelihood.eval()
        test_preds = model(train_x).mean.ge(0.5).float()
        mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
        self.assertLess(mean_abs_error.squeeze().item(), 0.15)
def test_kissgp_classification_error():
    with gpytorch.settings.use_toeplitz(False):
        model = GPClassificationModel()
        likelihood = BernoulliLikelihood()
        mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, n_data=len(train_y))

        # Find optimal model hyperparameters
        model.train()
        likelihood.train()
        optimizer = optim.Adam(model.parameters(), lr=0.15)
        optimizer.n_iter = 0
        for i in range(25):
            optimizer.zero_grad()
            output = model(train_x)
            loss = -mll(output, train_y)
            loss.backward()
            optimizer.n_iter += 1
            optimizer.step()

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = model(train_x).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    assert mean_abs_error.data.squeeze()[0] < 0.15
def test_kissgp_classification_error(self):
    model = GPClassificationModel()
    likelihood = BernoulliLikelihood()
    mll = gpytorch.mlls.VariationalMarginalLogLikelihood(likelihood, model, n_data=len(train_y))

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    optimizer.n_iter = 0
    for _ in range(200):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.n_iter += 1
        optimizer.step()

    # Set back to eval mode
    model.eval()
    likelihood.eval()
    test_preds = likelihood(model(train_x)).mean().ge(0.5).float().mul(2).sub(1).squeeze()
    mean_abs_error = torch.mean(torch.abs(train_y - test_preds) / 2)
    self.assertLess(mean_abs_error.data.squeeze()[0], 1e-5)
def main():
    # Initialize classification model
    model = GPClassificationModel().cuda()
    # Likelihood is Bernoulli, warm predictive mean
    likelihood = BernoulliLikelihood().cuda()

    if mode == 'Train':
        train_x, train_y = prepare_training_data()
        train(train_x, train_y, model, likelihood)
    elif mode == 'Eval':
        print("start to test the model")
        predictions = eval_superpixels(model, likelihood)
        plot_result(predictions)
    else:
        raise Exception("No such mode")
def _set_model(
    self,
    train_x: Tensor,
    train_y: Tensor,
    model_state_dict: Optional[Dict[str, Tensor]] = None,
    likelihood_state_dict: Optional[Dict[str, Tensor]] = None,
) -> None:
    # Augment the data with the derivative index
    train_x_aug = self._augment_with_deriv_index(train_x, 0)
    inducing_points_aug = self._augment_with_deriv_index(self.inducing_points, 0)

    # Create and fit the model
    scales = self.bounds_[1, :] - self.bounds_[0, :]
    fixed_prior_mean = self.fixed_prior_mean
    if fixed_prior_mean is not None and self.likelihood == "probit-bernoulli":
        fixed_prior_mean = norm.ppf(fixed_prior_mean)
    self.model = MixedDerivativeVariationalGP(
        train_x=train_x_aug,
        train_y=train_y.squeeze(),
        inducing_points=inducing_points_aug,
        scales=scales,
        fixed_prior_mean=fixed_prior_mean,
        covar_module=self.covar_module,
        mean_module=self.mean_module,
    )
    self.model_likelihood = (
        BernoulliLikelihood()
        if self.likelihood == "probit-bernoulli"
        else GaussianLikelihood()
    )

    # Set model parameters
    if model_state_dict is not None:
        self.model.load_state_dict(model_state_dict)
    if likelihood_state_dict is not None:
        self.model_likelihood.load_state_dict(likelihood_state_dict)

    # Fit!
    mll = VariationalELBO(
        likelihood=self.model_likelihood, model=self.model, num_data=train_y.numel()
    )
    mll = fit_gpytorch_model(mll)
def testMixedDerivativeVariationalGP(self):
    train_x = torch.cat(
        (torch.tensor([1.0, 2.0, 3.0, 4.0]).unsqueeze(1), torch.zeros(4, 1)), dim=1
    )
    train_y = torch.tensor([1.0, 2.0, 3.0, 4.0])
    m = MixedDerivativeVariationalGP(
        train_x=train_x,
        train_y=train_y,
        inducing_points=train_x,
        fixed_prior_mean=0.5,
    )
    self.assertEqual(m.mean_module.constant.item(), 0.5)
    self.assertEqual(m.covar_module.base_kernel.raw_lengthscale.shape, torch.Size([1, 1]))
    mll = VariationalELBO(likelihood=BernoulliLikelihood(), model=m, num_data=train_y.numel())
    mll = fit_gpytorch_model(mll)
    test_x = torch.tensor([[1.0, 0], [3.0, 1.0]])
    m(test_x)
def __init__(
    self,
    monotonic_idxs: Sequence[int],
    lb: Union[np.ndarray, torch.Tensor],
    ub: Union[np.ndarray, torch.Tensor],
    dim: Optional[int] = None,
    mean_module: Optional[Mean] = None,
    covar_module: Optional[Kernel] = None,
    likelihood: Optional[Likelihood] = None,
    fixed_prior_mean: Optional[float] = None,
    num_induc: int = 25,
    num_samples: int = 250,
    num_rejection_samples: int = 5000,
) -> None:
    """Initialize MonotonicRejectionGP.

    Args:
        monotonic_idxs (Sequence[int]): Which columns of x should be given
            monotonicity constraints.
        lb (Union[np.ndarray, torch.Tensor]): Lower bounds of the parameters.
        ub (Union[np.ndarray, torch.Tensor]): Upper bounds of the parameters.
        dim (int, optional): Number of dimensions in the parameter space. If None,
            it is inferred from the size of lb and ub.
        mean_module (Mean, optional): Mean module to use (default: constant mean).
        covar_module (Kernel, optional): Covariance kernel to use (default: scaled RBF).
        likelihood (Likelihood, optional): Likelihood to use. Defaults to BernoulliLikelihood.
        fixed_prior_mean (float, optional): Fixed prior mean. If classification, should be
            the prior classification probability (not the latent function value).
            Defaults to None.
        num_induc (int, optional): Number of inducing points for the variational GP.
            Defaults to 25.
        num_samples (int, optional): Number of samples for estimating the posterior on
            predict or acquisition function evaluation. Defaults to 250.
        num_rejection_samples (int, optional): Number of samples used for rejection
            sampling. Defaults to 5000.
    """
    self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim)
    if likelihood is None:
        likelihood = BernoulliLikelihood()
    self.inducing_size = num_induc

    inducing_points = self._select_inducing_points(method="sobol")
    inducing_points_aug = self._augment_with_deriv_index(inducing_points, 0)
    variational_distribution = CholeskyVariationalDistribution(inducing_points_aug.size(0))
    variational_strategy = VariationalStrategy(
        model=self,
        inducing_points=inducing_points_aug,
        variational_distribution=variational_distribution,
        learn_inducing_locations=False,
    )

    if mean_module is None:
        mean_module = ConstantMeanPartialObsGrad()

    if fixed_prior_mean is not None:
        if isinstance(likelihood, BernoulliLikelihood):
            fixed_prior_mean = norm.ppf(fixed_prior_mean)
        mean_module.constant.requires_grad_(False)
        mean_module.constant.copy_(torch.tensor([fixed_prior_mean]))

    if covar_module is None:
        ls_prior = gpytorch.priors.GammaPrior(concentration=4.6, rate=1.0, transform=lambda x: 1 / x)
        ls_prior_mode = ls_prior.rate / (ls_prior.concentration + 1)
        ls_constraint = gpytorch.constraints.Positive(transform=None, initial_value=ls_prior_mode)
        covar_module = gpytorch.kernels.ScaleKernel(
            RBFKernelPartialObsGrad(
                lengthscale_prior=ls_prior,
                lengthscale_constraint=ls_constraint,
                ard_num_dims=dim,
            ),
            outputscale_prior=gpytorch.priors.SmoothedBoxPrior(a=1, b=4),
        )

    super().__init__(variational_strategy)

    self.bounds_ = torch.stack([self.lb, self.ub])
    self.mean_module = mean_module
    self.covar_module = covar_module
    self.likelihood = likelihood
    self.num_induc = num_induc
    self.monotonic_idxs = monotonic_idxs
    self.num_samples = num_samples
    self.num_rejection_samples = num_rejection_samples
    self.fixed_prior_mean = fixed_prior_mean
    self.inducing_points = inducing_points
        super().__init__(variational_strategy)
        self.mean = ConstantMean()
        self.covar = ScaleKernel(RBFKernel())

    def forward(self, x):
        x_mean = self.mean(x)
        x_covar = self.covar(x)
        return MultivariateNormal(x_mean, x_covar)


x_train = torch.linspace(0, 1, 10)
y_train = torch.sign(torch.cos(x_train * (4 * math.pi))).add(1).div(2)

# Initialize model and likelihood
model = GaussianProcessClassification(x_train)
likelihood = BernoulliLikelihood()

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# "Loss" for GPs - the marginal log likelihood
# num_data refers to the number of training datapoints
mll = gpytorch.mlls.VariationalELBO(likelihood, model, y_train.numel())

n_iterations = 100
for i in range(n_iterations):
    # Zero backpropped gradients from previous iteration
    optimizer.zero_grad()
    # Get predictive output
    output = model(x_train)
    # Calc loss and backprop gradients
    loss = -mll(output, y_train)
    loss.backward()
    optimizer.step()
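# The training loop above is the standard variational-ELBO fit; the matching
# evaluation step, mirroring the test snippets elsewhere in this collection,
# would be roughly the following sketch (reusing the `model`, `likelihood`,
# `x_train`, and `y_train` defined above).
model.eval()
likelihood.eval()
with torch.no_grad():
    pred = likelihood(model(x_train))   # Bernoulli predictive distribution
    labels = pred.mean.ge(0.5).float()  # threshold p(y=1 | x) at 0.5
    accuracy = (labels == y_train).float().mean().item()
print("train accuracy: {:.3f}".format(accuracy))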
class GPClassifier(ApproximateGP):

    _num_outputs = 1  # to inform GPyTorchModel API

    def __init__(self, dim: int, train_X: Tensor, train_Y: Tensor, options: dict,
                 which_type: Optional[str] = "obj") -> None:
        variational_distribution = CholeskyVariationalDistribution(train_X.size(0))
        variational_strategy = UnwhitenedVariationalStrategy(
            self, train_X, variational_distribution, learn_inducing_locations=False
        )
        super(GPClassifier, self).__init__(variational_strategy)
        self.dim = dim

        if len(train_X) == 0:  # No data case
            train_X = None
            train_Y = None
            self.train_inputs = None
            self.train_targets = None
            self.train_x = None
            self.train_yl = None
        else:
            # Error checking:
            assert train_Y.dim() == 1, "train_Y is required to be 1D"
            assert train_X.shape[-1] == self.dim, "Input dimensions do not agree ... (!)"
            self.train_inputs = [train_X.clone()]
            self.train_targets = train_Y.clone()
            self.train_x = train_X.clone()
            self.train_yl = torch.cat(
                [torch.zeros((len(train_Y)), 1), train_Y.view(-1, 1)], dim=1
            )

        print("\n")
        logger.info("### Initializing GP classifier for constraint g(x) ###")

        # Likelihood:
        noise_std = options.hyperpars.noise_std.value
        self.likelihood = BernoulliLikelihood()

        # For compatibility:
        self.threshold = torch.tensor([float("Inf")])

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta
        # distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lenthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior),
        )

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)), outputscale=[self.dim])
        self.dim_hyperpars = sum([len(val) for val in self.idx_hyperpars.values()])

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Initialize prior mean:
        # self.mean_module = ConstantMean()
        self.mean_module = ZeroMean()

        # Initialize covariance function:
        base_kernel = MaternKernel(nu=2.5, ard_num_dims=self.dim,
                                   lengthscale=0.1 * torch.ones(self.dim))
        self.covar_module = ScaleKernel(base_kernel=base_kernel)

        self.disp_info_scipy_opti = True

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[self.idx_hyperpars["outputscale"]]
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[self.idx_hyperpars["lengthscales"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user
        self.Nrestarts = options.hyperpars.optimization.Nrestarts

        self._update_hyperparameters()

        self.eval()
        self.likelihood.eval()

    def set_hyperparameters(self, lengthscale, outputscale, noise):
        self.covar_module.base_kernel.lengthscale = lengthscale
        self.covar_module.outputscale = outputscale
        # self.likelihood.noise[:] = noise
        # self.mean_module.constant[:] = 0.0  # Assume zero mean

    def display_hyperparameters(self):
        logger.info("  Re-optimized hyperparameters")
        logger.info("  ----------------------------")
        logger.info("  Outputscale (stddev) | {0:2.4f}".format(self.covar_module.outputscale.item()))
        logger.info("  Lengthscale(s)       | " +
                    str(self.covar_module.base_kernel.lengthscale.detach().cpu().numpy().flatten()))

    def logging(self):
        log_out = dict()
        log_out["lengthscale"] = self.covar_module.base_kernel.lengthscale.detach().cpu().numpy()
        log_out["outputscale"] = self.covar_module.outputscale.item()
        # log_out["noise"] = self.likelihood.noise.detach().cpu().numpy()
        log_out["train_inputs"] = None if self.train_inputs is None else self.train_inputs[0].detach().cpu().numpy()
        log_out["train_targets"] = None if self.train_targets is None else self.train_targets.detach().cpu().numpy()
        return log_out

    def _update_hyperparameters(self):
        # Find optimal model hyperparameters
        self.train()
        self.likelihood.train()

        # Use the adam optimizer
        optimizer = Adam(self.parameters(), lr=0.1)

        # "Loss" for GPs - the marginal log likelihood
        # num_data refers to the number of training datapoints
        mll = VariationalELBO(self.likelihood, self, self.train_targets.numel())

        training_iterations = 50
        for i in range(training_iterations):
            # Zero backpropped gradients from previous iteration
            optimizer.zero_grad()
            # Get predictive output
            output = self(self.train_inputs[0])
            # Calc loss and backprop gradients
            loss = -mll(output, self.train_targets)
            loss.backward()
            # print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
            optimizer.step()

    def _optimize_acqui_use_restarts_individually(self):
        # Get initial random restart points:
        logger.info("  Generating random restarts ...")
        options = {"maxiter": 200, "ftol": 1e-9, "method": "L-BFGS-B", "iprint": 2,
                   "maxls": 20, "disp": self.disp_info_scipy_opti}
        bounds = torch.tensor(self.hyperpars_bounds, device=device, dtype=dtype)
        # A `num_restarts x q x d` tensor of initial conditions.
        initial_conditions = gen_batch_initial_conditions(
            acq_function=self.mll_objective,
            bounds=bounds,
            q=1,
            num_restarts=self.Nrestarts,
            raw_samples=500,
            options=options,
        )

        logger.info("  Optimizing loss function with {0:d} restarts ...".format(self.Nrestarts))
        new_hyperpars_many = torch.zeros(size=(self.Nrestarts, 1, self.dim_hyperpars))
        new_hyperpars_loss_many = torch.zeros(size=(self.Nrestarts,))
        new_hyperpars, _ = self.opti_hyperpars.run_optimization(
            x_restarts=initial_conditions.view(self.Nrestarts, self.dim_hyperpars)
        )

        logger.info("  Done!")
        return new_hyperpars

    def _get_hyperparameters_bounds(self, hyperpriors):
        # Compute the domain for hyperparameter search by truncating the support of the
        # corresponding hyperprior at the .75 quantile. The lower bound is necessary for
        # numerical stability, i.e., when computing logpdf() in classireg.models.mll_gpcr.log_marginal()
        # All values of the dictionary are defined as double lists
        hyperpriors_support = dict(
            lengthscales=[[0.001] * self.dim, [hyperpriors["lengthscales"].ppf(.75)] * self.dim],
            outputscale=[[0.001], [hyperpriors["outputscale"].ppf(.75)]],
        )

        # Automatically get the bounds from the dictionary:
        hyperpars_lb = []
        hyperpars_ub = []
        for hyperpar in hyperpriors_support.values():
            hyperpars_lb += hyperpar[0]
            hyperpars_ub += hyperpar[1]

        hyperpars_bounds = [hyperpars_lb, hyperpars_ub]
        return hyperpars_bounds

    def _sample_hyperparameters_within_bounds(self, Nsamples):
        # Get a sample from the prior for initialization:
        new_seed = torch.randint(low=0, high=100000, size=(1,)).item()  # Top-level seeds have an impact on this one herein; contrary to the case new_seed = None
        hyperpars_restarts = draw_sobol_samples(
            bounds=torch.tensor(self.hyperpars_bounds), n=Nsamples, q=1, seed=new_seed
        )
        hyperpars_restarts = hyperpars_restarts.squeeze(1)  # Remove batch dimension [n q dim] -> [n dim]
        return hyperpars_restarts

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        mvn = MultivariateNormal(mean_x, covar_x)
        return mvn

    def plot(self, axes=None, block=False, Ndiv=100, legend=True, title="GPgrad", plotting=True,
             plotCDF=False, clear_axes=False, Nsamples=None, ylabel=None, ylim=None, pause=None,
             showtickslabels_x=True, xlabel=None, labelsize=None, showtickslabels=None,
             showticks=None, linewidth=None, color=None, prob=False):
        '''
        This function hardcodes the plotting limits between zero and one for now
        '''
        if plotting == False or self.dim > 1:
            return

        pp = PlotProbability()
        xpred_vec = torch.linspace(0.0, 1.0, Ndiv)[:, None]
        xpred_vec = xpred_vec.unsqueeze(0)  # Ndiv batches of [q=1 x self.dim] dimensions each

        mvn_cons = self(xpred_vec)
        pred_lik = self.likelihood(mvn_cons)
        mean_vec = pred_lik.mean
        # Get upper and lower confidence bounds (2 standard deviations from the mean):
        var_vec = pred_lik.variance
        std_vec = var_vec.sqrt()
        lower_ci, upper_ci = mean_vec - std_vec, mean_vec + std_vec

        if self.dim == 1:
            axes = pp.plot_GP_1D(
                xpred_vec=xpred_vec.squeeze().cpu().numpy(),
                fpred_mode_vec=mean_vec.squeeze().detach().cpu().numpy(),
                fpred_quan_minus=lower_ci.squeeze().detach().cpu().numpy(),
                fpred_quan_plus=upper_ci.squeeze().detach().cpu().numpy(),
                X_sta=None if self.train_inputs is None else self.train_inputs[0].detach().cpu().numpy(),
                Y_sta=None if self.train_targets is None else self.train_targets.detach().cpu().numpy(),
                title=title,
                axes=axes,
                block=block,
                legend=legend,
                clear_axes=True,
                xlabel=xlabel,
                ylabel=ylabel,
                xlim=np.array([0., 1.]),
                ylim=ylim,
                labelsize="x-large",
                legend_loc="best",
                colormap="paper",
                showtickslabels_x=showtickslabels_x,
            )

            if Nsamples is not None:
                f_sample = posterior.sample(sample_shape=torch.Size([Nsamples]))
                for k in range(Nsamples):
                    axes.plot(xpred_vec.squeeze().detach().cpu().numpy(),
                              f_sample[k, 0, :, 0],
                              linestyle="--", linewidth=1.0, color="sienna")
        elif self.dim == 2:
            pass

        plt.show(block=block)
        if pause is not None:
            plt.pause(pause)

        return axes
def __init__(self):
    super(GPClassificationModel, self).__init__(BernoulliLikelihood())
    self.latent_function = LatentFunction()
def create_likelihood(self):
    return BernoulliLikelihood()
def __init__(
    self,
    lb: Union[np.ndarray, torch.Tensor],
    ub: Union[np.ndarray, torch.Tensor],
    dim: Optional[int] = None,
    mean_module: Optional[gpytorch.means.Mean] = None,
    covar_module: Optional[gpytorch.kernels.Kernel] = None,
    likelihood: Optional[Likelihood] = None,
    inducing_size: int = 100,
    max_fit_time: Optional[float] = None,
    inducing_point_method: str = "auto",
):
    """Initialize the GP Classification model

    Args:
        lb (Union[numpy.ndarray, torch.Tensor]): Lower bounds of the parameters.
        ub (Union[numpy.ndarray, torch.Tensor]): Upper bounds of the parameters.
        dim (int, optional): The number of dimensions in the parameter space. If None,
            it is inferred from the size of lb and ub.
        mean_module (gpytorch.means.Mean, optional): GP mean class. Defaults to a constant
            with a normal prior.
        covar_module (gpytorch.kernels.Kernel, optional): GP covariance kernel class.
            Defaults to scaled RBF with a gamma prior.
        likelihood (gpytorch.likelihoods.Likelihood, optional): The likelihood function
            to use. If None, defaults to Bernoulli likelihood.
        inducing_size (int): Number of inducing points. Defaults to 100.
        max_fit_time (float, optional): The maximum amount of time, in seconds, to spend
            fitting the model. If None, there is no limit to the fitting time.
        inducing_point_method (string): The method to use to select the inducing points.
            Defaults to "auto". If "sobol", a number of Sobol points equal to inducing_size
            will be selected. If "pivoted_chol", selects points based on the pivoted
            Cholesky heuristic. If "kmeans++", selects points by performing kmeans++
            clustering on the training data. If "auto", tries to determine the best
            method automatically.
    """
    self.lb, self.ub, self.dim = _process_bounds(lb, ub, dim)
    self.max_fit_time = max_fit_time
    self.inducing_size = inducing_size

    if likelihood is None:
        likelihood = BernoulliLikelihood()

    self.max_fit_time = max_fit_time
    self.inducing_point_method = inducing_point_method
    # initialize to sobol before we have data
    inducing_points = self._select_inducing_points(method="sobol")

    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(0), batch_shape=torch.Size([self._batch_size])
    )
    variational_strategy = VariationalStrategy(
        self,
        inducing_points,
        variational_distribution,
        learn_inducing_locations=False,
    )
    super().__init__(variational_strategy)

    if mean_module is None or covar_module is None:
        config = Config(
            config_dict={
                "default_mean_covar_factory": {
                    "lb": str(self.lb.tolist()),
                    "ub": str(self.ub.tolist()),
                }
            }
        )  # type: ignore
        default_mean, default_covar = default_mean_covar_factory(config)

    self.mean_module = mean_module or default_mean
    self.covar_module = covar_module or default_covar
    self.likelihood = likelihood

    self._fresh_state_dict = deepcopy(self.state_dict())
    self._fresh_likelihood_dict = deepcopy(self.likelihood.state_dict())
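# For orientation, a minimal construction sketch built only from the documented
# arguments above. Assumptions: the enclosing class (not named in the snippet) is
# called `GPClassificationModel` here for illustration, and `train_y` is an existing
# tensor of binary labels used to size the ELBO.
import torch
import gpytorch

lb = torch.tensor([0.0, 0.0])
ub = torch.tensor([1.0, 1.0])
model = GPClassificationModel(lb=lb, ub=ub, inducing_size=50)  # defaults: Bernoulli likelihood, factory mean/covar
mll = gpytorch.mlls.VariationalELBO(model.likelihood, model, num_data=train_y.numel())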