def create_mean(self):
    return MultitaskMean(
        [ConstantMean(batch_shape=torch.Size([2, 3])), ZeroMean(), ZeroMean()],
        num_tasks=3,
    )
def setUp(self):
    self.mean = MultitaskMean(
        [ConstantMean(), ZeroMean(), ZeroMean(), ConstantMean()], num_tasks=4
    )
    self.mean.base_means[0].constant.data.fill_(5)
    self.mean.base_means[3].constant.data.fill_(7)
def __init__(self, stem, init_x, num_inducing, lr, streaming=False, beta=1.0,
             learn_inducing_locations=True, num_update_steps=1, **kwargs):
    super().__init__()
    likelihood = BernoulliLikelihood()
    inducing_points = torch.empty(num_inducing, stem.output_dim)
    inducing_points.uniform_(-1, 1)
    mean_module = ZeroMean()
    covar_module = ScaleKernel(RBFKernel(ard_num_dims=stem.output_dim))
    self.gp = VariationalGPModel(
        inducing_points,
        mean_module,
        covar_module,
        streaming,
        likelihood,
        beta=beta,
        learn_inducing_locations=learn_inducing_locations,
    )
    self.mll = None
    self.stem = stem
    self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    self.num_update_steps = num_update_steps
    self._raw_inputs = [init_x]
def _parse_mean(input_size: int, dim_outputs: int = 1, kind: str = 'zero') -> Mean:
    """Parse Mean string.

    Parameters
    ----------
    input_size: int.
        Size of input to GP (needed for linear mean functions).
    dim_outputs: int.
        Number of output dimensions (used as the batch shape of linear means).
    kind: str.
        String that identifies mean function.

    Returns
    -------
    mean: Mean.
        Mean function.
    """
    if kind.lower() == 'constant':
        mean = ConstantMean()
    elif kind.lower() == 'zero':
        mean = ZeroMean()
    elif kind.lower() == 'linear':
        mean = LinearMean(input_size=input_size,
                          batch_shape=torch.Size([dim_outputs]))
    else:
        raise NotImplementedError(
            'Mean function {} not implemented.'.format(kind))
    return mean
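# A minimal usage sketch for _parse_mean above (illustrative only): the input
# size, the number of outputs, and the gpytorch-style import are assumptions,
# not part of the original snippet.
import torch
from gpytorch.means import LinearMean

mean = _parse_mean(input_size=2, dim_outputs=3, kind='linear')
assert isinstance(mean, LinearMean)
x = torch.randn(5, 2)      # 5 inputs with 2 features each
print(mean(x).shape)       # batched linear mean -> torch.Size([3, 5])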
def setUp(self, batched=False, learnable=False):
    torch.set_default_tensor_type(torch.DoubleTensor)
    torch.random.manual_seed(10)

    train_x = torch.rand(10, 2)
    train_y = torch.sin(2 * train_x[:, 0] + 3 * train_x[:, 1]).unsqueeze(-1)
    train_y_var = 0.1 * torch.ones_like(train_y)

    if batched:
        train_y = torch.cat(
            (
                train_y,
                train_y + 0.3 * torch.randn_like(train_y),
                train_y + 0.3 * torch.randn_like(train_y),
            ),
            dim=1,
        )
        train_y_var = train_y_var.repeat(1, 3)

    model = FixedNoiseOnlineSKIGP(
        train_inputs=train_x,
        train_targets=train_y,
        train_noise_term=train_y_var,
        grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        grid_size=5,
        learn_additional_noise=learnable,
    )

    equivalent_model = SingleTaskGP(
        train_X=train_x,
        train_Y=train_y,
        likelihood=FixedNoiseGaussianLikelihood(train_y_var.t(),
                                                learn_additional_noise=learnable),
        covar_module=deepcopy(model.covar_module),
    )
    equivalent_model.mean_module = ZeroMean()

    return model, equivalent_model, train_x, train_y
def __init__(self, input, inducing_size, device='cpu'):
    if device == 'gpu' and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    else:
        self.device = torch.device('cpu')

    if input.ndim == 1:
        self.input_size = 1
    else:
        self.input_size = input.shape[-1]
    self.inducing_size = inducing_size

    _likelihood = GaussianLikelihood()
    super(SparseGPRegressor, self).__init__(train_inputs=None,
                                            train_targets=None,
                                            likelihood=_likelihood)

    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())

    inducing_idx = np.random.choice(len(input), inducing_size, replace=False)
    self.covar_module = InducingPointKernel(self.base_covar_module,
                                            inducing_points=input[inducing_idx, ...],
                                            likelihood=_likelihood)

    self.input_trans = None
    self.target_trans = None
def __init__(self, train_x, train_y, likelihood, Z_init):
    # Locations Z corresponding to u; they can be randomly initialized or
    # regularly placed.
    self.inducing_inputs = Z_init
    self.num_inducing = len(Z_init)
    self.n = len(train_y)
    self.data_dim = train_x.shape[1]

    # Sparse variational formulation
    q_u = CholeskyVariationalDistribution(self.num_inducing)
    q_f = VariationalStrategy(self, self.inducing_inputs, q_u,
                              learn_inducing_locations=True)
    super(BayesianStochasticVariationalGP, self).__init__(q_f)

    self.likelihood = likelihood
    self.train_x = train_x
    self.train_y = train_y

    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.RBFKernel())

    # Hyperparameter variational distribution
    hyper_prior_mean = torch.Tensor([0])
    hyper_dim = len(hyper_prior_mean)
    log_hyper_prior = NormalPrior(hyper_prior_mean,
                                  torch.ones_like(hyper_prior_mean))
    self.log_theta = LogHyperVariationalDist(hyper_dim, log_hyper_prior,
                                             self.n, self.data_dim)
def __init__(self, train_x, train_y, likelihood):
    super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel(ard_num_dims=2))
    self.covar_module = AdditiveStructureKernel(
        GridInterpolationKernel(self.base_covar_module, grid_size=100, num_dims=1),
        num_dims=2)
def init():
    r_lik = GaussianLikelihood()
    r_kernel = GridInterpolationKernelWithFantasy(
        RBFKernel(), grid_size=self.grid_size, grid_bounds=[(-4.0, 14.0)]).double()
    r_model = RegularExactGP(self.xs, self.labels, r_lik, r_kernel, ZeroMean())

    lik = GaussianLikelihood()
    kernel = GridInterpolationKernelWithFantasy(
        RBFKernel(), grid_size=self.grid_size, grid_bounds=[(-4.0, 14.0)]).double()
    model = OnlineWoodburyGP(self.xs, self.labels, lik, kernel, ZeroMean())

    return r_model, model
def __init__(self, train_x, train_y, likelihood):
    super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(
        RBFKernel(log_lengthscale_prior=SmoothedBoxPrior(
            exp(-3), exp(3), sigma=0.1, log_transform=True))
    )
    self.covar_module = AdditiveGridInterpolationKernel(
        self.base_covar_module, grid_size=100, grid_bounds=[(-0.5, 1.5)],
        n_components=2
    )
def __init__(self, kernel, depth, dim=1, collect=True):
    super().__init__()
    self.depth = depth
    self.dim = dim
    self.covar_module = kernel
    self.mean_module = ZeroMean()
    self.collect = collect
    if self.collect:
        self.collector = Collector(depth)
def __init__(self, train_x, train_y, likelihood, outputscale=1.0):
    super().__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.kernel = ScaleKernel(
        MaternKernel(nu=2.5,
                     # ard_num_dims=train_x.shape[-1]
                     ))
    self.kernel.outputscale = outputscale
def __init__(self, train_x, train_y, likelihood):
    super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(
        RBFKernel(ard_num_dims=2,
                  log_lengthscale_prior=SmoothedBoxPrior(
                      exp(-3), exp(3), sigma=0.1, log_transform=True)))
    self.covar_module = AdditiveStructureKernel(
        GridInterpolationKernel(self.base_covar_module, grid_size=100, num_dims=2),
        num_dims=2)
def __init__(self, train_x, train_y, likelihood, num_tasks, rank=1, covar_module=None):
    super(HadamardMTGPModel, self).__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.num_dims = train_x[0].size(-1)
    self._init_covar_module(covar_module)
    self.task_covar_module = IndexKernel(num_tasks=num_tasks, rank=rank)
def __init__(self, train_x, train_y, likelihood, Z_init):
    super(BayesianSparseGPR_HMC, self).__init__(train_x, train_y, likelihood)
    self.train_x = train_x
    self.train_y = train_y
    self.inducing_points = Z_init
    self.num_inducing = len(Z_init)
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = InducingPointKernel(self.base_covar_module,
                                            inducing_points=Z_init,
                                            likelihood=likelihood)
def __init__(self, train_x, train_y, likelihood, Z_init):
    """The sparse GP class for regression with the collapsed bound.

    q*(u) is implicit.
    """
    super(SparseGPR, self).__init__(train_x, train_y, likelihood)
    self.train_x = train_x
    self.train_y = train_y
    self.inducing_points = Z_init
    self.num_inducing = len(Z_init)
    self.likelihood = likelihood
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = InducingPointKernel(self.base_covar_module,
                                            inducing_points=Z_init,
                                            likelihood=self.likelihood)
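# A minimal construction sketch for the SparseGPR class above (illustrative
# only): the toy data, the number of inducing points, and the GaussianLikelihood
# are assumptions, not part of the original snippet.
import torch
from gpytorch.likelihoods import GaussianLikelihood

train_x = torch.linspace(0, 1, 100).unsqueeze(-1)
train_y = torch.sin(6 * train_x.squeeze(-1)) + 0.1 * torch.randn(100)
Z_init = train_x[torch.randperm(100)[:10]]  # 10 inducing inputs drawn from the data
model = SparseGPR(train_x, train_y, GaussianLikelihood(), Z_init)
print(model.covar_module)                   # InducingPointKernel over ScaleKernel(RBFKernel())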
def __init__(self, train_x, train_y, likelihood, outputscale=10,
             transform_input_fn=None):
    super().__init__(train_x, train_y, likelihood)
    self.mean_module = ZeroMean()
    self.kernel = ScaleKernel(MaternKernel(nu=2.5))
    self.likelihood.noise_covar.noise = 1e-8
    self.kernel.outputscale = outputscale
    self.transform_input_fn = transform_input_fn
def __init__(self, input_size, device='cpu'):
    if device == 'gpu' and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    else:
        self.device = torch.device('cpu')

    self.input_size = input_size

    _likelihood = GaussianLikelihood()
    super(GPRegressor, self).__init__(train_inputs=None,
                                      train_targets=None,
                                      likelihood=_likelihood)

    self.mean_module = ZeroMean()
    self.covar_module = ScaleKernel(RBFKernel())

    self.input_trans = None
    self.target_trans = None
def __init__(self, input_size, target_size, device='cpu'):
    if device == 'gpu' and torch.cuda.is_available():
        self.device = torch.device('cuda:0')
    else:
        self.device = torch.device('cpu')

    self.input_size = input_size
    self.target_size = target_size

    _likelihood = MultitaskGaussianLikelihood(num_tasks=self.target_size)
    super(MultiTaskGPRegressor, self).__init__(train_inputs=None,
                                               train_targets=None,
                                               likelihood=_likelihood)

    self.mean_module = MultitaskMean(ZeroMean(), num_tasks=self.target_size)
    self.covar_module = MultitaskKernel(RBFKernel(),
                                        num_tasks=self.target_size, rank=1)

    self.input_trans = None
    self.target_trans = None
def __init__(self, train_x, train_y, likelihood, Z_init):
    # Locations Z corresponding to u; they can be randomly initialized or
    # regularly placed.
    self.inducing_inputs = Z_init
    self.num_inducing = len(Z_init)

    # Sparse variational formulation
    q_u = CholeskyVariationalDistribution(self.num_inducing)
    q_f = VariationalStrategy(self, self.inducing_inputs, q_u,
                              learn_inducing_locations=True)
    super(StochasticVariationalGP, self).__init__(q_f)

    self.likelihood = likelihood
    self.train_x = train_x
    self.train_y = train_y

    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.RBFKernel())
def __init__(self, input_dims, output_dims, n_inducing=50, mean=None,
             kernel=None, collapsed=False):
    # Cast in case numpy-type; output_dims may be None (single-output case),
    # so only cast it when it is given.
    self.input_dims = int(input_dims)
    self.output_dims = int(output_dims) if output_dims is not None else None
    n_inducing = int(n_inducing)

    if output_dims is None or output_dims == 1:
        batch_shape = torch.Size([])
    else:
        batch_shape = torch.Size([self.output_dims])

    x_u = torch.randn(n_inducing, self.input_dims)
    if collapsed:
        assert batch_shape == torch.Size([])
        strategy = CollapsedStrategy(self, n_inducing)
    else:
        variational_dist = CholeskyVariationalDistribution(
            n_inducing, batch_shape=batch_shape)
        strategy = UnwhitenedVariationalStrategy(
            self, x_u, variational_dist, learn_inducing_locations=True)
    super().__init__(strategy)

    if mean is None:
        mean = ZeroMean()
    self.mean = mean

    if kernel is None:
        rbf = RBFKernel(ard_num_dims=input_dims)
        kernel = ScaleKernel(rbf)
    self.kernel = kernel

    self.prior_point_process = SquaredReducingPointProcess(n_inducing)
    self.variational_point_process = PoissonPointProcess(n_inducing)
def __init__(self, train_x, train_y, likelihood, var=None, latent=None,
             kernel_params=None, latent_params=None):
    super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
    if latent_params is None:
        latent_params = {'input_dim': train_x.size(-1)}
    self._set_latent_function(latent, latent_params)

    self.mean_module = ZeroMean()

    ard_num_dims = (self.latent_func.embed_dim
                    if self.latent_func.embed_dim is not None
                    else train_x.size(-1))
    kernel = kernel_params['type'] if kernel_params is not None else 'rbf'
    if kernel is None or kernel == 'rbf':
        self.kernel_covar_module = ScaleKernel(
            RBFKernel(ard_num_dims=ard_num_dims))
    elif kernel == 'matern':
        self.kernel_covar_module = ScaleKernel(
            MaternKernel(nu=1.5, ard_num_dims=ard_num_dims))
        # without scale kernel: very poor performance
        # matern 0.5, 1.5 and 2.5 all have similar performance
    elif kernel == 'spectral_mixture':
        self.kernel_covar_module = SpectralMixtureKernel(
            num_mixtures=kernel_params['n_mixtures'],
            ard_num_dims=train_x.size(-1))
        self.kernel_covar_module.initialize_from_data(train_x, train_y)
    else:
        raise NotImplementedError

    # set covariance module
    if var is not None:
        self.noise_covar_module = WhiteNoiseKernel(var)
        self.covar_module = self.kernel_covar_module + self.noise_covar_module
    else:
        self.covar_module = self.kernel_covar_module
def __init__(
    self, stem, init_x, init_y, num_inducing, lr, streaming=False,
    prior_beta=1., online_beta=1., learn_inducing_locations=True,
    num_update_steps=1, covar_module=None, inducing_points=None, **kwargs
):
    super().__init__()
    assert init_y.ndimension() == 2
    target_dim = init_y.size(-1)
    # batch over output dimensions only when there is more than one target
    batch_shape = torch.Size([target_dim]) if target_dim > 1 else torch.Size([])

    likelihood = GaussianLikelihood(batch_shape=batch_shape)
    if inducing_points is None:
        inducing_points = torch.empty(num_inducing, stem.output_dim)
        inducing_points.uniform_(-1, 1)
    mean_module = ZeroMean()
    if covar_module is None:
        covar_module = ScaleKernel(
            RBFKernel(ard_num_dims=stem.output_dim, batch_shape=batch_shape),
            batch_shape=batch_shape,
        )

    self.gp = VariationalGPModel(
        inducing_points,
        mean_module,
        covar_module,
        streaming,
        likelihood,
        beta=online_beta,
        learn_inducing_locations=learn_inducing_locations,
    )
    self.mll = None
    self.stem = stem
    self.optimizer = torch.optim.Adam(self.param_groups(lr))
    self.num_update_steps = num_update_steps
    self._raw_inputs = [init_x]
    self.target_dim = target_dim
    self._prior_beta = prior_beta
def _setUp(self):
    self.xs = torch.tensor([2.0, 3.0, 4.0, 1.0, 7.0], dtype=torch.double)
    # self.xs = torch.rand(100).double() * 14 - 2
    self.grid_size = 20
    self.kernel = GridInterpolationKernelWithFantasy(
        RBFKernel(), grid_size=self.grid_size, grid_bounds=[(-4.0, 14.0)]).double()
    # self.lengthscale = np.random.rand()*10 + 0.1
    # self.noise_var = np.random.rand()*10 + 0.1
    self.lengthscale = 10.0
    self.noise_var = 0.01
    self.lr = 0.1
    self.mean_module = ZeroMean()

    self.labels = torch.sin(self.xs) + torch.tensor(
        [0.1, 0.2, -0.1, -0.2, -0.2], dtype=torch.double)
    # self.labels = torch.sin(self.xs) + torch.randn_like(self.xs)*0.1

    self.test_points = torch.tensor([5.0, 8.0], dtype=torch.double)
    self.new_points = torch.tensor([2.4, 4.7], dtype=torch.double)
    self.new_targets = torch.sin(self.new_points) + torch.tensor(
        [0.1, -0.15], dtype=torch.double)

    self.points_sequence = [
        self.xs,
        self.new_points,
        torch.tensor([2.3]),
        torch.tensor([4.1]),
        torch.tensor([4.3]),
    ]
    self.targets_sequence = [
        self.labels,
        self.new_targets,
        torch.sin(torch.tensor([2.3])),
        torch.sin(torch.tensor([4.1])) + 1,
        torch.sin(torch.tensor([4.3])),
    ]
def __init__(self, train_X: Tensor, train_Y: Tensor, options: dict,
             which_type: Optional[str] = "obj") -> None:
    # Error checking:
    assert train_Y.dim() == 1, "train_Y is required to be 1D"
    # Only for this function, train_Y must be 2D (this must be a bug in botorch)
    self._validate_tensor_args(X=train_X, Y=train_Y[:, None])

    # Dimensionality of the input space:
    self.dim = train_X.shape[-1]

    # Model identity:
    self.iden = "GP_model_{0:s}".format(which_type)

    # Likelihood:
    noise_std = options["noise_std_obj"]
    lik = FixedNoiseGaussianLikelihood(
        noise=torch.full_like(train_Y, noise_std**2))

    # Initialize parent class:
    super().__init__(train_X, train_Y, lik)

    # Obtain hyperprior for lengthscale and outputscale:
    # NOTE: The mean (zero) and the model noise are fixed
    lengthscale_prior, outputscale_prior = extract_prior(options, which_type)

    # Initialize prior mean:
    # self.mean_module = ConstantMean()
    self.mean_module = ZeroMean()

    # Initialize covariance function:
    # base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1], lengthscale_prior=GammaPrior(3.0, 6.0))  # original
    # self.covar_module = ScaleKernel(base_kernel=base_kernel, outputscale_prior=GammaPrior(2.0, 0.15))  # original
    base_kernel = RBFKernel(ard_num_dims=self.dim,
                            lengthscale_prior=lengthscale_prior,
                            lengthscale_constraint=GreaterThan(1e-2))
    self.covar_module = ScaleKernel(base_kernel=base_kernel,
                                    outputscale_prior=outputscale_prior)

    # Make sure we're on the right device/dtype
    self.to(train_X)

    # Instantiate the gradient model:
    self.model_grad = GPmodelWithGrad(dim=self.dim)
def __init__(self, dim: int, train_X: Tensor, train_Y: Tensor, options: dict,
             which_type: Optional[str] = "obj") -> None:
    self.dim = dim

    if len(train_Y) == 0:  # No data case
        train_X = None
        train_Y = None
    else:
        # Error checking:
        assert train_Y.dim() == 1, "train_Y is required to be 1D"
        # Only for this function, train_Y must be 2D (this must be a bug in botorch)
        self._validate_tensor_args(X=train_X, Y=train_Y[:, None])

    print("\n")
    logger.info("### Initializing GP model for objective f(x) ###")

    # Likelihood:
    noise_std = options.hyperpars.noise_std.value
    if train_Y is not None:
        lik = FixedNoiseGaussianLikelihood(
            noise=torch.full_like(train_Y, noise_std**2))
    else:
        lik = FixedNoiseGaussianLikelihood(
            noise=torch.tensor([noise_std**2], device=device, dtype=dtype))

    # Initialize parent class:
    super().__init__(train_X, train_Y, lik)

    # # Obtain hyperprior for lengthscale and outputscale:
    # # NOTE: The mean (zero) and the model noise are fixed
    # lengthscale_prior, outputscale_prior = extract_prior(options.hyperpriors)

    # Initialize hyperpriors using scipy because gpytorch's gamma and beta
    # distributions do not have the inverse CDF
    hyperpriors = dict(
        lengthscales=eval(options.hyperpars.lenthscales.prior),
        outputscale=eval(options.hyperpars.outputscale.prior))

    # Index hyperparameters:
    self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                              outputscale=[self.dim])
    self.dim_hyperpars = sum(
        [len(val) for val in self.idx_hyperpars.values()])

    # Get bounds:
    self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
    logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

    # Initialize prior mean:
    # self.mean_module = ConstantMean()
    self.mean_module = ZeroMean()

    # Initialize covariance function:
    # base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1], lengthscale_prior=GammaPrior(3.0, 6.0))  # original
    # self.covar_module = ScaleKernel(base_kernel=base_kernel, outputscale_prior=GammaPrior(2.0, 0.15))  # original
    # base_kernel = RBFKernel(ard_num_dims=self.dim, lengthscale_prior=lengthscale_prior, lengthscale_constraint=GreaterThan(1e-2))
    base_kernel = MaternKernel(nu=2.5, ard_num_dims=self.dim,
                               lengthscale=0.1 * torch.ones(self.dim))
    self.covar_module = ScaleKernel(base_kernel=base_kernel)

    self.disp_info_scipy_opti = True
    # self.method = "L-BFGS-B"
    self.method = "LN_BOBYQA"
    # self.method = 'trust-constr'

    # Get a hyperparameter sample within bounds (not the same as sampling from
    # the corresponding priors):
    hyperpars_sample = self._sample_hyperparameters_within_bounds(
        Nsamples=1).squeeze(0)
    self.covar_module.outputscale = hyperpars_sample[
        self.idx_hyperpars["outputscale"]]
    self.covar_module.base_kernel.lengthscale = hyperpars_sample[
        self.idx_hyperpars["lengthscales"]]

    # The evaluation noise is fixed, and given by the user
    self.noise_std = options.hyperpars.noise_std.value

    # Initialize marginal log likelihood for the GP model.
    # mll_objective is callable.
    # MLLGP can internally modify the model hyperparameters, and will do so
    # throughout the optimization routine.
    self.mll_objective = MLLGP(model_gp=self,
                               likelihood_gp=self.likelihood,
                               hyperpriors=hyperpriors)

    # Define nlopt optimizer:
    self.opti_hyperpars = OptimizationNonLinear(
        dim=self.dim_hyperpars,
        fun_obj=self.mll_objective,
        algo_str=self.method,
        tol_x=1e-4,
        Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
        bounds=self.hyperpars_bounds,
        what2optimize_str="GP hyperparameters")

    # Make sure we're on the right device/dtype
    if train_Y is not None:
        self.to(train_X)

    self.Nrestarts = options.hyperpars.optimization.Nrestarts

    self._update_hyperparameters()

    self.eval()
def __init__(self, dim: int, train_x: Tensor, train_yl: Tensor, options):
    """
    train_X: A `batch_shape x n x d` tensor of training features.
    train_Y: A `batch_shape x n x m` tensor of training observations.
    train_Yvar: A `batch_shape x n x m` tensor of observed measurement noise.
    """
    # Initialize parent class. This is needed because torch.nn.Module, which
    # is a parent of GPyTorchModel, needs it.
    super().__init__()

    print("\n")
    logger.info("### Initializing GPCR model for constraint g(x) ###")

    self.discard_too_close_points = options.discard_too_close_points

    self.dim = dim
    assert self.dim == train_x.shape[1], "The input dimension must agree with train_x"

    self.train_x = torch.tensor([], device=device, dtype=dtype, requires_grad=False)
    self.train_yl = torch.tensor([], device=device, dtype=dtype, requires_grad=False)
    self.update_XY(train_x, train_yl)

    # One output
    # ==========
    # pdb.set_trace()
    self._validate_tensor_args(X=self.train_xs, Y=self.train_ys.view(-1, 1))
    # validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
    self._set_dimensions(train_X=self.train_xs, train_Y=self.train_ys.view(-1, 1))
    # self.train_xs, _, _ = self._transform_tensor_args(X=self.train_xs, Y=self.train_ys)

    # # Two outputs
    # # ===========
    # # pdb.set_trace()
    # self._validate_tensor_args(X=self.train_xs, Y=self.train_yl)
    # # validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
    # self._set_dimensions(train_X=self.train_xs, train_Y=self.train_yl)
    # # self.train_xs, _, _ = self._transform_tensor_args(X=self.train_xs, Y=self.train_ys)

    # Initialize hyperpriors using scipy because gpytorch's gamma and beta
    # distributions do not have the inverse CDF
    hyperpriors = dict(
        lengthscales=eval(options.hyperpars.lenthscales.prior),
        outputscale=eval(options.hyperpars.outputscale.prior),
        threshold=eval(options.hyperpars.threshold.prior))

    # Index hyperparameters:
    self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                              outputscale=[self.dim],
                              threshold=[self.dim + 1])
    self.dim_hyperpars = sum(
        [len(val) for val in self.idx_hyperpars.values()])

    # Get bounds:
    self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
    logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

    # Define mean and covariance modules with dummy hyperparameters
    self.mean_module = ZeroMean()
    self.covar_module = ScaleKernel(
        base_kernel=MaternKernel(nu=2.5, ard_num_dims=self.dim,
                                 lengthscale=0.1 * torch.ones(self.dim)),
        outputscale=10.0)

    # # If non-zero mean, constant mean is assumed:
    # if "constant" in dir(self.mean_module):
    #     self.__threshold = self.mean_module.constant
    # else:
    #     self.__threshold = 0.0

    # If non-zero mean, constant mean is assumed:
    if "constant" in dir(self.mean_module):
        self.__threshold = self.mean_module.constant
        self.thres_init = self.mean_module.constant
    else:
        self.__threshold = options.hyperpars.threshold.init
        self.thres_init = options.hyperpars.threshold.init

    # Get a hyperparameter sample within bounds (not the same as sampling from
    # the corresponding priors):
    hyperpars_sample = self._sample_hyperparameters_within_bounds(
        Nsamples=1).squeeze(0)
    self.covar_module.outputscale = hyperpars_sample[
        self.idx_hyperpars["outputscale"]]
    print("self.covar_module.outputscale:", str(self.covar_module.outputscale))
    self.covar_module.base_kernel.lengthscale = hyperpars_sample[
        self.idx_hyperpars["lengthscales"]]
    self.threshold = hyperpars_sample[self.idx_hyperpars["threshold"]]

    # The evaluation noise is fixed, and given by the user
    self.noise_std = options.hyperpars.noise_std.value

    self.gauss_tools = GaussianTools()

    # Initialize EP
    self.ep = ExpectationPropagation(
        prior_mean=self.mean_module(train_x).cpu().detach().numpy(),
        prior_cov=self.covar_module(train_x).cpu().detach().numpy(),
        Maxiter=options.ep.maxiter,
        required_precission=options.ep.prec,
        verbosity=options.ep.verbo)

    # Initialize marginal log likelihood for the GPCR model.
    # mll_objective is callable.
    # MLLGPCR can internally modify the model hyperparameters, and will do so
    # throughout the optimization routine.
    self.mll_objective = MLLGPCR(model_gpcr=self, hyperpriors=hyperpriors)

    # Define nlopt optimizer:
    self.opti = OptimizationNonLinear(
        dim=self.dim_hyperpars,
        fun_obj=self.mll_objective,
        algo_str=options.hyperpars.optimization.algo_name,
        tol_x=1e-3,
        Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
        bounds=self.hyperpars_bounds,
        what2optimize_str="GPCR hyperparameters")

    # Extra parameters:
    self.top_dist_ambiguous_points = 0.5 * torch.min(
        self.covar_module.base_kernel.lengthscale).item()
    self.factor_heteroscedastic_noise = 10**4

    # Update hyperparameters:
    self.Nrestarts_hyperpars = options.hyperpars.optimization.Nrestarts
    self._update_hyperparameters(Nrestarts=self.Nrestarts_hyperpars)

    # self.likelihood = FixedNoiseGaussianLikelihood(noise=torch.eye())
    self.likelihood = None
def __init__(
    self,
    train_inputs=None,
    train_targets=None,
    train_noise_term=None,
    covar_module=None,
    kernel_cache=None,
    grid_bounds=None,
    grid_size=30,
    likelihood=None,
    learn_additional_noise=False,
    num_data=None,
):
    super().__init__()
    assert train_inputs is not None or kernel_cache is not None

    if train_targets is not None:
        num_outputs = train_targets.shape[-1]
        input_batch_shape = train_inputs.shape[:-2]
        self.num_data = train_inputs.shape[-2]
    else:
        # pull from kernel_cache
        num_outputs = kernel_cache["response_cache"].shape[-1]
        input_batch_shape = kernel_cache["WtW"].shape[0]
        self.num_data = num_data

    self.num_outputs = num_outputs
    _batch_shape = input_batch_shape
    if num_outputs > 1:
        _batch_shape += torch.Size([num_outputs])

    if covar_module is None:
        if grid_bounds is None:
            grid_bounds = torch.stack((
                train_inputs.min(dim=-2)[0] - 0.1,
                train_inputs.max(dim=-2)[0] + 0.1,
            )).transpose(-1, -2)
        covar_module = ScaleKernel(
            RBFKernel(batch_shape=_batch_shape,
                      ard_num_dims=train_inputs.size(-1)),
            batch_shape=_batch_shape,
        )

    if type(covar_module) is not GridInterpolationKernel:
        covar_module = GridInterpolationKernel(
            base_kernel=covar_module,
            grid_size=grid_size,
            num_dims=train_inputs.shape[-1],
            grid_bounds=grid_bounds,
        )

    self._batch_shape = _batch_shape
    self.train_inputs = [None]
    self.train_targets = None
    self.covar_module = covar_module
    self.mean_module = ZeroMean()

    if likelihood is None:
        if train_noise_term is None:
            train_noise_term = torch.ones_like(train_targets)
        self.likelihood = FNMGLikelihood(
            noise=train_noise_term.transpose(-1, -2),
            learn_additional_noise=learn_additional_noise,
        )
    else:
        self.likelihood = likelihood
    self.has_learnable_noise = learn_additional_noise

    # initialize the kernel caches immediately so we can throw away the data
    if kernel_cache is None:
        self.covar_module = self.covar_module.to(train_inputs.device)
        initial_kxx = self.covar_module(train_inputs).evaluate_kernel()
        initial_wmat = _get_wmat_from_kernel(initial_kxx)
        self._kernel_cache = _initialize_caches(
            train_targets,
            train_noise_term.transpose(-1, -2),
            initial_wmat,
            create_w_cache=True,
        )
    else:
        self._kernel_cache = kernel_cache
def setUp(self):
    self.mean = ZeroMean()
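# A quick sanity sketch of what the ZeroMean under test returns (the input
# shape here is an illustrative assumption): the prior mean over a batch of
# points is a zero tensor with one entry per point.
import torch
from gpytorch.means import ZeroMean

mean = ZeroMean()
x = torch.randn(7, 3)
print(mean(x))             # tensor of zeros with shape torch.Size([7])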
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(
        args.data_loc, args.num_init, args.num_total)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    covar_module = ScaleKernel(
        MaternKernel(
            ard_num_dims=2,
            nu=0.5,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        ),
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    if not args.exact:
        covar_module = GridInterpolationKernel(
            base_kernel=covar_module,
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        )

    model = FixedNoiseGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        covar_module=covar_module,
    ).to(device)
    model.mean_module = ZeroMean()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)
    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_cholesky_size(
            args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(train_x)
        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - train_y.view(-1))**2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_cholesky_size(
                args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(*model.train_inputs), model.train_targets).sum()
        loss.backward()
        mll_time = start - time.time()

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()

        start = time.time()
        if not args.reset_training_data:
            with torch.no_grad():
                model.eval()
                model.posterior(train_x[i].unsqueeze(0))
                model = model.condition_on_observations(
                    X=train_x[i].unsqueeze(0),
                    Y=train_y[i].view(1, 1),
                    noise=train_y_var[i].view(-1, 1),
                )
        else:
            model.set_train_data(train_x[:i], train_y[:i], strict=False)
            model.likelihood.noise = train_y_var[:i].t()
        fantasy_time = start - time.time()

        mll_time_list.append([-mll_time, -fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()
            with detach_test_caches(), max_cholesky_size(10000):
                pred_dist = model(train_x)
            end = time.time()

            rmse = (((pred_dist.mean - train_y.view(-1))**2).mean().sqrt().item())
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            # print("Outputscale: ", model.covar_module.base_kernel.raw_outputscale)
            # print("Lengthscale: ",
            #       model.covar_module.base_kernel.base_kernel.raw_lengthscale)
            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({"training": mll_time_list, "predictions": rmse_list}, args.output)