def __init__(self, train_x):
    """Variational GP classification model anchored at the training inputs.

    Args:
        train_x: training inputs used as inducing locations; either 2-d
            ``(n, d)`` or 3-d ``(b, n, d)`` for a batch of independent models.
    """
    if train_x.dim() == 3:
        # FIX: ``batch_size`` was removed from CholeskyVariationalDistribution
        # in gpytorch 1.0 (this code already uses the modern
        # VariationalStrategy API); the current keyword is ``batch_shape``.
        variational_distribution = CholeskyVariationalDistribution(
            train_x.size(-2), batch_shape=torch.Size([train_x.size(0)])
        )
    else:
        variational_distribution = CholeskyVariationalDistribution(train_x.size(-2))
    variational_strategy = VariationalStrategy(self, train_x, variational_distribution)
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, n_inducing):
    """Sparse variational GP with ``n_inducing`` learnable inducing points
    in 2-d, a scaled ARD Matérn kernel, a Gaussian likelihood, and all
    parameters moved to CUDA when available (float32)."""
    assert isinstance(n_inducing, int)
    # Number of inducing points and optimisation samples.
    self.m = n_inducing
    # Dummy standard-normal inducing locations — replaced later in self.fit.
    q_u = CholeskyVariationalDistribution(self.m)
    strategy = VariationalStrategy(
        self, torch.randn((self.m, 2)), q_u, learn_inducing_locations=True)
    VariationalGP.__init__(self, strategy)
    # Mean and kernel modules consumed by self.forward.
    self.mean = ConstantMean()
    self.cov = ScaleKernel(MaternKernel(ard_num_dims=2), ard_num_dims=2)
    self.likelihood = GaussianLikelihood()
    # Hardware allocation: prefer GPU, fall back to CPU.
    self.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    self.likelihood.to(self.device).float()
    self.to(self.device).float()
def __init__(self, train_x, train_y):
    """GP classification model that also fits itself at construction time.

    Builds an unwhitened variational GP anchored at ``train_x`` with fixed
    ARD lengthscales, then runs 30 Adam steps on the variational ELBO over
    all parameters *except* the raw lengthscale, and finally switches the
    model and likelihood to eval mode.

    Args:
        train_x: training inputs; ``ard_num_dims=2`` suggests shape
            ``(n, 2)`` — TODO(review) confirm with callers.
        train_y: binary targets for the Bernoulli likelihood.
    """
    variational_distribution = CholeskyVariationalDistribution(train_x.size(0))
    variational_strategy = UnwhitenedVariationalStrategy(
        self, train_x, variational_distribution, learn_inducing_locations=False
    )
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    kern = gpytorch.kernels.RBFKernel(ard_num_dims=2)
    # Fixed per-dimension lengthscales; excluded from optimization below.
    kern.lengthscale = torch.tensor([0.5, 0.1])
    self.covar_module = gpytorch.kernels.ScaleKernel(kern)
    self.likelihood = gpytorch.likelihoods.BernoulliLikelihood()
    self.train()
    self.likelihood.train()

    def custom_auto_tune_params():
        """ this is how we can remove certain params from optimization """
        # Yield every parameter except the raw lengthscale, keeping the
        # lengthscale set above frozen during training.
        for param in self.named_parameters():
            name = param[0]
            if name != "covar_module.base_kernel.raw_lengthscale":
                yield param[1]

    optimizer = torch.optim.Adam(custom_auto_tune_params(), lr=0.1)
    mll = gpytorch.mlls.VariationalELBO(self.likelihood, self, train_y.numel())
    # Short fixed-budget fit (30 steps) on the negative ELBO.
    for i in range(30):
        optimizer.zero_grad()
        output = self.__call__(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()
    self.eval()
    self.likelihood.eval()
def __init__(self, input_dims, output_dims, num_inducing=128, mean_type='constant'):
    """Deep-GP hidden layer: sparse variational GP with learned inducing
    locations, constant or linear mean, a scaled ARD RBF kernel, and an
    auxiliary linear layer. ``output_dims=None`` means a single output."""
    if output_dims is None:
        batch_shape = torch.Size([])
        init_points = torch.randn(num_inducing, input_dims)
    else:
        batch_shape = torch.Size([output_dims])
        init_points = torch.randn(output_dims, num_inducing, input_dims)

    q_u = CholeskyVariationalDistribution(
        num_inducing_points=num_inducing, batch_shape=batch_shape)
    strategy = VariationalStrategy(
        self, init_points, q_u, learn_inducing_locations=True)
    super(ApproximateDeepGPHiddenLayer, self).__init__(strategy, input_dims, output_dims)

    self.mean_module = (ConstantMean(batch_shape=batch_shape)
                        if mean_type == 'constant' else LinearMean(input_dims))
    self.covar_module = ScaleKernel(
        RBFKernel(batch_shape=batch_shape, ard_num_dims=input_dims),
        batch_shape=batch_shape,
        ard_num_dims=None,
    )
    self.linear_layer = Linear(input_dims, 1)
def __init__(self, num_dim, grid_bounds, grid_size, num_mixtures):
    """Initialize Gaussian process layer.

    Args:
        num_dim (int): data input dimension
        grid_bounds (tuple): min/max bounds of the inducing-point grid in
            each dimension
        grid_size (int): number of grid (inducing) points per dimension
        num_mixtures (int): number of spectral mixture components
    """
    # NOTE(review): ``batch_size`` is the pre-1.0 gpytorch keyword matching
    # the AdditiveGridInterpolationVariationalStrategy API used here.
    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=grid_size, batch_size=num_dim)
    variational_strategy = AdditiveGridInterpolationVariationalStrategy(
        self, grid_size=grid_size, grid_bounds=[grid_bounds], num_dim=num_dim,
        variational_distribution=variational_distribution)
    super(GaussianProcessLayer, self).__init__(variational_strategy)
    # FIX: ``ard_dum_dims`` was a typo for ``ard_num_dims``, so ARD was never
    # actually configured on the spectral mixture kernel.
    self.covar_module = gpytorch.kernels.SpectralMixtureKernel(
        num_mixtures=num_mixtures, ard_num_dims=num_dim)
    self.mean_module = gpytorch.means.ConstantMean()
    self.grid_bounds = grid_bounds
def __init__(self, train_x, train_y, likelihood, learned_kernel=None, learned_mean=None, mean_module=None, covar_module=None, beta=1.0):
    """Variational GP whose inducing points are fixed to the training inputs.

    The mean defaults to zero when ``mean_module`` is omitted; the kernel,
    likelihood, and any learned components are stored as supplied.
    """
    self.beta = beta
    self.n_train_samples = train_x.shape[0]
    q_u = CholeskyVariationalDistribution(self.n_train_samples)
    strategy = VariationalStrategy(
        self, train_x, q_u, learn_inducing_locations=False)
    super().__init__(strategy)
    self.mean_module = gpytorch.means.ZeroMean() if mean_module is None else mean_module
    self.covar_module = covar_module
    self.learned_kernel = learned_kernel
    self.learned_mean = learned_mean
    self.likelihood = likelihood
def __init__(self, train_x, train_y, likelihood, Z_init):
    """Stochastic variational GP with learnable inducing locations plus a
    variational distribution over log kernel hyperparameters.

    Args:
        train_x: training inputs, shape ``(n, d)``.
        train_y: training targets of length ``n``.
        likelihood: gpytorch likelihood object.
        Z_init: initial inducing locations ``Z``.
    """
    # Locations Z corresponding to u, they can be randomly initialized or
    # regularly placed.
    self.inducing_inputs = Z_init
    self.num_inducing = len(Z_init)
    self.n = len(train_y)
    self.data_dim = train_x.shape[1]
    # Sparse Variational Formulation
    q_u = CholeskyVariationalDistribution(self.num_inducing)
    q_f = VariationalStrategy(self, self.inducing_inputs, q_u, learn_inducing_locations=True)
    super(BayesianStochasticVariationalGP, self).__init__(q_f)
    self.likelihood = likelihood
    self.train_x = train_x
    self.train_y = train_y
    self.mean_module = ZeroMean()
    # NOTE(review): both ``base_covar_module`` and ``covar_module`` are built
    # as ScaleKernel(RBFKernel()) — likely one is redundant; check which one
    # ``forward`` uses before removing either.
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.RBFKernel())
    # Hyperparameter Variational distribution: standard-normal prior over a
    # single log-hyperparameter.
    hyper_prior_mean = torch.Tensor([0])
    hyper_dim = len(hyper_prior_mean)
    log_hyper_prior = NormalPrior(hyper_prior_mean, torch.ones_like(hyper_prior_mean))
    self.log_theta = LogHyperVariationalDist(hyper_dim, log_hyper_prior, self.n, self.data_dim)
def __init__(self, input_dims, output_dims, num_inducing=128):
    """Deep-GP layer: sparse variational GP with learned inducing locations,
    constant mean, and scaled RBF kernel; initial inducing points are placed
    on the GPU when CUDA is available."""
    if output_dims is None:
        batch_shape = torch.Size([])
        points = torch.randn(num_inducing, input_dims)
    else:
        batch_shape = torch.Size([output_dims])
        points = torch.randn(output_dims, num_inducing, input_dims)
    # Same device move in both branches — hoisted here once.
    if torch.cuda.is_available():
        points = points.cuda()
    q_u = CholeskyVariationalDistribution(
        num_inducing_points=num_inducing, batch_shape=batch_shape)
    strategy = VariationalStrategy(
        self, points, q_u, learn_inducing_locations=True)
    super().__init__(strategy, input_dims, output_dims)
    self.mean_module = ConstantMean(batch_shape=batch_shape)
    self.covar_module = ScaleKernel(RBFKernel(ard_num_dims=None), ard_num_dims=None)
def __init__(self, input_dims, output_dims, num_inducing=128, mean_type="constant"):
    """Deep-GP layer with learned inducing locations, a constant or linear
    mean, and a scaled ARD RBF kernel; ``output_dims=None`` means a single
    (non-batched) output."""
    if output_dims is None:
        batch_shape = torch.Size([])
        init_points = torch.randn(num_inducing, input_dims)
    else:
        batch_shape = torch.Size([output_dims])
        init_points = torch.randn(output_dims, num_inducing, input_dims)
    q_u = CholeskyVariationalDistribution(
        num_inducing_points=num_inducing, batch_shape=batch_shape)
    strategy = VariationalStrategy(
        self, init_points, q_u, learn_inducing_locations=True)
    super().__init__(strategy, input_dims=input_dims, output_dims=output_dims)
    self.mean = (ConstantMean(batch_shape=batch_shape)
                 if mean_type == "constant" else LinearMean(input_dims))
    self.covar = ScaleKernel(
        RBFKernel(ard_num_dims=input_dims, batch_shape=batch_shape),
        batch_shape=batch_shape,
        ard_num_dims=None,
    )
def __init__(self, train_x):
    """GP classification model whose inducing points are the (fixed)
    training inputs, using the unwhitened variational formulation."""
    q_u = CholeskyVariationalDistribution(train_x.size(0))
    strategy = UnwhitenedVariationalStrategy(
        self, train_x, q_u, learn_inducing_locations=False
    )
    super(GPClassificationModel, self).__init__(strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, train_x, lengthscale=None):
    """Variational sparse GP classification model.

    Args:
        train_x: training inputs, also used as initial (learnable) inducing
            locations.
        lengthscale: optional initial RBF lengthscale; left at the gpytorch
            default when None.
    """
    variational_distribution = CholeskyVariationalDistribution(train_x.size(0))
    variational_strategy = VariationalStrategy(
        self, train_x, variational_distribution, learn_inducing_locations=True)
    super(VSGPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    base_kernel = gpytorch.kernels.RBFKernel()
    if lengthscale is not None:
        # FIX: RBFKernel's constructor takes no ``lengthscale`` keyword; the
        # documented way (used elsewhere in this file) is assigning to the
        # ``lengthscale`` property after construction.
        base_kernel.lengthscale = lengthscale
    self.covar_module = gpytorch.kernels.ScaleKernel(base_kernel)
def __init__(self, train_x, likelihood):
    """Spatiotemporal GP: variational GP over the training inputs with a
    zero mean and separate seasonal / weekly / spatial / remote component
    kernels (presumably combined in ``forward`` — confirm there)."""
    q_u = CholeskyVariationalDistribution(train_x.size(0))
    strategy = VariationalStrategy(self, train_x, q_u)
    super(Spatiotemporal_GP, self).__init__(strategy)
    self.mean_module = gpytorch.means.ZeroMean()
    self.covar_season = gpytorch.kernels.PeriodicKernel()
    self.covar_week = gpytorch.kernels.RBFKernel()
    self.covar_spatial = gpytorch.kernels.MaternKernel()
    self.covar_remote = gpytorch.kernels.MaternKernel()
def __init__(self, train_x, use_inducing=False):
    """Batched GP classification model: either learns 50 random inducing
    points (``use_inducing=True``) or anchors the strategy at ``train_x``.

    NOTE(review): the variational distribution is sized by
    ``train_x.size(-2)`` even when 50 random inducing points are used —
    confirm the counts are meant to match.
    """
    q_u = CholeskyVariationalDistribution(
        train_x.size(-2), batch_shape=train_x.shape[:-2])
    if use_inducing:
        points = torch.randn(50, train_x.size(-1))
    else:
        points = train_x
    strategy = VariationalStrategy(
        self, points, q_u, learn_inducing_locations=use_inducing
    )
    super(GPClassificationModel, self).__init__(strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, inducing_points):
    """GP classification model with learnable inducing locations, zero mean,
    scaled RBF kernel, and a 1-d Gauss–Hermite quadrature helper."""
    q_u = CholeskyVariationalDistribution(inducing_points.size(0))
    strategy = VariationalStrategy(
        self, inducing_points, q_u, learn_inducing_locations=True
    )
    super(GPClassificationModel, self).__init__(strategy)
    self.mean_module = gpytorch.means.ZeroMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    self.quadrature = GaussHermiteQuadrature1D()
def __init__(self, train_x, likelihood, feature_extractor):
    """Deep-kernel-learning model: stores a feature extractor alongside a
    variational GP (constant mean, scaled Matérn kernel) anchored at
    ``train_x``."""
    q_u = CholeskyVariationalDistribution(train_x.size(0))
    strategy = VariationalStrategy(self, train_x, q_u)
    super(DKL, self).__init__(strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.MaternKernel())
    self.feature_extractor = feature_extractor
def __init__(self, inducing_points):
    """SVGP regression model: a batch of 2 independent variational GPs with
    learnable inducing locations and a smoothed-box lengthscale prior.

    Args:
        inducing_points: initial inducing locations, shape ``(..., m, d)``.
    """
    # FIX: ``batch_size`` was removed from CholeskyVariationalDistribution in
    # gpytorch 1.0 (this block already uses the modern VariationalStrategy);
    # the current keyword is ``batch_shape``.
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2), batch_shape=torch.Size([2]))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(SVGPRegressionModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.RBFKernel(
            lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(0.001, 1.0, sigma=0.1))
    )
def __init__(self):
    """Approximate GP over 10-d inputs with 100 randomly initialised,
    learnable inducing points."""
    initial_points = torch.randn(100, 10)
    q_u = CholeskyVariationalDistribution(initial_points.size(0))
    strategy = VariationalStrategy(
        self, initial_points, q_u, learn_inducing_locations=True)
    super().__init__(strategy)
def __init__(self, inducing_inputs, kernel, mean, flags, **kwargs):
    """Variational GP assembled from caller-supplied mean/kernel modules.

    Args:
        inducing_inputs: initial inducing locations, shape ``(m, d)``.
        kernel: covariance module installed as ``self.covar_module``.
        mean: mean module installed as ``self.mean_module``.
        flags: config object; ``flags.cov`` selects grid interpolation vs.
            plain variational strategy, ``flags.optimize_inducing`` toggles
            learning the inducing locations (plain branch only).
        **kwargs: not used in this constructor.
    """
    num_inducing = inducing_inputs.shape[0]
    print(f"num inducing from shape: {num_inducing}")
    if flags.cov == "GridInterpolationKernel":
        # Grid branch: fixed 400-point grid on (-10, 10) per dimension; the
        # supplied inducing_inputs are only used for their dimensionality.
        num_dim = inducing_inputs.shape[1]
        grid_bounds = (-10.0, 10.0)
        grid_size = 400
        # define the variational distribution with the grid_size
        variational_distribution = CholeskyVariationalDistribution(
            num_inducing_points=grid_size, batch_size=num_dim)
        # The variational strategy defines how the GP prior is computed and
        # how to marginalize out the function values (only for DKL)
        variational_strategy = GridInterpolationVariationalStrategy(
            self,
            # inducing_inputs,
            grid_size=grid_size,
            grid_bounds=[grid_bounds],
            # num_dim=num_dim,
            variational_distribution=variational_distribution,
            # mixing_params=False, sum_output=False,
            # learn_inducing_locations=flags.optimize_inducing
        )
    else:
        # define the variational distribution with the length of the inducing inputs
        variational_distribution = CholeskyVariationalDistribution(
            num_inducing)
        # The variational strategy defines how the GP prior is computed and
        # how to marginalize out the inducing point function values
        variational_strategy = VariationalStrategy(
            self,
            inducing_inputs,
            variational_distribution,
            learn_inducing_locations=flags.optimize_inducing,
        )
    super().__init__(variational_strategy)
    # initialize mean and covariance
    # self.mean_module = gpytorch.means.ConstantMean()
    self.mean_module = mean
    self.covar_module = kernel
def __init__(self, inducing_points: torch.Tensor):
    """General-purpose approximate GP: learnable inducing locations,
    constant mean, scaled RBF kernel."""
    q_u = CholeskyVariationalDistribution(inducing_points.size(0))
    strategy = VariationalStrategy(
        self, inducing_points, q_u, learn_inducing_locations=True)
    super(GeneralApproximateGP, self).__init__(strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, inducing_points, kernel, likelihood):
    """SVGP regression model parameterised by a caller-supplied kernel and
    likelihood; inducing locations are learned."""
    q_u = CholeskyVariationalDistribution(inducing_points.size(0))
    strategy = VariationalStrategy(
        self, inducing_points, q_u, learn_inducing_locations=True)
    super(SVGPRegressionModel, self).__init__(strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = kernel
    self.likelihood = likelihood
def __init__(self, inducing_points):
    """Variational GP with fixed inducing locations, a constant mean, and a
    scaled RBF kernel."""
    q_u = CholeskyVariationalDistribution(inducing_points.size(0))
    strategy = VariationalStrategy(
        self, inducing_points, q_u, learn_inducing_locations=False)
    super(GPModel, self).__init__(strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, data_dim, grid_size=64, grid_bounds=([-1, 1], )):
    """Additive grid-interpolation GP classification layer: one GP per input
    dimension over a shared grid of ``grid_size`` inducing points.

    NOTE(review): ``batch_size`` is the pre-1.0 gpytorch keyword matching
    the AdditiveGridInterpolationVariationalStrategy API used here.
    """
    q_u = CholeskyVariationalDistribution(
        num_inducing_points=grid_size, batch_size=data_dim)
    strategy = AdditiveGridInterpolationVariationalStrategy(
        self,
        grid_size=grid_size,
        grid_bounds=grid_bounds,
        num_dim=data_dim,
        variational_distribution=q_u,
    )
    super(GPClassificationModel, self).__init__(strategy)
    self.mean_module = ConstantMean()
    self.covar_module = ScaleKernel(MaternKernel(nu=1.5))
def _reset_variational_strategy(self):
    """Rebuild the variational strategy from freshly selected inducing
    points (selection method given by ``self.inducing_point_method``);
    locations are kept fixed rather than learned."""
    points = self._select_inducing_points(method=self.inducing_point_method)
    q_u = CholeskyVariationalDistribution(
        points.size(0), batch_shape=torch.Size([self._batch_size]))
    self.variational_strategy = VariationalStrategy(
        self,
        points,
        q_u,
        learn_inducing_locations=False,
    )
def __init__(self, num_inducing_points=64, name_prefix="mixture_gp"):
    """1-d mixture GP: inducing points spread evenly on [0, 1], constant
    mean, scaled RBF kernel."""
    self.name_prefix = name_prefix
    grid = torch.linspace(0, 1, num_inducing_points)
    q_u = CholeskyVariationalDistribution(num_inducing_points)
    strategy = VariationalStrategy(self, grid, q_u)
    super().__init__(strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
def __init__(self, inducing_points, in_dim, out_dim=None, mean=None, covar=None, is_output_layer=True):
    """Variational GP layer over graph features.

    Args:
        inducing_points: initial inducing locations, shape ``(..., m, d)``;
            locations are learned.
        in_dim: input feature dimension (used by the default linear mean).
        out_dim: number of output tasks, or None for a single output.
        mean: optional mean-module override (defaults to LinearMean).
        covar: optional kernel override (defaults to a degree-4
            PolynomialKernel).
        is_output_layer: flag stored for use elsewhere in the class.
    """
    if out_dim is None:
        batch_shape = torch.Size([])
    else:
        batch_shape = torch.Size([out_dim])
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2), batch_shape=batch_shape)
    base_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    # FIX: only wrap in IndependentMultitaskVariationalStrategy for
    # multi-output models — the wrapper was previously applied even with
    # ``out_dim=None`` (i.e. ``num_tasks=None``), mirroring the correct
    # branching used by DeepGraphKernel in this file.
    if out_dim is None:
        variational_strategy = base_strategy
    else:
        variational_strategy = IndependentMultitaskVariationalStrategy(
            base_strategy, num_tasks=out_dim)
    super(VariationalGraphGP, self).__init__(variational_strategy)
    # FIX: the default mean used torch.Size([out_dim]), which raises
    # TypeError when out_dim is None; reuse the batch_shape computed above.
    self.mean_module = gpytorch.means.LinearMean(
        in_dim, batch_shape=batch_shape) if mean is None else mean
    self.covar_module = gpytorch.kernels.PolynomialKernel(
        power=4, batch_shape=batch_shape) if covar is None else covar
    self.num_inducing = inducing_points.size(-2)
    self.is_output_layer = is_output_layer
def __init__(self, input_dim, num_inducing, hidden_sizes=[32, 32], out_dim=None, mean=None, covar=None):
    """Deep graph-kernel GP: a stack of GraphConv layers feeding a sparse
    variational GP (independent multitask when ``out_dim`` is given).

    Args:
        input_dim: node-feature dimension into the first GCN layer.
        num_inducing: number of inducing points.
        hidden_sizes: GCN layer widths; the GP acts on ``hidden_sizes[-1]``
            features. (NOTE(review): mutable default is only read here, but
            a None default would be safer.)
        out_dim: number of output tasks, or None for a single output.
        mean: optional mean-module override.
        covar: optional kernel override.
    """
    if out_dim is None:
        batch_shape = torch.Size([])
        inducing_points = torch.rand(num_inducing, hidden_sizes[-1])
    else:
        batch_shape = torch.Size([out_dim])
        inducing_points = torch.rand(out_dim, num_inducing, hidden_sizes[-1])
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2), batch_shape=batch_shape
    )
    # Use IndependentMultitaskVariationalStrategy only for multi-output.
    if out_dim is None:
        variational_strategy = VariationalStrategy(
            self, inducing_points, variational_distribution,
            learn_inducing_locations=True
        )
    else:
        variational_strategy = IndependentMultitaskVariationalStrategy(
            VariationalStrategy(
                self, inducing_points, variational_distribution,
                learn_inducing_locations=True
            ),
            num_tasks=out_dim,
        )
    super(DeepGraphKernel, self).__init__(variational_strategy)
    gcn_layers = nn.ModuleList()
    layer_dims = list(zip([input_dim] + hidden_sizes[:-1], hidden_sizes))
    # (unused enumerate index removed)
    for in_features, out_features in layer_dims:
        gcn_layers.append(
            GraphConv(in_features, out_features, activation=nn.ReLU())
        )
    # FIX: the default mean used torch.Size([out_dim]), which raises
    # TypeError when out_dim is None; reuse the batch_shape computed above.
    self.mean_module = gpytorch.means.LinearMean(
        hidden_sizes[-1], batch_shape=batch_shape) if mean is None else mean
    self.covar_module = gpytorch.kernels.PolynomialKernel(
        power=4, batch_shape=batch_shape) if covar is None else covar
    self.num_inducing = inducing_points.size(-2)
    self.gcn = gcn_layers
    self.dropout = torch.nn.Dropout(0.5)
def __init__(self, inducing_points, ex_var_dim, kernel, **ker_conf):
    """Approximate GP whose kernel is produced by ``set_kernel``; ARD over
    ``ex_var_dim`` input dimensions is the default, overridable via
    ``ker_conf``."""
    q_u = CholeskyVariationalDistribution(inducing_points.size(0))
    strategy = VariationalStrategy(
        self, inducing_points, q_u, learn_inducing_locations=True
    )
    super(ApproximateGPModel, self).__init__(strategy)
    self.mean_module = ConstantMean()
    # Defaults first, caller-supplied options win.
    kernel_conf = {'ard_num_dims': ex_var_dim, **ker_conf}
    self.covar_module = set_kernel(kernel, **kernel_conf)
def __init__(self, grid_size=16, grid_bounds=([-1, 1],)):
    """Additive grid-interpolation GP classification model over 2 input
    dimensions with smoothed-box priors on mean, lengthscale, and
    outputscale.

    Args:
        grid_size: number of grid (inducing) points per dimension.
        grid_bounds: grid bounds per dimension.
    """
    # FIX: the number of inducing points was hard-coded to 16, silently
    # ignoring any non-default ``grid_size``; tie it to the parameter.
    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=grid_size, batch_size=2)
    variational_strategy = AdditiveGridInterpolationVariationalStrategy(
        self,
        grid_size=grid_size,
        grid_bounds=grid_bounds,
        num_dim=2,
        variational_distribution=variational_distribution,
    )
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = ConstantMean(prior=SmoothedBoxPrior(-1e-5, 1e-5))
    self.covar_module = ScaleKernel(
        RBFKernel(ard_num_dims=1, lengthscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1)),
        outputscale_prior=SmoothedBoxPrior(exp(-5), exp(6), sigma=0.1),
    )
def __init__(
    self,
    train_x: torch.Tensor,
    train_y: torch.Tensor,
    inducing_points: torch.Tensor,
    scales: Union[torch.Tensor, float] = 1.0,
    mean_module: Optional[Mean] = None,
    covar_module: Optional[Kernel] = None,
    fixed_prior_mean: Optional[float] = None,
) -> None:
    """Variational GP over mixed function/derivative observations.

    Args:
        train_x: training inputs; the default kernel's ``ard_num_dims``
            excludes the last column (presumably a derivative indicator —
            TODO confirm with the PartialObsGrad modules).
        train_y: training targets.
        inducing_points: fixed inducing locations (not learned).
        scales: scale(s) entering the default lengthscale Gamma prior.
        mean_module: optional mean override; defaults to
            ConstantMeanPartialObsGrad.
        covar_module: optional kernel override; defaults to a scaled
            RBFKernelPartialObsGrad.
        fixed_prior_mean: if given, the mean constant is set to this value
            and frozen.
    """
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(0))
    # Match the variational parameters' dtype/device to the training data.
    variational_distribution.to(train_x)
    variational_strategy = VariationalStrategy(
        model=self,
        inducing_points=inducing_points,
        variational_distribution=variational_distribution,
        learn_inducing_locations=False,
    )
    super(MixedDerivativeVariationalGP, self).__init__(variational_strategy)
    # Set the mean if specified to
    if mean_module is None:
        self.mean_module = ConstantMeanPartialObsGrad()
    else:
        self.mean_module = mean_module
    if fixed_prior_mean is not None:
        # Pin the prior mean at the requested value and exclude it from
        # optimization.
        self.mean_module.constant.requires_grad_(False)
        self.mean_module.constant.copy_(
            torch.tensor([fixed_prior_mean], dtype=train_x.dtype))
    if covar_module is None:
        self.base_kernel = RBFKernelPartialObsGrad(
            ard_num_dims=train_x.shape[-1] - 1,
            lengthscale_prior=GammaPrior(3.0, 6.0 / scales),
        )
        self.covar_module = ScaleKernel(self.base_kernel,
                                        outputscale_prior=GammaPrior(
                                            2.0, 0.15))
    else:
        self.covar_module = covar_module
    self._num_outputs = 1
    self.train_inputs = (train_x, )
    self.train_targets = train_y
    self(train_x)  # Necessary for CholeskyVariationalDistribution
def __init__(self, inducing_points, use_fast_strategy):
    """Unwhitened SVGP model built from numpy-array inducing points.

    Args:
        inducing_points: numpy array of initial (learnable) inducing
            locations.
        use_fast_strategy: currently unused — the unwhitened strategy is
            always chosen. TODO(review): wire this flag up or remove it.
    """
    inducing_points = torch.from_numpy(inducing_points).float()
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(0))
    variational_strategy = UnwhitenedVariationalStrategy(
        self, inducing_points, variational_distribution,
        learn_inducing_locations=True)
    super().__init__(variational_strategy)
    self.mean_module = ConstantMean()
    # FIX: removed the unused local ``dims`` — its only consumer was the
    # stale commented-out Matern+Linear kernel, also dropped.
    self.covar_module = ScaleKernel(RBFKernel())