def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    train_Yvar: Tensor,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    r"""Set up a single-task exact GP with a heteroskedastic noise model.

    The observation noise is described by an auxiliary `SingleTaskGP` that is
    trained on the observed variances through a `Log` outcome transform. That
    auxiliary model is wrapped in `HeteroskedasticNoise` and becomes the
    likelihood of this model; it is also registered as the added loss term
    `"noise_added_loss"`.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        train_Yvar: A `batch_shape x n x m` tensor of observed measurement
            noise.
        outcome_transform: An outcome transform that is applied to the
            training data during instantiation and to the posterior during
            inference (that is, the `Posterior` obtained by calling
            `.posterior` on the model will be on the original scale). The
            log-transform of the variances used by the noise model happens
            after this transform has been applied.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> se = torch.norm(train_X, dim=1, keepdim=True)
        >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y)
        >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar)
    """
    # Transform the outcomes (and their variances) before any validation.
    if outcome_transform is not None:
        train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
    self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
    validate_input_scaling(
        train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar
    )
    self._set_dimensions(train_X=train_X, train_Y=train_Y)

    # Likelihood of the auxiliary GP that models the observed variances.
    variance_prior = SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log)
    variance_floor = GreaterThan(
        MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0
    )
    noise_likelihood = GaussianLikelihood(
        noise_prior=variance_prior,
        batch_shape=self._aug_batch_shape,
        noise_constraint=variance_floor,
    )

    # The variances are the "observations" of the noise GP; `Log()` makes it
    # operate on log-variances internally.
    variance_gp = SingleTaskGP(
        train_X=train_X,
        train_Y=train_Yvar,
        likelihood=noise_likelihood,
        outcome_transform=Log(),
    )
    heteroskedastic_noise = HeteroskedasticNoise(variance_gp)
    likelihood = _GaussianLikelihoodBase(heteroskedastic_noise)
    super().__init__(train_X=train_X, train_Y=train_Y, likelihood=likelihood)

    # Expose the noise GP through an added loss term on this model.
    loss_name = "noise_added_loss"
    self.register_added_loss_term(loss_name)
    self.update_added_loss_term(loss_name, NoiseModelAddedLossTerm(variance_gp))

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    train_Yvar: Optional[Tensor] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
    input_transform: Optional[InputTransform] = None,
) -> None:
    r"""Initialize the fully Bayesian single-task GP model.

    The GP components (`mean_module`, `covar_module`, `likelihood`) are set
    to `None` here; only a `SaasPyroModel` holding the (transformed)
    training data is created.

    Args:
        train_X: Training inputs (n x d).
        train_Y: Training targets (n x 1).
        train_Yvar: Observed noise variance (n x 1). Inferred if None. When
            given, it is clamped from below at `MIN_INFERRED_NOISE_LEVEL`
            after the outcome transform has been applied.
        outcome_transform: Optional transform applied to `train_Y` and
            `train_Yvar` before validation; stored on the model when given.
        input_transform: Optional transform applied to `train_X` (without
            gradient tracking) to obtain the inputs handed to the Pyro
            model; stored on the model when given.

    Raises:
        ValueError: If `train_X` / `train_Y` are not 2-dimensional with
            matching first dimension and a single output column, or if
            `train_Yvar` is given with a shape different from `train_Y`.
    """
    shapes_ok = (
        train_X.ndim == train_Y.ndim == 2
        and len(train_X) == len(train_Y)
        and train_Y.shape[-1] == 1
    )
    if not shapes_ok:
        raise ValueError(
            "Expected train_X to have shape n x d and train_Y to have shape n x 1"
        )
    if train_Yvar is not None and train_Y.shape != train_Yvar.shape:
        raise ValueError(
            "Expected train_Yvar to be None or have the same shape as train_Y"
        )

    with torch.no_grad():
        transformed_X = self.transform_inputs(
            X=train_X, input_transform=input_transform
        )
    if outcome_transform is not None:
        train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)

    # Validation runs on the transformed inputs, while the dimensions are
    # derived from the untransformed `train_X`.
    self._validate_tensor_args(X=transformed_X, Y=train_Y)
    validate_input_scaling(
        train_X=transformed_X, train_Y=train_Y, train_Yvar=train_Yvar
    )
    self._set_dimensions(train_X=train_X, train_Y=train_Y)

    # Clamp after transforming
    if train_Yvar is not None:
        train_Yvar = train_Yvar.clamp(MIN_INFERRED_NOISE_LEVEL)

    super().__init__(train_X, train_Y)
    self.train_X = train_X
    self.train_Y = train_Y
    self.train_Yvar = train_Yvar
    # No GP components are built at construction time.
    self.mean_module = self.covar_module = self.likelihood = None
    self.pyro_model = SaasPyroModel(
        train_X=transformed_X, train_Y=train_Y, train_Yvar=train_Yvar
    )

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    if input_transform is not None:
        self.input_transform = input_transform
def test_validate_input_scaling(self):
    """Exercise `validate_input_scaling` with scaled, unscaled and bad data."""

    def _input_warning_issued(caught):
        # True if any recorded warning is an `InputDataWarning`.
        return any(issubclass(w.category, InputDataWarning) for w in caught)

    raw_X = 2 + torch.rand(3, 4, 3)
    raw_Y = torch.randn(3, 4, 2)

    # check that nothing is being checked
    with settings.validate_input_scaling(False), settings.debug(
        True
    ), warnings.catch_warnings(record=True) as ws:
        validate_input_scaling(train_X=raw_X, train_Y=raw_Y)
        self.assertFalse(_input_warning_issued(ws))

    # check that warnings are being issued
    with settings.debug(True):
        with warnings.catch_warnings(record=True) as ws:
            validate_input_scaling(train_X=raw_X, train_Y=raw_Y)
            self.assertTrue(_input_warning_issued(ws))

    # check that errors are raised when requested
    with settings.debug(True), self.assertRaises(InputDataError):
        validate_input_scaling(train_X=raw_X, train_Y=raw_Y, raise_on_fail=True)

    # check that no errors are being raised if everything is standardized
    x_lo, _ = raw_X.min(dim=-1, keepdim=True)
    x_hi, _ = raw_X.max(dim=-1, keepdim=True)
    scaled_X = (raw_X - x_lo) / (x_hi - x_lo)
    y_mean = raw_Y.mean(dim=-2, keepdim=True)
    y_std = raw_Y.std(dim=-2, keepdim=True)
    scaled_Y = (raw_Y - y_mean) / y_std
    with settings.debug(True):
        with warnings.catch_warnings(record=True) as ws:
            validate_input_scaling(train_X=scaled_X, train_Y=scaled_Y)
            self.assertFalse(_input_warning_issued(ws))

    # test that negative variances raise an error
    bad_Yvar = torch.rand_like(scaled_Y)
    bad_Yvar[0, 0, 1] = -0.5
    with settings.debug(True), self.assertRaises(InputDataError):
        validate_input_scaling(
            train_X=scaled_X, train_Y=scaled_Y, train_Yvar=bad_Yvar
        )

    # check that NaNs raise errors
    scaled_X[0, 0, 0] = float("nan")
    with settings.debug(True), self.assertRaises(InputDataError):
        validate_input_scaling(train_X=scaled_X, train_Y=scaled_Y)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    likelihood: Optional[Likelihood] = None,
    covar_module: Optional[Module] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
    r"""Construct a single-task exact GP model.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        likelihood: A likelihood. If omitted, a `GaussianLikelihood` with an
            inferred noise level (Gamma(1.1, 0.05) prior, initialized at the
            prior mode) is used. Supplying one marks the model as having a
            custom likelihood.
        covar_module: The module computing the covariance (Kernel) matrix.
            If omitted, a scaled Matern-5/2 kernel with ARD is used.
        outcome_transform: An outcome transform that is applied to the
            training data during instantiation and to the posterior during
            inference (that is, the `Posterior` obtained by calling
            `.posterior` on the model will be on the original scale).

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> model = SingleTaskGP(train_X, train_Y)
    """
    if outcome_transform is not None:
        train_Y, _ = outcome_transform(train_Y)
    self._validate_tensor_args(X=train_X, Y=train_Y)
    validate_input_scaling(train_X=train_X, train_Y=train_Y)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)

    if likelihood is not None:
        self._is_custom_likelihood = True
    else:
        # Default: inferred homoskedastic noise, started at the prior mode.
        noise_prior = GammaPrior(1.1, 0.05)
        noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        noise_floor = GreaterThan(
            MIN_INFERRED_NOISE_LEVEL,
            transform=None,
            initial_value=noise_prior_mode,
        )
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=self._aug_batch_shape,
            noise_constraint=noise_floor,
        )

    ExactGP.__init__(self, train_X, train_Y, likelihood)
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)

    if covar_module is not None:
        self.covar_module = covar_module
    else:
        matern = MaternKernel(
            nu=2.5,
            ard_num_dims=train_X.shape[-1],
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        self.covar_module = ScaleKernel(
            matern,
            batch_shape=self._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
        # Only recorded for the default kernel.
        self._subset_batch_dict = {
            "likelihood.noise_covar.raw_noise": -2,
            "mean_module.constant": -2,
            "covar_module.raw_outputscale": -1,
            "covar_module.base_kernel.raw_lengthscale": -3,
        }
    # TODO: Allow subsetting of other covar modules

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    train_Yvar: Tensor,
    covar_module: Optional[Module] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
    **kwargs: Any,
) -> None:
    r"""Construct a single-task exact GP model with fixed noise levels.

    Args:
        train_X: A `batch_shape x n x d` tensor of training features.
        train_Y: A `batch_shape x n x m` tensor of training observations.
        train_Yvar: A `batch_shape x n x m` tensor of observed measurement
            noise. It is passed as the fixed noise of a
            `FixedNoiseGaussianLikelihood`.
        covar_module: The module computing the covariance (Kernel) matrix.
            If omitted, a scaled Matern-5/2 kernel with ARD is used.
        outcome_transform: An outcome transform that is applied to the
            training data during instantiation and to the posterior during
            inference (that is, the `Posterior` obtained by calling
            `.posterior` on the model will be on the original scale).
        **kwargs: Accepted for call compatibility; not read by this
            constructor.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> train_Yvar = torch.full_like(train_Y, 0.2)
        >>> model = FixedNoiseGP(train_X, train_Y, train_Yvar)
    """
    if outcome_transform is not None:
        train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
    self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
    validate_input_scaling(
        train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar
    )
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, train_Yvar = self._transform_tensor_args(
        X=train_X, Y=train_Y, Yvar=train_Yvar
    )

    # The observed variances are used as-is; no noise level is inferred.
    fixed_noise_likelihood = FixedNoiseGaussianLikelihood(
        noise=train_Yvar, batch_shape=self._aug_batch_shape
    )
    ExactGP.__init__(
        self,
        train_inputs=train_X,
        train_targets=train_Y,
        likelihood=fixed_noise_likelihood,
    )
    self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)

    if covar_module is not None:
        self.covar_module = covar_module
    else:
        matern = MaternKernel(
            nu=2.5,
            ard_num_dims=train_X.shape[-1],
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        self.covar_module = ScaleKernel(
            base_kernel=matern,
            batch_shape=self._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )
        # Only recorded for the default kernel.
        self._subset_batch_dict = {
            "mean_module.constant": -2,
            "covar_module.raw_outputscale": -1,
            "covar_module.base_kernel.raw_lengthscale": -3,
        }
    # TODO: Allow subsetting of other covar modules

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Optional[Tensor] = None,
    likelihood: Optional[Likelihood] = None,
    num_outputs: int = 1,
    learn_inducing_points: bool = True,
    covar_module: Optional[Kernel] = None,
    mean_module: Optional[Mean] = None,
    variational_distribution: Optional[_VariationalDistribution] = None,
    variational_strategy: Type[_VariationalStrategy] = VariationalStrategy,
    inducing_points: Optional[Union[Tensor, int]] = None,
    outcome_transform: Optional[OutcomeTransform] = None,
    input_transform: Optional[InputTransform] = None,
) -> None:
    r"""
    A single task stochastic variational Gaussian process model (SVGP) as
    described by [hensman2013svgp]_. We use pivoted cholesky initialization
    [burt2020svgp]_ to initialize the inducing points of the model.

    Args:
        train_X: Training inputs (due to the ability of the SVGP to
            sub-sample this does not have to be all of the training inputs).
        train_Y: Training targets (optional). When given, the size of its
            last dimension overrides `num_outputs`.
        likelihood: Instance of a GPyTorch likelihood. If omitted, uses
            either a `GaussianLikelihood` (if `num_outputs=1`) or a
            `MultitaskGaussianLikelihood` (if `num_outputs>1`).
        num_outputs: Number of output responses per input (default: 1).
        learn_inducing_points: Forwarded to the wrapped
            `_SingleTaskVariationalGP` (default: True).
        covar_module: Kernel function. If omitted, uses a `MaternKernel`.
        mean_module: Mean of GP model. If omitted, uses a `ConstantMean`.
        variational_distribution: Type of variational distribution to use
            (default: CholeskyVariationalDistribution), the properties of the
            variational distribution will encourage scalability or ease of
            optimization.
        variational_strategy: Type of variational strategy to use (default:
            VariationalStrategy). The default setting uses "whitening" of the
            variational distribution to make training easier.
        inducing_points: The number or specific locations of the inducing
            points.
        outcome_transform: Optional transform applied to `train_Y` (when
            targets are given) before validation; stored on the model.
        input_transform: Optional transform applied to `train_X` (without
            gradient tracking) to obtain the inputs used by the wrapped
            model; stored on the model.
    """
    with torch.no_grad():
        transformed_X = self.transform_inputs(
            X=train_X, input_transform=input_transform
        )

    if train_Y is not None:
        if outcome_transform is not None:
            train_Y, _ = outcome_transform(train_Y)
        self._validate_tensor_args(X=transformed_X, Y=train_Y)
        validate_input_scaling(train_X=transformed_X, train_Y=train_Y)
        # The targets take precedence over the `num_outputs` argument.
        if train_Y.shape[-1] != num_outputs:
            num_outputs = train_Y.shape[-1]
    self._num_outputs = num_outputs

    # Multi-output models get an extra trailing batch dimension.
    input_batch_shape = train_X.shape[:-2]
    self._input_batch_shape = input_batch_shape
    augmented_shape = copy.deepcopy(input_batch_shape)
    if num_outputs > 1:
        augmented_shape = augmented_shape + torch.Size([num_outputs])
    self._aug_batch_shape = augmented_shape

    if likelihood is not None:
        self._is_custom_likelihood = True
    elif num_outputs == 1:
        # Inferred homoskedastic noise, started at the prior mode.
        noise_prior = GammaPrior(1.1, 0.05)
        noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        noise_floor = GreaterThan(
            MIN_INFERRED_NOISE_LEVEL,
            transform=None,
            initial_value=noise_prior_mode,
        )
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=self._aug_batch_shape,
            noise_constraint=noise_floor,
        )
    else:
        likelihood = MultitaskGaussianLikelihood(num_tasks=num_outputs)

    variational_gp = _SingleTaskVariationalGP(
        train_X=transformed_X,
        train_Y=train_Y,
        num_outputs=num_outputs,
        learn_inducing_points=learn_inducing_points,
        covar_module=covar_module,
        mean_module=mean_module,
        variational_distribution=variational_distribution,
        variational_strategy=variational_strategy,
        inducing_points=inducing_points,
    )
    super().__init__(
        model=variational_gp, likelihood=likelihood, num_outputs=num_outputs
    )

    if outcome_transform is not None:
        self.outcome_transform = outcome_transform
    if input_transform is not None:
        self.input_transform = input_transform

    # for model fitting utilities
    # TODO: make this a flag?
    self.model.train_inputs = [transformed_X]
    if train_Y is not None:
        self.model.train_targets = train_Y.squeeze(-1)

    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    stats_X: Any,
    stats_Y: Any,
    likelihood: Optional[Likelihood] = None,
    covar_module: Optional[Module] = None,
    mean_module: Optional[Module] = None,
) -> None:
    r"""Construct a single-task exact GP model with an optional custom mean.

    Args:
        train_X: A `n x d` or `batch_shape x n x d` (batch mode) tensor of
            training features.
        train_Y: A `n x m` or `batch_shape x n x m` (batch mode) tensor of
            training observations.
        stats_X: Forwarded unchanged to `Mean_Function.Ed_Mean` when
            `mean_module` is given; unused otherwise.
            NOTE(review): presumably input normalization statistics -
            confirm against `Ed_Mean`.
        stats_Y: Forwarded unchanged to `Mean_Function.Ed_Mean` when
            `mean_module` is given; unused otherwise.
            NOTE(review): presumably output normalization statistics -
            confirm against `Ed_Mean`.
        likelihood: A likelihood. If omitted, use a standard
            GaussianLikelihood with inferred noise level.
        covar_module: The covariance (kernel) matrix. If omitted, use the
            MaternKernel.
        mean_module: If omitted, a `ConstantMean` is used. Otherwise it is
            wrapped in `Mean_Function.Ed_Mean` together with `stats_X` and
            `stats_Y`.

    Example:
        >>> train_X = torch.rand(20, 2)
        >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
        >>> model = SingleTaskGP(train_X, train_Y)
    """
    # Note: the scaling check runs before the tensor-argument validation.
    validate_input_scaling(train_X=train_X, train_Y=train_Y)
    self._validate_tensor_args(X=train_X, Y=train_Y)
    self._set_dimensions(train_X=train_X, train_Y=train_Y)
    train_X, train_Y, _ = self._transform_tensor_args(X=train_X, Y=train_Y)

    if likelihood is not None:
        self._is_custom_likelihood = True
    else:
        # Default: inferred homoskedastic noise, started at the prior mode.
        noise_prior = GammaPrior(1.1, 0.05)
        noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        noise_floor = GreaterThan(
            MIN_INFERRED_NOISE_LEVEL,
            transform=None,
            initial_value=noise_prior_mode,
        )
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=self._aug_batch_shape,
            noise_constraint=noise_floor,
        )

    ExactGP.__init__(self, train_X, train_Y, likelihood)

    if mean_module is not None:
        self.mean_module = Mean_Function.Ed_Mean(
            mean_module, stats_X, stats_Y, batch_shape=self._aug_batch_shape
        )
    else:
        self.mean_module = ConstantMean(batch_shape=self._aug_batch_shape)

    if covar_module is not None:
        self.covar_module = covar_module
    else:
        matern = MaternKernel(
            nu=2.5,
            ard_num_dims=train_X.shape[-1],
            batch_shape=self._aug_batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        self.covar_module = ScaleKernel(
            matern,
            batch_shape=self._aug_batch_shape,
            outputscale_prior=GammaPrior(2.0, 0.15),
        )

    self.to(train_X)
def __init__(
    self,
    train_X: Tensor,
    train_Y: Tensor,
    task_feature: int,
    output_tasks: Optional[List[int]] = None,
    rank: Optional[int] = None,
) -> None:
    r"""Multi-Task GP model using an ICM kernel, inferring observation noise.

    Args:
        train_X: A `n x (d + 1)` tensor of training data. One of the columns
            should contain the task features (see `task_feature` argument).
            Inputs that are not 2-dimensional (batch mode) are rejected.
        train_Y: A `n x 1` tensor of training observations; the trailing
            output dimension is squeezed away internally.
        task_feature: The index of the task feature
            (`-d <= task_feature <= d`). Negative values are converted to
            the equivalent non-negative column index.
        output_tasks: A list of task indices for which to compute model
            outputs for. If omitted, return outputs for all task indices.
        rank: The rank to be used for the index kernel. If omitted, use a
            full rank (i.e. number of tasks) kernel.

    Raises:
        ValueError: If `train_X` is not 2-dimensional or `task_feature` is
            outside `[-d, d]`.
        RuntimeError: If `output_tasks` contains a task that does not occur
            in the task column of `train_X`.

    Example:
        >>> X1, X2 = torch.rand(10, 2), torch.rand(20, 2)
        >>> i1, i2 = torch.zeros(10, 1), torch.ones(20, 1)
        >>> train_X = torch.cat([
        >>>     torch.cat([X1, i1], -1), torch.cat([X2, i2], -1),
        >>> ])
        >>> train_Y = torch.cat([f1(X1), f2(X2)]).unsqueeze(-1)
        >>> model = MultiTaskGP(train_X, train_Y, task_feature=-1)
    """
    self._validate_tensor_args(X=train_X, Y=train_Y)
    validate_input_scaling(train_X=train_X, train_Y=train_Y)
    if train_X.ndim != 2:
        # Currently, batch mode MTGPs are blocked upstream in GPyTorch
        raise ValueError(f"Unsupported shape {train_X.shape} for train_X.")

    # squeeze output dim
    train_Y = train_Y.squeeze(-1)

    # Number of non-task feature columns.
    d = train_X.shape[-1] - 1
    if task_feature < -d or task_feature > d:
        raise ValueError(f"Must have that -{d} <= task_feature <= {d}")
    # Normalize to a non-negative column index.
    task_feature = task_feature % (d + 1)

    task_column = train_X[:, task_feature]
    all_tasks = task_column.unique().to(dtype=torch.long).tolist()
    if output_tasks is None:
        output_tasks = all_tasks
    elif not set(output_tasks).issubset(all_tasks):
        raise RuntimeError("All output tasks must be present in input data.")
    self._output_tasks = output_tasks
    self._num_outputs = len(output_tasks)

    # TODO (T41270962): Support task-specific noise levels in likelihood
    likelihood = GaussianLikelihood(noise_prior=GammaPrior(1.1, 0.05))

    # construct indexer to be used in forward
    self._task_feature = task_feature
    self._base_idxr = torch.arange(d)
    self._base_idxr[task_feature:] += 1  # exclude task feature

    super().__init__(
        train_inputs=train_X, train_targets=train_Y, likelihood=likelihood
    )
    self.mean_module = ConstantMean()
    data_kernel = MaternKernel(
        nu=2.5, ard_num_dims=d, lengthscale_prior=GammaPrior(3.0, 6.0)
    )
    self.covar_module = ScaleKernel(
        base_kernel=data_kernel,
        outputscale_prior=GammaPrior(2.0, 0.15),
    )

    num_tasks = len(all_tasks)
    self._rank = num_tasks if rank is None else rank
    # TODO: Add LKJ prior for the index kernel
    self.task_covar_module = IndexKernel(num_tasks=num_tasks, rank=self._rank)
    self.to(train_X)