Example #1
    def __init__(self,
                 num_features=None,
                 ard_num_dims=1,
                 batch_shape=torch.Size([]),
                 signal_variance_constraint=None,
                 fourier_features_constraint=None,
                 **kwargs):
        super(SparseSpectrumKernel, self).__init__(ard_num_dims=ard_num_dims,
                                                   batch_shape=batch_shape,
                                                   **kwargs)

        if num_features is None:
            raise RuntimeError("num_features is a required argument")
        self.num_features = num_features

        if signal_variance_constraint is None:
            signal_variance_constraint = Positive()
        if fourier_features_constraint is None:
            fourier_features_constraint = Positive()

        self.register_parameter(name='raw_signal_variance',
                                parameter=torch.nn.Parameter(torch.zeros(1)))
        ff_shape = torch.Size(
            [*self.batch_shape, self.num_features, 1, self.ard_num_dims])
        self.register_parameter(name='raw_fourier_features',
                                parameter=torch.nn.Parameter(
                                    torch.zeros(ff_shape)))

        self.register_constraint('raw_signal_variance',
                                 signal_variance_constraint)
        self.register_constraint('raw_fourier_features',
                                 fourier_features_constraint)
Example #2
    def __init__(self):
        super().__init__()

        ms_shape = torch.Size([1, 1])
        self.register_parameter(name="raw_scale", parameter=torch.nn.Parameter(torch.zeros(ms_shape)))
        self.register_parameter(name="raw_mean", parameter=torch.nn.Parameter(torch.zeros(ms_shape)))

        self.register_constraint("raw_scale", Positive())
        self.register_constraint("raw_mean", Positive())
Example #3
    def __init__(self,
                 base_kernel,
                 angle_prior: Optional[Prior] = None,
                 radius_prior: Optional[Prior] = None,
                 **kwargs):
        super(ArcKernel, self).__init__(has_lengthscale=True, **kwargs)

        if self.ard_num_dims is None:
            last_dim = 1
        else:
            last_dim = self.ard_num_dims
        # TODO: check the errors given by interval
        angle_constraint = Positive()

        self.register_parameter(
            name="raw_angle",
            parameter=torch.nn.Parameter(
                torch.zeros(*self.batch_shape, 1, last_dim)),
        )
        if angle_prior is not None:
            self.register_prior(
                "angle_prior",
                angle_prior,
                lambda: self.angle,
                lambda v: self._set_angle(v),
            )

        self.register_constraint("raw_angle", angle_constraint)

        self.register_parameter(
            name="raw_radius",
            parameter=torch.nn.Parameter(
                torch.zeros(*self.batch_shape, 1, last_dim)),
        )

        if radius_prior is not None:
            self.register_prior(
                "radius_prior",
                radius_prior,
                lambda: self.radius,
                lambda v: self._set_radius(v),
            )

        radius_constraint = Positive()
        self.register_constraint("raw_radius", radius_constraint)

        self.base_kernel = base_kernel
        if self.base_kernel.has_lengthscale:
            self.base_kernel.lengthscale = 1
            self.base_kernel.raw_lengthscale.requires_grad_(False)
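
The prior registration above refers to self.angle and self._set_angle(v), which are not shown in the snippet. A hedged sketch of what these accessors typically look like under the standard gpytorch raw-parameter idiom (the bodies below are an assumption, not copied from the source; the same shape applies to radius):

    @property
    def angle(self):
        # constrained view of the raw parameter
        return self.raw_angle_constraint.transform(self.raw_angle)

    @angle.setter
    def angle(self, value):
        self._set_angle(value)

    def _set_angle(self, value):
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_angle)
        # store the unconstrained value so that transform() recovers `value`
        self.initialize(raw_angle=self.raw_angle_constraint.inverse_transform(value))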
Example #4
    def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False):
        train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
        # We're manually going to set the hyperparameters to be ridiculous
        likelihood = GaussianLikelihood(noise_constraint=Positive())  # This test actually wants a noise < 1e-4
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        gp_model.covar_module.base_kernel.initialize(lengthscale=exp(-15))
        likelihood.initialize(noise=exp(-15))

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Compute posterior distribution
        gp_model.eval()
        likelihood.eval()

        # Let's see how our model does, conditioned with weird hyperparams
        # The posterior should fit all the data
        with gpytorch.settings.debug(False):
            function_predictions = likelihood(gp_model(train_x))

        self.assertAllClose(function_predictions.mean, train_y)
        self.assertAllClose(function_predictions.variance, torch.zeros_like(function_predictions.variance))

        # It shouldn't fit much else though
        test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x))

        self.assertAllClose(test_function_predictions.mean, torch.zeros_like(test_function_predictions.mean))
        self.assertAllClose(
            test_function_predictions.variance,
            gp_model.covar_module.outputscale.expand_as(test_function_predictions.variance)
        )
Example #5
    def test_prior(self, cuda=False):
        train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
        # We're manually going to set the hyperparameters to be ridiculous
        likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1),
            noise_constraint=Positive(),  # Prior for this test is looser than default bound
        )
        gp_model = ExactGPModel(None, None, likelihood)
        # Update lengthscale prior to accommodate extreme parameters
        gp_model.covar_module.base_kernel.register_prior(
            "lengthscale_prior", SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5), "raw_lengthscale"
        )
        gp_model.mean_module.initialize(constant=1.5)
        gp_model.covar_module.base_kernel.initialize(lengthscale=1)
        likelihood.initialize(noise=0)

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Compute posterior distribution
        gp_model.eval()
        likelihood.eval()

        # The model should predict in prior mode
        function_predictions = likelihood(gp_model(train_x))
        correct_variance = gp_model.covar_module.outputscale + likelihood.noise

        self.assertAllClose(function_predictions.mean, torch.full_like(function_predictions.mean, fill_value=1.5))
        self.assertAllClose(
            function_predictions.variance,
            correct_variance.squeeze().expand_as(function_predictions.variance)
        )
Example #6
    def __init__(
        self,
        power_prior: Optional[Prior] = None,
        offset_prior: Optional[Prior] = None,
        power_constraint: Optional[Interval] = None,
        offset_constraint: Optional[Interval] = None,
        **kwargs
    ):
        super().__init__(has_lengthscale=True, **kwargs)

        if power_constraint is None:
            power_constraint = Positive()
        if offset_constraint is None:
            offset_constraint = Positive()

        self.register_parameter(
            name="raw_power",
            parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
        )

        self.register_parameter(
            name="raw_offset",
            parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
        )

        if power_prior is not None:
            self.register_prior(
                "power_prior",
                power_prior,
                lambda: self.power,
                lambda v: self._set_power(v),
            )
        self.register_constraint("raw_power", offset_constraint)

        if offset_prior is not None:
            self.register_prior(
                "offset_prior",
                offset_prior,
                lambda: self.offset,
                lambda v: self._set_offset(v),
            )

        self.register_constraint("raw_offset", offset_constraint)
Example #7
    def __init__(self, n_elements, n_dimensions, prior_mean=0,
                 prior_variance=1, share_variational_variance=False):
        super().__init__()

        self.prior = Normal(prior_mean, prior_variance**0.5)

        mean = self.prior.sample([n_elements, n_dimensions])
        if share_variational_variance:
            raw_variance = torch.zeros((n_elements, 1))
        else:
            raw_variance = torch.zeros_like(mean)

        self.constraint = Positive()
        self.register_parameter("variational_mean", Parameter(mean))
        self.register_parameter("raw_variational_variance",
                                Parameter(raw_variance))
        self.variational_variance = torch.ones_like(self.variational_mean)

        self.input_dims = 0
        self.output_dims = n_dimensions
Example #8
    def __init__(self, m, **kwargs):
        # self.m = m
        scale_constraint = LessThan(0.1)
        super(RBFConstraint,
              self).__init__(lengthscale_constraint=scale_constraint, **kwargs)
        outputscale = torch.zeros(
            *self.batch_shape) if len(self.batch_shape) else torch.tensor(0.0)
        self.register_parameter(name="raw_outputscale",
                                parameter=torch.nn.Parameter(outputscale))
        outputscale_constraint = Positive()
        self.register_constraint("raw_outputscale", outputscale_constraint)
        self.register_buffer("m", torch.tensor(m))
Example #9
    def test_posterior_latent_gp_and_likelihood_with_optimization(
            self, cuda=False):
        train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
        # We're manually going to set the hyperparameters to something they shouldn't be
        likelihood = GaussianLikelihood(
            noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1),
            noise_constraint=Positive(),
        )
        gp_model = ExactGPModel(train_x, train_y, likelihood)
        mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model)
        gp_model.rbf_covar_module.initialize(lengthscale=exp(1))
        gp_model.mean_module.initialize(constant=0)
        likelihood.initialize(noise=exp(1))

        if cuda:
            gp_model.cuda()
            likelihood.cuda()

        # Find optimal model hyperparameters
        gp_model.train()
        likelihood.train()

        optimizer = optim.Adam(list(gp_model.parameters()) +
                               list(likelihood.parameters()),
                               lr=0.1)
        optimizer.n_iter = 0
        with gpytorch.settings.debug(False):
            for _ in range(75):
                optimizer.zero_grad()
                output = gp_model(train_x)
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.n_iter += 1
                optimizer.step()

            for param in gp_model.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            for param in likelihood.parameters():
                self.assertTrue(param.grad is not None)
                self.assertGreater(param.grad.norm().item(), 0)
            optimizer.step()

            # Test the model
            gp_model.eval()
            likelihood.eval()
            test_function_predictions = likelihood(gp_model(test_x))
            mean_abs_error = torch.mean(
                torch.abs(test_y - test_function_predictions.mean))

        self.assertLess(mean_abs_error.squeeze().item(), 0.05)
Example #10
    def test_posterior_latent_gp_and_likelihood_without_optimization(
            self, cuda=False):
        train_x, test_x, train_y, test_y = self._get_data(cuda=cuda)
        with gpytorch.settings.debug(False):
            # We're manually going to set the hyperparameters to be ridiculous
            likelihood = GaussianLikelihood(
                noise_prior=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.25),
                noise_constraint=Positive(),
            )
            gp_model = ExactGPModel(train_x, train_y, likelihood)
            # Update lengthscale prior to accommodate extreme parameters
            gp_model.rbf_covar_module.register_prior(
                "lengthscale_prior",
                SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5),
                "raw_lengthscale")
            gp_model.rbf_covar_module.initialize(lengthscale=exp(-10))
            gp_model.mean_module.initialize(constant=0)
            likelihood.initialize(noise=exp(-10))

            if cuda:
                gp_model.cuda()
                likelihood.cuda()

            # Compute posterior distribution
            gp_model.eval()
            likelihood.eval()

            # Let's see how our model does, conditioned with weird hyperparams
            # The posterior should fit all the data
            function_predictions = likelihood(gp_model(train_x))

            self.assertLess(torch.norm(function_predictions.mean - train_y),
                            1e-3)
            self.assertLess(torch.norm(function_predictions.variance), 5e-3)

            # It shouldn't fit much else though
            test_function_predictions = gp_model(
                torch.tensor([1.1]).type_as(test_x))

            self.assertLess(torch.norm(test_function_predictions.mean - 0),
                            1e-4)
            self.assertLess(
                torch.norm(test_function_predictions.variance -
                           gp_model.covar_module.outputscale), 1e-4)
Example #11
    def _train_gp_models(self, x, y2):
        X = torch.tensor(x)
        y2 = torch.tensor(y2)

        ll1 = GaussianLikelihood()
        ll2 = GaussianLikelihood(noise_constraint=Positive())

        Xgrid = convert_to_xgrid_torch(X, self.transform).double()
        y1_pred, y1_latent = self.aux_model(Xgrid, return_latent=True)
        train_y1 = y1_latent if self.use_latent else y1_pred
        train_y1 = (train_y1.data[..., self.slice] -
                    self.y1_lower) / (self.y1_upper - self.y1_lower)
        warm_gp = GPWarm(train_y1, y2, ll1)
        train(train_y1, y2, warm_gp, self.train_cf1)

        transform_input_fn = tensor_x_to_tensor_grid(self.transform)
        cold_gp = GPCold(X, y2, ll2, transform_input_fn=transform_input_fn)
        train(X, y2, cold_gp, self.train_cf2)
        return warm_gp, cold_gp
Example #12
    def __init__(self,
                 power_law_prior=None,
                 power_law_constraint=None,
                 **kwargs):
        super(RationalQuadraticKernel, self).__init__(has_lengthscale=True,
                                                      **kwargs)

        self.register_parameter(name="raw_power_law",
                                parameter=torch.nn.Parameter(
                                    torch.zeros(*self.batch_shape, 1, 1)))

        if power_law_constraint is None:
            power_law_constraint = Positive()

        if power_law_prior is not None:
            self.register_prior(
                "power_law_prior",
                power_law_prior,
                lambda: self.power_law,
                lambda v: self._set_power_law(v),
            )

        self.register_constraint("raw_power_law", power_law_constraint)
Example #13
    def __init__(self,
                 active_dim,
                 period_length_prior=None,
                 period_length_constraint=None,
                 **kwargs):
        super(MyCustomPeriodicKernel, self).__init__(**kwargs)
        if period_length_constraint is None:
            period_length_constraint = Positive()

        self._my_active_dim = active_dim

        self.register_parameter(name="raw_period_length",
                                parameter=torch.nn.Parameter(
                                    torch.zeros(*self.batch_shape, 1, 1)))

        if period_length_prior is not None:
            self.register_prior(
                "period_length_prior",
                period_length_prior,
                lambda: self.period_length,
                lambda v: self._set_period_length(v),
            )

        self.register_constraint("raw_period_length", period_length_constraint)
    def __init__(  # noqa C901
        self,
        fidelity_dims: List[int],
        dimension: Optional[int] = None,
        power_prior: Optional[Prior] = None,
        power_constraint: Optional[Interval] = None,
        nu: float = 2.5,
        lengthscale_prior_unbiased: Optional[Prior] = None,
        lengthscale_prior_biased: Optional[Prior] = None,
        lengthscale_constraint_unbiased: Optional[Interval] = None,
        lengthscale_constraint_biased: Optional[Interval] = None,
        covar_module_unbiased: Optional[Kernel] = None,
        covar_module_biased: Optional[Kernel] = None,
        **kwargs: Any,
    ) -> None:
        if dimension is None and kwargs.get("active_dims") is None:
            raise UnsupportedError(
                "Must specify dimension when not specifying active_dims.")
        n_fidelity = len(fidelity_dims)
        if len(set(fidelity_dims)) != n_fidelity:
            raise ValueError("fidelity_dims must not have repeated elements")
        if n_fidelity not in {1, 2}:
            raise UnsupportedError(
                "LinearTruncatedFidelityKernel accepts either one or two"
                "fidelity parameters.")
        if nu not in {0.5, 1.5, 2.5}:
            raise ValueError("nu must be one of 0.5, 1.5, or 2.5")

        super().__init__(**kwargs)
        self.fidelity_dims = fidelity_dims
        if power_constraint is None:
            power_constraint = Positive()

        if lengthscale_prior_unbiased is None:
            lengthscale_prior_unbiased = GammaPrior(3, 6)

        if lengthscale_prior_biased is None:
            lengthscale_prior_biased = GammaPrior(6, 2)

        if lengthscale_constraint_unbiased is None:
            lengthscale_constraint_unbiased = Positive()

        if lengthscale_constraint_biased is None:
            lengthscale_constraint_biased = Positive()

        self.register_parameter(
            name="raw_power",
            parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
        )
        self.register_constraint("raw_power", power_constraint)

        if power_prior is not None:
            self.register_prior(
                "power_prior",
                power_prior,
                lambda: self.power,
                lambda v: self._set_power(v),
            )

        if self.active_dims is not None:
            dimension = len(self.active_dims)

        if covar_module_unbiased is None:
            covar_module_unbiased = MaternKernel(
                nu=nu,
                batch_shape=self.batch_shape,
                lengthscale_prior=lengthscale_prior_unbiased,
                ard_num_dims=dimension - n_fidelity,
                lengthscale_constraint=lengthscale_constraint_unbiased,
            )

        if covar_module_biased is None:
            covar_module_biased = MaternKernel(
                nu=nu,
                batch_shape=self.batch_shape,
                lengthscale_prior=lengthscale_prior_biased,
                ard_num_dims=dimension - n_fidelity,
                lengthscale_constraint=lengthscale_constraint_biased,
            )

        self.covar_module_unbiased = covar_module_unbiased
        self.covar_module_biased = covar_module_biased
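
A hedged construction sketch for the kernel above, assuming it is BoTorch's LinearTruncatedFidelityKernel (the import path, column layout, and sizes are illustrative):

import torch
from botorch.models.kernels.linear_truncated_fidelity import (
    LinearTruncatedFidelityKernel,
)

# Five input columns, the last of which is treated as a fidelity parameter
kernel = LinearTruncatedFidelityKernel(fidelity_dims=[4], dimension=5, nu=2.5)
X = torch.rand(10, 5)
cov = kernel(X)   # lazily evaluated 10 x 10 covariance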
Example #15
    def __init__(
        self,
        datapoints: Tensor,
        comparisons: Tensor,
        covar_module: Optional[Module] = None,
        input_transform: Optional[InputTransform] = None,
        **kwargs,
    ) -> None:
        r"""A probit-likelihood GP with Laplace approximation model that learns via
            pairwise comparison data. By default it uses a scaled RBF kernel.

        Args:
            datapoints: A `batch_shape x n x d` tensor of training features.
            comparisons: A `batch_shape x m x 2` tensor of training comparisons;
                comparisons[i] is a noisy indicator that the utility of the
                comparisons[i, 0]-th data point is greater than that of the
                comparisons[i, 1]-th.
            covar_module: Covariance module.
            input_transform: An input transform that is applied in the model's
                forward pass.
        """
        super().__init__()

        if input_transform is not None:
            input_transform.to(datapoints)
            # input transformation is applied in set_train_data
            self.input_transform = input_transform

        # Compatibility variables with fit_gpytorch_*: Dummy likelihood
        # Likelihood is tightly tied with this model and
        # it doesn't make much sense to keep it separate
        self.likelihood = None

        # TODO: remove these variables from `state_dict()` so that when calling
        #       `load_state_dict()`, only the hyperparameters are copied over
        self.register_buffer("datapoints", None)
        self.register_buffer("comparisons", None)
        self.register_buffer("D", None)
        self.register_buffer("DT", None)
        self.register_buffer("utility", None)
        self.register_buffer("covar_chol", None)
        self.register_buffer("likelihood_hess", None)
        self.register_buffer("hlcov_eye", None)
        self.register_buffer("covar", None)
        self.register_buffer("covar_inv", None)

        self.train_inputs = []
        self.train_targets = None

        self.pred_cov_fac_need_update = True
        self.dim = None

        # See set_train_data for additional compatibility variables.
        # Note that the datapoints here are not transformed even if input_transform
        # is not None, to avoid double transformation during model fitting.
        # self.transform_inputs is called in `forward`
        self.set_train_data(datapoints, comparisons, update_model=False)

        # Set optional parameters
        # jitter to add for numerical stability
        self._jitter = kwargs.get("jitter", 1e-6)
        # Clamping z lim for better numerical stability. See self._calc_z for detail
        # norm_cdf(z=3) ~= 0.999, i.e. the top 0.1 percent
        self._zlim = kwargs.get("zlim", 3)
        # Stopping criteria in scipy.optimize.fsolve used to find f_map in _update()
        # If None, set to 1e-6 by default in _update
        self._xtol = kwargs.get("xtol")
        # The maximum number of calls to the function in scipy.optimize.fsolve
        # If None, set to 100 by default in _update
        # If zero, then 100*(N+1) is used by default by fsolve;
        self._maxfev = kwargs.get("maxfev")

        # Set hyperparameters
        # Do not set the batch_shape explicitly so mean_module can operate in both modes;
        # once the fsolve used in _update can run in batch mode, we should explicitly set
        # the batch shape here
        self.mean_module = ConstantMean()
        # Do not optimize constant mean prior
        for param in self.mean_module.parameters():
            param.requires_grad = False

        # set covariance module
        # the default outputscale here is only a rule of thumb, meant to keep
        # estimates away from scale values that would make Phi(f(x)) saturate
        # at 0 or 1
        if covar_module is None:
            ls_prior = GammaPrior(1.2, 0.5)
            ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
            covar_module = ScaleKernel(
                RBFKernel(
                    batch_shape=self.batch_shape,
                    ard_num_dims=self.dim,
                    lengthscale_prior=ls_prior,
                    lengthscale_constraint=Positive(
                        transform=None, initial_value=ls_prior_mode),
                ),
                outputscale_prior=SmoothedBoxPrior(a=1, b=4),
            )

        self.covar_module = covar_module

        self._x0 = None  # will store temporary results for warm-starting
        if self.datapoints is not None and self.comparisons is not None:
            self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
            # Find f_map for initial parameters with transformed datapoints
            transformed_dp = self.transform_inputs(datapoints)
            self._update(transformed_dp)

        self.to(self.datapoints)
Example #16
class LatentLayer(Module):
    """
    Latent layer for use in GP-LVM. It comprises N latent variables, each with
    a Gaussian prior and variational distribution.

    The prior is isotropic with configurable mean and variance. The
    variational distribution may optionally share a single variance across
    dimensions.
    """
    def __init__(self, n_elements, n_dimensions, prior_mean=0,
                 prior_variance=1, share_variational_variance=False):
        super().__init__()

        self.prior = Normal(prior_mean, prior_variance**0.5)

        mean = self.prior.sample([n_elements, n_dimensions])
        if share_variational_variance:
            raw_variance = torch.zeros((n_elements, 1))
        else:
            raw_variance = torch.zeros_like(mean)

        self.constraint = Positive()
        self.register_parameter("variational_mean", Parameter(mean))
        self.register_parameter("raw_variational_variance",
                                Parameter(raw_variance))
        self.variational_variance = torch.ones_like(self.variational_mean)

        self.input_dims = 0
        self.output_dims = n_dimensions

    @property
    def n_elements(self):
        return self.variational_mean.shape[0]

    @property
    def variational_variance(self):
        return self.constraint.transform(self.raw_variational_variance)

    @variational_variance.setter
    def variational_variance(self, value):
        param = self.raw_variational_variance
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(param)
        param.data.copy_(value.reshape(*param.shape))

    @property
    def dtype(self):
        return next(self.parameters()).dtype

    @property
    def device(self):
        return next(self.parameters()).device

    def forward(self, indices=None):
        """
        Return the variational posterior for the latent variables, pertaining
        to provided indices
        """
        if indices is None:
            ms = self.variational_mean
            vs = self.variational_variance
        else:
            ms = self.variational_mean[indices]
            vs = self.variational_variance[indices]

        vs = vs.expand(len(vs), self.output_dims)

        if self.output_dims == 1:
            # single output dimension: take its one column, of shape (n,)
            m, = ms.T
            v, = vs.T
            return MultivariateNormal(m, DiagLazyTensor(v))
        else:
            mvns = [MultivariateNormal(m, DiagLazyTensor(v))
                    for m, v in zip(ms.T, vs.T)]
            return MultitaskMultivariateNormal.from_independent_mvns(mvns)

    def kl_divergence(self):
        """
        KL divergence from variational to prior distribution.
        """
        flat_m = self.variational_mean.T.flatten()

        v = self.variational_variance
        flat_v = v.expand(len(v), self.output_dims).flatten()

        # Normal expects a standard deviation, so take the square root of the variance
        return torch.sum(kl_divergence(Normal(flat_m, flat_v.sqrt()), self.prior))
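
A brief usage sketch for the layer above (sizes are illustrative):

import torch

# 50 latent points embedded in a 2-dimensional latent space
layer = LatentLayer(n_elements=50, n_dimensions=2)

q_all = layer()                    # variational posterior over every latent variable
q_batch = layer(torch.arange(10))  # posterior restricted to a mini-batch of indices
kl = layer.kl_divergence()         # scalar KL term for the variational objective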
Example #17
    def __init__(
        self,
        decomposition: Dict[str, List[int]],
        batch_shape: torch.Size,
        train_embedding: bool = True,
        cat_feature_dict: Optional[Dict] = None,
        embs_feature_dict: Optional[Dict] = None,
        embs_dim_list: Optional[List[int]] = None,
        context_weight_dict: Optional[Dict] = None,
        device: Optional[torch.device] = None,
    ) -> None:

        super().__init__(batch_shape=batch_shape)
        self.decomposition = decomposition
        self.batch_shape = batch_shape
        self.train_embedding = train_embedding
        self.device = device

        num_param = len(next(iter(decomposition.values())))
        self.context_list = list(decomposition.keys())
        self.num_contexts = len(self.context_list)

        # get parameter space decomposition
        for active_parameters in decomposition.values():
            # check that the number of parameters is the same in each context
            if len(active_parameters) != num_param:
                raise ValueError(
                    "number of parameters must be the same across all contexts")
        self._indexers = {
            context: torch.tensor(active_params, device=self.device)
            for context, active_params in self.decomposition.items()
        }
        # get context features and set emb dim
        self.context_cat_feature = None
        self.context_emb_feature = None
        self.n_embs = 0
        self.emb_weight_matrix_list = None
        self.emb_dims = None
        self._set_context_features(
            cat_feature_dict=cat_feature_dict,
            embs_feature_dict=embs_feature_dict,
            embs_dim_list=embs_dim_list,
        )
        # construct embedding layer
        if train_embedding:
            self._set_emb_layers()
        # task covariance matrix
        self.task_covar_module = MaternKernel(
            nu=2.5,
            ard_num_dims=self.n_embs,
            batch_shape=batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        # base kernel
        self.base_kernel = MaternKernel(
            nu=2.5,
            ard_num_dims=num_param,
            batch_shape=batch_shape,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        )
        # outputscales for each context (note this is like sqrt of outputscale)
        self.context_weight = None
        if context_weight_dict is None:
            outputscale_list = torch.zeros(*batch_shape,
                                           self.num_contexts,
                                           device=self.device)
        else:
            outputscale_list = torch.zeros(*batch_shape, 1, device=self.device)
            self.context_weight = torch.tensor(
                [context_weight_dict[c] for c in self.context_list],
                device=self.device)
        self.register_parameter(name="raw_outputscale_list",
                                parameter=torch.nn.Parameter(outputscale_list))
        self.register_prior(
            "outputscale_list_prior",
            GammaPrior(2.0, 15.0),
            lambda m: m.outputscale_list,
            lambda m, v: m._set_outputscale_list(v),
        )
        self.register_constraint("raw_outputscale_list", Positive())
Example #18
    def __init__(
        self,
        datapoints: Tensor,
        comparisons: Tensor,
        covar_module: Optional[Module] = None,
        noise_module: Optional[HomoskedasticNoise] = None,
        **kwargs,
    ) -> None:
        r"""A probit-likelihood GP with Laplace approximation model.

        A probit-likelihood GP with Laplace approximation model that learns via
        pairwise comparison data. By default it uses a scaled RBF kernel.

        Args:
            datapoints: A `batch_shape x n x d` tensor of training features.
            comparisons: A `batch_shape x m x 2` tensor of training comparisons;
                comparisons[i] is a noisy indicator that the utility of the
                comparisons[i, 0]-th data point is greater than that of the
                comparisons[i, 1]-th.
            covar_module: Covariance module.
            noise_module: Noise module.
        """
        super().__init__()

        # Compatibility variables with fit_gpytorch_*: Dummy likelihood
        # Likelihood is tightly tied with this model and
        # it doesn't make much sense to keep it separate
        self.likelihood = None

        # TODO: remove these variables from `state_dict()` so that when calling
        #       `load_state_dict()`, only the hyperparameters are copied over
        self.register_buffer("datapoints", None)
        self.register_buffer("comparisons", None)
        self.register_buffer("utility", None)
        self.register_buffer("covar_chol", None)
        self.register_buffer("likelihood_hess", None)
        self.register_buffer("hlcov_eye", None)
        self.register_buffer("covar", None)
        self.register_buffer("covar_inv", None)

        self.train_inputs = []
        self.train_targets = None

        self.pred_cov_fac_need_update = True
        self._input_batch_shape = torch.Size()
        self.dim = None
        # will be set to match datapoints' dtype and device
        # since scipy.optimize.fsolve only works on cpu, it'd be the
        # fastest to fit the model on cpu and take samples on gpu to avoid
        # overhead of moving data back and forth during fitting time
        self.tkwargs = {}
        # See set_train_data for additional compatibility variables
        self.set_train_data(datapoints, comparisons, update_model=False)

        # Set optional parameters
        # jitter to add for numerical stability
        self._jitter = kwargs.get("jitter", 1e-6)
        # Clamping z lim for better numerical stability. See self._calc_z for detail
        # norm_cdf(z=3) ~= 0.999, i.e. the top 0.1 percent
        self._zlim = kwargs.get("zlim", 3)
        # Stopping criteria in scipy.optimize.fsolve used to find f_map in _update()
        # If None, set to 1e-6 by default in _update
        self._xtol = kwargs.get("xtol")
        # The maximum number of calls to the function in scipy.optimize.fsolve
        # If None, set to 100 by default in _update
        # If zero, then 100*(N+1) is used by default by fsolve;
        self._maxfev = kwargs.get("maxfev")

        # Set hyperparameters
        # Do not set the batch_shape explicitly so mean_module can operate in both modes;
        # once the fsolve used in _update can run in batch mode, we should explicitly set
        # the batch shape here
        self.mean_module = ConstantMean()
        # Do not optimize constant mean prior
        for param in self.mean_module.parameters():
            param.requires_grad = False

        # set covariance module
        if noise_module is None:
            noise_module = HomoskedasticNoise(
                noise_prior=SmoothedBoxPrior(-5, 5, 0.5, transform=torch.log),
                noise_constraint=GreaterThan(1e-4),  # if None, 1e-4 by default
                batch_shape=self._input_batch_shape,
            )
        self.noise_module = noise_module

        # set covariance module
        if covar_module is None:
            ls_prior = GammaPrior(1.2, 0.5)
            ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
            covar_module = RBFKernel(
                batch_shape=self._input_batch_shape,
                ard_num_dims=self.dim,
                lengthscale_prior=ls_prior,
                lengthscale_constraint=Positive(transform=None,
                                                initial_value=ls_prior_mode),
            )
        self.covar_module = covar_module

        self._x0 = None  # will store temporary results for warm-starting
        if self.datapoints is not None and self.comparisons is not None:
            self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
            self._update()  # Find f_map for initial parameters

        self.to(self.datapoints)
Example #19
    def __init__(
        self,
        dimension: int = 3,
        nu: float = 2.5,
        train_iteration_fidelity: bool = True,
        train_data_fidelity: bool = True,
        lengthscale_prior: Optional[Prior] = None,
        power_prior: Optional[Prior] = None,
        power_constraint: Optional[Interval] = None,
        lengthscale_2_prior: Optional[Prior] = None,
        lengthscale_2_constraint: Optional[Interval] = None,
        lengthscale_constraint: Optional[Interval] = None,
        covar_module_1: Optional[Kernel] = None,
        covar_module_2: Optional[Kernel] = None,
        **kwargs: Any,
    ):
        if not train_iteration_fidelity and not train_data_fidelity:
            raise UnsupportedError(
                "You should have at least one fidelity parameter.")
        if nu not in {0.5, 1.5, 2.5}:
            raise ValueError("nu expected to be 0.5, 1.5, or 2.5")
        super().__init__(**kwargs)
        self.train_iteration_fidelity = train_iteration_fidelity
        self.train_data_fidelity = train_data_fidelity
        if power_constraint is None:
            power_constraint = Positive()

        if lengthscale_prior is None:
            lengthscale_prior = GammaPrior(3, 6)

        if lengthscale_2_prior is None:
            lengthscale_2_prior = GammaPrior(6, 2)

        if lengthscale_constraint is None:
            lengthscale_constraint = Positive()

        if lengthscale_2_constraint is None:
            lengthscale_2_constraint = Positive()

        self.register_parameter(
            name="raw_power",
            parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1)),
        )

        if power_prior is not None:
            self.register_prior(
                "power_prior",
                power_prior,
                lambda: self.power,
                lambda v: self._set_power(v),
            )
        self.register_constraint("raw_power", power_constraint)

        m = self.train_iteration_fidelity + self.train_data_fidelity

        if self.active_dims is not None:
            dimension = len(self.active_dims)

        if covar_module_1 is None:
            self.covar_module_1 = MaternKernel(
                nu=nu,
                batch_shape=self.batch_shape,
                lengthscale_prior=lengthscale_prior,
                ard_num_dims=dimension - m,
                lengthscale_constraint=lengthscale_constraint,
            )
        else:
            self.covar_module_1 = covar_module_1

        if covar_module_2 is None:
            self.covar_module_2 = MaternKernel(
                nu=nu,
                batch_shape=self.batch_shape,
                lengthscale_prior=lengthscale_2_prior,
                ard_num_dims=dimension - m,
                lengthscale_constraint=lengthscale_2_constraint,
            )
        else:
            self.covar_module_2 = covar_module_2