Example #1
    def __init__(self, beta_min, beta_prior=None, **kwargs):
        """
        Initialisation.

        Parameters
        ----------
        :param beta_min: minimum value of the inverse square lengthscale parameter beta

        Optional parameters
        -------------------
        :param beta_prior: prior on the parameter beta
        :param kwargs: additional arguments
        """
        super(SphereGaussianKernel, self).__init__(has_lengthscale=False,
                                                   **kwargs)
        self.beta_min = beta_min

        # Add beta parameter, corresponding to the inverse of the lengthscale parameter.
        beta_num_dims = 1
        self.register_parameter(name="raw_beta",
                                parameter=torch.nn.Parameter(
                                    torch.zeros(*self.batch_shape, 1,
                                                beta_num_dims)))

        if beta_prior is not None:
            self.register_prior("beta_prior", beta_prior, lambda: self.beta,
                                lambda v: self._set_beta(v))

        # A GreaterThan constraint is registered on the beta parameter (the inverse square lengthscale)
        # to guarantee the positive-definiteness of the kernel.
        # The value of beta_min can be determined e.g. experimentally.
        self.register_constraint("raw_beta", GreaterThan(self.beta_min))
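The snippet above refers to a `beta` property and a `_set_beta` setter that are not shown. Below is a minimal, hypothetical sketch of the usual GPyTorch pattern they follow; the class name and everything in it are illustrative assumptions, not the original SphereGaussianKernel.

    import torch
    from gpytorch.kernels import Kernel
    from gpytorch.constraints import GreaterThan

    class MinimalBetaKernel(Kernel):
        """Hypothetical kernel showing only the raw_beta/constraint plumbing."""

        has_lengthscale = False

        def __init__(self, beta_min, **kwargs):
            super().__init__(**kwargs)
            self.register_parameter(
                name="raw_beta",
                parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, 1)),
            )
            self.register_constraint("raw_beta", GreaterThan(beta_min))

        @property
        def beta(self):
            # constrained value, guaranteed to be greater than beta_min
            return self.raw_beta_constraint.transform(self.raw_beta)

        @beta.setter
        def beta(self, value):
            self._set_beta(value)

        def _set_beta(self, value):
            if not torch.is_tensor(value):
                value = torch.as_tensor(value).to(self.raw_beta)
            # write through the constraint so the raw parameter stays consistent
            self.initialize(raw_beta=self.raw_beta_constraint.inverse_transform(value))

    kernel = MinimalBetaKernel(beta_min=0.01)
    kernel.beta = 2.0
    print(kernel.beta)  # roughly 2.0, and always greater than beta_min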
Example #2
    def __init__(self, test_data, args):
        # Data buffer: only training data is stored here; test_data is stored in the GP model only before the model is trained.
        self.n = 0
        self.data = None
        self.index_list = []
        self.norm = 1.0

        self.previous_loss = CUDA(torch.tensor(np.inf))
        self.trigger_training = CUDA(torch.tensor(1e-3))

        self.lr = args.lr
        self.state_dim = args.state_dim
        self.action_dim = args.action_dim
        self.input_dim = self.state_dim + self.action_dim
        self.gp_iter = args.gp_iter

        # prior of the kernel parameters
        # [NOTE] these prior parameters should be similar to the estimated parameters of real data
        # if lengthscale is too large, it will be too difficult to create new components
        # if lengthscale is too small, it will be too easy to create new components
        # if noise_covar is too large, the prediction will be inaccurate
        # if noise_covar is too small, the conjugate gradient will not converge, even though a smaller noise can improve the prediction
        self.param = [
            1e-5,  # noise_covar initialization and constraint
            0.0,  # constant initialization
            0.7,  # outputscale initialization
            1.0,  # lengthscale initialization
            100.0,  # lengthscale constraint
            0.0001  # outputscale constraint
        ]
        self.param = CUDA(torch.tensor(self.param))

        # initialize model and likelihood
        model_list = []
        likelihood_list = []
        for m_i in range(self.state_dim):
            likelihood = CUDA(
                gpytorch.likelihoods.GaussianLikelihood(
                    noise_constraint=GreaterThan(self.param[0])))
            model = CUDA(
                ExactGPR(None, None, likelihood, self.input_dim, self.param))
            model.reset_parameters()
            likelihood_list.append(model.likelihood)
            model_list.append(model)

        # initialize model list
        self.model = gpytorch.models.IndependentModelList(*model_list)
        self.likelihood = gpytorch.likelihoods.LikelihoodList(*likelihood_list)

        # initialize optimizer
        self.optimizer = torch.optim.Adam([{
            'params': self.model.parameters()
        }],
                                          lr=self.lr)
        self.mll = gpytorch.mlls.SumMarginalLogLikelihood(
            self.likelihood, self.model)

        # change the flag
        self.model.eval()
        self.likelihood.eval()
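For context, here is a hedged sketch (not from the original file) of the training step these attributes are set up for; `wrapper` stands for an instance of the class above, and it is assumed that training data has already been attached to each sub-model via `set_train_data`.

    def train_gp(wrapper):
        # switch to training mode for hyperparameter optimization
        wrapper.model.train()
        wrapper.likelihood.train()
        for _ in range(wrapper.gp_iter):
            wrapper.optimizer.zero_grad()
            output = wrapper.model(*wrapper.model.train_inputs)
            loss = -wrapper.mll(output, wrapper.model.train_targets)
            loss.backward()
            wrapper.optimizer.step()
        # back to evaluation mode for prediction
        wrapper.model.eval()
        wrapper.likelihood.eval()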
Example #3
    def load_mcmc_samples(self, mcmc_samples: Dict[str, Tensor]) -> None:
        r"""Load the MCMC hyperparameter samples into the model.

        This method will be called by `fit_fully_bayesian_model_nuts` when the model
        has been fitted in order to create a batched SingleTaskGP model.
        """
        tkwargs = {"device": self.train_X.device, "dtype": self.train_X.dtype}
        num_mcmc_samples = len(mcmc_samples["mean"])
        batch_shape = torch.Size([num_mcmc_samples])

        self.train_X = self.train_X.unsqueeze(0).expand(
            num_mcmc_samples, self.train_X.shape[0], -1
        )
        self.mean_module = ConstantMean(batch_shape=batch_shape).to(**tkwargs)
        self.covar_module = ScaleKernel(
            base_kernel=MaternKernel(
                ard_num_dims=self.train_X.shape[-1],
                batch_shape=batch_shape,
            ),
            batch_shape=batch_shape,
        ).to(**tkwargs)
        if self.train_Yvar is not None:
            self.likelihood = FixedNoiseGaussianLikelihood(
                noise=self.train_Yvar, batch_shape=batch_shape
            ).to(**tkwargs)
        else:
            self.likelihood = GaussianLikelihood(
                batch_shape=batch_shape,
                noise_constraint=GreaterThan(MIN_INFERRED_NOISE_LEVEL),
            ).to(**tkwargs)
            self.likelihood.noise_covar.noise = (
                mcmc_samples["noise"]
                .detach()
                .clone()
                .view(self.likelihood.noise_covar.noise.shape)
                .clamp_min(MIN_INFERRED_NOISE_LEVEL)
                .to(**tkwargs)
            )

        self.covar_module.base_kernel.lengthscale = (
            mcmc_samples["lengthscale"]
            .detach()
            .clone()
            .view(self.covar_module.base_kernel.lengthscale.shape)
            .to(**tkwargs)
        )
        self.covar_module.outputscale = (
            mcmc_samples["outputscale"]
            .detach()
            .clone()
            .view(self.covar_module.outputscale.shape)
            .to(**tkwargs)
        )
        self.mean_module.constant.data = (
            mcmc_samples["mean"]
            .detach()
            .clone()
            .view(self.mean_module.constant.shape)
            .to(**tkwargs)
        )
Example #4
 def test_module_bounds(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         # get a test module
         train_x = torch.tensor([[1.0, 2.0, 3.0]], device=device, dtype=dtype)
         train_y = torch.tensor([4.0], device=device, dtype=dtype)
         likelihood = GaussianLikelihood(
             noise_constraint=GreaterThan(1e-5, transform=None)
         )
         model = ExactGP(train_x, train_y, likelihood)
         model.covar_module = RBFKernel(ard_num_dims=3)
         model.mean_module = ConstantMean()
         model.to(device=device, dtype=dtype)
         mll = ExactMarginalLogLikelihood(likelihood, model)
         # test the basic case
         x, pdict, bounds = module_to_array(
             module=mll, bounds={"model.covar_module.raw_lengthscale": (0.1, None)}
         )
         self.assertTrue(np.array_equal(x, np.zeros(5)))
         expected_sizes = {
             "likelihood.noise_covar.raw_noise": torch.Size([1]),
             "model.covar_module.raw_lengthscale": torch.Size([1, 3]),
             "model.mean_module.constant": torch.Size([1]),
         }
         self.assertEqual(set(pdict.keys()), set(expected_sizes.keys()))
         for pname, val in pdict.items():
             self.assertEqual(val.dtype, dtype)
             self.assertEqual(val.shape, expected_sizes[pname])
             self.assertEqual(val.device.type, device.type)
         lower_exp = np.full_like(x, 0.1)
         lower_exp[_get_index(pdict, "model.mean_module.constant")] = -np.inf
         lower_exp[_get_index(pdict, "likelihood.noise_covar.raw_noise")] = 1e-5
         self.assertTrue(np.allclose(bounds[0], lower_exp))
         self.assertTrue(np.equal(bounds[1], np.full_like(x, np.inf)).all())
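A small illustrative check of the `transform=None` pattern used above; the assumption here is that, with no transform, GPyTorch's constraint leaves the raw value unchanged and only supplies bounds, which is why the test can compare bounds against the raw parameters directly.

    import torch
    from gpytorch.constraints import GreaterThan

    c = GreaterThan(1e-5, transform=None)
    raw = torch.tensor([0.02])
    print(torch.equal(c.transform(raw), raw))  # True: identity transform
    print(c.lower_bound)                       # tensor(1.0000e-05)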
Example #5
 def test_fit_gpytorch_model_singular(self):
     options = {"disp": False, "maxiter": 5}
     for dtype in (torch.float, torch.double):
         X_train = torch.ones(2, 2, device=self.device, dtype=dtype)
         Y_train = torch.zeros(2, 1, device=self.device, dtype=dtype)
         test_likelihood = GaussianLikelihood(
             noise_constraint=GreaterThan(-1e-7, transform=None, initial_value=0.0)
         )
         gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
         mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
         mll.to(device=self.device, dtype=dtype)
         # this will do multiple retries (and emit warnings, which is desired)
         with warnings.catch_warnings(record=True) as ws, settings.debug(True):
             fit_gpytorch_model(mll, options=options, max_retries=2)
             self.assertTrue(
                 any(issubclass(w.category, NumericalWarning) for w in ws)
             )
         # ensure that fitting fails if the noise constraint prevents jitter from helping
         gp.likelihood = GaussianLikelihood(
             noise_constraint=Interval(-2, -1, transform=None, initial_value=-1.5)
         )
         mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
         mll.to(device=self.device, dtype=dtype)
         with self.assertRaises(NotPSDError):
             fit_gpytorch_model(mll, options=options, max_retries=2)
         # ensure we can handle NaNErrors in the optimizer
         with mock.patch.object(SingleTaskGP, "__call__", side_effect=NanError):
             gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
             mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
             mll.to(device=self.device, dtype=dtype)
             fit_gpytorch_model(
                 mll, options={"disp": False, "maxiter": 1}, max_retries=1
             )
Example #6
    def test_fit_gpytorch_model_singular(self):
        options = {"disp": False, "maxiter": 5}
        for dtype in (torch.float, torch.double):
            X_train = torch.ones(2, 2, device=self.device, dtype=dtype)
            Y_train = torch.zeros(2, 1, device=self.device, dtype=dtype)
            test_likelihood = GaussianLikelihood(
                noise_constraint=GreaterThan(-1e-7, transform=None, initial_value=0.0)
            )
            gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            mll.to(device=self.device, dtype=dtype)
            # this will do multiple retries (and emit warnings, which is desired)
            with warnings.catch_warnings(record=True) as ws, settings.debug(True):
                fit_gpytorch_model(mll, options=options, max_retries=2)
                self.assertTrue(
                    any(issubclass(w.category, NumericalWarning) for w in ws)
                )
            # ensure that fitting fails if the noise constraint prevents jitter from helping
            gp.likelihood = GaussianLikelihood(
                noise_constraint=Interval(-2, -1, transform=None, initial_value=-1.5)
            )
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            mll.to(device=self.device, dtype=dtype)
            with self.assertLogs(level="DEBUG") as logs:
                fit_gpytorch_model(mll, options=options, max_retries=2)
            self.assertTrue(any("NotPSDError" in log for log in logs.output))
            # ensure we can handle NaNErrors in the optimizer
            with mock.patch.object(SingleTaskGP, "__call__", side_effect=NanError):
                gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
                mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
                mll.to(device=self.device, dtype=dtype)
                fit_gpytorch_model(
                    mll, options={"disp": False, "maxiter": 1}, max_retries=1
                )
            # ensure we catch NotPSDErrors
            with mock.patch.object(SingleTaskGP, "__call__", side_effect=NotPSDError):
                mll = self._getModel()
                with self.assertLogs(level="DEBUG") as logs:
                    fit_gpytorch_model(mll, max_retries=2)
                for retry in [1, 2]:
                    self.assertTrue(
                        any(
                            f"Fitting failed on try {retry} due to a NotPSDError."
                            in log
                            for log in logs.output
                        )
                    )

            # Failure due to optimization warning

            def optimize_w_warning(mll, **kwargs):
                warnings.warn("Dummy warning.", OptimizationWarning)
                return mll, None

            mll = self._getModel()
            with self.assertLogs(level="DEBUG") as logs, settings.debug(True):
                fit_gpytorch_model(mll, optimizer=optimize_w_warning, max_retries=2)
            self.assertTrue(
                any("Fitting failed on try 1." in log for log in logs.output)
            )
Example #7
    def argmax_posterior_mean(cands: to.Tensor, cands_values: to.Tensor,
                              ddp_space: BoxSpace, num_restarts: int,
                              num_samples: int) -> to.Tensor:
        """
        Compute the GP input with the maximal posterior mean.

        :param cands: candidates a.k.a. x
        :param cands_values: observed values a.k.a. y
        :param ddp_space: space of the domain distribution parameters, indicates the lower and upper bound
        :param num_restarts: number of restarts for the optimization of the acquisition function
        :param num_samples: number of samples for the optimization of the acquisition function
        :return: un-normalized candidate with maximum posterior value a.k.a. x
        """
        if not isinstance(cands, to.Tensor):
            raise pyrado.TypeErr(given=cands, expected_type=to.Tensor)
        if not isinstance(cands_values, to.Tensor):
            raise pyrado.TypeErr(given=cands_values, expected_type=to.Tensor)
        if not isinstance(ddp_space, BoxSpace):
            raise pyrado.TypeErr(given=ddp_space, expected_type=BoxSpace)

        # Normalize the input data and standardize the output data
        uc_projector = UnitCubeProjector(
            to.from_numpy(ddp_space.bound_lo).to(dtype=to.get_default_dtype()),
            to.from_numpy(ddp_space.bound_up).to(dtype=to.get_default_dtype()),
        )
        cands_norm = uc_projector.project_to(cands)
        cands_values_stdized = standardize(cands_values)

        if cands_norm.shape[0] > cands_values.shape[0]:
            print_cbt(
                f"There are {cands.shape[0]} candidates but only {cands_values.shape[0]} evaluations. Ignoring "
                f"the candidates without evaluation for computing the argmax.",
                "y",
            )
            cands_norm = cands_norm[:cands_values.shape[0], :]

        # Create and fit the GP model
        gp = SingleTaskGP(cands_norm, cands_values_stdized)
        gp.likelihood.noise_covar.register_constraint("raw_noise",
                                                      GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)

        # Find position with maximal posterior mean
        cand_norm, _ = optimize_acqf(
            acq_function=PosteriorMean(gp),
            bounds=to.stack(
                [to.zeros(ddp_space.flat_dim),
                 to.ones(ddp_space.flat_dim)]).to(dtype=to.float32),
            q=1,
            num_restarts=num_restarts,
            raw_samples=num_samples,
        )

        cand_norm = cand_norm.to(dtype=to.get_default_dtype())
        cand = uc_projector.project_back(cand_norm.detach())
        print_cbt(f"Converged to argmax of the posterior mean: {cand.numpy()}",
                  "g",
                  bright=True)
        return cand
Example #8
 def __init__(self, train_x, train_y, likelihood, input_dim, params):
     super(ExactGPR, self).__init__(train_x, train_y, likelihood)
     self.mean_module = gpytorch.means.ConstantMean()
     self.covar_module = gpytorch.kernels.ScaleKernel(
         gpytorch.kernels.RBFKernel(ard_num_dims=input_dim,
                                    lengthscale_constraint=LessThan(
                                        params[0])),
         outputscale_constraint=GreaterThan(params[1]))
Example #9
    def fit_model(self):
        """
        If no state_dict exists, fits the model and saves the state_dict.
        Otherwise, constructs the model but uses the fit given by the state_dict.
        """
        # read the data
        data_list = list()
        for i in range(1, 31):
            data_file = os.path.join(script_dir, "port_evals",
                                     "port_n=100_seed=%d" % i)
            data_list.append(torch.load(data_file))

        # join the data together
        X = torch.cat([data_list[i]["X"] for i in range(len(data_list))],
                      dim=0).squeeze(-2)
        Y = torch.cat([data_list[i]["Y"] for i in range(len(data_list))],
                      dim=0).squeeze(-2)

        # fit GP
        noise_prior = GammaPrior(1.1, 0.5)
        noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
        likelihood = GaussianLikelihood(
            noise_prior=noise_prior,
            batch_shape=[],
            noise_constraint=GreaterThan(
                0.000005,  # minimum observation noise assumed in the GP model
                transform=None,
                initial_value=noise_prior_mode,
            ),
        )

        # We save the state dict to avoid fitting the GP every time which takes ~3 mins
        try:
            state_dict = torch.load(
                os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"))
            model = SingleTaskGP(X,
                                 Y,
                                 likelihood,
                                 outcome_transform=Standardize(m=1))
            model.load_state_dict(state_dict)
        except FileNotFoundError:
            model = SingleTaskGP(X,
                                 Y,
                                 likelihood,
                                 outcome_transform=Standardize(m=1))
            mll = ExactMarginalLogLikelihood(model.likelihood, model)
            from time import time

            start = time()
            fit_gpytorch_model(mll)
            print("fitting took %s seconds" % (time() - start))
            torch.save(
                model.state_dict(),
                os.path.join(script_dir, "portfolio_surrogate_state_dict.pt"),
            )
        self.model = model
Example #10
    def __init__(self, input_dim, feature_dim, label_dim, hidden_width,
                 hidden_depth, n_inducing, batch_size, max_epochs_since_update,
                 **kwargs):
        """
        Args:
            input_dim (int)
            feature_dim (int): dimension of deep kernel features
            label_dim (int)
            hidden_depth (int)
            hidden_width (int or list)
            n_inducing (int): number of inducing points for variational approximation
            batch_size (int)
            max_epochs_since_update (int)
        """
        params = locals()
        del params['self']
        self.__dict__ = params
        super().__init__()

        noise_constraint = GreaterThan(1e-4)
        self.likelihood = GaussianLikelihood(batch_shape=torch.Size(
            [label_dim]),
                                             noise_constraint=noise_constraint)

        self.nn = FCNet(input_dim,
                        output_dim=label_dim,
                        hidden_width=hidden_width,
                        hidden_depth=hidden_depth,
                        batch_norm=True)
        self.batch_norm = torch.nn.BatchNorm1d(feature_dim)

        self.mean_module = ConstantMean(batch_shape=torch.Size([label_dim]))
        base_kernel = RBFKernel(batch_shape=torch.Size([label_dim]),
                                ard_num_dims=feature_dim)
        self.covar_module = ScaleKernel(base_kernel,
                                        batch_shape=torch.Size([label_dim]))

        variational_dist = MeanFieldVariationalDistribution(
            num_inducing_points=n_inducing,
            batch_shape=torch.Size([label_dim]))
        inducing_points = torch.randn(n_inducing, feature_dim)
        self.variational_strategy = VariationalStrategy(
            self,
            inducing_points,
            variational_dist,
            learn_inducing_locations=True)

        # initialize preprocessors
        self.register_buffer("input_mean", torch.zeros(input_dim))
        self.register_buffer("input_std", torch.ones(input_dim))
        self.register_buffer("label_mean", torch.zeros(label_dim))
        self.register_buffer("label_std", torch.ones(label_dim))

        self._train_ckpt = deepcopy(self.state_dict())
        self._eval_ckpt = deepcopy(self.state_dict())
Example #11
    def fit(self, x_train, y_train):
        # normalize parameter (=input) data
        x_train_norm = self.param_normalizer.project_to(x_train)
        # normalize the data
        y_train_norm = self.data_normalizer.standardize(y_train)

        self.gp = SingleTaskGP(x_train_norm, y_train_norm)
        self.gp.likelihood.noise_covar.register_constraint(
            "raw_noise", GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(self.gp.likelihood, self.gp)
        fit_gpytorch_model(mll)
        return self.gp
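Hypothetical usage of the wrapper above; the `surrogate` object, its normalizers, and the training tensors are illustrative assumptions only.

    import torch

    x_train = torch.rand(50, 4)
    y_train = torch.sin(x_train.sum(dim=-1, keepdim=True))
    gp = surrogate.fit(x_train, y_train)
    with torch.no_grad():
        x_test_norm = surrogate.param_normalizer.project_to(torch.rand(5, 4))
        post = gp.posterior(x_test_norm)
    print(post.mean.shape)  # torch.Size([5, 1])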
Example #12
 def cont_kernel_factory(
     batch_shape: torch.Size,
     ard_num_dims: int,
     active_dims: List[int],
 ) -> MaternKernel:
     return MaternKernel(
         nu=2.5,
         batch_shape=batch_shape,
         ard_num_dims=ard_num_dims,
         active_dims=active_dims,
         lengthscale_constraint=GreaterThan(1e-04),
     )
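A sketch of where such a factory is typically plugged in, assuming a `MixedSingleTaskGP` constructor like the one shown in Example #27; the training data below is made up for illustration.

    import torch
    from botorch.models import MixedSingleTaskGP

    train_X = torch.cat(
        [torch.rand(20, 2), torch.randint(0, 3, (20, 1)).to(torch.float)], dim=-1
    )
    train_Y = train_X[..., :2].sin().sum(dim=-1, keepdim=True)
    model = MixedSingleTaskGP(
        train_X, train_Y, cat_dims=[-1], cont_kernel_factory=cont_kernel_factory
    )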
Example #13
 def test_fit_gpytorch_model_singular(self, cuda=False):
     options = {"disp": False, "maxiter": 5}
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         X_train = torch.rand(2, 2, device=device, dtype=dtype)
         Y_train = torch.zeros(2, device=device, dtype=dtype)
         test_likelihood = GaussianLikelihood(noise_constraint=GreaterThan(
             -1.0, transform=None, initial_value=0.0))
         gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
         mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
         mll.to(device=device, dtype=dtype)
         # this will do multiple retries (and emit warnings, which is desired)
         fit_gpytorch_model(mll, options=options, max_retries=2)
Example #14
 def __init__(self, train_x, train_y, likelihood, input_dim, params):
     super(SparseGPR, self).__init__(train_x, train_y, likelihood)
     self.mean_module = gpytorch.means.ConstantMean()
     self.covar_module = gpytorch.kernels.ScaleKernel(
         gpytorch.kernels.RBFKernel(ard_num_dims=input_dim,
                                    lengthscale_constraint=LessThan(
                                        params[0])),
         outputscale_constraint=GreaterThan(params[1]))
     # use some training data to initialize the inducing_module
     if train_x is None:
         train_x = CUDA(torch.zeros((1, input_dim)))
     self.inducing_module = gpytorch.kernels.InducingPointKernel(
         self.covar_module, inducing_points=train_x, likelihood=likelihood)
Example #15
    def __init__(self, dim, latent_dim, beta_min, beta_prior=None, **kwargs):
        """
        Initialisation.

        Parameters
        ----------
        :param dim: dimension of the ambient high-dimensional sphere manifold
        :param latent_dim: dimension of the latent low-dimensional sphere manifold
        :param beta_min: minimum value of the inverse square lengthscale parameter beta

        Optional parameters
        -------------------
        :param beta_prior: prior on the parameter beta
        :param kwargs: additional arguments
        """
        super(NestedSphereGaussianKernel, self).__init__(has_lengthscale=False, **kwargs)
        self.beta_min = beta_min
        self.dim = dim
        self.latent_dim = latent_dim

        # Add beta parameter, corresponding to the inverse of the lengthscale parameter.
        beta_num_dims = 1
        self.register_parameter(name="raw_beta",
                                parameter=torch.nn.Parameter(torch.zeros(*self.batch_shape, 1, beta_num_dims)))

        if beta_prior is not None:
            self.register_prior("beta_prior", beta_prior, lambda: self.beta, lambda v: self._set_beta(v))

        # A GreaterThan constraint is registered on the beta parameter (the inverse square lengthscale)
        # to guarantee the positive-definiteness of the kernel.
        # The value of beta_min can be determined e.g. experimentally.
        self.register_constraint("raw_beta", GreaterThan(self.beta_min))

        # Add projection parameters
        for d in range(self.dim, self.latent_dim, -1):
            # Axes parameters
            # Register
            axis_name = "raw_axis_S" + str(d)
            # axis = torch.zeros(1, d)
            # axis[:, 0] = 1
            axis = torch.randn(1, d)
            axis = axis / torch.norm(axis)
            axis = axis.repeat(*self.batch_shape, 1, 1)
            self.register_parameter(name=axis_name,
                                    parameter=torch.nn.Parameter(axis))
            # Corresponding manifold
            axis_manifold_name = "raw_axis_S" + str(d) + "_manifold"
            setattr(self, axis_manifold_name, pyman_man.Sphere(d))

        # Distance to axis (constant), fixed at pi/2
        self.distances_to_axis = [np.pi / 2 * torch.ones(1, 1) for d in range(self.dim, self.latent_dim, -1)]
Example #16
    def _sample(self, candidates: Optional[np.array] = None) -> np.array:
        if len(self.X_observed) < self.num_initial_random_draws:
            return self.initial_sampler.sample(candidates=candidates)
        else:
            z_observed = torch.Tensor(self.transform_outputs(self.y_observed.numpy()))

            with torch.no_grad():
                # both (n, 1)
                #mu_pred, sigma_pred = self.thompson_sampling.prior(self.X_observed)
                mu_pred, sigma_pred = self.initial_sampler.prior.predict(self.X_observed)
                mu_pred = torch.Tensor(mu_pred)
                sigma_pred = torch.Tensor(sigma_pred)

            # (n, 1)
            r_observed = residual_transform(z_observed, mu_pred, sigma_pred)

            # build and fit GP on residuals
            gp = SingleTaskGP(
                train_X=self.X_observed,
                train_Y=r_observed,
                likelihood=GaussianLikelihood(noise_constraint=GreaterThan(1e-3)),
            )
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            fit_gpytorch_model(mll)

            acq = ShiftedExpectedImprovement(
                model=gp,
                best_f=z_observed.min(dim=0).values,
                mean_std_predictor=self.initial_sampler.prior.predict,
                maximize=False,
            )

            if candidates is None:
                candidate, acq_value = optimize_acqf(
                    acq,
                    bounds=self.bounds_tensor,
                    q=1,
                    num_restarts=5,
                    raw_samples=100,
                )
                # import matplotlib.pyplot as plt
                # x = torch.linspace(-1, 1).unsqueeze(dim=-1)
                # x = torch.cat((x, x * 0), dim=1)
                # plt.plot(x[:, 0].flatten().tolist(), acq(x.unsqueeze(dim=1)).tolist())
                # plt.show()
                return candidate[0]
            else:
                # (N,)
                ei = acq(torch.Tensor(candidates).unsqueeze(dim=-2))
                return torch.Tensor(candidates[ei.argmax()])
Example #17
File: main.py Project: stys/albo
def initialize_model(x, z, state_dict=None):
    n = z.shape[-1]
    gp_models = []
    for i in range(n):
        y = z[..., i].unsqueeze(-1)
        gp_model = SingleTaskGP(train_X=x, train_Y=y)
        gp_model.likelihood.noise_covar.register_constraint(
            "raw_noise", GreaterThan(1e-5))
        gp_models.append(gp_model)
    model_list = ModelListGP(*gp_models)
    mll = SumMarginalLogLikelihood(model_list.likelihood, model_list)
    if state_dict is not None:
        model_list.load_state_dict(state_dict)
    return mll, model_list
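A hypothetical follow-up showing how the helper above is typically used; the data shapes and the warm-started refit are illustrative assumptions.

    import torch
    from botorch import fit_gpytorch_model

    x = torch.rand(12, 3)
    z = torch.rand(12, 2)
    mll, model_list = initialize_model(x, z)
    fit_gpytorch_model(mll)

    # later: rebuild on the same (or augmented) data, warm-starting from the
    # previously learned hyperparameters
    mll_new, model_list_new = initialize_model(x, z, state_dict=model_list.state_dict())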
Example #18
    def __init__(self, train_x, train_y, likelihood, input_dim, params):
        super(ExactGPR, self).__init__(train_x, train_y, likelihood)
        self.params = params
        self.input_dim = input_dim
        self.lengthscale_prior = None  #gpytorch.priors.GammaPrior(3.0, 6.0)
        self.outputscale_prior = None  #gpytorch.priors.GammaPrior(2.0, 0.15)

        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(
                ard_num_dims=input_dim,
                lengthscale_prior=self.lengthscale_prior,
                lengthscale_constraint=LessThan(self.params[4])),
            outputscale_prior=self.outputscale_prior,
            outputscale_constraint=GreaterThan(self.params[5]))
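An illustrative wiring sketch, assuming a `params` vector of the form built in Example #2: `params[4]` bounds the RBF lengthscale from above and `params[5]` bounds the outputscale from below.

    import torch
    import gpytorch

    params = torch.tensor([1e-5, 0.0, 0.7, 1.0, 100.0, 0.0001])
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    # train_x/train_y may be None here, as in Example #2; data is attached later
    model = ExactGPR(None, None, likelihood, input_dim=4, params=params)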
Example #19
 def test_fit_gpytorch_model_singular(self):
     options = {"disp": False, "maxiter": 5}
     for dtype in (torch.float, torch.double):
         X_train = torch.rand(2, 2, device=self.device, dtype=dtype)
         Y_train = torch.zeros(2, 1, device=self.device, dtype=dtype)
         test_likelihood = GaussianLikelihood(
             noise_constraint=GreaterThan(-1.0, transform=None, initial_value=0.0)
         )
         gp = SingleTaskGP(X_train, Y_train, likelihood=test_likelihood)
         mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
         mll.to(device=self.device, dtype=dtype)
         # this will do multiple retries (and emit warnings, which is desired)
         with warnings.catch_warnings(record=True) as ws, settings.debug(True):
             fit_gpytorch_model(mll, options=options, max_retries=2)
             self.assertTrue(
                 any(issubclass(w.category, OptimizationWarning) for w in ws)
             )
Example #20
    def __init__(self, dim, latent_dim, beta_min, beta_prior=None, **kwargs):
        """
        Initialisation.

        Parameters
        ----------
        :param dim: dimension of the ambient high-dimensional sphere manifold
        :param latent_dim: dimension of the latent low-dimensional sphere manifold
        :param beta_min: minimum value of the inverse square lengthscale parameter beta
        :param beta_prior: prior on the parameter beta
        :param kwargs: additional arguments
        """
        super(NestedSpdAffineInvariantGaussianKernel,
              self).__init__(has_lengthscale=False, **kwargs)
        self.beta_min = beta_min
        self.dim = dim
        self.latent_dim = latent_dim

        # Add beta parameter, corresponding to the inverse of the lengthscale parameter.
        beta_num_dims = 1
        self.register_parameter(name="raw_beta",
                                parameter=torch.nn.Parameter(
                                    torch.zeros(*self.batch_shape, 1,
                                                beta_num_dims)))

        if beta_prior is not None:
            self.register_prior("beta_prior", beta_prior, lambda: self.beta,
                                lambda v: self._set_beta(v))

        # A GreaterThan constraint is registered on the beta parameter (the inverse square lengthscale)
        # to guarantee the positive-definiteness of the kernel.
        # The value of beta_min can be determined e.g. experimentally.
        self.register_constraint("raw_beta", GreaterThan(self.beta_min))

        # Add projection parameters
        self.raw_projection_matrix_manifold = pyman_man.Grassmann(
            self.dim, self.latent_dim)
        self.register_parameter(
            name="raw_projection_matrix",
            parameter=torch.nn.Parameter(
                torch.Tensor(
                    self.raw_projection_matrix_manifold.rand()).repeat(
                        *self.batch_shape, 1, 1)))
Example #21
    def argmax_posterior_mean(cands: to.Tensor, cands_values: to.Tensor,
                              uc_normalizer: UnitCubeProjector,
                              num_restarts: int,
                              num_samples: int) -> to.Tensor:
        """
        Compute the GP input with the maximal posterior mean.

        :param cands: candidates a.k.a. x
        :param cands_values: observed values a.k.a. y
        :param uc_normalizer: unit cube normalizer used during the experiments (can be recovered from the bounds)
        :param num_restarts: number of restarts for the optimization of the acquisition function
        :param num_samples: number of samples for the optimization of the acquisition function
        :return: un-normalized candidate with maximum posterior value a.k.a. x
        """
        # Normalize the input data and standardize the output data
        cands_norm = uc_normalizer.project_to(cands)
        cands_values_stdized = standardize(cands_values)

        # Create and fit the GP model
        gp = SingleTaskGP(cands_norm, cands_values_stdized)
        gp.likelihood.noise_covar.register_constraint('raw_noise',
                                                      GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)

        # Find position with maximal posterior mean
        cand_norm, acq_value = optimize_acqf(
            acq_function=PosteriorMean(gp),
            bounds=to.stack([
                to.zeros_like(uc_normalizer.bound_lo),
                to.ones_like(uc_normalizer.bound_up)
            ]),
            q=1,
            num_restarts=num_restarts,
            raw_samples=num_samples)

        cand = uc_normalizer.project_back(cand_norm.detach())
        print_cbt(f'Converged to argmax of the posterior mean\n{cand.numpy()}',
                  'g',
                  bright=True)
        return cand
Example #22
    def _sample(self, candidates: Optional[np.array] = None) -> np.array:
        if len(self.X_observed) < self.num_initial_random_draws:
            return self.initial_sampler.sample(candidates=candidates)
        else:
            z_observed = torch.Tensor(
                self.transform_outputs(self.y_observed.numpy()))

            # build and fit GP
            gp = SingleTaskGP(
                train_X=self.X_observed,
                train_Y=z_observed,
                # special likelihood for numerical Cholesky errors, following advice from
                # https://www.gitmemory.com/issue/pytorch/botorch/179/506276521
                likelihood=GaussianLikelihood(
                    noise_constraint=GreaterThan(1e-3)),
            )
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            fit_gpytorch_model(mll)

            acq = self.expected_improvement(
                model=gp,
                best_f=z_observed.min(dim=0).values,
            )

            if candidates is None:
                candidate, acq_value = optimize_acqf(
                    acq,
                    bounds=self.bounds_tensor,
                    q=1,
                    num_restarts=5,
                    raw_samples=100,
                )
                return candidate[0]
            else:
                # (N,)
                ei = acq(torch.Tensor(candidates).unsqueeze(dim=-2))
                return torch.Tensor(candidates[ei.argmax()])
Example #23
    def __init__(
        self,
        datapoints: Tensor,
        comparisons: Tensor,
        covar_module: Optional[Module] = None,
        noise_module: Optional[HomoskedasticNoise] = None,
        **kwargs,
    ) -> None:
        r"""A probit-likelihood GP with Laplace approximation model.

        A probit-likelihood GP with Laplace approximation model that learns via
        pairwise comparison data. By default it uses a scaled-RBF kernel.

        Args:
            datapoints: A `batch_shape x n x d` tensor of training features.
            comparisons: A `batch_shape x m x 2` tensor of training comparisons;
                comparisons[i] is a noisy indicator suggesting the utility value
                of comparisons[i, 0]-th is greater than comparisons[i, 1]-th.
            covar_module: Covariance module.
            noise_module: Noise module.
        """
        super().__init__()

        # Compatibility variables with fit_gpytorch_*: Dummy likelihood
        # Likelihood is tightly tied with this model and
        # it doesn't make much sense to keep it separate
        self.likelihood = None

        # TODO: remove these variables from `state_dict()` so that when calling
        #       `load_state_dict()`, only the hyperparameters are copied over
        self.register_buffer("datapoints", None)
        self.register_buffer("comparisons", None)
        self.register_buffer("utility", None)
        self.register_buffer("covar_chol", None)
        self.register_buffer("likelihood_hess", None)
        self.register_buffer("hlcov_eye", None)
        self.register_buffer("covar", None)
        self.register_buffer("covar_inv", None)

        self.train_inputs = []
        self.train_targets = None

        self.pred_cov_fac_need_update = True
        self._input_batch_shape = torch.Size()
        self.dim = None
        # will be set to match datapoints' dtype and device
        # since scipy.optimize.fsolve only works on cpu, it'd be the
        # fastest to fit the model on cpu and take samples on gpu to avoid
        # overhead of moving data back and forth during fitting time
        self.tkwargs = {}
        # See set_train_data for additional compatibility variables
        self.set_train_data(datapoints, comparisons, update_model=False)

        # Set optional parameters
        # jitter to add for numerical stability
        self._jitter = kwargs.get("jitter", 1e-6)
        # Clamping z lim for better numerical stability. See self._calc_z for detail
        # norm_cdf(z=3) ~= 0.999, i.e. the top 0.1 percent
        self._zlim = kwargs.get("zlim", 3)
        # Stopping criteria in scipy.optimize.fsolve used to find f_map in _update()
        # If None, set to 1e-6 by default in _update
        self._xtol = kwargs.get("xtol")
        # The maximum number of calls to the function in scipy.optimize.fsolve
        # If None, set to 100 by default in _update
        # If zero, then 100*(N+1) is used by default by fsolve;
        self._maxfev = kwargs.get("maxfev")

        # Set hyperparameters
        # Do not set the batch_shape explicitly so mean_module can operate in both modes.
        # Once fsolve used in _update can run in batch mode, we should explicitly set
        # the batch shape here.
        self.mean_module = ConstantMean()
        # Do not optimize constant mean prior
        for param in self.mean_module.parameters():
            param.requires_grad = False

        # set covariance module
        if noise_module is None:
            noise_module = HomoskedasticNoise(
                noise_prior=SmoothedBoxPrior(-5, 5, 0.5, transform=torch.log),
                noise_constraint=GreaterThan(1e-4),  # if None, 1e-4 by default
                batch_shape=self._input_batch_shape,
            )
        self.noise_module = noise_module

        # set covariance module
        if covar_module is None:
            ls_prior = GammaPrior(1.2, 0.5)
            ls_prior_mode = (ls_prior.concentration - 1) / ls_prior.rate
            covar_module = RBFKernel(
                batch_shape=self._input_batch_shape,
                ard_num_dims=self.dim,
                lengthscale_prior=ls_prior,
                lengthscale_constraint=Positive(transform=None,
                                                initial_value=ls_prior_mode),
            )
        self.covar_module = covar_module

        self._x0 = None  # will store temporary results for warm-starting
        if self.datapoints is not None and self.comparisons is not None:
            self.to(dtype=self.datapoints.dtype, device=self.datapoints.device)
            self._update()  # Find f_map for initial parameters

        self.to(self.datapoints)
Example #24
    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        likelihood: Optional[MultitaskGaussianLikelihood] = None,
        data_covar_module: Optional[Module] = None,
        task_covar_prior: Optional[Prior] = None,
        rank: Optional[int] = None,
        input_transform: Optional[InputTransform] = None,
        outcome_transform: Optional[OutcomeTransform] = None,
        **kwargs: Any,
    ) -> None:
        r"""Multi-task GP with Kronecker structure, using a simple ICM kernel.

        Args:
            train_X: A `batch_shape x n x d` tensor of training features.
            train_Y: A `batch_shape x n x m` tensor of training observations.
            likelihood: A `MultitaskGaussianLikelihood`. If omitted, uses a
                `MultitaskGaussianLikelihood` with a `GammaPrior(1.1, 0.05)`
                noise prior.
            data_covar_module: The module computing the covariance (Kernel) matrix
                in data space. If omitted, use a `MaternKernel`.
            task_covar_prior: A Prior on the task covariance matrix. Must operate
                on p.s.d. matrices. A common prior for this is the `LKJ` prior. If
                omitted, uses `LKJCovariancePrior` with `eta` parameter as specified
                in the keyword arguments (if not specified, use `eta=1.5`).
            rank: The rank of the ICM kernel. If omitted, use a full rank kernel.
            kwargs: Additional arguments to override default settings of priors,
                including:
                - eta: The eta parameter on the default LKJ task_covar_prior.
                A value of 1.0 is uninformative, values <1.0 favor stronger
                correlations (in magnitude), correlations vanish as eta -> inf.
                - sd_prior: A scalar prior over nonnegative numbers, which is used
                for the default LKJCovariancePrior task_covar_prior.
                - likelihood_rank: The rank of the task covariance matrix to fit.
                Defaults to 0 (which corresponds to a diagonal covariance matrix).

        Example:
            >>> train_X = torch.rand(10, 2)
            >>> train_Y = torch.cat([f_1(X), f_2(X)], dim=-1)
            >>> model = KroneckerMultiTaskGP(train_X, train_Y)
        """
        with torch.no_grad():
            transformed_X = self.transform_inputs(
                X=train_X, input_transform=input_transform)
        if outcome_transform is not None:
            train_Y, _ = outcome_transform(train_Y)

        self._validate_tensor_args(X=transformed_X, Y=train_Y)
        self._num_outputs = train_Y.shape[-1]
        batch_shape, ard_num_dims = train_X.shape[:-2], train_X.shape[-1]
        num_tasks = train_Y.shape[-1]

        if rank is None:
            rank = num_tasks
        if likelihood is None:
            noise_prior = GammaPrior(1.1, 0.05)
            noise_prior_mode = (noise_prior.concentration -
                                1) / noise_prior.rate
            likelihood = MultitaskGaussianLikelihood(
                num_tasks=num_tasks,
                batch_shape=batch_shape,
                noise_prior=noise_prior,
                noise_constraint=GreaterThan(
                    MIN_INFERRED_NOISE_LEVEL,
                    transform=None,
                    initial_value=noise_prior_mode,
                ),
                rank=kwargs.get("likelihood_rank", 0),
            )
        if task_covar_prior is None:
            task_covar_prior = LKJCovariancePrior(
                n=num_tasks,
                eta=torch.tensor(kwargs.get("eta", 1.5)).to(train_X),
                sd_prior=kwargs.get(
                    "sd_prior",
                    SmoothedBoxPrior(math.exp(-6), math.exp(1.25), 0.05),
                ),
            )
        super().__init__(train_X, train_Y, likelihood)
        self.mean_module = MultitaskMean(
            base_means=ConstantMean(batch_shape=batch_shape),
            num_tasks=num_tasks)
        if data_covar_module is None:
            data_covar_module = MaternKernel(
                nu=2.5,
                ard_num_dims=ard_num_dims,
                lengthscale_prior=GammaPrior(3.0, 6.0),
                batch_shape=batch_shape,
            )
        else:
            data_covar_module = data_covar_module

        self.covar_module = MultitaskKernel(
            data_covar_module=data_covar_module,
            num_tasks=num_tasks,
            rank=rank,
            batch_shape=batch_shape,
            task_covar_prior=task_covar_prior,
        )

        if outcome_transform is not None:
            self.outcome_transform = outcome_transform
        if input_transform is not None:
            self.input_transform = input_transform
        self.to(train_X)
Example #25
    def __init__(
        self,
        indices: List[int],
        transform_on_train: bool = True,
        transform_on_eval: bool = True,
        transform_on_fantasize: bool = True,
        reverse: bool = False,
        eps: float = 1e-7,
        concentration1_prior: Optional[Prior] = None,
        concentration0_prior: Optional[Prior] = None,
        batch_shape: Optional[torch.Size] = None,
    ) -> None:
        r"""Initialize transform.

        Args:
            indices: The indices of the inputs to warp.
            transform_on_train: A boolean indicating whether to apply the
                transforms in train() mode. Default: True.
            transform_on_eval: A boolean indicating whether to apply the
                transform in eval() mode. Default: True.
            transform_on_fantasize: A boolean indicating whether to apply the
                transform when called from within a `fantasize` call. Default: True.
            reverse: A boolean indicating whether the forward pass should untransform
                the inputs.
            eps: A small value used to clip values to be in the interval (0, 1).
            concentration1_prior: A prior distribution on the concentration1 parameter
                of the Kumaraswamy distribution.
            concentration0_prior: A prior distribution on the concentration0 parameter
                of the Kumaraswamy distribution.
            batch_shape: The batch shape.
        """
        super().__init__()
        self.register_buffer("indices", torch.tensor(indices,
                                                     dtype=torch.long))
        self.transform_on_train = transform_on_train
        self.transform_on_eval = transform_on_eval
        self.transform_on_fantasize = transform_on_fantasize
        self.reverse = reverse
        self.batch_shape = batch_shape or torch.Size([])
        self._X_min = eps
        self._X_range = 1 - 2 * eps
        if len(self.batch_shape) > 0:
            # Note: this follows the gpytorch shape convention for lengthscales
            # There is ongoing discussion about the extra `1`.
            # TODO: update to follow new gpytorch convention resulting from
            # https://github.com/cornellius-gp/gpytorch/issues/1317
            batch_shape = self.batch_shape + torch.Size([1])
        else:
            batch_shape = self.batch_shape
        for i in (0, 1):
            p_name = f"concentration{i}"
            self.register_parameter(
                p_name,
                nn.Parameter(torch.full(batch_shape + self.indices.shape,
                                        1.0)),
            )
        if concentration0_prior is not None:
            self.register_prior(
                "concentration0_prior",
                concentration0_prior,
                lambda m: m.concentration0,
                lambda m, v: m._set_concentration(i=0, value=v),
            )
        if concentration1_prior is not None:
            self.register_prior(
                "concentration1_prior",
                concentration1_prior,
                lambda m: m.concentration1,
                lambda m, v: m._set_concentration(i=1, value=v),
            )
        for i in (0, 1):
            p_name = f"concentration{i}"
            constraint = GreaterThan(
                self._min_concentration_level,
                transform=None,
                # set the initial value to be the identity transformation
                initial_value=1.0,
            )
            self.register_constraint(param_name=p_name, constraint=constraint)
Example #26
    def step(self, snapshot_mode: str = 'latest', meta_info: dict = None):
        # Save snapshot to save the correct iteration count
        self.save_snapshot()

        if self.curr_checkpoint == -2:
            # Train the initial policies in the source domain
            self.train_init_policies()
            self.reached_checkpoint()  # setting counter to -1

        if self.curr_checkpoint == -1:
            # Evaluate the initial policies in the target domain
            self.eval_init_policies()
            self.reached_checkpoint()  # setting counter to 0

        if self.curr_checkpoint == 0:
            # Normalize the input data and standardize the output data
            cands_norm = self.ddp_projector.project_to(self.cands)
            cands_values_stdized = standardize(self.cands_values).unsqueeze(1)

            # Create and fit the GP model
            gp = SingleTaskGP(cands_norm, cands_values_stdized)
            gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            fit_gpytorch_model(mll)
            print_cbt('Fitted the GP.', 'g')

            # Acquisition functions
            if self.acq_fcn_type == 'UCB':
                acq_fcn = UpperConfidenceBound(gp, beta=self.acq_param.get('beta', 0.1), maximize=True)
            elif self.acq_fcn_type == 'EI':
                acq_fcn = ExpectedImprovement(gp, best_f=cands_values_stdized.max().item(), maximize=True)
            elif self.acq_fcn_type == 'PI':
                acq_fcn = ProbabilityOfImprovement(gp, best_f=cands_values_stdized.max().item(), maximize=True)
            else:
                raise pyrado.ValueErr(given=self.acq_fcn_type, eq_constraint="'UCB', 'EI', 'PI'")

            # Optimize acquisition function and get new candidate point
            cand_norm, acq_value = optimize_acqf(
                acq_function=acq_fcn,
                bounds=to.stack([to.zeros(self.ddp_space.flat_dim), to.ones(self.ddp_space.flat_dim)]),
                q=1,
                num_restarts=self.acq_restarts,
                raw_samples=self.acq_samples
            )
            next_cand = self.ddp_projector.project_back(cand_norm)
            print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
            self.cands = to.cat([self.cands, next_cand], dim=0)
            pyrado.save(self.cands, 'candidates', 'pt', self.save_dir, meta_info)
            self.reached_checkpoint()  # setting counter to 1

        if self.curr_checkpoint == 1:
            # Train and evaluate a new policy, repeat if the resulting policy did not exceed the success threshold
            wrapped_trn_fcn = until_thold_exceeded(
                self.thold_succ_subrtn.item(), self.max_subrtn_rep
            )(self.train_policy_sim)
            wrapped_trn_fcn(self.cands[-1, :], prefix=f'iter_{self._curr_iter}')
            self.reached_checkpoint()  # setting counter to 2

        if self.curr_checkpoint == 2:
            # Evaluate the current policy in the target domain
            policy = pyrado.load(self.policy, 'policy', 'pt', self.save_dir,
                                        meta_info=dict(prefix=f'iter_{self._curr_iter}'))
            self.curr_cand_value = self.eval_policy(
                self.save_dir, self._env_real, policy, self.mc_estimator, f'iter_{self._curr_iter}',
                self.num_eval_rollouts_real
            )
            self.cands_values = to.cat([self.cands_values, self.curr_cand_value.view(1)], dim=0)
            pyrado.save(self.cands_values, 'candidates_values', 'pt', self.save_dir, meta_info)

            # Store the argmax after training and evaluating
            curr_argmax_cand = BayRn.argmax_posterior_mean(
                self.cands, self.cands_values.unsqueeze(1), self.ddp_space, self.acq_restarts, self.acq_samples
            )
            self.argmax_cand = to.cat([self.argmax_cand, curr_argmax_cand], dim=0)
            pyrado.save(self.argmax_cand, 'candidates_argmax', 'pt', self.save_dir, meta_info)
            self.reached_checkpoint()  # setting counter to 0
Example #27
    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        cat_dims: List[int],
        cont_kernel_factory: Optional[Callable[[torch.Size, int, List[int]], Kernel]] = None,
        likelihood: Optional[Likelihood] = None,
        outcome_transform: Optional[OutcomeTransform] = None,  # TODO
        input_transform: Optional[InputTransform] = None,  # TODO
    ) -> None:
        r"""A single-task exact GP model supporting categorical parameters.

        Args:
            train_X: A `batch_shape x n x d` tensor of training features.
            train_Y: A `batch_shape x n x m` tensor of training observations.
            cat_dims: A list of indices corresponding to the columns of
                the input `X` that should be considered categorical features.
            cont_kernel_factory: A method that accepts `batch_shape`, `ard_num_dims`,
                and `active_dims` arguments and returns an instantiated GPyTorch
                `Kernel` object to be used as the base kernel for the continuous
                dimensions. If omitted, this model uses a Matern-2.5 kernel as
                the kernel for the ordinal parameters.
            likelihood: A likelihood. If omitted, use a standard
                GaussianLikelihood with inferred noise level.
            # outcome_transform: An outcome transform that is applied to the
            #     training data during instantiation and to the posterior during
            #     inference (that is, the `Posterior` obtained by calling
            #     `.posterior` on the model will be on the original scale).
            # input_transform: An input transform that is applied in the model's
            #     forward pass.

        Example:
            >>> train_X = torch.cat(
                    [torch.rand(20, 2), torch.randint(3, (20, 1))], dim=-1
                )
            >>> train_Y = (
                    torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True)
                    + train_X[..., -1:]
                )
            >>> model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[-1])
        """
        if outcome_transform is not None:
            raise UnsupportedError("outcome transforms not yet supported")
        if input_transform is not None:
            raise UnsupportedError("input transforms not yet supported")
        if len(cat_dims) == 0:
            raise ValueError(
                "Must specify categorical dimensions for MixedSingleTaskGP"
            )
        input_batch_shape, aug_batch_shape = self.get_batch_dimensions(
            train_X=train_X, train_Y=train_Y
        )

        if cont_kernel_factory is None:

            def cont_kernel_factory(
                batch_shape: torch.Size, ard_num_dims: int, active_dims: List[int]
            ) -> MaternKernel:
                return MaternKernel(
                    nu=2.5,
                    batch_shape=batch_shape,
                    ard_num_dims=ard_num_dims,
                    active_dims=active_dims,
                )

        if likelihood is None:
            # This Gamma prior is quite close to the Horseshoe prior
            min_noise = 1e-5 if train_X.dtype == torch.float else 1e-6
            likelihood = GaussianLikelihood(
                batch_shape=aug_batch_shape,
                noise_constraint=GreaterThan(
                    min_noise, transform=None, initial_value=1e-3
                ),
                noise_prior=GammaPrior(0.9, 10.0),
            )

        d = train_X.shape[-1]
        cat_dims = normalize_indices(indices=cat_dims, d=d)
        ord_dims = sorted(set(range(d)) - set(cat_dims))
        if len(ord_dims) == 0:
            covar_module = ScaleKernel(
                CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                )
            )
        else:
            sum_kernel = ScaleKernel(
                cont_kernel_factory(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(ord_dims),
                    active_dims=ord_dims,
                )
                + ScaleKernel(
                    CategoricalKernel(
                        batch_shape=aug_batch_shape,
                        ard_num_dims=len(cat_dims),
                        active_dims=cat_dims,
                    )
                )
            )
            prod_kernel = ScaleKernel(
                cont_kernel_factory(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(ord_dims),
                    active_dims=ord_dims,
                )
                * CategoricalKernel(
                    batch_shape=aug_batch_shape,
                    ard_num_dims=len(cat_dims),
                    active_dims=cat_dims,
                )
            )
            covar_module = sum_kernel + prod_kernel
        super().__init__(
            train_X=train_X,
            train_Y=train_Y,
            likelihood=likelihood,
            covar_module=covar_module,
            outcome_transform=outcome_transform,
            input_transform=input_transform,
        )
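A minimal usage sketch for the example above, assuming BoTorch's `MixedSingleTaskGP`, `fit_gpytorch_model`, and GPyTorch's `ExactMarginalLogLikelihood`; the RBF-based `rbf_factory` is a hypothetical custom `cont_kernel_factory`, not part of the original code:

import torch
from botorch.fit import fit_gpytorch_model
from botorch.models import MixedSingleTaskGP
from gpytorch.kernels import RBFKernel
from gpytorch.mlls import ExactMarginalLogLikelihood

# Two continuous columns plus one categorical column with three levels
# (cast to float so it can be concatenated with the continuous part).
train_X = torch.cat(
    [torch.rand(20, 2), torch.randint(3, (20, 1)).to(torch.float)], dim=-1
)
train_Y = torch.sin(train_X[..., :-1]).sum(dim=1, keepdim=True) + train_X[..., -1:]

def rbf_factory(batch_shape, ard_num_dims, active_dims):
    # Hypothetical replacement for the default Matern-2.5 factory above.
    return RBFKernel(
        batch_shape=batch_shape, ard_num_dims=ard_num_dims, active_dims=active_dims
    )

model = MixedSingleTaskGP(
    train_X, train_Y, cat_dims=[-1], cont_kernel_factory=rbf_factory
)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)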
Exemplo n.º 28
0
    def __init__(
        self,
        train_X: Tensor,
        train_Y: Tensor,
        likelihood: Optional[Likelihood] = None,
        covar_modules: Optional[List[Kernel]] = None,
        num_latent_dims: Optional[List[int]] = None,
        learn_latent_pars: bool = True,
        latent_init: str = "default",
        outcome_transform: Optional[OutcomeTransform] = None,
        input_transform: Optional[InputTransform] = None,
    ):
        r"""A HigherOrderGP model for high-dim output regression.

        Args:
            train_X: A `batch_shape x n x d`-dim tensor of training inputs.
            train_Y: A `batch_shape x n x output_shape`-dim tensor of training targets.
            likelihood: Gaussian likelihood for the model.
            covar_modules: List of kernels for each output structure.
            num_latent_dims: Sizes for the latent dimensions.
            learn_latent_pars: If True, learn the latent parameters.
            latent_init: How to initialize the latent parameters; either "default"
                or "gp".
            outcome_transform: An outcome transform that is applied to the training
                data during instantiation and to the posterior during inference.
            input_transform: An input transform that is applied in the model's
                forward pass.
        """

        if input_transform is not None:
            input_transform.to(train_X)

        # infer the dimension of `output_shape`.
        num_output_dims = train_Y.dim() - train_X.dim() + 1
        batch_shape = train_X.shape[:-2]
        if len(batch_shape) > 1:
            raise NotImplementedError(
                "HigherOrderGP currently only supports 1-dim `batch_shape`."
            )

        if outcome_transform is not None:
            if isinstance(outcome_transform, Standardize) and not isinstance(
                outcome_transform, FlattenedStandardize
            ):
                warnings.warn(
                    "HigherOrderGP does not support the outcome_transform "
                    "`Standardize`! Using `FlattenedStandardize` with `output_shape="
                    f"{train_Y.shape[- num_output_dims:]} and batch_shape="
                    f"{batch_shape} instead.",
                    RuntimeWarning,
                )
                outcome_transform = FlattenedStandardize(
                    output_shape=train_Y.shape[-num_output_dims:],
                    batch_shape=batch_shape,
                )
            train_Y, _ = outcome_transform(train_Y)

        self._aug_batch_shape = batch_shape
        self._num_dimensions = num_output_dims + 1
        self._num_outputs = train_Y.shape[0] if batch_shape else 1
        self.target_shape = train_Y.shape[-num_output_dims:]
        self._input_batch_shape = batch_shape

        if likelihood is None:

            noise_prior = GammaPrior(1.1, 0.05)
            noise_prior_mode = (noise_prior.concentration - 1) / noise_prior.rate
            likelihood = GaussianLikelihood(
                noise_prior=noise_prior,
                batch_shape=self._aug_batch_shape,
                noise_constraint=GreaterThan(
                    MIN_INFERRED_NOISE_LEVEL,
                    transform=None,
                    initial_value=noise_prior_mode,
                ),
            )
        else:
            self._is_custom_likelihood = True

        super().__init__(
            train_X,
            train_Y.view(*self._aug_batch_shape, -1),
            likelihood=likelihood,
        )

        if covar_modules is not None:
            self.covar_modules = ModuleList(covar_modules)
        else:
            self.covar_modules = ModuleList(
                [
                    MaternKernel(
                        nu=2.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                        batch_shape=self._aug_batch_shape,
                        ard_num_dims=1 if dim > 0 else train_X.shape[-1],
                    )
                    for dim in range(self._num_dimensions)
                ]
            )

        if num_latent_dims is None:
            num_latent_dims = [1] * (self._num_dimensions - 1)

        self.to(train_X.device)

        self._initialize_latents(
            latent_init=latent_init,
            num_latent_dims=num_latent_dims,
            learn_latent_pars=learn_latent_pars,
            device=train_Y.device,
            dtype=train_Y.dtype,
        )

        if outcome_transform is not None:
            self.outcome_transform = outcome_transform
        if input_transform is not None:
            self.input_transform = input_transform
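A brief construction sketch for the model above, assuming BoTorch's `HigherOrderGP`; the tensor shapes here are illustrative only:

import torch
from botorch.fit import fit_gpytorch_model
from botorch.models import HigherOrderGP
from gpytorch.mlls import ExactMarginalLogLikelihood

train_X = torch.rand(10, 3)      # `n x d` training inputs
train_Y = torch.rand(10, 4, 5)   # `n x output_shape` targets, output_shape = 4 x 5

model = HigherOrderGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)

# Joint posterior over the full 4 x 5 output for two new inputs.
posterior = model.posterior(torch.rand(2, 3))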
Exemplo n.º 29
0
    def step(self, snapshot_mode: str, meta_info: dict = None):
        if not self.initialized:
            # Start initialization phase
            self.train_init_policies()
            self.eval_init_policies()
            self.initialized = True

        # Normalize the input data and standardize the output data
        cands_norm = self.uc_normalizer.project_to(self.cands)
        cands_values_stdized = standardize(self.cands_values).unsqueeze(1)

        # Create and fit the GP model
        gp = SingleTaskGP(cands_norm, cands_values_stdized)
        gp.likelihood.noise_covar.register_constraint('raw_noise',
                                                      GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)
        print_cbt('Fitted the GP.', 'g')

        # Acquisition functions
        if self.acq_fcn_type == 'UCB':
            acq_fcn = UpperConfidenceBound(gp,
                                           beta=self.acq_param.get(
                                               'beta', 0.1),
                                           maximize=True)
        elif self.acq_fcn_type == 'EI':
            acq_fcn = ExpectedImprovement(
                gp, best_f=cands_values_stdized.max().item(), maximize=True)
        elif self.acq_fcn_type == 'PI':
            acq_fcn = ProbabilityOfImprovement(
                gp, best_f=cands_values_stdized.max().item(), maximize=True)
        else:
            raise pyrado.ValueErr(given=self.acq_fcn_type,
                                  eq_constraint="'UCB', 'EI', 'PI'")

        # Optimize acquisition function and get new candidate point
        cand, acq_value = optimize_acqf(
            acq_function=acq_fcn,
            bounds=to.stack([to.zeros(self.cand_dim),
                             to.ones(self.cand_dim)]),
            q=1,
            num_restarts=self.acq_restarts,
            raw_samples=self.acq_samples)
        next_cand = self.uc_normalizer.project_back(cand)
        print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
        self.cands = to.cat([self.cands, next_cand], dim=0)
        to.save(self.cands, osp.join(self._save_dir, 'candidates.pt'))

        # Train and evaluate the new candidate (saves to iter_{self._curr_iter}_policy.pt)
        prefix = f'iter_{self._curr_iter}'
        wrapped_trn_fcn = until_thold_exceeded(
            self.thold_succ_subroutine.item(),
            max_iter=self.max_subroutine_rep)(self.train_policy_sim)
        wrapped_trn_fcn(cand, prefix)

        # Evaluate the current policy on the target domain
        policy = to.load(osp.join(self._save_dir, f'{prefix}_policy.pt'))
        self.curr_cand_value = self.eval_policy(self._save_dir, self._env_real,
                                                policy,
                                                self.montecarlo_estimator,
                                                prefix,
                                                self.num_eval_rollouts_real)

        self.cands_values = to.cat(
            [self.cands_values,
             self.curr_cand_value.view(1)], dim=0)
        to.save(self.cands_values,
                osp.join(self._save_dir, 'candidates_values.pt'))

        # Store the argmax after training and evaluating
        curr_argmax_cand = BayRn.argmax_posterior_mean(
            self.cands, self.cands_values.unsqueeze(1), self.uc_normalizer,
            self.acq_restarts, self.acq_samples)
        self.argmax_cand = to.cat([self.argmax_cand, curr_argmax_cand], dim=0)
        to.save(self.argmax_cand,
                osp.join(self._save_dir, 'candidates_argmax.pt'))

        self.make_snapshot(snapshot_mode, float(to.mean(self.cands_values)),
                           meta_info)
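Stripped of the pyrado bookkeeping, the BoTorch pattern in this step (fit a `SingleTaskGP` on normalized candidates, then optimize an acquisition function over the unit cube) looks roughly like the sketch below; the data is synthetic and the hyperparameters illustrative only:

import torch
from botorch.acquisition import UpperConfidenceBound
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from botorch.utils.transforms import standardize
from gpytorch.constraints import GreaterThan
from gpytorch.mlls import ExactMarginalLogLikelihood

cand_dim = 3
cands_norm = torch.rand(15, cand_dim)  # candidates already scaled to [0, 1]
cands_values = torch.rand(15)          # their (unstandardized) returns

gp = SingleTaskGP(cands_norm, standardize(cands_values).unsqueeze(1))
gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
fit_gpytorch_model(mll)

acq_fcn = UpperConfidenceBound(gp, beta=0.1, maximize=True)
cand, acq_value = optimize_acqf(
    acq_function=acq_fcn,
    bounds=torch.stack([torch.zeros(cand_dim), torch.ones(cand_dim)]),
    q=1,
    num_restarts=5,
    raw_samples=100,
)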
Exemplo n.º 30
0
    def __init__(self, test_data, args):
        # data buffer; only stores training data. test_data is only stored in the GP model before the model is trained
        self.n = 0
        self.data = None
        self.index_list = []

        self.previous_loss = CUDA(torch.tensor(np.inf))
        self.trigger_training = CUDA(torch.tensor(1e-4))

        self.lr = args.lr
        self.state_dim = args.state_dim
        self.action_dim = args.action_dim
        self.input_dim = self.state_dim + self.action_dim
        self.gp_iter = args.gp_iter

        self.normalize_trigger = 1
        self.eps = CUDA(torch.tensor(1e-10))
        self.mu_x = CUDA(torch.zeros((self.input_dim)))
        self.sigma_x = CUDA(torch.ones((self.input_dim)))
        #self.sigma_x[9:12] = CUDA(torch.tensor(10.0))
        #self.sigma_x[12:18] = CUDA(torch.tensor(10.0))
        self.mu_y = CUDA(torch.zeros((self.state_dim)))
        self.sigma_y = CUDA(torch.ones((self.state_dim)))
        #self.sigma_y[9:12] = CUDA(torch.tensor(10.0))
        #self.sigma_y[12:18] = CUDA(torch.tensor(10.0))

        # parameters for inducing GP
        self.max_inducing_point = args.max_inducing_point
        self.trigger_induce = args.trigger_induce
        self.sample_number = args.sample_number

        # prior of the kernel parameters
        # [NOTE] these prior parameters should be similar to the estimated parameters of the real data
        # if the lengthscale is too large, it will be too difficult to create new components
        # if the lengthscale is too small, it will be too easy to create new components
        # if noise_covar is too large, the prediction will be inaccurate
        # if noise_covar is too small, the covariance will be very small, causing numerical problems
        self.param = CUDA(torch.tensor(args.param))

        # initialize model and likelihood
        model_list = []
        likelihood_list = []
        for m_i in range(self.state_dim):
            likelihood = CUDA(
                gpytorch.likelihoods.GaussianLikelihood(
                    noise_constraint=GreaterThan(self.param[1])))
            model = CUDA(
                SampleGPR(None, None, likelihood, self.input_dim, self.param))
            model.reset_parameters()
            likelihood_list.append(model.likelihood)
            model_list.append(model)

        # initialize model list
        self.model = gpytorch.models.IndependentModelList(*model_list)
        self.likelihood = gpytorch.likelihoods.LikelihoodList(*likelihood_list)

        # initialize optimizer
        self.optimizer = torch.optim.Adam([{
            'params': self.model.parameters()
        }],
                                          lr=self.lr)
        self.mll = gpytorch.mlls.SumMarginalLogLikelihood(
            self.likelihood, self.model)

        # change the flag
        self.model.eval()
        self.likelihood.eval()
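For context, the usual GPyTorch training loop for an `IndependentModelList` / `SumMarginalLogLikelihood` pair set up like this is sketched below; it is not part of the original class and assumes the sub-models have already received data via `set_train_data`:

def fit_model_list(model, likelihood, mll, optimizer, gp_iter):
    # `model` is a gpytorch.models.IndependentModelList, `likelihood` the matching
    # LikelihoodList, `mll` a SumMarginalLogLikelihood, and `optimizer` a torch
    # optimizer over model.parameters().
    model.train()
    likelihood.train()
    for _ in range(gp_iter):
        optimizer.zero_grad()
        output = model(*model.train_inputs)
        loss = -mll(output, model.train_targets)
        loss.backward()
        optimizer.step()
    model.eval()
    likelihood.eval()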