Example #1
    def __init__(self, dim: int, model_list: list, options: dict) -> None:

        logger.info("Starting EIC ...")

        self.model_list = model_list

        self.dim = dim
        self.Nrestarts = options.optimization.Nrestarts
        self.algo_name = options.optimization.algo_name
        self.constrained_opt = OptimizationNonLinear(
            dim=self.dim,
            fun_obj=self.forward,
            algo_str=self.algo_name,
            bounds=[[0.0] * self.dim, [1.0] * self.dim],
            minimize=False,
            what2optimize_str="EIC acquisition")

        # Dummy forward pass on the constraint model (needed to initialize its internal state):
        self.model_list[idxm['cons']](torch.randn(size=(1, 1, self.dim)))

        # self.use_nlopt = False
        self.disp_info_scipy_opti = options.optimization.disp_info_scipy_opti

        # self._rho_conserv = options.prob_satisfaction
        self.x_next, self.alpha_next = None, None
        self.only_prob = False

        self.x_eta_c = None
        self.eta_c = None
        self.bounds = torch.tensor([[0.0] * self.dim, [1.0] * self.dim],
                                   device=device)

        self.maximize = False  # If True, we assume we want to maximize the objective. Herein, we consider it a cost, hence we minimize it

    def __init__(self, model: Model, options: dict) -> None:

        # best_f = torch.min(model_list.models[0].train_targets)

        # Initialize parent classes in the following order:
        ExpectedImprovement.__init__(self,
                                     model=model,
                                     best_f=0.0,
                                     maximize=False)

        AcquisitionBaseTools.__init__(
            self, model=model, Nrestarts_eta=options.optimization.Nrestarts)

        logger.info("Starting EI ...")

        self.dim = model.dim
        self.Nrestarts = options.optimization.Nrestarts
        self.algo_name = options.optimization.algo_name
        self.constrained_opt = OptimizationNonLinear(
            dim=self.dim,
            fun_obj=self.forward,
            algo_str=self.algo_name,
            bounds=[[0.0] * self.dim, [1.0] * self.dim],
            minimize=False,
            what2optimize_str="EI acquisition")
        # self.use_nlopt = False
        self.disp_info_scipy_opti = options.optimization.disp_info_scipy_opti
        self.method = "L-BFGS-B"

        self.x_next, self.alpha_next = None, None
class ExpectedImprovementWithConstraints(AcquisitionBaseToolsConstrained,
                                         ConstrainedExpectedImprovement):
    def __init__(self, model_list: List[Model], constraints,
                 options: dict) -> None:

        # best_f = torch.min(model_list.models[0].train_targets)

        # Initialize parent classes in the following order:
        ConstrainedExpectedImprovement.__init__(self,
                                                model=model_list,
                                                best_f=0.0,
                                                objective_index=0,
                                                constraints=constraints,
                                                maximize=False)

        AcquisitionBaseToolsConstrained.__init__(
            self,
            model_list=model_list,
            Nrestarts_eta_c=options.optimization.Nrestarts)

        logger.info("Starting EIC ...")

        self.dim = model_list.models[idxm['cons']].dim
        self.Nrestarts = options.optimization.Nrestarts
        self.algo_name = options.optimization.algo_name
        self.constrained_opt = OptimizationNonLinear(
            dim=self.dim,
            fun_obj=self.forward,
            algo_str=self.algo_name,
            bounds=[[0.0] * self.dim, [1.0] * self.dim],
            minimize=False,
            what2optimize_str="EIC acquisition")
        # self.use_nlopt = False
        self.disp_info_scipy_opti = options.optimization.disp_info_scipy_opti

        self._rho_conserv = options.prob_satisfaction
        self.x_next, self.alpha_next = None, None
        self.only_prob = False

        # pdb.set_trace()

    @property
    def rho_conserv(self):
        return self._rho_conserv

    # @t_batch_mode_transform(expected_q=1)
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Constrained Expected Improvement on the candidate set X.

		Args:
			X: A `(b) x 1 x d`-dim Tensor of `(b)` t-batches of `d`-dim design
				points each.

		Returns:
			A `(b)`-dim Tensor of Expected Improvement values at the given
				design points `X`.
		"""
        # import pdb; pdb.set_trace()

        if X.dim() == 1:
            X = X.view(1, self.dim)

        means, sigmas = self._get_posterior_reimplemented(X)

        # (b) x 1
        mean_obj = means[..., [self.objective_index]]
        sigma_obj = sigmas[..., [self.objective_index]]

        # print("mean_obj.shape:",mean_obj.shape)
        # print("sigma_obj.shape:",sigma_obj.shape)
        # print("means.shape:",means.shape)
        # print("sigmas.shape:",sigmas.shape)

        # Probability of feasibility:
        prob_feas = self._compute_prob_feas(X=X, means=means, sigmas=sigmas)

        # print("prob_feas.shape:",prob_feas.shape)

        if self.only_prob:
            ei_times_prob = prob_feas  # Use only the probability of feasibility
        else:
            u = (mean_obj - self.best_f.expand_as(mean_obj)) / sigma_obj
            if not self.maximize:
                u = -u
            normal = Normal(
                torch.zeros(1, device=u.device, dtype=u.dtype),
                torch.ones(1, device=u.device, dtype=u.dtype),
            )
            ei_pdf = torch.exp(normal.log_prob(u))  # (b) x 1
            ei_cdf = normal.cdf(u)
            ei = sigma_obj * (ei_pdf + u * ei_cdf)
            ei_times_prob = ei.mul(prob_feas)

        # print("ei_times_prob.shape:",ei_times_prob.shape)

        val = ei_times_prob.squeeze(dim=-1)
        if (val.dim() == 1 and len(val) == 1) or val.dim() == 0:
            val = val.item()
        # else:
        # 	pdb.set_trace()

        # print("X.shape:",X.shape)
        # print("val:",val)

        return val
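
    # Reference note (not part of the original code): for a single design point x,
    # the value returned above is the analytic Expected Improvement of the
    # objective scaled by the probability of feasibility of the constraints,
    #   EIC(x) = sigma_f(x) * (phi(u) + u * Phi(u)) * Pr(g(x) <= 0),
    # with u = -(mu_f(x) - best_f) / sigma_f(x) for minimization, phi/Phi the
    # standard normal pdf/cdf, and Pr(g(x) <= 0) given by self._compute_prob_feas.
    # If self.only_prob is True, only the probability of feasibility is returned.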

    def _get_posterior_reimplemented(self, X: Tensor) -> Tensor:

        # Objective is assumed to be in the index 0. Constraints in the rest
        # Objective is assumed to be a classireg.models.GPmodel object
        # Constraints are assumed to be a classireg.models.GPCRmodel object

        means = torch.zeros([X.shape[0], self.model_list.num_outputs])
        sigmas = torch.zeros([X.shape[0], self.model_list.num_outputs])
        # pdb.set_trace()
        for k in range(self.model_list.num_outputs):
            means[...,
                  k] = self.model_list.models[k].posterior(X).mean.squeeze()
            sigmas[..., k] = self.model_list.models[k].posterior(
                X).variance.squeeze().sqrt().clamp_min(1e-9)  # (b) x m
            # means[...,k] 	= self.model_list.models[k].posterior(X.view(1,self.dim)).mean.squeeze()
            # sigmas[...,k] = self.model_list.models[k].posterior(X.view(1,self.dim)).variance.squeeze().sqrt().clamp_min(1e-9)  # (b) x m

        return means, sigmas

    def get_best_constrained_evaluation(self):

        Ycons_safe = self.model.models[idxm['cons']].train_ys
        Yobj_safe = self.model.models[idxm[
            'obj']].train_targets  # Since we don't include the non-stable evaluations in GPCR, the safe evaluations are the evaluations themselves
        if len(Ycons_safe) > 0:
            return torch.min(Yobj_safe).view(1)
        else:
            return None

    def get_next_point(self) -> (Tensor, Tensor):

        if self.model.models[
                idxm["obj"]].train_targets is None:  # No safe evaluations case
            self.eta_c = torch.zeros(1, device=device, dtype=dtype)
            self.x_eta_c = torch.zeros((1, self.dim),
                                       device=device,
                                       dtype=dtype)
            self.only_prob = True
        else:

            # The following functions need to be called in the given order:
            try:
                self.update_eta_c(
                    rho_t=self.rho_conserv
                )  # Update min_x mu(x|D) s.t. Pr(g(x) <= 0) > rho_t
            except Exception as inst:
                logger.info("Exception (!) type: {0:s} | args: {1:s}".format(
                    str(type(inst)), str(inst.args)))
                logger.info("Not optimizing eta_c ...")

            # self.best_f = self.eta_c
            self.best_f = self.get_best_constrained_evaluation(
            ) - self.model.models[idxm["obj"]].likelihood.noise.sqrt()[0].view(
                1)
            self.only_prob = False

        self.x_next, self.alpha_next = self.get_acqui_fun_maximizer()

        # Prevent from getting stuck into global minima:
        close_points, _ = self.model_list.models[
            idxm["cons"]]._identify_stable_close_to_unstable(
                X_sta=self.x_next.cpu().numpy(),
                X_uns=self.model_list.models[
                    idxm["cons"]].train_x_sorted.cpu().numpy(),
                top_dist=math.sqrt(self.dim) * 0.005,
                verbosity=False)
        if len(close_points) > 0:
            logger.info(
                "Changed the evaluation to random as it was very close to an existing evaluation, within math.sqrt(self.dim)*0.005 = {0:f}"
                .format(math.sqrt(self.dim) * 0.005))
            self.x_next = draw_sobol_samples(bounds=torch.Tensor(
                [[0.0] * self.dim, [1.0] * self.dim]),
                                             n=1,
                                             q=1).view(-1, self.dim)

        if self.x_next is not None and self.alpha_next is not None:
            logger.info(
                "xnext: " +
                str(self.x_next.view((1, self.dim)).detach().cpu().numpy()))
            logger.info("alpha_next: {0:2.2f}".format(self.alpha_next.item()))
        else:
            logger.info("xnext: None")
            logger.info("alpha_next: None")

        logger.info("self.x_eta_c: " + str(self.x_eta_c))
        logger.info("self.eta_c: " + str(self.eta_c))
        logger.info("self.best_f: " + str(self.best_f))

        return self.x_next, self.alpha_next

    def get_acqui_fun_maximizer(self):

        logger.info(
            "Computing next candidate by maximizing the acquisition function ..."
        )
        batch_limit = 2
        # batch_limit = 50 # This is a super bad idea for GPCR.
        options = {
            "batch_limit": batch_limit,
            "maxiter": 300,
            "ftol": 1e-6,
            "method": "L-BFGS-B",
            "iprint": 2,
            "maxls": 20,
            "disp": self.disp_info_scipy_opti
        }

        # Get initial random restart points:
        logger.info("Generating random restarts ...")
        initial_conditions = gen_batch_initial_conditions(
            acq_function=self,
            bounds=self.bounds,
            q=1,
            num_restarts=self.Nrestarts,
            raw_samples=500,
            options=options)
        # logger.info("initial_conditions:" + str(initial_conditions))

        logger.info("Using nlopt ...")
        x_next, alpha_next = self.constrained_opt.run_optimization(
            initial_conditions.view((self.Nrestarts, self.dim)))

        # # TODO: Is this really needed?
        # prob_val = self.get_probability_of_safe_evaluation(x_next.unsqueeze(1))
        # if prob_val < self.rho_conserv:
        # 	logger.info("(Is this really needed????) x_next violates the probabilistic constraint...")
        # 	pdb.set_trace()

        logger.info("Done!")

        return x_next, alpha_next

    def get_probability_of_safe_evaluation(self, X: Tensor) -> Tensor:
        """

		Code borrowed from botorch.acquisition.analytic.ConstrainedExpectedImprovement.forward()
		"""

        # posterior = super()._get_posterior(X=X)
        # means = posterior.mean.squeeze(dim=-2)  # (b) x m
        # sigmas = posterior.variance.squeeze(dim=-2).sqrt().clamp_min(1e-9)  # (b) x m

        means, sigmas = self._get_posterior_reimplemented(X)
        prob_feas = self._compute_prob_feas(X=X, means=means, sigmas=sigmas)

        return prob_feas

    def update_eta_c(self, rho_t):
        """
		Search the constrained minimum of the posterior mean, i.e.,
		min_x mu(x|D) s.t. Pr(g(x) <= 0) > rho_t
		If no safe area has been found yet, return the best observation of f(x) collected so far.
		
		NOTE: Normally, rho_t should be set to the conservative (safe) value, e.g., rho_t = 0.99
		"""

        if self._does_exist_at_least_one_safe_area(rho_t):
            self.x_eta_c, self.eta_c = self.find_eta_c(rho_t)
        elif self.model.models[1].train_xs.shape[
                0] > 0:  # If there exists a safe evaluation but not a safe area:
            self.x_eta_c, self.eta_c = self.find_eta_c(0.0)
        else:
            self.x_eta_c, self.eta_c = None, None
        # else:
        # 	ind_min = torch.argmin(self.model_list.models[idxm['obj']].train_targets)
        # 	self.x_eta_c = self.model_list.models[idxm['obj']].train_inputs[0][ind_min,:].view((1,self.dim))
        # 	self.eta_c = self.model_list.models[idxm['obj']].train_targets[ind_min].view(1)

    def _does_exist_at_least_one_safe_area(self, rho_t):
        """
		Check if at least one of the collected evaluations of the constraint is such that the probabilistic constraint is satisfied.
		If not, we can be sure the constraint is violated everywhere, and it won't make sense to run self.find_eta_c(rho_t)
		
		NOTE: Normally, rho_t should be set to the conservative (safe) value, e.g., rho_t = 0.99
		"""

        train_inputs = self.model_list.models[idxm['cons']].train_inputs[0]
        prob_feas = self.get_probability_of_safe_evaluation(train_inputs)
        exist_safe_areas = torch.any(prob_feas > rho_t)
        return exist_safe_areas
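

# A minimal, self-contained sketch (not part of the original classes) of the
# quantity ExpectedImprovementWithConstraints.forward computes for one point,
# assuming the posterior mean/stddev of the objective and the probability of
# feasibility are already available. All names below are hypothetical and for
# illustration only.
import torch
from torch.distributions import Normal


def _eic_value_sketch(mean_obj, sigma_obj, best_f, prob_feas, maximize=False):
    normal = Normal(torch.zeros(1), torch.ones(1))
    u = (mean_obj - best_f) / sigma_obj
    if not maximize:
        u = -u  # the objective is treated as a cost, hence minimized
    ei = sigma_obj * (torch.exp(normal.log_prob(u)) + u * normal.cdf(u))
    return ei * prob_feas  # expected improvement weighted by feasibility


# Example usage (hypothetical numbers):
# _eic_value_sketch(torch.tensor([0.2]), torch.tensor([0.1]),
#                   best_f=torch.tensor([0.3]), prob_feas=torch.tensor([0.9]))
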
class GPmodel(ExactGP, GPyTorchModel):

    _num_outputs = 1  # to inform GPyTorchModel API

    def __init__(self,
                 dim: int,
                 train_X: Tensor,
                 train_Y: Tensor,
                 options: dict,
                 which_type: Optional[str] = "obj") -> None:

        self.dim = dim

        if len(train_Y) == 0:  # No data case
            train_X = None
            train_Y = None
        else:
            # Error checking:
            assert train_Y.dim() == 1, "train_Y is required to be 1D"
            self._validate_tensor_args(
                X=train_X, Y=train_Y[:, None]
            )  # Only for this function, train_Y must be 2D (this must be a bug in botorch)

        print("\n")
        logger.info("### Initializing GP model for objective f(x) ###")

        # Likelihood:
        noise_std = options.hyperpars.noise_std.value
        if train_Y is not None:
            lik = FixedNoiseGaussianLikelihood(
                noise=torch.full_like(train_Y, noise_std**2))
        else:
            lik = FixedNoiseGaussianLikelihood(
                noise=torch.tensor([noise_std**2], device=device, dtype=dtype))

        # Initialize parent class:
        super().__init__(train_X, train_Y, lik)

        # # Obtain hyperprior for lengthscale and outputscale:
        # # NOTE: The mean (zero) and the model noise are fixed
        # lengthscale_prior, outputscale_prior = extract_prior(options.hyperpriors)

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lenthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior))

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                                  outputscale=[self.dim])
        self.dim_hyperpars = sum(
            [len(val) for val in self.idx_hyperpars.values()])

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Initialize prior mean:
        # self.mean_module = ConstantMean()
        self.mean_module = ZeroMean()

        # Initialize covariance function:
        # base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1],lengthscale_prior=GammaPrior(3.0, 6.0)) # original
        # self.covar_module = ScaleKernel(base_kernel=base_kernel,outputscale_prior=GammaPrior(2.0, 0.15)) # original
        # base_kernel = RBFKernel(ard_num_dims=self.dim,lengthscale_prior=lengthscale_prior,lengthscale_constraint=GreaterThan(1e-2))
        base_kernel = MaternKernel(nu=2.5,
                                   ard_num_dims=self.dim,
                                   lengthscale=0.1 * torch.ones(self.dim))
        self.covar_module = ScaleKernel(base_kernel=base_kernel)

        self.disp_info_scipy_opti = True
        # self.method = "L-BFGS-B"
        self.method = "LN_BOBYQA"
        # self.method = 'trust-constr'

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(
            Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[
            self.idx_hyperpars["outputscale"]]
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[
            self.idx_hyperpars["lengthscales"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

        # Initialize marginal log likelihood for the GP model.
        # mll_objective is callable
        # MLLGP can internally modify the model hyperparameters, and will do so throughout the optimization routine
        self.mll_objective = MLLGP(model_gp=self,
                                   likelihood_gp=self.likelihood,
                                   hyperpriors=hyperpriors)

        # Define nlopt optimizer:
        self.opti_hyperpars = OptimizationNonLinear(
            dim=self.dim_hyperpars,
            fun_obj=self.mll_objective,
            algo_str=self.method,
            tol_x=1e-4,
            Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
            bounds=self.hyperpars_bounds,
            what2optimize_str="GP hyperparameters")

        # Make sure we're on the right device/dtype
        if train_Y is not None:
            self.to(train_X)

        self.Nrestarts = options.hyperpars.optimization.Nrestarts

        self._update_hyperparameters()

        self.eval()

    def set_hyperparameters(self, lengthscale, outputscale, noise):
        self.covar_module.base_kernel.lengthscale = lengthscale
        self.covar_module.outputscale = outputscale
        self.likelihood.noise[:] = noise
        # self.mean_module.constant[:] = 0.0 # Assume zero mean

    def display_hyperparameters(self):
        logger.info("  Re-optimized hyperparameters")
        logger.info("  ----------------------------")
        logger.info("    Outputscale (stddev) | {0:2.4f}".format(
            self.covar_module.outputscale.item()))
        logger.info("    Lengthscale(s)       | " +
                    str(self.covar_module.base_kernel.lengthscale.detach().cpu(
                    ).numpy().flatten()))

    def logging(self):
        log_out = dict()
        log_out[
            "lengthscale"] = self.covar_module.base_kernel.lengthscale.detach(
            ).cpu().numpy()
        log_out["outputscale"] = self.covar_module.outputscale.item()
        log_out["noise"] = self.likelihood.noise.detach().cpu().numpy()
        log_out[
            "train_inputs"] = None if self.train_inputs is None else self.train_inputs[
                0].detach().cpu().numpy()
        log_out[
            "train_targets"] = None if self.train_targets is None else self.train_targets.detach(
            ).cpu().numpy()

        return log_out

    def _update_hyperparameters(self, Nrestarts=5):

        if self.train_targets is None:  # No data case
            return

        self.train()

        logger.info("Fitting GP model f(x) ...")
        logger.info("-------------------------")

        # Get random restarts:
        x_restarts = self._sample_hyperparameters_within_bounds(
            Nsamples=Nrestarts)
        # logger.info("x_restarts:" + str(x_restarts))

        # Store current hyperparameters, just in case the optimization below fails:
        outputscale = self.covar_module.outputscale.view(1)
        lengthscale = self.covar_module.base_kernel.lengthscale.flatten()

        try:
            # Compute new hyperparameters:
            new_hyperpars = self._optimize_acqui_use_restarts_individually(
            ).flatten()

            # Assign hyperparameters and compute loss:
            loss_new_hyperpars = self.mll_objective(new_hyperpars)
            logger.info("  Loss (new hyperparameters): {0:f}".format(
                loss_new_hyperpars.item()))
            self.display_hyperparameters()
        except Exception as inst:
            logger.info("  Exception (!) type: {0:s} | args: {1:s}".format(
                str(type(inst)), str(inst.args)))
            logger.info(
                "  Hyperparameter optimization failed (!!) Keeping the old ones ..."
            )
            loss_old_hyperpars = self.mll_objective.log_marginal(
                lengthscale, outputscale)
            logger.info("  Loss (old hyperparameters): {0:f}".format(
                loss_old_hyperpars.item()))
            self.display_hyperparameters()

    def _optimize_acqui_use_restarts_individually(self):

        # Get initial random restart points:
        logger.info("  Generating random restarts ...")
        options = {
            "maxiter": 200,
            "ftol": 1e-9,
            "method": "L-BFGS-B",
            "iprint": 2,
            "maxls": 20,
            "disp": self.disp_info_scipy_opti
        }
        bounds = torch.tensor(self.hyperpars_bounds,
                              device=device,
                              dtype=dtype)
        initial_conditions = gen_batch_initial_conditions(
            acq_function=self.mll_objective,
            bounds=bounds,
            q=1,
            num_restarts=self.Nrestarts,
            raw_samples=500,
            options=options)

        logger.info(
            "  Optimizing loss function with {0:d} restarts ...".format(
                self.Nrestarts))
        new_hyperpars_many = torch.zeros(size=(self.Nrestarts, 1,
                                               self.dim_hyperpars))
        new_hyperpars_loss_many = torch.zeros(size=(self.Nrestarts, ))

        new_hyperpars, _ = self.opti_hyperpars.run_optimization(
            x_restarts=initial_conditions.view(self.Nrestarts,
                                               self.dim_hyperpars))

        logger.info("  Done!")

        return new_hyperpars

    def _get_hyperparameters_bounds(self, hyperpriors):

        # Compute the domain for hyperparameter search by truncating the support of the corresponding hyperprior at the .75 quantile
        # The lower bound is necessary for numerical stability, i.e., when computing logpdf() in classireg.models.mll_gpcr.log_marginal()
        # All values of the dictionary are defined as double lists
        hyperpriors_support = dict(
            lengthscales=[[0.05] * self.dim,
                          [hyperpriors["lengthscales"].ppf(.75)] * self.dim],
            outputscale=[[0.05], [hyperpriors["outputscale"].ppf(.75)]])

        # Automatically get the bounds from the dictionary:
        hyperpars_lb = []
        hyperpars_ub = []
        for hyperpar in hyperpriors_support.values():
            hyperpars_lb += hyperpar[0]
            hyperpars_ub += hyperpar[1]
        hyperpars_bounds = [hyperpars_lb, hyperpars_ub]

        return hyperpars_bounds
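
    # Illustration (hypothetical numbers, not part of the original code): for
    # self.dim == 2 the returned structure is a pair of flat lists,
    #   hyperpars_bounds = [[0.05, 0.05, 0.05], [ls_q75, ls_q75, out_q75]],
    # i.e. lower/upper bounds for the two lengthscales followed by the
    # outputscale, where ls_q75 and out_q75 denote the 0.75 quantiles of the
    # corresponding hyperpriors.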

    def _sample_hyperparameters_within_bounds(self, Nsamples):

        # Get a sample from the prior for initialization:
        new_seed = torch.randint(low=0, high=100000, size=(1, )).item(
        )  # Top-level seeds influence this value; this would not be the case with new_seed = None
        hyperpars_restarts = draw_sobol_samples(bounds=torch.tensor(
            self.hyperpars_bounds),
                                                n=Nsamples,
                                                q=1,
                                                seed=new_seed)
        hyperpars_restarts = hyperpars_restarts.squeeze(
            1)  # Remove batch dimension [n q dim] -> [n dim]

        return hyperpars_restarts

    def forward(self, x):

        # A `num_restarts x q x d` tensor of initial conditions.

        # print("")
        # print("x.shape:",x.shape)
        # print("x:",x)

        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)

        mvn = MultivariateNormal(mean_x, covar_x)
        # print("mean_x.shape:"+str(mean_x.shape))
        # print("covar_x.shape:"+str(covar_x.shape))
        # print("mvn.batch_shape: "+str(mvn.batch_shape))
        # print("mvn.event_shape: "+str(mvn.event_shape))

        return mvn

    def plot(self,
             axes=None,
             block=False,
             Ndiv=100,
             legend=True,
             title="GPgrad",
             plotting=True,
             plotCDF=False,
             clear_axes=False,
             Nsamples=None,
             ylabel=None,
             ylim=None,
             pause=None,
             showtickslabels_x=True):
        '''
		This function hardcodes the plotting limits between zero and one for now
		'''
        if plotting == False or self.dim > 1:
            return

        pp = PlotProbability()
        xpred_vec = torch.linspace(0.0, 1.0, Ndiv)[:, None]
        xpred_vec = xpred_vec.unsqueeze(
            0)  # Ndiv batches of [q=1 x self.dim] dimensions each

        # Predict:
        posterior = self.posterior(xpred_vec)

        # Get upper and lower confidence bounds (2 standard deviations from the mean):
        lower_ci, upper_ci = posterior.mvn.confidence_region()

        # Posterior mean:
        mean_vec = posterior.mean

        if self.dim == 1:
            axes = pp.plot_GP_1D(
                xpred_vec=xpred_vec.squeeze().cpu().numpy(),
                fpred_mode_vec=mean_vec.squeeze().detach().cpu().numpy(),
                fpred_quan_minus=lower_ci.squeeze().detach().cpu().numpy(),
                fpred_quan_plus=upper_ci.squeeze().detach().cpu().numpy(),
                X_sta=None if self.train_inputs is None else
                self.train_inputs[0].detach().cpu().numpy(),
                Y_sta=None if self.train_targets is None else
                self.train_targets.detach().cpu().numpy(),
                title=title,
                axes=axes,
                block=block,
                legend=legend,
                clear_axes=True,
                xlabel=None,
                ylabel=ylabel,
                xlim=np.array([0., 1.]),
                ylim=ylim,
                labelsize="x-large",
                legend_loc="best",
                colormap="paper",
                showtickslabels_x=showtickslabels_x)

            if Nsamples is not None:
                f_sample = posterior.sample(
                    sample_shape=torch.Size([Nsamples]))
                for k in range(Nsamples):
                    axes.plot(xpred_vec.squeeze().detach().cpu().numpy(),
                              f_sample[k, 0, :, 0],
                              linestyle="--",
                              linewidth=1.0,
                              color="sienna")

        elif self.dim == 2:
            pass

        plt.show(block=block)
        if pause is not None:
            plt.pause(pause)

        return axes
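

# A minimal, self-contained sketch (not part of the original class) of how
# GPmodel._sample_hyperparameters_within_bounds draws its random restarts:
# Sobol samples inside box bounds, with the q-dimension squeezed out. The
# bounds below are hypothetical (two lengthscales plus one outputscale).
import torch
from botorch.utils.sampling import draw_sobol_samples


def _sobol_restarts_sketch(n: int = 4) -> torch.Tensor:
    bounds = torch.tensor([[0.05, 0.05, 0.05], [2.0, 2.0, 5.0]])  # [2 x dim]
    restarts = draw_sobol_samples(bounds=bounds, n=n, q=1, seed=0)
    return restarts.squeeze(1)  # [n, q, dim] -> [n, dim]
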
class GPCRmodel(BatchedMultiOutputGPyTorchModel, GP):
    def __init__(self, dim: int, train_x: Tensor, train_yl: Tensor, options):
        """
			train_X: A `batch_shape x n x d` tensor of training features.
			train_Y: A `batch_shape x n x m` tensor of training observations.
			train_Yvar: A `batch_shape x n x m` tensor of observed measurement noise.
		"""

        # Initialize parent class:
        super().__init__(
        )  # Required because torch.nn.Module, a parent of GPyTorchModel, expects its constructor to be called

        print("\n")
        logger.info("### Initializing GPCR model for constraint g(x) ###")

        self.discard_too_close_points = options.discard_too_close_points

        self.dim = dim
        assert self.dim == train_x.shape[
            1], "The input dimension must agree with train_x"
        self.train_x = torch.tensor([],
                                    device=device,
                                    dtype=dtype,
                                    requires_grad=False)
        self.train_yl = torch.tensor([],
                                     device=device,
                                     dtype=dtype,
                                     requires_grad=False)
        self.update_XY(train_x, train_yl)

        # One output
        # ==========
        # pdb.set_trace()
        self._validate_tensor_args(X=self.train_xs,
                                   Y=self.train_ys.view(-1, 1))
        # validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
        self._set_dimensions(train_X=self.train_xs,
                             train_Y=self.train_ys.view(-1, 1))
        # self.train_xs,_,_ = self._transform_tensor_args(X=self.train_xs, Y=self.train_ys)

        # # Two outputs
        # # ===========
        # # pdb.set_trace()
        # self._validate_tensor_args(X=self.train_xs, Y=self.train_yl)
        # # validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
        # self._set_dimensions(train_X=self.train_xs, train_Y=self.train_yl)
        # # self.train_xs,_,_ = self._transform_tensor_args(X=self.train_xs, Y=self.train_ys)

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lenthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior),
            threshold=eval(options.hyperpars.threshold.prior))

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                                  outputscale=[self.dim],
                                  threshold=[self.dim + 1])
        self.dim_hyperpars = sum(
            [len(val) for val in self.idx_hyperpars.values()])

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Define mean and covariance modules with dummy hyperparameters
        self.mean_module = ZeroMean()
        self.covar_module = ScaleKernel(base_kernel=MaternKernel(
            nu=2.5,
            ard_num_dims=self.dim,
            lengthscale=0.1 * torch.ones(self.dim)),
                                        outputscale=10.0)

        # # If non-zero mean, constant mean is assumed:
        # if "constant" in dir(self.mean_module):
        # 	self.__threshold = self.mean_module.constant
        # else:
        # 	self.__threshold = 0.0

        # If non-zero mean, constant mean is assumed:
        if "constant" in dir(self.mean_module):
            self.__threshold = self.mean_module.constant
            self.thres_init = self.mean_module.constant
        else:
            self.__threshold = options.hyperpars.threshold.init
            self.thres_init = options.hyperpars.threshold.init

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(
            Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[
            self.idx_hyperpars["outputscale"]]
        print("self.covar_module.outputscale:",
              str(self.covar_module.outputscale))
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[
            self.idx_hyperpars["lengthscales"]]
        self.threshold = hyperpars_sample[self.idx_hyperpars["threshold"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

        self.gauss_tools = GaussianTools()

        # Initialize EP
        self.ep = ExpectationPropagation(
            prior_mean=self.mean_module(train_x).cpu().detach().numpy(),
            prior_cov=self.covar_module(train_x).cpu().detach().numpy(),
            Maxiter=options.ep.maxiter,
            required_precission=options.ep.prec,
            verbosity=options.ep.verbo)

        # Initialize marginal log likelihood for the GPCR model.
        # mll_objective is callable
        # MLLGPCR can internally modify the model hyperparameters, and will do so throughout the optimization routine
        self.mll_objective = MLLGPCR(model_gpcr=self, hyperpriors=hyperpriors)

        # Define nlopt optimizer:
        self.opti = OptimizationNonLinear(
            dim=self.dim_hyperpars,
            fun_obj=self.mll_objective,
            algo_str=options.hyperpars.optimization.algo_name,
            tol_x=1e-3,
            Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
            bounds=self.hyperpars_bounds,
            what2optimize_str="GPCR hyperparameters")

        # Extra parameters:
        self.top_dist_ambiguous_points = 0.5 * torch.min(
            self.covar_module.base_kernel.lengthscale).item()
        self.factor_heteroscedastic_noise = 10**4

        # Update hyperparameters:
        self.Nrestarts_hyperpars = options.hyperpars.optimization.Nrestarts
        self._update_hyperparameters(Nrestarts=self.Nrestarts_hyperpars)

        # self.likelihood = FixedNoiseGaussianLikelihood(noise=torch.eye())
        self.likelihood = None

        # Keep for compatibility with BOtorch acquisition functions:
        # pdb.set_trace()
        # _replace(v=node.v)
        # self.num_outputs = 1

        # self.eval()

    @property
    def threshold(self):
        return self.__threshold

    @threshold.setter
    def threshold(self, value):
        """

		TODO/NOTE: This function adds the desired threshold value to another pre-existing value.
		This makes sense when self.threshold is set as the result of hyperparameter optimization,
		since therein the quantity learned is an increment over torch.max(self.train_ys).
		However, in general, setting self.threshold manually is a BAD idea. This
		should be changed.
		"""
        if len(self.train_ys) > 0:  # If there exist safe evaluations
            self.__threshold = torch.max(self.train_ys) + value
        else:
            # self.__threshold = value
            self.__threshold = self.thres_init + value

    @threshold.getter
    def threshold(self):
        return self.__threshold
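
    # Illustration (hypothetical numbers, not part of the original code): if the
    # safe observations are train_ys = [0.8, 1.2] and hyperparameter optimization
    # proposes an increment of 0.3, then `self.threshold = 0.3` stores
    # max(train_ys) + 0.3 = 1.5; with no safe observations it stores
    # thres_init + 0.3 instead.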

    def forward(self, x_in: Tensor) -> MultivariateNormal:
        """

		This method is not strictly needed, because we won't train this model
		as a NN, nor use autograd for it. However, it's left here for compatibility
		and also used in a few places.
		"""

        return MultivariateNormal(mean=self.mean_module(x_in),
                                  covariance_matrix=self.covar_module(x_in))

    # @t_batch_mode_transform(expected_q=1)
    def predictive(self, x_in):

        # A `num_restarts x q x d` tensor of initial conditions.

        # print("")
        # print("x_in.shape: "+str(x_in.shape))
        # mean_shape = x_in.shape[:-1]
        x_in = self._error_checking_x_in(x_in)
        # # print("x_in.shape: "+str(/x_in.shape))
        # print("x_in:",x_in)

        if self.train_x_sorted.shape[0] == 0:  # No data case
            return self.forward(x_in)
        else:
            with torch.no_grad():
                k_Xxp = self.covar_module(self.train_x_sorted, x_in).evaluate()
                k_xpxp = self.covar_module(x_in).evaluate()
                # K_XX_inv_k_Xxp = torch.solve(input=k_Xxp,A=self.Kprior_cov.evaluate() + 1e-6*torch.eye(self.train_x_sorted.shape[0]))[0]
                K_XX_inv_k_Xxp = torch.solve(input=k_Xxp,
                                             A=self.Kprior_cov.evaluate())[0]

                # mean_pred = K_XX_inv_k_Xxp.T.dot(self.expectation_posterior)
                # mean_pred = torch.matmul(K_XX_inv_k_Xxp.t(),self.expectation_posterior[:,None])
                mean_pred = torch.matmul(K_XX_inv_k_Xxp.t(),
                                         self.expectation_posterior)
                # cov_pred 	= k_xpxp - k_Xxp.T.dot(K_XX_inv_k_Xxp) + K_XX_inv_k_Xxp.T.dot(self.covariance_posterior).dot(K_XX_inv_k_Xxp)
                # cov_pred 	= k_xpxp - torch.matmul(k_Xxp.t(),K_XX_inv_k_Xxp) + torch.chain_matmul(K_XX_inv_k_Xxp.t(),self.covariance_posterior,K_XX_inv_k_Xxp)
                cov_pred = k_xpxp - torch.matmul(
                    k_Xxp.t(), K_XX_inv_k_Xxp) + torch.matmul(
                        K_XX_inv_k_Xxp.t(),
                        torch.matmul(self.covariance_posterior,
                                     K_XX_inv_k_Xxp))
                # cov_pred 	= k_xpxp - torch.matmul(k_Xxp.t(),K_XX_inv_k_Xxp) + torch.matmul(K_XX_inv_k_Xxp.t(),torch.matmul(self.covariance_posterior+1e-5*torch.eye(self.train_x_sorted.shape[0]),K_XX_inv_k_Xxp))

                cov_pred_numpy = cov_pred.cpu().numpy()
                # cov_pred_numpy = self.gauss_tools.fix_singular_matrix(cov_pred_numpy,verbosity=False,what2fix="Fixing prior cov...") # DBG: TEMPORARY TRIAL; NOT ADDING NOISE
                cov_pred = torch.from_numpy(cov_pred_numpy).to(device=device,
                                                               dtype=dtype)
                # pdb.set_trace()
                # cov_pred += 1e-4*torch.eye(cov_pred.shape[0])
        """
		Re-shape mean
		
		TODO: This might not be needed anymore, since we're using _get_posterior_reimplemented in EIC
		"""
        if x_in.dim() == 3:
            batch_shape = torch.Size([1])
        elif x_in.dim() == 2:
            batch_shape = torch.Size([])
        else:
            raise ValueError("No way")
        test_shape = torch.Size([x_in.shape[0]])
        mean_pred = mean_pred.view(*batch_shape, *test_shape).contiguous()

        try:
            mvn = MultivariateNormal(mean=mean_pred,
                                     covariance_matrix=cov_pred)
        except Exception as inst:
            logger.info("type: {0:s} | args: {1:s}".format(
                str(type(inst)), str(inst.args)))
            # pdb.set_trace() # DBG: TEMPORARY TRIAL; NOT ADDING NOISE
            mvn = MultivariateNormal(mean=mean_pred,
                                     covariance_matrix=cov_pred +
                                     1e-6 * torch.eye(x_in.shape[0]))

        # print("mean_pred.shape:"+str(mean_pred.shape))
        # print("cov_pred.shape:"+str(cov_pred.shape))
        # print("mvn.batch_shape: "+str(mvn.batch_shape))
        # print("mvn.event_shape: "+str(mvn.event_shape))
        # print("mvn:",mvn)

        return mvn
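
    # Reference note (a sketch of the algebra implemented above, not part of the
    # original code): with K = k(X_sorted, X_sorted), k_* = k(X_sorted, x), and
    # EP posterior moments (m_EP, S_EP) over the latent values at X_sorted,
    #   mean(x) = k_*^T K^{-1} m_EP
    #   cov(x)  = k(x, x) - k_*^T K^{-1} k_* + k_*^T K^{-1} S_EP K^{-1} k_*
    # i.e. the standard GP predictive, conditioned on the approximate (EP)
    # posterior rather than on the raw observations.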

    def update_XY(self, x_eval, yl_eval):
        '''
		x_eval [1 x dim]: A single point
		yl_eval [2,]: evaluation and label, i.e., 
			y_eval = yl_eval[0]
			l_eval = yl_eval[1]
		'''

        # Append datapoint:
        self.train_x = torch.cat([self.train_x, x_eval], dim=0)
        self.train_yl = torch.cat([self.train_yl, yl_eval], dim=0)

        # Update internal variables:
        logger.info("Updating after adding new data point...")
        self._update_subsets()

        # # Update hyperparameters
        # # Internally, this also updates the GPCR approximate posterior, so
        # # we do not need to call self._update_approximate_posterior() again
        # if learn_hyperparameters:
        # 	self._update_hyperparameters(Nrestarts=self.Nrestarts_hyperpars)

    def _update_subsets(self):

        self.train_xu = self.train_x[self.train_yl[:, 1] ==
                                     -1, :]  # Training set with unsafe points
        self.train_xs = self.train_x[self.train_yl[:, 1] ==
                                     +1, :]  # Training set with safe points
        self.train_ys = self.train_yl[self.train_yl[:, 1] == +1, 0]

        if self.discard_too_close_points:

            # Eliminate points that are *extremely* close to each other, to avoid numerical instability
            ind_stay_in_stable = self.discard_points_that_are_too_close_to_avoid_numerical_unstability(
                "stable")
            ind_stay_in_unstable = self.discard_points_that_are_too_close_to_avoid_numerical_unstability(
                "unstable")
            # if torch.any(~ind_stay_in_stable) or torch.any(~ind_stay_in_unstable):
            # 	pdb.set_trace()

            self.train_xs = self.train_xs[ind_stay_in_stable, :]
            self.train_xu = self.train_xu[ind_stay_in_unstable, :]
            self.train_ys = self.train_ys[ind_stay_in_stable]

        # Concatenate both inputs:
        self.train_x_sorted = torch.cat([self.train_xs, self.train_xu],
                                        dim=0)  # Sorted training set

        # For compatibility, although not needed:
        self.train_inputs = [self.train_x]
        self.train_targets = self.train_yl

    def discard_points_that_are_too_close_to_avoid_numerical_unstability(
            self, do_it_with="stable", debug=False):
        """
		"""

        if do_it_with == "stable":
            close_points, _ = self._identify_stable_close_to_unstable(
                X_sta=self.train_xs.cpu().numpy(),
                X_uns=self.train_xs.cpu().numpy(),
                top_dist=math.sqrt(self.dim) * 0.02,
                verbosity=False)
            Nels = self.train_xs.shape[0]
            train_x_new = self.train_xs.clone()
        else:
            close_points, _ = self._identify_stable_close_to_unstable(
                X_sta=self.train_xu.cpu().numpy(),
                X_uns=self.train_xu.cpu().numpy(),
                top_dist=math.sqrt(self.dim) * 0.02,
                verbosity=False)
            Nels = self.train_xu.shape[0]
            train_x_new = self.train_xu.clone()

        ind_sel = torch.ones(Nels, dtype=bool, device=device)
        for k in range(len(close_points)):

            # A point will always be close to itself, so we skip this case:
            if len(close_points[k]) == 2:
                continue

            # If the current k is among the already discarded points, we skip this case:
            if torch.any(k == torch.tensor(range(Nels))[~ind_sel]):
                continue

            close_points_to_k = close_points[k][2::]
            # if self.train_yl[:,1][]

            ind_sel[
                close_points_to_k] = False  # Starting at 2 assumes the points are sorted

        # train_x_new = train_x_new[ind_sel,:]
        # logger.info("\n")
        # logger.info(do_it_with)
        # logger.info("close_points: {0:s}".format(str(close_points)))
        # logger.info("ind_sel: {0:s}".format(str(ind_sel)))
        # if do_it_with == "stable":
        # 	logger.info("self.train_xs: {0:s}".format(str(self.train_xs)))
        # else:
        # 	logger.info("self.train_xu: {0:s}".format(str(self.train_xu)))
        if self.dim <= 2:
            logger.info("{0:s} points discarded ({1:d}): {2:s}".format(
                do_it_with, sum(~ind_sel), str(train_x_new[~ind_sel, :])))
        else:
            logger.info("{0:s} points discarded: {1:d}".format(
                do_it_with, sum(~ind_sel)))

        return ind_sel

    def display_hyperparameters(self):
        logger.info("    Evaluation noise (stddev) (fixed): | {0:2.4f}".format(
            self.noise_std))
        logger.info("  Re-optimized hyperparameters")
        logger.info("  ----------------------------")
        logger.info("    Outputscale (stddev) | {0:2.4f}".format(
            self.covar_module.outputscale.item()))
        logger.info("    Lengthscale(s)       | {0:s}".format(
            str(self.covar_module.base_kernel.lengthscale.detach().cpu().numpy(
            ).flatten())))
        logger.info("    Optimal threshold    | {0:2.4f}".format(
            self.threshold.item()))

    def logging(self):
        log_out = dict()
        log_out[
            "lengthscale"] = self.covar_module.base_kernel.lengthscale.flatten(
            ).detach().cpu().numpy()
        log_out["outputscale"] = self.covar_module.outputscale.item()
        log_out["threshold"] = self.threshold.item()
        log_out[
            "train_inputs"] = None if self.train_inputs is None else self.train_inputs[
                0].detach().cpu().numpy()
        log_out[
            "train_targets"] = None if self.train_targets is None else self.train_targets.detach(
            ).cpu().numpy()
        log_out["train_xs"] = self.train_xs.detach().cpu().numpy()
        log_out["train_xu"] = self.train_xu.detach().cpu().numpy()
        log_out["train_ys"] = self.train_ys.detach().cpu().numpy()
        log_out["train_x_sorted"] = self.train_x_sorted.detach().cpu().numpy()

        return log_out

    def _update_prior(self):
        '''
		Recompute prior covariance matrix with the sorted inputs
		'''
        if self.train_x_sorted.shape[0] > 0:
            Kprior_cov = self.covar_module(self.train_x_sorted)
            self.Kprior_cov = Kprior_cov  # DBG: TEMPORARY TRIAL; NOT ADDING NOISE
            # self.Kprior_cov = Kprior_cov + 1e-6*torch.eye(self.train_x_sorted.shape[0]) # DBG: TEMPORARY TRIAL; NOT ADDING NOISE
            # self.Kprior_cov = self.gauss_tools.fix_singular_matrix(Kprior_cov,verbosity=False,what2fix="Fixing prior cov...")
            # self.Kprior_cov = self.covar_module(self.train_x_sorted)
        else:
            self.Kprior_cov = None

    def _update_EP_object(self):
        '''
        This function assumes that self._update_prior() has already been called.
        '''

        if len(self.train_ys) > 0:

            Sigma1_diag = self.noise_std**2 * np.ones(self.train_ys.shape[0])

            # Modify noise matrix if needed:
            if self.top_dist_ambiguous_points > 0.0:
                nearest_points_to_X_sta_i, nearest_points_to_X_uns_i = self._identify_stable_close_to_unstable(
                    X_sta=self.train_xs.cpu().numpy(),
                    X_uns=self.train_xu.cpu().numpy(),
                    top_dist=self.top_dist_ambiguous_points,
                    verbosity=False)
                if len(nearest_points_to_X_sta_i) > 0:
                    str_banner = "<<<< Will modify the noise matrix >>>>"
                    logger.info("=" * len(str_banner))
                    logger.info(str_banner)
                    logger.info("=" * len(str_banner))
                    Sigma1_diag = self._modify_noise_matrix(
                        nearest_points_to_X_sta_i,
                        Sigma1_diag,
                        factor=self.factor_heteroscedastic_noise,
                        verbosity=False)

            mu1 = self.train_ys.cpu().numpy()
            Sigma1 = np.diag(Sigma1_diag)

        else:
            Sigma1 = mu1 = None

        # Product of Gaussians:
        D, m = self.gauss_tools.product_gaussian_densities_different_dimensionality(
            mu1=mu1,
            Sigma1=Sigma1,
            mu12=np.zeros(self.train_x_sorted.shape[0]),
            Sigma12=self.Kprior_cov.cpu().numpy())

        # D = self.gauss_tools.fix_singular_matrix(D,verbosity=False,what2fix="Fixing D before updating the EP object")

        self.ep.restart(prior_mean=m, prior_cov=D)
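
    # Sketch of the product-of-Gaussians step above (illustrative notation; not
    # part of the original code): the Gaussian likelihood over the Ns stable
    # latent values is combined with the GP prior over all Ns + Nu latents,
    #   N(f[:Ns]; mu1, Sigma1) * N(f; 0, Kprior_cov)  ~  N(f; m, D),
    # and the moments (m, D) are what the EP object is restarted with, so that EP
    # only needs to handle the box constraints given by the integration limits.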

    def _update_approximate_posterior(self):

        # if self.ep is None:
        # 	self.covariance_posterior = None
        # 	self.expectation_posterior = None

        # Nu = self.data['Nu']
        # Ns = self.data['Ns']
        # Xs = self.data['Xs']
        # Xu = self.data['Xu']
        # Y = self.data['Y']

        # # Check data existance:
        # if Nu == 0 and Ns == 0:
        # 	return None,None

        # Use the (optimal) threshold c to create the integration limits:
        lim_lower, lim_upper = self._create_integration_limits(
            self.train_yl, self.threshold)

        # # Modify integration limits if necessary:
        # if self.top_dist_ambiguous_points > 0.0:
        # 	nearest_points_to_X_sta_i,nearest_points_to_X_uns_i = self._identify_stable_close_to_unstable(Xs,Xu,top_dist=self.top_dist_ambiguous_points,verbosity=False)
        # 	if len(nearest_points_to_X_uns_i) > 0:
        # 		logger.info("\n==============================================\n <<<< Will modify the integration limits >>>>\n==============================================")
        # 	lim_lower = self._modify_integration_limits_for_ambiguous_points(nearest_points_to_X_uns_i,lim_lower,Ns,Nu,Ysta=Y,verbosity=False)

        try:
            self.covariance_posterior, self.expectation_posterior, self.logZ = self.ep.run_EP(
                marginal_moments_EP_unbounded_hyperrectangle,
                lim_lower.cpu().numpy(),
                lim_upper.cpu().numpy())

            # self.covariance_posterior += 1e-5*np.eye(self.covariance_posterior.shape[0]) # DBG: TEMPORARY TRIAL; NOT ADDING NOISE

            self.covariance_posterior = torch.from_numpy(
                self.covariance_posterior).to(device=device, dtype=dtype)
            self.expectation_posterior = torch.from_numpy(
                self.expectation_posterior).to(device=device, dtype=dtype)
        except Exception as inst:
            print(type(inst), inst.args)
            raise ValueError(
                "EP failed when computing the posterior moments...")

    def _update_hyperparameters(self, Nrestarts=5):

        logger.info("Fitting GPCR model g(x) ...")
        logger.info("---------------------------")

        # Get random restarts:
        x_restarts = self._sample_hyperparameters_within_bounds(
            Nsamples=Nrestarts)
        # logger.info("x_restarts:" + str(x_restarts))

        # Store current hyperparameters, just in case the optimization below fails:
        outputscale = self.covar_module.outputscale.detach()
        lengthscale = self.covar_module.base_kernel.lengthscale.detach(
        ).flatten()
        threshold = self.threshold.detach()

        try:
            new_hyperpars, _ = self.opti.run_optimization(
                x_restarts=x_restarts)

            loss_new_hyperpars = self.mll_objective(new_hyperpars.flatten())
            logger.info("  Loss (new hyperparameters): {0:f}".format(
                loss_new_hyperpars.item()))

            self.display_hyperparameters()
        except Exception as inst:
            logger.info("  Exception (!) type: {0:s} | args: {1:s}".format(
                str(type(inst)), str(inst.args)))
            logger.info(
                "  Hyperparameter optimization failed (!!) Keeping the old ones ..."
            )
            try:
                loss_old_hyperpars = self.mll_objective.log_marginal(
                    lengthscale, outputscale, threshold)
                logger.info("  Loss (old hyperparameters): {0:f}".format(
                    loss_old_hyperpars.item()))
            except Exception as inst:
                logger.info(
                    "    Exception (!) type: {0:s} | args: {1:s}".format(
                        str(type(inst)), str(inst.args)))
                logger.info(
                    "    Old hyperparameters do not work either. Setting some random ones ..."
                )
                self.mll_objective(x_restarts[0, :].flatten())
            self.display_hyperparameters()

    def _get_hyperparameters_bounds(self, hyperpriors):

        # Compute the domain for hyperparameter search by truncating the support of the corresponding hyperprior at the .75 quantile
        # The lower bound is necessary for numerical stability, i.e., when computing logpdf() in classireg.models.mll_gpcr.log_marginal()
        # All values of the dictionary are [lower_bounds, upper_bounds] list pairs
        hyperpriors_support = dict(
            lengthscales=[[0.05] * self.dim,
                          [hyperpriors["lengthscales"].ppf(.75)] * self.dim],
            outputscale=[[0.05], [hyperpriors["outputscale"].ppf(.75)]],
            threshold=[[0.05], [hyperpriors["threshold"].ppf(.75)]])

        # Automatically get the bounds from the dictionary:
        hyperpars_lb = []
        hyperpars_ub = []
        for hyperpar in hyperpriors_support.values():
            hyperpars_lb += hyperpar[0]
            hyperpars_ub += hyperpar[1]
        hyperpars_bounds = [hyperpars_lb, hyperpars_ub]

        return hyperpars_bounds
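
    # Illustrative sketch (hypothetical hyperprior quantiles, not from the
    # original code): with self.dim = 2 and 0.75-quantiles of, say, 0.3 for the
    # lengthscales, 1.5 for the outputscale and 2.0 for the threshold, the method
    # returns
    #   [[0.05, 0.05, 0.05, 0.05], [0.3, 0.3, 1.5, 2.0]]
    # i.e. one flat lower-bound list and one flat upper-bound list, ordered as
    # (lengthscales, outputscale, threshold).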

    def _sample_hyperparameters_within_bounds(self, Nsamples):

        # Get a sample from the prior for initialization:
        new_seed = torch.randint(low=0, high=100000, size=(1, )).item(
        )  # Top-level seeds influence this draw, unlike when new_seed = None
        hyperpars_restarts = draw_sobol_samples(bounds=torch.tensor(
            self.hyperpars_bounds),
                                                n=Nsamples,
                                                q=1,
                                                seed=new_seed)
        hyperpars_restarts = hyperpars_restarts.squeeze(
            1)  # Remove batch dimension [n q dim] -> [n dim]

        return hyperpars_restarts

    def _create_integration_limits(self, train_yl, c):

        Ns = torch.sum(train_yl[:, 1] == +1)
        Nu = torch.sum(train_yl[:, 1] == -1)
        Neval = Ns + Nu

        # Limits of integration:
        lim_lower = torch.zeros(Neval)
        lim_upper = torch.zeros(Neval)
        for i in range(Ns):
            lim_lower[i] = -float("Inf")
            lim_upper[i] = c

        for i in range(Nu):
            lim_lower[Ns + i] = c
            lim_upper[Ns + i] = +float("Inf")

        return lim_lower, lim_upper
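
    # Worked example (illustrative, not part of the original code): with
    # train_yl[:, 1] = [+1, +1, -1] and threshold c = 0.8, the limits are
    #   lim_lower = [-inf, -inf, 0.8]
    #   lim_upper = [ 0.8,  0.8, +inf]
    # i.e. stable latents are constrained to (-inf, c] and unstable ones to [c, +inf).
    # An equivalent vectorized sketch, assuming (as above) that the stable
    # evaluations come first in train_yl:
    #   is_stable = train_yl[:, 1] == +1
    #   lim_lower = torch.where(is_stable, -float("Inf") * torch.ones(Neval), c * torch.ones(Neval))
    #   lim_upper = torch.where(is_stable, c * torch.ones(Neval), float("Inf") * torch.ones(Neval))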

    def _modify_noise_matrix(self,
                             nearest_points_to_X_sta_i,
                             noise_diag,
                             factor,
                             verbosity=False):
        '''
		Modify the diagonal of the noise matrix
		noise_diag: It's a vector!
		'''

        # Error checking:
        Nsta_affected_points = len(nearest_points_to_X_sta_i)
        if Nsta_affected_points == 0:
            return noise_diag
        elif noise_diag is None:
            raise ValueError(
                "noise_diag is None, but Nsta_affected_points != 0. Shouldn't noise_diag have a value?"
            )
        elif noise_diag.ndim != 1:
            raise ValueError("noise_diag must be a vector")
        elif noise_diag.shape[0] == 0:
            raise ValueError("noise_diag must be a non-empty vector")
        else:
            noise_diag_out = noise_diag.copy()

        if factor < 1.0:
            raise ValueError(
                "The factor, in principle, is meant to increase the noise in ambiguous regions"
            )

        # Modify the diagonal:
        for k in range(Nsta_affected_points):
            ind_i = nearest_points_to_X_sta_i[k][0]
            noise_diag_out[ind_i] *= factor

        if verbosity == True:
            logger.info("noise_diag_out")
            logger.info("===============")
            logger.info(str(noise_diag_out))

        return noise_diag_out
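
    # Worked example (illustrative only): with noise_diag = [0.01, 0.01, 0.01],
    # factor = 10.0 and nearest_points_to_X_sta_i = [array([1, 0])] (the stable
    # point with index 1 has at least one unstable neighbour), the returned
    # diagonal is [0.01, 0.1, 0.01]: only the affected stable point has its
    # noise variance inflated by `factor`.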

    def _modify_integration_limits_for_ambiguous_points(
            self,
            nearest_points_to_X_uns_i,
            lim_lower_i,
            Nsta,
            Nuns,
            Ysta,
            verbosity=False):
        '''
		Modify the c threshold for those pair of points
		that are very close to each other
		'''

        # Error checking:
        Nuns_affected_points = len(nearest_points_to_X_uns_i)
        if Nuns_affected_points == 0:
            return lim_lower_i
        else:
            lim_lower_i_out = lim_lower_i.copy()

        # Modify points:
        for k in range(Nuns_affected_points):

            # Take index of the affected unstable point X_uns[ind_i,:] :
            ind_i = nearest_points_to_X_uns_i[k][0]

            # Take the corresponding indices from X_sta that are affecting X_uns[ind_i,:]
            indices_j = nearest_points_to_X_uns_i[k][1::]

            # Modify the lower integration limits of f_uns: we assign the minimum
            # observed stable value among all the stable points that are affecting X_uns[ind,:]
            c_opti = lim_lower_i_out[Nsta + ind_i]
            pow_ = 2.0
            # pow_ = 1./2
            alpha = (1. / (1. + len(indices_j)))**(pow_)
            lim_lower_i_out[Nsta +
                            ind_i] = alpha * c_opti + (1. - alpha) * np.amin(
                                Ysta[indices_j])

            # lim_lower_i_out[Nsta+ind_i] = np.amin(Ysta[indices_j])

        if verbosity == True:
            logger.info("\nlim_lower_i_out")
            logger.info("===============")
            logger.info(lim_lower_i_out)

        return lim_lower_i_out
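
    # Worked example (illustrative only): suppose the unstable point X_uns[0, :]
    # has two close stable neighbours, indices_j = [2, 5], the current lower
    # limit is c_opti = 0.8, and np.amin(Ysta[[2, 5]]) = 0.2. With pow_ = 2.0,
    #   alpha = (1. / (1. + 2))**2 = 1/9,
    # so the new lower limit becomes (1/9)*0.8 + (8/9)*0.2 ~= 0.27, i.e. it is
    # pulled towards the best nearby stable observation, and more strongly the
    # more stable neighbours there are.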

    def _identify_stable_close_to_unstable(self,
                                           X_sta,
                                           X_uns,
                                           top_dist,
                                           verbosity=False):
        '''
        Inputs
        ======
        X_sta: [Ns,D], where D is the input dimensionality and Ns is the number of stable points
        X_uns: [Nu,D], where D is the input dimensionality and Nu is the number of unstable points

        Outputs
        =======
        nearest_points_to_X_sta_i: list

        Explanation
        ===========
        For every stable point, is there any unstable point that is close enough?
        This method returns a list of arrays.
        For each point X_sta[i,:], we check how close each of the points X_uns[j,:] is.
        If there is at least one X_uns[j,:] that is close enough, we add a new array
        to the list. The first element of the array is the corresponding index i.
        The subsequent elements are the j indices such that norm(X_sta[i,:]-X_uns[j,:]) < top_dist.
        If no point from X_uns is close enough to any element i of X_sta, the list will be empty.
        NOTE: we also do the same from the point of view of X_uns, and return it.

        TODO: Consider returning only nearest_points_to_X_uns_i and
        the first element of the array in each position i of the list, i.e. nearest_points_to_X_sta_i[i][0].
        The reason is that _modify_integration_limits_for_ambiguous_points() needs only nearest_points_to_X_uns_i
        and _modify_noise_matrix() needs only nearest_points_to_X_sta_i[i][0].
        '''

        # If there are no stable or unstable inputs yet, we return empty lists:
        # if len(X_sta) == 0 or len(X_uns) == 0 or top_dist == 0.0:
        if X_sta is None or X_uns is None or top_dist == 0.0:
            return [], []
        elif top_dist < 0.0:
            raise NotImplementedError

        Ns = X_sta.shape[0]
        Nu = X_uns.shape[0]

        # Nearest points to X_sta:
        nearest_points_to_X_sta_i = []
        for i in range(Ns):
            norms_X_sta_i = la.norm(X_sta[i, :] - X_uns, ord=2, axis=1)
            ind_j, = np.where(norms_X_sta_i < top_dist)
            if len(ind_j) > 0:
                aux = np.insert(ind_j, 0, i)
                nearest_points_to_X_sta_i.append(aux)

        # Nearest points to X_uns:
        nearest_points_to_X_uns_i = []
        for i in range(Nu):
            norms_X_uns_i = la.norm(X_uns[i, :] - X_sta, ord=2, axis=1)
            ind_j, = np.where(norms_X_uns_i < top_dist)
            if len(ind_j) > 0:
                aux = np.insert(ind_j, 0, i)
                nearest_points_to_X_uns_i.append(aux)

        if verbosity == True:
            logger.info("")
            logger.info("nearest_points_to_X_sta_i")
            logger.info("=========================")
            logger.info(str(nearest_points_to_X_sta_i))
            logger.info("nearest_points_to_X_uns_i")
            logger.info("=========================")
            logger.info(str(nearest_points_to_X_uns_i))
            logger.info("X_sta")
            logger.info("=========================")
            logger.info(str(X_sta))
            logger.info("X_uns")
            logger.info("=========================")
            logger.info(str(X_uns))

        return nearest_points_to_X_sta_i, nearest_points_to_X_uns_i
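
    # Toy example (illustrative only): with
    #   X_sta = np.array([[0.10], [0.50]]), X_uns = np.array([[0.11], [0.90]]), top_dist = 0.05,
    # only X_sta[0, :] and X_uns[0, :] are within top_dist of each other, so
    #   nearest_points_to_X_sta_i == [array([0, 0])]   # [i, j1, ...]
    #   nearest_points_to_X_uns_i == [array([0, 0])]   # [i, j1, ...]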

    def _error_checking_x_in(self, x_in: Tensor) -> None:

        assert not torch.any(torch.isnan(x_in)), "x_in cannot contain NaNs"
        if x_in.dim() == 1:
            x_in = x_in[None, :]
        assert x_in.shape[
            -1] == self.dim, "x_in must be N x self.dim, where N is the number of points and self.dim is the dimensionality"

        if x_in.dim() >= 3:
            return x_in.view(-1, self.dim)
        else:
            return x_in

    def __call__(self, x_in: Tensor):
        return self.predictive(x_in)

    def plot(self,
             axes=None,
             block=False,
             Ndiv=100,
             legend=True,
             title="GPgrad",
             plotting=True,
             plotCDF=False,
             clear_axes=False,
             Nsamples=None,
             ylabel=None,
             ylim=None,
             pause=None,
             showtickslabels_x=True,
             xlabel=None,
             labelsize=None,
             showtickslabels=None,
             showticks=None,
             linewidth=None,
             color=None,
             prob=False):
        '''
		This function hardcodes the plotting limits between zero and one for now
		'''
        if plotting == False or self.dim > 1:
            return

        pp = PlotProbability()
        xpred_vec = torch.linspace(0.0, 1.0, Ndiv)[:, None]
        # xpred_vec = xpred_vec.unsqueeze(0) # Ndiv batches of [q=1 x self.dim] dimensions each

        # Compute one by one:
        logger.info("Computing posterior while plotting ... (!!)")
        post_batch = False
        if post_batch:

            # Predict:
            posterior = self.posterior(
                X=xpred_vec, observation_noise=False
            )  # observation_noise MUST be always false; this class is not prepared otherwise
            # Internally, self.posterior(xpred_vec) calls self(xpred_vec), which calls self.predictive(xpred_vec)

            # pdb.set_trace()

            # Get upper and lower confidence bounds (2 standard deviations from the mean):
            lower_ci, upper_ci = posterior.mvn.confidence_region()

            # Posterior mean:
            mean_vec = posterior.mean
            std_vec = posterior.variance.sqrt()

        else:

            lower_ci = torch.zeros((Ndiv))
            upper_ci = torch.zeros((Ndiv))
            mean_vec = torch.zeros((Ndiv))
            std_vec = torch.zeros((Ndiv))
            for k in range(Ndiv):
                mvn = self.predictive(xpred_vec[k, :].view(-1, self.dim))
                lower_ci[k], upper_ci[k] = mvn.confidence_region()
                mean_vec[k] = mvn.mean
                std_vec[k] = mvn.variance.sqrt()

        if self.dim == 1:
            if prob == False:
                axes = pp.plot_GP_1D(
                    xpred_vec=xpred_vec.squeeze().cpu().numpy(),
                    fpred_mode_vec=mean_vec.squeeze().detach().cpu().numpy(),
                    fpred_quan_minus=lower_ci.squeeze().detach().cpu().numpy(),
                    fpred_quan_plus=upper_ci.squeeze().detach().cpu().numpy(),
                    X_uns=self.train_xu.detach().cpu().numpy(),
                    X_sta=self.train_xs.detach().cpu().numpy(),
                    Y_sta=self.train_ys.detach().cpu().numpy(),
                    title=title,
                    axes=axes,
                    block=block,
                    legend=legend,
                    clear_axes=True,
                    xlabel=None,
                    ylabel=ylabel,
                    xlim=np.array([0., 1.]),
                    ylim=ylim,
                    labelsize="x-large",
                    legend_loc="upper left",
                    colormap="paper",
                    showtickslabels_x=showtickslabels_x)
            else:
                normal = Normal(
                    loc=mean_vec.squeeze(),
                    # scale=posterior.variance.sqrt().squeeze())
                    scale=std_vec.squeeze())
                ei_cdf = normal.cdf(self.threshold)
                # pdb.set_trace()
                axes = pp.plot_acquisition_function(
                    var_vec=ei_cdf,
                    xpred_vec=xpred_vec.cpu().numpy(),
                    xlabel=xlabel,
                    ylabel=ylabel,
                    title=title,
                    legend=legend,
                    axes=axes,
                    clear_axes=True,
                    xlim=np.array([0., 1.]),
                    block=block,
                    labelsize=labelsize,
                    showtickslabels=showtickslabels,
                    showticks=showticks,
                    what2plot="",
                    color=color,
                    ylim=np.array([0., 1.1]),
                    linewidth=linewidth)

            if Nsamples is not None and post_batch:  # `posterior` only exists in the batched branch above
                f_sample = posterior.sample(
                    sample_shape=torch.Size([Nsamples]))
                for k in range(Nsamples):
                    axes.plot(xpred_vec.squeeze().detach().cpu().numpy(),
                              f_sample[k, :, 0],
                              linestyle="--",
                              linewidth=1.0,
                              color="sienna")

        elif self.dim == 2:
            pass

        plt.show(block=block)
        if pause is not None:
            plt.pause(pause)

        return axes
Example #9
class ExpectedImprovementWithConstraintsClassi():
    """
	This class expects a GP classifier as constraint
	
	"""
    def __init__(self, dim: int, model_list: list, options: dict) -> None:

        logger.info("Starting EIC ...")

        self.model_list = model_list

        self.dim = dim
        self.Nrestarts = options.optimization.Nrestarts
        self.algo_name = options.optimization.algo_name
        self.constrained_opt = OptimizationNonLinear(
            dim=self.dim,
            fun_obj=self.forward,
            algo_str=self.algo_name,
            bounds=[[0.0] * self.dim, [1.0] * self.dim],
            minimize=False,
            what2optimize_str="EIC acquisition")

        # This is needed to initialize the constraint model (dummy forward pass)
        self.model_list[idxm['cons']](torch.randn(size=(1, 1, self.dim)))

        # self.use_nlopt = False
        self.disp_info_scipy_opti = options.optimization.disp_info_scipy_opti

        # self._rho_conserv = options.prob_satisfaction
        self.x_next, self.alpha_next = None, None
        self.only_prob = False

        self.x_eta_c = None
        self.eta_c = None
        self.bounds = torch.tensor([[0.0] * self.dim, [1.0] * self.dim],
                                   device=device)

        self.maximize = False  # If True, we assume that we want to maximize the objective. Herein, we consider it a cost, hence we minimize it

    def get_simple_regret_cons(self, fmin_true):

        Ycons = self.model_list[idxm['cons']].train_targets

        N_Ycons_safe = torch.sum(Ycons == +1)

        Yobj_safe = self.model_list[idxm[
            'obj']].train_targets  # Since we don't include the non-stable evaluations in the objective GP, the safe evaluations are the evaluations themselves
        if N_Ycons_safe == 0 and Yobj_safe is None:  # No safe points, but obj has no evaluations at all either
            return torch.tensor([+float("Inf")], device=device,
                                dtype=dtype)  # The regret cannot be computed
        elif N_Ycons_safe == 0:  # No safe points, but obj has some evaluations already
            raise NotImplementedError(
                "We assume that the objective only acquires evaluations if they are safe."
            )
            # f_simple = torch.max(Yobj_safe) # We take the worst observation here. Otherwise, the regret can become non-monotonic
        else:
            if Yobj_safe is None:
                pdb.set_trace()
            f_simple = torch.min(Yobj_safe).view(1)

        regret_simple = f_simple - fmin_true

        return regret_simple
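
    # Worked example (illustrative only): if the objective GP has collected the
    # safe evaluations [0.7, 0.3, 0.5] and fmin_true = 0.1, then f_simple = 0.3
    # and the simple regret is 0.3 - 0.1 = 0.2.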

    def __call__(self, X: Tensor) -> Tensor:
        return self.forward(X)

    # @t_batch_mode_transform(expected_q=1)
    def forward(self, X: Tensor) -> Tensor:
        r"""Evaluate Constrained Expected Improvement on the candidate set X.

		Args:
			X: A `(b) x 1 x d`-dim Tensor of `(b)` t-batches of `d`-dim design
				points each.

		Returns:
			A `(b)`-dim Tensor of Expected Improvement values at the given
				design points `X`.
		"""
        # import pdb; pdb.set_trace()

        if X.dim() == 1:
            X = X.view(1, self.dim)

        # means, sigmas = self._get_posterior_reimplemented(X)

        # Get posterior of objective:
        mvn_obj = self.model_list[idxm['obj']](X)
        mean_obj = mvn_obj.mean
        sigma_obj = mvn_obj.variance.sqrt()

        # # (b) x 1
        # mean_obj = means[..., [self.objective_index]]
        # sigma_obj = sigmas[..., [self.objective_index]]

        # print("mean_obj.shape:",mean_obj.shape)
        # print("sigma_obj.shape:",sigma_obj.shape)
        # print("means.shape:",means.shape)
        # print("sigmas.shape:",sigmas.shape)

        # Probability of feasibility:
        prob_feas = self._compute_prob_feas(X=X)

        # print("prob_feas.shape:",prob_feas.shape)
        # pdb.set_trace()

        if self.only_prob:
            ei_times_prob = prob_feas  # Use only the probability of feasibility
        else:
            u = (mean_obj - self.best_f.expand_as(mean_obj)) / sigma_obj
            if not self.maximize:
                u = -u
            normal = Normal(
                torch.zeros(1, device=u.device, dtype=u.dtype),
                torch.ones(1, device=u.device, dtype=u.dtype),
            )
            ei_pdf = torch.exp(normal.log_prob(u))  # (b) x 1
            ei_cdf = normal.cdf(u)
            ei = sigma_obj * (ei_pdf + u * ei_cdf)
            ei_times_prob = ei.mul(prob_feas)

        # print("ei_times_prob.shape:",ei_times_prob.shape)

        # pdb.set_trace()
        val = ei_times_prob.squeeze(dim=-1)
        if val.dim() == 1 and len(val) == 1 or val.dim() == 0:
            val = val.item()
        # else:
        # 	pdb.set_trace()

        # if isinstance(val,float):
        # 	pdb.set_trace()

        # if val.dim() == 1:
        # 	# if not val.shape[0] == 1:
        # 	if val.shape[0] != X.shape[0]:
        # 		pdb.set_trace()

        # print("X.shape:",X.shape)
        # print("val:",val)
        # pdb.set_trace()

        return val
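
    # Summary of the quantity computed above (illustrative notation; not part of
    # the original code): for minimization, with
    #   u = (best_f - mu_obj(x)) / sigma_obj(x),
    #   EI(x)  = sigma_obj(x) * (phi(u) + u * Phi(u)),
    #   EIC(x) = EI(x) * Pr(g(x) is feasible),
    # where phi and Phi are the standard normal pdf and cdf, and the feasibility
    # probability is computed by _compute_prob_feas() below.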

    def get_best_constrained_evaluation(self):

        # pdb.set_trace()
        Ycons = self.model_list[idxm['cons']].train_targets
        Ycons_safe = Ycons[Ycons == +1]
        Yobj_safe = self.model_list[idxm[
            'obj']].train_targets  # Since we don't include the non-stable evaluations in GPCR, the safe evaluations are the evaluations themselves
        if len(Ycons_safe) > 0 and Yobj_safe is None:
            raise ValueError(
                "This case should not happen (!) We assume that objective evaluations are only collected when the contraint is satisfied..."
            )
        elif len(Ycons_safe) > 0:
            return torch.min(Yobj_safe).view(1)
        else:  # No safe points yet
            raise ValueError(
                "This case (no safe data collected yet) is assumed to be handled at upper levels, so we should never enter here ..."
            )

    def get_next_point(self) -> (Tensor, Tensor):

        # pdb.set_trace()
        if self.model_list[
                idxm["obj"]].train_targets is None:  # No safe evaluations case
            self.eta_c = torch.zeros(1, device=device, dtype=dtype)
            self.x_eta_c = torch.zeros((1, self.dim),
                                       device=device,
                                       dtype=dtype)
            self.best_f = None
            self.only_prob = True
        else:

            self.eta_c = torch.zeros(1, device=device, dtype=dtype)
            self.x_eta_c = torch.zeros((1, self.dim),
                                       device=device,
                                       dtype=dtype)

            # # The following functions need to be called in the given order:
            # try:
            # 	self.update_eta_c(rho_t=self.rho_conserv) # Update min_x mu(x|D) s.t. Pr(g(x) <= 0) > rho_t
            # except Exception as inst:
            # 	logger.info("Exception (!) type: {0:s} | args: {1:s}".format(str(type(inst)),str(inst.args)))
            # 	logger.info("Not optimizing eta_c ...")

            # self.best_f = self.eta_c
            self.best_f = self.get_best_constrained_evaluation(
            ) - self.model_list[idxm["obj"]].likelihood.noise.sqrt()[0].view(1)
            self.only_prob = False

        self.x_next, self.alpha_next = self.get_acqui_fun_maximizer()

        if self.x_next is not None and self.alpha_next is not None:
            logger.info(
                "xnext: " +
                str(self.x_next.view((1, self.dim)).detach().cpu().numpy()))
            logger.info("alpha_next: {0:2.2f}".format(self.alpha_next.item()))
        else:
            logger.info("xnext: None")
            logger.info("alpha_next: None")

        logger.info("self.x_eta_c: " + str(self.x_eta_c))
        logger.info("self.eta_c: " + str(self.eta_c))
        logger.info("self.best_f: " + str(self.best_f))

        return self.x_next, self.alpha_next

    def get_acqui_fun_maximizer(self):

        logger.info(
            "Computing next candidate by maximizing the acquisition function ..."
        )
        options = {
            "batch_limit": 50,
            "maxiter": 300,
            "ftol": 1e-6,
            "method": "L-BFGS-B",
            "iprint": 2,
            "maxls": 20,
            "disp": self.disp_info_scipy_opti
        }

        # Get initial random restart points:
        logger.info("Generating random restarts ...")
        initial_conditions = gen_batch_initial_conditions(
            acq_function=self,
            bounds=self.bounds,
            q=1,
            num_restarts=self.Nrestarts,
            raw_samples=500,
            options=options)
        # logger.info("initial_conditions:" + str(initial_conditions))

        logger.info("Using nlopt ...")
        x_next, alpha_next = self.constrained_opt.run_optimization(
            initial_conditions.view((self.Nrestarts, self.dim)))

        # # TODO: Is this really needed?
        # prob_val = self.get_probability_of_safe_evaluation(x_next.unsqueeze(1))
        # if prob_val < self.rho_conserv:
        # 	logger.info("(Is this really needed????) x_next violates the probabilistic constraint...")
        # 	pdb.set_trace()

        logger.info("Done!")

        return x_next, alpha_next

    def _compute_prob_feas(self, X):

        # pdb.set_trace()

        # if "BernoulliLikelihood" in repr(self.model_list.models[idxm['cons']].likelihood): # GPClassi
        mvn_cons = self.model_list[idxm['cons']](X)
        prob_feas = self.model_list[idxm['cons']].likelihood(mvn_cons).mean
        # 	# print("prob_feas:",prob_feas)
        # else: # GPCR
        # 	prob_feas = super()._compute_prob_feas(X=X, means=means, sigmas=sigmas)

        return prob_feas
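
    # Note (illustrative): for a GP classifier with a Bernoulli likelihood, the
    # quantity returned above is the posterior predictive probability of the
    # "safe" class, prob_feas(x) = E_f[ p(y = +1 | f(x)) ], which is what
    # multiplies EI(x) in forward().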

    def plot(self,
             axes=None,
             block=False,
             title=None,
             plotting=False,
             Ndiv=41,
             showtickslabels=True,
             showticks=True,
             xlabel=None,
             ylabel=None,
             clear_axes=True,
             legend=False,
             labelsize=None,
             normalize=False,
             colorbar=False,
             color=None,
             label=None,
             local_axes=None,
             x_next=None,
             alpha_next=None,
             linewidth=2.0):

        if plotting == False:
            return None

        if self.dim > 1:
            return None

        if local_axes is None and axes is None:
            self.fig, (local_axes) = plt.subplots(1,
                                                  1,
                                                  sharex=True,
                                                  figsize=(10, 7))
        elif local_axes is None:
            local_axes = axes
        elif axes is None:
            pass  # If the internal axes already have some value, and no new axes passed, do nothing
        elif local_axes is not None and axes is not None:
            local_axes = axes

        local_pp = PlotProbability()

        if x_next is not None and alpha_next is not None:
            x_next_local = x_next
            alpha_next_local = alpha_next
        else:
            x_next_local = None
            alpha_next_local = 1.0

        test_x_vec = torch.linspace(0.0, 1.0, Ndiv)[:, None]
        test_x_vec = test_x_vec.unsqueeze(
            1
        )  # Make this [Ntest x q x dim] = [n_batches x n_design_points x dim], with q=1 -> Double-check in the documentation!
        var_vec = self.forward(X=test_x_vec).detach().cpu().numpy()

        if self.dim == 1:
            local_axes = local_pp.plot_acquisition_function(
                var_vec=var_vec,
                xpred_vec=test_x_vec.squeeze(1),
                x_next=x_next_local,
                acqui_next=alpha_next_local,
                xlabel=xlabel,
                ylabel=ylabel,
                title=title,
                legend=legend,
                axes=local_axes,
                clear_axes=clear_axes,
                xlim=np.array([0., 1.]),
                block=block,
                labelsize=labelsize,
                showtickslabels=showtickslabels,
                showticks=showticks,
                what2plot=None,
                color=color,
                ylim=None,
                linewidth=linewidth)
            plt.pause(0.25)

        elif self.dim == 2:
            if self.x_next is not None:
                Xs = np.atleast_2d(self.x_next)
            else:
                Xs = self.x_next
            local_axes = local_pp.plot_GP_2D_single(
                var_vec=var_vec,
                Ndiv_dim=Ndiv * np.ones(self.dim, dtype=np.int64),
                Xs=Xs,
                Ys=self.alpha_next,
                x_label=xlabel,
                y_label=ylabel,
                title=title,
                axes=local_axes,
                clear_axes=clear_axes,
                legend=legend,
                block=block,
                colorbar=colorbar,
                color_Xs="gold")
            plt.pause(0.25)

        return local_axes