Exemplo n.º 1
0
 def test_standardize(self, cuda=False):
     """Check `standardize` on constant, 1-d and 2-d inputs for float and double.

     Args:
         cuda: If True, the tensors are created on the "cuda" device,
             otherwise on "cpu".
     """
     device = torch.device("cuda" if cuda else "cpu")
     for dtype in (torch.float, torch.double):
         tkwargs = {"device": device, "dtype": dtype}
         # an all-constant input is expected to come back unchanged
         constant = torch.tensor([0.0, 0.0], **tkwargs)
         self.assertTrue(torch.equal(constant, standardize(constant)))
         # 1-d input with hand-computed standardized values
         column = torch.tensor([0.0, 1.0, 1.0, 1.0], **tkwargs)
         expected = torch.tensor([-1.5, 0.5, 0.5, 0.5], **tkwargs)
         self.assertTrue(torch.equal(expected, standardize(column)))
         # 2-d input: each column is expected to be standardized on its own
         rows = [[0.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]]
         two_columns = torch.tensor(rows, **tkwargs).transpose(1, 0)
         result = standardize(two_columns)
         self.assertTrue(torch.equal(result[:, 0], expected))
         self.assertTrue(torch.equal(result[:, 1], torch.zeros(4, **tkwargs)))
Exemplo n.º 2
0
    def forward(self, X: Tensor, num_samples: int = 1) -> Tensor:
        r"""Sample rows of `X` with weights given by a tempered acquisition value.

        Args:
            X: A `batch_shape x N x d`-dim Tensor of candidates; sampling happens
                along the `N` dimension. If the underlying acquisition function
                uses a batched model, its batch shape must be broadcastable to
                `batch_shape`.
            num_samples: The number of samples to draw.

        Returns:
            A `batch_shape x num_samples x d`-dim Tensor of samples from `X`, where
            `X[..., i, :]` is the `i`-th sample.
        """
        # TODO: Can we get the model batch shape property from the model?
        n_batch_dims = X.ndim - 2
        # bring `N` to the front and insert a singleton dimension, so that the
        # acquisition function sees a tensor of shape `N x batch_shape x 1 x d`
        candidates = X.permute(-2, *range(n_batch_dims), -1).unsqueeze(-2)
        values = self.acq_func(candidates)  # N x batch_shape
        # send `N` (the number of categories) back to the last position
        values = values.permute(*range(1, n_batch_dims + 1), 0)  # batch_shape x N
        tempered = torch.exp(self.eta * standardize(values))  # batch_shape x N
        picks = batched_multinomial(
            weights=tempered,
            num_samples=num_samples,
            replacement=self.replacement,
        )
        # broadcast the sampled indices over the feature dimension so that
        # `gather` pulls out complete rows of X
        gather_index = picks.unsqueeze(-1).expand(*picks.shape, X.size(-1))
        return torch.gather(X, -2, gather_index)
Exemplo n.º 3
0
 def standardize_obs(self, observations, new_y):
     """Return the standardized value of `new_y` relative to the observed y values.

     The element at index 1 of every entry in `observations` is taken as a
     y value. `new_y` is appended to those values, the combined vector is passed
     through `standardize`, and the last entry (the one belonging to `new_y`)
     is returned.
     """
     observed_y = torch.tensor(
         np.array([obs[1] for obs in observations])).double()
     candidate_y = torch.tensor([new_y]).double()
     combined = torch.cat((observed_y, candidate_y))
     return standardize(combined)[-1]
Exemplo n.º 4
0
 def test_standardize(self):
     """Check `standardize` on constant, 1-d, 2-column and 3-column inputs."""
     for dtype in (torch.float, torch.double):
         tkwargs = {"device": self.device, "dtype": dtype}
         # an all-constant input is expected to come back unchanged
         constant = torch.tensor([0.0, 0.0], **tkwargs)
         self.assertTrue(torch.equal(constant, standardize(constant)))
         # 1-d input with hand-computed standardized values
         column = torch.tensor([0.0, 1.0, 1.0, 1.0], **tkwargs)
         expected = torch.tensor([-1.5, 0.5, 0.5, 0.5], **tkwargs)
         self.assertTrue(torch.equal(expected, standardize(column)))
         # two columns: the non-constant one and an all-zero one
         rows = [[0.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]]
         two_columns = torch.tensor(rows, **tkwargs).transpose(1, 0)
         two_col_result = standardize(two_columns)
         self.assertTrue(torch.equal(two_col_result[:, 0], expected))
         self.assertTrue(
             torch.equal(two_col_result[:, 1], torch.zeros(4, **tkwargs)))
         # three columns: append the 1-d input as an extra column
         three_columns = torch.cat([two_columns, column.unsqueeze(-1)], dim=-1)
         three_col_result = standardize(three_columns)
         self.assertTrue(torch.equal(three_col_result[:, 0], expected))
         self.assertTrue(
             torch.equal(three_col_result[:, 1], torch.zeros(4, **tkwargs)))
         self.assertTrue(torch.equal(three_col_result[:, 2], expected))
Exemplo n.º 5
0
 def generate_outer_restart_points(self,
                                   acqf: OneShotrhoKG,
                                   w_samples: Tensor = None) -> Tensor:
     """
     Generates the restart points for acqf optimization.

     Raw samples are drawn with `draw_constrained_sobol`; part of each raw
     sample is then overwritten with optimizers of the inner problem (sampled
     with weights `exp(eta * standardize(inner_values))`), and the `w` slice of
     each raw sample is overwritten with a randomly chosen row of `w_samples`.

     :param acqf: The acquisition function being optimized
     :param w_samples: the list of w samples to use. If None, falls back to
         `acqf.fixed_samples`, and if that is also None, to uniform random
         samples of shape `acqf.num_samples x acqf.dim_w`.
     :return: restart points
     :raises NotImplementedError: if `self.q > 1`
     """
     # fail fast: the original code raised this only after the (expensive)
     # inner optimization had already been run
     if self.q > 1:
         raise NotImplementedError("This does not support q>1!")
     X = draw_constrained_sobol(
         bounds=self.outer_bounds,
         n=self.raw_samples,
         q=self.q,
         inequality_constraints=self.inequality_constraints,
     ).to(dtype=self.dtype, device=self.device)
     # get the optimizers of the inner problem
     if w_samples is None:
         w_samples = (acqf.fixed_samples if acqf.fixed_samples is not None
                      else torch.rand(acqf.num_samples,
                                      acqf.dim_w,
                                      dtype=self.dtype,
                                      device=self.device))
     inner_rho = InnerRho(
         model=acqf.model,
         w_samples=w_samples,
         alpha=acqf.alpha,
         dim_x=acqf.dim_x,
         num_repetitions=acqf.num_repetitions,
         inner_seed=acqf.inner_seed,
         CVaR=acqf.CVaR,
         expectation=acqf.expectation,
         weights=getattr(acqf, "weights", None),
     )
     inner_solutions, inner_values = super().optimize_inner(
         inner_rho, False)
     # sample from the optimizers
     n_value = int((1 - self.random_frac) * self.num_fantasies)
     # With n_value == 0 there is nothing to overwrite. Without this guard,
     # `torch.multinomial` would be asked for zero samples and the slice
     # `-0 * dim_x:` would address the whole last dimension of X.
     if n_value > 0:
         weights = torch.exp(self.eta * standardize(inner_values))
         idx = torch.multinomial(weights,
                                 self.raw_samples * n_value,
                                 replacement=True)
         # set the respective raw samples to the sampled optimizers
         X[..., -n_value * self.dim_x:] = inner_solutions[idx, 0].view(
             self.raw_samples, 1, -1)
     # `w_samples` is never None at this point (it was defaulted above), so
     # the `w` slice of every raw sample is always overwritten
     w_ind = torch.randint(w_samples.shape[0],
                           (self.raw_samples, self.q))
     X[..., self.dim_x:self.dim] = w_samples[w_ind, :]
     return self.generate_restart_points_from_samples(X, acqf)
Exemplo n.º 6
0
def gen_one_shot_kg_initial_conditions(acq_function,
                                       bounds,
                                       q,
                                       num_restarts,
                                       raw_samples,
                                       options=None):
    r"""[Copy of original botorch function]

    Generate a batch of smart initializations for qKnowledgeGradient.
    This function generates initial conditions for optimizing one-shot KG using
    the maximizer of the posterior objective. Intuitively, the maximizer of the
    fantasized posterior will often be close to a maximizer of the current
    posterior. This function uses that fact to generate the initial conditions
    for the fantasy points. Specifically, a fraction of `1 - frac_random` (see
    options) is generated by sampling from the set of maximizers of the
    posterior objective (obtained via random restart optimization) according to
    a softmax transformation of their respective values. This means that this
    initialization strategy internally solves an acquisition function
    maximization problem. The remaining `frac_random` fantasy points as well as
    all `q` candidate points are chosen according to the standard initialization
    strategy in `gen_batch_initial_conditions`.

    If `int((1 - frac_random) * num_fantasies)` is zero, no fantasy point is
    replaced and the result of `gen_batch_initial_conditions` is returned
    unchanged.

    Args:
        acq_function: The qKnowledgeGradient instance to be optimized.
        bounds: A `2 x d` tensor of lower and upper bounds for each column of
            task features.
        q: The number of candidates to consider.
        num_restarts: The number of starting points for multistart acquisition
            function optimization.
        raw_samples: The number of raw samples to consider in the initialization
            heuristic.
        options: Options for initial condition generation. These contain all
            settings for the standard heuristic initialization from
            `gen_batch_initial_conditions`. In addition, they contain
            `frac_random` (the fraction of fully random fantasy points),
            `num_inner_restarts` and `raw_inner_samples` (the number of random
            restarts and raw samples for solving the posterior objective
            maximization problem, respectively) and `eta` (temperature parameter
            for sampling heuristic from posterior objective maximizers).
    Returns:
        A `num_restarts x q' x d` tensor that can be used as initial conditions
        for `optimize_acqf()`. Here `q' = q + num_fantasies` is the total number
        of points (candidate points plus fantasy points).
    Raises:
        ValueError: If `frac_random` is not strictly between 0 and 1.
    Example:
        >>> qKG = qKnowledgeGradient(model, num_fantasies=64)
        >>> bounds = torch.tensor([[0., 0.], [1., 1.]])
        >>> Xinit = gen_one_shot_kg_initial_conditions(
        >>>     qKG, bounds, q=3, num_restarts=10, raw_samples=512,
        >>>     options={"frac_random": 0.25},
        >>> )
    """
    options = options or {}
    frac_random: float = options.get("frac_random", 0.1)
    if not 0 < frac_random < 1:
        raise ValueError(
            f"frac_random must take on values in (0,1). Value: {frac_random}")
    q_aug = acq_function.get_augmented_q_batch_size(q=q)

    # TODO: Avoid unnecessary computation by not generating all candidates
    ics = gen_batch_initial_conditions(
        acq_function=acq_function,
        bounds=bounds,
        q=q_aug,
        num_restarts=num_restarts,
        raw_samples=raw_samples,
        options=options,
    )

    # compute maximizer of the value function
    value_function = _get_value_function(
        model=acq_function.model,
        objective=acq_function.objective,
        sampler=acq_function.inner_sampler,
    )

    fantasy_cands, fantasy_vals = optimize_acqf(
        acq_function=value_function,
        bounds=bounds,
        q=1,
        num_restarts=options.get("num_inner_restarts", 20),
        raw_samples=options.get("raw_inner_samples", 1024),
        return_best_only=False,
    )

    # sampling from the optimizers
    n_value = int((1 - frac_random) * (q_aug - q))  # number of non-random ICs
    # Guard the n_value == 0 case: `ics[..., -0:, :]` would address *all*
    # points (candidates included) and `torch.multinomial` cannot draw zero
    # samples, so there is nothing valid to do but keep `ics` as generated.
    if n_value > 0:
        eta = options.get("eta", 2.0)
        weights = torch.exp(eta * transforms.standardize(fantasy_vals))
        idx = torch.multinomial(weights,
                                num_restarts * n_value,
                                replacement=True)

        # set the respective initial conditions to the sampled optimizers
        ics[..., -n_value:, :] = fantasy_cands[idx, 0].view(
            num_restarts, n_value, -1)
    return ics
Exemplo n.º 7
0
    def test_cache_root(self):
        """Compare qNoisyExpectedImprovement with and without `cache_root`.

        For every combination of training batch shape, number of outputs `m`
        and dtype, two acquisition functions are built on the same model, one
        with `cache_root=True` and one with `cache_root=False`. The test checks
        that

        * constructing the cached variant with a sampler created with
          `collapse_batch_dims=False` raises `UnsupportedError`,
        * the cached variant calls `sample_cached_cholesky` exactly once per
          evaluation while the non-cached variant never calls it,
        * values and input gradients of both variants agree within the
          dtype-dependent absolute tolerance, and
        * after zeroing `_baseline_L`, evaluating the cached variant emits
          exactly one warning, of category `BotorchWarning`.
        """
        # dotted path that is patched below to spy on the cached-cholesky sampler
        sample_cached_path = (
            "botorch.acquisition.cached_cholesky.sample_cached_cholesky")
        # fixed hyper-parameter values (two entries along the leading dimension)
        # that are loaded into the model instead of fitting it
        raw_state_dict = {
            "likelihood.noise_covar.raw_noise":
            torch.tensor([[0.0895], [0.2594]], dtype=torch.float64),
            "mean_module.constant":
            torch.tensor([[-0.4545], [-0.1285]], dtype=torch.float64),
            "covar_module.raw_outputscale":
            torch.tensor([1.4876, 1.4897], dtype=torch.float64),
            "covar_module.base_kernel.raw_lengthscale":
            torch.tensor([[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]],
                         dtype=torch.float64),
        }
        # test batched models (e.g. for MCMC)
        for train_batch_shape, m, dtype in product(
            (torch.Size([]), torch.Size([3])), (1, 2),
            (torch.float, torch.double)):
            state_dict = deepcopy(raw_state_dict)
            # adapt each parameter to the current configuration: keep only the
            # first entry when m == 1, and expand along the training batch
            # dimensions when a batch shape is used
            for k, v in state_dict.items():
                if m == 1:
                    v = v[0]
                if len(train_batch_shape) > 0:
                    v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape)
                state_dict[k] = v
            tkwargs = {"device": self.device, "dtype": dtype}
            # with two outputs, an objective summing over the last dimension
            # is used; with a single output no objective is passed
            if m == 2:
                objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
            else:
                objective = None
            for k, v in state_dict.items():
                state_dict[k] = v.to(**tkwargs)
            # looser absolute tolerance in single precision
            all_close_kwargs = ({
                "atol": 1e-1,
                "rtol": 0.0,
            } if dtype == torch.float else {
                "atol": 1e-4,
                "rtol": 0.0
            })
            # seed so that the random training data is reproducible
            torch.manual_seed(1234)
            train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs)
            train_Y = (
                torch.sin(train_X * 2 * pi) +
                torch.randn(*train_batch_shape, 3, 2, **tkwargs))[..., :m]
            train_Y = standardize(train_Y)
            model = SingleTaskGP(
                train_X,
                train_Y,
            )
            # for batched training data, the first batch element is used as
            # the baseline
            if len(train_batch_shape) > 0:
                X_baseline = train_X[0]
            else:
                X_baseline = train_X
            model.load_state_dict(state_dict, strict=False)
            # test sampler with collapse_batch_dims=False
            sampler = IIDNormalSampler(5, seed=0, collapse_batch_dims=False)
            with self.assertRaises(UnsupportedError):
                qNoisyExpectedImprovement(
                    model=model,
                    X_baseline=X_baseline,
                    sampler=sampler,
                    objective=objective,
                    prune_baseline=False,
                    cache_root=True,
                )
            # acquisition function under test: root caching enabled
            sampler = IIDNormalSampler(5, seed=0)
            torch.manual_seed(0)
            acqf = qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler,
                objective=objective,
                prune_baseline=False,
                cache_root=True,
            )

            # reference acquisition function without caching; its sampler is
            # given a copy of the cached variant's `base_sampler` base samples
            # (presumably so both start from identical samples -- TODO confirm)
            orig_base_samples = acqf.base_sampler.base_samples.detach().clone()
            sampler2 = IIDNormalSampler(5, seed=0)
            sampler2.base_samples = orig_base_samples
            torch.manual_seed(0)
            acqf_no_cache = qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler2,
                objective=objective,
                prune_baseline=False,
                cache_root=False,
            )
            for q, batch_shape in product(
                (1, 3), (torch.Size([]), torch.Size([3]), torch.Size([4, 3]))):
                test_X = (0.3 +
                          0.05 * torch.randn(*batch_shape, q, 2, **tkwargs)
                          ).requires_grad_(True)
                # cached variant: must go through sample_cached_cholesky once
                with mock.patch(
                        sample_cached_path,
                        wraps=sample_cached_cholesky) as mock_sample_cached:
                    torch.manual_seed(0)
                    val = acqf(test_X)
                    mock_sample_cached.assert_called_once()
                val.sum().backward()
                # hand the base samples used by the cached variant to the
                # non-cached one before evaluating it on a copy of the inputs
                base_samples = acqf.sampler.base_samples.detach().clone()
                X_grad = test_X.grad.clone()
                test_X2 = test_X.detach().clone().requires_grad_(True)
                acqf_no_cache.sampler.base_samples = base_samples
                # non-cached variant: must never call sample_cached_cholesky
                with mock.patch(
                        sample_cached_path,
                        wraps=sample_cached_cholesky) as mock_sample_cached:
                    torch.manual_seed(0)
                    val2 = acqf_no_cache(test_X2)
                mock_sample_cached.assert_not_called()
                # values and gradients of both variants must agree
                self.assertTrue(torch.allclose(val, val2, **all_close_kwargs))
                val2.sum().backward()
                self.assertTrue(
                    torch.allclose(X_grad, test_X2.grad, **all_close_kwargs))
            # test we fall back to standard sampling for
            # ill-conditioned covariances
            # NOTE: `test_X` below is the tensor left over from the last
            # iteration of the loop above
            acqf._baseline_L = torch.zeros_like(acqf._baseline_L)
            with warnings.catch_warnings(
                    record=True) as ws, settings.debug(True):
                with torch.no_grad():
                    acqf(test_X)
            # exactly one warning is expected, and it must be a BotorchWarning
            self.assertEqual(len(ws), 1)
            self.assertTrue(issubclass(ws[-1].category, BotorchWarning))
Exemplo n.º 8
0
    def train_loop(self):
        """Run a Bayesian-optimization loop over three hyper-parameters.

        Starting from one manually chosen hyper-parameter vector, each of the
        `N_BATCH` iterations fits a `SingleTaskGP` to the normalized inputs and
        standardized objectives, maximizes `qExpectedImprovement` over the unit
        cube, maps the candidate back to the original bounds, evaluates it with
        `self.train` and appends the result to the training data. The best
        objective value seen after each step is collected and printed at the
        end. Nothing is returned.

        NOTE(review): expected improvement is maximized, so `self.train`
        presumably returns a quantity where larger is better even though it is
        printed as "Error" -- confirm against `self.train`.
        """
        from botorch.models import SingleTaskGP
        from botorch.fit import fit_gpytorch_model
        from gpytorch.mlls import ExactMarginalLogLikelihood
        from botorch.optim import optimize_acqf
        from botorch.acquisition.monte_carlo import qExpectedImprovement
        from botorch.sampling.samplers import SobolQMCNormalSampler

        seed = 1
        torch.manual_seed(seed)
        dt, d = torch.float32, 3
        # lower / upper bounds of the three hyper-parameters
        lb, ub = [1e-4, 0.1, 0.1], [3e-3, 1 - 1e-3, 1 - 1e-3]
        bounds = torch.tensor([lb, ub], dtype=dt)
        # the acquisition function is optimized in normalized (unit cube) space
        unit_bounds = torch.stack([
            torch.zeros(d, dtype=dt),
            torch.ones(d, dtype=dt),
        ])

        def gen_initial_data():
            """Evaluate the hand-picked starting point and return (x, objective)."""
            # auto
            # x = unnormalize(torch.rand(1, 3, dtype=dt), bounds=bounds)
            # manual
            x = torch.tensor([[1e-3, 0.9, 0.999]])
            print('BO Initialization: \n')
            print('Initial Hyper-parameter: ' + str(x))
            obj = self.train(x.view(-1))
            print('Initial Error: ' + str(obj))
            return x, obj.unsqueeze(1)

        def get_fitted_model(x, obj, state_dict=None):
            """Build a SingleTaskGP on (x, obj), optionally warm-started, and fit it."""
            # initialize and fit model
            fitted_model = SingleTaskGP(train_X=x, train_Y=obj)
            if state_dict is not None:
                fitted_model.load_state_dict(state_dict)
            mll = ExactMarginalLogLikelihood(fitted_model.likelihood,
                                             fitted_model)
            mll.to(x)
            fit_gpytorch_model(mll)
            return fitted_model

        def optimize_acqf_and_get_observation(acq_func):
            """Optimizes the acquisition function,
            and returns a new candidate and a noisy observation"""
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=unit_bounds,
                q=1,
                num_restarts=10,
                raw_samples=200,
            )

            # map the candidate from the unit cube back to the real bounds
            x = unnormalize(candidates.detach(), bounds=bounds)
            print('Hyper-parameter: ' + str(x))
            obj = self.train(x.view(-1)).unsqueeze(-1)
            # fixed: the original wrapped this call in a second print(), which
            # additionally printed a stray "None"
            print('Error: ' + str(obj))
            return x, obj

        N_BATCH = 500
        MC_SAMPLES = 2000
        train_x, train_obj = gen_initial_data()  # (1,3), (1,1)
        # store plain floats throughout (the original stored a tensor for the
        # first entry and floats for all later ones)
        best_observed = [train_obj.max().item()]

        print("\nRunning BO......\n ", end='')
        state_dict = None
        for iteration in range(N_BATCH):
            # standardize once per iteration; used for both fitting and best_f
            train_obj_std = standardize(train_obj)

            # fit the model
            model = get_fitted_model(
                normalize(train_x, bounds=bounds),
                train_obj_std,
                state_dict=state_dict,
            )

            # define the qEI acquisition module using a QMC sampler
            qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES,
                                                seed=seed)
            qEI = qExpectedImprovement(model=model,
                                       sampler=qmc_sampler,
                                       best_f=train_obj_std.max())

            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(qEI)

            # update training points
            train_x = torch.cat((train_x, new_x))
            train_obj = torch.cat((train_obj, new_obj))

            # update progress
            best_observed.append(train_obj.max().item())

            # warm-start the next model fit from the current hyper-parameters
            state_dict = model.state_dict()
            print(".", end='')

        print(best_observed)
Exemplo n.º 9
0
def gen_value_function_initial_conditions(
    acq_function: AcquisitionFunction,
    bounds: Tensor,
    num_restarts: int,
    raw_samples: int,
    current_model: Model,
    options: Optional[Dict[str, Union[bool, float, int]]] = None,
) -> Tensor:
    r"""Generate a batch of smart initializations for optimizing
    the value function of qKnowledgeGradient.

    This function generates initial conditions for optimizing the inner problem of
    KG, i.e. its value function, using the maximizer of the posterior objective.
    Intuitively, the maximizer of the fantasized posterior will often be close to a
    maximizer of the current posterior. This function uses that fact to generate the
    initial conditions for the fantasy points. Specifically, a fraction of `1 -
    frac_random` (see options) of raw samples is generated by sampling from the set of
    maximizers of the posterior objective (obtained via random restart optimization)
    according to a softmax transformation of their respective values. This means that
    this initialization strategy internally solves an acquisition function
    maximization problem. The remaining raw samples are generated using
    `draw_sobol_samples`. All raw samples are then evaluated, and the initial
    conditions are selected according to the standard initialization strategy in
    'initialize_q_batch' individually for each inner problem.

    Args:
        acq_function: The value function instance to be optimized.
        bounds: A `2 x d` tensor of lower and upper bounds for each column of
            task features.
        num_restarts: The number of starting points for multistart acquisition
            function optimization.
        raw_samples: The number of raw samples to consider in the initialization
            heuristic.
        current_model: The model of the KG acquisition function that was used to
            generate the fantasy model of the value function.
        options: Options for initial condition generation. These contain all
            settings for the standard heuristic initialization from
            `gen_batch_initial_conditions`. In addition, they contain
            `frac_random` (the fraction of fully random fantasy points),
            `num_inner_restarts` and `raw_inner_samples` (the number of random
            restarts and raw samples for solving the posterior objective
            maximization problem, respectively) and `eta` (temperature parameter
            for sampling heuristic from posterior objective maximizers).

    Returns:
        A `num_restarts x batch_shape x q x d` tensor that can be used as initial
        conditions for `optimize_acqf()`. Here `batch_shape` is the batch shape
        of value function model.

    Raises:
        ValueError: If `frac_random` is not strictly between 0 and 1.

    Example:
        >>> fant_X = torch.rand(5, 1, 2)
        >>> fantasy_model = model.fantasize(fant_X, SobolQMCNormalSampler(16))
        >>> value_function = PosteriorMean(fantasy_model)
        >>> bounds = torch.tensor([[0., 0.], [1., 1.]])
        >>> Xinit = gen_value_function_initial_conditions(
        >>>     value_function, bounds, num_restarts=10, raw_samples=512,
        >>>     options={"frac_random": 0.25},
        >>> )
    """
    options = options or {}
    seed: Optional[int] = options.get("seed")
    frac_random: float = options.get("frac_random", 0.6)
    if not 0 < frac_random < 1:
        raise ValueError(
            f"frac_random must take on values in (0,1). Value: {frac_random}")
    # temperature used both for resampling the optimizers and for the final
    # selection in `initialize_q_batch`
    eta = options.get("eta", 2.0)

    # compute maximizer of the current value function
    value_function = _get_value_function(
        model=current_model,
        objective=acq_function.objective,
        sampler=getattr(acq_function, "sampler", None),
        project=getattr(acq_function, "project", None),
    )
    # function-scope import, kept exactly as in the original
    from botorch.optim.optimize import optimize_acqf

    # keys consumed by this function are stripped before forwarding `options`
    own_keys = ("frac_random", "num_inner_restarts", "raw_inner_samples",
                "eta")
    fantasy_cands, fantasy_vals = optimize_acqf(
        acq_function=value_function,
        bounds=bounds,
        q=1,
        num_restarts=options.get("num_inner_restarts", 20),
        raw_samples=options.get("raw_inner_samples", 1024),
        return_best_only=False,
        options={k: v for k, v in options.items() if k not in own_keys},
    )

    batch_shape = acq_function.model.batch_shape
    # sampling from the optimizers
    n_value = int((1 - frac_random) * raw_samples)  # number of non-random ICs
    if n_value > 0:
        weights = torch.exp(eta * standardize(fantasy_vals))
        idx = batched_multinomial(
            weights=weights.expand(*batch_shape, -1),
            num_samples=n_value,
            replacement=True,
        ).permute(-1, *range(len(batch_shape)))
        resampled = fantasy_cands[idx]
    else:
        # Empty placeholder so the concatenation below still works. It must
        # live on the same device as `bounds`: the original omitted `device`,
        # which makes `torch.cat` fail whenever `bounds` is not on the CPU.
        resampled = torch.empty(0,
                                *batch_shape,
                                1,
                                bounds.shape[-1],
                                dtype=bounds.dtype,
                                device=bounds.device)
    # add qMC samples
    randomized = draw_sobol_samples(bounds=bounds,
                                    n=raw_samples - n_value,
                                    q=1,
                                    batch_shape=batch_shape,
                                    seed=seed)
    # full set of raw samples
    X_rnd = torch.cat([resampled, randomized], dim=0)

    # evaluate the raw samples
    with torch.no_grad():
        Y_rnd = acq_function(X_rnd)

    # select the restart points using the heuristic
    return initialize_q_batch(X=X_rnd, Y=Y_rnd, n=num_restarts, eta=eta)