def __init__(
    self, d: int, seed: Optional[int] = None, inv_transform: bool = False
) -> None:
    r"""Set up a qMC sampler for the standard normal `N(0, I_d)`.

    Args:
        d: Number of dimensions of each returned sample.
        seed: Seed forwarded to the scrambled SobolEngine that produces
            the underlying uniform points.
        inv_transform: When True the inverse transform is used to map the
            uniform points to normals; otherwise Box-Muller is used.
    """
    self._d = d
    self._seed = seed
    self._inv_transform = inv_transform
    # Box-Muller consumes uniforms two at a time, so in that mode the Sobol
    # dimension is rounded up to the next even number.
    sobol_dim = d if inv_transform else 2 * math.ceil(d / 2)
    self._sobol_engine = SobolEngine(
        dimension=sobol_dim, scramble=True, seed=seed
    )
class NormalQMCEngine:
    r"""Engine for qMC sampling from a Multivariate Normal `N(0, I_d)`.

    By default, this implementation uses Box-Muller transformed Sobol samples
    following pg. 123 in [Pages2018numprob]_. To use the inverse transform
    instead, set `inv_transform=True`.

    Example:
        >>> engine = NormalQMCEngine(3)
        >>> samples = engine.draw(10)
    """

    def __init__(
        self, d: int, seed: Optional[int] = None, inv_transform: bool = False
    ) -> None:
        r"""Engine for drawing qMC samples from a multivariate normal `N(0, I_d)`.

        Args:
            d: The dimension of the samples.
            seed: The seed with which to seed the random number generator of
                the underlying SobolEngine.
            inv_transform: If True, use inverse transform instead of
                Box-Muller.
        """
        self._d = d
        self._seed = seed
        self._inv_transform = inv_transform
        if inv_transform:
            sobol_dim = d
        else:
            # to apply Box-Muller, we need an even number of dimensions
            sobol_dim = 2 * math.ceil(d / 2)
        self._sobol_engine = SobolEngine(
            dimension=sobol_dim, scramble=True, seed=seed
        )

    def draw(
        self,
        n: int = 1,
        out: Optional[Tensor] = None,
        dtype: torch.dtype = torch.float,
    ) -> Optional[Tensor]:
        r"""Draw `n` qMC samples from the standard Normal.

        Args:
            n: The number of samples to draw.
            out: An optional output tensor. If provided, draws are put into
                this tensor, and the function returns None.
            dtype: The desired torch data type (ignored if `out` is provided).

        Returns:
            A `n x d` tensor of samples if `out=None` and `None` otherwise.
        """
        # get base samples
        samples = self._sobol_engine.draw(n, dtype=dtype)
        if self._inv_transform:
            # Shrink the samples towards 0.5 so that values too close to 0/1
            # do not produce inf. The shrink factor must be representable in
            # the sample dtype: a fixed 1e-10 rounds away in float32 (where
            # 1 - 1e-10 == 1), hence the dtype-dependent machine epsilon.
            eps = torch.finfo(samples.dtype).eps
            v = 0.5 + (1 - eps) * (samples - 0.5)
            samples_tf = torch.erfinv(2 * v - 1) * math.sqrt(2)
        else:
            # apply Box-Muller transform (note: the reference indexes
            # starting from 1, here columns are 0-based)
            even = torch.arange(0, samples.shape[-1], 2)
            Rs = (-2 * torch.log(samples[:, even])).sqrt()
            thetas = 2 * math.pi * samples[:, 1 + even]
            cos = torch.cos(thetas)
            sin = torch.sin(thetas)
            samples_tf = torch.stack([Rs * cos, Rs * sin], -1).reshape(n, -1)
            # make sure we only return the number of dimension requested
            samples_tf = samples_tf[:, : self._d]
        if out is None:
            return samples_tf
        out.copy_(samples_tf)
        return None
def _create_candidates(self, n_cand, batch_size, X, fX, length, hypers):
    """Generate candidates assuming X has been scaled to [0,1]^d.

    Fits a GP (via `train_gp`) to the standardized observations, builds a
    trust region around the best observed point whose side lengths are
    proportional to the GP lengthscales, fills it with scrambled Sobol
    points, and samples the GP posterior at the resulting candidates.

    Args:
        n_cand: Number of candidate points to generate.
        batch_size: Number of posterior samples drawn per candidate.
        X: Evaluated points as a numpy array with values in [0, 1].
        fX: Function values observed at `X` (numpy array).
        length: Base side length of the trust region.
        hypers: Hyperparameters forwarded to `train_gp`.

    Returns:
        A tuple `(X_cand, y_cand, hypers)`: the `n_cand x dim` candidate
        array, the de-standardized posterior samples at those candidates
        (presumably `n_cand x batch_size` -- confirm against the caller),
        and the state dict of the fitted GP.
    """
    # Pick the center as the point with the smallest function values
    # NOTE: This may not be robust to noise, in which case the posterior
    # mean of the GP can be used instead
    assert X.min() >= 0.0 and X.max() <= 1.0

    # Standardize function values.
    mu, sigma = np.median(fX), fX.std()
    sigma = 1.0 if sigma < 1e-6 else sigma
    fX = (fX - mu) / sigma

    # Figure out what device we are running on
    if len(X) < self.min_cuda:
        device, dtype = torch.device("cpu"), torch.float64
    else:
        device, dtype = self.device, self.dtype

    # We use CG + Lanczos for training if we have enough data
    with gpytorch.settings.max_cholesky_size(self.max_cholesky_size):
        X_torch = torch.tensor(X).to(device=device, dtype=dtype)
        y_torch = torch.tensor(fX).to(device=device, dtype=dtype)
        gp = train_gp(
            train_x=X_torch,
            train_y=y_torch,
            use_ard=self.use_ard,
            num_steps=self.n_training_steps,
            hypers=hypers,
        )

        # Save state dict
        hypers = gp.state_dict()

    # Create the trust region boundaries
    x_center = X[fX.argmin().item(), :][None, :]
    weights = gp.covar_module.base_kernel.lengthscale.cpu().detach().numpy().ravel()
    weights = weights / weights.mean()  # This will make the next line more stable
    # We now have weights.prod() = 1
    weights = weights / np.prod(np.power(weights, 1.0 / len(weights)))
    lb = np.clip(x_center - weights * length / 2.0, 0.0, 1.0)
    ub = np.clip(x_center + weights * length / 2.0, 0.0, 1.0)

    # Draw a Sobol sequence in [lb, ub] in [0, 1]
    seed = np.random.randint(int(1e6))
    sobol = SobolEngine(self.dim, scramble=True, seed=seed)
    pert = sobol.draw(n_cand).to(dtype=dtype, device=device).cpu().detach().numpy()
    pert = lb + (ub - lb) * pert

    # Create a perturbation mask
    prob_perturb = min(20.0 / self.dim, 1.0)
    mask = np.random.rand(n_cand, self.dim) <= prob_perturb
    # Rows where nothing was selected get one randomly chosen dimension
    # perturbed. The upper bound of randint is exclusive, so it must be
    # `self.dim` (not `self.dim - 1`) for the last dimension to be eligible.
    ind = np.where(np.sum(mask, axis=1) == 0)[0]
    mask[ind, np.random.randint(0, self.dim, size=len(ind))] = 1

    # Create candidate points
    X_cand = x_center.copy() * np.ones((n_cand, self.dim))
    X_cand[mask] = pert[mask]

    # Figure out what device we are running on
    if len(X_cand) < self.min_cuda:
        device, dtype = torch.device("cpu"), torch.float64
    else:
        device, dtype = self.device, self.dtype

    # We may have to move the GP to a new device
    gp = gp.to(dtype=dtype, device=device)

    # We use Lanczos for sampling if we have enough data
    with torch.no_grad(), gpytorch.settings.max_cholesky_size(self.max_cholesky_size):
        X_cand_torch = torch.tensor(X_cand).to(device=device, dtype=dtype)
        y_cand = (
            gp.likelihood(gp(X_cand_torch))
            .sample(torch.Size([batch_size]))
            .t()
            .cpu()
            .detach()
            .numpy()
        )

    # Remove the torch variables
    del X_torch, y_torch, X_cand_torch, gp

    # De-standardize the sampled values
    y_cand = mu + sigma * y_cand

    return X_cand, y_cand, hypers
class NormalQMCEngine:
    r"""Quasi-Monte Carlo sampler for the standard normal `N(0, I_d)`.

    Scrambled Sobol points are mapped to normal variates, by default with
    the Box-Muller transform (pg. 123 in [Pages2018numprob]_). Pass
    `inv_transform=True` to map them with the inverse transform instead.

    Example:
        >>> engine = NormalQMCEngine(3)
        >>> samples = engine.draw(10)
    """

    def __init__(self,
                 d: int,
                 seed: Optional[int] = None,
                 inv_transform: bool = False) -> None:
        r"""Create the sampler and its underlying scrambled SobolEngine.

        Args:
            d: Number of dimensions of each returned sample.
            seed: Seed handed to the underlying SobolEngine.
            inv_transform: Use the inverse transform rather than Box-Muller
                when True.
        """
        self._d = d
        self._seed = seed
        self._inv_transform = inv_transform
        # Box-Muller works on pairs of uniforms, so it needs an even number
        # of Sobol dimensions; the inverse transform needs exactly `d`.
        sobol_dim = d if inv_transform else 2 * math.ceil(d / 2)
        self._sobol_engine = SobolEngine(dimension=sobol_dim,
                                         scramble=True,
                                         seed=seed)

    def draw(self,
             n: int = 1,
             out: Optional[Tensor] = None,
             dtype: torch.dtype = torch.float) -> Optional[Tensor]:
        r"""Produce `n` qMC samples from the standard Normal.

        Args:
            n: How many samples to draw.
            out: Optional tensor to receive the draws; when given, the
                samples are copied into it and None is returned.
            dtype: Torch dtype of the draws (ignored if `out` is provided).

        Returns:
            An `n x d` tensor of samples when `out` is None, else None.
        """
        uniforms = self._sobol_engine.draw(n, dtype=dtype)

        if self._inv_transform:
            # Pull the uniforms slightly towards 0.5 so that values too
            # close to 0/1 do not turn into inf under erfinv.
            shrink = 1 - torch.finfo(uniforms.dtype).eps
            squeezed = 0.5 + shrink * (uniforms - 0.5)
            normals = math.sqrt(2) * torch.erfinv(2 * squeezed - 1)
        else:
            # Box-Muller: even columns give the radius, odd columns the
            # angle (0-based columns here).
            radius = torch.sqrt(-2 * torch.log(uniforms[:, 0::2]))
            angle = 2 * math.pi * uniforms[:, 1::2]
            pairs = torch.stack(
                [radius * torch.cos(angle), radius * torch.sin(angle)], -1)
            # drop any surplus column created by rounding up to even
            normals = pairs.reshape(n, -1)[:, :self._d]

        if out is not None:
            out.copy_(normals)
            return None
        return normals
class MACEBO(AbstractOptimizer):
    """Bayesian optimizer that proposes points with the MACE acquisition.

    The first suggestions come from a Sobol sequence; once enough
    observations exist, a surrogate model is fitted and the MACE acquisition
    is optimized with `EvolutionOpt` to produce recommendations.
    """

    # Unclear what is best package to list for primary_import here.
    primary_import = "bayesmark"

    def __init__(self, api_config, model_name='gpy'):
        """Build the design space and the empty observation history.

        Args:
            api_config: Mapping from parameter name to its configuration
                dict (keys read here: 'type', 'space', 'range', 'values').
            model_name: Name of the surrogate passed to `get_model`.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.api_config = api_config
        self.space = self.parse_space(api_config)
        self.X = pd.DataFrame(columns=self.space.para_names)
        self.y = np.zeros((0, 1))
        self.model_name = model_name
        for k in api_config:
            print(k, api_config[k])
        self.sobol = SobolEngine(self.space.num_paras, scramble=False)

    def filter(self, y: torch.Tensor) -> tuple:
        """Flag observations that are not extreme outliers.

        Args:
            y: Tensor of objective values.

        Returns:
            A tuple `(keep, threshold)`. `keep` is a list of booleans, one
            per entry of `y`. If the values are not all positive or span a
            ratio of at most 20, everything is kept and the threshold is
            `np.inf`; otherwise entries above
            `min(20 * min(y), 95%-quantile(y))` are dropped.
        """
        if not (np.all(y.numpy() > 0) and (y.max() / y.min() > 20)):
            return [True for _ in range(y.shape[0])], np.inf
        data = y.numpy().reshape(-1)
        quant = min(data.min() * 20,
                    np.quantile(data, 0.95, interpolation='lower'))
        return (data <= quant).tolist(), quant

    def quasi_sample(self, n):
        """Draw `n` Sobol points and map them back to parameter space.

        Returns:
            Whatever `self.space.inverse_transform` returns for the scaled
            numeric and remaining columns (used as a DataFrame by callers).
        """
        samp = self.sobol.draw(n)
        # scale the unit-cube points to the optimizer's bounds
        samp = samp * (self.space.opt_ub - self.space.opt_lb) + self.space.opt_lb
        x = samp[:, :self.space.num_numeric]
        xe = samp[:, self.space.num_numeric:]
        df_samp = self.space.inverse_transform(x, xe)
        return df_samp

    def parse_space(self, api_config):
        """Translate `api_config` into a parsed `DesignSpace`.

        Raises:
            AssertionError: If a parameter has an unhandled 'type'.
        """
        space = DesignSpace()
        params = []
        for param_name in api_config:
            param_conf = api_config[param_name]
            param_type = param_conf['type']
            param_space = param_conf.get('space', None)
            param_range = param_conf.get("range", None)
            param_values = param_conf.get("values", None)

            bo_param_conf = {'name': param_name}
            if param_type == 'int':
                # ignore 'log' space
                # TODO: support log-scale int
                bo_param_conf['type'] = 'int'
                bo_param_conf['lb'] = param_range[0]
                bo_param_conf['ub'] = param_range[1]
            elif param_type == 'bool':
                bo_param_conf['type'] = 'bool'
            elif param_type in ('cat', 'ordinal'):
                bo_param_conf['type'] = 'cat'
                bo_param_conf['categories'] = list(set(param_values))
            elif param_type == 'real':
                if param_space in ('log', 'logit'):
                    bo_param_conf['type'] = 'pow'
                    bo_param_conf['base'] = 10
                    bo_param_conf['lb'] = param_range[0]
                    bo_param_conf['ub'] = param_range[1]
                else:
                    bo_param_conf['type'] = 'num'
                    bo_param_conf['lb'] = param_range[0]
                    bo_param_conf['ub'] = param_range[1]
            else:
                assert False, "type %s not handled in API" % param_type
            params.append(bo_param_conf)
        print(params)
        space.parse(params)
        return space

    @property
    def model_config(self):
        """Keyword arguments passed to `get_model` for `self.model_name`."""
        if self.model_name == 'gp':
            cfg = {
                'lr': 0.01,
                'num_epochs': 100,
                'verbose': True,
                'noise_lb': 8e-4,
                'pred_likeli': False,
            }
        elif self.model_name == 'gpy':
            cfg = {'verbose': False, 'warp': True, 'space': self.space}
        elif self.model_name == 'gpy_mlp':
            cfg = {'verbose': False}
        elif self.model_name == 'rf':
            cfg = {'n_estimators': 20}
        else:
            cfg = {}
        if self.space.num_categorical > 0:
            cfg['num_uniqs'] = [
                len(self.space.paras[name].categories)
                for name in self.space.enum_names
            ]
        return cfg

    def suggest(self, n_suggestions=1):
        """Propose `n_suggestions` parameter settings to evaluate next.

        Until at least `4 * n_suggestions` observations are stored, the
        suggestions are plain Sobol samples. Afterwards a surrogate is
        fitted and MACE is optimized; missing recommendations are topped up
        with Sobol samples.

        Returns:
            A list of dicts, one per suggestion, mapping parameter name to
            value. Values of parameters declared as 'int' are cast to int.
        """
        if self.X.shape[0] < 4 * n_suggestions:
            df_suggest = self.quasi_sample(n_suggestions)
            x_guess = [row.to_dict() for _, row in df_suggest.iterrows()]
        else:
            X, Xe = self.space.transform(self.X)
            try:
                if self.y.min() <= 0:
                    y = torch.FloatTensor(
                        power_transform(self.y / self.y.std(),
                                        method='yeo-johnson'))
                else:
                    y = torch.FloatTensor(
                        power_transform(self.y / self.y.std(),
                                        method='box-cox'))
                    if y.std() < 0.5:
                        y = torch.FloatTensor(
                            power_transform(self.y / self.y.std(),
                                            method='yeo-johnson'))
                if y.std() < 0.5:
                    raise RuntimeError('Power transformation failed')
                model = get_model(self.model_name, self.space.num_numeric,
                                  self.space.num_categorical, 1,
                                  **self.model_config)
                model.fit(X, Xe, y)
            except Exception:
                # Best-effort fallback: refit on the raw targets with
                # outliers removed. Only ordinary errors are handled so that
                # KeyboardInterrupt / SystemExit still propagate.
                print('Error fitting GP')
                y = torch.FloatTensor(self.y).clone()
                filt, q = self.filter(y)
                print('Q = %g, kept = %d/%d' %
                      (q, sum(filt), self.y.shape[0]))
                X = X[filt]
                Xe = Xe[filt]
                y = y[filt]
                model = get_model(self.model_name, self.space.num_numeric,
                                  self.space.num_categorical, 1,
                                  **self.model_config)
                model.fit(X, Xe, y)
            print('Noise level: %g' % model.noise, flush=True)

            best_id = np.argmin(self.y.squeeze())
            best_x = self.X.iloc[[best_id]]
            best_y = y.min()
            py_best, ps2_best = model.predict(*self.space.transform(best_x))
            py_best = py_best.detach().numpy().squeeze()
            ps_best = ps2_best.sqrt().detach().numpy().squeeze()

            # XXX: minimize (mu, -1 * sigma)
            #      s.t.     LCB < best_y
            n_iter = max(1, self.X.shape[0] // n_suggestions)
            upsi = 0.5
            delta = 0.01
            kappa = np.sqrt(
                upsi * 2 *
                np.log(n_iter**(2.0 + self.X.shape[1] / 2.0) * 3 * np.pi**2 /
                       (3 * delta)))

            acq = MACE(model, py_best, kappa=kappa)  # LCB < py_best
            mu = Mean(model)
            sig = Sigma(model, linear_a=-1.)
            opt = EvolutionOpt(self.space, acq, pop=100, iters=100,
                               verbose=True)
            rec = opt.optimize(initial_suggest=best_x).drop_duplicates()
            rec = rec[self.check_unique(rec)]

            # Top up with unseen Sobol points, giving up on uniqueness
            # after a few rounds.
            cnt = 0
            while rec.shape[0] < n_suggestions:
                rand_rec = self.quasi_sample(n_suggestions - rec.shape[0])
                rand_rec = rand_rec[self.check_unique(rand_rec)]
                rec = pd.concat([rec, rand_rec], ignore_index=True)
                cnt += 1
                if cnt > 3:
                    break
            if rec.shape[0] < n_suggestions:
                rand_rec = self.quasi_sample(n_suggestions - rec.shape[0])
                rec = pd.concat([rec, rand_rec], ignore_index=True)

            select_id = np.random.choice(rec.shape[0], n_suggestions,
                                         replace=False).tolist()
            with torch.no_grad():
                py_all = mu(*self.space.transform(rec)).squeeze().numpy()
                ps_all = -1 * sig(*self.space.transform(rec)).squeeze().numpy()
                best_pred_id = np.argmin(py_all)
                best_unce_id = np.argmax(ps_all)
                # make sure the most uncertain and the best-predicted
                # recommendations are part of the batch
                if best_unce_id not in select_id and n_suggestions > 2:
                    select_id[0] = best_unce_id
                if best_pred_id not in select_id and n_suggestions > 2:
                    select_id[1] = best_pred_id
                rec_selected = rec.iloc[select_id].copy()
                py, ps2 = model.predict(*self.space.transform(rec_selected))
                rec_selected['py'] = py.squeeze().numpy()
                rec_selected['ps'] = ps2.sqrt().squeeze().numpy()
                print(rec_selected)
            print('Best y is %g %g %g %g' %
                  (self.y.min(), best_y, py_best, ps_best),
                  flush=True)
            x_guess = [rec.iloc[idx].to_dict() for idx in select_id]

        for guess in x_guess:
            for name in guess:
                if self.api_config[name]['type'] == 'int':
                    guess[name] = int(guess[name])
        return x_guess

    def check_unique(self, rec: pd.DataFrame) -> list:
        """Return, per row of `rec`, whether it is new.

        A row is flagged False when it duplicates a row of `self.X` or an
        earlier row of `rec`.
        """
        return (~pd.concat([self.X, rec], axis=0).duplicated().tail(
            rec.shape[0]).values).tolist()

    def observe(self, X, y):
        """Feed an observation back.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        y = np.array(y).reshape(-1)
        # only finite objective values are stored
        valid_id = np.where(np.isfinite(y))[0].tolist()
        XX = [X[idx] for idx in valid_id]
        yy = y[valid_id].reshape(-1, 1)
        if XX:
            self.X = pd.concat([self.X, pd.DataFrame(XX)], ignore_index=True)
        self.y = np.vstack([self.y, yy])
        print(yy)
def generate_batch(
    state,
    model,  # GP model
    X,  # Evaluated points on the domain [0, 1]^d
    Y,  # Function values
    batch_size,
    n_candidates=None,  # Number of candidates for Thompson sampling
    num_restarts=10,
    raw_samples=512,
    acqf="ts",  # "ei" or "ts"
    deup=False,
    turbo=True,
):
    """Propose the next batch of points, optionally inside a trust region.

    Args:
        state: Object whose `length` attribute gives the trust-region side
            length (only read when `turbo` is True).
        model: Fitted GP model. Its kernel lengthscales are read from
            `model.covar_module` or, when `deup` is True, from
            `model.f_predictor.covar_module`.
        X: Evaluated points with values in [0, 1].
        Y: Finite function values observed at `X`; the maximum is treated
            as the incumbent.
        batch_size: Number of points to return.
        n_candidates: Number of Thompson-sampling candidates. Defaults to
            `min(5000, max(2000, 200 * dim))`.
        num_restarts: Forwarded to `optimize_acqf` ("ei" only).
        raw_samples: Forwarded to `optimize_acqf` ("ei" only).
        acqf: "ts" for Thompson sampling or "ei" for expected improvement.
        deup: Selects where the lengthscales are read from (see `model`).
        turbo: If False, the whole unit cube is used instead of a trust
            region.

    Returns:
        The proposed points `X_next`.
    """
    dim = X.shape[-1]
    assert acqf in ("ts", "ei")
    assert X.min() >= 0.0 and X.max() <= 1.0 and torch.all(torch.isfinite(Y))
    if n_candidates is None:
        n_candidates = min(5000, max(2000, 200 * X.shape[-1]))

    x_center = X[Y.argmax(), :].clone()
    if turbo:
        # Scale the TR to be proportional to the lengthscales
        kernel_owner = model.f_predictor if deup else model
        weights = (
            kernel_owner.covar_module.base_kernel.lengthscale.squeeze().detach()
        )
        weights = weights / weights.mean()
        weights = weights / torch.prod(weights.pow(1.0 / len(weights)))
        tr_lb = torch.clamp(x_center - weights * state.length / 2.0, 0.0, 1.0)
        tr_ub = torch.clamp(x_center + weights * state.length / 2.0, 0.0, 1.0)
    else:
        # Whole unit cube, created with X's dtype/device so the bounds match
        # the ones derived from `x_center` in the trust-region case.
        tr_lb = torch.zeros(dim, dtype=X.dtype, device=X.device)
        tr_ub = torch.ones(dim, dtype=X.dtype, device=X.device)

    if acqf == "ts":
        # NOTE: `dtype` and `device` are not defined in this function; they
        # are expected to exist at module level.
        sobol = SobolEngine(dim, scramble=True)
        pert = sobol.draw(n_candidates).to(dtype=dtype, device=device)
        pert = tr_lb + (tr_ub - tr_lb) * pert

        # Create a perturbation mask
        prob_perturb = min(20.0 / dim, 1.0)
        mask = (torch.rand(n_candidates, dim, dtype=dtype, device=device) <=
                prob_perturb)
        # Rows with no perturbed dimension get one chosen at random. The
        # upper bound of torch.randint is exclusive, so it must be `dim`:
        # `dim - 1` never picks the last dimension and is an empty range
        # when dim == 1.
        ind = torch.where(mask.sum(dim=1) == 0)[0]
        mask[ind, torch.randint(0, dim, size=(len(ind), ), device=device)] = 1

        # Create candidate points from the perturbations and the mask
        X_cand = x_center.expand(n_candidates, dim).clone()
        X_cand[mask] = pert[mask]

        # Sample on the candidate points
        thompson_sampling = MaxPosteriorSampling(model=model,
                                                 replacement=False)
        X_next = thompson_sampling(X_cand, num_samples=batch_size)

    elif acqf == "ei":
        if batch_size > 1:
            ei = qExpectedImprovement(model, Y.max(), maximize=True)
        else:
            ei = ExpectedImprovement(model, Y.max(), maximize=True)
        try:
            X_next, _ = optimize_acqf(
                ei,
                bounds=torch.stack([tr_lb, tr_ub]),
                q=batch_size,
                num_restarts=num_restarts,
                raw_samples=raw_samples,
            )
        except NotPSDError:
            # Fall back to Sobol points inside the bounds for this step.
            sobol = SobolEngine(dim, scramble=True)
            pert = sobol.draw(batch_size).to(dtype=dtype, device=device)
            pert = tr_lb + (tr_ub - tr_lb) * pert
            X_next = pert
            print(
                'Warning: NotPSDError, using {} purely random candidates for this step'
                .format(batch_size))

    return X_next
def get_initial_points(dim, n_pts):
    """Return `n_pts` scrambled Sobol points of dimension `dim`.

    The points are converted to `dtype` and `device`, which are not defined
    in this function and are expected to exist at module level.
    """
    engine = SobolEngine(dimension=dim, scramble=True)
    return engine.draw(n=n_pts).to(dtype=dtype, device=device)