def test_set_transformed_inputs(self):
    """Exercise ``_set_transformed_inputs`` with a ``Normalize`` input transform.

    For both float and double precision the test verifies that:

    * the model stores the raw training inputs while the transform has
      ``transform_on_preprocess=False``;
    * after switching ``transform_on_preprocess`` to ``True`` and calling
      ``_set_transformed_inputs``, the stored inputs equal the transformed ones;
    * a ``BotorchError`` is raised for a ``MockGP`` model (the case labelled
      "no set_train_data method").
    """
    for dtype in (torch.float, torch.double):
        tensor_kwargs = {"dtype": dtype, "device": self.device}
        train_x = torch.rand(5, 1, **tensor_kwargs)
        train_y = torch.rand(5, 1, **tensor_kwargs)
        normalizer = Normalize(
            d=1,
            bounds=torch.tensor([[0.0], [2.0]], **tensor_kwargs),
            transform_on_preprocess=False,
        )
        model = SingleTaskGP(train_x, train_y, input_transform=normalizer)
        stored_inputs = model.train_inputs[0]
        self.assertTrue(torch.equal(stored_inputs, train_x))

        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        # The input transform must only be applied when
        # `transform_on_preprocess` is True, so the inputs are still raw here.
        stored_inputs = model.train_inputs[0]
        self.assertTrue(torch.equal(stored_inputs, train_x))

        normalizer.transform_on_preprocess = True
        _set_transformed_inputs(mll)
        stored_inputs = model.train_inputs[0]
        self.assertTrue(torch.equal(stored_inputs, normalizer(train_x)))
        model.eval()

        # A model without a `set_train_data` method must be rejected.
        mock_model = MockGP(MockPosterior())
        mock_model.train_inputs = (train_x,)
        mock_model.likelihood = model.likelihood
        mock_model.input_transform = normalizer
        mll = ExactMarginalLogLikelihood(mock_model.likelihood, mock_model)
        with self.assertRaises(BotorchError):
            _set_transformed_inputs(mll)
class GaussianProcess(object):
    """Thin wrapper around a BoTorch ``SingleTaskGP`` with input/output normalization.

    Inputs are mapped through ``param_normalizer.project_to`` and targets are
    standardized with a ``normalization_tools.Standardizer`` before the GP is
    fitted; predictions are mapped back to the original target scale.
    """

    def __init__(self, dx, param_normalizer, *args, **kwargs):
        """Store the normalizers; the GP itself is created lazily in :meth:`fit`.

        :param dx: only printed here, not used otherwise by this class
        :param param_normalizer: object providing ``project_to(x)`` for the inputs
        :param args: ignored
        :param kwargs: ignored
        """
        print(dx)
        self.param_normalizer = param_normalizer
        self.data_normalizer = normalization_tools.Standardizer()
        self.gp = None  # set by fit()

    def fit(self, x_train, y_train):
        """Fit a fresh ``SingleTaskGP`` on the normalized training data.

        :param x_train: training inputs (normalized via ``param_normalizer``)
        :param y_train: training targets (standardized via ``data_normalizer``)
        :return: the fitted GP model, which is also stored in ``self.gp``
        """
        # normalize parameter (=input) data
        x_train_norm = self.param_normalizer.project_to(x_train)
        # normalize the data
        y_train_norm = self.data_normalizer.standardize(y_train)

        self.gp = SingleTaskGP(x_train_norm, y_train_norm)
        # lower-bound the raw noise parameter at 1e-5
        self.gp.likelihood.noise_covar.register_constraint(
            "raw_noise", GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(self.gp.likelihood, self.gp)
        fit_gpytorch_model(mll)
        return self.gp

    def predict(self, x):
        """Evaluate the fitted GP at ``x`` and return un-standardized mean and variance.

        :param x: query inputs in the original (un-normalized) parameter space
        :return: tuple ``(mean, variance)``; the mean is reshaped to ``(-1, 1)``
                 and both are mapped back through ``data_normalizer``
        :raises RuntimeError: if called before :meth:`fit`
        """
        if self.gp is None:
            # Without this guard the call fails later with an opaque
            # "'NoneType' object has no attribute 'eval'".
            raise RuntimeError(
                "GaussianProcess.predict() called before fit(); no GP model is available."
            )

        x_norm = self.param_normalizer.project_to(x)
        self.gp.eval()
        self.gp.likelihood.eval()
        with torch.no_grad():
            pred = self.gp(x_norm)
        mean = self.data_normalizer.unstandardize(pred.mean.view(-1, 1))
        variance = self.data_normalizer.unstandardize_wo_mean(pred.variance)
        return mean, variance
def bo_loop(gp_model: SingleTaskGP,
            acq_func_id: str,
            acq_func_kwargs: Dict[str, Any],
            acq_func_opt_kwargs: Dict[str, Any],
            bounds: Tensor,
            tkwargs: Dict[str, Any],
            q: int,
            num_restarts: int,
            raw_initial_samples,
            seed: int,
            num_MC_sample_acq: int) -> Iterable[Any]:
    """Run one acquisition step and return the acquired candidate(s).

    :param gp_model: fitted GP surrogate; it is switched to eval mode
    :param acq_func_id: identifier forwarded to `query_acq_func`
    :param acq_func_kwargs: extra acquisition-function kwargs; `best_f` is added
                            to a copy, the caller's dict is left untouched
    :param acq_func_opt_kwargs: options forwarded to the acquisition optimizer;
                                an empty dict selects `{'batch_limit': 100}`
    :param bounds: optimization bounds, moved to `tkwargs`
    :param tkwargs: dtype / device kwargs applied to `bounds` and the acquisition function
    :param q: number of candidates acquired jointly
    :param num_restarts: number of optimizer restarts
    :param raw_initial_samples: number of raw samples used to initialise the optimizer
    :param seed: seed applied to both NumPy and PyTorch
    :param num_MC_sample_acq: number of MC samples forwarded to `query_acq_func`
    :return: the acquired candidate, moved to the device of the GP's training inputs
    """
    # seed everything
    np.random.seed(seed)
    torch.manual_seed(seed)

    # we want to maximize: use the 0.9-quantile of the training targets as cutoff
    fmax = torch.quantile(gp_model.train_targets, .9).item()
    print(f"Using good point cutoff {fmax:.2f}")

    # put on proper device
    device = gp_model.train_inputs[0].device
    bounds = bounds.to(**tkwargs)
    gp_model.eval()

    # Work on a copy so the caller's kwargs dict is not mutated as a side effect.
    acq_func_kwargs = {**acq_func_kwargs, 'best_f': fmax}
    acq_func = query_acq_func(acq_func_id=acq_func_id,
                              acq_func_kwargs=acq_func_kwargs,
                              gp_model=gp_model,
                              q=q,
                              num_MC_samples_acq=num_MC_sample_acq)
    acq_func.to(**tkwargs)

    options = {
        'batch_limit': 100
    } if acq_func_opt_kwargs == {} else acq_func_opt_kwargs

    print("Start acquisition function optimization...")
    # q == 1 goes through `optimize_acqf` (with LBFGS, per the original comment);
    # any other q goes through `optimize_acqf_torch`.
    optimizer = optimize_acqf if q == 1 else optimize_acqf_torch
    candidate, acq_value = optimizer(acq_function=acq_func,
                                     bounds=bounds,
                                     q=q,
                                     num_restarts=num_restarts,
                                     raw_samples=raw_initial_samples,
                                     return_best_only=True,
                                     options=options)
    print(f"Acquired {candidate} with acquisition value {acq_value}")
    return candidate.to(device=device)
def update(self, inputs, targets, *args, **kwargs):
    """Assign new observations to local GP models and take one optimizer step.

    Each row of ``inputs`` is passed through ``self.stem`` and offered to the
    highest-weighted half of the existing models (ranked by
    ``self._construct_weights``). The first candidate holding fewer than
    ``self.max_data_per_model`` targets is conditioned on the observation; if
    none qualifies, a new ``SingleTaskGP`` is created for it.

    NOTE(review): the original indentation was lost in SOURCE; the training
    step below is assumed to run once, after all rows have been assigned.

    :param inputs: new inputs, reshaped to ``(-1, self.input_dim)``
    :param targets: new targets, reshaped to ``(-1, self.target_dim)``
    :param args: unused
    :param kwargs: unused
    :return: tuple ``(gp_loss, stem_loss)``; both are the same scalar loss value
    """
    inputs = inputs.view(-1, self.input_dim)
    targets = targets.view(-1, self.target_dim)
    # Keep the raw (un-stemmed) data; `_raw_inputs` stays a one-element list.
    self._raw_inputs = [torch.cat([*self._raw_inputs, inputs], dim=-2)]
    self._raw_targets = torch.cat([self._raw_targets, targets], dim=-2)
    for i in range(inputs.shape[-2]):
        new_x = self.stem(inputs[i].unsqueeze(0))
        new_y = targets[i].unsqueeze(0)
        # Rank the models by descending weight for this point.
        _, ranked_models = torch.sort(self._construct_weights(new_x), dim=0, descending=True)
        # Only the top half (rounded up) of the models are candidates.
        num_candidates = math.ceil(len(self.models) / 2)
        assignment = None
        for model_idx in ranked_models[:num_candidates]:
            num_data = self.models[model_idx].train_targets.size(-1)
            if num_data >= self.max_data_per_model:
                # This model is full; try the next-best candidate.
                continue
            else:
                assignment = model_idx.squeeze(-1)
                # Dummy forward pass to initialise the model's caches before
                # conditioning on the new observation.
                self.models[assignment](new_x)
                new_model = self.models[
                    assignment].condition_on_observations(new_x, new_y)
                self.models[assignment] = new_model
                self.update_model_caches()
                break
        if assignment is None:
            # No candidate had room: start a new model seeded with this point.
            print("Adding new model")
            assignment = torch.tensor((len(self.models), ), device=new_x.device)
            new_model = SingleTaskGP(new_x, new_y, covar_module=self.covar_module)
            new_model.likelihood.initialize(noise=self.noise)
            new_model.eval()
            self.models.append(new_model)
            self.update_model_caches()
        # Record which model received this observation.
        self._assignments = torch.cat([self._assignments, assignment])
    # One optimizer step on the negative marginal log likelihood.
    # NOTE(review): no `optimizer.zero_grad()` is visible in this method —
    # confirm gradients are cleared elsewhere.
    self.train()
    features = self._refresh_features()
    train_dist = self(features)
    loss = -self.mll(train_dist, [m.train_targets for m in self.models])
    loss.backward()
    self.optimizer.step()
    # Both reported losses are the same scalar.
    gp_loss = stem_loss = loss.item()
    return gp_loss, stem_loss
def _gp_fit_as_numpy(values: Tensor):
    """Return `values` as a detached NumPy array on the CPU."""
    return values.detach().cpu().numpy()


def _gp_fit_power_warp(values: Tensor, like: Tensor) -> Tensor:
    """Warp `values` with the power transform used for GP output warping.

    The values are divided by their standard deviation, then transformed with
    Yeo-Johnson if any value is non-positive and with Box-Cox otherwise. A
    Box-Cox result whose standard deviation is below 0.5 is re-scaled and
    transformed once more with Yeo-Johnson (and moved to the dtype/device of `like`).
    """
    # `power_transform` requires a 2-D array of shape (n_samples, n_features).
    arr = values.cpu().numpy().reshape(-1, 1)
    if arr.min() <= 0:
        return torch.FloatTensor(
            power_transform(arr / arr.std(), method='yeo-johnson'))
    warped = torch.FloatTensor(
        power_transform(arr / arr.std(), method='box-cox'))
    if warped.std() < 0.5:
        arr = warped.numpy()
        warped = torch.FloatTensor(
            power_transform(arr / arr.std(), method='yeo-johnson')).to(like)
    return warped


def _gp_fit_save_histogram(path: str, title: str, series) -> None:
    """Save overlaid density histograms of `(values, label)` pairs; labels may be None."""
    any_label = False
    for values, label in series:
        plt.hist(_gp_fit_as_numpy(values), bins=100, label=label, alpha=0.5,
                 density=True)
        any_label = any_label or label is not None
    if any_label:
        plt.legend()
    plt.title(title)
    plt.savefig(path)
    plt.close()


def _gp_fit_save_scatter(path: str, title: str, x_label: str, y_label: str,
                         true_xy, pred_xy) -> None:
    """Save a scatter plot comparing true and predicted (objective, error) pairs."""
    plt.scatter(_gp_fit_as_numpy(true_xy[0]), _gp_fit_as_numpy(true_xy[1]),
                label='true', marker='+')
    plt.scatter(_gp_fit_as_numpy(pred_xy[0]), _gp_fit_as_numpy(pred_xy[1]),
                label='pred', marker='*')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.savefig(path)
    plt.close()


def _gp_fit_save_uncertainty(path: str, title: str, truth: Tensor,
                             pred_mean: Tensor, pred_std: Tensor,
                             true_label: str, pred_label: str) -> None:
    """Save predictions with error bars, ordered by increasing true value."""
    truth_sorted, order = torch.sort(truth)
    positions = np.arange(len(order))
    mean_sorted = _gp_fit_as_numpy(pred_mean.view(-1)[order]).flatten()
    std_sorted = _gp_fit_as_numpy(pred_std.view(-1)[order]).flatten()
    plt.scatter(positions, _gp_fit_as_numpy(truth_sorted), label=true_label,
                marker='+', color='C1', s=15)
    # `fmt='*'` draws markers only; passing `marker=` instead would also
    # connect the points with a line.
    plt.errorbar(positions, mean_sorted, yerr=std_sorted, fmt='*', alpha=0.05,
                 label=pred_label, color='C0', ecolor='C0')
    plt.scatter(positions, mean_sorted, marker='*', alpha=0.2, s=10,
                color='C0')
    plt.legend()
    plt.title(title)
    plt.savefig(path)
    plt.close()


def gp_fit_test(x_train: Tensor,
                y_train: Tensor,
                error_train: Tensor,
                x_test: Tensor,
                y_test: Tensor,
                error_test: Tensor,
                gp_obj_model: SingleTaskGP,
                gp_error_model: SingleTaskGP,
                tkwargs: Dict[str, Any],
                gp_test_folder: str,
                obj_out_wp: bool = False,
                err_out_wp: bool = False) -> None:
    """
    Evaluate the fit of the objective GP (and optionally the error GP) and dump diagnostics.

    1) Estimates mean test error between predicted and the true objective function values.
    2) Estimates mean test error between predicted recon. error by the gp_model and the
       true recon. error of the vae_model (only when `gp_error_model` is not None).

    Per-point squared errors (divided by the number of points), the data tensors and a set
    of diagnostic PDF plots are written to `gp_test_folder`; the summed MSEs are printed.

    :param x_train: normalised points at which the gps were trained
    :param y_train: objective value function corresponding to x_train that were used as
                    targets of `gp_obj_model`
    :param error_train: reconstruction error value at points x_train that were used as
                        targets of `gp_error_model`
    :param x_test: normalised test points
    :param y_test: objective value function corresponding to x_test
    :param error_test: reconstruction error at test points
    :param gp_obj_model: the gp model trained to predict the black box objective function values
    :param gp_error_model: the gp model trained to predict reconstruction error; `None`
                           disables every error-related computation and plot
    :param tkwargs: dict of type and device
    :param gp_test_folder: folder to save test results (created if missing)
    :param obj_out_wp: if the `gp_obj_model` was trained with output warping then need to
                       apply the same transform
    :param err_out_wp: if the `gp_error_model` was trained with output warping then need to
                       apply the same transform
    :return: None; all results are written to disk or printed
    """
    do_robust = gp_error_model is not None

    # Creates missing parent folders too and does not fail if the folder exists.
    os.makedirs(gp_test_folder, exist_ok=True)

    def out_path(name: str) -> str:
        return os.path.join(gp_test_folder, name)

    gp_obj_model.eval()
    gp_obj_model.to(tkwargs['device'])
    y_train = y_train.view(-1)
    if do_robust:
        gp_error_model.eval()
        gp_error_model.to(tkwargs['device'])
        error_train = error_train.view(-1)

    with torch.no_grad():
        if obj_out_wp:
            y_train = _gp_fit_power_warp(y_train, like=x_train)
            y_test = _gp_fit_power_warp(y_test, like=x_test)

        y_train = y_train.view(-1).to(**tkwargs)
        y_test = y_test.view(-1).to(**tkwargs)

        # The models are in eval mode, so each posterior is computed once and reused.
        obj_post_train = gp_obj_model.posterior(x_train)
        obj_post_test = gp_obj_model.posterior(x_test)
        y_pred_train = obj_post_train.mean.view(-1)
        y_pred_test = obj_post_test.mean.view(-1)
        y_std_train = obj_post_train.variance.view(-1).sqrt()
        y_std_test = obj_post_test.variance.view(-1).sqrt()

        # Per-point squared error already divided by n, so `.sum()` is the MSE.
        gp_obj_val_model_mse_train = (y_pred_train - y_train).pow(2).div(
            len(y_train))
        gp_obj_val_model_mse_test = (y_pred_test - y_test).pow(2).div(
            len(y_test))
        torch.save(gp_obj_val_model_mse_train,
                   out_path('gp_obj_val_model_mse_train.npz'))
        torch.save(gp_obj_val_model_mse_test,
                   out_path('gp_obj_val_model_test.npz'))
        print(
            f'GP training fit on objective value: MSE={gp_obj_val_model_mse_train.sum().item():.5f}'
        )
        print(
            f'GP testing fit on objective value: MSE={gp_obj_val_model_mse_test.sum().item():.5f}'
        )

        if do_robust:
            if err_out_wp:
                error_train = _gp_fit_power_warp(error_train, like=x_train)
                error_test = _gp_fit_power_warp(error_test, like=x_test)

            error_train = error_train.view(-1).to(**tkwargs)
            error_test = error_test.view(-1).to(**tkwargs)

            err_post_train = gp_error_model.posterior(x_train)
            err_post_test = gp_error_model.posterior(x_test)
            pred_recon_train = err_post_train.mean.view(-1)
            pred_recon_test = err_post_test.mean.view(-1)
            recon_std_train = err_post_train.variance.view(-1).sqrt()
            recon_std_test = err_post_test.variance.view(-1).sqrt()

            gp_error_model_mse_train = (error_train -
                                        pred_recon_train).pow(2).div(
                                            len(error_train))
            gp_error_model_mse_test = (error_test -
                                       pred_recon_test).pow(2).div(
                                           len(error_test))
            torch.save(gp_error_model_mse_train,
                       out_path('gp_error_model_mse_train.npz'))
            torch.save(gp_error_model_mse_test,
                       out_path('gp_error_model_mse_test.npz'))
            print(
                f'GP training fit on reconstruction errors: MSE={gp_error_model_mse_train.sum().item():.5f}'
            )
            print(
                f'GP testing fit on reconstruction errors: MSE={gp_error_model_mse_test.sum().item():.5f}'
            )

    # Raw data dumps. `x_test` is deliberately written under both historical
    # file names so that existing readers of either file keep working.
    torch.save(error_test, out_path("true_rec_err_z.pt"))
    torch.save(error_train, out_path("error_train.pt"))
    torch.save(x_train, out_path("train_x.pt"))
    torch.save(x_test, out_path("test_x.pt"))
    torch.save(y_train, out_path("y_train.pt"))
    torch.save(x_test, out_path("X_test.pt"))
    torch.save(y_test, out_path("y_test.pt"))

    # y plots
    _gp_fit_save_histogram(out_path('gp_obj_train.pdf'), 'Training set',
                           [(y_train, 'y train'), (y_pred_train, 'y pred')])
    _gp_fit_save_histogram(out_path('gp_obj_train_mse.pdf'),
                           'MSE of gp_obj_val model on training set',
                           [(gp_obj_val_model_mse_train, None)])
    _gp_fit_save_histogram(out_path('gp_obj_test.pdf'), 'Validation set',
                           [(y_test, 'y true'), (y_pred_test, 'y pred')])
    _gp_fit_save_histogram(out_path('gp_obj_test_mse.pdf'),
                           'MSE of gp_obj_val model on validation set',
                           [(gp_obj_val_model_mse_test, None)])

    if do_robust:
        # error plots
        _gp_fit_save_histogram(out_path('gp_error_train.pdf'), 'Training set',
                               [(error_train, 'error train'),
                                (pred_recon_train, 'error pred')])
        _gp_fit_save_histogram(out_path('gp_error_train_mse.pdf'),
                               'MSE of gp_error model on training set',
                               [(gp_error_model_mse_train, None)])
        _gp_fit_save_histogram(out_path('gp_error_test.pdf'),
                               'Validation set',
                               [(error_test, 'error true'),
                                (pred_recon_test, 'error pred')])
        _gp_fit_save_histogram(out_path('gp_error_test_mse.pdf'),
                               'MSE of gp_error model on validation set',
                               [(gp_error_model_mse_test, None)])

        # y-error plots (a scatter of pairs does not depend on point order,
        # so no sorting is needed)
        _gp_fit_save_scatter(out_path('scatter_obj_error_train.pdf'),
                             'y_train vs. error_train',
                             'y train targets',
                             'recon. error train targets',
                             true_xy=(y_train, error_train),
                             pred_xy=(y_pred_train, pred_recon_train))
        _gp_fit_save_scatter(out_path('scatter_obj_error_test.pdf'),
                             'y_test vs. error_test',
                             'y test targets',
                             'recon. error test targets',
                             true_xy=(y_test, error_test),
                             pred_xy=(y_pred_test, pred_recon_test))

        # error var plots
        _gp_fit_save_uncertainty(
            out_path('gp_error_train_uncertainty.pdf'),
            'error predictions and uncertainty on train set', error_train,
            pred_recon_train, recon_std_train, 'err true', 'err pred')
        _gp_fit_save_uncertainty(
            out_path('gp_error_test_uncertainty.pdf'),
            'error predictions and uncertainty on test set', error_test,
            pred_recon_test, recon_std_test, 'err true', 'err pred')

    # y var plots
    _gp_fit_save_uncertainty(out_path('gp_obj_val_train_uncertainty.pdf'),
                             'y predictions and uncertainty on train set',
                             y_train, y_pred_train, y_std_train, 'y true',
                             'y pred')
    _gp_fit_save_uncertainty(out_path('gp_obj_val_test_uncertainty.pdf'),
                             'y predictions and uncertainty on test set',
                             y_test, y_pred_test, y_std_test, 'y true',
                             'y pred')
def gp_torch_train(train_x: Tensor,
                   train_y: Tensor,
                   n_inducing_points: int,
                   tkwargs: Dict[str, Any],
                   init,
                   scale: bool,
                   covar_name: str,
                   gp_file: Optional[str],
                   save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Dict[str, Any] = None) -> SingleTaskGP:
    """Build (or reload) a `SingleTaskGP` with an inducing-point kernel, fit it and save it.

    :param train_x: training inputs; the second dimension is the input dimension
    :param train_y: training targets, must have more than one dimension
    :param n_inducing_points: number of inducing points of the `InducingPointKernel`
    :param tkwargs: dtype / device kwargs the model is moved to before fitting
    :param init: if truthy, hyper-parameters are initialised from the data (k-means
                 inducing points, median pairwise distances as lengthscales);
                 otherwise the state dict in `gp_file` is loaded
    :param scale: forwarded to `query_covar`; when truthy and `init` is set, the
                  output scale is initialised with the variance of `train_y`
    :param covar_name: kernel name forwarded to `query_covar`
    :param gp_file: path of the state dict to load when `init` is falsy
    :param save_file: path the fitted model's state dict is saved to
    :param input_wp: whether to add an input-warping transform to the model
    :param outcome_transform: optional outcome transform passed to the model
    :param options: options forwarded to `fit_gpytorch_torch`
    :return: the fitted model, in eval mode
    """
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)

    n_dims = train_x.shape[1]
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # Data-driven initialisation of the hyper-parameters.
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        clusterer = MiniBatchKMeans(n_clusters=n_inducing_points,
                                    batch_size=min(10000, train_x.shape[0]),
                                    n_init=25)
        tic = time.time()
        clusterer.fit(train_x.cpu().numpy())
        toc = time.time()
        print(f"K means took {toc - tic:.1f}s to finish...")
        inducing_points = torch.from_numpy(clusterer.cluster_centers_.copy())

        output_scale = train_y.var().item() if scale else None

        # One lengthscale per input dimension: the median pairwise distance
        # along that dimension, floored at 0.01.
        lscales = torch.empty(1, n_dims)
        for dim in range(n_dims):
            column = train_x[:, dim].view(-1, 1)
            lscales[0, dim] = torch.pdist(column).median().clamp(min=0.01)
    else:
        # Placeholders only; every value is overwritten when the state dict is loaded.
        output_scale = 1
        lscales = torch.ones(n_dims)
        inducing_points = torch.empty(n_inducing_points, n_dims)

    base_covar_module = query_covar(covar_name=covar_name,
                                    scale=scale,
                                    outputscale=output_scale,
                                    lscales=lscales)
    covar_module = InducingPointKernel(base_covar_module,
                                       inducing_points=inducing_points,
                                       likelihood=likelihood)

    input_warp_tf = None
    if input_wp:
        # The initialisation path uses `CustomWarp`, the reload path `Warp`.
        warp_cls = CustomWarp if init else Warp
        input_warp_tf = warp_cls(
            indices=list(range(train_x.shape[-1])),
            # use a prior with median at 1.
            # when a=1 and b=1, the Kumaraswamy CDF is the identity function
            concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
            concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
        )

    model = SingleTaskGP(train_x,
                         train_y,
                         covar_module=covar_module,
                         likelihood=likelihood,
                         input_transform=input_warp_tf,
                         outcome_transform=outcome_transform)

    if not init:
        print("Loading GP from file")
        model.load_state_dict(torch.load(gp_file))

    print("GP regression")
    fit_start = time.time()
    model.to(**tkwargs)
    model.train()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # approx_mll is False since an exact marginal log likelihood is used;
    # gradient clipping is only enabled together with input warping.
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=bool(input_wp),
                       clip_value=10.0)
    fit_end = time.time()
    print(f"Regression took {fit_end - fit_start:.1f}s to finish...")

    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")

    model.eval()
    return model
def render_singletask_gp(
    ax: [plt.Axes, Axes3D, Sequence[plt.Axes]],
    data_x: to.Tensor,
    data_y: to.Tensor,
    idcs_sel: list,
    data_x_min: to.Tensor = None,
    data_x_max: to.Tensor = None,
    x_label: str = '',
    y_label: str = '',
    z_label: str = '',
    min_gp_obsnoise: float = None,
    resolution: int = 201,
    num_stds: int = 2,
    alpha: float = 0.3,
    color: str = None,
    curve_label: str = 'mean',
    heatmap_cmap: colors.Colormap = None,
    show_legend_posterior: bool = True,
    show_legend_std: bool = False,
    show_legend_data: bool = True,
    legend_data_cmap: colors.Colormap = None,
    colorbar_label: str = None,
    title: str = None,
    render3D: bool = True,
) -> plt.Figure:
    """
    Fit the GP posterior to the input data and plot the mean and std as well as the data points.
    There are 3 options: a 1D curve plot (inferred from the data dimensions), a 3D surface plot for 2-dim inputs
    (`render3D=True`), or a pair of 2D heat maps for 2-dim inputs (`render3D=False`).

    .. note::
        If you want to have a tight layout, it is best to pass axes of a figure with `tight_layout=True` or
        `constrained_layout=True`.

    .. note::
        `curve_label`, `show_legend_std` and `title` are accepted for interface compatibility but are not used by the
        current implementation.

    :param ax: axis of the figure to plot on; for the 2-dim heat map plot provide 4 axes (2 heat maps, 2 color bars)
    :param data_x: data to plot on the x-axis
    :param data_y: data to process and plot on the y-axis
    :param idcs_sel: selected indices of the input data
    :param data_x_min: explicit minimum value for the evaluation grid, by default this value is extracted from `data_x`
    :param data_x_max: explicit maximum value for the evaluation grid, by default this value is extracted from `data_x`
    :param x_label: label for x-axis
    :param y_label: label for y-axis
    :param z_label: label for z-axis (3D plot only)
    :param min_gp_obsnoise: set a minimal noise value (normalized) for the GP, if `None` the GP has no measurement noise
    :param resolution: number of samples for the input (corresponds to x-axis resolution of the plot)
    :param num_stds: number of standard deviations to plot around the mean
    :param alpha: transparency (alpha-value) for the std area
    :param color: color (e.g. 'k' for black), `None` invokes the default behavior
    :param curve_label: label for the mean curve (1D plot only)
    :param heatmap_cmap: color map forwarded to `render_heatmap()` (2D plot only), `None` to use Pyrado's default
    :param show_legend_posterior: flag if the legend entry for the posterior should be printed (affects mean and std)
    :param show_legend_std: flag if a legend entry for the std area should be printed
    :param show_legend_data: flag if a legend entry for the individual data points should be printed
    :param legend_data_cmap: color map for the sampled points, default is 'binary'
    :param colorbar_label: label for the color bar (2D plot only)
    :param title: title displayed above the figure, set to `None` to suppress the title
    :param render3D: use 3D rendering if possible
    :return: handle to the resulting figure
    :raises pyrado.ShapeErr: if `data_x` or `data_y` is not 2-dim, or if not exactly 4 axes are given for heat maps
    :raises pyrado.ValueErr: if the selected input is neither 1-dim nor 2-dim
    """
    if data_x.ndim != 2:
        raise pyrado.ShapeErr(
            msg=
            "The GP's input data needs to be of shape num_samples x dim_input!"
        )
    data_x = data_x[:, idcs_sel]  # forget the rest
    dim_x = data_x.shape[1]  # samples are along axis 0

    if data_y.ndim != 2:
        raise pyrado.ShapeErr(given=data_y,
                              expected_match=to.Size([data_x.shape[0], 1]))

    if legend_data_cmap is None:
        legend_data_cmap = plt.get_cmap('binary')

    # Project to normalized input and standardized output
    if data_x_min is None or data_x_max is None:
        data_x_min, data_x_max = to.min(data_x, dim=0)[0], to.max(data_x,
                                                                  dim=0)[0]
    data_y_mean, data_y_std = to.mean(data_y, dim=0), to.std(data_y, dim=0)
    data_x = (data_x - data_x_min) / (data_x_max - data_x_min)
    data_y = (data_y - data_y_mean) / data_y_std

    # Create and fit the GP model
    gp = SingleTaskGP(data_x, data_y)
    if min_gp_obsnoise is not None:
        gp.likelihood.noise_covar.register_constraint(
            'raw_noise', GreaterThan(min_gp_obsnoise))
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
    mll.train()
    fit_gpytorch_model(mll)
    print_cbt('Fitted the SingleTaskGP.', 'g')

    # Maximize the posterior mean over the normalized unit cube
    argmax_pmean_norm, argmax_pmean_val_stdzed = optimize_acqf(
        acq_function=PosteriorMean(gp),
        bounds=to.stack([to.zeros(dim_x), to.ones(dim_x)]),
        q=1,
        num_restarts=500,
        raw_samples=1000)
    # Project back
    argmax_posterior = argmax_pmean_norm * (data_x_max -
                                            data_x_min) + data_x_min
    argmax_pmean_val = argmax_pmean_val_stdzed * data_y_std + data_y_mean
    print_cbt(
        f'Converged to argmax of the posterior mean: {argmax_posterior.numpy()}',
        'g')

    mll.eval()
    gp.eval()

    # Color the query points by their index. The builtin `int` is used because
    # the `np.int` alias no longer exists in recent NumPy versions.
    query_idcs = np.arange(data_x.shape[0], dtype=int)

    if dim_x == 1:
        # Evaluation grid
        x_grid = np.linspace(min(data_x),
                             max(data_x),
                             resolution,
                             endpoint=True).flatten()
        x_grid = to.from_numpy(x_grid)

        # Mean and standard deviation of the surrogate model
        posterior = gp.posterior(x_grid)
        mean = posterior.mean.detach().flatten()
        std = to.sqrt(posterior.variance.detach()).flatten()

        # Project back from normalized input and standardized output
        x_grid = x_grid * (data_x_max - data_x_min) + data_x_min
        data_x = data_x * (data_x_max - data_x_min) + data_x_min
        data_y = data_y * data_y_std + data_y_mean
        mean = mean * data_y_std + data_y_mean
        std *= data_y_std  # double-checked with posterior.mvn.confidence_region()

        # Plot the curve. The band is drawn on the given axis (not on pyplot's
        # current axes) so that it ends up in the same plot as the mean.
        ax.fill_between(x_grid.numpy(),
                        mean.numpy() - num_stds * std.numpy(),
                        mean.numpy() + num_stds * std.numpy(),
                        alpha=alpha,
                        color=color)
        ax.plot(x_grid.numpy(), mean.numpy(), color=color)

        # Plot the queried data points
        scat_plot = ax.scatter(data_x.numpy().flatten(),
                               data_y.numpy().flatten(),
                               marker='o',
                               c=query_idcs,
                               cmap=legend_data_cmap)

        if show_legend_data:
            scat_legend = ax.legend(
                *scat_plot.legend_elements(fmt='{x:.0f}'),  # integer formatter
                bbox_to_anchor=(0., 1.1, 1., -0.1),
                title='query points',
                ncol=data_x.shape[0],
                loc='upper center',
                mode='expand',
                borderaxespad=0.,
                handletextpad=-0.5)
            ax.add_artist(scat_legend)

        # Plot the argmax of the posterior mean
        ax.axvline(argmax_posterior.item(),
                   c='darkorange',
                   lw=1.5,
                   label='argmax')

        if show_legend_posterior:
            ax.add_artist(ax.legend(loc='lower right'))

    elif dim_x == 2:
        # Create mesh grid matrices from x and y vectors (normalized inputs live in [0, 1])
        x0_grid = to.linspace(0, 1, resolution)
        x1_grid = to.linspace(0, 1, resolution)
        x0_mesh, x1_mesh = to.meshgrid([x0_grid, x1_grid])
        x0_mesh, x1_mesh = x0_mesh.t(), x1_mesh.t(
        )  # transpose not necessary but makes identical mesh as np.meshgrid

        # Mean and standard deviation of the surrogate model
        x_test = to.stack([
            x0_mesh.reshape(resolution**2, 1),
            x1_mesh.reshape(resolution**2, 1)
        ], -1).squeeze(1)
        posterior = gp.posterior(
            x_test)  # identical to gp.likelihood(gp(x_test))
        mean = posterior.mean.detach().reshape(resolution, resolution)
        std = to.sqrt(posterior.variance.detach()).reshape(
            resolution, resolution)

        # Project back from normalized input and standardized output
        data_x = data_x * (data_x_max - data_x_min) + data_x_min
        data_y = data_y * data_y_std + data_y_mean
        mean_raw = mean * data_y_std + data_y_mean
        std_raw = std * data_y_std

        if render3D:
            # Project back from normalized input and standardized output (custom for 3D)
            x0_mesh = x0_mesh * (data_x_max[0] -
                                 data_x_min[0]) + data_x_min[0]
            x1_mesh = x1_mesh * (data_x_max[1] -
                                 data_x_min[1]) + data_x_min[1]
            lower = mean_raw - num_stds * std_raw
            upper = mean_raw + num_stds * std_raw

            # Plot a 2D surface in 3D
            ax.plot_surface(x0_mesh.numpy(), x1_mesh.numpy(),
                            mean_raw.numpy())
            ax.plot_surface(x0_mesh.numpy(),
                            x1_mesh.numpy(),
                            lower.numpy(),
                            color='r',
                            alpha=alpha)
            ax.plot_surface(x0_mesh.numpy(),
                            x1_mesh.numpy(),
                            upper.numpy(),
                            color='r',
                            alpha=alpha)
            ax.set_xlabel(x_label)
            ax.set_ylabel(y_label)
            ax.set_zlabel(z_label)

            # Plot the queried data points
            scat_plot = ax.scatter(data_x[:, 0].numpy(),
                                   data_x[:, 1].numpy(),
                                   data_y.numpy(),
                                   marker='o',
                                   c=query_idcs,
                                   cmap=legend_data_cmap)

            if show_legend_data:
                scat_legend = ax.legend(
                    *scat_plot.legend_elements(
                        fmt='{x:.0f}'),  # integer formatter
                    bbox_to_anchor=(0.05, 1.1, 0.95, -0.1),
                    loc='upper center',
                    ncol=data_x.shape[0],
                    mode='expand',
                    borderaxespad=0.,
                    handletextpad=-0.5)
                ax.add_artist(scat_legend)

            # Plot the argmax of the posterior mean
            x, y = argmax_posterior[0, 0], argmax_posterior[0, 1]
            ax.scatter(x,
                       y,
                       argmax_pmean_val,
                       c='darkorange',
                       marker='*',
                       s=60)

        else:
            if not len(ax) == 4:
                raise pyrado.ShapeErr(
                    msg='Provide 4 axes! 2 heat maps and 2 color bars.')

            # Project back normalized input and standardized output (custom for 2D)
            x0_grid_raw = x0_grid * (data_x_max[0] -
                                     data_x_min[0]) + data_x_min[0]
            x1_grid_raw = x1_grid * (data_x_max[1] -
                                     data_x_min[1]) + data_x_min[1]

            # Plot a 2D image
            df_mean = pd.DataFrame(mean_raw.numpy(),
                                   columns=x0_grid_raw.numpy(),
                                   index=x1_grid_raw.numpy())
            render_heatmap(df_mean,
                           ax_hm=ax[0],
                           ax_cb=ax[1],
                           x_label=x_label,
                           y_label=y_label,
                           annotate=False,
                           fig_canvas_title='Returns',
                           tick_label_prec=2,
                           add_sep_colorbar=True,
                           cmap=heatmap_cmap,
                           colorbar_label=colorbar_label,
                           num_major_ticks_hm=3,
                           num_major_ticks_cb=2,
                           colorbar_orientation='horizontal')

            df_std = pd.DataFrame(std_raw.numpy(),
                                  columns=x0_grid_raw.numpy(),
                                  index=x1_grid_raw.numpy())
            render_heatmap(
                df_std,
                ax_hm=ax[2],
                ax_cb=ax[3],
                x_label=x_label,
                y_label=y_label,
                annotate=False,
                fig_canvas_title='Standard Deviations',
                tick_label_prec=2,
                add_sep_colorbar=True,
                cmap=heatmap_cmap,
                colorbar_label=colorbar_label,
                num_major_ticks_hm=3,
                num_major_ticks_cb=2,
                colorbar_orientation='horizontal',
                norm=colors.Normalize())  # explicitly instantiate a new norm

            # Plot the queried data points on both heat maps
            for i in [0, 2]:
                scat_plot = ax[i].scatter(data_x[:, 0].numpy(),
                                          data_x[:, 1].numpy(),
                                          marker='o',
                                          s=15,
                                          c=query_idcs,
                                          cmap=legend_data_cmap)

                if show_legend_data:
                    scat_legend = ax[i].legend(
                        *scat_plot.legend_elements(
                            fmt='{x:.0f}'),  # integer formatter
                        bbox_to_anchor=(0., 1.1, 1., 0.05),
                        loc='upper center',
                        ncol=data_x.shape[0],
                        mode='expand',
                        borderaxespad=0.,
                        handletextpad=-0.5)
                    ax[i].add_artist(scat_legend)

            # Plot the argmax of the posterior mean
            ax[0].scatter(argmax_posterior[0, 0],
                          argmax_posterior[0, 1],
                          c='darkorange',
                          marker='*',
                          s=60)
            ax[2].scatter(argmax_posterior[0, 0],
                          argmax_posterior[0, 1],
                          c='darkorange',
                          marker='*',
                          s=60)

    else:
        raise pyrado.ValueErr(msg='Can only plot 1-dim or 2-dim data!')

    return plt.gcf()
bounds=torch.Tensor([[-1], [1]]), q=1, num_restarts=5, raw_samples=20, ) candidate_y = obj_noisy(candidate_x) train_x = torch.cat([train_x, candidate_x]) train_y = torch.cat([train_y, candidate_y]) model = model.condition_on_observations(X=candidate_x, Y=candidate_y) # Train GP... mll = ExactMarginalLogLikelihood(model.likelihood, model) fit_gpytorch_model(mll) # Plotting... model.eval() fig, ax = plt.subplots(1, 1, figsize=(6, 4)) plt.title(f"Bayesian Opt. without derivatives, Iteration {it}") test_x = torch.linspace(-1, 1, steps=100) with torch.no_grad(): posterior = model.posterior(test_x) # these are 2 std devs from mean lower, upper = posterior.mvn.confidence_region() ax.plot(test_x.cpu().numpy(), obj(test_x).cpu().numpy(), 'r--', label="true, noiseless objective") ax.plot(train_x.cpu().numpy(), train_y.cpu().numpy(), 'k*', alpha=0.1, label="observations")