def test_feature_flag(self):
    self.assertTrue(settings.fast_pred_var.is_default())
    self.assertFalse(settings.fast_pred_var.on())
    with settings.fast_pred_var():
        self.assertFalse(settings.fast_pred_var.is_default())
        self.assertTrue(settings.fast_pred_var.on())
    with settings.fast_pred_var(False):
        self.assertFalse(settings.fast_pred_var.is_default())
        self.assertFalse(settings.fast_pred_var.on())
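# The test above exercises three behaviours of a settings flag: whether it is
# still at its default, whether it is currently on, and context-manager
# semantics that restore the previous state on exit. Below is a minimal,
# hypothetical sketch of such a flag (not GPyTorch's actual implementation;
# the name `_FeatureFlag` is illustrative only):
class _FeatureFlag:
    _state = None  # None means "still at the default", which reads as off

    def __init__(self, state: bool = True):
        self._enter_state = state

    @classmethod
    def is_default(cls) -> bool:
        return cls._state is None

    @classmethod
    def on(cls) -> bool:
        return bool(cls._state)

    def __enter__(self):
        self._prev = type(self)._state
        type(self)._state = self._enter_state
        return self

    def __exit__(self, *exc):
        type(self)._state = self._prev
        return False

# Usage mirrors the test: `_FeatureFlag.on()` is False by default, True inside
# `with _FeatureFlag():`, and False again inside `with _FeatureFlag(False):`.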
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is
            the dimension of the feature space (not including task indices) and
            `q` is the number of points considered jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices`. Includes measurement noise if
        `observation_noise=True`.
    """
    if output_indices is None:
        output_indices = self._output_tasks
    if any(i not in self._output_tasks for i in output_indices):
        raise ValueError("Too many output indices")
    # construct evaluation X
    X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        mvn = self(X_full)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X_full)
        # If single-output, return the posterior of a single-output model
        if len(output_indices) == 1:
            return GPyTorchPosterior(mvn=mvn)
        # Otherwise, make a MultitaskMultivariateNormal out of this
        mtmvn = MultitaskMultivariateNormal(
            mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
            covariance_matrix=mvn.lazy_covariance_matrix,
            interleaved=False,
        )
        return GPyTorchPosterior(mvn=mtmvn)
def posterior(
    self, X: Tensor, observation_noise: bool = False, **kwargs: Any
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `True`.

    Returns:
        A `GPyTorchPosterior` object, representing a batch of `b` joint
        distributions over `q` points. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        mvn = self(X)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X)
    return GPyTorchPosterior(mvn=mvn)
def posterior(
    self, X: Tensor, observation_noise: bool = False, **kwargs: Any
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing a batch of `b` joint
        distributions over `q` points. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        mvn = self(X)
        if observation_noise:
            # TODO: Allow passing in observation noise via kwarg
            mvn = self.likelihood(mvn, X)
    return GPyTorchPosterior(mvn=mvn)
@contextmanager
def gpt_posterior_settings():
    r"""Context manager for settings used for computing model posteriors."""
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        yield
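# A hedged usage sketch for the helper above: this is how a model's `posterior`
# method would typically wrap its forward pass (compare the variants below that
# call `es.enter_context(gpt_posterior_settings())`). The `model` argument and
# the `posterior_sketch` name are illustrative, not a specific library API.
def posterior_sketch(model, X, observation_noise=False):
    model.eval()  # posteriors are computed in eval mode
    with gpt_posterior_settings():
        mvn = model(X)
        if observation_noise:
            mvn = model.likelihood(mvn, X)
    return GPyTorchPosterior(mvn=mvn)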
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                # Use the mean of the previous noise values (TODO: be smarter here).
                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                mvn = self.likelihood(mvn, X, noise=noise)
            else:
                mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)
def posterior(
    self, X: Tensor, observation_noise: Union[bool, Tensor] = False, **kwargs: Any
) -> GPyTorchPosterior:
    # need to override this, otherwise posterior variances are inaccurate
    with gpt_settings.fast_pred_var(False):
        return super().posterior(X=X, observation_noise=observation_noise, **kwargs)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes measurement noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off())
        )
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise:
                lh_kwargs = [
                    {"noise": lh.noise.mean().expand(X.shape[:-1])}
                    if isinstance(lh, FixedNoiseGaussianLikelihood)
                    else {}
                    for lh in self.likelihood.likelihoods
                ]
                mvns = [
                    self.likelihood_i(i, mvn, X, **lkws)
                    for i, mvn, lkws in zip(output_indices, mvns, lh_kwargs)
                ]
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    else:
        return GPyTorchPosterior(
            mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        )
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        propagate_grads: If True, do not detach GPyTorch's test caches when
            computing the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `False`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = not kwargs.get("propagate_grads", False)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            mvn = self.likelihood(mvn, X)
        if self._num_outputs > 1:
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)
def acq(fo, model, x_eval=None):
    """Expected improvement (for minimization) of `model` at `x_eval`, given
    the incumbent (best observed) value `fo`."""
    model.eval()
    if x_eval is None:
        x_eval = torch.linspace(0, 1, 100)
    with torch.no_grad(), fast_pred_var(), lazily_evaluate_kernels(True):
        f_ = model(x_eval)
        mu, sig = f_.mean, f_.variance  # predictive variance, not covariance_matrix
        # Normal CDF of the improvement and the density of N(mu, sig) at fo,
        # written out via erf/exp.
        _cdf = 0.5 * (1 + torch.erf((fo - mu) / torch.sqrt(sig * 2.)))
        _pdf = torch.exp(-(fo - mu) ** 2 / (2 * sig)) / torch.sqrt(sig * 2 * 3.141593)
        return (fo - mu) * _cdf + sig * _pdf
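# A hypothetical end-to-end usage sketch for the acquisition function above,
# using a small exact GP regression model. The `ToyGP` class, the data, and the
# choice of incumbent are illustrative only; `acq` is assumed to be in scope.
import torch
import gpytorch
from gpytorch.settings import fast_pred_var, lazily_evaluate_kernels


class ToyGP(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )


train_x = torch.rand(20)
train_y = torch.sin(6 * train_x) + 0.05 * torch.randn(20)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ToyGP(train_x, train_y, likelihood)

# fo is the incumbent (lowest observed) value, since acq computes EI for minimization
ei = acq(train_y.min(), model)
next_x = torch.linspace(0, 1, 100)[ei.argmax()]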
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension
            of the feature space and `q` is the number of points considered
            jointly.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement). Defaults to `True`.

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes observation noise if
        `observation_noise=True`.
    """
    self.eval()  # make sure model is in eval mode
    detach_test_caches = kwargs.get("detach_test_caches", True)
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        # insert a dimension for the output dimension
        if self._num_outputs > 1:
            X, output_dim_idx = add_output_dim(
                X=X, original_batch_shape=self._input_batch_shape
            )
        mvn = self(X)
        if observation_noise:
            mvn = self.likelihood(mvn, X)
        mean_x = mvn.mean
        covar_x = mvn.covariance_matrix
        if self._num_outputs > 1:
            output_indices = output_indices or range(self._num_outputs)
            mvns = [
                MultivariateNormal(
                    mean_x.select(dim=output_dim_idx, index=t),
                    lazify(covar_x.select(dim=output_dim_idx, index=t)),
                )
                for t in output_indices
            ]
            mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
    return GPyTorchPosterior(mvn=mvn)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: bool = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    r"""Computes the posterior over model outputs at the provided points.

    Args:
        X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
            feature space, `q` is the number of points considered jointly,
            and `b` is the batch dimension.
        output_indices: A list of indices, corresponding to the outputs over
            which to compute the posterior (if the model is multi-output).
            Can be used to speed up computation if only a subset of the
            model's outputs are required for optimization. If omitted,
            computes the posterior over all model outputs.
        observation_noise: If True, add observation noise to the posterior.
        detach_test_caches: If True, detach GPyTorch test caches during
            computation of the posterior. Required for being able to compute
            derivatives with respect to training inputs at test time (used
            e.g. by qNoisyExpectedImprovement).

    Returns:
        A `GPyTorchPosterior` object, representing `batch_shape` joint
        distributions over `q` points and the outputs selected by
        `output_indices` each. Includes measurement noise if
        `observation_noise=True`.
    """
    detach_test_caches = kwargs.get("detach_test_caches", True)
    self.eval()  # make sure model is in eval mode
    with ExitStack() as es:
        es.enter_context(settings.debug(False))
        es.enter_context(settings.fast_pred_var())
        es.enter_context(settings.detach_test_caches(detach_test_caches))
        if output_indices is not None:
            mvns = [self.forward_i(i, X) for i in output_indices]
            if observation_noise:
                mvns = [
                    self.likelihood_i(i, mvn, X)
                    for i, mvn in zip(output_indices, mvns)
                ]
        else:
            mvns = self(*[X for _ in range(self.num_outputs)])
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
    if len(mvns) == 1:
        return GPyTorchPosterior(mvn=mvns[0])
    else:
        return GPyTorchPosterior(
            mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        )
def predict(self, input):
    input = transform(input.reshape((-1, self.input_size)), self.input_trans)

    with max_preconditioner_size(10), torch.no_grad():
        with max_root_decomposition_size(30), fast_pred_var():
            output = self.likelihood(self.model(input)).mean

    output = inverse_transform(output, self.target_trans)
    if self.incremental:
        return input[..., :self.target_size] + output
    else:
        return output
def evaluate(model: SSM, outputs: Tensor, inputs: torch.Tensor, output_mean: Tensor,
             output_scale: Tensor, evaluator: Evaluator, experiment: Experiment,
             key: str, plot_outputs: bool = False) -> None:
    """Evaluate outputs."""
    with settings.fast_pred_samples(state=True), settings.fast_pred_var(state=True):
        # predicted_outputs = model.predict(outputs, inputs)
        predicted_outputs, _ = model.forward(outputs, inputs)

    collapsed_predicted_outputs = approximate_with_normal(predicted_outputs)
    evaluator.evaluate(collapsed_predicted_outputs, outputs, output_scale)

    if plot_outputs:
        print('\n' + evaluator.last)
        mean = collapsed_predicted_outputs.loc.detach().numpy()
        scale = collapsed_predicted_outputs.scale.detach().numpy()

        fig = plot_pred(mean[-1].T, np.sqrt(scale[-1]).T, outputs[-1].numpy().T)
        fig.axes[0].set_title('{} {} {} Prediction'.format(
            experiment.model, experiment.dataset, key.capitalize()))
        fig.show()
        fig.savefig('{}prediction_{}.png'.format(experiment.fig_dir, key))
        plt.close(fig)

        if 'robomove' in experiment.dataset.lower():
            fig = plot_2d(mean[-1].T, outputs[-1].numpy().T)
            fig.axes[0].set_title('{} {} {} Prediction'.format(
                experiment.model, experiment.dataset, key.capitalize()))
            fig.show()
            fig.savefig('{}prediction2d_{}.png'.format(experiment.fig_dir, key))
            plt.close(fig)

        if 'kink' in experiment.dataset.lower():
            gp = model.forward_model
            transition = model.transitions
            x = torch.arange(-3, 1, 0.1)
            true_next_x = KinkFunction.f(x.numpy())

            x = (x - output_mean) / output_scale
            pred_next_x = transition(gp(x.expand(1, model.dim_states, -1)))
            pred_next_x.loc += x

            mu = output_scale * pred_next_x.loc[-1, -1] + output_mean
            fig = plot_transition(
                x.numpy(), true_next_x, mu.detach().numpy(),
                torch.diag(
                    pred_next_x.covariance_matrix[-1, -1]).sqrt().detach().numpy())
            fig.axes[0].set_title('{} {} Learned Function'.format(
                experiment.model, experiment.dataset))
            fig.show()
            fig.savefig('{}transition.png'.format(experiment.fig_dir))
            plt.close(fig)
def predict(self, input):
    self.device = torch.device('cpu')
    self.model.eval().to(self.device)
    self.likelihood.eval().to(self.device)

    input = transform(torch.reshape(input, (-1, self.input_size)), self.input_trans)

    with max_preconditioner_size(10), torch.no_grad():
        with max_root_decomposition_size(30), fast_pred_var():
            output = self.likelihood(self.model(input)).mean

    output = inverse_transform(output[:, None], self.target_trans).squeeze()
    return output
def predict(self, input):
    self.device = torch.device('cpu')
    self.model.eval().to(self.device)
    self.likelihood.eval().to(self.device)

    input = transform(input.reshape((-1, self.input_size)), self.input_trans)

    with max_preconditioner_size(10), torch.no_grad():
        with max_root_decomposition_size(30), fast_pred_var():
            _input = [input for _ in range(self.target_size)]
            predictions = self.likelihood(*self.model(*_input))
            output = torch.stack([_pred.mean for _pred in predictions]).T

    output = inverse_transform(output, self.target_trans).squeeze()
    return output
def test_cache_across_lazy_threshold(self):
    x = self.create_test_data()
    likelihood, labels = self.create_likelihood_and_labels()
    model = self.create_model(x, labels, likelihood)
    model.eval()
    model(x)  # populate caches

    with settings.max_eager_kernel_size(2 * N_PTS - 1), settings.fast_pred_var(True):
        # now we'll cross the threshold and use lazy tensors
        new_x = self.create_test_data()
        _, new_y = self.create_likelihood_and_labels()
        model = model.get_fantasy_model(new_x, new_y)
        predicted = model(self.create_test_data())

    # the main purpose of the test was to ensure there was no error,
    # but we can verify shapes too
    self.assertEqual(predicted.mean.shape, torch.Size([N_PTS]))
    self.assertEqual(predicted.variance.shape, torch.Size([N_PTS]))
def acq(fo, model, x_eval=None):
    model.eval()
    if x_eval is None:
        x_eval = torch.linspace(0, 1, 100).unsqueeze(-1)
    print(x_eval.shape)
    batch_sz = 100
    with torch.no_grad(), fast_pred_var(), lazily_evaluate_kernels(True):
        mu, sig = [], []
        for i in range(0, x_eval.shape[0], batch_sz):
            f_ = model(x_eval[i:i + batch_sz, :])
            mu.append(f_.mean[:, 0])
            sig.append(f_.variance[:, 0])  # variance, not covariance_matrix
        mu = torch.cat(mu, 0)
        sig = torch.cat(sig, 0)
    print(mu.shape)
    _cdf = 0.5 * (1 + torch.erf((fo - mu) / (torch.sqrt(sig * 2.))))
    _pdf = torch.exp(-(fo - mu) ** 2 / (2 * sig)) / torch.sqrt(sig * 2 * 3.141593)
    return (fo - mu) * _cdf + sig * _pdf
    {'params': model.covar.parameters()},
    {'params': model.mean.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = ExactMarginalLogLikelihood(likelihood, model)

training_iterations = 60


def train():
    iterator = tqdm(range(training_iterations))
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(x_train)
        # Calc loss and backprop derivatives
        loss = -mll(output, y_train)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()


train()

model.eval()
likelihood.eval()
with torch.no_grad(), use_toeplitz(False), fast_pred_var():
    preds = model(x_test)
print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - y_test))))
for key in y_means:
    y_means[key] = y_means[key].cpu()

output_dict = {
    "observations": {
        "x": train_x.cpu(),
        "y": train_y.cpu(),
        "means": y_means,
        "latent_y": latent_y.cpu(),
    },
    "results": DataFrame(all_outputs),
    "args": args,
}
torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    use_fast_pred_var = not args.use_exact
    with use_toeplitz(args.toeplitz), max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), cholesky_jitter(1e-3), \
            fast_pred_var(use_fast_pred_var), fast_pred_samples(True):
        main(args)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    self.eval()  # make sure we're calling a posterior
    # input transforms are applied at `posterior` in `eval` mode, and at
    # `model.forward()` at the training time
    X = self.transform_inputs(X)
    no_pred_variance = skip_posterior_variances._state

    with ExitStack() as es:
        es.enter_context(gpt_posterior_settings())
        es.enter_context(fast_pred_var(True))

        # we need to skip posterior variances here
        es.enter_context(skip_posterior_variances(True))
        mvn = self(X)
        if observation_noise is not False:
            # TODO: ensure that this still works for structured noise solves.
            mvn = self.likelihood(mvn, X)

        # lazy covariance matrix includes the interpolated version of the full
        # covariance matrix so we can actually grab that instead.
        if X.ndimension() > self.train_inputs[0].ndimension():
            X_batch_shape = X.shape[:-2]
            train_inputs = self.train_inputs[0].reshape(
                *[1] * len(X_batch_shape), *self.train_inputs[0].shape
            )
            train_inputs = train_inputs.repeat(
                *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
            )
        else:
            train_inputs = self.train_inputs[0]

        # we now compute the data covariances for the training data, the testing
        # data, the joint covariances, and the test train cross-covariance
        train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
        base_train_train_covar = train_train_covar.lazy_tensor

        data_train_covar = base_train_train_covar.lazy_tensors[0]
        data_covar = self.covar_modules[0]
        data_train_test_covar = data_covar(X, train_inputs)
        data_test_test_covar = data_covar(X)
        data_joint_covar = data_train_covar.cat_rows(
            cross_mat=data_train_test_covar,
            new_mat=data_test_test_covar,
        )

        # we detach the latents so that they don't cause gradient errors
        # TODO: Can we enable backprop through the latent covariances?
        batch_shape = data_train_test_covar.batch_shape
        latent_covar_list = []
        for latent_covar in base_train_train_covar.lazy_tensors[1:]:
            if latent_covar.batch_shape != batch_shape:
                latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
            latent_covar_list.append(latent_covar.detach())

        joint_covar = KroneckerProductLazyTensor(data_joint_covar, *latent_covar_list)
        test_train_covar = KroneckerProductLazyTensor(
            data_train_test_covar, *latent_covar_list
        )

        # compute the posterior variance if necessary
        if no_pred_variance:
            pred_variance = mvn.variance
        else:
            pred_variance = self.make_posterior_variances(joint_covar)

        # mean and variance get reshaped into the target shape
        new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
        if not no_pred_variance:
            new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
            new_variance = DiagLazyTensor(new_variance)
        else:
            new_variance = ZeroLazyTensor(
                *X.shape[:-1], *self.target_shape, self.target_shape[-1]
            )

        mvn = MultivariateNormal(new_mean, new_variance)

        # return a specialized Posterior to allow for sampling
        # cloning the full covar allows backpropagation through it
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=joint_covar.clone(),
            output_shape=X.shape[:-1] + self.target_shape,
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
    acq_value.item(),
    pred_rmse.item(),
    pred_avg_variance.item(),
]
print("Step RMSE: ", pred_rmse)
all_outputs.append(step_output_list)

start_ind = end_ind
end_ind = int(end_ind + args.batch_size)

output_dict = {
    "model_state_dict": model.cpu().state_dict(),
    "queried_points": {
        'x': model.cpu().train_inputs[0],
        'y': model.cpu().train_targets,
    },
    "results": DataFrame(all_outputs),
}
torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    with fast_pred_var(True), \
            use_toeplitz(args.toeplitz), \
            detach_test_caches(True), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            root_pred_var(True):
        main(args)
def posterior(
    self,
    X: Tensor,
    output_indices: Optional[List[int]] = None,
    observation_noise: Union[bool, Tensor] = False,
    **kwargs: Any,
) -> GPyTorchPosterior:
    self.eval()  # make sure we're calling a posterior
    no_pred_variance = skip_posterior_variances._state

    with ExitStack() as es:
        es.enter_context(gpt_posterior_settings())
        es.enter_context(fast_pred_var(True))

        # we need to skip posterior variances here
        es.enter_context(skip_posterior_variances(True))
        mvn = self(X)
        if observation_noise is not False:
            # TODO: implement Kronecker + diagonal solves so that this is possible.
            # if torch.is_tensor(observation_noise):
            #     # TODO: Validate noise shape
            #     # make observation_noise `batch_shape x q x n`
            #     obs_noise = observation_noise.transpose(-1, -2)
            #     mvn = self.likelihood(mvn, X, noise=obs_noise)
            # elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            #     noise = self.likelihood.noise.mean().expand(X.shape[:-1])
            #     mvn = self.likelihood(mvn, X, noise=noise)
            # else:
            mvn = self.likelihood(mvn, X)

        # lazy covariance matrix includes the interpolated version of the full
        # covariance matrix so we can actually grab that instead.
        if X.ndimension() > self.train_inputs[0].ndimension():
            X_batch_shape = X.shape[:-2]
            train_inputs = self.train_inputs[0].reshape(
                *[1] * len(X_batch_shape), *self.train_inputs[0].shape
            )
            train_inputs = train_inputs.repeat(
                *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
            )
        else:
            train_inputs = self.train_inputs[0]
        full_covar = self.covar_modules[0](torch.cat((train_inputs, X), dim=-2))

        if no_pred_variance:
            pred_variance = mvn.variance
        else:
            joint_covar = self._get_joint_covariance([X])
            pred_variance = self.make_posterior_variances(joint_covar)

            full_covar = KroneckerProductLazyTensor(
                full_covar, *joint_covar.lazy_tensors[1:]
            )

        joint_covar_list = [self.covar_modules[0](X, train_inputs)]
        batch_shape = joint_covar_list[0].batch_shape
        for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
            covar = cm(param)
            if covar.batch_shape != batch_shape:
                covar = BatchRepeatLazyTensor(covar, batch_shape)
            joint_covar_list.append(covar)

        test_train_covar = KroneckerProductLazyTensor(*joint_covar_list)

        # mean and variance get reshaped into the target shape
        new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
        if not no_pred_variance:
            new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
            new_variance = DiagLazyTensor(new_variance)
        else:
            new_variance = ZeroLazyTensor(
                *X.shape[:-1], *self.target_shape, self.target_shape[-1]
            )

        mvn = MultivariateNormal(new_mean, new_variance)

        # return a specialized Posterior to allow for sampling
        posterior = HigherOrderGPPosterior(
            mvn=mvn,
            train_targets=self.train_targets.unsqueeze(-1),
            train_train_covar=self.prediction_strategy.lik_train_train_covar,
            test_train_covar=test_train_covar,
            joint_covariance_matrix=full_covar,
            output_shape=Size((*X.shape[:-1], *self.target_shape)),
            num_outputs=self._num_outputs,
        )
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior)
        return posterior
for i in range(training_iterations):
    # Zero backprop gradients
    optimizer.zero_grad()
    # Get output from model
    output = model(x_train)
    # Calc loss and backprop derivatives
    loss = -mll(output, y_train)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()

torch.cuda.empty_cache()

model.eval()
likelihood.eval()

x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
x_test = x_test.cuda()

with settings.max_preconditioner_size(10), torch.no_grad():
    with settings.max_root_decomposition_size(30), settings.fast_pred_var():
        f_preds = model(x_test)
        y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean, var, x_test.cpu().numpy(),
            X_train=x_train.cpu().numpy(),
            Y_train=y_train.cpu().numpy(),
            samples=samples)
optimizer.step()

# The spectral mixture kernel is especially good at extrapolation.
# To that end, we'll see how well the model extrapolates past the interval [0, 1].

# Test points every 0.1 between 0 and 5
x_test = torch.linspace(0, 5, 51)

# Get into evaluation (predictive posterior) mode
model.eval()
likelihood.eval()

import matplotlib.pyplot as plt

with torch.no_grad(), settings.fast_pred_var():
    # Make predictions
    y_pred = likelihood(model(x_test))
    mean = y_pred.mean.numpy()
    var = y_pred.variance.numpy() * 1e3
    plot_gp(mean, var, x_test.numpy(), X_train=x_train.numpy(), Y_train=y_train.numpy())

# # Initialize plot
# f, ax = plt.subplots(1, 1, figsize=(4, 3))
#
# # Get upper and lower confidence bounds
    output = model(x_train)
    # this gives the marginal loglikelihood log(p(y|X))
    loss = -marginal_loglikelihood(output, y_train)
    loss.backward()
    print(
        f'Iter {i + 1} - Loss: {loss.item()} noise: {model.likelihood.noise.item()}'
    )
    optimizer.step()

model.eval()
likelihood.eval()

with torch.no_grad(), settings.fast_pred_var(), \
        settings.max_root_decomposition_size(25):
    x_test = torch.from_numpy(
        np.linspace(1870, 2030, 200)[:, np.newaxis]).type(torch.float32)
    x_test = x_test.cuda()
    f_preds = model(x_test)
    y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean, var, x_test.cpu().numpy(),
optimizer.step()

model.eval()
likelihood.eval()

# Test points are regularly spaced along [0,1].
# Make predictions by feeding the model through the likelihood.
# LOVE: fast_pred_var is used for faster computation of the predictive posterior
# https://arxiv.org/pdf/1803.06058.pdf
# This can be especially useful in settings like small-scale Bayesian optimization,
# where predictions need to be made at enormous numbers of candidate points,
# but there aren't enough training examples to necessarily warrant the use of
# sparse GP methods.
# max_root_decomposition_size affects the accuracy of the LOVE solves
# (larger is more accurate, but slower).
t1 = time.time()
with torch.no_grad(), fast_pred_var(), max_root_decomposition_size(25):
    x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
    x_test = x_test.cuda()
    # f_preds gives us the mean and cov from a distribution that can be used
    # inside the likelihood
    f_preds = model(x_test)
    y_pred = likelihood(f_preds)
t2 = time.time()
print(t2 - t1)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()