Example #1
    def test_feature_flag(self):
        self.assertTrue(settings.fast_pred_var.is_default())
        self.assertFalse(settings.fast_pred_var.on())
        with settings.fast_pred_var():
            self.assertFalse(settings.fast_pred_var.is_default())
            self.assertTrue(settings.fast_pred_var.on())
        with settings.fast_pred_var(False):
            self.assertFalse(settings.fast_pred_var.is_default())
            self.assertFalse(settings.fast_pred_var.on())
Example #2
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is the
                dimension of the feature space (not including task indices) and
                `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices`. Includes measurement noise if
            `observation_noise=True`.
        """
        if output_indices is None:
            output_indices = self._output_tasks
        if any(i not in self._output_tasks for i in output_indices):
            raise ValueError("Too many output indices")

        # construct evaluation X
        X_full = _make_X_full(X=X,
                              output_indices=output_indices,
                              tf=self._task_feature)

        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            mvn = self(X_full)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvn = self.likelihood(mvn, X_full)
        # If single-output, return the posterior of a single-output model
        if len(output_indices) == 1:
            return GPyTorchPosterior(mvn=mvn)
        # Otherwise, make a MultitaskMultivariateNormal out of this
        mtmvn = MultitaskMultivariateNormal(
            mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
            covariance_matrix=mvn.lazy_covariance_matrix,
            interleaved=False,
        )
        return GPyTorchPosterior(mvn=mtmvn)
Example #3
    def posterior(
        self, X: Tensor, observation_noise: bool = False, **kwargs: Any
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement). Defaults to `True`.

        Returns:
            A `GPyTorchPosterior` object, representing a batch of `b` joint
            distributions over `q` points. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            mvn = self(X)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvn = self.likelihood(mvn, X)
            return GPyTorchPosterior(mvn=mvn)
Example #4
    def posterior(self,
                  X: Tensor,
                  observation_noise: bool = False,
                  **kwargs: Any) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            observation_noise: If True, add observation noise to the posterior.

        Returns:
            A `GPyTorchPosterior` object, representing a batch of `b` joint
            distributions over `q` points. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(gpt_settings.debug(False))
            es.enter_context(gpt_settings.fast_pred_var())
            es.enter_context(
                gpt_settings.detach_test_caches(
                    settings.propagate_grads.off()))
            mvn = self(X)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvn = self.likelihood(mvn, X)
            return GPyTorchPosterior(mvn=mvn)
Example #5
    def posterior(
        self, X: Tensor, observation_noise: bool = False, **kwargs: Any
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement). Defaults to `True`.

        Returns:
            A `GPyTorchPosterior` object, representing a batch of `b` joint
            distributions over `q` points. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            mvn = self(X)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvn = self.likelihood(mvn, X)
            return GPyTorchPosterior(mvn=mvn)
Example #6
@contextmanager
def gpt_posterior_settings():
    r"""Context manager for settings used for computing model posteriors."""
    with ExitStack() as es:
        es.enter_context(gpt_settings.debug(False))
        es.enter_context(gpt_settings.fast_pred_var())
        es.enter_context(
            gpt_settings.detach_test_caches(settings.propagate_grads.off()))
        yield
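
A minimal usage sketch (not part of the excerpt above; `model`, `X`, and `GPyTorchPosterior` are assumed to be available as in the surrounding examples):

def _posterior_sketch(model, X):
    # hypothetical helper: run the forward pass under gpt_posterior_settings()
    # so that debug checks are off, fast_pred_var (LOVE) is on, and test
    # caches are detached, then wrap the resulting MVN in a posterior object
    model.eval()
    with gpt_posterior_settings():
        mvn = model(X)
    return GPyTorchPosterior(mvn=mvn)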
Example #7
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `q x d` or `batch_shape x q x d` (batch mode) tensor, where `d` is the
                dimension of the feature space (not including task indices) and
                `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices`. Includes measurement noise if
            `observation_noise=True`.
        """
        if output_indices is None:
            output_indices = self._output_tasks
        if any(i not in self._output_tasks for i in output_indices):
            raise ValueError("Too many output indices")

        # construct evaluation X
        X_full = _make_X_full(X=X, output_indices=output_indices, tf=self._task_feature)

        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            mvn = self(X_full)
            if observation_noise:
                # TODO: Allow passing in observation noise via kwarg
                mvn = self.likelihood(mvn, X_full)
        # If single-output, return the posterior of a single-output model
        if len(output_indices) == 1:
            return GPyTorchPosterior(mvn=mvn)
        # Otherwise, make a MultitaskMultivariateNormal out of this
        mtmvn = MultitaskMultivariateNormal(
            mean=mvn.mean.view(*X.shape[:-1], len(output_indices)),
            covariance_matrix=mvn.lazy_covariance_matrix,
            interleaved=False,
        )
        return GPyTorchPosterior(mvn=mtmvn)
Example #8
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(gpt_settings.debug(False))
            es.enter_context(gpt_settings.fast_pred_var())
            es.enter_context(
                gpt_settings.detach_test_caches(
                    settings.propagate_grads.off()))
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape)
            mvn = self(X)
            if observation_noise:
                if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                    # Use the mean of the previous noise values (TODO: be smarter here).
                    noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                    mvn = self.likelihood(mvn, X, noise=noise)
                else:
                    mvn = self.likelihood(mvn, X)
            if self._num_outputs > 1:
                mean_x = mvn.mean
                covar_x = mvn.covariance_matrix
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    ) for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(
                    mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
Example #9
    def posterior(
        self,
        X: Tensor,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        # need to override this otherwise posterior variances are shot
        with gpt_settings.fast_pred_var(False):
            return super().posterior(
                X=X, observation_noise=observation_noise, **kwargs
            )
Example #10
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes measurement noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(gpt_settings.debug(False))
            es.enter_context(gpt_settings.fast_pred_var())
            es.enter_context(
                gpt_settings.detach_test_caches(
                    settings.propagate_grads.off()))
            if output_indices is not None:
                mvns = [self.forward_i(i, X) for i in output_indices]
                if observation_noise:
                    lh_kwargs = [
                        {"noise": lh.noise.mean().expand(X.shape[:-1])}
                        if isinstance(lh, FixedNoiseGaussianLikelihood)
                        else {}
                        for lh in self.likelihood.likelihoods
                    ]
                    mvns = [
                        self.likelihood_i(i, mvn, X, **lkws)
                        for i, mvn, lkws in zip(output_indices, mvns, lh_kwargs)
                    ]
            else:
                mvns = self(*[X for _ in range(self.num_outputs)])
                if observation_noise:
                    # TODO: Allow passing in observation noise via kwarg
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
        if len(mvns) == 1:
            return GPyTorchPosterior(mvn=mvns[0])
        else:
            return GPyTorchPosterior(
                mvn=MultitaskMultivariateNormal.from_independent_mvns(
                    mvns=mvns))
Example #11
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            propagate_grads: If True, do not detach GPyTorch's test caches when
                computing the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement). Defaults to `False`.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        detach_test_caches = not kwargs.get("propagate_grads", False)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape
                )
            mvn = self(X)
            if observation_noise:
                mvn = self.likelihood(mvn, X)
            if self._num_outputs > 1:
                mean_x = mvn.mean
                covar_x = mvn.covariance_matrix
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    )
                    for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
Example #12
def acq(fo, model, x_eval=None):
    # expected-improvement-style acquisition: fo plays the role of the
    # incumbent (best observed) value, mu/sig are the posterior mean/variance
    model.eval()
    if x_eval is None:
        x_eval = torch.linspace(0, 1, 100)
    with torch.no_grad(), fast_pred_var(), lazily_evaluate_kernels(True):
        f_ = model(x_eval)
        mu, sig = f_.mean, f_.variance  # marginal variance, not the full covariance_matrix

    # EI = (fo - mu) * Phi(z) + sqrt(sig) * phi(z) with z = (fo - mu) / sqrt(sig);
    # _pdf carries an extra 1/sqrt(sig) factor, so sig * _pdf == sqrt(sig) * phi(z)
    _cdf = 0.5 * (1 + torch.erf((fo - mu) / torch.sqrt(sig * 2.)))
    _pdf = torch.exp(-(fo - mu) ** 2 / (2 * sig)) / torch.sqrt(sig * 2 * 3.141593)
    return (fo - mu) * _cdf + sig * _pdf
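
A hypothetical follow-up sketch (the trained `model` and the incumbent value `f_best` are assumptions, not taken from the snippet above): score a grid of candidates with acq() and take the maximizer as the next query point.

import torch

def propose_next(model, f_best, n_grid=200):
    # hypothetical helper: evaluate the acquisition on a 1-D grid in [0, 1]
    x_grid = torch.linspace(0, 1, n_grid)
    scores = acq(f_best, model, x_eval=x_grid)
    return x_grid[scores.argmax()]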
Example #13
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `(batch_shape) x q x d`-dim Tensor, where `d` is the dimension of the
                feature space and `q` is the number of points considered jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement). Defaults to `True`.

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes observation noise if
            `observation_noise=True`.
        """
        self.eval()  # make sure model is in eval mode
        detach_test_caches = kwargs.get("detach_test_caches", True)
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape
                )
            mvn = self(X)
            mean_x = mvn.mean
            covar_x = mvn.covariance_matrix
            if self._num_outputs > 1:
                output_indices = output_indices or range(self._num_outputs)
                mvns = [
                    MultivariateNormal(
                        mean_x.select(dim=output_dim_idx, index=t),
                        lazify(covar_x.select(dim=output_dim_idx, index=t)),
                    )
                    for t in output_indices
                ]
                mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
        return GPyTorchPosterior(mvn=mvn)
Example #14
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes measurement noise if
            `observation_noise=True`.
        """
        detach_test_caches = kwargs.get("detach_test_caches", True)
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            if output_indices is not None:
                mvns = [self.forward_i(i, X) for i in output_indices]
                if observation_noise:
                    mvns = [
                        self.likelihood_i(i, mvn, X)
                        for i, mvn in zip(output_indices, mvns)
                    ]
            else:
                mvns = self(*[X for _ in range(self.num_outputs)])
                if observation_noise:
                    # TODO: Allow passing in observation noise via kwarg
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
        if len(mvns) == 1:
            return GPyTorchPosterior(mvn=mvns[0])
        else:
            return GPyTorchPosterior(
                mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
            )
Example #15
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: bool = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
                feature space, `q` is the number of points considered jointly,
                and `b` is the batch dimension.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs.
            observation_noise: If True, add observation noise to the posterior.
            detach_test_caches: If True, detach GPyTorch test caches during
                computation of the posterior. Required for being able to compute
                derivatives with respect to training inputs at test time (used
                e.g. by qNoisyExpectedImprovement).

        Returns:
            A `GPyTorchPosterior` object, representing `batch_shape` joint
            distributions over `q` points and the outputs selected by
            `output_indices` each. Includes measurement noise if
            `observation_noise=True`.
        """
        detach_test_caches = kwargs.get("detach_test_caches", True)
        self.eval()  # make sure model is in eval mode
        with ExitStack() as es:
            es.enter_context(settings.debug(False))
            es.enter_context(settings.fast_pred_var())
            es.enter_context(settings.detach_test_caches(detach_test_caches))
            if output_indices is not None:
                mvns = [self.forward_i(i, X) for i in output_indices]
                if observation_noise:
                    mvns = [
                        self.likelihood_i(i, mvn, X)
                        for i, mvn in zip(output_indices, mvns)
                    ]
            else:
                mvns = self(*[X for _ in range(self.num_outputs)])
                if observation_noise:
                    # TODO: Allow passing in observation noise via kwarg
                    mvns = self.likelihood(*[(mvn, X) for mvn in mvns])
        if len(mvns) == 1:
            return GPyTorchPosterior(mvn=mvns[0])
        else:
            return GPyTorchPosterior(
                mvn=MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)
            )
Example #16
    def predict(self, input):
        input = transform(input.reshape((-1, self.input_size)), self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                output = self.likelihood(self.model(input)).mean

        output = inverse_transform(output, self.target_trans)
        if self.incremental:
            return input[..., :self.target_size] + output
        else:
            return output
Example #17
def evaluate(model: SSM, outputs: Tensor, inputs: torch.Tensor,
             output_mean: Tensor, output_scale: Tensor,
             evaluator: Evaluator, experiment: Experiment, key: str,
             plot_outputs: bool = False) -> None:
    """Evaluate outputs."""
    with settings.fast_pred_samples(state=True), settings.fast_pred_var(state=True):
        # predicted_outputs = model.predict(outputs, inputs)
        predicted_outputs, _ = model.forward(outputs, inputs)
    collapsed_predicted_outputs = approximate_with_normal(predicted_outputs)
    evaluator.evaluate(collapsed_predicted_outputs, outputs, output_scale)

    if plot_outputs:
        print('\n' + evaluator.last)
        mean = collapsed_predicted_outputs.loc.detach().numpy()
        scale = collapsed_predicted_outputs.scale.detach().numpy()

        fig = plot_pred(mean[-1].T, np.sqrt(scale[-1]).T, outputs[-1].numpy().T)
        fig.axes[0].set_title('{} {} {} Prediction'.format(
            experiment.model, experiment.dataset, key.capitalize()))
        fig.show()
        fig.savefig('{}prediction_{}.png'.format(experiment.fig_dir, key))
        plt.close(fig)

        if 'robomove' in experiment.dataset.lower():
            fig = plot_2d(mean[-1].T, outputs[-1].numpy().T)
            fig.axes[0].set_title('{} {} {} Prediction'.format(
                experiment.model, experiment.dataset, key.capitalize()))
            fig.show()
            fig.savefig('{}prediction2d_{}.png'.format(experiment.fig_dir, key))
            plt.close(fig)

        if 'kink' in experiment.dataset.lower():
            gp = model.forward_model
            transition = model.transitions
            x = torch.arange(-3, 1, 0.1)
            true_next_x = KinkFunction.f(x.numpy())

            x = (x - output_mean) / output_scale
            pred_next_x = transition(gp(x.expand(1, model.dim_states, -1)))
            pred_next_x.loc += x

            mu = output_scale * pred_next_x.loc[-1, -1] + output_mean
            fig = plot_transition(
                x.numpy(), true_next_x, mu.detach().numpy(),
                torch.diag(
                    pred_next_x.covariance_matrix[-1, -1]).sqrt().detach().numpy())
            fig.axes[0].set_title('{} {} Learned Function'.format(
                experiment.model, experiment.dataset))
            fig.show()
            fig.savefig('{}transition.png'.format(experiment.fig_dir))
            plt.close(fig)
Example #18
    def predict(self, input):
        self.device = torch.device('cpu')

        self.model.eval().to(self.device)
        self.likelihood.eval().to(self.device)

        input = transform(torch.reshape(input, (-1, self.input_size)), self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                output = self.likelihood(self.model(input)).mean

        output = inverse_transform(output[:, None], self.target_trans).squeeze()
        return output
Example #19
    def predict(self, input):
        self.device = torch.device('cpu')

        self.model.eval().to(self.device)
        self.likelihood.eval().to(self.device)

        input = transform(input.reshape((-1, self.input_size)),
                          self.input_trans)

        with max_preconditioner_size(10), torch.no_grad():
            with max_root_decomposition_size(30), fast_pred_var():
                _input = [input for _ in range(self.target_size)]
                predictions = self.likelihood(*self.model(*_input))
                output = torch.stack([_pred.mean for _pred in predictions]).T

        output = inverse_transform(output, self.target_trans).squeeze()
        return output
Example #20
    def test_cache_across_lazy_threshold(self):
        x = self.create_test_data()
        likelihood, labels = self.create_likelihood_and_labels()
        model = self.create_model(x, labels, likelihood)
        model.eval()
        model(x)  # populate caches

        with settings.max_eager_kernel_size(2 * N_PTS - 1), \
                settings.fast_pred_var(True):
            # now we'll cross the threshold and use lazy tensors
            new_x = self.create_test_data()
            _, new_y = self.create_likelihood_and_labels()
            model = model.get_fantasy_model(new_x, new_y)
            predicted = model(self.create_test_data())

            # the main purpose of the test was to ensure there was no error, but we can verify shapes too
            self.assertEqual(predicted.mean.shape, torch.Size([N_PTS]))
            self.assertEqual(predicted.variance.shape, torch.Size([N_PTS]))
Example #21
def acq(fo, model, x_eval=None):
    model.eval()
    if x_eval is None: x_eval = torch.linspace(0, 1, 100).unsqueeze(-1)
    print(x_eval.shape)
    batch_sz = 100
    with torch.no_grad(), fast_pred_var(), lazily_evaluate_kernels(True):
        mu, sig = [], []
        for i in range(0, x_eval.shape[0], batch_sz):
            f_ = model(x_eval[i:i + batch_sz, :])
            mu.append(f_.mean[:, 0])
            sig.append(f_.variance[:, 0])  #covariance_matrix
        mu = torch.cat(mu, 0)
        sig = torch.cat(sig, 0)
        print(mu.shape)

    _cdf = 0.5 * (1 + torch.erf((fo - mu) / (torch.sqrt(sig * 2.))))
    _pdf = torch.exp(-(fo - mu)**2 /
                     (2 * sig)) / torch.sqrt(sig * 2 * 3.141593)
    return (fo - mu) * _cdf + sig * _pdf
Example #22
    {'params': model.covar.parameters()},
    {'params': model.mean.parameters()},
    {'params': model.likelihood.parameters()},
], lr=0.01)

# "Loss" for GPs - the marginal log likelihood
mll = ExactMarginalLogLikelihood(likelihood, model)

training_iterations = 60


def train():
    iterator = tqdm(range(training_iterations))
    for i in iterator:
        # Zero backprop gradients
        optimizer.zero_grad()
        # Get output from model
        output = model(x_train)
        # Calc loss and backprop derivatives
        loss = -mll(output, y_train)
        loss.backward()
        iterator.set_postfix(loss=loss.item())
        optimizer.step()

train()

model.eval()
likelihood.eval()
with torch.no_grad(), use_toeplitz(False), fast_pred_var():
    preds = model(x_test)
    print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - y_test))))
Example #23
    for key in y_means:
        y_means[key] = y_means[key].cpu()

    output_dict = {
        "observations": {
            "x": train_x.cpu(),
            "y": train_y.cpu(),
            "means": y_means,
            "latent_y": latent_y.cpu(),
        },
        "results": DataFrame(all_outputs),
        "args": args
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    use_fast_pred_var = not args.use_exact

    with use_toeplitz(args.toeplitz), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            cholesky_jitter(1e-3), \
            fast_pred_var(use_fast_pred_var), \
            fast_pred_samples(True):
        main(args)
Example #24
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        self.eval()  # make sure we're calling a posterior
        # input transforms are applied at `posterior` in `eval` mode, and at
        # `model.forward()` at the training time
        X = self.transform_inputs(X)
        no_pred_variance = skip_posterior_variances._state

        with ExitStack() as es:
            es.enter_context(gpt_posterior_settings())
            es.enter_context(fast_pred_var(True))

            # we need to skip posterior variances here
            es.enter_context(skip_posterior_variances(True))
            mvn = self(X)
            if observation_noise is not False:
                # TODO: ensure that this still works for structured noise solves.
                mvn = self.likelihood(mvn, X)

            # lazy covariance matrix includes the interpolated version of the full
            # covariance matrix so we can actually grab that instead.
            if X.ndimension() > self.train_inputs[0].ndimension():
                X_batch_shape = X.shape[:-2]
                train_inputs = self.train_inputs[0].reshape(
                    *[1] * len(X_batch_shape), *self.train_inputs[0].shape
                )
                train_inputs = train_inputs.repeat(
                    *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
                )
            else:
                train_inputs = self.train_inputs[0]

            # we now compute the data covariances for the training data, the testing
            # data, the joint covariances, and the test train cross-covariance
            train_train_covar = self.prediction_strategy.lik_train_train_covar.detach()
            base_train_train_covar = train_train_covar.lazy_tensor

            data_train_covar = base_train_train_covar.lazy_tensors[0]
            data_covar = self.covar_modules[0]
            data_train_test_covar = data_covar(X, train_inputs)
            data_test_test_covar = data_covar(X)
            data_joint_covar = data_train_covar.cat_rows(
                cross_mat=data_train_test_covar,
                new_mat=data_test_test_covar,
            )

            # we detach the latents so that they don't cause gradient errors
            # TODO: Can we enable backprop through the latent covariances?
            batch_shape = data_train_test_covar.batch_shape
            latent_covar_list = []
            for latent_covar in base_train_train_covar.lazy_tensors[1:]:
                if latent_covar.batch_shape != batch_shape:
                    latent_covar = BatchRepeatLazyTensor(latent_covar, batch_shape)
                latent_covar_list.append(latent_covar.detach())

            joint_covar = KroneckerProductLazyTensor(
                data_joint_covar, *latent_covar_list
            )
            test_train_covar = KroneckerProductLazyTensor(
                data_train_test_covar, *latent_covar_list
            )

            # compute the posterior variance if necessary
            if no_pred_variance:
                pred_variance = mvn.variance
            else:
                pred_variance = self.make_posterior_variances(joint_covar)

            # mean and variance get reshaped into the target shape
            new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
            if not no_pred_variance:
                new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
                new_variance = DiagLazyTensor(new_variance)
            else:
                new_variance = ZeroLazyTensor(
                    *X.shape[:-1], *self.target_shape, self.target_shape[-1]
                )

            mvn = MultivariateNormal(new_mean, new_variance)

            # return a specialized Posterior to allow for sampling
            # cloning the full covar allows backpropagation through it
            posterior = HigherOrderGPPosterior(
                mvn=mvn,
                train_targets=self.train_targets.unsqueeze(-1),
                train_train_covar=train_train_covar,
                test_train_covar=test_train_covar,
                joint_covariance_matrix=joint_covar.clone(),
                output_shape=X.shape[:-1] + self.target_shape,
                num_outputs=self._num_outputs,
            )
            if hasattr(self, "outcome_transform"):
                posterior = self.outcome_transform.untransform_posterior(posterior)

            return posterior
Example #25
            acq_value.item(),
            pred_rmse.item(),
            pred_avg_variance.item()
        ]
        print("Step RMSE: ", pred_rmse)
        all_outputs.append(step_output_list)

        start_ind = end_ind
        end_ind = int(end_ind + args.batch_size)

    output_dict = {
        "model_state_dict": model.cpu().state_dict(),
        "queried_points": {
            'x': model.cpu().train_inputs[0],
            'y': model.cpu().train_targets
        },
        "results": DataFrame(all_outputs)
    }
    torch.save(output_dict, args.output)


if __name__ == "__main__":
    args = parse()
    with fast_pred_var(True), \
            use_toeplitz(args.toeplitz), \
            detach_test_caches(True), \
            max_cholesky_size(args.cholesky_size), \
            max_root_decomposition_size(args.sketch_size), \
            root_pred_var(True):
        main(args)
Example #26
    def posterior(
        self,
        X: Tensor,
        output_indices: Optional[List[int]] = None,
        observation_noise: Union[bool, Tensor] = False,
        **kwargs: Any,
    ) -> GPyTorchPosterior:
        self.eval()  # make sure we're calling a posterior

        no_pred_variance = skip_posterior_variances._state

        with ExitStack() as es:
            es.enter_context(gpt_posterior_settings())
            es.enter_context(fast_pred_var(True))

            # we need to skip posterior variances here
            es.enter_context(skip_posterior_variances(True))
            mvn = self(X)
            if observation_noise is not False:
                # TODO: implement Kronecker + diagonal solves so that this is possible.
                # if torch.is_tensor(observation_noise):
                #     # TODO: Validate noise shape
                #     # make observation_noise `batch_shape x q x n`
                #     obs_noise = observation_noise.transpose(-1, -2)
                #     mvn = self.likelihood(mvn, X, noise=obs_noise)
                # elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                #     noise = self.likelihood.noise.mean().expand(X.shape[:-1])
                #     mvn = self.likelihood(mvn, X, noise=noise)
                # else:
                mvn = self.likelihood(mvn, X)

            # lazy covariance matrix includes the interpolated version of the full
            # covariance matrix so we can actually grab that instead.
            if X.ndimension() > self.train_inputs[0].ndimension():
                X_batch_shape = X.shape[:-2]
                train_inputs = self.train_inputs[0].reshape(
                    *[1] * len(X_batch_shape), *self.train_inputs[0].shape
                )
                train_inputs = train_inputs.repeat(
                    *X_batch_shape, *[1] * self.train_inputs[0].ndimension()
                )
            else:
                train_inputs = self.train_inputs[0]
            full_covar = self.covar_modules[0](torch.cat((train_inputs, X), dim=-2))

            if no_pred_variance:
                pred_variance = mvn.variance
            else:
                joint_covar = self._get_joint_covariance([X])
                pred_variance = self.make_posterior_variances(joint_covar)

                full_covar = KroneckerProductLazyTensor(
                    full_covar, *joint_covar.lazy_tensors[1:]
                )

            joint_covar_list = [self.covar_modules[0](X, train_inputs)]
            batch_shape = joint_covar_list[0].batch_shape
            for cm, param in zip(self.covar_modules[1:], self.latent_parameters):
                covar = cm(param)
                if covar.batch_shape != batch_shape:
                    covar = BatchRepeatLazyTensor(covar, batch_shape)
                joint_covar_list.append(covar)

            test_train_covar = KroneckerProductLazyTensor(*joint_covar_list)

            # mean and variance get reshaped into the target shape
            new_mean = mvn.mean.reshape(*X.shape[:-1], *self.target_shape)
            if not no_pred_variance:
                new_variance = pred_variance.reshape(*X.shape[:-1], *self.target_shape)
                new_variance = DiagLazyTensor(new_variance)
            else:
                new_variance = ZeroLazyTensor(
                    *X.shape[:-1], *self.target_shape, self.target_shape[-1]
                )

            mvn = MultivariateNormal(new_mean, new_variance)

            # return a specialized Posterior to allow for sampling
            posterior = HigherOrderGPPosterior(
                mvn=mvn,
                train_targets=self.train_targets.unsqueeze(-1),
                train_train_covar=self.prediction_strategy.lik_train_train_covar,
                test_train_covar=test_train_covar,
                joint_covariance_matrix=full_covar,
                output_shape=Size(
                    (
                        *X.shape[:-1],
                        *self.target_shape,
                    )
                ),
                num_outputs=self._num_outputs,
            )
            if hasattr(self, "outcome_transform"):
                posterior = self.outcome_transform.untransform_posterior(posterior)

            return posterior
Example #27
for i in range(training_iterations):
    # Zero backprop gradients
    optimizer.zero_grad()
    # Get output from model
    output = model(x_train)
    # Calc loss and backprop derivatives
    loss = -mll(output, y_train)
    loss.backward()
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()
    torch.cuda.empty_cache()

model.eval()
likelihood.eval()

x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
x_test = x_test.cuda()

with settings.max_preconditioner_size(10), torch.no_grad():
    with settings.max_root_decomposition_size(30), settings.fast_pred_var():
        f_preds = model(x_test)
        y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean, var, x_test.cpu().numpy(), X_train=x_train.cpu().numpy(), Y_train=y_train.cpu().numpy(), samples=samples)
Example #28
    optimizer.step()

# The spectral mixture kernel is especially good at extrapolation.
# To that end, we'll see how well the model extrapolates past the interval [0, 1].

# Test points every 0.1 between 0 and 5
x_test = torch.linspace(0, 5, 51)

# Get into evaluation (predictive posterior) mode
model.eval()
likelihood.eval()

import matplotlib.pyplot as plt

with torch.no_grad(), settings.fast_pred_var():
    # Make predictions
    y_pred = likelihood(model(x_test))

    mean = y_pred.mean.numpy()
    var = y_pred.variance.numpy() * 1e3
    plot_gp(mean,
            var,
            x_test.numpy(),
            X_train=x_train.numpy(),
            Y_train=y_train.numpy())

    # # Initialize plot
    # f, ax = plt.subplots(1, 1, figsize=(4, 3))
    #
    # # Get upper and lower confidence bounds
Example #29
    output = model(x_train)

    loss = -marginal_loglikelihood(
        output, y_train)  # this gives the marginal loglikelihood  log(p(y|X))
    loss.backward()

    print(
        f'Iter {i + 1} - Loss: {loss.item()}   noise: {model.likelihood.noise.item()}'
    )

    optimizer.step()

model.eval()
likelihood.eval()

with torch.no_grad(), settings.fast_pred_var(
), settings.max_root_decomposition_size(25):
    x_test = torch.from_numpy(np.linspace(1870, 2030,
                                          200)[:,
                                               np.newaxis]).type(torch.float32)
    x_test = x_test.cuda()
    f_preds = model(x_test)
    y_pred = likelihood(f_preds)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()
    plot_gp(mean,
            var,
            x_test.cpu().numpy(),
Example #30
    optimizer.step()

model.eval()
likelihood.eval()

# Test points are regularly spaced along [0,1]
# Make predictions by feeding model through likelihood
# LOVE: fast_pred_var is used for faster computation of predictive posterior
# https://arxiv.org/pdf/1803.06058.pdf
# This can be especially useful in settings like small-scale Bayesian optimization,
# where predictions need to be made at enormous numbers of candidate points,
# but there aren't enough training examples to necessarily warrant the use of sparse GP methods
# max_root_decomposition_size(35) affects the accuracy of the LOVE solves (larger is more accurate, but slower)
t1 = time.time()
with torch.no_grad(), fast_pred_var(), max_root_decomposition_size(25):
    x_test = torch.from_numpy(np.linspace(1870, 2030, 200)[:, np.newaxis])
    x_test = x_test.cuda()
    # f_preds gives us the mean and covariance of a distribution that can be passed to the likelihood
    f_preds = model(x_test)
    y_pred = likelihood(f_preds)

t2 = time.time()
print(t2 - t1)

# plot
with torch.no_grad():
    mean = y_pred.mean.cpu().numpy()
    var = y_pred.variance.cpu().numpy()
    samples = y_pred.sample().cpu().numpy()