Example #1
    def _M_step(self):
        C, N, X = self.C, self.N, self.X
        denoms = np.sum(self.Q, axis=0)

        # update cluster priors
        self.pi = denoms / N

        # update cluster means
        nums_mu = [np.dot(self.Q[:, c], X) for c in range(C)]
        for ix, (num, den) in enumerate(zip(nums_mu, denoms)):
            self.mu[ix, :] = num / den if den > 0 else np.zeros_like(num)

        # update cluster covariances
        for c in range(C):
            mu_c = self.mu[c, :]
            n_c = denoms[c]

            outer = np.zeros((self.d, self.d))
            for i in range(N):
                wic = self.Q[i, c]
                xi = self.X[i, :]
                outer += wic * np.outer(xi - mu_c, xi - mu_c)

            outer = outer / n_c if n_c > 0 else outer
            self.sigma[c, :, :] = outer

        assert_allclose(np.sum(self.pi),
                        1,
                        err_msg="{}".format(np.sum(self.pi)))
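
For context, the M-step above consumes the responsibility matrix `self.Q` produced by an E-step. The sketch below is a minimal, hypothetical E-step assuming the same attribute names (`self.Q`, `self.pi`, `self.mu`, `self.sigma`, `self.X`, `self.N`, `self.C`) and the `log_gaussian_pdf` helper from Example #2 below; it illustrates the idea and is not the library's actual implementation.

    def _E_step(self):
        # Hypothetical sketch: recompute responsibilities
        # Q[i, c] = p(z_i = c | x_i) under the current parameters,
        # normalizing in log-space for numerical stability.
        for i in range(self.N):
            x_i = self.X[i, :]
            log_r = np.array([
                np.log(self.pi[c])
                + log_gaussian_pdf(x_i, self.mu[c, :], self.sigma[c, :, :])
                for c in range(self.C)
            ])
            # log-sum-exp: subtract the max before exponentiating
            m = log_r.max()
            log_norm = m + np.log(np.sum(np.exp(log_r - m)))
            self.Q[i, :] = np.exp(log_r - log_norm)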
Example #2
def log_gaussian_pdf(x_i, mu, sigma):
    """
    Compute log N(x_i | mu, sigma)
    """
    n = len(mu)
    a = n * np.log(2 * np.pi)        # n * log(2 * pi) normalization term
    _, b = np.linalg.slogdet(sigma)  # log |sigma|, computed stably

    # quadratic form (x_i - mu)^T sigma^{-1} (x_i - mu), computed via a
    # linear solve rather than an explicit matrix inverse
    y = np.linalg.solve(sigma, x_i - mu)
    c = np.dot(x_i - mu, y)
    return -0.5 * (a + b + c)
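
A quick sanity check for `log_gaussian_pdf` is to compare it against `scipy.stats.multivariate_normal.logpdf` on a random positive-definite covariance. The snippet below assumes scipy is installed and is purely illustrative:

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
mu = rng.normal(size=3)
A = rng.normal(size=(3, 3))
sigma = A @ A.T + 3 * np.eye(3)   # random symmetric positive-definite covariance
x_i = rng.normal(size=3)

ours = log_gaussian_pdf(x_i, mu, sigma)
ref = multivariate_normal(mean=mu, cov=sigma).logpdf(x_i)
np.testing.assert_allclose(ours, ref)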
Example #3
def dot(a, b):
    """Thin convenience wrapper around :func:`np.dot`."""
    return np.dot(a, b)
Example #4
    def predict(self, X, conf_interval=0.95, return_cov=False):
        """
        Return the MAP estimate for :math:`y^*`, corresponding to the mean/mode
        of the posterior predictive distribution, :math:`p(y^* \mid x^*, X, y)`.

        Notes
        -----
        Under the GP regression model, the posterior predictive distribution is

        .. math::

            y^* \mid x^*, X, y \sim \mathcal{N}(\mu^*, \\text{cov}^*)

        where

        .. math::

            \mu^*  &=  K^{*'} (K + \\alpha I)^{-1} y \\\\
            \\text{cov}^*  &=  K^{**} - K^{*'} (K + \\alpha I)^{-1} K^*

        and

        .. math::

            K  &=  \\text{kernel}(X, X) \\\\
            K^*  &=  \\text{kernel}(X, X^*) \\\\
            K^{**}  &=  \\text{kernel}(X^*, X^*)

        NB. This implementation uses the inefficient but general-purpose
        `np.linalg.inv` routine to invert :math:`(K + \\alpha I)`. A more
        efficient approach is to exploit the fact that `K` (and hence also
        :math:`K + \\alpha I`) is symmetric positive (semi-)definite and take
        the inner product of the inverse of its (lower) Cholesky decomposition:

        .. math::

            Q^{-1} = \\text{cholesky}(Q)^{-1 \\top} \\text{cholesky}(Q)^{-1}

        For more details on a production-grade implementation, see Algorithm
        2.1 in Rasmussen & Williams (2006).

        Parameters
        ----------
        X : :py:class:`ndarray <numpy.ndarray>` of shape (N, M)
            The collection of datapoints to generate predictions on
        conf_interval : float in (0, 1)
            The percentage confidence bound to return for each prediction. If
            the scipy package is not available, this value is always set to
            0.95. Default is 0.95.
        return_cov : bool
            If True, also return the covariance (`cov*`) of the posterior
            predictive distribution for the points in `X`. Default is False.

        Returns
        -------
        y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(N, O)`
            The predicted values for each point in `X`, each with
            dimensionality `O`.
        conf : :py:class:`ndarray <numpy.ndarray>` of shape `(N, O)`
            The half-width of the `conf_interval` confidence bound for each
            `y_pred`: the confidence interval for the `i`'th prediction is
            ``[y[i] - conf[i], y[i] + conf[i]]``.
        cov : :py:class:`ndarray <numpy.ndarray>` of shape `(N, N)`
            The covariance (`cov*`) of the posterior predictive distribution for
            `X`. Only returned if `return_cov` is True.
        """
        if conf_interval != 0.95 and not _SCIPY:
            fstr = "Cannot compute {:.0%} confidence score without scipy.stats"
            warnings.warn(fstr.format(conf_interval))

        X_star = X
        X = self.parameters["X"]
        y = self.parameters["y"]
        K = self.parameters["GP_cov"]
        alpha = self.hyperparameters["alpha"]

        K_star = self.kernel(X_star, X)
        K_star_star = self.kernel(X_star, X_star)

        sig = np.eye(K.shape[0]) * alpha
        K_y_inv = inv(K + sig)

        pp_mean = np.dot(np.dot(K_star, K_y_inv), y)

        pp_cov = K_star_star - np.dot(np.dot(K_star, K_y_inv), K_star.T)

        # if we can't use scipy, ignore the passed value for `conf_interval`
        # and return the (two-sided) 95% confidence bound.
        # (norm.ppf == inverse CDF for standard normal)
        if _SCIPY:
            # two-sided bound: e.g., conf_interval=0.95 -> norm.ppf(0.975) = 1.96
            percentile = norm.ppf(0.5 + conf_interval / 2)
        else:
            percentile = 1.96
        conf = percentile * np.sqrt(np.diag(pp_cov))
        return (pp_mean, conf) if not return_cov else (pp_mean, conf, pp_cov)
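
The Notes above point to Algorithm 2.1 of Rasmussen & Williams (2006) for a numerically preferable alternative to `np.linalg.inv`. The sketch below shows that idea using scipy's Cholesky helpers; the function `_predict_via_cholesky` and its argument names are hypothetical and simply mirror the local variables of `predict`:

from scipy.linalg import cho_factor, cho_solve

def _predict_via_cholesky(K, K_star, K_star_star, y, alpha):
    # Factor K + alpha * I once (it is symmetric positive definite) and reuse
    # the factorization for both solves instead of forming an explicit inverse.
    L = cho_factor(K + alpha * np.eye(K.shape[0]), lower=True)
    beta = cho_solve(L, y)              # (K + alpha * I)^{-1} y
    pp_mean = K_star @ beta
    V = cho_solve(L, K_star.T)          # (K + alpha * I)^{-1} K_star^T
    pp_cov = K_star_star - K_star @ V
    return pp_mean, pp_cov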
Example #5
    def marginal_log_likelihood(self, kernel_params=None):
        """
        Compute the log of the marginal likelihood (i.e., the log model
        evidence), :math:`p(y \mid X, \\text{kernel_params})`.

        Notes
        -----
        Under the GP regression model, the marginal distribution of the
        training targets is Gaussian:

        .. math::

            y | X, \\theta  \sim  \mathcal{N}(0, K + \\alpha I)

        Hence,

        .. math::

            \log p(y \mid X, \\theta) =
                -0.5 \log \det(K + \\alpha I) -
                    0.5 y^\\top (K + \\alpha I)^{-1} y - \\frac{n}{2} \log 2 \pi

        where :math:`K = \\text{kernel}(X, X)`, :math:`\\theta` is the set of
        kernel parameters, and `n` is the number of training examples (i.e.,
        `K` is an `n x n` matrix).

        Parameters
        ----------
        kernel_params : dict
            Parameters for the kernel function. If None, calculate the
            marginal likelihood under the kernel parameters defined at model
            initialization. Default is None.

        Returns
        -------
        marginal_log_likelihood : float
            The log likelihood of the training targets given the kernel
            parameterized by `kernel_params` and the training inputs,
            marginalized over all functions `f`.
        """
        X = self.parameters["X"]
        y = self.parameters["y"]
        alpha = self.hyperparameters["alpha"]

        K = self.parameters["GP_cov"]
        if kernel_params is not None:
            # create a new kernel with parameters `kernel_params` and recalc
            # the GP covariance matrix
            summary_dict = self.kernel.summary_dict()
            summary_dict["parameters"].update(kernel_params)
            kernel = KernelInitializer(summary_dict)()
            K = kernel(X, X)

        # add isotropic noise to the kernel diagonal (avoid `+=` so the cached
        # `GP_cov` matrix is not modified in place)
        K = K + np.eye(K.shape[0]) * alpha

        Kinv = inv(K)
        Klogdet = -0.5 * slogdet(K)[1]
        const = K.shape[0] / 2 * np.log(2 * np.pi)

        # handle both uni- and multidimensional target values
        if y.ndim == 1:
            y = y[:, np.newaxis]

        # sum over each dimension of y
        marginal_ll = np.sum([
            Klogdet - 0.5 * np.dot(np.dot(_y.T, Kinv), _y) - const
            for _y in y.T
        ])
        return marginal_ll
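
The expression implemented above is the log density of `y` under N(0, K + alpha * I), summed over the output dimensions of `y`. The toy check below verifies the scalar formula against `scipy.stats.multivariate_normal.logpdf` for a made-up `K`; it assumes scipy is available and is purely illustrative:

import numpy as np
from numpy.linalg import inv, slogdet
from scipy.stats import multivariate_normal

rng = np.random.default_rng(1)
n, alpha = 5, 1e-2
A = rng.normal(size=(n, n))
K = A @ A.T                        # stand-in for kernel(X, X)
y = rng.normal(size=n)

K_y = K + alpha * np.eye(n)
direct = multivariate_normal(mean=np.zeros(n), cov=K_y).logpdf(y)
manual = (
    -0.5 * slogdet(K_y)[1]
    - 0.5 * y @ inv(K_y) @ y
    - n / 2 * np.log(2 * np.pi)
)
np.testing.assert_allclose(direct, manual)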