def _M_step(self):
    C, N, X = self.C, self.N, self.X
    denoms = np.sum(self.Q, axis=0)

    # update cluster priors
    self.pi = denoms / N

    # update cluster means
    nums_mu = [np.dot(self.Q[:, c], X) for c in range(C)]
    for ix, (num, den) in enumerate(zip(nums_mu, denoms)):
        self.mu[ix, :] = num / den if den > 0 else np.zeros_like(num)

    # update cluster covariances
    for c in range(C):
        mu_c = self.mu[c, :]
        n_c = denoms[c]

        outer = np.zeros((self.d, self.d))
        for i in range(N):
            wic = self.Q[i, c]
            xi = self.X[i, :]
            outer += wic * np.outer(xi - mu_c, xi - mu_c)

        outer = outer / n_c if n_c > 0 else outer
        self.sigma[c, :, :] = outer

    assert_allclose(np.sum(self.pi), 1, err_msg="{}".format(np.sum(self.pi)))
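# The per-datapoint loop in the covariance update above can be vectorized.
# A minimal standalone sketch (not a method of the class), assuming `Q` holds
# the (N, C) responsibilities, `X` the (N, d) data, `mu` the (C, d) cluster
# means just computed in the M-step, and that `numpy as np` is imported at
# module level as elsewhere in this file:
def _vectorized_covariances(Q, X, mu, eps=1e-12):
    denoms = Q.sum(axis=0)                      # effective counts per cluster
    C, d = mu.shape[0], X.shape[1]
    sigma = np.zeros((C, d, d))
    for c in range(C):
        Xc = X - mu[c]                          # (N, d) deviations from mean c
        # weighted sum of outer products: sum_i Q[i, c] * Xc[i] Xc[i]^T
        sigma[c] = (Q[:, c][:, None] * Xc).T @ Xc / max(denoms[c], eps)
    return sigma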
def log_gaussian_pdf(x_i, mu, sigma):
    """Compute log N(x_i | mu, sigma)"""
    n = len(mu)
    a = n * np.log(2 * np.pi)
    _, b = np.linalg.slogdet(sigma)

    y = np.linalg.solve(sigma, x_i - mu)
    c = np.dot(x_i - mu, y)
    return -0.5 * (a + b + c)
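# A quick sanity check for `log_gaussian_pdf` against scipy's reference
# implementation. This is a hedged sketch: it assumes scipy is installed and
# that `numpy as np` is imported at module level as elsewhere in this file.
def _check_log_gaussian_pdf():
    from scipy.stats import multivariate_normal

    rng = np.random.RandomState(0)
    mu = rng.randn(3)
    A = rng.randn(3, 3)
    sigma = A @ A.T + 3 * np.eye(3)             # random SPD covariance
    x_i = rng.randn(3)

    ours = log_gaussian_pdf(x_i, mu, sigma)
    theirs = multivariate_normal(mean=mu, cov=sigma).logpdf(x_i)
    np.testing.assert_allclose(ours, theirs)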
def dot(a, b):
    return np.dot(a, b)
def predict(self, X, conf_interval=0.95, return_cov=False):
    """
    Return the MAP estimate for :math:`y^*`, corresponding to the mean/mode
    of the posterior predictive distribution, :math:`p(y^* \mid x^*, X, y)`.

    Notes
    -----
    Under the GP regression model, the posterior predictive distribution is

    .. math::

        y^* \mid x^*, X, y \sim \mathcal{N}(\mu^*, \\text{cov}^*)

    where

    .. math::

        \mu^* &= K^* (K + \\alpha I)^{-1} y \\\\
        \\text{cov}^* &= K^{**} - K^{*'} (K + \\alpha I)^{-1} K^*

    and

    .. math::

        K &= \\text{kernel}(X, X) \\\\
        K^* &= \\text{kernel}(X, X^*) \\\\
        K^{**} &= \\text{kernel}(X^*, X^*)

    NB. This implementation uses the inefficient but general-purpose
    `np.linalg.inv` routine to invert :math:`(K + \\alpha I)`. A more
    efficient approach exploits the fact that `K` (and hence also
    :math:`K + \\alpha I`) is symmetric positive (semi-)definite and
    computes the inverse from its (lower) Cholesky factor:

    .. math::

        Q^{-1} = \\text{cholesky}(Q)^{-1 \\top} \\text{cholesky}(Q)^{-1}

    For more details on a production-grade implementation, see Algorithm
    2.1 in Rasmussen & Williams (2006).

    Parameters
    ----------
    X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, M)`
        The collection of datapoints to generate predictions on.
    conf_interval : float in (0, 1)
        The percentage confidence bound to return for each prediction. If
        the scipy package is not available, this value is always set to
        0.95. Default is 0.95.
    return_cov : bool
        If True, also return the covariance (`cov*`) of the posterior
        predictive distribution for the points in `X`. Default is False.

    Returns
    -------
    y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(N, O)`
        The predicted values for each point in `X`, each with
        dimensionality `O`.
    conf : :py:class:`ndarray <numpy.ndarray>` of shape `(N, O)`
        The % conf_interval confidence bound for each `y_pred`. The conf %
        confidence interval for the `i`'th prediction is
        ``[y[i] - conf[i], y[i] + conf[i]]``.
    cov : :py:class:`ndarray <numpy.ndarray>` of shape `(N, N)`
        The covariance (`cov*`) of the posterior predictive distribution
        for `X`. Only returned if `return_cov` is True.
    """
    if conf_interval != 0.95 and not _SCIPY:
        fstr = "Cannot compute {}% confidence score without scipy.stats"
        warnings.warn(fstr.format(conf_interval))

    X_star = X
    X = self.parameters["X"]
    y = self.parameters["y"]
    K = self.parameters["GP_cov"]
    alpha = self.hyperparameters["alpha"]

    K_star = self.kernel(X_star, X)
    K_star_star = self.kernel(X_star, X_star)

    sig = np.eye(K.shape[0]) * alpha
    K_y_inv = inv(K + sig)

    pp_mean = np.dot(np.dot(K_star, K_y_inv), y)
    pp_cov = K_star_star - np.dot(np.dot(K_star, K_y_inv), K_star.T)

    # if we can't use scipy, ignore the passed value for `conf_interval`
    # and return the 95% confidence bound.
    # (norm.ppf == inverse CDF for standard normal)
    percentile = 1.96 if not _SCIPY else norm.ppf(conf_interval)
    conf = percentile * np.sqrt(np.diag(pp_cov))
    return (pp_mean, conf) if not return_cov else (pp_mean, conf, pp_cov)
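# The docstring of `predict` notes that inverting (K + alpha * I) via
# `np.linalg.inv` is needlessly general. A hedged sketch of the Cholesky-based
# alternative (cf. Rasmussen & Williams, 2006, Algorithm 2.1); the helper name
# `_cholesky_predict` and its standalone signature are illustrative only and
# not part of the model class:
def _cholesky_predict(kernel, X, y, X_star, alpha):
    from scipy.linalg import cho_factor, cho_solve

    K = kernel(X, X)
    K_star = kernel(X_star, X)
    K_star_star = kernel(X_star, X_star)

    # factor K + alpha * I = L L^T once, then reuse it for both solves
    L_factor = cho_factor(K + alpha * np.eye(K.shape[0]), lower=True)
    pp_mean = K_star @ cho_solve(L_factor, y)
    pp_cov = K_star_star - K_star @ cho_solve(L_factor, K_star.T)
    return pp_mean, pp_cov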
def marginal_log_likelihood(self, kernel_params=None):
    """
    Compute the log of the marginal likelihood (i.e., the log model
    evidence), :math:`p(y \mid X, \\text{kernel_params})`.

    Notes
    -----
    Under the GP regression model, the marginal distribution of the
    targets is Gaussian:

    .. math::

        y \mid X, \\theta \sim \mathcal{N}(0, K + \\alpha I)

    Hence,

    .. math::

        \log p(y \mid X, \\theta) =
            -0.5 \log \det(K + \\alpha I)
            - 0.5 y^\\top (K + \\alpha I)^{-1} y
            - \\frac{n}{2} \log 2 \pi

    where :math:`K = \\text{kernel}(X, X)`, :math:`\\theta` is the set of
    kernel parameters, and `n` is the number of dimensions in `K`.

    Parameters
    ----------
    kernel_params : dict
        Parameters for the kernel function. If None, calculate the
        marginal likelihood under the kernel parameters defined at model
        initialization. Default is None.

    Returns
    -------
    marginal_log_likelihood : float
        The log likelihood of the training targets given the kernel
        parameterized by `kernel_params` and the training inputs,
        marginalized over all functions `f`.
    """
    X = self.parameters["X"]
    y = self.parameters["y"]
    alpha = self.hyperparameters["alpha"]

    K = self.parameters["GP_cov"]
    if kernel_params is not None:
        # create a new kernel with parameters `kernel_params` and
        # recalculate the GP covariance matrix
        summary_dict = self.kernel.summary_dict()
        summary_dict["parameters"].update(kernel_params)
        kernel = KernelInitializer(summary_dict)()
        K = kernel(X, X)

    # add isotropic noise to the kernel diagonal
    K += np.eye(K.shape[0]) * alpha

    Kinv = inv(K)
    Klogdet = -0.5 * slogdet(K)[1]
    const = K.shape[0] / 2 * np.log(2 * np.pi)

    # handle both uni- and multidimensional target values
    if y.ndim == 1:
        y = y[:, np.newaxis]

    # sum the log likelihood over each output dimension of y
    marginal_ll = np.sum(
        [
            Klogdet - 0.5 * np.dot(np.dot(_y.T, Kinv), _y) - const
            for _y in y.T
        ]
    )
    return marginal_ll
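# `marginal_log_likelihood` can drive simple kernel hyperparameter selection
# by maximizing the log model evidence. A hedged sketch of a grid search:
# `gp` is assumed to be a fitted model exposing the method above, and the
# kernel parameter name "sigma" is illustrative; it depends on the kernel
# actually in use.
def _select_kernel_param(gp, candidate_sigmas):
    scores = {
        s: gp.marginal_log_likelihood(kernel_params={"sigma": s})
        for s in candidate_sigmas
    }
    # return the candidate with the highest log evidence, plus all scores
    return max(scores, key=scores.get), scores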