Example #1
def build_covariance(t, K, hyperparams):

    ls = hyperparams[0]
    a0 = hyperparams[1]
    a = hyperparams[2]
    b = hyperparams[3]

    C, _ = a.shape  # number of Fourier coefficients

    T, _ = t.shape
    S = np.empty((T, T, K))
    L = np.empty((T, T, K))
    Si = np.empty((T, T, K))

    Diag, _ = build_diagonal(t, hyperparams)
    for k in range(K):
        # TODO: the periodic term is still missing
        hyperparam_k_list = [ls[0, k], a0[0, k], a[:, k], b[:, k]]
        per_term = fourier_series(t, T, C, hyperparam_k_list)
        s = per_term**2
        per_S = s * s.T
        E = periodic_exponential(t, T, hyperparam_k_list)
        S[:, :, k] = per_S * E
        S[:, :, k] += Diag
        L[:, :, k] = linalg.jitchol(S[:, :, k])
        Si[:, :, k], _ = linalg.dpotri(np.asfortranarray(L[:, :, k]))

        # remove this (debug override):
        #S[:,:,k] = np.eye(T, T)
        #Si[:,:,k] = np.eye(T, T)
    return S, L, Si
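A minimal, self-contained sketch (not from the original source) of the jitchol/dpotri pattern used above: factor a near-singular covariance with escalating jitter, then form its inverse from the Cholesky factor. Assumes only numpy and GPy are installed.

import numpy as np
from GPy.util import linalg

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
S = A @ A.T + 1e-10 * np.eye(5)              # near-singular covariance
L = linalg.jitchol(S)                        # adds jitter only if the plain Cholesky fails
Si, _ = linalg.dpotri(np.asfortranarray(L))  # inverse of S from its Cholesky factor
linalg.symmetrify(Si)                        # ensure both triangles are filled
print(np.allclose(S @ Si, np.eye(5), atol=1e-4))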
Example #2
    def K_chol(self):
        """
        Cholesky of the prior covariance K
        """
        if self._K_chol is None:
            self._K_chol = jitchol(self.K)
        return self._K_chol
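The property above computes the factor lazily and caches it. A standalone sketch of the same caching pattern with explicit invalidation (the class and its names are hypothetical):

import numpy as np
from GPy.util.linalg import jitchol

class CachedChol:
    """Hypothetical helper: recompute the Cholesky factor only when K changes."""
    def __init__(self, K):
        self.K = K
        self._K_chol = None

    @property
    def K_chol(self):
        if self._K_chol is None:
            self._K_chol = jitchol(self.K)
        return self._K_chol

    def update_K(self, K):
        self.K = K
        self._K_chol = None  # invalidate the cached factor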
Example #3
    def do_computations(self):
        """
        Here we do all the computations that are required whenever the kernels
        or the variational parameters are changed.
        """
        # sufficient stats.
        self.ybark = np.dot(self.phi.T, self.Y).T

        # compute posterior variances of each cluster (lambda_inv)
        tmp = backsub_both_sides(self.Sy_chol, self.Sf, transpose="right")
        self.Cs = [np.eye(self.D) + tmp * phi_hat_i for phi_hat_i in self.phi_hat]

        self._C_chols = [jitchol(C) for C in self.Cs]
        self.log_det_diff = np.array([2.0 * np.sum(np.log(np.diag(L))) for L in self._C_chols])
        tmp = [dtrtrs(L, self.Sy_chol.T, lower=1)[0] for L in self._C_chols]
        self.Lambda_inv = np.array(
            [
                (self.Sy - np.dot(tmp_i.T, tmp_i)) / phi_hat_i if (phi_hat_i > 1e-6) else self.Sf
                for phi_hat_i, tmp_i in zip(self.phi_hat, tmp)
            ]
        )

        # posterior mean and other useful quantities
        self.Syi_ybark, _ = dpotrs(self.Sy_chol, self.ybark, lower=1)
        self.Syi_ybarkybarkT_Syi = self.Syi_ybark.T[:, None, :] * self.Syi_ybark.T[:, :, None]
        self.muk = (self.Lambda_inv * self.Syi_ybark.T[:, :, None]).sum(1).T
Example #4
    def parameters_changed(self):
        N, D = self.Y.shape

        Kss = self.kern.K(self.X)
        Ksu = self.kern.K(self.X, self.Z)

        wv = self.posterior.woodbury_vector
        wi = self.posterior.woodbury_inv
        
        a = self.Y - Ksu.dot(wv)

        C = Kss + np.eye(N)*self.likelihood.variance - Ksu.dot(wi).dot(Ksu.T)
        Lc = jitchol(C)
        LcInva = dtrtrs(Lc, a)[0]
        LcInv = dtrtri(Lc)
        CInva = dtrtrs(Lc, LcInva, trans=1)[0]

        self._log_marginal_likelihood = -N*D/2.*np.log(2*np.pi) - D*np.log(np.diag(Lc)).sum() - np.square(LcInva).sum()/2.

        dKsu = CInva.dot(wv.T)
        dKss = tdot(CInva)/2. - D*tdot(LcInv.T)/2.
        dKsu += -2. * dKss.dot(Ksu).dot(wi)
        
        X_grad = self.kern.gradients_X(dKss, self.X)
        X_grad += self.kern.gradients_X(dKsu, self.X, self.Z)
        self.X.gradient = X_grad      
        
        if self.uncertain_input:
            # Update Log-likelihood
            KL_div = self.variational_prior.KL_divergence(self.X)
            # update for the KL divergence
            self.variational_prior.update_gradients_KL(self.X)
            self._log_marginal_likelihood += -KL_div
Example #5
def latent_function_covKuu(Z, B, kernel_list, kernel_list_Gdj, kff_aux):
    """
    Builds the covariance Kuu_d = cov[u_d(x), u_d(x)] of a Convolved Multi-output GP
    :param Z: Inducing points
    :param B: Coregionalization matrices
    :param kernel_list: Kernels of the u_q functions
    :param kernel_list_Gdj: Kernel smoothing functions G(x)
    :param kff_aux: Kernel that solves the convolution integral between G(x) and kern_uq
    :return: Kuu, Luu, Kuui
    """

    J = len(kernel_list_Gdj)
    M, Dz = Z.shape
    Xdim = int(Dz / J)
    # Kuu = np.zeros([Q*M,Q*M])
    Kuu = np.zeros((J, M, M))
    Luu = np.empty((J, M, M))
    Kuui = np.empty((J, M, M))
    for j in range(J):
        for q, B_q in enumerate(B):
            update_conv_Kff(kernel_list[q], kernel_list_Gdj[j], kff_aux)
            Kuu[j, :, :] += B_q.B[j, j] * kff_aux.K(
                Z[:, j * Xdim:j * Xdim + Xdim], Z[:, j * Xdim:j * Xdim + Xdim])

        Luu[j, :, :] = linalg.jitchol(Kuu[j, :, :], maxtries=10)
        Kuui[j, :, :], _ = linalg.dpotri(np.asfortranarray(Luu[j, :, :]))

    return Kuu, Luu, Kuui
Example #7
def update_posterior(
    K: np.ndarray,
    v: np.ndarray,
    tau: np.ndarray,
    y: List[Tuple[int, float]],
    yc: List[List[Tuple[int, int]]],
    jitter: float = 1e-9,
    get_logger: Callable = None,
) -> posteriorParams:
    """
    Update the posterior approximation. See e.g. 3.59 in http://www.gaussianprocess.org/gpml/chapters/RW.pdf
    :param K: prior covariance matrix
    :param v: Scale of the Gaussian approximation
    :param tau: Precision of the Gaussian approximation
    :param y: Observations indicating where we have a diagonal element
    :param yc: Comparisons indicating where we have a block diagonal element
    :param jitter: small number added to the diagonal to increase robustness.
    :param get_logger: Function returning the logger to which print output is forwarded.
    :return: posterior approximation
    """
    D = K.shape[0]
    sqrt_tau = sqrtm_block(tau + np.diag(jitter * np.ones(D)), y, yc)
    G = np.dot(sqrt_tau, K)
    B = np.identity(D) + np.dot(G, sqrt_tau)
    L = jitchol(B)
    V = np.linalg.solve(L, G)
    Sigma_full = K - np.dot(V.T, V)
    mu = np.dot(Sigma_full, v)

    return posteriorParams(mu=mu, Sigma=Sigma_full, L=L)
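For reference, the algebra above is the standard update (cf. eq. 3.59 and p. 126 of the linked book): with W the block square root of the site precisions tau, it computes B = I + W K W, Sigma = K - K W B^{-1} W K (evaluated as K - V^T V with V = L^{-1} W K and L = chol(B)), and mu = Sigma v.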
Example #8
    def natural_grad_qu(model, n_iter=1, step_size=step_rate, momentum=0.0):
        global mk_ant, mk_aux, mk, V_i, Vk, Lk, Vki_ant
        """Initialize the step-sizes"""
        beta2_k = step_size  # use step_size*0.1 for Convolutional MOGP
        gamma2_k = momentum
        alpha2_k = step_size
        N_posteriors = model.q_u_means.shape[1]

        if n_iter == 1:
            V_i = choleskies.multiple_dpotri(
                choleskies.flat_to_triang(model.q_u_chols.values)).copy()
            Vk = np.zeros_like(V_i)
            for i in range(N_posteriors):
                Vk[i, :, :] = 0.5 * (model.posteriors[i].covariance.copy() +
                                     model.posteriors[i].covariance.T.copy())

            Lk = np.zeros_like(Vk)
            mk = model.q_u_means.values.copy()

            Vki_ant = V_i.copy()
            mk_aux = mk.copy()

        dL_dm, dL_dV = compute_stoch_grads_for_qu_HetMOGP(model=model)

        mk_ant = mk_aux.copy()
        mk_aux = mk.copy()

        if not model.q_u_means.is_fixed and not model.q_u_chols.is_fixed:
            mk_ant = mk_aux.copy()
            mk_aux = mk.copy()

            for i in range(N_posteriors):
                try:
                    V_i[i, :, :] = V_i[i, :, :] + 2 * beta2_k * dL_dV[
                        i]  #+ 1.0e-6*np.eye(*Vk[i,:,:].shape)
                    Vk[i, :, :] = np.linalg.inv(V_i[i, :, :])
                    Vk[i, :, :] = 0.5 * (np.array(Vk[i, :, :]) +
                                         np.array(Vk[i, :, :].T))
                    Lk[i, :, :] = np.linalg.cholesky(Vk[i, :, :])
                    mk[:, i] = mk[:, i] - alpha2_k * np.dot(
                        Vk[i, :, :], dL_dm[i]) + gamma2_k * np.dot(
                            np.dot(Vk[i, :, :], Vki_ant[i, :, :]),
                            (mk[:, i] - mk_ant[:, i]))
                except LinAlgError:
                    print("Overflow")
                    Vk[i, :, :] = np.linalg.inv(V_i[i, :, :])
                    Vk[i, :, :] = 1.0e-1 * np.eye(
                        *Vk[i, :, :].shape
                    )  #nearestPD(Vk[i,:,:]) # + 1.0e-3*np.eye(*Vk[i,:,:].shape)
                    Lk[i, :, :] = linalg.jitchol(Vk[i, :, :])
                    V_i[i, :, :] = np.linalg.inv(Vk[i, :, :])
                    mk[:, i] = mk[:, i] * 0.0

            Vki_ant = V_i.copy()

            model.L_u.setfield(choleskies.triang_to_flat(Lk.copy()),
                               np.float64)
            model.m_u.setfield(mk.copy(), np.float64)
Example #9
    def compute_covariance(x: np.ndarray, kernel: RBF) -> tuple:
        assert x.ndim <= 2
        if x.ndim == 1:
            x = x.reshape(-1, 1)
        K_xx = kernel.K(x)
        K_xx_cho = jitchol(K_xx)
        cholesky_inv = np.linalg.inv(K_xx_cho)
        K_xx_inv = cholesky_inv.T @ cholesky_inv
        return K_xx, K_xx_inv
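An equivalent way to form K_xx_inv without explicitly inverting the triangular factor is GPy's dpotri wrapper; a hedged sketch, not part of the original class:

import numpy as np
from GPy.util.linalg import jitchol, dpotri

def compute_covariance_dpotri(x, kernel):
    """Same quantities as compute_covariance above, via LAPACK dpotri (sketch)."""
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    K_xx = kernel.K(x)
    L = jitchol(K_xx)
    K_xx_inv, _ = dpotri(np.asfortranarray(L))  # inverse from the Cholesky factor
    return K_xx, K_xx_inv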
Example #10
    def mean_and_chol_covariance(self, X):
        """

        :param X:
        :return:
        """
        m, cov = self.predict_noiseless(X, full_cov=True)
        chol_cov = jitchol(cov)
        return m, chol_cov
Example #11
    def _get_YYTfactor(self, Y):
        """
        find a matrix L which satisfies LLT = YYT.

        Note that L may have fewer columns than Y.
        """
        N, D = Y.shape
        if (N>=D):
            return Y.view(np.ndarray)
        else:
            return jitchol(tdot(Y))
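A quick numeric check of the docstring's claim that the returned factor L satisfies L L^T = Y Y^T (illustrative; uses GPy's tdot and jitchol):

import numpy as np
from GPy.util.linalg import jitchol, tdot

Y = np.random.randn(3, 100)  # N < D, so the factor is N x N rather than N x D
L = jitchol(tdot(Y))         # tdot(Y) == Y @ Y.T
print(np.allclose(L @ L.T, Y @ Y.T, atol=1e-6))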
Example #12
def update_posterior(K, eta, theta):
    D = K.shape[0]
    sqrt_theta = np.sqrt(theta)
    G = sqrt_theta[:, None]*K
    B = np.identity(D) + G*sqrt_theta
    L = jitchol(B)
    V = np.linalg.solve(L, G)
    Sigma_full = K - np.dot(V.T, V)
    mu = np.dot(Sigma_full, eta)
    #Sigma = np.diag(Sigma_full)

    return posteriorParams(mu=mu, Sigma=Sigma_full, L=L)
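A hedged usage sketch with toy numbers; posteriorParams is assumed to be the small result container defined alongside this function in its source module:

import numpy as np

K = np.array([[1.0, 0.5],
              [0.5, 1.0]])    # prior covariance
eta = np.array([0.3, -0.1])   # site natural means
theta = np.array([2.0, 4.0])  # site precisions (positive)
post = update_posterior(K, eta, theta)
print(post.mu, np.diag(post.Sigma))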
Example #13
    def __init__(
        self,
        X: np.ndarray,
        y: List[Tuple[int, float]],
        yc: List[List[Tuple[int, int]]],
        kernel: GPy.kern.Kern,
        likelihood: Gaussian,
        vi_mode: str = "fr",
        name: str = "VIComparisonGP",
        max_iters: int = 50,
        get_logger: Callable = None,
    ):
        super(VIComparisonGP, self).__init__(name=name)

        self.N, self.D = X.shape[0], X.shape[1]

        self.output_dim = 1
        self.get_logger = get_logger
        self.X = X
        self.y = y
        self.yc = yc

        self.max_iters = max_iters
        self.vi_mode = vi_mode

        self.kern = kernel
        self.likelihood = likelihood

        self.sigma2s = self.likelihood.variance * np.ones(
            (X.shape[0], 1), dtype=int)
        jitter = 1e-6
        K = self.kern.K(X)
        L = np.linalg.cholesky(K + np.identity(K.shape[0]) * jitter)

        self.alpha = np.zeros((self.N, 1))
        self.beta = np.ones((self.N, 1))

        self.posterior = None

        # If we are using full rank VI, we initialize it with mean field VI
        if self.vi_mode == "FRVI":
            self.posterior, _, _, self.alpha, self.beta = vi.vi_comparison(
                self.X,
                self.y,
                self.yc,
                self.kern,
                self.sigma2s,
                self.alpha,
                self.beta,
                max_iters=50,
                method="mf")
            self.beta = choleskies._triang_to_flat_pure(
                jitchol(self.posterior.covariance)[None, :])
Example #14
    def calculate_mu_var(self, X, Y, Z, q_u_mean, q_u_chol, kern, mean_function, num_inducing, num_data, num_outputs):
        """
        Calculate posterior mean and variance for the latent function values for use in the
        expectation over the likelihood
        """
        #expand cholesky representation
        L = choleskies.flat_to_triang(q_u_chol)
        #S = linalg.ijk_ljk_to_ilk(L, L) #L.dot(L.T)
        S = np.empty((num_outputs, num_inducing, num_inducing))
        for i in range(num_outputs):
            np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:])  # S[i] = L[i] L[i]^T, written in place
        #logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
        #compute mean function stuff
        if mean_function is not None:
            prior_mean_u = mean_function.f(Z)
            prior_mean_f = mean_function.f(X)
        else:
            prior_mean_u = np.zeros((num_inducing, num_outputs))
            prior_mean_f = np.zeros((num_data, num_outputs))

        #compute kernel related stuff
        Kmm = kern.K(Z)
        #Knm = kern.K(X, Z)
        Kmn = kern.K(Z, X)
        Knn_diag = kern.Kdiag(X)
        #Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
        Lm = linalg.jitchol(Kmm)
        logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
        Kmmi, _ = linalg.dpotri(Lm)

        #compute the marginal means and variances of q(f)
        #A = np.dot(Knm, Kmmi)
        A, _ = linalg.dpotrs(Lm, Kmn)
        #mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
        mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
        #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S), 1)
        v = np.empty((num_data, num_outputs))
        for i in range(num_outputs):
            tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
            v[:,i] = np.sum(np.square(tmp),0)
        v += (Knn_diag - np.sum(A*Kmn,0))[:,None]

        #compute the KL term
        Kmmim = np.dot(Kmmi, q_u_mean)
        #KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KL = KLs.sum()

        latent_detail = LatentFunctionDetails(q_u_mean=q_u_mean, q_u_chol=q_u_chol, mean_function=mean_function,
                                              mu=mu, v=v, prior_mean_u=prior_mean_u, L=L, A=A,
                                              S=S, Kmm=Kmm, Kmmi=Kmmi, Kmmim=Kmmim, KL=KL)
        return latent_detail
Example #15
def integral_mean_rebased(gpy_gp, prior_mean, prior_var, compute_var=False):
    X = gpy_gp.X
    Y = gpy_gp.Y

    n, d = X.shape[0], X.shape[1]
    assert prior_mean.ndim == 1
    assert prior_var.ndim == 2
    assert prior_mean.shape[0] == d
    assert prior_var.shape[0] == d
    assert prior_var.shape[0] == prior_var.shape[1]

    scaling = np.max(Y)
    # print(scaling)
    Y = np.exp(Y - scaling)

    mu = prior_mean

    # Kernel parameters
    w = np.exp(gpy_gp.kern.lengthscale.values)
    h = np.exp(gpy_gp.kern.variance.values[0])

    if len(w) == 1:
        w = np.array([w] * d).reshape(-1)
    W = np.diag(w)  # build the diagonal matrix W from the lengthscale parameters w
    V = prior_var

    n_s = np.zeros((n, ))

    for i in range(n):
        n_s[i] = h * multivariate_normal._pdf_point_est(
            X[i, :], mean=mu, cov=W + V)
    # print(Y)
    c_f = np.linalg.det(2 * np.pi * (2 * W + V))**-0.5

    K_xx = gpy_gp.kern.K(X)
    # Find the inverse of K_xx matrix via Cholesky decomposition (with jitter)
    K_xx_cho = jitchol(K_xx)
    cholesky_inverse = np.linalg.inv(K_xx_cho)
    K_xx_inv = cholesky_inverse.T @ cholesky_inverse

    unscaled_integral_mean = n_s.T @ K_xx_inv @ Y

    if compute_var:
        unscaled_integral_var = c_f - n_s.T @ K_xx_inv @ n_s
        scaled_var = np.log(unscaled_integral_var) + 2 * scaling
    else:
        scaled_var = np.nan
    scaled_mean = np.log(unscaled_integral_mean) + scaling

    return scaled_mean, scaled_var
Example #16
def _compute_B_statistics(K, W):
    if np.any(np.isnan(W)):
        raise ValueError('One or more element(s) of W is NaN')
    W_12 = np.sqrt(W)
    B = np.eye(K.shape[0]) + W_12*K*W_12.T
    L = jitchol(B)
    LiW12, _ = dtrtrs(L, np.diagflat(W_12), lower=1, trans=0)
    K_Wi_i = np.dot(LiW12.T, LiW12) # R = W12BiW12, in R&W p 126, eq 5.25
    C = np.dot(LiW12, K)
    Ki_W_i = K - C.T.dot(C)
    I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
    logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
    return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
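A hedged toy call; W must be elementwise positive and is passed as a column vector so the broadcasts above produce W^{1/2} K W^{1/2}. The final check verifies that Ki_W_i equals (K^{-1} + W)^{-1}:

import numpy as np

K = np.array([[1.0, 0.3],
              [0.3, 1.0]])
W = np.array([[0.5], [2.0]])  # Hessian diagonal as a column vector
K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = _compute_B_statistics(K, W)
print(np.allclose(Ki_W_i, np.linalg.inv(np.linalg.inv(K) + np.diagflat(W))))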
Example #17
    def _get_mu_L(self,
                  x_pred: np.ndarray,
                  N: int = None,
                  woodbury_inv: bool = False,
                  with_index: int = None) -> Tuple:
        """
        Returns the posterior means and the Cholesky factors of the posterior covariances for the posterior samples

        :param x_pred: locations where the mean and posterior covariance are computed
        :param N: number of posterior samples
        :param woodbury_inv: boolean indicating whether the function should return woodbury_inv vector as well
        :param with_index: index of the specific posterior sample the function should return
        :return params: tuple containing the posterior means and choleskies of the covariances. Also woodbury inverses and woodbury choleskies if woodbury_inv is true
        """
        indices = np.arange(self.samples["f"].shape[0])
        if N is not None:
            indices = np.random.choice(indices, N)
        if with_index is not None:
            indices = np.array([with_index], dtype=int)
        N = len(indices)
        x_pred = np.atleast_2d(x_pred)
        f2_mu = np.empty((N, x_pred.shape[0]))
        f2_L = np.empty((N, x_pred.shape[0], x_pred.shape[0]))
        k_x1_x2 = self.kern.K(self.X, x_pred)
        k_x2_x2 = self.kern.K(x_pred)
        for ni, i in enumerate(indices):
            L_div_k_x1_x2 = la.solve_triangular(self.samples["L_K"][i, :, :],
                                                k_x1_x2,
                                                lower=True,
                                                overwrite_b=False)
            f2_mu[ni, :] = np.dot(L_div_k_x1_x2.T, self.samples["eta"][i, :])
            f2_cov = k_x2_x2 - np.dot(L_div_k_x1_x2.T, L_div_k_x1_x2)
            f2_L[ni, :, :] = jitchol(f2_cov)
        if woodbury_inv:
            w_inv = np.empty((N, self.X.shape[0], self.X.shape[0]))
            w_chol = np.empty((N, self.X.shape[0]))
            for ni, i in enumerate(indices):
                L_Kinv = la.inv(self.samples["L_K"][i, :, :])
                w_inv[ni, :, :] = L_Kinv.T @ L_Kinv
                w_chol[ni, :] = (L_Kinv.T @ self.samples["eta"][i, :, None])[:, 0]
            return f2_mu, f2_L, w_inv, w_chol
        else:
            return f2_mu, f2_L
Example #18
def latent_funs_cov(Z, kernel_list):
    """
    Description: Builds the full-covariance cov[u(z),u(z)] of a Multi-output GP for a Sparse approximation
    :param Z: Inducing Points
    :param kernel_list: Kernels of u_q functions priors
    :return: Kuu
    """
    Q = len(kernel_list)
    M,Dz = Z.shape
    Xdim = int(Dz/Q)
    Kuu = np.empty((Q, M, M))
    Luu = np.empty((Q, M, M))
    Kuui = np.empty((Q, M, M))
    for q, kern in enumerate(kernel_list):
        Kuu[q, :, :] = kern.K(Z[:,q*Xdim:q*Xdim+Xdim],Z[:,q*Xdim:q*Xdim+Xdim])
        Luu[q, :, :] = linalg.jitchol(Kuu[q, :, :])
        Kuui[q, :, :], _ = linalg.dpotri(np.asfortranarray(Luu[q, :, :]))
    return Kuu, Luu, Kuui
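A hedged usage sketch; the kernel choices and sizes are illustrative, and the imports used by the function itself (numpy, GPy.util.linalg) are assumed to be in scope:

import numpy as np
import GPy

Q, M, Xdim = 2, 10, 1
Z = np.random.randn(M, Q * Xdim)  # one Xdim-wide column block of Z per u_q
kernel_list = [GPy.kern.RBF(Xdim) for _ in range(Q)]
Kuu, Luu, Kuui = latent_funs_cov(Z, kernel_list)
print(Kuu.shape, Luu.shape, Kuui.shape)  # (2, 10, 10) each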
Example #19
def gp_sample(model, x, n_samples):
    if len(x.shape) == 1:
        x = np.reshape(x, (1, -1))
        n_points = 1
    else:
        n_points = x.shape[0]

    # special case: a single realisation of a single point
    if n_points == 1 and n_samples == 1:
        m, cov = model.predict(x, full_cov=False)
        L = np.sqrt(cov)
        U = numpy_normal()
        return m + L * U

    # else general case, do things properly
    m, cov = model.predict(x, full_cov=True)
    L = jitchol(cov)
    U = numpy_normal(size=(n_points, n_samples))
    return m + L @ U
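A hedged usage sketch; numpy_normal is assumed to be an alias for np.random.normal, and the model construction is illustrative:

import numpy as np
import GPy

X = np.linspace(0, 1, 20)[:, None]
Y = np.sin(6 * X) + 0.1 * np.random.randn(20, 1)
model = GPy.models.GPRegression(X, Y)
x_new = np.linspace(0, 1, 50)[:, None]
samples = gp_sample(model, x_new, n_samples=5)  # (50, 5): five joint posterior draws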
Example #20
    def alpha(self):
        '''
        Compute alpha = K^{-1} y.

        Args:
            None

        Returns:
            (array) alpha of size N x 1
        '''

        # compute the kernel matrix of size N x N
        k = self.kernel('trainSet', self.theta_, self.theta_)

        # compute the Cholesky factor
        self.chol_fact = gpl.jitchol(k)

        # Use triangular method to solve for alpha
        alp = gpl.dpotrs(self.chol_fact, self.output, lower=True)[0]

        return alp
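With alpha = K^{-1} y in hand, the GP predictive mean at new inputs is K_* alpha. A hedged standalone sketch of that step using GPy's solvers directly (the function name is illustrative, not part of the class above):

import numpy as np
from GPy.util.linalg import jitchol, dpotrs

def predictive_mean(K, K_star, y):
    """Return K_star @ K^{-1} y via a Cholesky solve (sketch)."""
    L = jitchol(K)
    alpha, _ = dpotrs(L, y, lower=True)
    return K_star @ alpha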
Example #21
def latent_funs_cov(Z, kernel_list):
    """
    Builds the full-covariance cov[u(z),u(z)] of a Multi-output GP
    for a Sparse approximation
    :param Z: Inducing Points
    :param kernel_list: Kernels of u_q functions priors
    :return: Kuu
    """
    Q = len(kernel_list)
    M, Dz = Z.shape
    Xdim = int(Dz / Q)
    #Kuu = np.zeros([Q*M,Q*M])
    Kuu = np.empty((Q, M, M))
    Luu = np.empty((Q, M, M))
    Kuui = np.empty((Q, M, M))
    for q, kern in enumerate(kernel_list):
        Kuu[q, :, :] = kern.K(Z[:, q * Xdim:q * Xdim + Xdim],
                              Z[:, q * Xdim:q * Xdim + Xdim])
        # Kuu[q, :, :] += 1.0e-6*np.eye(*Kuu[q, :, :].shape)  # optional jitter for numerical stability (disabled)
        Luu[q, :, :] = linalg.jitchol(Kuu[q, :, :], maxtries=10)
        Kuui[q, :, :], _ = linalg.dpotri(np.asfortranarray(Luu[q, :, :]))
    return Kuu, Luu, Kuui
Example #22
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, Kuu_sigma=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv,
        """


        num_data, output_dim = Y.shape
        input_dim = Z.shape[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        # from ..models.sslvm import Gaussian_Gamma
        # if isinstance(likelihood, Gaussian_Gamma):
        #     beta = likelihood.expectation_beta()
        #     logL_R = -num_data*likelihood.expectation_logbeta()
        # else:
        beta = 1./np.fmax(likelihood.variance, 1e-6)
        logL_R = -num_data*np.log(beta)


        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kmm, Kuu_sigma)
        else:
            diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        #LmInv = dtrtri(Lm)
        if uncertain_inputs:
            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
        else:
            LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0])/beta #tdot(psi1.dot(LmInv.T).T) /beta
            
        Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LmLL = Lm.dot(LL)
        # LLInv = dtrtri(LL)
        # LmLLInv = LLInv.dot(LmInv)
        
        logdet_L = 2.*np.sum(np.log(np.diag(LL)))
        b  = dtrtrs(LmLL, psi1Y.T)[0].T #psi1Y.dot(LmLLInv.T)
        bbt = np.square(b).sum()
        v = dtrtrs(LmLL, b.T, trans=1)[0].T #b.dot(LmLLInv)
        LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
        
        if psi1S is not None:
            psi1SLLinv = dtrtrs(LmLL, psi1S.T)[0].T #psi1S.dot(LmLLInv.T)
            bbt += np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT += tdot(psi1SLLinv.T)
            psi1SP = dtrtrs(LmLL, psi1SLLinv.T, trans=1)[0].T #psi1SLLinv.dot(LmLLInv)
        tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim))
        dL_dpsi2R = backsub_both_sides(Lm, tmp+output_dim*np.eye(input_dim))/2
        #tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
        #dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
        
        #======================================================================
        # Compute log-likelihood
        #======================================================================
        
        logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0-np.trace(LmInvPsi2LmInvT))+YRY- bbt)/2.-output_dim*logdet_L/2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm =  dL_dpsi2R - output_dim* backsub_both_sides(Lm, LmInvPsi2LmInvT)/2 #LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        LLInvLmT = dtrtrs(LL, Lm.T)[0]
        cov = tdot(LLInvLmT.T)

        wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
        post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v.T, K=Kmm, mean=None, cov=cov, K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        # if isinstance(likelihood, Gaussian_Gamma):
        #     from scipy.special import polygamma
        #     dL_dthetaL = ((YRY + output_dim*psi0)/2. - (dL_dpsi2R*psi2).sum() - np.trace(LLinvPsi1TYYTPsi1LLinvT))/-beta
        #     likelihood.q_a.gradient = num_data*output_dim/2.*polygamma(1, likelihood.q_a) + dL_dthetaL/likelihood.q_b
        #     likelihood.q_b.gradient = num_data*output_dim/(-2.*likelihood.q_b) +dL_dthetaL*(-likelihood.q_a/(likelihood.q_b*likelihood.q_b))
        # else:
        dL_dthetaL = (YRY*beta + beta*output_dim*psi0 - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)
        
        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -output_dim * (beta * np.ones((num_data,)))/2.

        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            dL_dpsi1 = beta*(np.dot(m,v)+Shalf[:,None]*psi1SP)
        else:
            dL_dpsi1 = beta*np.dot(Y,v)

        if uncertain_inputs:
            dL_dpsi2 = beta* dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1,dL_dpsi2R)*2.
            dL_dpsi2 = None
        
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}
            
        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            psi1LmiLLi = dtrtrs(LmLL, psi1.T)[0].T 
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m*beta+ psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta/-2.+ np.square(psi1LmiLLi).sum(axis=1)/2

        return post, logL, grad_dict
Example #25
    def inference(self,
                  q_u_means,
                  q_u_chols,
                  X,
                  Y,
                  Z,
                  kern_list,
                  kern_list_Gdj,
                  kern_aux,
                  likelihood,
                  B_list,
                  Y_metadata,
                  KL_scale=1.0,
                  batch_scale=None,
                  predictive=False,
                  Gauss_Newton=False):
        M = Z.shape[0]
        T = len(Y)
        if batch_scale is None:
            batch_scale = [1.0] * T
        Ntask = [Y[t].shape[0] for t in range(T)]
        Q = len(kern_list)
        D = likelihood.num_output_functions(Y_metadata)
        Kuu, Luu, Kuui = util.latent_funs_cov(Z, kern_list)
        p_U = pu(Kuu=Kuu, Luu=Luu, Kuui=Kuui)
        q_U = qu(mu_u=q_u_means.copy(), chols_u=q_u_chols.copy())
        S_u = np.empty((Q, M, M))
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        for q in range(Q):
            np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :])  # S_u[q] = L_u[q] L_u[q]^T, in place
        Su_add_Kuu = np.zeros((Q, M, M))
        Su_add_Kuu_chol = np.zeros((Q, M, M))
        for q in range(Q):
            Su_add_Kuu[q, :, :] = S_u[q, :, :] + Kuu[q, :, :]
            Su_add_Kuu_chol[q, :, :] = linalg.jitchol(Su_add_Kuu[q, :, :])

        # for every latent function f_d calculate q(f_d) and keep it as q(F):
        q_F = []
        posteriors_F = []
        f_index = Y_metadata['function_index'].flatten()
        d_index = Y_metadata['d_index'].flatten()

        for d in range(D):
            Xtask = X[f_index[d]]
            q_fd, q_U = self.calculate_q_f(X=Xtask,
                                           Z=Z,
                                           q_U=q_U,
                                           S_u=S_u,
                                           p_U=p_U,
                                           kern_list=kern_list,
                                           kern_list_Gdj=kern_list_Gdj,
                                           kern_aux=kern_aux,
                                           B=B_list,
                                           M=M,
                                           N=Xtask.shape[0],
                                           Q=Q,
                                           D=D,
                                           d=d)
            # Posterior objects for output functions (used in prediction)
            # TODO: the Posterior object below may be unnecessary
            posterior_fd = Posterior(mean=q_fd.m_fd.copy(),
                                     cov=q_fd.S_fd.copy(),
                                     K=util.conv_function_covariance(
                                         X=Xtask,
                                         B=B_list,
                                         kernel_list=kern_list,
                                         kernel_list_Gdj=kern_list_Gdj,
                                         kff_aux=kern_aux,
                                         d=d),
                                     prior_mean=np.zeros(q_fd.m_fd.shape))
            posteriors_F.append(posterior_fd)
            q_F.append(q_fd)

        mu_F = []
        v_F = []
        for t in range(T):
            mu_F_task = np.empty((X[t].shape[0], 1))
            v_F_task = np.empty((X[t].shape[0], 1))
            for d, q_fd in enumerate(q_F):
                if f_index[d] == t:
                    mu_F_task = np.hstack((mu_F_task, q_fd.m_fd))
                    v_F_task = np.hstack((v_F_task, q_fd.v_fd))

            mu_F.append(mu_F_task[:, 1:])
            v_F.append(v_F_task[:, 1:])

        # posterior_Fnew for predictive
        if predictive:
            return posteriors_F
        # inference for rest of cases
        else:
            # Variational Expectations
            VE = likelihood.var_exp(Y, mu_F, v_F, Y_metadata)
            VE_dm, VE_dv = likelihood.var_exp_derivatives(
                Y, mu_F, v_F, Y_metadata, Gauss_Newton)
            for t in range(T):
                VE[t] = VE[t] * batch_scale[t]
                VE_dm[t] = VE_dm[t] * batch_scale[t]
                VE_dv[t] = VE_dv[t] * batch_scale[t]

            # KL Divergence
            KL = self.calculate_KL(q_U=q_U,
                                   Su_add_Kuu=Su_add_Kuu,
                                   Su_add_Kuu_chol=Su_add_Kuu_chol,
                                   p_U=p_U,
                                   M=M,
                                   Q=Q,
                                   D=D)

            # Log Marginal log(p(Y))
            F = 0
            for t in range(T):
                F += VE[t].sum()

            log_marginal = F - KL

            # Gradients and Posteriors
            dL_dS_u = []
            dL_dmu_u = []
            dL_dL_u = []
            dL_dKmm = []
            dL_dKmn = []
            dL_dKdiag = []
            posteriors = []
            for q in range(Q):
                (dL_dmu_q, dL_dL_q, dL_dS_q, posterior_q, dL_dKqq, dL_dKdq,
                 dL_dKdiag_q) = self.calculate_gradients(
                     q_U=q_U,
                     S_u=S_u,
                     Su_add_Kuu_chol=Su_add_Kuu_chol,
                     p_U=p_U,
                     q_F=q_F,
                     VE_dm=VE_dm,
                     VE_dv=VE_dv,
                     Ntask=Ntask,
                     M=M,
                     Q=Q,
                     D=D,
                     f_index=f_index,
                     d_index=d_index,
                     q=q)
                dL_dmu_u.append(dL_dmu_q)
                dL_dL_u.append(dL_dL_q)
                dL_dS_u.append(dL_dS_q)
                dL_dKmm.append(dL_dKqq)
                dL_dKmn.append(dL_dKdq)
                dL_dKdiag.append(dL_dKdiag_q)
                posteriors.append(posterior_q)

            gradients = {
                'dL_dmu_u': dL_dmu_u,
                'dL_dL_u': dL_dL_u,
                'dL_dS_u': dL_dS_u,
                'dL_dKmm': dL_dKmm,
                'dL_dKmn': dL_dKmn,
                'dL_dKdiag': dL_dKdiag
            }

            return log_marginal, gradients, posteriors, posteriors_F
Example #26
    def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y, qU_mean,
                  qU_var_r, qU_var_c, indexD, output_dim):
        """
        The SVI-VarDTC inference
        """

        N, D = Y.shape[0], output_dim
        Mr, Mc = Zr.shape[0], Zc.shape[0]
        Qr, Qc = Zr.shape[1], Zc.shape[1]

        uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
        uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        grad_dict = self._init_grad_dict(N, D, Mr, Mc)

        beta = 1. / likelihood.variance
        if len(beta) == 1:
            beta = np.zeros(D) + beta

        psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr,
                                                    uncertain_inputs_r)
        psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc,
                                                    uncertain_inputs_c)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kuu_r = kern_r.K(Zr).copy()
        diag.add(Kuu_r, self.const_jitter)
        Lr = jitchol(Kuu_r)

        Kuu_c = kern_c.K(Zc).copy()
        diag.add(Kuu_c, self.const_jitter)
        Lc = jitchol(Kuu_c)

        mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
        LSr = jitchol(Sr)
        LSc = jitchol(Sc)

        LcInvMLrInvT = dtrtrs(Lc, dtrtrs(Lr, mu.T)[0].T)[0]
        LcInvLSc = dtrtrs(Lc, LSc)[0]
        LrInvLSr = dtrtrs(Lr, LSr)[0]
        LcInvScLcInvT = tdot(LcInvLSc)
        LrInvSrLrInvT = tdot(LrInvLSr)
        tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
        tr_LcInvScLcInvT = np.square(LcInvLSc).sum()

        mid_res = {
            'psi0_r': psi0_r,
            'psi1_r': psi1_r,
            'psi2_r': psi2_r,
            'psi0_c': psi0_c,
            'psi1_c': psi1_c,
            'psi2_c': psi2_c,
            'Lr': Lr,
            'Lc': Lc,
            'LcInvMLrInvT': LcInvMLrInvT,
            'LcInvScLcInvT': LcInvScLcInvT,
            'LrInvSrLrInvT': LrInvSrLrInvT,
        }

        #======================================================================
        # Compute log-likelihood
        #======================================================================

        logL = 0.
        for d in range(D):
            logL += self.inference_d(d, beta, Y, indexD, grad_dict, mid_res,
                                     uncertain_inputs_r, uncertain_inputs_c,
                                     Mr, Mc)

        logL += -Mc * (np.log(np.diag(Lr)).sum()-np.log(np.diag(LSr)).sum())  -Mr * (np.log(np.diag(Lc)).sum()-np.log(np.diag(LSc)).sum()) \
               - np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. + Mr*Mc/2.

        #======================================================================
        # Compute dL_dKuu
        #======================================================================

        tmp = tdot(
            LcInvMLrInvT
        ) / 2. + tr_LrInvSrLrInvT / 2. * LcInvScLcInvT - Mr / 2. * np.eye(Mc)

        dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
        dL_dKuu_c += dL_dKuu_c.T
        dL_dKuu_c *= 0.5

        tmp = tdot(
            LcInvMLrInvT.T
        ) / 2. + tr_LcInvScLcInvT / 2. * LrInvSrLrInvT - Mc / 2. * np.eye(Mr)

        dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
        dL_dKuu_r += dL_dKuu_r.T
        dL_dKuu_r *= 0.5

        #======================================================================
        # Compute dL_dqU
        #======================================================================

        tmp = -LcInvMLrInvT
        dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]

        LScInv = dtrtri(LSc)
        tmp = -tr_LrInvSrLrInvT / 2. * np.eye(Mc)
        dL_dqU_var_c = backsub_both_sides(Lc, tmp,
                                          'left') + tdot(LScInv.T) * Mr / 2.

        LSrInv = dtrtri(LSr)
        tmp = -tr_LcInvScLcInvT / 2. * np.eye(Mr)
        dL_dqU_var_r = backsub_both_sides(Lr, tmp,
                                          'left') + tdot(LSrInv.T) * Mc / 2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT,
                                    LcInvScLcInvT=LcInvScLcInvT,
                                    LrInvSrLrInvT=LrInvSrLrInvT,
                                    Lr=Lr,
                                    Lc=Lc,
                                    kern_r=kern_r,
                                    Xr=Xr,
                                    Zr=Zr)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        grad_dict['dL_dqU_mean'] += dL_dqU_mean
        grad_dict['dL_dqU_var_c'] += dL_dqU_var_c
        grad_dict['dL_dqU_var_r'] += dL_dqU_var_r
        grad_dict['dL_dKuu_c'] += dL_dKuu_c
        grad_dict['dL_dKuu_r'] += dL_dKuu_r

        if not uncertain_inputs_c:
            grad_dict['dL_dKdiag_c'] = grad_dict['dL_dpsi0_c']
            grad_dict['dL_dKfu_c'] = grad_dict['dL_dpsi1_c']

        if not uncertain_inputs_r:
            grad_dict['dL_dKdiag_r'] = grad_dict['dL_dpsi0_r']
            grad_dict['dL_dKfu_r'] = grad_dict['dL_dpsi1_r']

        return post, logL, grad_dict
Example #27
    def inference(self, kern, X, Z, likelihood, Y, qU):
        """
        The SVI-VarDTC inference
        """

        if isinstance(Y, np.ndarray) and np.any(np.isnan(Y)):
            missing_data = True
            N, M, Q = Y.shape[0], Z.shape[0], Z.shape[1]
            Ds = Y.shape[1] - (np.isnan(Y)*1).sum(1)
            Ymask = 1-np.isnan(Y)*1
            Y_masked = np.zeros_like(Y)
            Y_masked[Ymask==1] = Y[Ymask==1]
            ND = Ymask.sum()
        else:
            missing_data = False
            N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]
            ND = N*D

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1./np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(kern, X, Z, Y if not missing_data else Y_masked, beta, uncertain_inputs, D if not missing_data else Ds, missing_data)
        
        #======================================================================
        # Compute Common Components
        #======================================================================
        
        mu, S = qU.mean, qU.covariance
        mupsi1Y = mu.dot(psi1Y)

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)
        
        if missing_data:
            S_mu = S[None,:,:]+mu.T[:,:,None]*mu.T[:,None,:]
            NS_mu = S_mu.T.dot(Ymask.T).T
            LmInv = dtrtri(Lm)
            
            LmInvPsi2LmInvT = np.swapaxes(psi2.dot(LmInv.T),1,2).dot(LmInv.T)            
            LmInvSmuLmInvT =  np.swapaxes(NS_mu.dot(LmInv.T),1,2).dot(LmInv.T)
            
            B = mupsi1Y+ mupsi1Y.T +(Ds[:,None,None]*psi2).sum(0)
            tmp = backsub_both_sides(Lm, B,'right')
            
            logL =  -ND*log_2_pi/2. +ND*np.log(beta)/2. - psi0/2. - YRY/2.  \
                       -(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. +np.trace(tmp)/2.
        else:
            S_mu = S*D+tdot(mu)
            if uncertain_inputs:
                LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
            else:
                LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0])/beta #tdot(psi1.dot(LmInv.T).T) /beta        
            LmInvSmuLmInvT = backsub_both_sides(Lm, S_mu, 'right')
            
            B = mupsi1Y+ mupsi1Y.T +D*psi2
            tmp = backsub_both_sides(Lm, B,'right')
            
            logL =  -ND*log_2_pi/2. +ND*np.log(beta)/2. - psi0/2. - YRY/2.  \
                       -(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. +np.trace(tmp)/2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm = np.eye(M)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = None #(YRY*beta + beta*output_dim*psi0 - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)
        
        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        if missing_data:
            dL_dpsi0 = -Ds * (beta * np.ones((N,)))/2.
        else:
            dL_dpsi0 = -D * (beta * np.ones((N,)))/2.

        if uncertain_outputs:
            Ym,Ys = Y.mean, Y.variance
            dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, Ym.dot(mu.T).T)[0], trans=1)[0].T*beta
        else:
            if missing_data:
                dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, (Y_masked).dot(mu.T).T)[0], trans=1)[0].T*beta
            else:
                dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm, Y.dot(mu.T).T)[0], trans=1)[0].T*beta

        if uncertain_inputs:
            if missing_data:
                dL_dpsi2 = np.swapaxes((Ds[:,None,None]*np.eye(M)[None,:,:]-LmInvSmuLmInvT).dot(LmInv),1,2).dot(LmInv)*beta/2.
            else:
                dL_dpsi2 = beta*backsub_both_sides(Lm, D*np.eye(M)-LmInvSmuLmInvT, 'left')/2.
        else:
            dL_dpsi1 += beta*psi1.dot(dL_dpsi2+dL_dpsi2.T) 
            dL_dpsi2 = None
            
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}

        if uncertain_outputs:
            Ym = Y.mean
            grad_dict['dL_dYmean'] = -Ym*beta+ dtrtrs(Lm,psi1.T)[0].T.dot(dtrtrs(Lm,mu)[0])
            grad_dict['dL_dYvar'] = beta/-2.

        return logL, grad_dict
Example #28
    def inference(self,
                  kern,
                  X,
                  Z,
                  likelihood,
                  Y,
                  Y_metadata=None,
                  Lm=None,
                  dL_dKmm=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv,
        """

        num_data, output_dim = Y.shape
        input_dim = Z.shape[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1. / np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(
            kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        #LmInv = dtrtri(Lm)
        if uncertain_inputs:
            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
        else:
            LmInvPsi2LmInvT = tdot(dtrtrs(
                Lm, psi1.T)[0]) / beta  #tdot(psi1.dot(LmInv.T).T) /beta

        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LmLL = Lm.dot(LL)
        # LLInv = dtrtri(LL)
        # LmLLInv = LLInv.dot(LmInv)

        logdet_L = 2. * np.sum(np.log(np.diag(LL)))
        b = dtrtrs(LmLL, psi1Y.T)[0].T  #psi1Y.dot(LmLLInv.T)
        bbt = np.square(b).sum()
        v = dtrtrs(LmLL, b.T, trans=1)[0].T  #b.dot(LmLLInv)
        LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

        if psi1S is not None:
            psi1SLLinv = dtrtrs(LmLL, psi1S.T)[0].T  #psi1S.dot(LmLLInv.T)
            bbt += np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT += tdot(psi1SLLinv.T)
            psi1SP = dtrtrs(LmLL, psi1SLLinv.T,
                            trans=1)[0].T  #psi1SLLinv.dot(LmLLInv)
        tmp = -backsub_both_sides(
            LL, LLinvPsi1TYYTPsi1LLinvT + output_dim * np.eye(input_dim))
        dL_dpsi2R = backsub_both_sides(
            Lm, tmp + output_dim * np.eye(input_dim)) / 2
        #tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
        #dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.

        #======================================================================
        # Compute log-likelihood
        #======================================================================
        logL_R = -num_data * np.log(beta)
        logL = -(
            output_dim *
            (num_data * log_2_pi + logL_R + psi0 - np.trace(LmInvPsi2LmInvT)) +
            YRY - bbt) / 2. - output_dim * logdet_L / 2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm = dL_dpsi2R - output_dim * backsub_both_sides(
            Lm,
            LmInvPsi2LmInvT) / 2  #LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        wd_inv = backsub_both_sides(
            Lm,
            np.eye(input_dim) -
            backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
            transpose='left')
        post = Posterior(woodbury_inv=wd_inv,
                         woodbury_vector=v.T,
                         K=Kmm,
                         mean=None,
                         cov=None,
                         K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = (YRY * beta + beta * output_dim * psi0 - num_data *
                      output_dim * beta) / 2. - beta * (dL_dpsi2R * psi2).sum(
                      ) - beta * np.trace(LLinvPsi1TYYTPsi1LLinvT)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -output_dim * (beta * np.ones((num_data, ))) / 2.

        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            dL_dpsi1 = beta * (np.dot(m, v) + Shalf[:, None] * psi1SP)
        else:
            dL_dpsi1 = beta * np.dot(Y, v)

        if uncertain_inputs:
            dL_dpsi2 = beta * dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1, dL_dpsi2R) * 2.
            dL_dpsi2 = None

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL
            }

        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            psi1LmiLLi = dtrtrs(LmLL, psi1.T)[0].T  #psi1.dot(LmLLInv.T)
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m * beta + psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta / -2. + np.square(psi1LmiLLi).sum(
                axis=1) / 2

        return post, logL, grad_dict
Example #29
    def inference(self, kern_r, kern_c, Xr, Xc, Zr, Zc, likelihood, Y, qU_mean,
                  qU_var_r, qU_var_c):
        """
        The SVI-VarDTC inference
        """

        N, D = Y.shape[0], Y.shape[1]
        Mr, Mc = Zr.shape[0], Zc.shape[0]
        Qr, Qc = Zr.shape[1], Zc.shape[1]

        uncertain_inputs_r = isinstance(Xr, VariationalPosterior)
        uncertain_inputs_c = isinstance(Xc, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1. / likelihood.variance

        psi0_r, psi1_r, psi2_r = self.gatherPsiStat(kern_r, Xr, Zr,
                                                    uncertain_inputs_r)
        psi0_c, psi1_c, psi2_c = self.gatherPsiStat(kern_c, Xc, Zc,
                                                    uncertain_inputs_c)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kuu_r = kern_r.K(Zr).copy()
        diag.add(Kuu_r, self.const_jitter)
        Lr = jitchol(Kuu_r)

        Kuu_c = kern_c.K(Zc).copy()
        diag.add(Kuu_c, self.const_jitter)
        Lc = jitchol(Kuu_c)

        mu, Sr, Sc = qU_mean, qU_var_r, qU_var_c
        LSr = jitchol(Sr)
        LSc = jitchol(Sc)

        LcInvMLrInvT = dtrtrs(Lc, dtrtrs(Lr, mu.T)[0].T)[0]
        LcInvPsi2_cLcInvT = backsub_both_sides(Lc, psi2_c, 'right')
        LrInvPsi2_rLrInvT = backsub_both_sides(Lr, psi2_r, 'right')
        LcInvLSc = dtrtrs(Lc, LSc)[0]
        LrInvLSr = dtrtrs(Lr, LSr)[0]
        LcInvScLcInvT = tdot(LcInvLSc)
        LrInvSrLrInvT = tdot(LrInvLSr)
        LcInvPsi1_cT = dtrtrs(Lc, psi1_c.T)[0]
        LrInvPsi1_rT = dtrtrs(Lr, psi1_r.T)[0]

        tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT = (LrInvPsi2_rLrInvT *
                                              LrInvSrLrInvT).sum()
        tr_LcInvPsi2_cLcInvT_LcInvScLcInvT = (LcInvPsi2_cLcInvT *
                                              LcInvScLcInvT).sum()
        tr_LrInvSrLrInvT = np.square(LrInvLSr).sum()
        tr_LcInvScLcInvT = np.square(LcInvLSc).sum()
        tr_LrInvPsi2_rLrInvT = np.trace(LrInvPsi2_rLrInvT)
        tr_LcInvPsi2_cLcInvT = np.trace(LcInvPsi2_cLcInvT)

        #======================================================================
        # Compute log-likelihood
        #======================================================================

        logL_A = - np.square(Y).sum() \
               - (LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT)*LrInvPsi2_rLrInvT).sum() \
               -  tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT \
               + 2 * (Y * LcInvPsi1_cT.T.dot(LcInvMLrInvT).dot(LrInvPsi1_rT)).sum() - psi0_c * psi0_r \
               + tr_LrInvPsi2_rLrInvT * tr_LcInvPsi2_cLcInvT

        logL = -N*D/2.*(np.log(2.*np.pi)-np.log(beta)) + beta/2.* logL_A \
               -Mc * (np.log(np.diag(Lr)).sum()-np.log(np.diag(LSr)).sum())  -Mr * (np.log(np.diag(Lc)).sum()-np.log(np.diag(LSc)).sum()) \
               - np.square(LcInvMLrInvT).sum()/2. - tr_LrInvSrLrInvT * tr_LcInvScLcInvT/2. + Mr*Mc/2.

        #======================================================================
        # Compute dL_dKuu
        #======================================================================

        tmp =  beta* LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) \
             + beta* tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT.dot(LcInvScLcInvT) \
             - beta* LcInvMLrInvT.dot(LrInvPsi1_rT).dot(Y.T).dot(LcInvPsi1_cT.T) \
             - beta/2. * tr_LrInvPsi2_rLrInvT* LcInvPsi2_cLcInvT - Mr/2.*np.eye(Mc) \
             + tdot(LcInvMLrInvT)/2. + tr_LrInvSrLrInvT/2. * LcInvScLcInvT

        dL_dKuu_c = backsub_both_sides(Lc, tmp, 'left')
        dL_dKuu_c += dL_dKuu_c.T
        dL_dKuu_c *= 0.5

        tmp =  beta* LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT) \
             + beta* tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT.dot(LrInvSrLrInvT) \
             - beta* LrInvPsi1_rT.dot(Y.T).dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT) \
             - beta/2. * tr_LcInvPsi2_cLcInvT * LrInvPsi2_rLrInvT - Mc/2.*np.eye(Mr) \
             + tdot(LcInvMLrInvT.T)/2. + tr_LcInvScLcInvT/2. * LrInvSrLrInvT

        dL_dKuu_r = backsub_both_sides(Lr, tmp, 'left')
        dL_dKuu_r += dL_dKuu_r.T
        dL_dKuu_r *= 0.5

        #======================================================================
        # Compute dL_dthetaL
        #======================================================================

        dL_dthetaL = -D * N * beta / 2. - logL_A * beta * beta / 2.

        #======================================================================
        # Compute dL_dqU
        #======================================================================

        tmp = -beta * LcInvPsi2_cLcInvT.dot(LcInvMLrInvT).dot(LrInvPsi2_rLrInvT)\
              + beta* LcInvPsi1_cT.dot(Y).dot(LrInvPsi1_rT.T) - LcInvMLrInvT

        dL_dqU_mean = dtrtrs(Lc, dtrtrs(Lr, tmp.T, trans=1)[0].T, trans=1)[0]

        LScInv = dtrtri(LSc)
        tmp = -beta / 2. * tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvPsi2_cLcInvT - tr_LrInvSrLrInvT / 2. * np.eye(
            Mc)
        dL_dqU_var_c = backsub_both_sides(Lc, tmp,
                                          'left') + tdot(LScInv.T) * Mr / 2.

        LSrInv = dtrtri(LSr)
        tmp = -beta / 2. * tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvPsi2_rLrInvT - tr_LcInvScLcInvT / 2. * np.eye(
            Mr)
        dL_dqU_var_r = backsub_both_sides(Lr, tmp,
                                          'left') + tdot(LSrInv.T) * Mc / 2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        post = PosteriorMultioutput(LcInvMLrInvT=LcInvMLrInvT,
                                    LcInvScLcInvT=LcInvScLcInvT,
                                    LrInvSrLrInvT=LrInvSrLrInvT,
                                    Lr=Lr,
                                    Lc=Lc,
                                    kern_r=kern_r,
                                    Xr=Xr,
                                    Zr=Zr)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0_r = -psi0_c * beta / 2. * np.ones((D, ))
        dL_dpsi0_c = -psi0_r * beta / 2. * np.ones((N, ))

        dL_dpsi1_c = beta * dtrtrs(
            Lc, (Y.dot(LrInvPsi1_rT.T).dot(LcInvMLrInvT.T)).T, trans=1)[0].T
        dL_dpsi1_r = beta * dtrtrs(
            Lr, (Y.T.dot(LcInvPsi1_cT.T).dot(LcInvMLrInvT)).T, trans=1)[0].T

        tmp = beta / 2. * (
            -LcInvMLrInvT.dot(LrInvPsi2_rLrInvT).dot(LcInvMLrInvT.T) -
            tr_LrInvPsi2_rLrInvT_LrInvSrLrInvT * LcInvScLcInvT +
            tr_LrInvPsi2_rLrInvT * np.eye(Mc))
        dL_dpsi2_c = backsub_both_sides(Lc, tmp, 'left')
        tmp = beta / 2. * (
            -LcInvMLrInvT.T.dot(LcInvPsi2_cLcInvT).dot(LcInvMLrInvT) -
            tr_LcInvPsi2_cLcInvT_LcInvScLcInvT * LrInvSrLrInvT +
            tr_LcInvPsi2_cLcInvT * np.eye(Mr))
        dL_dpsi2_r = backsub_both_sides(Lr, tmp, 'left')

        if not uncertain_inputs_r:
            dL_dpsi1_r += psi1_r.dot(dL_dpsi2_r + dL_dpsi2_r.T)
        if not uncertain_inputs_c:
            dL_dpsi1_c += psi1_c.dot(dL_dpsi2_c + dL_dpsi2_c.T)

        grad_dict = {
            'dL_dthetaL': dL_dthetaL,
            'dL_dqU_mean': dL_dqU_mean,
            'dL_dqU_var_c': dL_dqU_var_c,
            'dL_dqU_var_r': dL_dqU_var_r,
            'dL_dKuu_c': dL_dKuu_c,
            'dL_dKuu_r': dL_dKuu_r,
        }

        if uncertain_inputs_c:
            grad_dict['dL_dpsi0_c'] = dL_dpsi0_c
            grad_dict['dL_dpsi1_c'] = dL_dpsi1_c
            grad_dict['dL_dpsi2_c'] = dL_dpsi2_c
        else:
            grad_dict['dL_dKdiag_c'] = dL_dpsi0_c
            grad_dict['dL_dKfu_c'] = dL_dpsi1_c

        if uncertain_inputs_r:
            grad_dict['dL_dpsi0_r'] = dL_dpsi0_r
            grad_dict['dL_dpsi1_r'] = dL_dpsi1_r
            grad_dict['dL_dpsi2_r'] = dL_dpsi2_r
        else:
            grad_dict['dL_dKdiag_r'] = dL_dpsi0_r
            grad_dict['dL_dKfu_r'] = dL_dpsi1_r

        return post, logL, grad_dict
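A note on the helpers used throughout these examples: jitchol, dtrtrs and backsub_both_sides come from GPy's linalg utilities, and the recurring idiom is to factor a covariance once and then use triangular solves instead of explicit inverses. Below is a minimal sketch of that idiom in plain SciPy; the toy K and y are illustrative, not taken from the example above.

import numpy as np
from scipy.linalg import cholesky, solve_triangular

K = np.array([[2.0, 0.5], [0.5, 1.0]])      # toy covariance
y = np.array([[1.0], [2.0]])

L = cholesky(K, lower=True)                 # K = L L^T
alpha = solve_triangular(L, y, lower=True)  # L^-1 y, what dtrtrs(L, y)[0] returns
quad = float((alpha**2).sum())              # y^T K^-1 y without forming K^-1
logdet = 2.0 * np.log(np.diag(L)).sum()     # log|K| from the Cholesky diagonal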
Example #30
    def inference(self,
                  kern,
                  X,
                  Z,
                  likelihood,
                  Y,
                  indexD,
                  output_dim,
                  Y_metadata=None,
                  Lm=None,
                  dL_dKmm=None,
                  Kuu_sigma=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv.
        """

        input_dim = Z.shape[0]  # number of inducing points (this snippet reuses the name input_dim for it)

        uncertain_inputs = isinstance(X, VariationalPosterior)

        beta = 1. / likelihood.variance
        if len(beta) == 1:
            beta = np.zeros(output_dim) + beta

        beta_exp = np.zeros(indexD.shape[0])
        for d in range(output_dim):
            beta_exp[indexD == d] = beta[d]

        psi0, psi1, psi2 = self.gatherPsiStat(kern, X, Z, Y, beta,
                                              uncertain_inputs)

        psi2_sum = (beta_exp[:, None, None] * psi2).sum(0) / output_dim

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kmm, Kuu_sigma)
        else:
            diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        logL = 0.
        dL_dthetaL = np.zeros(output_dim)
        dL_dKmm = np.zeros_like(Kmm)
        dL_dpsi0 = np.zeros_like(psi0)
        dL_dpsi1 = np.zeros_like(psi1)
        dL_dpsi2 = np.zeros_like(psi2)
        wv = np.empty((Kmm.shape[0], output_dim))

        for d in range(output_dim):
            idx_d = indexD == d
            Y_d = Y[idx_d]
            N_d = Y_d.shape[0]
            beta_d = beta[d]

            psi2_d = psi2[idx_d].sum(0) * beta_d
            psi1Y = Y_d.T.dot(psi1[idx_d]) * beta_d
            psi0_d = psi0[idx_d].sum() * beta_d
            YRY_d = np.square(Y_d).sum() * beta_d

            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_d, 'right')

            Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
            LL = jitchol(Lambda)
            LmLL = Lm.dot(LL)

            b = dtrtrs(LmLL, psi1Y.T)[0].T
            bbt = np.square(b).sum()
            v = dtrtrs(LmLL, b.T, trans=1)[0].T
            LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

            tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT)
            dL_dpsi2R = backsub_both_sides(Lm, tmp + np.eye(input_dim)) / 2

            logL_R = -N_d * np.log(beta_d)
            logL += -((N_d * log_2_pi + logL_R + psi0_d -
                       np.trace(LmInvPsi2LmInvT)) + YRY_d - bbt) / 2.

            dL_dKmm += dL_dpsi2R - backsub_both_sides(Lm, LmInvPsi2LmInvT) / 2

            dL_dthetaL[d:d +
                       1] = (YRY_d * beta_d + beta_d * psi0_d - N_d *
                             beta_d) / 2. - beta_d * (dL_dpsi2R * psi2_d).sum(
                             ) - beta_d * np.trace(LLinvPsi1TYYTPsi1LLinvT)

            dL_dpsi0[idx_d] = -beta_d / 2.
            dL_dpsi1[idx_d] = beta_d * np.dot(Y_d, v)
            dL_dpsi2[idx_d] = beta_d * dL_dpsi2R
            wv[:, d] = v

        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_sum, 'right')

        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LmLL = Lm.dot(LL)
        logdet_L = 2. * np.sum(np.log(np.diag(LL)))
        dL_dpsi2R_common = dpotri(LmLL)[0] / -2.
        dL_dpsi2 += dL_dpsi2R_common[None, :, :] * beta_exp[:, None, None]

        for d in range(output_dim):
            dL_dthetaL[d] += (dL_dpsi2R_common * psi2[indexD == d].sum(0)
                              ).sum() * -beta[d] * beta[d]

        dL_dKmm += dL_dpsi2R_common * output_dim

        logL += -output_dim * logdet_L / 2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        # dL_dKmm =  dL_dpsi2R - output_dim* backsub_both_sides(Lm, LmInvPsi2LmInvT)/2 #LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        LLInvLmT = dtrtrs(LL, Lm.T)[0]
        cov = tdot(LLInvLmT.T)

        wd_inv = backsub_both_sides(
            Lm,
            np.eye(input_dim) -
            backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
            transpose='left')
        post = Posterior(woodbury_inv=wd_inv,
                         woodbury_vector=wv,
                         K=Kmm,
                         mean=None,
                         cov=cov,
                         K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        # for d in range(output_dim):
        #     dL_dthetaL[d:d+1] += - beta[d]*beta[d]*(dL_dpsi2R[None,:,:] * psi2[indexD==d]/output_dim).sum()
        # dL_dthetaL += - (dL_dpsi2R[None,:,:] * psi2_sum*D beta*(dL_dpsi2R*psi2).sum()

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        if not uncertain_inputs:
            dL_dpsi1 += (psi1[:, None, :] * dL_dpsi2).sum(2) * 2.

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL
            }

        return post, logL, grad_dict
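The Posterior object returned above carries woodbury_vector and woodbury_inv, which downstream prediction code typically consumes as in the sketch below. This is an assumed usage pattern; Kxu and Kxx_diag are hypothetical test-point kernel evaluations, not defined in the example.

import numpy as np

def predict_from_posterior(Kxu, Kxx_diag, woodbury_vector, woodbury_inv):
    # predictive mean: K_*u wv ; predictive variance: k_** - diag(K_*u wi K_u*)
    mean = Kxu.dot(woodbury_vector)
    var = Kxx_diag - np.einsum('ij,jk,ik->i', Kxu, woodbury_inv, Kxu)
    return mean, var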
Example #31
    def inference(self, kern, X, Z, likelihood, Y, qU):
        """
        The SVI-VarDTC inference
        """

        if isinstance(Y, np.ndarray) and np.any(np.isnan(Y)):
            missing_data = True
            N, M, Q = Y.shape[0], Z.shape[0], Z.shape[1]
            Ds = Y.shape[1] - (np.isnan(Y) * 1).sum(1)
            Ymask = 1 - np.isnan(Y) * 1
            Y_masked = np.zeros_like(Y)
            Y_masked[Ymask == 1] = Y[Ymask == 1]
            ND = Ymask.sum()
        else:
            missing_data = False
            N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]
            ND = N * D

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1. / np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(
            kern, X, Z, Y if not missing_data else Y_masked, beta,
            uncertain_inputs, D if not missing_data else Ds, missing_data)

        #======================================================================
        # Compute Common Components
        #======================================================================

        mu, S = qU.mean, qU.covariance
        mupsi1Y = mu.dot(psi1Y)

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        if missing_data:
            S_mu = S[None, :, :] + mu.T[:, :, None] * mu.T[:, None, :]
            NS_mu = S_mu.T.dot(Ymask.T).T
            LmInv = dtrtri(Lm)

            LmInvPsi2LmInvT = np.swapaxes(psi2.dot(LmInv.T), 1, 2).dot(LmInv.T)
            LmInvSmuLmInvT = np.swapaxes(NS_mu.dot(LmInv.T), 1, 2).dot(LmInv.T)

            B = mupsi1Y + mupsi1Y.T + (Ds[:, None, None] * psi2).sum(0)
            tmp = backsub_both_sides(Lm, B, 'right')

            logL =  -ND*log_2_pi/2. +ND*np.log(beta)/2. - psi0/2. - YRY/2.  \
                       -(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. +np.trace(tmp)/2.
        else:
            S_mu = S * D + tdot(mu)
            if uncertain_inputs:
                LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
            else:
                LmInvPsi2LmInvT = tdot(dtrtrs(
                    Lm, psi1.T)[0]) / beta  #tdot(psi1.dot(LmInv.T).T) /beta
            LmInvSmuLmInvT = backsub_both_sides(Lm, S_mu, 'right')

            B = mupsi1Y + mupsi1Y.T + D * psi2
            tmp = backsub_both_sides(Lm, B, 'right')

            logL =  -ND*log_2_pi/2. +ND*np.log(beta)/2. - psi0/2. - YRY/2.  \
                       -(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. +np.trace(tmp)/2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm = np.eye(M)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = None  #(YRY*beta + beta*output_dim*psi0 - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        if missing_data:
            dL_dpsi0 = -Ds * (beta * np.ones((N, ))) / 2.
        else:
            dL_dpsi0 = -D * (beta * np.ones((N, ))) / 2.

        if uncertain_outputs:
            Ym, Ys = Y.mean, Y.variance
            dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm,
                                         Ym.dot(mu.T).T)[0],
                              trans=1)[0].T * beta
        else:
            if missing_data:
                dL_dpsi1 = dtrtrs(
                    Lm, dtrtrs(Lm,
                               (Y_masked).dot(mu.T).T)[0], trans=1)[0].T * beta
            else:
                dL_dpsi1 = dtrtrs(Lm, dtrtrs(Lm,
                                             Y.dot(mu.T).T)[0],
                                  trans=1)[0].T * beta

        # compute dL_dpsi2 first; for deterministic inputs it is folded into
        # dL_dpsi1 below instead of being returned
        if missing_data:
            dL_dpsi2 = np.swapaxes(
                (Ds[:, None, None] * np.eye(M)[None, :, :] -
                 LmInvSmuLmInvT).dot(LmInv), 1, 2).dot(LmInv) * beta / 2.
        else:
            dL_dpsi2 = beta * backsub_both_sides(
                Lm, D * np.eye(M) - LmInvSmuLmInvT, 'left') / 2.
        if not uncertain_inputs:
            dL_dpsi1 += beta * psi1.dot(dL_dpsi2 + dL_dpsi2.T)
            dL_dpsi2 = None

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL
            }

        if uncertain_outputs:
            Ym = Y.mean
            grad_dict['dL_dYmean'] = -Ym * beta + dtrtrs(Lm, psi1.T)[0].T.dot(
                dtrtrs(Lm, mu)[0])
            grad_dict['dL_dYvar'] = beta / -2.

        return logL, grad_dict
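The missing-data branch above boils down to a masking pattern that can be tried in isolation. A small sketch with toy data; the variable names mirror the example but the values are made up.

import numpy as np

Y = np.array([[1.0, np.nan], [2.0, 3.0]])
Ymask = (~np.isnan(Y)).astype(int)      # 1 = observed, 0 = missing
Ds = Ymask.sum(1)                       # observed output dimensions per datum
Y_masked = np.where(Ymask == 1, Y, 0.)  # zero-fill missing entries
ND = Ymask.sum()                        # total number of observed entries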
Example #32
    def inference(self, kern, X, Z, likelihood, Y, qU_mean ,qU_var, Kuu_sigma=None):
        """
        The SVI-VarDTC inference
        """

        N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1./likelihood.variance

        psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)
        
        #======================================================================
        # Compute Common Components
        #======================================================================

        Kuu = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kuu, Kuu_sigma)
        else:
            diag.add(Kuu, self.const_jitter)
        Lm = jitchol(Kuu)
        
        mu, S = qU_mean, qU_var
        Ls = jitchol(S)
        LinvLs = dtrtrs(Lm, Ls)[0]
        Linvmu = dtrtrs(Lm, mu)[0]
        psi1YLinvT = dtrtrs(Lm,psi1Y.T)[0].T
        
        self.mid = {
                    'qU_L': Ls,
                    'LinvLu': LinvLs,
                    'L':Lm,
                    'Linvmu': Linvmu}
        
        if uncertain_inputs:
            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
        else:
            LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0])/beta 
        
        LmInvSmuLmInvT = tdot(LinvLs)*D+tdot(Linvmu)
        
#         logdet_L = np.sum(np.log(np.diag(Lm)))
#         logdet_S = np.sum(np.log(np.diag(Ls)))
        
        #======================================================================
        # Compute log-likelihood
        #======================================================================
        
        logL_R = -N*np.log(beta)
        logL = -N*D*log_2_pi/2. -D*logL_R/2. - D*psi0/2. - YRY/2.  \
                     -(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(LmInvPsi2LmInvT)*D/2.+(Linvmu*psi1YLinvT.T).sum()
                
        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        tmp1 = backsub_both_sides(Lm,LmInvSmuLmInvT.dot(LmInvPsi2LmInvT), 'left')
        tmp2 = Linvmu.dot(psi1YLinvT)
        tmp3 = backsub_both_sides(Lm,  - D*LmInvPsi2LmInvT  -tmp2-tmp2.T, 'left')/2.

        dL_dKmm = (tmp1+tmp1.T)/2. + tmp3

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = -D*N*beta/2. -(- D*psi0/2. - YRY/2.-(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(LmInvPsi2LmInvT)*D/2.+(Linvmu*psi1YLinvT.T).sum())*beta
        
        #======================================================================
        # Compute dL_dqU
        #======================================================================
        
        tmp1 = backsub_both_sides(Lm, - LmInvPsi2LmInvT, 'left')
        dL_dqU_mean = tmp1.dot(mu) + dtrtrs(Lm, psi1YLinvT.T,trans=1)[0]
        dL_dqU_var = D/2.*tmp1
        
        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        KuuInvmu = dtrtrs(Lm, Linvmu, trans=1)[0]
        tmp = backsub_both_sides(Lm,  np.eye(M) - tdot(LinvLs), 'left')

        post = Posterior(woodbury_inv=tmp, woodbury_vector=KuuInvmu, K=Kuu, mean=mu, cov=S, K_chol=Lm)
        
        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -D * (beta * np.ones((N,)))/2.

        if uncertain_outputs:
            dL_dpsi1 = Y.mean.dot(dtrtrs(Lm,Linvmu,trans=1)[0].T)*beta
        else:
            dL_dpsi1 = Y.dot(dtrtrs(Lm,Linvmu,trans=1)[0].T)*beta

        dL_dpsi2 = beta*backsub_both_sides(Lm, D*np.eye(M)-LmInvSmuLmInvT, 'left')/2.
        if not uncertain_inputs:
            dL_dpsi1 += psi1.dot(dL_dpsi2+dL_dpsi2.T)/beta
            dL_dpsi2 = None
            
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL,
                         'dL_dqU_mean':dL_dqU_mean,
                         'dL_dqU_var':dL_dqU_var}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL,
                         'dL_dqU_mean':dL_dqU_mean,
                         'dL_dqU_var':dL_dqU_var}

        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            grad_dict['dL_dYmean'] = -m*beta+ dtrtrs(Lm,psi1.T)[0].T.dot(dtrtrs(Lm,mu)[0])
            grad_dict['dL_dYvar'] = beta/-2.

        return post, logL, grad_dict
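jitchol, used in every example here, retries the Cholesky factorization with growing diagonal jitter when the matrix is only numerically positive definite. A simplified re-implementation for intuition; GPy's real version first attempts a plain factorization and handles the jitter schedule somewhat differently.

import numpy as np

def jitchol_sketch(K, max_tries=5):
    jitter = np.diag(K).mean() * 1e-6
    for _ in range(max_tries):
        try:
            return np.linalg.cholesky(K + jitter * np.eye(K.shape[0]))
        except np.linalg.LinAlgError:
            jitter *= 10.0
    raise np.linalg.LinAlgError("not positive definite, even with jitter")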
Example #33
    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
        assert mean_function is None, "inference with a mean function not implemented"

        num_inducing, _ = Z.shape
        num_data, output_dim = Y.shape

        #make sure the noise is not hetero
        sigma_n = likelihood.gaussian_variance(Y_metadata)
        if sigma_n.size > 1:
            raise NotImplementedError("no hetero noise with this implementation of PEP")

        Kmm = kern.K(Z)
        Knn = kern.Kdiag(X)
        Knm = kern.K(X, Z)
        U = Knm

        #factor Kmm
        diag.add(Kmm, self.const_jitter)
        Kmmi, L, Li, _ = pdinv(Kmm)

        #compute beta_star, the effective noise precision
        LiUT = np.dot(Li, U.T)
        sigma_star = sigma_n + self.alpha * (Knn - np.sum(np.square(LiUT),0))
        beta_star = 1./sigma_star

        # Compute and factor A
        A = tdot(LiUT*np.sqrt(beta_star)) + np.eye(num_inducing)
        LA = jitchol(A)

        # back substitute to get b, P, v
        URiy = np.dot(U.T*beta_star,Y)
        tmp, _ = dtrtrs(L, URiy, lower=1)
        b, _ = dtrtrs(LA, tmp, lower=1)
        tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
        v, _ = dtrtrs(L, tmp, lower=1, trans=1)
        tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
        P = tdot(tmp.T)

        alpha_const_term = (1.0-self.alpha) / self.alpha

        #compute log marginal
        log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
                       -np.sum(np.log(np.diag(LA)))*output_dim + \
                       0.5*output_dim*(1+alpha_const_term)*np.sum(np.log(beta_star)) + \
                       -0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
                       0.5*np.sum(np.square(b)) + 0.5*alpha_const_term*num_data*np.log(sigma_n)
        #compute dL_dR
        Uv = np.dot(U, v)
        dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - (1.0+alpha_const_term)/beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) \
            + np.sum(np.square(Uv), 1))*beta_star**2 

        # Compute dL_dKmm
        vvT_P = tdot(v.reshape(-1,1)) + P
        dL_dK = 0.5*(Kmmi - vvT_P)
        KiU = np.dot(Kmmi, U.T)
        dL_dK += self.alpha * np.dot(KiU*dL_dR, KiU.T)

        # Compute dL_dU
        vY = np.dot(v.reshape(-1,1),Y.T)
        dL_dU = vY - np.dot(vvT_P, U.T)
        dL_dU *= beta_star
        dL_dU -= self.alpha * 2.*KiU*dL_dR

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
        dL_dthetaL += 0.5*alpha_const_term*num_data / sigma_n
        grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR * self.alpha, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}

        #construct a posterior object
        post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

        return post, log_marginal, grad_dict
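In the PEP example above, sigma_star interpolates between FITC-style heteroscedastic noise (alpha = 1) and the variational DTC limit (alpha -> 0). A toy computation of the effective per-point noise precision under assumed values:

import numpy as np

alpha, sigma_n = 0.5, 0.1                      # assumed values
Knn = np.array([1.0, 1.0, 1.0])                # prior marginal variances
q_diag = np.array([0.9, 0.7, 0.95])            # diag of Knm Kmm^-1 Kmn
sigma_star = sigma_n + alpha * (Knn - q_diag)  # per-point effective noise
beta_star = 1.0 / sigma_star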
Example #34
    def inference(self,
                  kern,
                  X,
                  Z,
                  likelihood,
                  Y,
                  qU_mean,
                  qU_var,
                  Kuu_sigma=None):
        """
        The SVI-VarDTC inference
        """

        N, D, M, Q = Y.shape[0], Y.shape[1], Z.shape[0], Z.shape[1]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1. / likelihood.variance

        psi0, psi2, YRY, psi1, psi1Y = self.gatherPsiStat(
            kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kuu = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kuu, Kuu_sigma)
        else:
            diag.add(Kuu, self.const_jitter)
        Lm = jitchol(Kuu)

        mu, S = qU_mean, qU_var
        Ls = jitchol(S)
        LinvLs = dtrtrs(Lm, Ls)[0]
        Linvmu = dtrtrs(Lm, mu)[0]
        psi1YLinvT = dtrtrs(Lm, psi1Y.T)[0].T

        self.mid = {'qU_L': Ls, 'LinvLu': LinvLs, 'L': Lm, 'Linvmu': Linvmu}

        if uncertain_inputs:
            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2, 'right')
        else:
            LmInvPsi2LmInvT = tdot(dtrtrs(Lm, psi1.T)[0]) / beta

        LmInvSmuLmInvT = tdot(LinvLs) * D + tdot(Linvmu)

        #         logdet_L = np.sum(np.log(np.diag(Lm)))
        #         logdet_S = np.sum(np.log(np.diag(Ls)))

        #======================================================================
        # Compute log-likelihood
        #======================================================================

        logL_R = -N * np.log(beta)
        logL = -N*D*log_2_pi/2. -D*logL_R/2. - D*psi0/2. - YRY/2.  \
                     -(LmInvSmuLmInvT*LmInvPsi2LmInvT).sum()/2. + np.trace(LmInvPsi2LmInvT)*D/2.+(Linvmu*psi1YLinvT.T).sum()

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        tmp1 = backsub_both_sides(Lm, LmInvSmuLmInvT.dot(LmInvPsi2LmInvT),
                                  'left')
        tmp2 = Linvmu.dot(psi1YLinvT)
        tmp3 = backsub_both_sides(Lm, -D * LmInvPsi2LmInvT - tmp2 - tmp2.T,
                                  'left') / 2.

        dL_dKmm = (tmp1 + tmp1.T) / 2. + tmp3

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = -D * N * beta / 2. - (
            -D * psi0 / 2. - YRY / 2. -
            (LmInvSmuLmInvT * LmInvPsi2LmInvT).sum() / 2. +
            np.trace(LmInvPsi2LmInvT) * D / 2. +
            (Linvmu * psi1YLinvT.T).sum()) * beta

        #======================================================================
        # Compute dL_dqU
        #======================================================================

        tmp1 = backsub_both_sides(Lm, -LmInvPsi2LmInvT, 'left')
        dL_dqU_mean = tmp1.dot(mu) + dtrtrs(Lm, psi1YLinvT.T, trans=1)[0]
        dL_dqU_var = D / 2. * tmp1

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        KuuInvmu = dtrtrs(Lm, Linvmu, trans=1)[0]
        tmp = backsub_both_sides(Lm, np.eye(M) - tdot(LinvLs), 'left')

        post = Posterior(woodbury_inv=tmp,
                         woodbury_vector=KuuInvmu,
                         K=Kuu,
                         mean=mu,
                         cov=S,
                         K_chol=Lm)

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -D * (beta * np.ones((N, ))) / 2.

        if uncertain_outputs:
            dL_dpsi1 = Y.mean.dot(dtrtrs(Lm, Linvmu, trans=1)[0].T) * beta
        else:
            dL_dpsi1 = Y.dot(dtrtrs(Lm, Linvmu, trans=1)[0].T) * beta

        dL_dpsi2 = beta * backsub_both_sides(Lm,
                                             D * np.eye(M) - LmInvSmuLmInvT,
                                             'left') / 2.
        if not uncertain_inputs:
            dL_dpsi1 += psi1.dot(dL_dpsi2 + dL_dpsi2.T) / beta
            dL_dpsi2 = None

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL,
                'dL_dqU_mean': dL_dqU_mean,
                'dL_dqU_var': dL_dqU_var
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL,
                'dL_dqU_mean': dL_dqU_mean,
                'dL_dqU_var': dL_dqU_var
            }

        if uncertain_outputs:
            m, s = Y.mean, Y.variance
            grad_dict['dL_dYmean'] = -m * beta + dtrtrs(Lm, psi1.T)[0].T.dot(
                dtrtrs(Lm, mu)[0])
            grad_dict['dL_dYvar'] = beta / -2.

        return post, logL, grad_dict
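The dL_dqU_mean / dL_dqU_var entries returned above are gradients with respect to the variational parameters of q(u). A naive gradient-ascent consumer could look like the sketch below; the learning rate and the symmetrization step are assumptions, not part of the original code, and a real SVI implementation would use natural gradients and guard positive definiteness of the covariance.

import numpy as np

def naive_qU_step(qU_mean, qU_var, dL_dqU_mean, dL_dqU_var, lr=1e-3):
    qU_mean = qU_mean + lr * dL_dqU_mean
    qU_var = qU_var + lr * dL_dqU_var
    qU_var = (qU_var + qU_var.T) / 2.   # keep the covariance symmetric
    return qU_mean, qU_var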
Example #35
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, fixed_covs_kerns=None, **kw):

        _, output_dim = Y.shape
        uncertain_inputs = isinstance(X, VariationalPosterior)

        #see whether we've got a different noise variance for each datum
        beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        het_noise = beta.size > 1

        if het_noise:
            raise(NotImplementedError("Heteroscedastic noise not implemented, should be possible though, feel free to try implementing it :)"))

        if beta.ndim == 1:
            beta = beta[:, None]


        # do the inference:
        num_inducing = Z.shape[0]
        num_data = Y.shape[0]
        # kernel computations, using BGPLVM notation

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        if Lm is None:
            Lm = jitchol(Kmm)

        # The rather complex computations of A, and the psi stats
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
            psi1 = kern.psi1(Z, X)
            if het_noise:
                psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
            else:
                psi2_beta = kern.psi2(Z,X) * beta
            LmInv = dtrtri(Lm)
            A = LmInv.dot(psi2_beta.dot(LmInv.T))
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
                # het_noise is rejected above, so a single scaling suffices here
                tmp = psi1 * np.sqrt(beta)
            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp)

        # factor B
        B = np.eye(num_inducing) + A
        LB = jitchol(B)
        # back-substitute C into psi1Vf
        #tmp, _ = dtrtrs(Lm, psi1.T.dot(VVT_factor), lower=1, trans=0)
        #_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        #tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        #Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # data fit and derivative of L w.r.t. Kmm
        #delit = tdot(_LBi_Lmi_psi1Vf)

        # Expose YYT to get additional covariates in (YYT + Kgg):
        tmp, _ = dtrtrs(Lm, psi1.T, lower=1, trans=0)
        _LBi_Lmi_psi1, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1, lower=1, trans=1)
        Cpsi1, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # TODO: cache this:
        # Compute fixed covariates covariance:
        if fixed_covs_kerns is not None:
            K_fixed = 0
            for name, [cov, k] in fixed_covs_kerns.items():  # .iteritems() is Python 2 only
                K_fixed += k.K(cov)

            #trYYT = self.get_trYYT(Y)
            YYT_covs = (tdot(Y) + K_fixed)
            data_term = beta**2 * YYT_covs
            trYYT_covs = np.trace(YYT_covs)
        else:
            data_term = beta**2 * tdot(Y)
            trYYT_covs = self.get_trYYT(Y)

        #trYYT = self.get_trYYT(Y)
        delit = mdot(_LBi_Lmi_psi1, data_term, _LBi_Lmi_psi1.T)
        data_fit = np.trace(delit)

        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
        if dL_dKmm is None:
            delit = -0.5 * DBi_plus_BiPBi
            delit += -0.5 * B * output_dim
            delit += output_dim * np.eye(num_inducing)
            # Compute dL_dKmm
            dL_dKmm = backsub_both_sides(Lm, delit)

        # derivatives of L w.r.t. psi
        dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
            data_term, Cpsi1, DBi_plus_BiPBi,
            psi1, het_noise, uncertain_inputs)

        # log marginal likelihood
        log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
            psi0, A, LB, trYYT_covs, data_fit, Y)

        if self.save_per_dim:
            self.saved_vals = [psi0, A, LB, _LBi_Lmi_psi1, beta]

        # No heteroscedastics, so no _LBi_Lmi_psi1Vf:
        # For the interested reader, try implementing the heteroscedastic version, it should be possible
        _LBi_Lmi_psi1Vf = None # Is just here for documentation, so you can see, what it was.

        #noise derivatives
        dL_dR = _compute_dL_dR(likelihood,
            het_noise, uncertain_inputs, LB,
            _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
            psi0, psi1, beta,
            data_fit, num_data, output_dim, trYYT_covs, Y, None)

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)

        #put the gradients in the right places
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}

        if fixed_covs_kerns is not None:
            # For now, we do not take the gradients, we can compute them,
            # but the maximum likelihood solution is to switch off the additional covariates....
            dL_dcovs = beta * np.eye(K_fixed.shape[0]) - beta**2*tdot(_LBi_Lmi_psi1.T)
            grad_dict['dL_dcovs'] = -.5 * dL_dcovs

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in  the loop?
        woodbury_vector = (beta*Cpsi1).dot(Y)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = -Bi  # -B^-1; adding 1 to the diagonal below gives I - B^-1
        diag.add(Bi, 1)

        woodbury_inv = backsub_both_sides(Lm, Bi)

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
        return post, log_marginal, grad_dict
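backsub_both_sides(L, X, transpose) appears in nearly every example; with 'left' it computes L^-T X L^-1 and with 'right' it computes L^-1 X L^-T. A quick dense check of both conventions on toy matrices:

import numpy as np
from scipy.linalg import cholesky, solve_triangular

K = np.array([[2.0, 0.3], [0.3, 1.5]])
X = np.array([[1.0, 0.2], [0.2, 3.0]])
L = cholesky(K, lower=True)
Li = solve_triangular(L, np.eye(2), lower=True)  # L^-1
left = Li.T.dot(X).dot(Li)                       # 'left':  L^-T X L^-1
right = Li.dot(X).dot(Li.T)                      # 'right': L^-1 X L^-T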
Example #36
    def inference_root(self, kern, X, Z, likelihood, Y, Kuu_sigma=None, Y_metadata=None, Lm=None, dL_dKmm=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv.
        """

        num_data, output_dim = Y.shape
        input_dim = Z.shape[0]
        num_data_total = allReduceArrays([np.int32(num_data)], self.mpi_comm)[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)
        uncertain_outputs = isinstance(Y, VariationalPosterior)

        beta = 1./np.fmax(likelihood.variance, 1e-6)

        psi0, psi2, YRY, psi1, psi1Y, Shalf, psi1S = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)

        #======================================================================
        # Compute Common Components
        #======================================================================

        try:
            Kmm = kern.K(Z).copy()
            if Kuu_sigma is not None:
                diag.add(Kmm, Kuu_sigma)
            else:
                diag.add(Kmm, self.const_jitter)
            Lm = jitchol(Kmm)
    
            LmInv = dtrtri(Lm)
            LmInvPsi2LmInvT = LmInv.dot(psi2.dot(LmInv.T))
                
            Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
            LL = jitchol(Lambda)        
            LLInv = dtrtri(LL)
            flag = np.zeros((1,),dtype=np.int32)
            self.mpi_comm.Bcast(flag,root=self.root)
        except LinAlgError as e:
            flag = np.ones((1,),dtype=np.int32)
            self.mpi_comm.Bcast(flag,root=self.root)
            raise e
            
        broadcastArrays([LmInv, LLInv],self.mpi_comm,  self.root)
        LmLLInv = LLInv.dot(LmInv)
        
        logdet_L = 2.*np.sum(np.log(np.diag(LL)))
        b  = psi1Y.dot(LmLLInv.T)
        bbt = np.square(b).sum()
        v = b.dot(LmLLInv)
        LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
        
        if psi1S is not None:
            psi1SLLinv = psi1S.dot(LmLLInv.T)
            bbt_sum = np.square(psi1SLLinv).sum()
            LLinvPsi1TYYTPsi1LLinvT_sum = tdot(psi1SLLinv.T)
            bbt_sum, LLinvPsi1TYYTPsi1LLinvT_sum = reduceArrays([bbt_sum,  LLinvPsi1TYYTPsi1LLinvT_sum], self.mpi_comm, self.root)
            bbt += bbt_sum
            LLinvPsi1TYYTPsi1LLinvT += LLinvPsi1TYYTPsi1LLinvT_sum
            psi1SP = psi1SLLinv.dot(LmLLInv)
        tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
        dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
        broadcastArrays([dL_dpsi2R], self.mpi_comm, self.root)

        #======================================================================
        # Compute log-likelihood
        #======================================================================
        logL_R = -num_data_total*np.log(beta)
        logL = -(output_dim*(num_data_total*log_2_pi+logL_R+psi0-np.trace(LmInvPsi2LmInvT))+YRY- bbt)/2.-output_dim*logdet_L/2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        dL_dKmm =  dL_dpsi2R - output_dim* LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
        post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v.T, K=Kmm, mean=None, cov=None, K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
        #======================================================================

        dL_dthetaL = (YRY*beta + beta*output_dim*psi0 - num_data_total*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2).sum() - beta*np.trace(LLinvPsi1TYYTPsi1LLinvT)
        
        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        dL_dpsi0 = -output_dim * (beta * np.ones((num_data,)))/2.

        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            dL_dpsi1 = beta*(np.dot(m,v)+Shalf[:,None]*psi1SP)
        else:
            dL_dpsi1 = beta*np.dot(Y,v)

        if uncertain_inputs:
            dL_dpsi2 = beta* dL_dpsi2R
        else:
            dL_dpsi1 += np.dot(psi1,dL_dpsi2R)*2.
            dL_dpsi2 = None
        
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}
            
        if uncertain_outputs:
            m,s = Y.mean, Y.variance
            psi1LmiLLi = psi1.dot(LmLLInv.T)
            LLiLmipsi1Y = b.T
            grad_dict['dL_dYmean'] = -m*beta+ psi1LmiLLi.dot(LLiLmipsi1Y)
            grad_dict['dL_dYvar'] = beta/-2.+ np.square(psi1LmiLLi).sum(axis=1)/2

        return post, logL, grad_dict
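The try/except around the Cholesky in inference_root broadcasts an integer flag so that non-root MPI ranks learn whether the root's factorization failed before they block on the next collective. The same pattern with bare mpi4py, as a sketch with the root-only computation elided:

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
flag = np.zeros(1, dtype=np.int32)
if comm.rank == 0:
    try:
        pass  # root-only linear algebra would go here
    except Exception:
        flag[0] = 1
comm.Bcast(flag, root=0)
if flag[0]:
    raise RuntimeError("root rank failed; aborting on all ranks")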
Example #37
    def maximization(self, Y, K, C, t, parameters, hyperparameters, expectations):

        self.N = Y.shape[0]
        self.T = Y.shape[1]

        # Model parameters
        pi = parameters[0].copy()
        f = parameters[1].copy()
        mu = parameters[2].copy()

        # Model hyperparameters
        ls = hyperparameters[0].copy()
        a0 = hyperparameters[1].copy()
        a = hyperparameters[2].copy()
        b = hyperparameters[3].copy()
        sigmas = hyperparameters[4].copy()

        var_precision = sigmas.shape[0]

        # Expected values
        r_ik = expectations['r_ik']
        #c_ik = expectations['c_ik']
        Y_exp = expectations['Y_exp']
        matrices = expectations['matrices']

        # old building of matrices
        Sold = matrices['S_old']
        Lold = matrices['L_old']
        Siold = matrices['Si_old']

        # new building of matrices
        hyperparam_list = [ls, a0, a, b, sigmas]
        S, L, Si = util.build_covariance(t, K, hyperparam_list) #dims: (T,T,K)

        # Identifying missing (NaN) values
        nans = np.isnan(Y[:,:,0])
        notnans = np.invert(nans)

        # Expected Log-Likelihood (Cost Function)
        log_likelihood = 0.0
        het_logpdf = np.empty((self.N, K))

        # Log-likelihood derivatives wrt hyperparameters
        dL_dl = np.zeros((1, K))
        dL_da0 = np.zeros((1, K))
        dL_da = np.zeros((C, K))
        dL_db = np.zeros((C, K))
        dL_dsigmas = np.zeros((var_precision, 1))

        c_ik = np.empty((self.N, K))

        for k in range(K):
            S_k = S[:, :, k] # new
            Si_k = Si[:, :, k] # new

            Sold_k = Sold[:, :, k] # old
            Siold_k = Siold[:, :, k] # old

            Y_exp_k = Y_exp[k]
            Y_exp_real = Y_exp_k[:, :, 0]
            Y_exp_bin = Y_exp_k[:, :, 1]
            detS_k = np.linalg.det(S_k)  # NOTE: np.linalg.slogdet is safer against under/overflow for large T

            for i in range(self.N):
                Sold_k_oo = Sold_k[np.ix_(notnans[i,:], notnans[i,:])]
                Sold_k_mm = Sold_k[np.ix_(nans[i,:], nans[i,:])]
                Sold_k_mo = Sold_k[np.ix_(nans[i,:], notnans[i,:])]
                Sold_k_om = Sold_k_mo.T
                Si_k_mm = Si_k[np.ix_(nans[i,:], nans[i,:])] # mm submatrix of Si_k

                Lold_k_oo = linalg.jitchol(Sold_k_oo)
                iSold_k_oo, _ = linalg.dpotri(np.asfortranarray(Lold_k_oo)) # inverse of oo submatrix

                Cov_m = Sold_k_mm - (Sold_k_mo.dot(iSold_k_oo).dot(Sold_k_om))
                c_ik[i,k] = np.trace(Si_k_mm.dot(Cov_m))

                A_m = np.zeros((self.T, self.T))
                A_m[np.ix_(nans[i, :], nans[i, :])] = Cov_m

                y = Y_exp_real[i, :].T
                y = y[:, np.newaxis]
                yy_T = np.dot(y,y.T)
                aa_T = Si_k.dot(yy_T).dot(Si_k.T)

                Q1 = aa_T - Si_k
                Q2 = Si_k.dot(A_m).dot(Si_k)

                dK_dl, dK_da0, dK_da, dK_db, dK_dsigmas = self.kernel_gradients(Q1, Q2, t, k, C, hyperparam_list)


                dL_dl[0,k] += 0.5*r_ik[i,k]*dK_dl
                dL_da0[0, k] += 0.5*r_ik[i,k]*dK_da0
                dL_da[:,k] += 0.5*r_ik[i,k]*dK_da.flatten()
                dL_db[:,k] += 0.5*r_ik[i,k]*dK_db.flatten()
                dL_dsigmas += 0.5*r_ik[i,k]*dK_dsigmas

                log_likelihood += r_ik[i,k]*np.log(pi[0,k]) - 0.5*r_ik[i,k]*np.log(detS_k) \
                                  - 0.5*r_ik[i,k] * np.dot(Y_exp_real[i,:],Si_k).dot(Y_exp_real[i,:].T) \
                                  - 0.5*r_ik[i,k]*c_ik[i,k] \
                                  + r_ik[i,k]*np.sum(Y_exp_bin[i,:]*np.log(mu[:, k])) \
                                  + r_ik[i,k]*np.sum((1.0 - Y_exp_bin[i,:])*np.log(1 - mu[:, k]))
                                # + r_ik[i,k]*[]
                                # the Gaussian's 2*pi normalizing constant is still missing here


            #param_list = [f[:, k], S[:, :, k], Si[:, :, k], mu[:, k]]

        gradients = {'dL_dl':dL_dl, 'dL_da0':dL_da0, 'dL_da':dL_da, 'dL_db':dL_db, 'dL_dsigmas':dL_dsigmas}

        return log_likelihood, gradients
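Cov_m in the loop above is the standard Gaussian conditional covariance: partitioning S into observed (o) and missing (m) blocks, the missing block given the observed one has covariance S_mm - S_mo S_oo^-1 S_om. A direct check on a toy matrix:

import numpy as np

S = np.array([[2.0, 0.4, 0.3],
              [0.4, 1.5, 0.2],
              [0.3, 0.2, 1.0]])
obs = np.array([True, True, False])   # last dimension is "missing"
S_oo = S[np.ix_(obs, obs)]
S_mo = S[np.ix_(~obs, obs)]
S_mm = S[np.ix_(~obs, ~obs)]
Cov_m = S_mm - S_mo.dot(np.linalg.inv(S_oo)).dot(S_mo.T)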