def latent_function_covKuu(Z, B, kernel_list, kernel_list_Gdj, kff_aux):
    """
    Builds the covariance K_{u_d u_d} = cov[u_d(z), u_d(z)] of a Convolved Multi-output GP
    :param Z: Inducing points
    :param B: Coregionalization matrix
    :param kernel_list: Kernels of u_q functions
    :param kernel_list_Gdj: Kernel smoothing functions G(x)
    :param kff_aux: Kernel that solves the convolution integral between G(x) and kern_uq
    :return: Kuu
    """

    J = len(kernel_list_Gdj)
    M, Dz = Z.shape
    Xdim = int(Dz / J)
    # Kuu = np.zeros([Q*M,Q*M])
    Kuu = np.zeros((J, M, M))
    Luu = np.empty((J, M, M))
    Kuui = np.empty((J, M, M))
    for j in range(J):
        for q, B_q in enumerate(B):
            update_conv_Kff(kernel_list[q], kernel_list_Gdj[j], kff_aux)
            Kuu[j, :, :] += B_q.B[j, j] * kff_aux.K(
                Z[:, j * Xdim:j * Xdim + Xdim], Z[:, j * Xdim:j * Xdim + Xdim])

        Luu[j, :, :] = linalg.jitchol(Kuu[j, :, :], maxtries=10)
        Kuui[j, :, :], _ = linalg.dpotri(np.asfortranarray(Luu[j, :, :]))

    return Kuu, Luu, Kuui
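For reference, a sketch of what the loop above assembles, in the code's own notation (here I assume update_conv_Kff configures kff_aux as the covariance obtained by convolving kern_uq with the smoothing kernel G_j):

$$
K_{uu}^{(j)} = \sum_q (B_q)_{jj}\, k_{aux}^{(q,j)}(Z_j, Z_j), \qquad
L_{uu}^{(j)} = \mathrm{chol}\big(K_{uu}^{(j)}\big), \qquad
\big(K_{uu}^{(j)}\big)^{-1} \text{ via dpotri},
$$

where Z_j denotes the block of columns Z[:, j*Xdim:(j+1)*Xdim].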
Example #2
File: util.py  Project: pmorenoz/HierCPD
def build_covariance(t, K, hyperparams):

    ls = hyperparams[0]
    a0 = hyperparams[1]
    a = hyperparams[2]
    b = hyperparams[3]

    C, _ = a.shape  # number of Fourier coefficients

    T, _ = t.shape
    S = np.empty((T, T, K))
    L = np.empty((T, T, K))
    Si = np.empty((T, T, K))

    Diag, _ = build_diagonal(t, hyperparams)
    for k in range(K):
        # still missing: the periodic term
        hyperparam_k_list = [ls[0, k], a0[0, k], a[:, k], b[:, k]]
        per_term = fourier_series(t, T, C, hyperparam_k_list)
        s = per_term**2
        per_S = s * s.T
        E = periodic_exponential(t, T, hyperparam_k_list)
        S[:, :, k] = per_S * E
        S[:, :, k] += Diag
        L[:, :, k] = linalg.jitchol(S[:, :, k])
        Si[:, :, k], _ = linalg.dpotri(np.asfortranarray(L[:, :, k]))

        # remove this:
        #S[:,:,k] = np.eye(T, T)
        #Si[:,:,k] = np.eye(T, T)
    return S, L, Si
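In symbols, and assuming fourier_series returns a T x 1 column of per-time amplitudes and periodic_exponential a T x T matrix (both are HierCPD helpers), each class covariance built above is

$$
S_k = (s\, s^{\top}) \odot E_k + D, \qquad s_i = f_k(t_i)^2,
$$

with D the diagonal term from build_diagonal; the loop then stores the jittered Cholesky factor L_k and the inverse of S_k via dpotri.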
Example #3
File: mix.py  Project: manuelIDSIA/SRGP
def inv_c(M):
    A = np.ascontiguousarray(M)
    L_M, info = lapack.dpotrf(A, lower=1)

    #L_M = np.linalg.cholesky(M)
    iM, _ = dpotri(L_M)

    return iM
Example #4
File: mix.py  Project: manuelIDSIA/SRGP
def inv_logDet(M):

    A = np.ascontiguousarray(M)
    L_M, info = lapack.dpotrf(A, lower=1)

    iM, _ = dpotri(L_M)
    logDetM = 2 * sum(np.log(np.diag(L_M)))

    return iM, logDetM
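A minimal usage sketch, assuming numpy is available and inv_logDet is defined as above with its dpotrf/dpotri imports in scope; it only checks the log-determinant identity log|M| = 2*sum(log(diag(L_M))) against numpy:

import numpy as np

M = np.array([[2.0, 0.3],
              [0.3, 1.5]])                            # small SPD test matrix
_, logDetM = inv_logDet(M)
print(np.isclose(logDetM, np.linalg.slogdet(M)[1]))   # expect True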
Example #5
def inv_chol(L):
    """
    Given that ``L`` is the Cholesky decomposition of A, this method returns A^-1

    Note
    ----
    This method is adopted from the GPy package
    """

    Ai, _ = dpotri(np.asfortranarray(L), lower=1)
    return Ai
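A minimal usage sketch, assuming GPy is installed and the dpotri used inside inv_chol is GPy.util.linalg.dpotri; symmetrify is called defensively in case only the lower triangle of the result is filled:

import numpy as np
from GPy.util.linalg import jitchol, symmetrify

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])                 # small SPD matrix
L = jitchol(A)                             # lower Cholesky factor, A = L L^T
Ai = inv_chol(L)                           # A^{-1} via dpotri
symmetrify(Ai)                             # copy lower triangle to upper (no-op if already symmetric)
print(np.allclose(Ai, np.linalg.inv(A)))   # expect True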
Example #6
def inv_chol(L):
    """
    Given that ``L`` is the Cholesky decomposition of A, this method returns A^-1

    Note
    ----
    This method is adopted from the GPy package
    """

    Ai, _ = dpotri(np.asfortranarray(L), lower=1)
    return Ai
Example #7
    def calculate_mu_var(self, X, Y, Z, q_u_mean, q_u_chol, kern, mean_function, num_inducing, num_data, num_outputs):
        """
        Calculate posterior mean and variance for the latent function values for use in the
        expectation over the likelihood
        """
        #expand cholesky representation
        L = choleskies.flat_to_triang(q_u_chol)
        #S = linalg.ijk_ljk_to_ilk(L, L) #L.dot(L.T)
        S = np.empty((num_outputs, num_inducing, num_inducing))
        [np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
        #logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
        logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
        #compute mean function stuff
        if mean_function is not None:
            prior_mean_u = mean_function.f(Z)
            prior_mean_f = mean_function.f(X)
        else:
            prior_mean_u = np.zeros((num_inducing, num_outputs))
            prior_mean_f = np.zeros((num_data, num_outputs))

        #compute kernel related stuff
        Kmm = kern.K(Z)
        #Knm = kern.K(X, Z)
        Kmn = kern.K(Z, X)
        Knn_diag = kern.Kdiag(X)
        #Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
        Lm = linalg.jitchol(Kmm)
        logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
        Kmmi, _ = linalg.dpotri(Lm)

        #compute the marginal means and variances of q(f)
        #A = np.dot(Knm, Kmmi)
        A, _ = linalg.dpotrs(Lm, Kmn)
        #mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
        mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
        #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S), 1)
        v = np.empty((num_data, num_outputs))
        for i in range(num_outputs):
            tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
            v[:,i] = np.sum(np.square(tmp),0)
        v += (Knn_diag - np.sum(A*Kmn,0))[:,None]

        #compute the KL term
        Kmmim = np.dot(Kmmi, q_u_mean)
        #KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
        KL = KLs.sum()

        latent_detail = LatentFunctionDetails(q_u_mean=q_u_mean, q_u_chol=q_u_chol, mean_function=mean_function,
                                              mu=mu, v=v, prior_mean_u=prior_mean_u, L=L, A=A,
                                              S=S, Kmm=Kmm, Kmmi=Kmmi, Kmmim=Kmmim, KL=KL)
        return latent_detail
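For reference, the marginals and KL term computed above are the standard sparse variational GP expressions, written here with the code's variable names (the per-output loop just evaluates diag(A^T S A) one column at a time):

$$
A = K_{mm}^{-1} K_{mn}, \qquad
\mu = \text{prior\_mean\_f} + A^{\top}(\text{q\_u\_mean} - \text{prior\_mean\_u}), \qquad
v = \text{Knn\_diag} - \mathrm{diag}(A^{\top} K_{mn}) + \mathrm{diag}(A^{\top} S A),
$$
$$
\mathrm{KL}\big(q(u)\,\|\,p(u)\big) = \tfrac{1}{2}\Big(\mathrm{tr}(K_{mm}^{-1} S) + m^{\top} K_{mm}^{-1} m - M + \log|K_{mm}| - \log|S|\Big),
$$

with m = q_u_mean and M = num_inducing, summed over the output columns.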
Example #8
def latent_funs_cov(Z, kernel_list):
    """
    Description: Builds the full-covariance cov[u(z),u(z)] of a Multi-output GP for a Sparse approximation
    :param Z: Inducing Points
    :param kernel_list: Kernels of u_q functions priors
    :return: Kuu
    """
    Q = len(kernel_list)
    M,Dz = Z.shape
    Xdim = int(Dz/Q)
    Kuu = np.empty((Q, M, M))
    Luu = np.empty((Q, M, M))
    Kuui = np.empty((Q, M, M))
    for q, kern in enumerate(kernel_list):
        Kuu[q, :, :] = kern.K(Z[:,q*Xdim:q*Xdim+Xdim],Z[:,q*Xdim:q*Xdim+Xdim])
        Luu[q, :, :] = linalg.jitchol(Kuu[q, :, :])
        Kuui[q, :, :], _ = linalg.dpotri(np.asfortranarray(Luu[q, :, :]))
    return Kuu, Luu, Kuui
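A minimal usage sketch, assuming GPy is installed and latent_funs_cov is defined as above (it also relies on numpy as np and GPy.util.linalg imported as linalg); Z stacks one block of Xdim columns per latent function u_q:

import numpy as np
import GPy
from GPy.util import linalg   # used inside latent_funs_cov

Q, M, Xdim = 2, 10, 1
kernel_list = [GPy.kern.RBF(Xdim) for _ in range(Q)]
Z = np.random.randn(M, Q * Xdim)           # columns [q*Xdim:(q+1)*Xdim] belong to u_q
Kuu, Luu, Kuui = latent_funs_cov(Z, kernel_list)
print(Kuu.shape, Luu.shape, Kuui.shape)    # (2, 10, 10) for each stack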
    def woodbury_inv(self):
        """
        The inverse of the woodbury matrix, in the gaussian likelihood case it is defined as
        $$
        (K_{xx} + \Sigma_{xx})^{-1}
        \Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
        $$
        """
        if self._woodbury_inv is None:
            if self._woodbury_chol is not None:
                self._woodbury_inv, _ = dpotri(self._woodbury_chol, lower=1)
                symmetrify(self._woodbury_inv)
            elif self._covariance is not None:
                B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance)
                self._woodbury_inv = np.empty_like(B)
                for i in range(B.shape[-1]):
                    tmp, _ = dpotrs(self.K_chol, B[:, :, i])
                    self._woodbury_inv[:, :, i], _ = dpotrs(self.K_chol, tmp.T)
        return self._woodbury_inv
Example #10
    def comp_KL_qU(self, qU_mean ,qU_var):
        M,D = qU_mean.shape[0], qU_mean.shape[1]

        qU_L = self.mid['qU_L']
        L = self.mid['L']
        Linvmu = self.mid['Linvmu']
        LinvLu = self.mid['LinvLu']
        KuuInv = dpotri(L, lower=1)[0]
        
        Lu = qU_L
        LuInv = dtrtri(Lu)
        
        KL = D*M/-2. - np.log(np.diag(Lu)).sum()*D +np.log(np.diag(L)).sum()*D + np.square(LinvLu).sum()/2.*D + np.square(Linvmu).sum()/2.
        
        dKL_dqU_mean = dtrtrs(L, Linvmu, trans=True)[0] 
        dKL_dqU_var = (tdot(LuInv.T)/-2. +  KuuInv/2.)*D
        dKL_dKuu = KuuInv*D/2. -KuuInv.dot( tdot(qU_mean)+qU_var*D).dot(KuuInv)/2.

        return float(KL), dKL_dqU_mean, dKL_dqU_var, dKL_dKuu
Example #11
    def comp_KL_qU(self, qU_mean, qU_var):
        M, D = qU_mean.shape[0], qU_mean.shape[1]

        qU_L = self.mid['qU_L']
        L = self.mid['L']
        Linvmu = self.mid['Linvmu']
        LinvLu = self.mid['LinvLu']
        KuuInv = dpotri(L, lower=1)[0]

        Lu = qU_L
        LuInv = dtrtri(Lu)

        KL = D * M / -2. - np.log(np.diag(Lu)).sum() * D + np.log(
            np.diag(L)).sum() * D + np.square(
                LinvLu).sum() / 2. * D + np.square(Linvmu).sum() / 2.

        dKL_dqU_mean = dtrtrs(L, Linvmu, trans=True)[0]
        dKL_dqU_var = (tdot(LuInv.T) / -2. + KuuInv / 2.) * D
        dKL_dKuu = KuuInv * D / 2. - KuuInv.dot(tdot(qU_mean) +
                                                qU_var * D).dot(KuuInv) / 2.

        return float(KL), dKL_dqU_mean, dKL_dqU_var, dKL_dKuu
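For reference, the scalar returned above is the standard Gaussian KL between q(U) = N(qU_mean, S) with S = qU_L qU_L^T (shared across the D columns) and the prior p(U) = N(0, Kuu) with Kuu = L L^T:

$$
\mathrm{KL} = \sum_{d=1}^{D} \tfrac{1}{2}\Big(\mathrm{tr}(K_{uu}^{-1} S) + m_d^{\top} K_{uu}^{-1} m_d - M + \log|K_{uu}| - \log|S|\Big),
$$

which the code evaluates through the cached triangular solves Linvmu = L^{-1} qU_mean and LinvLu = L^{-1} qU_L.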
def latent_funs_cov(Z, kernel_list):
    """
    Builds the full-covariance cov[u(z),u(z)] of a Multi-output GP
    for a Sparse approximation
    :param Z: Inducing Points
    :param kernel_list: Kernels of u_q functions priors
    :return: Kuu
    """
    Q = len(kernel_list)
    M, Dz = Z.shape
    Xdim = int(Dz / Q)
    #Kuu = np.zeros([Q*M,Q*M])
    Kuu = np.empty((Q, M, M))
    Luu = np.empty((Q, M, M))
    Kuui = np.empty((Q, M, M))
    for q, kern in enumerate(kernel_list):
        Kuu[q, :, :] = kern.K(Z[:, q * Xdim:q * Xdim + Xdim],
                              Z[:, q * Xdim:q * Xdim + Xdim])
        # Optional extra jitter, included by Juan for numerical stability (currently disabled):
        # Kuu[q, :, :] += 1.0e-6 * np.eye(*Kuu[q, :, :].shape)
        Luu[q, :, :] = linalg.jitchol(Kuu[q, :, :], maxtries=10)
        Kuui[q, :, :], _ = linalg.dpotri(np.asfortranarray(Luu[q, :, :]))
    return Kuu, Luu, Kuui
Example #13
    def inference(self,
                  kern,
                  X,
                  likelihood,
                  Y,
                  mean_function=None,
                  Y_metadata=None,
                  K=None,
                  variance=None,
                  Z_tilde=None):
        if variance is None:
            variance = likelihood.gaussian_variance(Y_metadata)

        posterior = super(ExactGaussianInferenceIncremental,
                          self).inference(kern, X, likelihood, Y,
                                          mean_function, Y_metadata, K,
                                          variance, Z_tilde)
        self._old_LW = posterior[0].woodbury_chol
        self._K = kern.K(X).copy()
        self._old_Wi, _ = dpotri(self._old_LW, lower=1)
        # diag.add(self._K, variance + 1e-8)

        return posterior
Example #14
    def inference(self,
                  kern,
                  X,
                  Z,
                  likelihood,
                  Y,
                  indexD,
                  output_dim,
                  Y_metadata=None,
                  Lm=None,
                  dL_dKmm=None,
                  Kuu_sigma=None):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv,
        """

        input_dim = Z.shape[0]

        uncertain_inputs = isinstance(X, VariationalPosterior)

        beta = 1. / likelihood.variance
        if len(beta) == 1:
            beta = np.zeros(output_dim) + beta

        beta_exp = np.zeros(indexD.shape[0])
        for d in range(output_dim):
            beta_exp[indexD == d] = beta[d]

        psi0, psi1, psi2 = self.gatherPsiStat(kern, X, Z, Y, beta,
                                              uncertain_inputs)

        psi2_sum = (beta_exp[:, None, None] * psi2).sum(0) / output_dim

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        if Kuu_sigma is not None:
            diag.add(Kmm, Kuu_sigma)
        else:
            diag.add(Kmm, self.const_jitter)
        Lm = jitchol(Kmm)

        logL = 0.
        dL_dthetaL = np.zeros(output_dim)
        dL_dKmm = np.zeros_like(Kmm)
        dL_dpsi0 = np.zeros_like(psi0)
        dL_dpsi1 = np.zeros_like(psi1)
        dL_dpsi2 = np.zeros_like(psi2)
        wv = np.empty((Kmm.shape[0], output_dim))

        for d in range(output_dim):
            idx_d = indexD == d
            Y_d = Y[idx_d]
            N_d = Y_d.shape[0]
            beta_d = beta[d]

            psi2_d = psi2[idx_d].sum(0) * beta_d
            psi1Y = Y_d.T.dot(psi1[idx_d]) * beta_d
            psi0_d = psi0[idx_d].sum() * beta_d
            YRY_d = np.square(Y_d).sum() * beta_d

            LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_d, 'right')

            Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
            LL = jitchol(Lambda)
            LmLL = Lm.dot(LL)

            b = dtrtrs(LmLL, psi1Y.T)[0].T
            bbt = np.square(b).sum()
            v = dtrtrs(LmLL, b.T, trans=1)[0].T
            LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)

            tmp = -backsub_both_sides(LL, LLinvPsi1TYYTPsi1LLinvT)
            dL_dpsi2R = backsub_both_sides(Lm, tmp + np.eye(input_dim)) / 2

            logL_R = -N_d * np.log(beta_d)
            logL += -((N_d * log_2_pi + logL_R + psi0_d -
                       np.trace(LmInvPsi2LmInvT)) + YRY_d - bbt) / 2.

            dL_dKmm += dL_dpsi2R - backsub_both_sides(Lm, LmInvPsi2LmInvT) / 2

            dL_dthetaL[d:d +
                       1] = (YRY_d * beta_d + beta_d * psi0_d - N_d *
                             beta_d) / 2. - beta_d * (dL_dpsi2R * psi2_d).sum(
                             ) - beta_d * np.trace(LLinvPsi1TYYTPsi1LLinvT)

            dL_dpsi0[idx_d] = -beta_d / 2.
            dL_dpsi1[idx_d] = beta_d * np.dot(Y_d, v)
            dL_dpsi2[idx_d] = beta_d * dL_dpsi2R
            wv[:, d] = v

        LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_sum, 'right')

        Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
        LL = jitchol(Lambda)
        LmLL = Lm.dot(LL)
        logdet_L = 2. * np.sum(np.log(np.diag(LL)))
        dL_dpsi2R_common = dpotri(LmLL)[0] / -2.
        dL_dpsi2 += dL_dpsi2R_common[None, :, :] * beta_exp[:, None, None]

        for d in range(output_dim):
            dL_dthetaL[d] += (dL_dpsi2R_common * psi2[indexD == d].sum(0)
                              ).sum() * -beta[d] * beta[d]

        dL_dKmm += dL_dpsi2R_common * output_dim

        logL += -output_dim * logdet_L / 2.

        #======================================================================
        # Compute dL_dKmm
        #======================================================================

        # dL_dKmm =  dL_dpsi2R - output_dim* backsub_both_sides(Lm, LmInvPsi2LmInvT)/2 #LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        LLInvLmT = dtrtrs(LL, Lm.T)[0]
        cov = tdot(LLInvLmT.T)

        wd_inv = backsub_both_sides(
            Lm,
            np.eye(input_dim) -
            backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
            transpose='left')
        post = Posterior(woodbury_inv=wd_inv,
                         woodbury_vector=wv,
                         K=Kmm,
                         mean=None,
                         cov=cov,
                         K_chol=Lm)

        #======================================================================
        # Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
        #======================================================================

        # for d in range(output_dim):
        #     dL_dthetaL[d:d+1] += - beta[d]*beta[d]*(dL_dpsi2R[None,:,:] * psi2[indexD==d]/output_dim).sum()
        # dL_dthetaL += - (dL_dpsi2R[None,:,:] * psi2_sum*D beta*(dL_dpsi2R*psi2).sum()

        #======================================================================
        # Compute dL_dpsi
        #======================================================================

        if not uncertain_inputs:
            dL_dpsi1 += (psi1[:, None, :] * dL_dpsi2).sum(2) * 2.

        if uncertain_inputs:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dpsi0': dL_dpsi0,
                'dL_dpsi1': dL_dpsi1,
                'dL_dpsi2': dL_dpsi2,
                'dL_dthetaL': dL_dthetaL
            }
        else:
            grad_dict = {
                'dL_dKmm': dL_dKmm,
                'dL_dKdiag': dL_dpsi0,
                'dL_dKnm': dL_dpsi1,
                'dL_dthetaL': dL_dthetaL
            }

        return post, logL, grad_dict
Example #15
    def calculate_gradients(self, q_U, p_U_new, p_U_old, p_U_var, q_F, VE_dm, VE_dv, Ntask, M, Q, D, f_index, d_index,q):
        """
        Calculates gradients of the Log-marginal distribution p(Y) wrt variational
        parameters mu_q, S_q
        """
        # Algebra for q(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]

        S_qi, _ = linalg.dpotri(np.asfortranarray(L_u[q, :, :]))
        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        # Algebra for p(u)
        Kuu_new = p_U_new.Kuu.copy()
        Luu_new = p_U_new.Luu.copy()
        Kuui_new = p_U_new.Kuui.copy()

        Kuu_old = p_U_old.Kuu.copy()
        Luu_old = p_U_old.Luu.copy()
        Kuui_old = p_U_old.Kuui.copy()

        Mu_var = p_U_var.Mu.copy()
        Kuu_var = p_U_var.Kuu.copy()
        Luu_var = p_U_var.Luu.copy()
        Kuui_var = p_U_var.Kuui.copy()


        # KL Terms
        dKLnew_dmu_q = np.dot(Kuui_new[q,:,:], m_u[:, q, None])
        dKLnew_dS_q = 0.5 * (Kuui_new[q,:,:] - S_qi)

        dKLold_dmu_q = np.dot(Kuui_old[q,:,:], m_u[:, q, None])
        dKLold_dS_q = 0.5 * (Kuui_old[q,:,:] - S_qi)

        dKLvar_dmu_q = np.dot(Kuui_var[q,:,:], (m_u[:, q, None] - Mu_var[q, :, :])) # important!! (Eq. 69 MCB)
        dKLvar_dS_q = 0.5 * (Kuui_var[q,:,:] - S_qi)

        dKLnew_dKqq = 0.5 * Kuui_new[q,:,:] - 0.5 * Kuui_new[q,:,:].dot(S_u[q, :, :]).dot(Kuui_new[q,:,:]) \
                   - 0.5 * np.dot(Kuui_new[q,:,:],np.dot(m_u[:, q, None],m_u[:, q, None].T)).dot(Kuui_new[q,:,:].T)

        dKLold_dKqq = 0.5 * Kuui_old[q,:,:] - 0.5 * Kuui_old[q,:,:].dot(S_u[q, :, :]).dot(Kuui_old[q,:,:]) \
                   - 0.5 * np.dot(Kuui_old[q,:,:],np.dot(m_u[:, q, None],m_u[:, q, None].T)).dot(Kuui_old[q,:,:].T)

        #dKLvar_dKqq = 0.5 * Kuui_var[q,:,:] - 0.5 * Kuui_var[q,:,:].dot(S_u[q, :, :]).dot(Kuui_var[q,:,:]) \
        #           - 0.5 * np.dot(Kuui_var[q,:,:],np.dot(m_u[:, q, None],m_u[:, q, None].T)).dot(Kuui_var[q,:,:].T) \
        #            + 0.5 * np.dot(Kuui_var[q,:,:], np.dot(m_u[:,q,None], Mu_var[q,:,:].T)).dot(Kuui_var[q,:,:].T) \
        #            + 0.5 * np.dot(Kuui_var[q,:,:], np.dot(Mu_var[q,:,:], m_u[:,q,None].T)).dot(Kuui_var[q,:,:].T) \
        #              - 0.5 * np.dot(Kuui_var[q,:,:],np.dot(Mu_var[q,:,:], Mu_var[q,:,:].T)).dot(Kuui_var[q,:,:].T)


        #KLvar += 0.5 * np.sum(Kuui_var[q, :, :] * S_u[q, :, :]) \
        #             + 0.5 * np.dot((Mu_var[q, :, :] - m_u[:, q, None]).T, np.dot(Kuui_var[q, :, :], (Mu_var[q, :, :] - m_u[:, q, None]))) \
        #             - 0.5 * M \
        #             + 0.5 * 2. * np.sum(np.log(np.abs(np.diag(Luu_var[q, :, :])))) \
        #             - 0.5 * 2. * np.sum(np.log(np.abs(np.diag(L_u[q, :, :]))))

        #

        # VE Terms
        dVE_dmu_q = np.zeros((M, 1))
        dVE_dS_q = np.zeros((M, M))
        dVE_dKqq = np.zeros((M, M))
        dVE_dKqd = []
        dVE_dKdiag = []

        for d, q_fd in enumerate(q_F):
            Nt = Ntask[f_index[d]]
            dVE_dmu_q += np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:,d_index[d]])[:, None]
            Adv = q_fd.Afdu[q,:,:].T * VE_dv[f_index[d]][:,d_index[d],None].T
            Adv = np.ascontiguousarray(Adv)
            AdvA = np.dot(Adv.reshape(-1, Nt), q_fd.Afdu[q, :, :]).reshape(M, M)
            dVE_dS_q += AdvA

            # Derivatives dKuquq
            tmp_dv = np.dot(AdvA, S_u[q, :, :]).dot(Kuui_new[q,:,:])
            dVE_dKqq += AdvA - tmp_dv - tmp_dv.T
            Adm = np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:,d_index[d],None])
            dVE_dKqq += - np.dot(Adm, np.dot(Kuui_new[q,:,:], m_u[:, q, None]).T)

            # Derivatives dKuqfd
            tmp = np.dot(S_u[q, :, :], Kuui_new[q,:,:])
            tmp = 2. * (tmp - np.eye(M))
            dve_kqd = np.dot(np.dot(Kuui_new[q,:,:], m_u[:, q, None]), VE_dm[f_index[d]][:,d_index[d],None].T)
            dve_kqd += np.dot(tmp.T, Adv)
            dVE_dKqd.append(dve_kqd)

            # Derivatives dKdiag
            dVE_dKdiag.append(VE_dv[f_index[d]][:,d_index[d]])

        dVE_dKqq = 0.5 * (dVE_dKqq + dVE_dKqq.T)

        # Derivatives of variational parameters
        dL_dmu_q = dVE_dmu_q - dKLnew_dmu_q + dKLold_dmu_q - dKLvar_dmu_q
        dL_dS_q = dVE_dS_q - dKLnew_dS_q + dKLold_dS_q - dKLvar_dS_q

        # Derivatives of prior hyperparameters
        # if using Zgrad, dL_dKqq = dVE_dKqq - dKLnew_dKqq + dKLold_dKqq - dKLvar_dKqq
        # otherwise for hyperparameters: dL_dKqq = dVE_dKqq - dKLnew_dKqq
        dL_dKqq = dVE_dKqq - dKLnew_dKqq #+ dKLold_dKqq - dKLvar_dKqq # dKLold_dKqq only for Zgrad, dKLvar_dKqq to be done (for Zgrad)
        dL_dKdq = dVE_dKqd
        dL_dKdiag = dVE_dKdiag

        # Pass S_q gradients to its low-triangular representation L_q
        chol_u = q_U.chols_u.copy()
        L_q = choleskies.flat_to_triang(chol_u[:,q:q+1])
        dL_dL_q = 2. * np.array([np.dot(a, b) for a, b in zip(dL_dS_q[None,:,:], L_q)])
        dL_dL_q = choleskies.triang_to_flat(dL_dL_q)

        # Posterior
        posterior_q = Posterior(mean=m_u[:, q, None], cov=S_u[q, :, :], K=Kuu_new[q,:,:], prior_mean=np.zeros(m_u[:, q, None].shape))

        return dL_dmu_q, dL_dL_q, dL_dS_q, posterior_q, dL_dKqq, dL_dKdq, dL_dKdiag
Example #16
    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, fixed_covs_kerns=None, **kw):

        _, output_dim = Y.shape
        uncertain_inputs = isinstance(X, VariationalPosterior)

        #see whether we've got a different noise variance for each datum
        beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
        # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
        #self.YYTfactor = self.get_YYTfactor(Y)
        #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
        het_noise = beta.size > 1

        if het_noise:
            raise(NotImplementedError("Heteroscedastic noise not implemented, should be possible though, feel free to try implementing it :)"))

        if beta.ndim == 1:
            beta = beta[:, None]


        # do the inference:
        num_inducing = Z.shape[0]
        num_data = Y.shape[0]
        # kernel computations, using BGPLVM notation

        Kmm = kern.K(Z).copy()
        diag.add(Kmm, self.const_jitter)
        if Lm is None:
            Lm = jitchol(Kmm)

        # The rather complex computations of A, and the psi stats
        if uncertain_inputs:
            psi0 = kern.psi0(Z, X)
            psi1 = kern.psi1(Z, X)
            if het_noise:
                psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
            else:
                psi2_beta = kern.psi2(Z,X) * beta
            LmInv = dtrtri(Lm)
            A = LmInv.dot(psi2_beta.dot(LmInv.T))
        else:
            psi0 = kern.Kdiag(X)
            psi1 = kern.K(X, Z)
            if het_noise:
                tmp = psi1 * (np.sqrt(beta))
            else:
                tmp = psi1 * (np.sqrt(beta))
            tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
            A = tdot(tmp)

        # factor B
        B = np.eye(num_inducing) + A
        LB = jitchol(B)
        # back substitute C into psi1Vf
        #tmp, _ = dtrtrs(Lm, psi1.T.dot(VVT_factor), lower=1, trans=0)
        #_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        #tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
        #Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # data fit and derivative of L w.r.t. Kmm
        #delit = tdot(_LBi_Lmi_psi1Vf)

        # Expose YYT to get additional covariates in (YYT + Kgg):
        tmp, _ = dtrtrs(Lm, psi1.T, lower=1, trans=0)
        _LBi_Lmi_psi1, _ = dtrtrs(LB, tmp, lower=1, trans=0)
        tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1, lower=1, trans=1)
        Cpsi1, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

        # TODO: cache this:
        # Compute fixed covariates covariance:
        if fixed_covs_kerns is not None:
            K_fixed = 0
            for name, [cov, k] in fixed_covs_kerns.iteritems():
                K_fixed += k.K(cov)

            #trYYT = self.get_trYYT(Y)
            YYT_covs = (tdot(Y) + K_fixed)
            data_term = beta**2 * YYT_covs
            trYYT_covs = np.trace(YYT_covs)
        else:
            data_term = beta**2 * tdot(Y)
            trYYT_covs = self.get_trYYT(Y)

        #trYYT = self.get_trYYT(Y)
        delit = mdot(_LBi_Lmi_psi1, data_term, _LBi_Lmi_psi1.T)
        data_fit = np.trace(delit)

        DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
        if dL_dKmm is None:
            delit = -0.5 * DBi_plus_BiPBi
            delit += -0.5 * B * output_dim
            delit += output_dim * np.eye(num_inducing)
            # Compute dL_dKmm
            dL_dKmm = backsub_both_sides(Lm, delit)

        # derivatives of L w.r.t. psi
        dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
            data_term, Cpsi1, DBi_plus_BiPBi,
            psi1, het_noise, uncertain_inputs)

        # log marginal likelihood
        log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
            psi0, A, LB, trYYT_covs, data_fit, Y)

        if self.save_per_dim:
            self.saved_vals = [psi0, A, LB, _LBi_Lmi_psi1, beta]

        # No heteroscedastics, so no _LBi_Lmi_psi1Vf:
        # For the interested reader, try implementing the heteroscedastic version, it should be possible
        _LBi_Lmi_psi1Vf = None # Is just here for documentation, so you can see, what it was.

        #noise derivatives
        dL_dR = _compute_dL_dR(likelihood,
            het_noise, uncertain_inputs, LB,
            _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
            psi0, psi1, beta,
            data_fit, num_data, output_dim, trYYT_covs, Y, None)

        dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)

        #put the gradients in the right places
        if uncertain_inputs:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKmm': dL_dKmm,
                         'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}

        if fixed_covs_kerns is not None:
            # For now, we do not take the gradients, we can compute them,
            # but the maximum likelihood solution is to switch off the additional covariates....
            dL_dcovs = beta * np.eye(K_fixed.shape[0]) - beta**2*tdot(_LBi_Lmi_psi1.T)
            grad_dict['dL_dcovs'] = -.5 * dL_dcovs

        #get sufficient things for posterior prediction
        #TODO: do we really want to do this in  the loop?
        if 1:
            woodbury_vector = (beta*Cpsi1).dot(Y)
        else:
            import ipdb; ipdb.set_trace()
            psi1V = np.dot(Y.T*beta, psi1).T
            tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
            tmp, _ = dpotrs(LB, tmp, lower=1)
            woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
        Bi, _ = dpotri(LB, lower=1)
        symmetrify(Bi)
        Bi = -dpotri(LB, lower=1)[0]
        diag.add(Bi, 1)

        woodbury_inv = backsub_both_sides(Lm, Bi)

        #construct a posterior object
        post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
        return post, log_marginal, grad_dict
Example #17
    def inference(self, n0, C0, P0, log_marginal_likelihood0, log_Det_C0,
                  dn_dR, dC_dR, dψ_dR, dn_dσ02, dC_dσ02, dψ_dσ02, dn_dl, dC_dl,
                  dψ_dl, dn_dσn2, dC_dσn2, dψ_dσn2, X, Y):

        α = self.α
        α_const = (1 - α) / α

        num_data, _ = Y.shape
        num_inducing = n0.shape[0]  # it only works with num_outputs = 1

        y = Y[:, 0]  # it only works with num_outputs = 1

        # update kernel with new hyperparams
        self.kern.lengthscale = self.params['ls'].copy()
        self.kern.variance = self.params['σ0']**2

        σ_n2 = self.params['σn']**2
        Z = self.params['R']

        # compute kernel quantities
        Krr = self.kern.K(Z)  # kernel matrix of inducing inputs
        diag.add(Krr,
                 self.const_jitter)  # add some jitter for stability reasons
        Kxr = self.kern.K(
            X, Z)  # kernel matrix between mini-batch and inducing inputs
        kxx = self.kern.Kdiag(X)  #+const_jitter            # diagonal of kernel matrix of the mini-batch
        L_K = jitchol(Krr)  # lower cholesky matrix of kernel matrix
        iKrr, _ = dpotri(L_K)  # inverse of kernel matrix of inducing inputs

        self.Krr = Krr
        self.iKrr = iKrr

        # compute state space matrices (and temporary matrices)
        H = np.dot(Kxr, iKrr)  # observation matrix
        Ht = H.T  # transpose of observation matrix
        d = kxx - np.sum(H * Kxr, 1)  # diagonal of correction matrix
        v = α * d + σ_n2  # diagonal of actual noise matrix
        a = α_const * (np.sum(np.log(v)) - num_data * np.log(σ_n2))  # PEP correction term in marginal likelihood

        A_ = Ht / v
        α_ = np.dot(P0, n0)

        r = y - np.dot(H, α_)

        # update natural mean and precision + inversion yielding covariance matrix
        # n1 = ns + np.dot(A_,y)
        # C1 = Cs + np.dot(A_,H)

        n1 = n0 + np.dot(A_, y)
        C1 = C0 + np.dot(A_, H)
        L_C = jitchol(C1)
        P1, _ = dpotri(L_C)

        # more temporary matrices
        B_ = np.dot(H, P1)  # iV * H * Li'     # LAPACK?
        β_ = r / v
        γ_ = np.dot(B_.T, β_)
        δ_ = β_ - np.dot(A_.T, γ_)

        # update marginal log likelihood
        log_Det_C1 = 2 * sum(np.log(np.diag(L_C)))
        log_Ddet_V = sum(np.log(v))
        Δ0 = num_data * np.log(
            2 * np.pi) + log_Det_C1 - log_Det_C0 + log_Ddet_V + np.sum(
                r * δ_) + a
        log_marginal_likelihood1 = log_marginal_likelihood0 - 0.5 * Δ0

        # print('lik_i '+str(0.5*Δ0))

        # compute constant derivatives of likelihood wrt kernel matrices
        dL_dH = 2 * ((B_.T / v).T - np.outer(δ_, α_ + γ_))
        dL_dv = -(np.sum(H * B_, 1) - v / α + (r - np.dot(H, γ_))**2) / (v**2)

        D_ = α * (Ht * dL_dv).T
        E_ = np.dot(dL_dH, iKrr)

        dL_dKxr = E_ - 2 * D_
        dL_dKrr = -np.dot(Ht, E_ - D_)

        dL_dkxx = α * dL_dv

        dL_dn = -2 * np.dot(P0, np.dot(Ht, δ_))
        dL_dC = P1 - P0 - np.outer(dL_dn, α_) + np.outer(γ_, γ_)

        # dL_d_dn = 2*σ_n2 *sum(dL_dv) -2*num_data*α_const # wrt to dn
        dL_d_dn = sum(dL_dv) - num_data * α_const / σ_n2  # wrt to σn2

        iVy = y / v
        dH = np.zeros((num_data, num_inducing))

        scaleFact = 1  ###

        if self.params_EST['R']:
            # compute sparse kernel derivatives
            # dKrr_sparse = np.zeros((J,J,D))
            dKrr_sparse = self.kern.dK_dX(Z)  #, dK_dR=dKrr_sparse)
            # dKxr_sparse = np.zeros((B,J,D))
            dKxr_sparse = self.kern.dK_dX(X, Z)  #, dK_dR=dKxr_sparse)

            # loop over all inducing points
            for j in range(0, num_inducing):
                for d in range(0, self.D):

                    jd = j * self.D + d
                    kjd = dKrr_sparse[:, j, d]
                    k2jd = dKxr_sparse[:, j, d]

                    #dψ_dR[j,d] = dψ_dR[j,d] -0.5*( np.sum(dL_dKrr[:,j]*kjd) + np.sum(dL_dKrr[j,:]*kjd) + np.sum(dL_dKxr[:,j]*k2jd) + np.sum( dL_dn*dn_dR[:,jd]) + np.sum( dL_dC*dC_dR[:,:,jd]) )
                    ### dψ_dR[j,d] = dψ_dR[j,d] -0.5*( np.sum(dL_dkxx *dKxx_diag) +  dL_d_dn   )

                    delta = -0.5 * (np.sum(dL_dKrr[:, j] * kjd) + np.sum(
                        dL_dKrr[j, :] * kjd) + np.sum(dL_dKxr[:, j] * k2jd) +
                                    np.sum(dL_dn * dn_dR[:, jd]) +
                                    np.sum(dL_dC * dC_dR[:, :, jd]))
                    dψ_dR[j, d] = delta * scaleFact

                    dH = -np.outer(H[:, j], kjd)
                    dH[:, j] += -np.dot(H, kjd) + k2jd
                    dH = np.dot(dH, iKrr)

                    dd = -np.sum(dH * Kxr, 1
                                 ) - H[:, j] * k2jd  #### dKxx_diag for theta!!
                    div = -α * dd / (v**2)
                    dn_dR[:, jd] = dn_dR[:, jd] + np.dot(dH.T, iVy) + np.dot(
                        Ht, div * y)
                    F_ = np.dot(A_, dH)
                    dC_dR[:, :, jd] = dC_dR[:, :, jd] + F_ + F_.T + np.dot(
                        Ht * div, H)

        # compute kernel derivatives wrt variance_0
        dKrr_dσ02 = self.kern.dK_dσ02(Z)
        dKxr_dσ02 = self.kern.dK_dσ02(X, Z)
        dkxx_dσ02 = self.kern.dK_dσ02_diag(X)

        # dψ_dσ02 = dψ_dσ02 - 0.5*( np.sum(dL_dKrr*dKrr_dσ02) + np.sum(dL_dKxr*dKxr_dσ02) + np.sum( dL_dn*dn_dσ02) + np.sum( dL_dC*dC_dσ02) )
        # dψ_dσ02 = dψ_dσ02 - 0.5* np.sum(dL_dkxx *dkxx_dσ02)

        delta = -0.5 * (np.sum(dL_dKrr * dKrr_dσ02) +
                        np.sum(dL_dKxr * dKxr_dσ02) + np.sum(dL_dn * dn_dσ02) +
                        np.sum(dL_dC * dC_dσ02))
        delta = delta - 0.5 * np.sum(dL_dkxx * dkxx_dσ02)

        dψ_dσ02 = delta * scaleFact

        dH = dKxr_dσ02 - np.dot(H, dKrr_dσ02)
        dH = np.dot(dH, iKrr)

        dd = dkxx_dσ02 - np.sum(dH * Kxr, 1) - np.sum(H * dKxr_dσ02, 1)
        div = -α * dd / (v**2)
        dn_dσ02 = dn_dσ02 + np.dot(dH.T, iVy) + np.dot(Ht, div * y)
        F_ = np.dot(A_, dH)
        dC_dσ02 = dC_dσ02 + F_ + F_.T + np.dot(Ht * div, H)

        # compute kernel derivatives wrt lengthscale(s)
        dKrr_dl = self.kern.dK_dl(Z)
        dKxr_dl = self.kern.dK_dl(X, Z)
        # dkxx_dl = kern.dK_dl_diag(X)   # zero anyway

        # loop over all lengthscales
        num_lengthscales = dKrr_dl.shape[2]
        for d in range(0, num_lengthscales):

            delta = -0.5 * (np.sum(dL_dKrr * dKrr_dl[:, :, d]) + np.sum(
                dL_dKxr * dKxr_dl[:, :, d]) + np.sum(dL_dn * dn_dl[:, d]) +
                            np.sum(dL_dC * dC_dl[:, :, d]))
            #############################

            dψ_dl[d] = delta * scaleFact
            dH = dKxr_dl[:, :, d] - np.dot(H, dKrr_dl[:, :, d])
            dH = np.dot(dH, iKrr)

            dd = -np.sum(dH * Kxr, 1) - np.sum(H * dKxr_dl[:, :, d], 1)
            div = -α * dd / (v**2)
            dn_dl[:, d] = dn_dl[:, d] + np.dot(dH.T, iVy) + np.dot(Ht, div * y)
            F_ = np.dot(A_, dH)
            dC_dl[:, :, d] = dC_dl[:, :, d] + F_ + F_.T + np.dot(Ht * div, H)

        # gaussian noise variance
        delta = -0.5 * (np.sum(dL_dn * dn_dσn2) + np.sum(dL_dC * dC_dσn2) +
                        dL_d_dn)
        # dψ_dσn2 = dψ_dσn2

        dψ_dσn2 = delta * scaleFact

        div = -1.0 / (v**2)
        dn_dσn2 = dn_dσn2 + np.dot(Ht, div * y)
        dC_dσn2 = dC_dσn2 + np.dot(Ht * div, H)

        m1 = np.dot(P1, n1)

        return log_marginal_likelihood1, n1, m1, C1, P1, log_Det_C1, dn_dR, dC_dR, dψ_dR, dn_dσ02, dC_dσ02, dψ_dσ02, dn_dl, dC_dl, dψ_dl, dn_dσn2, dC_dσn2, dψ_dσn2
Example #18
    def maximization(self, Y, K, C, t, parameters, hyperparameters, expectations):

        self.N = Y.shape[0]
        self.T = Y.shape[1]

        # Model parameters
        pi = parameters[0].copy()
        f = parameters[1].copy()
        mu = parameters[2].copy()

        # Model hyperparameters
        ls = hyperparameters[0].copy()
        a0 = hyperparameters[1].copy()
        a = hyperparameters[2].copy()
        b = hyperparameters[3].copy()
        sigmas = hyperparameters[4].copy()

        var_precision = sigmas.shape[0]

        # Expected values
        r_ik = expectations['r_ik']
        #c_ik = expectations['c_ik']
        Y_exp = expectations['Y_exp']
        matrices = expectations['matrices']

        # old building of matrices
        Sold = matrices['S_old']
        Lold = matrices['L_old']
        Siold = matrices['Si_old']

        # new building of matrices
        hyperparam_list = [ls, a0, a, b, sigmas]
        S, L, Si = util.build_covariance(t, K, hyperparam_list) #dims: (T,T,K)

        # Identifying missing (NaN) values
        nans = np.isnan(Y[:,:,0])
        notnans = np.invert(nans)

        # Expected Log-Likelihood (Cost Function)
        log_likelihood = 0.0
        het_logpdf = np.empty((self.N, K))

        # Log-likelihood derivatives wrt hyperparameters
        dL_dl = np.zeros((1, K))
        dL_da0 = np.zeros((1, K))
        dL_da = np.zeros((C, K))
        dL_db = np.zeros((C, K))
        dL_dsigmas = np.zeros((var_precision, 1))

        c_ik = np.empty((self.N, K))

        for k in range(K):
            S_k = S[:, :, k] # new
            Si_k = Si[:, :, k] # new

            Sold_k = Sold[:, :, k] # old
            Siold_k = Siold[:, :, k] # old

            Y_exp_k = Y_exp[k]
            Y_exp_real = Y_exp_k[:, :, 0]
            Y_exp_bin = Y_exp_k[:, :, 1]
            detS_k = np.linalg.det(S_k)

            for i in range(self.N):
                Sold_k_oo = Sold_k[np.ix_(notnans[i,:], notnans[i,:])]
                Sold_k_mm = Sold_k[np.ix_(nans[i,:], nans[i,:])]
                Sold_k_mo = Sold_k[np.ix_(nans[i,:], notnans[i,:])]
                Sold_k_om = Sold_k_mo.T
                Si_k_mm = Si_k[np.ix_(nans[i,:], nans[i,:])] # mm submatrix of Si_k

                Lold_k_oo = linalg.jitchol(Sold_k_oo)
                iSold_k_oo, _ = linalg.dpotri(np.asfortranarray(Lold_k_oo)) # inverse of oo submatrix

                Cov_m = Sold_k_mm - (Sold_k_mo.dot(iSold_k_oo).dot(Sold_k_om))
                c_ik[i,k] = np.trace(Si_k_mm.dot(Cov_m))

                A_m = np.zeros((self.T, self.T))
                A_m[np.ix_(nans[i, :], nans[i, :])] = Cov_m

                y = Y_exp_real[i, :].T
                y = y[:, np.newaxis]
                yy_T = np.dot(y,y.T)
                aa_T = Si_k.dot(yy_T).dot(Si_k.T)

                Q1 = aa_T - Si_k
                Q2 = Si_k.dot(A_m).dot(Si_k)

                dK_dl, dK_da0, dK_da, dK_db, dK_dsigmas = self.kernel_gradients(Q1, Q2, t, k, C, hyperparam_list)


                dL_dl[0,k] += 0.5*r_ik[i,k]*dK_dl
                dL_da0[0, k] += 0.5*r_ik[i,k]*dK_da0
                dL_da[:,k] += 0.5*r_ik[i,k]*dK_da.flatten()
                dL_db[:,k] += 0.5*r_ik[i,k]*dK_db.flatten()
                dL_dsigmas += 0.5*r_ik[i,k]*dK_dsigmas

                log_likelihood += - 0.5*r_ik[i,k]*np.log(pi[0,k]) - 0.5*r_ik[i,k]*np.log(detS_k) \
                                  - 0.5*r_ik[i,k] * np.dot(Y_exp_real[i,:],Si_k).dot(Y_exp_real[i,:].T) \
                                  - 0.5*r_ik[i,k]*c_ik[i,k] \
                                  + r_ik[i,k]*np.sum(Y_exp_bin[i,:]*np.log(mu[:, k])) \
                                  + r_ik[i,k]*np.sum(Y_exp_bin[i,:]*np.log(1 - mu[:, k]))
                                # + r_ik[i,k]*[]
                                # still missing: the Gaussian's pi factor


            #param_list = [f[:, k], S[:, :, k], Si[:, :, k], mu[:, k]]

        gradients = {'dL_dl':dL_dl, 'dL_da0':dL_da0, 'dL_da':dL_da, 'dL_db':dL_db, 'dL_dsigmas':dL_dsigmas}

        return log_likelihood, gradients
Example #19
    def reset_epoch(self):

        # update kernel with new hyperparams
        self.kern.lengthscale = self.params['ls'].copy()
        self.kern.variance = self.params['σ0']**2

        σ_n2 = self.params['σn']**2
        Z = self.params['R']

        # initialize all prior quantities
        self.n = np.zeros(
            self.num_inducing)  # natural mean vector (num_output = 1!)
        self.P = self.kern.K(Z)  # covariance matrix
        diag.add(self.P, self.const_jitter)
        L_P = jitchol(self.P)
        self.C, _ = dpotri(L_P, lower=1)  # precision matrix
        self._log_marginal_likelihood = 0.0  # log marginal likelihood
        self._log_Det_C = -2 * sum(np.log(
            np.diag(L_P)))  # log determinant of C

        self.Krr = self.P
        self.iKrr = self.C

        # derivative quantities
        J = self.num_inducing  # number of inducing points
        JD = self.num_inducing * self.kern.input_dim  # number of inducing points times dimension
        if self.params_EST['R']:
            self.dn_dR = np.zeros(
                (J, JD)
            )  # derivative of natural mean wrt inducing inputs (Rjd: R11,...,R1D, R21,...,RJD)
            self.dC_dR = np.zeros(
                (J, J, JD)
            )  # derivative of precision matrix wrt inducing inputs (Rjd: R11,...,R1D, R21,...,RJD)
            self.dψ_dR = np.zeros(
                (J, self.kern.input_dim))  # gradients of inducing inputs

            dKrr_sparse = self.kern.dK_dX(Z)
            for j in range(0, self.num_inducing):
                for d in range(0, self.kern.input_dim):

                    jd = j * self.kern.input_dim + d
                    self.dC_dR[:, :, jd] = -np.outer(
                        np.dot(self.C, dKrr_sparse[:, j, d]), self.C[:, j])
                    self.dC_dR[:, :,
                               jd] = self.dC_dR[:, :, jd] + self.dC_dR[:, :,
                                                                       jd].T
        else:
            self.dψ_dR = 0.0
            self.dn_dR = 0.0
            self.dC_dR = 0.0

        dKrr_dσ02 = self.kern.dK_dσ02(Z)
        self.dn_dσ02 = np.zeros(J)
        self.dC_dσ02 = -np.dot(np.dot(self.C, dKrr_dσ02), self.C)
        self.dψ_dσ02 = 0.0

        dKrr_dl = self.kern.dK_dl(Z)
        num_lengthscales = dKrr_dl.shape[2]
        self.dn_dl = np.zeros((J, num_lengthscales))
        self.dC_dl = np.zeros((J, J, num_lengthscales))
        self.dψ_dl = np.zeros(num_lengthscales)
        for d in range(0, num_lengthscales):
            self.dC_dl[:, :,
                       d] = -np.dot(np.dot(self.C, dKrr_dl[:, :, d]), self.C)

        self.dn_dσn2 = np.zeros(J)
        self.dC_dσn2 = np.zeros((J, J))
        self.dψ_dσn2 = 0.0
Example #20
    def calculate_gradients(self, q_U, p_U, q_F, VE_dm, VE_dv, Ntask, M, Q, D,
                            f_index, d_index, j):
        """
        Calculates gradients of the Log-marginal distribution p(Y) wrt variational
        parameters mu_q, S_q
        """
        # Algebra for q(u) and p(u):
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        #S_u = np.empty((Q, M, M))
        S_u = np.dot(L_u[j, :, :], L_u[j, :, :].T)  # This could be computed outside and passed in to reduce computation
        #[np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        S_qi, _ = linalg.dpotri(np.asfortranarray(L_u[j, :, :]))

        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        # KL Terms
        dKL_dmu_j = np.dot(Kuui[j, :, :], m_u[:, j, None])
        dKL_dS_j = 0.5 * (Kuui[j, :, :] - S_qi)
        dKL_dKjj = 0.5 * Kuui[j,:,:] - 0.5 * Kuui[j,:,:].dot(S_u).dot(Kuui[j,:,:]) \
                   - 0.5 * np.dot(Kuui[j,:,:],np.dot(m_u[:, j, None],m_u[:, j, None].T)).dot(Kuui[j,:,:].T)

        # VE Terms
        dVE_dmu_j = np.zeros((M, 1))
        dVE_dS_j = np.zeros((M, M))
        dVE_dKjj = np.zeros((M, M))
        dVE_dKjd = []
        dVE_dKdiag = []

        Nt = Ntask[f_index[j]]
        dVE_dmu_j += np.dot(q_F[j].Afdu.T,
                            VE_dm[f_index[j]][:, d_index[j]])[:, None]
        Adv = q_F[j].Afdu.T * VE_dv[f_index[j]][:, d_index[j], None].T
        Adv = np.ascontiguousarray(Adv)
        AdvA = np.dot(Adv.reshape(-1, Nt), q_F[j].Afdu).reshape(M, M)
        dVE_dS_j += AdvA

        # Derivatives dKuquq
        tmp_dv = np.dot(AdvA, S_u).dot(Kuui[j, :, :])
        dVE_dKjj += AdvA - tmp_dv - tmp_dv.T
        Adm = np.dot(q_F[j].Afdu.T, VE_dm[f_index[j]][:, d_index[j], None])
        dVE_dKjj += -np.dot(Adm, np.dot(Kuui[j, :, :], m_u[:, j, None]).T)

        # Derivatives dKuqfd
        tmp = np.dot(S_u, Kuui[j, :, :])
        tmp = 2. * (tmp - np.eye(M))
        dve_kjd = np.dot(np.dot(Kuui[j, :, :], m_u[:, j, None]),
                         VE_dm[f_index[j]][:, d_index[j], None].T)
        dve_kjd += np.dot(tmp.T, Adv)
        dVE_dKjd.append(dve_kjd)

        # Derivatives dKdiag
        dVE_dKdiag.append(VE_dv[f_index[j]][:, d_index[j]])

        dVE_dKjj = 0.5 * (dVE_dKjj + dVE_dKjj.T)
        # Sum of VE and KL terms
        dL_dmu_j = dVE_dmu_j - dKL_dmu_j
        dL_dS_j = dVE_dS_j - dKL_dS_j
        dL_dKjj = dVE_dKjj - dKL_dKjj
        dL_dKdj = dVE_dKjd[0].copy()  #Here we just pass the unique position
        dL_dKdiag = dVE_dKdiag[0].copy(
        )  #Here we just pass the unique position

        # Pass S_q gradients to its low-triangular representation L_q
        chol_u = q_U.chols_u.copy()
        L_j = choleskies.flat_to_triang(chol_u[:, j:j + 1])
        dL_dL_j = 2. * np.array(
            [np.dot(a, b) for a, b in zip(dL_dS_j[None, :, :], L_j)])
        dL_dL_j = choleskies.triang_to_flat(dL_dL_j)

        # Posterior
        posterior_j = Posterior(mean=m_u[:, j, None],
                                cov=S_u,
                                K=Kuu[j, :, :],
                                prior_mean=np.zeros(m_u[:, j, None].shape))

        return dL_dmu_j, dL_dL_j, dL_dS_j, posterior_j, dL_dKjj, dL_dKdj, dL_dKdiag
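For reference, the KL-term gradients used above are those of the Gaussian KL between q(u_j) = N(m_j, S_j) and p(u_j) = N(0, K_jj), with S_qi = S_j^{-1} obtained from the Cholesky factor:

$$
\frac{\partial \mathrm{KL}}{\partial m_j} = K_{jj}^{-1} m_j, \qquad
\frac{\partial \mathrm{KL}}{\partial S_j} = \tfrac{1}{2}\big(K_{jj}^{-1} - S_j^{-1}\big), \qquad
\frac{\partial \mathrm{KL}}{\partial K_{jj}} = \tfrac{1}{2}\Big(K_{jj}^{-1} - K_{jj}^{-1} S_j K_{jj}^{-1} - K_{jj}^{-1} m_j m_j^{\top} K_{jj}^{-1}\Big);
$$

the variational-expectation (VE) terms are then added, and the S_j gradient is mapped to the triangular factor via dL_dL_j = 2 * (dL_dS_j) L_j before flattening.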
Example #21
    def elbo_derivatives(self, q_U, p_U, q_F, VE_dm, VE_dv, Ntask, dims,
                         f_index, d_index, q):
        """
        Description:  Returns ELBO derivatives w.r.t. variational parameters and hyperparameters
        Equation:     gradients = {dL/dmu, dL/dS, dL/dKmm, dL/Kmn, dL/dKdiag}
        Paper:        In Appendix 4 and 5
        Extra_Info:   Gradients w.r.t. hyperparameters use chain-rule and GPy. Note that Kmm, Kmn, Kdiag are matrices
        """
        Q = dims['Q']
        M = dims['M']

        #------------------------------------#      ALGEBRA FOR DERIVATIVES       #------------------------------------#
        #######  Algebra for q(u) and p(u)  #######
        m_u = q_U.mu_u.copy()
        L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        S_u = np.empty((Q, M, M))
        [np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Kuu = p_U.Kuu.copy()
        Kuui = p_U.Kuui.copy()
        S_qi, _ = linalg.dpotri(np.asfortranarray(L_u[q, :, :]))

        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        #-------------------------------------#      DERIVATIVES OF ELBO TERMS      #----------------------------------#
        #######  KL Terms  #######
        dKL_dmu_q = np.dot(Kuui[q, :, :], m_u[:, q, None])
        dKL_dS_q = 0.5 * (Kuui[q, :, :] - S_qi)
        dKL_dKqq = 0.5 * Kuui[q, :, :] - 0.5 * Kuui[q, :, :].dot(S_u[q, :, :]).dot(Kuui[q, :, :]) \
                   - 0.5 * np.dot(Kuui[q, :, :], np.dot(m_u[:, q, None], m_u[:, q, None].T)).dot(Kuui[q, :, :].T)

        ####### Variational Expectation (VE) Terms #######
        dVE_dmu_q = np.zeros((M, 1))
        dVE_dS_q = np.zeros((M, M))
        dVE_dKqq = np.zeros((M, M))
        dVE_dKqd = []
        dVE_dKdiag = []

        for d, q_fd in enumerate(q_F):
            Nt = Ntask[f_index[d]]
            dVE_dmu_q += np.dot(q_fd.Afdu[q, :, :].T,
                                VE_dm[f_index[d]][:, d_index[d]])[:, None]
            Adv = q_fd.Afdu[q, :, :].T * VE_dv[f_index[d]][:, d_index[d],
                                                           None].T
            Adv = np.ascontiguousarray(Adv)
            AdvA = np.dot(Adv.reshape(-1, Nt),
                          q_fd.Afdu[q, :, :]).reshape(M, M)
            dVE_dS_q += AdvA

            #######  Derivatives dKuquq #######
            tmp_dv = np.dot(AdvA, S_u[q, :, :]).dot(Kuui[q, :, :])
            dVE_dKqq += AdvA - tmp_dv - tmp_dv.T
            Adm = np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:, d_index[d],
                                                                 None])
            dVE_dKqq += -np.dot(Adm, np.dot(Kuui[q, :, :], m_u[:, q, None]).T)

            #######  Derivatives dKuqfd  #######
            tmp = np.dot(S_u[q, :, :], Kuui[q, :, :])
            tmp = 2. * (tmp - np.eye(M))
            dve_kqd = np.dot(np.dot(Kuui[q, :, :], m_u[:, q, None]),
                             VE_dm[f_index[d]][:, d_index[d], None].T)
            dve_kqd += np.dot(tmp.T, Adv)
            dVE_dKqd.append(dve_kqd)

            #######  Derivatives dKdiag  #######
            dVE_dKdiag.append(VE_dv[f_index[d]][:, d_index[d]])

        dVE_dKqq = 0.5 * (dVE_dKqq + dVE_dKqq.T)

        #--------------------------------------#      FINAL ELBO DERIVATIVES      #------------------------------------#
        #######  ELBO derivatives ---> sum of VE and KL terms  #######
        dL_dmu_q = dVE_dmu_q - dKL_dmu_q
        dL_dS_q = dVE_dS_q - dKL_dS_q
        dL_dKqq = dVE_dKqq - dKL_dKqq
        dL_dKdq = dVE_dKqd
        dL_dKdiag = dVE_dKdiag

        ####### Pass S_q gradients to its low-triangular representation L_q  #######
        chol_u = q_U.chols_u.copy()
        L_q = choleskies.flat_to_triang(chol_u[:, q:q + 1])
        dL_dL_q = 2. * np.array(
            [np.dot(a, b) for a, b in zip(dL_dS_q[None, :, :], L_q)])
        dL_dL_q = choleskies.triang_to_flat(dL_dL_q)

        return dL_dmu_q, dL_dL_q, dL_dS_q, dL_dKqq, dL_dKdq, dL_dKdiag
    def calculate_gradients(self, q_U, S_u, Su_add_Kuu_chol, p_U, q_F, VE_dm,
                            VE_dv, Ntask, M, Q, D, f_index, d_index, q):
        """
        Calculates gradients of the Log-marginal distribution p(Y) wrt variational
        parameters mu_q, S_q
        """
        # Algebra for q(u) and p(u):
        m_u = q_U.mu_u.copy()
        #L_u = choleskies.flat_to_triang(q_U.chols_u.copy())
        #S_u = np.empty((Q, M, M))
        #[np.dot(L_u[q, :, :], L_u[q, :, :].T, S_u[q, :, :]) for q in range(Q)]
        Kuu = p_U.Kuu.copy()
        Luu = p_U.Luu.copy()
        Kuui = p_U.Kuui.copy()
        S_qi, _ = linalg.dpotri(np.asfortranarray(Su_add_Kuu_chol[q, :, :]))

        if np.any(np.isinf(S_qi)):
            raise ValueError("Sqi: Cholesky representation unstable")

        # KL Terms
        dKL_dmu_q = []
        dKL_dKqq = 0
        for d in range(D):
            dKL_dmu_q.append(np.dot(Kuui[q, :, :], m_u[d][:, q, None]))  #same
            dKL_dKqq += -0.5 * S_qi + 0.5 * Kuui[q, :, :] - 0.5 * Kuui[q, :, :].dot(S_u[q, :, :]).dot(Kuui[q, :, :]) \
                       - 0.5 * np.dot(Kuui[q, :, :], np.dot(m_u[d][:, q, None], m_u[d][:, q, None].T)).dot(Kuui[q, :, :].T)  # same
        #dKL_dS_q = 0.5 * (Kuui[q,:,:] - S_qi)             #old
        dKL_dS_q = 0.5 * (Kuui[q, :, :] - S_qi) * D

        # VE Terms
        #dVE_dmu_q = np.zeros((M, 1))
        dVE_dmu_q = []
        dVE_dS_q = np.zeros((M, M))
        dVE_dKqq = np.zeros((M, M))
        dVE_dKqd = []
        dVE_dKdiag = []
        dL_dmu_q = []

        for d, q_fd in enumerate(q_F):
            Nt = Ntask[f_index[d]]
            dVE_dmu_q.append(
                np.dot(q_fd.Afdu[q, :, :].T,
                       VE_dm[f_index[d]][:, d_index[d]])[:, None])
            dL_dmu_q.append(dVE_dmu_q[d] - dKL_dmu_q[d])
            Adv = q_fd.Afdu[q, :, :].T * VE_dv[f_index[d]][:, d_index[d],
                                                           None].T
            Adv = np.ascontiguousarray(Adv)
            AdvA = np.dot(Adv.reshape(-1, Nt),
                          q_fd.Afdu[q, :, :]).reshape(M, M)
            dVE_dS_q += AdvA

            # Derivatives dKuquq
            tmp_dv = np.dot(AdvA, S_u[q, :, :]).dot(Kuui[q, :, :])
            dVE_dKqq += -tmp_dv - tmp_dv.T  #+ AdvA last term not included in the derivative
            Adm = np.dot(q_fd.Afdu[q, :, :].T, VE_dm[f_index[d]][:, d_index[d],
                                                                 None])
            dVE_dKqq += -np.dot(Adm,
                                np.dot(Kuui[q, :, :], m_u[d][:, q, None]).T)

            # Derivatives dKuqfd
            tmp = np.dot(S_u[q, :, :], Kuui[q, :, :])
            tmp = 2. * tmp  #2. * (tmp - np.eye(M))  # the term -2Adv not included
            dve_kqd = np.dot(np.dot(Kuui[q, :, :], m_u[d][:, q, None]),
                             VE_dm[f_index[d]][:, d_index[d], None].T)
            dve_kqd += np.dot(tmp.T, Adv)
            dVE_dKqd.append(dve_kqd)

            # Derivatives dKdiag
            dVE_dKdiag.append(VE_dv[f_index[d]][:, d_index[d]])

        dVE_dKqq = 0.5 * (dVE_dKqq + dVE_dKqq.T)
        # Sum of VE and KL terms
        #dL_dmu_q = dVE_dmu_q - dKL_dmu_q
        dL_dS_q = dVE_dS_q - dKL_dS_q
        dL_dKqq = dVE_dKqq - dKL_dKqq
        dL_dKdq = dVE_dKqd
        dL_dKdiag = dVE_dKdiag

        # Pass S_q gradients to its low-triangular representation L_q
        chol_u = q_U.chols_u.copy()
        L_q = choleskies.flat_to_triang(chol_u[:, q:q + 1])
        dL_dL_q = 2. * np.array(
            [np.dot(a, b) for a, b in zip(dL_dS_q[None, :, :], L_q)])
        dL_dL_q = choleskies.triang_to_flat(dL_dL_q)

        # Posterior
        posterior_q = []
        for d in range(D):
            posterior_q.append(
                Posterior(mean=m_u[d][:, q, None],
                          cov=S_u[q, :, :] + Kuu[q, :, :],
                          K=Kuu[q, :, :],
                          prior_mean=np.zeros(m_u[d][:, q, None].shape)))

        return dL_dmu_q, dL_dL_q, dL_dS_q, posterior_q, dL_dKqq, dL_dKdq, dL_dKdiag
Example #23
    def expectation(self, Y, K, C, t, pi, parameters, hyperparameters):

        self.N = Y.shape[0]
        self.T = Y.shape[1]

        # Model parameters
        pi = parameters[0].copy()
        f = parameters[1].copy()
        mu = parameters[2].copy()

        # Model hyperparameters
        ls = hyperparameters[0].copy()
        a0 = hyperparameters[1].copy()
        a = hyperparameters[2].copy()
        b = hyperparameters[3].copy()
        sigmas = hyperparameters[4].copy()

        # Missing values
        Yreal = Y[:, :, 0]
        Ybin = Y[:, :, 1]

        nans = np.isnan(Yreal)
        notnans = np.invert(nans)

        # Covariance
        hyperparam_list = [ls, a0, a, b, sigmas]
        S, L, Si = util.build_covariance(t, K, hyperparam_list)  #dims: (T,T,K)
        matrices = {'S_old': S, 'L_old': L, 'Si_old': Si}

        # Posterior of latent classes
        r_ik = np.empty((self.N, K))

        # Expectations on latent variables
        for k in range(K):
            #param_list = [f[:,k], S[:,:,k], Si[:,:,k], mu[:,k]]
            S_k = S[:, :, k]
            Si_k = Si[:, :, k]
            mu_k = mu[:, k]
            detS_k = np.linalg.det(S_k)
            #r_ik[:, k] = pi[0, k] * util.heterogeneous_pdf(Y, param_list)
            for i in range(self.N):
                r_ik[i,k] = pi[0,k]*(1/np.sqrt(detS_k * np.pi**self.T)) \
                            *np.exp(-0.5*np.dot(Yreal[i,np.ix_(notnans[i,:])], Si_k[np.ix_(notnans[i,:],notnans[i,:])]).dot(Yreal[i,np.ix_(notnans[i,:])].T)) \
                            *np.prod((mu_k[np.ix_(notnans[i,:])])**Ybin[i,np.ix_(notnans[i,:])]  * (1 - mu_k[np.ix_(notnans[i,:])])**Ybin[i,np.ix_(notnans[i,:])])

        r_ik = r_ik / np.tile(r_ik.sum(1)[:, np.newaxis], (1, K))

        # Expectations on missing values
        c_ik = np.empty((self.N, K))
        Y_expectation = []

        for k in range(K):
            # Real observations
            Yreal_fill = Yreal.copy()
            S_k = S[:, :, k]
            Si_k = Si[:, :, k]

            for i in range(self.N):
                S_k_oo = S_k[np.ix_(notnans[i, :], notnans[i, :])]
                S_k_mm = S_k[np.ix_(nans[i, :], nans[i, :])]
                S_k_mo = S_k[np.ix_(nans[i, :], notnans[i, :])]
                S_k_om = S_k_mo.T
                Si_k_mm = Si_k[np.ix_(nans[i, :],
                                      nans[i, :])]  # mm submatrix of Si_k

                L_k_oo = linalg.jitchol(S_k_oo)
                iS_k_oo, _ = linalg.dpotri(
                    np.asfortranarray(L_k_oo))  # inverse of oo submatrix

                Cov_m = S_k_mm - (S_k_mo.dot(iS_k_oo).dot(S_k_om))
                c_ik[i, k] = np.trace(Si_k_mm.dot(Cov_m))

                Yreal_fill[i, nans[i, :]] = S_k_mo.dot(iS_k_oo).dot(
                    Yreal[i, notnans[i, :]])

            # Binary observations
            Ybin_fill = Ybin.copy()
            mu_matrix = np.tile(mu[:, k].T + 0.0, (self.N, 1))
            Ybin_fill[nans] = mu_matrix[nans]

            # Missing observations are now filled
            Y_fill_k = np.empty((self.N, self.T, 2))
            Y_fill_k[:, :, 0] = Yreal_fill
            Y_fill_k[:, :, 1] = Ybin_fill

            Y_expectation.append(Y_fill_k)

        return r_ik, c_ik, Y_expectation, matrices