Example #1
 def calcMargLik(self, SS):
     ''' Calculate marginal likelihood of assignments, summed over all comps
     '''
     theta = self.gamma / SS.K + SS.N
     cPrior = gammaln(self.gamma) - SS.K * gammaln(self.gamma / SS.K)
     cPost = gammaln(np.sum(theta)) - np.sum(gammaln(theta))
     return cPrior - cPost
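This is the standard Dirichlet-multinomial closed form, log p(z) = c_Dir(prior) - c_Dir(posterior). A minimal standalone sketch (toy values, not from the source) that checks the algebra against a Monte Carlo estimate of E_{pi ~ Dir}[prod_k pi_k^{N_k}]:

import numpy as np
from scipy.special import gammaln

rng = np.random.default_rng(0)
N = np.array([3.0, 1.0, 0.0])   # toy assignment counts, K = 3
gamma = 1.5
K = N.size

theta = gamma / K + N
closed = (gammaln(gamma) - K * gammaln(gamma / K)) \
    - (gammaln(theta.sum()) - gammaln(theta).sum())

# Monte Carlo estimate of the same marginal likelihood
pi = rng.dirichlet(gamma / K * np.ones(K), size=200000)
mc = np.log(np.mean(np.prod(pi ** N, axis=1)))
print(closed, mc)   # should agree to about two decimal places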
Example #2
  def E_logpV(self):
    logNormC = gammaln(self.alpha0 + self.alpha1) \
        - gammaln(self.alpha0) - gammaln(self.alpha1)
    logBetaPDF = (self.alpha1 - 1) * self.ElogV + (self.alpha0 - 1) * self.Elog1mV
    if self.truncType == 'z':
      return self.K * logNormC + logBetaPDF.sum()
    elif self.truncType == 'v':
      return self.K * logNormC + logBetaPDF[:-1].sum()
Example #3
 def log_pdf_dirichlet(self, wvec=None, avec=None):
     ''' Return scalar log probability for Dir(wvec | avec)
     '''
     if wvec is None:
         wvec = self.w
     if avec is None:
         avec = (self.gamma / self.K) * np.ones(self.K)
     logC = gammaln(np.sum(avec)) - np.sum(gammaln(avec))
     return logC + np.sum((avec - 1.0) * np.log(wvec + 1e-100))
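A quick sanity check (toy values, assumes scipy.stats is available): the hand-rolled density above should agree with scipy.stats.dirichlet.logpdf on interior points, since the 1e-100 jitter only matters at the simplex boundary.

import numpy as np
from scipy.special import gammaln
from scipy.stats import dirichlet

wvec = np.array([0.2, 0.3, 0.5])
avec = np.array([1.5, 2.0, 0.7])
logC = gammaln(np.sum(avec)) - np.sum(gammaln(avec))
manual = logC + np.sum((avec - 1.0) * np.log(wvec + 1e-100))
print(np.isclose(manual, dirichlet.logpdf(wvec, avec)))  # True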
Example #4
  def E_logqV(self):
    logNormC = gammaln(self.qalpha0 + self.qalpha1) \
        - gammaln(self.qalpha0) - gammaln(self.qalpha1)
    logBetaPDF = (self.qalpha1 - 1) * self.ElogV + (self.qalpha0 - 1) * self.Elog1mV
    if self.truncType == 'z':
      return logNormC.sum() + logBetaPDF.sum()
    elif self.truncType == 'v':
      # skip last entry because entropy of Beta(1,0) = 0
      return logNormC[:-1].sum() + logBetaPDF[:-1].sum()
Example #5
 def log_pdf_dirichlet(self, wvec=None, avec=None):
     """ Return scalar log probability for Dir(wvec | avec)
 """
     if wvec is None:
         wvec = self.w
     if avec is None:
         avec = self.alpha0 * np.ones(self.K)
     logC = gammaln(np.sum(avec)) - np.sum(gammaln(avec))
     return logC + np.sum((avec - 1.0) * np.log(wvec))
Example #6
 def calcMargLik(self, SS):
     """ Calculate marginal likelihood of assignments, summed over all comps
     """
     mask = SS.N > 0
     Nvec = SS.N[mask]
     K = Nvec.size
     return gammaln(self.gamma0) \
         + K * np.log(self.gamma0) \
         + np.sum(gammaln(Nvec)) \
         - gammaln(np.sum(Nvec) + self.gamma0)
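This closed form is the CRP (Ewens) partition probability. A small standalone check (hypothetical seating order, not from the source) that multiplies the sequential seating odds, gamma0/(gamma0 + i) for a new table and N_k/(gamma0 + i) for an occupied one:

import numpy as np
from scipy.special import gammaln

gamma0 = 0.8
z = [0, 0, 1, 0, 2, 1]   # toy seating order for 6 customers
logp_seq = 0.0
counts = {}
for i, k in enumerate(z):
    numer = gamma0 if k not in counts else counts[k]
    logp_seq += np.log(numer) - np.log(gamma0 + i)
    counts[k] = counts.get(k, 0) + 1

Nvec = np.array(list(counts.values()), dtype=float)
K = Nvec.size
logp_closed = gammaln(gamma0) + K * np.log(gamma0) \
    + np.sum(gammaln(Nvec)) - gammaln(Nvec.sum() + gamma0)
print(np.isclose(logp_seq, logp_closed))  # True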
Example #7
def c_Beta(a1, a0):
    ''' Evaluate cumulant function of the Beta distribution

    When input is vectorized, we compute sum over all entries.

    Returns
    -------
    c : scalar real
    '''
    return np.sum(gammaln(a1 + a0)) - np.sum(gammaln(a1)) - np.sum(gammaln(a0))
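Since the Beta cumulant is just the negative log Beta function, it can be cross-checked against scipy.special.betaln (a sketch with made-up parameters):

import numpy as np
from scipy.special import betaln, gammaln

a1 = np.array([1.0, 2.5, 0.3])
a0 = np.array([4.0, 0.9, 1.1])
c = np.sum(gammaln(a1 + a0)) - np.sum(gammaln(a1)) - np.sum(gammaln(a0))
print(np.isclose(c, -np.sum(betaln(a1, a0))))  # True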
Example #8
def log_pdf_dirichlet(PiMat, alphavec):
    ''' Return scalar log probability for Dir(PiMat | alphavec)
    '''
    PiMat = as2D(PiMat + 1e-100)
    J, K = PiMat.shape
    if isinstance(alphavec, float):
        alphavec = alphavec * np.ones(K)
    elif alphavec.ndim == 0:
        alphavec = alphavec * np.ones(K)
    assert alphavec.size == K
    cDir = gammaln(np.sum(alphavec)) - np.sum(gammaln(alphavec))
    return K * cDir + np.sum(np.dot(np.log(PiMat), alphavec - 1.0))
Example #9
    def L_alloc_no_slack(self):
        ''' Compute allocation term of objective function, without slack term

        Returns
        -------
        L : scalar float
        '''
        N = self.theta.shape[0]
        K = self.K
        prior_cDir = N * (gammaln(self.alpha) - K * gammaln(self.alpha / K))
        post_cDir = np.sum(gammaln(np.sum(self.theta, axis=1))) - \
            np.sum(gammaln(self.theta))
        return prior_cDir - post_cDir
Example #10
def c_Beta_ReturnVec(eta1, eta0):
    ''' Evaluate cumulant of Beta distribution for vector of parameters

    Parameters
    -------
    eta1 : 1D array, size K
        represents ON pseudo-count parameter of the Beta
    eta0 : 1D array, size K
        represents OFF pseudo-count parameter of the Beta

    Returns
    -------
    cvec : 1D array, size K
    '''
    return gammaln(eta1 + eta0) - gammaln(eta1) - gammaln(eta0)
Example #11
def c_Beta(eta1, eta0):
    ''' Evaluate cumulant function of Beta distribution

    Parameters
    -------
    eta1 : 1D array, size K
        represents ON pseudo-count parameter of the Beta
    eta0 : 1D array, size K
        represents OFF pseudo-count parameter of the Beta

    Returns
    -------
    c : float
        = \sum_k c_B(eta1[k], eta0[k])
    '''
    return np.sum(gammaln(eta1 + eta0) - gammaln(eta1) - gammaln(eta0))
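The scalar form above is simply the sum of the per-component vector from Example #10; a one-line check reusing both functions (made-up parameters, gammaln import as above):

import numpy as np
eta1 = np.array([2.0, 0.5, 3.3])
eta0 = np.array([1.0, 1.5, 0.7])
print(np.isclose(c_Beta(eta1, eta0), c_Beta_ReturnVec(eta1, eta0).sum()))  # True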
Example #12
    def elbo_alloc(self):
        K = self.K
        normPinit = gammaln(K * self.startAlpha) \
            - K * gammaln(self.startAlpha)

        normQinit = gammaln(np.sum(self.startTheta)) \
            - np.sum(gammaln(self.startTheta))

        normPtrans = K * gammaln(K * self.transAlpha + self.kappa) \
            - K * (K - 1) * gammaln(self.transAlpha) \
            - K * gammaln(self.transAlpha + self.kappa)

        normQtrans = np.sum(gammaln(np.sum(self.transTheta, axis=1))) \
            - np.sum(gammaln(self.transTheta))

        return normPinit + normPtrans - normQinit - normQtrans
Example #13
def E_cDalphabeta_surrogate(alpha, rho, omega):
    ''' Compute expected value of cumulant function of alpha * beta.

    Returns
    -------
    csur : scalar float
    '''
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digammaBoth = digamma(eta1 + eta0)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth
    OFFcoef = kvec(K)
    calpha = gammaln(alpha) + (K + 1) * np.log(alpha)
    return calpha + np.sum(ElogU) + np.inner(OFFcoef, Elog1mU)
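The helper kvec is not defined in this snippet; it supplies the OFF-state coefficients [K, K-1, ..., 1] that weight Elog1mU in the stick-breaking surrogate bound. A compatible definition (an assumption; verify against your bnpy version) would be:

import numpy as np

def kvec(K):
    # kvec(4) -> array([4, 3, 2, 1]); assumed helper, matching the usage above
    return K + 1 - np.arange(1, K + 1)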
Example #14
    def get_log_norm_const(self):
        ''' Calculate log normalization constant (aka log partition function)
          for this Gauss-Gamma distribution.

        p(mu,Lam) = NormalGamma( mu, Lam | a, b, m, kappa)
                  = 1/Z f(mu|Lam) g(Lam), where Z is const w.r.t mu,Lam
        Normalization constant = Z = \int f() g() dmu dLam

        Returns
        --------
        logZ : float
        '''
        D = self.D
        a = self.a
        b = self.b
        logNormConstNormal = 0.5 * D * (LOGTWOPI - np.log(self.kappa))
        logNormConstGamma = np.sum(gammaln(a)) - np.inner(a, np.log(b))
        return logNormConstNormal + logNormConstGamma
Example #15
def calcMergeTermsFromSeparateLP(
        Data=None,
        LPa=None, SSa=None,
        LPb=None, SSb=None,
        mUIDPairs=None):
    ''' Compute merge terms that combine two comps from separate LP dicts.
    
    Returns
    -------
    Mdict : dict of key, array-value pairs
    '''
    M = len(mUIDPairs)
    m_sumLogPi = np.zeros(M)
    m_gammalnTheta = np.zeros(M)
    m_slackTheta = np.zeros(M)
    m_Hresp = np.zeros(M)

    assert np.allclose(LPa['digammaSumTheta'], LPb['digammaSumTheta'])
    for m, (uidA, uidB) in enumerate(mUIDPairs):
        kA = SSa.uid2k(uidA)
        kB = SSb.uid2k(uidB)

        m_resp = LPa['resp'][:, kA] + LPb['resp'][:, kB]
        if hasattr(Data, 'word_count') and \
                Data.nUniqueToken == m_resp.shape[0]:
            m_Hresp[m] = -1 * calcRlogRdotv(
                m_resp[:,np.newaxis], Data.word_count)
        else:
            m_Hresp[m] = -1 * calcRlogR(m_resp[:,np.newaxis])

        DTC_vec = LPa['DocTopicCount'][:, kA] + LPb['DocTopicCount'][:, kB]
        theta_vec = LPa['theta'][:, kA] + LPb['theta'][:, kB]
        m_gammalnTheta[m] = np.sum(gammaln(theta_vec))
        ElogPi_vec = digamma(theta_vec) - LPa['digammaSumTheta']
        m_sumLogPi[m] = np.sum(ElogPi_vec)
        # slack = (Ndm - theta_dm) * E[log pi_dm]
        slack_vec = ElogPi_vec
        slack_vec *= (DTC_vec - theta_vec)
        m_slackTheta[m] = np.sum(slack_vec)
    return dict(
        Hresp=m_Hresp,
        gammalnTheta=m_gammalnTheta,
        slackTheta=m_slackTheta,
        sumLogPi=m_sumLogPi)
Example #16
def c_Func(avec, K=0):
    ''' Evaluate cumulant function of the Dirichlet distribution

    Returns
    -------
    c : scalar real
    '''
    if isinstance(avec, float) or avec.ndim == 0:
        assert K > 0
        avec = avec * np.ones(K)
        return gammaln(np.sum(avec)) - np.sum(gammaln(avec))
    elif avec.ndim == 1:
        return gammaln(np.sum(avec)) - np.sum(gammaln(avec))
    else:
        return np.sum(gammaln(np.sum(avec, axis=1))) - np.sum(gammaln(avec))
Example #17
def c_Dir(AMat, arem=None):
    ''' Evaluate cumulant function of the Dir distribution

    When input is vectorized, we compute sum over all entries.

    Returns
    -------
    c : scalar real
    '''
    AMat = np.asarray(AMat)
    D = AMat.shape[0]
    if arem is None:
        if AMat.ndim == 1:
            return gammaln(np.sum(AMat)) - np.sum(gammaln(AMat))
        else:
            return np.sum(gammaln(np.sum(AMat, axis=1))) \
                - np.sum(gammaln(AMat))

    return np.sum(gammaln(np.sum(AMat, axis=1) + arem)) \
        - np.sum(gammaln(AMat)) \
        - D * np.sum(gammaln(arem))
Example #18
 def calc_log_norm_const(cls, a, b, m, kappa):
     D = m.size  # D was undefined here; infer it from the mean vector
     # Normal normalizer is (2*pi/kappa)^{D/2}: log(kappa) enters with a
     # minus sign, as in the instance method of Example #14.
     logNormConstNormal = 0.5 * D * (LOGTWOPI - np.log(kappa))
     logNormConstGamma = np.sum(gammaln(a)) - np.inner(a, np.log(b))
     return logNormConstNormal + logNormConstGamma
Example #19
  def E_logpW(self):
    ''' Bishop PRML eq. 10.73
    '''
    return gammaln(self.K * self.alpha0) \
        - self.K * gammaln(self.alpha0) + (self.alpha0 - 1) * self.Elogw.sum()
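Here Elogw is the usual Dirichlet expectation E_q[ln w_k] = digamma(alpha_k) - digamma(sum_j alpha_j). A short sketch with hypothetical variational parameters (not from the source) evaluating the same term end to end:

import numpy as np
from scipy.special import digamma, gammaln

alpha0 = 1.0                       # symmetric prior concentration
alpha = np.array([5.2, 1.3, 9.5])  # variational Dirichlet parameters
K = alpha.size
Elogw = digamma(alpha) - digamma(alpha.sum())
E_logpW = gammaln(K * alpha0) - K * gammaln(alpha0) + (alpha0 - 1) * Elogw.sum()
print(E_logpW)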
Example #20
def c_Dir(tvec):
    return gammaln(tvec.sum()) - gammaln(tvec).sum()
Example #21
 def E_logqW(self):
     """ Bishop PRML eq. 10.76
     """
     return gammaln(self.alpha.sum()) - gammaln(self.alpha).sum() \
         + np.inner(self.alpha - 1, self.Elogw)
Example #22
 def E_logpW(self):
     ''' Bishop PRML eq. 10.73
     '''
     return gammaln(self.gamma) \
         - self.K * gammaln(self.gamma/self.K) + \
         (self.gamma / self.K - 1) * self.Elogw.sum()
Example #23
 def E_logqW(self):
     ''' Bishop PRML eq. 10.76
     '''
     return gammaln(self.theta.sum()) - gammaln(self.theta).sum() \
         + np.inner((self.theta - 1), self.Elogw)
Example #24
def calcELBO_NonlinearTerms(Data=None, SS=None, LP=None, todict=0,
                            rho=None, Ebeta=None, alpha=None,
                            resp=None,
                            nDoc=None, DocTopicCount=None,
                            theta=None, thetaRem=None,
                            ElogPi=None, ElogPiRem=None,
                            sumLogPi=None,
                            sumLogPiRem=None, sumLogPiRemVec=None,
                            Hresp=None, slackTheta=None, slackThetaRem=None,
                            gammalnTheta=None, gammalnSumTheta=None,
                            gammalnThetaRem=None,
                            thetaEmptyComp=None, ElogPiEmptyComp=None,
                            ElogPiOrigComp=None,
                            gammalnThetaOrigComp=None, slackThetaOrigComp=None,
                            returnMemoizedDict=0, **kwargs):
    """ Calculate ELBO objective terms non-linear in suff stats.
    """
    if resp is not None:
        N, K = resp.shape
    elif LP is not None:
        if 'resp' in LP:
            N, K = LP['resp'].shape
        else:
            N, K = LP['spR'].shape

    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')

    if LP is not None:
        DocTopicCount = LP['DocTopicCount']
        nDoc = DocTopicCount.shape[0]
        theta = LP['theta']
        thetaRem = LP['thetaRem']
        ElogPi = LP['ElogPi']
        ElogPiRem = LP['ElogPiRem']
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
        if 'thetaEmptyComp' in LP:
            thetaEmptyComp = LP['thetaEmptyComp']
            ElogPiEmptyComp = LP['ElogPiEmptyComp']
            ElogPiOrigComp = LP['ElogPiOrigComp']
            gammalnThetaOrigComp = LP['gammalnThetaOrigComp']
            slackThetaOrigComp = LP['slackThetaOrigComp']
            HrespOrigComp = LP['HrespOrigComp']
    elif SS is not None:
        sumLogPi = SS.sumLogPi
        nDoc = SS.nDoc
        if hasattr(SS, 'sumLogPiRemVec'):
            sumLogPiRemVec = SS.sumLogPiRemVec
        else:
            sumLogPiRem = SS.sumLogPiRem

    if DocTopicCount is not None and theta is None:
        theta = DocTopicCount + alpha * Ebeta[:-1]
        thetaRem = alpha * Ebeta[-1]

    if theta is not None and ElogPi is None:
        digammasumtheta = digamma(theta.sum(axis=1) + thetaRem)
        ElogPi = digamma(theta) - digammasumtheta[:, np.newaxis]
        ElogPiRem = digamma(thetaRem) - digammasumtheta[:, np.newaxis]
    if sumLogPi is None and ElogPi is not None:
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
    if Hresp is None:
        if SS is not None and SS.hasELBOTerm('Hresp'):
            Hresp = SS.getELBOTerm('Hresp')
        else:
            if hasattr(Data, 'word_count') and N == Data.word_count.size:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        resp, Data.word_count)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        LP['resp'], Data.word_count)
                elif 'spR' in LP:
                    Hresp = calcSparseRlogRdotv(
                        v=Data.word_count,
                        **LP)
                else:
                    raise ValueError("Missing resp assignments!")
            else:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogR(resp)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogR(LP['resp'])
                elif 'spR' in LP:
                    assert 'nnzPerRow' in LP
                    Hresp = calcSparseRlogR(**LP)
                else:
                    raise ValueError("Missing resp assignments!")


    if slackTheta is None:
        if SS is not None and SS.hasELBOTerm('slackTheta'):
            slackTheta = SS.getELBOTerm('slackTheta')
            slackThetaRem = SS.getELBOTerm('slackThetaRem')
        else:
            slackTheta = DocTopicCount - theta
            slackTheta *= ElogPi
            slackTheta = np.sum(slackTheta, axis=0)
            slackThetaRem = -1 * np.sum(thetaRem * ElogPiRem)

    if gammalnTheta is None:
        if SS is not None and SS.hasELBOTerm('gammalnTheta'):
            gammalnSumTheta = SS.getELBOTerm('gammalnSumTheta')
            gammalnTheta = SS.getELBOTerm('gammalnTheta')
            gammalnThetaRem = SS.getELBOTerm('gammalnThetaRem')
        else:
            sumTheta = np.sum(theta, axis=1) + thetaRem
            gammalnSumTheta = np.sum(gammaln(sumTheta))
            gammalnTheta = np.sum(gammaln(theta), axis=0)
            gammalnThetaRem = theta.shape[0] * gammaln(thetaRem)

    if thetaEmptyComp is not None:
        gammalnThetaEmptyComp = nDoc * gammaln(thetaEmptyComp) - \
            gammalnThetaOrigComp
        slackThetaEmptyComp = -np.sum(thetaEmptyComp * ElogPiEmptyComp) - \
            slackThetaOrigComp


    if returnMemoizedDict:
        Mdict = dict(Hresp=Hresp,
                    slackTheta=slackTheta,
                    slackThetaRem=slackThetaRem,
                    gammalnTheta=gammalnTheta,
                    gammalnThetaRem=gammalnThetaRem,
                    gammalnSumTheta=gammalnSumTheta)
        if thetaEmptyComp is not None:
            Mdict['HrespEmptyComp'] = -1 * HrespOrigComp
            Mdict['gammalnThetaEmptyComp'] = gammalnThetaEmptyComp
            Mdict['slackThetaEmptyComp'] = slackThetaEmptyComp
        return Mdict

    # First, compute all local-only terms
    Lentropy = np.sum(Hresp)
    Lslack = slackTheta.sum() + slackThetaRem
    LcDtheta = -1 * (gammalnSumTheta - gammalnTheta.sum() - gammalnThetaRem)

    # For stochastic (soVB), we need to scale up these terms
    # Only used when --doMemoELBO is set to 0 (not recommended)
    if SS is not None and SS.hasAmpFactor():
        Lentropy *= SS.ampF
        Lslack *= SS.ampF
        LcDtheta *= SS.ampF

    # Next, compute the slack term
    alphaEbeta = alpha * Ebeta
    Lslack_alphaEbeta = np.sum(alphaEbeta[:-1] * sumLogPi)
    if sumLogPiRemVec is not None:
        Ebeta_gt = 1 - np.cumsum(Ebeta[:-1])
        Lslack_alphaEbeta += alpha * np.inner(Ebeta_gt, sumLogPiRemVec)
    else:
        Lslack_alphaEbeta += alphaEbeta[-1] * sumLogPiRem
    Lslack += Lslack_alphaEbeta

    if todict:
        return dict(
            Lslack=Lslack,
            Lentropy=Lentropy,
            LcDtheta=LcDtheta,
            Lslack_alphaEbeta=Lslack_alphaEbeta)
    return LcDtheta + Lslack + Lentropy
Example #25
def calcSummaryStats(Dslice,
                     LP=None,
                     alpha=None,
                     alphaEbeta=None,
                     doTrackTruncationGrowth=0,
                     doPrecompEntropy=0,
                     doPrecompMergeEntropy=0,
                     mergePairSelection=None,
                     mPairIDs=None,
                     trackDocUsage=0,
                     **kwargs):
    """ Calculate summary from local parameters for given data slice.

    Parameters
    -------
    Data : bnpy data object
    LP : local param dict with fields
        resp : Data.nObs x K array,
            where resp[n,k] = posterior resp of comp k
    doPrecompEntropy : boolean flag
        indicates whether to precompute ELBO terms in advance
        used for memoized learning algorithms (moVB)

    Returns
    -------
    SS : SuffStatBag with K components
        Relevant fields
        * nDoc : scalar float
            Counts total documents available in provided data.
        * sumLogPi : 1D array, size K
            Entry k equals \sum_{d in docs} E[ \log \pi_{dk} ]
        * sumLogPiRem : scalar float
            Equals sum over docs of probability of inactive topics.

        Also has optional ELBO field when precompELBO is True
        * Hvec : 1D array, size K
            Vector of entropy contributions from each comp.
            Hvec[k] = \sum_{n=1}^N H[q(z_n)], a function of 'resp'
    """
    if mPairIDs is None:
        M = 0
    else:
        M = len(mPairIDs)
    K = LP['DocTopicCount'].shape[1]
    if 'digammaSumTheta' not in LP:
        digammaSumTheta = digamma(LP['theta'].sum(axis=1) + LP['thetaRem'])
        LP['digammaSumTheta'] = digammaSumTheta  # Used for merges

    if 'ElogPi' not in LP:
        LP['ElogPiRem'] = digamma(LP['thetaRem']) - LP['digammaSumTheta']
        LP['ElogPi'] = digamma(LP['theta']) - \
            LP['digammaSumTheta'][:, np.newaxis]

    SS = SuffStatBag(K=K, D=Dslice.dim, M=M)
    SS.setField('nDoc', Dslice.nDoc, dims=None)
    SS.setField('sumLogPi', np.sum(LP['ElogPi'], axis=0), dims='K')
    if 'ElogPiEmptyComp' in LP:
        sumLogPiEmptyComp = np.sum(LP['ElogPiEmptyComp']) - \
            np.sum(LP['ElogPiOrigComp'])
        SS.setField('sumLogPiEmptyComp', sumLogPiEmptyComp, dims=None)
    if doTrackTruncationGrowth:
        remvec = np.zeros(K)
        remvec[K - 1] = np.sum(LP['ElogPiRem'])
        SS.setField('sumLogPiRemVec', remvec, dims='K')
    else:
        SS.setField('sumLogPiRem', np.sum(LP['ElogPiRem']), dims=None)

    if doPrecompEntropy:
        Mdict = calcELBO_NonlinearTerms(Data=Dslice,
                                        LP=LP,
                                        returnMemoizedDict=1)
        if type(Mdict['Hresp']) == float:
            # SPARSE HARD ASSIGNMENTS
            SS.setELBOTerm('Hresp', Mdict['Hresp'], dims=None)
        else:
            SS.setELBOTerm('Hresp', Mdict['Hresp'], dims=('K', ))
        SS.setELBOTerm('slackTheta', Mdict['slackTheta'], dims='K')
        SS.setELBOTerm('gammalnTheta', Mdict['gammalnTheta'], dims='K')
        if 'ElogPiEmptyComp' in LP:
            SS.setELBOTerm('slackThetaEmptyComp', Mdict['slackThetaEmptyComp'])
            SS.setELBOTerm('gammalnThetaEmptyComp',
                           Mdict['gammalnThetaEmptyComp'])
            SS.setELBOTerm('HrespEmptyComp', Mdict['HrespEmptyComp'])

        else:
            SS.setELBOTerm('gammalnSumTheta',
                           Mdict['gammalnSumTheta'],
                           dims=None)
            SS.setELBOTerm('slackThetaRem', Mdict['slackThetaRem'], dims=None)
            SS.setELBOTerm('gammalnThetaRem',
                           Mdict['gammalnThetaRem'].sum(),
                           dims=None)

    if doPrecompMergeEntropy:
        if mPairIDs is None:
            raise NotImplementedError("TODO: all pairs for merges")
        m_Hresp = calcHrespForSpecificMergePairs(LP, Dslice, mPairIDs)
        if m_Hresp is not None:
            SS.setMergeTerm('Hresp', m_Hresp, dims=('M'))

        m_sumLogPi = np.zeros(M)
        m_gammalnTheta = np.zeros(M)
        m_slackTheta = np.zeros(M)
        for m, (kA, kB) in enumerate(mPairIDs):
            theta_vec = LP['theta'][:, kA] + LP['theta'][:, kB]
            ElogPi_vec = digamma(theta_vec) - LP['digammaSumTheta']
            m_gammalnTheta[m] = np.sum(gammaln(theta_vec))
            m_sumLogPi[m] = np.sum(ElogPi_vec)
            # slack = (Ndm - theta_dm) * E[log pi_dm]
            slack_vec = ElogPi_vec
            slack_vec *= -1 * (alphaEbeta[kA] + alphaEbeta[kB])
            m_slackTheta[m] = np.sum(slack_vec)
        SS.setMergeTerm('gammalnTheta', m_gammalnTheta, dims=('M'))
        SS.setMergeTerm('sumLogPi', m_sumLogPi, dims=('M'))
        SS.setMergeTerm('slackTheta', m_slackTheta, dims=('M'))

        # Uncomment this for verification of merge calculations.
        # for (kA, kB) in mPairIDs:
        #      self.verifySSForMergePair(Data, SS, LP, kA, kB)
        # .... end merge computations

    # Selection terms (computes doc-topic correlation)
    if mergePairSelection is not None:
        if mergePairSelection.count('corr') > 0:
            Tmat = LP['DocTopicCount']
            SS.setSelectionTerm('DocTopicPairMat',
                                np.dot(Tmat.T, Tmat),
                                dims=('K', 'K'))
            SS.setSelectionTerm('DocTopicSum', np.sum(Tmat, axis=0), dims='K')

    if trackDocUsage:
        # Track num of times a topic appears nontrivially in a doc
        DocUsage = np.sum(LP['DocTopicCount'] > 0.01, axis=0)
        SS.setSelectionTerm('DocUsageCount', DocUsage, dims='K')
        Pi = LP['theta'] / LP['theta'].sum(axis=1)[:, np.newaxis]
        SumPi = np.sum(Pi, axis=0)
        SS.setSelectionTerm('SumPi', SumPi, dims='K')
    return SS