Пример #1
0
    def testSanity(self):
        """
        Sanity check online Gibbs init scheme against deltaLDA.

        Calls deltaLDA on a fixed corpus with a fixed random seed, builds
        one gamma matrix per document in self.docs_w from the returned
        phi/theta, and runs cvbLDA from that explicit gamma_init.  The
        result is compared (via self.matAgree) with a second cvbLDA run
        that receives only the same random seed.  Every phi/theta is also
        checked with self.matProb.
        """
        # Don't even try unless deltaLDA module present.
        # NOTE: returning here makes the test count as passed, not skipped.
        if not hasDelta:
            return

        randseed = 194582

        # Use 1 'online' Gibbs sample to build initial gamma
        gibbs_docs = [[1,1,2],
                      [1,1,1,1,2],
                      [3,3,3,4],
                      [3,3,3,3,4,4],
                      [0,0,0,0,0],
                      [0,0,0,0]]
        numsamp = 0
        (phi,theta,sample) = deltaLDA(gibbs_docs,self.alpha,self.beta,
                                      numsamp,randseed)
        gamma_init = []
        for (di,d) in enumerate(self.docs_w):
            # One column per entry of the document, one row per topic
            doc_gamma = zeros((self.T,len(d)))
            for (i,w) in enumerate(d):
                doc_gamma[:,i] = theta[di,:] * phi[:,w]
                # normalize the column so it sums to 1
                doc_gamma[:,i] = doc_gamma[:,i] / doc_gamma[:,i].sum()
            # save
            gamma_init.append(doc_gamma)

        # Run cvbLDA with this gamma
        (gphi,gtheta,gamma) = cvbLDA(self.docs_w,self.docs_c,
                                     self.alpha,self.beta,
                                     gamma_init=gamma_init,
                                     maxiter=self.maxiter,
                                     convtol=self.convtol)

        # Run cvbLDA no init gamma, same randseed
        (phi,theta,gamma) = cvbLDA(self.docs_w,self.docs_c,
                                   self.alpha,self.beta,
                                   randseed=randseed,
                                   maxiter=self.maxiter,
                                   convtol=self.convtol)

        # assertTrue replaces the deprecated assert_ alias
        self.assertTrue(self.matAgree(phi,gphi))
        self.assertTrue(self.matProb(phi))
        self.assertTrue(self.matProb(gphi))

        self.assertTrue(self.matAgree(theta,gtheta))
        self.assertTrue(self.matProb(theta))
        self.assertTrue(self.matProb(gtheta))
Пример #2
0
    def testStandard(self):
        """
        Test standard LDA with base data/params.

        Runs cvbLDA on self.docs_w / self.docs_c and checks that the
        most probable topic (argmax over each row of theta) is shared by
        documents 0-1, 2-3 and 4-5, that the most probable word of each
        of those topics (argmax over each row of phi) is 1, 3 and 0
        respectively, and that theta and phi both pass self.matProb.
        """
        (phi,theta,gamma) = cvbLDA(self.docs_w,self.docs_c,
                                   self.alpha,self.beta,
                                   maxiter=self.maxiter,
                                   convtol=self.convtol)

        # theta should clust docs [0,1], [2,3], [4,5]
        maxtheta = argmax(theta,axis=1)
        # assertEqual reports both values when the check fails
        self.assertEqual(maxtheta[0], maxtheta[1])
        self.assertEqual(maxtheta[2], maxtheta[3])
        self.assertEqual(maxtheta[4], maxtheta[5])
        # theta valid prob matrix
        self.assertTrue(self.matProb(theta))

        # corresponding phi should emph [1,2], [3,4], [0]
        maxphi = argmax(phi,axis=1)
        self.assertEqual(maxphi[maxtheta[0]], 1)
        self.assertEqual(maxphi[maxtheta[2]], 3)
        self.assertEqual(maxphi[maxtheta[4]], 0)
        # phi valid prob matrix
        self.assertTrue(self.matProb(phi))
    beta = .1 * np.ones((nb_topics, vocab_size))

    nb_iter_max = 100
    tol = .001

# If the user has specified some parameters, expand alpha and beta to full-size arrays
else:
    alpha = alpha * np.ones((1, nb_topics))
    beta = beta * np.ones((nb_topics, vocab_size))

# Record the wall-clock time just before inference starts.
start_time = time.time()

# Run cvbLDA on the corpus with the hyperparameters and stopping
# conditions set above; verbose=0 is passed explicitly.
(phi, theta, gamma) = cvbLDA(
    words, counts, alpha, beta,
    convtol=tol,
    maxiter=nb_iter_max,
    verbose=0)

# Report how long the call took, in seconds.
print('\nCollapsed variational inference LDA exec time: '
      + str(time.time() - start_time)
      + 's')

#print('Theta, p(z|d)')
#print(str(theta))

#print('Phi, p(w|z)')
#print(str(phi))

topic = []
Пример #4
0
# Stopping conditions for inference:
# -stop after maxiter iterations
# -stop once sum of absolute changes in all gamma variational
#  parameters for a single iteration falls below convtol
# (whichever occurs FIRST)
#
# If these parameters are not supplied, stop after 100 iterations
#
(maxiter, convtol) = (10, .01)

# Do CVB inference for LDA
#
(phi, theta, gamma) = cvbLDA(docs_w,
                             docs_c,
                             alpha,
                             beta,
                             maxiter=maxiter,
                             verbose=1,
                             convtol=convtol)

# theta is the matrix of document-topic probabilities
# (estimated from expected counts under variational posterior)
#
# theta = D x T
# theta[di,zj] = P(z=zj | d=di)
#
# Single-argument parenthesised print gives the same output under the
# Python 2 print statement and the Python 3 print function.
print('')
print('Theta - P(z|d)')
print(str(theta))
print('')
Пример #5
0
          [5],
          [4]]

# Stopping conditions for inference:
# -stop after maxiter iterations
# -stop once sum of absolute changes in all gamma variational
#  parameters for a single iteration falls below convtol
# (whichever occurs FIRST)
#
# If these parameters are not supplied, stop after 100 iterations
#
(maxiter,convtol) = (10,.01)

# Do CVB inference for LDA
#
(phi,theta,gamma) = cvbLDA(docs_w,docs_c,alpha,beta,
                           maxiter=maxiter,verbose=1,convtol=convtol)

# theta is the matrix of document-topic probabilities
# (estimated from expected counts under variational posterior)
#
# theta = D x T
# theta[di,zj] = P(z=zj | d=di)
#
# print is called with one parenthesised argument so these lines run
# unchanged on both Python 2 and Python 3.
print('')
print('Theta - P(z|d)')
print(str(theta))
print('')

# phi is the matrix of topic-word probabilities 
# (estimated from expected counts under variational posterior)
#