def testSanity(self):
    """ Sanity check online Gibbs init scheme against deltaLDA """
    # Don't even try unless deltaLDA module present
    if(not hasDelta):
        return
    randseed = 194582
    # Use 1 'online' Gibbs sample to build initial gamma
    gibbs_docs = [[1,1,2],
                  [1,1,1,1,2],
                  [3,3,3,4],
                  [3,3,3,3,4,4],
                  [0,0,0,0,0],
                  [0,0,0,0]]
    numsamp = 0
    (phi,theta,sample) = deltaLDA(gibbs_docs,self.alpha,self.beta,
                                  numsamp,randseed)
    gamma_init = []
    for (d,di) in zip(self.docs_w,range(len(self.docs_w))):
        gamma = zeros((self.T,len(d)))
        for (w,i) in zip(d,range(len(d))):
            gamma[:,i] = theta[di,:] * phi[:,w]
            # normalize
            gamma[:,i] = gamma[:,i] / gamma[:,i].sum()
        # save
        gamma_init.append(gamma)
    # Run cvbLDA with this gamma
    (gphi,gtheta,gamma) = cvbLDA(self.docs_w,self.docs_c,
                                 self.alpha,self.beta,
                                 gamma_init=gamma_init,
                                 maxiter=self.maxiter,
                                 convtol=self.convtol)
    # Run cvbLDA no init gamma, same randseed
    (phi,theta,gamma) = cvbLDA(self.docs_w,self.docs_c,
                               self.alpha,self.beta,
                               randseed=randseed,
                               maxiter=self.maxiter,
                               convtol=self.convtol)
    self.assert_(self.matAgree(phi,gphi))
    self.assert_(self.matProb(phi))
    self.assert_(self.matProb(gphi))
    self.assert_(self.matAgree(theta,gtheta))
    self.assert_(self.matProb(theta))
    self.assert_(self.matProb(gtheta))
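# A minimal standalone sketch of the gamma-initialization rule used in
# testSanity above: each token's variational posterior is set proportional
# to theta[d,:] * phi[:,w], then normalized. The name init_gamma is
# hypothetical (not part of cvbLDA's API); phi is assumed T x W and
# theta D x T, as returned by deltaLDA.
import numpy as np

def init_gamma(docs_w, phi, theta):
    gamma_init = []
    for di, d in enumerate(docs_w):
        gamma = np.empty((phi.shape[0], len(d)))
        for i, w in enumerate(d):
            # posterior over topics for token i of document di
            gamma[:, i] = theta[di, :] * phi[:, w]
            gamma[:, i] /= gamma[:, i].sum()
        gamma_init.append(gamma)
    return gamma_init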
def testStandard(self):
    """ Test standard LDA with base data/params """
    (phi,theta,gamma) = cvbLDA(self.docs_w,self.docs_c,
                               self.alpha,self.beta,
                               maxiter=self.maxiter,
                               convtol=self.convtol)
    # theta should cluster docs [0,1], [2,3], [4,5]
    maxtheta = argmax(theta,axis=1)
    self.assert_(maxtheta[0] == maxtheta[1])
    self.assert_(maxtheta[2] == maxtheta[3])
    self.assert_(maxtheta[4] == maxtheta[5])
    # theta valid prob matrix
    self.assert_(self.matProb(theta))
    # corresponding phi should emphasize words [1,2], [3,4], [0]
    maxphi = argmax(phi,axis=1)
    self.assert_(maxphi[maxtheta[0]] == 1)
    self.assert_(maxphi[maxtheta[2]] == 3)
    self.assert_(maxphi[maxtheta[4]] == 0)
    # phi valid prob matrix
    self.assert_(self.matProb(phi))
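# The matProb helper is not shown in this excerpt; a plausible sketch of
# what the assertions above check (each row is a valid probability
# distribution), assuming NumPy. The real test-class implementation may
# differ.
import numpy as np

def matProb(mat, tol=1e-6):
    rows_sum_to_one = np.abs(mat.sum(axis=1) - 1).max() < tol
    nonneg = (mat >= 0).all()
    return bool(rows_sum_to_one and nonneg)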
    beta = .1 * np.ones((nb_topics, vocab_size))
# If the user has specified the parameters, use them
else:
    alpha = alpha * np.ones((1, nb_topics))
    beta = beta * np.ones((nb_topics, vocab_size))

# Stopping conditions for inference (defined outside the branch above so
# they are always set before the call)
nb_iter_max = 100
tol = .001

start_time = time.time()
(phi, theta, gamma) = cvbLDA(words, counts, alpha, beta,
                             maxiter=nb_iter_max, verbose=0, convtol=tol)
print('\nCollapsed variational inference LDA exec time: '
      + str(time.time() - start_time) + 's')

#print('Theta, p(z|d)')
#print(str(theta))
#print('Phi, p(w|z)')
#print(str(phi))

topic = []
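# For inspection, the per-topic word distributions in phi can be turned
# into ranked word lists. A hedged sketch of one way to fill `topic`,
# assuming an id->word list `vocab` (not defined in this excerpt); this is
# a guess at the continuation, not the original code.
n_top = 10
for z in range(nb_topics):
    top_ids = np.argsort(phi[z, :])[::-1][:n_top]
    topic.append([vocab[w] for w in top_ids])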
          [5],
          [4]]

# Stopping conditions for inference:
# -stop after maxiter iterations
# -stop once sum of absolute changes in all gamma variational
#  parameters for a single iteration falls below convtol
# (whichever occurs FIRST)
#
# If these parameters are not supplied, stop after 100 iterations
#
(maxiter,convtol) = (10,.01)

# Do CVB inference for LDA
#
(phi,theta,gamma) = cvbLDA(docs_w,docs_c,alpha,beta,
                           maxiter=maxiter,verbose=1,convtol=convtol)

# theta is the matrix of document-topic probabilities
# (estimated from expected counts under variational posterior)
#
# theta = D x T
# theta[di,zj] = P(z=zj | d=di)
#
print ''
print 'Theta - P(z|d)'
print str(theta)
print ''

# phi is the matrix of topic-word probabilities
# (estimated from expected counts under variational posterior)
#
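# Quick sanity check on the returned matrices (a usage sketch, assuming
# NumPy is available): every row of theta (one per document) and every row
# of phi (one per topic) should sum to 1, matching the P(z|d) and P(w|z)
# interpretations described above.
import numpy as np
assert np.allclose(theta.sum(axis=1), 1)
assert np.allclose(phi.sum(axis=1), 1)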