def lda_elbo_entropy(gamma, phi):
    """Entropy of the variational distribution q for one LDA document.

    Accepts:
        gamma: K-size vector for the document.
        phi: (N x K) matrix.

    Returns a double representing the entropy term in the ELBO of LDA:

        H(q) = - sum_n sum_k phi[n,k] * log(phi[n,k])
               - log Gamma(sum_k gamma[k])
               + sum_k log Gamma(gamma[k])
               - sum_k (gamma[k] - 1) * E[log theta_k]
    """
    elbo = 0.0
    (N, K) = phi.shape
    ensure(len(gamma) == K)

    # Use the convention 0 * log(0) = 0.  Taking the log of phi directly
    # yields 0 * -inf = nan for any entry that is exactly zero, which would
    # turn the whole bound into nan.  Non-zero entries are left untouched.
    log_phi = np.log(np.where(phi == 0, 1.0, phi))
    elbo += -1 * np.sum(phi * log_phi)

    elbo += -1 * gammaln(np.sum(gamma))
    elbo += np.sum(gammaln(gamma))

    ElogTheta = graphlib.dirichlet_expectation(gamma)
    ensure(ElogTheta.shape == gamma.shape)
    elbo += -1 * sum((gamma - 1) * ElogTheta)
    return elbo
def test_sum(self):
    # Cases come from the numpy/core/fromnumeric.py docstring.
    from numpypy import array, sum, ones

    flat_total = sum([0.5, 1.5])
    assert flat_total == 2.0

    grid = [[0, 1], [0, 5]]
    assert sum(grid) == 6
    # assert sum([0.5, 0.7, 0.2, 1.5], dtype=int32) == 1

    column_totals = sum(grid, axis=0)
    assert (column_totals == array([0, 6])).all()
    row_totals = sum(grid, axis=1)
    assert (row_totals == array([1, 5])).all()
def dirichlet_expectation(alpha):
    """Compute E[log(theta)] for theta ~ Dir(alpha) (from Matt Hoffman).

    A 1-d alpha is treated as one parameter vector.  For any other rank the
    normalizing sum is taken along axis 1, so each row of a 2-d alpha is
    handled as its own Dirichlet.
    """
    if len(alpha.shape) != 1:
        row_totals = np.sum(alpha, 1)
        # Turn the per-row totals into a column so they broadcast over rows.
        return psi(alpha) - psi(row_totals)[:, np.newaxis]
    total = np.sum(alpha)
    return psi(alpha) - psi(total)
def calculate_EZ_from_small_phis(phi1, phi2):
    """Calculate E[Zd] from two small phi matrices.

    Accepts two small phi matrices (like (Nd x K) and (Nc x J)).
    Returns the final vector of size (K+J):

        E[Z] = (1/N) * sum_n phi_n,   with N = Nd + Nc
    """
    Ndc = phi1.shape[0] + phi2.shape[0]
    # The column sums of 2-d inputs are 1-d vectors, so they have to be
    # joined along axis 0; numpy rejects axis=1 for 1-d arrays.
    ez = np.concatenate((np.sum(phi1, axis=0), np.sum(phi2, axis=0)))
    # float() keeps this a true division even for integer-valued input.
    return ez / float(Ndc)
def axis_sum(matrix, axis):
    """Sum a 2-d array along one axis.

    Accepts a 2-d array and axis integer (0 or 1).
    axis=1 returns one sum per row, axis=0 one sum per column.
    """
    assert axis in [0, 1]
    if ispypy():
        # Under pypy the reduction is spelled out one row/column at a time.
        nrows, ncols = matrix.shape
        if axis == 1:
            return np.array([np.sum(matrix[i]) for i in xrange(nrows)])
        return np.array([np.sum(matrix[:, i]) for i in xrange(ncols)])
    # Honor the requested axis so both interpreters agree; summing along
    # axis 1 unconditionally would return row sums for axis=0.
    return np.sum(matrix, axis=axis)
def matrix_multiply(a, b):
    """Multiply two matrices, promoting 1-d operands to 2-d first.

    A 1-d `a` of length n is treated as an (n x 1) column.  A 1-d `b` of
    length m is treated as an (m x 1) column when m matches the number of
    columns of `a`, and as a (1 x m) row otherwise.

    Returns the 2-d product.  Raises AssertionError when the inner
    dimensions do not agree.
    """
    if len(a.shape) == 1:
        nrows, = a.shape
        # reshape keeps the operand's values; allocating a fresh zero
        # matrix here would throw the data away.
        a = a.reshape((nrows, 1))
    if len(b.shape) == 1:
        bc, = b.shape
        if bc == a.shape[1]:
            b = b.reshape((bc, 1))
        else:
            b = b.reshape((1, bc))

    nrows, ac = a.shape
    bc, ncols = b.shape
    assert ac == bc

    if ispypy():
        # Explicit row-by-column products for the pypy code path.
        n = np.zeros((nrows, ncols))
        for i in xrange(nrows):
            for j in xrange(ncols):
                n[i, j] = np.sum(a[i] * b[:, j])
        return n
    # The product must be returned, otherwise callers receive None.
    return np.dot(a, b)
def calculate_EZZT(big_phi):
    """Calculate E[Zd Zd^T] from a big phi matrix.

    Accepts a big phi matrix (like (N x K)).
    Returns the final matrix (K x K):

        E[Zd Zd^T] = (1/N^2) * ( sum_n sum_{m != n} phi_n phi_m^T
                                 + sum_n diag{phi_n} )
    """
    big_phi = np.asarray(big_phi)
    (N, K) = big_phi.shape
    phi_sum = np.sum(big_phi, axis=0)

    # sum_n sum_{m != n} phi_n phi_m^T
    #     = (sum_n phi_n)(sum_m phi_m)^T - sum_n phi_n phi_n^T
    # which avoids building an N x N outer product for every (i, j) pair.
    inner_sum = np.outer(phi_sum, phi_sum) - np.dot(big_phi.T, big_phi)
    inner_sum = inner_sum + np.diag(phi_sum)
    # float() keeps this a true division even for integer-valued input.
    return inner_sum / float(N * N)
def calculate_EZ(big_phi):
    """Calculate E[Zd] from a big phi matrix.

    Accepts a big phi matrix (like ((Nd+Nc) x (K+J))).
    Returns the final vector of size (K+J):

        E[Z] = (1/N) * sum_n phi_n
    """
    # Unpacking two values also insists that big_phi is 2-d.
    num_rows, _num_cols = big_phi.shape
    column_totals = np.sum(big_phi, axis=0)
    return column_totals / num_rows
def lda_update_gamma(alpha, phi, gamma):
    """Update gamma in place for one document and return it.

    Accepts: gamma and alpha are K-size vectors.  Phi is an N x K matrix.

        gamma_new <- alpha + sum_n phi_n
    """
    num_topics = phi.shape[1]
    ensure(num_topics == len(gamma))
    topic_totals = np.sum(phi, axis=0)
    # Slice assignment writes into the caller's gamma array.
    gamma[:] = alpha + topic_totals
    return gamma
def _unoptimized_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Word-by-word update of phi for one document.

    phi is an N x K matrix and gamma is a K-size vector.  phi is
    overwritten one row at a time and is also returned.

    Row n is set proportional to
        exp{ E[log theta | gamma] + E[log p(w_n | beta_1:K)]
             + (y / (N sigma^2)) eta
             - [2 (eta^T phi_{-n}) eta + (eta o eta)] / (2 N^2 sigma^2) }
    where phi_{-n} is the sum of all rows of phi except row n, and
    E[log p(w_n | beta_1:K)] = log beta[:, w_n].
    """
    (N, K) = phi.shape
    running_sum = np.sum(phi, axis=0)
    n_sigma_sq = (N * sigma_squared)

    ElogTheta = graphlib.dirichlet_expectation(gamma)
    ensure(len(ElogTheta) == K)

    # Terms that do not depend on the word being updated.
    response_term = (1.0 * y_d / n_sigma_sq * eta)
    eta_squared = (eta * eta)
    penalty_scale = (-1.0 / (2 * N * n_sigma_sq))

    for n, word, _count in iterwords(text):
        # Take row n out of the running sum so it holds phi_{-n}.
        running_sum -= phi[n]
        ensure(len(running_sum) == K)

        log_beta_word = np.log(beta[:, word])
        penalty_term = (penalty_scale
                        * ((2 * np.dot(eta, running_sum) * eta) + eta_squared))
        ensure(len(log_beta_word) == K)
        ensure(len(response_term) == K)
        ensure(len(penalty_term) == K)

        # Normalize in log space before exponentiating, for numerical
        # stability.
        phi[n, :] = ElogTheta + log_beta_word + response_term + penalty_term
        phi[n, :] -= graphlib.logsumexp(phi[n, :])
        phi[n, :] = np.exp(phi[n, :])

        # Put the refreshed row back; the next word's update depends on it,
        # so unlike in LDA the rows cannot be computed in parallel.
        running_sum += phi[n]
    return phi
def lda_elbo_terms(document, alpha, beta, gamma, phi):
    """Calculate the LDA-style terms of the ELBO for one document.

        E[log p(theta | alpha)] + sum_n E[log p(Z_n | theta)]
                                + sum_n E[log p(w_n | Z_n, beta_1:K)]

    with
        E[log p(theta | a)] = log Gamma(sum_k a_k) - sum_k log Gamma(a_k)
                              + sum_k (a_k - 1) E[log theta_k]
        E[log p(Z_n | theta)] = sum_k phi[n,k] E[log theta_k]
        E[log p(w_n | Z_n, beta_1:K)] = sum_k phi[n,k] log beta[k, w_n]

    E[log theta] is obtained from graphlib.dirichlet_expectation(gamma).
    """
    # Unpacking two values also insists that phi is 2-d.
    _num_words, _num_topics = phi.shape
    total = 0.0

    # E[log p(theta | a)]
    total += gammaln(np.sum(alpha)) - np.sum(gammaln(alpha))
    ElogTheta = graphlib.dirichlet_expectation(gamma)
    total += np.sum((alpha - 1) * ElogTheta)

    # The two per-word expectations share phi, so they are folded together:
    # sum_k phi[n,k] * (E[log theta_k] + log beta[k, w_n])
    if not isinstance(document, np.ndarray):
        for n, word, _count in iterwords(document):
            word_log_beta = np.log(beta[:, word])
            total += np.sum(phi[n] * (ElogTheta + word_log_beta))
        return total

    # Array documents are handled with a single matrix expression.
    all_log_beta = np.log(beta[:, document]).T
    total += np.sum(phi * (ElogTheta + all_log_beta))
    return total
def calculate_EZZT_from_small_phis(phi1, phi2):
    """Calculate E[Zd Zd^T] from two small phi matrices.

    Accepts two small phi matrices (like (Nd x K) and (Nc x J)).
    Returns the final matrix ((K+J) x (K+J)):

        E[Zd Zd^T] = (1/N^2) * ( sum_n sum_{m != n} phi_n phi_m^T
                                 + sum_n diag{phi_n} ),   N = Nd + Nc

    where each phi_n is a row of phi1 padded with J zeros or a row of phi2
    preceded by K zeros.
    """
    phi1 = np.asarray(phi1)
    phi2 = np.asarray(phi2)
    Nd, K = phi1.shape
    Nc, J = phi2.shape
    Ndc = Nd + Nc

    # Column sums of 2-d arrays are 1-d, so join them along axis 0.
    big_phi_sum = np.concatenate((np.sum(phi1, axis=0), np.sum(phi2, axis=0)))

    # sum over all (n, m) pairs is the outer product of the column sums.
    inner_sum = np.outer(big_phi_sum, big_phi_sum)
    # Remove the m == n pairs.  A word contributes only to its own block,
    # so the cross blocks (K x J and J x K) have nothing to remove.
    inner_sum[:K, :K] -= np.dot(phi1.T, phi1)
    inner_sum[K:, K:] -= np.dot(phi2.T, phi2)

    # np.diag builds the diagonal matrix from the vector; np.diagonal would
    # try to extract a diagonal and fails on 1-d input.
    inner_sum = inner_sum + np.diag(big_phi_sum)
    # float() keeps this a true division even for integer-valued input.
    return inner_sum / float(Ndc * Ndc)
def partial_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Same as slda update phi, but eta may be smaller than the number of topics.

    Only the first len(eta) topics contribute to y, so the response-related
    terms are applied to the first len(eta) columns of phi only.

    phi (N x K) is overwritten and also returned.  When text is a numpy
    array an approximate whole-matrix update is used; otherwise the rows
    are updated one word at a time via iterwords(text).
    """
    (N, K) = phi.shape
    Ks = len(eta)

    phi_sum = np.sum(phi[:, :Ks], axis=0)
    Ns = (N * sigma_squared)
    ElogTheta = graphlib.dirichlet_expectation(gamma)

    front = (-1.0 / (2 * N * Ns))
    eta_dot_eta = front * (eta * eta)
    pC = ((1.0 * y_d / Ns) * eta) + eta_dot_eta
    right_eta_times_const = (front * 2 * eta)

    if isinstance(text, np.ndarray):
        # if text is in array form, do an approximate fast matrix update
        phi_minus_n = -(phi[:, :Ks] - phi_sum)
        phi[:, :] = ElogTheta + np.log(beta[:, text].T)
        phi[:, :Ks] += pC
        # Entry (n, k) is (eta . phi_minus_n[n]) * right_eta_times_const[k].
        phi[:, :Ks] += np.outer(np.dot(phi_minus_n, eta), right_eta_times_const)
        graphlib.log_row_normalize(phi)
        phi[:, :] = np.exp(phi[:, :])
    else:
        # otherwise, iterate through each word
        for n, word, _count in iterwords(text):
            phi_sum -= phi[n, :Ks]

            pB = np.log(beta[:, word])
            pD = (np.dot(eta, phi_sum) * right_eta_times_const)

            # must exponentiate and normalize immediately!
            phi[n, :] = ElogTheta + pB
            # pC and pD have length Ks, so they apply to the first Ks
            # topics only; adding them to the whole row breaks (or
            # mis-broadcasts) whenever Ks < K.
            phi[n, :Ks] += pC + pD
            phi[n, :] -= graphlib.logsumexp(phi[n, :])  # normalize in logspace
            phi[n, :] = np.exp(phi[n, :])

            # add this back into the sum
            # unlike in LDA, this cannot be computed in parallel
            phi_sum += phi[n, :Ks]
    return phi
def lm_recalculate_eta_sigma(eta, y, phi1, phi2):
    """Recompute eta in place and return the sigma squared value.

    Accepts eta ((K+J)-size vector), y (a D-size vector of reals) and two
    D-size sequences of phi matrices (documents and comments).

        eta_new <- (E[A^T A])^-1 E[A]^T y

    A is the D x (K+J) matrix whose rows are Zd^T for document and comment
    concatenated; row d of E[A] comes from calculate_EZ_from_small_phis and
    the inverse from calculate_E_ATA_inverse_from_small_phis.

    If the absolute values of the new eta sum to more than 5 * (K+J)^2 the
    update is skipped and a message is printed.  Sigma squared is currently
    kept fixed, so the returned value is always 1.0.
    """
    ensure(len(phi1) == len(phi2))
    num_docs = len(phi1)
    # Unpacking two values also insists that the first matrices are 2-d.
    _Nd, K = phi1[0].shape
    _Nc, J = phi2[0].shape
    KJ = K + J

    expected_A = np.zeros((num_docs, KJ))
    for d in xrange(num_docs):
        expected_A[d, :] = calculate_EZ_from_small_phis(phi1[d], phi2[d])

    ATA_inverse = calculate_E_ATA_inverse_from_small_phis(phi1, phi2)

    projection = np.dot(ATA_inverse, expected_A.T)
    new_eta = np.dot(projection, y)

    eta_magnitude = np.sum(np.abs(new_eta))
    sanity_limit = (KJ * KJ * 5)
    if eta_magnitude > sanity_limit:
        print('ETA is GOING CRAZY {0}'.format(eta))
        print('aborting the update!!!')
    else:
        eta[:] = new_eta

    # todo: don't do this later
    # sigma squared is kept fixed for now instead of being re-estimated
    return 1.0
def test_sum(self):
    # Cases come from the numpy/core/fromnumeric.py docstring.
    from numpypy import sum, ones, zeros, array

    assert sum([0.5, 1.5]) == 2.0

    grid = [[0, 1], [0, 5]]
    assert sum(grid) == 6
    # assert sum([0.5, 0.7, 0.2, 1.5], dtype=int32) == 1
    assert (sum(grid, axis=0) == array([0, 6])).all()
    assert (sum(grid, axis=1) == array([1, 5])).all()

    # If the accumulator is too small, overflow occurs:
    # assert ones(128, dtype=int8).sum(dtype=int8) == -128

    assert sum(range(10)) == 45
    first_ten = array(range(10))
    assert sum(first_ten) == 45
    no_rows = zeros((0, 2))
    assert list(sum(no_rows, axis=1)) == []

    # Summing with out= given: the checks below expect the result to match
    # the row that was passed in and to differ from the other row.
    source = array([[1, 2], [3, 4]])
    target = array([[0, 0], [0, 0]])
    result = sum(source, axis=0, out=target[0])
    assert (result == [4, 6]).all()
    assert (result == target[0]).all()
    assert (result != target[1]).all()
def test_sum(self):
    # Examples below follow the numpy/core/fromnumeric.py docstring.
    from numpypy import array, sum, ones, zeros

    pair_total = sum([0.5, 1.5])
    assert pair_total == 2.0

    nested = [[0, 1], [0, 5]]
    assert sum(nested) == 6
    # assert sum([0.5, 0.7, 0.2, 1.5], dtype=int32) == 1
    per_column = sum(nested, axis=0)
    assert (per_column == array([0, 6])).all()
    per_row = sum(nested, axis=1)
    assert (per_row == array([1, 5])).all()

    # If the accumulator is too small, overflow occurs:
    # assert ones(128, dtype=int8).sum(dtype=int8) == -128

    assert sum(range(10)) == 45
    assert sum(array(range(10))) == 45
    assert list(sum(zeros((0, 2)), axis=1)) == []

    # Reduction into a caller-supplied output row.
    a = array([[1, 2], [3, 4]])
    out = array([[0, 0], [0, 0]])
    written = sum(a, axis=0, out=out[0])
    assert (written == [4, 6]).all()
    assert (written == out[0]).all()
    assert (written != out[1]).all()
def np_second_arg_array_index(matrix, array):
    """Calculates matrix[:,array].

    `array` holds column indices and may be 1-d or 2-d.  Outside pypy the
    indexing expression is evaluated directly.  Under pypy the selection is
    built column by column into a float array of zeros, giving shape
    (nrows,) + array.shape.
    """
    if not ispypy():
        return matrix[:, array]

    nrows, ncols = matrix.shape
    if len(array.shape) == 1:
        picked = np.zeros((nrows, array.shape[0]))
        for i in xrange(array.shape[0]):
            # Copy the whole selected column; summing it would collapse
            # the rows and return column totals instead of the columns.
            picked[:, i] = matrix[:, int(array[i])]
        return picked

    assert len(array.shape) == 2
    picked = np.zeros((nrows,) + tuple(array.shape))
    for i in xrange(array.shape[0]):
        picked[:, i, :] = np_second_arg_array_index(matrix, array[i])
    return picked
def test_sum(self):
    # Cases come from the numpy/core/fromnumeric.py docstring.
    from numpypy import array, sum, ones

    flat_total = sum([0.5, 1.5])
    assert flat_total == 2.0

    nested_total = sum([[0, 1], [0, 5]])
    assert nested_total == 6
def test_sum(self):
    # Examples below follow the numpy/core/fromnumeric.py docstring.
    from numpypy import array, sum, ones

    floats = [0.5, 1.5]
    assert sum(floats) == 2.0

    grid = [[0, 1], [0, 5]]
    assert sum(grid) == 6
def lm_global_elbo(documents, comments, alphaD, alphaC, betaD, betaC, gammaD, gammaC, phiD, phiC, y, eta, sigma_squared):
    """Given all of the parameters, calculate the evidence lower bound.

    Adds up lm_local_elbo over every document index d, passing the d-th
    entries of documents, comments, gammaD, gammaC, phiD, phiC and y along
    with the shared parameters.  Helps you know when convergence happens.
    """
    # Builtin sum consumes the generator directly; np.sum on a generator is
    # deprecated and only works by falling back to the builtin.
    return sum(lm_local_elbo(documents[d], comments[d],
                             alphaD, alphaC, betaD, betaC,
                             gammaD[d], gammaC[d], phiD[d], phiC[d],
                             y[d], eta, sigma_squared)
               for d in xrange(len(documents)))
def lda_global_elbo(v):
    """Add up lda_local_elbo over every document in v.documents.

    Reads v.documents, v.alpha, v.beta, v.gamma and v.phi.
    """
    # Builtin sum consumes the generator directly; np.sum on a generator is
    # deprecated and only works by falling back to the builtin.
    return sum(lda_local_elbo(v.documents[d], v.alpha, v.beta,
                              v.gamma[d], v.phi[d])
               for d in xrange(len(v.documents)))
def slda_global_elbo(v):
    """Add up slda_local_elbo over every document in v.documents.

    Reads v.documents, v.y, v.alpha, v.beta, v.gamma, v.phi, v.eta and
    v.sigma_squared.
    """
    # Builtin sum consumes the generator directly; np.sum on a generator is
    # deprecated and only works by falling back to the builtin.
    return sum(slda_local_elbo(v.documents[d], v.y[d], v.alpha, v.beta,
                               v.gamma[d], v.phi[d],
                               v.eta, v.sigma_squared)
               for d in xrange(len(v.documents)))
def tlc_global_elbo(v):
    """Add up topiclib.partial_slda_local_elbo over the labeled documents.

    Reads v.labeled, v.y, v.alphaL, v.beta (last v.Kl rows), v.gammaL,
    v.phiL, v.eta and v.sigma_squared.
    """
    # use equivalent of pSLDA global elbo just for this since it's the
    # important part; it's more efficient to calculate than the true elbo.
    #
    # Builtin sum consumes the generator directly; np.sum on a generator is
    # deprecated and only works by falling back to the builtin.
    return sum(topiclib.partial_slda_local_elbo(v.labeled[d], v.y[d],
                                                v.alphaL, v.beta[-v.Kl:],
                                                v.gammaL[d], v.phiL[d],
                                                v.eta, v.sigma_squared)
               for d in xrange(len(v.labeled)))
def c_loop(a):
    """Return numpy.sum(a), i.e. the sum over all elements of a."""
    total = numpy.sum(a)
    return total
def sum(cls, input_list, **kwargs):
    """Sum input_list with numpypy.sum and round the result.

    Extra keyword arguments are passed through to numpypy.sum.  The number
    of digits kept is taken from cls.default_round.
    """
    total = numpypy.sum(input_list, **kwargs)
    digits = cls.default_round
    return round(total, digits)