def var_bound(data, modelState, queryState, XTX = None):
    '''
    Determines the variational bound. Values are mutated in place, but are
    reset afterwards to their initial values, so it's safe to call in a
    serial manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, _ = W.shape
    means, varcs, lxi, s, docLens = \
        queryState.means, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens
    F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, dtype = \
        modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, \
        modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, \
        modelState.sigT, modelState.vocab, modelState.dtype

    # Calculate some implicit variables
    xi = ctm._deriveXi(means, varcs, s)
    isigT = la.inv(sigT)
    #lnDetSigT = np.log(la.det(sigT))
    lnDetSigT = lnDetOfDiagMat(sigT)
    verifyProper(lnDetSigT, "lnDetSigT")

    if XTX is None:
        XTX = X.T.dot(X)

    bound = 0

    # Distribution over latent space
    bound -= (P*K)/2. * LN_OF_2_PI
    bound -= P * lnDetSigT
    bound -= K * P * log(lfv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(Y) * Y)
    bound -= 0.5 * K * np.trace(R_Y)

    # And its entropy
    detR_Y = safeDet(R_Y, "R_Y")
    bound += 0.5 * LN_OF_2_PI_E + P/2. * lnDetSigT + K/2. * log(detR_Y)

    # Distribution over mapping from features to topics
    diff   = (A - Y.dot(V))
    bound -= (F*K)/2. * LN_OF_2_PI
    bound -= F * lnDetSigT
    bound -= K * P * log(fv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(diff) * diff)
    bound -= 0.5 * K * np.trace(R_A)

    # And its entropy
    detR_A = safeDet(R_A, "R_A")
    bound += 0.5 * LN_OF_2_PI_E + F/2. * lnDetSigT + K/2. * log(detR_A)

    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * lnDetSigT
    diff   = means - X.dot(A.T)
    bound -= 0.5 * np.sum(diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis, :])  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * K * np.trace(XTX.dot(R_A))

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments
    # This also takes into account all the variables that
    # constitute the bound on log(sum_j exp(mean_j)) and
    # also incorporates the implicit entropy of Z_dvk
    bound -= np.sum((means * means + varcs) * docLens[:, np.newaxis] * lxi)
    bound += np.sum(means * 2 * docLens[:, np.newaxis] * s[:, np.newaxis] * lxi)
    bound += np.sum(means * -0.5 * docLens[:, np.newaxis])
    # The last term of line 1 gets cancelled out by part of the first term in line 2
    # so neither are included here.

    # Shift by the row maxes and exponentiate in place, to avoid overflow in exp()
    row_maxes = means.max(axis=1)
    means    -= row_maxes[:, np.newaxis]
    expMeans  = np.exp(means, out=means)

    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)

    bound -= np.sum(docLens[:, np.newaxis] * lxi * ((s * s)[:, np.newaxis] - (xi * xi)))
    bound += np.sum(0.5 * docLens[:, np.newaxis] * (s[:, np.newaxis] + xi))
    # bound -= np.sum(docLens[:,np.newaxis] * safe_log_one_plus_exp_of(xi))
    bound -= scaledSumOfLnOnePlusExp(docLens, xi)

    bound -= np.dot(s, docLens)

    # Restore means to its initial value, undoing the in-place shift and exp above
    means  = np.log(expMeans, out=expMeans)
    means += row_maxes[:, np.newaxis]

    return bound
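# The xi / lxi / s terms above appear to implement a Bouchard-style quadratic
# upper bound on log(sum_k exp(mean_k)), built from the Jaakkola-Jordan bound
# on log(1 + e^y). Below is a minimal, self-contained sketch of that bound for
# a single document; the names jj_lambda and bouchard_upper_bound, and the
# assumption that queryState.lxi caches jj_lambda(xi), are illustrative only
# and not taken from this module.
import numpy as np

def jj_lambda(xi):
    # Jaakkola-Jordan coefficient: lambda(xi) = tanh(xi / 2) / (4 * xi)
    return np.tanh(xi / 2.) / (4. * xi)

def bouchard_upper_bound(x, xi, alpha):
    # log(sum_k exp(x_k)) <= alpha + sum_k [ (x_k - alpha - xi_k) / 2
    #                                        + lambda(xi_k) * ((x_k - alpha)**2 - xi_k**2)
    #                                        + log(1 + exp(xi_k)) ]
    y = x - alpha
    return alpha + np.sum((y - xi) / 2.
                          + jj_lambda(xi) * (y * y - xi * xi)
                          + np.log1p(np.exp(xi)))

x     = np.array([0.3, -1.2, 2.0, 0.0])   # plays the role of one row of means
alpha = 0.8                               # plays the role of s[d]
xi    = np.abs(x - alpha)                 # each per-term JJ bound is tight here
print(np.log(np.sum(np.exp(x))), "<=", bouchard_upper_bound(x, xi, alpha))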
def var_bound(data, modelState, queryState, XTX=None):
    '''
    Determines the variational bound. Values are mutated in place, but are
    reset afterwards to their initial values, so it's safe to call in a
    serial manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, _ = W.shape
    means, expMeans, varcs, docLens = \
        queryState.means, queryState.expMeans, queryState.varcs, queryState.docLens
    F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, Ab, dtype = \
        modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, \
        modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, \
        modelState.sigT, modelState.vocab, modelState.Ab, modelState.dtype

    # Calculate some implicit variables
    isigT = la.inv(sigT)
    lnDetSigT = lnDetOfDiagMat(sigT)
    verifyProper(lnDetSigT, "lnDetSigT")

    if XTX is None:
        XTX = X.T.dot(X)

    bound = 0

    # Distribution over latent space
    bound -= (P*K)/2. * LN_OF_2_PI
    bound -= P * lnDetSigT
    bound -= K * P * log(lfv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(Y) * Y)
    bound -= 0.5 * K * np.trace(R_Y)

    # And its entropy
    detR_Y = safeDet(R_Y, "R_Y")
    bound += 0.5 * LN_OF_2_PI_E + P/2. * lnDetSigT + K/2. * log(detR_Y)

    # Distribution over mapping from features to topics
    diff   = (A - Y.dot(V))
    bound -= (F*K)/2. * LN_OF_2_PI
    bound -= F * lnDetSigT
    bound -= K * P * log(fv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(diff) * diff)
    bound -= 0.5 * K * np.trace(R_A)

    # And its entropy
    detR_A = safeDet(R_A, "R_A")
    bound += 0.5 * LN_OF_2_PI_E + F/2. * lnDetSigT + K/2. * log(detR_A)

    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * lnDetSigT
    diff   = means - X.dot(A.T)
    bound -= 0.5 * np.sum(diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis, :])  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * K * np.trace(XTX.dot(R_A))

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments, and their entropy
    # and distribution over words. This is re-arranged as we need
    # means for some parts, and exp(means) for other parts
    expMeans = np.exp(means - means.max(axis=1)[:, np.newaxis], out=expMeans)
    R = sparseScalarQuotientOfDot(W, expMeans, vocab)  # D x V; [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    S = expMeans * (R.dot(vocab.T))                    # D x K

    bound += np.sum(docLens * np.log(np.sum(expMeans, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)

    bound += np.sum(means * S)
    bound += np.sum(2 * ssp.diags(docLens, 0) * means.dot(Ab) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:, np.newaxis] * S * (np.diag(Ab))[np.newaxis, :])

    bound -= np.sum(means * S)

    return bound
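# Both variants guard against overflow by subtracting the per-row maximum of
# means before exponentiating. The snippet below is a minimal, self-contained
# demonstration of that trick (it is not part of this module): the shifted
# exponentials stay in (0, 1], the normalised proportions are unchanged, and
# the original values can be recovered by taking the log and adding the row
# maximum back, which is how the first variant undoes its in-place mutation.
import numpy as np

means = np.array([[710.0, 708.0, 700.0],   # raw np.exp() would overflow float64
                  [ -3.0,   0.5,   2.0]])

row_maxes = means.max(axis=1)
shifted   = np.exp(means - row_maxes[:, np.newaxis])   # every entry in (0, 1]

# Normalised proportions are identical to what the unshifted exp() would give
props = shifted / shifted.sum(axis=1, keepdims=True)
print(props)

# The log-normaliser of each row is recovered as row_max + log(sum(shifted))
print(row_maxes + np.log(shifted.sum(axis=1)))

# And the original matrix is recovered exactly, mirroring the restore step above
restored = np.log(shifted) + row_maxes[:, np.newaxis]
print(np.allclose(restored, means))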