def var_bound(data, modelState, queryState):
    '''
    Determines the variational bound on the log-likelihood of the data.

    Values are mutated in place, but are reset afterwards to their initial
    values. So it's safe to call in a serial manner.

    Params:
    data       -- the dataset; only data.words (a DxT sparse count matrix) is used
    modelState -- model parameters: K, topicMean, sigT (topic covariance), vocab
    queryState -- per-document variational parameters: means, expMeans, varcs,
                  lxi, s, docLens

    Returns the scalar variational bound.
    '''
    # Unpack the structs, for ease of access and efficiency
    W   = data.words
    D,_ = W.shape
    means, expMeans, varcs, lxi, s, docLens = \
        queryState.means, queryState.expMeans, queryState.varcs, \
        queryState.lxi, queryState.s, queryState.docLens
    K, topicMean, sigT, vocab = \
        modelState.K, modelState.topicMean, modelState.sigT, modelState.vocab

    # Calculate some implicit variables
    xi    = _deriveXi(means, varcs, s)
    isigT = la.inv(sigT)

    bound = 0

    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    # FIX: the Gaussian log-density requires the log-determinant, not the raw
    # determinant (cf. the sibling var_bound which uses lnDetSigT here).
    bound -= D/2. * np.log(la.det(sigT))
    diff   = means - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum(diff.dot(isigT) * diff)
    # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:])

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments
    # This also takes into account all the variables that
    # constitute the bound on log(sum_j exp(mean_j)) and
    # also incorporates the implicit entropy of Z_dvk
    bound -= np.sum((means*means + varcs) * docLens[:,np.newaxis] * lxi)
    bound += np.sum(means * 2 * docLens[:,np.newaxis] * s[:,np.newaxis] * lxi)
    bound += np.sum(means * -0.5 * docLens[:,np.newaxis])
    # The last term of line 1 gets cancelled out by part of the first term in line 2
    # so neither are included here

    # Row-max subtraction keeps exp() from overflowing; written into expMeans
    # so means itself is untouched in this variant.
    expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
    bound -= -np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)

    bound -= np.sum(docLens[:,np.newaxis] * lxi * ((s*s)[:,np.newaxis] - (xi * xi)))
    bound += np.sum(0.5 * docLens[:,np.newaxis] * (s[:,np.newaxis] + xi))
    # bound -= np.sum(docLens[:,np.newaxis] * safe_log_one_plus_exp_of(xi))
    bound -= scaledSumOfLnOnePlusExp(docLens, xi)

    bound -= np.dot(s, docLens)

    return bound
def var_bound(data, modelState, queryState, XTX = None): ''' Determines the variational bounds. Values are mutated in place, but are reset afterwards to their initial values. So it's safe to call in a serial manner. ''' # Unpack the the structs, for ease of access and efficiency W, X = data.words, data.feats D, _ = W.shape means, varcs, lxi, s, docLens = queryState.means, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, dtype = modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, modelState.sigT, modelState.vocab, modelState.dtype # Calculate some implicit variables xi = ctm._deriveXi(means, varcs, s) isigT = la.inv(sigT) #lnDetSigT = np.log(la.det(sigT)) lnDetSigT = lnDetOfDiagMat(sigT) verifyProper(lnDetSigT, "lnDetSigT") if XTX is None: XTX = X.T.dot(X) bound = 0 # Distribution over latent space bound -= (P*K)/2. * LN_OF_2_PI bound -= P * lnDetSigT bound -= K * P * log(lfv) bound -= 0.5 * np.sum(1./lfv * isigT.dot(Y) * Y) bound -= 0.5 * K * np.trace(R_Y) # And its entropy detR_Y = safeDet(R_Y, "R_Y") bound += 0.5 * LN_OF_2_PI_E + P/2. * lnDetSigT + K/2. * log(detR_Y) # Distribution over mapping from features to topics diff = (A - Y.dot(V)) bound -= (F*K)/2. * LN_OF_2_PI bound -= F * lnDetSigT bound -= K * P * log(fv) bound -= 0.5 * np.sum (1./lfv * isigT.dot(diff) * diff) bound -= 0.5 * K * np.trace(R_A) # And its entropy detR_A = safeDet(R_A, "R_A") bound += 0.5 * LN_OF_2_PI_E + F/2. * lnDetSigT + K/2. * log(detR_A) # Distribution over document topics bound -= (D*K)/2. * LN_OF_2_PI bound -= D/2. * lnDetSigT diff = means - X.dot(A.T) bound -= 0.5 * np.sum (diff.dot(isigT) * diff) bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only. 
bound -= 0.5 * K * np.trace(XTX.dot(R_A)) # And its entropy bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) # Distribution over word-topic assignments # This also takes into account all the variables that # constitute the bound on log(sum_j exp(mean_j)) and # also incorporates the implicit entropy of Z_dvk bound -= np.sum((means*means + varcs) * docLens[:,np.newaxis] * lxi) bound += np.sum(means * 2 * docLens[:,np.newaxis] * s[:,np.newaxis] * lxi) bound += np.sum(means * -0.5 * docLens[:,np.newaxis]) # The last term of line 1 gets cancelled out by part of the first term in line 2 # so neither are included here. row_maxes = means.max(axis=1) means -= row_maxes[:,np.newaxis] expMeans = np.exp(means, out=means) bound -= -np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data) bound -= np.sum(docLens[:,np.newaxis] * lxi * ((s*s)[:,np.newaxis] - (xi * xi))) bound += np.sum(0.5 * docLens[:,np.newaxis] * (s[:,np.newaxis] + xi)) # bound -= np.sum(docLens[:,np.newaxis] * safe_log_one_plus_exp_of(xi)) bound -= scaledSumOfLnOnePlusExp(docLens, xi) bound -= np.dot(s, docLens) means = np.log(expMeans, out=expMeans) means += row_maxes[:,np.newaxis] return bound