def train (data, model, query, plan):
    iterations, burnIn, thin, _, debug = \
        plan.iterations, plan.burnIn, plan.thin, plan.logFrequency, plan.debug
    w_list, z_list, docLens, _, _ = \
        query.w_list, query.z_list, query.docLens, query.topicSum, query.numSamples
    K, T, topicPrior, vocabPrior, _, _, _, dtype, name = \
        model.K, model.T, model.topicPrior, model.vocabPrior, model.topicSum, \
        model.vocabSum, model.numSamples, model.dtype, model.name

    assert model.dtype == np.float64, "This is only implemented for 64-bit floats"

    D = docLens.shape[0]

    # Sufficient statistics: per-document topic counts, per-topic word counts,
    # and per-topic token totals
    ndk = np.zeros((D, K), dtype=np.int32)
    nkv = np.zeros((K, T), dtype=np.int32)
    nk  = np.zeros((K,),   dtype=np.int32)

    # Accumulators for the retained (post burn-in, thinned) samples
    topicSum = np.zeros((D, K), dtype=dtype)
    vocabSum = np.zeros((K, T), dtype=dtype)

    compiled.initGlobalRng(0xC0FFEE)
    compiled.sumSuffStats(w_list, z_list, docLens, ndk, nkv, nk)

    # Burn in
    if debug: print("Burning")
    compiled.sample(burnIn, burnIn + 1, w_list, z_list, docLens,
                    ndk, nkv, nk, topicSum, vocabSum,
                    topicPrior, vocabPrior, False, debug)

    # True samples
    if debug: print("Sampling")
    numSamples = compiled.sample(iterations - burnIn, thin, w_list, z_list, docLens,
                                 ndk, nkv, nk, topicSum, vocabSum,
                                 topicPrior, vocabPrior, False, debug)

    # compiled.freeGlobalRng()

    return \
        ModelState(K, T, topicPrior, vocabPrior, topicSum, vocabSum, numSamples, True, dtype, name), \
        QueryState(w_list, z_list, docLens, topicSum, numSamples, True), \
        (np.zeros(1), np.zeros(1), np.zeros(1))
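
# ----------------------------------------------------------------------
# Illustrative sketch only (not called above): a pure-Python rendering of
# the collapsed Gibbs update that compiled.sample() is presumed to apply
# to each token. It assumes w_list and z_list are per-document integer
# arrays of word ids and topic assignments, and that topicPrior and
# vocabPrior may be scalars or vectors; the actual Cython memory layout
# and update order may differ.
# ----------------------------------------------------------------------
def _reference_gibbs_sweep(w_list, z_list, docLens, ndk, nkv, nk,
                           topicPrior, vocabPrior, rng=None):
    if rng is None:
        rng = np.random.default_rng()
    K = nkv.shape[0]
    vocabPriorSum = vocabPrior.sum() if np.ndim(vocabPrior) > 0 \
                    else vocabPrior * nkv.shape[1]
    for d in range(docLens.shape[0]):
        w_d, z_d = w_list[d], z_list[d]
        for n in range(docLens[d]):
            v, k_old = w_d[n], z_d[n]
            # Remove this token's current assignment from the counts
            ndk[d, k_old] -= 1
            nkv[k_old, v] -= 1
            nk[k_old]     -= 1
            # p(z_dn = k | rest) up to a constant:
            #   (n_dk + alpha_k) * (n_kv + beta_v) / (n_k + sum_v beta_v)
            beta_v = vocabPrior[v] if np.ndim(vocabPrior) > 0 else vocabPrior
            p = (ndk[d, :] + topicPrior) * (nkv[:, v] + beta_v) / (nk + vocabPriorSum)
            k_new = rng.choice(K, p=p / p.sum())
            # Add the token back under its newly sampled topic
            z_d[n] = k_new
            ndk[d, k_new] += 1
            nkv[k_new, v] += 1
            nk[k_new]     += 1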
def query (data, model, query, plan):
    iterations, burnIn, thin, _, debug = \
        plan.iterations, plan.burnIn, plan.thin, plan.logFrequency, plan.debug
    w_list, z_list, docLens, _, _ = \
        query.w_list, query.z_list, query.docLens, query.topicSum, query.numSamples
    K, T, topicPrior, vocabPrior, _, _, _, dtype, name = \
        model.K, model.T, model.topicPrior, model.vocabPrior, model.topicSum, \
        model.vocabSum, model.numSamples, model.dtype, model.name

    assert model.dtype == np.float64, "This is only implemented for 64-bit floats"

    D = docLens.shape[0]

    compiled.initGlobalRng(0xC0FFEE)

    # Topic assignments start from scratch for the query documents, but the
    # topic-word counts are seeded from the trained word distributions,
    # scaled up into integer pseudo-counts
    ndk = np.zeros((D, K), dtype=np.int32)
    nkv = (wordDists(model) * 1000000).astype(np.int32)
    nk  = nkv.sum(axis=1).astype(np.int32)
    adjustedVocabPrior = np.zeros((T,), dtype=model.dtype)  # already incorporated into nkv

    topicSum = np.zeros((D, K), dtype=dtype)
    vocabSum = model.vocabSum

    compiled.sumSuffStats(w_list, z_list, docLens, ndk, nkv, nk)

    # Burn in
    compiled.sample(burnIn, burnIn + 1, w_list, z_list, docLens,
                    ndk, nkv, nk, topicSum, vocabSum,
                    topicPrior, adjustedVocabPrior, True, debug)

    # True samples
    numSamples = compiled.sample(iterations - burnIn, thin, w_list, z_list, docLens,
                                 ndk, nkv, nk, topicSum, vocabSum,
                                 topicPrior, adjustedVocabPrior, True, debug)

    return \
        ModelState(K, T, topicPrior, vocabPrior, topicSum, vocabSum, numSamples, model.processed, dtype, name), \
        QueryState(w_list, z_list, docLens, topicSum, numSamples, True)
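
# ----------------------------------------------------------------------
# Illustrative sketch only: one plausible way to turn the accumulated
# sums returned above into point estimates. The package's own helpers
# (e.g. wordDists()) may normalise differently, for instance by folding
# the priors in elsewhere, so treat these formulas as an assumption
# rather than the library's definition.
# ----------------------------------------------------------------------
def _mean_topic_dists(queryState, topicPrior):
    # Average the retained per-document topic counts over the kept samples,
    # smooth with the topic prior, and normalise each document's row
    avg = queryState.topicSum / max(queryState.numSamples, 1) + topicPrior
    return avg / avg.sum(axis=1, keepdims=True)

def _mean_word_dists(modelState, vocabPrior):
    # Average the retained topic-word counts over the kept samples,
    # smooth with the vocabulary prior, and normalise each topic's row
    avg = modelState.vocabSum / max(modelState.numSamples, 1) + vocabPrior
    return avg / avg.sum(axis=1, keepdims=True)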