Пример #1
0
def train (data, model, query, plan):
    """
    Runs the compiled sampler over the assignments held in the query state
    and returns freshly accumulated model and query states.

    The count arrays (ndk, nkv, nk) are allocated as zeros, filled by
    compiled.sumSuffStats(), and then handed to compiled.sample() twice:
    once for burn-in and once for the retained samples. The compiled
    module's global RNG is seeded with a fixed constant beforehand.

    Params:
    data  - not used by this function
    model - read for K, T, topicPrior, vocabPrior, dtype and name
    query - read for w_list, z_list and docLens
    plan  - read for iterations, burnIn, thin and debug

    Return:
    A three-element tuple of
     * a ModelState built from the newly accumulated topicSum / vocabSum
     * a QueryState which shares that same topicSum array
     * a triple of one-element zero arrays

    Raises:
    AssertionError if model.dtype is not np.float64
    """
    iterations, burnIn, thin, debug = \
        plan.iterations, plan.burnIn, plan.thin, plan.debug
    w_list, z_list, docLens = query.w_list, query.z_list, query.docLens
    K, T, dtype, name = model.K, model.T, model.dtype, model.name
    topicPrior, vocabPrior = model.topicPrior, model.vocabPrior

    # Raised explicitly instead of via an `assert` statement so that the guard
    # is still enforced when Python runs with -O (which strips asserts).
    if dtype != np.float64:
        raise AssertionError("This is only implemented for 64-bit floats")
    D = docLens.shape[0]

    # Integer count arrays, populated by sumSuffStats() below
    ndk = np.zeros((D, K), dtype=np.int32)
    nkv = np.zeros((K, T), dtype=np.int32)
    nk  = np.zeros((K,),   dtype=np.int32)

    # Accumulators handed to the sampler
    topicSum = np.zeros((D, K), dtype=dtype)
    vocabSum = np.zeros((K, T), dtype=dtype)

    compiled.initGlobalRng(0xC0FFEE)  # fixed seed
    compiled.sumSuffStats(w_list, z_list, docLens, ndk, nkv, nk)

    # Burn in. The second argument is burnIn + 1 where the sampling call
    # passes `thin` - presumably so nothing is accumulated during burn-in;
    # confirm against compiled.sample().
    if debug: print ("Burning")
    compiled.sample(
        burnIn, burnIn + 1, w_list, z_list, docLens,
        ndk, nkv, nk, topicSum, vocabSum,
        topicPrior, vocabPrior, False, debug)

    # True samples
    if debug: print ("Sampling")
    numSamples = compiled.sample(
        iterations - burnIn, thin, w_list, z_list, docLens,
        ndk, nkv, nk, topicSum, vocabSum,
        topicPrior, vocabPrior, False, debug)

    # NOTE: the global RNG initialised above is not released here; a
    # compiled.freeGlobalRng() call at this point had been disabled.

    trainedModel = ModelState(
        K, T, topicPrior, vocabPrior, topicSum, vocabSum, numSamples, True, dtype, name)
    trainedQuery = QueryState(w_list, z_list, docLens, topicSum, numSamples, True)
    return trainedModel, trainedQuery, (np.zeros(1), np.zeros(1), np.zeros(1))
Пример #2
0
def query (data, model, query, plan):
    """
    Runs the compiled sampler over the assignments held in the query state
    against an existing model, and returns the model together with an
    updated query state.

    Unlike a zero-initialised count matrix, nkv here is seeded from
    wordDists(model) scaled by 1,000,000 and converted to int32, with nk
    as its row sums. Because of that, an all-zero vocabulary prior is
    passed to the sampler in place of the model's own vocabPrior. The
    compiled module's global RNG is seeded with a fixed constant.

    Params:
    data  - not used by this function
    model - read for K, T, topicPrior, vocabPrior, vocabSum, dtype, name
            and processed; also passed to wordDists()
    query - read for w_list, z_list and docLens
    plan  - read for iterations, burnIn, thin and debug

    Return:
    A two-element tuple of
     * a ModelState carrying the model's own vocabSum, vocabPrior and
       processed flag, plus the newly accumulated topicSum
     * a QueryState which shares that same topicSum array

    Raises:
    AssertionError if model.dtype is not np.float64
    """
    iterations, burnIn, thin, debug = \
        plan.iterations, plan.burnIn, plan.thin, plan.debug
    w_list, z_list, docLens = query.w_list, query.z_list, query.docLens
    K, T, dtype, name = model.K, model.T, model.dtype, model.name
    topicPrior, vocabPrior = model.topicPrior, model.vocabPrior

    # Raised explicitly instead of via an `assert` statement so that the guard
    # is still enforced when Python runs with -O (which strips asserts).
    if dtype != np.float64:
        raise AssertionError("This is only implemented for 64-bit floats")
    D = docLens.shape[0]

    compiled.initGlobalRng(0xC0FFEE)  # fixed seed

    ndk = np.zeros((D, K), dtype=np.int32)
    # Word distributions turned into integer pseudo-counts; the int32
    # conversion discards the fractional part.
    nkv = (wordDists(model) * 1000000).astype(np.int32)
    nk  = nkv.sum(axis=1).astype(np.int32)
    adjustedVocabPrior = np.zeros((T,), dtype=dtype) # already incorporated into nkv

    topicSum = np.zeros((D, K), dtype=dtype)
    vocabSum = model.vocabSum  # the model's own array, not a copy

    # Called with the pre-seeded nkv / nk - presumably this adds the query's
    # own counts on top of them; confirm against compiled.sumSuffStats().
    compiled.sumSuffStats(w_list, z_list, docLens, ndk, nkv, nk)

    # Burn in. The second argument is burnIn + 1 where the sampling call
    # passes `thin` - presumably so nothing is accumulated during burn-in;
    # confirm against compiled.sample().
    compiled.sample(
        burnIn, burnIn + 1, w_list, z_list, docLens,
        ndk, nkv, nk, topicSum, vocabSum,
        topicPrior, adjustedVocabPrior, True, debug)

    # True samples
    numSamples = compiled.sample(
        iterations - burnIn, thin, w_list, z_list, docLens,
        ndk, nkv, nk, topicSum, vocabSum,
        topicPrior, adjustedVocabPrior, True, debug)

    updatedModel = ModelState(
        K, T, topicPrior, vocabPrior, topicSum, vocabSum, numSamples, model.processed, dtype, name)
    updatedQuery = QueryState(w_list, z_list, docLens, topicSum, numSamples, True)
    return updatedModel, updatedQuery