Exemplo n.º 1
0
class PoissonUnigramPattern:
    """Distribution over patterns built from a unigram morpheme model and a
    length model.

    A pattern id is looked up in ``pattern_vocabulary`` to obtain its
    sequence of morpheme ids.  The probability of a pattern is the product
    of the per-morpheme probabilities times the probability of its length.

    NOTE(review): morpheme ids 0 and 1 appear to be reserved for START/STOP
    (see the comment in ``__init__``), so every id is shifted down by 2
    before it is handed to the morpheme model, which has ``K-2`` outcomes.
    """

    def __init__(self, K, morpheme_prior, gamma, delta, pattern_vocabulary):
        """Build the two component models.

        K                  -- total number of morpheme ids, including the two
                              reserved ones
        morpheme_prior     -- prior passed to the DirichletMultinomial
        gamma, delta       -- parameters passed to the GammaPoisson
        pattern_vocabulary -- indexable mapping pattern id -> morpheme ids
        """
        self.morpheme_model = DirichletMultinomial(K-2, morpheme_prior) # -START, -STOP
        self.length_model = GammaPoisson(gamma, delta)
        self.vocabulary = pattern_vocabulary

    def increment(self, pattern):
        """Add one observation of `pattern` to both component models."""
        morphemes = self.vocabulary[pattern]
        for morpheme in morphemes:
            self.morpheme_model.increment(morpheme-2)
        # The length model is fed len-1 (presumably because a pattern always
        # holds at least one morpheme -- TODO confirm).
        self.length_model.increment(len(morphemes)-1)

    def decrement(self, pattern):
        """Remove one observation of `pattern` from both component models."""
        morphemes = self.vocabulary[pattern]
        for morpheme in morphemes:
            self.morpheme_model.decrement(morpheme-2)
        self.length_model.decrement(len(morphemes)-1)

    def prob(self, pattern):
        """Return the probability of `pattern` under the current counts."""
        morphemes = self.vocabulary[pattern]
        # Use the same -2 offset as increment()/decrement(); querying the raw
        # id would read a different outcome than the one that was updated and
        # would run past the K-2 outcomes of the model for the largest ids.
        return (prod(self.morpheme_model.prob(m-2) for m in morphemes) *
                self.length_model.prob(len(morphemes)-1))

    def log_likelihood(self, full=False):
        """Return the sum of the log-likelihoods of both component models.

        `full` is forwarded unchanged to each component.
        """
        return (self.morpheme_model.log_likelihood(full)
                + self.length_model.log_likelihood(full))

    def resample_hyperparemeters(self, n_iter):
        """Resample the morpheme model's hyperparameters for `n_iter` rounds.

        Only the morpheme model is resampled here; the length model is left
        untouched.  The (misspelled) method name is kept because callers and
        the component model use the same spelling.
        """
        return self.morpheme_model.resample_hyperparemeters(n_iter)

    def __repr__(self):
        return ('PoissonUnigram(length ~ {self.length_model},'
                ' morph ~ {self.morpheme_model})').format(self=self)
Exemplo n.º 2
0
def sample_topics(doc, model, n_iter):
    """Sample per-word topic assignments for `doc` and summarise them.

    For `n_iter` sweeps, every position in `doc` gets a new topic drawn via
    `mult_sample` from weights equal to the document-level topic probability
    times the probability of the word under that topic.  From the second
    sweep on, the previous assignment of a position is removed from the
    document-level counts before it is redrawn.

    Returns whatever `topic_vector` builds from the final document-level
    topic model and `model`.
    """
    topics = [None] * len(doc)
    doc_topic = DirichletMultinomial(model.n_topics, model.alpha)
    for sweep in xrange(n_iter):
        for pos, word in enumerate(doc):
            # Nothing has been assigned yet during the very first sweep.
            if sweep > 0:
                doc_topic.decrement(topics[pos])
            # Lazy (topic, weight) pairs; consumed by mult_sample.
            weighted = ((k, doc_topic.prob(k) * model.topic_word[k].prob(word))
                        for k in xrange(model.n_topics))
            topics[pos] = mult_sample(weighted)
            doc_topic.increment(topics[pos])
    return topic_vector(doc_topic, model)