Пример #1
0
def generate(topics, words, words_per_doc):
    """Sample a synthetic corpus with one document per entry of ``topics``.

    Args:
        topics: Sequence whose i-th entry is the topic distribution of
            document i; each entry is turned into a CDF with
            ``util.get_cdf``.
        words: Sequence of per-topic word distributions; each entry is
            turned into a CDF with ``util.get_cdf`` and later looked up by
            the sampled topic.
        words_per_doc: Parameter handed to ``util.poisson`` to draw the
            number of words of each document.

    Returns:
        A ``(docs, doc_topics)`` tuple of parallel lists: ``docs[i][j]`` is
        the j-th sampled word of document i and ``doc_topics[i][j]`` is the
        topic that word was sampled from.
    """
    # One CDF per topic, built once up front so the inner loop only samples.
    word_cdfs = [util.get_cdf(word_dist) for word_dist in words]

    docs = []
    doc_topics = []
    for i, topic_dist in enumerate(topics):
        if i % 100 == 0:
            # Single pre-formatted argument: prints the same text under
            # Python 2 and Python 3 (the old print statement is a
            # SyntaxError on Python 3).
            print("reached document %d" % i)
        num_words = util.poisson(words_per_doc)
        topic_cdf = util.get_cdf(topic_dist)

        doc = []
        word_topics = []
        for _ in range(num_words):
            # Pick a topic for this word, then a word from that topic.
            # NOTE(review): assumes util.sample returns an index usable
            # with word_cdfs -- confirm against util.
            topic = util.sample(topic_cdf)
            doc.append(util.sample(word_cdfs[topic]))
            word_topics.append(topic)
        docs.append(doc)
        doc_topics.append(word_topics)

    return docs, doc_topics
Пример #2
0
    def __init__(self, params):
        """Initialise a node with its topic and empty bookkeeping.

        ``params`` must support ``params["topic_to_word_param"]``; that value
        is passed to ``dirichlet`` to obtain this node's word distribution.
        """
        # Word distribution of this node's topic, kept together with the
        # CDF that util.get_cdf builds from it.
        topic_words = dirichlet(params["topic_to_word_param"])
        self.word_dist = topic_words
        self.word_cdf = util.get_cdf(topic_words)

        # Only children that have looked below this level are stored.
        # Documents that reached this node but never looked further are not
        # represented; that is acceptable because the Chinese Restaurant
        # Process is exchangeable (it does not depend on order).
        self.children = []

        # Document counters, both starting at zero:
        #   num_documents             -- documents that pass through this node
        #   num_documents_in_children -- documents that looked below this
        #       level; should always equal
        #       sum(c.num_documents for c in self.children)
        self.num_documents = 0
        self.num_documents_in_children = 0
Пример #3
0
 def new_customer(self):
     """Seat one newly arrived customer and record the chosen table.

     Increments ``self.customers``, refreshes ``self.probabilities`` via
     ``self.update_probabilities()``, samples a table from them, then
     updates ``self.tables`` and ``self.seats``.  Prints two progress
     lines and returns ``None``.
     """
     self.customers += 1

     self.update_probabilities()
     # Internal sanity check: the seating probabilities must sum to 1
     # (within 1e-10) before a CDF is built from them.
     assert abs(1 - sum(self.probabilities)) < 1e-10

     cdf = util.get_cdf(self.probabilities)
     table = util.sample(cdf)

     # Each print takes one pre-formatted argument so the output text is
     # the same under Python 2 and Python 3 (the old print statement is
     # a SyntaxError on Python 3).
     print("new customer has arrived! %s" % (self.customers,))
     #print("customers are sitting at tables %s" % (self.sparse_seats(),))
     #print("customer will sit with probabilities: %s" % (self.probabilities,))
     print("customer has chosen to sit at table %s" % (table,))

     if table not in self.seats:
         # Nobody has sat at this table yet: add an entry with count 1.
         # NOTE(review): assumes a new table's index equals
         # len(self.tables) before the append -- confirm.
         self.tables.append(1)
     else:
         self.tables[table] += 1
     self.seats.append(table)