def resample(self):
    """Run one resampling sweep: table assignments first, then table dishes.

    Step 1 removes every entry of ``self.analyses`` with ``self.decrement``
    and re-adds it with ``self.increment``, storing the returned
    ``(w, p, s)`` triple back at the same index.

    Step 2 visits every table recorded in ``self.tables``, redraws its
    ``(p, s)`` label among ``self.seg_mappings[w]`` weighted by
    ``self.base.prob``, and rebuilds ``self.analyses``, ``self.tables`` and
    ``self.ncustomers`` from scratch.

    Returns None; all results are written to attributes of ``self``.
    """
    # 1. Resample table assignments
    for i, (w, p, s) in enumerate(self.analyses):
        self.decrement(w, p, s)
        w, p, s = self.increment(w)
        self.analyses[i] = (w, p, s)

    # 2. Resample table dishes
    new_analyses = []
    new_tables = {}
    new_ncustomers = {}
    # .items() instead of the Python-2-only .iteritems(): self.tables is not
    # modified inside the loop, so both spellings iterate the same pairs.
    for (w, old_p, old_s), tables in self.tables.items():
        # Each c is used as a count below (summed and used as a repeat
        # factor) -- presumably the number of customers at that table.
        for c in tables:
            # Drop the old label's counts before sampling -- presumably so
            # that self.base.prob does not see this table; confirm.
            self.p_counts[old_p] -= 1
            self.s_counts[old_s] -= 1
            # mult_sample is defined elsewhere; it is assumed to draw one
            # outcome from (outcome, weight) pairs.
            p, s = mult_sample(((cand_p, cand_s), self.base.prob(cand_p, cand_s))
                               for cand_p, cand_s in self.seg_mappings[w])
            self.p_counts[p] += 1
            self.s_counts[s] += 1
            key = (w, p, s)
            if key not in new_tables:
                new_tables[key] = []
                new_ncustomers[key] = 0
            new_tables[key].append(c)
            new_ncustomers[key] += c
            new_analyses.extend([key] * c)

    self.analyses = new_analyses
    self.tables = new_tables
    self.ncustomers = new_ncustomers
def increment(self, word, initialize=False):
    """Choose a (prefix, suffix) analysis for ``word`` and record it.

    ``self._analysis_probs(word)`` supplies (analysis, weight) pairs.  When
    ``initialize`` is true the weights are discarded and one analysis is
    picked with ``random.choice``; otherwise the pairs are handed to
    ``mult_sample`` (defined elsewhere, assumed to sample by weight).

    The chosen parts are passed to ``self.prefix_model.increment`` and
    ``self.suffix_model.increment``, and the pair is appended to
    ``self.analyses[word]``.  Returns None.
    """
    scored = self._analysis_probs(word)
    if not initialize:
        prefix, suffix = mult_sample(scored)
    else:
        # Keep only the analyses; the weights play no part in this branch.
        options = [analysis for analysis, _ in scored]
        prefix, suffix = random.choice(options)

    self.prefix_model.increment(prefix)
    self.suffix_model.increment(suffix)
    self.analyses[word].append((prefix, suffix))
def increment(self, w, initialize=False):
    """Sample a segmentation and a table assignment for word #w.

    One ``(p, s, seat)`` outcome is drawn with ``mult_sample`` (defined
    elsewhere) from ``self.seating_probs(w, initialize)``.  The word is then
    seated through ``self._seat_to``; when that call returns a truthy value
    the base is incremented as well (presumably because a new table was
    opened -- confirm against ``_seat_to``).  The per-word count
    ``self.analyses[w][p, s]`` is bumped in every case.  Returns None.
    """
    # Draw the segmentation together with the seat.
    candidates = self.seating_probs(w, initialize)
    prefix, suffix, seat = mult_sample(candidates)

    # Seat the word; the base only changes when _seat_to reports it.
    dish = (w, prefix, suffix)
    if self._seat_to(dish, seat):
        self.base.increment(prefix, suffix)

    # The dish count is updated regardless of the seating outcome.
    self.analyses[w][prefix, suffix] += 1
def sample_topics(doc, model, n_iter):
    """Sample a topic for each word of ``doc`` and return its topic vector.

    A fresh ``DirichletMultinomial(model.n_topics, model.alpha)`` tracks the
    document's topic counts.  For ``n_iter`` passes, every word's topic is
    redrawn with ``mult_sample`` using the weight
    ``doc_topic.prob(k) * model.topic_word[k].prob(word)`` for each topic
    ``k``.  From the second pass on, the word's previous topic is removed
    from ``doc_topic`` before the redraw.

    Parameters:
        doc: sequence of words (needs ``len`` and iteration).
        model: object exposing ``n_topics``, ``alpha`` and ``topic_word``.
        n_iter: number of passes over the document.

    Returns whatever ``topic_vector(doc_topic, model)`` produces.
    """
    assignments = [None] * len(doc)
    doc_topic = DirichletMultinomial(model.n_topics, model.alpha)
    # Built once and reused for every word; range() exists on both
    # Python 2 and Python 3, whereas xrange() is Python 2 only.
    topics = range(model.n_topics)
    for it in range(n_iter):
        for i, word in enumerate(doc):
            # Nothing has been assigned during the first pass, so there is
            # no previous topic to remove yet.
            if it > 0:
                doc_topic.decrement(assignments[i])
            assignments[i] = mult_sample(
                (k, doc_topic.prob(k) * model.topic_word[k].prob(word))
                for k in topics)
            doc_topic.increment(assignments[i])
    return topic_vector(doc_topic, model)
def increment(self, k):
    """Pick one analysis of item ``k``, record the choice, update the models.

    If ``self.analyses[k]`` holds exactly one analysis, index 0 is used
    without sampling.  Otherwise an index is drawn with ``mult_sample``
    (defined elsewhere) from (index, ``self.analysis_prob(analysis)``) pairs.

    The chosen index is appended to ``self.assignments[k]`` and the chosen
    analysis' ``stem`` and ``pattern`` are passed to
    ``self.stem_model.increment`` and ``self.pattern_model.increment``.
    Returns None.
    """
    candidates = self.analyses[k]

    # Choose which analysis to use and remember the choice.
    if len(candidates) == 1:
        chosen = 0
    else:
        chosen = mult_sample((idx, self.analysis_prob(cand))
                             for idx, cand in enumerate(candidates))
    self.assignments[k].append(chosen)

    # Propagate the chosen analysis to both component models.
    picked = candidates[chosen]
    self.stem_model.increment(picked.stem)
    self.pattern_model.increment(picked.pattern)
def resample_labels(self):
    """Redraw the ``(p, s)`` label of every table and rebuild the bookkeeping.

    For each table listed in ``self.tables`` the old label is removed from
    ``self.base``, a new label is drawn with ``mult_sample`` among
    ``self.segmentations(w)`` weighted by ``self.base.prob``, and the new
    label is added back to ``self.base``.

    ``self.analyses`` (one ``Counter`` per entry of
    ``self.word_vocabulary``), ``self.tables`` and ``self.ncustomers`` are
    rebuilt from scratch and replaced at the end.  Returns None.
    """
    # range()/.items() are used instead of the Python-2-only
    # xrange()/.iteritems(); self.tables is not modified inside the loop, so
    # iteration is unaffected.
    new_analyses = [Counter() for _ in range(len(self.word_vocabulary))]
    new_tables = {}
    new_ncustomers = {}
    for (w, old_p, old_s), tables in self.tables.items():
        # Each c is used as a count below -- presumably the number of
        # customers seated at that table.
        for c in tables:
            # remove (old_p, old_s)
            self.base.decrement(old_p, old_s)
            # resample; mult_sample is defined elsewhere and is assumed to
            # draw one outcome from (outcome, weight) pairs
            (p, s) = mult_sample(((cand_p, cand_s), self.base.prob(cand_p, cand_s))
                                 for cand_p, cand_s in self.segmentations(w))
            # add (p, s)
            key = (w, p, s)
            if key not in new_tables:
                new_tables[key] = []
                new_ncustomers[key] = 0
            new_tables[key].append(c)
            new_ncustomers[key] += c
            new_analyses[w][p, s] += c
            self.base.increment(p, s)
    self.analyses = new_analyses
    self.tables = new_tables
    self.ncustomers = new_ncustomers
def increment(self, w, initialize=False):
    """Sample a seating for ``w`` and return the resulting ``(w, p, s)``.

    One ``(w, p, s, seat)`` outcome is drawn with ``mult_sample`` (defined
    elsewhere) from ``self.seating_probs(w, initialize)``; note that ``w`` is
    rebound to the sampled value.  The triple is seated with
    ``self._seat_to``; when that call returns a truthy value,
    ``self.p_counts[p]`` and ``self.s_counts[s]`` are each incremented
    (presumably because a new table was opened -- confirm against
    ``_seat_to``).
    """
    outcome = mult_sample(self.seating_probs(w, initialize))
    w, p, s, seat = outcome
    dish = (w, p, s)
    if self._seat_to(dish, seat):
        self.p_counts[p] += 1
        self.s_counts[s] += 1
    return dish