def infer(self, input, doc):
    """Run topic inference over tokenized input and store the result in doc.

    The random seed is fixed first, then doc is initialised with the
    model's topic count and alpha. Every in-vocabulary term is added to
    doc with a randomly drawn starting topic, and finally the matching
    sampler (lda_infer or slda_infer) is run with the arguments 20 and 50.

    Args:
        input: for an LDADoc, an iterable of token strings; for an
            SLDADoc, an iterable of sentences, each of which is an
            iterable of token strings.
        doc: LDADoc or SLDADoc instance that receives the tokens or
            sentences. For any other type an error is logged and nothing
            else happens.
    """
    fix_random_seed()

    sentence_level = isinstance(doc, SLDADoc)
    if not sentence_level and not isinstance(doc, LDADoc):
        logger.error("Wrong Doc Type!")
        return

    model = self.__model
    doc.init(model.num_topics())
    doc.set_alpha(model.alpha())

    if sentence_level:
        for sentence in input:
            # Out-of-vocabulary words are dropped; the sentence itself is
            # still added even when no word survives.
            term_ids = [
                term for term in map(model.term_id, sentence) if term != OOV
            ]
            start_topic = rand_k(model.num_topics())
            doc.add_sentence(Sentence(start_topic, term_ids))
        self.slda_infer(doc, 20, 50)
    else:
        for word in input:
            term = model.term_id(word)
            if term == OOV:
                continue
            start_topic = rand_k(model.num_topics())
            doc.add_token(Token(start_topic, term))
        self.lda_infer(doc, 20, 50)
def __doc_proposal(self, doc, token):
    """Propose a topic from the document side and accept or reject it.

    A candidate topic is drawn by throwing a dart over
    ``doc.size() + alpha_sum``: a hit inside ``doc.size()`` copies the
    topic of the token/sentence at that index, otherwise a topic is drawn
    uniformly with ``rand_k``. If the candidate differs from the current
    topic, it is kept only when a fresh uniform draw falls below the
    acceptance ratio built from the proportional and proposal functions.

    Args:
        doc: LDADoc (paired with a Token) or SLDADoc (paired with a
            Sentence).
        token: the Token or Sentence whose topic is being resampled.

    Returns:
        The accepted topic (candidate or current one), or None when the
        doc/token types match neither supported pairing.
    """
    if isinstance(doc, LDADoc) and isinstance(token, Token):
        current = token.topic
        throw = rand() * (doc.size() + self.__model.alpha_sum())
        if throw < doc.size():
            candidate = doc.token(int(throw)).topic
        else:
            candidate = rand_k(self.__model.num_topics())

        if candidate == current:
            return candidate

        q_current = self.__doc_proposal_distribution(doc, current)
        q_candidate = self.__doc_proposal_distribution(doc, candidate)
        p_current = self.__proportional_function(doc, token, current)
        p_candidate = self.__proportional_function(doc, token, candidate)
        accept_prob = float(
            (p_candidate * q_current) / (p_current * q_candidate))
        # Accept the candidate when the uniform draw is below the ratio.
        return candidate if rand() < accept_prob else current

    if isinstance(doc, SLDADoc) and isinstance(token, Sentence):
        sentence = token
        current = sentence.topic
        throw = rand() * (doc.size() + self.__model.alpha_sum())
        if throw < doc.size():
            candidate = doc.sent(int(throw)).topic
        else:
            candidate = rand_k(self.__model.num_topics())

        if candidate == current:
            return candidate

        p_current = self.__proportional_function(doc, sentence, current)
        p_candidate = self.__proportional_function(doc, sentence, candidate)
        q_current = self.__doc_proposal_distribution(doc, current)
        q_candidate = self.__doc_proposal_distribution(doc, candidate)
        accept_prob = float(
            (p_candidate * q_current) / (p_current * q_candidate))
        return candidate if rand() < accept_prob else current

    return None
def generate(self):
    """Draw one sample index using the prob/alias tables.

    A column is picked uniformly with ``rand_k(self.size())``; a second
    uniform draw then decides between that column and its alias entry.

    Returns:
        Either the picked column index or ``self.__alias[column]``.
    """
    column = rand_k(self.size())
    # Keep the second draw as a float. The previous int(rand()) truncated
    # it (to 0 for a draw in [0, 1)), which made the comparison below
    # constant so the alias entry was always returned.
    # Assumes rand() yields a uniform float in [0, 1) - TODO confirm
    # against its definition.
    coin = rand()
    # NOTE(review): the comparison direction is kept as in the original.
    # The textbook alias method keeps `column` when the draw is *below*
    # prob[column]; confirm against how __prob is filled at initialization.
    return column if coin > self.__prob[column] else self.__alias[column]