def __init__(self, k=10, lda_model=None):
    """Store the LDA model to use and the expansion parameter ``k``.

    Args:
        k: Value stored on the instance as ``self.k``.
        lda_model: Model object to reuse.  When ``None`` (the default) a
            new ``LDAmodel(n_topics=500, n_passes=50,
            vocabulary='combined')`` is constructed instead.
    """
    # Test against None explicitly: a supplied model object that happens
    # to evaluate as falsy must not be silently replaced by a freshly
    # built default model.
    if lda_model is not None:
        self.lda = lda_model
    else:
        self.lda = LDAmodel(n_topics=500, n_passes=50, vocabulary='combined')

    self.k = k
class LDAExpansion(QueryExpansion):
    """Query expansion that appends terms taken from LDA topics.

    The query is tokenized, handed to ``self.lda.tokens2latent`` and, for
    every entry that call returns, terms are pulled from
    ``self.lda.model.show_topic`` and appended to the original query.
    """

    def __init__(self, k=10, lda_model=None):
        """Store the LDA model to use and the expansion parameter ``k``.

        Args:
            k: Scaling factor; each topic contributes up to
                ``round(k * weight)`` terms, where ``weight`` is the
                second item of the topic's latent entry.
            lda_model: Model object to reuse.  When ``None`` a new
                ``LDAmodel(n_topics=500, n_passes=50,
                vocabulary='combined')`` is constructed instead.
        """
        # Explicit None test so that a supplied model which evaluates as
        # falsy is not silently replaced by a freshly built default.
        if lda_model is not None:
            self.lda = lda_model
        else:
            self.lda = LDAmodel(n_topics=500, n_passes=50,
                                vocabulary='combined')

        self.k = k

    def expand(self, query):
        """Return ``query`` followed by the de-duplicated topic terms.

        Args:
            query: The query string to expand.

        Returns:
            The original query, a single space, and the extra terms joined
            by spaces.  Terms appear in the order they were first
            encountered.  If no terms were collected the query is returned
            unchanged (no trailing space).
        """
        tokens = tokenize(query.lower())
        latent = self.lda.tokens2latent(tokens)

        seen = set()
        extra_terms = []
        for topic in latent:
            # round() may hand back a non-int (Python 2 floats, some NumPy
            # scalar types); ``topn`` is a count, so coerce it explicitly.
            n_terms = int(round(self.k * topic[1]))
            top_entries = self.lda.model.show_topic(topicid=topic[0],
                                                    topn=n_terms)
            for entry in top_entries:
                # NOTE(review): index 1 is taken as the term, which assumes
                # show_topic yields (weight, term) pairs -- confirm against
                # the gensim version in use.
                term = entry[1]
                # De-duplicate while keeping first-seen order so the
                # expanded query is reproducible across runs, unlike
                # list(set(...)) whose order depends on string hashing.
                if term not in seen:
                    seen.add(term)
                    extra_terms.append(term)

        if not extra_terms:
            return query
        return query + " " + " ".join(extra_terms)

    def __str__(self):
        """Return ``"<ClassName>(k=<k>)"``."""
        return "%s(k=%s)" % (self.__class__.__name__, self.k)
class LDAExpansion(QueryExpansion):
    """Expand a query with terms drawn from its LDA topics.

    ``expand`` lower-cases and tokenizes the query, obtains latent entries
    from ``self.lda.tokens2latent`` and appends terms returned by
    ``self.lda.model.show_topic`` for each of those entries.
    """

    def __init__(self, k=10, lda_model=None):
        """Initialise the expander.

        Args:
            k: Scaling factor kept as ``self.k``; ``expand`` requests
                ``round(k * weight)`` terms per latent entry.
            lda_model: Existing model object to use.  If ``None``, a
                ``LDAmodel(n_topics=500, n_passes=50,
                vocabulary='combined')`` is created.
        """
        # Only fall back to building a model when nothing was supplied;
        # a truthiness check would also discard falsy-but-valid objects.
        if lda_model is None:
            lda_model = LDAmodel(n_topics=500, n_passes=50,
                                 vocabulary='combined')
        self.lda = lda_model

        self.k = k

    def expand(self, query):
        """Append topic terms to ``query`` and return the new string.

        Args:
            query: The query string to expand.

        Returns:
            ``query`` plus a space-separated list of unique extra terms in
            first-seen order, or ``query`` itself when there are no extra
            terms to add.
        """
        latent = self.lda.tokens2latent(tokenize(query.lower()))

        extra_terms = []
        already_added = set()
        for topic in latent:
            topic_id, weight = topic[0], topic[1]
            # ``topn`` is a count: force a plain int because round() can
            # return a float-like value on Python 2 or for NumPy scalars.
            topn = self.lda.model.show_topic(
                topicid=topic_id, topn=int(round(self.k * weight)))
            # NOTE(review): e[1] is treated as the term string; this
            # presumes (weight, term) ordering from show_topic -- verify
            # for the installed gensim release.
            for term in (e[1] for e in topn):
                # Stable, order-preserving de-duplication keeps the output
                # deterministic; set ordering varies with hash seeding.
                if term in already_added:
                    continue
                already_added.add(term)
                extra_terms.append(term)

        # Avoid emitting a dangling space when nothing was collected.
        if extra_terms:
            return query + " " + " ".join(extra_terms)
        return query

    def __str__(self):
        """Return the class name followed by ``(k=<k>)``."""
        return "%s(k=%s)" % (self.__class__.__name__, self.k)