def coherence(word_topic, train, test, top=10, window_with=10):
    """Summarise per-topic word-pair scores as a formatted string.

    For every topic (one per ``word_topic.shape[1]``) the ``top`` head
    words are fetched with ``get_topic``, a log-ratio score is computed for
    each pair produced by ``all_combine``, and the mean and median of the
    non-zero scores are recorded.  The result reports the mean of the
    per-topic means and the median of the per-topic medians.

    NOTE(review): another definition of ``coherence`` with the same body
    follows in this file and rebinds the name; one of the two should be
    removed.

    Args:
        word_topic: Array-like with a two-element ``shape``; ``shape[0]`` is
            forwarded to ``eval_pob_bigrams`` and ``shape[1]`` is iterated as
            the topic index.  Presumably a words x topics matrix -- confirm.
        train: Corpus handed to ``eval_pob_ungrams`` / ``eval_pob_bigrams``.
        test: Not used by this function; kept for interface compatibility.
        top: Number of head words taken per topic.
        window_with: Forwarded to ``eval_pob_bigrams`` as ``wind_with``.

    Returns:
        A string of the form ``'coherence = mean %.2f median %.2f'``.  A
        topic with no non-zero pair score contributes NaN (NumPy's result
        for the mean/median of an empty list), which propagates to the
        reported values.
    """
    words_for_probs = set(eval_words_for_probs(word_topic, top))
    prob_ungrams = eval_pob_ungrams(train, words_for_probs)
    prob_conditn = eval_pob_bigrams(
        train, word_topic.shape[0], words_for_probs, wind_with=window_with
    )

    def pmi(w1, w2):
        # Look the pair probability up once; a zero entry scores 0.0 so that
        # math.log is never called on zero.
        pair_prob = prob_conditn[w2][w1]
        if pair_prob == 0.0:
            return 0.0
        return math.log(pair_prob / prob_ungrams[w1])

    pmis_mean = []
    pmis_median = []
    for t in xrange(word_topic.shape[1]):
        topic_wrds = get_topic(word_topic, topic=t, head=top)
        # Score each pair exactly once, then drop the zero scores (the
        # original evaluated pmi twice per pair: filter and value).
        scores = (pmi(w1, w2) for w1, w2 in all_combine(topic_wrds))
        pmi_t = [score for score in scores if score != 0.0]
        pmis_median.append(np.median(pmi_t))
        pmis_mean.append(np.mean(pmi_t))

    return 'coherence = mean %.2f median %.2f' % (
        np.mean(pmis_mean),
        np.median(pmis_median),
    )
def coherence(word_topic, train, test, top=10, window_with=10):
    """Summarise per-topic word-pair scores as a formatted string.

    For every topic (one per ``word_topic.shape[1]``) the ``top`` head
    words are fetched with ``get_topic``, a log-ratio score is computed for
    each pair produced by ``all_combine``, and the mean and median of the
    non-zero scores are recorded.  The result reports the mean of the
    per-topic means and the median of the per-topic medians.

    NOTE(review): this rebinds an earlier ``coherence`` definition with the
    same body that appears just above in this file; one of the two should
    be removed.

    Args:
        word_topic: Array-like with a two-element ``shape``; ``shape[0]`` is
            forwarded to ``eval_pob_bigrams`` and ``shape[1]`` is iterated as
            the topic index.  Presumably a words x topics matrix -- confirm.
        train: Corpus handed to ``eval_pob_ungrams`` / ``eval_pob_bigrams``.
        test: Not used by this function; kept for interface compatibility.
        top: Number of head words taken per topic.
        window_with: Forwarded to ``eval_pob_bigrams`` as ``wind_with``.

    Returns:
        A string of the form ``'coherence = mean %.2f median %.2f'``.  A
        topic with no non-zero pair score contributes NaN (NumPy's result
        for the mean/median of an empty list), which propagates to the
        reported values.
    """
    words_for_probs = set(eval_words_for_probs(word_topic, top))
    prob_ungrams = eval_pob_ungrams(train, words_for_probs)
    prob_conditn = eval_pob_bigrams(
        train, word_topic.shape[0], words_for_probs, wind_with=window_with
    )

    def pmi(w1, w2):
        # Look the pair probability up once; a zero entry scores 0.0 so that
        # math.log is never called on zero.
        pair_prob = prob_conditn[w2][w1]
        if pair_prob == 0.0:
            return 0.0
        return math.log(pair_prob / prob_ungrams[w1])

    pmis_mean = []
    pmis_median = []
    for t in xrange(word_topic.shape[1]):
        topic_wrds = get_topic(word_topic, topic=t, head=top)
        # Score each pair exactly once, then drop the zero scores (the
        # original evaluated pmi twice per pair: filter and value).
        scores = (pmi(w1, w2) for w1, w2 in all_combine(topic_wrds))
        pmi_t = [score for score in scores if score != 0.0]
        pmis_median.append(np.median(pmi_t))
        pmis_mean.append(np.mean(pmi_t))

    return 'coherence = mean %.2f median %.2f' % (
        np.mean(pmis_mean),
        np.median(pmis_median),
    )
def eval_words_for_probs(word_topic, top):
    """Collect the head words of every topic into one flat list.

    Calls ``get_topic(word_topic, topic=t, head=top)`` for each topic index
    ``t`` in ``0 .. word_topic.shape[1] - 1`` and concatenates the results in
    topic order.  Duplicates are kept; callers that need uniqueness must
    de-duplicate themselves.

    Args:
        word_topic: Array-like whose ``shape[1]`` gives the number of topics.
        top: Number of head words requested from ``get_topic`` per topic.

    Returns:
        A list with the head words of all topics, in topic order.
    """
    n_topics = word_topic.shape[1]
    collected = []
    for topic_idx in xrange(n_topics):
        head_words = get_topic(word_topic, topic=topic_idx, head=top)
        collected.extend(head_words)
    return collected