def testCompareCoherenceForModels(self):
    """Compare two copies of the same LDA model through one CoherenceModel.

    A single ``CoherenceModel`` is created with ``for_models`` using the
    ``c_v`` measure. The test asserts that its accumulator is set, that it
    stays set after each model is assigned to ``cm.model``, that every
    aggregate score returned by ``compare_models`` equals the mean of its
    per-topic scores, and that both models get the same aggregate score
    (all comparisons to 4 decimal places).
    """
    lda_pair = [self.ldamodel, self.ldamodel]
    cm = CoherenceModel.for_models(
        lda_pair,
        dictionary=self.dictionary,
        texts=self.texts,
        coherence='c_v',
    )
    self.assertIsNotNone(cm._accumulator)

    # Accumulator should have all relevant IDs, so assigning either model
    # is expected to leave it in place.
    for lda in lda_pair:
        cm.model = lda
        self.assertIsNotNone(cm._accumulator)

    # Exactly two (per-topic scores, aggregate score) results are expected.
    first_result, second_result = cm.compare_models(lda_pair)
    topic_scores1, overall1 = first_result
    topic_scores2, overall2 = second_result

    self.assertAlmostEqual(np.mean(topic_scores1), overall1, places=4)
    self.assertAlmostEqual(np.mean(topic_scores2), overall2, places=4)
    self.assertAlmostEqual(overall1, overall2, places=4)
def testCompareCoherenceForModels(self):
    """Verify ``CoherenceModel.for_models`` and ``compare_models`` agree.

    The same LDA model is supplied twice. The accumulator must be present
    right after construction and after each model is assigned, each
    aggregate coherence must match the mean of its per-topic coherences,
    and the two aggregates must match each other (4 decimal places).
    """
    same_model_twice = [self.ldamodel, self.ldamodel]
    shared_kwargs = dict(
        dictionary=self.dictionary, texts=self.texts, coherence='c_v')
    cm = CoherenceModel.for_models(same_model_twice, **shared_kwargs)
    self.assertIsNotNone(cm._accumulator)

    # Accumulator should have all relevant IDs.
    for candidate in same_model_twice:
        cm.model = candidate
        self.assertIsNotNone(cm._accumulator)

    # Unpack eagerly so a result of the wrong shape fails before any
    # numeric assertion runs.
    (per_topic_a, aggregate_a), (per_topic_b, aggregate_b) = (
        cm.compare_models(same_model_twice))

    scored = ((per_topic_a, aggregate_a), (per_topic_b, aggregate_b))
    for per_topic, aggregate in scored:
        self.assertAlmostEqual(np.mean(per_topic), aggregate, places=4)
    self.assertAlmostEqual(aggregate_a, aggregate_b, places=4)
#%%Topic Modeling
# Cell 1: fit a 20-component truncated SVD on X and build a gensim
# CoherenceModel from the sentence-split text of data['processed_data'].
# X and data are defined outside this cell.
from sklearn.decomposition import TruncatedSVD
from gensim.models.coherencemodel import CoherenceModel
from nltk.tokenize import sent_tokenize, word_tokenize  # word_tokenize is not used in this cell

# Truncated SVD represents documents and terms as vectors
# (presumably X is a document-term matrix -- TODO confirm).
svd_model = TruncatedSVD(
    n_components=20,
    algorithm='randomized',
    n_iter=100,
    random_state=122,
)
svd_model.fit(X)
len(svd_model.components_)  # bare expression: only displays a value in an interactive cell

# All documents are concatenated with an empty separator and then split
# into sentences.
# NOTE(review): with '' as the separator the last word of one document is
# fused to the first word of the next -- confirm this is intended.
text = sent_tokenize(''.join(data['processed_data'].values))

# NOTE(review): gensim documents `model` as a gensim topic model and `texts`
# as tokenized documents; here a scikit-learn TruncatedSVD and plain sentence
# strings are passed, with no dictionary -- verify this call works as intended.
coherencemodel = CoherenceModel(model=svd_model, texts=text)

#%%
# Cell 2: train two LDA models (3 and 5 topics) on gensim's bundled test
# corpus and build one CoherenceModel for both with the 'u_mass' measure.
from gensim.test.utils import common_corpus, common_dictionary
from gensim.models.ldamodel import LdaModel
from gensim.models.coherencemodel import CoherenceModel  # repeated so the cell can run on its own

m1 = LdaModel(corpus=common_corpus, num_topics=3, id2word=common_dictionary)
m2 = LdaModel(corpus=common_corpus, num_topics=5, id2word=common_dictionary)
cm = CoherenceModel.for_models(
    [m1, m2],
    dictionary=common_dictionary,
    corpus=common_corpus,
    coherence='u_mass',
)