def testPersistenceAfterProbabilityEstimationUsingTexts(self): fname = testfile() model = CoherenceModel( topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v') model.estimate_probabilities() model.save(fname) model2 = CoherenceModel.load(fname) self.assertIsNotNone(model2._accumulator) self.assertTrue(model.get_coherence() == model2.get_coherence())
def testAccumulatorCachingTopicSubsets(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') cm1 = CoherenceModel(topics=self.topics1, **kwargs) cm1.estimate_probabilities() accumulator = cm1._accumulator self.assertIsNotNone(accumulator) cm1.topics = [t[:2] for t in self.topics1] self.assertEqual(accumulator, cm1._accumulator) cm1.topics = self.topics1 self.assertEqual(accumulator, cm1._accumulator)
def testAccumulatorCachingTopicSubsets(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') cm1 = CoherenceModel(topics=self.topics1, **kwargs) cm1.estimate_probabilities() accumulator = cm1._accumulator self.assertIsNotNone(accumulator) cm1.topics = [t[:2] for t in self.topics1] self.assertEqual(accumulator, cm1._accumulator) cm1.topics = self.topics1 self.assertEqual(accumulator, cm1._accumulator)
def testPersistenceAfterProbabilityEstimationUsingCorpus(self): fname = get_tmpfile('gensim_similarities.tst.pkl') model = CoherenceModel( topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass' ) model.estimate_probabilities() model.save(fname) model2 = CoherenceModel.load(fname) self.assertIsNotNone(model2._accumulator) self.assertTrue(model.get_coherence() == model2.get_coherence())
def testPersistenceAfterProbabilityEstimationUsingTexts(self): fname = testfile() model = CoherenceModel( topics=self.topics1, texts=self.texts, dictionary=self.dictionary, coherence='c_v' ) model.estimate_probabilities() model.save(fname) model2 = CoherenceModel.load(fname) self.assertIsNotNone(model2._accumulator) self.assertTrue(model.get_coherence() == model2.get_coherence())
def testPersistenceAfterProbabilityEstimationUsingCorpus(self): fname = get_tmpfile('gensim_similarities.tst.pkl') model = CoherenceModel( topics=self.topics1, corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass' ) model.estimate_probabilities() model.save(fname) model2 = CoherenceModel.load(fname) self.assertIsNotNone(model2._accumulator) self.assertTrue(model.get_coherence() == model2.get_coherence())
def testAccumulatorCachingWithModelSetting(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') cm1 = CoherenceModel(topics=self.topics1, **kwargs) cm1.estimate_probabilities() self.assertIsNotNone(cm1._accumulator) cm1.model = self.ldamodel topics = [] for topic in self.ldamodel.state.get_lambda(): bestn = argsort(topic, topn=cm1.topn, reverse=True) topics.append(bestn) self.assertTrue(np.array_equal(topics, cm1.topics)) self.assertIsNone(cm1._accumulator)
def testAccumulatorCachingWithModelSetting(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, coherence='u_mass') cm1 = CoherenceModel(topics=self.topics1, **kwargs) cm1.estimate_probabilities() self.assertIsNotNone(cm1._accumulator) cm1.model = self.ldamodel topics = [] for topic in self.ldamodel.state.get_lambda(): bestn = argsort(topic, topn=cm1.topn, reverse=True) topics.append(bestn) self.assertTrue(np.array_equal(topics, cm1.topics)) self.assertIsNone(cm1._accumulator)
def testAccumulatorCachingWithTopnSettingGivenModel(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, topn=5, coherence='u_mass') cm1 = CoherenceModel(model=self.ldamodel, **kwargs) cm1.estimate_probabilities() self.assertIsNotNone(cm1._accumulator) accumulator = cm1._accumulator topics_before = cm1._topics cm1.topn = 3 self.assertEqual(accumulator, cm1._accumulator) self.assertEqual(3, len(cm1.topics[0])) self.assertEqual(topics_before, cm1._topics) cm1.topn = 6 # should be able to expand given the model self.assertEqual(6, len(cm1.topics[0]))
def testAccumulatorCachingWithTopnSettingGivenModel(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, topn=5, coherence='u_mass') cm1 = CoherenceModel(model=self.ldamodel, **kwargs) cm1.estimate_probabilities() self.assertIsNotNone(cm1._accumulator) accumulator = cm1._accumulator topics_before = cm1._topics cm1.topn = 3 self.assertEqual(accumulator, cm1._accumulator) self.assertEqual(3, len(cm1.topics[0])) self.assertEqual(topics_before, cm1._topics) cm1.topn = 6 # should be able to expand given the model self.assertEqual(6, len(cm1.topics[0]))
def testAccumulatorCachingWithTopnSettingGivenTopics(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, topn=5, coherence='u_mass') cm1 = CoherenceModel(topics=self.topics1, **kwargs) cm1.estimate_probabilities() self.assertIsNotNone(cm1._accumulator) accumulator = cm1._accumulator topics_before = cm1._topics cm1.topn = 3 self.assertEqual(accumulator, cm1._accumulator) self.assertEqual(3, len(cm1.topics[0])) self.assertEqual(topics_before, cm1._topics) # Topics should not have been truncated, so topn settings below 5 should work cm1.topn = 4 self.assertEqual(accumulator, cm1._accumulator) self.assertEqual(4, len(cm1.topics[0])) self.assertEqual(topics_before, cm1._topics) with self.assertRaises(ValueError): cm1.topn = 6 # can't expand topics any further without model
def testAccumulatorCachingWithTopnSettingGivenTopics(self): kwargs = dict(corpus=self.corpus, dictionary=self.dictionary, topn=5, coherence='u_mass') cm1 = CoherenceModel(topics=self.topics1, **kwargs) cm1.estimate_probabilities() self.assertIsNotNone(cm1._accumulator) accumulator = cm1._accumulator topics_before = cm1._topics cm1.topn = 3 self.assertEqual(accumulator, cm1._accumulator) self.assertEqual(3, len(cm1.topics[0])) self.assertEqual(topics_before, cm1._topics) # Topics should not have been truncated, so topn settings below 5 should work cm1.topn = 4 self.assertEqual(accumulator, cm1._accumulator) self.assertEqual(4, len(cm1.topics[0])) self.assertEqual(topics_before, cm1._topics) with self.assertRaises(ValueError): cm1.topn = 6 # can't expand topics any further without model