def apply(self):
    """Fit an LDA model on the current corpus and send the results.

    Clears the topic description list and refreshes the GUI first. When
    ``self.corpus`` is truthy, an ``LDA`` model with ``self.num_topics``
    topics is fitted on ``self.corpus.tokens`` while a progress bar sized
    to the corpus length is advanced by the model's callback. The corpus
    extended with topic columns is sent on ``Output.DATA`` and the first
    topic (id 0) is sent via ``send_topic_by_id``. When there is no
    corpus, ``None`` is sent on both ``Output.DATA`` and ``Output.TOPICS``.

    Any exception raised while fitting or sending propagates to the
    caller, but the widget is re-enabled first.
    """
    self.topic_desc.clear()
    self.refresh_gui()

    if not self.corpus:
        # Nothing to model: clear both outputs.
        self.send(Output.DATA, None)
        self.send(Output.TOPICS, None)
        return

    self.commit.setEnabled(False)
    # NOTE(review): ``enabled`` presumably toggles the widget's controls
    # while the model is being fitted -- confirm against its definition.
    self.enabled(False)
    try:
        with self.progressBar(len(self.corpus)) as bar:
            self.lda = LDA(self.corpus.tokens, num_topics=self.num_topics,
                           callback=bar.advance)
            table = self.lda.insert_topics_into_corpus(self.corpus)
            self.update_topics()
        self.send(Output.DATA, table)
        self.send_topic_by_id(0)
    finally:
        # Re-enable even when fitting fails; otherwise an error would
        # leave the widget permanently disabled.
        self.enabled(True)
class LDATests(unittest.TestCase):
    """Tests for the ``LDA`` wrapper, run against the 'deerwester' corpus."""

    @classmethod
    def setUpClass(cls):
        """Load the corpus and fit one 5-topic model shared by all tests.

        Done here rather than in the class body so that nothing is loaded
        or trained at import time and a fixture failure is reported as a
        test error instead of breaking test collection.
        """
        cls.corp = Corpus.from_file('deerwester')
        # Whitespace-split every document into a list of tokens.
        cls.text = [d.split() for d in cls.corp.documents]
        cls.model = LDA(cls.text, num_topics=5)

    def test_insert_topic_into_corpus(self):
        """Inserting topics keeps every row and adds one attribute per topic."""
        corp_topics = self.model.insert_topics_into_corpus(self.corp)
        self.assertEqual(len(corp_topics), len(self.corp))
        self.assertEqual(len(corp_topics.domain.attributes), 5)
        self.assertEqual(corp_topics.X.shape, (len(self.corp), 5))

    def test_get_topic_table_by_id(self):
        """The table for a single topic has 45 rows and two meta columns."""
        topic1 = self.model.get_topics_table_by_id(1)
        # NOTE(review): 45 is presumably the number of distinct tokens in
        # the corpus -- confirm against the data set.
        self.assertEqual(len(topic1), 45)
        self.assertEqual(topic1.metas.shape, (45, 2))

    def test_top_words_by_topic(self):
        """Ten top words are returned for a topic when no count is given."""
        words = self.model.get_top_words_by_id(1)
        self.assertEqual(len(words), 10)

    def test_too_large_id(self):
        """Requesting a topic id beyond the 5 fitted topics raises ValueError."""
        with self.assertRaises(ValueError):
            self.model.get_topics_table_by_id(6)
def apply(self):
    """Preprocess the corpus, fit an LDA model and send the results.

    Clears the topic description list and refreshes the GUI first. When
    ``self.corpus`` is truthy, its documents are passed through
    ``self.preprocessor`` and an ``LDA`` model with ``self.num_topics``
    topics is fitted on the result, reporting progress through
    ``self.progress``. The corpus extended with topic columns is sent on
    ``Output.DATA`` and the first topic (id 0) is sent via
    ``send_topic_by_id``. When there is no corpus, ``None`` is sent on
    both ``Output.DATA`` and ``Output.TOPICS``.

    Any exception raised while preprocessing, fitting or sending
    propagates to the caller, but the progress bar is finished and the
    widget is re-enabled first.
    """
    self.topic_desc.clear()
    self.refresh_gui()

    if not self.corpus:
        # Nothing to model: clear both outputs.
        self.send(Output.DATA, None)
        self.send(Output.TOPICS, None)
        return

    self.commit.setEnabled(False)
    # NOTE(review): ``enabled`` presumably toggles the widget's controls
    # while the model is being fitted -- confirm against its definition.
    self.enabled(False)
    try:
        preprocessed = self.preprocessor(self.corpus.documents)

        self.progressBarInit()
        try:
            self.lda = LDA(preprocessed, num_topics=self.num_topics,
                           callback=self.progress)
            table = self.lda.insert_topics_into_corpus(self.corpus)
            self.update_topics()
        finally:
            # Every progressBarInit() gets a matching
            # progressBarFinished(), even when fitting fails.
            self.progressBarFinished()

        self.send(Output.DATA, table)
        self.send_topic_by_id(0)
    finally:
        # Re-enable even on failure; otherwise an error would leave the
        # widget permanently disabled.
        self.enabled(True)