class TestLdaDiff(unittest.TestCase): def setUp(self): texts = [ ['human', 'interface', 'computer'], ['survey', 'user', 'computer', 'system', 'response', 'time'], ['eps', 'user', 'interface', 'system'], ['system', 'human', 'system', 'eps'], ['user', 'response', 'time'], ['trees'], ['graph', 'trees'], ['graph', 'minors', 'trees'], ['graph', 'minors', 'survey'], ] self.dictionary = Dictionary(texts) self.corpus = [self.dictionary.doc2bow(text) for text in texts] self.num_topics = 5 self.n_ann_terms = 10 self.model = LdaModel(corpus=self.corpus, id2word=self.dictionary, num_topics=self.num_topics, passes=10) def testBasic(self): mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms) self.assertEqual(mdiff.shape, (self.num_topics, self.num_topics)) self.assertEquals(len(annotation), self.num_topics) self.assertEquals(len(annotation[0]), self.num_topics) def testIdentity(self): for dist_name in ["hellinger", "kulback_leibler", "jaccard"]: mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name) for row in annotation: for (int_tokens, diff_tokens) in row: self.assertEquals(diff_tokens, []) self.assertEquals(len(int_tokens), self.n_ann_terms) self.assertTrue( np.allclose(np.diag(mdiff), np.zeros(mdiff.shape[0], dtype=mdiff.dtype))) if dist_name == "jaccard": self.assertTrue( np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) def testInput(self): self.assertRaises(ValueError, self.model.diff, self.model, n_ann_terms=self.n_ann_terms, distance='something') self.assertRaises(ValueError, self.model.diff, [], n_ann_terms=self.n_ann_terms, distance='something')
class TestLdaDiff(unittest.TestCase): def setUp(self): self.dictionary = common_dictionary self.corpus = common_corpus self.num_topics = 5 self.n_ann_terms = 10 self.model = LdaModel(corpus=self.corpus, id2word=self.dictionary, num_topics=self.num_topics, passes=10) def test_basic(self): # test for matrix case mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms) self.assertEqual(mdiff.shape, (self.num_topics, self.num_topics)) self.assertEqual(len(annotation), self.num_topics) self.assertEqual(len(annotation[0]), self.num_topics) # test for diagonal case mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, diagonal=True) self.assertEqual(mdiff.shape, (self.num_topics,)) self.assertEqual(len(annotation), self.num_topics) def test_identity(self): for dist_name in ["hellinger", "kullback_leibler", "jaccard"]: # test for matrix case mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name) for row in annotation: for (int_tokens, diff_tokens) in row: self.assertEqual(diff_tokens, []) self.assertEqual(len(int_tokens), self.n_ann_terms) self.assertTrue(np.allclose(np.diag(mdiff), np.zeros(mdiff.shape[0], dtype=mdiff.dtype))) if dist_name == "jaccard": self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) # test for diagonal case mdiff, annotation = \ self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name, diagonal=True) for (int_tokens, diff_tokens) in annotation: self.assertEqual(diff_tokens, []) self.assertEqual(len(int_tokens), self.n_ann_terms) self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) if dist_name == "jaccard": self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) def test_input(self): self.assertRaises(ValueError, self.model.diff, self.model, n_ann_terms=self.n_ann_terms, distance='something') self.assertRaises(ValueError, self.model.diff, [], n_ann_terms=self.n_ann_terms, distance='something')
class TestLdaDiff(unittest.TestCase): def setUp(self): self.dictionary = common_dictionary self.corpus = common_corpus self.num_topics = 5 self.n_ann_terms = 10 self.model = LdaModel(corpus=self.corpus, id2word=self.dictionary, num_topics=self.num_topics, passes=10) def testBasic(self): # test for matrix case mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms) self.assertEqual(mdiff.shape, (self.num_topics, self.num_topics)) self.assertEqual(len(annotation), self.num_topics) self.assertEqual(len(annotation[0]), self.num_topics) # test for diagonal case mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, diagonal=True) self.assertEqual(mdiff.shape, (self.num_topics,)) self.assertEqual(len(annotation), self.num_topics) def testIdentity(self): for dist_name in ["hellinger", "kullback_leibler", "jaccard"]: # test for matrix case mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name) for row in annotation: for (int_tokens, diff_tokens) in row: self.assertEqual(diff_tokens, []) self.assertEqual(len(int_tokens), self.n_ann_terms) self.assertTrue(np.allclose(np.diag(mdiff), np.zeros(mdiff.shape[0], dtype=mdiff.dtype))) if dist_name == "jaccard": self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) # test for diagonal case mdiff, annotation = \ self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name, diagonal=True) for (int_tokens, diff_tokens) in annotation: self.assertEqual(diff_tokens, []) self.assertEqual(len(int_tokens), self.n_ann_terms) self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) if dist_name == "jaccard": self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) def testInput(self): self.assertRaises(ValueError, self.model.diff, self.model, n_ann_terms=self.n_ann_terms, distance='something') self.assertRaises(ValueError, self.model.diff, [], n_ann_terms=self.n_ann_terms, distance='something')
class TestLdaDiff(unittest.TestCase): def setUp(self): texts = [ ['human', 'interface', 'computer'], ['survey', 'user', 'computer', 'system', 'response', 'time'], ['eps', 'user', 'interface', 'system'], ['system', 'human', 'system', 'eps'], ['user', 'response', 'time'], ['trees'], ['graph', 'trees'], ['graph', 'minors', 'trees'], ['graph', 'minors', 'survey'], ] self.dictionary = Dictionary(texts) self.corpus = [self.dictionary.doc2bow(text) for text in texts] self.num_topics = 5 self.n_ann_terms = 10 self.model = LdaModel(corpus=self.corpus, id2word=self.dictionary, num_topics=self.num_topics, passes=10) def testBasic(self): mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms) self.assertEqual(mdiff.shape, (self.num_topics, self.num_topics)) self.assertEquals(len(annotation), self.num_topics) self.assertEquals(len(annotation[0]), self.num_topics) def testIdentity(self): for dist_name in ["hellinger", "kullback_leibler", "jaccard"]: mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name) for row in annotation: for (int_tokens, diff_tokens) in row: self.assertEquals(diff_tokens, []) self.assertEquals(len(int_tokens), self.n_ann_terms) self.assertTrue(np.allclose(np.diag(mdiff), np.zeros(mdiff.shape[0], dtype=mdiff.dtype))) if dist_name == "jaccard": self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) def testInput(self): self.assertRaises(ValueError, self.model.diff, self.model, n_ann_terms=self.n_ann_terms, distance='something') self.assertRaises(ValueError, self.model.diff, [], n_ann_terms=self.n_ann_terms, distance='something')
def plot_distances(lda: models.LdaModel, distance='jaccard', num_words=50, title=None, other_model=None): comparison = True if not other_model: comparison = False other_model = lda mdiff, annotations = lda.diff(other_model, distance=distance, num_words=num_words) topic_difference_heatmap(mdiff=mdiff, title=title, diff_models=comparison)
try: get_ipython() import plotly.offline as py except Exception: plot_difference = plot_difference_matplotlib else: py.init_notebook_mode() plot_difference = plot_difference_plotly # In[ ]: # Heatmap to compare the correlation between LDA and Multicore LDA mdiff, annotation = ldamodel.diff(ldamulticore, distance='jaccard', num_words=30) plot_difference( mdiff, title="LDA vs LDA Multicore Topic difference by Jaccard distance", annotation=annotation) # In[ ]: # Convert LdaMallet model to a gensim model for Visualization from gensim.models.ldamodel import LdaModel def convertldaMalletToldaGen(mallet_model): model_gensim = LdaModel(id2word=mallet_model.id2word,