def test_dtm_conversion(self):
    """Round-trip ``self.dtm`` through the list form and back.

    Converts the matrix with ``utils.matrix_to_lists``, checks that the
    largest word index plus one equals the second dimension of
    ``self.topics`` and that the largest document index plus one equals
    ``NUM_DOCS``, then rebuilds the matrix with ``utils.lists_to_matrix``
    and compares it with the original.
    """
    topic_word = self.topics
    doc_term = self.dtm
    # Only the second dimension is needed; the first is discarded.
    _, n_vocab = topic_word.shape

    word_ids, doc_ids = utils.matrix_to_lists(doc_term)
    self.assertEqual(max(word_ids) + 1, n_vocab)
    self.assertEqual(max(doc_ids) + 1, NUM_DOCS)

    rebuilt = utils.lists_to_matrix(word_ids, doc_ids)
    np.testing.assert_allclose(doc_term, rebuilt)
def test_matrix_to_lists(self):
    """Check the index lists returned by ``utils.matrix_to_lists``.

    Using the fixture attributes on ``self``, verifies that:

    * ``WS`` holds ``D * N_WORDS_PER_DOC`` entries and ``DS`` has the
      same length;
    * the largest document/word index plus one matches ``dtm.shape``;
    * the document indices are in non-decreasing order;
    * the occurrence counts of the document and word indices equal
      ``N_BY_D`` and ``N_BY_W`` respectively.
    """
    dtm, D, N_WORDS_PER_DOC = self.dtm, self.D, self.N_WORDS_PER_DOC
    N_BY_D, N_BY_W = self.N_BY_D, self.N_BY_W
    WS, DS = utils.matrix_to_lists(dtm)
    self.assertEqual(len(WS), D * N_WORDS_PER_DOC)
    self.assertEqual(len(WS), len(DS))
    self.assertEqual(dtm.shape, (max(DS) + 1, max(WS) + 1))
    # Compare neighbours directly rather than ``all(DS == sorted(DS))``:
    # that form only works when ``DS`` is an ndarray (for a plain list
    # ``==`` yields a single bool and ``all`` raises TypeError) and it
    # needs a full sort plus a Python-level loop.
    doc_ids = np.asarray(DS)
    self.assertTrue(
        np.all(doc_ids[:-1] <= doc_ids[1:]),
        "document indices are not in non-decreasing order",
    )
    self.assertTrue(np.all(np.bincount(DS) == N_BY_D))
    self.assertTrue(np.all(np.bincount(WS) == N_BY_W))
def test_lists_to_matrix(self):
    """Check that ``utils.lists_to_matrix`` inverts ``utils.matrix_to_lists``.

    Converts ``self.dtm`` to index lists, rebuilds a matrix from them and
    asserts that the result has the same shape and the same entries as
    the original.
    """
    dtm = self.dtm
    WS, DS = utils.matrix_to_lists(dtm)
    dtm_new = utils.lists_to_matrix(WS, DS)
    # ``==`` broadcasts, so a wrongly shaped but broadcast-compatible
    # result could still compare equal element-wise; pin the shape first.
    self.assertEqual(np.shape(dtm), np.shape(dtm_new))
    self.assertTrue(np.all(dtm == dtm_new))