def test_ldac_conversion(self):
    """Round-trip ``self.dtm`` through ``dtm2ldac`` / ``ldac2dtm``.

    Checks, in order:
      1. the leading integer of every emitted line equals the number of
         strictly positive entries in the matching row of the matrix;
      2. exactly one line is emitted per row;
      3. parsing the joined lines back yields a matrix equal element-wise
         to the original.
    """
    original = self.dtm
    # Unpacking also enforces that the matrix is two-dimensional.
    n_docs, _n_terms = original.shape

    ldac_lines = list(utils.dtm2ldac(self.dtm))

    # Per-row count of entries greater than zero.
    positive_per_row = np.sum(original > 0, axis=1)
    for expected_count, line in zip(positive_per_row, ldac_lines):
        leading_field = line.split(' ')[0]
        self.assertEqual(expected_count, int(leading_field))

    self.assertEqual(len(ldac_lines), n_docs)

    # Feed the serialized text back through the parser via an in-memory file.
    buffer = io.StringIO('\n'.join(ldac_lines))
    restored = utils.ldac2dtm(buffer)
    self.assertTrue(np.all(original == restored))
def test_ldac2dtm(self):
    """Parse the bundled ``reuters.ldac`` fixture and check its dimensions.

    The fixture is looked up next to this test module. The parsed matrix is
    expected to have shape ``(395, 4258)`` and entries summing to ``84010``.
    """
    test_dir = os.path.dirname(__file__)
    reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac')
    # Use a context manager so the file handle is closed deterministically
    # (the original left it open, which triggers ResourceWarning in tests).
    with open(reuters_ldac_fn) as ldac_file:
        dtm = utils.ldac2dtm(ldac_file)
    self.assertEqual(dtm.shape, (395, 4258))
    self.assertEqual(dtm.sum(), 84010)
def load_ldac(file_path):
    """Open ``file_path`` and parse it with ``ldac2dtm(..., offset=0)``.

    Args:
        file_path: Path of the file to read; opened in text mode with the
            platform default encoding.

    Returns:
        Whatever ``ldac2dtm`` returns for the opened file.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Close the handle as soon as parsing finishes; the original passed a
    # bare ``open(...)`` and never closed it.
    # NOTE(review): assumes ldac2dtm fully consumes the file before
    # returning (i.e. it is not lazy) - confirm against its implementation.
    with open(file_path) as ldac_file:
        return ldac2dtm(ldac_file, offset=0)