示例#1
0
文件: test_utils.py 项目: 52nlp/lda-1
 def test_ldac_conversion(self):
     """Round-trip ``self.dtm`` through ``utils.dtm2ldac`` and ``utils.ldac2dtm``.

     Checks that the leading space-separated field of every emitted line
     equals the number of positive entries in the matching matrix row, that
     one line is produced per row, and that parsing the lines back yields a
     matrix equal to the original everywhere.
     """
     dtm = self.dtm
     n_rows, _ = dtm.shape
     doclines = list(utils.dtm2ldac(self.dtm))
     # Number of entries greater than zero in each row of the matrix.
     positive_per_row = np.sum(dtm > 0, axis=1)
     for expected, docline in zip(positive_per_row, doclines):
         leading_field = docline.split(' ')[0]
         self.assertEqual(expected, int(leading_field))
     self.assertEqual(len(doclines), n_rows)
     # Feed the generated lines back through the parser via an in-memory file.
     stream = io.StringIO('\n'.join(doclines))
     round_tripped = utils.ldac2dtm(stream)
     self.assertTrue(np.all(dtm == round_tripped))
示例#2
0
文件: test_utils.py 项目: nvdnkpr/lda
 def test_ldac_conversion(self):
     """Round-trip ``self.dtm`` through ``utils.dtm2ldac`` and ``utils.ldac2dtm``.

     Checks that the leading space-separated field of every emitted line
     equals the number of positive entries in the matching matrix row, that
     one line is produced per row, and that parsing the lines back yields a
     matrix equal to the original everywhere.
     """
     dtm = self.dtm
     n_rows, _ = dtm.shape
     doclines = list(utils.dtm2ldac(self.dtm))
     # Number of entries greater than zero in each row of the matrix.
     positive_per_row = np.sum(dtm > 0, axis=1)
     for expected, docline in zip(positive_per_row, doclines):
         leading_field = docline.split(' ')[0]
         self.assertEqual(expected, int(leading_field))
     self.assertEqual(len(doclines), n_rows)
     # Feed the generated lines back through the parser via an in-memory file.
     stream = io.StringIO('\n'.join(doclines))
     round_tripped = utils.ldac2dtm(stream)
     self.assertTrue(np.all(dtm == round_tripped))
示例#3
0
文件: test_utils.py 项目: 52nlp/lda-1
 def test_ldac2dtm(self):
     """Parse the ``reuters.ldac`` fixture and check its shape and total.

     The fixture is looked up in the directory containing this test module.
     The parsed matrix is expected to have shape (395, 4258) and its entries
     are expected to sum to 84010.
     """
     test_dir = os.path.dirname(__file__)
     reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac')
     # Use a context manager so the fixture file handle is closed even if
     # parsing raises; the original left the handle open.
     with open(reuters_ldac_fn) as ldac_file:
         dtm = utils.ldac2dtm(ldac_file)
     self.assertEqual(dtm.shape, (395, 4258))
     self.assertEqual(dtm.sum(), 84010)
示例#4
0
文件: test_utils.py 项目: nvdnkpr/lda
 def test_ldac2dtm(self):
     """Parse the ``reuters.ldac`` fixture and check its shape and total.

     The fixture is looked up in the directory containing this test module.
     The parsed matrix is expected to have shape (395, 4258) and its entries
     are expected to sum to 84010.
     """
     test_dir = os.path.dirname(__file__)
     reuters_ldac_fn = os.path.join(test_dir, 'reuters.ldac')
     # Use a context manager so the fixture file handle is closed even if
     # parsing raises; the original left the handle open.
     with open(reuters_ldac_fn) as ldac_file:
         dtm = utils.ldac2dtm(ldac_file)
     self.assertEqual(dtm.shape, (395, 4258))
     self.assertEqual(dtm.sum(), 84010)
示例#5
0
def load_ldac(file_path):
    """Open ``file_path`` and return the result of ``ldac2dtm`` on it.

    The file is opened in text mode with the platform default encoding and
    passed to ``ldac2dtm`` with ``offset=0``.

    Args:
        file_path: Path of the file to read.

    Returns:
        Whatever ``ldac2dtm`` returns for the opened file.
    """
    # Close the handle deterministically; the original left it open.
    # NOTE(review): assumes ldac2dtm consumes the stream fully before
    # returning (i.e. does not return a lazy view of the file) -- confirm.
    with open(file_path) as ldac_file:
        return ldac2dtm(ldac_file, offset=0)