def test_docs_from_ldac_simple():
    """Check ``utils.docs_from_ldac`` on two small in-memory LDA-C inputs.

    Each case writes LDA-C text into a fresh ``StringIO``, rewinds it, and
    compares the parsed result with the expected list of documents (one
    list of term ids per input line). The second case lists its
    ``term:count`` pairs out of term-id order and expects the tokens to
    come back in the order the pairs appear on the line.
    """
    cases = (
        ("2 0:2 1:1\n2 0:3 1:2", [[0, 0, 1], [0, 0, 0, 1, 1]]),
        ("2 1:1 0:2\n3 2:1 0:3 1:1", [[1, 0, 0], [2, 0, 0, 0, 1]]),
    )
    for ldac_text, expected_docs in cases:
        buf = StringIO()
        buf.write(ldac_text)
        buf.seek(0)  # rewind so the parser reads from the beginning
        assert utils.docs_from_ldac(buf) == expected_docs
def _load_docs(cls):
    """Load the Reuters LDA-C fixture and store corpus data on ``cls``.

    Reads ``data/reuters.ldac`` from the directory containing this file and
    sets three attributes on ``cls``:

    * ``docs`` -- the result of ``utils.docs_from_ldac`` on the file,
    * ``V``    -- ``utils.num_terms(cls.docs)``,
    * ``N``    -- ``len(cls.docs)``.
    """
    fixture_path = os.path.join(
        os.path.dirname(__file__), 'data', 'reuters.ldac'
    )
    with open(fixture_path, 'r') as ldac_file:
        cls.docs = utils.docs_from_ldac(ldac_file)
    # Derived quantities are computed from the attribute that was just set.
    cls.V = utils.num_terms(cls.docs)
    cls.N = len(cls.docs)
def test_bad_ldac_data():
    """Feed a single line ``"2 0:1"`` to ``utils.docs_from_ldac``.

    NOTE(review): this test makes no assertion and declares no expected
    exception. Presumably the input is malformed (leading count of 2 but
    only one ``term:count`` pair) and the parser is meant to reject it --
    confirm, and check whether an expected-exception decorator or context
    manager is missing here.
    """
    malformed_input = StringIO()
    malformed_input.write("2 0:1")
    malformed_input.seek(0)  # rewind so the parser reads from the beginning
    utils.docs_from_ldac(malformed_input)