def json_data():
    """Fixture: write three newline-delimited-JSON files and yield a Stack.

    Builds one JSON object per line for each of three files, materializes
    them via ``filetexts``, wraps each in a ``JSON_Streaming`` descriptor
    (sorted by filename for deterministic order), and yields the ``Stack``.
    """
    data = {'a.csv': [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}],
            'b.csv': [{'x': 5, 'y': 6}, {'x': 7, 'y': 8}],
            'c.csv': [{'x': 9, 'y': 10}, {'x': 11, 'y': 12}]}
    # One JSON document per line (newline-delimited JSON) per file.
    # Dict comprehension instead of dict(<generator>) — same mapping,
    # idiomatic form (flake8-comprehensions C402).
    text = {fn: '\n'.join(json.dumps(d) for d in dicts)
            for fn, dicts in data.items()}
    with filetexts(text) as filenames:
        descriptors = [JSON_Streaming(fn, schema='{x: int32, y: int32}')
                       for fn in sorted(filenames)]
        yield Stack(descriptors)
def test_stack(stack_data):
    """Exercise a Stack of CSV descriptors: dshape, py/dynd conversion,
    repeatable iteration, chunking, and fancy indexing."""
    descriptors = [CSV(fn, schema='2 * int32') for fn in sorted(stack_data)]
    stacked = Stack(descriptors)

    # The stack adds a leading axis of length 3 (one per descriptor).
    assert stacked.dshape == 3 * descriptors[0].dshape

    expected = (((1, 1), (2, 2)),
                ((3, 3), (4, 4)),
                ((5, 5), (6, 6)))
    assert tuplify(tuple(stacked.as_py())) == expected

    dynd_result = stacked.as_dynd()
    dynd_expected = nd.array(expected, dtype='int32')
    assert nd.as_py(dynd_result) == nd.as_py(dynd_expected)

    # Iteration must be repeatable — not a one-shot generator.
    assert tuplify(tuple(stacked)) == expected
    assert tuplify(tuple(stacked)) == expected

    assert all(isinstance(chunk, nd.array) for chunk in stacked.chunks())

    # Fancy indexing across the stacked axis and within rows.
    assert tuple(stacked[[0, 2], 0, 0]) == (1, 5)
    assert tuplify(tuple(stacked[0])) == ((1, 1), (2, 2))

    column = tuple(stacked[0, :, [1]])
    assert tuplify(column) == ((1, ), (2, ))

    assert tuplify(tuple(stacked[0])) == expected[0]

    # Slicing yields lazy iterators, not materialized sequences.
    assert isinstance(stacked[:, 0], Iterator)
    assert isinstance(stacked[:], Iterator)
def test_gzip_json_files(self):
    """A Stack over gzip-compressed JSON files yields the expected tuples
    and reports the expected schema."""
    with filetexts(texts, open=gzip.open) as filenames:
        descs = [JSON(fn, dshape=schema, open=gzip.open)
                 for fn in sorted(filenames)]
        stacked = Stack(descs)
        # Order across files is not guaranteed; compare as sorted sequences.
        self.assertEqual(sorted(stacked), sorted(tuples))
        self.assertEqual(stacked.schema, dshape(schema))