def test_resource_shape():
    """Check that a new HDF5 dataset gets the shape described by its dshape.

    A ``var`` leading dimension is expected to produce an initial length of 0.
    """
    cases = [
        ('10 * int', (10,)),
        ('10 * 10 * int', (10, 10)),
        ('var * 10 * int', (0, 10)),
    ]
    for ds, expected in cases:
        with tmpfile('.hdf5') as fn:
            r = resource(fn + '::/data', dshape=ds)
            try:
                assert r.shape == expected
            finally:
                # Release the file handle even when the assertion fails so the
                # temporary file is not left open.
                r.file.close()
def test_resource_shape():
    """Check that bcolz resources report the shape described by their dshape."""
    with tmpfile('.bcolz') as fn:
        one_dim = resource(fn, dshape='10 * int')
        assert one_dim.shape == (10,)

    with tmpfile('.bcolz') as fn:
        two_dim = resource(fn, dshape='10 * 10 * int')
        assert two_dim.shape == (10, 10)

    with tmpfile('.bcolz') as fn:
        # A variable-length leading dimension starts out empty.
        growable = resource(fn, dshape='var * 10 * int')
        assert growable.shape == (0, 10)
def test_resource_gzip():
    """Check that gzipped JSON paths resolve to a JSON or JSONLines resource."""
    json_types = (JSON, JSONLines)

    with tmpfile('json.gz') as fn:
        assert isinstance(resource(fn), json_types)
        assert isinstance(resource('json://' + fn), json_types)
        assert isinstance(resource('jsonlines://' + fn), json_types)

    # Bind the new temporary name: without ``as fn`` the assertion below would
    # silently reuse the stale path from the previous block.
    with tmpfile('jsonlines.gz') as fn:
        assert isinstance(resource('jsonlines://' + fn), json_types)
def test_resource_on_file():
    """Check both ways of naming a SQLite table yield a SQLAlchemy ``Table``.

    The table name is given first as a separate argument, then embedded in the
    URI after ``::``.
    """
    schema = 'var * {x: int, y: int}'

    with tmpfile('.db') as fn:
        table = resource('sqlite:///' + fn, 'foo', dshape=schema)
        assert isinstance(table, sa.Table)

    with tmpfile('.db') as fn:
        table = resource('sqlite:///' + fn + '::' + 'foo', dshape=schema)
        assert isinstance(table, sa.Table)
def test_into_sqlite():
    """Round-trip rows through a CSV file into a SQLite table and back."""
    rows = [('Alice', 100), ('Bob', 200)]
    schema = datashape.dshape('var * {name: string, amount: int}')

    with tmpfile('.db') as dbpath, tmpfile('.csv') as csvpath:
        csv_source = into(csvpath, rows, dshape=schema, has_header=False)
        table = resource('sqlite:///%s::mytable' % dbpath, dshape=schema)
        append_csv_to_sql_table(table, csv_source)
        assert into(list, table) == rows
def test_resource_shape():
    """Check that a new HDF5 dataset gets the shape described by its dshape."""
    with tmpfile('.hdf5') as fn:
        r = resource(fn + '::/data', dshape='10 * int')
        try:
            assert r.shape == (10,)
        finally:
            # Close in ``finally`` so a failing assertion does not leak the
            # open file handle.
            r.file.close()

    with tmpfile('.hdf5') as fn:
        r = resource(fn + '::/data', dshape='10 * 10 * int')
        try:
            assert r.shape == (10, 10)
        finally:
            r.file.close()

    with tmpfile('.hdf5') as fn:
        r = resource(fn + '::/data', dshape='var * 10 * int')
        try:
            # A variable-length leading dimension starts out empty.
            assert r.shape == (0, 10)
        finally:
            r.file.close()
def test_tuples_to_json():
    """Check that appended tuples are written with the dshape's field names.

    The same check is run against both the ``JSON`` and ``JSONLines`` backends.
    """
    schema = dshape('var * {a: int, b: int}')

    for backend in (JSON, JSONLines):
        with tmpfile('json') as fn:
            target = backend(fn)
            append(target, [(1, 2), (10, 20)], dshape=schema)

            with open(fn) as f:
                assert '"a": 1' in f.read()
def test_tuples_to_json():
    """Check that tuples appended to JSON targets are keyed by field name."""
    record_shape = dshape('var * {a: int, b: int}')

    # Whole-document JSON target.
    with tmpfile('json') as fn:
        document = JSON(fn)
        append(document, [(1, 2), (10, 20)], dshape=record_shape)
        with open(fn) as handle:
            written = handle.read()
            assert '"a": 1' in written

    # Line-delimited JSON target.
    with tmpfile('json') as fn:
        line_file = JSONLines(fn)
        append(line_file, [(1, 2), (10, 20)], dshape=record_shape)
        with open(fn) as handle:
            written = handle.read()
            assert '"a": 1' in written
def test_fixed_shape():
    """Check the discovered shape of a frame stored via ``to_hdf``.

    The resource's own ``shape`` attribute is expected to be a list, while the
    discovered datashape must have one dimension equal to ``len(df)``.
    """
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        r = resource('hdfstore://' + fn + '::/foo')
        try:
            assert isinstance(r.shape, list)
            assert discover(r).shape == (len(df), )
        finally:
            # Close the parent store even when an assertion fails.
            r.parent.close()
def test_varlen_dtypes():
    """Check that an array with an object (variable-length) field can be
    stored in HDF5 and read back as a list."""
    y = np.array([('Alice', 100), ('Bob', 200)],
                 dtype=[('name', 'O'), ('amount', 'i4')])
    with tmpfile('.hdf5') as fn:
        dset = into(fn + '::/data', y)
        try:
            # NOTE(review): this comparison is a tautology; it only proves the
            # dataset can be converted twice without raising.  Comparing with
            # ``into(list, y)`` may be the intent -- confirm how strings
            # round-trip before tightening it.
            assert into(list, dset) == into(list, dset)
            # At minimum, every record must survive the round trip.
            assert len(into(list, dset)) == len(y)
        finally:
            # Release the file handle even when an assertion fails.
            dset.file.close()
def test_create():
    """Check that ``create`` builds an ``h5py.File`` matching the given dshape."""
    with tmpfile('.hdf5') as fn:
        ds = datashape.dshape('{x: int32, y: {z: 3 * int32}}')
        f = create(h5py.File, dshape='{x: int32, y: {z: 3 * int32}}', path=fn)
        try:
            assert isinstance(f, h5py.File)
            assert f.filename == fn
            assert discover(f) == ds
        finally:
            # The original never closed the file; release the handle so the
            # temporary file is not left open.
            f.close()
def test_csv():
    """Check that an explicit delimiter is recorded in the CSV dialect."""
    with tmpfile('.csv') as fn:
        source = CSV(
            fn,
            dshape='var * {name: string, amount: int}',
            delimiter=',',
        )
        dialect = source.dialect
        assert dialect['delimiter'] == ','
def people(sc):
    """Yield an RDD of ``Row`` objects built from the module-level ``df``.

    ``df`` is written to a temporary header-less CSV, read back through
    ``sc.textFile`` and each comma-split line is mapped to a
    ``Row(name, amount, id)``.  The RDD is yielded while the temporary file
    is still inside its ``tmpfile`` context.
    """
    def to_row(person):
        # Columns are positional: name, amount, id.
        return Row(name=person[0],
                   amount=float(person[1]),
                   id=int(person[2]))

    with tmpfile('.txt') as fn:
        df.to_csv(fn, header=False, index=False)
        lines = sc.textFile(fn)
        fields = lines.map(lambda line: line.split(','))
        yield fields.map(to_row)
def tbfile():
    """Yield the path of a temporary PyTables file holding one table.

    The table is created at the root under the name ``'title'`` from the
    module-level ``x``; both the table and the file are closed before the
    path is yielded.
    """
    with tmpfile('.h5') as filename:
        h5file = tb.open_file(filename, mode='w')
        table = h5file.create_table('/', 'title', x)
        table.close()
        h5file.close()
        yield filename
def test_resource_with_variable_length():
    """Check that a ``var`` leading dimension creates an empty HDF5 dataset."""
    with tmpfile('.hdf5') as fn:
        # Start from a path with no file on disk.
        os.remove(fn)
        ds = datashape.dshape('var * 4 * int32')
        r = resource(fn + '::/data', dshape=ds)
        try:
            assert r.shape == (0, 4)
        finally:
            # Release the file handle even when the assertion fails.
            r.file.close()
def test_resource_existing_ctable():
    """Check that ``resource`` reopens data previously written with ``into``."""
    with tmpfile('.bcolz') as fn:
        written = into(fn, y)
        # Flush so the data is on disk before reopening.
        written.flush()

        reopened = resource(fn)
        contents = reopened[:]
        assert eq(contents, y)
def test_empty_line():
    """Check that a trailing blank line does not yield an extra record."""
    # Two records followed by an extra newline.
    text = '{"a": 1}\n{"a": 2}\n\n'
    with tmpfile('.json') as fn:
        with open(fn, 'w') as out:
            out.write(text)

        records = convert(list, JSONLines(fn))
        assert len(records) == 2
def test_resource_ctable():
    """Check that a record dshape creates a ``ctable`` with the matching dtype."""
    record_shape = 'var * {name: string[5, "ascii"], balance: int32}'
    expected_dtype = [('name', 'S5'), ('balance', 'i4')]

    with tmpfile('.bcolz') as fn:
        table = resource(fn, dshape=record_shape)
        assert isinstance(table, ctable)
        assert table.dtype == expected_dtype
def test_resource():
    """Check which JSON resource type is chosen for each way of naming a file.

    Explicit ``jsonlines://`` and ``json://`` protocols select their own type;
    a bare path with a ``var * {...}`` expected dshape selects ``JSONLines``.
    """
    with tmpfile('json') as fn:
        from_lines_uri = resource('jsonlines://' + fn)
        assert isinstance(from_lines_uri, JSONLines)

        from_json_uri = resource('json://' + fn)
        assert isinstance(from_json_uri, JSON)

        hinted = resource(fn, expected_dshape=dshape('var * {a: int}'))
        assert isinstance(hinted, JSONLines)
def test_resource_ctable_correctly_infers_length():
    """Check that a fixed-length dshape sets the carray's expected length."""
    with tmpfile('.bcolz') as fn:
        arr = resource(fn, dshape='100 * int32')

        assert isinstance(arr, carray)
        assert arr.dtype == 'i4'
        # The leading dimension of the dshape becomes the expected length.
        expected_length = get_expectedlen(arr)
        assert expected_length == 100
def test_drop():
    """Check that ``drop`` removes a bcolz resource from disk."""
    record_shape = 'var * {name: string[5, "ascii"], balance: int32}'
    with tmpfile('.bcolz') as fn:
        # Creating the resource is what puts something at ``fn``.
        created = resource(fn, dshape=record_shape)
        assert os.path.exists(fn)

        drop(fn)
        assert not os.path.exists(fn)
def test_empty_line():
    """Check that JSONLines ignores a blank line at the end of the file."""
    text = '{"a": 1}\n{"a": 2}\n\n'  # two records plus an extra endline
    with tmpfile('.json') as fn:
        with open(fn, 'w') as sink:
            sink.write(text)

        source = JSONLines(fn)
        parsed = convert(list, source)
        # Only the two real records should be returned.
        assert len(parsed) == 2
def test_resource_carray():
    """Check that a scalar-measure dshape creates an empty ``carray``."""
    with tmpfile('.bcolz') as fn:
        arr = resource(fn, dshape='var * int32')

        assert isinstance(arr, carray)
        assert arr.dtype == 'i4'
        # Variable-length arrays start out with no elements.
        assert arr.shape == (0,)
def test_resource_existing_carray():
    """Check that reopening an existing carray path yields a ``carray``."""
    with tmpfile('.bcolz') as fn:
        original = resource(fn, dshape='var * int32')
        append(original, [1, 2, 3])
        # Flush so the appended values are on disk before reopening.
        original.flush()

        reopened = resource(fn)
        assert isinstance(reopened, carray)
def test_append():
    """Check that appending strings to a ``TextFile`` writes one per line."""
    with tmpfile('log') as fn:
        log = TextFile(fn)
        append(log, ['Hello', 'World'])

        assert os.path.exists(fn)

        with open(fn) as f:
            stripped = [line.strip() for line in f.readlines()]
        assert stripped == ['Hello', 'World']
def test_engine_metadata_caching():
    """Check that tables on one SQLite URI share their engine and metadata."""
    with tmpfile('db') as fn:
        base_uri = 'sqlite:///' + fn

        engine = resource(base_uri)
        a = resource(base_uri + '::a', dshape=dshape('var * {x: int}'))
        b = resource(base_uri + '::b', dshape=dshape('var * {y: int}'))

        assert a.metadata is b.metadata
        # Both tables must be bound to the very same engine object.
        assert engine is a.bind
        assert a.bind is b.bind
def test_resource_with_variable_length():
    """Check that a ``var`` leading dimension creates an empty HDF5 dataset."""
    with tmpfile('.hdf5') as fn:
        shape_spec = datashape.dshape('var * 4 * int32')
        dataset = resource(fn + '::/data', dshape=shape_spec)
        try:
            expected = (0, 4)
            assert dataset.shape == expected
        finally:
            # Always release the underlying file handle.
            dataset.file.close()
def jsonlines_file(data):
    """Yield the path of a temporary file with one JSON document per line.

    Each item of ``data`` is serialised with ``json_dumps`` as the fallback
    encoder and followed by a newline.
    """
    with tmpfile('.json') as fn:
        with open(fn, 'w') as out:
            for record in data:
                out.write(json.dumps(record, default=json_dumps))
                out.write('\n')
        yield fn
def test_drop():
    """Check that ``drop`` deletes the file backing a ``JSON`` resource."""
    with tmpfile('json') as fn:
        target = JSON(fn)
        append(target, [1, 2, 3])
        # The append is what creates the file on disk.
        assert os.path.exists(fn)

        drop(target)
        assert not os.path.exists(fn)
def test_read_gzip():
    """Check that a gzip-compressed JSON file is read back as ``dat``."""
    with tmpfile('json.gz') as fn:
        s = json.dumps(dat).encode('utf-8')
        # Use a context manager so the gzip handle is closed (and the stream
        # finalised) even if the write raises.
        with gzip.open(fn, 'wb') as f:
            f.write(s)

        js = JSON(fn)
        assert convert(list, js) == dat
def test_into_resource():
    """Check that ``into`` writes ``x`` to an HDF5 dataset unchanged."""
    with tmpfile('.hdf5') as fn:
        dataset = into(fn + '::/x', x)
        try:
            assert dataset.shape == x.shape
            stored, original = dataset[:], x[:]
            assert eq(stored, original)
        finally:
            # Always release the underlying file handle.
            dataset.file.close()
def jsonlines_file(data):
    """Yield a temporary path containing ``data`` as line-delimited JSON.

    Every item is dumped with ``json_dumps`` as the ``default`` hook and
    terminated by a newline.
    """
    with tmpfile('.json') as fn:
        with open(fn, 'w') as sink:
            for entry in data:
                encoded = json.dumps(entry, default=json_dumps)
                sink.write(encoded)
                sink.write('\n')
        yield fn
def test_read_gzip():
    """Check that ``JSON`` can read a gzip-compressed file holding ``dat``."""
    with tmpfile('json.gz') as fn:
        payload = json.dumps(dat).encode('utf-8')
        # The ``with`` block guarantees the gzip stream is closed before it
        # is read back, including when the write fails.
        with gzip.open(fn, 'wb') as f:
            f.write(payload)

        js = JSON(fn)
        assert convert(list, js) == dat
def test_copy_with_into():
    """Check that a plain list can be copied into a new HDF5 dataset."""
    with tmpfile('.hdf5') as fn:
        target = into(fn + '::/data', [1, 2, 3])
        try:
            assert target.shape == (3,)
            stored = target[:]
            assert eq(stored, [1, 2, 3])
        finally:
            # Always release the underlying file handle.
            target.file.close()
def test_resource_carray_overrides_expectedlen():
    """Check that an explicit ``expectedlen`` wins over the dshape's length."""
    with tmpfile('.bcolz') as fn:
        arr = resource(fn, dshape='100 * int32', expectedlen=200)

        assert isinstance(arr, carray)
        assert arr.dtype == 'i4'
        # The shape still follows the dshape ...
        assert arr.shape == (100,)
        # ... while the expected length follows the keyword argument.
        assert get_expectedlen(arr) == 200
def test_resource_ctable_correctly_infers_length():
    """Check that every ctable column gets the dshape's fixed length as its
    expected length."""
    record_shape = '100 * {name: string[5, "ascii"], balance: int32}'
    with tmpfile('.bcolz') as fn:
        table = resource(fn, dshape=record_shape)

        assert isinstance(table, ctable)
        assert table.dtype == [('name', 'S5'), ('balance', 'i4')]

        column_lengths = [get_expectedlen(table[col]) for col in table.names]
        assert all(length == 100 for length in column_lengths)
def test_resource():
    """Check that a bare ``.hdf5`` path with a record dshape creates an
    ``h5py.File`` whose discovered dshape matches."""
    with tmpfile('.hdf5') as fn:
        # Start from a path with no file on disk.
        os.remove(fn)
        ds = datashape.dshape('{x: int32, y: 3 * int32}')
        r = resource(fn, dshape=ds)
        try:
            assert isinstance(r, h5py.File)
            assert discover(r) == ds
        finally:
            # The original never closed the file.  ``getattr`` keeps a failed
            # ``isinstance`` assertion as the reported error if ``r`` turns
            # out not to be a closable object.
            close = getattr(r, 'close', None)
            if close is not None:
                close()
def test_append_json():
    """Check that appending to ``JSON`` writes all records on a single line."""
    with tmpfile('json') as fn:
        target = JSON(fn)
        append(target, dat)

        with open(target.path) as f:
            lines = f.readlines()

        assert len(lines) == 1
        first = lines[0]
        assert 'Alice' in first
        assert 'Bob' in first
def test_pandas_writes_header_by_default():
    """Check that appending to a fresh CSV writes the column names."""
    with tmpfile('.csv') as fn:
        schema = datashape.dshape('var * {name: string, amount: int}')
        rows = [('Alice', 1), ('Bob', 2)]

        target = CSV(fn)
        append(target, rows, dshape=schema)

        with open(fn) as f:
            contents = f.read()
            assert 'name' in contents
def test_write_gzip_lines():
    """Check that ``JSONLines`` writes gzip-compressed output whose first
    line is the JSON encoding of ``dat[0]``."""
    with tmpfile('json.gz') as fn:
        j = JSONLines(fn)
        append(j, dat)

        # Use a context manager so the handle is closed even if reading the
        # first line raises (for example on an empty file).
        with gzip.open(fn) as f:
            line = next(f)

        assert line.decode('utf-8').strip() == str(json.dumps(dat[0]))
def test_write_gzip():
    """Check that ``JSON`` writes gzip-compressed output equal to the JSON
    encoding of ``dat``."""
    with tmpfile('json.gz') as fn:
        j = JSON(fn)
        append(j, dat)

        # Use a context manager so the handle is closed even if the read
        # raises.
        with gzip.open(fn) as f:
            text = f.read()

        assert text.decode('utf-8').strip() == str(json.dumps(dat))
def test_resource_with_datapath():
    """Check that a ``path::/datapath`` URI creates an ``h5py.Dataset`` at
    that datapath inside the named file."""
    with tmpfile('.hdf5') as fn:
        ds = datashape.dshape('3 * 4 * int32')
        r = resource(fn + '::/data', dshape=ds)
        try:
            assert isinstance(r, h5py.Dataset)
            assert discover(r) == ds
            assert r.file.filename == fn
            assert r.file['/data'] == r
        finally:
            # The original never closed the file; release the handle so the
            # temporary file is not left open.
            r.file.close()
def test_resource_existing_carray():
    """Check that data appended to a new bcolz resource is visible when the
    same path is reopened."""
    with tmpfile('.bcolz') as fn:
        # Start from a path with nothing on disk.
        os.remove(fn)

        created = resource(fn, dshape=discover(y))
        append(created, y)
        created.flush()

        reopened = resource(fn)
        contents = reopened[:]
        assert eq(contents, y)
def test_into_double_string():
    """Check ``into`` when both source and target are given as path strings."""
    with filetext('alice,1\nbob,2', extension='.csv') as source:
        parsed = into(list, source)
        assert parsed == [('alice', 1), ('bob', 2)]

        with tmpfile('.csv') as target:
            result = into(target, source)
            assert isinstance(result, CSV)

            with open(target) as f:
                copied = f.read()
                assert 'alice' in copied
def test_resource_to_engine_to_create_tables():
    """Check that a database-level dshape on a SQLite URI yields an engine
    whose discovered dshape matches."""
    with tmpfile('.db') as fn:
        db_shape = datashape.dshape(
            '{mytable: var * {name: string, amt: int}}')
        engine = resource('sqlite:///' + fn, dshape=db_shape)

        assert isinstance(engine, sa.engine.Engine)
        assert engine.dialect.name == 'sqlite'
        assert discover(engine) == db_shape
def test_pandas_writes_header_by_default():
    """Check that the header row is present after appending to a new CSV."""
    record_shape = 'var * {name: string, amount: int}'
    with tmpfile('.csv') as fn:
        ds = datashape.dshape(record_shape)
        people_rows = [('Alice', 1), ('Bob', 2)]

        csv_target = CSV(fn)
        append(csv_target, people_rows, dshape=ds)

        with open(fn) as handle:
            # The field name from the dshape should appear in the output.
            assert 'name' in handle.read()
def file(df):
    """Yield ``(path, store, storer)`` for ``df`` saved in a temporary HDFStore.

    ``df`` is written under ``/data`` in table format; the store is closed
    when the generator finishes.
    """
    with tmpfile('.hdf5') as fn:
        f = pd.HDFStore(fn)
        # Enter the ``try`` right after opening so a failing ``put`` cannot
        # leak the open store.
        try:
            f.put('/data', df, format='table', append=True)
            yield fn, f, f.get_storer('/data')
        finally:
            f.close()