def test_read_bytes_delimited(e, s, a, b):
    """Read blocks split on a delimiter and check they reassemble the files.

    Defect fixed: the key-comparison comprehensions used ``a`` and ``b`` as
    loop variables, shadowing the worker fixtures ``a`` and ``b`` passed to
    the test — harmless under Python 3 comprehension scoping but a leakage
    bug under Python 2 and confusing in either; renamed to ``f``/``g``.
    """
    for bs in [5, 15, 45, 1500]:
        futures = read_bytes(test_bucket_name+'/test/accounts*',
                             lazy=False, blocksize=bs, delimiter=b'\n')
        futures2 = read_bytes(test_bucket_name+'/test/accounts*',
                              lazy=False, blocksize=bs, delimiter=b'foo')
        # Different delimiters must produce differently-keyed tasks.
        assert [f.key for f in futures] != [g.key for g in futures2]

        results = yield e._gather(futures)
        res = [r for r in results if r]
        # Every non-empty block ends on the delimiter...
        assert all(r.endswith(b'\n') for r in res)
        # ...and concatenating the blocks reproduces the original data.
        ourlines = b''.join(res).split(b'\n')
        testlines = b"".join(files[k] for k in sorted(files)).split(b'\n')
        assert ourlines == testlines

        # delimiter not at the end
        d = b'}'
        futures = read_bytes(test_bucket_name+'/test/accounts*',
                             lazy=False, blocksize=bs, delimiter=d)
        results = yield e._gather(futures)
        res = [r for r in results if r]
        # All should end in } except EOF — one trailing block per file
        # (presumably two fixture files; confirm against `files`).
        assert sum(r.endswith(b'}') for r in res) == len(res) - 2
        ours = b"".join(res)
        test = b"".join(files[v] for v in sorted(files))
        assert ours == test
def test_read_bytes_blocksize_on_large_data(e, s, a, b):
    """With blocksize=None, each matched file yields exactly one lazy value."""
    single = read_bytes('dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv',
                        lazy=True, blocksize=None)
    # One file -> one value.
    assert len(single) == 1

    monthly = read_bytes('dask-data/nyc-taxi/2014/*.csv',
                         lazy=True, blocksize=None)
    # Twelve monthly files -> twelve values.
    assert len(monthly) == 12
def test_read_bytes_block(e, s, a, b):
    """Read files in fixed-size blocks and verify block counts and contents.

    Idiom fix: ``sum([...])`` built a throwaway list; a generator
    expression does the same work without materializing it.
    """
    for bs in [5, 15, 45, 1500]:
        vals = read_bytes(test_bucket_name+'/test/account*', blocksize=bs)
        # One block per started chunk; assumes no file length is an exact
        # multiple of bs (otherwise ``// bs + 1`` over-counts) — presumably
        # true of the fixture data; confirm against `files`.
        assert len(vals) == sum((len(v) // bs + 1) for v in files.values())
        futures = e.compute(vals)
        results = yield e._gather(futures)
        # Total bytes read equal total bytes stored.
        assert sum(len(r) for r in results) == sum(len(v) for v in files.values())

        # The eager variant must produce the same number of blocks.
        futures = read_bytes(test_bucket_name+'/test/accounts*',
                             blocksize=bs, lazy=False)
        assert len(vals) == len(futures)
        results = yield e._gather(futures)
        assert sum(len(r) for r in results) == sum(len(v) for v in files.values())

        # Reassembled lines match the source data (order-insensitive).
        ourlines = b"".join(results).split(b'\n')
        testlines = b"".join(files.values()).split(b'\n')
        assert set(ourlines) == set(testlines)
def test_read_bytes_lazy_values(e, s, a, b):
    """Lazy read_bytes returns Value objects that compute to the file bytes.

    Defect fixed: this test was named ``test_read_bytes_lazy``, the same
    name as a later definition in this module, so this definition was
    shadowed and never collected by the test runner.  Renamed so both run.
    """
    values = read_bytes(test_bucket_name+'/test/', lazy=True)
    assert all(isinstance(v, Value) for v in values)
    results = e.compute(values, sync=False)
    results = yield e._gather(results)
    # Gathered payloads must include every fixture file's contents.
    assert set(results).issuperset(set(files.values()))
def test_read_bytes_anon(s, a, b):
    """Anonymous-access read of the test bucket via an explicit Executor.

    Defect fixed: this test was named ``test_read_bytes``, duplicating
    later definitions in this module, so it was shadowed and never
    collected by the test runner.  Renamed so it runs.
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    futures = read_bytes(test_bucket_name, prefix='test/', anon=True)
    # Each file produces at least one future (files may be split further).
    assert len(futures) >= len(files)
    results = yield e._gather(futures)
    assert set(results).issuperset(set(files.values()))
    yield e._shutdown()
def test_read_bytes_lazy(s, a, b):
    """Anonymous lazy read: every item is a Value and computes to file bytes."""
    executor = Executor((s.ip, s.port), start=False)
    yield executor._start()
    lazy_values = read_bytes(test_bucket_name, 'test/', lazy=True, anon=True)
    # Lazy mode hands back dask Value objects rather than concrete futures.
    for v in lazy_values:
        assert isinstance(v, Value)
    futures = executor.compute(lazy_values, sync=False)
    gathered = yield executor._gather(futures)
    # Every fixture file's contents must appear among the gathered results.
    assert set(files.values()) <= set(gathered)
    yield executor._shutdown()
def test_read_bytes_anon_not_lazy(s, a, b):
    """Anonymous eager read (lazy=False) returns futures for every file.

    Defect fixed: this test was named ``test_read_bytes``, duplicating
    another definition later in this module, so it was shadowed and never
    collected by the test runner.  Renamed so it runs.
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    futures = read_bytes(test_bucket_name, prefix='test/', anon=True,
                         lazy=False)
    # Each file produces at least one future (files may be split further).
    assert len(futures) >= len(files)
    results = yield e._gather(futures)
    assert set(results).issuperset(set(files.values()))
    yield e._shutdown()
def test_read_bytes_blocksize_none(e, s, a, b):
    """blocksize=None disables chunking: exactly one future per file."""
    parts = read_bytes(test_bucket_name+'/test/accounts.*',
                       lazy=False, blocksize=None)
    assert len(files) == len(parts)
def test_read_bytes(e, s, a, b):
    """Eager read of the accounts files gathers exactly the stored bytes."""
    futures = read_bytes(test_bucket_name+'/test/accounts.*', lazy=False)
    # At least one future per file (files may be split into blocks).
    assert len(futures) >= len(files)
    payloads = yield e._gather(futures)
    assert set(payloads) == set(files.values())