def test_rechunk_dask_array(
    tmp_path, shape, source_chunks, dtype, target_chunks, max_mem
):
    """Rechunk an all-ones dask array into a zarr store and check the output.

    The plan returned by ``api.rechunk`` must be an ``api.Rechunked``; the
    target store is opened (and its chunking checked) before the plan is
    executed, and afterwards every element of the target must equal 1.
    """
    # Source data: a dask array filled with ones.
    ones_source = dsa.ones(shape, chunks=source_chunks, dtype=dtype)

    # On-disk locations for the final and the intermediate store.
    target_path = str(tmp_path / "target.zarr")
    temp_path = str(tmp_path / "temp.zarr")

    plan = api.rechunk(
        ones_source, target_chunks, max_mem, target_path, temp_store=temp_path
    )
    assert isinstance(plan, api.Rechunked)

    # The target is opened before execution, so it must already exist here.
    opened_target = zarr.open(target_path)
    assert opened_target.chunks == tuple(target_chunks)

    executed = plan.execute()
    assert isinstance(executed, zarr.Array)

    # Read the target back through dask and confirm the data are all ones.
    assert dsa.equal(dsa.from_zarr(opened_target), 1).all().compute()
def test_rechunk_group(tmp_path):
    """Rechunk a zarr group holding two arrays and verify the target group.

    Checks that both arrays and the group attributes are present in the
    target before the computation runs, then computes the delayed object
    with dask and confirms every rechunked array is still all ones.
    """
    source_path = str(tmp_path / "source.zarr")
    target_path = str(tmp_path / "target.zarr")
    temp_path = str(tmp_path / "temp.zarr")

    src_group = zarr.group(source_path)
    src_group.attrs["foo"] = "bar"

    # Array "a" is stored in 800-byte chunks (1 * 10 * 20 float32 values).
    src_group.ones(
        "a", shape=(5, 10, 20), chunks=(1, 10, 20), dtype="f4"
    ).attrs["foo"] = "bar"
    src_group.ones("b", shape=(20,), chunks=(10,), dtype="f4").attrs["foo"] = "bar"

    # With 1600 bytes of memory, "a" should need a two-step plan.
    max_mem = 1600
    target_chunks = {"a": (5, 10, 4), "b": (20,)}

    delayed = api.rechunk(
        src_group, target_chunks, max_mem, target_path, temp_store=temp_path
    )

    # The target group is inspected before anything has been computed.
    dst_group = zarr.open(target_path)
    assert "a" in dst_group
    assert "b" in dst_group
    assert dict(src_group.attrs) == dict(dst_group.attrs)

    dask.compute(delayed)

    for name in target_chunks:
        rechunked_data = dsa.from_zarr(dst_group[name])
        assert dsa.equal(rechunked_data, 1).all().compute()
def test_pywren_function_executor(tmp_path):
    """Rechunk a zarr array through a caller-managed Pywren function executor.

    The test is skipped when ``pywren_ibm_cloud`` cannot be imported.
    """
    pytest.importorskip("pywren_ibm_cloud")
    from rechunker.executors.pywren import (
        PywrenExecutor,
        pywren_local_function_executor,
    )

    # Problem definition: rechunk an 8000 x 8000 float32 array of ones.
    shape = (8000, 8000)
    dtype = "f4"
    source_chunks = (200, 8000)
    target_chunks = (400, 8000)
    max_mem = 25600000

    # The Pywren function executor is created and owned by this test, then
    # handed to rechunker's PywrenExecutor wrapper.
    with pywren_local_function_executor() as function_executor:
        executor = PywrenExecutor(function_executor)

        # Source array, written to disk.
        source_path = str(tmp_path / "source.zarr")
        ones_source = zarr.ones(
            shape, chunks=source_chunks, dtype=dtype, store=source_path
        )

        # Final and intermediate store locations.
        target_path = str(tmp_path / "target.zarr")
        temp_path = str(tmp_path / "temp.zarr")

        plan = api.rechunk(
            ones_source,
            target_chunks,
            max_mem,
            target_path,
            temp_store=temp_path,
            executor=executor,
        )
        assert isinstance(plan, api.Rechunked)

        # The target is opened and its chunking checked before execution.
        opened_target = zarr.open(target_path)
        assert opened_target.chunks == tuple(target_chunks)

        executed = plan.execute()
        assert isinstance(executed, zarr.Array)

        # Read the target back through dask and confirm it is all ones.
        assert dsa.equal(dsa.from_zarr(opened_target), 1).all().compute()
def test_rechunk_group(tmp_path, executor, source_store, target_store, temp_store):
    """Rechunk a zarr group of two arrays with the given executor.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Per-test temporary directory; all three stores are created inside it.
    executor
        Passed through unchanged to ``api.rechunk``.
    source_store, target_store, temp_store : str
        Store names relative to ``tmp_path``. When ``source_store`` starts
        with ``"mapper"`` the stores are opened as fsspec mappers (the test
        is skipped if fsspec is not installed); otherwise plain string paths
        are used.
    """
    if source_store.startswith("mapper"):
        fsspec = pytest.importorskip("fsspec")
        # Join with the path operator rather than string concatenation:
        # ``str(tmp_path) + name`` has no separator, so the stores would be
        # created as siblings of tmp_path instead of inside it.
        store_source = fsspec.get_mapper(str(tmp_path / source_store))
        target_store = fsspec.get_mapper(str(tmp_path / target_store))
        temp_store = fsspec.get_mapper(str(tmp_path / temp_store))
    else:
        store_source = str(tmp_path / source_store)
        target_store = str(tmp_path / target_store)
        temp_store = str(tmp_path / temp_store)

    group = zarr.group(store_source)
    group.attrs["foo"] = "bar"

    # Array "a" is stored in 800-byte chunks (1 * 10 * 20 float32 values).
    a = group.ones("a", shape=(5, 10, 20), chunks=(1, 10, 20), dtype="f4")
    a.attrs["foo"] = "bar"
    b = group.ones("b", shape=(20,), chunks=(10,), dtype="f4")
    b.attrs["foo"] = "bar"

    max_mem = 1600  # should force a two-step plan for a
    target_chunks = {"a": (5, 10, 4), "b": (20,)}

    rechunked = api.rechunk(
        group,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
        executor=executor,
    )
    assert isinstance(rechunked, api.Rechunked)

    # The target group and its attributes are checked before execution.
    target_group = zarr.open(target_store)
    assert "a" in target_group
    assert "b" in target_group
    assert dict(group.attrs) == dict(target_group.attrs)

    rechunked.execute()

    for aname in target_chunks:
        assert target_group[aname].chunks == target_chunks[aname]
        a_tar = dsa.from_zarr(target_group[aname])
        assert dsa.equal(a_tar, 1).all().compute()
def test_rechunk_array(
    tmp_path, shape, source_chunks, dtype, dims, target_chunks, max_mem, executor
):
    """Rechunk an on-disk zarr array of ones and validate the target array.

    ``target_chunks`` may be a sequence of chunk sizes or a dict keyed by
    the names in ``dims``; in the dict case the expected chunking is looked
    up in ``dims`` order. Attributes set on the source must appear
    unchanged on the target.
    """
    # Source array on disk, with an attribute that should carry over.
    source_path = str(tmp_path / "source.zarr")
    ones_source = zarr.ones(
        shape, chunks=source_chunks, dtype=dtype, store=source_path
    )
    ones_source.attrs["foo"] = "bar"
    if dims:
        ones_source.attrs[_DIMENSION_KEY] = dims

    # Final and intermediate store locations.
    target_path = str(tmp_path / "target.zarr")
    temp_path = str(tmp_path / "temp.zarr")

    plan = api.rechunk(
        ones_source,
        target_chunks,
        max_mem,
        target_path,
        temp_store=temp_path,
        executor=executor,
    )
    assert isinstance(plan, api.Rechunked)

    opened_target = zarr.open(target_path)

    # Normalise the requested chunking to a tuple in dimension order.
    expected_chunks = (
        tuple(target_chunks[d] for d in dims)
        if isinstance(target_chunks, dict)
        else tuple(target_chunks)
    )
    assert opened_target.chunks == expected_chunks
    assert dict(ones_source.attrs) == dict(opened_target.attrs)

    executed = plan.execute()
    assert isinstance(executed, zarr.Array)

    # Read the target back through dask and confirm it is all ones.
    assert dsa.equal(dsa.from_zarr(opened_target), 1).all().compute()
def test_compute(rechunk_delayed):
    """Compute the delayed rechunk and confirm the target holds only ones.

    ``rechunk_delayed`` unpacks into the delayed object and the target store
    it writes to.
    """
    pending, store = rechunk_delayed
    pending.compute()

    # Read the store back through dask and check every element equals 1.
    assert dsa.equal(dsa.from_zarr(store), 1).all().compute()