def test_no_intermediate():
    """Check the repr of a ``Rechunked`` built with ``intermediate=None``."""
    source_arr = zarr.ones((4, 4), chunks=(2, 2))
    target_arr = zarr.ones((4, 4), chunks=(4, 1))
    result = api.Rechunked(
        None,
        None,
        source=source_arr,
        intermediate=None,
        target=target_arr,
    )

    text = repr(result)
    assert "Intermediate" not in text

    # The HTML representation only has to render without raising.
    result._repr_html_()
def rechunked_fn(tmp_path, request):
    """Build a ready-to-call ``api.rechunk`` partial for a group or an array.

    Parameters
    ----------
    tmp_path
        Directory under which the ``source.zarr``, ``target.zarr`` and
        ``temp.zarr`` stores are placed.
    request
        Object whose ``param`` attribute selects the source: ``"Group"``
        builds a zarr group holding arrays ``a`` and ``b``; any other value
        builds a single 2-D array.

    Returns
    -------
    functools.partial
        ``api.rechunk`` bound to the source, target chunks, ``max_mem``,
        target store and ``temp_store``.
    """
    # The three store locations are the same for both source kinds.
    source_store = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    if request.param == "Group":
        source = zarr.group(source_store)
        source.attrs["foo"] = "bar"
        # 800 byte chunks
        a = source.ones("a", shape=(5, 10, 20), chunks=(1, 10, 20), dtype="f4")
        a.attrs["foo"] = "bar"
        b = source.ones("b", shape=(8000,), chunks=(100,), dtype="f4")
        b.attrs["foo"] = "bar"

        max_mem = 16000  # should force a two-step plan for b
        target_chunks = {"a": (5, 10, 4), "b": (4000,)}
    else:
        source = zarr.ones(
            (8000, 8000),
            chunks=(200, 8000),
            dtype="f4",
            store=source_store,
        )
        # add some attributes
        source.attrs["foo"] = "bar"

        max_mem = 25600000
        target_chunks = (8000, 200)

    return partial(
        api.rechunk,
        source,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
    )
def test_ones_like():
    """Check that ``ones_like`` matches its template on a set of attributes."""
    template = ones(100, 10)
    clone = ones_like(template)

    compared_attrs = (
        "shape",
        "chunks",
        "dtype",
        "cname",
        "clevel",
        "shuffle",
        "fill_value",
    )
    for attr in compared_attrs:
        eq(getattr(template, attr), getattr(clone, attr))
def sample_zarr_array(tmp_path):
    """Create an all-ones zarr array stored at ``tmp_path / "source.zarr"``.

    The array has shape ``(8000, 8000)``, chunks ``(200, 8000)`` and dtype
    ``"f4"``, and carries the attribute ``foo = "bar"``.

    Parameters
    ----------
    tmp_path
        Directory in which the ``source.zarr`` store is created.

    Returns
    -------
    The array returned by ``zarr.ones``.
    """
    store_path = str(tmp_path / "source.zarr")
    array = zarr.ones(
        (8000, 8000),
        chunks=(200, 8000),
        dtype="f4",
        store=store_path,
    )
    # add some attributes
    array.attrs["foo"] = "bar"
    return array
def rechunk_delayed(tmp_path):
    """Set up a zarr-to-zarr rechunk through ``api.rechunk_zarr2zarr_w_dask``.

    Returns a 2-tuple: the value returned by
    ``api.rechunk_zarr2zarr_w_dask`` and the target store path.
    """
    source_path = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")
    temp_path = str(tmp_path / "temp.zarr")

    source_arr = zarr.ones(
        (8000, 8000),
        chunks=(200, 8000),
        dtype="f4",
        store=source_path,
    )

    memory_limit = 25600000
    new_chunks = (8000, 200)
    delayed = api.rechunk_zarr2zarr_w_dask(
        source_arr,
        new_chunks,
        memory_limit,
        target_store,
        temp_store=temp_path,
    )
    return delayed, target_store
def test_pywren_function_executor(tmp_path):
    """Rechunk an all-ones array through a ``PywrenExecutor``.

    The test is skipped when ``pywren_ibm_cloud`` cannot be imported.
    """
    pytest.importorskip("pywren_ibm_cloud")
    from rechunker.executors.pywren import (
        PywrenExecutor,
        pywren_local_function_executor,
    )

    # Create a Pywren function executor that we manage ourselves
    # and pass it in to rechunker's PywrenExecutor.
    with pywren_local_function_executor() as function_executor:
        executor = PywrenExecutor(function_executor)

        max_mem = 25600000
        target_chunks = (400, 8000)

        # Source array
        source_path = str(tmp_path / "source.zarr")
        source_array = zarr.ones(
            (8000, 8000),
            chunks=(200, 8000),
            dtype="f4",
            store=source_path,
        )

        # Target and temporary stores
        target_store = str(tmp_path / "target.zarr")
        temp_store = str(tmp_path / "temp.zarr")

        rechunked = api.rechunk(
            source_array,
            target_chunks,
            max_mem,
            target_store,
            temp_store=temp_store,
            executor=executor,
        )
        assert isinstance(rechunked, api.Rechunked)

        target_array = zarr.open(target_store)
        assert target_array.chunks == tuple(target_chunks)

        outcome = rechunked.execute()
        assert isinstance(outcome, zarr.Array)

        target_dask = dsa.from_zarr(target_array)
        assert dsa.equal(target_dask, 1).all().compute()
def test_equality_operator():
    """Check which comparison ``pick_equality_operator`` returns per input type."""
    import operator

    import dask.array as da
    import numpy as np
    import xarray as xr
    import zarr

    class MyNPArray(np.ndarray):
        pass

    # (input value, operator expected from pick_equality_operator)
    cases = [
        (np.ones((1, 1)), np.array_equal),
        (MyNPArray([1, 1]), np.array_equal),
        (da.ones((1, 1)), operator.is_),
        (zarr.ones((1, 1)), operator.is_),
        (xr.DataArray(np.ones((1, 1))), np.array_equal),
    ]
    for value, expected_op in cases:
        assert pick_equality_operator(value) == expected_op
def test_no_intermediate_fused(tmp_path):
    """Check that a rechunk plan without a temp store stays under 20 tasks."""
    source_path = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")

    source_array = zarr.ones(
        (8000, 8000),
        chunks=(200, 8000),
        dtype="f4",
        store=source_path,
    )

    max_mem = 25600000
    target_chunks = (400, 8000)
    rechunked = api.rechunk(source_array, target_chunks, max_mem, target_store)

    graph = rechunked.plan.dask
    task_count = sum(1 for node in graph.values() if dask.core.istask(node))
    assert task_count < 20  # less than if no fuse
def test_equality_operator():
    """Check which comparison ``pick_equality_operator`` returns per input type."""
    import operator

    import dask.array as da
    import numpy as np
    import xarray as xr
    import zarr

    class MyNPArray(np.ndarray):
        pass

    # (input value, operator expected from pick_equality_operator)
    cases = [
        (np.ones((1, 1)), _quiet_array_equal),
        (MyNPArray([1, 1]), _quiet_array_equal),
        (da.ones((1, 1)), operator.is_),
        (zarr.ones((1, 1)), operator.is_),
        (xr.DataArray(np.ones((1, 1))), _quiet_array_equal),
    ]
    for value, expected_op in cases:
        assert pick_equality_operator(value) == expected_op

    empty_eq = pick_equality_operator(np.asarray([]))
    # make sure this doesn't warn
    assert not empty_eq(np.asarray([]), np.asarray([], '<U32'))
def test_rechunk_array(tmp_path, shape, source_chunks, dtype, dims,
                       target_chunks, max_mem, executor):
    """Rechunk an all-ones zarr array and verify chunks, attrs and contents.

    ``target_chunks`` may be a dict, in which case it is indexed by each
    entry of ``dims`` to obtain the expected chunk sizes; otherwise it is
    used directly.
    """
    # Source array, with some attributes attached
    source_path = str(tmp_path / "source.zarr")
    source_array = zarr.ones(
        shape, chunks=source_chunks, dtype=dtype, store=source_path
    )
    source_array.attrs["foo"] = "bar"
    if dims:
        source_array.attrs[_DIMENSION_KEY] = dims

    # Target and temporary stores
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    rechunked = api.rechunk(
        source_array,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
        executor=executor,
    )
    assert isinstance(rechunked, api.Rechunked)

    target_array = zarr.open(target_store)

    expected_chunks = (
        [target_chunks[d] for d in dims]
        if isinstance(target_chunks, dict)
        else target_chunks
    )
    assert target_array.chunks == tuple(expected_chunks)
    assert dict(source_array.attrs) == dict(target_array.attrs)

    outcome = rechunked.execute()
    assert isinstance(outcome, zarr.Array)

    target_dask = dsa.from_zarr(target_array)
    assert dsa.equal(target_dask, 1).all().compute()
def test_ones():
    """Check shape, chunks and contents of the array from ``ones(100, 10)``."""
    arr = ones(100, 10)

    expected_shape = (100,)
    expected_chunks = (10,)
    eq(expected_shape, arr.shape)
    eq(expected_chunks, arr.chunks)

    assert_array_equal(np.ones(100), arr[:])
def test_ones():
    """Check shape, chunks and contents of the array from ``ones(100, 10)``."""
    created = ones(100, 10)

    # Each pair is (expected value, value read from the created array).
    for expected, actual in (((100,), created.shape), ((10,), created.chunks)):
        eq(expected, actual)

    reference = np.ones(100)
    assert_array_equal(reference, created[:])