Exemplo n.º 1
0
def test_rechunk_dask_array(tmp_path, shape, source_chunks, dtype,
                            target_chunks, max_mem):
    """Rechunk an all-ones dask array into a zarr store and verify it.

    The target store is opened and its chunking checked right after the
    rechunk plan is built; the plan is then executed and every element of
    the target is checked to equal 1.
    """
    # Source data: a dask array filled with ones.
    ones_source = dsa.ones(shape, chunks=source_chunks, dtype=dtype)

    # Destination and intermediate stores live under the pytest tmp dir.
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    plan = api.rechunk(
        ones_source,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
    )
    assert isinstance(plan, api.Rechunked)

    # The target is opened and its chunks asserted before execute() runs.
    opened_target = zarr.open(target_store)
    assert opened_target.chunks == tuple(target_chunks)

    executed = plan.execute()
    assert isinstance(executed, zarr.Array)

    everything_is_one = dsa.equal(dsa.from_zarr(opened_target), 1).all()
    assert everything_is_one.compute()
Exemplo n.º 2
0
def test_rechunk_group(tmp_path):
    """Rechunk a zarr group holding two all-ones arrays and check the target.

    Group-level attributes must match between source and target once the
    plan is built; after computing the plan, both target arrays must
    contain only ones.
    """
    # Store locations, all under the pytest tmp dir.
    source_path = str(tmp_path / "source.zarr")
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    src_group = zarr.group(source_path)
    src_group.attrs["foo"] = "bar"

    # "a" uses (1, 10, 20) float32 chunks, i.e. 800 bytes per chunk.
    arr_a = src_group.ones(
        "a", shape=(5, 10, 20), chunks=(1, 10, 20), dtype="f4"
    )
    arr_a.attrs["foo"] = "bar"
    arr_b = src_group.ones("b", shape=(20,), chunks=(10,), dtype="f4")
    arr_b.attrs["foo"] = "bar"

    # Memory budget chosen so that "a" should need a two-step plan.
    max_mem = 1600
    target_chunks = {"a": (5, 10, 4), "b": (20,)}

    delayed = api.rechunk(
        src_group,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
    )

    dst_group = zarr.open(target_store)
    assert "a" in dst_group
    assert "b" in dst_group
    assert dict(src_group.attrs) == dict(dst_group.attrs)

    dask.compute(delayed)
    for array_name in target_chunks:
        rechunked_values = dsa.from_zarr(dst_group[array_name])
        assert dsa.equal(rechunked_values, 1).all().compute()
Exemplo n.º 3
0
def test_pywren_function_executor(tmp_path):
    """Rechunk a zarr array through a caller-managed Pywren executor.

    Skipped when ``pywren_ibm_cloud`` is not importable.
    """
    pytest.importorskip("pywren_ibm_cloud")
    from rechunker.executors.pywren import (
        PywrenExecutor,
        pywren_local_function_executor,
    )

    # Create a Pywren function executor that we manage ourselves
    # and hand it to rechunker's PywrenExecutor.
    with pywren_local_function_executor() as function_executor:
        executor = PywrenExecutor(function_executor)

        # Problem definition.
        shape = (8000, 8000)
        source_chunks = (200, 8000)
        dtype = "f4"
        max_mem = 25600000
        target_chunks = (400, 8000)

        # Source: an on-disk zarr array filled with ones.
        source_path = str(tmp_path / "source.zarr")
        ones_source = zarr.ones(
            shape,
            chunks=source_chunks,
            dtype=dtype,
            store=source_path,
        )

        # Destination and intermediate stores.
        target_store = str(tmp_path / "target.zarr")
        temp_store = str(tmp_path / "temp.zarr")

        plan = api.rechunk(
            ones_source,
            target_chunks,
            max_mem,
            target_store,
            temp_store=temp_store,
            executor=executor,
        )
        assert isinstance(plan, api.Rechunked)

        # The target is opened and its chunks asserted before execute() runs.
        opened_target = zarr.open(target_store)
        assert opened_target.chunks == tuple(target_chunks)

        executed = plan.execute()
        assert isinstance(executed, zarr.Array)

        everything_is_one = dsa.equal(dsa.from_zarr(opened_target), 1).all()
        assert everything_is_one.compute()
Exemplo n.º 4
0
def test_rechunk_group(tmp_path, executor, source_store, target_store,
                       temp_store):
    """Rechunk a zarr group with the given executor and verify the target.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Per-test temporary directory; all stores are created inside it.
    executor
        Passed through unchanged to ``api.rechunk``.
    source_store, target_store, temp_store : str
        Store names relative to ``tmp_path``. When ``source_store`` starts
        with ``"mapper"``, all three stores are opened through
        ``fsspec.get_mapper`` (the test is skipped if fsspec is missing);
        otherwise plain path strings are used.
    """
    # Join with ``/`` in both branches so every store is created inside
    # ``tmp_path``. Plain string concatenation (``str(tmp_path) + name``)
    # has no separator and would place the stores next to the per-test
    # directory instead of inside it.
    source_path = str(tmp_path / source_store)
    target_path = str(tmp_path / target_store)
    temp_path = str(tmp_path / temp_store)

    if source_store.startswith("mapper"):
        fsspec = pytest.importorskip("fsspec")
        store_source = fsspec.get_mapper(source_path)
        target_store = fsspec.get_mapper(target_path)
        temp_store = fsspec.get_mapper(temp_path)
    else:
        store_source = source_path
        target_store = target_path
        temp_store = temp_path

    group = zarr.group(store_source)
    group.attrs["foo"] = "bar"
    # "a" uses (1, 10, 20) float32 chunks, i.e. 800 bytes per chunk.
    a = group.ones("a", shape=(5, 10, 20), chunks=(1, 10, 20), dtype="f4")
    a.attrs["foo"] = "bar"
    b = group.ones("b", shape=(20, ), chunks=(10, ), dtype="f4")
    b.attrs["foo"] = "bar"

    max_mem = 1600  # should force a two-step plan for a
    target_chunks = {"a": (5, 10, 4), "b": (20, )}

    rechunked = api.rechunk(
        group,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
        executor=executor,
    )
    assert isinstance(rechunked, api.Rechunked)

    # The target group and its attributes are checked before execute() runs.
    target_group = zarr.open(target_store)
    assert "a" in target_group
    assert "b" in target_group
    assert dict(group.attrs) == dict(target_group.attrs)

    rechunked.execute()
    for aname in target_chunks:
        assert target_group[aname].chunks == target_chunks[aname]
        a_tar = dsa.from_zarr(target_group[aname])
        assert dsa.equal(a_tar, 1).all().compute()
Exemplo n.º 5
0
def test_rechunk_array(tmp_path, shape, source_chunks, dtype, dims,
                       target_chunks, max_mem, executor):
    """Rechunk an on-disk all-ones zarr array and verify the target.

    ``target_chunks`` may be a sequence or a dict keyed by the names in
    ``dims``. The target's chunks and attributes are checked once the plan
    is built; after execution every target element must equal 1.
    """
    # Source: a zarr array of ones written under the pytest tmp dir.
    source_path = str(tmp_path / "source.zarr")
    ones_source = zarr.ones(
        shape,
        chunks=source_chunks,
        dtype=dtype,
        store=source_path,
    )
    # Attach attributes; dimension names are recorded only when provided.
    ones_source.attrs["foo"] = "bar"
    if dims:
        ones_source.attrs[_DIMENSION_KEY] = dims

    # Destination and intermediate stores.
    target_store = str(tmp_path / "target.zarr")
    temp_store = str(tmp_path / "temp.zarr")

    plan = api.rechunk(
        ones_source,
        target_chunks,
        max_mem,
        target_store,
        temp_store=temp_store,
        executor=executor,
    )
    assert isinstance(plan, api.Rechunked)

    opened_target = zarr.open(target_store)

    # A dict of target chunks is ordered by dimension name before comparing.
    expected_chunks = (
        [target_chunks[dim] for dim in dims]
        if isinstance(target_chunks, dict)
        else target_chunks
    )
    assert opened_target.chunks == tuple(expected_chunks)
    assert dict(ones_source.attrs) == dict(opened_target.attrs)

    executed = plan.execute()
    assert isinstance(executed, zarr.Array)

    everything_is_one = dsa.equal(dsa.from_zarr(opened_target), 1).all()
    assert everything_is_one.compute()
Exemplo n.º 6
0
def test_compute(rechunk_delayed):
    """Compute the delayed rechunk and check the target holds only ones."""
    plan, store = rechunk_delayed
    plan.compute()
    everything_is_one = dsa.equal(dsa.from_zarr(store), 1).all()
    assert everything_is_one.compute()