def test_rechunk_2d():
    """Rechunk a random 10x30 matrix and verify layout and contents."""
    source = np.random.uniform(0, 1, 300).reshape((10, 30))

    # Uneven row chunks and six equal column chunks as the starting layout.
    initial_chunks = ((1, 2, 3, 4), (5, ) * 6)
    target_chunks = ((5, 5), (15, ) * 2)

    darr = da.from_array(source, chunks=initial_chunks)
    rechunked = rechunk(darr, chunks=target_chunks)

    # The chunk structure must match the request exactly ...
    assert rechunked.chunks == target_chunks
    # ... and the data must survive the rechunking unchanged.
    elementwise_equal = rechunked.compute() == source
    assert np.all(elementwise_equal)
def test_rechunk_1d():
    """Rechunk a random 300-element vector from 3 chunks into 6 chunks."""
    source = np.random.uniform(0, 1, 300)

    initial_chunks = ((100, ) * 3, )
    target_chunks = ((50, ) * 6,)

    darr = da.from_array(source, chunks=initial_chunks)
    rechunked = rechunk(darr, chunks=target_chunks)

    assert rechunked.chunks == target_chunks
    # Values must be identical after materialising the rechunked array.
    elementwise_equal = rechunked.compute() == source
    assert np.all(elementwise_equal)
def test_rechunk_4d():
    """Rechunk a random 10x10x10x10 array into a single block per axis."""
    n_dims = 4
    initial_chunks = ((5, 5), ) * n_dims
    target_chunks = ((10, ), ) * n_dims

    source = np.random.uniform(0, 1, 10000).reshape((10, ) * n_dims)
    darr = da.from_array(source, chunks=initial_chunks)

    rechunked = rechunk(darr, chunks=target_chunks)

    assert rechunked.chunks == target_chunks
    # Merging the chunks must not alter any element.
    elementwise_equal = rechunked.compute() == source
    assert np.all(elementwise_equal)
def test_rechunk_blockshape():
    """Test that a blockshape can be used as the ``chunks`` argument.

    A blockshape gives one chunk size per axis (here ``(4, 3)``) instead of
    the explicit tuple-of-tuples form; the expected explicit form is derived
    with ``normalize_chunks``.

    This test used to be defined twice with identical logic; the second
    definition silently replaced the first, so a single copy is kept.
    """
    new_shape, new_chunks = (10, 10), (4, 3)
    new_blockdims = normalize_chunks(new_chunks, new_shape)
    old_chunks = ((4, 4, 2), (3, 3, 3, 1))

    a = np.random.uniform(0, 1, 100).reshape((10, 10))
    x = da.from_array(a, chunks=old_chunks)

    check1 = rechunk(x, chunks=new_chunks)

    # The resulting chunks must equal the normalised form of the blockshape,
    # and the data itself must be unchanged.
    assert check1.chunks == new_blockdims
    assert np.all(check1.compute() == a)
def histogramdd(data, bins=None, *args, **kwargs):
    """Facade function to create multi-dimensional histogram using dask.

    Each "column" must be one-dimensional.

    Parameters
    ----------
    data
        One of:

        * a list/tuple of columns, which are stacked along axis 1;
        * an object exposing a ``dask`` attribute, which is rechunked so
          that axis 1 forms a single chunk;
        * anything else with a 2-D ``shape``, which is wrapped with
          ``dask.array.from_array`` and split along axis 0 according to
          ``options["chunk_split"]``.
    bins
        Forwarded unchanged to ``original_hdd`` for every block.
    *args
        Extra positional arguments forwarded to ``original_hdd``.
    **kwargs
        ``compute`` (default ``True``) and ``dask_method`` (default
        ``"threaded"``) are consumed here and handed to ``_run_dask``;
        ``adaptive`` must be truthy if given. Everything else is forwarded
        to ``original_hdd``.

    Returns
    -------
    Whatever ``_run_dask`` returns for the given ``compute`` setting.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional, or if ``adaptive`` is falsy.
    """
    import dask
    from dask.array.rechunk import rechunk

    if isinstance(data, (list, tuple)):
        data = dask.array.stack(data, axis=1)

    # Validate the dimensionality *before* shape[1] is accessed below;
    # otherwise 1-D input fails with an unrelated IndexError instead of the
    # intended message.
    if len(data.shape) != 2:
        raise ValueError(
            f"Only (n, dim) data allowed for histogramdd, {data.shape} encountered."
        )

    if not hasattr(data, "dask"):
        # Never request a zero-sized chunk: with fewer rows than
        # options["chunk_split"] the plain division truncates to 0.
        rows_per_chunk = max(1, int(data.shape[0] / options["chunk_split"]))
        data = dask.array.from_array(
            data, chunks=(rows_per_chunk, data.shape[1])
        )
    else:
        # Keep every row whole: all columns must live in the same chunk.
        data = rechunk(data, {1: data.shape[1]})

    if not kwargs.get("adaptive", True):
        raise ValueError(
            "Only adaptive histograms supported for dask (currently).")
    kwargs["adaptive"] = True

    # Remove the dask-only options up front so that the closure below never
    # forwards them to original_hdd.
    compute = kwargs.pop("compute", True)
    method = kwargs.pop("dask_method", "threaded")

    def block_hist(array):
        return original_hdd(array, bins, *args, **kwargs)

    return _run_dask(
        name="dask_adaptive_dd",
        data=data,
        compute=compute,
        method=method,
        func=block_hist,
        expand_arg=True,
    )