def test_gufunc_two_inputs():
    """Run ``apply_gufunc`` on two 2-d inputs with an einsum contraction."""
    def foo(x, y):
        return np.einsum('...ij,...jk->ik', x, y)

    lhs = da.ones((2, 3), chunks=100, dtype=int)
    rhs = da.ones((3, 4), chunks=100, dtype=int)
    product = apply_gufunc(foo, "(i,j),(j,k)->(i,k)", lhs, rhs,
                           output_dtypes=int)

    # Contracting over an axis of length 3 of all-ones inputs gives 3 everywhere.
    expected = 3 * np.ones((2, 4), dtype=int)
    assert_eq(product, expected)
def test_atop_chunks():
    """Exercise ``adjust_chunks`` of ``atop`` with callables, an int and a tuple."""
    def double(a, axis=0):
        return np.concatenate([a, a], axis=axis)

    uneven = da.ones((5, 5), chunks=((2, 1, 2), (3, 2)))

    # Callable adjustment along the first index.
    grown_rows = atop(double, 'ij', uneven, 'ij',
                      adjust_chunks={'i': lambda n: 2 * n},
                      axis=0, dtype=uneven.dtype)
    assert grown_rows.chunks == ((4, 2, 4), (3, 2))
    assert_eq(grown_rows, np.ones((10, 5)))

    # Callable adjustment along the second index.
    grown_cols = atop(double, 'ij', uneven, 'ij',
                      adjust_chunks={'j': lambda n: 2 * n},
                      axis=1, dtype=uneven.dtype)
    assert grown_cols.chunks == ((2, 1, 2), (6, 4))
    assert_eq(grown_cols, np.ones((5, 10)))

    # A single int and an explicit tuple are expected to give the same chunks.
    square = da.ones((10, 10), chunks=(5, 5))
    for adjustment in (10, (10, 10)):
        out = atop(double, 'ij', square, 'ij', axis=0,
                   adjust_chunks={'i': adjustment}, dtype=square.dtype)
        assert out.chunks == ((10, 10), (5, 5))
        assert_eq(out, np.ones((20, 10)))
def test_atop_new_axes():
    """Check ``atop`` with ``new_axes`` placed after and before the input axis."""
    # New axis appended after the existing one.
    def f(x):
        return x[:, None] * np.ones((1, 7))

    vec = da.ones(5, chunks=2)
    trailing = atop(f, 'aq', vec, 'a', new_axes={'q': 7}, concatenate=True,
                    dtype=vec.dtype)
    assert trailing.chunks == ((2, 2, 1), (7,))
    assert_eq(trailing, np.ones((5, 7)))

    # New axis inserted in front of the existing one.
    def f(x):
        return x[None, :] * np.ones((7, 1))

    vec = da.ones(5, chunks=2)
    leading = atop(f, 'qa', vec, 'a', new_axes={'q': 7}, concatenate=True,
                   dtype=vec.dtype)
    assert leading.chunks == ((7,), (2, 2, 1))
    assert_eq(leading, np.ones((7, 5)))

    # Index 'b' is dropped from the output while 'q' is introduced.
    def f(x):
        y = x.sum(axis=1)
        return y[:, None] * np.ones((1, 5))

    mat = da.ones((4, 6), chunks=(2, 2))
    swapped = atop(f, 'aq', mat, 'ab', new_axes={'q': 5}, concatenate=True,
                   dtype=mat.dtype)
    assert swapped.chunks == ((2, 2), (5,))
    assert_eq(swapped, np.ones((4, 5)) * 6)
def test_array_broadcasting():
    """Check shapes and rough means of ``da.random`` draws with array parameters."""
    arr = np.arange(6).reshape((2, 3))
    daones = da.ones((2, 3, 4), chunks=3)

    poisson = da.random.poisson(arr, chunks=3)
    assert poisson.compute().shape == (2, 3)

    for loc in (arr, daones):
        drawn = da.random.normal(loc, 2, chunks=3)
        assert drawn.shape == loc.shape
        assert drawn.compute().shape == loc.shape

    # The graph of a dask parameter must be part of the result's graph.
    drawn = da.random.normal(daones, 2, chunks=3)
    assert set(daones.dask).issubset(set(drawn.dask))

    positional = da.random.normal(np.ones((1, 4)),
                                  da.ones((2, 3, 4), chunks=(2, 3, 4)),
                                  chunks=(2, 3, 4))
    assert positional.compute().shape == (2, 3, 4)

    by_keyword = da.random.normal(scale=np.ones((1, 4)),
                                  loc=da.ones((2, 3, 4), chunks=(2, 3, 4)),
                                  size=(2, 2, 3, 4),
                                  chunks=(2, 2, 3, 4))
    assert by_keyword.compute().shape == (2, 2, 3, 4)

    with pytest.raises(ValueError):
        da.random.normal(arr, np.ones((3, 1)), size=(2, 3, 4), chunks=3)

    for o in (np.ones(100), da.ones(100, chunks=(50,)), 1):
        a = da.random.normal(1000 * o, 0.01, chunks=(50,))
        assert 800 < a.mean().compute() < 1200

    # ensure that mis-matched chunks align well
    cubes = np.arange(10)**3
    finely_chunked = da.from_array(cubes, chunks=(1,))
    z = da.random.normal(finely_chunked, 0.01, chunks=(10,))
    ratio = z.mean().compute() / cubes.mean()
    assert 0.8 < ratio < 1.2
def test_from_dask_array_compat_numpy_array():
    """Compare ``dd.from_dask_array`` (dask input) with ``dd.from_array`` (numpy input).

    Both constructors are expected to reject 3-d input, reject a column
    list of the wrong length, and otherwise produce a ``dd.DataFrame``
    with matching values and column labels.
    """
    x = da.ones((3, 3, 3), chunks=2)

    with pytest.raises(ValueError):
        dd.from_dask_array(x)       # dask

    with pytest.raises(ValueError):
        dd.from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = dd.from_dask_array(x)       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = dd.from_array(x.compute())  # numpy
    # Check d2 here: the original re-checked d1, leaving the numpy path untested.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    with pytest.raises(ValueError):
        dd.from_dask_array(x, columns=['a'])       # dask

    with pytest.raises(ValueError):
        dd.from_array(x.compute(), columns=['a'])  # numpy

    d1 = dd.from_dask_array(x, columns=['a', 'b', 'c'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = dd.from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
def test_repr():
    """The repr shows name prefix, shape and dtype, and stays short."""
    small = da.ones((4, 4), chunks=(2, 2))
    text = repr(small)
    for fragment in (small.name[:5], str(small.shape), str(small._dtype)):
        assert fragment in text

    # Many chunks must not blow up the string form.
    many_chunks = da.ones((4000, 4), chunks=(4, 2))
    assert len(str(many_chunks)) < 1000
def test_rechunk_with_dict():
    """Rechunk one axis through a dict, given as an int or as explicit chunks."""
    for axis0_chunks in (12, (12, 12)):
        source = da.ones((24, 24), chunks=(4, 8))
        rechunked = source.rechunk(chunks={0: axis0_chunks})
        # Axis 1 is absent from the dict and keeps its chunks.
        assert rechunked.chunks == ((12, 12), (8, 8, 8))
def test_raise_on_no_chunks():
    """Building an array without chunks must raise a ValueError."""
    x = da.ones(6, chunks=3)
    try:
        Array(x.dask, x.name, chunks=None, dtype=x.dtype, shape=None)
    except ValueError as err:
        # The message is expected to contain the documentation host name.
        assert "dask.pydata.org" in str(err)
    else:
        assert False

    assert raises(ValueError, lambda: da.ones(6))
def test_array():
    """``da.array`` should mirror ``np.array`` for ``ndmin`` and ``dtype``."""
    reference = np.ones(5, dtype='i4')
    lazy = da.ones(5, chunks=3, dtype='i4')
    assert_eq(da.array(lazy, ndmin=3, dtype='i8'),
              np.array(reference, ndmin=3, dtype='i8'))

    # regression #1847 this shall not raise an exception.
    wide = da.ones((100,3), chunks=10)
    wrapped = da.array(wide)
    assert isinstance(wrapped, da.Array)
def test_reductions_with_empty_array():
    """Compare ``mean`` over empty dask arrays with the numpy result.

    The reductions are expected to emit "empty slice" style warnings;
    these are suppressed so they cannot fail the test.
    """
    # Local import keeps this block self-contained.
    import warnings

    dx1 = da.ones((10, 0, 5), chunks=4)
    x1 = dx1.compute()
    dx2 = da.ones((0, 0, 0), chunks=4)
    x2 = dx2.compute()

    for dx, x in [(dx1, x1), (dx2, x2)]:
        # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected in 8;
        # ``catch_warnings`` + "ignore" is the documented replacement.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # empty slice warning
            assert_eq(dx.mean(), x.mean())
            assert_eq(dx.mean(axis=0), x.mean(axis=0))
            assert_eq(dx.mean(axis=1), x.mean(axis=1))
            assert_eq(dx.mean(axis=2), x.mean(axis=2))
def test_from_dask_array_raises():
    """``from_dask_array`` must reject bad dimensionality and bad column lists."""
    x = da.ones((3, 3, 3), chunks=2)
    with pytest.raises(ValueError):
        from_dask_array(x)

    x = da.ones((10, 3), chunks=(3, 3))
    with pytest.raises(ValueError):
        from_dask_array(x)  # no columns

    # Not enough columns
    with pytest.raises(ValueError):
        from_dask_array(x, columns=["a"])

    # The original try/except passed silently when nothing was raised;
    # pytest.raises makes a missing exception a failure.
    with pytest.raises(Exception) as excinfo:
        from_dask_array(x, columns=["hello"])
    message = str(excinfo.value)
    assert "hello" in message
    assert "3" in message
def test_rechunk_auto_image_stack(n):
    """Auto-rechunk a stack of ``n`` 1000x1000 images under several size limits."""
    frame = (1000, 1000)

    with dask.config.set({'array.chunk-size': '10MiB'}):
        x = da.ones((n,) + frame, chunks=(1,) + frame, dtype='uint8')
        y = x.rechunk('auto')
        assert y.chunks == ((10,) * (n // 10), (1000,), (1000,))
        assert y.rechunk('auto').chunks == y.chunks  # idempotent

    with dask.config.set({'array.chunk-size': '7MiB'}):
        z = x.rechunk('auto')
        assert z.chunks == ((5,) * (n // 5), (1000,), (1000,))

    with dask.config.set({'array.chunk-size': '1MiB'}):
        x = da.ones((n,) + frame, chunks=(1,) + frame, dtype='float64')
        z = x.rechunk('auto')
        assert z.chunks == ((1,) * n, (250,) * 4, (250,) * 4)
def test_lazy_diff_rechunk():
    """``derivative`` and ``diff`` on a lazy signal honour the ``rechunk`` flag."""
    lazy_signal = signals.Signal1D(da.ones((10, 100), chunks=(1, 2))).as_lazy()
    for method in (lazy_signal.derivative, lazy_signal.diff):
        # The data has been rechunked
        default_chunks = method(axis=-1).data.chunks
        assert default_chunks == ((10,), (99,))

        # The data has not been rechunked
        kept_chunks = method(axis=-1, rechunk=False).data.chunks
        assert kept_chunks == ((1,) * 10, (1,) * 99)
def test_raise_on_bad_kwargs():
    """A TypeError from ``da.minimum`` should name the function and keyword."""
    arr = da.ones(5, chunks=3)
    try:
        da.minimum(arr, out=None)
    except TypeError as err:
        message = str(err)
        assert 'minimum' in message
        assert 'out' in message
    # NOTE(review): if no TypeError is raised the test passes silently.
def add_ramp(self, ramp_x, ramp_y, offset=0):
    """Add a linear ramp to the signal, in place.

    Parameters
    ----------
    ramp_x: float
        Slope of the ramp in x-direction.
    ramp_y: float
        Slope of the ramp in y-direction.
    offset: float, optional
        Offset of the ramp at the signal fulcrum.

    Notes
    -----
    The ramp is built from the integer indices of the signal shape, so
    its value at signal index ``(row, col)`` is
    ``offset + ramp_x * col + ramp_y * row``. The result is added to
    ``self.data`` with in-place addition.

    """
    data_shape = self.data.shape
    data_dtype = self.data.dtype
    if not self._lazy:
        ramp = offset * np.ones(data_shape, dtype=data_dtype)
    else:
        import dask.array as da
        ramp = offset * da.ones(data_shape,
                                dtype=data_dtype,
                                chunks=self.data.chunks)

    # Row indices pair with the y slope, column indices with the x slope.
    row_idx, col_idx = np.indices(self.axes_manager._signal_shape_in_array)
    ramp += ramp_x * col_idx
    ramp += ramp_y * row_idx
    self.data += ramp
def test_array_compute(capsys):
    """Computing an array sum inside ``ProgressBar`` should complete the bar."""
    from dask.array import ones

    data = ones((100,100),dtype='f4',chunks=(100,100))
    total = data.sum()
    with ProgressBar():
        out = total.compute()

    assert out == 10000
    check_bar_completed(capsys)
def test_vindex_errors():
    """Each of these ``vindex`` keys must raise IndexError."""
    d = da.ones((5, 5, 5), chunks=(3, 3, 3))
    bad_keys = [
        0,
        [1, 2, 3],
        ([1, 2, 3], [1, 2, 3], 0),
        ([1], [1, 2, 3]),
        ([1, 2, 3], [[1], [2], [3]]),
    ]
    for key in bad_keys:
        # Bind the key as a default so the lambda does not depend on the loop.
        assert raises(IndexError, lambda key=key: d.vindex[key])
def test_gh_4176():
    """Reduce over an axis added through ``blockwise`` ``new_axes``."""
    from dask.sharedict import ShareDict

    def foo(A):
        return A[None, ...]

    A = da.ones(shape=(10, 20, 4), chunks=(2, 5, 4))

    name = 'D'
    in_index = ("ntime", "nbl", "npol")
    out_index = ("nsrc",) + in_index

    dsk = blockwise(
        foo, name, out_index,
        A.name, in_index,
        new_axes={"nsrc": 1},
        numblocks={A.name: A.numblocks},
    )

    # Combine the blockwise layer with the graph of its input.
    array_dsk = ShareDict()
    array_dsk.update(dsk)
    array_dsk.update(A.__dask_graph__())

    chunks = ((1,),) + A.chunks

    D = da.Array(array_dsk, name, chunks, dtype=A.dtype)
    D.sum(axis=0).compute()
def test_squeeze():
    """``squeeze`` drops the length-1 axis and gives deterministic keys."""
    column = da.ones((10, 1), chunks=(3, 1))
    squeezed = column.squeeze()

    assert eq(squeezed, column.compute().squeeze())
    assert squeezed.chunks == ((3, 3, 3, 1),)
    # A second, independent call must produce the same keys.
    assert same_keys(squeezed, column.squeeze())
def test_atop_legacy():
    """``da.atop`` must build the same array as ``da.blockwise``.

    Any warning raised by the legacy name is suppressed; only result and
    name equality are checked.
    """
    # Local import keeps this block self-contained.
    import warnings

    x = da.ones(10, chunks=(5,))
    # ``pytest.warns(None)`` is deprecated in pytest 7 and rejected in 8;
    # ``catch_warnings`` + "ignore" is the documented replacement.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y = da.atop(inc, 'i', x, 'i', dtype=x.dtype)
    z = da.blockwise(inc, 'i', x, 'i', dtype=x.dtype)
    assert_eq(y, z)
    assert y.name == z.name
def test_index_with_dask_array_errors():
    """Indexing with these boolean dask masks must raise NotImplementedError."""
    grid = da.ones((5, 5), chunks=2)
    mask = grid > 10

    with pytest.raises(NotImplementedError):
        grid[mask]

    with pytest.raises(NotImplementedError):
        grid[0, mask]
def test_dont_concatenate_single_chunks(shape, chunks):
    """Rechunking a single-chunk array must not add ``concat*`` tasks."""
    single = da.ones(shape, chunks=shape)
    rechunked = single.rechunk(chunks)
    graph = dict(rechunked.dask)

    # Only real tasks have a callable in first position.
    tasks = (value for value in graph.values() if dask.istask(value))
    assert not any(funcname(task[0]).startswith('concat') for task in tasks)
def test_atop_kwargs():
    """Extra keyword arguments given to ``atop`` should reach the function."""
    def f(a, b=0):
        return a + b

    ones = da.ones(5, chunks=(2,))
    shifted = atop(f, 'i', ones, 'i', b=10, dtype=ones.dtype)

    expected = np.ones(5) + 10
    assert_eq(shifted, expected)
def test_gufunc_mixed_inputs():
    """``apply_gufunc`` should broadcast a numpy input against a dask input."""
    def foo(x, y):
        return x + y

    numpy_col = np.ones((2, 1), dtype=int)
    dask_row = da.ones((1, 8), chunks=(2, 3), dtype=int)
    total = apply_gufunc(foo, "(),()->()", numpy_col, dask_row,
                         output_dtypes=int)

    expected = 2 * np.ones((2, 8), dtype=int)
    assert_eq(total, expected)
def test_apply_dask_parallelized_two_args():
    """Add zero-valued operands with ``dask='parallelized'`` and compare."""
    import dask.array as da

    array = da.ones((2, 2), chunks=(1, 1), dtype=np.int64)
    data_array = xr.DataArray(array, dims=('x', 'y'))
    data_array.name = None

    def parallel_add(x, y):
        return apply_ufunc(operator.add, x, y,
                           dask='parallelized',
                           output_dtypes=[np.int64])

    def check(x, y):
        # Result must stay lazy, keep the chunks and equal the input.
        actual = parallel_add(x, y)
        assert isinstance(actual.data, da.Array)
        assert actual.data.chunks == array.chunks
        assert_identical(data_array, actual)

    check(data_array, 0)
    check(0, data_array)
    check(data_array, xr.DataArray(0))
    check(data_array, 0 * data_array)
    check(data_array, 0 * data_array[0])
    check(data_array[:, 0], 0 * data_array[0])
    check(data_array, 0 * data_array.compute())
def test_apply_dask_parallelized_errors():
    """Each invalid ``dask='parallelized'`` call must raise the expected error."""
    import dask.array as da

    array = da.ones((2, 2), chunks=(1, 1))
    data_array = xr.DataArray(array, dims=('x', 'y'))

    # Two outputs requested.
    with pytest.raises(NotImplementedError):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z'], ['z']],
                    dask='parallelized')

    # output_dtypes omitted.
    with raises_regex(ValueError, 'dtypes'):
        apply_ufunc(identity, data_array, dask='parallelized')

    # output_dtypes not given as a list.
    with raises_regex(TypeError, 'list'):
        apply_ufunc(identity, data_array,
                    dask='parallelized',
                    output_dtypes=float)

    # Two dtypes supplied for the call.
    with raises_regex(ValueError, 'must have the same length'):
        apply_ufunc(identity, data_array,
                    dask='parallelized',
                    output_dtypes=[float, float])

    # New output dimension without output_sizes.
    with raises_regex(ValueError, 'output_sizes'):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z']],
                    output_dtypes=[float],
                    dask='parallelized')

    # Bare dask array, no xarray object among the inputs.
    with raises_regex(ValueError, 'at least one input is an xarray object'):
        apply_ufunc(identity, array, dask='parallelized')

    # Core dimension 'y' is split over several chunks.
    with raises_regex(ValueError, 'consists of multiple chunks'):
        apply_ufunc(identity, data_array,
                    dask='parallelized',
                    output_dtypes=[float],
                    input_core_dims=[('y',)],
                    output_core_dims=[('y',)])
def test_from_dask_array_compat_numpy_array_1d():
    """Compare ``dd.from_dask_array`` and ``dd.from_array`` for 1-d input.

    Without ``columns`` or with a string ``columns`` both are expected to
    return a ``dd.Series``; a list ``columns`` is expected to return a
    ``dd.DataFrame``.
    """
    x = da.ones(10, chunks=3)
    d1 = dd.from_dask_array(x)       # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name is None

    d2 = dd.from_array(x.compute())  # numpy
    # Check d2 here: the original re-checked d1, leaving the numpy path untested.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name is None

    d1 = dd.from_dask_array(x, columns='name')       # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name == 'name'

    d2 = dd.from_array(x.compute(), columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = dd.from_dask_array(x, columns=['name'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = dd.from_array(x.compute(), columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
def test_DataFrame_from_dask_array():
    """Columns, divisions and values of a frame built from a 2-d dask array."""
    x = da.ones((10, 3), chunks=(4, 2))
    labels = ['a', 'b', 'c']

    df = from_dask_array(x, labels)
    assert list(df.columns) == ['a', 'b', 'c']
    # Row chunks of 4, 4, 2 are expected to give these division boundaries.
    assert list(df.divisions) == [0, 4, 8, 10]

    frame_values = df.compute(get=get_sync).values
    array_values = x.compute(get=get_sync)
    assert (frame_values == array_values).all()
def test_chunks_is_immutable():
    """Assigning to ``chunks`` must raise a TypeError that suggests ``rechunk``."""
    arr = da.ones(6, chunks=3)
    try:
        arr.chunks = 2
    except TypeError as err:
        assert 'rechunk(2)' in str(err)
    else:
        assert False
def test_oob_check():
    """Out-of-range and over-dimensioned indices must raise IndexError."""
    vec = da.ones(5, chunks=(2,))
    # int past the end, list past the end, too many indices
    for bad_key in (6, [6], (0, 0)):
        with pytest.raises(IndexError):
            vec[bad_key]
def test_rechunk_with_zero_placeholders():
    """Rechunking to a layout containing a zero-width chunk keeps that layout."""
    target = ((12, 12), (24, 0))
    direct = da.ones((24, 24), chunks=target)

    rechunked = da.ones((24, 24), chunks=((12, 12), (12, 12)))
    rechunked = rechunked.rechunk(target)

    assert direct.chunks == rechunked.chunks
def test_rechunk_auto_3d():
    """Auto-rechunk two of three axes under an explicit block size limit."""
    cube = da.ones((20, 20, 20), chunks=((2, 2, 2)))
    limit = 200 * cube.dtype.itemsize
    result = cube.rechunk({0: "auto", 1: "auto"}, block_size_limit=limit)

    # Axis 2 was not requested and keeps its chunks.
    assert result.chunks[2] == cube.chunks[2]
    assert result.chunks[0] == (10, 10)
    assert result.chunks[1] == (10, 10)  # even split
def test_rechunk_minus_one():
    """A chunk size of ``-1`` should produce a single chunk along that axis."""
    source = da.ones((24, 24), chunks=(4, 8))
    merged = source.rechunk((-1, 8))

    assert merged.chunks == ((24, ), (8, 8, 8))
    assert_eq(source, merged)
def test_slice_stop_0():
    """Slicing with a stop of 0 should match numpy (from gh-125)."""
    lazy_empty = da.ones(10, chunks=(10, ))[:0].compute()
    numpy_empty = np.ones(10)[:0]
    assert_eq(lazy_empty, numpy_empty)
def test_reduction_errors():
    """Reducing over an axis outside the array's dimensions raises ValueError."""
    grid = da.ones((5, 5), chunks=(3, 3))
    for bad_axis in (2, -3):
        with pytest.raises(ValueError):
            grid.sum(axis=bad_axis)
def test_cull():
    """The graph of a selection should be smaller than the source graph."""
    source = da.ones(1000, chunks=(10, ))
    selections = [1, slice(0, 30), slice(0, None, 100)]
    for selection in selections:
        picked = source[selection]
        assert len(picked.dask) < len(source.dask)
def test_empty_slice():
    """An empty leading slice should match the numpy equivalent."""
    lazy = da.ones((5, 5), chunks=(2, 2), dtype="i4")
    expected = np.ones((5, 5), dtype="i4")[:0]
    assert_eq(lazy[:0], expected)
def test_rechunk_auto_1d(shape, chunks, bs, expected):
    """Auto-rechunk a 1-d array with a block size limit of ``bs`` elements."""
    source = da.ones(shape, chunks=(chunks, ))
    limit_bytes = bs * source.dtype.itemsize
    result = source.rechunk({0: "auto"}, block_size_limit=limit_bytes)
    assert result.chunks == (expected, )
def test_rechunk_avoid_needless_chunking():
    """Merging 8 chunks into 2 should stay within a small task budget."""
    fine = da.ones(16, chunks=2)
    coarse = fine.rechunk(8)
    graph = coarse.__dask_graph__()
    assert len(graph) <= 8 + 2
def test_rechunk_empty():
    """Rechunking an array with a zero-length axis keeps that axis empty."""
    empty = da.ones((0, 10), chunks=(5, 5))
    rechunked = empty.rechunk((2, 2))

    assert rechunked.chunks == ((0, ), (2, ) * 5)
    assert_eq(empty, rechunked)
def test_rechunk_with_empty_input():
    """An empty dict leaves chunks unchanged; an empty tuple is rejected."""
    source = da.ones((24, 24), chunks=(4, 8))

    untouched = source.rechunk(chunks={})
    assert untouched.chunks == source.chunks

    with pytest.raises(ValueError):
        source.rechunk(chunks=())
def test_rechunk_zero():
    """With a 1-byte chunk-size limit, auto rechunking gives 1-element chunks."""
    with dask.config.set({"array.chunk-size": "1B"}):
        source = da.ones(10, chunks=(5, ))
        auto = source.rechunk("auto")
        expected = ((1, ) * 10, )
        assert auto.chunks == expected
def test_arg_reductions_unknown_single_chunksize(func):
    """Run the reduction named ``func`` on an array with unknown chunks."""
    full = da.ones((10, 10), chunks=(10, 10))
    # unknown chunks in first dimension only
    filtered = full[full[0, :] > 0, :]

    reduction = getattr(da, func)
    for axis in (0, 1):
        reduction(filtered, axis=axis).compute()
def test_array_reduction_out(func):
    """A reduction given ``out=`` should leave its result in that array."""
    target = da.arange(10, chunks=(5, ))
    source = da.ones((10, 10), chunks=(4, 4))

    func(source, axis=0, out=target)

    expected = func(np.ones((10, 10)), axis=0)
    assert_eq(target, expected)
def test_take_semi_sorted():
    """Check the chunks produced by indexing with a wrapping index array."""
    source = da.ones(10, chunks=(5, ))
    # 0..9 followed by 0..4
    wrapped_index = np.arange(15) % 10
    taken = source[wrapped_index]
    assert taken.chunks == ((5, 5, 5), )
def sinfit(array, periods, dim=None, coord=None, unit='s'):
    """
    Least squares sinusoidal fit.

    Fit sinusoidal functions ``y = A[p] * sin(2 * pi * ax * f[p] + phi[p])``
    plus a constant offset, where ``f[p] = 1 / periods[p]``.

    Parameters
    ----------
    array : xarray.DataArray
        Data to be fitted
    periods: float or sequence of float
        The periods of the sinusoidal functions to be fitted. The sequence
        passed in is not modified.
    dim : str, optional
        The dimension along which the data will be fitted. If not given,
        the first dimension will be used
    coord : optional
        Coordinate whose ``data`` is used as the fitting axis. If not
        given, ``array[dim]`` is used
    unit : {'D', 'h', 'm', 's', 'ms', 'us', 'ns'}, optional
        If the fit uses a datetime dimension, the unit of the period may be
        specified here.

    Returns
    -------
    modes : Dataset
        A Dataset with the amplitude and the phase (in degrees) for each
        period, plus the fitted offset. The ``periods`` coordinate is
        sorted from the longest to the shortest period.
    """
    if dim is None:
        dim = array.dims[0]
    if _utils.is_scalar(periods):
        periods = [periods, ]
    # Sort on a copy (longest period first, i.e. ascending frequency) so the
    # caller's sequence is never mutated.
    periods = sorted(periods, reverse=True)
    n = 2 * len(periods) + 1
    # Re-order the array to place the fitting dimension as the first dimension
    # + stack the other dimensions
    array_stacked = _order_and_stack(array, dim)
    dim_chunk = array.chunks[array.get_axis_num(dim)][0]
    # Check if the dimension is associated with a numpy.datetime
    # and normalize to use periods and time in seconds
    if coord is None:
        coord = array[dim]
    if _utils.is_datetime(coord):
        # Use the 1e-9 to scale nanoseconds to seconds (by default, xarray
        # uses datetime in nanoseconds)
        t = coord.data.astype('f8') * 1e-9
        freqs = 1. / pd.to_timedelta(periods, unit=unit).total_seconds()
    else:
        t = coord.data
        # A plain list cannot be divided element-wise; go through numpy.
        freqs = 1. / np.asarray(periods)
    # Build the coefficient matrix for the fit. Columns are laid out as
    # [cos terms (reversed order), constant, sin terms].
    x = da.vstack([da.cos(2 * np.pi * f * t) for f in reversed(freqs)] +
                  [da.ones(len(t), chunks=dim_chunk), ] +
                  [da.sin(2 * np.pi * f * t) for f in freqs]).T
    x = x.rechunk((dim_chunk, n))
    # Solve the least-square system
    c, _, _, _ = da.linalg.lstsq(x, array_stacked.data)
    # Split the solution: ``b`` holds the cosine coefficients (restored to
    # the order of ``freqs``), ``a`` the sine coefficients.
    b = c[0:n // 2, ][::-1]
    a = c[n // 2 + 1:, ]
    # Compute amplitude and phase (phase converted from radians to degrees)
    amplitude = da.sqrt(a**2 + b**2)
    phase = da.arctan2(b, a) * 180. / np.pi
    # Store the results
    new_dims = ('periods', ) + array_stacked.dims[1:]
    # Compare names by value: ``is not`` on strings relies on interning.
    new_coords = {
        co: array_stacked.coords[co]
        for co in array_stacked.coords if co != dim
    }
    var_dict = {
        'amplitude': (new_dims, amplitude),
        'phase': (new_dims, phase),
        'offset': (array_stacked.dims[1:], c[n // 2, ])
    }
    ds = xr.Dataset(var_dict, coords=new_coords)
    ds = ds.assign_coords(periods=periods)
    ds['periods'].attrs['units'] = unit
    # Unstack the data
    modes = _unstack(ds)
    return modes
def test_negative_n_slicing():
    """Index ``-n`` on a length-``n`` array should match numpy."""
    lazy_first = da.ones(2, chunks=2)[-2]
    numpy_first = np.ones(2)[-2]
    assert_eq(lazy_first, numpy_first)
def test_rechunk_zero_dim():
    """Rechunk an array with a zero-length leading axis and compute it."""
    da = pytest.importorskip("dask.array")

    empty = da.ones((0, 10, 100), chunks=(0, 10, 10))
    rechunked = empty.rechunk((0, 10, 50))
    assert len(rechunked.compute()) == 0
def test_uneven_chunks():
    """A step-2 slice over chunks of 5 should give alternating 3/2 chunks."""
    strided = da.ones(20, chunks=5)[::2]
    assert strided.chunks == ((3, 2, 3, 2), )
hdul.writeto('simple-dask-dirty.fits', overwrite=True) # Display image if we have matplotlib try: import matplotlib.pyplot as plt except ImportError: pass else: plt.figure() plt.imshow(dirty, interpolation="nearest", cmap="cubehelix") plt.title("DIRTY") plt.colorbar() plt.show(True) # Introduce one "correlation" into the dirty image dirty = dirty[:, :, None] # Create natural weights for the one correlation weight_shape = (xds.UVW.shape[0], wavelength.shape[0], 1) weight_chunks = (xds.UVW.chunks[0], wavelength.shape[0], (1, )) degrid_weights = da.ones(weight_shape, dtype=natural_weights.dtype, chunks=weight_chunks) # Construct the visibility dask array vis = degrid(dirty, xds.UVW.data, degrid_weights, wavelength, conv_filter, cell_size) # But only degrid the first 1000 visibilities vis[:1000].compute()
def test_rechunk_same():
    """Rechunking to the current chunks should return the same object."""
    source = da.ones((24, 24), chunks=(4, 8))
    result = source.rechunk(source.chunks)
    assert source is result
def test_rechunk_unknown_raises():
    """Rechunking ``.values`` of a dask dataframe to these chunks must fail."""
    dd = pytest.importorskip("dask.dataframe")

    frame = dd.from_array(da.ones(shape=(10, 10), chunks=(5, 5)))
    values = frame.values
    with pytest.raises(ValueError):
        values.rechunk((None, (5, 5, 5)))
def test_rechunk_with_null_dimensions():
    """``None`` in a chunks tuple should leave that axis's chunks unchanged."""
    source = da.from_array(np.ones((24, 24)), chunks=(4, 8))
    rechunked = source.rechunk(chunks=(None, 4))
    reference = da.ones((24, 24), chunks=(4, 4))
    assert rechunked.chunks == reference.chunks
def test_array_cumreduction_out(func):
    """A cumulative reduction may write its result back into its own input."""
    arr = da.ones((10, 10), chunks=(4, 4))
    func(arr, axis=0, out=arr)

    expected = func(np.ones((10, 10)), axis=0)
    assert_eq(arr, expected)
def test_slicing_and_chunks():
    """Slicing off whole edge chunks should leave only the interior chunks."""
    outer = da.ones((24, 16), chunks=((4, 8, 8, 4), (2, 6, 6, 2)))
    inner = outer[4:-4, 2:-2]
    assert inner.chunks == ((8, 8), (6, 6))
def test_rechunk_unknown_from_array(): dd = pytest.importorskip("dask.dataframe") # pd = pytest.importorskip('pandas') x = dd.from_array(da.ones(shape=(4, 4), chunks=(2, 2))).values # result = x.rechunk({1: 5}) result = x.rechunk((None, 4)) assert np.isnan(x.chunks[0]).all() assert np.isnan(result.chunks[0]).all() assert x.chunks[1] == (4, ) assert_eq(x, result) @pytest.mark.parametrize( "x, chunks", [ (da.ones(shape=(50, 10), chunks=(25, 10)), (None, 5)), (da.ones(shape=(50, 10), chunks=(25, 10)), { 1: 5 }), (da.ones(shape=(50, 10), chunks=(25, 10)), (None, (5, 5))), (da.ones(shape=(1000, 10), chunks=(5, 10)), (None, 5)), (da.ones(shape=(1000, 10), chunks=(5, 10)), { 1: 5 }), (da.ones(shape=(1000, 10), chunks=(5, 10)), (None, (5, 5))), (da.ones(shape=(10, 10), chunks=(10, 10)), (None, 5)), (da.ones(shape=(10, 10), chunks=(10, 10)), { 1: 5 }), (da.ones(shape=(10, 10), chunks=(10, 10)), (None, (5, 5))), (da.ones(shape=(10, 10), chunks=(10, 2)), (None, 5)),
def test_rechunk_zero():
    """With a 1-byte chunk-size limit, auto rechunking gives 1-element chunks."""
    with dask.config.set({'array.chunk-size': '1B'}):
        source = da.ones(10, chunks=(5, ))
        auto = source.rechunk('auto')
        expected = ((1, ) * 10, )
        assert auto.chunks == expected
def test_dtype():
    """Rechunking must not change the dtype."""
    source = da.ones(5, chunks=(2, ))
    rechunked = source.rechunk(chunks=(1, ))
    assert rechunked.dtype == source.dtype
def test_object_reduction(method):
    """Run the reduction named ``method`` on a one-element object array."""
    arr = da.ones(1).astype(object)
    reduce_ = getattr(arr, method)
    result = reduce_().compute()
    assert result == 1
def test_auto_chunks():
    """Omitting ``chunks`` should pick a partition count from the config limit."""
    with dask.config.set({"array.chunk-size": "50 MiB"}):
        big = da.ones((10000, 10000))
        partitions = big.npartitions
        assert 4 < partitions < 32