def reshape(a, nstep, mstep, chunksize, aligned=0):
    shape = a.shape
    dtype = a.dtype

    if aligned != 0:
        newshape = shape[:-1] + (aligned, )
        fill = da.full(newshape, com.get_fill(dtype), dtype=dtype,
                       chunks=chunksize)
        a = da.concatenate((fill, a), axis=-1)
        shape = a.shape

    fill_len = (mstep - shape[-1] % mstep) % mstep
    if fill_len != 0:
        newshape = shape[:-1] + (fill_len, )
        fill = da.full(newshape, com.get_fill(dtype), dtype=dtype,
                       chunks=chunksize)
        a = da.concatenate((a, fill), axis=-1)

    newshape = shape[:-1] + (nstep, mstep)
    a = a.reshape(newshape).rechunk(chunksize)

    return a
def test_wrap_consistent_names():
    assert (sorted(ones(10, dtype='i4', chunks=(4,)).dask) ==
            sorted(ones(10, dtype='i4', chunks=(4,)).dask))
    assert (sorted(ones(10, dtype='i4', chunks=(4,)).dask) !=
            sorted(ones(10, chunks=(4,)).dask))
    assert (sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) ==
            sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask))
    assert (sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='i2').dask) !=
            sorted(da.full((3, 3), 100, chunks=(2, 2)).dask))
def test_wrap_consistent_names():
    assert sorted(ones(10, dtype='i4', chunks=(4,)).dask) ==\
        sorted(ones(10, dtype='i4', chunks=(4,)).dask)
    assert sorted(ones(10, dtype='i4', chunks=(4,)).dask) !=\
        sorted(ones(10, chunks=(4,)).dask)
    assert sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) ==\
        sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask)
    assert sorted(da.full((3, 3), 100, chunks=(2, 2), dtype='f8').dask) !=\
        sorted(da.full((3, 3), 100, chunks=(2, 2)).dask)
def test_ms_create_and_update(Dataset, tmp_path, chunks):
    """ Test that we can update and append at the same time """
    filename = str(tmp_path / "create-and-update.ms")

    rs = np.random.RandomState(42)

    # Create a dataset of 10 rows with DATA and DATA_DESC_ID
    dims = ("row", "chan", "corr")
    row, chan, corr = tuple(sum(chunks[d]) for d in dims)
    ms_datasets = []
    np_data = (rs.normal(size=(row, chan, corr)) +
               1j * rs.normal(size=(row, chan, corr))).astype(np.complex64)

    data_chunks = tuple((chunks['row'], chan, corr))
    dask_data = da.from_array(np_data, chunks=data_chunks)
    # Create dask ddid column
    dask_ddid = da.full(row, 0, chunks=chunks['row'], dtype=np.int32)
    dataset = Dataset({
        'DATA': (dims, dask_data),
        'DATA_DESC_ID': (("row", ), dask_ddid),
    })
    ms_datasets.append(dataset)

    # Write it
    writes = xds_to_table(ms_datasets, filename, ["DATA", "DATA_DESC_ID"])
    dask.compute(writes)

    ms_datasets = xds_from_ms(filename)

    # Now add another dataset (different DDID), with no ROWID
    np_data = (rs.normal(size=(row, chan, corr)) +
               1j * rs.normal(size=(row, chan, corr))).astype(np.complex64)
    data_chunks = tuple((chunks['row'], chan, corr))
    dask_data = da.from_array(np_data, chunks=data_chunks)
    # Create dask ddid column
    dask_ddid = da.full(row, 1, chunks=chunks['row'], dtype=np.int32)
    dataset = Dataset({
        'DATA': (dims, dask_data),
        'DATA_DESC_ID': (("row", ), dask_ddid),
    })
    ms_datasets.append(dataset)

    # Write it
    writes = xds_to_table(ms_datasets, filename, ["DATA", "DATA_DESC_ID"])
    dask.compute(writes)

    # Rows have been added and additional data is present
    with pt.table(filename, ack=False, readonly=True) as T:
        first_data_desc_id = da.full(row,
                                     ms_datasets[0].DATA_DESC_ID,
                                     chunks=chunks['row'])
        ds_data = da.concatenate([ms_datasets[0].DATA.data,
                                  ms_datasets[1].DATA.data])
        ds_ddid = da.concatenate([first_data_desc_id,
                                  ms_datasets[1].DATA_DESC_ID.data])
        assert_array_equal(T.getcol("DATA"), ds_data)
        assert_array_equal(T.getcol("DATA_DESC_ID"), ds_ddid)
def test_wrap_consistent_names():
    assert sorted(ones(10, dtype="i4", chunks=(4, )).dask) == sorted(
        ones(10, dtype="i4", chunks=(4, )).dask)
    assert sorted(ones(10, dtype="i4", chunks=(4, )).dask) != sorted(
        ones(10, chunks=(4, )).dask)
    assert sorted(da.full(
        (3, 3), 100, chunks=(2, 2), dtype="f8").dask) == sorted(
            da.full((3, 3), 100, chunks=(2, 2), dtype="f8").dask)
    assert sorted(da.full(
        (3, 3), 100, chunks=(2, 2), dtype="i2").dask) != sorted(
            da.full((3, 3), 100, chunks=(2, 2)).dask)
def test_lettered_tiles_no_valid_data(self):
    """Test creating a lettered grid with no valid data."""
    from satpy.writers.awips_tiled import AWIPSTiledWriter
    from xarray import DataArray
    from pyresample.geometry import AreaDefinition
    from pyresample.utils import proj4_str_to_dict
    w = AWIPSTiledWriter(base_dir=self.base_dir, compress=True)
    area_def = AreaDefinition(
        'test',
        'test',
        'test',
        proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. '
                          '+lat_0=25 +lat_1=25 +units=m +no_defs'),
        1000,
        2000,
        (-1000000., -1500000., 1000000., 1500000.),
    )
    now = datetime(2018, 1, 1, 12, 0, 0)
    ds = DataArray(
        da.full((2000, 1000), np.nan, chunks=500, dtype=np.float32),
        attrs=dict(
            name='test_ds',
            platform_name='PLAT',
            sensor='SENSOR',
            units='1',
            area=area_def,
            start_time=now,
            end_time=now + timedelta(minutes=20))
    )
    w.save_datasets([ds],
                    sector_id='LCC',
                    source_name="TESTS",
                    tile_count=(3, 3),
                    lettered_grid=True)
    # No files created - all NaNs should result in no tiles being created
    all_files = glob(os.path.join(self.base_dir, 'TESTS_AII*.nc'))
    assert not all_files
def test_get_padding_area_float():
    """Test padding area generator for floats."""
    shape = (10, 10)
    # np.float was removed from recent NumPy releases; use np.float64 instead
    dtype = np.float64
    res = get_padding_area(shape, dtype)
    expected = da.full(shape, np.nan, dtype=dtype, chunks=CHUNK_SIZE)
    np.testing.assert_array_equal(res, expected)
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8')
    assert (a.compute() == 100).all()
    assert a.dtype == a.compute(scheduler='sync').dtype == 'i8'
    assert a.name.startswith('full-')
def test_get_padding_area_int():
    """Test padding area generator for integers."""
    shape = (10, 10)
    dtype = np.int64
    res = get_padding_area(shape, dtype)
    expected = da.full(shape, 0, dtype=dtype, chunks=CHUNK_SIZE)
    np.testing.assert_array_equal(res, expected)
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype="i8")
    assert (a.compute() == 100).all()
    assert a.dtype == a.compute(scheduler="sync").dtype == "i8"
    assert a.name.startswith("full_like-")
def _ir_calibrate(self, radiance, measured, root):
    """IR channel calibration."""
    coef = self[measured + "/radiance_unit_conversion_coefficient"]
    wl_c = self[root + "/central_wavelength_actual"]
    a = self[measured + "/radiance_to_bt_conversion_coefficient_a"]
    b = self[measured + "/radiance_to_bt_conversion_coefficient_b"]
    c1 = self[measured + "/radiance_to_bt_conversion_constant_c1"]
    c2 = self[measured + "/radiance_to_bt_conversion_constant_c2"]

    for v in (coef, wl_c, a, b, c1, c2):
        if v == v.attrs.get("FillValue",
                            default_fillvals.get(v.dtype.str[1:])):
            logger.error(
                "{:s} set to fill value, cannot produce "
                "brightness temperatures for {:s}.".format(
                    v.attrs.get("long_name",
                                "at least one necessary coefficient"),
                    root))
            return xr.DataArray(
                da.full(shape=radiance.shape,
                        chunks=radiance.chunks,
                        fill_value=np.nan),
                dims=radiance.dims,
                coords=radiance.coords,
                attrs=radiance.attrs)

    Lv = radiance * coef
    vc = 1e6 / wl_c  # from wl in um to wn in m^-1

    nom = c2 * vc
    denom = a * np.log(1 + (c1 * vc**3) / Lv)

    res = nom / denom - b / a
    res.attrs["units"] = "K"
    return res
def test_full_detects_da_dtype():
    x = da.from_array(100)
    with pytest.warns(FutureWarning, match="not implemented by Dask array") as record:
        # This shall not raise a NotImplementedError due to dtype detected as object.
        a = da.full(shape=(3, 3), fill_value=x)
        assert a.dtype == x.dtype
        assert_eq(a, np.full(shape=(3, 3), fill_value=100))
        assert len(record) == 1
def test_inlined_array():
    A = da.ones((10, 10), chunks=(2, 2), dtype=np.float64)
    B = da.full((10, 10), np.float64(2), chunks=(2, 2))
    C = A + B
    E = C + 1
    D = inlined_array(C)

    assert len(C.__dask_graph__().layers) == 3
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert B.name not in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten(D.__dask_keys__()))
    assert_array_equal(D, C)

    D = inlined_array(C, [A, B])
    assert len(D.__dask_graph__().layers) == 1
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert B.name not in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten(D.__dask_keys__()))
    assert_array_equal(D, C)

    D = inlined_array(C, [A])
    assert len(D.__dask_graph__().layers) == 2
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert B.name in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten([a.__dask_keys__() for a in [D, B]]))
    assert_array_equal(D, C)

    D = inlined_array(C, [B])
    assert len(D.__dask_graph__().layers) == 2
    assert D.name == C.name
    assert D.name in D.__dask_graph__().layers
    assert A.name in D.__dask_graph__().layers
    assert B.name not in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten([a.__dask_keys__() for a in [D, A]]))
    assert_array_equal(D, C)

    D = inlined_array(E, [A])
    assert len(D.__dask_graph__().layers) == 3
    assert D.name == E.name
    assert D.name in D.__dask_graph__().layers
    assert B.name in D.__dask_graph__().layers
    assert A.name not in D.__dask_graph__().layers
    assert C.name in D.__dask_graph__().layers
    graph_keys = set(flatten(D.__dask_graph__().keys()))
    assert graph_keys == set(flatten([a.__dask_keys__() for a in [D, B, C]]))
    assert_array_equal(D, E)
def _pad_dask_pieces_after(self, pieces, dask_pieces, chunks):
    """Pad the dask pieces after."""
    last_x = max(arr.coords['x'][-1] for arr in pieces)
    if last_x < self._image_shape[1] - 1:
        missing_x = np.arange(last_x + 1, self._image_shape[1])
        missing_y = pieces[-1].coords['y']
        new_piece = da.full((len(missing_y), len(missing_x)), np.nan,
                            chunks=chunks)
        dask_pieces.append(new_piece)
def _pad_dask_pieces_before(pieces, dask_pieces, chunks):
    """Pad the dask pieces before."""
    first_x = min(arr.coords['x'][0] for arr in pieces)
    if first_x > 0:
        missing_x = np.arange(first_x)
        missing_y = pieces[0].coords['y']
        new_piece = da.full((len(missing_y), len(missing_x)), np.nan,
                            chunks=chunks)
        dask_pieces.insert(0, new_piece)
def get_padding_area(shape, dtype):
    """Create a padding area filled with no data."""
    if np.issubdtype(dtype, np.floating):
        init_value = np.nan
    else:
        init_value = 0

    padding_area = da.full(shape, init_value, dtype=dtype, chunks=CHUNK_SIZE)

    return padding_area
def setup(self):
    CHUNK_SIZE = 10
    NCHUNKS = 9000
    SIZE = CHUNK_SIZE * NCHUNKS
    base = [
        da.full((SIZE, ), i, dtype=np.int8, chunks=CHUNK_SIZE)
        for i in range(4)
    ]
    self.base = base
def test_resample_area_to_area_2d_fill_value(self):
    """Resample area to area, 2d, use fill value."""
    data = xr.DataArray(da.full(self.src_area.shape, np.nan,
                                dtype=np.float64),
                        dims=['y', 'x'])
    res = self.resampler.compute(
        data, method='bil',
        fill_value=2.0).compute(scheduler='single-threaded')
    assert res.shape == self.dst_area.shape
    assert np.allclose(res, 2.0)
def test_lettered_tiles_no_valid_data(self):
    """Test creating a lettered grid with no valid data."""
    from satpy.writers.awips_tiled import AWIPSTiledWriter
    w = AWIPSTiledWriter(base_dir=self.base_dir, compress=True)
    data = da.full((2000, 1000), np.nan, chunks=500, dtype=np.float32)
    area_def = self._get_test_area(
        shape=(2000, 1000),
        extents=(-1000000., -1500000., 1000000., 1500000.))
    ds = self._get_test_lcc_data(data, area_def)
    w.save_datasets([ds],
                    sector_id='LCC',
                    source_name="TESTS",
                    tile_count=(3, 3),
                    lettered_grid=True)
    # No files created - all NaNs should result in no tiles being created
    all_files = glob(os.path.join(self.base_dir, 'TESTS_AII*.nc'))
    assert not all_files
def _multimodel_mask_cubes(cubes, shape):
    """Apply common mask to all cubes in-place."""
    # Create mask
    mask = da.full(shape, False, dtype=bool)
    for cube in cubes:
        new_mask = da.ma.getmaskarray(cube.core_data())
        mask |= new_mask

    # Apply common mask
    for cube in cubes:
        cube.data = da.ma.masked_array(cube.core_data(), mask=mask)

    return cubes
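# A minimal, self-contained sketch of the shared-mask pattern used in
# `_multimodel_mask_cubes` above, stripped of the iris cube handling.
# The small arrays and chunk sizes below are illustrative assumptions only.
def example_common_mask():
    import numpy as np
    import dask.array as da

    data = da.from_array(np.arange(4), chunks=2)
    mask_a = da.from_array(np.array([True, False, False, False]), chunks=2)
    mask_b = da.from_array(np.array([False, False, True, False]), chunks=2)
    a = da.ma.masked_array(data, mask=mask_a)
    b = da.ma.masked_array(data, mask=mask_b)

    # Start from an all-False mask and OR in each array's mask lazily
    mask = da.full((4,), False, dtype=bool)
    for arr in (a, b):
        mask |= da.ma.getmaskarray(arr)

    # Applying the combined mask hides every element masked in any input
    masked = da.ma.masked_array(data, mask=mask)
    np.testing.assert_array_equal(da.ma.getmaskarray(masked).compute(),
                                  [True, False, True, False])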
def average_spw(spw_ds, chan_bin_size):
    """
    Parameters
    ----------
    spw_ds : list of Datasets
        list of Datasets, each describing a single Spectral Window
    chan_bin_size : int
        Number of channels in an averaging bin

    Returns
    -------
    spw_ds : list of Datasets
        list of Datasets, each describing an averaged Spectral Window
    """
    new_spw_ds = []

    for r, spw in enumerate(spw_ds):
        # Get the dataset variables as a mutable dictionary
        dv = dict(spw.data_vars)

        # Extract arrays we wish to average
        chan_freq = dv['CHAN_FREQ'].data[0]
        chan_width = dv['CHAN_WIDTH'].data[0]
        effective_bw = dv['EFFECTIVE_BW'].data[0]
        resolution = dv['RESOLUTION'].data[0]

        # Construct channel metadata
        chan_arrays = (chan_freq, chan_width, effective_bw, resolution)
        chan_meta = chan_metadata((), chan_arrays, chan_bin_size)

        # Average channel based data
        avg = dask_chan_avg(chan_meta,
                            chan_freq=chan_freq,
                            chan_width=chan_width,
                            effective_bw=effective_bw,
                            resolution=resolution,
                            chan_bin_size=chan_bin_size)

        num_chan = da.full((1, ), avg.chan_freq.shape[0], dtype=np.int32)

        # These columns change, re-create them
        dv['NUM_CHAN'] = (("row", ), num_chan)
        dv['CHAN_FREQ'] = (("row", "chan"), avg.chan_freq[None, :])
        dv['CHAN_WIDTH'] = (("row", "chan"), avg.chan_width[None, :])
        dv['EFFECTIVE_BW'] = (("row", "chan"), avg.effective_bw[None, :])
        dv['RESOLUTION'] = (("row", "chan"), avg.resolution[None, :])

        # But re-use all the others
        new_spw_ds.append(Dataset(dv))

    return new_spw_ds
def data(self):
    """
    Get the buffer contents in shape that corresponds to the original
    dataset shape, using a lazy Dask array.

    Copied largely from BufferWrapper with modifications to ensure Dask
    arrays are correctly unpacked into the result array.

    #TODO consider if this needs to be cached to avoid creating
    multiple copies in the task graph ?

    If a ROI is set, embed the result into a new array; unset values
    have NaN value for floating point types, False for boolean, 0 for
    integer types and structs, '' for string types and None for objects.
    """
    if isinstance(self._data, DaskInplaceWrapper):
        self._data = self._data.data
    if self._contiguous_cache:
        raise RuntimeError("Cache is not empty, has to be flushed")
    if self._roi is None or self._kind != 'nav':
        return self._data.reshape(
            self._shape_for_kind(self._kind, self._ds_shape))
    shape = self._shape_for_kind(self._kind, self._ds_shape)
    if shape == self._data.shape:
        # preallocated and already wrapped
        return self._data
    # Integer types and "void" (structs and such)
    if self.dtype.kind in ('i', 'u', 'V'):
        fill = 0
    # Bytes and Unicode strings
    elif self.dtype.kind in ('S', 'U'):
        fill = ''
    else:
        # 'b' (boolean): False
        # 'f', 'c': NaN
        # 'm', 'M' (datetime, timedelta): NaT
        # 'O' (object): None
        fill = None
    flat_chunking = tuple(p.slice.shape[0] for p in self._ds_partitions)
    flat_chunking = (flat_chunking, ) + self._extra_chunking
    flat_shape = (prod(self._ds_shape.nav), ) + self._extra_shape
    flat_wrapper = da.full(flat_shape, fill, dtype=self._dtype,
                           chunks=flat_chunking)
    flat_wrapper[self._roi, ...] = self._data
    wrapper = flat_wrapper.reshape(self._ds_shape.nav + self._extra_shape)
    return wrapper
def test_array_creation_blockwise_fusion():
    """
    Check that certain array creation routines work with blockwise and can be
    fused with other blockwise operations.
    """
    x = da.ones(3, chunks=(3,))
    y = da.zeros(3, chunks=(3,))
    z = da.full(3, fill_value=2, chunks=(3,))
    a = x + y + z

    dsk1 = a.__dask_graph__()
    assert len(dsk1) == 5

    dsk2 = optimize_blockwise(dsk1)
    assert len(dsk2) == 1

    assert_eq(a, np.full(3, 3))
def full(shape, *args, **kwargs):
    try:
        array_used_to_infer_type = kwargs.pop('as_type_of')
    except KeyError:
        msg = 'as_type_of is mandatory: this is an array used to infer '
        msg += 'the type of the generated array'
        raise ValueError(msg)

    if isinstance(array_used_to_infer_type, da.Array):
        return da.full(shape, *args, **kwargs)
    elif isinstance(array_used_to_infer_type, np.ndarray):
        return np.full(shape, *args, **kwargs)
    else:
        msg = 'Not implemented for types other than dask array or numpy ndarray'
        raise NotImplementedError(msg)
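# A short, hypothetical usage sketch for the dispatching `full` wrapper above;
# the reference arrays and the fill value are illustrative assumptions.
def example_full_dispatch():
    import numpy as np
    import dask.array as da

    dask_ref = da.ones((4, 4), chunks=(2, 2))
    numpy_ref = np.ones((4, 4))

    # A dask reference array dispatches to da.full ...
    d = full((4, 4), 7, as_type_of=dask_ref)
    assert isinstance(d, da.Array)

    # ... while a numpy reference array dispatches to np.full
    n = full((4, 4), 7, as_type_of=numpy_ref)
    assert isinstance(n, np.ndarray)
    assert (n == 7).all()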
def apply_common_mask(cfg, input_data):
    """Apply common mask to all datasets."""
    if not cfg.get('apply_common_mask'):
        return input_data
    logger.info("Applying common mask to all cubes")
    shapes = {data['cube'].shape for data in input_data}
    if len(shapes) > 1:
        raise ValueError(
            f"Expected cubes with identical shapes when 'apply_common_mask' "
            f"is set to 'True', got shapes {shapes}")
    common_mask = da.full(list(shapes)[0], False)
    for data in input_data:
        common_mask |= da.ma.getmaskarray(data['cube'].core_data())
    for data in input_data:
        data['cube'].data = da.ma.masked_array(data['cube'].core_data(),
                                               mask=common_mask)
    return input_data
def parallel_gradient_search(data, src_x, src_y, dst_x, dst_y,
                             src_gradient_xl, src_gradient_xp,
                             src_gradient_yl, src_gradient_yp,
                             dst_mosaic_locations, dst_slices,
                             **kwargs):
    """Run gradient search in parallel in input area coordinates."""
    method = kwargs.get('method', 'bilinear')
    # Determine the number of bands
    bands = np.array([arr.shape[0] for arr in data if arr is not None])
    num_bands = np.max(bands)
    if np.any(bands != num_bands):
        raise ValueError(
            "All source data chunks have to have the same number of bands")
    chunks = {}
    is_pad = False
    # Collect co-located target chunks
    for i, arr in enumerate(data):
        if arr is None:
            is_pad = True
            res = da.full((num_bands,
                           dst_slices[i][1] - dst_slices[i][0],
                           dst_slices[i][3] - dst_slices[i][2]), np.nan)
        else:
            is_pad = False
            res = dask.delayed(_gradient_resample_data)(
                arr.astype(np.float64),
                src_x[i], src_y[i],
                src_gradient_xl[i], src_gradient_xp[i],
                src_gradient_yl[i], src_gradient_yp[i],
                dst_x[i], dst_y[i],
                method=method)
            res = da.from_delayed(res, (num_bands, ) + dst_x[i].shape,
                                  dtype=np.float64)
        if dst_mosaic_locations[i] in chunks:
            if not is_pad:
                chunks[dst_mosaic_locations[i]].append(res)
        else:
            chunks[dst_mosaic_locations[i]] = [res, ]

    return _concatenate_chunks(chunks)
def _multimodel_mask_products(products, shape):
    """Apply common mask to all cubes of products in-place."""
    # Create mask and get products used for mask
    mask = da.full(shape, False, dtype=bool)
    used_products = set()
    for product in products:
        for cube in product.cubes:
            new_mask = da.ma.getmaskarray(cube.core_data())
            mask |= new_mask
            if da.any(new_mask):
                used_products.add(product)

    # Apply common mask and update provenance information
    for product in products:
        for cube in product.cubes:
            cube.data = da.ma.masked_array(cube.core_data(), mask=mask)
        for other_product in used_products:
            if other_product.filename != product.filename:
                product.wasderivedfrom(other_product)

    return products
def _reshape_to_target_area(self, res, ndim):
    if ndim == 3:
        dim_multiplier = res.shape[0]
    else:
        dim_multiplier = 1
        res = da.reshape(res, (1, res.size))
    if res.size != dim_multiplier * self._target_geo_def.size:
        out = []
        for i in range(dim_multiplier):
            tmp = da.full(self._target_geo_def.size, np.nan)
            tmp[self._valid_output_indices] = res[i, :]
            out.append(tmp)
        res = da.stack(out)
    shp = self._target_geo_def.shape
    if ndim == 3:
        res = da.reshape(res, (res.shape[0], shp[0], shp[1]))
    else:
        res = da.reshape(res, (shp[0], shp[1]))

    return res
def _expand_group_columns(self, datasets, args):
    if not args.group_columns:
        return datasets

    new_datasets = []

    for ds in datasets:
        # Remove grouping attribute and recreate grouping columns
        new_group_vars = {}
        row_chunks = ds.chunks["row"]
        row_dims = ds.dims["row"]
        attrs = ds.attrs

        for column in args.group_columns:
            value = attrs.pop(column)
            group_column = da.full(row_dims, value, chunks=row_chunks)
            new_group_vars[column] = (("row",), group_column)

        new_ds = ds.assign_attrs(attrs).assign(**new_group_vars)
        new_datasets.append(new_ds)

    return new_datasets
def _vis_calibrate(self, radiance, measured):
    """VIS channel calibration."""
    # radiance to reflectance taken as in mipp/xrit/MSG.py
    # again FCI User Guide is not clear on how to do this
    cesilab = measured + "/channel_effective_solar_irradiance"
    cesi = self[cesilab]
    if cesi == cesi.attrs.get(
            "FillValue", default_fillvals.get(cesi.dtype.str[1:])):
        logger.error(
            "channel effective solar irradiance set to fill value, "
            "cannot produce reflectance for {:s}.".format(measured))
        return xr.DataArray(
            da.full(shape=radiance.shape, chunks=radiance.chunks,
                    fill_value=np.nan),
            dims=radiance.dims,
            coords=radiance.coords,
            attrs=radiance.attrs)
    sirr = float(cesi)
    res = radiance / sirr * 100
    res.attrs["units"] = "%"
    return res
def shift(arr, num, axis, fill_value=0):
    """ Shift N-dim array. """
    if not num:
        return arr.copy()

    fill_shape = arr.shape[:axis] + (abs(num), ) + arr.shape[axis + 1:]
    filled = da.full(shape=fill_shape, fill_value=fill_value)

    kept_slice = [':', ] * arr.ndim
    if num > 0:
        kept_slice[axis] = '0:{}'.format(-num)
        kept = eval('arr[' + ', '.join(kept_slice) + ']')
        result = da.concatenate([filled, kept], axis=axis)
    else:
        kept_slice[axis] = '{}:'.format(-num)
        kept = eval('arr[' + ', '.join(kept_slice) + ']')
        result = da.concatenate([kept, filled], axis=axis)

    return result
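# A small, hypothetical usage example for the `shift` helper above; the input
# array, chunking and expected output are illustrative assumptions.
def example_shift():
    import numpy as np
    import dask.array as da

    arr = da.from_array(np.arange(6).reshape(2, 3), chunks=(1, 3))

    # Shifting by +1 along axis 1 prepends a column of the fill value and
    # drops the last column, so the shape is preserved
    shifted = shift(arr, 1, axis=1, fill_value=0)
    np.testing.assert_array_equal(shifted.compute(),
                                  [[0, 0, 1],
                                   [0, 3, 4]])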
def test_full():
    d = da.full((3, 4), 2, chunks=((2, 1), (2, 2)))
    assert d.chunks == ((2, 1), (2, 2))
    assert eq(d, np.full((3, 4), 2))
def rolling_window(a, axis, window, center, fill_value):
    """ Dask's equivalence to np.utils.rolling_window """
    orig_shape = a.shape
    # inputs for ghost
    if axis < 0:
        axis = a.ndim + axis
    depth = {d: 0 for d in range(a.ndim)}
    depth[axis] = int(window / 2)
    # For evenly sized window, we need to crop the first point of each block.
    offset = 1 if window % 2 == 0 else 0

    if depth[axis] > min(a.chunks[axis]):
        raise ValueError(
            "For window size %d, every chunk should be larger than %d, "
            "but the smallest chunk size is %d. Rechunk your array\n"
            "with a larger chunk size or a chunk size that\n"
            "more evenly divides the shape of your array." %
            (window, depth[axis], min(a.chunks[axis])))

    # Although dask.ghost pads values to boundaries of the array,
    # the size of the generated array is smaller than what we want
    # if center == False.
    if center:
        start = int(window / 2)  # 10 -> 5, 9 -> 4
        end = window - 1 - start
    else:
        start, end = window - 1, 0
    pad_size = max(start, end) + offset - depth[axis]
    drop_size = 0
    # pad_size becomes more than 0 when the ghosted array is smaller than
    # needed. In this case, we need to enlarge the original array by padding
    # before ghosting.
    if pad_size > 0:
        if pad_size < depth[axis]:
            # Ghosting requires each chunk larger than depth. If pad_size is
            # smaller than the depth, we enlarge this and truncate it later.
            drop_size = depth[axis] - pad_size
            pad_size = depth[axis]
        shape = list(a.shape)
        shape[axis] = pad_size
        chunks = list(a.chunks)
        chunks[axis] = (pad_size, )
        fill_array = da.full(shape, fill_value, dtype=a.dtype, chunks=chunks)
        a = da.concatenate([fill_array, a], axis=axis)

    boundary = {d: fill_value for d in range(a.ndim)}

    # create ghosted arrays
    ag = da.ghost.ghost(a, depth=depth, boundary=boundary)

    # apply rolling func
    def func(x, window, axis=-1):
        x = np.asarray(x)
        rolling = nputils._rolling_window(x, window, axis)
        return rolling[(slice(None), ) * axis + (slice(offset, None), )]

    chunks = list(a.chunks)
    chunks.append(window)
    out = ag.map_blocks(func, dtype=a.dtype, new_axis=a.ndim, chunks=chunks,
                        window=window, axis=axis)

    # crop boundary.
    index = (slice(None),) * axis + (slice(drop_size,
                                           drop_size + orig_shape[axis]), )
    return out[index]
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8')
    assert (a.compute() == 100).all()
    assert a.dtype == a.compute(scheduler='sync').dtype == 'i8'
def test_full():
    a = da.full((3, 3), 100, chunks=(2, 2), dtype='i8')
    assert (a.compute() == 100).all()
    assert a._dtype == a.compute(get=dask.get).dtype == 'i8'
def test_full():
    d = da.full((3, 4), 2, blockdims=((2, 1), (2, 2)))
    assert d.blockdims == ((2, 1), (2, 2))
    assert eq(d, np.full((3, 4), 2))