def test_get_tiles_comparison(ltl_ctx, merlin_detector_sim_ptycho, merlin_ds_ptycho_flat):
    # Compare tiles delivered by a live Merlin acquisition against the
    # corresponding slices of the offline dataset, exposed as a Dask array.
    merlin_ds = merlin_ds_ptycho_flat
    da, _ = make_dask_array(merlin_ds)
    p = next(merlin_ds.get_partitions())
    host, port = merlin_detector_sim_ptycho
    aq = ltl_ctx.prepare_acquisition(
        'merlin',
        trigger=None,
        nav_shape=merlin_ds.shape.nav,
        host=host,
        port=port,
        drain=False,
        pool_size=4,
        # Match live partition size with offline
        # partition size to avoid read amplification
        frames_per_partition=p.slice.shape[0],
    )
    s = TilingScheme.make_for_shape(
        tileshape=Shape((7, 256, 256), sig_dims=2),
        dataset_shape=aq.shape,
    )
    with ltl_ctx._do_acquisition(aq, None):
        for p in aq.get_partitions():
            part_data = da[p.slice.get()].compute()
            print(f"comparing partition {p}")
            for tile in p.get_tiles(s):
                print(f"comparing tile {tile.tile_slice} in partition {p.slice}")
                tile_data = part_data[tile.tile_slice.shift(p.slice).get()]
                assert np.allclose(tile, tile_data)
def test_dask_array():
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(
        data=data,
        tileshape=(16, 16, 16),
        num_partitions=2,
    )
    (da, workers) = make_dask_array(dataset)
    assert np.allclose(da, data)
    assert np.allclose(da.sum().compute(workers=workers), data.sum())
    assert da.shape == data.shape
def test_dask_array_with_roi_1():
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(
        data=data,
        tileshape=(16, 16, 16),
        num_partitions=2,
    )
    roi = np.zeros(dataset.shape.nav, dtype=bool)
    roi[0, 0] = True
    (da, workers) = make_dask_array(dataset, roi=roi)
    assert np.allclose(
        da.compute(workers=workers, scheduler='single-threaded'),
        data[0, 0],
    )
    assert da.shape == (1, 16, 16)
def test_dask_array_2(dask_executor):
    # NOTE: keep in sync with the example in docs/source/api.rst!
    ctx = Context(executor=dask_executor)
    dataset = ctx.load("memory", datashape=(16, 16, 16), sig_dims=2)

    # Construct a Dask array from the dataset. The second return value
    # contains information on workers that hold parts of the dataset in
    # local storage, to ensure optimal data locality.
    dask_array, workers = make_dask_array(dataset)

    # Use the Dask.distributed client of LiberTEM, since it may not be
    # the default client:
    ctx.executor.client.compute(dask_array.sum(axis=(-1, -2))).result()
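# A hedged usage sketch (not part of the test suite): the `workers` mapping
# returned by make_dask_array can also be passed to dask.distributed's
# Client.compute() via its `workers=` argument, so that tasks are scheduled
# on the workers that already hold the underlying partitions. The Context
# and dataset setup below mirrors the test above.
from libertem.api import Context
from libertem.contrib.daskadapter import make_dask_array

ctx = Context()
dataset = ctx.load("memory", datashape=(16, 16, 16), sig_dims=2)
dask_array, workers = make_dask_array(dataset)
# Restrict task placement for data locality:
result = ctx.executor.client.compute(
    dask_array.sum(axis=(-1, -2)),
    workers=workers,
).result()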
def test_dask_array_with_roi_2():
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(
        data=data,
        tileshape=(16, 16, 16),
        num_partitions=2,
    )
    sparse_roi = np.random.choice([True, False], size=dataset.shape.nav, p=[0.1, 0.9])
    (da, workers) = make_dask_array(dataset, roi=sparse_roi)
    assert np.allclose(
        da.compute(workers=workers, scheduler='single-threaded'),
        data[sparse_roi],
    )
    assert da.shape == (np.count_nonzero(sparse_roi), 16, 16)
def _mk_ds(method, ctx, raw_ds):
    # Build the same dataset through different construction paths, so that
    # tests can compare behavior across them.
    filename = raw_ds._path
    shape = tuple(raw_ds.shape)
    dtype = raw_ds.dtype
    if method == 'from_array':
        arr = da.from_array(
            np.memmap(filename, shape=shape, dtype=dtype, mode='r')
        )
        ds = ctx.load('dask', arr, sig_dims=2)
    elif method == 'native':
        ds = raw_ds
    elif method == 'delayed':
        arr = _mk_dask_from_delayed(
            shape=shape,
            dtype=dtype,
            chunking=(4, -1, 64, -1),
            filename=filename,
        )
        ds = ctx.load('dask', arr, sig_dims=2)
    elif method == 'make_dask_array':
        arr, _ = make_dask_array(raw_ds, dtype=raw_ds.dtype)
        ds = ctx.load('dask', arr, sig_dims=2)
    else:
        raise ValueError(f"Unknown method {method}")
    return ds
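# Hypothetical example of how _mk_ds might be driven from a parametrized
# test. The test name, the fixtures `lt_ctx` and `default_raw`, and the UDF
# used here are assumptions for illustration, not from the original code:
import pytest

@pytest.mark.parametrize(
    'method', ['from_array', 'native', 'delayed', 'make_dask_array'],
)
def test_sum_matches_across_methods(method, lt_ctx, default_raw):
    from libertem.udf.sum import SumUDF
    ds = _mk_ds(method, lt_ctx, default_raw)
    res = lt_ctx.run_udf(dataset=ds, udf=SumUDF())
    # Every construction path should yield the same sig-shaped sum result:
    assert res['intensity'].data.shape == tuple(ds.shape.sig)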
def doit():
    # There seems to be some form of caching if the sum is calculated
    # repeatedly on the same dask arrays
    dask_array, workers = make_dask_array(my_ds, dtype=my_ds.dtype)
    assert len(dask_array.shape) == 4
    return dask_array.sum(axis=(0, 1)).compute(resources=resources)
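# The names `my_ds` and `resources` are free variables from doit's enclosing
# scope. A minimal sketch of plausible definitions (assumed, not from the
# original code): a dataset loaded through a LiberTEM Context, plus a dask
# resource restriction so the tasks only run on workers started with a
# matching resource tag (e.g. `dask-worker --resources "CPU=1"`):
from libertem.api import Context

my_ctx = Context()
my_ds = my_ctx.load("auto", path="/path/to/data")
resources = {'CPU': 1}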