def _slice_padded(self, _bounds):
    pads = (max(-_bounds[0], 0), max(-_bounds[1], 0),
            max(_bounds[2] - self.shape[2], 0),
            max(_bounds[3] - self.shape[1], 0))
    bounds = (max(_bounds[0], 0),
              max(_bounds[1], 0),
              max(min(_bounds[2], self.shape[2]), 0),
              max(min(_bounds[3], self.shape[1]), 0))
    result = self[:, bounds[1]:bounds[3], bounds[0]:bounds[2]]
    if pads[0] > 0:
        dims = (result.shape[0], result.shape[1], pads[0])
        result = da.concatenate(
            [da.zeros(dims, chunks=dims, dtype=result.dtype), result], axis=2)
    if pads[2] > 0:
        dims = (result.shape[0], result.shape[1], pads[2])
        result = da.concatenate(
            [result, da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=2)
    if pads[1] > 0:
        dims = (result.shape[0], pads[1], result.shape[2])
        result = da.concatenate(
            [da.zeros(dims, chunks=dims, dtype=result.dtype), result], axis=1)
    if pads[3] > 0:
        dims = (result.shape[0], pads[3], result.shape[2])
        result = da.concatenate(
            [result, da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=1)
    return (result, _bounds[0], _bounds[1])
def test_corrected_green(self):
    """Test adjusting the 'green' band."""
    import xarray as xr
    import dask.array as da
    import numpy as np
    from satpy.composites.ahi import GreenCorrector
    from pyresample.geometry import AreaDefinition
    rows = 5
    cols = 10
    area = AreaDefinition(
        'test', 'test', 'test',
        {'proj': 'eqc', 'lon_0': 0.0, 'lat_0': 0.0},
        cols, rows,
        (-20037508.34, -10018754.17, 20037508.34, 10018754.17))
    comp = GreenCorrector('green', prerequisites=(0.51, 0.85),
                          standard_name='toa_bidirectional_reflectance')
    c01 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.25,
                       dims=('y', 'x'),
                       attrs={'name': 'C01', 'area': area})
    c02 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.30,
                       dims=('y', 'x'),
                       attrs={'name': 'C02', 'area': area})
    res = comp((c01, c02))
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
    self.assertEqual(res.attrs['name'], 'green')
    self.assertEqual(res.attrs['standard_name'],
                     'toa_bidirectional_reflectance')
    data = res.compute()
    np.testing.assert_allclose(data, 0.2575)
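# Illustrative check (assumption: GreenCorrector blends its two inputs with
# default weights of 0.85 and 0.15), showing where the expected value 0.2575
# in the test above comes from:
expected = 0.85 * 0.25 + 0.15 * 0.30   # = 0.2575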
def test_setitem_with_different_chunks_preserves_shape(params):
    """Reproducer for https://github.com/dask/dask/issues/3730.

    Mutating based on an array with different chunks can cause new chunks to
    be used.  We need to ensure those new chunk sizes are applied to the
    mutated array, otherwise the array won't generate the correct keys.
    """
    array_size, chunk_size1, chunk_size2 = params
    x = da.zeros(array_size, chunks=chunk_size1)
    mask = da.zeros(array_size, chunks=chunk_size2)
    x[mask] = 1
    result = x.compute()
    assert x.shape == result.shape
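# Minimal sketch (assumed sizes) of the behaviour the test above guards
# against: assigning through a mask whose chunks differ from the target's may
# rechunk the target, but its shape and total chunk sizes must stay valid.
import dask.array as da

x = da.zeros(10, chunks=5)
mask = da.zeros(10, chunks=2, dtype=bool)
x[mask] = 1                      # may rechunk x to match the mask's chunks
assert x.compute().shape == x.shape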
def test_3d_ewa(self, ll2cr, fornav): """Test EWA with a 3D dataset.""" import numpy as np import dask.array as da import xarray as xr from satpy.resample import resample_dataset from pyresample.geometry import SwathDefinition, AreaDefinition from pyresample.utils import proj4_str_to_dict lons = xr.DataArray(da.zeros((10, 10), chunks=5)) lats = xr.DataArray(da.zeros((10, 10), chunks=5)) ll2cr.return_value = (100, np.zeros((10, 10), dtype=np.float32), np.zeros((10, 10), dtype=np.float32)) fornav.return_value = ([100 * 200] * 3, [np.zeros((200, 100), dtype=np.float32)] * 3) sgd = SwathDefinition(lons, lats) proj_dict = proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 ' '+lon_0=-95. +lat_0=25 +lat_1=25 ' '+units=m +no_defs') tgd = AreaDefinition( 'test', 'test', 'test', proj_dict, x_size=100, y_size=200, area_extent=(-1000., -1500., 1000., 1500.), ) input_data = xr.DataArray( da.zeros((3, 10, 10), chunks=5, dtype=np.float32), dims=('bands', 'y', 'x'), attrs={'area': sgd, 'test': 'test'}) new_data = resample_dataset(input_data, tgd, resampler='ewa') self.assertTupleEqual(new_data.shape, (3, 200, 100)) self.assertEqual(new_data.dtype, np.float32) self.assertEqual(new_data.attrs['test'], 'test') self.assertIs(new_data.attrs['area'], tgd) # make sure we can actually compute everything new_data.compute() previous_calls = ll2cr.call_count # resample a different dataset and make sure cache is used input_data = xr.DataArray( da.zeros((3, 10, 10), chunks=5, dtype=np.float32), dims=('bands', 'y', 'x'), attrs={'area': sgd, 'test': 'test'}) new_data = resample_dataset(input_data, tgd, resampler='ewa') self.assertEqual(ll2cr.call_count, previous_calls) new_data.compute()
def test_index_with_int_dask_array_nanchunks(chunks):
    # Slice by array with nan-sized chunks
    a = da.arange(-2, 3, chunks=chunks)
    assert_eq(a[a.nonzero()], np.array([-2, -1, 1, 2]))
    # Edge case: the nan-sized chunks resolve to size 0
    a = da.zeros(5, chunks=chunks)
    assert_eq(a[a.nonzero()], np.array([]))
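# Sketch of why the chunks are "nan-sized": the index array produced by
# nonzero() has unknown chunk lengths until it is actually computed.
import numpy as np
import dask.array as da

a = da.arange(-2, 3, chunks=2)
(idx,) = a.nonzero()
print(idx.chunks)                   # chunk sizes are nan at graph-build time
print(a[a.nonzero()].compute())     # array([-2, -1,  1,  2])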
def test_expand_without_dims(self):
    from satpy.resample import NativeResampler
    import numpy as np
    import dask.array as da
    from xarray import DataArray
    from pyresample.geometry import AreaDefinition
    from pyresample.utils import proj4_str_to_dict
    ds1 = DataArray(da.zeros((100, 50), chunks=85))
    proj_dict = proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 '
                                  '+lon_0=-95. +lat_0=25 +lat_1=25 '
                                  '+units=m +no_defs')
    target = AreaDefinition(
        'test', 'test', 'test',
        proj_dict,
        x_size=100,
        y_size=200,
        area_extent=(-1000., -1500., 1000., 1500.),
    )
    # source geo def doesn't actually matter
    resampler = NativeResampler(None, target)
    new_arr = resampler.resample(ds1)
    self.assertEqual(new_arr.shape, (200, 100))
    new_arr2 = resampler.resample(ds1.compute())
    self.assertTrue(np.all(new_arr == new_arr2))
def test_0d_array():
    x = da.mean(da.ones(4, chunks=4), axis=0).compute()
    y = np.mean(np.ones(4))
    assert type(x) == type(y)
    x = da.sum(da.zeros(4, chunks=1)).compute()
    y = np.sum(np.zeros(4))
    assert type(x) == type(y)
def test_fuse_roots():
    x = da.ones(10, chunks=(2,))
    y = da.zeros(10, chunks=(2,))
    z = (x + 1) + (2 * y ** 2)
    (zz,) = dask.optimize(z)
    # assert len(zz.dask) == 5
    assert sum(map(dask.istask, zz.dask.values())) == 5  # there are some aliases
    assert_eq(zz, z)
def test_get_signal_chunk_slice_not_square(sig_chunks, index, expected):
    data = da.zeros((2, 2, 10, 20), chunks=(2, 2, *sig_chunks[::-1]))
    if expected == 'error':
        with pytest.raises(ValueError):
            chunk_slice = get_signal_chunk_slice(index, data.chunks)
    else:
        chunk_slice = get_signal_chunk_slice(index, data.chunks)
        assert chunk_slice == expected
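# Illustration of the chunk grid for one assumed parametrization of the test
# above (sig_chunks=(15, 5)): the signal chunk sizes are reversed into dask's
# (..., y, x) axis order before the array is built.
import dask.array as da

data = da.zeros((2, 2, 10, 20), chunks=(2, 2, 5, 15))
print(data.chunks)   # ((2,), (2,), (5, 5), (15, 5))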
def execute_between_time(op, data, lower, upper, **kwargs):
    # TODO - Can this be done better?
    indexer = ((data.dt.time.astype(str) >= lower) &
               (data.dt.time.astype(str) <= upper)).to_dask_array(True)
    result = da.zeros(len(data), dtype=np.bool_)
    result[indexer] = True
    return dd.from_array(result)
def test_map_inplace_data_changing(self):
    s = _lazy_signals.LazySignal2D(
        da.zeros((6, 6, 8, 8), chunks=(2, 2, 4, 4)))
    s.__call__()
    assert len(s._cache_dask_chunk.shape) == 4
    s.map(np.sum, axis=1, ragged=False, inplace=True)
    s.__call__()
    assert len(s._cache_dask_chunk.shape) == 3
def test_simulated_green(self):
    """Test creating a fake 'green' band."""
    import dask.array as da
    import numpy as np
    import xarray as xr
    from pyresample.geometry import AreaDefinition
    from satpy.composites.abi import SimulatedGreen
    rows = 5
    cols = 10
    area = AreaDefinition(
        'test', 'test', 'test',
        {'proj': 'eqc', 'lon_0': 0.0, 'lat_0': 0.0},
        cols, rows,
        (-20037508.34, -10018754.17, 20037508.34, 10018754.17))
    comp = SimulatedGreen('green', prerequisites=('C01', 'C02', 'C03'),
                          standard_name='toa_bidirectional_reflectance')
    c01 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.25,
                       dims=('y', 'x'),
                       attrs={'name': 'C01', 'area': area})
    c02 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.30,
                       dims=('y', 'x'),
                       attrs={'name': 'C02', 'area': area})
    c03 = xr.DataArray(da.zeros((rows, cols), chunks=25) + 0.35,
                       dims=('y', 'x'),
                       attrs={'name': 'C03', 'area': area})
    res = comp((c01, c02, c03))
    self.assertIsInstance(res, xr.DataArray)
    self.assertIsInstance(res.data, da.Array)
    self.assertEqual(res.attrs['name'], 'green')
    self.assertEqual(res.attrs['standard_name'],
                     'toa_bidirectional_reflectance')
    data = res.compute()
    np.testing.assert_allclose(data, 0.28025)
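# Illustrative check (assumption: SimulatedGreen for ABI blends C01, C02 and
# C03 with weights 0.465, 0.465 and 0.07), showing where the expected value
# 0.28025 in the test above comes from:
expected = 0.465 * 0.25 + 0.465 * 0.30 + 0.07 * 0.35   # = 0.28025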
def const_features_for_single_grid_single_file(grid_indx, wind_grid_indx, data): client = Client() dims = data['no2'].shape ntime = dims[0] - 1 nvel = dims[2] data_dict = dict() data_hours = da.array(data['hour'][1:]) data_dict['hour'] = da.repeat(data_hours[:, :], nvel, axis=1) data_dict['date'] = da.zeros((ntime, nvel)) + da.mean(data['date'][:]) data_dict['date'] = data_dict['date'] cum_ic_flash = da.array(data['IC_FLASHCOUNT'][:, grid_indx, :]) cum_cg_flash = da.array(data['CG_FLASHCOUNT'][:, grid_indx, :]) data_dict['IC_FLASHCOUNT'] = da.repeat(cum_ic_flash[1:, :] - cum_ic_flash[:-1, :], nvel, axis=1) data_dict['CG_FLASHCOUNT'] = da.repeat(cum_cg_flash[1:, :] - cum_cg_flash[:-1, :], nvel, axis=1) e_no_lower = da.array(data['E_NO'])[1:, grid_indx, :] e_no_upper = da.zeros((ntime, nvel - e_no_lower.shape[1])) data_dict['E_NO'] = da.concatenate([e_no_lower, e_no_upper], axis=1) data_dict['U'] = (data['U'][1:, wind_grid_indx[0][0], :] + data['U'][1:, wind_grid_indx[0][1], :])/2 data_dict['V'] = (data['V'][1:, wind_grid_indx[1][0], :] + data['V'][1:, wind_grid_indx[1][1], :])/2 match_vars = ['no2', 'pres', 'temp', 'CLDFRA'] print('Variables read directly from wrf: {}'.format(match_vars[:])) for var in match_vars: data_dict[var] = da.array(data[var])[1:, grid_indx, :] reduce_dim_vars = ['elev', 'W'] print('Variables average vertically: {}'.format(reduce_dim_vars[:])) for var in reduce_dim_vars: this_value = da.array(data[var])[1:, grid_indx, :] data_dict[var] = (this_value[:, 1:] + this_value[:, :-1]) / 2 add_dim_vars = ['COSZEN', 'PBLH', 'LAI', 'HGT', 'SWDOWN', 'GLW'] print('Variables add vertical layers: {}'.format(add_dim_vars[:])) for var in add_dim_vars: this_value = da.array(data[var])[1:, grid_indx, :] data_dict[var] = da.repeat(this_value, nvel, axis=1) print('Key of dict:{}'.format(data_dict.keys())) save_arr = [] for var in data_dict.keys(): data_dict[var] = data_dict[var].flatten() save_arr.append(data_dict[var]) save_arr = da.array(save_arr).compute() return save_arr
def test_convert_proj4_string(self): import xarray as xr import dask.array as da from satpy.writers.mitiff import MITIFFWriter from pyresample.geometry import AreaDefinition checks = [{ 'epsg': '+init=EPSG:32631', 'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=3 +k=0.9996 ' '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 ' '+y_0=1515.000000\n') }, { 'epsg': '+init=EPSG:32632', 'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=9 +k=0.9996 ' '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 ' '+y_0=1515.000000\n') }, { 'epsg': '+init=EPSG:32633', 'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=15 +k=0.9996 ' '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 ' '+y_0=1515.000000\n') }, { 'epsg': '+init=EPSG:32634', 'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=21 +k=0.9996 ' '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 ' '+y_0=1515.000000\n') }, { 'epsg': '+init=EPSG:32635', 'proj4': (' Proj string: +proj=etmerc +lat_0=0 +lon_0=27 +k=0.9996 ' '+ellps=WGS84 +datum=WGS84 +units=km +x_0=501020.000000 ' '+y_0=1515.000000\n') }] for check in checks: area_def = AreaDefinition( 'test', 'test', 'test', check['epsg'], 100, 200, (-1000., -1500., 1000., 1500.), ) ds1 = xr.DataArray(da.zeros((10, 20), chunks=20), dims=('y', 'x'), attrs={'area': area_def}) w = MITIFFWriter(filename='dummy.tif', base_dir=self.base_dir) proj4_string = w._add_proj4_string(ds1, ds1) self.assertEqual(proj4_string, check['proj4'])
def _get_test_datasets_2d():
    """Create a single 2D test dataset."""
    ds1 = xr.DataArray(da.zeros((100, 200), chunks=50),
                       dims=('y', 'x'),
                       attrs={'name': 'test',
                              'start_time': datetime.utcnow()})
    return [ds1]
def test_rotate_diffraction_keep_shape(self):
    shape = (7, 5, 4, 15)
    s = Diffraction2D(np.zeros(shape))
    s_rot = s.rotate_diffraction(angle=45)
    assert s.axes_manager.shape == s_rot.axes_manager.shape
    s_lazy = LazyDiffraction2D(da.zeros(shape, chunks=(1, 1, 1, 1)))
    s_rot_lazy = s_lazy.rotate_diffraction(angle=45)
    assert s_lazy.axes_manager.shape == s_rot_lazy.axes_manager.shape
def test_2d_input_2d_output(self, shape):
    dask_array = da.zeros(shape, chunks=(10, 10, 20, 20))
    s = hs.signals.Signal2D(dask_array).as_lazy()

    def a_function(image):
        return np.zeros((2, 3))

    s_out = s.map(a_function, inplace=False, lazy_output=True)
    assert s.data.shape[:-2] + (2, 3) == s_out.data.shape
def pad_chunks(darray, chunklen):
    '''Make sure chunks are the right shape.'''
    # Pad only when the length is not already a multiple of chunklen.
    padlen = -darray.shape[0] % chunklen
    if padlen == 0:
        return darray
    else:
        pad = da.zeros((padlen,), dtype=np.complex64)
        padded = da.concatenate([darray, pad], axis=0)
        return padded
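# Hypothetical usage of pad_chunks: pad a 1-D complex series so its length
# becomes a multiple of the chunk length before rechunking into equal blocks.
import numpy as np
import dask.array as da

signal = da.ones(1000, dtype=np.complex64, chunks=256)
padded = pad_chunks(signal, 256)        # length becomes 1024
padded = padded.rechunk(256)
assert padded.shape[0] % 256 == 0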
def _create_test_dataset(name, shape=DEFAULT_SHAPE, area=None):
    """Create a test DataArray object."""
    import xarray as xr
    import dask.array as da
    import numpy as np
    return xr.DataArray(
        da.zeros(shape, dtype=np.float32, chunks=shape),
        dims=('y', 'x'),
        attrs={'name': name, 'area': area})
def test_chunking_saving_lazy_specify(self, tmp_path, file):
    filename = tmp_path / file
    s = Signal2D(da.zeros((50, 100, 100))).as_lazy()
    # specify chunks
    chunks = (50, 10, 10)
    s.data = s.data.rechunk([50, 25, 25])
    s.save(filename, chunks=chunks)
    s1 = load(filename, lazy=True)
    assert tuple([c[0] for c in s1.data.chunks]) == chunks
def common_test_setup(self, shape_3d=(0, 2), data_chunks=None): # Construct a basic testcase with all-lazy mesh_cube and submesh_cubes # full-mesh cube shape is 'shape_3d' # data_chunks sets chunking of source cube, (else all-1-chunk) n_outer, n_z = shape_3d n_mesh = 20 mesh = sample_mesh(n_nodes=20, n_edges=0, n_faces=n_mesh) mesh_cube = sample_mesh_cube(n_z=n_z, mesh=mesh) # Fix index-coord name to the expected default for recombine_submeshes. mesh_cube.coord("i_mesh_face").rename("i_mesh_index") if n_outer: # Crudely merge a set of copies to build an outer dimension. mesh_cube.add_aux_coord(AuxCoord([0], long_name="outer")) meshcubes_2d = [] for i_outer in range(n_outer): cube = mesh_cube.copy() cube.coord("outer").points = np.array([i_outer]) meshcubes_2d.append(cube) mesh_cube = CubeList(meshcubes_2d).merge_cube() if not data_chunks: data_chunks = mesh_cube.shape[:-1] + (-1, ) mesh_cube.data = da.zeros(mesh_cube.shape, chunks=data_chunks) n_regions = 4 # it doesn't divide neatly region_len = n_mesh // n_regions i_points = np.arange(n_mesh) region_inds = [ np.where((i_points // region_len) == i_region) for i_region in range(n_regions) ] # Disturb slightly to ensure some gaps + some overlaps region_inds = [list(indarr[0]) for indarr in region_inds] region_inds[2] = region_inds[2][:-2] # missing points region_inds[3] += region_inds[1][:2] # duplicates self.mesh_cube = mesh_cube self.region_inds = region_inds self.region_cubes = [mesh_cube[..., inds] for inds in region_inds] for i_cube, cube in enumerate(self.region_cubes): for i_z in range(n_z): # Set data='z' ; don't vary over other dimensions. cube.data[..., i_z, :] = i_cube + 1000 * i_z + 1 cube.data = cube.lazy_data() # Also construct an array to match the expected result (2d cases only). # basic layer showing region allocation (large -ve values for missing) expected = np.array([1.0, 1, 1, 1, 1] + [4, 4] # points in #1 overlapped by #3 + [2, 2, 2] + [3, 3, 3] + [-99999, -99999] # missing points + [4, 4, 4, 4, 4]) # second layer should be same but +1000. # NOTE: only correct if shape_3d=None; no current need to generalise this. expected = np.stack([expected, expected + 1000]) # convert to masked array with missing points. expected = np.ma.masked_less(expected, 0) self.expected_result = expected
def test_get_signal_chunk_slice(sig_chunks, index, expected):
    ndim = 1 + len(index)
    data = da.zeros([20] * ndim, chunks=(10, *sig_chunks[::-1]))
    if expected == 'error':
        with pytest.raises(ValueError):
            chunk_slice = get_signal_chunk_slice(index, data.chunks)
    else:
        chunk_slice = get_signal_chunk_slice(index, data.chunks)
        assert chunk_slice == expected
def get_dbz(daskArray, use_varint=False, use_liqskin=False, omp_threads=1):
    t = fetch_variable(daskArray, "T")
    p = fetch_variable(daskArray, "P")
    pb = fetch_variable(daskArray, "PB")
    qv = fetch_variable(daskArray, "QVAPOR")
    qr = fetch_variable(daskArray, "QRAIN")
    dtype = t.dtype

    try:
        qs = fetch_variable(daskArray, "QSNOW")
    except KeyError:
        qs = da.zeros(qv.shape, qv.dtype)
    try:
        qgraup = fetch_variable(daskArray, "QGRAUP")
    except KeyError:
        qgraup = da.zeros(qv.shape, qv.dtype)

    full_t = map_blocks(wrapped_add, t, Constants.T_BASE, dtype=t.dtype)
    full_p = map_blocks(wrapped_add, p, pb, dtype=p.dtype)
    tk = map_blocks(tk_wrap, full_p, full_t, omp_threads, dtype=p.dtype)

    sn0 = 1 if qs.any() else 0
    ivarint = 1 if use_varint else 0
    iliqskin = 1 if use_liqskin else 0

    del t
    del p
    del pb

    dbz = map_blocks(dbz_wrap, full_p, tk, qv, qr, qs, qgraup, sn0, ivarint,
                     iliqskin, omp_threads, dtype=dtype)
    return dbz
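# Minimal sketch of the map_blocks pattern used above, with a stand-in for
# the elementwise helper (wrapped_add is assumed to be a simple addition
# wrapper and Constants.T_BASE the WRF base temperature of 300 K):
import numpy as np
import dask.array as da

def wrapped_add(a, b):
    return a + b

theta_perturbation = da.zeros((4, 10, 10), chunks=(1, 10, 10), dtype=np.float32)
full_theta = da.map_blocks(wrapped_add, theta_perturbation, 300.0,
                           dtype=theta_perturbation.dtype)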
def test_hncc_dnb(self): """Test the 'hncc_dnb' compositor.""" import dask.array as da import numpy as np import xarray as xr from pyresample.geometry import AreaDefinition from satpy.composites.viirs import NCCZinke rows = 5 cols = 10 area = AreaDefinition( 'test', 'test', 'test', {'proj': 'eqc', 'lon_0': 0.0, 'lat_0': 0.0}, cols, rows, (-20037508.34, -10018754.17, 20037508.34, 10018754.17)) comp = NCCZinke('hncc_dnb', prerequisites=('dnb',), standard_name='toa_outgoing_radiance_per_' 'unit_wavelength') dnb = np.zeros((rows, cols)) + 0.25 dnb[3, :] += 0.25 dnb[4:, :] += 0.5 dnb = da.from_array(dnb, chunks=25) c01 = xr.DataArray(dnb, dims=('y', 'x'), attrs={'name': 'DNB', 'area': area}) sza = np.zeros((rows, cols)) + 70.0 sza[:, 3] += 20.0 sza[:, 4:] += 45.0 sza = da.from_array(sza, chunks=25) c02 = xr.DataArray(sza, dims=('y', 'x'), attrs={'name': 'solar_zenith_angle', 'area': area}) lza = np.zeros((rows, cols)) + 70.0 lza[:, 3] += 20.0 lza[:, 4:] += 45.0 lza = da.from_array(lza, chunks=25) c03 = xr.DataArray(lza, dims=('y', 'x'), attrs={'name': 'lunar_zenith_angle', 'area': area}) mif = xr.DataArray(da.zeros((5,), chunks=5) + 0.1, dims=('y',), attrs={'name': 'moon_illumination_fraction', 'area': area}) res = comp((c01, c02, c03, mif)) self.assertIsInstance(res, xr.DataArray) self.assertIsInstance(res.data, da.Array) self.assertEqual(res.attrs['name'], 'hncc_dnb') self.assertEqual(res.attrs['standard_name'], 'ncc_radiance') data = res.compute() unique = np.unique(data) np.testing.assert_allclose( unique, [3.48479712e-04, 6.96955799e-04, 1.04543189e-03, 4.75394738e-03, 9.50784532e-03, 1.42617433e-02, 1.50001560e+03, 3.00001560e+03, 4.50001560e+03])
def parse(self, request: Metamorphing, settings: dict) -> List[Tuple[models.Model, str]]: rescale = True array = request.representation.array if "z" in array.dims: array = array.max(dim="z") if "t" in array.dims: array = array.sel(t=0) if "c" in array.dims: # Check if we have to convert to monoimage if array.c.size == 1: array = array.sel(c=0) if rescale == True: self.progress("Rescaling") min, max = array.min(), array.max() image = np.interp(array, (min, max), (0, 255)).astype(np.uint8) else: image = (array * 255).astype(np.uint8) from matplotlib import cm mapped = cm.viridis(image) finalarray = (mapped * 255).astype(np.uint8) else: if array.c.size >= 3: array = array.sel(c=[0, 1, 2]).data elif array.c.size == 2: # Two Channel Image will be displayed with a Dark Channel array = da.concatenate([ array.sel(c=[0, 1]).data, da.zeros((array.x.size, array.y.size, 1)) ], axis=2) if rescale == True: self.progress("Rescaling") min, max = array.min(), array.max() image = np.interp(array.compute(), (min, max), (0, 255)).astype(np.uint8) else: image = (array * 255).astype(np.uint8) finalarray = image else: raise NotImplementedError( "Image Does not provide the channel Argument") display = Display.objects.from_xarray_and_request(finalarray, request) return [(display, "create")]
def test_cube_arg(self):
    """Check that an input lazy cube will be realised before return."""
    cube = Cube(da.zeros((1, 1), chunks=(1, 1)), long_name="dummy")
    self.assertTrue(cube.has_lazy_data())
    result = inputcube_nolazy(cube)
    self.coerce_patch.assert_called_with(improver.utilities.load.load_cube,
                                         cube, no_lazy_load=True)
    self.assertFalse(cube.has_lazy_data())
    self.assertEqual(result, "return")
def _get_test_datasets_3d():
    """Create a single 3D test dataset."""
    ds1 = xr.DataArray(da.zeros((3, 100, 200), chunks=50),
                       dims=('bands', 'y', 'x'),
                       coords={'bands': ['R', 'G', 'B']},
                       attrs={'name': 'test',
                              'start_time': datetime.utcnow()})
    return [ds1]
def test_changed_data_trigger(self):
    s = _lazy_signals.LazySignal2D(
        da.zeros((6, 6, 8, 8), chunks=(2, 2, 4, 4)))
    position = s.axes_manager._getitem_tuple
    s._get_cache_dask_chunk(position)
    assert s._cache_dask_chunk is not None
    assert s._cache_dask_chunk_slice is not None
    s.events.data_changed.trigger(None)
    assert s._cache_dask_chunk is None
    assert s._cache_dask_chunk_slice is None
def test_notifications_error_with_threading(make_napari_viewer):
    """Test notifications of `threading` threads, using a dask example."""
    random_image = da.random.random(size=(50, 50))
    with notification_manager:
        viewer = make_napari_viewer()
        viewer.add_image(random_image)
        result = da.divide(random_image, da.zeros((50, 50)))
        viewer.add_image(result)
        assert len(notification_manager.records) >= 1
        notification_manager.records = []
def __init__(self, parameters: Parameter = None):
    self._type = 'acoustic'
    self._ndim = 2
    self._nx, self._nz = (parameters['number-of-cells'][0],
                          parameters['number-of-cells'][1])
    self._sxx = da.zeros((self._nx, self._nz), dtype=DTYPE)
    self._vx = da.zeros_like(self._sxx)
    self._vz = da.zeros_like(self._sxx)
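# Minimal sketch of the allocation pattern above (grid sizes, dtype and
# chunking are assumptions): zeros_like reuses the shape, dtype and chunking
# of the template array.
import dask.array as da

nx, nz = 100, 50
sxx = da.zeros((nx, nz), dtype='float32', chunks=(50, 50))
vx = da.zeros_like(sxx)
vz = da.zeros_like(sxx)
assert vx.chunks == sxx.chunks and vx.dtype == sxx.dtype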
def setup(self):
    A = 400
    B = 800
    a = da.ones((A, B, 2), chunks=1)
    b = da.zeros((A, B, 1), chunks=1)
    c = a + b
    g = c.__dask_graph__()
    layer = g.layers[c.name]
    self.layer = layer
def _slice_padded(self, _bounds):
    pads = (max(-_bounds[0], 0), max(-_bounds[1], 0),
            max(_bounds[2] - self.shape[2], 0),
            max(_bounds[3] - self.shape[1], 0))
    bounds = (max(_bounds[0], 0),
              max(_bounds[1], 0),
              max(min(_bounds[2], self.shape[2]), 0),
              max(min(_bounds[3], self.shape[1]), 0))
    # NOTE: image is a dask array that implements daskmeta interface (via op)
    result = self[:, bounds[1]:bounds[3], bounds[0]:bounds[2]]
    if pads[0] > 0:
        dims = (result.shape[0], result.shape[1], pads[0])
        result = da.concatenate(
            [da.zeros(dims, chunks=dims, dtype=result.dtype), result], axis=2)
    if pads[2] > 0:
        dims = (result.shape[0], result.shape[1], pads[2])
        result = da.concatenate(
            [result, da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=2)
    if pads[1] > 0:
        dims = (result.shape[0], pads[1], result.shape[2])
        result = da.concatenate(
            [da.zeros(dims, chunks=dims, dtype=result.dtype), result], axis=1)
    if pads[3] > 0:
        dims = (result.shape[0], pads[3], result.shape[2])
        result = da.concatenate(
            [result, da.zeros(dims, chunks=dims, dtype=result.dtype)], axis=1)
    image = super(DaskImage, self.__class__).__new__(
        self.__class__, result.dask, result.name, result.chunks,
        result.dtype, result.shape)
    image.__geo_transform__ = self.__geo_transform__ + (_bounds[0], _bounds[1])
    return image
def test_atop_stacked_new_axes_same_dim(concatenate):
    def f(x):
        return x[..., None] * np.ones((1, 7))

    x = da.ones(5, chunks=2)
    y = da.zeros(5, chunks=2)
    a = atop(f, 'aq', x, 'a', new_axes={'q': 7},
             concatenate=concatenate, dtype=x.dtype)
    b = atop(f, 'aq', y, 'a', new_axes={'q': 7},
             concatenate=concatenate, dtype=x.dtype)
    c = a + b
    assert c.chunks == ((2, 2, 1), (7,))
    assert_eq(c, np.ones((5, 7)))
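# The same construction using da.blockwise, the current spelling of atop
# (a sketch of the equivalent call):
import numpy as np
import dask.array as da

def f(x):
    return x[..., None] * np.ones((1, 7))

x = da.ones(5, chunks=2)
a = da.blockwise(f, 'aq', x, 'a', new_axes={'q': 7},
                 concatenate=True, dtype=x.dtype)
assert a.chunks == ((2, 2, 1), (7,))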
def abi_l1b_c01_data_array(goes_east_conus_area_def) -> xr.DataArray:
    return xr.DataArray(
        da.zeros((3000, 5000), chunks=4096),
        dims=("y", "x"),
        attrs={
            "area": goes_east_conus_area_def,
            "platform_name": "goes16",
            "sensor": "abi",
            "name": "C01",
        },
    )
def _get_test_datasets(self):
    import xarray as xr
    import dask.array as da
    from datetime import datetime
    ds1 = xr.DataArray(da.zeros((100, 200), chunks=50),
                       dims=('y', 'x'),
                       attrs={'name': 'test',
                              'start_time': datetime.utcnow()})
    return [ds1]
def _get_test_datasets(self):
    import xarray as xr
    import dask.array as da
    from datetime import datetime
    ds1 = xr.DataArray(
        da.zeros((100, 200), chunks=50),
        dims=('y', 'x'),
        attrs={'name': 'test', 'start_time': datetime.utcnow()}
    )
    return [ds1]
def test_mixed_output_type():
    y = da.random.random((10, 10), chunks=(5, 5))
    y[y < 0.4] = 0
    y = da.ma.masked_equal(y, 0)
    x = da.zeros((10, 1), chunks=(5, 1))
    z = da.concatenate([x, y], axis=1)
    assert z.shape == (10, 11)
    zz = z.compute()
    assert isinstance(zz, np.ma.masked_array)
def test_mixed_output_type():
    y = da.random.random((10, 10), chunks=(5, 5))
    y[y < 0.8] = 0
    y = y.map_blocks(sparse.COO.from_numpy)
    x = da.zeros((10, 1), chunks=(5, 1))
    z = da.concatenate([x, y], axis=1)
    assert z.shape == (10, 11)
    zz = z.compute()
    assert isinstance(zz, sparse.COO)
    assert zz.nnz == y.compute().nnz
def test_hncc_dnb(self): """Test the 'hncc_dnb' compositor.""" import xarray as xr import dask.array as da import numpy as np from satpy.composites.viirs import NCCZinke from pyresample.geometry import AreaDefinition rows = 5 cols = 10 area = AreaDefinition( 'test', 'test', 'test', {'proj': 'eqc', 'lon_0': 0.0, 'lat_0': 0.0}, cols, rows, (-20037508.34, -10018754.17, 20037508.34, 10018754.17)) comp = NCCZinke('hncc_dnb', prerequisites=('dnb',), standard_name='toa_outgoing_radiance_per_' 'unit_wavelength') dnb = np.zeros((rows, cols)) + 0.25 dnb[3, :] += 0.25 dnb[4:, :] += 0.5 dnb = da.from_array(dnb, chunks=25) c01 = xr.DataArray(dnb, dims=('y', 'x'), attrs={'name': 'DNB', 'area': area}) sza = np.zeros((rows, cols)) + 70.0 sza[3, :] += 20.0 sza[4:, :] += 45.0 sza = da.from_array(sza, chunks=25) c02 = xr.DataArray(sza, dims=('y', 'x'), attrs={'name': 'solar_zenith_angle', 'area': area}) lza = da.from_array(sza, chunks=25) c03 = xr.DataArray(lza, dims=('y', 'x'), attrs={'name': 'lunar_zenith_angle', 'area': area}) mif = xr.DataArray(da.zeros((5,), chunks=5) + 0.1, dims=('y',), attrs={'name': 'moon_illumination_fraction', 'area': area}) res = comp((c01, c02, c03, mif)) self.assertIsInstance(res, xr.DataArray) self.assertIsInstance(res.data, da.Array) self.assertEqual(res.attrs['name'], 'hncc_dnb') self.assertEqual(res.attrs['standard_name'], 'ncc_radiance') data = res.compute() unique = np.unique(data) np.testing.assert_allclose( unique, [3.484797e-04, 9.507845e-03, 4.500016e+03])
def test_rechunk_bad_keys():
    x = da.zeros((2, 3, 4), chunks=1)
    assert x.rechunk({-1: 4}).chunks == ((1, 1), (1, 1, 1), (4,))
    assert x.rechunk({-x.ndim: 2}).chunks == ((2,), (1, 1, 1), (1, 1, 1, 1))

    with pytest.raises(TypeError) as info:
        x.rechunk({'blah': 4})
    assert 'blah' in str(info.value)

    with pytest.raises(ValueError) as info:
        x.rechunk({100: 4})
    assert '100' in str(info.value)

    with pytest.raises(ValueError) as info:
        x.rechunk({-100: 4})
    assert '-100' in str(info.value)
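# Dict-based rechunking in short (a sketch): keys are axis numbers, negative
# keys count from the last axis, and axes not mentioned keep their chunks.
import dask.array as da

x = da.zeros((2, 3, 4), chunks=1)
print(x.rechunk({-1: 4}).chunks)   # ((1, 1), (1, 1, 1), (4,))
print(x.rechunk({0: 2}).chunks)    # ((2,), (1, 1, 1), (1, 1, 1, 1))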
def test_expand_reduce(self):
    from satpy.resample import NativeResampler
    import numpy as np
    import dask.array as da
    d_arr = da.zeros((6, 20), chunks=4)
    new_arr = NativeResampler.expand_reduce(d_arr, {0: 2., 1: 2.})
    self.assertEqual(new_arr.shape, (12, 40))
    new_arr = NativeResampler.expand_reduce(d_arr, {0: .5, 1: .5})
    self.assertEqual(new_arr.shape, (3, 10))
    self.assertRaises(ValueError, NativeResampler.expand_reduce,
                      d_arr, {0: 1. / 3, 1: 1.})
    new_arr = NativeResampler.expand_reduce(d_arr, {0: 1., 1: 1.})
    self.assertEqual(new_arr.shape, (6, 20))
    self.assertIs(new_arr, d_arr)
    self.assertRaises(ValueError, NativeResampler.expand_reduce,
                      d_arr, {0: 0.333323423, 1: 1.})
    self.assertRaises(ValueError, NativeResampler.expand_reduce,
                      d_arr, {0: 1.333323423, 1: 1.})
    n_arr = np.zeros((6, 20))
    new_arr = NativeResampler.expand_reduce(n_arr, {0: 2., 1: 1.0})
    self.assertTrue(np.all(new_arr.compute()[::2, :] == n_arr))
def test_slicing_consistent_names_after_normalization():
    x = da.zeros(10, chunks=(5,))
    assert same_keys(x[0:], x[:10])
    assert same_keys(x[0:], x[0:10])
    assert same_keys(x[0:], x[0:10:1])
    assert same_keys(x[:], x[0:10:1])
def test_slice_list_then_None():
    x = da.zeros(shape=(5, 5), chunks=(3, 3))
    y = x[[2, 1]][None]
    assert_eq(y, np.zeros((1, 2, 5)))
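# What the indexing above does, step by step (a sketch): a list index picks
# rows, then None (np.newaxis) prepends a length-1 axis.
import dask.array as da

x = da.zeros((5, 5), chunks=(3, 3))
rows = x[[2, 1]]        # shape (2, 5)
expanded = rows[None]   # shape (1, 2, 5)
assert expanded.shape == (1, 2, 5)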
def get_reflectance(self, sun_zenith, sat_zenith, azidiff, bandname, redband=None): """Get the reflectance from the three sun-sat angles""" # Get wavelength in nm for band: if isinstance(bandname, float): LOG.warning('A wavelength is provided instead of band name - ' + 'disregard the relative spectral responses and assume ' + 'it is the effective wavelength: %f (micro meter)', bandname) wvl = bandname * 1000.0 else: wvl = self.get_effective_wavelength(bandname) wvl = wvl * 1000.0 rayl, wvl_coord, azid_coord, satz_sec_coord, sunz_sec_coord = self.get_reflectance_lut() # force dask arrays compute = False if HAVE_DASK and not isinstance(sun_zenith, Array): compute = True sun_zenith = from_array(sun_zenith, chunks=sun_zenith.shape) sat_zenith = from_array(sat_zenith, chunks=sat_zenith.shape) azidiff = from_array(azidiff, chunks=azidiff.shape) if redband is not None: redband = from_array(redband, chunks=redband.shape) clip_angle = rad2deg(arccos(1. / sunz_sec_coord.max())) sun_zenith = clip(sun_zenith, 0, clip_angle) sunzsec = 1. / cos(deg2rad(sun_zenith)) clip_angle = rad2deg(arccos(1. / satz_sec_coord.max())) sat_zenith = clip(sat_zenith, 0, clip_angle) satzsec = 1. / cos(deg2rad(sat_zenith)) shape = sun_zenith.shape if not(wvl_coord.min() < wvl < wvl_coord.max()): LOG.warning( "Effective wavelength for band %s outside 400-800 nm range!", str(bandname)) LOG.info( "Set the rayleigh/aerosol reflectance contribution to zero!") if HAVE_DASK: chunks = sun_zenith.chunks if redband is None else redband.chunks res = zeros(shape, chunks=chunks) return res.compute() if compute else res else: return zeros(shape) idx = np.searchsorted(wvl_coord, wvl) wvl1 = wvl_coord[idx - 1] wvl2 = wvl_coord[idx] fac = (wvl2 - wvl) / (wvl2 - wvl1) raylwvl = fac * rayl[idx - 1, :, :, :] + (1 - fac) * rayl[idx, :, :, :] tic = time.time() smin = [sunz_sec_coord[0], azid_coord[0], satz_sec_coord[0]] smax = [sunz_sec_coord[-1], azid_coord[-1], satz_sec_coord[-1]] orders = [ len(sunz_sec_coord), len(azid_coord), len(satz_sec_coord)] f_3d_grid = atleast_2d(raylwvl.ravel()) if HAVE_DASK and isinstance(smin[0], Array): # compute all of these at the same time before passing to the interpolator # otherwise they are computed separately smin, smax, orders, f_3d_grid = da.compute(smin, smax, orders, f_3d_grid) minterp = MultilinearInterpolator(smin, smax, orders) minterp.set_values(f_3d_grid) if HAVE_DASK: ipn = map_blocks(self._do_interp, minterp, sunzsec, azidiff, satzsec, dtype=raylwvl.dtype, chunks=azidiff.chunks) else: ipn = self._do_interp(minterp, sunzsec, azidiff, satzsec) LOG.debug("Time - Interpolation: {0:f}".format(time.time() - tic)) ipn *= 100 res = ipn if redband is not None: res = where(redband < 20., res, (1 - (redband - 20) / 80) * res) res = clip(res, 0, 100) if compute: res = res.compute() return res
def define_array_type_specific_functions(self):
    self._load = generic_netcdf_loader_for_grids(
        array_type=self._array_type, chunks=self.chunks)
    self._zeros = lambda n: da.zeros(n, chunks=self.chunks)
def decomposition(self, output_dimension, normalize_poissonian_noise=False, algorithm='PCA', signal_mask=None, navigation_mask=None, get=threaded.get, num_chunks=None, reproject=True, bounds=True, **kwargs): """Perform Incremental (Batch) decomposition on the data, keeping n significant components. Parameters ---------- output_dimension : int the number of significant components to keep normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : str One of ('PCA', 'ORPCA', 'ONMF'). By default ('PCA') IncrementalPCA from scikit-learn is run. get : dask scheduler the dask scheduler to use for computations; default `dask.threaded.get` num_chunks : int the number of dask chunks to pass to the decomposition model. More chunks require more memory, but should run faster. Will be increased to contain atleast output_dimension signals. navigation_mask : {BaseSignal, numpy array, dask array} The navigation locations marked as True are not used in the decompostion. signal_mask : {BaseSignal, numpy array, dask array} The signal locations marked as True are not used in the decomposition. reproject : bool Reproject data on the learnt components (factors) after learning. bounds : {tuple, bool} The (min, max) values of the data to normalize before learning. If tuple (min, max), those values will be used for normalization. If True, extremes will be looked up (expensive), default. If False, no normalization is done (learning may be very slow). If normalize_poissonian_noise is True, this cannot be True. **kwargs passed to the partial_fit/fit functions. Notes ----- Various algorithm parameters and their default values: ONMF: lambda1=1, kappa=1, robust=False, store_r=False batch_size=None ORPCA: fast=True, lambda1=None, lambda2=None, method=None, learning_rate=None, init=None, training_samples=None, momentum=None PCA: batch_size=None, copy=True, white=False """ explained_variance = None explained_variance_ratio = None _al_data = self._data_aligned_with_axes nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension] sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:] num_chunks = 1 if num_chunks is None else num_chunks blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)]) nblocks = multiply([len(c) for c in nav_chunks]) if blocksize / output_dimension < num_chunks: num_chunks = np.ceil(blocksize / output_dimension) blocksize *= num_chunks ## LEARN if algorithm == 'PCA': from sklearn.decomposition import IncrementalPCA obj = IncrementalPCA(n_components=output_dimension) method = partial(obj.partial_fit, **kwargs) reproject = True elif algorithm == 'ORPCA': from hyperspy.learn.rpca import ORPCA kwg = {'fast': True} kwg.update(kwargs) obj = ORPCA(output_dimension, **kwg) method = partial(obj.fit, iterating=True) elif algorithm == 'ONMF': from hyperspy.learn.onmf import ONMF batch_size = kwargs.pop('batch_size', None) obj = ONMF(output_dimension, **kwargs) method = partial(obj.fit, batch_size=batch_size) else: raise ValueError('algorithm not known') original_data = self.data try: if normalize_poissonian_noise: if bounds is True: bounds = False # warnings.warn? 
data = self._data_aligned_with_axes ndim = self.axes_manager.navigation_dimension sdim = self.axes_manager.signal_dimension nm = da.logical_not( da.zeros( self.axes_manager.navigation_shape[::-1], chunks=nav_chunks) if navigation_mask is None else to_array( navigation_mask, chunks=nav_chunks)) sm = da.logical_not( da.zeros( self.axes_manager.signal_shape[::-1], chunks=sig_chunks) if signal_mask is None else to_array( signal_mask, chunks=sig_chunks)) ndim = self.axes_manager.navigation_dimension sdim = self.axes_manager.signal_dimension bH, aG = da.compute( data.sum(axis=range(ndim)), data.sum(axis=range(ndim, ndim + sdim))) bH = da.where(sm, bH, 1) aG = da.where(nm, aG, 1) raG = da.sqrt(aG) rbH = da.sqrt(bH) coeff = raG[(..., ) + (None, )*rbH.ndim] *\ rbH[(None, )*raG.ndim + (...,)] coeff.map_blocks(np.nan_to_num) coeff = da.where(coeff == 0, 1, coeff) data = data / coeff self.data = data # normalize the data for learning algs: if bounds: if bounds is True: _min, _max = da.compute(self.data.min(), self.data.max()) else: _min, _max = bounds self.data = (self.data - _min) / (_max - _min) # LEARN this_data = [] try: for chunk in progressbar( self._block_iterator( flat_signal=True, get=get, signal_mask=signal_mask, navigation_mask=navigation_mask), total=nblocks, leave=True, desc='Learn'): this_data.append(chunk) if len(this_data) == num_chunks: thedata = np.concatenate(this_data, axis=0) method(thedata) this_data = [] if len(this_data): thedata = np.concatenate(this_data, axis=0) method(thedata) except KeyboardInterrupt: pass # GET ALREADY CALCULATED RESULTS if algorithm == 'PCA': explained_variance = obj.explained_variance_ explained_variance_ratio = obj.explained_variance_ratio_ factors = obj.components_.T elif algorithm == 'ORPCA': _, _, U, S, V = obj.finish() factors = U * S loadings = V explained_variance = S**2 / len(factors) elif algorithm == 'ONMF': factors, loadings = obj.finish() loadings = loadings.T # REPROJECT if reproject: if algorithm == 'PCA': method = obj.transform post = lambda a: np.concatenate(a, axis=0) elif algorithm == 'ORPCA': method = obj.project obj.R = [] post = lambda a: obj.finish()[4] elif algorithm == 'ONMF': method = obj.project post = lambda a: np.concatenate(a, axis=1).T _map = map(lambda thing: method(thing), self._block_iterator( flat_signal=True, get=get, signal_mask=signal_mask, navigation_mask=navigation_mask)) H = [] try: for thing in progressbar( _map, total=nblocks, desc='Project'): H.append(thing) except KeyboardInterrupt: pass loadings = post(H) if explained_variance is not None and \ explained_variance_ratio is None: explained_variance_ratio = \ explained_variance / explained_variance.sum() # RESHUFFLE "blocked" LOADINGS ndim = self.axes_manager.navigation_dimension try: loadings = _reshuffle_mixed_blocks( loadings, ndim, (output_dimension,), nav_chunks).reshape((-1, output_dimension)) except ValueError: # In case the projection step was not finished, it's left # as scrambled pass finally: self.data = original_data target = self.learning_results target.decomposition_algorithm = algorithm target.output_dimension = output_dimension target._object = obj target.factors = factors target.loadings = loadings target.explained_variance = explained_variance target.explained_variance_ratio = explained_variance_ratio
def test_integer_input():
    assert da.zeros((4, 6), chunks=2).rechunk(3).chunks == ((3, 1), (3, 3))
def _block_iterator(self, flat_signal=True, get=threaded.get, navigation_mask=None, signal_mask=None): """A function that allows iterating lazy signal data by blocks, defining the dask.Array. Parameters ---------- flat_signal: bool returns each block flattened, such that the shape (for the particular block) is (navigation_size, signal_size), with optionally masked elements missing. If false, returns the equivalent of s.inav[{blocks}].data, where masked elements are set to np.nan or 0. get : dask scheduler the dask scheduler to use for computations; default `dask.threaded.get` navigation_mask : {BaseSignal, numpy array, dask array} The navigation locations marked as True are not returned (flat) or set to NaN or 0. signal_mask : {BaseSignal, numpy array, dask array} The signal locations marked as True are not returned (flat) or set to NaN or 0. """ self._make_lazy() data = self._data_aligned_with_axes nav_chunks = data.chunks[:self.axes_manager.navigation_dimension] indices = product(*[range(len(c)) for c in nav_chunks]) signalsize = self.axes_manager.signal_size sig_reshape = (signalsize,) if signalsize else () data = data.reshape((self.axes_manager.navigation_shape[::-1] + sig_reshape)) if signal_mask is None: signal_mask = slice(None) if flat_signal else \ np.zeros(self.axes_manager.signal_size, dtype='bool') else: try: signal_mask = to_array(signal_mask).ravel() except ValueError: # re-raise with a message raise ValueError("signal_mask has to be a signal, numpy or" " dask array, but " "{} was given".format(type(signal_mask))) if flat_signal: signal_mask = ~signal_mask if navigation_mask is None: nav_mask = da.zeros( self.axes_manager.navigation_shape[::-1], chunks=nav_chunks, dtype='bool') else: try: nav_mask = to_array(navigation_mask, chunks=nav_chunks) except ValueError: # re-raise with a message raise ValueError("navigation_mask has to be a signal, numpy or" " dask array, but " "{} was given".format(type(navigation_mask))) if flat_signal: nav_mask = ~nav_mask for ind in indices: chunk = get(data.dask, (data.name, ) + ind + (0,)*bool(signalsize)) n_mask = get(nav_mask.dask, (nav_mask.name, ) + ind) if flat_signal: yield chunk[n_mask, ...][..., signal_mask] else: chunk = chunk.copy() value = np.nan if np.can_cast('float', chunk.dtype) else 0 chunk[n_mask, ...] = value chunk[..., signal_mask] = value yield chunk.reshape(chunk.shape[:-1] + self.axes_manager.signal_shape[::-1])
def tocsr(self):
    nzs = self.data
    nzi = da.zeros(len(self.data), chunks=(int(1e4)))
def decomposition(self, normalize_poissonian_noise=False, algorithm='svd', output_dimension=None, signal_mask=None, navigation_mask=None, get=threaded.get, num_chunks=None, reproject=True, bounds=False, **kwargs): """Perform Incremental (Batch) decomposition on the data, keeping n significant components. Parameters ---------- normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : str One of ('svd', 'PCA', 'ORPCA', 'ONMF'). By default 'svd', lazy SVD decomposition from dask. output_dimension : int the number of significant components to keep. If None, keep all (only valid for SVD) get : dask scheduler the dask scheduler to use for computations; default `dask.threaded.get` num_chunks : int the number of dask chunks to pass to the decomposition model. More chunks require more memory, but should run faster. Will be increased to contain atleast output_dimension signals. navigation_mask : {BaseSignal, numpy array, dask array} The navigation locations marked as True are not used in the decompostion. signal_mask : {BaseSignal, numpy array, dask array} The signal locations marked as True are not used in the decomposition. reproject : bool Reproject data on the learnt components (factors) after learning. **kwargs passed to the partial_fit/fit functions. Notes ----- Various algorithm parameters and their default values: ONMF: lambda1=1, kappa=1, robust=False, store_r=False batch_size=None ORPCA: fast=True, lambda1=None, lambda2=None, method=None, learning_rate=None, init=None, training_samples=None, momentum=None PCA: batch_size=None, copy=True, white=False """ if bounds: msg = ( "The `bounds` keyword is deprecated and will be removed " "in v2.0. Since version > 1.3 this has no effect.") warnings.warn(msg, VisibleDeprecationWarning) explained_variance = None explained_variance_ratio = None _al_data = self._data_aligned_with_axes nav_chunks = _al_data.chunks[:self.axes_manager.navigation_dimension] sig_chunks = _al_data.chunks[self.axes_manager.navigation_dimension:] num_chunks = 1 if num_chunks is None else num_chunks blocksize = np.min([multiply(ar) for ar in product(*nav_chunks)]) nblocks = multiply([len(c) for c in nav_chunks]) if algorithm != "svd" and output_dimension is None: raise ValueError("With the %s the output_dimension " "must be specified" % algorithm) if output_dimension and blocksize / output_dimension < num_chunks: num_chunks = np.ceil(blocksize / output_dimension) blocksize *= num_chunks # LEARN if algorithm == 'PCA': from sklearn.decomposition import IncrementalPCA obj = IncrementalPCA(n_components=output_dimension) method = partial(obj.partial_fit, **kwargs) reproject = True elif algorithm == 'ORPCA': from hyperspy.learn.rpca import ORPCA kwg = {'fast': True} kwg.update(kwargs) obj = ORPCA(output_dimension, **kwg) method = partial(obj.fit, iterating=True) elif algorithm == 'ONMF': from hyperspy.learn.onmf import ONMF batch_size = kwargs.pop('batch_size', None) obj = ONMF(output_dimension, **kwargs) method = partial(obj.fit, batch_size=batch_size) elif algorithm != "svd": raise ValueError('algorithm not known') original_data = self.data try: if normalize_poissonian_noise: data = self._data_aligned_with_axes ndim = self.axes_manager.navigation_dimension sdim = self.axes_manager.signal_dimension nm = da.logical_not( da.zeros( self.axes_manager.navigation_shape[::-1], chunks=nav_chunks) if navigation_mask is None else to_array( navigation_mask, chunks=nav_chunks)) sm = da.logical_not( da.zeros( self.axes_manager.signal_shape[::-1], 
chunks=sig_chunks) if signal_mask is None else to_array( signal_mask, chunks=sig_chunks)) ndim = self.axes_manager.navigation_dimension sdim = self.axes_manager.signal_dimension bH, aG = da.compute( data.sum(axis=tuple(range(ndim))), data.sum(axis=tuple(range(ndim, ndim + sdim)))) bH = da.where(sm, bH, 1) aG = da.where(nm, aG, 1) raG = da.sqrt(aG) rbH = da.sqrt(bH) coeff = raG[(..., ) + (None, ) * rbH.ndim] *\ rbH[(None, ) * raG.ndim + (...,)] coeff.map_blocks(np.nan_to_num) coeff = da.where(coeff == 0, 1, coeff) data = data / coeff self.data = data # LEARN if algorithm == "svd": reproject = False from dask.array.linalg import svd try: self._unfolded4decomposition = self.unfold() # TODO: implement masking if navigation_mask or signal_mask: raise NotImplemented( "Masking is not yet implemented for lazy SVD." ) U, S, V = svd(self.data) factors = V.T explained_variance = S ** 2 / self.data.shape[0] loadings = U * S finally: if self._unfolded4decomposition is True: self.fold() self._unfolded4decomposition is False else: this_data = [] try: for chunk in progressbar( self._block_iterator( flat_signal=True, get=get, signal_mask=signal_mask, navigation_mask=navigation_mask), total=nblocks, leave=True, desc='Learn'): this_data.append(chunk) if len(this_data) == num_chunks: thedata = np.concatenate(this_data, axis=0) method(thedata) this_data = [] if len(this_data): thedata = np.concatenate(this_data, axis=0) method(thedata) except KeyboardInterrupt: pass # GET ALREADY CALCULATED RESULTS if algorithm == 'PCA': explained_variance = obj.explained_variance_ explained_variance_ratio = obj.explained_variance_ratio_ factors = obj.components_.T elif algorithm == 'ORPCA': _, _, U, S, V = obj.finish() factors = U * S loadings = V explained_variance = S**2 / len(factors) elif algorithm == 'ONMF': factors, loadings = obj.finish() loadings = loadings.T # REPROJECT if reproject: if algorithm == 'PCA': method = obj.transform def post(a): return np.concatenate(a, axis=0) elif algorithm == 'ORPCA': method = obj.project obj.R = [] def post(a): return obj.finish()[4] elif algorithm == 'ONMF': method = obj.project def post(a): return np.concatenate(a, axis=1).T _map = map(lambda thing: method(thing), self._block_iterator( flat_signal=True, get=get, signal_mask=signal_mask, navigation_mask=navigation_mask)) H = [] try: for thing in progressbar( _map, total=nblocks, desc='Project'): H.append(thing) except KeyboardInterrupt: pass loadings = post(H) if explained_variance is not None and \ explained_variance_ratio is None: explained_variance_ratio = \ explained_variance / explained_variance.sum() # RESHUFFLE "blocked" LOADINGS ndim = self.axes_manager.navigation_dimension if algorithm != "svd": # Only needed for online algorithms try: loadings = _reshuffle_mixed_blocks( loadings, ndim, (output_dimension,), nav_chunks).reshape((-1, output_dimension)) except ValueError: # In case the projection step was not finished, it's left # as scrambled pass finally: self.data = original_data target = self.learning_results target.decomposition_algorithm = algorithm target.output_dimension = output_dimension if algorithm != "svd": target._object = obj target.factors = factors target.loadings = loadings target.explained_variance = explained_variance target.explained_variance_ratio = explained_variance_ratio # Rescale the results if the noise was normalized if normalize_poissonian_noise is True: target.factors = target.factors * rbH.ravel()[:, np.newaxis] target.loadings = target.loadings * raG.ravel()[:, np.newaxis]