def test_returnXArray(self):
    cdo = Cdo()
    cdo.debug = DEBUG
    if not cdo.hasXarray:
        print("nothing tested for test_returnXArray because of missing xarray")
        return

    topo = cdo.topo(options='-f nc', returnXArray='topo')
    self.assertEqual(-1889, int(topo.mean()))
    self.assertEqual(259200, topo.count())

    bathy = cdo.setrtomiss(0, 10000, input=" -topo", returnXArray='topo')
    self.assertEqual(-3385, int(bathy.mean()))
    self.assertEqual(173565, bathy.count())

    oro = cdo.setrtomiss(-10000, 0, input=cdo.topo(options='-f nc'), returnXArray='topo')
    self.assertEqual(1142, int(oro.mean()))
    self.assertEqual(85567, oro.count())

    bathy = cdo.remapnn('r2x2', input=cdo.topo(options='-f nc'), returnXArray='topo')
    self.assertEqual(-4298.0, bathy[0, 0])
    self.assertEqual(-2669.0, bathy[0, 1])

    ta = cdo.remapnn('r2x2', input=cdo.topo(options='-f nc'))
    tb = cdo.subc(-2669.0, input=ta)
    withMask = cdo.div(input=ta + " " + tb, returnXArray='topo')

    from xarray import DataArray
    self.assertEqual(False, DataArray.to_masked_array(withMask).mask[0, 0])
    self.assertEqual(False, DataArray.to_masked_array(withMask).mask[1, 0])
    self.assertEqual(False, DataArray.to_masked_array(withMask).mask[1, 1])
    self.assertEqual(True, DataArray.to_masked_array(withMask).mask[0, 1])
def __init__(self, data, coords=None, dims=None, name=None,
             attrs=None, encoding=None, fastpath=False):
    # xarray older than 0.7 does not accept the `fastpath` keyword,
    # so only pass it through for newer versions.
    if major_x_ver == 0 and minor_x_ver < 7:
        DataArray.__init__(self,
                           data=data,
                           coords=coords,
                           dims=dims,
                           name=name,
                           attrs=attrs,
                           encoding=encoding,
                           # fastpath=fastpath
                           )
    else:
        DataArray.__init__(self,
                           data=data,
                           coords=coords,
                           dims=dims,
                           name=name,
                           attrs=attrs,
                           encoding=encoding,
                           fastpath=fastpath)
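# A minimal sketch of where `major_x_ver` / `minor_x_ver` could come from.
# This is an assumption for illustration only; the original module may derive
# them differently.
import xarray

major_x_ver, minor_x_ver = (int(v) for v in xarray.__version__.split('.')[:2])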
def setUp(self):
    a = easy_array((10, 15, 3, 2))
    darray = DataArray(a, dims=["y", "x", "col", "row"])
    darray.coords["col"] = np.array(["col" + str(x)
                                     for x in darray.coords["col"].values])
    darray.coords["row"] = np.array(["row" + str(x)
                                     for x in darray.coords["row"].values])
    self.darray = darray
def test_default_title(self):
    a = DataArray(easy_array((4, 3, 2)), dims=['a', 'b', 'c'])
    a.coords['c'] = [0, 1]
    a.coords['d'] = u'foo'
    self.plotfunc(a.isel(c=1))
    title = plt.gca().get_title()
    self.assertTrue('c = 1, d = foo' == title or 'd = foo, c = 1' == title)
def test_subplot_kws(self):
    a = easy_array((10, 15, 4))
    d = DataArray(a, dims=["y", "x", "z"])
    d.coords["z"] = list("abcd")
    g = d.plot(x="x", y="y", col="z", col_wrap=2, cmap="cool",
               subplot_kws=dict(axisbg="r"))
    for ax in g.axes.flat:
        self.assertEqual(ax.get_axis_bgcolor(), "r")
def get_dataset(self, key, info):
    """Load a dataset."""
    if self._channel != key.name:
        return
    logger.debug('Reading %s.', key.name)
    # FIXME: get this from MTD_MSIL1C.xml
    quantification_value = 10000.
    jp2 = glymur.Jp2k(self.filename)
    bitdepth = 0
    for seg in jp2.codestream.segment:
        try:
            bitdepth = max(bitdepth, seg.bitdepth[0])
        except AttributeError:
            pass

    jp2.dtype = (np.uint8 if bitdepth <= 8 else np.uint16)

    # Initialize the jp2 reader / doesn't work in a multi-threaded context.
    # jp2[0, 0]
    # data = da.from_array(jp2, chunks=CHUNK_SIZE) / quantification_value * 100

    data = da.from_delayed(delayed(jp2.read)(), jp2.shape, jp2.dtype)
    data = data.rechunk(CHUNK_SIZE) / quantification_value * 100

    proj = DataArray(data, dims=['y', 'x'])
    proj.attrs = info.copy()
    proj.attrs['units'] = '%'
    proj.attrs['platform_name'] = self.platform_name
    return proj
def construct_dataarray(dim_num, dtype, contains_nan, dask):
    # dim_num <= 3
    rng = np.random.RandomState(0)
    shapes = [16, 8, 4][:dim_num]
    dims = ('x', 'y', 'z')[:dim_num]

    if np.issubdtype(dtype, np.floating):
        array = rng.randn(*shapes).astype(dtype)
    elif np.issubdtype(dtype, np.integer):
        array = rng.randint(0, 10, size=shapes).astype(dtype)
    elif np.issubdtype(dtype, np.bool_):
        array = rng.randint(0, 1, size=shapes).astype(dtype)
    elif dtype == str:
        array = rng.choice(['a', 'b', 'c', 'd'], size=shapes)
    else:
        raise ValueError

    da = DataArray(array, dims=dims, coords={'x': np.arange(16)}, name='da')

    if contains_nan:
        da = da.reindex(x=np.arange(20))
    if dask and has_dask:
        chunks = {d: 4 for d in dims}
        da = da.chunk(chunks)

    return da
def read(self):
    """
    :return: DataArray objects populated with data read from eeg files.
        The size of the output is number of channels x number of start offsets
        x number of time series points.
        The corresponding DataArray axes are: 'channels', 'start_offsets', 'offsets'
    """
    eventdata, read_ok_mask = self.read_file(self.dataroot, self.channels,
                                             self.start_offsets, self.read_size)
    # multiply by the gain
    eventdata *= self.params_dict['gain']

    eventdata = DataArray(eventdata,
                          dims=[self.channel_name, 'start_offsets', 'offsets'],
                          coords={
                              self.channel_name: self.channels,
                              'start_offsets': self.start_offsets.copy(),
                              'offsets': np.arange(self.read_size),
                              'samplerate': self.params_dict['samplerate']
                          })

    from copy import deepcopy
    eventdata.attrs = deepcopy(self.params_dict)

    return eventdata, read_ok_mask
def test_expand_without_dims(self):
    from satpy.resample import NativeResampler
    import numpy as np
    import dask.array as da
    from xarray import DataArray
    from pyresample.geometry import AreaDefinition
    from pyresample.utils import proj4_str_to_dict
    ds1 = DataArray(da.zeros((100, 50), chunks=85))
    proj_dict = proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 '
                                  '+lon_0=-95. +lat_0=25 +lat_1=25 '
                                  '+units=m +no_defs')
    target = AreaDefinition(
        'test', 'test', 'test',
        proj_dict,
        x_size=100,
        y_size=200,
        area_extent=(-1000., -1500., 1000., 1500.),
    )
    # source geo def doesn't actually matter
    resampler = NativeResampler(None, target)
    new_arr = resampler.resample(ds1)
    self.assertEqual(new_arr.shape, (200, 100))
    new_arr2 = resampler.resample(ds1.compute())
    self.assertTrue(np.all(new_arr == new_arr2))
def setUp(self):
    self.values = np.random.randn(4, 6)
    self.data = da.from_array(self.values, chunks=(2, 2))
    self.eager_array = DataArray(self.values, coords={'x': range(4)},
                                 dims=('x', 'y'), name='foo')
    self.lazy_array = DataArray(self.data, coords={'x': range(4)},
                                dims=('x', 'y'), name='foo')
def test_decode_cf_time_bounds():
    da = DataArray(np.arange(6, dtype='int64').reshape((3, 2)),
                   coords={'time': [1, 2, 3]},
                   dims=('time', 'nbnd'), name='time_bnds')

    attrs = {'units': 'days since 2001-01',
             'calendar': 'standard',
             'bounds': 'time_bnds'}

    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == {'units': 'days since 2001-01',
                                               'calendar': 'standard'}
    dsc = decode_cf(ds)
    assert dsc.time_bnds.dtype == np.dtype('M8[ns]')
    dsc = decode_cf(ds, decode_times=False)
    assert dsc.time_bnds.dtype == np.dtype('int64')

    # Do not overwrite existing attrs
    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
    ds['time_bnds'].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    ds['time'].attrs['bounds'] = 'fake_var'
    _update_bounds_attributes(ds.variables)
def loopread(tcoutput, size_record, ncol, n_alt, size_head, size_data_record, tReq):
    tcoutput = Path(tcoutput).expanduser()
    n_t = tcoutput.stat().st_size // size_record // d_bytes

    chi = empty(n_t, float)
    t = empty(n_t, datetime)
    plasmaparam = DataArray(data=empty((n_t, n_alt, 4)),
                            dims=["time", "alt_km", "isrparam"])
    iono = DataArray(data=empty((n_t, n_alt, 22)),
                     dims=["time", "alt_km", "param"])

    with tcoutput.open("rb") as f:  # reset to beginning
        for i in range(n_t):
            iono[i, ...], chi[i], t[i], alt, plasmaparam[i, ...] = data_tra(
                f, size_record, ncol, n_alt, size_head, size_data_record
            )

    # FIXME isn't there a way to inherit coordinates like Pandas?
    iono = iono.assign_coords(time=t, param=PARAM, alt_km=alt)
    plasmaparam = plasmaparam.assign_coords(time=t, isrparam=ISRPARAM, alt_km=alt)

    # %% handle time request -- will return Dataframe if tReq, else returns Panel of all times
    if tReq is not None:  # have to qualify this since picktime default gives last time as fallback
        tUsedInd = picktime(iono.time, tReq, None)[0]
        if tUsedInd is not None:  # in case ind is 0
            iono = iono[tUsedInd, ...]
            plasmaparam = plasmaparam[tUsedInd, ...]

    return iono, chi, plasmaparam
def test_mask_valid_data():
    from xarray import DataArray, Dataset
    import numpy as np

    test_attrs = {
        'one': 1,
        'nodata': -999,
    }
    expected_data_array = DataArray(np.array([[1., np.nan, np.nan],
                                              [2, 3, np.nan],
                                              [np.nan, np.nan, np.nan]], dtype='float'),
                                    attrs=test_attrs, name='var_one')

    data_array = DataArray([[1, -999, -999], [2, 3, -999], [-999, -999, -999]],
                           attrs=test_attrs)
    dataset = Dataset(data_vars={'var_one': data_array},
                      attrs={'ds_attr': 'still here'})

    # Make sure test is actually changing something
    assert not data_array.equals(expected_data_array)

    output_ds = mask_valid_data(dataset, keep_attrs=True)
    assert output_ds.attrs['ds_attr'] == 'still here'
    assert output_ds.data_vars['var_one'].equals(expected_data_array)
    assert output_ds.data_vars['var_one'].attrs['one'] == 1

    output_da = mask_valid_data(data_array, keep_attrs=True)
    assert output_da.equals(expected_data_array)
    assert output_da.attrs['one'] == 1
def cyclic_dataarray(da, coord='lon'):
    """ Add a cyclic coordinate point to a DataArray along a specified
    named coordinate dimension.

    >>> from xarray import DataArray
    >>> data = DataArray([[1, 2, 3], [4, 5, 6]],
    ...                  coords={'x': [1, 2], 'y': range(3)},
    ...                  dims=['x', 'y'])
    >>> cd = cyclic_dataarray(data, 'y')
    >>> cd.data
    array([[1, 2, 3, 1],
           [4, 5, 6, 4]])
    """
    assert isinstance(da, DataArray)

    lon_idx = da.dims.index(coord)
    cyclic_data, cyclic_coord = add_cyclic_point(da.values,
                                                 coord=da.coords[coord],
                                                 axis=lon_idx)

    # Copy and add the cyclic coordinate and data
    new_coords = dict(da.coords)
    new_coords[coord] = cyclic_coord
    new_values = cyclic_data

    new_da = DataArray(new_values, dims=da.dims, coords=new_coords)

    # Copy the attributes for the re-constructed data and coords
    for att, val in da.attrs.items():
        new_da.attrs[att] = val
    for c in da.coords:
        for att in da.coords[c].attrs:
            new_da.coords[c].attrs[att] = da.coords[c].attrs[att]

    return new_da
def test_subplot_kws(self):
    a = easy_array((10, 15, 4))
    d = DataArray(a, dims=['y', 'x', 'z'])
    d.coords['z'] = list('abcd')
    g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool',
               subplot_kws=dict(axisbg='r'))
    for ax in g.axes.flat:
        self.assertEqual(ax.get_axis_bgcolor(), 'r')
def test_datetime_dimension(self):
    nrow = 3
    ncol = 4
    time = pd.date_range("2000-01-01", periods=nrow)
    a = DataArray(easy_array((nrow, ncol)),
                  coords=[("time", time), ("y", range(ncol))])
    a.plot()
    ax = plt.gca()
    self.assertTrue(ax.has_data())
def test_stack(self):
    data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4))
    arr = DataArray(data, dims=('w', 'x', 'y'))
    stacked = arr.stack(z=('x', 'y'))
    z = pd.MultiIndex.from_product([np.arange(3), np.arange(4)],
                                   names=['x', 'y'])
    expected = DataArray(data.reshape(2, -1), {'z': z}, dims=['w', 'z'])
    assert stacked.data.chunks == expected.data.chunks
    self.assertLazyAndEqual(expected, stacked)
def setUp(self):
    a = easy_array((10, 15, 3, 2))
    darray = DataArray(a, dims=['y', 'x', 'col', 'row'])
    darray.coords['col'] = np.array(['col' + str(x)
                                     for x in darray.coords['col'].values])
    darray.coords['row'] = np.array(['row' + str(x)
                                     for x in darray.coords['row'].values])
    self.darray = darray
def test_convenient_facetgrid_4d(self):
    a = easy_array((10, 15, 2, 3))
    d = DataArray(a, dims=['y', 'x', 'columns', 'rows'])
    g = d.plot(x='x', y='y', col='columns', row='rows')

    self.assertArrayEqual(g.axes.shape, [3, 2])
    for ax in g.axes.flat:
        self.assertTrue(ax.has_data())

    with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
        d.plot(x='x', y='y', col='columns', ax=plt.gca())
def setUp(self):
    '''
    Create a DataArray with a time-axis that contains datetime objects.
    '''
    month = np.arange(1, 13, 1)
    data = np.sin(2 * np.pi * month / 12.0)

    darray = DataArray(data, dims=['time'])
    darray.coords['time'] = np.array([datetime(2017, m, 1) for m in month])

    self.darray = darray
def test_convenient_facetgrid_4d(self):
    a = easy_array((10, 15, 2, 3))
    d = DataArray(a, dims=["y", "x", "columns", "rows"])
    g = d.plot(x="x", y="y", col="columns", row="rows")

    self.assertArrayEqual(g.axes.shape, [3, 2])
    for ax in g.axes.flat:
        self.assertTrue(ax.has_data())

    with self.assertRaisesRegexp(ValueError, "[Ff]acet"):
        d.plot(x="x", y="y", col="columns", ax=plt.gca())
def setUp(self):
    da = DataArray(easy_array((10, 15), start=-1), dims=["y", "x"])
    # add 2d coords
    ds = da.to_dataset(name="testvar")
    x, y = np.meshgrid(da.x.values, da.y.values)
    ds["x2d"] = DataArray(x, dims=["y", "x"])
    ds["y2d"] = DataArray(y, dims=["y", "x"])
    ds.set_coords(["x2d", "y2d"], inplace=True)
    # set darray and plot method
    self.darray = ds.testvar
    self.plotmethod = getattr(self.darray.plot, self.plotfunc.__name__)
def test_dataarray_pickle(self):
    # Test that pickling/unpickling does not convert the dask
    # backend to numpy
    a1 = DataArray(build_dask_array())
    a1.compute()
    self.assertFalse(a1._in_memory)
    self.assertEquals(kernel_call_count, 1)
    a2 = pickle.loads(pickle.dumps(a1))
    self.assertEquals(kernel_call_count, 1)
    self.assertDataArrayIdentical(a1, a2)
    self.assertFalse(a1._in_memory)
    self.assertFalse(a2._in_memory)
def test_subplot_kws(self):
    a = easy_array((10, 15, 4))
    d = DataArray(a, dims=['y', 'x', 'z'])
    d.coords['z'] = list('abcd')
    g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool',
               subplot_kws=dict(axisbg='r'))
    for ax in g.axes.flat:
        try:
            # mpl V2
            self.assertEqual(ax.get_facecolor()[0:3],
                             mpl.colors.to_rgb('r'))
        except AttributeError:
            self.assertEqual(ax.get_axis_bgcolor(), 'r')
def sumplasmaline(fn, P):
    spec, freq = readplasmaline(fn, P)
    assert isinstance(spec, DataArray) and spec.ndim == 4
    assert isinstance(P['flim'][0], float)

    z = spec.srng
    specsum = DataArray(index=spec.items, columns=spec.labels)
    zind = (P['zlim'][0] <= z) & (z <= P['zlim'][1])

    for s in spec:
        find = (P['flim'][0] <= absolute(freq[s] / 1.e6)) & (absolute(freq[s] / 1.e6) < P['flim'][1])
        specsum.loc[:, s] = spec.loc[:, :, zind, find].sum(axis=3).sum(axis=2)  # FIXME .sum(dim=)

    return specsum
def test_convenient_facetgrid(self):
    a = easy_array((10, 15, 4))
    d = DataArray(a, dims=['y', 'x', 'z'])
    d.coords['z'] = list('abcd')
    g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool')

    self.assertArrayEqual(g.axes.shape, [2, 2])
    for ax in g.axes.flat:
        self.assertTrue(ax.has_data())

    with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
        d.plot(x='x', y='y', col='z', ax=plt.gca())

    with self.assertRaisesRegexp(ValueError, '[Ff]acet'):
        d[0].plot(x='x', y='y', col='z', ax=plt.gca())
def test_convenient_facetgrid(self):
    a = easy_array((10, 15, 4))
    d = DataArray(a, dims=["y", "x", "z"])
    d.coords["z"] = list("abcd")
    g = d.plot(x="x", y="y", col="z", col_wrap=2, cmap="cool")

    self.assertArrayEqual(g.axes.shape, [2, 2])
    for ax in g.axes.flat:
        self.assertTrue(ax.has_data())

    with self.assertRaisesRegexp(ValueError, "[Ff]acet"):
        d.plot(x="x", y="y", col="z", ax=plt.gca())

    with self.assertRaisesRegexp(ValueError, "[Ff]acet"):
        d[0].plot(x="x", y="y", col="z", ax=plt.gca())
def unstack_cat(da: xr.DataArray, dim, level=0):
    """Unstack DataArray expanding to dataset along a given level

    Parameters
    ----------
    da
    dim
    level

    Returns
    -------
    xr.Dataset

    """
    if not isinstance(da, xr.DataArray):
        raise ValueError("da must be a DataArray object")

    idx = da.indexes[dim]
    if not isinstance(idx, pd.MultiIndex):
        raise ValueError(f"{dim} is not a stacked coordinate")
    variables = idx.levels[level]

    # pull variables out of datarray
    data_dict = {}
    for k in variables:
        data_dict[k] = da.sel(variable=k).squeeze(drop=True)

    # unstacked dataset
    return xr.Dataset(data_dict)
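# Hedged usage sketch for unstack_cat. Note that the function selects on a
# hard-coded MultiIndex level called 'variable', so the example builds one;
# the names 'a', 'b', 'x', and 'feature' are made up for illustration.
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset({'a': ('x', np.arange(3.0)), 'b': ('x', np.ones(3))})
stacked = ds.to_array('variable').stack(feature=('variable', 'x'))
roundtrip = unstack_cat(stacked, 'feature', level=0)
# roundtrip is again a Dataset containing the variables 'a' and 'b'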
class DoNotCooccur(HACConstraint):
    """Do NOT merge co-occurring face tracks"""

    def initialize(self, parent=None):
        current_state = parent.current_state
        clusters = [cluster for cluster in current_state.labels()]
        n_clusters = len(clusters)
        self._cooccur = DataArray(
            np.zeros((n_clusters, n_clusters)),
            [('i', clusters), ('j', clusters)])

        for (segment1, track1), (segment2, track2) in current_state.co_iter(current_state):
            i = current_state[segment1, track1]
            j = current_state[segment2, track2]
            if i == j:
                continue
            self._cooccur.loc[i, j] = 1
            self._cooccur.loc[j, i] = 1

    def mergeable(self, clusters, parent=None):
        return self._cooccur.loc[clusters, clusters].sum().item() == 0.

    def update(self, merged_clusters, new_cluster, parent=None):
        # clusters that will be removed
        _clusters = list(set(merged_clusters) - set([new_cluster]))

        # update co-occurrence matrix
        self._cooccur.loc[new_cluster, :] += self._cooccur.loc[_clusters, :].sum(dim='i')
        self._cooccur.loc[:, new_cluster] += self._cooccur.loc[:, _clusters].sum(dim='j')

        # remove clusters
        self._cooccur = self._cooccur.drop(_clusters, dim='i').drop(_clusters, dim='j')
def centered_to_right(f: xr.DataArray, block_size, dim, boundary='wrap'):
    """Move centered variable to the right interface

    Parameters
    ----------
    f : xr.DataArray
    block_size :
        size of the coarse graining block
    dim : str
    boundary : str, optional
        A boundary condition which is passed to `isel_bc`

    Returns
    -------
    interface : xr.DataArray
        The value of f along the right interfaces of the coarse-grain blocks

    """
    new_coord = get_center_coords(f[dim].values, block_size)
    n = f.shape[f.get_axis_num(dim)]

    left_idx = slice(block_size, n + 1, block_size)
    right_idx = slice(block_size - 1, n, block_size)

    left = isel_bc(f, left_idx, dim, boundary=boundary)
    left = left.assign_coords(**{dim: new_coord})

    right = isel_bc(f, right_idx, dim, boundary=boundary)
    right = right.assign_coords(**{dim: new_coord})

    return (left + right) / 2
class TestDataArrayAndDataset(DaskTestCase):
    def assertLazyAndIdentical(self, expected, actual):
        self.assertLazyAnd(expected, actual, self.assertDataArrayIdentical)

    def assertLazyAndAllClose(self, expected, actual):
        self.assertLazyAnd(expected, actual, self.assertDataArrayAllClose)

    def assertLazyAndEqual(self, expected, actual):
        self.assertLazyAnd(expected, actual, self.assertDataArrayEqual)

    def setUp(self):
        self.values = np.random.randn(4, 6)
        self.data = da.from_array(self.values, chunks=(2, 2))
        self.eager_array = DataArray(self.values, coords={'x': range(4)},
                                     dims=('x', 'y'), name='foo')
        self.lazy_array = DataArray(self.data, coords={'x': range(4)},
                                    dims=('x', 'y'), name='foo')

    def test_rechunk(self):
        chunked = self.eager_array.chunk({'x': 2}).chunk({'y': 2})
        self.assertEqual(chunked.chunks, ((2,) * 2, (2,) * 3))
        self.assertLazyAndIdentical(self.lazy_array, chunked)

    def test_new_chunk(self):
        chunked = self.eager_array.chunk()
        self.assertTrue(chunked.data.name.startswith('xarray-<this-array>'))

    def test_lazy_dataset(self):
        lazy_ds = Dataset({'foo': (('x', 'y'), self.data)})
        self.assertIsInstance(lazy_ds.foo.variable.data, da.Array)

    def test_lazy_array(self):
        u = self.eager_array
        v = self.lazy_array

        self.assertLazyAndAllClose(u, v)
        self.assertLazyAndAllClose(-u, -v)
        self.assertLazyAndAllClose(u.T, v.T)
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(1 + u, 1 + v)

        actual = xr.concat([v[:2], v[2:]], 'x')
        self.assertLazyAndAllClose(u, actual)

    @pytest.mark.skipif(LooseVersion(dask.__version__) <= '0.15.4',
                        reason='Need dask 0.16 for new interface')
    def test_compute(self):
        u = self.eager_array
        v = self.lazy_array

        assert dask.is_dask_collection(v)
        (v2,) = dask.compute(v + 1)
        assert not dask.is_dask_collection(v2)

        assert ((u + 1).data == v2.data).all()

    @pytest.mark.skipif(LooseVersion(dask.__version__) <= '0.15.4',
                        reason='Need dask 0.16 for new interface')
    def test_persist(self):
        u = self.eager_array
        v = self.lazy_array + 1

        (v2,) = dask.persist(v)
        assert v is not v2
        assert len(v2.__dask_graph__()) < len(v.__dask_graph__())
        assert v2.__dask_keys__() == v.__dask_keys__()
        assert dask.is_dask_collection(v)
        assert dask.is_dask_collection(v2)

        self.assertLazyAndAllClose(u + 1, v)
        self.assertLazyAndAllClose(u + 1, v2)

    def test_concat_loads_variables(self):
        # Test that concat() computes not-in-memory variables at most once
        # and loads them in the output, while leaving the input unaltered.
        d1 = build_dask_array('d1')
        c1 = build_dask_array('c1')
        d2 = build_dask_array('d2')
        c2 = build_dask_array('c2')
        d3 = build_dask_array('d3')
        c3 = build_dask_array('c3')
        # Note: c is a non-index coord.
        # Index coords are loaded by IndexVariable.__init__.
        ds1 = Dataset(data_vars={'d': ('x', d1)}, coords={'c': ('x', c1)})
        ds2 = Dataset(data_vars={'d': ('x', d2)}, coords={'c': ('x', c2)})
        ds3 = Dataset(data_vars={'d': ('x', d3)}, coords={'c': ('x', c3)})

        assert kernel_call_count == 0
        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars='different',
                        coords='different')
        # each kernel is computed exactly once
        assert kernel_call_count == 6
        # variables are loaded in the output
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars='all', coords='all')
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=['d'], coords=['c'])
        # no extra kernel calls
        assert kernel_call_count == 6
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=[], coords=[])
        # variables are loaded once as we are validating that they're identical
        assert kernel_call_count == 12
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        out = xr.concat([ds1, ds2, ds3], dim='n', data_vars='different',
                        coords='different', compat='identical')
        # compat=identical doesn't do any more kernel calls than compat=equals
        assert kernel_call_count == 18
        assert isinstance(out['d'].data, np.ndarray)
        assert isinstance(out['c'].data, np.ndarray)

        # When the test for different turns true halfway through,
        # stop computing variables as it would not have any benefit
        ds4 = Dataset(data_vars={'d': ('x', [2.0])}, coords={'c': ('x', [2.0])})
        out = xr.concat([ds1, ds2, ds4, ds3], dim='n', data_vars='different',
                        coords='different')
        # the variables of ds1 and ds2 were computed, but those of ds3 weren't
        assert kernel_call_count == 22
        assert isinstance(out['d'].data, dask.array.Array)
        assert isinstance(out['c'].data, dask.array.Array)

        # the data of ds1 and ds2 was loaded into numpy and then
        # concatenated to the data of ds3. Thus, only ds3 is computed now.
        out.compute()
        assert kernel_call_count == 24

        # Finally, test that originals are unaltered
        assert ds1['d'].data is d1
        assert ds1['c'].data is c1
        assert ds2['d'].data is d2
        assert ds2['c'].data is c2
        assert ds3['d'].data is d3
        assert ds3['c'].data is c3

    def test_groupby(self):
        if LooseVersion(dask.__version__) == LooseVersion('0.15.3'):
            pytest.xfail('upstream bug in dask: '
                         'https://github.com/dask/dask/issues/2718')

        u = self.eager_array
        v = self.lazy_array

        expected = u.groupby('x').mean()
        actual = v.groupby('x').mean()
        self.assertLazyAndAllClose(expected, actual)

    def test_groupby_first(self):
        u = self.eager_array
        v = self.lazy_array

        for coords in [u.coords, v.coords]:
            coords['ab'] = ('x', ['a', 'a', 'b', 'b'])
        with raises_regex(NotImplementedError, 'dask'):
            v.groupby('ab').first()
        expected = u.groupby('ab').first()
        actual = v.groupby('ab').first(skipna=False)
        self.assertLazyAndAllClose(expected, actual)

    def test_reindex(self):
        u = self.eager_array.assign_coords(y=range(6))
        v = self.lazy_array.assign_coords(y=range(6))

        for kwargs in [{'x': [2, 3, 4]},
                       {'x': [1, 100, 2, 101, 3]},
                       {'x': [2.5, 3, 3.5], 'y': [2, 2.5, 3]}]:
            expected = u.reindex(**kwargs)
            actual = v.reindex(**kwargs)
            self.assertLazyAndAllClose(expected, actual)

    def test_to_dataset_roundtrip(self):
        u = self.eager_array
        v = self.lazy_array

        expected = u.assign_coords(x=u['x'])
        self.assertLazyAndEqual(expected, v.to_dataset('x').to_array('x'))

    def test_merge(self):
        def duplicate_and_merge(array):
            return xr.merge([array, array.rename('bar')]).to_array()

        expected = duplicate_and_merge(self.eager_array)
        actual = duplicate_and_merge(self.lazy_array)
        self.assertLazyAndEqual(expected, actual)

    def test_ufuncs(self):
        u = self.eager_array
        v = self.lazy_array
        self.assertLazyAndAllClose(np.sin(u), xu.sin(v))

    def test_where_dispatching(self):
        a = np.arange(10)
        b = a > 3
        x = da.from_array(a, 5)
        y = da.from_array(b, 5)
        expected = DataArray(a).where(b)
        self.assertLazyAndEqual(expected, DataArray(a).where(y))
        self.assertLazyAndEqual(expected, DataArray(x).where(b))
        self.assertLazyAndEqual(expected, DataArray(x).where(y))

    def test_simultaneous_compute(self):
        ds = Dataset({'foo': ('x', range(5)), 'bar': ('x', range(5))}).chunk()

        count = [0]

        def counting_get(*args, **kwargs):
            count[0] += 1
            return dask.get(*args, **kwargs)

        with dask.set_options(get=counting_get):
            ds.load()
        self.assertEqual(count[0], 1)

    def test_stack(self):
        data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4))
        arr = DataArray(data, dims=('w', 'x', 'y'))
        stacked = arr.stack(z=('x', 'y'))
        z = pd.MultiIndex.from_product([np.arange(3), np.arange(4)],
                                       names=['x', 'y'])
        expected = DataArray(data.reshape(2, -1), {'z': z}, dims=['w', 'z'])
        assert stacked.data.chunks == expected.data.chunks
        self.assertLazyAndEqual(expected, stacked)

    def test_dot(self):
        eager = self.eager_array.dot(self.eager_array[0])
        lazy = self.lazy_array.dot(self.lazy_array[0])
        self.assertLazyAndAllClose(eager, lazy)

    def test_dataarray_repr(self):
        # Test that __repr__ does not convert the dask backend to numpy
        # in either the data variable or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        a = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
        expected = dedent("""\
        <xarray.DataArray 'data' (x: 1)>
        dask.array<shape=(1,), dtype=int64, chunksize=(1,)>
        Coordinates:
            y        (x) int64 dask.array<shape=(1,), chunksize=(1,)>
        Dimensions without coordinates: x""")
        self.assertEqual(expected, repr(a))
        assert kernel_call_count == 0

    def test_dataset_repr(self):
        # Test that __repr__ does not convert the dask backend to numpy
        # in either the data variables or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        ds = Dataset(data_vars={'a': ('x', data)},
                     coords={'y': ('x', nonindex_coord)})
        expected = dedent("""\
        <xarray.Dataset>
        Dimensions:  (x: 1)
        Coordinates:
            y        (x) int64 dask.array<shape=(1,), chunksize=(1,)>
        Dimensions without coordinates: x
        Data variables:
            a        (x) int64 dask.array<shape=(1,), chunksize=(1,)>""")
        self.assertEqual(expected, repr(ds))
        assert kernel_call_count == 0

    def test_dataarray_pickle(self):
        # Test that pickling/unpickling does not convert the dask backend
        # to numpy in either the data variable or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        a1 = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
        a1.compute()
        self.assertFalse(a1._in_memory)
        self.assertFalse(a1.coords['y']._in_memory)
        assert kernel_call_count == 2
        a2 = pickle.loads(pickle.dumps(a1))
        assert kernel_call_count == 2
        self.assertDataArrayIdentical(a1, a2)
        self.assertFalse(a1._in_memory)
        self.assertFalse(a2._in_memory)
        self.assertFalse(a1.coords['y']._in_memory)
        self.assertFalse(a2.coords['y']._in_memory)

    def test_dataset_pickle(self):
        # Test that pickling/unpickling does not convert the dask backend
        # to numpy in either the data variables or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        ds1 = Dataset(data_vars={'a': ('x', data)},
                      coords={'y': ('x', nonindex_coord)})
        ds1.compute()
        self.assertFalse(ds1['a']._in_memory)
        self.assertFalse(ds1['y']._in_memory)
        assert kernel_call_count == 2
        ds2 = pickle.loads(pickle.dumps(ds1))
        assert kernel_call_count == 2
        self.assertDatasetIdentical(ds1, ds2)
        self.assertFalse(ds1['a']._in_memory)
        self.assertFalse(ds2['a']._in_memory)
        self.assertFalse(ds1['y']._in_memory)
        self.assertFalse(ds2['y']._in_memory)

    def test_dataarray_getattr(self):
        # ipython/jupyter does a long list of getattr() calls when trying to
        # represent an object.
        # Make sure we're not accidentally computing dask variables.
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        a = DataArray(data, dims=['x'], coords={'y': ('x', nonindex_coord)})
        with suppress(AttributeError):
            getattr(a, 'NOTEXIST')
        assert kernel_call_count == 0

    def test_dataset_getattr(self):
        # Test that getattr() does not compute the dask backend for either
        # the data variables or the non-index coords
        data = build_dask_array('data')
        nonindex_coord = build_dask_array('coord')
        ds = Dataset(data_vars={'a': ('x', data)},
                     coords={'y': ('x', nonindex_coord)})
        with suppress(AttributeError):
            getattr(ds, 'NOTEXIST')
        assert kernel_call_count == 0

    def test_values(self):
        # Test that invoking the values property does not convert the dask
        # backend to numpy
        a = DataArray([1, 2]).chunk()
        self.assertFalse(a._in_memory)
        assert a.values.tolist() == [1, 2]
        self.assertFalse(a._in_memory)

    def test_from_dask_variable(self):
        # Test array creation from Variable with dask backend.
        # This is used e.g. in broadcast()
        a = DataArray(self.lazy_array.variable,
                      coords={'x': range(4)}, name='foo')
        self.assertLazyAndIdentical(self.lazy_array, a)
def ingest_NXarpes(paths):
    assert len(paths) == 1
    path = paths[0]

    f = fits.open(path)
    frame_count = f[1].data.shape[0]
    data = np.stack([f[1].data[i][-1] for i in range(frame_count)])
    data = data.reshape((f[0].header["N_0_0"], f[0].header["N_0_1"],
                         data.shape[1], data.shape[2]))
    energy = np.arange(f[0].header["SFSE_0"], f[0].header["SFEE_0"],
                       1. / f[0].header["SFPEV_0"] * f[0].header["SFBE0"])
    sample_x = np.linspace(f[0].header["ST_0_0"], f[0].header["EN_0_0"], f[0].header["N_0_0"])
    sample_y = np.linspace(f[0].header["ST_0_1"], f[0].header["EN_0_1"], f[0].header["N_0_1"])
    unknown_axis_coords = np.arange(data.shape[2])

    dim0 = f"{f[0].header['NM_0_0']} ({f[0].header['UN_0_0']})"
    dim1 = f"{f[0].header['NM_0_1']} ({f[0].header['UN_0_1']})"

    # TODO: length mismatch between energy shape and data shape[-1] (energy).
    #  is this occurring because of arange and floats (see arange's documentation on its return value)
    if len(energy) != data.shape[-1]:
        energy = energy[:-1]

    xarray = DataArray(
        data,
        dims=[dim1, dim0, ANGLE_FIELD, ENERGY_FIELD],
        coords=[sample_y, sample_x, unknown_axis_coords, energy])
    # dask_data = da.from_array(xarray)

    # Compose run start
    run_bundle = event_model.compose_run()  # type: event_model.ComposeRunBundle
    start_doc = run_bundle.start_doc
    start_doc["sample_name"] = Path(paths[0]).resolve().stem
    start_doc["projections"] = projections
    yield 'start', start_doc

    # Compose descriptor
    source = 'nxSTXM'
    frame_data_keys = {
        'raw': {'source': source,
                'dtype': 'number',
                'dims': xarray.dims,
                # 'coords': [energy, sample_y, sample_x],
                'shape': data.shape},
        ENERGY_FIELD: {'source': source,
                       'dtype': 'number',
                       'shape': energy.shape},
        dim0: {'source': source,
               'dtype': 'number',
               'shape': sample_x.shape},
        dim1: {'source': source,
               'dtype': 'number',
               'shape': sample_y.shape},
        ANGLE_FIELD: {'source': source,
                      'dtype': 'number',
                      'shape': unknown_axis_coords.shape}
    }
    frame_stream_name = 'primary'
    frame_stream_bundle = run_bundle.compose_descriptor(
        data_keys=frame_data_keys,
        name=frame_stream_name,
        # configuration=_metadata(path)
    )
    yield 'descriptor', frame_stream_bundle.descriptor_doc

    # NOTE: Resource document may be meaningful in the future. For transient access it is not useful
    # # Compose resource
    # resource = run_bundle.compose_resource(root=Path(path).root, resource_path=path,
    #                                        spec='NCEM_DM', resource_kwargs={})
    # yield 'resource', resource.resource_doc

    # Compose datum_page
    # z_indices, t_indices = zip(*itertools.product(z_indices, t_indices))
    # datum_page_doc = resource.compose_datum_page(datum_kwargs={'index_z': list(z_indices),
    #                                                            'index_t': list(t_indices)})
    # datum_ids = datum_page_doc['datum_id']
    # yield 'datum_page', datum_page_doc

    yield 'event', frame_stream_bundle.compose_event(
        data={'raw': xarray,
              ENERGY_FIELD: energy,
              dim0: sample_x,
              dim1: sample_y,
              ANGLE_FIELD: unknown_axis_coords},
        timestamps={'raw': time.time(),
                    ENERGY_FIELD: time.time(),
                    dim0: time.time(),
                    dim1: time.time(),
                    ANGLE_FIELD: time.time()})

    yield 'stop', run_bundle.compose_stop()
def test_nonnumeric_index_raises_typeerror(self):
    a = DataArray(easy_array((3, 2)),
                  coords=[['a', 'b', 'c'], ['d', 'e']])
    with self.assertRaisesRegexp(TypeError, r'[Pp]lot'):
        self.plotfunc(a)
def test_2d_before_squeeze(self):
    a = DataArray(easy_array((1, 5)))
    a.plot()
def setUp(self):
    self.darray = DataArray(easy_array((2, 3, 4)))
def ensembles2dataset_dask(ensdict, ncfpath, dsattrs={}, chunks=10, verbose=True,
                           print_every=1000):
    """
    Convert a dictionary of ensembles into an xarray Dataset object
    using dask.delayed to keep memory usage feasible.
    """
    mms2ms = 1e-3
    n = 0
    # fbadens = np.array(ensdict_aux)==None
    # nt = len(ensdict) - np.sum(fbadens)
    # embed()
    ensdict0 = None
    while ensdict0 is None:
        ensdict0 = ensdict[n].compute()
        n += 1
    nz = ensdict0['fixed_leader_janus']['number_of_cells']

    fixj = ensdict0['fixed_leader_janus'].compute()
    fix5 = ensdict0['fixed_leader_beam5'].compute()

    # Add ping offset to get beam 5's timestamps.
    dt5 = fix5['ping_offset_time']  # In milliseconds.
    dt5 = np.array(Timedelta(dt5, unit='ms'))

    th = fixj['beam_angle']
    assert th == 25  # Always 25 degrees.
    th = th * np.pi / 180.
    Cth = np.cos(th)

    # Construct along-beam/vertical axes.
    cm2m = 1e-2
    r1janus = fixj['bin_1_distance'] * cm2m
    r1b5 = fix5['bin_1_distance'] * cm2m
    ncj = fixj['number_of_cells']
    nc5 = fix5['number_of_cells']
    lcj = fixj['depth_cell_length'] * cm2m
    lc5 = fix5['depth_cell_length'] * cm2m
    Lj = ncj * lcj  # Distance from center of bin 1 to the center of last bin (Janus).
    L5 = nc5 * lc5  # Distance from center of bin 1 to the center of last bin (beam 5).

    rb = r1janus + np.arange(0, Lj, lcj)  # Distance from xducer head (Janus).
    zab = Cth * rb                        # Vertical distance from xducer head (Janus).
    zab5 = r1b5 + np.arange(0, L5, lc5)   # Distance from xducer head, also
                                          # depth for the vertical beam.

    rb = IndexVariable('z', rb, attrs={'units': 'meters',
                                       'long_name': "along-beam distance from the xducer's face to the center of the bins, for beams 1-4 (Janus)"})
    zab = IndexVariable('z', zab, attrs={'units': 'meters',
                                         'long_name': "vertical distance from the instrument's head to the center of the bins, for beams 1-4 (Janus)"})
    zab5 = IndexVariable('z5', zab5, attrs={'units': 'meters',
                                            'long_name': "vertical distance from xducer face to the center of the bins, for beam 5 (vertical)"})

    ensdict = from_sequence(ensdict)
    tjanus = ensdict.map_partitions(_alloc_timestamp_parts)
    t5 = _addtarr(tjanus, dt5)

    if verbose:
        print("Unpacking timestamps.")
    time = IndexVariable('time', tjanus.compute(),
                         attrs={'long_name': 'timestamps for beams 1-4 (Janus)'})
    time5 = IndexVariable('time5', t5.compute(),
                          attrs={'long_name': 'timestamps for beam 5 (vertical)'})
    if verbose:
        print("Done unpacking timestamps.")

    coords0 = dict(time=time)
    coords = dict(z=zab, time=time, rb=rb)
    coords5 = dict(z5=zab5, time5=time5)
    dims = ['z', 'time']
    dims5 = ['z5', 'time5']
    dims0 = ['time']

    coordsdict = coords0
    if verbose:
        print("Allocating heading, pitch, roll.")
    kwda = dict(coords=coordsdict, dims=dims0,
                attrs=dict(units=unit, long_name=lname))
    svars = ['heading', 'pitch', 'roll']
    long_names = svars
    units = ['degrees'] * 3
    grp = 'variable_leader_janus'
    vars1d = dict()
    for vname, lname, unit in zip(svars, long_names, units):
        if verbose:
            print(vname)
        wrk = ensdict.map_partitions(_alloc_hpr, grp, vname)
        # wrk = darr.from_array(np.array(wrk.compute()), chunks=chunks)
        wrk2 = delayed(_bag2DataArray)(wrk, chunks)(**kwda)
        vars1d.update({vname: wrk2})
        del (wrk, wrk2)

    ds2hpr = Dataset(data_vars=vars1d, coords=coordsdict)
    ds2hpr = ds2hpr.to_netcdf(ncfpath, compute=False, mode='w')
    if verbose:
        print("Saving heading, pitch, roll.")
    ds2hpr.compute()
    if verbose:
        print("Done saving heading, pitch, roll.")
    del ds2hpr

    coordsdict = coords5
    # Load beam 5 variables into memory to be able to put them
    # in a chunked DataArray.
    if verbose:
        print("Allocating beam 5 variables.")
    grps = ['velocity_beam5', 'correlation_beam5', 'echo_intensity_beam5']
    long_names = ['Beam 5 velocity', 'Beam 5 correlation', 'Beam 5 echo amplitude']
    units = ['mm/s, positive toward xducer face', 'unitless', 'dB']
    vars5 = dict()
    for grp, lname, unit in zip(grps, long_names, units):
        if verbose:
            print(grp)
        wrk = ensdict.map_partitions(_alloc_beam5, grp)
        wrk = darr.from_array(np.array(wrk.compute()).T, chunks=(1, chunks))
        wrk = DataArray(wrk, coords=coordsdict, dims=dims5,
                        attrs=dict(units=unit, long_name=lname))
        vars5.update({grp: wrk})
        del wrk

    ds5 = Dataset(data_vars=vars5, coords=coordsdict)
    ds5 = ds5.to_netcdf(ncfpath, compute=False, mode='a')
    if verbose:
        print("Saving beam 5 variables.")
    ds5.compute()
    if verbose:
        print("Done saving beam 5 variables.")
    del ds5
    embed()

    coordsdict = coords
    # Load beams 1-4 variables into memory to be able to put them
    # in a chunked DataArray.
    if verbose:
        print("Allocating Janus variables.")
    grps = ['velocity_janus', 'correlation_janus', 'echo_intensity_janus']
    long_names = ['Janus velocity', 'Janus correlation', 'Janus echo amplitude']
    units = ['mm/s, positive toward xducer face', 'unitless', 'dB']
    varsj = dict()
    for grp, lname, unit in zip(grps, long_names, units):
        if verbose:
            print(grp)
        wrk = ensdict.map_partitions(_alloc_janus, grp)
        wrk = darr.from_array(np.array(wrk.compute()).T, chunks=(1, chunks))
        wrk = DataArray(wrk, coords=coordsdict, dims=dims,
                        attrs=dict(units=unit, long_name=lname))
        varsj.update({grp: wrk})
        del wrk

    dsj = Dataset(data_vars=varsj, coords=coordsdict)
    dsj = dsj.to_netcdf(ncfpath, compute=False, mode='a')
    if verbose:
        print("Saving Janus variables.")
    dsj.compute()
    if verbose:
        print("Done saving Janus variables.")
    del dsj

    long_names = ('Beam 1 velocity', 'Beam 2 velocity', 'Beam 3 velocity',
                  'Beam 4 velocity', 'Beam 5 velocity',
                  'Beam 1 correlation', 'Beam 2 correlation', 'Beam 3 correlation',
                  'Beam 4 correlation', 'Beam 5 correlation',
                  'Beam 1 echo amplitude', 'Beam 2 echo amplitude',
                  'Beam 3 echo amplitude', 'Beam 4 echo amplitude',
                  'Beam 5 echo amplitude',
                  'heading', 'pitch', 'roll')
    units = ('m/s, positive toward xducer face', 'm/s, positive toward xducer face',
             'm/s, positive toward xducer face', 'm/s, positive toward xducer face',
             'm/s, positive toward xducer face',
             'no units', 'no units', 'no units', 'no units', 'no units',
             'dB', 'dB', 'dB', 'dB', 'dB',
             'degrees', 'degrees', 'degrees')
    names = ('b1', 'b2', 'b3', 'b4', 'b5',
             'cor1', 'cor2', 'cor3', 'cor4', 'cor5',
             'int1', 'int2', 'int3', 'int4', 'int5',
             'phi1', 'phi2', 'phi3')

    # data_vars = {}
    # # sk = darr.zeros((nz, nt), chunks=chunks)*np.nan  # Beam vels stored in mm/s
    # #                                                  # as int64 to save memory.
    # b1, b2, b3, b4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # # embed()
    # sk0 = darr.zeros(nt, chunks=chunks)*np.nan
    # cor1, cor2, cor3, cor4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # int1, int2, int3, int4 = sk.copy(), sk.copy(), sk.copy(), sk.copy()
    # b5, cor5, int5 = sk.copy(), sk.copy(), sk.copy()
    # heading, pitch, roll = sk0.copy(), sk0.copy(), sk0.copy()
    # tjanus = []

    # ensdict = np.array(ensdict)[~fbadens]
    # ensdict = ensdict.tolist()
    arrs = (b1, b2, b3, b4, b5,
            cor1, cor2, cor3, cor4, cor5,
            int1, int2, int3, int4, int5,
            heading, pitch, roll)
    # pressure, temperature, salinity, soundspeed)
    for arr, name, long_name, unit in zip(arrs, names, long_names, units):
        if 'Beam5' in long_name:
            coordsn = coords5
            dimsn = dims
        elif 'phi' in name:
            coordsn = coords0
            dimsn = dims0
        else:
            coordsn = coords
            dimsn = dims
        da = DataArray(arr, coords=coordsn, dims=dimsn,
                       attrs=dict(units=unit, long_name=long_name))
        data_vars.update({name: da})

    allcoords.update(coords)
    allcoords.update(coords5)
    ds = Dataset(data_vars=data_vars, coords=allcoords, attrs=dsattrs)

    return ds
def _swath_def_of_data_arrays(rows, cols):
    return SwathDefinition(
        DataArray(da.zeros((rows, cols)), dims=('y', 'x')),
        DataArray(da.zeros((rows, cols)), dims=('y', 'x')),
    )
def main(argv=sys.argv[1:]): from polar2grid.core.script_utils import setup_logging, create_basic_parser, create_exc_handler, rename_log_file, ExtendAction from polar2grid.compositors import CompositorManager frontends = available_frontends() backends = available_backends() parser = create_basic_parser(description="Extract swath data, remap it, and write it to a new file format") parser.add_argument("frontend", choices=sorted(frontends.keys()), help="Specify the swath extractor to use to read data (additional arguments are determined after this is specified)") parser.add_argument("backend", choices=sorted(backends.keys()), help="Specify the backend to use to write data output (additional arguments are determined after this is specified)") parser.add_argument("--compositor-configs", nargs="*", default=None, help="Specify alternative configuration file(s) for compositors") # don't include the help flag argv_without_help = [x for x in argv if x not in ["-h", "--help"]] args, remaining_args = parser.parse_known_args(argv_without_help) glue_name = args.frontend + "2" + args.backend LOG = logging.getLogger(glue_name) # Load compositor information (we can't know the compositor choices until we've loaded the configuration) compositor_manager = CompositorManager(config_files=args.compositor_configs) # Hack: argparse doesn't let you use choices and nargs=* on a positional argument parser.add_argument("compositors", choices=list(compositor_manager.keys()) + [[]], nargs="*", help="Specify the compositors to apply to the provided scene (additional arguments are determined after this is specified)") # load the actual components we need farg_func = get_frontend_argument_func(frontends, args.frontend) fcls = get_frontend_class(frontends, args.frontend) barg_func = get_backend_argument_func(backends, args.backend) bcls = get_backend_class(backends, args.backend) # add_frontend_arguments(parser) subgroup_titles = [] subgroup_titles += farg_func(parser) subgroup_titles += add_remap_argument_groups(parser) subgroup_titles += barg_func(parser) parser.add_argument('-f', dest='data_files', nargs="+", default=[], action=ExtendAction, help="List of files or directories to extract data from") parser.add_argument('-d', dest='data_files', nargs="+", default=[], action=ExtendAction, help="Data directories to look for input data files (equivalent to -f)") global_keywords = ("keep_intermediate", "overwrite_existing", "exit_on_error") args = parser.parse_args(argv, global_keywords=global_keywords, subgroup_titles=subgroup_titles) if not args.data_files: # FUTURE: When the -d flag is removed this won't be needed because -f will be required parser.print_usage() parser.exit(1, "ERROR: No data files provided (-f flag)\n") # Logs are renamed once data the provided start date is known rename_log = False if args.log_fn is None: rename_log = True args.log_fn = glue_name + "_fail.log" levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG] setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn) sys.excepthook = create_exc_handler(LOG.name) LOG.debug("Starting script with arguments: %s", " ".join(sys.argv)) # Keep track of things going wrong to tell the user what went wrong (we want to create as much as possible) status_to_return = STATUS_SUCCESS # Compositor validation # XXX: Hack to make `polar2grid.sh crefl gtiff` work like legacy crefl2gtiff.sh script if args.subgroup_args['Frontend Swath Extraction'].get('no_compositors'): LOG.debug("Removing all compositors") args.compositors = [] 
elif args.frontend == 'crefl': if args.backend in ['awips', 'scmi']: LOG.debug("Adding 'crefl_sharpen' compositor") args.compositors.append('crefl_sharpen' if args.backend == 'scmi' else 'crefl_sharpen_awips') else: LOG.debug("Adding 'true_color' compositor") args.compositors.append('true_color') if '--true-color' in sys.argv and 'true_color' not in args.compositors: LOG.debug("Adding 'true_color' compositor") args.compositors.append('true_color') if '--false-color' in sys.argv and 'false_color' not in args.compositors: LOG.debug("Adding 'false_color' compositor") args.compositors.append('false_color') # if "--true-color" in for c in args.compositors: if c not in compositor_manager: LOG.error("Compositor '%s' is unknown" % (c,)) raise RuntimeError("Compositor '%s' is unknown" % (c,)) # Frontend try: LOG.info("Initializing reader...") list_products = args.subgroup_args["Frontend Initialization"].pop("list_products") f = fcls(search_paths=args.data_files, **args.subgroup_args["Frontend Initialization"]) except (ValueError, KeyError): LOG.debug("Frontend exception: ", exc_info=True) LOG.error("%s frontend failed to load and sort data files (see log for details)", args.frontend) return STATUS_FRONTEND_FAIL # Rename the log file if rename_log: rename_log_file(glue_name + f.begin_time.strftime("_%Y%m%d_%H%M%S.log")) if list_products: print("\n".join(sorted(f.available_product_names))) return STATUS_SUCCESS try: LOG.info("Initializing remapping...") remapper = Remapper(**args.subgroup_args["Remapping Initialization"]) remap_kwargs = args.subgroup_args["Remapping"] except (ValueError, KeyError): LOG.debug("Remapping initialization exception: ", exc_info=True) LOG.error("Remapping initialization failed (see log for details)") return STATUS_REMAP_FAIL try: LOG.info("Initializing backend...") backend = bcls(**args.subgroup_args["Backend Initialization"]) except (ValueError, KeyError): LOG.debug("Writer initialization exception: ", exc_info=True) LOG.error("Writer initialization failed (see log for details)") return STATUS_BACKEND_FAIL try: LOG.info("Initializing compositor objects...") compositor_objects = {} for c in args.compositors: compositor_objects[c] = compositor_manager.get_compositor(c, **args.global_kwargs) except (ValueError, KeyError): LOG.debug("Compositor initialization exception: ", exc_info=True) LOG.error("Compositor initialization failed (see log for details)") return STATUS_COMP_FAIL try: LOG.info("Extracting swaths from data files available...") scene = f.create_scene(**args.subgroup_args["Frontend Swath Extraction"]) # Determine if we have a satpy scene if we should convert it to # a P2G Scene to continue processing resample_method = args.subgroup_args["Remapping"].get("remap_method") is_satpy_resample_method = resample_method in SATPY_RESAMPLERS if is_satpy_resample_method and not isinstance(scene, Scene): raise RuntimeError("Resampling method '{}' only supports 'satpy' readers".format(resample_method)) elif not is_satpy_resample_method and isinstance(scene, Scene): # convert satpy scene to P2G Scene to be compatible with old P2G resamplers scene = convert_satpy_to_p2g_swath(f, scene) if isinstance(scene, Scene): if not scene.datasets: LOG.error("No products were returned by the frontend") raise RuntimeError("No products were returned by the frontend") if args.keep_intermediate: raise RuntimeError("satpy readers do not currently support saving intermediate files") else: if (isinstance(scene, Scene) and not scene.datasets) or not scene: LOG.error("No products were returned by the 
frontend") raise RuntimeError("No products were returned by the frontend") if args.keep_intermediate: filename = glue_name + "_swath_scene.json" LOG.info("Saving intermediate swath scene as '%s'", filename) scene.save(filename) except (ValueError, KeyError): LOG.debug("Frontend data extraction exception: ", exc_info=True) LOG.error("Frontend data extraction failed (see log for details)") return STATUS_FRONTEND_FAIL # What grids should we remap to (the user should tell us or the backend should have a good set of defaults) known_grids = backend.known_grids LOG.debug("Writer known grids: %r", known_grids) grids = remap_kwargs.pop("forced_grids", None) LOG.debug("Forced Grids: %r", grids) if resample_method == "sensor" and grids != ["sensor"]: LOG.error("'sensor' resampling method only supports the 'sensor' grid") return STATUS_GDETER_FAIL if not grids and not known_grids: # the user didn't ask for any grids and the backend doesn't have specific defaults LOG.error("No grids specified and no known defaults") return STATUS_GDETER_FAIL elif not grids: # the user didn't tell us what to do, so let's try everything the backend knows how to do grids = known_grids elif known_grids is not None: # the user told us what to do, let's make sure the backend can do it grids = list(set(grids) & set(known_grids)) if not grids: LOG.error("%s backend doesn't know how to handle any of the grids specified", args.backend) return STATUS_GDETER_FAIL LOG.debug("Grids that will be mapped to: %r", grids) # Remap for grid_name in grids: LOG.info("Remapping to grid %s", grid_name) try: gridded_scene = remapper.remap_scene(scene, grid_name, **remap_kwargs) if args.keep_intermediate: filename = glue_name + "_gridded_scene_" + grid_name + ".json" LOG.debug("saving intermediate gridded scene as '%s'", filename) gridded_scene.save(filename) except (ValueError, KeyError): LOG.debug("Remapping data exception: ", exc_info=True) LOG.error("Remapping data failed") status_to_return |= STATUS_REMAP_FAIL if args.exit_on_error: return status_to_return continue if not isinstance(scene, Scene): # Composition for c, comp in compositor_objects.items(): try: LOG.info("Running gridded scene through '%s' compositor", c) gridded_scene = comp.modify_scene(gridded_scene, **args.subgroup_args[c + " Modification"]) if args.keep_intermediate: filename = glue_name + "_gridded_scene_" + grid_name + ".json" LOG.debug("Updating saved intermediate gridded scene (%s) after compositor", filename) gridded_scene.save(filename) except (KeyError, ValueError): LOG.debug("Compositor Error: ", exc_info=True) LOG.error("Could not properly modify scene using compositor '%s'" % (c,)) if args.exit_on_error: raise RuntimeError("Could not properly modify scene using compositor '%s'" % (c,)) if isinstance(f, ReaderWrapper) and not isinstance(gridded_scene, Scene): this_grid_definition = None # HACK: Create SatPy composites that were either separated before # resampling or needed resampling to be created rgbs = {} for product_name in gridded_scene.keys(): rgb_name = product_name[:-6] # Keep track of one of the grid definitions if this_grid_definition is None: this_grid_definition = gridded_scene[product_name]["grid_definition"] if product_name.endswith("rgb_0") or product_name.endswith("rgb_1") or product_name.endswith("rgb_2"): if rgb_name not in rgbs: rgbs[rgb_name] = [None, None, None] chn_idx = int(product_name[-1]) rgbs[rgb_name][chn_idx] = product_name LOG.debug("Putting RGBs back together again") for rgb_name, v in rgbs.items(): r = gridded_scene.pop(v[0]) g = 
gridded_scene.pop(v[1]) b = gridded_scene.pop(v[2]) new_info = r.copy() new_info["grid_data"] = new_info["grid_data"].replace(v[0], rgb_name) new_info["product_name"] = rgb_name data = np.memmap(new_info["grid_data"], dtype=new_info["data_type"], mode="w+", shape=(3, new_info["grid_definition"]["height"], new_info["grid_definition"]["width"])) data[0] = r.get_data_array()[:] data[1] = g.get_data_array()[:] data[2] = b.get_data_array()[:] gridded_scene[rgb_name] = new_info del data, new_info # Create composites that satpy couldn't complete until after remapping composite_names = [x for x in f.wishlist if not isinstance(x, DatasetID)] if composite_names: tmp_scene = Scene() for k, v in gridded_scene.items(): if not isinstance(v["sensor"], set): v["sensor"] = set([v["sensor"]]) # turn sensor back in to a set to match satpy usage tmp_scene[v["id"]] = DataArray(v.get_data_array(), attrs=v) tmp_scene[v["id"]].attrs["area"] = this_grid_definition.to_satpy_area() # tmp_scene[v["id"]].info = {} if v["sensor"] not in tmp_scene.attrs["sensor"]: tmp_scene.attrs["sensor"].extend(v["sensor"]) # Overwrite the wishlist that will include the above assigned datasets tmp_scene.wishlist = f.wishlist for cname in composite_names: tmp_scene.compositors[cname] = tmp_scene.cpl.load_compositor(cname, tmp_scene.attrs["sensor"]) tmp_scene.compute() tmp_scene.unload() # Add any new Datasets to our P2G Scene if SatPy created them for ds in tmp_scene: ds_id = DatasetID.from_dict(ds.attrs) if ds_id.name not in gridded_scene: LOG.debug("Adding Dataset from SatPy Commpositing: %s", ds_id) gridded_scene[ds_id.name] = dataarray_to_gridded_product(ds) gridded_scene[ds_id.name]["grid_definition"] = this_grid_definition # Remove any Products from P2G Scene that SatPy decided it didn't need anymore for k, v in list(gridded_scene.items()): if v["id"].name not in tmp_scene: LOG.debug("Removing Dataset that is no longer used: %s", k) del gridded_scene[k] del tmp_scene, v if isinstance(gridded_scene, Scene): LOG.debug("Converting satpy Scene to P2G Gridded Scene") # Convert it to P2G Gridded Scene gridded_scene = convert_satpy_to_p2g_gridded(f, gridded_scene) # Writer try: LOG.info("Creating output from data mapped to grid %s", grid_name) backend.create_output_from_scene(gridded_scene, **args.subgroup_args["Backend Output Creation"]) except (ValueError, KeyError): LOG.debug("Writer output creation exception: ", exc_info=True) LOG.error("Writer output creation failed (see log for details)") status_to_return |= STATUS_BACKEND_FAIL if args.exit_on_error: return status_to_return continue LOG.info("Processing data for grid %s complete", grid_name) # Force deletion and eventual garbage collection of the scene objects del gridded_scene del scene return status_to_return
def __getitem__(self, item):
    return DataArray(self.data[item], attrs=self.attrs)
# Tterm = np.nansum(Term*Tzimskdzt, axis=0)/TH  # [1/s2].
Tterm = np.nansum(Term * Tzimskdzt, axis=0)  # [m/s2].
Tterm[fland] = np.nan

Iterm = stripmsk(Iterm)
if TOPOG_TERMS:
    Tterm = stripmsk(Tterm)

iterm = 'I' + term
if TOPOG_TERMS:
    tterm = 'T' + term

# Add timestamp.
coords = dict(lon=(dimsxy, lont), lat=(dimsxy, latt))
Iterm = DataArray(Iterm, coords=coords, dims=dimsxy)
if TOPOG_TERMS:
    Tterm = DataArray(Tterm, coords=coords, dims=dimsxy)
t = np.array(Timestamp(date))
Iterm.coords.update(dict(time=t))

Terms.update({iterm: Iterm})
if TOPOG_TERMS:
    Terms.update({tterm: Tterm})

if SAVE_TERMS_netCDF:
    fout = headout + 'vortbdgt_' + date + '.nc'
    Datasetx(data_vars=Terms, coords=coords).to_netcdf(fout, unlimited_dims='time')
def _impute_genotype_call_with_variant_mean(
        call_g: xr.DataArray, call_g_mask: xr.DataArray) -> xr.DataArray:
    call_g_present = ~call_g_mask  # type: ignore[operator]
    variant_mean = call_g.where(call_g_present).mean(dim="samples")
    imputed_call_g: xr.DataArray = call_g.where(call_g_present, variant_mean)
    return imputed_call_g
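# Hedged usage sketch for _impute_genotype_call_with_variant_mean.
# The dims ("variants", "samples") and the small arrays below are assumptions
# made for illustration; masked entries are replaced by the per-variant mean
# of the observed calls.
import xarray as xr

call_g = xr.DataArray([[0., 2., 1.], [1., 1., 0.]], dims=("variants", "samples"))
call_g_mask = xr.DataArray([[False, True, False], [False, False, False]],
                           dims=("variants", "samples"))
imputed = _impute_genotype_call_with_variant_mean(call_g, call_g_mask)
# the masked entry in variant 0 becomes the mean of the remaining calls: (0 + 1) / 2 = 0.5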
def test_multivar_numbered_tiles_glm(self, sector):
    """Test creating tiles with multiple variables."""
    import xarray as xr
    from satpy.writers.awips_tiled import AWIPSTiledWriter
    from xarray import DataArray
    from pyresample.geometry import AreaDefinition
    from pyresample.utils import proj4_str_to_dict
    w = AWIPSTiledWriter(base_dir=self.base_dir, compress=True)
    area_def = AreaDefinition(
        'test', 'test', 'test',
        proj4_str_to_dict('+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. '
                          '+lat_0=25 +lat_1=25 +units=m +no_defs'),
        100, 200,
        (-1000., -1500., 1000., 1500.),
    )
    now = datetime(2018, 1, 1, 12, 0, 0)
    end_time = now + timedelta(minutes=20)
    ds1 = DataArray(
        da.from_array(np.linspace(0., 1., 20000, dtype=np.float32).reshape((200, 100)),
                      chunks=50),
        attrs=dict(
            name='total_energy',
            platform_name='GOES-17',
            sensor='SENSOR',
            units='1',
            area=area_def,
            start_time=now,
            end_time=end_time,
            scan_mode='M3',
            scene_abbr=sector,
            platform_shortname="G17"))
    ds2 = ds1.copy()
    ds2.attrs.update({
        'name': 'flash_extent_density',
    })
    ds3 = ds1.copy()
    ds3.attrs.update({
        'name': 'average_flash_area',
    })
    dqf = ds1.copy()
    dqf = (dqf * 255).astype(np.uint8)
    dqf.attrs = ds1.attrs.copy()
    dqf.attrs.update({
        'name': 'DQF',
        '_FillValue': 1,
    })

    w.save_datasets([ds1, ds2, ds3, dqf],
                    sector_id='TEST',
                    source_name="TESTS",
                    tile_count=(3, 3),
                    template='glm_l2_rad{}'.format(sector.lower()))
    all_files = glob(os.path.join(self.base_dir, '*_GLM*.nc'))
    assert len(all_files) == 9
    for fn in all_files:
        ds = xr.open_dataset(fn, mask_and_scale=False)
        check_required_common_attributes(ds)
        if sector == 'C':
            assert ds.attrs['time_coverage_end'] == end_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        else:  # 'F'
            assert ds.attrs['time_coverage_end'] == end_time.strftime('%Y-%m-%dT%H:%M:%SZ')
def flux_woolf2016_rapid( temp_bulk_C, salt_bulk, pCO2_bulk_uatm, pCO2_air_uatm, press_hPa, wind_ms, kw_func=gas_transfer_velocity.k_Ni00, cool_skin_bias=-0.14, salty_skin_bias=0.1, ): """ Calculates air sea CO2 fluxes using the RAPID model as defined by Woolf et al. (2016), where the concentration of CO2 in the skin and foundation layers are used to calculate the fluxes rather than delta pCO2 (latter is called bulk flux). We calculate the skin temperature and salinity using a cool and salty skin bias as defined in Woolf et al. (2016). The defaults are 0.14 degC and 0.1 PSU as taken from FluxEngine. **Assumptions: ** This function is set up to use AVHRR only OISST which reports temperatures at 1m depth based on a buoy correction (Banzon et al. 2016). We make the assumption that this bulk temperature is equivalent to foundation temperature (where nighttime and daytime temperatures are the same). We also assume that EN4 salinity is foundation salinity (this is probably more accurate than the first assumtion). Lastly we assume that the ML estimated fCO2 is bulk fCO2 – we use bulk variable inputs (SSS and SST). Parameters ---------- temp_bulk_C : np.array temperature from OISST in deg Celcius with an allowable range of [-2 : 45] salt_bulk : np.array salinity from EN4 in PSU. Allowable range [5 : 50] pCO2_bulk_uatm : np.array partial pressure of CO2 in the sea in micro-atmospheres, assuming that it was measured/predicted at the same level as the temperature and salinity (See our assumptions above). Allowable range is [50 : 1000] pCO2_air_uatm : np.array partial pressure of CO2 in the air in micro-atmospheres. Allowable range is [50:1000]. press_hPa : np.array atmospheric pressure in hecto-Pascals with an allowable range of [500 : 1500] hPa wind_ms : np.array wind speed in metres per second with an allowable range of [0 : 40] kw_func : callable a function that returns the gas transfer velocity in cm/hr. The default is the gas transfer volicty as calculated by Ho et al. (2006). This is the prefered method of Goddijn-Murphy et al. (2016). Other functions are available in the `gas_transfer` class. If you'd like to use your own inputs must be wind speed (m/s) and temperature (degC) and output must be cm/hr cool_skin_bias : float The temperature difference between the foundation/bulk temperature and the skin temperature as suggested by Wolf et al. (2016). The default is 0.14 degC where this will be subtracted from the bulk temperature, i.e. the surface is cooler due to the cooling effect of winds. salty_skin_bias : float The salinity difference between the foundation and skin layers. This is driven by evaporation and defaults to 0.1 (will be added to salinity). Reurns ------ FCO2 : np.array Sea-air CO2 flux where positive is out of the ocean and negative is into the ocean. 
Units are gC.m-2.day-1 (grams Carbon per metre squared per day) """ from numpy import array from xarray import DataArray warnings.warn("This function has not been tested yet") if isinstance(pCO2_bulk_uatm, DataArray): var = pCO2_bulk_uatm.copy() # attribute preservation else: var = None press_atm = array(press_hPa) / 1013.25 SSTfnd_C = array(temp_bulk_C) SSTskn_C = SSTfnd_C - cool_skin_bias # from default FluxEngine config SSTfnd_K = SSTfnd_C + 273.15 SSTskn_K = SSTskn_C + 273.15 SSTdelta = SSTfnd_C - SSTskn_C SSSfnd = array(salt_bulk) SSSskn = SSSfnd + salty_skin_bias # from default FluxEngine config pCO2sea = array(pCO2_bulk_uatm) * 1e-6 # to atm pCO2air = array(pCO2_air_uatm) * 1e-6 # checking units press_atm = check.pres_atm(press_atm) SSTfnd_K = check.temp_K(SSTfnd_K) SSSfnd = check.salt(SSSfnd) pCO2sea = check.CO2_mol(pCO2sea) pCO2air = check.CO2_mol(pCO2air) wind_ms = check.wind_ms(wind_ms) fCO2sea = pCO2sea * f2p.virial_coeff(SSTfnd_K, press_atm) fCO2air = pCO2air * f2p.virial_coeff(SSTskn_K, press_atm) # units in mol . L-1 . atm-1 K0fnd = sol.solubility_woolf2016(SSSfnd, SSTfnd_K, SSTdelta, press_atm) K0skn = sol.solubility_woolf2016(SSSskn, SSTskn_K, SSTdelta, press_atm) # molar mass of carbon (gC/mol * kg/g) mC = 12.0108 * 1000 # kg . mol-1 # CONC : UNIT ANALYSIS # solubility * pCO2 * molar mass # conc = (mol . L-1 . atm-1) * (atm) * (kg . mol-1) # conc = mol. mol-1 . L-1 . atm . atm-1 * kg # conc = kg . L-1 ||| gC . m-3 # Bulk uses skin, equilibrium and rapid use foundation for concSEA concSEA = K0fnd * fCO2sea * mC concAIR = K0skn * fCO2air * mC # KW : UNIT ANALYSIS # kw = (cm / 100) / (hr / 24) # kw = m . day-1 kw = kw_func(wind_ms, SSTskn_C) * (24 / 100) # FLUX : UNIT ANALYSIS # flux = (m . day-1) * (g . m-3) # flux = gC . m . m-3 . day-1 # flux = gC . m-2 . day-1 CO2flux_woolfe = kw * (concSEA - concAIR) if isinstance(var, DataArray): kw_name = kw_func.__name__[2:] attributes = dict( units="gC / m2 / day", description=f"sea-air CO2 fluxes calculated with {kw_name}", long_name="sea-air CO2 fluxes", ) CO2flux_woolfe = DataArray(data=CO2flux_woolfe, coords=var.coords, attrs=attributes) return CO2flux_woolfe
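To illustrate the expected call signature of the function above, here is a minimal usage sketch. It assumes the function and its helper modules (check, f2p, sol, gas_transfer_velocity) are importable from their host package; the module name `fluxes` is a placeholder, and the input values are arbitrary points inside the documented ranges.

import numpy as np
from fluxes import flux_woolf2016_rapid  # placeholder import path

# single-point inputs, all within the documented valid ranges
sst_C = np.array([18.5])       # bulk temperature from OISST (deg C)
sss = np.array([35.1])         # bulk salinity from EN4 (PSU)
pco2_sea = np.array([380.0])   # sea pCO2 (uatm)
pco2_air = np.array([410.0])   # air pCO2 (uatm)
press = np.array([1013.25])    # atmospheric pressure (hPa)
wind = np.array([7.2])         # wind speed (m/s)

fco2 = flux_woolf2016_rapid(sst_C, sss, pco2_sea, pco2_air, press, wind)
print(fco2)  # gC m-2 day-1; negative values mean uptake by the ocean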
def _bag2DataArray(bg, chunks, **kwargs): return DataArray(darr.from_array(np.array(bg.compute()), chunks=chunks), **kwargs)
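A short usage sketch for the helper above, assuming its module imports dask.array as `darr` and numpy as `np` (as the body implies) and that the extra keyword arguments are forwarded to the DataArray constructor; the bag contents below are arbitrary.

import dask.bag as db

# a bag of numbers; compute() yields a list that np.array can stack
bag = db.from_sequence([1.0, 2.0, 3.0, 4.0], npartitions=2)

arr = _bag2DataArray(bag, chunks=2, dims=["x"], name="from_bag")
print(arr.chunks)  # ((2, 2),)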
def make_fake_scene(content_dict, daskify=False, area=True, common_attrs=None): """Create a fake Scene. Create a fake Scene object from fake data. Data are provided in the ``content_dict`` argument. In ``content_dict``, keys should be strings or DataID, and values may be either numpy.ndarray or xarray.DataArray, in either case with exactly two dimensions. The function will convert each of the numpy.ndarray objects into an xarray.DataArray and assign those as datasets to a Scene object. A fake AreaDefinition will be assigned for each array, unless disabled by passing ``area=False``. When areas are automatically generated, arrays with the same shape will get the same area. This function is exclusively intended for testing purposes. If regular ndarrays are passed and the keyword argument daskify is True, DataArrays will be created as dask arrays. If False (default), regular DataArrays will be created. When the user passes xarray.DataArray objects then this flag has no effect. Args: content_dict (Mapping): Mapping where keys correspond to objects accepted by ``Scene.__setitem__``, i.e. strings or DataID, and values may be either ``numpy.ndarray`` or ``xarray.DataArray``. daskify (bool): optional, to use dask when converting ``numpy.ndarray`` to ``xarray.DataArray``. No effect when the values in ``content_dict`` are already ``xarray.DataArray``. area (bool or BaseDefinition): Can be ``True``, ``False``, or an instance of ``pyresample.geometry.BaseDefinition`` such as ``AreaDefinition`` or ``SwathDefinition``. If ``True``, which is the default, automatically generate areas. If ``False``, values will not have assigned areas. If an instance of ``pyresample.geometry.BaseDefinition``, those instances will be used for all generated fake datasets. Warning: Passing an area as a string (``area="germ"``) is not supported. common_attrs (Mapping): optional, additional attributes that will be added to every dataset in the scene. Returns: Scene object with datasets corresponding to content_dict. """ if common_attrs is None: common_attrs = {} sc = Scene() for (did, arr) in content_dict.items(): extra_attrs = common_attrs.copy() if isinstance(area, BaseDefinition): extra_attrs["area"] = area elif area: extra_attrs["area"] = create_area_def("test-area", { "proj": "eqc", "lat_ts": 0, "lat_0": 0, "lon_0": 0, "x_0": 0, "y_0": 0, "ellps": "sphere", "units": "m", "no_defs": None, "type": "crs" }, units="m", shape=arr.shape, resolution=1000, center=(0, 0)) if isinstance(arr, DataArray): sc[did] = arr.copy() # don't change attributes of input sc[did].attrs.update(extra_attrs) else: if daskify: arr = da.from_array(arr) sc[did] = DataArray(arr, dims=("y", "x"), attrs=extra_attrs) return sc
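A hedged usage sketch for the factory above (it mirrors satpy's `make_fake_scene` test utility, so satpy and pyresample must be installed); the dataset names and values are arbitrary.

import numpy as np

scn = make_fake_scene(
    {
        "brightness_temperature": np.full((10, 10), 273.15, dtype=np.float32),
        "reflectance": np.linspace(0, 1, 100, dtype=np.float32).reshape((10, 10)),
    },
    daskify=True,
    common_attrs={"sensor": "fake_sensor"},
)
print(scn["reflectance"].attrs["area"])  # auto-generated AreaDefinition shared by both arrays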
def ingest_nxTOMO(paths): assert len(paths) == 1 path = paths[0] h5 = h5py.File(path, 'r') data = h5['irmap']['DATA']['data'] energy = h5['irmap']['DATA']['energy'][()] sample_x = h5['irmap']['DATA']['sample_x'][()] sample_y = h5['irmap']['DATA']['sample_y'][()] xarray = DataArray(data, dims=['E (eV)', 'y (μm)', 'x (μm)'], coords=[energy, sample_y, sample_x]) dask_data = da.from_array(xarray) projections = [('nxSTXM', { 'irmap/DATA/data': ('primary', 'raw'), 'irmap/DATA/energy': energy, 'irmap/DATA/sample_x': sample_x, 'irmap/DATA/sample_y': sample_y })] # Compose run start run_bundle = event_model.compose_run( ) # type: event_model.ComposeRunBundle start_doc = run_bundle.start_doc start_doc["sample_name"] = Path(paths[0]).resolve().stem start_doc["projections"] = projections yield 'start', start_doc # Compose descriptor source = 'nxSTXM' frame_data_keys = { 'raw': { 'source': source, 'dtype': 'number', 'dims': xarray.dims, # 'coords': [energy, sample_y, sample_x], 'shape': data.shape } } frame_stream_name = 'primary' frame_stream_bundle = run_bundle.compose_descriptor( data_keys=frame_data_keys, name=frame_stream_name, # configuration=_metadata(path) ) yield 'descriptor', frame_stream_bundle.descriptor_doc # NOTE: Resource document may be meaningful in the future. For transient access it is not useful # # Compose resource # resource = run_bundle.compose_resource(root=Path(path).root, resource_path=path, spec='NCEM_DM', resource_kwargs={}) # yield 'resource', resource.resource_doc # Compose datum_page # z_indices, t_indices = zip(*itertools.product(z_indices, t_indices)) # datum_page_doc = resource.compose_datum_page(datum_kwargs={'index_z': list(z_indices), 'index_t': list(t_indices)}) # datum_ids = datum_page_doc['datum_id'] # yield 'datum_page', datum_page_doc yield 'event', frame_stream_bundle.compose_event( data={'raw': dask_data}, timestamps={'raw': time.time()}) yield 'stop', run_bundle.compose_stop()
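A consumption sketch for the ingestor above; the HDF5 path is a placeholder and must point at a file containing the `irmap/DATA` groups the reader expects.

# placeholder path; any nxSTXM/nxTOMO file with the irmap/DATA layout works
doc_stream = ingest_nxTOMO(["/path/to/ir_map.h5"])
for name, doc in doc_stream:
    # name cycles through 'start', 'descriptor', 'event', 'stop'
    print(name, type(doc))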
class TestFacetGrid(PlotTestCase): def setUp(self): d = easy_array((10, 15, 3)) self.darray = DataArray(d, dims=['y', 'x', 'z'], coords={'z': ['a', 'b', 'c']}) self.g = xplt.FacetGrid(self.darray, col='z') def test_no_args(self): self.g.map_dataarray(xplt.contourf, 'x', 'y') # Don't want colorbar labeled with 'None' alltxt = text_in_fig() self.assertNotIn('None', alltxt) for ax in self.g.axes.flat: self.assertTrue(ax.has_data()) # default font size should be small fontsize = ax.title.get_size() self.assertLessEqual(fontsize, 12) def test_names_appear_somewhere(self): self.darray.name = 'testvar' self.g.map_dataarray(xplt.contourf, 'x', 'y') for k, ax in zip('abc', self.g.axes.flat): self.assertEqual('z = {0}'.format(k), ax.get_title()) alltxt = text_in_fig() self.assertIn(self.darray.name, alltxt) for label in ['x', 'y']: self.assertIn(label, alltxt) def test_text_not_super_long(self): self.darray.coords['z'] = [100 * letter for letter in 'abc'] g = xplt.FacetGrid(self.darray, col='z') g.map_dataarray(xplt.contour, 'x', 'y') alltxt = text_in_fig() maxlen = max(len(txt) for txt in alltxt) self.assertLess(maxlen, 50) t0 = g.axes[0, 0].get_title() self.assertTrue(t0.endswith('...')) def test_colorbar(self): vmin = self.darray.values.min() vmax = self.darray.values.max() expected = np.array((vmin, vmax)) self.g.map_dataarray(xplt.imshow, 'x', 'y') for image in plt.gcf().findobj(mpl.image.AxesImage): clim = np.array(image.get_clim()) self.assertTrue(np.allclose(expected, clim)) self.assertEqual(1, len(find_possible_colorbars())) def test_empty_cell(self): g = xplt.FacetGrid(self.darray, col='z', col_wrap=2) g.map_dataarray(xplt.imshow, 'x', 'y') bottomright = g.axes[-1, -1] self.assertFalse(bottomright.has_data()) self.assertFalse(bottomright.get_visible()) def test_norow_nocol_error(self): with self.assertRaisesRegexp(ValueError, r'[Rr]ow'): xplt.FacetGrid(self.darray) def test_groups(self): self.g.map_dataarray(xplt.imshow, 'x', 'y') upperleft_dict = self.g.name_dicts[0, 0] upperleft_array = self.darray.loc[upperleft_dict] z0 = self.darray.isel(z=0) self.assertDataArrayEqual(upperleft_array, z0) def test_float_index(self): self.darray.coords['z'] = [0.1, 0.2, 0.4] g = xplt.FacetGrid(self.darray, col='z') g.map_dataarray(xplt.imshow, 'x', 'y') def test_nonunique_index_error(self): self.darray.coords['z'] = [0.1, 0.2, 0.2] with self.assertRaisesRegexp(ValueError, r'[Uu]nique'): xplt.FacetGrid(self.darray, col='z') def test_robust(self): z = np.zeros((20, 20, 2)) darray = DataArray(z, dims=['y', 'x', 'z']) darray[:, :, 1] = 1 darray[2, 0, 0] = -1000 darray[3, 0, 0] = 1000 g = xplt.FacetGrid(darray, col='z') g.map_dataarray(xplt.imshow, 'x', 'y', robust=True) # Color limits should be 0, 1 # The largest number displayed in the figure should be less than 21 numbers = set() alltxt = text_in_fig() for txt in alltxt: try: numbers.add(float(txt)) except ValueError: pass largest = max(abs(x) for x in numbers) self.assertLess(largest, 21) def test_can_set_vmin_vmax(self): vmin, vmax = 50.0, 1000.0 expected = np.array((vmin, vmax)) self.g.map_dataarray(xplt.imshow, 'x', 'y', vmin=vmin, vmax=vmax) for image in plt.gcf().findobj(mpl.image.AxesImage): clim = np.array(image.get_clim()) self.assertTrue(np.allclose(expected, clim)) def test_can_set_norm(self): norm = mpl.colors.SymLogNorm(0.1) self.g.map_dataarray(xplt.imshow, 'x', 'y', norm=norm) for image in plt.gcf().findobj(mpl.image.AxesImage): self.assertIs(image.norm, norm) def test_figure_size(self): self.assertArrayEqual(self.g.fig.get_size_inches(), 
(10, 3)) g = xplt.FacetGrid(self.darray, col='z', size=6) self.assertArrayEqual(g.fig.get_size_inches(), (19, 6)) g = self.darray.plot.imshow(col='z', size=6) self.assertArrayEqual(g.fig.get_size_inches(), (19, 6)) g = xplt.FacetGrid(self.darray, col='z', size=4, aspect=0.5) self.assertArrayEqual(g.fig.get_size_inches(), (7, 4)) g = xplt.FacetGrid(self.darray, col='z', figsize=(9, 4)) self.assertArrayEqual(g.fig.get_size_inches(), (9, 4)) with self.assertRaisesRegexp(ValueError, "cannot provide both"): g = xplt.plot(self.darray, row=2, col='z', figsize=(6, 4), size=6) with self.assertRaisesRegexp(ValueError, "Can't use"): g = xplt.plot(self.darray, row=2, col='z', ax=plt.gca(), size=6) def test_num_ticks(self): nticks = 99 maxticks = nticks + 1 self.g.map_dataarray(xplt.imshow, 'x', 'y') self.g.set_ticks(max_xticks=nticks, max_yticks=nticks) for ax in self.g.axes.flat: xticks = len(ax.get_xticks()) yticks = len(ax.get_yticks()) self.assertLessEqual(xticks, maxticks) self.assertLessEqual(yticks, maxticks) self.assertGreaterEqual(xticks, nticks / 2.0) self.assertGreaterEqual(yticks, nticks / 2.0) def test_map(self): self.g.map(plt.contourf, 'x', 'y', Ellipsis) self.g.map(lambda: None) def test_map_dataset(self): g = xplt.FacetGrid(self.darray.to_dataset(name='foo'), col='z') g.map(plt.contourf, 'x', 'y', 'foo') alltxt = text_in_fig() for label in ['x', 'y']: self.assertIn(label, alltxt) # everything has a label self.assertNotIn('None', alltxt) # colorbar can't be inferred automatically self.assertNotIn('foo', alltxt) self.assertEqual(0, len(find_possible_colorbars())) g.add_colorbar(label='colors!') self.assertIn('colors!', text_in_fig()) self.assertEqual(1, len(find_possible_colorbars())) def test_set_axis_labels(self): g = self.g.map_dataarray(xplt.contourf, 'x', 'y') g.set_axis_labels('longitude', 'latitude') alltxt = text_in_fig() for label in ['longitude', 'latitude']: self.assertIn(label, alltxt) def test_facetgrid_colorbar(self): a = easy_array((10, 15, 4)) d = DataArray(a, dims=['y', 'x', 'z'], name='foo') d.plot.imshow(x='x', y='y', col='z') self.assertEqual(1, len(find_possible_colorbars())) d.plot.imshow(x='x', y='y', col='z', add_colorbar=True) self.assertEqual(1, len(find_possible_colorbars())) d.plot.imshow(x='x', y='y', col='z', add_colorbar=False) self.assertEqual(0, len(find_possible_colorbars())) def test_facetgrid_polar(self): # test if polar projection in FacetGrid does not raise an exception self.darray.plot.pcolormesh(col='z', subplot_kws=dict(projection='polar'), sharex=False, sharey=False)
def escore( tgt: xr.DataArray, sim: xr.DataArray, dims: Sequence[str] = ("variables", "time"), N: int = 0, # noqa scale: bool = False, ) -> xr.DataArray: r"""Energy score, or energy dissimilarity metric, based on [SzekelyRizzo]_ and [Cannon18]_. Parameters ---------- tgt: xr.DataArray Target observations. sim: xr.DataArray Candidate observations. Must have the same dimensions as `tgt`. dims: sequence of 2 strings The names of the dimensions along which the variables and observation points are listed. `tgt` and `sim` can have different lengths along the second one, but must be equal along the first one. The result will keep all other dimensions. N : int If larger than 0, the number of observations to use in the score computation. The points are taken evenly distributed along `obs_dim`. scale: bool Whether to scale the data before computing the score. If True, both arrays are scaled according to the mean and standard deviation of `tgt` along `obs_dim`. (std computed with `ddof=1` and both statistics excluding NaN values). Returns ------- xr.DataArray e-score with dimensions not in `dims`. Notes ----- Explanation adapted from the "energy" R package documentation. The e-distance between two clusters :math:`C_i`, :math:`C_j` (tgt and sim) of size :math:`n_i,n_j` proposed by Székely and Rizzo (2004) is defined by: .. math:: e(C_i,C_j) = \frac{1}{2}\frac{n_i n_j}{n_i + n_j} \left[2 M_{ij} - M_{ii} - M_{jj}\right] where .. math:: M_{ij} = \frac{1}{n_i n_j} \sum_{p = 1}^{n_i} \sum_{q = 1}^{n_j} \left\Vert X_{ip} - X_{jq} \right\Vert. :math:`\Vert\cdot\Vert` denotes the Euclidean norm, :math:`X_{ip}` denotes the p-th observation in the i-th cluster. The input scaling and the factor :math:`\frac{1}{2}` in the first equation are additions of [Cannon18]_ to the metric. With that factor, the test becomes identical to the one defined by [BaringhausFranz]_. This version is tested against values taken from Alex Cannon's MBC R package. References ---------- .. [BaringhausFranz] Baringhaus, L. and Franz, C. (2004) On a new multivariate two-sample test, Journal of Multivariate Analysis, 88(1), 190–206. https://doi.org/10.1016/s0047-259x(03)00079-4 .. [Cannon18] Cannon, A. J. (2018). Multivariate quantile mapping bias correction: An N-dimensional probability density function transform for climate model simulations of multiple variables. Climate Dynamics, 50(1), 31–49. https://doi.org/10.1007/s00382-017-3580-6 .. [SzekelyRizzo] Székely, G. J. and Rizzo, M. L. (2004) Testing for Equal Distributions in High Dimension, InterStat, November (5) """ pts_dim, obs_dim = dims if N > 0: # If N is non-zero we only take around N points, evenly distributed sim_step = int(np.ceil(sim[obs_dim].size / N)) sim = sim.isel({obs_dim: slice(None, None, sim_step)}) tgt_step = int(np.ceil(tgt[obs_dim].size / N)) tgt = tgt.isel({obs_dim: slice(None, None, tgt_step)}) if scale: tgt, avg, std = standardize(tgt) sim, _, _ = standardize(sim, avg, std) # The dimension renaming is to allow different coordinates. # Otherwise, apply_ufunc tries to align both obs_dim together. new_dim = get_temp_dimname(tgt.dims, obs_dim) sim = sim.rename({obs_dim: new_dim}) out = xr.apply_ufunc( _escore, tgt, sim, input_core_dims=[[pts_dim, obs_dim], [pts_dim, new_dim]], output_dtypes=[sim.dtype], dask="parallelized", ) out.name = "escores" out.attrs.update( long_name="Energy dissimilarity metric", description=f"Escores computed from {N or 'all'} points.", references="Székely, G. J. and Rizzo, M. L.
(2004) Testing for Equal Distributions in High Dimension, InterStat, November (5)", ) return out
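A minimal usage sketch for the metric above, assuming it and its private helpers (`_escore`, `standardize`, `get_temp_dimname`) are importable from the same module (this mirrors xclim's sdba processing utilities); the random data are illustrative only.

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
tgt = xr.DataArray(rng.normal(size=(2, 500)), dims=("variables", "time"))
sim = xr.DataArray(rng.normal(loc=0.3, size=(2, 400)), dims=("variables", "time"))

# the "variables" dimension must match; "time" may differ in length
score = escore(tgt, sim, dims=("variables", "time"), scale=True)
print(float(score))  # larger values indicate greater dissimilarity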
def test_contains_cftime_datetimes_1d(data): assert contains_cftime_datetimes(data.time) @pytest.mark.skipif(not has_dask, reason='dask not installed') @pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_contains_cftime_datetimes_dask_1d(data): assert contains_cftime_datetimes(data.time.chunk()) @pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_contains_cftime_datetimes_3d(times_3d): assert contains_cftime_datetimes(times_3d) @pytest.mark.skipif(not has_dask, reason='dask not installed') @pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_contains_cftime_datetimes_dask_3d(times_3d): assert contains_cftime_datetimes(times_3d.chunk()) @pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data) @pytest.mark.skipif(not has_dask, reason='dask not installed') @pytest.mark.parametrize('non_cftime_data', [DataArray([]), DataArray([1, 2])]) def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data): assert not contains_cftime_datetimes(non_cftime_data.chunk())
def jitter( x: xr.DataArray, lower: str = None, upper: str = None, minimum: str = None, maximum: str = None, ) -> xr.DataArray: """Replaces values under a threshold and values above another by a uniform random noise. Do not confuse with R's jitter, which adds uniform noise instead of replacing values. Parameters ---------- x : xr.DataArray Values. lower : str Threshold under which to add uniform random noise to values, a quantity with units. If None, no jittering is performed on the lower end. upper : str Threshold over which to add uniform random noise to values, a quantity with units. If None, no jittering is performed on the upper end. minimum : str Lower limit (excluded) for the lower end random noise, a quantity with units. If None but `lower` is not None, 0 is used. maximum : str Upper limit (excluded) for the upper end random noise, a quantity with units. If `upper` is not None, it must be given. Returns ------- xr.DataArray Same as `x` but values < lower are replaced by a uniform noise in range (minimum, lower) and values >= upper are replaced by a uniform noise in range [upper, maximum). The two noise distributions are independent. """ out = x notnull = x.notnull() if lower is not None: lower = convert_units_to(lower, x) minimum = convert_units_to(minimum, x) if minimum is not None else 0 minimum = minimum + np.finfo(x.dtype).eps if uses_dask(x): jitter = dsk.random.uniform( low=minimum, high=lower, size=x.shape, chunks=x.chunks ) else: jitter = np.random.uniform(low=minimum, high=lower, size=x.shape) out = out.where(~((x < lower) & notnull), jitter.astype(x.dtype)) if upper is not None: if maximum is None: raise ValueError("If 'upper' is given, so must 'maximum'.") upper = convert_units_to(upper, x) maximum = convert_units_to(maximum, x) if uses_dask(x): jitter = dsk.random.uniform( low=upper, high=maximum, size=x.shape, chunks=x.chunks ) else: jitter = np.random.uniform(low=upper, high=maximum, size=x.shape) out = out.where(~((x >= upper) & notnull), jitter.astype(x.dtype)) copy_all_attrs(out, x) # copy attrs and same units return out
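A usage sketch for the function above (it mirrors xclim's `sdba.processing.jitter`, so the surrounding unit helpers such as `convert_units_to` must be available); the thresholds and values are illustrative.

import numpy as np
import xarray as xr

pr = xr.DataArray(
    np.array([0.0, 0.05, 1.2, 250.0, 3.0]),
    dims=("time",),
    attrs={"units": "mm/d"},
)

# replace drizzle (< 0.1 mm/d) by noise in (0, 0.1) and extreme values
# (>= 200 mm/d) by noise in [200, 300); 'maximum' is required with 'upper'
out = jitter(pr, lower="0.1 mm/d", upper="200 mm/d", maximum="300 mm/d")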
def get_test_content(self, filename, filename_info, filetype_info): """Mimic reader input file content""" file_content = { '/attr/Platform_Name': filename_info['platform_shortname'], '/attr/Element_Resolution': 2., '/attr/Line_Resolution': 2., '/attr/Subsatellite_Longitude': -70.2 if 'GOES' in filename_info['platform_shortname'] else 140.65, 'pixel_longitude': DEFAULT_LON_DATA, 'pixel_longitude/attr/scale_factor': 1., 'pixel_longitude/attr/add_offset': 0., 'pixel_longitude/shape': DEFAULT_FILE_SHAPE, 'pixel_longitude/attr/_FillValue': np.nan, 'pixel_latitude': DEFAULT_LAT_DATA, 'pixel_latitude/attr/scale_factor': 1., 'pixel_latitude/attr/add_offset': 0., 'pixel_latitude/shape': DEFAULT_FILE_SHAPE, 'pixel_latitude/attr/_FillValue': np.nan, } sensor = { 'HIMAWARI-8': 'himawari8', 'GOES-16': 'goes16', 'GOES-13': 'goes', 'GOES-14': 'goes', 'GOES-15': 'goes', }[filename_info['platform_shortname']] file_content['/attr/Sensor_Name'] = sensor if filename_info['platform_shortname'] == 'HIMAWARI-8': file_content['pixel_longitude'] = DEFAULT_LON_DATA + 130. file_content['variable1'] = DEFAULT_FILE_DATA.astype(np.float32) file_content['variable1/attr/_FillValue'] = -1 file_content['variable1/attr/scale_factor'] = 1. file_content['variable1/attr/add_offset'] = 0. file_content['variable1/attr/units'] = '1' file_content['variable1/shape'] = DEFAULT_FILE_SHAPE # data with fill values file_content['variable2'] = np.ma.masked_array( DEFAULT_FILE_DATA.astype(np.float32), mask=np.zeros_like(DEFAULT_FILE_DATA)) file_content['variable2'].mask[::5, ::5] = True file_content['variable2/attr/_FillValue'] = -1 file_content['variable2/attr/scale_factor'] = 1. file_content['variable2/attr/add_offset'] = 0. file_content['variable2/attr/units'] = '1' file_content['variable2/shape'] = DEFAULT_FILE_SHAPE # category file_content['variable3'] = DEFAULT_FILE_DATA.astype(np.byte) file_content['variable3/attr/_FillValue'] = -128 file_content[ 'variable3/attr/flag_meanings'] = "clear water supercooled mixed ice unknown" file_content['variable3/attr/flag_values'] = [0, 1, 2, 3, 4, 5] file_content['variable3/attr/units'] = '1' file_content['variable3/shape'] = DEFAULT_FILE_SHAPE # convert to xarrays from xarray import DataArray for key, val in file_content.items(): if isinstance(val, np.ndarray): attrs = {} for a in [ '_FillValue', 'flag_meanings', 'flag_values', 'units' ]: if key + '/attr/' + a in file_content: attrs[a] = file_content[key + '/attr/' + a] if val.ndim > 1: file_content[key] = DataArray(val, dims=('lines', 'elements'), attrs=attrs) else: file_content[key] = DataArray(val, attrs=attrs) return file_content
class TestPlot(PlotTestCase): def setUp(self): self.darray = DataArray(easy_array((2, 3, 4))) def test1d(self): self.darray[:, 0, 0].plot() def test_2d_before_squeeze(self): a = DataArray(easy_array((1, 5))) a.plot() def test2d_uniform_calls_imshow(self): self.assertTrue(self.imshow_called(self.darray[:, :, 0].plot.imshow)) def test2d_nonuniform_calls_contourf(self): a = self.darray[:, :, 0] a.coords['dim_1'] = [2, 1, 89] self.assertTrue(self.contourf_called(a.plot.contourf)) def test3d(self): self.darray.plot() def test_can_pass_in_axis(self): self.pass_in_axis(self.darray.plot) def test__infer_interval_breaks(self): self.assertArrayEqual([-0.5, 0.5, 1.5], _infer_interval_breaks([0, 1])) self.assertArrayEqual([-0.5, 0.5, 5.0, 9.5, 10.5], _infer_interval_breaks([0, 1, 9, 10])) self.assertArrayEqual( pd.date_range('20000101', periods=4) - np.timedelta64(12, 'h'), _infer_interval_breaks(pd.date_range('20000101', periods=3))) # make a bounded 2D array that we will center and re-infer xref, yref = np.meshgrid(np.arange(6), np.arange(5)) cx = (xref[1:, 1:] + xref[:-1, :-1]) / 2 cy = (yref[1:, 1:] + yref[:-1, :-1]) / 2 x = _infer_interval_breaks(cx, axis=1) x = _infer_interval_breaks(x, axis=0) y = _infer_interval_breaks(cy, axis=1) y = _infer_interval_breaks(y, axis=0) np.testing.assert_allclose(xref, x) np.testing.assert_allclose(yref, y) def test_datetime_dimension(self): nrow = 3 ncol = 4 time = pd.date_range('2000-01-01', periods=nrow) a = DataArray(easy_array((nrow, ncol)), coords=[('time', time), ('y', range(ncol))]) a.plot() ax = plt.gca() self.assertTrue(ax.has_data()) def test_convenient_facetgrid(self): a = easy_array((10, 15, 4)) d = DataArray(a, dims=['y', 'x', 'z']) d.coords['z'] = list('abcd') g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool') self.assertArrayEqual(g.axes.shape, [2, 2]) for ax in g.axes.flat: self.assertTrue(ax.has_data()) with self.assertRaisesRegexp(ValueError, '[Ff]acet'): d.plot(x='x', y='y', col='z', ax=plt.gca()) with self.assertRaisesRegexp(ValueError, '[Ff]acet'): d[0].plot(x='x', y='y', col='z', ax=plt.gca()) def test_subplot_kws(self): a = easy_array((10, 15, 4)) d = DataArray(a, dims=['y', 'x', 'z']) d.coords['z'] = list('abcd') g = d.plot(x='x', y='y', col='z', col_wrap=2, cmap='cool', subplot_kws=dict(axisbg='r')) for ax in g.axes.flat: try: # mpl V2 self.assertEqual(ax.get_facecolor()[0:3], mpl.colors.to_rgb('r')) except AttributeError: self.assertEqual(ax.get_axis_bgcolor(), 'r') def test_plot_size(self): self.darray[:, 0, 0].plot(figsize=(13, 5)) assert tuple(plt.gcf().get_size_inches()) == (13, 5) self.darray.plot(figsize=(13, 5)) assert tuple(plt.gcf().get_size_inches()) == (13, 5) self.darray.plot(size=5) assert plt.gcf().get_size_inches()[1] == 5 self.darray.plot(size=5, aspect=2) assert tuple(plt.gcf().get_size_inches()) == (10, 5) with self.assertRaisesRegexp(ValueError, 'cannot provide both'): self.darray.plot(ax=plt.gca(), figsize=(3, 4)) with self.assertRaisesRegexp(ValueError, 'cannot provide both'): self.darray.plot(size=5, figsize=(3, 4)) with self.assertRaisesRegexp(ValueError, 'cannot provide both'): self.darray.plot(size=5, ax=plt.gca()) with self.assertRaisesRegexp(ValueError, 'cannot provide `aspect`'): self.darray.plot(aspect=1) def test_convenient_facetgrid_4d(self): a = easy_array((10, 15, 2, 3)) d = DataArray(a, dims=['y', 'x', 'columns', 'rows']) g = d.plot(x='x', y='y', col='columns', row='rows') self.assertArrayEqual(g.axes.shape, [3, 2]) for ax in g.axes.flat: self.assertTrue(ax.has_data()) with 
self.assertRaisesRegexp(ValueError, '[Ff]acet'): d.plot(x='x', y='y', col='columns', ax=plt.gca())
def from_series_or_scalar(se): try: return DataArray.from_series(se) except AttributeError: # scalar case return DataArray(se)
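A short usage sketch for the small adapter above; it accepts either a pandas Series or a plain scalar.

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0], index=pd.Index([10, 20, 30], name="x"))
da = from_series_or_scalar(s)        # 1-D DataArray with coordinate "x"
scalar = from_series_or_scalar(5.0)  # 0-d DataArray wrapping the scalar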
class TestDataArrayAndDataset(DaskTestCase): def assertLazyAndIdentical(self, expected, actual): self.assertLazyAnd(expected, actual, assert_identical) def assertLazyAndAllClose(self, expected, actual): self.assertLazyAnd(expected, actual, assert_allclose) def assertLazyAndEqual(self, expected, actual): self.assertLazyAnd(expected, actual, assert_equal) @pytest.fixture(autouse=True) def setUp(self): self.values = np.random.randn(4, 6) self.data = da.from_array(self.values, chunks=(2, 2)) self.eager_array = DataArray(self.values, coords={"x": range(4)}, dims=("x", "y"), name="foo") self.lazy_array = DataArray(self.data, coords={"x": range(4)}, dims=("x", "y"), name="foo") def test_rechunk(self): chunked = self.eager_array.chunk({"x": 2}).chunk({"y": 2}) assert chunked.chunks == ((2, ) * 2, (2, ) * 3) self.assertLazyAndIdentical(self.lazy_array, chunked) def test_new_chunk(self): chunked = self.eager_array.chunk() assert chunked.data.name.startswith("xarray-<this-array>") def test_lazy_dataset(self): lazy_ds = Dataset({"foo": (("x", "y"), self.data)}) assert isinstance(lazy_ds.foo.variable.data, da.Array) def test_lazy_array(self): u = self.eager_array v = self.lazy_array self.assertLazyAndAllClose(u, v) self.assertLazyAndAllClose(-u, -v) self.assertLazyAndAllClose(u.T, v.T) self.assertLazyAndAllClose(u.mean(), v.mean()) self.assertLazyAndAllClose(1 + u, 1 + v) actual = xr.concat([v[:2], v[2:]], "x") self.assertLazyAndAllClose(u, actual) def test_compute(self): u = self.eager_array v = self.lazy_array assert dask.is_dask_collection(v) (v2, ) = dask.compute(v + 1) assert not dask.is_dask_collection(v2) assert ((u + 1).data == v2.data).all() def test_persist(self): u = self.eager_array v = self.lazy_array + 1 (v2, ) = dask.persist(v) assert v is not v2 assert len(v2.__dask_graph__()) < len(v.__dask_graph__()) assert v2.__dask_keys__() == v.__dask_keys__() assert dask.is_dask_collection(v) assert dask.is_dask_collection(v2) self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) def test_concat_loads_variables(self): # Test that concat() computes not-in-memory variables at most once # and loads them in the output, while leaving the input unaltered. d1 = build_dask_array("d1") c1 = build_dask_array("c1") d2 = build_dask_array("d2") c2 = build_dask_array("c2") d3 = build_dask_array("d3") c3 = build_dask_array("c3") # Note: c is a non-index coord. # Index coords are loaded by IndexVariable.__init__. 
ds1 = Dataset(data_vars={"d": ("x", d1)}, coords={"c": ("x", c1)}) ds2 = Dataset(data_vars={"d": ("x", d2)}, coords={"c": ("x", c2)}) ds3 = Dataset(data_vars={"d": ("x", d3)}, coords={"c": ("x", c3)}) assert kernel_call_count == 0 out = xr.concat([ds1, ds2, ds3], dim="n", data_vars="different", coords="different") # each kernel is computed exactly once assert kernel_call_count == 6 # variables are loaded in the output assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars="all", coords="all") # no extra kernel calls assert kernel_call_count == 6 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=["d"], coords=["c"]) # no extra kernel calls assert kernel_call_count == 6 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=[], coords=[]) # variables are loaded once as we are validing that they're identical assert kernel_call_count == 12 assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) out = xr.concat( [ds1, ds2, ds3], dim="n", data_vars="different", coords="different", compat="identical", ) # compat=identical doesn't do any more kernel calls than compat=equals assert kernel_call_count == 18 assert isinstance(out["d"].data, np.ndarray) assert isinstance(out["c"].data, np.ndarray) # When the test for different turns true halfway through, # stop computing variables as it would not have any benefit ds4 = Dataset(data_vars={"d": ("x", [2.0])}, coords={"c": ("x", [2.0])}) out = xr.concat([ds1, ds2, ds4, ds3], dim="n", data_vars="different", coords="different") # the variables of ds1 and ds2 were computed, but those of ds3 didn't assert kernel_call_count == 22 assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) # the data of ds1 and ds2 was loaded into numpy and then # concatenated to the data of ds3. Thus, only ds3 is computed now. 
out.compute() assert kernel_call_count == 24 # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 assert ds2["c"].data is c2 assert ds3["d"].data is d3 assert ds3["c"].data is c3 # now check that concat() is correctly using dask name equality to skip loads out = xr.concat([ds1, ds1, ds1], dim="n", data_vars="different", coords="different") assert kernel_call_count == 24 # variables are not loaded in the output assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat([ds1, ds1, ds1], dim="n", data_vars=[], coords=[], compat="identical") assert kernel_call_count == 24 # variables are not loaded in the output assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) out = xr.concat( [ds1, ds2.compute(), ds3], dim="n", data_vars="all", coords="different", compat="identical", ) # c1,c3 must be computed for comparison since c2 is numpy; # d2 is computed too assert kernel_call_count == 28 out = xr.concat( [ds1, ds2.compute(), ds3], dim="n", data_vars="all", coords="all", compat="identical", ) # no extra computes assert kernel_call_count == 30 # Finally, test that originals are unaltered assert ds1["d"].data is d1 assert ds1["c"].data is c1 assert ds2["d"].data is d2 assert ds2["c"].data is c2 assert ds3["d"].data is d3 assert ds3["c"].data is c3 def test_groupby(self): u = self.eager_array v = self.lazy_array expected = u.groupby("x").mean(...) actual = v.groupby("x").mean(...) self.assertLazyAndAllClose(expected, actual) def test_groupby_first(self): u = self.eager_array v = self.lazy_array for coords in [u.coords, v.coords]: coords["ab"] = ("x", ["a", "a", "b", "b"]) with raises_regex(NotImplementedError, "dask"): v.groupby("ab").first() expected = u.groupby("ab").first() actual = v.groupby("ab").first(skipna=False) self.assertLazyAndAllClose(expected, actual) def test_reindex(self): u = self.eager_array.assign_coords(y=range(6)) v = self.lazy_array.assign_coords(y=range(6)) for kwargs in [ { "x": [2, 3, 4] }, { "x": [1, 100, 2, 101, 3] }, { "x": [2.5, 3, 3.5], "y": [2, 2.5, 3] }, ]: expected = u.reindex(**kwargs) actual = v.reindex(**kwargs) self.assertLazyAndAllClose(expected, actual) def test_to_dataset_roundtrip(self): u = self.eager_array v = self.lazy_array expected = u.assign_coords(x=u["x"]) self.assertLazyAndEqual(expected, v.to_dataset("x").to_array("x")) def test_merge(self): def duplicate_and_merge(array): return xr.merge([array, array.rename("bar")]).to_array() expected = duplicate_and_merge(self.eager_array) actual = duplicate_and_merge(self.lazy_array) self.assertLazyAndEqual(expected, actual) @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_ufuncs(self): u = self.eager_array v = self.lazy_array self.assertLazyAndAllClose(np.sin(u), xu.sin(v)) def test_where_dispatching(self): a = np.arange(10) b = a > 3 x = da.from_array(a, 5) y = da.from_array(b, 5) expected = DataArray(a).where(b) self.assertLazyAndEqual(expected, DataArray(a).where(y)) self.assertLazyAndEqual(expected, DataArray(x).where(b)) self.assertLazyAndEqual(expected, DataArray(x).where(y)) def test_simultaneous_compute(self): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk() count = [0] def counting_get(*args, **kwargs): count[0] += 1 return dask.get(*args, **kwargs) ds.load(scheduler=counting_get) assert count[0] == 1 def test_stack(self): data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4)) 
arr = DataArray(data, dims=("w", "x", "y")) stacked = arr.stack(z=("x", "y")) z = pd.MultiIndex.from_product( [np.arange(3), np.arange(4)], names=["x", "y"]) expected = DataArray(data.reshape(2, -1), {"z": z}, dims=["w", "z"]) assert stacked.data.chunks == expected.data.chunks self.assertLazyAndEqual(expected, stacked) def test_dot(self): eager = self.eager_array.dot(self.eager_array[0]) lazy = self.lazy_array.dot(self.lazy_array[0]) self.assertLazyAndAllClose(eager, lazy) @pytest.mark.skipif(LooseVersion(dask.__version__) >= "2.0", reason="no meta") def test_dataarray_repr_legacy(self): data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) expected = dedent("""\ <xarray.DataArray 'data' (x: 1)> {!r} Coordinates: y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray> Dimensions without coordinates: x""".format(data)) assert expected == repr(a) assert kernel_call_count == 0 # should not evaluate dask array @pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta") def test_dataarray_repr(self): data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) expected = dedent("""\ <xarray.DataArray 'data' (x: 1)> {!r} Coordinates: y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray> Dimensions without coordinates: x""".format(data)) assert expected == repr(a) assert kernel_call_count == 0 # should not evaluate dask array @pytest.mark.skipif(LooseVersion(dask.__version__) < "2.0", reason="needs meta") def test_dataset_repr(self): data = build_dask_array("data") nonindex_coord = build_dask_array("coord") ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)}) expected = dedent("""\ <xarray.Dataset> Dimensions: (x: 1) Coordinates: y (x) int64 dask.array<chunksize=(1,), meta=np.ndarray> Dimensions without coordinates: x Data variables: a (x) int64 dask.array<chunksize=(1,), meta=np.ndarray>""" ) assert expected == repr(ds) assert kernel_call_count == 0 # should not evaluate dask array def test_dataarray_pickle(self): # Test that pickling/unpickling converts the dask backend # to numpy in neither the data variable nor the non-index coords data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a1 = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) a1.compute() assert not a1._in_memory assert not a1.coords["y"]._in_memory assert kernel_call_count == 2 a2 = pickle.loads(pickle.dumps(a1)) assert kernel_call_count == 2 assert_identical(a1, a2) assert not a1._in_memory assert not a2._in_memory assert not a1.coords["y"]._in_memory assert not a2.coords["y"]._in_memory def test_dataset_pickle(self): # Test that pickling/unpickling converts the dask backend # to numpy in neither the data variables nor the non-index coords data = build_dask_array("data") nonindex_coord = build_dask_array("coord") ds1 = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)}) ds1.compute() assert not ds1["a"]._in_memory assert not ds1["y"]._in_memory assert kernel_call_count == 2 ds2 = pickle.loads(pickle.dumps(ds1)) assert kernel_call_count == 2 assert_identical(ds1, ds2) assert not ds1["a"]._in_memory assert not ds2["a"]._in_memory assert not ds1["y"]._in_memory assert not ds2["y"]._in_memory def test_dataarray_getattr(self): # ipython/jupyter does a long list of getattr() calls to when trying to # represent an object. 
# Make sure we're not accidentally computing dask variables. data = build_dask_array("data") nonindex_coord = build_dask_array("coord") a = DataArray(data, dims=["x"], coords={"y": ("x", nonindex_coord)}) with suppress(AttributeError): getattr(a, "NOTEXIST") assert kernel_call_count == 0 def test_dataset_getattr(self): # Test that pickling/unpickling converts the dask backend # to numpy in neither the data variables nor the non-index coords data = build_dask_array("data") nonindex_coord = build_dask_array("coord") ds = Dataset(data_vars={"a": ("x", data)}, coords={"y": ("x", nonindex_coord)}) with suppress(AttributeError): getattr(ds, "NOTEXIST") assert kernel_call_count == 0 def test_values(self): # Test that invoking the values property does not convert the dask # backend to numpy a = DataArray([1, 2]).chunk() assert not a._in_memory assert a.values.tolist() == [1, 2] assert not a._in_memory def test_from_dask_variable(self): # Test array creation from Variable with dask backend. # This is used e.g. in broadcast() a = DataArray(self.lazy_array.variable, coords={"x": range(4)}, name="foo") self.assertLazyAndIdentical(self.lazy_array, a)
def ensembles2dataset(ensdict, dsattrs={}, verbose=False, print_every=1000): """ Convert a dictionary of ensembles into an xarray Dataset object. """ mms2ms = 1e-3 fbadens = np.array([not isinstance(ens, dict) for ens in ensdict]) nt = len(ensdict) - np.sum(fbadens) n=0 ensdict0 = np.nan while not isinstance(ensdict0, dict): ensdict0 = ensdict[n] n+=1 nz = ensdict0['fixed_leader_janus']['number_of_cells'] sk = np.ma.zeros((nz, nt))*np.nan # Beam vels stored in mm/s # as int64 to save memory. b1, b2, b3, b4 = sk.copy(), sk.copy(), sk.copy(), sk.copy() sk0 = np.ma.zeros(nt)*np.nan cor1, cor2, cor3, cor4 = sk.copy(), sk.copy(), sk.copy(), sk.copy() int1, int2, int3, int4 = sk.copy(), sk.copy(), sk.copy(), sk.copy() b5, cor5, int5 = sk.copy(), sk.copy(), sk.copy() heading, pitch, roll = sk0.copy(), sk0.copy(), sk0.copy() tjanus = [] ensdict = np.array(ensdict)[~fbadens] ensdict = ensdict.tolist() n=0 for ensarr in ensdict: tjanus.append(ensarr['timestamp']) heading[n] = ensarr['variable_leader_janus']['heading'] pitch[n] = ensarr['variable_leader_janus']['pitch'] roll[n] = ensarr['variable_leader_janus']['roll'] vjanus = ensarr['velocity_janus']['data'] b1[:, n] = vjanus[:, 0] b2[:, n] = vjanus[:, 1] b3[:, n] = vjanus[:, 2] b4[:, n] = vjanus[:, 3] b5[:, n] = ensarr['velocity_beam5']['data'].squeeze() corjanus = ensarr['correlation_janus']['data'] cor1[:, n] = corjanus[:, 0] cor2[:, n] = corjanus[:, 1] cor3[:, n] = corjanus[:, 2] cor4[:, n] = corjanus[:, 3] cor5[:, n] = ensarr['correlation_beam5']['data'].squeeze() intjanus = ensarr['echo_intensity_janus']['data'] int1[:, n] = intjanus[:, 0] int2[:, n] = intjanus[:, 1] int3[:, n] = intjanus[:, 2] int4[:, n] = intjanus[:, 3] int5[:, n] = ensarr['echo_intensity_beam5']['data'].squeeze() n+=1 if verbose and not n%print_every: print(n) fixj = ensdict0['fixed_leader_janus'] fix5 = ensdict0['fixed_leader_beam5'] # Add ping offset to get beam 5's timestamps. dt5 = fix5['ping_offset_time'] # In milliseconds. dt5 = np.array(Timedelta(dt5, unit='ms')) t5 = tjanus + dt5 th = fixj['beam_angle'] assert th==25 # Always 25 degrees. th = th*np.pi/180. Cth = np.cos(th) # Construct along-beam/vertical axes. cm2m = 1e-2 r1janus = fixj['bin_1_distance']*cm2m r1b5 = fix5['bin_1_distance']*cm2m ncj = fixj['number_of_cells'] nc5 = fix5['number_of_cells'] lcj = fixj['depth_cell_length']*cm2m lc5 = fix5['depth_cell_length']*cm2m Lj = ncj*lcj # Distance from center of bin 1 to the center of last bin (Janus). L5 = nc5*lc5 # Distance from center of bin 1 to the center of last bin (beam 5). rb = r1janus + np.arange(0, Lj, lcj) # Distance from xducer head # (Janus). zab = Cth*rb # Vertical distance from xducer head # (Janus). zab5 = r1b5 + np.arange(0, L5, lc5) # Distance from xducer head, also # depth for the vertical beam. 
rb = IndexVariable('z', rb, attrs={'units':'meters', 'long_name':"along-beam distance from the xducer's face to the center of the bins, for beams 1-4 (Janus)"}) zab = IndexVariable('z', zab, attrs={'units':'meters', 'long_name':"vertical distance from the instrument's head to the center of the bins, for beams 1-4 (Janus)"}) zab5 = IndexVariable('z', zab5, attrs={'units':'meters', 'long_name':"vertical distance from xducer face to the center of the bins, for beam 5 (vertical)"}) time = IndexVariable('time', tjanus, attrs={'long_name':'timestamp for beams 1-4 (Janus)'}) time5 = IndexVariable('time', t5, attrs={'long_name':'timestamp for beam 5 (vertical)'}) coords0 = [('time', time)] coords = [('z', zab), ('time', time)] coords5 = [('z5', zab5), ('time5', time5)] dims = ['z', 'time'] dims0 = ['time'] # Convert velocities to m/s. b1, b2, b3, b4, b5 = b1*mms2ms, b2*mms2ms, b3*mms2ms, b4*mms2ms, b5*mms2ms # Scale heading, pitch and roll. Sentinel V manual, p. 259. phisc = 0.01 heading *= phisc pitch *= phisc roll *= phisc arrs = (b1, b2, b3, b4, b5, cor1, cor2, cor3, cor4, cor5, int1, int2, int3, int4, int5, heading, pitch, roll) # pressure, temperature, salinity, soundspeed) long_names = ('Beam 1 velocity', 'Beam 2 velocity', 'Beam 3 velocity', 'Beam 4 velocity', 'Beam 5 velocity', 'Beam 1 correlation', 'Beam 2 correlation', 'Beam 3 correlation', 'Beam 4 correlation', 'Beam 5 correlation', 'Beam 1 echo amplitude', 'Beam 2 echo amplitude', 'Beam 3 echo amplitude', 'Beam 4 echo amplitude', 'Beam 5 echo amplitude', 'heading', 'pitch', 'roll') units = ('m/s, positive toward xducer face', 'm/s, positive toward xducer face', 'm/s, positive toward xducer face', 'm/s, positive toward xducer face', 'm/s, positive toward xducer face', 'no units', 'no units', 'no units', 'no units', 'no units', 'dB', 'dB', 'dB', 'dB', 'dB', 'degrees', 'degrees', 'degrees') names = ('b1', 'b2', 'b3', 'b4', 'b5', 'cor1', 'cor2', 'cor3', 'cor4', 'cor5', 'int1', 'int2', 'int3', 'int4', 'int5', 'phi1', 'phi2', 'phi3') data_vars = {} for arr,name,long_name,unit in zip(arrs,names,long_names,units): if 'Beam5' in long_name: coordsn = coords5 dimsn = dims elif 'phi' in name: coordsn = coords0 dimsn = dims0 else: coordsn = coords dimsn = dims if 'int' in name: arr *= 0.45 # Scale factor for echo intensity, see Sentinel V manual # Sentinel V manual p. 264. da = DataArray(arr, coords=coordsn, dims=dimsn, attrs=dict(units=unit, long_name=long_name)) data_vars.update({name:da}) allcoords = {'rb':rb} # Along-beam distance for slanted beams. allcoords.update(coords) allcoords.update(coords5) ds = Dataset(data_vars=data_vars, coords=allcoords, attrs=dsattrs) return ds
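A hedged usage sketch for the converter above; `read_ensembles` is a placeholder for whatever parser produced the per-ping dictionaries (a TRDI Sentinel V ensemble reader), and the file names are illustrative.

# placeholder parser; any reader producing the dictionary keys used above works
ensdict = read_ensembles("deployment_0001.pd0")
ds = ensembles2dataset(ensdict, dsattrs={"instrument": "TRDI Sentinel V"}, verbose=True)
ds.to_netcdf("deployment_0001.nc")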
def update_metrics( self, data_array: xr.DataArray, **kwargs ): metrics = data_array.attrs.get('metrics', {} ) metrics.update( **kwargs ) data_array.attrs['metrics'] = metrics
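The method above simply merges keyword arguments into a 'metrics' dict stored in the DataArray attrs; a minimal sketch of that behaviour, where `obj` is a placeholder for an instance of whatever class defines the method.

import numpy as np
import xarray as xr

da = xr.DataArray(np.zeros((2, 2)), dims=("y", "x"))

# obj is a hypothetical instance of the class defining update_metrics
obj.update_metrics(da, rmse=0.12, bias=-0.03)
obj.update_metrics(da, rmse=0.10)   # later calls merge into the same dict
print(da.attrs["metrics"])          # {'rmse': 0.10, 'bias': -0.03}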
def spatial_interpolate_slice(self, persistent_classes: xr.DataArray, water_maps_slice: xr.DataArray, **kwargs ) -> xr.DataArray: dynamics_class = kwargs.get( "dynamics_class", 0 ) tval = water_maps_slice.coords[ water_maps_slice.dims[0] ].values[0] persistent_classes_slice = persistent_classes if persistent_classes.ndim == 2 else persistent_classes.sel( **{persistent_classes.dims[0]:tval}, method="nearest" ).drop_vars( persistent_classes.dims[0] ) dynamics_mask: xr.DataArray = persistent_classes_slice.isin( [dynamics_class] ) return water_maps_slice.where( dynamics_mask, persistent_classes_slice )
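A sketch of how the slice-filling logic above behaves; `mapper` is a placeholder for an instance of the class that defines the method (the method itself does not touch instance state), and the class values are arbitrary.

import numpy as np
import xarray as xr

# persistent classes on a static (y, x) grid; class 0 marks "dynamic" pixels
persistent = xr.DataArray(np.array([[0, 1], [2, 0]]), dims=("y", "x"))

# a single time slice of the water maps, with a leading time dimension
water_slice = xr.DataArray(
    np.array([[[7, 8], [9, 3]]]),
    dims=("time", "y", "x"),
    coords={"time": [np.datetime64("2020-01-01")]},
)

# dynamic pixels keep the water-map value; all other pixels are
# overwritten with the persistent class
filled = mapper.spatial_interpolate_slice(persistent, water_slice, dynamics_class=0)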
def test_lettered_tiles_update_existing(self): """Test updating lettered tiles with additional data.""" import shutil import xarray as xr from satpy.writers.awips_tiled import AWIPSTiledWriter from xarray import DataArray from pyresample.geometry import AreaDefinition from pyresample.utils import proj4_str_to_dict import dask first_base_dir = os.path.join(self.base_dir, 'first') w = AWIPSTiledWriter(base_dir=first_base_dir, compress=True) area_def = AreaDefinition( 'test', 'test', 'test', proj4_str_to_dict( '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. ' '+lat_0=25 +lat_1=25 +units=m +no_defs'), 1000, 2000, (-1000000., -1500000., 1000000., 1500000.), ) now = datetime(2018, 1, 1, 12, 0, 0) data = np.linspace(0., 1., 2000000, dtype=np.float32).reshape( (2000, 1000)) # pixels to be filled in later data[:, -200:] = np.nan ds = DataArray(da.from_array(data, chunks=500), attrs=dict(name='test_ds', platform_name='PLAT', sensor='SENSOR', units='1', area=area_def, start_time=now, end_time=now + timedelta(minutes=20))) # tile_count should be ignored since we specified lettered_grid w.save_datasets([ds], sector_id='LCC', source_name="TESTS", tile_count=(3, 3), lettered_grid=True) all_files = sorted(glob(os.path.join(first_base_dir, 'TESTS_AII*.nc'))) assert len(all_files) == 16 first_files = [] second_base_dir = os.path.join(self.base_dir, 'second') os.makedirs(second_base_dir) for fn in all_files: new_fn = fn.replace(first_base_dir, second_base_dir) shutil.copy(fn, new_fn) first_files.append(new_fn) # Second writing/updating # Area is about 100 pixels to the right area_def2 = AreaDefinition( 'test', 'test', 'test', proj4_str_to_dict( '+proj=lcc +datum=WGS84 +ellps=WGS84 +lon_0=-95. ' '+lat_0=25 +lat_1=25 +units=m +no_defs'), 1000, 2000, (-800000., -1500000., 1200000., 1500000.), ) data2 = np.linspace(0., 1., 2000000, dtype=np.float32).reshape( (2000, 1000)) # a gap at the beginning where old values remain data2[:, :200] = np.nan # a gap at the end where old values remain data2[:, -400:-300] = np.nan ds2 = DataArray(da.from_array(data2, chunks=500), attrs=dict(name='test_ds', platform_name='PLAT', sensor='SENSOR', units='1', area=area_def2, start_time=now, end_time=now + timedelta(minutes=20))) w = AWIPSTiledWriter(base_dir=second_base_dir, compress=True) # HACK: The _copy_to_existing function hangs when opening the output # file multiple times...sometimes. If we limit dask to one worker # it seems to work fine. 
with dask.config.set(num_workers=1): w.save_datasets([ds2], sector_id='LCC', source_name="TESTS", tile_count=(3, 3), lettered_grid=True) all_files = glob(os.path.join(second_base_dir, 'TESTS_AII*.nc')) # 16 original tiles + 4 new tiles assert len(all_files) == 20 # these tiles should be the right-most edge of the first image first_right_edge_files = [ x for x in first_files if 'P02' in x or 'P04' in x or 'V02' in x or 'V04' in x ] for new_file in first_right_edge_files: orig_file = new_file.replace(second_base_dir, first_base_dir) orig_nc = xr.open_dataset(orig_file) orig_data = orig_nc['data'].values if not np.isnan(orig_data).any(): # we only care about the tiles that had NaNs originally continue new_nc = xr.open_dataset(new_file) new_data = new_nc['data'].values # there should be at least some areas of the file # that old data was present and hasn't been replaced np.testing.assert_allclose(orig_data[:, :20], new_data[:, :20]) # it isn't exactly 200 because the tiles aren't aligned with the # data (the left-most tile doesn't have data until some columns # in), but it should be at least that many columns assert np.isnan(orig_data[:, 200:]).all() assert not np.isnan(new_data[:, 200:]).all()
def estimate_motion(varr: xr.DataArray, dim="frame", npart=3, chunk_nfm: Optional[int] = None, **kwargs) -> xr.DataArray: """ Estimate motion for each frame of the input movie data. This function estimates motion using a recursive approach. The movie is split into chunks of `npart` frames and motion estimation is carried out within each chunk relative to the middle frame, then a template is generated for each chunk by aggregating the motion-corrected frames within each chunk. Next, every `npart` chunks are grouped together, and motion estimation is carried out within each group relative to the middle chunk using the aggregated templates. The chunk-level motions are added on top of the previous within-chunk motions. This step is then repeated recursively until we are left with a single chunk representing the full movie, at which point the motion estimation is finished. The motion estimation itself is carried out with FFT-based phase correlation by default. Alternatively, non-rigid motion correction can be carried out by modelling the motion of each frame as translations of individual vertices of a smooth BSpline mesh. The translations can then be estimated by gradient descent, using the correlation between frames as the objective. This feature is currently experimental. Additionally, various correction procedures can be carried out to filter out frames not suited as templates for motion correction, or to correct for large false shifts when the quality of the templates is low. Parameters ---------- varr : xr.DataArray Input movie data. dim : str, optional The dimension along which motion estimation should be carried out. By default `"frame"`. npart : int, optional Number of frames/chunks to combine for the recursive algorithm. By default `3`. chunk_nfm : int, optional Number of frames in each parallel task. Note that this only affects dask graph construction, but not the recursion of the algorithm. If `None` then the dask chunksize along `dim` will be used. By default `None`. Keyword Arguments ----------------- alt_error : float, optional Error threshold between estimated shifts from two alternative methods, specified in pixels. If not `None`, then for each chunk during recursion, the first and last frames of that chunk will be returned in addition to the aggregated template. When estimating motion between chunks, the estimation will then be carried out twice: once using the aggregated templates, and once using the consecutive first/last frames between chunks. The results of these two methods are then compared. If their absolute difference is larger than `alt_error`, then the result with the smaller absolute value (closer to zero shift) will be used. This is useful to correct for cases where the activity of cells is sparse and changes across chunks, leading to wrong features being matched in aggregated templates. If `None` then no additional checking will be performed. By default `5`. aggregation : str, optional How frames should be aggregated to generate the template for each chunk. Should be either "mean" or "max". By default `"mean"`. upsample : int, optional The upsample factor passed to :func:`skimage.registration.phase_cross_correlation` to achieve sub-pixel accuracy. circ_thres : float, optional The circularity threshold used to check whether a frame can serve as a good template for estimating motion. If not `None`, then for each frame a comparison image is computed using :func:`cv2.matchTemplate` between the frame and a zero-padded version of it (padded up to `max_sh`) using `cv2.TM_SQDIFF_NORMED`.
The comparison image of a good template should only have `< 1` values around the center and the `< 1` region should be circular. Hence the circularity defined as `4 * np.pi * (area / (perimeter ** 2))` for the `< 1` region is computed, and any frame with circularity smaller than `circ_thres` is excluded from propagation of shifts and aggregation of templates. By default `None`. max_sh : int, optional Amount of zero padding when checking for the quality of frames, specified in pixels. Only used if `circ_thres is not None`. See `circ_thres` for more detail. By default `100`. mesh_size : Tuple[int, int], optional Number of control points for the BSpline mesh in each dimension, specified in the order ("height", "width"). If not `None` then the experimental non-rigid motion estimation is enabled. By default `None` niter : int, optional Max number of iteration for the gradient descent process of estimation BSpline parameters. Only used if `mesh_size is not None`. By default `100`. bin_thres : int, optional Intensity threshold for binarizing the frames. The binarized frame will be used as masks for non-rigid motion estimation, where only pixels in the mask will be used to evaluate the gradient during optimization. Significantly improve performance but sacrifice accuracy of estimation for dim regions. Only used if `mesh_size is not None`. By default `None`. Returns ------- motion : xr.DataArray Estimated motion for each frame. Has two dimensions `dim` and `"shift_dim"` representing rigid shifts in each direction if `mesh_size is None`, otherwise has four dimensions: `dim`, `"grid0"`, `"grid1"` and `"shift_dim"` representing shifts for each mesh grid control point. See Also -------- :doc:`simpleitk:registrationOverview` : for overview of the non-rigid estimation method """ varr = varr.transpose(..., dim, "height", "width") loop_dims = list(set(varr.dims) - set(["height", "width", dim])) if npart is None: # by default use a npart that result in two layers of recursion npart = max(3, int(np.ceil((varr.sizes[dim] / chunk_nfm)**(1 / 2)))) if loop_dims: loop_labs = [varr.coords[d].values for d in loop_dims] res_dict = dict() for lab in itt.product(*loop_labs): va = varr.sel( {loop_dims[i]: lab[i] for i in range(len(loop_dims))}) vmax, sh = est_motion_part(va.data, npart, chunk_nfm, **kwargs) if kwargs.get("mesh_size", None): sh = xr.DataArray( sh, dims=[dim, "shift_dim", "grid0", "grid1"], coords={ dim: va.coords[dim].values, "shift_dim": ["height", "width"], }, ) else: sh = xr.DataArray( sh, dims=[dim, "shift_dim"], coords={ dim: va.coords[dim].values, "shift_dim": ["height", "width"], }, ) res_dict[lab] = sh.assign_coords( **{k: v for k, v in zip(loop_dims, lab)}) sh = xrconcat_recursive(res_dict, loop_dims) else: vmax, sh = est_motion_part(varr.data, npart, chunk_nfm, **kwargs) if kwargs.get("mesh_size", None): sh = xr.DataArray( sh, dims=[dim, "shift_dim", "grid0", "grid1"], coords={ dim: varr.coords[dim].values, "shift_dim": ["height", "width"], }, ) else: sh = xr.DataArray( sh, dims=[dim, "shift_dim"], coords={ dim: varr.coords[dim].values, "shift_dim": ["height", "width"], }, ) return sh
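A minimal usage sketch for the estimator above (it mirrors the minian motion-correction API, so its helpers such as `est_motion_part` must be importable from the same module); the synthetic movie below is random data used only to show the expected dims and chunking.

import dask.array as da
import numpy as np
import xarray as xr

# a small synthetic movie: 30 frames of 64x64 pixels, chunked along "frame"
movie = xr.DataArray(
    da.random.random((30, 64, 64), chunks=(10, 64, 64)),
    dims=("frame", "height", "width"),
    coords={"frame": np.arange(30)},
)

motion = estimate_motion(movie, dim="frame", npart=3, chunk_nfm=10)
print(motion.dims)  # ('frame', 'shift_dim') for the default rigid estimation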