def test_apply_gufunc_check_coredim_chunksize():
    def foo(x):
        return np.sum(x, axis=-1)

    a = da.random.normal(size=(8,), chunks=3)
    with pytest.raises(ValueError) as excinfo:
        da.apply_gufunc(foo, "(i)->()", a, output_dtypes=float, allow_rechunk=False)
    assert "consists of multiple chunks" in str(excinfo.value)

def test_apply_gufunc_check_inhomogeneous_chunksize():
    def foo(x, y):
        return x + y

    a = da.random.normal(size=(8,), chunks=((2, 2, 2, 2),))
    b = da.random.normal(size=(8,), chunks=((2, 3, 3),))
    with pytest.raises(ValueError) as excinfo:
        da.apply_gufunc(foo, "(),()->()", a, b, output_dtypes=float, allow_rechunk=False)
    assert "with different chunksize present" in str(excinfo.value)

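# The two tests above pin down apply_gufunc's validation when
# allow_rechunk=False: a core dimension must live in a single chunk, and
# loop dimensions must be chunked identically across inputs. A minimal
# sketch (not part of the test suite) showing that rechunking the core
# dimension up front satisfies the first check:
import numpy as np
import dask.array as da

a = da.random.normal(size=(8,), chunks=3)
# collapse the core dimension "i" into one chunk before applying the gufunc
total = da.apply_gufunc(lambda x: np.sum(x, axis=-1), "(i)->()",
                        a.rechunk(8), output_dtypes=float,
                        allow_rechunk=False)
assert np.isclose(total.compute(), a.sum().compute())
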
def apply_filter(self, data, time_index, min_window=None):
    """Apply the filter to an array of data.

    Args:
        data (dask.array.Array): An array of (time x particle) of advected
            particle data. This can be a dask array of lazily-loaded
            temporary data.
        time_index (int): The index along the time dimension corresponding
            to the central point, to extract after filtering.
        min_window (Optional[int]): A minimum window size for considering
            particles valid for filtering.

    Returns:
        numpy.ndarray: An array of (particle) of the filtered particle data,
            restricted to the specified time index. This data is not lazy,
            as it has already been computed.

    """

    def filter_select(x):
        if min_window is not None:
            Filter.pad_window(x, time_index, min_window)

        return signal.sosfiltfilt(self._filter, x)[..., time_index]

    # apply scipy filter as a ufunc
    # mapping an array to scalar over the first axis, automatically vectorize
    # execution and allow rechunking (since we have a chunk boundary across
    # the first axis)
    filtered = da.apply_gufunc(
        filter_select,
        "(i)->()",
        data.rechunk((-1, "auto")),
        axis=0,
        output_dtypes=data.dtype,
    )

    return filtered.compute()

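# Hypothetical usage of apply_filter; the Filter instance `f`, and the shape
# and chunking of `advected`, are illustrative assumptions rather than part
# of the original code:
import dask.array as da

advected = da.random.normal(size=(241, 5000), chunks=(241, 1000))  # (time, particle)
velocities = f.apply_filter(advected, time_index=120)
# the result is an in-memory numpy array with one filtered value per particle
assert velocities.shape == (5000,)
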
def _invert_from_model_any(inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind):
    # wrapper to allow computation on any type (xarray, numpy)
    try:
        # if input is xarray, will return xarray
        da_ws_co = xr.zeros_like(sigma0_co_db, dtype=np.complex128)
        da_ws_co.name = 'windspeed_gmf'
        da_ws_co.attrs.clear()
        da_ws_cr = xr.zeros_like(sigma0_co_db, dtype=np.float64)
        da_ws_cr.name = 'windspeed_gmf'
        da_ws_cr.attrs.clear()

        try:
            # if dask array, use apply_gufunc
            # raise ImportError
            import dask.array as da
            if any(
                isinstance(v.data, da.Array)
                for v in [inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind]
            ):
                da_ws_co.data, da_ws_cr.data = da.apply_gufunc(
                    _invert_from_model_numpy,
                    '(n),(n),(n),(n),(n)->(n),(n)',
                    inc.data, sigma0_co_db.data, sigma0_cr_db.data,
                    dsig_cr.data, ancillary_wind.data,
                )
                logger.debug('invert with apply_gufunc')
            else:
                raise TypeError
        except (ImportError, TypeError):
            # use numpy array, but store in xarray
            da_ws_co.data, da_ws_cr.data = _invert_from_model_numpy(
                np.asarray(inc),
                np.asarray(sigma0_co_db),
                np.asarray(sigma0_cr_db),
                np.asarray(dsig_cr),
                np.asarray(ancillary_wind),
            )
            logger.debug('invert with xarray.values. no chunks')
    except TypeError:
        # full numpy
        logger.debug('invert with numpy')
        da_ws_co, da_ws_cr = _invert_from_model_numpy(
            inc, sigma0_co_db, sigma0_cr_db, dsig_cr, ancillary_wind
        )

    return da_ws_co, da_ws_cr

def filter_step(self, advection_data):
    """Perform filtering of a single step of advection data.

    The Lagrangian-transformed data from :func:`~advection_step` is
    high-pass filtered in time, leaving only the signal at the origin
    point (i.e. the filtered forward and backward advection data is
    discarded).

    Args:
        advection_data (Dict[str, (int, dask.array)]): A dictionary of
            particle advection data from a single timestep, returned
            from :func:`~advection_step`.

    Returns:
        Dict[str, numpy.ndarray]: A dictionary mapping sampled variable
            names to a 1D array containing the filtered data at the
            specified time. This data is not lazy, as it has already
            been computed out of the temporary advection data.

    """
    da_out = {}
    for v, a in advection_data.items():
        time_index_data, var_array = a

        def filter_select(x):
            return signal.filtfilt(*self.inertial_filter, x)[..., time_index_data]

        # apply scipy filter as a ufunc
        # mapping an array to scalar over the first axis, automatically
        # vectorize execution and allow rechunking (since we have a chunk
        # boundary across the first axis)
        filtered = da.apply_gufunc(
            filter_select,
            "(i)->()",
            var_array,
            axis=0,
            output_dtypes=var_array.dtype,
            allow_rechunk=True,
        )

        da_out[v] = filtered.compute()

    return da_out

def apply_filter(self, data, time_index, min_window=None):
    """Apply the filter to an array of data."""

    def filter_select(filt, x):
        if min_window is not None:
            Filter.pad_window(x, time_index, min_window)

        return sosfilt.sosfiltfilt(filt, x)[..., time_index]

    data = data.rechunk((-1, "100 MiB"))
    filtered = da.apply_gufunc(
        filter_select,
        "(s,n),(i)->()",
        da.from_array(self._filter, (data.chunksize[1], None, None)),
        data,
        axes=[(1, 2), (0,), ()],
        output_dtypes=data.dtype,
    )

    return filtered.compute()

def _calc_cape_gufunc(*args, **kwargs):
    ''' Wrapped function for cape calculation for dask arrays to leverage
        parallelized calculation over the grid.
    '''
    if kwargs['vertical_lev'] == 'sigma':
        signature = "(i),(i),(i),(),(),()->(),()"
        output_dtypes = ('f4', 'f4')
    elif kwargs['vertical_lev'] == 'pressure':
        signature = "(i),(i),(i),(),(),(),()->(),()"
        output_dtypes = ('f4', 'f4')

    if kwargs['source'] == 'most-unstable':
        signature += ",(),()"
        output_dtypes = output_dtypes + ('i4', 'f4')

    return da.apply_gufunc(_calc_cape_numpy, signature,
                           *args,
                           output_dtypes=output_dtypes,
                           axis=-1,
                           vectorize=False,
                           **kwargs)

def apply_filter(self, data, time_index, min_window=None):
    """Apply the filter to an array of data."""

    def filter_select(filt, x):
        if min_window is not None:
            Filter.pad_window(x, time_index, min_window)

        return sosfilt.sosfiltfilt(filt, x)[..., time_index]

    # we have to make sure the chunking of filter matches that of data
    data = data.rechunk((-1, "auto"))
    filt = da.from_array(self._filter, chunks=(data.chunksize[1], None, None))

    filtered = da.apply_gufunc(
        filter_select,
        "(s,n),(i)->()",
        filt,
        data,
        axes=[(1, 2), (0,), ()],
        output_dtypes=data.dtype,
    )

    return filtered.compute()

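# A self-contained sketch of the chunk-matching requirement noted above
# (toy shapes; the Butterworth kernel is an assumption for illustration).
# The filter's loop (particle) dimension must be chunked identically to the
# particle dimension of the data, or apply_gufunc cannot pair the blocks:
import numpy as np
import dask.array as da
from scipy import signal

n_time, n_particle = 241, 1000
data = da.random.normal(size=(n_time, n_particle), chunks=(n_time, 250))

sos = signal.butter(4, 0.1, output="sos")  # (sections, 6) second-order sections
filt = da.from_array(
    np.broadcast_to(sos, (n_particle,) + sos.shape),  # one copy per particle
    chunks=(data.chunksize[1], None, None),  # match the particle chunking
)

filtered = da.apply_gufunc(
    lambda f, x: signal.sosfiltfilt(f, x)[..., n_time // 2],
    "(s,n),(i)->()",
    filt,
    data,
    axes=[(1, 2), (0,), ()],
    vectorize=True,  # hand scipy one (sections, 6) filter and one (time,) series at a time
    output_dtypes=data.dtype,
)
assert filtered.compute().shape == (n_particle,)
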
def _calc_srh_gufunc(*args, **kwargs):
    ''' Wrapped function for srh calculation for dask arrays to leverage
        parallelized calculation over the grid.
    '''
    if kwargs['vertical_lev'] == 'sigma':
        signature = "(i),(i),(i),(i),(i),(),(),(),(),()->(),()"
        output_dtypes = ('f4', 'f4')
    elif kwargs['vertical_lev'] == 'pressure':
        signature = "(i),(i),(i),(i),(i),(),(),(),(),(),()->(),()"
        output_dtypes = ('f4', 'f4')

    if kwargs['output_var'] == 'all':
        signature += ",(),(),(),(),(),()"  # ",(2),(2),(2)"
        output_dtypes = output_dtypes + ('f4', 'f4', 'f4', 'f4', 'f4', 'f4')  # ('f4','f4','f4')

    return da.apply_gufunc(_calc_srh_numpy, signature,
                           *args,
                           output_dtypes=output_dtypes,
                           axis=-1,
                           vectorize=False,
                           **kwargs)

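# Both wrappers above grow the gufunc signature and the output_dtypes tuple
# in lockstep when extra outputs are requested. A standalone sketch of the
# same multi-output pattern with a toy kernel (names are illustrative):
import dask.array as da

def _toy_stats(x):
    # reduce the trailing (vertical) axis to two scalars per profile
    return x.mean(axis=-1, dtype='f4'), x.max(axis=-1).astype('f4')

data = da.random.random((4, 6, 50), chunks=(2, 3, 50))
mean, peak = da.apply_gufunc(_toy_stats, "(i)->(),()", data,
                             output_dtypes=('f4', 'f4'),
                             axis=-1, vectorize=False)
assert mean.shape == (4, 6) and peak.shape == (4, 6)
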
def CartesianToEquatorial(pos, observer=[0, 0, 0], frame='icrs'):
    """
    Convert Cartesian position coordinates to equatorial right ascension
    and declination, using the specified observer location.

    .. note::
        RA and DEC will be returned in degrees, with RA in the range [0,360]
        and DEC in the range [-90, 90].

    Parameters
    ----------
    pos : array_like
        a N x 3 array holding the Cartesian position coordinates
    observer : array_like
        a length 3 array holding the observer location
    frame : string
        A string, 'icrs' or 'galactic'. The frame of the input position.
        Use 'icrs' if the Cartesian position is already equatorial.

    Returns
    -------
    ra, dec : array_like
        the right ascension and declination coordinates, in degrees. RA
        will be in the range [0,360] and DEC in the range [-90, 90]
    """
    # split x, y, z to signify that we do not need to have pos
    # as a full chunk in the last dimension.
    # this is useful when we use apply_gufunc.
    x, y, z = [pos[..., i] - observer[i] for i in range(3)]

    if frame == 'icrs':
        # FIXME: Convert these to a gufunc that uses astropy?
        # might be a step backward.

        # from equatorial to equatorial
        s = da.hypot(x, y)
        lon = da.arctan2(y, x)
        lat = da.arctan2(z, s)

        # convert to degrees
        lon = da.rad2deg(lon)
        lat = da.rad2deg(lat)

        # wrap lon to [0,360]
        lon = da.mod(lon - 360., 360.)

        ra, dec = lon, lat
    else:
        from astropy.coordinates import SkyCoord

        def cart_to_eq(x, y, z):
            try:
                sc = SkyCoord(x, y, z, representation_type='cartesian', frame=frame)
                scg = sc.transform_to(frame='icrs')
                scg.representation_type = 'unitspherical'
            except Exception:
                # fall back to the keyword used by older astropy versions
                sc = SkyCoord(x, y, z, representation='cartesian', frame=frame)
                scg = sc.transform_to(frame='icrs')
                scg.representation = 'unitspherical'

            ra, dec = scg.ra.value, scg.dec.value
            return ra, dec

        dtype = pos.dtype
        ra, dec = da.apply_gufunc(cart_to_eq, '(),(),()->(),()', x, y, z,
                                  output_dtypes=[dtype, dtype])

    return da.stack((ra, dec), axis=0)

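# Hypothetical usage of the helper above (box size, observer, and chunking
# are made-up values):
import dask.array as da

pos = da.random.uniform(-500., 500., size=(10000, 3), chunks=(2500, 3))
radec = CartesianToEquatorial(pos, observer=[0, 0, 0], frame='icrs')
ra, dec = radec[0], radec[1]  # both in degrees; RA wrapped to [0, 360]
print(da.compute(ra.min(), ra.max(), dec.min(), dec.max()))
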
def make_kappa_maps(cat, nside, zs_list, ds_list, localsize, nbar):
    """ Make kappa maps at a list of ds.

        Returns kappa and Nm in shape of (n_ds, localsize), and kappabar
        in shape of (n_ds,).

        The maps are distributed in memory, and localsize is the size of
        the map on this rank.
    """
    dl = (abs(cat['Position'] ** 2).sum(axis=-1)) ** 0.5
    chunks = dl.chunks
    ra = cat['RA']
    dec = cat['DEC']
    zl = (1 / cat['Aemit'] - 1)

    ipix = da.apply_gufunc(
        lambda ra, dec, nside: healpix.ang2pix(nside, numpy.radians(90 - dec), numpy.radians(ra)),
        '(),()->()', ra, dec, nside=nside)

    npix = healpix.nside2npix(nside)

    ipix = ipix.compute()
    dl = dl.persist()

    cat.comm.barrier()
    if cat.comm.rank == 0:
        cat.logger.info("ipix and dl are persisted")

    area = (4 * numpy.pi / npix) * dl ** 2

    Om = cat.attrs['OmegaM'][0]

    kappa_list = []
    kappabar_list = []
    Nm_list = []
    for zs, ds in zip(zs_list, ds_list):
        LensKernel = da.apply_gufunc(
            lambda dl, zl, Om, ds: wlen(Om, dl, zl, ds),
            "(),()->()", dl, zl, Om=Om, ds=ds)

        weights = (LensKernel / (area * nbar))
        weights = weights.compute()
        cat.comm.barrier()
        if cat.comm.rank == 0:
            cat.logger.info("source plane %g weights are persisted" % zs)

        Wmap, Nmap = weighted_map(ipix, npix, weights, localsize, cat.comm)
        cat.comm.barrier()
        if cat.comm.rank == 0:
            cat.logger.info("source plane %g maps generated" % zs)

        kappa1 = Wmap

        # compute kappa bar;
        # this is a simple integral, but we do not know the dl, dz relation,
        # so do it with values from a subsample of particles
        every = (cat.csize // 100000)
        if every == 0:
            every = 1

        # use GatherArray, because it is faster than comm.gather at this scale
        # (> 4000 ranks on CrayMPI)
        ssdl = GatherArray(dl[::every].compute(), cat.comm)
        ssLensKernel = GatherArray(LensKernel[::every].compute(), cat.comm)

        if cat.comm.rank == 0:
            arg = ssdl.argsort()
            ssdl = ssdl[arg]
            ssLensKernel = ssLensKernel[arg]
            kappa1bar = numpy.trapz(ssLensKernel, ssdl)
        else:
            kappa1bar = None
        kappa1bar = cat.comm.bcast(kappa1bar)

        cat.comm.barrier()
        if cat.comm.rank == 0:
            cat.logger.info("source plane %g bar computed" % zs)

        kappa_list.append(kappa1)
        kappabar_list.append(kappa1bar)
        Nm_list.append(Nmap)

    """
    # estimate nbar
    dlmin = dl.min()
    dlmax = dl.max()
    volume = (Nmap > 0).sum() / len(Nmap) * 4 / 3 * numpy.pi * (dlmax**3 - dlmin**3)
    """

    # returns number rather than delta, since we do not know fsky here.
    # Nmap = Nmap / cat.csize * cat.comm.allreduce((Nmap > 0).sum())  # to overdensity.
    return numpy.array(kappa_list), numpy.array(kappabar_list), numpy.array(Nm_list)

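# Note that apply_gufunc forwards unrecognised keyword arguments straight to
# the wrapped function; that is how `nside`, `Om` and `ds` reach the lambdas
# above without being treated as dask arrays. A minimal sketch of the pattern:
import dask.array as da

def scale(x, factor):
    return x * factor

a = da.arange(10, chunks=5)
b = da.apply_gufunc(scale, "()->()", a, factor=2.5, output_dtypes=float)
assert b.compute()[-1] == 22.5
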
def mock(ns):
    if ns.idataset is None:
        ns.idataset = ns.odataset
    cat = BigFileCatalog(ns.input, dataset=ns.idataset)

    if ns.simcov == 'NGP':
        fsky = 0.5
    elif ns.simcov == 'FULL':
        fsky = 1.0
    else:
        raise ValueError("unknown simcov: %r" % ns.simcov)

    cat['ZREAL'] = (1 / cat['Aemit'] - 1)

    def compute_va(vel, pos):
        u = pos / (pos ** 2).sum(axis=-1)[:, None] ** 0.5
        return numpy.einsum('ij,ij->i', vel, u)

    VZ = da.apply_gufunc(compute_va, '(3),(3)->()', cat['Velocity'], cat['Position'])

    C = 299792458. / 1000  # speed of light, in km/s

    cat['Z'] = (1 + cat['ZREAL']) * (1 + VZ / C) - 1

    zmin, zmax = da.compute(cat['Z'].min(), cat['Z'].max())
    zmax = max(cat.comm.allgather(zmax))
    zmin = min(cat.comm.allgather(zmin))

    dNdZ = read_Nz(ns.nz, ns.ncol, zmin, zmax)

    zedges = numpy.linspace(zmin, zmax, 128)
    zcenters = 0.5 * (zedges[:-1] + zedges[1:])
    dNdZ1 = fit_dNdZ(cat, zedges, fsky)

    Z = cat['Z'].compute()

    ntarget = dNdZ(Z) / dNdZ1(Z)
    ntarget[numpy.isnan(ntarget)] = 0
    # ntarget = ntarget.clip(0, 10)

    rng = numpy.random.RandomState((SEED * 20 + 11) * cat.comm.size + cat.comm.rank)

    if all(cat.comm.allgather((ntarget < 1.0).all())):
        ntarget = rng.binomial(1, ntarget)
    else:
        ntarget = rng.poisson(ntarget)
        if cat.comm.rank == 0:
            cat.logger.info("Up-sampling with poisson because number density is too low")

    pos = cat['Position'].compute().repeat(ntarget, axis=0)
    redshift = cat['Z'].compute().repeat(ntarget, axis=0)
    aemit = cat['Aemit'].compute().repeat(ntarget, axis=0)

    ra, dec = transform.CartesianToEquatorial(pos, frame='galactic')

    if ns.simcov == 'NGP':
        if cat.comm.rank == 0:
            cat.logger.info("Patching the half sky simulation into full sky by flipping z axis")

        ra2, dec2 = transform.CartesianToEquatorial(pos * [1, 1, -1], frame='galactic')

        cat1 = ArrayCatalog({
            'RA': numpy.concatenate([ra, ra2], axis=0),
            'DEC': numpy.concatenate([dec, dec2], axis=0),
            'Aemit': numpy.concatenate([aemit, aemit], axis=0),
            'Z': numpy.concatenate([redshift, redshift], axis=0),
        }, comm=cat.comm)
    elif ns.simcov == 'FULL':
        cat1 = ArrayCatalog({
            'RA': ra,
            'DEC': dec,
            'Aemit': aemit,
            'Z': redshift,
        }, comm=cat.comm)

    cat1.save(ns.output, dataset=ns.odataset)

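# The '(3),(3)->()' signature above uses a fixed-size core dimension, so the
# kernel always receives complete 3-vectors regardless of how the particle
# axis is chunked. A standalone sketch of the same row-wise dot product
# (toy data, hypothetical names):
import numpy
import dask.array as da

def row_dot(a, b):
    return numpy.einsum('...j,...j->...', a, b)

va = da.random.normal(size=(1000, 3), chunks=(250, 3))
vb = da.random.normal(size=(1000, 3), chunks=(250, 3))
dots = da.apply_gufunc(row_dot, '(3),(3)->()', va, vb, output_dtypes=float)
assert dots.compute().shape == (1000,)
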
def SkyToUnitSphere(ra, dec, degrees=True, frame='icrs'):
    """
    Convert sky coordinates (``ra``, ``dec``) to Cartesian coordinates on
    the unit sphere.

    Parameters
    ----------
    ra : :class:`dask.array.Array`; shape: (N,)
        the right ascension angular coordinate
    dec : :class:`dask.array.Array`; shape: (N,)
        the declination angular coordinate
    degrees : bool, optional
        specifies whether ``ra`` and ``dec`` are in degrees or radians
    frame : string ('icrs' or 'galactic')
        specifies which frame the Cartesian coordinates are in. Useful if
        you know the simulation (usually Cartesian) is in galactic units,
        but you want to convert to the icrs (ra, dec) usually used in surveys.

    Returns
    -------
    pos : :class:`dask.array.Array`; shape: (N,3)
        the Cartesian position coordinates, where columns represent
        ``x``, ``y``, and ``z``

    Raises
    ------
    TypeError
        If the input columns are not dask arrays
    """
    ra, dec = da.broadcast_arrays(ra, dec)

    if frame == 'icrs':
        # no frame transformation;
        # put into radians from degrees
        if degrees:
            ra = da.deg2rad(ra)
            dec = da.deg2rad(dec)

        # cartesian coordinates
        x = da.cos(dec) * da.cos(ra)
        y = da.cos(dec) * da.sin(ra)
        z = da.sin(dec)
        return da.vstack([x, y, z]).T
    else:
        from astropy.coordinates import SkyCoord

        if degrees:
            ra = da.deg2rad(ra)
            dec = da.deg2rad(dec)

        def eq_to_cart(ra, dec):
            try:
                sc = SkyCoord(ra, dec, unit='rad',
                              representation_type='unitspherical', frame='icrs')
            except Exception:
                # fall back to the keyword used by older astropy versions
                sc = SkyCoord(ra, dec, unit='rad',
                              representation='unitspherical', frame='icrs')

            scg = sc.transform_to(frame=frame)
            scg = scg.cartesian

            x, y, z = scg.x.value, scg.y.value, scg.z.value
            return numpy.stack([x, y, z], axis=1)

        arr = da.apply_gufunc(eq_to_cart, '(),()->(p)', ra, dec,
                              output_dtypes=[ra.dtype], output_sizes={'p': 3})
        return arr

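# Hypothetical usage of the function above, exercising the trivial icrs branch:
import numpy
import dask.array as da

ra = da.from_array(numpy.array([0., 90., 180.]), chunks=3)
dec = da.from_array(numpy.array([0., 0., 45.]), chunks=3)
pos = SkyToUnitSphere(ra, dec, degrees=True, frame='icrs')
print(pos.compute())  # each row is a unit vector; ra=dec=0 maps to (1, 0, 0)
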
def filter_step(self, time_index, time):
    """Perform forward-backward advection at a single timestep."""

    # seed all particles at gridpoints
    ps = self.particleset(time)

    # execute the sample-only kernel to efficiently grab the initial condition
    ps.kernel = self.sample_kernel
    ps.execute(self.sample_kernel, runtime=0, dt=self.advection_dt)

    # set up the temporary output file for the initial condition and
    # forward advection
    outfile = LagrangeParticleFile(ps, self.output_dt, self.sample_variables)

    # now the forward advection kernel can run
    outfile.set_group("forward")
    ps.kernel = self.kernel
    ps.execute(
        self.kernel,
        runtime=self.window_size,
        dt=self.advection_dt,
        output_file=outfile,
        recovery={parcels.ErrorCode.ErrorOutOfBounds: recovery_kernel_out_of_bounds},
    )

    # reseed particles back on the grid, then advect backwards
    # we don't need any initial condition sampling since we've already done it
    outfile.set_group("backward")
    ps = self.particleset(time)
    ps.kernel = self.kernel
    ps.execute(
        self.kernel,
        runtime=self.window_size,
        dt=-self.advection_dt,
        output_file=outfile,
        recovery={parcels.ErrorCode.ErrorOutOfBounds: recovery_kernel_out_of_bounds},
    )

    # stitch together and filter all sample variables from the temporary
    # output data
    da_out = {}
    for v in self.sample_variables:
        # load data lazily as dask arrays, for forward and backward segments
        var_array_forward = da.from_array(outfile.data("forward")[v], chunks=(None, "auto"))
        var_array_backward = da.from_array(outfile.data("backward")[v], chunks=(None, "auto"))

        # get an index into the middle of the array
        time_index_data = var_array_backward.shape[0]

        # construct proper sequence by concatenating data and flipping the
        # backward segment; for var_array_forward, skip the initial output
        # for both the sample-only and sample-advection kernels, which have
        # meaningless data
        var_array = da.concatenate(
            (da.flip(var_array_backward[1:, :], axis=0), var_array_forward)
        )

        def filter_select(x):
            return signal.filtfilt(*self.inertial_filter, x)[..., time_index_data]

        # apply scipy filter as a ufunc
        # mapping an array to scalar over the first axis, automatically
        # vectorize execution and allow rechunking (since we have a chunk
        # boundary across the first axis)
        filtered = da.apply_gufunc(
            filter_select,
            "(i)->()",
            var_array,
            axis=0,
            output_dtypes=var_array.dtype,
            allow_rechunk=True,
        )

        da_out[v] = filtered.compute()

    return da_out