# Shared imports for the examples below (aliases follow each original project).
import collections
import re
import warnings
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
from numpy import zeros
import pandas as pd
import xarray
import xarray as xa
import xarray as xr


def ndq_series():
    nx, ny, nt = 2, 3, 5000
    x = np.arange(0, nx)
    y = np.arange(0, ny)
    cx = xr.IndexVariable("x", x)
    cy = xr.IndexVariable("y", y)
    dates = pd.date_range("1900-01-01", periods=nt, freq=pd.DateOffset(days=1))
    time = xr.IndexVariable(
        "time", dates, attrs={"units": "days since 1900-01-01", "calendar": "standard"}
    )
    return xr.DataArray(
        np.random.lognormal(10, 1, (nt, nx, ny)),
        dims=("time", "x", "y"),
        coords={"time": time, "x": cx, "y": cy},
        attrs={"units": "m^3 s-1", "standard_name": "streamflow"},
    )
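# A minimal usage sketch of ndq_series (not part of the original source):
# build the synthetic streamflow cube and reduce it along time.
q = ndq_series()
annual_max = q.resample(time="YS").max("time")  # one value per year and grid cell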
def _warp_spatial_coords(affine, width, height):
    """Get spatial coords in new transform."""
    # affine_to_coords (a rioxarray helper assumed in scope) computes the
    # x/y coordinate arrays from the affine transform.
    new_spatial_coords = affine_to_coords(affine, width, height)
    return {
        "x": xarray.IndexVariable("x", new_spatial_coords["x"]),
        "y": xarray.IndexVariable("y", new_spatial_coords["y"]),
    }
def _generate_spatial_coords(affine, width, height):
    """Get spatial coords in new transform."""
    new_spatial_coords = affine_to_coords(affine, width, height)
    if affine.is_rectilinear:
        return {
            "x": xarray.IndexVariable("x", new_spatial_coords["x"]),
            "y": xarray.IndexVariable("y", new_spatial_coords["y"]),
        }
    return {
        "xc": (("y", "x"), new_spatial_coords["x"]),
        "yc": (("y", "x"), new_spatial_coords["y"]),
    }
def __init__(self, time=None, frequency=None, values=None):
    """Initialize from time and frequency axes and a (frequency, time) array."""
    data = xr.Variable(("frequency", "time"), values)
    if not isinstance(time, xr.IndexVariable):
        time = xr.IndexVariable("time", time)
    if not isinstance(frequency, xr.IndexVariable):
        frequency = xr.IndexVariable("frequency", frequency)
    if time.size != values.shape[1] or frequency.size != values.shape[0]:
        raise ValueError("Input arrays have incompatible lengths.")
    with np.errstate(divide="ignore", invalid="ignore"):
        coords = dict(time=time, frequency=frequency, period=1.0 / frequency)
    super().__init__(data, coords, fastpath=True)
def setup(self):
    self.nx, self.ny = 2, 3
    x = np.arange(0, self.nx)
    y = np.arange(0, self.ny)
    cx = xr.IndexVariable("x", x)
    cy = xr.IndexVariable("y", y)
    time = xr.IndexVariable("time", np.arange(50))
    self.da = xr.DataArray(
        np.random.lognormal(10, 1, (len(time), self.nx, self.ny)),
        dims=("time", "x", "y"),
        coords={"time": time, "x": cx, "y": cy},
    )
def observed_data_to_xarray(self):
    """Convert observed data to xarray."""
    if self.dims is None:
        dims = {}
    else:
        dims = self.dims
    observed_data = {}
    for idx, arg_name in enumerate(self.arg_names):
        # Use emcee3 syntax, else use emcee2
        arg_array = np.atleast_1d(
            self.sampler.log_prob_fn.args[idx]
            if hasattr(self.sampler, "log_prob_fn")
            else self.sampler.args[idx]
        )
        arg_dims = dims.get(arg_name)
        arg_dims, coords = generate_dims_coords(
            arg_array.shape, arg_name, dims=arg_dims, coords=self.coords
        )
        # filter coords based on the dims
        coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in arg_dims}
        observed_data[arg_name] = xr.DataArray(arg_array, dims=arg_dims, coords=coords)
    return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.emcee))
def _initialize_tables(self, nstars, napt):
    iapt = self._xad_apt
    self._xad_star = istar = xa.IndexVariable(dims='star', data=range(self.nstars))
    self._flux = xa.DataArray(zeros([nstars, napt]), name='flux',
                              dims=['star', 'aperture'],
                              coords={'star': istar, 'aperture': iapt})
    self._entropy = xa.DataArray(zeros([nstars, napt]), name='aperture_entropy',
                                 dims=['star', 'aperture'],
                                 coords={'star': istar, 'aperture': iapt})
    self._cshift = xa.DataArray(zeros([nstars, 2]), name='centroid',
                                dims=['star', 'axis'],
                                coords={'star': istar, 'axis': ['x', 'y']})
    self._sky_median = xa.DataArray(zeros(nstars), name='sky_median',
                                    dims='star', coords={'star': istar})
    self._sky_entropy = xa.DataArray(zeros(nstars), name='sky_entropy',
                                     dims='star', coords={'star': istar})
def numpy_to_data_array(ary, *, var_name="data", coords=None, dims=None):
    """Convert a numpy array to an xarray.DataArray.

    The first two dimensions will be (chain, draw), and any remaining
    dimensions will be "shape".
    If the numpy array is 1d, this dimension is interpreted as draw
    If the numpy array is 2d, it is interpreted as (chain, draw)
    If the numpy array is 3 or more dimensions, the last dimensions are kept as shapes.

    Parameters
    ----------
    ary : np.ndarray
        A numpy array. If it has 2 or more dimensions, the first dimension should be
        independent chains from a simulation. Use `np.expand_dims(ary, 0)` to add a
        single dimension to the front if there is only 1 chain.
    var_name : str
        If there are no dims passed, this string is used to name dimensions
    coords : dict[str, iterable]
        A dictionary containing the values that are used as index. The key
        is the name of the dimension, the values are the index values.
    dims : List(str)
        A list of coordinate names for the variable

    Returns
    -------
    xr.DataArray
        Will have the same data as passed, but with coordinates and dimensions
    """
    # manage and transform copies
    default_dims = ["chain", "draw"]
    ary = utils.two_de(ary)
    n_chains, n_samples, *shape = ary.shape
    if n_chains > n_samples:
        warnings.warn(
            "More chains ({n_chains}) than draws ({n_samples}). "
            "Passed array should have shape (chains, draws, *shape)".format(
                n_chains=n_chains, n_samples=n_samples
            ),
            UserWarning,
        )

    dims, coords = generate_dims_coords(
        shape, var_name, dims=dims, coords=coords, default_dims=default_dims
    )

    # reversed order for default dims: 'chain', 'draw'
    if "draw" not in dims:
        dims = ["draw"] + dims
    if "chain" not in dims:
        dims = ["chain"] + dims

    if "chain" not in coords:
        coords["chain"] = utils.arange(n_chains)
    if "draw" not in coords:
        coords["draw"] = utils.arange(n_samples)

    # filter coords based on the dims
    coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in dims}
    return xr.DataArray(ary, coords=coords, dims=dims)
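# Hypothetical usage sketch for numpy_to_data_array: a posterior sample with
# 4 chains, 100 draws and 3 parameters. With no dims/coords given, the helpers
# above (generate_dims_coords, utils) are assumed to name the trailing
# dimension "theta_dim_0" in the arviz style.
samples = np.random.randn(4, 100, 3)
theta = numpy_to_data_array(samples, var_name="theta")
# theta.dims -> ("chain", "draw", "theta_dim_0")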
def observed_data_to_xarray(self):
    """Convert observed data to xarray."""
    if self.observed is None:
        return None
    observed_data = {}
    if isinstance(self.observed, self.tf.Tensor):
        with self.tf.Session() as sess:
            vals = sess.run(self.observed, feed_dict=self.feed_dict)
    else:
        vals = self.observed
    if self.dims is None:
        dims = {}
    else:
        dims = self.dims
    name = "obs"
    val_dims = dims.get(name)
    vals = np.atleast_1d(vals)
    val_dims, coords = generate_dims_coords(
        vals.shape, name, dims=val_dims, coords=self.coords
    )
    coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
    observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
    return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.tfp))
def observed_data_to_xarray(self):
    """Convert observed data to xarray."""
    if self.predictions:
        return None
    if self.dims is None:
        dims = {}
    else:
        dims = self.dims
    observed_data = {}
    for name, vals in self.observations.items():
        if hasattr(vals, "get_value"):
            vals = vals.get_value()
        vals = utils.one_de(vals)
        val_dims = dims.get(name)
        val_dims, coords = generate_dims_coords(
            vals.shape, name, dims=val_dims, coords=self.coords
        )
        # filter coords based on the dims
        coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
        observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
    return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.pymc3))
def series(values, name, start="2000-01-01"):
    coords = collections.OrderedDict()
    for dim, n in zip(("time", "lon", "lat"), values.shape):
        if dim == "time":
            coords[dim] = pd.date_range(start, periods=n, freq=pd.DateOffset(days=1))
        else:
            coords[dim] = xr.IndexVariable(dim, np.arange(n))

    if name == "tas":
        attrs = {
            "standard_name": "air_temperature",
            "cell_methods": "time: mean within days",
            "units": "K",
            "kind": "+",
        }
    elif name == "pr":
        attrs = {
            "standard_name": "precipitation_flux",
            "cell_methods": "time: sum over day",
            "units": "kg m-2 s-1",
            "kind": "*",
        }
    else:
        # guard against an undefined `attrs` for unrecognized names
        raise ValueError("Unknown variable name: {}".format(name))

    return xr.DataArray(
        values,
        coords=coords,
        dims=list(coords.keys()),
        name=name,
        attrs=attrs,
    )
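# Sketch (illustrative values): a year of synthetic daily temperatures on a
# 2x2 lon/lat grid, wrapped with CF-style attributes by series().
tas = series(270.0 + 10.0 * np.random.rand(365, 2, 2), "tas")
# tas.dims -> ("time", "lon", "lat"); tas.attrs["units"] -> "K"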
# `xr_data_type` is assumed to be defined elsewhere as a Dataset/DataArray alias.
def add_tile_coords(tile: str, dataset: xr_data_type) -> xr_data_type:
    """Restore physical coordinates to dataset."""
    scale = 1111950.5196669996
    regex = re.compile(r'h\d+v\d+')
    matches = regex.findall(tile)
    extract = re.compile(r'\d+')
    h, v = extract.findall(matches[0])
    h = int(h)
    v = int(v)
    x_start = scale * (h - 18)
    x_end = scale * (h - 17)
    y_start = -scale * (v - 9)
    y_end = -scale * (v - 8)
    dataset['x'] = xr.IndexVariable('x', np.linspace(x_start, x_end, 2400))
    dataset['y'] = xr.IndexVariable('y', np.linspace(y_start, y_end, 2400))
    return dataset
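# Sketch: restoring sinusoidal-grid coordinates for a MODIS-style tile id.
# The 2400x2400 shape and the 'h18v09' id are illustrative assumptions that
# match the regex and np.linspace calls above.
ds = xr.Dataset({'ndvi': (('y', 'x'), np.zeros((2400, 2400)))})
ds = add_tile_coords('h18v09', ds)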
def _make_coords(src_data_array, dst_affine, dst_width, dst_height, dst_crs):
    """Generate the coordinates of the new projected `xarray.DataArray`"""
    # step 1: collect old nonspatial coordinates
    coords = {}
    for coord in set(src_data_array.coords) - {
        src_data_array.rio.x_dim,
        src_data_array.rio.y_dim,
        "spatial_ref",
    }:
        if src_data_array[coord].dims:
            coords[coord] = xarray.IndexVariable(
                src_data_array[coord].dims,
                src_data_array[coord].values,
                src_data_array[coord].attrs,
            )
        else:
            coords[coord] = xarray.Variable(
                src_data_array[coord].dims,
                src_data_array[coord].values,
                src_data_array[coord].attrs,
            )
    new_coords = _warp_spatial_coords(src_data_array, dst_affine, dst_width, dst_height)
    new_coords.update(coords)
    return add_xy_grid_meta(new_coords)
def observed_data_to_xarray(self):
    """Convert observed data to xarray."""
    # This next line is brittle and may not work forever, but is a secret
    # way to access the model from the trace.
    model = self.trace._straces[0].model  # pylint: disable=protected-access
    observations = {obs.name: obs.observations for obs in model.observed_RVs}
    if self.dims is None:
        dims = {}
    else:
        dims = self.dims
    observed_data = {}
    for name, vals in observations.items():
        if hasattr(vals, "get_value"):
            vals = vals.get_value()
        vals = np.atleast_1d(vals)
        val_dims = dims.get(name)
        val_dims, coords = generate_dims_coords(
            vals.shape, name, dims=val_dims, coords=self.coords
        )
        # filter coords based on the dims
        coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
        observed_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
    return xr.Dataset(data_vars=observed_data, attrs=make_attrs(library=self.pymc3))
def eofsAsCovariance(self, neofs=None, pcscaling=1):
    """Empirical orthogonal functions (EOFs) expressed as the
    covariance between the principal component time series (PCs)
    and the time series of the `Eof` input *dataset* at each grid
    point.

    **Optional arguments:**

    *neofs*
        Number of EOFs to return. Defaults to all EOFs. If the
        number of EOFs requested is more than the number that are
        available, then all available EOFs will be returned.

    *pcscaling*
        Set the scaling of the PCs used to compute covariance. The
        following values are accepted:

        * *0* : Un-scaled PCs.
        * *1* : PCs are scaled to unit variance (divided by the
          square-root of their eigenvalue) (default).
        * *2* : PCs are multiplied by the square-root of their
          eigenvalue.

        The default is to divide PCs by the square-root of their
        eigenvalue so that the PCs are scaled to unit variance
        (option 1).

    **Returns:**

    *eofs*
        A `~xarray.DataArray` containing the ordered EOFs. The EOFs
        are numbered from 0 to *neofs* - 1.

    **Examples:**

    All EOFs::

        eofs = solver.eofsAsCovariance()

    The leading EOF::

        eof1 = solver.eofsAsCovariance(neofs=1)

    The leading EOF using un-scaled PCs::

        eof1 = solver.eofsAsCovariance(neofs=1, pcscaling=0)

    """
    eofs = self._solver.eofsAsCovariance(neofs, pcscaling)
    eofdim = xr.IndexVariable('mode', range(eofs.shape[0]),
                              attrs={'long_name': 'eof_mode_number'})
    coords = [eofdim] + self._coords
    long_name = 'covariance_between_pcs_and_{!s}'.format(self._name)
    eofs = xr.DataArray(eofs, coords=coords, name='eofs',
                        attrs={'long_name': long_name})
    eofs.coords.update({coord.name: (coord.dims, coord)
                        for coord in self._space_ndcoords})
    return eofs
# `gm` is assumed to be a module providing the GM81 structure function duu_r.
def plot_gm_structure_function(lat=35, N=2.4e-3, N0=5.2e-3, b=1.3e3):
    ax = plt.gca()
    r = xr.IndexVariable('r', np.logspace(2, 6, 401))
    # Latitude to Coriolis frequency
    omega_earth = 7.2921e-5
    f = 2 * omega_earth * np.sin(np.pi / 180. * lat)
    D2_GM = gm.duu_r(r, f=abs(f), N=N, N0=N0, b=abs(b))
    ax.loglog(1e-3 * r, D2_GM, lw=2, color='black', label='GM81')
def constant_data_to_xarray(self):
    """Convert constant data to xarray."""
    # For constant data, we are concerned only with deterministics and data.
    # The constant data vars must be either pm.Data (TensorSharedVariable) or pm.Deterministic
    constant_data_vars = {}  # type: Dict[str, Var]

    for var in self.model.deterministics:
        if hasattr(self.aesara, "gof"):
            ancestors_func = self.aesara.gof.graph.ancestors  # pylint: disable=no-member
        else:
            ancestors_func = self.aesara.graph.basic.ancestors  # pylint: disable=no-member
        ancestors = ancestors_func(var.owner.inputs)
        # no dependency on a random variable
        if not any((isinstance(a, self.pymc3.model.PyMC3Variable) for a in ancestors)):
            constant_data_vars[var.name] = var

    def is_data(name, var) -> bool:
        assert self.model is not None
        return (
            var not in self.model.deterministics
            and var not in self.model.observed_RVs
            and var not in self.model.free_RVs
            and var not in self.model.potentials
            and (self.observations is None or name not in self.observations)
        )

    # I don't know how to find pm.Data, except that they are named variables that aren't
    # observed or free RVs, nor are they deterministics, and then we eliminate observations.
    for name, var in self.model.named_vars.items():
        if is_data(name, var):
            constant_data_vars[name] = var

    if not constant_data_vars:
        return None

    if self.dims is None:
        dims = {}
    else:
        dims = self.dims

    constant_data = {}
    for name, vals in constant_data_vars.items():
        if hasattr(vals, "get_value"):
            vals = vals.get_value()
        # this might be a Deterministic, and must be evaluated
        elif hasattr(self.model[name], "eval"):
            vals = self.model[name].eval()
        vals = np.atleast_1d(vals)
        val_dims = dims.get(name)
        val_dims, coords = generate_dims_coords(
            vals.shape, name, dims=val_dims, coords=self.coords
        )
        # filter coords based on the dims
        coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims}
        try:
            constant_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords)
        except ValueError as err:
            raise ValueError(
                "Error translating constant_data variable %s: %s" % (name, err)
            ) from err
    return xr.Dataset(data_vars=constant_data, attrs=make_attrs(library=self.pymc3))
def _wrap_xarray(reference, lats, lons):
    try:
        import xarray as xr
    except ImportError:
        try:
            import xray as xr
        except ImportError:
            raise ValueError("cannot use container 'xarray' without xarray")
    londim = xr.IndexVariable('longitude', lons,
                              attrs={'standard_name': 'longitude',
                                     'units': 'degrees_east'})
    latdim = xr.IndexVariable('latitude', lats,
                              attrs={'standard_name': 'latitude',
                                     'units': 'degrees_north'})
    for name in reference.keys():
        reference[name] = xr.DataArray(reference[name],
                                       coords=[latdim, londim],
                                       attrs={'long_name': name})
def generate_omega(N, f, nb_points=401):
    omega = xr.IndexVariable('omega',
                             np.logspace(np.log10(1.01 * f), np.log10(N), nb_points),
                             attrs={'name': 'Frequency', 'units': 'rad.s-1'})
    return omega
def generate_k(min_decade=-6, max_decade=-2, nb_points=401):
    k = xr.IndexVariable('k',
                         2 * np.pi * np.logspace(min_decade, max_decade, nb_points),
                         attrs={'long_name': 'Wavelength', 'units': 'rad.m-1'})
    return k
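# Sketch: building both spectral axes defined above. The stratification (N)
# and Coriolis (f) values are illustrative mid-latitude numbers.
omega = generate_omega(N=5.2e-3, f=1e-4)
k = generate_k()
# omega.size == k.size == 401; both carry units attributes for labeling plots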
def pcs(self, pcscaling=0, npcs=None):
    pcs = self._solver.pcs(pcscaling, npcs)
    pcdim = xr.IndexVariable('mode', range(pcs.shape[1]),
                             attrs={'long_name': 'eof_mode_number'})
    coords = [self._time, pcdim]
    pcs = xr.DataArray(pcs, coords=coords, name='pcs')
    pcs.coords.update({coord.name: (coord.dims, coord.data)
                       for coord in self._time_ndcoords})
    return pcs
def northTest(self, neigs=None, vfscaled=False):
    """Typical errors for eigenvalues.

    The method of North et al. (1982) is used to compute the typical
    error for each eigenvalue. It is assumed that the number of times
    in the input data set is the same as the number of independent
    realizations. If this assumption is not valid then the result may
    be inappropriate.

    **Optional arguments:**

    *neigs*
        The number of eigenvalues to return typical errors for.
        Defaults to typical errors for all eigenvalues.

    *vfscaled*
        If *True* scale the errors by the sum of the eigenvalues. This
        yields typical errors with the same scale as the values
        returned by `Eof.varianceFraction`. If *False* then no scaling
        is done. Defaults to *False* (no scaling).

    **Returns:**

    *errors*
        A `~xarray.DataArray` containing the typical errors for each
        eigenvalue. The eigenvalues are numbered from 0 to
        *neigs* - 1.

    **References**

    North G.R., T.L. Bell, R.F. Cahalan, and F.J. Moeng (1982)
    Sampling errors in the estimation of empirical orthogonal
    functions. *Mon. Weather. Rev.*, **110**, pp 699-706.

    **Examples:**

    Typical errors for all eigenvalues::

        errors = solver.northTest()

    Typical errors for the first 3 eigenvalues scaled by the sum of
    the eigenvalues::

        errors = solver.northTest(neigs=3, vfscaled=True)

    """
    typerrs = self._solver.northTest(neigs=neigs, vfscaled=vfscaled)
    eofdim = xr.IndexVariable('mode', range(typerrs.shape[0]),
                              attrs={'long_name': 'eof_mode_number'})
    coords = [eofdim]
    long_name = 'typical_errors'
    typerrs = xr.DataArray(typerrs, coords=coords, name='typical_errors',
                           attrs={'long_name': long_name})
    return typerrs
def _per_doy(values, calendar="standard", units="kg m-2 s-1"):
    # `max_doy` (assumed in scope) maps calendar names to their maximum
    # day-of-year, e.g. 366 for "standard".
    n = max_doy[calendar]
    if len(values) != n:
        raise ValueError("Values must be same length as number of days in calendar.")
    coords = xr.IndexVariable("dayofyear", np.arange(1, n + 1))
    return xr.DataArray(
        values, coords=[coords], attrs={"calendar": calendar, "units": units}
    )
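# Sketch, assuming max_doy["standard"] == 366: a day-of-year climatology
# indexed 1..366.
clim = _per_doy(np.linspace(0.0, 1.0, 366))
# clim.dayofyear.values[:3] -> array([1, 2, 3])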
def _wrap_xarray(solution, neofs, time_units):
    try:
        import xarray as xr
    except ImportError:
        try:
            import xray as xr
        except ImportError:
            raise ValueError("cannot use container 'xarray' without "
                             "the xarray/xray module")
    time_dim = xr.IndexVariable('time', solution['time'])
    lat_dim = xr.IndexVariable('latitude', solution['latitude'])
    lon_dim = xr.IndexVariable('longitude', solution['longitude'])
    eof_dim = xr.IndexVariable('eof', np.arange(1, neofs + 1))
    solution['sst'] = xr.DataArray(solution['sst'],
                                   coords=[time_dim, lat_dim, lon_dim])
    solution['eigenvalues'] = xr.DataArray(solution['eigenvalues'],
                                           coords=[eof_dim])
    solution['eofs'] = xr.DataArray(solution['eofs'],
                                    coords=[eof_dim, lat_dim, lon_dim])
def average_ds_over_time(ds, date, freq, mark='end', time_res='S'):
    """
    Average the dataset over constant periods of time

    Arguments
    ---------
    ds: xarray.Dataset
        Dataset to average
    date: datetime.datetime
        Start date
    freq: string or pandas.DateOffset
        Size of time chunks. E.g. 10T is 10 minutes
    mark: string, optional
        Time index mark. Can be one of "start" or "end", e.g. the start
        or the end of time chunks.
    time_res: string, optional
        Can be seconds (S) or minutes (M)

    Returns
    -------
    ave_ds: xarray.Dataset
        Dataset of averaged data
    """
    # create time index with the given frequency
    new_time = pd.date_range(start=date,
                             end=date + timedelta(hours=23, minutes=59, seconds=59),
                             freq=freq)
    # compute tstep unconditionally so mark='start' does not hit an
    # undefined variable below
    tstep = new_time[1] - new_time[0]
    if mark == 'end':
        new_time += tstep
    # TODO: add "middle" option
    if time_res == 'S':
        # TODO: rewrite this
        ts = tstep.total_seconds()
    elif time_res == 'M':
        ts = tstep.total_seconds() / 60
    # save attributes before averaging
    _attrs = {k: ds[k].attrs for k in ds.data_vars}
    # average over time chunks
    ave_ds = (ds.groupby(xr.IndexVariable(dims='time',
                                          data=np.arange(len(ds.time)) // ts))
              .mean())
    # reset time index
    ave_ds['time'] = new_time
    # the groupby operation drops attributes, so restore the saved ones
    for k in ds.data_vars:
        ave_ds[k].attrs.update(_attrs[k])
    return ave_ds
def param(model):
    """Return a parameter coordinate.

    Parameters
    ----------
    model : str
        Model name.
    """
    model = get_model(model)
    return xr.IndexVariable('param', data=np.array(model.params._fields),
                            attrs={'standard_name': 'parameter',
                                   'long_name': '{} model parameter name'.format(model)})
def realization(n):
    """Return a realization coordinate.

    Parameters
    ----------
    n : int
        Size of the ensemble.
    """
    return xr.IndexVariable('realization', data=range(n),
                            attrs={'standard_name': 'realization',
                                   'axis': 'E',
                                   'units': '1',
                                   'long_name': 'Label identifying the ensemble member'})
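# Sketch: attaching the realization coordinate to a small synthetic ensemble.
members = realization(3)
ens = xr.DataArray(np.zeros((3, 10)), dims=('realization', 'time'),
                   coords={'realization': members})
# ens.realization.attrs['axis'] -> 'E'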
def numpy_to_xarray(array, geobox, name=None):
    """Utility to convert ndarray to DataArray, using a datacube.model.GeoBox"""
    coords = [
        xarray.IndexVariable(x, geobox.coords[x].values,
                             attrs=dict(units=geobox.coords[x].units))
        for x in geobox.dims
    ]
    return xarray.DataArray(array, coords=coords, attrs=dict(crs=geobox.crs), name=name)