def add_time_coords(dataset: xr.Dataset, time_range: Tuple[float, float]) -> xr.Dataset:
    t1, t2 = time_range
    if t1 != t2:
        t_center = (t1 + t2) / 2
    else:
        t_center = t1
    dataset = dataset.expand_dims('time')
    dataset = dataset.assign_coords(time=(['time'], [t_center]))
    time_var = dataset.coords['time']
    time_var.attrs['long_name'] = 'time'
    time_var.attrs['standard_name'] = 'time'
    time_var.attrs['units'] = DATETIME_UNITS
    time_var.attrs['calendar'] = DATETIME_CALENDAR
    time_var.encoding['units'] = DATETIME_UNITS
    time_var.encoding['calendar'] = DATETIME_CALENDAR
    if t1 != t2:
        time_var.attrs['bounds'] = 'time_bnds'
        dataset = dataset.assign_coords(time_bnds=(['time', 'bnds'], [[t1, t2]]))
        time_bnds_var = dataset.coords['time_bnds']
        time_bnds_var.attrs['long_name'] = 'time'
        time_bnds_var.attrs['standard_name'] = 'time'
        time_bnds_var.attrs['units'] = DATETIME_UNITS
        time_bnds_var.attrs['calendar'] = DATETIME_CALENDAR
        time_bnds_var.encoding['units'] = DATETIME_UNITS
        time_bnds_var.encoding['calendar'] = DATETIME_CALENDAR
    return dataset
def change_crs(dset: xr.Dataset, old_coords, old_crs, new_coords, new_crs):
    dset = dset.copy()

    # Load coordinates
    old_x = dset.variables[old_coords[0]].values
    old_y = dset.variables[old_coords[1]].values
    if len(old_x.shape) == 1 and len(old_y.shape) == 1:
        old_x, old_y = np.meshgrid(old_x, old_y)

    # Find old dimensions
    xdims = dset.variables[old_coords[0]].dims
    ydims = dset.variables[old_coords[1]].dims
    if len(xdims) == 2:
        dims = xdims
    else:
        dims = ydims + xdims

    # Transform coordinates
    old_gridmap, old_proj = _load_crs(dset, old_crs)
    new_gridmap, new_proj = _load_crs(dset, new_crs)
    new_x, new_y = crs_transform(old_x, old_y, old_proj, new_proj)

    # Remove old grid mapping and coordinates
    dset = dset.drop_vars(old_gridmap.name)
    dset = dset.drop_vars(old_coords)

    # Check if new coordinates are one-dimensional
    xdiff = np.max(np.abs(np.diff(new_x, axis=0)))
    ydiff = np.max(np.abs(np.diff(new_y, axis=1)))
    if xdiff < 1e-8 and ydiff < 1e-8:
        # If one-dimensional, store as one-dimensional variables and
        # change dimension names to match coordinates
        dset = dset.assign_coords({
            new_coords[0]: xr.Variable(dims[1], new_x[0, :]),
            new_coords[1]: xr.Variable(dims[0], new_y[:, 0]),
        })  # type: xr.Dataset
        dset = dset.swap_dims(dict(zip(reversed(dims), new_coords)))
    else:
        # If two-dimensional, store as auxiliary coordinates with the same
        # dimension names as the old coordinates
        dset = dset.assign_coords({
            new_coords[0]: xr.Variable(dims, new_x),
            new_coords[1]: xr.Variable(dims, new_y),
        })  # type: xr.Dataset

    # Find data vars referring to old coordinates
    old_data_vars = [
        k for k, v in dset.data_vars.items() if 'grid_mapping' in v.attrs
    ]

    # Add grid mapping to new dataset
    dset = set_crs(dset=dset, crs=new_gridmap, coords=new_coords,
                   data_vars=old_data_vars)

    return dset
def _normalize_lon_360(dataset: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``dataset`` so that it ranges from -180 to +180 degrees.

    :param dataset: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """
    if 'lon' not in dataset.coords:
        return dataset

    lon_var = dataset.coords['lon']

    if len(lon_var.shape) != 1:
        return dataset

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return dataset

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return dataset

    # roll_coords will be set to False by default in the future
    dataset = dataset.roll(lon=lon_size_05, roll_coords=True)
    dataset = dataset.assign_coords(lon=(((dataset.lon + 180) % 360) - 180))
    return dataset
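# Minimal usage sketch (not from the original source): builds a toy dataset whose
# longitudes run 0..360 and normalizes them with _normalize_lon_360 defined directly
# above. The names toy_ds and temp are illustrative only.
import numpy as np
import xarray as xr

toy_ds = xr.Dataset(
    {'temp': (('lat', 'lon'), np.arange(8, dtype=float).reshape(2, 4))},
    coords={'lat': [-45.0, 45.0], 'lon': [0.0, 90.0, 180.0, 270.0]},
)
normalized = _normalize_lon_360(toy_ds)
# The data are rolled together with the coordinate, and the longitudes become
# [-180., -90., 0., 90.]
print(normalized.lon.values)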
def __init__(
    self,
    ds: Dataset,
    covariates: list,
    timestamp: np.datetime64,
    type: str = "real",
) -> None:
    self.timestamp = timestamp
    self.data_name, self.var_name = _get_field_names(ds)
    if type == "real":
        self.ds = _preprocess_ds(ds, timestamp, covariates)
        self.ds_main = get_main_coords(self.ds).sel(time=timestamp)
        df = self.to_dataframe()
        df_main = self.to_dataframe(main=True)
        self.coords = df[["lat", "lon"]].values
        self.coords_main = df_main[["lat", "lon"]].values
        self.values = df[self.data_name].values
        self.values_main = df_main[self.data_name].values
        self.temporal_trend = self.ds.attrs["temporal_trend"]
        self.spatial_trend = df["spatial_trend"].values
        self.spatial_mean = self.ds.attrs["spatial_mean"]
        self.scale_fact = self.ds.attrs["scale_fact"]
        self.covariate_means = self.ds.attrs["covariate_means"]
        self.covariate_scales = self.ds.attrs["covariate_scales"]
        self.variance_estimate = df[self.var_name].values
        self.covariates = df[covariates]
    else:
        self.ds_main = ds.assign_coords(coords={"time": np.nan})
        df_main = self.to_dataframe(main=True)
        self.coords = self.coords_main = df_main[["x", "y"]].values
        self.values = self.values_main = df_main[self.data_name].values
    self.size = len(self.values)
def xarray_2D_to_3D(self, xdataset: xr.Dataset):
    new_ds = xdataset.assign_coords(
        src_chan=xdataset.component.split(':')[0])
    new_ds = new_ds.assign_coords(
        rec_chan=xdataset.component.split(':')[1])
    new_ds = new_ds.drop_dims(['component'])
    return new_ds
def normalize_coord_vars(ds: xr.Dataset) -> xr.Dataset:
    """
    Turn potential coordinate variables from data variables into coordinate variables.

    Any data variable is considered a coordinate variable

    * whose name is its only dimension name;
    * whose number of dimensions is two, whose first dimension name is also a variable
      name, and whose last dimension is named "bnds".

    :param ds: The dataset
    :return: The same dataset or a shallow copy with potential coordinate variables
             turned into coordinate variables.
    """
    if 'bnds' not in ds.dims:
        return ds

    coord_var_names = set()
    for data_var_name in ds.data_vars:
        data_var = ds.data_vars[data_var_name]
        if is_coord_var(ds, data_var):
            coord_var_names.add(data_var_name)

    if not coord_var_names:
        return ds

    old_ds = ds
    ds = old_ds.drop(coord_var_names)
    ds = ds.assign_coords(**{bounds_var_name: old_ds[bounds_var_name]
                             for bounds_var_name in coord_var_names})

    return ds
def _map_forecast_horizon_to_months_ahead(stacked: xr.Dataset) -> xr.Dataset:
    assert "forecast_horizon" in [c for c in stacked.coords], (
        "Expect the `stacked` dataset object to have `forecast_horizon` as a coord"
    )

    # map forecast horizons to months ahead
    map_ = {
        pd.Timedelta("28 days 00:00:00"): 1,
        pd.Timedelta("29 days 00:00:00"): 1,
        pd.Timedelta("30 days 00:00:00"): 1,
        pd.Timedelta("31 days 00:00:00"): 1,
        pd.Timedelta("59 days 00:00:00"): 2,
        pd.Timedelta("60 days 00:00:00"): 2,
        pd.Timedelta("61 days 00:00:00"): 2,
        pd.Timedelta("62 days 00:00:00"): 2,
        pd.Timedelta("89 days 00:00:00"): 3,
        pd.Timedelta("90 days 00:00:00"): 3,
        pd.Timedelta("91 days 00:00:00"): 3,
        pd.Timedelta("92 days 00:00:00"): 3,
    }

    fhs = [pd.Timedelta(fh) for fh in stacked.forecast_horizon.values]
    months = [map_[fh] for fh in fhs]

    stacked = stacked.assign_coords(months_ahead=("time", months))
    return stacked
def add_traits(ds: Dataset, phenotypes_path: str) -> Dataset:
    ds_tr = load_dataset(phenotypes_path, consolidated=True)
    ds = ds.assign_coords(samples=lambda ds: ds.sample_id).merge(
        ds_tr.assign_coords(samples=lambda ds: ds.sample_id),
        join="left",
        compat="override",
    )
    return ds.reset_index("samples").reset_coords(drop=True)
def _adjust_tile_range(ds: xr.Dataset) -> xr.Dataset:
    if "tile" in ds:
        tiles = ds.tile
        if tiles.isel(tile=-1) == 6:
            ds = ds.assign_coords({"tile": tiles - 1})
    return ds
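# Minimal usage sketch (illustrative only): a dataset indexed with 1-based cubed-sphere
# tiles 1..6 is shifted to the 0-based range 0..5 by _adjust_tile_range defined above.
import numpy as np
import xarray as xr

toy = xr.Dataset(
    {"t2m": (("tile",), np.zeros(6))},
    coords={"tile": np.arange(1, 7)},
)
print(_adjust_tile_range(toy).tile.values)  # -> [0 1 2 3 4 5]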
def _create_variable(xr_dataset: xr.Dataset, nc_dataset: netCDF4.Dataset,
                     encoding: Dict[str, Dict[str, Dict[str, Any]]],
                     name: Hashable, unlimited_dims: Optional[List[str]],
                     variable: xr.DataArray) -> None:
    """Creation and writing of the NetCDF variable."""
    unlimited_dims = unlimited_dims or list()

    variable.attrs.pop("_FillValue", None)

    # Encode datetime64 to float64
    if np.issubdtype(variable.dtype, np.datetime64):
        # 946684800000000 is the number of microseconds between
        # 2000-01-01 and 1970-01-01
        values = (variable.values.astype("datetime64[us]").astype("int64") -
                  946684800000000) * 1e-6
        if variable.name in xr_dataset.coords:
            xr_dataset = xr_dataset.assign_coords(
                coords={variable.name: values})
            attrs = variable.attrs
            variable = xr_dataset[variable.name]
            variable.attrs.update(attrs)
        else:
            variable.values = values
        assert (
            variable.attrs["units"] == "seconds since 2000-01-01 00:00:00.0")

    dtype, kwargs = _create_variable_args(encoding, name, variable)

    group, name = _split_group_name(name)

    if group is not None:
        if group not in nc_dataset.groups:
            nc_dataset = nc_dataset.createGroup(group)
            if group in ["left", "right"]:
                nc_dataset.setncatts(
                    _group_attributes(
                        getattr(product_specification.Side,
                                group.upper()).value))
        else:
            nc_dataset = nc_dataset.groups[group]

    # If the dimensions don't exist yet then we have to create them.
    if not nc_dataset.dimensions:
        for dim_name, size in xr_dataset.dims.items():
            dim_group, dim_name = _split_group_name(dim_name)
            if dim_group == group:
                nc_dataset.createDimension(
                    dim_name, None if dim_name in unlimited_dims else size)

    ncvar = nc_dataset.createVariable(
        name, dtype,
        tuple(_split_group_name(item)[-1] for item in variable.dims),
        **kwargs)
    ncvar.setncatts(variable.attrs)

    values = variable.values
    if kwargs['fill_value'] is not None:
        if values.dtype.kind == "f" and np.any(np.isnan(values)):
            values[np.isnan(values)] = kwargs['fill_value']
        values = np.ma.array(values, mask=values == kwargs['fill_value'])
    nc_dataset[name][:] = values
def _normalize_lon_360(ds: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``ds`` so that it ranges from -180 to +180 degrees.

    :param ds: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """
    if 'lon' not in ds.coords:
        return ds

    lon_var = ds.coords['lon']

    if len(lon_var.shape) != 1:
        return ds

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return ds

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return ds

    delta_lon = lon_values[1] - lon_values[0]

    var_names = [var_name for var_name in ds.data_vars]

    ds = ds.assign_coords(
        lon=xr.DataArray(np.linspace(-180. + 0.5 * delta_lon,
                                     +180. - 0.5 * delta_lon,
                                     lon_size),
                         dims=ds['lon'].dims,
                         attrs=dict(long_name='longitude',
                                    standard_name='longitude',
                                    units='degrees east')))

    ds = adjust_spatial_attrs_impl(ds, True)

    new_vars = dict()
    for var_name in var_names:
        var = ds[var_name]
        if len(var.dims) >= 1 and var.dims[-1] == 'lon':
            values = np.copy(var.values)
            temp = np.copy(values[..., :lon_size_05])
            values[..., :lon_size_05] = values[..., lon_size_05:]
            values[..., lon_size_05:] = temp
            # import matplotlib.pyplot as plt
            # im = values[(len(values.shape) - 2) * [0] + [slice(None), slice(None)]]
            # plt.imshow(im)
            new_vars[var_name] = xr.DataArray(values, dims=var.dims,
                                              attrs=var.attrs,
                                              encoding=var.encoding)

    return ds.assign(**new_vars)
def test_concat_coords(self):
    data = Dataset({"foo": ("x", np.random.randn(10))})
    expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5))
    objs = [data.isel(x=slice(5)).assign_coords(c=0),
            data.isel(x=slice(5, None)).assign_coords(c=1)]
    for coords in ["different", "all", ["c"]]:
        actual = concat(objs, dim="x", coords=coords)
        self.assertDatasetIdentical(expected, actual)
    for coords in ["minimal", []]:
        with self.assertRaisesRegexp(ValueError, "not equal across"):
            concat(objs, dim="x", coords=coords)
def apply_sample_qc_1(ds: Dataset, sample_qc_path: str) -> Dataset:
    ds_sqc = load_sample_qc(sample_qc_path)
    ds_sqc = sample_qc_1(ds_sqc)
    ds_sqc = ds_sqc[SAMPLE_QC_COLS]
    ds = ds.assign_coords(samples=lambda ds: ds.sample_id).merge(
        ds_sqc.assign_coords(samples=lambda ds: ds.sample_id).compute(),
        join="inner",
        compat="override",
    )
    return ds.reset_index("samples").reset_coords(drop=True)
def friendly_obs(ds: xr.Dataset) -> xr.Dataset:
    station = ds.station.sum(
        dim='num_characters').to_series().rename_axis('station')
    ds = ds.drop_dims('num_characters')
    ds = ds.rename_dims({
        'default_time_coordinate_size': 'time',
        'number_of_stations': 'station'
    })
    ds = ds.assign_coords({'station': station})
    ds = ds.set_index({'time': 'OM__phenomenonTimeInstant'})
    return ds
def _round_time_coord(ds: xr.Dataset, time_coord: str = TIME_DIM_NAME) -> xr.Dataset:
    if time_coord in ds.coords:
        new_times = round_time(ds[time_coord])
        ds = ds.assign_coords({time_coord: new_times})
    else:
        logger.debug(
            "Round time operation called on dataset missing a time coordinate."
        )
    return ds
def to_360day_monthly(self, ds: xr.Dataset):
    """Change the calendar to datetime and the precision to monthly."""
    # https://github.com/pydata/xarray/issues/3320
    time1 = ds.time.copy()
    for itime in range(ds.sizes['time']):
        bb = ds.time.values[itime].timetuple()
        time1.values[itime] = datetime(bb[0], bb[1], 16)
    logging.info(
        "[CMIP6_IO] Fixed time units start at {} and end at {}".format(
            time1.values[0], time1.values[-1]))
    ds = ds.assign_coords({'time': time1})
    return ds
def _normalize_lat_lon_2d(ds: xr.Dataset) -> xr.Dataset:
    """
    Detect 2D 'lat', 'lon' variables that span an equirectangular grid. Then:
    drop the original 'lat', 'lon' variables;
    rename the original dimension names of the 'lat', 'lon' variables, usually
    ('y', 'x'), to ('lat', 'lon');
    insert new 1D 'lat', 'lon' coordinate variables with dimensions 'lat' and 'lon',
    respectively.

    :param ds: some xarray dataset
    :return: a normalized xarray dataset, or the original one
    """
    if not ('lat' in ds and 'lon' in ds):
        return ds

    lat_var = ds['lat']
    lon_var = ds['lon']

    lat_dims = lat_var.dims
    lon_dims = lon_var.dims
    if lat_dims != lon_dims:
        return ds

    spatial_dims = lon_dims
    if len(spatial_dims) != 2:
        return ds

    x_dim_name = spatial_dims[-1]
    y_dim_name = spatial_dims[-2]

    lat_data_1 = lat_var[:, 0]
    lat_data_2 = lat_var[:, -1]
    lon_data_1 = lon_var[0, :]
    lon_data_2 = lon_var[-1, :]

    equal_lat = np.allclose(lat_data_1, lat_data_2, equal_nan=True)
    equal_lon = np.allclose(lon_data_1, lon_data_2, equal_nan=True)

    # Drop lat and lon in any case. If not equal_lat and equal_lon,
    # subset_spatial_impl will subsequently fail with a ValidationError.
    ds = ds.drop_vars(['lon', 'lat'])

    if not (equal_lat and equal_lon):
        return ds

    ds = ds.rename({
        x_dim_name: 'lon',
        y_dim_name: 'lat',
    })

    ds = ds.assign_coords(lon=np.array(lon_data_1), lat=np.array(lat_data_1))

    return ds
def test_concat_coords(self):
    data = Dataset({"foo": ("x", np.random.randn(10))})
    expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5))
    objs = [
        data.isel(x=slice(5)).assign_coords(c=0),
        data.isel(x=slice(5, None)).assign_coords(c=1),
    ]
    for coords in ["different", "all", ["c"]]:
        actual = concat(objs, dim="x", coords=coords)
        assert_identical(expected, actual)
    for coords in ["minimal", []]:
        with raises_regex(merge.MergeError, "conflicting values"):
            concat(objs, dim="x", coords=coords)
def add_time_coords(dataset: xr.Dataset, time_range: Tuple[float, float]) -> xr.Dataset:
    t1, t2 = time_range
    if t1 != t2:
        t_center = (t1 + t2) / 2
    else:
        t_center = t1
    dataset = dataset.expand_dims('time')
    dataset = dataset.assign_coords(
        time=(['time'], from_time_in_days_since_1970([t_center])))
    time_var = dataset.coords['time']
    time_var.attrs['long_name'] = 'time'
    time_var.attrs['standard_name'] = 'time'
    # Avoiding xarray error:
    #   ValueError: failed to prevent overwriting existing key units in attrs on variable 'time'.
    #   This is probably an encoding field used by xarray to describe how a variable is serialized.
    #   To proceed, remove this key from the variable's attributes manually.
    # time_var.attrs['units'] = DATETIME_UNITS
    # time_var.attrs['calendar'] = DATETIME_CALENDAR
    time_var.encoding['units'] = DATETIME_UNITS
    time_var.encoding['calendar'] = DATETIME_CALENDAR
    if t1 != t2:
        time_var.attrs['bounds'] = 'time_bnds'
        dataset = dataset.assign_coords(
            time_bnds=(['time', 'bnds'],
                       from_time_in_days_since_1970([t1, t2]).reshape(1, 2)))
        time_bnds_var = dataset.coords['time_bnds']
        time_bnds_var.attrs['long_name'] = 'time'
        time_bnds_var.attrs['standard_name'] = 'time'
        # Avoiding xarray error:
        #   ValueError: failed to prevent overwriting existing key units in attrs on variable
        #   'time'. This is probably an encoding field used by xarray to describe how a variable
        #   is serialized.
        #   To proceed, remove this key from the variable's attributes manually.
        # time_bnds_var.attrs['units'] = DATETIME_UNITS
        # time_bnds_var.attrs['calendar'] = DATETIME_CALENDAR
        time_bnds_var.encoding['units'] = DATETIME_UNITS
        time_bnds_var.encoding['calendar'] = DATETIME_CALENDAR
    return dataset
def _normalize_lat_lon_2d(ds: xr.Dataset) -> xr.Dataset:
    """
    Detect 2D 'lat', 'lon' variables that span an equirectangular grid. Then:
    drop the original 'lat', 'lon' variables;
    rename the original dimension names of the 'lat', 'lon' variables, usually
    ('y', 'x'), to ('lat', 'lon');
    insert new 1D 'lat', 'lon' coordinate variables with dimensions 'lat' and 'lon',
    respectively.

    :param ds: some xarray dataset
    :return: a normalized xarray dataset, or the original one
    """
    if not ('lat' in ds and 'lon' in ds):
        return ds

    lat_var = ds['lat']
    lon_var = ds['lon']

    lat_dims = lat_var.dims
    lon_dims = lon_var.dims
    if lat_dims != lon_dims:
        return ds

    spatial_dims = lon_dims
    if len(spatial_dims) != 2:
        return ds

    x_dim_name = spatial_dims[-1]
    y_dim_name = spatial_dims[-2]

    lat_data_1 = lat_var[:, 0]
    lat_data_2 = lat_var[:, -1]
    lon_data_1 = lon_var[0, :]
    lon_data_2 = lon_var[-1, :]

    equal_lat = np.allclose(lat_data_1, lat_data_2, equal_nan=True)
    equal_lon = np.allclose(lon_data_1, lon_data_2, equal_nan=True)

    # Drop lat and lon in any case. If not equal_lat and equal_lon,
    # subset_spatial_impl will subsequently fail with a ValidationError.
    ds = ds.drop(['lon', 'lat'])

    if not (equal_lat and equal_lon):
        return ds

    ds = ds.rename({
        x_dim_name: 'lon',
        y_dim_name: 'lat',
    })

    ds = ds.assign_coords(lon=np.array(lon_data_1), lat=np.array(lat_data_1))

    return ds
def assign_slant_range_time_coord(
        measurement: xr.Dataset, coordinate_conversion: xr.Dataset) -> xr.Dataset:
    x = measurement.ground_range - coordinate_conversion.gr0
    slant_range = (coordinate_conversion.grsrCoefficients *
                   x**coordinate_conversion.degree).sum(dim="degree")
    slant_range_coord = slant_range.interp(
        azimuth_time=measurement.azimuth_time,
        ground_range=measurement.ground_range).data
    slant_range_time = 2 / SPEED_OF_LIGHT * slant_range_coord
    measurement = measurement.assign_coords(
        slant_range_time=(("azimuth_time", "ground_range"),
                          slant_range_time))  # type: ignore
    return measurement
def _prepare_vgrid(ds: xr.Dataset, vcoord: xr.DataArray) -> xr.Dataset:
    ds = ds.copy()
    dims_non_vert = [d for d in vcoord.dims if d not in _cosmo_vcoords]
    vcoord_vals = vcoord.mean(dim=dims_non_vert).values
    if 'soil1' in ds.coords:
        ds['soil1'] = ds['soil1'].copy(data=ds['soil1'] * (-1))
        vgrid_coords = np.concatenate([vcoord_vals, ds['soil1'].values])
    else:
        vgrid_coords = vcoord_vals
    ds = ds.assign_coords(vgrid=vgrid_coords)
    if 'level1' in ds.dims:
        ds['level1'] = vcoord_vals
    if 'level' in ds.dims:
        ds['level'] = ((vcoord_vals + np.roll(vcoord_vals, 1)) / 2)[1:]
    return ds
def _normalize_lon_360(ds: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``ds`` so that it ranges from -180 to +180 degrees.

    :param ds: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """
    if 'lon' not in ds.coords:
        return ds

    lon_var = ds.coords['lon']

    if len(lon_var.shape) != 1:
        return ds

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return ds

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return ds

    delta_lon = lon_values[1] - lon_values[0]

    var_names = [var_name for var_name in ds.data_vars]

    ds = ds.assign_coords(
        lon=xr.DataArray(np.linspace(-180. + 0.5 * delta_lon,
                                     +180. - 0.5 * delta_lon,
                                     lon_size),
                         dims=ds['lon'].dims,
                         attrs=dict(long_name='longitude',
                                    standard_name='longitude',
                                    units='degrees east')))

    ds = adjust_spatial_attrs(ds, True)

    new_vars = dict()
    for var_name in var_names:
        var = ds[var_name]
        if 'lon' in var.dims:
            new_var = var.roll(lon=lon_size_05, roll_coords=False)
            new_var.encoding.update(var.encoding)
            new_vars[var_name] = new_var

    return ds.assign(**new_vars)
def standardize_zarr_time_coord(ds: xr.Dataset) -> xr.Dataset:
    """Casts a datetime coord to python datetime and rounds to the nearest even second
    (because cftime coords have small rounding errors that make it hard for other
    datasets to join on time).

    Args:
        ds (xr.Dataset): time coordinate is a datetime-like object

    Returns:
        xr.Dataset with standardized time coordinates
    """
    # Vectorize doesn't work on type-dispatched function overloading
    times = np.array(list(map(vcm.cast_to_datetime, ds[TIME_NAME].values)))
    times = round_time(times)
    ds = ds.assign_coords({TIME_NAME: times})
    return ds
def compute_vorticity(ds: xr.Dataset, grid: cfd.grids.Grid) -> xr.Dataset:
    """Computes vorticity of a dataset containing Kolmogorov flow trajectories.

    Args:
        ds: dataset containing variables with Kolmogorov flow trajectories.
        grid: grid over which to compute vorticity.

    Returns:
        Vorticity of the Kolmogorov flow trajectories.
    """
    coords = xru.construct_coords(grid)
    ds = ds.assign_coords(coords)
    dy = ds.y[1] - ds.y[0]
    dx = ds.x[1] - ds.x[0]
    dv_dx = (ds.sel(v=1).roll(x=-1, roll_coords=False) - ds.sel(v=1)) / dx
    du_dy = (ds.sel(v=0).roll(y=-1, roll_coords=False) - ds.sel(v=0)) / dy
    return (dv_dx - du_dy)
def add_scalar_record(self, ds: xr.Dataset, varname: str, x: Iterable) -> xr.Dataset:
    if isinstance(x, Tensor):
        x = x.detach().cpu().numpy()

    # Cut excess entries (NaNs).
    x = x[:x.argmin()]

    if 'iter' not in ds.coords:
        ds = ds.assign_coords({'iter': np.arange(len(x))})
    else:
        if len(ds['iter']) != len(x):
            raise ValueError(
                f'dimension `iter` already exists in `ds`, but length ({len(ds["iter"])}) does '
                f'not match length of `x` ({len(x)}).')

    ds[varname] = ('iter', x)
    return ds
def _transform_pv_systems(pv_systems: xr.Dataset) -> xr.Dataset:
    """Transform the system locations into the same coordinate system used by UKV."""
    system_latitudes, system_longitudes = (
        pv_systems["latitude"].values,
        pv_systems["longitude"].values,
    )

    wgs84 = ccrs.Geodetic()
    ukv_crs = ccrs.OSGB(approx=False)
    locs = ukv_crs.transform_points(
        src_crs=wgs84,
        x=np.asanyarray(system_longitudes),
        y=np.asanyarray(system_latitudes),
    )[:, :-1]

    new_coords = {
        "easting": (["system_id"], locs[:, 0].astype("int32")),
        "northing": (["system_id"], locs[:, 1].astype("int32")),
    }
    return pv_systems.assign_coords(new_coords)
def _transform_pv_systems_pyproj(pv_systems: xr.Dataset) -> xr.Dataset:
    """Transform the system locations into the same coordinate system used by UKV,
    using pyproj."""
    import pyproj

    system_latitudes, system_longitudes = (
        pv_systems["latitude"].values,
        pv_systems["longitude"].values,
    )

    transformer = pyproj.Transformer.from_crs("epsg:4326", "epsg:27700",
                                              always_xy=True)
    locs = transformer.transform(np.asanyarray(system_longitudes),
                                 np.asanyarray(system_latitudes))
    print(locs)

    new_coords = {
        "easting": (["system_id"], locs[0]),
        "northing": (["system_id"], locs[1]),
    }
    return pv_systems.assign_coords(new_coords)
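# Minimal usage sketch (illustrative only, assumes pyproj is installed): a toy dataset
# of two PV systems with WGS84 latitude/longitude is reprojected to OSGB easting and
# northing coordinates by _transform_pv_systems_pyproj defined above. The variable
# names toy_systems and system_id values are illustrative.
import numpy as np
import xarray as xr

toy_systems = xr.Dataset(
    {
        "latitude": (["system_id"], np.array([51.5, 53.4])),
        "longitude": (["system_id"], np.array([-0.1, -2.2])),
    },
    coords={"system_id": [1, 2]},
)
transformed = _transform_pv_systems_pyproj(toy_systems)
print(transformed.easting.values, transformed.northing.values)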
def pre_process(self, dataset: xr.Dataset) -> xr.Dataset:
    num_times = dataset.sizes.get('t')
    time = np.ndarray(shape=num_times, dtype=np.dtype('datetime64[us]'))
    for i in range(num_times):
        date = dataset.DATE[i]
        hour = dataset.HOUR[i]
        minute = dataset.MIN[i]
        year = date // 10000
        month = (date - year * 10000) // 100
        day = date % 100
        dt = datetime.datetime(year, month, day, hour=hour, minute=minute)
        dt64 = np.datetime64(dt)
        time[i] = dt64
    dataset = dataset.rename(dict(t='time'))
    dataset = dataset.drop(['DATE', 'HOUR', 'MIN'])
    dataset = dataset.assign_coords(
        time=xr.DataArray(time,
                          dims='time',
                          attrs=dict(long_name='time',
                                     standard_name='time',
                                     units='seconds since 1970-01-01'),
                          encoding=dict(units='seconds since 1970-01-01',
                                        calendar='standard')))
    return dataset
def normalize_missing_time(ds: xr.Dataset) -> xr.Dataset:
    """
    Add a time coordinate variable and its associated bounds coordinate variable
    if the temporal CF attributes ``time_coverage_start`` and ``time_coverage_end``
    are given but the time dimension is missing.

    The new time coordinate variable will be named ``time`` with dimension ['time']
    and shape [1]. The time bounds coordinates variable will be named ``time_bnds``
    with dimensions ['time', 'bnds'] and shape [1, 2]. Both are of data type
    ``datetime64``.

    :param ds: Dataset to adjust
    :return: Adjusted dataset
    """
    time_coverage_start = ds.attrs.get('time_coverage_start')
    if time_coverage_start is not None:
        # noinspection PyBroadException
        try:
            time_coverage_start = pd.to_datetime(time_coverage_start)
        except BaseException:
            pass

    time_coverage_end = ds.attrs.get('time_coverage_end')
    if time_coverage_end is not None:
        # noinspection PyBroadException
        try:
            time_coverage_end = pd.to_datetime(time_coverage_end)
        except BaseException:
            pass

    if not time_coverage_start and not time_coverage_end:
        # Can't do anything
        return ds

    if 'time' in ds:
        time = ds.time
        if not time.dims:
            ds = ds.drop('time')
        elif len(time.dims) == 1:
            time_dim_name = time.dims[0]
            is_time_used_as_dim = any([(time_dim_name in ds[var_name].dims)
                                       for var_name in ds.data_vars])
            if is_time_used_as_dim:
                # It seems we already have valid time coordinates
                return ds
            time_bnds_var_name = time.attrs.get('bounds')
            if time_bnds_var_name in ds:
                ds = ds.drop(time_bnds_var_name)
            ds = ds.drop('time')
            ds = ds.drop([var_name for var_name in ds.coords
                          if time_dim_name in ds.coords[var_name].dims])

    if time_coverage_start or time_coverage_end:
        # noinspection PyBroadException
        try:
            ds = ds.expand_dims('time')
        except BaseException as e:
            warnings.warn(f'failed to add time dimension: {e}')

    if time_coverage_start and time_coverage_end:
        time_value = time_coverage_start + 0.5 * (time_coverage_end - time_coverage_start)
    else:
        time_value = time_coverage_start or time_coverage_end

    new_coord_vars = dict(time=xr.DataArray([time_value], dims=['time']))

    if time_coverage_start and time_coverage_end:
        has_time_bnds = 'time_bnds' in ds.coords or 'time_bnds' in ds
        if not has_time_bnds:
            new_coord_vars.update(
                time_bnds=xr.DataArray([[time_coverage_start, time_coverage_end]],
                                       dims=['time', 'bnds']))

    ds = ds.assign_coords(**new_coord_vars)

    ds.coords['time'].attrs['long_name'] = 'time'
    ds.coords['time'].attrs['standard_name'] = 'time'
    ds.coords['time'].encoding['units'] = 'days since 1970-01-01'

    if 'time_bnds' in ds.coords:
        ds.coords['time'].attrs['bounds'] = 'time_bnds'
        ds.coords['time_bnds'].attrs['long_name'] = 'time'
        ds.coords['time_bnds'].attrs['standard_name'] = 'time'
        ds.coords['time_bnds'].encoding['units'] = 'days since 1970-01-01'

    return ds
def create_2D_time_coord(ds: xr.Dataset) -> xr.Dataset:
    time = ds.initialisation_date + ds.forecast_horizon
    return ds.assign_coords(time=time)
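# Minimal usage sketch (illustrative only): for a toy forecast dataset with an
# initialisation_date dimension and a forecast_horizon dimension, create_2D_time_coord
# defined above assigns a 2D valid-time coordinate spanning both dimensions. The names
# toy_fc and precip are illustrative.
import numpy as np
import pandas as pd
import xarray as xr

toy_fc = xr.Dataset(
    {"precip": (("initialisation_date", "forecast_horizon"), np.zeros((2, 3)))},
    coords={
        "initialisation_date": pd.date_range("2020-01-01", periods=2, freq="MS"),
        "forecast_horizon": pd.to_timedelta([28, 59, 90], unit="D"),
    },
)
with_time = create_2D_time_coord(toy_fc)
print(with_time.time.shape)  # -> (2, 3)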
def get_cube_values_for_indexes(
        cube: xr.Dataset,
        indexes: Union[xr.Dataset, pd.DataFrame, Mapping[str, Any]],
        include_coords: bool = False,
        include_bounds: bool = False,
        data_var_names: Sequence[str] = None,
        index_name_pattern: str = DEFAULT_INDEX_NAME_PATTERN,
        method: str = DEFAULT_INTERP_POINT_METHOD,
        cube_asserted: bool = False) -> xr.Dataset:
    """
    Get values from the *cube* at given *indexes*.

    :param cube: A cube dataset.
    :param indexes: A mapping from column names to index and fraction arrays for all
        cube dimensions.
    :param include_coords: Whether to include the cube coordinates for each point in
        the return value.
    :param include_bounds: Whether to include the cube coordinate boundaries (if any)
        for each point in the return value.
    :param data_var_names: An optional list of names of data variables in *cube* whose
        values shall be extracted.
    :param index_name_pattern: A naming pattern for the computed indexes columns.
        Must include "{name}" which will be replaced by the dimension name.
    :param method: "nearest" or "linear".
    :param cube_asserted: If False, *cube* will be verified, otherwise it is expected
        to be a valid cube.
    :return: A new data frame whose columns are values from *cube* variables at given
        *indexes*.
    """
    if not cube_asserted:
        assert_cube(cube)

    if method not in {POINT_INTERP_METHOD_NEAREST, POINT_INTERP_METHOD_LINEAR}:
        raise ValueError(f"invalid method {method!r}")
    if method != POINT_INTERP_METHOD_NEAREST:
        raise NotImplementedError(f"method {method!r} not yet implemented")

    all_data_var_names = tuple(cube.data_vars.keys())
    if len(all_data_var_names) == 0:
        raise ValueError("cube is empty")

    if data_var_names is not None:
        if len(data_var_names) == 0:
            return xr.Dataset(
                coords=indexes.coords if hasattr(indexes, "coords") else None)
        for var_name in data_var_names:
            if var_name not in cube.data_vars:
                raise ValueError(f"variable {var_name!r} not found in cube")
    else:
        data_var_names = all_data_var_names

    dim_names = cube[data_var_names[0]].dims
    num_dims = len(dim_names)
    index_names = [
        index_name_pattern.format(name=dim_name) for dim_name in dim_names
    ]
    num_points = _validate_points(indexes, index_names, param_name="indexes")
    indexes = _normalize_points(indexes)

    cube = xr.Dataset(
        {var_name: cube[var_name] for var_name in data_var_names},
        coords=cube.coords)

    new_bounds_vars = {}
    bounds_var_names = _get_coord_bounds_var_names(cube)
    drop_coords = None
    if bounds_var_names:
        if include_bounds:
            # Flatten any coordinate bounds variables
            for var_name, bnds_var_name in bounds_var_names.items():
                bnds_var = cube[bnds_var_name]
                new_bounds_vars[var_name + "_lower"] = bnds_var[:, 0]
                new_bounds_vars[var_name + "_upper"] = bnds_var[:, 1]
            cube = cube.assign_coords(**new_bounds_vars)
        cube = cube.drop(bounds_var_names.values())
        if not include_coords:
            drop_coords = set(cube.coords).difference(new_bounds_vars.keys())
    else:
        if not include_coords:
            drop_coords = set(cube.coords)

    # Generate a validation condition so we can filter out
    # invalid rows (where any index == -1)
    is_valid_point = None
    for index_name in index_names:
        col = indexes[index_name]
        condition = col >= 0 if np.issubdtype(col.dtype, np.integer) else np.isnan(col)
        if is_valid_point is None:
            is_valid_point = condition
        else:
            is_valid_point = np.logical_and(is_valid_point, condition)

    num_valid_points = np.count_nonzero(is_valid_point)
    if num_valid_points == num_points:
        # All indexes valid
        cube_selector = {
            dim_names[i]: indexes[index_names[i]] for i in range(num_dims)
        }
        cube_values = cube.isel(cube_selector)

    elif num_valid_points == 0:
        # All indexes are invalid
        new_bounds_vars = {}
        for var_name in cube.variables:
            new_bounds_vars[var_name] = _empty_points_var(
                cube[var_name], num_points)
        cube_values = xr.Dataset(new_bounds_vars)

    else:
        # Some invalid indexes
        idx = np.arange(num_points)
        good_idx = idx[is_valid_point.values]
        idx_dim_name = indexes[index_names[0]].dims[0]
        good_indexes = indexes.isel({idx_dim_name: good_idx})

        cube_selector = {
            dim_names[i]: good_indexes[index_names[i]] for i in range(num_dims)
        }
        cube_values = cube.isel(cube_selector)

        new_bounds_vars = {}
        for var_name in cube.variables:
            var = cube_values[var_name]
            new_var = _empty_points_var(var, num_points)
            new_var[good_idx] = var
            new_bounds_vars[var_name] = new_var

        cube_values = xr.Dataset(new_bounds_vars)

    if drop_coords:
        cube_values = cube_values.drop(drop_coords)

    return cube_values