Example #1
def add_time_coords(dataset: xr.Dataset,
                    time_range: Tuple[float, float]) -> xr.Dataset:
    t1, t2 = time_range
    if t1 != t2:
        t_center = (t1 + t2) / 2
    else:
        t_center = t1
    dataset = dataset.expand_dims('time')
    dataset = dataset.assign_coords(time=(['time'], [t_center]))
    time_var = dataset.coords['time']
    time_var.attrs['long_name'] = 'time'
    time_var.attrs['standard_name'] = 'time'
    time_var.attrs['units'] = DATETIME_UNITS
    time_var.attrs['calendar'] = DATETIME_CALENDAR
    time_var.encoding['units'] = DATETIME_UNITS
    time_var.encoding['calendar'] = DATETIME_CALENDAR
    if t1 != t2:
        time_var.attrs['bounds'] = 'time_bnds'
        dataset = dataset.assign_coords(time_bnds=(['time', 'bnds'],
                                                   [[t1, t2]]))
        time_bnds_var = dataset.coords['time_bnds']
        time_bnds_var.attrs['long_name'] = 'time'
        time_bnds_var.attrs['standard_name'] = 'time'
        time_bnds_var.attrs['units'] = DATETIME_UNITS
        time_bnds_var.attrs['calendar'] = DATETIME_CALENDAR
        time_bnds_var.encoding['units'] = DATETIME_UNITS
        time_bnds_var.encoding['calendar'] = DATETIME_CALENDAR
    return dataset
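A minimal usage sketch, assuming DATETIME_UNITS and DATETIME_CALENDAR are CF-style strings such as 'days since 1970-01-01' and 'standard' (the sample dataset is hypothetical):

import numpy as np
import xarray as xr

DATETIME_UNITS = 'days since 1970-01-01'  # assumed value
DATETIME_CALENDAR = 'standard'            # assumed value

ds = xr.Dataset({'sst': (('lat', 'lon'), np.zeros((2, 2)))})
ds = add_time_coords(ds, (18262.0, 18263.0))  # a two-day range in days since 1970-01-01
print(ds.time.values)       # [18262.5], decoded on serialization via the units encoding
print(ds.time_bnds.values)  # [[18262. 18263.]]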
Example #2
def change_crs(dset: xr.Dataset, old_coords, old_crs, new_coords, new_crs):
    dset = dset.copy()

    # Load coordinates
    old_x = dset.variables[old_coords[0]].values
    old_y = dset.variables[old_coords[1]].values
    if len(old_x.shape) == 1 and len(old_y.shape) == 1:
        old_x, old_y = np.meshgrid(old_x, old_y)

    # Find old dimensions
    xdims = dset.variables[old_coords[0]].dims
    ydims = dset.variables[old_coords[1]].dims
    if len(xdims) == 2:
        dims = xdims
    else:
        dims = ydims + xdims

    # Transform coordinates
    old_gridmap, old_proj = _load_crs(dset, old_crs)
    new_gridmap, new_proj = _load_crs(dset, new_crs)
    new_x, new_y = crs_transform(old_x, old_y, old_proj, new_proj)

    # Remove old grid mapping and coordinates
    dset = dset.drop_vars(old_gridmap.name)
    dset = dset.drop_vars(old_coords)

    # Check if new coordinates are one-dimensional
    xdiff = np.max(np.abs(np.diff(new_x, axis=0)))
    ydiff = np.max(np.abs(np.diff(new_y, axis=1)))
    if xdiff < 1e-8 and ydiff < 1e-8:
        # If one-dimensional, store as one-dimensional variables and
        # change dimension names to match coordinates
        dset = dset.assign_coords({
            new_coords[0]:
            xr.Variable(dims[1], new_x[0, :]),
            new_coords[1]:
            xr.Variable(dims[0], new_y[:, 0]),
        })  # type: xr.Dataset
        dset = dset.swap_dims(dict(zip(reversed(dims), new_coords)))
    else:
        # If two-dimensional, store as auxiliary coordinates with the same
        # dimension names as the old coordinates
        dset = dset.assign_coords({
            new_coords[0]: xr.Variable(dims, new_x),
            new_coords[1]: xr.Variable(dims, new_y),
        })  # type: xr.Dataset

    # Find data vars that reference a grid mapping (these are re-linked to the new CRS)
    old_data_vars = [
        k for k, v in dset.data_vars.items() if 'grid_mapping' in v.attrs
    ]

    # Add grid mapping to new dataset
    dset = set_crs(dset=dset,
                   crs=new_gridmap,
                   coords=new_coords,
                   data_vars=old_data_vars)

    return dset
Example #3
def _normalize_lon_360(dataset: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``dataset`` so that it ranges from -180 to +180 degrees.

    :param dataset: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """

    if 'lon' not in dataset.coords:
        return dataset

    lon_var = dataset.coords['lon']

    if len(lon_var.shape) != 1:
        return dataset

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return dataset

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return dataset

    # roll_coords will be set to False by default in the future
    dataset = dataset.roll(lon=lon_size_05, roll_coords=True)
    dataset = dataset.assign_coords(lon=(((dataset.lon + 180) % 360) - 180))

    return dataset
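A quick check of the roll-based wrap on a hypothetical 0..360 grid:

import numpy as np
import xarray as xr

ds = xr.Dataset({'v': ('lon', np.arange(4.))},
                coords={'lon': np.arange(0., 360., 90.)})  # [0, 90, 180, 270]
out = _normalize_lon_360(ds)
print(out.lon.values)  # [-180. -90. 0. 90.]
print(out.v.values)    # [2. 3. 0. 1.] -- data rolled together with the coord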
Example #4
 def __init__(
     self,
     ds: Dataset,
     covariates: list,
     timestamp: np.datetime64,
     type: str = "real",
 ) -> None:
     self.timestamp = timestamp
     self.data_name, self.var_name = _get_field_names(ds)
     if type == "real":
         self.ds = _preprocess_ds(ds, timestamp, covariates)
         self.ds_main = get_main_coords(self.ds).sel(time=timestamp)
         df = self.to_dataframe()
         df_main = self.to_dataframe(main=True)
         self.coords = df[["lat", "lon"]].values
         self.coords_main = df_main[["lat", "lon"]].values
         self.values = df[self.data_name].values
         self.values_main = df_main[self.data_name].values
         self.temporal_trend = self.ds.attrs["temporal_trend"]
         self.spatial_trend = df["spatial_trend"].values
         self.spatial_mean = self.ds.attrs["spatial_mean"]
         self.scale_fact = self.ds.attrs["scale_fact"]
         self.covariate_means = self.ds.attrs["covariate_means"]
         self.covariate_scales = self.ds.attrs["covariate_scales"]
         self.variance_estimate = df[self.var_name].values
         self.covariates = df[covariates]
     else:
         self.ds_main = ds.assign_coords(coords={"time": np.nan})
         df_main = self.to_dataframe(main=True)
         self.coords = self.coords_main = df_main[["x", "y"]].values
         self.values = self.values_main = df_main[self.data_name].values
     self.size = len(self.values)
Example #5
 def xarray_2D_to_3D(self, xdataset: xr.Dataset):
     new_ds = xdataset.assign_coords(
         src_chan=xdataset.component.split(':')[0])
     new_ds = new_ds.assign_coords(
         rec_chan=xdataset.component.split(':')[1])
     new_ds = new_ds.drop_dims(['component'])
     return new_ds
Example #6
def normalize_coord_vars(ds: xr.Dataset) -> xr.Dataset:
    """
    Turn data variables that are potential coordinate variables into actual coordinate variables.

    Any data variable is considered a coordinate variable

    * whose name is its only dimension name;
    * whose number of dimensions is two, whose first dimension name is also a variable name, and
      whose last dimension is named "bnds".

    :param ds: The dataset
    :return: The same dataset or a shallow copy with potential coordinate
             variables turned into coordinate variables.
    """

    if 'bnds' not in ds.dims:
        return ds

    coord_var_names = set()
    for data_var_name in ds.data_vars:
        data_var = ds.data_vars[data_var_name]
        if is_coord_var(ds, data_var):
            coord_var_names.add(data_var_name)

    if not coord_var_names:
        return ds

    old_ds = ds
    ds = old_ds.drop(coord_var_names)
    ds = ds.assign_coords(**{bounds_var_name: old_ds[bounds_var_name] for bounds_var_name in coord_var_names})

    return ds
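The predicate is_coord_var is not shown here; a minimal sketch consistent with the two docstring rules might look like this (name and signature are assumptions):

def is_coord_var(ds: xr.Dataset, var: xr.DataArray) -> bool:
    # Rule 1: a 1-D variable whose name equals its only dimension name.
    if len(var.dims) == 1 and var.name == var.dims[0]:
        return True
    # Rule 2: a 2-D bounds variable whose first dimension is itself a
    # variable name and whose last dimension is named 'bnds'.
    return (len(var.dims) == 2
            and var.dims[0] in ds.variables
            and var.dims[1] == 'bnds')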
Example #7
    def _map_forecast_horizon_to_months_ahead(
            stacked: xr.Dataset) -> xr.Dataset:
        assert "forecast_horizon" in [
            c for c in stacked.coords
        ], ("Expect the"
            "`stacked` dataset object to have `forecast_horizon` as a coord")

        # map forecast horizons to months ahead
        map_ = {
            pd.Timedelta("28 days 00:00:00"): 1,
            pd.Timedelta("29 days 00:00:00"): 1,
            pd.Timedelta("30 days 00:00:00"): 1,
            pd.Timedelta("31 days 00:00:00"): 1,
            pd.Timedelta("59 days 00:00:00"): 2,
            pd.Timedelta("60 days 00:00:00"): 2,
            pd.Timedelta("61 days 00:00:00"): 2,
            pd.Timedelta("62 days 00:00:00"): 2,
            pd.Timedelta("89 days 00:00:00"): 3,
            pd.Timedelta("90 days 00:00:00"): 3,
            pd.Timedelta("91 days 00:00:00"): 3,
            pd.Timedelta("92 days 00:00:00"): 3,
        }

        fhs = [pd.Timedelta(fh) for fh in stacked.forecast_horizon.values]
        months = [map_[fh] for fh in fhs]
        stacked = stacked.assign_coords(months_ahead=("time", months))

        return stacked
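A small illustration (the stacked dataset is hypothetical; only a time dimension with a matching forecast_horizon coordinate is required, and the nested function is called here as if it were module-level):

import numpy as np
import pandas as pd
import xarray as xr

times = pd.to_datetime(['2020-01-31', '2020-02-29', '2020-03-31'])
horizons = np.array([np.timedelta64(d, 'D') for d in (31, 60, 92)])
stacked = xr.Dataset(coords={'time': times,
                             'forecast_horizon': ('time', horizons)})
out = _map_forecast_horizon_to_months_ahead(stacked)
print(out.months_ahead.values)  # [1 2 3]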
Example #8
def add_traits(ds: Dataset, phenotypes_path: str) -> Dataset:
    ds_tr = load_dataset(phenotypes_path, consolidated=True)
    ds = ds.assign_coords(samples=lambda ds: ds.sample_id).merge(
        ds_tr.assign_coords(samples=lambda ds: ds.sample_id),
        join="left",
        compat="override",
    )
    return ds.reset_index("samples").reset_coords(drop=True)
Example #9
def _adjust_tile_range(ds: xr.Dataset) -> xr.Dataset:

    if "tile" in ds:
        tiles = ds.tile

        if tiles.isel(tile=-1) == 6:
            ds = ds.assign_coords({"tile": tiles - 1})

    return ds
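For example, a hypothetical dataset indexed by 1-based tiles 1..6 is shifted to 0-based:

import numpy as np
import xarray as xr

ds = xr.Dataset(coords={'tile': np.arange(1, 7)})
print(_adjust_tile_range(ds).tile.values)  # [0 1 2 3 4 5]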
Example #10
def _create_variable(xr_dataset: xr.Dataset, nc_dataset: netCDF4.Dataset,
                     encoding: Dict[str, Dict[str, Dict[str, Any]]],
                     name: Hashable, unlimited_dims: Optional[List[str]],
                     variable: xr.DataArray) -> None:
    """Creation and writing of the NetCDF variable."""
    unlimited_dims = unlimited_dims or list()

    variable.attrs.pop("_FillValue", None)
    # Encode datetime64 to float64
    if np.issubdtype(variable.dtype, np.datetime64):
        # 946684800000000 number of microseconds between 2000-01-01 and
        # 1970-01-01
        values = (variable.values.astype("datetime64[us]").astype("int64") -
                  946684800000000) * 1e-6
        if variable.name in xr_dataset.coords:
            xr_dataset = xr_dataset.assign_coords(
                coords={variable.name: values})
            attrs = variable.attrs
            variable = xr_dataset[variable.name]
            variable.attrs.update(attrs)
        else:
            variable.values = values
        assert (
            variable.attrs["units"] == "seconds since 2000-01-01 00:00:00.0")
    dtype, kwargs = _create_variable_args(encoding, name, variable)

    group, name = _split_group_name(name)

    if group is not None:
        if group not in nc_dataset.groups:
            nc_dataset = nc_dataset.createGroup(group)
            if group in ["left", "right"]:
                nc_dataset.setncatts(
                    _group_attributes(
                        getattr(product_specification.Side,
                                group.upper()).value))
        else:
            nc_dataset = nc_dataset.groups[group]

    # If the dimensions don't exist yet, we have to create them.
    if not nc_dataset.dimensions:
        for dim_name, size in xr_dataset.dims.items():
            dim_group, dim_name = _split_group_name(dim_name)
            if dim_group == group:
                nc_dataset.createDimension(
                    dim_name, None if dim_name in unlimited_dims else size)

    ncvar = nc_dataset.createVariable(
        name, dtype,
        tuple(_split_group_name(item)[-1] for item in variable.dims), **kwargs)
    ncvar.setncatts(variable.attrs)
    values = variable.values
    if kwargs['fill_value'] is not None:
        if values.dtype.kind == "f" and np.any(np.isnan(values)):
            values[np.isnan(values)] = kwargs['fill_value']
        values = np.ma.array(values, mask=values == kwargs['fill_value'])
    nc_dataset[name][:] = values
Example #11
def _normalize_lon_360(ds: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``ds`` so that it ranges from -180 to +180 degrees.

    :param ds: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """

    if 'lon' not in ds.coords:
        return ds

    lon_var = ds.coords['lon']

    if len(lon_var.shape) != 1:
        return ds

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return ds

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return ds

    delta_lon = lon_values[1] - lon_values[0]

    var_names = [var_name for var_name in ds.data_vars]

    ds = ds.assign_coords(
        lon=xr.DataArray(np.linspace(-180. + 0.5 * delta_lon, +180. -
                                     0.5 * delta_lon, lon_size),
                         dims=ds['lon'].dims,
                         attrs=dict(long_name='longitude',
                                    standard_name='longitude',
                                    units='degrees east')))

    ds = adjust_spatial_attrs_impl(ds, True)

    new_vars = dict()
    for var_name in var_names:
        var = ds[var_name]
        if len(var.dims) >= 1 and var.dims[-1] == 'lon':
            values = np.copy(var.values)
            temp = np.copy(values[..., :lon_size_05])
            values[..., :lon_size_05] = values[..., lon_size_05:]
            values[..., lon_size_05:] = temp
            new_vars[var_name] = xr.DataArray(values,
                                              dims=var.dims,
                                              attrs=var.attrs,
                                              encoding=var.encoding)

    return ds.assign(**new_vars)
Example #12
 def test_concat_coords(self):
     data = Dataset({"foo": ("x", np.random.randn(10))})
     expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5))
     objs = [data.isel(x=slice(5)).assign_coords(c=0), data.isel(x=slice(5, None)).assign_coords(c=1)]
     for coords in ["different", "all", ["c"]]:
         actual = concat(objs, dim="x", coords=coords)
         self.assertDatasetIdentical(expected, actual)
     for coords in ["minimal", []]:
         with self.assertRaisesRegexp(ValueError, "not equal across"):
             concat(objs, dim="x", coords=coords)
Example #13
def apply_sample_qc_1(ds: Dataset, sample_qc_path: str) -> Dataset:
    ds_sqc = load_sample_qc(sample_qc_path)
    ds_sqc = sample_qc_1(ds_sqc)
    ds_sqc = ds_sqc[SAMPLE_QC_COLS]
    ds = ds.assign_coords(samples=lambda ds: ds.sample_id).merge(
        ds_sqc.assign_coords(samples=lambda ds: ds.sample_id).compute(),
        join="inner",
        compat="override",
    )
    return ds.reset_index("samples").reset_coords(drop=True)
Example #14
def friendly_obs(ds: xr.Dataset) -> xr.Dataset:
    station = ds.station.sum(
        dim='num_characters').to_series().rename_axis('station')
    ds = ds.drop_dims('num_characters')
    ds = ds.rename_dims({
        'default_time_coordinate_size': 'time',
        'number_of_stations': 'station'
    })
    ds = ds.assign_coords({'station': station})
    ds = ds.set_index({'time': 'OM__phenomenonTimeInstant'})
    return ds
Example #15
def _round_time_coord(ds: xr.Dataset,
                      time_coord: str = TIME_DIM_NAME) -> xr.Dataset:

    if time_coord in ds.coords:
        new_times = round_time(ds[time_coord])
        ds = ds.assign_coords({time_coord: new_times})
    else:
        logger.debug(
            "Round time operation called on dataset missing a time coordinate."
        )

    return ds
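round_time is referenced but not defined in this snippet; a plausible sketch that rounds datetime64 values to the nearest whole second (the exact behaviour is an assumption):

import xarray as xr

def round_time(times: xr.DataArray) -> xr.DataArray:
    # Integer arithmetic on nanoseconds-since-epoch avoids float precision loss.
    one_second = 1_000_000_000  # nanoseconds
    ns = times.astype('int64')
    rounded = ((ns + one_second // 2) // one_second) * one_second
    return rounded.astype('datetime64[ns]')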
Example #16
 def to_360day_monthly(self, ds: xr.Dataset):
     """Change the calendar to datetime and precision to monthly."""
     # https://github.com/pydata/xarray/issues/3320
     time1 = ds.time.copy()
     for itime in range(ds.sizes['time']):
         bb = ds.time.values[itime].timetuple()
         time1.values[itime] = datetime(bb[0], bb[1], 16)
     logging.info(
         "[CMIP6_IO] Fixed time units start at {} and end at {}".format(
             time1.values[0], time1.values[-1]))
     ds = ds.assign_coords({'time': time1})
     return ds
Example #17
def _normalize_lat_lon_2d(ds: xr.Dataset) -> xr.Dataset:
    """
    Detect 2D 'lat', 'lon' variables that span an equirectangular grid. Then:
    Drop the original 'lat', 'lon' variables.
    Rename the original dimension names of the 'lat', 'lon' variables, usually ('y', 'x'), to
    ('lat', 'lon').
    Insert new 1D 'lat', 'lon' coordinate variables with dimensions 'lat' and 'lon', respectively.
    :param ds: some xarray dataset
    :return: a normalized xarray dataset, or the original one
    """
    if not ('lat' in ds and 'lon' in ds):
        return ds

    lat_var = ds['lat']
    lon_var = ds['lon']

    lat_dims = lat_var.dims
    lon_dims = lon_var.dims
    if lat_dims != lon_dims:
        return ds

    spatial_dims = lon_dims
    if len(spatial_dims) != 2:
        return ds

    x_dim_name = spatial_dims[-1]
    y_dim_name = spatial_dims[-2]

    lat_data_1 = lat_var[:, 0]
    lat_data_2 = lat_var[:, -1]
    lon_data_1 = lon_var[0, :]
    lon_data_2 = lon_var[-1, :]

    equal_lat = np.allclose(lat_data_1, lat_data_2, equal_nan=True)
    equal_lon = np.allclose(lon_data_1, lon_data_2, equal_nan=True)

    # Drop lat/lon in any case. If not (equal_lat and equal_lon), subset_spatial_impl will
    # subsequently fail with a ValidationError.

    ds = ds.drop_vars(['lon', 'lat'])

    if not (equal_lat and equal_lon):
        return ds

    ds = ds.rename({
        x_dim_name: 'lon',
        y_dim_name: 'lat',
    })

    ds = ds.assign_coords(lon=np.array(lon_data_1), lat=np.array(lat_data_1))

    return ds
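A quick demonstration on a hypothetical equirectangular grid stored as 2-D lat/lon variables:

import numpy as np
import xarray as xr

lon2d, lat2d = np.meshgrid([10., 20., 30.], [50., 60.])
ds = xr.Dataset({'lat': (('y', 'x'), lat2d),
                 'lon': (('y', 'x'), lon2d),
                 'v': (('y', 'x'), np.zeros((2, 3)))})
out = _normalize_lat_lon_2d(ds)
print(out.lat.values)  # [50. 60.]
print(out.lon.values)  # [10. 20. 30.]
print(out.v.dims)      # ('lat', 'lon')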
Example #18
 def test_concat_coords(self):
     data = Dataset({"foo": ("x", np.random.randn(10))})
     expected = data.assign_coords(c=("x", [0] * 5 + [1] * 5))
     objs = [
         data.isel(x=slice(5)).assign_coords(c=0),
         data.isel(x=slice(5, None)).assign_coords(c=1),
     ]
     for coords in ["different", "all", ["c"]]:
         actual = concat(objs, dim="x", coords=coords)
         assert_identical(expected, actual)
     for coords in ["minimal", []]:
         with raises_regex(merge.MergeError, "conflicting values"):
             concat(objs, dim="x", coords=coords)
Example #19
def add_time_coords(dataset: xr.Dataset,
                    time_range: Tuple[float, float]) -> xr.Dataset:
    t1, t2 = time_range
    if t1 != t2:
        t_center = (t1 + t2) / 2
    else:
        t_center = t1
    dataset = dataset.expand_dims('time')
    dataset = dataset.assign_coords(
        time=(['time'], from_time_in_days_since_1970([t_center])))
    time_var = dataset.coords['time']
    time_var.attrs['long_name'] = 'time'
    time_var.attrs['standard_name'] = 'time'
    # Avoiding xarray error:
    #   ValueError: failed to prevent overwriting existing key units in attrs on variable 'time'.
    #   This is probably an encoding field used by xarray to describe how a variable is serialized.
    #   To proceed, remove this key from the variable's attributes manually.
    # time_var.attrs['units'] = DATETIME_UNITS
    # time_var.attrs['calendar'] = DATETIME_CALENDAR
    time_var.encoding['units'] = DATETIME_UNITS
    time_var.encoding['calendar'] = DATETIME_CALENDAR
    if t1 != t2:
        time_var.attrs['bounds'] = 'time_bnds'
        dataset = dataset.assign_coords(
            time_bnds=(['time', 'bnds'],
                       from_time_in_days_since_1970([t1, t2]).reshape(1, 2)))
        time_bnds_var = dataset.coords['time_bnds']
        time_bnds_var.attrs['long_name'] = 'time'
        time_bnds_var.attrs['standard_name'] = 'time'
        # Avoiding xarray error:
        #   ValueError: failed to prevent overwriting existing key units in attrs on variable
        #   'time'. This is probably an encoding field used by xarray to describe how a variable
        #   is serialized.
        # To proceed, remove this key from the variable's attributes manually.
        # time_bnds_var.attrs['units'] = DATETIME_UNITS
        # time_bnds_var.attrs['calendar'] = DATETIME_CALENDAR
        time_bnds_var.encoding['units'] = DATETIME_UNITS
        time_bnds_var.encoding['calendar'] = DATETIME_CALENDAR
    return dataset
Example #20
def _normalize_lat_lon_2d(ds: xr.Dataset) -> xr.Dataset:
    """
    Detect 2D 'lat', 'lon' variables that span an equirectangular grid. Then:
    Drop the original 'lat', 'lon' variables.
    Rename the original dimension names of the 'lat', 'lon' variables, usually ('y', 'x'), to ('lat', 'lon').
    Insert new 1D 'lat', 'lon' coordinate variables with dimensions 'lat' and 'lon', respectively.
    :param ds: some xarray dataset
    :return: a normalized xarray dataset, or the original one
    """
    if not ('lat' in ds and 'lon' in ds):
        return ds

    lat_var = ds['lat']
    lon_var = ds['lon']

    lat_dims = lat_var.dims
    lon_dims = lon_var.dims
    if lat_dims != lon_dims:
        return ds

    spatial_dims = lon_dims
    if len(spatial_dims) != 2:
        return ds

    x_dim_name = spatial_dims[-1]
    y_dim_name = spatial_dims[-2]

    lat_data_1 = lat_var[:, 0]
    lat_data_2 = lat_var[:, -1]
    lon_data_1 = lon_var[0, :]
    lon_data_2 = lon_var[-1, :]

    equal_lat = np.allclose(lat_data_1, lat_data_2, equal_nan=True)
    equal_lon = np.allclose(lon_data_1, lon_data_2, equal_nan=True)

    # Drop lat/lon in any case. If not (equal_lat and equal_lon), subset_spatial_impl will subsequently
    # fail with a ValidationError.

    ds = ds.drop(['lon', 'lat'])

    if not (equal_lat and equal_lon):
        return ds

    ds = ds.rename({
        x_dim_name: 'lon',
        y_dim_name: 'lat',
    })

    ds = ds.assign_coords(lon=np.array(lon_data_1), lat=np.array(lat_data_1))

    return ds
Example #21
def assign_slant_range_time_coord(
        measurement: xr.Dataset,
        coordinate_conversion: xr.Dataset) -> xr.Dataset:
    x = measurement.ground_range - coordinate_conversion.gr0
    slant_range = (coordinate_conversion.grsrCoefficients *
                   x**coordinate_conversion.degree).sum(dim="degree")
    slant_range_coord = slant_range.interp(
        azimuth_time=measurement.azimuth_time,
        ground_range=measurement.ground_range).data
    slant_range_time = 2 / SPEED_OF_LIGHT * slant_range_coord
    measurement = measurement.assign_coords(
        slant_range_time=(("azimuth_time", "ground_range"),
                          slant_range_time))  # type: ignore
    return measurement
Example #22
def _prepare_vgrid(ds: xr.Dataset, vcoord: xr.DataArray) -> xr.Dataset:
    ds = ds.copy()
    dims_non_vert = [d for d in vcoord.dims if d not in _cosmo_vcoords]
    vcoord_vals = vcoord.mean(dim=dims_non_vert).values
    if 'soil1' in ds.coords:
        ds['soil1'] = ds['soil1'].copy(data=ds['soil1']*(-1))
        vgrid_coords = np.concatenate([vcoord_vals, ds['soil1'].values])
    else:
        vgrid_coords = vcoord_vals
    ds = ds.assign_coords(vgrid=vgrid_coords)
    if 'level1' in ds.dims:
        ds['level1'] = vcoord_vals
    if 'level' in ds.dims:
        ds['level'] = ((vcoord_vals+np.roll(vcoord_vals, 1))/2)[1:]
    return ds
Example #23
def _normalize_lon_360(ds: xr.Dataset) -> xr.Dataset:
    """
    Fix the longitude of the given dataset ``ds`` so that it ranges from -180 to +180 degrees.

    :param ds: The dataset whose longitudes may be given in the range 0 to 360.
    :return: The fixed dataset or the original dataset.
    """

    if 'lon' not in ds.coords:
        return ds

    lon_var = ds.coords['lon']

    if len(lon_var.shape) != 1:
        return ds

    lon_size = lon_var.shape[0]
    if lon_size < 2:
        return ds

    lon_size_05 = lon_size // 2
    lon_values = lon_var.values
    if not np.any(lon_values[lon_size_05:] > 180.):
        return ds

    delta_lon = lon_values[1] - lon_values[0]

    var_names = [var_name for var_name in ds.data_vars]

    ds = ds.assign_coords(
        lon=xr.DataArray(np.linspace(-180. + 0.5 * delta_lon, +180. -
                                     0.5 * delta_lon, lon_size),
                         dims=ds['lon'].dims,
                         attrs=dict(long_name='longitude',
                                    standard_name='longitude',
                                    units='degrees east')))

    ds = adjust_spatial_attrs(ds, True)

    new_vars = dict()
    for var_name in var_names:
        var = ds[var_name]
        if 'lon' in var.dims:
            new_var = var.roll(lon=lon_size_05, roll_coords=False)
            new_var.encoding.update(var.encoding)
            new_vars[var_name] = new_var

    return ds.assign(**new_vars)
Example #24
def standardize_zarr_time_coord(ds: xr.Dataset) -> xr.Dataset:
    """ Casts a datetime coord to to python datetime and rounds to
    nearest even second (because cftime coords have small rounding
    errors that makes it hard to other datasets join on time)

    Args:
        ds (xr.Dataset): time coordinate is datetime-like object

    Returns:
        xr.Dataset with standardized time coordinates
    """
    # Vectorize doesn't work on type-dispatched function overloading
    times = np.array(list(map(vcm.cast_to_datetime, ds[TIME_NAME].values)))
    times = round_time(times)
    ds = ds.assign_coords({TIME_NAME: times})
    return ds
Example #25
def compute_vorticity(ds: xr.Dataset, grid: cfd.grids.Grid) -> xr.Dataset:
    """
  Computes vorticity of a dataset containing Kolmogorov flow trajectories.
  
  Args:
    ds: dataset conntaining variables with with Kolmogorov flow trajectories.
    grid: grid over which to compute vorticity.
    
  Returns:
    Vorticity of the Kolmogorov flow trajectories.
  """
    coords = xru.construct_coords(grid)
    ds = ds.assign_coords(coords)
    dy = ds.y[1] - ds.y[0]
    dx = ds.x[1] - ds.x[0]
    dv_dx = (ds.sel(v=1).roll(x=-1, roll_coords=False) - ds.sel(v=1)) / dx
    du_dy = (ds.sel(v=0).roll(y=-1, roll_coords=False) - ds.sel(v=0)) / dy
    return (dv_dx - du_dy)
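For reference, this is the discrete curl ω = ∂v/∂x − ∂u/∂y computed with first-order forward differences on a periodic domain: roll(x=-1) brings the value at x+dx alongside the value at x, so (v(x+dx) − v(x))/dx approximates ∂v/∂x, and likewise for ∂u/∂y.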
Example #26
    def add_scalar_record(self, ds: xr.Dataset, varname: str,
                          x: Iterable) -> xr.Dataset:

        if isinstance(x, Tensor):
            x = x.detach().cpu().numpy()

        # Cut excess (NaN) entries: with NaNs present, argmin returns the
        # index of the first NaN, so this truncates at the first unfilled entry.
        x = x[:x.argmin()]

        if 'iter' not in ds.coords:
            ds = ds.assign_coords({'iter': np.arange(len(x))})
        else:
            if len(ds['iter']) != len(x):
                raise ValueError(
                    f'dimension `iter` already exists in `ds`, but length ({len(ds["iter"])}) does '
                    f'not match length of `x` ({len(x)}).')

        ds[varname] = ('iter', x)

        return ds
Example #27
def _transform_pv_systems(pv_systems: xr.Dataset) -> xr.Dataset:
    """Transform the system locations into the same coordinate system used by UKV"""

    system_latitudes, system_longitudes = (
        pv_systems["latitude"].values,
        pv_systems["longitude"].values,
    )

    wgs84 = ccrs.Geodetic()
    ukv_crs = ccrs.OSGB(approx=False)
    locs = ukv_crs.transform_points(
        src_crs=wgs84,
        x=np.asanyarray(system_longitudes),
        y=np.asanyarray(system_latitudes),
    )[:, :-1]

    new_coords = {
        "easting": (["system_id"], locs[:, 0].astype("int32")),
        "northing": (["system_id"], locs[:, 1].astype("int32")),
    }
    return pv_systems.assign_coords(new_coords)
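A minimal sketch of calling this on a hypothetical two-system dataset (requires cartopy; the coordinates are illustrative):

import numpy as np
import xarray as xr

pv = xr.Dataset({'latitude': ('system_id', np.array([51.5, 53.4])),
                 'longitude': ('system_id', np.array([-0.1, -2.2]))},
                coords={'system_id': [1, 2]})
out = _transform_pv_systems(pv)
print(out.easting.values, out.northing.values)  # int32 OSGB eastings/northings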
Example #28
def _transform_pv_systems_pyproj(pv_systems: xr.Dataset) -> xr.Dataset:
    """Transform the system locations into the same coordinate system used by UKV, using pyproj"""
    import pyproj

    system_latitudes, system_longitudes = (
        pv_systems["latitude"].values,
        pv_systems["longitude"].values,
    )

    transformer = pyproj.Transformer.from_crs("epsg:4326",
                                              "epsg:27700",
                                              always_xy=True)
    locs = transformer.transform(np.asanyarray(system_longitudes),
                                 np.asanyarray(system_latitudes))
    print(locs)

    new_coords = {
        "easting": (["system_id"], locs[0]),
        "northing": (["system_id"], locs[1]),
    }
    return pv_systems.assign_coords(new_coords)
Example #29
 def pre_process(self, dataset: xr.Dataset) -> xr.Dataset:
     num_times = dataset.sizes.get('t')
     time = np.ndarray(shape=num_times, dtype=np.dtype('datetime64[us]'))
     for i in range(num_times):
         date = dataset.DATE[i]
         hour = dataset.HOUR[i]
         minute = dataset.MIN[i]
         year = date // 10000
         month = (date - year * 10000) // 100
         day = date % 100
         dt = datetime.datetime(year, month, day, hour=hour, minute=minute)
         dt64 = np.datetime64(dt)
         time[i] = dt64
     dataset = dataset.rename(dict(t='time'))
     dataset = dataset.drop(['DATE', 'HOUR', 'MIN'])
     dataset = dataset.assign_coords(
         time=xr.DataArray(time,
                           dims='time',
                           attrs=dict(long_name='time',
                                      standard_name='time',
                                      units='seconds since 1970-01-01'),
                           encoding=dict(units='seconds since 1970-01-01',
                                         calendar='standard')))
     return dataset
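The integer date decoding used in the loop, shown on a single hypothetical value:

date = 20200102
year = date // 10000                   # 2020
month = (date - year * 10000) // 100   # 1
day = date % 100                       # 2
print(year, month, day)                # 2020 1 2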
Example #30
def normalize_missing_time(ds: xr.Dataset) -> xr.Dataset:
    """
    Add a time coordinate variable and its associated bounds coordinate variable
    if the temporal CF attributes ``time_coverage_start`` and ``time_coverage_end``
    are given but the time dimension is missing.

    The new time coordinate variable will be named ``time`` with dimension ['time'] and shape [1].
    The time bounds coordinate variable will be named ``time_bnds`` with dimensions ['time', 'bnds'] and shape [1, 2].
    Both are of data type ``datetime64``.

    :param ds: Dataset to adjust
    :return: Adjusted dataset
    """
    time_coverage_start = ds.attrs.get('time_coverage_start')
    if time_coverage_start is not None:
        # noinspection PyBroadException
        try:
            time_coverage_start = pd.to_datetime(time_coverage_start)
        except BaseException:
            pass

    time_coverage_end = ds.attrs.get('time_coverage_end')
    if time_coverage_end is not None:
        # noinspection PyBroadException
        try:
            time_coverage_end = pd.to_datetime(time_coverage_end)
        except BaseException:
            pass

    if not time_coverage_start and not time_coverage_end:
        # Can't do anything
        return ds

    if 'time' in ds:
        time = ds.time
        if not time.dims:
            ds = ds.drop('time')
        elif len(time.dims) == 1:
            time_dim_name = time.dims[0]
            is_time_used_as_dim = any([(time_dim_name in ds[var_name].dims) for var_name in ds.data_vars])
            if is_time_used_as_dim:
                # It seems we already have valid time coordinates
                return ds
            time_bnds_var_name = time.attrs.get('bounds')
            if time_bnds_var_name in ds:
                ds = ds.drop(time_bnds_var_name)
            ds = ds.drop('time')
            ds = ds.drop([var_name for var_name in ds.coords if time_dim_name in ds.coords[var_name].dims])

    if time_coverage_start or time_coverage_end:
        # noinspection PyBroadException
        try:
            ds = ds.expand_dims('time')
        except BaseException as e:
            warnings.warn(f'failed to add time dimension: {e}')

        if time_coverage_start and time_coverage_end:
            time_value = time_coverage_start + 0.5 * (time_coverage_end - time_coverage_start)
        else:
            time_value = time_coverage_start or time_coverage_end

        new_coord_vars = dict(time=xr.DataArray([time_value], dims=['time']))

        if time_coverage_start and time_coverage_end:
            has_time_bnds = 'time_bnds' in ds.coords or 'time_bnds' in ds
            if not has_time_bnds:
                new_coord_vars.update(time_bnds=xr.DataArray([[time_coverage_start, time_coverage_end]],
                                                             dims=['time', 'bnds']))

        ds = ds.assign_coords(**new_coord_vars)

        ds.coords['time'].attrs['long_name'] = 'time'
        ds.coords['time'].attrs['standard_name'] = 'time'
        ds.coords['time'].encoding['units'] = 'days since 1970-01-01'
        if 'time_bnds' in ds.coords:
            ds.coords['time'].attrs['bounds'] = 'time_bnds'
            ds.coords['time_bnds'].attrs['long_name'] = 'time'
            ds.coords['time_bnds'].attrs['standard_name'] = 'time'
            ds.coords['time_bnds'].encoding['units'] = 'days since 1970-01-01'

    return ds
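A minimal sketch of the intended behaviour on a hypothetical dataset that carries only the CF attributes:

import numpy as np
import xarray as xr

ds = xr.Dataset({'v': (('lat', 'lon'), np.zeros((2, 2)))},
                attrs={'time_coverage_start': '2020-01-01',
                       'time_coverage_end': '2020-01-31'})
out = normalize_missing_time(ds)
print(out.time.values)       # ['2020-01-16T00:00:00.000000000'] (midpoint)
print(out.time_bnds.values)  # [['2020-01-01...' '2020-01-31...']]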
Example #31
 def create_2D_time_coord(ds: xr.Dataset) -> xr.Dataset:
     time = ds.initialisation_date + ds.forecast_horizon
     return ds.assign_coords(time=time)
Example #32
def get_cube_values_for_indexes(
        cube: xr.Dataset,
        indexes: Union[xr.Dataset, pd.DataFrame, Mapping[str, Any]],
        include_coords: bool = False,
        include_bounds: bool = False,
        data_var_names: Sequence[str] = None,
        index_name_pattern: str = DEFAULT_INDEX_NAME_PATTERN,
        method: str = DEFAULT_INTERP_POINT_METHOD,
        cube_asserted: bool = False) -> xr.Dataset:
    """
    Get values from the *cube* at given *indexes*.

    :param cube: A cube dataset.
    :param indexes: A mapping from column names to index and fraction arrays for all cube dimensions.
    :param include_coords: Whether to include the cube coordinates for each point in return value.
    :param include_bounds: Whether to include the cube coordinate boundaries (if any) for each point in return value.
    :param data_var_names: An optional list of names of data variables in *cube* whose values shall be extracted.
    :param index_name_pattern: A naming pattern for the computed indexes columns.
           Must include "{name}" which will be replaced by the dimension name.
    :param method: "nearest" or "linear".
    :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new data frame whose columns are values from *cube* variables at given *indexes*.
    """
    if not cube_asserted:
        assert_cube(cube)

    if method not in {POINT_INTERP_METHOD_NEAREST, POINT_INTERP_METHOD_LINEAR}:
        raise ValueError(f"invalid method {method!r}")
    if method != POINT_INTERP_METHOD_NEAREST:
        raise NotImplementedError(f"method {method!r} not yet implemented")

    all_data_var_names = tuple(cube.data_vars.keys())
    if len(all_data_var_names) == 0:
        raise ValueError("cube is empty")

    if data_var_names is not None:
        if len(data_var_names) == 0:
            return xr.Dataset(
                coords=indexes.coords if hasattr(indexes, "coords") else None)
        for var_name in data_var_names:
            if var_name not in cube.data_vars:
                raise ValueError(f"variable {var_name!r} not found in cube")
    else:
        data_var_names = all_data_var_names

    dim_names = cube[data_var_names[0]].dims
    num_dims = len(dim_names)
    index_names = [
        index_name_pattern.format(name=dim_name) for dim_name in dim_names
    ]
    num_points = _validate_points(indexes, index_names, param_name="indexes")
    indexes = _normalize_points(indexes)

    cube = xr.Dataset(
        {var_name: cube[var_name]
         for var_name in data_var_names},
        coords=cube.coords)

    new_bounds_vars = {}
    bounds_var_names = _get_coord_bounds_var_names(cube)
    drop_coords = None
    if bounds_var_names:
        if include_bounds:
            # Flatten any coordinate bounds variables
            for var_name, bnds_var_name in bounds_var_names.items():
                bnds_var = cube[bnds_var_name]
                new_bounds_vars[var_name + "_lower"] = bnds_var[:, 0]
                new_bounds_vars[var_name + "_upper"] = bnds_var[:, 1]
            cube = cube.assign_coords(**new_bounds_vars)
        cube = cube.drop(bounds_var_names.values())
        if not include_coords:
            drop_coords = set(cube.coords).difference(new_bounds_vars.keys())
    else:
        if not include_coords:
            drop_coords = set(cube.coords)

    # Generate a validation condition so we can filter out invalid rows (where any index == -1)
    is_valid_point = None
    for index_name in index_names:
        col = indexes[index_name]
        condition = col >= 0 if np.issubdtype(col.dtype,
                                              np.integer) else np.isnan(col)
        if is_valid_point is None:
            is_valid_point = condition
        else:
            is_valid_point = np.logical_and(is_valid_point, condition)

    num_valid_points = np.count_nonzero(is_valid_point)
    if num_valid_points == num_points:
        # All indexes valid
        cube_selector = {
            dim_names[i]: indexes[index_names[i]]
            for i in range(num_dims)
        }
        cube_values = cube.isel(cube_selector)
    elif num_valid_points == 0:
        # All indexes are invalid
        new_bounds_vars = {}
        for var_name in cube.variables:
            new_bounds_vars[var_name] = _empty_points_var(
                cube[var_name], num_points)
        cube_values = xr.Dataset(new_bounds_vars)
    else:
        # Some invalid indexes
        idx = np.arange(num_points)
        good_idx = idx[is_valid_point.values]
        idx_dim_name = indexes[index_names[0]].dims[0]
        good_indexes = indexes.isel({idx_dim_name: good_idx})

        cube_selector = {
            dim_names[i]: good_indexes[index_names[i]]
            for i in range(num_dims)
        }
        cube_values = cube.isel(cube_selector)

        new_bounds_vars = {}
        for var_name in cube.variables:
            var = cube_values[var_name]
            new_var = _empty_points_var(var, num_points)
            new_var[good_idx] = var
            new_bounds_vars[var_name] = new_var

        cube_values = xr.Dataset(new_bounds_vars)

    if drop_coords:
        cube_values = cube_values.drop(drop_coords)

    return cube_values
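A hedged usage sketch. It assumes the module-level defaults resolve to an index-name pattern like '{name}_index' and method 'nearest', and that `cube` is a valid cube with dims (time, lat, lon); all names below are illustrative:

import numpy as np
import xarray as xr

# Per-point integer indexes into the cube's dimensions; -1 marks an
# invalid point, which comes back as an all-missing row in the result.
indexes = xr.Dataset({
    'time_index': ('idx', np.array([0, 2, -1])),
    'lat_index': ('idx', np.array([10, 11, -1])),
    'lon_index': ('idx', np.array([5, 6, -1])),
})
values = get_cube_values_for_indexes(cube, indexes, cube_asserted=True)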