Пример #1
0
def parse_non_spatial_labels(
        raw_labels: Mapping[str, str],
        dims: Sequence[Hashable],
        coords: Mapping[Hashable, xr.DataArray],
        allow_slices: bool = False,
        exception_type: type = ValueError) -> Mapping[str, Any]:
    """
    Parse textual dimension labels into values usable for selecting
    along the non-spatial dimensions given by *dims*.

    For every dimension in *dims* that is not a spatial dimension, the
    label string is looked up in *raw_labels* and converted as follows:

    * no entry: the first value of the dimension's coordinate variable;
    * ``'current'``: the last value of the coordinate variable;
    * ``'<value>'``: the value parsed according to the dtype of the
      coordinate variable (floating point, integer, or datetime);
    * ``'<first>/<last>'``: both values are parsed; the result is a
      ``slice(first, last)`` if *allow_slices* is True, otherwise the
      value halfway between the two (floor division is used for
      integer coordinates).

    :param raw_labels: Mapping from dimension name to label string.
    :param dims: The dimensions to provide labels for.
    :param coords: Mapping from name to coordinate variable.
    :param allow_slices: Whether ranges are returned as slices.
    :param exception_type: Type of the exception to raise on errors.
    :return: Mapping from dimension name (as ``str``) to parsed label.
    :raise: *exception_type*, if no spatial coordinates are found,
        if the coordinate dtype is not supported, or if a label string
        cannot be parsed.
    """
    spatial_var_names = get_dataset_xy_var_names(coords, must_exist=False)
    if spatial_var_names is None:
        raise exception_type('missing spatial coordinates')
    spatial_dims = {coords[name].dims[0] for name in spatial_var_names}

    def parse_datetime(text: str, coord_var: xr.DataArray):
        # Within a datetime value, 'current' refers to the last coordinate.
        return coord_var[-1] if text == 'current' else pd.to_datetime(text)

    def parse_label(text, coord_var: xr.DataArray):
        if text is None:
            return coord_var.values[0]
        if text == 'current':
            return coord_var.values[-1]

        # maxsplit=1 yields one part (single value) or two parts (range).
        parts = tuple(text.split('/', maxsplit=1))
        dtype = coord_var.dtype
        is_integer = np.issubdtype(dtype, np.integer)
        if np.issubdtype(dtype, np.floating):
            values = tuple(float(part) for part in parts)
        elif is_integer:
            values = tuple(int(part) for part in parts)
        elif np.issubdtype(dtype, np.datetime64):
            values = tuple(parse_datetime(part, coord_var) for part in parts)
        else:
            raise exception_type(
                f'unable to parse value {text!r} into a {dtype!r}'
            )

        if len(values) == 1:
            return values[0]
        if allow_slices:
            return slice(values[0], values[1])
        # Ranges collapse to their midpoint when slices are not allowed.
        if is_integer:
            return values[0] + (values[1] - values[0]) // 2
        return values[0] + (values[1] - values[0]) / 2

    result = {}
    for dim in dims:
        if dim in spatial_dims:
            continue
        coord_var = coords[dim]
        text = raw_labels.get(dim)
        try:
            result[str(dim)] = parse_label(text, coord_var)
        except ValueError as error:
            raise exception_type(
                f'{text!r} is not a valid value for dimension {dim!r}'
            ) from error

    return result
Пример #2
0
def clip_dataset_by_geometry(
        dataset: xr.Dataset,
        geometry: GeometryLike,
        save_geometry_wkt: Union[str, bool] = False
) -> Optional[xr.Dataset]:
    """
    Spatially clip a dataset according to the bounding box of a
    given geometry.

    The dataset's bounds are intersected with *geometry*; the
    actual clipping is then delegated to
    ``_clip_dataset_by_geometry``.

    :param dataset: The dataset
    :param geometry: A geometry-like object,
        see py:function:`convert_geometry`.
    :param save_geometry_wkt: If the value is a string,
        the effective intersection geometry is stored as
        a Geometry WKT string in the global attribute named
        by *save_geometry_wkt*. If the value is True, the name
        "geometry_wkt" is used.
    :return: The dataset spatial subset, or None if the bounding
        box of the dataset has no or a zero-area
        intersection with the bounding box of the geometry.
    """
    spatial_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    dataset_bounds = get_dataset_bounds(dataset,
                                        xy_var_names=spatial_var_names)
    clip_geometry = intersect_geometries(dataset_bounds, geometry)
    if clip_geometry is not None:
        return _clip_dataset_by_geometry(dataset,
                                         clip_geometry,
                                         spatial_var_names,
                                         save_geometry_wkt=save_geometry_wkt)
    # Nothing of the dataset lies within the geometry.
    return None
Пример #3
0
def is_dataset_y_axis_inverted(
        dataset: Union[xr.Dataset, xr.DataArray],
        xy_var_names: Tuple[str, str] = None
) -> bool:
    """
    Test whether the y-coordinate values of *dataset* increase with
    increasing index, i.e. whether the first y-value is less than
    the last one.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x- and
        y-coordinate variables. If not given, the names are determined
        using ``get_dataset_xy_var_names``.
    :return: True, if the first y-value is less than the last y-value.
    """
    if xy_var_names is None:
        xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    y_values = dataset[xy_var_names[1]]
    first_y = float(y_values[0])
    last_y = float(y_values[-1])
    return first_y < last_y
Пример #4
0
def get_dataset_geometry(
        dataset: Union[xr.Dataset, xr.DataArray],
        xy_var_names: Tuple[str, str] = None
) -> shapely.geometry.base.BaseGeometry:
    """
    Get a geometry for the spatial bounds of *dataset*.

    For datasets recognized by ``is_lon_lat_dataset`` the geometry is
    created by ``get_box_split_bounds_geometry``, otherwise it is a
    plain box of the dataset's bounds.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x- and
        y-coordinate variables. If not given, the names are determined
        using ``get_dataset_xy_var_names``.
    :return: The geometry of the dataset's bounds.
    """
    if xy_var_names is None:
        xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names)
    if not is_lon_lat_dataset(dataset, xy_var_names=xy_var_names):
        return shapely.geometry.box(*bounds)
    # NOTE(review): presumably splits boxes that cross the
    # anti-meridian - confirm against get_box_split_bounds_geometry.
    return get_box_split_bounds_geometry(*bounds)
Пример #5
0
def is_lon_lat_dataset(
        dataset: Union[xr.Dataset, xr.DataArray],
        xy_var_names: Tuple[str, str] = None
) -> bool:
    """
    Test whether the spatial coordinates of *dataset* are
    longitude and latitude.

    This is the case if the coordinate variables are named
    ``lon`` and ``lat``, or if their ``long_name`` attributes are
    ``longitude`` and ``latitude``, respectively.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x- and
        y-coordinate variables. If not given, the names are determined
        using ``get_dataset_xy_var_names``.
    :return: True, if the dataset uses longitude/latitude coordinates.
    """
    if xy_var_names is None:
        xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    x_var_name, y_var_name = xy_var_names
    if (x_var_name, y_var_name) == ('lon', 'lat'):
        # Conventional names, no need to inspect attributes.
        return True
    x_var = dataset[x_var_name]
    y_var = dataset[y_var_name]
    x_is_longitude = x_var.attrs.get('long_name') == 'longitude'
    return x_is_longitude and y_var.attrs.get('long_name') == 'latitude'
Пример #6
0
def get_dataset_bounds(dataset: Union[xr.Dataset, xr.DataArray],
                       xy_var_names: Tuple[str, str] = None) -> Bounds:
    """
    Get the spatial bounds of *dataset*.

    For each spatial axis, a bounds variable is looked up using
    ``get_dataset_bounds_var_name``. If one is found, the limits are
    taken from its first and last rows. Otherwise the limits are the
    first and last coordinate values extended by half a cell, where
    the cell size is the coordinate extent divided by ``size - 1``.

    The returned ``x_min`` may be greater than ``x_max``; in that case
    the bounds intersect the anti-meridian. If no x-bounds variable
    exists, the x-coordinate is named ``lon``, and its last value is
    less than its first, 360 is added to the extent before the cell
    size is computed.

    NOTE: without a bounds variable, a coordinate of size 1 leads to a
    division by zero (``size - 1``) when the cell size is computed.

    :param dataset: The dataset or data array.
    :param xy_var_names: Optional pair of names of the x- and
        y-coordinate variables. If not given, the names are determined
        using ``get_dataset_xy_var_names``.
    :return: The tuple of floats (x_min, y_min, x_max, y_max).
    """
    if xy_var_names is None:
        xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    x_name, y_name = xy_var_names
    x_var, y_var = dataset.coords[x_name], dataset.coords[y_name]
    is_lon = x_name == 'lon'

    # Note, if x_min > x_max then we intersect with the anti-meridian
    x_bnds_name = get_dataset_bounds_var_name(dataset, x_name)
    if x_bnds_name:
        x_bnds_var = dataset.coords[x_bnds_name]
        x1 = x_bnds_var[0, 0]
        x2 = x_bnds_var[0, 1]
        x3 = x_bnds_var[-1, 0]
        x4 = x_bnds_var[-1, 1]
        x_min = min(x1, x2)
        x_max = max(x3, x4)
    else:
        x_first = x_var[0]
        x_last = x_var[-1]
        wrap = 0 if x_last >= x_first or not is_lon else 360
        delta = (x_last - x_first + wrap) / (x_var.size - 1)
        # Build new values rather than using "-=" / "+=": x_first and
        # x_last are sliced from the coordinate variable, and in-place
        # arithmetic on them may write through to the dataset's data.
        x_min = x_first - 0.5 * delta
        x_max = x_last + 0.5 * delta

    # Note, y-axis may be inverted
    y_bnds_name = get_dataset_bounds_var_name(dataset, y_name)
    if y_bnds_name:
        y_bnds_var = dataset.coords[y_bnds_name]
        y1 = y_bnds_var[0, 0]
        y2 = y_bnds_var[0, 1]
        y3 = y_bnds_var[-1, 0]
        y4 = y_bnds_var[-1, 1]
        y_min = min(y1, y2, y3, y4)
        y_max = max(y1, y2, y3, y4)
    else:
        y1 = y_var[0]
        y2 = y_var[-1]
        delta = abs(y2 - y1) / (y_var.size - 1)
        y_min = min(y1, y2) - 0.5 * delta
        y_max = max(y1, y2) + 0.5 * delta

    return float(x_min), float(y_min), float(x_max), float(y_max)
Пример #7
0
def verify_cube(dataset: xr.Dataset) -> List[str]:
    """
    Verify that the given *dataset* is a valid xcube dataset.

    The tool verifies that *dataset*
    * defines two spatial x,y coordinate variables that are 1D, non-empty, and use correct units;
    * defines a time coordinate variable that is 1D, non-empty, and uses correct units;
    * has valid bounds variables for the spatial x,y and time coordinate variables, if any;
    * has any data variables and that they are valid, e.g. min. 3-D, all have
      same dimensions, have at least the dimensions dim(time), dim(y), dim(x) in that order.

    Returns a list of issues, which is empty if *dataset* is a valid xcube dataset.

    :param dataset: A dataset to be verified.
    :return: List of issues or empty list.
    """
    issues = []

    xy_var_names = get_dataset_xy_var_names(dataset, must_exist=False)
    if xy_var_names is None:
        issues.append("missing spatial x,y coordinate variables")

    time_var_name = get_dataset_time_var_name(dataset, must_exist=False)
    if time_var_name is None:
        issues.append("missing time coordinate variable")

    has_time = bool(time_var_name)
    if has_time:
        _check_time(dataset, time_var_name, issues)

    # All remaining checks need the spatial coordinate variables.
    if not xy_var_names:
        return issues

    x_var_name, y_var_name = xy_var_names[0], xy_var_names[1]

    if is_lon_lat_dataset(dataset, xy_var_names=xy_var_names):
        _check_lon_or_lat(dataset, x_var_name, -180., 180., issues)
        _check_lon_or_lat(dataset, y_var_name, -90., 90., issues)

    if has_time:
        _check_data_variables(dataset, xy_var_names, time_var_name, issues)

    for coord_var_name in (x_var_name, y_var_name):
        _check_coord_equidistance(dataset, coord_var_name, coord_var_name,
                                  issues)

    return issues
Пример #8
0
def rasterize_features(dataset: xr.Dataset,
                       features: Union[GeoDataFrame, GeoJSONFeatures],
                       feature_props: Sequence[Name],
                       var_props: Dict[Name, VarProps] = None,
                       in_place: bool = False) -> Optional[xr.Dataset]:
    """
    Rasterize feature properties given by *feature_props* of vector-data *features*
    as new variables into *dataset*.

    *dataset* must have two spatial 1-D coordinates, either ``lon`` and ``lat`` in degrees,
    reprojected coordinates, ``x`` and ``y``, or similar.

    *feature_props* is a sequence of names of feature properties that must exist in each
    feature of *features*.

    *features* may be passed as ``geopandas.GeoDataFrame`` or as an iterable of GeoJSON features.

    Using the optional *var_props*, the properties of newly created variables from feature properties
    can be specified. It is a mapping of feature property names to mappings of variable
    properties. Here is an example variable properties mapping:::

    {
        'name': 'land_class',  # (str) - the variable's name, default is the feature property name
                               # with spaces replaced by underscores;
        'dtype': np.int16,     # (str|np.dtype) - the variable's dtype, default is np.float64;
        'fill_value': -1,      # (bool|int|float|np.ndarray) - the variable's fill value, default is np.nan;
        'attrs': {},           # (Mapping[str, Any]) - the variable's attributes, default is {};
        'converter': int,      # (Callable[[Any], Any]) - a converter function used to convert from property
                               # feature value to variable value, default is float.
    }

    Currently, the coordinates of the geometries in the given *features* must use the same CRS as
    the given *dataset*.

    Features whose geometry is missing, empty, invalid, or not intersecting the bounds
    of *dataset* are skipped.

    :param dataset: The xarray dataset.
    :param features: A ``geopandas.GeoDataFrame`` instance or a sequence of GeoJSON features.
    :param feature_props: Sequence of names of numeric feature properties to be rasterized.
    :param var_props: Optional mapping of feature property name
        to a mapping of variable properties
        (keys ``name``, ``dtype``, ``fill_value``, ``attrs``, ``converter``) for the new variable.
    :param in_place: Whether to add new variables to *dataset*.
        If False, a new dataset that has the coordinates and attributes of *dataset*
        will be created and returned.
    :return: dataset with rasterized feature properties
    :raise ValueError: if a name in *feature_props* is not found in *features*.
    """
    import geopandas

    var_props = var_props or {}
    xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    dataset_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names)

    ds_x_min, ds_y_min, ds_x_max, _ = dataset_bounds

    x_var_name, y_var_name = xy_var_names
    x_var, y_var = dataset[x_var_name], dataset[y_var_name]
    x_dim, y_dim = x_var.dims[0], y_var.dims[0]
    coords = {y_var_name: y_var, x_var_name: x_var}
    dims = (y_dim, x_dim)

    width = x_var.size
    height = y_var.size
    spatial_res = (ds_x_max - ds_x_min) / width

    if isinstance(features, geopandas.GeoDataFrame):
        geo_data_frame = features
    else:
        geo_data_frame = geopandas.GeoDataFrame.from_features(features)

    for feature_property_name in feature_props:
        if feature_property_name not in geo_data_frame:
            raise ValueError(
                f'feature property {feature_property_name!r} not found')

    if not in_place:
        dataset = xr.Dataset(coords=dataset.coords, attrs=dataset.attrs)

    # The variable properties do not depend on the feature,
    # so resolve them once per feature property.
    var_specs = []
    for feature_property_name in feature_props:
        var_prop_mapping = var_props.get(feature_property_name, {})
        var_specs.append((
            feature_property_name,
            var_prop_mapping.get('name',
                                 feature_property_name.replace(' ', '_')),
            var_prop_mapping.get('dtype', np.float64),
            var_prop_mapping.get('fill_value', np.nan),
            var_prop_mapping.get('attrs', {}),
            var_prop_mapping.get('converter', float),
        ))

    # Iterate by position: the data frame's index is not necessarily
    # 0..n-1 (e.g. after filtering), so label-based lookups using
    # the row counter would fail or pick the wrong feature.
    for row, geometry in enumerate(geo_data_frame.geometry):
        if geometry is None or geometry.is_empty or not geometry.is_valid:
            continue

        # TODO (forman): allow transforming geometry into CRS of dataset here
        intersection_geometry = intersect_geometries(dataset_bounds, geometry)
        if intersection_geometry is None:
            continue

        # TODO (forman): check, we should be able to drastically improve performance by generating
        #                 the mask for a dataset subset generated by clipping against geometry
        mask_data = get_geometry_mask(width, height, intersection_geometry,
                                      ds_x_min, ds_y_min, spatial_res)
        mask = xr.DataArray(mask_data, coords=coords, dims=dims)

        for (feature_property_name, var_name, var_dtype,
             var_fill_value, var_attrs, converter) in var_specs:

            feature_property_value = converter(
                geo_data_frame[feature_property_name].iloc[row])

            var_new = xr.DataArray(np.full((height, width),
                                           feature_property_value,
                                           dtype=var_dtype),
                                   coords=coords,
                                   dims=dims,
                                   attrs=var_attrs)
            if var_name not in dataset:
                # First feature for this variable: start from fill values.
                var_old = xr.DataArray(np.full((height, width),
                                               var_fill_value,
                                               dtype=var_dtype),
                                       coords=coords,
                                       dims=dims,
                                       attrs=var_attrs)
                dataset[var_name] = var_old
            else:
                var_old = dataset[var_name]

            # Inside the feature's mask use the feature's value,
            # elsewhere keep what has been rasterized so far.
            dataset[var_name] = var_new.where(mask, var_old)
            dataset[var_name].encoding.update(fill_value=var_fill_value)

    return dataset
Пример #9
0
def mask_dataset_by_geometry(
        dataset: xr.Dataset,
        geometry: GeometryLike,
        excluded_vars: Sequence[str] = None,
        no_clip: bool = False,
        save_geometry_mask: Union[str, bool] = False,
        save_geometry_wkt: Union[str, bool] = False) -> Optional[xr.Dataset]:
    """
    Mask a dataset according to the given geometry. The cells of variables of the
    returned dataset will have NaN-values where their spatial coordinates are not intersecting
    the given geometry.

    :param dataset: The dataset
    :param geometry: A geometry-like object, see py:function:`convert_geometry`.
    :param excluded_vars: Optional sequence of names of data variables that should not be masked
        (but still may be clipped).
    :param no_clip: If True, the function will not clip the dataset before masking, that is, the
        returned dataset will have the same dimension size as the given *dataset*.
    :param save_geometry_mask: If the value is a string, the effective geometry mask array is stored as
        a 2D data variable named by *save_geometry_mask*.
        If the value is True, the name "geometry_mask" is used.
    :param save_geometry_wkt: If the value is a string, the effective intersection geometry is stored as
        a Geometry WKT string in the global attribute named by *save_geometry_wkt*.
        If the value is True, the name "geometry_wkt" is used.
    :return: The dataset spatial subset, or None if the bounding box of the dataset has no or a zero-area
        intersection with the bounding box of the geometry.
    """
    geometry = convert_geometry(geometry)
    xy_var_names = get_dataset_xy_var_names(dataset, must_exist=True)
    full_bounds = get_dataset_bounds(dataset, xy_var_names=xy_var_names)
    intersection_geometry = intersect_geometries(full_bounds, geometry)
    if intersection_geometry is None:
        return None

    if not no_clip:
        dataset = _clip_dataset_by_geometry(dataset, intersection_geometry,
                                            xy_var_names)

    # Bounds are computed again, as the dataset may have been clipped.
    x_min, y_min, x_max, _ = get_dataset_bounds(dataset,
                                                xy_var_names=xy_var_names)

    x_name, y_name = xy_var_names
    x_coord = dataset[x_name]
    y_coord = dataset[y_name]

    num_cols = x_coord.size
    num_rows = y_coord.size
    cell_size = (x_max - x_min) / num_cols

    mask = xr.DataArray(
        get_geometry_mask(num_cols, num_rows, intersection_geometry,
                          x_min, y_min, cell_size),
        coords={y_name: y_coord, x_name: x_coord},
        dims=(y_coord.dims[0], x_coord.dims[0])
    )

    # Excluded variables are passed through as-is, all others are masked.
    masked_vars = {
        name: (var if (excluded_vars and name in excluded_vars)
               else var.where(mask))
        for name, var in dataset.data_vars.items()
    }

    result = xr.Dataset(masked_vars,
                        coords=dataset.coords,
                        attrs=dataset.attrs)

    _save_geometry_mask(result, mask, save_geometry_mask)
    _save_geometry_wkt(result, intersection_geometry, save_geometry_wkt)

    return result