def _compute_target_grid_mapping(cube_config: CubeConfig,
                                 source_gm: GridMapping) -> GridMapping:
    """
    Derive the target grid mapping for a cube from its configuration.

    The configuration values ``crs``, ``bbox`` and ``spatial_res`` of
    *cube_config* each override the corresponding property of
    *source_gm*. Values that are ``None`` fall back to *source_gm*.

    If none of the three values is configured, *source_gm* is returned
    as-is when it is regular; otherwise its regular counterpart is
    returned, created with the tile size of *cube_config*.

    :param cube_config: The cube configuration providing ``crs``
        (a CRS string parsed by pyproj), ``bbox``
        (``x_min, y_min, x_max, y_max``), ``spatial_res``
        (a single value used for both x and y) and ``tile_size``.
    :param source_gm: The source grid mapping.
    :return: A regular grid mapping that uses the x,y variable names
        and dimension names of *source_gm*.
    """
    configured_crs = cube_config.crs
    configured_bbox = cube_config.bbox
    configured_res = cube_config.spatial_res

    nothing_configured = (configured_crs is None
                          and configured_bbox is None
                          and configured_res is None)
    if nothing_configured:
        # No spatial overrides: keep the source grid,
        # regularizing it only if required.
        if not source_gm.is_regular:
            return source_gm.to_regular(tile_size=cube_config.tile_size)
        return source_gm

    # Resolution: one configured value applies to both axes.
    resolution = (source_gm.xy_res if configured_res is None
                  else (configured_res, configured_res))

    # Origin and size: taken from the configured bounding box if any,
    # otherwise from the source grid.
    if configured_bbox is None:
        origin = source_gm.x_min, source_gm.y_min
        # NOTE(review): the source size is reused unchanged even if
        # the resolution was overridden above, so the covered extent
        # may differ from the source extent - confirm this is intended.
        grid_size = source_gm.size
    else:
        res_x, res_y = resolution
        left, bottom, right, top = configured_bbox
        origin = left, bottom
        grid_size = (round((right - left) / res_x),
                     round((top - bottom) / res_y))

    grid_crs = (source_gm.crs if configured_crs is None
                else pyproj.crs.CRS.from_string(configured_crs))

    regular_gm = GridMapping.regular(size=grid_size,
                                     xy_min=origin,
                                     xy_res=resolution,
                                     crs=grid_crs,
                                     tile_size=source_gm.tile_size,
                                     is_j_axis_up=source_gm.is_j_axis_up)
    # Keep coordinate variable and dimension names of the source.
    return regular_gm.derive(xy_var_names=source_gm.xy_var_names,
                             xy_dim_names=source_gm.xy_dim_names)
def resample_in_space(dataset: xr.Dataset,
                      source_gm: GridMapping = None,
                      target_gm: GridMapping = None,
                      var_configs: Mapping[Hashable, Mapping[str, Any]] = None):
    """
    Resample a dataset in the spatial dimensions.

    If the source grid mapping *source_gm* is not given,
    it is derived from *dataset*:
    ``source_gm = GridMapping.from_dataset(dataset)``.

    If the target grid mapping *target_gm* is not given,
    it is derived from *source_gm*:
    ``target_gm = source_gm.to_regular()``.

    If *source_gm* is almost equal to *target_gm*, this
    function is a no-op and *dataset* is returned unchanged.

    Otherwise the function computes a spatially
    resampled version of *dataset* and returns it.

    Using *var_configs*, the resampling of individual
    variables can be configured. If given, *var_configs*
    must be a mapping from variable names to configuration
    dictionaries which can have the following properties:

    * ``spline_order`` (int) - The order of spline polynomials
        used for interpolating. It is used for upsampling only.
        Possible values are 0 to 5.
        Default is 1 (bi-linear) for floating point variables,
        and 0 (= nearest neighbor) for integer and bool variables.
    * ``aggregator`` (str) - An optional aggregating
        function. It is used for downsampling only.
        Examples are numpy.nanmean, numpy.nanmin, numpy.nanmax.
        Default is numpy.nanmean for floating point variables,
        and None (= nearest neighbor) for integer and bool variables.
    * ``recover_nan`` (bool) - whether a special algorithm
        shall be used that is able to recover values that would
        otherwise yield NaN during resampling.
        Default is True for floating point variables,
        and False for integer and bool variables.

    Note that *var_configs* is only used if the resampling involves
    an affine transformation. This is true if the CRS of
    *source_gm* and the CRS of *target_gm* are equal (or both are
    geographic) and one of two cases is given:

    1. *source_gm* is regular.
       In this case the resampling is the affine transformation
       and the result is returned directly.
    2. *source_gm* is not regular and has a finer resolution
       (smaller pixel size) than *target_gm*.
       In this case *dataset* is downsampled first using an affine
       transformation. Then the result is rectified.

    In all other cases, no affine transformation is applied and
    the resampling is a direct rectification.

    If the CRSes differ and are not both geographic, *source_gm* is
    first transformed into the CRS of *target_gm* and the function
    calls itself with the transformed grid mapping. *var_configs*
    is forwarded to that call, so the rules above apply to the
    transformed grid mapping as well.

    :param dataset: The source dataset.
    :param source_gm: The source grid mapping.
    :param target_gm: The target grid mapping. Must be regular.
    :param var_configs: Optional resampling
        configurations for individual variables.
    :return: The spatially resampled dataset, or ``None`` if
        the rectification step returned ``None``.
    """
    if source_gm is None:
        # No source grid mapping given, so derive it from dataset
        source_gm = GridMapping.from_dataset(dataset)

    if target_gm is None:
        # No target grid mapping given, so derive it from source
        target_gm = source_gm.to_regular()

    if source_gm.is_close(target_gm):
        # Source and target grid mappings are almost equal:
        # nothing to do.
        return dataset

    # target_gm must be regular
    GridMapping.assert_regular(target_gm, name='target_gm')

    # Are source and target both geographic grid mappings?
    both_geographic = (source_gm.crs.is_geographic
                       and target_gm.crs.is_geographic)

    if both_geographic or source_gm.crs == target_gm.crs:
        # CRSes are both geographic or they are equal.
        if source_gm.is_regular:
            # If also the source is regular, then resampling reduces
            # to an affine transformation.
            return affine_transform_dataset(
                dataset,
                source_gm=source_gm,
                target_gm=target_gm,
                var_configs=var_configs,
            )

        # From here on the source is known to be NOT regular, so it
        # must be rectified to obtain a regular target.
        # The rectification implementation works only correctly if
        # source pixel size >= target pixel size. Therefore check
        # if we must downscale the source first.
        x_scale = source_gm.x_res / target_gm.x_res
        y_scale = source_gm.y_res / target_gm.y_res
        if x_scale > _SCALE_LIMIT and y_scale > _SCALE_LIMIT:
            # Both scale factors exceed _SCALE_LIMIT:
            # source pixels are large enough, we can rectify directly.
            return rectify_dataset(dataset,
                                   source_gm=source_gm,
                                   target_gm=target_gm)

        # Source has higher resolution than target.
        # Downscale first, then rectify.
        _, downscaled_size = scale_xy_res_and_size(source_gm.xy_res,
                                                   source_gm.size,
                                                   (x_scale, y_scale))
        # NOTE(review): the off-diagonal matrix entries are 1, not 0.
        # Kept as in the original; confirm that resample_dataset()
        # ignores them.
        downscaled_dataset = resample_dataset(
            dataset,
            ((x_scale, 1, 0), (1, y_scale, 0)),
            size=downscaled_size,
            tile_size=source_gm.tile_size,
            xy_dim_names=source_gm.xy_dim_names,
            var_configs=var_configs,
        )
        # The downscaled dataset carries new coordinates, so its
        # grid mapping is derived anew from it.
        downscaled_gm = GridMapping.from_dataset(
            downscaled_dataset,
            tile_size=source_gm.tile_size,
            prefer_crs=source_gm.crs
        )
        return rectify_dataset(downscaled_dataset,
                               source_gm=downscaled_gm,
                               target_gm=target_gm)

    # CRSes are not both geographic and they are different:
    # transform source_gm so its CRS matches the target CRS,
    # then resample again with the transformed grid mapping.
    transformed_source_gm = source_gm.transform(target_gm.crs)
    transformed_x, transformed_y = transformed_source_gm.xy_coords
    reprojected_dataset = resample_in_space(
        dataset.assign(transformed_x=transformed_x,
                       transformed_y=transformed_y),
        source_gm=transformed_source_gm,
        target_gm=target_gm,
        # Forward the per-variable configuration; it was
        # silently dropped here before.
        var_configs=var_configs,
    )
    if reprojected_dataset is None:
        # rectify_dataset() found nothing to rectify;
        # propagate that instead of failing on None.assign().
        return None
    if not target_gm.crs.is_geographic:
        # Add 'crs' variable according to CF conventions
        reprojected_dataset = reprojected_dataset.assign(
            crs=xr.DataArray(0, attrs=target_gm.crs.to_cf())
        )
    return reprojected_dataset
def rectify_dataset(source_ds: xr.Dataset,
                    *,
                    var_names: Union[str, Sequence[str]] = None,
                    source_gm: GridMapping = None,
                    xy_var_names: Tuple[str, str] = None,
                    target_gm: GridMapping = None,
                    tile_size: Union[int, Tuple[int, int]] = None,
                    is_j_axis_up: bool = None,
                    output_ij_names: Tuple[str, str] = None,
                    compute_subset: bool = True,
                    uv_delta: float = 1e-3) -> Optional[xr.Dataset]:
    """
    Reproject dataset *source_ds* using its per-pixel
    x,y coordinates or the given *source_gm*.

    The function expects *source_ds* or the given
    *source_gm* to have either one- or two-dimensional
    coordinate variables that provide spatial x,y coordinates
    for every data variable with the same spatial dimensions.

    For example, a dataset may comprise variables with
    spatial dimensions ``var(..., y_dim, x_dim)``,
    then the function expects coordinates to be provided
    in one of two forms:

    1. One-dimensional ``x_var(x_dim)``
       and ``y_var(y_dim)`` (coordinate) variables.
    2. Two-dimensional ``x_var(y_dim, x_dim)``
       and ``y_var(y_dim, x_dim)`` (coordinate) variables.

    If the effective target grid mapping is tiled
    (``target_gm.is_tiled``), the dask-based implementations are
    used to compute the output, which is then expected to be
    composed of lazy, chunked dask arrays. Otherwise the
    numpy-based implementations are used.

    The global attributes of the returned dataset are a copy of
    the attributes of *source_ds*.

    :param source_ds: Source dataset.
    :param var_names: Optional variable name or sequence of
        variable names.
    :param source_gm: Source dataset grid mapping.
        If not given, it is derived from *source_ds*.
    :param xy_var_names: Optional tuple of the x- and y-coordinate
        variables in *source_ds*. Ignored if *source_gm* is given.
    :param target_gm: Optional target grid mapping.
        If not given, it is computed as the regular
        counterpart of *source_gm*.
    :param tile_size: Optional tile size for the output.
    :param is_j_axis_up: Whether y coordinates are increasing with
        positive image j axis.
    :param output_ij_names: If given, a tuple of variable names in
        which to store the computed source pixel coordinates in
        the returned output.
    :param compute_subset: Whether to compute a spatial subset
        from *source_ds* using the bounding box of *target_gm*.
        Only effective if *target_gm* is given.
        If set, the function may return ``None`` in case there is
        no overlap.
    :param uv_delta: A normalized value that is used to determine
        whether x,y coordinates in the output are contained
        in the triangles defined by the input x,y coordinates.
        The higher this value, the more inaccurate the
        rectification will be.
    :return: a reprojected dataset, or None if the requested
        output does not intersect with *source_ds*.
    """
    if source_gm is None:
        source_gm = GridMapping.from_dataset(source_ds,
                                             xy_var_names=xy_var_names)

    # Copy the global attributes now, because source_ds may be
    # replaced by a spatial subset below.
    global_attrs = dict(source_ds.attrs)

    if target_gm is None:
        target_gm = source_gm.to_regular(tile_size=tile_size)
    elif compute_subset:
        target_bbox = target_gm.xy_bbox
        # Border in x,y units: mean of the target's x and y resolution.
        border_width = 0.5 * (target_gm.x_res + target_gm.y_res)
        subset_ds = select_spatial_subset(source_ds,
                                          xy_bbox=target_bbox,
                                          ij_border=1,
                                          xy_border=border_width,
                                          grid_mapping=source_gm)
        if subset_ds is None:
            return None
        if subset_ds is not source_ds:
            # TODO: GridMapping.from_dataset() may be expensive.
            #   Find a more effective way.
            source_gm = GridMapping.from_dataset(subset_ds)
            source_ds = subset_ds

    if not (tile_size is None and is_j_axis_up is None):
        target_gm = target_gm.derive(tile_size=tile_size,
                                     is_j_axis_up=is_j_axis_up)

    selected_vars = _select_variables(source_ds, source_gm, var_names)

    # Pick the dask- or numpy-based implementation pair.
    if target_gm.is_tiled:
        make_ij_images = _compute_ij_images_xarray_dask
        make_var_image = _compute_var_image_xarray_dask
    else:
        make_ij_images = _compute_ij_images_xarray_numpy
        make_var_image = _compute_var_image_xarray_numpy

    # Source pixel (i, j) coordinates for every target pixel.
    ij_images = make_ij_images(source_gm, target_gm, uv_delta)

    x_dim_name, y_dim_name = target_gm.xy_dim_names
    spatial_dims = y_dim_name, x_dim_name
    target_coords = target_gm.to_coords()

    rectified_vars = {}
    for var_name, var in selected_vars.items():
        # Replace the last two (spatial) dimensions by the target's.
        var_dims = var.dims[0:-2] + spatial_dims
        inherited_coords = {dim: var.coords[dim]
                            for dim in var_dims
                            if dim in var.coords}
        overriding_coords = {dim: target_coords[dim]
                             for dim in var_dims
                             if dim in target_coords}
        # Target coordinates take precedence over source coordinates.
        var_coords = {**inherited_coords, **overriding_coords}
        var_image = make_var_image(var, ij_images, fill_value=np.nan)
        rectified_vars[var_name] = xr.DataArray(var_image,
                                                dims=var_dims,
                                                coords=var_coords,
                                                attrs=var.attrs)

    if output_ij_names:
        i_var_name, j_var_name = output_ij_names
        ij_coords = {dim: target_coords[dim]
                     for dim in spatial_dims
                     if dim in target_coords}
        # Index 0 holds the i coordinates, index 1 the j coordinates.
        for index, ij_var_name in enumerate((i_var_name, j_var_name)):
            rectified_vars[ij_var_name] = xr.DataArray(ij_images[index],
                                                       dims=spatial_dims,
                                                       coords=ij_coords)

    return xr.Dataset(rectified_vars,
                      coords=target_coords,
                      attrs=global_attrs)