def test_resample_f_all(self):
    """Resample with frequency ``'all'`` and expect a single aggregated time step."""
    result = resample_in_time(self.input_cube, 'all', ['min', 'max'])
    self.assertIsNot(result, self.input_cube)

    # Expected value at spatial index (0, 0) of each aggregated variable.
    expected_corner_values = {
        'temperature_min': 272.0,
        'temperature_max': 274.9,
        'precipitation_min': 114.2,
        'precipitation_max': 120.0,
    }
    cube_dims = ('time', 'lat', 'lon')
    cube_shape = (1, 180, 360)

    self.assertIn('time', result)
    for name in expected_corner_values:
        self.assertIn(name, result)

    self.assertEqual(('time',), result.time.dims)
    for name in expected_corner_values:
        self.assertEqual(cube_dims, result[name].dims)

    self.assertEqual((1,), result.time.shape)
    for name in expected_corner_values:
        self.assertEqual(cube_shape, result[name].shape)

    for name, expected in expected_corner_values.items():
        np.testing.assert_allclose(result[name].values[..., 0, 0],
                                   np.array([expected]))

    result_schema = CubeSchema.new(result)
    self.assertEqual(3, result_schema.ndim)
    self.assertEqual(cube_dims, result_schema.dims)
    self.assertEqual(cube_shape, result_schema.shape)
def test_resample_in_time_with_time_chunk_size(self):
    """An explicit *time_chunk_size* must show up as the time chunk size of the result."""
    result = resample_in_time(self.input_cube, '2D', ['min', 'max'],
                              time_chunk_size=5)
    result_schema = CubeSchema.new(result)

    # Checked in this order: ndim, dims, shape, chunks.
    expected_properties = (
        ('ndim', 3),
        ('dims', ('time', 'lat', 'lon')),
        ('shape', (33, 180, 360)),
        ('chunks', (5, 90, 180)),
    )
    for property_name, expected in expected_properties:
        self.assertEqual(expected, getattr(result_schema, property_name))
def test_repr_html(self):
    """The HTML representation of a schema lists shape, chunk sizes and dimensions."""
    chunked_cube = new_cube(variables=dict(a=2, b=3, c=4)).chunk(
        dict(time=1, lat=90, lon=90))
    expected_html = ''.join([
        "<table>",
        "<tr><td>Shape:</td><td>(5, 180, 360)</td></tr>",
        "<tr><td>Chunk sizes:</td><td>(1, 90, 90)</td></tr>",
        "<tr><td>Dimensions:</td><td>('time', 'lat', 'lon')</td></tr>",
        "</table>",
    ])
    actual_html = CubeSchema.new(chunked_cube)._repr_html_()
    self.assertEqual(expected_html, actual_html)
def test_resample_in_time_min_max(self):
    """Resample to two-week steps using 'min' and 'max' and verify labels and values."""
    result = resample_in_time(self.input_cube, '2W', ['min', 'max'])
    self.assertIsNot(result, self.input_cube)

    # Expected time series at spatial index (0, 0) of each aggregated variable.
    expected_series = {
        'temperature_min': [272.0, 272.4, 273.0, 273.8, 274.4, 274.9],
        'temperature_max': [272.3, 272.9, 273.7, 274.3, 274.8, 274.9],
        'precipitation_min': [119.4, 118.2, 116.6, 115.4, 114.4, 114.2],
        'precipitation_max': [120.0, 119.2, 118.0, 116.4, 115.2, 114.2],
    }
    cube_dims = ('time', 'lat', 'lon')
    cube_shape = (6, 180, 360)

    self.assertIn('time', result)
    for name in expected_series:
        self.assertIn(name, result)

    self.assertEqual(('time',), result.time.dims)
    for name in expected_series:
        self.assertEqual(cube_dims, result[name].dims)

    self.assertEqual((6,), result.time.shape)
    for name in expected_series:
        self.assertEqual(cube_shape, result[name].shape)

    expected_times = np.array(['2017-06-25T00:00:00',
                               '2017-07-09T00:00:00',
                               '2017-07-23T00:00:00',
                               '2017-08-06T00:00:00',
                               '2017-08-20T00:00:00',
                               '2017-09-03T00:00:00'],
                              dtype=np.datetime64)
    np.testing.assert_equal(result.time.values, expected_times)

    for name, series in expected_series.items():
        np.testing.assert_allclose(result[name].values[..., 0, 0],
                                   np.array(series))

    result_schema = CubeSchema.new(result)
    self.assertEqual(3, result_schema.ndim)
    self.assertEqual(cube_dims, result_schema.dims)
    self.assertEqual(cube_shape, result_schema.shape)
    self.assertEqual((1, 90, 180), result_schema.chunks)
def test_without_inputs(self):
    """Compute a cube from a schema only; the cube function must be called lazily."""
    calls = []

    def my_cube_func(input_params: Dict[str, Any] = None,
                     dim_coords: Dict[str, np.ndarray] = None,
                     dim_ranges: Dict[str, Tuple[int, int]] = None) -> CubeFuncOutput:
        # Record every invocation so the test can count the processed chunks.
        calls.append((input_params, dim_coords, dim_ranges))
        block_shape = tuple(dim_ranges[dim_name][1] - dim_ranges[dim_name][0]
                            for dim_name in ('time', 'lat', 'lon'))
        return np.full(block_shape, input_params['fill_value'], dtype=np.float64)

    output_cube = compute_cube(my_cube_func,
                               input_cube_schema=CubeSchema.new(self.cube),
                               input_params=dict(fill_value=0.74))
    self.assertIsInstance(output_cube, xr.Dataset)
    self.assertIn('output', output_cube.data_vars)

    output_var = output_cube.output
    # Nothing has been computed yet, so the cube function was not called so far.
    self.assertEqual(0, len(calls))
    self.assertEqual(('time', 'lat', 'lon'), output_var.dims)
    self.assertEqual((6, 180, 360), output_var.shape)

    # Accessing the values triggers the computation of all chunks.
    computed = output_var.values
    self.assertEqual(2 * 2 * 4, len(calls))
    self.assertEqual((6, 180, 360), computed.shape)
    self.assertAlmostEqual(0.74, computed[0, 0, 0])
    self.assertAlmostEqual(0.74, computed[-1, -1, -1])
def test_constructor_with_invalid_args(self):
    """Every invalid ``CubeSchema`` constructor argument must raise a descriptive ValueError."""
    cube = new_cube(variables=dict(t=273))
    schema = CubeSchema.new(cube)

    def assert_rejected(expected_message, *args, **kwargs):
        # The constructor must fail with exactly the expected message.
        with self.assertRaises(ValueError) as cm:
            # noinspection PyTypeChecker
            CubeSchema(*args, **kwargs)
        self.assertEqual(expected_message, f'{cm.exception}')

    assert_rejected('shape must be a sequence of integer sizes',
                    None, schema.coords)
    assert_rejected('coords must be a mapping from dimension names to label arrays',
                    schema.shape, None)
    assert_rejected('x_name must be given',
                    schema.shape, cube.coords, x_name=None)
    assert_rejected('y_name must be given',
                    schema.shape, cube.coords, y_name=None)
    assert_rejected('time_name must be given',
                    schema.shape, cube.coords, time_name=None)
    assert_rejected('shape must have at least three dimensions',
                    schema.shape[1:], schema.coords)
    assert_rejected('dims must have same length as shape',
                    schema.shape, schema.coords, dims=('lat', 'lon'))
    assert_rejected("the first dimension in dims must be 'time'",
                    schema.shape, schema.coords, dims=('lat', 'lon', 'time'))
    assert_rejected("the last two dimensions in dims must be 'lat' and 'lon'",
                    schema.shape, schema.coords, dims=('time', 'lon', 'lat'))
    assert_rejected("chunks must have same length as shape",
                    schema.shape, schema.coords, dims=schema.dims, chunks=(90, 90))

    # Coordinates with the 'lat' variable removed.
    coords_without_lat = dict(schema.coords)
    del coords_without_lat['lat']
    assert_rejected("missing variables 'lon', 'lat', 'time' in coords",
                    schema.shape, coords_without_lat,
                    dims=schema.dims, chunks=(1, 90, 90))

    # Coordinates whose 'lat' variable is 2-D instead of 1-D.
    coords_with_2d_lat = dict(schema.coords)
    lat = coords_with_2d_lat['lat']
    coords_with_2d_lat['lat'] = xr.DataArray(lat.values.reshape((1, len(lat))),
                                             dims=('b', lat.dims[0]),
                                             attrs=lat.attrs)
    assert_rejected("variables 'lon', 'lat', 'time' in coords must be 1-D",
                    schema.shape, coords_with_2d_lat,
                    dims=schema.dims, chunks=(1, 90, 90))

    # Coordinates whose 'lat' variable has one label less than the shape requires.
    coords_with_short_lat = dict(schema.coords)
    lat = coords_with_short_lat['lat']
    coords_with_short_lat['lat'] = xr.DataArray(lat.values[1:],
                                                dims=('lat',),
                                                attrs=lat.attrs)
    assert_rejected("number of labels of 'lat' in coords does not match shape",
                    schema.shape, coords_with_short_lat,
                    dims=schema.dims, chunks=(1, 90, 90))
def test_new_chunked(self):
    """A schema created from a chunked cube reports the cube's chunk sizes."""
    chunking = dict(time=1, lat=90, lon=90)
    chunked_cube = new_cube(variables=dict(a=2, b=3, c=4)).chunk(chunking)
    self._assert_schema(CubeSchema.new(chunked_cube),
                        expected_shape=chunked_cube.a.shape,
                        expected_chunks=(1, 90, 90))
def test_new_with_cube(self):
    """``CubeSchema.new`` must reject cubes it cannot derive a schema from."""

    def assert_rejected(invalid_cube, expected_message):
        # Schema creation must fail with exactly the expected message.
        with self.assertRaises(ValueError) as cm:
            CubeSchema.new(invalid_cube)
        self.assertEqual(expected_message, f'{cm.exception}')

    # A cube without any data variables.
    assert_rejected(new_cube(), "cube is empty")

    # A cube without the 'lon' coordinate.
    cube = new_cube()
    del cube.coords['lon']
    assert_rejected(cube, "cube has no valid spatial coordinate variables")

    # A cube without the 'time' coordinate.
    cube = new_cube()
    del cube.coords['time']
    assert_rejected(cube, "cube has no valid time coordinate variable")

    # A cube with a variable that uses a foreign dimension.
    cube = new_cube(variables=dict(a=1, b=2))
    cube['c'] = xr.DataArray(np.array([1, 2, 3, 4, 5]), dims=('q',))
    assert_rejected(cube,
                    "all variables must have same dimensions, "
                    "but variable 'c' has dimensions ('q',)")

    # A cube whose variables are chunked differently.
    cube = new_cube(variables=dict(a=1, b=2))
    cube = cube.chunk(dict(time=1, lat=90, lon=90))
    cube['b'] = cube['b'].chunk(dict(time=1, lat=45, lon=90))
    assert_rejected(cube,
                    "all variables must have same chunks, "
                    "but variable 'b' has chunks (1, 45, 90)")

    # A cube with irregular chunk sizes along one dimension.
    cube = new_cube(variables=dict(a=1, b=2))
    cube = cube.chunk(dict(time=1, lat=(44, 43, 46, 47), lon=90))
    assert_rejected(cube,
                    "dimension 'lat' of variable 'a' has chunks of different sizes: "
                    "(44, 43, 46, 47)")
def test_new(self):
    """A schema created from a plain cube reports the shape of the cube's variables."""
    source_cube = new_cube(variables=dict(a=2, b=3, c=4))
    self._assert_schema(CubeSchema.new(source_cube),
                        expected_shape=source_cube.a.shape)
def resample_in_time(dataset: xr.Dataset,
                     frequency: str,
                     method: Union[str, Sequence[str]],
                     offset=None,
                     base: int = 0,
                     tolerance=None,
                     interp_kind=None,
                     time_chunk_size=None,
                     var_names: Sequence[str] = None,
                     metadata: Dict[str, Any] = None,
                     cube_asserted: bool = False) -> xr.Dataset:
    """
    Resample a dataset in the time dimension.

    The argument *method* may be one or a sequence of
    ``'all'``, ``'any'``, ``'argmax'``, ``'argmin'``, ``'count'``,
    ``'first'``, ``'last'``, ``'max'``, ``'min'``, ``'mean'``, ``'median'``,
    ``'percentile_<p>'``, ``'std'``, ``'sum'``, ``'var'``.

    In ``'percentile_<p>'``, ``'<p>'`` is a placeholder that must be replaced
    by an integer percentage value, e.g. ``'percentile_90'`` is the 90%-percentile.

    Each resulting variable is named ``<variable>_<method>``; for percentiles
    the postfix is ``p<p>``, e.g. ``temperature_p90``.

    *Important note:* As of xarray 0.14 and dask 2.8, the methods ``'median'``
    and ``'percentile_<p>'`` cannot be used if the variables in *dataset*
    comprise chunked dask arrays. In this case, use the ``compute()`` or
    ``load()`` method to convert dask arrays into numpy arrays.

    :param dataset: The xcube dataset.
    :param frequency: Temporal aggregation frequency. Use format "<count><offset>"
        where <offset> is one of 'H', 'D', 'W', 'M', 'Q', 'Y'. The special
        value ``'all'`` is translated into a number of days derived from the
        first and last time label of *dataset*.
    :param method: Resampling method or sequence of resampling methods.
    :param offset: Offset used to adjust the resampled time labels.
        Uses same syntax as *frequency*.
    :param base: For frequencies that evenly subdivide 1 day, the "origin" of the
        aggregated intervals. For example, for '24H' frequency, base could range
        from 0 through 23.
    :param time_chunk_size: If not None, the chunk size to be used for the
        "time" dimension.
    :param var_names: Variable names to include.
    :param tolerance: Time tolerance for selective upsampling methods.
        Defaults to *frequency*.
    :param interp_kind: Kind of interpolation if *method* is 'interpolation'.
    :param metadata: Output metadata.
    :param cube_asserted: If False, *dataset* will be verified,
        otherwise it is expected to be a valid cube.
    :return: A new xcube dataset resampled in time.
    """
    if not cube_asserted:
        assert_cube(dataset)

    if frequency == 'all':
        # Use one interval that spans the dataset's time range in days.
        time_gap = np.array(dataset.time[-1]) - np.array(dataset.time[0])
        days = int((np.timedelta64(time_gap, 'D') / np.timedelta64(1, 'D')) + 1)
        frequency = f'{days}D'

    if var_names:
        dataset = select_variables_subset(dataset, var_names)

    resampler = dataset.resample(skipna=True,
                                 closed='left',
                                 label='left',
                                 time=frequency,
                                 loffset=offset,
                                 base=base)

    method_names = [method] if isinstance(method, str) else list(method)

    percentile_prefix = 'percentile_'

    resampled_parts = []
    for method_name in method_names:
        if method_name.startswith(percentile_prefix):
            # 'percentile_<p>' is mapped onto the resampler's quantile(<p>/100).
            percentage = int(method_name[len(percentile_prefix):])
            resampler_method_name = 'quantile'
            positional_args = [percentage / 100.0]
            name_postfix = f'p{percentage}'
        else:
            resampler_method_name = method_name
            positional_args = []
            name_postfix = method_name
        resampler_method = getattr(resampler, resampler_method_name)
        keyword_args = get_method_kwargs(resampler_method_name,
                                         frequency,
                                         interp_kind,
                                         tolerance)
        part = resampler_method(*positional_args, **keyword_args)
        renaming = {var_name: f'{var_name}_{name_postfix}'
                    for var_name in part.data_vars}
        resampled_parts.append(part.rename(renaming))

    if len(resampled_parts) == 1:
        resampled_cube = resampled_parts[0]
    else:
        resampled_cube = xr.merge(resampled_parts)

    # TODO: add time_bnds to resampled_ds
    # TODO: add other time_coverage_ attributes
    resampled_cube.attrs.update(metadata or {})
    resampled_cube.attrs.update(time_coverage_start='%s' % dataset.time[0],
                                time_coverage_end='%s' % dataset.time[-1])

    # Re-apply the chunking of the input.
    # NOTE(review): indexes schema.chunks, so presumably the input is expected to be chunked - confirm.
    schema = CubeSchema.new(dataset)
    chunk_sizes = {dim_name: schema.chunks[dim_index]
                   for dim_index, dim_name in zip(range(schema.ndim), schema.dims)}

    if isinstance(time_chunk_size, int) and time_chunk_size >= 0:
        chunk_sizes['time'] = time_chunk_size

    return resampled_cube.chunk(chunk_sizes)
def tile(cube: str,
         variables: Optional[str],
         labels: Optional[str],
         tile_size: Optional[str],
         config_path: Optional[str],
         style_id: Optional[str],
         output_path: Optional[str],
         verbose: List[bool],
         dry_run: bool):
    """
    Create RGBA tiles from CUBE.

    Color bars and value ranges for variables can be specified in a CONFIG file.
    Here the color mappings are defined for a style named "ocean_color":

    \b
    Styles:
      - Identifier: ocean_color
        ColorMappings:
          conc_chl:
            ColorBar: "plasma"
            ValueRange: [0., 24.]
          conc_tsm:
            ColorBar: "PuBuGn"
            ValueRange: [0., 100.]
          kd489:
            ColorBar: "jet"
            ValueRange: [0., 6.]

    This is the same styles syntax as the configuration file for "xcube serve",
    hence its configuration can be reused.
    """
    # NOTE(review): the docstring above presumably doubles as the CLI help text
    # (decorators are not visible here) - keep it user-facing.

    import fractions
    import itertools
    import json
    import os.path
    # noinspection PyPackageRequirements
    import yaml
    import xarray as xr
    import numpy as np

    from xcube.core.mldataset import open_ml_dataset
    from xcube.core.mldataset import MultiLevelDataset
    from xcube.core.schema import CubeSchema
    from xcube.core.tile import get_ml_dataset_tile
    from xcube.core.tile import get_var_valid_range
    from xcube.core.tile import get_var_cmap_params
    from xcube.core.tile import parse_non_spatial_labels
    from xcube.core.select import select_variables_subset
    from xcube.cli.common import parse_cli_kwargs
    from xcube.cli.common import parse_cli_sequence
    from xcube.cli.common import assert_positive_int_item
    from xcube.util.tilegrid import TileGrid
    from xcube.util.tiledimage import DEFAULT_COLOR_MAP_NUM_COLORS

    # Write a "tilemapresource.xml" file to *path* that lists one TileSet per
    # entry of *resolutions* together with the extent and tile size of *tile_grid*.
    # noinspection PyShadowingNames
    def write_tile_map_resource(path: str,
                                resolutions: List[fractions.Fraction],
                                tile_grid: TileGrid,
                                title='',
                                abstract='',
                                srs='CRS:84'):
        num_levels = len(resolutions)
        # Pairs of (zoom level, units per pixel as float).
        z_and_upp = zip(range(num_levels), map(float, resolutions))
        x1, y1, x2, y2 = tile_grid.geo_extent
        xml = [f'<TileMap version="1.0.0" tilemapservice="http://tms.osgeo.org/1.0.0">',
               f' <Title>{title}</Title>',
               f' <Abstract>{abstract}</Abstract>',
               f' <SRS>{srs}</SRS>',
               f' <BoundingBox minx="{x1}" miny="{y1}" maxx="{x2}" maxy="{y2}"/>',
               f' <Origin x="{x1}" y="{y1}"/>',
               f' <TileFormat width="{tile_grid.tile_width}" height="{tile_grid.tile_height}"'
               f' mime-type="image/png" extension="png"/>',
               f' <TileSets profile="local">'] + [
                  f' <TileSet href="{z}" order="{z}" units-per-pixel="{upp}"/>'
                  for z, upp in z_and_upp] + [
                  f' </TileSets>',
                  f'</TileMap>']
        with open(path, 'w') as fp:
            fp.write('\n'.join(xml))

    # Convert the values of a coordinate variable into a list of plain Python
    # values (strings for datetimes, ints for integers, floats otherwise) so
    # that they can be passed to json.dump() below.
    # noinspection PyShadowingNames
    def _convert_coord_var(coord_var: xr.DataArray):
        values = coord_var.values
        if np.issubdtype(values.dtype, np.datetime64):
            return list(np.datetime_as_string(values, timezone='UTC'))
        elif np.issubdtype(values.dtype, np.integer):
            return [int(value) for value in values]
        else:
            return [float(value) for value in values]

    # Look up color bar name and value range for *var_name* in the style
    # *style_id* (or 'default') of *config*. If the configuration does not
    # provide both, get_var_cmap_params() is called with whatever was found.
    # noinspection PyShadowingNames
    def _get_color_mappings(ml_dataset: MultiLevelDataset,
                            var_name: str,
                            config: Mapping[str, Any],
                            style_id: str):
        cmap_name = None
        cmap_range = None, None

        if config:
            style_id = style_id or 'default'
            styles = config.get('Styles')
            if styles:
                color_mappings = None
                # The first style with a matching identifier wins.
                for style in styles:
                    if style.get('Identifier') == style_id:
                        color_mappings = style.get('ColorMappings')
                        break
                if color_mappings:
                    color_mapping = color_mappings.get(var_name)
                    if color_mapping:
                        cmap_name = color_mapping.get('ColorBar')
                        cmap_vmin, cmap_vmax = color_mapping.get('ValueRange', (None, None))
                        cmap_range = cmap_vmin, cmap_vmax

        # Configuration is complete: no need to inspect the variable itself.
        if cmap_name is not None and None not in cmap_range:
            return cmap_name, cmap_range
        var = ml_dataset.base_dataset[var_name]
        valid_range = get_var_valid_range(var)
        return get_var_cmap_params(var, cmap_name, cmap_range, valid_range)

    # Parse the raw (string) CLI arguments.
    variables = parse_cli_sequence(variables,
                                   metavar='VARIABLES',
                                   num_items_min=1,
                                   item_plural_name='variables')

    tile_size = parse_cli_sequence(tile_size,
                                   num_items=2,
                                   metavar='TILE_SIZE',
                                   item_parser=int,
                                   item_validator=assert_positive_int_item,
                                   item_plural_name='tile sizes')

    labels = parse_cli_kwargs(labels, metavar='LABELS')

    # The verbosity level is the number of entries in *verbose*.
    verbosity = len(verbose)

    config = {}
    if config_path:
        if verbosity:
            print(f'Opening {config_path}...')
        with open(config_path, 'r') as fp:
            config = yaml.safe_load(fp)

    if verbosity:
        print(f'Opening {cube}...')

    ml_dataset = open_ml_dataset(cube, chunks='auto')
    tile_grid = ml_dataset.tile_grid
    base_dataset = ml_dataset.base_dataset
    schema = CubeSchema.new(base_dataset)
    spatial_dims = schema.x_dim, schema.y_dim

    if tile_size:
        tile_width, tile_height = tile_size
    else:
        if verbosity:
            print(f'Warning: using default tile sizes derived from CUBE')
        tile_width, tile_height = tile_grid.tile_width, tile_grid.tile_height

    indexers = None
    if labels:
        indexers = parse_non_spatial_labels(labels,
                                            schema.dims,
                                            schema.coords,
                                            allow_slices=True,
                                            exception_type=click.ClickException)

    # Applied to the multi-level dataset via ml_dataset.apply() below:
    # selects the requested variables and labels, then rechunks so that the
    # spatial chunks equal the tile size and all other dimensions have chunk size 1.
    def transform(ds: xr.Dataset) -> xr.Dataset:
        if variables:
            ds = select_variables_subset(ds, var_names=variables)
        if indexers:
            ds = ds.sel(**indexers)
        chunk_sizes = {dim: 1 for dim in ds.dims}
        chunk_sizes[spatial_dims[0]] = tile_width
        chunk_sizes[spatial_dims[1]] = tile_height
        return ds.chunk(chunk_sizes)

    # Re-derive grid, base dataset and schema from the transformed dataset.
    ml_dataset = ml_dataset.apply(transform)
    tile_grid = ml_dataset.tile_grid
    base_dataset = ml_dataset.base_dataset
    schema = CubeSchema.new(base_dataset)
    spatial_dims = schema.x_dim, schema.y_dim

    x1, _, x2, _ = tile_grid.geo_extent
    num_levels = tile_grid.num_levels
    # Units per pixel for each zoom level, kept as exact fractions.
    resolutions = [fractions.Fraction(fractions.Fraction(x2 - x1), tile_grid.width(z))
                   for z in range(num_levels)]

    if verbosity:
        print(f'Writing tile sets...')
        print(f' Zoom levels: {num_levels}')
        print(f' Resolutions: {", ".join(map(str, resolutions))} units/pixel')
        print(f' Tile size: {tile_width} x {tile_height} pixels')

    # Passed to every get_ml_dataset_tile() call below.
    image_cache = {}

    for var_name, var in base_dataset.data_vars.items():
        color_bar, (value_min, value_max) = _get_color_mappings(ml_dataset,
                                                                str(var_name),
                                                                config,
                                                                style_id)

        # Collect the non-spatial dimensions and all of their indexes.
        label_names = []
        label_indexes = []
        for dim in var.dims:
            if dim not in spatial_dims:
                label_names.append(dim)
                label_indexes.append(list(range(var[dim].size)))

        var_path = os.path.join(output_path, str(var_name))
        metadata_path = os.path.join(var_path, 'metadata.json')
        metadata = dict(name=str(var_name),
                        attrs={name: value
                               for name, value in var.attrs.items()},
                        dims=[str(dim)
                              for dim in var.dims],
                        dim_sizes={dim: int(var[dim].size)
                                   for dim in var.dims},
                        color_mapping=dict(color_bar=color_bar,
                                           value_min=value_min,
                                           value_max=value_max,
                                           num_colors=DEFAULT_COLOR_MAP_NUM_COLORS),
                        coordinates={name: _convert_coord_var(coord_var)
                                     for name, coord_var in var.coords.items() if coord_var.ndim == 1})
        if verbosity:
            print(f'Writing {metadata_path}')
        if not dry_run:
            os.makedirs(var_path, exist_ok=True)
            with open(metadata_path, 'w') as fp:
                json.dump(metadata, fp, indent=2)

        # One tile map per combination of non-spatial indexes.
        for label_index in itertools.product(*label_indexes):
            # Note: rebinds the *labels* parameter, which is no longer needed here.
            labels = {name: index for name, index in zip(label_names, label_index)}
            tilemap_path = os.path.join(var_path, *[str(l) for l in label_index])
            tilemap_resource_path = os.path.join(tilemap_path, 'tilemapresource.xml')
            if verbosity > 1:
                print(f'Writing {tilemap_resource_path}')
            if not dry_run:
                os.makedirs(tilemap_path, exist_ok=True)
                write_tile_map_resource(tilemap_resource_path, resolutions, tile_grid, title=f'{var_name}')
            for z in range(num_levels):
                num_tiles_x = tile_grid.num_tiles_x(z)
                num_tiles_y = tile_grid.num_tiles_y(z)
                tile_z_path = os.path.join(tilemap_path, str(z))
                if not dry_run and not os.path.exists(tile_z_path):
                    os.mkdir(tile_z_path)
                for x in range(num_tiles_x):
                    tile_zx_path = os.path.join(tile_z_path, str(x))
                    if not dry_run and not os.path.exists(tile_zx_path):
                        os.mkdir(tile_zx_path)
                    for y in range(num_tiles_y):
                        # The tile is computed even in dry-run mode; only the write is skipped.
                        tile_bytes = get_ml_dataset_tile(ml_dataset,
                                                         str(var_name),
                                                         x, y, z,
                                                         labels=labels,
                                                         labels_are_indices=True,
                                                         cmap_name=color_bar,
                                                         cmap_range=(value_min, value_max),
                                                         image_cache=image_cache,
                                                         trace_perf=True,
                                                         exception_type=click.ClickException)
                        # The y index is flipped in the output file name.
                        tile_path = os.path.join(tile_zx_path, f'{num_tiles_y - 1 - y}.png')
                        if verbosity > 2:
                            print(f'Writing tile {tile_path}')
                        if not dry_run:
                            with open(tile_path, 'wb') as fp:
                                fp.write(tile_bytes)

    print(f'Done writing tile sets.')
def compute_dataset(cube_func: CubeFunc,
                    *input_cubes: xr.Dataset,
                    input_cube_schema: CubeSchema = None,
                    input_var_names: Sequence[str] = None,
                    input_params: Dict[str, Any] = None,
                    output_var_name: str = 'output',
                    output_var_dims: AbstractSet[str] = None,
                    output_var_dtype: Any = np.float64,
                    output_var_attrs: Dict[str, Any] = None,
                    vectorize: bool = None,
                    cube_asserted: bool = False) -> xr.Dataset:
    """
    Compute a new output dataset with a single variable named *output_var_name*
    from variables named *input_var_names* contained in zero, one, or more
    input data cubes in *input_cubes* using a cube factory function *cube_func*.

    *cube_func* is called concurrently for each of the chunks of the input variables.
    It is expected to return a chunk block which is of type ``np.ndarray``.

    If *input_cubes* is not empty, *cube_func* receives variables as specified
    by *input_var_names*. If *input_cubes* is empty, *input_var_names* must be
    empty too, and *input_cube_schema* must be given, so that a new cube can
    be created.

    The full signature of *cube_func* is::

        def cube_func(*input_vars: np.ndarray,
                      input_params: Dict[str, Any] = None,
                      dim_coords: Dict[str, np.ndarray] = None,
                      dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
            pass

    The arguments are:

    * ``input_vars``: the variables according to the given *input_var_names*;
    * ``input_params``: is this call's *input_params*, a mapping from
      parameter name to value;
    * ``dim_coords``: a mapping from dimension names to the current chunk's
      coordinate arrays;
    * ``dim_ranges``: a mapping from dimension names to the current chunk's
      index ranges.

    Only the ``input_vars`` argument is mandatory. The keyword arguments
    ``input_params``, ``dim_coords``, ``dim_ranges`` do not need to be
    present at all.

    *output_var_dims* may be given in the case where *cube_func* reduces
    dimensions. TODO: describe new output_var_dims in more detail.

    :param cube_func: The cube factory function.
    :param input_cubes: An optional sequence of input cube datasets,
        must be provided if *input_cube_schema* is not.
    :param input_cube_schema: An optional input cube schema,
        must be provided if *input_cubes* is not.
    :param input_var_names: A sequence of variable names
    :param input_params: Optional dictionary with processing parameters
        passed to *cube_func*.
    :param output_var_name: Optional name of the output variable,
        defaults to ``'output'``.
    :param output_var_dims: Optional set of names of the output dimensions,
        used in the case *cube_func* reduces dimensions.
    :param output_var_dtype: Optional numpy datatype of the output variable,
        defaults to ``np.float64``.
    :param output_var_attrs: Optional metadata attributes for the output variable.
    :param vectorize: Whether all *input_cubes* have the same variables which
        are concatenated and passed as vectors to *cube_func*.
        Not implemented yet.
    :param cube_asserted: If False, the *input_cubes* will be verified,
        otherwise they are expected to be valid cubes.
    :return: A new dataset that contains the computed output variable.
    :raise NotImplementedError: if *vectorize* is not None.
    :raise ValueError: if neither *input_cubes* nor *input_cube_schema* is given,
        or if a name in *input_var_names* is not found in any of the cubes.
    """
    if vectorize is not None:
        # TODO: support vectorize = all cubes have same variables and cube_func
        #   receives variables as vectors (with extra dim)
        raise NotImplementedError('vectorize is not supported yet')

    if not cube_asserted:
        for cube in input_cubes:
            assert_cube(cube)

    # Check compatibility of inputs
    if input_cubes:
        input_cube_schema = CubeSchema.new(input_cubes[0])
        for cube in input_cubes:
            # Identity check on purpose: "!=" on datasets is an element-wise
            # comparison whose truth value only tells whether the two datasets
            # share any data variable.
            if cube is not input_cubes[0]:
                # Only called for its validation side effect for now.
                CubeSchema.new(cube)
                # TODO (forman): broadcast all cubes to same shape, rechunk to same chunks
    elif input_cube_schema is None:
        raise ValueError('input_cube_schema must be given')

    output_var_name = output_var_name or 'output'

    # Collect named input variables, raise if not found
    input_var_names = input_var_names or []
    input_vars = []
    for var_name in input_var_names:
        input_var = None
        # The first cube that contains the variable wins.
        for cube in input_cubes:
            if var_name in cube.data_vars:
                input_var = cube[var_name]
                break
        if input_var is None:
            raise ValueError(f'variable {var_name!r} not found in any of cubes')
        input_vars.append(input_var)

    # Find out, if cube_func uses any of _PREDEFINED_KEYWORDS
    has_input_params, has_dim_coords, has_dim_ranges = _inspect_cube_func(cube_func,
                                                                         input_var_names)

    def cube_func_wrapper(index_chunk, *input_var_chunks):
        # Note, xarray.apply_ufunc does a test call with empty input arrays,
        # so index_chunk.size == 0 is a valid case
        empty_call = index_chunk.size == 0

        # TODO: when output_var_dims is given, index_chunk must be reordered
        #   as core dimensions are moved to the end of index_chunk and input_var_chunks
        if not empty_call:
            index_chunk = index_chunk.ravel()

        if index_chunk.size < 2 * input_cube_schema.ndim:
            if not empty_call:
                warnings.warn(f"unexpected index_chunk of size {index_chunk.size} received!")
                return None

        # index_chunk holds a (start, end) index pair for every cube dimension.
        dim_ranges = None
        if has_dim_ranges or has_dim_coords:
            dim_ranges = {}
            for i in range(input_cube_schema.ndim):
                dim_name = input_cube_schema.dims[i]
                if not empty_call:
                    start = int(index_chunk[2 * i + 0])
                    end = int(index_chunk[2 * i + 1])
                    dim_ranges[dim_name] = start, end
                else:
                    dim_ranges[dim_name] = ()

        # Cut the coordinate arrays down to the current chunk.
        dim_coords = None
        if has_dim_coords:
            dim_coords = {}
            for coord_var_name, coord_var in input_cube_schema.coords.items():
                coord_slices = [slice(None)] * coord_var.ndim
                for i in range(input_cube_schema.ndim):
                    dim_name = input_cube_schema.dims[i]
                    if dim_name in coord_var.dims:
                        j = coord_var.dims.index(dim_name)
                        coord_slices[j] = slice(*dim_ranges[dim_name])
                dim_coords[coord_var_name] = coord_var[tuple(coord_slices)].values

        # Pass only those keyword arguments that cube_func actually declares.
        kwargs = {}
        if has_input_params:
            kwargs['input_params'] = input_params
        if has_dim_ranges:
            kwargs['dim_ranges'] = dim_ranges
        if has_dim_coords:
            kwargs['dim_coords'] = dim_coords

        return cube_func(*input_var_chunks, **kwargs)

    index_var = _gen_index_var(input_cube_schema)

    all_input_vars = [index_var] + input_vars

    input_core_dims = None
    if output_var_dims:
        input_core_dims = []
        has_warned = False
        for i in range(len(all_input_vars)):
            input_var = all_input_vars[i]
            # Core dimensions are the dimensions that are not part of the output.
            var_core_dims = [dim for dim in input_var.dims
                             if dim not in output_var_dims]
            must_rechunk = False
            if var_core_dims and input_var.chunks:
                for var_core_dim in var_core_dims:
                    dim_index = input_var.dims.index(var_core_dim)
                    dim_chunk_size = input_var.chunks[dim_index][0]
                    dim_shape_size = input_var.shape[dim_index]
                    if dim_chunk_size != dim_shape_size:
                        must_rechunk = True
                        break
            if must_rechunk:
                if not has_warned:
                    warnings.warn(
                        f'Input variables must not be chunked in dimension(s): {", ".join(var_core_dims)}.\n'
                        f'Rechunking applies, which may drastically decrease runtime performance '
                        f'and increase memory usage.')
                    has_warned = True
                # A chunk size of -1 makes each core dimension a single chunk.
                all_input_vars[i] = input_var.chunk({var_core_dim: -1
                                                     for var_core_dim in var_core_dims})
            input_core_dims.append(var_core_dims)

    output_var = xr.apply_ufunc(cube_func_wrapper,
                                *all_input_vars,
                                dask='parallelized',
                                input_core_dims=input_core_dims,
                                output_dtypes=[output_var_dtype])
    if output_var_attrs:
        output_var.attrs.update(output_var_attrs)
    return xr.Dataset({output_var_name: output_var}, coords=input_cube_schema.coords)
def compute_cube(cube_func: CubeFunc,
                 *input_cubes: xr.Dataset,
                 input_cube_schema: CubeSchema = None,
                 input_var_names: Sequence[str] = None,
                 input_params: Dict[str, Any] = None,
                 output_var_name: str = 'output',
                 output_var_dtype: Any = np.float64,
                 output_var_attrs: Dict[str, Any] = None,
                 vectorize: bool = None,
                 cube_asserted: bool = False) -> xr.Dataset:
    """
    Compute a new output data cube with a single variable named *output_var_name*
    from variables named *input_var_names* contained in zero, one, or more
    input data cubes in *input_cubes* using a cube factory function *cube_func*.

    *cube_func* is called concurrently for each of the chunks of the input variables.
    It is expected to return a chunk block which is of type ``np.ndarray``.

    If *input_cubes* is not empty, *cube_func* receives variables as specified
    by *input_var_names*. If *input_cubes* is empty, *input_var_names* must be
    empty too, and *input_cube_schema* must be given, so that a new cube can
    be created.

    The full signature of *cube_func* is::

        def cube_func(*input_vars: np.ndarray,
                      input_params: Dict[str, Any] = None,
                      dim_coords: Dict[str, np.ndarray] = None,
                      dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
            pass

    The arguments are:

    * ``input_vars``: the variables according to the given *input_var_names*;
    * ``input_params``: is this call's *input_params*, a mapping from
      parameter name to value;
    * ``dim_coords``: a mapping from dimension names to the current chunk's
      coordinate arrays;
    * ``dim_ranges``: a mapping from dimension names to the current chunk's
      index ranges.

    Only the ``input_vars`` argument is mandatory. The keyword arguments
    ``input_params``, ``dim_coords``, ``dim_ranges`` do not need to be
    present at all.

    :param cube_func: The cube factory function.
    :param input_cubes: An optional sequence of input cube datasets,
        must be provided if *input_cube_schema* is not.
    :param input_cube_schema: An optional input cube schema,
        must be provided if *input_cubes* is not.
    :param input_var_names: A sequence of variable names
    :param input_params: Optional dictionary with processing parameters
        passed to *cube_func*.
    :param output_var_name: Optional name of the output variable,
        defaults to ``'output'``.
    :param output_var_dtype: Optional numpy datatype of the output variable,
        defaults to ``np.float64``.
    :param output_var_attrs: Optional metadata attributes for the output variable.
    :param vectorize: Whether all *input_cubes* have the same variables which
        are concatenated and passed as vectors to *cube_func*.
        Not implemented yet.
    :param cube_asserted: If False, the *input_cubes* will be verified,
        otherwise they are expected to be valid cubes.
    :return: A new dataset that contains the computed output variable.
    :raise NotImplementedError: if *vectorize* is not None.
    :raise ValueError: if neither *input_cubes* nor *input_cube_schema* is given,
        or if a name in *input_var_names* is not found in any of the cubes.
    """
    if vectorize is not None:
        raise NotImplementedError('vectorize is not supported yet')

    if not cube_asserted:
        for cube in input_cubes:
            assert_cube(cube)

    if input_cubes:
        input_cube_schema = CubeSchema.new(input_cubes[0])
        for cube in input_cubes:
            # Identity check on purpose: "!=" on datasets is an element-wise
            # comparison whose truth value only tells whether the two datasets
            # share any data variable.
            if cube is not input_cubes[0]:
                # Only called for its validation side effect for now.
                CubeSchema.new(cube)
                # TODO (forman): broadcast all cubes to same shape, rechunk to same chunks
    elif input_cube_schema is None:
        raise ValueError('input_cube_schema must be given')

    if output_var_name is None:
        output_var_name = 'output'

    # Collect named input variables, raise if not found
    input_var_names = input_var_names or []
    input_vars = []
    for var_name in input_var_names:
        var = None
        # The first cube that contains the variable wins.
        for cube in input_cubes:
            if var_name in cube.data_vars:
                var = cube[var_name]
                break
        if var is None:
            raise ValueError(f'variable {var_name!r} not found in any of cubes')
        input_vars.append(var)

    has_input_params, has_dim_coords, has_dim_ranges = _inspect_cube_func(cube_func,
                                                                         input_var_names)

    def cube_func_wrapper(index_chunk, *input_var_chunks):
        index_chunk = index_chunk.ravel()

        # index_chunk must hold a (start, end) index pair for every cube dimension.
        if index_chunk.size < 2 * input_cube_schema.ndim:
            warnings.warn(f"weird index_chunk of size {index_chunk.size} received!")
            return None

        dim_ranges = None
        if has_dim_ranges or has_dim_coords:
            dim_ranges = {}
            for i in range(input_cube_schema.ndim):
                dim_name = input_cube_schema.dims[i]
                start = int(index_chunk[2 * i + 0])
                end = int(index_chunk[2 * i + 1])
                dim_ranges[dim_name] = start, end

        # Cut the coordinate arrays down to the current chunk.
        dim_coords = None
        if has_dim_coords:
            dim_coords = {}
            for coord_var_name, coord_var in input_cube_schema.coords.items():
                coord_slices = [slice(None)] * coord_var.ndim
                for i in range(input_cube_schema.ndim):
                    dim_name = input_cube_schema.dims[i]
                    if dim_name in coord_var.dims:
                        j = coord_var.dims.index(dim_name)
                        coord_slices[j] = slice(*dim_ranges[dim_name])
                dim_coords[coord_var_name] = coord_var[tuple(coord_slices)].values

        # Pass only those keyword arguments that cube_func actually declares.
        kwargs = {}
        if has_input_params:
            kwargs['input_params'] = input_params
        if has_dim_ranges:
            kwargs['dim_ranges'] = dim_ranges
        if has_dim_coords:
            kwargs['dim_coords'] = dim_coords

        return cube_func(*input_var_chunks, **kwargs)

    index_var = _gen_index_var(input_cube_schema)

    output_var = xr.apply_ufunc(cube_func_wrapper,
                                index_var,
                                *input_vars,
                                dask='parallelized',
                                output_dtypes=[output_var_dtype])
    if output_var_attrs:
        output_var.attrs.update(output_var_attrs)
    return xr.Dataset({output_var_name: output_var}, coords=input_cube_schema.coords)
def resample_in_time(cube: xr.Dataset,
                     frequency: str,
                     method: Union[str, Sequence[str]],
                     offset=None,
                     base: int = 0,
                     tolerance=None,
                     interp_kind=None,
                     time_chunk_size=None,
                     var_names: Sequence[str] = None,
                     metadata: Dict[str, Any] = None,
                     cube_asserted: bool = False) -> xr.Dataset:
    """
    Resample a xcube dataset in the time dimension.

    The cube is resampled once per requested method using left-closed,
    left-labelled time bins. Every data variable of each intermediate result
    is renamed to ``<var_name>_<method>``; if more than one method is given,
    the intermediate results are merged into a single dataset.

    The global attributes of the result are updated with *metadata* (if any)
    and with ``time_coverage_start`` / ``time_coverage_end``, which hold the
    string form of the first and last time coordinate value of the
    (variable-filtered) input *cube*.

    :param cube: The xcube dataset.
    :param frequency: Temporal aggregation frequency. Use format "<count><offset>"
        where <offset> is one of 'H', 'D', 'W', 'M', 'Q', 'Y'.
    :param method: Resampling method or sequence of resampling methods.
        Each name is looked up as an attribute of the resampler object
        returned by ``cube.resample()``.
    :param offset: Offset used to adjust the resampled time labels.
        Uses same syntax as *frequency*.
    :param base: For frequencies that evenly subdivide 1 day, the "origin" of the
        aggregated intervals. For example, for '24H' frequency, base could range
        from 0 through 23.
    :param time_chunk_size: If a non-negative integer, the chunk size to be used
        for the "time" dimension of the result. Otherwise the time chunk size
        of the input *cube* is used.
    :param var_names: Variable names to include.
    :param tolerance: Time tolerance for selective upsampling methods.
        Defaults to *frequency*.
    :param interp_kind: Kind of interpolation if *method* is 'interpolation'.
    :param metadata: Output metadata.
    :param cube_asserted: If False, *cube* will be verified, otherwise it is
        expected to be a valid cube.
    :return: A new xcube dataset resampled in time.
    """
    if not cube_asserted:
        assert_cube(cube)

    if var_names:
        cube = select_vars(cube, var_names)

    resampler = cube.resample(skipna=True,
                              closed='left',
                              label='left',
                              keep_attrs=True,
                              time=frequency,
                              loffset=offset,
                              base=base)

    # Normalise *method* to a list without rebinding the parameter itself.
    method_names = [method] if isinstance(method, str) else list(method)

    resampled_cubes = []
    for method_name in method_names:
        resampling_method = getattr(resampler, method_name)
        kwargs = get_method_kwargs(method_name, frequency, interp_kind, tolerance)
        resampled_cube = resampling_method(**kwargs)
        # Suffix variable names with the method so that results of several
        # methods can live side by side in the merged dataset.
        resampled_cube = resampled_cube.rename(
            {var_name: f'{var_name}_{method_name}'
             for var_name in resampled_cube.data_vars})
        resampled_cubes.append(resampled_cube)

    if len(resampled_cubes) == 1:
        resampled_cube = resampled_cubes[0]
    else:
        resampled_cube = xr.merge(resampled_cubes)

    # TODO: add time_bnds to resampled_ds
    # Stringify the scalar coordinate values, not the DataArray objects:
    # str() of a 0-d DataArray is its multi-line repr, not a timestamp.
    time_values = cube.time.values
    time_coverage_start = str(time_values[0])
    time_coverage_end = str(time_values[-1])

    resampled_cube.attrs.update(metadata or {})
    # TODO: add other time_coverage_ attributes
    resampled_cube.attrs.update(time_coverage_start=time_coverage_start,
                                time_coverage_end=time_coverage_end)

    # Re-apply the chunking of the input cube, optionally overriding
    # the chunk size of the time dimension.
    schema = CubeSchema.new(cube)
    chunk_sizes = dict(zip(schema.dims, schema.chunks))

    if isinstance(time_chunk_size, int) and time_chunk_size >= 0:
        chunk_sizes['time'] = time_chunk_size

    return resampled_cube.chunk(chunk_sizes)