Example #1
    def test_resample_f_all(self):
        resampled_cube = resample_in_time(self.input_cube, 'all', ['min', 'max'])
        self.assertIsNot(resampled_cube, self.input_cube)
        self.assertIn('time', resampled_cube)
        self.assertIn('temperature_min', resampled_cube)
        self.assertIn('temperature_max', resampled_cube)
        self.assertIn('precipitation_min', resampled_cube)
        self.assertIn('precipitation_max', resampled_cube)
        self.assertEqual(('time',), resampled_cube.time.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_min.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_max.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_min.dims)
        self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_max.dims)
        self.assertEqual((1,), resampled_cube.time.shape)
        self.assertEqual((1, 180, 360), resampled_cube.temperature_min.shape)
        self.assertEqual((1, 180, 360), resampled_cube.temperature_max.shape)
        self.assertEqual((1, 180, 360), resampled_cube.precipitation_min.shape)
        self.assertEqual((1, 180, 360), resampled_cube.precipitation_max.shape)
        np.testing.assert_allclose(resampled_cube.temperature_min.values[..., 0, 0],
                                   np.array([272.0]))
        np.testing.assert_allclose(resampled_cube.temperature_max.values[..., 0, 0],
                                   np.array([274.9]))
        np.testing.assert_allclose(resampled_cube.precipitation_min.values[..., 0, 0],
                                   np.array([114.2]))
        np.testing.assert_allclose(resampled_cube.precipitation_max.values[..., 0, 0],
                                   np.array([120.0]))

        schema = CubeSchema.new(resampled_cube)
        self.assertEqual(3, schema.ndim)
        self.assertEqual(('time', 'lat', 'lon'), schema.dims)
        self.assertEqual((1, 180, 360), schema.shape)
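
The tests above rely on a self.input_cube fixture that is not shown. Below is a minimal sketch of a compatible cube built with plain xarray; the number of time steps, the chunking, and the value ranges are assumptions chosen to fit the assertions (the exact min/max values will not reproduce with random data):

import numpy as np
import pandas as pd
import xarray as xr

num_times = 66  # assumed; '2D' resampling in Example #2 then yields 33 steps

input_cube = xr.Dataset(
    dict(temperature=(('time', 'lat', 'lon'),
                      272.0 + 3.0 * np.random.rand(num_times, 180, 360)),
         precipitation=(('time', 'lat', 'lon'),
                        114.0 + 6.0 * np.random.rand(num_times, 180, 360))),
    coords=dict(time=pd.date_range('2017-07-01', periods=num_times, freq='D'),
                lat=np.linspace(-89.5, 89.5, 180),
                lon=np.linspace(-179.5, 179.5, 360)))
input_cube = input_cube.chunk(dict(time=1, lat=90, lon=180))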
Example #2
    def test_resample_in_time_with_time_chunk_size(self):
        resampled_cube = resample_in_time(self.input_cube, '2D', ['min', 'max'], time_chunk_size=5)
        schema = CubeSchema.new(resampled_cube)
        self.assertEqual(3, schema.ndim)
        self.assertEqual(('time', 'lat', 'lon'), schema.dims)
        self.assertEqual((33, 180, 360), schema.shape)
        self.assertEqual((5, 90, 180), schema.chunks)
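
schema.chunks above reports one chunk size per dimension. The same information can be read straight off any dask-backed variable without CubeSchema, as in this small helper sketch (the variable name 'temperature_min' is taken from Example #1):

import xarray as xr

def first_chunk_sizes(dataset: xr.Dataset, var_name: str) -> tuple:
    # xarray exposes dask chunking as a tuple of per-dimension
    # chunk-size tuples, or None for an unchunked variable
    chunks = dataset[var_name].chunks
    if chunks is None:
        raise ValueError(f'variable {var_name!r} is not chunked')
    return tuple(sizes[0] for sizes in chunks)

# e.g. first_chunk_sizes(resampled_cube, 'temperature_min') == (5, 90, 180)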
Example #3
    def test_without_inputs(self):
        calls = []

        def my_cube_func(input_params: Dict[str, Any] = None,
                         dim_coords: Dict[str, np.ndarray] = None,
                         dim_ranges: Dict[str, Tuple[int, int]] = None) -> CubeFuncOutput:
            nonlocal calls
            calls.append((input_params, dim_coords, dim_ranges))
            lon_range = dim_ranges['lon']
            lat_range = dim_ranges['lat']
            time_range = dim_ranges['time']
            n_lon = lon_range[1] - lon_range[0]
            n_lat = lat_range[1] - lat_range[0]
            n_time = time_range[1] - time_range[0]
            fill_value = input_params['fill_value']
            return np.full((n_time, n_lat, n_lon),
                           fill_value,
                           dtype=np.float64)

        output_cube = compute_cube(my_cube_func,
                                   input_cube_schema=CubeSchema.new(self.cube),
                                   input_params=dict(fill_value=0.74))

        self.assertIsInstance(output_cube, xr.Dataset)
        self.assertIn('output', output_cube.data_vars)
        output_var = output_cube.output
        self.assertEqual(0, len(calls))
        self.assertEqual(('time', 'lat', 'lon'), output_var.dims)
        self.assertEqual((6, 180, 360), output_var.shape)

        values = output_var.values
        self.assertEqual(2 * 2 * 4, len(calls))
        self.assertEqual((6, 180, 360), values.shape)
        self.assertAlmostEqual(0.74, values[0, 0, 0])
        self.assertAlmostEqual(0.74, values[-1, -1, -1])
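
The two assertions on len(calls) hinge on compute_cube being lazy: building the dask graph invokes the function zero times, and reading .values invokes it once per chunk. A sketch of the same pattern with dask.array directly, assuming a (3, 90, 90) chunking of a (6, 180, 360) array:

import dask.array as da
import numpy as np
import xarray as xr

calls = []

def fill(block):
    # dask may probe the function with zero-size arrays to infer
    # metadata, so only real chunk invocations are recorded
    if block.size:
        calls.append(block.shape)
    return np.full(block.shape, 0.74)

data = da.zeros((6, 180, 360), chunks=(3, 90, 90))
lazy = xr.DataArray(da.map_blocks(fill, data, dtype=np.float64),
                    dims=('time', 'lat', 'lon'))
assert len(calls) == 0           # building the graph computes nothing
values = lazy.values             # materializing runs fill once per chunk
assert len(calls) == 2 * 2 * 4   # (6/3) x (180/90) x (360/90) chunks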
Example #4
def compute_dataset(cube_func: CubeFunc,
                    *input_cubes: xr.Dataset,
                    input_cube_schema: CubeSchema = None,
                    input_var_names: Sequence[str] = None,
                    input_params: Dict[str, Any] = None,
                    output_var_name: str = 'output',
                    output_var_dims: AbstractSet[str] = None,
                    output_var_dtype: Any = np.float64,
                    output_var_attrs: Dict[str, Any] = None,
                    vectorize: bool = None,
                    cube_asserted: bool = False) -> xr.Dataset:
    """
    Compute a new output dataset with a single variable named *output_var_name*
    from variables named *input_var_names* contained in zero, one, or more
    input data cubes in *input_cubes* using a cube factory function *cube_func*.

    *cube_func* is called concurrently for each of the chunks of the input variables.
    It is expected to return a chunk block which is of type ``np.ndarray``.

    If *input_cubes* is not empty, *cube_func* receives variables as specified by *input_var_names*.
    If *input_cubes* is empty, *input_var_names* must be empty too, and *input_cube_schema*
    must be given, so that a new cube can be created.

    The full signature of *cube_func* is::

        def cube_func(*input_vars: np.ndarray,
                      input_params: Dict[str, Any] = None,
                      dim_coords: Dict[str, np.ndarray] = None,
                      dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
            pass

    The arguments are:

    * ``input_vars``: the variables according to the given *input_var_names*;
    * ``input_params``: this call's *input_params*, a mapping from parameter name to value;
    * ``dim_coords``: a mapping from dimension names to the current chunk's coordinate arrays;
    * ``dim_ranges``: a mapping from dimension names to the current chunk's index ranges.

    Only the ``input_vars`` argument is mandatory. The keyword arguments
    ``input_params``, ``dim_coords``, ``dim_ranges`` need not be present at all.

    *output_var_dims* may be given in the case where the dimensions of the
    output variable differ from those of the inputs. Input dimensions not
    contained in *output_var_dims* are treated as core dimensions: the inputs
    are rechunked so that each such dimension forms a single chunk, and
    *cube_func* receives and consumes these dimensions in full.

    :param cube_func: The cube factory function.
    :param input_cubes: An optional sequence of input cube datasets; must be provided if *input_cube_schema* is not.
    :param input_cube_schema: An optional input cube schema; must be provided if *input_cubes* is not.
    :param input_var_names: A sequence of names of variables that are looked up
        in *input_cubes* and passed to *cube_func*.
    :param input_params: Optional dictionary with processing parameters passed to *cube_func*.
    :param output_var_name: Optional name of the output variable, defaults to ``'output'``.
    :param output_var_dims: Optional set of names of the output dimensions,
        used in the case *cube_func* reduces dimensions.
    :param output_var_dtype: Optional numpy datatype of the output variable, defaults to ``numpy.float64``.
    :param output_var_attrs: Optional metadata attributes for the output variable.
    :param vectorize: Whether all *input_cubes* have the same variables which are concatenated and passed as vectors
        to *cube_func*. Not implemented yet.
    :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new dataset that contains the computed output variable.
    """
    if vectorize is not None:
        # TODO: support vectorize = all cubes have same variables and cube_func
        #       receives variables as vectors (with extra dim)
        raise NotImplementedError('vectorize is not supported yet')

    if not cube_asserted:
        for cube in input_cubes:
            assert_cube(cube)

    # Check compatibility of inputs
    if input_cubes:
        input_cube_schema = CubeSchema.new(input_cubes[0])
        for cube in input_cubes:
            if not cube_asserted:
                assert_cube(cube)
            if cube is not input_cubes[0]:
                # noinspection PyUnusedLocal
                other_schema = CubeSchema.new(cube)
                # TODO (forman): broadcast all cubes to same shape, rechunk to same chunks
    elif input_cube_schema is None:
        raise ValueError('input_cube_schema must be given')

    output_var_name = output_var_name or 'output'

    # Collect named input variables, raise if not found
    input_var_names = input_var_names or []
    input_vars = []
    for var_name in input_var_names:
        input_var = None
        for cube in input_cubes:
            if var_name in cube.data_vars:
                input_var = cube[var_name]
                break
        if input_var is None:
            raise ValueError(
                f'variable {var_name!r} not found in any of cubes')
        input_vars.append(input_var)

    # Find out, if cube_func uses any of _PREDEFINED_KEYWORDS
    has_input_params, has_dim_coords, has_dim_ranges = _inspect_cube_func(
        cube_func, input_var_names)

    def cube_func_wrapper(index_chunk, *input_var_chunks):
        nonlocal input_cube_schema, input_var_names, input_params, input_vars
        nonlocal has_input_params, has_dim_coords, has_dim_ranges

        # Note, xarray.apply_ufunc does a test call with empty input arrays,
        # so index_chunk.size == 0 is a valid case
        empty_call = index_chunk.size == 0

        # TODO: when output_var_dims is given, index_chunk must be reordered
        #   as core dimensions are moved to the end of index_chunk and input_var_chunks
        if not empty_call:
            index_chunk = index_chunk.ravel()

        if index_chunk.size < 2 * input_cube_schema.ndim:
            if not empty_call:
                warnings.warn(
                    f"unexpected index_chunk of size {index_chunk.size} received!"
                )
                return None

        dim_ranges = None
        if has_dim_ranges or has_dim_coords:
            dim_ranges = {}
            for i in range(input_cube_schema.ndim):
                dim_name = input_cube_schema.dims[i]
                if not empty_call:
                    start = int(index_chunk[2 * i + 0])
                    end = int(index_chunk[2 * i + 1])
                    dim_ranges[dim_name] = start, end
                else:
                    dim_ranges[dim_name] = ()

        dim_coords = None
        if has_dim_coords:
            dim_coords = {}
            for coord_var_name, coord_var in input_cube_schema.coords.items():
                coord_slices = [slice(None)] * coord_var.ndim
                for i in range(input_cube_schema.ndim):
                    dim_name = input_cube_schema.dims[i]
                    if dim_name in coord_var.dims:
                        j = coord_var.dims.index(dim_name)
                        coord_slices[j] = slice(*dim_ranges[dim_name])
                dim_coords[coord_var_name] = coord_var[tuple(
                    coord_slices)].values

        kwargs = {}
        if has_input_params:
            kwargs['input_params'] = input_params
        if has_dim_ranges:
            kwargs['dim_ranges'] = dim_ranges
        if has_dim_coords:
            kwargs['dim_coords'] = dim_coords

        return cube_func(*input_var_chunks, **kwargs)

    index_var = _gen_index_var(input_cube_schema)

    all_input_vars = [index_var] + input_vars

    input_core_dims = None
    if output_var_dims:
        input_core_dims = []
        has_warned = False
        for i in range(len(all_input_vars)):
            input_var = all_input_vars[i]
            var_core_dims = [
                dim for dim in input_var.dims if dim not in output_var_dims
            ]
            must_rechunk = False
            if var_core_dims and input_var.chunks:
                for var_core_dim in var_core_dims:
                    dim_index = input_var.dims.index(var_core_dim)
                    dim_chunk_size = input_var.chunks[dim_index][0]
                    dim_shape_size = input_var.shape[dim_index]
                    if dim_chunk_size != dim_shape_size:
                        must_rechunk = True
                        break
            if must_rechunk:
                if not has_warned:
                    warnings.warn(
                        f'Input variables must not be chunked in dimension(s): {", ".join(var_core_dims)}.\n'
                        f'Rechunking applies, which may drastically decrease runtime performance '
                        f'and increase memory usage.')
                    has_warned = True
                all_input_vars[i] = input_var.chunk(
                    {var_core_dim: -1
                     for var_core_dim in var_core_dims})
            input_core_dims.append(var_core_dims)

    output_var = xr.apply_ufunc(cube_func_wrapper,
                                *all_input_vars,
                                dask='parallelized',
                                input_core_dims=input_core_dims,
                                output_dtypes=[output_var_dtype])
    if output_var_attrs:
        output_var.attrs.update(output_var_attrs)
    return xr.Dataset({output_var_name: output_var},
                      coords=input_cube_schema.coords)
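
A minimal usage sketch for compute_dataset, reusing the illustrative input_cube constructed after Example #1. The function and parameter names are assumptions, and cube_asserted=True skips xcube's cube verification, which a hand-built dataset may not pass:

from typing import Any, Dict
import numpy as np

def scale(temperature: np.ndarray,
          input_params: Dict[str, Any] = None) -> np.ndarray:
    # called once per chunk of 'temperature'
    return input_params['factor'] * temperature

scaled = compute_dataset(scale, input_cube,
                         input_var_names=['temperature'],
                         input_params=dict(factor=2.0),
                         output_var_name='temperature_x2',
                         cube_asserted=True)
assert 'temperature_x2' in scaled.data_vars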
Example #5
def compute_cube(cube_func: CubeFunc,
                 *input_cubes: xr.Dataset,
                 input_cube_schema: CubeSchema = None,
                 input_var_names: Sequence[str] = None,
                 input_params: Dict[str, Any] = None,
                 output_var_name: str = 'output',
                 output_var_dtype: Any = np.float64,
                 output_var_attrs: Dict[str, Any] = None,
                 vectorize: bool = None,
                 cube_asserted: bool = False) -> xr.Dataset:
    """
    Compute a new output data cube with a single variable named *output_var_name*
    from variables named *input_var_names* contained in zero, one, or more
    input data cubes in *input_cubes* using a cube factory function *cube_func*.

    *cube_func* is called concurrently for each of the chunks of the input variables.
    It is expected to return a chunk block which is of type ``np.ndarray``.

    If *input_cubes* is not empty, *cube_func* receives variables as specified by *input_var_names*.
    If *input_cubes* is empty, *input_var_names* must be empty too, and *input_cube_schema*
    must be given, so that a new cube can be created.

    The full signature of *cube_func* is::

        def cube_func(*input_vars: np.ndarray,
                      input_params: Dict[str, Any] = None,
                      dim_coords: Dict[str, np.ndarray] = None,
                      dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
            pass

    The arguments are:

    * ``input_vars``: the variables according to the given *input_var_names*;
    * ``input_params``: this call's *input_params*, a mapping from parameter name to value;
    * ``dim_coords``: a mapping from dimension names to the current chunk's coordinate arrays;
    * ``dim_ranges``: a mapping from dimension names to the current chunk's index ranges.

    Only the ``input_vars`` argument is mandatory. The keyword arguments
    ``input_params``, ``dim_coords``, ``dim_ranges`` need not be present at all.

    :param cube_func: The cube factory function.
    :param input_cubes: An optional sequence of input cube datasets; must be provided if *input_cube_schema* is not.
    :param input_cube_schema: An optional input cube schema; must be provided if *input_cubes* is not.
    :param input_var_names: A sequence of names of variables that are looked up
        in *input_cubes* and passed to *cube_func*.
    :param input_params: Optional dictionary with processing parameters passed to *cube_func*.
    :param output_var_name: Optional name of the output variable, defaults to ``'output'``.
    :param output_var_dtype: Optional numpy datatype of the output variable, defaults to ``numpy.float64``.
    :param output_var_attrs: Optional metadata attributes for the output variable.
    :param vectorize: Whether all *input_cubes* have the same variables which are concatenated and passed as vectors
        to *cube_func*. Not implemented yet.
    :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new dataset that contains the computed output variable.
    """
    if vectorize is not None:
        raise NotImplementedError('vectorize is not supported yet')

    if not cube_asserted:
        for cube in input_cubes:
            assert_cube(cube)

    if input_cubes:
        input_cube_schema = CubeSchema.new(input_cubes[0])
        for cube in input_cubes:
            if not cube_asserted:
                assert_cube(cube)
            if cube is not input_cubes[0]:
                # noinspection PyUnusedLocal
                other_schema = CubeSchema.new(cube)
                # TODO (forman): broadcast all cubes to same shape, rechunk to same chunks
    elif input_cube_schema is None:
        raise ValueError('input_cube_schema must be given')

    if output_var_name is None:
        output_var_name = 'output'

    input_var_names = input_var_names or []
    input_vars = []
    for var_name in input_var_names:
        var = None
        for cube in input_cubes:
            if var_name in cube.data_vars:
                var = cube[var_name]
                break
        if var is None:
            raise ValueError(f'variable {var_name!r} not found in any of cubes')
        input_vars.append(var)

    has_input_params, has_dim_coords, has_dim_ranges = _inspect_cube_func(cube_func, input_var_names)

    def cube_func_wrapper(index_chunk, *input_var_chunks):
        nonlocal input_cube_schema, input_var_names, input_params, input_vars
        nonlocal has_input_params, has_dim_coords, has_dim_ranges

        index_chunk = index_chunk.ravel()

        if index_chunk.size < 2 * input_cube_schema.ndim:
            warnings.warn(f"weird index_chunk of size {index_chunk.size} received!")
            return

        dim_ranges = None
        if has_dim_ranges or has_dim_coords:
            dim_ranges = {}
            for i in range(input_cube_schema.ndim):
                dim_name = input_cube_schema.dims[i]
                start = int(index_chunk[2 * i + 0])
                end = int(index_chunk[2 * i + 1])
                dim_ranges[dim_name] = start, end

        dim_coords = None
        if has_dim_coords:
            dim_coords = {}
            for coord_var_name, coord_var in input_cube_schema.coords.items():
                coord_slices = [slice(None)] * coord_var.ndim
                for i in range(input_cube_schema.ndim):
                    dim_name = input_cube_schema.dims[i]
                    if dim_name in coord_var.dims:
                        j = coord_var.dims.index(dim_name)
                        coord_slices[j] = slice(*dim_ranges[dim_name])
                dim_coords[coord_var_name] = coord_var[tuple(coord_slices)].values

        kwargs = {}
        if has_input_params:
            kwargs['input_params'] = input_params
        if has_dim_ranges:
            kwargs['dim_ranges'] = dim_ranges
        if has_dim_coords:
            kwargs['dim_coords'] = dim_coords

        return cube_func(*input_var_chunks, **kwargs)

    index_var = _gen_index_var(input_cube_schema)

    output_var = xr.apply_ufunc(cube_func_wrapper,
                                index_var,
                                *input_vars,
                                dask='parallelized',
                                output_dtypes=[output_var_dtype])
    if output_var_attrs:
        output_var.attrs.update(output_var_attrs)
    return xr.Dataset({output_var_name: output_var}, coords=input_cube_schema.coords)
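
compute_cube also works without input variables, as Example #3 shows. The sketch below additionally uses the optional dim_coords and dim_ranges keywords to derive values from the current chunk's coordinates; it reuses the illustrative input_cube from above, and the latitude weighting is purely an example:

from typing import Dict, Tuple
import numpy as np

def lat_weights(dim_coords: Dict[str, np.ndarray] = None,
                dim_ranges: Dict[str, Tuple[int, int]] = None) -> np.ndarray:
    # cosine-of-latitude weights for the current chunk, broadcast
    # over the chunk's time and lon extents
    lat = np.deg2rad(dim_coords['lat'])
    n_time = dim_ranges['time'][1] - dim_ranges['time'][0]
    n_lon = dim_ranges['lon'][1] - dim_ranges['lon'][0]
    weights = np.cos(lat)[np.newaxis, :, np.newaxis]
    return np.broadcast_to(weights, (n_time, lat.size, n_lon)).copy()

weights_cube = compute_cube(lat_weights,
                            input_cube_schema=CubeSchema.new(input_cube),
                            output_var_name='lat_weight')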
Example #6
def resample_in_time(cube: xr.Dataset,
                     frequency: str,
                     method: Union[str, Sequence[str]],
                     offset=None,
                     base: int = 0,
                     tolerance=None,
                     interp_kind=None,
                     time_chunk_size=None,
                     var_names: Sequence[str] = None,
                     metadata: Dict[str, Any] = None,
                     cube_asserted: bool = False) -> xr.Dataset:
    """
    Resample a xcube dataset in the time dimension.

    :param cube: The xcube dataset.
    :param frequency: Temporal aggregation frequency. Use format "<count><offset>"
        where <offset> is one of 'H', 'D', 'W', 'M', 'Q', 'Y'.
    :param method: Resampling method or sequence of resampling methods.
    :param offset: Offset used to adjust the resampled time labels.
        Uses same syntax as *frequency*.
    :param base: For frequencies that evenly subdivide 1 day, the "origin" of the
        aggregated intervals. For example, for '24H' frequency, base could range from 0 through 23.
    :param tolerance: Time tolerance for selective upsampling methods. Defaults to *frequency*.
    :param interp_kind: Kind of interpolation if *method* is 'interpolation'.
    :param time_chunk_size: If not None, the chunk size to be used for the "time" dimension.
    :param var_names: Variable names to include.
    :param metadata: Output metadata.
    :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube.
    :return: A new xcube dataset resampled in time.
    """
    if not cube_asserted:
        assert_cube(cube)

    if var_names:
        cube = select_vars(cube, var_names)

    resampler = cube.resample(skipna=True,
                              closed='left',
                              label='left',
                              keep_attrs=True,
                              time=frequency,
                              loffset=offset,
                              base=base)

    if isinstance(method, str):
        methods = [method]
    else:
        methods = list(method)

    resampled_cubes = []
    for method in methods:
        resampling_method = getattr(resampler, method)
        kwargs = get_method_kwargs(method, frequency, interp_kind, tolerance)
        resampled_cube = resampling_method(**kwargs)
        resampled_cube = resampled_cube.rename(
            {var_name: f'{var_name}_{method}' for var_name in resampled_cube.data_vars})
        resampled_cubes.append(resampled_cube)

    if len(resampled_cubes) == 1:
        resampled_cube = resampled_cubes[0]
    else:
        resampled_cube = xr.merge(resampled_cubes)

    # TODO: add time_bnds to resampled_ds
    # use the scalar coordinate values, not the DataArray reprs, as attribute strings
    time_coverage_start = str(cube.time[0].values)
    time_coverage_end = str(cube.time[-1].values)

    resampled_cube.attrs.update(metadata or {})
    # TODO: add other time_coverage_ attributes
    resampled_cube.attrs.update(time_coverage_start=time_coverage_start,
                                time_coverage_end=time_coverage_end)

    schema = CubeSchema.new(cube)
    chunk_sizes = {schema.dims[i]: schema.chunks[i] for i in range(schema.ndim)}

    if isinstance(time_chunk_size, int) and time_chunk_size >= 0:
        chunk_sizes['time'] = time_chunk_size

    return resampled_cube.chunk(chunk_sizes)
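
A minimal usage sketch for resample_in_time; daily_cube below is an illustrative hand-built dataset, so cube_asserted=True skips xcube's cube verification, which such a dataset may not pass:

import numpy as np
import pandas as pd
import xarray as xr

daily_cube = xr.Dataset(
    dict(temperature=(('time', 'lat', 'lon'),
                      np.random.rand(31, 180, 360))),
    coords=dict(time=pd.date_range('2020-01-01', periods=31, freq='D'),
                lat=np.linspace(-89.5, 89.5, 180),
                lon=np.linspace(-179.5, 179.5, 360)))
daily_cube = daily_cube.chunk(dict(time=8, lat=90, lon=90))

# weekly minima and maxima; output variables are renamed to
# temperature_min and temperature_max
weekly = resample_in_time(daily_cube, '1W', ['min', 'max'],
                          cube_asserted=True)

# monthly medians with single-step time chunks
monthly = resample_in_time(daily_cube, '1M', 'median',
                           time_chunk_size=1, cube_asserted=True)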