Example #1
File: chunk.py  Project: micder/xcube
def chunk(cube, output, format=None, params=None, chunks=None):
    """
    (Re-)chunk xcube dataset.
    Changes the external chunking of all variables of CUBE according to CHUNKS and writes
    the result to OUTPUT.

    Note: A possibly more efficient way to (re-)chunk datasets is offered by the
    dedicated tool "rechunker", see https://rechunker.readthedocs.io.
    """
    chunk_sizes = None
    if chunks:
        chunk_sizes = parse_cli_kwargs(chunks, metavar="CHUNKS")
        for k, v in chunk_sizes.items():
            if not isinstance(v, int) or v <= 0:
                raise click.ClickException(
                    "Invalid value for CHUNKS, "
                    f"chunk sizes must be positive integers: {chunks}")

    write_kwargs = dict()
    if params:
        write_kwargs = parse_cli_kwargs(params, metavar="PARAMS")

    from xcube.core.chunk import chunk_dataset
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset

    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as ds:
        if chunk_sizes:
            for k in chunk_sizes:
                if k not in ds.dims:
                    raise click.ClickException(
                        "Invalid value for CHUNKS, "
                        f"{k!r} is not the name of any dimension: {chunks}")

        chunked_dataset = chunk_dataset(ds,
                                        chunk_sizes=chunk_sizes,
                                        format_name=format_name)
        write_dataset(chunked_dataset,
                      output_path=output,
                      format_name=format_name,
                      **write_kwargs)
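For orientation, the CHUNKS mapping given on the command line (e.g. "time=1,lat=256,lon=512") simply carries per-dimension chunk sizes. The following is a minimal sketch of the equivalent re-chunking done directly with xarray; the paths and dimension names are illustrative assumptions, not part of the example above.

import xarray as xr

ds = xr.open_zarr("input-cube.zarr")
rechunked = ds.chunk({"time": 1, "lat": 256, "lon": 512})

# Drop stale per-variable chunk encodings so the new chunking is used
# when writing back to Zarr.
for var in rechunked.variables.values():
    var.encoding.pop("chunks", None)

rechunked.to_zarr("output-cube.zarr", mode="w")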
Example #2
def serve(cube: List[str], address: str, port: int, prefix: str, name: str,
          update_period: float, styles: str, config: str, tile_cache_size: str,
          tile_comp_mode: int, show: bool, verbose: bool, trace_perf: bool,
          aws_prof: str, aws_env: bool):
    """
    Serve data cubes via web service.

    Serves data cubes via a RESTful API and an OGC WMTS 1.0 RESTful and KVP interface.
    The RESTful API documentation can be found at https://app.swaggerhub.com/apis/bcdev/xcube-server.
    """

    from xcube.cli.common import parse_cli_kwargs
    import os.path

    prefix = prefix or name

    if config and cube:
        raise click.ClickException(
            "CONFIG and CUBES cannot be used at the same time.")
    if styles:
        styles = parse_cli_kwargs(styles, "STYLES")
    if (aws_prof or aws_env) and not cube:
        raise click.ClickException(
            "AWS credentials are only valid in combination with given CUBE argument(s)."
        )

    from xcube.version import version
    from xcube.webapi.defaults import SERVER_NAME, SERVER_DESCRIPTION
    print(f'{SERVER_NAME}: {SERVER_DESCRIPTION}, version {version}')

    if show:
        _run_viewer()

    from xcube.webapi.app import new_application
    from xcube.webapi.service import Service
    service = Service(new_application(
        prefix,
        os.path.dirname(config) if config else '.'),
                      prefix=prefix,
                      port=port,
                      address=address,
                      cube_paths=cube,
                      styles=styles,
                      config_file=config,
                      tile_cache_size=tile_cache_size,
                      tile_comp_mode=tile_comp_mode,
                      update_period=update_period,
                      log_to_stderr=verbose,
                      trace_perf=trace_perf,
                      aws_prof=aws_prof,
                      aws_env=aws_env)
    service.start()

    return 0
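Note that the STYLES argument goes through the same parse_cli_kwargs helper exercised in the next example; the resulting mapping looks like the sketch below (variable names and ranges are illustrative, taken from the test in Example #3).

styles = {"chl_conc": (0, 20, "greens"), "chl_tsm": (0, 15, "viridis")}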
Example #3
    def test_parse_cli_kwargs(self):
        self.assertEqual(dict(),
                         parse_cli_kwargs("", metavar="<chunks>"))
        self.assertEqual(dict(time=1, lat=256, lon=512),
                         parse_cli_kwargs("time=1, lat=256, lon=512", metavar="<chunks>"))
        self.assertEqual(dict(chl_conc=(0, 20, 'greens'), chl_tsm=(0, 15, 'viridis')),
                         parse_cli_kwargs("chl_conc=(0,20,'greens'),chl_tsm=(0,15,'viridis')",
                                          metavar="<styles>"))

        with self.assertRaises(click.ClickException) as cm:
            parse_cli_kwargs("45 * 'A'", metavar="<chunks>")
        self.assertEqual("Invalid value for <chunks>: \"45 * 'A'\"",
                         f"{cm.exception}")

        with self.assertRaises(click.ClickException) as cm:
            parse_cli_kwargs("9==2")
        self.assertEqual("Invalid value: '9==2'",
                         f"{cm.exception}")
Example #4
def serve(cube: List[str], address: str, port: int, prefix: str,
          reverse_prefix: str, update_period: float, styles: str,
          config_file: str, base_dir: str, tile_cache_size: str,
          tile_comp_mode: int, show: bool, verbose: bool, trace_perf: bool,
          aws_prof: str, aws_env: bool):
    """
    Serve data cubes via web service.

    Serves data cubes via a RESTful API and an OGC WMTS 1.0 RESTful and KVP interface.
    The RESTful API documentation can be found at https://app.swaggerhub.com/apis/bcdev/xcube-server.
    """

    from xcube.cli.common import parse_cli_kwargs
    import os.path

    if config_file and cube:
        raise click.ClickException(
            "CONFIG and CUBES cannot be used at the same time.")
    if not config_file and not cube:
        config_file = os.environ.get(CONFIG_ENV_VAR)
    if styles:
        styles = parse_cli_kwargs(styles, "STYLES")
    if (aws_prof or aws_env) and not cube:
        raise click.ClickException(
            "AWS credentials are only valid in combination with given CUBE argument(s)."
        )
    if config_file and not os.path.isfile(config_file):
        raise click.ClickException(
            f"Configuration file not found: {config_file}")

    base_dir = base_dir or os.environ.get(
        BASE_ENV_VAR, config_file and os.path.dirname(config_file)) or '.'
    if not os.path.isdir(base_dir):
        raise click.ClickException(f"Base directory not found: {base_dir}")

    from xcube.version import version
    from xcube.webapi.defaults import SERVER_NAME, SERVER_DESCRIPTION
    print(f'{SERVER_NAME}: {SERVER_DESCRIPTION}, version {version}')

    if show:
        _run_viewer()

    from xcube.webapi.app import new_application
    application = new_application(route_prefix=prefix, base_dir=base_dir)

    from xcube.webapi.service import Service
    service = Service(application,
                      prefix=reverse_prefix or prefix,
                      port=port,
                      address=address,
                      cube_paths=cube,
                      styles=styles,
                      config_file=config_file,
                      base_dir=base_dir,
                      tile_cache_size=tile_cache_size,
                      tile_comp_mode=tile_comp_mode,
                      update_period=update_period,
                      log_to_stderr=verbose,
                      trace_perf=trace_perf,
                      aws_prof=aws_prof,
                      aws_env=aws_env)
    service.start()

    return 0
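The base directory resolution above packs a fallback chain into a single expression; spelled out, it is roughly equivalent to the sketch below. BASE_ENV_VAR is the environment-variable name defined elsewhere in the module and is passed in as a parameter here only to keep the sketch self-contained.

import os

def resolve_base_dir(base_dir: str, config_file: str, env_var: str) -> str:
    if base_dir:                                     # 1. explicit option
        return base_dir
    env_value = os.environ.get(env_var)
    if env_value:                                    # 2. environment variable
        return env_value
    if config_file and os.path.dirname(config_file):
        return os.path.dirname(config_file)          # 3. config file directory
    return '.'                                       # 4. current working directory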
Example #5
File: tile.py  Project: sfoucher/xcube
def tile(cube: str,
         variables: Optional[str],
         labels: Optional[str],
         tile_size: Optional[str],
         config_path: Optional[str],
         style_id: Optional[str],
         output_path: Optional[str],
         verbose: List[bool],
         dry_run: bool):
    """
    Create RGBA tiles from CUBE.

    Color bars and value ranges for variables can be specified in a CONFIG file.
    Here the color mappings are defined for a style named "ocean_color":

    \b
    Styles:
      - Identifier: ocean_color
        ColorMappings:
          conc_chl:
            ColorBar: "plasma"
            ValueRange: [0., 24.]
          conc_tsm:
            ColorBar: "PuBuGn"
            ValueRange: [0., 100.]
          kd489:
            ColorBar: "jet"
            ValueRange: [0., 6.]

    This is the same styles syntax as used in the configuration file for "xcube serve",
    hence that configuration can be reused.

    """
    import fractions
    import itertools
    import json
    import os.path
    # noinspection PyPackageRequirements
    import yaml
    import xarray as xr
    import numpy as np

    from xcube.core.mldataset import open_ml_dataset
    from xcube.core.mldataset import MultiLevelDataset
    from xcube.core.schema import CubeSchema
    from xcube.core.tile import get_ml_dataset_tile
    from xcube.core.tile import get_var_valid_range
    from xcube.core.tile import get_var_cmap_params
    from xcube.core.tile import parse_non_spatial_labels
    from xcube.core.select import select_variables_subset
    from xcube.cli.common import parse_cli_kwargs
    from xcube.cli.common import parse_cli_sequence
    from xcube.cli.common import assert_positive_int_item
    from xcube.util.tilegrid import TileGrid
    from xcube.util.tiledimage import DEFAULT_COLOR_MAP_NUM_COLORS

    # noinspection PyShadowingNames
    def write_tile_map_resource(path: str,
                                resolutions: List[fractions.Fraction],
                                tile_grid: TileGrid,
                                title='',
                                abstract='',
                                srs='CRS:84'):
        num_levels = len(resolutions)
        z_and_upp = zip(range(num_levels), map(float, resolutions))
        x1, y1, x2, y2 = tile_grid.geo_extent
        xml = [f'<TileMap version="1.0.0" tilemapservice="http://tms.osgeo.org/1.0.0">',
               f'  <Title>{title}</Title>',
               f'  <Abstract>{abstract}</Abstract>',
               f'  <SRS>{srs}</SRS>',
               f'  <BoundingBox minx="{x1}" miny="{y1}" maxx="{x2}" maxy="{y2}"/>',
               f'  <Origin x="{x1}" y="{y1}"/>',
               f'  <TileFormat width="{tile_grid.tile_width}" height="{tile_grid.tile_height}"'
               f' mime-type="image/png" extension="png"/>',
               f'  <TileSets profile="local">'] + [
                  f'    <TileSet href="{z}" order="{z}" units-per-pixel="{upp}"/>' for z, upp in z_and_upp] + [
                  f'  </TileSets>',
                  f'</TileMap>']
        with open(path, 'w') as fp:
            fp.write('\n'.join(xml))

    # noinspection PyShadowingNames
    def _convert_coord_var(coord_var: xr.DataArray):
        values = coord_var.values
        if np.issubdtype(values.dtype, np.datetime64):
            return list(np.datetime_as_string(values, timezone='UTC'))
        elif np.issubdtype(values.dtype, np.integer):
            return [int(value) for value in values]
        else:
            return [float(value) for value in values]

    # noinspection PyShadowingNames
    def _get_color_mappings(ml_dataset: MultiLevelDataset,
                            var_name: str,
                            config: Mapping[str, Any],
                            style_id: str):
        cmap_name = None
        cmap_range = None, None

        if config:
            style_id = style_id or 'default'
            styles = config.get('Styles')
            if styles:
                color_mappings = None
                for style in styles:
                    if style.get('Identifier') == style_id:
                        color_mappings = style.get('ColorMappings')
                        break
                if color_mappings:
                    color_mapping = color_mappings.get(var_name)
                    if color_mapping:
                        cmap_name = color_mapping.get('ColorBar')
                        cmap_vmin, cmap_vmax = color_mapping.get('ValueRange', (None, None))
                        cmap_range = cmap_vmin, cmap_vmax

        if cmap_name is not None and None not in cmap_range:
            return cmap_name, cmap_range
        var = ml_dataset.base_dataset[var_name]
        valid_range = get_var_valid_range(var)
        return get_var_cmap_params(var, cmap_name, cmap_range, valid_range)

    variables = parse_cli_sequence(variables, metavar='VARIABLES', num_items_min=1,
                                   item_plural_name='variables')

    tile_size = parse_cli_sequence(tile_size, num_items=2, metavar='TILE_SIZE',
                                   item_parser=int,
                                   item_validator=assert_positive_int_item,
                                   item_plural_name='tile sizes')

    labels = parse_cli_kwargs(labels, metavar='LABELS')

    verbosity = len(verbose)

    config = {}
    if config_path:
        if verbosity:
            print(f'Opening {config_path}...')
        with open(config_path, 'r') as fp:
            config = yaml.safe_load(fp)

    if verbosity:
        print(f'Opening {cube}...')

    ml_dataset = open_ml_dataset(cube, chunks='auto')
    tile_grid = ml_dataset.tile_grid
    base_dataset = ml_dataset.base_dataset
    schema = CubeSchema.new(base_dataset)
    spatial_dims = schema.x_dim, schema.y_dim

    if tile_size:
        tile_width, tile_height = tile_size
    else:
        if verbosity:
            print(f'Warning: using default tile sizes derived from CUBE')
        tile_width, tile_height = tile_grid.tile_width, tile_grid.tile_height

    indexers = None
    if labels:
        indexers = parse_non_spatial_labels(labels,
                                            schema.dims,
                                            schema.coords,
                                            allow_slices=True,
                                            exception_type=click.ClickException)

    def transform(ds: xr.Dataset) -> xr.Dataset:
        if variables:
            ds = select_variables_subset(ds, var_names=variables)
        if indexers:
            ds = ds.sel(**indexers)
        chunk_sizes = {dim: 1 for dim in ds.dims}
        chunk_sizes[spatial_dims[0]] = tile_width
        chunk_sizes[spatial_dims[1]] = tile_height
        return ds.chunk(chunk_sizes)

    ml_dataset = ml_dataset.apply(transform)
    tile_grid = ml_dataset.tile_grid
    base_dataset = ml_dataset.base_dataset
    schema = CubeSchema.new(base_dataset)
    spatial_dims = schema.x_dim, schema.y_dim

    x1, _, x2, _ = tile_grid.geo_extent
    num_levels = tile_grid.num_levels
    resolutions = [fractions.Fraction(fractions.Fraction(x2 - x1), tile_grid.width(z))
                   for z in range(num_levels)]

    if verbosity:
        print(f'Writing tile sets...')
        print(f'  Zoom levels: {num_levels}')
        print(f'  Resolutions: {", ".join(map(str, resolutions))} units/pixel')
        print(f'  Tile size:   {tile_width} x {tile_height} pixels')

    image_cache = {}

    for var_name, var in base_dataset.data_vars.items():
        color_bar, (value_min, value_max) = _get_color_mappings(ml_dataset, str(var_name), config, style_id)

        label_names = []
        label_indexes = []
        for dim in var.dims:
            if dim not in spatial_dims:
                label_names.append(dim)
                label_indexes.append(list(range(var[dim].size)))

        var_path = os.path.join(output_path, str(var_name))
        metadata_path = os.path.join(var_path, 'metadata.json')
        metadata = dict(name=str(var_name),
                        attrs={name: value
                               for name, value in var.attrs.items()},
                        dims=[str(dim)
                              for dim in var.dims],
                        dim_sizes={dim: int(var[dim].size)
                                   for dim in var.dims},
                        color_mapping=dict(color_bar=color_bar,
                                           value_min=value_min,
                                           value_max=value_max,
                                           num_colors=DEFAULT_COLOR_MAP_NUM_COLORS),
                        coordinates={name: _convert_coord_var(coord_var)
                                     for name, coord_var in var.coords.items() if coord_var.ndim == 1})
        if verbosity:
            print(f'Writing {metadata_path}')
        if not dry_run:
            os.makedirs(var_path, exist_ok=True)
            with open(metadata_path, 'w') as fp:
                json.dump(metadata, fp, indent=2)

        for label_index in itertools.product(*label_indexes):
            labels = {name: index for name, index in zip(label_names, label_index)}
            tilemap_path = os.path.join(var_path, *[str(l) for l in label_index])
            tilemap_resource_path = os.path.join(tilemap_path, 'tilemapresource.xml')
            if verbosity > 1:
                print(f'Writing {tilemap_resource_path}')
            if not dry_run:
                os.makedirs(tilemap_path, exist_ok=True)
                write_tile_map_resource(tilemap_resource_path, resolutions, tile_grid, title=f'{var_name}')
            for z in range(num_levels):
                num_tiles_x = tile_grid.num_tiles_x(z)
                num_tiles_y = tile_grid.num_tiles_y(z)
                tile_z_path = os.path.join(tilemap_path, str(z))
                if not dry_run and not os.path.exists(tile_z_path):
                    os.mkdir(tile_z_path)
                for x in range(num_tiles_x):
                    tile_zx_path = os.path.join(tile_z_path, str(x))
                    if not dry_run and not os.path.exists(tile_zx_path):
                        os.mkdir(tile_zx_path)
                    for y in range(num_tiles_y):
                        tile_bytes = get_ml_dataset_tile(ml_dataset,
                                                         str(var_name),
                                                         x, y, z,
                                                         labels=labels,
                                                         labels_are_indices=True,
                                                         cmap_name=color_bar,
                                                         cmap_range=(value_min, value_max),
                                                         image_cache=image_cache,
                                                         trace_perf=True,
                                                         exception_type=click.ClickException)
                        tile_path = os.path.join(tile_zx_path, f'{num_tiles_y - 1 - y}.png')
                        if verbosity > 2:
                            print(f'Writing tile {tile_path}')
                        if not dry_run:
                            with open(tile_path, 'wb') as fp:
                                fp.write(tile_bytes)

    print(f'Done writing tile sets.')
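For reference, the "Styles" block shown in the docstring arrives in _get_color_mappings as a plain dict after yaml.safe_load. A minimal sketch of that structure and of the lookup performed for one variable follows; the style and variable names are taken from the docstring example.

config = {
    "Styles": [
        {
            "Identifier": "ocean_color",
            "ColorMappings": {
                "conc_chl": {"ColorBar": "plasma", "ValueRange": [0.0, 24.0]},
            },
        },
    ],
}

style = next(s for s in config["Styles"] if s.get("Identifier") == "ocean_color")
mapping = style["ColorMappings"]["conc_chl"]
cmap_name = mapping.get("ColorBar")                       # "plasma"
cmap_vmin, cmap_vmax = mapping.get("ValueRange", (None, None))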
Example #6
def compute(script: str,
            cube: List[str],
            input_var_names: str,
            input_params: str,
            output_path: str,
            output_format: str,
            output_var_name: str,
            output_var_dtype: str):
    """
    Compute a cube from one or more other cubes.

    The command computes a cube variable from other cube variables in CUBEs
    using a user-provided Python function in SCRIPT.

    The SCRIPT must define a function named "compute":

    \b
        def compute(*input_vars: numpy.ndarray,
                    input_params: Mapping[str, Any] = None,
                    dim_coords: Mapping[str, np.ndarray] = None,
                    dim_ranges: Mapping[str, Tuple[int, int]] = None) \\
                    -> numpy.ndarray:
            # Compute new numpy array from inputs
            # output_array = ...
            return output_array

    where input_vars are numpy arrays (chunks) in the order given by VARIABLES or by the variable names returned
    by an optional "initialize" function that may also be defined in SCRIPT, see below. input_params is a mapping of
    parameter names to values according to PARAMS or the ones returned by the aforementioned "initialize" function.
    dim_coords is a mapping from dimension name to coordinate labels for the current chunk to be computed.
    dim_ranges is a mapping from dimension name to index ranges into coordinate arrays of the cube.

    The SCRIPT may define a function named "initialize":

    \b
        def initialize(input_cubes: Sequence[xr.Dataset],
                       input_var_names: Sequence[str],
                       input_params: Mapping[str, Any]) \\
                       -> Tuple[Sequence[str], Mapping[str, Any]]:
            # Compute new variable names and/or new parameters
            # new_input_var_names = ...
            # new_input_params = ...
            return new_input_var_names, new_input_params

    where input_cubes are the respective CUBEs, input_var_names the respective VARIABLES, and input_params
    are the respective PARAMS. The "initialize" function can be used to validate the data cubes, extract
    the desired variables in the desired order, and provide extra processing parameters passed to the
    "compute" function.

    Note that if no input variable names are specified, no variables are passed to the "compute" function.

    The SCRIPT may also define a function named "finalize":

    \b
        def finalize(output_cube: xr.Dataset,
                     input_params: Mapping[str, Any]) \\
                     -> Optional[xr.Dataset]:
            # Optionally modify output_cube and return it or return None
            return output_cube

    If defined, the "finalize" function will be called before the command writes the
    new cube and then exits. The function may perform cleanup or side effects such
    as writing the cube to some sink. If the function returns None, the CLI will *not* write
    any cube data.

    """
    from xcube.cli.common import parse_cli_kwargs
    from xcube.core.compute import compute_cube
    from xcube.core.dsio import open_cube
    from xcube.core.dsio import guess_dataset_format, find_dataset_io

    input_paths = cube

    compute_function_name = "compute"
    initialize_function_name = "initialize"
    finalize_function_name = "finalize"

    with open(script, "r") as fp:
        code = fp.read()

    locals_dict = dict()
    exec(code, globals(), locals_dict)

    input_var_names = list(map(lambda s: s.strip(), input_var_names.split(","))) if input_var_names else None

    compute_function = _get_function(locals_dict, compute_function_name, script, force=True)
    initialize_function = _get_function(locals_dict, initialize_function_name, script, force=False)
    finalize_function = _get_function(locals_dict, finalize_function_name, script, force=False)

    input_params = parse_cli_kwargs(input_params, "PARAMS")

    input_cubes = []
    for input_path in input_paths:
        input_cubes.append(open_cube(input_path=input_path))

    if initialize_function:
        input_var_names, input_params = initialize_function(input_cubes, input_var_names, input_params)

    output_cube = compute_cube(compute_function,
                               *input_cubes,
                               input_var_names=input_var_names,
                               input_params=input_params,
                               output_var_name=output_var_name,
                               output_var_dtype=output_var_dtype)

    if finalize_function:
        output_cube = finalize_function(output_cube, input_params)

    if output_cube is not None:
        output_format = output_format or guess_dataset_format(output_path)
        dataset_io = find_dataset_io(output_format, {"w"})
        dataset_io.write(output_cube, output_path)
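Putting the documented protocol together, a minimal SCRIPT might look as follows; the variable names, the "factor" parameter, and the averaging logic are purely illustrative assumptions, not xcube defaults.

from typing import Any, Mapping, Optional, Sequence, Tuple

import numpy as np
import xarray as xr


def initialize(input_cubes: Sequence[xr.Dataset],
               input_var_names: Sequence[str],
               input_params: Mapping[str, Any]) -> Tuple[Sequence[str], Mapping[str, Any]]:
    # Fall back to two hypothetical variable names and a default parameter set.
    return (input_var_names or ["var_a", "var_b"],
            {"factor": 1.0, **(input_params or {})})


def compute(var_a: np.ndarray, var_b: np.ndarray,
            input_params: Mapping[str, Any] = None,
            dim_coords: Mapping[str, np.ndarray] = None,
            dim_ranges: Mapping[str, Tuple[int, int]] = None) -> np.ndarray:
    # Average the two input chunks and scale by the "factor" parameter.
    factor = (input_params or {}).get("factor", 1.0)
    return factor * 0.5 * (var_a + var_b)


def finalize(output_cube: xr.Dataset,
             input_params: Mapping[str, Any]) -> Optional[xr.Dataset]:
    # Attach a provenance attribute; returning the cube lets the CLI write it.
    return output_cube.assign_attrs(comment="computed by a user-provided SCRIPT")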