def write_dataset(dataset: xr.Dataset,
                  output_path: str,
                  format_name: str = None,
                  **kwargs) -> xr.Dataset:
    """
    Write dataset to *output_path*.
    If *format* is not provided it will be guessed from *output_path*.

    :param dataset: Dataset to be written.
    :param output_path: output path
    :param format_name: format, e.g. "zarr" or "netcdf4"
    :param kwargs: format-specific keyword arguments
    :return: the input dataset
    """
    # No explicit format: guess it from the output path's extension.
    if not format_name:
        format_name = guess_dataset_format(output_path)
    if format_name is None:
        raise ValueError("Unknown output format")
    # Look up a writer capable of mode "w" for the resolved format.
    writer = find_dataset_io(format_name, modes=["w"])
    if writer is None:
        raise ValueError(
            f"Unknown output format {format_name!r} for {output_path}")
    writer.write(dataset, output_path, **kwargs)
    # Return the (unmodified) input so calls can be chained.
    return dataset
def read_dataset(input_path: str,
                 format_name: str = None,
                 is_cube: bool = False,
                 **kwargs) -> xr.Dataset:
    """
    Read dataset from *input_path*.
    If *format* is not provided it will be guessed from *input_path*.

    :param input_path: input path
    :param format_name: format, e.g. "zarr" or "netcdf4"
    :param is_cube: Whether a ValueError will be raised, if the dataset
        read from *input_path* is not a data cube.
    :param kwargs: format-specific keyword arguments
    :return: dataset object
    """
    # No explicit format: guess it from the input path's extension.
    format_name = format_name if format_name else guess_dataset_format(
        input_path)
    if format_name is None:
        raise ValueError("Unknown input format")
    dataset_io = find_dataset_io(format_name, modes=["r"])
    if dataset_io is None:
        raise ValueError(
            f"Unknown input format {format_name!r} for {input_path}")
    dataset = dataset_io.read(input_path, **kwargs)
    if is_cube:
        # Validates the data-cube contract; raises ValueError on violation.
        assert_cube(dataset)
    return dataset
def test_find_by_name(self):
    """find_dataset_io() resolves registered format names, optionally per mode."""
    cases = [
        (('netcdf4',), {}, Netcdf4DatasetIO),
        (('zarr',), {'modes': ['a']}, ZarrDatasetIO),
        (('zarr',), {'modes': ['w']}, ZarrDatasetIO),
        (('zarr',), {'modes': ['r']}, ZarrDatasetIO),
        (('mem',), {}, MemDatasetIO),
    ]
    for args, kwargs, expected_type in cases:
        self.assertIsInstance(find_dataset_io(*args, **kwargs), expected_type)
    # An unknown name falls back to the supplied default instance.
    self.assertIsInstance(find_dataset_io('bibo', default=MemDatasetIO()),
                          MemDatasetIO)
def test_find_by_ext(self):
    """A filename extension such as 'nc' also resolves to a dataset I/O."""
    self.assertIsInstance(find_dataset_io('nc'), Netcdf4DatasetIO)
def apply(output: str, script: str, input: str, params: str, vars: str,
          dask: str, format: str, dtype: str):
    """
    Apply a function to data cubes. The function is used to transform N chunks
    of equal shape to a new chunk of same shape. N is the number of variables
    from all data cubes. Uses the Python program <script> to transform data
    cubes given by <inputs> into a new data cube given by <output>. The
    <script> must define a function ``apply(*variables, **params)`` where
    variables are numpy arrays (chunks) in the order given by <vars> or given
    by the variables returned by an optional ``init()`` function that my be
    defined in <script>. If neither <vars> nor an ``init()`` function is
    defined, all variables are passed in arbitrary order. The optional
    ``init(*cubes, **params)`` function can be used to validate the data
    cubes, extract the desired variables in desired order and to provide some
    extra processing parameters passed to the ``apply()`` function. The
    ``init()`` argument *cubes* are the ``xarray.Dataset`` objects according
    to <input> and *params* are according to <params>. The return value of
    ``init()`` is a tuple (*variables*, *new_params*) where *variables* is a
    list of ``xarray.DataArray`` objects and *new_params* are newly computed
    parameters passed to ``apply()``.
    """
    # NOTE(review): iterating *input* below assumes it is a sequence of path
    # strings (e.g. a click option with multiple=True yielding a tuple); the
    # ``input: str`` annotation would make the loop iterate characters —
    # confirm against the CLI decorator.
    input_paths = input
    output_path = output

    # Names of the functions looked up in the user script.
    apply_function_name = "apply"
    init_function_name = "init"

    # Read the user script and execute it to harvest its top-level
    # definitions into locals_dict.
    # SECURITY: exec() runs arbitrary code from <script> with full privileges;
    # this is by design for a CLI plug-in script, but never feed it untrusted
    # input.
    with open(script, "r") as fp:
        code = fp.read()
    locals_dict = dict()
    exec(code, globals(), locals_dict)

    # Optional explicit variable order from the comma-separated <vars> option.
    var_names = list(map(lambda s: s.strip(), vars.split(","))) if vars else None

    # init() is optional, but if present it must be callable.
    init_function = locals_dict.get(init_function_name)
    if init_function is not None and not callable(init_function):
        raise click.ClickException(f"{init_function_name!r} in {script} is not a callable")

    # apply() is mandatory and must be callable.
    apply_function = locals_dict.get(apply_function_name)
    if apply_function is None:
        raise click.ClickException(f"missing function {apply_function_name!r} in {script}")
    if not callable(apply_function):
        raise click.ClickException(f"{apply_function!r} in {script} is not a callable")

    # Imported lazily so the CLI module loads fast.
    from xcube.api import read_cube
    from xcube.util.cliutil import parse_cli_kwargs
    from xcube.util.dsio import guess_dataset_format, find_dataset_io

    # Parse "<params>" key=value pairs into a kwargs dict.
    kwargs = parse_cli_kwargs(params, "<params>")

    # Load every input cube; the first one is kept as the compatibility
    # reference for the others.
    input_cube_0 = None
    input_cubes = []
    for input_path in input_paths:
        input_cube = read_cube(input_path=input_path)
        if input_cube_0 is None:
            input_cube_0 = input_cube
        else:
            # TODO (forman): make sure input_cube's and input_cube_0's coords and chunking are compatible
            pass
        input_cubes.append(input_cube)

    # Restrict each cube to the requested variables (drop everything else).
    if var_names:
        input_cubes = [input_cube.drop(labels=set(input_cube.data_vars).difference(set(var_names)))
                       for input_cube in input_cubes]

    import xarray as xr
    # init() may reorder/filter the variables and replace the parameters;
    # note that *params* is rebound here from the CLI string to a dict.
    if init_function:
        variables, params = init_function(*input_cubes, **kwargs)
    else:
        variables, params = xr.merge(input_cubes).data_vars.values(), kwargs

    # Apply the user function chunk-wise; output_dtypes is only required
    # (and only passed) for dask's "parallelized" mode.
    output_variable = xr.apply_ufunc(apply_function,
                                     *variables,
                                     dask=dask,
                                     output_dtypes=[dtype] if dask == "parallelized" else None)

    # Resolve the output format and write a single-variable dataset.
    # NOTE(review): no None-check on dataset_io here (unlike write_dataset),
    # so an unknown format would raise AttributeError rather than ValueError.
    format = format or guess_dataset_format(output_path)
    dataset_io = find_dataset_io(format, {"w"})
    dataset_io.write(xr.Dataset(dict(output=output_variable)), output_path)