def extract(cube,
            points,
            output=None,
            format=None,
            coords=False,
            bounds=False,
            indexes=False,
            refs=False):
    """
    Extract cube points.

    Extracts data cells from CUBE at the coordinates given in each POINTS
    record and writes the resulting values to the given output path in the
    given format.

    POINTS must be a CSV file that provides at least the columns "lon",
    "lat", and "time". The "lon" and "lat" columns provide a point's
    location in decimal degrees. The "time" column provides a point's date
    or date-time. Its format should preferably be ISO, but other formats
    may work as well.
    """
    import sys

    import click
    import pandas as pd

    if format != 'csv':
        raise click.ClickException(f'Format {format!r} is not supported.')

    cube_path = cube
    points_path = points
    output_path = output
    include_coords = coords
    include_bounds = bounds
    include_indexes = indexes
    include_refs = refs

    from xcube.core.dsio import open_dataset
    from xcube.core.extract import DEFAULT_INDEX_NAME_PATTERN
    from xcube.core.extract import DEFAULT_REF_NAME_PATTERN
    from xcube.core.extract import get_cube_values_for_points

    # These may become CLI options later
    index_name_pattern = DEFAULT_INDEX_NAME_PATTERN
    ref_name_pattern = DEFAULT_REF_NAME_PATTERN
    time_col_names = ["time"]

    points = pd.read_csv(points_path,
                         parse_dates=time_col_names,
                         infer_datetime_format=True)
    with open_dataset(cube_path) as cube:
        values = get_cube_values_for_points(
            cube,
            points,
            include_coords=include_coords,
            include_bounds=include_bounds,
            include_indexes=include_indexes,
            index_name_pattern=index_name_pattern,
            include_refs=include_refs,
            ref_name_pattern=ref_name_pattern).to_dataframe()
        values.to_csv(output_path if output_path else sys.stdout,
                      # These may become CLI options later
                      sep=',',
                      date_format='%Y-%m-%dT%H:%M:%SZ',
                      index=True)
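# Usage sketch for extract() above. The file names are hypothetical
# placeholders; any cube readable by open_dataset() and any CSV with "lon",
# "lat", and "time" columns will do. A minimal points.csv could look like:
#
#     lon,lat,time
#     10.25,52.10,2017-01-16T10:00:00Z
#     10.75,51.90,2017-01-20T10:00:00Z
#
extract('demo-cube.zarr',    # hypothetical cube path
        'points.csv',        # hypothetical points CSV
        output='values.csv',
        format='csv',        # only 'csv' is accepted above
        coords=True)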
def vars2dim(cube, variable, dim_name, output=None, format=None):
    """
    Convert cube variables into a new dimension.

    Moves all variables of CUBE into a single new variable VARIABLE
    with a new dimension DIM-NAME and writes the result to OUTPUT.
    """
    import os

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset
    from xcube.core.vars2dim import vars_to_dim

    if not output:
        dirname = os.path.dirname(cube)
        basename = os.path.basename(cube)
        basename, ext = os.path.splitext(basename)
        output = os.path.join(dirname, basename + '-vars2dim' + ext)

    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as ds:
        converted_dataset = vars_to_dim(ds, dim_name=dim_name, var_name=variable)
        write_dataset(converted_dataset, output_path=output, format_name=format_name)
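# Sketch of the underlying vars_to_dim() call on a small in-memory dataset
# (the variable and dimension names are made up for illustration): the two
# data variables become slices of a single new variable along a new dimension.
import numpy as np
import xarray as xr

from xcube.core.vars2dim import vars_to_dim

ds = xr.Dataset({'chl': (('lat', 'lon'), np.random.rand(3, 4)),
                 'tsm': (('lat', 'lon'), np.random.rand(3, 4))})
converted = vars_to_dim(ds, dim_name='var', var_name='data')
print(converted)  # expect a single variable 'data' with a new 'var' dimension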
# Imports needed to run this snippet standalone; in xcube these are
# module-level imports of the defining module.
import os
from typing import List

import xarray as xr

from xcube.core.dsio import open_dataset
from xcube.core.level import PyramidLevelCallback, compute_levels


def write_levels(output_path: str,
                 dataset: xr.Dataset = None,
                 input_path: str = None,
                 link_input: bool = False,
                 progress_monitor: PyramidLevelCallback = None,
                 **kwargs) -> List[xr.Dataset]:
    """
    Transform the dataset given by a *dataset* instance or an *input_path*
    string into the levels of a multi-level pyramid with spatial resolution
    decreasing by a factor of two in both spatial dimensions, and write them
    to *output_path*. One of *dataset* and *input_path* must be given.

    :param output_path: Output path.
    :param dataset: Dataset to be converted and written as levels.
    :param input_path: Input path to a dataset to be transformed and written as levels.
    :param link_input: Just link the dataset at level zero instead of writing it.
    :param progress_monitor: An optional progress monitor.
    :param kwargs: Keyword arguments accepted by the ``compute_levels()`` function.
    :return: A list of dataset instances representing the multi-level pyramid.
    """
    if dataset is None and input_path is None:
        raise ValueError("at least one of dataset or input_path must be given")
    if link_input and input_path is None:
        raise ValueError("input_path must be provided to link input")

    _post_process_level = kwargs.pop("post_process_level", None)

    def post_process_level(level_dataset, index, num_levels):
        if _post_process_level is not None:
            level_dataset = _post_process_level(level_dataset, index, num_levels)

        if index == 0 and link_input:
            # Level zero just records a link to the input dataset.
            with open(os.path.join(output_path, f"{index}.link"), "w") as fp:
                fp.write(input_path)
        else:
            # Write the level, then reopen it so subsequent processing
            # reads from the written Zarr store.
            path = os.path.join(output_path, f"{index}.zarr")
            level_dataset.to_zarr(path)
            level_dataset.close()
            level_dataset = xr.open_zarr(path)

        if progress_monitor is not None:
            progress_monitor(level_dataset, index, num_levels)

        return level_dataset

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    if dataset is None:
        dataset = open_dataset(input_path)

    return compute_levels(dataset, post_process_level=post_process_level, **kwargs)
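# Usage sketch for write_levels() (paths hypothetical): link the input cube
# at level zero and report progress via the callback signature documented
# above (level_dataset, index, num_levels).
def report_progress(level_dataset, index, num_levels):
    print(f'written level {index + 1} of {num_levels}')

levels = write_levels('demo-pyramid.levels',
                      input_path='demo-cube.zarr',
                      link_input=True,
                      progress_monitor=report_progress)
print(f'{len(levels)} level(s) written')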
def test_open_dataset(self):
    with open_dataset(TEST_NC_FILE) as ds:
        self.assertIsNotNone(ds)
        np.testing.assert_array_equal(ds.time.values, self.dataset.time.values)
        np.testing.assert_array_equal(ds.lat.values, self.dataset.lat.values)
        np.testing.assert_array_equal(ds.lon.values, self.dataset.lon.values)
def dump(input, variable, encoding):
    """
    Dump contents of an input dataset.
    """
    from xcube.core.dsio import open_dataset
    from xcube.core.dump import dump_dataset

    with open_dataset(input) as ds:
        text = dump_dataset(ds, var_names=variable, show_var_encoding=encoding)
        print(text)
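# Usage sketch for dump() (the path and variable names are hypothetical):
# print the structure of two selected variables including their encoding.
dump('demo-cube.zarr',
     variable=['precipitation', 'temperature'],
     encoding=True)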
from typing import Optional, Sequence, Tuple


def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             delta: float,
             dry_run: bool,
             monitor):
    import click

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.rectify import rectify_dataset
    from xcube.core.rectify import ImageGeom
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    output_geom = None
    if output_size is not None and output_point is not None and output_res is not None:
        output_geom = ImageGeom(size=output_size,
                                x_min=output_point[0],
                                y_min=output_point[1],
                                xy_res=output_res)
    elif output_size is not None or output_point is not None or output_res is not None:
        raise click.ClickException('SIZE, POINT, and RES must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')
    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    reproj_ds = rectify_dataset(src_ds,
                                xy_names=xy_names,
                                var_names=var_names,
                                output_geom=output_geom,
                                tile_size=output_tile_size,
                                uv_delta=delta)

    if reproj_ds is None:
        monitor(f'Dataset {input_path} does not seem to have an intersection with the bounding box.')
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(reproj_ds, output_path, output_format)

    monitor('Done.')
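# Usage sketch for the _rectify() helper above (paths and the variable name
# are hypothetical). SIZE, POINT, and RES are all given, so a target
# ImageGeom is constructed; dry_run=True skips writing the output.
_rectify('olci-scene.nc',
         xy_names=('lon', 'lat'),
         var_names=['radiance_1'],    # hypothetical variable name
         output_path='rectified.zarr',
         output_format=None,          # guessed from output_path
         output_size=(1024, 512),
         output_tile_size=None,
         output_point=(0.0, 50.0),
         output_res=0.0025,
         delta=0.001,
         dry_run=True,
         monitor=print)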
def _verify(input_path: str = None, monitor=None):
    import sys

    from xcube.core.dsio import open_dataset
    from xcube.core.verify import verify_cube

    monitor(f'Opening cube from {input_path!r}...')
    with open_dataset(input_path) as cube:
        report = verify_cube(cube)

    if not report:
        monitor("INPUT is a valid cube.")
        return

    monitor('INPUT is not a valid cube due to the following reasons:')
    monitor('- ' + '\n- '.join(report))
    # According to http://tldp.org/LDP/abs/html/exitcodes.html, exit code 3 is not reserved
    sys.exit(3)
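# Sketch of the underlying verify_cube() call (the path is hypothetical).
# An empty report means the dataset satisfies the cube conventions;
# otherwise the report lists the violations as strings.
from xcube.core.dsio import open_dataset
from xcube.core.verify import verify_cube

with open_dataset('demo-cube.zarr') as cube:
    report = verify_cube(cube)
print('valid cube' if not report else '\n'.join(report))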
def chunk(cube, output, format=None, params=None, chunks=None):
    """
    (Re-)chunk xcube dataset.

    Changes the external chunking of all variables of CUBE according to
    CHUNKS and writes the result to OUTPUT.

    Note: There is a possibly more efficient way to (re-)chunk datasets
    through the dedicated tool "rechunker", see
    https://rechunker.readthedocs.io.
    """
    import click

    from xcube.cli.common import parse_cli_kwargs

    chunk_sizes = None
    if chunks:
        chunk_sizes = parse_cli_kwargs(chunks, metavar="CHUNKS")
        for k, v in chunk_sizes.items():
            if not isinstance(v, int) or v <= 0:
                raise click.ClickException("Invalid value for CHUNKS, "
                                           f"chunk sizes must be positive integers: {chunks}")

    write_kwargs = dict()
    if params:
        write_kwargs = parse_cli_kwargs(params, metavar="PARAMS")

    from xcube.core.chunk import chunk_dataset
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset, write_dataset

    format_name = format if format else guess_dataset_format(output)

    with open_dataset(input_path=cube) as ds:
        if chunk_sizes:
            for k in chunk_sizes:
                if k not in ds.dims:
                    raise click.ClickException("Invalid value for CHUNKS, "
                                               f"{k!r} is not the name of any dimension: {chunks}")

        chunked_dataset = chunk_dataset(ds, chunk_sizes=chunk_sizes, format_name=format_name)
        write_dataset(chunked_dataset, output_path=output, format_name=format_name, **write_kwargs)
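# Sketch of the underlying chunk_dataset() call (the cube path and chunk
# sizes are hypothetical): re-chunk a Zarr cube to one time slice per chunk.
from xcube.core.chunk import chunk_dataset
from xcube.core.dsio import open_dataset, write_dataset

with open_dataset('demo-cube.zarr') as ds:
    rechunked = chunk_dataset(ds,
                              chunk_sizes=dict(time=1, lat=180, lon=360),
                              format_name='zarr')
    write_dataset(rechunked, output_path='demo-rechunked.zarr', format_name='zarr')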
def _prune(input_path: str, dry_run: bool, monitor: Monitor):
    # Monitor and _delete_block_file are helpers defined in the
    # surrounding CLI module.
    import click

    from xcube.constants import FORMAT_NAME_ZARR
    from xcube.core.chunk import get_empty_dataset_chunks
    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset

    input_format = guess_dataset_format(input_path)
    if input_format != FORMAT_NAME_ZARR:
        raise click.ClickException("input must be a dataset in Zarr format")

    num_deleted_total = 0

    monitor(f'Opening dataset from {input_path!r}...', 1)
    with open_dataset(input_path) as dataset:
        monitor('Identifying empty chunks...', 1)
        for var_name, chunk_indices in get_empty_dataset_chunks(dataset):
            num_empty_chunks = 0
            num_deleted = 0
            for chunk_index in chunk_indices:
                num_empty_chunks += 1
                if num_empty_chunks == 1:
                    monitor(f'Found empty chunks in variable {var_name!r}, '
                            f'deleting block files...', 2)
                ok = _delete_block_file(input_path, var_name, chunk_index, dry_run, monitor)
                if ok:
                    num_deleted += 1
            if num_deleted > 0:
                monitor(f'Deleted {num_deleted} block file(s) '
                        f'for variable {var_name!r}.', 2)
            elif num_empty_chunks > 0:
                monitor(f'No block files for variable {var_name!r} '
                        f'could be deleted.', 2)
            num_deleted_total += num_deleted

    monitor(f'Done, {num_deleted_total} block file(s) deleted in total.', 1)
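# Sketch of the underlying get_empty_dataset_chunks() call (the path is
# hypothetical): just count the empty chunks per variable without deleting
# any block files.
from xcube.core.chunk import get_empty_dataset_chunks
from xcube.core.dsio import open_dataset

with open_dataset('demo-cube.zarr') as dataset:
    for var_name, chunk_indices in get_empty_dataset_chunks(dataset):
        print(var_name, sum(1 for _ in chunk_indices), 'empty chunk(s)')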
from typing import Optional, Sequence, Tuple


def _rectify(input_path: str,
             xy_names: Optional[Tuple[str, str]],
             var_names: Optional[Sequence[str]],
             output_path: str,
             output_format: Optional[str],
             output_size: Optional[Tuple[int, int]],
             output_tile_size: Optional[Tuple[int, int]],
             output_point: Optional[Tuple[float, float]],
             output_res: Optional[float],
             output_crs: Optional[str],
             delta: float,
             dry_run: bool,
             monitor):
    import click
    import pyproj.crs

    from xcube.core.dsio import guess_dataset_format
    from xcube.core.dsio import open_dataset
    from xcube.core.dsio import write_dataset
    from xcube.core.gridmapping import GridMapping
    from xcube.core.resampling import rectify_dataset
    from xcube.core.sentinel3 import is_sentinel3_product
    from xcube.core.sentinel3 import open_sentinel3_product

    if not output_format:
        output_format = guess_dataset_format(output_path)

    output_gm = None
    output_gm_given = (output_size is not None,
                       output_point is not None,
                       output_res is not None,
                       output_crs is not None)
    if all(output_gm_given):
        output_gm = GridMapping.regular(size=output_size,
                                        xy_min=output_point,
                                        xy_res=output_res,
                                        crs=pyproj.crs.CRS.from_user_input(output_crs))
    elif any(output_gm_given):
        raise click.ClickException('SIZE, POINT, RES, and CRS must all be given or none of them.')

    monitor(f'Opening dataset from {input_path!r}...')
    if is_sentinel3_product(input_path):
        src_ds = open_sentinel3_product(input_path)
    else:
        src_ds = open_dataset(input_path)

    monitor('Rectifying...')
    rectified_ds = rectify_dataset(src_ds,
                                   xy_var_names=xy_names,
                                   var_names=var_names,
                                   target_gm=output_gm,
                                   tile_size=output_tile_size,
                                   uv_delta=delta)

    if rectified_ds is None:
        monitor(f'Dataset {input_path} does not seem to have an intersection with the bounding box.')
        return

    monitor(f'Writing rectified dataset to {output_path!r}...')
    if not dry_run:
        write_dataset(rectified_ds, output_path, output_format)

    monitor('Done.')
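# Sketch of the target grid mapping construction used above: a regular
# 0.0025-degree WGS-84 grid of 1024 x 512 cells with its lower-left corner
# at 0 degrees E, 50 degrees N. The parameters mirror the
# GridMapping.regular() call in _rectify().
import pyproj.crs
from xcube.core.gridmapping import GridMapping

target_gm = GridMapping.regular(size=(1024, 512),
                                xy_min=(0.0, 50.0),
                                xy_res=0.0025,
                                crs=pyproj.crs.CRS.from_user_input('EPSG:4326'))
print(target_gm)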
def test_open_dataset(self):
    ds = open_dataset(f's3://{self.CUBE}')
    self.assertIn('B02', ds)
    print(ds)
import s3fs

s3 = s3fs.S3FileSystem(anon=True)


# In[ ]:


# List of available Colombia data cubes:
s3.ls('esdl-esdc-v2.0.1')


# In[ ]:


# Commands used to download a cube:
s3.get(s3.ls('esdl-esdc-v2.0.1/Cube_2019highColombiaCube_184x120x120.zarr'),
       './mylocalhighrescube',
       recursive=True)

# Commands to access the data cube using xcube and then continue analysis using xarray:

# In[ ]:


import xarray as xr
import xcube


# In[ ]:


from xcube.core.dsio import open_dataset

ds = open_dataset("https://s3.eu-central-1.amazonaws.com/esdl-esdc-v2.0.1/Cube_2019highColombiaCube_184x120x120.zarr",
                  s3_kwargs=dict(anon=True))


# In[ ]:
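# Continue the analysis with plain xarray. The variable name
# 'gross_primary_productivity' is an assumption about this cube's contents;
# inspect ds.data_vars to see what is actually available.
print(list(ds.data_vars))

gpp = ds['gross_primary_productivity']  # assumed variable name
gpp.sel(time='2010-07').mean(dim='time').plot.imshow()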