Example #1
def open_ml_dataset_from_local_fs(
        ctx: ServiceContext,
        dataset_descriptor: DatasetDescriptor) -> MultiLevelDataset:
    ds_id = dataset_descriptor.get('Identifier')

    path = dataset_descriptor.get('Path')
    if not path:
        raise ServiceConfigError(
            f"Missing 'Path' entry in dataset descriptor {ds_id}")

    if not os.path.isabs(path):
        path = os.path.join(ctx.base_dir, path)

    data_format = dataset_descriptor.get('Format', guess_cube_format(path))

    if data_format == FORMAT_NAME_NETCDF4:
        with measure_time(tag=f"opened local NetCDF dataset {path}"):
            ds = assert_cube(xr.open_dataset(path))
            return BaseMultiLevelDataset(ds)

    if data_format == FORMAT_NAME_ZARR:
        with measure_time(tag=f"opened local zarr dataset {path}"):
            ds = assert_cube(xr.open_zarr(path))
            return BaseMultiLevelDataset(ds)

    if data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened local levels dataset {path}"):
            return FileStorageMultiLevelDataset(path)

    raise ServiceConfigError(
        f"Illegal data format {data_format!r} for dataset {ds_id}")
Example #2
    def _get_dataset_lazily(self, index: int,
                            parameters: Dict[str, Any]) -> xr.Dataset:
        """
        Read the dataset for the level at given *index*.

        :param index: the level index
        :param parameters: keyword arguments passed to xr.open_zarr()
        :return: the dataset for the level at *index*.
        """
        ext, level_path = self._level_paths[index]
        if ext == ".link":
            with self._s3_file_system.open(level_path, "r") as fp:
                level_path = fp.read()
                # resolve a relative level_path against the parent of the levels directory
                if not os.path.isabs(level_path):
                    base_dir = os.path.dirname(self._dir_path)
                    level_path = os.path.join(base_dir, level_path)
        store = s3fs.S3Map(root=level_path,
                           s3=self._s3_file_system,
                           check=False)
        max_size = self.get_chunk_cache_capacity(index)
        if max_size:
            store = zarr.LRUStoreCache(store, max_size=max_size)
        with measure_time(
                tag=f"opened remote dataset {level_path} for level {index}"):
            consolidated = self._s3_file_system.exists(
                f'{level_path}/.zmetadata')
            return assert_cube(xr.open_zarr(store,
                                            consolidated=consolidated,
                                            **parameters),
                               name=level_path)
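
The '.zmetadata' existence check above works because consolidated Zarr stores keep all group and array metadata under that single key, which lets xr.open_zarr() avoid many small object reads. A hedged sketch of how such a store is produced (the bucket name is hypothetical):

import s3fs
import zarr

# Hypothetical sketch: write consolidated metadata so readers can detect
# '.zmetadata' and pass consolidated=True to xr.open_zarr().
s3 = s3fs.S3FileSystem()
store = s3fs.S3Map(root='my-bucket/my-cube.zarr', s3=s3, check=False)
zarr.consolidate_metadata(store)  # creates the '.zmetadata' key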
Example #3
def open_ml_dataset_from_python_code(
        ctx: ServiceContext,
        dataset_descriptor: DatasetDescriptor) -> MultiLevelDataset:
    ds_id = dataset_descriptor.get('Identifier')

    path = dataset_descriptor.get('Path')
    if not path:
        raise ServiceConfigError(
            f"Missing 'Path' entry in dataset descriptor {ds_id}")

    if not os.path.isabs(path):
        path = os.path.join(ctx.base_dir, path)

    callable_name = dataset_descriptor.get('Function', COMPUTE_DATASET)
    input_dataset_ids = dataset_descriptor.get('InputDatasets', [])
    input_parameters = dataset_descriptor.get('InputParameters', {})

    for input_dataset_id in input_dataset_ids:
        if not ctx.get_dataset_descriptor(input_dataset_id):
            raise ServiceConfigError(
                f"Invalid dataset descriptor {ds_id!r}: "
                f"Input dataset {input_dataset_id!r} of callable {callable_name!r} "
                f"must reference another dataset")

    with measure_time(tag=f"opened memory dataset {path}"):
        return ComputedMultiLevelDataset(ds_id,
                                         path,
                                         callable_name,
                                         input_dataset_ids,
                                         ctx.get_ml_dataset,
                                         input_parameters,
                                         exception_type=ServiceConfigError)
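
An illustrative descriptor for this opener. The keys mirror those read above ('Function', 'InputDatasets', 'InputParameters'); the concrete values are hypothetical:

# Hypothetical descriptor values for illustration only.
descriptor = {
    'Identifier': 'augmented-cube',
    'Path': 'scripts/compute.py',        # script path, resolved against ctx.base_dir
    'Function': 'compute_dataset',       # defaults to COMPUTE_DATASET if omitted
    'InputDatasets': ['demo-cube'],      # must reference other configured datasets
    'InputParameters': {'factor': 2.0},  # forwarded to the callable
}
ml_dataset = open_ml_dataset_from_python_code(ctx, descriptor)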
Example #4
def _find_places(ctx: ServiceContext,
                 place_group_id: str,
                 base_url: str,
                 query_geometry: shapely.geometry.base.BaseGeometry = None,
                 query_expr: Any = None,
                 comb_op: str = "and") -> GeoJsonFeatureCollection:
    with measure_time() as cm:
        features = __find_places(ctx, place_group_id, base_url, query_geometry, query_expr, comb_op)
    _LOG.info(f"{len(features)} places found within {cm.duration} seconds")
    return features
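
The examples on this page use measure_time in two ways: without arguments it yields a timer object whose duration the caller reports itself, and with a tag the examples rely on it to log the timing. A minimal sketch (the timed body is a placeholder):

from xcube.util.perf import measure_time

# Variant 1: inspect the measured duration yourself.
with measure_time() as cm:
    total = sum(range(1_000_000))  # placeholder for real work
print(f"took {cm.duration} seconds")

# Variant 2: let measure_time report the timing under the given tag.
with measure_time(tag="summed a million integers"):
    total = sum(range(1_000_000))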
Example #5
File: mldataset.py  Project: manzt/xcube
def open_ml_dataset_from_object_storage(path: str,
                                        data_format: str = None,
                                        ds_id: str = None,
                                        exception_type: type = ValueError,
                                        client_kwargs: Mapping[str,
                                                               Any] = None,
                                        **kwargs) -> MultiLevelDataset:
    data_format = data_format or guess_ml_dataset_format(path)

    endpoint_url, root = split_bucket_url(path)
    if endpoint_url:
        kwargs['endpoint_url'] = endpoint_url
        path = root

    client_kwargs = dict(client_kwargs or {})
    for arg_name in ['endpoint_url', 'region_name']:
        if arg_name in kwargs:
            client_kwargs[arg_name] = kwargs.pop(arg_name)

    obs_file_system = s3fs.S3FileSystem(anon=True, client_kwargs=client_kwargs)

    if data_format == FORMAT_NAME_ZARR:
        store = s3fs.S3Map(root=path, s3=obs_file_system, check=False)
        cached_store = zarr.LRUStoreCache(store, max_size=2**28)
        with measure_time(tag=f"opened remote zarr dataset {path}"):
            consolidated = obs_file_system.exists(f'{path}/.zmetadata')
            ds = assert_cube(
                xr.open_zarr(cached_store, consolidated=consolidated,
                             **kwargs))
        return BaseMultiLevelDataset(ds, ds_id=ds_id)
    elif data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened remote levels dataset {path}"):
            return ObjectStorageMultiLevelDataset(
                obs_file_system,
                path,
                zarr_kwargs=kwargs,
                ds_id=ds_id,
                exception_type=exception_type)

    raise exception_type(
        f'Unrecognized multi-level dataset format {data_format!r} for path {path!r}'
    )
Example #6
def get_time_series_for_feature_collection(ctx: ServiceContext,
                                           ds_name: str,
                                           var_name: str,
                                           feature_collection: Dict,
                                           start_date: np.datetime64 = None,
                                           end_date: np.datetime64 = None,
                                           include_count: bool = False,
                                           include_stdev: bool = False,
                                           max_valids: int = None) -> Dict:
    """
    Get the time-series for the geometries of a given *feature_collection*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param feature_collection: The feature collection.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the number of valid observations in the result.
    :param include_stdev: Whether to include the standard deviation in the result.
    :param max_valids: Optional number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    features = GeoJSON.get_feature_collection_features(feature_collection)
    if features is None:
        raise ServiceBadRequestError("Invalid GeoJSON feature collection")
    shapes = []
    for feature in features:
        geometry = GeoJSON.get_feature_geometry(feature)
        try:
            geometry = shapely.geometry.shape(geometry)
        except (TypeError, ValueError) as e:
            raise ServiceBadRequestError(
                "Invalid GeoJSON feature collection") from e
        shapes.append(geometry)
    with measure_time() as time_result:
        result = _get_time_series_for_geometries(dataset,
                                                 var_name,
                                                 shapes,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 include_count=include_count,
                                                 include_stdev=include_stdev,
                                                 max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_feature_collection: dataset id {ds_name}, variable {var_name}, '
            f'size={len(result["results"])}, took {time_result.duration} seconds'
        )
    return result
Example #7
def open_ml_dataset_from_object_storage(path: str,
                                        data_format: str = None,
                                        ds_id: str = None,
                                        exception_type: type = ValueError,
                                        s3_kwargs: Mapping[str, Any] = None,
                                        s3_client_kwargs: Mapping[str,
                                                                  Any] = None,
                                        chunk_cache_capacity: int = None,
                                        **kwargs) -> MultiLevelDataset:
    data_format = data_format or guess_ml_dataset_format(path)

    s3, root = parse_s3_fs_and_root(path,
                                    s3_kwargs=s3_kwargs,
                                    s3_client_kwargs=s3_client_kwargs,
                                    mode='r')

    if data_format == FORMAT_NAME_ZARR:
        store = s3fs.S3Map(root=root, s3=s3, check=False)
        if chunk_cache_capacity:
            store = zarr.LRUStoreCache(store, max_size=chunk_cache_capacity)
        with measure_time(tag=f"opened remote zarr dataset {path}"):
            consolidated = s3.exists(f'{root}/.zmetadata')
            ds = assert_cube(
                xr.open_zarr(store, consolidated=consolidated, **kwargs))
        return BaseMultiLevelDataset(ds, ds_id=ds_id)
    elif data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened remote levels dataset {path}"):
            return ObjectStorageMultiLevelDataset(
                s3,
                root,
                zarr_kwargs=kwargs,
                ds_id=ds_id,
                chunk_cache_capacity=chunk_cache_capacity,
                exception_type=exception_type)

    raise exception_type(
        f'Unrecognized multi-level dataset format {data_format!r} for path {path!r}'
    )
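
A hedged call sketch for this variant. The argument names come from the signature above; the bucket, endpoint, and cache size are hypothetical:

# Hypothetical usage sketch; all values are made up.
ml_dataset = open_ml_dataset_from_object_storage(
    'my-bucket/my-cube.zarr',
    data_format='zarr',                    # else guessed from the path
    ds_id='my-cube',
    s3_kwargs={'anon': True},              # forwarded to the S3 file system
    s3_client_kwargs={'endpoint_url': 'https://s3.example.com'},
    chunk_cache_capacity=2 ** 28,          # wraps the store in a zarr LRU cache
)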
Example #8
 def get(self):
     with measure_time('get granted scopes'):
         granted_scopes = self.granted_scopes
     details = bool(int(self.params.get_query_argument('details', '0')))
     tile_client = self.params.get_query_argument('tiles', None)
     point = self.params.get_query_argument_point('point', None)
     response = get_datasets(self.service_context,
                             details=details,
                             client=tile_client,
                             point=point,
                             base_url=self.base_url,
                             granted_scopes=granted_scopes)
     self.set_header('Content-Type', 'application/json')
     self.write(json.dumps(response, indent=None if details else 2))
Example #9
def open_ml_dataset_from_object_storage(
        ctx: ServiceContext,
        dataset_descriptor: DatasetDescriptor) -> MultiLevelDataset:
    ds_id = dataset_descriptor.get('Identifier')

    path = dataset_descriptor.get('Path')
    if not path:
        raise ServiceConfigError(
            f"Missing 'Path' entry in dataset descriptor {ds_id}")

    data_format = dataset_descriptor.get('Format', FORMAT_NAME_ZARR)

    s3_client_kwargs = {}
    if 'Endpoint' in dataset_descriptor:
        s3_client_kwargs['endpoint_url'] = dataset_descriptor['Endpoint']
    if 'Region' in dataset_descriptor:
        s3_client_kwargs['region_name'] = dataset_descriptor['Region']
    obs_file_system = s3fs.S3FileSystem(anon=True,
                                        client_kwargs=s3_client_kwargs)

    if data_format == FORMAT_NAME_ZARR:
        store = s3fs.S3Map(root=path, s3=obs_file_system, check=False)
        cached_store = zarr.LRUStoreCache(store, max_size=2**28)
        with measure_time(tag=f"opened remote zarr dataset {path}"):
            consolidated = obs_file_system.exists(f'{path}/.zmetadata')
            ds = assert_cube(
                xr.open_zarr(cached_store, consolidated=consolidated))
        return BaseMultiLevelDataset(ds)

    if data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened remote levels dataset {path}"):
            return ObjectStorageMultiLevelDataset(
                ds_id,
                obs_file_system,
                path,
                exception_type=ServiceConfigError)

    raise ServiceConfigError(
        f"Illegal data format {data_format!r} for dataset {ds_id}")
Example #10
def open_ml_dataset_from_local_fs(path: str,
                                  data_format: str = None,
                                  ds_id: str = None,
                                  exception_type: type = ValueError,
                                  **kwargs) -> MultiLevelDataset:
    data_format = data_format or guess_ml_dataset_format(path)

    if data_format == FORMAT_NAME_NETCDF4:
        with measure_time(tag=f"opened local NetCDF dataset {path}"):
            ds = assert_cube(xr.open_dataset(path, **kwargs))
            return BaseMultiLevelDataset(ds, ds_id=ds_id)
    elif data_format == FORMAT_NAME_ZARR:
        with measure_time(tag=f"opened local zarr dataset {path}"):
            ds = assert_cube(xr.open_zarr(path, **kwargs))
            return BaseMultiLevelDataset(ds, ds_id=ds_id)
    elif data_format == FORMAT_NAME_LEVELS:
        with measure_time(tag=f"opened local levels dataset {path}"):
            return FileStorageMultiLevelDataset(path,
                                                ds_id=ds_id,
                                                zarr_kwargs=kwargs)

    raise exception_type(
        f'Unrecognized multi-level dataset format {data_format!r} for path {path!r}'
    )
Example #11
def get_time_series_for_geometry(ctx: ServiceContext,
                                 ds_name: str,
                                 var_name: str,
                                 geometry: Dict,
                                 start_date: np.datetime64 = None,
                                 end_date: np.datetime64 = None,
                                 include_count: bool = False,
                                 include_stdev: bool = False,
                                 max_valids: int = None) -> Dict:
    """
    Get the time-series for a given *geometry*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geometry: The geometry, usually a point or polygon.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param include_count: Whether to include the number of valid observations in the result.
    :param include_stdev: Whether to include the standard deviation in the result.
    :param max_valids: Optional number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    if not GeoJSON.is_geometry(geometry):
        raise ServiceBadRequestError("Invalid GeoJSON geometry")
    if isinstance(geometry, dict):
        geometry = shapely.geometry.shape(geometry)
    with measure_time() as time_result:
        result = _get_time_series_for_geometry(dataset,
                                               var_name,
                                               geometry,
                                               start_date=start_date,
                                               end_date=end_date,
                                               include_count=include_count,
                                               include_stdev=include_stdev,
                                               max_valids=max_valids)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_geometry: dataset id {ds_name}, variable {var_name}, '
            f'geometry type {geometry.geom_type}, '
            f'size={len(result["results"])}, took {time_result.duration} seconds'
        )
    return result
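
A usage sketch for the geometry variant. The GeoJSON dict is the standard encoding accepted by shapely.geometry.shape() above; the dataset and variable names are hypothetical:

# Hypothetical identifiers; the polygon is illustrative.
polygon = {
    'type': 'Polygon',
    'coordinates': [[[4.0, 52.0], [5.0, 52.0], [5.0, 53.0],
                     [4.0, 53.0], [4.0, 52.0]]],
}
result = get_time_series_for_geometry(ctx, 'demo-cube', 'chl',
                                      polygon,
                                      include_count=True,
                                      max_valids=-1)  # only valid values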
Example #12
def open_ml_dataset_from_python_code(
        script_path: str,
        callable_name: str,
        input_ml_dataset_ids: Sequence[str] = None,
        input_ml_dataset_getter: Callable[[str], MultiLevelDataset] = None,
        input_parameters: Mapping[str, Any] = None,
        ds_id: str = None,
        exception_type: type = ValueError) -> MultiLevelDataset:
    with measure_time(tag=f"opened memory dataset {script_path}"):
        return ComputedMultiLevelDataset(script_path,
                                         callable_name,
                                         input_ml_dataset_ids,
                                         input_ml_dataset_getter,
                                         input_parameters,
                                         ds_id=ds_id,
                                         exception_type=exception_type)
Example #13
    def _get_dataset_lazily(self, index: int, **zarr_kwargs) -> xr.Dataset:
        """
        Read the dataset for the level at given *index*.

        :param index: the level index
        :param zarr_kwargs: kwargs passed to xr.open_zarr()
        :return: the dataset for the level at *index*.
        """
        ext, level_path = self._level_paths[index]
        if ext == ".link":
            with open(level_path, "r") as fp:
                level_path = fp.read()
                # resolve a relative level_path against the parent of the levels directory
                if not os.path.isabs(level_path):
                    base_dir = os.path.dirname(self._dir_path)
                    level_path = os.path.join(base_dir, level_path)
        with measure_time(
                tag=f"opened local dataset {level_path} for level {index}"):
            return assert_cube(xr.open_zarr(level_path, **zarr_kwargs),
                               name=level_path)
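
The ".link" branch above implies a simple indirection convention: a text file whose entire content is the (possibly relative) path of the actual Zarr store for that level, with relative paths resolved against the parent of the levels directory. A hypothetical sketch of creating such a link by hand (all paths and the "0.link" file name are made up):

import os

# Hypothetical: level 0 of 'cubes/demo.levels' redirects to a shared store.
levels_dir = 'cubes/demo.levels'
os.makedirs(levels_dir, exist_ok=True)
with open(os.path.join(levels_dir, '0.link'), 'w') as fp:
    fp.write('shared/demo-base.zarr')  # resolved against 'cubes/'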
Example #14
 def _get_dataset_lazily(self, index: int, **kwargs) -> xr.Dataset:
     input_datasets = [
         self._input_ml_dataset_getter(ds_id).get_dataset(index)
         for ds_id in self._input_ml_dataset_ids
     ]
     try:
         with measure_time(
                 tag=f"computed in-memory dataset {self._ds_id!r} "
                     f"at level {index}"):
             computed_value = self._callable_obj(*input_datasets, **kwargs)
     except Exception as e:
         raise self._exception_type(
             f"Failed to compute in-memory dataset {self._ds_id!r} at level {index} "
             f"from function {self._callable_name!r}: {e}") from e
     if not isinstance(computed_value, xr.Dataset):
         raise self._exception_type(
             f"Failed to compute in-memory dataset {self._ds_id!r} at level {index} "
             f"from function {self._callable_name!r}: "
             f"expected an xarray.Dataset but got {type(computed_value)}")
     return assert_cube(computed_value, name=self._ds_id)
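
From the call above, a user-supplied callable receives the input datasets as positional arguments plus the input parameters as keyword arguments, and must return an xarray.Dataset. A hypothetical example of such a function:

import xarray as xr

# Hypothetical compute function; 'factor' would come from the input parameters.
def compute_dataset(ds: xr.Dataset, factor: float = 1.0) -> xr.Dataset:
    """Scale all data variables of the single input dataset by *factor*."""
    return ds * factor  # element-wise scaling; returns a new xr.Dataset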
Example #15
def augment_ml_dataset(ml_dataset: MultiLevelDataset,
                       script_path: str,
                       callable_name: str,
                       input_ml_dataset_getter: Callable[[str],
                                                         MultiLevelDataset],
                       input_ml_dataset_setter: Callable[[MultiLevelDataset],
                                                         None],
                       input_parameters: Mapping[str, Any] = None,
                       exception_type: type = ValueError):
    with measure_time(tag=f"added augmentation from {script_path}"):
        orig_id = ml_dataset.ds_id
        aug_id = uuid.uuid4()
        aug_inp_id = f'aug-input-{aug_id}'
        aug_inp_ds = IdentityMultiLevelDataset(ml_dataset, ds_id=aug_inp_id)
        input_ml_dataset_setter(aug_inp_ds)
        aug_ds = ComputedMultiLevelDataset(script_path,
                                           callable_name, [aug_inp_id],
                                           input_ml_dataset_getter,
                                           input_parameters,
                                           ds_id=f'aug-{aug_id}',
                                           exception_type=exception_type)
        return CombinedMultiLevelDataset([ml_dataset, aug_ds], ds_id=orig_id)
Example #16
def get_time_series_for_point(ctx: ServiceContext,
                              ds_name: str,
                              var_name: str,
                              lon: float,
                              lat: float,
                              start_date: np.datetime64 = None,
                              end_date: np.datetime64 = None,
                              max_valids: int = None) -> Dict:
    """
    Get the time-series for a given point.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param lon: The point's longitude in decimal degrees.
    :param lat: The point's latitude in decimal degrees.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param max_valids: Optional number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :return: Time-series data structure.
    """
    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    with measure_time() as time_result:
        result = _get_time_series_for_point(dataset,
                                            var_name,
                                            shapely.geometry.Point(lon, lat),
                                            start_date=start_date,
                                            end_date=end_date,
                                            max_valids=max_valids)
    if ctx.trace_perf:
        LOG.info(
            f'get_time_series_for_point: dataset id {ds_name}, variable {var_name}, '
            f'geometry type {shapely.geometry.Point(lon, lat)}, size={len(result["results"])}, '
            f'took {time_result.duration} seconds')
    return result
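
A minimal call sketch; the dataset and variable identifiers are hypothetical:

# Hypothetical identifiers; coordinates are in decimal degrees.
result = get_time_series_for_point(ctx, 'demo-cube', 'chl',
                                   lon=4.5, lat=52.5,
                                   max_valids=10)  # the 10 most recent valid values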
Example #17
def get_time_series(
    ctx: ServiceContext,
    ds_name: str,
    var_name: str,
    geo_json: GeoJsonObj,
    agg_methods: Union[str, Sequence[str]] = None,
    start_date: np.datetime64 = None,
    end_date: np.datetime64 = None,
    max_valids: int = None,
    incl_ancillary_vars: bool = False
) -> Union[TimeSeries, TimeSeriesCollection]:
    """
    Get the time-series for a given GeoJSON object *geo_json*.

    If *geo_json* is a single geometry or feature, a list of time-series values is returned.
    If *geo_json* is a geometry collection or feature collection, a collection of such lists
    is returned, so that the elements of the geometry/feature collection and of the time-series
    collection correspond at the same indices.

    A time-series value always contains the key "time", whose value is an ISO date/time string.
    The other entries have keys that vary with the geometry type and *agg_methods*; their values
    may be a bool, an int, a float, or None.
    For point geometries, the second key is "value".
    For non-point geometries that cover spatial areas, there is one value for each key given by *agg_methods*.

    :param ctx: Service context object
    :param ds_name: The dataset identifier.
    :param var_name: The variable name.
    :param geo_json: The GeoJSON object that is or has a geometry or a collection of geometries.
    :param start_date: An optional start date.
    :param end_date: An optional end date.
    :param agg_methods: Spatial aggregation methods for geometries that cover a spatial area.
    :param max_valids: Optional number of valid points.
           If it is None (default), missing values are also returned (as NaN);
           if it is -1, only valid values are returned;
           if it is a positive integer, the most recent valid values are returned.
    :param incl_ancillary_vars: For point geometries, include values of ancillary variables, if any.
    :return: Time-series data structure.
    """
    agg_methods = timeseries.normalize_agg_methods(
        agg_methods, exception_type=ServiceBadRequestError)

    dataset = ctx.get_time_series_dataset(ds_name, var_name=var_name)
    geo_json_geometries, is_collection = _to_geo_json_geometries(geo_json)
    geometries = _to_shapely_geometries(geo_json_geometries)

    with measure_time() as time_result:
        results = _get_time_series_for_geometries(
            dataset,
            var_name,
            geometries,
            start_date=start_date,
            end_date=end_date,
            agg_methods=agg_methods,
            max_valids=max_valids,
            incl_ancillary_vars=incl_ancillary_vars)

    if ctx.trace_perf:
        LOG.info(
            f'get_time_series: dataset id {ds_name}, variable {var_name}, '
            f'{len(results)} x {len(results[0])} values, took {time_result.duration} seconds'
        )

    return results[0] if not is_collection and len(results) == 1 else results
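
Based on the docstring above, an illustrative (not authoritative) shape of a single time-series for a point geometry versus an area geometry queried with agg_methods=['mean', 'count']:

# Illustrative values only; the key sets follow the docstring above.
point_series = [
    {'time': '2020-01-01T00:00:00Z', 'value': 0.23},
    {'time': '2020-01-06T00:00:00Z', 'value': None},  # missing observation
]
area_series = [
    {'time': '2020-01-01T00:00:00Z', 'mean': 0.21, 'count': 1024},
]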
Example #18
def gen(request: Optional[str],
        dataset_name: Optional[str],
        band_names: Optional[Tuple],
        tile_size: Optional[str],
        geometry: Optional[str],
        spatial_res: Optional[float],
        crs: Optional[str],
        time_range: Optional[str],
        time_period: Optional[str],
        time_tolerance: Optional[str],
        output_path: Optional[str],
        four_d: bool,
        verbose: bool):
    """
    Generate a data cube from SENTINEL Hub.

    By default, the command will create a Zarr dataset with 3D arrays
    for each band e.g. "B01", "B02" with dimensions "time", "lat", "lon".
    Use option "--4d" to write a single 4D array "band_data"
    with dimensions "time", "lat", "lon", "band".

    Please use command "xcube sh req" to generate example request files that can be passed as REQUEST.
    REQUEST may have JSON or YAML format.
    You can also pipe a JSON request into this command; in this case,
    the REQUEST argument can be omitted.
    """
    import json
    import os.path
    import sys
    import xarray as xr
    from xcube.core.dsio import write_dataset
    from xcube.util.perf import measure_time
    from xcube_sh.config import CubeConfig
    from xcube_sh.observers import Observers
    from xcube_sh.sentinelhub import SentinelHub
    from xcube_sh.chunkstore import SentinelHubChunkStore

    if request:
        request_dict = _load_request(request)
    elif not sys.stdin.isatty():
        request_dict = json.load(sys.stdin)
    else:
        request_dict = {}

    cube_config_dict = request_dict.get('cube_config', {})
    _overwrite_config_params(cube_config_dict,
                             dataset_name=dataset_name,
                             band_names=band_names if band_names else None,  # because of multiple=True
                             tile_size=tile_size,
                             geometry=geometry,
                             spatial_res=spatial_res,
                             crs=crs,
                             time_range=time_range,
                             time_period=time_period,
                             time_tolerance=time_tolerance,
                             four_d=four_d)

    input_config_dict = request_dict.get('input_config', {})
    if 'datastore_id' in input_config_dict:
        input_config_dict = dict(input_config_dict)
        datastore_id = input_config_dict.pop('datastore_id')
        if datastore_id != 'sentinelhub':
            warnings.warn(f'Unknown datastore_id={datastore_id!r} encountered in request. Ignoring it...')
    # _overwrite_config_params(input_config_dict, ...)
    # TODO: validate input_config_dict

    output_config_dict = request_dict.get('output_config', {})
    _overwrite_config_params(output_config_dict,
                             path=output_path)
    # TODO: validate output_config_dict

    cube_config = CubeConfig.from_dict(cube_config_dict,
                                       exception_type=click.ClickException)

    if 'path' in output_config_dict:
        output_path = output_config_dict.pop('path')
    else:
        output_path = DEFAULT_GEN_OUTPUT_PATH
    if not _is_bucket_url(output_path) and os.path.exists(output_path):
        raise click.ClickException(f'Output {output_path} already exists. Move it away first.')

    sentinel_hub = SentinelHub(**input_config_dict)

    print(f'Writing cube to {output_path}...')

    with measure_time() as cm:
        store = SentinelHubChunkStore(sentinel_hub, cube_config)
        request_collector = Observers.request_collector()
        store.add_observer(request_collector)
        if verbose:
            store.add_observer(Observers.request_dumper())
        cube = xr.open_zarr(store)
        if _is_bucket_url(output_path):
            client_kwargs = {k: output_config_dict.pop(k)
                             for k in ('provider_access_key_id', 'provider_secret_access_key')
                             if k in output_config_dict}
            write_dataset(cube, output_path, format_name='zarr', client_kwargs=client_kwargs, **output_config_dict)
        else:
            write_dataset(cube, output_path, **output_config_dict)

    print(f"Cube written to {output_path}, took {'%.2f' % cm.duration} seconds.")

    if verbose:
        request_collector.stats.dump()
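
For orientation, a hypothetical request structure matching the keys this command reads ('cube_config', 'input_config', 'output_config'); the file itself may be JSON or YAML, and all concrete values are made up:

# Hypothetical request contents, shown as the equivalent Python dict.
request_dict = {
    'input_config': {'datastore_id': 'sentinelhub'},
    'cube_config': {
        'dataset_name': 'S2L2A',
        'band_names': ['B02', 'B03'],
        'geometry': '10.0,54.0,11.0,55.0',  # assumed bounding-box string
        'spatial_res': 0.001,
        'time_range': '2020-06-01,2020-06-30',
    },
    'output_config': {'path': 'demo-cube.zarr'},
}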