Example #1
 def add_store_config(self, store_instance_id: str,
                      store_config: DataStoreConfig):
     assert_instance(store_instance_id, str, 'store_instance_id')
     assert_instance(store_config, DataStoreConfig, 'store_config')
     if store_instance_id in self._instances:
         self._instances[store_instance_id].close()
     self._instances[store_instance_id] = DataStoreInstance(store_config)
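All of these snippets rely on xcube's assert_instance helper from xcube.util.assertions. As a rough sketch, such a helper reduces to an isinstance check that raises on mismatch; the exact exception type and message in xcube may differ:

from typing import Any, Tuple, Type, Union

def assert_instance(value: Any,
                    dtype: Union[Type, Tuple[Type, ...]],
                    name: str = 'value'):
    # Raise if value is not an instance of the expected type(s).
    if not isinstance(value, dtype):
        raise TypeError(f'{name} must be an instance of {dtype},'
                        f' but was {type(value)}')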
Example #2
File: s3.py Project: micder/xcube
 def write_data(self,
                data: Any,
                data_id: str = None,
                writer_id: str = None,
                replace: bool = False,
                **write_params) -> str:
     assert_instance(data, (xr.Dataset, MultiLevelDataset))
     if not writer_id:
         if isinstance(data, MultiLevelDataset):
             predicate = get_data_accessor_predicate(
                 type_specifier=TYPE_SPECIFIER_MULTILEVEL_DATASET,
                 format_id='levels',
                 storage_id=_STORAGE_ID)
         elif isinstance(data, xr.Dataset):
             predicate = get_data_accessor_predicate(
                 type_specifier=TYPE_SPECIFIER_DATASET,
                 format_id='zarr',
                 storage_id=_STORAGE_ID)
         else:
             raise DataStoreError(f'Unsupported data type "{type(data)}"')
         extensions = find_data_writer_extensions(predicate=predicate)
         writer_id = extensions[0].name
     data_id = self._ensure_valid_data_id(data_id, data)
     path = self._resolve_data_id_to_path(data_id)
     self._new_s3_writer(writer_id).write_data(data,
                                               data_id=path,
                                               replace=replace,
                                               **write_params)
     self.register_data(data_id, data)
     return data_id
Example #3
 def write_data(self,
                data: xr.Dataset,
                data_id: str,
                replace=False,
                **write_params):
     assert_instance(data, xr.Dataset, 'data')
     data.to_zarr(data_id, mode='w' if replace else None, **write_params)
Example #4
 def write_data(self,
                data: xr.Dataset,
                data_id: str,
                replace=False,
                **write_params):
     assert_instance(data, xr.Dataset, 'data')
     data.to_netcdf(data_id, **write_params)
Example #5
    def write_data(self,
                   data: xr.Dataset,
                   data_id: str,
                   replace=False,
                   **write_params) -> str:
        assert_instance(data, xr.Dataset, name='data')
        assert_instance(data_id, str, name='data_id')
        fs, root, write_params = self.load_fs(write_params)
        if not replace and fs.exists(data_id):
            raise DataStoreError(f'Data resource {data_id} already exists')

        # This doesn't yet work as expected with fsspec and netcdf:
        # engine = write_params.pop('engine', 'scipy')
        # with fs.open(data_id, 'wb') as file:
        #     data.to_netcdf(file, engine=engine, **write_params)

        is_local = is_local_fs(fs)
        if is_local:
            file_path = data_id
        else:
            _, file_path = new_temp_file(suffix='.nc')
        engine = write_params.pop('engine', 'netcdf4')
        data.to_netcdf(file_path, engine=engine, **write_params)
        if not is_local:
            fs.put_file(file_path, data_id)
        return data_id
Example #6
def _assert_valid_xy_coords(xy_coords: Any):
    assert_instance(xy_coords, xr.DataArray, name='xy_coords')
    assert_true(
        xy_coords.ndim == 3 and xy_coords.shape[0] == 2
        and xy_coords.shape[1] >= 2 and xy_coords.shape[2] >= 2,
        'xy_coords must have dimensions'
        ' (2, height, width) with height >= 2 and width >= 2')
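For illustration, a minimal DataArray that satisfies this check (dimension names and values are made up):

import numpy as np
import xarray as xr

# Shape (2, height, width): one plane per coordinate axis.
xy_coords = xr.DataArray(np.zeros((2, 3, 4)),
                         dims=('coord', 'y', 'x'))
_assert_valid_xy_coords(xy_coords)  # ndim == 3, shape (2, 3, 4): passes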
Example #7
 def __init__(self,
              cube_config: CubeConfig,
              store_pool: DataStorePool = None):
     assert_instance(cube_config, CubeConfig, 'cube_config')
     if store_pool is not None:
         assert_instance(store_pool, DataStorePool, 'store_pool')
     self._cube_config = cube_config
     self._store_pool = store_pool
Example #8
 def _inject_attrs(self, attrs: Dict[str, Any]):
     assert_instance(attrs, dict, name='attrs')
     schema = self.get_schema()
     assert_true(isinstance(schema, JsonObjectSchema),
                 message='schema must be a JSON object schema')
     all_attrs = {k: None for k in (schema.properties or {}).keys()}
     all_attrs.update(attrs)
     JsonObjectSchema.inject_attrs(self, all_attrs)
Example #9
 def __init__(self, store_configs: Dict[str, DataStoreConfig] = None):
     if store_configs is not None:
         assert_instance(store_configs, dict, name='store_configs')
         self._instances: Dict[str, DataStoreInstance] = {
             k: DataStoreInstance(v)
             for k, v in store_configs.items()
         }
     else:
         self._instances: Dict[str, DataStoreInstance] = {}
Example #10
 def __init__(self,
              request: CubeGeneratorRequest,
              store_pool: DataStorePool = None):
     assert_instance(request, CubeGeneratorRequest, name='request')
     if store_pool is not None:
         assert_instance(store_pool, DataStorePool, name='store_pool')
     self._request: CubeGeneratorRequest = request
     self._store_pool: Optional[DataStorePool] = store_pool
     self._dataset_descriptors: Optional[Sequence[DatasetDescriptor]] = None
Example #11
 def __init__(self,
              input_configs: Sequence[InputConfig],
              store_pool: DataStorePool = None):
     assert_true(len(input_configs) > 0,
                 'At least one input must be given')
     if store_pool is not None:
         assert_instance(store_pool, DataStorePool, 'store_pool')
     self._input_configs = input_configs
     self._store_pool = store_pool
Example #12
 def __init__(self, store_configs: DataStoreConfigDict = None):
     if store_configs is not None:
         assert_instance(store_configs, dict, name='store_configs')
     else:
         store_configs = {}
     self._instances: DataStoreInstanceDict = {
         k: DataStoreInstance(v)
         for k, v in store_configs.items()
     }
Example #13
 def __init__(self,
              base_dataset: xr.Dataset,
              tile_grid: TileGrid = None,
              ds_id: str = None):
     assert_instance(base_dataset, xr.Dataset, name='base_dataset')
     self._base_cube, grid_mapping, _ = decode_cube(base_dataset,
                                                    force_non_empty=True)
     super().__init__(grid_mapping=grid_mapping,
                      tile_grid=tile_grid,
                      ds_id=ds_id)
Example #14
 def __init__(self,
              service_config: ServiceConfig,
              progress_period: float = 1.0,
              raise_on_error: bool = False,
              verbosity: int = 0):
     super().__init__(raise_on_error=raise_on_error, verbosity=verbosity)
     assert_instance(service_config, ServiceConfig, 'service_config')
     assert_instance(progress_period, (int, float), 'progress_period')
     self._service_config: ServiceConfig = service_config
     self._access_token: Optional[str] = service_config.access_token
     self._progress_period: float = progress_period
Example #15
 def __init__(self,
              store_id: str,
              store_params: Dict[str, Any] = None,
              title: str = None,
              description: str = None):
     assert_given(store_id, name='store_id')
     if store_params is not None:
         assert_instance(store_params, dict, name='store_params')
     self._store_id = store_id
     self._store_params = store_params
     self._title = title
     self._description = description
Example #16
 def __init__(self,
              other: Union[Mapping, MutableMapping],
              logger: Logger = LOG,
              name: Optional[str] = None):
     assert_instance(other, Mapping)
     self._other = other
     self._measure_time = measure_time_cm(logger=logger)
     self._name = name or 'chunk_store'
     if hasattr(other, 'listdir'):
         setattr(self, 'listdir', self.__listdir)
     if hasattr(other, 'getsize'):
         setattr(self, 'getsize', self.__getsize)
Example #17
 def __init__(self,
              *args,
              status_code: Optional[int] = None,
              remote_traceback: Optional[List[str]] = None,
              remote_output: Optional[List[str]] = None,
              **kwargs):
     # noinspection PyArgumentList
     super().__init__(*args, **kwargs)
     if status_code is not None:
         assert_instance(status_code, int, 'status_code')
     self._status_code = status_code
     self._remote_traceback = remote_traceback
     self._remote_output = remote_output
Example #18
 def __init__(self,
              fs: Optional[fsspec.AbstractFileSystem] = None,
              root: str = '',
              max_depth: Optional[int] = 1,
              read_only: bool = False):
     if fs is not None:
         assert_instance(fs, fsspec.AbstractFileSystem, name='fs')
     self._fs = fs
     self._raw_root: str = root or ''
     self._root: Optional[str] = None
     self._max_depth = max_depth
     self._read_only = read_only
     self._lock = RLock()
Example #19
    def __init__(self,
                 store_pool: DataStorePool = None,
                 raise_on_error: bool = False,
                 verbosity: int = 0):
        super().__init__(raise_on_error=raise_on_error, verbosity=verbosity)
        if store_pool is not None:
            assert_instance(store_pool, DataStorePool, 'store_pool')

        self._store_pool = store_pool if store_pool is not None \
            else DataStorePool()
        self._generated_data_id: Optional[str] = None
        self._generated_cube: Optional[xr.Dataset] = None
        self._generated_gm: Optional[GridMapping] = None
Example #20
 def get_store_instance_id(self,
                           store_config: DataStoreConfig,
                           strict_check: bool = False) -> Optional[str]:
     assert_instance(store_config, DataStoreConfig, 'store_config')
     for instance_id, instance in self._instances.items():
         if strict_check:
             if instance.store_config == store_config:
                 return instance_id
         elif (instance.store_config.store_id == store_config.store_id
               and instance.store_config.store_params
                   == store_config.store_params):
             return instance_id
     return None
Example #21
def _assert_valid(obj: Optional[Dict[str, Any]],
                  schema: Optional[JsonObjectSchema], name: str, kind: str,
                  validator: Callable[[Dict[str, Any], JsonObjectSchema],
                                      Any]):
    if obj is None:
        return
    assert_instance(obj, dict, name=name)
    if schema is not None:
        assert_instance(schema, JsonObjectSchema, name=f'{name}_schema')
        try:
            validator(obj, schema)
        except jsonschema.ValidationError as e:
            raise DataStoreError(f'Invalid {kind}'
                                 f' detected: {e.message}') from e
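A hypothetical invocation of this helper (all names are made up, and the lambda assumes the schema object offers a validate_instance method):

_assert_valid(obj={'tile_size': 512},
              schema=params_schema,
              name='open_params',
              kind='open parameters',
              validator=lambda obj, schema: schema.validate_instance(obj))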
Example #22
 def write_data(self, data: gpd.GeoDataFrame, data_id: str,
                **write_params) -> str:
     # TODO: implement me correctly,
     #  this is not valid for shapefile AND geojson
     assert_instance(data, (gpd.GeoDataFrame, pd.DataFrame), 'data')
     fs, root, write_params = self.load_fs(write_params)
     is_local = is_local_fs(fs)
     if is_local:
         file_path = data_id
     else:
         _, file_path = new_temp_file()
     data.to_file(file_path, driver=self.get_driver_name(), **write_params)
     if not is_local:
         fs.put_file(file_path, data_id)
     return data_id
Example #23
 def __init__(self,
              dtype: Type,
              alias: Union[None, str, Sequence[str]] = None):
     """
     :param dtype: The Python data type.
     :param alias: An alias name or list of aliases.
     """
     assert_instance(dtype, type, name='dtype')
     if alias is not None:
         assert_instance(alias, (str, tuple, list), name='alias')
     self._dtype = dtype
     self._aliases = (([] if alias is None else
                       [alias] if isinstance(alias, str) else list(alias)) +
                      [self._get_fully_qualified_type_name(dtype)])
     self._alias_set = set(self._aliases)  # for faster lookup
Example #24
    def open_data(self, data_id: str, **open_params) -> xr.Dataset:
        assert_instance(data_id, str, name='data_id')
        fs, root, open_params = self.load_fs(open_params)

        # This doesn't yet work as expected with fsspec and netcdf:
        # engine = open_params.pop('engine', 'scipy')
        # with fs.open(data_id, 'rb') as file:
        #     return xr.open_dataset(file, engine=engine, **open_params)

        is_local = is_local_fs(fs)
        if is_local:
            file_path = data_id
        else:
            _, file_path = new_temp_file(suffix='.nc')
            fs.get_file(data_id, file_path)
        engine = open_params.pop('engine', 'netcdf4')
        return xr.open_dataset(file_path, engine=engine, **open_params)
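A hypothetical call to this opener (store and the path are made up); the engine defaults to 'netcdf4' but can be overridden via open_params, and for a remote filesystem the file is first copied to a local temporary file:

dataset = store.open_data('data/cube.nc', engine='netcdf4')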
Example #25
 def __init__(self,
              status: str,
              status_code: Optional[int] = None,
              result: Optional[R] = None,
              message: Optional[str] = None,
              output: Optional[Sequence[str]] = None,
              traceback: Optional[Sequence[str]] = None,
              versions: Optional[Dict[str, str]] = None):
     assert_instance(status, str, name='status')
     assert_in(status, STATUS_IDS, name='status')
     self.status = status
     self.status_code = status_code
     self.result = result
     self.message = message if message else None
     self.output = list(output) if output else None
     self.traceback = list(traceback) if traceback else None
     self.versions = dict(versions) if versions else None
Example #26
    def __init__(self,
                 tile_size: int = 256,
                 min_level: Optional[int] = None,
                 max_level: Optional[int] = None):
        """

        :param tile_size:
        :param min_level:
        :param max_level:
        """
        assert_instance(tile_size, int, name='tile_size')
        super().__init__(tile_size=(tile_size, tile_size),
                         num_level_0_tiles=(2, 1),
                         crs=GEOGRAPHIC_CRS,
                         max_resolution=180. / tile_size,
                         extent=(-180., -90., 180., 90.),
                         min_level=min_level,
                         max_level=max_level)
Example #27
 def write_data(self,
                data: xr.Dataset,
                data_id: str,
                replace=False,
                **write_params):
     assert_instance(data, xr.Dataset, 'data')
     s3 = self._s3
     if s3 is None:
         s3, write_params = self.consume_s3fs_params(write_params)
     bucket_name, write_params = self.consume_bucket_name_param(
         write_params)
     try:
         data.to_zarr(
             s3fs.S3Map(
                 root=f'{bucket_name}/{data_id}' if bucket_name else data_id,
                 s3=s3,
                 check=False),
             mode='w' if replace else None,
             **write_params)
     except ValueError as e:
         raise DataStoreError(f'{e}') from e
Example #28
 def open_data(self, data_id: str, **open_params) -> xr.Dataset:
     assert_instance(data_id, str, name='data_id')
     fs, root, open_params = self.load_fs(open_params)
     zarr_store = fs.get_mapper(data_id)
     cache_size = open_params.pop('cache_size', None)
     if isinstance(cache_size, int) and cache_size > 0:
         zarr_store = zarr.LRUStoreCache(zarr_store, max_size=cache_size)
     log_access = open_params.pop('log_access', None)
     if log_access:
         zarr_store = LoggingStore(zarr_store,
                                   name=f'zarr_store({data_id!r})')
     consolidated = open_params.pop('consolidated',
                                    fs.exists(f'{data_id}/.zmetadata'))
     try:
         return xr.open_zarr(zarr_store,
                             consolidated=consolidated,
                             **open_params)
     except ValueError as e:
         raise DataStoreError(f'Failed to open'
                              f' dataset {data_id!r}: {e}') from e
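A hypothetical call exercising the cache and logging options consumed above (store and the path are made up):

# 256 MiB LRU cache in front of the Zarr store, with access logging.
dataset = store.open_data('cube.zarr',
                          cache_size=256 * 1024 ** 2,
                          log_access=True)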
Example #29
 def __init__(self,
              path: str,
              sub_path: str = None,
              includes: Collection[str] = None,
              excludes: Collection[str] = None,
              storage_params: Dict[str, Any] = None):
     assert_instance(path, str, 'path')
     assert_given(path, 'path')
     if sub_path is not None:
         assert_instance(sub_path, str, 'sub_path')
     self._path = path
     self._sub_path = sub_path
     self._storage_params = dict(
         storage_params) if storage_params is not None else None
     self._includes = list(includes) if includes is not None else None
     self._excludes = list(excludes) if excludes is not None else None
     # computed members
     self._include_patterns = _translate_patterns(includes or [])
     self._exclude_patterns = _translate_patterns(excludes or [])
     # cached, computed members
     self._details: Optional[_FileSetDetails] = None
Example #30
 def __init__(self,
              source_images: Sequence[TiledImage],
              image_id: str = None,
              encode: bool = False,
              format: str = None,
              tile_cache: Cache = None,
              trace_perf: bool = False):
     assert_instance(source_images, (list, tuple), name='source_images')
     assert_true(len(source_images) == 3,
                 message='source_images must have length 3')
     proto_source_image = source_images[0]
     super().__init__(size=proto_source_image.size,
                      tile_size=proto_source_image.tile_size,
                      num_tiles=proto_source_image.num_tiles,
                      image_id=image_id,
                      format=format,
                      mode='RGBA',
                      tile_cache=tile_cache,
                      trace_perf=trace_perf)
     self._source_images = tuple(source_images)
     self._encode = encode