def write_data(self,
               data: Any,
               data_id: str = None,
               writer_id: str = None,
               replace: bool = False,
               **write_params) -> str:
    assert_instance(data, (xr.Dataset, MultiLevelDataset))
    if not writer_id:
        # No writer given: choose a default writer for this data type.
        if isinstance(data, MultiLevelDataset):
            predicate = get_data_accessor_predicate(
                type_specifier=TYPE_SPECIFIER_MULTILEVEL_DATASET,
                format_id='levels',
                storage_id=_STORAGE_ID)
        elif isinstance(data, xr.Dataset):
            predicate = get_data_accessor_predicate(
                type_specifier=TYPE_SPECIFIER_DATASET,
                format_id='zarr',
                storage_id=_STORAGE_ID)
        else:
            raise DataStoreError(f'Unsupported data type "{type(data)}"')
        extensions = find_data_writer_extensions(predicate=predicate)
        writer_id = extensions[0].name
    data_id = self._ensure_valid_data_id(data_id, data)
    path = self._resolve_data_id_to_path(data_id)
    self._new_s3_writer(writer_id).write_data(data,
                                              data_id=path,
                                              replace=replace,
                                              **write_params)
    self.register_data(data_id, data)
    return data_id
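# Usage sketch for write_data() above. Assumptions: `store` is an
# instance of the (unnamed) S3 store class these methods belong to,
# with a writable bucket behind it; the dataset contents are made up
# for illustration.
import numpy as np
import xarray as xr

dataset = xr.Dataset({'sst': (('time', 'lat', 'lon'),
                              np.zeros((1, 180, 360)))})
# No writer_id given, so the store falls back to the default Zarr
# writer for plain xr.Dataset instances.
data_id = store.write_data(dataset, data_id='cube.zarr', replace=True)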
def test_data_accessor_predicate(self):
    def ext(name: str) -> Extension:
        return Extension(point='test', name=name, component=object())

    # No constraints: any well-formed extension name matches.
    p = get_data_accessor_predicate()
    self.assertEqual(True, p(ext('dataset:zarr:s3')))

    p = get_data_accessor_predicate(type_specifier='dataset')
    self.assertEqual(True, p(ext('dataset:zarr:s3')))
    self.assertEqual(False, p(ext('dataset[cube]:zarr:s3')))
    self.assertEqual(False, p(ext('mldataset:levels:s3')))
    self.assertEqual(False, p(ext('mldataset[cube]:levels:s3')))
    self.assertEqual(False, p(ext('geodataframe:geojson:posix')))

    p = get_data_accessor_predicate(type_specifier='mldataset[cube]')
    self.assertEqual(False, p(ext('dataset:zarr:s3')))
    self.assertEqual(False, p(ext('dataset[cube]:zarr:s3')))
    self.assertEqual(False, p(ext('mldataset:levels:s3')))
    self.assertEqual(True, p(ext('mldataset[cube]:levels:s3')))
    self.assertEqual(False, p(ext('geodataframe:geojson:posix')))

    p = get_data_accessor_predicate(format_id='levels')
    self.assertEqual(False, p(ext('dataset:zarr:s3')))
    self.assertEqual(False, p(ext('dataset[cube]:zarr:s3')))
    self.assertEqual(True, p(ext('mldataset:levels:s3')))
    self.assertEqual(True, p(ext('mldataset[cube]:levels:s3')))
    self.assertEqual(False, p(ext('geodataframe:geojson:posix')))

    p = get_data_accessor_predicate(storage_id='posix')
    self.assertEqual(False, p(ext('dataset:zarr:s3')))
    self.assertEqual(False, p(ext('mldataset:levels:s3')))
    self.assertEqual(True, p(ext('geodataframe:geojson:posix')))

    # Wildcard parts ("*") match any requested type or format ...
    p = get_data_accessor_predicate(type_specifier='dataset')
    self.assertEqual(True, p(ext('*:*:memory')))
    p = get_data_accessor_predicate(format_id='levels')
    self.assertEqual(True, p(ext('*:*:memory')))
    p = get_data_accessor_predicate(storage_id='memory')
    self.assertEqual(True, p(ext('*:*:memory')))
    # ... but the storage id must still match.
    p = get_data_accessor_predicate(storage_id='posix')
    self.assertEqual(False, p(ext('*:*:memory')))

    # Malformed names (not exactly three ':'-separated parts) are rejected.
    with self.assertRaises(DataStoreError) as cm:
        p(ext('geodataframe,geojson:posix'))
    self.assertEqual(
        'Illegal data opener/writer extension name '
        '"geodataframe,geojson:posix"',
        f'{cm.exception}')
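# A minimal sketch of the matching rule the test above exercises,
# assuming accessor extension names follow the
# "<type_specifier>:<format_id>:<storage_id>" convention. This is an
# illustration, not xcube's actual implementation (which applies
# TypeSpecifier satisfaction semantics rather than plain equality):
def sketch_predicate(type_specifier: str = None,
                     format_id: str = None,
                     storage_id: str = None):
    def predicate(extension: Extension) -> bool:
        parts = extension.name.split(':')
        if len(parts) != 3:
            raise DataStoreError(f'Illegal data opener/writer extension '
                                 f'name "{extension.name}"')

        def match(requested: str, actual: str) -> bool:
            # None = no constraint; '*' in the name matches anything.
            return requested is None or actual == '*' or actual == requested

        return all(map(match,
                       (type_specifier, format_id, storage_id),
                       parts))
    return predicate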
def get_write_data_params_schema(self,
                                 writer_id: str = None) -> JsonObjectSchema:
    if not writer_id:
        extensions = find_data_writer_extensions(
            predicate=get_data_accessor_predicate(
                type_specifier='dataset',
                storage_id=_STORAGE_ID))
        writer_id = extensions[0].name
    return self._new_s3_writer(writer_id).get_write_data_params_schema()
def get_data_writer_ids(self, type_specifier: str = None) -> Tuple[str, ...]:
    if type_specifier:
        type_specifier = TypeSpecifier.normalize(type_specifier)
        if type_specifier == TYPE_SPECIFIER_ANY:
            type_specifier = None
    self._assert_valid_type_specifier(type_specifier)
    extensions = find_data_writer_extensions(
        predicate=get_data_accessor_predicate(
            type_specifier=type_specifier,
            storage_id=_STORAGE_ID))
    return tuple(ext.name for ext in extensions)
def get_write_data_params_schema(self,
                                 writer_id: str = None) -> JsonObjectSchema:
    if not writer_id:
        extensions = find_data_writer_extensions(
            predicate=get_data_accessor_predicate(
                type_specifier='dataset',
                format_id=_DEFAULT_FORMAT_ID,
                storage_id=_STORAGE_ID))
        assert extensions
        writer_id = extensions[0].name
    return new_data_writer(writer_id).get_write_data_params_schema()
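# Usage sketch for the writer-side queries above (same assumed `store`
# instance as before; the returned ids depend on which accessor
# extensions are actually registered, and JsonObjectSchema.to_dict()
# is assumed to be available as in xcube's jsonschema utilities):
writer_ids = store.get_data_writer_ids(type_specifier='dataset')
schema = store.get_write_data_params_schema(writer_id=writer_ids[0])
print(schema.to_dict())  # JSON Schema describing accepted write_params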
def get_data_opener_ids(self,
                        data_id: str = None,
                        type_specifier: str = None) -> Tuple[str, ...]:
    if type_specifier:
        type_specifier = TypeSpecifier.normalize(type_specifier)
        if type_specifier == TYPE_SPECIFIER_ANY:
            type_specifier = None
    self._assert_valid_type_specifier(type_specifier)
    if not type_specifier and data_id:
        # Fall back to the type specifier recorded for the resource.
        type_specifier, _, _ = self._get_accessor_id_parts(data_id)
    return tuple(ext.name for ext in find_data_opener_extensions(
        predicate=get_data_accessor_predicate(
            type_specifier=type_specifier,
            storage_id=_STORAGE_ID)))
def get_open_data_params_schema(self,
                                data_id: str = None,
                                opener_id: str = None) -> JsonObjectSchema:
    if not opener_id and data_id:
        opener_id = self._get_opener_id(data_id)
    if not opener_id:
        extensions = find_data_opener_extensions(
            predicate=get_data_accessor_predicate(
                type_specifier='dataset',
                format_id=_DEFAULT_FORMAT_ID,
                storage_id=_STORAGE_ID))
        assert extensions
        opener_id = extensions[0].name
    return self._new_s3_opener(opener_id).get_open_data_params_schema(
        data_id=data_id)
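# Matching usage sketch for the opener side (same `store` assumption;
# 'cube.zarr' stands in for any data_id known to the store):
opener_ids = store.get_data_opener_ids(data_id='cube.zarr')
schema = store.get_open_data_params_schema(data_id='cube.zarr',
                                           opener_id=opener_ids[0])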
def _get_accessor_extensions(self,
                             data_id: str,
                             get_data_accessor_extensions,
                             require=True) -> List[Extension]:
    # Resolve the (type_specifier, format_id, storage_id) triple that
    # identifies the accessor for this resource, then look up matching
    # opener/writer extensions via the supplied lookup function.
    accessor_id_parts = self._get_accessor_id_parts(data_id, require=require)
    if not accessor_id_parts:
        return []
    type_specifier, format_id, storage_id = accessor_id_parts
    predicate = get_data_accessor_predicate(type_specifier=type_specifier,
                                            format_id=format_id,
                                            storage_id=storage_id)
    extensions = get_data_accessor_extensions(predicate)
    if not extensions:
        if require:
            raise DataStoreError(
                f'No accessor found for data resource "{data_id}"')
        return []
    return extensions
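# Sketch of how the helper above is typically parameterized
# (hypothetical wrappers; these names are illustrative and not taken
# from the source):
def _get_opener_extensions(self, data_id: str, require=True) -> List[Extension]:
    return self._get_accessor_extensions(data_id,
                                         find_data_opener_extensions,
                                         require=require)

def _get_writer_extensions(self, data_id: str, require=True) -> List[Extension]:
    return self._get_accessor_extensions(data_id,
                                         find_data_writer_extensions,
                                         require=require)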