def test_load_from_key(self):
    cache_store = TracingCacheStore()
    cache = Cache(store=cache_store, capacity=1000)

    cache_store.trace = ''
    self.assertEqual(cache.get_value('k1'), None)
    self.assertEqual(cache.size, 0)
    self.assertEqual(cache_store.trace, 'can_load_from_key(k1);')

    cache_store.trace = ''
    self.assertEqual(cache.get_value('k5'), 'yyyy')
    self.assertEqual(cache.size, 600)
    self.assertEqual(
        cache_store.trace,
        'can_load_from_key(k5);load_from_key(k5);restore(k5, S/yyyy);')
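# Minimal usage sketch of the Cache API exercised by the test above, using the
# MemoryCacheStore that ServiceContext wires up for its tile cache further
# below. The import path is an assumption inferred from this module; only the
# methods that actually appear in the tests (put_value, get_value,
# remove_value, clear, size) are used here.
from xcube.util.cache import Cache, MemoryCacheStore


def _cache_usage_sketch():
    # capacity/threshold mirror ServiceContext.__init__: entries are evicted
    # once the total size exceeds capacity * threshold (here 1000 * 0.75).
    cache = Cache(MemoryCacheStore(), capacity=1000, threshold=0.75)
    cache.put_value('tile-0-0-0', b'...png bytes...')
    tile = cache.get_value('tile-0-0-0')      # -> the stored bytes
    missing = cache.get_value('no-such-key')  # -> None, as in the test above
    cache.remove_value('tile-0-0-0')
    cache.clear()
    return tile, missing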
class ServiceContext: def __init__(self, prefix: str = None, base_dir: str = None, config: Config = None, trace_perf: bool = DEFAULT_TRACE_PERF, tile_comp_mode: int = None, tile_cache_capacity: int = None, ml_dataset_openers: Dict[str, MultiLevelDatasetOpener] = None): self._prefix = normalize_prefix(prefix) self._base_dir = os.path.abspath(base_dir or '') self._config = config if config is not None else dict() self._config_mtime = 0.0 self._place_group_cache = dict() self._feature_index = 0 self._ml_dataset_openers = ml_dataset_openers self._tile_comp_mode = tile_comp_mode self._trace_perf = trace_perf self._lock = threading.RLock() self._dataset_cache = dict( ) # contains tuples of form (MultiLevelDataset, ds_descriptor) self._image_cache = dict() if tile_cache_capacity and tile_cache_capacity > 0: self._tile_cache = Cache(MemoryCacheStore(), capacity=tile_cache_capacity, threshold=0.75) else: self._tile_cache = None @property def config(self) -> Config: return self._config @config.setter def config(self, config: Config): if self._config: with self._lock: # Close all datasets for ml_dataset, _ in self._dataset_cache.values(): # noinspection PyBroadException try: ml_dataset.close() except Exception: pass # Clear all caches if self._dataset_cache: self._dataset_cache.clear() if self._image_cache: self._image_cache.clear() if self._tile_cache: self._tile_cache.clear() if self._place_group_cache: self._place_group_cache.clear() self._config = config @property def config_mtime(self) -> float: return self._config_mtime @config_mtime.setter def config_mtime(self, value: float): self._config_mtime = value @property def base_dir(self) -> str: return self._base_dir @property def tile_comp_mode(self) -> int: return self._tile_comp_mode @property def dataset_cache( self) -> Dict[str, Tuple[MultiLevelDataset, Dict[str, Any]]]: return self._dataset_cache @property def image_cache(self) -> Dict[str, Any]: return self._image_cache @property def tile_cache(self) -> Optional[Cache]: return self._tile_cache @property def trace_perf(self) -> bool: return self._trace_perf def get_service_url(self, base_url, *path: str): if self._prefix: return base_url + '/' + self._prefix + '/' + '/'.join(path) else: return base_url + '/' + '/'.join(path) def get_ml_dataset(self, ds_id: str) -> MultiLevelDataset: ml_dataset, _ = self._get_dataset_entry(ds_id) return ml_dataset def get_dataset(self, ds_id: str, expected_var_names: Collection[str] = None) -> xr.Dataset: ml_dataset, _ = self._get_dataset_entry(ds_id) dataset = ml_dataset.base_dataset if expected_var_names: for var_name in expected_var_names: if var_name not in dataset: raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"' ) return dataset def get_variable_for_z(self, ds_id: str, var_name: str, z_index: int) -> xr.DataArray: ml_dataset = self.get_ml_dataset(ds_id) dataset = ml_dataset.get_dataset(ml_dataset.num_levels - 1 - z_index) if var_name not in dataset: raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"') return dataset[var_name] def get_dataset_descriptors(self): dataset_descriptors = self._config.get('Datasets') if not dataset_descriptors: raise ServiceConfigError(f"No datasets configured") return dataset_descriptors def get_dataset_descriptor(self, ds_id: str) -> Dict[str, Any]: dataset_descriptors = self.get_dataset_descriptors() if not dataset_descriptors: raise ServiceConfigError(f"No datasets configured") dataset_descriptor = self.find_dataset_descriptor( 
dataset_descriptors, ds_id) if dataset_descriptor is None: raise ServiceResourceNotFoundError(f'Dataset "{ds_id}" not found') return dataset_descriptor def get_s3_bucket_mapping(self): s3_bucket_mapping = {} for descriptor in self.get_dataset_descriptors(): ds_id = descriptor.get('Identifier') file_system = descriptor.get('FileSystem', 'local') if file_system == 'local': local_path = descriptor.get('Path') if not os.path.isabs(local_path): local_path = os.path.join(self.base_dir, local_path) local_path = os.path.normpath(local_path) if os.path.isdir(local_path): s3_bucket_mapping[ds_id] = local_path return s3_bucket_mapping def get_tile_grid(self, ds_id: str) -> TileGrid: ml_dataset, _ = self._get_dataset_entry(ds_id) return ml_dataset.tile_grid def get_color_mapping(self, ds_id: str, var_name: str): cmap_cbar, cmap_vmin, cmap_vmax = DEFAULT_CMAP_CBAR, DEFAULT_CMAP_VMIN, DEFAULT_CMAP_VMAX dataset_descriptor = self.get_dataset_descriptor(ds_id) style_name = dataset_descriptor.get('Style', 'default') styles = self._config.get('Styles') if styles: style = None for s in styles: if style_name == s['Identifier']: style = s break # TODO: check color_mappings is not None if style: color_mappings = style.get('ColorMappings') if color_mappings: # TODO: check color_mappings is not None color_mapping = color_mappings.get(var_name) if color_mapping: cmap_vmin, cmap_vmax = color_mapping.get( 'ValueRange', (cmap_vmin, cmap_vmax)) if color_mapping.get('ColorFile') is not None: cmap_cbar = color_mapping.get( 'ColorFile', cmap_cbar) else: cmap_cbar = color_mapping.get( 'ColorBar', cmap_cbar) cmap_cbar, _ = get_cmap(cmap_cbar) return cmap_cbar, cmap_vmin, cmap_vmax else: ds = self.get_dataset(ds_id, expected_var_names=[var_name]) var = ds[var_name] cmap_cbar = var.attrs.get('color_bar_name', cmap_cbar) cmap_vmin = var.attrs.get('color_value_min', cmap_vmin) cmap_vmax = var.attrs.get('color_value_max', cmap_vmax) _LOG.warning( f'color mapping for variable {var_name!r} of dataset {ds_id!r} undefined: using defaults' ) return cmap_cbar, cmap_vmin, cmap_vmax def _get_dataset_entry( self, ds_id: str) -> Tuple[MultiLevelDataset, Dict[str, Any]]: if ds_id not in self._dataset_cache: with self._lock: self._dataset_cache[ds_id] = self._create_dataset_entry(ds_id) return self._dataset_cache[ds_id] def _create_dataset_entry( self, ds_id: str) -> Tuple[MultiLevelDataset, Dict[str, Any]]: dataset_descriptor = self.get_dataset_descriptor(ds_id) ml_dataset = self._open_ml_dataset(dataset_descriptor) return ml_dataset, dataset_descriptor def _open_ml_dataset( self, dataset_descriptor: DatasetDescriptor) -> MultiLevelDataset: fs_type = dataset_descriptor.get('FileSystem', 'local') if self._ml_dataset_openers and fs_type in self._ml_dataset_openers: ml_dataset_opener = self._ml_dataset_openers[fs_type] elif fs_type in _DEFAULT_MULTI_LEVEL_DATASET_OPENERS: ml_dataset_opener = _DEFAULT_MULTI_LEVEL_DATASET_OPENERS[fs_type] else: ds_id = dataset_descriptor.get('Identifier') raise ServiceConfigError( f"Invalid fs={fs_type!r} in dataset descriptor {ds_id!r}") return ml_dataset_opener(self, dataset_descriptor) def get_legend_label(self, ds_name: str, var_name: str): dataset = self.get_dataset(ds_name) if var_name in dataset: ds = self.get_dataset(ds_name) units = ds[var_name].units return units raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_name}"') def get_dataset_place_groups(self, ds_id: str, load_features=False) -> List[Dict]: dataset_descriptor = self.get_dataset_descriptor(ds_id) 
place_group_id_prefix = f"DS-{ds_id}-" place_groups = [] for k, v in self._place_group_cache.items(): if k.startswith(place_group_id_prefix): place_groups.append(v) if place_groups: return place_groups place_groups = self._load_place_groups(dataset_descriptor.get( "PlaceGroups", []), is_global=False, load_features=load_features) for place_group in place_groups: self._place_group_cache[place_group_id_prefix + place_group["id"]] = place_group return place_groups def get_dataset_place_group(self, ds_id: str, place_group_id: str, load_features=False) -> Dict: place_groups = self.get_dataset_place_groups(ds_id, load_features=False) for place_group in place_groups: if place_group_id == place_group['id']: if load_features: self._load_place_group_features(place_group) return place_group raise ServiceResourceNotFoundError( f'Place group "{place_group_id}" not found') def get_global_place_groups(self, load_features=False) -> List[Dict]: return self._load_place_groups(self._config.get("PlaceGroups", []), is_global=True, load_features=load_features) def get_global_place_group(self, place_group_id: str, load_features: bool = False) -> Dict: place_group_descriptor = self._get_place_group_descriptor( place_group_id) return self._load_place_group(place_group_descriptor, is_global=True, load_features=load_features) def _get_place_group_descriptor(self, place_group_id: str) -> Dict: place_group_descriptors = self._config.get("PlaceGroups", []) for place_group_descriptor in place_group_descriptors: if place_group_descriptor['Identifier'] == place_group_id: return place_group_descriptor raise ServiceResourceNotFoundError( f'Place group "{place_group_id}" not found') def _load_place_groups(self, place_group_descriptors: Dict, is_global: bool = False, load_features: bool = False) -> List[Dict]: place_groups = [] for place_group_descriptor in place_group_descriptors: place_group = self._load_place_group(place_group_descriptor, is_global=is_global, load_features=load_features) place_groups.append(place_group) return place_groups def _load_place_group(self, place_group_descriptor: Dict[str, Any], is_global: bool = False, load_features: bool = False) -> Dict[str, Any]: place_group_id = place_group_descriptor.get("PlaceGroupRef") if place_group_id: if is_global: raise ServiceError( "'PlaceGroupRef' cannot be used in a global place group") if len(place_group_descriptor) > 1: raise ServiceError( "'PlaceGroupRef' if present, must be the only entry in a 'PlaceGroups' item" ) return self.get_global_place_group(place_group_id, load_features=load_features) place_group_id = place_group_descriptor.get("Identifier") if not place_group_id: raise ServiceError( "Missing 'Identifier' entry in a 'PlaceGroups' item") if place_group_id in self._place_group_cache: place_group = self._place_group_cache[place_group_id] else: place_group_title = place_group_descriptor.get( "Title", place_group_id) place_path_wc = place_group_descriptor.get("Path") if not place_path_wc: raise ServiceError( "Missing 'Path' entry in a 'PlaceGroups' item") if not os.path.isabs(place_path_wc): place_path_wc = os.path.join(self._base_dir, place_path_wc) source_paths = glob.glob(place_path_wc) source_encoding = place_group_descriptor.get( "CharacterEncoding", "utf-8") property_mapping = place_group_descriptor.get("PropertyMapping") place_group = dict(type="FeatureCollection", features=None, id=place_group_id, title=place_group_title, propertyMapping=property_mapping, sourcePaths=source_paths, sourceEncoding=source_encoding) sub_place_group_configs = 
place_group_descriptor.get("Places") if sub_place_group_configs: raise ServiceError( "Invalid 'Places' entry in a 'PlaceGroups' item: not implemented yet" ) # sub_place_group_descriptors = place_group_config.get("Places") # if sub_place_group_descriptors: # sub_place_groups = self._load_place_groups(sub_place_group_descriptors) # place_group["placeGroups"] = sub_place_groups self._place_group_cache[place_group_id] = place_group if load_features: self._load_place_group_features(place_group) return place_group def _load_place_group_features( self, place_group: Dict[str, Any]) -> List[Dict[str, Any]]: features = place_group.get('features') if features is not None: return features source_files = place_group['sourcePaths'] source_encoding = place_group['sourceEncoding'] features = [] for source_file in source_files: with fiona.open(source_file, encoding=source_encoding) as feature_collection: for feature in feature_collection: self._remove_feature_id(feature) feature["id"] = str(self._feature_index) self._feature_index += 1 features.append(feature) place_group['features'] = features return features @classmethod def _remove_feature_id(cls, feature: Dict): cls._remove_id(feature) if "properties" in feature: cls._remove_id(feature["properties"]) @classmethod def _remove_id(cls, properties: Dict): if "id" in properties: del properties["id"] if "ID" in properties: del properties["ID"] def get_dataset_and_coord_variable(self, ds_name: str, dim_name: str): ds = self.get_dataset(ds_name) if dim_name not in ds.coords: raise ServiceResourceNotFoundError( f'Dimension {dim_name!r} has no coordinates in dataset {ds_name!r}' ) return ds, ds.coords[dim_name] @classmethod def get_var_indexers(cls, ds_name: str, var_name: str, var: xr.DataArray, dim_names: List[str], params: RequestParams) -> Dict[str, Any]: var_indexers = dict() for dim_name in dim_names: if dim_name not in var.coords: raise ServiceBadRequestError( f'dimension {dim_name!r} of variable {var_name!r} of dataset {ds_name!r} has no coordinates' ) coord_var = var.coords[dim_name] dim_value_str = params.get_query_argument(dim_name, None) try: if dim_value_str is None: var_indexers[dim_name] = coord_var.values[0] elif dim_value_str == 'current': var_indexers[dim_name] = coord_var.values[-1] elif np.issubdtype(coord_var.dtype, np.floating): var_indexers[dim_name] = float(dim_value_str) elif np.issubdtype(coord_var.dtype, np.integer): var_indexers[dim_name] = int(dim_value_str) elif np.issubdtype(coord_var.dtype, np.datetime64): if '/' in dim_value_str: date_str_1, date_str_2 = dim_value_str.split( '/', maxsplit=1) var_indexer_1 = pd.to_datetime(date_str_1) var_indexer_2 = pd.to_datetime(date_str_2) var_indexers[dim_name] = var_indexer_1 + ( var_indexer_2 - var_indexer_1) / 2 else: date_str = dim_value_str var_indexers[dim_name] = pd.to_datetime(date_str) else: raise ValueError( f'unable to convert value {dim_value_str!r} to {coord_var.dtype!r}' ) except ValueError as e: raise ServiceBadRequestError( f'{dim_value_str!r} is not a valid value for dimension {dim_name!r} ' f'of variable {var_name!r} of dataset {ds_name!r}') from e return var_indexers @classmethod def find_dataset_descriptor(cls, dataset_descriptors: List[Dict[str, Any]], ds_name: str) -> Optional[Dict[str, Any]]: # Note: can be optimized by dict/key lookup return next( (dsd for dsd in dataset_descriptors if dsd['Identifier'] == ds_name), None)
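# The dataset cache in the class above follows a check-then-lock pattern:
# _get_dataset_entry looks the id up in a plain dict and only takes the RLock
# to create and insert the entry on a miss. The stripped-down sketch below
# uses hypothetical names (it is not part of ServiceContext) and adds a second
# check under the lock, a common hardening of the same pattern that the
# original omits because it assigns unconditionally.
import threading
from typing import Any, Callable, Dict


class LazyEntryCache:
    """Caches the result of an expensive per-key factory call."""

    def __init__(self, factory: Callable[[str], Any]):
        self._factory = factory
        self._lock = threading.RLock()
        self._entries: Dict[str, Any] = {}

    def get(self, key: str) -> Any:
        if key not in self._entries:
            with self._lock:
                # Re-check under the lock so two threads racing on the same
                # key do not both invoke the factory.
                if key not in self._entries:
                    self._entries[key] = self._factory(key)
        return self._entries[key]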
def test_store_and_restore_and_discard(self):
    cache_store = TracingCacheStore()
    cache = Cache(store=cache_store, capacity=1000)
    self.assertIs(cache.store, cache_store)
    self.assertEqual(cache.size, 0)
    self.assertEqual(cache.max_size, 750)

    cache_store.trace = ''
    cache.put_value('k1', 'x')
    self.assertEqual(cache.get_value('k1'), 'x')
    self.assertEqual(cache.size, 100)
    self.assertEqual(cache_store.trace, 'store(k1, x);restore(k1, S/x);')

    cache_store.trace = ''
    cache.remove_value('k1')
    self.assertEqual(cache.size, 0)
    self.assertEqual(cache_store.trace, 'discard(k1, S/x);')

    cache_store.trace = ''
    cache.put_value('k1', 'x')
    cache.put_value('k1', 'xx')
    self.assertEqual(cache.get_value('k1'), 'xx')
    self.assertEqual(cache.size, 200)
    self.assertEqual(
        cache_store.trace,
        'store(k1, x);discard(k1, S/x);store(k1, xx);restore(k1, S/xx);')

    cache_store.trace = ''
    cache.remove_value('k1')
    self.assertEqual(cache.size, 0)
    self.assertEqual(cache_store.trace, 'discard(k1, S/xx);')

    cache_store.trace = ''
    cache.put_value('k1', 'x')
    cache.put_value('k2', 'xxx')
    cache.put_value('k3', 'xx')
    self.assertEqual(cache.get_value('k1'), 'x')
    self.assertEqual(cache.get_value('k2'), 'xxx')
    self.assertEqual(cache.get_value('k3'), 'xx')
    self.assertEqual(cache.size, 600)
    self.assertEqual(
        cache_store.trace,
        'store(k1, x);store(k2, xxx);store(k3, xx);'
        'restore(k1, S/x);restore(k2, S/xxx);restore(k3, S/xx);')

    cache_store.trace = ''
    cache.put_value('k4', 'xxxx')
    self.assertEqual(cache.size, 600)
    self.assertEqual(
        cache_store.trace,
        'store(k4, xxxx);discard(k1, S/x);discard(k2, S/xxx);')

    cache_store.trace = ''
    cache.clear()
    self.assertEqual(cache.size, 0)
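# Worked numbers behind the eviction at the end of the test above. The tracing
# store reports a size of 100 per character, and with capacity=1000 the
# asserted max_size of 750 implies a default threshold of 0.75.
sizes = {'k1': 100, 'k2': 300, 'k3': 200}   # after the three put_value calls
assert sum(sizes.values()) == 600           # matches cache.size
# put_value('k4', 'xxxx') would bring the total to 1000 > 750, so the entries
# accessed longest ago (k1, then k2) are discarded until the rest fits again:
sizes.pop('k1'); sizes.pop('k2'); sizes['k4'] = 400
assert sum(sizes.values()) == 600           # matches the final size assertion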
class ServiceContext: def __init__(self, prefix: str = None, base_dir: str = None, config: Config = None, trace_perf: bool = DEFAULT_TRACE_PERF, tile_comp_mode: int = None, tile_cache_capacity: int = None, ml_dataset_openers: Dict[str, MultiLevelDatasetOpener] = None): self._prefix = normalize_prefix(prefix) self._base_dir = os.path.abspath(base_dir or '') self._config = config if config is not None else dict() self._config_mtime = 0.0 self._place_group_cache = dict() self._feature_index = 0 self._ml_dataset_openers = ml_dataset_openers self._tile_comp_mode = tile_comp_mode self._trace_perf = trace_perf self._lock = threading.RLock() self._dataset_cache = dict( ) # contains tuples of form (MultiLevelDataset, ds_descriptor) self._image_cache = dict() if tile_cache_capacity and tile_cache_capacity > 0: self._tile_cache = Cache(MemoryCacheStore(), capacity=tile_cache_capacity, threshold=0.75) else: self._tile_cache = None @property def config(self) -> Config: return self._config @config.setter def config(self, config: Config): if self._config: with self._lock: # Close all datasets for ml_dataset, _ in self._dataset_cache.values(): # noinspection PyBroadException try: ml_dataset.close() except Exception: pass # Clear all caches if self._dataset_cache: self._dataset_cache.clear() if self._image_cache: self._image_cache.clear() if self._tile_cache: self._tile_cache.clear() if self._place_group_cache: self._place_group_cache.clear() self._config = config @property def config_mtime(self) -> float: return self._config_mtime @config_mtime.setter def config_mtime(self, value: float): self._config_mtime = value @property def base_dir(self) -> str: return self._base_dir @property def tile_comp_mode(self) -> int: return self._tile_comp_mode @property def dataset_cache( self) -> Dict[str, Tuple[MultiLevelDataset, Dict[str, Any]]]: return self._dataset_cache @property def image_cache(self) -> Dict[str, Any]: return self._image_cache @property def tile_cache(self) -> Optional[Cache]: return self._tile_cache @property def trace_perf(self) -> bool: return self._trace_perf @property def access_control(self) -> Dict[str, Any]: return dict(self._config.get('AccessControl', {})) @property def required_scopes(self) -> List[str]: return self.access_control.get('RequiredScopes', []) def get_required_dataset_scopes( self, dataset_descriptor: DatasetDescriptor) -> Set[str]: return self._get_required_scopes(dataset_descriptor, 'read:dataset', 'Dataset', dataset_descriptor['Identifier']) def get_required_variable_scopes(self, dataset_descriptor: DatasetDescriptor, var_name: str) -> Set[str]: return self._get_required_scopes(dataset_descriptor, 'read:variable', 'Variable', var_name) def _get_required_scopes(self, dataset_descriptor: DatasetDescriptor, base_scope: str, value_name: str, value: str) -> Set[str]: base_scope_prefix = base_scope + ':' pattern_scope = base_scope_prefix + '{' + value_name + '}' dataset_access_control = dataset_descriptor.get('AccessControl', {}) dataset_required_scopes = dataset_access_control.get( 'RequiredScopes', []) dataset_required_scopes = set(self.required_scopes + dataset_required_scopes) dataset_required_scopes = { scope for scope in dataset_required_scopes if scope == base_scope or scope.startswith(base_scope_prefix) } if pattern_scope in dataset_required_scopes: dataset_required_scopes.remove(pattern_scope) dataset_required_scopes.add(base_scope_prefix + value) return dataset_required_scopes def get_service_url(self, base_url, *path: str): if self._prefix: return base_url + '/' + 
self._prefix + '/' + '/'.join(path) else: return base_url + '/' + '/'.join(path) def get_ml_dataset(self, ds_id: str) -> MultiLevelDataset: ml_dataset, _ = self._get_dataset_entry(ds_id) return ml_dataset def set_ml_dataset(self, ml_dataset: MultiLevelDataset): self._set_dataset_entry( (ml_dataset, dict(Identifier=ml_dataset.ds_id, Hidden=True))) def get_dataset(self, ds_id: str, expected_var_names: Collection[str] = None) -> xr.Dataset: ml_dataset, _ = self._get_dataset_entry(ds_id) dataset = ml_dataset.base_dataset if expected_var_names: for var_name in expected_var_names: if var_name not in dataset: raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"' ) return dataset def get_variable_for_z(self, ds_id: str, var_name: str, z_index: int) -> xr.DataArray: ml_dataset = self.get_ml_dataset(ds_id) index = ml_dataset.num_levels - 1 - z_index if index < 0 or index >= ml_dataset.num_levels: raise ServiceResourceNotFoundError( f'Variable "{var_name}" has no z-index {z_index} in dataset "{ds_id}"' ) dataset = ml_dataset.get_dataset(index) if var_name not in dataset: raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"') return dataset[var_name] def get_dataset_descriptors(self): dataset_descriptors = self._config.get('Datasets') if not dataset_descriptors: raise ServiceConfigError(f"No datasets configured") return dataset_descriptors def get_dataset_descriptor(self, ds_id: str) -> Dict[str, Any]: dataset_descriptors = self.get_dataset_descriptors() if not dataset_descriptors: raise ServiceConfigError(f"No datasets configured") dataset_descriptor = self.find_dataset_descriptor( dataset_descriptors, ds_id) if dataset_descriptor is None: raise ServiceResourceNotFoundError(f'Dataset "{ds_id}" not found') return dataset_descriptor def get_s3_bucket_mapping(self): s3_bucket_mapping = {} for descriptor in self.get_dataset_descriptors(): ds_id = descriptor.get('Identifier') file_system = descriptor.get('FileSystem', 'local') if file_system == 'local': local_path = self.get_descriptor_path( descriptor, f'dataset descriptor {ds_id}') local_path = os.path.normpath(local_path) if os.path.isdir(local_path): s3_bucket_mapping[ds_id] = local_path return s3_bucket_mapping def get_tile_grid(self, ds_id: str) -> TileGrid: ml_dataset, _ = self._get_dataset_entry(ds_id) return ml_dataset.tile_grid def get_rgb_color_mapping( self, ds_id: str, norm_range: Tuple[float, float] = (0., 1.) 
) -> Tuple[List[Optional[str]], List[Tuple[float, float]]]: var_names = [None, None, None] norm_ranges = [norm_range, norm_range, norm_range] color_mappings = self.get_color_mappings(ds_id) if color_mappings: rgb_mapping = color_mappings.get('rgb') if rgb_mapping: components = 'Red', 'Green', 'Blue' for i in range(3): c = components[i] c_descriptor = rgb_mapping.get(c, {}) var_name = c_descriptor.get('Variable') norm_vmin, norm_vmax = c_descriptor.get( 'ValueRange', norm_range) var_names[i] = var_name norm_ranges[i] = norm_vmin, norm_vmax return var_names, norm_ranges def get_color_mapping(self, ds_id: str, var_name: str) -> Tuple[str, Tuple[float, float]]: cmap_name = None cmap_vmin, cmap_vmax = None, None color_mappings = self.get_color_mappings(ds_id) if color_mappings: color_mapping = color_mappings.get(var_name) if color_mapping: cmap_vmin, cmap_vmax = color_mapping.get( 'ValueRange', (None, None)) if color_mapping.get('ColorFile') is not None: cmap_name = color_mapping.get('ColorFile', cmap_name) else: cmap_name = color_mapping.get('ColorBar', cmap_name) cmap_name, _ = get_cmap(cmap_name) cmap_range = cmap_vmin, cmap_vmax if cmap_name is not None and None not in cmap_range: # noinspection PyTypeChecker return cmap_name, cmap_range ds = self.get_dataset(ds_id, expected_var_names=[var_name]) var = ds[var_name] valid_range = get_var_valid_range(var) return get_var_cmap_params(var, cmap_name, cmap_range, valid_range) def get_style(self, ds_id: str): dataset_descriptor = self.get_dataset_descriptor(ds_id) style_name = dataset_descriptor.get('Style', 'default') styles = self._config.get('Styles') if styles: for style in styles: if style_name == style['Identifier']: return style return None def get_color_mappings(self, ds_id: str) -> Optional[Dict[str, Dict[str, Any]]]: style = self.get_style(ds_id) if style: return style.get('ColorMappings') return None def _get_dataset_entry( self, ds_id: str) -> Tuple[MultiLevelDataset, DatasetDescriptor]: if ds_id not in self._dataset_cache: with self._lock: self._set_dataset_entry(self._create_dataset_entry(ds_id)) return self._dataset_cache[ds_id] def _set_dataset_entry(self, dataset_entry: Tuple[MultiLevelDataset, DatasetDescriptor]): ml_dataset, dataset_descriptor = dataset_entry self._dataset_cache[ml_dataset.ds_id] = ml_dataset, dataset_descriptor def _create_dataset_entry( self, ds_id: str) -> Tuple[MultiLevelDataset, Dict[str, Any]]: dataset_descriptor = self.get_dataset_descriptor(ds_id) ml_dataset = self._open_ml_dataset(dataset_descriptor) return ml_dataset, dataset_descriptor def _open_ml_dataset( self, dataset_descriptor: DatasetDescriptor) -> MultiLevelDataset: ds_id = dataset_descriptor.get('Identifier') fs_type = dataset_descriptor.get('FileSystem', 'local') if self._ml_dataset_openers and fs_type in self._ml_dataset_openers: ml_dataset_opener = self._ml_dataset_openers[fs_type] elif fs_type in _DEFAULT_MULTI_LEVEL_DATASET_OPENERS: ml_dataset_opener = _DEFAULT_MULTI_LEVEL_DATASET_OPENERS[fs_type] else: raise ServiceConfigError( f"Invalid fs={fs_type!r} in dataset descriptor {ds_id!r}") ml_dataset = ml_dataset_opener(self, dataset_descriptor) augmentation = dataset_descriptor.get('Augmentation') if augmentation: script_path = self.get_descriptor_path( augmentation, f"'Augmentation' of dataset descriptor {ds_id}") input_parameters = augmentation.get('InputParameters') callable_name = augmentation.get('Function', COMPUTE_VARIABLES) ml_dataset = augment_ml_dataset(ml_dataset, script_path, callable_name, self.get_ml_dataset, 
self.set_ml_dataset, input_parameters=input_parameters, exception_type=ServiceConfigError) return ml_dataset def get_legend_label(self, ds_id: str, var_name: str): dataset = self.get_dataset(ds_id) if var_name in dataset: ds = self.get_dataset(ds_id) units = ds[var_name].units return units raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"') def get_dataset_place_groups(self, ds_id: str, base_url: str, load_features=False) -> List[Dict]: dataset_descriptor = self.get_dataset_descriptor(ds_id) place_group_id_prefix = f"DS-{ds_id}-" place_groups = [] for k, v in self._place_group_cache.items(): if k.startswith(place_group_id_prefix): place_groups.append(v) if place_groups: return place_groups place_groups = self._load_place_groups(dataset_descriptor.get( "PlaceGroups", []), base_url, is_global=False, load_features=load_features) for place_group in place_groups: self._place_group_cache[place_group_id_prefix + place_group["id"]] = place_group return place_groups def get_dataset_place_group(self, ds_id: str, place_group_id: str, base_url: str, load_features=False) -> Dict: place_groups = self.get_dataset_place_groups(ds_id, base_url, load_features=False) for place_group in place_groups: if place_group_id == place_group['id']: if load_features: self._load_place_group_features(place_group) return place_group raise ServiceResourceNotFoundError( f'Place group "{place_group_id}" not found') def get_global_place_groups(self, base_url: str, load_features=False) -> List[Dict]: return self._load_place_groups(self._config.get("PlaceGroups", []), base_url, is_global=True, load_features=load_features) def get_global_place_group(self, place_group_id: str, base_url: str, load_features: bool = False) -> Dict: place_group_descriptor = self._get_place_group_descriptor( place_group_id) return self._load_place_group(place_group_descriptor, base_url, is_global=True, load_features=load_features) def _get_place_group_descriptor(self, place_group_id: str) -> Dict: place_group_descriptors = self._config.get("PlaceGroups", []) for place_group_descriptor in place_group_descriptors: if place_group_descriptor['Identifier'] == place_group_id: return place_group_descriptor raise ServiceResourceNotFoundError( f'Place group "{place_group_id}" not found') def _load_place_groups(self, place_group_descriptors: Dict, base_url: str, is_global: bool = False, load_features: bool = False) -> List[Dict]: place_groups = [] for place_group_descriptor in place_group_descriptors: place_group = self._load_place_group(place_group_descriptor, base_url, is_global=is_global, load_features=load_features) place_groups.append(place_group) return place_groups def _load_place_group(self, place_group_descriptor: Dict[str, Any], base_url: str, is_global: bool = False, load_features: bool = False) -> Dict[str, Any]: place_group_id = place_group_descriptor.get("PlaceGroupRef") if place_group_id: if is_global: raise ServiceConfigError( "'PlaceGroupRef' cannot be used in a global place group") if len(place_group_descriptor) > 1: raise ServiceConfigError( "'PlaceGroupRef' if present, must be the only entry in a 'PlaceGroups' item" ) return self.get_global_place_group(place_group_id, base_url, load_features=load_features) place_group_id = place_group_descriptor.get("Identifier") if not place_group_id: raise ServiceConfigError( "Missing 'Identifier' entry in a 'PlaceGroups' item") if place_group_id in self._place_group_cache: place_group = self._place_group_cache[place_group_id] else: place_group_title = 
place_group_descriptor.get( "Title", place_group_id) place_path_wc = self.get_descriptor_path(place_group_descriptor, f"'PlaceGroups' item") source_paths = glob.glob(place_path_wc) source_encoding = place_group_descriptor.get( "CharacterEncoding", "utf-8") join = None place_join = place_group_descriptor.get("Join") if isinstance(place_join, dict): join_path = self.get_descriptor_path( place_join, "'Join' of a 'PlaceGroups' item") join_property = place_join.get("Property") if not join_property: raise ServiceError( "Missing 'Property' entry in 'Join' of a 'PlaceGroups' item" ) join_encoding = place_join.get("CharacterEncoding", "utf-8") join = dict(path=join_path, property=join_property, encoding=join_encoding) property_mapping = place_group_descriptor.get("PropertyMapping") if property_mapping: property_mapping = dict(property_mapping) for key, value in property_mapping.items(): if isinstance(value, str) and '${base_url}' in value: property_mapping[key] = value.replace( '${base_url}', base_url) place_group = dict(type="FeatureCollection", features=None, id=place_group_id, title=place_group_title, propertyMapping=property_mapping, sourcePaths=source_paths, sourceEncoding=source_encoding, join=join) sub_place_group_configs = place_group_descriptor.get("Places") if sub_place_group_configs: raise ServiceConfigError( "Invalid 'Places' entry in a 'PlaceGroups' item: not implemented yet" ) # sub_place_group_descriptors = place_group_config.get("Places") # if sub_place_group_descriptors: # sub_place_groups = self._load_place_groups(sub_place_group_descriptors) # place_group["placeGroups"] = sub_place_groups self._place_group_cache[place_group_id] = place_group if load_features: self._load_place_group_features(place_group) return place_group def _load_place_group_features( self, place_group: Dict[str, Any]) -> List[Dict[str, Any]]: features = place_group.get('features') if features is not None: return features source_files = place_group['sourcePaths'] source_encoding = place_group['sourceEncoding'] features = [] for source_file in source_files: with fiona.open(source_file, encoding=source_encoding) as feature_collection: for feature in feature_collection: self._remove_feature_id(feature) feature["id"] = str(self._feature_index) self._feature_index += 1 features.append(feature) join = place_group['join'] if join: join_path = join['path'] join_property = join['property'] join_encoding = join['encoding'] with fiona.open(join_path, encoding=join_encoding) as feature_collection: indexed_join_features = self._get_indexed_features( feature_collection, join_property) for feature in features: properties = feature.get('properties') if isinstance(properties, dict) and join_property in properties: join_value = properties[join_property] join_feature = indexed_join_features.get(join_value) if join_feature: join_properties = join_feature.get('properties') if join_properties: properties.update(join_properties) feature['properties'] = properties place_group['features'] = features return features @classmethod def _get_indexed_features(cls, features: Sequence[Dict[str, Any]], property_name: str) -> Dict[Any, Any]: feature_index = {} for feature in features: properties = feature.get('properties') if properties and property_name in properties: property_value = properties[property_name] feature_index[property_value] = feature return feature_index @classmethod def _remove_feature_id(cls, feature: Dict): cls._remove_id(feature) @classmethod def _remove_id(cls, properties: Dict): if "id" in properties: del properties["id"] 
if "ID" in properties: del properties["ID"] def get_dataset_and_coord_variable(self, ds_name: str, dim_name: str): ds = self.get_dataset(ds_name) if dim_name not in ds.coords: raise ServiceResourceNotFoundError( f'Dimension {dim_name!r} has no coordinates in dataset {ds_name!r}' ) return ds, ds.coords[dim_name] @classmethod def get_var_indexers(cls, ds_name: str, var_name: str, var: xr.DataArray, dim_names: List[str], params: RequestParams) -> Dict[str, Any]: var_indexers = dict() for dim_name in dim_names: if dim_name not in var.coords: raise ServiceBadRequestError( f'dimension {dim_name!r} of variable {var_name!r} of dataset {ds_name!r} has no coordinates' ) coord_var = var.coords[dim_name] dim_value_str = params.get_query_argument(dim_name, None) try: if dim_value_str is None: var_indexers[dim_name] = coord_var.values[0] elif dim_value_str == 'current': var_indexers[dim_name] = coord_var.values[-1] elif np.issubdtype(coord_var.dtype, np.floating): var_indexers[dim_name] = float(dim_value_str) elif np.issubdtype(coord_var.dtype, np.integer): var_indexers[dim_name] = int(dim_value_str) elif np.issubdtype(coord_var.dtype, np.datetime64): if '/' in dim_value_str: date_str_1, date_str_2 = dim_value_str.split( '/', maxsplit=1) var_indexer_1 = pd.to_datetime(date_str_1) var_indexer_2 = pd.to_datetime(date_str_2) var_indexers[dim_name] = var_indexer_1 + ( var_indexer_2 - var_indexer_1) / 2 else: date_str = dim_value_str var_indexers[dim_name] = pd.to_datetime(date_str) else: raise ValueError( f'unable to convert value {dim_value_str!r} to {coord_var.dtype!r}' ) except ValueError as e: raise ServiceBadRequestError( f'{dim_value_str!r} is not a valid value for dimension {dim_name!r} ' f'of variable {var_name!r} of dataset {ds_name!r}') from e return var_indexers @classmethod def find_dataset_descriptor(cls, dataset_descriptors: List[Dict[str, Any]], ds_name: str) -> Optional[Dict[str, Any]]: # Note: can be optimized by dict/key lookup return next( (dsd for dsd in dataset_descriptors if dsd['Identifier'] == ds_name), None) def get_descriptor_path(self, descriptor: Dict[str, Any], descriptor_name: str, path_entry_name: str = 'Path', is_url: bool = False) -> str: path = descriptor.get(path_entry_name) if not path: raise ServiceError( f"Missing entry {path_entry_name!r} in {descriptor_name}") if not is_url and not os.path.isabs(path): path = os.path.join(self._base_dir, path) return path
class ServiceContext: def __init__(self, prefix: str = None, base_dir: str = None, config: Config = None, data_store_pool: DataStorePool = None, trace_perf: bool = DEFAULT_TRACE_PERF, tile_comp_mode: int = None, tile_cache_capacity: int = None, ml_dataset_openers: Dict[str, MultiLevelDatasetOpener] = None): self._prefix = normalize_prefix(prefix) self._base_dir = os.path.abspath(base_dir or '') self._config = config if config is not None else dict() self._config_mtime = 0.0 self._place_group_cache = dict() self._feature_index = 0 self._ml_dataset_openers = ml_dataset_openers self._tile_comp_mode = tile_comp_mode self._trace_perf = trace_perf self._lock = threading.RLock() # contains tuples of form (MultiLevelDataset, dataset_config) self._dataset_cache = dict() # cache for all dataset configs self._dataset_configs: Optional[List[DatasetConfigDict]] = None self._image_cache = dict() self._data_store_pool = data_store_pool or None if tile_cache_capacity: self._tile_cache = Cache(MemoryCacheStore(), capacity=tile_cache_capacity, threshold=0.75) else: self._tile_cache = None @property def config(self) -> Config: return self._config @config.setter def config(self, config: Config): if self._config: with self._lock: # Close all datasets for ml_dataset, _ in self._dataset_cache.values(): # noinspection PyBroadException try: ml_dataset.close() except Exception: pass # Clear all caches if self._dataset_cache: self._dataset_cache.clear() if self._image_cache: self._image_cache.clear() if self._tile_cache: self._tile_cache.clear() if self._place_group_cache: self._place_group_cache.clear() if self._data_store_pool: self._data_store_pool.remove_all_store_configs() self._dataset_configs = None self._config = config @property def config_mtime(self) -> float: return self._config_mtime @config_mtime.setter def config_mtime(self, value: float): self._config_mtime = value @property def base_dir(self) -> str: return self._base_dir @property def tile_comp_mode(self) -> int: return self._tile_comp_mode @property def dataset_cache( self) -> Dict[str, Tuple[MultiLevelDataset, Dict[str, Any]]]: return self._dataset_cache @property def image_cache(self) -> Dict[str, Any]: return self._image_cache @property def tile_cache(self) -> Optional[Cache]: return self._tile_cache @property def trace_perf(self) -> bool: return self._trace_perf @property def measure_time(self): return measure_time_cm(disabled=not self.trace_perf, logger=LOG) @property def access_control(self) -> Dict[str, Any]: return dict(self._config.get('AccessControl', {})) @property def required_scopes(self) -> List[str]: return self.access_control.get('RequiredScopes', []) def get_required_dataset_scopes( self, dataset_config: DatasetConfigDict) -> Set[str]: return self._get_required_scopes(dataset_config, 'read:dataset', 'Dataset', dataset_config['Identifier']) def get_required_variable_scopes(self, dataset_config: DatasetConfigDict, var_name: str) -> Set[str]: return self._get_required_scopes(dataset_config, 'read:variable', 'Variable', var_name) def _get_required_scopes(self, dataset_config: DatasetConfigDict, base_scope: str, value_name: str, value: str) -> Set[str]: base_scope_prefix = base_scope + ':' pattern_scope = base_scope_prefix + '{' + value_name + '}' dataset_access_control = dataset_config.get('AccessControl', {}) dataset_required_scopes = dataset_access_control.get( 'RequiredScopes', []) dataset_required_scopes = set(self.required_scopes + dataset_required_scopes) dataset_required_scopes = { scope for scope in dataset_required_scopes if 
scope == base_scope or scope.startswith(base_scope_prefix) } if pattern_scope in dataset_required_scopes: dataset_required_scopes.remove(pattern_scope) dataset_required_scopes.add(base_scope_prefix + value) return dataset_required_scopes def get_service_url(self, base_url, *path: str): # noinspection PyTypeChecker path_comp = '/'.join(path) if self._prefix: return base_url + self._prefix + '/' + path_comp else: return base_url + '/' + path_comp def get_ml_dataset(self, ds_id: str) -> MultiLevelDataset: ml_dataset, _ = self._get_dataset_entry(ds_id) return ml_dataset def set_ml_dataset(self, ml_dataset: MultiLevelDataset): self._set_dataset_entry( (ml_dataset, dict(Identifier=ml_dataset.ds_id, Hidden=True))) def get_dataset(self, ds_id: str, expected_var_names: Collection[str] = None) -> xr.Dataset: ml_dataset, _ = self._get_dataset_entry(ds_id) dataset = ml_dataset.base_dataset if expected_var_names: for var_name in expected_var_names: if var_name not in dataset: raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"' ) return dataset def get_time_series_dataset(self, ds_id: str, var_name: str = None) -> xr.Dataset: dataset_config = self.get_dataset_config(ds_id) ts_ds_name = dataset_config.get('TimeSeriesDataset', ds_id) try: # Try to get more efficient, time-chunked dataset return self.get_dataset( ts_ds_name, expected_var_names=[var_name] if var_name else None) except ServiceResourceNotFoundError: # This happens, if the dataset pointed to by 'TimeSeriesDataset' # does not contain the variable given by var_name. return self.get_dataset( ds_id, expected_var_names=[var_name] if var_name else None) def get_variable_for_z(self, ds_id: str, var_name: str, z_index: int) -> xr.DataArray: ml_dataset = self.get_ml_dataset(ds_id) index = ml_dataset.num_levels - 1 - z_index if index < 0 or index >= ml_dataset.num_levels: raise ServiceResourceNotFoundError( f'Variable "{var_name}" has no z-index {z_index} in dataset "{ds_id}"' ) dataset = ml_dataset.get_dataset(index) if var_name not in dataset: raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"') return dataset[var_name] def get_dataset_configs(self) -> List[DatasetConfigDict]: if self._dataset_configs is None: with self._lock: dataset_configs = self._config.get('Datasets', []) dataset_configs += \ self.get_dataset_configs_from_stores() self._dataset_configs = dataset_configs self._maybe_assign_store_instance_ids() return self._dataset_configs def _maybe_assign_store_instance_ids(self): assignable_dataset_configs = [ dc for dc in self._dataset_configs if 'StoreInstanceId' not in dc and dc.get('FileSystem', 'file') in NON_MEMORY_FILE_SYSTEMS ] # split into sublists according to file system and non-root store params config_lists = [] for config in assignable_dataset_configs: store_params = self._get_other_store_params_than_root(config) file_system = config.get('FileSystem', 'file') appended = False for config_list in config_lists: if config_list[0] == file_system and \ config_list[1] == store_params: config_list[2].append(config) appended = True break if not appended: config_lists.append((file_system, store_params, [config])) data_store_pool = self.get_data_store_pool() if not data_store_pool: data_store_pool = self._data_store_pool = DataStorePool() for file_system, store_params, config_list in config_lists: # Retrieve paths per configuration paths = [dc['Path'] for dc in config_list] list.sort(paths) # Determine common prefixes of paths (and call them roots) prefixes = 
_get_common_prefixes(paths) if len(prefixes) < 1: roots = [''] else: # perform further step to merge prefixes with same start prefixes = list(set(prefixes)) prefixes.sort() roots = [] root_candidate = prefixes[0] for root in prefixes[1:]: common_root = os.path.commonprefix([root_candidate, root]) if _is_not_empty(common_root): root_candidate = common_root else: roots.append(root_candidate) root_candidate = root roots.append(root_candidate) for root in roots: # ensure root does not end with full or partial directory # or file name while not root.endswith("/") and not root.endswith("\\") and \ len(root) > 0: root = root[:-1] if root.endswith("/") or root.endswith("\\"): root = root[:-1] abs_root = root # For local file systems: Determine absolute root from base dir fs_protocol = FS_TYPE_TO_PROTOCOL.get(file_system, file_system) if fs_protocol == 'file' and not os.path.isabs(abs_root): abs_root = os.path.join(self._base_dir, abs_root) abs_root = os.path.normpath(abs_root) store_params_for_root = store_params.copy() store_params_for_root['root'] = abs_root # See if there already is a store with this configuration data_store_config = DataStoreConfig( store_id=fs_protocol, store_params=store_params_for_root) store_instance_id = data_store_pool.\ get_store_instance_id(data_store_config) if not store_instance_id: # Create new store with new unique store instance id counter = 1 while data_store_pool.has_store_instance( f'{fs_protocol}_{counter}'): counter += 1 store_instance_id = f'{fs_protocol}_{counter}' data_store_pool.add_store_config(store_instance_id, data_store_config) for config in config_list: if config['Path'].startswith(root): config['StoreInstanceId'] = store_instance_id new_path = config['Path'][len(root):] while new_path.startswith("/") or \ new_path.startswith("\\"): new_path = new_path[1:] config['Path'] = new_path def _get_other_store_params_than_root(self, dataset_config: DatasetConfigDict) \ -> Dict: if FS_TYPE_TO_PROTOCOL.get(dataset_config.get('FileSystem', 'file')) != 's3': return {} storage_options = dict() if 'Anonymous' in dataset_config: storage_options['anon'] = dataset_config['Anonymous'] client_kwargs = dict() if 'Endpoint' in dataset_config: client_kwargs['endpoint_url'] = dataset_config['Endpoint'] if 'Region' in dataset_config: client_kwargs['region_name'] = dataset_config['Region'] storage_options['client_kwargs'] = client_kwargs store_params = dict(storage_options=storage_options) return store_params def get_dataset_configs_from_stores(self) \ -> List[DatasetConfigDict]: data_store_pool = self.get_data_store_pool() if data_store_pool is None: return [] all_dataset_configs: List[DatasetConfigDict] = [] for store_instance_id in data_store_pool.store_instance_ids: LOG.info(f'scanning store {store_instance_id!r}') data_store_config = data_store_pool.get_store_config( store_instance_id) data_store = data_store_pool.get_store(store_instance_id) store_dataset_ids = data_store.get_data_ids(data_type=DATASET_TYPE) for store_dataset_id in store_dataset_ids: dataset_config_base = {} store_dataset_configs: List[DatasetConfigDict] \ = data_store_config.user_data if store_dataset_configs: for store_dataset_config in store_dataset_configs: dataset_id_pattern = store_dataset_config.get( 'Path', '*') if fnmatch.fnmatch(store_dataset_id, dataset_id_pattern): dataset_config_base = store_dataset_config break else: dataset_config_base = None if dataset_config_base is not None: LOG.debug(f'selected dataset {store_dataset_id!r}') dataset_config = dict(StoreInstanceId=store_instance_id, 
**dataset_config_base) dataset_config['Path'] = store_dataset_id dataset_config['Identifier'] = \ f'{store_instance_id}{STORE_DS_ID_SEPARATOR}' \ f'{store_dataset_id}' all_dataset_configs.append(dataset_config) # Just for testing: debug_file = 'all_dataset_configs.json' with open(debug_file, 'w') as stream: json.dump(all_dataset_configs, stream) LOG.debug(f'wrote file {debug_file!r}') return all_dataset_configs def new_dataset_metadata(self, store_instance_id: str, dataset_id: str) -> Optional[DatasetDescriptor]: data_store = self._data_store_pool.get_store(store_instance_id) dataset_metadata = data_store.describe_data(dataset_id, data_type='dataset') if dataset_metadata.crs is not None: crs = pyproj.CRS.from_string(dataset_metadata.crs) if not crs.is_geographic: LOG.warn(f'ignoring dataset {dataset_id!r} from' f' store instance {store_instance_id!r}' f' because it uses a non-geographic CRS') return None # noinspection PyTypeChecker return dataset_metadata def get_data_store_pool(self) -> Optional[DataStorePool]: data_store_configs = self._config.get('DataStores', []) if not data_store_configs or self._data_store_pool: return self._data_store_pool if not isinstance(data_store_configs, list): raise ServiceConfigError('DataStores must be a list') store_configs: Dict[str, DataStoreConfig] = {} for data_store_config_dict in data_store_configs: store_instance_id = data_store_config_dict.get('Identifier') store_id = data_store_config_dict.get('StoreId') store_params = data_store_config_dict.get('StoreParams', {}) dataset_configs = data_store_config_dict.get('Datasets') store_config = DataStoreConfig(store_id, store_params=store_params, user_data=dataset_configs) store_configs[store_instance_id] = store_config self._data_store_pool = DataStorePool(store_configs) return self._data_store_pool def get_dataset_config(self, ds_id: str) -> Dict[str, Any]: dataset_configs = self.get_dataset_configs() dataset_config = self.find_dataset_config(dataset_configs, ds_id) if dataset_config is None: raise ServiceResourceNotFoundError(f'Dataset "{ds_id}" not found') return dataset_config def get_s3_bucket_mapping(self): s3_bucket_mapping = {} for dataset_config in self.get_dataset_configs(): ds_id = dataset_config.get('Identifier') protocol = FS_TYPE_TO_PROTOCOL.get( dataset_config.get('FileSystem', 'file')) if protocol == 'file': store_instance_id = dataset_config.get('StoreInstanceId') if store_instance_id: data_store_pool = self.get_data_store_pool() store_root = data_store_pool.get_store_config( store_instance_id). \ store_params.get('root') data_id = dataset_config.get('Path') local_path = os.path.join(store_root, data_id) else: local_path = self.get_config_path( dataset_config, f'dataset configuration' f' {ds_id!r}') local_path = os.path.normpath(local_path) if os.path.isdir(local_path): s3_bucket_mapping[ds_id] = local_path return s3_bucket_mapping def get_tile_grid(self, ds_id: str) -> TileGrid: ml_dataset, _ = self._get_dataset_entry(ds_id) return ml_dataset.tile_grid def get_rgb_color_mapping( self, ds_id: str, norm_range: Tuple[float, float] = (0., 1.) 
) -> Tuple[List[Optional[str]], List[Tuple[float, float]]]: var_names = [None, None, None] norm_ranges = [norm_range, norm_range, norm_range] color_mappings = self.get_color_mappings(ds_id) if color_mappings: rgb_mapping = color_mappings.get('rgb') if rgb_mapping: components = 'Red', 'Green', 'Blue' for i in range(3): component = components[i] component_config = rgb_mapping.get(component, {}) var_name = component_config.get('Variable') norm_vmin, norm_vmax = component_config.get( 'ValueRange', norm_range) var_names[i] = var_name norm_ranges[i] = norm_vmin, norm_vmax return var_names, norm_ranges def get_color_mapping(self, ds_id: str, var_name: str) -> Tuple[str, Tuple[float, float]]: cmap_name = None cmap_vmin, cmap_vmax = None, None color_mappings = self.get_color_mappings(ds_id) if color_mappings: color_mapping = color_mappings.get(var_name) if color_mapping: cmap_vmin, cmap_vmax = color_mapping.get( 'ValueRange', (None, None)) if color_mapping.get('ColorFile') is not None: cmap_name = color_mapping.get('ColorFile', cmap_name) else: cmap_name = color_mapping.get('ColorBar', cmap_name) cmap_name, _ = get_cmap(cmap_name) cmap_range = cmap_vmin, cmap_vmax if cmap_name is not None and None not in cmap_range: # noinspection PyTypeChecker return cmap_name, cmap_range ds = self.get_dataset(ds_id, expected_var_names=[var_name]) var = ds[var_name] valid_range = get_var_valid_range(var) return get_var_cmap_params(var, cmap_name, cmap_range, valid_range) def get_style(self, ds_id: str): dataset_config = self.get_dataset_config(ds_id) style_name = dataset_config.get('Style', 'default') styles = self._config.get('Styles') if styles: for style in styles: if style_name == style['Identifier']: return style return None def get_color_mappings(self, ds_id: str) -> Optional[Dict[str, Dict[str, Any]]]: style = self.get_style(ds_id) if style: return style.get('ColorMappings') return None def _get_dataset_entry( self, ds_id: str) -> Tuple[MultiLevelDataset, DatasetConfigDict]: if ds_id not in self._dataset_cache: with self._lock: self._set_dataset_entry(self._create_dataset_entry(ds_id)) return self._dataset_cache[ds_id] def _set_dataset_entry(self, dataset_entry: Tuple[MultiLevelDataset, DatasetConfigDict]): ml_dataset, dataset_config = dataset_entry self._dataset_cache[ml_dataset.ds_id] = ml_dataset, dataset_config def _create_dataset_entry( self, ds_id: str) -> Tuple[MultiLevelDataset, Dict[str, Any]]: dataset_config = self.get_dataset_config(ds_id) ml_dataset = self._open_ml_dataset(dataset_config) return ml_dataset, dataset_config def _open_ml_dataset(self, dataset_config: DatasetConfigDict) \ -> MultiLevelDataset: ds_id: str = dataset_config.get('Identifier') store_instance_id = dataset_config.get('StoreInstanceId') if store_instance_id: data_store_pool = self.get_data_store_pool() data_store = data_store_pool.get_store(store_instance_id) data_id = dataset_config.get('Path') open_params = dataset_config.get('StoreOpenParams') or {} # Inject chunk_cache_capacity into open parameters chunk_cache_capacity = self.get_dataset_chunk_cache_capacity( dataset_config) if chunk_cache_capacity \ and (data_id.endswith('.zarr') or data_id.endswith('.levels')) \ and 'cache_size' not in open_params: open_params['cache_size'] = chunk_cache_capacity with self.measure_time(tag=f"opened dataset {ds_id!r}" f" from data store" f" {store_instance_id!r}"): dataset = data_store.open_data(data_id, **open_params) if isinstance(dataset, MultiLevelDataset): ml_dataset = dataset else: cube, _, _ = decode_cube(dataset, 
normalize=True, force_non_empty=True, force_geographic=True) ml_dataset = BaseMultiLevelDataset(cube, ds_id=ds_id) else: fs_type = dataset_config.get('FileSystem') if fs_type != 'memory': raise ServiceConfigError(f"Invalid FileSystem {fs_type!r}" f" in dataset configuration" f" {ds_id!r}") with self.measure_time(tag=f"opened dataset {ds_id!r}" f" from {fs_type!r}"): ml_dataset = _open_ml_dataset_from_python_code( self, dataset_config) augmentation = dataset_config.get('Augmentation') if augmentation: script_path = self.get_config_path( augmentation, f"'Augmentation' of dataset configuration {ds_id}") input_parameters = augmentation.get('InputParameters') callable_name = augmentation.get('Function', COMPUTE_VARIABLES) ml_dataset = augment_ml_dataset(ml_dataset, script_path, callable_name, self.get_ml_dataset, self.set_ml_dataset, input_parameters=input_parameters, exception_type=ServiceConfigError) return ml_dataset def get_legend_label(self, ds_id: str, var_name: str): dataset = self.get_dataset(ds_id) if var_name in dataset: ds = self.get_dataset(ds_id) units = ds[var_name].units return units raise ServiceResourceNotFoundError( f'Variable "{var_name}" not found in dataset "{ds_id}"') def get_dataset_place_groups(self, ds_id: str, base_url: str, load_features=False) -> List[Dict]: dataset_config = self.get_dataset_config(ds_id) place_group_id_prefix = f"DS-{ds_id}-" place_groups = [] for k, v in self._place_group_cache.items(): if k.startswith(place_group_id_prefix): place_groups.append(v) if place_groups: return place_groups place_groups = self._load_place_groups(dataset_config.get( "PlaceGroups", []), base_url, is_global=False, load_features=load_features) for place_group in place_groups: self._place_group_cache[place_group_id_prefix + place_group["id"]] = place_group return place_groups def get_dataset_place_group(self, ds_id: str, place_group_id: str, base_url: str, load_features=False) -> Dict: place_groups = self.get_dataset_place_groups(ds_id, base_url, load_features=False) for place_group in place_groups: if place_group_id == place_group['id']: if load_features: self._load_place_group_features(place_group) return place_group raise ServiceResourceNotFoundError( f'Place group "{place_group_id}" not found') def get_global_place_groups(self, base_url: str, load_features=False) -> List[Dict]: return self._load_place_groups(self._config.get("PlaceGroups", []), base_url, is_global=True, load_features=load_features) def get_global_place_group(self, place_group_id: str, base_url: str, load_features: bool = False) -> Dict: place_group_config = self._get_place_group_config(place_group_id) return self._load_place_group(place_group_config, base_url, is_global=True, load_features=load_features) def _get_place_group_config(self, place_group_id: str) -> Dict: place_group_configs = self._config.get("PlaceGroups", []) for place_group_config in place_group_configs: if place_group_config['Identifier'] == place_group_id: return place_group_config raise ServiceResourceNotFoundError( f'Place group "{place_group_id}" not found') def _load_place_groups(self, place_group_configs: Dict, base_url: str, is_global: bool = False, load_features: bool = False) -> List[Dict]: place_groups = [] for place_group_config in place_group_configs: place_group = self._load_place_group(place_group_config, base_url, is_global=is_global, load_features=load_features) place_groups.append(place_group) return place_groups def _load_place_group(self, place_group_config: Dict[str, Any], base_url: str, is_global: bool = False, 
    def _load_place_group(self,
                          place_group_config: Dict[str, Any],
                          base_url: str,
                          is_global: bool = False,
                          load_features: bool = False) -> Dict[str, Any]:
        place_group_id = place_group_config.get("PlaceGroupRef")
        if place_group_id:
            if is_global:
                raise ServiceConfigError(
                    "'PlaceGroupRef' cannot be used in a global place group")
            if len(place_group_config) > 1:
                raise ServiceConfigError(
                    "'PlaceGroupRef', if present, must be the only entry"
                    " in a 'PlaceGroups' item")
            return self.get_global_place_group(place_group_id,
                                               base_url,
                                               load_features=load_features)
        place_group_id = place_group_config.get("Identifier")
        if not place_group_id:
            raise ServiceConfigError(
                "Missing 'Identifier' entry in a 'PlaceGroups' item")
        if place_group_id in self._place_group_cache:
            place_group = self._place_group_cache[place_group_id]
        else:
            place_group_title = place_group_config.get("Title",
                                                       place_group_id)
            place_path_wc = self.get_config_path(place_group_config,
                                                 "'PlaceGroups' item")
            source_paths = glob.glob(place_path_wc)
            source_encoding = place_group_config.get("CharacterEncoding",
                                                     "utf-8")
            join = None
            place_join = place_group_config.get("Join")
            if isinstance(place_join, dict):
                join_path = self.get_config_path(
                    place_join, "'Join' of a 'PlaceGroups' item")
                join_property = place_join.get("Property")
                if not join_property:
                    raise ServiceError(
                        "Missing 'Property' entry in 'Join'"
                        " of a 'PlaceGroups' item")
                join_encoding = place_join.get("CharacterEncoding", "utf-8")
                join = dict(path=join_path,
                            property=join_property,
                            encoding=join_encoding)
            property_mapping = place_group_config.get("PropertyMapping")
            if property_mapping:
                property_mapping = dict(property_mapping)
                for key, value in property_mapping.items():
                    if isinstance(value, str) and '${base_url}' in value:
                        property_mapping[key] = value.replace('${base_url}',
                                                              base_url)
            place_group = dict(type="FeatureCollection",
                               features=None,
                               id=place_group_id,
                               title=place_group_title,
                               propertyMapping=property_mapping,
                               sourcePaths=source_paths,
                               sourceEncoding=source_encoding,
                               join=join)
            sub_place_group_configs = place_group_config.get("Places")
            if sub_place_group_configs:
                raise ServiceConfigError(
                    "Invalid 'Places' entry in a 'PlaceGroups' item:"
                    " not implemented yet")
            # sub_place_group_configs = place_group_config.get("Places")
            # if sub_place_group_configs:
            #     sub_place_groups = self._load_place_groups(
            #         sub_place_group_configs)
            #     place_group["placeGroups"] = sub_place_groups
            self._place_group_cache[place_group_id] = place_group
        if load_features:
            self._load_place_group_features(place_group)
        return place_group
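    # For reference, an illustrative (hypothetical) 'PlaceGroups' item using
    # the keys consumed by _load_place_group(); file names and the join
    # property are examples only, not taken from a real configuration:
    #
    #   PlaceGroups:
    #     - Identifier: lakes
    #       Title: Lakes
    #       Path: places/lakes-*.geojson        # may contain glob wildcards
    #       CharacterEncoding: utf-8
    #       Join:
    #         Path: places/lake-stats.geojson
    #         Property: lake_id
    #       PropertyMapping:
    #         infoUrl: ${base_url}/docs/lakes.html
    #
    # A dataset-level 'PlaceGroups' item may instead contain only
    # 'PlaceGroupRef: lakes' to reference a global place group.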
    def _load_place_group_features(
            self, place_group: Dict[str, Any]) -> List[Dict[str, Any]]:
        features = place_group.get('features')
        if features is not None:
            return features
        source_files = place_group['sourcePaths']
        source_encoding = place_group['sourceEncoding']
        features = []
        for source_file in source_files:
            with fiona.open(source_file,
                            encoding=source_encoding) as feature_collection:
                for feature in feature_collection:
                    self._remove_feature_id(feature)
                    feature["id"] = str(self._feature_index)
                    self._feature_index += 1
                    features.append(feature)
        join = place_group['join']
        if join:
            join_path = join['path']
            join_property = join['property']
            join_encoding = join['encoding']
            with fiona.open(join_path,
                            encoding=join_encoding) as feature_collection:
                indexed_join_features = self._get_indexed_features(
                    feature_collection, join_property)
            for feature in features:
                properties = feature.get('properties')
                if isinstance(properties, dict) \
                        and join_property in properties:
                    join_value = properties[join_property]
                    join_feature = indexed_join_features.get(join_value)
                    if join_feature:
                        join_properties = join_feature.get('properties')
                        if join_properties:
                            properties.update(join_properties)
                            feature['properties'] = properties
        place_group['features'] = features
        return features

    @classmethod
    def _get_indexed_features(cls,
                              features: Sequence[Dict[str, Any]],
                              property_name: str) -> Dict[Any, Any]:
        feature_index = {}
        for feature in features:
            properties = feature.get('properties')
            if properties and property_name in properties:
                property_value = properties[property_name]
                feature_index[property_value] = feature
        return feature_index

    @classmethod
    def _remove_feature_id(cls, feature: Dict):
        cls._remove_id(feature)

    @classmethod
    def _remove_id(cls, properties: Dict):
        if "id" in properties:
            del properties["id"]
        if "ID" in properties:
            del properties["ID"]

    def get_dataset_and_coord_variable(self, ds_name: str, dim_name: str):
        ds = self.get_dataset(ds_name)
        if dim_name not in ds.coords:
            raise ServiceResourceNotFoundError(
                f'Dimension {dim_name!r} has no coordinates'
                f' in dataset {ds_name!r}')
        return ds, ds.coords[dim_name]

    @classmethod
    def get_var_indexers(cls,
                         ds_name: str,
                         var_name: str,
                         var: xr.DataArray,
                         dim_names: List[str],
                         params: RequestParams) -> Dict[str, Any]:
        var_indexers = dict()
        for dim_name in dim_names:
            if dim_name not in var.coords:
                raise ServiceBadRequestError(
                    f'dimension {dim_name!r} of variable {var_name!r}'
                    f' of dataset {ds_name!r} has no coordinates')
            coord_var = var.coords[dim_name]
            dim_value_str = params.get_query_argument(dim_name, None)
            try:
                if dim_value_str is None:
                    var_indexers[dim_name] = coord_var.values[0]
                elif dim_value_str == 'current':
                    var_indexers[dim_name] = coord_var.values[-1]
                elif np.issubdtype(coord_var.dtype, np.floating):
                    var_indexers[dim_name] = float(dim_value_str)
                elif np.issubdtype(coord_var.dtype, np.integer):
                    var_indexers[dim_name] = int(dim_value_str)
                elif np.issubdtype(coord_var.dtype, np.datetime64):
                    if '/' in dim_value_str:
                        date_str_1, date_str_2 = dim_value_str.split(
                            '/', maxsplit=1)
                        var_indexer_1 = pd.to_datetime(date_str_1)
                        var_indexer_2 = pd.to_datetime(date_str_2)
                        var_indexers[dim_name] = var_indexer_1 + (
                                var_indexer_2 - var_indexer_1) / 2
                    else:
                        date_str = dim_value_str
                        var_indexers[dim_name] = pd.to_datetime(date_str)
                else:
                    raise ValueError(
                        f'unable to convert value {dim_value_str!r}'
                        f' to {coord_var.dtype!r}')
            except ValueError as e:
                raise ServiceBadRequestError(
                    f'{dim_value_str!r} is not a valid value'
                    f' for dimension {dim_name!r}'
                    f' of variable {var_name!r}'
                    f' of dataset {ds_name!r}') from e
        return var_indexers

    @classmethod
    def find_dataset_config(cls,
                            dataset_configs: List[Dict[str, Any]],
                            ds_name: str) -> Optional[Dict[str, Any]]:
        # Note: can be optimized by dict/key lookup
        return next(
            (dsd for dsd in dataset_configs if dsd['Identifier'] == ds_name),
            None)

    def get_config_path(self,
                        config: Dict[str, Any],
                        config_name: str,
                        path_entry_name: str = 'Path',
                        is_url: bool = False) -> str:
        path = config.get(path_entry_name)
        if not path:
            raise ServiceError(
                f"Missing entry {path_entry_name!r} in {config_name}")
        if not is_url and not os.path.isabs(path):
            path = os.path.join(self._base_dir, path)
        return path

    def get_dataset_chunk_cache_capacity(
            self, dataset_config: DatasetConfigDict) -> Optional[int]:
        cache_size = self.get_chunk_cache_capacity(dataset_config,
                                                   'ChunkCacheSize')
        if cache_size is None:
            cache_size = self.get_chunk_cache_capacity(
                self.config, 'DatasetChunkCacheSize')
        return cache_size
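    # Query-parameter handling in get_var_indexers(), summarized:
    #   * missing parameter       -> first coordinate value of the dimension
    #   * 'current'               -> last coordinate value
    #   * float / int dimensions  -> parsed with float() / int()
    #   * datetime64 dimensions   -> a single date, or 'start/end' whose
    #                                midpoint becomes the indexer
    # For example, a (hypothetical) request '?time=2021-01-01/2021-01-31'
    # yields the midpoint 2021-01-16 as the time indexer.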
    @classmethod
    def get_chunk_cache_capacity(cls,
                                 config: Dict[str, Any],
                                 cache_size_key: str) -> Optional[int]:
        cache_size = config.get(cache_size_key, None)
        if not cache_size:
            return None
        elif isinstance(cache_size, str):
            try:
                cache_size = parse_mem_size(cache_size)
            except ValueError:
                raise ServiceConfigError(f'Invalid {cache_size_key}')
        elif not isinstance(cache_size, int) or cache_size < 0:
            raise ServiceConfigError(f'Invalid {cache_size_key}')
        return cache_size
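# A minimal, self-contained sketch of the chunk-cache precedence implemented
# by get_dataset_chunk_cache_capacity() / get_chunk_cache_capacity() above:
# a dataset-level 'ChunkCacheSize' wins over the server-level
# 'DatasetChunkCacheSize'. The _parse_mem_size() helper below is a
# hypothetical stand-in for the real parse_mem_size() utility and only
# understands plain integers and K/M/G suffixes; it is not the library code.

from typing import Any, Dict, Optional


def _parse_mem_size(text: str) -> int:
    # Accept e.g. '512', '256M', '1G' (illustrative parser only).
    units = {'K': 1024, 'M': 1024 ** 2, 'G': 1024 ** 3}
    if text and text[-1].upper() in units:
        return int(float(text[:-1]) * units[text[-1].upper()])
    return int(text)


def _chunk_cache_capacity(config: Dict[str, Any], key: str) -> Optional[int]:
    value = config.get(key)
    if not value:
        return None
    return _parse_mem_size(value) if isinstance(value, str) else int(value)


def dataset_chunk_cache_capacity(server_config: Dict[str, Any],
                                 dataset_config: Dict[str, Any]
                                 ) -> Optional[int]:
    # Dataset-level setting first, then the server-wide default.
    capacity = _chunk_cache_capacity(dataset_config, 'ChunkCacheSize')
    if capacity is None:
        capacity = _chunk_cache_capacity(server_config,
                                         'DatasetChunkCacheSize')
    return capacity


if __name__ == '__main__':
    server_config = {'DatasetChunkCacheSize': '1G'}
    print(dataset_chunk_cache_capacity(server_config,
                                       {'ChunkCacheSize': '256M'}))  # 268435456
    print(dataset_chunk_cache_capacity(server_config, {}))           # 1073741824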