示例#1
0
文件: mldataset.py 项目: dzelge/xcube
    def __init__(self,
                 ds_id: str,
                 obs_file_system: s3fs.S3FileSystem,
                 dir_path: str,
                 zarr_kwargs: Dict[str, Any] = None,
                 exception_type=ValueError):
        """Collect the level entries found below *dir_path*.

        An entry is registered as level ``N`` when its last path component
        is ``N.zarr`` and the entry is a directory, or ``N.link`` and the
        entry is a file, where ``N`` consists of digits only.

        :param ds_id: Dataset identifier; used in the error message.
        :param obs_file_system: File system that is walked and queried.
        :param dir_path: Path whose entries are scanned for levels.
        :param zarr_kwargs: Passed to the base initializer as ``kwargs``.
        :param exception_type: Exception class raised when a level
            is missing.
        :raise exception_type: if some level in
            ``range(number of levels found)`` has no entry.
        """
        level_paths = {}
        for entry in obs_file_system.walk(dir_path, directories=True):
            # Entries may be full paths ("bucket/dir/0.zarr"); the level
            # number is encoded in the last path component only, so the
            # whole entry must not be fed to splitext()/isdigit().
            level_dir = entry.split("/")[-1]
            basename, _ = os.path.splitext(level_dir)
            if not basename.isdigit():
                continue
            is_zarr_dir = (entry.endswith(".zarr")
                           and obs_file_system.isdir(entry))
            is_link_file = (entry.endswith(".link")
                            and obs_file_system.isfile(entry))
            if is_zarr_dir or is_link_file:
                # Build the path from the last component so that dir_path
                # is not repeated when the entry is already a full path.
                level_paths[int(basename)] = dir_path + "/" + level_dir

        num_levels = len(level_paths)
        # Consistency check: levels must be exactly 0 .. num_levels - 1
        for level in range(num_levels):
            if level not in level_paths:
                raise exception_type(
                    f"Invalid dataset descriptor {ds_id!r}: missing level {level} in {dir_path}"
                )

        super().__init__(kwargs=zarr_kwargs)
        self._obs_file_system = obs_file_system
        self._dir_path = dir_path
        self._level_paths = level_paths
        self._num_levels = num_levels
示例#2
0
文件: mldataset.py 项目: manzt/xcube
    def __init__(self,
                 obs_file_system: s3fs.S3FileSystem,
                 dir_path: str,
                 zarr_kwargs: Dict[str, Any] = None,
                 ds_id: str = None,
                 exception_type: type = ValueError):
        """Discover the level entries below *dir_path* and record them.

        Every entry whose last path component is ``<digits>.zarr``
        (a directory) or ``<digits>.link`` (a file) is stored under its
        integer level as a pair ``(extension, dir_path + "/" + name)``.

        :param obs_file_system: File system that is walked and queried.
        :param dir_path: Path whose entries are scanned for levels.
        :param zarr_kwargs: Passed to the base initializer
            as ``parameters``.
        :param ds_id: Dataset identifier; passed to the base initializer
            and used in the error message.
        :param exception_type: Exception class raised when a level
            is missing.
        :raise exception_type: if some level in
            ``range(number of levels found)`` has no entry.
        """
        found = {}
        for entry in obs_file_system.walk(dir_path, directories=True):
            name = entry.rsplit("/", 1)[-1]
            stem, suffix = os.path.splitext(name)
            if not stem.isdigit():
                continue
            index = int(stem)
            zarr_dir = (entry.endswith(".zarr")
                        and obs_file_system.isdir(entry))
            if zarr_dir or (entry.endswith(".link")
                            and obs_file_system.isfile(entry)):
                found[index] = (suffix, dir_path + "/" + name)

        count = len(found)
        # The first gap in 0 .. count - 1, if any, is reported.
        level = next((i for i in range(count) if i not in found), None)
        if level is not None:
            raise exception_type(
                f"Invalid multi-level dataset {ds_id!r}: missing level {level} in {dir_path}"
            )

        super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
        self._obs_file_system = obs_file_system
        self._dir_path = dir_path
        self._level_paths = found
        self._num_levels = count
示例#3
0
    def __init__(self,
                 s3_file_system: s3fs.S3FileSystem,
                 dir_path: str,
                 zarr_kwargs: Dict[str, Any] = None,
                 ds_id: str = None,
                 chunk_cache_capacity: int = None,
                 exception_type: type = ValueError):
        """Discover the level entries listed in *dir_path* and record them.

        Every listed entry whose last path component is ``<digits>.zarr``
        (a directory) or ``<digits>.link`` (a file) is stored under its
        integer level as a pair ``(extension, dir_path + "/" + name)``.

        If *chunk_cache_capacity* is truthy, it is split into one capacity
        per level, proportional to ``(2 ** (num_levels - 1 - level)) ** 2``,
        so level 0 receives the largest share. Otherwise the per-level
        capacities are ``None``.

        :param s3_file_system: File system that is listed and queried.
        :param dir_path: Path whose entries are scanned for levels.
        :param zarr_kwargs: Passed to the base initializer
            as ``parameters``.
        :param ds_id: Dataset identifier; passed to the base initializer
            and used in the error message.
        :param chunk_cache_capacity: Total capacity to distribute
            over the levels.
        :param exception_type: Exception class raised when a level
            is missing.
        :raise exception_type: if some level in
            ``range(number of levels found)`` has no entry.
        """
        found = {}
        for entry in s3_file_system.ls(dir_path, detail=False):
            name = entry.rsplit("/", 1)[-1]
            stem, suffix = os.path.splitext(name)
            if not stem.isdigit():
                continue
            index = int(stem)
            zarr_dir = (entry.endswith(".zarr")
                        and s3_file_system.isdir(entry))
            if zarr_dir or (entry.endswith(".link")
                            and s3_file_system.isfile(entry)):
                found[index] = (suffix, dir_path + "/" + name)

        count = len(found)
        # The first gap in 0 .. count - 1, if any, is reported.
        level = next((i for i in range(count) if i not in found), None)
        if level is not None:
            raise exception_type(
                f"Invalid multi-level dataset {ds_id!r}: missing level {level} in {dir_path}"
            )

        super().__init__(ds_id=ds_id, parameters=zarr_kwargs)
        self._s3_file_system = s3_file_system
        self._dir_path = dir_path
        self._level_paths = found
        self._num_levels = count

        capacities = None
        if chunk_cache_capacity:
            # Squared power-of-two weight per level, largest at level 0.
            shares = [(2 ** (count - 1 - i)) ** 2 for i in range(count)]
            total = sum(shares)
            capacities = [
                round(chunk_cache_capacity * share / total)
                for share in shares
            ]
        self._chunk_cache_capacities = capacities