def __init__(self,
             s3_file_system: s3fs.S3FileSystem,
             dir_path: str,
             zarr_kwargs: Dict[str, Any] = None,
             ds_id: str = None,
             chunk_cache_capacity: int = None,
             exception_type: type = ValueError):
    """
    Discover the levels stored below *dir_path* and set up the instance.

    An entry listed in *dir_path* counts as a level if its name is
    ``<digits>.zarr`` and it is a directory, or ``<digits>.link`` and it
    is a file. The digits are the level index.

    :param s3_file_system: File system used to list and probe *dir_path*.
    :param dir_path: Directory that contains the level entries.
    :param zarr_kwargs: Passed to the base class as ``parameters``.
    :param ds_id: Dataset identifier; passed to the base class and used
        in the error message.
    :param chunk_cache_capacity: If truthy, the total capacity that is
        distributed over the levels (see below).
    :param exception_type: Exception class raised if a level is missing.
    :raises exception_type: if the found level indexes are not exactly
        ``0 .. number_of_found_levels - 1``.
    """
    found_levels = {}
    for entry in s3_file_system.ls(dir_path, detail=False):
        level_dir = entry.split("/")[-1]
        basename, ext = os.path.splitext(level_dir)
        if not basename.isdigit():
            continue
        level = int(basename)
        # Only probe the file system for entries with a matching suffix.
        is_zarr_dir = entry.endswith(".zarr") and s3_file_system.isdir(entry)
        if is_zarr_dir or (entry.endswith(".link")
                           and s3_file_system.isfile(entry)):
            found_levels[level] = (ext, "/".join((dir_path, level_dir)))

    level_count = len(found_levels)

    # Consistency check: with N entries found, every index 0..N-1
    # must be present. Report the lowest missing one.
    level = next(
        (index for index in range(level_count) if index not in found_levels),
        None,
    )
    if level is not None:
        raise exception_type(
            f"Invalid multi-level dataset {ds_id!r}: missing level {level} in {dir_path}"
        )

    super().__init__(ds_id=ds_id, parameters=zarr_kwargs)

    self._s3_file_system = s3_file_system
    self._dir_path = dir_path
    self._level_paths = found_levels
    self._num_levels = level_count
    self._chunk_cache_capacities = None
    if chunk_cache_capacity:
        # Level i is weighted with (2 ** (N - 1 - i)) ** 2, so level 0
        # receives the largest share of the total capacity.
        shares = [
            (2 ** (level_count - 1 - index)) ** 2
            for index in range(level_count)
        ]
        share_total = sum(shares)
        self._chunk_cache_capacities = [
            round(chunk_cache_capacity * share / share_total)
            for share in shares
        ]
def _local_create_subfolders(from_path: str, to_path: str, fs: s3fs.S3FileSystem) -> None:
    """
    Helper for creating subdirectories when calling _s3_to_local_cp.

    Lists *from_path* with *fs* and, for every listed entry whose
    ``"StorageClass"`` is ``"DIRECTORY"``, creates a directory of the same
    relative name below *to_path*. Then recurses into that sub-directory.
    Only directories are created here; nothing else is written.

    :param from_path: Path in the object storage whose sub-directories
        are mirrored.
    :param to_path: Local directory below which the sub-directories
        are created.
    :param fs: File system used to list *from_path*.
    :raises FileExistsError: if a directory to be created already exists
        (raised by ``os.makedirs``).
    """
    # Object storage keys always use "/" as separator, independently of
    # the local platform, so they must not be joined with os.path.join.
    import posixpath

    prefix = from_path + "/"
    entries = fs.ls(from_path, detail=True)
    # Strip only the leading prefix; a plain str.replace() would also remove
    # later occurrences of the same text from the key.
    subfolders = [
        entry["Key"][len(prefix):] if entry["Key"].startswith(prefix) else entry["Key"]
        for entry in entries
        if entry["StorageClass"] == "DIRECTORY"
    ]
    for sub in subfolders:
        from_sub_path = posixpath.join(from_path, sub)
        path_to_create = os.path.join(to_path, sub)
        # Log the directory that is actually created, not its parent.
        logger.debug(f"Creating local subfolder {path_to_create!r}")
        os.makedirs(path_to_create)
        _local_create_subfolders(from_sub_path, path_to_create, fs)
def ls(self, path, **kwargs):
    """
    List *path* after converting it with ``get_key``.

    The converted path and all keyword arguments are handed on to
    ``S3FileSystem.ls`` unchanged, and its result is returned as-is.
    """
    key = get_key(path)
    listing = S3FileSystem.ls(self, key, **kwargs)
    return listing