def test_joining_absolute_paths():
    """Prepending an absolute prefix and appending a relative suffix agree."""
    source = AbsolutePath("/an/absolute/path.ext")
    prefix = AbsolutePath("/prefix/absolute/path")
    suffix = source.as_relative_path()
    via_prepend = suffix.prepend(prefix)
    via_append = prefix.append(suffix)
    assert via_prepend == via_append
class GCStorageAdapter(IOAdapter):
    """IOAdapter backed by a Google Cloud Storage bucket.

    Remote objects are mirrored into a local temporary directory when read,
    and staged there before upload when written. The GCS client, bucket
    handle, and the full file listing are all created lazily on first use.
    """

    _file_list: Optional[FileList]  # cached bucket listing, built on first access
    _bucket: Optional[Any]          # cached google-cloud-storage bucket handle

    def __init__(self, project: str = "brain-deepviz", bucket: str = "lucid-flow") -> None:
        self.project_name = project
        self.bucket_name = bucket
        # Local staging area for downloads and pending uploads.
        self.tempdir = AbsolutePath(mkdtemp())
        self._file_list = None
        self._bucket = None

    @property
    def bucket(self) -> Any:
        """Lazily-created GCS bucket handle (client is built on first use)."""
        if not self._bucket:
            self._client = storage.Client(project=self.project_name)
            self._bucket = self._client.bucket(self.bucket_name)
        return self._bucket

    @property
    def file_list(self) -> FileList:
        """Lazily-populated listing of every object in the bucket."""
        if not self._file_list:
            self._file_list = FileList(project=self.project_name, bucket=self.bucket_name)
            self._file_list._get_all_gcs_files()
        return self._file_list

    def normpath(self, path: str) -> AbsolutePath:
        """Strip an optional ``gs://`` scheme and this bucket's name from *path*.

        The bucket prefix is only removed at a path-component boundary, so a
        different bucket whose name merely starts with ``self.bucket_name``
        (e.g. ``lucid-flow-2`` vs ``lucid-flow``) is left intact.
        """
        if path.startswith("gs://"):
            path = path[5:]
        if path == self.bucket_name:
            path = ""
        elif path.startswith(self.bucket_name + "/"):
            # Keep the slash after the bucket name, matching the original
            # behavior of slicing off exactly len(bucket_name) characters.
            path = path[len(self.bucket_name):]
        return AbsolutePath(path)

    @contextmanager
    def _reading(self, path: AbsolutePath, mode: str = "r+b") -> IO:
        """Download *path* into the temp dir and yield an open local file."""
        local_path = self._download(path)
        reading_file = localfs_open(local_path, mode=mode)
        try:
            yield reading_file
        finally:
            # Close even when the caller's `with` body raises.
            reading_file.close()

    @contextmanager
    def _writing(self, path: AbsolutePath, mode: str = "w+b") -> IO:
        """Yield a local staging file; on clean exit, upload it to *path*."""
        blob = storage.blob.Blob(path.as_relative_path(), self.bucket)
        local_path = self.tempdir.append(path.as_relative_path())
        makedirs(dirname(local_path), exist_ok=True)
        writing_file = localfs_open(local_path, mode=mode)
        try:
            yield writing_file
        finally:
            # Always release the handle, even on error in the caller's body.
            writing_file.close()
        # Only reached when the caller's body did not raise — a failed write
        # is not uploaded, matching the original control flow.
        blob.upload_from_filename(local_path)

    def _makedirs(self, path: str) -> None:
        """No-op: GCS has no real directories; prefixes are implicit."""
        pass

    def _glob(self, glob_path: AbsolutePath) -> List[AbsolutePath]:
        """Return all bucket paths matching *glob_path*.

        GCS returns folders iff a trailing slash is specified, so the pattern
        is tried both with and without one.
        """
        matched_paths: List[AbsolutePath] = []
        if glob_path.endswith("/"):
            other_path = AbsolutePath(glob_path[:-1])
        else:
            other_path = AbsolutePath(glob_path + "/")
        for glob_string in [glob_path, other_path]:
            matched_paths += self.file_list.glob(glob_string)
        return matched_paths

    def _exist(self, paths: List[AbsolutePath]) -> List[bool]:
        """Check each path against the cached bucket listing."""
        return [self.file_list.exists(path) for path in paths]

    def _download(self, path: AbsolutePath) -> AbsolutePath:
        """Fetch *path* from the bucket into the temp dir; return the local path."""
        local_path = self.tempdir.append(path.as_relative_path())
        makedirs(dirname(local_path), exist_ok=True)
        blob = storage.blob.Blob(path.as_relative_path(), self.bucket)
        blob.download_to_filename(local_path)
        return local_path

    def _upload(self, local_path: str, remote_path: RelativePath) -> None:
        """Upload a local file to *remote_path*, which must be bucket-relative.

        Raises:
            ValueError: if *remote_path* starts with ``/`` — that would create
                an object whose name begins with a slash. A real exception is
                used instead of ``assert``, which is stripped under ``-O``.
        """
        if remote_path.startswith("/"):
            raise ValueError(f"remote_path must be relative, got: {remote_path}")
        blob = storage.blob.Blob(remote_path, self.bucket)
        blob.upload_from_filename(local_path)