def list_artifacts(self, path=None):
    from azure.storage.blob.models import BlobPrefix
    (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    marker = None  # Used to make next list request if this one exceeded the result limit
    while True:
        results = self.client.list_blobs(container, prefix=prefix, delimiter='/',
                                         marker=marker)
        for r in results:
            if isinstance(r, BlobPrefix):  # This is a prefix for items in a subdirectory
                subdir = r.name[len(artifact_path) + 1:]
                if subdir.endswith("/"):
                    subdir = subdir[:-1]
                infos.append(FileInfo(subdir, True, None))
            else:  # Just a plain old blob
                file_name = r.name[len(artifact_path) + 1:]
                infos.append(FileInfo(file_name, False, r.properties.content_length))
        # Check whether a new marker is returned, meaning we have to make another request
        if results.next_marker:
            marker = results.next_marker
        else:
            break
    return sorted(infos, key=lambda f: f.path)
def get_file_info(path, rel_path):
    """
    Returns file metadata: location, size, etc.

    :param path: Path to the artifact.
    :param rel_path: Path of the artifact relative to the artifact root, used as the
                     ``FileInfo`` path.
    :return: `FileInfo` object.
    """
    if is_directory(path):
        return FileInfo(rel_path, True, None)
    else:
        return FileInfo(rel_path, False, os.path.getsize(path))
def list_artifacts(self, path=None):
    artifact_dir = self.path
    list_dir = os.path.join(artifact_dir, path) if path else artifact_dir
    artifact_files = self.sftp.listdir(list_dir)
    infos = []
    for file_name in artifact_files:
        file_path = file_name if path is None else os.path.join(path, file_name)
        full_file_path = os.path.join(list_dir, file_name)
        if self.sftp.isdir(full_file_path):
            infos.append(FileInfo(file_path, True, None))
        else:
            infos.append(FileInfo(file_path, False, self.sftp.stat(full_file_path).st_size))
    return infos
def test_creation_and_hydration(self):
    path = random_str(random_int(10, 50))
    is_dir = random_int(10, 2500) % 2 == 0
    size_in_bytes = random_int(1, 10000)
    fi1 = FileInfo(path, is_dir, size_in_bytes)
    self._check(fi1, path, is_dir, size_in_bytes)

    as_dict = {"path": path, "is_dir": is_dir, "file_size": size_in_bytes}
    self.assertEqual(dict(fi1), as_dict)

    proto = fi1.to_proto()
    fi2 = FileInfo.from_proto(proto)
    self._check(fi2, path, is_dir, size_in_bytes)

    fi3 = FileInfo.from_dictionary(as_dict)
    self._check(fi3, path, is_dir, size_in_bytes)
def list_artifacts(self, path=None):
    with self.get_ftp_client() as ftp:
        artifact_dir = self.path
        list_dir = os.path.join(artifact_dir, path) if path else artifact_dir
        artifact_files = ftp.nlst(list_dir)
        infos = []
        for file_name in artifact_files:
            file_path = file_name if path is None else os.path.join(path, file_name)
            full_file_path = os.path.join(list_dir, file_name)
            if self._is_dir(full_file_path):
                infos.append(FileInfo(file_path, True, None))
            else:
                size = self._size(full_file_path)
                infos.append(FileInfo(file_path, False, size))
        return infos
def _list_folders(self, bkt, prefix):
    results = bkt.list_blobs(prefix=prefix, delimiter="/")
    dir_paths = set()
    for page in results.pages:
        dir_paths.update(page.prefixes)
    return [FileInfo(path[len(prefix):-1], True, None) for path in dir_paths]
def list_artifacts(self, path=None):
    with self.get_ftp_client() as ftp:
        artifact_dir = self.path
        list_dir = posixpath.join(artifact_dir, path) if path else artifact_dir
        if not self._is_dir(ftp, list_dir):
            return []
        artifact_files = ftp.nlst(list_dir)
        artifact_files = list(filter(lambda x: x != "." and x != "..", artifact_files))
        # Make sure artifact_files is a list of file names because ftp.nlst
        # may return absolute paths.
        artifact_files = [os.path.basename(f) for f in artifact_files]
        infos = []
        for file_name in artifact_files:
            file_path = file_name if path is None else posixpath.join(path, file_name)
            full_file_path = posixpath.join(list_dir, file_name)
            if self._is_dir(ftp, full_file_path):
                infos.append(FileInfo(file_path, True, None))
            else:
                size = self._size(ftp, full_file_path)
                infos.append(FileInfo(file_path, False, size))
        return infos
def list_artifacts(self, path=None):
    (bucket, artifact_path) = self.parse_s3_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    paginator = boto3.client('s3').get_paginator("list_objects_v2")
    results = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    for result in results:
        # Subdirectories will be listed as "common prefixes" due to the way we made the request
        for obj in result.get("CommonPrefixes", []):
            subdir = obj.get("Prefix")[len(artifact_path) + 1:]
            if subdir.endswith("/"):
                subdir = subdir[:-1]
            infos.append(FileInfo(subdir, True, None))
        # Objects listed directly will be files
        for obj in result.get('Contents', []):
            name = obj.get("Key")[len(artifact_path) + 1:]
            size = int(obj.get('Size'))
            infos.append(FileInfo(name, False, size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    (bucket, artifact_path) = self.parse_gcs_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    prefix = dest_path + "/"
    bkt = self.gcs.Client().get_bucket(bucket)
    infos = self._list_folders(bkt, prefix, artifact_path)
    results = bkt.list_blobs(prefix=prefix, delimiter="/")
    for result in results:
        blob_path = result.name[len(artifact_path) + 1:]
        infos.append(FileInfo(blob_path, False, result.size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    (bucket, artifact_path) = self.parse_gcs_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    results = self.gcs.Client().get_bucket(bucket).list_blobs(prefix=prefix)
    for result in results:
        is_dir = result.name.endswith('/')
        if is_dir:
            # Report the directory placeholder blob relative to the artifact root,
            # stripping the trailing slash, mirroring how files are handled below.
            blob_path = result.name[len(artifact_path) + 1:-1]
        else:
            blob_path = result.name[len(artifact_path) + 1:]
        infos.append(FileInfo(blob_path, is_dir, result.size))
    return sorted(infos, key=lambda f: f.path)
def list_artifacts(self, path=None):
    if path:
        dbfs_list_json = {'path': self._get_dbfs_path(path)}
    else:
        dbfs_list_json = {'path': self._get_dbfs_path('')}
    response = _dbfs_list_api(dbfs_list_json, self.http_request_kwargs)
    json_response = json.loads(response.text)
    # /api/2.0/dbfs/list will not have the 'files' key in the response for empty directories.
    infos = []
    artifact_prefix = strip_prefix(self.artifact_uri, 'dbfs:')
    if json_response.get('error_code', None) == RESOURCE_DOES_NOT_EXIST:
        return []
    dbfs_files = json_response.get('files', [])
    for dbfs_file in dbfs_files:
        is_dir = dbfs_file['is_dir']
        artifact_size = None if is_dir else dbfs_file['file_size']
        stripped_path = strip_prefix(dbfs_file['path'], artifact_prefix + '/')
        infos.append(FileInfo(stripped_path, is_dir, artifact_size))
    return sorted(infos, key=lambda f: f.path)
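# Illustrative only (not taken from the snippets above): every list_artifacts
# variant here returns a sorted list of FileInfo records -- a path relative to
# the artifact root, a directory flag, and a size in bytes (None for
# directories). The stand-in namedtuple below is a hypothetical substitute for
# the real FileInfo entity, with field names matching test_creation_and_hydration,
# and simply sketches the shape a caller can expect.
from collections import namedtuple

FileInfo = namedtuple("FileInfo", ["path", "is_dir", "file_size"])  # stand-in, not the real class

# Example listing, sorted by path as the backends above do.
listing = sorted(
    [
        FileInfo("model/MLmodel", False, 512),
        FileInfo("model", True, None),
        FileInfo("metrics.json", False, 1024),
    ],
    key=lambda f: f.path,
)

for info in listing:
    kind = "dir " if info.is_dir else "file"
    size = "-" if info.file_size is None else str(info.file_size)
    print(f"{kind}  {size:>6}  {info.path}")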