def list_artifacts(self, path=None):
    from azure.storage.blob.models import BlobPrefix
    (container, _, artifact_path) = self.parse_wasbs_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    marker = None  # Used to make next list request if this one exceeded the result limit
    while True:
        results = self.client.list_blobs(container, prefix=prefix, delimiter='/', marker=marker)
        for r in results:
            if isinstance(r, BlobPrefix):  # This is a prefix for items in a subdirectory
                subdir = r.name[len(artifact_path) + 1:]
                if subdir.endswith("/"):
                    subdir = subdir[:-1]
                infos.append(FileInfo(subdir, True, None))
            else:  # Just a plain old blob
                file_name = r.name[len(artifact_path) + 1:]
                infos.append(FileInfo(file_name, False, r.properties.content_length))
        # Check whether a new marker is returned, meaning we have to make another request
        if results.next_marker:
            marker = results.next_marker
        else:
            break
    return sorted(infos, key=lambda f: f.path)
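
Every example on this page constructs FileInfo(path, is_dir, file_size) records. These snippets come from MLflow-style artifact repositories, where FileInfo is a full entity class (Example #4 below exercises its proto and dictionary round-trips); as a minimal stand-in for reading the listings, something like the following three-field shape can be assumed:

from collections import namedtuple

# Minimal sketch of the FileInfo entity the examples rely on: path is
# relative to the artifact root, is_dir flags directories, and file_size
# is a byte count (None for directories). This namedtuple is an assumption
# for illustration; the real entity class additionally provides the
# to_proto()/from_proto() and dictionary conversions tested in Example #4.
FileInfo = namedtuple("FileInfo", ["path", "is_dir", "file_size"])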
Example #2
def get_file_info(path, rel_path):
    """
    Returns file meta data : location, size, ... etc

    :param path: Path to artifact

    :return: `FileInfo` object
    """
    if is_directory(path):
        return FileInfo(rel_path, True, None)
    else:
        return FileInfo(rel_path, False, os.path.getsize(path))
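
A hedged usage sketch for the helper above: walk a local artifact root and call get_file_info with each entry's absolute path and its path relative to the root. The list_all name and the os.walk traversal are illustrative assumptions, not part of the original API, and is_directory is assumed to wrap os.path.isdir:

import os

def list_all(artifact_root):
    # Build one FileInfo per entry under artifact_root, keyed by the
    # entry's path relative to the root, mirroring the list_artifacts
    # implementations elsewhere on this page.
    infos = []
    for dirpath, dirnames, filenames in os.walk(artifact_root):
        for name in dirnames + filenames:
            full_path = os.path.join(dirpath, name)
            rel_path = os.path.relpath(full_path, artifact_root)
            infos.append(get_file_info(full_path, rel_path))
    return sorted(infos, key=lambda f: f.path)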
Example #3
def list_artifacts(self, path=None):
    artifact_dir = self.path
    list_dir = os.path.join(artifact_dir, path) if path else artifact_dir
    artifact_files = self.sftp.listdir(list_dir)
    infos = []
    for file_name in artifact_files:
        file_path = file_name if path is None else os.path.join(path, file_name)
        full_file_path = os.path.join(list_dir, file_name)
        if self.sftp.isdir(full_file_path):
            infos.append(FileInfo(file_path, True, None))
        else:
            infos.append(FileInfo(file_path, False, self.sftp.stat(full_file_path).st_size))
    return infos
Example #4
    def test_creation_and_hydration(self):
        path = random_str(random_int(10, 50))
        is_dir = random_int(10, 2500) % 2 == 0
        size_in_bytes = random_int(1, 10000)
        fi1 = FileInfo(path, is_dir, size_in_bytes)
        self._check(fi1, path, is_dir, size_in_bytes)

        as_dict = {"path": path, "is_dir": is_dir, "file_size": size_in_bytes}
        self.assertEqual(dict(fi1), as_dict)

        proto = fi1.to_proto()
        fi2 = FileInfo.from_proto(proto)
        self._check(fi2, path, is_dir, size_in_bytes)

        fi3 = FileInfo.from_dictionary(as_dict)
        self._check(fi3, path, is_dir, size_in_bytes)
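
The test relies on a _check helper that is not shown. Judging from how it is called, it presumably asserts the three FileInfo fields; a sketch consistent with that usage (an assumption, not the original helper):

    def _check(self, fi, expected_path, expected_is_dir, expected_size):
        # Hypothetical helper: assert each FileInfo field against the
        # expected value supplied by the test.
        self.assertEqual(fi.path, expected_path)
        self.assertEqual(fi.is_dir, expected_is_dir)
        self.assertEqual(fi.file_size, expected_size)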
Example #5
def list_artifacts(self, path=None):
    with self.get_ftp_client() as ftp:
        artifact_dir = self.path
        list_dir = os.path.join(artifact_dir, path) if path else artifact_dir
        artifact_files = ftp.nlst(list_dir)
        infos = []
        for file_name in artifact_files:
            file_path = file_name if path is None else os.path.join(path, file_name)
            full_file_path = os.path.join(list_dir, file_name)
            if self._is_dir(full_file_path):
                infos.append(FileInfo(file_path, True, None))
            else:
                size = self._size(full_file_path)
                infos.append(FileInfo(file_path, False, size))
    return infos
Example #6
    def _list_folders(self, bkt, prefix):
        results = bkt.list_blobs(prefix=prefix, delimiter="/")
        dir_paths = set()
        for page in results.pages:
            dir_paths.update(page.prefixes)

        return [
            FileInfo(path[len(prefix):-1], True, None) for path in dir_paths
        ]
Example #7
def list_artifacts(self, path=None):
    with self.get_ftp_client() as ftp:
        artifact_dir = self.path
        list_dir = posixpath.join(artifact_dir, path) if path else artifact_dir
        if not self._is_dir(ftp, list_dir):
            return []
        artifact_files = ftp.nlst(list_dir)
        artifact_files = list(filter(lambda x: x != "." and x != "..", artifact_files))
        # Make sure artifact_files is a list of file names because ftp.nlst
        # may return absolute paths.
        artifact_files = [os.path.basename(f) for f in artifact_files]
        infos = []
        for file_name in artifact_files:
            file_path = file_name if path is None else posixpath.join(path, file_name)
            full_file_path = posixpath.join(list_dir, file_name)
            if self._is_dir(ftp, full_file_path):
                infos.append(FileInfo(file_path, True, None))
            else:
                size = self._size(ftp, full_file_path)
                infos.append(FileInfo(file_path, False, size))
    return infos
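
The two FTP examples (Examples #5 and #7) lean on _is_dir and _size helpers that are not shown. A sketch of one common way to implement them over ftplib, matching Example #7's signatures (Example #5's variant keeps the client on self instead of passing it in); this is an assumption about the host class, not the original code:

import ftplib

class FtpHelpersSketch:
    # Hypothetical host class; in the real repository these would be
    # methods on the artifact repository object.
    def _is_dir(self, ftp, full_file_path):
        # ftplib offers no direct "is this a directory" query; the usual
        # probe is to try cwd-ing into the path, then restore the old
        # working directory.
        try:
            original = ftp.pwd()
            ftp.cwd(full_file_path)
            ftp.cwd(original)
            return True
        except ftplib.error_perm:
            return False

    def _size(self, ftp, full_file_path):
        # SIZE reports a file's byte count; many servers require binary
        # (image) mode before answering.
        ftp.voidcmd("TYPE I")
        return ftp.size(full_file_path)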
Example #8
def list_artifacts(self, path=None):
    (bucket, artifact_path) = self.parse_s3_uri(self.artifact_uri)
    dest_path = artifact_path
    if path:
        dest_path = build_path(dest_path, path)
    infos = []
    prefix = dest_path + "/"
    paginator = boto3.client('s3').get_paginator("list_objects_v2")
    results = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    for result in results:
        # Subdirectories will be listed as "common prefixes" due to the way we made the request
        for obj in result.get("CommonPrefixes", []):
            subdir = obj.get("Prefix")[len(artifact_path) + 1:]
            if subdir.endswith("/"):
                subdir = subdir[:-1]
            infos.append(FileInfo(subdir, True, None))
        # Objects listed directly will be files
        for obj in result.get('Contents', []):
            name = obj.get("Key")[len(artifact_path) + 1:]
            size = int(obj.get('Size'))
            infos.append(FileInfo(name, False, size))
    return sorted(infos, key=lambda f: f.path)
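
Examples #1, #8, #9, and #10 each begin by splitting the repository's artifact URI into a bucket (or container) and a path prefix. A minimal sketch of the S3 variant, assuming the conventional s3://bucket/path layout (the wasbs variant additionally extracts the storage account, hence the three-element unpacking in Example #1):

from urllib.parse import urlparse

def parse_s3_uri(uri):
    # "s3://my-bucket/some/path" -> ("my-bucket", "some/path")
    parsed = urlparse(uri)
    if parsed.scheme != "s3":
        raise Exception("Not an S3 URI: %s" % uri)
    return parsed.netloc, parsed.path.lstrip("/")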
Example #9
    def list_artifacts(self, path=None):
        (bucket, artifact_path) = self.parse_gcs_uri(self.artifact_uri)
        dest_path = artifact_path
        if path:
            dest_path = build_path(dest_path, path)
        prefix = dest_path + "/"

        bkt = self.gcs.Client().get_bucket(bucket)

        infos = self._list_folders(bkt, prefix, artifact_path)

        results = bkt.list_blobs(prefix=prefix, delimiter="/")
        for result in results:
            blob_path = result.name[len(artifact_path) + 1:]
            infos.append(FileInfo(blob_path, False, result.size))

        return sorted(infos, key=lambda f: f.path)
Example #10
    def list_artifacts(self, path=None):
        (bucket, artifact_path) = self.parse_gcs_uri(self.artifact_uri)
        dest_path = artifact_path
        if path:
            dest_path = build_path(dest_path, path)
        infos = []
        prefix = dest_path + "/"

        results = self.gcs.Client().get_bucket(bucket).list_blobs(
            prefix=prefix)
        for result in results:
            is_dir = result.name.endswith('/')
            if is_dir:
                # Drop the artifact root prefix and the trailing slash from directory entries
                blob_path = result.name[len(artifact_path) + 1:-1]
            else:
                blob_path = result.name[len(artifact_path) + 1:]
            infos.append(FileInfo(blob_path, is_dir, None if is_dir else result.size))
        return sorted(infos, key=lambda f: f.path)
Example #11
def list_artifacts(self, path=None):
    if path:
        dbfs_list_json = {'path': self._get_dbfs_path(path)}
    else:
        dbfs_list_json = {'path': self._get_dbfs_path('')}
    response = _dbfs_list_api(dbfs_list_json, self.http_request_kwargs)
    json_response = json.loads(response.text)
    # /api/2.0/dbfs/list will not have the 'files' key in the response for empty directories.
    infos = []
    artifact_prefix = strip_prefix(self.artifact_uri, 'dbfs:')
    if json_response.get('error_code', None) == RESOURCE_DOES_NOT_EXIST:
        return []
    dbfs_files = json_response.get('files', [])
    for dbfs_file in dbfs_files:
        is_dir = dbfs_file['is_dir']
        artifact_size = None if is_dir else dbfs_file['file_size']
        stripped_path = strip_prefix(dbfs_file['path'], artifact_prefix + '/')
        infos.append(FileInfo(stripped_path, is_dir, artifact_size))
    return sorted(infos, key=lambda f: f.path)
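
Finally, the DBFS example depends on a strip_prefix helper. A one-line implementation consistent with its use above (an assumption about the exact definition):

def strip_prefix(original, prefix):
    # Drop a leading prefix when present; otherwise return the string unchanged.
    if original.startswith(prefix):
        return original[len(prefix):]
    return original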