Example #1
    def test_single_file(self):
        """Parse a URL referring to a single file on Blob."""
        linked_bundle_path = parse_linked_bundle_url(
            "azfs://storageclwsdev0/bundles/uuid/contents.gz")
        self.assertEqual(linked_bundle_path.storage_type,
                         StorageType.AZURE_BLOB_STORAGE.value)
        self.assertEqual(linked_bundle_path.uses_beam, True)
        self.assertEqual(linked_bundle_path.bundle_path,
                         "azfs://storageclwsdev0/bundles/uuid/contents.gz")
        self.assertEqual(linked_bundle_path.is_archive, True)
        self.assertEqual(linked_bundle_path.is_archive_dir, False)
        self.assertEqual(linked_bundle_path.index_path,
                         "azfs://storageclwsdev0/bundles/uuid/index.sqlite")
        self.assertEqual(linked_bundle_path.archive_subpath, None)
        self.assertEqual(linked_bundle_path.bundle_uuid, "uuid")

        linked_bundle_path = parse_linked_bundle_url(
            "gs://codalabbucket1/uuid/contents.gz")
        self.assertEqual(linked_bundle_path.storage_type,
                         StorageType.GCS_STORAGE.value)
        self.assertEqual(linked_bundle_path.uses_beam, True)
        self.assertEqual(linked_bundle_path.bundle_path,
                         "gs://codalabbucket1/uuid/contents.gz")
        self.assertEqual(linked_bundle_path.is_archive, True)
        self.assertEqual(linked_bundle_path.is_archive_dir, False)
        self.assertEqual(linked_bundle_path.index_path,
                         "gs://codalabbucket1/uuid/index.sqlite")
        self.assertEqual(linked_bundle_path.archive_subpath, None)
        self.assertEqual(linked_bundle_path.bundle_uuid, "uuid")
Example #2
 def test_container(self):
     """Parse a URL referring to a container or bucket."""
     linked_bundle_path = parse_linked_bundle_url("gs://codalab-test")
     self.assertEqual(linked_bundle_path.storage_type,
                      StorageType.GCS_STORAGE.value)
     linked_bundle_path = parse_linked_bundle_url(
         "azfs://devstoreaccount1/bundles")
     self.assertEqual(linked_bundle_path.storage_type,
                      StorageType.AZURE_BLOB_STORAGE.value)
Example #3
def hash_directory(path, dirs_and_files=None):
    """
    Return the hash of the contents of the folder at the given path.
    This hash is independent of the path itself - if you were to move the
    directory and call hash_directory again, you would get the same result.
    """
    if parse_linked_bundle_url(path).uses_beam:
        # On Azure Blob Storage, we just use the directory size for the hashed contents.
        return get_size(path)
    (directories, files) = dirs_and_files or recursive_ls(path)
    # Sort and then hash all directories and then compute a hash of the hashes.
    # This two-level hash is necessary so that the overall hash is unambiguous -
    # if we updated directory_hash with the directory names themselves, then
    # we'd be hashing the concatenation of these names, which could be generated
    # in multiple ways.
    directory_hash = hashlib.sha1()
    for directory in sorted(directories):
        relative_path = get_relative_path(path, directory)
        directory_hash.update(
            hashlib.sha1(relative_path.encode()).hexdigest().encode())
    # Use a similar two-level hashing scheme for all files, but incorporate a
    # hash of both the file name and contents.
    file_hash = hashlib.sha1()
    for file_name in sorted(files):
        relative_path = get_relative_path(path, file_name)
        file_hash.update(
            hashlib.sha1(relative_path.encode()).hexdigest().encode())
        file_hash.update(hash_file_contents(file_name).encode())
    # Return a hash of the two hashes.
    overall_hash = hashlib.sha1(directory_hash.hexdigest().encode())
    overall_hash.update(file_hash.hexdigest().encode())
    return overall_hash.hexdigest()
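The comment block above justifies the two-level scheme: hashing raw name concatenations would be ambiguous, because different name lists can concatenate to the same string. A standalone sketch of that argument (not part of the original code):

import hashlib

# The raw concatenations collide, so hashing them directly is ambiguous:
assert "ab" + "c" == "a" + "bc"

def two_level(names):
    h = hashlib.sha1()
    for name in names:
        # Each name contributes a fixed-width digest, so boundaries stay unambiguous.
        h.update(hashlib.sha1(name.encode()).hexdigest().encode())
    return h.hexdigest()

assert two_level(["ab", "c"]) != two_level(["a", "bc"])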
Example #4
    def __init__(self, path: str):
        """Initialize TarSubdirStream.

        Args:
            path (str): Specified path of the subdirectory on Blob Storage. Must refer to a subdirectory path within a .tar.gz file.
        """
        from codalab.worker.file_util import OpenIndexedArchiveFile
        from codalab.worker.download_util import compute_target_info_blob_descendants_flat

        self.linked_bundle_path = parse_linked_bundle_url(path)

        # We add OpenIndexedArchiveFile to self._stack so that the context manager remains open and is exited
        # only in the method self.close().
        with ExitStack() as stack:
            self.tf = stack.enter_context(
                OpenIndexedArchiveFile(self.linked_bundle_path.bundle_path))
            self._stack = stack.pop_all()

        # Keep track of descendants of the specified subdirectory and the current descendant
        self.descendants = compute_target_info_blob_descendants_flat(path)
        self.current_desc = CurrentDescendant(desc=None,
                                              pos=0,
                                              finfo=EmptyFileInfo,
                                              tinfo=tarfile.TarInfo())

        # Buffer that stores the underlying bytes of the output tar archive
        self._buffer = BytesBuffer()

        # Output tar archive
        self.output = tarfile.open(fileobj=self._buffer, mode="w:")
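The ExitStack.pop_all() call above transfers ownership of the open context manager out of the with block so that it stays open until self.close() runs. A minimal standalone sketch of the same standard-library pattern (the temporary file is illustrative):

from contextlib import ExitStack
import tempfile

with ExitStack() as stack:
    f = stack.enter_context(tempfile.TemporaryFile())
    saved = stack.pop_all()  # ownership moves to `saved`; exiting the block closes nothing

f.write(b"still open")  # the file survived the with block
saved.close()           # closes everything that was registered, i.e. `f`
assert f.closed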
Example #5
 def create_file(self, contents=b"hello world"):
     """Creates a file on Blob (stored as a .gz with an index.sqlite index file) and returns its path."""
     bundle_uuid = str(random.random())
     bundle_path = f"azfs://storageclwsdev0/bundles/{bundle_uuid}/contents.gz"
     compressed_file = BytesIO(gzip.compress(contents))
     # TODO: Unify this code with code in BlobStorageUploader.write_fileobj().
     with FileSystems.create(
             bundle_path,
             compression_type=CompressionTypes.UNCOMPRESSED) as f:
         shutil.copyfileobj(compressed_file, f)
     compressed_file.seek(0)
     with tempfile.NamedTemporaryFile(suffix=".sqlite") as tmp_index_file:
         SQLiteIndexedTar(
             fileObject=compressed_file,
             tarFileName="contents",  # If saving a single file as a .gz archive, this file can be accessed by the "/contents" entry in the index.
             writeIndex=True,
             clearIndexCache=True,
             indexFileName=tmp_index_file.name,
         )
         with FileSystems.create(
                 parse_linked_bundle_url(bundle_path).index_path,
                 compression_type=CompressionTypes.UNCOMPRESSED,
         ) as out_index_file, open(tmp_index_file.name, "rb") as tif:
             shutil.copyfileobj(tif, out_index_file)
     return bundle_uuid, bundle_path
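A plausible follow-up check, sketched on the assumption that OpenIndexedArchiveFile (used by the other examples here) is importable in this test: the uploaded blob should resolve through its index via the hardcoded "/contents" entry.

bundle_uuid, bundle_path = self.create_file(b"hello world")
with OpenIndexedArchiveFile(bundle_path) as tf:
    finfo = tf.getFileInfo("/contents")
    assert finfo is not None
    assert finfo.size == len(b"hello world")  # the index records the uncompressed size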
Example #6
def get_file_size(file_path):
    """
    Gets the size of the file, in bytes. If the file is not found, raises a
    FileNotFoundError.
    """
    linked_bundle_path = parse_linked_bundle_url(file_path)
    if linked_bundle_path.uses_beam and linked_bundle_path.is_archive:
        # If no archive subpath is specified, get the uncompressed size of the
        # entire file (for a .gz file) or the compressed size of the entire
        # directory (for a .tar.gz file).
        if not linked_bundle_path.archive_subpath:
            if linked_bundle_path.is_archive_dir:
                filesystem = FileSystems.get_filesystem(
                    linked_bundle_path.bundle_path)
                return filesystem.size(linked_bundle_path.bundle_path)
            else:
                with OpenFile(linked_bundle_path.bundle_path, 'rb') as fileobj:
                    fileobj.seek(0, os.SEEK_END)
                    return fileobj.tell()
        # An archive subpath is only valid within a .tar.gz directory archive
        # (a single .gz file has no subpaths), so open the specified subpath
        # within the archive.
        with OpenIndexedArchiveFile(linked_bundle_path.bundle_path) as tf:
            assert linked_bundle_path.is_archive_dir
            fpath = "/" + linked_bundle_path.archive_subpath
            finfo = tf.getFileInfo(fpath)
            if finfo is None:
                raise FileNotFoundError(fpath)
            return finfo.size
    if not get_path_exists(file_path):
        raise FileNotFoundError(file_path)
    # Local path
    return os.stat(file_path).st_size
Example #7
 def write_fileobj(self, source_ext: str, source_fileobj: IO[bytes],
                   bundle_path: str, unpack_archive: bool):
     if unpack_archive:
         output_fileobj = zip_util.unpack_to_archive(
             source_ext, source_fileobj)
     else:
         output_fileobj = GzipStream(source_fileobj)
     # Write archive file.
     with FileSystems.create(
             bundle_path,
             compression_type=CompressionTypes.UNCOMPRESSED) as out:
         shutil.copyfileobj(output_fileobj, out)
     # Write index file to a temporary file, then write that file to Blob Storage.
     with FileSystems.open(bundle_path,
                           compression_type=CompressionTypes.UNCOMPRESSED
                           ) as ttf, tempfile.NamedTemporaryFile(
                               suffix=".sqlite") as tmp_index_file:
         SQLiteIndexedTar(
             fileObject=ttf,
             tarFileName="contents",  # If saving a single file as a .gz archive, this file can be accessed by the "/contents" entry in the index.
             writeIndex=True,
             clearIndexCache=True,
             indexFileName=tmp_index_file.name,
         )
         with FileSystems.create(
                 parse_linked_bundle_url(bundle_path).index_path,
                 compression_type=CompressionTypes.UNCOMPRESSED,
         ) as out_index_file, open(tmp_index_file.name, "rb") as tif:
             shutil.copyfileobj(tif, out_index_file)
Example #8
 def get_target_bypass_url(self, target, **kwargs):
     """
      Get SAS URL with read permission. Used for bypass-server downloads from Azure Blob Storage.
     """
     return parse_linked_bundle_url(
         self._get_target_path(target)).bundle_path_bypass_url(
             permission='r', **kwargs)
Example #9
 def get_index_sas_token(self, path, **kwargs):
     """
     Get SAS token of the index file with read and write permission. Used for uploading.
     """
     return (parse_linked_bundle_url(path).index_path_bypass_url(
         permission='rw',
         **kwargs).split('?')[-1]  # Get SAS token from SAS url.
             )
Example #10
 def get_bundle_sas_token(self, path, **kwargs):
     """
      Get SAS token with read and write permission. Used for bypass-server uploading.
     """
     return (parse_linked_bundle_url(path).bundle_path_bypass_url(
         permission='rw',
         **kwargs).split('?')[-1]  # Get SAS token from SAS url.
             )
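Both helpers peel the SAS token off a full SAS URL with split('?')[-1]. A sketch with a made-up URL showing what that yields:

sas_url = "https://account.blob.core.windows.net/bundles/uuid/contents.gz?sv=2021-08-06&sp=rw&sig=abc123"  # hypothetical
sas_token = sas_url.split('?')[-1]
assert sas_token == "sv=2021-08-06&sp=rw&sig=abc123"  # the query string only, no path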
Example #11
 def test_non_azure_file(self):
     """Should parse a non-Azure URL properly."""
     linked_bundle_path = parse_linked_bundle_url(
         "/tmp/storageclwsdev0/bundles/uuid/contents.txt")
     self.assertEqual(linked_bundle_path.storage_type,
                      StorageType.DISK_STORAGE.value)
     self.assertEqual(linked_bundle_path.uses_beam, False)
     self.assertEqual(linked_bundle_path.bundle_path,
                      "/tmp/storageclwsdev0/bundles/uuid/contents.txt")
     self.assertEqual(linked_bundle_path.is_archive, False)
Example #12
def get_size(path, dirs_and_files=None):
    """
    Get the size (in bytes) of the file or directory at or under the given path.
    Does not include symlinked files and directories.
    """
    if parse_linked_bundle_url(path).uses_beam:
        return get_path_size(path)
    if os.path.islink(path) or not os.path.isdir(path):
        return os.lstat(path).st_size
    dirs_and_files = dirs_and_files or recursive_ls(path)
    return sum(
        os.lstat(path).st_size for path in itertools.chain(*dirs_and_files))
Example #13
 def __init__(self, path: str):
     self.f = FileSystems.open(
         path, compression_type=CompressionTypes.UNCOMPRESSED)
     self.path = path
     with tempfile.NamedTemporaryFile(suffix=".sqlite",
                                      delete=False) as index_fileobj:
         self.index_file_name = index_fileobj.name
         shutil.copyfileobj(
             FileSystems.open(
                 parse_linked_bundle_url(self.path).index_path,
                 compression_type=CompressionTypes.UNCOMPRESSED,
             ),
             index_fileobj,
         )
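Because the temporary index file is created with delete=False, the caller must remove it. A hypothetical companion close() method (a sketch only; the real class may clean up differently):

def close(self):
    self.f.close()                   # close the Blob-backed file object
    os.remove(self.index_file_name)  # delete the local copy of index.sqlite (requires `import os`)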
Example #14
    def create_directory(self):
        """Creates a directory (stored as a .tar.gz with an index.sqlite index file) and returns its path."""
        bundle_uuid = str(random.random())
        bundle_path = f"azfs://storageclwsdev0/bundles/{bundle_uuid}/contents.tar.gz"

        def writestr(tf, name, contents):
            tinfo = tarfile.TarInfo(name)
            tinfo.size = len(contents)
            tf.addfile(tinfo, BytesIO(contents.encode()))

        def writedir(tf, name):
            tinfo = tarfile.TarInfo(name)
            tinfo.type = tarfile.DIRTYPE
            tf.addfile(tinfo, BytesIO())

        # TODO: Unify this code with code in UploadManager.upload_to_bundle_store().
        with FileSystems.create(
            bundle_path, compression_type=CompressionTypes.UNCOMPRESSED
        ) as out, tempfile.NamedTemporaryFile(
            suffix=".tar.gz"
        ) as tmp_tar_file, tempfile.NamedTemporaryFile(
            suffix=".sqlite"
        ) as tmp_index_file:
            with tarfile.open(name=tmp_tar_file.name, mode="w:gz") as tf:
                # We need to create separate entries for each directory, as a regular
                # .tar.gz file would have.
                writestr(tf, "./README.md", "hello world")
                writedir(tf, "./src")
                writestr(tf, "./src/test.sh", "echo hi")
                writedir(tf, "./dist")
                writedir(tf, "./dist/a")
                writedir(tf, "./dist/a/b")
                writestr(tf, "./dist/a/b/test2.sh", "echo two")
            shutil.copyfileobj(tmp_tar_file, out)
            with open(tmp_tar_file.name, "rb") as ttf:
                SQLiteIndexedTar(
                    fileObject=ttf,
                    tarFileName="contents",
                    writeIndex=True,
                    clearIndexCache=True,
                    indexFilePath=tmp_index_file.name,
                )
            with FileSystems.create(
                parse_linked_bundle_url(bundle_path).index_path,
                compression_type=CompressionTypes.UNCOMPRESSED,
            ) as out_index_file, open(tmp_index_file.name, "rb") as tif:
                shutil.copyfileobj(tif, out_index_file)

        return bundle_uuid, bundle_path
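A sketch of how the resulting archive could be inspected, assuming OpenIndexedArchiveFile is available and that the index normalizes the "./" prefixes written above:

bundle_uuid, bundle_path = self.create_directory()
with OpenIndexedArchiveFile(bundle_path) as tf:
    entries = tf.listDir("/") or {}  # maps entry names to FileInfo objects
    assert {"README.md", "src", "dist"} <= set(entries)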
Example #15
 def _is_available_locally(self, target):
     """Returns whether the target is accessible from the current machine. Returns True
     if the target is on an accessible disk or if the target is on Azure Blob Storage.
     """
     file_path = self._get_target_path(target)
     if parse_linked_bundle_url(file_path).uses_beam:
         # Return True if the URL is in Azure Blob Storage.
         return True
     if self._bundle_model.get_bundle_state(target.bundle_uuid) in [
             State.RUNNING,
             State.PREPARING,
     ]:
         return self._bundle_model.get_bundle_worker(
             target.bundle_uuid)['shared_file_system']
     return True
Example #16
 def test_directory_with_subpath(self):
     """Parse a URL referring to a subpath within an archived directory."""
     linked_bundle_path = parse_linked_bundle_url(
         "azfs://storageclwsdev0/bundles/uuid/contents.tar.gz/a/b.txt")
     self.assertEqual(linked_bundle_path.storage_type,
                      StorageType.AZURE_BLOB_STORAGE.value)
     self.assertEqual(
         linked_bundle_path.bundle_path,
         "azfs://storageclwsdev0/bundles/uuid/contents.tar.gz")
     self.assertEqual(linked_bundle_path.is_archive, True)
     self.assertEqual(linked_bundle_path.is_archive_dir, True)
     self.assertEqual(linked_bundle_path.index_path,
                      "azfs://storageclwsdev0/bundles/uuid/index.sqlite")
     self.assertEqual(linked_bundle_path.archive_subpath, "a/b.txt")
     self.assertEqual(linked_bundle_path.bundle_uuid, "uuid")
Example #17
def _get_normalized_target_path(bundle_path: str, target: BundleTarget) -> str:
    if parse_linked_bundle_url(bundle_path).uses_beam:
        # On Azure, don't call os.path functions on the paths (which are azfs:// URLs).
        # We can just concatenate them together.
        return f"{bundle_path}/{target.subpath}" if target.subpath else bundle_path
    else:
        real_bundle_path = os.path.realpath(bundle_path)
        normalized_target_path = os.path.normpath(
            _get_target_path(real_bundle_path, target.subpath))

    error_path = _get_target_path(target.bundle_uuid, target.subpath)

    if not normalized_target_path.startswith(real_bundle_path):
        raise PathException('%s is not inside the bundle.' % error_path)

    return normalized_target_path
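The startswith check is what rejects directory-traversal subpaths. An illustrative call (hypothetical values; a BundleTarget(bundle_uuid, subpath) constructor is assumed from the surrounding usage):

target = BundleTarget("uuid", "../other-bundle/secret.txt")
_get_normalized_target_path("/bundles/uuid", target)
# raises PathException: the normalized path escapes real_bundle_path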
Example #18
def get_target_info(bundle_path: str, target: BundleTarget, depth: int) -> TargetInfo:
    """
    Generates an index of the contents of the given path. The index contains
    the fields:
        name: Name of the entry.
        type: Type of the entry, one of 'file', 'directory' or 'link'.
        size: Size of the entry.
        perm: Permissions of the entry.
        link: If type is 'link', where the symbolic link points to.
        contents: If type is 'directory', a list of entries for the contents.

    For the top-level entry, the result also contains resolved_target, a BundleTarget.

    Any entries more than depth levels deep are filtered out. Depth 0, for
    example, means only the top-level entry is included, and no contents. Depth
    1 means the contents of the top-level are included, but nothing deeper.

    If the given path does not exist, raises PathException.

    If reading the given path is not secure, raises a PathException.
    """
    final_path = _get_normalized_target_path(bundle_path, target)
    if parse_linked_bundle_url(final_path).uses_beam:
        # If the target is on Blob Storage, use a Blob-specific method
        # to get the target info.
        try:
            info = _compute_target_info_blob(final_path, depth)
        except Exception:
            logging.error(
                "Path '{}' in bundle {} not found: {}".format(
                    target.subpath, target.bundle_uuid, traceback.format_exc()
                )
            )
            raise PathException(
                "Path '{}' in bundle {} not found".format(target.subpath, target.bundle_uuid)
            )
    else:
        if not os.path.islink(final_path) and not os.path.exists(final_path):
            raise PathException(
                "Path '{}' in bundle {} not found".format(target.subpath, target.bundle_uuid)
            )
        info = _compute_target_info_local(final_path, depth)

    info['resolved_target'] = target
    return info
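A short sketch of the depth semantics described in the docstring (paths and values are illustrative):

info = get_target_info(bundle_path, target, depth=0)
info.get('contents')  # None: depth 0 returns only the top-level entry

info = get_target_info(bundle_path, target, depth=1)
[e['name'] for e in info.get('contents', [])]  # immediate children, nothing deeper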
Example #19
 def __enter__(self) -> IO[bytes]:
     linked_bundle_path = parse_linked_bundle_url(self.path)
     if linked_bundle_path.uses_beam and linked_bundle_path.is_archive:
         # Stream an entire, single .gz file from Blob Storage. This is gzipped by default,
         # so if the user requested a gzipped version of the entire file, just read and return it.
         if not linked_bundle_path.is_archive_dir and self.gzipped:
             return FileSystems.open(
                 self.path, compression_type=CompressionTypes.UNCOMPRESSED)
         # Stream an entire, single .tar.gz file from Blob Storage. This is gzipped by default,
         # and directories are always gzipped, so just read and return it.
         if linked_bundle_path.is_archive_dir and not linked_bundle_path.archive_subpath:
             if not self.gzipped:
                 raise IOError("Directories must be gzipped.")
             return FileSystems.open(
                 self.path, compression_type=CompressionTypes.UNCOMPRESSED)
         # If a file path is specified within an archive file on Blob Storage, open the specified path within the archive.
         with OpenIndexedArchiveFile(linked_bundle_path.bundle_path) as tf:
             isdir = lambda finfo: stat.S_ISDIR(finfo.mode)
             # If the archive file is a .tar.gz file, open the specified archive subpath within the archive.
             # If it is a .gz file, open the "/contents" entry, which represents the actual gzipped file.
             fpath = ("/" + linked_bundle_path.archive_subpath
                      if linked_bundle_path.is_archive_dir else "/contents")
             finfo = cast(FileInfo, tf.getFileInfo(fpath))
             if finfo is None:
                 raise FileNotFoundError(fpath)
             if isdir(finfo):
                 # Stream a directory from within the archive
                 if not self.gzipped:
                     raise IOError("Directories must be gzipped.")
                 return GzipStream(TarSubdirStream(self.path))
             else:
                 # Stream a single file from within the archive
                 fs = TarFileStream(tf, finfo)
                 return GzipStream(fs) if self.gzipped else fs
     else:
         # Stream a directory or file from disk storage.
         if os.path.isdir(self.path):
             if not self.gzipped:
                 raise IOError("Directories must be gzipped.")
             return tar_gzip_directory(self.path)
         if self.gzipped:
             raise IOError(
                 "Gzipping local files from disk from OpenFile is not yet supported. Please use file_util.gzip_file instead."
             )
         return open(self.path, self.mode)
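A minimal usage sketch of the context manager above (URL illustrative, and assuming an OpenFile(path, gzipped=...) constructor matching the attributes used here): requesting gzipped=True for a single .gz file returns the raw gzipped stream unchanged.

with OpenFile("azfs://storageclwsdev0/bundles/uuid/contents.gz", gzipped=True) as f:
    gzipped_bytes = f.read()  # already-gzipped bytes streamed straight from Blob Storage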
Example #20
def gzip_file(file_path: str) -> IO[bytes]:
    """
    Returns a file-like object containing the gzipped version of the given file.
    Note: For right now, it's important for gzip to run in a separate process,
    otherwise things on CodaLab grind to a halt!
    """

    if parse_linked_bundle_url(file_path).uses_beam:
        try:
            with OpenFile(file_path, gzipped=True) as file_path_obj:
                return file_path_obj
        except Exception as e:
            raise IOError(e)

    args = ['gzip', '-c', '-n', file_path]
    try:
        proc = subprocess.Popen(args, stdout=subprocess.PIPE)
        if proc.stdout is None:
            raise IOError("Stdout is empty")
        return proc.stdout
    except subprocess.CalledProcessError as e:
        raise IOError(e.output)
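For local files the returned pipe yields gzipped bytes, so a caller can recover the original contents with the standard library (file path illustrative):

import gzip

stream = gzip_file("/tmp/example.txt")
original = gzip.decompress(stream.read())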
Example #21
def get_path_size(path, exclude_names=[], ignore_nonexistent_path=False):
    """
    Returns the size of the contents of the given path, in bytes.

    If path is a directory, any directory entries in exclude_names will be
    ignored.

    If ignore_nonexistent_path is True and the input path is nonexistent, the value
    0 is returned. Else, an exception is raised (FileNotFoundError).
    """
    if parse_linked_bundle_url(path).uses_beam:
        # On Azure, use Apache Beam methods, not native os methods,
        # to get the path size.

        # Get the size of the specified path (file / directory).
        # This will only get the right size of files, not of directories (but we don't
        # store any bundles as directories on Azure).
        return get_file_size(path)

    try:
        result = os.lstat(path).st_size
    except FileNotFoundError:
        if ignore_nonexistent_path:
            # If we are to ignore nonexistent paths, return the size of this path as 0
            return 0
        # Raise the FileNotFoundError
        raise
    if not os.path.islink(path) and os.path.isdir(path):
        for child in os.listdir(path):
            if child not in exclude_names:
                try:
                    full_child_path = os.path.join(path, child)
                except UnicodeDecodeError:
                    full_child_path = os.path.join(path.decode('utf-8'),
                                                   child.decode('utf-8'))
                result += get_path_size(full_child_path,
                                        ignore_nonexistent_path=True)
    return result
Example #22
def remove(path):
    """
    Remove the given path, whether it is a directory, file, or link.
    """
    if parse_linked_bundle_url(path).uses_beam:
        from apache_beam.io.filesystems import FileSystems

        # Delete the path only if it actually exists on Blob Storage.
        if FileSystems.exists(path):
            FileSystems.delete([path])
        return
    check_isvalid(path, 'remove')
    set_write_permissions(path)  # Allow permissions
    if os.path.islink(path):
        os.unlink(path)
    elif os.path.isdir(path):
        try:
            shutil.rmtree(path)
        except shutil.Error:
            pass
    else:
        os.remove(path)
    if os.path.exists(path):
        print('Failed to remove %s' % path)
Example #23
def _compute_target_info_blob(
    path: str, depth: Union[int, float], return_generators=False
) -> TargetInfo:
    """Computes target info for a file that is externalized on Blob Storage, meaning
    that it's contained within an indexed archive file.

    Args:
        path (str): The path that refers to the specified target.
        depth (Union[int, float]): Depth until which directory contents are resolved.
        return_generators (bool, optional): If set to True, the 'contents' key of directories is equal to a generator instead of a list. Defaults to False.

    Raises:
        PathException: Path not found or invalid.

    Returns:
        TargetInfo: Target info of specified path.
    """

    linked_bundle_path = parse_linked_bundle_url(path)
    if not FileSystems.exists(linked_bundle_path.bundle_path):
        raise PathException(linked_bundle_path.bundle_path)
    if not linked_bundle_path.is_archive:
        # Single file
        raise PathException(
            "Single files on Blob Storage are not supported; only a path within an archive file is supported."
        )

    # process_contents is used to process the value of the 'contents' key (which is a generator) before it is returned.
    # If return_generators is False, it resolves the given generator into a list; otherwise, it just returns
    # the generator unchanged.
    process_contents = list if return_generators is False else lambda x: x

    with OpenIndexedArchiveFile(linked_bundle_path.bundle_path) as tf:
        islink = lambda finfo: stat.S_ISLNK(finfo.mode)
        readlink = lambda finfo: finfo.linkname
        isfile = lambda finfo: not stat.S_ISDIR(finfo.mode)
        isdir = lambda finfo: stat.S_ISDIR(finfo.mode)
        listdir = lambda path: cast(Dict[str, FileInfo], tf.listDir(path) or {})

        def _get_info(path: str, depth: Union[int, float]) -> TargetInfo:
            """This function is called to get the target info of the specified path.
            If the specified path is a directory and additional depth is requested, this
            function is recursively called to retrieve the target info of files within
            the directory, much like _compute_target_info_local.
            """
            if not path.startswith("/"):
                path = "/" + path
            finfo = cast(FileInfo, tf.getFileInfo(path))
            if finfo is None:
                # Not found
                raise PathException("File not found.")
            result: TargetInfo = {
                'name': os.path.basename(path),  # get last part of path
                'size': finfo.size,
                'perm': finfo.mode & 0o777,
                'type': '',
            }
            if islink(finfo):
                result['type'] = 'link'
                result['link'] = readlink(finfo)
            elif isfile(finfo):
                result['type'] = 'file'
            elif isdir(finfo):
                result['type'] = 'directory'
                if depth > 0:
                    result['contents'] = process_contents(
                        _get_info(path + "/" + file_name, depth - 1)
                        for file_name in listdir(path)
                        if file_name != "."
                    )
            return result

        if not linked_bundle_path.is_archive_dir:
            # Return the contents of the single .gz file.
            # The entry returned by ratarmount for a single .gz file is not technically part of a tar archive
            # and has a name hardcoded as "contents," so we modify the type, name, and permissions of
            # the output accordingly.
            return cast(
                TargetInfo,
                dict(
                    _get_info("/contents", depth),
                    type="file",
                    name=linked_bundle_path.bundle_uuid,
                    perm=0o755,
                ),
            )
        if linked_bundle_path.archive_subpath:
            # Return the contents of a subpath within a directory.
            return _get_info(linked_bundle_path.archive_subpath, depth)
        else:
            # No subpath, return the entire directory with the bundle
            # contents in it. The permissions of this directory
            # cannot be set by the user (the user can only set permissions
            # of files *within* this directory that are part of the bundle
            # itself), so we just return a placeholder value of 0o755
            # for this directory's permissions.
            file = FileSystems.match([path])[0].metadata_list[0]
            result: TargetInfo = {
                'name': linked_bundle_path.bundle_uuid,
                'type': 'directory',
                'size': file.size_in_bytes,
                'perm': 0o755,
            }
            if depth > 0:
                result['contents'] = process_contents(
                    _get_info(file_name, depth - 1)
                    for file_name in listdir("/")
                    if file_name != "."
                )
            return result
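For orientation, a sketch of the TargetInfo this returns for a directory at depth 1, using the layout from the create_directory example above (all values illustrative):

example_info = {
    'name': 'uuid',
    'type': 'directory',
    'size': 249,      # size of the .tar.gz blob, per FileSystems.match
    'perm': 0o755,    # placeholder permissions, as noted in the comment above
    'contents': [
        {'name': 'README.md', 'type': 'file', 'size': 11, 'perm': 0o644},
        {'name': 'src', 'type': 'directory', 'size': 0, 'perm': 0o755},
        {'name': 'dist', 'type': 'directory', 'size': 0, 'perm': 0o755},
    ],
}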
Example #24
 def get_bundle_index_url(self, path, **kwargs):
     return parse_linked_bundle_url(path).index_path_bypass_url(**kwargs)
Example #25
 def get_bundle_signed_url(self, path, **kwargs):
     """
     Get signed url for the bundle path
     """
     return parse_linked_bundle_url(path).bundle_path_bypass_url(**kwargs)
Example #26
    def write_fileobj(
        self,
        source_ext: str,
        source_fileobj: IO[bytes],
        bundle_path: str,
        unpack_archive: bool,
        bundle_conn_str=None,
        index_conn_str=None,
        progress_callback=None,
    ):
        if unpack_archive:
            output_fileobj = zip_util.unpack_to_archive(
                source_ext, source_fileobj)
        else:
            output_fileobj = GzipStream(source_fileobj)

        # Write archive file.
        if bundle_conn_str is not None:
            conn_str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING', '')
            os.environ['AZURE_STORAGE_CONNECTION_STRING'] = bundle_conn_str
        try:
            bytes_uploaded = 0
            CHUNK_SIZE = 16 * 1024
            with FileSystems.create(
                    bundle_path,
                    compression_type=CompressionTypes.UNCOMPRESSED) as out:
                while True:
                    to_send = output_fileobj.read(CHUNK_SIZE)
                    if not to_send:
                        break
                    out.write(to_send)
                    bytes_uploaded += len(to_send)
                    if progress_callback is not None:
                        should_resume = progress_callback(bytes_uploaded)
                        if not should_resume:
                            raise Exception('Upload aborted by client')

            with FileSystems.open(
                    bundle_path, compression_type=CompressionTypes.UNCOMPRESSED
            ) as ttf, tempfile.NamedTemporaryFile(
                    suffix=".sqlite") as tmp_index_file:
                SQLiteIndexedTar(
                    fileObject=ttf,
                    tarFileName="contents",  # If saving a single file as a .gz archive, this file can be accessed by the "/contents" entry in the index.
                    writeIndex=True,
                    clearIndexCache=True,
                    indexFilePath=tmp_index_file.name,
                )
                if bundle_conn_str is not None:
                    os.environ[
                        'AZURE_STORAGE_CONNECTION_STRING'] = index_conn_str
                with FileSystems.create(
                        parse_linked_bundle_url(bundle_path).index_path,
                        compression_type=CompressionTypes.UNCOMPRESSED,
                ) as out_index_file, open(tmp_index_file.name, "rb") as tif:
                    while True:
                        to_send = tif.read(CHUNK_SIZE)
                        if not to_send:
                            break
                        out_index_file.write(to_send)
                        bytes_uploaded += len(to_send)
                        if progress_callback is not None:
                            should_resume = progress_callback(bytes_uploaded)
                            if not should_resume:
                                raise Exception('Upload aborted by client')
        except Exception as err:
            raise err
        finally:  # restore the original connection string
            if bundle_conn_str is not None:
                os.environ[
                    'AZURE_STORAGE_CONNECTION_STRING'] = conn_str if conn_str != '' else None  # type: ignore