Example #1
0
 def write_fileobj(self, source_ext: str, source_fileobj: IO[bytes],
                   bundle_path: str, unpack_archive: bool):
     """Write the stream `source_fileobj` to `bundle_path`.

     When `unpack_archive` is set, the stream is treated as an archive
     with extension `source_ext` and extracted in place; otherwise the
     raw bytes are copied verbatim.
     """
     if not unpack_archive:
         # Plain byte-for-byte copy of the stream to disk.
         with open(bundle_path, 'wb') as destination:
             shutil.copyfileobj(cast(IO, source_fileobj), destination)
         return
     zip_util.unpack(source_ext, source_fileobj, bundle_path)
Example #2
0
 def _unpack_file(self, source_path, dest_path, remove_source,
                  simplify_archive):
     """Extract the archive at `source_path` into `dest_path`.

     Optionally removes the source archive afterwards and collapses
     single-entry archives via `_simplify_archive`.
     """
     archive_ext = zip_util.get_archive_ext(source_path)
     zip_util.unpack(archive_ext, source_path, dest_path)
     if remove_source:
         path_util.remove(source_path)
     if simplify_archive:
         self._simplify_archive(dest_path)
Example #3
0
 def write_fileobj(
     self,
     source_ext: str,
     source_fileobj: IO[bytes],
     bundle_path: str,
     unpack_archive: bool,
     bundle_conn_str=None,
     index_conn_str=None,
     progress_callback=None,
 ):
     """Write the stream `source_fileobj` to `bundle_path`.

     When `unpack_archive` is set, the stream is treated as an archive
     with extension `source_ext` and extracted in place; otherwise the
     raw bytes are copied verbatim. The connection-string and callback
     parameters are accepted for interface compatibility and unused here.
     """
     if not unpack_archive:
         # Plain byte-for-byte copy of the stream to disk.
         with open(bundle_path, 'wb') as destination:
             shutil.copyfileobj(cast(IO, source_fileobj), destination)
         return
     zip_util.unpack(source_ext, source_fileobj, bundle_path)
Example #4
0
 def test_unpack_to_archive_single_compressed_file(self):
     """Unpack a single compressed file to a .gz file.

     For each supported single-file compression (bz2, gzip): write a
     compressed sample, convert it to the canonical archive form with
     `unpack_to_archive`, unpack that, and check round-trip contents.
     """
     for (compress_fn, extension) in [
         (bz2.compress, ".bz2"),
         (gzip.compress, ".gz"),
     ]:
         with self.subTest(
                 extension=extension
         ), tempfile.TemporaryDirectory() as tmpdir, \
                 tempfile.TemporaryDirectory() as dest_path:
             source_file = os.path.join(tmpdir, "file.txt")
             # Write and close the compressed sample before re-reading it.
             with open(source_file, "wb") as f:
                 f.write(compress_fn(SAMPLE_CONTENTS))
             # Keep the source stream open for the duration of the
             # conversion/unpack, then close it (the original leaked
             # this handle and the result handle below).
             with open(source_file, "rb") as source:
                 archive_fileobj = unpack_to_archive(extension, source)
                 unpack(".gz", archive_fileobj,
                        os.path.join(dest_path, "out"))
             with open(os.path.join(dest_path, "out"), "rb") as out:
                 self.assertEqual(SAMPLE_CONTENTS, out.read())
Example #5
0
 def test_unpack_single_archive(self):
     """Unpack a single archive.

     For each supported archive format (.tar.gz, .tar.bz2, .zip):
     archive a one-file directory, unpack it elsewhere, and check that
     the source is untouched and the contents round-trip.
     """
     for (compress_fn, extension) in [
         (tar_gzip_directory, ".tar.gz"),
         (tar_bz2_directory, ".tar.bz2"),
         (zip_directory, ".zip"),
     ]:
         with self.subTest(
                 extension=extension
         ), tempfile.TemporaryDirectory() as tmpdir, \
                 tempfile.TemporaryDirectory() as dest_path:
             # Write and close the sample file before archiving the
             # directory (the original held the handle open across the
             # whole test body).
             with open(os.path.join(tmpdir, "file.txt"), "wb") as f:
                 f.write(SAMPLE_CONTENTS)
             unpack(extension, compress_fn(tmpdir),
                    os.path.join(dest_path, "out"))
             self.assertEqual(os.listdir(tmpdir), ["file.txt"])
             self.assertEqual(os.listdir(os.path.join(dest_path, "out")),
                              ["file.txt"])
             # Read the unpacked file via a context manager so the
             # handle is closed (the original leaked it).
             with open(os.path.join(dest_path, "out", "file.txt"),
                       "rb") as out:
                 self.assertEqual(out.read(), SAMPLE_CONTENTS)
Example #6
0
    def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
        '''
        |sources|: specifies the locations of the contents to upload.  Each element is either a URL or a local path.
        |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
        |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
        |git|: for URL, whether |source| is a git repo to clone.
        |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
        |remove_sources|: remove |sources|.

        If |sources| contains one source, then the bundle contents will be that source.
        Otherwise, the bundle contents will be a directory with each of the sources.
        Exceptions:
        - If |git|, then each source is replaced with the result of running 'git clone |source|'
        - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

        Install the contents of the directory at |source| into
        DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.
        '''
        # Paths scheduled for deletion once the upload has finished.
        to_delete = []

        # Create temporary directory as a staging area and put everything there.
        temp_path = tempfile.mkdtemp('-bundle_store_upload')
        temp_subpaths = []
        for source in sources:
            # Where to save |source| to (might change this value if we unpack).
            temp_subpath = os.path.join(temp_path, os.path.basename(source))
            if remove_sources:
                to_delete.append(source)
            # Unpack only when requested AND the path looks like an archive.
            source_unpack = unpack and zip_util.path_is_archive(source)

            if path_util.path_is_url(source):
                # Download the URL.
                print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, temp_path))
                if git:
                    file_util.git_clone(source, temp_subpath)
                else:
                    file_util.download_url(source, temp_subpath, print_status=True)
                    if source_unpack:
                        # Unpack next to the download, drop the archive, and
                        # point temp_subpath at the extracted copy.
                        zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                        path_util.remove(temp_subpath)
                        temp_subpath = zip_util.strip_archive_ext(temp_subpath)
                print_util.clear_line()
            else:
                # Copy the local path.
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')

                # Recursively copy the directory into a new BundleStore temp directory.
                print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
                if source_unpack:
                    zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                    temp_subpath = zip_util.strip_archive_ext(temp_subpath)
                else:
                    if remove_sources:
                        # Moving is cheaper than copy+delete when we own the source.
                        path_util.rename(source_path, temp_subpath)
                    else:
                        path_util.copy(source_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
                print_util.clear_line()

            temp_subpaths.append(temp_subpath)

        # If exactly one source, then upload that directly.
        if len(temp_subpaths) == 1:
            # The staging directory wrapper is no longer needed; delete it later.
            to_delete.append(temp_path)
            temp_path = temp_subpaths[0]

        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            dirs_and_files = ([], [temp_path])

        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
        data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        print_util.clear_line()
        final_path = os.path.join(self.data, data_hash)
        if os.path.exists(final_path):
            # Already exists, just delete it
            path_util.remove(temp_path)
        else:
            # NOTE: Python 2 print-statement syntax; this module predates Python 3.
            print >>sys.stderr, 'BundleStore.upload: moving %s to %s' % (temp_path, final_path)
            path_util.rename(temp_path, final_path)

        # Delete paths.
        for path in to_delete:
            if os.path.exists(path):
                path_util.remove(path)

        # After this operation there should always be a directory at the final path.
        assert(os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})
Example #7
0
 def _unpack_fileobj(self, source_filename, source_fileobj, dest_path, simplify_archive):
     """Extract the archive stream `source_fileobj` into `dest_path`.

     The archive format is inferred from `source_filename`'s extension;
     optionally collapses single-entry archives via `_simplify_archive`.
     """
     archive_ext = zip_util.get_archive_ext(source_filename)
     zip_util.unpack(archive_ext, source_fileobj, dest_path)
     if simplify_archive:
         self._simplify_archive(dest_path)
Example #8
0
 def _unpack_file(self, source_path, dest_path, remove_source, simplify_archive):
     """Extract the archive at `source_path` into `dest_path`.

     Optionally removes the source archive afterwards and collapses
     single-entry archives via `_simplify_archive`.
     """
     archive_ext = zip_util.get_archive_ext(source_path)
     zip_util.unpack(archive_ext, source_path, dest_path)
     if remove_source:
         path_util.remove(source_path)
     if simplify_archive:
         self._simplify_archive(dest_path)
Example #9
0
 def _unpack_fileobj(self, source_filename, source_fileobj, dest_path,
                     simplify_archive):
     """Extract the archive stream `source_fileobj` into `dest_path`.

     The archive format is inferred from `source_filename`'s extension;
     optionally collapses single-entry archives via `_simplify_archive`.
     """
     archive_ext = zip_util.get_archive_ext(source_filename)
     zip_util.unpack(archive_ext, source_fileobj, dest_path)
     if simplify_archive:
         self._simplify_archive(dest_path)
Example #10
0
 def download_target(self, target, final_path):
     """Fetch `target` over RPC and unpack its archive at `final_path`."""
     handle_uuid = self.open_target_archive(target)
     remote_stream = RPCFileHandle(handle_uuid, self.proxy)
     zip_util.unpack(remote_stream, final_path)
     # Release the server-side handle once the transfer is complete.
     self.finalize_file(handle_uuid)
Example #11
0
    def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, uuid):
        """
        |sources|: specifies the locations of the contents to upload.  Each element is either a URL or a local path.
        |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
        |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
        |git|: for URL, whether |source| is a git repo to clone.
        |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
        |remove_sources|: remove |sources|.
        |uuid|: bundle identifier; selects the partition and names the final directory.

        If |sources| contains one source, then the bundle contents will be that source.
        Otherwise, the bundle contents will be a directory with each of the sources.
        Exceptions:
        - If |git|, then each source is replaced with the result of running 'git clone |source|'
        - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

        Install the contents of the directory at |source| into
        DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.

        Raises UsageError if |uuid| already has contents in the bundle store.
        """
        # Paths scheduled for deletion once the upload has finished.
        to_delete = []

        # If just a single file, set the final path to be equal to that file
        single_path = len(sources) == 1

        # Determine which disk this will go on
        # (consistent-hash ring keyed by the bundle uuid).
        disk_choice = self.ring.get_node(uuid)

        final_path = os.path.join(self.partitions, disk_choice, self.DATA_SUBDIRECTORY, uuid)
        if os.path.exists(final_path):
            raise UsageError('Path %s already present in bundle store' % final_path)
        # Only make if not there
        elif not single_path:
            path_util.make_directory(final_path)

        # Paths to resources
        subpaths = []

        for source in sources:
            # Where to save |source| to (might change this value if we unpack).
            if not single_path:
                subpath = os.path.join(final_path, os.path.basename(source))
            else:
                subpath = final_path

            if remove_sources:
                to_delete.append(source)
            # Unpack only when requested AND the path looks like an archive.
            source_unpack = unpack and zip_util.path_is_archive(source)

            if source_unpack and single_path:
                # Load the file into the bundle store under the given path
                # (re-attach the archive extension so unpack can strip it below).
                subpath += zip_util.get_archive_ext(source)

            if path_util.path_is_url(source):
                # Download the URL.
                print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, subpath))
                if git:
                    file_util.git_clone(source, subpath)
                else:
                    file_util.download_url(source, subpath, print_status=True)
                    if source_unpack:
                        # Unpack next to the download, drop the archive, and
                        # point subpath at the extracted copy.
                        zip_util.unpack(subpath, zip_util.strip_archive_ext(subpath))
                        path_util.remove(subpath)
                        subpath = zip_util.strip_archive_ext(subpath)
                print_util.clear_line()
            else:
                # Copy the local path.
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')

                # Recursively copy the directory into the BundleStore
                print_util.open_line('BundleStore.upload: %s => %s' % (source_path, subpath))
                if source_unpack:
                    zip_util.unpack(source_path, zip_util.strip_archive_ext(subpath))
                    subpath = zip_util.strip_archive_ext(subpath)
                else:
                    if remove_sources:
                        # Moving is cheaper than copy+delete when we own the source.
                        path_util.rename(source_path, subpath)
                    else:
                        path_util.copy(source_path, subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
                print_util.clear_line()

            subpaths.append(subpath)

        # Multiplex between a directory bundle and a single-file bundle.
        dirs_and_files = None
        if os.path.isdir(final_path):
            dirs_and_files = path_util.recursive_ls(final_path)
        else:
            dirs_and_files = [], [final_path]

        # Hash the contents of the bundle directory. Update the data_hash attribute
        # for the bundle
        print_util.open_line('BundleStore.upload: hashing %s' % final_path)
        data_hash = '0x%s' % (path_util.hash_directory(final_path, dirs_and_files))
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' % final_path)
        data_size = path_util.get_size(final_path, dirs_and_files)
        print_util.clear_line()

        # Delete paths.
        for path in to_delete:
            if os.path.exists(path):
                path_util.remove(path)

        # After this operation there should always be a directory at the final path.
        assert (os.path.lexists(final_path)), 'Uploaded to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})