def test_uncompress_tarpaths(tmp_path, datadir, prepare_shutil_state):
    """High level call uncompression on un/supported tarballs"""
    archive_dir = os.path.join(datadir, "archives")
    tarfiles = os.listdir(archive_dir)
    tarpaths = [os.path.join(archive_dir, tarfile) for tarfile in tarfiles]

    # guard: the final `assert n == len(tarpaths)` would raise NameError
    # (n unbound) if the directory were empty, masking the real problem
    assert tarpaths, "expected at least one test archive in %s" % archive_dir

    # str.endswith accepts a tuple of suffixes: one call instead of a
    # manual append loop with an or-chain
    unsupported_tarpaths = [
        t for t in tarpaths if t.endswith((".Z", ".x", ".lz"))
    ]

    # not supported yet
    for tarpath in unsupported_tarpaths:
        with pytest.raises(ValueError, match=f"Problem during unpacking {tarpath}."):
            tarball.uncompress(tarpath, dest=tmp_path)

    # register those unsupported formats
    tarball.register_new_archive_formats()

    # unsupported formats are now supported
    for n, tarpath in enumerate(tarpaths, start=1):
        tarball.uncompress(tarpath, dest=tmp_path)

    assert n == len(tarpaths)
def aggregate_tarballs(extraction_dir, archive_paths):
    """Aggregate multiple tarballs into one and returns this new archive's
    path.

    Args:
        extraction_dir (path): Path to use for the tarballs computation
        archive_paths ([str]): Deposit's archive paths

    Returns:
        Tuple (directory to clean up, archive path (aggregated or not))

    """
    # Rebuild a single archive from (possibly) several input ones.
    os.makedirs(extraction_dir, 0o755, exist_ok=True)
    workdir = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir)

    # All inputs get unpacked under one root before re-archiving.
    aggregate_root = os.path.join(workdir, "aggregate")
    os.makedirs(aggregate_root, 0o755, exist_ok=True)

    # Uncompress every archive into the shared temporary root.
    for archive_path in archive_paths:
        tarball.uncompress(archive_path, aggregate_root)

    # Re-pack the whole root as one tarball.
    temp_tarpath = shutil.make_archive(aggregate_root, "tar", aggregate_root)

    # The uncompressed tree is no longer needed once the tarball exists.
    shutil.rmtree(aggregate_root)

    try:
        yield temp_tarpath
    finally:
        # Always remove the working directory when the consumer is done.
        shutil.rmtree(workdir)
def test_cran_extract_intrinsic_metadata(tmp_path, datadir):
    """Parsing existing archive's PKG-INFO should yield results"""
    extraction_dir = str(tmp_path)
    # sample url
    # https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz # noqa
    cran_archive = path.join(
        datadir,
        "https_cran.r-project.org",
        "src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz",
    )
    uncompress(cran_archive, dest=extraction_dir)

    expected_metadata = {
        "Package": "KernSmooth",
        "Priority": "recommended",
        "Version": "2.22-6",
        "Date": "2001-June-08",
        "Title": "Functions for kernel smoothing for Wand & Jones (1995)",
        "Author": "S original by Matt Wand.\n\tR port by Brian Ripley <*****@*****.**>.",  # noqa
        "Maintainer": "Brian Ripley <*****@*****.**>",
        "Description": 'functions for kernel smoothing (and density estimation)\n corresponding to the book: \n Wand, M.P. and Jones, M.C. (1995) "Kernel Smoothing".',  # noqa
        "License": "Unlimited use and distribution (see LICENCE).",
        "URL": "http://www.biostat.harvard.edu/~mwand",
    }

    assert extract_intrinsic_metadata(extraction_dir) == expected_metadata
def test_uncompress_tar_failure(tmp_path, datadir):
    """Unpack inexistent tarball should fail"""
    missing_tarpath = os.path.join(datadir, "archives", "inexistent-archive.tar.Z")

    # sanity check: the fixture file really does not exist
    assert not os.path.exists(missing_tarpath)

    with pytest.raises(ValueError, match="Problem during unpacking"):
        tarball.uncompress(missing_tarpath, tmp_path)
def test_uncompress_archives(tmp_path, datadir):
    """High level call uncompression on supported archives"""
    archives_root = os.path.join(datadir, "archives")
    for name in os.listdir(archives_root):
        source = os.path.join(archives_root, name)
        # each archive gets its own extraction directory
        target = os.path.join(tmp_path, name)
        tarball.uncompress(source, dest=target)
        # extraction must have produced at least one entry
        assert len(os.listdir(target)) > 0
def test_uncompress_tar(tmp_path, datadir):
    """Unpack supported tarball into an existent folder should be ok"""
    archive_name = "groff-1.02.tar.Z"
    source = os.path.join(datadir, "archives", archive_name)

    # sanity check: the fixture archive exists
    assert os.path.exists(source)

    target = os.path.join(tmp_path, archive_name)
    tarball.uncompress(source, target)

    # something must have been extracted
    assert len(os.listdir(target)) > 0
def test_unpcompress_zip_imploded(tmp_path, datadir):
    """Unpack a zip archive with compression type 6 (implode), not supported
    by python zipfile module.

    """
    # NOTE(review): "unpcompress" in the name looks like a typo for
    # "uncompress"; kept as-is since renaming changes test discovery.
    archive_name = "msk316src.zip"
    source = os.path.join(datadir, "archives", archive_name)

    # sanity check: the fixture archive exists
    assert os.path.exists(source)

    target = os.path.join(tmp_path, archive_name)
    tarball.uncompress(source, target)

    # something must have been extracted
    assert len(os.listdir(target)) > 0
def test_compress_uncompress_tar(tmp_path):
    """Round-trip: files compressed to tar then uncompressed come back."""
    src_dir = tmp_path / "compressme"
    src_dir.mkdir()

    expected_names = []
    for i in range(10):
        name = f"file{i}.txt"
        (src_dir / name).write_text(f"content of file {i}")
        expected_names.append(name)

    archive = tmp_path / "archive.tar"
    tarball.compress(str(archive), "tar", str(src_dir))

    dest_dir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(dest_dir))

    extracted = sorted(entry.name for entry in dest_dir.iterdir())
    assert expected_names == extracted
def test_uncompress_upper_archive_extension(tmp_path, datadir):
    """Copy test archives in a temporary directory but turn their names
    to uppercase, then check they can be successfully extracted.

    """
    archives_path = os.path.join(datadir, "archives")
    for entry in os.listdir(archives_path):
        source = os.path.join(archives_path, entry)
        if not os.path.isfile(source):
            continue
        # copy under an all-uppercase name to exercise extension matching
        upper_copy = os.path.join(tmp_path, entry.upper())
        shutil.copy(source, upper_copy)

        extract_dir = os.path.join(tmp_path, entry)
        tarball.uncompress(upper_copy, extract_dir)
        assert len(os.listdir(extract_dir)) > 0
def jar_dirs(datadir, tmp_path):
    """Uncompress the two sample jars and model them as Directory objects."""
    maven_dir = os.path.join(datadir, "https_maven.org")
    directories = []
    for idx, jar_name in enumerate(
        ("sprova4j-0.1.0-sources.jar", "sprova4j-0.1.1-sources.jar"), start=1
    ):
        extract_path = os.path.join(tmp_path, f"jar_{idx}")
        uncompress(os.path.join(maven_dir, jar_name), extract_path)
        directories.append(Directory.from_disk(path=extract_path.encode()))
    return directories
def fetch_data(self):
    """Retrieve, uncompress archive and fetch objects from the tarball.
    The actual ingestion takes place in the :meth:`store_data`
    implementation below.

    """
    tarball_url = self.get_tarball_url_to_retrieve()
    filepath, hashes = self.client.download(tarball_url)
    nature = tarball.uncompress(filepath, self.dir_path)

    directory = Directory.from_disk(
        path=self.dir_path.encode('utf-8'), save_path=True)
    objects = directory.collect()
    # make sure both keys exist even when the tree yielded none of them
    objects.setdefault('content', {})
    objects.setdefault('directory', {})

    # compute the full revision (with ids)
    revision = self.build_revision(filepath, nature, hashes)
    revision = revision_from(directory.hash, revision)
    objects['revision'] = {revision['id']: revision}

    snapshot = self.build_snapshot(revision)
    objects['snapshot'] = {snapshot['id']: snapshot}

    self.objects = objects
def init_git_repo_from_archive(project_name, archive_path, root_temp_dir="/tmp"):
    """Given a path to an archive containing a git repository.

    Uncompress that archive to a temporary location and returns the path.

    If any problem whatsoever is raised, clean up the temporary location.

    Args:
        project_name (str): Project's name
        archive_path (str): Full path to the archive
        root_temp_dir (str): Optional temporary directory mount point
            (default to /tmp)

    Returns
        A tuple:
        - temporary folder: containing the mounted repository
        - repo_path, path to the mounted repository inside the temporary
          folder

    Raises
        ValueError in case of failure to run the command to uncompress

    """
    temp_dir = tempfile.mkdtemp(
        suffix=".swh.loader.git", prefix="tmp.", dir=root_temp_dir)

    try:
        # create the repository that will be loaded with the dump
        tarball.uncompress(archive_path, temp_dir)
        repo_path = os.path.join(temp_dir, project_name)
        # tarball content may not be as expected (e.g. no top level directory
        # or a top level directory with a name different from project_name),
        # so try to make it loadable anyway
        if not os.path.exists(repo_path):
            os.mkdir(repo_path)
            # look for a .git directory anywhere in the extracted tree and
            # graft it into the expected location
            for root, dirs, files in os.walk(temp_dir):
                if ".git" in dirs:
                    shutil.copytree(
                        os.path.join(root, ".git"),
                        os.path.join(repo_path, ".git"),
                    )
                    break
        return temp_dir, repo_path
    except Exception:
        # best-effort cleanup, then re-raise the original exception;
        # bare `raise` keeps the active exception and traceback without
        # rebinding it to a local name (idiomatic, avoids `raise e`)
        shutil.rmtree(temp_dir)
        raise
def test_uncompress_archive_no_extension(tmp_path, datadir):
    """Copy test archives in a temporary directory but turn their names
    to their md5 sums, then check they can be successfully extracted.

    """
    archives_path = os.path.join(datadir, "archives")
    for entry in os.listdir(archives_path):
        source = os.path.join(archives_path, entry)
        if not os.path.isfile(source):
            continue
        # name the copy after its md5 digest: no extension to sniff from
        with open(source, "rb") as f:
            digest = hashlib.md5(f.read()).hexdigest()
        anonymous_copy = os.path.join(tmp_path, digest)
        shutil.copy(source, anonymous_copy)

        extract_dir = os.path.join(tmp_path, entry)
        tarball.uncompress(anonymous_copy, extract_dir)
        assert len(os.listdir(extract_dir)) > 0
def test_compress_uncompress_tar_modes(tmp_path):
    """File permission bits must survive a tar compress/uncompress cycle."""
    src_dir = tmp_path / "compressme"
    src_dir.mkdir()

    plain_file = src_dir / "text.txt"
    plain_file.write_text("echo foo")
    plain_file.chmod(0o644)

    script_file = src_dir / "executable.sh"
    script_file.write_text("echo foo")
    script_file.chmod(0o755)

    archive = tmp_path / "archive.tar"
    tarball.compress(str(archive), "tar", str(src_dir))

    dest_dir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(dest_dir))

    # sorted order: executable.sh before text.txt
    (executable_out, text_out) = sorted(dest_dir.iterdir())
    assert text_out.stat().st_mode == 0o100644
    assert executable_out.stat().st_mode == 0o100755
def test_compress_uncompress_zip_modes(tmp_path):
    """File permission bits must survive a zip compress/uncompress cycle."""
    src_dir = tmp_path / "compressme"
    src_dir.mkdir()

    plain_file = src_dir / "text.txt"
    plain_file.write_text("echo foo")
    plain_file.chmod(0o644)

    script_file = src_dir / "executable.sh"
    script_file.write_text("echo foo")
    script_file.chmod(0o755)

    archive = tmp_path / "archive.zip"
    tarball.compress(str(archive), "zip", str(src_dir))

    dest_dir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(dest_dir))

    # sorted order: executable.sh before text.txt
    (executable_out, text_out) = sorted(dest_dir.iterdir())
    assert text_out.stat().st_mode == 0o100644  # succeeds, it's the default
    assert executable_out.stat().st_mode == 0o100755  # fails
def test_pypi_extract_intrinsic_metadata(tmp_path, datadir):
    """Parsing existing archive's PKG-INFO should yield results"""
    extraction_dir = str(tmp_path)
    pypi_archive = path.join(
        datadir, "https_files.pythonhosted.org", "0805nexter-1.1.0.zip"
    )
    uncompress(pypi_archive, dest=extraction_dir)

    expected_metadata = {
        "metadata_version": "1.0",
        "name": "0805nexter",
        "version": "1.1.0",
        "summary": "a simple printer of nested lest",
        "home_page": "http://www.hp.com",
        "author": "hgtkpython",
        "author_email": "*****@*****.**",
        "platforms": ["UNKNOWN"],
    }

    assert extract_intrinsic_metadata(extraction_dir) == expected_metadata