示例#1
0
    def test_untar(self):
        """Round-trip files through a gzipped tarball and ``utils.untar``.

        Three small files are written and their SHA256 digests recorded. The
        files are packed into ``test.tgz``, extracted into a fresh ``dest``
        directory, and every extracted file must exist and hash to the digest
        recorded before archiving.
        """
        with tempfile.TemporaryDirectory() as scratch:
            root = pathlib.Path(scratch)
            names = ["foo", "bar", "baz"]

            # Create the inputs, remembering each file's checksum for later.
            expected = {}
            for name in names:
                source = root / name
                source.write_bytes(name.encode())
                expected[name] = utils.sha256(source)

            archive = root / "test.tgz"
            # Members are added by bare name while utils.cd(root) is active
            # (presumably a chdir-style context manager -- confirm in utils).
            with utils.cd(root), tarfile.open(archive, mode="w:gz") as tf:
                for name in names:
                    tf.add(name)

            out_dir = root / "dest"
            out_dir.mkdir()
            utils.untar(archive, out_dir)

            # Extracted files must match the checksums of the originals.
            for name, digest in expected.items():
                extracted = out_dir / name
                self.assertTrue(extracted.exists())
                self.assertEqual(utils.sha256(extracted), digest)
示例#2
0
 def test_sha256_small(self):
     """Check ``utils.sha256`` on an 11-byte file against a known digest."""
     expected = (
         # Calculated with `sha256sum` from GNU coreutils.
         "269dce1a5bb90188b2d9cf542a7c30e410c7d8251e34a97bfea56062df51ae23"
     )
     with tempfile.TemporaryDirectory() as scratch:
         target = pathlib.Path(scratch) / "test.foo"
         # Raw bytes are written so that windows/mac/unix line-ending
         # differences cannot change the file content.
         target.write_bytes(b"foo-bar-baz")
         self.assertEqual(utils.sha256(target), expected)
示例#3
0
 def test_sha256_big(self):
     """Check ``utils.sha256`` on a multi-megabyte file against a known digest.

     The file holds the decimal representations of 0 .. 1024*1024 - 1
     concatenated with no separators.
     """
     expected = (
         # Calculated with `sha256sum` from GNU coreutils.
         "995e0fde646f7dc98423af9a862be96014574bfa76be1186b484f796c4e58533"
     )
     with tempfile.TemporaryDirectory() as scratch:
         target = pathlib.Path(scratch) / "test.foo"
         # Binary mode, so windows/mac/unix line-ending differences cannot
         # change the file content.
         with open(target, "wb") as f:
             f.writelines(str(i).encode() for i in range(1024 * 1024))
         self.assertEqual(utils.sha256(target), expected)
示例#4
0
def setUpModule():
    """Point every annotation at a locally cached copy of its data file.

    For each annotation returned by ``stdpopsim.all_annotations()``:

    * download its file into ``_test_cache/zipfiles/`` if it is not already
      there,
    * assert that the local file's SHA256 matches ``zarr_sha256``,
    * record the original URL in ``saved_urls`` under the cache path, and
    * replace the annotation's URL (and its cache's URL) with a ``file://``
      URI for the local copy.
    """
    cache_root = pathlib.Path("_test_cache/zipfiles/")
    for an in stdpopsim.all_annotations():
        key = an._cache.cache_path
        local_file = cache_root / key
        if not local_file.exists():
            local_file.parent.mkdir(parents=True, exist_ok=True)
            print("Downloading", an.zarr_url)
            utils.download(an.zarr_url, local_file)

        # This assertion could fail if we update a file on AWS,
        # or a developer creates a new annotation with the wrong checksum
        # (in the latter case, this should at least be caught during CI tests).
        digest = utils.sha256(local_file)
        assert digest == an.zarr_sha256, (
            f"SHA256 for {local_file} doesn't match the SHA256 for "
            f"{an.id}. If you didn't add this SHA256 yourself, "
            f"try deleting {local_file} and restarting the tests.")

        # Remember the remote URL before redirecting to the local file.
        saved_urls[key] = an.zarr_url
        an.zarr_url = local_file.resolve().as_uri()
        an._cache.url = an.zarr_url
def setUpModule():
    """Point every genetic map at a locally cached copy of its tarball.

    For each map returned by ``stdpopsim.all_genetic_maps()``:

    * download its tarball to ``_test_cache/tarballs/<id>.tar.gz`` if it is
      not already there,
    * assert that the local file's SHA256 matches the map's ``sha256``,
    * record the original URL in ``saved_urls`` under the map's id, and
    * replace the map's URL (and its cache's URL) with a ``file://`` URI for
      the local copy.
    """
    cache_root = pathlib.Path("_test_cache/tarballs")
    for genetic_map in stdpopsim.all_genetic_maps():
        key = genetic_map.id
        local_file = cache_root / (key + ".tar.gz")
        if not local_file.exists():
            local_file.parent.mkdir(parents=True, exist_ok=True)
            print("Downloading", genetic_map.url)
            utils.download(genetic_map.url, local_file)

        # This assertion could fail if we update a file on AWS,
        # or a developer creates a new genetic map with the wrong checksum
        # (in the latter case, this should at least be caught during CI tests).
        digest = utils.sha256(local_file)
        assert digest == genetic_map.sha256, (
            f"SHA256 for {local_file} doesn't match the SHA256 for "
            f"{genetic_map.id}. If you didn't add this SHA256 yourself, "
            f"try deleting {local_file} and restarting the tests.")

        # Remember the remote URL before redirecting to the local file.
        saved_urls[key] = genetic_map.url
        genetic_map.url = local_file.resolve().as_uri()
        genetic_map._cache.url = genetic_map.url
示例#6
0
    def test_caching(self):
        """Exercise the download/validate cycle of ``stdpopsim.CachedData``.

        Runs once with ``extract=True`` and once with ``extract=False``. Each
        run builds a one-file tarball, then checks ``is_cached()`` and
        ``is_valid()`` before and after a download, after the expected
        checksum is corrupted (where ``download()`` must raise ``ValueError``),
        and after the checksum is restored and the data downloaded again.
        """
        for extract in (True, False):
            with tempfile.TemporaryDirectory() as scratch:
                root = pathlib.Path(scratch)
                archive = root / "test.tgz"
                with utils.cd(root):
                    member = "test.foo"
                    with open(member, "w") as f:
                        f.write("foo\n")
                    with tarfile.open(archive, "w:gz") as tf:
                        tf.add(member)

                good_sha = utils.sha256(archive)
                cache = stdpopsim.CachedData(
                    namespace="test",
                    url=archive.resolve().as_uri(),
                    sha256=good_sha,
                    extract=extract,
                )

                # Nothing is cached until the first download.
                self.assertFalse(cache.is_cached())
                self.assertFalse(cache.is_valid())
                cache.download()
                self.assertTrue(cache.is_cached())
                self.assertTrue(cache.is_valid())

                # With a wrong expected checksum the data is still cached
                # but no longer valid.
                cache.sha256 = "1234"
                self.assertTrue(cache.is_cached())
                self.assertFalse(cache.is_valid())
                # Downloading again fails on the checksum mismatch ...
                with self.assertRaises(ValueError):
                    cache.download()
                # ... and afterwards nothing is reported as cached.
                self.assertFalse(cache.is_cached())
                self.assertFalse(cache.is_valid())

                # Restoring the correct checksum makes the download succeed.
                cache.sha256 = good_sha
                cache.download()
                self.assertTrue(cache.is_cached())
                self.assertTrue(cache.is_valid())
示例#7
0
    def test_multiple_threads_downloading(self):
        """Expect a ``UserWarning`` when downloading data that already exists.

        A one-file tarball is downloaded once through
        ``stdpopsim.CachedData``. ``is_cached`` is then overridden on the
        instance to always report ``False``, and a second ``download()`` call
        must emit a ``UserWarning``.
        """
        with tempfile.TemporaryDirectory() as scratch:
            root = pathlib.Path(scratch)
            archive = root / "test.tgz"
            with utils.cd(root):
                member = "test.foo"
                with open(member, "w") as f:
                    f.write("foo\n")
                with tarfile.open(archive, "w:gz") as tf:
                    tf.add(member)

            cache = stdpopsim.CachedData(
                namespace="test",
                url=archive.resolve().as_uri(),
                sha256=utils.sha256(archive),
                extract=True,
            )
            cache.download()

            def never_cached():
                return False

            # Trick the download code into thinking there's several
            # downloads happening concurrently: the data is already in place,
            # but the cache claims it is not.
            cache.is_cached = never_cached
            with self.assertWarns(UserWarning):
                cache.download()