Exemplo n.º 1
0
    def test_tar_path_traversal_attack(self):
        # utils.untar must raise ValueError when a tarball contains a member
        # that uses a parent-relative ("..") or absolute path, either as a
        # symlink target or as the member's own name.
        escaping_paths = ("../nonexistant", "/nonexistant")
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            dest = tmpdir / "dest"
            dest.mkdir()

            # Case 1: the archive holds a symlink whose target escapes.
            for target in escaping_paths:
                archive = tmpdir / "symlink-path-traversal.tgz"
                link = tmpdir / "link"
                link.symlink_to(target)
                # Add by relative name so the member is stored as "link".
                with utils.cd(tmpdir), tarfile.open(archive, mode="w:gz") as tf:
                    tf.add("link")
                with self.assertRaises(ValueError):
                    utils.untar(archive, dest)
                # Clean up so the next iteration can recreate both paths.
                rm_f(link)
                rm_f(archive)

            # Case 2: a regular file whose recorded member name escapes.
            for member_name in escaping_paths:
                archive = tmpdir / "path-traversal.tgz"
                payload = tmpdir / "test-thing"
                with open(payload, "w") as f:
                    print("blah", file=f)

                def rename_member(info):
                    # Overwrite the path the file will be extracted to.
                    info.name = member_name
                    return info

                with utils.cd(tmpdir), tarfile.open(archive, mode="w:gz") as tf:
                    tf.add("test-thing", filter=rename_member)
                with self.assertRaises(ValueError):
                    utils.untar(archive, dest)
                rm_f(payload)
                rm_f(archive)
Exemplo n.º 2
0
    def test_bad_tar_members(self):
        # Pretend we downloaded a tarball containing a FIFO or device file.
        # There is no reasonable use for these types of files in stdpopsim,
        # so their presence likely indicates a maliciously crafted tarball.
        # Creating a character or block special device file requires root
        # privileges, so we instead modify the ``type`` field of each file
        # in the tar.
        irregular_types = [
            ("fifo", tarfile.FIFOTYPE),
            ("char-device", tarfile.CHRTYPE),
            ("block-device", tarfile.BLKTYPE),
        ]
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            dest = tmpdir / "dest"
            dest.mkdir()
            for label, type_ in irregular_types:
                # The label is only used for reporting: it tells us which
                # member type was wrongly accepted if the assertion fails.
                with self.subTest(member_type=label):
                    tar = tmpdir / "irregular-type.tgz"
                    filename = tmpdir / "irregular"
                    with open(filename, "w") as f:
                        print("blah", file=f)
                    with utils.cd(tmpdir):
                        with tarfile.open(tar, mode="w:gz") as tf:

                            def filt(info):
                                info.type = type_  # lie about the type
                                return info

                            tf.add("irregular", filter=filt)
                    # untar is expected to reject the irregular member.
                    with self.assertRaises(ValueError):
                        utils.untar(tar, dest)
                    rm_f(filename)
                    rm_f(tar)
Exemplo n.º 3
0
    def test_untar(self):
        # Round trip: files packed into a gzipped tarball and unpacked with
        # utils.untar must exist in the destination with unchanged sha256.
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            names = ["foo", "bar", "baz"]

            # Each file's content is simply its own name; remember the
            # checksum of every file before it goes into the archive.
            expected_sha = {}
            for name in names:
                source = tmpdir / name
                source.write_bytes(name.encode())
                expected_sha[name] = utils.sha256(source)

            archive = tmpdir / "test.tgz"
            # Add by relative name so members are stored without a prefix.
            with utils.cd(tmpdir), tarfile.open(archive, mode="w:gz") as tf:
                for name in names:
                    tf.add(name)

            dest = tmpdir / "dest"
            dest.mkdir()
            utils.untar(archive, dest)

            for name in names:
                extracted = dest / name
                self.assertTrue(extracted.exists())
                # Extracted content must match what was archived.
                self.assertEqual(utils.sha256(extracted), expected_sha[name])
Exemplo n.º 4
0
def get_genetic_map_tarball():
    """
    Returns a genetic map in hapmap format in a tarball as a bytes object.

    The archive is gzip-compressed and contains nine top-level members,
    ``prefix_chr1.txt`` through ``prefix_chr9.txt``, added in sorted name
    order. Every member holds the same four lines: a header row followed
    by three ``chr1`` rows.
    """
    with tempfile.TemporaryDirectory() as map_dir:
        for j in range(1, 10):
            # TODO Have a way to put in different maps??
            with open(os.path.join(map_dir, "prefix_chr{}.txt".format(j)),
                      "w") as f:
                print("Chromosome  Position(bp)    Rate(cM/Mb)     Map(cM)",
                      file=f)
                print("chr1        55550   2.981822        0.000000", file=f)
                print("chr1        82571   2.082414        0.080572", file=f)
                print("chr1        88169   0               0.092229", file=f)

        # Tar up the map directory. ``arcname`` stores each member under its
        # bare file name, so there is no need to change the process-wide
        # working directory to get prefix-free member names.
        with tempfile.TemporaryFile("wb+") as tmp_file:
            with tarfile.open(fileobj=tmp_file, mode="w:gz") as tar_file:
                # Sort so the member order does not depend on the
                # arbitrary order returned by os.listdir.
                for filename in sorted(os.listdir(map_dir)):
                    tar_file.add(
                        os.path.join(map_dir, filename), arcname=filename
                    )
            # Read back the tarball
            tmp_file.seek(0)
            tarball = tmp_file.read()
    return tarball
Exemplo n.º 5
0
 def test_cd_context_manager(self):
     # utils.cd must switch the working directory on entry and restore
     # the previous one on exit.
     #
     # On Mac, the directory we "cd" into may be reported differently by
     # cwd() because of symlinks, so every path is resolved before it is
     # compared.
     start_dir = pathlib.Path.cwd().resolve()
     with tempfile.TemporaryDirectory() as tmpdir:
         target = pathlib.Path(tmpdir).resolve()
         # The check is only meaningful if we actually move somewhere else.
         self.assertNotEqual(start_dir, target)
         with utils.cd(target):
             inside = pathlib.Path.cwd().resolve()
             self.assertEqual(inside, target)
         after = pathlib.Path.cwd().resolve()
         self.assertEqual(after, start_dir)
Exemplo n.º 6
0
 def get_maps(self, tarball):
     """
     Unpack ``tarball`` (the raw bytes of a tar archive) into a scratch
     directory and parse every top-level entry found there with
     ``msprime.RateMap.read_hapmap``.

     Returns a dict mapping each extracted file name to the object
     returned by ``read_hapmap`` for that file.
     """
     maps = {}
     with tempfile.TemporaryFile("wb+") as f:
         f.write(tarball)
         # Rewind so tarfile reads the archive from its first byte.
         f.seek(0)
         with tarfile.open(fileobj=f, mode="r") as tar_file:
             with tempfile.TemporaryDirectory() as extract_dir:
                 # Extract to an explicit destination instead of changing
                 # the process-wide working directory.
                 # NOTE(review): members are not validated before
                 # extraction; presumably the tarball is a trusted test
                 # fixture -- confirm against callers.
                 tar_file.extractall(path=extract_dir)
                 for fn in os.listdir(extract_dir):
                     # Keys stay bare file names; only the read uses the
                     # full path.
                     maps[fn] = msprime.RateMap.read_hapmap(
                         os.path.join(extract_dir, fn)
                     )
     return maps
Exemplo n.º 7
0
    def test_caching(self):
        # Walk a stdpopsim.CachedData object through download, checksum
        # mismatch and re-download, once with extract=True and once with
        # extract=False, checking is_cached()/is_valid() after every step.
        for extract in (True, False):
            with tempfile.TemporaryDirectory() as tmpdir:
                tmpdir = pathlib.Path(tmpdir)
                tar = tmpdir / "test.tgz"
                with utils.cd(tmpdir):
                    filename = "test.foo"
                    with open(filename, "w") as f:
                        print("foo", file=f)
                    with tarfile.open(tar, "w:gz") as tf:
                        tf.add(filename)

                good_sha = utils.sha256(tar)
                cache = stdpopsim.CachedData(
                    namespace="test",
                    url=tar.resolve().as_uri(),
                    sha256=good_sha,
                    extract=extract,
                )

                def expect_state(cached, valid):
                    # Assert is_cached() first, then is_valid().
                    check = self.assertTrue if cached else self.assertFalse
                    check(cache.is_cached())
                    check = self.assertTrue if valid else self.assertFalse
                    check(cache.is_valid())

                # Nothing has been downloaded yet.
                expect_state(cached=False, valid=False)
                cache.download()
                expect_state(cached=True, valid=True)

                # With a wrong checksum the data is still cached but no
                # longer valid, and downloading again must fail.
                cache.sha256 = "1234"
                expect_state(cached=True, valid=False)
                with self.assertRaises(ValueError):
                    # checksum mismatch
                    cache.download()
                expect_state(cached=False, valid=False)

                # Restore the correct checksum and download again.
                cache.sha256 = good_sha
                cache.download()
                expect_state(cached=True, valid=True)
Exemplo n.º 8
0
    def test_multiple_threads_downloading(self):
        # A second download() issued while is_cached() reports False, even
        # though the data has already been downloaded, must emit a
        # UserWarning.
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpdir = pathlib.Path(tmpdir)
            tar = tmpdir / "test.tgz"
            with utils.cd(tmpdir):
                filename = "test.foo"
                with open(filename, "w") as f:
                    print("foo", file=f)
                with tarfile.open(tar, "w:gz") as tf:
                    tf.add(filename)

            source_url = tar.resolve().as_uri()
            checksum = utils.sha256(tar)
            cache = stdpopsim.CachedData(
                namespace="test",
                url=source_url,
                sha256=checksum,
                extract=True,
            )
            cache.download()

            # Trick the download code into thinking there's several
            # downloads happening concurrently: the data is already in
            # place, but is_cached() is forced to say otherwise.
            def never_cached():
                return False

            cache.is_cached = never_cached
            with self.assertWarns(UserWarning):
                cache.download()