def test_conflicting_filenames(self):
    # An archive that contains both an entry "foo" and an entry
    # "foo/bar" is expected to be rejected. The limitation is not
    # strictly necessary: it exists because patool extracts the files
    # to the actual filesystem. Using zipfile, tarfile, etc. directly
    # would avoid it, but supporting as many archive types as patool
    # does that way would be too burdensome.
    entries = (
        ("foo", b"some content"),
        ("foo/bar", b"more content"),
    )
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        for name, payload in entries:
            archive.writestr(name, payload)
    raw_archive = buf.getvalue()
    with self.assertRaises(InvalidArchive):
        extract_files_from_archive(raw_archive)
def test_filename_with_null(self):
    # An entry written as "foo\0bar" is expected to come back named
    # "foo". This is an expected and most likely unproblematic
    # behavior.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        archive.writestr("foo\0bar", b"some content")
    extracted = extract_files_from_archive(buf.getvalue())
    expected = [ReceivedFile(None, "foo", b"some content")]
    six.assertCountEqual(self, extracted, expected)
def test_multiple_slashes_are_compressed(self):
    # An entry stored as "foo//bar" is expected to be returned as
    # plain "bar". This is a (probably expected and) desirable
    # behavior.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        archive.writestr("foo//bar", b"some content")
    extracted = extract_files_from_archive(buf.getvalue())
    expected = [ReceivedFile(None, "bar", b"some content")]
    self.assertCountEqual(extracted, expected)
def test_multiple_slashes_are_compressed(self):
    # An entry stored as "foo//bar" is expected to be returned as
    # plain "bar". This is a (probably expected and) desirable
    # behavior.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        archive.writestr("foo//bar", b"some content")
    extracted = extract_files_from_archive(buf.getvalue())
    expected = [ReceivedFile(None, "bar", b"some content")]
    six.assertCountEqual(self, extracted, expected)
def test_filename_with_null(self):
    # An entry written as "foo\0bar" is expected to come back named
    # "foo". This is an expected and most likely unproblematic
    # behavior.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        archive.writestr("foo\0bar", b"some content")
    extracted = extract_files_from_archive(buf.getvalue())
    expected = [ReceivedFile(None, "foo", b"some content")]
    self.assertCountEqual(extracted, expected)
def test_zip(self):
    # Pack a few files into a deflate-compressed zip archive and check
    # that extraction yields the same files (order is not significant).
    expected = [
        ReceivedFile(None, "foo.c", b"some content"),
        ReceivedFile(None, "foo", b"some other content"),
        ReceivedFile(None, "foo.%l", b"more content"),
    ]
    buf = io.BytesIO()
    with zipfile.ZipFile(
            buf, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for _codename, name, payload in expected:
            archive.writestr(name, payload)
    extracted = extract_files_from_archive(buf.getvalue())
    self.assertCountEqual(extracted, expected)
def test_paths_that_might_escape(self):
    # Check that extracted files cannot "escape" from the temporary
    # directory they are extracted to: entries whose names point
    # outside of it are expected to come back as plain "bar".
    for risky_name in ("../foo/bar", "/foo/bar"):
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w") as archive:
            archive.writestr(risky_name, b"some content")
        extracted = extract_files_from_archive(buf.getvalue())
        expected = [ReceivedFile(None, "bar", b"some content")]
        six.assertCountEqual(self, extracted, expected)
def test_paths_that_might_escape(self):
    # This should check that the extracted files cannot "escape"
    # from the temporary directory where they're being extracted to:
    # entries whose names point outside of it are expected to come
    # back as plain "bar".
    filenames = ["../foo/bar", "/foo/bar"]
    for filename in filenames:
        # Run each filename as its own sub-test, so that a failure
        # names the offending path and does not prevent the remaining
        # paths from being checked.
        with self.subTest(filename=filename):
            archive_data = io.BytesIO()
            with zipfile.ZipFile(archive_data, "w") as f:
                f.writestr(filename, b"some content")
            self.assertCountEqual(
                extract_files_from_archive(archive_data.getvalue()),
                [ReceivedFile(None, "bar", b"some content")])
def test_tar_gz(self):
    # Pack a few files into a gzip-compressed tar archive and check
    # that extraction yields the same files (order is not significant).
    expected = [
        ReceivedFile(None, "foo.c", b"some content"),
        ReceivedFile(None, "foo", b"some other content"),
        ReceivedFile(None, "foo.%l", b"more content"),
    ]
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        for _codename, name, payload in expected:
            member = tarfile.TarInfo(name)
            # The member size has to be set explicitly before adding.
            member.size = len(payload)
            tar.addfile(member, io.BytesIO(payload))
    extracted = extract_files_from_archive(buf.getvalue())
    self.assertCountEqual(extracted, expected)
def test_zip(self):
    # Pack a few files into a deflate-compressed zip archive and check
    # that extraction yields the same files (order is not significant).
    expected = [
        ReceivedFile(None, "foo.c", b"some content"),
        ReceivedFile(None, "foo", b"some other content"),
        ReceivedFile(None, "foo.%l", b"more content"),
    ]
    buf = io.BytesIO()
    with zipfile.ZipFile(
            buf, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for _codename, name, payload in expected:
            archive.writestr(name, payload)
    extracted = extract_files_from_archive(buf.getvalue())
    six.assertCountEqual(self, extracted, expected)
def test_tar_gz(self):
    # Pack a few files into a gzip-compressed tar archive and check
    # that extraction yields the same files (order is not significant).
    expected = [
        ReceivedFile(None, "foo.c", b"some content"),
        ReceivedFile(None, "foo", b"some other content"),
        ReceivedFile(None, "foo.%l", b"more content"),
    ]
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
        for _codename, name, payload in expected:
            member = tarfile.TarInfo(name)
            # The member size has to be set explicitly before adding.
            member.size = len(payload)
            tar.addfile(member, io.BytesIO(payload))
    extracted = extract_files_from_archive(buf.getvalue())
    six.assertCountEqual(self, extracted, expected)
def test_directories(self):
    # The directory structure inside the archive is ignored: only the
    # trailing path component (i.e., the basename) shows up in the
    # return value, even when that produces duplicated filenames.
    entries = (
        ("toplevel", b"some content"),
        ("nested/once", b"some other content"),
        ("two/levels/deep", b"more content"),
        ("many/levels/deep", b"moar content"),
    )
    buf = io.BytesIO()
    with zipfile.ZipFile(
            buf, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for path, payload in entries:
            archive.writestr(path, payload)
    extracted = extract_files_from_archive(buf.getvalue())
    expected = [
        ReceivedFile(None, "toplevel", b"some content"),
        ReceivedFile(None, "once", b"some other content"),
        ReceivedFile(None, "deep", b"more content"),
        ReceivedFile(None, "deep", b"moar content"),
    ]
    self.assertCountEqual(extracted, expected)
def test_empty_filename(self):
    # Quite unexpected behavior, but luckily without practical
    # effect: the elements of the submission format aren't allowed to
    # be empty, so such a submission would be rejected later on
    # anyway. It also shouldn't leak any private information.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        # An explicit ZipInfo is needed because of a "bug" in writestr.
        archive.writestr(zipfile.ZipInfo(""), b"some content")
    extracted = extract_files_from_archive(buf.getvalue())
    self.assertEqual(len(extracted), 1)
    received = extracted[0]
    self.assertIsNone(received.codename)
    # The extracted file takes its name from the temporary file the
    # archive's contents were copied to, plus a trailing tilde.
    six.assertRegex(self, received.filename, "tmp[a-z0-9_]+~")
    self.assertEqual(received.content, b"some content")
def test_empty_filename(self):
    # Quite unexpected behavior, but luckily without practical
    # effect: the elements of the submission format aren't allowed to
    # be empty, so such a submission would be rejected later on
    # anyway. It also shouldn't leak any private information.
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as archive:
        # An explicit ZipInfo is needed because of a "bug" in writestr.
        archive.writestr(zipfile.ZipInfo(""), b"some content")
    extracted = extract_files_from_archive(buf.getvalue())
    self.assertEqual(len(extracted), 1)
    received = extracted[0]
    self.assertIsNone(received.codename)
    # The extracted file takes its name from the temporary file the
    # archive's contents were copied to, plus a trailing tilde.
    self.assertRegex(received.filename, "tmp[a-z0-9_]+~")
    self.assertEqual(received.content, b"some content")
def test_directories(self):
    # The directory structure inside the archive is ignored: only the
    # trailing path component (i.e., the basename) shows up in the
    # return value, even when that produces duplicated filenames.
    entries = (
        ("toplevel", b"some content"),
        ("nested/once", b"some other content"),
        ("two/levels/deep", b"more content"),
        ("many/levels/deep", b"moar content"),
    )
    buf = io.BytesIO()
    with zipfile.ZipFile(
            buf, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for path, payload in entries:
            archive.writestr(path, payload)
    extracted = extract_files_from_archive(buf.getvalue())
    expected = [
        ReceivedFile(None, "toplevel", b"some content"),
        ReceivedFile(None, "once", b"some other content"),
        ReceivedFile(None, "deep", b"more content"),
        ReceivedFile(None, "deep", b"moar content"),
    ]
    six.assertCountEqual(self, extracted, expected)
def test_failure(self):
    # Bytes that do not form an archive are expected to be rejected
    # with InvalidArchive.
    garbage = b"this is not a valid archive"
    with self.assertRaises(InvalidArchive):
        extract_files_from_archive(garbage)