def test_tar_xz_broken_links(self):
  """Test that a .tar file with broken links is handled properly by
  iterator()."""
  helpers.patch(self, ['metrics.logs.log_warn'])
  archive_name = 'broken-links.tar.xz'
  archive_path = os.path.join(TESTDATA_PATH, archive_name)

  # Get the results we expect from iterator().
  actual_results = []
  for archive_file in archive.iterator(archive_path):
    if archive_file.handle is not None:
      actual_results.append(
          (archive_file.name, archive_file.size, archive_file.handle.read()))
    else:
      actual_results.append((archive_file.name, archive_file.size, None))

  # Check that iterator returns what we expect it to.
  # NOTE: handle.read() returns bytes (test_cwd_prefix in this file asserts
  # b'abc\n'), so expected file contents must be bytes literals too.
  expected_results = [
      ('testdir', 0, None),
      ('testdir/1', 0, None),
      ('testdir/1/a', 12, b'hello world\n'),
      ('testdir/2', 0, None),
      ('testdir/2/c', 0, b'hello world\n'),  # Working link.
      ('testdir/2/a', 0, None),
      ('testdir/2/b', 0, None),
  ]
  self.assertEqual(expected_results, actual_results)

  # Check that iterator calls log_warn on a broken link.
  self.mock.log_warn.assert_called_with(
      'Check archive %s for broken links.' % archive_path,
      error_filepaths=['testdir/2/a', 'testdir/2/b'])
def _get_fuzz_targets_from_archive(archive_path):
  """Yield the names of fuzz targets found inside |archive_path|."""
  # Import here as this path is not available in App Engine context.
  from bot.fuzzers import utils as fuzzer_utils

  for archive_file in archive.iterator(archive_path):
    if not fuzzer_utils.is_fuzz_target_local(archive_file.name,
                                             archive_file.handle):
      continue

    # The target name is the file's base name without its extension.
    base_name = os.path.basename(archive_file.name)
    yield os.path.splitext(base_name)[0]
def test_cwd_prefix(self):
  """Test that a .tgz file with cwd prefix is handled."""
  tgz_path = os.path.join(TESTDATA_PATH, 'cwd-prefix.tgz')

  # Collect name -> contents for every entry that has a readable handle.
  actual_results = {}
  for archive_file in archive.iterator(tgz_path):
    if archive_file.handle:
      actual_results[archive_file.name] = archive_file.handle.read()

  self.assertEqual(actual_results, {'./test': b'abc\n'})
def test_tar_xz(self):
  """Test that a .tar.xz file is handled properly by iterator()."""
  tar_xz_path = os.path.join(TESTDATA_PATH, 'archive.tar.xz')
  # NOTE: handle.read() returns bytes (test_cwd_prefix in this file asserts
  # b'abc\n'), so expected file contents must be bytes literals too.
  expected_results = {
      'archive_dir/hi': b'hi\n',
      'archive_dir/bye': b'bye\n',
  }
  actual_results = {
      archive_file.name: archive_file.handle.read()
      for archive_file in archive.iterator(tar_xz_path)
      if archive_file.handle
  }
  self.assertEqual(actual_results, expected_results)
def unpack_seed_corpus_if_needed(
    fuzz_target_path,
    corpus_directory,
    max_bytes=float("inf"),
    force_unpack=False,
    max_files_for_unpack=MAX_FILES_FOR_UNPACK,
):
  """If seed corpus available, unpack it into the corpus directory if needed,
  ie: if corpus exists and either |force_unpack| is True, or the number of
  files in corpus_directory is less than |max_files_for_unpack|. Uses
  |fuzz_target_path| to find the seed corpus. If max_bytes is specified, then
  seed corpus files larger than |max_bytes| will not be unpacked.
  """
  seed_corpus_archive_path = get_seed_corpus_path(fuzz_target_path)
  if not seed_corpus_archive_path:
    return

  num_corpus_files = len(shell.get_files_list(corpus_directory))
  if not force_unpack and num_corpus_files > max_files_for_unpack:
    # Corpus is already populated; skip unpacking unless forced.
    return

  if force_unpack:
    logs.log("Forced unpack: %s." % seed_corpus_archive_path)

  # Unpack seed corpus recursively into the root of the main corpus
  # directory, renaming each file to a zero-padded sequence number.
  unpacked_count = 0
  for seed_corpus_file in archive.iterator(seed_corpus_archive_path):
    # Ignore directories.
    if seed_corpus_file.name.endswith("/"):
      continue

    # Allow callers to opt-out of unpacking large files.
    if seed_corpus_file.size > max_bytes:
      continue

    destination_path = os.path.join(corpus_directory,
                                    "%016d" % unpacked_count)
    with open(destination_path, "wb") as destination_handle:
      shutil.copyfileobj(seed_corpus_file.handle, destination_handle)
    unpacked_count += 1

  logs.log("Unarchiving %d files from seed corpus %s." %
           (unpacked_count, seed_corpus_archive_path))