Example #1
def generate_new_testcase_mutations(corpus_directory,
                                    new_testcase_mutations_directory,
                                    fuzzer_name, candidate_generator):
    """Generate new testcase mutations, using existing corpus directory or other
  methods.

  Returns true if mutations are successfully generated using radamsa or ml rnn.
  A false return signifies either no generator use or unsuccessful generation of
  testcase mutations."""
    generation_timeout = get_new_testcase_mutations_timeout()
    pre_mutations_filecount = shell.get_directory_file_count(
        new_testcase_mutations_directory)

    # Generate new testcase mutations using Radamsa.
    if candidate_generator == Generator.RADAMSA:
        generate_new_testcase_mutations_using_radamsa(
            corpus_directory, new_testcase_mutations_directory,
            generation_timeout)
    # Generate new testcase mutations using ML RNN model.
    elif candidate_generator == Generator.ML_RNN:
        generate_new_testcase_mutations_using_ml_rnn(
            corpus_directory, new_testcase_mutations_directory, fuzzer_name,
            generation_timeout)

    # Mutations were generated successfully if new files appeared.
    return shell.get_directory_file_count(
        new_testcase_mutations_directory) > pre_mutations_filecount
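A hedged usage sketch for the function above; the directories and fuzzer name are hypothetical placeholders, and Generator is the enum referenced in the snippet.

# Hypothetical call site; all paths and names below are placeholders.
mutations_generated = generate_new_testcase_mutations(
    corpus_directory='/corpus/target_fuzzer',
    new_testcase_mutations_directory='/mutations/target_fuzzer',
    fuzzer_name='target_fuzzer',
    candidate_generator=Generator.RADAMSA)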
Example #2
    def _merge_new_units(self, target_path, corpus_dir, new_corpus_dir,
                         fuzz_corpus_dirs, arguments, stat_overrides):
        """Merge new units."""
        # Decide whether the merge step is needed at all. If the libFuzzer run
        # added no new units, there is nothing to merge.
        new_units_added = shell.get_directory_file_count(new_corpus_dir)
        if not new_units_added:
            stat_overrides['new_units_added'] = 0
            logs.log(
                'Skipped corpus merge since no new units added by fuzzing.')
            return

        # If this times out, it's possible that we will miss some units.
        # However, if we're taking >10 minutes to load/merge the corpus,
        # something is going very wrong and we probably don't want to make
        # things worse by adding units anyway.
        merge_corpus = self._create_merge_corpus_dir()

        merge_dirs = fuzz_corpus_dirs[:]

        # Merge the new units with the initial corpus.
        if corpus_dir not in merge_dirs:
            merge_dirs.append(corpus_dir)

        old_corpus_len = shell.get_directory_file_count(corpus_dir)

        new_units_added = 0
        try:
            result = self._minimize_corpus_two_step(
                target_path=target_path,
                arguments=arguments,
                existing_corpus_dirs=merge_dirs,
                new_corpus_dir=new_corpus_dir,
                output_corpus_dir=merge_corpus,
                reproducers_dir=None,
                max_time=engine_common.get_merge_timeout(
                    libfuzzer.DEFAULT_MERGE_TIMEOUT))

            libfuzzer.move_mergeable_units(merge_corpus, corpus_dir)
            new_corpus_len = shell.get_directory_file_count(corpus_dir)
            new_units_added = new_corpus_len - old_corpus_len

            stat_overrides.update(result.stats)
        except (MergeError, TimeoutError) as e:
            logs.log_warn('Merge failed.', error=repr(e))

        stat_overrides['new_units_added'] = new_units_added

        # Record the stats to make them easily searchable in stackdriver.
        logs.log('Stats calculated.', stats=stat_overrides)
        if new_units_added:
            logs.log(f'New units added to corpus: {new_units_added}.')
        else:
            logs.log('No new units found.')
Example #3
def generate_new_testcase_mutations_using_radamsa(
        corpus_directory, new_testcase_mutations_directory,
        generation_timeout):
    """Generate new testcase mutations based on Radamsa."""
    radamsa_path = get_radamsa_path()
    if not radamsa_path:
        # Radamsa mutations are not supported on the current platform; bail out.
        return

    radamsa_runner = new_process.ProcessRunner(radamsa_path)
    files_list = shell.get_files_list(corpus_directory)
    filtered_files_list = [
        f for f in files_list if os.path.getsize(f) <= CORPUS_INPUT_SIZE_LIMIT
    ]
    if not filtered_files_list:
        # Nothing to mutate: the corpus is empty or contains only very large files.
        return

    old_corpus_size = shell.get_directory_file_count(
        new_testcase_mutations_directory)
    expected_completion_time = time.time() + generation_timeout

    for i in range(RADAMSA_MUTATIONS):
        original_file_path = random_choice(filtered_files_list)
        original_filename = os.path.basename(original_file_path)
        output_path = os.path.join(
            new_testcase_mutations_directory,
            get_radamsa_output_filename(original_filename, i))

        result = radamsa_runner.run_and_wait(
            ['-o', output_path, original_file_path], timeout=RADAMSA_TIMEOUT)

        if (os.path.exists(output_path)
                and os.path.getsize(output_path) > CORPUS_INPUT_SIZE_LIMIT):
            # Remove overly large outputs so they are not mutated further and
            # do not hurt fuzzing efficiency.
            shell.remove_file(output_path)
        elif result.return_code or result.timed_out:
            logs.log_warn('Radamsa failed to mutate or timed out.',
                          output=result.output)

        # Stop mutating once we exceed the timeout.
        if time.time() > expected_completion_time:
            break

    new_corpus_size = shell.get_directory_file_count(
        new_testcase_mutations_directory)
    logs.log('Added %d tests using Radamsa mutations.' %
             (new_corpus_size - old_corpus_size))
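The helper get_radamsa_output_filename is called above but not shown; this is a hypothetical sketch of such a helper, and the real naming scheme may differ.

def get_radamsa_output_filename(original_filename, iteration):
    # Hypothetical scheme: prefix with the mutation index so repeated
    # mutations of the same seed file do not collide.
    return 'radamsa-%05d-%s' % (iteration, original_filename)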
Example #4
    def test(self):
        """Test get_directory_file_count."""
        self.fs.create_file('/test/aa/bb.txt', contents='abc')
        self.fs.create_file('/test/aa/cc.txt', contents='def')
        self.fs.create_file('/test/aa/aa/aa.txt', contents='ghi')
        self.fs.create_file('/test/aa/aa/dd.txt', contents='t')

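        # get_directory_file_count is expected to count recursively: two
        # files directly in /test/aa plus two more in /test/aa/aa.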
        self.assertEqual(shell.get_directory_file_count('/test/aa'), 4)
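A minimal sketch of a recursive file-count helper with the semantics this test relies on; an assumption for illustration, not ClusterFuzz's actual shell implementation.

import os

def get_directory_file_count(directory):
    """Count files under |directory|, recursing into subdirectories."""
    return sum(len(files) for _, _, files in os.walk(directory))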
Example #5
    def run(self, initial_corpus_path, minimized_corpus_path, bad_units_path):
        """Run corpus pruning. Output result to directory."""
        if not shell.get_directory_file_count(initial_corpus_path):
            # Empty corpus, nothing to do.
            return None

        # Set memory tool options and fuzzer arguments.
        engine_common.unpack_seed_corpus_if_needed(self.runner.target_path,
                                                   initial_corpus_path,
                                                   force_unpack=True)

        environment.reset_current_memory_tool_options(redzone_size=MIN_REDZONE,
                                                      leaks=True)
        self.runner.process_sanitizer_options()
        additional_args = self.runner.get_libfuzzer_flags()

        # Execute fuzzer with arguments for corpus pruning.
        logs.log('Running merge...')
        try:
            result = self.runner.minimize_corpus(additional_args,
                                                 [initial_corpus_path],
                                                 minimized_corpus_path,
                                                 bad_units_path,
                                                 CORPUS_PRUNING_TIMEOUT)
        except TimeoutError as e:
            raise CorpusPruningException(
                'Corpus pruning timed out while minimizing corpus\n' + repr(e))
        except engine.Error as e:
            raise CorpusPruningException(
                'Corpus pruning failed to minimize corpus\n' + repr(e))

        symbolized_output = stack_symbolizer.symbolize_stacktrace(result.logs)

        # Sanity-check that there are files in the minimized corpus after merging.
        if not shell.get_directory_file_count(minimized_corpus_path):
            raise CorpusPruningException(
                'Corpus pruning failed to minimize corpus\n' +
                symbolized_output)

        logs.log('Corpus merge finished successfully.',
                 output=symbolized_output)

        return result.stats
Example #6
    def test(self):
        """Test clear_system_temp_directory works as expected."""
        self.fs.create_file('/tmp/aa/bb.txt', contents='abc')
        self.fs.create_file('/tmp/cc/dd/ee.txt', contents='def')
        self.fs.create_dir('/tmp/ff/gg')
        self.fs.create_dir('/tmp/hh')
        self.fs.create_dir('/unrelated')
        self.fs.create_file('/unrelated/zz.txt', contents='zzz')
        os.symlink('/unrelated/zz.txt', '/tmp/hh/gg.txt')
        os.symlink('/unrelated', '/tmp/ii')

        shell.clear_system_temp_directory()

        self.assertTrue(os.path.exists('/tmp'))
        self.assertTrue(os.path.exists('/unrelated'))
        self.assertEqual(shell.get_directory_file_count('/tmp'), 0)
        self.assertEqual(shell.get_directory_file_count('/unrelated'), 1)
        self.assertFalse(os.path.exists('/tmp/aa/bb.txt'))
        self.assertFalse(os.path.exists('/tmp/cc/dd/ee.txt'))
        self.assertFalse(os.path.exists('/tmp/ff/gg'))
        self.assertFalse(os.path.exists('/tmp/hh'))
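A hedged sketch of the behavior this test exercises: everything inside the system temp directory is removed, and symlinks are unlinked rather than followed; an assumed implementation, not the real one.

import os
import shutil

def clear_system_temp_directory(temp_dir='/tmp'):
    for entry in os.listdir(temp_dir):
        path = os.path.join(temp_dir, entry)
        if os.path.islink(path) or os.path.isfile(path):
            os.remove(path)  # Unlinks a symlink itself, never its target.
        else:
            shutil.rmtree(path, ignore_errors=True)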
Example #7
    def test_generate(self):
        """Test generate specified number of inputs."""
        # Set a large timeout value and a small count value to avoid timeout.
        timeout = 20
        expected_count = 2

        result = generator.run(self.input_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=expected_count,
                               hidden_state_size=MODEL_STATE_SIZE,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        # Process exits normally and no timeout.
        self.assertEqual(result.return_code, constants.ExitCode.SUCCESS)
        self.assertFalse(result.timed_out)

        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertEqual(expected_count, actual_count)
Example #8
    def test_empty_corpus(self):
        """Test generation should abort for empty corpus."""
        # Set a large timeout value and a small count value to avoid timeout.
        timeout = 20
        expected_count = 2

        result = generator.run(self.empty_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=expected_count,
                               hidden_state_size=MODEL_STATE_SIZE,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        self.assertEqual(result.return_code,
                         constants.ExitCode.CORPUS_TOO_SMALL)
        self.assertFalse(result.timed_out)

        # No new units.
        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertEqual(actual_count, 0)
Example #9
    def test_timeout(self):
        """Test timeout case in generation."""
        # Set a small timeout value and a large count value to trigger a
        # timeout. Note that the timeout cannot be set too small, since it
        # takes time to start the generator. If this test fails, increase the
        # timeout value.
        timeout = 10

        result = generator.run(self.input_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=sys.maxsize,
                               hidden_state_size=MODEL_STATE_SIZE,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        # Process timed out.
        self.assertNotEqual(result.return_code, constants.ExitCode.SUCCESS)
        self.assertTrue(result.timed_out)

        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertGreater(actual_count, 0)
Example #10
    def run(self, timeout):
        """Merge testcases from corpus from other fuzz targets."""
        if not shell.get_directory_file_count(self.context.shared_corpus_path):
            logs.log('No files found in shared corpus, skip merge.')
            return None

        # Run pruning on the shared corpus and log the result in case of error.
        logs.log('Merging shared corpus...')
        environment.reset_current_memory_tool_options(
            redzone_size=DEFAULT_REDZONE)
        self.runner.process_sanitizer_options()

        additional_args = self.runner.get_libfuzzer_flags()

        try:
            result = self.runner.minimize_corpus(
                additional_args, [self.context.shared_corpus_path],
                self.context.minimized_corpus_path,
                self.context.bad_units_path, timeout)
            symbolized_output = stack_symbolizer.symbolize_stacktrace(
                result.logs)
            logs.log('Shared corpus merge finished successfully.',
                     output=symbolized_output)
        except TimeoutError as e:
            # Corpora cross-pollinated from other fuzzers can contain
            # unexpected test cases that time us out. This is expected, so
            # bail out.
            logs.log_warn(
                'Corpus pruning timed out while merging shared corpus\n' +
                repr(e))
            return None
        except engine.Error as e:
            # Corpora cross-pollinated from other fuzzers can be large, so we
            # can run out of disk space and raise an exception. This is
            # expected, so bail out.
            logs.log_warn('Corpus pruning failed to merge shared corpus\n' +
                          repr(e))
            return None

        return result.stats
Example #11
    def test_invalid_model(self):
        """Test TensorFlow should throw exception if model does not match."""
        # Set a large timeout value and a small count value to avoid timeout.
        timeout = 20
        expected_count = 2

        # Change model parameters to make demo model invalid.
        invalid_state_size = 8

        result = generator.run(self.input_directory,
                               self.output_directory,
                               self.model_path,
                               timeout,
                               generation_count=expected_count,
                               hidden_state_size=invalid_state_size,
                               hidden_layer_size=MODEL_LAYER_SIZE)

        self.assertEqual(result.return_code,
                         constants.ExitCode.TENSORFLOW_ERROR)
        self.assertFalse(result.timed_out)

        # No new units.
        actual_count = shell.get_directory_file_count(self.output_directory)
        self.assertEqual(actual_count, 0)
Example #12
def execute(input_directory, output_directory, fuzzer_name,
            generation_timeout):
    """Execute ML RNN generator to produce new inputs.

  This method should be called inside launcher, to generate a number of
  new inputs based on ML RNN model.

  It will fetch ML model from GCS bucket specified in environment
  variable `CORPUS_BUCKET`. The script to run the model resides
  in folder `tools/fuzzers/ml/rnn`.

  Args:
    input_directory: Seed corpus path. The directory should not be empty.
    output_directory: The directory to place generated inputs.
    fuzzer_name: Name of the fuzzer, e.g libpng_read_fuzzer. It indicates the
        subdirectory in gcs bucket to store models.
    generation_timeout: Time in seconds for the generator to run. Normally it
        takes <1s to generate an input, assuming the input length is <4KB.
  """
    if environment.platform() != 'LINUX':
        logs.log('Unsupported platform for ML RNN generation, skipping.')
        return

    # Validate corpus folder.
    file_count = shell.get_directory_file_count(input_directory)
    if not file_count:
        logs.log('Corpus is empty. Skip generation.')
        return

    # Number of existing new inputs. They are possibly generated by other
    # generators.
    old_corpus_units = shell.get_directory_file_count(output_directory)
    old_corpus_bytes = shell.get_directory_size(output_directory)

    # Get model path.
    model_path = prepare_model_directory(fuzzer_name)
    if not model_path:
        return

    result = run(input_directory, output_directory, model_path,
                 generation_timeout)

    # The generation process exited abnormally but did not time out, which
    # means an error occurred during execution.
    if result.return_code and not result.timed_out:
        if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
            logs.log_warn(
                'ML RNN generation for fuzzer %s aborted due to small corpus.'
                % fuzzer_name)
        else:
            logs.log_error(
                'ML RNN generation for fuzzer %s failed with ExitCode = %d.' %
                (fuzzer_name, result.return_code),
                output=utils.decode_to_unicode(result.output))
        return

    # A timeout is not an error as long as new units were generated.
    if result.timed_out:
        logs.log_warn('ML RNN generation for fuzzer %s timed out.' %
                      fuzzer_name)

    new_corpus_units = (shell.get_directory_file_count(output_directory) -
                        old_corpus_units)
    new_corpus_bytes = (shell.get_directory_size(output_directory) -
                        old_corpus_bytes)
    if new_corpus_units:
        logs.log(
            'Added %d new inputs (%d bytes) using ML RNN generator for %s.' %
            (new_corpus_units, new_corpus_bytes, fuzzer_name))
    else:
        logs.log_error('ML RNN generator did not produce any inputs for %s' %
                       fuzzer_name,
                       output=utils.decode_to_unicode(result.output))
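A hedged usage sketch for execute(); the paths, fuzzer name, and timeout below are hypothetical placeholders.

# Hypothetical call site; values are placeholders.
execute(input_directory='/corpus/libpng_read_fuzzer',
        output_directory='/new_inputs',
        fuzzer_name='libpng_read_fuzzer',
        generation_timeout=40 * 60)  # Assumed 40-minute budget.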
Example #13
    def prepare(self, corpus_dir, target_path, build_dir):
        """Prepare for a fuzzing session, by generating options. Returns a
    FuzzOptions object.

    Args:
      corpus_dir: The main corpus directory.
      target_path: Path to the target.
      build_dir: Path to the build directory.

    Returns:
      A FuzzOptions object.
    """
        del build_dir
        arguments = fuzzer.get_arguments(target_path)
        grammar = fuzzer.get_grammar(target_path)

        if self.do_strategies:
            strategy_pool = strategy_selection.generate_weighted_strategy_pool(
                strategy_list=strategy.LIBFUZZER_STRATEGY_LIST,
                use_generator=True,
                engine_name=self.name)
        else:
            strategy_pool = strategy_selection.StrategyPool()

        strategy_info = libfuzzer.pick_strategies(strategy_pool, target_path,
                                                  corpus_dir, arguments,
                                                  grammar)

        arguments.extend(strategy_info.arguments)

        # Check for seed corpus and add it into corpus directory.
        engine_common.unpack_seed_corpus_if_needed(target_path, corpus_dir)

        # Pick a few testcases from our corpus to use as the initial corpus.
        subset_size = engine_common.random_choice(
            engine_common.CORPUS_SUBSET_NUM_TESTCASES)

        if (not strategy_info.use_dataflow_tracing
                and strategy_pool.do_strategy(strategy.CORPUS_SUBSET_STRATEGY)
                and shell.get_directory_file_count(corpus_dir) > subset_size):
            # Copy |subset_size| testcases into 'subset' directory.
            corpus_subset_dir = self._create_temp_corpus_dir('subset')
            libfuzzer.copy_from_corpus(corpus_subset_dir, corpus_dir,
                                       subset_size)
            strategy_info.fuzzing_strategies.append(
                strategy.CORPUS_SUBSET_STRATEGY.name + '_' + str(subset_size))
            strategy_info.additional_corpus_dirs.append(corpus_subset_dir)
        else:
            strategy_info.additional_corpus_dirs.append(corpus_dir)

        # Check dict argument to make sure that it's valid.
        dict_path = fuzzer_utils.extract_argument(arguments,
                                                  constants.DICT_FLAG,
                                                  remove=False)
        if dict_path and not os.path.exists(dict_path):
            logs.log_error(f'Invalid dict {dict_path} for {target_path}.')
            fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

        # If there's no dict argument, check for %target_binary_name%.dict file.
        dict_path = fuzzer_utils.extract_argument(arguments,
                                                  constants.DICT_FLAG,
                                                  remove=False)
        if not dict_path:
            dict_path = dictionary_manager.get_default_dictionary_path(
                target_path)
            if os.path.exists(dict_path):
                arguments.append(constants.DICT_FLAG + dict_path)

        # If we have a dictionary, correct any items that are not formatted properly
        # (e.g. quote items that are missing them).
        dictionary_manager.correct_if_needed(dict_path)

        strategies = stats.process_strategies(strategy_info.fuzzing_strategies,
                                              name_modifier=lambda x: x)
        return LibFuzzerOptions(corpus_dir, arguments, strategies,
                                strategy_info.additional_corpus_dirs,
                                strategy_info.extra_env,
                                strategy_info.use_dataflow_tracing,
                                strategy_info.is_mutations_run)
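The snippet above relies on libfuzzer.copy_from_corpus to build the subset directory; this is a hypothetical sketch of such a random-subset copy, and the real helper may differ.

import os
import random
import shutil

def copy_from_corpus(dest_dir, corpus_dir, num_testcases):
    # Gather all corpus files recursively, then copy a random sample.
    files = [os.path.join(root, name)
             for root, _, names in os.walk(corpus_dir) for name in names]
    for path in random.sample(files, min(num_testcases, len(files))):
        shutil.copy(path, dest_dir)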
Example #14
def _count_corpus_files(directory):
  """Count the number of corpus files."""
  return shell.get_directory_file_count(directory)
def do_corpus_pruning(context, last_execution_failed, revision):
    """Run corpus pruning."""
    # Set the |FUZZ_TARGET| environment variable to help with unarchiving only
    # the fuzz target and its related files.
    environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

    if environment.is_trusted_host():
        from clusterfuzz._internal.bot.untrusted_runner import tasks_host
        return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                            revision)

    if not build_manager.setup_build(revision=revision):
        raise CorpusPruningException('Failed to setup build.')

    build_directory = environment.get_value('BUILD_DIR')
    start_time = datetime.datetime.utcnow()
    runner = Runner(build_directory, context)
    pruner = CorpusPruner(runner)
    fuzzer_binary_name = os.path.basename(runner.target_path)

    # If our last execution failed, shrink to a randomized corpus of usable
    # size to keep the corpus from growing unbounded and to avoid recurring
    # failures when trying to minimize it.
    if last_execution_failed:
        for corpus_url in [
                context.corpus.get_gcs_url(),
                context.quarantine_corpus.get_gcs_url()
        ]:
            _limit_corpus_size(corpus_url)

    # Get initial corpus to process from GCS.
    context.sync_to_disk()
    initial_corpus_size = shell.get_directory_file_count(
        context.initial_corpus_path)

    # Restore a small batch of quarantined units back to corpus.
    context.restore_quarantined_units()

    # Shrink to a minimized corpus using corpus merge.
    pruner_stats = pruner.run(context.initial_corpus_path,
                              context.minimized_corpus_path,
                              context.bad_units_path)

    # Sync minimized corpus back to GCS.
    context.sync_to_gcs()

    # Create corpus backup.
    # Temporarily copy the past crash regressions folder into the minimized
    # corpus so that the corpus backup archive contains both.
    regressions_input_dir = os.path.join(context.initial_corpus_path,
                                         'regressions')
    regressions_output_dir = os.path.join(context.minimized_corpus_path,
                                          'regressions')
    if shell.get_directory_file_count(regressions_input_dir):
        shutil.copytree(regressions_input_dir, regressions_output_dir)
    backup_bucket = environment.get_value('BACKUP_BUCKET')
    corpus_backup_url = corpus_manager.backup_corpus(
        backup_bucket, context.corpus, context.minimized_corpus_path)
    shell.remove_directory(regressions_output_dir)

    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)

    logs.log('Corpus pruned from %d to %d units.' %
             (initial_corpus_size, minimized_corpus_size_units))

    # Process bad units found during merge.
    # Mapping of crash state -> CorpusCrash
    crashes = {}
    pruner.process_bad_units(context.bad_units_path,
                             context.quarantine_corpus_path, crashes)
    context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

    # Store corpus stats into CoverageInformation entity.
    project_qualified_name = context.fuzz_target.project_qualified_name()
    today = datetime.datetime.utcnow().date()
    coverage_info = data_types.CoverageInformation(
        fuzzer=project_qualified_name, date=today)

    quarantine_corpus_size = shell.get_directory_file_count(
        context.quarantine_corpus_path)
    quarantine_corpus_dir_size = shell.get_directory_size(
        context.quarantine_corpus_path)

    # Save the minimized corpus size before cross-pollination to put in
    # BigQuery.
    pre_pollination_corpus_size = minimized_corpus_size_units

    # Populate coverage stats.
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
    coverage_info.quarantine_size_units = quarantine_corpus_size
    coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
    coverage_info.corpus_backup_location = corpus_backup_url
    coverage_info.corpus_location = context.corpus.get_gcs_url()
    coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

    # Calculate remaining time to use for shared corpus merging.
    time_remaining = _get_time_remaining(start_time)
    if time_remaining <= 0:
        logs.log_warn('Not enough time for shared corpus merging.')
        return None

    cross_pollinator = CrossPollinator(runner)
    pollinator_stats = cross_pollinator.run(time_remaining)

    context.sync_to_gcs()

    # Update corpus size stats.
    minimized_corpus_size_units = shell.get_directory_file_count(
        context.minimized_corpus_path)
    minimized_corpus_size_bytes = shell.get_directory_size(
        context.minimized_corpus_path)
    coverage_info.corpus_size_units = minimized_corpus_size_units
    coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

    logs.log('Finished.')

    sources = ','.join([
        fuzzer.fuzz_target.project_qualified_name()
        for fuzzer in context.cross_pollinate_fuzzers
    ])

    cross_pollination_stats = None
    if pruner_stats and pollinator_stats:
        cross_pollination_stats = CrossPollinationStats(
            project_qualified_name, context.cross_pollination_method, sources,
            context.tag, initial_corpus_size, pre_pollination_corpus_size,
            pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
            pruner_stats['feature_coverage'],
            pollinator_stats['feature_coverage'])

    return CorpusPruningResult(coverage_info=coverage_info,
                               crashes=list(crashes.values()),
                               fuzzer_binary_name=fuzzer_binary_name,
                               revision=environment.get_value('APP_REVISION'),
                               cross_pollination_stats=cross_pollination_stats)
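The snippet budgets shared-corpus merging via _get_time_remaining; this is a hypothetical sketch assuming a fixed overall task budget, and the real constant and accounting may differ.

import datetime

CORPUS_PRUNING_TASK_BUDGET = 4 * 60 * 60  # Assumed 4-hour budget (hypothetical).

def _get_time_remaining(start_time):
    elapsed = (datetime.datetime.utcnow() - start_time).total_seconds()
    return CORPUS_PRUNING_TASK_BUDGET - elapsed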