Пример #1
0
    def conduct_trial(self):
        """Run the fuzzer on a background thread and sync while it is alive.

        Calls self.initialize_directories(), starts run_fuzzer on a thread
        with the MAX_TOTAL_TIME value and the fuzzer log path, and then loops:
        sleep until the next sync, sync, increment self.cycle. When the thread
        is no longer alive a final sync is done and the thread is joined."""
        self.initialize_directories()
        fuzzer_log_path = os.path.join(self.results_dir, 'fuzzer-log.txt')

        logs.info('Starting trial.')

        fuzzing_thread = threading.Thread(
            target=run_fuzzer,
            args=(environment.get('MAX_TOTAL_TIME'), fuzzer_log_path))
        fuzzing_thread.start()

        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            # Hack: give the fuzzing thread a few seconds to fail if something
            # is wrong. Otherwise the loop below would sleep for a long time
            # before noticing that the thread has already died.
            time.sleep(5)

        while True:
            if not fuzzing_thread.is_alive():
                break
            self.sleep_until_next_sync()
            self.do_sync()
            self.cycle += 1

        logs.info('Doing final sync.')
        self.do_sync(final_sync=True)
        fuzzing_thread.join()
Пример #2
0
def main():
    """Initialize logging for the runner and run the experiment (on a
    development machine or on a GCP runner instance). Always returns 0."""
    # Extras attached to every log record, taken from the environment.
    runner_log_extras = {
        'benchmark': environment.get('BENCHMARK'),
        'component': 'runner',
        'fuzzer': environment.get('FUZZER'),
        'trial_id': str(environment.get('TRIAL_ID')),
    }
    logs.initialize(default_extras=runner_log_extras)
    experiment_main()
    return 0
Пример #3
0
def run_fuzzer(max_total_time, log_filename):
    """Prepare the seed corpus and run the fuzzer script in a subprocess.

    The fuzzer is started through `nice ... python3 -u -c` with
    |max_total_time| as the timeout. When FUZZ_OUTSIDE_EXPERIMENT is set the
    output goes to stdout, otherwise it is written to |log_filename|. If the
    process raises subprocess.CalledProcessError the module-level
    fuzzer_errored_out flag is set to True and an error is logged."""
    global fuzzer_errored_out  # pylint:disable=invalid-name

    seed_corpus_dir = environment.get('SEED_CORPUS_DIR')
    output_corpus_dir = environment.get('OUTPUT_CORPUS_DIR')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, environment.get('FUZZ_TARGET'))
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, seed_corpus_dir)
    _clean_seed_corpus(seed_corpus_dir)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    # Python code handed to `python3 -c`; the placeholders are filled below.
    fuzz_call_template = ('import fuzzer; '
                          'fuzzer.fuzz('
                          "'{input_corpus}', '{output_corpus}', "
                          "'{target_binary}')")

    try:
        # The runner is launched at a higher priority, so undo that niceness
        # to put the fuzzing processes back at the default (0).
        niceness_argument = str(0 - runner_niceness)
        python_code = fuzz_call_template.format(
            input_corpus=shlex.quote(seed_corpus_dir),
            output_corpus=shlex.quote(output_corpus_dir),
            target_binary=shlex.quote(target_binary))
        command = [
            'nice', '-n', niceness_argument, 'python3', '-u', '-c',
            python_code
        ]

        fuzzer_env = _get_fuzzer_environment()
        if not environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            # Inside an experiment the output is captured in the log file.
            with open(log_filename, 'wb') as log_file:
                new_process.execute(command,
                                    timeout=max_total_time,
                                    output_file=log_file,
                                    kill_children=True,
                                    env=fuzzer_env)
        else:
            # The user is fuzzing from the command line: show the output.
            new_process.execute(command,
                                timeout=max_total_time,
                                write_to_stdout=True,
                                kill_children=True,
                                env=fuzzer_env)
    except subprocess.CalledProcessError:
        fuzzer_errored_out = True
        logs.error('Fuzz process returned nonzero.')
Пример #4
0
def _clean_seed_corpus(seed_corpus_dir):
    """Prepare |seed_corpus_dir| for the trial so that it can be used by AFL,
    which is picky about the seed corpus.

    Does nothing if the directory does not exist. If the NO_SEEDS env var is
    set, the directory is deleted and recreated empty. Otherwise every file
    found under the directory is either removed (when it is larger than
    CORPUS_ELEMENT_BYTES_LIMIT) or moved into the directory root under the name
    returned by utils.file_hash. Files that could not be moved are reported in
    a single error log at the end."""
    if not os.path.exists(seed_corpus_dir):
        return

    if environment.get('NO_SEEDS'):
        logs.info('NO_SEEDS specified, deleting seed corpus files.')
        shutil.rmtree(seed_corpus_dir)
        os.mkdir(seed_corpus_dir)
        return

    unmoved_files = []
    for directory, _, filenames in os.walk(seed_corpus_dir):
        seed_paths = (os.path.join(directory, name) for name in filenames)
        for seed_path in seed_paths:
            oversized = (os.path.getsize(seed_path) >
                         CORPUS_ELEMENT_BYTES_LIMIT)
            if oversized:
                os.remove(seed_path)
                logs.warning('Removed seed file %s as it exceeds 1 Mb limit.',
                             seed_path)
            else:
                # Flatten the corpus: the file goes to the root, named after
                # its hash.
                destination = os.path.join(seed_corpus_dir,
                                           utils.file_hash(seed_path))
                try:
                    shutil.move(seed_path, destination)
                except OSError:
                    unmoved_files.append((seed_path, destination))

    if unmoved_files:
        logs.error('Failed to move seed corpus files: %s', unmoved_files)
Пример #5
0
    def record_stats(self):
        """Use fuzzer.get_stats if it is offered, validate the stats and then
        save them to a file so that they will be synced to the filestore.

        Returns early without writing anything when the fuzzer module has no
        get_stats attribute, when the call to get_stats raises, or when the
        returned stats fail validation. Otherwise the stats string is written
        to the stats file for the current cycle inside self.results_dir."""
        # TODO(metzman): Make this more resilient so we don't wait forever and
        # so that breakages in stats parsing doesn't break runner.

        fuzzer_module = get_fuzzer_module(self.fuzzer)

        fuzzer_module_get_stats = getattr(fuzzer_module, 'get_stats', None)
        if fuzzer_module_get_stats is None:
            # Stats support is optional.
            return

        try:
            output_corpus = environment.get('OUTPUT_CORPUS_DIR')
            stats_json_str = fuzzer_module_get_stats(output_corpus,
                                                     self.log_file)

        except Exception:  # pylint: disable=broad-except
            # The argument is a callable, not a number, so it must be
            # formatted with %s; %d cannot format a function object.
            logs.error('Call to %s failed.', fuzzer_module_get_stats)
            return

        try:
            fuzzer_stats.validate_fuzzer_stats(stats_json_str)
        except (ValueError, json.decoder.JSONDecodeError):
            logs.error('Stats are invalid.')
            return

        stats_filename = experiment_utils.get_stats_filename(self.cycle)
        stats_path = os.path.join(self.results_dir, stats_filename)
        with open(stats_path, 'w') as stats_file_handle:
            stats_file_handle.write(stats_json_str)
Пример #6
0
def get_runner_image_url(experiment, benchmark, fuzzer, docker_registry):
    """Return the URL of the docker runner image used to fuzz |benchmark|
    with |fuzzer|. The image tag is 'latest' when LOCAL_EXPERIMENT is set and
    |experiment| otherwise."""
    if environment.get('LOCAL_EXPERIMENT'):
        image_tag = 'latest'
    else:
        image_tag = experiment
    url_template = '{docker_registry}/runners/{fuzzer}/{benchmark}:{tag}'
    return url_template.format(docker_registry=docker_registry,
                               fuzzer=fuzzer,
                               benchmark=benchmark,
                               tag=image_tag)
Пример #7
0
def gsutil_command(arguments, *args, parallel=True, **kwargs):
    """Run gsutil with |arguments| and return the result of
    new_process.execute. The -m flag is added when |parallel| is true. When
    FUZZ_OUTSIDE_EXPERIMENT is set nothing is run and (0, '') is returned."""
    if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
        logger.info('FUZZ_OUTSIDE_EXPERIMENT set, not running \'gsutil %s\'.',
                    ' '.join(arguments))
        return 0, ''
    gsutil_prefix = ['gsutil', '-m'] if parallel else ['gsutil']
    return new_process.execute(gsutil_prefix + arguments, *args, **kwargs)
Пример #8
0
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
    """Unpack the seed corpus archive, if there is one, into
    |corpus_directory|. Copied from unpack_seed_corpus in engine_common.py in
    ClusterFuzz.

    When OSS_FUZZ_CORPUS is set the archive named after BENCHMARK is first
    copied from the OSS-Fuzz corpora filestore path into FUZZ_TARGET_DIR.
    Otherwise the archive path comes from get_clusterfuzz_seed_corpus_path.
    Returns without doing anything when no archive path is available.
    """
    if environment.get('OSS_FUZZ_CORPUS'):
        benchmark = environment.get('BENCHMARK')
        archive_name = f'{benchmark}.zip'
        remote_archive_path = posixpath.join(
            experiment_utils.get_oss_fuzz_corpora_filestore_path(),
            archive_name)
        archive_path = posixpath.join(FUZZ_TARGET_DIR, archive_name)
        filestore_utils.cp(remote_archive_path, archive_path)
    else:
        archive_path = get_clusterfuzz_seed_corpus_path(fuzz_target_path)

    if not archive_path:
        return

    unpacked_count = 0
    with zipfile.ZipFile(archive_path) as archive:
        for member in archive.infolist():
            # Directory entries carry no data of their own.
            if member.filename.endswith('/'):
                continue

            # Entries above the corpus element size limit are not unpacked.
            if member.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            # ZipFile.extract treats its second argument as a directory, so
            # each member is extracted underneath its own numbered path.
            destination = os.path.join(corpus_directory,
                                       '%016d' % unpacked_count)
            archive.extract(member, destination)
            unpacked_count += 1

    logs.info('Unarchived %d files from seed corpus %s.', unpacked_count,
              archive_path)
Пример #9
0
    def __init__(self):
        """Set up the sync directory and the bookkeeping attributes.

        Unless FUZZ_OUTSIDE_EXPERIMENT is set, the trial's bucket directory
        is looked up from FUZZER, BENCHMARK and TRIAL_ID and removed from the
        filestore before use. Otherwise gcs_sync_dir is None."""
        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            self.gcs_sync_dir = None
        else:
            benchmark_name = environment.get('BENCHMARK')
            fuzzer_name = environment.get('FUZZER')
            trial_number = environment.get('TRIAL_ID')
            self.gcs_sync_dir = experiment_utils.get_trial_bucket_dir(
                fuzzer_name, benchmark_name, trial_number)
            filestore_utils.rm(self.gcs_sync_dir, force=True, parallel=True)

        # Directory names used by the trial.
        self.corpus_dir = 'corpus'
        self.corpus_archives_dir = 'corpus-archives'
        self.results_dir = 'results'
        self.unchanged_cycles_path = os.path.join(self.results_dir,
                                                  'unchanged-cycles')

        # Sync bookkeeping.
        self.cycle = 1
        self.last_sync_time = None
        self.corpus_dir_contents = set()
Пример #10
0
def main():
    """Connect to Redis, load the experiment config and run the experiment
    inside the rq connection. Returns the result of run_experiment."""
    redis_connection = redis.Redis(host="queue-server")

    config_path = environment.get('EXPERIMENT_CONFIG',
                                  'fuzzbench/local-experiment-config.yaml')
    # Read the raw config, then validate and expand it.
    experiment_config = config_utils.validate_and_expand(
        yaml_utils.read(config_path))

    with rq.Connection(redis_connection):
        return run_experiment(experiment_config)
Пример #11
0
def run_fuzzer(max_total_time, log_filename):
    """Prepare the seed corpus and run the fuzzer script in a subprocess.

    The fuzzer is started through `nice ... python3 -u -c` with
    |max_total_time| as the timeout and its output is written to
    |log_filename|. An error is logged if the process raises
    subprocess.CalledProcessError."""
    seed_corpus_dir = environment.get('SEED_CORPUS_DIR')
    output_corpus_dir = environment.get('OUTPUT_CORPUS_DIR')
    target_binary = fuzzer_utils.get_fuzz_target_binary(
        FUZZ_TARGET_DIR, environment.get('FUZZ_TARGET'))
    if not target_binary:
        logs.error('Fuzz target binary not found.')
        return

    _unpack_clusterfuzz_seed_corpus(target_binary, seed_corpus_dir)
    _clean_seed_corpus(seed_corpus_dir)

    if max_total_time is None:
        logs.warning('max_total_time is None. Fuzzing indefinitely.')

    runner_niceness = environment.get('RUNNER_NICENESS', 0)

    # Python code handed to `python3 -c`; the placeholders are filled below.
    fuzz_call_template = ('import fuzzer; '
                          'fuzzer.fuzz('
                          "'{input_corpus}', '{output_corpus}', "
                          "'{target_binary}')")

    try:
        with open(log_filename, 'w') as log_file:
            # The runner is launched at a higher priority, so undo that
            # niceness to put the fuzzing processes back at the default (0).
            niceness_argument = str(0 - runner_niceness)
            python_code = fuzz_call_template.format(
                input_corpus=shlex.quote(seed_corpus_dir),
                output_corpus=shlex.quote(output_corpus_dir),
                target_binary=shlex.quote(target_binary))
            command = [
                'nice', '-n', niceness_argument, 'python3', '-u', '-c',
                python_code
            ]
            new_process.execute(command,
                                timeout=max_total_time,
                                output_files=[log_file],
                                kill_children=True,
                                env=_get_fuzzer_environment())
    except subprocess.CalledProcessError:
        logs.error('Fuzz process returned nonzero.')
Пример #12
0
def gsutil_command(arguments, *args, parallel=True, **kwargs):
    """Run gsutil with |arguments| and return the result of
    new_process.execute. The -m flag is added when |parallel| is true and
    write_to_stdout defaults to False unless the caller passes it. When
    LOCAL_EXPERIMENT is set nothing is run and (0, '') is returned."""
    if environment.get('LOCAL_EXPERIMENT'):
        logger.info('LOCAL_EXPERIMENT set, not running \'gsutil %s\'.',
                    ' '.join(arguments))
        return 0, ''
    gsutil_prefix = ['gsutil', '-m'] if parallel else ['gsutil']
    # Only fill in write_to_stdout when the caller did not supply it.
    kwargs.setdefault('write_to_stdout', False)
    return new_process.execute(gsutil_prefix + arguments, *args, **kwargs)
Пример #13
0
def copy_coverage_binaries(benchmark):
    """Copy coverage binaries in a local experiment.

    Runs the coverage builder image for |benchmark| with the shared coverage
    binaries directory mounted and tars the contents of /out together with
    /src and /work into an archive in that directory. Returns the result of
    new_process.execute."""
    shared_dir = get_shared_coverage_binaries_dir()
    volume_mount = '{0}:{0}'.format(shared_dir)
    image_url = benchmark_utils.get_builder_image_url(
        benchmark, 'coverage', environment.get('DOCKER_REGISTRY'))
    archive_name = 'coverage-build-{}.tar.gz'.format(benchmark)
    archive_path = os.path.join(shared_dir, archive_name)
    tar_command = 'cd /out; tar -czvf {} * /src /work'.format(archive_path)
    docker_command = [
        'docker', 'run', '-v', volume_mount, image_url, '/bin/bash', '-c',
        tar_command
    ]
    return new_process.execute(docker_command)
Пример #14
0
def get_fuzzers_with_not_enough_samples(
        benchmark_snapshot_df, threshold=_DEFAULT_FUZZER_SAMPLE_NUM_THRESHOLD):
    """Return the fuzzers that have too few samples in a benchmark snapshot.

    A fuzzer is reported when its number of rows in |benchmark_snapshot_df|
    is smaller than |threshold| times the largest per-fuzzer row count. The
    FUZZER_SAMPLE_NUM_THRESHOLD environment variable, when set, overrides
    |threshold|.
    """
    # The environment variable takes precedence over the argument.
    threshold = environment.get('FUZZER_SAMPLE_NUM_THRESHOLD', threshold)

    sample_counts = benchmark_snapshot_df.fuzzer.value_counts()
    minimum_samples = threshold * sample_counts.max()
    return sample_counts[sample_counts < minimum_samples].index.tolist()
Пример #15
0
    def archive_corpus(self):
        """Archive the corpus for the current cycle and return the archive
        path. On the first cycle the seed corpus directory is archived
        too."""
        archive_name = experiment_utils.get_corpus_archive_name(self.cycle)
        archive_path = os.path.join(self.corpus_archives_dir, archive_name)

        if self.cycle == 1:
            # Some fuzzers like eclipser and LibFuzzer don't copy the
            # seed/input corpus to the output corpus (AFL does), which would
            # leave their coverage undercounted.
            directories = [
                self.corpus_dir,
                environment.get('SEED_CORPUS_DIR')
            ]
        else:
            directories = [self.corpus_dir]

        archive_directories(directories, archive_path)
        return archive_path
Пример #16
0
def get_benchmark_snapshot(benchmark_df,
                           threshold=_MIN_FRACTION_OF_ALIVE_TRIALS_AT_SNAPSHOT):
    """Pick the latest time at which at least |threshold| fraction of the
    trials still have a measurement and return only the rows of that time.

    In most cases this is the end of the experiment. If fewer than
    |threshold| fraction of the trials reached the end, an earlier "snapshot"
    time is used instead. The BENCHMARK_SAMPLE_NUM_THRESHOLD environment
    variable, when set, overrides |threshold|.
    """
    # The environment variable takes precedence over the argument.
    threshold = environment.get('BENCHMARK_SAMPLE_NUM_THRESHOLD', threshold)

    required_trials = threshold * benchmark_df.trial_id.nunique()
    trials_per_time = benchmark_df.time.value_counts()
    eligible_times = trials_per_time[trials_per_time >= required_trials]
    snapshot_time = eligible_times.index.max()
    return benchmark_df[benchmark_df.time == snapshot_time]
Пример #17
0
    def conduct_trial(self):
        """Run the fuzzer on a background thread and sync while it is alive.

        Calls self.initialize_directories(), starts run_fuzzer on a thread
        with the MAX_TOTAL_TIME value and the fuzzer log path, and then loops:
        sleep until the next sync, sync, increment self.cycle. When the thread
        is no longer alive a final sync is done and the thread is joined."""
        self.initialize_directories()
        fuzzer_log_path = os.path.join(self.results_dir, 'fuzzer-log.txt')

        logs.info('Starting trial.')

        fuzzing_thread = threading.Thread(
            target=run_fuzzer,
            args=(environment.get('MAX_TOTAL_TIME'), fuzzer_log_path))
        fuzzing_thread.start()

        while True:
            if not fuzzing_thread.is_alive():
                break
            self.sleep_until_next_sync()
            self.do_sync()
            self.cycle += 1

        logs.info('Doing final sync.')
        self.do_sync(final_sync=True)
        fuzzing_thread.join()
Пример #18
0
def copy_coverage_binaries(benchmark):
    """Copy coverage binaries in a local experiment.

    Runs the coverage builder image for |benchmark| with the shared coverage
    binaries directory mounted, tars the contents of /out into an archive in
    that directory and then copies the archive with gsutil to the GCS path of
    the coverage binaries directory. Returns the result of gsutil.cp."""
    shared_dir = get_shared_coverage_binaries_dir()
    volume_mount = '{0}:{0}'.format(shared_dir)
    image_url = benchmark_utils.get_builder_image_url(
        benchmark, 'coverage', environment.get('CLOUD_PROJECT'))
    archive_name = 'coverage-build-{}.tar.gz'.format(benchmark)
    local_archive_path = os.path.join(shared_dir, archive_name)
    tar_command = 'cd /out; tar -czvf {} *'.format(local_archive_path)
    docker_command = [
        'docker', 'run', '-v', volume_mount, image_url, '/bin/bash', '-c',
        tar_command
    ]
    new_process.execute(docker_command)

    # Upload the archive that the container wrote to the shared directory.
    gcs_archive_path = posixpath.join(
        exp_path.gcs(build_utils.get_coverage_binaries_dir()), archive_name)
    return gsutil.cp(local_archive_path, gcs_archive_path)
Пример #19
0
def get_benchmark_snapshot(benchmark_df,
                           threshold=_DEFAULT_BENCHMARK_SAMPLE_NUM_THRESHOLD):
    """Finds the latest time where at least |threshold| fraction of the trials
    were still running and returns the measurements of that time.

    In most cases, this is the end of the experiment, and the trials that
    ended early are not considered for the analysis. If too many trials ended
    early, an earlier snapshot time is picked instead. E.g., to find the
    latest time where all trials were running, set |threshold| to 1.0.

    The BENCHMARK_SAMPLE_NUM_THRESHOLD environment variable, when set,
    overrides |threshold|.

    Returns a data frame that only contains the measurements of the picked
    snapshot time.
    """
    # Allow overriding threshold with environment variable as well.
    threshold = environment.get('BENCHMARK_SAMPLE_NUM_THRESHOLD', threshold)

    num_trials = benchmark_df.trial_id.nunique()
    trials_running_at_time = benchmark_df.time.value_counts()
    # The comparison is inclusive: with a strict '>' a threshold of 1.0 could
    # never be met, because no time has more measurements than there are
    # trials, and a time with exactly the required fraction would be
    # rejected.
    criteria = trials_running_at_time >= threshold * num_trials
    ok_times = trials_running_at_time[criteria]
    latest_ok_time = ok_times.index.max()
    benchmark_snapshot_df = benchmark_df[benchmark_df.time == latest_ok_time]
    return benchmark_snapshot_df
Пример #20
0
    def __init__(self):
        """Set up the sync directory and the bookkeeping attributes.

        Unless FUZZ_OUTSIDE_EXPERIMENT is set, the GCS sync directory is
        built from CLOUD_EXPERIMENT_BUCKET, EXPERIMENT, the
        benchmark/fuzzer-variant pair and TRIAL_ID, and is removed with
        gsutil before use. Otherwise gcs_sync_dir is None."""
        benchmark_fuzzer_directory = '%s-%s' % (environment.get(
            'BENCHMARK'), environment.get('FUZZER_VARIANT_NAME'))
        if environment.get('FUZZ_OUTSIDE_EXPERIMENT'):
            self.gcs_sync_dir = None
        else:
            bucket_name = environment.get('CLOUD_EXPERIMENT_BUCKET')
            experiment = environment.get('EXPERIMENT')
            trial_directory = 'trial-%d' % environment.get('TRIAL_ID')
            self.gcs_sync_dir = posixpath.join(bucket_name, experiment,
                                               'experiment-folders',
                                               benchmark_fuzzer_directory,
                                               trial_directory)
            # Start from a clean directory.
            gsutil.rm(self.gcs_sync_dir, force=True)

        # Directory names used by the trial.
        self.corpus_dir = 'corpus'
        self.corpus_archives_dir = 'corpus-archives'
        self.results_dir = 'results'
        self.unchanged_cycles_path = os.path.join(self.results_dir,
                                                  'unchanged-cycles')

        # Sync bookkeeping.
        self.cycle = 1
        self.last_sync_time = None
        self.corpus_dir_contents = set()
Пример #21
0
def is_local_experiment():
    """Return True when the LOCAL_EXPERIMENT environment value is truthy,
    False otherwise."""
    local_experiment = environment.get('LOCAL_EXPERIMENT')
    return bool(local_experiment)
Пример #22
0
def get_snapshot_seconds():
    """Return the time in seconds between snapshots of a fuzzer's corpus
    during an experiment: the SNAPSHOT_PERIOD environment value, or
    DEFAULT_SNAPSHOT_SECONDS when it is not set."""
    snapshot_period = environment.get('SNAPSHOT_PERIOD',
                                      DEFAULT_SNAPSHOT_SECONDS)
    return snapshot_period