Example #1
def db_experiment(experiment_config, db):
    """A fixture that populates the database with an experiment entity with the
    name specified in the experiment_config fixture."""
    experiment = models.Experiment(name=experiment_config['experiment'])
    db_utils.add_all([experiment])
    # yield so that the experiment exists until the using function exits.
    yield
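
A minimal usage sketch (a hypothetical test, not taken from the source project) showing how a test can consume the db_experiment fixture above; it relies only on db_utils.query and models.Experiment, which appear in the other examples on this page:

def test_db_experiment_fixture_usage(db_experiment, experiment_config):
    """Hypothetical test: requesting the fixture is enough for the experiment
    row to exist for the duration of the test."""
    experiment = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_config['experiment']).one()
    assert experiment.name == experiment_config['experiment']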
Example #2
def test_add_nonprivate_experiments_for_merge_with_clobber(db):
    """Tests that add_nonprivate_experiments_for_merge_with_clobber doesn't
    include private experiments and returns the expected results in the correct
    order."""
    experiment_names = ['1', '2', '3']
    db_utils.add_all([
        models.Experiment(name=name,
                          time_created=ARBITRARY_DATETIME,
                          time_ended=ARBITRARY_DATETIME +
                          datetime.timedelta(days=1),
                          private=False) for name in experiment_names
    ])
    db_utils.add_all([
        models.Experiment(name='private',
                          time_created=ARBITRARY_DATETIME,
                          private=True),
        models.Experiment(name='earlier-nonprivate',
                          time_created=ARBITRARY_DATETIME -
                          datetime.timedelta(days=1),
                          time_ended=ARBITRARY_DATETIME,
                          private=False),
        models.Experiment(name='nonprivate',
                          time_created=ARBITRARY_DATETIME,
                          time_ended=ARBITRARY_DATETIME +
                          datetime.timedelta(days=1),
                          private=False),
        models.Experiment(name='nonprivate-in-progress',
                          time_created=ARBITRARY_DATETIME,
                          time_ended=None,
                          private=False),
    ])
    expected_results = ['earlier-nonprivate', 'nonprivate', '1', '2', '3']
    results = queries.add_nonprivate_experiments_for_merge_with_clobber(
        experiment_names)
    assert results == expected_results
Example #3
def _record_experiment_time_ended(experiment_name: str):
    """Record |experiment| end time in the database."""
    with db_utils.session_scope() as session:
        experiment = session.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).one()
    experiment.time_ended = datetime.datetime.utcnow()
    db_utils.add_all([experiment])
Example #4
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    create_experiments(experiment_config)

    def create_trial(experiment, time_started=None, time_ended=None):
        """Creates a database trial."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    our_pending_trials = [
        create_trial(experiment_config['experiment']),
        create_trial(experiment_config['experiment'])
    ]
    other_experiment_name = get_other_experiment_name(experiment_config)
    other_trials = [
        create_trial(other_experiment_name),
        create_trial(experiment_config['experiment'], ARBITRARY_DATETIME),
        create_trial(experiment_config['experiment'], ARBITRARY_DATETIME)
    ]
    db_utils.add_all(other_trials + our_pending_trials)
    our_trial_ids = [trial.id for trial in our_pending_trials]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(our_trial_ids))
Example #5
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    other_experiment_name = experiment_config['experiment'] + 'other'
    db_utils.add_all([
        models.Experiment(name=experiment_config['experiment']),
        models.Experiment(name=other_experiment_name)
    ])

    def create_trial(experiment, time_started=None, time_ended=None):
        """Creates a database trial."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    our_pending_trials = [
        create_trial(experiment_config['experiment']),
        create_trial(experiment_config['experiment'])
    ]
    other_trials = [
        create_trial(other_experiment_name),
        create_trial(experiment_config['experiment'], datetime.datetime.now()),
        create_trial(experiment_config['experiment'], datetime.datetime.now())
    ]
    db_utils.add_all(other_trials + our_pending_trials)
    our_trial_ids = [trial.id for trial in our_pending_trials]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(our_trial_ids))
Example #6
def test_add_nonprivate_experiments_for_merge_with_clobber(db):
    """Tests that add_nonprivate_experiments_for_merge_with_clobber doesn't
    include private experiments and returns the expected results in the correct
    order."""
    experiment_names = ['1', '2', '3']
    arbitrary_datetime = datetime.datetime(2020, 1, 1)
    db_utils.add_all([
        models.Experiment(name=name,
                          time_created=arbitrary_datetime,
                          private=False) for name in experiment_names
    ])
    db_utils.add_all([
        models.Experiment(name='private',
                          time_created=arbitrary_datetime,
                          private=True),
        models.Experiment(name='later-nonprivate',
                          time_created=arbitrary_datetime +
                          datetime.timedelta(days=1),
                          private=False),
        models.Experiment(name='nonprivate',
                          time_created=arbitrary_datetime,
                          private=False),
    ])
    expected_results = ['nonprivate', 'later-nonprivate', '1', '2', '3']
    results = queries.add_nonprivate_experiments_for_merge_with_clobber(
        experiment_names)
    assert results == expected_results
Example #7
def start_trials(trials, experiment_config: dict, pool):
    """Start all |trials| that are possible to start. Marks the ones that were
    started as started."""
    logger.info('Starting trials.')
    trial_id_mapping = {
        trial.id: trial
        for trial in trials.filter(models.Trial.time_started.is_(None))
    }
    started_trial_proxies = pool.starmap(
        _start_trial, [(TrialProxy(trial), experiment_config)
                       for trial in trial_id_mapping.values()])

    # Map proxies back to trials and mark trials as started when proxies were
    # marked as such.
    started_trials = []
    for proxy in started_trial_proxies:
        if not proxy:
            continue
        trial = trial_id_mapping[proxy.id]
        trial.time_started = proxy.time_started
        started_trials.append(trial)

    if started_trials:
        db_utils.add_all(started_trials)
    return started_trials
Example #8
def test_get_fuzzers_changed_since_last_non_master_experiment(
        mocked_info, mocked_get_changed_files, db_experiment):
    """Tests that get_fuzzers_changed_since_last returns the
    correct result when the first experiment's git hash is not in branch"""
    # Set up a newer, out-of-branch experiment.
    out_of_branch_experiment = models.Experiment()
    out_of_branch_experiment.name = 'out-of-branch-experiment'
    out_of_branch_hash = 'out-of-branch-experiment-hash'
    out_of_branch_experiment.git_hash = out_of_branch_hash
    db_utils.add_all([out_of_branch_experiment])

    # Update the time of out_of_branch_experiment to come after db_experiment.
    out_of_branch_experiment.time_created = (db_experiment.time_created +
                                             datetime.timedelta(days=1))

    db_utils.add_all([out_of_branch_experiment])

    def get_changed_files(commit_hash):
        if commit_hash == 'out-of-branch-experiment-hash':
            raise diff_utils.DiffError(commit_hash)
        return AFL_FUZZER_PY

    mocked_get_changed_files.side_effect = get_changed_files

    assert not experiment_changes.get_fuzzers_changed_since_last()
    mocked_info.assert_called_with('Skipping %s. Commit is not in branch.',
                                   out_of_branch_hash)
    mocked_get_changed_files.assert_has_calls(
        [mock.call(out_of_branch_hash),
         mock.call('hash')])
Example #9
def test_get_preempted_trials_new_preempted(mocked_get_preempted_operations,
                                            preempt_exp_conf):
    """Tests that TrialInstanceManager.get_preempted_trials returns trials that
    new preempted trials we don't know about until we query for them and not
    trials that we already knew were preempted."""
    trial_instance_manager = get_trial_instance_manager(preempt_exp_conf)

    # Create trials.
    experiment = preempt_exp_conf['experiment']
    time_started = ARBITRARY_DATETIME
    known_preempted = models.Trial(experiment=experiment,
                                   fuzzer=FUZZER,
                                   benchmark=BENCHMARK,
                                   time_started=time_started)
    unknown_preempted = models.Trial(experiment=experiment,
                                     fuzzer=FUZZER,
                                     benchmark=BENCHMARK,
                                     time_started=time_started)
    trials = [known_preempted, unknown_preempted]
    db_utils.add_all(trials)
    mocked_get_preempted_operations.return_value = [
        _get_preemption_operation(trial.id, preempt_exp_conf)
        for trial in trials
    ]

    trial_instance_manager.preempted_trials = {
        known_preempted.id: known_preempted
    }
    result = trial_instance_manager.get_preempted_trials()
    expected_result = [unknown_preempted]
    assert result == expected_result
Example #10
def db_experiment(db):
    """Fixture that creates a database populated the databse with an
    experiment."""
    experiment = models.Experiment()
    experiment.name = 'experiment'
    experiment.git_hash = 'hash'
    db_utils.add_all([experiment])
    return experiment
Example #11
def create_experiments(experiment_config):
    """Create the experiment experiment entity for the experiment in
    |experiment_config| and create another one and save the results to the
    db."""
    other_experiment_name = get_other_experiment_name(experiment_config)
    db_utils.add_all([
        models.Experiment(name=experiment_config['experiment']),
        models.Experiment(name=other_experiment_name)
    ])
Example #12
    def save_snapshots():
        """Saves measured snapshots if there were any, resets |snapshots| to an
        empty list and records the fact that snapshots have been measured."""
        if not snapshots:
            return

        db_utils.add_all(snapshots)
        snapshots.clear()
        nonlocal snapshots_measured
        snapshots_measured = True
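
For context, a rough sketch of the enclosing scope that a nested helper like save_snapshots needs; the surrounding names (measure_loop_sketch, get_next_snapshot, the batch size) are assumptions for illustration only, not code from the source project:

def measure_loop_sketch(get_next_snapshot):
    """Illustrative enclosing scope for a save_snapshots-style helper."""
    snapshots = []
    snapshots_measured = False

    def save_snapshots():
        """Same pattern as above: flush |snapshots| and record the save."""
        if not snapshots:
            return
        db_utils.add_all(snapshots)
        snapshots.clear()
        nonlocal snapshots_measured
        snapshots_measured = True

    # Batch snapshots so the database is written to periodically rather than
    # once per snapshot. The batch size is arbitrary for this sketch.
    for snapshot in iter(get_next_snapshot, None):
        snapshots.append(snapshot)
        if len(snapshots) >= 100:
            save_snapshots()
    save_snapshots()
    return snapshots_measured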
Example #13
    def test_measure_snapshot_coverage(  # pylint: disable=too-many-locals
            self, mocked_is_cycle_unchanged, db, experiment, tmp_path):
        """Integration test for measure_snapshot_coverage."""
        # WORK is set by experiment to a directory that only makes sense in a
        # fakefs. A directory containing necessary llvm tools is also added to
        # PATH.
        llvm_tools_path = get_test_data_path('llvm_tools')
        os.environ["PATH"] += os.pathsep + llvm_tools_path
        os.environ['WORK'] = str(tmp_path)
        mocked_is_cycle_unchanged.return_value = False
        # Set up the coverage binary.
        benchmark = 'freetype2-2017'
        coverage_binary_src = get_test_data_path(
            'test_measure_snapshot_coverage', benchmark + '-coverage')
        benchmark_cov_binary_dir = os.path.join(
            build_utils.get_coverage_binaries_dir(), benchmark)

        os.makedirs(benchmark_cov_binary_dir)
        coverage_binary_dst_dir = os.path.join(benchmark_cov_binary_dir,
                                               'ftfuzzer')

        shutil.copy(coverage_binary_src, coverage_binary_dst_dir)

        # Set up entities in database so that the snapshot can be created.
        experiment = models.Experiment(name=os.environ['EXPERIMENT'])
        db_utils.add_all([experiment])
        trial = models.Trial(fuzzer=FUZZER,
                             benchmark=benchmark,
                             experiment=os.environ['EXPERIMENT'])
        db_utils.add_all([trial])

        snapshot_measurer = measurer.SnapshotMeasurer(trial.fuzzer,
                                                      trial.benchmark,
                                                      trial.id,
                                                      SNAPSHOT_LOGGER)

        # Set up the snapshot archive.
        cycle = 1
        archive = get_test_data_path('test_measure_snapshot_coverage',
                                     'corpus-archive-%04d.tar.gz' % cycle)
        corpus_dir = os.path.join(snapshot_measurer.trial_dir, 'corpus')
        os.makedirs(corpus_dir)
        shutil.copy(archive, corpus_dir)

        with mock.patch('common.filestore_utils.cp') as mocked_cp:
            mocked_cp.return_value = new_process.ProcessResult(0, '', False)
            # TODO(metzman): Create a system for using actual buckets in
            # integration tests.
            snapshot = measurer.measure_snapshot_coverage(
                snapshot_measurer.fuzzer, snapshot_measurer.benchmark,
                snapshot_measurer.trial_num, cycle)
        assert snapshot
        assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds()
        assert snapshot.edges_covered == 13178
Example #14
def _initialize_experiment_in_db(experiment: str, git_hash: str,
                                 trials: List[models.Trial]):
    """Initializes |experiment| in the database by creating the experiment
    entity and entities for each trial in the experiment."""
    db_utils.add_all([
        db_utils.get_or_create(models.Experiment,
                               name=experiment,
                               git_hash=git_hash)
    ])

    # TODO(metzman): Consider doing this without sqlalchemy. This can get
    # slow with SQLalchemy (it's much worse with add_all).
    db_utils.bulk_save(trials)
Example #15
def update_started_trials(trial_proxies, trial_id_mapping):
    """Update started trials in |trial_id_mapping| with results from
    |trial_proxies| and save the updated trials."""
    # Map proxies back to trials and mark trials as started when proxies were
    # marked as such.
    started_trials = []
    for proxy in trial_proxies:
        if not proxy:
            continue
        trial = trial_id_mapping[proxy.id]
        trial.time_started = proxy.time_started
        started_trials.append(trial)
    if started_trials:
        db_utils.add_all(started_trials)
    return started_trials
Example #16
def _initialize_experiment_in_db(experiment_config: dict):
    """Initializes |experiment| in the database by creating the experiment
    entity."""
    experiment_exists = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_config['experiment']).first()
    if experiment_exists:
        raise Exception('Experiment already exists in database.')

    db_utils.add_all([
        db_utils.get_or_create(
            models.Experiment,
            name=experiment_config['experiment'],
            git_hash=experiment_config['git_hash'],
            private=experiment_config.get('private', True),
            experiment_filestore=experiment_config['experiment_filestore'])
    ])
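
A hypothetical call sketch for the initializer above; the config values are placeholders, and the point is only that a second call with the same experiment name raises the "already exists" exception:

config = {
    'experiment': 'example-experiment',
    'git_hash': 'abc123',
    'experiment_filestore': 'gs://example-bucket',
}
_initialize_experiment_in_db(config)  # Creates the experiment entity.
try:
    _initialize_experiment_in_db(config)  # Same name: should raise.
except Exception as error:
    assert 'already exists' in str(error)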
Example #17
def _initialize_experiment_in_db(experiment: str, benchmarks: List[str],
                                 fuzzers: List[str], num_trials: int):
    """Initializes |experiment| in the database by creating the experiment
    entity and entities for each trial in the experiment."""
    db_utils.add_all(
        [db_utils.get_or_create(models.Experiment, name=experiment)])

    trials_args = itertools.product(sorted(benchmarks), range(num_trials),
                                    sorted(fuzzers))
    trials = [
        models.Trial(fuzzer=fuzzer, experiment=experiment, benchmark=benchmark)
        for benchmark, _, fuzzer in trials_args
    ]
    # TODO(metzman): Consider doing this without sqlalchemy. This can get
    # slow with SQLalchemy (it's much worse with add_all).
    db_utils.bulk_save(trials)
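
A hypothetical invocation sketch for the variant above; the benchmark and fuzzer names are placeholders chosen to mirror those used elsewhere in these examples:

_initialize_experiment_in_db(experiment='example-experiment',
                             benchmarks=['freetype2-2017', 'libpng'],
                             fuzzers=['afl', 'libfuzzer'],
                             num_trials=2)
# itertools.product yields 2 benchmarks * 2 trial indices * 2 fuzzers, so
# bulk_save receives 8 Trial rows tied to the single Experiment row.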
Example #18
def test_get_preempted_trials_stale_preempted(_, preempt_exp_conf):
    """Tests that TrialInstanceManager.get_preempted_trials doesn't return
    trials that we already know were preempted."""
    trial_instance_manager = get_trial_instance_manager(preempt_exp_conf)
    trial = models.Trial(experiment=preempt_exp_conf['experiment'],
                         fuzzer=FUZZER,
                         benchmark=BENCHMARK)
    db_utils.add_all([trial])
    instance_name = experiment_utils.get_trial_instance_name(
        preempt_exp_conf['experiment'], trial.id)
    trial_instance_manager.preempted_trials = {instance_name: trial}
    with mock.patch(
            'experiment.scheduler.TrialInstanceManager.'
            '_get_started_unfinished_instances',
            return_value=[instance_name]):
        assert trial_instance_manager.get_preempted_trials() == []
Example #19
    def test_measure_snapshot_coverage(  # pylint: disable=too-many-locals
            self, mocked_is_cycle_unchanged, create_measurer, db, experiment):
        """Integration test for measure_snapshot_coverage."""
        mocked_is_cycle_unchanged.return_value = False
        # Set up the coverage binary.
        benchmark = 'freetype2-2017'
        coverage_binary_src = get_test_data_path(
            'test_measure_snapshot_coverage', benchmark + '-coverage')
        benchmark_cov_binary_dir = os.path.join(
            build_utils.get_coverage_binaries_dir(), benchmark)

        os.makedirs(benchmark_cov_binary_dir)
        coverage_binary_dst_dir = os.path.join(benchmark_cov_binary_dir,
                                               'fuzz-target')

        shutil.copy(coverage_binary_src, coverage_binary_dst_dir)

        # Set up entities in database so that the snapshot can be created.
        experiment = models.Experiment(name=os.environ['EXPERIMENT'])
        db_utils.add_all([experiment])
        trial = models.Trial(fuzzer=FUZZER,
                             benchmark=benchmark,
                             experiment=os.environ['EXPERIMENT'])
        db_utils.add_all([trial])

        snapshot_measurer = create_measurer(trial.fuzzer, trial.benchmark,
                                            trial.id)

        # Set up the snapshot archive.
        cycle = 1
        archive = get_test_data_path('test_measure_snapshot_coverage',
                                     'corpus-archive-%04d.tar.gz' % cycle)
        corpus_dir = os.path.join(snapshot_measurer.trial_dir, 'corpus')
        os.makedirs(corpus_dir)
        shutil.copy(archive, corpus_dir)

        with mock.patch('common.gsutil.cp') as mocked_cp:
            mocked_cp.return_value = new_process.ProcessResult(0, '', False)
            # TODO(metzman): Create a system for using actual buckets in
            # integration tests.
            snapshot = measurer.measure_snapshot_coverage(
                snapshot_measurer.fuzzer, snapshot_measurer.benchmark,
                snapshot_measurer.trial_num, cycle)
        assert snapshot
        assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds()
        assert snapshot.edges_covered == 3798
Example #20
def test_get_last_trial_time_started_called_early(db, experiment_config):
    """Tests that get_last_trial_time_started raises an exception if called
    while there are still pending trials."""
    experiment = experiment_config['experiment']
    db_utils.add_all([
        models.Experiment(name=experiment),
    ])
    trial1 = models.Trial(experiment=experiment,
                          benchmark=BENCHMARK,
                          fuzzer=FUZZER)
    trial2 = models.Trial(experiment=experiment,
                          benchmark=BENCHMARK,
                          fuzzer=FUZZER)
    first_time = datetime.datetime.fromtimestamp(time.mktime(time.gmtime(0)))
    trial1.time_started = first_time
    trials = [trial1, trial2]
    db_utils.add_all(trials)
    with pytest.raises(AssertionError):
        scheduler.get_last_trial_time_started(experiment)
Example #21
def test_measure_all_trials(_, __, mocked_execute, db, fs):
    """Tests that measure_all_trials does what is intended under normal
    conditions."""
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)

    dispatcher._initialize_experiment_in_db(
        experiment_utils.get_experiment_name(), BENCHMARKS, FUZZERS, NUM_TRIALS)
    trials = scheduler.get_pending_trials(
        experiment_utils.get_experiment_name()).all()
    for trial in trials:
        trial.time_started = datetime.datetime.utcnow()
    db_utils.add_all(trials)

    fs.create_file(measurer.get_experiment_folders_dir() / NEW_UNIT)
    mock_pool = test_utils.MockPool()

    assert measurer.measure_all_trials(experiment_utils.get_experiment_name(),
                                       MAX_TOTAL_TIME, mock_pool, queue.Queue())

    actual_ids = [call[2] for call in mock_pool.func_calls]
    # 4 (trials) * 2 (fuzzers) * 2 (benchmarks)
    assert sorted(actual_ids) == list(range(1, 17))
Example #22
def test_get_last_trial_time_started(db, experiment_config):
    """Tests that get_last_trial_time_started returns the time_started of the
    last trial to be started."""
    experiment = experiment_config['experiment']
    db_utils.add_all([
        models.Experiment(name=experiment),
    ])
    trial1 = models.Trial(experiment=experiment,
                          benchmark=BENCHMARK,
                          fuzzer=FUZZER)
    trial2 = models.Trial(experiment=experiment,
                          benchmark=BENCHMARK,
                          fuzzer=FUZZER)
    first_time = datetime.datetime.fromtimestamp(time.mktime(time.gmtime(0)))
    trial1.time_started = first_time
    last_time_started = first_time + datetime.timedelta(days=1)
    trial2.time_started = last_time_started
    trials = [trial1, trial2]
    db_utils.add_all(trials)

    assert scheduler.get_last_trial_time_started(
        experiment) == last_time_started
Example #23
def test_get_experiment_data_fuzzer_stats(db):
    """Tests that get_experiment_data handles fuzzer_stats correctly."""
    experiment_name = 'experiment-1'
    db_utils.add_all([
        models.Experiment(name=experiment_name,
                          time_created=ARBITRARY_DATETIME,
                          private=False)
    ])
    trial = models.Trial(fuzzer='afl',
                         experiment=experiment_name,
                         benchmark='libpng')
    db_utils.add_all([trial])
    fuzzer_stats = {'execs_per_sec': 100.0}
    snapshot = models.Snapshot(time=900,
                               trial_id=trial.id,
                               edges_covered=100,
                               fuzzer_stats=fuzzer_stats)
    db_utils.add_all([snapshot])
    experiment_df = queries.get_experiment_data([experiment_name])  # pylint: disable=unused-variable
Example #24
    def handle_preempted_trials(self):
        """Handle preempted trials by marking them as preempted and creating
        replacement trials when appropriate.
        This is the algorithm used by handle_preempted_trials:

        1. Query the GCE API to find trials that were preempted since our last
        query (or since the start of the experiment, on our first query).

        2. For every preempted trial, ensure that it was not handled before;
        if it wasn't, mark the trial as finished and preempted and create a
        replacement trial if appropriate.

        This is how it is determined whether a preempted trial should be
        replaced and what it should be replaced with:

        1. First we see if we can replace it with a preemptible instance. We
        will replace it with a preemptible instance if:

          a. We haven't created more than double the number of preemptible
          trial instances that this experiment would take if it were using
          non-preemptibles ("target_trials"). This bounds the cost of our
          preemptible usage to <2X the cost of using preemptibles naively.
          If preemptibles are 20% of the cost of non-preemptibles, that is
          <40% the cost of a non-preemptible experiment.

          b. We haven't spent longer than 3X the time the experiment would
          take if using nonpreemptibles. This bounds the
          duration of the experiment to 4X the length of the nonpreemptible
          experiment.

        2. If we can't create a preemptible replacement, we replace it with a
        nonpreemptible if:

          a. We haven't created more than target_trials/20 nonpreemptibles
          already. This bounds the cost of the nonpreemptibles to 5% of the cost
          of a 100% nonpreemptible experiment.

          b. (TODO): Using nonpreemptibles will actually help the results of
          this experiment. If we can't create any preemptible instances but we
          need to replace target_trials number of instances, replacing the tiny
          fraction of them with nonpreemptibles will give you a 5% complete
          experiment. This is a hard issue to solve, because we restart
          trials as they are preempted so we may not determine it is futile to
          use nonpreemptibles until the last nonpreemptible above our limit is
          reached.

        3. TODO: There are other cases where we probably shouldn't replace
        trials that we haven't implemented, but would like to such as:

          a. If a trial is preempted very close to the end of its budgeted time.
          In that case it's probably fine if the comparison on the benchmark
          happens at 22:45 instead of 23:00.

          b. If a trial is the only trial for the fuzzer-benchmark that was
          preempted. In that case, not replacing the trial will save time and
          not hurt results much.

        The impact of this algorithm is that:

        1. The cost of a preemptible experiment, in the worst-case scenario, is
        45% of the cost of a nonpreemptible experiment. On average we find it
        will be ~30% of the cost of a nonpreemptible experiment.

        2. The time of an experiment will be 4X the length of a nonpreemptible
        experiment in the worst-case scenario. This is fine, however, because
        most of the experiment will finish earlier; only a few trials that
        won't change results very much will trickle in at the end.

        3. Experiments are guaranteed to terminate but results won't necessarily
        be complete if the preemption rate is pathologically high. This is
        acceptable because a human should intervene in these edge cases.
        """
        logger.info('Handling preempted.')
        if not self.experiment_config.get('preemptible_runners'):
            # Nothing to do here if not a preemptible experiment.
            return []

        preempted_trials = self.get_preempted_trials()
        if not preempted_trials:
            logs.info('No preempteds to handle.')
            return []

        replacements = self._get_preempted_replacements(preempted_trials)
        experiment = self.experiment_config['experiment']
        instances = [
            experiment_utils.get_trial_instance_name(experiment, trial.id)
            for trial in preempted_trials
        ]

        logs.info('Deleting preempted instances: %s', instances)
        if not delete_instances(instances, self.experiment_config):
            logs.error('Could not delete preempted instances: %s', instances)

        db_utils.add_all(preempted_trials + replacements)
        logger.info('Done handling preempted.')
        return replacements
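
The cost bounds described in the handle_preempted_trials docstring can be sanity-checked with a short sketch; the function below and its names (target_trials, preemptible_price_ratio) are illustrative assumptions, not part of the source project:

def preemptible_cost_bounds(target_trials, preemptible_price_ratio=0.2):
    """Illustrates the caps described in the docstring above.

    At most 2 * target_trials preemptible instances are created, and at most
    target_trials / 20 nonpreemptible replacements, so the worst-case cost is
    2 * preemptible_price_ratio + 1/20 of a fully nonpreemptible experiment.
    """
    max_preemptibles = 2 * target_trials
    max_nonpreemptible_replacements = target_trials // 20
    worst_case_cost_fraction = 2 * preemptible_price_ratio + 1 / 20
    return (max_preemptibles, max_nonpreemptible_replacements,
            worst_case_cost_fraction)


bounds = preemptible_cost_bounds(target_trials=400)
# bounds is (800, 20, ~0.45): at most 800 preemptible instances, at most 20
# nonpreemptible replacements, and a worst-case cost of about 45% of a fully
# nonpreemptible experiment, matching the figure given in the docstring.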