def db_experiment(experiment_config, db):
    """A fixture that populates the database with an experiment entity with
    the name specified in the experiment_config fixture."""
    db_utils.add_all(
        [models.Experiment(name=experiment_config['experiment'])])
    # yield so that the experiment exists until the using function exits.
    yield
def test_add_nonprivate_experiments_for_merge_with_clobber(db):
    """Tests that add_nonprivate_experiments_for_merge_with_clobber doesn't
    include private experiments and returns the expected results in the
    correct order."""
    requested_names = ['1', '2', '3']
    one_day = datetime.timedelta(days=1)
    # The requested experiments all share the same creation/end times.
    db_utils.add_all([
        models.Experiment(name=name,
                          time_created=ARBITRARY_DATETIME,
                          time_ended=ARBITRARY_DATETIME + one_day,
                          private=False) for name in requested_names
    ])
    # Other experiments: one private (must be excluded), one that started
    # earlier, one that started at the same time, and one still in progress.
    db_utils.add_all([
        models.Experiment(name='private',
                          time_created=ARBITRARY_DATETIME,
                          private=True),
        models.Experiment(name='earlier-nonprivate',
                          time_created=ARBITRARY_DATETIME - one_day,
                          time_ended=ARBITRARY_DATETIME,
                          private=False),
        models.Experiment(name='nonprivate',
                          time_created=ARBITRARY_DATETIME,
                          time_ended=ARBITRARY_DATETIME + one_day,
                          private=False),
        models.Experiment(name='nonprivate-in-progress',
                          time_created=ARBITRARY_DATETIME,
                          time_ended=None,
                          private=False),
    ])
    results = queries.add_nonprivate_experiments_for_merge_with_clobber(
        requested_names)
    assert results == ['earlier-nonprivate', 'nonprivate', '1', '2', '3']
def _record_experiment_time_ended(experiment_name: str):
    """Record the end time of the experiment named |experiment_name| in the
    database."""
    with db_utils.session_scope() as session:
        query = session.query(models.Experiment)
        experiment = query.filter(
            models.Experiment.name == experiment_name).one()
        experiment.time_ended = datetime.datetime.utcnow()
        db_utils.add_all([experiment])
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    create_experiments(experiment_config)

    def make_trial(experiment, time_started=None, time_ended=None):
        """Returns a new trial entity belonging to |experiment|."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    # Trials with no time_started are pending.
    our_pending = [
        make_trial(experiment_config['experiment']) for _ in range(2)
    ]
    # Trials that must NOT be returned: another experiment's pending trial
    # and two of our trials that have already started.
    other_trials = [
        make_trial(get_other_experiment_name(experiment_config)),
        make_trial(experiment_config['experiment'], ARBITRARY_DATETIME),
        make_trial(experiment_config['experiment'], ARBITRARY_DATETIME),
    ]
    db_utils.add_all(other_trials + our_pending)
    pending_ids = [trial.id for trial in our_pending]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(pending_ids))
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    other_experiment_name = experiment_config['experiment'] + 'other'
    db_utils.add_all([
        models.Experiment(name=experiment_config['experiment']),
        models.Experiment(name=other_experiment_name)
    ])

    def make_trial(experiment, time_started=None, time_ended=None):
        """Returns a new trial entity belonging to |experiment|."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    # Trials with no time_started are pending.
    our_pending = [
        make_trial(experiment_config['experiment']) for _ in range(2)
    ]
    # Trials that must NOT be returned: another experiment's pending trial
    # and two of our trials that have already started.
    other_trials = [
        make_trial(other_experiment_name),
        make_trial(experiment_config['experiment'],
                   datetime.datetime.now()),
        make_trial(experiment_config['experiment'],
                   datetime.datetime.now()),
    ]
    db_utils.add_all(other_trials + our_pending)
    pending_ids = [trial.id for trial in our_pending]
    return db_utils.query(models.Trial).filter(
        models.Trial.id.in_(pending_ids))
def test_add_nonprivate_experiments_for_merge_with_clobber(db):
    """Tests that add_nonprivate_experiments_for_merge_with_clobber doesn't
    include private experiments and returns the expected results in the
    correct order."""
    requested_names = ['1', '2', '3']
    created = datetime.datetime(2020, 1, 1)
    db_utils.add_all([
        models.Experiment(name=name, time_created=created, private=False)
        for name in requested_names
    ])
    # Other experiments: one private (must be excluded), one created later
    # and one created at the same time as the requested ones.
    db_utils.add_all([
        models.Experiment(name='private',
                          time_created=created,
                          private=True),
        models.Experiment(name='later-nonprivate',
                          time_created=created + datetime.timedelta(days=1),
                          private=False),
        models.Experiment(name='nonprivate',
                          time_created=created,
                          private=False),
    ])
    results = queries.add_nonprivate_experiments_for_merge_with_clobber(
        requested_names)
    assert results == ['nonprivate', 'later-nonprivate', '1', '2', '3']
def start_trials(trials, experiment_config: dict, pool):
    """Start all |trials| that are possible to start. Marks the ones that
    were started as started."""
    logger.info('Starting trials.')
    # Only trials that have never been started are eligible.
    unstarted = trials.filter(models.Trial.time_started.is_(None))
    trial_id_mapping = {trial.id: trial for trial in unstarted}
    started_trial_proxies = pool.starmap(
        _start_trial, [(TrialProxy(trial), experiment_config)
                       for trial in trial_id_mapping.values()])
    # Map proxies back to trials and mark trials as started when proxies were
    # marked as such.
    started_trials = []
    for proxy in started_trial_proxies:
        if not proxy:
            continue
        started = trial_id_mapping[proxy.id]
        started.time_started = proxy.time_started
        started_trials.append(started)
    if started_trials:
        db_utils.add_all(started_trials)
    return started_trials
def test_get_fuzzers_changed_since_last_non_master_experiment( mocked_info, mocked_get_changed_files, db_experiment): """Tests that get_fuzzers_changed_since_last returns the correct result when the first experiment's git hash is not in branch""" # Set up a newer, out-of-branch experiment. out_of_branch_experiment = models.Experiment() out_of_branch_experiment.name = 'out-of-branch-experiment' out_of_branch_hash = 'out-of-branch-experiment-hash' out_of_branch_experiment.git_hash = out_of_branch_hash db_utils.add_all([out_of_branch_experiment]) # Update the time of out_of_branch_experiment to come after db_experiment. out_of_branch_experiment.time_created = (db_experiment.time_created + datetime.timedelta(days=1)) db_utils.add_all([out_of_branch_experiment]) def get_changed_files(commit_hash): if commit_hash == 'out-of-branch-experiment-hash': raise diff_utils.DiffError(commit_hash) return AFL_FUZZER_PY mocked_get_changed_files.side_effect = get_changed_files assert not experiment_changes.get_fuzzers_changed_since_last() mocked_info.assert_called_with('Skipping %s. Commit is not in branch.', out_of_branch_hash) mocked_get_changed_files.assert_has_calls( [mock.call(out_of_branch_hash), mock.call('hash')])
def test_get_preempted_trials_new_preempted(mocked_get_preempted_operations,
                                            preempt_exp_conf):
    """Tests that TrialInstanceManager.get_preempted_trials returns trials
    that new preempted trials we don't know about until we query for them and
    not trials that we already knew were preempted."""
    manager = get_trial_instance_manager(preempt_exp_conf)

    # Create trials.
    def make_trial():
        return models.Trial(experiment=preempt_exp_conf['experiment'],
                            fuzzer=FUZZER,
                            benchmark=BENCHMARK,
                            time_started=ARBITRARY_DATETIME)

    known_preempted = make_trial()
    unknown_preempted = make_trial()
    trials = [known_preempted, unknown_preempted]
    db_utils.add_all(trials)
    # Both trials show up as preempted in the (mocked) GCE operations.
    mocked_get_preempted_operations.return_value = [
        _get_preemption_operation(trial.id, preempt_exp_conf)
        for trial in trials
    ]
    # Only one of them was already known to be preempted.
    manager.preempted_trials = {known_preempted.id: known_preempted}
    assert manager.get_preempted_trials() == [unknown_preempted]
def db_experiment(db):
    """Fixture that populates the database with an experiment entity and
    returns it."""
    experiment = models.Experiment()
    experiment.name = 'experiment'
    experiment.git_hash = 'hash'
    db_utils.add_all([experiment])
    return experiment
def create_experiments(experiment_config):
    """Create the experiment entity for the experiment in
    |experiment_config|, create a second experiment entity, and save both to
    the database."""
    names = [
        experiment_config['experiment'],
        get_other_experiment_name(experiment_config),
    ]
    db_utils.add_all([models.Experiment(name=name) for name in names])
def save_snapshots(): """Saves measured snapshots if there were any, resets |snapshots| to an empty list and records the fact that snapshots have been measured.""" if not snapshots: return db_utils.add_all(snapshots) snapshots.clear() nonlocal snapshots_measured snapshots_measured = True
def test_measure_snapshot_coverage(  # pylint: disable=too-many-locals
        self, mocked_is_cycle_unchanged, db, experiment, tmp_path):
    """Integration test for measure_snapshot_coverage."""
    # WORK is set by experiment to a directory that only makes sense in a
    # fakefs. A directory containing necessary llvm tools is also added to
    # PATH.
    llvm_tools_path = get_test_data_path('llvm_tools')
    os.environ["PATH"] += os.pathsep + llvm_tools_path
    os.environ['WORK'] = str(tmp_path)
    mocked_is_cycle_unchanged.return_value = False
    # Set up the coverage binary.
    benchmark = 'freetype2-2017'
    coverage_binary_src = get_test_data_path(
        'test_measure_snapshot_coverage', benchmark + '-coverage')
    benchmark_cov_binary_dir = os.path.join(
        build_utils.get_coverage_binaries_dir(), benchmark)
    os.makedirs(benchmark_cov_binary_dir)
    # The binary is installed under the fuzz target's name ('ftfuzzer').
    coverage_binary_dst_dir = os.path.join(benchmark_cov_binary_dir,
                                           'ftfuzzer')
    shutil.copy(coverage_binary_src, coverage_binary_dst_dir)
    # Set up entities in database so that the snapshot can be created.
    experiment = models.Experiment(name=os.environ['EXPERIMENT'])
    db_utils.add_all([experiment])
    trial = models.Trial(fuzzer=FUZZER,
                         benchmark=benchmark,
                         experiment=os.environ['EXPERIMENT'])
    db_utils.add_all([trial])
    snapshot_measurer = measurer.SnapshotMeasurer(trial.fuzzer,
                                                  trial.benchmark, trial.id,
                                                  SNAPSHOT_LOGGER)
    # Set up the snapshot archive.
    cycle = 1
    archive = get_test_data_path('test_measure_snapshot_coverage',
                                 'corpus-archive-%04d.tar.gz' % cycle)
    corpus_dir = os.path.join(snapshot_measurer.trial_dir, 'corpus')
    os.makedirs(corpus_dir)
    shutil.copy(archive, corpus_dir)
    # Stub out the filestore copy so no real bucket access happens.
    with mock.patch('common.filestore_utils.cp') as mocked_cp:
        mocked_cp.return_value = new_process.ProcessResult(0, '', False)
        # TODO(metzman): Create a system for using actual buckets in
        # integration tests.
        snapshot = measurer.measure_snapshot_coverage(
            snapshot_measurer.fuzzer, snapshot_measurer.benchmark,
            snapshot_measurer.trial_num, cycle)
    assert snapshot
    # The snapshot time is derived from the cycle number.
    assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds()
    # Golden value for this corpus/binary pair.
    assert snapshot.edges_covered == 13178
def _initialize_experiment_in_db(experiment: str, git_hash: str,
                                 trials: List[models.Trial]):
    """Initializes |experiment| in the database by creating the experiment
    entity and entities for each trial in the experiment."""
    experiment_entity = db_utils.get_or_create(models.Experiment,
                                               name=experiment,
                                               git_hash=git_hash)
    db_utils.add_all([experiment_entity])
    # TODO(metzman): Consider doing this without sqlalchemy. This can get
    # slow with SQLalchemy (it's much worse with add_all).
    db_utils.bulk_save(trials)
def update_started_trials(trial_proxies, trial_id_mapping):
    """Update started trials in |trial_id_mapping| with results from
    |trial_proxies| and save the updated trials."""
    # Map proxies back to trials and mark trials as started when proxies were
    # marked as such. Falsy proxies are trials that were not started.
    started_trials = []
    for proxy in filter(None, trial_proxies):
        started = trial_id_mapping[proxy.id]
        started.time_started = proxy.time_started
        started_trials.append(started)
    if started_trials:
        db_utils.add_all(started_trials)
    return started_trials
def _initialize_experiment_in_db(experiment_config: dict):
    """Initializes |experiment| in the database by creating the experiment
    entity.

    Raises:
        Exception: If an experiment with the same name already exists.
    """
    preexisting = db_utils.query(models.Experiment).filter(
        models.Experiment.name == experiment_config['experiment']).first()
    if preexisting is not None:
        raise Exception('Experiment already exists in database.')
    experiment_entity = db_utils.get_or_create(
        models.Experiment,
        name=experiment_config['experiment'],
        git_hash=experiment_config['git_hash'],
        # Default to private unless the config explicitly opts out.
        private=experiment_config.get('private', True),
        experiment_filestore=experiment_config['experiment_filestore'])
    db_utils.add_all([experiment_entity])
def _initialize_experiment_in_db(experiment: str, benchmarks: List[str],
                                 fuzzers: List[str], num_trials: int):
    """Initializes |experiment| in the database by creating the experiment
    entity and entities for each trial in the experiment."""
    db_utils.add_all(
        [db_utils.get_or_create(models.Experiment, name=experiment)])
    # One trial per (benchmark, fuzzer) pair, repeated num_trials times;
    # sorted so trial creation order is deterministic.
    trials = []
    for benchmark in sorted(benchmarks):
        for _ in range(num_trials):
            for fuzzer in sorted(fuzzers):
                trials.append(
                    models.Trial(fuzzer=fuzzer,
                                 experiment=experiment,
                                 benchmark=benchmark))
    # TODO(metzman): Consider doing this without sqlalchemy. This can get
    # slow with SQLalchemy (it's much worse with add_all).
    db_utils.bulk_save(trials)
def test_get_preempted_trials_stale_preempted(_, preempt_exp_conf):
    """Tests that TrialInstanceManager.get_preempted_trials doesn't return
    trials that we already know were preempted."""
    manager = get_trial_instance_manager(preempt_exp_conf)
    trial = models.Trial(experiment=preempt_exp_conf['experiment'],
                         fuzzer=FUZZER,
                         benchmark=BENCHMARK)
    db_utils.add_all([trial])
    instance_name = experiment_utils.get_trial_instance_name(
        preempt_exp_conf['experiment'], trial.id)
    # Mark the trial as already known to be preempted.
    manager.preempted_trials = {instance_name: trial}
    patcher = mock.patch(
        'experiment.scheduler.TrialInstanceManager.'
        '_get_started_unfinished_instances',
        return_value=[instance_name])
    with patcher:
        assert manager.get_preempted_trials() == []
def test_measure_snapshot_coverage(  # pylint: disable=too-many-locals
        self, mocked_is_cycle_unchanged, create_measurer, db, experiment):
    """Integration test for measure_snapshot_coverage."""
    mocked_is_cycle_unchanged.return_value = False
    # Set up the coverage binary.
    benchmark = 'freetype2-2017'
    coverage_binary_src = get_test_data_path(
        'test_measure_snapshot_coverage', benchmark + '-coverage')
    benchmark_cov_binary_dir = os.path.join(
        build_utils.get_coverage_binaries_dir(), benchmark)
    os.makedirs(benchmark_cov_binary_dir)
    # The binary is installed under the generic target name 'fuzz-target'.
    coverage_binary_dst_dir = os.path.join(benchmark_cov_binary_dir,
                                           'fuzz-target')
    shutil.copy(coverage_binary_src, coverage_binary_dst_dir)
    # Set up entities in database so that the snapshot can be created.
    experiment = models.Experiment(name=os.environ['EXPERIMENT'])
    db_utils.add_all([experiment])
    trial = models.Trial(fuzzer=FUZZER,
                         benchmark=benchmark,
                         experiment=os.environ['EXPERIMENT'])
    db_utils.add_all([trial])
    snapshot_measurer = create_measurer(trial.fuzzer, trial.benchmark,
                                        trial.id)
    # Set up the snapshot archive.
    cycle = 1
    archive = get_test_data_path('test_measure_snapshot_coverage',
                                 'corpus-archive-%04d.tar.gz' % cycle)
    corpus_dir = os.path.join(snapshot_measurer.trial_dir, 'corpus')
    os.makedirs(corpus_dir)
    shutil.copy(archive, corpus_dir)
    # Stub out the gsutil copy so no real bucket access happens.
    with mock.patch('common.gsutil.cp') as mocked_cp:
        mocked_cp.return_value = new_process.ProcessResult(0, '', False)
        # TODO(metzman): Create a system for using actual buckets in
        # integration tests.
        snapshot = measurer.measure_snapshot_coverage(
            snapshot_measurer.fuzzer, snapshot_measurer.benchmark,
            snapshot_measurer.trial_num, cycle)
    assert snapshot
    # The snapshot time is derived from the cycle number.
    assert snapshot.time == cycle * experiment_utils.get_snapshot_seconds()
    # Golden value for this corpus/binary pair.
    assert snapshot.edges_covered == 3798
def test_get_last_trial_time_started_called_early(db, experiment_config):
    """Tests that get_last_trial_time_started raises an exception if called
    while there are still pending trials."""
    experiment = experiment_config['experiment']
    db_utils.add_all([models.Experiment(name=experiment)])

    def make_trial():
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER)

    started_trial = make_trial()
    started_trial.time_started = datetime.datetime.fromtimestamp(
        time.mktime(time.gmtime(0)))
    # This trial has no time_started, so it is still pending.
    pending_trial = make_trial()
    db_utils.add_all([started_trial, pending_trial])
    with pytest.raises(AssertionError):
        scheduler.get_last_trial_time_started(experiment)
def test_measure_all_trials(_, __, mocked_execute, db, fs):
    """Tests that measure_all_trials does what is intended under normal
    conditions."""
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)
    experiment_name = experiment_utils.get_experiment_name()
    dispatcher._initialize_experiment_in_db(experiment_name, BENCHMARKS,
                                            FUZZERS, NUM_TRIALS)
    # Mark every trial as started so all of them get measured.
    started_trials = scheduler.get_pending_trials(experiment_name).all()
    for trial in started_trials:
        trial.time_started = datetime.datetime.utcnow()
    db_utils.add_all(started_trials)
    fs.create_file(measurer.get_experiment_folders_dir() / NEW_UNIT)
    mock_pool = test_utils.MockPool()
    assert measurer.measure_all_trials(experiment_name, MAX_TOTAL_TIME,
                                       mock_pool, queue.Queue())
    measured_ids = [call[2] for call in mock_pool.func_calls]
    # 4 (trials) * 2 (fuzzers) * 2 (benchmarks)
    assert sorted(measured_ids) == list(range(1, 17))
def test_get_last_trial_time_started(db, experiment_config):
    """Tests that get_last_trial_time_started returns the time_started of
    the last trial to be started."""
    experiment = experiment_config['experiment']
    db_utils.add_all([models.Experiment(name=experiment)])

    def make_trial():
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER)

    epoch = datetime.datetime.fromtimestamp(time.mktime(time.gmtime(0)))
    first_trial = make_trial()
    first_trial.time_started = epoch
    last_trial = make_trial()
    last_time_started = epoch + datetime.timedelta(days=1)
    last_trial.time_started = last_time_started
    db_utils.add_all([first_trial, last_trial])
    assert scheduler.get_last_trial_time_started(
        experiment) == last_time_started
def test_get_experiment_data_fuzzer_stats(db):
    """Tests that get_experiment_data handles fuzzer_stats correctly."""
    experiment_name = 'experiment-1'
    db_utils.add_all([
        models.Experiment(name=experiment_name,
                          time_created=ARBITRARY_DATETIME,
                          private=False)
    ])
    trial = models.Trial(fuzzer='afl',
                         experiment=experiment_name,
                         benchmark='libpng')
    db_utils.add_all([trial])
    # A snapshot carrying per-fuzzer statistics for the trial.
    db_utils.add_all([
        models.Snapshot(time=900,
                        trial_id=trial.id,
                        edges_covered=100,
                        fuzzer_stats={'execs_per_sec': 100.0})
    ])
    experiment_df = queries.get_experiment_data([experiment_name])  # pylint: disable=unused-variable
def handle_preempted_trials(self):
    """Handle preempted trials by marking them as preempted and creating
    replacement trials when appropriate.

    This is the algorithm used by handle_preempted_trials:

    1. Query the GCE API to find trials that were preempted since our last
    query (or the start of the experiment on our first query).

    2. For every preempted trial, ensure that it was not handled before and
    if it wasn't then mark the trials as finished and preempted and create
    replacement trials if appropriate.

    This is how it is determined whether a preempted trial should be
    replaced and what it should be replaced with:

    1. First we see if we can replace it with a preemptible instance. We
    will replace it with a preemptible instance if:

    a. We haven't created more than double the number of preemptible trial
    instances than the number of trials this experiment would take if it
    were using non-preemptibles ("target_trials"). This bounds the cost of
    our preemptible usage to <2X cost of using preemptibles naively. If
    preemptibles are 20% cost of non-preemptibles, then <40% the cost of a
    non-preemptible experiment.

    b. We haven't spent longer than 3X the duration of time the experiment
    would take if using nonpreemptibles. This bounds the duration of the
    experiment to 4X the length of the nonpreemptible experiment.

    2. If we can't create a preemptible replacement, we replace it with a
    nonpreemptible if:

    a. We haven't created more than target_trials/20 nonpreemptibles
    already. This bounds the cost of the nonpreemptibles to 5% of the cost
    of a 100% nonpreemptible experiment.

    b. (TODO): Using preemptibles will actually help the results of this
    experiment. If we can't create any preemptible instances but we need to
    replace target_trials number of instances, replacing the tiny fraction
    of them with preemptibles will give you a 5% complete experiment. This
    is a hard issue to solve, because we restart trials as they are
    preempted so we may not determine it is futile to use nonpreemptibles
    until the last nonpreemptible above our limit is reached.

    3. TODO: There are other cases where we probably shouldn't replace
    trials that we haven't implemented, but would like to such as:

    a. If a trial is preempted very close to the end of its budgeted time.
    In that case it's probably fine if the comparison on the benchmark
    happens at 22:45 instead of 23:00.

    b. If a trial is the only trial for the fuzzer-benchmark that was
    preempted. In that case, not replacing the trial will save time and not
    hurt results much.

    The impact of this algorithm is that:

    1. The cost of a preemptible experiment, in the worst case scenario, is
    45% of a nonpreemptible experiment. On average we find they will be
    ~30% the cost of a nonpreemptible experiment.

    2. Time of an experiment will be 4X the length of a nonpreemptible
    experiment in the worst case scenario. This is fine however because
    most of the experiment will finish earlier, only a few trials that
    won't change results very much will trickle in at the end.

    3. Experiments are guaranteed to terminate but results won't
    necessarily be complete if the preemption rate is pathologically high.
    This is acceptable because a human should intervene in these edge
    cases.

    Returns:
        The list of replacement trials that were created (empty if this is
        not a preemptible experiment or nothing was preempted).
    """
    logger.info('Handling preempted.')
    if not self.experiment_config.get('preemptible_runners'):
        # Nothing to do here if not a preemptible experiment.
        return []
    preempted_trials = self.get_preempted_trials()
    if not preempted_trials:
        logs.info('No preempteds to handle.')
        return []
    # Decide which preempted trials get replacements (per the policy in the
    # docstring above).
    replacements = self._get_preempted_replacements(preempted_trials)
    experiment = self.experiment_config['experiment']
    instances = [
        experiment_utils.get_trial_instance_name(experiment, trial.id)
        for trial in preempted_trials
    ]
    logs.info('Deleting preempted instances: %s', instances)
    # Best-effort delete: a failure is logged but does not abort handling.
    if not delete_instances(instances, self.experiment_config):
        logs.error('Could not delete preempted instances: %s', instances)
    # Persist both the updated preempted trials and their replacements.
    db_utils.add_all(preempted_trials + replacements)
    logger.info('Done handling preempted.')
    return replacements