def test_schedule(_, __, mocked_datetime_now, mocked_execute, pending_trials,
                  experiment_config):
    """Tests that schedule() ends expired trials and starts new ones as
    needed."""
    # Any subprocess the scheduler launches reports success.
    mocked_execute.return_value = new_process.ProcessResult(0, '', False)
    experiment = experiment_config['experiment']
    with db_utils.session_scope() as session:
        # Start times of all trials already started before schedule() runs.
        datetimes_first_experiments_started = [
            trial.time_started for trial in session.query(models.Trial).filter(
                models.Trial.experiment == experiment).filter(
                    models.Trial.time_started.isnot(None))
        ]
    # Advance "now" past max_total_time (plus grace) so every started trial
    # counts as expired.
    mocked_datetime_now.return_value = (
        max(datetimes_first_experiments_started) +
        datetime.timedelta(seconds=(experiment_config['max_total_time'] +
                                    scheduler.GRACE_TIME_SECONDS * 2)))
    with ThreadPool() as pool:
        scheduler.schedule(experiment_config, pool)
    with db_utils.session_scope() as session:
        # Exactly the trials that were already started should now be ended.
        assert session.query(models.Trial).filter(
            models.Trial.time_started.in_(
                datetimes_first_experiments_started)).all() == (session.query(
                    models.Trial).filter(
                        models.Trial.time_ended.isnot(None)).all())
    # Every previously-pending trial should have been started.
    assert pending_trials.filter(
        models.Trial.time_started.isnot(None)).all() == pending_trials.all()
def allow_download(self):
    """Download and attach the file referenced by this message.

    Acquires one of two per-domain lock slots (so at most two concurrent
    downloads per upload site), runs download_and_extract(), and records
    the results on the Messages row. Always clears the message's
    file_currently_downloading flag when finished, even on error.
    """
    try:
        logger.info("{}: Allowing download".format(
            self.message_id[-config.ID_LENGTH:].upper()))
        with session_scope(DB_PATH) as new_session:
            message = new_session.query(Messages).filter(
                Messages.message_id == self.message_id).first()
            if message:
                file_path = "{}/{}".format(config.FILE_DIRECTORY,
                                           message.saved_file_filename)

                # Pick a download slot to fill (2 slots per domain)
                domain = urlparse(message.file_url).netloc
                lockfile1 = "/var/lock/upload_{}_1.lock".format(domain)
                lockfile2 = "/var/lock/upload_{}_2.lock".format(domain)

                lf = LF()
                lockfile = random.choice([lockfile1, lockfile2])
                if lf.lock_acquire(lockfile, to=600):
                    try:
                        (file_download_successful,
                         file_size,
                         file_amount,
                         file_do_not_download,
                         file_sha256_hashes_match,
                         media_info,
                         message_steg) = download_and_extract(
                            message.thread.chan.address,
                            self.message_id,
                            message.file_url,
                            json.loads(message.file_upload_settings),
                            json.loads(message.file_extracts_start_base64),
                            message.upload_filename,
                            file_path,
                            message.file_sha256_hash,
                            message.file_enc_cipher,
                            message.file_enc_key_bytes,
                            message.file_enc_password)
                    finally:
                        # Release the slot even if the download raised.
                        lf.lock_release(lockfile)

                    if file_download_successful:
                        if file_size:
                            message.file_size = file_size
                        if file_amount:
                            message.file_amount = file_amount
                        message.file_download_successful = file_download_successful
                        message.file_do_not_download = file_do_not_download
                        message.file_sha256_hashes_match = file_sha256_hashes_match
                        message.media_info = json.dumps(media_info)
                        message.message_steg = json.dumps(message_steg)
                        new_session.commit()
    except Exception as e:
        logger.error("{}: Error allowing download: {}".format(
            self.message_id[-config.ID_LENGTH:].upper(), e))
    finally:
        with session_scope(DB_PATH) as new_session:
            message = new_session.query(Messages).filter(
                Messages.message_id == self.message_id).first()
            # Bug fix: guard against the message having been deleted while
            # the download ran — the original dereferenced None here.
            if message:
                message.file_currently_downloading = False
                new_session.commit()
def regenerate_thread_card_and_popup(thread_hash=None, message_id=None):
    """Flag a thread's card and/or a message's popup HTML for regeneration.

    When |thread_hash| is given, marks the thread's OP message and its
    PostCards entry. When |message_id| is given, marks that message alone.
    """
    with session_scope(DB_PATH) as session:
        if thread_hash:
            thread = session.query(Threads).filter(
                Threads.thread_hash == thread_hash).first()
            if thread:
                op_message = session.query(Messages).filter(
                    and_(Messages.thread_id == thread.id,
                         Messages.is_op.is_(True))).first()
                if op_message:
                    op_message.regenerate_popup_html = True
                card = session.query(PostCards).filter(
                    PostCards.thread_id == thread_hash).first()
                if card:
                    card.regenerate = True
                # Commit only if something was actually flagged.
                if op_message or card:
                    session.commit()
        if message_id:
            target = session.query(Messages).filter(
                Messages.message_id == message_id).first()
            if target:
                target.regenerate_popup_html = True
                session.commit()
def add_mod_log_entry(description,
                      timestamp=None,
                      message_id=None,
                      user_from=None,
                      board_address=None,
                      thread_hash=None):
    """Add an entry to the moderation log.

    Args:
        description: Human-readable description of the action.
        timestamp: Optional value coercible to int; falls back to the
            current time if missing or not coercible.
        message_id, user_from, board_address, thread_hash: Optional
            identifying fields stored on the entry when provided.
    """
    with session_scope(DB_PATH) as new_session:
        log_entry = ModLog()
        log_entry.description = description
        if message_id:
            log_entry.message_id = message_id
        if timestamp:
            try:
                log_entry.timestamp = int(timestamp)
            # Narrowed from a bare except: only coercion failures fall back.
            except (TypeError, ValueError):
                log_entry.timestamp = time.time()
        else:
            log_entry.timestamp = time.time()
        if user_from:
            log_entry.user_from = user_from
        if board_address:
            log_entry.board_address = board_address
        if thread_hash:
            log_entry.thread_hash = thread_hash
        new_session.add(log_entry)
        new_session.commit()
def pending_trials(db, experiment_config):
    """Adds trials to the database and returns pending trials."""
    create_experiments(experiment_config)

    def _make_trial(experiment, time_started=None, time_ended=None):
        """Build (but don't persist) a Trial row for |experiment|."""
        return models.Trial(experiment=experiment,
                            benchmark=BENCHMARK,
                            fuzzer=FUZZER,
                            time_started=time_started,
                            time_ended=time_ended)

    experiment_name = experiment_config['experiment']
    # Two unstarted trials in our experiment — these are the pending ones.
    our_pending_trials = [_make_trial(experiment_name) for _ in range(2)]
    # Noise: a trial in another experiment plus two already-started trials.
    other_trials = [
        _make_trial(get_other_experiment_name(experiment_config)),
        _make_trial(experiment_name, ARBITRARY_DATETIME),
        _make_trial(experiment_name, ARBITRARY_DATETIME),
    ]
    db_utils.add_all(other_trials + our_pending_trials)
    pending_ids = [trial.id for trial in our_pending_trials]
    with db_utils.session_scope() as session:
        return session.query(models.Trial).filter(
            models.Trial.id.in_(pending_ids))
def __init__(self, num_trials, experiment_config):
    """Initialize the manager's preemptible/nonpreemptible bookkeeping.

    Args:
        num_trials: Total number of trials in the experiment.
        experiment_config: Experiment config dict; reads 'max_total_time'
            and 'experiment'.
    """
    self.experiment_config = experiment_config
    self.num_trials = num_trials

    # Bound for the number of nonpreemptibles we can start if the experiment
    # specified preemptible_runners.
    self.max_nonpreemptibles = min(
        math.ceil(self.num_trials * self.NONPREEMPTIBLES_FRACTION),
        self.MAX_NONPREEMPTIBLES)
    logger.info('Max nonpreemptibles: %d.', self.max_nonpreemptibles)

    # Attributes for preemptible retry window. The preemptible retry window
    # is a time period that starts when the last initial trial is started.
    # It determines how long we can retry preempted trials using
    # preemptibles. This bounds the length of time an experiment lasts.
    self.preemptible_window = (experiment_config['max_total_time'] *
                               self.PREEMPTIBLE_WINDOW_MULTIPLIER)
    self._initial_trials = list(
        get_experiment_trials(experiment_config['experiment']))
    # Lazily computed; None until the last initial trial start is known.
    self._max_time_started = None

    self.preempted_trials = {}
    self.preemptible_starts_futile = False

    # Filter operations happening before the experiment started.
    # The experiment's creation time (made UTC-aware) is the starting
    # watermark for preemption queries.
    with db_utils.session_scope() as session:
        self.last_preemptible_query = (session.query(
            models.Experiment).filter(
                models.Experiment.name == experiment_config['experiment']
            ).one().time_created.replace(tzinfo=datetime.timezone.utc))
def test_initialize_experiment_in_db(dispatcher_experiment):
    """Tests that _initialize_experiment_in_db adds the right things to the
    database."""
    # One trial per (benchmark, trial-number, fuzzer) combination.
    trials_args = itertools.product(dispatcher_experiment.benchmarks,
                                    range(dispatcher_experiment.num_trials),
                                    dispatcher_experiment.fuzzers)
    trials = [
        models.Trial(fuzzer=fuzzer,
                     experiment=dispatcher_experiment.experiment_name,
                     benchmark=benchmark)
        for benchmark, _, fuzzer in trials_args
    ]
    dispatcher._initialize_experiment_in_db(dispatcher_experiment.config)
    dispatcher._initialize_trials_in_db(trials)
    with db_utils.session_scope() as session:
        # Exactly one experiment entity should exist afterward.
        db_experiments = session.query(models.Experiment).all()
        assert len(db_experiments) == 1
        db_experiment = db_experiments[0]
        assert db_experiment.name == os.environ['EXPERIMENT']
        trials = session.query(models.Trial).all()
        fuzzer_and_benchmarks = [
            (trial.benchmark, trial.fuzzer) for trial in trials
        ]
        # NOTE(review): the expected list implies two benchmarks, two fuzzers
        # and num_trials == 4 — confirm this matches the fixture's config.
        assert fuzzer_and_benchmarks == ([('benchmark-1', 'fuzzer-a'),
                                          ('benchmark-1', 'fuzzer-b')] * 4) + [
                                              ('benchmark-2', 'fuzzer-a'),
                                              ('benchmark-2', 'fuzzer-b')
                                          ] * 4
def _record_experiment_time_ended(experiment_name: str):
    """Record |experiment| end time in the database."""
    with db_utils.session_scope() as session:
        experiment = (session.query(models.Experiment)
                      .filter(models.Experiment.name == experiment_name)
                      .one())
        # Stamp the end time and persist through the shared helper.
        experiment.time_ended = datetime.datetime.utcnow()
        db_utils.add_all([experiment])
def add_nonprivate_experiments_for_merge_with_clobber(experiment_names):
    """Returns a new list in which |experiment_names| is preceded by the names
    of nonprivate, finished experiments created no later than the earliest
    experiment in |experiment_names|, ordered by creation time. This is useful
    if you want to combine reports from |experiment_names| and all nonprivate
    experiments."""
    earliest_creation_time = None
    with db_utils.session_scope() as session:
        creation_times = session.query(Experiment.time_created).filter(
            Experiment.name.in_(experiment_names))
        # Track the earliest creation time among the named experiments.
        for (experiment_creation_time,) in creation_times:
            if (earliest_creation_time is None or
                    experiment_creation_time < earliest_creation_time):
                earliest_creation_time = experiment_creation_time

        prior_nonprivate = session.query(Experiment.name).filter(
            ~Experiment.private, ~Experiment.name.in_(experiment_names),
            ~Experiment.time_ended.is_(None),
            Experiment.time_created <= earliest_creation_time).order_by(
                Experiment.time_created)
        nonprivate_experiment_names = [name for (name,) in prior_nonprivate]
        return nonprivate_experiment_names + experiment_names
def get_fuzzers_changed_since_last():
    """Returns a list of fuzzers that have changed since the last experiment
    stored in the database that has a commit that is in the current branch."""
    # TODO(metzman): Figure out a way of skipping experiments that were stopped
    # early.

    # Walk experiments newest-first since some may have hashes that are not in
    # the current branch.
    with db_utils.session_scope() as session:
        experiments = list(
            session.query(models.Experiment).order_by(
                models.Experiment.time_created.desc()))
    if not experiments:
        raise Exception('No experiments found. Cannot find changed fuzzers.')

    changed_files = None
    for experiment in experiments:
        try:
            changed_files = diff_utils.get_changed_files(experiment.git_hash)
        except diff_utils.DiffError:
            logs.warning('Skipping %s. Commit is not in branch.',
                         experiment.git_hash)
            continue
        break

    if changed_files is None:
        raise Exception('No in-branch experiments. '
                        'Cannot find changed fuzzers.')
    return change_utils.get_changed_fuzzers(changed_files)
def get_experiment_description(experiment_name):
    """Get the description of the experiment named by |experiment_name|."""
    # Query only the description column so we don't explode the size of the
    # results the way get_experiment_data would.
    with db_utils.session_scope() as session:
        description_query = session.query(
            Experiment.description).select_from(Experiment)
        return description_query.filter(
            Experiment.name == experiment_name).one()
def get_access(address):
    """Return combined access settings for the board/list at |address|,
    or an empty dict if no such chan exists."""
    with session_scope(DB_PATH) as session:
        chan = session.query(Chan).filter(Chan.address == address).first()
        if not chan:
            return {}
        # The most recent "set options" command for this chan, if any.
        admin_cmd = session.query(Command).filter(
            and_(Command.action == "set",
                 Command.action_type == "options",
                 Command.chan_address == chan.address)).first()
        return get_combined_access(admin_cmd, chan)
def get_trial_ids(experiment: str, fuzzer: str, benchmark: str):
    """Gets ids of all finished trials for a pair of fuzzer and benchmark."""
    with db_utils.session_scope() as session:
        id_rows = session.query(models.Trial.id).filter(
            models.Trial.experiment == experiment,
            models.Trial.fuzzer == fuzzer,
            models.Trial.benchmark == benchmark,
            ~models.Trial.preempted)
        # Each row is a one-element tuple; unpack to plain ids.
        return [trial_id for (trial_id,) in id_rows]
def post_has_image(message_id):
    """Return True if the message has at least one attached file whose
    extension is a known image type, False otherwise.

    Args:
        message_id: ID of the Messages row to inspect.
    """
    with session_scope(DB_PATH) as new_session:
        message = new_session.query(Messages).filter(
            Messages.message_id == message_id).first()
        if not message:
            # Explicit False instead of the original implicit None.
            return False
        try:
            media_info = json.loads(message.media_info)
        # Narrowed from a bare except: media_info may be None (TypeError)
        # or malformed JSON (ValueError).
        except (TypeError, ValueError):
            media_info = {}
        for info in media_info.values():
            if info["extension"] in config.FILE_EXTENSIONS_IMAGE:
                return True
        return False
def delete_post(message_id):
    """Delete a post and everything derived from it: files, reply
    references, its card/board regeneration flags, and the DB row; then
    refresh the thread and board timestamps from whatever remains.

    Args:
        message_id: ID of the Messages row to delete.
    """
    with session_scope(DB_PATH) as new_session:
        message = new_session.query(Messages).filter(
            Messages.message_id == message_id).first()
        # Bug fix: the original read message.thread before checking for None.
        if not message:
            return
        thread_hash = message.thread.thread_hash
        chan_id = message.thread.chan.id

        # Delete all files associated with message
        delete_message_files(message.message_id)

        # Delete reply entry and references to post ID
        delete_message_replies(message.message_id)

        # Add deleted message entry
        daemon_com.trash_message(message_id)

        # Signal card needs to be rendered again
        card = new_session.query(PostCards).filter(
            PostCards.thread_id == thread_hash).first()
        if card:
            card.regenerate = True

        # Indicate which board needs to regenerate post numbers
        chan = new_session.query(Chan).filter(Chan.id == chan_id).first()
        if chan:
            chan.regenerate_numbers = True

        # Delete message from database
        new_session.delete(message)
        new_session.commit()

        # Update thread timestamp to last current post timestamp
        thread = new_session.query(Threads).filter(
            Threads.thread_hash == thread_hash).first()
        if thread:
            message_latest = new_session.query(Messages).filter(
                Messages.thread_id == thread.id).order_by(
                    Messages.timestamp_sent.desc()).first()
            if message_latest:
                thread.timestamp_sent = message_latest.timestamp_sent
                new_session.commit()

            # Update board timestamp to latest thread timestamp
            board = new_session.query(Chan).filter(
                Chan.id == thread.chan_id).first()
            if board:
                latest_thread = new_session.query(Threads).filter(
                    Threads.chan_id == board.id).order_by(
                        Threads.timestamp_sent.desc()).first()
                if latest_thread:
                    board.timestamp_sent = latest_thread.timestamp_sent
                    new_session.commit()
def set_up_coverage_binaries(pool, experiment):
    """Set up coverage binaries for all benchmarks in |experiment|."""
    # distinct() ensures each benchmark is handled once even though many
    # trials share the same benchmark.
    with db_utils.session_scope() as session:
        benchmark_rows = session.query(
            models.Trial.benchmark).distinct().filter(
                models.Trial.experiment == experiment)
        benchmarks = [row[0] for row in benchmark_rows]
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    filesystem.create_directory(coverage_binaries_dir)
    pool.map(set_up_coverage_binary, benchmarks)
def delete_thread(thread_id):
    """Delete the post card and thread row identified by |thread_id|."""
    with session_scope(DB_PATH) as session:
        # Remove the rendered card first, if one exists.
        card_entry = session.query(PostCards).filter(
            PostCards.thread_id == thread_id).first()
        if card_entry:
            session.delete(card_entry)
            session.commit()
        # Then remove the thread itself.
        thread_entry = session.query(Threads).filter(
            Threads.thread_hash == thread_id).first()
        if thread_entry:
            session.delete(thread_entry)
            session.commit()
def _query_unmeasured_trials(experiment: str):
    """Returns a query of trials in |experiment| that have not been
    measured."""
    measured_trial_ids = _query_ids_of_measured_trials(experiment)
    with db_utils.session_scope() as session:
        # Started, non-preempted trials in this experiment with no snapshots.
        return session.query(models.Trial).filter(
            models.Trial.experiment == experiment,
            ~models.Trial.id.in_(measured_trial_ids),
            ~models.Trial.time_started.is_(None),
            ~models.Trial.preempted)
def delete_and_replace_comment(message_id,
                               new_comment,
                               from_address=None,
                               local_delete=False):
    """Replace a post's comment with a deletion notice, strip all of its
    file/media data, regenerate its card/popup, and log the action.

    Args:
        message_id: ID of the Messages row to scrub.
        new_comment: Reason text inserted (HTML-escaped) into the notice.
        from_address: Address of the remote deleter, if any.
        local_delete: True if the deletion was performed locally.
    """
    with session_scope(DB_PATH) as new_session:
        message = new_session.query(Messages).filter(
            Messages.message_id == message_id).first()
        if message:
            if from_address:
                message.message = '<span class="god-text">ORIGINAL COMMENT DELETED BY {}. REASON: {}</span>'.format(
                    html.escape(from_address), html.escape(new_comment))
                user_from = html.escape(from_address)
            else:
                message.message = '<span class="god-text">ORIGINAL COMMENT LOCALLY DELETED. REASON: {}</span>'.format(
                    html.escape(new_comment))
                user_from = None
            # Clear every file/media attribute on the post.
            message.file_url = None
            message.file_decoded = None
            message.file_download_successful = None
            message.message_steg = "{}"
            message.file_amount = 0
            message.file_size = None
            # Bug fix: the original assigned media_width twice; the second
            # assignment was almost certainly meant to clear media_height.
            message.media_width = None
            message.media_height = None
            message.file_filename = None
            message.file_extension = None
            message.file_currently_downloading = None
            message.file_sha256_hash = None
            message.file_sha256_hashes_match = None
            message.file_do_not_download = None
            # (Duplicate file_download_successful assignment removed.)
            new_session.commit()

            regenerate_thread_card_and_popup(message.thread.thread_hash)

            # Delete all files associated with message
            delete_message_files(message_id)

            # Add mod log entry
            if local_delete:
                log_description = "Post locally deleted with comment: {}".format(
                    html.escape(new_comment))
            else:
                log_description = "Post remotely deleted with comment: {}".format(
                    html.escape(new_comment))
            add_mod_log_entry(log_description,
                              message_id=message_id,
                              user_from=user_from,
                              board_address=message.thread.chan.address,
                              thread_hash=message.thread.thread_hash)
def _query_ids_of_measured_trials(experiment: str):
    """Returns a query of the ids of trials in |experiment| that have measured
    snapshots."""
    with db_utils.session_scope() as session:
        snapshots_query = session.query(models.Snapshot).options(
            orm.joinedload('trial'))
        # Only snapshots whose trial belongs to this experiment and was not
        # preempted.
        belongs_to_experiment = models.Snapshot.trial.has(
            experiment=experiment, preempted=False)
        return (snapshots_query.filter(belongs_to_experiment).with_entities(
            models.Snapshot.trial_id).distinct())
def get_trial_instance_manager(experiment_config: dict):
    """Returns an instance of TrialInstanceManager for |experiment_config|."""
    experiment_name = experiment_config['experiment']
    with db_utils.session_scope() as session:
        existing_experiment = session.query(models.Experiment).filter(
            models.Experiment.name == experiment_name).first()
    # Make sure the experiment exists before constructing the manager.
    if existing_experiment is None:
        create_experiments(experiment_config)

    default_num_trials = 100
    return scheduler.TrialInstanceManager(default_num_trials,
                                          experiment_config)
def get_last_trial_time_started(experiment: str):
    """Returns the time_started of the last trial that was started in
    |experiment|. This function cannot be called if there are any unstarted
    (e.g. pending) trials; it asserts that none exist because the answer would
    be meaningless before then."""
    assert get_pending_trials(experiment).first() is None
    # get_experiment_trials is deliberately avoided because it orders the
    # results by id.
    with db_utils.session_scope() as session:
        newest_first = session.query(models.Trial).filter(
            models.Trial.experiment == experiment,
            STARTED_TRIALS_FILTER).order_by(
                models.Trial.time_started.desc())
        return newest_first.first().time_started
def progress(self, download_t, download_d, upload_t, upload_d):
    """Transfer-progress callback: periodically persist upload progress.

    Args:
        download_t: Total bytes to download (unused here).
        download_d: Bytes downloaded so far (unused here).
        upload_t: Total bytes to upload.
        upload_d: Bytes uploaded so far.
    """
    now = time.time()
    # Throttle DB writes: act only once the current window has elapsed, then
    # advance the window in 5-second steps past "now".
    if self.update_timestamp < now:
        while self.update_timestamp < now:
            self.update_timestamp += 5
        with session_scope(DB_PATH) as new_session:
            upl = new_session.query(UploadProgress).filter(
                UploadProgress.upload_id == self.upload_id).first()
            # Only record forward progress, and only if the row exists.
            if upl and upload_d > upl.progress_size_bytes:
                upl.progress_size_bytes = upload_d
                upl.progress_percent = upload_d / upload_t * 100
                new_session.commit()
                logger.info("Upload {}: {}/{} ({:.1f} %) uploaded".format(
                    upl.upload_id, upl.progress_size_bytes,
                    upl.total_size_bytes, upl.progress_percent))
def _query_measured_latest_snapshots(experiment: str):
    """Returns a generator of SnapshotWithTime tuples, one per trial in
    |experiment|, each representing the latest (maximum-time) measured
    snapshot for that trial.

    (Docstring fix: the original said "first snapshot", but the query
    computes the maximum snapshot time per trial.)
    """
    latest_time_column = func.max(models.Snapshot.time)
    # The order of these columns must correspond to the fields in
    # SnapshotWithTime.
    columns = (models.Trial.fuzzer, models.Trial.benchmark,
               models.Snapshot.trial_id, latest_time_column)
    experiment_filter = models.Snapshot.trial.has(experiment=experiment)
    # Group per trial (benchmark/fuzzer are functionally dependent on trial).
    group_by_columns = (models.Snapshot.trial_id, models.Trial.benchmark,
                        models.Trial.fuzzer)
    with db_utils.session_scope() as session:
        snapshots_query = session.query(*columns).join(
            models.Trial).filter(experiment_filter).group_by(
                *group_by_columns)
        return (SnapshotWithTime(*snapshot) for snapshot in snapshots_query)
def delete_message_replies(message_id):
    """Remove this post's id from every other post's reply list, then delete
    the post's own reply entry."""
    post_id = get_post_id(message_id)
    with session_scope(DB_PATH) as new_session:
        # Scrub our post id out of every reply list that contains it.
        for entry in new_session.query(PostReplies).filter(
                PostReplies.reply_ids.contains(post_id)).all():
            remaining = json.loads(entry.reply_ids)
            remaining.remove(post_id)
            entry.reply_ids = json.dumps(remaining)
            new_session.commit()

        # delete reply entry
        own_entry = new_session.query(PostReplies).filter(
            PostReplies.post_id == post_id).first()
        if own_entry:
            new_session.delete(own_entry)
def run_requested_experiment(dry_run):
    """Run the oldest requested experiment that hasn't been run yet in
    experiment-requests.yaml."""
    requested_experiments = _get_requested_experiments()

    # TODO(metzman): Look into supporting benchmarks as an optional parameter
    # so that people can add fuzzers that don't support everything.

    if PAUSE_SERVICE_KEYWORD in requested_experiments:
        # Check if automated experiment service is paused.
        logs.warning('Pause service requested, not running experiment.')
        return None

    # Scan the requests in reverse to find one that hasn't run yet.
    requested_experiment = None
    for candidate_config in reversed(requested_experiments):
        candidate_name = _get_experiment_name(candidate_config)
        with db_utils.session_scope() as session:
            already_ran = session.query(models.Experiment).filter(
                models.Experiment.name == candidate_name).first() is not None
        if not already_ran:
            requested_experiment = candidate_config
            break

    if requested_experiment is None:
        logs.info('No new experiment to run. Exiting.')
        return None

    experiment_name = _get_experiment_name(requested_experiment)
    if not validate_experiment_requests([requested_experiment]):
        logs.error('Requested experiment: %s in %s is not valid.',
                   requested_experiment, REQUESTED_EXPERIMENTS_PATH)
        return None

    fuzzers = requested_experiment['fuzzers']
    # Pick the benchmark set based on the requested experiment type.
    if (requested_experiment.get('type') ==
            benchmark_utils.BenchmarkType.BUG.value):
        benchmarks = benchmark_utils.get_bug_benchmarks()
    else:
        benchmarks = (benchmark_utils.get_oss_fuzz_coverage_benchmarks() +
                      benchmark_utils.get_standard_coverage_benchmarks())

    logs.info('Running experiment: %s with fuzzers: %s.', experiment_name,
              ' '.join(fuzzers))
    description = _get_description(requested_experiment)
    oss_fuzz_corpus = _use_oss_fuzz_corpus(requested_experiment)
    return _run_experiment(experiment_name, fuzzers, benchmarks, description,
                           oss_fuzz_corpus, dry_run)
def is_upload_site_in_database(file_upload_settings):
    """Return True if an UploadSites row matches every field of
    |file_upload_settings|, False otherwise.

    Args:
        file_upload_settings: Dict with keys mirroring the UploadSites
            columns compared below.
    """
    with session_scope(DB_PATH) as new_session:
        upload_site = new_session.query(UploadSites).filter(and_(
            UploadSites.domain == file_upload_settings["domain"],
            UploadSites.type == file_upload_settings["type"],
            UploadSites.uri == file_upload_settings["uri"],
            UploadSites.download_prefix == file_upload_settings["download_prefix"],
            UploadSites.response == file_upload_settings["response"],
            UploadSites.direct_dl_url == file_upload_settings["direct_dl_url"],
            UploadSites.extra_curl_options == file_upload_settings["extra_curl_options"],
            UploadSites.upload_word == file_upload_settings["upload_word"],
            UploadSites.form_name == file_upload_settings["form_name"],
        )).first()
        # Idiom fix: collapse the redundant if/else True/False branches.
        return upload_site is not None
def _initialize_experiment_in_db(experiment_config: dict):
    """Initializes |experiment| in the database by creating the experiment
    entity."""
    experiment_name = experiment_config['experiment']
    with db_utils.session_scope() as session:
        # Refuse to clobber an existing experiment of the same name.
        if session.query(models.Experiment).filter(
                models.Experiment.name == experiment_name).first():
            raise Exception('Experiment already exists in database.')

        db_utils.add_all([
            db_utils.get_or_create(
                models.Experiment,
                name=experiment_name,
                git_hash=experiment_config['git_hash'],
                private=experiment_config.get('private', True),
                experiment_filestore=experiment_config[
                    'experiment_filestore'],
                description=experiment_config['description']),
        ])
def upl_callback(monitor):
    """Upload-monitor callback: periodically persist upload progress.

    NOTE(review): this is a closure — it reads `self` (update_timestamp,
    upload_id) from an enclosing method not visible here. `monitor` is
    presumably a requests-toolbelt MultipartEncoderMonitor (it exposes
    bytes_read) — confirm at the call site.
    """
    now = time.time()
    # Throttle DB writes: act only once the current window has elapsed, then
    # advance the window in 5-second steps past "now".
    if self.update_timestamp < now:
        while self.update_timestamp < now:
            self.update_timestamp += 5
        with session_scope(DB_PATH) as new_session:
            upl = new_session.query(UploadProgress).filter(
                UploadProgress.upload_id == self.upload_id).first()
            # Only record forward progress, and only if the row exists.
            if upl and monitor.bytes_read > upl.progress_size_bytes:
                upl.progress_size_bytes = monitor.bytes_read
                upl.progress_percent = monitor.bytes_read / upl.total_size_bytes * 100
                new_session.commit()
                logger.info(
                    "Upload {}: {}/{} ({:.1f} %) uploaded".format(
                        upl.upload_id, upl.progress_size_bytes,
                        upl.total_size_bytes, upl.progress_percent))
def get_card_link_html(message, card_text=None, external_thread=False):
    """Build the HTML for a post link with its hover popup card, caching the
    popup HTML on the message row and regenerating it when flagged.

    Args:
        message: Messages row to render; if falsy, an error string is
            returned instead.
        card_text: Link text placed inside the tooltip anchor.
        external_thread: If True, include the board header in the popup.
    """
    if not message:
        msg = "Could not find message"
        logger.error(msg)
        return msg
    ret_str = ''
    try:
        # NOTE(review): ch/th/pid are not referenced by this format string;
        # str.format ignores extra keyword arguments, so they are vestigial.
        ret_str += '<a class="reply-tooltip link">{lstr}' \
                   '<div class="reply-main">'.format(
                       ch=message.thread.chan.address,
                       th=message.thread.thread_hash,
                       pid=message.post_id,
                       lstr=card_text)
        if external_thread:
            ret_str += '<div class="reply-header link">/{}/ - {}</div>' \
                       '<div class="reply-break"></div>'.format(
                           message.thread.chan.label,
                           message.thread.chan.description)
        ret_str += '<div class="reply-header link">{}</div>' \
                   '<div class="reply-break"></div>'.format(
                       popup_message_header(message,
                                            external_thread=external_thread))
        ret_str += generate_header_file_info(message)
        if message.popup_html and not message.regenerate_popup_html:
            # Use the cached popup HTML.
            ret_str += message.popup_html
        else:
            # Regenerate and cache the popup HTML on the message row.
            with session_scope(DB_PATH) as new_session:
                message_edit = new_session.query(Messages).filter(
                    Messages.message_id == message.message_id).first()
                message_edit.popup_html = generate_reply_link_html(message)
                if message.regenerate_popup_html:
                    message_edit.regenerate_popup_html = False
                new_session.commit()
                ret_str += message_edit.popup_html
    # Bug fix: narrowed from a bare except so KeyboardInterrupt/SystemExit
    # are not swallowed.
    except Exception:
        logger.exception("Could not generate popup html")
        ret_str = card_text
    return ret_str