def _query_and_upload_strategy_probabilities(engine):
  """Refresh the stored strategy probabilities for |engine|.

  Runs the multi-armed bandit query for the engine, replaces the engine's
  existing FuzzStrategyProbability entities in datastore with the queried
  rows, and then stores the same rows in BigQuery. Probabilities are based on
  the new_edges feature.
  """
  rows = _query_multi_armed_bandit_probabilities(engine)
  logs.log('Queried distribution for {}.'.format(engine.name))

  # TODO(mukundv): Update once we choose a temperature parameter for final
  # implementation.
  fresh_entities = []
  for row in rows:
    probability_entity = data_types.FuzzStrategyProbability()
    probability_entity.strategy_name = str(row['strategy'])
    probability_entity.probability = float(row['bandit_weight'])
    probability_entity.engine = engine.name
    fresh_entities.append(probability_entity)

  # Remove whatever was previously stored for this engine before writing the
  # freshly queried values.
  previous_query = data_types.FuzzStrategyProbability.query(
      data_types.FuzzStrategyProbability.engine == engine.name)
  previous_keys = [
      entity.key for entity in ndb_utils.get_all_from_query(previous_query)
  ]
  ndb_utils.delete_multi(previous_keys)
  ndb_utils.put_multi(fresh_entities)
  logs.log('Uploaded queried distribution to ndb for {}'.format(engine.name))

  _store_probabilities_in_bigquery(engine, rows)
  logs.log('Uploaded queried distribution to BigQuery for {}'.format(
      engine.name))
def execute_task(*_):
  """Upload every crash report whose metadata is not yet marked uploaded.

  Metadata whose testcase no longer exists is deleted. Metadata whose upload
  fails is left untouched so that it is picked up again on a later run.
  """
  logs.log('Uploading pending reports.')

  # Collect the metadata of all reports still waiting for upload.
  pending_query = data_types.ReportMetadata.query(
      ndb_utils.is_false(data_types.ReportMetadata.is_uploaded))
  reports_metadata = list(ndb_utils.get_all_from_query(pending_query))
  if not reports_metadata:
    logs.log('No reports that need upload found.')
    return

  environment.set_value('UPLOAD_MODE', 'prod')

  pending_testcase_ids = [report.testcase_id for report in reports_metadata]
  logs.log('Uploading reports for testcases: %s' % str(pending_testcase_ids))

  orphaned_metadata_keys = []
  for report_metadata in reports_metadata:
    # Rebuild the actual report from its stored metadata.
    crash_info = crash_uploader.crash_report_info_from_metadata(
        report_metadata)
    testcase_id = report_metadata.testcase_id

    # Only the existence of the testcase matters here; the entity itself is
    # not used.
    try:
      data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      logs.log_warn('Could not find testcase %s.' % testcase_id)
      orphaned_metadata_keys.append(report_metadata.key)
      continue

    logs.log('Processing testcase %s for crash upload.' % testcase_id)
    crash_report_id = crash_info.upload()
    if crash_report_id is None:
      logs.log_error(
          'Crash upload for testcase %s failed, retry later.' % testcase_id)
      continue

    # Record the successful upload on the metadata entity.
    report_metadata.crash_report_id = crash_report_id
    report_metadata.is_uploaded = True
    report_metadata.put()
    logs.log('Uploaded testcase %s to crash, got back report id %s.' %
             (testcase_id, crash_report_id))

    # Pause between successful uploads (presumably to throttle requests to
    # the crash service; confirm before changing).
    time.sleep(1)

  # Drop metadata entries whose testcase does not exist anymore.
  if orphaned_metadata_keys:
    ndb_utils.delete_multi(orphaned_metadata_keys)

  # Uploaded ReportMetadata is not deleted here. Deletion happens in batches
  # in cleanup_task, so that in case of error there is some buffer for looking
  # at stored ReportMetadata in the meantime.
  logs.log('Finished uploading crash reports.')
def update_mappings_for_fuzzer(fuzzer, mappings=None):
  """Make the stored FuzzerJob mappings of |fuzzer| match |mappings|.

  Mappings that already exist are kept, missing ones are created, and stored
  ones that are not listed in |mappings| are deleted. When |mappings| is None,
  fuzzer.jobs is used.
  """
  if mappings is None:
    mappings = fuzzer.jobs

  existing_query = data_types.FuzzerJob.query().filter(
      data_types.FuzzerJob.fuzzer == fuzzer.name)

  # Stored mappings keyed by job name. Entries still present after the loop
  # below are no longer wanted.
  unclaimed = {
      entity.job: entity
      for entity in ndb_utils.get_all_from_query(existing_query)
  }

  created = []
  for job_name in mappings:
    # A stored mapping for this job is claimed and left as it is.
    if unclaimed.pop(job_name, None):
      continue

    job = data_types.Job.query(data_types.Job.name == job_name).get()
    if not job:
      logs.log_error('An unknown job %s was selected for fuzzer %s.' %
                     (job_name, fuzzer.name))
      continue

    fuzzer_job = data_types.FuzzerJob()
    fuzzer_job.fuzzer = fuzzer.name
    fuzzer_job.job = job_name
    fuzzer_job.platform = job.platform
    created.append(fuzzer_job)

  ndb_utils.put_multi(created)
  ndb_utils.delete_multi([entity.key for entity in unclaimed.values()])
def update_platform_for_job(job_name, new_platform):
  """Set |new_platform| on all FuzzerJob mappings for |job_name|."""
  job_query = data_types.FuzzerJob.query().filter(
      data_types.FuzzerJob.job == job_name)

  updated = []
  for fuzzer_job in ndb_utils.get_all_from_query(job_query):
    fuzzer_job.platform = new_platform
    updated.append(fuzzer_job)

  # Write all modified mappings back in one batch.
  ndb_utils.put_multi(updated)
def get_similar_issues(issue_tracker, testcase, only_open=True):
  """Return a list of issues that seem related to |testcase|.

  The list starts with the issue tracker search results for the testcase's
  search keywords. If the testcase has a group id, issues attached to the
  other testcases of that group are appended, skipping issues already listed.
  """
  search_results = issue_tracker.find_issues(
      keywords=get_search_keywords(testcase), only_open=only_open)
  issues = list(search_results) if search_results else []
  issue_ids = [issue.id for issue in issues]

  # Without a group there is nothing more to add.
  if not testcase.group_id:
    return issues

  group_query = data_types.Testcase.query(
      data_types.Testcase.group_id == testcase.group_id)
  for similar_testcase in ndb_utils.get_all_from_query(group_query):
    if not similar_testcase.bug_information:
      continue

    # Skip issues that are already part of the result.
    issue_id = int(similar_testcase.bug_information)
    if issue_id in issue_ids:
      continue

    issue = issue_tracker.get_issue(issue_id)
    if not issue:
      continue

    # When only open bugs are wanted, both the issue and the testcase have to
    # be open.
    # NOTE(review): this reads |testcase.open| (the input testcase), not
    # |similar_testcase.open| - confirm that this is intended.
    if only_open and not (issue.is_open and testcase.open):
      continue

    issues.append(issue)
    issue_ids.append(issue_id)

  return issues
def get_fuzz_target_jobs(fuzz_target_name=None,
                         engine=None,
                         job=None,
                         limit=None):
  """Return an iterable of FuzzTargetJob mappings matching the arguments.

  Each of |fuzz_target_name|, |job| and |engine| narrows the query only when
  it is truthy. With |limit| set, the result of query.iter(limit=limit) is
  returned; otherwise the result of ndb_utils.get_all_from_query.
  """
  query = data_types.FuzzTargetJob.query()

  # (requested value, property to compare it with), applied in this order.
  criteria = (
      (fuzz_target_name, data_types.FuzzTargetJob.fuzz_target_name),
      (job, data_types.FuzzTargetJob.job),
      (engine, data_types.FuzzTargetJob.engine),
  )
  for wanted, model_property in criteria:
    if wanted:
      query = query.filter(model_property == wanted)

  if limit is None:
    return ndb_utils.get_all_from_query(query)

  return query.iter(limit=limit)
def get_fuzz_task_payload(platform=None):
  """Select a fuzzer and job that can run on this platform.

  Args:
    platform: Platform to select for. When not given, the QUEUE_OVERRIDE
      environment value is used if set, otherwise environment.platform().

  Returns:
    A (fuzzer, job) tuple chosen by a random choice weighted by each mapping's
    actual_weight, or (None, None) if the platform has no mappings.
  """
  if not platform:
    queue_override = environment.get_value('QUEUE_OVERRIDE')
    platform = queue_override if queue_override else environment.platform()

  if environment.is_local_development():
    # Local development reads the individual FuzzerJob entities, so the filter
    # must be built from FuzzerJob's own platform property rather than from
    # the FuzzerJobs model used in the other branch.
    query = data_types.FuzzerJob.query()
    query = query.filter(data_types.FuzzerJob.platform == platform)
    mappings = list(ndb_utils.get_all_from_query(query))
  else:
    # Otherwise read the FuzzerJobs entities and flatten the mappings each of
    # them carries in fuzzer_jobs.
    query = data_types.FuzzerJobs.query()
    query = query.filter(data_types.FuzzerJobs.platform == platform)

    mappings = []
    for entity in query:
      mappings.extend(entity.fuzzer_jobs)

  if not mappings:
    return None, None

  selection = utils.random_weighted_choice(
      mappings, weight_attribute='actual_weight')
  return selection.fuzzer, selection.job
def _apply_fuzzer_environment(fuzzer, fuzzer_directory):
  """Set the environment values derived from |fuzzer|'s configuration."""
  environment.set_value('FUZZER_DIR', fuzzer_directory)
  environment.set_value('UNTRUSTED_CONTENT', fuzzer.untrusted_content)

  # If the fuzzer generates large testcases or a large number of small ones
  # that don't fit on tmpfs, then use the larger disk directory.
  if fuzzer.has_large_testcases:
    testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
    environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

  # Adjust the test timeout, if user has provided one.
  if fuzzer.timeout:
    environment.set_value('TEST_TIMEOUT', fuzzer.timeout)

    # Increase fuzz test timeout if the fuzzer timeout is higher than its
    # current value.
    fuzz_test_timeout = environment.get_value('FUZZ_TEST_TIMEOUT')
    if fuzz_test_timeout and fuzz_test_timeout < fuzzer.timeout:
      environment.set_value('FUZZ_TEST_TIMEOUT', fuzzer.timeout)

  # Adjust the max testcases if this fuzzer has specified a lower limit.
  # NOTE(review): unlike the timeout check above, this comparison is not
  # guarded against MAX_TESTCASES being unset - confirm it is always defined.
  max_testcases = environment.get_value('MAX_TESTCASES')
  if fuzzer.max_testcases and fuzzer.max_testcases < max_testcases:
    environment.set_value('MAX_TESTCASES', fuzzer.max_testcases)


def _install_fuzzer_update(fuzzer, fuzzer_name, fuzzer_directory,
                           version_file):
  """Replace the contents of |fuzzer_directory| with |fuzzer|'s archive.

  Returns:
    True if the archive was unpacked and the revision file written, False if
    any step failed (the failure has been logged).
  """
  logs.log('Fuzzer update was found, updating.')

  # Clear the old fuzzer directory if it exists.
  if not shell.remove_directory(fuzzer_directory, recreate=True):
    logs.log_error('Failed to clear fuzzer directory.')
    return False

  # Copy the archive to local disk and unpack it.
  archive_path = os.path.join(fuzzer_directory, fuzzer.filename)
  if not blobs.read_blob_to_disk(fuzzer.blobstore_key, archive_path):
    logs.log_error('Failed to copy fuzzer archive.')
    return False

  try:
    archive.unpack(archive_path, fuzzer_directory)
  except Exception:
    error_message = ('Failed to unpack fuzzer archive %s '
                     '(bad archive or unsupported format).') % fuzzer.filename
    logs.log_error(error_message)
    fuzzer_logs.upload_script_log(
        'Fatal error: ' + error_message, fuzzer_name=fuzzer_name)
    return False

  fuzzer_path = os.path.join(fuzzer_directory, fuzzer.executable_path)
  if not os.path.exists(fuzzer_path):
    error_message = ('Fuzzer executable %s not found. '
                     'Check fuzzer configuration.') % fuzzer.executable_path
    logs.log_error(error_message)
    fuzzer_logs.upload_script_log(
        'Fatal error: ' + error_message, fuzzer_name=fuzzer_name)
    return False

  # Make fuzzer executable.
  os.chmod(fuzzer_path, 0o750)

  # Cleanup unneeded archive.
  shell.remove_file(archive_path)

  # Save the current revision of this fuzzer in a file for later checks.
  revisions.write_revision_to_revision_file(version_file, fuzzer.revision)
  logs.log('Updated fuzzer to revision %d.' % fuzzer.revision)
  return True


def update_fuzzer_and_data_bundles(fuzzer_name):
  """Update the fuzzer with a given name if necessary.

  Sets up the fuzzer's environment values, installs a newer fuzzer archive
  when the local revision file is out of date, and updates the data bundles
  whose name matches the fuzzer's data_bundle_name.

  Args:
    fuzzer_name: Name of the Fuzzer entity to set up.

  Returns:
    The Fuzzer entity on success, or None if installing the fuzzer or updating
    one of its data bundles failed.

  Raises:
    errors.InvalidFuzzerError: If no fuzzer with |fuzzer_name| exists.
  """
  fuzzer = data_types.Fuzzer.query(
      data_types.Fuzzer.name == fuzzer_name).get()
  if not fuzzer:
    logs.log_error('No fuzzer exists with name %s.' % fuzzer_name)
    raise errors.InvalidFuzzerError

  # Set some helper environment variables.
  fuzzer_directory = get_fuzzer_directory(fuzzer_name)
  _apply_fuzzer_environment(fuzzer, fuzzer_directory)

  # Check for updates to this fuzzer. Builtin fuzzers are never updated here.
  version_file = os.path.join(fuzzer_directory, '.%s_version' % fuzzer_name)
  if (not fuzzer.builtin and
      revisions.needs_update(version_file, fuzzer.revision)):
    if not _install_fuzzer_update(fuzzer, fuzzer_name, fuzzer_directory,
                                  version_file):
      return None

  _clear_old_data_bundles_if_needed()

  # Setup data bundles associated with this fuzzer.
  data_bundles = ndb_utils.get_all_from_query(
      data_types.DataBundle.query(
          data_types.DataBundle.name == fuzzer.data_bundle_name))
  for data_bundle in data_bundles:
    if not update_data_bundle(fuzzer, data_bundle):
      return None

  # Setup environment variable for launcher script path.
  if fuzzer.launcher_script:
    fuzzer_launcher_path = os.path.join(fuzzer_directory,
                                        fuzzer.launcher_script)
    environment.set_value('LAUNCHER_PATH', fuzzer_launcher_path)

    # For launcher script usecase, we need the entire fuzzer directory on the
    # worker.
    if environment.is_trusted_host():
      from clusterfuzz._internal.bot.untrusted_runner import file_host
      worker_fuzzer_directory = file_host.rebase_to_worker_root(
          fuzzer_directory)
      file_host.copy_directory_to_worker(
          fuzzer_directory, worker_fuzzer_directory, replace=True)

  return fuzzer
def _get_job_list_for_fuzzer(fuzzer):
  """Return the names of the jobs mapped to |fuzzer| as a list.

  Args:
    fuzzer: Fuzzer whose name is matched against FuzzerJob.fuzzer.

  Returns:
    A list with the job attribute of every matching FuzzerJob entity.
  """
  # Query.filter() returns a new query instead of modifying the one it is
  # called on, so its result has to be kept. Discarding it would leave the
  # query unfiltered and return the jobs of every fuzzer.
  query = data_types.FuzzerJob.query().filter(
      data_types.FuzzerJob.fuzzer == fuzzer.name)
  return [mapping.job for mapping in ndb_utils.get_all_from_query(query)]
def _check_and_update_similar_bug(testcase, issue_tracker):
  """Return True if a similar testcase already has an issue that blocks filing.

  Similar testcases are those in the same group as |testcase| plus processed
  testcases with the same crash type, crash state, security flag and project.
  Filing is blocked when such a testcase has an issue and any of these holds:
  the similar testcase is open and reproducible, the issue is open, the issue
  carries the ignore label, or the issue was closed only recently. For the
  last two cases a triage message is added to |testcase|.
  """
  batch_size = data_types.TESTCASE_ENTITY_QUERY_LIMIT // 2

  # Testcases from the same group, if the testcase is grouped at all.
  grouped_testcases = []
  if testcase.group_id:
    grouped_testcases = ndb_utils.get_all_from_query(
        data_types.Testcase.query(
            data_types.Testcase.group_id == testcase.group_id),
        batch_size=batch_size)

  # Testcases with the same crash params. These might not be in a group if
  # they were just fixed.
  matching_testcases = ndb_utils.get_all_from_query(
      data_types.Testcase.query(
          data_types.Testcase.crash_type == testcase.crash_type,
          data_types.Testcase.crash_state == testcase.crash_state,
          data_types.Testcase.security_flag == testcase.security_flag,
          data_types.Testcase.project_name == testcase.project_name,
          data_types.Testcase.status == 'Processed'),
      batch_size=batch_size)

  for candidate in itertools.chain(grouped_testcases, matching_testcases):
    candidate_id = candidate.key.id()

    # The testcase itself is not a similar testcase.
    if candidate_id == testcase.key.id():
      continue

    # Only testcases with an associated, retrievable issue are relevant.
    if not candidate.bug_information:
      continue

    issue = issue_tracker.get_issue(candidate.bug_information)
    if not issue:
      continue

    # If the reproducible issue is not verified yet, bug is still valid and
    # might be caused by non-availability of latest builds. In that case,
    # don't file a new bug yet.
    if candidate.open and not candidate.one_time_crasher_flag:
      return True

    # An open issue already covers this crash.
    if issue.is_open:
      return True

    # The issue says that this crash is to be ignored.
    policy = issue_tracker_policy.get(issue_tracker.project)
    ignore_label = policy.label('ignore')
    if ignore_label in issue.labels:
      message = (
          'Skipping filing a bug since similar testcase ({testcase_id}) in '
          'issue ({issue_id}) is blacklisted with {ignore_label} label.'
      ).format(
          testcase_id=candidate_id,
          issue_id=issue.id,
          ignore_label=ignore_label)
      _add_triage_message(testcase, message)
      return True

    # The issue was closed recently; wait so that fixed verification has a
    # chance to complete.
    recently_closed = issue.closed_time and not dates.time_has_expired(
        issue.closed_time, hours=data_types.MIN_ELAPSED_TIME_SINCE_FIXED)
    if recently_closed:
      message = (
          'Delaying filing a bug since similar testcase '
          '({testcase_id}) in issue ({issue_id}) was just fixed.').format(
              testcase_id=candidate_id, issue_id=issue.id)
      _add_triage_message(testcase, message)
      return True

  return False
def redo_testcase(testcase, tasks, user_email):
  """Reset |testcase| for the given redo |tasks| and queue them.

  Clears the testcase fields and metadata belonging to each selected task,
  records a comment naming |user_email| and the tasks, deletes the testcase's
  notifications and adds the tasks to the testcase's queue.

  Raises:
    InvalidRedoTask: If an entry of |tasks| is not in VALID_REDO_TASKS.
  """
  for task in tasks:
    if task not in VALID_REDO_TASKS:
      raise InvalidRedoTask(task)

  redo_minimize = 'minimize' in tasks
  redo_regression = 'regression' in tasks
  redo_progression = 'progression' in tasks
  redo_impact = 'impact' in tasks
  redo_blame = 'blame' in tasks

  selected_tasks = []
  testcase_id = testcase.key.id()

  # Metadata keys to clear; extended below depending on the selected tasks.
  stale_metadata_keys = ['potentially_flaky']

  if redo_minimize:
    selected_tasks.append('minimize')
    testcase.minimized_keys = ''
    testcase.set_metadata('redo_minimize', True, update_testcase=False)
    stale_metadata_keys.extend([
        'env', 'current_minimization_phase_attempts', 'minimization_phase'
    ])

    # If this testcase was archived during minimization, update the state.
    testcase.archive_state &= ~data_types.ArchiveStatus.MINIMIZED

  if redo_regression:
    selected_tasks.append('regression')
    testcase.regression = ''
    stale_metadata_keys.extend(['last_regression_min', 'last_regression_max'])

  if redo_progression:
    selected_tasks.append('progression')
    testcase.fixed = ''
    testcase.open = True
    testcase.last_tested_crash_stacktrace = None
    testcase.triaged = False
    testcase.set_metadata('progression_pending', True, update_testcase=False)
    stale_metadata_keys.extend([
        'last_progression_min', 'last_progression_max', 'last_tested_revision'
    ])

  if redo_impact:
    selected_tasks.append('impact')
    testcase.is_impact_set_flag = False

  if redo_blame:
    selected_tasks.append('blame')
    testcase.set_metadata('blame_pending', True, update_testcase=False)
    testcase.set_metadata('predator_result', None, update_testcase=False)

  for metadata_key in stale_metadata_keys:
    testcase.delete_metadata(metadata_key, update_testcase=False)

  testcase.comments += '[%s] %s: Redo task(s): %s\n' % (
      utils.current_date_time(), user_email, ', '.join(sorted(selected_tasks)))
  testcase.one_time_crasher_flag = False
  testcase.put()

  # Allow new notifications to be sent for this testcase.
  notification_query = data_types.Notification.query(
      data_types.Notification.testcase_id == testcase.key.id())
  notification_keys = ndb_utils.get_all_from_query(
      notification_query, keys_only=True)
  ndb_utils.delete_multi(notification_keys)

  # If we are re-doing minimization, other tasks will be done automatically
  # after minimization completes. So, don't add those tasks.
  if redo_minimize:
    tasks_to_queue = ['minimize']
  else:
    requested = (
        ('regression', redo_regression),
        ('progression', redo_progression),
        ('impact', redo_impact),
        ('blame', redo_blame),
    )
    tasks_to_queue = [name for name, wanted in requested if wanted]

  for task_name in tasks_to_queue:
    add_task(task_name, testcase_id, testcase.job_type,
             queue_for_testcase(testcase))