def cleanup_testcases_and_issues():
  """Clean up unneeded open testcases and their associated issues.

  Iterates over all not-yet-triaged testcases and, for each one, runs three
  groups of rules in a fixed order: issue updates, testcase marking/closing
  rules, then notifications and deletion rules. The ordering matters — the
  notification and triage-complete steps rely on state set by the earlier
  marking rules.
  """
  jobs = data_handler.get_all_job_type_names()
  # Only consider testcases that have not been triaged yet; keys_only keeps
  # the query cheap since each entity is re-fetched individually below.
  testcase_keys = ndb_utils.get_all_from_query(
      data_types.Testcase.query(
          ndb_utils.is_false(data_types.Testcase.triaged)),
      keys_only=True)
  top_crashes_by_project_and_platform_map = (
      get_top_crashes_for_all_projects_and_platforms())

  utils.python_gc()

  testcases_processed = 0
  empty_issue_tracker_policy = issue_tracker_policy.get_empty()
  for testcase_key in testcase_keys:
    testcase_id = testcase_key.id()
    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    logs.log('Processing testcase %d.' % testcase_id)

    issue = issue_tracker_utils.get_issue_for_testcase(testcase)
    policy = issue_tracker_utils.get_issue_tracker_policy_for_testcase(testcase)
    if not policy:
      # Fall back to an empty policy so the helpers below can run
      # unconditionally without None checks.
      policy = empty_issue_tracker_policy

    # Issue updates.
    update_os_labels(policy, testcase, issue)
    update_fuzz_blocker_label(policy, testcase, issue,
                              top_crashes_by_project_and_platform_map)
    update_component_labels(testcase, issue)
    update_issue_ccs_from_owners_file(policy, testcase, issue)
    update_issue_owner_and_ccs_from_predator_results(policy, testcase, issue)
    update_issue_labels_for_flaky_testcase(policy, testcase, issue)

    # Testcase marking rules.
    mark_duplicate_testcase_as_closed_with_no_issue(testcase)
    mark_issue_as_closed_if_testcase_is_fixed(policy, testcase, issue)
    mark_testcase_as_closed_if_issue_is_closed(policy, testcase, issue)
    mark_testcase_as_closed_if_job_is_invalid(testcase, jobs)
    mark_unreproducible_testcase_as_fixed_if_issue_is_closed(testcase, issue)
    mark_unreproducible_testcase_and_issue_as_closed_after_deadline(
        policy, testcase, issue)

    # Notification, to be done at end after testcase state is updated from
    # previous rules.
    notify_closed_issue_if_testcase_is_open(policy, testcase, issue)
    notify_issue_if_testcase_is_invalid(policy, testcase, issue)
    notify_uploader_when_testcase_is_processed(policy, testcase, issue)

    # Mark testcase as triage complete if both testcase and associated issue
    # are closed. This also need to be done before the deletion rules.
    mark_testcase_as_triaged_if_needed(testcase, issue)

    # Testcase deletion rules.
    delete_unreproducible_testcase_with_no_issue(testcase)

    testcases_processed += 1
    if testcases_processed % 100 == 0:
      # Keep memory usage in check while iterating over many entities.
      utils.python_gc()
def unpack(archive_path,
           output_directory,
           trusted=False,
           file_match_callback=None):
  """Extracts an archive into the target directory.

  Args:
    archive_path: Path to the archive file (zip, tar, or lzma-compressed tar).
    output_directory: Directory to extract into.
    trusted: Whether the archive comes from a trusted source. Untrusted
        archives get directory-traversal checks, and symlinks are only
        created for trusted archives.
    file_match_callback: Optional callback forwarded to get_file_list to
        select a subset of archive members.
  """
  if not os.path.exists(archive_path):
    logs.log_error('Archive %s not found.' % archive_path)
    return

  # If the output directory is a symlink, get its actual path since we will be
  # doing directory traversal checks later when unpacking the archive.
  output_directory = os.path.realpath(output_directory)
  archive_filename = os.path.basename(archive_path)

  error_occurred = False

  # Choose to unpack all files or ones matching a particular regex.
  file_list = get_file_list(
      archive_path, file_match_callback=file_match_callback)
  archive_file_unpack_count = 0
  archive_file_total_count = len(file_list)

  # If the archive is not trusted, do file path checks to make
  # sure this archive is safe and is not attempting to do path
  # traversals.
  if not trusted:
    for filename in file_list:
      absolute_file_path = os.path.join(output_directory,
                                        os.path.normpath(filename))
      real_file_path = os.path.realpath(absolute_file_path)

      if real_file_path == output_directory:
        # Workaround for https://bugs.python.org/issue28488.
        # Ignore directories named '.'.
        continue

      if real_file_path != absolute_file_path:
        logs.log_error(
            'Directory traversal attempted while unpacking archive %s '
            '(file path=%s, actual file path=%s). Aborting.' %
            (archive_path, absolute_file_path, real_file_path))
        return

  archive_type = get_archive_type(archive_filename)

  # Extract based on file's extension.
  if archive_type == ArchiveType.ZIP:
    # Use context managers so the handles are closed even if ZipFile() or
    # extraction raises (the original explicit close calls leaked on error).
    with open(archive_path, 'rb') as zip_file_handle:
      with zipfile.ZipFile(zip_file_handle) as zip_archive:
        for filename in file_list:
          try:
            extracted_path = zip_archive.extract(filename, output_directory)

            # Preserve permissions for regular files. 640 is the default
            # permission for extract. If we need execute permission, we need
            # to chmod it explicitly. Also, get rid of suid bit for security
            # reasons.
            # The Python 2 octal literals (07777, ...) were rewritten in the
            # 0o form already used elsewhere in this file; values unchanged.
            # NOTE(review): this assumes FILE_ATTRIBUTE/SYMLINK_ATTRIBUTE are
            # prefixes of Python 3's oct() output ('0o...') — confirm the
            # constants' definitions.
            external_attr = zip_archive.getinfo(filename).external_attr >> 16
            if oct(external_attr).startswith(FILE_ATTRIBUTE):
              old_mode = external_attr & 0o7777
              new_mode = external_attr & 0o777
              new_mode |= 0o440
              needs_execute_permission = external_attr & 0o100

              if new_mode != old_mode or needs_execute_permission:
                # Default extract condition is 640 which is safe.
                # |new_mode| might have read+write+execute bit for
                # others, so remove those.
                new_mode &= 0o770
                os.chmod(extracted_path, new_mode)

            # Create symlink if needed (only on unix platforms).
            if (trusted and hasattr(os, 'symlink') and
                oct(external_attr).startswith(SYMLINK_ATTRIBUTE)):
              symlink_source = zip_archive.read(filename)
              if os.path.exists(extracted_path):
                os.remove(extracted_path)
              os.symlink(symlink_source, extracted_path)

            # Keep heartbeat happy by updating with our progress.
            archive_file_unpack_count += 1
            if archive_file_unpack_count % 1000 == 0:
              logs.log('Unpacked %d/%d.' % (archive_file_unpack_count,
                                            archive_file_total_count))
          except Exception:
            # In case of errors, we try to extract whatever we can without
            # errors. Catch Exception (not a bare except) so that
            # KeyboardInterrupt/SystemExit still propagate.
            error_occurred = True
            continue

    if error_occurred:
      logs.log_error(
          'Failed to extract everything from archive %s.' % archive_filename)

  elif archive_type == ArchiveType.TAR or archive_type == ArchiveType.TAR_LZMA:
    if archive_type == ArchiveType.TAR_LZMA:
      # Prefer the standard library module (Python 3); fall back to the
      # backports package for legacy environments where stdlib lzma is
      # unavailable. Imported locally so that a failed lzma installation
      # (as may happen on Windows) does not prevent opening other archives.
      try:
        import lzma
      except ImportError:
        from backports import lzma

      lzma_file = lzma.LZMAFile(archive_path)
      tar_archive = tarfile.open(fileobj=lzma_file)
    else:
      tar_archive = tarfile.open(archive_path)

    try:
      tar_archive.extractall(path=output_directory)
    except Exception:
      # In case of errors, we try to extract whatever we can without errors.
      logs.log_error(
          'Failed to extract everything from archive %s, trying one at a time.'
          % archive_filename)
      for filename in file_list:
        try:
          tar_archive.extract(filename, output_directory)
        except Exception:
          continue

        # Keep heartbeat happy by updating with our progress.
        archive_file_unpack_count += 1
        if archive_file_unpack_count % 1000 == 0:
          logs.log('Unpacked %d/%d.' % (archive_file_unpack_count,
                                        archive_file_total_count))

    tar_archive.close()
    if archive_type == ArchiveType.TAR_LZMA:
      lzma_file.close()
  else:
    logs.log_error(
        'Unsupported compression type for file %s.' % archive_filename)

  return
def update_source_code():
  """Updates source code files with latest version from appengine.

  Downloads the source archive, extracts it over the existing deployment
  (removing or renaming in-use native binaries first), clears stale files
  and logs the newly deployed source version. Returns early (without
  clearing old files) if any download or extraction step fails.
  """
  process_handler.cleanup_stale_processes()
  shell.clear_temp_directory()

  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  temp_archive = os.path.join(temp_directory, 'clusterfuzz-source.zip')
  try:
    storage.copy_file_from(get_source_url(), temp_archive)
  except Exception:
    logs.log_error('Could not retrieve source code archive from url.')
    return

  try:
    file_list = archive.get_file_list(temp_archive)
    zip_archive = zipfile.ZipFile(temp_archive, 'r')
  except Exception:
    logs.log_error('Bad zip file.')
    return

  src_directory = os.path.join(root_directory, 'src')
  output_directory = os.path.dirname(root_directory)
  error_occurred = False
  normalized_file_set = set()
  for filepath in file_list:
    filename = os.path.basename(filepath)

    # This file cannot be updated on the fly since it is running as server.
    if filename == 'adb':
      continue

    absolute_filepath = os.path.join(output_directory, filepath)
    if os.path.altsep:
      absolute_filepath = absolute_filepath.replace(os.path.altsep, os.path.sep)

    # Skip paths whose resolved location differs (symlinks), to avoid
    # writing outside the deployment root.
    if os.path.realpath(absolute_filepath) != absolute_filepath:
      continue

    normalized_file_set.add(absolute_filepath)
    try:
      file_extension = os.path.splitext(filename)[1]

      # Remove any .so files first before overwriting, as they can be loaded
      # in the memory of existing processes. Overwriting them directly causes
      # segfaults in existing processes (e.g. run.py).
      if file_extension == '.so' and os.path.exists(absolute_filepath):
        os.remove(absolute_filepath)

      # On Windows, to update DLLs (and native .pyd extensions), we rename it
      # first so that we can install the new version.
      if (environment.platform() == 'WINDOWS' and
          file_extension in ['.dll', '.pyd'] and
          os.path.exists(absolute_filepath)):
        _rename_dll_for_update(absolute_filepath)
    except Exception:
      logs.log_error('Failed to remove or move %s before extracting new '
                     'version.' % absolute_filepath)

    try:
      extracted_path = zip_archive.extract(filepath, output_directory)
      external_attr = zip_archive.getinfo(filepath).external_attr
      mode = (external_attr >> 16) & 0o777
      mode |= 0o440
      os.chmod(extracted_path, mode)
    except Exception:
      # Narrowed from a bare except for consistency with the handlers above;
      # a bare except would also swallow KeyboardInterrupt/SystemExit.
      error_occurred = True
      logs.log_error(
          'Failed to extract file %s from source archive.' % filepath)

  zip_archive.close()

  if error_occurred:
    return

  clear_pyc_files(src_directory)
  clear_old_files(src_directory, normalized_file_set)

  local_manifest_path = os.path.join(root_directory,
                                     utils.LOCAL_SOURCE_MANIFEST)
  source_version = utils.read_data_from_file(
      local_manifest_path, eval_data=False)
  logs.log('Source code updated to %s.' % source_version)
def create(self):
  """Configures a QEMU process which can subsequently be `run`.

  Builds the full QEMU argument list (networking, drive, CPU selection),
  exports the environment variables fuzzing jobs need to SSH into the VM,
  and stores a ProcessRunner on self.process_runner. Does not start QEMU.

  Assumes that initial_qemu_setup was already called exactly once.
  """
  qemu_vars = _fetch_qemu_vars()

  # Get a free port for the VM, so we can SSH in later.
  # NOTE(review): the port is only reserved while this socket is open; another
  # process could claim it between close() and QEMU startup — confirm this
  # race is acceptable here.
  tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  tcp.bind(('localhost', 0))
  _, port = tcp.getsockname()
  tcp.close()

  # Fuzzing jobs that SSH into the QEMU VM need access to this env var.
  environment.set_value('FUCHSIA_PORTNUM', port)
  environment.set_value('FUCHSIA_RESOURCES_DIR',
                        qemu_vars['fuchsia_resources_dir'])

  # yapf: disable
  qemu_args = [
      '-m', '3072',
      '-nographic',
      '-kernel', qemu_vars['kernel_path'],
      '-initrd', qemu_vars['initrd_path'],
      '-smp', '4',
      '-drive', ('file=' + qemu_vars['drive_path'] + ',format=raw,if=none,'
                 'id=blobstore'),
      '-device', 'virtio-blk-pci,drive=blobstore',
      '-monitor', 'none',
      '-append', 'kernel.serial=legacy TERM=dumb',
      '-machine', 'q35',
      '-display', 'none',
      # Forward the host port chosen above to the guest's SSH port (22).
      '-netdev', ('user,id=net0,net=192.168.3.0/24,dhcpstart=192.168.3.9,'
                  'host=192.168.3.2,hostfwd=tcp::') + str(port) + '-:22',
      '-device', 'e1000,netdev=net0,mac=52:54:00:63:5e:7b',
      '-L', qemu_vars['sharefiles_path']
  ]
  # yapf: enable

  # Detecting KVM is tricky, so use an environment variable to determine
  # whether to turn it on or not.
  if environment.get_value('FUCHSIA_USE_KVM'):
    # In builds before fxrev.dev/375343, a bug prevents booting with newer
    # versions of KVM. On some of these older builds,
    # `kernel.x86.disable-spec-mitigations` also doesn't work as
    # expected, so we work around this by selecting a CPU type where the
    # speculation mitigation will not be applied.
    # NOTE(review): assumes APP_REVISION is always set and comparable to an
    # int — a None value would raise a TypeError under Python 3; confirm.
    if environment.get_value('APP_REVISION') < 20200414210423:
      qemu_args.extend(['-cpu', 'Opteron_G5,+invtsc'])
    else:
      qemu_args.extend(['-cpu', 'host,migratable=no,+invtsc'])
    qemu_args.append('-enable-kvm')
  else:
    # Can't use host CPU since we don't necessarily have KVM on the machine.
    # Emulate a Haswell CPU with a few feature toggles. This mirrors the most
    # common configuration for Fuchsia VMs when using in-tree tools.
    qemu_args.extend(['-cpu', 'Haswell,+smap,-check,-fsgsbase'])

  # Get the list of fuzzers for ClusterFuzz to choose from.
  host = Host.from_dir(
      os.path.join(qemu_vars['fuchsia_resources_dir'], 'build', 'out',
                   'default'))
  Device(host, 'localhost', str(port))
  Fuzzer.filter(host.fuzzers, '')

  # Fuzzing jobs that SSH into the QEMU VM need access to this env var.
  environment.set_value('FUCHSIA_PKEY_PATH', qemu_vars['pkey_path'])

  logs.log('Ready to run QEMU. Command: ' + qemu_vars['qemu_path'] + ' ' +
           ' '.join(shlex.quote(arg) for arg in qemu_args))
  self.process_runner = new_process.ProcessRunner(qemu_vars['qemu_path'],
                                                  qemu_args)
def mark_unreproducible_testcase_and_issue_as_closed_after_deadline(
    policy, testcase, issue):
  """Closes an unreproducible testcase and its associated issue after a
  certain time period.

  Args:
    policy: Issue tracker policy for the testcase. Unused by this rule, but
        accepted so the signature matches the other marking rules and the
        call site in cleanup_testcases_and_issues(), which passes
        (policy, testcase, issue).
    testcase: The Testcase entity being evaluated.
    issue: The associated issue tracker issue, or None.
  """
  del policy  # Unused; kept for call-site signature consistency.

  # If the testcase is already closed, no more work to do.
  if not testcase.open:
    return

  # Check testcase status, so as to skip unreproducible uploads.
  if testcase.status not in ['Processed', 'Duplicate']:
    return

  # Make sure that this testcase is an unreproducible bug. If not, bail out.
  if not testcase.one_time_crasher_flag:
    return

  # Make sure that this testcase has an associated bug. If not, bail out.
  if not testcase.bug_information:
    return

  # If this testcase was manually uploaded, don't change issue state as our
  # reproduction result might be incorrect.
  if testcase.uploader_email:
    return

  # Make sure that there is an associated bug and it is in open state.
  if not issue or not issue.open:
    return

  # Check if there are any reproducible open testcases are associated with
  # this bug. If yes, return.
  similar_testcase = data_types.Testcase.query(
      data_types.Testcase.bug_information == testcase.bug_information,
      ndb_utils.is_true(data_types.Testcase.open),
      ndb_utils.is_false(data_types.Testcase.one_time_crasher_flag)).get()
  if similar_testcase:
    return

  # Make sure that testcase is at least older than
  # |UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE|, otherwise it will be seen in
  # crash stats anyway.
  if (testcase.timestamp and not dates.time_has_expired(
      testcase.timestamp,
      days=data_types.UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE)):
    return

  # Handle testcase that turned from reproducible to unreproducible. Account
  # for the recent progression task run time.
  last_tested_crash_time = testcase.get_metadata('last_tested_crash_time')
  if (last_tested_crash_time and not dates.time_has_expired(
      last_tested_crash_time,
      days=data_types.UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE)):
    return

  # Make sure that there is no crash seen in the deadline period.
  if get_crash_occurrence_platforms(
      testcase, data_types.UNREPRODUCIBLE_TESTCASE_WITH_BUG_DEADLINE):
    return

  # As a last check, do the expensive call of actually checking all issue
  # comments to make sure we didn't get called out on issue mistriage.
  if issue.has_comment_with_label(data_types.ISSUE_MISTRIAGED_LABEL):
    return

  # Close associated issue and testcase.
  comment = ('ClusterFuzz testcase %d is flaky and no longer crashes, '
             'so closing issue.' % testcase.key.id())
  if utils.is_oss_fuzz():
    comment += OSS_FUZZ_INCORRECT_COMMENT
  else:
    comment += INTERNAL_INCORRECT_COMMENT
  comment += ' and re-open the issue.'

  issue.comment = comment
  issue.status = 'WontFix'
  issue.open = False
  issue.save(send_email=True)

  testcase.fixed = 'NA'
  testcase.open = False
  testcase.put()
  logs.log('Closed unreproducible testcase %d and associated issue.' %
           testcase.key.id())
def find_fixed_range(testcase_id, job_type):
  """Attempt to find the revision range where a testcase was fixed.

  Runs a binary search over the release build revision list: it first checks
  that the testcase still crashes at the known crash revision and no longer
  crashes at the latest revision, then bisects between the two. The testcase
  entity is re-fetched from the datastore before each comment update to avoid
  clobbering concurrent changes. If the task deadline is hit, current bisect
  indices are persisted and the task is re-queued to resume later.

  Raises:
    errors.BuildNotFoundError: If no build exists at the min/max revision.
  """
  deadline = tasks.get_task_completion_deadline()
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  if testcase.fixed:
    logs.log_error('Fixed range is already set as %s, skip.' % testcase.fixed)
    return

  # Setup testcase and its dependencies.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase)
  if not file_list:
    return

  # Set a flag to indicate we are running progression task. This shows pending
  # status on testcase report page and avoid conflicting testcase updates by
  # triage cron.
  testcase.set_metadata('progression_pending', True)

  # Custom binaries are handled as special cases.
  if build_manager.is_custom_binary():
    _check_fixed_for_custom_binary(testcase, job_type, testcase_file_path)
    return

  release_build_bucket_path = environment.get_value(
      'RELEASE_BUILD_BUCKET_PATH')
  revision_list = build_manager.get_revisions_list(release_build_bucket_path,
                                                   testcase=testcase)
  if not revision_list:
    # Without a revision list we cannot bisect; record the error and retry.
    testcase = data_handler.get_testcase_by_id(testcase_id)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Failed to fetch revision list')
    tasks.add_task('progression', testcase_id, job_type)
    return

  # Use min, max_index to mark the start and end of revision list that is used
  # for bisecting the progression range. Set start to the revision where noticed
  # the crash. Set end to the trunk revision. Also, use min, max from past run
  # if it timed out.
  min_revision = testcase.get_metadata('last_progression_min')
  max_revision = testcase.get_metadata('last_progression_max')
  last_tested_revision = testcase.get_metadata('last_tested_crash_revision')
  known_crash_revision = last_tested_revision or testcase.crash_revision
  if not min_revision:
    min_revision = known_crash_revision
  if not max_revision:
    max_revision = revisions.get_last_revision_in_list(revision_list)

  min_index = revisions.find_min_revision_index(revision_list, min_revision)
  if min_index is None:
    raise errors.BuildNotFoundError(min_revision, job_type)
  max_index = revisions.find_max_revision_index(revision_list, max_revision)
  if max_index is None:
    raise errors.BuildNotFoundError(max_revision, job_type)

  testcase = data_handler.get_testcase_by_id(testcase_id)
  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED,
                                       'r%d' % max_revision)

  # Check to see if this testcase is still crashing now. If it is, then just
  # bail out.
  result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                            job_type, max_revision)
  if result.is_crash():
    logs.log('Found crash with same signature on latest revision r%d.' %
             max_revision)
    app_path = environment.get_value('APP_PATH')
    command = testcase_manager.get_command_line_for_application(
        testcase_file_path, app_path=app_path, needs_http=testcase.http_flag)
    symbolized_crash_stacktrace = result.get_stacktrace(symbolized=True)
    unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)
    stacktrace = utils.get_crash_stacktrace_output(
        command, symbolized_crash_stacktrace, unsymbolized_crash_stacktrace)
    testcase = data_handler.get_testcase_by_id(testcase_id)
    testcase.last_tested_crash_stacktrace = data_handler.filter_stacktrace(
        stacktrace)
    _update_completion_metadata(
        testcase,
        max_revision,
        is_crash=True,
        message='still crashes on latest revision r%s' % max_revision)

    # Since we've verified that the test case is still crashing, clear out any
    # metadata indicating potential flake from previous runs.
    task_creation.mark_unreproducible_if_flaky(testcase, False)

    # For chromium project, save latest crash information for later upload
    # to chromecrash/.
    state = result.get_symbolized_data()
    crash_uploader.save_crash_info_if_needed(testcase_id, max_revision,
                                             job_type, state.crash_type,
                                             state.crash_address, state.frames)
    return

  # Don't burden NFS server with caching these random builds.
  environment.set_value('CACHE_STORE', False)

  # Verify that we do crash in the min revision. This is assumed to be true
  # while we are doing the bisect.
  result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                            job_type, min_revision)
  if result and not result.is_crash():
    testcase = data_handler.get_testcase_by_id(testcase_id)

    # Retry once on another bot to confirm our result.
    if data_handler.is_first_retry_for_task(testcase, reset_after_retry=True):
      tasks.add_task('progression', testcase_id, job_type)
      error_message = (
          'Known crash revision %d did not crash, will retry on another bot to '
          'confirm result' % known_crash_revision)
      data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                           error_message)
      _update_completion_metadata(testcase, max_revision)
      return

    # The retry also failed to reproduce, so treat the testcase as flaky.
    _clear_progression_pending(testcase)
    error_message = ('Known crash revision %d did not crash' %
                     known_crash_revision)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         error_message)
    task_creation.mark_unreproducible_if_flaky(testcase, True)
    return

  # Start a binary search to find last non-crashing revision. At this point, we
  # know that we do crash in the min_revision, and do not crash in max_revision.
  while time.time() < deadline:
    min_revision = revision_list[min_index]
    max_revision = revision_list[max_index]

    # If the min and max revisions are one apart this is as much as we can
    # narrow the range.
    if max_index - min_index == 1:
      _save_fixed_range(testcase_id, min_revision, max_revision)
      return

    # Test the middle revision of our range.
    middle_index = (min_index + max_index) // 2
    middle_revision = revision_list[middle_index]

    testcase = data_handler.get_testcase_by_id(testcase_id)
    log_message = 'Testing r%d (current range %d:%d)' % (
        middle_revision, min_revision, max_revision)
    data_handler.update_testcase_comment(testcase, data_types.TaskState.WIP,
                                         log_message)

    try:
      result = _testcase_reproduces_in_revision(testcase, testcase_file_path,
                                                job_type, middle_revision)
    except errors.BadBuildError:
      # Skip this revision. Removing it keeps max_index pointing at the same
      # revision after the deletion.
      del revision_list[middle_index]
      max_index -= 1
      continue

    if result.is_crash():
      min_index = middle_index
    else:
      max_index = middle_index

    # Persist progress so a timed-out run can resume from these indices.
    _save_current_fixed_range_indices(testcase_id, revision_list[min_index],
                                      revision_list[max_index])

  # If we've broken out of the loop, we've exceeded the deadline. Recreate the
  # task to pick up where we left off.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  error_message = ('Timed out, current range r%d:r%d' %
                   (revision_list[min_index], revision_list[max_index]))
  data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                       error_message)
  tasks.add_task('progression', testcase_id, job_type)
def flash_to_latest_build_if_needed():
  """Wipes user data, resetting the device to original factory state.

  Skips reimaging for local development devices, when a run timeout is set,
  or when the last flash is recent enough (FLASH_INTERVAL). For cuttlefish
  devices the virtual device is recreated; for physical Google devices the
  latest build is flashed via fastboot under a per-host lock, with retries.
  On success, flash-related persistent cache keys are reset.
  """
  if environment.get_value('LOCAL_DEVELOPMENT'):
    # Don't reimage local development devices.
    return

  run_timeout = environment.get_value('RUN_TIMEOUT')
  if run_timeout:
    # If we have a run timeout, then we are already scheduled to bail out and
    # will be probably get re-imaged. E.g. using frameworks like Tradefed.
    return

  # Check if a flash is needed based on last recorded flash time.
  # Stored as an epoch timestamp (see set_value with time.time() below).
  last_flash_time = persistent_cache.get_value(
      constants.LAST_FLASH_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  needs_flash = last_flash_time is None or dates.time_has_expired(
      last_flash_time, seconds=FLASH_INTERVAL)
  if not needs_flash:
    return

  is_google_device = settings.is_google_device()
  if is_google_device is None:
    logs.log_error('Unable to query device. Reimaging failed.')
    adb.bad_state_reached()
  elif not is_google_device:
    # We can't reimage these, skip.
    logs.log('Non-Google device found, skipping reimage.')
    return

  # Check if both |BUILD_BRANCH| and |BUILD_TARGET| environment variables
  # are set. If not, we don't have enough data for reimaging and hence
  # we bail out.
  branch = environment.get_value('BUILD_BRANCH')
  target = environment.get_value('BUILD_TARGET')
  if not target:
    # We default to userdebug configuration.
    build_params = settings.get_build_parameters()
    if build_params:
      # NOTE(review): assumes build_params always contains a 'target' key —
      # a missing key would make get() return None and raise a TypeError on
      # concatenation; confirm against get_build_parameters().
      target = build_params.get('target') + '-userdebug'

      # Cache target in environment. This is also useful for cases when
      # device is bricked and we don't have this information available.
      environment.set_value('BUILD_TARGET', target)

  if not branch or not target:
    logs.log_warn(
        'BUILD_BRANCH and BUILD_TARGET are not set, skipping reimage.')
    return

  image_directory = environment.get_value('IMAGES_DIR')
  build_info = fetch_artifact.get_latest_artifact_info(branch, target)
  if not build_info:
    logs.log_error('Unable to fetch information on latest build artifact for '
                   'branch %s and target %s.' % (branch, target))
    return

  if environment.is_android_cuttlefish():
    download_latest_build(build_info, FLASH_CUTTLEFISH_REGEXES,
                          image_directory)
    adb.recreate_cuttlefish_device()
    adb.connect_to_cuttlefish_device()
  else:
    download_latest_build(build_info, FLASH_IMAGE_REGEXES, image_directory)

    # We do one device flash at a time on one host, otherwise we run into
    # failures and device being stuck in a bad state.
    flash_lock_key_name = 'flash:%s' % socket.gethostname()
    if not locks.acquire_lock(flash_lock_key_name, by_zone=True):
      logs.log_error('Failed to acquire lock for reimaging, exiting.')
      return

    logs.log('Reimaging started.')
    logs.log('Rebooting into bootloader mode.')
    for _ in range(FLASH_RETRIES):
      adb.run_as_root()
      adb.run_command(['reboot-bootloader'])
      time.sleep(FLASH_REBOOT_BOOTLOADER_WAIT)
      adb.run_fastboot_command(['oem', 'off-mode-charge', '0'])
      adb.run_fastboot_command(['-w', 'reboot-bootloader'])

      for partition, partition_image_filename in FLASH_IMAGE_FILES:
        partition_image_file_path = os.path.join(image_directory,
                                                 partition_image_filename)
        adb.run_fastboot_command(
            ['flash', partition, partition_image_file_path])
        if partition in ['bootloader', 'radio']:
          adb.run_fastboot_command(['reboot-bootloader'])

      # Disable ramdump to avoid capturing ramdumps during kernel crashes.
      # This causes device lockup of several minutes during boot and we intend
      # to analyze them ourselves.
      adb.run_fastboot_command(['oem', 'ramdump', 'disable'])
      # NOTE(review): passed as a plain string here while every other call in
      # this loop passes a list — confirm run_fastboot_command accepts both.
      adb.run_fastboot_command('reboot')
      time.sleep(FLASH_REBOOT_WAIT)

      if adb.get_device_state() == 'device':
        break
      logs.log_error('Reimaging failed, retrying.')

    locks.release_lock(flash_lock_key_name, by_zone=True)

  if adb.get_device_state() != 'device':
    logs.log_error('Unable to find device. Reimaging failed.')
    adb.bad_state_reached()

  logs.log('Reimaging finished.')

  # Reset all of our persistent keys after wipe.
  persistent_cache.delete_value(constants.BUILD_PROP_MD5_KEY)
  persistent_cache.delete_value(constants.LAST_TEST_ACCOUNT_CHECK_KEY)
  persistent_cache.set_value(constants.LAST_FLASH_BUILD_KEY, build_info)
  persistent_cache.set_value(constants.LAST_FLASH_TIME_KEY, time.time())
def process_bad_units(self, bad_units_path, quarantine_corpus_path, crashes):
  """Triage units that failed the merge step.

  Units whose names mark them as timeouts/OOMs are quarantined outright;
  every other unit is re-run individually. Units that time out, crash or
  leak are moved into |quarantine_corpus_path|, and newly seen crash states
  are recorded in the |crashes| mapping (crash state -> CorpusCrash).

  TODO(ochang): A lot of this function is similar to parts of fuzz_task.
  Ideally fuzz_task can be refactored in a way that lets us share the common
  code.
  """
  environment.reset_current_memory_tool_options(redzone_size=DEFAULT_REDZONE)
  self.runner.process_sanitizer_options()

  logs.log('Processing bad units.')
  bad_unit_files = _get_corpus_file_paths(bad_units_path)
  quarantined_count = 0

  # Run each corpus item individually.
  for index, current_path in enumerate(bad_unit_files, 1):
    if index % 100 == 0:
      # Periodic progress logging.
      logs.log('Up to %d' % index)

    basename = os.path.basename(current_path)
    if basename.startswith(('timeout-', 'oom-')):
      # Don't waste time re-running timeout or oom testcases.
      current_path = self._quarantine_unit(current_path,
                                           quarantine_corpus_path)
      quarantined_count += 1
      continue

    try:
      result = self._run_single_unit(current_path)
    except engine.TimeoutError:
      # Slow unit. Quarantine it.
      current_path = self._quarantine_unit(current_path,
                                           quarantine_corpus_path)
      quarantined_count += 1
      continue

    if not crash_analyzer.is_memory_tool_crash(result.output):
      # Didn't crash.
      continue

    # Get memory tool crash information.
    state = stack_analyzer.get_crash_data(result.output, symbolize_flag=True)

    # Crashed or caused a leak. Quarantine it.
    current_path = self._quarantine_unit(current_path, quarantine_corpus_path)
    quarantined_count += 1

    if crash_analyzer.ignore_stacktrace(state.crash_stacktrace):
      continue

    # Local de-duplication.
    if state.crash_state not in crashes:
      security_flag = crash_analyzer.is_security_issue(
          state.crash_stacktrace, state.crash_type, state.crash_address)
      crashes[state.crash_state] = CorpusCrash(
          state.crash_state, state.crash_type, state.crash_address,
          state.crash_stacktrace, current_path, security_flag)

  logs.log('Found %d bad units, %d unique crashes.' % (quarantined_count,
                                                       len(crashes)))
def do_corpus_pruning(context, last_execution_failed, revision):
  """Run corpus pruning.

  Syncs the corpus from GCS, minimizes it via corpus merge, triages bad
  units, records stats in a CoverageInformation entity, and finally runs
  cross pollination if enough time remains before the task deadline.

  Returns:
    A CorpusPruningResult, or None if there was not enough time left for
    shared corpus merging.

  Raises:
    CorpusPruningException: If the build could not be set up.
  """
  # Set |FUZZ_TARGET| environment variable to help with unarchiving only fuzz
  # target and its related files.
  environment.set_value('FUZZ_TARGET', context.fuzz_target.binary)

  if environment.is_trusted_host():
    # Delegate the whole operation to the untrusted runner host.
    from bot.untrusted_runner import tasks_host
    return tasks_host.do_corpus_pruning(context, last_execution_failed,
                                        revision)

  build_manager.setup_build(revision=revision)
  build_directory = environment.get_value('BUILD_DIR')
  if not build_directory:
    raise CorpusPruningException('Failed to setup build.')

  start_time = datetime.datetime.utcnow()
  runner = Runner(build_directory, context)
  pruner = CorpusPruner(runner)
  fuzzer_binary_name = os.path.basename(runner.target_path)

  # If our last execution failed, shrink to a randomized corpus of usable size
  # to prevent corpus from growing unbounded and recurring failures when trying
  # to minimize it.
  if last_execution_failed:
    for corpus_url in [
        context.corpus.get_gcs_url(),
        context.quarantine_corpus.get_gcs_url()
    ]:
      _limit_corpus_size(corpus_url, CORPUS_SIZE_LIMIT_FOR_FAILURES)

  # Get initial corpus to process from GCS.
  context.sync_to_disk()
  initial_corpus_size = shell.get_directory_file_count(
      context.initial_corpus_path)

  # Restore a small batch of quarantined units back to corpus.
  context.restore_quarantined_units()

  # Shrink to a minimized corpus using corpus merge.
  pruner_stats = pruner.run(context.initial_corpus_path,
                            context.minimized_corpus_path,
                            context.bad_units_path)

  # Sync minimized corpus back to GCS.
  context.sync_to_gcs()

  # Create corpus backup.
  backup_bucket = environment.get_value('BACKUP_BUCKET')
  corpus_backup_url = corpus_manager.backup_corpus(
      backup_bucket, context.corpus, context.minimized_corpus_path)

  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)

  logs.log('Corpus pruned from %d to %d units.' % (initial_corpus_size,
                                                   minimized_corpus_size_units))

  # Process bad units found during merge.
  # Mapping of crash state -> CorpusCrash
  crashes = {}
  pruner.process_bad_units(context.bad_units_path,
                           context.quarantine_corpus_path, crashes)
  context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)

  # Store corpus stats into CoverageInformation entity.
  project_qualified_name = context.fuzz_target.project_qualified_name()
  today = datetime.datetime.utcnow().date()
  coverage_info = data_types.CoverageInformation(
      fuzzer=project_qualified_name, date=today)

  quarantine_corpus_size = shell.get_directory_file_count(
      context.quarantine_corpus_path)
  quarantine_corpus_dir_size = shell.get_directory_size(
      context.quarantine_corpus_path)

  # Save the minimize corpus size before cross pollination to put in BigQuery.
  pre_pollination_corpus_size = minimized_corpus_size_units

  # Populate coverage stats.
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes
  coverage_info.quarantine_size_units = quarantine_corpus_size
  coverage_info.quarantine_size_bytes = quarantine_corpus_dir_size
  coverage_info.corpus_backup_location = corpus_backup_url
  coverage_info.corpus_location = context.corpus.get_gcs_url()
  coverage_info.quarantine_location = context.quarantine_corpus.get_gcs_url()

  # Calculate remaining time to use for shared corpus merging.
  time_remaining = _get_time_remaining(start_time)
  if time_remaining <= 0:
    logs.log_warn('Not enough time for shared corpus merging.')
    return None

  cross_pollinator = CrossPollinator(runner)
  pollinator_stats = cross_pollinator.run(time_remaining)

  context.sync_to_gcs()

  # Update corpus size stats (cross pollination may have added units).
  minimized_corpus_size_units = shell.get_directory_file_count(
      context.minimized_corpus_path)
  minimized_corpus_size_bytes = shell.get_directory_size(
      context.minimized_corpus_path)
  coverage_info.corpus_size_units = minimized_corpus_size_units
  coverage_info.corpus_size_bytes = minimized_corpus_size_bytes

  logs.log('Finished.')

  sources = ','.join([
      fuzzer.fuzz_target.project_qualified_name()
      for fuzzer in context.cross_pollinate_fuzzers
  ])
  cross_pollination_stats = None
  if pruner_stats and pollinator_stats:
    cross_pollination_stats = CrossPollinationStats(
        project_qualified_name, context.cross_pollination_method, sources,
        context.tag, initial_corpus_size, pre_pollination_corpus_size,
        pruner_stats['edge_coverage'], pollinator_stats['edge_coverage'],
        pruner_stats['feature_coverage'], pollinator_stats['feature_coverage'])

  return CorpusPruningResult(
      coverage_info=coverage_info,
      crashes=list(crashes.values()),
      fuzzer_binary_name=fuzzer_binary_name,
      revision=environment.get_value('APP_REVISION'),
      cross_pollination_stats=cross_pollination_stats)
def terminate_stale_application_instances():
  """Kill stale instances of the application running for this command.

  Delegates to the trusted-host remote runner when applicable; otherwise
  terminates the app binary, helper tools and anything started from the
  builds directory, with platform-specific handling for Android, Windows,
  and Linux/Mac. Logs a warning-style message if the cleanup is slow.
  """
  if environment.is_trusted_host():
    from bot.untrusted_runner import remote_process_host
    remote_process_host.terminate_stale_application_instances()
    return

  # Stale instance cleanup is sometimes disabled for local testing.
  if not environment.get_value('KILL_STALE_INSTANCES', True):
    return

  additional_process_to_kill = environment.get_value(
      'ADDITIONAL_PROCESSES_TO_KILL')
  builds_directory = environment.get_value('BUILDS_DIR')
  llvm_symbolizer_filename = environment.get_executable_filename(
      'llvm-symbolizer')
  platform = environment.platform()
  # Used only to report how long the overall kill pass took.
  start_time = time.time()

  processes_to_kill = []
  # Avoid killing the test binary when running the reproduce tool. It is
  # commonly in-use on the side on developer workstations.
  if not environment.get_value('REPRODUCE_TOOL'):
    app_name = environment.get_value('APP_NAME')
    processes_to_kill += [app_name]

  if additional_process_to_kill:
    processes_to_kill += additional_process_to_kill.split(' ')
  # Drop empty entries (e.g. unset APP_NAME or stray spaces in the list).
  processes_to_kill = [x for x in processes_to_kill if x]

  if platform == 'ANDROID':
    # Cleanup any stale adb connections.
    device_serial = environment.get_value('ANDROID_SERIAL')
    adb_search_string = 'adb -s %s' % device_serial

    # Terminate llvm symbolizer processes matching exact path. This is
    # important for Android where multiple device instances run on same host.
    llvm_symbolizer_path = environment.get_llvm_symbolizer_path()

    terminate_processes_matching_cmd_line(
        [adb_search_string, llvm_symbolizer_path], kill=True)

    # Make sure device is online and rooted.
    android.adb.run_as_root()

    # Make sure to reset SE Linux Permissive Mode (might be lost in reboot).
    android.settings.change_se_linux_to_permissive_mode()

    # Make sure that device forwarder is running (might be lost in reboot or
    # process crash).
    android.device.setup_host_and_device_forwarder_if_needed()

    # Make sure that package optimization is complete (might be triggered due
    # to unexpected circumstances).
    android.app.wait_until_optimization_complete()

    # Reset application state, which kills its pending instances and
    # re-grants the storage permissions.
    android.app.reset()

  elif platform == 'WINDOWS':
    processes_to_kill += [
        'cdb.exe',
        'handle.exe',
        'msdt.exe',
        'openwith.exe',
        'WerFault.exe',
        llvm_symbolizer_filename,
    ]
    terminate_processes_matching_names(processes_to_kill, kill=True)
    terminate_processes_matching_cmd_line(builds_directory, kill=True)

    # Artificial sleep to let the processes get terminated.
    time.sleep(1)

  else:
    # Handle Linux and Mac platforms.
    processes_to_kill += [
        'addr2line',
        'atos',
        'chrome-devel-sandbox',
        'gdb',
        'nacl_helper',
        'xdotool',
        llvm_symbolizer_filename,
    ]
    terminate_processes_matching_names(processes_to_kill, kill=True)
    terminate_processes_matching_cmd_line(builds_directory, kill=True)

  duration = int(time.time() - start_time)
  if duration >= 5:
    logs.log('Process kill took longer than usual - %s.' %
             str(datetime.timedelta(seconds=duration)))
def main(): """Prepare the configuration options and start requesting tasks.""" logs.configure('run_bot') root_directory = environment.get_value('ROOT_DIR') if not root_directory: print('Please set ROOT_DIR environment variable to the root of the source ' 'checkout before running. Exiting.') print('For an example, check init.bash in the local directory.') return dates.initialize_timezone_from_environment() environment.set_bot_environment() monitor.initialize() if not profiler.start_if_needed('python_profiler_bot'): sys.exit(-1) if environment.is_trusted_host(ensure_connected=False): from bot.untrusted_runner import host host.init() if environment.is_untrusted_worker(): # Track revision since we won't go into the task_loop. update_task.track_revision() from bot.untrusted_runner import untrusted as untrusted_worker untrusted_worker.start_server() assert False, 'Unreachable code' while True: # task_loop should be an infinite loop, # unless we run into an exception. error_stacktrace, clean_exit, task_payload = task_loop() # Print the error trace to the console. if not clean_exit: print('Exception occurred while running "%s".' % task_payload) print('-' * 80) print(error_stacktrace) print('-' * 80) should_terminate = ( clean_exit or errors.error_in_list(error_stacktrace, errors.BOT_ERROR_TERMINATION_LIST)) if should_terminate: return logs.log_error( 'Task exited with exception.', error_stacktrace=error_stacktrace, task_payload=task_payload) should_hang = errors.error_in_list(error_stacktrace, errors.BOT_ERROR_HANG_LIST) if should_hang: logs.log('Start hanging forever.') while True: # Sleep to avoid consuming 100% of CPU. time.sleep(60) # See if our run timed out, if yes bail out. if data_handler.bot_run_timed_out(): return # Clear the current exception. sys.exc_clear()
def run_process(cmdline,
                current_working_directory=None,
                timeout=DEFAULT_TEST_TIMEOUT,
                need_shell=False,
                gestures=None,
                env_copy=None,
                testcase_run=True,
                ignore_children=True):
  """Executes a process with a given command line and other parameters.

  On Android the command runs via adb and output is collected from logcat;
  on desktop platforms it runs under mozprocess with optional UI gestures.
  The process is polled until timeout, a memory-tool crash appears in the
  output, or the process exits (when |watch_for_process_exit| applies).

  Args:
    cmdline: Command line string to execute.
    current_working_directory: Working directory for the process (desktop).
    timeout: Overall run budget in seconds; reduced internally to leave time
        for crash analysis (and LSan processing when LSAN is set).
    need_shell: Whether to run through a shell (desktop).
    gestures: Optional list of gesture strings; a trailing 'Trigger:N' entry
        sets the gesture start time, otherwise gestures start at timeout // 2.
    env_copy: Optional dict merged into os.environ before the run.
    testcase_run: True when running an actual testcase; routes to the remote
        process host on trusted hosts and affects platform remapping.
    ignore_children: Passed through to mozprocess to ignore child processes.

  Returns:
    A (return_code, duration_seconds, output) tuple. On a failure to start
    the process, returns (None, None, ''). return_code may be None if the
    process is still running and no crash was detected in the output.
  """
  if environment.is_trusted_host() and testcase_run:
    from bot.untrusted_runner import remote_process_host
    return remote_process_host.run_process(
        cmdline, current_working_directory, timeout, need_shell, gestures,
        env_copy, testcase_run, ignore_children)

  if gestures is None:
    gestures = []

  if env_copy:
    os.environ.update(env_copy)

  # FIXME(mbarbella): Using LAUNCHER_PATH here is error prone. It forces us to
  # do certain operations before fuzzer setup (e.g. bad build check).
  launcher = environment.get_value('LAUNCHER_PATH')
  # This is used when running scripts on native linux OS and not on the
  # device. E.g. running a fuzzer to generate testcases or launcher script.
  plt = environment.platform()
  if plt in ['ANDROID', 'FUCHSIA'] and (not testcase_run or launcher):
    plt = 'LINUX'
  elif plt == 'IOS' and (not testcase_run or launcher):
    plt = 'MAC'

  # Lower down testcase timeout slightly to account for time for crash
  # analysis.
  timeout -= CRASH_ANALYSIS_TIME

  # LeakSanitizer hack - give time for stdout/stderr processing.
  lsan = environment.get_value('LSAN', False)
  if lsan:
    timeout -= LSAN_ANALYSIS_TIME

  # Initialize variables.
  adb_output = None
  process_output = ''
  process_status = None
  return_code = 0
  process_poll_interval = environment.get_value('PROCESS_POLL_INTERVAL', 0.5)
  start_time = time.time()
  # On Android the launch command completes immediately (the app keeps running
  # on the device), so exiting on "process exit" is opt-in there.
  watch_for_process_exit = (
      environment.get_value('WATCH_FOR_PROCESS_EXIT')
      if plt == 'ANDROID' else True)
  window_list = []

  # Get gesture start time from last element in gesture list.
  # Deep copy so the caller's list is not mutated by the pop() below.
  gestures = copy.deepcopy(gestures)
  if gestures and gestures[-1].startswith('Trigger'):
    gesture_start_time = int(gestures[-1].split(':')[1])
    gestures.pop()
  else:
    gesture_start_time = timeout // 2

  if plt == 'ANDROID':
    # Clear the log upfront.
    android.logger.clear_log()

    # Run the app.
    adb_output = android.adb.run_command(cmdline, timeout=timeout)
  else:
    cmd, args = shell.get_command_and_arguments(cmdline)

    process_output = mozprocess.processhandler.StoreOutput()
    process_status = ProcessStatus()
    try:
      process_handle = mozprocess.ProcessHandlerMixin(
          cmd,
          args,
          cwd=current_working_directory,
          shell=need_shell,
          processOutputLine=[process_output],
          onFinish=[process_status],
          ignore_children=ignore_children)
      start_process(process_handle)
    except:
      logs.log_error('Exception occurred when running command: %s.' % cmdline)
      return None, None, ''

  while True:
    time.sleep(process_poll_interval)

    # Run the gestures at gesture_start_time or in case we didn't find windows
    # in the last try.
    if (gestures and time.time() - start_time >= gesture_start_time and
        not window_list):
      # In case, we don't find any windows, we increment the gesture start
      # time so that the next check is after 1 second.
      gesture_start_time += 1

      if plt == 'LINUX':
        linux.gestures.run_gestures(gestures, process_handle.pid,
                                    process_status, start_time, timeout,
                                    window_list)
      elif plt == 'WINDOWS':
        windows.gestures.run_gestures(gestures, process_handle.pid,
                                      process_status, start_time, timeout,
                                      window_list)
      elif plt == 'ANDROID':
        android.gestures.run_gestures(gestures, start_time, timeout)

        # TODO(mbarbella): We add a fake window here to prevent gestures on
        # Android from getting executed more than once.
        window_list = ['FAKE']

    if time.time() - start_time >= timeout:
      break

    # Collect the process output.
    output = (
        android.logger.log_output()
        if plt == 'ANDROID' else b'\n'.join(process_output.output))
    output = utils.decode_to_unicode(output)
    if crash_analyzer.is_memory_tool_crash(output):
      break

    # Check if we need to bail out on process exit.
    if watch_for_process_exit:
      # If |watch_for_process_exit| is set, then we already completed running
      # our app launch command. So, we can bail out.
      if plt == 'ANDROID':
        break

      # On desktop, we bail out as soon as the process finishes.
      if process_status and process_status.finished:
        # Wait for process shutdown and set return code.
        process_handle.wait(timeout=PROCESS_CLEANUP_WAIT_TIME)
        break

  # Process output based on platform.
  if plt == 'ANDROID':
    # Get current log output. If device is in reboot mode, logcat
    # automatically waits for device to be online.
    time.sleep(ANDROID_CRASH_LOGCAT_WAIT_TIME)
    output = android.logger.log_output()

    if android.constants.LOW_MEMORY_REGEX.search(output):
      # If the device is low on memory, we should force reboot and bail out to
      # prevent device from getting in a frozen state.
      logs.log('Device is low on memory, rebooting.', output=output)
      android.adb.hard_reset()
      android.adb.wait_for_device()

    elif android.adb.time_since_last_reboot() < time.time() - start_time:
      # Check if a reboot has happened, if yes, append log output before
      # reboot and kernel logs content to output.
      log_before_last_reboot = android.logger.log_output_before_last_reboot()
      kernel_log = android.adb.get_kernel_log_content()
      output = '%s%s%s%s%s' % (
          log_before_last_reboot,
          utils.get_line_seperator('Device rebooted'), output,
          utils.get_line_seperator('Kernel Log'), kernel_log)
      # Make sure to reset SE Linux Permissive Mode. This can be done cheaply
      # in ~0.15 sec and is needed especially between runs for kernel crashes.
      android.adb.run_as_root()
      android.settings.change_se_linux_to_permissive_mode()
      # A reboot during the run is treated as a crash indicator.
      return_code = 1

    # Add output from adb to the front.
    if adb_output:
      output = '%s\n\n%s' % (adb_output, output)

    # Kill the application if it is still running. We do this at the end to
    # prevent this from adding noise to the logcat output.
    task_name = environment.get_value('TASK_NAME')
    child_process_termination_pattern = environment.get_value(
        'CHILD_PROCESS_TERMINATION_PATTERN')
    if task_name == 'fuzz' and child_process_termination_pattern:
      # In some cases, we do not want to terminate the application after each
      # run to avoid long startup times (e.g. for chrome). Terminate processes
      # matching a particular pattern for light cleanup in this case.
      android.adb.kill_processes_and_children_matching_name(
          child_process_termination_pattern)
    else:
      # There is no special termination behavior. Simply stop the application.
      android.app.stop()

  else:
    # Get the return code in case the process has finished already.
    # If the process hasn't finished, return_code will be None which is what
    # callers expect unless the output indicates a crash.
    return_code = process_handle.poll()

    # If the process is still running, then terminate it.
    if not process_status.finished:
      launcher_with_interpreter = shell.get_execute_command(
          launcher, is_blackbox_fuzzer=True) if launcher else None
      if (launcher_with_interpreter and
          cmdline.startswith(launcher_with_interpreter)):
        # If this was a launcher script, we KILL all child processes created
        # except for APP_NAME.
        # It is expected that, if the launcher script terminated normally, it
        # cleans up all the child processes it created itself.
        terminate_root_and_child_processes(process_handle.pid)
      else:
        try:
          # kill() here actually sends SIGTERM on posix.
          process_handle.kill()
        except:
          pass

    if lsan:
      time.sleep(LSAN_ANALYSIS_TIME)

    output = b'\n'.join(process_output.output)
    output = utils.decode_to_unicode(output)

    # X Server hack when max client reached.
    if ('Maximum number of clients reached' in output or
        'Unable to get connection to X server' in output):
      logs.log_error('Unable to connect to X server, exiting.')
      os.system('sudo killall -9 Xvfb blackbox >/dev/null 2>&1')
      sys.exit(0)

  if testcase_run and (crash_analyzer.is_memory_tool_crash(output) or
                       crash_analyzer.is_check_failure_crash(output)):
    return_code = 1

  # If a crash is found, then we add the memory state as well.
  if return_code and plt == 'ANDROID':
    ps_output = android.adb.get_ps_output()
    if ps_output:
      output += utils.get_line_seperator('Memory Statistics')
      output += ps_output

  if return_code:
    logs.log_warn(
        'Process (%s) ended with exit code (%s).' % (repr(cmdline),
                                                     str(return_code)),
        output=output)

  return return_code, round(time.time() - start_time, 1), output
def notify_closed_issue_if_testcase_is_open(policy, testcase, issue):
  """Ask for feedback on a closed issue whose testcase still reproduces.

  When an issue was closed but its associated testcase remains open, adds the
  policy's 'needs_feedback' label together with a comment asking the owner to
  re-check, unless a notification or an ignore label was already added.
  """
  needs_feedback_label = policy.label('needs_feedback')
  if not needs_feedback_label:
    return

  # A closed testcase needs no notification.
  if not testcase.open:
    return

  # Skip unreproducible uploads by checking the testcase status.
  if testcase.status not in ('Processed', 'Duplicate'):
    return

  # Without an associated issue there is nothing to comment on.
  if not issue or not testcase.bug_information:
    return

  # Nothing to do while the issue itself is still open.
  if issue.is_open:
    return

  # If we have already passed our deadline based on issue closed timestamp,
  # no need to notify. We will close the testcase instead.
  deadline_days = data_types.NOTIFY_CLOSED_BUG_WITH_OPEN_TESTCASE_DEADLINE
  if issue.closed_time and not dates.time_has_expired(
      issue.closed_time, days=deadline_days):
    return

  # Bail out if the issue already carries the ignore label.
  if issue_tracker_utils.was_label_added(issue, policy.label('ignore')):
    return

  # Bail out if we already added the notification before.
  if issue_tracker_utils.was_label_added(issue, needs_feedback_label):
    return

  issue.labels.add(needs_feedback_label)

  fixed_like_statuses = [policy.status('fixed'), policy.status('verified')]
  if issue.status in fixed_like_statuses:
    issue_comment = (
        'ClusterFuzz testcase {id} is still reproducing on tip-of-tree build '
        '(trunk).\n\nPlease re-test your fix against this testcase and if the '
        'fix was incorrect or incomplete, please re-open the bug.'
    ).format(id=testcase.key.id())

    wrong_label = policy.label('wrong')
    if wrong_label:
      issue_comment += (
          (' Otherwise, ignore this notification and add the '
           '{label_text}.'
          ).format(label_text=issue.issue_tracker.label_text(wrong_label)))
  else:
    # Covers WontFix, Archived cases.
    issue_comment = (
        'ClusterFuzz testcase {id} is still reproducing on tip-of-tree build '
        '(trunk).\n\nIf this testcase was not reproducible locally or '
        'unworkable, ignore this notification and we will file another '
        'bug soon with hopefully a better and workable testcase.\n\n'.format(
            id=testcase.key.id()))

    ignore_label = policy.label('ignore')
    if ignore_label:
      issue_comment += (
          'Otherwise, if this is not intended to be fixed (e.g. this is an '
          'intentional crash), please add the {label_text} to '
          'prevent future bug filing with similar crash stacktrace.'.format(
              label_text=issue.issue_tracker.label_text(ignore_label)))

  issue.save(new_comment=issue_comment, notify=True)
  logs.log('Notified closed issue for open testcase %d.' % testcase.key.id())
def mark_issue_as_closed_if_testcase_is_fixed(policy, testcase, issue):
  """Mark an issue as fixed if all of its associated reproducible testcase are
  fixed.

  Adds the policy's 'verified' label and a verification comment, and sets the
  issue status to 'verified' (unless the job definition sets
  SKIP_AUTO_CLOSE_ISSUE). Bails out unless every guard below passes.
  """
  verified_label = policy.label('verified')
  if not verified_label:
    return

  # If there is no associated issue, then bail out.
  if not issue or not testcase.bug_information:
    return

  # If the issue is closed in a status other than Fixed, like Duplicate,
  # WontFix or Archived, we shouldn't change it. Bail out.
  if not issue.is_open and issue.status != policy.status('fixed'):
    return

  # Check testcase status, so as to skip unreproducible uploads.
  if testcase.status not in ['Processed', 'Duplicate']:
    return

  # If the testcase is still open, no work needs to be done. Bail out.
  if testcase.open:
    return

  # FIXME: Find a better solution to skip over reproducible tests that are now
  # showing up a flaky (esp when we are unable to reproduce crash in original
  # crash revision).
  if testcase.fixed == 'NA':
    return

  # We can only verify fixed issues for reproducible testcases. If the
  # testcase is unreproducible, bail out. Exception is if we explicitly marked
  # this as fixed.
  if testcase.one_time_crasher_flag and testcase.fixed != 'Yes':
    return

  # Make sure that no other testcases associated with this issue are open.
  similar_testcase = data_types.Testcase.query(
      data_types.Testcase.bug_information == testcase.bug_information,
      ndb_utils.is_true(data_types.Testcase.open),
      ndb_utils.is_false(data_types.Testcase.one_time_crasher_flag)).get()
  if similar_testcase:
    return

  # As a last check, do the expensive call of actually checking all issue
  # comments to make sure we didn't do the verification already and we didn't
  # get called out on issue mistriage.
  if (issue_tracker_utils.was_label_added(issue, verified_label) or
      issue_tracker_utils.was_label_added(issue, policy.label('wrong'))):
    return

  issue.labels.add(verified_label)
  comment = 'ClusterFuzz testcase %d is verified as fixed' % testcase.key.id()

  fixed_range_url = data_handler.get_fixed_range_url(testcase)
  if fixed_range_url:
    comment += ' in ' + fixed_range_url
  else:
    comment += '.'

  if utils.is_oss_fuzz():
    comment += OSS_FUZZ_INCORRECT_COMMENT
  else:
    comment = _append_generic_incorrect_comment(comment, policy, issue,
                                                ' and re-open the issue.')

  # Some jobs opt out of auto-closing; in that case we only comment/label.
  skip_auto_close = data_handler.get_value_from_job_definition(
      testcase.job_type, 'SKIP_AUTO_CLOSE_ISSUE')
  if not skip_auto_close:
    issue.status = policy.status('verified')

  issue.save(new_comment=comment, notify=True)
  logs.log('Mark issue %d as verified for fixed testcase %d.' %
           (issue.id, testcase.key.id()))
def archive_testcase_and_dependencies_in_gcs(resource_list, testcase_path):
  """Archive testcase and its dependencies, and store in blobstore.

  If the testcase has no resource dependencies, the testcase file is uploaded
  as-is. Otherwise all resources are zipped (relative to their common base
  directory) and the archive is uploaded instead.

  Args:
    resource_list: Optional list of resource file paths; extended with
        dependencies discovered from the testcase path.
    testcase_path: Path to the testcase file.

  Returns:
    A (fuzzed_key, archived, absolute_filename, zip_filename) tuple, or
    (None, None, None, None) on failure.
  """
  if not os.path.exists(testcase_path):
    logs.log_error('Unable to find testcase %s.' % testcase_path)
    return None, None, None, None

  absolute_filename = testcase_path
  archived = False
  zip_filename = None
  zip_path = None

  if not resource_list:
    resource_list = []

  # Add resource dependencies based on testcase path. These include
  # stuff like extensions directory, dependency files, etc.
  resource_list.extend(
      testcase_manager.get_resource_dependencies(testcase_path))

  # Filter out duplicates, directories, and files that do not exist.
  resource_list = utils.filter_file_list(resource_list)

  logs.log('Testcase and related files :\n%s' % str(resource_list))

  if len(resource_list) <= 1:
    # If this does not have any resources, just save the testcase.
    # TODO(flowerhack): Update this when we teach CF how to download testcases.
    try:
      file_handle = open(testcase_path, 'rb')
    except IOError:
      logs.log_error('Unable to open testcase %s.' % testcase_path)
      return None, None, None, None
  else:
    # If there are resources, create an archive.

    # Find the common root directory for all of the resources.
    # Assumption: resource_list[0] is the testcase path.
    # NOTE(review): if one resource path is a strict component-prefix of the
    # current base (no mismatching component within the shorter length), the
    # base is not truncated to the shorter path — presumably resources never
    # nest that way in practice; confirm before relying on it.
    base_directory_list = resource_list[0].split(os.path.sep)
    for list_index in range(1, len(resource_list)):
      current_directory_list = resource_list[list_index].split(os.path.sep)
      length = min(len(base_directory_list), len(current_directory_list))
      for directory_index in range(length):
        if (current_directory_list[directory_index] !=
            base_directory_list[directory_index]):
          base_directory_list = base_directory_list[0:directory_index]
          break

    base_directory = os.path.sep.join(base_directory_list)
    logs.log('Subresource common base directory: %s' % base_directory)
    if base_directory:
      # Common parent directory, archive sub-paths only.
      base_len = len(base_directory) + len(os.path.sep)
    else:
      # No common parent directory, archive all paths as it-is.
      base_len = 0

    # Prepare the filename for the archive.
    zip_filename, _ = os.path.splitext(os.path.basename(testcase_path))
    zip_filename += _TESTCASE_ARCHIVE_EXTENSION

    # Create the archive. Use a context manager so the zip handle is closed
    # (and the partial archive is finalized) even if a write fails.
    zip_path = os.path.join(environment.get_value('INPUT_DIR'), zip_filename)
    with zipfile.ZipFile(zip_path, 'w') as zip_file:
      for file_name in resource_list:
        if os.path.exists(file_name):
          relative_filename = file_name[base_len:]
          zip_file.write(file_name, relative_filename, zipfile.ZIP_DEFLATED)

    try:
      file_handle = open(zip_path, 'rb')
    except IOError:
      logs.log_error('Unable to open testcase archive %s.' % zip_path)
      return None, None, None, None

    archived = True
    absolute_filename = testcase_path[base_len:]

  fuzzed_key = blobs.write_blob(file_handle)
  file_handle.close()

  # Don't need the archive after writing testcase to blobstore.
  if zip_path:
    shell.remove_file(zip_path)

  return fuzzed_key, archived, absolute_filename, zip_filename
def remove_cache_file_and_metadata(cache_file_path): """Removes cache file and its metadata.""" logs.log('Removing cache file %s and its metadata.' % cache_file_path) shell.remove_file(get_cache_file_metadata_path(cache_file_path)) shell.remove_file(cache_file_path)
def main(argv):
  """Run libFuzzer as specified by argv.

  argv layout: [script_name, testcase_file_path, target_name, *extra_args].
  Depending on the extracted arguments and environment, this performs one of:
  testcase minimization, testcase cleansing, reproduction of an existing
  testcase, or a full fuzzing session followed by corpus merge and a stats
  dump for BigQuery.
  """
  atexit.register(fuzzer_utils.cleanup)

  # Initialize variables.
  arguments = argv[1:]
  testcase_file_path = arguments.pop(0)
  target_name = arguments.pop(0)
  fuzzer_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), target_name)

  # Initialize log handler.
  logs.configure(
      'run_fuzzer', {
          'fuzzer': fuzzer_name,
          'engine': 'libFuzzer',
          'job_name': environment.get_value('JOB_NAME')
      })

  profiler.start_if_needed('libfuzzer_launcher')

  # Make sure that the fuzzer binary exists.
  build_directory = environment.get_value('BUILD_DIR')
  fuzzer_path = engine_common.find_fuzzer_path(build_directory, target_name)
  if not fuzzer_path:
    # This is an expected case when doing regression testing with old builds
    # that do not have that fuzz target. It can also happen when a host sends
    # a message to an untrusted worker that just restarted and lost
    # information on build directory.
    logs.log_warn('Could not find fuzz target %s.' % target_name)
    return

  # Install signal handler.
  signal.signal(signal.SIGTERM, engine_common.signal_term_handler)

  # Set up temp dir.
  engine_common.recreate_directory(fuzzer_utils.get_temp_dir())

  # Setup minijail if needed.
  use_minijail = environment.get_value('USE_MINIJAIL')
  runner = libfuzzer.get_runner(
      fuzzer_path, temp_dir=fuzzer_utils.get_temp_dir())

  if use_minijail:
    minijail_chroot = runner.chroot
  else:
    minijail_chroot = None

  # Get corpus directory.
  corpus_directory = environment.get_value('FUZZ_CORPUS_DIR')

  # Add common arguments which are necessary to be used for every run.
  arguments = expand_with_common_arguments(arguments)

  # Add sanitizer options to environment that were specified in the .options
  # file and options that this script requires.
  set_sanitizer_options(fuzzer_path)

  # Minimize test argument.
  minimize_to = fuzzer_utils.extract_argument(arguments, MINIMIZE_TO_ARGUMENT)
  minimize_timeout = fuzzer_utils.extract_argument(arguments,
                                                   MINIMIZE_TIMEOUT_ARGUMENT)
  if minimize_to and minimize_timeout:
    minimize_testcase(runner, testcase_file_path, minimize_to,
                      int(minimize_timeout), arguments, use_minijail)
    return

  # Cleanse argument.
  cleanse_to = fuzzer_utils.extract_argument(arguments, CLEANSE_TO_ARGUMENT)
  cleanse_timeout = fuzzer_utils.extract_argument(arguments,
                                                  CLEANSE_TIMEOUT_ARGUMENT)
  if cleanse_to and cleanse_timeout:
    cleanse_testcase(runner, testcase_file_path, cleanse_to,
                     int(cleanse_timeout), arguments, use_minijail)
    return

  # If we don't have a corpus, then that means this is not a fuzzing run.
  # TODO(flowerhack): Implement this to properly load past testcases.
  if not corpus_directory and environment.platform() != 'FUCHSIA':
    load_testcase_if_exists(runner, testcase_file_path, fuzzer_name,
                            use_minijail, arguments)
    return

  # We don't have a crash testcase, fuzz.

  # Check dict argument to make sure that it's valid.
  dict_argument = fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)
  if dict_argument and not os.path.exists(dict_argument):
    logs.log_error('Invalid dict %s for %s.' % (dict_argument, fuzzer_name))
    fuzzer_utils.extract_argument(arguments, constants.DICT_FLAG)

  # If there's no dict argument, check for %target_binary_name%.dict file.
  if (not fuzzer_utils.extract_argument(
      arguments, constants.DICT_FLAG, remove=False)):
    default_dict_path = dictionary_manager.get_default_dictionary_path(
        fuzzer_path)
    if os.path.exists(default_dict_path):
      arguments.append(constants.DICT_FLAG + default_dict_path)

  # Strategy pool is the list of strategies that we attempt to enable, whereas
  # fuzzing strategies is the list of strategies that are enabled. (e.g. if
  # mutator is selected in the pool, but not available for a given target, it
  # would not be added to fuzzing strategies.)
  strategy_pool = strategy_selection.generate_weighted_strategy_pool()
  fuzzing_strategies = []

  # Select a generator to use for existing testcase mutations.
  generator = _select_generator(strategy_pool, fuzzer_path)
  is_mutations_run = generator != Generator.NONE

  # Timeout for fuzzer run.
  fuzz_timeout = get_fuzz_timeout(is_mutations_run)

  # Set up scratch directory for writing new units.
  new_testcases_directory = create_corpus_directory('new')

  # Get list of corpus directories.
  # TODO(flowerhack): Implement this to handle corpus sync'ing.
  if environment.platform() == 'FUCHSIA':
    corpus_directories = []
  else:
    corpus_directories = get_corpus_directories(
        corpus_directory, new_testcases_directory, fuzzer_path,
        fuzzing_strategies, strategy_pool, minijail_chroot)

  # Bind corpus directories in minijail.
  if use_minijail:
    artifact_prefix = constants.ARTIFACT_PREFIX_FLAG + '/'
  else:
    artifact_prefix = '%s%s/' % (constants.ARTIFACT_PREFIX_FLAG,
                                 os.path.abspath(
                                     os.path.dirname(testcase_file_path)))
  # Generate new testcase mutations using radamsa, etc.
  if is_mutations_run:
    new_testcase_mutations_directory = generate_new_testcase_mutations(
        corpus_directory, fuzzer_name, generator, fuzzing_strategies)
    corpus_directories.append(new_testcase_mutations_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [new_testcase_mutations_directory])

  if strategy_pool.do_strategy(strategy.RANDOM_MAX_LENGTH_STRATEGY):
    max_len_argument = fuzzer_utils.extract_argument(
        arguments, constants.MAX_LEN_FLAG, remove=False)
    if not max_len_argument:
      max_length = random.SystemRandom().randint(1, MAX_VALUE_FOR_MAX_LENGTH)
      arguments.append('%s%d' % (constants.MAX_LEN_FLAG, max_length))
      fuzzing_strategies.append(strategy.RANDOM_MAX_LENGTH_STRATEGY.name)

  if (strategy_pool.do_strategy(strategy.RECOMMENDED_DICTIONARY_STRATEGY) and
      add_recommended_dictionary(arguments, fuzzer_name, fuzzer_path)):
    fuzzing_strategies.append(strategy.RECOMMENDED_DICTIONARY_STRATEGY.name)

  if strategy_pool.do_strategy(strategy.VALUE_PROFILE_STRATEGY):
    arguments.append(constants.VALUE_PROFILE_ARGUMENT)
    fuzzing_strategies.append(strategy.VALUE_PROFILE_STRATEGY.name)

  if strategy_pool.do_strategy(strategy.FORK_STRATEGY):
    max_fuzz_threads = environment.get_value('MAX_FUZZ_THREADS', 1)
    num_fuzz_processes = max(1,
                             multiprocessing.cpu_count() // max_fuzz_threads)
    arguments.append('%s%d' % (constants.FORK_FLAG, num_fuzz_processes))
    fuzzing_strategies.append(
        '%s_%d' % (strategy.FORK_STRATEGY.name, num_fuzz_processes))

  extra_env = {}
  if (strategy_pool.do_strategy(strategy.MUTATOR_PLUGIN_STRATEGY) and
      use_mutator_plugin(target_name, extra_env, minijail_chroot)):
    fuzzing_strategies.append(strategy.MUTATOR_PLUGIN_STRATEGY.name)

  # Execute the fuzzer binary with original arguments.
  fuzz_result = runner.fuzz(
      corpus_directories,
      fuzz_timeout=fuzz_timeout,
      additional_args=arguments + [artifact_prefix],
      extra_env=extra_env)

  if (not use_minijail and
      fuzz_result.return_code == constants.LIBFUZZER_ERROR_EXITCODE):
    # Minijail returns 1 if the exit code is nonzero.
    # Otherwise: we can assume that a return code of 1 means that libFuzzer
    # itself ran into an error.
    logs.log_error(ENGINE_ERROR_MESSAGE, engine_output=fuzz_result.output)

  log_lines = fuzz_result.output.splitlines()
  # Output can be large, so save some memory by removing reference to the
  # original output which is no longer needed.
  fuzz_result.output = None

  # Check if we crashed, and get the crash testcase path.
  crash_testcase_file_path = None
  for line in log_lines:
    match = re.match(CRASH_TESTCASE_REGEX, line)
    if match:
      crash_testcase_file_path = match.group(1)
      break

  if crash_testcase_file_path:
    # Write the new testcase.
    if use_minijail:
      # Convert chroot relative path to host path. Remove the leading '/'
      # before joining.
      crash_testcase_file_path = os.path.join(
          minijail_chroot.directory, crash_testcase_file_path[1:])

    # Copy crash testcase contents into the main testcase path.
    shutil.move(crash_testcase_file_path, testcase_file_path)

  # Print the command output.
  log_header_format = ('Command: %s\n' 'Bot: %s\n' 'Time ran: %f\n')
  bot_name = environment.get_value('BOT_NAME', '')
  command = fuzz_result.command
  if use_minijail:
    # Remove minijail prefix.
    command = engine_common.strip_minijail_command(command, fuzzer_path)
  print(log_header_format % (engine_common.get_command_quoted(command),
                             bot_name, fuzz_result.time_executed))

  # Parse stats information based on libFuzzer output.
  parsed_stats = parse_log_stats(log_lines)

  # Extend parsed stats by additional performance features.
  parsed_stats.update(
      stats.parse_performance_features(log_lines, fuzzing_strategies,
                                       arguments))

  # Set some initial stat overrides.
  timeout_limit = fuzzer_utils.extract_argument(
      arguments, constants.TIMEOUT_FLAG, remove=False)

  expected_duration = runner.get_max_total_time(fuzz_timeout)
  actual_duration = int(fuzz_result.time_executed)
  fuzzing_time_percent = 100 * actual_duration / float(expected_duration)
  stat_overrides = {
      'timeout_limit': int(timeout_limit),
      'expected_duration': expected_duration,
      'actual_duration': actual_duration,
      'fuzzing_time_percent': fuzzing_time_percent,
  }

  # Remove fuzzing arguments before merge and dictionary analysis step.
  remove_fuzzing_arguments(arguments)

  # Make a decision on whether merge step is needed at all. If there are no
  # new units added by libFuzzer run, then no need to do merge at all.
  new_units_added = shell.get_directory_file_count(new_testcases_directory)
  merge_error = None
  if new_units_added:
    # Merge the new units with the initial corpus.
    if corpus_directory not in corpus_directories:
      corpus_directories.append(corpus_directory)

    # If this times out, it's possible that we will miss some units. However,
    # if we're taking >10 minutes to load/merge the corpus something is going
    # very wrong and we probably don't want to make things worse by adding
    # units anyway.
    merge_tmp_dir = None
    if not use_minijail:
      merge_tmp_dir = os.path.join(fuzzer_utils.get_temp_dir(),
                                   'merge_workdir')
      engine_common.recreate_directory(merge_tmp_dir)

    old_corpus_len = shell.get_directory_file_count(corpus_directory)
    merge_directory = create_merge_directory()
    corpus_directories.insert(0, merge_directory)
    if use_minijail:
      bind_corpus_dirs(minijail_chroot, [merge_directory])

    merge_result = runner.merge(
        corpus_directories,
        merge_timeout=engine_common.get_merge_timeout(DEFAULT_MERGE_TIMEOUT),
        tmp_dir=merge_tmp_dir,
        additional_args=arguments)

    move_mergeable_units(merge_directory, corpus_directory)
    new_corpus_len = shell.get_directory_file_count(corpus_directory)
    new_units_added = 0

    merge_error = None
    if merge_result.timed_out:
      merge_error = 'Merging new testcases timed out:'
    elif merge_result.return_code != 0:
      merge_error = 'Merging new testcases failed:'
    else:
      new_units_added = new_corpus_len - old_corpus_len

    stat_overrides['new_units_added'] = new_units_added

    if merge_result.output:
      stat_overrides.update(
          stats.parse_stats_from_merge_log(merge_result.output.splitlines()))
  else:
    stat_overrides['new_units_added'] = 0
    logs.log('Skipped corpus merge since no new units added by fuzzing.')

  # Get corpus size after merge. This removes the duplicate units that were
  # created during this fuzzing session.
  # TODO(flowerhack): Remove this workaround once we can handle corpus sync.
  if environment.platform() != 'FUCHSIA':
    stat_overrides['corpus_size'] = shell.get_directory_file_count(
        corpus_directory)

  # Delete all corpus directories except for the main one. These were
  # temporary directories to store new testcase mutations and have already
  # been merged to main corpus directory.
  if corpus_directory in corpus_directories:
    corpus_directories.remove(corpus_directory)
  for directory in corpus_directories:
    shutil.rmtree(directory, ignore_errors=True)

  if use_minijail:
    unbind_corpus_dirs(minijail_chroot, corpus_directories)

  # Apply overridden stats to the parsed stats prior to dumping.
  parsed_stats.update(stat_overrides)

  # Dump stats data for further uploading to BigQuery.
  engine_common.dump_big_query_data(parsed_stats, testcase_file_path,
                                    LIBFUZZER_PREFIX, fuzzer_name, command)

  # Add custom crash state based on fuzzer name (if needed).
  add_custom_crash_state_if_needed(fuzzer_name, log_lines, parsed_stats)
  for line in log_lines:
    print(line)

  # Add fuzzing strategies used.
  engine_common.print_fuzzing_strategies(fuzzing_strategies)

  # Add merge error (if any).
  if merge_error:
    print(data_types.CRASH_STACKTRACE_END_MARKER)
    print(merge_error)
    print(
        'Command:',
        get_printable_command(merge_result.command, fuzzer_path,
                              use_minijail))
    print(merge_result.output)

  analyze_and_update_recommended_dictionary(runner, fuzzer_name, log_lines,
                                            corpus_directory, arguments)

  # Close minijail chroot.
  if use_minijail:
    minijail_chroot.close()

  # Record the stats to make them easily searchable in stackdriver.
  if new_units_added:
    logs.log('New units added to corpus: %d.' % new_units_added,
             stats=parsed_stats)
  else:
    logs.log('No new units found.', stats=parsed_stats)
def store_file_in_cache(file_path,
                        cached_files_per_directory_limit=True,
                        force_update=False):
  """Store a local file in the NFS cache if the cache is available.

  (Docstring fixed: the original said "Get file from nfs cache", but this
  function *stores* a file into the cache.)

  Args:
    file_path: Path of the local file to store.
    cached_files_per_directory_limit: If True, evict least-recently-used
        cached files beyond MAX_CACHED_FILES_PER_DIRECTORY in the target
        cache directory before storing.
    force_update: If True, replace an existing cached copy and its metadata.
  """
  if not os.path.exists(file_path):
    logs.log_error(
        'Local file %s does not exist, nothing to store in cache.' % file_path)
    return

  if os.path.getsize(file_path) > CACHE_SIZE_LIMIT:
    logs.log('File %s is too large to store in cache, skipping.' % file_path)
    return

  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    # No NFS, nothing to store in cache.
    return

  # If NFS server is not available due to heavy load, skip storage operation
  # altogether as we would fail to store file.
  if not os.path.exists(os.path.join(nfs_root, '.')):  # Use . to iterate mount.
    logs.log_warn('Cache %s not available.' % nfs_root)
    return

  cache_file_path = get_cache_file_path(file_path)
  cache_directory = os.path.dirname(cache_file_path)
  filename = os.path.basename(file_path)

  if not os.path.exists(cache_directory):
    if not shell.create_directory(cache_directory, create_intermediates=True):
      logs.log_error('Failed to create cache directory %s.' % cache_directory)
      return

  # Check if the file already exists in cache.
  if file_exists_in_cache(cache_file_path):
    if not force_update:
      return

    # If we are forcing update, we need to remove current cached file and its
    # metadata.
    remove_cache_file_and_metadata(cache_file_path)

  # Delete old cached files beyond our maximum storage limit.
  if cached_files_per_directory_limit:
    # Get a list of cached files, skipping metadata sidecar files.
    cached_files_list = [
        os.path.join(cache_directory, cached_filename)
        for cached_filename in os.listdir(cache_directory)
        if not cached_filename.endswith(CACHE_METADATA_FILE_EXTENSION)
    ]

    # Sort by modification time (newest first) and evict everything beyond
    # the per-directory limit. os.path.getmtime == os.stat(f).st_mtime, so
    # ordering matches the original lambda-based key.
    last_used_cached_files_list = sorted(
        cached_files_list, key=os.path.getmtime, reverse=True)
    for cached_file_path in (
        last_used_cached_files_list[MAX_CACHED_FILES_PER_DIRECTORY - 1:]):
      remove_cache_file_and_metadata(cached_file_path)

  # Start storing the actual file in cache now.
  logs.log('Started storing file %s into cache.' % filename)

  # Fetch lock to store this file. Try only once since if any other bot has
  # started to store it, we don't need to do it ourselves. Just bail out.
  lock_name = 'store:cache_file:%s' % utils.string_hash(cache_file_path)
  if not locks.acquire_lock(
      lock_name, max_hold_seconds=CACHE_LOCK_TIMEOUT, retries=1, by_zone=True):
    logs.log_warn(
        'Unable to fetch lock to update cache file %s, skipping.' % filename)
    return

  # Check if another bot already updated it.
  if file_exists_in_cache(cache_file_path):
    locks.release_lock(lock_name, by_zone=True)
    return

  shell.copy_file(file_path, cache_file_path)
  write_cache_file_metadata(cache_file_path, file_path)
  # Give the copy time to become visible on the shared mount before verifying.
  time.sleep(CACHE_COPY_WAIT_TIME)
  error_occurred = not file_exists_in_cache(cache_file_path)
  locks.release_lock(lock_name, by_zone=True)

  if error_occurred:
    logs.log_error('Failed to store file %s into cache.' % filename)
  else:
    logs.log('Completed storing file %s into cache.' % filename)
def _log_output(revision, crash_result):
  """Log process output."""
  # Bind the pieces to locals before logging; the symbolized stacktrace is
  # attached as a structured log field alongside the revision.
  symbolized_stacktrace = crash_result.get_stacktrace(symbolized=True)
  message = 'Testing %s.' % revision
  logs.log(message, revision=revision, output=symbolized_stacktrace)
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket.

  Skips builtin fuzzers, enforces a per-fuzzer daily cap (TESTCASES_PER_DAY,
  computed by counting lines of previously-uploaded list files), and syncs
  the matching directories to a unique per-batch GCS sub-path.

  Args:
    fuzzer_name: Name of the fuzzer that produced the test cases.
    testcase_list: Absolute paths of candidate test case files.
    testcase_directory: Directory holding fuzzer-generated test cases.
    data_directory: Data bundle directory that may also hold test cases.
  """
  # Since builtin fuzzers have a coverage minimized corpus, no need to upload
  # test case samples for them.
  if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    # No bucket configured for this project; nothing to upload.
    return

  # Partition the candidate paths into relative paths under either the
  # testcase directory or the data directory; remember which kinds we saw so
  # we only rsync the directories that actually contain test cases.
  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True
  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  # Count test cases already uploaded today by summing the line counts of
  # every batch's list file under today's prefix.
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = storage.get_cloud_storage_file_path(bucket_name, blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  # Write the list file first; if that fails, skip syncing the files.
  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list), list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer generated testcase in data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. It matches
    # |resources-| prefix that a fuzzer can use to indicate resources. Also, it
    # matches resources directory that Chromium web_tests use for dependencies.
    runner.rsync(
        data_directory, gcs_base_url, exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))
def get(self):
  """Handle a get request.

  Groups open test cases, then iterates all open test cases and files a new
  issue for each one that is important, fully triaged, and not already
  associated with a bug.
  """
  try:
    grouper.group_testcases()
  except Exception:
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not
    # swallowed; any grouping failure is logged and aborts this run.
    logs.log_error('Error occurred while grouping test cases.')
    return

  # Free up memory after group task run.
  utils.python_gc()

  # Get list of jobs excluded from bug filing.
  excluded_jobs = get_excluded_jobs()

  for testcase_id in data_handler.get_open_testcase_id_iterator():
    try:
      testcase = data_handler.get_testcase_by_id(testcase_id)
    except errors.InvalidTestcaseError:
      # Already deleted.
      continue

    # Skip if testcase's job type is in exclusions list.
    if testcase.job_type in excluded_jobs:
      continue

    # Skip if we are running progression task at this time.
    if testcase.get_metadata('progression_pending'):
      continue

    # If the testcase has a bug filed already, no triage is needed.
    if is_bug_filed(testcase):
      continue

    # Check if the crash is important, i.e. it is either a reproducible crash
    # or an unreproducible crash happening frequently.
    if not is_crash_important(testcase):
      continue

    # Require that all tasks like minimization, regression testing, etc have
    # finished.
    if not data_handler.critical_tasks_completed(testcase):
      continue

    # For testcases that are not part of a group, wait an additional time till
    # group task completes.
    # FIXME: In future, grouping might be dependent on regression range, so we
    # would have to add an additional wait time.
    if not testcase.group_id and not dates.time_has_expired(
        testcase.timestamp, hours=data_types.MIN_ELAPSED_TIME_SINCE_REPORT):
      continue

    # If this project does not have an associated issue tracker, we cannot
    # file this crash anywhere.
    issue_tracker = issue_tracker_utils.get_issue_tracker_for_testcase(
        testcase)
    if not issue_tracker:
      continue

    # If there are similar issues to this test case already filed or recently
    # closed, skip filing a duplicate bug.
    if is_similar_bug_open_or_recently_closed(testcase, issue_tracker):
      continue

    # File the bug first and then create filed bug metadata.
    issue_filer.file_issue(testcase, issue_tracker)
    create_filed_bug_metadata(testcase)
    logs.log('Filed new issue %s for testcase %d.' %
             (testcase.bug_information, testcase_id))
def check_for_bad_build(job_type, crash_revision):
  """Return true if the build is bad, i.e. crashes on startup.

  Runs the application with a blank command line and records the result in
  build metadata (unless another bot already marked this build).

  Args:
    job_type: Name of the job whose build is being checked.
    crash_revision: Build revision under test.

  Returns:
    True if a startup crash (excluding leaks and ignorable crashes) was
    detected, False otherwise.
  """
  # Check the bad build check flag to see if we want do this.
  if not environment.get_value('BAD_BUILD_CHECK'):
    return False

  # Create a blank command line with no file to run and no http.
  command = get_command_line_for_application(file_to_run='', needs_http=False)

  # When checking for bad builds, we use the default window size.
  # We don't want to pick a custom size since it can potentially cause a
  # startup crash and cause a build to be detected incorrectly as bad.
  default_window_argument = environment.get_value('WINDOW_ARG', '')
  if default_window_argument:
    command = command.replace(' %s' % default_window_argument, '')

  # TSAN is slow, and boots slow on first startup. Increase the warmup
  # timeout for this case.
  if environment.tool_matches('TSAN', job_type):
    fast_warmup_timeout = environment.get_value('WARMUP_TIMEOUT')
  else:
    fast_warmup_timeout = environment.get_value('FAST_WARMUP_TIMEOUT')

  # Initialize helper variables.
  is_bad_build = False
  build_run_console_output = ''
  app_directory = environment.get_value('APP_DIR')

  # Exit all running instances.
  process_handler.terminate_stale_application_instances()

  # Check if the build is bad.
  return_code, crash_time, output = process_handler.run_process(
      command,
      timeout=fast_warmup_timeout,
      current_working_directory=app_directory)
  crash_result = CrashResult(return_code, crash_time, output)

  # 1. Need to account for startup crashes with no crash state. E.g. failed to
  #    load shared library. So, ignore state for comparison.
  # 2. Ignore leaks as they don't block a build from reporting regular crashes
  #    and also don't impact regression range calculations.
  # (Idiom fix: `not x in seq` rewritten as `x not in seq`.)
  if (crash_result.is_crash(ignore_state=True) and
      not crash_result.should_ignore() and
      crash_result.get_type() not in ['Direct-leak', 'Indirect-leak']):
    is_bad_build = True
    build_run_console_output = utils.get_crash_stacktrace_output(
        command,
        crash_result.get_stacktrace(symbolized=True),
        crash_result.get_stacktrace(symbolized=False))
    logs.log(
        'Bad build for %s detected at r%d.' % (job_type, crash_revision),
        output=build_run_console_output)

  # Exit all running instances.
  process_handler.terminate_stale_application_instances()

  # Any of the conditions below indicate that bot is in a bad state and it is
  # not caused by the build itself. In that case, just exit.
  build_state = data_handler.get_build_state(job_type, crash_revision)
  if is_bad_build and utils.sub_string_exists_in(BAD_STATE_HINTS, output):
    logs.log_fatal_and_exit(
        'Bad bot environment detected, exiting.',
        output=build_run_console_output,
        snapshot=process_handler.get_runtime_snapshot())

  # If none of the other bots have added information about this build,
  # then add it now.
  if (build_state == data_types.BuildState.UNMARKED and
      not crash_result.should_ignore()):
    data_handler.add_build_metadata(job_type, crash_revision, is_bad_build,
                                    build_run_console_output)

  return is_bad_build
def mark_issue_as_closed_if_testcase_is_fixed(testcase, issue):
  """Mark an issue as Verified/closed once all of its associated reproducible
  testcases are fixed.

  NOTE(review): the caller in cleanup_testcases_and_issues invokes this as
  mark_issue_as_closed_if_testcase_is_fixed(policy, testcase, issue) with a
  leading `policy` argument that this signature does not accept — confirm
  which signature is current and reconcile.

  Args:
    testcase: The fixed testcase whose issue may be closed.
    issue: The associated issue tracker issue, or None.
  """
  # If there is no associated issue, then bail out.
  if not issue or not testcase.bug_information:
    return

  # If the issue is closed in a status other than Fixed, like Duplicate, WontFix
  # or Archived, we shouldn't change it. Bail out.
  if not issue.open and issue.status != 'Fixed':
    return

  # Check testcase status, so as to skip unreproducible uploads.
  if testcase.status not in ['Processed', 'Duplicate']:
    return

  # If the testcase is still open, no work needs to be done. Bail out.
  if testcase.open:
    return

  # FIXME: Find a better solution to skip over reproducible tests that are now
  # showing up a flaky (esp when we are unable to reproduce crash in original
  # crash revision).
  if testcase.fixed == 'NA':
    return

  # We can only verify fixed issues for reproducible testcases. If the testcase
  # is unreproducible, bail out. Exception is if we explicitly marked this as
  # fixed.
  if testcase.one_time_crasher_flag and testcase.fixed != 'Yes':
    return

  # Make sure that no other testcases associated with this issue are open.
  similar_testcase = data_types.Testcase.query(
      data_types.Testcase.bug_information == testcase.bug_information,
      ndb_utils.is_true(data_types.Testcase.open),
      ndb_utils.is_false(data_types.Testcase.one_time_crasher_flag)).get()
  if similar_testcase:
    return

  # As a last check, do the expensive call of actually checking all issue
  # comments to make sure we didn't do the verification already and we didn't
  # get called out on issue mistriage.
  if (issue.has_comment_with_label(data_types.ISSUE_VERIFIED_LABEL) or
      issue.has_comment_with_label(data_types.ISSUE_MISTRIAGED_LABEL)):
    return

  # All checks passed: label the issue verified and close it with an
  # explanatory comment (deployment-specific suffix for OSS-Fuzz vs internal).
  issue.add_label(data_types.ISSUE_VERIFIED_LABEL)
  comment = ('ClusterFuzz testcase %d is verified as fixed, '
             'so closing issue as verified.' % testcase.key.id())
  if utils.is_oss_fuzz():
    comment += OSS_FUZZ_INCORRECT_COMMENT
  else:
    comment += INTERNAL_INCORRECT_COMMENT
  comment += ' and re-open the issue.'

  issue.comment = comment
  issue.status = 'Verified'
  issue.open = False
  issue.save(send_email=True)
  logs.log('Closed issue %d for fixed testcase %d.' % (issue.id,
                                                       testcase.key.id()))
def reproduce_with_retries(self,
                           retries,
                           expected_state=None,
                           expected_security_flag=None,
                           flaky_stacktrace=False):
  """Try reproducing a crash with retries.

  Runs up to |retries| attempts; each attempt's crash state is compared to
  |expected_state| (unless comparison is skipped). Returns the matching
  CrashResult, or a non-crash CrashResult if no attempt matched.
  """
  self._pre_run_cleanup()
  crash_result = None

  for attempt in range(1, retries + 1):
    crash_result = self.run(attempt)
    state = self._get_crash_state(attempt, crash_result)

    # No expected state supplied: accept whatever the first run produced.
    if not expected_state:
      logs.log('Crash stacktrace comparison skipped.')
      return crash_result

    if crash_result.should_ignore():
      logs.log('Crash stacktrace matched ignore signatures, ignored.')
      continue

    if crash_result.is_security_issue() != expected_security_flag:
      logs.log('Crash security flag does not match, ignored.')
      continue

    # For flaky stacktraces, any non-ignored crash with the right security
    # flag counts.
    if flaky_stacktrace:
      logs.log('Crash stacktrace is marked flaky, skipping comparison.')
      return crash_result

    if CrashComparer(state.crash_state, expected_state).is_similar():
      logs.log('Crash stacktrace is similar to original stacktrace.')
      return crash_result
    logs.log('Crash stacktrace does not match original stacktrace.')

  # Retries exhausted without a matching crash.
  logs.log('Didn\'t crash at all.')
  return CrashResult(return_code=0, crash_time=0, output=crash_result.output)
def wait_until_good_state():
  """Check battery and make sure it is charged beyond minimum level and
  temperature thresholds.

  Blocks (possibly rebooting the device and stopping its shell) until the
  battery level and temperature are within thresholds, or returns early on
  GCE, on a recent prior check, or if battery info cannot be read.
  """
  # Battery levels are not applicable on GCE.
  if adb.is_gce():
    return

  # Make sure device is online.
  adb.wait_for_device()

  # Skip battery check if done recently.
  last_battery_check_time = persistent_cache.get_value(
      LAST_BATTERY_CHECK_TIME_KEY,
      constructor=datetime.datetime.utcfromtimestamp)
  if last_battery_check_time and not dates.time_has_expired(
      last_battery_check_time, seconds=BATTERY_CHECK_INTERVAL):
    return

  # Initialize variables.
  battery_level_threshold = environment.get_value(
      'LOW_BATTERY_LEVEL_THRESHOLD', LOW_BATTERY_LEVEL_THRESHOLD)
  battery_temperature_threshold = environment.get_value(
      'MAX_BATTERY_TEMPERATURE_THRESHOLD', MAX_BATTERY_TEMPERATURE_THRESHOLD)
  device_restarted = False

  while True:
    battery_information = get_battery_level_and_temperature()
    if battery_information is None:
      logs.log_error('Failed to get battery information, skipping check.')
      return

    battery_level = battery_information['level']
    battery_temperature = battery_information['temperature']
    logs.log('Battery information: level (%d%%), temperature (%.1f celsius).' %
             (battery_level, battery_temperature))
    if (battery_level >= battery_level_threshold and
        battery_temperature <= battery_temperature_threshold):
      # Good state reached; record the check time so the next call can skip.
      persistent_cache.set_value(LAST_BATTERY_CHECK_TIME_KEY, time.time())
      return

    logs.log('Battery in bad battery state, putting device in sleep mode.')

    # Reboot at most once per call to help the device recover.
    if not device_restarted:
      adb.reboot()
      device_restarted = True

    # Change thresholds to expected levels (only if they were below minimum
    # thresholds).
    if battery_level < battery_level_threshold:
      battery_level_threshold = EXPECTED_BATTERY_LEVEL
    if battery_temperature > battery_temperature_threshold:
      battery_temperature_threshold = EXPECTED_BATTERY_TEMPERATURE

    # Stopping shell should help with shutting off a lot of services that would
    # otherwise use up the battery. However, we need to turn it back on to get
    # battery status information.
    adb.stop_shell()
    time.sleep(BATTERY_CHARGE_INTERVAL)
    adb.start_shell()
def update_data_bundle(fuzzer, data_bundle):
  """Updates a data bundle to the latest version.

  Syncs the bundle's GCS bucket into its local (or worker) directory under a
  cross-bot lock, then refreshes the testcase list and sync-timestamp files.

  Args:
    fuzzer: The Fuzzer entity this bundle belongs to.
    data_bundle: The DataBundle entity to update.

  Returns:
    True on success (or when the bundle is already up to date), False on any
    setup, locking or sync failure.
  """
  # This module can't be in the global imports due to appengine issues
  # with multiprocessing and psutil imports.
  from google_cloud_utils import gsutil

  # If we are using a data bundle on NFS, it is expected that our testcases
  # will usually be large enough that we would fill up our tmpfs directory
  # pretty quickly. So, change it to use an on-disk directory.
  if not data_bundle.is_local:
    testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
    environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

  data_bundle_directory = get_data_bundle_directory(fuzzer.name)
  if not data_bundle_directory:
    logs.log_error('Failed to setup data bundle %s.' % data_bundle.name)
    return False

  if not shell.create_directory(
      data_bundle_directory, create_intermediates=True):
    logs.log_error(
        'Failed to create data bundle %s directory.' % data_bundle.name)
    return False

  # Check if data bundle is up to date. If yes, skip the update.
  if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    logs.log('Data bundle was recently synced, skip.')
    return True

  # Fetch lock for this data bundle.
  if not _fetch_lock_for_data_bundle_update(data_bundle):
    logs.log_error('Failed to lock data bundle %s.' % data_bundle.name)
    return False

  # Re-check if another bot did the sync already. If yes, skip.
  # (Double-check after acquiring the lock to avoid a redundant sync.)
  if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    logs.log('Another bot finished the sync, skip.')
    _release_lock_for_data_bundle_update(data_bundle)
    return True

  # Capture the start time *before* syncing so the recorded timestamp is
  # conservative.
  time_before_sync_start = time.time()

  # No need to sync anything if this is a search index data bundle. In that
  # case, the fuzzer will generate testcases from a gcs bucket periodically.
  if not _is_search_index_data_bundle(data_bundle.name):
    bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)

    if environment.is_trusted_host() and data_bundle.sync_to_worker:
      # Sync directly into the untrusted worker's directory instead.
      from bot.untrusted_runner import corpus_manager
      from bot.untrusted_runner import file_host
      worker_data_bundle_directory = file_host.rebase_to_worker_root(
          data_bundle_directory)

      file_host.create_directory(
          worker_data_bundle_directory, create_intermediates=True)
      result = corpus_manager.RemoteGSUtilRunner().rsync(
          bucket_url, worker_data_bundle_directory, delete=False)
    else:
      result = gsutil.GSUtilRunner().rsync(
          bucket_url, data_bundle_directory, delete=False)

    if result.return_code != 0:
      logs.log_error('Failed to sync data bundle %s: %s.' %
                     (data_bundle.name, result.output))
      # Release the lock before bailing out on sync failure.
      _release_lock_for_data_bundle_update(data_bundle)
      return False

  # Update the testcase list file.
  testcase_manager.create_testcase_list_file(data_bundle_directory)

  # Write last synced time in the sync file.
  sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)
  utils.write_data_to_file(time_before_sync_start, sync_file_path)
  if environment.is_trusted_host() and data_bundle.sync_to_worker:
    from bot.untrusted_runner import file_host
    worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
    file_host.copy_file_to_worker(sync_file_path, worker_sync_file_path)

  # Release acquired lock.
  _release_lock_for_data_bundle_update(data_bundle)

  return True
def get_newer_source_revision():
  """Returns the latest source revision if there is an update, or None if the
  current source is up to date.

  Returns None when running from local source, when required configuration is
  missing, when the remote revision cannot be fetched, or when the local
  source is already at (or ahead of) the remote revision.
  """
  if (environment.get_value('LOCAL_SRC') or
      environment.get_value('LOCAL_DEVELOPMENT')):
    logs.log('Using local source, skipping source code update.')
    return None

  root_directory = environment.get_value('ROOT_DIR')
  temp_directory = environment.get_value('BOT_TMPDIR')
  source_manifest_url = get_source_manifest_url()
  if (not get_source_url() or not source_manifest_url or not temp_directory or
      not root_directory):
    logs.log('Skipping source code update.')
    return None

  logs.log('Checking source code for updates.')
  try:
    source_version = get_remote_source_revision(source_manifest_url)
  except Exception:
    logs.log_error('Error occurred while checking source version.')
    return None

  local_source_version = get_local_source_revision()
  if not local_source_version:
    logs.log('No manifest found. Forcing an update.')
    return source_version

  logs.log('Local source code version: %s.' % local_source_version)
  logs.log('Remote source code version: %s.' % source_version)
  if local_source_version >= source_version:
    # Fixed log-message typo: 'souce' -> 'source'.
    logs.log('Remote source code <= local source code. No update.')
    # No source code update found. Source code is current, bail out.
    return None

  return source_version
def update_fuzzer_and_data_bundles(fuzzer_name):
  """Update the fuzzer with a given name if necessary.

  Sets fuzzer-related environment variables, downloads and unpacks a new
  fuzzer archive when its revision changed, and syncs the fuzzer's data
  bundles.

  Args:
    fuzzer_name: Name of the fuzzer to update.

  Returns:
    The Fuzzer entity on success, or None on any update/sync failure.

  Raises:
    errors.InvalidFuzzerError: If no fuzzer exists with this name.
  """
  fuzzer = data_types.Fuzzer.query(data_types.Fuzzer.name == fuzzer_name).get()
  if not fuzzer:
    logs.log_error('No fuzzer exists with name %s.' % fuzzer_name)
    raise errors.InvalidFuzzerError

  # Set some helper environment variables.
  fuzzer_directory = get_fuzzer_directory(fuzzer_name)
  environment.set_value('FUZZER_DIR', fuzzer_directory)
  environment.set_value('UNTRUSTED_CONTENT', fuzzer.untrusted_content)

  # If the fuzzer generates large testcases or a large number of small ones
  # that don't fit on tmpfs, then use the larger disk directory.
  if fuzzer.has_large_testcases:
    testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
    environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

  # Adjust the test timeout, if user has provided one.
  if fuzzer.timeout:
    environment.set_value('TEST_TIMEOUT', fuzzer.timeout)

    # Increase fuzz test timeout if the fuzzer timeout is higher than its
    # current value.
    fuzz_test_timeout = environment.get_value('FUZZ_TEST_TIMEOUT')
    if fuzz_test_timeout and fuzz_test_timeout < fuzzer.timeout:
      environment.set_value('FUZZ_TEST_TIMEOUT', fuzzer.timeout)

  # Adjust the max testcases if this fuzzer has specified a lower limit.
  max_testcases = environment.get_value('MAX_TESTCASES')
  if fuzzer.max_testcases and fuzzer.max_testcases < max_testcases:
    environment.set_value('MAX_TESTCASES', fuzzer.max_testcases)

  # Check for updates to this fuzzer.
  version_file = os.path.join(fuzzer_directory, '.%s_version' % fuzzer_name)
  if (not fuzzer.builtin and
      revisions.needs_update(version_file, fuzzer.revision)):
    logs.log('Fuzzer update was found, updating.')

    # Clear the old fuzzer directory if it exists.
    if not shell.remove_directory(fuzzer_directory, recreate=True):
      logs.log_error('Failed to clear fuzzer directory.')
      return None

    # Copy the archive to local disk and unpack it.
    archive_path = os.path.join(fuzzer_directory, fuzzer.filename)
    if not blobs.read_blob_to_disk(fuzzer.blobstore_key, archive_path):
      logs.log_error('Failed to copy fuzzer archive.')
      return None

    try:
      archive.unpack(archive_path, fuzzer_directory)
    except Exception:
      # Unpack failures are reported to the fuzzer's script log as well so
      # the fuzzer owner can see them.
      error_message = ('Failed to unpack fuzzer archive %s '
                       '(bad archive or unsupported format).') % fuzzer.filename
      logs.log_error(error_message)
      fuzzer_logs.upload_script_log(
          'Fatal error: ' + error_message, fuzzer_name=fuzzer_name)
      return None

    fuzzer_path = os.path.join(fuzzer_directory, fuzzer.executable_path)
    if not os.path.exists(fuzzer_path):
      error_message = ('Fuzzer executable %s not found. '
                       'Check fuzzer configuration.') % fuzzer.executable_path
      logs.log_error(error_message)
      fuzzer_logs.upload_script_log(
          'Fatal error: ' + error_message, fuzzer_name=fuzzer_name)
      return None

    # Make fuzzer executable.
    os.chmod(fuzzer_path, 0o750)

    # Cleanup unneeded archive.
    shell.remove_file(archive_path)

    # Save the current revision of this fuzzer in a file for later checks.
    revisions.write_revision_to_revision_file(version_file, fuzzer.revision)
    logs.log('Updated fuzzer to revision %d.' % fuzzer.revision)

  _clear_old_data_bundles_if_needed()

  # Setup data bundles associated with this fuzzer.
  data_bundles = ndb_utils.get_all_from_query(
      data_types.DataBundle.query(
          data_types.DataBundle.name == fuzzer.data_bundle_name))
  for data_bundle in data_bundles:
    if not update_data_bundle(fuzzer, data_bundle):
      return None

  # Setup environment variable for launcher script path.
  if fuzzer.launcher_script:
    fuzzer_launcher_path = os.path.join(fuzzer_directory,
                                        fuzzer.launcher_script)
    environment.set_value('LAUNCHER_PATH', fuzzer_launcher_path)

    # For launcher script usecase, we need the entire fuzzer directory on the
    # worker.
    if environment.is_trusted_host():
      from bot.untrusted_runner import file_host
      worker_fuzzer_directory = file_host.rebase_to_worker_root(
          fuzzer_directory)
      file_host.copy_directory_to_worker(
          fuzzer_directory, worker_fuzzer_directory, replace=True)

  return fuzzer
def _post_put_hook(self, _):
  """Log every datastore update of this testcase (bug id or '-' if none)."""
  bug = self.bug_information or '-'
  logs.log('Updated testcase %d (bug %s).' % (self.key.id(), bug))
def execute_task(testcase_id, job_type):
  """Run analyze task.

  Analyzes an uploaded testcase end to end: sets up the build and testcase,
  tries to reproduce the crash (retrying with http serving if needed),
  extracts crash parameters, deduplicates against existing testcases, and
  schedules follow-up tasks (minimize, regression, impact, progression,
  stack).

  Args:
    testcase_id: Datastore id of the data_types.Testcase to analyze.
    job_type: Name of the job this testcase runs under; stored on the
        testcase and used for project lookup and task scheduling.
  """
  # Reset redzones.
  environment.reset_current_memory_tool_options(redzone_size=128)

  # Unset window location size and position properties so as to use default.
  environment.set_value('WINDOW_ARG', '')

  # Locate the testcase associated with the id.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  data_handler.update_testcase_comment(testcase, data_types.TaskState.STARTED)

  # Analyze runs only for uploaded testcases; without the upload metadata
  # record there is nothing to process, so delete the orphaned testcase.
  metadata = data_types.TestcaseUploadMetadata.query(
      data_types.TestcaseUploadMetadata.testcase_id == int(testcase_id)).get()
  if not metadata:
    logs.log_error(
        'Testcase %s has no associated upload metadata.' % testcase_id)
    testcase.key.delete()
    return

  is_lsan_enabled = environment.get_value('LSAN')
  if is_lsan_enabled:
    # Creates empty local blacklist so all leaks will be visible to uploader.
    leak_blacklist.create_empty_local_blacklist()

  # Store the bot name and timestamp in upload metadata.
  bot_name = environment.get_value('BOT_NAME')
  metadata.bot_name = bot_name
  metadata.timestamp = datetime.datetime.utcnow()
  metadata.put()

  # Adjust the test timeout, if user has provided one.
  if metadata.timeout:
    environment.set_value('TEST_TIMEOUT', metadata.timeout)

  # Adjust the number of retries, if user has provided one.
  # Explicit `is not None` check so that a user-provided 0 still applies.
  if metadata.retries is not None:
    environment.set_value('CRASH_RETRIES', metadata.retries)

  # Setup testcase and get absolute testcase path.
  file_list, _, testcase_file_path = setup.setup_testcase(testcase)
  if not file_list:
    return

  # Set up a custom or regular build based on revision.
  build_manager.setup_build(testcase.crash_revision)

  # Check if we have an application path. If not, our build failed
  # to setup correctly.
  app_path = environment.get_value('APP_PATH')
  if not app_path:
    data_handler.update_testcase_comment(testcase, data_types.TaskState.ERROR,
                                         'Build setup failed')

    # Retry the whole task once (after a wait) in case the build failure is
    # transient; otherwise give up and close the testcase as invalid.
    if data_handler.is_first_retry_for_task(testcase):
      build_fail_wait = environment.get_value('FAIL_WAIT')
      tasks.add_task(
          'analyze', testcase_id, job_type, wait_time=build_fail_wait)
    else:
      close_invalid_testcase_and_update_status(testcase, metadata,
                                               'Build setup failed')
    return

  # Update initial testcase information.
  testcase.absolute_path = testcase_file_path
  testcase.job_type = job_type
  testcase.binary_flag = utils.is_binary_file(testcase_file_path)
  testcase.queue = tasks.default_queue()
  testcase.crash_state = ''

  # Set initial testcase metadata fields (e.g. build url, etc).
  data_handler.set_initial_testcase_metadata(testcase)

  # Update minimized arguments and use ones provided during user upload.
  if not testcase.minimized_arguments:
    minimized_arguments = environment.get_value('APP_ARGS') or ''
    additional_command_line_flags = testcase.get_metadata(
        'uploaded_additional_args')
    if additional_command_line_flags:
      minimized_arguments += ' %s' % additional_command_line_flags
    environment.set_value('APP_ARGS', minimized_arguments)
    testcase.minimized_arguments = minimized_arguments

  # Update other fields not set at upload time.
  testcase.crash_revision = environment.get_value('APP_REVISION')
  data_handler.set_initial_testcase_metadata(testcase)
  testcase.put()

  # Initialize some variables.
  gestures = testcase.gestures
  http_flag = testcase.http_flag
  test_timeout = environment.get_value('TEST_TIMEOUT')

  # Get the crash output.
  result = tests.test_for_crash_with_retries(
      testcase,
      testcase_file_path,
      test_timeout,
      http_flag=http_flag,
      compare_crash=False)

  # If we don't get a crash, try enabling http to see if we can get a crash.
  # Skip engine fuzzer jobs (e.g. libFuzzer, AFL) for which http testcase paths
  # are not applicable.
  if (not result.is_crash() and not http_flag and
      not environment.is_engine_fuzzer_job()):
    result_with_http = tests.test_for_crash_with_retries(
        testcase,
        testcase_file_path,
        test_timeout,
        http_flag=True,
        compare_crash=False)
    if result_with_http.is_crash():
      logs.log('Testcase needs http flag for crash.')
      http_flag = True
      result = result_with_http

  # Refresh our object. The crash-testing above can take long enough that
  # the entity may have changed (or been deleted) in the datastore.
  testcase = data_handler.get_testcase_by_id(testcase_id)
  if not testcase:
    return

  # Set application command line with the correct http flag.
  application_command_line = (
      tests.get_command_line_for_application(
          testcase_file_path, needs_http=http_flag))

  # Get the crash data.
  crashed = result.is_crash()
  crash_time = result.get_crash_time()
  state = result.get_symbolized_data()
  unsymbolized_crash_stacktrace = result.get_stacktrace(symbolized=False)

  # Get crash info object with minidump info. Also, re-generate unsymbolized
  # stacktrace if needed.
  crash_info, _ = (
      crash_uploader.get_crash_info_and_stacktrace(
          application_command_line, state.crash_stacktrace, gestures))
  if crash_info:
    testcase.minidump_keys = crash_info.store_minidump()

  if not crashed:
    # Could not reproduce the crash.
    log_message = (
        'Testcase didn\'t crash in %d seconds (with retries)' % test_timeout)
    data_handler.update_testcase_comment(
        testcase, data_types.TaskState.FINISHED, log_message)

    # For an unreproducible testcase, retry once on another bot to confirm
    # our results and in case this bot is in a bad state which we didn't catch
    # through our usual means.
    if data_handler.is_first_retry_for_task(testcase):
      testcase.status = 'Unreproducible, retrying'
      testcase.put()
      tasks.add_task('analyze', testcase_id, job_type)
      return

    # In the general case, we will not attempt to symbolize if we do not detect
    # a crash. For user uploads, we should symbolize anyway to provide more
    # information about what might be happening.
    crash_stacktrace_output = utils.get_crash_stacktrace_output(
        application_command_line, state.crash_stacktrace,
        unsymbolized_crash_stacktrace)
    testcase.crash_stacktrace = data_handler.filter_stacktrace(
        crash_stacktrace_output)

    close_invalid_testcase_and_update_status(testcase, metadata,
                                             'Unreproducible')

    # A non-reproducing testcase might still impact production branches.
    # Add the impact task to get that information.
    task_creation.create_impact_task_if_needed(testcase)
    return

  # Update http flag and re-run testcase to store dependencies (for bundled
  # archives only).
  testcase.http_flag = http_flag
  if not store_testcase_dependencies_from_bundled_testcase_archive(
      metadata, testcase, testcase_file_path):
    return

  # Update testcase crash parameters.
  testcase.crash_type = state.crash_type
  testcase.crash_address = state.crash_address
  testcase.crash_state = state.crash_state

  # Try to guess if the bug is security or not.
  security_flag = crash_analyzer.is_security_issue(
      state.crash_stacktrace, state.crash_type, state.crash_address)
  testcase.security_flag = security_flag

  # If it is, guess the severity.
  if security_flag:
    testcase.security_severity = severity_analyzer.get_security_severity(
        state.crash_type, state.crash_stacktrace, job_type, bool(gestures))

  log_message = (
      'Testcase crashed in %d seconds (r%d)' %
      (crash_time, testcase.crash_revision))
  data_handler.update_testcase_comment(testcase, data_types.TaskState.FINISHED,
                                       log_message)

  # See if we have to ignore this crash.
  # NOTE(review): 'Irrelavant' is misspelled, but this status string may be
  # persisted/matched elsewhere — confirm downstream usage before fixing.
  if crash_analyzer.ignore_stacktrace(state.crash_state,
                                      state.crash_stacktrace):
    close_invalid_testcase_and_update_status(testcase, metadata, 'Irrelavant')
    return

  # Test for reproducibility.
  one_time_crasher_flag = not tests.test_for_reproducibility(
      testcase_file_path, state.crash_state, security_flag, test_timeout,
      http_flag, gestures)
  testcase.one_time_crasher_flag = one_time_crasher_flag

  # Check to see if this is a duplicate.
  project_name = data_handler.get_project_name(job_type)
  existing_testcase = data_handler.find_testcase(
      project_name, state.crash_type, state.crash_state, security_flag)
  if existing_testcase:
    # If the existing test case is unreproducible and we are, replace the
    # existing test case with this one.
    if existing_testcase.one_time_crasher_flag and not one_time_crasher_flag:
      duplicate_testcase = existing_testcase
      original_testcase = testcase
    else:
      duplicate_testcase = testcase
      original_testcase = existing_testcase
      metadata.status = 'Duplicate'
      metadata.duplicate_of = existing_testcase.key.id()

    duplicate_testcase.status = 'Duplicate'
    duplicate_testcase.duplicate_of = original_testcase.key.id()
    duplicate_testcase.put()

  # Set testcase and metadata status if not set already.
  if testcase.status != 'Duplicate':
    testcase.status = 'Processed'
    metadata.status = 'Confirmed'

    # Add new leaks to global blacklist to avoid detecting duplicates.
    # Only add if testcase has a direct leak crash and if it's reproducible.
    if is_lsan_enabled:
      leak_blacklist.add_crash_to_global_blacklist_if_needed(testcase)

  # Add application specific information in the trace.
  crash_stacktrace_output = utils.get_crash_stacktrace_output(
      application_command_line, state.crash_stacktrace,
      unsymbolized_crash_stacktrace)
  testcase.crash_stacktrace = data_handler.filter_stacktrace(
      crash_stacktrace_output)

  # Update the testcase values.
  testcase.put()

  # Update the upload metadata.
  metadata.security_flag = security_flag
  metadata.put()

  # Create tasks to
  # 1. Minimize testcase (minimize).
  # 2. Find regression range (regression).
  # 3. Find testcase impact on production branches (impact).
  # 4. Check whether testcase is fixed (progression).
  # 5. Get second stacktrace from another job in case of
  #    one-time crashers (stack).
  task_creation.create_tasks(testcase)