def run_and_wait(self, *args, **kwargs):  # pylint: disable=arguments-differ
  """Overridden run_and_wait which always decodes the output."""
  result = ProcessRunner.run_and_wait(self, *args, **kwargs)
  if result.output is not None:
    result.output = utils.decode_to_unicode(result.output)

  return result
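# The override above only differs from ProcessRunner.run_and_wait by forcing
# the captured output to text. The real utils.decode_to_unicode is not shown
# in this excerpt; below is a minimal, hypothetical sketch assuming it simply
# wraps bytes.decode with a lossy error handler so arbitrary fuzzer output
# never raises UnicodeDecodeError. The actual implementation may differ.
def decode_to_unicode_sketch(obj):
  """Return |obj| as text, decoding bytes lossily instead of raising."""
  if isinstance(obj, bytes):
    # errors='replace' turns invalid UTF-8 (common in fuzzer output) into
    # U+FFFD instead of raising.
    return obj.decode('utf-8', errors='replace')
  return obj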
def communicate(self, input=None):  # pylint: disable=redefined-builtin
  """subprocess.Popen.communicate."""
  stdout = b''
  stderr = b''
  if self._interactive:
    if input:
      self._popen.stdin.write(input)

    while True:
      line = self._popen.stdout.readline()
      if not line:
        break

      if self._stdout_file:
        self._stdout_file.write(line)
      else:
        stdout += line

      sys.stdout.write(utils.decode_to_unicode(line))

    self._popen.wait()
  else:
    stdout, stderr = self._popen.communicate(input)

  if not self._max_stdout_len:
    return stdout, stderr

  with self._stdout_file:
    return utils.read_from_handle_truncated(self._stdout_file,
                                            self._max_stdout_len), stderr
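# The interactive branch above streams the child's stdout line by line while
# also capturing it. A self-contained sketch of the same readline loop using
# only the standard library; the command is a placeholder.
import subprocess
import sys

proc = subprocess.Popen(
    ['ping', '-c', '3', '127.0.0.1'],  # placeholder command
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT)

captured = b''
while True:
  line = proc.stdout.readline()
  if not line:  # EOF: the child closed its stdout.
    break
  captured += line
  # Echo to the terminal as text, mirroring sys.stdout.write(...) above.
  sys.stdout.write(line.decode('utf-8', errors='replace'))

proc.wait()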
def _find_sanitizer_stacktrace(reproducers_dir):
  """Find the sanitizer stacktrace from the reproducers dir."""
  for stacktrace_path in glob.glob(
      os.path.join(reproducers_dir, _HF_SANITIZER_LOG_PREFIX + '*')):
    with open(stacktrace_path, 'rb') as f:
      return utils.decode_to_unicode(f.read())

  return None
def undercoat_api_command(*args):
  """Make an API call to the undercoat binary."""
  logs.log(f'Running undercoat command {args}')
  bundle_dir = environment.get_value('FUCHSIA_RESOURCES_DIR')
  undercoat_path = os.path.join(bundle_dir, 'undercoat', 'undercoat')
  undercoat = new_process.ProcessRunner(undercoat_path, args)

  # The undercoat log is sent to stderr, which we capture to a tempfile.
  with tempfile.TemporaryFile() as undercoat_log:
    result = undercoat.run_and_wait(
        stderr=undercoat_log, extra_env={'TMPDIR': get_temp_dir()})
    result.output = utils.decode_to_unicode(result.output)

    if result.return_code != 0:
      # Dump the undercoat log to assist in debugging.
      log_data = utils.read_from_handle_truncated(undercoat_log, 1024 * 1024)
      logs.log_warn('Log output from undercoat: ' +
                    utils.decode_to_unicode(log_data))

      # The API error message is returned on stdout.
      raise UndercoatError(
          'Error running undercoat command %s: %s' % (args, result.output))

  return result
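# undercoat's stderr is captured to a TemporaryFile so a large log never has
# to live in memory. A sketch of the same pattern with plain subprocess; the
# command is a placeholder that writes to stderr, and the truncation is done
# inline rather than via the ClusterFuzz helper.
import subprocess
import tempfile

with tempfile.TemporaryFile() as stderr_log:
  completed = subprocess.run(
      ['ls', '/nonexistent-path'],  # placeholder command
      stdout=subprocess.PIPE,
      stderr=stderr_log,
      check=False)

  if completed.returncode != 0:
    # Rewind and read back (truncated) what the child wrote to stderr.
    stderr_log.seek(0)
    log_data = stderr_log.read(1024 * 1024)
    print('stderr from child:', log_data.decode('utf-8', errors='replace'))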
def process_stacktrace(self, unsymbolized_crash_stacktrace):
  """Symbolize a crash stacktrace line by line."""
  self.frame_no = 0
  symbolized_crash_stacktrace = u''
  unsymbolized_crash_stacktrace_lines = \
      unsymbolized_crash_stacktrace.splitlines()
  if lkl.is_lkl_stack_trace(unsymbolized_crash_stacktrace):
    line_parser = self._lkl_line_parser
    self.lkl_binary_name = lkl.get_lkl_binary_name(
        unsymbolized_crash_stacktrace_lines)
    # This should never happen, but if it does, let's just return the
    # unsymbolized stack. We can't symbolize anything anyway.
    if not self.lkl_binary_name:
      return unsymbolized_crash_stacktrace
  else:
    line_parser = self._line_parser

  for line in unsymbolized_crash_stacktrace_lines:
    self.current_line = utils.decode_to_unicode(line.rstrip())
    frameno_str, addr, binary, offset, arch = line_parser(line)
    if not binary or not offset:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
      continue

    if frameno_str == '0':
      # Assume that frame #0 is the first frame of a new stack trace.
      self.frame_no = 0
    original_binary = binary
    if self.binary_path_filter:
      binary = self.binary_path_filter(binary)
    symbolized_line = self.symbolize_address(addr, binary, offset, arch)
    if not symbolized_line:
      if original_binary != binary:
        symbolized_line = self.symbolize_address(addr, original_binary, offset,
                                                 arch)

    if not symbolized_line:
      symbolized_crash_stacktrace += u'%s\n' % self.current_line
    else:
      for symbolized_frame in symbolized_line:
        symbolized_crash_stacktrace += u'%s\n' % (
            ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
        self.frame_no += 1

  # Close any left-over open pipes.
  for pipe in pipes:
    pipe.stdin.close()
    pipe.stdout.close()
    pipe.kill()

  return symbolized_crash_stacktrace
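# symbolize_address() (not shown here) ultimately drives external symbolizer
# processes over pipes, which is why the loop above closes left-over pipes at
# the end. A hedged, standalone sketch of resolving one address with
# llvm-symbolizer; it assumes llvm-symbolizer is on PATH, ./my_binary is a
# placeholder with debug info, and the output is roughly "function",
# "file:line", and a blank line per frame.
import subprocess

pipe = subprocess.Popen(
    ['llvm-symbolizer', '--obj=./my_binary'],  # placeholder binary path
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE)

out, _ = pipe.communicate(input=b'0x4005d0\n')  # placeholder address
print(out.decode('utf-8', errors='replace'))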
def __init__(self, return_code, crash_time, output, unexpected_crash=False):
  self.return_code = return_code
  self.crash_time = crash_time
  self.output = utils.decode_to_unicode(output) if output else 'No output!'

  # For crashes against an expected state, this indicates whether there was a
  # crash that didn't match.
  self.unexpected_crash = unexpected_crash

  self._symbolized_crash_data = None
  self._unsymbolized_crash_data = None
def _get_stats_from_log(log_path,
                        strategies=None,
                        arguments=None,
                        stats_overrides=None):
  """Calculate stats for the given log the same way as the engine does."""
  if strategies is None:
    strategies = []
  if arguments is None:
    arguments = []

  log_lines = utils.decode_to_unicode(
      utils.read_data_from_file(log_path, eval_data=False)).splitlines()
  stats = libfuzzer.parse_log_stats(log_lines)
  stats.update(
      performance_stats.parse_performance_features(log_lines, strategies,
                                                   arguments))
  if stats_overrides:
    stats.update(stats_overrides)

  return stats
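# parse_log_stats is ClusterFuzz-internal and not shown here. As an
# illustration of the kind of parsing it does, a sketch that extracts one
# stat (exec/s) from libFuzzer-style status lines; the line format shown is
# an assumption about typical libFuzzer output.
import re

def execs_per_sec(log_lines):
  """Return the last reported exec/s value, or None if absent."""
  value = None
  for line in log_lines:
    match = re.search(r'\bexec/s: (\d+)\b', line)
    if match:
      value = int(match.group(1))
  return value

print(execs_per_sec(
    ['#128 NEW cov: 45 ft: 52 corp: 10/1Kb exec/s: 64 rss: 30Mb']))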
def get_crash_info_and_stacktrace(application_command_line, crash_stacktrace,
                                  gestures):
  """Return crash minidump location and updated crash stacktrace."""
  app_name_lower = environment.get_value('APP_NAME').lower()
  retry_limit = environment.get_value('FAIL_RETRIES')
  using_android = environment.is_android()
  using_chrome = 'chrome' in app_name_lower or 'chromium' in app_name_lower
  warmup_timeout = environment.get_value('WARMUP_TIMEOUT', 90)

  # Minidump generation is only applicable to the Chrome application.
  # FIXME: Support minidump generation on platforms other than Android.
  if not using_chrome or not using_android:
    return None, crash_stacktrace

  # Get the crash info from stacktrace.
  crash_info = get_crash_info(crash_stacktrace)

  # If we lost the minidump file, we need to recreate it.
  # Note that because of the way crash_info is generated now, if we have a
  # non-None crash_info, we should also have its minidump path; we insert
  # the check to safeguard against possibly constructing the crash_info in
  # other ways in the future that might potentially lose the minidump path.
  if not crash_info or not crash_info.minidump_info.path:
    for _ in range(retry_limit):
      _, _, output = (
          process_handler.run_process(
              application_command_line,
              timeout=warmup_timeout,
              gestures=gestures))
      crash_info = get_crash_info(output)
      if crash_info and crash_info.minidump_info.path:
        crash_stacktrace = utils.decode_to_unicode(output)
        break

    if not crash_info or not crash_info.minidump_info.path:
      # We could not regenerate a minidump for this crash.
      logs.log('Unable to regenerate a minidump for this crash.')

  return crash_info, crash_stacktrace
def execute(input_directory, output_directory, fuzzer_name,
            generation_timeout):
  """Execute ML RNN generator to produce new inputs.

  This method should be called inside the launcher to generate a number of
  new inputs based on the ML RNN model. It fetches the ML model from the GCS
  bucket specified in the environment variable `CORPUS_BUCKET`. The script to
  run the model resides in the folder `tools/fuzzers/ml/rnn`.

  Args:
    input_directory: Seed corpus path. The directory should not be empty.
    output_directory: The directory to place generated inputs.
    fuzzer_name: Name of the fuzzer, e.g. libpng_read_fuzzer. It indicates the
        subdirectory in the GCS bucket where models are stored.
    generation_timeout: Time in seconds for the generator to run. Normally it
        takes <1s to generate an input, assuming the input length is <4KB.
  """
  if environment.platform() != 'LINUX':
    logs.log('Unsupported platform for ML RNN generation, skipping.')
    return

  # Validate corpus folder.
  file_count = shell.get_directory_file_count(input_directory)
  if not file_count:
    logs.log('Corpus is empty. Skip generation.')
    return

  # Number of existing new inputs. They are possibly generated by other
  # generators.
  old_corpus_units = shell.get_directory_file_count(output_directory)
  old_corpus_bytes = shell.get_directory_size(output_directory)

  # Get model path.
  model_path = prepare_model_directory(fuzzer_name)
  if not model_path:
    return

  result = run(input_directory, output_directory, model_path,
               generation_timeout)

  # The generation process exited abnormally, and not because of a timeout,
  # meaning an error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN generation for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN generation for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=utils.decode_to_unicode(result.output))
    return

  # A timeout is not an error if we have new units generated.
  if result.timed_out:
    logs.log_warn('ML RNN generation for fuzzer %s timed out.' % fuzzer_name)

  new_corpus_units = (
      shell.get_directory_file_count(output_directory) - old_corpus_units)
  new_corpus_bytes = (
      shell.get_directory_size(output_directory) - old_corpus_bytes)
  if new_corpus_units:
    logs.log('Added %d new inputs (%d bytes) using ML RNN generator for %s.' %
             (new_corpus_units, new_corpus_bytes, fuzzer_name))
  else:
    logs.log_error(
        'ML RNN generator did not produce any inputs for %s' % fuzzer_name,
        output=utils.decode_to_unicode(result.output))
def parse_mime_to_crash_report_info(local_minidump_mime_path):
  """Read the (local) minidump MIME file into a CrashReportInfo object."""
  # Get the minidump name and path.
  minidump_path_match = re.match(r'(.*)\.mime', local_minidump_mime_path)
  if minidump_path_match is None:
    logs.log_error('Minidump filename in unexpected format: \'%s\'.' %
                   local_minidump_mime_path)
    return None
  minidump_path = '%s.dmp' % minidump_path_match.group(1).strip()

  # Reformat the minidump MIME to include the boundary.
  with open(local_minidump_mime_path, 'rb') as minidump_mime_file_content:
    # The boundary is the first line after the first two dashes.
    boundary = minidump_mime_file_content.readline().strip()[2:]
    minidump_mime_bytes = (
        b'Content-Type: multipart/form-data; boundary=\"%s\"\r\n--%s\r\n' %
        (boundary, boundary))
    minidump_mime_bytes += minidump_mime_file_content.read()

  minidump_mime_contents = email.message_from_bytes(minidump_mime_bytes)

  # Parse the MIME contents, extracting the parameters needed for upload.
  mime_key_values = {}
  for mime_part in minidump_mime_contents.get_payload():
    if isinstance(mime_part, str):
      mime_part = utils.decode_to_unicode(mime_part)
      logs.log_error('Unexpected str mime_part from mime path %s: %s' %
                     (local_minidump_mime_path, mime_part))
      continue
    part_descriptor = list(mime_part.values())
    key_tokens = part_descriptor[0].split('; ')
    key_match = re.match(r'name="(.*)".*', key_tokens[1])

    # Extract from the MIME part the key-value pairs used by report uploading.
    if key_match is not None:
      report_key = key_match.group(1)
      report_value = mime_part.get_payload(decode=True)
      if report_key == MINIDUMP_FILE_KEY:
        utils.write_data_to_file(report_value, minidump_path)
      else:
        # Take care of aliases.
        if report_key in ('prod', 'buildTargetId'):
          report_key = PRODUCT_KEY
        elif report_key == 'ver':
          report_key = VERSION_KEY

        # Save the key-value pair.
        mime_key_values[report_key] = report_value

  # Pull out product and version explicitly since these are required
  # for upload.
  product, version = None, None
  if PRODUCT_KEY in mime_key_values:
    product = mime_key_values.pop(PRODUCT_KEY).decode('utf-8')
  else:
    logs.log_error(
        'Could not find \'%s\' or alias in mime_key_values key.' % PRODUCT_KEY)
  if VERSION_KEY in mime_key_values:
    version = mime_key_values.pop(VERSION_KEY).decode('utf-8')
  else:
    logs.log_error(
        'Could not find \'%s\' or alias in mime_key_values key.' % VERSION_KEY)

  # If missing, return None and log keys that do exist; otherwise, construct
  # CrashReportInfo and return.
  if product is None or version is None:
    logs.log_error('mime_key_values dict keys:\n%s' %
                   str(list(mime_key_values.keys())))
    return None

  return CrashReportInfo(
      minidump_path=minidump_path,
      product=product,
      version=version,
      optional_params=mime_key_values)
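# The boundary re-injection above exists because the on-disk .mime file
# starts directly at the first boundary, without the Content-Type header the
# email parser needs. A minimal, self-contained illustration of the same
# multipart/form-data parsing; the boundary ("XYZ") and part names are made
# up, and a blank line is used to separate the synthetic header from the body.
import email

raw = (b'--XYZ\r\n'
       b'Content-Disposition: form-data; name="prod"\r\n\r\n'
       b'MyProduct\r\n'
       b'--XYZ\r\n'
       b'Content-Disposition: form-data; name="ver"\r\n\r\n'
       b'1.2.3\r\n'
       b'--XYZ--\r\n')

# Prepend the Content-Type header so the email parser knows the boundary.
mime_bytes = (b'Content-Type: multipart/form-data; boundary="XYZ"\r\n\r\n' +
              raw)
message = email.message_from_bytes(mime_bytes)
for part in message.get_payload():
  print(part.get('Content-Disposition'), part.get_payload(decode=True))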
def run_process(cmdline,
                current_working_directory=None,
                timeout=DEFAULT_TEST_TIMEOUT,
                need_shell=False,
                gestures=None,
                env_copy=None,
                testcase_run=True,
                ignore_children=True):
  """Executes a process with a given command line and other parameters."""
  if environment.is_trusted_host() and testcase_run:
    from clusterfuzz._internal.bot.untrusted_runner import remote_process_host
    return remote_process_host.run_process(
        cmdline, current_working_directory, timeout, need_shell, gestures,
        env_copy, testcase_run, ignore_children)

  if gestures is None:
    gestures = []

  if env_copy:
    os.environ.update(env_copy)

  # FIXME(mbarbella): Using LAUNCHER_PATH here is error prone. It forces us to
  # do certain operations before fuzzer setup (e.g. bad build check).
  launcher = environment.get_value('LAUNCHER_PATH')
  # This is used when running scripts on native linux OS and not on the device.
  # E.g. running a fuzzer to generate testcases or launcher script.
  plt = environment.platform()
  runs_on_device = environment.is_android(plt) or plt == 'FUCHSIA'
  if runs_on_device and (not testcase_run or launcher):
    plt = 'LINUX'

  is_android = environment.is_android(plt)

  # Lower down testcase timeout slightly to account for time for crash
  # analysis.
  timeout -= CRASH_ANALYSIS_TIME

  # LeakSanitizer hack - give time for stdout/stderr processing.
  lsan = environment.get_value('LSAN', False)
  if lsan:
    timeout -= LSAN_ANALYSIS_TIME

  # Initialize variables.
  adb_output = None
  process_output = ''
  process_status = None
  return_code = 0
  process_poll_interval = environment.get_value('PROCESS_POLL_INTERVAL', 0.5)
  start_time = time.time()
  watch_for_process_exit = (
      environment.get_value('WATCH_FOR_PROCESS_EXIT') if is_android else True)
  window_list = []

  # Get gesture start time from last element in gesture list.
  gestures = copy.deepcopy(gestures)
  if gestures and gestures[-1].startswith('Trigger'):
    gesture_start_time = int(gestures[-1].split(':')[1])
    gestures.pop()
  else:
    gesture_start_time = timeout // 2

  if is_android:
    # Clear the log upfront.
    android.logger.clear_log()

    # Run the app.
    adb_output = android.adb.run_command(cmdline, timeout=timeout)
  else:
    cmd = shell.get_command(cmdline)

    process_output = mozprocess.processhandler.StoreOutput()
    process_status = ProcessStatus()
    try:
      process_handle = mozprocess.ProcessHandlerMixin(
          cmd,
          args=None,
          cwd=current_working_directory,
          shell=need_shell,
          processOutputLine=[process_output],
          onFinish=[process_status],
          ignore_children=ignore_children)
      start_process(process_handle)
    except:
      logs.log_error('Exception occurred when running command: %s.' % cmdline)
      return None, None, ''

  while True:
    time.sleep(process_poll_interval)

    # Run the gestures at gesture_start_time or in case we didn't find windows
    # in the last try.
    if (gestures and time.time() - start_time >= gesture_start_time and
        not window_list):
      # In case we don't find any windows, we increment the gesture start time
      # so that the next check is after 1 second.
      gesture_start_time += 1

      if plt == 'LINUX':
        linux.gestures.run_gestures(gestures, process_handle.pid,
                                    process_status, start_time, timeout,
                                    window_list)
      elif plt == 'WINDOWS':
        windows.gestures.run_gestures(gestures, process_handle.pid,
                                      process_status, start_time, timeout,
                                      window_list)
      elif is_android:
        android.gestures.run_gestures(gestures, start_time, timeout)

        # TODO(mbarbella): We add a fake window here to prevent gestures on
        # Android from getting executed more than once.
        window_list = ['FAKE']

    if time.time() - start_time >= timeout:
      break

    # Collect the process output.
    output = (
        android.logger.log_output()
        if is_android else b'\n'.join(process_output.output))
    output = utils.decode_to_unicode(output)
    if crash_analyzer.is_memory_tool_crash(output):
      break

    # Check if we need to bail out on process exit.
    if watch_for_process_exit:
      # If |watch_for_process_exit| is set, then we already completed running
      # our app launch command. So, we can bail out.
      if is_android:
        break

      # On desktop, we bail out as soon as the process finishes.
      if process_status and process_status.finished:
        # Wait for process shutdown and set return code.
        process_handle.wait(timeout=PROCESS_CLEANUP_WAIT_TIME)
        break

  # Process output based on platform.
  if is_android:
    # Get current log output. If device is in reboot mode, logcat
    # automatically waits for device to be online.
    time.sleep(ANDROID_CRASH_LOGCAT_WAIT_TIME)
    output = android.logger.log_output()

    if android.constants.LOW_MEMORY_REGEX.search(output):
      # If the device is low on memory, we should force reboot and bail out to
      # prevent device from getting in a frozen state.
      logs.log('Device is low on memory, rebooting.', output=output)
      android.adb.hard_reset()
      android.adb.wait_for_device()
    elif android.adb.time_since_last_reboot() < time.time() - start_time:
      # Check if a reboot has happened, if yes, append log output before
      # reboot and kernel logs content to output.
      log_before_last_reboot = android.logger.log_output_before_last_reboot()
      kernel_log = android.adb.get_kernel_log_content()
      output = '%s%s%s%s%s' % (log_before_last_reboot,
                               utils.get_line_seperator('Device rebooted'),
                               output, utils.get_line_seperator('Kernel Log'),
                               kernel_log)

      # Make sure to reset SE Linux Permissive Mode. This can be done cheaply
      # in ~0.15 sec and is needed especially between runs for kernel crashes.
      android.adb.run_as_root()
      android.settings.change_se_linux_to_permissive_mode()
      return_code = 1

    # Add output from adb to the front.
    if adb_output:
      output = '%s\n\n%s' % (adb_output, output)

    # Kill the application if it is still running. We do this at the end to
    # prevent this from adding noise to the logcat output.
    task_name = environment.get_value('TASK_NAME')
    child_process_termination_pattern = environment.get_value(
        'CHILD_PROCESS_TERMINATION_PATTERN')
    if task_name == 'fuzz' and child_process_termination_pattern:
      # In some cases, we do not want to terminate the application after each
      # run to avoid long startup times (e.g. for chrome). Terminate processes
      # matching a particular pattern for light cleanup in this case.
      android.adb.kill_processes_and_children_matching_name(
          child_process_termination_pattern)
    else:
      # There is no special termination behavior. Simply stop the application.
      android.app.stop()
  else:
    # Get the return code in case the process has finished already.
    # If the process hasn't finished, return_code will be None which is what
    # callers expect unless the output indicates a crash.
    return_code = process_handle.poll()

    # If the process is still running, then terminate it.
    if not process_status.finished:
      launcher_with_interpreter = (
          shell.get_execute_command(launcher) if launcher else None)
      if (launcher_with_interpreter and
          cmdline.startswith(launcher_with_interpreter)):
        # If this was a launcher script, we KILL all child processes created
        # except for APP_NAME.
        # It is expected that, if the launcher script terminated normally, it
        # cleans up all the child processes it created itself.
        terminate_root_and_child_processes(process_handle.pid)
      else:
        try:
          # kill() here actually sends SIGTERM on posix.
          process_handle.kill()
        except:
          pass

    if lsan:
      time.sleep(LSAN_ANALYSIS_TIME)

    output = b'\n'.join(process_output.output)
    output = utils.decode_to_unicode(output)

    # X Server hack when max client reached.
    if ('Maximum number of clients reached' in output or
        'Unable to get connection to X server' in output):
      logs.log_error('Unable to connect to X server, exiting.')
      os.system('sudo killall -9 Xvfb blackbox >/dev/null 2>&1')
      sys.exit(0)

  if testcase_run and (crash_analyzer.is_memory_tool_crash(output) or
                       crash_analyzer.is_check_failure_crash(output)):
    return_code = 1

  # If a crash is found, then we add the memory state as well.
  if return_code and is_android:
    ps_output = android.adb.get_ps_output()
    if ps_output:
      output += utils.get_line_seperator('Memory Statistics')
      output += ps_output

  if return_code:
    logs.log_warn(
        'Process (%s) ended with exit code (%s).' % (repr(cmdline),
                                                     str(return_code)),
        output=output)

  return return_code, round(time.time() - start_time, 1), output
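# On desktop, run_process polls the child at PROCESS_POLL_INTERVAL and
# enforces the timeout itself rather than relying on the child to exit. A
# standard-library sketch of that poll/timeout/terminate loop; the command
# and interval values are placeholders.
import subprocess
import time

POLL_INTERVAL = 0.5
TIMEOUT = 10

proc = subprocess.Popen(['sleep', '60'])  # placeholder long-running command
start_time = time.time()

while True:
  time.sleep(POLL_INTERVAL)
  if proc.poll() is not None:  # Child exited on its own.
    break
  if time.time() - start_time >= TIMEOUT:
    # run_process terminates via mozprocess / child-process cleanup instead;
    # a plain kill() is enough for this sketch.
    proc.kill()
    proc.wait()
    break

print('return code:', proc.returncode,
      'elapsed:', round(time.time() - start_time, 1))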
def execute_task(full_fuzzer_name, job_type):
  """Execute ML RNN training task.

  The task trains the RNN model by default. If more models are developed,
  arguments can be modified to specify which model to use.

  Args:
    full_fuzzer_name: Full name of the fuzzer, e.g. libpng_read_fuzzer.
    job_type: Job type, e.g. libfuzzer_chrome_asan.
  """
  del job_type

  # Sets up fuzzer binary build.
  fuzz_target = data_handler.get_fuzz_target(full_fuzzer_name)
  if not fuzz_target:
    logs.log_warn(f'Fuzzer not found: {full_fuzzer_name}, skip RNN training.')
    return

  fuzzer_name = fuzz_target.project_qualified_name()

  # Directory to place training files, such as logs, models, corpus.
  # Use |FUZZ_INPUTS_DISK| since it is not size constrained.
  temp_directory = environment.get_value('FUZZ_INPUTS_DISK')

  # Get corpus.
  corpus_directory = get_corpus_directory(temp_directory, fuzzer_name)
  shell.remove_directory(corpus_directory, recreate=True)

  logs.log('Downloading corpus backup for %s.' % fuzzer_name)
  if not ml_train_utils.get_corpus(corpus_directory, fuzzer_name):
    logs.log_error('Failed to download corpus backup for %s.' % fuzzer_name)
    return

  # Get the directory to save models.
  model_directory = get_model_files_directory(temp_directory, fuzzer_name)
  shell.remove_directory(model_directory, recreate=True)

  # Get the directory to save training logs.
  log_directory = get_model_log_directory(temp_directory, fuzzer_name)
  shell.remove_directory(log_directory, recreate=True)

  result = train_rnn(corpus_directory, model_directory, log_directory)

  # The training process exited abnormally, and not because of a timeout,
  # meaning an error occurred during execution.
  if result.return_code and not result.timed_out:
    if result.return_code == constants.ExitCode.CORPUS_TOO_SMALL:
      logs.log_warn(
          'ML RNN training task for fuzzer %s aborted due to small corpus.' %
          fuzzer_name)
    else:
      logs.log_error(
          'ML RNN training task for fuzzer %s failed with ExitCode = %d.' %
          (fuzzer_name, result.return_code),
          output=utils.decode_to_unicode(result.output))
    return

  # Timing out may be caused by a large training corpus, but intermediate
  # models are frequently saved and can be uploaded.
  if result.timed_out:
    logs.log_warn('ML RNN training task for %s timed out.' % fuzzer_name)

  upload_model_to_gcs(model_directory, fuzzer_name)