def download_repo_prop_if_needed(symbols_directory, build_id, cache_target,
                                 targets_with_type_and_san, cache_type):
  """Downloads the repo.prop for a branch."""
  artifact_file_name = 'repo.prop'
  symbols_archive_filename = get_repo_prop_archive_filename(
      build_id, cache_target)
  output_filename_override = symbols_archive_filename

  # We create our own build_params for the cache.
  build_params = {
      'build_id': build_id,
      'target': cache_target,
      'type': cache_type
  }

  build_params_check_path = os.path.join(symbols_directory,
                                         '.cached_build_params')
  if check_symbols_cached(build_params_check_path, build_params):
    return

  symbols_archive_path = os.path.join(symbols_directory,
                                      symbols_archive_filename)
  download_artifact_if_needed(build_id, symbols_directory,
                              symbols_archive_path, targets_with_type_and_san,
                              artifact_file_name, output_filename_override)
  if not os.path.exists(symbols_archive_path):
    logs.log_error('Unable to locate repo.prop %s.' % symbols_archive_path)
    return

  # Store the artifact for later use or for use by other bots.
  storage.store_file_in_cache(symbols_archive_path)

  utils.write_data_to_file(build_params, build_params_check_path)
def test_recommended_dictionary_merge(self):
  """Test merging with GCS copy of recommended dictionary."""
  fake_gcs_dict_path = os.path.join(DATA_DIRECTORY,
                                    'fake_gcs_recommended_dictionary.txt')

  dict_manager = dictionary_manager.DictionaryManager('fuzzer_name')
  log_data = utils.read_data_from_file(
      os.path.join(DATA_DIRECTORY, 'log_with_recommended_dict.txt'),
      eval_data=False).decode('utf-8')

  dict_from_log = dict_manager.parse_recommended_dictionary_from_data(
      log_data)
  utils.write_data_to_file('\n'.join(dict_from_log), self.local_dict_path)

  dictionary_manager.merge_dictionary_files(
      self.local_dict_path, fake_gcs_dict_path, self.local_dict_path)

  # Compare the resulting dictionary with its expected result.
  merged_dictionary = self._parse_dictionary_file(self.local_dict_path)
  expected_dictionary_path = os.path.join(
      DATA_DIRECTORY, 'expected_merged_recommended_dictionary.txt')
  expected_dictionary = self._parse_dictionary_file(expected_dictionary_path)

  self.assertEqual(sorted(merged_dictionary), sorted(expected_dictionary))
def test(self):
  """Tests copy_local_directory_to_remote."""
  utils.write_data_to_file('a', os.path.join(self.local_temp_dir, 'a'))
  shell.create_directory(os.path.join(self.local_temp_dir, 'b'))
  utils.write_data_to_file('c', os.path.join(self.local_temp_dir, 'b', 'c'))

  adb.copy_local_directory_to_remote(self.local_temp_dir,
                                     self.device_temp_dir)

  self.assertTrue(adb.file_exists(os.path.join(self.device_temp_dir, 'a')))
  self.assertFalse(
      adb.directory_exists(os.path.join(self.device_temp_dir, 'a')))
  self.assertEqual(
      adb.get_file_size(os.path.join(self.device_temp_dir, 'a')), 1)

  self.assertTrue(
      adb.directory_exists(os.path.join(self.device_temp_dir, 'b')))
  self.assertFalse(adb.file_exists(os.path.join(self.device_temp_dir, 'b')))

  self.assertTrue(
      adb.file_exists(os.path.join(self.device_temp_dir, 'b', 'c')))
  self.assertFalse(
      adb.directory_exists(os.path.join(self.device_temp_dir, 'b', 'c')))
  self.assertEqual(
      adb.get_file_size(os.path.join(self.device_temp_dir, 'b', 'c')), 1)
def run(self, input_directory, output_directory, no_of_files):
  """Run the fuzzer to generate testcases."""
  fuzzer_binary_name, fuzzer_path = self._get_fuzzer_binary_name_and_path()

  project_qualified_name = data_types.fuzz_target_project_qualified_name(
      utils.current_project(), fuzzer_binary_name)

  arguments = self.generate_arguments(fuzzer_path)
  corpus_directory = get_corpus_directory(input_directory,
                                          project_qualified_name)

  # Create fuzz testcases.
  for i in range(no_of_files):
    # Contents of the testcase file don't matter at this point. We just need
    # to create something non-null so that it is not ignored.
    testcase_file_path = os.path.join(
        output_directory, '%s%d' % (testcase_manager.FUZZ_PREFIX, i))
    utils.write_data_to_file(' ', testcase_file_path)

    # Write the flags file containing the command line for running the
    # launcher script.
    flags_file_path = os.path.join(
        output_directory, '%s%d' % (testcase_manager.FLAGS_PREFIX, i))
    flags = ['%TESTCASE%', fuzzer_binary_name]
    if arguments:
      flags.append(arguments)

    flags_file_content = ' '.join(flags)
    utils.write_data_to_file(flags_file_content, flags_file_path)

  output = 'Generated %d testcase(s) for fuzzer %s.\n' % (no_of_files,
                                                          fuzzer_binary_name)
  output += 'metadata::fuzzer_binary_name: %s\n' % fuzzer_binary_name

  issue_owners = engine_common.get_issue_owners(fuzzer_path)
  if issue_owners:
    output += 'metadata::issue_owners: %s\n' % ','.join(issue_owners)

  issue_labels = engine_common.get_issue_labels(fuzzer_path)
  if issue_labels:
    output += 'metadata::issue_labels: %s\n' % ','.join(issue_labels)

  issue_components = engine_common.get_issue_components(fuzzer_path)
  if issue_components:
    output += 'metadata::issue_components: %s\n' % ','.join(issue_components)

  # Update *SAN_OPTIONS in the current environment from the .options file.
  # This environment is used later in the fuzz task for deriving the
  # environment string in |get_environment_settings_as_string| and embedding
  # it as part of the stacktrace.
  engine_common.process_sanitizer_options_overrides(fuzzer_path)

  return BuiltinFuzzerResult(output=output, corpus_directory=corpus_directory)
def download_system_symbols_if_needed(symbols_directory):
  """Download system libraries from |SYMBOLS_URL| and cache locally."""
  if not should_download_symbols():
    return

  # Get the build fingerprint parameters.
  build_params = settings.get_build_parameters()
  if not build_params:
    logs.log_error('Unable to determine build parameters.')
    return

  build_params_check_path = os.path.join(symbols_directory,
                                         '.cached_build_params')
  if check_symbols_cached(build_params_check_path, build_params):
    return

  build_id = build_params.get('build_id')
  target = build_params.get('target')
  build_type = build_params.get('type')
  if not build_id or not target or not build_type:
    logs.log_error('Null build parameters found, exiting.')
    return

  symbols_archive_filename = f'{target}-symbols-{build_id}.zip'
  artifact_file_name = symbols_archive_filename
  output_filename_override = None

  # Include type and sanitizer information in the target.
  tool_suffix = environment.get_value('SANITIZER_TOOL_NAME')
  target_with_type_and_san = f'{target}-{build_type}'
  if tool_suffix and tool_suffix not in target_with_type_and_san:
    target_with_type_and_san += f'_{tool_suffix}'

  targets_with_type_and_san = [target_with_type_and_san]

  symbols_archive_path = os.path.join(symbols_directory,
                                      symbols_archive_filename)
  download_artifact_if_needed(build_id, symbols_directory,
                              symbols_archive_path, targets_with_type_and_san,
                              artifact_file_name, output_filename_override)
  if not os.path.exists(symbols_archive_path):
    logs.log_error(
        'Unable to locate symbols archive %s.' % symbols_archive_path)
    return

  # Store the artifact for later use or for use by other bots.
  storage.store_file_in_cache(symbols_archive_path)

  archive.unpack(symbols_archive_path, symbols_directory, trusted=True)
  shell.remove_file(symbols_archive_path)

  utils.write_data_to_file(build_params, build_params_check_path)
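# For reference, a build parameters dict as consumed above might look like
# the following. The keys match what download_system_symbols_if_needed reads;
# the values are illustrative assumptions:
#
#   {'build_id': '7531685', 'target': 'walleye', 'type': 'userdebug'}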
def correct_if_needed(dict_path):
  """Corrects obvious errors such as missing quotes in a dictionary."""
  if not dict_path or not os.path.exists(dict_path):
    return

  content = utils.read_data_from_file(
      dict_path, eval_data=False).decode('utf-8')
  new_content = ''
  for current_line in content.splitlines():
    new_content += _fix_dictionary_line(current_line, dict_path) + '\n'

  # End-of-file newlines are inconsistent in dictionaries, so compare the
  # contents without them.
  if new_content.rstrip('\n') != content.rstrip('\n'):
    utils.write_data_to_file(new_content, dict_path)
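# The line corrector used above is not shown here. The sketch below is an
# illustrative assumption of what a helper like _fix_dictionary_line might
# do for AFL/libFuzzer-style dictionaries, where entries take the form
# "token" or name="token" and '#' starts a comment; it is not the actual
# ClusterFuzz implementation.
def _fix_dictionary_line_sketch(current_line):
  stripped = current_line.strip()

  # Preserve blank lines and comment lines unchanged.
  if not stripped or stripped.startswith('#'):
    return current_line

  # Looks already quoted ("value" or name="value"), so leave it alone.
  if stripped.endswith('"') and stripped.count('"') >= 2:
    return current_line

  # Otherwise, add the quotes the dictionary format requires.
  return '"%s"' % stripped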
def _download_testcase(testcase_id, testcase, configuration):
  """Download the test case and return its path."""
  print('Downloading testcase...')
  testcase_download_url = '{url}?id={id}'.format(
      url=configuration.get('testcase_download_url'), id=testcase_id)
  response, content = http_utils.request(
      testcase_download_url,
      method=http_utils.GET_METHOD,
      configuration=configuration)

  if response.status != 200:
    raise errors.ReproduceToolUnrecoverableError(
        'Unable to download test case.')

  bot_absolute_filename = response[FILENAME_RESPONSE_HEADER]

  # Store the test case in the config directory for debuggability.
  testcase_directory = os.path.join(CONFIG_DIRECTORY, 'current-testcase')
  shell.remove_directory(testcase_directory, recreate=True)
  environment.set_value('FUZZ_INPUTS', testcase_directory)
  testcase_path = os.path.join(testcase_directory,
                               os.path.basename(bot_absolute_filename))

  utils.write_data_to_file(content, testcase_path)

  # Unpack the test case if it's archived.
  # TODO(mbarbella): Rewrite setup.unpack_testcase and share this code.
  if testcase.minimized_keys and testcase.minimized_keys != 'NA':
    mask = data_types.ArchiveStatus.MINIMIZED
  else:
    mask = data_types.ArchiveStatus.FUZZED

  if testcase.archive_state & mask:
    archive.unpack(testcase_path, testcase_directory)
    file_list = archive.get_file_list(testcase_path)

    testcase_path = None
    for file_name in file_list:
      if os.path.basename(file_name) == os.path.basename(
          testcase.absolute_path):
        testcase_path = os.path.join(testcase_directory, file_name)
        break

    if not testcase_path:
      raise errors.ReproduceToolUnrecoverableError(
          'Test case file was not found in archive.\n'
          'Original filename: {absolute_path}.\n'
          'Archive contents: {file_list}'.format(
              absolute_path=testcase.absolute_path, file_list=file_list))

  return testcase_path
def setup_user_profile_directory_if_needed(user_profile_directory):
  """Set up the user profile directory if it does not exist."""
  if os.path.exists(user_profile_directory):
    # User profile directory already exists. Bail out.
    return

  shell.create_directory(user_profile_directory)

  # Create a file in the user profile directory based on the format:
  # filename;base64 encoded zlib compressed file contents.
  user_profile_file = environment.get_value('USER_PROFILE_FILE')
  if user_profile_file and ';' in user_profile_file:
    user_profile_filename, encoded_file_contents = (
        user_profile_file.split(';', 1))
    user_profile_file_contents = zlib.decompress(
        base64.b64decode(encoded_file_contents))
    user_profile_file_path = os.path.join(user_profile_directory,
                                          user_profile_filename)
    utils.write_data_to_file(user_profile_file_contents,
                             user_profile_file_path)

  # For Firefox, we need to install a special fuzzPriv extension that exposes
  # special functions to javascript, e.g. gc(), etc.
  app_name = environment.get_value('APP_NAME')
  if app_name.startswith('firefox'):
    # Create the extensions directory.
    extensions_directory = os.path.join(user_profile_directory, 'extensions')
    shell.create_directory(extensions_directory)

    # Unpack the fuzzPriv extension.
    extension_archive = os.path.join(environment.get_resources_directory(),
                                     'firefox', 'fuzzPriv-extension.zip')
    archive.unpack(extension_archive, extensions_directory)

    # Add this extension to the extensions configuration file.
    extension_config_file_path = os.path.join(user_profile_directory,
                                              'extensions.ini')
    fuzz_extension_directory = os.path.join(extensions_directory,
                                            '*****@*****.**')
    extension_config_file_contents = ('[ExtensionDirs]\r\n'
                                      'Extension0=%s\r\n'
                                      '\r\n'
                                      '[ThemeDirs]\r\n' %
                                      fuzz_extension_directory)
    utils.write_data_to_file(extension_config_file_contents,
                             extension_config_file_path)
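# A small sketch showing how a USER_PROFILE_FILE value in the
# 'filename;base64 encoded zlib compressed file contents' format decoded
# above could be produced. The filename and preference contents are
# illustrative assumptions; the encoding simply mirrors the decode path
# (zlib.decompress of base64.b64decode).
import base64
import zlib

profile_contents = b'user_pref("dom.disable_open_during_load", false);'
encoded_contents = base64.b64encode(zlib.compress(profile_contents)).decode()
user_profile_file_value = 'prefs.js;%s' % encoded_contents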
def create_testcase_list_file(output_directory):
  """Create a testcase list file for tests in a directory."""
  files_list = []
  files_list_file_path = os.path.join(output_directory,
                                      TESTCASE_LIST_FILENAME)
  for root, _, files in shell.walk(output_directory):
    for filename in files:
      if filename.endswith(INFO_FILE_EXTENSION):
        # Skip info files.
        continue

      file_path = os.path.join(root, filename)
      if not utils.is_valid_testcase_file(file_path, check_if_exists=False):
        continue

      normalized_relative_file_path = utils.get_normalized_relative_path(
          file_path, output_directory)
      files_list.append(normalized_relative_file_path)

  utils.write_data_to_file('\n'.join(sorted(files_list)),
                           files_list_file_path)
def merge_dictionary_files(original_dictionary_path,
                           recommended_dictionary_path,
                           merged_dictionary_path):
  """Merge a list of dictionaries with given paths into a single
  dictionary."""
  if original_dictionary_path and os.path.exists(original_dictionary_path):
    merged_dictionary_data = utils.read_data_from_file(
        original_dictionary_path, eval_data=False).decode('utf-8')
  else:
    merged_dictionary_data = ''

  recommended_dictionary_lines = utils.read_data_from_file(
      recommended_dictionary_path,
      eval_data=False).decode('utf-8').splitlines()

  dictionary_lines_to_add = set()
  for line in recommended_dictionary_lines:
    if line not in merged_dictionary_data:
      dictionary_lines_to_add.add(line)

  merged_dictionary_data += '\n%s\n' % RECOMMENDED_DICTIONARY_HEADER
  merged_dictionary_data += '\n'.join(dictionary_lines_to_add)

  utils.write_data_to_file(merged_dictionary_data, merged_dictionary_path)
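# Usage sketch for merge_dictionary_files: merge a downloaded GCS copy of the
# recommended dictionary into a fuzzer's local dictionary in place, as the
# merge test earlier does. Both paths are illustrative assumptions.
local_dict_path = '/data/fuzzers/my_fuzzer/my_fuzzer.dict'
gcs_recommended_copy_path = '/tmp/recommended_dictionary.txt'
merge_dictionary_files(local_dict_path, gcs_recommended_copy_path,
                       local_dict_path)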
def request(url,
            body=None,
            method=POST_METHOD,
            force_reauthorization=False,
            configuration=None):
  """Make an HTTP request to the specified URL."""
  if configuration:
    authorization = _get_authorization(force_reauthorization, configuration)
    headers = {
        'User-Agent': 'clusterfuzz-reproduce',
        'Authorization': authorization
    }
  else:
    headers = {}

  http = httplib2.Http()
  request_body = json_utils.dumps(body) if body is not None else ''
  response, content = http.request(
      url, method=method, headers=headers, body=request_body)

  # If the server returns 401, we may need to reauthenticate. Try the request
  # a second time if this happens.
  if response.status == 401 and not force_reauthorization:
    return request(
        url,
        body,
        method=method,
        force_reauthorization=True,
        configuration=configuration)

  # Cache the authorization token returned by the server so later requests
  # can reuse it.
  if AUTHORIZATION_HEADER in response:
    shell.create_directory(
        os.path.dirname(AUTHORIZATION_CACHE_FILE), create_intermediates=True)
    utils.write_data_to_file(response[AUTHORIZATION_HEADER],
                             AUTHORIZATION_CACHE_FILE)

  return response, content
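# Usage sketch for request(): an authenticated GET against a hypothetical
# endpoint. The URL and the configuration object are illustrative
# assumptions; reauthentication on 401 and token caching are handled inside
# request() itself.
response, content = request(
    'https://clusterfuzz.example.com/reproduce-tool/testcase-info',
    method=GET_METHOD,
    configuration=configuration)  # hypothetical configuration object
if response.status == 200:
  print(content)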
def remount_if_needed():
  """Remount the NFS volume if it is not working."""
  nfs_root = environment.get_value('NFS_ROOT')
  if not nfs_root:
    return

  nfs_host = environment.get_value('NFS_HOST')
  nfs_volume = environment.get_value('NFS_VOLUME')
  check_file_path = os.path.join(nfs_root, 'check')
  if os.path.exists(check_file_path):
    # Volume is mounted correctly and readable, bail out.
    return

  # Unmount the NFS drive first. Ignore the return code as we might not have
  # mounted the drive at all.
  subprocess.call(['umount', '-f', nfs_root])

  # Mount the NFS drive.
  logs.log_warn('Trying to remount the NFS volume.')
  nfs_volume_path = '%s:/%s' % (nfs_host, nfs_volume)
  subprocess.check_call([
      'mount', '-o', 'anon', '-o', 'nolock', '-o', 'retry=10',
      nfs_volume_path, nfs_root
  ])

  if os.path.exists(check_file_path):
    # Volume is mounted correctly and readable, bail out.
    return

  # Write the check file.
  utils.write_data_to_file('ok', check_file_path)

  # Make sure that the check file exists.
  if not os.path.exists(check_file_path):
    raise Exception('Failed to write check file on NFS volume.')
def _do_run_testcase_and_return_result_in_queue(crash_queue,
                                                thread_index,
                                                file_path,
                                                gestures,
                                                env_copy,
                                                upload_output=False):
  """Run a single testcase and return crash results in the crash queue."""
  try:
    # Run testcase and check whether a crash occurred or not.
    return_code, crash_time, output = run_testcase(thread_index, file_path,
                                                   gestures, env_copy)

    # Pull testcase directory to host to get any stats files.
    if environment.is_trusted_host():
      from clusterfuzz._internal.bot.untrusted_runner import file_host
      file_host.pull_testcases_from_worker()

    # Analyze the crash.
    crash_output = _get_crash_output(output)
    crash_result = CrashResult(return_code, crash_time, crash_output)

    # To provide consistency between stats and logs, we use the timestamp
    # taken from stats when uploading logs and testcase.
    if upload_output:
      log_time = _get_testcase_time(file_path)

    if crash_result.is_crash():
      # Initialize resource list with the testcase path.
      resource_list = [file_path]
      resource_list += get_resource_paths(crash_output)

      # Store the crash stack file in the crash stacktrace directory
      # with filename as the hash of the testcase path.
      crash_stacks_directory = environment.get_value('CRASH_STACKTRACES_DIR')
      stack_file_path = os.path.join(crash_stacks_directory,
                                     utils.string_hash(file_path))
      utils.write_data_to_file(crash_output, stack_file_path)

      # Put crash/no-crash results in the crash queue.
      crash_queue.put(
          Crash(
              file_path=file_path,
              crash_time=crash_time,
              return_code=return_code,
              resource_list=resource_list,
              gestures=gestures,
              stack_file_path=stack_file_path))

      # Don't upload uninteresting testcases (no crash) or if there is no
      # log to correlate it with (not upload_output).
      if upload_output:
        upload_testcase(file_path, log_time)

    if upload_output:
      # Include full output for uploaded logs (crash output, merge output,
      # etc.).
      crash_result_full = CrashResult(return_code, crash_time, output)
      log = prepare_log_for_upload(crash_result_full.get_stacktrace(),
                                   return_code)
      upload_log(log, log_time)
  except Exception:
    logs.log_error('Exception occurred while running '
                   'run_testcase_and_return_result_in_queue.')
def write_cache_file_metadata(cache_file_path, file_path):
  """Write cache file metadata."""
  cache_file_metadata_path = get_cache_file_metadata_path(cache_file_path)
  utils.write_data_to_file({'size': os.path.getsize(file_path)},
                           cache_file_metadata_path)
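# A minimal counterpart sketch for reading the metadata back. It assumes the
# dict written above was stored as its repr and that utils.read_data_from_file
# with eval_data=True evaluates it back into a dict; the actual ClusterFuzz
# helper for this may differ.
def read_cache_file_metadata_sketch(cache_file_path):
  cache_file_metadata_path = get_cache_file_metadata_path(cache_file_path)
  if not os.path.exists(cache_file_metadata_path):
    return None

  # E.g. {'size': 12345} -> dict holding the cached file's size in bytes.
  return utils.read_data_from_file(cache_file_metadata_path, eval_data=True)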
def parse_mime_to_crash_report_info(local_minidump_mime_path):
  """Read the (local) minidump MIME file into a CrashReportInfo object."""
  # Get the minidump name and path.
  minidump_path_match = re.match(r'(.*)\.mime', local_minidump_mime_path)
  if minidump_path_match is None:
    logs.log_error('Minidump filename in unexpected format: \'%s\'.' %
                   local_minidump_mime_path)
    return None
  minidump_path = '%s.dmp' % minidump_path_match.group(1).strip()

  # Reformat the minidump MIME to include the boundary.
  with open(local_minidump_mime_path, 'rb') as minidump_mime_file_content:
    # The boundary is the first line, after the first two dashes.
    boundary = minidump_mime_file_content.readline().strip()[2:]
    minidump_mime_bytes = (
        b'Content-Type: multipart/form-data; boundary=\"%s\"\r\n--%s\r\n' %
        (boundary, boundary))
    minidump_mime_bytes += minidump_mime_file_content.read()

  minidump_mime_contents = email.message_from_bytes(minidump_mime_bytes)

  # Parse the MIME contents, extracting the parameters needed for upload.
  mime_key_values = {}
  for mime_part in minidump_mime_contents.get_payload():
    if isinstance(mime_part, str):
      mime_part = utils.decode_to_unicode(mime_part)
      logs.log_error('Unexpected str mime_part from mime path %s: %s' %
                     (local_minidump_mime_path, mime_part))
      continue

    part_descriptor = list(mime_part.values())
    key_tokens = part_descriptor[0].split('; ')
    key_match = re.match(r'name="(.*)".*', key_tokens[1])

    # Extract from the MIME part the key-value pairs used by report
    # uploading.
    if key_match is not None:
      report_key = key_match.group(1)
      report_value = mime_part.get_payload(decode=True)
      if report_key == MINIDUMP_FILE_KEY:
        utils.write_data_to_file(report_value, minidump_path)
      else:
        # Take care of aliases.
        if report_key in ('prod', 'buildTargetId'):
          report_key = PRODUCT_KEY
        elif report_key == 'ver':
          report_key = VERSION_KEY

        # Save the key-value pair.
        mime_key_values[report_key] = report_value

  # Pull out product and version explicitly since these are required
  # for upload.
  product, version = None, None
  if PRODUCT_KEY in mime_key_values:
    product = mime_key_values.pop(PRODUCT_KEY).decode('utf-8')
  else:
    logs.log_error('Could not find \'%s\' (or an alias) among the MIME '
                   'key-value pairs.' % PRODUCT_KEY)
  if VERSION_KEY in mime_key_values:
    version = mime_key_values.pop(VERSION_KEY).decode('utf-8')
  else:
    logs.log_error('Could not find \'%s\' (or an alias) among the MIME '
                   'key-value pairs.' % VERSION_KEY)

  # If either is missing, return None and log the keys that do exist;
  # otherwise, construct a CrashReportInfo and return it.
  if product is None or version is None:
    logs.log_error('mime_key_values dict keys:\n%s' %
                   str(list(mime_key_values.keys())))
    return None

  return CrashReportInfo(
      minidump_path=minidump_path,
      product=product,
      version=version,
      optional_params=mime_key_values)
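# For reference, a form-data part inside such a minidump .mime file might
# look like the following. The field name 'prod' is one of the aliases
# handled above; the boundary and value are illustrative assumptions:
#
#   --BOUNDARY
#   Content-Disposition: form-data; name="prod"
#
#   Chrome_Android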
def update_data_bundle(fuzzer, data_bundle):
  """Updates a data bundle to the latest version."""
  # This module can't be in the global imports due to App Engine issues
  # with multiprocessing and psutil imports.
  from clusterfuzz._internal.google_cloud_utils import gsutil

  # If we are using a data bundle on NFS, it is expected that our testcases
  # will usually be large enough that we would fill up our tmpfs directory
  # pretty quickly. So, change it to use an on-disk directory.
  if not data_bundle.is_local:
    testcase_disk_directory = environment.get_value('FUZZ_INPUTS_DISK')
    environment.set_value('FUZZ_INPUTS', testcase_disk_directory)

  data_bundle_directory = get_data_bundle_directory(fuzzer.name)
  if not data_bundle_directory:
    logs.log_error('Failed to set up data bundle %s.' % data_bundle.name)
    return False

  if not shell.create_directory(
      data_bundle_directory, create_intermediates=True):
    logs.log_error(
        'Failed to create data bundle %s directory.' % data_bundle.name)
    return False

  # Check if the data bundle is up to date. If yes, skip the update.
  if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    logs.log('Data bundle was recently synced, skip.')
    return True

  # Fetch the lock for this data bundle.
  if not _fetch_lock_for_data_bundle_update(data_bundle):
    logs.log_error('Failed to lock data bundle %s.' % data_bundle.name)
    return False

  # Re-check if another bot did the sync already. If yes, skip.
  if _is_data_bundle_up_to_date(data_bundle, data_bundle_directory):
    logs.log('Another bot finished the sync, skip.')
    _release_lock_for_data_bundle_update(data_bundle)
    return True

  time_before_sync_start = time.time()

  # No need to sync anything if this is a search index data bundle. In that
  # case, the fuzzer will generate testcases from a GCS bucket periodically.
  if not _is_search_index_data_bundle(data_bundle.name):
    bucket_url = data_handler.get_data_bundle_bucket_url(data_bundle.name)

    if environment.is_trusted_host() and data_bundle.sync_to_worker:
      from clusterfuzz._internal.bot.untrusted_runner import corpus_manager
      from clusterfuzz._internal.bot.untrusted_runner import file_host
      worker_data_bundle_directory = file_host.rebase_to_worker_root(
          data_bundle_directory)

      file_host.create_directory(
          worker_data_bundle_directory, create_intermediates=True)
      result = corpus_manager.RemoteGSUtilRunner().rsync(
          bucket_url, worker_data_bundle_directory, delete=False)
    else:
      result = gsutil.GSUtilRunner().rsync(
          bucket_url, data_bundle_directory, delete=False)

    if result.return_code != 0:
      logs.log_error('Failed to sync data bundle %s: %s.' %
                     (data_bundle.name, result.output))
      _release_lock_for_data_bundle_update(data_bundle)
      return False

  # Update the testcase list file.
  testcase_manager.create_testcase_list_file(data_bundle_directory)

  # Write the last synced time in the sync file.
  sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)
  utils.write_data_to_file(time_before_sync_start, sync_file_path)

  if environment.is_trusted_host() and data_bundle.sync_to_worker:
    from clusterfuzz._internal.bot.untrusted_runner import file_host
    worker_sync_file_path = file_host.rebase_to_worker_root(sync_file_path)
    file_host.copy_file_to_worker(sync_file_path, worker_sync_file_path)

  # Release the acquired lock.
  _release_lock_for_data_bundle_update(data_bundle)

  return True
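# A minimal sketch (an assumption, not the actual ClusterFuzz helper) of how
# a freshness check like _is_data_bundle_up_to_date could use the sync file
# written above: compare the stored timestamp against a maximum age. The
# threshold is illustrative.
def _is_data_bundle_up_to_date_sketch(data_bundle_directory,
                                      max_age_seconds=6 * 60 * 60):
  sync_file_path = _get_data_bundle_sync_file_path(data_bundle_directory)
  if not os.path.exists(sync_file_path):
    return False

  # update_data_bundle stores time.time() as text; read it back as a float.
  last_sync_time = float(
      utils.read_data_from_file(sync_file_path,
                                eval_data=False).decode('utf-8'))
  return time.time() - last_sync_time < max_age_seconds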