def get_build_urls_list(bucket_path, reverse=True):
  """Returns a sorted list of build urls from a bucket path."""
  if not bucket_path:
    return []

  base_url = os.path.dirname(bucket_path)
  if environment.is_running_on_app_engine():
    build_urls = list(storage.list_blobs(base_url))
  else:
    keys_directory = environment.get_value('BUILD_URLS_DIR')
    keys_filename = '%s.list' % utils.string_hash(bucket_path)
    keys_file_path = os.path.join(keys_directory, keys_filename)

    # Within a single task, re-use the locally cached keys file. Otherwise, we
    # waste a lot of network bandwidth fetching the same set of urls over and
    # over (especially for regression and progression testing).
    if not os.path.exists(keys_file_path):
      # Get the url list by reading the GCS bucket.
      with open(keys_file_path, 'w') as f:
        for path in storage.list_blobs(base_url):
          f.write(path + '\n')

    content = utils.read_data_from_file(keys_file_path, eval_data=False)
    if not content:
      return []

    build_urls = content.splitlines()

  return _sort_build_urls_by_revision(build_urls, bucket_path, reverse)

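# A minimal usage sketch, not from the source: the bucket path format and
# names below are assumptions. With reverse=True (the default), the returned
# list is sorted newest-first, so the first entry is the latest build.
#
#   build_urls = get_build_urls_list(
#       'gs://my-builds-bucket/linux-release/asan-([0-9]+).zip')
#   if build_urls:
#     latest_build_url = build_urls[0]
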
def _process_project(project, bucket):
  """Collects coverage information for all fuzz targets in the given project
  and the total stats for the project."""
  project_name = _basename(project)
  logs.log('Processing coverage for %s project.' % project_name)

  report_path = storage.get_cloud_storage_file_path(bucket, project)
  report_info = _read_json(report_path)
  if not report_info:
    logs.log_warn('Skipping code coverage for %s project.' % project_name)
    return

  # Iterate through report_info['fuzzer_stats_dir'] and prepare
  # CoverageInformation entities for individual fuzz targets.
  entities = []
  for fuzzer in storage.list_blobs(
      report_info['fuzzer_stats_dir'], recursive=False):
    entities.append(
        _process_fuzzer_stats(fuzzer, report_info, project_name, bucket))

  logs.log('Processed coverage for %d targets in %s project.' %
           (len(entities), project_name))

  # Prepare a CoverageInformation entity for the total project stats.
  entities.append(_process_project_stats(report_info, project_name))
  ndb_utils.put_multi(entities)

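# Illustrative shape of report_info as consumed above; only 'fuzzer_stats_dir'
# is actually read by this function, and the example value is an assumption:
#
#   report_info = {
#       'fuzzer_stats_dir': 'gs://my-coverage-bucket/my-project/fuzzer_stats',
#       # ...other report metadata passed through to the helpers...
#   }
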
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  # Only consider test cases in the output directory. We might upload too much
  # if we search the data directory as well, or have missing resources.
  # TODO(mbarbella): Support resources in data bundles.
  testcase_list = [
      os.path.relpath(testcase, testcase_directory)
      for testcase in testcase_list
      if testcase.startswith(testcase_directory)
  ]
  if not testcase_list:
    return

  # Bail out if this batch of test cases is too large.
  directory_size = shell.get_directory_size(testcase_directory)
  if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
    return

  runner.rsync(testcase_directory, gcs_base_url)
  logs.log('Synced {count} test cases to {gcs_url}'.format(
      count=len(testcase_list), gcs_url=gcs_base_url))

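# Sketch of the resulting GCS layout, assuming today is 2019-01-01 and the
# hash of the bot/time identifier is 'abc123' (both values illustrative):
#
#   gs://<bucket>/2019-01-01/<fuzzer_name>/abc123/<LIST_FILE_BASENAME>
#   gs://<bucket>/2019-01-01/<fuzzer_name>/abc123/<rsynced test cases>
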
def _get_mutator_plugins_from_bucket():
  """Returns list of the mutator plugin archives in the mutator plugin storage
  bucket."""
  mutator_plugins_bucket_url = _get_mutator_plugins_bucket_url()
  if not mutator_plugins_bucket_url:
    return None

  return storage.list_blobs(mutator_plugins_bucket_url)

def _limit_corpus_size(corpus_url, size_limit):
  """Limit number of files in a corpus url."""
  files_list = list(storage.list_blobs(corpus_url))
  corpus_size = len(files_list)

  if corpus_size <= size_limit:
    # Corpus directory size is within limit, no more work to do.
    return

  logs.log(
      'Limit corpus at {corpus_url} from {corpus_size} to {size_limit}.'.format(
          corpus_url=corpus_url, corpus_size=corpus_size,
          size_limit=size_limit))

  files_to_delete = random.sample(files_list, corpus_size - size_limit)
  bucket, _ = storage.get_bucket_name_and_path(corpus_url)
  for file_to_delete in files_to_delete:
    path_to_delete = storage.get_cloud_storage_file_path(
        bucket, file_to_delete)
    storage.delete(path_to_delete)

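# Hypothetical invocation, with an illustrative corpus url and limit; files to
# delete are sampled uniformly at random, so no particular inputs are favored:
#
#   _limit_corpus_size('gs://my-corpus-bucket/libFuzzer/my_target', 1000000)
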
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  # Since builtin fuzzers have a coverage minimized corpus, no need to upload
  # test case samples for them.
  if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True
  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list), list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer-generated test cases in the data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. This matches
    # the |resources-| prefix that a fuzzer can use to indicate resources, and
    # the resources directory that Chromium web_tests use for dependencies.
    runner.rsync(data_directory, gcs_base_url,
                 exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))

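# Note on the exclusion patterns above: gsutil's rsync -x flag (which
# exclusion_pattern presumably maps to) *excludes* paths matching the regex,
# so a negative lookahead like '(?!.*resource)' excludes every path that does
# not contain 'resource', i.e. only resource-like files get synced. A quick
# standalone sanity check (not from the source):
#
#   import re
#   pattern = re.compile('(?!.*resource)')
#   assert pattern.match('foo/bar.txt')             # matches -> excluded
#   assert not pattern.match('foo/resources/x.js')  # no match -> synced
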
def collect_fuzzer_coverage(bucket):
  """Actual implementation of the fuzzer coverage task."""
  url = _latest_report_info_dir(bucket)
  for project in storage.list_blobs(url, recursive=False):
    _process_project(project, bucket)