def _compare_and_swap_gcs_dictionary(self, old_content, new_content):
  """Compare-and-swap implementation for a dictionary stored in GCS. This
  function is not atomic, but the window for a race is acceptably small."""
  current_content = storage.read_data(self.gcs_path)
  if current_content != old_content:
    return False, current_content

  storage.write_data(new_content, self.gcs_path)
  return True, old_content
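
# Illustrative sketch (not part of the original module): the retry loop a
# caller of the compare-and-swap helper above is expected to run. `handler`
# and `merge_local_data` are hypothetical stand-ins; on a lost race the loop
# restarts from the content the winning writer left behind.
def _example_cas_retry(handler, merge_local_data):
  content = storage.read_data(handler.gcs_path)
  succeeded = False
  while not succeeded:
    merged = merge_local_data(content)
    # On failure, `content` becomes whatever a concurrent writer stored, and
    # the merge is retried against that fresher content.
    succeeded, content = handler._compare_and_swap_gcs_dictionary(
        content, merged)
  return merged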

def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  # Only consider test cases in the output directory. We might upload too much
  # if we search the data directory as well, or have missing resources.
  # TODO(mbarbella): Support resources in data bundles.
  testcase_list = [
      os.path.relpath(testcase, testcase_directory)
      for testcase in testcase_list
      if testcase.startswith(testcase_directory)
  ]
  if not testcase_list:
    return

  # Bail out if this batch of test cases is too large.
  directory_size = shell.get_directory_size(testcase_directory)
  if directory_size >= MAX_TESTCASE_DIRECTORY_SIZE:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(
        bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(testcase_list), list_gcs_url):
    return

  runner.rsync(testcase_directory, gcs_base_url)
  logs.log('Synced {count} test cases to {gcs_url}'.format(
      count=len(testcase_list), gcs_url=gcs_base_url))
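
# Illustrative sketch (not part of the original module): the module-level
# constants referenced above are defined elsewhere in the real module. The
# names are real, but the values below are hypothetical placeholders meant
# only to show the role each constant plays.
LIST_FILE_BASENAME = 'file_list.txt'  # Hypothetical per-batch list filename.
MAX_TESTCASE_DIRECTORY_SIZE = 10 * 1024 * 1024  # Hypothetical 10 MiB cap.
TESTCASES_PER_DAY = 5000  # Hypothetical daily per-fuzzer upload quota.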

def update_recommended_dictionary(self, new_dictionary):
  """Update recommended dictionary stored in GCS with new dictionary elements.

  Args:
    new_dictionary: A set of dictionary elements to be added into the
        dictionary.

  Returns:
    The number of new elements actually added to the dictionary stored in GCS.
  """
  if environment.is_lib():
    return 0

  # If the dictionary does not already exist, then directly update it.
  if not storage.exists(self.gcs_path):
    storage.write_data('\n'.join(new_dictionary).encode('utf-8'),
                       self.gcs_path)
    return len(new_dictionary)

  # Read the current version of the dictionary.
  old_dictionary_data = storage.read_data(self.gcs_path).decode('utf-8')

  # Use a "compare-and-swap"-like approach to avoid race conditions and also
  # to avoid having a separate job merging multiple recommended dictionaries.
  succeeded = False
  while not succeeded:
    # If old_dictionary_data is None, there is no dictionary in GCS yet, i.e.
    # it's empty. Otherwise, parse it and use it.
    old_dictionary = set()
    if old_dictionary_data:
      old_dictionary = set(old_dictionary_data.splitlines())

    # Merge the two dictionaries.
    new_dictionary |= old_dictionary
    if new_dictionary == old_dictionary:
      # The "new" elements have already been added to GCS, bail out.
      return 0

    succeeded, old_dictionary_data = self._compare_and_swap_gcs_dictionary(
        old_dictionary_data, '\n'.join(new_dictionary))

  return len(new_dictionary) - len(old_dictionary)
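
# Illustrative sketch (not part of the original module): why the return value
# above counts only genuinely new elements. After `new_dictionary |=
# old_dictionary`, new_dictionary holds the union, so its size minus the old
# size is exactly the number of elements that were not already stored in GCS.
def _example_merge_count():
  old_dictionary = {'"foo"', '"bar"'}
  new_dictionary = {'"bar"', '"baz"'}
  new_dictionary |= old_dictionary
  # The union is {'"foo"', '"bar"', '"baz"'}; only '"baz"' was new.
  assert len(new_dictionary) - len(old_dictionary) == 1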

def upload_stats(stats_list, filename=None):
  """Upload the fuzzer run to the BigQuery bucket. Assumes that all the stats
  given are for the same fuzzer/job run."""
  if not stats_list:
    logs.log_error('Failed to upload fuzzer stats: empty stats.')
    return

  assert isinstance(stats_list, list)

  bucket_name = big_query.get_bucket()
  if not bucket_name:
    logs.log_error('Failed to upload fuzzer stats: missing bucket name.')
    return

  kind = stats_list[0].kind
  fuzzer = stats_list[0].fuzzer

  # Group all stats for fuzz targets.
  fuzzer_or_engine_name = get_fuzzer_or_engine_name(fuzzer)

  if not filename:
    # Generate a random filename.
    filename = '%016x' % random.randint(0, (1 << 64) - 1) + '.json'

  # Handle runs that bleed into the next day.
  def timestamp_start_of_day(s):
    return utils.utc_date_to_timestamp(
        datetime.datetime.utcfromtimestamp(s.timestamp).date())

  stats_list.sort(key=lambda s: s.timestamp)

  for timestamp, stats in itertools.groupby(stats_list,
                                            timestamp_start_of_day):
    upload_data = '\n'.join(stat.to_json() for stat in stats)
    day_path = 'gs:/' + get_gcs_stats_path(
        kind, fuzzer_or_engine_name, timestamp=timestamp) + filename

    if not storage.write_data(upload_data, day_path):
      logs.log_error('Failed to upload FuzzerRun.')
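
# Illustrative sketch (not part of the original module): how stats that bleed
# past midnight UTC are split into separate daily uploads. The timestamps and
# the lightweight stand-in for a stats object are hypothetical; only the
# itertools.groupby semantics are demonstrated. Note that groupby relies on
# the sorted input that upload_stats establishes beforehand.
def _example_group_stats_by_day():
  import collections
  import datetime
  import itertools

  Stat = collections.namedtuple('Stat', 'timestamp')
  # One run just before midnight UTC, one just after (2024-01-01 00:00:00 UTC
  # is epoch 1704067200).
  stats = [Stat(1704067199.0), Stat(1704067201.0)]

  def start_of_day(s):
    return datetime.datetime.utcfromtimestamp(s.timestamp).date()

  days = [day for day, _ in itertools.groupby(stats, start_of_day)]
  assert len(days) == 2  # Two groups, hence two separate uploads.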

def upload_to_logs(bucket_name,
                   contents,
                   time=None,
                   fuzzer_name=None,
                   job_type=None,
                   file_extension=None):
  """Upload file contents to the logs directory in a GCS bucket.

  Args:
    bucket_name: Bucket logs are stored in.
    contents: String containing the log to be uploaded.
    time: A datetime object used to generate the filename for the log.
    fuzzer_name: Name of the fuzzer. If None, this is read from the
        environment.
    job_type: Job name. If None, this is read from the environment.
    file_extension: A string appended to the end of the log filename. A
        default value is used if None.
  """
  if not fuzzer_name:
    fuzzer_name = environment.get_value('FUZZER_NAME')
  if not job_type:
    job_type = environment.get_value('JOB_NAME')

  log_directory = get_logs_directory(bucket_name, fuzzer_name, job_type)

  if not time:
    time = datetime.datetime.utcnow()

  log_path = 'gs:/' + log_directory + '/' + get_log_relative_path(
      time, file_extension)

  if storage.write_data(contents, log_path):
    logs.log('Uploaded file to logs bucket.', log_path=log_path)
  else:
    logs.log_error('Failed to write file to logs bucket.', log_path=log_path)
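
# Illustrative sketch (not part of the original module): a typical call site.
# The bucket name, log contents, and extension are hypothetical; fuzzer and
# job names fall back to FUZZER_NAME/JOB_NAME from the environment, as the
# docstring above describes.
def _example_upload_crash_log():
  upload_to_logs(
      'my-project-logs',
      'Command: ./fuzz_target -runs=1\n==ERROR: heap-buffer-overflow',
      file_extension='.txt')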

def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  # Since builtin fuzzers have a coverage-minimized corpus, there is no need
  # to upload test case samples for them.
  if fuzzer_name in builtin_fuzzers.BUILTIN_FUZZERS:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True

  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = 'gs://{bucket}/{blob}'.format(
        bucket=bucket_name, blob=blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list), list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer-generated test cases in the data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. This matches
    # the |resources-| prefix that a fuzzer can use to indicate resources, and
    # also the resources directory that Chromium web_tests use for
    # dependencies.
    runner.rsync(data_directory, gcs_base_url,
                 exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))
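
# Illustrative sketch (not part of the original module): how the negative
# lookahead exclusion patterns above behave, assuming exclusion_pattern maps
# to gsutil rsync's -x exclude flag. Under that assumption, '(?!.*resource)'
# excludes every path that does NOT mention "resource", so only resource-like
# paths survive the sync. The sample paths are hypothetical.
def _example_exclusion_pattern():
  import re

  pattern = re.compile('(?!.*resource)')
  # A zero-width match means the path matches the exclusion, i.e. is skipped.
  assert pattern.match('data/settings.json')  # Excluded from the sync.
  assert not pattern.match('data/resources/icon.png')  # Kept by the sync.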