def _read_to_bytesio(self, gcs_path):
  """Return a BytesIO representing a GCS object."""
  data = storage.read_data(gcs_path)
  if not data:
    raise helpers.EarlyExitException('Failed to read uploaded archive.', 500)

  return io.BytesIO(data)
def _compare_and_swap_gcs_dictionary(self, old_content, new_content):
  """Compare-and-swap implementation for a dictionary stored in GCS. This
  function is not atomic, but the window for a race is acceptably small."""
  current_content = storage.read_data(self.gcs_path).decode('utf-8')
  if current_content != old_content:
    return False, current_content

  storage.write_data(new_content.encode('utf-8'), self.gcs_path)
  return True, old_content
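# A minimal sketch of a truly atomic alternative, using GCS object generation
# preconditions via the google-cloud-storage client library. The helper below
# is hypothetical and not part of this module; the client is aliased to |gcs|
# to avoid shadowing the |storage| wrapper used above.
from google.api_core import exceptions
from google.cloud import storage as gcs


def _atomic_swap_sketch(bucket_name, blob_name, old_content, new_content):
  """Write new_content only if the object still holds old_content."""
  blob = gcs.Client().bucket(bucket_name).get_blob(blob_name)
  if blob is None or blob.download_as_bytes().decode('utf-8') != old_content:
    return False

  try:
    # GCS rejects this write if the object's generation changed since the
    # read above, closing the race window the docstring mentions.
    blob.upload_from_string(
        new_content.encode('utf-8'), if_generation_match=blob.generation)
  except exceptions.PreconditionFailed:
    return False

  return True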
def get_introspector_index():
  """Return the introspector projects status index."""
  if storage.exists(INTROSPECTOR_INDEX_JSON_URL):
    introspector_index = json.loads(
        storage.read_data(INTROSPECTOR_INDEX_JSON_URL))
  else:
    introspector_index = {}

  logs.log('Loaded introspector status: %d' % len(introspector_index))
  return introspector_index
def _read_json(url):
  """Returns a JSON object loaded from the given GCS url."""
  data = storage.read_data(url)

  result = None
  try:
    result = json.loads(data)
  except Exception as e:
    logs.log_warn('Empty or malformed code coverage JSON (%s): %s.' %
                  (url, str(e)))

  return result
def _get_url_content(url):
  """Read a potentially base64-encoded resource from the given URL."""
  if url.startswith(storage.GS_PREFIX):
    # Fetch a GCS path with authentication.
    url_data = storage.read_data(url)
    if url_data is None:
      return None

    url_content = url_data.decode('utf-8')
  else:
    # Fetch a regular url without authentication.
    url_content = utils.fetch_url(url)

  # Urls on googlesource.com return file data as base64 encoded to avoid
  # cross-site scripting attacks. If the requested url contains |format=text|,
  # then the output is base64 encoded. So, decode it first.
  if url_content and url.endswith('format=text'):
    url_content = base64.b64decode(url_content)

  return url_content
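# Hedged usage sketch for _get_url_content; the URL below is illustrative
# only. googlesource.com serves raw file contents base64-encoded when
# |format=text| is requested, so the helper returns decoded bytes here.
deps = _get_url_content(
    'https://chromium.googlesource.com/chromium/src/+/main/DEPS?format=text')
if deps:
  print('Fetched %d bytes.' % len(deps))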
def update_recommended_dictionary(self, new_dictionary):
  """Update the recommended dictionary stored in GCS with new elements.

  Args:
    new_dictionary: A set of dictionary elements to be added into dictionary.

  Returns:
    The number of new elements actually added to the dictionary stored in GCS.
  """
  if environment.is_lib():
    return 0

  # If the dictionary does not already exist, then directly update it.
  if not storage.exists(self.gcs_path):
    storage.write_data('\n'.join(new_dictionary).encode('utf-8'),
                       self.gcs_path)
    return len(new_dictionary)

  # Read the current version of the dictionary.
  old_dictionary_data = storage.read_data(self.gcs_path).decode('utf-8')

  # Use a "compare-and-swap"-like approach to avoid race conditions and also
  # to avoid having a separate job merging multiple recommended dictionaries.
  succeeded = False
  while not succeeded:
    # If old_dictionary_data is None, there is no dictionary in GCS yet, i.e.
    # it's empty. Otherwise, we parse it and use it.
    old_dictionary = set()
    if old_dictionary_data:
      old_dictionary = set(old_dictionary_data.splitlines())

    # Merge the two dictionaries.
    new_dictionary |= old_dictionary
    if new_dictionary == old_dictionary:
      # The "new dictionary" elements have already been added to GCS, bail out.
      return 0

    succeeded, old_dictionary_data = self._compare_and_swap_gcs_dictionary(
        old_dictionary_data, '\n'.join(new_dictionary))

  return len(new_dictionary) - len(old_dictionary)
def get_projects_from_gcs(gcs_url):
  """Get projects from GCS path."""
  data = json.loads(storage.read_data(gcs_url))
  return [(project['name'], project) for project in data['projects']]
def get_remote_source_revision(source_manifest_url):
  """Get the remote source revision. This is factored out into its own method
  so that it can be mocked in tests."""
  return storage.read_data(source_manifest_url).decode('utf-8').strip()
def upload_testcases_if_needed(fuzzer_name, testcase_list, testcase_directory,
                               data_directory):
  """Upload test cases from the list to a cloud storage bucket."""
  # Since builtin fuzzers have a coverage minimized corpus, no need to upload
  # test case samples for them.
  if fuzzer_name in fuzzing.ENGINES:
    return

  bucket_name = local_config.ProjectConfig().get(
      'coverage.fuzzer-testcases.bucket')
  if not bucket_name:
    return

  files_list = []
  has_testcases_in_testcase_directory = False
  has_testcases_in_data_directory = False
  for testcase_path in testcase_list:
    if testcase_path.startswith(testcase_directory):
      files_list.append(os.path.relpath(testcase_path, testcase_directory))
      has_testcases_in_testcase_directory = True
    elif testcase_path.startswith(data_directory):
      files_list.append(os.path.relpath(testcase_path, data_directory))
      has_testcases_in_data_directory = True

  if not files_list:
    return

  formatted_date = str(utils.utcnow().date())
  gcs_base_url = 'gs://{bucket_name}/{date}/{fuzzer_name}/'.format(
      bucket_name=bucket_name, date=formatted_date, fuzzer_name=fuzzer_name)

  runner = gsutil.GSUtilRunner()
  batch_directory_blobs = storage.list_blobs(gcs_base_url)
  total_testcases = 0
  for blob in batch_directory_blobs:
    if not blob.endswith(LIST_FILE_BASENAME):
      continue

    list_gcs_url = storage.get_cloud_storage_file_path(bucket_name, blob)
    data = storage.read_data(list_gcs_url)
    if not data:
      logs.log_error('Read no data from test case list at {gcs_url}'.format(
          gcs_url=list_gcs_url))
      continue

    total_testcases += len(data.splitlines())

  # If we've already uploaded enough test cases for this fuzzer today, return.
  if total_testcases >= TESTCASES_PER_DAY:
    return

  # Cap the number of files.
  testcases_limit = min(len(files_list), TESTCASES_PER_DAY - total_testcases)
  files_list = files_list[:testcases_limit]

  # Upload each batch of tests to its own unique sub-bucket.
  identifier = environment.get_value('BOT_NAME') + str(utils.utcnow())
  gcs_base_url += utils.string_hash(identifier)

  list_gcs_url = gcs_base_url + '/' + LIST_FILE_BASENAME
  if not storage.write_data('\n'.join(files_list).encode('utf-8'),
                            list_gcs_url):
    return

  if has_testcases_in_testcase_directory:
    # Sync everything in |testcase_directory| since it is fuzzer-generated.
    runner.rsync(testcase_directory, gcs_base_url)

  if has_testcases_in_data_directory:
    # Sync all fuzzer-generated testcases in the data bundle directory.
    runner.rsync(
        data_directory,
        gcs_base_url,
        exclusion_pattern=('(?!.*{fuzz_prefix})'.format(
            fuzz_prefix=testcase_manager.FUZZ_PREFIX)))

    # Sync all possible resource dependencies as a best effort. It matches the
    # |resources-| prefix that a fuzzer can use to indicate resources. Also, it
    # matches the resources directory that Chromium web_tests use for
    # dependencies.
    runner.rsync(
        data_directory, gcs_base_url, exclusion_pattern='(?!.*resource)')

  logs.log('Synced {count} test cases to {gcs_url}.'.format(
      count=len(files_list), gcs_url=gcs_base_url))
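# A short sketch of how the negative-lookahead exclusion patterns above behave:
# an exclusion pattern excludes matching paths, so '(?!.*resource)' excludes
# every path that does NOT mention "resource", i.e. it syncs only resource
# files. The example paths below are illustrative only.
import re

pattern = re.compile(r'(?!.*resource)')
for path in ('fuzz-testcase-1', 'resources-images/icon.png'):
  excluded = bool(pattern.match(path))
  print(path, '-> excluded' if excluded else '-> synced')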
def _get_config_dict(url):
  """Read configs from a JSON file and return them as a dict."""
  url_data = storage.read_data(url)
  if not url_data:
    raise BuildOverrideError(OVERRIDE_CONFIG_NOT_READ_ERROR.format(url))

  return json.loads(url_data)