def _do_update(self, source_repo, repo, vulnerability, yaml_path):
  """Process updates on a vulnerability.

  Walks the GIT affected ranges declared in `vulnerability`, clones each
  referenced package repo to discover additional ranges/versions (via
  cherrypicks and branches), pushes any changes back to the source repo,
  and finally syncs the result into the corresponding Bug in Datastore.

  Args:
    source_repo: Source repository entity the vulnerability came from.
    repo: Checked-out source repository object.
    vulnerability: Vulnerability proto being processed.
    yaml_path: Path to the vulnerability YAML file in the checkout.
  """
  # Use lazy %-style logging args (consistent with the other logging calls
  # in this file) instead of eagerly-formatted f-strings.
  logging.info('Processing update for vulnerability %s', vulnerability.id)
  package_repo_dir = tempfile.TemporaryDirectory()
  package_repo_url = None
  package_repo = None

  added_ranges = set()
  added_versions = set()
  try:
    for affected_range in vulnerability.affects.ranges:
      # Go through existing provided ranges to find additional ranges (via
      # cherrypicks and branches).
      if affected_range.type != vulnerability_pb2.AffectedRangeNew.GIT:
        continue

      current_repo_url = affected_range.repo
      if current_repo_url != package_repo_url:
        # Different repo from previous one: drop the old temporary clone and
        # make a fresh one for the new repo URL.
        package_repo_dir.cleanup()
        package_repo_dir = tempfile.TemporaryDirectory()
        package_repo_url = current_repo_url
        package_repo = osv.clone_with_retries(package_repo_url,
                                              package_repo_dir.name)

      result = osv.get_affected(package_repo, affected_range.introduced,
                                affected_range.fixed)
      new_ranges, new_versions = osv.update_vulnerability(
          vulnerability, package_repo_url, result)
      # Collect newly added ranges and versions.
      added_ranges.update(new_ranges)
      added_versions.update(new_versions)
  finally:
    # Always remove the last temporary clone, even on error.
    package_repo_dir.cleanup()

  if added_ranges or added_versions:
    if not self._push_new_ranges_and_versions(source_repo, repo, vulnerability,
                                              yaml_path, added_ranges,
                                              added_versions):
      logging.warning('Discarding changes for %s due to conflicts.',
                      vulnerability.id)
      return
  else:
    # Nothing to do.
    logging.info('No range/version changes for vulnerability %s.',
                 vulnerability.id)

  # Update datastore with new information.
  bug = osv.Bug.get_by_id(vulnerability.id)
  if not bug:
    # TODO(ochang): Create new entry if needed.
    logging.error('Failed to find bug with ID %s', vulnerability.id)
    return

  bug.update_from_vulnerability(vulnerability)
  bug.put()
def _source_update(self, message):
  """Source update.

  Clones the source repository named in the Pub/Sub message, verifies that
  the YAML file on disk still matches the sha256 recorded when the message
  was published, then parses it and hands it off to _do_update().
  """
  attrs = message.attributes
  source = attrs['source']
  path = attrs['path']
  original_sha256 = attrs['original_sha256']

  source_repo = osv.get_source_repository(source)
  checkout_path = os.path.join(self._sources_dir, source)
  repo = osv.clone_with_retries(
      source_repo.repo_url,
      checkout_path,
      callbacks=self._git_callbacks(source_repo))

  yaml_path = os.path.join(osv.repo_path(repo), path)

  # Bail out if the file changed since the message was published; the stale
  # message no longer describes the current contents.
  actual_sha256 = osv.sha256(yaml_path)
  if actual_sha256 != original_sha256:
    logging.warning(
        'sha256sum of %s no longer matches (expected=%s vs current=%s).',
        path, original_sha256, actual_sha256)
    return

  try:
    vulnerability = osv.parse_vulnerability(yaml_path)
  except Exception as e:
    logging.error('Failed to parse vulnerability %s: %s', yaml_path, e)
    return

  self._do_update(source_repo, repo, vulnerability, yaml_path, original_sha256)
def process_oss_fuzz(self, oss_fuzz_source): """Process OSS-Fuzz source data.""" # Export OSS-Fuzz Vulnerability data into source repository. # OSS-Fuzz data is first imported via a special Pub/Sub pipeline into OSV. # This data needs to be dumped into a publicly accessible/editable place for # manual/human editing if required. # # This then becomes the source of truth where any edits are imported back # into OSV. with tempfile.TemporaryDirectory() as tmp_dir: callbacks = GitRemoteCallback(oss_fuzz_source.repo_username, self._ssh_key_public_path, self._ssh_key_private_path) repo = osv.clone_with_retries(oss_fuzz_source.repo_url, tmp_dir, callbacks=callbacks) if not repo: raise RuntimeError('Failed to clone source repo') vulnerabilities_path = os.path.join( tmp_dir, oss_fuzz_source.directory_path or '') # TODO(ochang): Make this more efficient by recording whether or not we # imported already in Datastore. for bug in osv.Bug.query( osv.Bug.status == osv.BugStatus.PROCESSED): source_name, source_id = osv.parse_source_id(bug.source_id) if source_name != oss_fuzz_source.name: continue project_dir = os.path.join(vulnerabilities_path, bug.project) os.makedirs(project_dir, exist_ok=True) vulnerability_path = os.path.join(project_dir, source_id + '.yaml') if os.path.exists(vulnerability_path): continue logging.info('Writing %s', bug.key.id()) with open(vulnerability_path, 'w') as handle: data = json_format.MessageToDict(bug.to_vulnerability()) yaml.dump(data, handle, sort_keys=False, Dumper=self.YamlDumper) # Commit Vulnerability changes back to the oss-fuzz source repository. logging.info('Commiting and pushing changes') repo.index.add_all() repo.index.write() tree = repo.index.write_tree() author = _git_author() repo.create_commit(repo.head.name, author, author, 'Import from OSS-Fuzz', tree, [repo.head.peel().oid]) # TODO(ochang): Rebase and retry if necessary. repo.remotes['origin'].push([repo.head.name], callbacks=callbacks)
def _source_update(self, message):
  """Source update."""
  attributes = message.attributes
  source = attributes['source']
  path = attributes['path']

  # Clone (with retries) the source repository named in the message.
  source_repo = osv.get_source_repository(source)
  checkout_dir = os.path.join(self._sources_dir, source)
  repo = osv.clone_with_retries(
      source_repo.repo_url,
      checkout_dir,
      callbacks=self._git_callbacks(source_repo))

  # Parse the vulnerability entry from the checkout and apply the update.
  yaml_path = os.path.join(osv.repo_path(repo), path)
  vulnerability = osv.parse_vulnerability(yaml_path)
  self._do_update(source_repo, repo, vulnerability, yaml_path)
def checkout(self, source_repo):
  """Check out a source repo, reusing an existing checkout when possible."""
  checkout_dir = os.path.join(self._work_dir, source_repo.name)

  if not os.path.exists(checkout_dir):
    # No local copy yet; clone from scratch.
    return osv.clone_with_retries(
        source_repo.repo_url,
        checkout_dir,
        callbacks=self._git_callbacks(source_repo))

  # A checkout already exists: reset it and check out the latest revision.
  try:
    return self._use_existing_checkout(source_repo, checkout_dir)
  except Exception as error:
    # Failed to re-use the existing checkout. Delete it and clone fresh.
    logging.error('Failed to load existing checkout: %s', error)
    shutil.rmtree(checkout_dir)

  return osv.clone_with_retries(
      source_repo.repo_url,
      checkout_dir,
      callbacks=self._git_callbacks(source_repo))
def process_impact_task(source_id, message):
  """Process an impact task.

  Recomputes the affected commits/tags for an existing Bug from its
  RegressResult and FixResult entries and updates the Bug in Datastore.

  Args:
    source_id: Source identifier string for the bug.
    message: Pub/Sub message with an 'allocated_id' attribute.

  Raises:
    osv.ImpactError: If no Bug was allocated or no repo URL is available.
  """
  logging.info('Processing impact task for %s', source_id)

  regress_result = ndb.Key(osv.RegressResult, source_id).get()
  if not regress_result:
    logging.error('Missing RegressResult for %s', source_id)
    return

  fix_result = ndb.Key(osv.FixResult, source_id).get()
  if not fix_result:
    # A missing fix is not fatal; continue with an empty FixResult.
    logging.warning('Missing FixResult for %s', source_id)
    fix_result = osv.FixResult()

  # Check if there is an existing Bug for the same source, but with a different
  # allocated ID. This shouldn't happen.
  allocated_bug_id = message.attributes['allocated_id']

  existing_bug = osv.Bug.query(osv.Bug.source_id == source_id).get()
  if existing_bug and existing_bug.key.id() != allocated_bug_id:
    logging.error('Bug entry already exists for %s with a different ID %s',
                  source_id, existing_bug.key.id())
    return

  if existing_bug and existing_bug.status == osv.BugStatus.INVALID:
    logging.warning('Bug %s already marked as invalid.', existing_bug.key.id())
    return

  if existing_bug:
    public = existing_bug.public
  else:
    raise osv.ImpactError('Task requested without Bug allocated.')

  # TODO(ochang): Handle changing repo types? e.g. SVN -> Git.
  repo_url = regress_result.repo_url or fix_result.repo_url
  if not repo_url:
    raise osv.ImpactError('No repo_url set')

  issue_id = fix_result.issue_id or regress_result.issue_id
  fix_commit = fix_result.commit

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo = osv.clone_with_retries(repo_url, tmp_dir)

    # If not a precise fix commit, try to find the exact one by going through
    # commit messages (oss-fuzz only).
    if source_id.startswith(SOURCE_PREFIX) and ':' in fix_commit:
      start_commit, end_commit = fix_commit.split(':')
      commit = find_oss_fuzz_fix_via_commit(repo, start_commit, end_commit,
                                            source_id, issue_id)
      if commit:
        logging.info('Found exact fix commit %s via commit message (oss-fuzz)',
                     commit)
        fix_commit = commit

    # Actually compute the affected commits/tags.
    result = osv.get_affected(repo, regress_result.commit, fix_commit)
    # Sort the tags so logs and the stored `affected` list are deterministic
    # (result.tags is an unordered collection).
    affected_tags = sorted(result.tags)
    logging.info('Found affected %s', ', '.join(affected_tags))

  # If the range resolved to a single commit, simplify it.
  if len(result.fix_commits) == 1:
    fix_commit = result.fix_commits[0]
  elif not result.fix_commits:
    # Not fixed.
    fix_commit = ''

  if len(result.regress_commits) == 1:
    regress_commit = result.regress_commits[0]
  else:
    regress_commit = regress_result.commit

  # Fix-result metadata takes precedence over regress-result metadata.
  project = fix_result.project or regress_result.project
  ecosystem = fix_result.ecosystem or regress_result.ecosystem
  summary = fix_result.summary or regress_result.summary
  details = fix_result.details or regress_result.details
  severity = fix_result.severity or regress_result.severity
  reference_urls = fix_result.reference_urls or regress_result.reference_urls

  update_affected_commits(allocated_bug_id, result, project, ecosystem, public)

  existing_bug.repo_url = repo_url
  existing_bug.fixed = fix_commit
  existing_bug.regressed = regress_commit
  existing_bug.affected = affected_tags
  existing_bug.affected_fuzzy = osv.normalize_tags(affected_tags)
  existing_bug.confidence = result.confidence
  existing_bug.issue_id = issue_id
  existing_bug.project = project
  existing_bug.ecosystem = ecosystem
  existing_bug.summary = summary
  existing_bug.details = details
  existing_bug.status = osv.BugStatus.PROCESSED
  existing_bug.severity = severity
  existing_bug.reference_urls = reference_urls

  existing_bug.additional_commit_ranges = []
  # Don't display additional ranges for imprecise commits, as they can be
  # confusing.
  if ':' in existing_bug.fixed or ':' in existing_bug.regressed:
    existing_bug.put()
    return

  def _sort_key(value):
    # Allow sorting of None values.
    return (value[0] or '', value[1] or '')

  for introduced_in, fixed_in in sorted(result.affected_ranges, key=_sort_key):
    if (introduced_in == existing_bug.regressed and
        (fixed_in or '') == existing_bug.fixed):
      # Don't include the main range.
      continue

    existing_bug.additional_commit_ranges.append(
        osv.CommitRange(introduced_in=introduced_in, fixed_in=fixed_in))

  existing_bug.put()
def _do_update(self, source_repo, repo, vulnerability, yaml_path,
               original_sha256):
  """Process updates on a vulnerability.

  Collects all declared GIT ranges, clones each referenced package repo to
  derive additional ranges/versions/commits, pushes the updated entry back
  to the source repo, then syncs the result into the Bug in Datastore.

  Args:
    source_repo: Source repository entity the vulnerability came from.
    repo: Checked-out source repository object.
    vulnerability: Vulnerability proto being processed.
    yaml_path: Path to the vulnerability YAML file in the checkout.
    original_sha256: sha256 of the YAML file when the update was requested.
  """
  logging.info('Processing update for vulnerability %s', vulnerability.id)
  package_repo_dir = tempfile.TemporaryDirectory()
  package_repo_url = None
  package_repo = None

  # Merge in fix information from any existing FixResult for this bug before
  # doing range analysis.
  bug = osv.Bug.get_by_id(vulnerability.id)
  if bug:
    fix_result = osv.FixResult.get_by_id(bug.source_id)
    if fix_result:
      add_fix_information(vulnerability, bug, fix_result)

  # Maps repo URL -> RangeCollector accumulating introduced/fixed pairs.
  range_collectors = collections.defaultdict(osv.RangeCollector)
  versions_with_bug = set()
  versions_with_fix = set()
  commits = set()
  try:
    # First pass: seed the collectors with the ranges already declared in the
    # vulnerability entry.
    for affected_range in vulnerability.affects.ranges:
      if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
        continue

      range_collectors[affected_range.repo].add(affected_range.introduced,
                                                affected_range.fixed)

    # Second pass: analyze each declared range against a clone of its repo to
    # discover derived ranges/versions/commits.
    for affected_range in vulnerability.affects.ranges:
      # Go through existing provided ranges to find additional ranges (via
      # cherrypicks and branches).
      if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
        continue

      current_repo_url = affected_range.repo
      if current_repo_url != package_repo_url:
        # Different repo from previous one.
        package_repo_dir.cleanup()
        package_repo_dir = tempfile.TemporaryDirectory()
        package_repo_url = current_repo_url
        package_repo = osv.clone_with_retries(package_repo_url,
                                              package_repo_dir.name)

      result = osv.get_affected(package_repo, affected_range.introduced,
                                affected_range.fixed)
      for introduced, fixed in result.affected_ranges:
        range_collectors[current_repo_url].add(introduced, fixed)

      versions_with_fix.update(result.tags_with_fix)
      versions_with_bug.update(result.tags_with_bug)
      commits.update(result.commits)
  finally:
    # Always clean up the last temporary clone, even on error.
    package_repo_dir.cleanup()

  # Affected versions are tags that contain the bug but not the fix.
  if self._push_new_ranges_and_versions(
      source_repo, repo, vulnerability, yaml_path, original_sha256,
      range_collectors, list(versions_with_bug - versions_with_fix)):
    logging.info('Updated range/versions for vulnerability %s.',
                 vulnerability.id)
  else:
    logging.warning('Discarding changes for %s due to conflicts.',
                    vulnerability.id)
    return

  # Update datastore with new information.
  bug = osv.Bug.get_by_id(vulnerability.id)
  if not bug:
    # TODO(ochang): Create new entry if needed.
    logging.error('Failed to find bug with ID %s', vulnerability.id)
    return

  bug.update_from_vulnerability(vulnerability)
  bug.put()

  osv.update_affected_commits(bug.key.id(), commits, bug.project,
                              bug.ecosystem, bug.public)
def process_impact_task(source_id, message):
  """Process an impact task.

  Recomputes affected commits/tags/ranges for an existing Bug from its
  RegressResult and FixResult and updates the Bug in Datastore.

  Args:
    source_id: Source identifier string for the bug.
    message: Pub/Sub message with an 'allocated_id' attribute.

  Raises:
    osv.ImpactError: If no Bug was allocated or no repo URL is available.
  """
  logging.info('Processing impact task for %s', source_id)

  regress_result = ndb.Key(osv.RegressResult, source_id).get()
  if not regress_result:
    logging.error('Missing RegressResult for %s', source_id)
    return

  fix_result = ndb.Key(osv.FixResult, source_id).get()
  if not fix_result:
    # A missing fix is not fatal; continue with an empty FixResult.
    logging.warning('Missing FixResult for %s', source_id)
    fix_result = osv.FixResult()

  # Check if there is an existing Bug for the same source, but with a different
  # allocated ID. This shouldn't happen.
  allocated_bug_id = message.attributes['allocated_id']

  existing_bug = osv.Bug.query(osv.Bug.source_id == source_id).get()
  if existing_bug and existing_bug.key.id() != allocated_bug_id:
    logging.error('Bug entry already exists for %s with a different ID %s',
                  source_id, existing_bug.key.id())
    return

  if existing_bug and existing_bug.status == osv.BugStatus.INVALID:
    logging.warning('Bug %s already marked as invalid.', existing_bug.key.id())
    return

  if existing_bug:
    public = existing_bug.public
  else:
    raise osv.ImpactError('Task requested without Bug allocated.')

  # Prefer the regress result's repo URL, falling back to the fix result's.
  repo_url = regress_result.repo_url or fix_result.repo_url
  if not repo_url:
    raise osv.ImpactError('No repo_url set')

  # Always populate Bug attributes, even if the remainder of the analysis fails.
  # This does not mark the Bug as being valid.
  set_bug_attributes(existing_bug, regress_result, fix_result)
  existing_bug.put()

  issue_id = fix_result.issue_id or regress_result.issue_id
  fix_commit = fix_result.commit

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo = osv.clone_with_retries(repo_url, tmp_dir)

    # If not a precise fix commit, try to find the exact one by going through
    # commit messages (oss-fuzz only). A 'start:end' value denotes a commit
    # range rather than a single commit.
    if source_id.startswith(SOURCE_PREFIX) and ':' in fix_commit:
      start_commit, end_commit = fix_commit.split(':')
      commit = find_oss_fuzz_fix_via_commit(repo, start_commit, end_commit,
                                            source_id, issue_id)
      if commit:
        logging.info(
            'Found exact fix commit %s via commit message (oss-fuzz)', commit)
        fix_commit = commit

    # Actually compute the affected commits/tags.
    repo_analyzer = osv.RepoAnalyzer()
    result = repo_analyzer.get_affected(repo, regress_result.commit, fix_commit)
    # Sorted for deterministic logging/storage.
    affected_tags = sorted(list(result.tags))
    logging.info('Found affected %s', ', '.join(affected_tags))

  # If the range resolved to a single commit, simplify it.
  if len(result.fix_commits) == 1:
    fix_commit = result.fix_commits[0]
  elif not result.fix_commits:
    # Not fixed.
    fix_commit = ''

  # Only simplify the regress commit when it is a real (known) commit.
  if (len(result.regress_commits) == 1 and
      osv.UNKNOWN_COMMIT not in regress_result.commit):
    regress_commit = result.regress_commits[0]
  else:
    regress_commit = regress_result.commit

  # Fix-result metadata takes precedence over regress-result metadata.
  project = fix_result.project or regress_result.project
  ecosystem = fix_result.ecosystem or regress_result.ecosystem

  osv.update_affected_commits(allocated_bug_id, result.commits, project,
                              ecosystem, public)

  affected_tags = sorted(list(result.tags))
  existing_bug.fixed = fix_commit
  existing_bug.regressed = regress_commit
  existing_bug.affected = affected_tags
  existing_bug.affected_fuzzy = osv.normalize_tags(affected_tags)
  existing_bug.status = osv.BugStatus.PROCESSED

  # For the AffectedRange, use the first commit in the regress commit range, and
  # the last commit in the fix commit range.
  introduced = result.regress_commits[0] if result.regress_commits else ''
  fixed = result.fix_commits[-1] if result.fix_commits else ''
  existing_bug.affected_ranges = [
      osv.AffectedRange(
          type='GIT', repo_url=repo_url, introduced=introduced, fixed=fixed),
  ]

  # Expose range data in `database_specific`.
  database_specific = {}
  if ':' in existing_bug.regressed:
    database_specific['introduced_range'] = existing_bug.regressed
  if ':' in existing_bug.fixed:
    database_specific['fixed_range'] = existing_bug.fixed

  if database_specific:
    existing_bug.database_specific = database_specific

  # Don't display additional ranges for imprecise commits, as they can be
  # confusing.
  if ':' in existing_bug.fixed or ':' in existing_bug.regressed:
    existing_bug.put()
    return

  def _sort_key(value):
    # Allow sorting of None values.
    return (value[0] or '', value[1] or '')

  for introduced_in, fixed_in in sorted(result.affected_ranges, key=_sort_key):
    if not fixed_in:
      # Normalize None to '' so the comparison below matches existing_bug.fixed.
      fixed_in = ''

    if (introduced_in == existing_bug.regressed and
        fixed_in == existing_bug.fixed):
      # Don't repeat the main range.
      continue

    existing_bug.affected_ranges.append(
        osv.AffectedRange(
            type='GIT', repo_url=repo_url, introduced=introduced_in,
            fixed=fixed_in))

  existing_bug.put()
def _do_update(self, source_repo, repo, vulnerability, yaml_path,
               relative_path, original_sha256):
  """Process updates on a vulnerability.

  Collects declared GIT ranges, derives additional ranges/versions/commits
  by analyzing clones of the referenced package repos, enumerates
  ECOSYSTEM/SEMVER versions, pushes the updated entry back to the source
  repo, and syncs (or creates) the Bug in Datastore.

  Args:
    source_repo: Source repository entity the vulnerability came from.
    repo: Checked-out source repository object.
    vulnerability: Vulnerability proto being processed.
    yaml_path: Path to the vulnerability YAML file in the checkout.
    relative_path: Path of the YAML file relative to the source repo root
      (used to build the Bug's source_id).
    original_sha256: sha256 of the YAML file when the update was requested.
  """
  logging.info('Processing update for vulnerability %s', vulnerability.id)
  package_repo_dir = tempfile.TemporaryDirectory()
  package_repo_url = None
  package_repo = None

  # Merge in fix information from any existing FixResult for this bug before
  # doing range analysis.
  bug = osv.Bug.get_by_id(vulnerability.id)
  if bug:
    fix_result = osv.FixResult.get_by_id(bug.source_id)
    if fix_result:
      add_fix_information(vulnerability, bug, fix_result)

  # Repo -> Git range collectors
  range_collectors = collections.defaultdict(osv.RangeCollector)
  versions_with_bug = set()
  versions_with_fix = set()
  commits = set()
  try:
    # First pass: seed the collectors with the ranges already declared in the
    # vulnerability entry.
    for affected_range in vulnerability.affects.ranges:
      if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
        continue

      # Convert empty values ('') to None.
      introduced = affected_range.introduced or None
      fixed = affected_range.fixed or None
      range_collectors[affected_range.repo].add(introduced, fixed)

    # Second pass: analyze each declared range against a clone of its repo.
    for affected_range in vulnerability.affects.ranges:
      # Go through existing provided ranges to find additional ranges (via
      # cherrypicks and branches).
      if affected_range.type != vulnerability_pb2.AffectedRange.GIT:
        continue

      current_repo_url = affected_range.repo
      if current_repo_url != package_repo_url:
        # Different repo from previous one.
        package_repo_dir.cleanup()
        package_repo_dir = tempfile.TemporaryDirectory()
        package_repo_url = current_repo_url
        package_repo = osv.clone_with_retries(package_repo_url,
                                              package_repo_dir.name)

      result = osv.get_affected(package_repo, affected_range.introduced,
                                affected_range.fixed)
      for introduced, fixed in result.affected_ranges:
        range_collectors[current_repo_url].add(introduced, fixed)

      versions_with_fix.update(result.tags_with_fix)
      versions_with_bug.update(result.tags_with_bug)
      commits.update(result.commits)
  finally:
    # Always clean up the last temporary clone, even on error.
    package_repo_dir.cleanup()

  # Enumerate ECOSYSTEM and SEMVER ranges.
  versions = self._enumerate_versions(vulnerability.package.name,
                                      vulnerability.package.ecosystem,
                                      vulnerability.affects.ranges)
  # Add additional versions derived from tags (tags that contain the bug but
  # not the fix).
  versions.extend(versions_with_bug - versions_with_fix)

  if self._push_new_ranges_and_versions(source_repo, repo, vulnerability,
                                        yaml_path, original_sha256,
                                        range_collectors, versions):
    logging.info('Updated range/versions for vulnerability %s.',
                 vulnerability.id)
  else:
    logging.warning('Discarding changes for %s due to conflicts.',
                    vulnerability.id)
    return

  # Update datastore with new information.
  bug = osv.Bug.get_by_id(vulnerability.id)
  if not bug:
    if source_repo.name == 'oss-fuzz':
      # NOTE(review): OSS-Fuzz entries appear to be created elsewhere (via the
      # OSS-Fuzz import pipeline), so a missing Bug here is only logged —
      # confirm against the OSS-Fuzz import path.
      logging.warning('%s not found for OSS-Fuzz source.', vulnerability.id)
      return

    # For non-OSS-Fuzz sources, create a new Bug entry for this vulnerability.
    bug = osv.Bug(
        id=vulnerability.id,
        source_id=f'{source_repo.name}:{relative_path}',
        timestamp=osv.utcnow(),
        status=osv.BugStatus.PROCESSED,
        source_of_truth=osv.SourceOfTruth.SOURCE_REPO)

  bug.update_from_vulnerability(vulnerability)
  bug.put()

  osv.update_affected_commits(bug.key.id(), commits, bug.project,
                              bug.ecosystem, bug.public)