def cli(options, args):
    """Run dependency detection for every revspec in ``args``.

    A JSON listener is attached when ``options.json`` is set, otherwise
    a CLI listener.  ``options.multi`` is switched on as soon as more
    than one revision is going to be processed, either because several
    revspecs were given or because one revspec expands to several
    revisions.  In JSON mode the collected result is printed after all
    revisions have been processed.
    """
    detector = DependencyDetector(options)

    listener_class = (JSONDependencyListener if options.json
                      else CLIDependencyListener)
    listener = listener_class(options)
    detector.add_listener(listener)

    if len(args) > 1:
        options.multi = True

    for revspec in args:
        revisions = GitUtils.rev_parse(revspec)
        if len(revisions) > 1:
            # More than one result: expand the revspec with rev_list.
            revisions = GitUtils.rev_list(revspec)
            if len(revisions) > 1:
                options.multi = True

        for revision in revisions:
            detector.find_dependencies(revision)

    if options.json:
        print(json.dumps(listener.json(), sort_keys=True, indent=4))
def add_commit(self, commit):
    """Register ``commit`` in the JSON commit list and return its index.

    If the commit's SHA1 has already been registered, the index that
    was assigned to it earlier is returned and nothing is appended.
    """
    sha1 = commit.hex
    index_by_sha1 = self._commits
    if sha1 in index_by_sha1:
        return index_by_sha1[sha1]

    title, separator, body = commit.message.partition("\n")
    entry = {
        'explored': False,
        'sha1': sha1,
        'name': GitUtils.abbreviate_sha1(sha1),
        'describe': GitUtils.describe(sha1),
        'refs': GitUtils.refs_to(sha1, self.repo()),
        'author_name': commit.author.name,
        'author_mail': commit.author.email,
        'author_time': commit.author.time,
        'author_offset': commit.author.offset,
        'committer_name': commit.committer.name,
        'committer_mail': commit.committer.email,
        'committer_time': commit.committer.time,
        'committer_offset': commit.committer.offset,
        # The full message is not stored as a single field; it is
        # split into title / separator / body instead.
        'title': title,
        'separator': separator,
        'body': body.lstrip("\n"),
    }

    commit_list = self._json['commits']
    commit_list.append(entry)
    index = len(commit_list) - 1
    index_by_sha1[sha1] = index
    return index
def cherry_pick(self, sha):
    """Run ``git cherry-pick`` for ``sha`` and record the result.

    After the cherry-pick the current topic is updated for ``sha`` and
    the SHA1 of the resulting HEAD is stored in ``self.exploded`` under
    the original ``sha``.
    """
    GitExplodeUtils.git('cherry-pick', sha)
    self.update_current_topic(sha)

    new_head = GitExplodeUtils.get_head_sha1()
    self.exploded[sha] = new_head

    original = GitUtils.ref_commit(self.repo, sha)
    summary = GitUtils.oneline(original)
    self.logger.debug("- cherry-picked %s as %s (%s)"
                      % (sha[:8], new_head[:8], summary))
def __init__(self, repo, base, head, debug, context_lines):
    """Store the explode parameters and resolve the base commit.

    :param repo: repository object handed to ``GitUtils.ref_commit``
    :param base: ref naming the base commit
    :param head: ref naming the head commit
    :param debug: debug flag passed on to ``standard_logger``
    :param context_lines: stored as-is for later use
    """
    self.repo, self.base, self.head = repo, base, head
    self.debug = debug
    self.context_lines = context_lines
    self.logger = standard_logger('git-explode', debug)

    base_commit = GitUtils.ref_commit(repo, base)
    self.base_commit = base_commit
    self.logger.debug("base commit %s is %s"
                      % (base, GitUtils.commit_summary(base_commit)))

    self.topic_mgr = TopicManager('topic%d', self.logger)

    # Map commits to their exploded version
    self.exploded = {}
def get_dependencies(self):
    """
    Detect the commit dependency tree for the revisions in
    ``self.base..self.head`` and return it together with the commits
    that were looked up along the way.

    Note that the dependency tree goes in the reverse direction to the
    git commit graph, in that the leaves of the dependency tree are
    the oldest commits, because newer commits depend on older commits.

    A KeyboardInterrupt raised while one revision is being analysed
    abandons that revision only: a warning is logged, the remaining
    revisions are still processed, and whatever has been detected is
    returned.

    :return: (commits, dependencies_from, dependencies_on) where
        ``commits`` is ``detector.commits`` and the other two are the
        listener's dependency mappings in each direction
    """
    detector_args = OpenStruct({
        'recurse': True,
        'exclude_commits': [self.base],
        'debug': self.debug,
        'context_lines': self.context_lines,
    })
    detector = DependencyDetector(detector_args, self.repo)
    listener = ExplodeDependencyListener({})
    detector.add_listener(listener)

    for rev in GitUtils.rev_list("%s..%s" % (self.base, self.head)):
        try:
            detector.find_dependencies(rev)
        except KeyboardInterrupt:
            # NOTE(review): continuing after an interrupt is kept from
            # the original code (presumably deliberate best-effort);
            # the interrupt is now reported instead of being swallowed
            # silently.
            self.logger.warning(
                "Interrupted while detecting dependencies of %s; "
                "continuing with remaining revisions", rev)

    return (detector.commits,
            listener.dependencies_from(),
            listener.dependencies_on())
def get_commit(self, rev):
    """Return the commit for ``rev``, looking it up at most once.

    Results of ``GitUtils.ref_commit`` are kept in ``self.commits``,
    keyed by ``rev``, and reused on later calls.
    """
    cache = self.commits
    if rev not in cache:
        cache[rev] = GitUtils.ref_commit(self.repo, rev)
    return cache[rev]
def process_new_dependency(self, dependent, dependent_sha1,
                           dependency, dependency_sha1,
                           path, line_num):
    """Record a newly discovered edge ``dependent -> dependency``.

    Listeners are sent "new_commit" when ``self.seen_commit`` reports
    the dependency as not yet seen, and "new_dependency" in every case.
    Unless the dependency is already in the TODO list or already done,
    it is appended to the TODO list when ``self.options.recurse`` is
    set and it has no entry of its own in ``self.dependencies``.

    :param dependent: commit which depends on ``dependency``
    :param dependent_sha1: key of ``dependent`` in ``self.dependencies``
    :param dependency: commit being depended on (``.hex`` is used)
    :param dependency_sha1: SHA1 string of ``dependency``
    :param path: file in which the dependency was found
    :param line_num: line number which revealed the dependency
    """
    if not self.seen_commit(dependency):
        self.notify_listeners("new_commit", dependency)

    # The edge record belongs to the (dependent, dependency) pair, so
    # make sure it exists whether or not the dependency commit itself
    # has been seen before; an existing record is left untouched.
    edges = self.dependencies[dependent_sha1]
    if dependency_sha1 not in edges:
        edges[dependency_sha1] = {}

    self.notify_listeners("new_dependency",
                          dependent, dependency, path, line_num)

    self.logger.info(
        " New dependency %s -> %s via line %s (%s)" %
        (dependent_sha1[:8], dependency_sha1[:8], line_num,
         GitUtils.oneline(dependency)))

    if dependency_sha1 in self.todo_d:
        self.logger.info(
            " Dependency on %s via line %s already in TODO"
            % (dependency_sha1[:8], line_num,))
        return

    if dependency_sha1 in self.done_d:
        self.logger.info(
            " Dependency on %s via line %s already done" %
            (dependency_sha1[:8], line_num,))
        return

    if dependency_sha1 not in self.dependencies and self.options.recurse:
        # Queue the dependency so that its own dependencies get
        # detected later.
        self.todo.append(dependency)
        self.todo_d[dependency.hex] = True
        self.logger.info(" + Added %s to TODO" % dependency.hex[:8])
def process_hunk_line(self, dependent, dependent_sha1, parent, path,
                      line, line_to_culprit):
    """Process one line of blame output for a hunk.

    Lines which do not match ``<40 hex digits> <num> <num> [<num>]``
    are ignored.  For a matching line the commit named by the SHA1 is
    looked up, recorded in ``line_to_culprit`` under the second number
    on the line, and, unless the commit is excluded, registered as a
    dependency of ``dependent`` and as the source of this line.

    :param dependent: commit whose hunk is being blamed
    :param dependent_sha1: key of ``dependent`` in ``self.dependencies``
    :param parent: passed through to ``record_dependency_source``
    :param path: file the hunk belongs to
    :param line: one line of blame output
    :param line_to_culprit: dict updated in place, mapping line number
        to the SHA1 of the commit found for that line
    """
    self.logger.debug(" ! " + line.rstrip())

    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    m = re.match(r'^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$', line)
    if not m:
        return

    dependency_sha1, line_num = m.group(1, 3)
    line_num = int(line_num)
    dependency = self.get_commit(dependency_sha1)
    line_to_culprit[line_num] = dependency.hex

    if self.is_excluded(dependency):
        self.logger.debug(
            " Excluding dependency %s from line %s (%s)" %
            (dependency_sha1[:8], line_num,
             GitUtils.oneline(dependency)))
        return

    if dependency_sha1 not in self.dependencies[dependent_sha1]:
        self.process_new_dependency(dependent, dependent_sha1,
                                    dependency, dependency_sha1,
                                    path, line_num)

    self.record_dependency_source(parent,
                                  dependent, dependent_sha1,
                                  dependency, dependency_sha1,
                                  path, line_num, line)
def deps(revspec):
    """Return the dependencies of ``revspec`` as a JSON response.

    ``revspec`` is expanded with ``GitUtils.rev_list`` when it contains
    ``..``; otherwise it is treated as a single revision.  A 422 JSON
    error is returned when the range cannot be resolved, when it
    contains no commits, or when a revision cannot be resolved.

    The response is the listener's JSON with an added ``query`` entry
    describing the request; its tip is the first revision.
    """
    detector = DependencyDetector(options)
    listener = JSONDependencyListener(options)
    detector.add_listener(listener)

    if '..' in revspec:
        try:
            revisions = GitUtils.rev_list(revspec)
        except subprocess.CalledProcessError:
            return json_error(
                422, 'Invalid revision range',
                "Could not resolve revision range '%s'" % revspec,
                revspec=revspec)
        if not revisions:
            # A valid but empty range (e.g. both ends equal) has no tip
            # commit; report it rather than failing on revisions[0].
            return json_error(
                422, 'Invalid revision range',
                "Revision range '%s' does not contain any commits"
                % revspec,
                revspec=revspec)
    else:
        revisions = [revspec]

    for rev in revisions:
        try:
            detector.get_commit(rev)
        except InvalidCommitish:
            return json_error(
                422, 'Invalid revision',
                "Could not resolve revision '%s'" % rev,
                rev=rev)

        detector.find_dependencies(rev)

    tip_commit = detector.get_commit(revisions[0])
    tip_sha1 = tip_commit.hex

    # Named "result" so that the json module name is not shadowed.
    result = listener.json()
    result['query'] = {
        'revspec': revspec,
        'revisions': revisions,
        'tip_sha1': tip_sha1,
        'tip_abbrev': GitUtils.abbreviate_sha1(tip_sha1),
    }
    return jsonify(result)
def explode(self, commits, deps_from, deps_on):
    """
    Explode commits one at a time, starting with the leaves of the
    dependency tree.  Each commit taken from the front of the queue
    has its cherry-pick base prepared and is then cherry-picked;
    afterwards any commits which have become leaves are queued.

    :param commits: dict mapping SHA1 hashes to pygit2.Commit objects
    :param deps_from: dict mapping dependents to dependencies
    :param deps_on: dict mapping in opposite direction
    """
    queue = self.get_leaves(commits, deps_from)

    # Working copy of deps_from: whenever a commit is exploded it gets
    # removed from the inner dicts of this copy, so an empty inner
    # dict means all dependencies of that commit have been exploded.
    remaining_deps = copy.deepcopy(deps_from)

    self.logger.debug("Initial queue of leaves:")
    for leaf in queue:
        self.logger.debug(' ' + GitUtils.commit_summary(leaf))

    self.current_branch = None

    while queue:
        current = queue.pop(0)
        sha = current.hex
        self.logger.debug("Exploding %s" %
                          GitUtils.commit_summary(current))

        # Anything in the queue must have no unexploded dependencies.
        if remaining_deps[sha]:
            abort("BUG: unexploded deps from %s" %
                  GitUtils.commit_summary(current))

        self.prepare_cherrypick_base(sha, deps_from[sha], commits)
        self.cherry_pick(sha)
        self.queue_new_leaves(queue, current, commits, deps_on,
                              remaining_deps)
def queue_new_leaves(self, todo, exploded_commit, commits, deps_on,
                     unexploded_deps_from):
    """Queue commits whose last unexploded dependency was just exploded.

    For every commit depending on ``exploded_commit``, the exploded
    commit is removed from that dependent's entry in
    ``unexploded_deps_from``.  A dependent left with no unexploded
    dependencies has effectively become a leaf of the tree of
    unexploded commits, so it is inserted at the front of ``todo``.
    """
    exploded_sha = exploded_commit.hex

    for dependent_sha in deps_on[exploded_sha]:
        outstanding = unexploded_deps_from[dependent_sha]
        del outstanding[exploded_sha]
        if outstanding:
            # Still waiting for other dependencies to be exploded.
            continue

        freed = commits[dependent_sha]
        self.logger.debug("+ pushed to queue: %s" %
                          GitUtils.commit_summary(freed))
        todo.insert(0, freed)
def __init__(self, options, repo=None, logger=None):
    """Initialise the detector's state.

    :param options: option object; ``options.debug`` is passed to
        ``standard_logger`` when no logger is supplied
    :param repo: repository to work on; when None, the result of
        ``GitUtils.get_repo()`` is used
    :param logger: logger to use; when None, a standard logger named
        after the class is created
    """
    self.options = options

    # Honour a caller-supplied logger; only build the default one when
    # none was given, so that self.logger is always set.
    if logger is None:
        logger = standard_logger(self.__class__.__name__,
                                 options.debug)
    self.logger = logger

    if repo is None:
        self.repo = GitUtils.get_repo()
    else:
        self.repo = repo

    # Nested dict mapping dependents -> dependencies -> files
    # causing that dependency -> numbers of lines within that file
    # causing that dependency.  The first two levels form edges in
    # the dependency graph, and the latter two tell us what caused
    # those edges.
    self.dependencies = {}

    # A TODO list (queue) and dict of dependencies which haven't
    # yet been recursively followed.  Only useful when recursing.
    self.todo = []
    self.todo_d = {}

    # An ordered list and dict of commits whose dependencies we
    # have already detected.
    self.done = []
    self.done_d = {}

    # A cache mapping SHA1s to commit objects
    self.commits = {}

    # Memoization for branch_contains()
    self.branch_contains_cache = {}

    # Callbacks to be invoked when a new dependency has been
    # discovered.
    self.listeners = []
def test_abbreviate_sha1():
    """Abbreviating ``HEAD`` yields a name of exactly 7 characters."""
    abbreviated = GitUtils.abbreviate_sha1("HEAD")
    expected_length = 7
    assert len(abbreviated) == expected_length
def register_new_dependent(self, dependent, dependent_sha1):
    """Create the ``self.dependencies`` entry for ``dependent`` if needed.

    Nothing happens when ``dependent_sha1`` already has an entry.
    Otherwise the dependent is logged, given an empty entry, and
    listeners are sent "new_dependent".
    """
    if dependent_sha1 in self.dependencies:
        return

    summary = GitUtils.commit_summary(dependent)
    self.logger.info(" New dependent: %s" % summary)
    self.dependencies[dependent_sha1] = {}
    self.notify_listeners("new_dependent", dependent)
def blame_hunk(self, dependent, parent, path, hunk):
    """Run git blame on the parts of the hunk which exist in the
    older commit in the diff.  The commits generated by git blame
    are the commits which the newer commit in the diff depends on,
    because without the lines from those commits, the hunk would
    not apply correctly.

    :param dependent: commit whose hunk is analysed (``.hex`` is used)
    :param parent: commit at which ``git blame`` is run
    :param path: file path handed to ``git blame``
    :param hunk: diff hunk providing ``old_start``, ``old_lines``,
        ``new_start``, ``new_lines`` and ``lines``
    :raises subprocess.CalledProcessError: if ``git blame`` exits
        with a non-zero status
    """
    line_range_before = "-%d,%d" % (hunk.old_start, hunk.old_lines)
    line_range_after = "+%d,%d" % (hunk.new_start, hunk.new_lines)
    self.logger.debug(" Blaming hunk %s @ %s" %
                      (line_range_before, parent.hex[:8]))

    if not self.tree_lookup(path, parent):
        # This is probably because dependent added a new directory
        # which was not previously in the parent.
        return

    cmd = [
        'git', 'blame',
        '--porcelain',
        '-L', "%d,+%d" % (hunk.old_start, hunk.old_lines),
        parent.hex, '--', path
    ]
    blame = subprocess.check_output(cmd, universal_newlines=True)

    dependent_sha1 = dependent.hex
    if dependent_sha1 not in self.dependencies:
        self.logger.debug(" New dependent: %s" %
                          GitUtils.commit_summary(dependent))
        self.dependencies[dependent_sha1] = {}
        self.notify_listeners("new_dependent", dependent)

    # Maps line number -> SHA1 of the commit blamed for that line;
    # used below when logging the annotated hunk.
    line_to_culprit = {}

    # Raw string: "\d" in a plain literal is an invalid escape
    # sequence.  Compiled once because it is applied to every line.
    blame_line_re = re.compile(r'^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$')

    for line in blame.split('\n'):
        self.logger.debug(" !" + line.rstrip())
        m = blame_line_re.match(line)
        if not m:
            continue

        dependency_sha1, line_num = m.group(1, 3)
        line_num = int(line_num)
        dependency = self.get_commit(dependency_sha1)
        line_to_culprit[line_num] = dependency.hex

        if self.is_excluded(dependency):
            self.logger.debug(
                " Excluding dependency %s from line %s (%s)" %
                (dependency_sha1[:8], line_num,
                 GitUtils.oneline(dependency)))
            continue

        if dependency_sha1 not in self.dependencies[dependent_sha1]:
            if not self.seen_commit(dependency):
                self.notify_listeners("new_commit", dependency)

            # The edge record belongs to the (dependent, dependency)
            # pair and is indexed unconditionally further down, so it
            # is created whether or not the dependency commit itself
            # has been seen before.
            self.dependencies[dependent_sha1][dependency_sha1] = {}

            self.notify_listeners("new_dependency",
                                  dependent, dependency, path, line_num)

            self.logger.debug(
                " New dependency %s -> %s via line %s (%s)" %
                (dependent_sha1[:8], dependency_sha1[:8], line_num,
                 GitUtils.oneline(dependency)))

            if dependency_sha1 in self.todo_d:
                self.logger.debug(
                    " Dependency on %s via line %s already in TODO" %
                    (dependency_sha1[:8], line_num,))
                continue

            if dependency_sha1 in self.done_d:
                self.logger.debug(
                    " Dependency on %s via line %s already done" %
                    (dependency_sha1[:8], line_num,))
                continue

            if dependency_sha1 not in self.dependencies:
                if self.options.recurse:
                    self.todo.append(dependency)
                    self.todo_d[dependency.hex] = True
                    self.logger.debug(" + Added %s to TODO" %
                                      dependency.hex[:8])

        dep_sources = self.dependencies[dependent_sha1][dependency_sha1]

        if path not in dep_sources:
            dep_sources[path] = {}
            self.notify_listeners('new_path',
                                  dependent, dependency, path, line_num)

        if line_num in dep_sources[path]:
            abort("line %d already found when blaming %s:%s\n"
                  "old:\n %s\n"
                  "new:\n %s" %
                  (line_num, parent.hex[:8], path,
                   dep_sources[path][line_num], line))

        dep_sources[path][line_num] = line
        self.logger.debug(" New line for %s -> %s: %s" %
                          (dependent_sha1[:8], dependency_sha1[:8], line))
        self.notify_listeners('new_line',
                              dependent, dependency, path, line_num)

    # Log the hunk with each pre-existing line annotated by the commit
    # found for it above.
    diff_format = ' |%8.8s %5s %s%s'
    hunk_header = '@@ %s %s @@' % (line_range_before, line_range_after)
    self.logger.debug(diff_format % ('--------', '-----', '', hunk_header))
    line_num = hunk.old_start
    for line in hunk.lines:
        if "\n\\ No newline at end of file" == line.content.rstrip():
            break
        if line.origin == '+':
            # Added lines do not exist in the parent, so they have no
            # culprit and do not advance the old line counter.
            rev = ln = ''
        else:
            rev = line_to_culprit[line_num]
            ln = line_num
            line_num += 1
        self.logger.debug(diff_format %
                          (rev, ln, line.origin, line.content.rstrip()))
def test_new():
    """A GitExploder can be constructed for base HEAD~5 and head HEAD."""
    exploder = GitExploder(
        GitUtils.get_repo(),
        "HEAD~5",   # base
        "HEAD",     # head
        False,      # debug
        1,          # context_lines
    )
    assert exploder is not None
def main(args):
    """Parse ``args`` and run a GitExploder configured from them.

    The exploder works on the repository returned by
    ``GitUtils.get_repo()``.
    """
    parsed = parse_args(args)
    exploder = GitExploder(
        GitUtils.get_repo(),
        parsed.base,
        parsed.head,
        parsed.debug,
        parsed.context_lines,
    )
    exploder.run()