def GetBranchRevisionMap(gitRepoPath):
    """Group the branch-exclusive commits of a repository by their tree hash.

    For every branch reported by the repository, the commits that are
    reachable from that branch but from no other branch are listed with
    ``git rev-list --reverse`` and parsed with ``CatFileCommit``. Each parsed
    commit is filed under the hash of the object it points to, so commits
    that point to the same tree end up in the same list.

    The resulting table is the lookup structure for the following stitching
    outline, which this function does not perform itself:
      1. Compare the commit timestamps of all the different branches.
      2. Select the earliest commit and iterate over commits (from all
         branches).
      3. Store each processed commit against the hash of the "tree" object
         to which it points (see ``git cat-file -p <commit-hash>``).
      4. When processing any new commit, look up whether its tree object has
         already been cataloged in step 3.
           YES -> Decide whether it makes sense to stitch the branches
                  together, considering the committer, the commit time, the
                  author and the author time of each commit.
           NO  -> Continue.

    Args:
        gitRepoPath: Path that is checked with ``git.isRepo`` and opened with
            ``git.open``.

    Returns:
        A dict mapping a tree hash to the list of commit-info dicts (as
        returned by ``CatFileCommit``, with an extra ``u'branch'`` key holding
        the branch object the commit was found on), or ``None`` when
        ``gitRepoPath`` is not a git repository.

    Raises:
        subprocess.CalledProcessError: If ``git rev-list`` exits with a
            non-zero status. The failed command is printed before the
            exception is re-raised.
    """
    if not git.isRepo(gitRepoPath):
        return None

    repo = git.open(gitRepoPath)
    branchList = repo.branch_list()
    branchRevMap = {}

    for current_branch in branchList:
        # Build the command that lists the commits found only on this branch:
        #   git rev-list --reverse my-branch ^other-branch ^another-branch
        # The arguments are kept as a list (never joined and re-split) and are
        # formatted with unicode templates so that non-ASCII branch names do
        # not trigger an implicit ASCII encode.
        cmd = [u'git', u'rev-list', u'--reverse',
               u'{0}'.format(current_branch.name)]
        cmd.extend(
            u'^{0}'.format(other_branch.name)
            for other_branch in branchList
            if current_branch != other_branch
        )

        # NOTE(review): no cwd is passed, so git runs in the process's current
        # working directory rather than explicitly in gitRepoPath - confirm
        # that callers change directory first.
        try:
            revlist = subprocess.check_output([git.to_utf8(c) for c in cmd])
        except subprocess.CalledProcessError:
            print(u'Failed to execute command: {0}'.format(u' '.join(cmd)))
            # Bare raise keeps the original traceback.
            raise

        # Map every commit returned by rev-list to the tree it points to.
        for rev in revlist.split():
            commit_info = CatFileCommit(rev)
            commit_info[u'branch'] = current_branch
            tree_hash = commit_info[u'object'][u'hash']
            branchRevMap.setdefault(tree_hash, []).append(commit_info)

    return branchRevMap
def GetBranchRevisionMap(gitRepoPath):
    """Group the branch-exclusive commits of a repository by their tree hash.

    For every branch reported by the repository, the commits that are
    reachable from that branch but from no other branch are listed with
    ``git rev-list --reverse`` and parsed with ``CatFileCommit``. Each parsed
    commit is filed under the hash of the object it points to, so commits
    that point to the same tree end up in the same list.

    The resulting table is the lookup structure for the following stitching
    outline, which this function does not perform itself:
      1. Compare the commit timestamps of all the different branches.
      2. Select the earliest commit and iterate over commits (from all
         branches).
      3. Store each processed commit against the hash of the "tree" object
         to which it points (see ``git cat-file -p <commit-hash>``).
      4. When processing any new commit, look up whether its tree object has
         already been cataloged in step 3.
           YES -> Decide whether it makes sense to stitch the branches
                  together, considering the committer, the commit time, the
                  author and the author time of each commit.
           NO  -> Continue.

    Args:
        gitRepoPath: Path that is checked with ``git.isRepo`` and opened with
            ``git.open``.

    Returns:
        A dict mapping a tree hash to the list of commit-info dicts (as
        returned by ``CatFileCommit``, with an extra ``u'branch'`` key holding
        the branch object the commit was found on), or ``None`` when
        ``gitRepoPath`` is not a git repository.

    Raises:
        subprocess.CalledProcessError: If ``git rev-list`` exits with a
            non-zero status. The failed command is printed before the
            exception is re-raised.
    """
    if not git.isRepo(gitRepoPath):
        return None

    repo = git.open(gitRepoPath)
    branchList = repo.branch_list()
    branchRevMap = {}

    for current_branch in branchList:
        # Build the command that lists the commits found only on this branch:
        #   git rev-list --reverse my-branch ^other-branch ^another-branch
        # The arguments are kept as a list (never joined and re-split) and are
        # formatted with unicode templates so that non-ASCII branch names do
        # not trigger an implicit ASCII encode.
        cmd = [u'git', u'rev-list', u'--reverse',
               u'{0}'.format(current_branch.name)]
        cmd.extend(
            u'^{0}'.format(other_branch.name)
            for other_branch in branchList
            if current_branch != other_branch
        )

        # NOTE(review): no cwd is passed, so git runs in the process's current
        # working directory rather than explicitly in gitRepoPath - confirm
        # that callers change directory first.
        try:
            revlist = subprocess.check_output([git.to_utf8(c) for c in cmd])
        except subprocess.CalledProcessError:
            print(u'Failed to execute command: {0}'.format(u' '.join(cmd)))
            # Bare raise keeps the original traceback.
            raise

        # Map every commit returned by rev-list to the tree it points to.
        for rev in revlist.split():
            commit_info = CatFileCommit(rev)
            commit_info[u'branch'] = current_branch
            tree_hash = commit_info[u'object'][u'hash']
            branchRevMap.setdefault(tree_hash, []).append(commit_info)

    return branchRevMap
def CatFileCommit(commit_hash):
    """Parse the output of ``git cat-file -p <commit_hash>`` into a dict.

    Args:
        commit_hash: Identifier of the commit to inspect; it is formatted
            into the git command line.

    Returns:
        A dict with the following keys:
          u'hash'      - ``commit_hash`` exactly as passed in.
          u'object'    - dict with the u'type' and u'hash' read from the
                         first output line; empty when that line is not a
                         ``tree``/``blob`` entry.
          u'parents'   - list of parent hashes; only present when the commit
                         has at least one parent.
          u'author'    - dict with u'name', u'email', u'time' and
                         u'timezone'; only present when the author line was
                         recognised.
          u'committer' - same layout as u'author', for the committer line.
          u'comment'   - the remaining output lines joined with newlines.

    Raises:
        subprocess.CalledProcessError: If ``git cat-file`` exits with a
            non-zero status. The failed command is printed before the
            exception is re-raised.
    """
    refRe = re.compile(r'(?P<type>tree|blob) (?P<hash>[a-fA-F0-9]+)')
    parentRe = re.compile(r'parent (?P<hash>[a-fA-F0-9]+)')
    # The name group accepts any text: requiring a leading \w rejected
    # identities such as "(no author)" or names starting with a non-ASCII
    # character, which silently dropped the author/committer entries.
    authorCommitterRe = re.compile(
        r'(?P<who>author|committer) (?P<name>.*) <(?P<email>.*)> '
        r'(?P<time>[0-9]+) (?P<timezone>[\+-]?[0-9]+)'
    )
    identityKeys = (u'name', u'email', u'time', u'timezone')

    git_cmd = u'git cat-file -p {0}'.format(commit_hash)
    # NOTE(review): no cwd is passed, so git runs in the process's current
    # working directory - confirm that callers are inside the repository.
    try:
        cat_file_output = subprocess.check_output(
            [git.to_utf8(c) for c in git_cmd.split()]
        )
    except subprocess.CalledProcessError:
        print(u'Failed to execute command: {0}'.format(git_cmd))
        # Bare raise keeps the original traceback.
        raise

    newline = git.to_utf8(u'\n')
    cat_file_lines = cat_file_output.split(newline)
    lineCount = len(cat_file_lines)

    commit_info = {u'hash': commit_hash}

    # The first line is the object to which the commit points. split() always
    # yields at least one element, so index 0 is safe.
    obj = {}
    m = refRe.match(cat_file_lines[0])
    if m:
        obj[u'type'] = m.group(u'type')
        obj[u'hash'] = m.group(u'hash')
    commit_info[u'object'] = obj

    # The next few lines are the parent/s. Consume them all; the index is
    # advanced on every match so it stays correct even when the output ends
    # with a parent line.
    nextIndex = 1
    parents = []
    while nextIndex < lineCount:
        m = parentRe.match(cat_file_lines[nextIndex])
        if not m:
            break
        parents.append(m.group(u'hash'))
        nextIndex += 1
    if parents:
        commit_info[u'parents'] = parents

    # The next two lines are the author followed by the committer. Each slot
    # only accepts a line carrying the matching keyword, so a missing author
    # line cannot cause the committer to be stored as the author.
    for who in (u'author', u'committer'):
        if nextIndex >= lineCount:
            break
        m = authorCommitterRe.match(cat_file_lines[nextIndex])
        if m and m.group(u'who') == who:
            commit_info[who] = {key: m.group(key) for key in identityKeys}
            nextIndex += 1

    # Skip the single blank line separating the headers from the message.
    # NOTE(review): any further header lines that are not tree/parent/author/
    # committer are not parsed and therefore end up at the start of
    # u'comment' - confirm whether callers care.
    if nextIndex < lineCount and not cat_file_lines[nextIndex]:
        nextIndex += 1
    commit_info[u'comment'] = newline.join(cat_file_lines[nextIndex:])

    return commit_info
def CatFileCommit(commit_hash):
    """Parse the output of ``git cat-file -p <commit_hash>`` into a dict.

    Args:
        commit_hash: Identifier of the commit to inspect; it is formatted
            into the git command line.

    Returns:
        A dict with the following keys:
          u'hash'      - ``commit_hash`` exactly as passed in.
          u'object'    - dict with the u'type' and u'hash' read from the
                         first output line; empty when that line is not a
                         ``tree``/``blob`` entry.
          u'parents'   - list of parent hashes; only present when the commit
                         has at least one parent.
          u'author'    - dict with u'name', u'email', u'time' and
                         u'timezone'; only present when the author line was
                         recognised.
          u'committer' - same layout as u'author', for the committer line.
          u'comment'   - the remaining output lines joined with newlines.

    Raises:
        subprocess.CalledProcessError: If ``git cat-file`` exits with a
            non-zero status. The failed command is printed before the
            exception is re-raised.
    """
    refRe = re.compile(r'(?P<type>tree|blob) (?P<hash>[a-fA-F0-9]+)')
    parentRe = re.compile(r'parent (?P<hash>[a-fA-F0-9]+)')
    # The name group accepts any text: requiring a leading \w rejected
    # identities such as "(no author)" or names starting with a non-ASCII
    # character, which silently dropped the author/committer entries.
    authorCommitterRe = re.compile(
        r'(?P<who>author|committer) (?P<name>.*) <(?P<email>.*)> '
        r'(?P<time>[0-9]+) (?P<timezone>[\+-]?[0-9]+)'
    )
    identityKeys = (u'name', u'email', u'time', u'timezone')

    git_cmd = u'git cat-file -p {0}'.format(commit_hash)
    # NOTE(review): no cwd is passed, so git runs in the process's current
    # working directory - confirm that callers are inside the repository.
    try:
        cat_file_output = subprocess.check_output(
            [git.to_utf8(c) for c in git_cmd.split()]
        )
    except subprocess.CalledProcessError:
        print(u'Failed to execute command: {0}'.format(git_cmd))
        # Bare raise keeps the original traceback.
        raise

    newline = git.to_utf8(u'\n')
    cat_file_lines = cat_file_output.split(newline)
    lineCount = len(cat_file_lines)

    commit_info = {u'hash': commit_hash}

    # The first line is the object to which the commit points. split() always
    # yields at least one element, so index 0 is safe.
    obj = {}
    m = refRe.match(cat_file_lines[0])
    if m:
        obj[u'type'] = m.group(u'type')
        obj[u'hash'] = m.group(u'hash')
    commit_info[u'object'] = obj

    # The next few lines are the parent/s. Consume them all; the index is
    # advanced on every match so it stays correct even when the output ends
    # with a parent line.
    nextIndex = 1
    parents = []
    while nextIndex < lineCount:
        m = parentRe.match(cat_file_lines[nextIndex])
        if not m:
            break
        parents.append(m.group(u'hash'))
        nextIndex += 1
    if parents:
        commit_info[u'parents'] = parents

    # The next two lines are the author followed by the committer. Each slot
    # only accepts a line carrying the matching keyword, so a missing author
    # line cannot cause the committer to be stored as the author.
    for who in (u'author', u'committer'):
        if nextIndex >= lineCount:
            break
        m = authorCommitterRe.match(cat_file_lines[nextIndex])
        if m and m.group(u'who') == who:
            commit_info[who] = {key: m.group(key) for key in identityKeys}
            nextIndex += 1

    # Skip the single blank line separating the headers from the message.
    # NOTE(review): any further header lines that are not tree/parent/author/
    # committer are not parsed and therefore end up at the start of
    # u'comment' - confirm whether callers care.
    if nextIndex < lineCount and not cat_file_lines[nextIndex]:
        nextIndex += 1
    commit_info[u'comment'] = newline.join(cat_file_lines[nextIndex:])

    return commit_info