def test_parse_hash(self):
    """Check the value returned by ``Commit._parse_hash`` for valid and invalid hash strings."""
    # Each row is (string handed to the parser, value the parser is expected to return)
    expectations = (
        # A lower-case 40 character hash is returned unchanged
        (
            '1a2e41e3f7cdf51b1e1d02880cfb65eab9327ef2',
            '1a2e41e3f7cdf51b1e1d02880cfb65eab9327ef2',
        ),
        # An upper-case hash comes back lower-cased
        (
            'C3BD784F8B88BD03F64467DDD3304ED8BE28ACBE',
            'c3bd784f8b88bd03f64467ddd3304ed8be28acbe',
        ),
        # Text which is not a hash at all is rejected
        ('invalid hash', None),
        # A 41 character string is rejected
        ('c3bd784f8b88bd03f64467ddd3304ed8be28acbe1', None),
    )
    for candidate, expected in expectations:
        self.assertEqual(expected, Commit._parse_hash(candidate))
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
    """
    Resolve a single commit of this git checkout into a ``Commit`` object.

    The commit is selected by a revision, an identifier, a branch or tag, or a hash.
    When none of them is provided, ``HEAD`` is resolved.

    :param hash: Commit hash, validated with ``Commit._parse_hash``.
    :param revision: Subversion revision, converted to a hash with ``git svn find-rev``.
        Rejected when ``self.is_svn`` is false.
    :param identifier: Identifier parsed with ``Commit._parse_identifier``; the parsed value
        may carry its own branch point and branch.
    :param branch: Branch to resolve, or the branch the identifier is counted on.
    :param tag: Tag to resolve.
    :param include_log: When false, ``git log`` is run with ``--format=short`` and the
        returned commit has no message.
    :param include_identifier: When false, no identifier or branch point is computed and
        both are ``None`` on the returned commit.
    :return: The ``Commit`` built from the ``git log`` and ``git show`` output.
    :raises ValueError: For conflicting arguments, or when the branch point parsed from the
        identifier differs from the computed one.
    :raises self.Exception: When a git command fails or the request cannot be satisfied.
    """
    def git_log(ref):
        # Log of exactly one commit, in the format selected through include_log
        return run(
            [self.executable(), 'log', ref] + log_args,
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
        )

    def committer_time(ref):
        # '%ct' makes git print only the committer timestamp of the commit
        return run(
            [self.executable(), 'show', '-s', '--format=%ct', ref],
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
        )

    if revision:
        # Only git-svn checkouts can convert revisions to fully qualified commits
        if not self.is_svn:
            raise self.Exception('This git checkout does not support SVN revisions')
        if hash:
            raise ValueError('Cannot define both hash and revision')

        # Determine the hash for the provided Subversion revision
        revision = Commit._parse_revision(revision, do_assert=True)
        find_rev = run(
            [self.executable(), 'svn', 'find-rev', 'r{}'.format(revision)],
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
            timeout=3,
        )
        if find_rev.returncode:
            raise self.Exception("Failed to retrieve commit information for 'r{}'".format(revision))
        hash = find_rev.stdout.rstrip()
        if not hash:
            raise self.Exception("Failed to find 'r{}'".format(revision))

    default_branch = self.default_branch
    parsed_branch_point = None
    log_args = ['-1']
    if not include_log:
        log_args.append('--format=short')

    if identifier is not None:
        # Determine the `git log` output and branch for the given identifier
        if revision:
            raise ValueError('Cannot define both revision and identifier')
        if hash:
            raise ValueError('Cannot define both hash and identifier')
        if tag:
            raise ValueError('Cannot define both tag and identifier')

        parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
        if parsed_branch:
            if branch and branch != parsed_branch:
                raise ValueError(
                    "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                        branch, parsed_branch,
                    ),
                )
            branch = parsed_branch

        baseline = branch or 'HEAD'
        if baseline == 'HEAD':
            # HEAD counts as the default branch when the default branch is among its branches
            baseline_is_default = default_branch in self._branches_for(baseline)
        else:
            baseline_is_default = baseline == default_branch

        if baseline_is_default and parsed_branch_point:
            raise self.Exception('Cannot provide a branch point for a commit on the default branch')

        # Off the default branch, only the commits past the default branch are counted
        count_spec = baseline if baseline_is_default else '{}..{}'.format(default_branch, baseline)
        base_count = self._commit_count(count_spec)
        if identifier > base_count:
            raise self.Exception(
                'Identifier {} cannot be found on the specified branch in the current checkout'.format(identifier),
            )

        log = git_log('{}~{}'.format(baseline, base_count - identifier))
        if log.returncode:
            raise self.Exception(
                "Failed to retrieve commit information for 'i{}@{}'".format(identifier, baseline),
            )

        # Negative identifiers are actually commits on the default branch, so the identifier is
        # dropped here and re-computed once the hash is known
        if identifier < 0:
            if baseline_is_default:
                raise self.Exception('Illegal negative identifier on the default branch')
            identifier = None

    elif branch or tag:
        # Determine the `git log` output for the given branch or tag
        if hash:
            raise ValueError('Cannot define both tag/branch and hash')
        if branch and tag:
            raise ValueError('Cannot define both tag and branch')

        log = git_log(branch or tag)
        if log.returncode:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))

    else:
        # Determine the `git log` output for the given hash, or HEAD when there is none
        hash = Commit._parse_hash(hash, do_assert=True)
        log = git_log(hash or 'HEAD')
        if log.returncode:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))

    # Fully define the hash from the first line of the `git log` output
    log_lines = log.stdout.splitlines()
    hash_match = self.GIT_COMMIT.match(log_lines[0])
    if not hash_match:
        raise self.Exception('Invalid commit hash in git log')
    hash = hash_match.group('hash')

    # A commit is often on multiple branches, the canonical branch is the one with the highest priority
    branch = self.prioritize_branches(self._branches_for(hash))
    commit_on_default = branch == default_branch

    # Compute the identifier if the function did not receive one and we were asked to
    if include_identifier and not identifier:
        identifier = self._commit_count(
            hash if commit_on_default else '{}..{}'.format(default_branch, hash),
        )

    # Only compute the branch point when we are on something other than the default branch
    branch_point = None
    if include_identifier and not commit_on_default:
        branch_point = self._commit_count(hash) - identifier
    if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
        raise ValueError("Provided 'branch_point' does not match branch point of specified branch")

    # Check the commit log for a git-svn revision. The message starts at the fifth line of the
    # log output and every message line loses its first four characters.
    logcontent = '\n'.join(line[4:] for line in log_lines[4:])
    svn_matches = self.GIT_SVN_REVISION.findall(logcontent)
    revision = None
    if svn_matches:
        revision = int(svn_matches[-1].split('@')[0])

    # We only care about when a commit was committed
    commit_time = committer_time(hash)
    if commit_time.returncode:
        raise self.Exception('Failed to retrieve commit time for {}'.format(hash))
    timestamp = int(commit_time.stdout.lstrip())

    # Comparing commits in different repositories involves comparing timestamps. This is problematic
    # because in git, it's possible for a series of commits to share a commit time. To handle this
    # case, we assign each commit a zero-indexed "order" within its timestamp.
    order = 0
    while not identifier or order + 1 < identifier + (branch_point or 0):
        ancestor_time = committer_time('{}~{}'.format(hash, order + 1))
        if ancestor_time.returncode or int(ancestor_time.stdout.lstrip()) != timestamp:
            break
        order += 1

    return Commit(
        repository_id=self.id,
        hash=hash,
        revision=revision,
        identifier=identifier if include_identifier else None,
        branch_point=branch_point,
        branch=branch,
        timestamp=timestamp,
        order=order,
        author=Contributor.from_scm_log(log_lines[1], self.contributors),
        message=logcontent if include_log else None,
    )
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
    """
    Resolve a single commit through ``self.request`` and return it as a ``Commit``.

    The commit is selected by an identifier, a branch or tag, or a hash. When none of them
    is provided, the default branch is resolved.

    :param hash: Commit hash, validated with ``Commit._parse_hash``.
    :param revision: Not supported by this implementation; any truthy value is rejected.
    :param identifier: Identifier parsed with ``Commit._parse_identifier``; the parsed value
        may carry its own branch point and branch.
    :param branch: Branch to resolve, or the branch the identifier is counted on.
    :param tag: Tag to resolve.
    :param include_log: When false, the returned commit has no message.
    :param include_identifier: When false, no identifier or branch point is computed and the
        returned commit has no identifier.
    :return: The ``Commit`` built from the response data.
    :raises ValueError: For conflicting arguments.
    :raises self.Exception: When a revision is passed, the identifier is out of range, or
        commit data cannot be retrieved.
    :raises Exception: When the commit count for a commit on the default branch is unavailable.
    """
    def committer_timestamp(entry):
        # Committer dates are parsed with the '%Y-%m-%dT%H:%M:%SZ' layout and converted to
        # whole seconds with timegm, which does not apply a local timezone offset
        return int(calendar.timegm(datetime.strptime(
            entry['commit']['committer']['date'], '%Y-%m-%dT%H:%M:%SZ',
        ).timetuple()))

    if revision:
        raise self.Exception('Cannot map revisions to commits on GitHub')

    # Determine the commit data and branch for a given identifier
    if identifier is not None:
        # A truthy revision has already been rejected above, so only hash and tag can conflict here
        if hash:
            raise ValueError('Cannot define both hash and identifier')
        if tag:
            raise ValueError('Cannot define both tag and identifier')

        parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
        if parsed_branch:
            if branch and branch != parsed_branch:
                raise ValueError(
                    "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                        branch, parsed_branch,
                    ),
                )
            branch = parsed_branch

        branch = branch or self.default_branch
        is_default = branch == self.default_branch
        if is_default and parsed_branch_point:
            raise self.Exception('Cannot provide a branch point for a commit on the default branch')

        if is_default:
            base_count, base_ref = self._count_for_ref(ref=self.default_branch)
        else:
            # Off the default branch, identifiers count commits past the default branch
            _, base_ref = self._count_for_ref(ref=branch)
            base_count = self._difference(self.default_branch, base_ref)

        if identifier > base_count:
            raise self.Exception('Identifier {} cannot be found on {}'.format(identifier, branch))

        # Negative identifiers are actually commits on the default branch, we will need to re-compute the identifier
        if identifier < 0 and is_default:
            raise self.Exception('Illegal negative identifier on the default branch')

        commit_data = self.request('commits/{}~{}'.format(base_ref, base_count - identifier))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}@{}'".format(identifier, branch or 'HEAD'))

        # If an identifier is zero or negative, unset it so we re-compute before constructing the commit.
        if identifier <= 0:
            identifier = None

    # Determine the commit data for a given branch or tag
    elif branch or tag:
        if hash:
            raise ValueError('Cannot define both tag/branch and hash')
        if branch and tag:
            raise ValueError('Cannot define both tag and branch')

        commit_data = self.request('commits/{}'.format(branch or tag))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))

    # Determine the commit data for a given hash
    else:
        hash = Commit._parse_hash(hash, do_assert=True)
        commit_data = self.request('commits/{}'.format(hash or self.default_branch))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))

    # A commit is often on multiple branches, the canonical branch is the one with the highest priority
    branches = self._branches_for(commit_data['sha'])
    if branches:
        branch = self.prioritize_branches(branches)
    else:
        # A commit not on any branches cannot have an identifier
        identifier = None
        branch = None

    # Define identifiers on default branch
    branch_point = None
    if include_identifier and branch and branch == self.default_branch:
        if not identifier:
            result = self._count_for_ref(ref=commit_data['sha'])
            if not result:
                # NOTE(review): this raises the builtin Exception while every other failure in
                # this method raises self.Exception - confirm whether that is intentional
                raise Exception('{} {}'.format(result, commit_data['sha']))
            identifier, _ = result

    # Define identifiers on branches diverged from the default branch
    elif include_identifier and branch:
        if not identifier:
            identifier = self._difference(self.default_branch, commit_data['sha'])
        # NOTE(review): unlike the default-branch path, a falsy _count_for_ref result is not
        # checked before it is indexed here - confirm it cannot happen for these commits
        branch_point = self._count_for_ref(ref=commit_data['sha'])[0] - identifier

    # Check the commit log for a git-svn revision
    matches = self.GIT_SVN_REVISION.findall(commit_data['commit']['message'])
    revision = int(matches[-1].split('@')[0]) if matches else None

    email_match = self.EMAIL_RE.match(commit_data['commit']['author']['email'])
    timestamp = committer_timestamp(commit_data)

    # Comparing commits in different repositories involves comparing timestamps. This is problematic
    # because in git, it's possible for a series of commits to share a commit time. To handle this
    # case, we assign each commit a zero-indexed "order" within its timestamp.
    order = 0
    lhash = commit_data['sha']
    while lhash:
        response = self.request('commits', paginate=False, params=dict(sha=lhash, per_page=20))
        # The other requests in this method can come back falsy, so a failed request must end the
        # walk instead of reaching len(). A listing of one entry or fewer has no earlier commit to compare.
        if not response or len(response) <= 1:
            break

        for c in response:
            if lhash == c['sha']:
                continue
            if committer_timestamp(c) != timestamp:
                # First commit with a different timestamp: stop both loops
                lhash = None
                break
            lhash = c['sha']
            order += 1

    return Commit(
        repository_id=self.id,
        hash=commit_data['sha'],
        revision=revision,
        branch_point=branch_point,
        identifier=identifier if include_identifier else None,
        branch=branch,
        timestamp=timestamp,
        order=order,
        author=self.contributors.create(
            commit_data['commit']['author']['name'],
            email_match.group('email') if email_match else None,
        ),
        message=commit_data['commit']['message'] if include_log else None,
    )
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
    """
    Resolve a single commit through ``self.request`` and return it as a ``Commit``.

    The commit is selected by an identifier, a branch or tag, or a hash. When none of them
    is provided, the default branch is resolved.

    :param hash: Commit hash, validated with ``Commit._parse_hash``.
    :param revision: Not supported by this implementation; any truthy value is rejected.
    :param identifier: Identifier parsed with ``Commit._parse_identifier``; the parsed value
        may carry its own branch point and branch.
    :param branch: Branch to resolve, or the branch the identifier is counted on.
    :param tag: Tag to resolve.
    :param include_log: When false, the returned commit has no message.
    :param include_identifier: When false, no identifier or branch point is computed and the
        returned commit has no identifier.
    :return: The ``Commit`` built from the response data.
    :raises ValueError: For conflicting arguments.
    :raises self.Exception: When a revision is passed, the identifier is out of range, or
        commit data cannot be retrieved.
    """
    def off_default_branch(val):
        # Condition handed to self._distance when counting commits of a diverged branch
        return self.default_branch not in val

    if revision:
        raise self.Exception('Cannot map revisions to commits on BitBucket')

    if identifier is not None:
        # Determine the commit data and branch for a given identifier
        if revision:
            raise ValueError('Cannot define both revision and identifier')
        if hash:
            raise ValueError('Cannot define both hash and identifier')
        if tag:
            raise ValueError('Cannot define both tag and identifier')

        parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
        if parsed_branch:
            if branch and branch != parsed_branch:
                raise ValueError(
                    "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                        branch, parsed_branch,
                    ),
                )
            branch = parsed_branch

        branch = branch or self.default_branch
        is_default = branch == self.default_branch
        if is_default and parsed_branch_point:
            raise self.Exception('Cannot provide a branch point for a commit on the default branch')

        # The tip of the branch is the reference the identifier is counted back from
        commit_data = self.request('commits/{}'.format(branch), params={'limit': 1})
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(branch))
        base_ref = commit_data['id']

        distance_options = {} if is_default else {'magnitude': 256, 'condition': off_default_branch}
        base_count = self._distance(base_ref, **distance_options)

        if identifier > base_count:
            raise self.Exception('Identifier {} cannot be found on {}'.format(identifier, branch))

        # Negative identifiers are actually commits on the default branch, we will need to re-compute the identifier
        if identifier < 0 and is_default:
            raise self.Exception('Illegal negative identifier on the default branch')

        commit_data = self.request('commits/{}~{}'.format(base_ref, base_count - identifier))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}@{}'".format(identifier, branch or 'HEAD'))

        # If an identifier is zero or negative, unset it so we re-compute before constructing the commit.
        if identifier <= 0:
            identifier = None

    elif branch or tag:
        # Determine the commit data for a given branch or tag
        if hash:
            raise ValueError('Cannot define both tag/branch and hash')
        if branch and tag:
            raise ValueError('Cannot define both tag and branch')

        commit_data = self.request('commits/{}'.format(branch or tag))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))

    else:
        # Determine the commit data for a given hash
        hash = Commit._parse_hash(hash, do_assert=True)
        commit_data = self.request('commits/{}'.format(hash or self.default_branch))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))

    # A commit is often on multiple branches, the canonical branch is the one with the highest priority
    branches = self._branches_for(commit_data['id'])
    if branches:
        branch = self.prioritize_branches(branches)
    else:
        # A commit not on any branches cannot have an identifier
        identifier = None
        branch = None

    branch_point = None
    if include_identifier and branch:
        if branch == self.default_branch:
            # Define identifiers on default branch
            if not identifier:
                identifier = self._distance(commit_data['id'])
        else:
            # Define identifiers on branches diverged from the default branch
            if not identifier:
                identifier = self._distance(commit_data['id'], magnitude=256, condition=off_default_branch)
            branch_point = self._distance(commit_data['id']) - identifier

    # Check the commit log for a git-svn revision
    svn_matches = self.GIT_SVN_REVISION.findall(commit_data['message'])
    revision = None
    if svn_matches:
        revision = int(svn_matches[-1].split('@')[0])

    # Comparing commits in different repositories involves comparing timestamps. This is problematic
    # because in git, it's possible for a series of commits to share a commit time. To handle this
    # case, we assign each commit a zero-indexed "order" within its timestamp.
    # 'committerTimestamp' is divided by 1000 before use (presumably milliseconds - TODO confirm)
    timestamp = int(commit_data['committerTimestamp'] / 1000)
    order = 0
    while not identifier or order + 1 < identifier + (branch_point or 0):
        ancestor = self.request('commits/{}'.format('{}~{}'.format(commit_data['id'], order + 1)))
        if not ancestor or int(ancestor['committerTimestamp'] / 1000) != timestamp:
            break
        order += 1

    return Commit(
        repository_id=self.id,
        hash=commit_data['id'],
        revision=revision,
        branch_point=branch_point,
        identifier=identifier if include_identifier else None,
        branch=branch,
        timestamp=timestamp,
        order=order,
        author=self.contributors.create(
            commit_data.get('committer', {}).get('displayName', None),
            commit_data.get('committer', {}).get('emailAddress', None),
        ),
        message=commit_data['message'] if include_log else None,
    )
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True, include_identifier=True):
    """
    Resolve a single commit through ``self.request`` and return it as a ``Commit``.

    The commit is selected by an identifier, a branch or tag, or a hash. When none of them
    is provided, the default branch is resolved.

    :param hash: Commit hash, validated with ``Commit._parse_hash``.
    :param revision: Not supported by this implementation; any truthy value is rejected.
    :param identifier: Identifier parsed with ``Commit._parse_identifier``; the parsed value
        may carry its own branch point and branch.
    :param branch: Branch to resolve, or the branch the identifier is counted on.
    :param tag: Tag to resolve.
    :param include_log: When false, the returned commit has no message.
    :param include_identifier: When false, no identifier or branch point is computed and the
        returned commit has no identifier.
    :return: The ``Commit`` built from the response data.
    :raises ValueError: For conflicting arguments.
    :raises self.Exception: When a revision is passed, the identifier is out of range, or
        commit data cannot be retrieved.
    :raises Exception: When the commit count for a commit on the default branch is unavailable.
    """
    def committer_timestamp(entry):
        # Committer dates are parsed with the '%Y-%m-%dT%H:%M:%SZ' layout and converted to
        # whole seconds with timegm
        parsed = datetime.strptime(entry['commit']['committer']['date'], '%Y-%m-%dT%H:%M:%SZ')
        return int(calendar.timegm(parsed.timetuple()))

    if revision:
        raise self.Exception('Cannot map revisions to commits on GitHub')

    if identifier is not None:
        # Resolve the commit data and branch from the identifier
        if revision:
            raise ValueError('Cannot define both revision and identifier')
        if hash:
            raise ValueError('Cannot define both hash and identifier')
        if tag:
            raise ValueError('Cannot define both tag and identifier')

        parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
        if parsed_branch:
            if branch and branch != parsed_branch:
                raise ValueError(
                    "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                        branch, parsed_branch,
                    ),
                )
            branch = parsed_branch

        branch = branch or self.default_branch
        is_default = branch == self.default_branch
        if is_default and parsed_branch_point:
            raise self.Exception('Cannot provide a branch point for a commit on the default branch')

        if not is_default:
            # Off the default branch, identifiers count commits past the default branch
            _, base_ref = self._count_for_ref(ref=branch)
            base_count = self._difference(self.default_branch, base_ref)
        else:
            base_count, base_ref = self._count_for_ref(ref=self.default_branch)

        if identifier > base_count:
            raise self.Exception('Identifier {} cannot be found on {}'.format(identifier, branch))

        # Negative identifiers are actually commits on the default branch, we will need to re-compute the identifier
        if identifier < 0 and is_default:
            raise self.Exception('Illegal negative identifier on the default branch')

        commit_data = self.request('commits/{}~{}'.format(base_ref, base_count - identifier))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}@{}'".format(identifier, branch or 'HEAD'))

        # If an identifier is zero or negative, unset it so we re-compute before constructing the commit.
        if identifier <= 0:
            identifier = None

    elif branch or tag:
        # Resolve the commit data from the branch or tag
        if hash:
            raise ValueError('Cannot define both tag/branch and hash')
        if branch and tag:
            raise ValueError('Cannot define both tag and branch')

        commit_data = self.request('commits/{}'.format(branch or tag))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))

    else:
        # Resolve the commit data from the hash, or from the default branch when there is none
        hash = Commit._parse_hash(hash, do_assert=True)
        commit_data = self.request('commits/{}'.format(hash or self.default_branch))
        if not commit_data:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))

    # The canonical branch is picked by prioritize_branches among the branches holding the commit
    branches = self._branches_for(commit_data['sha'])
    if branches:
        branch = self.prioritize_branches(branches)
    else:
        # A commit not on any branches cannot have an identifier
        identifier = None
        branch = None

    branch_point = None
    if include_identifier and branch:
        if branch == self.default_branch:
            # On the default branch the identifier is the commit count of the commit itself
            if not identifier:
                result = self._count_for_ref(ref=commit_data['sha'])
                if not result:
                    raise Exception('{} {}'.format(result, commit_data['sha']))
                identifier, _ = result
        else:
            # On a diverged branch the identifier counts commits past the default branch and the
            # branch point is the remainder of the total commit count
            if not identifier:
                identifier = self._difference(self.default_branch, commit_data['sha'])
            branch_point = self._count_for_ref(ref=commit_data['sha'])[0] - identifier

    # Look for a git-svn revision in the commit message; the last match is used
    svn_matches = self.GIT_SVN_REVISION.findall(commit_data['commit']['message'])
    revision = None
    if svn_matches:
        revision = int(svn_matches[-1].split('@')[0])

    email_match = self.EMAIL_RE.match(commit_data['commit']['author']['email'])
    timestamp = committer_timestamp(commit_data)

    # "order" is the number of consecutive ancestors sharing this commit's committer timestamp
    order = 0
    while not identifier or order + 1 < identifier + (branch_point or 0):
        ancestor = self.request('commits/{}'.format('{}~{}'.format(commit_data['sha'], order + 1)))
        if not ancestor:
            break
        if committer_timestamp(ancestor) != timestamp:
            break
        order += 1

    return Commit(
        repository_id=self.id,
        hash=commit_data['sha'],
        revision=revision,
        branch_point=branch_point,
        identifier=identifier if include_identifier else None,
        branch=branch,
        timestamp=timestamp,
        order=order,
        author=self.contributors.create(
            commit_data['commit']['author']['name'],
            email_match.group('email') if email_match else None,
        ),
        message=commit_data['commit']['message'] if include_log else None,
    )
def commit(self, hash=None, revision=None, identifier=None, branch=None, tag=None, include_log=True):
    """
    Resolve a single commit of this git checkout into a ``Commit`` object.

    The commit is selected by a revision, an identifier, a branch or tag, or a hash.
    When none of them is provided, ``HEAD`` is resolved.

    :param hash: Commit hash, validated with ``Commit._parse_hash``.
    :param revision: Subversion revision, converted to a hash with ``git svn find-rev``.
        Rejected when ``self.is_svn`` is false.
    :param identifier: Identifier parsed with ``Commit._parse_identifier``; the parsed value
        may carry its own branch point and branch.
    :param branch: Branch to resolve, or the branch the identifier is counted on.
    :param tag: Tag to resolve.
    :param include_log: When false, ``git log`` is run with ``--format=short`` and the
        returned commit has no message.
    :return: The ``Commit`` built from the ``git log`` and ``git show`` output.
    :raises ValueError: For conflicting arguments, or when the branch point parsed from the
        identifier differs from the computed one.
    :raises self.Exception: When a git command fails or the request cannot be satisfied.
    """
    def git_log(ref):
        # Log of exactly one commit, in the format selected through include_log
        return run(
            [self.executable(), 'log', ref] + log_args,
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
        )

    if revision:
        # Revisions can only be converted when the checkout reports itself as git-svn
        if not self.is_svn:
            raise self.Exception('This git checkout does not support SVN revisions')
        if hash:
            raise ValueError('Cannot define both hash and revision')

        # Ask git-svn which hash belongs to the revision
        revision = Commit._parse_revision(revision, do_assert=True)
        find_rev = run(
            [self.executable(), 'svn', 'find-rev', 'r{}'.format(revision)],
            cwd=self.root_path,
            capture_output=True,
            encoding='utf-8',
            timeout=3,
        )
        if find_rev.returncode:
            raise self.Exception("Failed to retrieve commit information for 'r{}'".format(revision))
        hash = find_rev.stdout.rstrip()
        if not hash:
            raise self.Exception("Failed to find 'r{}'".format(revision))

    default_branch = self.default_branch
    parsed_branch_point = None
    log_args = ['-1']
    if not include_log:
        log_args.append('--format=short')

    if identifier is not None:
        # Resolve the `git log` output and branch from the identifier
        if revision:
            raise ValueError('Cannot define both revision and identifier')
        if hash:
            raise ValueError('Cannot define both hash and identifier')
        if tag:
            raise ValueError('Cannot define both tag and identifier')

        parsed_branch_point, identifier, parsed_branch = Commit._parse_identifier(identifier, do_assert=True)
        if parsed_branch:
            if branch and branch != parsed_branch:
                raise ValueError(
                    "Caller passed both 'branch' and 'identifier', but specified different branches ({} and {})".format(
                        branch, parsed_branch,
                    ),
                )
            branch = parsed_branch

        baseline = branch or 'HEAD'
        if baseline == 'HEAD':
            # HEAD counts as the default branch when the default branch is among its branches
            baseline_is_default = default_branch in self._branches_for(baseline)
        else:
            baseline_is_default = baseline == default_branch

        if baseline_is_default and parsed_branch_point:
            raise self.Exception('Cannot provide a branch point for a commit on the default branch')

        # Off the default branch, only the commits past the default branch are counted
        count_spec = baseline if baseline_is_default else '{}..{}'.format(default_branch, baseline)
        base_count = self._commit_count(count_spec)
        if identifier > base_count:
            raise self.Exception(
                'Identifier {} cannot be found on the specified branch in the current checkout'.format(identifier),
            )

        log = git_log('{}~{}'.format(baseline, base_count - identifier))
        if log.returncode:
            raise self.Exception(
                "Failed to retrieve commit information for 'i{}@{}'".format(identifier, baseline),
            )

        # Negative identifiers are actually commits on the default branch, so the identifier is
        # dropped here and re-computed once the hash is known
        if identifier < 0:
            if baseline_is_default:
                raise self.Exception('Illegal negative identifier on the default branch')
            identifier = None

    elif branch or tag:
        # Resolve the `git log` output from the branch or tag
        if hash:
            raise ValueError('Cannot define both tag/branch and hash')
        if branch and tag:
            raise ValueError('Cannot define both tag and branch')

        log = git_log(branch or tag)
        if log.returncode:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(branch or tag))

    else:
        # Resolve the `git log` output from the hash, or from HEAD when there is none
        hash = Commit._parse_hash(hash, do_assert=True)
        log = git_log(hash or 'HEAD')
        if log.returncode:
            raise self.Exception("Failed to retrieve commit information for '{}'".format(hash or 'HEAD'))

    # The first line of the `git log` output carries the full hash
    log_lines = log.stdout.splitlines()
    hash_match = self.GIT_COMMIT.match(log_lines[0])
    if not hash_match:
        raise self.Exception('Invalid commit hash in git log')
    hash = hash_match.group('hash')

    # The canonical branch is picked by prioritize_branches among the branches holding the commit
    branch = self.prioritize_branches(self._branches_for(hash))
    commit_on_default = branch == default_branch

    if not identifier:
        identifier = self._commit_count(
            hash if commit_on_default else '{}..{}'.format(default_branch, hash),
        )

    # A branch point only exists for commits off the default branch
    branch_point = None
    if not commit_on_default:
        branch_point = self._commit_count(hash) - identifier
    if branch_point and parsed_branch_point and branch_point != parsed_branch_point:
        raise ValueError("Provided 'branch_point' does not match branch point of specified branch")

    # Look for a git-svn revision anywhere in the log output; the first match is used
    svn_match = self.GIT_SVN_REVISION.search(log.stdout)
    revision = None
    if svn_match:
        revision = int(svn_match.group('revision'))

    # '%ct' makes git print only the committer timestamp of the commit
    commit_time = run(
        [self.executable(), 'show', '-s', '--format=%ct', hash],
        cwd=self.root_path,
        capture_output=True,
        encoding='utf-8',
    )
    if commit_time.returncode:
        raise self.Exception('Failed to retrieve commit time for {}'.format(hash))

    return Commit(
        hash=hash,
        revision=revision,
        identifier=identifier,
        branch_point=branch_point,
        branch=branch,
        timestamp=int(commit_time.stdout.lstrip()),
        author=Contributor.from_scm_log(log_lines[1], self.contributors),
        # The message starts at the fifth line of the log output and every message line
        # loses its first four characters
        message='\n'.join(line[4:] for line in log_lines[4:]) if include_log else None,
    )