def get_first_parents(repo_path):
    """
    Collect the shas that form the first-parent line of a repository

    The history is walked in topological order. Until the first merge commit
    is met, each walked commit is recorded together with its first parent.
    After that, only the first parent of an already recorded commit is added.

    Args:
        repo_path (str): path to the git repo

    Returns:
        list: shas in the order they were discovered, without duplicates
    """
    repo = dulwich.repo.Repo(repo_path)
    chain = []
    merge_seen = False

    def _remember(sha):
        # Keep discovery order while never recording a sha twice.
        if sha not in chain:
            chain.append(sha)

    for walk_entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        current = walk_entry.commit
        digest = current.sha().hexdigest()
        # Parent ids are converted with _to_str so they compare equal to the
        # hexdigest on both python 2 and python 3.
        parent_shas = current.parents and [
            _to_str(raw) for raw in current.parents
        ]

        if not parent_shas:
            _remember(digest)
            continue

        if not merge_seen:
            if len(parent_shas) > 1:
                merge_seen = True
            _remember(digest)
            _remember(parent_shas[0])
            continue

        if digest in chain:
            _remember(parent_shas[0])

    return chain
def get_first_parents(repo_path):
    """
    Collect the shas that form the first-parent line of a repository

    The history is walked in topological order. Until the first merge commit
    is met, each walked commit is recorded together with its first parent.
    After that, only the first parent of an already recorded commit is added.

    Args:
        repo_path (str): path to the git repo

    Returns:
        list: hex shas, as text, in the order they were discovered and
            without duplicates
    """
    repo = dulwich.repo.Repo(repo_path)
    first_parents = []
    # Mirror of first_parents used only for O(1) membership checks.
    seen = set()
    on_merge = False

    def remember(sha):
        if sha not in seen:
            seen.add(sha)
            first_parents.append(sha)

    for entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        commit = entry.commit
        commit_sha = commit.sha().hexdigest()
        # hexdigest() is text while parent ids may be bytes (python 3);
        # normalise them so the membership checks below can actually match.
        parents = [
            parent.decode('utf-8') if isinstance(parent, bytes) else parent
            for parent in (commit.parents or [])
        ]

        if not parents:
            remember(commit_sha)
        elif not on_merge:
            # The first merge commit switches to "follow first parents only".
            on_merge = len(parents) > 1
            remember(commit_sha)
            remember(parents[0])
        elif commit_sha in seen:
            remember(parents[0])

    return first_parents
def get_first_parents(repo_path):
    """
    Collect the shas that form the first-parent line of a repository

    Commits are visited in topological order. Before the first merge commit
    every visited commit and its first parent are recorded. Afterwards only
    the first parent of commits that are already recorded is added.

    Args:
        repo_path (str): path to the git repo

    Returns:
        list: shas in discovery order, each one present only once
    """
    repo = dulwich.repo.Repo(repo_path)
    first_parents = []
    on_merge = False

    for entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        commit = entry.commit
        own_sha = commit.sha().hexdigest()
        # Decode the parent ids so they are comparable with the hexdigest on
        # both python 2 and python 3.
        parent_shas = commit.parents and [
            raw.decode('utf-8') for raw in commit.parents
        ]

        # Work out which shas this commit contributes, then add them below.
        if not parent_shas:
            candidates = (own_sha,)
        elif not on_merge:
            on_merge = len(parent_shas) > 1
            candidates = (own_sha, parent_shas[0])
        elif own_sha in first_parents:
            candidates = (parent_shas[0],)
        else:
            candidates = ()

        for sha in candidates:
            if sha not in first_parents:
                first_parents.append(sha)

    return first_parents
def get_first_parents(repo_path):
    """
    Collect the shas that form the first-parent line of a repository

    The history is walked in topological order. Until the first merge commit
    is met, each walked commit is recorded together with its first parent.
    After that, only the first parent of an already recorded commit is added.
    If the last walked commit still has parents, the repo is treated as a
    shallow clone and the last recorded sha is dropped.

    Args:
        repo_path (str): path to the git repo

    Returns:
        list: hex shas, as text, in the order they were discovered and
            without duplicates
    """
    repo = dulwich.repo.Repo(repo_path)
    first_parents = []
    # Mirror of first_parents used only for O(1) membership checks.
    seen = set()
    on_merge = False
    # Stays None when the walker yields nothing, so the check after the loop
    # cannot hit an unbound name.
    commit = None

    def remember(sha):
        if sha not in seen:
            seen.add(sha)
            first_parents.append(sha)

    for entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        commit = entry.commit
        commit_sha = commit.sha().hexdigest()
        # hexdigest() is text while parent ids may be bytes (python 3);
        # normalise them so the membership checks below can actually match.
        parents = [
            parent.decode('utf-8') if isinstance(parent, bytes) else parent
            for parent in (commit.parents or [])
        ]

        if not parents:
            remember(commit_sha)
        elif not on_merge:
            # The first merge commit switches to "follow first parents only".
            on_merge = len(parents) > 1
            remember(commit_sha)
            remember(parents[0])
        elif commit_sha in seen:
            remember(parents[0])

    if commit is not None and commit.parents:
        # If this is the case, we have a shallow git clone
        # which means that we don't have the metadata of the
        # first's commit parent.
        LOGGER.debug('This is a shallow git clone,'
                     ' removing the first\'s commit parent.')
        first_parents.pop()

    return first_parents
def get_children_per_parent(repo_path):
    """
    Map every parent id to the set of hex digests of its child commits

    Args:
        repo_path (str): path to the git repo

    Returns:
        defaultdict: parent id (exactly as listed in ``commit.parents``) ->
            set of ``commit.sha().hexdigest()`` values of its children
    """
    repo = dulwich.repo.Repo(repo_path)
    children_per_parent = defaultdict(set)

    for walk_entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        child = walk_entry.commit
        child_sha = child.sha().hexdigest()
        for parent_id in child.parents:
            children_per_parent[parent_id].add(child_sha)

    return children_per_parent
def get_changelog(repo_path, from_commit=None):
    """
    Build the rpm compatible changelog of a repo, optionally starting from
    a given commit/tag/refspec

    Args:
        repo_path (str): path to the git repo
        from_commit (str): refspec (partial commit hash, tag, branch, full
            refspec, partial refspec) to start the changelog from

    Returns:
        str: Rpm compatible changelog
    """
    repo = dulwich.repo.Repo(repo_path)
    tags = get_tags(repo)
    refs = get_refs(repo)
    lines = []
    major = 0
    feature = 0
    fix = 0
    # Without a starting point every commit is part of the changelog.
    including = from_commit is None

    for walk_entry in repo.get_walker(reverse=True):
        commit = walk_entry.commit
        sha = commit.sha().hexdigest()

        # The version is tracked for every commit, even the skipped ones.
        if sha in tags:
            major_text, feature_text = tags[sha].split('.')
            major = int(major_text)
            feature = int(feature_text)
            fix = 0
        elif MAJOR_HEADER.search(commit.message):
            major += 1
            feature = 0
            fix = 0
        elif FEAT_HEADER.search(commit.message):
            feature += 1
            fix = 0
        else:
            fix += 1

        if not including:
            is_start = (
                sha.startswith(from_commit)
                or fuzzy_matches_refs(from_commit, refs.get(sha, []))
            )
            if not is_start:
                continue
            including = True

        version = '%s.%s.%s' % (major, feature, fix)
        lines.append(pretty_commit(commit, version))

    # Commits were walked oldest first; the changelog lists newest first.
    lines.reverse()
    return '\n'.join(lines)
def build_relatives(repo):
    """
    Build child and parent lookup tables for the commits reachable from the
    refs under ``refs/heads/``.

    Returns a ``(children, parents)`` tuple of plain dicts mapping a commit id
    to a set of commit ids. Commits without parents get no ``parents`` entry,
    and commits that are nobody's parent get no ``children`` entry.
    """
    branch_tips = [
        sha
        for ref_name, sha in repo.get_refs().items()
        if ref_name.startswith(b'refs/heads/')
    ]

    children = {}
    parents = {}
    for walk_entry in repo.get_walker(include=branch_tips):
        commit = walk_entry.commit
        for parent_id in commit.parents:
            if commit.id not in parents:
                parents[commit.id] = set()
            parents[commit.id].add(parent_id)

            if parent_id not in children:
                children[parent_id] = set()
            children[parent_id].add(commit.id)

    return children, parents
def get_changelog(repo_path, from_commit=None):
    """
    Return the rpm compatible changelog for the repo at the given path

    When a starting commit/tag/refspec is passed, commits older than the
    first one matching it are left out (their version bumps still count).

    Args:
        repo_path (str): path to the git repo
        from_commit (str): refspec (partial commit hash, tag, branch, full
            refspec, partial refspec) to start the changelog from

    Returns:
        str: Rpm compatible changelog
    """
    repo = dulwich.repo.Repo(repo_path)
    tags = get_tags(repo)
    refs = get_refs(repo)
    collected = []
    major, feature, fix = 0, 0, 0
    started = from_commit is None

    for walk_entry in repo.get_walker(reverse=True):
        commit = walk_entry.commit
        sha = commit.sha().hexdigest()

        if sha in tags:
            # A tag is expected to look like '<major>.<feature>'.
            major_text, feature_text = tags[sha].split('.')
            major, feature, fix = int(major_text), int(feature_text), 0
        elif MAJOR_HEADER.search(commit.message):
            major, feature, fix = major + 1, 0, 0
        elif FEAT_HEADER.search(commit.message):
            feature, fix = feature + 1, 0
        else:
            fix += 1

        version = '%s.%s.%s' % (major, feature, fix)

        if started:
            collected.append(pretty_commit(commit, version))
            continue

        # Still looking for the commit the changelog has to start from.
        if sha.startswith(from_commit) or fuzzy_matches_refs(
            from_commit, refs.get(sha, [])
        ):
            started = True
            collected.append(pretty_commit(commit, version))

    return '\n'.join(collected[::-1])
def get_links(repo):
    """
    Yield one ``TraceInfo`` per entry produced by ``repo.get_walker()``.

    Each record carries the commit's author, committer, id, commit time and
    message, plus whatever ``detect`` extracts from the message as bug ids.
    """
    for walk_entry in repo.get_walker():
        current = walk_entry.commit
        yield TraceInfo(
            author=current.author,
            committer=current.committer,
            commit_id=current.id,
            date=current.commit_time,  # + commit.commit_time_zone ?
            message=current.message,
            bug_ids=detect(current.message),
        )
def faasm_main():
    """
    Walk every commit reachable from HEAD of the bundled asyncio.git repo.

    The repo location depends on the ``PYTHONWASM`` environment variable:
    ``"1"`` selects the python3.7 site-packages path, anything else selects
    the faasm venv path and falls back to the system dist-packages path when
    that one does not exist. The commits themselves are discarded; only the
    iteration matters.
    """
    if os.environ.get("PYTHONWASM") == "1":
        repo_path = "/lib/python3.7/site-packages/pyperformance/benchmarks/data/asyncio.git"
    else:
        repo_path = "/usr/local/code/faasm/venv/lib/python3.6/site-packages/pyperformance/benchmarks/data/asyncio.git"
        if not exists(repo_path):
            repo_path = "/usr/local/lib/python3.6/dist-packages/pyperformance/benchmarks/data/asyncio.git"

    repo = dulwich.repo.Repo(repo_path)
    try:
        head = repo.head()

        # Iterate on all changes on the Git repository
        for _ in repo.get_walker(head):
            pass
    finally:
        # Release the repo even when resolving HEAD or walking fails.
        repo.close()
def get_first_parents(repo_path):
    """
    Collect the first-parent line of a repository through ``append_parent``

    Commits are visited in topological order. A commit without parents is
    passed on with ``add_parent=False``. Until the first merge commit has
    been seen every commit is passed on with the default flags. Afterwards
    only commits whose utf-8 encoded hex digest is already in the list are
    passed on, with ``add_digest=False``.

    Args:
        repo_path (str): path to the git repo

    Returns:
        list: whatever ``append_parent`` accumulated, in discovery order
    """
    repo = dulwich.repo.Repo(repo_path)
    first_parents = []
    merge_seen = False

    for walk_entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        current = walk_entry.commit

        if not current.parents:
            append_parent(first_parents, current, add_parent=False)
            continue

        if not merge_seen:
            if len(current.parents) > 1:
                merge_seen = True
            append_parent(first_parents, current)
            continue

        digest = current.sha().hexdigest().encode('utf-8')
        if digest in first_parents:
            append_parent(first_parents, current, add_digest=False)

    return first_parents
def get_root(repo):
    """
    Return the id of the last commit yielded by the repo walker, meant as a
    unique identifier of the git repository, or None when the repo has no
    refs at all.
    """
    # An empty repository has no refs, and the walker needs at least HEAD.
    ref_count = len(repo.get_refs())
    if ref_count == 0:
        return None

    # The walker is lazy: drain it and keep only the final entry.
    last_entry = None
    for last_entry in repo.get_walker():
        pass

    assert last_entry is not None
    # SHA should always be valid utf-8
    return to_utf8(last_entry.commit.id)
def get_version(repo_path):
    """
    Compute the semantic version string of the repo at the given path

    The history is walked oldest first. A commit counts as non-backwards
    compatible when its message has a header that matches (case
    insensitive)::

        sem-ver: .*break.*

    as a feature when it has a header matching::

        sem-ver: feature

    and as a bugfix otherwise. A tagged commit resets the major and feature
    numbers to the ones in the tag.
    """
    repo = dulwich.repo.Repo(repo_path)
    tags = get_tags(repo)
    major = feature = fix = 0

    for walk_entry in repo.get_walker(reverse=True):
        commit = walk_entry.commit
        sha = commit.sha().hexdigest()

        if sha in tags:
            major_text, feature_text = tags[sha].split('.')
            major = int(major_text)
            feature = int(feature_text)
            fix = 0
            continue

        if MAJOR_HEADER.search(commit.message):
            major += 1
            feature = 0
            fix = 0
            continue

        if FEAT_HEADER.search(commit.message):
            feature += 1
            fix = 0
            continue

        fix += 1

    return '%s.%s.%s' % (major, feature, fix)
def releases(git_dir):
    """
    Build the list of ``Release`` records for the tags found in a git repo.

    Every ref under ``refs/tags/`` is loaded; only objects whose
    ``type_name`` is ``'tag'`` are kept. The history is then walked from
    ``HEAD`` and, for each commit that has such tags, the first tag named
    ``beaker-<version>`` or ``beaker-<version>-1`` produces one release.

    The result is cut at version ``'0.8.99'`` (walking newest to oldest by
    tag timestamp) and returned sorted by ``version_tuple``, highest first.
    """
    repo = dulwich.repo.Repo(git_dir)
    releases = []
    tags = [
        repo.get_object(repo.refs['refs/tags/%s' % tag])
        for tag in repo.refs.keys(base='refs/tags/')
    ]
    # Group the tag objects by the second item of tag.object
    # (presumably the sha of the tagged commit -- TODO confirm).
    tag_commits = {}
    for tag in tags:
        if tag.type_name == 'tag':
            tag_commits.setdefault(tag.object[1], []).append(tag)
    for walker in repo.get_walker(repo.refs['HEAD']):
        if walker.commit.id not in tag_commits:
            continue
        # NOTE: `tag` and `m` deliberately outlive this loop. After a break
        # they hold the matching tag and its match; without a break `m` is
        # the (failed) result for the last tag tried.
        for tag in tag_commits[walker.commit.id]:
            m = re.match(r'beaker-([\d.]*)$', tag.name)
            if m:
                break
            # also check for tito tags, used up to 0.14.1
            m = re.match(r'beaker-([\d.]*)-1$', tag.name)
            if m:
                break
        if not m:
            continue
        # NOTE(review): `version` is not read again; Release() below calls
        # m.group(1) a second time.
        version = m.group(1)
        # The tagger is expected to look like 'Name <email>'.
        name, email = re.match(r'(.*) <(.*)>', tag.tagger).groups()
        timestamp = datetime.datetime.fromtimestamp(
            tag.tag_time, tzoffset(None, tag.tag_timezone))
        releases.append(
            Release(version=m.group(1), timestamp=timestamp,
                    name=name.decode('utf-8'), email=email, tag=tag.name))
    # Newest tag first, so that takewhile() below drops 0.8.99 and
    # everything tagged before it.
    releases = sorted(releases, key=lambda r: r.timestamp, reverse=True)
    # skip anything prior to 0.9
    releases = list(takewhile(lambda r: r.version != '0.8.99', releases))
    return sorted(releases, key=lambda r: r.version_tuple, reverse=True)
def get_first_parents(repo_path):
    """
    Collect the shas that form the first-parent line of a repository

    Commits are visited in topological order. Before the first merge commit
    every visited commit and its first parent are recorded. Afterwards only
    the first parent of commits that are already recorded is added. When the
    last visited commit still lists parents the repo is treated as a shallow
    clone and the last recorded sha is removed again.

    Args:
        repo_path (str): path to the git repo

    Returns:
        list: hex shas, as text, in discovery order, each one present once
    """
    repo = dulwich.repo.Repo(repo_path)
    first_parents = []
    # Same content as first_parents, kept for constant time lookups.
    recorded = set()
    on_merge = False
    # Remains None if the walker yields no entries at all.
    last_commit = None

    for entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        last_commit = entry.commit
        own_sha = last_commit.sha().hexdigest()
        # Parent ids may be bytes (python 3) while hexdigest() is text;
        # decode them so that both kinds of sha compare equal.
        parent_shas = [
            raw.decode('utf-8') if isinstance(raw, bytes) else raw
            for raw in (last_commit.parents or [])
        ]

        # Work out which shas this commit contributes, then add them below.
        if not parent_shas:
            candidates = (own_sha,)
        elif not on_merge:
            on_merge = len(parent_shas) > 1
            candidates = (own_sha, parent_shas[0])
        elif own_sha in recorded:
            candidates = (parent_shas[0],)
        else:
            candidates = ()

        for sha in candidates:
            if sha not in recorded:
                recorded.add(sha)
                first_parents.append(sha)

    if last_commit is not None and last_commit.parents:
        # If this is the case, we have a shallow git clone
        # which means that we don't have the metadata of the
        # first's commit parent.
        LOGGER.debug(
            'This is a shallow git clone,'
            ' removing the first\'s commit parent.'
        )
        first_parents.pop()

    return first_parents
def iter_all_commits(repo):
    """
    Walk every entry reachable from the HEAD of ``repo`` and discard it.

    HEAD is resolved from ``repo`` itself, so the function does not depend
    on a ``head`` name existing in the enclosing module.
    """
    head = repo.head()

    # iterate on all changes on the Git repository
    for _ in repo.get_walker(head):
        pass
# For every project, map each goldset issue id to the ids of the commits
# whose message mentions it, and store the result in
# <project.full_path>/issue2git.csv (one row per issue with commits).
projects = load_projects()
for project in projects:
    dest_fn = os.path.join(project.full_path, 'issue2git.csv')
    # An existing file means this project was already processed.
    if os.path.exists(dest_fn):
        continue

    if project.name == 'eclipse':
        continue

    repos = load_repos(project)
    golds = load_goldsets(project)
    ids = set(i for i, g in golds)
    # One independent set per issue id (a shared default would alias them).
    i2g = {issue_id: set() for issue_id in ids}

    # Issue references look like '<PROJECT>-<number>', e.g. 'ZOOKEEPER-42'.
    # The pattern only depends on the project, so it is compiled once here
    # instead of once per repo; the raw string keeps '\d' a regex escape.
    b = re.compile(r'%s-([\d]+)' % project.name.upper())

    for repo in repos:
        for entry in repo.get_walker():
            a = entry.commit
            for issue in b.findall(a.message):
                if issue in i2g:
                    i2g[issue].add(a.id)

    with open(dest_fn, 'w') as f:
        w = csv.writer(f)
        for issue, gits in i2g.items():
            # Issues without any matching commit are left out of the file.
            if gits:
                w.writerow([issue] + list(gits))
# Build the issue -> commits table of each project: commit messages are
# scanned for '<PROJECT>-<number>' references and the commit ids of the
# issues that belong to the goldsets are written to issue2git.csv inside
# the project's directory.
projects = load_projects()
for project in projects:
    dest_fn = os.path.join(project.full_path, 'issue2git.csv')
    # Skip projects that already have their csv, and 'eclipse' altogether.
    if os.path.exists(dest_fn):
        continue

    if project.name == 'eclipse':
        continue

    repos = load_repos(project)
    golds = load_goldsets(project)
    ids = set(i for i, g in golds)
    # Every known issue id starts out with its own empty set of commits.
    i2g = dict((issue_id, set()) for issue_id in ids)

    # Raw string so that '\d' reaches the regex engine untouched; compiled
    # outside the repo loop because it is the same for all of them.
    b = re.compile(r'%s-([\d]+)' % project.name.upper())

    for repo in repos:
        for entry in repo.get_walker():
            a = entry.commit
            for issue in b.findall(a.message):
                # Only issues that are part of the goldsets are tracked.
                if issue in i2g:
                    i2g[issue].add(a.id)

    with open(dest_fn, 'w') as f:
        w = csv.writer(f)
        for issue, gits in i2g.items():
            # No row is written for issues that no commit mentions.
            if gits:
                w.writerow([issue] + list(gits))