def diff_status():
    """Build, cache and return the module diff of every project vs. Greenfield.

    The state map is derived from the latest commit of the ``terraform``
    repository. For every module path pinned by ``cognitedata-greenfield``
    the commit history of ``terraform-cognite-modules`` is walked, and every
    other project that pins the same module is compared via ``diff_utils``.

    Returns:
        The diff map serialized with ``json.dumps``. The same map is stored
        in the cache under ``'diff_module_map'``.
    """
    # For now, we are comparing everything against Greenfield. We assume
    # Greenfield is the most up to date project
    baseline = 'cognitedata-greenfield'

    # get latest commits of repos
    git_folder = app.config['GIT_REPOS_FOLDER']
    rep = Repo(git_folder + '/terraform/')
    lastfcommit = next(iter(rep.get_walker(max_entries=1))).commit
    modules_repo = Repo(git_folder + '/terraform-cognite-modules')

    all_files = porcelain.ls_files(rep)
    state_map = {}
    diff_module_map = {}
    diff_utils.set_state_map(state_map, all_files, lastfcommit)

    count = 0
    for folder_path, modules in state_map[baseline].items():
        for module, module_info in modules.items():
            for full_module_path, git_ref in module_info.items():
                dirfnm = full_module_path.encode('utf-8')
                subpath_walker = modules_repo.get_walker(paths=[dirfnm])
                full_walker = modules_repo.get_walker()

                # NOTE(review): the full history is re-walked for every
                # module path; it could be hoisted out of the loops if the
                # diff_utils helpers never mutate the list - confirm.
                all_commits = []
                subpath_commits = []
                diff_utils.create_all_commits_list(
                    iter(full_walker), all_commits)
                diff_utils.create_subpath_commits_list(
                    iter(subpath_walker), subpath_commits)

                greenfield_commit = diff_utils.get_commit_in_subpath(
                    git_ref, all_commits, subpath_commits)

                for project, project_state in state_map.items():
                    if project == baseline:
                        continue
                    if folder_path not in project_state:
                        continue
                    if module not in project_state[folder_path]:
                        continue

                    project_commit = diff_utils.get_commit_in_subpath(
                        project_state[folder_path][module][full_module_path],
                        all_commits, subpath_commits)
                    if project_commit is None:
                        continue

                    diff_utils.set_diff_module_map(
                        project_commit, project, greenfield_commit,
                        folder_path, module, full_module_path,
                        subpath_commits, diff_module_map, count)
                    # NOTE(review): increment placement reconstructed from a
                    # flattened source (one step per recorded diff) - confirm.
                    count += 1

    cache.set('diff_module_map', diff_module_map)
    return json.dumps(diff_module_map)
def list_all_contributors(self):
    """Return the distinct ``(author, email)`` pairs seen on master.

    Every repository path in ``self.repos`` is opened and its
    ``refs/heads/master`` history is walked. Progress is reported on stderr.

    Returns:
        A list of ``(author, email)`` tuples without duplicates, in order of
        first appearance. ``email`` is ``""`` when the author field contains
        no ``<``.
    """
    total = len(self.repos)
    seen = set()
    contributors = []
    for index, repo_path in enumerate(self.repos, 1):
        sys.stderr.write(
            "[%d/%d Analyzing %s]\n" % (index, total, repo_path))
        repo = Repo(repo_path)
        master = repo.get_refs()['refs/heads/master']
        for entry in repo.get_walker([master]):
            raw_author = entry.commit.author
            if "<" in raw_author:
                parts = raw_author.split("<")
                author = parts[0].strip()
                # Drop the last character, presumably the closing ">".
                email = parts[1].strip()[:-1]
            else:
                author = raw_author
                email = ""
            contributor = (author, email)
            # Set lookup keeps dedupe linear while preserving first-seen order.
            if contributor not in seen:
                seen.add(contributor)
                contributors.append(contributor)
        del repo
    return contributors
def get_first_parents(repo_path: str) -> List[str]:
    """Collect commit SHAs along the first-parent chain of a repository.

    The history is walked in topological order. Until the first merge commit
    is met, every commit yielded by the walker and its first parent are
    recorded. From the first merge on, only commits that were already
    recorded contribute their first parent.

    Args:
        repo_path: Filesystem path handed to ``Repo``.

    Returns:
        Hex SHAs in the order they were first recorded, without duplicates.
    """
    repo = Repo(repo_path)
    first_parents: List[str] = []
    # Mirrors ``first_parents`` so membership checks stay O(1).
    recorded = set()
    on_merge = False

    def remember(sha: str) -> None:
        """Append ``sha`` unless it was recorded before."""
        if sha not in recorded:
            recorded.add(sha)
            first_parents.append(sha)

    for entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        commit = entry.commit
        sha = commit.sha().hexdigest()
        # In order to properly work on python 2 and 3 we need some utf magic
        parents = [_to_str(parent) for parent in commit.parents or ()]

        if not parents:
            remember(sha)
        elif not on_merge:
            # The first merge commit switches to "follow recorded only" mode.
            if len(parents) > 1:
                on_merge = True
            remember(sha)
            remember(parents[0])
        elif sha in recorded:
            remember(parents[0])
    return first_parents
class Git():
    """
    object that holds the git repository
    """

    def __init__(self):
        """Open the repository in the user data dir, creating it if needed."""
        self.repo_path = user_data_dir(appname, appauthor)
        self.files_under_version_controll = ['config.json', 'data.json']
        # initialize repo if it doesn't exist
        try:
            self.repo = Repo(self.repo_path)
        except NotGitRepository:
            # create repo
            if not os.path.exists(self.repo_path):
                try:
                    os.makedirs(self.repo_path)
                except OSError as exc:  # Guard against race condition
                    if exc.errno != errno.EEXIST:
                        raise
            Repo.init(self.repo_path)
            self.repo = Repo(self.repo_path)
            self.commit('initial commit')

    def commit(self, message):
        """
        commits the current status of files_under_version_controll
        :param message: str; commit message
        """
        self.repo.stage(self.files_under_version_controll)
        self.repo.do_commit(message.encode(), 'nextSongs'.encode())

    def get_current_head(self):
        """
        get sha as bytes of current head
        :return: bytes; sha1 checksum of current head
        """
        return self.repo.head()

    def get_commits(self):
        """
        generates a list of all commits, newest author time first
        :return: list-of-dulwich.objects.Commit
        """
        commits = [entry.commit for entry in self.repo.get_walker()]
        # Sort on the raw timestamp: converting to a naive local datetime
        # first can misorder commits around DST changes.
        commits.sort(key=lambda commit: commit.author_time)
        commits.reverse()
        return commits

    def restore(self, commit):
        """
        does a hard reset to a given commit
        :param commit: dulwich.objects.Commit; commit to reset to
        """
        porcelain.reset(self.repo, 'hard',
                        commit.sha().hexdigest().encode())
        self.commit("Restored setting and data.")
        Config.read_config()
def get_commit(repo: Repo, path: Path) -> Tuple[float, str]:
    """Return ``(author_time, author_name)`` of the first walked commit.

    The walker is restricted to ``path`` (unless ``path.name`` is empty, in
    which case no path filter is used), follows renames and runs in reverse
    order. The author name is the author field with everything from
    ``" <"`` on removed.

    Returns:
        ``(0.0, "")`` when the walk raises ``KeyError`` or yields nothing.
    """
    try:
        if path.name:
            wanted = [bytes(path)]
        else:
            wanted = None
        entries = iter(repo.get_walker(paths=wanted, follow=True, reverse=True))
        found = next(entries).commit
        author_name = re.sub(" <.*", "", found.author.decode())
        return (found.author_time, author_name)
    except (KeyError, StopIteration):
        return (0.0, "")
def get_children_per_parent(repo_path: str) -> DefaultDict[str, Set[str]]:
    """Map every parent SHA to the hex SHAs of the commits that list it.

    Args:
        repo_path: Filesystem path handed to ``Repo``.

    Returns:
        A ``defaultdict(set)`` keyed by parent SHA (via ``_to_str``).
    """
    repo = Repo(repo_path)
    children_per_parent: DefaultDict[str, Set[str]] = defaultdict(set)
    walker = repo.get_walker(order=dulwich.walk.ORDER_TOPO)
    for entry in walker:
        commit = entry.commit
        for parent_sha in commit.parents:
            children = children_per_parent[_to_str(parent_sha)]
            children.add(commit.sha().hexdigest())
    return children_per_parent
def branch_has_change(branch, change, repo_path):
    """Tell whether a commit reachable from ``branch`` mentions ``change``.

    ``branch`` may be given with or without the ``refs/heads/`` prefix. A
    commit matches when its message contains a ``Change-Id: <change>``
    trailer line.
    """
    repo = Repo(repo_path)
    if branch.startswith('refs/heads/'):
        ref_name = branch
    else:
        ref_name = 'refs/heads/' + branch
    head = repo.refs[ref_name]
    needle = '\nChange-Id: ' + change
    # Stop at the first matching commit instead of walking everything.
    for entry in repo.get_walker(include=[head]):
        if needle in entry.commit.message:
            return True
    return False
def _find_date_boundaries (self): for repo in self.repos: repo = Repo(repo) master = repo.get_refs()['refs/heads/master'] for i in repo.get_walker ([master]): if self.date_oldest == None or self.date_oldest > i.commit.commit_time: self.date_oldest = i.commit.commit_time if self.date_newest == None or self.date_newest < i.commit.commit_time: self.date_newest = i.commit.commit_time del i del repo
def _git_commit_list(self):
    """Return the ids of all commits, in the walker's date order.

    The repository is opened from ``self.config['top_dir']``.
    """
    repository = Repo(self.config['top_dir'])
    date_ordered = repository.get_walker(order=walk.ORDER_DATE)
    return [item.commit.id for item in date_ordered]
def branch_has_change(branch, change, repo_path):
    """Report whether ``change`` appears as a Change-Id on ``branch``.

    The branch name is normalised to a ``refs/heads/`` ref, then the commits
    reachable from it are scanned until one message contains the trailer.
    """
    prefix = 'refs/heads/'
    full_ref = branch if branch.startswith(prefix) else prefix + branch

    repo = Repo(repo_path)
    tip = repo.refs[full_ref]
    trailer = '\nChange-Id: ' + change
    return any(
        trailer in entry.commit.message
        for entry in repo.get_walker(include=[tip])
    )
def analyze(self):
    """Run the tests on every ``.py`` file below ``self.path``.

    Each file is tested as found on disk. When ``self.previous`` is truthy,
    the older revisions of each file are read from the git history and
    tested too. ``self.job.meta['current_file']`` is updated and saved
    before each file is processed. The directory is removed at the end.
    """
    # ToDo add support for older file versions

    def python_sources():
        """Yield (absolute path, path relative to self.path) per .py file."""
        for root, _dirs, files in os.walk(self.path):
            for name in files:
                # maybe support other files in the future
                if not name.endswith('.py'):
                    continue
                absolute = os.path.join(root, name)
                cleaned_path = absolute.replace(self.path, '')
                self.job.meta['current_file'] = cleaned_path
                self.job.save()
                yield absolute, cleaned_path

    for absolute, cleaned_path in python_sources():
        with open(absolute, 'r') as source_file:
            self.run_tests(source_file.read(), cleaned_path)

    if self.previous:
        try:
            r = Repo(self.path)
        except Exception:
            # Not a usable repository: nothing older to analyse.
            # NOTE(review): this return skips the cleanup below, as the
            # original did - confirm that is intended.
            return

        for _absolute, cleaned_path in python_sources():
            relative = cleaned_path[1:]
            history = iter(r.get_walker(paths=[relative]))
            # Skip the first entry; presumably it matches the on-disk
            # version that was analysed above.
            next(history, None)
            for entry in history:
                try:
                    source = self.get_file(
                        r, r[entry.commit.id].tree, relative)
                    self.run_tests(
                        source, cleaned_path, True, entry.commit.id)
                except KeyError:
                    # known dulwich error
                    # @FixThis
                    pass

    shutil.rmtree(self.path)
def load_git(path, revision):
    """Load every JSON file of the ``menu`` tree at a given revision.

    ``revision`` may be an abbreviated SHA: the first walked commit whose id
    starts with it is used. If none matches, the encoded value is passed on
    unchanged.

    Returns:
        A dict mapping each tree entry name to its parsed JSON content.
    """
    # Thanks to Jelmer Vernooij for spelling this one out for me :-D
    repo = Repo(path)
    prefix = revision.encode("ascii")
    rev = next(
        (entry.commit.id for entry in repo.get_walker()
         if entry.commit.id.startswith(prefix)),
        prefix,
    )
    menu = porcelain.get_object_by_path(repo, "menu", rev)

    loaded = {}
    for name, mode, object_id in menu.iteritems():
        text = str(repo[object_id].data, "utf-8")
        check_indents(text, name)
        loaded[name] = json.loads(text)
    return loaded
class ManifestWalker:
    """
    Walk all branches for a manifest repository and return key info
    and the contents of each commit; this walker moves forward in
    Git history
    """

    def __init__(self, manifest_dir, latest_sha):
        """Open the repository and store the latest SHAs as bytes"""

        self.repo = Repo(manifest_dir)
        encoded = []
        for sha in latest_sha:
            encoded.append(sha.encode('utf-8'))
        self.latest_sha = encoded

    def walk(self):
        """
        Walk forward from the stored SHAs over every ``refs/remotes``
        branch, yielding ``((path, commit), contents)`` for each commit
        that changes exactly one file
        """

        # Distinct branch tips; several refs may point at the same commit.
        tips = set()
        for ref in self.repo.refs.keys():
            if ref.startswith(b'refs/remotes'):
                tips.add(self.repo.get_object(self.repo.refs[ref]).id)

        walker = self.repo.get_walker(
            include=list(tips), exclude=self.latest_sha, reverse=True
        )

        for entry in walker:
            changes = entry.changes()

            # Skip any commit that doesn't have exactly one change
            # (Zero is a merge commit, more than one is a multi-file
            # commit)
            if len(changes) == 1:
                only_change = changes[0]
                new_side = only_change.new
                contents = self.repo.get_object(
                    new_side.sha).as_pretty_string()
                yield ((new_side.path, entry.commit), contents)
def commit_history(repo_key, branch):
    """Render the commit history of ``branch``, grouped by calendar day.

    The template receives the commits as a list of per-day lists (in walker
    order), the names of all local branches, and the selected branch.
    """
    repo = Repo(settings.REPOS[repo_key])

    # Names of every local branch with the prefix stripped (the selected
    # one is added separately in the template).
    references = [
        ref.replace(LOCAL_BRANCH_PREFIX, '', 1)
        for ref, sha in repo.get_refs().iteritems()
        if LOCAL_BRANCH_PREFIX in ref
    ]
    selected_branch = branch

    # Get the branch walker
    branch_tip = repo.get_refs()[LOCAL_BRANCH_PREFIX + branch]
    walker = repo.get_walker(include=[branch_tip])

    # Group commits by day. A list is used instead of a dict because the
    # walker already yields the commits in order.
    commits = []
    same_day = []
    current_day = None
    for entry in walker:
        commit = entry.commit
        day = filters.convert_unix_time_filter(commit.commit_time, '%d %b %Y')
        # A different day closes the running group and opens a new one.
        if current_day is not None and day != current_day:
            commits.append(same_day)
            same_day = []
        same_day.append(commit)
        current_day = day

    # Add last ones
    commits.append(same_day)

    return render_template('commit-history.html',
                           commits=commits,
                           repo_key=repo_key,
                           references=references,
                           selected_branch=selected_branch)
def build_stats_by_periods(self, periods, filter_fn=None):
    """Count commit authors per period over master of every repository.

    Args:
        periods: Non-empty list of ints (sorted in place). Commits whose
            ``commit_time`` is outside ``[min(periods), max(periods)]`` are
            ignored; the rest are assigned via ``self._find_period``.
        filter_fn: Optional predicate called with each walker entry; entries
            for which it returns a falsy value are skipped.

    Returns:
        A dict mapping each period to ``self._plain_to_count`` applied to
        the list of author names collected for it.
    """
    assert (len(periods) > 0)
    assert all(isinstance(x, int) for x in periods)

    periods.sort()
    # One list per period: dict.fromkeys(periods, []) would make every key
    # share a single list, putting every author into every period.
    periods = dict((period, []) for period in periods)

    # The keys never change below, so the bounds are computed once.
    ordered_keys = sorted(periods)
    lower = ordered_keys[0]
    upper = ordered_keys[-1]

    total = len(self.repos)
    for index, repo_path in enumerate(self.repos, 1):
        sys.stderr.write(
            "[%d/%d Analyzing %s]\n" % (index, total, repo_path))
        repo = Repo(repo_path)
        master = repo.get_refs()['refs/heads/master']
        for entry in repo.get_walker([master]):
            commit_time = entry.commit.commit_time
            if commit_time < lower or commit_time > upper:
                continue
            if filter_fn is not None and not filter_fn(entry):
                continue

            period = self._find_period(list(periods), commit_time)
            author = entry.commit.author.split("<")[0].strip()
            periods[period].append(author)
        del repo

    for period in list(periods):
        periods[period] = self._plain_to_count(periods[period])

    return periods
def get_latest_commits(git_folder, module_state_map):
    """Return the module state map, building and caching it on a cache miss.

    The cached value under ``'module_state_map'`` is returned when present.
    Otherwise the latest commit of ``<git_folder>/terraform/`` and its file
    list are handed to ``diff_utils.set_module_state_map`` and the result is
    cached.

    Args:
        git_folder: A string of the repo path
        module_state_map: Ignored; kept for backward compatibility. The
            value is always replaced by the cached or freshly built map.

    Returns:
        A nested dictionary of all terraform modules being used by all
        projects, path name within that project and path to that specific
        module. See diff_utils.set_module_state_map for example
    """
    module_state_map = cache.get('module_state_map')
    if module_state_map is None:
        # Only touch the repository when the cache has nothing to offer.
        rep = Repo(git_folder + '/terraform/')
        lastfcommit = next(iter(rep.get_walker(max_entries=1))).commit
        all_files = porcelain.ls_files(rep)

        module_state_map = {}
        diff_utils.set_module_state_map(
            module_state_map, all_files, lastfcommit)
        cache.set('module_state_map', module_state_map)
    return module_state_map
class DiffWalker:
    """
    Handles determining which new commits occurred between two
    successive builds, taking into account possibly having no
    previous build
    """

    def __init__(self, repo_dir):
        """Initialize the repository connection"""

        # Making the assumption the repo is already checked out
        # at this location from previous steps
        self.repo = Repo(str(repo_dir.resolve()))

    def walk(self, old_shas, new_shas):
        """
        Walk through the set of commits between the sets of given
        SHAs to determine the new commits and return the list of
        the commits

        Raises ``MissingCommitError`` (chained to the dulwich error)
        when a commit is missing, whether that is detected while the
        walker is created or while it is iterated.
        """

        try:
            walker = self.repo.get_walker(include=new_shas,
                                          exclude=old_shas)
            # Iterate inside the try block: a missing commit can also
            # surface while the history is being traversed.
            return [entry.commit for entry in walker]
        except dulwich.errors.MissingCommitError as exc:
            raise MissingCommitError(exc) from exc
#!/usr/bin/python
# Example printing the last author of a specified file
#
# Usage: pass the file name as the only argument; the repository in the
# current directory is inspected.

import sys
import time
from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0],))
    sys.exit(1)

r = Repo(".")

# Only the most recent commit touching the file is needed.
w = r.get_walker(paths=[sys.argv[1]], max_entries=1)
try:
    c = next(iter(w)).commit
except StopIteration:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    # The placeholders follow the argument order: author, then time.
    print("%s was last changed by %s at %s (commit %s)" % (
        sys.argv[1], c.author, time.ctime(c.author_time), c.id))
"""Example printing the last author of a specified file."""

import sys
import time
from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0], ))
    sys.exit(1)

r = Repo(".")

# dulwich works with byte paths.
path = sys.argv[1].encode('utf-8')

# Only the most recent commit touching the file is needed.
w = r.get_walker(paths=[path], max_entries=1)
try:
    c = next(iter(w)).commit
except StopIteration:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    # Author and id are bytes; decode them so the output does not show
    # the b'...' representation.
    author = c.author.decode('utf-8', 'replace')
    commit_id = c.id.decode('ascii')
    print("%s was last changed by %s at %s (commit %s)" % (
        sys.argv[1], author, time.ctime(c.author_time), commit_id))
def display_diff():
    """Return, as JSON, the module commits and each project's pinned commit.

    For every module path pinned by ``cognitedata-greenfield`` the response
    lists the commits of ``terraform-cognite-modules`` touching that path
    (under ``"all_subpath_commits"``) together with the commit each project
    resolves to. A second, per-module entry pairs the Greenfield commit with
    each project's commit.

    Returns:
        The result of ``jsonify`` on the assembled dict.
    """
    baseline = 'cognitedata-greenfield'

    git_folder = app.config['GIT_REPOS_FOLDER']
    rep = Repo(git_folder + '/terraform/')
    lastfcommit = next(iter(rep.get_walker(max_entries=1))).commit
    modules_repo = Repo(git_folder + '/terraform-cognite-modules')

    all_files = porcelain.ls_files(rep)
    state_map = {}
    ret = cache.get('ret')
    if ret is None:
        ret = {}
    diff_utils.set_state_map(state_map, all_files, lastfcommit)

    ret["all_subpath_commits"] = {}
    history = ret["all_subpath_commits"]

    for folder_path, modules in state_map[baseline].items():
        for module, module_info in modules.items():
            history[module] = {"module_commits": []}

            for full_module_path, git_ref in module_info.items():
                dirfnm = full_module_path.encode('utf-8')
                subpath_walker = modules_repo.get_walker(paths=[dirfnm])
                full_walker = modules_repo.get_walker()

                all_commits = []
                subpath_commits = []
                diff_utils.create_all_commits_list(
                    iter(full_walker), all_commits)
                diff_utils.create_subpath_commits_list(
                    iter(subpath_walker), subpath_commits)

                for commit in subpath_commits:
                    parsed_commit = (
                        commit.as_pretty_string().decode().split('\n'))
                    committer = parsed_commit[2]
                    description = parsed_commit[-2]
                    # sometimes the commit description has 2 new lines at
                    # the end
                    if len(description) == 0:
                        description = parsed_commit[-3]
                    history[module]["module_commits"].append({
                        "committer": committer,
                        "description": description,
                        "sha_commit": commit.id.decode(),
                    })

                greenfield_commit = diff_utils.get_commit_in_subpath(
                    git_ref, all_commits, subpath_commits)
                greenfield_sha = greenfield_commit.id.decode()
                history[module][baseline] = greenfield_sha

                for project, project_state in state_map.items():
                    if project == baseline:
                        continue
                    if folder_path not in project_state:
                        continue
                    if module not in project_state[folder_path]:
                        continue

                    project_commit = diff_utils.get_commit_in_subpath(
                        project_state[folder_path][module][full_module_path],
                        all_commits, subpath_commits)
                    # Check before dereferencing: the original only tested
                    # for None after ``project_commit.id`` had been used.
                    if project_commit is None:
                        # raise a flag if equinor's git ref is invalid
                        continue

                    project_sha = project_commit.id.decode()
                    history[module][project] = project_sha

                    # NOTE(review): nesting reconstructed from a flattened
                    # source. The Greenfield ref is written next to every
                    # project ref so the pair always comes from the same
                    # module path - confirm.
                    module_entry = ret.setdefault(module, {})
                    module_entry["greenfield"] = greenfield_sha
                    module_entry[project] = project_sha

    return jsonify(ret)
class Repo(object):
    """
    Wrapper around a libgit Repository that knows:

    * How to get all the files in the repository
    * How to get the oid of HEAD
    * How to get the commit times of the files we want commit times for

    It's written with speed in mind, given the constraints of making
    performant code in python!
    """
    def __init__(self, root_folder):
        """Open the repository found at ``root_folder``"""
        self.git = Repository(root_folder)

    def all_files(self):
        """Return a set of all the files under git control"""
        return set([entry.decode() for entry, _ in self.git.open_index().items()])

    @property
    def first_commit(self):
        """Return the oid of HEAD"""
        return self.git.head().decode()

    def file_commit_times(self, use_files_paths, debug=False):
        """
        Traverse the commits in the repository, starting from HEAD until we have
        found the commit times for all the files we care about.

        Yield ``(commit sha, commit time, paths)`` once per commit in which at
        least one still-wanted file is found to be changed; each file is
        reported only once.

        NOTE(review): ``debug`` is accepted but not used in this method - the
        commits/second logging it presumably controlled is not visible here.
        """
        # Wanted paths; entries are removed as their commit is found.
        prefixes = PrefixTree()
        prefixes.fill(use_files_paths)

        for entry in self.git.get_walker():
            # Commit time taking into account the timezone
            commit_time = entry.commit.commit_time - entry.commit.commit_timezone

            # Get us the two different tree structures between parents and current
            cf_and_pf, changes = self.tree_structures_for(()
                , entry.commit.tree
                , [self.git.get_object(oid).tree for oid in entry.commit.parents]
                , prefixes
                )

            # Deep dive into any differences
            difference = []
            if changes:
                # Work list of (current/parent entries, changes) still to expand
                cfs_and_pfs = [(cf_and_pf, changes)]
                while cfs_and_pfs:
                    nxt, changes = cfs_and_pfs.pop(0)
                    for thing, changes, is_path in self.differences_between(nxt[0], nxt[1], changes, prefixes):
                        if is_path:
                            # thing is a path tuple: (directories..., file name)
                            found = prefixes.remove(thing[:-1], thing[-1])
                            if found:
                                difference.append('/'.join(thing))
                        else:
                            cfs_and_pfs.append((thing, changes))

            # Only yield if there was a difference
            if difference:
                yield entry.commit.sha().hexdigest(), commit_time, difference

            # If nothing remains, then break!
            if not prefixes:
                break

    def entries_in_tree_oid(self, prefix, tree_oid):
        """
        Find the tree at this oid and return entries prefixed with ``prefix``

        Logs a warning and returns the module-level ``empty`` value when the
        object cannot be found.
        """
        try:
            tree = self.git.get_object(tree_oid)
        except KeyError:
            log.warning("Couldn't find object {0}".format(tree_oid))
            return empty
        else:
            return frozenset(self.entries_in_tree(prefix, tree))

    def entries_in_tree(self, prefix, tree):
        """
        Traverse the entries in this tree and yield (prefix, is_tree, oid)

        Where prefix is a tuple of the given prefix and the name of the entry.
        """
        for entry in tree.items():
            if prefix:
                new_prefix = prefix + (entry.path.decode(), )
            else:
                new_prefix = (entry.path.decode(), )

            yield (new_prefix, stat.S_ISDIR(entry.mode), entry.sha)

    def tree_structures_for(self, prefix, current_oid, parent_oids, prefixes):
        """
        Return the entries for this commit, the entries of the parent commits,
        and the difference between the two (current_files - parent_files)

        Returns ``(empty, empty)`` straight away when ``prefix`` is set and is
        not one of the wanted ``prefixes``.
        """
        if prefix and prefixes and prefix not in prefixes:
            return empty, empty

        # Union of the entries of every parent tree
        parent_files = set()
        for oid in parent_oids:
            parent_files.update(self.entries_in_tree_oid(prefix, oid))

        current_files = self.entries_in_tree_oid(prefix, current_oid)
        return (current_files, parent_files), (current_files - parent_files)

    def differences_between(self, current_files, parent_files, changes, prefixes):
        """
        yield (thing, changes, is_path)

        If is_path is true, changes is None and thing is the path as a tuple.

        If is_path is false, thing is the current_files and parent_files for
        that changed treeentry and changes is the difference between current_files
        and parent_files.

        The code here is written to squeeze as much performance as possible out
        of this operation.
        """
        parent_oid = None

        # Parent tree oids are only looked up when a changed entry is a tree
        if any(is_tree for _, is_tree, _ in changes):
            if len(changes) == 1:
                wanted_path = list(changes)[0][0]
                parent_oid = frozenset([oid for path, is_tree, oid in parent_files if path == wanted_path and is_tree])
            else:
                parent_values = defaultdict(set)
                parent_changes = parent_files - current_files
                for path, is_tree, oid in parent_changes:
                    if is_tree:
                        parent_values[path].add(oid)

        for path, is_tree, oid in changes:
            if is_tree and path not in prefixes:
                continue

            if not is_tree:
                yield path, None, True
            else:
                parent_oids = parent_oid if parent_oid is not None else parent_values.get(path, empty)
                # Rebinding ``changes`` does not disturb the loop above: the
                # for statement keeps iterating the object it started with.
                cf_and_pf, changes = self.tree_structures_for(path, oid, parent_oids, prefixes)
                if changes:
                    yield cf_and_pf, changes, False
def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev): """ Returns lists of changesets that can be merged from org_repo@org_rev to other_repo@other_rev ... and the other way ... and the ancestors that would be used for merge :param org_repo: repo object, that is most likely the original repo we forked from :param org_rev: the revision we want our compare to be made :param other_repo: repo object, most likely the fork of org_repo. It has all changesets that we need to obtain :param other_rev: revision we want out compare to be made on other_repo """ ancestors = None if org_rev == other_rev: org_changesets = [] other_changesets = [] elif alias == 'hg': # case two independent repos if org_repo != other_repo: hgrepo = mercurial.unionrepo.makeunionrepository( other_repo.baseui, safe_bytes(other_repo.path), safe_bytes(org_repo.path)) # all ancestors of other_rev will be in other_repo and # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot # no remote compare do it on the same repository else: hgrepo = other_repo._repo ancestors = [ ascii_str(hgrepo[ancestor].hex()) for ancestor in hgrepo.revs( b"id(%s) & ::id(%s)", ascii_bytes(other_rev), ascii_bytes(org_rev)) ] if ancestors: log.debug("shortcut found: %s is already an ancestor of %s", other_rev, org_rev) else: log.debug("no shortcut found: %s is not an ancestor of %s", other_rev, org_rev) ancestors = [ ascii_str(hgrepo[ancestor].hex()) for ancestor in hgrepo.revs(b"heads(::id(%s) & ::id(%s))", ascii_bytes(org_rev), ascii_bytes(other_rev)) ] # FIXME: expensive! 
other_changesets = [ other_repo.get_changeset(rev) for rev in hgrepo.revs( b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)", ascii_bytes(other_rev), ascii_bytes(org_rev), ascii_bytes(org_rev)) ] org_changesets = [ org_repo.get_changeset(ascii_str(hgrepo[rev].hex())) for rev in hgrepo.revs( b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)", ascii_bytes(org_rev), ascii_bytes(other_rev), ascii_bytes(other_rev)) ] elif alias == 'git': if org_repo != other_repo: from dulwich.repo import Repo from dulwich.client import SubprocessGitClient gitrepo = Repo(org_repo.path) SubprocessGitClient(thin_packs=False).fetch( other_repo.path, gitrepo) gitrepo_remote = Repo(other_repo.path) SubprocessGitClient(thin_packs=False).fetch( org_repo.path, gitrepo_remote) revs = [ ascii_str(x.commit.id) for x in gitrepo_remote.get_walker( include=[ascii_bytes(other_rev)], exclude=[ascii_bytes(org_rev)]) ] other_changesets = [ other_repo.get_changeset(rev) for rev in reversed(revs) ] if other_changesets: ancestors = [other_changesets[0].parents[0].raw_id] else: # no changesets from other repo, ancestor is the other_rev ancestors = [other_rev] gitrepo.close() gitrepo_remote.close() else: so = org_repo.run_git_command([ 'log', '--reverse', '--pretty=format:%H', '-s', '%s..%s' % (org_rev, other_rev) ]) other_changesets = [ org_repo.get_changeset(cs) for cs in re.findall(r'[0-9a-fA-F]{40}', so) ] so = org_repo.run_git_command( ['merge-base', org_rev, other_rev]) ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]] org_changesets = [] else: raise Exception('Bad alias only git and hg is allowed') return other_changesets, org_changesets, ancestors
class backend():
    """Small convenience layer over dulwich porcelain for one repository.

    Most methods report problems by printing a message instead of raising.
    """

    def __init__(self):
        self.username = ""
        self.email = ""
        self.activity = ""
        self.repo_path = ""
        self.repo_name = ""
        self.isaclone = 0
        self.cloned_from = ""

    def set_authorinfo(self, username, email):
        """Remember the author name and email."""
        self.username = username
        self.email = email

    def local_init(self, repo_name, activity):
        """Create a new repository named ``repo_name`` below the cwd."""
        self.activity = activity
        self.repo_name = repo_name
        try:
            self.repo = p.init(repo_name)
            self.current_dir = os.getcwd()
            self.repo_path = self.current_dir + '/' + self.repo_name
            print(self.repo_path)
            print("Local Repo Created")
        except Exception:
            print("Repo already exist, delete it first")

    def load_repo(self, repo_name):
        """Open an existing repository named ``repo_name`` below the cwd."""
        self.repo_name = repo_name
        self.repo = DulwichRepo(self.repo_name)
        self.current_dir = os.getcwd()
        self.repo_path = self.current_dir + '/' + self.repo_name

    def create_file(self, name, content):
        """Write ``content`` to ``name`` inside the repository (best effort)."""
        try:
            with open(os.path.join(self.repo_path, name), 'w') as handle:
                handle.write(content)
        except Exception:
            print('Unable to create README, does it already exist?')

    def edit_readme(self, name, content):
        """Overwrite ``name`` inside the repository with ``content``."""
        with open(os.path.join(self.repo_path, name), 'w') as handle:
            handle.write(content)

    def add(self, a):
        """Stage ``a``, which can be a list of files or a single file."""
        print(self.repo_name)
        print(self.repo)
        if isinstance(a, list):
            for item in a:
                p.add(self.repo, item)
        else:
            p.add(self.repo, a)

    def get_status(self):
        """Print the repository path and its porcelain status."""
        if os.path.exists(self.repo_path):
            print(self.repo_path)
            print(p.status(self.repo_path))
        else:
            print("Repo does not exist")

    def commit(self, message):
        """Commit the staged changes with ``message``."""
        p.commit(self.repo, message)

    def get_commit_history(self):
        """Print every walker entry (and its commit) that touches README."""
        print(self.repo_path)
        f = "README"
        w = self.repo.get_walker(paths=[f], max_entries=None)
        for count, entry in enumerate(w, 1):
            print("%s %s" % (count, entry))
            print(entry.commit)

    def clone_local(self, clone_repo_name):
        """Clone this (local) repository into ``clone_repo_name``."""
        p.clone(self.repo_path, clone_repo_name)

    def clone_remote(self, remote_repo_name, clone_repo_name):
        """Clone ``remote_repo_name`` into ``clone_repo_name``.

        ``self`` was missing from the signature, so a call on an instance
        passed the instance itself as the remote name.
        """
        p.clone(remote_repo_name, clone_repo_name)

    def commit_logs(self):
        """Print the porcelain log of the repository."""
        try:
            if os.path.exists(self.repo_path):
                print(p.log(self.repo))
            else:
                print("Repo does not exist")
        except Exception:
            print("No commits yet")

    """
    #Some issues - have to be rectified asap
    def revert_to_commit(self):
        print self.repo_path
        r = self.repo
        f = "README"
        w = r.get_walker(paths=[], max_entries=None)
        count = 0
        for i in iter(w):
            count += 1
            print count,
            print type(i)
            print i
            print i.commit.id
            a = i.commit.id
            #a = a[0:8]
            #print i.commit.get_sha_for()
            print a
        p.reset(self.repo, "hard", a)
    """

    def get_diff(self):
        """Diff the trees of the first and fourth commits touching README."""
        f = "README"
        tree_list = []
        w = self.repo.get_walker(paths=[f], max_entries=None)
        for entry in w:
            tree_list.append(entry.commit.tree)
            print(entry.commit.tree)
        print(len(tree_list))
        # NOTE(review): index 3 is hard-coded, so this raises IndexError
        # when README has fewer than four revisions - confirm intent.
        p.diff_tree(self.repo, tree_list[0], tree_list[3])

    def update_local(self):
        """Pull from ``self.cloned_from`` when this repository is a clone."""
        if self.isaclone == 1:
            try:
                p.pull(self.repo, self.cloned_from)
            except Exception:
                print("Error")
        else:
            print("Can not update")
#!/usr/bin/python
# Example printing the last author of a specified file

import sys
import time
from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0], ))
    sys.exit(1)

r = Repo(".")

# The walker compares against byte paths, so encode the argument first;
# a text path would not match anything.
path = sys.argv[1].encode('utf-8')

w = r.get_walker(paths=[path], max_entries=1)
try:
    c = next(iter(w)).commit
except StopIteration:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    # Author and id come back as bytes; decode them for display.
    author = c.author.decode('utf-8', 'replace')
    commit_id = c.id.decode('ascii')
    print("%s was last changed by %s at %s (commit %s)" % (
        sys.argv[1], author, time.ctime(c.author_time), commit_id))
#!/usr/bin/python
# Example printing the last author of a specified file

import sys
import time
from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0], ))
    sys.exit(1)

r = Repo(".")

# Paths are handed to the walker as bytes.
path = sys.argv[1].encode('utf-8')

w = r.get_walker(paths=[path], max_entries=1)
try:
    c = next(iter(w)).commit
except StopIteration:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    # Decode the byte values so the message reads "Name <mail>" rather
    # than b'Name <mail>'.
    last_author = c.author.decode('utf-8', 'replace')
    sha = c.id.decode('ascii')
    print("%s was last changed by %s at %s (commit %s)" % (
        sys.argv[1], last_author, time.ctime(c.author_time), sha))
def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev): """ Returns lists of changesets that can be merged from org_repo@org_rev to other_repo@other_rev ... and the other way ... and the ancestor that would be used for merge :param org_repo: repo object, that is most likely the original repo we forked from :param org_rev: the revision we want our compare to be made :param other_repo: repo object, most likely the fork of org_repo. It has all changesets that we need to obtain :param other_rev: revision we want out compare to be made on other_repo """ ancestor = None if org_rev == other_rev: org_changesets = [] other_changesets = [] ancestor = org_rev elif alias == 'hg': #case two independent repos if org_repo != other_repo: hgrepo = unionrepo.unionrepository(other_repo.baseui, other_repo.path, org_repo.path) # all ancestors of other_rev will be in other_repo and # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot #no remote compare do it on the same repository else: hgrepo = other_repo._repo if org_repo.EMPTY_CHANGESET in (org_rev, other_rev): # work around unexpected behaviour in Mercurial < 3.4 ancestor = org_repo.EMPTY_CHANGESET else: ancestors = hgrepo.revs("ancestor(id(%s), id(%s))", org_rev, other_rev) if ancestors: # FIXME: picks arbitrary ancestor - but there is usually only one try: ancestor = hgrepo[ancestors.first()].hex() except AttributeError: # removed in hg 3.2 ancestor = hgrepo[ancestors[0]].hex() other_revs = hgrepo.revs("ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)", other_rev, org_rev, org_rev) other_changesets = [other_repo.get_changeset(rev) for rev in other_revs] org_revs = hgrepo.revs("ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)", org_rev, other_rev, other_rev) org_changesets = [org_repo.get_changeset(hgrepo[rev].hex()) for rev in org_revs] elif alias == 'git': if org_repo != other_repo: from dulwich.repo import Repo from dulwich.client import SubprocessGitClient gitrepo = 
Repo(org_repo.path) SubprocessGitClient(thin_packs=False).fetch(safe_str(other_repo.path), gitrepo) gitrepo_remote = Repo(other_repo.path) SubprocessGitClient(thin_packs=False).fetch(safe_str(org_repo.path), gitrepo_remote) revs = [] for x in gitrepo_remote.get_walker(include=[other_rev], exclude=[org_rev]): revs.append(x.commit.id) other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)] if other_changesets: ancestor = other_changesets[0].parents[0].raw_id else: # no changesets from other repo, ancestor is the other_rev ancestor = other_rev else: so, se = org_repo.run_git_command( ['log', '--reverse', '--pretty=format:%H', '-s', '%s..%s' % (org_rev, other_rev)] ) other_changesets = [org_repo.get_changeset(cs) for cs in re.findall(r'[0-9a-fA-F]{40}', so)] so, se = org_repo.run_git_command( ['merge-base', org_rev, other_rev] ) ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0] org_changesets = [] else: raise Exception('Bad alias only git and hg is allowed') return other_changesets, org_changesets, ancestor
class backend():
    """Small convenience wrapper around a single git repository.

    All git work is delegated to the module-level names ``p`` and
    ``DulwichRepo`` (presumably ``dulwich.porcelain`` and
    ``dulwich.repo.Repo`` -- confirm against the file's imports).

    ``self.repo`` and ``self.current_dir`` only exist after ``local_init``
    or ``load_repo`` has succeeded.
    """

    def __init__(self):
        self.username = ""
        self.email = ""
        self.activity = ""
        self.repo_path = ""
        self.repo_name = ""
        # 1 once clone_remote() has been called, 0 otherwise.
        self.isaclone = 0
        # Remote used by clone_remote(), update_local() and push().
        self.cloned_from = "ssh://[email protected]/vikramahuja1001/TurtleCodes"
        # File that get_diff() inspects.
        self.current_file_name = ""

    def set_authorinfo(self, username, email):
        """Remember the author name and email on this instance."""
        self.username = username
        self.email = email

    def local_init(self, repo_name, activity):
        """Create a new repository called ``repo_name`` and remember it.

        On failure a message is printed and ``self.repo`` /
        ``self.repo_path`` are left untouched.
        """
        self.activity = activity
        self.repo_name = repo_name
        try:
            self.repo = p.init(repo_name)
            self.current_dir = os.getcwd()
            # os.path.join keeps an absolute repo_name intact, where plain
            # concatenation produced "<cwd>//abs/path".
            self.repo_path = os.path.join(self.current_dir, self.repo_name)
            print(self.repo_path)
            print("Local Repo Created")
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt / SystemExit like a bare except did.
            print("Repo already exist, delete it first")

    def load_repo(self, repo_name):
        """Open the existing repository ``repo_name`` and remember it."""
        self.repo_name = repo_name
        self.repo = DulwichRepo(self.repo_name)
        self.current_dir = os.getcwd()
        self.repo_path = os.path.join(self.current_dir, self.repo_name)

    def create_file(self, name, content):
        """Write ``content`` to ``name`` inside the repository directory.

        I/O errors are reported with a printed message instead of raised.
        """
        try:
            # The with-block closes the handle even when write() fails.
            with open(os.path.join(self.repo_path, name), 'w') as handle:
                handle.write(content)
        except (IOError, OSError):
            print('Unable to create README, does it already exist?')

    def edit_file(self, name, content):
        """Overwrite ``name`` inside the repository with ``content``.

        Unlike ``create_file`` this lets I/O errors propagate.
        """
        with open(os.path.join(self.repo_path, name), 'w') as handle:
            handle.write(content)

    def add(self, a):
        """Stage ``a``, which is either one path or a list/tuple of paths."""
        print(self.repo_name)
        print(self.repo)
        if isinstance(a, (list, tuple)):
            for path in a:
                p.add(self.repo, path)
        else:
            p.add(self.repo, a)

    def get_status(self):
        """Return ``p.status`` for the repository path.

        Prints a message and returns None when the path does not exist.
        """
        if os.path.exists(self.repo_path):
            print(self.repo_path)
            return p.status(self.repo_path)
        print("Repo does not exist")

    def commit(self, message):
        """Commit with ``message``."""
        p.commit(self.repo, message)

    def get_commit_history(self, f):
        """Return the commits that touched path ``f``.

        Each commit object in the returned list is followed by a ``'\\n'``
        separator element.
        """
        print(self.repo_path)
        walker = self.repo.get_walker(paths=[f], max_entries=None)
        history = []
        for entry in walker:
            history.append(entry.commit)
            print(type(entry))
            print(type(entry.commit))
            history.append('\n')
        return history

    def get_commit_id_and_message(self, f):
        """Return ``"<id>_<message>"`` strings for commits touching ``f``.

        Each string in the returned list is followed by a ``'\\n'``
        separator element.
        """
        print(self.repo_path)
        walker = self.repo.get_walker(paths=[f], max_entries=None)
        lines = []
        for entry in walker:
            lines.append(entry.commit.id + '_' + entry.commit.message)
            print(type(entry))
            print(type(entry.commit))
            lines.append('\n')
        return lines

    def clone_local(self, clone_repo_name):
        """Clone this (local) repository into ``clone_repo_name``."""
        p.clone(self.repo_path, clone_repo_name)

    def clone_remote(self, clone_repo_name):
        """Clone ``self.cloned_from`` into ``clone_repo_name``.

        Also marks this instance as a clone so ``update_local`` will pull.
        """
        self.isaclone = 1
        p.clone(self.cloned_from, clone_repo_name)

    def commit_logs(self):
        """Return ``p.log`` for the repository, or an explanatory string."""
        try:
            if os.path.exists(self.repo_path):
                return p.log(self.repo)
            return "Repo does not exist"
        except Exception:
            return "No commits yet"

    def set_current_file_name(self, f):
        """Select the file that ``get_diff`` works on."""
        self.current_file_name = f

    # Disabled draft kept for reference.
    # Some issues - have to be rectified asap
    # def revert_to_commit(self):
    #     print self.repo_path
    #     r = self.repo
    #     f = "README"
    #     w = r.get_walker(paths=[], max_entries=None)
    #     count = 0
    #     for i in iter(w):
    #         count += 1
    #         print count,
    #         print type(i)
    #         print i
    #         print i.commit.id
    #         a = i.commit.id
    #         #a = a[0:8]
    #         #print i.commit.get_sha_for()
    #         print a
    #     p.reset(self.repo, "hard", a)

    def get_diff(self):
        """Diff consecutive revisions of ``self.current_file_name``.

        Returns a list holding the tree id of every commit that touched the
        file, followed by the result of ``p.diff_tree`` for each pair of
        neighbouring trees (in walker order).
        """
        f = self.current_file_name
        walker = self.repo.get_walker(paths=[f], max_entries=None)
        tree_list = [entry.commit.tree for entry in walker]
        result = list(tree_list)
        for first, second in zip(tree_list, tree_list[1:]):
            result.append(p.diff_tree(self.repo, first, second))
            # Blank line after each diff, as before.
            print("")
        return result

    def update_local(self):
        """Pull from ``self.cloned_from`` when this instance is a clone."""
        if self.isaclone == 1:
            try:
                p.pull(self.repo, self.cloned_from)
            except Exception:
                print("Error")
        else:
            print("Can not update")

    def push(self):
        """Point ``refs/heads/master`` at HEAD and push it to the remote.

        Any failure is printed rather than raised.
        """
        try:
            refs_path = b"refs/heads/master"
            new_id = self.repo[b'HEAD'].id
            self.repo.refs[refs_path] = new_id
            p.push(self.repo.path, self.cloned_from, b"HEAD:" + refs_path)
        except Exception as e:
            print(e)
            print("Error")
def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev): """ Returns lists of changesets that can be merged from org_repo@org_rev to other_repo@other_rev ... and the other way ... and the ancestor that would be used for merge :param org_repo: repo object, that is most likely the original repo we forked from :param org_rev: the revision we want our compare to be made :param other_repo: repo object, most likely the fork of org_repo. It has all changesets that we need to obtain :param other_rev: revision we want out compare to be made on other_repo """ ancestor = None if org_rev == other_rev or org_repo.EMPTY_CHANGESET in (org_rev, other_rev): org_changesets = [] other_changesets = [] ancestor = org_rev elif alias == 'hg': #case two independent repos if org_repo != other_repo: hgrepo = unionrepo.unionrepository(other_repo.baseui, other_repo.path, org_repo.path) # all ancestors of other_rev will be in other_repo and # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot #no remote compare do it on the same repository else: hgrepo = other_repo._repo ancestors = hgrepo.revs("ancestor(id(%s), id(%s))", org_rev, other_rev) if ancestors: # FIXME: picks arbitrary ancestor - but there is usually only one try: ancestor = hgrepo[ancestors.first()].hex() except AttributeError: # removed in hg 3.2 ancestor = hgrepo[ancestors[0]].hex() other_revs = hgrepo.revs( "ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)", other_rev, org_rev, org_rev) other_changesets = [ other_repo.get_changeset(rev) for rev in other_revs ] org_revs = hgrepo.revs( "ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)", org_rev, other_rev, other_rev) org_changesets = [ org_repo.get_changeset(hgrepo[rev].hex()) for rev in org_revs ] elif alias == 'git': if org_repo != other_repo: from dulwich.repo import Repo from dulwich.client import SubprocessGitClient gitrepo = Repo(org_repo.path) SubprocessGitClient(thin_packs=False).fetch( other_repo.path, gitrepo) 
gitrepo_remote = Repo(other_repo.path) SubprocessGitClient(thin_packs=False).fetch( org_repo.path, gitrepo_remote) revs = [] for x in gitrepo_remote.get_walker(include=[other_rev], exclude=[org_rev]): revs.append(x.commit.id) other_changesets = [ other_repo.get_changeset(rev) for rev in reversed(revs) ] if other_changesets: ancestor = other_changesets[0].parents[0].raw_id else: # no changesets from other repo, ancestor is the other_rev ancestor = other_rev else: so, se = org_repo.run_git_command( 'log --reverse --pretty="format: %%H" -s %s..%s' % (org_rev, other_rev)) other_changesets = [ org_repo.get_changeset(cs) for cs in re.findall(r'[0-9a-fA-F]{40}', so) ] so, se = org_repo.run_git_command('merge-base %s %s' % (org_rev, other_rev)) ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0] org_changesets = [] else: raise Exception('Bad alias only git and hg is allowed') return other_changesets, org_changesets, ancestor
class backend():
    """Small convenience wrapper around a single git repository.

    All git work is delegated to the module-level names ``p`` and
    ``DulwichRepo`` (presumably ``dulwich.porcelain`` and
    ``dulwich.repo.Repo`` -- confirm against the file's imports).

    ``self.repo`` and ``self.current_dir`` only exist after ``local_init``
    or ``load_repo`` has succeeded.
    """

    def __init__(self):
        self.username = ""
        self.email = ""
        self.activity = ""
        self.repo_path = ""
        self.repo_name = ""
        # update_local() only pulls when this is 1; nothing in this class
        # sets it, so callers have to.
        self.isaclone = 0
        self.cloned_from = ""

    def set_authorinfo(self, username, email):
        """Remember the author name and email on this instance."""
        self.username = username
        self.email = email

    def local_init(self, repo_name, activity):
        """Create a new repository called ``repo_name`` and remember it.

        On failure a message is printed and ``self.repo`` /
        ``self.repo_path`` are left untouched.
        """
        self.activity = activity
        self.repo_name = repo_name
        try:
            self.repo = p.init(repo_name)
            self.current_dir = os.getcwd()
            # os.path.join keeps an absolute repo_name intact, where plain
            # concatenation produced "<cwd>//abs/path".
            self.repo_path = os.path.join(self.current_dir, self.repo_name)
            print(self.repo_path)
            print("Local Repo Created")
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt / SystemExit like a bare except did.
            print("Repo already exist, delete it first")

    def load_repo(self, repo_name):
        """Open the existing repository ``repo_name`` and remember it."""
        self.repo_name = repo_name
        self.repo = DulwichRepo(self.repo_name)
        self.current_dir = os.getcwd()
        self.repo_path = os.path.join(self.current_dir, self.repo_name)

    def create_file(self, name, content):
        """Write ``content`` to ``name`` inside the repository directory.

        I/O errors are reported with a printed message instead of raised.
        """
        try:
            # The with-block closes the handle even when write() fails.
            with open(os.path.join(self.repo_path, name), 'w') as handle:
                handle.write(content)
        except (IOError, OSError):
            print('Unable to create README, does it already exist?')

    def edit_readme(self, name, content):
        """Overwrite ``name`` inside the repository with ``content``.

        Unlike ``create_file`` this lets I/O errors propagate.
        """
        with open(os.path.join(self.repo_path, name), 'w') as handle:
            handle.write(content)

    def add(self, a):
        """Stage ``a``, which is either one path or a list/tuple of paths."""
        print(self.repo_name)
        print(self.repo)
        if isinstance(a, (list, tuple)):
            for path in a:
                p.add(self.repo, path)
        else:
            p.add(self.repo, a)

    def get_status(self):
        """Print ``p.status`` for the repository path, or a notice if absent."""
        if os.path.exists(self.repo_path):
            print(self.repo_path)
            print(p.status(self.repo_path))
        else:
            print("Repo does not exist")

    def commit(self, message):
        """Commit with ``message``."""
        p.commit(self.repo, message)

    def get_commit_history(self):
        """Print a numbered list of the walker entries touching ``README``."""
        print(self.repo_path)
        f = "README"
        walker = self.repo.get_walker(paths=[f], max_entries=None)
        for count, entry in enumerate(walker, 1):
            # One line "<n> <entry>", exactly what `print count,` followed
            # by `print i` produced.
            print("%d %s" % (count, entry))
            print(entry.commit)

    def clone_local(self, clone_repo_name):
        """Clone this (local) repository into ``clone_repo_name``."""
        p.clone(self.repo_path, clone_repo_name)

    @staticmethod
    def clone_remote(remote_repo_name, clone_repo_name):
        """Clone ``remote_repo_name`` into ``clone_repo_name``.

        The original definition had no ``self`` parameter, so calling it on
        an instance failed with a TypeError. As a static method it accepts
        the same two arguments from either the class or an instance.
        """
        p.clone(remote_repo_name, clone_repo_name)

    def commit_logs(self):
        """Print ``p.log`` for the repository, or an explanatory message."""
        try:
            if os.path.exists(self.repo_path):
                print(p.log(self.repo))
            else:
                print("Repo does not exist")
        except Exception:
            print("No commits yet")

    # Disabled draft kept for reference.
    # Some issues - have to be rectified asap
    # def revert_to_commit(self):
    #     print self.repo_path
    #     r = self.repo
    #     f = "README"
    #     w = r.get_walker(paths=[], max_entries=None)
    #     count = 0
    #     for i in iter(w):
    #         count += 1
    #         print count,
    #         print type(i)
    #         print i
    #         print i.commit.id
    #         a = i.commit.id
    #         #a = a[0:8]
    #         #print i.commit.get_sha_for()
    #         print a
    #     p.reset(self.repo, "hard", a)

    def get_diff(self):
        """Diff the first and fourth tree of the ``README`` history.

        Prints every tree id found and their count. When fewer than four
        commits touched the file there is nothing at index 3, so a notice
        is printed instead of raising IndexError.
        """
        f = "README"
        walker = self.repo.get_walker(paths=[f], max_entries=None)
        tree_list = []
        for entry in walker:
            tree_list.append(entry.commit.tree)
            print(entry.commit.tree)
        print(len(tree_list))
        if len(tree_list) < 4:
            print("Not enough commits to diff")
            return
        p.diff_tree(self.repo, tree_list[0], tree_list[3])

    def update_local(self):
        """Pull from ``self.cloned_from`` when this instance is a clone."""
        if self.isaclone == 1:
            try:
                p.pull(self.repo, self.cloned_from)
            except Exception:
                print("Error")
        else:
            print("Can not update")
def find_git_mtimes(self, context, silent_build):
    """
    Work out modified times from git history for the files we care about

    Returns a dictionary of ``{path relative to context.parent_dir:
    author_time}`` for the selected files in the index of the repository
    at ``context.git_root``. An empty dictionary is returned when
    ``context.use_git_timestamps`` is falsy.

    Raises HarpoonError when the repository is a shallow clone.
    """
    if not context.use_git_timestamps:
        return {}

    def matches_any(path, patterns):
        """Say whether path matches at least one fnmatch pattern"""
        return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)

    parent_dir = context.parent_dir
    root_folder = context.git_root

    # In a shallow clone every file would just get the timestamp of the
    # latest commit, so git timestamps are meaningless there
    if context.use_git_timestamps:
        shallow_marker = os.path.join(root_folder, ".git", "shallow")
        if os.path.exists(shallow_marker):
            raise HarpoonError("Can't get git timestamps from a shallow clone", directory=parent_dir)

    git = Repo(root_folder)
    mtimes = {}
    all_files = set(git.open_index())

    use_files = set()
    for filename in all_files:
        relpath = os.path.relpath(os.path.join(root_folder, filename), parent_dir)

        # Files outside of parent_dir are never of interest
        if relpath.startswith("../"):
            continue

        # A non-boolean use_git_timestamps is a list of globs that limits
        # which files get their timestamp from git
        wanted = context.use_git_timestamps
        if wanted and type(wanted) is not bool:
            if not matches_any(relpath, wanted):
                continue

        exclude = context.exclude
        include = context.include

        if exclude:
            # With excludes, a file is in unless some exclude glob hits it
            matched = not matches_any(relpath, exclude)
        else:
            # Without excludes, a file is in by default only when there
            # are no includes either
            matched = not include

        # An include glob always wins
        if include and matches_any(relpath, include):
            matched = True

        if matched:
            use_files.add(filename)

    if not silent_build:
        log.info("Finding modified times for %s/%s git controlled files in %s", len(use_files), len(all_files), root_folder)

    for entry in git.get_walker(paths=use_files):
        date = entry.commit.author_time
        for changes in entry.changes():
            if type(changes) is not list:
                changes = [changes]

            for change in changes:
                path = change.new.path
                if not (root_folder and path and parent_dir):
                    continue

                new_relpath = os.path.relpath(os.path.join(root_folder, path), parent_dir)
                if path not in use_files or new_relpath.startswith("../"):
                    continue

                # Keep the largest author time seen so far for this path
                if mtimes.get(new_relpath, 0) < date:
                    mtimes[new_relpath] = date

        # Stop walking once every wanted file has an entry
        if not (use_files - set(mtimes)):
            break

    return mtimes
class Gittle(object): """All paths used in Gittle external methods must be paths relative to the git repository """ DEFAULT_COMMIT = 'HEAD' DEFAULT_BRANCH = 'master' DEFAULT_REMOTE = 'origin' DEFAULT_MESSAGE = '**No Message**' DEFAULT_USER_INFO = { 'name': None, 'email': None, } DIFF_FUNCTIONS = { 'classic': utils.git.classic_tree_diff, 'dict': utils.git.dict_tree_diff, 'changes': utils.git.dict_tree_diff } DEFAULT_DIFF_TYPE = 'dict' HIDDEN_REGEXES = [ # Hide git directory r'.*\/\.git\/.*', ] # References REFS_BRANCHES = 'refs/heads/' REFS_REMOTES = 'refs/remotes/' REFS_TAGS = 'refs/tags/' # Name pattern truths # Used for detecting if files are : # - deleted # - added # - changed PATTERN_ADDED = (False, True) PATTERN_REMOVED = (True, False) PATTERN_MODIFIED = (True, True) # Permissions MODE_DIRECTORY = 0o40000 # Used to tell if a tree entry is a directory # Tree depth MAX_TREE_DEPTH = 1000 # Acceptable Root paths ROOT_PATHS = (os.path.curdir, os.path.sep) def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs): if isinstance(repo_or_path, DulwichRepo): self.repo = repo_or_path elif isinstance(repo_or_path, Gittle): self.repo = DulwichRepo(repo_or_path.path) elif isinstance(repo_or_path, basestring): path = os.path.abspath(repo_or_path) self.repo = DulwichRepo(path) else: logging.warning('Repo is of type %s' % type(repo_or_path)) raise Exception('Gittle must be initialized with either a dulwich repository or a string to the path') # Set path self.path = self.repo.path # The remote url self.origin_uri = origin_uri # Report client activty self._report_activity = report_activity # Build ignore filter self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES) self.hidden_regexes.extend(self._get_ignore_regexes()) self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes) self.filters = [ self.ignore_filter, ] # Get authenticator if auth: self.authenticator = auth else: self.auth(*args, **kwargs) def 
report_activity(self, *args, **kwargs): if not self._report_activity: return return self._report_activity(*args, **kwargs) def _format_author(self, name, email): return "%s <%s>" % (name, email) def _format_userinfo(self, userinfo): name = userinfo.get('name') email = userinfo.get('email') if name and email: return self._format_author(name, email) return None def _format_ref(self, base, extra): return ''.join([base, extra]) def _format_ref_branch(self, branch_name): return self._format_ref(self.REFS_BRANCHES, branch_name) def _format_ref_remote(self, remote_name): return self._format_ref(self.REFS_REMOTES, remote_name) def _format_ref_tag(self, tag_name): return self._format_ref(self.REFS_TAGS, tag_name) @property def head(self): """Return SHA of the current HEAD """ return self.repo.head() @property def is_bare(self): """Bare repositories have no working directories or indexes """ return self.repo.bare @property def is_working(self): return not(self.is_bare) def has_index(self): """Opposite of is_bare """ return self.repo.has_index() @property def has_commits(self): """ If the repository has no HEAD we consider that is has no commits """ try: self.repo.head() except KeyError: return False return True def ref_walker(self, ref=None): """ Very simple, basic walker """ ref = ref or 'HEAD' sha = self._commit_sha(ref) for entry in self.repo.get_walker(sha): yield entry.commit def branch_walker(self, branch): branch = branch or self.active_branch ref = self._format_ref_branch(branch) return self.ref_walker(ref) def commit_info(self, start=0, end=None, branch=None): """Return a generator of commits with all their attached information """ if not self.has_commits: return [] commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)] if not end: return commits return commits[start:end] @funky.uniquify def recent_contributors(self, n=None, branch=None): n = n or 10 return funky.pluck(self.commit_info(end=n, branch=branch), 'author') @property def 
commit_count(self): try: return len(self.ref_walker()) except KeyError: return 0 def commits(self): """Return a list of SHAs for all the concerned commits """ return [commit['sha'] for commit in self.commit_info()] @property def git_dir(self): return self.repo.controldir() def auth(self, *args, **kwargs): self.authenticator = GittleAuth(*args, **kwargs) return self.authenticator # Generate a branch selector (used for pushing) def _wants_branch(self, branch_name=None): branch_name = branch_name or self.active_branch refs_key = self._format_ref_branch(branch_name) sha = self.branches[branch_name] def wants_func(old): refs_key = self._format_ref_branch(branch_name) return { refs_key: sha } return wants_func def _get_ignore_regexes(self): gitignore_filename = os.path.join(self.path, '.gitignore') if not os.path.exists(gitignore_filename): return [] lines = open(gitignore_filename).readlines() globers = map(lambda line: line.rstrip(), lines) return utils.paths.globers_to_regex(globers) # Get the absolute path for a file in the git repo def abspath(self, repo_file): return os.path.abspath( os.path.join(self.path, repo_file) ) # Get the relative path from the absolute path def relpath(self, abspath): return os.path.relpath(abspath, self.path) @property def last_commit(self): return self[self.repo.head()] @property def index(self): return self.repo.open_index() @classmethod def init(cls, path, bare=None, *args, **kwargs): """Initialize a repository""" mkdir_safe(path) # Constructor to use if bare: constructor = DulwichRepo.init_bare else: constructor = DulwichRepo.init # Create dulwich repo repo = constructor(path) # Create Gittle repo return cls(repo, *args, **kwargs) @classmethod def init_bare(cls, *args, **kwargs): kwargs.setdefault('bare', True) return cls.init(*args, **kwargs) @classmethod def is_repo(cls, path): """Returns True if path is a git repository, False if it is not""" try: repo = Gittle(path) except NotGitRepository: return False else: return True def 
get_client(self, origin_uri=None, **kwargs): # Get the remote URL origin_uri = origin_uri or self.origin_uri # Fail if inexistant if not origin_uri: raise InvalidRemoteUrl() client_kwargs = {} auth_kwargs = self.authenticator.kwargs() client_kwargs.update(auth_kwargs) client_kwargs.update(kwargs) client_kwargs.update({ 'report_activity': self.report_activity }) client, remote_path = get_transport_and_path(origin_uri, **client_kwargs) return client, remote_path def push_to(self, origin_uri, branch_name=None, progress=None): selector = self._wants_branch(branch_name=branch_name) client, remote_path = self.get_client(origin_uri) return client.send_pack( remote_path, selector, self.repo.object_store.generate_pack_contents, progress=progress ) # Like: git push def push(self, origin_uri=None, branch_name=None, progress=None): return self.push_to(origin_uri, branch_name, progress) # Not recommended at ALL ... !!! def dirty_pull_from(self, origin_uri, branch_name=None): # Remove all previously existing data rmtree(self.path) mkdir_safe(self.path) self.repo = DulwichRepo.init(self.path) # Fetch brand new copy from remote return self.pull_from(origin_uri, branch_name) def pull_from(self, origin_uri, branch_name=None): return self.fetch(origin_uri) # Like: git pull def pull(self, origin_uri=None, branch_name=None): return self.pull_from(origin_uri, branch_name) def fetch_remote(self, origin_uri=None): # Get client client, remote_path = self.get_client(origin_uri=origin_uri) # Fetch data from remote repository remote_refs = client.fetch(remote_path, self.repo) return remote_refs def _setup_fetched_refs(self, refs, origin, bare): remote_tags = utils.git.subrefs(refs, 'refs/tags') remote_heads = utils.git.subrefs(refs, 'refs/heads') # Filter refs clean_remote_tags = utils.git.clean_refs(remote_tags) clean_remote_heads = utils.git.clean_refs(remote_heads) # Base of new refs heads_base = 'refs/remotes/' + origin if bare: heads_base = 'refs/heads' # Import branches 
self.import_refs( heads_base, clean_remote_heads ) # Import tags self.import_refs( 'refs/tags', clean_remote_tags ) # Update HEAD for k, v in utils.git.clean_refs(refs).items(): self[k] = v def fetch(self, origin_uri=None, bare=None, origin=None): bare = bare or False origin = origin or self.DEFAULT_REMOTE # Remote refs remote_refs = self.fetch_remote(origin_uri) # Update head # Hit repo because head doesn't yet exist so # print("REFS = %s" % remote_refs) # If no refs (empty repository() if not remote_refs: return # Update refs (branches, tags, HEAD) self._setup_fetched_refs(remote_refs, origin, bare) # Checkout working directories if not bare and self.has_commits: self.checkout_all() else: self.update_server_info() @classmethod def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs): """Clone a remote repository""" mkdir_safe(local_path) # Initialize the local repository if bare: local_repo = cls.init_bare(local_path) else: local_repo = cls.init(local_path) repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs) repo.fetch(bare=bare) # Add origin repo.add_remote('origin', origin_uri) return repo @classmethod def clone_bare(cls, *args, **kwargs): """Same as .clone except clones to a bare repository by default """ kwargs.setdefault('bare', True) return cls.clone(*args, **kwargs) def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs): if not tree: # If no tree then stage files modified_files = files or self.modified_files logging.info("STAGING : %s" % modified_files) self.repo.stage(modified_files) # Messages message = message or self.DEFAULT_MESSAGE author_msg = self._format_userinfo(author) committer_msg = self._format_userinfo(committer) return self.repo.do_commit( message=message, author=author_msg, committer=committer_msg, encoding='UTF-8', tree=tree, *args, **kwargs ) def _tree_from_structure(self, structure): # TODO : Support directories tree = Tree() for 
file_info in structure: # str only try: data = file_info['data'].encode('ascii') name = file_info['name'].encode('ascii') mode = file_info['mode'] except: # Skip file on encoding errors continue blob = Blob() blob.data = data # Store file's contents self.repo.object_store.add_object(blob) # Add blob entry tree.add( name, mode, blob.id ) # Store tree self.repo.object_store.add_object(tree) return tree.id # Like: git commmit -a def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs): user_info = { 'name': name, 'email': email, } return self._commit( committer=user_info, author=user_info, message=message, files=files, *args, **kwargs ) def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs): """Main use is to do commits directly to bare repositories For example doing a first Initial Commit so the repo can be cloned and worked on right away """ if not structure: return tree = self._tree_from_structure(structure) user_info = { 'name': name, 'email': email, } return self._commit( committer=user_info, author=user_info, message=message, tree=tree, *args, **kwargs ) # Push all local commits # and pull all remote commits def sync(self, origin_uri=None): self.push(origin_uri) return self.pull(origin_uri) def lookup_entry(self, relpath, trackable_files=set()): if not relpath in trackable_files: raise KeyError abspath = self.abspath(relpath) with open(abspath, 'rb') as git_file: data = git_file.read() s = sha1() s.update("blob %u\0" % len(data)) s.update(data) return (s.hexdigest(), os.stat(abspath).st_mode) @property @funky.transform(set) def tracked_files(self): return list(self.index) @property @funky.transform(set) def raw_files(self): return utils.paths.subpaths(self.path) @property @funky.transform(set) def ignored_files(self): return utils.paths.subpaths(self.path, filters=self.filters) @property @funky.transform(set) def trackable_files(self): return self.raw_files - self.ignored_files @property 
@funky.transform(set) def untracked_files(self): return self.trackable_files - self.tracked_files """ @property @funky.transform(set) def modified_staged_files(self): "Checks if the file has changed since last commit" timestamp = self.last_commit.commit_time index = self.index return [ f for f in self.tracked_files if index[f][1][0] > timestamp ] """ # Return a list of tuples # representing the changed elements in the git tree def _changed_entries(self, ref=None): ref = ref or self.DEFAULT_COMMIT if not self.has_commits: return [] obj_sto = self.repo.object_store tree_id = self[ref].tree names = self.trackable_files lookup_func = partial(self.lookup_entry, trackable_files=names) # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...] tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False) return list(tree_diff) @funky.transform(set) def _changed_entries_by_pattern(self, pattern): changed_entries = self._changed_entries() filtered_paths = None #if the pattern is PATTERN_MODIFIED, should check the sha if self.PATTERN_MODIFIED == pattern: filtered_paths = [ funky.first_true(names) for names, modes, sha in changed_entries if tuple(map(bool, names)) == pattern and funky.first_true(names) and sha[0] == sha[1] ] else : filtered_paths = [ funky.first_true(names) for names, modes, sha in changed_entries if tuple(map(bool, names)) == pattern and funky.first_true(names) ] return filtered_paths @property @funky.transform(set) def removed_files(self): return self._changed_entries_by_pattern(self.PATTERN_REMOVED) - self.ignored_files @property @funky.transform(set) def added_files(self): return self._changed_entries_by_pattern(self.PATTERN_ADDED) - self.ignored_files @property @funky.transform(set) def modified_files(self): modified_files = self._changed_entries_by_pattern(self.PATTERN_MODIFIED) - self.ignored_files return modified_files @property @funky.transform(set) def modified_unstaged_files(self): timestamp = 
self.last_commit.commit_time return [ f for f in self.tracked_files if os.stat(self.abspath(f)).st_mtime > timestamp ] @property def pending_files(self): """ Returns a list of all files that could be possibly staged """ # Union of both return self.modified_files | self.added_files | self.removed_files @property def pending_files_by_state(self): files = { 'modified': self.modified_files, 'added': self.added_files, 'removed': self.removed_files } # "Flip" the dictionary return { path: state for state, paths in files.items() for path in paths } """ @property @funky.transform(set) def modified_files(self): return self.modified_staged_files | self.modified_unstaged_files """ # Like: git add @funky.arglist_method def stage(self, files): return self.repo.stage(files) def add(self, *args, **kwargs): return self.stage(*args, **kwargs) # Like: git rm @funky.arglist_method def rm(self, files, force=False): index = self.index index_files = filter(lambda f: f in index, files) for f in index_files: del self.index[f] return index.write() def mv_fs(self, file_pair): old_name, new_name = file_pair os.rename(old_name, new_name) # Like: git mv @funky.arglist_method def mv(self, files_pair): index = self.index files_in_index = filter(lambda f: f[0] in index, files_pair) map(self.mv_fs, files_in_index) old_files = map(funky.first, files_in_index) new_files = map(funky.last, files_in_index) self.add(new_files) self.rm(old_files) self.add(old_files) return @working_only def _checkout_tree(self, tree): return build_index_from_tree( self.repo.path, self.repo.index_path(), self.repo.object_store, tree ) def checkout_all(self, commit_sha=None): commit_sha = commit_sha or self.head commit_tree = self._commit_tree(commit_sha) # Rebuild index from the current tree return self._checkout_tree(commit_tree) def checkout(self, ref): """Checkout a given ref or SHA """ self.repo.refs.set_symbolic_ref('HEAD', ref) commit_tree = self._commit_tree(ref) # Rebuild index from the current tree return 
self._checkout_tree(commit_tree) @funky.arglist_method def reset(self, files, commit='HEAD'): pass def rm_all(self): # if we go at the index via the property, it is reconstructed # each time and therefore clear() doesn't have the desired effect, # therefore, we cache it in a variable and use that. i = self.index i.clear() return i.write() def _to_commit(self, commit_obj): """Allows methods to accept both SHA's or dulwich Commit objects as arguments """ if isinstance(commit_obj, basestring): return self.repo[commit_obj] return commit_obj def _commit_sha(self, commit_obj): """Extracts a Dulwich commits SHA """ if utils.git.is_sha(commit_obj): return commit_obj elif isinstance(commit_obj, basestring): # Can't use self[commit_obj] to avoid infinite recursion commit_obj = self.repo[self.dwim_reference(commit_obj)] return commit_obj.id def dwim_reference(self, ref): """Dwim resolves a short reference to a full reference """ # Formats of refs we want to try in order formats = [ "%s", "refs/%s", "refs/tags/%s", "refs/heads/%s", "refs/remotes/%s", "refs/remotes/%s/HEAD", ] for f in formats: try: fullref = f % ref if not fullref in self.repo: continue return fullref except: continue raise Exception("Could not resolve ref") def blob_data(self, sha): """Return a blobs content for a given SHA """ return self[sha].data # Get the nth parent back for a given commit def get_parent_commit(self, commit, n=None): """ Recursively gets the nth parent for a given commit Warning: Remember that parents aren't the previous commits """ if n is None: n = 1 commit = self._to_commit(commit) parents = commit.parents if n <= 0 or not parents: # Return a SHA return self._commit_sha(commit) parent_sha = parents[0] parent = self[parent_sha] # Recur return self.get_parent_commit(parent, n - 1) def get_previous_commit(self, commit_ref, n=None): commit_sha = self._parse_reference(commit_ref) n = n or 1 commits = self.commits() return funky.next(commits, commit_sha, n=n, default=commit_sha) def 
_parse_reference(self, ref_string): # COMMIT_REF~x if '~' in ref_string: ref, count = ref_string.split('~') count = int(count) commit_sha = self._commit_sha(ref) return self.get_previous_commit(commit_sha, count) return self._commit_sha(ref_string) def _commit_tree(self, commit_sha): """Return the tree object for a given commit """ return self[commit_sha].tree def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True): diff_type = diff_type or self.DEFAULT_DIFF_TYPE diff_func = self.DIFF_FUNCTIONS[diff_type] if not compare_to: compare_to = self.get_previous_commit(commit_sha) return self._diff_between(compare_to, commit_sha, diff_function=diff_func) def diff_working(self, ref=None, filter_binary=True): """Diff between the current working directory and the HEAD """ return utils.git.diff_changes_paths( self.repo.object_store, self.path, self._changed_entries(ref=ref), filter_binary=filter_binary ) def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None): """Returns a dict of the following Format : { "directory/filename.txt": { 'name': 'filename.txt', 'path': "directory/filename.txt", "sha": "xxxxxxxxxxxxxxxxxxxx", "data": "blablabla", "mode": 0xxxxx", }, ... 
} """ # Default values context = {} is_tree = is_tree or False parent_path = parent_path or '' if is_tree: tree = self[commit_sha] else: tree = self[self._commit_tree(commit_sha)] for entry in tree.items(): # Check if entry is a directory if entry.mode == self.MODE_DIRECTORY: context.update( self.get_commit_files(entry.sha, parent_path=os.path.join(parent_path, entry.path), is_tree=True, paths=paths) ) continue subpath = os.path.join(parent_path, entry.path) # Only add the files we want if not(paths is None or subpath in paths): continue # Add file entry context[subpath] = { 'name': entry.path, 'path': subpath, 'mode': entry.mode, 'sha': entry.sha, 'data': self.blob_data(entry.sha), } return context def file_versions(self, path): """Returns all commits where given file was modified """ versions = [] commits_info = self.commit_info() seen_shas = set() for commit in commits_info: try: files = self.get_commit_files(commit['sha'], paths=[path]) file_path, file_data = files.items()[0] except IndexError: continue file_sha = file_data['sha'] if file_sha in seen_shas: continue else: seen_shas.add(file_sha) # Add file info commit['file'] = file_data versions.append(file_data) return versions def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True): """Internal method for getting a diff between two commits Please use .diff method unless you have very specific needs """ # If commit is first commit (new_commit_sha == old_commit_sha) # then compare to an empty tree if new_commit_sha == old_commit_sha: old_tree = Tree() else: old_tree = self._commit_tree(old_commit_sha) new_tree = self._commit_tree(new_commit_sha) return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary) def changes(self, *args, **kwargs): """ List of changes between two SHAs Returns a list of lists of tuples : [ [ (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) ], ... 
] """ kwargs['diff_type'] = 'changes' return self.diff(*args, **kwargs) def changes_count(self, *args, **kwargs): return len(self.changes(*args, **kwargs)) def _refs_by_pattern(self, pattern): refs = self.refs def item_filter(key_value): """Filter only concered refs""" key, value = key_value return key.startswith(pattern) def item_map(key_value): """Rewrite keys""" key, value = key_value new_key = key[len(pattern):] return (new_key, value) return dict( map(item_map, filter( item_filter, refs.items() ) ) ) @property def refs(self): return self.repo.get_refs() def set_refs(refs_dict): for k, v in refs_dict.items(): self.repo[k] = v def import_refs(self, base, other): return self.repo.refs.import_refs(base, other) @property def branches(self): return self._refs_by_pattern(self.REFS_BRANCHES) @property def active_branch(self): """Returns the name of the active branch, or None, if HEAD is detached """ x = self.repo.refs.read_ref('HEAD') if not x.startswith(SYMREF): return None else: symref = x[len(SYMREF):] if not symref.startswith(self.REFS_BRANCHES): return None else: return symref[len(self.REFS_BRANCHES):] @property def active_sha(self): """Deprecated equivalent to head property """ return self.head @property def remote_branches(self): return self._refs_by_pattern(self.REFS_REMOTES) @property def tags(self): return self._refs_by_pattern(self.REFS_TAGS) @property def remotes(self): """ Dict of remotes { 'origin': 'http://friendco.de/some_user/repo.git', ... 
} """ config = self.repo.get_config() return { keys[1]: values['url'] for keys, values in config.items() if keys[0] == 'remote' } def add_remote(self, remote_name, remote_url): # Get repo's config config = self.repo.get_config() # Add new entries for remote config.set(('remote', remote_name), 'url', remote_url) config.set(('remote', remote_name), 'fetch', "+refs/heads/*:refs/remotes/%s/*" % remote_name) # Write to disk config.write_to_path() return remote_name def add_ref(self, new_ref, old_ref): self.repo.refs[new_ref] = old_ref self.update_server_info() def remove_ref(self, ref_name): # Returns False if ref doesn't exist if not ref_name in self.repo.refs: return False del self.repo.refs[ref_name] self.update_server_info() return True def create_branch(self, base_branch, new_branch, tracking=None): """Try creating a new branch which tracks the given remote if such a branch does not exist then branch off a local branch """ # The remote to track tracking = self.DEFAULT_REMOTE # Already exists if new_branch in self.branches: raise Exception("branch %s already exists" % new_branch) # Get information about remote_branch remote_branch = os.path.sep.join([tracking, base_branch]) # Fork Local if base_branch in self.branches: base_ref = self._format_ref_branch(base_branch) # Fork remote elif remote_branch in self.remote_branches: base_ref = self._format_ref_remote(remote_branch) # TODO : track else: raise Exception("Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking)) # Reference of new branch new_ref = self._format_ref_branch(new_branch) # Copy reference to create branch self.add_ref(new_ref, base_ref) return new_ref def create_orphan_branch(self, new_branch, empty_index=None): """ Create a new branch with no commits in it. Technically, just points HEAD to a non-existent branch. The actual branch will only be created if something is committed. 
This is equivalent to: git checkout --orphan <new_branch>, Unless empty_index is set to True, in which case the index will be emptied along with the file-tree (which is always emptied). Against a clean working tree, this is equivalent to: git checkout --orphan <new_branch> git reset --merge """ if new_branch in self.branches: raise Exception("branch %s already exists" % new_branch) new_ref = self._format_ref_branch(new_branch) self.repo.refs.set_symbolic_ref('HEAD', new_ref) if self.is_working: if empty_index: self.rm_all() self.clean_working() return new_ref def remove_branch(self, branch_name): ref = self._format_ref_branch(branch_name) return self.remove_ref(ref) def switch_branch(self, branch_name, tracking=None, create=None): """Changes the current branch """ if create is None: create = True # Check if branch exists if not branch_name in self.branches: self.create_branch(branch_name, branch_name, tracking=tracking) # Get branch reference branch_ref = self._format_ref_branch(branch_name) # Change main branch self.repo.refs.set_symbolic_ref('HEAD', branch_ref) if self.is_working: # Remove all files self.clean_working() # Add files for the current branch self.checkout_all() def create_tag(self, tag_name, target): ref = self._format_ref_tag(tag_name) return self.add_ref(ref, self._parse_reference(target)) def remove_tag(self, tag_name): ref = self._format_ref_tag(tag_name) return self.remove_ref(ref) def clean(self, force=None, directories=None): untracked_files = self.untracked_files map(os.remove, untracked_files) return untracked_files def clean_working(self): """Purges all the working (removes everything except .git) used by checkout_all to get clean branch switching """ return self.clean() def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None): tree = self[tree_sha] structure = {} if depth is None: depth = self.MAX_TREE_DEPTH elif depth == 0: return structure for entry in tree.items(): # tree if entry.mode == self.MODE_DIRECTORY: # Recur 
structure[entry.path] = self._get_fs_structure(entry.sha, depth=depth - 1, parent_sha=tree_sha) # commit else: structure[entry.path] = entry.sha structure['.'] = tree_sha structure['..'] = parent_sha or tree_sha return structure def _get_fs_structure_by_path(self, tree_sha, path): parts = path.split(os.path.sep) depth = len(parts) + 1 structure = self._get_fs_structure(tree_sha, depth=depth) return funky.subkey(structure, parts) def commit_ls(self, ref, subpath=None): """List a "directory" for a given commit using the tree of that commit """ tree_sha = self._commit_tree(ref) # Root path if subpath in self.ROOT_PATHS or not subpath: return self._get_fs_structure(tree_sha, depth=1) # Any other path return self._get_fs_structure_by_path(tree_sha, subpath) def commit_file(self, ref, path): """Return info on a given file for a given commit """ name, info = self.get_commit_files(ref, paths=[path]).items()[0] return info def commit_tree(self, ref, *args, **kwargs): tree_sha = self._commit_tree(ref) return self._get_fs_structure(tree_sha, *args, **kwargs) def update_server_info(self): if not self.is_bare: return update_server_info(self.repo) def _is_fast_forward(self): pass def _merge_fast_forward(self): pass def __hash__(self): """This is required otherwise the memoize function will just mess it up """ return hash(self.path) def __getitem__(self, key): try: sha = self._parse_reference(key) except: raise KeyError(key) return self.repo[sha] def __setitem__(self, key, value): try: key = self.dwim_reference(key) except: pass self.repo[key] = value def __contains__(self, key): try: key = self.dwim_reference(key) except: pass return key in self.repo def __delitem__(self, key): try: key = self.dwim_reference(key) except: raise KeyError(key) self.remove_ref(key) # Alias to clone_bare fork = clone_bare log = commit_info diff_count = changes_count contributors = recent_contributors
def find_git_mtimes(self, context, silent_build):
    """
    Work out modified times from git history for the files we care about

    Returns a dictionary mapping paths (relative to ``context.parent_dir``)
    to the highest ``author_time`` among the walked commits that changed
    them. An empty dictionary is returned when
    ``context.use_git_timestamps`` is falsy.

    Raises HarpoonError when the git root is a shallow clone.
    """
    if not context.use_git_timestamps:
        return {}

    parent_dir = context.parent_dir
    root_folder = context.git_root

    # A shallow clone can't give us timestamps
    # because every file would get the time of the latest commit
    shallow_marker = os.path.join(root_folder, ".git", "shallow")
    if context.use_git_timestamps and os.path.exists(shallow_marker):
        raise HarpoonError("Can't get git timestamps from a shallow clone", directory=parent_dir)

    git = Repo(root_folder)
    mtimes = {}
    all_files = set(git.open_index())

    use_files = set()
    use_files_relpaths = set()
    for filename in all_files:
        location = os.path.join(root_folder, filename.decode('utf-8'))
        relpath = os.path.relpath(location, context.parent_dir)

        # Files outside the parent_dir are not interesting
        if relpath.startswith("../"):
            continue

        # When use_git_timestamps is a list of globs, the file must match one of them
        wanted = context.use_git_timestamps
        if wanted and type(wanted) is not bool:
            if not any(fnmatch.fnmatch(relpath, glob) for glob in wanted):
                continue

        # Start off as matched when
        # * there is a context.exclude
        # * there is neither a context.exclude nor a context.include
        matched = bool(context.exclude) or not context.include

        # Matching an exclude takes the file out
        if context.exclude and any(fnmatch.fnmatch(relpath, glob) for glob in context.exclude):
            matched = False

        # Matching an include puts the file (back) in
        if context.include and any(fnmatch.fnmatch(relpath, glob) for glob in context.include):
            matched = True

        # Either escaped every exclude or was picked up by an include
        if matched:
            use_files.add(filename)
            use_files_relpaths.add(relpath)

    if not silent_build:
        log.info("Finding modified times for %s/%s git controlled files in %s", len(use_files), len(all_files), root_folder)

    first_commit = None
    cached_commit, cached_mtimes = self.get_cached_mtimes(root_folder, use_files_relpaths)
    for entry in git.get_walker():
        commit_id = entry.commit.id.decode('utf-8')
        if first_commit is None:
            first_commit = commit_id

        # Everything from the cached commit onwards is already known
        # What we found in this walk overrides the cached values
        if cached_commit and commit_id == cached_commit:
            cached_mtimes.update(mtimes)
            mtimes = cached_mtimes
            break

        date = entry.commit.author_time
        added = False
        for changes in entry.changes():
            if type(changes) is not list:
                changes = [changes]

            for change in changes:
                changed_path = change.new.path
                if not (root_folder and changed_path and context.parent_dir):
                    continue
                if changed_path not in use_files:
                    continue

                changed_location = os.path.join(root_folder, changed_path.decode('utf-8'))
                new_relpath = os.path.relpath(changed_location, context.parent_dir).encode('utf-8')
                if new_relpath.decode('utf-8').startswith("../"):
                    continue

                if mtimes.get(new_relpath, 0) < date:
                    mtimes[new_relpath] = date
                    added = True

        # No point walking further once every file has a time
        if added and len(use_files - set(mtimes)) == 0:
            break

    # Keys collected during the walk are bytes, normalise them to text
    mtimes = {
        (fn.decode('utf-8') if hasattr(fn, "decode") else fn): mtime
        for fn, mtime in mtimes.items()
    }

    if first_commit != cached_commit:
        self.set_cached_mtimes(root_folder, first_commit, mtimes, use_files_relpaths)
    return mtimes
raise SyntaxError("Bad mix of whitespace on %s line %d" % (fn, num + 1)) all = {} if not args.revision: for p, _, flist in os.walk("menu"): for fn in flist: text = open(os.path.join(p, fn), "r", encoding="utf-8").read() check_indents(text, fn) all[fn] = json.loads(text) else: # Thanks to Jelmer Vernooij for spelling this one out for me :-D repo = Repo('.') rev = args.revision.encode("ascii") for r in repo.get_walker(): if r.commit.id.startswith(rev): rev = r.commit.id break menu = porcelain.get_object_by_path(repo, "menu", rev) for name, mode, object_id in menu.iteritems(): text = str(repo[object_id].data, "utf-8") check_indents(text, name) all[name] = json.loads(text) if args.weeks: dates = [ datetime.datetime.strptime(e["start"], "%Y-%m-%d") for e in all.values() ] # Using max(dates) instead of just today's date so we're a
class Analyser(object):
    """Collects per-author and per-file commit statistics for a repository.

    ``do_analyse`` walks the commits of the repository and fills
    ``self.authors`` (author name -> author object holding its commits) and
    ``self.file_paths`` (path -> repo file object holding its commits). The
    ``report_*`` methods print or export what was collected.
    """

    #########################
    ## STATIC CLASS MEMBER ##
    #########################
    CHANGE_TYPES = (
        'add',
        'modify',
        'delete',
    )

    ####################
    ## PUBLIC METHODS ##
    ####################
    def __init__(
            self,
            repo_name,
            searching_paths,
            allowed_endings,
            exclude_patters,
            exclude_paths,
    ):
        """Open the repository and remember the filter settings.

        :param repo_name: handed to ``Repo`` to open the repository
        :param searching_paths: path prefixes a file has to start with
        :param allowed_endings: file endings that get analysed
        :param exclude_patters: regular expressions; a path matching one of
            them from its start is ignored
        :param exclude_paths: path prefixes that are ignored
        """
        # Repository
        self.repo_name = repo_name
        self.repo = Repo(repo_name)

        # File infos
        self.file_paths = {}
        self.deleted_paths = {}

        # Commits
        self.authors = {}
        self.commits = 0

        # Searched
        self.SEARCHING_PATHS = searching_paths
        self.ALLOWED_ENDINGS = allowed_endings

        # Excludes
        self.EXCLUDE_PATTERNS = exclude_patters
        self.EXCLUDE_PATHS = exclude_paths

    def do_analyse(self):
        """Walk all commits and record their authors and changed files."""
        for change_tree in self.repo.get_walker():
            author_name = change_tree.commit.author

            if author_name not in self.authors:
                self.authors[author_name] = Author.Author(name=author_name)

            self.authors[author_name].commits.append(change_tree.commit)

            for tree_change in change_tree.changes():
                # Save tree data
                self._save_tree_data(change_tree=change_tree,
                                     tree_change=tree_change)

    def report_file_endings(self):
        """Generate the file ending report and export it as a pie chart."""
        file_ending_report = FileEndingReport(paths=self.file_paths)
        file_ending_report.generate()

        chart_type = ChartExporter.EXPORT_TYPE['PIE']
        file_ending_report.report(exporter=ChartExporter(type=chart_type))

    def report_authors_commits(self):
        """Print the number of commits of every author."""
        print("############################################")

        for author_name, author in self.authors.items():
            print("%s has %s commits" % (author_name, len(author.commits)))

        print("############################################")

    def report_commits_per_file(self):
        """Print, for every collected file, the number of commits it is in."""
        print("############################################")

        for repo_file in self.file_paths.values():
            file_commit_count = len(repo_file.commits)
            print("%s is in %s commits" % (repo_file.path, file_commit_count))

        print("############################################")

    def report_top_10_commited_files(self):
        """Print the ten collected files that are in the most commits."""
        print("############################################")

        # Sort the files by the number of commits
        most_committed = sorted(self.file_paths.values(),
                                key=lambda repo_file: len(repo_file.commits),
                                reverse=True)

        for repo_file in most_committed[:10]:
            file_commit_count = len(repo_file.commits)
            print("%s is in %s commits" % (repo_file.path, file_commit_count))

        print("############################################")

    def report_for_all_authors(self):
        """Generate the authors commit report and export it as a spline chart."""
        author_commit_report = AuthorsCommitReport(authors=self.authors)
        author_commit_report.generate()

        chart_type = ChartExporter.EXPORT_TYPE['SPLINE']
        author_commit_report.report(exporter=ChartExporter(type=chart_type))

    def report_for_author(self, name):
        """Print the commits per day of one author, newest date first.

        :param name: key of the author in ``self.authors``
        :raises KeyError: if no author with that name was collected
        """
        author = self.authors[name]

        print("############################################")
        print("Author: %s" % author.name)

        # year (as string) -> month -> day -> number of commits
        years = {}

        for commit in author.commits:
            author_commit_time = time.localtime(commit.author_time)

            this_year = years.setdefault(str(author_commit_time.tm_year), {})
            this_month = this_year.setdefault(author_commit_time.tm_mon, {})

            commit_day = author_commit_time.tm_mday
            this_month[commit_day] = this_month.get(commit_day, 0) + 1

        reverse_sort_order = True

        for year in sorted(years, reverse=reverse_sort_order):
            print("Year %s:" % year)
            year_dict = years[year]

            for month in sorted(year_dict, reverse=reverse_sort_order):
                print(' Month %s:' % month)
                month_dict = year_dict[month]

                for day in sorted(month_dict, reverse=reverse_sort_order):
                    if day < 10:
                        print(' Day %s: %s' % (day, month_dict[day]))
                    else:
                        print(' Day %s: %s' % (day, month_dict[day]))

        print("############################################")

    #####################
    ## PRIVATE METHODS ##
    #####################
    def _is_matching_exclude_pattern(self, path):
        """Tell whether one of the exclude patterns matches the start of path."""
        for pattern in self.EXCLUDE_PATTERNS:
            if re.compile(pattern).match(path) is not None:
                return True

        return False

    def _in_exclude_path(self, path):
        """Tell whether path starts with one of the excluded paths."""
        for exclude_path in self.EXCLUDE_PATHS:
            if path.startswith(exclude_path):
                return True

        return False

    def _is_allowed_path(self, path):
        """Tell whether the file at path should be added to the analysis.

        A path is refused if it is excluded, lies outside one of the
        searching paths, has an ending that is not allowed or is already
        known. Any exception raised while checking is printed and the path
        is then treated as allowed.
        """
        try:
            file_ending = File.get_ending(file_path=path)

            for search_path in self.SEARCHING_PATHS:
                # Looks if path in exclude path
                if self._in_exclude_path(path):
                    return False

                # Looks if path matches the excluding pattern
                if self._is_matching_exclude_pattern(path):
                    return False

                # TODO: Check out if this is logical
                # Looks if path starts not in a searching path
                if not self._is_in_search_path(path=path,
                                               search_path=search_path):
                    return False

            if file_ending not in self.ALLOWED_ENDINGS or self._has_repo_file(
                    file_path=path):
                return False

        except Exception as err:
            print(err)

        return True

    def _save_tree_data(self, change_tree, tree_change):
        """Parse a single change or every change of a list of changes."""
        # Check if is list
        if isinstance(tree_change, list):
            for change in tree_change:
                self._parse_change_tree(change_tree=change_tree,
                                        tree_change=change)
        else:
            self._parse_change_tree(change_tree=change_tree,
                                    tree_change=tree_change)

    def _parse_change_tree(self, change_tree, tree_change):
        """Record what one change of a commit did to a file.

        A 'delete' marks the old path as deleted. An 'add' or 'modify' adds
        the commit to an already known file, or registers the file if it is
        allowed. Any other change type is ignored.
        """
        # Compared by value: identity checks against string literals only
        # work for interned strings and silently drop everything else
        change_type = tree_change.type

        if change_type == 'delete':
            self.deleted_paths[tree_change.old.path] = True
            return

        if change_type not in ('add', 'modify'):
            return

        new_tree_data = self.repo[tree_change.new.sha].data
        file_path = tree_change.new.path

        # Check if the file has not been later being deleted
        if file_path in self.deleted_paths:
            return

        file_ending = File.get_ending(file_path=file_path)

        # Add the commit to the repo file if we already know it
        try:
            known_file = self._get_repo_file(file_path=file_path)
        except KeyError:
            pass
        else:
            known_file.commits.append(change_tree.commit)

        # Check if file is in allowed path (known files are refused here)
        if not self._is_allowed_path(path=file_path):
            return

        counted_lines = new_tree_data.count('\n')

        # Get repo file
        repo_file = self._create_repo_file(file_path=file_path)

        # Set repo file data
        repo_file.code_lines = counted_lines
        repo_file.ending = file_ending
        repo_file.commits.append(change_tree.commit)

    def _is_in_search_path(self, path, search_path):
        """Tell whether path lies below search_path.

        Both are given a leading '/' first if they lack one, so an empty
        search_path matches every path.
        """
        if not path.startswith('/'):
            path = '/' + path

        if search_path == '':
            search_path = '/'
        elif not search_path.startswith('/'):
            search_path = '/' + search_path

        return path.startswith(search_path)

    def _create_repo_file(self, file_path):
        """Register and return a new repo file, or None if the path is known."""
        if self._has_repo_file(file_path=file_path):
            return None

        repo_file = File(path=file_path)
        self.file_paths[file_path] = repo_file
        return repo_file

    def _get_repo_file(self, file_path):
        """Return the repo file of the path, raises KeyError if unknown."""
        return self.file_paths[file_path]

    def _has_repo_file(self, file_path):
        """Tell whether a repo file is registered for the path."""
        return file_path in self.file_paths