def test_git_clone():
    """
    Exercise ``git_clone`` against a small public repository.

    Three calls are made with the same ``uri`` and ``cache_dir``:
        * ``reflect=False`` - the return value is compared to the
          expected local checkout path
        * ``reflect=True`` - the return value's ``.path`` is compared
          to that same path, and the call must be faster than the first
        * ``pull=True`` - only required to complete without raising

    The checkout is removed before and after the run; the final removal
    happens in a ``finally`` so a failed assertion does not leave the
    clone behind in ``cache_dir``.
    """
    # FIXME: THIS ONE IS CAUSING SOME INTERESTING PROBLEMS?
    # NOTE(review): the test needs network access and compares two
    # wall-clock timings, either of which could make it flaky - confirm.
    from metrique.utils import git_clone, safestr, remove_file
    uri = 'https://github.com/kejbaly2/tornadohttp.git'
    local_path = os.path.join(cache_dir, safestr(uri))
    # drop any earlier checkout so the first call is timed without one
    remove_file(local_path, force=True)

    try:
        _t = time()
        repo = git_clone(uri, pull=False, reflect=False,
                         cache_dir=cache_dir)
        not_cached = time() - _t
        assert repo == local_path

        _t = time()
        repo = git_clone(uri, pull=False, reflect=True,
                         cache_dir=cache_dir)
        cached = time() - _t

        assert repo.path == local_path
        assert cached < not_cached

        git_clone(uri, pull=True, reflect=False, cache_dir=cache_dir)
    finally:
        # always clean up, even when an assertion above failed
        remove_file(local_path, force=True)
def get_objects(self, uri, pull=True, **kwargs):
    '''
    Walk through repo commits to generate a list of repo commit
    objects.

    Each object has the following properties:
        * repo uri
        * general commit info
        * files added, removed fnames
        * lines added, removed
        * acked_by
        * signed_off_by
        * resolves
        * related

    :param uri: repository uri; handed to ``git_clone`` and stored on
                every object as ``repo_uri``
    :param pull: forwarded to ``git_clone`` as ``pull``
    :param kwargs: forwarded unchanged to the parent ``get_objects``
    :returns: the result of the parent class ``get_objects(**kwargs)``

    The built objects are appended to ``self.objects`` and the clone
    handle is kept on ``self.repo``.
    '''
    self.repo = repo = git_clone(uri, pull=pull, reflect=True)

    # get a full list of all commit SHAs in the repo (all branches);
    # only the count is used, for the debug log line below
    cmd = 'git rev-list --all'
    output = sys_call(cmd, cwd=repo.path)
    repo_shas = set(x.strip() for x in output.split('\n') if x)
    logger.debug("Total Commits: %s" % len(repo_shas))

    # One 'sha:<hash>' header per commit followed by its numstat lines.
    # Must run inside the clone, exactly like the rev-list call above;
    # without cwd the log would come from the process working directory.
    cmd = 'git --no-pager log --all --format=sha:%H --numstat'
    output = sys_call(cmd, cwd=repo.path)
    all_logs = re.sub('\n+', '\n', output)
    c_logs = [x for x in [s.strip() for s in all_logs.split('sha:')] if x]

    _end = None  # once was true, always is true...
    objs = []
    for c_log in c_logs:
        # first line is the sha, the remainder is the numstat listing
        sha, _, all_changes = c_log.partition('\n')

        # NOTE: a failing lookup propagates to the caller; the earlier
        # per-commit error capture was disabled in this block.
        c = repo.get_object(sha)
        # FIXME: not normalizing to UTC
        _start = ts2dt(c.commit_time)

        # and some basic stuff...
        obj = dict(_oid=sha, _start=_start, _end=_end,
                   repo_uri=uri, tree=c.tree, parents=c.parents,
                   author=c.author, committer=c.committer,
                   author_time=c.author_time, message=c.message,
                   mergetag=c.mergetag, extra=c.extra)

        # FIXME: sql doesn't nest well..
        files = obj['files'] = {}
        for _file in all_changes.split('\n'):
            _file = _file.strip()
            if not _file:
                # no numstat line: recorded under a None file name
                added, removed, fname = 0, 0, None
            else:
                added, removed, fname = _file.split('\t')
                # '-' in a count column is treated as zero lines
                added = 0 if added == '-' else int(added)
                removed = 0 if removed == '-' else int(removed)
            files[fname] = {'added': added, 'removed': removed}

        # file +/- totals; .values() works on Python 2 and 3 alike
        obj['added'] = sum(v['added'] for v in files.values())
        obj['removed'] = sum(v['removed'] for v in files.values())

        # extract interesting bits from the message
        obj['acked_by'] = acked_by_re.findall(c.message)
        obj['signed_off_by'] = signed_off_by_re.findall(c.message)
        obj['resolves'] = resolves_re.findall(c.message)
        obj['related'] = related_re.findall(c.message)
        objs.append(obj)
    self.objects.extend(objs)

    return super(Commit, self).get_objects(**kwargs)
def get_objects(self, uri, pull=True, **kwargs):
    """
    Walk through repo commits to generate a list of repo commit
    objects.

    Each object has the following properties:
        * repo uri
        * general commit info
        * files added, removed fnames
        * lines added, removed
        * acked_by
        * signed_off_by
        * resolves
        * related

    :param uri: repository uri; handed to ``git_clone`` and stored on
                every object as ``repo_uri``
    :param pull: forwarded to ``git_clone`` as ``pull``
    :param kwargs: forwarded unchanged to the parent ``get_objects``
    :returns: the result of the parent class ``get_objects(**kwargs)``

    The built objects are appended to ``self.objects`` and the clone
    handle is kept on ``self.repo``.
    """
    self.repo = repo = git_clone(uri, pull=pull, reflect=True)

    # get a full list of all commit SHAs in the repo (all branches);
    # only the count is used, for the debug log line below
    cmd = "git rev-list --all"
    output = sys_call(cmd, cwd=repo.path)
    repo_shas = set(x.strip() for x in output.split("\n") if x)
    logger.debug("Total Commits: %s" % len(repo_shas))

    # One 'sha:<hash>' header per commit followed by its numstat lines.
    # Must run inside the clone, exactly like the rev-list call above;
    # without cwd the log would come from the process working directory.
    cmd = "git --no-pager log --all --format=sha:%H --numstat"
    output = sys_call(cmd, cwd=repo.path)
    all_logs = re.sub("\n+", "\n", output)
    c_logs = [x for x in [s.strip() for s in all_logs.split("sha:")] if x]

    _end = None  # once was true, always is true...
    objs = []
    for c_log in c_logs:
        # first line is the sha, the remainder is the numstat listing
        sha, _, all_changes = c_log.partition("\n")

        # NOTE: a failing lookup propagates to the caller; the earlier
        # per-commit error capture was disabled in this block.
        c = repo.get_object(sha)
        # FIXME: not normalizing to UTC
        _start = ts2dt(c.commit_time)

        # and some basic stuff...
        obj = dict(
            _oid=sha,
            _start=_start,
            _end=_end,
            repo_uri=uri,
            tree=c.tree,
            parents=c.parents,
            author=c.author,
            committer=c.committer,
            author_time=c.author_time,
            message=c.message,
            mergetag=c.mergetag,
            extra=c.extra,
        )

        # FIXME: sql doesn't nest well..
        files = obj["files"] = {}
        for _file in all_changes.split("\n"):
            _file = _file.strip()
            if not _file:
                # no numstat line: recorded under a None file name
                added, removed, fname = 0, 0, None
            else:
                added, removed, fname = _file.split("\t")
                # '-' in a count column is treated as zero lines
                added = 0 if added == "-" else int(added)
                removed = 0 if removed == "-" else int(removed)
            files[fname] = {"added": added, "removed": removed}

        # file +/- totals; .values() works on Python 2 and 3 alike
        obj["added"] = sum(v["added"] for v in files.values())
        obj["removed"] = sum(v["removed"] for v in files.values())

        # extract interesting bits from the message
        obj["acked_by"] = acked_by_re.findall(c.message)
        obj["signed_off_by"] = signed_off_by_re.findall(c.message)
        obj["resolves"] = resolves_re.findall(c.message)
        obj["related"] = related_re.findall(c.message)
        objs.append(obj)
    self.objects.extend(objs)

    return super(Commit, self).get_objects(**kwargs)