def getChangedTexts(self, commitobj):
    """Return the lower-cased text fragments changed by a commit.

    The result is cached on ``self.changedTexts``. Lookup order:

    1. ``self.changedTexts`` when it is already set;
    2. ``self._loadChangedTextFromBackingVar()`` when
       ``self.changedTexts_data`` is set;
    3. otherwise the fragments are computed from ``commitobj``.

    Args:
        commitobj: commit object whose ``diff()`` is taken against
            ``<commit>^``. Only used when nothing is cached; may be None
            otherwise.

    Returns:
        A list of lower-cased strings: for modified files, every non-equal,
        non-blank fragment reported by diff_match_patch; for files reported
        with change type 'A', the whole content of the ``b_blob``.

    Raises:
        Exception: when nothing is cached and ``commitobj`` is None.
    """
    if self.changedTexts is not None:
        return self.changedTexts
    if self.changedTexts_data is not None:
        return self._loadChangedTextFromBackingVar()
    if commitobj is None:
        raise Exception("NULL passed to getChangedTexts when local changedTexts was not set")

    differ = gdiff.diff_match_patch()
    # Diff against the parent once and reuse it for every change type
    # instead of re-running the diff per type.
    # NOTE(review): the diff is requested from this commit towards its
    # parent, so the a/b sides (and what 'A' means) may be reversed relative
    # to the usual parent -> commit view; confirm against the GitPython docs.
    parent_diff = commitobj.diff(str(commitobj) + '^')

    alldiffs = []
    for change in parent_diff.iter_change_type('M'):  # Changed
        left = change.a_blob.data_stream.read()
        right = change.b_blob.data_stream.read()
        fragments = differ.diff_main(left, right)
        if fragments:
            differ.diff_cleanupSemantic(fragments)
        for fragment in fragments:
            # fragment[0] == 0 marks text common to both sides; skip it,
            # and skip whitespace-only fragments.
            if fragment[0] != 0 and fragment[1].strip():
                alldiffs.append(fragment[1].lower())

    for change in parent_diff.iter_change_type('A'):  # Added
        alldiffs.append(change.b_blob.data_stream.read().lower())

    # Deleted ('D') and renamed ('R') entries are intentionally not collected.
    self.changedTexts = alldiffs
    return self.changedTexts
def getCommits(repo, startdate, enddate):
    """Update the local clone of ``repo`` and collect its commits in a window.

    The clone lives under ``Config.fsdir + 'git-repos/' + <folder>/``, where
    the folder name comes from ``urlToFolder(repo.url)``. If that path does
    not exist it is created, initialised as a git repository and given an
    ``origin`` remote pointing at ``repo.url``. ``origin`` is then fetched and
    ``master`` is pulled.

    Args:
        repo: repository record; ``repo.url`` is read here and the object is
            handed to ``GitCommit.loadFromSource``.
        startdate: lower bound, converted with ``unixToGitDateFormat`` and
            passed to ``iter_commits(since=...)``.
        enddate: upper bound; commits whose ``committed_date`` is greater
            are skipped.

    Returns:
        A list of ``GitCommit`` objects, one per selected commit that has at
        least one parent.
    """
    localfolder = urlToFolder(repo.url)
    repoloc = Config.fsdir + 'git-repos/' + localfolder + '/'
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote('origin', repo.url)

    gitrepo.remotes.origin.fetch()
    gitrepo.remotes.origin.pull('master')

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        # TODO BUG: We are unable to get a git diff of the first commit.
        # http://osdir.com/ml/version-control.git/2005-05/msg01309.html
        # http://git.661346.n2.nabble.com/git-diff-tree-against-the-root-commit-td5685272.html
        if not m.parents:
            continue

        # Use a separate name for the commit record so the repository
        # handle is not rebound while its commit iterator is in use.
        commit = GitCommit()
        alldiffs = commit.getChangedTexts(m)
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
def getDiffsArray(self):
    """Return one diff_match_patch diff per file touched by this revision.

    For every entry of ``self.files`` the file content is fetched with
    ``pysvn.Client.cat`` at revision ``int(self.uniqueid) - 1`` and at
    revision ``int(self.uniqueid)``, and the two texts are diffed.

    Returns:
        A list with one ``diff_main`` result per file, in ``self.files``
        order, or an empty list when ``self.diffIsReallyBig`` is truthy.
    """
    if self.diffIsReallyBig:
        return []

    differ = gdiff.diff_match_patch()
    client = pysvn.Client()

    def cat_or_empty(location, offset):
        # Try/except is easier than working out whether the change is an
        # addition or a deletion: a side that cannot be fetched is treated
        # as empty text. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        try:
            revision = pysvn.Revision(pysvn.opt_revision_kind.number,
                                      int(self.uniqueid) + offset)
            return client.cat(url_or_path=location, revision=revision)
        except Exception:
            return ''

    alldiffs = []
    for f in self.files:
        loc = (self.repo.url + f).replace("trunk//trunk", "trunk/")
        left = cat_or_empty(loc, -1)
        right = cat_or_empty(loc, 0)
        alldiffs.append(differ.diff_main(left, right))
    return alldiffs
def getChangedTexts(self, commitobj):
    """Return the lower-cased text fragments changed by a commit.

    The result is cached on ``self.changedTexts``. Lookup order:

    1. ``self.changedTexts`` when it is already set;
    2. ``self._loadChangedTextFromBackingVar()`` when
       ``self.changedTexts_data`` is set;
    3. otherwise the fragments are computed from ``commitobj``.

    Args:
        commitobj: commit object whose ``diff()`` is taken against
            ``<commit>^``. Only used when nothing is cached; may be None
            otherwise.

    Returns:
        A list of lower-cased strings: for modified files, every non-equal,
        non-blank fragment reported by diff_match_patch; for files reported
        with change type 'A', the whole content of the ``b_blob``.

    Raises:
        Exception: when nothing is cached and ``commitobj`` is None.
    """
    if self.changedTexts is not None:
        return self.changedTexts
    if self.changedTexts_data is not None:
        return self._loadChangedTextFromBackingVar()
    if commitobj is None:
        raise Exception(
            "NULL passed to getChangedTexts when local changedTexts was not set"
        )

    differ = gdiff.diff_match_patch()
    # Diff against the parent once and reuse it for every change type
    # instead of re-running the diff per type.
    # NOTE(review): the diff is requested from this commit towards its
    # parent, so the a/b sides (and what 'A' means) may be reversed relative
    # to the usual parent -> commit view; confirm against the GitPython docs.
    parent_diff = commitobj.diff(str(commitobj) + '^')

    alldiffs = []
    for change in parent_diff.iter_change_type('M'):  # Changed
        left = change.a_blob.data_stream.read()
        right = change.b_blob.data_stream.read()
        fragments = differ.diff_main(left, right)
        if fragments:
            differ.diff_cleanupSemantic(fragments)
        for fragment in fragments:
            # fragment[0] == 0 marks text common to both sides; skip it,
            # and skip whitespace-only fragments.
            if fragment[0] != 0 and fragment[1].strip():
                alldiffs.append(fragment[1].lower())

    for change in parent_diff.iter_change_type('A'):  # Added
        alldiffs.append(change.b_blob.data_stream.read().lower())

    # Deleted ('D') and renamed ('R') entries are intentionally not collected.
    self.changedTexts = alldiffs
    return self.changedTexts
def getPrettyDiffs(self):
    """Yield an HTML rendering of each diff from ``self.getDiffsArray()``.

    Each diff is cleaned up in place with ``diff_cleanupSemantic`` and then
    rendered with ``diff_prettyHtml``.

    Yields:
        The rendered HTML for one diff. A non-``unicode`` rendering is
        decoded from UTF-8; a ``unicode`` rendering is encoded to UTF-8.
    """
    diffs = self.getDiffsArray()
    differ = gdiff.diff_match_patch()
    for d in diffs:
        differ.diff_cleanupSemantic(d)
        # Named ``html`` rather than ``str`` so the builtin is not shadowed.
        html = differ.diff_prettyHtml(d)
        # NOTE(review): the two branches yield different types (decoded
        # text vs. UTF-8 encoded bytes) -- confirm this is intended.
        if not isinstance(html, unicode):
            html = unicode(html, 'utf-8')
        else:
            html = html.encode('utf-8')
        yield html
def run(self, revisionTextA, revisionTextB, revisionDict):
    """Build a revision record annotated with word-level change counts.

    The two texts are diffed with diff_match_patch (``diff_main`` with its
    third argument set to False, followed by ``diff_cleanupSemantic``). The
    entries accepted by ``self.isRemoveOrAdd`` are mapped through
    ``self.countWords`` and summarised by ``self.getAddWordCount`` and
    ``self.getDeletedWordCount``.

    Args:
        revisionTextA: first text passed to ``diff_main``.
        revisionTextB: second text passed to ``diff_main``; also the input
            to ``self.getWordCount``.
        revisionDict: keyword arguments used to construct the ``Revision``.

    Returns:
        ``revision.to_dict()`` after ``wordsAdded``, ``wordsDeleted`` and
        ``wordCount`` have been set on the revision.
    """
    differ = diff_match_patch()
    revisionDiffs = differ.diff_main(revisionTextA, revisionTextB, False)
    differ.diff_cleanupSemantic(revisionDiffs)

    # Materialise the counts as a list: two consumers read them below, and a
    # lazy map/filter object (Python 3) would already be exhausted when the
    # second consumer runs.
    diffWordCount = [
        self.countWords(diff)
        for diff in revisionDiffs
        if self.isRemoveOrAdd(diff)
    ]
    addedWordCount = self.getAddWordCount(diffWordCount)
    deletedWordCount = self.getDeletedWordCount(diffWordCount)

    revision = Revision(**revisionDict)
    revision.wordsAdded = addedWordCount
    revision.wordsDeleted = deletedWordCount
    revision.wordCount = self.getWordCount(revisionTextB)
    return revision.to_dict()
def getPrettyDiffs(self, htmlize=True):
    """Yield a ``Markup``-wrapped HTML rendering of each diff.

    The diffs come from ``self.getDiffsArray()``; each is cleaned up in
    place with ``diff_cleanupSemantic`` and rendered with
    ``diff_prettyHtml``.

    Args:
        htmlize: must be truthy; a non-HTML rendering is not implemented.

    Yields:
        ``Markup(html)`` for one diff. A non-``unicode`` rendering is
        decoded from UTF-8; a ``unicode`` rendering is encoded to UTF-8.

    Raises:
        Exception: when ``htmlize`` is falsy. Because this is a generator,
            the error is raised on first iteration, not at call time.
    """
    if not htmlize:
        raise Exception("Do not know how to not htmlize prettyDiffs")

    diffs = self.getDiffsArray()
    differ = gdiff.diff_match_patch()
    for d in diffs:
        differ.diff_cleanupSemantic(d)
        # Named ``html`` rather than ``str`` so the builtin is not shadowed.
        html = differ.diff_prettyHtml(d)
        # NOTE(review): the two branches hand different types to Markup
        # (decoded text vs. UTF-8 encoded bytes) -- confirm this is intended.
        if not isinstance(html, unicode):
            html = unicode(html, 'utf-8')
        else:
            html = html.encode('utf-8')
        yield Markup(html)
def getDiffsArray(self):
    """Return one diff_match_patch diff per changed or added file.

    The commit object comes from ``self.getChangedTextMetadata()`` and is
    diffed against ``<commit>^``.

    Returns:
        A list of ``diff_main`` results: first one per entry of change type
        'M' (``a_blob`` content vs. ``b_blob`` content), then one per entry
        of change type 'A' (empty text vs. ``b_blob`` content). An empty
        list is returned when ``self.diffIsReallyBig`` is truthy.
    """
    if self.diffIsReallyBig:
        return []

    differ = gdiff.diff_match_patch()
    commit = self.getChangedTextMetadata()
    # Diff against the parent once and reuse it for both change types
    # instead of re-running the diff per type.
    parent_diff = commit.diff(str(commit) + '^')

    alldiffs = []
    for change in parent_diff.iter_change_type('M'):  # Changed
        left = change.a_blob.data_stream.read()
        right = change.b_blob.data_stream.read()
        alldiffs.append(differ.diff_main(left, right))

    for change in parent_diff.iter_change_type('A'):  # Added
        addition = change.b_blob.data_stream.read()
        alldiffs.append(differ.diff_main('', addition))

    return alldiffs
def getCommits(repo, startdate, enddate):
    """Update the local clone of ``repo`` and collect its commits in a window.

    The clone lives under ``"git-repos/" + <folder> + "/"``, where the folder
    name comes from ``urlToFolder(repo.url)``. If that path does not exist it
    is created, initialised as a git repository and given an ``origin``
    remote pointing at ``repo.url``. ``origin`` is then fetched and
    ``master`` is pulled.

    Args:
        repo: repository record; ``repo.url`` is read here and the object is
            handed to ``Commit.loadFromSource``.
        startdate: lower bound, converted with ``unixToGitDateFormat`` and
            passed to ``iter_commits(since=...)``.
        enddate: upper bound; commits whose ``committed_date`` is greater
            are skipped.

    Returns:
        A list of ``Commit`` objects. Each one is loaded with the non-equal,
        non-blank diff_match_patch fragments of the files the commit
        modified.
    """
    localfolder = urlToFolder(repo.url)
    differ = gdiff.diff_match_patch()
    repoloc = "git-repos/" + localfolder + "/"
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote("origin", repo.url)

    gitrepo.remotes.origin.fetch()
    gitrepo.remotes.origin.pull("master")

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        alldiffs = []
        # Diff each commit against its own parent; a fixed "HEAD~1" would
        # compare every commit with the same revision. A commit without
        # parents has nothing to diff against and keeps an empty list.
        if m.parents:
            for change in m.diff(str(m) + "^").iter_change_type("M"):  # Changed
                left = change.a_blob.data_stream.read()
                right = change.b_blob.data_stream.read()
                fragments = differ.diff_main(left, right)
                if fragments:
                    differ.diff_cleanupSemantic(fragments)
                for fragment in fragments:
                    # fragment[0] == 0 marks text common to both sides.
                    if fragment[0] != 0 and fragment[1].strip():
                        alldiffs.append(fragment)
        # Added, deleted and renamed files are not collected.

        # Use a separate name for the commit record so the repository
        # handle is not rebound while its commit iterator is in use.
        commit = Commit()
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
def getCommits(repo, startdate, enddate):
    """Update the local clone of ``repo`` and collect its commits in a window.

    The clone lives under ``'git-repos/' + <folder> + '/'``, where the folder
    name comes from ``urlToFolder(repo.url)``. If that path does not exist it
    is created, initialised as a git repository and given an ``origin``
    remote pointing at ``repo.url``. ``origin`` is then fetched and
    ``master`` is pulled.

    Args:
        repo: repository record; ``repo.url`` is read here and the object is
            handed to ``Commit.loadFromSource``.
        startdate: lower bound, converted with ``unixToGitDateFormat`` and
            passed to ``iter_commits(since=...)``.
        enddate: upper bound; commits whose ``committed_date`` is greater
            are skipped.

    Returns:
        A list of ``Commit`` objects. Each one is loaded with the non-equal,
        non-blank diff_match_patch fragments of the files the commit
        modified.
    """
    localfolder = urlToFolder(repo.url)
    differ = gdiff.diff_match_patch()
    repoloc = 'git-repos/' + localfolder + '/'
    if os.path.exists(repoloc):
        gitrepo = pygit.Repo(repoloc)
    else:
        os.makedirs(repoloc)
        gitrepo = pygit.Repo.init(repoloc)
        gitrepo.create_remote('origin', repo.url)

    gitrepo.remotes.origin.fetch()
    gitrepo.remotes.origin.pull('master')

    commits = []
    for m in gitrepo.iter_commits(since=unixToGitDateFormat(startdate)):
        if m.committed_date > enddate:
            continue

        alldiffs = []
        # Diff each commit against its own parent; a fixed 'HEAD~1' would
        # compare every commit with the same revision. A commit without
        # parents has nothing to diff against and keeps an empty list.
        if m.parents:
            for change in m.diff(str(m) + '^').iter_change_type('M'):  # Changed
                left = change.a_blob.data_stream.read()
                right = change.b_blob.data_stream.read()
                fragments = differ.diff_main(left, right)
                if fragments:
                    differ.diff_cleanupSemantic(fragments)
                for fragment in fragments:
                    # fragment[0] == 0 marks text common to both sides.
                    if fragment[0] != 0 and fragment[1].strip():
                        alldiffs.append(fragment)
        # Added, deleted and renamed files are not collected.

        # Use a separate name for the commit record so the repository
        # handle is not rebound while its commit iterator is in use.
        commit = Commit()
        commit.loadFromSource(repo, m.message, m.committed_date,
                              m.stats.files.keys(), str(m), alldiffs)
        commits.append(commit)
    return commits
def getDiffsArray(self):
    """Return one diff_match_patch diff per changed or added file.

    The commit object comes from ``self.getChangedTextMetadata()`` and is
    diffed against ``<commit>^``.

    Returns:
        A list of ``diff_main`` results: first one per entry of change type
        'M' (``a_blob`` content vs. ``b_blob`` content), then one per entry
        of change type 'A' (empty text vs. ``b_blob`` content). An empty
        list is returned when ``self.diffIsReallyBig`` is truthy.
    """
    if self.diffIsReallyBig:
        return []

    differ = gdiff.diff_match_patch()
    commit = self.getChangedTextMetadata()
    # Diff against the parent once and reuse it for both change types
    # instead of re-running the diff per type.
    parent_diff = commit.diff(str(commit) + '^')

    alldiffs = []
    for change in parent_diff.iter_change_type('M'):  # Changed
        left = change.a_blob.data_stream.read()
        right = change.b_blob.data_stream.read()
        alldiffs.append(differ.diff_main(left, right))

    for change in parent_diff.iter_change_type('A'):  # Added
        addition = change.b_blob.data_stream.read()
        alldiffs.append(differ.diff_main('', addition))

    return alldiffs
def getDiffsArray(self):
    """Return one diff_match_patch diff per file touched by this revision.

    For every entry of ``self.files`` the file content is fetched with
    ``pysvn.Client.cat`` at revision ``int(self.uniqueid) - 1`` and at
    revision ``int(self.uniqueid)``, and the two texts are diffed.

    Returns:
        A list with one ``diff_main`` result per file, in ``self.files``
        order, or an empty list when ``self.diffIsReallyBig`` is truthy.
    """
    if self.diffIsReallyBig:
        return []

    differ = gdiff.diff_match_patch()
    client = pysvn.Client()

    def cat_or_empty(location, offset):
        # Try/except is easier than working out whether the change is an
        # addition or a deletion: a side that cannot be fetched is treated
        # as empty text. ``Exception`` (not a bare except) keeps
        # KeyboardInterrupt/SystemExit propagating.
        try:
            revision = pysvn.Revision(pysvn.opt_revision_kind.number,
                                      int(self.uniqueid) + offset)
            return client.cat(url_or_path=location, revision=revision)
        except Exception:
            return ''

    alldiffs = []
    for f in self.files:
        loc = (self.repo.url + f).replace("trunk//trunk", "trunk/")
        left = cat_or_empty(loc, -1)
        right = cat_or_empty(loc, 0)
        alldiffs.append(differ.diff_main(left, right))
    return alldiffs