def __init__(self, basetext, atext, btext, base=None, a=None, b=None):
    """Store the three texts and their line-split forms.

    basetext, atext, btext -- the raw texts, kept as given
    base, a, b             -- optional pre-split versions of the texts;
                              any that is None is derived from the
                              matching text with mdiff.splitnewlines()
    """
    self.basetext, self.atext, self.btext = basetext, atext, btext

    # Resolve every line list before touching the attributes, so a caller
    # supplied split is used as-is and only missing ones are computed.
    base_lines = mdiff.splitnewlines(basetext) if base is None else base
    a_lines = mdiff.splitnewlines(atext) if a is None else a
    b_lines = mdiff.splitnewlines(btext) if b is None else b

    self.base, self.a, self.b = base_lines, a_lines, b_lines
def findrenames(repo, added=None, removed=None, threshold=0.5):
    '''find renamed files -- yields (before, after, score) tuples

    repo      -- object providing status(), changectx() and wread()
    added     -- names of added files; when this or ``removed`` is None,
                 both are taken from repo.status()[1:3]
    removed   -- names of removed files (see ``added``)
    threshold -- minimum score a (removed, added) pair must reach

    For every added file the removed file with the highest score is
    yielded, provided that score is at least ``threshold``.  The score is
    twice the number of bytes in the added file's lines that fall inside
    the blocks reported by bdiff.blocks(), divided by the combined length
    of both files.  When several removed files tie, the one examined last
    wins.
    '''
    if added is None or removed is None:
        added, removed = repo.status()[1:3]
    ctx = repo.changectx()
    for a in added:
        aa = repo.wread(a)
        # The line split of the added file does not depend on the removed
        # candidate: compute it once per added file instead of once per
        # (added, removed) pair.
        alines = mdiff.splitnewlines(aa)
        bestname, bestscore = None, threshold
        for r in removed:
            rr = ctx.filectx(r).data()
            lengths = len(aa) + len(rr)
            if not lengths:
                # both files are empty: nothing to compare, no score
                continue
            # bdiff.blocks() returns blocks of matching lines
            # count the number of bytes in each
            equal = 0
            for x1, x2, _y1, _y2 in bdiff.blocks(aa, rr):
                equal += sum(len(line) for line in alines[x1:x2])
            myscore = equal * 2.0 / lengths
            # ">=" keeps the original tie-breaking: later candidates win
            if myscore >= bestscore:
                bestname, bestscore = r, myscore
        if bestname:
            yield bestname, a, bestscore
def findrenames(repo, added=None, removed=None, threshold=0.5):
    '''find renamed files -- yields (before, after, score) tuples

    repo      -- object providing status(), changectx() and wread()
    added     -- names of added files; when this or ``removed`` is None,
                 both are taken from repo.status()[1:3]
    removed   -- names of removed files (see ``added``)
    threshold -- minimum score a (removed, added) pair must reach

    For every added file the removed file with the highest score is
    yielded, provided that score is at least ``threshold``.  The score is
    twice the number of bytes in the added file's lines that fall inside
    the blocks reported by bdiff.blocks(), divided by the combined length
    of both files.  When several removed files tie, the one examined last
    wins.
    '''
    if added is None or removed is None:
        added, removed = repo.status()[1:3]
    ctx = repo.changectx()
    for a in added:
        aa = repo.wread(a)
        # The line split of the added file does not depend on the removed
        # candidate: compute it once per added file instead of once per
        # (added, removed) pair.
        alines = mdiff.splitnewlines(aa)
        bestname, bestscore = None, threshold
        for r in removed:
            rr = ctx.filectx(r).data()
            lengths = len(aa) + len(rr)
            if not lengths:
                # both files are empty: nothing to compare, no score
                continue
            # bdiff.blocks() returns blocks of matching lines
            # count the number of bytes in each
            equal = 0
            for x1, x2, _y1, _y2 in bdiff.blocks(aa, rr):
                equal += sum(len(line) for line in alines[x1:x2])
            myscore = equal * 2.0 / lengths
            # ">=" keeps the original tie-breaking: later candidates win
            if myscore >= bestscore:
                bestname, bestscore = r, myscore
        if bestname:
            yield bestname, a, bestscore
def score(text):
    """Return a similarity score between ``text`` and the data of ``fctx``.

    Empty ``text`` scores 0.0.  A false result from fctx.cmp(text) is
    treated as an exact match and scores 1.0.  When ``threshold`` is 1.0
    anything short of an exact match scores 0.0 without running a diff.
    Otherwise the score is twice the number of bytes in the lines of
    ``text`` covered by the blocks from bdiff.blocks(), divided by the
    combined length of ``text`` and the fctx data.
    """
    if len(text) == 0:
        return 0.0
    if not fctx.cmp(text):
        return 1.0
    if threshold == 1.0:
        return 0.0

    original = fctx.data()
    text_lines = mdiff.splitnewlines(text)

    # Each block from bdiff.blocks() starts with a (start, stop) line range
    # into ``text``; add up the byte length of every line in those ranges.
    matched = 0
    for start, stop, _ostart, _ostop in bdiff.blocks(text, original):
        matched += sum(len(line) for line in text_lines[start:stop])

    return matched * 2.0 / (len(text) + len(original))
def data():
    """Return ``(content, lines)`` for ``r``.

    ``content`` is whatever r.data() returns and ``lines`` is that same
    content passed through mdiff.splitnewlines().
    """
    content = r.data()
    lines = mdiff.splitnewlines(content)
    return content, lines