def test1(a, b):
    """Round-trip ``a`` -> ``b`` through ``mdiff`` and verify the result.

    A delta is computed with ``mdiff.textdiff(a, b)`` and then applied to
    ``a`` with ``mdiff.patches``.

    Raises:
        ValueError: ``"empty"`` if the computed delta is falsy, ``"bad"`` if
            the patched text does not equal ``b``.
    """
    delta = mdiff.textdiff(a, b)
    if not delta:
        raise ValueError("empty")

    patched = mdiff.patches(a, [delta])
    if patched != b:
        raise ValueError("bad")
def assert_bdiff_applies(self, a, b):
    """Assert that the ``mdiff`` delta from ``a`` to ``b`` patches ``a`` into ``b``.

    When ``mdiff.textdiff`` yields a falsy delta, no patch is applied and
    ``a`` itself is compared against ``b``.
    """
    delta = mdiff.textdiff(a, b)
    patched = mdiff.patches(a, [delta]) if delta else a

    # Only the first 200 items of the patched result go into the message so
    # that a failure on large inputs stays readable.
    failure_message = (
        "bad diff+patch result from\n %r to\n "
        "%r: \nbdiff: %r\npatched: %r" % (a, b, delta, patched[:200])
    )
    self.assertEqual(patched, b, failure_message)
def apply_deltas(self, history_chain, out_file_name):
    """Reconstruct a file from a chain of patches and write it to disk.

    ``history_chain`` is walked from the front. Each link contributes a
    delta (``link[3]``, or ``self.get_data_func(link[0])`` when that slot is
    ``None``) until a link whose ``link[2]`` equals ``NULL_SHA`` is reached;
    that link supplies the base text in the same way and ends the walk.

    The base text and every delta are passed through ``decompress``; the
    deltas are then applied in the reverse of the order they were collected
    and the result is written, in binary mode, to ``out_file_name``.
    """
    assert len(history_chain) > 0

    collected = []
    base = None
    for link in history_chain:
        if link[2] == NULL_SHA:
            # This link carries the full base text: stop collecting deltas.
            base = link[3]
            if base is None:
                base = self.get_data_func(link[0])
            break

        delta = link[3]
        if delta is None:
            delta = self.get_data_func(link[0])
        assert delta is not None
        collected.append(delta)

    assert base is not None
    base = decompress(base)

    if collected:
        patches = [decompress(delta) for delta in collected]
        # Deltas were gathered while walking towards the base text, so they
        # have to be applied in the opposite order.
        patches.reverse()
        raw = mdiff.patches(base, patches)
    else:
        raw = base

    # Drop our reference to the base text before doing the write.
    base = None

    with open(out_file_name, "wb") as out_file:
        out_file.write(raw)
def get(self, name, node):
    """Return the full text for the given name+node pair.

    The delta chain is obtained from ``self.getdeltachain(name, node)``. Its
    last entry must be a full text (base node equal to ``nullid``); the
    remaining entries are applied to it one at a time, from the end of the
    chain towards the front. The chain list is consumed in the process.

    Raises:
        KeyError: with ``(name, hex(node))`` when the chain does not end in
            a full text.
    """
    basenode_idx = ChainIndicies.BASENODE
    data_idx = ChainIndicies.DATA

    chain = self.getdeltachain(name, node)
    if chain[-1][basenode_idx] != nullid:
        # An incomplete chain cannot be resolved to a full text.
        raise KeyError((name, hex(node)))

    # Seed with the full text at the tail, then fold in each delta.
    text = chain.pop()[data_idx]
    while chain:
        entry = chain.pop()
        text = mdiff.patches(text, [entry[data_idx]])
    return text
def apply_patch(old_text, patch):
    """Decompress ``patch`` and apply it to ``old_text``.

    Returns whatever ``mdiff.patches`` produces for the single decompressed
    patch, i.e. the raw contents of the new file.
    """
    delta = decompress(patch)
    new_text = mdiff.patches(old_text, [delta])
    return new_text
def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
    """Resolve a delta chain for a file node.

    A recursive query walks ``fileindex`` from the row matching
    ``pathid``/``node`` along ``deltabaseid`` links, stopping at a null base
    or at any row id listed in ``stoprids``. Each delta on the way is
    decompressed according to its ``compression`` column.

    Args:
        db: object whose ``execute(sql, params)`` returns an iterable of
            ``(baseid, compression, delta)`` rows.
        pathid: value bound to ``fileindex.pathid`` for the starting row.
        node: value bound to ``fileindex.node`` for the starting row.
        revisioncache: mapping consulted as
            ``revisioncache[stoprids[baseid]]`` to obtain the base text when
            the chain ends at a stop row.
        stoprids: dict keyed by ``fileindex`` row ids at which recursion
            stops.
        zstddctx: object whose ``decompress()`` is called for deltas stored
            with ``COMPRESSION_ZSTD``. It defaults to ``None``, so it must
            be supplied if such rows can occur.

    Returns:
        The resolved full text, always as ``bytes``.

    Raises:
        SQLiteStoreError: if a row has an unrecognised compression type.
    """
    # TODO the "not in ({stops})" here is possibly slowing down the query
    # because it needs to perform the lookup on every recursive invocation.
    # This could possibly be faster if we created a temporary query with
    # baseid "poisoned" to null and limited the recursive filter to
    # "is not null".
    res = db.execute(
        'WITH RECURSIVE '
        ' deltachain(deltaid, baseid) AS ('
        ' SELECT deltaid, deltabaseid FROM fileindex '
        ' WHERE pathid=? AND node=? '
        ' UNION ALL '
        ' SELECT fileindex.deltaid, deltabaseid '
        ' FROM fileindex, deltachain '
        ' WHERE '
        ' fileindex.id=deltachain.baseid '
        ' AND deltachain.baseid IS NOT NULL '
        ' AND fileindex.id NOT IN ({stops}) '
        ' ) '
        'SELECT deltachain.baseid, compression, delta '
        'FROM deltachain, delta '
        'WHERE delta.id=deltachain.deltaid'.format(
            stops=','.join(['?'] * len(stoprids))
        ),
        tuple([pathid, node] + list(stoprids.keys())),
    )

    deltas = []
    lastdeltabaseid = None

    for deltabaseid, compression, delta in res:
        lastdeltabaseid = deltabaseid

        if compression == COMPRESSION_ZSTD:
            delta = zstddctx.decompress(delta)
        elif compression == COMPRESSION_NONE:
            # Stored uncompressed: use the blob as-is.
            pass
        elif compression == COMPRESSION_ZLIB:
            delta = zlib.decompress(delta)
        else:
            raise SQLiteStoreError(
                b'unhandled compression type: %d' % compression
            )

        deltas.append(delta)

    if lastdeltabaseid in stoprids:
        # The chain ended at a stop row; its text comes from the cache.
        basetext = revisioncache[stoprids[lastdeltabaseid]]
    else:
        # Otherwise the last entry fetched serves as the base text.
        basetext = deltas.pop()

    # Rows were collected starting from the requested node, so reverse them
    # to apply the deltas starting from the base.
    deltas.reverse()
    fulltext = mdiff.patches(basetext, deltas)

    # SQLite returns buffer instances for blob columns on Python 2. This
    # type can propagate through the delta application layer. Because
    # downstream callers assume revisions are bytes, cast as needed.
    #
    # The cast must be applied to the resolved text itself, not to the last
    # ``delta`` loop variable, which is a different object whenever any
    # patch was applied or the base came from ``revisioncache``.
    if not isinstance(fulltext, bytes):
        fulltext = bytes(fulltext)

    return fulltext
def dopatch(text, bins):
    """Apply ``bins`` to ``text`` with ``mdiff.patches``, discarding the result.

    ``cache`` and ``r`` are not parameters; they are resolved from the
    surrounding scope. When ``cache`` is falsy, ``r.clearcaches()`` is
    called before patching.
    """
    use_cache = bool(cache)
    if not use_cache:
        r.clearcaches()

    mdiff.patches(text, bins)
def perfrevlogrevision(ui, repo, file_, rev=None, cache=None, **opts):
    """Benchmark obtaining a revlog revision.

    Obtaining a revlog revision consists of roughly the following steps:

    1. Compute the delta chain
    2. Obtain the raw chunks for that delta chain
    3. Decompress each raw chunk
    4. Apply binary patches to obtain fulltext
    5. Verify hash of fulltext

    This command measures the time spent in each of these phases.
    """
    if opts.get('changelog') or opts.get('manifest'):
        # With these options the first positional argument is the revision.
        file_, rev = None, file_
    elif rev is None:
        raise error.CommandError('perfrevlogrevision', 'invalid arguments')

    rlog = cmdutil.openrevlog(repo, 'perfrevlogrevision', file_, opts)
    node = rlog.lookup(rev)
    rev = rlog.rev(node)

    def resetcaches():
        # Every phase starts by clearing caches unless ``cache`` is truthy.
        if not cache:
            rlog.clearcaches()

    def dodeltachain(target):
        resetcaches()
        rlog._deltachain(target)

    def doread(revs):
        resetcaches()
        rlog._chunkraw(revs[0], revs[-1])

    def dodecompress(segment, revs):
        resetcaches()

        # Bind lookups to locals before entering the loop.
        start = rlog.start
        length = rlog.length
        inline = rlog._inline
        iosize = rlog._io.size
        buffer = util.buffer
        offset = start(revs[0])

        for chainrev in revs:
            chunkstart = start(chainrev)
            if inline:
                chunkstart += (chainrev + 1) * iosize
            chunklength = length(chainrev)
            chunk = buffer(segment, chunkstart - offset, chunklength)
            revlog.decompress(chunk)

    def dopatch(basetext, deltas):
        resetcaches()
        mdiff.patches(basetext, deltas)

    def dohash(fulltext):
        resetcaches()
        rlog._checkhash(fulltext, node, rev)

    def dorevision():
        resetcaches()
        rlog.revision(node)

    # Prepare the inputs that the individual phases operate on.
    chain = rlog._deltachain(rev)[0]
    data = rlog._chunkraw(chain[0], chain[-1])[1]
    chunks = rlog._chunks(chain)
    basetext = str(chunks[0])
    bins = chunks[1:]
    text = mdiff.patches(basetext, bins)

    benches = [
        (dorevision, 'full'),
        (lambda: dodeltachain(rev), 'deltachain'),
        (lambda: doread(chain), 'read'),
        (lambda: dodecompress(data, chain), 'decompress'),
        (lambda: dopatch(basetext, bins), 'patch'),
        (lambda: dohash(text), 'hash'),
    ]

    for phase, title in benches:
        timer, fm = gettimer(ui, opts)
        timer(phase, title=title)
        fm.end()