Example #1
def test1(a, b):
    d = mdiff.textdiff(a, b)
    if not d:
        raise ValueError("empty")
    c = mdiff.patches(a, [d])
    if c != b:
        raise ValueError("bad")
Example #2
    def make_delta(self, history_chain, old_file, new_file, out_file_name):
        """ Make a new binary change blob and write it into out_file_name. """
        if len(history_chain) == 0:
            # No history yet: store the full text instead of a delta.
            return self.make_full_insert(new_file, out_file_name)

        with open(new_file, 'rb') as in_file:
            raw_new = in_file.read()

        with open(old_file, 'rb') as in_old:
            raw_old = in_old.read()

        # compress() returns a (header, data) pair; an empty header is
        # simply not written.
        values = compress(mdiff.textdiff(raw_old, raw_new))
        parent = history_chain[0][0]
        with open(out_file_name, 'wb') as out_file:
            if values[0]:
                out_file.write(values[0])
            out_file.write(values[1])

        return parent
Example #3
    def diff(self, delta_object):
        def flatten(s):
            return s if isinstance(s, bytes) else bytes(s)

        return textdiff(
            flatten(delta_object.raw_data) if delta_object else b'',
            flatten(self.raw_data))
Example #4
def unicode_make_patch(old_text, new_text):
    """ Helper wrapper around make_patch() which takes unicode strings."""
    values = compress(mdiff.textdiff(old_text.encode('utf8'),
                                     new_text.encode('utf8')))
    if values[0]:
        return ''.join(values)

    return values[1]
Example #5
    def assert_bdiff_applies(self, a, b):
        d = mdiff.textdiff(a, b)
        c = a
        if d:
            c = mdiff.patches(a, [d])
        self.assertEqual(
            c, b, ("bad diff+patch result from\n  %r to\n  "
                   "%r: \nbdiff: %r\npatched: %r" % (a, b, d, c[:200])))
Example #6
    def revdiff(self, rev1, rev2):
        validaterev(rev1)
        validaterev(rev2)

        node1 = self.node(rev1)
        node2 = self.node(rev2)

        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))
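This revdiff pattern recurs in several of the examples below: fetch the full (here raw) text of both revisions and let mdiff.textdiff compute the binary delta between them.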
Example #7
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            return self.revlog2.revdiff(self.revlog2.rev(self.node(rev1)),
                                        self.revlog2.rev(self.node(rev2)))
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return revlog.revlog.revdiff(self, rev1, rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))
Example #8
    def get_delta(self, src, dst):
        """
        Calculate the delta between two strings.

        :param src: Source string
        :param dst: Destination string
        :return: (<type>, delta)
        """
        delta = textdiff(src, dst)
        if len(delta) >= len(dst):
            # The delta is no smaller than the new text; store the full
            # text instead.
            return self.T_FILE, dst
        else:
            return self.T_BDIFF, delta
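The matching read side is not shown on this page; a hypothetical counterpart, assuming the same T_FILE/T_BDIFF constants and that patches() is importable from the same module as textdiff:

    def apply_delta(self, src, kind, delta):
        # Hypothetical inverse of get_delta(); not part of the original class.
        if kind == self.T_FILE:
            return delta                  # the full destination text was stored
        return patches(src, [delta])      # apply the binary delta to src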
Example #9
def fastdelta(mf, mfgetter, base, changes):
    """Given a base manifest text as an array.array and a list of changes
    relative to that text, compute a delta that can be used by revlog.
    """
    delta = []
    dstart = None
    dend = None
    dline = [""]
    start = 0
    # zero copy representation of base as a buffer
    addbuf = util.buffer(base)

    changes = list(changes)
    if len(changes) < 1000:
        # start with a readonly loop that finds the offset of
        # each line and creates the deltas
        for f, todelete in changes:
            # bs will either be the index of the item or the insert point
            start, end = manifest._msearch(addbuf, f, start)
            if not todelete:
                h, fl = mfgetter(f)
                l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
            else:
                if start == end:
                    # item we want to delete was not found, error out
                    raise AssertionError(
                        "failed to remove %s from manifest" % f)
                l = ""
            if dstart is not None and dstart <= start and dend >= start:
                if dend < end:
                    dend = end
                if l:
                    dline.append(l)
            else:
                if dstart is not None:
                    delta.append([dstart, dend, "".join(dline)])
                dstart = start
                dend = end
                dline = [l]

        if dstart is not None:
            delta.append([dstart, dend, "".join(dline)])
        # apply the delta to the base, and get a delta for addrevision
        deltatext, arraytext = manifest._addlistdelta(base, delta)
    else:
        # For large changes, it's much cheaper to just build the text and
        # diff it.
        arraytext = bytearray(mf.text())
        deltatext = mdiff.textdiff(util.buffer(base), util.buffer(arraytext))

    return arraytext, deltatext
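Note the two strategies here: for fewer than 1000 changes the delta is assembled by hand as [start, end, replacement] hunks, while larger change sets simply rebuild the full text and hand it to mdiff.textdiff, which the code comment notes is much cheaper at that scale.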
Example #10
    def showdiff(self, a, b):
        bin = mdiff.textdiff(a, b)
        pos = 0
        q = 0
        actions = []
        while pos < len(bin):
            p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12])
            pos += 12
            if p1:
                actions.append(a[q:p1])
            actions.append(diffreplace(p1, p2, a[p1:p2], bin[pos:pos + l]))
            pos += l
            q = p2
        if q < len(a):
            actions.append(a[q:])
        return actions
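showdiff decodes the delta format that textdiff emits: each hunk starts with a 12-byte header of three big-endian 32-bit integers (the start and end of the replaced range in a, and the length of the replacement data), followed by the replacement bytes themselves.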
Example #11
    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        repackprogress = ui.makeprogress(_("repacking data"),
                                         unit=self.unit,
                                         total=len(byfile))
        for filename, entries in sorted(byfile.iteritems()):
            repackprogress.update(count)

            ancestors = {}
            nodes = list(node for node in entries)
            nohistory = []
            buildprogress = ui.makeprogress(_("building history"),
                                            unit='nodes',
                                            total=len(nodes))
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                buildprogress.update(i)
                try:
                    ancestors.update(
                        self.fullhistory.getancestors(filename,
                                                      node,
                                                      known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            buildprogress.complete()

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug('repackdata: %d nodes without history\n' %
                         len(nohistory))
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
            orderednodes = list(
                filter(lambda node: node in nodes, orderednodes))

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if ((filename, node) not in self.keepkeys
                            and self.isold(self.repo, filename, node)):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            processprogress = ui.makeprogress(_("processing nodes"),
                                              unit='nodes',
                                              total=len(orderednodes))
            for i, node in enumerate(orderednodes):
                processprogress.update(i)
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = nullid, 0
                    deltabases[node] = (nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool('repack', 'chainorphansbysize'):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(ui, filename, orderednodes,
                                                  orphans, deltabases)

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                if deltabase != nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (deltabasename != filename or origdeltabase != deltabase
                            or size is None):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            processprogress.complete()
            count += 1

        repackprogress.complete()
        target.close(ledger=ledger)
Example #12
    def revdiff(self, node1, node2):
        return mdiff.textdiff(self.revision(node1), self.revision(node2))
Example #13
    def diff(self, delta_object):
        def flatten(s):
            return s if isinstance(s, str) else str(s)

        return textdiff(flatten(delta_object.raw_data) if delta_object else '',
                        flatten(self.raw_data))
Example #14
def make_patch(old_text, new_text):
    """ Return a raw patch bytes which transforms old_text into new_text. """
    values = compress(mdiff.textdiff(old_text, new_text))
    if values[0]:
        return ''.join(values)
    return values[1]
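In both make_patch() and unicode_make_patch() above, compress() yields a (header, data) pair; when the header chunk is empty only the data chunk is returned, otherwise the two are concatenated into a single patch blob.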
Example #15
    def diff(self, other):
        return textdiff(other.data if other else '', self.data)
Example #16
    def diff(self, other):
        return mdiff.textdiff(other.data if other else '', self.data)
Example #17
    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ui.progress(_("repacking data"), count, unit=self.unit,
                        total=len(byfile))

            ancestors = {}
            nodes = list(node for node in entries.iterkeys())
            nohistory = []
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                ui.progress(_("building history"), i, unit='nodes',
                            total=len(nodes))
                try:
                    ancestors.update(self.fullhistory.getancestors(filename,
                        node, known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            ui.progress(_("building history"), None)

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug('repackdata: %d nodes without history\n' %
                         len(nohistory))
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
            orderednodes = filter(lambda node: node in nodes, orderednodes)

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if ((filename, node) not in self.keepkeys and
                        self.isold(self.repo, filename, node)):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            for i, node in enumerate(orderednodes):
                ui.progress(_("processing nodes"), i, unit='nodes',
                            total=len(orderednodes))
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = nullid, 0
                    deltabases[node] = (nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool('repack', 'chainorphansbysize', True):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(ui, filename, orderednodes,
                    orphans, deltabases)

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                meta = None
                if deltabase != nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (deltabasename != filename or origdeltabase != deltabase
                        or size is None):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            ui.progress(_("processing nodes"), None)
            count += 1

        ui.progress(_("repacking data"), None)
        target.close(ledger=ledger)
Example #18
    def _addrawrevision(
        self,
        node,
        revisiondata,
        transaction,
        linkrev,
        p1,
        p2,
        storedelta=None,
        flags=0,
    ):
        if self._pathid is None:
            res = self._db.execute(
                'INSERT INTO filepath (path) VALUES (?)', (self._path,)
            )
            self._pathid = res.lastrowid

        # For simplicity, always store a delta against p1.
        # TODO we need a lot more logic here to make behavior reasonable.

        if storedelta:
            deltabase, delta = storedelta

            if isinstance(deltabase, int):
                deltabase = self.node(deltabase)

        else:
            assert revisiondata is not None
            deltabase = p1

            if deltabase == nullid:
                delta = revisiondata
            else:
                delta = mdiff.textdiff(
                    self.revision(self.rev(deltabase)), revisiondata
                )

        # File index stores a pointer to its delta and the parent delta.
        # The parent delta is stored via a pointer to the fileindex PK.
        if deltabase == nullid:
            baseid = None
        else:
            baseid = self._revisions[deltabase].rid

        # Deltas are stored with a hash of their content. This allows
        # us to de-duplicate. The table is configured to ignore conflicts
        # and it is faster to just insert and silently noop than to look
        # first.
        deltahash = hashutil.sha1(delta).digest()

        if self._compengine == b'zstd':
            deltablob = self._cctx.compress(delta)
            compression = COMPRESSION_ZSTD
        elif self._compengine == b'zlib':
            deltablob = zlib.compress(delta)
            compression = COMPRESSION_ZLIB
        elif self._compengine == b'none':
            deltablob = delta
            compression = COMPRESSION_NONE
        else:
            raise error.ProgrammingError(
                b'unhandled compression engine: %s' % self._compengine
            )

        # Don't store compressed data if it isn't practical.
        if len(deltablob) >= len(delta):
            deltablob = delta
            compression = COMPRESSION_NONE

        deltaid = insertdelta(self._db, compression, deltahash, deltablob)

        rev = len(self)

        if p1 == nullid:
            p1rev = nullrev
        else:
            p1rev = self._nodetorev[p1]

        if p2 == nullid:
            p2rev = nullrev
        else:
            p2rev = self._nodetorev[p2]

        rid = self._db.execute(
            'INSERT INTO fileindex ('
            '    pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
            '    deltaid, deltabaseid) '
            '    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
            (
                self._pathid,
                rev,
                node,
                p1rev,
                p2rev,
                linkrev,
                flags,
                deltaid,
                baseid,
            ),
        ).lastrowid

        entry = revisionentry(
            rid=rid,
            rev=rev,
            node=node,
            p1rev=p1rev,
            p2rev=p2rev,
            p1node=p1,
            p2node=p2,
            linkrev=linkrev,
            flags=flags,
        )

        self._nodetorev[node] = rev
        self._revtonode[rev] = node
        self._revisions[node] = entry

        return node
Example #19
    def revdiff(self, node1, node2):
        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))
Example #20
    def revdiff(self, node1, node2):
        return mdiff.textdiff(self.rawdata(node1), self.rawdata(node2))
Example #21
def d():
    for pair in textpairs:
        mdiff.textdiff(*pair)
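A closure like this is meant to be handed to a timing harness; a standalone sketch using the standard library (the text pairs are illustrative):

import timeit

from mercurial import mdiff

textpairs = [(b"a\nb\nc\n", b"a\nB\nc\n")] * 100

def d():
    for pair in textpairs:
        mdiff.textdiff(*pair)

print(timeit.timeit(d, number=100))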