Пример #1
0
def _mergeentriesiter(*iterables, **kwargs):
    """Given a set of sorted iterables, yield the next entry in merged order

    Note that by default entries go from most recent to oldest.
    """
    order = kwargs.pop('order', max)
    iterables = [iter(it) for it in iterables]
    # this tracks still active iterables; iterables are deleted as they are
    # exhausted, which is why this is a dictionary and why each entry also
    # stores the key. Entries are mutable so we can store the next value each
    # time.
    iterable_map = {}
    for key, it in enumerate(iterables):
        try:
            iterable_map[key] = [next(it), key, it]
        except StopIteration:
            # empty entry, can be ignored
            pass

    while iterable_map:
        value, key, it = order(pycompat.itervalues(iterable_map))
        yield value
        try:
            iterable_map[key][0] = next(it)
        except StopIteration:
            # this iterable is empty, remove it from consideration
            del iterable_map[key]
Пример #2
0
    def repackhistory(self, ledger, target):
        """Write the history entries tracked by ``ledger`` into ``target``.

        Ledger entries flagged as ``historysource`` are grouped per file.
        For each file (in sorted filename order) the ancestors of every
        requested node are collected from ``self.history``, ordered
        children first, and added to ``target``. Every requested entry that
        gets written is flagged ``historyrepacked``. ``target`` is closed
        with the ledger once all files have been processed.
        """
        repo = self.repo
        ui = repo.ui

        # filename -> {node: ledger entry}, history sources only.
        byfile = {}
        for entry in pycompat.itervalues(ledger.entries):
            if not entry.historysource:
                continue
            byfile.setdefault(entry.filename, {})[entry.node] = entry

        progress = ui.makeprogress(
            _(b"repacking history"), unit=self.unit, total=len(byfile)
        )
        for filename, entries in sorted(pycompat.iteritems(byfile)):
            # Accumulate ancestry for every requested node, passing what is
            # already known so nodes reached earlier are not looked up again.
            ancestors = {}
            for node in list(entries):
                if node not in ancestors:
                    found = self.history.getancestors(
                        filename, node, known=ancestors
                    )
                    ancestors.update(found)

            # Nodes that belong to the history of a copy source and must
            # therefore not be written under this filename.
            skipped = set()

            # Walk the nodes children first and write them to the pack.
            for node in reversed(self._toposort(ancestors)):
                p1, p2, linknode, copyfrom = ancestors[node]
                requested = node in entries

                # If the node is marked as skipped, but it's also in the
                # explicit entries set, that means the node exists both in
                # this file and in another file that was copied to this file.
                # Usually this happens if the file was copied to another
                # file, then the copy was deleted, then reintroduced without
                # copy metadata. The original add and the new add have the
                # same hash since the content is identical and the parents
                # are null.
                if node in skipped and not requested:
                    # If copyfrom == filename, it means the copy history
                    # went to some other file, then came back to this one, so
                    # we should continue processing it.
                    if p1 != repo.nullid and copyfrom != filename:
                        skipped.add(p1)
                    if p2 != repo.nullid:
                        skipped.add(p2)
                    continue

                if copyfrom:
                    skipped.add(p1)

                target.add(filename, node, p1, p2, linknode, copyfrom)

                if requested:
                    entries[node].historyrepacked = True

            progress.increment()

        progress.complete()
        target.close(ledger=ledger)
Пример #3
0
 def write(self):
     """Persist the transplant records to ``self.transplantfile``.

     Nothing is written unless the object is dirty and a transplant file
     is configured. The directory ``self.path`` is created when missing.
     One ``<hex lnode>:<hex rnode>`` line is emitted per record found in
     the value lists of ``self.transplants``. The file handle is always
     closed, even when writing fails part-way. ``self.dirty`` is cleared
     on every successful return, whether or not anything was written.
     """
     if self.dirty and self.transplantfile:
         if not os.path.isdir(self.path):
             os.mkdir(self.path)
         fp = self.opener(self.transplantfile, b'w')
         try:
             for entries in pycompat.itervalues(self.transplants):
                 for t in entries:
                     lnode, rnode = map(hex, (t.lnode, t.rnode))
                     fp.write(lnode + b':' + rnode + b'\n')
         finally:
             # Release the handle on failure as well, instead of leaking
             # it until garbage collection.
             fp.close()
     self.dirty = False
Пример #4
0
def verifyremotefilelog(ui, path, **opts):
    """Report cached file blobs under ``path`` that carry a null linknode.

    Walks the directory tree rooted at ``path``, skipping files named
    ``repos``. Every other file is parsed with ``parsefileblob`` (honouring
    the ``decompress`` option); for each history record whose linknode is
    ``nullid`` a status line of the form ``<cache key> <relative path>`` is
    printed through ``ui``.
    """
    decompress = opts.get('decompress')

    for root, _dirs, files in os.walk(path):
        for name in files:
            if name == b"repos":
                continue

            filepath = os.path.join(root, name)
            _size, _firstnode, mapping = parsefileblob(filepath, decompress)

            for _p1, _p2, linknode, _copyfrom in pycompat.itervalues(mapping):
                if linknode != nullid:
                    continue
                actualpath = os.path.relpath(root, path)
                key = fileserverclient.getcachekey(
                    b"reponame", actualpath, name
                )
                relative = os.path.relpath(filepath, path)
                ui.status(b"%s %s\n" % (key, relative))
Пример #5
0
 def close(self):
     """Close every pooled connection and empty each per-path pool."""
     for connections in pycompat.itervalues(self._pool):
         for connection in connections:
             connection.close()
         # Clear the list in place rather than rebinding it, so the pool
         # mapping keeps pointing at the same (now empty) list object.
         del connections[:]
Пример #6
0
    def repackdata(self, ledger, target):
        """Write the data entries tracked by ``ledger`` into ``target``.

        Ledger entries flagged as ``datasource`` are grouped per file. For
        each file (in sorted filename order) this method:

        1. collects ancestry for the requested nodes from
           ``self.fullhistory`` (nodes without history are kept aside),
        2. orders the requested nodes children first, followed by the
           history-less nodes in sorted order,
        3. optionally drops nodes that are old and not in ``self.keepkeys``
           (flagging their entries ``gced``) when ``self.garbagecollect`` is
           set,
        4. picks a delta base for every node, bounded by the
           ``packs.maxchainlen`` config,
        5. writes a delta (or full text when the base is ``nullid``) for
           each node to ``target`` and flags the entry ``datarepacked``.

        ``target`` is closed with the ledger once all files are processed.
        """
        ui = self.repo.ui
        maxchainlen = ui.configint(b'packs', b'maxchainlen', 1000)

        # filename -> {node: ledger entry}, data sources only.
        byfile = {}
        for entry in pycompat.itervalues(ledger.entries):
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        repackprogress = ui.makeprogress(_(b"repacking data"),
                                         unit=self.unit,
                                         total=len(byfile))
        for filename, entries in sorted(pycompat.iteritems(byfile)):
            repackprogress.update(count)

            ancestors = {}
            nodes = list(entries)
            nohistory = []
            buildprogress = ui.makeprogress(_(b"building history"),
                                            unit=b'nodes',
                                            total=len(nodes))
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                buildprogress.update(i)
                try:
                    ancestors.update(
                        self.fullhistory.getancestors(filename,
                                                      node,
                                                      known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            buildprogress.complete()

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if nohistory:
                ui.debug(b'repackdata: %d nodes without history\n' %
                         len(nohistory))
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors). ``entries`` is
            # keyed by exactly those nodes, so test membership against the
            # dict (constant time) rather than against the ``nodes`` list.
            orderednodes = [node for node in orderednodes if node in entries]

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if (filename, node) not in self.keepkeys and self.isold(
                            self.repo, filename, node):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            # deltabases maps node -> (delta base node, chain length).
            deltabases = {}
            # Nodes that ended up with no delta base (stored as full text).
            nobase = set()
            # Nodes that are used as the delta base of some other node.
            referenced = set()
            processprogress = ui.makeprogress(_(b"processing nodes"),
                                              unit=b'nodes',
                                              total=len(orderednodes))
            for i, node in enumerate(orderednodes):
                processprogress.update(i)
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = nullid, 0
                    deltabases[node] = (nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool(b'repack', b'chainorphansbysize'):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(ui, filename, orderednodes,
                                                  orphans, deltabases)

            # Compute deltas and write to the pack
            for node in orderednodes:
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                if deltabase != nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    # The stored delta is reused only when it was made against
                    # the same file and the base chosen above, and its size is
                    # recorded; otherwise rebuild it from the two full texts.
                    if (deltabasename != filename or origdeltabase != deltabase
                            or size is None):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    # No delta base: store the full text.
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            processprogress.complete()
            count += 1

        repackprogress.complete()
        target.close(ledger=ledger)