def _mergeentriesiter(*iterables, **kwargs): """Given a set of sorted iterables, yield the next entry in merged order Note that by default entries go from most recent to oldest. """ order = kwargs.pop('order', max) iterables = [iter(it) for it in iterables] # this tracks still active iterables; iterables are deleted as they are # exhausted, which is why this is a dictionary and why each entry also # stores the key. Entries are mutable so we can store the next value each # time. iterable_map = {} for key, it in enumerate(iterables): try: iterable_map[key] = [next(it), key, it] except StopIteration: # empty entry, can be ignored pass while iterable_map: value, key, it = order(pycompat.itervalues(iterable_map)) yield value try: iterable_map[key][0] = next(it) except StopIteration: # this iterable is empty, remove it from consideration del iterable_map[key]
def repackhistory(self, ledger, target):
    """Write the history of the ledger's history entries into ``target``.

    Ledger entries flagged ``historysource`` are grouped per file. For every
    file the ancestry of its nodes is collected, walked children first, and
    each node that should be kept is passed to ``target.add``. Ledger entries
    whose node was written get ``historyrepacked = True``. Finally
    ``target.close(ledger=ledger)`` is called.
    """
    ui = self.repo.ui

    # {filename: {node: ledger entry}} for entries that carry history
    perfile = {}
    for entry in pycompat.itervalues(ledger.entries):
        if not entry.historysource:
            continue
        perfile.setdefault(entry.filename, {})[entry.node] = entry

    progress = ui.makeprogress(
        _(b"repacking history"), unit=self.unit, total=len(perfile)
    )
    for filename, entries in sorted(pycompat.iteritems(perfile)):
        # Gather ancestry for every requested node, skipping nodes that an
        # earlier lookup already covered.
        ancestors = {}
        for node in list(entries):
            if node not in ancestors:
                ancestors.update(
                    self.history.getancestors(filename, node, known=ancestors)
                )

        # Children come before their parents
        childrenfirst = reversed(self._toposort(ancestors))

        # Write to the pack
        dontprocess = set()
        for node in childrenfirst:
            p1, p2, linknode, copyfrom = ancestors[node]
            explicit = node in entries

            # A node can be marked dontprocess and still be one of the
            # explicitly requested entries: it then exists both in this file
            # and in another file that was copied to this file. Usually this
            # happens when the file was copied to another file, the copy was
            # deleted, and it was later reintroduced without copy metadata.
            # The original add and the new add share a hash because the
            # content is identical and the parents are null. Such nodes are
            # still written; only non-explicit ones are skipped here.
            if node in dontprocess and not explicit:
                # copyfrom == filename means the copy history went to some
                # other file and then came back to this one, so p1 should
                # keep being processed.
                if p1 != self.repo.nullid and copyfrom != filename:
                    dontprocess.add(p1)
                if p2 != self.repo.nullid:
                    dontprocess.add(p2)
                continue

            if copyfrom:
                dontprocess.add(p1)

            target.add(filename, node, p1, p2, linknode, copyfrom)

            if explicit:
                entries[node].historyrepacked = True

        progress.increment()

    progress.complete()
    target.close(ledger=ledger)
def write(self):
    """Persist the transplant records to ``self.transplantfile`` if needed.

    Nothing is written unless ``self.dirty`` is set and
    ``self.transplantfile`` is truthy. When writing, ``self.path`` is created
    if it is not already a directory, and one ``<lnode>:<rnode>`` line is
    emitted per record found in the values of ``self.transplants``.
    ``self.dirty`` is reset to False on normal completion; if writing raises,
    the exception propagates and ``self.dirty`` is left untouched.
    """
    if self.dirty and self.transplantfile:
        if not os.path.isdir(self.path):
            os.mkdir(self.path)
        fp = self.opener(self.transplantfile, b'w')
        # Close the handle even if a write fails, so it is not leaked.
        try:
            for records in pycompat.itervalues(self.transplants):
                for t in records:
                    # NOTE(review): ``hex`` is concatenated with bytes below,
                    # so this is presumably the project's node-to-hex helper
                    # rather than the builtin -- confirm against the imports.
                    lnode, rnode = map(hex, (t.lnode, t.rnode))
                    fp.write(lnode + b':' + rnode + b'\n')
        finally:
            fp.close()
    self.dirty = False
def verifyremotefilelog(ui, path, **opts):
    """Report file blobs under ``path`` that hold entries with a null linknode.

    Every file below ``path`` except those named ``repos`` is parsed with
    ``parsefileblob``. For each entry in the parsed mapping whose linknode
    equals ``nullid``, one line with the cache key and the blob's path
    relative to ``path`` is sent to ``ui.status``.

    The ``decompress`` option is forwarded to ``parsefileblob``.
    """
    decompress = opts.get('decompress')
    for root, dirs, files in os.walk(path):
        for name in files:
            if name == b"repos":
                continue

            filepath = os.path.join(root, name)
            size, firstnode, mapping = parsefileblob(filepath, decompress)
            for p1, p2, linknode, copyfrom in pycompat.itervalues(mapping):
                if linknode != nullid:
                    continue
                actualpath = os.path.relpath(root, path)
                key = fileserverclient.getcachekey(
                    b"reponame", actualpath, name
                )
                relblob = os.path.relpath(filepath, path)
                ui.status(b"%s %s\n" % (key, relblob))
def close(self):
    """Close every pooled connection and empty each per-path pool."""
    for connections in pycompat.itervalues(self._pool):
        for connection in connections:
            connection.close()
        # Empty the list in place so the object held in self._pool is cleared
        del connections[:]
def repackdata(self, ledger, target):
    """Repack the data of the ledger's data entries into ``target``.

    Ledger entries flagged ``datasource`` are grouped per file. For each file
    the requested nodes are ordered children first (nodes without history are
    appended in sorted order), optionally garbage collected, assigned a delta
    base, and then written with ``target.add``. Entries that were written get
    ``datarepacked = True``; entries dropped by garbage collection get
    ``gced = True``. ``target.close(ledger=ledger)`` is called once every
    file has been processed.
    """
    ui = self.repo.ui
    maxchainlen = ui.configint(b'packs', b'maxchainlen', 1000)

    byfile = {}
    for entry in pycompat.itervalues(ledger.entries):
        if entry.datasource:
            byfile.setdefault(entry.filename, {})[entry.node] = entry

    count = 0
    repackprogress = ui.makeprogress(
        _(b"repacking data"), unit=self.unit, total=len(byfile)
    )
    for filename, entries in sorted(pycompat.iteritems(byfile)):
        repackprogress.update(count)

        ancestors = {}
        nodes = list(entries)
        nohistory = []
        buildprogress = ui.makeprogress(
            _(b"building history"), unit=b'nodes', total=len(nodes)
        )
        for i, node in enumerate(nodes):
            if node in ancestors:
                continue
            buildprogress.update(i)
            try:
                ancestors.update(
                    self.fullhistory.getancestors(
                        filename, node, known=ancestors
                    )
                )
            except KeyError:
                # Since we're packing data entries, we may not have the
                # corresponding history entries for them. It's not a big
                # deal, but the entries won't be delta'd perfectly.
                nohistory.append(node)
        buildprogress.complete()

        # Order the nodes children first, so we can produce reverse deltas
        orderednodes = list(reversed(self._toposort(ancestors)))
        if nohistory:
            ui.debug(
                b'repackdata: %d nodes without history\n' % len(nohistory)
            )
        orderednodes.extend(sorted(nohistory))

        # Filter orderednodes to just the nodes we want to serialize (it
        # currently also has the edge nodes' ancestors). The requested nodes
        # are exactly the keys of ``entries``, so test membership against
        # that dict instead of scanning the ``nodes`` list for every node.
        orderednodes = [node for node in orderednodes if node in entries]

        # Garbage collect old nodes:
        if self.garbagecollect:
            neworderednodes = []
            for node in orderednodes:
                # If the node is old and is not in the keepset, we skip it,
                # and mark as garbage collected
                if (filename, node) not in self.keepkeys and self.isold(
                    self.repo, filename, node
                ):
                    entries[node].gced = True
                    continue
                neworderednodes.append(node)
            orderednodes = neworderednodes

        # Compute delta bases for nodes:
        deltabases = {}
        nobase = set()
        referenced = set()
        processprogress = ui.makeprogress(
            _(b"processing nodes"), unit=b'nodes', total=len(orderednodes)
        )
        for i, node in enumerate(orderednodes):
            processprogress.update(i)
            # Find delta base
            # TODO: allow delta'ing against most recent descendant instead
            # of immediate child
            deltatuple = deltabases.get(node, None)
            if deltatuple is None:
                deltabase, chainlen = nullid, 0
                deltabases[node] = (nullid, 0)
                nobase.add(node)
            else:
                deltabase, chainlen = deltatuple
                referenced.add(deltabase)

            # Use available ancestor information to inform our delta choices
            ancestorinfo = ancestors.get(node)
            if ancestorinfo:
                p1, p2, linknode, copyfrom = ancestorinfo

                # The presence of copyfrom means we're at a point where the
                # file was copied from elsewhere. So don't attempt to do any
                # deltas with the other file.
                if copyfrom:
                    p1 = nullid

                if chainlen < maxchainlen:
                    # Record this child as the delta base for its parents.
                    # This may be non optimal, since the parents may have
                    # many children, and this will only choose the last one.
                    # TODO: record all children and try all deltas to find
                    # best
                    if p1 != nullid:
                        deltabases[p1] = (node, chainlen + 1)
                    if p2 != nullid:
                        deltabases[p2] = (node, chainlen + 1)

        # experimental config: repack.chainorphansbysize
        if ui.configbool(b'repack', b'chainorphansbysize'):
            orphans = nobase - referenced
            orderednodes = self._chainorphans(
                ui, filename, orderednodes, orphans, deltabases
            )

        # Compute deltas and write to the pack
        for node in orderednodes:
            deltabase, chainlen = deltabases[node]
            # Compute delta
            # TODO: Optimize the deltachain fetching. Since we're
            # iterating over the different version of the file, we may
            # be fetching the same deltachain over and over again.
            if deltabase != nullid:
                deltaentry = self.data.getdelta(filename, node)
                delta, deltabasename, origdeltabase, meta = deltaentry
                size = meta.get(constants.METAKEYSIZE)
                # Reuse the stored delta only when it was made against the
                # base chosen above and its size is recorded; otherwise
                # recompute it from the two full texts.
                if (
                    deltabasename != filename
                    or origdeltabase != deltabase
                    or size is None
                ):
                    deltabasetext = self.data.get(filename, deltabase)
                    original = self.data.get(filename, node)
                    size = len(original)
                    delta = mdiff.textdiff(deltabasetext, original)
            else:
                # No delta base: store the full text
                delta = self.data.get(filename, node)
                size = len(delta)
                meta = self.data.getmeta(filename, node)

            # TODO: don't use the delta if it's larger than the fulltext
            if constants.METAKEYSIZE not in meta:
                meta[constants.METAKEYSIZE] = size
            target.add(filename, node, deltabase, delta, meta)

            entries[node].datarepacked = True

        processprogress.complete()
        count += 1

    repackprogress.complete()
    target.close(ledger=ledger)