def _gc_inventory(self):
    """Remove inventories that are not referenced from the revision store.

    When ``self.garbage_inventories`` is empty after the garbage check,
    'Inventory ok.' is noted and nothing is rewritten.  Otherwise the
    inventory is backed up, the inventory records for every key in
    ``self.revisions`` are streamed into a temporary store, and that store
    is activated.

    :raises AssertionError: if the keys of the rebuilt store differ from
        the revision keys.
    """
    self.pb.update(gettext('Checking unused inventories'), 0, 1)
    self._check_garbage_inventories()
    self.pb.update(gettext('Checking unused inventories'), 1, 3)
    if not self.garbage_inventories:
        ui.ui_factory.note(gettext('Inventory ok.'))
        return
    self.pb.update(gettext('Backing up inventory'), 0, 0)
    self.repo._backup_inventory()
    ui.ui_factory.note(gettext('Backup Inventory created'))
    new_inventories = self.repo._temp_inventories()

    # We have topological order of revisions and non-ghost parents ready.
    graph = self.revisions.get_parent_map(self.revisions.keys())
    revision_keys = topo_sort(graph)
    self._setup_steps(len(revision_keys))
    stream = self._change_inv_parents(
        self.inventory.get_record_stream(revision_keys, 'unordered', True),
        graph.__getitem__,
        set(revision_keys))
    new_inventories.insert_record_stream(stream)

    # If this worked, the keys of the new store equal the revision keys.
    # A fresh set is built here rather than reusing the one handed to
    # _change_inv_parents, which is free to modify its argument.
    if set(new_inventories.keys()) != set(revision_keys):
        raise AssertionError()

    self.pb.update(gettext('Writing weave'))
    self.repo._activate_new_inventory()
    self.inventory = None
    ui.ui_factory.note(gettext('Inventory regenerated.'))
def _gc_inventory(self):
    """Remove inventories that are not referenced from the revision store.

    When ``self.garbage_inventories`` is empty after the garbage check,
    'Inventory ok.' is noted and nothing is rewritten.  Otherwise the
    inventory is backed up, the inventory records for every key in
    ``self.revisions`` are streamed into a temporary store, and that store
    is activated.

    :raises AssertionError: if the keys of the rebuilt store differ from
        the revision keys.
    """
    self.pb.update(gettext('Checking unused inventories'), 0, 1)
    self._check_garbage_inventories()
    self.pb.update(gettext('Checking unused inventories'), 1, 3)
    if not self.garbage_inventories:
        ui.ui_factory.note(gettext('Inventory ok.'))
        return
    self.pb.update(gettext('Backing up inventory'), 0, 0)
    self.repo._backup_inventory()
    ui.ui_factory.note(gettext('Backup Inventory created'))
    new_inventories = self.repo._temp_inventories()

    # We have topological order of revisions and non-ghost parents ready.
    graph = self.revisions.get_parent_map(self.revisions.keys())
    revision_keys = topo_sort(graph)
    self._setup_steps(len(revision_keys))
    stream = self._change_inv_parents(
        self.inventory.get_record_stream(revision_keys, 'unordered', True),
        graph.__getitem__,
        set(revision_keys))
    new_inventories.insert_record_stream(stream)

    # If this worked, the keys of the new store equal the revision keys.
    # A fresh set is built here rather than reusing the one handed to
    # _change_inv_parents, which is free to modify its argument.
    if set(new_inventories.keys()) != set(revision_keys):
        raise AssertionError()

    self.pb.update(gettext('Writing weave'))
    self.repo._activate_new_inventory()
    self.inventory = None
    ui.ui_factory.note(gettext('Inventory regenerated.'))
def _reweave_inventory(self):
    """Rebuild the repository's inventory weave, but only when it helps.

    The revision graph is collected and garbage inventories are checked
    first.  The reweave is carried out when inconsistent parents were
    detected, or when ``self.thorough`` is set and garbage inventories
    exist; otherwise 'Inventory ok.' is noted and nothing is rewritten.

    :raises AssertionError: if the keys of the rebuilt store differ from
        the pending revision ids.
    """
    # The transaction is fetched here; its value is not used further in
    # this method.
    transaction = self.repo.get_transaction()
    self.pb.update(gettext('Reading inventory data'))
    self.inventory = self.repo.inventories
    self.revisions = self.repo.revisions
    # Every revision id that has to be processed.
    self.pending = set(key[-1] for key in self.revisions.keys())
    # Maps revision_id -> parents.
    self._rev_graph = {}
    # Number of errors detected so far.
    self.inconsistent_parents = 0
    # Record each revision together with its available parents.
    self._setup_steps(len(self.pending))
    for revision_id in self.pending:
        self._graph_revision(revision_id)
    self._check_garbage_inventories()

    # Garbage inventories alone only justify a reweave in thorough mode.
    reweave_needed = self.inconsistent_parents or (
        self.garbage_inventories and self.thorough)
    if not reweave_needed:
        ui.ui_factory.note(gettext('Inventory ok.'))
        return

    self.pb.update(gettext('Backing up inventory'), 0, 0)
    self.repo._backup_inventory()
    ui.ui_factory.note(gettext('Backup inventory created.'))
    new_inventories = self.repo._temp_inventories()

    # Topological order of revisions and non-ghost parents is ready.
    self._setup_steps(len(self._rev_graph))
    revision_keys = [(revision_id,)
                     for revision_id in topo_sort(self._rev_graph)]
    source_stream = self.inventory.get_record_stream(
        revision_keys, 'unordered', True)
    stream = self._change_inv_parents(
        source_stream, self._new_inv_parents, set(revision_keys))
    new_inventories.insert_record_stream(stream)

    # If this worked, the new store holds one key per pending revision.
    rebuilt_keys = set(new_inventories.keys())
    expected_keys = set((revision_id,) for revision_id in self.pending)
    if rebuilt_keys != expected_keys:
        raise AssertionError()

    self.pb.update(gettext('Writing weave'))
    self.repo._activate_new_inventory()
    self.inventory = None
    ui.ui_factory.note(gettext('Inventory regenerated.'))
def _reweave_inventory(self):
    """Rebuild the repository's inventory weave, but only when it helps.

    The revision graph is collected and garbage inventories are checked
    first.  The reweave is carried out when inconsistent parents were
    detected, or when ``self.thorough`` is set and garbage inventories
    exist; otherwise 'Inventory ok.' is noted and nothing is rewritten.

    :raises AssertionError: if the keys of the rebuilt store differ from
        the pending revision ids.
    """
    # The transaction is fetched here; its value is not used further in
    # this method.
    transaction = self.repo.get_transaction()
    self.pb.update(gettext('Reading inventory data'))
    self.inventory = self.repo.inventories
    self.revisions = self.repo.revisions
    # Every revision id that has to be processed.
    self.pending = set(key[-1] for key in self.revisions.keys())
    # Maps revision_id -> parents.
    self._rev_graph = {}
    # Number of errors detected so far.
    self.inconsistent_parents = 0
    # Record each revision together with its available parents.
    self._setup_steps(len(self.pending))
    for revision_id in self.pending:
        self._graph_revision(revision_id)
    self._check_garbage_inventories()

    # Garbage inventories alone only justify a reweave in thorough mode.
    reweave_needed = self.inconsistent_parents or (
        self.garbage_inventories and self.thorough)
    if not reweave_needed:
        ui.ui_factory.note(gettext('Inventory ok.'))
        return

    self.pb.update(gettext('Backing up inventory'), 0, 0)
    self.repo._backup_inventory()
    ui.ui_factory.note(gettext('Backup inventory created.'))
    new_inventories = self.repo._temp_inventories()

    # Topological order of revisions and non-ghost parents is ready.
    self._setup_steps(len(self._rev_graph))
    revision_keys = [(revision_id,)
                     for revision_id in topo_sort(self._rev_graph)]
    source_stream = self.inventory.get_record_stream(
        revision_keys, 'unordered', True)
    stream = self._change_inv_parents(
        source_stream, self._new_inv_parents, set(revision_keys))
    new_inventories.insert_record_stream(stream)

    # If this worked, the new store holds one key per pending revision.
    rebuilt_keys = set(new_inventories.keys())
    expected_keys = set((revision_id,) for revision_id in self.pending)
    if rebuilt_keys != expected_keys:
        raise AssertionError()

    self.pb.update(gettext('Writing weave'))
    self.repo._activate_new_inventory()
    self.inventory = None
    ui.ui_factory.note(gettext('Inventory regenerated.'))
def _reweave(wa, wb, pb=None, msg=None):
    """Combine two weaves and return the result.

    This works even if a revision R has different parents in wa and wb.
    In the resulting weave all the parents are given.

    This is done by just building up a new weave, maintaining ordering
    of the versions in the two inputs.  More efficient approaches
    might be possible but it should only be necessary to do
    this operation rarely, when a new previously ghost version is
    inserted.

    :param wa: The first weave; its lines are used for versions it holds.
    :param wb: The second weave; supplies versions missing from wa.
    :param pb: An optional progress bar, indicating how far done we are
    :param msg: An optional message for the progress; 'reweave' is used
        when a progress bar is given without a message.
    :return: The newly built weave.
    :raises errors.WeaveTextDiffers: if a version present in both weaves
        has different lines in each.
    """
    wr = Weave()
    # first determine combined parents of all versions
    # map from version name -> all parent names
    combined_parents = _reweave_parent_graphs(wa, wb)
    mutter("combined parents: %r", combined_parents)
    order = topo_sort(combined_parents.iteritems())
    mutter("order to reweave: %r", order)

    if pb and not msg:
        msg = 'reweave'

    total = len(order)
    for idx, name in enumerate(order):
        if pb:
            pb.update(msg, idx, total)
        if name in wa._name_map:
            lines = wa.get_lines(name)
            if name in wb._name_map:
                lines_b = wb.get_lines(name)
                if lines != lines_b:
                    # Log a unified diff of the two texts before failing.
                    mutter('Weaves differ on content. rev_id {%s}', name)
                    mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
                    import difflib
                    diff_lines = list(difflib.unified_diff(
                        lines, lines_b, wa._weave_name, wb._weave_name))
                    mutter('lines:\n%s', ''.join(diff_lines))
                    raise errors.WeaveTextDiffers(name, wa, wb)
        else:
            lines = wb.get_lines(name)
        wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
    return wr
def _reweave(wa, wb, pb=None, msg=None):
    """Combine two weaves and return the result.

    This works even if a revision R has different parents in wa and wb.
    In the resulting weave all the parents are given.

    This is done by just building up a new weave, maintaining ordering
    of the versions in the two inputs.  More efficient approaches
    might be possible but it should only be necessary to do
    this operation rarely, when a new previously ghost version is
    inserted.

    :param wa: The first weave; its lines are used for versions it holds.
    :param wb: The second weave; supplies versions missing from wa.
    :param pb: An optional progress bar, indicating how far done we are
    :param msg: An optional message for the progress; 'reweave' is used
        when a progress bar is given without a message.
    :return: The newly built weave.
    :raises errors.WeaveTextDiffers: if a version present in both weaves
        has different lines in each.
    """
    wr = Weave()
    # first determine combined parents of all versions
    # map from version name -> all parent names
    combined_parents = _reweave_parent_graphs(wa, wb)
    mutter("combined parents: %r", combined_parents)
    order = topo_sort(combined_parents.iteritems())
    mutter("order to reweave: %r", order)

    if pb and not msg:
        msg = 'reweave'

    total = len(order)
    for idx, name in enumerate(order):
        if pb:
            pb.update(msg, idx, total)
        if name in wa._name_map:
            lines = wa.get_lines(name)
            if name in wb._name_map:
                lines_b = wb.get_lines(name)
                if lines != lines_b:
                    # Log a unified diff of the two texts before failing.
                    mutter('Weaves differ on content. rev_id {%s}', name)
                    mutter('weaves: %s, %s', wa._weave_name, wb._weave_name)
                    import difflib
                    diff_lines = list(difflib.unified_diff(
                        lines, lines_b, wa._weave_name, wb._weave_name))
                    mutter('lines:\n%s', ''.join(diff_lines))
                    raise errors.WeaveTextDiffers(name, wa, wb)
        else:
            lines = wb.get_lines(name)
        wr._add(name, lines, [wr._lookup(i) for i in combined_parents[name]])
    return wr
def assertSortAndIterateOrder(self, graph):
    """Check that topo_sort and iter_topo_order give a topological order.

    For every child in the graph, check that it comes after all of its
    parents, first in the topo_sort result and then in the
    iter_topo_order result.
    """
    sort_result = topo_sort(graph)
    iter_result = list(TopoSorter(graph).iter_topo_order())
    for child, parents in graph:
        for parent in parents:
            for ordering in (sort_result, iter_result):
                if ordering.index(child) < ordering.index(parent):
                    self.fail("parent %s must come before child %s:\n%s"
                              % (parent, child, ordering))
def get_ancestry(self, node_id, topo_sorted=True):
    """Return the inclusive ancestors of node_id.

    Parents are read from ``self._graph_ancestors``; any parent found in
    ``self.ghosts`` is left out and not followed.

    :param node_id: The node whose ancestry is wanted; it is part of the
        result.
    :param topo_sorted: When true (the default) the result is ordered by
        ``bzrlib.tsort.topo_sort``; when false the order is arbitrary.
    :return: A list of node ids.
    """
    # maybe optimise this ?
    ancestry = {}
    pending = set([node_id])
    while pending:
        current = pending.pop()
        parents = [parent for parent in self._graph_ancestors[current]
                   if parent not in self.ghosts]
        ancestry[current] = parents
        # pending is a set, so re-adding a queued parent is harmless.
        pending.update(
            parent for parent in parents if parent not in ancestry)
    if not topo_sorted:
        return list(ancestry)
    # Imported only on the path that needs it.
    from bzrlib.tsort import topo_sort
    return topo_sort(ancestry.items())
def import_git_objects(repo, mapping, num_objects, object_iter, pb=None):
    """Import a set of git objects into a bzr repository.

    :param repo: Bazaar repository
    :param mapping: Mapping to use
    :param num_objects: Number of objects.
    :param object_iter: Iterator over Git objects.
    :param pb: Optional progress bar; updated while objects are read and
        again while revisions are added.
    """
    # TODO: a more (memory-)efficient implementation of this
    objects = {}
    for i, o in enumerate(object_iter):
        if pb is not None:
            pb.update("fetching objects", i, num_objects)
        objects[o.id] = o

    def lookup_object(sha):
        # Prefer the objects just received; otherwise rebuild the object
        # through reconstruct_git_object.
        if sha in objects:
            return objects[sha]
        return reconstruct_git_object(repo, mapping, sha)

    graph = []
    root_trees = {}
    revisions = {}
    # Find and convert commit objects
    for o in objects.itervalues():
        if isinstance(o, Commit):
            rev = mapping.import_commit(o)
            root_trees[rev.revision_id] = objects[o.tree]
            revisions[rev.revision_id] = rev
            graph.append((rev.revision_id, rev.parent_ids))

    # Order the revisions
    # Create the inventory objects
    num_revisions = len(graph)
    for i, revid in enumerate(topo_sort(graph)):
        if pb is not None:
            pb.update("fetching revisions", i, num_revisions)
        root_tree = root_trees[revid]
        rev = revisions[revid]
        # We have to do this here, since we have to walk the tree and
        # we need to make sure to import the blobs / trees with the right
        # path; this may involve adding them more than once.
        inv = Inventory()
        inv.revision_id = rev.revision_id
        parent_invs = [repo.get_inventory(r) for r in rev.parent_ids]
        import_git_tree(repo, mapping, "", root_tree, inv, parent_invs,
                        lookup_object)
        repo.add_revision(rev.revision_id, rev, inv)
def generate_root_texts(self, revs):
    """Generate VersionedFiles for all root ids.

    One empty fulltext record per (root_id, revision_id) pair is inserted
    into ``self.target.texts``.

    :param revs: the revisions to include
    """
    to_texts = self.target.texts
    graph = self.source.get_graph()
    parent_map = graph.get_parent_map(revs)
    rev_order = topo_sort(parent_map)
    rev_id_to_root_id, _root_id_to_rev_ids = self._find_root_ids(
        revs, parent_map, graph)
    # The sort is stable, so grouping by root id keeps the revisions of
    # each file id in topological order.  File id splits and joins would
    # invalidate this, but they don't exist yet, and are unlikely to in
    # non-rich-root environments anyway.
    root_keys = sorted(
        ((rev_id_to_root_id[rev_id], rev_id) for rev_id in rev_order),
        key=operator.itemgetter(0))

    def root_records():
        """Yield one record for every root text that has to be created."""
        for root_key in root_keys:
            root_id, rev_id = root_key
            parent_keys = []
            for parent in parent_map[rev_id]:
                if parent == NULL_REVISION:
                    continue
                # A parent revision with a different root id represents a
                # rename of the root to a different location, so it is
                # not a parent of this text.  Looking the file id up in
                # the revision tree would cost considerably more; this is
                # sufficient for now and reconcile will catch and correct
                # it anyway.  A ghost parent has no known root id, so it
                # is assumed unchanged rather than being trimmed.
                if rev_id_to_root_id.get(parent, root_id) != root_id:
                    continue
                parent_keys.append((root_id, parent))
            yield FulltextContentFactory(
                root_key, tuple(parent_keys), None, '')

    to_texts.insert_record_stream(root_records())
def get_record_stream(self, versions, ordering, include_delta_closure):
    """Get a stream of records for versions.

    :param versions: The versions to include. Each version is a tuple
        (version,).
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    version_ids = [key[-1] for key in versions]
    if ordering == 'topological':
        parent_map = self.get_parent_map(version_ids)
        ordered_ids = topo_sort(parent_map)
        # Requested versions without a parent map entry follow the sorted
        # ones.
        ordered_ids.extend(set(version_ids) - set(parent_map))
        version_ids = ordered_ids
    for version_id in version_ids:
        if version_id not in self:
            yield AbsentContentFactory((version_id,))
        else:
            yield WeaveContentFactory(version_id, self)
def get_record_stream(self, versions, ordering, include_delta_closure):
    """Get a stream of records for versions.

    :param versions: The versions to include. Each version is a tuple
        (version,).
    :param ordering: Either 'unordered' or 'topological'. A topologically
        sorted stream has compression parents strictly before their
        children.
    :param include_delta_closure: If True then the closure across any
        compression parents will be included (in the opaque data).
    :return: An iterator of ContentFactory objects, each of which is only
        valid until the iterator is advanced.
    """
    version_ids = [key[-1] for key in versions]
    if ordering == 'topological':
        parent_map = self.get_parent_map(version_ids)
        ordered_ids = topo_sort(parent_map)
        # Requested versions without a parent map entry follow the sorted
        # ones.
        ordered_ids.extend(set(version_ids) - set(parent_map))
        version_ids = ordered_ids
    for version_id in version_ids:
        if version_id not in self:
            yield AbsentContentFactory((version_id,))
        else:
            yield WeaveContentFactory(version_id, self)
def generate_root_texts(self, revs):
    """Generate VersionedFiles for all root ids.

    One empty fulltext record per (root_id, revision_id) pair is inserted
    into ``self.target.texts``.

    :param revs: the revisions to include
    """
    to_texts = self.target.texts
    graph = self.source.get_graph()
    parent_map = graph.get_parent_map(revs)
    rev_order = topo_sort(parent_map)
    rev_id_to_root_id, _root_id_to_rev_ids = self._find_root_ids(
        revs, parent_map, graph)
    # The sort is stable, so grouping by root id keeps the revisions of
    # each file id in topological order.  File id splits and joins would
    # invalidate this, but they don't exist yet, and are unlikely to in
    # non-rich-root environments anyway.
    root_keys = sorted(
        ((rev_id_to_root_id[rev_id], rev_id) for rev_id in rev_order),
        key=operator.itemgetter(0))

    def root_records():
        """Yield one record for every root text that has to be created."""
        for root_key in root_keys:
            root_id, rev_id = root_key
            parent_keys = []
            for parent in parent_map[rev_id]:
                if parent == NULL_REVISION:
                    continue
                # A parent revision with a different root id represents a
                # rename of the root to a different location, so it is
                # not a parent of this text.  Looking the file id up in
                # the revision tree would cost considerably more; this is
                # sufficient for now and reconcile will catch and correct
                # it anyway.  A ghost parent has no known root id, so it
                # is assumed unchanged rather than being trimmed.
                if rev_id_to_root_id.get(parent, root_id) != root_id:
                    continue
                parent_keys.append((root_id, parent))
            yield FulltextContentFactory(
                root_key, tuple(parent_keys), None, '')

    to_texts.insert_record_stream(root_records())
def generate_simple_plan(todo_set, start_revid, stop_revid, onto_revid, graph,
                         generate_revid, skip_full_merged=False):
    """Build a simple rebase plan that replays revisions onto onto_revid.

    :param todo_set: A set of revisions to rebase. Only the revisions
        topologically between stop_revid and start_revid (inclusive)
        are rebased; other revisions are ignored (and references to
        them are preserved).
    :param start_revid: Id of revision at which to start replaying
    :param stop_revid: Id of revision until which to stop replaying
    :param onto_revid: Id of revision on top of which to replay
    :param graph: Graph object
    :param generate_revid: Function for generating new revision ids
    :param skip_full_merged: Skip revisions that merge already merged
        revisions.
    :return: replace map, mapping each replayed revision id to a
        (new revision id, new parents tuple) pair
    """
    assert start_revid is None or start_revid in todo_set, (
        "invalid start revid(%r), todo_set(%r)" % (start_revid, todo_set))
    assert stop_revid is None or stop_revid in todo_set, "invalid stop_revid"
    replace_map = {}
    parent_map = graph.get_parent_map(todo_set)
    order = topo_sort(parent_map)
    if stop_revid is None:
        stop_revid = order[-1]
    if start_revid is None:
        # We need a common base.
        if graph.find_lca(stop_revid, onto_revid) == set([NULL_REVISION]):
            raise UnrelatedBranches()
        start_revid = order[0]
    first_index = order.index(start_revid)
    last_index = order.index(stop_revid)
    todo = order[first_index:last_index + 1]
    heads_cache = FrozenHeadsCache(graph)

    def onto_is_only_head(revid):
        # True when onto_revid is the sole head of (revid, onto_revid).
        return heads_cache.heads((revid, onto_revid)) == set((onto_revid,))

    # XXX: The output replacemap'd parents should get looked up in some
    # manner by the heads cache? RBC 20080719
    for oldrevid in todo:
        oldparents = parent_map[oldrevid]
        assert isinstance(oldparents, tuple), "not tuple: %r" % oldparents
        # Left parent:
        left_parent = oldparents[0]
        if onto_is_only_head(left_parent):
            parents = [onto_revid]
        elif left_parent in replace_map:
            parents = [replace_map[left_parent][0]]
        else:
            parents = [onto_revid, left_parent]
        # Other parents:
        merge_parents = oldparents[1:]
        if merge_parents:
            additional_parents = heads_cache.heads(merge_parents)
            for oldparent in merge_parents:
                if oldparent not in additional_parents:
                    continue
                if onto_is_only_head(oldparent):
                    continue
                if oldparent not in replace_map:
                    parents.append(oldparent)
                    continue
                newparent = replace_map[oldparent][0]
                if parents[0] == onto_revid:
                    parents[0] = newparent
                else:
                    parents.append(newparent)
            # Only merge revisions are candidates for skip_full_merged.
            if len(parents) == 1 and skip_full_merged:
                continue
        parents = tuple(parents)
        newrevid = generate_revid(oldrevid, parents)
        assert newrevid != oldrevid, "old and newrevid equal (%r)" % newrevid
        assert isinstance(parents, tuple), "parents not tuple: %r" % parents
        replace_map[oldrevid] = (newrevid, parents)
    return replace_map
def exportBranches(branches, repo, cfg):
    """Export the new commits of the given branches, then their references.

    :param branches: iterable of (ref, branch, name) triples
    :param repo: repository; it is read-locked while commits are exported
    :param cfg: export state; getMark, forceAll, tagFilter and stats are
        used here
    """
    log("Collecting heads of all branches")
    branches = [(branch.last_revision(), ref, branch, name)
                for ref, branch, name in branches]
    commitsToExport = []

    # A head that already has a mark needs no further commit export.
    knownBranches, branches = split(
        lambda entry: cfg.getMark(entry[0]), branches)
    branchesToExport = knownBranches if cfg.forceAll else []

    # New heads: fetch the full revision list and work out which
    # revisions have to be exported.
    if branches:
        log("Getting list of all revisions")
        revisions = set(repo.all_revision_ids())

        # Drop branches whose head is not a known revision.
        branches, badHeads = split(
            lambda entry: entry[0] in revisions, branches)
        for head, ref, branch, name in badHeads:
            log("WARN: {0} -- invalid or empty head ({1})", name, head)

        # Gather the revisions to export for the remaining branches.
        if branches:
            log("Collating a set of revisions to be exported")
            newBranches, graph = collateNewHistory(
                branches, revisions, repo, cfg)
            branchesToExport.extend(newBranches)
            log("Got {0} revisions to export. Sorting them", len(graph))
            commitsToExport = tsort.topo_sort(graph)

    if commitsToExport:
        lock = repo.lock_read()
        try:
            log("Starting export of {0} revisions", len(commitsToExport))
            cfg.stats = Stats()
            for revid in commitsToExport:
                if cfg.getMark(revid):
                    log("WARN: we shouldn't be here. Revid: {0}", revid)
                    cfg.stats.skipRev()
                else:
                    exportCommit(revid, "__bzr_export_tmp_ref", repo, cfg)
                    cfg.stats.exportRev()
                    # Report progress every 1000 exported revisions.
                    if cfg.stats._exportedRevs % 1000 == 0:
                        log("{0}", cfg.stats)
            log("Finished: {0}", cfg.stats)
        finally:
            lock.unlock()

    log("Writing {0} branch references", len(branchesToExport))
    buf = []
    for head, ref, branch, name in branchesToExport:
        mark = cfg.getMark(head)
        assert mark is not None  # it's good branches only
        emitReset(buf, ref, mark)

    if cfg.tagFilter:
        exportTags(buf, branchesToExport, repo, cfg)

    # write out buffer with all stuff in it
    writeBuffer(buf)
def assertSortAndIterate(self, graph, result_list):
    """Check that sorting and iter_topo_order on graph works.

    :param graph: The graph handed to topo_sort and to TopoSorter.
    :param result_list: The order both are expected to produce.
    """
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(result_list, topo_sort(graph))
    self.assertEqual(result_list,
                     list(TopoSorter(graph).iter_topo_order()))
def assertSortAndIterate(self, graph, result_list):
    """Assert that topo_sort and iter_topo_order both yield result_list.

    :param graph: The graph handed to topo_sort and to TopoSorter.
    :param result_list: The order both are expected to produce.
    """
    sorted_nodes = topo_sort(graph)
    self.assertEqual(result_list, sorted_nodes)
    # The iterator is only built once the topo_sort check has passed.
    iterated_nodes = list(TopoSorter(graph).iter_topo_order())
    self.assertEqual(result_list, iterated_nodes)