def manifest_and_flags_from_tree(parent_trees, tree, mapping,
                                 parent_node_lookup):
    """Generate a manifest from a Bazaar tree.

    :param parent_trees: Parent trees
    :param tree: Tree
    :param mapping: Bzr<->Hg mapping
    :param parent_node_lookup: 2-tuple with functions to look up the nodes
        of paths in the tree's parents
    :return: Tuple with manifest, flags dictionary and a dictionary of
        "unusual" file ids (those not matching what the mapping predicts)
    """
    assert len(parent_node_lookup) == 2
    unusual_fileids = {}
    def get_text_parents(path):
        assert type(path) == str
        ret = []
        for lookup in parent_node_lookup:
            try:
                ret.append(lookup(path))
            except KeyError:
                ret.append(mercurial.node.nullid)
        assert len(ret) == 2
        return tuple(ret)
    manifest = {}
    flags = {}
    for path, entry in tree.iter_entries_by_dir():
        this_sha1 = entry_sha1(entry)
        prev_entry = find_matching_entry(parent_trees, path, this_sha1)
        utf8_path = path.encode("utf-8")
        if entry.kind == 'symlink':
            flags[utf8_path] = 'l'
            if prev_entry is None:
                manifest[utf8_path] = hghash(entry.symlink_target,
                    *get_text_parents(utf8_path))
        elif entry.kind == 'file':
            if entry.executable:
                flags[utf8_path] = 'x'
            if prev_entry is None:
                manifest[utf8_path] = hghash(
                    tree.get_file_text(entry.file_id),
                    *get_text_parents(utf8_path))
        if entry.kind in ('file', 'symlink') and prev_entry is not None:
            manifest[utf8_path] = parent_node_lookup[prev_entry](utf8_path)
        if ((mapping.generate_file_id(utf8_path) != entry.file_id or
             entry.kind == 'directory') and
            (parent_trees == [] or
             parent_trees[0].path2id(path) != entry.file_id)):
            unusual_fileids[utf8_path] = entry.file_id
    return (manifest, flags, unusual_fileids)


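# Illustrative sketch (not part of the original module): one way the
# ``parent_node_lookup`` pair could be built from parent manifest
# dictionaries, mirroring how ``dinventories`` below pads missing parents
# with the null node. The helper name is hypothetical.
def _example_parent_node_lookup(parent_manifests):
    """Build a 2-tuple of path -> node callables from 0-2 parent manifests."""
    lookups = [manifest.__getitem__ for manifest in parent_manifests[:2]]
    while len(lookups) < 2:
        # Paths absent from a missing parent resolve to the null node.
        lookups.append(lambda path: mercurial.node.nullid)
    return tuple(lookups)

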
def drevisions(repo, mapping, revids, files, changelog_ids, manifest_ids,
               overlay, fileids={}, lossy=True):
    """Serialize a series of Bazaar revisions as Mercurial changesets.

    :param repo: Bazaar repository
    :param mapping: Bzr<->Hg Mapping
    :param revids: Iterable over revision ids
    :param files: Dictionary for looking up the set of changed files by revid
    :param changelog_ids: Dictionary mapping revision ids to changeset ids;
        updated with every changeset generated here
    :param manifest_ids: Dictionary for looking up the manifest id by revid
    :param overlay: Mercurial overlay object for the Bazaar repository
    :param fileids: Dictionary mapping revision ids to "unusual" file id
        dictionaries (only relevant for non-lossy conversions)
    :param lossy: Whether or not to do a lossy conversion
    :return: Iterable over (fulltext, (p1, p2), changeset id) tuples
    """
    for revid in revids:
        if revid == _mod_revision.NULL_REVISION:
            yield ("", (mercurial.node.nullid, mercurial.node.nullid),
                   mercurial.node.nullid)
            continue
        rev = repo.get_revision(revid)
        (manifest_id, user, date, desc, extra) = mapping.export_revision(
            rev, lossy=lossy, fileids=fileids.get(revid, {}))
        if manifest_id is None:
            manifest_id = manifest_ids[revid]
        if revid in manifest_ids and manifest_id != manifest_ids[revid]:
            raise AssertionError
        text = format_changeset(manifest_id, files[revid], user, date, desc,
            extra)
        ps = as_hg_parents(rev.parent_ids, changelog_ids.__getitem__)
        hgid = hghash(text, ps[0], ps[1])
        changelog_ids[revid] = hgid
        yield text, ps, hgid


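# Hedged sketch (an assumption, not the module's actual definition) of what
# ``as_hg_parents`` is taken to do, based on how it is called throughout this
# file: map at most two Bazaar parent revision ids to Mercurial nodes via the
# supplied lookup, padding with the null node when fewer than two parents
# exist. The name ``_assumed_as_hg_parents`` is hypothetical.
def _assumed_as_hg_parents(parent_ids, lookup):
    ret = []
    for parent_id in parent_ids[:2]:
        ret.append(lookup(parent_id))
    while len(ret) < 2:
        ret.append(mercurial.node.nullid)
    return tuple(ret)

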
def text_contents(repo, path, keys, overlay):
    """Generate revlog text tuples.

    :param repo: Bazaar repository
    :param path: UTF8 path
    :param keys: (fileid, revision) tuples of texts to convert
    :param overlay: Overlay
    :return: Always yields a base text first, then yields tuples with
        VersionedFileContentFactory, parent nodes, node id for each key
    """
    if not keys:
        yield ""
        return
    def text_as_node((fileid, revision)):
        try:
            return text_nodes[revision]
        except KeyError:
            return overlay.lookup_text_node_by_revid_and_path(revision, path)
    text_nodes = {}
    base_reported = False
    file_graph = repo.get_file_graph()
    first_parents = file_graph.get_parent_map([keys[0]])[keys[0]]
    if len(first_parents) == 0:
        yield ""
    else:
        base_stream = repo.texts.get_record_stream([first_parents[0]],
            'unordered', True)
        yield base_stream.next().get_bytes_as("fulltext")
    for record in repo.texts.get_record_stream(keys, 'topological', True):
        fulltext = record.get_bytes_as('fulltext')
        parents = as_hg_parents(record.parents, text_as_node)
        node = hghash(fulltext, parents[0], parents[1])
        text_nodes[record.key[1]] = node
        yield (record, parents, node)


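# Illustrative sketch (hypothetical helper, not in the original module) of the
# yield protocol above: the first item is the plain base fulltext (possibly
# the empty string), every following item is a (record, parents, node) tuple.
def _example_consume_text_contents(repo, path, keys, overlay):
    stream = text_contents(repo, path, keys, overlay)
    textbase = stream.next()  # base text to delta the first record against
    entries = []
    for record, parents, node in stream:
        entries.append((record.get_bytes_as("fulltext"), parents, node))
    return textbase, entries

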
def _update_idmap(self, stop_revision=None):
    """Add any missing revisions up to stop_revision to the id map.

    For every revision not yet present, the corresponding manifest and
    changeset ids are calculated and stored in the idmap.
    """
    present_revids = self.idmap.revids()
    graph = self.repo.get_graph()
    if stop_revision is None:
        wanted = self.repo.all_revision_ids()
    else:
        wanted = graph.find_unique_ancestors(stop_revision, present_revids)
    todo = set(wanted) - present_revids - set(
        [_mod_revision.NULL_REVISION])
    revs = self.repo.get_revisions(todo)
    pb = ui.ui_factory.nested_progress_bar()
    try:
        for i, revid in enumerate(graph.iter_topo_order(todo)):
            pb.update("updating cache", i, len(todo))
            rev = self.repo.get_revision(revid)
            (manifest_id, user, (time, timezone), desc, extra) = \
                self.mapping.export_revision(rev)
            if manifest_id is None:
                manifest_text = self.get_manifest_text_by_revid(revid)
                self.remember_manifest_text(revid, rev.parent_ids,
                    manifest_text)
                manifest_id = hghash(
                    manifest_text,
                    *as_hg_parents(rev.parent_ids[:2],
                        self.lookup_manifest_id_by_revid))
            changeset_text = self.get_changeset_text_by_revid(
                revid, rev, manifest_id=manifest_id)
            changeset_id = hghash(
                changeset_text,
                *as_hg_parents(
                    rev.parent_ids[:2],
                    lambda x: self.lookup_changeset_id_by_revid(x)[0]))
            self.idmap.insert_revision(revid, manifest_id, changeset_id,
                self.mapping)
            self._update_texts(revid)
    finally:
        pb.finished()


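# Hedged sketch (an assumption about a helper defined elsewhere): ``hghash``
# is taken to follow Mercurial's standard node hash, the SHA-1 of the two
# parent nodes in sorted order followed by the fulltext. That convention is
# why nodes are exactly 20 bytes wherever this module asserts their length.
def _assumed_hghash(text, p1, p2):
    import hashlib
    s = hashlib.sha1()
    s.update(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

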
def pack_chunk_iter(entries, textbase):
    """Create a chained series of Mercurial deltas.

    Each fulltext is deltaed against the previous one; the first entry is
    deltaed against the supplied textbase.

    :param entries: Iterator over (fulltext, (p1, p2), link) tuples.
    :param textbase: Fulltext to use as the delta base for the first entry
        (not packed itself)
    :return: iterator over delta chunks
    """
    for (fulltext, (p1, p2), link) in entries:
        assert len(p1) == 20
        assert len(p2) == 20
        node = hghash(fulltext, p1, p2)
        assert len(node) == 20
        assert len(link) == 20
        delta = mercurial.mdiff.bdiff.bdiff(textbase, fulltext)
        chunk = struct.pack("20s20s20s20s", node, p1, p2, link) + delta
        yield chunk
        textbase = fulltext


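# Illustrative sketch (hypothetical driver): the tuples yielded by
# ``drevisions`` above already have the (fulltext, (p1, p2), link) shape that
# ``pack_chunk_iter`` packs, with the changeset id serving as the link node.
# Whether the surrounding code wires them together exactly this way is an
# assumption of this sketch.
def _example_changeset_chunks(repo, mapping, revids, files, changelog_ids,
                              manifest_ids, overlay):
    entries = drevisions(repo, mapping, revids, files, changelog_ids,
        manifest_ids, overlay)
    # An empty textbase makes the first chunk a delta against the null text.
    return pack_chunk_iter(entries, "")

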
def dinventories(repo, mapping, revids, manifest_ids, files, overlay, texts,
                 fileids, lossy=True):
    """Generate manifests from a series of revision trees.

    :param repo: Bazaar repository to fetch revisions from
    :param revids: Revision ids to yield manifests for (returned in same
        order)
    :param manifest_ids: Dictionary with revid -> manifest id mappings for
        known manifests. Used to look up parent manifests not processed here
    :param files: Dictionary to store mercurial file dictionaries in, by
        revid
    :param overlay: Mercurial overlay object for the Bazaar repository
    :param texts: Dictionary mapping paths to sets of (fileid, revision)
        tuples of texts that still need to be converted
    :param fileids: Dictionary mapping revision ids to file id lookup
        dictionaries, for any "unusual" file ids (not matching that
        predicted by the mapping). (only relevant for non-lossy conversions)
    :param lossy: Whether or not to do a lossy conversion.
    """
    def get_manifest(revid):
        if revid in manifest_ids:
            try:
                return manifests[revid]
            except KeyError:
                pass
        return overlay.get_manifest_and_flags_by_revid(revid)
    if revids == []:
        return
    skip_revid = revids[0]
    if revids[0] == _mod_revision.NULL_REVISION:
        yield "", (mercurial.node.nullid, mercurial.node.nullid), revids[0]
        revids = revids[1:]
    manifests = {}
    # TODO: Very naive and slow:
    for tree in repo.revision_trees(revids):
        revid = tree.get_revision_id()
        rev = repo.get_revision(revid)
        lookup_text_node = []
        for parent in rev.parent_ids[:2]:
            lookup_text_node.append(get_manifest(parent)[0].__getitem__)
        while len(lookup_text_node) < 2:
            lookup_text_node.append(lambda path: mercurial.node.nullid)
        # TODO: This refetches the parent trees, which we'll likely have seen
        # earlier in this loop.
        parent_trees = list(repo.revision_trees(rev.parent_ids[:2]))
        (manifest, flags, extrafileids) = manifest_and_flags_from_tree(
            parent_trees, tree, mapping, lookup_text_node)
        fileids[revid] = extrafileids
        manifests[revid] = (manifest, flags)
        try:
            base_tree = parent_trees[0]
        except IndexError:
            base_tree = repo.revision_tree(_mod_revision.NULL_REVISION)
        files[revid] = files_from_delta(tree.changes_from(base_tree), tree,
            revid)
        # Avoid sending texts for the first revision; it is only listed so we
        # get the base text for the manifest deltas.
        if revid != skip_revid:
            for p in files[revid]:
                fileid = tree.path2id(p)
                if fileid is not None:
                    # FIXME: This is probably not correct, as 'files' doesn't
                    # include new revisions that don't include changes (but
                    # are e.g. relevant for parents)
                    texts[p].add((fileid, tree.get_file_revision(p)))
        text = format_manifest(manifest, flags)
        node_parents = as_hg_parents(rev.parent_ids,
            manifest_ids.__getitem__)
        manifest_id = hghash(text, node_parents[0], node_parents[1])
        manifest_ids[revid] = manifest_id
        if 'check' in debug.debug_flags:
            assert mapping.export_revision(rev)[0] in (None, manifest_id)
        yield text, node_parents, revid


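# Illustrative end-to-end sketch (hypothetical driver, names are assumptions):
# the manifest pass has to be consumed first so that ``manifest_ids``,
# ``files``, ``texts`` and ``fileids`` are populated before ``drevisions``
# serializes the changesets that refer to them. ``changelog_ids`` would also
# need entries for any parent revisions that fall outside ``revids``.
def _example_manifests_then_changesets(repo, mapping, revids, overlay,
                                       changelog_ids, lossy=True):
    from collections import defaultdict
    manifest_ids = {}
    files = {}
    fileids = {}
    texts = defaultdict(set)
    manifest_texts = list(dinventories(repo, mapping, revids, manifest_ids,
        files, overlay, texts, fileids, lossy=lossy))
    changesets = list(drevisions(repo, mapping, revids, files, changelog_ids,
        manifest_ids, overlay, fileids=fileids, lossy=lossy))
    return manifest_texts, changesets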