def test_reverse_post_order(self):
    """reverse_post_order_traverse yields the last subtree first.

    For the sample document the expected order is: the second section's
    child, the second section, the first section's child, the first
    section, then the enclosing story and finally the document root.
    """
    # Built by implicit concatenation; the resulting string is the same
    # document, fragment by fragment.
    xml = (
        "<document> "
        "<story firstPageTemplate='FirstPage'> "
        "<section xml:id='oldfirst' ref='3' single-ref='3'> "
        "<para>First paragraph</para> "
        "</section> "
        "<section xml:id='oldlast' ref='4' single-ref='4'> "
        "<para>Last paragraph</para> "
        "</section> "
        "</story> "
        "</document> "
    )
    root = etree.fromstring(xml)
    tree = root.getroottree()

    # Record the path of every node in the order the traversal emits it.
    visited_paths = []
    for node in utils.reverse_post_order_traverse(root):
        visited_paths.append(tree.getpath(node))

    expected_paths = [
        "/document/story/section[2]/para",
        "/document/story/section[2]",
        "/document/story/section[1]/para",
        "/document/story/section[1]",
        "/document/story",
        "/document",
    ]
    self.assertEqual(visited_paths, expected_paths)
def diff(self, left=None, right=None):
    """Yield edit actions while applying them to the left tree in place.

    This is a generator. It walks the right tree breadth-first and, for
    every right node, yields the insert/move actions needed on the left
    side plus whatever ``update_node_tag``, ``update_node_attr``,
    ``align_children`` and ``update_node_text`` produce. Finally it yields
    a ``DeleteNode`` for every left node that has no entry in ``_l2rmap``.

    Each structural action is mirrored on ``self.left`` right after it is
    yielded, and every path in an action is computed against the left tree
    as it is at that moment. The statement order below is therefore
    significant and the left tree is modified as the generator is consumed.

    ``left`` and ``right`` are only used when no matching exists yet
    (``self._matches`` is falsy); they are then forwarded to
    ``self.match``.
    """
    # Make sure the matching is done first, diff() needs the l2r/r2l maps.
    if not self._matches:
        self.match(left, right)

    # The paper talks about the five phases, and then does four of them
    # in one phase, in a different order than described. This
    # implementation in turn differs in order yet again.
    ltree = self.left.getroottree()

    for rnode in utils.breadth_first_traverse(self.right):
        # (a) Look up the left node mapped to rnode's parent. .get() is
        # used, so an unmapped parent gives None instead of raising.
        rparent = rnode.getparent()
        ltarget = self._r2lmap.get(id(rparent))

        # (b) Insert: rnode has no counterpart on the left side yet.
        if id(rnode) not in self._r2lmap:
            # (i)
            pos = self.find_pos(rnode)
            # (ii) The action is yielded before the new node is created
            # and attached, so the path refers to the tree before insertion.
            if rnode.tag is etree.Comment:
                yield actions.InsertComment(utils.getpath(ltarget, ltree), pos, rnode.text)
                lnode = etree.Comment(rnode.text)
            else:
                yield actions.InsertNode(utils.getpath(ltarget, ltree), rnode.tag, pos)
                lnode = ltarget.makeelement(rnode.tag)

            # (iii) Register the new pair, attach the new node on the left
            # and add both nodes to self._inorder.
            self.append_match(lnode, rnode, 1.0)
            ltarget.insert(pos, lnode)
            self._inorder.add(lnode)
            self._inorder.add(rnode)
            # And then we update attributes. This is different from the
            # paper, because the paper assumes nodes only have labels and
            # values. Nodes also have texts, we do them later.
            for action in self.update_node_attr(lnode, rnode):
                yield action

        # (c)
        else:
            # Normally there is a check that rnode isn't a root,
            # but that's perhaps only because comparing valueless
            # roots is pointless, but in an elementtree we have no such
            # thing as a valueless root anyway.
            # (i)
            lnode = self._r2lmap[id(rnode)]

            # (iii) Move: the matched left node hangs under a different
            # parent than the one mapped to rnode's parent.
            lparent = lnode.getparent()
            if ltarget is not lparent:
                pos = self.find_pos(rnode)
                yield actions.MoveNode(
                    utils.getpath(lnode, ltree), utils.getpath(ltarget, ltree), pos
                )
                # Move the node from current parent to target
                lparent.remove(lnode)
                ltarget.insert(pos, lnode)
                self._inorder.add(lnode)
                self._inorder.add(rnode)

            # Rename
            for action in self.update_node_tag(lnode, rnode):
                yield action

            # (ii) Update
            # XXX If they are exactly equal, we can skip this,
            # maybe store match results in a cache?
            for action in self.update_node_attr(lnode, rnode):
                yield action

        # (d) Align
        for action in self.align_children(lnode, rnode):
            yield action

        # And lastly, we update all node texts. We do this after
        # aligning children, because when you generate an XML diff
        # from this, that XML diff update generates more children,
        # confusing later inserts or deletes.
        lnode = self._r2lmap[id(rnode)]
        for action in self.update_node_text(lnode, rnode):
            yield action

    # Delete phase: whatever is left without an entry in _l2rmap is removed.
    # NOTE(review): the reverse post-order traversal presumably keeps the
    # paths of the remaining nodes valid while deleting - confirm.
    for lnode in utils.reverse_post_order_traverse(self.left):
        if id(lnode) not in self._l2rmap:
            # No match
            yield actions.DeleteNode(utils.getpath(lnode, ltree))
            lnode.getparent().remove(lnode)