예제 #1
0
파일: test_utils.py 프로젝트: toyg/xmldiff
    def test_reverse_post_order(self):
        xml = """<document>
    <story firstPageTemplate='FirstPage'>
        <section xml:id='oldfirst' ref='3' single-ref='3'>
            <para>First paragraph</para>
        </section>
        <section xml:id='oldlast' ref='4' single-ref='4'>
            <para>Last paragraph</para>
        </section>
    </story>
</document>
"""
        root = etree.fromstring(xml)
        tree = root.getroottree()
        res = [
            tree.getpath(x) for x in utils.reverse_post_order_traverse(root)
        ]
        self.assertEqual(
            res,
            [
                "/document/story/section[2]/para",
                "/document/story/section[2]",
                "/document/story/section[1]/para",
                "/document/story/section[1]",
                "/document/story",
                "/document",
            ],
        )
예제 #2
0
    def diff(self, left=None, right=None):
        # Make sure the matching is done first, diff() needs the l2r/r2l maps.
        if not self._matches:
            self.match(left, right)

        # The paper talks about the five phases, and then does four of them
        # in one phase, in a different order that described. This
        # implementation in turn differs in order yet again.
        ltree = self.left.getroottree()

        for rnode in utils.breadth_first_traverse(self.right):
            # (a)
            rparent = rnode.getparent()
            ltarget = self._r2lmap.get(id(rparent))

            # (b) Insert
            if id(rnode) not in self._r2lmap:
                # (i)
                pos = self.find_pos(rnode)
                # (ii)
                if rnode.tag is etree.Comment:
                    yield actions.InsertComment(utils.getpath(ltarget, ltree),
                                                pos, rnode.text)
                    lnode = etree.Comment(rnode.text)
                else:
                    yield actions.InsertNode(utils.getpath(ltarget, ltree),
                                             rnode.tag, pos)
                    lnode = ltarget.makeelement(rnode.tag)

                    # (iii)
                self.append_match(lnode, rnode, 1.0)
                ltarget.insert(pos, lnode)
                self._inorder.add(lnode)
                self._inorder.add(rnode)
                # And then we update attributes. This is different from the
                # paper, because the paper assumes nodes only has labels and
                # values. Nodes also has texts, we do them later.
                for action in self.update_node_attr(lnode, rnode):
                    yield action

            # (c)
            else:
                # Normally there is a check that rnode isn't a root,
                # but that's perhaps only because comparing valueless
                # roots is pointless, but in an elementtree we have no such
                # thing as a valueless root anyway.
                # (i)
                lnode = self._r2lmap[id(rnode)]

                # (iii) Move
                lparent = lnode.getparent()
                if ltarget is not lparent:
                    pos = self.find_pos(rnode)
                    yield actions.MoveNode(utils.getpath(lnode, ltree),
                                           utils.getpath(ltarget, ltree), pos)
                    # Move the node from current parent to target
                    lparent.remove(lnode)
                    ltarget.insert(pos, lnode)
                    self._inorder.add(lnode)
                    self._inorder.add(rnode)

                # Rename
                for action in self.update_node_tag(lnode, rnode):
                    yield action

                # (ii) Update
                # XXX If they are exactly equal, we can skip this,
                # maybe store match results in a cache?
                for action in self.update_node_attr(lnode, rnode):
                    yield action

            # (d) Align
            for action in self.align_children(lnode, rnode):
                yield action

            # And lastly, we update all node texts. We do this after
            # aligning children, because when you generate an XML diff
            # from this, that XML diff update generates more children,
            # confusing later inserts or deletes.
            lnode = self._r2lmap[id(rnode)]
            for action in self.update_node_text(lnode, rnode):
                yield action

        for lnode in utils.reverse_post_order_traverse(self.left):
            if id(lnode) not in self._l2rmap:
                # No match
                yield actions.DeleteNode(utils.getpath(lnode, ltree))
                lnode.getparent().remove(lnode)