예제 #1
0
    def _diff(self, left, right, result):
        """Check the longest common subsequence of ``left`` and ``right``.

        Every index pair produced by ``utils.longest_common_subsequence``
        must point at equal items in both sequences, and the matched items
        from ``left``, joined into one string, must equal ``result``.
        """
        common = []
        pairs = utils.longest_common_subsequence(left, right)
        for left_index, right_index in pairs:
            item = left[left_index]
            # Both sides of a reported pair have to hold the same item.
            self.assertEqual(item, right[right_index])
            common.append(item)

        self.assertEqual(''.join(common), result)
예제 #2
0
    def align_children(self, left, right):
        """Yield the moves needed to put the matched children in order.

        Only children whose matched partner is a direct child of the other
        node are considered. Pairs that form a common subsequence are
        recorded in ``self._inorder`` and left alone. Every remaining left
        child produces an ``actions.MoveNode`` and is then relocated in the
        left tree.

        This is a generator: the left tree is modified while it is being
        consumed, each move happening right after its action was yielded.

        Args:
            left: Node of the left tree. It must be iterable over its
                children and support ``remove()``.
            right: Node of the right tree, iterable over its children.

        Yields:
            ``actions.MoveNode`` built from the path of the moved child, the
            path of its new parent and the target position.
        """
        # Iterate over the elements directly: getchildren() is deprecated
        # in lxml and no longer exists in the standard library ElementTree.
        lchildren = [
            c for c in left
            if (id(c) in self._l2rmap
                and self._l2rmap[id(c)].getparent() is right)
        ]
        rchildren = [
            c for c in right
            if (id(c) in self._r2lmap
                and self._r2lmap[id(c)].getparent() is left)
        ]
        if not lchildren or not rchildren:
            # Nothing to align
            return

        # Two children are "equal" when the left one is matched to exactly
        # the right one.
        lcs = utils.longest_common_subsequence(
            lchildren, rchildren, lambda x, y: self._l2rmap[id(x)] is y)

        for x, y in lcs:
            # Mark these as in order
            self._inorder.add(lchildren[x])
            self._inorder.add(rchildren[y])

        # Go over those children that are not in order. lchildren is a
        # snapshot, so changing ``left`` inside the loop is safe.
        for lchild in lchildren:
            if lchild in self._inorder:
                # Already aligned
                continue

            rchild = self._l2rmap[id(lchild)]
            right_pos = self.find_pos(rchild)
            rtarget = rchild.getparent()
            ltarget = self._r2lmap[id(rtarget)]
            # The paths must be computed before the node is moved, so the
            # action is created and yielded first.
            yield actions.MoveNode(utils.getpath(lchild),
                                   utils.getpath(ltarget), right_pos)
            # Do the actual move:
            left.remove(lchild)
            ltarget.insert(right_pos, lchild)
            # Mark the nodes as in order
            self._inorder.add(lchild)
            self._inorder.add(rchild)
예제 #3
0
    def match(self, left=None, right=None):
        """Match the nodes of the left tree with the nodes of the right tree.

        The result is cached in ``self._matches``; a second call without
        new trees returns the cached list unchanged.

        Args:
            left: Optional new left tree. When ``left`` or ``right`` is
                given, both are handed to ``self.set_trees()`` first.
            right: Optional new right tree.

        Returns:
            ``self._matches``. The list is created here and presumably
            filled by ``self.append_match()`` (not visible from this
            method; verify against its definition).
        """
        # This is not a generator, because the diff() functions needs
        # _l2rmap and _r2lmap, so if match() was a generator, then
        # diff() would have to first do list(self.match()) without storing
        # the result, and that would be silly.

        # Nothing in this library is actually using the resulting list of
        # matches match() returns, but it may be useful for somebody that
        # actually do not want a diff, but only a list of matches.
        # It also makes testing the match function easier.

        if left is not None or right is not None:
            self.set_trees(left, right)

        if self._matches is not None:
            # We already matched these sequences, use the cache
            return self._matches

        # Initialize the caches:
        self._matches = []
        self._l2rmap = {}
        self._r2lmap = {}
        self._inorder = set()
        self._text_cache = {}

        # Generate the node lists
        lnodes = list(utils.post_order_traverse(self.left))
        rnodes = list(utils.post_order_traverse(self.right))

        # TODO: If the roots do not match, we should create new roots, and
        # have the old roots be children of the new roots, but let's skip
        # that for now, we don't need it. That's strictly a part of the
        # insert phase, but hey, even the paper defining the phases
        # ignores the phases, so...
        # For now, just make sure the roots are matched, we do that by
        # removing them from the lists of nodes, so it can't match, and add
        # them back last.
        lnodes.remove(self.left)
        rnodes.remove(self.right)

        if self.fast_match:
            # First find matches with longest_common_subsequence.
            # NOTE(review): the threshold here is a literal 0.5 while the
            # slow path below uses self.F -- confirm this is intentional.
            matches = list(
                utils.longest_common_subsequence(
                    lnodes, rnodes, lambda x, y: self.node_ratio(x, y) >= 0.5))

            # Add the matches (I prefer this from start to finish):
            for left_match, right_match in matches:
                self.append_match(lnodes[left_match], rnodes[right_match],
                                  None)

            # Then remove the nodes. This needs to be done backwards so
            # the remaining indexes stay valid.
            for left_match, right_match in reversed(matches):
                del lnodes[left_match]
                del rnodes[right_match]

        for lnode in lnodes:
            max_match = 0
            match_node = None

            for rnode in rnodes:
                ratio = self.node_ratio(lnode, rnode)
                if ratio > max_match:
                    match_node = rnode
                    max_match = ratio

                # Try to shortcut for nodes that are not only equal but also
                # in the same place in the tree
                if ratio == 1.0:
                    # This is a total match, break here
                    break

            # match_node stays None when there was no candidate with a
            # ratio above zero. Never record such a "match", not even when
            # self.F is zero.
            if match_node is not None and max_match >= self.F:
                self.append_match(lnode, match_node, max_match)

                # We don't want to check nodes that already are matched
                rnodes.remove(match_node)

        # Match the roots
        self.append_match(self.left, self.right, 1.0)

        return self._matches