def _diff(self, left, right, result):
    """Check the longest common subsequence of two sequences.

    Iterates the index pairs produced by
    ``utils.longest_common_subsequence(left, right)``, asserts that the
    paired items compare equal, and finally asserts that the items taken
    from ``left`` join (with the empty string) to ``result``.

    :param left: first indexable sequence; its items are joined with
        ``''.join`` so they must be strings
    :param right: second indexable sequence
    :param result: the expected joined common subsequence
    """
    common = []
    index_pairs = utils.longest_common_subsequence(left, right)
    for left_idx, right_idx in index_pairs:
        item = left[left_idx]
        # Each reported pair must point at equal items.
        self.assertEqual(item, right[right_idx])
        common.append(item)

    joined = ''.join(common)
    self.assertEqual(joined, result)
def align_children(self, left, right):
    """Yield the moves needed to put the children of ``left`` in order.

    Only children that are matched across the two nodes are considered:
    a child of ``left`` takes part when its partner in ``self._l2rmap``
    has ``right`` as parent, and a child of ``right`` takes part when its
    partner in ``self._r2lmap`` has ``left`` as parent.  Pairs lying on
    the longest common subsequence of those two lists are recorded in
    ``self._inorder``; every remaining left child is moved.

    This is a generator.  Each ``actions.MoveNode`` is yielded *before*
    the corresponding node is actually moved in the left tree, so the
    paths in the action refer to the tree as it was prior to that move.

    :param left: node of the left tree (needs ``getchildren``, ``remove``
        and ``insert`` -- presumably an lxml element, confirm with caller)
    :param right: the node of the right tree to align against
    """
    # Left children whose partner sits directly under ``right``.
    left_kids = []
    for node in left.getchildren():
        if id(node) not in self._l2rmap:
            continue
        if self._l2rmap[id(node)].getparent() is right:
            left_kids.append(node)

    # Right children whose partner sits directly under ``left``.
    right_kids = []
    for node in right.getchildren():
        if id(node) not in self._r2lmap:
            continue
        if self._r2lmap[id(node)].getparent() is left:
            right_kids.append(node)

    if not (left_kids and right_kids):
        # Nothing to align
        return

    def is_partner(lnode, rnode):
        # Two children correspond when they are matched to each other.
        return self._l2rmap[id(lnode)] is rnode

    common = utils.longest_common_subsequence(
        left_kids, right_kids, is_partner)

    for left_idx, right_idx in common:
        # Pairs on the common subsequence are already in order.
        self._inorder.add(left_kids[left_idx])
        self._inorder.add(right_kids[right_idx])

    # Everything that is not yet marked as in order has to be moved.
    for moved in left_kids:
        if moved in self._inorder:
            # Already aligned
            continue

        partner = self._l2rmap[id(moved)]
        position = self.find_pos(partner)
        new_parent = self._r2lmap[id(partner.getparent())]

        # Report the move first; the paths describe the pre-move tree.
        yield actions.MoveNode(
            utils.getpath(moved),
            utils.getpath(new_parent),
            position)

        # Then perform the move on the left tree.
        left.remove(moved)
        new_parent.insert(position, moved)

        # Both nodes now count as in order.
        self._inorder.add(moved)
        self._inorder.add(partner)
def match(self, left=None, right=None):
    """Match the nodes of the left tree with the nodes of the right tree.

    If ``left`` or ``right`` is given, ``self.set_trees(left, right)`` is
    called first.  When ``self._matches`` is already populated it is
    returned as is; otherwise the caches (``_matches``, ``_l2rmap``,
    ``_r2lmap``, ``_inorder``, ``_text_cache``) are reset and the match
    is computed:

    1. With ``self.fast_match`` enabled, nodes lying on a longest common
       subsequence (using ``node_ratio(x, y) >= 0.5`` as equality) are
       matched first and taken out of the candidate lists.
    2. Every remaining left node is paired with the remaining right node
       with the highest ``node_ratio``, provided that ratio reaches
       ``self.F``.
    3. The two roots are always matched to each other, last.

    Matches are recorded through ``self.append_match``.

    :param left: optional new left tree
    :param right: optional new right tree
    :returns: ``self._matches`` (filled by ``append_match``; presumably
        one entry per matched pair -- confirm against ``append_match``)
    """
    # This is not a generator, because the diff() functions need
    # _l2rmap and _r2lmap, so if match() was a generator, then
    # diff() would have to first do list(self.match()) without storing
    # the result, and that would be silly.

    # Nothing in this library is actually using the resulting list of
    # matches match() returns, but it may be useful for somebody who
    # does not actually want a diff, but only a list of matches.
    # It also makes testing the match function easier.

    if left is not None or right is not None:
        self.set_trees(left, right)

    if self._matches is not None:
        # We already matched these sequences, use the cache
        return self._matches

    # Initialize the caches:
    self._matches = []
    self._l2rmap = {}
    self._r2lmap = {}
    self._inorder = set()
    self._text_cache = {}

    # Generate the node lists
    lnodes = list(utils.post_order_traverse(self.left))
    rnodes = list(utils.post_order_traverse(self.right))

    # TODO: If the roots do not match, we should create new roots, and
    # have the old roots be children of the new roots, but let's skip
    # that for now, we don't need it. That's strictly a part of the
    # insert phase, but hey, even the paper defining the phases
    # ignores the phases, so...
    # For now, just make sure the roots are matched, we do that by
    # removing them from the lists of nodes, so they can't match, and
    # adding them back last.
    lnodes.remove(self.left)
    rnodes.remove(self.right)

    if self.fast_match:
        # First find matches with longest_common_subsequence:
        matches = list(
            utils.longest_common_subsequence(
                lnodes, rnodes,
                lambda x, y: self.node_ratio(x, y) >= 0.5))
        # Add the matches (I prefer this from start to finish):
        for left_match, right_match in matches:
            self.append_match(
                lnodes[left_match], rnodes[right_match], None)
        # Then remove the nodes (needs to be done backwards so that the
        # remaining indexes stay valid):
        for left_match, right_match in reversed(matches):
            del lnodes[left_match]
            del rnodes[right_match]

    for lnode in lnodes:
        max_match = 0
        match_node = None

        for rnode in rnodes:
            ratio = self.node_ratio(lnode, rnode)
            if ratio > max_match:
                match_node = rnode
                max_match = ratio

            # Try to shortcut for nodes that are not only equal but also
            # in the same place in the tree
            if ratio == 1.0:
                # This is a total match, break here
                break

        # match_node stays None when no candidate scored above zero
        # (or there were no candidates).  With self.F <= 0 the threshold
        # test alone would still pass, so require an actual candidate
        # before recording anything.
        if match_node is not None and max_match >= self.F:
            self.append_match(lnode, match_node, max_match)
            # We don't want to check nodes that already are matched
            rnodes.remove(match_node)

    # Match the roots
    self.append_match(self.left, self.right, 1.0)
    return self._matches