示例#1
0
    def _make_diff_tags(self, left_value, right_value, node, target=None):
        """Write an inline, marked-up text diff of two values onto ``node``.

        The two values are diffed with ``diff_match_patch`` (``diff_main``
        followed by ``diff_cleanupSemantic``) and the result is passed through
        ``self._realign_placeholders``.  Every segment is then appended to the
        output: unchanged segments verbatim, deleted and inserted segments
        after going through ``self.placeholderer`` with the action
        ``'delete'`` or ``'insert'`` respectively.

        :param left_value: the old text; a falsy value is treated as empty.
        :param right_value: the new text; a falsy value is treated as empty.
        :param node: the element that receives the output.
        :param target: when left as ``None`` the output is appended to
            ``node.text``; when anything else is passed it is appended to
            ``node.tail`` instead.  The value itself is not used otherwise.
        """
        old_text, new_text = left_value, right_value
        if self.normalize & WS_TEXT:
            # Whitespace normalisation was requested for text content.
            old_text = utils.cleanup_whitespace(old_text or u'').strip()
            new_text = utils.cleanup_whitespace(new_text or u'').strip()

        differ = diff_match_patch()
        segments = differ.diff_main(old_text or '', new_text or '')
        differ.diff_cleanupSemantic(segments)
        segments = self._realign_placeholders(segments)

        # Passing a target switches the output from node.text to node.tail.
        into_tail = target is not None

        def emit(fragment):
            # Append one piece of output to whichever attribute is in use.
            if into_tail:
                node.tail = (node.tail or u'') + fragment
            else:
                node.text = (node.text or u'') + fragment

        for op, text in segments:
            if op == 0:
                # Unchanged segment: copied through as it is.
                emit(text)
                continue

            if op == -1:
                action = 'delete'
            elif op == 1:
                action = 'insert'

            if not self.placeholderer.is_placeholder(text):
                emit(self.placeholderer.wrap_diff(text, action))
                continue

            marked = self.placeholderer.mark_diff(text, action)
            if not into_tail:
                node.text = (node.text or u'') + marked
            # NOTE(review): in tail mode the marked placeholder is computed
            # but not written anywhere -- confirm that this is intentional.
示例#2
0
    def node_text(self, node):
        """Return one string summarising the text and attributes of ``node``.

        The string is made of the results of the ``text()`` XPath query on
        the node, followed by one ``name:value`` entry per attribute (sorted,
        with any ``{namespace}`` prefix removed from the name), joined by
        single spaces and passed through ``utils.cleanup_whitespace``.  It is
        handy for judging how similar two nodes are.

        Results are stored in ``self._text_cache`` keyed by node, so the
        string is only built once per node.
        """
        cache = self._text_cache
        if node in cache:
            return cache[node]

        # Start from the text pieces of the node itself.
        pieces = node.xpath('text()')

        # Then one entry per attribute, in sorted order.
        for qname, value in sorted(node.attrib.items()):
            # Keep only the local part of a "{namespace}local" name.
            tag = qname.split('}')[-1] if qname[0] == '{' else qname
            pieces.append('%s:%s' % (tag, value))

        summary = utils.cleanup_whitespace(u' '.join(pieces).strip())
        cache[node] = summary
        return summary
示例#3
0
    def node_text(self, node):
        """Return one string summarising the text and attributes of ``node``.

        The string is made of the results of the ``text()`` XPath query on
        the node, followed by one ``name:value`` entry for every item of
        ``self.node_attribs(node)`` (sorted, with any ``{namespace}`` prefix
        removed from the name), joined by single spaces and passed through
        ``utils.cleanup_whitespace``.  It is handy for judging how similar
        two nodes are.

        Results are stored in ``self._text_cache`` keyed by node, so the
        string is only built once per node.
        """
        cache = self._text_cache
        if node in cache:
            return cache[node]

        # Start from the text pieces of the node itself.
        pieces = node.xpath("text()")

        # Then one entry per attribute, in sorted order.
        for qname, value in sorted(self.node_attribs(node).items()):
            # Keep only the local part of a "{namespace}local" name.
            tag = qname.split("}")[-1] if qname[0] == "{" else qname
            pieces.append(f"{tag}:{value}")

        summary = utils.cleanup_whitespace(" ".join(pieces).strip())
        cache[node] = summary
        return summary