def _make_diff_tags(self, left_value, right_value, node, target=None):
    """Write an inline, marked-up diff of two strings onto ``node``.

    The two values are diffed with ``diff_match_patch``; the result is
    semantically cleaned up, passed through ``self._realign_placeholders``
    and then appended piece by piece to the output text.

    :param left_value: The old text; ``None`` is treated as empty.
    :param right_value: The new text; ``None`` is treated as empty.
    :param node: The element that receives the output.
    :param target: Only tested against ``None`` here.  When it is ``None``
        the output is appended to ``node.text``; otherwise it is appended
        to ``node.tail``.
    :raises ValueError: If the diff contains an operation other than
        -1 (delete), 0 (equal) or 1 (insert).
    """
    if bool(self.normalize & WS_TEXT):
        left_value = utils.cleanup_whitespace(left_value or u'').strip()
        right_value = utils.cleanup_whitespace(right_value or u'').strip()

    text_diff = diff_match_patch()
    diff = text_diff.diff_main(left_value or '', right_value or '')
    text_diff.diff_cleanupSemantic(diff)
    diff = self._realign_placeholders(diff)

    # With a target the diff describes the node's tail, otherwise its text.
    cur_child = None if target is None else node

    def append(fragment):
        # Single place that decides between text and tail, so that every
        # kind of segment ends up in the same attribute.
        if cur_child is None:
            node.text = (node.text or u'') + fragment
        else:
            cur_child.tail = (cur_child.tail or u'') + fragment

    for op, text in diff:
        if op == 0:
            # Unchanged segment: copied over verbatim.
            append(text)
            continue

        if op == -1:
            action = 'delete'
        elif op == 1:
            action = 'insert'
        else:
            # Previously this fell through with ``action`` unbound, or
            # still holding the value from the previous segment.
            raise ValueError('Unexpected diff operation: %r' % (op,))

        if self.placeholderer.is_placeholder(text):
            # Previously the marked placeholder was only written when the
            # output went to node.text; in the tail case it was silently
            # dropped, losing the changed segment from the output.
            append(self.placeholderer.mark_diff(text, action))
        else:
            append(self.placeholderer.wrap_diff(text, action))
def node_text(self, node):
    """Return a single-string summary of ``node``, memoised per node.

    The summary is made of the node's direct text children followed by one
    ``name:value`` entry per attribute.  Attributes are ordered by their
    full name, and a leading ``{namespace}`` qualifier is removed from the
    name before it is written.  The pieces are joined with single spaces,
    stripped and passed through ``utils.cleanup_whitespace``.

    The result is stored in ``self._text_cache`` so that repeated calls for
    the same node return the stored string.
    """
    cache = self._text_cache
    if node in cache:
        return cache[node]

    def describe(name, value):
        # Keep only the local part of a "{namespace}local" attribute name.
        if name[0] == '{':
            name = name.rpartition('}')[2]
        return '%s:%s' % (name, value)

    # Text children first, then the attributes in sorted order.
    pieces = node.xpath('text()')
    pieces.extend(
        describe(name, value)
        for name, value in sorted(node.attrib.items())
    )

    # One string is handy for judging how similar two nodes are.
    summary = utils.cleanup_whitespace(u' '.join(pieces).strip())
    cache[node] = summary
    return summary
def node_text(self, node):
    """Return a single-string summary of ``node``, memoised per node.

    The node's direct text children come first, followed by one
    ``name:value`` entry for each item of ``self.node_attribs(node)``.
    Entries are ordered by the full attribute name, and a leading
    ``{namespace}`` qualifier is removed before the name is written.
    Everything is joined with single spaces, stripped and passed through
    ``utils.cleanup_whitespace``.

    Results are kept in ``self._text_cache``; a node that is already
    present there is answered from the cache without recomputation.
    """
    cache = self._text_cache
    if node in cache:
        return cache[node]

    # Start from the text children, then add the attributes.
    fragments = node.xpath("text()")
    for tag, value in sorted(self.node_attribs(node).items()):
        # Keep only the local part of a "{namespace}local" name.
        tag = tag.rpartition("}")[2] if tag[0] == "{" else tag
        fragments.append(f"{tag}:{value}")

    # A single string makes it easy to see how similar two nodes are.
    joined = " ".join(fragments).strip()
    summary = utils.cleanup_whitespace(joined)
    cache[node] = summary
    return summary