def test_parse_text():
    # Parsing plain text should yield a document whose root element holds
    # exactly one child: a text node carrying the input string unchanged,
    # markup-significant characters included.
    source = 'test one two < & > ;'
    document_root = parse_text(source).documentElement
    children = document_root.childNodes
    assert_equal(len(children), 1)
    only_child = children[0]
    assert is_text(only_child)
    assert only_child.nodeValue == source
def _is_junk(hashable_node):
    """Return whether ``hashable_node`` should be treated as junk.

    A plain string is judged directly by ``is_text_junk``. Anything else is
    expected to expose a ``node`` attribute; its DOM subtree is walked and
    the node is junk unless some text descendant has a non-junk value.
    """
    if isinstance(hashable_node, basestring):
        return is_text_junk(hashable_node)

    # Nodes with no text or just whitespace are junk: with no text
    # descendants at all, all() is vacuously True.
    return all(
        is_text_junk(candidate.nodeValue)
        for candidate in walk_dom(hashable_node.node)
        if is_text(candidate)
    )
def fuzzy_match_node_hash(node):
    """Return the key used to compare ``node`` in fuzzy matching.

    Text nodes are represented by their ``nodeValue``; every other node is
    wrapped in a ``FuzzyHashableTree``.
    """
    return node.nodeValue if is_text(node) else FuzzyHashableTree(node)
def match_node_hash(node):
    """Return the key used to compare ``node`` in matching.

    Text nodes are represented by their ``nodeValue``; every other node is
    wrapped in a ``HashableTree``.
    """
    return node.nodeValue if is_text(node) else HashableTree(node)
def split_text_nodes(dom):
    """Call ``split_node`` on every text node found by walking ``dom``."""
    # Materialize the walk up front so the nodes visited are fixed before
    # any split_node call runs (presumably split_node edits the tree --
    # confirm against its definition).
    snapshot = list(walk_dom(dom))
    for candidate in snapshot:
        if is_text(candidate):
            split_node(candidate)