Exemplo n.º 1
0
def _is_junk(hashable_node):
    """Return True when *hashable_node* holds no non-junk text.

    A plain string is judged directly by ``is_text_junk``. Any other value
    is expected to expose a ``.node`` attribute; every text descendant found
    by ``walk_dom`` is checked, and the node is junk only if none of them
    contains non-junk text. A node with no text descendants is junk.
    """
    # ``basestring`` exists only on Python 2 (it covers ``str`` and
    # ``unicode``). On Python 3 the bare name raises NameError, so fall back
    # to ``str`` there instead of crashing before the check can run.
    try:
        string_types = basestring  # noqa: F821 -- Python 2 builtin
    except NameError:
        string_types = str
    if isinstance(hashable_node, string_types):
        return is_text_junk(hashable_node)
    # Nodes with no text or just whitespace are junk.
    for descendant in walk_dom(hashable_node.node):
        if is_text(descendant) and not is_text_junk(descendant.nodeValue):
            return False
    return True
Exemplo n.º 2
0
def _is_junk(hashable_node):
    """Return True when *hashable_node* holds no non-junk text.

    Strings are handed straight to ``is_text_junk``. For anything else the
    tree under ``hashable_node.node`` is walked, and the result is True only
    if every text descendant's ``nodeValue`` is junk (vacuously True when
    there are no text descendants at all).
    """
    if isinstance(hashable_node, six.string_types):
        return is_text_junk(hashable_node)
    # Nodes with no text or just whitespace are junk. ``all`` stops at the
    # first text descendant that carries real content.
    return all(
        is_text_junk(child.nodeValue)
        for child in walk_dom(hashable_node.node)
        if is_text(child)
    )
Exemplo n.º 3
0
def fuzzy_match_node_hash(node):
    """Return the value that stands in for *node* during fuzzy matching.

    Text nodes are represented by their ``nodeValue``; every other node is
    wrapped in a ``FuzzyHashableTree``.
    """
    return node.nodeValue if is_text(node) else FuzzyHashableTree(node)
Exemplo n.º 4
0
def match_node_hash(node):
    """Return the value that stands in for *node* during matching.

    Text nodes are represented by their ``nodeValue``; every other node is
    wrapped in a ``HashableTree``.
    """
    return node.nodeValue if is_text(node) else HashableTree(node)
Exemplo n.º 5
0
def split_text_nodes(dom):
    """Call ``split_node`` on every text node found under *dom*."""
    # The walk is materialised up front, presumably because split_node
    # alters the tree and would disturb a live traversal -- confirm.
    snapshot = list(walk_dom(dom))
    for candidate in snapshot:
        if is_text(candidate):
            split_node(candidate)
Exemplo n.º 6
0
def split_text_nodes(dom):
    """Apply ``split_node`` to each text node reachable from *dom*.

    Non-text nodes yielded by ``walk_dom`` are left alone.
    """
    # Take a full copy of the traversal before touching anything; the
    # original code does the same (likely to guard against split_node
    # modifying the tree mid-walk -- verify against split_node).
    all_nodes = list(walk_dom(dom))
    for current in all_nodes:
        if is_text(current):
            split_node(current)
Exemplo n.º 7
0
def fuzzy_match_node_hash(node):
    """Map *node* to the object used to compare it in fuzzy matching.

    Returns ``node.nodeValue`` for a text node and a ``FuzzyHashableTree``
    wrapping the node otherwise.
    """
    if not is_text(node):
        return FuzzyHashableTree(node)
    return node.nodeValue
Exemplo n.º 8
0
def match_node_hash(node):
    """Map *node* to the object used to compare it in matching.

    Returns ``node.nodeValue`` for a text node and a ``HashableTree``
    wrapping the node otherwise.
    """
    if not is_text(node):
        return HashableTree(node)
    return node.nodeValue