예제 #1
0
def examples_to_nodes(field_list, page_url, text_filter_func=text_filter_strip_newline, user_agent=DEFAULT_USER_AGENT):
    """
    Find, for every field, the DOM nodes whose text matches the field's example.

    The DOM root is obtained once from ``url_to_DOM(page_url, user_agent)``
    and every descendant of that root is compared against each field. Both
    the node text (``node.text_content()``) and ``field.example`` are passed
    through ``text_filter_func`` before being compared for equality.

    field_list: Iterable of field objects exposing ``name`` and ``example``.
    page_url: The URL handed to ``url_to_DOM``.
    text_filter_func: Callable applied to both sides of the text comparison.
    user_agent: Forwarded unchanged to ``url_to_DOM``.

    Returns a dict mapping ``field.name`` to a list of
    ``(node, xpath, record_count)`` tuples, one per matching node, where
    ``xpath`` is ``node_to_absolute_XPATH(node)`` and ``record_count`` is
    the number of elements returned by ``root.findall(xpath)``. A field
    without any matching node maps to an empty list. If two fields share a
    name, the later one overwrites the earlier entry.
    """
    root = url_to_DOM(page_url, user_agent)
    matches_by_field = {}
    for field in field_list:
        # The filtered example is identical for every node, so compute it
        # once per field instead of once per descendant.
        wanted_text = text_filter_func(field.example)
        target_nodes = [node for node in root.iterdescendants()
                        if text_filter_func(node.text_content()) == wanted_text]

        matches = []
        for node in target_nodes:
            xpath = node_to_absolute_XPATH(node)
            record_count = len(root.findall(xpath))
            matches.append((node, xpath, record_count))
        matches_by_field[field.name] = matches

    return matches_by_field
예제 #2
0
def example_to_node(field, page_url, filter=False, disambiguation_method=take_last,
                    text_filter_func=text_filter_strip_newline):
    """
    Take an ExampleField and find the target node that contains its example.

    field: An ExampleField (its ``name`` and ``example`` attributes are read).
    page_url: The URL of the selected page, handed to ``url_to_DOM``.
    filter: If text_filter_func should be applied to both the node text and
        the example before comparison. When false, the raw
        ``node.text_content()`` is compared to ``field.example`` directly.
        (The name shadows the builtin; it is kept for caller compatibility.)
    disambiguation_method: A callable that takes the list of matching nodes
        and decides which node to return.
    text_filter_func: Allows the user to supply a function for filtering text.
        default: util.text_filter_strip_newline (removes surrounding
        whitespace and replaces newlines with '').

    Returns whatever ``disambiguation_method`` returns for the matching nodes.
    Raises ValueError when no descendant of the DOM root matches the example.
    """
    root = url_to_DOM(page_url)
    if filter:
        # The filtered example does not depend on the node; compute it once
        # rather than for every descendant.
        wanted_text = text_filter_func(field.example)
        target_nodes = [node for node in root.iterdescendants()
                        if text_filter_func(node.text_content()) == wanted_text]
    else:
        target_nodes = [node for node in root.iterdescendants()
                        if node.text_content() == field.example]

    if not target_nodes:
        raise ValueError('Node containing the given example not found. Field(%s)' % (field.name))
    return disambiguation_method(target_nodes)