示例#1
0
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """matches() filters paragraphs by regex, with and without an 'i' flags argument."""
    markup = """
    <p>moe</p>
    <p>larry</p>
    <p>curly</p>"""
    # (hquery expression, text expected to survive the predicate)
    cases = (
        ('//p[matches(text(), "^l[ary]+")]/text()', 'larry'),
        ('//p[matches(text(), ".URL.", "i")]/text()', 'curly'),
    )

    for hquery, wanted in cases:
        assert query_html_doc(markup, hquery) == expected_result(wanted)
示例#2
0
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """tokenize() splits a string on a regex separator.

    Exercises a separator pattern, a third 'i' flags argument, and counting
    the tokens that come back.
    """
    # Raw literal: the expression must carry a backslash-dot through to the
    # query engine, and "\." is an invalid escape in an ordinary Python
    # string literal (it triggers a warning on modern interpreters).
    pattern_query = r'tokenize("Moe:Larry:..Curly", ":\.*")'
    assert query_html_doc('', pattern_query) == expected_result("""
    Moe
    Larry
    Curly""")

    flagged_query = 'tokenize("HaxtaXpatience", "x", "i")'
    assert query_html_doc('', flagged_query) == expected_result("""
    Ha
    ta
    patience""")

    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
示例#3
0
def test_escapes_work_in_string_literals():
    """The &#10; escape splits the output in double-, single- and back-quoted literals."""
    wanted = """
    foo
    bar"""
    literals = ('"foo&#10;bar"', "'foo&#10;bar'", '`foo&#10;bar`')

    for hquery in literals:
        assert query_html_doc('', hquery) == expected_result(wanted)
def test_element_constructor_accepts_numbers_and_booleans():
    assert query_html_doc('', 'element test { 98.6 }') == expected_result("""
    <test>
     98.6
    </test>""")

    assert query_html_doc('', 'element test { false() }') == expected_result("""
    <test>
     false
    </test>""")
示例#5
0
def test_if_then_else_works_with_node_sets():
    """if/then/else takes the 'then' branch for a non-empty node set, else the range."""
    markup = """
    <p>eekaboo</p>"""

    then_branch = query_html_doc(markup, 'if (//p) then //p else 1 to 3')
    assert then_branch == expected_result("""
    <p>
     eekaboo
    </p>""")

    else_branch = query_html_doc(markup, 'if (//div) then //p else 1 to 3')
    assert else_branch == expected_result("""
    1
    2
    3""")
示例#6
0
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """tokenize() splits a string on a regex separator.

    Exercises a separator pattern, a third 'i' flags argument, and counting
    the tokens that come back.
    """
    # Raw literal: the expression must carry a backslash-dot through to the
    # query engine, and "\." is an invalid escape in an ordinary Python
    # string literal (it triggers a warning on modern interpreters).
    pattern_query = r'tokenize("Moe:Larry:..Curly", ":\.*")'
    assert query_html_doc('', pattern_query) == expected_result("""
    Moe
    Larry
    Curly""")

    flagged_query = 'tokenize("HaxtaXpatience", "x", "i")'
    assert query_html_doc('', flagged_query) == expected_result("""
    Ha
    ta
    patience""")

    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
示例#7
0
def test_if_then_else_works_with_node_sets():
    """if/then/else takes the 'then' branch for a non-empty node set, else the range."""
    markup = """
    <p>eekaboo</p>"""

    then_branch = query_html_doc(markup, 'if (//p) then //p else 1 to 3')
    assert then_branch == expected_result("""
    <p>
     eekaboo
    </p>""")

    else_branch = query_html_doc(markup, 'if (//div) then //p else 1 to 3')
    assert else_branch == expected_result("""
    1
    2
    3""")
示例#8
0
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """matches() filters paragraphs by regex, with and without an 'i' flags argument."""
    markup = """
    <p>moe</p>
    <p>larry</p>
    <p>curly</p>"""
    # (hquery expression, text expected to survive the predicate)
    cases = (
        ('//p[matches(text(), "^l[ary]+")]/text()', 'larry'),
        ('//p[matches(text(), ".URL.", "i")]/text()', 'curly'),
    )

    for hquery, wanted in cases:
        assert query_html_doc(markup, hquery) == expected_result(wanted)
def test_element_constructor_accepts_attributes_from_original_document_including_multi_values_like_classes():
    """Attributes selected from the document are carried onto a constructed element."""
    markup = """
    <p class="one two" three="four">
        contents
    </p>"""

    every_attribute = query_html_doc(markup, 'element test { //p/@* }')
    assert every_attribute == expected_result("""
    <test class="one two" three="four">
    </test>""")

    attribute_plus_node = query_html_doc(markup, 'element test { //p/@three, //p }')
    assert attribute_plus_node == expected_result("""
    <test three="four">
     <p class="one two" three="four">
      contents
     </p>
    </test>""")
示例#10
0
def test_selects_the_chapter_children_of_the_context_node_that_have_one_or_more_title_children_with_string_value_equal_to_Introduction():
    """chapter[title="Introduction"] keeps only chapters whose title is Introduction."""
    source = """
    <context>
        <chapter>
            <title>Introduction</title>
        </chapter>
        <chapter>not selected</chapter>
        <chapter>
            <title>Author's Note</title>
        </chapter>
        <chapter>
            <title>Introduction</title>
            <content>Hello, I'm chapter.</content>
        </chapter>
    </context>"""

    selected = query_context_node(source, 'chapter[title="Introduction"]')

    assert selected == expected_result("""
    <chapter>
     <title>
      Introduction
     </title>
    </chapter>
    <chapter>
     <title>
      Introduction
     </title>
     <content>
      Hello, I'm chapter.
     </content>
    </chapter>""")
示例#11
0
def test_selects_the_chapter_children_of_the_context_node_that_have_one_or_more_title_children():
    """chapter[title] keeps only the chapters that contain a title child."""
    source = """
    <context>
        <chapter>
            <title>selected</title>
        </chapter>
        <chapter>
            <not-title></not-title>
        </chapter>
        <chapter>
            <title>also selected</title>
        </chapter>
    </context>"""

    selected = query_context_node(source, 'chapter[title]')

    assert selected == expected_result("""
    <chapter>
     <title>
      selected
     </title>
    </chapter>
    <chapter>
     <title>
      also selected
     </title>
    </chapter>""")
def test_selects_the_lang_attribute_of_the_parent_of_the_context_node():
    """'../@lang' yields the lang attribute found on the context node's parent."""
    document = make_soup("""
    <root lang="English">
        <context></context>
    </root>""")
    # Evaluate from the <context> element so '..' resolves to <root>.
    context_node = document.root.context

    selected = query_context_node(context_node, '../@lang')

    assert selected == expected_result('lang="English"')
示例#13
0
def test_selects_the_para_element_descendants_of_the_context_node():
    """'.//para' returns every para below the context node, nested ones included."""
    document = make_soup("""
    <para>
        <context>
            <para>selected</para>
            <not-para>not selected</not-para>
            <para>
                <para>also selected</para>
            </para>
        </context>
    </para>""")
    context_node = document.para.context

    selected = query_context_node(context_node, './/para')

    assert selected == expected_result("""
    <para>
     selected
    </para>
    <para>
     <para>
      also selected
     </para>
    </para>
    <para>
     also selected
    </para>""")
示例#14
0
def test_selects_all_the_item_elements_in_the_same_document_as_the_context_node_that_have_an_olist_parent():
    """'//olist/item' finds items with an olist parent anywhere in the document."""
    # NOTE(review): the inner <olist> below is followed by a second opening
    # <olist> rather than a closing tag; kept as-is because the fixture text
    # is part of the test input -- confirm whether that is intentional.
    document = make_soup("""
    <root>
        <olist>no items</olist>
        <item>not selected</item>
        <context></context>
        <olist>
            <item>first</item>
        </olist>
        <item>
            <olist>
                <item>second</item>
            <olist>
        </item>
    </root>""")
    context_node = document.root.context

    selected = query_context_node(context_node, '//olist/item')

    assert selected == expected_result("""
    <item>
     first
    </item>
    <item>
     second
    </item>""")
示例#15
0
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """A comma after a return/value clause is not absorbed into that clause."""
    appended = query_html_doc('', 'for $x in (1 to 2) return $x, "!"')
    assert appended == expected_result("""
    1
    2
    !""")

    # Inside sum(...), the comma must separate the two function arguments.
    sum_queries = (
        'sum(for $x in //span return $x, "zero")',
        'sum(//span -> $_, "zero")',
    )
    for hquery in sum_queries:
        assert query_html_doc('', hquery) == 'zero'
def test_selects_the_chapter_children_of_the_context_node_that_have_one_or_more_title_children_with_string_value_equal_to_Introduction():
    """chapter[title="Introduction"] keeps only chapters whose title is Introduction."""
    source = """
    <context>
        <chapter>
            <title>Introduction</title>
        </chapter>
        <chapter>not selected</chapter>
        <chapter>
            <title>Author's Note</title>
        </chapter>
        <chapter>
            <title>Introduction</title>
            <content>Hello, I'm chapter.</content>
        </chapter>
    </context>"""

    selected = query_context_node(source, 'chapter[title="Introduction"]')

    assert selected == expected_result("""
    <chapter>
     <title>
      Introduction
     </title>
    </chapter>
    <chapter>
     <title>
      Introduction
     </title>
     <content>
      Hello, I'm chapter.
     </content>
    </chapter>""")
示例#17
0
def test_join_filter_joins_string_values_from_node_set():
    """The 'j:,:' filter in an interpolated string joins paragraph values with commas."""
    markup = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    joined = query_html_doc(markup, '`${j:,://p}`')

    assert joined == expected_result('one,two,three')
示例#18
0
def test_selects_the_fifth_para_child_of_the_context_node_if_that_child_has_a_type_attribute_with_value_warning():
    """para[5][@type="warning"] matches only when the fifth para carries that type."""
    source = """
    <context>
        <para>not selected</para>
        <para>not selected</para>
        <para>not selected</para>
        <para>not selected</para>
        <para type="error">selected</para>
    </context>"""
    hquery = 'para[5][@type="warning"]'

    # As written, the fifth para has type "error", so nothing is selected.
    assert query_context_node(source, hquery) == expected_result("")

    # Flip the type to "warning" and the same query now selects it.
    with_warning = source.replace('error', 'warning')
    assert query_context_node(with_warning, hquery) == expected_result("""
    <para type="warning">
     selected
    </para>""")
示例#19
0
def test_selects_the_lang_attribute_of_the_parent_of_the_context_node():
    """'../@lang' yields the lang attribute found on the context node's parent."""
    document = make_soup("""
    <root lang="English">
        <context></context>
    </root>""")
    # Evaluate from the <context> element so '..' resolves to <root>.
    context_node = document.root.context

    selected = query_context_node(context_node, '../@lang')

    assert selected == expected_result('lang="English"')
示例#20
0
def test_even_and_odd_functions_select_the_appropriate_elements_based_on_position():
    """even() and odd() predicates pick alternating paragraphs by position."""
    markup = """
    <p>You</p>
    <p>I</p>
    <p>are</p>
    <p>am</p>
    <p>odd.</p>
    <p>even.</p>"""

    even_text = query_html_doc(markup, '//p[even()]/text()')
    assert even_text == expected_result("""
    I
    am
    even.""")

    odd_text = query_html_doc(markup, '//p[odd()]/text()')
    assert odd_text == expected_result("""
    You
    are
    odd.""")
示例#21
0
def test_element_node_becomes_normalized_text_contents_in_interpolated_string():
    """An element interpolated into a string contributes whitespace-normalized text."""
    markup = """
    <p>
        foo   bar
    </p>"""

    interpolated = query_html_doc(markup, '`-->${//p}<--`')

    assert interpolated == expected_result('-->foo bar<--')
示例#22
0
def test_matches_function_extends_to_using_context_node_when_passed_no_input_string():
    """matches() given only a pattern is applied to each candidate paragraph."""
    markup = """
    <p>bar</p>
    <p>foo</p>"""

    matched = query_html_doc(markup, '//p[matches("^f.+")]/text()')

    assert matched == expected_result('foo')
def test_selects_the_context_node():
    """'.' returns the context node itself."""
    source = """
    <context>
        selected
    </context>"""

    selected = query_context_node(source, '.')

    assert selected == expected_result("""
    <context>
     selected
    </context>""")
示例#24
0
def test_tolerates_latin_characters_in_comments(capsys, mocker):
    """Running main() on a comment containing a non-ASCII character echoes it intact."""
    # Build each stub value first, then install it on the patched target.
    cli_args = simulate_args_dict(expression='//comment()')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    stdin_text = wrap_html_body(u"""
    <!-- sacr\xe9 bleu! -->""")
    mocker.patch('sys.stdin.read').return_value = stdin_text

    main()

    printed, _ = capture_console_output(capsys)
    assert printed == expected_result(u'<!-- sacr\xe9 bleu! -->')
示例#25
0
def test_iteration_using_for():
    """A for/return expression visits each paragraph and yields its text."""
    markup = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    texts = query_html_doc(markup, 'for $x in //p return $x/text()')

    assert texts == expected_result("""
    one
    two
    three""")
示例#26
0
def test_union_decomposition_with_parentheses():
    """A parenthesized clause list after '=>' maps each union member to its clause."""
    markup = """
    <h1>heading</h1>
    <p>content</p>
    <h1>another heading</h1>"""

    labels = query_html_doc(markup, '(//h1 | //p) => ("fizz" | "buzz")')

    assert labels == expected_result("""
    fizz
    buzz
    fizz""")
示例#27
0
def test_selects_the_context_node():
    """'.' returns the context node itself."""
    source = """
    <context>
        selected
    </context>"""

    selected = query_context_node(source, '.')

    assert selected == expected_result("""
    <context>
     selected
    </context>""")
示例#28
0
def test_union_decomposition_naked():
    """An unparenthesized clause list after '=>' maps each union member to its clause."""
    markup = """
    <h1>heading</h1>
    <p>content</p>
    <h1>another heading</h1>"""

    labelled = query_html_doc(markup, '(//h1 | //p) => `h1 $_` | `p $_`')

    assert labelled == expected_result("""
    h1 heading
    p content
    h1 another heading""")
示例#29
0
def test_rooted_location_paths_work_with_both_kinds_of_slash():
    """Paths rooted at a variable accept both '/' (children) and '//' (descendants)."""
    markup = """
    <section>
        <div>
            <div>foo</div>
        </div>
    </section>
    <section>
        <div>
            <div>bar</div>
        </div>
    </section>"""

    child_divs = query_html_doc(markup, 'for $x in //section return $x/div')
    assert child_divs == expected_result("""
    <div>
     <div>
      foo
     </div>
    </div>
    <div>
     <div>
      bar
     </div>
    </div>""")

    descendant_divs = query_html_doc(markup, 'for $x in //section return $x//div')
    assert descendant_divs == expected_result("""
    <div>
     <div>
      foo
     </div>
    </div>
    <div>
     foo
    </div>
    <div>
     <div>
      bar
     </div>
    </div>
    <div>
     bar
    </div>""")
示例#30
0
def test_iteration_using_for():
    """A for/return expression visits each paragraph and yields its text."""
    markup = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    texts = query_html_doc(markup, 'for $x in //p return $x/text()')

    assert texts == expected_result("""
    one
    two
    three""")
示例#31
0
def test_abbreviated_flowr_provides_expected_iteration_variable_in_value_clause():
    """The '->' shorthand exposes each iterated node as $_ in the value clause."""
    markup = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    texts = query_html_doc(markup, '//p -> $_/text()')

    assert texts == expected_result("""
    one
    two
    three""")
示例#32
0
def test_union_decomposition_naked():
    """An unparenthesized clause list after '=>' maps each union member to its clause."""
    markup = """
    <h1>heading</h1>
    <p>content</p>
    <h1>another heading</h1>"""

    labelled = query_html_doc(markup, '(//h1 | //p) => `h1 $_` | `p $_`')

    assert labelled == expected_result("""
    h1 heading
    p content
    h1 another heading""")
示例#33
0
def test_selects_the_first_para_child_of_the_context_node():
    """para[1] returns only the first para child."""
    source = """
    <context>
        <para>selected</para>
        <para>not selected</para>
    </context>"""

    selected = query_context_node(source, 'para[1]')

    assert selected == expected_result("""
    <para>
     selected
    </para>""")
示例#34
0
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """A comma after a return/value clause is not absorbed into that clause."""
    appended = query_html_doc('', 'for $x in (1 to 2) return $x, "!"')
    assert appended == expected_result("""
    1
    2
    !""")

    # Inside sum(...), the comma must separate the two function arguments.
    sum_queries = (
        'sum(for $x in //span return $x, "zero")',
        'sum(//span -> $_, "zero")',
    )
    for hquery in sum_queries:
        assert query_html_doc('', hquery) == 'zero'
示例#35
0
def test_union_decomposition_with_parentheses():
    """A parenthesized clause list after '=>' maps each union member to its clause."""
    markup = """
    <h1>heading</h1>
    <p>content</p>
    <h1>another heading</h1>"""

    labels = query_html_doc(markup, '(//h1 | //p) => ("fizz" | "buzz")')

    assert labels == expected_result("""
    fizz
    buzz
    fizz""")
示例#36
0
def test_tolerates_latin_characters_in_attribute_contents(capsys, mocker):
    """Running main() on an attribute value with a non-ASCII character echoes it intact."""
    # Build each stub value first, then install it on the patched target.
    cli_args = simulate_args_dict(expression='//div/@role')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    stdin_text = wrap_html_body(u"""
    <div role="prim\xe4r">
    </div>""")
    mocker.patch('sys.stdin.read').return_value = stdin_text

    main()

    printed, _ = capture_console_output(capsys)
    assert printed == expected_result(u'role="prim\xe4r"')
示例#37
0
def test_tolerates_latin_characters_in_comments(capsys, mocker):
    """Running main() on a comment containing a non-ASCII character echoes it intact."""
    # Build each stub value first, then install it on the patched target.
    cli_args = simulate_args_dict(expression='//comment()')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    stdin_text = wrap_html_body(u"""
    <!-- sacr\xe9 bleu! -->""")
    mocker.patch('sys.stdin.read').return_value = stdin_text

    main()

    printed, _ = capture_console_output(capsys)
    assert printed == expected_result(u'<!-- sacr\xe9 bleu! -->')
def test_selects_the_first_para_child_of_the_context_node():
    """para[1] returns only the first para child."""
    source = """
    <context>
        <para>selected</para>
        <para>not selected</para>
    </context>"""

    selected = query_context_node(source, 'para[1]')

    assert selected == expected_result("""
    <para>
     selected
    </para>""")
示例#39
0
def test_rooted_location_paths_work_with_both_kinds_of_slash():
    """Paths rooted at a variable accept both '/' (children) and '//' (descendants)."""
    markup = """
    <section>
        <div>
            <div>foo</div>
        </div>
    </section>
    <section>
        <div>
            <div>bar</div>
        </div>
    </section>"""

    child_divs = query_html_doc(markup, 'for $x in //section return $x/div')
    assert child_divs == expected_result("""
    <div>
     <div>
      foo
     </div>
    </div>
    <div>
     <div>
      bar
     </div>
    </div>""")

    descendant_divs = query_html_doc(markup, 'for $x in //section return $x//div')
    assert descendant_divs == expected_result("""
    <div>
     <div>
      foo
     </div>
    </div>
    <div>
     foo
    </div>
    <div>
     <div>
      bar
     </div>
    </div>
    <div>
     bar
    </div>""")
示例#40
0
def test_tolerates_latin_characters_in_attribute_contents(capsys, mocker):
    """Running main() on an attribute value with a non-ASCII character echoes it intact."""
    # Build each stub value first, then install it on the patched target.
    cli_args = simulate_args_dict(expression='//div/@role')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    stdin_text = wrap_html_body(u"""
    <div role="prim\xe4r">
    </div>""")
    mocker.patch('sys.stdin.read').return_value = stdin_text

    main()

    printed, _ = capture_console_output(capsys)
    assert printed == expected_result(u'role="prim\xe4r"')
示例#41
0
def test_abbreviated_flowr_provides_expected_iteration_variable_in_value_clause():
    """The '->' shorthand exposes each iterated node as $_ in the value clause."""
    markup = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    texts = query_html_doc(markup, '//p -> $_/text()')

    assert texts == expected_result("""
    one
    two
    three""")
示例#42
0
def test_even_and_odd_functions_select_the_appropriate_elements_based_on_position():
    """even() and odd() predicates pick alternating paragraphs by position."""
    markup = """
    <p>You</p>
    <p>I</p>
    <p>are</p>
    <p>am</p>
    <p>odd.</p>
    <p>even.</p>"""

    even_text = query_html_doc(markup, '//p[even()]/text()')
    assert even_text == expected_result("""
    I
    am
    even.""")

    odd_text = query_html_doc(markup, '//p[odd()]/text()')
    assert odd_text == expected_result("""
    You
    are
    odd.""")
示例#43
0
def test_selects_the_parent_of_the_context_node():
    """'..' selects the parent of the context node.

    The <context> element is passed as the context node, so its parent
    <root> element is the expected selection.
    """
    html = """
    <root>
        <context></context>
    </root>"""
    soup = make_soup(html)
    # Query from the <context> tag. Previously the raw html string was
    # passed here and the parsed `soup` was never used, so the query did not
    # start from the element this test is named after.
    assert query_context_node(soup.root.context, '..') == expected_result("""
    <root>
     <context>
     </context>
    </root>""")
def test_selects_the_parent_of_the_context_node():
    """'..' selects the parent of the context node.

    The <context> element is passed as the context node, so its parent
    <root> element is the expected selection.
    """
    html = """
    <root>
        <context></context>
    </root>"""
    soup = make_soup(html)
    # Query from the <context> tag. Previously the raw html string was
    # passed here and the parsed `soup` was never used, so the query did not
    # start from the element this test is named after.
    assert query_context_node(soup.root.context, '..') == expected_result("""
    <root>
     <context>
     </context>
    </root>""")
示例#45
0
def test_union_decomposition_applies_first_matching_clause():
    """A node matching several union members gets the clause of the first one."""
    markup = """
    <div>div1</div>
    <p>p1</p>
    <div>
        <p>p2</p>
    </div>"""
    # Every node here also matches the last member (/html/body//*), yet
    # "three" never shows up in the expected output.
    hquery = '(//p | /html/body/div | /html/body//*) => "one" | "two" | "three"'

    labels = query_html_doc(markup, hquery)

    assert labels == expected_result("""
    two
    one
    two
    one""")
示例#46
0
def test_union_decomposition_applies_first_matching_clause():
    """A node matching several union members gets the clause of the first one."""
    markup = """
    <div>div1</div>
    <p>p1</p>
    <div>
        <p>p2</p>
    </div>"""
    # Every node here also matches the last member (/html/body//*), yet
    # "three" never shows up in the expected output.
    hquery = '(//p | /html/body/div | /html/body//*) => "one" | "two" | "three"'

    labels = query_html_doc(markup, hquery)

    assert labels == expected_result("""
    two
    one
    two
    one""")
def test_selects_the_para_element_children_of_the_context_node():
    """'para' returns the para children and skips other child elements."""
    source = """
    <context>
        <para>selected</para>
        <not-para>not selected</not-para>
        <para>also selected</para>
    </context>"""

    selected = query_context_node(source, 'para')

    assert selected == expected_result("""
    <para>
     selected
    </para>
    <para>
     also selected
    </para>""")
def test_selects_all_the_employee_children_of_the_context_node_that_have_both_a_secretary_attribute_and_an_assistant_attribute():
    """employee[@secretary and @assistant] needs both attributes to be present."""
    source = """
    <context>
        <employee secretary="not selected"></employee>
        <employee assistant="" secretary="">selected</employee>
        <employee assistant="not selected"></employee>
        <employee secretary="also" assistant="selected"></employee>
    </context>"""

    selected = query_context_node(source, 'employee[@secretary and @assistant]')

    assert selected == expected_result("""
    <employee assistant="" secretary="">
     selected
    </employee>
    <employee assistant="selected" secretary="also">
    </employee>""")
示例#49
0
def test_tolerates_latin_characters_in_element_contents(capsys, mocker):
    """Running main() on element text with non-ASCII characters echoes it intact."""
    # Build each stub value first, then install it on the patched target.
    cli_args = simulate_args_dict(expression='//div')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    stdin_text = wrap_html_body(u"""
    <div>
        T\xeate\xa0\xe0\xa0t\xeate
    </div>""")
    mocker.patch('sys.stdin.read').return_value = stdin_text

    main()

    printed, _ = capture_console_output(capsys)
    assert printed == expected_result(u"""
    <div>
     T\xeate\xa0\xe0\xa0t\xeate
    </div>""")
def test_selects_the_fifth_para_child_of_the_context_node_if_that_child_has_a_type_attribute_with_value_warning():
    """para[5][@type="warning"] matches only when the fifth para carries that type."""
    source = """
    <context>
        <para>not selected</para>
        <para>not selected</para>
        <para>not selected</para>
        <para>not selected</para>
        <para type="error">selected</para>
    </context>"""
    hquery = 'para[5][@type="warning"]'

    # As written, the fifth para has type "error", so nothing is selected.
    assert query_context_node(source, hquery) == expected_result("")

    # Flip the type to "warning" and the same query now selects it.
    with_warning = source.replace('error', 'warning')
    assert query_context_node(with_warning, hquery) == expected_result("""
    <para type="warning">
     selected
    </para>""")
示例#51
0
def test_selects_the_para_element_children_of_the_context_node():
    """'para' returns the para children and skips other child elements."""
    source = """
    <context>
        <para>selected</para>
        <not-para>not selected</not-para>
        <para>also selected</para>
    </context>"""

    selected = query_context_node(source, 'para')

    assert selected == expected_result("""
    <para>
     selected
    </para>
    <para>
     also selected
    </para>""")
示例#52
0
def test_union_operator_produces_node_set_sorted_in_document_order():
    """'//p | //div' comes back in document order, not operand order."""
    markup = """
    <div>one</div>
    <p>two</p>
    <div>three</div>"""

    united = query_html_doc(markup, '//p | //div')

    assert united == expected_result("""
    <div>
     one
    </div>
    <p>
     two
    </p>
    <div>
     three
    </div>""")
def test_selects_all_para_children_of_the_context_node_that_have_a_type_attribute_with_value_warning():
    """para[@type="warning"] keeps only para children whose type is warning."""
    source = """
    <context>
        <para>not selected</para>
        <para type="warning">selected</para>
        <para type="error">not selected</para>
        <para type="warning">also selected</para>
    </context>"""

    selected = query_context_node(source, 'para[@type="warning"]')

    assert selected == expected_result("""
    <para type="warning">
     selected
    </para>
    <para type="warning">
     also selected
    </para>""")
def test_selects_all_element_children_of_the_context_node():
    """'*' returns element children only, leaving out comments and bare text."""
    source = """
    <context>
        <!-- comment -->
        <element>selected</element>
        non-selected text
        <para>also selected</para>
    </context>"""

    selected = query_context_node(source, '*')

    assert selected == expected_result("""
    <element>
     selected
    </element>
    <para>
     also selected
    </para>""")
def test_selects_all_para_grandchildren_of_the_context_node():
    """'*/para' returns para elements two levels below the context node."""
    source = """
    <context>
        <para>
            not selected
            <para>selected</para>
            <para>also selected</para>
        </para>
    </context>"""

    selected = query_context_node(source, '*/para')

    assert selected == expected_result("""
    <para>
     selected
    </para>
    <para>
     also selected
    </para>""")
示例#56
0
def test_variables_before_for_have_global_scope_and_within_for_have_iteration_scope():
    """let bindings before 'for' persist; those inside it are rebound per iteration."""
    multiline_query = """
    let $x := 2
    let $z := $x
    for $_ in (1, $x)
    let $y := $_
    let $x := $_
    return ($x, $z, $x = $y)"""
    # Collapse the query onto one line by turning each newline into a space.
    one_line_query = multiline_query.replace('\n', ' ')

    outcome = query_html_doc('', one_line_query)

    assert outcome == expected_result("""
    1
    2
    true
    2
    2
    true""")
def test_selects_the_second_section_of_the_fifth_chapter_of_the_doc():
    """/doc/chapter[5]/section[2] returns the second section of the fifth chapter."""
    source = """
    <doc>
        <chapter>one</chapter>
        <chapter>two</chapter>
        <chapter>three</chapter>
        <chapter>four</chapter>
        <chapter>
            <section>five point one</section>
            <section>five point two</section>
        </chapter>
    </doc>"""

    selected = query_context_node(source, '/doc/chapter[5]/section[2]')

    assert selected == expected_result("""
    <section>
     five point two
    </section>""")