def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """matches() keeps only the <p> whose text fits the regex, with and without the "i" flag."""
    markup = """ <p>moe</p> <p>larry</p> <p>curly</p>"""

    anchored = query_html_doc(markup, '//p[matches(text(), "^l[ary]+")]/text()')
    assert anchored == expected_result('larry')

    # Upper-case pattern plus the "i" flag is expected to hit the lower-case "curly".
    flagged = query_html_doc(markup, '//p[matches(text(), ".URL.", "i")]/text()')
    assert flagged == expected_result('curly')
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """tokenize() splits on a regex separator, accepts the "i" flag, and its result can be counted."""
    # Raw literal: the separator regex contains a backslash that must reach the
    # query engine verbatim. In a plain literal it is an invalid Python escape
    # sequence (deprecated, and a SyntaxWarning on recent interpreters).
    separator_query = r'tokenize("Moe:Larry:..Curly", ":\.*")'
    assert query_html_doc('', separator_query) == expected_result(""" Moe Larry Curly""")

    assert query_html_doc('', 'tokenize("HaxtaXpatience", "x", "i")') == expected_result(""" Ha ta patience""")

    # count() output is compared as plain text, without expected_result().
    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
def test_escapes_work_in_string_literals():
    """Double-quoted, single-quoted and backtick-quoted literals must all produce the same output."""
    quoted_forms = ('"foo bar"', "'foo bar'", '`foo bar`')
    for literal in quoted_forms:
        assert query_html_doc('', literal) == expected_result(""" foo bar""")
def test_element_constructor_accepts_numbers_and_booleans(): assert query_html_doc('', 'element test { 98.6 }') == expected_result(""" <test> 98.6 </test>""") assert query_html_doc('', 'element test { false() }') == expected_result(""" <test> false </test>""")
def test_if_then_else_works_with_node_sets():
    """A node set drives the condition: a match takes the `then` branch, no match takes `else`."""
    markup = """ <p>eekaboo</p>"""

    then_branch = query_html_doc(markup, 'if (//p) then //p else 1 to 3')
    assert then_branch == expected_result(""" <p> eekaboo </p>""")

    # The document has no <div>, so the range in the else clause is expected.
    else_branch = query_html_doc(markup, 'if (//div) then //p else 1 to 3')
    assert else_branch == expected_result(""" 1 2 3""")
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """Exercise tokenize() with a regex separator, with the "i" flag, and wrapped in count()."""
    # The first query is a raw literal so the regex backslash is passed through
    # unchanged instead of relying on an invalid (deprecated) Python escape sequence.
    tokenized_cases = (
        (r'tokenize("Moe:Larry:..Curly", ":\.*")', """ Moe Larry Curly"""),
        ('tokenize("HaxtaXpatience", "x", "i")', """ Ha ta patience"""),
    )
    for hquery, tokens in tokenized_cases:
        assert query_html_doc('', hquery) == expected_result(tokens)

    # The count is compared against the bare string, not an expected_result() value.
    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
def test_if_then_else_works_with_node_sets():
    """Check both branches of if/then/else when the condition is a location path."""
    html_body = """ <p>eekaboo</p>"""
    branch_cases = (
        # condition matches a <p>, so the then clause is returned
        ('if (//p) then //p else 1 to 3', """ <p> eekaboo </p>"""),
        # condition matches nothing, so the else clause is returned
        ('if (//div) then //p else 1 to 3', """ 1 2 3"""),
    )
    for hquery, output in branch_cases:
        assert query_html_doc(html_body, hquery) == expected_result(output)
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """Run each matches() predicate over three <p> elements and check which text survives."""
    html_body = """ <p>moe</p> <p>larry</p> <p>curly</p>"""
    predicate_cases = (
        ('//p[matches(text(), "^l[ary]+")]/text()', 'larry'),
        ('//p[matches(text(), ".URL.", "i")]/text()', 'curly'),
    )
    for hquery, survivor in predicate_cases:
        assert query_html_doc(html_body, hquery) == expected_result(survivor)
def test_element_constructor_accepts_attributes_from_original_document_including_multi_values_like_classes(): html_body = """ <p class="one two" three="four"> contents </p>""" assert query_html_doc(html_body, 'element test { //p/@* }') == expected_result(""" <test class="one two" three="four"> </test>""") assert query_html_doc(html_body, 'element test { //p/@three, //p }') == expected_result(""" <test three="four"> <p class="one two" three="four"> contents </p> </test>""")
def test_selects_the_chapter_children_of_the_context_node_that_have_one_or_more_title_children_with_string_value_equal_to_Introduction():
    """`chapter[title="Introduction"]` keeps only chapters with a title of exactly that text."""
    source = """ <context> <chapter> <title>Introduction</title> </chapter> <chapter>not selected</chapter> <chapter> <title>Author's Note</title> </chapter> <chapter> <title>Introduction</title> <content>Hello, I'm chapter.</content> </chapter> </context>"""
    selected = query_context_node(source, 'chapter[title="Introduction"]')
    assert selected == expected_result(""" <chapter> <title> Introduction </title> </chapter> <chapter> <title> Introduction </title> <content> Hello, I'm chapter. </content> </chapter>""")
def test_selects_the_chapter_children_of_the_context_node_that_have_one_or_more_title_children():
    """`chapter[title]` keeps the chapters that contain a <title> child and drops the one that does not."""
    source = """ <context> <chapter> <title>selected</title> </chapter> <chapter> <not-title></not-title> </chapter> <chapter> <title>also selected</title> </chapter> </context>"""
    selected = query_context_node(source, 'chapter[title]')
    assert selected == expected_result(""" <chapter> <title> selected </title> </chapter> <chapter> <title> also selected </title> </chapter>""")
def test_selects_the_lang_attribute_of_the_parent_of_the_context_node():
    """Starting at the <context> element, `../@lang` yields the parent's lang attribute."""
    document = make_soup(""" <root lang="English"> <context></context> </root>""")
    context_node = document.root.context
    assert query_context_node(context_node, '../@lang') == expected_result('lang="English"')
def test_selects_the_para_element_descendants_of_the_context_node():
    """`.//para` from <context> returns nested <para> elements at every depth, not the enclosing one."""
    document = make_soup(""" <para> <context> <para>selected</para> <not-para>not selected</not-para> <para> <para>also selected</para> </para> </context> </para>""")
    context_node = document.para.context
    descendants = query_context_node(context_node, './/para')
    assert descendants == expected_result(""" <para> selected </para> <para> <para> also selected </para> </para> <para> also selected </para>""")
def test_selects_all_the_item_elements_in_the_same_document_as_the_context_node_that_have_an_olist_parent():
    """`//olist/item` searches the whole document even though the context node is <context>."""
    # NOTE(review): the second-to-last tag in the fixture is an opening <olist>
    # where a closing tag looks intended. It is kept verbatim here; confirm
    # whether the malformed markup is deliberate.
    document = make_soup(""" <root> <olist>no items</olist> <item>not selected</item> <context></context> <olist> <item>first</item> </olist> <item> <olist> <item>second</item> <olist> </item> </root>""")
    context_node = document.root.context
    items = query_context_node(context_node, '//olist/item')
    assert items == expected_result(""" <item> first </item> <item> second </item>""")
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """A comma after a return clause is not absorbed into the iteration body."""
    appended = query_html_doc('', 'for $x in (1 to 2) return $x, "!"')
    assert appended == expected_result(""" 1 2 !""")

    # With no <span> in the (empty) document, "zero" must be read as sum()'s
    # second argument in both the long and the abbreviated form.
    long_form = query_html_doc('', 'sum(for $x in //span return $x, "zero")')
    assert long_form == 'zero'
    short_form = query_html_doc('', 'sum(//span -> $_, "zero")')
    assert short_form == 'zero'
def test_selects_the_chapter_children_of_the_context_node_that_have_one_or_more_title_children_with_string_value_equal_to_Introduction():
    """Two of the four chapters have a title equal to "Introduction"; only those are expected."""
    hquery = 'chapter[title="Introduction"]'
    fixture = """ <context> <chapter> <title>Introduction</title> </chapter> <chapter>not selected</chapter> <chapter> <title>Author's Note</title> </chapter> <chapter> <title>Introduction</title> <content>Hello, I'm chapter.</content> </chapter> </context>"""
    matching_chapters = query_context_node(fixture, hquery)
    wanted = expected_result(""" <chapter> <title> Introduction </title> </chapter> <chapter> <title> Introduction </title> <content> Hello, I'm chapter. </content> </chapter>""")
    assert matching_chapters == wanted
def test_join_filter_joins_string_values_from_node_set():
    """The `j:,:` filter in an interpolated string joins the <p> values with commas."""
    markup = """ <p>one</p> <p>two</p> <p>three</p>"""
    joined = query_html_doc(markup, '`${j:,://p}`')
    assert joined == expected_result('one,two,three')
def test_selects_the_fifth_para_child_of_the_context_node_if_that_child_has_a_type_attribute_with_value_warning():
    """`para[5][@type="warning"]` matches the fifth <para> only when its type is "warning"."""
    hquery = 'para[5][@type="warning"]'
    error_markup = """ <context> <para>not selected</para> <para>not selected</para> <para>not selected</para> <para>not selected</para> <para type="error">selected</para> </context>"""

    # Fifth para carries type="error": nothing should be selected.
    assert query_context_node(error_markup, hquery) == expected_result("")

    # Same document with the type switched to "warning": the fifth para is selected.
    warning_markup = error_markup.replace('error', 'warning')
    assert query_context_node(warning_markup, hquery) == expected_result(""" <para type="warning"> selected </para>""")
def test_even_and_odd_functions_select_the_appropriate_elements_based_on_position():
    """even() and odd() predicates split six <p> elements into alternating halves."""
    markup = """ <p>You</p> <p>I</p> <p>are</p> <p>am</p> <p>odd.</p> <p>even.</p>"""

    even_texts = query_html_doc(markup, '//p[even()]/text()')
    assert even_texts == expected_result(""" I am even.""")

    odd_texts = query_html_doc(markup, '//p[odd()]/text()')
    assert odd_texts == expected_result(""" You are odd.""")
def test_element_node_becomes_normalized_text_contents_in_interpolated_string():
    """An element embedded in an interpolated string contributes its text without surrounding padding."""
    markup = """ <p> foo bar </p>"""
    interpolated = query_html_doc(markup, '`-->${//p}<--`')
    assert interpolated == expected_result('-->foo bar<--')
def test_matches_function_extends_to_using_context_node_when_passed_no_input_string():
    """matches() called with only a pattern is applied to each <p> being filtered."""
    markup = """ <p>bar</p> <p>foo</p>"""
    matched = query_html_doc(markup, '//p[matches("^f.+")]/text()')
    assert matched == expected_result('foo')
def test_selects_the_context_node():
    """The `.` step returns the context node itself."""
    source = """ <context> selected </context>"""
    itself = query_context_node(source, '.')
    assert itself == expected_result(""" <context> selected </context>""")
def test_tolerates_latin_characters_in_comments(capsys, mocker):
    """Run main() on a page whose comment holds a non-ASCII character and check the printed comment."""
    # Arguments and stdin are built first, then installed on the patched objects.
    cli_args = simulate_args_dict(expression='//comment()')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    page = wrap_html_body(u""" <!-- sacr\xe9 bleu! -->""")
    mocker.patch('sys.stdin.read').return_value = page

    main()

    printed, _unused = capture_console_output(capsys)
    assert printed == expected_result(u'<!-- sacr\xe9 bleu! -->')
def test_iteration_using_for():
    """A `for ... return` expression yields one result per <p>, in order."""
    markup = """ <p>one</p> <p>two</p> <p>three</p>"""
    texts = query_html_doc(markup, 'for $x in //p return $x/text()')
    assert texts == expected_result(""" one two three""")
def test_union_decomposition_with_parentheses():
    """Each node of the union is replaced by the clause at the position of the branch it came from."""
    markup = """ <h1>heading</h1> <p>content</p> <h1>another heading</h1>"""
    labels = query_html_doc(markup, '(//h1 | //p) => ("fizz" | "buzz")')
    assert labels == expected_result(""" fizz buzz fizz""")
def test_union_decomposition_naked():
    """Union decomposition also works when the clauses are not wrapped in parentheses."""
    markup = """ <h1>heading</h1> <p>content</p> <h1>another heading</h1>"""
    labelled = query_html_doc(markup, '(//h1 | //p) => `h1 $_` | `p $_`')
    assert labelled == expected_result(""" h1 heading p content h1 another heading""")
def test_rooted_location_paths_work_with_both_kinds_of_slash():
    """Paths rooted at a variable work with `/` (children) and `//` (all descendants)."""
    markup = """ <section> <div> <div>foo</div> </div> </section> <section> <div> <div>bar</div> </div> </section>"""

    children_only = query_html_doc(markup, 'for $x in //section return $x/div')
    assert children_only == expected_result(""" <div> <div> foo </div> </div> <div> <div> bar </div> </div>""")

    # The double slash also reaches the inner <div> of each section.
    all_descendants = query_html_doc(markup, 'for $x in //section return $x//div')
    assert all_descendants == expected_result(""" <div> <div> foo </div> </div> <div> foo </div> <div> <div> bar </div> </div> <div> bar </div>""")
def test_iteration_using_for():
    """Iterate over every <p> with `for` and collect each one's text."""
    hquery = 'for $x in //p return $x/text()'
    paragraphs = """ <p>one</p> <p>two</p> <p>three</p>"""
    collected = query_html_doc(paragraphs, hquery)
    wanted = expected_result(""" one two three""")
    assert collected == wanted
def test_abbreviated_flowr_provides_expected_iteration_variable_in_value_clause():
    """In the `->` shorthand, `$_` refers to the item currently being iterated."""
    markup = """ <p>one</p> <p>two</p> <p>three</p>"""
    texts = query_html_doc(markup, '//p -> $_/text()')
    assert texts == expected_result(""" one two three""")
def test_union_decomposition_naked():
    """Map headings and paragraphs to differently prefixed strings without parenthesised clauses."""
    hquery = '(//h1 | //p) => `h1 $_` | `p $_`'
    document_body = """ <h1>heading</h1> <p>content</p> <h1>another heading</h1>"""
    produced = query_html_doc(document_body, hquery)
    wanted = expected_result(""" h1 heading p content h1 another heading""")
    assert produced == wanted
def test_selects_the_first_para_child_of_the_context_node():
    """`para[1]` returns only the first <para> child."""
    source = """ <context> <para>selected</para> <para>not selected</para> </context>"""
    first_para = query_context_node(source, 'para[1]')
    assert first_para == expected_result(""" <para> selected </para>""")
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """The item after the comma belongs to the enclosing expression, not to the return clause."""
    sequence_output = query_html_doc('', 'for $x in (1 to 2) return $x, "!"')
    assert sequence_output == expected_result(""" 1 2 !""")

    # Both spellings of the iteration must leave "zero" as a separate argument to sum().
    for hquery in ('sum(for $x in //span return $x, "zero")', 'sum(//span -> $_, "zero")'):
        assert query_html_doc('', hquery) == 'zero'
def test_union_decomposition_with_parentheses():
    """Headings map to "fizz" and the paragraph to "buzz", in document order."""
    hquery = '(//h1 | //p) => ("fizz" | "buzz")'
    document_body = """ <h1>heading</h1> <p>content</p> <h1>another heading</h1>"""
    produced = query_html_doc(document_body, hquery)
    wanted = expected_result(""" fizz buzz fizz""")
    assert produced == wanted
def test_tolerates_latin_characters_in_attribute_contents(capsys, mocker):
    """Run main() on a page whose attribute value holds a non-ASCII character and check the output."""
    # Arguments and stdin are built first, then installed on the patched objects.
    cli_args = simulate_args_dict(expression='//div/@role')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    page = wrap_html_body(u""" <div role="prim\xe4r"> </div>""")
    mocker.patch('sys.stdin.read').return_value = page

    main()

    printed, _unused = capture_console_output(capsys)
    assert printed == expected_result(u'role="prim\xe4r"')
def test_tolerates_latin_characters_in_comments(capsys, mocker):
    """Feed a comment containing an accented character through main() and compare what is printed."""
    fake_args = simulate_args_dict(expression='//comment()')
    mocker.patch('hq.hq.docopt').return_value = fake_args

    fake_stdin = wrap_html_body(u""" <!-- sacr\xe9 bleu! -->""")
    mocker.patch('sys.stdin.read').return_value = fake_stdin

    main()

    # Only the first element of the captured pair is compared.
    console_text, _ = capture_console_output(capsys)
    wanted = expected_result(u'<!-- sacr\xe9 bleu! -->')
    assert console_text == wanted
def test_rooted_location_paths_work_with_both_kinds_of_slash():
    """Check `$x/div` and `$x//div` inside a for-expression over two sections."""
    html_body = """ <section> <div> <div>foo</div> </div> </section> <section> <div> <div>bar</div> </div> </section>"""
    slash_cases = (
        # single slash: only the outer <div> of each section
        (
            'for $x in //section return $x/div',
            """ <div> <div> foo </div> </div> <div> <div> bar </div> </div>""",
        ),
        # double slash: outer and inner <div> of each section
        (
            'for $x in //section return $x//div',
            """ <div> <div> foo </div> </div> <div> foo </div> <div> <div> bar </div> </div> <div> bar </div>""",
        ),
    )
    for hquery, output in slash_cases:
        assert query_html_doc(html_body, hquery) == expected_result(output)
def test_tolerates_latin_characters_in_attribute_contents(capsys, mocker):
    """Feed an attribute containing an umlaut through main() and compare what is printed."""
    fake_args = simulate_args_dict(expression='//div/@role')
    mocker.patch('hq.hq.docopt').return_value = fake_args

    fake_stdin = wrap_html_body(u""" <div role="prim\xe4r"> </div>""")
    mocker.patch('sys.stdin.read').return_value = fake_stdin

    main()

    # Only the first element of the captured pair is compared.
    console_text, _ = capture_console_output(capsys)
    wanted = expected_result(u'role="prim\xe4r"')
    assert console_text == wanted
def test_abbreviated_flowr_provides_expected_iteration_variable_in_value_clause():
    """The abbreviated `->` iteration exposes each <p> as `$_` to the value clause."""
    hquery = '//p -> $_/text()'
    paragraphs = """ <p>one</p> <p>two</p> <p>three</p>"""
    collected = query_html_doc(paragraphs, hquery)
    wanted = expected_result(""" one two three""")
    assert collected == wanted
def test_even_and_odd_functions_select_the_appropriate_elements_based_on_position():
    """Filter six paragraphs by even() and by odd() and compare the remaining texts."""
    html_body = """ <p>You</p> <p>I</p> <p>are</p> <p>am</p> <p>odd.</p> <p>even.</p>"""
    position_cases = (
        ('//p[even()]/text()', """ I am even."""),
        ('//p[odd()]/text()', """ You are odd."""),
    )
    for hquery, texts in position_cases:
        assert query_html_doc(html_body, hquery) == expected_result(texts)
def test_selects_the_parent_of_the_context_node():
    """Starting at the <context> element, `..` returns its parent <root> element."""
    html = """ <root> <context></context> </root>"""
    soup = make_soup(html)
    # Query from the <context> element of the parsed document, as the sibling
    # context-node tests do. Previously the parsed document was built but unused
    # and the raw markup was passed, so <context> was never the context node.
    assert query_context_node(soup.root.context, '..') == expected_result(""" <root> <context> </context> </root>""")
def test_union_decomposition_applies_first_matching_clause():
    """A node matched by several union branches is mapped by the earliest matching branch only."""
    markup = """ <div>div1</div> <p>p1</p> <div> <p>p2</p> </div>"""
    hquery = '(//p | /html/body/div | /html/body//*) => "one" | "two" | "three"'
    # Every node also matches the catch-all third branch, yet "three" never appears.
    labels = query_html_doc(markup, hquery)
    assert labels == expected_result(""" two one two one""")
def test_selects_the_para_element_children_of_the_context_node():
    """`para` returns the <para> children and skips the <not-para> sibling."""
    source = """ <context> <para>selected</para> <not-para>not selected</not-para> <para>also selected</para> </context>"""
    para_children = query_context_node(source, 'para')
    assert para_children == expected_result(""" <para> selected </para> <para> also selected </para>""")
def test_selects_all_the_employee_children_of_the_context_node_that_have_both_a_secretary_attribute_and_an_assistant_attribute():
    """`employee[@secretary and @assistant]` requires both attributes; empty values still count."""
    source = """ <context> <employee secretary="not selected"></employee> <employee assistant="" secretary="">selected</employee> <employee assistant="not selected"></employee> <employee secretary="also" assistant="selected"></employee> </context>"""
    with_both = query_context_node(source, 'employee[@secretary and @assistant]')
    assert with_both == expected_result(""" <employee assistant="" secretary=""> selected </employee> <employee assistant="selected" secretary="also"> </employee>""")
def test_tolerates_latin_characters_in_element_contents(capsys, mocker):
    """Run main() on a <div> whose text holds accented and non-breaking-space characters."""
    # Arguments and stdin are built first, then installed on the patched objects.
    cli_args = simulate_args_dict(expression='//div')
    mocker.patch('hq.hq.docopt').return_value = cli_args
    page = wrap_html_body(u""" <div> T\xeate\xa0\xe0\xa0t\xeate </div>""")
    mocker.patch('sys.stdin.read').return_value = page

    main()

    printed, _unused = capture_console_output(capsys)
    assert printed == expected_result(u""" <div> T\xeate\xa0\xe0\xa0t\xeate </div>""")
def test_selects_the_fifth_para_child_of_the_context_node_if_that_child_has_a_type_attribute_with_value_warning():
    """Check the fifth-para query against an "error" document and its "warning" variant."""
    html = """ <context> <para>not selected</para> <para>not selected</para> <para>not selected</para> <para>not selected</para> <para type="error">selected</para> </context>"""
    fifth_if_warning = 'para[5][@type="warning"]'

    no_match = query_context_node(html, fifth_if_warning)
    assert no_match == expected_result("")

    # Rewriting the attribute value turns the fifth para into a match.
    match = query_context_node(html.replace('error', 'warning'), fifth_if_warning)
    assert match == expected_result(""" <para type="warning"> selected </para>""")
def test_union_operator_produces_node_set_sorted_in_document_order():
    """`//p | //div` returns nodes in document order, not in the order the operands are written."""
    markup = """ <div>one</div> <p>two</p> <div>three</div>"""
    united = query_html_doc(markup, '//p | //div')
    assert united == expected_result(""" <div> one </div> <p> two </p> <div> three </div>""")
def test_selects_all_para_children_of_the_context_node_that_have_a_type_attribute_with_value_warning():
    """`para[@type="warning"]` keeps the warning paras and drops untyped and error ones."""
    source = """ <context> <para>not selected</para> <para type="warning">selected</para> <para type="error">not selected</para> <para type="warning">also selected</para> </context>"""
    warnings_only = query_context_node(source, 'para[@type="warning"]')
    assert warnings_only == expected_result(""" <para type="warning"> selected </para> <para type="warning"> also selected </para>""")
def test_selects_all_element_children_of_the_context_node():
    """`*` returns element children only; the comment and the bare text are left out."""
    source = """ <context> <!-- comment --> <element>selected</element> non-selected text <para>also selected</para> </context>"""
    element_children = query_context_node(source, '*')
    assert element_children == expected_result(""" <element> selected </element> <para> also selected </para>""")
def test_selects_all_para_grandchildren_of_the_context_node():
    """`*/para` returns the <para> elements two levels down, not the direct <para> child."""
    source = """ <context> <para> not selected <para>selected</para> <para>also selected</para> </para> </context>"""
    grandchildren = query_context_node(source, '*/para')
    assert grandchildren == expected_result(""" <para> selected </para> <para> also selected </para>""")
def test_variables_before_for_have_global_scope_and_within_for_have_iteration_scope():
    """`let` before `for` is bound once; `let` inside `for` is rebound on every iteration."""
    clauses = """ let $x := 2 let $z := $x for $_ in (1, $x) let $y := $_ let $x := $_ return ($x, $z, $x = $y)"""
    # The query is sent as a single line: every newline becomes a space.
    single_line_query = clauses.replace('\n', ' ')
    # Per iteration: the rebound $x, the unchanged $z, then the $x = $y comparison.
    outcome = query_html_doc('', single_line_query)
    assert outcome == expected_result(""" 1 2 true 2 2 true""")
def test_selects_the_second_section_of_the_fifth_chapter_of_the_doc():
    """An absolute path with positional predicates picks one section out of the document."""
    source = """ <doc> <chapter>one</chapter> <chapter>two</chapter> <chapter>three</chapter> <chapter>four</chapter> <chapter> <section>five point one</section> <section>five point two</section> </chapter> </doc>"""
    section = query_context_node(source, '/doc/chapter[5]/section[2]')
    assert section == expected_result(""" <section> five point two </section>""")