示例#1
0
def test_relational_comparison_between_a_node_set_and_a_string():
    """Relational operators accept a node set on one side and a string literal on the other."""
    html_body = """
    <div>9.9</div>
    <div>10.1</div>"""
    cases = (
        ('//div > "10"', 'true'),
        ('"10.1" < //div', 'false'),
        ('//div <= "9.9"', 'true'),
    )
    for query, outcome in cases:
        assert query_html_doc(html_body, query) == outcome
示例#2
0
def test_string_value_of_a_sequence_is_concatenation_of_all_items_unlike_node_set():
    """string() yields only the first node of a node set but joins every item of a sequence."""
    html_body = """
    <p>one</p>
    <p>two</p>"""

    from_node_set = query_html_doc(html_body, 'let $_ := //p/text() return string($_)')
    assert from_node_set == 'one'

    from_sequence = query_html_doc(html_body, 'let $_ := ("one", "two") return string($_)')
    assert from_sequence == 'onetwo'
示例#3
0
def test_relational_comparison_between_a_node_set_and_a_number():
    """Relational operators accept a node set on one side and a numeric literal on the other."""
    html_body = """
    <div>9.9</div>
    <div>10.1</div>"""
    cases = (
        ('//div > 10', 'true'),
        ('10.1 < //div', 'false'),
        ('//div <= 9.9', 'true'),
    )
    for query, outcome in cases:
        assert query_html_doc(html_body, query) == outcome
示例#4
0
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """An item after a trailing comma is appended to the iteration result, not made part of the return clause."""
    iterated = query_html_doc('', 'for $x in (1 to 2) return $x, "!"')
    assert iterated == expected_result("""
    1
    2
    !""")

    # With no <span> to iterate over, only the trailing "zero" item reaches sum().
    for query in ('sum(for $x in //span return $x, "zero")',
                  'sum(//span -> $_, "zero")'):
        assert query_html_doc('', query) == 'zero'
示例#5
0
def test_use_of_escapes_for_forbidden_characters_in_regex_replace_patterns():
    """Character references stand in for '}' and ':' inside the pattern of an rr filter."""
    brace_query = r"""`it's ${rr:\w{3&#125;:dog::"a cat's"} life`"""
    brace_result = query_html_doc('', brace_query)
    assert brace_result == "it's a dog's life"

    colon_query = r'`${rr:&#58; ::: let $x := "re: " return concat($x, "search")}`'
    colon_result = query_html_doc('', colon_query)
    assert colon_result == 'research'
示例#6
0
def test_ancestor_axis_selects_all_matching_ancestors():
    """Both the full and the abbreviated ancestor axis return every enclosing <div>."""
    html_body = """
    <div>
        <section>
            <div>
                <p>text</p>
            </div>
        </section>
    </div>"""
    expected = expected_result("""
    <div>
     <section>
      <div>
       <p>
        text
       </p>
      </div>
     </section>
    </div>
    <div>
     <p>
      text
     </p>
    </div>""")

    for query in ('//p/ancestor::div', '//p/^::div'):
        assert query_html_doc(html_body, query) == expected
示例#7
0
def test_sum_function_sums_number_interpretation_of_items_in_sequence():
    """sum() adds up the numeric reading of element contents and attribute values alike."""
    html_body = """
    <span>30</span>
    <div value="10.42"></div>
    <span>2</span>"""

    spans_only = query_html_doc(html_body, 'sum(//span)')
    assert spans_only == '32'

    spans_and_attribute = query_html_doc(html_body, 'sum((//span, //div/@value))')
    assert spans_and_attribute == '42.42'
示例#8
0
def test_filters_are_applied_to_all_items_in_sequence_when_input_is_not_atomic():
    """String-interpolation filters are applied to each node of a multi-node input."""
    html_body = """
    <p>Hello, world!</p>
    <p>Goodbye, world!</p>"""

    truncated = query_html_doc(html_body, '`${tru:8:://p}`')
    assert truncated == 'Hello,Goodbye,'

    replaced = query_html_doc(html_body, '`${rr:world:test:://p}`')
    assert replaced == 'Hello, test!Goodbye, test!'
示例#9
0
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """matches() filters nodes by regular expression, optionally with the "i" flag."""
    html_body = """
    <p>moe</p>
    <p>larry</p>
    <p>curly</p>"""
    cases = (
        ('//p[matches(text(), "^l[ary]+")]/text()', 'larry'),
        ('//p[matches(text(), ".URL.", "i")]/text()', 'curly'),
    )

    for query, stooge in cases:
        assert query_html_doc(html_body, query) == expected_result(stooge)
示例#10
0
def test_class_function_returns_true_when_element_has_name_in_class_attribute():
    """class() reports whether a name is listed in an element's class attribute."""
    html_body = """
    <p class="not selected">not selected</p>
    <p class="foo bar">expected</p>"""
    cases = (
        ('class(//p[1], "foo")', 'false'),
        ('class(//p[2], "foo")', 'true'),
        # One-argument form, evaluated inside a predicate.
        ('//p[class("bar")]/text()', 'expected'),
    )

    for query, outcome in cases:
        assert query_html_doc(html_body, query) == outcome
示例#11
0
def test_class_function_returns_true_when_element_has_name_in_class_attribute():
    """class() reports whether a name is listed in an element's class attribute."""
    html_body = """
    <p class="not selected">not selected</p>
    <p class="foo bar">expected</p>"""

    first_has_foo = query_html_doc(html_body, 'class(//p[1], "foo")')
    assert first_has_foo == 'false'

    second_has_foo = query_html_doc(html_body, 'class(//p[2], "foo")')
    assert second_has_foo == 'true'

    # One-argument form, evaluated inside a predicate.
    text_of_bar = query_html_doc(html_body, '//p[class("bar")]/text()')
    assert text_of_bar == 'expected'
def test_text_content_normalization_is_applied_to_attribute_values_in_hash_constructor():
    """Hash values have their whitespace normalized unless preserve_space is requested."""
    preserved = u'\u00a0non\u00a0breaking\u00a0spaces '
    html_body = u'<p>{0}</p>'.format(preserved)
    query = 'hash {para: //p/text()}'

    normalized = json.loads(query_html_doc(html_body, query))
    assert normalized['para'] == 'non breaking spaces'

    untouched = json.loads(query_html_doc(html_body, query, preserve_space=True))
    assert untouched['para'] == preserved
示例#13
0
def test_comma_as_sequence_cat_operator_does_not_bind_at_end_of_return_clause():
    """An item after a trailing comma is appended to the iteration result, not made part of the return clause."""
    loop_query = 'for $x in (1 to 2) return $x, "!"'
    loop_result = query_html_doc('', loop_query)
    assert loop_result == expected_result("""
    1
    2
    !""")

    # With no <span> to iterate over, only the trailing "zero" item reaches sum().
    sum_queries = (
        'sum(for $x in //span return $x, "zero")',
        'sum(//span -> $_, "zero")',
    )
    for sum_query in sum_queries:
        assert query_html_doc('', sum_query) == 'zero'
示例#14
0
def test_escapes_work_in_string_literals():
    """The &#10; escape produces a line break in double-quoted, single-quoted and backtick literals."""
    two_lines = """
    foo
    bar"""

    for query in ('"foo&#10;bar"', "'foo&#10;bar'", '`foo&#10;bar`'):
        assert query_html_doc('', query) == expected_result(two_lines)
示例#15
0
def test_relational_comparison_involving_two_node_sets():
    """Relational operators can compare one node set against another."""
    html_body = """
    <p>9</p>
    <p>10</p>
    <div>10</div>
    <div>11</div>"""
    cases = (
        ('//p > //div', 'false'),
        ('//p >= //div', 'true'),
        ('//div[position()=1] <= //p', 'true'),
    )

    for query, outcome in cases:
        assert query_html_doc(html_body, query) == outcome
def test_element_constructor_accepts_numbers_and_booleans():
    assert query_html_doc('', 'element test { 98.6 }') == expected_result("""
    <test>
     98.6
    </test>""")

    assert query_html_doc('', 'element test { false() }') == expected_result("""
    <test>
     false
    </test>""")
示例#17
0
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """tokenize() splits a string on a regex delimiter, optionally honoring the "i" flag."""
    # Raw string: the query carries a regex backslash escape, and "\." in a
    # plain literal is an invalid Python escape sequence that triggers a
    # warning at compile time. The runtime value is unchanged.
    assert query_html_doc('', r'tokenize("Moe:Larry:..Curly", ":\.*")') == expected_result("""
    Moe
    Larry
    Curly""")
    assert query_html_doc('', 'tokenize("HaxtaXpatience", "x", "i")') == expected_result("""
    Ha
    ta
    patience""")
    # A trailing delimiter yields a final empty token, hence four rather than three.
    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
示例#18
0
def test_string_value_of_a_sequence_is_concatenation_of_all_items_unlike_node_set():
    """string() yields only the first node of a node set but joins every item of a sequence."""
    html_body = """
    <p>one</p>
    <p>two</p>"""
    cases = (
        ('let $_ := //p/text() return string($_)', 'one'),
        ('let $_ := ("one", "two") return string($_)', 'onetwo'),
    )

    for query, outcome in cases:
        assert query_html_doc(html_body, query) == outcome
示例#19
0
def test_interpretation_of_div_and_mod_and_other_arithmetic_operators_as_operators_vs_node_tests():
    """Check that div, mod and * are read as node tests or as operators depending on where they appear."""
    div = """
    <div>
    </div>"""
    mod = """
    <mod>
    </mod>"""

    # Here the names appear where a node test is expected.
    div_as_step = query_html_doc(div, 'div', wrap_body=False)
    assert div_as_step == expected_result(div)
    mod_as_step = query_html_doc(mod, '/ mod', wrap_body=False)
    assert mod_as_step == expected_result(mod)
    for document, verdict in ((div, 'true'), (mod, 'false')):
        assert query_html_doc(document, 'boolean(div)', wrap_body=False) == verdict

    # A name directly after a comma in an argument list is still a node test.
    div_with_text = '<div>bar</div>'
    query_with_div_after_comma = 'starts-with(concat("foo ", div), "foo ba")'
    after_comma = query_html_doc(div_with_text, query_with_div_after_comma, wrap_body=False)
    assert after_comma == 'true'

    # Here the names appear where an operator is expected.
    for query, quotient in (('number("84")div2', '42'),
                            ('let $x := 4 return $x div 2', '2')):
        assert query_html_doc(div, query) == quotient

    rect = '<rect id="foo" height="2" width="10"/>'
    area = query_html_doc(rect, 'let $r := //rect return $r/@height * $r/@width')
    assert area == '20'

    num_in_text = """
    <span>not selected</span>
    <span id="foo">42</span>"""
    remainder = query_html_doc(num_in_text, '//span[@id="foo"] mod 10')
    assert remainder == '2'
示例#20
0
def test_if_then_else_works_with_node_sets():
    """A non-empty node set takes the then branch; an empty one takes the else branch."""
    html_body = """
    <p>eekaboo</p>"""

    then_branch = query_html_doc(html_body, 'if (//p) then //p else 1 to 3')
    assert then_branch == expected_result("""
    <p>
     eekaboo
    </p>""")

    else_branch = query_html_doc(html_body, 'if (//div) then //p else 1 to 3')
    assert else_branch == expected_result("""
    1
    2
    3""")
示例#21
0
def test_preceding_sibling_axis_works_with_name_test():
    """The full and abbreviated preceding-sibling axes select only the earlier <p>."""
    html_body = """
    <p>foo</p>
    <div></div>
    <p>bar</p>"""
    expected = expected_result("""
    <p>
     foo
    </p>""")

    for query in ('//div/preceding-sibling::p', '//div/<::p'):
        assert query_html_doc(html_body, query) == expected
示例#22
0
def test_tokenize_function_breaks_up_strings_as_per_xpath_30_functions_spec():
    """tokenize() splits a string on a regex delimiter, optionally honoring the "i" flag."""
    # Raw string: the query carries a regex backslash escape, and "\." in a
    # plain literal is an invalid Python escape sequence that triggers a
    # warning at compile time. The runtime value is unchanged.
    assert query_html_doc(
        '', r'tokenize("Moe:Larry:..Curly", ":\.*")') == expected_result("""
    Moe
    Larry
    Curly""")
    assert query_html_doc(
        '', 'tokenize("HaxtaXpatience", "x", "i")') == expected_result("""
    Ha
    ta
    patience""")
    # A trailing delimiter yields a final empty token, hence four rather than three.
    assert query_html_doc('', 'count(tokenize("haxtaxstax", "x"))') == '4'
示例#23
0
def test_following_sibling_axis_works_with_node_test():
    """following-sibling works with both the text() node test and the * wildcard."""
    html_body = """
    <div>
     foo
     <p></p>
     bar
    </div>"""
    trailing_text = query_html_doc(html_body, '//p/following-sibling::text()')
    assert trailing_text == expected_result('bar')

    flat_markup = '<h1></h1><div></div><p>foo</p>'
    trailing_elements = query_html_doc(flat_markup, '//div/following-sibling::*')
    assert trailing_elements == expected_result("""
    <p>
     foo
    </p>""")
示例#24
0
def test_if_then_else_works_with_node_sets():
    """A non-empty node set takes the then branch; an empty one takes the else branch."""
    html_body = """
    <p>eekaboo</p>"""
    paragraph_markup = """
    <p>
     eekaboo
    </p>"""
    one_to_three = """
    1
    2
    3"""
    cases = (
        ('if (//p) then //p else 1 to 3', paragraph_markup),
        ('if (//div) then //p else 1 to 3', one_to_three),
    )

    for query, expected_text in cases:
        assert query_html_doc(html_body, query) == expected_result(expected_text)
示例#25
0
def test_round_function_follows_xpath_1_rules_for_positive_numbers_but_python_rules_for_negative_ones():
    """
    Pin down round() for a handful of positive and negative inputs.

    Positive or negative infinity and zero are deliberately left alone, as is the numeric type machinery of the
    XPath 3.0 functions spec. As the test name says, negative numbers do not follow XPath 1 rules: Python rounds
    away from zero, and matching XPath there would be tedious work for no real gain (HQuery is not meant to be an
    execution target for existing XPath code).
    """
    cases = (
        ('round(5.49)', '5'),
        ('round(5.5)', '6'),
        ('round(1 div 0)', 'NaN'),
        ('round(-5.5)', '-6'),
        ('round(-5.49)', '-5'),
    )
    for query, rounded in cases:
        assert query_html_doc('', query) == rounded
示例#26
0
def test_various_functions_use_context_node_when_no_argument_passed():
    """string(), normalize-space() and string-length() work on the context node when called without arguments."""
    html_body = """
    <p>first</p>
    <p>foo   bar</p>
    <p>last</p>"""

    by_string = query_html_doc(html_body, '//p[string() = "first"]/text()')
    assert by_string == expected_result('first')

    # preserve_space keeps the run of spaces in the output while the predicate matches the normalized form.
    by_normalized = query_html_doc(html_body, '//p[normalize-space() = "foo bar"]/text()', preserve_space=True)
    assert by_normalized == expected_result('foo   bar')

    by_length = query_html_doc(html_body, '//p[string-length() = 4]/text()')
    assert by_length == expected_result('last')
示例#27
0
def test_matches_function_performs_regex_matching_as_per_xpath_30_functions_spec():
    """matches() filters nodes by regular expression, optionally with the "i" flag."""
    html_body = """
    <p>moe</p>
    <p>larry</p>
    <p>curly</p>"""

    anchored = query_html_doc(html_body, '//p[matches(text(), "^l[ary]+")]/text()')
    assert anchored == expected_result('larry')

    case_insensitive = query_html_doc(html_body, '//p[matches(text(), ".URL.", "i")]/text()')
    assert case_insensitive == expected_result('curly')
示例#28
0
def test_css_class_axis_finds_elements_based_on_their_css_classes():
    """The class axis, in full or abbreviated form, selects every element carrying the named CSS class."""
    html_body = """
    <p class="foo">foo</p>
    <p class="foo bar">foo bar</p>
    <p class="bar">bar</p>"""
    expected = expected_result("""
    <p class="foo bar">
     foo bar
    </p>
    <p class="bar">
     bar
    </p>""")

    for query in ('//class::bar', '//.::bar'):
        assert query_html_doc(html_body, query) == expected
示例#29
0
def test_attribute_axis_in_full_and_abbreviated_form_selects_named_attributes_or_all_attributes():
    """attribute:: and @ are interchangeable for both a named attribute and the * wildcard."""
    html_body = """
    <div id="one"></div>
    <div id="two" class="three"></div>"""
    expected_ids_result = expected_result('''
    id="one"
    id="two"''')
    expected_all_result = expected_result('''
    id="one"
    class="three"
    id="two"''')
    cases = (
        ('//div/attribute::id', expected_ids_result),
        ('//div/@id', expected_ids_result),
        ('//attribute::*', expected_all_result),
        ('//@*', expected_all_result),
    )

    for query, expected in cases:
        assert query_html_doc(html_body, query) == expected
示例#30
0
def test_ancestor_axis_produces_all_ancestors_and_only_ancestors():
    """ancestor::* yields the enclosing <html> and <body>, not the siblings of the <div>."""
    html_body = """
    <html>
        <body>
            <!-- comment -->
            <h1></h1>
            <div></div>
        </body>
    </html>"""
    ancestors_markup = """
    <html>
     <body>
      <!-- comment -->
      <h1>
      </h1>
      <div>
      </div>
     </body>
    </html>
    <body>
     <!-- comment -->
     <h1>
     </h1>
     <div>
     </div>
    </body>"""

    actual = query_html_doc(html_body, '//div/ancestor::*', wrap_body=False)
    assert actual == expected_result(ancestors_markup)
def test_hash_keys_can_be_used_to_define_attributes_in_a_constructed_hash():
    """Explicit key: value pairs in a hash constructor show up as entries in the JSON output."""
    raw_json = query_html_doc('', 'hash {foo: "bar", moe: "larry"}')
    actual = json.loads(raw_json)

    for key, value in (('foo', 'bar'), ('moe', 'larry')):
        assert key in actual
        assert actual[key] == value
def test_element_constructor_accepts_attributes_from_original_document_including_multi_values_like_classes():
    """Attributes selected from the source document are copied onto the constructed element."""
    html_body = """
    <p class="one two" three="four">
        contents
    </p>"""

    all_attributes = query_html_doc(html_body, 'element test { //p/@* }')
    assert all_attributes == expected_result("""
    <test class="one two" three="four">
    </test>""")

    attribute_and_child = query_html_doc(html_body, 'element test { //p/@three, //p }')
    assert attribute_and_child == expected_result("""
    <test three="four">
     <p class="one two" three="four">
      contents
     </p>
    </test>""")
def test_non_string_types_survive_conversion_to_json():
    """Integer, float and boolean hash values keep their types in the JSON output."""
    raw_json = query_html_doc('', 'hash { integer: 1, float: 1.1, boolean: true() }')
    actual = json.loads(raw_json)
    expected_types = (('integer', int), ('float', float), ('boolean', bool))

    for name, _ in expected_types:
        assert name in actual
    for name, python_type in expected_types:
        assert isinstance(actual[name], python_type)
示例#34
0
def test_selects_the_forty_second_figure_element_in_the_document():
    """position()=42 on the descendant axis picks the 42nd <figure> out of 42."""
    figures = ['<figure>{0}</figure>'.format(n) for n in range(1, 43)]
    html_body = ''.join(figures)

    actual = query_html_doc(html_body, '/descendant::figure[position()=42]')
    assert actual == expected_result("""
    <figure>
     42
    </figure>""")
def test_hash_constructor_turns_tags_into_tag_name_keys_with_tag_content_values():
    """Each element fed to a hash constructor becomes a tag-name key holding the element's text."""
    html_body = """
    <p>foo</p>
    <div>bar</div>"""
    raw_json = query_html_doc(html_body, 'hash { /html/body/* }')
    actual = json.loads(raw_json)

    for tag, content in (('p', 'foo'), ('div', 'bar')):
        assert actual[tag] == content
示例#36
0
def test_abbreviated_context_node_works_in_predicate():
    """A predicate of the form [./p] keeps only the body children that have a <p> child."""
    html_body = """
    <div>
        <p>one</p>
    </div>
    <p>two</p>
    <div>
        three
    </div>
    <div>
        <p>four</p>
    </div>
    """
    divs_with_paragraphs = """
    <div>
     <p>
      one
     </p>
    </div>
    <div>
     <p>
      four
     </p>
    </div>"""

    assert query_html_doc(html_body, '/html/body/node()[./p]') == expected_result(divs_with_paragraphs)
示例#37
0
def test_position_function_in_second_predicate_applies_to_results_from_first_predicate():
    """position() in a second predicate counts within what the first predicate let through."""
    html_body = """
    <table>
        <tr class="select-me">
            <td>one</td>
            <td>two</td>
        </tr>
        <tr class="forget-me">
            <td>uno</td>
            <td>dos</td>
        </tr>
        <tr class="select-me">
            <td>ichi</td>
            <td>ni</td>
        </tr>
    </table>"""
    query = '//td[../@class="select-me"][position()=1]'

    actual = query_html_doc(html_body, query)
    assert actual == expected_result("""
    <td>
     one
    </td>
    <td>
     ichi
    </td>""")
示例#38
0
def test_double_slash_works_within_path():
    """// inside a path reaches <p> elements at any depth below a <section>, and none outside one."""
    html_body = """
    <section>
        <p>moe</p>
        <div>
            <div>
                <p>larry</p>
            </div>
            <p>curly</p>
        </div>
    </section>
    <p>joe besser</p>
    <section>
        <p>shemp</p>
    </section>"""
    paragraphs_in_sections = """
    <p>
     moe
    </p>
    <p>
     larry
    </p>
    <p>
     curly
    </p>
    <p>
     shemp
    </p>"""

    actual = query_html_doc(html_body, '//section//p')
    assert actual == expected_result(paragraphs_in_sections)
示例#39
0
def test_position_function_in_predicate_applies_to_current_step_only():
    """position() on the td step counts cells within each selected row separately."""
    html_body = """
    <table>
        <tr class="select-me">
            <td>one</td>
            <td>two</td>
        </tr>
        <tr class="forget-me">
            <td>uno</td>
            <td>dos</td>
        </tr>
        <tr class="select-me">
            <td>ichi</td>
            <td>ni</td>
        </tr>
    </table>"""
    query = '//tr[@class="select-me"]/td[position()=2]'

    actual = query_html_doc(html_body, query)
    assert actual == expected_result("""
    <td>
     two
    </td>
    <td>
     ni
    </td>""")
示例#40
0
def test_join_filter_joins_string_values_from_node_set():
    """The j filter joins the string values of the selected nodes with the given delimiter."""
    html_body = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    joined = query_html_doc(html_body, '`${j:,://p}`')
    assert joined == expected_result('one,two,three')
示例#41
0
def test_even_and_odd_functions_select_the_appropriate_elements_based_on_position():
    """even() and odd() in a predicate pick alternating elements by their position."""
    html_body = """
    <p>You</p>
    <p>I</p>
    <p>are</p>
    <p>am</p>
    <p>odd.</p>
    <p>even.</p>"""
    even_words = """
    I
    am
    even."""
    odd_words = """
    You
    are
    odd."""

    for query, words in (('//p[even()]/text()', even_words),
                         ('//p[odd()]/text()', odd_words)):
        assert query_html_doc(html_body, query) == expected_result(words)
示例#42
0
def test_predicate_can_be_applied_to_variable_containing_node_set():
    """A predicate attached to a variable reference filters the node set the variable holds."""
    html_body = """
    <p>not selected</p>
    <p id="foo">selected</p>"""

    actual = query_html_doc(html_body, 'let $x := //p return $x[@id="foo"]')
    assert actual == expected_result("""
    <p id="foo">
     selected
    </p>""")
示例#43
0
def test_matches_function_supports_a_subset_of_xpath_30_flag_values():
    """Exercise the "i", "s", "m" and "x" flags of matches(), each against its unflagged counterpart."""
    html_body = """
    <p>first</p>
    <p>second one</p>
    <p>
        multiple
        lines
        of
        text
    </p>"""
    # Recognizes the multi-line paragraph in the serialized output; re.S lets "." span line breaks.
    multiline_paragraph = re.compile(r'.+multiple.+text.+', re.S)

    # "i": case-insensitive matching.
    ignoring_case = query_html_doc(html_body, r'//p[matches(text(), "\w+RST", "i")]/text()')
    assert ignoring_case == expected_result('first')

    # "s": without it "." stops at line breaks, so nothing is selected.
    without_s = query_html_doc(html_body, r'//p[matches(text(), ".+lines.+text")]', preserve_space=True)
    assert without_s == ''
    with_s = query_html_doc(html_body, r'//p[matches(text(), ".+lines.+text", "s")]', preserve_space=True)
    assert multiline_paragraph.match(with_s)

    # "m": makes ^ and $ match at line boundaries.
    without_m = query_html_doc(html_body, r'//p[matches(text(), "^ *lines$")]', preserve_space=True)
    assert without_m == ''
    with_m = query_html_doc(html_body, r'//p[matches(text(), "^\s*lines$", "m")]', preserve_space=True)
    assert multiline_paragraph.match(with_m)

    # "x": whitespace inside the pattern is ignored.
    without_x = query_html_doc(html_body, r'//p[matches(text(), "sec  ond\sone")]/text()')
    assert without_x == ''
    with_x = query_html_doc(html_body, r'//p[matches(text(), "sec  ond\sone", "x")]/text()')
    assert with_x == 'second one'
示例#44
0
def test_union_decomposition_with_parentheses():
    """Each node of the union is mapped to the parenthesized alternative matching its originating operand."""
    html_body = """
    <h1>heading</h1>
    <p>content</p>
    <h1>another heading</h1>"""
    query = '(//h1 | //p) => ("fizz" | "buzz")'

    actual = query_html_doc(html_body, query)
    assert actual == expected_result("""
    fizz
    buzz
    fizz""")
def test_element_constructor_can_be_nested():
    """Element constructors inside another constructor become its child elements."""
    query = 'element moe {element larry {}, element curly {"Hey, Moe!"}}'

    actual = query_html_doc('', query)
    assert actual == expected_result("""
    <moe>
     <larry>
     </larry>
     <curly>
      Hey, Moe!
     </curly>
    </moe>""")
示例#46
0
def test_absolute_location_path_should_find_multiple_grandchildren():
    """An absolute path returns every matching <div> under the body and skips the <p>."""
    markup = '<div>one</div><p>not a div</p><div>two</div>'
    both_divs = """
    <div>
     one
    </div>
    <div>
     two
    </div>"""

    actual = query_html_doc(markup, '/html/body/div')
    assert actual == expected_result(both_divs)
示例#47
0
def test_union_decomposition_naked():
    """Union decomposition also works when the alternatives are not wrapped in parentheses."""
    html_body = """
    <h1>heading</h1>
    <p>content</p>
    <h1>another heading</h1>"""
    query = '(//h1 | //p) => `h1 $_` | `p $_`'

    actual = query_html_doc(html_body, query)
    assert actual == expected_result("""
    h1 heading
    p content
    h1 another heading""")
示例#48
0
def test_iteration_using_for():
    """A for expression evaluates its return clause once per node, in document order."""
    html_body = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    actual = query_html_doc(html_body, 'for $x in //p return $x/text()')
    assert actual == expected_result("""
    one
    two
    three""")
示例#49
0
def test_abbreviated_flowr_provides_expected_iteration_variable_in_value_clause():
    """The -> shorthand binds each node in turn to $_ for use in the value clause."""
    html_body = """
    <p>one</p>
    <p>two</p>
    <p>three</p>"""

    actual = query_html_doc(html_body, '//p -> $_/text()')
    assert actual == expected_result("""
    one
    two
    three""")
def test_hash_constructor_filters_can_be_combined():
    """The array and number filters give the same result whichever one is written first."""
    html_body = """
    <p>20</p>
    <div>20</div>
    <h1>20.20</h1>"""
    queries = (
        'hash {a:p,h1:n:div,h1:} { /html/body/* }',
        'hash {n:div,h1:a:p,h1:} { /html/body/* }',
    )

    for query in queries:
        actual = json.loads(query_html_doc(html_body, query))
        assert isinstance(actual['p'], list)
        assert isinstance(actual['h1'], list)
        assert actual['p'][0] == '20'
        assert actual['div'] == 20
        assert actual['h1'][0] == 20.2
def test_hash_constructor_array_filter_causes_matching_elements_to_be_put_in_an_array():
    """The a: filter wraps the named tag's value in a list even when there is only one element."""
    html_body = """
    <h1>zero</h1>
    <p>one</p>"""
    raw_json = query_html_doc(html_body, 'hash {a:h1:} { /html/body/* }')
    actual = json.loads(raw_json)

    assert actual['p'] == 'one'
    headings = actual['h1']
    assert isinstance(headings, list)
    assert len(headings) == 1
    assert headings[0] == 'zero'
示例#52
0
def test_rooted_location_paths_work_with_both_kinds_of_slash():
    """Paths rooted at a variable accept both / (children) and // (descendants)."""
    html_body = """
    <section>
        <div>
            <div>foo</div>
        </div>
    </section>
    <section>
        <div>
            <div>bar</div>
        </div>
    </section>"""

    children = query_html_doc(html_body, 'for $x in //section return $x/div')
    assert children == expected_result("""
    <div>
     <div>
      foo
     </div>
    </div>
    <div>
     <div>
      bar
     </div>
    </div>""")

    descendants = query_html_doc(html_body, 'for $x in //section return $x//div')
    assert descendants == expected_result("""
    <div>
     <div>
      foo
     </div>
    </div>
    <div>
     foo
    </div>
    <div>
     <div>
      bar
     </div>
    </div>
    <div>
     bar
    </div>""")
def test_hash_constructor_number_filter_causes_contents_of_matching_elements_to_be_interpreted_as_numbers():
    """The n: filter turns the listed tags' contents into numbers and leaves other tags as strings."""
    html_body = """
    <p>20</p>
    <div>20</div>
    <h1>20.20</h1>"""
    raw_json = query_html_doc(html_body, 'hash {n:div,h1:} { /html/body/* }')
    actual = json.loads(raw_json)

    for tag, value in (('p', '20'), ('div', 20), ('h1', 20.2)):
        assert actual[tag] == value
def test_hash_constructor_coalesces_like_elements_into_an_array_by_default():
    """Repeated tags are gathered into one list under the tag name; a lone tag stays a scalar."""
    html_body = """
    <p>one</p>
    <div>two</div>
    <p>three</p>"""
    raw_json = query_html_doc(html_body, 'hash { /html/body/* }')
    actual = json.loads(raw_json)

    paragraphs = actual['p']
    assert isinstance(paragraphs, list)
    assert len(paragraphs) == 2
    assert paragraphs[1] == 'three'
    assert actual['div'] == 'two'