Пример #1
0
def test_css_considers_xml_namespace(files_path):
    """CSS selectors can address the ``xml`` prefix in names and attributes.

    The inline document uses ``xml:node`` without declaring the prefix; the
    TEI fixture is queried for ``xml:id`` and ``xml:lang`` attributes.
    """
    doc = Document("<root><xml:node/><node/></root>")
    prefixed_nodes = doc.css_select("xml|node")
    assert prefixed_nodes.size == 1

    doc = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    # selector -> number of nodes expected in the fixture
    expected_sizes = {"*[xml|id]": 1, "*[xml|lang]": 2}
    for selector, expected in expected_sizes.items():
        assert doc.css_select(selector).size == expected
Пример #2
0
def test_wrapper_consistency():
    """Check that node objects keep their identity across tree edits.

    The nodes of a small document are fetched once and their ``id()`` values
    are recorded. After each detach / insert step the nodes are fetched again
    through the tree and their ``id()`` values are compared with the recorded
    ones.
    """
    # This test is the result of an investigation into why
    # `test_insert_issue_in_a_more_complex_situation` failed.
    # As a result, the way node wrappers are tracked has been refactored,
    # so this test looks at under-the-hood expectations for aspects of the
    # mentioned test. If maintaining it requires effort, it should rather be
    # dropped.
    def node_ids():
        # The names are looked up in the enclosing function at call time, so
        # the mapping reflects whatever objects they are bound to right now.
        return {
            "root": id(root),
            "foo": id(foo),
            "div1": id(div1),
            "div2": id(div2),
            "text": id(text),
        }

    document = Document("<root><foo><div1><div2/>text</div1></foo></root>")

    root = document.root
    foo = root.first_child
    div1 = foo.first_child
    div2 = div1.first_child
    text = div1.last_child

    # Baseline identities that every later snapshot is compared against.
    original_ids = node_ids()

    # Detach div1 from foo, then fetch foo, div2 and text again via the tree.
    div1.detach()
    foo = root.first_child
    div2 = div1.first_child
    text = div1.last_child
    assert node_ids() == original_ids

    # Detach foo as well; no name is rebound before this comparison.
    foo.detach()
    assert node_ids() == original_ids

    # Attach div1 directly below root and fetch it and its children again.
    root.insert_child(0, div1)
    div1 = root.first_child
    div2 = div1.first_child
    text = div1.last_child
    assert node_ids() == original_ids

    # Build a second document from the serialisation of the first one and
    # move div2 around in it. The names div1 / div2 temporarily refer to
    # nodes of `other_doc` here.
    other_doc = Document(str(document))
    div1 = other_doc.css_select("div1").first
    div2 = other_doc.css_select("div2").first
    div2.detach()
    div1.insert_child(0, div2)

    # Rebind div1 / div2 to the nodes of the original document.
    div1 = document.css_select("div1").first
    div2 = document.css_select("div2").first

    # With div2 detached, the text node is fetched as div1's first child.
    div2.detach()
    div1 = root.first_child
    text = div1.first_child
    assert node_ids() == original_ids

    # NOTE(review): nothing is asserted after this insertion; presumably it
    # only has to succeed without raising - confirm.
    div1.insert_child(0, div2)
Пример #3
0
def test_quotes_in_css_selector():
    """Attribute selectors with quoted values select the single ``a`` node."""
    document = Document('<a href="https://super.test/123"/>')

    always_checked = (
        'a[href^="https://super.test/"]',
        'a[href|="https://super.test/123"]',
        'a[href*="super"]',
    )
    for selector in always_checked:
        assert document.css_select(selector).size == 1

    # TODO
    if not DELB_VERSION >= (0, 4):
        return

    negated = document.css_select('a:not([href|="https"])')
    assert negated.size == 1
    # TODO specify an `ends-with` function for XPath
    suffix_matched = document.css_select('a[href$="123"]')
    assert suffix_matched.size == 1
Пример #4
0
def test_css_select_or(files_path):
    """A comma-separated selector group yields the nodes of both selectors."""
    fixture = files_path / "tei_stevenson_treasure_island.xml"
    document = Document(fixture)

    selected = document.css_select("titleStmt title, titleStmt author")
    assert len(selected) == 2

    found_names = {node.local_name for node in selected}
    assert found_names == {"author", "title"}
Пример #5
0
def test_css_select():
    """Descendant selectors work without and with namespace declarations."""
    document = Document("<root><a><b/><c/><b/></a></root>")

    matches = document.css_select("a b")
    assert len(matches) == 2
    for node in matches:
        assert node.local_name == "b"

    # Same structure, now with a default namespace and a prefixed one.
    namespaced_markup = '<root xmlns="x" xmlns:y="y"><a><b/><y:c/><b/></a></root>'
    document = Document(namespaced_markup)

    matches = document.css_select("a b")
    assert len(matches) == 2
    for node in matches:
        assert node.local_name == "b"

    matches = document.css_select("a y|c")
    assert len(matches) == 1
    only_match = matches[0]
    assert only_match.universal_name == "{y}c"
Пример #6
0
def test_detach_node_retains_namespace_prefixes():
    """A detached node can still be queried with unprefixed CSS selectors."""
    # libxml2 loses the notion of a default prefix for nodes that have been
    # removed from a parent node
    markup = """\
        <root xmlns="schema://default/">
            <child><grandchild/></child>
        </root>
        """
    document = Document(markup)

    attached_child = document.css_select("child").first
    child = attached_child.detach()

    grandchildren = child.css_select("grandchild")
    assert grandchildren.size == 1
Пример #7
0
def test_results_equality():
    """Query results compare equal to collections holding the same nodes.

    Checked against a filtered copy, a list, a reversed tuple and a list with
    duplicated entries; comparing with a single node must raise ``TypeError``.
    """
    markup = """\
        <root>
            <s corresp="src:tlaIBUBd4DTggLNoE2MvPgWWka2UdY">
                <w corresp="src:tlaIBUBdzQ3wWIW60TVhNy3cRxYmgg"><unclear/></w>
                <w corresp="src:tlaIBUBd7n0fy1OPU1DjVU66j2B4Qc"><unclear/></w>
            </s>
        </root>
        """
    document = Document(markup)
    word_nodes = document.css_select("s w")

    unfiltered_copy = word_nodes.filtered_by(lambda n: True)
    assert word_nodes == unfiltered_copy

    assert word_nodes == word_nodes.as_list()

    reversed_nodes = tuple(reversed(word_nodes.as_list()))
    assert word_nodes == reversed_nodes

    doubled_nodes = 2 * word_nodes.as_list()
    assert word_nodes != doubled_nodes

    # The list is deliberately the left operand here.
    assert [document.root] == document.css_select("root")

    with pytest.raises(TypeError):
        document.css_select("root") == document.root
Пример #8
0
def test_remove_elements(keep_children, preserve_text, clear_ref):
    """Run ``lib.remove_nodes`` on a stub transformation and check each flag.

    The three arguments are forwarded as keyword arguments of the same name
    and each one is compared with an observable effect on the tree or on the
    referenced list.
    """
    root = Document("<root><a>foo<b/></a></root>").root
    trash_bin = [root.first_child]

    # Minimal stand-in that offers the attributes set up for the handler.
    stub_states = SimpleNamespace(previous_result=None)
    transformation = SimpleNamespace(
        _available_symbols={"trashbin": trash_bin},
        states=stub_states,
    )

    handler = lib.remove_nodes(
        "trashbin",
        keep_children=keep_children,
        preserve_text=preserve_text,
        clear_ref=clear_ref,
    )
    handler(transformation)

    assert not root.css_select("a")

    b_still_present = bool(root.css_select("b"))
    assert keep_children == b_still_present

    text_still_present = root.full_text == "foo"
    assert preserve_text == text_still_present

    bin_is_empty = not bool(trash_bin)
    assert clear_ref == bin_is_empty, (clear_ref, trash_bin)
Пример #9
0
def test_fetch_or_create_by_xpath_with_predicates_in_parentheses():
    """Parenthesised predicates are handled when fetching or creating nodes.

    A second call with the same conditions, written as two separate
    predicates, must return the node object obtained by the first call.
    """
    root = Document("<root/>").root

    combined_predicate = (
        './entry/sense/cit[((@type="translation") and (@lang="en"))]'
    )
    cit = root.fetch_or_create_by_xpath(combined_predicate)

    separate_predicates = (
        './entry/sense/cit[(@type="translation")][((@lang="en"))]'
    )
    fetched_again = root.fetch_or_create_by_xpath(separate_predicates)
    assert fetched_again is cit

    matching_cits = root.css_select('entry > sense > cit[lang="en"]')
    assert matching_cits.size == 1
Пример #10
0
def test_id_property(files_path):
    """Exercise reading, clearing, setting and validating a node's ``id``.

    Covers the stored value, removal via ``None``, assignment of a string,
    rejection of a non-string value and rejection of an identifier that the
    ``publisher`` node already carries.
    """
    document = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    publisher = document.css_select("publicationStmt publisher").first

    assert publisher.id == "DTACorpusPublisher"

    # Assigning ``None`` removes the attribute altogether.
    publisher.id = None
    assert XML_ATT_ID not in publisher.attributes

    publisher.id = "foo"
    assert publisher.attributes[XML_ATT_ID] == "foo"

    with pytest.raises(TypeError):
        publisher.id = 1234

    # "foo" is already assigned to `publisher`, a child of this parent node.
    with pytest.raises(InvalidOperation):
        publisher.parent.id = "foo"

    publisher.detach()
    # The lookup stays outside of the `raises` block so that only the
    # assignment itself can satisfy the expectation; an error raised while
    # fetching the child node must not let the test pass.
    a_tag_child_node = next(publisher.child_nodes(is_tag_node))
    with pytest.raises(InvalidOperation):
        a_tag_child_node.id = "foo"
Пример #11
0
def test_delete_namespaced_attribute():
    """Deleting an attribute via a ``namespace:name`` slice removes only it."""
    document_root = Document(
        '<root><node xmlns:p="ns" p:a="1" p:b="2"/></root>'
    ).root
    selected = document_root.css_select("root > node")
    node = selected[0]

    attribute_count = len(node.attributes)
    assert attribute_count == 2

    del node.attributes["ns":"a"]

    attribute_count = len(node.attributes)
    assert attribute_count == 1