def test_css_considers_xml_namespace(files_path):
    """Check that the ``xml`` prefix can be used in CSS selectors.

    The prefix is used once in a type selector against an inline document
    and twice in attribute selectors against a TEI fixture file.
    """
    inline_doc = Document("<root><xml:node/><node/></root>")
    prefixed_nodes = inline_doc.css_select("xml|node")
    assert prefixed_nodes.size == 1

    tei_doc = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    assert tei_doc.css_select("*[xml|id]").size == 1
    assert tei_doc.css_select("*[xml|lang]").size == 2
def test_wrapper_consistency():
    # This test is the result of an investigation into why
    # `test_insert_issue_in_a_more_complex_situation` failed. As a result, the
    # way node wrappers are tracked has been refactored. So this test checks
    # under-the-hood expectations for aspects of the mentioned test. When
    # maintaining it requires effort, it should rather be dropped.
    def node_ids():
        # Snapshot the identities of whatever objects the enclosing function's
        # local names are bound to at the moment of the call.
        return {
            "root": id(root),
            "foo": id(foo),
            "div1": id(div1),
            "div2": id(div2),
            "text": id(text),
        }

    document = Document("<root><foo><div1><div2/>text</div1></foo></root>")

    root = document.root
    foo = root.first_child
    div1 = foo.first_child
    div2 = div1.first_child
    text = div1.last_child

    # Baseline that every later snapshot is compared against.
    original_ids = node_ids()

    # Detach `div1`, fetch the other nodes again and expect the same objects.
    div1.detach()
    foo = root.first_child
    div2 = div1.first_child
    text = div1.last_child
    assert node_ids() == original_ids

    # Detaching `foo` as well must not change any identity.
    foo.detach()
    assert node_ids() == original_ids

    # Re-attach `div1` directly below the root and fetch everything via the
    # root again.
    root.insert_child(0, div1)
    div1 = root.first_child
    div2 = div1.first_child
    text = div1.last_child
    assert node_ids() == original_ids

    # Perform a detach / insert cycle on a second document that is built from
    # the serialisation of the first one. The names `div1` and `div2` are
    # temporarily rebound to that document's nodes here.
    other_doc = Document(str(document))
    div1 = other_doc.css_select("div1").first
    div2 = other_doc.css_select("div2").first
    div2.detach()
    div1.insert_child(0, div2)

    # Back in the first document: look the nodes up via CSS, detach `div2`
    # and expect the objects to be the ones recorded initially.
    div1 = document.css_select("div1").first
    div2 = document.css_select("div2").first
    div2.detach()
    div1 = root.first_child
    text = div1.first_child
    assert node_ids() == original_ids

    # No assertion follows; the final insertion only has to complete.
    div1.insert_child(0, div2)
def test_quotes_in_css_selector():
    """Check attribute selectors whose quoted values contain URL characters."""
    document = Document('<a href="https://super.test/123"/>')

    always_matching = (
        'a[href^="https://super.test/"]',
        'a[href|="https://super.test/123"]',
        'a[href*="super"]',
    )
    for selector in always_matching:
        assert document.css_select(selector).size == 1

    # TODO
    # NOTE(review): the version gate is assumed to cover only the `:not()`
    # assertion - confirm against the project's history.
    if DELB_VERSION >= (0, 4):
        negated = document.css_select('a:not([href|="https"])')
        assert negated.size == 1

    # TODO specify an `ends-with` function for XPath
    suffix_matches = document.css_select('a[href$="123"]')
    assert suffix_matches.size == 1
def test_css_select_or(files_path):
    """Check that a comma separated selector list yields both alternatives."""
    document = Document(files_path / "tei_stevenson_treasure_island.xml")
    matches = document.css_select("titleStmt title, titleStmt author")

    assert len(matches) == 2

    found_names = set()
    for node in matches:
        found_names.add(node.local_name)
    assert found_names == {"author", "title"}
def test_css_select():
    """Check descendant selectors with and without declared namespaces."""
    # plain document without any namespace declaration
    plain_doc = Document("<root><a><b/><c/><b/></a></root>")
    b_nodes = plain_doc.css_select("a b")
    assert len(b_nodes) == 2
    for node in b_nodes:
        assert node.local_name == "b"

    # document with a default namespace and an additional prefixed one
    namespaced_doc = Document(
        '<root xmlns="x" xmlns:y="y"><a><b/><y:c/><b/></a></root>'
    )
    b_nodes = namespaced_doc.css_select("a b")
    assert len(b_nodes) == 2
    for node in b_nodes:
        assert node.local_name == "b"

    c_nodes = namespaced_doc.css_select("a y|c")
    assert len(c_nodes) == 1
    assert c_nodes[0].universal_name == "{y}c"
def test_detach_node_retains_namespace_prefixes():
    """Check that a detached subtree can still be queried by element name."""
    # libxml2 loses the notion of a default prefix for nodes that have been
    # removed from a parent node
    document = Document("""\
        <root xmlns="schema://default/">
            <child><grandchild/></child>
        </root>
    """)

    attached_child = document.css_select("child").first
    child = attached_child.detach()

    grandchildren = child.css_select("grandchild")
    assert grandchildren.size == 1
def test_results_equality():
    """Check how query results compare to other results and sequences."""

    def accept_all(_node):
        return True

    document = Document(
        """\
        <root>
            <s corresp="src:tlaIBUBd4DTggLNoE2MvPgWWka2UdY">
                <w corresp="src:tlaIBUBdzQ3wWIW60TVhNy3cRxYmgg"><unclear/></w>
                <w corresp="src:tlaIBUBd7n0fy1OPU1DjVU66j2B4Qc"><unclear/></w>
            </s>
        </root>
        """
    )
    word_nodes = document.css_select("s w")

    # equal to an unfiltered copy and to plain sequences, regardless of order
    assert word_nodes == word_nodes.filtered_by(accept_all)
    assert word_nodes == word_nodes.as_list()
    assert word_nodes == tuple(word_nodes.as_list()[::-1])

    # a sequence that holds every node twice is not equal
    assert word_nodes != word_nodes.as_list() * 2

    # the results object may also be the right hand operand
    assert [document.root] == document.css_select("root")

    # comparing to a single node is expected to be rejected
    with pytest.raises(TypeError):
        document.css_select("root") == document.root
def test_remove_elements(keep_children, preserve_text, clear_ref):
    """Run ``lib.remove_nodes`` against a stub transformation.

    The three arguments are passed through to ``lib.remove_nodes`` and each
    one is checked against its observable effect afterwards.
    """
    root = Document("<root><a>foo<b/></a></root>").root
    trash_bin = [root.first_child]

    # minimal stand-in that provides what the handler is given to work with
    states = SimpleNamespace(previous_result=None)
    transformation = SimpleNamespace(
        _available_symbols={"trashbin": trash_bin},
        states=states,
    )

    handler = lib.remove_nodes(
        "trashbin",
        keep_children=keep_children,
        preserve_text=preserve_text,
        clear_ref=clear_ref,
    )
    handler(transformation)

    assert not root.css_select("a")
    assert keep_children == bool(root.css_select("b"))
    assert preserve_text == (root.full_text == "foo")
    assert clear_ref == (not trash_bin), (clear_ref, trash_bin)
def test_fetch_or_create_by_xpath_with_predicates_in_parentheses():
    """Check that parenthesised predicates resolve to one and the same node."""
    root = Document("<root/>").root

    # one predicate that joins both attribute tests with `and`
    cit = root.fetch_or_create_by_xpath(
        './entry/sense/cit[((@type="translation") and (@lang="en"))]'
    )

    # the same tests as two consecutive predicates
    fetched_again = root.fetch_or_create_by_xpath(
        './entry/sense/cit[(@type="translation")][((@lang="en"))]'
    )
    assert fetched_again is cit

    matching = root.css_select('entry > sense > cit[lang="en"]')
    assert matching.size == 1
def test_id_property(files_path):
    """Exercise reading, deleting, setting and validating the ``id`` property.

    Covers the value read from the fixture, removal via ``None``, setting a
    new value, rejection of a non-string value and rejection of a value that
    is already in use, both within the document and within a detached
    subtree.
    """
    document = Document(files_path / "tei_marx_manifestws_1848.TEI-P5.xml")
    publisher = document.css_select("publicationStmt publisher").first

    assert publisher.id == "DTACorpusPublisher"

    # assigning `None` removes the attribute
    publisher.id = None
    assert XML_ATT_ID not in publisher.attributes

    publisher.id = "foo"
    assert publisher.attributes[XML_ATT_ID] == "foo"

    with pytest.raises(TypeError):
        publisher.id = 1234

    # presumably rejected because "foo" is already used by `publisher`
    with pytest.raises(InvalidOperation):
        publisher.parent.id = "foo"

    publisher.detach()
    # The lookup is kept out of the `raises` block so that only the
    # assignment itself can satisfy the expectation.
    a_tag_child_node = next(publisher.child_nodes(is_tag_node))
    with pytest.raises(InvalidOperation):
        a_tag_child_node.id = "foo"
def test_delete_namespaced_attribute():
    """Check that one namespaced attribute can be deleted via slice notation."""
    document_root = Document(
        '<root><node xmlns:p="ns" p:a="1" p:b="2"/></root>'
    ).root
    node = document_root.css_select("root > node")[0]
    attributes_before = len(node.attributes)
    assert attributes_before == 2

    # the slice carries the namespace and the local name of the attribute
    del node.attributes["ns":"a"]

    attributes_after = len(node.attributes)
    assert attributes_after == 1