def transform_toc(routes, toc_order, src_to_title, root_dir, epub_zip,
                  section_title, filepath):
    """Parse the TOC document inside ``epub_zip`` and run it through the
    TOC transformation.

    The archive member ``root_dir``/``filepath`` is parsed as XML with blank
    text removed, and its root element is handed to the transformation
    together with ``src_to_title`` and ``toc_order``.

    :param routes: made available to the handlers through the context.
    :param toc_order: passed to the transformation at call time only.
    :param src_to_title: available through the context and also passed at
                         call time.
    :param root_dir: directory inside the archive that contains ``filepath``.
    :param epub_zip: archive object offering ``open(member_name)``; it is
                     deliberately kept out of the transformation context.
    :param section_title: made available to the handlers through the context.
    :param filepath: path of the TOC document relative to ``root_dir``.
    :returns: the transformation's result, configured as ``context.html``.
    """
    # Archive member names are always '/'-separated, independent of the
    # host OS, hence ``posixpath`` rather than ``os.path``.
    import posixpath

    # All arguments except the archive handle and the TOC order.
    context = {
        'routes': routes,
        'src_to_title': src_to_title,
        'root_dir': root_dir,
        'section_title': section_title,
        'filepath': filepath,
    }

    transformation = Transformation(
        inxs.lib.init_elementmaker(
            name='elmaker',
        ),
        make_toc_skeleton,
        Rule('navPoint', set_titles),
        Rule(
            Any(
                MatchesAttributes({'href': None}),
                MatchesAttributes({'src': None}),
            ),
            route_url,
        ),
        make_toc,
        list_contents,
        indent,
        result_object='context.html',
        context=context,
    )

    member_name = posixpath.join(root_dir, filepath)
    with epub_zip.open(member_name) as doc_xml:
        parser = etree.XMLParser(remove_blank_text=True)
        doc_tree = etree.parse(doc_xml, parser)

    root = doc_tree.getroot()
    return transformation(
        root,
        src_to_title=src_to_title,
        toc_order=toc_order,
    )
def test_add_html_classes():
    """Exercise ``lib.add_html_classes`` with literal and referenced names.

    Every transformation targets the ``body`` element; the assertions look
    at the ``class`` attribute of the first child of the resulting root.
    """

    def body_class(transformation, document):
        # ``body`` is the first (and only) child of the ``html`` root.
        return transformation(document).root[0].attributes["class"]

    one_class = Transformation(
        Rule("body", lib.add_html_classes("transformed")))

    plain = Document("<html><body/></html>")
    assert body_class(one_class, plain) == "transformed"

    # A pre-existing class value must survive the transformation.
    preset = Document('<html><body class="loaded" /></html>')
    class_value = body_class(one_class, preset)
    for name in ("transformed", "loaded"):
        assert name in class_value

    two_classes = Transformation(
        Rule("body", lib.add_html_classes("transformed", "and_something_else")))
    class_value = body_class(two_classes, preset)
    for name in ("and_something_else", "loaded", "transformed"):
        assert name in class_value

    # The class names may also be looked up from the context via ``Ref``.
    referenced_classes = Transformation(
        Rule("body", lib.add_html_classes(Ref("html_classes"))),
        context={"html_classes": ["transformed", "and_something_else"]},
    )
    class_value = body_class(referenced_classes, preset)
    for name in ("and_something_else", "loaded", "transformed"):
        assert name in class_value
def test_strip_namespace():
    """A namespaced ``div`` ends up with the plain tag ``div``."""
    ns_uri = 'http://www.example.org/ns/'
    maker = builder.ElementMaker(namespace=ns_uri, nsmap={'x': ns_uri})
    namespaced_div = maker.div()

    # The rule condition is the namespace URI itself.
    strip = Transformation(Rule(ns_uri, lib.strip_namespace))
    stripped = strip(namespaced_div)

    assert stripped.tag == 'div'
def test_attributes(constraint):
    """With ``constraint`` as the rule condition only the first ``a`` gets text."""
    source = Document('<root><a b="x"/><a b="y"/></root>')
    set_x = Transformation(Rule(constraint, lib.set_text("x")))

    root = set_x(source).root

    assert not root._data_node._exists
    assert root[0].full_text == "x", str(root)
    # The second ``a`` must stay without any child nodes.
    assert len(root[1]) == 0
def test_xpath(xpath):
    """With ``xpath`` as the rule condition ``a`` gets text and ``b`` does not."""
    source = Document("<root><a/><b/></root>")
    set_x = Transformation(Rule(xpath, lib.set_text("x")))

    root = set_x(source).root

    assert not root._data_node._exists
    a_node = first(root.css_select("a"))
    assert a_node.full_text == "x"
    b_node = first(root.css_select("b"))
    assert b_node.full_text == ""
def test_If():
    """``If`` conditions accept plain values as well as callables."""

    def return_zero():
        return 0

    def return_one():
        return 1

    # 0 == 0 holds, so variable ``a`` is expected to be set.
    literals_equal = If(0, operator.eq, 0)
    # 0 == 1 does not hold; negated, so variable ``b`` is expected as well.
    callables_differ = Not(If(return_zero, operator.eq, return_one))

    transformation = Transformation(
        Rule(literals_equal, lib.put_variable("a")),
        Rule(callables_differ, lib.put_variable("b")),
        result_object="context",
    )

    context = transformation(Document("<root/>"))

    assert hasattr(context, "a")
    assert hasattr(context, "b")
def test_wikipedia_example_1():
    """Turn every ``person`` of ``wp_document`` into a ``name`` node."""
    expected_markup = (
        ' <root>'
        ' <name username="******">John</name>'
        ' <name username="******">Morka</name>'
        ' </root> '
    )
    expected = Document(expected_markup)

    def extract_person(node: TagNode):
        # Yields a ``(username, name)`` pair for the next handler.
        name_node = first(node.css_select("name"))
        return node.attributes["username"], name_node.full_text

    def append_person(previous_result, result: TagNode):
        username, name = previous_result
        name_node = result.new_tag_node(
            "name",
            attributes={"username": username},
            children=[name],
        )
        result.append_child(name_node)

    transformation = Transformation(
        Rule("person", (extract_person, append_person)),
        result_object="context.result",
        context={"result": new_tag_node("root")},
    )

    # NOTE: the author compared this test's size to an XSLT implementation.
    actual = transformation(wp_document.root)
    assert equal_subtree(actual, expected.root)
def test_strip_namespace():
    """A namespaced ``div`` node ends up with the qualified name ``div``."""
    ns_uri = "http://www.example.org/ns/"
    register_namespace("x", ns_uri)
    namespaced_div = new_tag_node("div", namespace=ns_uri)

    # The rule condition is the namespace URI itself.
    strip = Transformation(Rule(ns_uri, lib.remove_namespace))
    stripped = strip(namespaced_div)

    assert stripped.qualified_name == "div"
def test_OneOf():
    """Check which nodes a ``OneOf`` condition lets ``lib.set_text`` touch.

    The root is expected to receive the text ``x`` as its first child while
    the tag node children keep an empty text.
    """
    source = Document('<root x="x"><a x="x"/><b x="x"/></root>')
    condition = OneOf("a", "b", {"x": "x"})
    set_x = Transformation(Rule(condition, lib.set_text("x")))

    root = set_x(source).root

    assert root[0] == "x"
    for child in root.child_nodes(is_tag_node):
        assert child.full_text == ""
def test_is_root_condition():
    """Combining ``"a"`` with ``"/"`` collects exactly one node."""
    collect_root_a = Transformation(
        Rule(("a", "/"), lib.append("basket")),
        result_object="context.basket",
        context={"basket": []},
    )

    # Both nodes are named ``a``, but the basket must hold a single entry.
    nested = Document("<a><a/></a>")
    basket = collect_root_a(nested)

    assert len(basket) == 1
def test_attributes_re_key(constraint, expected):
    """The local names collected for ``constraint`` must equal ``expected``."""
    markup = (
        '<root><item1 default-source="x"/>'
        '<item2 default-value="y"/><item3/></root>'
    )
    source = Document(markup)

    collect_names = Transformation(
        Rule(constraint, (lib.get_localname, lib.append("result"))),
        context={"result": []},
        result_object="context.result",
    )

    collected = collect_names(source)
    assert collected == expected
def test_css_selector(selector, expected):
    """The text stored for the node matched by ``selector`` equals ``expected``."""
    markup = (
        '<section xmlns="foo"><table><head>Table Header</head></table>'
        '<cb type="start">X</cb><row>#</row></section>'
    )
    source = Document(markup)

    grab_text = Transformation(
        Rule(selector, (lib.get_text, lib.put_variable("result"))),
        result_object="context.result",
    )

    stored = grab_text(source)
    assert stored == expected
def transform_document(routes, root_dir, epub_zip, filepath, currentpath,
                       stats, fallback_url):
    """Parse one HTML document inside ``epub_zip`` and transform it.

    The archive member ``root_dir``/``filepath`` is read and parsed with
    ``html5.parse`` (falling back to UTF-8). The transformation then runs
    with a ``requests`` session that is closed again afterwards.

    :param routes: made available to the handlers through the context.
    :param root_dir: directory inside the archive that contains ``filepath``.
    :param epub_zip: archive object offering ``open(member_name)``; it is
                     deliberately kept out of the transformation context.
    :param filepath: path of the document relative to ``root_dir``.
    :param currentpath: made available to the handlers through the context.
    :param stats: made available to the handlers through the context.
    :param fallback_url: made available to the handlers through the context.
    :returns: whatever the transformation returns.
    """
    # Archive member names are always '/'-separated, independent of the
    # host OS, hence ``posixpath`` rather than ``os.path``.
    import posixpath

    # All arguments except the archive handle.
    context = {
        'routes': routes,
        'root_dir': root_dir,
        'filepath': filepath,
        'currentpath': currentpath,
        'stats': stats,
        'fallback_url': fallback_url,
    }

    transformation = Transformation(
        add_re_namespace,
        Rule("title", remove_from_tree),
        Rule(has_link, route_url),
        Rule(MatchesXPath(sutta_ref_xpath), link_sutta_references),
        context=context,
    )

    member_name = posixpath.join(root_dir, filepath)
    with epub_zip.open(member_name) as doc_xml:
        doc_tree = html5.parse(doc_xml.read(), fallback_encoding='utf-8')

    with requests.Session() as s:
        return transformation(doc_tree, session=s)
def test_common_conditions():
    """``common_rule_conditions`` restricts a catch-all rule to ``href`` nodes."""
    source = Document(
        '<root><a href="foo"/><a id="bar"/><a href="peng"/></root>')

    collect_href = (lib.get_attribute("href"), lib.append("references"))
    transformation = Transformation(
        Rule("*", collect_href),
        common_rule_conditions={"href": None},
        context={"references": []},
        result_object="context.references",
    )

    # The node that only carries an ``id`` must not contribute anything.
    references = transformation(source)
    assert references == ["foo", "peng"]
def linkfix_document(routes, filepath, currentpath, stats, fallback_url):
    """Check and fix the links of the HTML document at ``currentpath``.

    The file is read in binary mode and parsed with ``html5.parse`` (falling
    back to UTF-8). ``check_and_fix_link`` is applied to nodes that satisfy
    both ``has_link`` and ``has_path_url``. ``stats`` is kept out of the
    initial context and passed at call time, together with a ``requests``
    session that is closed afterwards.

    :returns: whatever the transformation returns.
    """
    # All arguments except ``stats``.
    context = dict(
        routes=routes,
        filepath=filepath,
        currentpath=currentpath,
        fallback_url=fallback_url,
    )

    fix_links = Rule([has_link, has_path_url], check_and_fix_link)
    transformation = Transformation(fix_links, context=context)

    with open(currentpath, mode='rb') as doc:
        raw_html = doc.read()
    doc_tree = html5.parse(raw_html, fallback_encoding='utf-8')

    with requests.Session() as s:
        return transformation(doc_tree, session=s, stats=stats)
def test_subtransformation():
    """A transformation can be used as a step of another transformation.

    The outer transformation stores ``id(root)`` before and after the nested
    step and only runs its renaming rule when the two values differ.
    """
    rename_to_pablo = Transformation(Rule("*", lib.set_localname("pablo")))

    ids_differ = Not(If(Ref("source_id"), operator.eq, Ref("result_id")))
    complain_and_abort = (
        lib.debug_message("NO!"),
        lib.debug_symbols("root"),
        lib.set_localname("neruda"),
        AbortRule,
    )

    transformation = Transformation(
        lib.f(id, Ref("root")),
        lib.put_variable("source_id"),
        rename_to_pablo,
        lib.f(id, Ref("root")),
        lib.put_variable("result_id"),
        lib.debug_symbols("source_id", "result_id"),
        Rule(ids_differ, complain_and_abort),
    )

    source = Document("<augustus />")
    assert source.root.local_name == "augustus"

    transformed = transformation(source)
    assert transformed.root.local_name == "pablo"
def test_SkipToNextElement():
    """Raising ``SkipToNextNode`` in a handler skips the rest of the rule."""

    def more_complicated_test(node: TagNode):
        # Nodes without ``x`` or with an odd ``x`` are skipped; the local
        # name of the remaining ones is handed to the next handler.
        attributes = node.attributes
        if "x" not in attributes or int(attributes["x"]) % 2:
            raise SkipToNextNode
        return node.local_name

    collect_evens = Transformation(
        Rule("*", (more_complicated_test, lib.append("evens"))),
        context={"evens": []},
        result_object="context.evens",
    )

    source = Document('<root><a x="1"/><b x="2"/><c x="3"/><d x="4"/></root>')
    evens = collect_evens(source)

    assert evens == ["b", "d"]
def test_wikipedia_example_2():
    """Render the persons of ``wp_document`` as a sorted XHTML list."""
    expected_markup = (
        ' <html xmlns="http://www.w3.org/1999/xhtml">'
        ' <head>'
        ' <title>Testing XML Example</title>'
        ' </head>'
        ' <body>'
        ' <h1>Persons</h1>'
        ' <ul>'
        ' <li>Ismincius, Morka</li>'
        ' <li>Smith, John</li>'
        ' </ul>'
        ' </body>'
        ' </html> '
    )
    expected = Document(expected_markup)

    def generate_skeleton(context):
        head = tag("head", tag("title", "Testing XML Example"))
        body = tag("body", (tag("h1", "Persons"), tag("ul")))
        context.html = new_tag_node(
            "html",
            namespace="http://www.w3.org/1999/xhtml",
            children=(head, body),
        )

    def extract_person(node: TagNode, persons):
        # Collected as ``(name, family name)`` pairs.
        name = first(node.css_select("name")).full_text
        family_name = first(node.css_select("family-name")).full_text
        persons.append((name, family_name))

    def list_persons(previous_result, html: TagNode):
        persons_list = first(html.css_select("html|body html|ul"))
        items = [
            html.new_tag_node("li", children=[f"{family_name}, {name}"])
            for name, family_name in previous_result
        ]
        persons_list.append_child(*items)

    transformation = Transformation(
        generate_skeleton,
        Rule("person", extract_person),
        lib.sort("persons", itemgetter(1)),
        list_persons,
        result_object="context.html",
        context={"persons": []},
    )

    # NOTE: the author compared this test's size to an XSLT implementation.
    actual = transformation(wp_document.root)
    assert equal_subtree(actual, expected.root)
def test_wikipedia_example_2():
    """Render the persons of ``wp_document`` as a sorted XHTML list."""
    expected_markup = (
        ' <html xmlns="http://www.w3.org/1999/xhtml">'
        ' <head>'
        ' <title>Testing XML Example</title>'
        ' </head>'
        ' <body>'
        ' <h1>Persons</h1>'
        ' <ul>'
        ' <li>Ismincius, Morka</li>'
        ' <li>Smith, John</li>'
        ' </ul>'
        ' </body>'
        ' </html> '
    )
    expected = parse(expected_markup)

    def generate_skeleton(context, e):
        head = e.head(e.title('Testing XML Example'))
        body = e.body(e.h1('Persons'), e.ul())
        context.html = e.html(head, body)
        # Keep a direct handle on the list that is filled later on.
        context.persons_list = context.html.xpath(
            './body/ul', smart_prefix=True)[0]

    def extract_person(element, persons):
        # Collected as ``(name, family name)`` pairs.
        name = element.find('name').text
        family_name = element.find('family-name').text
        persons.append((name, family_name))

    def list_persons(previous_result, persons_list, e):
        for name, family_name in previous_result:
            persons_list.append(e.li(f'{family_name}, {name}'))

    transformation = Transformation(
        lib.init_elementmaker(namespace='http://www.w3.org/1999/xhtml'),
        generate_skeleton,
        Rule('person', extract_person),
        lib.sorter('persons', itemgetter(1)),
        list_persons,
        result_object='context.html',
        context={'persons': []},
    )

    # NOTE: the author compared this test's size to an XSLT implementation.
    actual = transformation(wp_document)
    assert equal_subtree(actual, expected)
def test_wikipedia_example_1():
    """Turn every ``person`` of ``wp_document`` into a ``name`` element."""
    expected_markup = (
        ' <root>'
        ' <name username="******">John</name>'
        ' <name username="******">Morka</name>'
        ' </root> '
    )
    expected = parse(expected_markup)

    def extract_person(element):
        # Yields a ``(username, name)`` pair for the next handler.
        username = element.attrib['username']
        return username, element.find('name').text

    def append_person(previous_result, target):
        username, name = previous_result
        name_element = etree.SubElement(target, 'name', {'username': username})
        name_element.text = name

    transformation = Transformation(
        Rule('person', (extract_person, append_person)),
        result_object='context.target',
        context={'target': etree.Element('root')},
    )

    # NOTE: the author compared this test's size to an XSLT implementation.
    actual = transformation(wp_document)
    assert equal_subtree(actual, expected)
copy_node=True), ), ), common_rule_conditions=MODS_NAMESPACE, copy=False, result_object=None, ) mods_location = Transformation( f(new_tag_node, "msIdentifier", namespace=TEI_NAMESPACE), as_result, Rule( "physicalLocation", ( lib.get_text, f(str.strip, prev), lib.f(tag, "repository", prev), lib.append("result"), ), ), Rule( "shelfLocator", ( lib.get_text, f(str.strip, prev), lib.f(tag, "idno", {"type": "shelfmark"}, prev), lib.append("result"), ), ), common_rule_conditions=MODS_NAMESPACE, copy=False,
def test_Any():
    """Apply ``lib.set_text`` to nodes matching ``Any("a", "b")``."""
    source = Document("<root><a/><b/></root>")
    set_x = Transformation(Rule(Any("a", "b"), lib.set_text("x")))

    root = set_x(source).root

    assert not root._data_node._exists
    # Every text node yielded for the root must carry the new content.
    for text_node in root.child_nodes(is_text_node):
        assert text_node.content == "x"
Not, Rule, Transformation, ) __all__ = [] # reduce_whitespaces def _reduce_whitespace_handler(node: TagNode): for child in node.child_nodes(is_text_node, recurse=True): child.content = utils.reduce_whitespaces(node.content, strip="") reduce_whitespaces = Transformation(Rule("/", _reduce_whitespace_handler)) """ Normalizes any whitespace character in text nodes to a simple space and reduces consecutive ones to one. Leading or tailing whitespaces are not stripped away. """ __all__.append("reduce_whitespaces") # remove_empty_nodes remove_empty_nodes = Transformation( Rule(Not(lib.has_children, lib.has_text, lib.has_attributes, "/"), lib.remove_node), name="remove_empty_nodes", traversal_order=(TRAVERSE_DEPTH_FIRST | TRAVERSE_LEFT_TO_RIGHT | TRAVERSE_BOTTOM_TO_TOP), )
def test_any_element():
    """The ``*`` condition sets the text of every child of the root."""
    source = Document("<root><a/><b/></root>")
    set_x_everywhere = Transformation(Rule("*", lib.set_text("x")))

    root = set_x_everywhere(source).root

    for child in root.child_nodes():
        assert child[0] == "x"
""" This module contains transformations that are supposedly of common interest. """

from inxs import \
    (TRAVERSE_DEPTH_FIRST, TRAVERSE_BOTTOM_TO_TOP, TRAVERSE_LEFT_TO_RIGHT,
     lib, Not, Rule, Transformation)
from inxs.lxml_utils import remove_element

__all__ = []


def _append_tail_to_previous_in_stream(element):
    """Hand the tail of ``element`` over to whatever precedes it.

    With a preceding sibling the tail is appended to that sibling's tail;
    without one it is appended to the parent's text. Nothing happens when
    ``element`` has no tail.
    """
    tail = element.tail
    if not tail:
        return

    previous = element.getprevious()
    if previous is None:
        parent = element.getparent()
        # ``text`` is ``None`` for an element without leading text, so it
        # cannot be extended in place with ``+=``.
        parent.text = (parent.text or '') + tail
    else:
        previous.tail = (previous.tail or '') + tail


remove_empty_elements = Transformation(
    Rule(
        Not(lib.has_children, lib.has_text, lib.has_attributes, '/'),
        (_append_tail_to_previous_in_stream, remove_element),
    ),
    name='remove_empty_elements',
    traversal_order=(
        TRAVERSE_DEPTH_FIRST | TRAVERSE_LEFT_TO_RIGHT | TRAVERSE_BOTTOM_TO_TOP
    ),
)
""" Removes elements without attributes, text, tail and children from the (sub-)tree. """
__all__.append('remove_empty_elements')
def test_set_text():
    """``lib.set_text`` accepts a ``Ref`` to a previously stored variable."""
    pre_node = new_tag_node("pre")

    store_greeting = lib.put_variable("x", "Hello world.")
    write_greeting = Rule("/", lib.set_text(Ref("x")))
    transformation = Transformation(store_greeting, write_greeting)

    serialized = str(transformation(pre_node))
    assert serialized == "<pre>Hello world.</pre>"