示例#1
0
def transform_toc(routes, toc_order, src_to_title, root_dir, epub_zip,
                  section_title, filepath):
    """Run the table-of-contents transformation on one XML member of an archive.

    The member ``root_dir/filepath`` is read from ``epub_zip``, parsed with
    lxml (blank text removed) and its root element is handed to an inxs
    ``Transformation`` whose result object is ``context.html``.

    :param routes: Stored in the transformation context.
    :param toc_order: Passed to the transformation call only (it is not part
                      of the initial context).
    :param src_to_title: Stored in the context and passed again to the call.
    :param root_dir: Directory of the member inside the archive.
    :param epub_zip: Object providing ``open(name)`` for archive members;
                     presumably a ``zipfile.ZipFile`` -- confirm with callers.
    :param section_title: Stored in the transformation context.
    :param filepath: Path of the member relative to ``root_dir``.
    :returns: Whatever the transformation yields for ``context.html``.
    """
    # Imported locally so the block does not depend on a file-level import.
    import posixpath

    # Every argument except ``epub_zip`` and ``toc_order`` is exposed to the
    # handlers through the transformation context.
    context = {
        'routes': routes,
        'src_to_title': src_to_title,
        'root_dir': root_dir,
        'section_title': section_title,
        'filepath': filepath,
    }

    # NOTE(review): a ``None`` value presumably means "attribute is present
    # with any value" -- confirm against the inxs documentation.
    carries_link = Any(
        MatchesAttributes({'href': None}),
        MatchesAttributes({'src': None}),
    )

    transformation = Transformation(
        inxs.lib.init_elementmaker(name='elmaker'),
        make_toc_skeleton,
        Rule('navPoint', set_titles),
        Rule(carries_link, route_url),
        make_toc,
        list_contents,
        indent,
        result_object='context.html',
        context=context,
    )

    # Archive member names always use forward slashes; ``os.path.join`` would
    # produce backslashes on Windows and the lookup would fail there.
    member_name = posixpath.join(root_dir, filepath)
    with epub_zip.open(member_name) as doc_xml:
        parser = etree.XMLParser(remove_blank_text=True)
        doc_tree = etree.parse(doc_xml, parser)

    return transformation(
        doc_tree.getroot(),
        src_to_title=src_to_title,
        toc_order=toc_order,
    )
示例#2
0
文件: test_lib.py 项目: ra2003/inxs
def test_add_html_classes():
    """Exercise ``lib.add_html_classes`` with one class, two classes and a ``Ref``."""

    def body_classes(transformation, document):
        # The ``body`` element is the first child of the resulting root.
        return transformation(document).root[0].attributes["class"]

    add_single = Transformation(
        Rule("body", lib.add_html_classes("transformed")))

    # A body without a class attribute receives exactly the new class.
    bare = Document("<html><body/></html>")
    assert body_classes(add_single, bare) == "transformed"

    # A class that is already present is kept alongside the added one.
    doc = Document('<html><body class="loaded" /></html>')
    classes = body_classes(add_single, doc)
    for name in ("transformed", "loaded"):
        assert name in classes

    # Several classes given as positional arguments.
    add_two = Transformation(
        Rule("body", lib.add_html_classes("transformed",
                                          "and_something_else")))
    classes = body_classes(add_two, doc)
    for name in ("and_something_else", "loaded", "transformed"):
        assert name in classes

    # The class names are looked up via a reference into the context.
    add_referenced = Transformation(
        Rule("body", lib.add_html_classes(Ref("html_classes"))),
        context={"html_classes": ["transformed", "and_something_else"]},
    )
    classes = body_classes(add_referenced, doc)
    for name in ("and_something_else", "loaded", "transformed"):
        assert name in classes
示例#3
0
def test_strip_namespace():
    """A namespaced ``div`` is expected to carry the plain tag ``div`` afterwards."""
    ns_uri = 'http://www.example.org/ns/'
    element_maker = builder.ElementMaker(namespace=ns_uri, nsmap={'x': ns_uri})
    namespaced_div = element_maker.div()

    strip = Transformation(Rule(ns_uri, lib.strip_namespace))
    stripped = strip(namespaced_div)

    assert stripped.tag == 'div'
示例#4
0
def test_attributes(constraint):
    """The given ``constraint`` is expected to select only the first ``a`` element."""
    source = Document('<root><a b="x"/><a b="y"/></root>')
    set_x = Transformation(Rule(constraint, lib.set_text("x")))

    root = set_x(source).root

    assert not root._data_node._exists
    assert root[0].full_text == "x", str(root)
    # The second ``a`` must have stayed without any child nodes.
    assert len(root[1]) == 0
示例#5
0
def test_xpath(xpath):
    """The given ``xpath`` is expected to select ``a`` but not ``b``."""
    source = Document("<root><a/><b/></root>")
    set_x = Transformation(Rule(xpath, lib.set_text("x")))

    root = set_x(source).root

    assert not root._data_node._exists
    a_node = first(root.css_select("a"))
    assert a_node.full_text == "x"
    b_node = first(root.css_select("b"))
    assert b_node.full_text == ""
示例#6
0
def test_If():
    """``If`` is fed literal operands as well as callables; both rules must fire."""

    def return_zero():
        return 0

    def return_one():
        return 1

    literals_equal = If(0, operator.eq, 0)
    callables_unequal = Not(If(return_zero, operator.eq, return_one))

    transformation = Transformation(
        Rule(literals_equal, lib.put_variable("a")),
        Rule(callables_unequal, lib.put_variable("b")),
        result_object="context",
    )
    context = transformation(Document("<root/>"))

    # Each rule leaves a variable behind on the resulting context.
    for variable in ("a", "b"):
        assert hasattr(context, variable)
示例#7
0
def test_wikipedia_example_1():
    """Rebuild the first XSLT example: one ``name`` element per ``person``."""
    expected = Document("""
        <root>
          <name username="******">John</name>
          <name username="******">Morka</name>
        </root>
    """)

    # NOTE: the handlers' parameter names are kept as they are; inxs appears
    # to resolve handler arguments by name.
    def extract_person(node: TagNode):
        username = node.attributes["username"]
        name = first(node.css_select("name")).full_text
        return username, name

    def append_person(previous_result, result: TagNode):
        username, name = previous_result
        entry = result.new_tag_node(
            "name",
            attributes={"username": username},
            children=[name],
        )
        result.append_child(entry)

    collect_names = Transformation(
        Rule("person", (extract_person, append_person)),
        result_object="context.result",
        context={"result": new_tag_node("root")},
    )

    # that's four lines less LOC than the XSLT implementation

    actual = collect_names(wp_document.root)
    assert equal_subtree(actual, expected.root)
示例#8
0
文件: test_lib.py 项目: ra2003/inxs
def test_strip_namespace():
    """After ``lib.remove_namespace`` the node's qualified name is the bare ``div``."""
    ns_uri = "http://www.example.org/ns/"
    register_namespace("x", ns_uri)
    namespaced_div = new_tag_node("div", namespace=ns_uri)

    remove_ns = Transformation(Rule(ns_uri, lib.remove_namespace))
    stripped = remove_ns(namespaced_div)

    assert stripped.qualified_name == "div"
示例#9
0
def test_OneOf():
    """Only the root is expected to get the text; its tag children stay empty."""
    source = Document('<root x="x"><a x="x"/><b x="x"/></root>')
    exactly_one = OneOf("a", "b", {"x": "x"})
    set_x = Transformation(Rule(exactly_one, lib.set_text("x")))

    root = set_x(source).root

    assert root[0] == "x"

    for child in root.child_nodes(is_tag_node):
        assert child.full_text == ""
示例#10
0
def test_is_root_condition():
    """With the conditions ``"a"`` and ``"/"`` exactly one item must be collected."""
    nested_a = Document("<a><a/></a>")
    collect_root_a = Transformation(
        Rule(("a", "/"), lib.append("basket")),
        result_object="context.basket",
        context={"basket": []},
    )

    basket = collect_root_a(nested_a)

    assert len(basket) == 1
示例#11
0
def test_attributes_re_key(constraint, expected):
    """Collect the local names of all nodes matching ``constraint``."""
    source = Document('<root><item1 default-source="x"/>'
                      '<item2 default-value="y"/><item3/></root>')
    collect_localnames = Transformation(
        Rule(constraint, (lib.get_localname, lib.append("result"))),
        context={"result": []},
        result_object="context.result",
    )

    collected = collect_localnames(source)

    assert collected == expected
示例#12
0
def test_css_selector(selector, expected):
    """The text of the node matched by ``selector`` must equal ``expected``."""
    source = Document(
        '<section xmlns="foo"><table><head>Table Header</head></table>'
        '<cb type="start">X</cb><row>#</row></section>')
    grab_text = Transformation(
        Rule(selector, (lib.get_text, lib.put_variable("result"))),
        result_object="context.result",
    )

    text = grab_text(source)

    assert text == expected
示例#13
0
def transform_document(routes, root_dir, epub_zip, filepath, currentpath,
                       stats, fallback_url):
    """Parse one HTML member of an archive and run the document transformation.

    The member ``root_dir/filepath`` is read from ``epub_zip``, parsed with
    ``html5.parse`` (falling back to UTF-8) and transformed while a
    ``requests.Session`` is open; that session is passed to the call.

    :param routes: Stored in the transformation context.
    :param root_dir: Directory of the member inside the archive.
    :param epub_zip: Object providing ``open(name)`` for archive members;
                     presumably a ``zipfile.ZipFile`` -- confirm with callers.
    :param filepath: Path of the member relative to ``root_dir``.
    :param currentpath: Stored in the transformation context.
    :param stats: Stored in the transformation context.
    :param fallback_url: Stored in the transformation context.
    :returns: The result of applying the transformation to the parsed tree.
    """
    # Imported locally so the block does not depend on a file-level import.
    import posixpath

    # Every argument except ``epub_zip`` is exposed to the handlers.
    context = {
        'routes': routes,
        'root_dir': root_dir,
        'filepath': filepath,
        'currentpath': currentpath,
        'stats': stats,
        'fallback_url': fallback_url,
    }

    transformation = Transformation(
        add_re_namespace,
        Rule("title", remove_from_tree),
        Rule(has_link, route_url),
        Rule(MatchesXPath(sutta_ref_xpath), link_sutta_references),
        context=context,
    )

    # Archive member names always use forward slashes; ``os.path.join`` would
    # produce backslashes on Windows and the lookup would fail there.
    member_name = posixpath.join(root_dir, filepath)
    with epub_zip.open(member_name) as doc_xml:
        doc_tree = html5.parse(doc_xml.read(), fallback_encoding='utf-8')

    with requests.Session() as session:
        return transformation(doc_tree, session=session)
示例#14
0
def test_common_conditions():
    """``common_rule_conditions`` limits the wildcard rule to nodes with ``href``."""
    source = Document(
        '<root><a href="foo"/><a id="bar"/><a href="peng"/></root>')
    collect_hrefs = Transformation(
        Rule("*", (lib.get_attribute("href"), lib.append("references"))),
        common_rule_conditions={"href": None},
        context={"references": []},
        result_object="context.references",
    )

    references = collect_hrefs(source)

    assert references == ["foo", "peng"]
示例#15
0
def linkfix_document(routes, filepath, currentpath, stats, fallback_url):
    """Parse the HTML file at ``currentpath`` and run the link-fixing rule on it.

    Every argument except ``stats`` is placed in the transformation context;
    ``stats`` is handed over at call time together with an open
    ``requests.Session``.
    """
    context = {
        'routes': routes,
        'filepath': filepath,
        'currentpath': currentpath,
        'fallback_url': fallback_url,
    }

    fix_links = Transformation(
        Rule([has_link, has_path_url], check_and_fix_link),
        context=context,
    )

    with open(currentpath, mode='rb') as doc:
        raw_html = doc.read()
        doc_tree = html5.parse(raw_html, fallback_encoding='utf-8')

    with requests.Session() as session:
        return fix_links(doc_tree, session=session, stats=stats)
示例#16
0
def test_subtransformation():
    """A transformation used as a step of another one renames the root to ``pablo``.

    The outer transformation records ``id(root)`` before and after the
    sub-transformation; a rule that would rename the node to ``neruda`` only
    applies when the two ids differ, and the test expects ``pablo``.
    """
    rename_everything = Transformation(Rule("*", lib.set_localname("pablo")))

    ids_differ = Not(If(Ref("source_id"), operator.eq, Ref("result_id")))
    complain_and_rename = (
        lib.debug_message("NO!"),
        lib.debug_symbols("root"),
        lib.set_localname("neruda"),
        AbortRule,
    )

    outer = Transformation(
        lib.f(id, Ref("root")),
        lib.put_variable("source_id"),
        rename_everything,
        lib.f(id, Ref("root")),
        lib.put_variable("result_id"),
        lib.debug_symbols("source_id", "result_id"),
        Rule(ids_differ, complain_and_rename),
    )

    source = Document("<augustus />")
    assert source.root.local_name == "augustus"

    transformed = outer(source)
    assert transformed.root.local_name == "pablo"
示例#17
0
def test_SkipToNextElement():
    """Raising ``SkipToNextNode`` in a handler skips the rest of that node's handlers."""

    def more_complicated_test(node: TagNode):
        # well, supposedly
        attributes = node.attributes
        # Nodes without ``x`` or with an odd ``x`` are skipped.
        if "x" not in attributes or int(attributes["x"]) % 2:
            raise SkipToNextNode
        return node.local_name

    collect_evens = Transformation(
        Rule("*", (more_complicated_test, lib.append("evens"))),
        context={"evens": []},
        result_object="context.evens",
    )
    source = Document('<root><a x="1"/><b x="2"/><c x="3"/><d x="4"/></root>')

    evens = collect_evens(source)

    assert evens == ["b", "d"]
示例#18
0
def test_wikipedia_example_2():
    """Rebuild the second XSLT example: an XHTML page listing the sorted persons."""
    expected = Document("""
        <html xmlns="http://www.w3.org/1999/xhtml">
          <head> <title>Testing XML Example</title> </head>
          <body>
            <h1>Persons</h1>
            <ul>
              <li>Ismincius, Morka</li>
              <li>Smith, John</li>
            </ul>
          </body>
        </html>
    """)

    # NOTE: the handlers' parameter names are kept as they are; inxs appears
    # to resolve handler arguments by name.
    def generate_skeleton(context):
        head = tag("head", tag("title", "Testing XML Example"))
        body = tag("body", (tag("h1", "Persons"), tag("ul")))
        context.html = new_tag_node(
            "html",
            namespace="http://www.w3.org/1999/xhtml",
            children=(head, body),
        )

    def extract_person(node: TagNode, persons):
        name = first(node.css_select("name")).full_text
        family_name = first(node.css_select("family-name")).full_text
        persons.append((name, family_name))

    def list_persons(previous_result, html: TagNode):
        person_list = first(html.css_select("html|body html|ul"))
        items = (
            html.new_tag_node("li", children=[f"{family_name}, {name}"])
            for name, family_name in previous_result
        )
        person_list.append_child(*items)

    build_page = Transformation(
        generate_skeleton,
        Rule("person", extract_person),
        lib.sort("persons", itemgetter(1)),
        list_persons,
        result_object="context.html",
        context={"persons": []},
    )

    # that's four lines more LOC than the XSLT implementation

    actual = build_page(wp_document.root)
    assert equal_subtree(actual, expected.root)
示例#19
0
def test_wikipedia_example_2():
    """Rebuild the second XSLT example with an element maker and lxml elements."""
    expected = parse("""
        <html xmlns="http://www.w3.org/1999/xhtml">
          <head> <title>Testing XML Example</title> </head>
          <body>
            <h1>Persons</h1>
              <ul>
                <li>Ismincius, Morka</li>
                <li>Smith, John</li>
              </ul>
          </body>
        </html>
    """)

    # NOTE: the handlers' parameter names are kept as they are; inxs appears
    # to resolve handler arguments by name.
    def generate_skeleton(context, e):
        head = e.head(e.title('Testing XML Example'))
        body = e.body(e.h1('Persons'), e.ul())
        context.html = e.html(head, body)
        matching_lists = context.html.xpath('./body/ul', smart_prefix=True)
        context.persons_list = matching_lists[0]

    def extract_person(element, persons):
        name = element.find('name').text
        family_name = element.find('family-name').text
        persons.append((name, family_name))

    def list_persons(previous_result, persons_list, e):
        for name, family_name in previous_result:
            persons_list.append(e.li(f'{family_name}, {name}'))

    build_page = Transformation(
        lib.init_elementmaker(namespace='http://www.w3.org/1999/xhtml'),
        generate_skeleton,
        Rule('person', extract_person),
        lib.sorter('persons', itemgetter(1)),
        list_persons,
        result_object='context.html',
        context={'persons': []},
    )

    # that's eight (or not counting line-breaks: thirteen) lines less sloc
    # than the XSLT implementation

    actual = build_page(wp_document)
    assert equal_subtree(actual, expected)
示例#20
0
def test_wikipedia_example_1():
    """Rebuild the first XSLT example with plain lxml elements."""
    expected = parse("""
        <root>
          <name username="******">John</name>
          <name username="******">Morka</name>
        </root>
    """)

    # NOTE: the handlers' parameter names are kept as they are; inxs appears
    # to resolve handler arguments by name.
    def extract_person(element):
        username = element.attrib['username']
        name = element.find('name').text
        return username, name

    def append_person(previous_result, target):
        username, name = previous_result
        entry = etree.SubElement(target, 'name', {'username': username})
        entry.text = name

    collect_names = Transformation(
        Rule('person', (extract_person, append_person)),
        result_object='context.target',
        context={'target': etree.Element('root')},
    )

    # that's five (or not counting line-breaks: eight) lines less sloc than the XSLT implementation

    actual = collect_names(wp_document)
    assert equal_subtree(actual, expected)
示例#21
0
                       copy_node=True),
        ),
    ),
    common_rule_conditions=MODS_NAMESPACE,
    copy=False,
    result_object=None,
)

mods_location = Transformation(
    f(new_tag_node, "msIdentifier", namespace=TEI_NAMESPACE),
    as_result,
    Rule(
        "physicalLocation",
        (
            lib.get_text,
            f(str.strip, prev),
            lib.f(tag, "repository", prev),
            lib.append("result"),
        ),
    ),
    Rule(
        "shelfLocator",
        (
            lib.get_text,
            f(str.strip, prev),
            lib.f(tag, "idno", {"type": "shelfmark"}, prev),
            lib.append("result"),
        ),
    ),
    common_rule_conditions=MODS_NAMESPACE,
    copy=False,
示例#22
0
def test_Any():
    """Apply ``set_text`` to nodes matching ``Any("a", "b")`` and check root's text children."""
    source = Document("<root><a/><b/></root>")
    either_tag = Any("a", "b")
    set_x = Transformation(Rule(either_tag, lib.set_text("x")))

    root = set_x(source).root

    assert not root._data_node._exists
    # Only the root's direct text-node children are inspected here.
    for text_node in root.child_nodes(is_text_node):
        assert text_node.content == "x"
示例#23
0
    Not,
    Rule,
    Transformation,
)

__all__ = []

# reduce_whitespaces


def _reduce_whitespace_handler(node: TagNode):
    """Reduce the whitespace of every text node found below ``node``.

    All text nodes are visited recursively and each one's content is replaced
    by the result of ``utils.reduce_whitespaces`` called with ``strip=""``.

    :param node: The tag node whose descendant text nodes are rewritten.
    """
    for child in node.child_nodes(is_text_node, recurse=True):
        # Each text node is reduced from its *own* content; reading
        # ``node.content`` here would consult the enclosing tag node instead
        # of the text node that is being rewritten.
        child.content = utils.reduce_whitespaces(child.content, strip="")


# Ready-made transformation with a single rule: ``_reduce_whitespace_handler``
# is attached to the "/" condition. The name is added to ``__all__`` below.
reduce_whitespaces = Transformation(Rule("/", _reduce_whitespace_handler))
"""
Normalizes any whitespace character in text nodes to a simple space and reduces
consecutive ones to one. Leading or tailing whitespaces are not stripped away.
"""
__all__.append("reduce_whitespaces")

# remove_empty_nodes
#
# Ready-made transformation that applies ``lib.remove_node`` to every node
# selected by the ``Not(...)`` condition below.
# NOTE(review): presumably ``Not`` holds when none of the listed conditions
# match, i.e. for non-root nodes without children, text and attributes --
# confirm against the inxs documentation.

remove_empty_nodes = Transformation(
    Rule(Not(lib.has_children, lib.has_text, lib.has_attributes, "/"),
         lib.remove_node),
    name="remove_empty_nodes",
    # Traversal flags: depth first, left to right, bottom to top.
    traversal_order=(TRAVERSE_DEPTH_FIRST | TRAVERSE_LEFT_TO_RIGHT
                     | TRAVERSE_BOTTOM_TO_TOP),
)
示例#24
0
def test_any_element():
    """With the ``"*"`` condition every child of the root receives the text ``x``."""
    source = Document("<root><a/><b/></root>")
    set_x_everywhere = Transformation(Rule("*", lib.set_text("x")))

    root = set_x_everywhere(source).root

    assert all(child[0] == "x" for child in root.child_nodes())
示例#25
0
""" This module contains transformations that are supposedly of common interest. """

from inxs import \
    (TRAVERSE_DEPTH_FIRST, TRAVERSE_BOTTOM_TO_TOP, TRAVERSE_LEFT_TO_RIGHT,
     lib, Not, Rule, Transformation)
from inxs.lxml_utils import remove_element

__all__ = []


def _append_tail_to_previous_in_stream(element):
    """Move ``element``'s tail text to whatever precedes it in document order.

    With a preceding sibling the tail is appended to that sibling's tail;
    without one it is appended to the parent's text. Nothing happens when the
    element has no tail. The element itself is left untouched.

    :param element: An element offering ``tail``, ``getprevious()`` and
                    ``getparent()`` (the lxml element interface).
    """
    tail = element.tail
    if not tail:
        return

    previous = element.getprevious()
    if previous is None:
        # A parent without text reports ``None``; concatenating onto that
        # directly would raise a ``TypeError``.
        parent = element.getparent()
        parent.text = (parent.text or '') + tail
    else:
        previous.tail = (previous.tail or '') + tail


# Ready-made transformation: for every element selected by the ``Not(...)``
# condition, ``_append_tail_to_previous_in_stream`` runs first and
# ``remove_element`` second. The traversal flags are depth first, left to
# right, bottom to top. The name is added to ``__all__`` below.
# NOTE(review): presumably ``Not`` holds when none of the listed conditions
# match -- confirm against the inxs documentation.
remove_empty_elements = Transformation(
    Rule(Not(lib.has_children, lib.has_text, lib.has_attributes, '/'),
         (_append_tail_to_previous_in_stream, remove_element)),
    name='remove_empty_elements',
    traversal_order=TRAVERSE_DEPTH_FIRST | TRAVERSE_LEFT_TO_RIGHT
    | TRAVERSE_BOTTOM_TO_TOP)
""" Removes elements without attributes, text, tail and children from the (sub-)tree. """
__all__.append('remove_empty_elements')
示例#26
0
文件: test_lib.py 项目: ra2003/inxs
def test_set_text():
    """``lib.set_text`` is given a ``Ref`` to a variable that an earlier step stored."""
    pre_node = new_tag_node("pre")
    store_greeting = lib.put_variable("x", "Hello world.")
    write_greeting = Rule("/", lib.set_text(Ref("x")))
    transformation = Transformation(store_greeting, write_greeting)

    serialized = str(transformation(pre_node))

    assert serialized == "<pre>Hello world.</pre>"