示例#1
0
    def test_attr_serialization_and_parsing(self):
        """Check how attributes and the position survive (de-)serialization.

        A node with two attributes and position 46 is serialized as JSON,
        XML and S-expression. For XML and S-expressions the test verifies
        that the position is left out of the default serialization, that it
        is written when an empty string is passed as first argument, and
        that re-parsing restores ``pos`` without leaving a position entry
        in ``attr``.
        """
        node = Node('employee', 'James Bond').with_pos(46)
        node.attr['branch'] = 'Secret Service'
        node.attr['id'] = '007'

        # JSON: no assertion here, re-parsing merely must not raise
        json_str = node.as_json()
        parse_json_syntaxtree(json_str)

        # XML
        assert '_pos' not in node.as_xml()
        xml_with_pos = node.as_xml('')
        assert '_pos' in xml_with_pos
        reparsed = parse_xml(xml_with_pos)
        assert reparsed.pos == 46
        assert '_pos' not in reparsed.attr
        # with ignore_pos=True "_pos" stays an ordinary attribute and the
        # node's own position remains negative
        reparsed = parse_xml(xml_with_pos, ignore_pos=True)
        assert '_pos' in reparsed.attr
        assert reparsed._pos < 0

        # S-Expression
        assert 'pos' not in node.as_sxpr()
        sxpr_with_pos = node.as_sxpr('')
        assert 'pos' in sxpr_with_pos
        reparsed = parse_sxpr(sxpr_with_pos)
        assert reparsed.pos == 46
        assert 'pos' not in reparsed.attr
示例#2
0
 def test_plaintext_handling(self):
     """Check how character data between XML elements is represented.

     Covers mixed content (text next to child elements), whitespace-only
     text around a child element, and the effect of the ``inline_tags``
     and ``string_tags`` arguments of ``as_xml`` on the serialized text.
     """
     mixed = parse_xml('<a>alpha <b>beta</b> gamma</a>')
     flat_sxpr = flatten_sxpr(mixed.as_sxpr())
     assert flat_sxpr == '(a (:Text "alpha ") (b "beta") (:Text " gamma"))'

     padded = parse_xml(' <a>  <b>beta</b>  </a> ')
     expected_flat = ('<a><ANONYMOUS_Text__>  </ANONYMOUS_Text__><b>beta</b>'
                      '<ANONYMOUS_Text__>  </ANONYMOUS_Text__></a>')
     assert flatten_xml(padded.as_xml()) == expected_flat
     inline_xml = padded.as_xml(inline_tags={'a'}, string_tags={':Text'})
     assert inline_xml == '<a>  <b>beta</b>  </a>'

     multiline = parse_xml(' <a>\n  <b>beta</b>\n</a> ')
     inline_xml = multiline.as_xml(inline_tags={'a'})
     assert inline_xml == '<a><b>beta</b></a>'
示例#3
0
 def test_roundtrip(self):
     """Check that S-expression -> XML -> tree -> XML yields the same XML."""
     original = parse_sxpr('(a (b c) (d (e f) (h i)))')
     flat = flatten_xml(original.as_xml())
     assert flat == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

     # parse the flattened XML again and serialize it a second time
     reparsed = parse_xml(flat)
     flat_again = flatten_xml(reparsed.as_xml())
     assert flat == flat_again
示例#4
0
 def test_as_etree(self):
     """Check the conversion between nodes and ElementTree elements.

     Three scenarios are exercised: a plain tree with one attribute,
     mixed content (text interleaved with elements, plus an XML comment),
     and a tree containing an empty element tracked via ``empty_tags``.
     """
     import xml.etree.ElementTree as ET
     # import lxml.etree as ET

     # 1. plain tree: Node -> Element -> Node
     source_sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
     expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
     element = parse_sxpr(source_sxpr).as_etree()
     serialized = ET.tostring(element, encoding="unicode")
     assert serialized == expected_xml, serialized
     assert Node.from_etree(element).as_sxpr() == source_sxpr

     # 2. mixed content; the comment is absent from the expected result
     element = ET.XML(
         '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
     mixed_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
     mixed = Node.from_etree(element)
     assert mixed.as_sxpr() == mixed_sxpr
     # a further pass through ElementTree's serializer must not change it
     element = ET.XML(ET.tostring(mixed.as_etree(), encoding="unicode"))
     assert Node.from_etree(element).as_sxpr() == mixed_sxpr

     # 3. empty elements collected while parsing are handed to as_etree()
     empties = set()
     parsed = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                        out_empty_tags=empties)
     element = parsed.as_etree(empty_tags=empties)
     # normalize "<d />" to "<d/>" before comparing the bytes output
     raw = ET.tostring(element).replace(b' /', b'/')
     assert raw == b'<a><b>1<c>2<d/>3</c></b>4</a>'
     restored = Node.from_etree(element)
     assert flatten_sxpr(restored.as_sxpr()) == \
            '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
示例#5
0
 def test_PI_and_DTD(self):
     """PIs <?...> and DTDs <!...> and the like should politely be overlooked."""
     xmp_packet = """<!DOCTYPE nonsense>
         <?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?> 
         <?xpacket begin="r" id="Arnold-Mueller2017a"?> 
         <x:xmpmeta xmlns:x="adobe:ns:meta/"> 
         <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> 
         <rdf:Description xmlns:bibtex="http://jabref.sourceforge.net/bibteXMP/" 
         bibtex:bibtexkey="Arnold-Mueller2017a" 
         bibtex:journal="Informationspraxis" 
         bibtex:title="Wie permanent sind Permalinks?" 
         bibtex:type="Article" 
         bibtex:doi="http://dx.doi.org/10.11588/ip.2016.2.33483" 
         bibtex:year="2017" 
         bibtex:volume="3" 
         bibtex:issue="1" 
         bibtex:url="http://www.eckhartarnold.de/papers/2016_Permalinks/Arnold_Mueller_2016_Permalinks.html"> 
         <bibtex:author>Eckhart Arnold</bibtex:author> 
         </rdf:Description> 
         <!-- comment -->
         </rdf:RDF> 
         </x:xmpmeta> 
         <?xpacket end="r"?> 
         <?xpacket end='r'?>"""
     root = parse_xml(xmp_packet)
     # the first real element is the root; DOCTYPE and PIs come before it
     assert root.tag_name == 'x:xmpmeta'

     author_node = root.pick('bibtex:author')
     assert author_node
     assert author_node.content == "Eckhart Arnold"

     # namespaced attributes of the description element are retained
     description_node = root.pick('rdf:Description')
     assert description_node.has_attr('bibtex:title')
示例#6
0
 def test_collapse_children_if_plain(self):
     """Collapse all children of a flat element into a single text node.

     With a condition that is always true, every child of ``EINZEILER``
     ends up in one ``Text`` element.
     """
     def always(_context):
         return True

     xml = ("<EINZEILER><DEU_WORT>spectat</DEU_WORT><WS> </WS><DEU_WORT>ad</DEU_WORT>"
            "<WS> </WS><DEU_WORT>gravitatem</DEU_WORT><TEIL_SATZZEICHEN>,</TEIL_SATZZEICHEN>"
            "<WS> </WS><DEU_WORT>momentum</DEU_WORT></EINZEILER>")
     tree = parse_xml(xml)
     # parsing and inline serialization reproduce the input unchanged
     assert tree.as_xml(inline_tags={'EINZEILER'}) == xml

     collapse_children_if([tree], always, self.Text)
     collapsed = tree.as_xml(inline_tags={'EINZEILER'})
     assert collapsed == \
            "<EINZEILER><Text>spectat ad gravitatem, momentum</Text></EINZEILER>"
示例#7
0
 def test_collapse_children_if_structured(self):
     """Collapse children selectively, keeping ``HOCHGESTELLT`` intact.

     The condition rejects ``HOCHGESTELLT`` nodes, so the children before
     and after it are merged into two separate ``Text`` elements.
     """
     def is_collapsible(context):
         # the last item of the context is the node being examined
         return context[-1].tag_name != 'HOCHGESTELLT'

     xml = """<Stelle>
                <DEU_WORT>p.</DEU_WORT>
                <SEITENZAHL>26</SEITENZAHL>
                <HOCHGESTELLT>b</HOCHGESTELLT>
                <TEIL_SATZZEICHEN>,</TEIL_SATZZEICHEN>
                <SEITENZAHL>18</SEITENZAHL>
              </Stelle>"""
     tree = parse_xml(xml)
     collapse_children_if([tree], is_collapsible, self.Text)
     collapsed = tree.as_xml(inline_tags={'Stelle'})
     assert collapsed == \
            "<Stelle><Text>p.26</Text><HOCHGESTELLT>b</HOCHGESTELLT><Text>,18</Text></Stelle>"
示例#8
0
def profile_serializing():
    """Profile the tree serializers on the two sample documents in ``data``.

    Each document is read from ``<scriptpath>/data``, parsed with
    ``parse_xml`` and then passed through ``cpu_profile`` once per
    serialization: XML, compact S-expression, JSON without indentation,
    and ``json_dumps`` applied to ``tree.to_json_obj()``. A heading naming
    the serialization and the document is printed before each run.

    The files are read with an explicit UTF-8 encoding so the result does
    not depend on the platform's locale encoding.
    """
    documents = (('inferus', 'inferus.ausgabe.xml'),
                 ('testdoc3', 'testdoc3.xml'))
    for label, filename in documents:
        path = os.path.join(scriptpath, 'data', filename)
        with open(path, encoding='utf-8') as f:
            data = f.read()
        tree = parse_xml(data)
        # The callables are invoked within this iteration, so closing over
        # the loop variable `tree` is safe here.
        runs = (
            ('XML', tree.as_xml),
            ('S-Expression', lambda: tree.as_sxpr(compact=True)),
            ('json', lambda: tree.as_json(indent=None)),
            ('toolkit.json_dumps', lambda: json_dumps(tree.to_json_obj())),
        )
        for heading, serialize in runs:
            print(heading, label)
            # second argument kept at 100 (presumably the number of
            # repetitions -- confirm against cpu_profile)
            cpu_profile(serialize, 100)
示例#9
0
 def test_compact_representation(self):
     """Check the compact S-expression and the 'indented' serialization.

     The first part verifies ``as_sxpr(compact=True, flatten_threshold=0)``
     for single-line and multi-line string content. The second part sets
     the configuration value ``flatten_sxpr_threshold`` to 20 and checks
     ``serialize('indented')``. The previous configuration value is
     restored in a ``finally`` clause, so a failing assertion cannot leak
     the modified setting into other tests.
     """
     tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
     compact = tree.as_sxpr(compact=True, flatten_threshold=0)
     assert compact == '(A\n  (B\n    (C "D")\n    (E "F"))\n  (G "H"))'

     # content containing line breaks is split into one string per line
     tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
     compact = tree.as_sxpr(compact=True, flatten_threshold=0)
     assert compact == '(A\n  (B\n    (C\n      "D"\n      "X")\n    (E "F"))' \
         '\n  (G\n    " H "\n    " Y "))'

     tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
     C = tree['B']['C']
     C.attr['attr'] = 'val'
     threshold = get_config_value('flatten_sxpr_threshold')
     set_config_value('flatten_sxpr_threshold', 20)
     try:
         compact = tree.serialize('indented')
         assert compact == 'A\n  B\n    C `(attr "val") "D"\n    E "F"\n  G "H"', compact
         tree = parse_xml(
             '<note><priority level="high" /><remark></remark></note>')
         assert tree.serialize(
             how='indented') == 'note\n  priority `(level "high")\n  remark'
     finally:
         # always put the global configuration back to its previous value
         set_config_value('flatten_sxpr_threshold', threshold)
示例#10
0
 def test_endlessloop_error(self):
     """Parse an element whose content is two backslashes.

     Judging by the test's name this guards against an endless loop in
     the parser; the only assertion is that a truthy tree is returned.
     """
     source = r'<LINEFEED>\\</LINEFEED>'
     parsed = parse_xml(source)
     assert parsed
示例#11
0
 def test_flatten_xml(self):
     """Check that ``flatten_xml`` removes line breaks and indentation."""
     pretty_source = '<alpha>\n  <beta>gamma</beta>\n</alpha>'
     serialized = parse_xml(pretty_source).as_xml()
     flat_xml = flatten_xml(serialized)
     assert flat_xml == '<alpha><beta>gamma</beta></alpha>', flat_xml