Пример #1
0
 def test_delete_item(self):
     """Delete children by index and by slice; invalid accesses must raise."""
     source = '(root (A "0") (B "1") (C "2") (D "3"))'
     node = parse_sxpr(source)

     # 'E' is not among the children, so index() has to raise ValueError.
     try:
         node.index('E')
     except ValueError:
         pass
     else:
         assert False, 'ValueError expected'
     assert node[-1].tag_name == "D"

     # With four children, the indices 4 and -5 are both out of range.
     for out_of_range in (4, -5):
         try:
             del node[out_of_range]
         except IndexError:
             pass
         else:
             assert False, 'IndexError expected'

     del node[-2]
     assert 'C' not in node
     del node[0]
     assert 'A' not in node

     # Slice deletion, tried on a freshly parsed tree.
     node = parse_sxpr(source)
     del node[1:3]
     assert str(node) == "03"
Пример #2
0
 def test_milestone_segment(self):
     """Check ``milestone_segment()`` for several pairs of start and end nodes."""
     tree = parse_sxpr(
         '(root (left (A "a") (B "b") (C "c")) (middle "-") (right (X "x") (Y "y") (Z "z")))'
     ).with_pos(0)
     left, right, middle, node_b, node_y = [
         tree.pick(tag) for tag in ('left', 'right', 'middle', 'B', 'Y')]

     # From B to Y: 'left' and 'right' differ from the originals, the
     # milestones and 'middle' compare equal to them.
     cut = tree.milestone_segment(node_b, node_y)
     assert cut.content == "bc-xy"
     assert left != cut.pick('left')
     assert right != cut.pick('right')
     assert node_b == cut.pick('B')
     assert node_y == cut.pick('Y')
     assert middle == cut.pick('middle')

     # From the first leaf (A) to the last leaf (Z): equal to the whole tree.
     node_a = tree.pick('A')
     node_z = tree.pick('Z')
     cut = tree.milestone_segment(node_a, node_z)
     assert cut == tree
     assert cut.content == "abc-xyz"

     # From A to 'middle': the 'right' branch is missing from the segment.
     cut = tree.milestone_segment(node_a, middle)
     expected = parse_sxpr('(root (left (A "a") (B "b") (C "c")) (middle "-"))')
     assert cut.equals(expected)
     assert cut.content == "abc-"
     assert cut != tree
     assert node_a == cut.pick('A')
     assert middle == cut.pick('middle')

     # Identical start and end milestones.
     whole = tree.milestone_segment(tree, tree)
     assert whole == tree
     assert tree.milestone_segment(node_b, node_b) == node_b

     # Both milestones inside the same parent ('left').
     node_c = tree.pick('C')
     cut = tree.milestone_segment(node_b, node_c)
     assert cut.equals(parse_sxpr('(left (B "b") (C "c"))'))
Пример #3
0
    def test_insert_nodes(self):
        """Apply ``insert()`` transformations to node 'A' at various positions."""

        def insert_into(target, position, tag, text):
            """Run an insert-transformation for 'A'; return the serialized tree."""
            traverse(target, {'A': insert(position, node_maker(tag, text))})
            return target.serialize()

        tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)

        # position 0
        serialized = insert_into(tree, 0, 'c', '=>')
        assert serialized == '(A (c "=>") (B "1") (B "2") (X "3"))', serialized

        # position 4, applied to a node that has four children by now
        serialized = insert_into(tree, 4, 'd', '<=')
        assert serialized == '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))', serialized

        # negative position
        serialized = insert_into(tree, -2, 'e', '|')
        assert serialized == '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))', serialized

        # node with empty content
        tree = parse_sxpr('(A "")').with_pos(0)
        serialized = insert_into(tree, 0, 'B', 'b')
        assert serialized == '(A (B "b"))'

        # position given as a callable that yields None
        tree = parse_sxpr('(A "")').with_pos(0)
        serialized = insert_into(tree, lambda ctx: None, 'B', 'b')
        assert serialized == '(A)'
Пример #4
0
 def test_sexpr_attributes(self):
     """Attributes set via ``attr`` serialize like attributes parsed from text."""
     tree = parse_sxpr('(A "B")')

     # one attribute
     tree.attr['attr'] = "value"
     with_one_attr = parse_sxpr('(A `(attr "value") "B")')
     assert tree.as_sxpr() == with_one_attr.as_sxpr()

     # a second attribute added on top
     tree.attr['attr2'] = "value2"
     with_two_attrs = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
     assert tree.as_sxpr() == with_two_attrs.as_sxpr()
Пример #5
0
 def test_sexpr(self):
     """Check the multi-line layout of ``as_sxpr(compact=False, flatten_threshold=0)``."""

     def expanded(sxpr):
         """Parse ``sxpr`` and serialize it with the options under test."""
         return parse_sxpr(sxpr).as_sxpr(compact=False, flatten_threshold=0)

     s = expanded('(A (B "C") (D "E"))')
     assert s == '\n'.join([
         '(A',
         '  (B',
         '    "C"',
         '  )',
         '  (D',
         '    "E"',
         '  )',
         ')',
     ]), s

     s = expanded('(A (B (C "D") (E "F")) (G "H"))')
     assert s == '\n'.join([
         '(A',
         '  (B',
         '    (C',
         '      "D"',
         '    )',
         '    (E',
         '      "F"',
         '    )',
         '  )',
         '  (G',
         '    "H"',
         '  )',
         ')',
     ])

     # Content containing a line break comes out as one quoted string per line.
     s = expanded('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
     assert s == '\n'.join([
         '(A',
         '  (B',
         '    (C',
         '      "D"',
         '      "X"',
         '    )',
         '    (E',
         '      "F"',
         '    )',
         '  )',
         '  (G',
         '    " H "',
         '    " Y "',
         '  )',
         ')',
     ]), s
Пример #6
0
 def test_parse_s_expression_w_attributes(self):
     """Parse S-expressions that carry attributes (backtick syntax)."""
     # A flat expression must survive parsing and re-serialization unchanged.
     flat = '(A `(attr "1") (B "X"))'
     reserialized = flatten_sxpr(parse_sxpr(flat).as_sxpr())
     assert reserialized == '(A `(attr "1") (B "X"))'

     # A deeply nested, multi-line expression with an attribute on the root.
     nested = """(BedeutungsPosition `(unterbedeutungstiefe "0")
              (Bedeutung
                (Beleg
                  (Quellenangabe (Quelle (Autor "LIUTPR.") (L " ") (Werk "leg.")) (L " ")
                    (BelegStelle (Stellenangabe (Stelle "21")) (L " ")
                      (BelegText (TEXT "...")))))))"""
     tree = parse_sxpr(nested)
     assert str(tree) == "LIUTPR. leg. 21 ..."
     assert tree.attr['unterbedeutungstiefe'] == '0'
Пример #7
0
 def test_position_assignment(self):
     """Check the ``pos`` values of nodes after ``with_pos()`` and after
     replacing a node's result with new children."""
     tree = parse_sxpr('(A (B (C "D") (E "FF")) (G "HHH"))')

     # assignment of position values
     tree.with_pos(0)
     b_node, g_node = tree['B'], tree['G']
     positions = (tree.pos, b_node.pos, b_node['C'].pos,
                  b_node['E'].pos, g_node.pos)
     assert positions == (0, 0, 0, 1, 3)

     # assignment of unassigned position values
     replacement = parse_sxpr('(_ (N "OOOO") (P "Q"))')
     g_node.result = replacement.children
     assert (g_node['N'].pos, g_node['P'].pos) == (3, 7)

     # no reassignment of position values
     # (because pos-values should always reflect source position)
     replacement = parse_sxpr('(_ (N "OOOO") (P "Q"))').with_pos(1)
     g_node.result = replacement.children
     assert (g_node['N'].pos, g_node['P'].pos) == (1, 5)
Пример #8
0
    def test_deepcopy(self):
        """A deep copy of a RootNode is equal to, but independent of, the original."""
        sxpr = '(a (b c) (d (e f) (h i)))'
        tree = RootNode(parse_sxpr(sxpr))
        tree.with_pos(0)
        tree_copy = copy.deepcopy(tree)

        # Right after copying, both trees look like a freshly parsed one.
        assert tree.equals(tree_copy)
        for candidate in (tree, tree_copy):
            assert candidate.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # An error added to the original must not show up in the copy.
        tree.add_error(tree, Error('Test Error', 0))
        assert not tree_copy.errors
        assert tree.as_sxpr() != parse_sxpr(sxpr).as_sxpr()
        assert tree_copy.as_sxpr() == parse_sxpr(sxpr).as_sxpr()

        # Changing the original's structure leaves the copy untouched.
        tree['d'].result = "x"
        assert not tree.equals(tree_copy)
        assert tree_copy.equals(parse_sxpr(sxpr))
        changed_sxpr = '(a (b c) (d x))'
        assert tree.equals(parse_sxpr(changed_sxpr))

        # equals() holds, yet the serializations differ -- presumably because
        # the serialization of `tree` includes the error added above.
        assert parse_sxpr(changed_sxpr).as_sxpr() != tree.as_sxpr()
Пример #9
0
    def test_find(self):
        """Select nodes with ``select_if()`` using a tag-name predicate."""

        def tag_matcher(tag_name):
            """Return a predicate that is true for nodes named ``tag_name``."""
            def matches(node):
                return node.tag_name == tag_name
            return matches

        tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
        found = list(tree.select_if(tag_matcher("X")))
        assert len(found) == 2, len(found)
        first, second = found
        assert str(first) == 'd', str(first)
        assert str(second) == 'F', str(second)
        assert first.equals(parse_sxpr('(X (c d))'))
        assert second.equals(parse_sxpr('(X F)'))

        # The root node 'a' is only found if `include_root` is True.
        is_root_tag = tag_matcher('a')
        assert list(tree.select_if(is_root_tag, include_root=True))
        assert not list(tree.select_if(is_root_tag, include_root=False))
Пример #10
0
 def test_as_etree(self):
     """Convert between nodes and ``xml.etree.ElementTree`` elements, both ways."""
     import xml.etree.ElementTree as ET

     # Node -> ElementTree -> Node
     sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
     xml_text = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
     element = parse_sxpr(sxpr).as_etree()
     serialized = ET.tostring(element, encoding="unicode")
     assert serialized == xml_text, serialized
     assert Node.from_etree(element).as_sxpr() == sxpr

     # Mixed content: text becomes ':Text' nodes; the XML comment does not
     # appear in the expected result.
     element = ET.XML(
         '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
     node = Node.from_etree(element)
     expected_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
     assert node.as_sxpr() == expected_sxpr

     # ... and the result survives another trip through XML text.
     element = ET.XML(ET.tostring(node.as_etree(), encoding="unicode"))
     assert Node.from_etree(element).as_sxpr() == expected_sxpr

     # Empty tags collected while parsing are handed on to as_etree().
     empty_tags = set()
     tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                      out_empty_tags=empty_tags)
     etree = tree.as_etree(empty_tags=empty_tags)
     # tolerate both "<d />" and "<d/>" in the serialized output
     as_bytes = ET.tostring(etree).replace(b' /', b'/')
     assert as_bytes == b'<a><b>1<c>2<d/>3</c></b>4</a>'
     tree = Node.from_etree(etree)
     assert flatten_sxpr(tree.as_sxpr()) == \
         '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
Пример #11
0
 def test_insert_remove(self):
     """Remove a child node and re-insert it at the front."""
     parent = parse_sxpr('(R (A "1") (B "2") (C "3"))')
     child_b = parent.pick('B')

     parent.remove(child_b)
     after_removal = parent.as_sxpr()
     assert after_removal == '(R (A "1") (C "3"))'

     parent.insert(0, child_b)
     after_insertion = parent.as_sxpr()
     assert after_insertion == '(R (B "2") (A "1") (C "3"))'
Пример #12
0
    def test_attr_serialization_and_parsing(self):
        """Serialize a node with attributes and a position, then parse it back.

        For XML and S-expressions the position is only written when the
        serializer is called with ``''`` as its first argument; parsing the
        result restores ``pos`` without leaving a position entry in ``attr``.
        """
        n = Node('employee', 'James Bond').with_pos(46)
        n.attr['branch'] = 'Secret Service'
        n.attr['id'] = '007'

        # JSON: smoke test only -- parsing the serialization must not raise.
        json_str = n.as_json()
        parse_json_syntaxtree(json_str)

        # XML
        xml = n.as_xml()
        assert '_pos' not in xml
        xml = n.as_xml('')
        assert '_pos' in xml
        tree = parse_xml(xml)
        assert tree.pos == 46
        assert '_pos' not in tree.attr
        # with ignore_pos=True the position stays a plain attribute
        tree = parse_xml(xml, ignore_pos=True)
        assert '_pos' in tree.attr
        assert tree._pos < 0

        # S-Expression
        sxpr = n.as_sxpr()
        assert 'pos' not in sxpr
        sxpr = n.as_sxpr('')
        assert 'pos' in sxpr
        tree = parse_sxpr(sxpr)
        assert tree.pos == 46
        assert 'pos' not in tree.attr
Пример #13
0
 def test_sxpr_roundtrip(self):
     """Parsing and re-serializing an S-expression reproduces the input.

     The sample contains brackets inside quoted strings.
     """
     sxpr = (
         '(BelegText (Anker "interdico_1") (BelegLemma "inter.|ticente") (TEXT ", (") '
         '(Anker "interdico_2") (BelegLemma "inter.|titente") (L " ") (Zusatz "var. l.") '
         '(TEXT ") Deo."))')
     reserialized = flatten_sxpr(parse_sxpr(sxpr).as_sxpr())
     assert reserialized == sxpr
Пример #14
0
 def test_mock_syntax_tree_with_classes(self):
     """Tag names of nodes parsed from ``name:class`` notation.

     The part after the colon is dropped when a name precedes it; a bare
     ``:class3`` is kept as the tag name.
     """
     tree = parse_sxpr('(a:class1 (b:class2 x) (:class3 y) (c z))')
     assert tree.tag_name == 'a'
     child_tags = [tree.result[i].tag_name for i in range(3)]
     assert child_tags == ['b', ':class3', 'c']
Пример #15
0
 def test_parse_s_expression_malformed(self):
     """An S-expression with unbalanced brackets must raise ValueError."""
     try:
         # eleven opening brackets, but only nine closing ones
         parse_sxpr(
             '(A (B 1) (C (D (E 2) (F 3)) (G 4) (H (I 5) (J 6)) (K 7)')
     except ValueError:
         pass
     else:
         assert False, "ValueError expected!"
Пример #16
0
 def test_roundtrip(self):
     """Tree -> XML -> tree -> XML yields the same flattened XML."""
     original = parse_sxpr('(a (b c) (d (e f) (h i)))')
     flat_xml = flatten_xml(original.as_xml())
     assert flat_xml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

     reparsed = parse_xml(flat_xml)
     assert flat_xml == flatten_xml(reparsed.as_xml())
Пример #17
0
class TestOptimizations:
    """Tests for tree transformations, run on deep copies of one sample tree."""

    # Sample tree shared by the tests; every test works on its own deep copy.
    model = RootNode(
        parse_sxpr('''(array
          (number "1")
          (number
            (:RegExp "2")
            (:RegExp ".")
            (:RegExp "0"))
          (string "a string"))''')).with_pos(0)

    def raise_error(self, context):
        """Transformation stub that fails the test as soon as it is called."""
        raise AssertionError

    def test_squeeze_tree(self):
        """``merge_treetops()`` merges the ':RegExp' leaves into their parent."""
        sample = copy.deepcopy(TestOptimizations.model)
        merge_treetops(sample)
        expected = '(array (number "1") (number "2.0") (string "a string"))'
        assert sample.as_sxpr() == expected

    def test_blocking(self):
        """With BLOCK_ANONYMOUS_LEAVES under '<', the ':RegExp' entry (which
        would raise) must never run, while 'number' nodes are still merged."""
        sample = copy.deepcopy(TestOptimizations.model)
        traverse(sample, {
            '<': BLOCK_ANONYMOUS_LEAVES,
            'number': [merge_leaves, reduce_single_child],
            ':RegExp': self.raise_error,
        })
        expected = parse_sxpr(
            '(array (number "1") (number "2.0") (string "a string"))')
        assert sample.equals(expected)
Пример #18
0
 def test_copy_errors(self):
     """A deep copy of a RootNode serializes exactly like the original,
     including the errors that were added to it."""
     original = RootNode(parse_sxpr('(A (B "1") (C "2"))').with_pos(0))
     original.add_error(original['C'], Error('error', 1))
     original.add_error(None, Error('unspecific error', 2))

     before = original.as_sxpr()
     duplicate = copy.deepcopy(original)
     # is the error message still included?
     assert duplicate.as_sxpr() == before
Пример #19
0
    def test_positions_of(self):
        """``positions_of()`` as a direct query and as a position for ``insert()``."""
        tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
        # tag name -> expected indices among the children of the root node
        expectations = (
            ('A', ()),
            ('X', ()),
            ('C', (1, )),
            ('B', (0, 2)),
        )
        for tag, indices in expectations:
            assert positions_of([tree], tag) == indices

        # Called with just a tag name, the result is passed to insert() as
        # its position argument. 'D' exists, so an 'X' is inserted in front
        # of it; 'Z' does not exist, so the second run leaves the tree as is.
        tree = parse_sxpr('(A (B 1) (C 2) (D 3))').with_pos(0)
        expected = '(A (B "1") (C "2") (X "0") (D "3"))'
        for tag in ('D', 'Z'):
            traverse(tree, {'A': insert(positions_of(tag), node_maker('X', '0'))})
            serialized = tree.serialize()
            assert serialized == expected, serialized
Пример #20
0
    def test_mock_syntax_tree(self):
        """Round-trip S-expressions with unquoted and differently quoted content."""

        def roundtrip(sxpr):
            """Parse ``sxpr`` and return its serialization (not yet flattened)."""
            return parse_sxpr(sxpr).as_sxpr()

        # unquoted content
        sexpr = '(a (b c) (d e) (f (g h)))'
        assert flatten_sxpr(roundtrip(sexpr).replace('"', '')) == sexpr

        # test different quotation marks
        sexpr = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
        sexpr_stripped = '(a (b c k l) (d e) (f (g h)))'
        assert flatten_sxpr(roundtrip(sexpr).replace('"', '')) == sexpr_stripped

        # uniformly double-quoted content is reproduced as is
        sexpr_clean = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
        assert flatten_sxpr(roundtrip(sexpr_clean)) == sexpr_clean

        # several unquoted words end up in one string
        expected = '(a (b "c k l") (d "e") (f (g "h")))'
        assert flatten_sxpr(roundtrip(sexpr_stripped)) == expected
Пример #21
0
    def test_merge_adjacent(self):
        """``merge_adjacent()`` fuses runs of 'TEXT'/'L' children into 'TEXT' nodes."""
        rules = {
            'SENTENCE': merge_adjacent(is_one_of('TEXT', 'L'), 'TEXT')
        }

        tree = parse_sxpr('(SENTENCE (TEXT "Guten") (L " ") (TEXT "Tag") '
                          ' (T "\n") (TEXT "Hallo") (L " ") (TEXT "Welt")'
                          ' (T "\n") (L " "))')
        traverse(tree, rules)
        assert tree_sanity_check(tree)
        first_text = tree.pick_child('TEXT')
        assert first_text.result == "Guten Tag"
        assert tree[2].result == "Hallo Welt"
        # the lone trailing 'L' keeps its tag name; 'T' nodes remain
        assert tree[-1].tag_name == 'L'
        assert 'T' in tree

        # leaf nodes should be left untouched
        leaf = parse_sxpr('(SENTENCE "Hallo Welt")')
        traverse(leaf, rules)
        assert leaf.content == "Hallo Welt", leaf.content
Пример #22
0
 def test_move_adjacent3(self):
     """Run ``move_adjacent()`` on a 'TEXT' node with trailing whitespace
     next to whitespace siblings.

     There are no assertions; the test passes when the traversal does not raise.
     """

     def is_whitespace(ctx):
         """True if the last node of the context is a whitespace node."""
         return ctx[-1].tag_name == WHITESPACE_PTYPE

     sentence = parse_sxpr(
         '(SENTENCE  (:Whitespace " ") (:Whitespace " ")  '
         '(TEXT (PHRASE "Guten Tag") (:Whitespace " ")))')
     traverse(sentence, {'TEXT': move_adjacent(is_whitespace)})
Пример #23
0
 def test_parse_s_expression(self):
     """Parse well-formed S-expressions and reject malformed ones."""
     flat = flatten_sxpr(parse_sxpr('(a (b c))').as_sxpr())
     assert flat == '(a (b "c"))', flat

     # content spread over several lines yields one string per line
     flat = flatten_sxpr(parse_sxpr('(a i\nj\nk)').as_sxpr())
     assert flat == '(a "i" "j" "k")', flat

     # malformed input paired with the complaint raised if it gets accepted
     malformed = (
         ('a b c',
          "parse_sxpr() should raise a ValueError "
          "if argument is not a tree!"),
         ('(a (b c)))',
          "parse_sxpr() should raise a ValueError for too many matching brackets."),
     )
     for sxpr, complaint in malformed:
         try:
             parse_sxpr(sxpr)
         except ValueError:
             pass
         else:
             assert False, complaint
Пример #24
0
 def test_xml_tag_omission(self):
     """Tags listed as inline and string tags are left out of the XML output."""
     tree = parse_sxpr('(XML (T "Hallo") (L " ") (T "Welt!"))')
     all_tags = {'XML', 'T', 'L'}
     options = dict(inline_tags=all_tags, string_tags=all_tags)
     assert tree.as_xml(**options) == "Hallo Welt!"

     # tags with attributes will never be omitted
     first_t = tree.pick_child('T')
     first_t.attr['class'] = "kursiv"
     assert tree.as_xml(**options) == '<T class="kursiv">Hallo</T> Welt!'
Пример #25
0
 def test_select_children(self):
     """``select_children()`` and ``indices()`` look at immediate children only."""
     tree = parse_sxpr('(A (B 1) (C (X 1) (Y 1)) (B 2))')

     # all children, in document order ('X' and 'Y' below 'C' are not listed)
     child_tags = [child.tag_name for child in tree.select_children(ALL_NODES)]
     assert child_tags == ['B', 'C', 'B']

     # only the 'B' children, last one first
     b_contents = [child.content
                   for child in tree.select_children('B', reverse=True)]
     assert b_contents == ['2', '1']

     assert tree.indices('B') == (0, 2)
Пример #26
0
 def test_remove_content(self):
     """``remove_content()`` leaves the '#' alone while it sits two levels
     down, and removes it once ``reduce_single_child()`` has been applied."""
     cst = parse_sxpr(
         '(BelegLemma (:Series (:RegExp "#") (LAT_WORT (:RegExp "facitergula"))))'
     )
     context = [cst]

     # first attempt: the content is unchanged
     remove_content(context, '#')
     assert cst.content == "#facitergula", str(cst.content)

     # second attempt, after reducing the single ':Series' child
     reduce_single_child(context)
     remove_content(context, '#')
     assert cst.content == "facitergula"
Пример #27
0
 def test_index(self):
     """``index()`` returns a child's position; unknown tags raise ValueError."""
     tree = parse_sxpr('(a (b 0) (c 1) (d 2))')
     for tag, position in (('d', 2), ('b', 0), ('c', 1)):
         assert tree.index(tag) == position

     try:
         tree.index('x')
     except ValueError:
         pass
     else:
         raise AssertionError('ValueError expected!')
Пример #28
0
 def test_contains(self):
     """The ``in`` operator checks immediate children only; ``select()``
     also finds nodes further down."""
     tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

     # The root's own tag is not "in" the tree. select() yields it only when
     # the second argument is True (presumably `include_root`).
     assert 'a' not in tree
     assert any(tree.select('a', True))
     assert not any(tree.select('a', False))

     # immediate children
     for tag in ('b', 'X', 'e'):
         assert tag in tree

     # 'c' sits below 'X': not "in" the tree, but found by select()
     assert 'c' not in tree
     assert any(tree.select('c', False))
Пример #29
0
 def test_rstrip(self):
     """``rstrip()`` removes trailing ':Whitespace' nodes from a tree."""
     # a single trailing whitespace node
     cst = parse_sxpr('(_Token (:Re test) (:Whitespace " "))')
     rstrip([cst])
     stripped = cst.as_sxpr()
     assert ":Whitespace" not in stripped, stripped

     # stripping a second time changes nothing
     rstrip([cst])
     assert stripped == cst.as_sxpr()

     # a node without any content
     cst = parse_sxpr('(_Token)')
     rstrip([cst])
     assert cst.as_sxpr() == '(_Token)'

     # two trailing whitespace nodes in a row
     cst = parse_sxpr(
         '(_Token  (:Re test) (:Whitespace " ") (:Whitespace " "))')
     rstrip([cst])
     assert ":Whitespace" not in cst.as_sxpr()

     # trailing whitespace, some of it nested one level deeper
     cst = parse_sxpr(
         '(_Token  (:Re test) (:Whitespace " ") (Deeper (:Whitespace " ")) '
         '(:Whitespace " "))')
     rstrip([cst])
     stripped = cst.as_sxpr()
     assert ":Whitespace" not in stripped, stripped
Пример #30
0
 def test_content_property(self):
     """``content`` reflects changes made to the results of descendant nodes."""
     tree = RootNode(parse_sxpr('(a (b c) (d e))'))
     content_before = tree.content

     node_b = tree.pick('b')
     node_d = tree.pick('d')
     node_b.result = "recently "
     node_d.result = "changed"

     # The string read earlier is unaffected; a fresh read shows the new text.
     assert content_before != tree.content
     assert content_before == 'ce'
     assert tree.content == 'recently changed'