def test_delete_item(self):
    """Exercise ``del`` on child indices and slices, including error cases.

    Looking up a tag that is not among the children must raise
    ``ValueError``; deleting at an index outside the range of the four
    children must raise ``IndexError``.
    """
    source = '(root (A "0") (B "1") (C "2") (D "3"))'
    root = parse_sxpr(source)

    try:
        _ = root.index('E')
    except ValueError:
        pass
    else:
        assert False, 'ValueError expected'

    assert root[-1].tag_name == "D"

    # One past the last child and one before the first child.
    for out_of_range in (4, -5):
        try:
            del root[out_of_range]
        except IndexError:
            pass
        else:
            assert False, 'IndexError expected'

    del root[-2]
    assert 'C' not in root
    del root[0]
    assert 'A' not in root

    # Slice deletion on a freshly parsed tree: only "A" and "D" remain.
    root = parse_sxpr(source)
    del root[1:3]
    assert str(root) == "03"
def test_milestone_segment(self):
    """Check ``milestone_segment()`` for several pairs of milestone nodes.

    The assertions cover milestones in different branches, milestones at
    the outer ends of the tree, a milestone pair ending at a top-level
    child, the degenerate cases (tree, tree) and (B, B), and two
    milestones sharing the same parent.
    """
    tree = parse_sxpr(
        '(root (left (A "a") (B "b") (C "c")) (middle "-") (right (X "x") (Y "y") (Z "z")))'
    ).with_pos(0)
    left_branch = tree.pick('left')
    right_branch = tree.pick('right')
    middle_node = tree.pick('middle')
    node_b = tree.pick('B')
    node_y = tree.pick('Y')

    # From the middle of the left branch to the middle of the right branch:
    # the partially covered branches compare unequal, the rest compares equal.
    cut = tree.milestone_segment(node_b, node_y)
    assert cut.content == "bc-xy"
    assert left_branch != cut.pick('left')
    assert right_branch != cut.pick('right')
    assert node_b == cut.pick('B')
    assert node_y == cut.pick('Y')
    assert middle_node == cut.pick('middle')

    # From the very first to the very last leaf.
    node_a = tree.pick('A')
    node_z = tree.pick('Z')
    cut = tree.milestone_segment(node_a, node_z)
    assert cut == tree
    assert cut.content == "abc-xyz"

    # From the first leaf up to the top-level "middle" node.
    cut = tree.milestone_segment(node_a, middle_node)
    assert cut.equals(
        parse_sxpr('(root (left (A "a") (B "b") (C "c")) (middle "-"))'))
    assert cut.content == "abc-"
    assert cut != tree
    assert node_a == cut.pick('A')
    assert middle_node == cut.pick('middle')

    # Identical start and end milestone.
    whole = tree.milestone_segment(tree, tree)
    assert whole == tree
    assert tree.milestone_segment(node_b, node_b) == node_b

    # Both milestones are children of "left".
    node_c = tree.pick('C')
    cut = tree.milestone_segment(node_b, node_c)
    assert cut.equals(parse_sxpr('(left (B "b") (C "c"))'))
def test_insert_nodes(self):
    """Check the ``insert`` transformation for several position arguments.

    Three inserts (positions 0, 4 and -2) are applied one after another
    to the same tree; afterwards insertion into a node with empty content
    is checked, once with position 0 and once with a position callable
    that returns ``None``.
    """
    tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)
    # (position, (tag, content) of the inserted node, serialization afterwards)
    chained_inserts = (
        (0, ('c', '=>'),
         '(A (c "=>") (B "1") (B "2") (X "3"))'),
        (4, ('d', '<='),
         '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))'),
        (-2, ('e', '|'),
         '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))'),
    )
    for position, (tag, content), expected in chained_inserts:
        traverse(tree, {'A': insert(position, node_maker(tag, content))})
        serialized = tree.serialize()
        assert serialized == expected, serialized

    # Insertion into a node with empty content.
    tree = parse_sxpr('(A "")').with_pos(0)
    traverse(tree, {'A': insert(0, node_maker('B', 'b'))})
    serialized = tree.serialize()
    assert serialized == '(A (B "b"))'

    # With a position callable returning None the result is just "(A)".
    tree = parse_sxpr('(A "")').with_pos(0)
    traverse(tree, {'A': insert(lambda ctx: None, node_maker('B', 'b'))})
    serialized = tree.serialize()
    assert serialized == '(A)'
def test_sexpr_attributes(self):
    """Attributes set via ``attr`` serialize like attributes that were parsed."""
    tree = parse_sxpr('(A "B")')

    tree.attr['attr'] = "value"
    one_attribute = parse_sxpr('(A `(attr "value") "B")')
    assert tree.as_sxpr() == one_attribute.as_sxpr()

    tree.attr['attr2'] = "value2"
    two_attributes = parse_sxpr('(A `(attr "value") `(attr2 "value2") "B")')
    assert tree.as_sxpr() == two_attributes.as_sxpr()
def test_sexpr(self):
    """Compare non-compact, unflattened ``as_sxpr()`` output to fixed strings."""
    layout = dict(compact=False, flatten_threshold=0)

    tree = parse_sxpr('(A (B "C") (D "E"))')
    rendered = tree.as_sxpr(**layout)
    assert rendered == '(A\n (B\n "C"\n )\n (D\n "E"\n )\n)', rendered

    # One more nesting level.
    tree = parse_sxpr('(A (B (C "D") (E "F")) (G "H"))')
    rendered = tree.as_sxpr(**layout)
    assert rendered == ('(A\n (B\n (C\n "D"\n )\n (E\n "F"\n )'
                        '\n )\n (G\n "H"\n )\n)')

    # Leaf content containing line breaks.
    tree = parse_sxpr('(A (B (C "D\nX") (E "F")) (G " H \n Y "))')
    rendered = tree.as_sxpr(**layout)
    assert rendered == ('(A\n (B\n (C\n "D"\n "X"\n )'
                        '\n (E\n "F"\n )\n )\n (G\n " H "\n " Y "\n )\n)'), rendered
def test_parse_s_expression_w_attributes(self):
    """Parse S-expressions that carry attributes on the root node."""
    simple = '(A `(attr "1") (B "X"))'
    roundtrip = flatten_sxpr(parse_sxpr(simple).as_sxpr())
    assert roundtrip == '(A `(attr "1") (B "X"))'

    # A deeper tree: the string content and the root attribute are checked.
    nested = (
        '(BedeutungsPosition `(unterbedeutungstiefe "0") (Bedeutung (Beleg '
        '(Quellenangabe (Quelle (Autor "LIUTPR.") (L " ") (Werk "leg.")) '
        '(L " ") (BelegStelle (Stellenangabe (Stelle "21")) (L " ") '
        '(BelegText (TEXT "...")))))))'
    )
    tree = parse_sxpr(nested)
    assert str(tree) == "LIUTPR. leg. 21 ..."
    assert tree.attr['unterbedeutungstiefe'] == '0'
def test_position_assignment(self):
    """Check ``pos`` values after ``with_pos()`` and after replacing results."""
    tree = parse_sxpr('(A (B (C "D") (E "FF")) (G "HHH"))')

    # Positions are assigned starting from 0.
    tree.with_pos(0)
    branch_b = tree['B']
    assigned = (tree.pos, branch_b.pos, tree['B']['C'].pos,
                tree['B']['E'].pos, tree['G'].pos)
    assert assigned == (0, 0, 0, 1, 3)

    # Children without positions that become the result of 'G' receive
    # positions following from the position of 'G'.
    tree['G'].result = parse_sxpr('(_ (N "OOOO") (P "Q"))').children
    assert (tree['G']['N'].pos, tree['G']['P'].pos) == (3, 7)

    # Children that already carry positions keep them
    # (original note: pos-values should always reflect the source position).
    positioned = parse_sxpr('(_ (N "OOOO") (P "Q"))').with_pos(1)
    tree['G'].result = positioned.children
    assert (tree['G']['N'].pos, tree['G']['P'].pos) == (1, 5)
def test_deepcopy(self):
    """A deep copy of a ``RootNode`` tree must be independent of the original.

    Errors added to the original and results changed in the original must
    not show up in the copy.
    """
    source = '(a (b c) (d (e f) (h i)))'

    def pristine():
        # Fresh parse of the unmodified source, used as comparison baseline.
        return parse_sxpr(source)

    tree = RootNode(pristine())
    tree.with_pos(0)
    duplicate = copy.deepcopy(tree)

    assert tree.equals(duplicate)
    assert tree.as_sxpr() == pristine().as_sxpr()
    assert duplicate.as_sxpr() == pristine().as_sxpr()

    # An error on the original changes its serialization, not the copy's.
    tree.add_error(tree, Error('Test Error', 0))
    assert not duplicate.errors
    assert tree.as_sxpr() != pristine().as_sxpr()
    assert duplicate.as_sxpr() == pristine().as_sxpr()

    # Changing a result in the original leaves the copy untouched.
    tree['d'].result = "x"
    assert not tree.equals(duplicate)
    assert duplicate.equals(pristine())
    assert tree.equals(parse_sxpr('(a (b c) (d x))'))

    # equals() holds above, yet the serializations differ
    # (original note: this also checks for errors equality).
    assert parse_sxpr('(a (b c) (d x))').as_sxpr() != tree.as_sxpr()
def test_find(self):
    """Check ``select_if()`` with a match function and ``include_root``."""

    def match_tag_name(node, tag_name):
        return node.tag_name == tag_name

    def is_x(node):
        return match_tag_name(node, "X")

    def is_a(node):
        return match_tag_name(node, 'a')

    tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')
    found = list(tree.select_if(is_x))
    assert len(found) == 2, len(found)
    first, second = found[0], found[1]
    assert str(first) == 'd', str(first)
    assert str(second) == 'F', str(second)
    assert first.equals(parse_sxpr('(X (c d))'))
    assert second.equals(parse_sxpr('(X F)'))

    # The root node 'a' is found only if include_root is True.
    assert list(tree.select_if(is_a, include_root=True))
    assert not list(tree.select_if(is_a, include_root=False))
def test_as_etree(self):
    """Convert trees to ``ElementTree`` elements and back again.

    Covers a plain tree with an attribute, mixed content with a comment,
    and empty tags collected by ``parse_xml()``.
    """
    import xml.etree.ElementTree as ET

    # Plain tree with one attribute.
    source_sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
    expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
    element = parse_sxpr(source_sxpr).as_etree()
    assert ET.tostring(element, encoding="unicode") == expected_xml, \
        ET.tostring(element, encoding="unicode")
    node = Node.from_etree(element)
    assert node.as_sxpr() == source_sxpr

    # Mixed content: text pieces show up as ":Text" nodes, the comment
    # does not appear in the expected S-expression.
    element = ET.XML(
        '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
    node = Node.from_etree(element)
    expected_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
    assert node.as_sxpr() == expected_sxpr

    # The same tree survives a trip through serialized XML.
    element = node.as_etree()
    element = ET.XML(ET.tostring(element, encoding="unicode"))
    node = Node.from_etree(element)
    assert node.as_sxpr() == expected_sxpr

    # Empty tags reported by parse_xml() are handed on to as_etree().
    empty_tags = set()
    tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                     out_empty_tags=empty_tags)
    etree = tree.as_etree(empty_tags=empty_tags)
    serialized = ET.tostring(etree).replace(b' /', b'/')
    assert serialized == b'<a><b>1<c>2<d/>3</c></b>4</a>'
    tree = Node.from_etree(etree)
    assert flatten_sxpr(tree.as_sxpr()) == \
        '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
def test_insert_remove(self):
    """Remove a child node and re-insert it at the front."""
    root = parse_sxpr('(R (A "1") (B "2") (C "3"))')
    child_b = root.pick('B')

    root.remove(child_b)
    assert root.as_sxpr() == '(R (A "1") (C "3"))'

    root.insert(0, child_b)
    assert root.as_sxpr() == '(R (B "2") (A "1") (C "3"))'
def test_attr_serialization_and_parsing(self):
    """Serialize a node with attributes and a position, then parse it back.

    For XML and S-expressions the position appears in the output only
    when an empty string is passed as first argument to the serializer.
    """
    employee = Node('employee', 'James Bond').with_pos(46)
    employee.attr['branch'] = 'Secret Service'
    employee.attr['id'] = '007'

    # JSON: only verifies that the round trip runs; the result is not inspected.
    json_text = employee.as_json()
    tree = parse_json_syntaxtree(json_text)

    # XML
    xml_text = employee.as_xml()
    assert xml_text.find('_pos') < 0
    xml_text = employee.as_xml('')
    assert xml_text.find('_pos') >= 0
    tree = parse_xml(xml_text)
    assert tree.pos == 46
    assert '_pos' not in tree.attr
    # With ignore_pos the position stays an ordinary attribute.
    tree = parse_xml(xml_text, ignore_pos=True)
    assert '_pos' in tree.attr
    assert tree._pos < 0

    # S-expression
    sxpr_text = employee.as_sxpr()
    assert sxpr_text.find('pos') < 0
    sxpr_text = employee.as_sxpr('')
    assert sxpr_text.find('pos') >= 0
    tree = parse_sxpr(sxpr_text)
    assert tree.pos == 46
    assert 'pos' not in tree.attr
def test_sxpr_roundtrip(self):
    """Parsing and re-serializing an S-expression reproduces the input.

    The sample contains parentheses inside quoted strings.
    """
    original = (
        '(BelegText (Anker "interdico_1") (BelegLemma "inter.|ticente") (TEXT ", (") '
        '(Anker "interdico_2") (BelegLemma "inter.|titente") (L " ") (Zusatz "var. l.") '
        '(TEXT ") Deo."))'
    )
    reserialized = flatten_sxpr(parse_sxpr(original).as_sxpr())
    assert reserialized == original
def test_mock_syntax_tree_with_classes(self):
    """Check tag names parsed from ``name:class`` style node heads."""
    tree = parse_sxpr('(a:class1 (b:class2 x) (:class3 y) (c z))')
    assert tree.tag_name == 'a'
    # Expected tag names of the three children, in order.
    for index, expected in enumerate(('b', ':class3', 'c')):
        assert tree.result[index].tag_name == expected
def test_parse_s_expression_malformed(self):
    """An S-expression with missing closing parentheses raises ``ValueError``."""
    unbalanced = '(A (B 1) (C (D (E 2) (F 3)) (G 4) (H (I 5) (J 6)) (K 7)'
    try:
        parse_sxpr(unbalanced)
    except ValueError:
        pass
    else:
        # Reached only if parse_sxpr() accepted the malformed input.
        assert False, "ValueError expected!"
def test_roundtrip(self):
    """S-expression -> XML -> tree -> XML yields the same flattened XML."""
    first_xml = flatten_xml(parse_sxpr('(a (b c) (d (e f) (h i)))').as_xml())
    assert first_xml == '<a><b>c</b><d><e>f</e><h>i</h></d></a>'

    reparsed = parse_xml(first_xml)
    assert first_xml == flatten_xml(reparsed.as_xml())
class TestOptimizations:
    """Tests for tree transformations that merge anonymous leaf nodes."""

    # Shared fixture; every test works on its own deep copy.
    model = RootNode(
        parse_sxpr(
            '(array (number "1") (number (:RegExp "2") (:RegExp ".") '
            '(:RegExp "0")) (string "a string"))'
        )
    ).with_pos(0)

    def raise_error(self, context):
        """Transformation that fails the test whenever it is invoked."""
        raise AssertionError()

    def test_squeeze_tree(self):
        """``merge_treetops()`` joins the ``:RegExp`` leaves of 'number'."""
        tree = copy.deepcopy(TestOptimizations.model)
        merge_treetops(tree)
        expected = '(array (number "1") (number "2.0") (string "a string"))'
        assert tree.as_sxpr() == expected

    def test_blocking(self):
        """With ``BLOCK_ANONYMOUS_LEAVES`` the ':RegExp' entry must not run.

        ``raise_error`` is registered for ':RegExp'; the test passes only
        if it is never called during the traversal.
        """
        tree = copy.deepcopy(TestOptimizations.model)
        traverse(tree, {
            '<': BLOCK_ANONYMOUS_LEAVES,
            'number': [merge_leaves, reduce_single_child],
            ':RegExp': self.raise_error,
        })
        expected = parse_sxpr(
            '(array (number "1") (number "2.0") (string "a string"))')
        assert tree.equals(expected)
def test_copy_errors(self):
    """A deep copy serializes exactly like the original, errors included.

    One error is attached to node 'C', another is added without a node.
    """
    tree = RootNode(parse_sxpr('(A (B "1") (C "2"))').with_pos(0))
    tree.add_error(tree['C'], Error('error', 1))
    tree.add_error(None, Error('unspecific error', 2))

    before = tree.as_sxpr()
    after = copy.deepcopy(tree).as_sxpr()
    # Equal output means the error messages are still part of the copy.
    assert after == before
def test_positions_of(self):
    """Check ``positions_of`` directly and as position argument of ``insert``."""
    tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
    # Tag name -> expected child indices; 'A' is the root itself and 'X'
    # does not occur at all.
    lookups = (
        ('A', ()),
        ('X', ()),
        ('C', (1, )),
        ('B', (0, 2)),
    )
    for tag, expected in lookups:
        assert positions_of([tree], tag) == expected

    # Insert a node at the position of 'D'.
    tree = parse_sxpr('(A (B 1) (C 2) (D 3))').with_pos(0)
    traverse(tree, {'A': insert(positions_of('D'), node_maker('X', '0'))})
    serialized = tree.serialize()
    assert serialized == '(A (B "1") (C "2") (X "0") (D "3"))', serialized

    # 'Z' does not occur among the children: the tree stays as it was.
    traverse(tree, {'A': insert(positions_of('Z'), node_maker('X', '0'))})
    serialized = tree.serialize()
    assert serialized == '(A (B "1") (C "2") (X "0") (D "3"))', serialized
def test_mock_syntax_tree(self):
    """Parse S-expressions with unquoted and variously quoted leaf content."""
    unquoted = '(a (b c) (d e) (f (g h)))'
    tree = parse_sxpr(unquoted)
    assert flatten_sxpr(tree.as_sxpr().replace('"', '')) == unquoted

    # Triple-quoted, single-quoted and double-quoted strings side by side.
    mixed_quotes = '''(a (b """c""" 'k' "l") (d e) (f (g h)))'''
    without_quotes = '(a (b c k l) (d e) (f (g h)))'
    tree = parse_sxpr(mixed_quotes)
    assert flatten_sxpr(tree.as_sxpr().replace('"', '')) == without_quotes

    # Consistently double-quoted input round-trips unchanged.
    double_quoted = '(a (b "c" "k" "l") (d "e") (f (g "h")))'
    tree = parse_sxpr(double_quoted)
    assert flatten_sxpr(tree.as_sxpr()) == double_quoted

    # Unquoted words in one leaf end up in a single string.
    tree = parse_sxpr(without_quotes)
    assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c k l") (d "e") (f (g "h")))'
def test_merge_adjacent(self):
    """``merge_adjacent`` joins neighbouring 'TEXT' and 'L' children.

    'T' nodes separate the runs and stay in place; a single trailing 'L'
    keeps its tag name.
    """
    rules = {
        'SENTENCE': merge_adjacent(is_one_of('TEXT', 'L'), 'TEXT')
    }

    sentence = parse_sxpr('(SENTENCE (TEXT "Guten") (L " ") (TEXT "Tag") '
                          ' (T "\n") (TEXT "Hallo") (L " ") (TEXT "Welt")'
                          ' (T "\n") (L " "))')
    traverse(sentence, rules)
    assert tree_sanity_check(sentence)
    assert sentence.pick_child('TEXT').result == "Guten Tag"
    assert sentence[2].result == "Hallo Welt"
    assert sentence[-1].tag_name == 'L'
    assert 'T' in sentence

    # A node without children keeps its content.
    sentence = parse_sxpr('(SENTENCE "Hallo Welt")')
    traverse(sentence, rules)
    assert sentence.content == "Hallo Welt", sentence.content
def test_move_adjacent3(self):
    """Run ``move_adjacent`` on a tree that starts with two whitespace nodes.

    The test contains no assertions; it only fails if the traversal raises.
    """
    def is_whitespace(ctx):
        return ctx[-1].tag_name == WHITESPACE_PTYPE

    sentence = parse_sxpr(
        '(SENTENCE (:Whitespace " ") (:Whitespace " ") '
        '(TEXT (PHRASE "Guten Tag") (:Whitespace " ")))')
    traverse(sentence, {'TEXT': move_adjacent(is_whitespace)})
def test_parse_s_expression(self):
    """Parse simple S-expressions and reject input that is not one tree."""
    tree = parse_sxpr('(a (b c))')
    assert flatten_sxpr(tree.as_sxpr()) == '(a (b "c"))', \
        flatten_sxpr(tree.as_sxpr())

    # Content spread over several lines is flattened to separate strings.
    tree = parse_sxpr('(a i\nj\nk)')
    assert flatten_sxpr(tree.as_sxpr()) == '(a "i" "j" "k")', \
        flatten_sxpr(tree.as_sxpr())

    # No enclosing parentheses at all.
    try:
        tree = parse_sxpr('a b c')
    except ValueError:
        pass
    else:
        assert False, "parse_sxpr() should raise a ValueError " \
                      "if argument is not a tree!"

    # One closing parenthesis too many.
    try:
        tree = parse_sxpr('(a (b c)))')
    except ValueError:
        pass
    else:
        assert False, "parse_sxpr() should raise a ValueError for too many matching brackets."
def test_xml_tag_omission(self):
    """Tags listed as inline and string tags are left out of the XML output."""
    tree = parse_sxpr('(XML (T "Hallo") (L " ") (T "Welt!"))')
    all_tags = {'XML', 'T', 'L'}

    plain = tree.as_xml(inline_tags=all_tags, string_tags=all_tags)
    assert plain == "Hallo Welt!"

    # A tag that carries an attribute is written out nonetheless.
    tree.pick_child('T').attr['class'] = "kursiv"
    with_attribute = tree.as_xml(inline_tags=all_tags, string_tags=all_tags)
    assert with_attribute == '<T class="kursiv">Hallo</T> Welt!'
def test_select_children(self):
    """``select_children()`` and ``indices()`` look at direct children only."""
    tree = parse_sxpr('(A (B 1) (C (X 1) (Y 1)) (B 2))')

    child_tags = [nd.tag_name for nd in tree.select_children(ALL_NODES)]
    assert child_tags == ['B', 'C', 'B']

    # reverse=True yields the matches last to first.
    contents_of_b = [nd.content
                     for nd in tree.select_children('B', reverse=True)]
    assert contents_of_b == ['2', '1']

    indices_of_b = tree.indices('B')
    assert indices_of_b == (0, 2)
def test_remove_content(self):
    """``remove_content`` only takes effect after ``reduce_single_child``.

    Applied to the unreduced tree, the '#' stays in the content; after
    the single child has been reduced, the same call removes it.
    """
    cst = parse_sxpr(
        '(BelegLemma (:Series (:RegExp "#") (LAT_WORT (:RegExp "facitergula"))))'
    )
    context = [cst]

    remove_content(context, '#')
    assert cst.content == "#facitergula", str(cst.content)

    reduce_single_child(context)
    remove_content(context, '#')
    assert cst.content == "facitergula"
def test_index(self):
    """``index()`` returns a child's position or raises ``ValueError``."""
    tree = parse_sxpr('(a (b 0) (c 1) (d 2))')
    for tag, position in (('d', 2), ('b', 0), ('c', 1)):
        assert tree.index(tag) == position

    # 'x' is not a child of the tree.
    try:
        i = tree.index('x')
    except ValueError:
        pass
    else:
        raise AssertionError('ValueError expected!')
def test_contains(self):
    """The ``in`` operator tests direct children; ``select()`` goes deeper.

    The root tag 'a' and the grandchild tag 'c' are not "in" the tree,
    although ``select()`` finds both of them.
    """
    tree = parse_sxpr('(a (b X) (X (c d)) (e (X F)))')

    # The root is found by select() only when the second argument is True.
    assert 'a' not in tree
    assert any(tree.select('a', True))
    assert not any(tree.select('a', False))

    for child_tag in ('b', 'X', 'e'):
        assert child_tag in tree

    assert 'c' not in tree
    assert any(tree.select('c', False))
def test_rstrip(self):
    """``rstrip`` removes whitespace nodes; the output has none afterwards."""
    token = parse_sxpr('(_Token (:Re test) (:Whitespace " "))')
    rstrip([token])
    assert token.as_sxpr().find(":Whitespace") < 0, token.as_sxpr()

    # A second application changes nothing.
    stripped_once = token.as_sxpr()
    rstrip([token])
    assert stripped_once == token.as_sxpr()

    # Node without any content.
    token = parse_sxpr('(_Token)')
    rstrip([token])
    assert token.as_sxpr() == '(_Token)'

    # Several whitespace nodes in a row.
    token = parse_sxpr(
        '(_Token (:Re test) (:Whitespace " ") (:Whitespace " "))')
    rstrip([token])
    assert token.as_sxpr().find(":Whitespace") < 0

    # Whitespace nested one level deeper between trailing whitespace nodes.
    token = parse_sxpr(
        '(_Token (:Re test) (:Whitespace " ") (Deeper (:Whitespace " ")) '
        '(:Whitespace " "))')
    rstrip([token])
    assert token.as_sxpr().find(":Whitespace") < 0, token.as_sxpr()
def test_content_property(self):
    """``content`` reflects results that were changed after a first read."""
    tree = RootNode(parse_sxpr('(a (b c) (d e))'))
    content_before = tree.content

    node_b = tree.pick('b')
    node_d = tree.pick('d')
    node_b.result = "recently "
    node_d.result = "changed"

    # The previously read string keeps its value; a new read shows the changes.
    assert content_before != tree.content
    assert content_before == 'ce'
    assert tree.content == 'recently changed'