def test_insert_nodes(self):
    """`insert` adds a freshly made node at a literal, negative, or
    callable position; a position of None suppresses the insertion."""
    tree = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)
    # insert at the front
    traverse(tree, {'A': insert(0, node_maker('c', '=>'))})
    s1 = tree.serialize()
    assert s1 == '(A (c "=>") (B "1") (B "2") (X "3"))', s1
    # insert at the end
    traverse(tree, {'A': insert(4, node_maker('d', '<='))})
    s2 = tree.serialize()
    assert s2 == '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))', s2
    # a negative index counts from the end
    traverse(tree, {'A': insert(-2, node_maker('e', '|'))})
    s3 = tree.serialize()
    assert s3 == '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))', s3
    # inserting into an empty node replaces the empty content
    tree = parse_sxpr('(A "")').with_pos(0)
    traverse(tree, {'A': insert(0, node_maker('B', 'b'))})
    s4 = tree.serialize()
    assert s4 == '(A (B "b"))'
    # a position-function returning None means: do not insert at all
    tree = parse_sxpr('(A "")').with_pos(0)
    traverse(tree, {'A': insert(lambda ctx: None, node_maker('B', 'b'))})
    s5 = tree.serialize()
    assert s5 == '(A)'
def test_move_adjacent3(self):
    """Smoke test: move_adjacent on a TEXT node whose siblings already
    contain leading whitespace must not crash."""
    tree = parse_sxpr(
        '(SENTENCE (:Whitespace " ") (:Whitespace " ") '
        '(TEXT (PHRASE "Guten Tag") (:Whitespace " ")))')
    table = {
        'TEXT': move_adjacent(lambda ctx: ctx[-1].tag_name == WHITESPACE_PTYPE)
    }
    traverse(tree, table)
def test_blocking(self):
    """BLOCK_ANONYMOUS_LEAVES must prevent the ':RegExp' rule (which would
    raise an error) from ever being applied to anonymous leaf nodes."""
    work_tree = copy.deepcopy(TestOptimizations.model)
    table = {
        '<': BLOCK_ANONYMOUS_LEAVES,
        'number': [merge_leaves, reduce_single_child],
        ':RegExp': self.raise_error
    }
    traverse(work_tree, table)
    expected = parse_sxpr(
        '(array (number "1") (number "2.0") (string "a string"))')
    assert work_tree.equals(expected)
def test_remove_tokens(self):
    """remove_tokens accepts either a set of strings or a plain string."""
    cst = parse_sxpr(
        '(wortarten (:Text "ajektiv") (:Text "et") (:Text "praeposition"))')
    table = {"wortarten": [remove_tokens({"et"})], "*": []}
    traverse(cst, table)
    snapshot = cst.as_sxpr()
    assert snapshot.find('et') < 0
    # passing a bare string must behave exactly like a one-element set
    table = {"wortarten": [remove_tokens("et")], "*": []}
    traverse(cst, table)
    assert snapshot == cst.as_sxpr()
def test_apply_if(self):
    """apply_if runs a tuple of transformations only on matching nodes."""
    tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
    table = {
        'B': [apply_if(
            (change_tag_name('X'), add_attributes({'renamed': 'True'})),
            is_one_of('B'))]
    }
    traverse(tree, table)
    expected = '(A (X `(renamed "True") "1") (C "1") (X `(renamed "True") "2"))'
    assert flatten_sxpr(tree.as_sxpr()) == expected
def test_complex_delimiter(self):
    """delimit_children also works with a compound (non-leaf) delimiter
    node, here a 'd' node containing a comma and a space child.

    Fix: removed the unused locals ``nm``/``n`` (a node_maker and a probe
    call that were never referenced — leftover debugging code).
    """
    tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0)
    trans_table = {
        'A': delimit_children(
            node_maker('d', (node_maker('c', ','), node_maker('l', ' '))))
    }
    traverse(tree, trans_table)
    original_result = tree.serialize()
    assert original_result \
        == '(A (B "1") (d (c ",") (l " ")) (B "2") (d (c ",") (l " ")) (B "3"))', \
        original_result
def test_move_adjacent(self):
    """Trailing whitespace inside each WORD must be moved out, so that the
    SENTENCE alternates WORD / :Whitespace children."""
    sentence = parse_sxpr(
        '(SENTENCE (WORD (LETTERS "To") (:Whitespace " ")) '
        '(WORD (LETTERS "be") (:Whitespace " ")) '
        '(WORD (LETTERS "or") (:Whitespace " ")) '
        '(WORD (LETTERS "not") (:Whitespace " ")) '
        '(WORD (LETTERS "to") (:Whitespace " "))'
        '(WORD (LETTERS "be") (:Whitespace " ")))')
    table = {
        'WORD': move_adjacent(lambda ctx: ctx[-1].tag_name == WHITESPACE_PTYPE)
    }
    traverse(sentence, table)
    assert tree_sanity_check(sentence)
    # every odd position must now hold a whitespace node
    assert all(pos % 2 == 0 or child.tag_name == ':Whitespace'
               for pos, child in enumerate(sentence))
def test_positions_of(self):
    """positions_of yields child indices by tag-name and can feed insert."""
    tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
    assert positions_of([tree], 'A') == ()
    assert positions_of([tree], 'X') == ()
    assert positions_of([tree], 'C') == (1,)
    assert positions_of([tree], 'B') == (0, 2)
    tree = parse_sxpr('(A (B 1) (C 2) (D 3))').with_pos(0)
    # insert right before the 'D'-child
    traverse(tree, {'A': insert(positions_of('D'), node_maker('X', '0'))})
    s1 = tree.serialize()
    assert s1 == '(A (B "1") (C "2") (X "0") (D "3"))', s1
    # a tag-name that does not occur yields no position: nothing is inserted
    traverse(tree, {'A': insert(positions_of('Z'), node_maker('X', '0'))})
    s2 = tree.serialize()
    assert s2 == '(A (B "1") (C "2") (X "0") (D "3"))', s2
def test_equality2(self):
    """Parse an arithmetic expression, transform it, and compare the
    resulting AST against a hand-written s-expression."""
    ebnf = '@literalws = right\nterm = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
    table = {
        "term": [remove_empty, remove_whitespace, replace_by_single_child, flatten],
        "factor": [remove_empty, remove_whitespace, reduce_single_child],
        "*": [remove_empty, remove_whitespace, replace_by_single_child]
    }
    parser = grammar_provider(ebnf)()
    tree = parser("20 / 4 * 3")
    traverse(tree, table)
    expected = parse_sxpr(
        "(term (term (factor 20) (:Text /) (factor 4)) (:Text *) (factor 3))")
    assert tree.equals(expected), tree.as_sxpr()
def test_traverse_locally(self):
    """traverse_locally applies a nested transformation table only below
    the node it is attached to (here: only inside LemmaVariante).

    Fixes: renamed the misspelled local ``global_tansformations`` to
    ``global_transformations`` and corrected the trailing comment, which
    referred to "facitergula" although the tree contains "facitercula".
    """
    cst = parse_sxpr("""
        (Lemma
            (LemmaVariante
                (LAT_WORT
                    (:RegExp "facitercula")
                    (:Whitespace " "))
                (Zusatz
                    (DEU_WORT "sim.")))
            (Hinweis
                (LAT_WORT
                    (:RegExp "bona")
                    (:Whitespace " "))
                (LAT_WORT
                    (:RegExp "fide"))))""")
    LemmaVariante_transformations = {
        "LAT_WORT": [remove_whitespace, reduce_single_child],
        "Zusatz": [reduce_single_child]
    }
    global_transformations = {
        "LemmaVariante": [traverse_locally(LemmaVariante_transformations)],
        "Hinweis": [collapse]
    }
    traverse(cst, global_transformations)
    # whitespace after "facitercula", but not after "bona" should have been removed
    assert str(cst) == "faciterculasim.bona fide"
def test_merge_adjacent(self):
    """merge_adjacent fuses neighbouring TEXT/L nodes into one TEXT node,
    but leaves other nodes and leaf-only trees untouched."""
    sentence = parse_sxpr('(SENTENCE (TEXT "Guten") (L " ") (TEXT "Tag") '
                          ' (T "\n") (TEXT "Hallo") (L " ") (TEXT "Welt")'
                          ' (T "\n") (L " "))')
    table = {
        'SENTENCE': merge_adjacent(is_one_of('TEXT', 'L'), 'TEXT')
    }
    traverse(sentence, table)
    assert tree_sanity_check(sentence)
    assert sentence.pick_child('TEXT').result == "Guten Tag"
    assert sentence[2].result == "Hallo Welt"
    # a single trailing 'L' has no mergeable neighbour and keeps its name
    assert sentence[-1].tag_name == 'L'
    assert 'T' in sentence
    # leaf nodes should be left untouched
    sentence = parse_sxpr('(SENTENCE "Hallo Welt")')
    traverse(sentence, table)
    assert sentence.content == "Hallo Welt", sentence.content
def test_move_adjacent2(self):
    """Leading and trailing whitespace of each WORD is moved out and merged
    with neighbouring whitespace, leaving whitespace-free WORDs at even
    positions and whitespace nodes at odd positions."""
    sentence = parse_sxpr(
        '(SENTENCE (WORD (LETTERS "To") (:Whitespace " ")) '
        '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")) '
        '(WORD (:Whitespace " ") (LETTERS "or") (:Whitespace " ")) '
        '(WORD (:Whitespace " ") (LETTERS "not") (:Whitespace "a") (:Whitespace "b")) '
        '(:Whitespace "c")'
        '(WORD (:Whitespace "d") (:Whitespace "e") (LETTERS "to") (:Whitespace " "))'
        '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")))')
    table = {
        'WORD': move_adjacent(lambda ctx: ctx[-1].tag_name == WHITESPACE_PTYPE)
    }
    traverse(sentence, table)
    assert tree_sanity_check(sentence)
    # the marker letters a..e must end up merged in a single run
    assert sentence.content.find('abcde') >= 0
    assert all(pos % 2 == 0 or child.tag_name == ':Whitespace'
               for pos, child in enumerate(sentence))
    assert all(pos % 2 != 0
               or (child.tag_name == "WORD" and ":Whitespace" not in child)
               for pos, child in enumerate(sentence))
def grammar_unit(test_unit, parser_factory, transformer_factory, report='REPORT', verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    :param test_unit: either a file name of a test-unit file or an already
        loaded test-unit dictionary (parser-name -> test-category -> tests).
    :param parser_factory: zero-argument callable returning a fresh parser.
    :param transformer_factory: zero-argument callable returning the
        AST-transformation to apply to parsed trees.
    :param report: directory name for the markdown test report; falsy value
        suppresses report writing.
    :param verbose: if True, per-test OK/FAIL lines are collected and printed.
    :return: list of error strings (``errata``); empty list means all passed.
    """
    output = []

    def write(s):
        nonlocal output
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        # strip the '*'-marker from test keys; non-strings pass through
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        # look up a test first under its verbatim key, then under the
        # cleaned key; missing tests yield '' rather than an error
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
            # raise AssertionError('%s-test %s for parser %s missing !?'
            #                      % (category, test_name, parser_name))
        return value

    if isinstance(test_unit, str):
        # a file name was passed: derive the unit name and load the unit
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers is a
        Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))
        # lookahead_found = False
        #
        # def find_lookahead(p: Parser):
        #     nonlocal lookahead_found
        #     if not lookahead_found:
        #         lookahead_found = isinstance(p, Lookahead)
        #
        # parser[parser_name].apply(find_lookahead)
        # return lookahead_found

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True, if the error merely occurred, because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        # case 1: all errors are known lookahead-related error codes
        is_artifact = (
            {e.code for e in raw_errors} <= {
                PARSER_LOOKAHEAD_FAILURE_ONLY,
                AUTORETRIEVED_SYMBOL_NOT_CLEARED,
                PARSER_LOOKAHEAD_MATCH_ONLY}
            or (len(raw_errors) == 1
                and (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                     # case 2: mandatory lookahead failure at end of text
                     or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove zombie node with error message at the end
            # but change it's tag_name to indicate that it is an artifact!
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True, reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                # parent.result = tuple(c for c in parent.children if c.tag_name != ZOMBIE_TAG)
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        # if not get_config_value('test_parallelization'):
        #     print('  Testing parser: ' + parser_name)
        track_history = get_config_value('history_tracking')
        try:
            # lookahead parsers need history tracing so that artifacts can
            # be recognized; AttributeError means the parser name is unknown
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before "
             "running again!" % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write(' Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        # every cst/ast test must have a corresponding match test, because
        # the tree to compare against is produced by the match test
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not {clean_key(k) for k in ast_tests} <= {clean_key(k) for k in match_tests}:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(ast_tests - match_tests), parser_name))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not {clean_key(k) for k in cst_tests} <= {clean_key(k) for k in match_tests}:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!'
                    % str(cst_tests - match_tests))

        # run match tests
        for test_name, test_code in tests.get('match', dict()).items():
            # if not get_config_value('test_parallelization'):
            #     print('  Test: ' + str(test_name))
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                # unknown parser name: record the error on a zombie root
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            clean_test_name = str(test_name).replace('*', '')
            tests.setdefault('__cst__', {})[test_name] = cst
            errors = []  # type: List[Error]
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [e for e in cst.errors_sorted
                          if e.code not in POSSIBLE_ARTIFACTS]
                errata.append('Match test "%s" for parser "%s" failed:'
                              '\nExpr.: %s\n\n%s\n\n'
                              % (test_name, parser_name, md_codeblock(test_code),
                                 '\n'.join(str(m).replace('\n', '\n') for m in errors)))
            if "ast" in tests or report:
                # an AST is needed either for the ast test or for the report
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    e.args = ('Test %s of parser %s failed, because:\n%s'
                              % (test_name, parser_name, e.args[0]),)
                    raise e
                tests.setdefault('__ast__', {})[test_name] = ast
                # only errors newly added by the transformation count here
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if is_error(max(e.code for e in ast_errors) if ast_errors else 0):
                    if ast_errors:
                        if errata:
                            errata[-1] = errata[-1].rstrip('\n')
                        ast_errors.append('\n')
                        errata.append('\t' + '\n\t'.join(
                            str(msg).replace('\n', '\n\t\t') for msg in ast_errors))
            if verbose:
                infostr = ' match-test "' + test_name + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s'
                        % (test_name, parser_name, str(e)))
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = ' cst-test "' + test_name + '" ... '
                        write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s'
                        % (test_name, parser_name, str(e)))
                if compare:
                    # strip testing artifacts before comparing
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    if not compare.equals(ast):  # no worry: ast is defined if "ast" in tests
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        # differ = difflib.Differ()
                        # difference = ''.join(differ.compare([compare_str + '\n'], [ast_str + '\n']))
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.: %s\n\tExpected: %s\n\tReceived: %s'
                            % (test_name, parser_name,
                               '\n\t'.join(test_code.split('\n')),
                               compare_str, ast_str))
                    if verbose:
                        infostr = ' ast-test "' + test_name + '" ... '
                        write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser, "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write(' Fail-Tests for parser "' + parser_name + '"')

        # run fail tests
        for test_name, test_code in tests.get('fail', dict()).items():
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            # a fail test passes if (and only if) parsing produced an error
            if not (is_error(cst.error_flag) and not lookahead_artifact(cst)):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser, "fail_%s_%s.log" % (parser_name, test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = ' fail-test "' + test_name + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            try:
                os.mkdir(report)  # is a process-Lock needed, here?
            except FileExistsError:
                pass
            with open(os.path.join(report, unit_name + '.md'), 'w',
                      encoding='utf8') as f:
                f.write(test_report)
                f.flush()
    print('\n'.join(output))
    return errata
def test_add_delimiter(self):
    """delimit_children puts a delimiter node between every two children."""
    tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0)
    table = {'A': delimit_children(node_maker('c', ','))}
    traverse(tree, table)
    serialized = tree.serialize(how='S-expression')
    assert serialized == '(A (B "1") (c ",") (B "2") (c ",") (B "3"))', serialized