Пример #1
0
    def test_insert_nodes(self):
        """Exercise `insert` with several kinds of position arguments.

        Covers a leading position, a position equal to the number of
        children, a negative position, insertion into a node with an empty
        result and a position-callback that returns None.
        """
        root = parse_sxpr('(A (B 1) (B 2) (X 3))').with_pos(0)

        # position 0: the new node becomes the first child
        traverse(root, {'A': insert(0, node_maker('c', '=>'))})
        result1 = root.serialize()
        assert result1 == '(A (c "=>") (B "1") (B "2") (X "3"))', result1

        # position 4 equals the current number of children: node is appended
        traverse(root, {'A': insert(4, node_maker('d', '<='))})
        result2 = root.serialize()
        assert result2 == '(A (c "=>") (B "1") (B "2") (X "3") (d "<="))', result2

        # position -2: the new node ends up in front of the last two children
        traverse(root, {'A': insert(-2, node_maker('e', '|'))})
        result3 = root.serialize()
        assert result3 == '(A (c "=>") (B "1") (B "2") (e "|") (X "3") (d "<="))', result3

        # inserting into a node with an empty string result
        empty = parse_sxpr('(A "")').with_pos(0)
        traverse(empty, {'A': insert(0, node_maker('B', 'b'))})
        assert empty.serialize() == '(A (B "b"))'

        # a position-callback yielding None: the tree is expected to stay empty
        untouched = parse_sxpr('(A "")').with_pos(0)
        traverse(untouched,
                 {'A': insert(lambda ctx: None, node_maker('B', 'b'))})
        assert untouched.serialize() == '(A)'
Пример #2
0
 def test_move_adjacent3(self):
     """Apply `move_adjacent` to a TEXT node whose parent starts with
     whitespace nodes.

     The test contains no assertions; it passes as long as the traversal
     does not raise.
     """
     def is_whitespace(ctx):
         # the last context entry is the node currently being examined
         return ctx[-1].tag_name == WHITESPACE_PTYPE

     tree = parse_sxpr(
         '(SENTENCE  (:Whitespace " ") (:Whitespace " ")  '
         '(TEXT (PHRASE "Guten Tag") (:Whitespace " ")))')
     traverse(tree, {'TEXT': move_adjacent(is_whitespace)})
Пример #3
0
 def test_blocking(self):
     """Traverse a copy of the model tree with BLOCK_ANONYMOUS_LEAVES
     registered under '<' and check the resulting tree.

     ':RegExp' is mapped to `self.raise_error`.
     NOTE(review): presumably the '<' entry keeps the traversal from ever
     reaching ':RegExp' leaves, so that handler must not fire - confirm.
     """
     model_copy = copy.deepcopy(TestOptimizations.model)
     traverse(model_copy, {
         '<': BLOCK_ANONYMOUS_LEAVES,
         'number': [merge_leaves, reduce_single_child],
         ':RegExp': self.raise_error,
     })
     expected = parse_sxpr(
         '(array (number "1") (number "2.0") (string "a string"))')
     assert model_copy.equals(expected)
Пример #4
0
 def test_remove_tokens(self):
     """Check that `remove_tokens` removes the token "et", both when the
     tokens are passed as a set and when passed as a plain string."""
     tree = parse_sxpr(
         '(wortarten (:Text "ajektiv") (:Text "et") (:Text "praeposition"))'
     )

     # first pass: tokens given as a set
     traverse(tree, {"wortarten": [remove_tokens({"et"})], "*": []})
     after_first_pass = tree.as_sxpr()
     assert 'et' not in after_first_pass

     # second pass: token given as a string; the tree must not change
     traverse(tree, {"wortarten": [remove_tokens("et")], "*": []})
     assert after_first_pass == tree.as_sxpr()
Пример #5
0
 def test_apply_if(self):
     """Check that `apply_if` applies a tuple of transformations (rename
     to 'X' and add an attribute) to the 'B' nodes, leaving 'C' as is."""
     tree = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
     rename_and_mark = (change_tag_name('X'),
                        add_attributes({'renamed': 'True'}))
     traverse(tree, {'B': [apply_if(rename_and_mark, is_one_of('B'))]})
     flat = flatten_sxpr(tree.as_sxpr())
     assert flat == \
         '(A (X `(renamed "True") "1") (C "1") (X `(renamed "True") "2"))'
Пример #6
0
 def test_complex_delimiter(self):
     """Check `delimit_children` with a delimiter that is itself a branch
     node built from nested node makers."""
     tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0)

     # Build and call a node maker once; the created node is not used
     # any further in this test.
     make_delimiter = node_maker(
         'd', (node_maker('c', ','), node_maker('l', ' ')))
     _sample = make_delimiter()

     delimiter = node_maker(
         'd', (node_maker('c', ','), node_maker('l', ' ')))
     traverse(tree, {'A': delimit_children(delimiter)})

     serialized = tree.serialize()
     expected = \
         '(A (B "1") (d (c ",") (l " ")) (B "2") (d (c ",") (l " ")) (B "3"))'
     assert serialized == expected, serialized
Пример #7
0
 def test_move_adjacent(self):
     """Check that `move_adjacent` applied to the WORD nodes leaves a
     sane tree in which every odd-indexed child of SENTENCE is a
     ':Whitespace' node."""
     def is_whitespace(ctx):
         return ctx[-1].tag_name == WHITESPACE_PTYPE

     tree = parse_sxpr(
         '(SENTENCE (WORD (LETTERS "To") (:Whitespace " ")) '
         '(WORD (LETTERS "be") (:Whitespace " ")) '
         '(WORD (LETTERS "or") (:Whitespace " ")) '
         '(WORD (LETTERS "not") (:Whitespace " ")) '
         '(WORD (LETTERS "to") (:Whitespace " "))'
         '(WORD (LETTERS "be") (:Whitespace " ")))')
     traverse(tree, {'WORD': move_adjacent(is_whitespace)})
     assert tree_sanity_check(tree)
     assert all(child.tag_name == ':Whitespace'
                for position, child in enumerate(tree)
                if position % 2 == 1)
Пример #8
0
    def test_positions_of(self):
        """Check `positions_of` called directly with a context and used
        as the position argument of `insert`."""
        root = parse_sxpr('(A (B 1) (C 1) (B 2))').with_pos(0)
        context = [root]
        # only children are looked at: the root tag 'A' itself yields nothing
        assert positions_of(context, 'A') == ()
        assert positions_of(context, 'X') == ()
        assert positions_of(context, 'C') == (1, )
        assert positions_of(context, 'B') == (0, 2)

        root = parse_sxpr('(A (B 1) (C 2) (D 3))').with_pos(0)

        # 'D' exists: the new node is expected right in front of it
        traverse(root, {'A': insert(positions_of('D'), node_maker('X', '0'))})
        result1 = root.serialize()
        assert result1 == '(A (B "1") (C "2") (X "0") (D "3"))', result1

        # 'Z' does not exist: the tree is expected to remain unchanged
        traverse(root, {'A': insert(positions_of('Z'), node_maker('X', '0'))})
        result2 = root.serialize()
        assert result2 == '(A (B "1") (C "2") (X "0") (D "3"))', result2
Пример #9
0
 def test_equality2(self):
     """Parse "20 / 4 * 3" with a small left-recursive term grammar,
     transform the result and compare it with a hand-written tree."""
     grammar = '@literalws = right\nterm = term ("*"|"/") factor | factor\nfactor = /[0-9]+/~'
     cleanup = [remove_empty, remove_whitespace]
     transformations = {
         "term": cleanup + [replace_by_single_child, flatten],
         "factor": cleanup + [reduce_single_child],
         "*": cleanup + [replace_by_single_child],
     }
     parse = grammar_provider(grammar)()
     tree = parse("20 / 4 * 3")
     traverse(tree, transformations)
     expected = parse_sxpr(
         "(term (term (factor 20) (:Text /) (factor 4)) (:Text *) (factor 3))"
     )
     assert tree.equals(expected), tree.as_sxpr()
Пример #10
0
 def test_traverse_locally(self):
     """Check that `traverse_locally` applies its transformation table
     only inside the node it is registered for ("LemmaVariante"), while
     "Hinweis" is merely collapsed."""
     source = """
         (Lemma
             (LemmaVariante
                 (LAT_WORT
                     (:RegExp
                         "facitercula"
                     )
                     (:Whitespace
                         " "
                     )
                 )
                 (Zusatz
                     (DEU_WORT
                         "sim."
                     )
                 )
             )
             (Hinweis
                 (LAT_WORT
                     (:RegExp
                         "bona"
                     )
                     (:Whitespace
                         " "
                     )
                 )
                 (LAT_WORT
                     (:RegExp
                         "fide"
                     )
                 )
             )
         )"""
     cst = parse_sxpr(source)
     local_table = {
         "LAT_WORT": [remove_whitespace, reduce_single_child],
         "Zusatz": [reduce_single_child]
     }
     global_table = {
         "LemmaVariante": [traverse_locally(local_table)],
         "Hinweis": [collapse]
     }
     traverse(cst, global_table)
     # The whitespace after "facitercula" should have been removed, but
     # not the whitespace after "bona".
     assert str(cst) == "faciterculasim.bona fide"
Пример #11
0
    def test_merge_adjacent(self):
        """Check that `merge_adjacent` merges runs of adjacent 'TEXT' and
        'L' children into 'TEXT' nodes and leaves leaf nodes alone."""
        table = {
            'SENTENCE': merge_adjacent(is_one_of('TEXT', 'L'), 'TEXT')
        }

        tree = parse_sxpr('(SENTENCE (TEXT "Guten") (L " ") (TEXT "Tag") '
                          ' (T "\n") (TEXT "Hallo") (L " ") (TEXT "Welt")'
                          ' (T "\n") (L " "))')
        traverse(tree, table)
        assert tree_sanity_check(tree)
        first_text = tree.pick_child('TEXT')
        assert first_text.result == "Guten Tag"
        assert tree[2].result == "Hallo Welt"
        # a single trailing 'L' keeps its tag name
        assert tree[-1].tag_name == 'L'
        # the 'T' separators are still present
        assert 'T' in tree

        # leaf nodes should be left untouched
        leaf = parse_sxpr('(SENTENCE "Hallo Welt")')
        traverse(leaf, table)
        assert leaf.content == "Hallo Welt", leaf.content
Пример #12
0
 def test_move_adjacent2(self):
     """Check `move_adjacent` on WORD nodes with leading as well as
     trailing whitespace and with a whitespace node between two words.

     Afterwards the children of SENTENCE must alternate between
     whitespace-free WORD nodes (even positions) and ':Whitespace' nodes
     (odd positions), and the whitespace pieces "a".."e" must appear in
     their original order.
     """
     def is_whitespace(ctx):
         return ctx[-1].tag_name == WHITESPACE_PTYPE

     tree = parse_sxpr(
         '(SENTENCE (WORD (LETTERS "To") (:Whitespace " ")) '
         '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")) '
         '(WORD (:Whitespace " ") (LETTERS "or") (:Whitespace " ")) '
         '(WORD (:Whitespace " ") (LETTERS "not") (:Whitespace "a") (:Whitespace "b")) '
         '(:Whitespace "c")'
         '(WORD (:Whitespace "d") (:Whitespace "e") (LETTERS "to") (:Whitespace " "))'
         '(WORD (:Whitespace " ") (LETTERS "be") (:Whitespace " ")))')
     traverse(tree, {'WORD': move_adjacent(is_whitespace)})
     assert tree_sanity_check(tree)
     assert 'abcde' in tree.content
     # odd positions: whitespace only
     assert all(child.tag_name == ':Whitespace'
                for position, child in enumerate(tree)
                if position % 2 != 0)
     # even positions: WORD nodes without any whitespace child
     assert all(child.tag_name == "WORD" and ":Whitespace" not in child
                for position, child in enumerate(tree)
                if position % 2 == 0)
Пример #13
0
def grammar_unit(test_unit,
                 parser_factory,
                 transformer_factory,
                 report='REPORT',
                 verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    Runs all "match"- and "fail"-tests of `test_unit` and, where present,
    compares the resulting trees with the expected "cst" and "ast" trees.
    Intermediate results are stored in the test unit itself under the keys
    '__cst__', '__ast__', '__err__' and '__msg__'.

    :param test_unit: either the file name of a test unit, which is then
        read with `unit_from_file`, or an already loaded test unit, i.e. a
        mapping: parser name -> test stage -> test name -> test code.
    :param parser_factory: callable without arguments that returns the
        parser object. The parser is called as
        `parser(test_code, parser_name)`.
    :param transformer_factory: callable without arguments that returns
        the function which transforms a concrete syntax tree in place.
    :param report: name of the directory to which the markdown test report
        is written. A falsy value suppresses the report. Note that a truthy
        value also causes the transformation to be run for every test.
    :param verbose: if True, progress messages are collected and printed
        in one go at the end.
    :return: the list of error messages; empty if all tests passed.
    :raises AssertionError: if the test unit is malformed (missing parser
        name, results already present, unknown test stages, cst- or
        ast-tests without match-test) or if the transformation fails with
        an AssertionError.
    :raises SyntaxError: if an expected cst- or ast-tree cannot be parsed.
    """
    output = []

    def write(s):
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        """Return key `k` with all asterisks removed. Keys that are not
        strings are returned unchanged."""
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        """Return the test code stored under `key` (or under the key
        without asterisks) in `tests[category]`, or the empty string if
        there is no such entry."""
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
        return value

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers is a
        Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True, if the error merely occurred, because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        is_artifact = (
            {e.code for e in raw_errors} <= {
                PARSER_LOOKAHEAD_FAILURE_ONLY,
                AUTORETRIEVED_SYMBOL_NOT_CLEARED,
                PARSER_LOOKAHEAD_MATCH_ONLY
            } or (len(raw_errors) == 1 and
                  (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                   #  case 2:  mandatory lookahead failure at end of text
                   or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove zombie node with error message at the end
            # but change its tag_name to indicate that it is an artifact!
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True,
                    reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                # only the first parent found is treated
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        track_history = get_config_value('history_tracking')
        try:
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write('  Match-Tests for parser "' + parser_name + '"')

        # Test names are compared without asterisks. The error messages
        # list exactly those tests whose cleaned name has no match-test.
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        match_keys = {clean_key(k) for k in match_tests}
        if 'ast' in tests:
            orphans = {k for k in tests['ast'].keys()
                       if clean_key(k) not in match_keys}
            if orphans:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(orphans), parser_name))
        if 'cst' in tests:
            orphans = {k for k in tests['cst'].keys()
                       if clean_key(k) not in match_keys}
            if orphans:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!' %
                    str(orphans))

        # run match tests

        for test_name, test_code in tests.get('match', {}).items():
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            # test names need not be strings, hence the str()-conversion
            clean_test_name = str(test_name).replace('*', '')
            tests.setdefault('__cst__', {})[test_name] = cst
            errors = []  # type: List[Error]
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [
                    e for e in cst.errors_sorted
                    if e.code not in POSSIBLE_ARTIFACTS
                ]
                errata.append(
                    'Match test "%s" for parser "%s" failed:'
                    '\nExpr.:  %s\n\n%s\n\n' %
                    (test_name, parser_name, md_codeblock(test_code),
                     '\n'.join(str(m) for m in errors)))
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    # prepend the test's name to the original message
                    original_msg = e.args[0] if e.args else ''
                    e.args = ('Test %s of parser %s failed, because:\n%s' %
                              (test_name, parser_name, original_msg), )
                    raise
                tests.setdefault('__ast__', {})[test_name] = ast
                # only errors added by the transformation are of interest
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if is_error(
                        max(e.code for e in ast_errors) if ast_errors else 0):
                    if ast_errors:
                        if errata:
                            errata[-1] = errata[-1].rstrip('\n')
                        ast_errors.append('\n')
                        errata.append('\t' + '\n\t'.join(
                            str(msg).replace('\n', '\n\t\t')
                            for msg in ast_errors))

            if verbose:
                infostr = '    match-test "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e))) from e
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = '      cst-test "' + str(test_name) + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e))) from e
                if compare:
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    # no worry: ast is defined if "ast" in tests
                    if not compare.equals(ast):
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                            % (test_name, parser_name, '\n\t'.join(
                                test_code.split('\n')), compare_str, ast_str))
                    if verbose:
                        infostr = '      ast-test "' + str(test_name) + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', {}).items():
            errflag = len(errata)
            # asterisks are stripped so the name can be used in a file name
            clean_test_name = str(test_name).replace('*', '')
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            if not (is_error(cst.error_flag) and not lookahead_artifact(cst)):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "fail_%s_%s.log" % (parser_name, clean_test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = '    fail-test  "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            # exist_ok avoids a failure if another process has created the
            # directory in the meantime
            os.makedirs(report, exist_ok=True)
            with open(os.path.join(report, unit_name + '.md'),
                      'w',
                      encoding='utf8') as f:
                f.write(test_report)

    print('\n'.join(output))
    return errata
Пример #14
0
 def test_add_delimiter(self):
     """Check that `delimit_children` places a delimiter node between
     each pair of neighbouring children."""
     tree = parse_sxpr('(A (B 1) (B 2) (B 3))').with_pos(0)
     make_comma = node_maker('c', ',')
     traverse(tree, {'A': delimit_children(make_comma)})
     serialized = tree.serialize(how='S-expression')
     expected = '(A (B "1") (c ",") (B "2") (c ",") (B "3"))'
     assert serialized == expected, serialized