Пример #1
0
 def test_as_etree(self):
     """Convert nodes to ElementTree elements and back, checking both directions."""
     import xml.etree.ElementTree as ET
     # import lxml.etree as ET

     def as_text(element):
         # serialize an element as a unicode string
         return ET.tostring(element, encoding="unicode")

     # 1. S-expression -> Node -> etree -> Node
     source_sxpr = '(R (A "1") (S (B `(class "bold") "2")) (C "3"))'
     expected_xml = '<R><A>1</A><S><B class="bold">2</B></S><C>3</C></R>'
     element = parse_sxpr(source_sxpr).as_etree()
     assert as_text(element) == expected_xml, as_text(element)
     assert Node.from_etree(element).as_sxpr() == source_sxpr

     # 2. mixed content containing a comment and an empty element with attribute
     element = ET.XML(
         '<R>mixed <A>1</A>mode <!-- comment --><B class="italic" /></R>')
     mixed = Node.from_etree(element)
     mixed_sxpr = '(R (:Text "mixed ") (A "1") (:Text "mode ") (B `(class "italic")))'
     assert mixed.as_sxpr() == mixed_sxpr
     # a further trip through the serialized form must yield the same tree
     reparsed = ET.XML(as_text(mixed.as_etree()))
     assert Node.from_etree(reparsed).as_sxpr() == mixed_sxpr

     # 3. empty tags collected by parse_xml() are handed on to as_etree()
     collected_empty_tags = set()
     xml_tree = parse_xml('<a><b>1<c>2<d />3</c></b>4</a>',
                          out_empty_tags=collected_empty_tags)
     converted = xml_tree.as_etree(empty_tags=collected_empty_tags)
     serialized = ET.tostring(converted).replace(b' /', b'/')
     assert serialized == b'<a><b>1<c>2<d/>3</c></b>4</a>'
     restored = Node.from_etree(converted)
     assert flatten_sxpr(restored.as_sxpr()) == \
            '(a (b (:Text "1") (c (:Text "2") (d) (:Text "3"))) (:Text "4"))'
Пример #2
0
    def test_has_parent(self):
        """Check `has_ancestor()` and `has_parent()` on a context of three nodes."""
        trail = [Node(tag, content)
                 for tag, content in (('C', 'alpha'), ('B', 'beta'), ('A', 'gamma'))]

        # ancestor look-up with an explicit number of generations
        assert not has_ancestor(trail, {'A'}, 1)
        assert has_ancestor(trail, {'B'}, 1)
        assert not has_ancestor(trail, {'C'}, 1)
        assert has_ancestor(trail, {'C'}, 2)

        # parent look-up
        assert not has_parent(trail, {'A'})
        assert has_parent(trail, {'B'})
        assert not has_parent(trail, {'C'})
Пример #3
0
    def test_attr_serialization_and_parsing(self):
        """Serialize a node with attributes and a position, then parse it back."""
        employee = Node('employee', 'James Bond').with_pos(46)
        employee.attr['branch'] = 'Secret Service'
        employee.attr['id'] = '007'

        # JSON: only checks that the serialized form can be parsed again
        json_str = employee.as_json()
        tree = parse_json_syntaxtree(json_str)

        # XML: '_pos' shows up only if as_xml() is called with ''
        xml_plain = employee.as_xml()
        assert xml_plain.find('_pos') < 0
        xml_with_pos = employee.as_xml('')
        assert xml_with_pos.find('_pos') >= 0
        tree = parse_xml(xml_with_pos)
        assert tree.pos == 46
        assert '_pos' not in tree.attr
        # with ignore_pos=True, '_pos' remains an ordinary attribute
        tree = parse_xml(xml_with_pos, ignore_pos=True)
        assert '_pos' in tree.attr
        assert tree._pos < 0

        # S-expression: 'pos' shows up only if as_sxpr() is called with ''
        sxpr_plain = employee.as_sxpr()
        assert sxpr_plain.find('pos') < 0
        sxpr_with_pos = employee.as_sxpr('')
        assert sxpr_with_pos.find('pos') >= 0
        tree = parse_sxpr(sxpr_with_pos)
        assert tree.pos == 46
        assert 'pos' not in tree.attr
Пример #4
0
 def test_with_pos(self):
     """After `with_pos(0)` the last child must end where the root ends."""
     first = Node('A', '123')
     first._pos = 0  # position of the first child is already set beforehand
     children = (first, Node('B', ''), Node('C', '456'))
     root = Node('root', children)
     root.with_pos(0)
     last = root.children[-1]
     assert len(root) == last.pos + len(last)
Пример #5
0
 def test_json_roundtrip(self):
     """Serialize `self.tree` as JSON with several settings and restore it."""
     # attribute order is ignored on Python versions older than 3.6
     unordered = sys.version_info < (3, 6)

     # round trip via json.loads() and Node.from_json_obj()
     for indent, ascii_only in ((None, True), (2, False)):
         serialized = self.tree.as_json(indent=indent, ensure_ascii=ascii_only)
         restored = Node.from_json_obj(json.loads(serialized))
         assert restored.equals(self.tree, ignore_attr_order=unordered)

     # round trip via parse_json_syntaxtree()
     serialized = self.tree.as_json(indent=None, ensure_ascii=False)
     restored = parse_json_syntaxtree(serialized)
     assert restored.equals(self.tree)
Пример #6
0
 def test_json_obj_roundtrip(self):
     """Convert `self.tree` to a JSON object and rebuild an equal tree from it."""
     restored = Node.from_json_obj(self.tree.to_json_obj())
     # on failure, show both trees as S-expressions
     assert restored.equals(self.tree), \
         '\n' + restored.as_sxpr() + '\n' + self.tree.as_sxpr()
Пример #7
0
 def fallback_compiler(self,
                       node: Node,
                       block_attribute_visitors: bool = False) -> Any:
     """Generic compiler function for node types without an `on_XXX` method.

     All children of `node` are compiled with `self.compile()`. Children for
     which a different object is returned are replaced by that object;
     children that compile to `None` or to a node tagged `EMPTY_PTYPE` are
     dropped. Afterwards, unless blocked, the attribute visitors are called
     for the attributes of the node.

     :param node: the node to be compiled; its result is changed in place
         if any child has been replaced or dropped.
     :param block_attribute_visitors: if True, attribute visitors are not
         called, even if `self.has_attribute_visitors` is set.
     :returns: the node, or whatever truthy value the last attribute
         visitor returned in its place.
     :raises TypeError: if compiling a child yields a value that is
         neither `None` nor a `Node`.
     """
     # maps id(child) to the compilation result that takes the child's place
     replacements = {}  # type: Dict[int, Optional[Node]]
     if node.children:
         for child in node.children:
             nd = self.compile(child)
             if nd is not child:
                 replacements[id(child)] = nd
             if nd is not None and not isinstance(nd, Node):
                 tn = node.tag_name
                 raise TypeError(
                     'Fallback compiler for Node `%s` received a value of type '
                     '`%s` from child `%s` instead of the required return type `Node`. '
                     'Override `DHParser.compile.Compiler.fallback_compiler()` or add '
                     'method `on_%s(self, node)` in class `%s` to avoid this error!'
                     % (tn, str(type(nd)), child.tag_name, tn,
                        self.__class__.__name__))
         if replacements:
             # replace Nodes the identity of which has been changed during transformation
             # and drop any returned None-results
             compiled = (replacements.get(id(child), child)
                         for child in node.children)
             node.result = tuple(nd for nd in compiled
                                 if nd is not None and nd.tag_name != EMPTY_PTYPE)
     if (self.has_attribute_visitors and not block_attribute_visitors
             and node.has_attr()):
         # Iterate over a snapshot: a visitor may add or remove attributes of
         # the node, which would otherwise break the iteration over the
         # live dictionary view.
         for attribute, value in tuple(node.attr.items()):
             try:
                 attribute_visitor = self.__getattribute__(
                     attr_visitor_name(attribute))
                 node = attribute_visitor(node, value) or node
             except AttributeError:
                 # No visitor defined for this attribute.
                 # NOTE(review): this also swallows an AttributeError raised
                 # inside a visitor - confirm that this is intended.
                 pass
     return node
Пример #8
0
 def test_single_child_selection(self):
     """Select a single child by tag name or by a match function."""
     tree = parse_sxpr('(A (B 1) (C 1) (B 2))')

     # selection by tag name
     assert 'B' in tree
     assert 'X' not in tree
     assert tree.pick_child('B').equals(Node('B', '1'))

     # selection by match function
     has_content_2 = lambda nd: nd.content == '2'
     assert has_content_2 in tree
     has_content_4 = lambda nd: nd.content == '4'
     assert has_content_4 not in tree
     assert tree[has_content_2].equals(Node('B', '2'))
     try:
         _ = tree[has_content_4]
     except KeyError:
         pass
     else:
         assert False

     # get() falls back to the default if nothing matches
     assert tree.get('B', EMPTY_NODE).equals(Node('B', '1'))
     assert tree.get(has_content_2, EMPTY_NODE).equals(Node('B', '2'))
     assert tree.get(has_content_4, EMPTY_NODE).equals(EMPTY_NODE)

     # index() raises a ValueError for an unknown tag name
     assert tree.index('C') == 1
     try:
         tree.index('X')
     except ValueError:
         pass
     else:
         assert False
Пример #9
0
 def test_attr_error_reporting_and_fixing(self):
     """Go through the settings of 'xml_attribute_error_handling' one by one."""
     node = Node('tag', 'content').with_attr(faulty='<&"')

     # 'fail': serialization raises a ValueError
     set_config_value('xml_attribute_error_handling', 'fail')
     try:
         node.as_xml()
     except ValueError:
         pass
     else:
         assert False, "ValueError expected"

     # 'fix': '<' and '&' are escaped in the attribute value
     set_config_value('xml_attribute_error_handling', 'fix')
     fixed = node.as_xml()
     assert fixed == '''<tag faulty='&lt;&amp;"'>content</tag>''', node.as_xml()

     # 'ignore': the attribute value is written as it is
     set_config_value('xml_attribute_error_handling', 'ignore')
     unchanged = node.as_xml()
     assert unchanged == '''<tag faulty='<&"'>content</tag>'''

     # 'lxml': additionally, non-ASCII characters are replaced by '?'
     node.attr['nonascii'] = 'ἱεραρχικωτάτου'
     set_config_value('xml_attribute_error_handling', 'lxml')
     replaced = node.as_xml()
     assert replaced == \
         '''<tag faulty='&lt;&amp;"' nonascii="??????????????">content</tag>'''
Пример #10
0
 def test_len_and_pos(self):
     """Test the length and the position of a node and its two children."""
     left = Node(ZOMBIE_TAG, "123")
     right = Node(ZOMBIE_TAG, "456")
     for leaf in (left, right):
         assert len(leaf) == 3, "Expected Node.len == 3, got %i" % len(leaf)

     parent = Node(ZOMBIE_TAG, (left, right))
     assert len(parent) == 6, "Expected Node.len == 6, got %i" % len(parent)

     # assigning a position to the parent also positions the children
     parent.with_pos(0)
     assert parent.pos == 0, "Expected Node.pos == 0, got %i" % parent.pos
     assert left.pos == 0, "Expected Node.pos == 0, got %i" % left.pos
     assert right.pos == 3, "Expected Node.pos == 3, got %i" % right.pos
Пример #11
0
    def test_repr(self):
        """`repr()` shows tag and result only; `.repr` adds attributes and position."""
        leaf = Node('test1', 'content1')
        assert repr(leaf) == "Node('test1', 'content1')"

        branch = Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))
        assert repr(branch) == \
            "Node('test2', (Node('child1', 'content1'), Node('child2', 'content2')))"

        # node with an attribute
        attributed = Node('test', '').with_attr(attr='value')
        assert repr(attributed) == "Node('test', '')"
        assert attributed.repr == "Node('test', '').with_attr({'attr': 'value'})"

        # node with an attribute and a position
        positioned = Node('test', '').with_pos(0).with_attr(attr='value')
        assert repr(positioned) == "Node('test', '')"
        assert positioned.repr == "Node('test', '').with_attr({'attr': 'value'}).with_pos(0)"
Пример #12
0
 def test_swap_attributes(self):
     """Swap the attributes of two nodes back and forth."""
     left = Node('A', '')
     right = Node('B', '')
     left.attr['x'] = 'x'

     # only the left node has attributes: they move to the right node
     swap_attributes(left, right)
     assert not left.attr
     assert right.attr['x'] == 'x'

     # swapping once more moves them back
     swap_attributes(left, right)
     assert not right.attr
     assert left.attr['x'] == 'x'

     # both nodes have attributes: they change places
     right.attr['y'] = 'y'
     swap_attributes(left, right)
     assert left.attr['y'] == 'y'
     assert right.attr['x'] == 'x'
Пример #13
0
 def test_get_context(self):
     """Reconstruct the context of several nodes from different starting nodes."""

     def tags(context):
         # tag names of the nodes of a context, in order
         return [nd.tag_name for nd in context]

     tree = parse_sxpr('(A (F (X "a") (Y "b")) (G "c"))')
     node_x = tree.pick('X')
     assert tags(tree.reconstruct_context(node_x)) == ['A', 'F', 'X']

     node_f = tree.pick('F')
     node_y = tree.pick('Y')
     # starting from an inner node, the context begins with that node
     assert tags(node_f.reconstruct_context(node_y)) == ['F', 'Y']
     assert tags(tree.reconstruct_context(node_f)) == ['A', 'F']
     assert tags(tree.reconstruct_context(node_y)) == ['A', 'F', 'Y']

     node_g = tree.pick('G')
     assert tags(tree.reconstruct_context(node_g)) == ['A', 'G']

     # a node that is not part of the tree raises a ValueError
     stranger = Node('not_there', '')
     try:
         tree.reconstruct_context(stranger)
     except ValueError:
         pass
     else:
         assert False, "ValueError expected!"

     # the context of the root consists of the root alone
     assert tree.reconstruct_context(tree) == [tree]
Пример #14
0
 def lookahead_artifact(syntax_tree: Node):
     """
     Tells whether the errors of `syntax_tree` are a mere testing artifact,
     i.e. whether they occurred only because the parser stopped in front
     of a sequence that was captured by a lookahead operator, or because a
     mandatory lookahead failed at the end of the data. This is needed for
     testing parsers that put a lookahead operator at the end.
     See test_testing.TestLookahead.

     Always returns False if the configuration value
     'test_suppress_lookahead_failures' is not set. If an artifact has been
     found, the zombie node picked from the first matching parent is
     re-tagged as TEST_ARTIFACT and receives an explanatory text as result.
     """
     if not get_config_value('test_suppress_lookahead_failures'):
         return False

     raw_errors = cast(RootNode, syntax_tree).errors_sorted
     found_codes = {e.code for e in raw_errors}
     lookahead_codes = {
         PARSER_LOOKAHEAD_FAILURE_ONLY,
         AUTORETRIEVED_SYMBOL_NOT_CLEARED,
         PARSER_LOOKAHEAD_MATCH_ONLY,
     }
     # case 1: nothing but lookahead-related errors
     # case 2: a single error which is either a lookahead match or a
     #         mandatory lookahead failure at end of text
     is_artifact = found_codes <= lookahead_codes or (
         len(raw_errors) == 1
         and raw_errors[-1].code in (PARSER_LOOKAHEAD_MATCH_ONLY,
                                     MANDATORY_CONTINUATION_AT_EOF))
     if not is_artifact:
         return is_artifact

     def has_zombie_child(node):
         return any(child.tag_name == ZOMBIE_TAG for child in node.children)

     # The zombie node with the error message is not removed. Its tag name
     # is changed instead, to indicate that it is an artifact.
     for parent in syntax_tree.select_if(has_zombie_child,
                                         include_root=True,
                                         reverse=True):
         zombie = parent.pick_child(ZOMBIE_TAG)
         zombie.tag_name = TEST_ARTIFACT
         zombie.result = ('Artifact can be ignored. Be aware, though, that also the '
                          'tree structure may not be the same as in a non-testing '
                          'environment, when a testing artifact has occurred!')
         break  # only the first parent found is treated
     return is_artifact
Пример #15
0
 def test_xml_sanitizer(self):
     """The characters '<', '&' and '>' in the content are escaped by as_xml()."""
     serialized = Node('tag', '<&>').as_xml()
     assert serialized == '<tag>&lt;&amp;&gt;</tag>'
Пример #16
0
def grammar_unit(test_unit,
                 parser_factory,
                 transformer_factory,
                 report='REPORT',
                 verbose=False):
    """
    Unit tests for a grammar-parser and ast transformations.

    For every parser named in the test unit, the match-tests (optionally
    followed by a comparison of the concrete and the abstract syntax
    tree) and the fail-tests are run. The results are stored in the test
    unit itself under the keys '__cst__', '__ast__', '__err__' and
    '__msg__'.

    :param test_unit: either the file name of a test unit, which is then
        read with `unit_from_file()`, or a dictionary that maps parser
        names to dictionaries of tests. The latter map a test type
        ('match', 'fail', 'ast', 'cst', ...) to a dictionary of test
        names and test code.
    :param parser_factory: a callable without arguments that returns the
        parser object.
    :param transformer_factory: a callable without arguments that returns
        the transformation function, which is called with the syntax tree.
    :param report: name of the directory where the test report is written
        as a markdown file. No report is written if this is empty. The
        transformation is then only run if the unit contains ast-tests.
    :param verbose: if True, progress messages are collected and printed
        when all tests have been run.
    :returns: the list of error messages of all failed tests.
    :raises AssertionError: if the test unit is malformed, i.e. contains
        unknown test types, results from an earlier run, or ast- and
        cst-tests without corresponding match-tests.
    :raises SyntaxError: if the expected tree of a cst- or ast-test
        cannot be parsed.
    """
    output = []

    def write(s):
        """Append string `s` to output. The purpose is to defer printing to
        stdout in order to avoid muddled output when several unit tests run
        at the same time."""
        output.append(s)

    def clean_key(k):
        """Return key `k` without asterisks; keys that have no `replace`
        method (i.e. keys that are not strings) are returned unchanged."""
        try:
            return k.replace('*', '')
        except AttributeError:
            return k

    def get(tests, category, key) -> str:
        """Return the test `key` of the given category, looking it up
        without asterisks if needed, or the empty string if it is missing."""
        try:
            value = tests[category][key] if key in tests[category] \
                else tests[category][clean_key(key)]
        except KeyError:
            return ''
        return value

    if isinstance(test_unit, str):
        _, unit_name = os.path.split(os.path.splitext(test_unit)[0])
        test_unit = unit_from_file(test_unit)
    else:
        unit_name = 'unit_test_' + str(id(test_unit))
    if verbose:
        write("\nGRAMMAR TEST UNIT: " + unit_name)
    errata = []
    parser = parser_factory()
    transform = transformer_factory()

    def has_lookahead(parser_name: str) -> bool:
        """Returns True if the parser or any of its descendant parsers is a
        Lookahead parser."""
        return parser[parser_name].apply(
            lambda ctx: isinstance(ctx[-1], Lookahead))

    def lookahead_artifact(syntax_tree: Node):
        """
        Returns True, if the error merely occurred, because the parser
        stopped in front of a sequence that was captured by a lookahead
        operator or if a mandatory lookahead failed at the end of data.
        This is required for testing of parsers that put a lookahead
        operator at the end. See test_testing.TestLookahead.
        """
        if not get_config_value('test_suppress_lookahead_failures'):
            return False
        raw_errors = cast(RootNode, syntax_tree).errors_sorted
        is_artifact = (
            {e.code
             for e in raw_errors} <= {
                 PARSER_LOOKAHEAD_FAILURE_ONLY,
                 AUTORETRIEVED_SYMBOL_NOT_CLEARED, PARSER_LOOKAHEAD_MATCH_ONLY
             } or (len(raw_errors) == 1 and
                   (raw_errors[-1].code == PARSER_LOOKAHEAD_MATCH_ONLY
                    #  case 2:  mandatory lookahead failure at end of text
                    or raw_errors[-1].code == MANDATORY_CONTINUATION_AT_EOF)))
        if is_artifact:
            # don't remove zombie node with error message at the end
            # but change its tag_name to indicate that it is an artifact!
            for parent in syntax_tree.select_if(
                    lambda node: any(child.tag_name == ZOMBIE_TAG
                                     for child in node.children),
                    include_root=True,
                    reverse=True):
                zombie = parent.pick_child(ZOMBIE_TAG)
                zombie.tag_name = TEST_ARTIFACT
                zombie.result = 'Artifact can be ignored. Be aware, though, that also the ' \
                                'tree structure may not be the same as in a non-testing ' \
                                'environment, when a testing artifact has occurred!'
                break
        return is_artifact

    for parser_name, tests in test_unit.items():
        track_history = get_config_value('history_tracking')
        try:
            # parsers containing a lookahead are always traced
            if has_lookahead(parser_name):
                set_tracer(all_descendants(parser[parser_name]), trace_history)
                track_history = True
        except AttributeError:
            # presumably raised for an unknown parser name, which is
            # reported for each single test further below
            pass

        assert parser_name, "Missing parser name in test %s!" % unit_name
        assert not any(test_type in RESULT_STAGES for test_type in tests), \
            ("Test %s in %s already has results. Use reset_unit() before running again!"
             % (parser_name, unit_name))
        assert set(tests.keys()).issubset(UNIT_STAGES), \
            'Unknown test-types: %s ! Must be one of %s' \
            % (set(tests.keys()) - UNIT_STAGES, UNIT_STAGES)
        if verbose:
            write('  Match-Tests for parser "' + parser_name + '"')
        match_tests = set(tests['match'].keys()) if 'match' in tests else set()
        if 'ast' in tests:
            ast_tests = set(tests['ast'].keys())
            if not {clean_key(k)
                    for k in ast_tests} <= {clean_key(k)
                                            for k in match_tests}:
                raise AssertionError(
                    'AST-Tests %s for parser %s lack corresponding match-tests!'
                    % (str(ast_tests - match_tests), parser_name))
        if 'cst' in tests:
            cst_tests = set(tests['cst'].keys())
            if not {clean_key(k)
                    for k in cst_tests} <= {clean_key(k)
                                            for k in match_tests}:
                raise AssertionError(
                    'CST-Tests %s lack corresponding match-tests!' %
                    str(cst_tests - match_tests))

        # run match tests

        for test_name, test_code in tests.get('match', dict()).items():
            # number of errors so far; any growth means that this test failed
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                cst = RootNode()
                cst = cst.new_error(Node(ZOMBIE_TAG, "").with_pos(0), str(upe))
            clean_test_name = str(test_name).replace('*', '')
            tests.setdefault('__cst__', {})[test_name] = cst
            errors = []  # type: List[Error]
            if is_error(cst.error_flag) and not lookahead_artifact(cst):
                errors = [
                    e for e in cst.errors_sorted
                    if e.code not in POSSIBLE_ARTIFACTS
                ]
                errata.append(
                    'Match test "%s" for parser "%s" failed:'
                    '\nExpr.:  %s\n\n%s\n\n' %
                    (test_name, parser_name, md_codeblock(test_code),
                     '\n'.join(str(m) for m in errors)))
            if "ast" in tests or report:
                ast = copy.deepcopy(cst)
                old_errors = set(ast.errors)
                traverse(ast, {'*': remove_children({TEST_ARTIFACT})})
                try:
                    transform(ast)
                except AssertionError as e:
                    e.args = ('Test %s of parser %s failed, because:\n%s' %
                              (test_name, parser_name, e.args[0]), )
                    raise
                tests.setdefault('__ast__', {})[test_name] = ast
                # only errors added by the transformation are of interest here
                ast_errors = [e for e in ast.errors if e not in old_errors]
                ast_errors.sort(key=lambda e: e.pos)
                if is_error(
                        max(e.code for e in ast_errors) if ast_errors else 0):
                    if ast_errors:
                        if errata:
                            errata[-1] = errata[-1].rstrip('\n')
                        ast_errors.append('\n')
                        errata.append('\t' + '\n\t'.join(
                            str(msg).replace('\n', '\n\t\t')
                            for msg in ast_errors))

            if verbose:
                # test names need not be strings, hence the conversion
                infostr = '    match-test "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

            if "cst" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "cst", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'CST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e))) from e
                if compare:
                    if not compare.equals(cst):
                        errata.append(
                            'Concrete syntax tree test "%s" for parser "%s" failed:\n%s'
                            % (test_name, parser_name, cst.serialize('cst')))
                    if verbose:
                        infostr = '      cst-test "' + str(test_name) + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if "ast" in tests and len(errata) == errflag:
                try:
                    compare = parse_tree(get(tests, "ast", test_name))
                except ValueError as e:
                    raise SyntaxError(
                        'AST-TEST "%s" of parser "%s" failed with:\n%s' %
                        (test_name, parser_name, str(e))) from e
                if compare:
                    traverse(compare, {'*': remove_children({TEST_ARTIFACT})})
                    if not compare.equals(
                            ast):  # no worry: ast is defined if "ast" in tests
                        ast_str = flatten_sxpr(ast.as_sxpr())
                        compare_str = flatten_sxpr(compare.as_sxpr())
                        errata.append(
                            'Abstract syntax tree test "%s" for parser "%s" failed:'
                            '\n\tExpr.:     %s\n\tExpected:  %s\n\tReceived:  %s'
                            % (test_name, parser_name, '\n\t'.join(
                                test_code.split('\n')), compare_str, ast_str))
                    if verbose:
                        infostr = '      ast-test "' + str(test_name) + '" ... '
                        write(infostr +
                              ("OK" if len(errata) == errflag else "FAIL"))

            if len(errata) > errflag:
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of failure!
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "match_%s_%s.log" % (parser_name, clean_test_name))

        if verbose and 'fail' in tests:
            write('  Fail-Tests for parser "' + parser_name + '"')

        # run fail tests

        for test_name, test_code in tests.get('fail', dict()).items():
            errflag = len(errata)
            try:
                cst = parser(test_code, parser_name)
            except AttributeError as upe:
                node = Node(ZOMBIE_TAG, "").with_pos(0)
                cst = RootNode(node).new_error(node, str(upe))
                errata.append('Unknown parser "{}" in fail test "{}"!'.format(
                    parser_name, test_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
            if "ast" in tests or report:
                traverse(cst, {'*': remove_children({TEST_ARTIFACT})})
                transform(cst)
            if not (is_error(cst.error_flag) and not lookahead_artifact(cst)):
                errata.append(
                    'Fail test "%s" for parser "%s" yields match instead of '
                    'expected failure!\n' % (test_name, parser_name))
                tests.setdefault('__err__', {})[test_name] = errata[-1]
                # write parsing-history log only in case of test-failure
                if is_logging() and track_history:
                    with local_log_dir('./LOGS'):
                        log_parsing_history(
                            parser,
                            "fail_%s_%s.log" % (parser_name, test_name))
            if cst.error_flag:
                tests.setdefault('__msg__', {})[test_name] = \
                    "\n".join(str(e) for e in cst.errors_sorted)
            if verbose:
                infostr = '    fail-test  "' + str(test_name) + '" ... '
                write(infostr + ("OK" if len(errata) == errflag else "FAIL"))

    # remove tracers, in case there are any:
    set_tracer(all_descendants(parser.root_parser__), None)

    # write test-report
    if report:
        test_report = get_report(test_unit)
        if test_report:
            try:
                os.mkdir(report)  # is a process-Lock needed, here?
            except FileExistsError:
                pass
            with open(os.path.join(report, unit_name + '.md'),
                      'w',
                      encoding='utf8') as f:
                f.write(test_report)
                f.flush()

    print('\n'.join(output))
    return errata
Пример #17
0
 def test_contains_only_whitespace(self):
     """Blank or empty content counts as whitespace; content with text does not."""
     for blank in (' ', '', '\n'):
         assert contains_only_whitespace([Node('test', blank)])
     for text in ('Katze', ' tag '):
         assert not contains_only_whitespace([Node('test', text)])
Пример #18
0
 def test_pos_value_of_later_added_nodes(self):
     """Children assigned after `with_pos()` must still report positions."""
     parent = Node('Test', '').with_pos(0)
     assert parent.pos == 0
     # the children are added only after the position has been set
     parent.result = (Node('A', 'aaa'), Node('B', 'bbb'))
     kids = parent.children
     assert kids[0].pos == 0 and kids[1].pos == 3
Пример #19
0
def trace_history(self: Parser,
                  text: StringView) -> Tuple[Optional[Node], StringView]:
    """Call `self._parse(text)` and record the call in the grammar's history.

    Before the parser is called, a pending `grammar.most_recent_error__`
    is written to `grammar.history__` together with a resume or skip
    notice. While the parser runs, an entry for it is kept on
    `grammar.call_stack__`. After a successful return a `HistoryRecord`
    is appended to the history, if the conditions checked below are met.

    :param self: the parser that is being traced.
    :param text: the text that is handed on to `self._parse()`.
    :returns: the pair `(node, rest)` as returned by `self._parse()`, or
        `(mre.node, text)` without calling the parser, if the pending
        error has the code RECURSION_DEPTH_LIMIT_HIT.
    :raises ParserError: a ParserError raised by `self._parse()` is
        re-raised after the bookkeeping has been done.
    """
    grammar = self._grammar  # type: Grammar
    # position in the document, computed from the length of the text
    location = grammar.document_length__ - text._len  # type: int

    if grammar.most_recent_error__:
        # add resume notice (mind that skip notices are added by
        # `parse.MandatoryElementsParser.mandatory_violation()`)
        mre = grammar.most_recent_error__  # type: ParserError
        if mre.error.code == RECURSION_DEPTH_LIMIT_HIT:
            # return right away; the pending error is left in place
            return mre.node, text

        grammar.most_recent_error__ = None
        errors = [mre.error]  # type: List[Error]
        text_ = grammar.document__[mre.error.pos:]
        lc = line_col(grammar.document_lbreaks__, mre.error.pos)
        resume_pos = self.grammar.document_length__ - len(text)
        # preview of the text at the resume position, shortened if longer
        # than 10 characters
        target = text if len(text) <= 10 else text[:7] + '...'

        # collect names from the top of the call stack downwards, until a
        # name is reached that does not start with ':' or until the call
        # stack is exhausted
        resumers = [grammar.call_stack__[-1][0]]
        i = 2
        L = len(grammar.call_stack__)
        while resumers[-1][:1] == ':' and i <= L:
            resumers.append(grammar.call_stack__[-i][0])
            i += 1
        resumer = '->'.join(reversed(resumers))

        if mre.first_throw:
            origin = mre.node.tag_name
            if origin[:1] == ':':
                # prefix the tag name of the symbol associated with the parser
                origin = grammar.associated_symbol__(
                    mre.parser).tag_name + '->' + origin
            notice = Error(  # resume notice
                'Resuming from parser "{}" at position {}:{} with parser "{}": {}'
                .format(origin, *lc, resumer,
                        repr(target)), resume_pos, RESUME_NOTICE)
        else:
            notice = Error(  # skip notice
                'Skipping from position {}:{} within parser {}: {}'.format(
                    *lc, resumer, repr(target)), resume_pos, RESUME_NOTICE)
        if grammar.resume_notices__:
            grammar.tree__.add_error(mre.node, notice)
        errors.append(notice)
        # use the call stack frozen when the error was first thrown, if
        # there is one, otherwise the current call stack
        grammar.history__.append(
            HistoryRecord(
                getattr(mre, 'frozen_callstack', grammar.call_stack__),
                mre.node, text_,
                line_col(grammar.document_lbreaks__, mre.error.pos), errors))

    # push an entry (name, location) for this parser on the call stack
    grammar.call_stack__.append(
        (((' ' + self.repr) if self.tag_name in (REGEXP_PTYPE, TOKEN_PTYPE,
                                                 ":Retrieve", ":Pop") else
          (self.pname or self.tag_name)),
         location))  # ' ' added to avoid ':' as first char!
    grammar.moving_forward__ = True

    try:
        node, rest = self._parse(text)  # <===== call to the actual parser!
    except ParserError as pe:
        if pe.first_throw:
            # keep the call stack as it was when the error was first thrown
            pe.frozen_callstack = freeze_callstack(grammar.call_stack__)
            grammar.most_recent_error__ = pe
        if self == grammar.start_parser__ and grammar.most_recent_error__:
            # the error has reached the start parser: record it now
            fe = grammar.most_recent_error__  # type: ParserError
            lc = line_col(grammar.document_lbreaks__, fe.error.pos)
            # TODO: get the call stack from when the error occurred, here
            nd = fe.node
            grammar.history__.append(
                HistoryRecord(grammar.call_stack__, nd, fe.rest[len(nd):], lc,
                              [fe.error]))
        # remove this parser's entry again before the error is passed on
        grammar.call_stack__.pop()
        raise pe

    # Mind that memoized parser calls will not appear in the history record!
    # Don't track returning parsers except in case an error has occurred!
    if ((self.tag_name != WHITESPACE_PTYPE) and
        (grammar.moving_forward__ or
         (not self.disposable and
          (node or grammar.history__ and grammar.history__[-1].node)))):
        # record history
        # TODO: Make dropping insignificant whitespace from history configurable
        delta = text._len - rest._len  # length of the text that was consumed
        hnd = Node(node.tag_name,
                   text[:delta]).with_pos(location) if node else None
        lc = line_col(grammar.document_lbreaks__, location)
        record = HistoryRecord(grammar.call_stack__, hnd, rest, lc, [])
        cs_len = len(record.call_stack)
        # skip the record only if there was a match at the same line and
        # column as the previous record, the call stack is a prefix of the
        # previous record's call stack and this is not the start parser
        if (not grammar.history__ or not node
                or lc != grammar.history__[-1].line_col or
                record.call_stack != grammar.history__[-1].call_stack[:cs_len]
                or self == grammar.start_parser__):
            grammar.history__.append(record)

    grammar.moving_forward__ = False
    grammar.call_stack__.pop()
    return node, rest