示例#1
0
    def _skipws(self, head, input_str):

        in_len = len(input_str)
        layout_content_ahead = ''

        if self.layout_parser:
            _, pos = self.layout_parser.parse(input_str, head.position)
            if pos > head.position:
                layout_content_ahead = input_str[head.position:pos]
                head.position = pos
        elif self.ws:
            old_pos = head.position
            try:
                while head.position < in_len \
                      and input_str[head.position] in self.ws:
                    head.position += 1
            except TypeError:
                raise ParserInitError("For parsing non-textual content please "
                                      "set `ws` to `None`.")
            layout_content_ahead = input_str[old_pos:head.position]

        if self.debug:
            content = layout_content_ahead
            if type(layout_content_ahead) is str:
                content = content.replace("\n", "\\n")
            h_print("Skipping whitespaces:", "'{}'".format(content))
            h_print("New position:", pos_to_line_col(input_str, head.position))
        head.layout_content_ahead = layout_content_ahead
示例#2
0
    def _find_lookaheads(self):
        """
        Compute lookahead token(s) for every active head.

        Pops each head from ``_active_heads``, skips layout in front of it
        and scans for the next token(s).  Heads are regrouped into
        ``_active_heads_per_symbol`` keyed by the lookahead token symbol;
        when several tokens are recognized a derived head (via
        ``for_token``) is registered per token.  Heads for which no token
        can be recognized are dropped.
        """
        debug = self.debug
        self._active_heads_per_symbol = {}
        while self._active_heads:
            state_id, head = self._active_heads.popitem()
            if head.token_ahead is not None:
                # May happen after error recovery: the head already carries
                # a lookahead, so just regroup it by that token's symbol.
                self._active_heads_per_symbol.setdefault(
                    head.token_ahead.symbol, {})[head.state.state_id] = head
                continue
            if debug:
                h_print("Finding lookaheads for head {}".format(head),
                        new_line=True)
            # Advance past layout/whitespace before scanning for tokens.
            self._skipws(head, self.input_str)

            tokens = self._next_tokens(head)

            if debug:
                self._debug_context(head.position,
                                    head.layout_content_ahead,
                                    lookahead_tokens=tokens,
                                    expected_symbols=head.state.actions.keys())

            if tokens:
                # One derived head per recognized token, grouped by the
                # token's symbol.
                while tokens:
                    token = tokens.pop()
                    head = head.for_token(token)
                    self._active_heads_per_symbol.setdefault(
                        token.symbol, {})[head.state.state_id] = head
            else:
                # Can't find lookahead. This head can't progress
                if debug:
                    h_print('No lookaheads found. Killing head.')
示例#3
0
File: cli.py  Project: morganjk/parglare
def check_get_grammar_table(grammar_file, debug, colors):
    """
    Load the grammar from ``grammar_file``, build its LR table and report
    Shift/Reduce and Reduce/Reduce conflicts.

    Args:
        grammar_file (str): Path to the grammar file.
        debug (bool): If True, print grammar and table debug info.
        colors (bool): Enable colorized debug output.

    Returns:
        A ``(grammar, table)`` tuple.

    Exits the process with status 1 on grammar/parse errors.
    """
    try:
        g = Grammar.from_file(grammar_file,
                              _no_check_recognizers=True,
                              debug_colors=colors)
        if debug:
            g.print_debug()
        table = create_table(g)
        if debug:
            table.print_debug()

        h_print("Grammar OK.")
        if table.sr_conflicts:
            a_print("There are {} Shift/Reduce conflicts.".format(
                len(table.sr_conflicts)))
            # This message contains no format placeholders, so the previous
            # redundant ``.format(len(...))`` call was removed.
            prints("Either use 'prefer_shifts' parser mode, try to resolve "
                   "manually or use GLR parsing.")
        if table.rr_conflicts:
            a_print("There are {} Reduce/Reduce conflicts.".format(
                len(table.rr_conflicts)))
            prints("Try to resolve manually or use GLR parsing.")

        if (table.sr_conflicts or table.rr_conflicts) and not debug:
            prints("Run in debug mode to print all the states.")

    except (GrammarError, ParseError) as e:
        print("Error in the grammar file.")
        print(e)
        sys.exit(1)

    return g, table
示例#4
0
    def _do_recovery(self):

        debug = self.debug
        if debug:
            a_print("**Recovery initiated.**")

        head = self.parse_stack[-1]
        error = self.errors[-1]

        if type(self.error_recovery) is bool:
            # Default recovery
            if debug:
                prints("\tDoing default error recovery.")
            successful = self.default_error_recovery(head)
        else:
            # Custom recovery provided during parser construction
            if debug:
                prints("\tDoing custom error recovery.")
            successful = self.error_recovery(head, error)

        # The recovery may either decide to skip erroneous part of
        # the input and resume at the place that can continue or it
        # might decide to fill in missing tokens.
        if successful:
            if debug:
                h_print("Recovery ")
            error.location.context.end_position = head.position
            if debug:
                a_print("New position is ",
                        pos_to_line_col(head.input_str, head.position),
                        level=1)
                a_print("New lookahead token is ", head.token_ahead, level=1)
        return successful
示例#5
0
    def _finish_error_reporting(self):
        # Expected symbols are only those that can cause active heads
        # to shift.
        self._expected = set(h.token_ahead.symbol
                             for h, _ in self._for_shifter)
        if self.debug:
            a_print("*** LEAVING ERROR REPORTING MODE.", new_line=True)
            h_print("Tokens expected:",
                    ', '.join([t.name for t in self._expected]),
                    level=1)
            h_print("Tokens found:", self._tokens_ahead, level=1)

        # After leaving error reporting mode, register error and try
        # recovery if enabled
        context = self._last_shifted_heads[0]
        self.errors.append(
            self._create_error(context,
                               self._expected,
                               tokens_ahead=self._tokens_ahead,
                               symbols_before=list({
                                   h.state.symbol
                                   for h in self._last_shifted_heads
                               }),
                               last_heads=self._last_shifted_heads))

        self.for_shifter = []
        self._in_error_reporting = False
示例#6
0
def compile(ctx, grammar_file):
    """
    CLI command: compile the given grammar file and report the result.

    Shared options are read from the context object (``ctx.obj``).
    """
    options = ctx.obj
    debug = options['debug']
    colors = options['colors']
    shifts = options['prefer_shifts']
    shifts_over_empty = options['prefer_shifts_over_empty']
    h_print('Compiling...')
    compile_get_grammar_table(grammar_file, debug, colors, shifts,
                              shifts_over_empty)
示例#7
0
    def _debug_reduce_heads(self):
        """Print the reduced heads and the heads pending reduction."""
        for title, mapping in (("Reduced heads = ", self.reduced_heads),
                               ("Heads for reduction:",
                                self.heads_for_reduction)):
            entries = list(mapping.values())
            h_print(title, len(entries))
            for entry in entries:
                prints("\t{}".format(entry))
示例#8
0
    def _export__dot_trace(self):
        """
        Write the collected dot trace to a ``.dot`` file and print hints on
        how to render it.
        """
        if self.file_name:
            file_name = "{}_trace.dot".format(self.file_name)
        else:
            file_name = "parglare_trace.dot"

        with io.open(file_name, 'w', encoding="utf-8") as trace_file:
            trace_file.write(DOT_HEADER)
            trace_file.write(self._dot_trace)
            trace_file.write("}\n")

        prints("Generated file {}.".format(file_name))
        prints("You can use dot viewer or generate pdf with the "
               "following command:")
        h_print("dot -Tpdf -O {}".format(file_name))
示例#9
0
    def calc_conflicts_and_dynamic_terminals(self, debug=False):
        """
        Determine S/R and R/R conflicts and states dynamic terminals.

        Populates ``self.sr_conflicts`` and ``self.rr_conflicts`` and marks
        terminals subject to dynamic disambiguation in each state.

        Args:
            debug (bool): Print progress info if True.
        """
        self.sr_conflicts = []
        self.rr_conflicts = []

        if debug:
            h_print("Calculating conflicts and dynamic terminals...")

        for state in self.states:

            for term, actions in state.actions.items():

                # Mark state for dynamic disambiguation
                if term.dynamic:
                    state.dynamic.add(term)

                if len(actions) > 1:
                    if actions[0].action in [SHIFT, ACCEPT]:
                        # Create SR conflicts for each S-R pair of actions
                        # except EMPTY reduction as SHIFT will always be
                        # preferred in LR parsing and GLR has a special
                        # handling of EMPTY reduce in order to avoid infinite
                        # looping.
                        for r_act in actions[1:]:

                            # Mark state for dynamic disambiguation
                            if r_act.prod.dynamic:
                                state.dynamic.add(term)

                            # Fix: one conflict per S-R pair with that
                            # pair's production.  Previously the full list
                            # of reduce productions was appended once per
                            # pair, producing duplicated conflicts.
                            self.sr_conflicts.append(
                                SRConflict(state, term, [r_act.prod]))
                    else:
                        prods = [x.prod for x in actions if len(x.prod.rhs)]

                        # Mark state for dynamic disambiguation
                        if any(p.dynamic for p in prods):
                            state.dynamic.add(term)

                        empty_prods = [
                            x.prod for x in actions if not len(x.prod.rhs)
                        ]
                        # Multiple empty reductions possible
                        if len(empty_prods) > 1:
                            self.rr_conflicts.append(
                                RRConflict(state, term, empty_prods))
                        # Multiple non-empty reductions possible
                        if len(prods) > 1:
                            self.rr_conflicts.append(
                                RRConflict(state, term, prods))
示例#10
0
 def _debug_context(self,
                    position,
                    layout_content=None,
                    lookahead_tokens=None,
                    expected_symbols=None):
     """
     Print debugging information about the current parse context:
     position, surrounding input, skipped layout, expected symbols and
     lookahead tokens.
     """
     text = self.input_str
     h_print("Position:", pos_to_line_col(text, position))
     h_print("Context:", _(position_context(text, position)))
     if layout_content:
         h_print("Layout: ", "'{}'".format(_(layout_content)), level=1)
     if expected_symbols:
         names = [symbol.name for symbol in expected_symbols]
         h_print("Symbols expected: ", names)
     if lookahead_tokens:
         h_print("Token(s) ahead:", _(str(lookahead_tokens)))
示例#11
0
    def print_debug(self):
        """
        Print all automaton states with their GOTO and ACTION tables,
        followed by a summary of S/R and R/R conflicts.
        """
        a_print("*** STATES ***", new_line=True)
        for state in self.states:
            state.print_debug()

            if state.gotos:
                h_print("GOTO:", level=1, new_line=True)
                goto_strs = [("%s" + s_emph("->") + "%d")
                             % (symbol, target.state_id)
                             for symbol, target in state.gotos.items()]
                prints("\t" + ", ".join(goto_strs))
            h_print("ACTIONS:", level=1, new_line=True)
            action_strs = []
            for symbol, acts in state.actions.items():
                if len(acts) == 1:
                    acts_str = str(acts[0])
                else:
                    acts_str = "[{}]".format(",".join([str(a) for a in acts]))
                action_strs.append(("%s" + s_emph("->") + "%s")
                                   % (symbol, acts_str))
            prints("\t" + ", ".join(action_strs))

        if self.sr_conflicts:
            a_print("*** S/R conflicts ***", new_line=True)
            count = len(self.sr_conflicts)
            message = ('There is {} S/R conflict.' if count == 1
                       else 'There are {} S/R conflicts.')
            h_print(message.format(count))
            for conflict in self.sr_conflicts:
                print(conflict.message)

        if self.rr_conflicts:
            a_print("*** R/R conflicts ***", new_line=True)
            count = len(self.rr_conflicts)
            message = ('There is {} R/R conflict.' if count == 1
                       else 'There are {} R/R conflicts.')
            h_print(message.format(count))
            for conflict in self.rr_conflicts:
                print(conflict.message)
示例#12
0
    def _do_error_recovery(self):
        """
        If recovery is enabled, does error recovery for the heads in
        _last_shifted_heads.

        Successfully recovered heads are put back into
        ``self._active_heads``; heads that cannot recover are dropped
        (killed).
        """
        if self.debug:
            a_print("*** STARTING ERROR RECOVERY.", new_line=True)
        # Recover from the most recently registered error.
        error = self.errors[-1]
        debug = self.debug
        self._active_heads = {}
        for head in self._last_shifted_heads:
            if debug:
                # NOTE: input_str is only bound under debug and only used
                # in the debug branches below.
                input_str = head.input_str
                symbols = head.state.actions.keys()
                h_print("Recovery initiated for head {}.".format(head),
                        level=1,
                        new_line=True)
                h_print("Symbols expected: ", [s.name for s in symbols],
                        level=1)
            if type(self.error_recovery) is bool:
                # Default recovery
                if debug:
                    prints("\tDoing default error recovery.")
                successful = self.default_error_recovery(head)
            else:
                # Custom recovery provided during parser construction
                if debug:
                    prints("\tDoing custom error recovery.")
                successful = self.error_recovery(head, error)

            if successful:
                # Extend the error span up to the position where parsing
                # resumes for this head.
                error.location.context.end_position = head.position
                if debug:
                    a_print("New position is ",
                            pos_to_line_col(input_str, head.position),
                            level=1)
                    a_print("New lookahead token is ",
                            head.token_ahead,
                            level=1)
                self._active_heads[head.state.state_id] = head
                if self.debug:
                    a_print("*** ERROR RECOVERY SUCCEEDED. CONTINUING.",
                            new_line=True)
            else:
                if debug:
                    a_print("Killing head: ", head, level=1)
                    if self.debug_trace:
                        self._trace_step_kill(head)
示例#13
0
def compile_get_grammar_table(grammar_file, debug, colors, prefer_shifts,
                              prefer_shifts_over_empty):
    """
    Load the grammar from ``grammar_file``, (re)build its LR table and
    report any Shift/Reduce and Reduce/Reduce conflicts.

    Args:
        grammar_file (str): Path to the grammar file.
        debug (bool): Print grammar/table debug info.
        colors (bool): Enable colorized debug output.
        prefer_shifts (bool): Conflict resolution favouring SHIFT.
        prefer_shifts_over_empty (bool): Favour SHIFT over EMPTY reduce.

    Returns:
        A ``(grammar, table)`` tuple.

    Exits the process with status 1 on grammar/parse errors.
    """
    try:
        g = Grammar.from_file(grammar_file,
                              _no_check_recognizers=True,
                              debug_colors=colors)
        if debug:
            g.print_debug()
        table = create_load_table(
            g,
            prefer_shifts=prefer_shifts,
            prefer_shifts_over_empty=prefer_shifts_over_empty,
            force_create=True)
        # A conflicting table is always printed to help resolution.
        if debug or table.sr_conflicts or table.rr_conflicts:
            table.print_debug()

        if not table.sr_conflicts and not table.rr_conflicts:
            h_print("Grammar OK.")

        if table.sr_conflicts:
            if len(table.sr_conflicts) == 1:
                message = 'There is 1 Shift/Reduce conflict.'
            else:
                message = 'There are {} Shift/Reduce conflicts.'\
                          .format(len(table.sr_conflicts))
            a_print(message)
            # This message contains no format placeholders, so the previous
            # redundant ``.format(len(...))`` call was removed.
            prints("Either use 'prefer_shifts' parser mode, try to resolve "
                   "manually, or use GLR parsing.")
        if table.rr_conflicts:
            if len(table.rr_conflicts) == 1:
                message = 'There is 1 Reduce/Reduce conflict.'
            else:
                message = 'There are {} Reduce/Reduce conflicts.'\
                          .format(len(table.rr_conflicts))
            a_print(message)
            prints("Try to resolve manually or use GLR parsing.")

    except (GrammarError, ParseError) as e:
        print("Error in the grammar file.")
        print(e)
        sys.exit(1)

    return g, table
示例#14
0
    def _lexical_disambiguation(self, tokens):
        """
        For the given list of matched tokens apply disambiguation strategy.

        Args:
        tokens (list of Token)
        """

        if self.debug:
            h_print("Lexical disambiguation.",
                    " Tokens: {}".format([x for x in tokens]),
                    level=1)

        if len(tokens) <= 1:
            return tokens

        # Longest-match strategy.
        max_len = max((len(x.value) for x in tokens))
        tokens = [x for x in tokens if len(x.value) == max_len]
        if self.debug:
            h_print("Disambiguation by longest-match strategy.",
                    "Tokens: {}".format([x for x in tokens]),
                    level=1)
        if len(tokens) == 1:
            return tokens

        # try to find preferred token.
        pref_tokens = [x for x in tokens if x.symbol.prefer]
        if pref_tokens:
            if self.debug:
                h_print("Preferring tokens {}.".format(pref_tokens), level=1)
            return pref_tokens

        return tokens
示例#15
0
    def _call_dynamic_filter(self,
                             context,
                             from_state,
                             to_state,
                             action,
                             production=None,
                             subresults=None):
        """
        Call the user supplied dynamic disambiguation filter for the given
        SHIFT or REDUCE action and return its verdict.

        Actions on non-dynamic symbols (SHIFT) / productions (REDUCE) are
        accepted immediately without consulting the filter.

        Returns:
            bool: True if the action is accepted.
        """
        # Make sure the context carries a token for the filter call.
        # (The previous dead local ``token = context.token`` was removed.)
        if context.token is None:
            context.token = context.token_ahead
        if (action is SHIFT and not to_state.symbol.dynamic)\
           or (action is REDUCE and not production.dynamic):
            return True

        if self.debug:
            # Fix: use dedicated display variables instead of clobbering
            # ``production``/``subresults``, which were previously
            # overwritten with strings and passed on to the filter.
            if action is SHIFT:
                act_str = "SHIFT"
                token = context.token
                prod_str = ""
                subresults_str = ""
            else:
                act_str = "REDUCE"
                token = context.token_ahead
                prod_str = ", prod={}".format(context.production)
                subresults_str = ", subresults={}".format(subresults)

            h_print("Calling filter for action:",
                    " {}, token={}{}{}".format(act_str, token, prod_str,
                                               subresults_str),
                    level=2)

        accepted = self.dynamic_filter(context, from_state, to_state, action,
                                       production, subresults)
        if self.debug:
            if accepted:
                a_print("Action accepted.", level=2)
            else:
                a_print("Action rejected.", level=2)

        return accepted
示例#16
0
 def _debug__active_heads(self, heads):
     """Print a summary of the given active heads and their tree count."""
     if not heads:
         h_print('No active heads.')
         return
     h_print("Active heads = ", len(heads))
     for head in heads:
         prints("\t{}".format(head))
     h_print("Number of trees = {}".format(
         sum(len(head.parents) for head in heads)))
示例#17
0
    def print_debug(self):
        """Print grammar terminals, nonterminals and productions."""
        a_print("*** GRAMMAR ***", new_line=True)
        h_print("Terminals:")
        prints(" ".join(text(term) for term in self.terminals))
        h_print("NonTerminals:")
        prints(" ".join(text(nonterm) for nonterm in self.nonterminals))

        h_print("Productions:")
        for production in self.productions:
            prints(text(production))
示例#18
0
    def create_link(self, parent, from_head):
        """
        Attach ``parent`` as a parent link of this head.

        If a link from the same root head already exists, the new link's
        possibilities are merged into it instead of creating another link.

        Returns:
            bool: True if a new link was created, False if merged.
        """
        parent.head = self
        debug = self.parser.debug
        existing = self.parents.get(parent.root.id)

        if existing:
            existing.merge(parent)
            created = False
            if debug:
                h_print("Extending possibilities \tof head:", self, level=1)
                h_print("  parent head:", parent.root, level=3)
        else:
            self.parents[parent.root.id] = parent
            created = True
            if debug:
                h_print("Creating link \tfrom head:", self, level=1)
                h_print("  to head:", parent.root, level=3)

        if debug and self.parser.debug_trace:
            self.parser._trace_step(from_head, parent)

        return created
示例#19
0
    def _call_shift_action(self, context):
        """
        Calls registered shift action for the given grammar symbol.
        """
        debug = self.debug
        token = context.token
        sem_action = token.symbol.action

        if self.build_tree:
            # call action for building tree node if tree building is enabled
            if debug:
                h_print("Building terminal node",
                        "'{}'.".format(token.symbol.name),
                        level=2)

            # If both build_tree and call_actions_during_build are set to
            # True, semantic actions will be call but their result will be
            # discarded. For more info check following issue:
            # https://github.com/igordejanovic/parglare/issues/44
            if self.call_actions_during_tree_build and sem_action:
                sem_action(context, token.value, *token.additional_data)

            return NodeTerm(context, token)

        if sem_action:
            result = sem_action(context, token.value, *token.additional_data)

        else:
            if debug:
                h_print("No action defined",
                        "for '{}'. "
                        "Result is matched string.".format(token.symbol.name),
                        level=1)
            result = token.value

        if debug:
            h_print("Action result = ",
                    "type:{} value:{}".format(type(result), repr(result)),
                    level=1)

        return result
示例#20
0
    def _reduce(self, head, root_head, production, node_nonterm,
                start_position, end_position):
        """
        Executes the given reduction.

        A new GSS node is created in the state reached by following the
        GOTO link from ``root_head`` over the reduced production's symbol,
        with ``node_nonterm`` as a possibility of the new parent link.  If
        a head already exists in the target state the link is merged into
        it, possibly revisiting heads processed earlier on this frontier.
        """
        if start_position is None:
            # Empty reduction: degenerate span at the root head's position.
            start_position = end_position = root_head.position
        state = root_head.state.gotos[production.symbol]

        if self.debug:
            self.debug_step += 1
            a_print('{} REDUCING head '.format(self._debug_step_str()),
                    str(head),
                    new_line=True)
            a_print('by prod ', production, level=1)
            a_print('to state {}:{}'.format(state.state_id, state.symbol),
                    level=1)
            a_print('root is ', root_head, level=1)
            a_print('Position span: {} - {}'.format(start_position,
                                                    end_position),
                    level=1)

        # Candidate head in the GOTO state; lookahead and layout are
        # carried over from the reducing head.
        new_head = GSSNode(self,
                           state,
                           head.position,
                           head.frontier,
                           token_ahead=head.token_ahead,
                           layout_content=root_head.layout_content,
                           layout_content_ahead=head.layout_content_ahead)
        parent = Parent(new_head,
                        root_head,
                        start_position,
                        end_position,
                        production=production,
                        possibilities=[node_nonterm])

        if self.dynamic_filter and \
                not self._call_dynamic_filter(parent, head.state, state,
                                              REDUCE, production,
                                              list(node_nonterm)):
            # Action rejected by dynamic filter
            return

        active_head = self._active_heads.get(state.state_id, None)
        if active_head:
            # A head already exists in the target state: merge by adding
            # a (possibly new) parent link to it.
            created = active_head.create_link(parent, head)

            # Calculate heads to revisit with the new path. Only those heads that
            # are already processed (not in _for_actor) and are traversing this
            # new head state on the current frontier should be considered.
            if created and state.state_id in self._states_traversed:
                to_revisit = self._states_traversed[
                    state.state_id].intersection(
                        self._active_heads.keys()) - set(
                            h.state.state_id for h in self._for_actor)
                if to_revisit:
                    if self.debug:
                        h_print('Revisiting reductions for processed '
                                'active heads in states {}'.format(to_revisit),
                                level=1)
                    for r_head_state in to_revisit:
                        r_head = self._active_heads[r_head_state]
                        # Re-run only REDUCE actions for the current
                        # lookahead symbol over the new path.
                        for action in [
                                a for a in r_head.state.actions.get(
                                    head.token_ahead.symbol, [])
                                if a.action == REDUCE
                        ]:
                            self._do_reductions(r_head, action.prod, parent)
        else:
            # No cycles. Do the reduction.
            new_head.create_link(parent, head)
            self._for_actor.append(new_head)
            self._active_heads[new_head.state.state_id] = new_head

            if self.debug:
                a_print("New head: ", new_head, level=1, new_line=True)
                if self.debug_trace:
                    self._trace_head(new_head)
示例#21
0
    def _call_reduce_action(self, context, subresults):
        """
        Calls registered reduce action for the given grammar symbol.
        """
        debug = self.debug
        result = None
        bt_result = None
        production = context.production

        if self.build_tree:
            # call action for building tree node if enabled.
            if debug:
                h_print("Building non-terminal node",
                        "'{}'.".format(production.symbol.name),
                        level=2)

            bt_result = NodeNonTerm(context,
                                    children=subresults,
                                    production=production)
            context.node = bt_result
            if not self.call_actions_during_tree_build:
                return bt_result

        sem_action = production.symbol.action
        if sem_action:
            assignments = production.assignments
            if assignments:
                assgn_results = {}
                for a in assignments.values():
                    if a.op == '=':
                        assgn_results[a.name] = subresults[a.index]
                    else:
                        assgn_results[a.name] = bool(subresults[a.index])

            if type(sem_action) is list:
                if assignments:
                    result = sem_action[production.prod_symbol_id](
                        context, subresults, **assgn_results)
                else:
                    result = sem_action[production.prod_symbol_id](context,
                                                                   subresults)
            else:
                if assignments:
                    result = sem_action(context, subresults, **assgn_results)
                else:
                    result = sem_action(context, subresults)

        else:
            if debug:
                h_print("No action defined",
                        " for '{}'.".format(production.symbol.name),
                        level=1)
            if len(subresults) == 1:
                if debug:
                    h_print("Unpacking a single subresult.", level=1)
                result = subresults[0]
            else:
                if debug:
                    h_print("Result is a list of subresults.", level=1)
                result = subresults

        if debug:
            h_print("Action result =",
                    "type:{} value:{}".format(type(result), repr(result)),
                    level=1)

        # If build_tree is set to True, discard the result of the semantic
        # action, and return the result of treebuild_reduce_action.
        return bt_result if bt_result is not None else result
示例#22
0
def create_table(grammar,
                 itemset_type=LR_1,
                 start_production=1,
                 prefer_shifts=False,
                 prefer_shifts_over_empty=True,
                 debug=False,
                 **kwargs):
    """
    Create the LR automaton (a list of LRStates with ACTION and GOTO
    entries) for the given grammar and return it wrapped in an LRTable.

    Arguments:
    grammar (Grammar): the grammar to build the automaton for.
    itemset_type(int) - SLR=0 LR_1=1. By default LR_1.
    start_production(int) - The production which defines start state.
        By default 1 - first production from the grammar.
    prefer_shifts(bool) - Conflict resolution strategy which favours SHIFT over
        REDUCE (greedy). By default False.
    prefer_shifts_over_empty(bool) - Conflict resolution strategy which favours
        SHIFT over REDUCE of EMPTY. By default True. If prefer_shifts is
        `True` this param is ignored.
    kwargs - forwarded to the LRTable constructor.

    Raises:
    GrammarError - if some non-terminal has an empty FIRST set, which
        indicates an infinite recursion in the grammar.
    """

    first_sets = first(grammar)

    # An empty FIRST set means the non-terminal can never derive a string of
    # terminals, i.e. the grammar has an infinite recursion on that symbol.
    for nt, firsts in first_sets.items():
        if nt.name != 'S\'' and not firsts:
            raise GrammarError(
                location=nt.location,
                message='First set empty for grammar symbol "{}". '
                'An infinite recursion on the '
                'grammar symbol.'.format(nt))

    follow_sets = follow(grammar, first_sets)

    # Temporarily rewrite the augmented production's RHS so the automaton is
    # built for the requested start production; restored before returning.
    _old_start_production_rhs = grammar.productions[0].rhs
    start_prod_symbol = grammar.productions[start_production].symbol
    grammar.productions[0].rhs = ProductionRHS([start_prod_symbol, STOP])

    # Create a state for the first production (augmented)
    s = LRState(grammar, 0, AUGSYMBOL,
                [LRItem(grammar.productions[0], 0, set())])

    state_queue = [s]
    state_id = 1

    states = []

    if debug:
        h_print("Constructing LR automaton states...")
    # Breadth-first construction of all states reachable from the start state.
    while state_queue:
        state = state_queue.pop(0)

        # For each state calculate its closure first, i.e. starting from a so
        # called "kernel items" expand collection with non-kernel items. We will
        # also calculate GOTO and ACTIONS dicts for each state. These dicts will
        # be keyed by a grammar symbol.
        closure(state, itemset_type, first_sets)
        states.append(state)

        # To find out other states we examine following grammar symbols in the
        # current state (symbols following current position/"dot") and group all
        # items by a grammar symbol.
        per_next_symbol = OrderedDict()

        # Each production has a priority. But since productions are grouped by
        # grammar symbol that is ahead we take the maximal priority given for
        # all productions for the given grammar symbol.
        state._max_prior_per_symbol = {}

        for item in state.items:
            symbol = item.symbol_at_position
            if symbol:
                per_next_symbol.setdefault(symbol, []).append(item)

                # Here we calculate max priorities for each grammar symbol to
                # use it for SHIFT/REDUCE conflict resolution
                prod_prior = item.production.prior
                old_prior = state._max_prior_per_symbol.setdefault(
                    symbol, prod_prior)
                state._max_prior_per_symbol[symbol] = max(
                    prod_prior, old_prior)

        # For each group symbol we create new state and form its kernel
        # items from the group items with positions moved one step ahead.
        for symbol, items in per_next_symbol.items():
            if symbol is STOP:
                # Seeing STOP means the whole input is parsed -> ACCEPT.
                state.actions[symbol] = [Action(ACCEPT)]
                continue
            inc_items = [item.get_pos_inc() for item in items]
            maybe_new_state = LRState(grammar, state_id, symbol, inc_items)
            target_state = maybe_new_state
            # Reuse an existing state with the same kernel items, whether it
            # was already processed or is still waiting in the queue.
            try:
                idx = states.index(maybe_new_state)
                target_state = states[idx]
            except ValueError:
                try:
                    idx = state_queue.index(maybe_new_state)
                    target_state = state_queue[idx]
                except ValueError:
                    pass

            if target_state is maybe_new_state:
                # We've found a new state. Register it for later processing.
                state_queue.append(target_state)
                state_id += 1
            else:
                # A state with this kernel items already exists.
                if itemset_type is LR_1:
                    # LALR: Try to merge states, i.e. update items follow sets.
                    if not merge_states(target_state, maybe_new_state):
                        # Merging failed: keep the new state separate.
                        target_state = maybe_new_state
                        state_queue.append(target_state)
                        state_id += 1

            # Create entries in GOTO and ACTION tables
            if isinstance(symbol, NonTerminal):
                # For each non-terminal symbol we create an entry in GOTO
                # table.
                state.gotos[symbol] = target_state

            else:
                # For each terminal symbol we create SHIFT action in the
                # ACTION table.
                state.actions[symbol] = [Action(SHIFT, state=target_state)]

    if debug:
        h_print("{} LR automata states constructed".format(len(states)))
        h_print("Finishing LALR calculation...")

    # For LR(1) itemsets refresh/propagate item's follows as the LALR
    # merging might change item's follow in previous states
    if itemset_type is LR_1:

        # Propagate updates as long as there were items propagated in the last
        # loop run (fixed-point iteration).
        update = True
        while update:
            update = False

            for state in states:

                # First refresh current state's follows
                closure(state, LR_1, first_sets)

                # Propagate follows to next states. GOTOs/ACTIONs keep
                # information about states created from this state
                inc_items = [i.get_pos_inc() for i in state.items]
                for target_state in chain(state.gotos.values(), [
                        a.state for i in state.actions.values()
                        for a in i if a.action is SHIFT
                ]):
                    for next_item in target_state.kernel_items:
                        this_item = inc_items[inc_items.index(next_item)]
                        if this_item.follow.difference(next_item.follow):
                            update = True
                            next_item.follow.update(this_item.follow)

    if debug:
        h_print("Calculate REDUCTION entries in ACTION tables and"
                " resolve possible conflicts.")

    # Calculate REDUCTION entries in ACTION tables and resolve possible
    # conflicts.
    for idx, state in enumerate(states):
        actions = state.actions

        for item in state.items:
            if item.is_at_end:
                # If the position is at the end then this item
                # would call for reduction but only for terminals
                # from the FOLLOW set of item (LR(1)) or the production LHS
                # non-terminal (LR(0)).
                if itemset_type is LR_1:
                    follow_set = item.follow
                else:
                    follow_set = follow_sets[item.production.symbol]

                prod = item.production
                new_reduce = Action(REDUCE, prod=prod)

                for terminal in follow_set:
                    if terminal not in actions:
                        actions[terminal] = [new_reduce]
                    else:
                        # Conflict! Try to resolve
                        t_acts = actions[terminal]
                        should_reduce = True

                        # Only one SHIFT or ACCEPT might exists for a single
                        # terminal.
                        shifts = [
                            x for x in t_acts if x.action in (SHIFT, ACCEPT)
                        ]
                        assert len(shifts) <= 1
                        t_shift = shifts[0] if shifts else None

                        # But many REDUCEs might exist
                        t_reduces = [x for x in t_acts if x.action is REDUCE]

                        # We should try to resolve using standard
                        # disambiguation rules between current reduction and
                        # all previous actions.

                        if t_shift:
                            # SHIFT/REDUCE conflict. Use assoc and priority to
                            # resolve
                            # For disambiguation treat ACCEPT action the same
                            # as SHIFT.
                            if t_shift.action is ACCEPT:
                                sh_prior = DEFAULT_PRIORITY
                            else:
                                sh_prior = state._max_prior_per_symbol[
                                    t_shift.state.symbol]
                            if prod.prior == sh_prior:
                                if prod.assoc == ASSOC_LEFT:
                                    # Override SHIFT with this REDUCE
                                    actions[terminal].remove(t_shift)
                                elif prod.assoc == ASSOC_RIGHT:
                                    # If associativity is right leave SHIFT
                                    # action as "stronger" and don't consider
                                    # this reduction any more. Right
                                    # associative reductions can't be in the
                                    # same set of actions together with SHIFTs.
                                    should_reduce = False
                                else:
                                    # If priorities are the same and no
                                    # associativity defined use preferred
                                    # strategy.
                                    is_empty = len(prod.rhs) == 0
                                    prod_pse = is_empty \
                                        and prefer_shifts_over_empty \
                                        and not prod.nopse
                                    prod_ps = not is_empty \
                                        and prefer_shifts and not prod.nops
                                    should_reduce = not (prod_pse or prod_ps)
                            elif prod.prior > sh_prior:
                                # This item operation priority is higher =>
                                # override with reduce
                                actions[terminal].remove(t_shift)
                            else:
                                # If priority of existing SHIFT action is
                                # higher then leave it instead
                                should_reduce = False

                        if should_reduce:
                            if not t_reduces:
                                actions[terminal].append(new_reduce)
                            else:
                                # REDUCE/REDUCE conflicts
                                # Try to resolve using priorities
                                if prod.prior == t_reduces[0].prod.prior:
                                    # Equal priorities: keep all REDUCE
                                    # actions for this terminal.
                                    actions[terminal].append(new_reduce)
                                elif prod.prior > t_reduces[0].prod.prior:
                                    # If this production priority is higher
                                    # it should override all other reductions.
                                    actions[terminal][:] = \
                                        [x for x in actions[terminal]
                                         if x.action is not REDUCE]
                                    actions[terminal].append(new_reduce)

    # Restore the augmented production rewritten at the start.
    grammar.productions[0].rhs = _old_start_production_rhs
    table = LRTable(states, **kwargs)
    return table
示例#23
0
def create_load_table(grammar,
                      itemset_type=LR_1,
                      start_production=1,
                      prefer_shifts=False,
                      prefer_shifts_over_empty=True,
                      force_create=False,
                      force_load=False,
                      in_layout=False,
                      debug=False,
                      **kwargs):
    """
    Construct table by loading from file if present and newer than the grammar.
    If table file is older than the grammar or non-existent calculate the table
    and save to file.

    Arguments:
    see create_table

    force_create(bool): If set to True table will be created even if table file
        exists.
    force_load(bool): If set to True table will be loaded if exists even if
        it's not newer than the grammar, i.e. modification time will not be
        checked.
    in_layout(bool): If set to True the table is built for a layout parser
        and is always calculated, never cached.

    """

    if in_layout:
        # For layout grammars always calculate table.
        # Those are usually very small grammars so there is no point in
        # using cached tables.
        if debug:
            a_print("** Calculating LR table for the layout parser...",
                    new_line=True)
        # Fix: propagate `debug` and extra LRTable kwargs, which were
        # previously dropped for the layout parser table.
        return create_table(grammar, itemset_type, start_production,
                            prefer_shifts, prefer_shifts_over_empty,
                            debug=debug, **kwargs)

    if debug:
        a_print("** Calculating LR table...", new_line=True)

    # The cached table file lives next to the grammar file, with a .pgt
    # extension. If the grammar was not loaded from a file there is nothing
    # to cache to/from.
    table_file_name = None
    if grammar.file_path:
        file_basename, _ = os.path.splitext(grammar.file_path)
        table_file_name = "{}.pgt".format(file_basename)

    create_table_file = True

    if not force_create and not force_load and table_file_name:
        if os.path.exists(table_file_name):
            create_table_file = False
            table_mtime = os.path.getmtime(table_file_name)
            # Regenerate if any of the (possibly imported) grammar files is
            # newer than the cached table.
            for g_file_name in grammar.imported_files.keys():
                if os.path.getmtime(g_file_name) > table_mtime:
                    create_table_file = True
                    break

    if (create_table_file or force_create) and not force_load:
        table = create_table(grammar,
                             itemset_type,
                             start_production,
                             prefer_shifts,
                             prefer_shifts_over_empty,
                             debug=debug,
                             **kwargs)
        if table_file_name:
            try:
                save_table(table_file_name, table)
            except PermissionError:
                # Caching is best-effort: a read-only location just means
                # the table is recalculated on the next run.
                pass
    else:
        if debug:
            h_print("Loading LR table from '{}'".format(table_file_name))
        table = load_table(table_file_name, grammar)

    return table
示例#24
0
    def parse(self, input_str, position=0, file_name=None, extra=None):
        """
        Parses the given input string.

        Args:
            input_str(str): A string to parse.
            position(int): Position to start from.
            file_name(str): File name if applicable. Used in error reporting.
            extra: An object that keeps custom parsing state. If not given
                initialized to dict.

        Returns:
            The parse result (semantic action results from the stack), or a
            `(result, position)` pair if `self.return_position` is set.

        Raises:
            The last collected parse error if the input can't be parsed and
            error recovery (if enabled) does not succeed.
        """

        if self.debug:
            a_print("*** PARSING STARTED", new_line=True)

        self.input_str = input_str
        self.file_name = file_name
        self.extra = {} if extra is None else extra

        # Reset per-parse session state.
        self.errors = []
        self.in_error_recovery = False
        self.accepted_head = None

        # Local aliases avoid attribute lookups in the hot loop.
        next_token = self._next_token
        debug = self.debug

        # Stack bottom: automaton state 0, nothing consumed yet.
        start_head = LRStackNode(self, self.table.states[0], 0, position)
        self._init_dynamic_disambiguation(start_head)
        self.parse_stack = parse_stack = [start_head]

        # Classic LR driver loop: inspect the stack top, pick an action for
        # the lookahead token and SHIFT/REDUCE/ACCEPT accordingly.
        while True:
            head = parse_stack[-1]
            cur_state = head.state
            if debug:
                a_print("Current state:",
                        str(cur_state.state_id),
                        new_line=True)

            if head.token_ahead is None:
                # No lookahead yet: skip layout/whitespace (unless we are the
                # layout parser itself) and tokenize the next token.
                if not self.in_layout:
                    self._skipws(head, input_str)
                    if self.debug:
                        h_print("Layout content:",
                                "'{}'".format(head.layout_content),
                                level=1)

                head.token_ahead = next_token(head)

            if debug:
                h_print("Context:",
                        position_context(head.input_str, head.position),
                        level=1)
                h_print("Tokens expected:",
                        expected_symbols_str(cur_state.actions.keys()),
                        level=1)
                h_print("Token ahead:", head.token_ahead, level=1)

            actions = None
            if head.token_ahead is not None:
                actions = cur_state.actions.get(head.token_ahead.symbol)
            if not actions and not self.consume_input:
                # If we don't have any action for the current token ahead
                # see if we can finish without consuming the whole input.
                actions = cur_state.actions.get(STOP)

            if not actions:
                # Syntax error: record it, then either recover or give up.

                symbols_expected = list(cur_state.actions.keys())
                tokens_ahead = self._get_all_possible_tokens_ahead(head)
                self.errors.append(
                    self._create_error(head,
                                       symbols_expected,
                                       tokens_ahead,
                                       symbols_before=[cur_state.symbol]))

                if self.error_recovery:
                    if self.debug:
                        a_print("*** STARTING ERROR RECOVERY.", new_line=True)
                    if self._do_recovery():
                        # Error recovery succeeded
                        if self.debug:
                            a_print(
                                "*** ERROR RECOVERY SUCCEEDED. CONTINUING.",
                                new_line=True)
                        continue
                    else:
                        break
                else:
                    break

            # Dynamic disambiguation
            if self.dynamic_filter:
                actions = self._dynamic_disambiguation(head, actions)

                # If after dynamic disambiguation we still have at least one
                # shift and non-empty reduction or multiple non-empty
                # reductions raise exception.
                if len([
                        a for a in actions if (a.action is SHIFT) or (
                            (a.action is REDUCE) and len(a.prod.rhs))
                ]) > 1:
                    raise DynamicDisambiguationConflict(head, actions)

            # If dynamic disambiguation is disabled either globally by not
            # giving disambiguation function or locally by not marking
            # any production dynamic for this state take the first action.
            # First action is either SHIFT while there might be empty
            # reductions, or it is the only reduction.
            # Otherwise, parser construction should raise an error.
            act = actions[0]

            if act.action is SHIFT:
                cur_state = act.state

                if debug:
                    a_print(
                        "Shift:",
                        "{} \"{}\"".format(cur_state.state_id,
                                           head.token_ahead.value) +
                        " at position " +
                        str(pos_to_line_col(self.input_str, head.position)),
                        level=1)

                # Consume the lookahead token: push a new stack node covering
                # its span and advance the position past it.
                new_position = head.position + len(head.token_ahead)
                new_head = LRStackNode(
                    self,
                    state=act.state,
                    frontier=head.frontier + 1,
                    token=head.token_ahead,
                    layout_content=head.layout_content_ahead,
                    position=new_position,
                    start_position=head.position,
                    end_position=new_position)
                new_head.results = self._call_shift_action(new_head)
                parse_stack.append(new_head)

                # A successful shift means any previous recovery is finished.
                self.in_error_recovery = False

            elif act.action is REDUCE:
                # if this is EMPTY reduction try to take another if
                # exists.
                if len(act.prod.rhs) == 0:
                    if len(actions) > 1:
                        act = actions[1]
                production = act.prod

                if debug:
                    a_print("Reducing",
                            "by prod '{}'.".format(production),
                            level=1)

                r_length = len(production.rhs)
                if r_length:
                    # Pop r_length nodes, collect their results and push the
                    # node for the production LHS using the GOTO table.
                    start_reduction_head = parse_stack[-r_length]
                    results = [x.results for x in parse_stack[-r_length:]]
                    del parse_stack[-r_length:]
                    next_state = parse_stack[-1].state.gotos[production.symbol]
                    new_head = LRStackNode(
                        self,
                        state=next_state,
                        frontier=head.frontier,
                        position=head.position,
                        production=production,
                        start_position=start_reduction_head.start_position,
                        end_position=head.end_position,
                        token_ahead=head.token_ahead,
                        layout_content=start_reduction_head.layout_content,
                        layout_content_ahead=head.layout_content_ahead)
                else:
                    # Empty reduction
                    results = []
                    next_state = cur_state.gotos[production.symbol]
                    new_head = LRStackNode(
                        self,
                        state=next_state,
                        frontier=head.frontier,
                        position=head.position,
                        production=production,
                        start_position=head.end_position,
                        end_position=head.end_position,
                        token_ahead=head.token_ahead,
                        layout_content='',
                        layout_content_ahead=head.layout_content_ahead)

                # Calling reduce action
                new_head.results = self._call_reduce_action(new_head, results)
                parse_stack.append(new_head)

            elif act.action is ACCEPT:
                self.accepted_head = head
                break

        if self.accepted_head:
            if debug:
                a_print("SUCCESS!!!")
            # parse_stack[0] is the start head; parse_stack[1] carries the
            # results of the fully reduced start production.
            if self.return_position:
                return parse_stack[1].results, parse_stack[1].position
            else:
                return parse_stack[1].results
        else:
            raise self.errors[-1]
示例#25
0
    def _do_reductions(self, head, production, update_parent=None):
        """
        Reduce the given head by the given production. If update_parent is given
        this is update/limited reduction so just traverse the given parent instead of
        all parents of the parent's head.
        """
        debug = self.debug
        if debug:
            h_print("\tFinding reduction paths for head: {}".format(head))
            h_print("\tand production: {}".format(production))
            if update_parent:
                h_print("\tLimited/update reduction due to new path addition.")

        states_traversed = self._states_traversed
        prod_len = len(production.rhs)
        if prod_len == 0:
            # Special case, empty reduction
            self._reduce(head, head, production,
                         NodeNonTerm(None, [], production=production),
                         head.position, head.position)
        else:
            # Find roots of possible reductions by going backwards for
            # prod_len steps following all possible paths. Collect
            # subresults along the way to be used with semantic actions
            # Each work item is:
            # (node, collected results, remaining steps, last parent seen,
            #  whether update_parent has been traversed on this path).
            to_process = [(head, [], prod_len, None, update_parent is None)]
            if debug:
                h_print(
                    "Calculate reduction paths of length {}:".format(prod_len),
                    level=1)
                h_print("start node= {}".format(head), level=2)
            # Iterative DFS over the graph-structured stack (no recursion).
            while to_process:
                (node, results, length, last_parent,
                 traversed) = to_process.pop()
                length = length - 1
                if debug:
                    h_print("node = {}".format(node), level=2, new_line=True)
                    h_print("backpath length = {}{}".format(
                        prod_len - length, " - ROOT" if not length else ""),
                            level=2)

                if node.frontier == head.frontier:
                    # Cache traversed states for revisit optimization
                    states_traversed.setdefault(node.state.state_id,
                                                set()).add(head.state.state_id)

                # For a limited reduction only follow the given parent link;
                # otherwise branch over all parents of this node.
                for parent in [update_parent] \
                        if update_parent and update_parent.head == node \
                        else list(node.parents.values()):
                    if debug:
                        h_print("", str(parent.head), level=3)

                    # Prepend so results end up in left-to-right RHS order.
                    new_results = [parent] + results

                    if last_parent is None:
                        last_parent = parent

                    traversed = traversed or (update_parent
                                              and update_parent.head == node)

                    if length:
                        # Not at the root yet: keep walking backwards.
                        to_process.append((parent.root, new_results, length,
                                           last_parent, traversed))
                    elif traversed:
                        # Full path found that includes update_parent (or no
                        # limit was requested): perform the reduction.
                        self._reduce(
                            head, parent.root, production,
                            NodeNonTerm(None,
                                        new_results,
                                        production=production),
                            parent.start_position, last_parent.end_position)