def _skipws(self, head, input_str): in_len = len(input_str) layout_content_ahead = '' if self.layout_parser: _, pos = self.layout_parser.parse(input_str, head.position) if pos > head.position: layout_content_ahead = input_str[head.position:pos] head.position = pos elif self.ws: old_pos = head.position try: while head.position < in_len \ and input_str[head.position] in self.ws: head.position += 1 except TypeError: raise ParserInitError("For parsing non-textual content please " "set `ws` to `None`.") layout_content_ahead = input_str[old_pos:head.position] if self.debug: content = layout_content_ahead if type(layout_content_ahead) is str: content = content.replace("\n", "\\n") h_print("Skipping whitespaces:", "'{}'".format(content)) h_print("New position:", pos_to_line_col(input_str, head.position)) head.layout_content_ahead = layout_content_ahead
def _find_lookaheads(self):
    """
    For every active head: skip layout ahead, find matching lookahead
    tokens and regroup surviving heads by the symbol of their token
    ahead into `_active_heads_per_symbol`.

    A head with no matching token cannot progress and is dropped.
    """
    debug = self.debug
    self._active_heads_per_symbol = {}
    while self._active_heads:
        state_id, head = self._active_heads.popitem()
        if head.token_ahead is not None:
            # May happen after error recovery
            self._active_heads_per_symbol.setdefault(
                head.token_ahead.symbol, {})[head.state.state_id] = head
            continue
        if debug:
            h_print("Finding lookaheads for head {}".format(head),
                    new_line=True)
        self._skipws(head, self.input_str)
        tokens = self._next_tokens(head)
        if debug:
            self._debug_context(head.position,
                                head.layout_content_ahead,
                                lookahead_tokens=tokens,
                                expected_symbols=head.state.actions.keys())
        if tokens:
            while tokens:
                token = tokens.pop()
                # On lexical ambiguity the head is forked, one head per
                # matched token.
                head = head.for_token(token)
                self._active_heads_per_symbol.setdefault(
                    token.symbol, {})[head.state.state_id] = head
        else:
            # Can't find lookahead. This head can't progress
            if debug:
                h_print('No lookaheads found. Killing head.')
def check_get_grammar_table(grammar_file, debug, colors):
    """
    Load the grammar from `grammar_file`, build its LR table and report
    any Shift/Reduce or Reduce/Reduce conflicts.

    Args:
        grammar_file (str): Path to the grammar file.
        debug (bool): Print grammar and table debug output.
        colors (bool): Enable colorized debug output.

    Returns:
        tuple: (grammar, table)

    Exits the process with status 1 on grammar or parse errors.
    """
    try:
        g = Grammar.from_file(grammar_file, _no_check_recognizers=True,
                              debug_colors=colors)
        if debug:
            g.print_debug()
        table = create_table(g)
        if debug:
            table.print_debug()

        h_print("Grammar OK.")
        if table.sr_conflicts:
            a_print("There are {} Shift/Reduce conflicts.".format(
                len(table.sr_conflicts)))
            # The trailing `.format(len(table.sr_conflicts))` previously
            # attached here was dead code: the message has no
            # placeholders, so it has been removed.
            prints("Either use 'prefer_shifts' parser mode, try to resolve "
                   "manually or use GLR parsing.")
        if table.rr_conflicts:
            a_print("There are {} Reduce/Reduce conflicts.".format(
                len(table.rr_conflicts)))
            prints("Try to resolve manually or use GLR parsing.")

        if (table.sr_conflicts or table.rr_conflicts) and not debug:
            prints("Run in debug mode to print all the states.")
    except (GrammarError, ParseError) as e:
        print("Error in the grammar file.")
        print(e)
        sys.exit(1)

    return g, table
def _do_recovery(self): debug = self.debug if debug: a_print("**Recovery initiated.**") head = self.parse_stack[-1] error = self.errors[-1] if type(self.error_recovery) is bool: # Default recovery if debug: prints("\tDoing default error recovery.") successful = self.default_error_recovery(head) else: # Custom recovery provided during parser construction if debug: prints("\tDoing custom error recovery.") successful = self.error_recovery(head, error) # The recovery may either decide to skip erroneous part of # the input and resume at the place that can continue or it # might decide to fill in missing tokens. if successful: if debug: h_print("Recovery ") error.location.context.end_position = head.position if debug: a_print("New position is ", pos_to_line_col(head.input_str, head.position), level=1) a_print("New lookahead token is ", head.token_ahead, level=1) return successful
def _finish_error_reporting(self): # Expected symbols are only those that can cause active heads # to shift. self._expected = set(h.token_ahead.symbol for h, _ in self._for_shifter) if self.debug: a_print("*** LEAVING ERROR REPORTING MODE.", new_line=True) h_print("Tokens expected:", ', '.join([t.name for t in self._expected]), level=1) h_print("Tokens found:", self._tokens_ahead, level=1) # After leaving error reporting mode, register error and try # recovery if enabled context = self._last_shifted_heads[0] self.errors.append( self._create_error(context, self._expected, tokens_ahead=self._tokens_ahead, symbols_before=list({ h.state.symbol for h in self._last_shifted_heads }), last_heads=self._last_shifted_heads)) self.for_shifter = [] self._in_error_reporting = False
def compile(ctx, grammar_file):
    """
    CLI command: compile the given grammar file and report conflicts.
    """
    options = ctx.obj
    h_print('Compiling...')
    compile_get_grammar_table(grammar_file,
                              options['debug'],
                              options['colors'],
                              options['prefer_shifts'],
                              options['prefer_shifts_over_empty'])
def _debug_reduce_heads(self):
    """Print already reduced heads and heads scheduled for reduction."""
    for title, mapping in (
            ("Reduced heads = ", self.reduced_heads),
            ("Heads for reduction:", self.heads_for_reduction)):
        heads = list(mapping.values())
        h_print(title, len(heads))
        for h in heads:
            prints("\t{}".format(h))
def _export__dot_trace(self):
    """Write the collected GLR trace to a Graphviz dot file and print
    a hint on how to render it."""
    if self.file_name:
        file_name = "{}_trace.dot".format(self.file_name)
    else:
        file_name = "parglare_trace.dot"

    with io.open(file_name, 'w', encoding="utf-8") as f:
        f.write(DOT_HEADER)
        f.write(self._dot_trace)
        f.write("}\n")

    prints("Generated file {}.".format(file_name))
    prints("You can use dot viewer or generate pdf with the "
           "following command:")
    h_print("dot -Tpdf -O {}".format(file_name))
def calc_conflicts_and_dynamic_terminals(self, debug=False):
    """
    Determine S/R and R/R conflicts and states dynamic terminals.

    Walks every action entry of every state: terminals (or productions)
    marked `dynamic` are registered in `state.dynamic`, multi-action
    entries starting with SHIFT/ACCEPT produce SRConflict records, and
    multi-reduce entries produce RRConflict records (empty and
    non-empty reductions reported separately).
    """
    self.sr_conflicts = []
    self.rr_conflicts = []
    if debug:
        h_print("Calculating conflicts and dynamic terminals...")
    for state in self.states:

        for term, actions in state.actions.items():

            # Mark state for dynamic disambiguation
            if term.dynamic:
                state.dynamic.add(term)

            if len(actions) > 1:
                if actions[0].action in [SHIFT, ACCEPT]:
                    # Create SR conflicts for each S-R pair of actions
                    # except EMPTY reduction as SHIFT will always be
                    # preferred in LR parsing and GLR has a special
                    # handling of EMPTY reduce in order to avoid infinite
                    # looping.
                    for r_act in actions[1:]:

                        # Mark state for dynamic disambiguation
                        if r_act.prod.dynamic:
                            state.dynamic.add(term)

                        self.sr_conflicts.append(
                            SRConflict(state, term,
                                       [x.prod for x in actions[1:]]))
                else:
                    prods = [x.prod for x in actions if len(x.prod.rhs)]

                    # Mark state for dynamic disambiguation
                    if any([p.dynamic for p in prods]):
                        state.dynamic.add(term)

                    empty_prods = [x.prod for x in actions
                                   if not len(x.prod.rhs)]
                    # Multiple empty reductions possible
                    if len(empty_prods) > 1:
                        self.rr_conflicts.append(
                            RRConflict(state, term, empty_prods))

                    # Multiple non-empty reductions possible
                    if len(prods) > 1:
                        self.rr_conflicts.append(
                            RRConflict(state, term, prods))
def _debug_context(self, position, layout_content=None,
                   lookahead_tokens=None, expected_symbols=None):
    """Print a debug summary of the current parsing context: position,
    surrounding input, skipped layout, expected symbols and lookahead
    tokens."""
    inp = self.input_str
    h_print("Position:", pos_to_line_col(inp, position))
    h_print("Context:", _(position_context(inp, position)))
    if layout_content:
        h_print("Layout: ", "'{}'".format(_(layout_content)), level=1)
    if expected_symbols:
        h_print("Symbols expected: ",
                [s.name for s in expected_symbols])
    if lookahead_tokens:
        h_print("Token(s) ahead:", _(str(lookahead_tokens)))
def print_debug(self):
    """Print all LR automaton states with their GOTO/ACTION entries,
    followed by any S/R and R/R conflicts."""
    a_print("*** STATES ***", new_line=True)
    for state in self.states:
        state.print_debug()

        if state.gotos:
            h_print("GOTO:", level=1, new_line=True)
            goto_entries = [
                ("%s" + s_emph("->") + "%d") % (sym, tgt.state_id)
                for sym, tgt in state.gotos.items()]
            prints("\t" + ", ".join(goto_entries))
        h_print("ACTIONS:", level=1, new_line=True)
        action_entries = []
        for sym, acts in state.actions.items():
            if len(acts) == 1:
                acts_str = str(acts[0])
            else:
                acts_str = "[{}]".format(",".join([str(a) for a in acts]))
            action_entries.append(
                ("%s" + s_emph("->") + "%s") % (sym, acts_str))
        prints("\t" + ", ".join(action_entries))

    if self.sr_conflicts:
        a_print("*** S/R conflicts ***", new_line=True)
        if len(self.sr_conflicts) == 1:
            message = 'There is {} S/R conflict.'
        else:
            message = 'There are {} S/R conflicts.'
        h_print(message.format(len(self.sr_conflicts)))
        for conflict in self.sr_conflicts:
            print(conflict.message)

    if self.rr_conflicts:
        a_print("*** R/R conflicts ***", new_line=True)
        if len(self.rr_conflicts) == 1:
            message = 'There is {} R/R conflict.'
        else:
            message = 'There are {} R/R conflicts.'
        h_print(message.format(len(self.rr_conflicts)))
        for conflict in self.rr_conflicts:
            print(conflict.message)
def _do_error_recovery(self):
    """
    If recovery is enabled, does error recovery for the heads in
    _last_shifted_heads.

    Each head is recovered independently (default or custom strategy);
    successful heads are put back into `_active_heads`, failed heads
    are killed.
    """
    if self.debug:
        a_print("*** STARTING ERROR RECOVERY.", new_line=True)
    error = self.errors[-1]
    debug = self.debug
    self._active_heads = {}
    for head in self._last_shifted_heads:
        if debug:
            input_str = head.input_str
            symbols = head.state.actions.keys()
            h_print("Recovery initiated for head {}.".format(head),
                    level=1, new_line=True)
            h_print("Symbols expected: ",
                    [s.name for s in symbols], level=1)
        if type(self.error_recovery) is bool:
            # Default recovery
            if debug:
                prints("\tDoing default error recovery.")
            successful = self.default_error_recovery(head)
        else:
            # Custom recovery provided during parser construction
            if debug:
                prints("\tDoing custom error recovery.")
            successful = self.error_recovery(head, error)
        if successful:
            # Extend the recorded error span up to the recovered
            # position.
            error.location.context.end_position = head.position
            if debug:
                a_print("New position is ",
                        pos_to_line_col(input_str, head.position),
                        level=1)
                a_print("New lookahead token is ", head.token_ahead,
                        level=1)
            self._active_heads[head.state.state_id] = head
            if self.debug:
                a_print("*** ERROR RECOVERY SUCCEEDED. CONTINUING.",
                        new_line=True)
        else:
            if debug:
                a_print("Killing head: ", head, level=1)
            if self.debug_trace:
                self._trace_step_kill(head)
def compile_get_grammar_table(grammar_file, debug, colors, prefer_shifts,
                              prefer_shifts_over_empty):
    """
    Load the grammar from `grammar_file`, build (or rebuild) its LR
    table and report any Shift/Reduce or Reduce/Reduce conflicts.

    Args:
        grammar_file (str): Path to the grammar file.
        debug (bool): Print grammar and table debug output.
        colors (bool): Enable colorized debug output.
        prefer_shifts (bool): Resolve S/R conflicts in favour of SHIFT.
        prefer_shifts_over_empty (bool): Prefer SHIFT over EMPTY reduce.

    Returns:
        tuple: (grammar, table)

    Exits the process with status 1 on grammar or parse errors.
    """
    try:
        g = Grammar.from_file(grammar_file, _no_check_recognizers=True,
                              debug_colors=colors)
        if debug:
            g.print_debug()
        table = create_load_table(
            g, prefer_shifts=prefer_shifts,
            prefer_shifts_over_empty=prefer_shifts_over_empty,
            force_create=True)
        if debug or table.sr_conflicts or table.rr_conflicts:
            table.print_debug()
        if not table.sr_conflicts and not table.rr_conflicts:
            h_print("Grammar OK.")

        if table.sr_conflicts:
            if len(table.sr_conflicts) == 1:
                message = 'There is 1 Shift/Reduce conflict.'
            else:
                message = 'There are {} Shift/Reduce conflicts.'\
                    .format(len(table.sr_conflicts))
            a_print(message)
            # The trailing `.format(len(table.sr_conflicts))` previously
            # attached here was dead code: the message has no
            # placeholders, so it has been removed.
            prints("Either use 'prefer_shifts' parser mode, try to resolve "
                   "manually, or use GLR parsing.")
        if table.rr_conflicts:
            if len(table.rr_conflicts) == 1:
                message = 'There is 1 Reduce/Reduce conflict.'
            else:
                message = 'There are {} Reduce/Reduce conflicts.'\
                    .format(len(table.rr_conflicts))
            a_print(message)
            prints("Try to resolve manually or use GLR parsing.")

    except (GrammarError, ParseError) as e:
        print("Error in the grammar file.")
        print(e)
        sys.exit(1)

    return g, table
def _lexical_disambiguation(self, tokens): """ For the given list of matched tokens apply disambiguation strategy. Args: tokens (list of Token) """ if self.debug: h_print("Lexical disambiguation.", " Tokens: {}".format([x for x in tokens]), level=1) if len(tokens) <= 1: return tokens # Longest-match strategy. max_len = max((len(x.value) for x in tokens)) tokens = [x for x in tokens if len(x.value) == max_len] if self.debug: h_print("Disambiguation by longest-match strategy.", "Tokens: {}".format([x for x in tokens]), level=1) if len(tokens) == 1: return tokens # try to find preferred token. pref_tokens = [x for x in tokens if x.symbol.prefer] if pref_tokens: if self.debug: h_print("Preferring tokens {}.".format(pref_tokens), level=1) return pref_tokens return tokens
def _call_dynamic_filter(self, context, from_state, to_state, action,
                         production=None, subresults=None):
    """
    Call the user supplied dynamic disambiguation filter for the given
    action.

    Actions whose target symbol (SHIFT) or production (REDUCE) is not
    marked dynamic are accepted without calling the filter.

    Returns:
        bool: True if the action is accepted.
    """
    if context.token is None:
        context.token = context.token_ahead
    if (action is SHIFT and not to_state.symbol.dynamic) \
            or (action is REDUCE and not production.dynamic):
        return True

    if self.debug:
        # Build the debug strings in dedicated locals. The previous
        # code rebound `production`/`subresults` here, which clobbered
        # the values passed on to the user filter whenever debug was
        # enabled.
        if action is SHIFT:
            act_str = "SHIFT"
            token = context.token
            prod_str = ""
            subres_str = ""
        else:
            act_str = "REDUCE"
            token = context.token_ahead
            prod_str = ", prod={}".format(context.production)
            subres_str = ", subresults={}".format(subresults)
        h_print("Calling filter for action:",
                " {}, token={}{}{}".format(act_str, token, prod_str,
                                           subres_str), level=2)

    accepted = self.dynamic_filter(context, from_state, to_state,
                                   action, production, subresults)
    if self.debug:
        if accepted:
            a_print("Action accepted.", level=2)
        else:
            a_print("Action rejected.", level=2)

    return accepted
def _debug__active_heads(self, heads):
    """Print the currently active GSS heads and the total number of
    trees they represent."""
    if not heads:
        h_print('No active heads.')
        return
    h_print("Active heads = ", len(heads))
    for head in heads:
        prints("\t{}".format(head))
    h_print("Number of trees = {}".format(
        sum([len(h.parents) for h in heads])))
def print_debug(self):
    """Print the grammar's terminals, non-terminals and productions."""
    a_print("*** GRAMMAR ***", new_line=True)
    for title, symbols in (("Terminals:", self.terminals),
                           ("NonTerminals:", self.nonterminals)):
        h_print(title)
        prints(" ".join([text(s) for s in symbols]))
    h_print("Productions:")
    for production in self.productions:
        prints(text(production))
def create_link(self, parent, from_head):
    """
    Attach `parent` to this head. If a parent for the same root head
    already exists its possibilities are merged instead of creating a
    new link.

    Returns:
        bool: True if a new link was created, False if an existing one
        was extended.
    """
    parent.head = self
    existing = self.parents.get(parent.root.id)

    if existing:
        existing.merge(parent)
        created = False
        if self.parser.debug:
            h_print("Extending possibilities \tof head:",
                    self, level=1)
            h_print(" parent head:", parent.root,
                    level=3)
    else:
        self.parents[parent.root.id] = parent
        created = True
        if self.parser.debug:
            h_print("Creating link \tfrom head:", self, level=1)
            h_print(" to head:", parent.root, level=3)

    if self.parser.debug and self.parser.debug_trace:
        self.parser._trace_step(from_head, parent)

    return created
def _call_shift_action(self, context): """ Calls registered shift action for the given grammar symbol. """ debug = self.debug token = context.token sem_action = token.symbol.action if self.build_tree: # call action for building tree node if tree building is enabled if debug: h_print("Building terminal node", "'{}'.".format(token.symbol.name), level=2) # If both build_tree and call_actions_during_build are set to # True, semantic actions will be call but their result will be # discarded. For more info check following issue: # https://github.com/igordejanovic/parglare/issues/44 if self.call_actions_during_tree_build and sem_action: sem_action(context, token.value, *token.additional_data) return NodeTerm(context, token) if sem_action: result = sem_action(context, token.value, *token.additional_data) else: if debug: h_print("No action defined", "for '{}'. " "Result is matched string.".format(token.symbol.name), level=1) result = token.value if debug: h_print("Action result = ", "type:{} value:{}".format(type(result), repr(result)), level=1) return result
def _reduce(self, head, root_head, production, node_nonterm,
            start_position, end_position):
    """
    Executes the given reduction.

    Creates a new GSS head in the GOTO state of `root_head` and links
    it back to `root_head`. If an active head already exists in that
    state, the new path is merged into it and already-processed heads
    that traversed this state are revisited with limited reductions.
    """
    if start_position is None:
        # Empty reduction: the span collapses to the root position.
        start_position = end_position = root_head.position
    state = root_head.state.gotos[production.symbol]

    if self.debug:
        self.debug_step += 1
        a_print('{} REDUCING head '.format(self._debug_step_str()),
                str(head), new_line=True)
        a_print('by prod ', production, level=1)
        a_print('to state {}:{}'.format(state.state_id, state.symbol),
                level=1)
        a_print('root is ', root_head, level=1)
        a_print('Position span: {} - {}'.format(start_position,
                                                end_position), level=1)

    new_head = GSSNode(self, state, head.position, head.frontier,
                       token_ahead=head.token_ahead,
                       layout_content=root_head.layout_content,
                       layout_content_ahead=head.layout_content_ahead)
    parent = Parent(new_head, root_head,
                    start_position, end_position,
                    production=production,
                    possibilities=[node_nonterm])

    if self.dynamic_filter and \
            not self._call_dynamic_filter(parent, head.state, state,
                                          REDUCE, production,
                                          list(node_nonterm)):
        # Action rejected by dynamic filter
        return

    active_head = self._active_heads.get(state.state_id, None)
    if active_head:
        created = active_head.create_link(parent, head)

        # Calculate heads to revisit with the new path. Only those
        # heads that are already processed (not in _for_actor) and are
        # traversing this new head state on the current frontier
        # should be considered.
        if created and state.state_id in self._states_traversed:
            to_revisit = self._states_traversed[
                state.state_id].intersection(
                    self._active_heads.keys()) - set(
                        h.state.state_id for h in self._for_actor)
            if to_revisit:
                if self.debug:
                    h_print('Revisiting reductions for processed '
                            'active heads in states {}'
                            .format(to_revisit), level=1)
                for r_head_state in to_revisit:
                    r_head = self._active_heads[r_head_state]
                    for action in [
                            a for a in r_head.state.actions.get(
                                head.token_ahead.symbol, [])
                            if a.action == REDUCE]:
                        self._do_reductions(r_head, action.prod, parent)
    else:
        # No cycles. Do the reduction.
        new_head.create_link(parent, head)
        self._for_actor.append(new_head)
        self._active_heads[new_head.state.state_id] = new_head

        if self.debug:
            a_print("New head: ", new_head, level=1, new_line=True)
            if self.debug_trace:
                self._trace_head(new_head)
def _call_reduce_action(self, context, subresults):
    """
    Calls registered reduce action for the given grammar symbol.

    In tree-building mode a NodeNonTerm is produced (and returned,
    unless actions are also called during tree build). Otherwise the
    symbol's semantic action is called with named assignment results,
    falling back to unpacking a single subresult or returning the
    subresults list when no action is defined.
    """
    debug = self.debug
    result = None
    bt_result = None
    production = context.production

    if self.build_tree:
        # call action for building tree node if enabled.
        if debug:
            h_print("Building non-terminal node",
                    "'{}'.".format(production.symbol.name), level=2)

        bt_result = NodeNonTerm(context, children=subresults,
                                production=production)
        context.node = bt_result
        if not self.call_actions_during_tree_build:
            return bt_result

    sem_action = production.symbol.action
    if sem_action:
        assignments = production.assignments
        if assignments:
            # Collect named assignment values; `=` keeps the value,
            # other operators produce a boolean flag.
            assgn_results = {}
            for a in assignments.values():
                if a.op == '=':
                    assgn_results[a.name] = subresults[a.index]
                else:
                    assgn_results[a.name] = bool(subresults[a.index])

        if type(sem_action) is list:
            # A list of actions: pick the one for this production.
            if assignments:
                result = sem_action[production.prod_symbol_id](
                    context, subresults, **assgn_results)
            else:
                result = sem_action[production.prod_symbol_id](
                    context, subresults)
        else:
            if assignments:
                result = sem_action(context, subresults,
                                    **assgn_results)
            else:
                result = sem_action(context, subresults)
    else:
        if debug:
            h_print("No action defined",
                    " for '{}'.".format(production.symbol.name),
                    level=1)
        if len(subresults) == 1:
            if debug:
                h_print("Unpacking a single subresult.", level=1)
            result = subresults[0]
        else:
            if debug:
                h_print("Result is a list of subresults.", level=1)
            result = subresults

    if debug:
        h_print("Action result =",
                "type:{} value:{}".format(type(result),
                                          repr(result)), level=1)

    # If build_tree is set to True, discard the result of the semantic
    # action, and return the result of treebuild_reduce_action.
    return bt_result if bt_result is not None else result
def create_table(grammar, itemset_type=LR_1, start_production=1,
                 prefer_shifts=False, prefer_shifts_over_empty=True,
                 debug=False, **kwargs):
    """
    Create an LR(LALR) table for the given grammar.

    Arguments:
    grammar (Grammar):
    itemset_type(int) - SRL=0 LR_1=1. By default LR_1.
    start_production(int) - The production which defines start state.
        By default 1 - first production from the grammar.
    prefer_shifts(bool) - Conflict resolution strategy which favours
        SHIFT over REDUCE (gready). By default False.
    prefer_shifts_over_empty(bool) - Conflict resolution strategy which
        favours SHIFT over REDUCE of EMPTY. By default False. If
        prefer_shifts is `True` this param is ignored.
    """
    first_sets = first(grammar)

    # An empty FIRST set (other than for the augmented start symbol)
    # signals an infinite recursion on the grammar symbol.
    for nt, firsts in first_sets.items():
        if nt.name != 'S\'' and not firsts:
            raise GrammarError(
                location=nt.location,
                message='First set empty for grammar symbol "{}". '
                'An infinite recursion on the '
                'grammar symbol.'.format(nt))

    follow_sets = follow(grammar, first_sets)

    # Temporarily rewrite the augmented production RHS to point to the
    # requested start production; restored at the end.
    _old_start_production_rhs = grammar.productions[0].rhs
    start_prod_symbol = grammar.productions[start_production].symbol
    grammar.productions[0].rhs = ProductionRHS([start_prod_symbol,
                                                STOP])

    # Create a state for the first production (augmented)
    s = LRState(grammar, 0, AUGSYMBOL,
                [LRItem(grammar.productions[0], 0, set())])

    state_queue = [s]
    state_id = 1

    states = []

    if debug:
        h_print("Constructing LR automaton states...")
    while state_queue:
        state = state_queue.pop(0)
        # For each state calculate its closure first, i.e. starting
        # from a so called "kernel items" expand collection with
        # non-kernel items. We will also calculate GOTO and ACTIONS
        # dicts for each state. These dicts will be keyed by a grammar
        # symbol.
        closure(state, itemset_type, first_sets)
        states.append(state)

        # To find out other states we examine following grammar
        # symbols in the current state (symbols following current
        # position/"dot") and group all items by a grammar symbol.
        per_next_symbol = OrderedDict()

        # Each production has a priority. But since productions are
        # grouped by grammar symbol that is ahead we take the maximal
        # priority given for all productions for the given grammar
        # symbol.
        state._max_prior_per_symbol = {}

        for item in state.items:
            symbol = item.symbol_at_position
            if symbol:
                per_next_symbol.setdefault(symbol, []).append(item)

                # Here we calculate max priorities for each grammar
                # symbol to use it for SHIFT/REDUCE conflict resolution
                prod_prior = item.production.prior
                old_prior = state._max_prior_per_symbol.setdefault(
                    symbol, prod_prior)
                state._max_prior_per_symbol[symbol] = max(prod_prior,
                                                          old_prior)

        # For each group symbol we create new state and form its kernel
        # items from the group items with positions moved one step
        # ahead.
        for symbol, items in per_next_symbol.items():
            if symbol is STOP:
                state.actions[symbol] = [Action(ACCEPT)]
                continue
            inc_items = [item.get_pos_inc() for item in items]
            maybe_new_state = LRState(grammar, state_id, symbol,
                                      inc_items)
            target_state = maybe_new_state
            try:
                idx = states.index(maybe_new_state)
                target_state = states[idx]
            except ValueError:
                try:
                    idx = state_queue.index(maybe_new_state)
                    target_state = state_queue[idx]
                except ValueError:
                    pass

            if target_state is maybe_new_state:
                # We've found a new state. Register it for later
                # processing.
                state_queue.append(target_state)
                state_id += 1
            else:
                # A state with this kernel items already exists.
                if itemset_type is LR_1:
                    # LALR: Try to merge states, i.e. update items
                    # follow sets.
                    if not merge_states(target_state, maybe_new_state):
                        target_state = maybe_new_state
                        state_queue.append(target_state)
                        state_id += 1

            # Create entries in GOTO and ACTION tables
            if isinstance(symbol, NonTerminal):
                # For each non-terminal symbol we create an entry in
                # GOTO table.
                state.gotos[symbol] = target_state
            else:
                # For each terminal symbol we create SHIFT action in
                # the ACTION table.
                state.actions[symbol] = [Action(SHIFT,
                                                state=target_state)]

    if debug:
        h_print("{} LR automata states constructed".format(len(states)))
        h_print("Finishing LALR calculation...")

    # For LR(1) itemsets refresh/propagate item's follows as the LALR
    # merging might change item's follow in previous states
    if itemset_type is LR_1:
        # Propagate updates as long as there were items propagated in
        # the last loop run.
        update = True
        while update:
            update = False

            for state in states:

                # First refresh current state's follows
                closure(state, LR_1, first_sets)

                # Propagate follows to next states. GOTOs/ACTIONs keep
                # information about states created from this state
                inc_items = [i.get_pos_inc() for i in state.items]
                for target_state in chain(
                        state.gotos.values(),
                        [a.state for i in state.actions.values()
                         for a in i if a.action is SHIFT]):
                    for next_item in target_state.kernel_items:
                        this_item = inc_items[
                            inc_items.index(next_item)]
                        if this_item.follow.difference(
                                next_item.follow):
                            update = True
                            next_item.follow.update(this_item.follow)

    if debug:
        h_print("Calculate REDUCTION entries in ACTION tables and"
                " resolve possible conflicts.")

    # Calculate REDUCTION entries in ACTION tables and resolve possible
    # conflicts.
    for idx, state in enumerate(states):
        actions = state.actions

        for item in state.items:
            if item.is_at_end:
                # If the position is at the end then this item
                # would call for reduction but only for terminals
                # from the FOLLOW set of item (LR(1)) or the production
                # LHS non-terminal (LR(0)).
                if itemset_type is LR_1:
                    follow_set = item.follow
                else:
                    follow_set = follow_sets[item.production.symbol]

                prod = item.production
                new_reduce = Action(REDUCE, prod=prod)

                for terminal in follow_set:
                    if terminal not in actions:
                        actions[terminal] = [new_reduce]
                    else:
                        # Conflict! Try to resolve
                        t_acts = actions[terminal]
                        should_reduce = True

                        # Only one SHIFT or ACCEPT might exists for a
                        # single terminal.
                        shifts = [x for x in t_acts
                                  if x.action in (SHIFT, ACCEPT)]
                        assert len(shifts) <= 1
                        t_shift = shifts[0] if shifts else None

                        # But many REDUCEs might exist
                        t_reduces = [x for x in t_acts
                                     if x.action is REDUCE]

                        # We should try to resolve using standard
                        # disambiguation rules between current
                        # reduction and all previous actions.
                        if t_shift:
                            # SHIFT/REDUCE conflict. Use assoc and
                            # priority to resolve
                            # For disambiguation treat ACCEPT action
                            # the same as SHIFT.
                            if t_shift.action is ACCEPT:
                                sh_prior = DEFAULT_PRIORITY
                            else:
                                sh_prior = state._max_prior_per_symbol[
                                    t_shift.state.symbol]
                            if prod.prior == sh_prior:
                                if prod.assoc == ASSOC_LEFT:
                                    # Override SHIFT with this REDUCE
                                    actions[terminal].remove(t_shift)
                                elif prod.assoc == ASSOC_RIGHT:
                                    # If associativity is right leave
                                    # SHIFT action as "stronger" and
                                    # don't consider this reduction any
                                    # more. Right associative
                                    # reductions can't be in the same
                                    # set of actions together with
                                    # SHIFTs.
                                    should_reduce = False
                                else:
                                    # If priorities are the same and no
                                    # associativity defined use
                                    # preferred strategy.
                                    is_empty = len(prod.rhs) == 0
                                    prod_pse = is_empty \
                                        and prefer_shifts_over_empty \
                                        and not prod.nopse
                                    prod_ps = not is_empty \
                                        and prefer_shifts \
                                        and not prod.nops
                                    should_reduce = not (prod_pse
                                                         or prod_ps)
                            elif prod.prior > sh_prior:
                                # This item operation priority is
                                # higher => override with reduce
                                actions[terminal].remove(t_shift)
                            else:
                                # If priority of existing SHIFT action
                                # is higher then leave it instead
                                should_reduce = False

                        if should_reduce:
                            if not t_reduces:
                                actions[terminal].append(new_reduce)
                            else:
                                # REDUCE/REDUCE conflicts
                                # Try to resolve using priorities
                                if prod.prior == t_reduces[0].prod.prior:
                                    actions[terminal].append(new_reduce)
                                elif prod.prior > t_reduces[0].prod.prior:
                                    # If this production priority is
                                    # higher it should override all
                                    # other reductions.
                                    actions[terminal][:] = \
                                        [x for x in actions[terminal]
                                         if x.action is not REDUCE]
                                    actions[terminal].append(new_reduce)

    grammar.productions[0].rhs = _old_start_production_rhs

    table = LRTable(states, **kwargs)
    return table
def create_load_table(grammar, itemset_type=LR_1, start_production=1,
                      prefer_shifts=False, prefer_shifts_over_empty=True,
                      force_create=False, force_load=False,
                      in_layout=False, debug=False, **kwargs):
    """
    Construct table by loading from file if present and newer than the
    grammar. If table file is older than the grammar or non-existent
    calculate the table and save to file.

    Arguments:
    see create_table

    force_create(bool): If set to True table will be created even if
        table file exists.
    force_load(bool): If set to True table will be loaded if exists
        even if it's not newer than the grammar, i.e. modification time
        will not be checked.
    """
    if in_layout:
        # For layout grammars always calculate table.
        # Those are usually very small grammars so there is no point in
        # using cached tables.
        if debug:
            a_print("** Calculating LR table for the layout parser...",
                    new_line=True)
        return create_table(grammar, itemset_type, start_production,
                            prefer_shifts, prefer_shifts_over_empty)
    else:
        if debug:
            a_print("** Calculating LR table...", new_line=True)

    table_file_name = None
    if grammar.file_path:
        file_basename, _ = os.path.splitext(grammar.file_path)
        table_file_name = "{}.pgt".format(file_basename)

    create_table_file = True
    if not force_create and not force_load:
        # NOTE: a redundant recomputation of `table_file_name` that was
        # here duplicated the computation above and has been removed.
        if grammar.file_path:
            if os.path.exists(table_file_name):
                create_table_file = False
                table_mtime = os.path.getmtime(table_file_name)
                # Check if older than any of the grammar files
                for g_file_name in grammar.imported_files.keys():
                    if os.path.getmtime(g_file_name) > table_mtime:
                        create_table_file = True
                        break

    if (create_table_file or force_create) and not force_load:
        table = create_table(grammar, itemset_type, start_production,
                             prefer_shifts, prefer_shifts_over_empty,
                             debug=debug, **kwargs)
        if table_file_name:
            try:
                save_table(table_file_name, table)
            except PermissionError:
                # Best effort: caching the table is optional.
                pass
    else:
        if debug:
            h_print("Loading LR table from '{}'".format(table_file_name))
        table = load_table(table_file_name, grammar)

    return table
def parse(self, input_str, position=0, file_name=None, extra=None):
    """
    Parses the given input string.

    Args:
    input_str(str): A string to parse.
    position(int): Position to start from.
    file_name(str): File name if applicable. Used in error reporting.
    extra: An object that keeps custom parsing state. If not given
        initialized to dict.

    Returns the result of the start production's action (and the final
    position when `return_position` is set). Raises the last registered
    parse error if the input is not accepted.
    """
    if self.debug:
        a_print("*** PARSING STARTED", new_line=True)

    self.input_str = input_str
    self.file_name = file_name
    self.extra = {} if extra is None else extra

    self.errors = []
    self.in_error_recovery = False
    self.accepted_head = None

    next_token = self._next_token
    debug = self.debug

    start_head = LRStackNode(self, self.table.states[0], 0, position)
    self._init_dynamic_disambiguation(start_head)
    self.parse_stack = parse_stack = [start_head]

    while True:
        head = parse_stack[-1]
        cur_state = head.state
        if debug:
            a_print("Current state:", str(cur_state.state_id),
                    new_line=True)

        if head.token_ahead is None:
            if not self.in_layout:
                self._skipws(head, input_str)
                if self.debug:
                    h_print("Layout content:",
                            "'{}'".format(head.layout_content),
                            level=1)

            head.token_ahead = next_token(head)

        if debug:
            h_print("Context:",
                    position_context(head.input_str,
                                     head.position), level=1)
            h_print("Tokens expected:",
                    expected_symbols_str(cur_state.actions.keys()),
                    level=1)
            h_print("Token ahead:", head.token_ahead, level=1)

        actions = None
        if head.token_ahead is not None:
            actions = cur_state.actions.get(head.token_ahead.symbol)
        if not actions and not self.consume_input:
            # If we don't have any action for the current token ahead
            # see if we can finish without consuming the whole input.
            actions = cur_state.actions.get(STOP)

        if not actions:
            # No action: register an error and try recovery if enabled.
            symbols_expected = list(cur_state.actions.keys())
            tokens_ahead = self._get_all_possible_tokens_ahead(head)
            self.errors.append(self._create_error(
                head, symbols_expected,
                tokens_ahead,
                symbols_before=[cur_state.symbol]))

            if self.error_recovery:
                if self.debug:
                    a_print("*** STARTING ERROR RECOVERY.",
                            new_line=True)
                if self._do_recovery():
                    # Error recovery succeeded
                    if self.debug:
                        a_print(
                            "*** ERROR RECOVERY SUCCEEDED. CONTINUING.",
                            new_line=True)
                    continue
                else:
                    break
            else:
                break

        # Dynamic disambiguation
        if self.dynamic_filter:
            actions = self._dynamic_disambiguation(head, actions)

            # If after dynamic disambiguation we still have at least
            # one shift and non-empty reduction or multiple non-empty
            # reductions raise exception.
            if len([a for a in actions
                    if (a.action is SHIFT)
                    or ((a.action is REDUCE) and len(a.prod.rhs))]) > 1:
                raise DynamicDisambiguationConflict(head, actions)

        # If dynamic disambiguation is disabled either globaly by not
        # giving disambiguation function or localy by not marking
        # any production dynamic for this state take the first action.
        # First action is either SHIFT while there might be empty
        # reductions, or it is the only reduction.
        # Otherwise, parser construction should raise an error.
        act = actions[0]

        if act.action is SHIFT:
            cur_state = act.state

            if debug:
                a_print("Shift:",
                        "{} \"{}\"".format(cur_state.state_id,
                                           head.token_ahead.value)
                        + " at position " +
                        str(pos_to_line_col(self.input_str,
                                            head.position)), level=1)

            new_position = head.position + len(head.token_ahead)
            new_head = LRStackNode(
                self,
                state=act.state,
                frontier=head.frontier + 1,
                token=head.token_ahead,
                layout_content=head.layout_content_ahead,
                position=new_position,
                start_position=head.position,
                end_position=new_position)
            new_head.results = self._call_shift_action(new_head)
            parse_stack.append(new_head)

            # A successful shift ends any ongoing error recovery.
            self.in_error_recovery = False

        elif act.action is REDUCE:
            # if this is EMPTY reduction try to take another if
            # exists.
            if len(act.prod.rhs) == 0:
                if len(actions) > 1:
                    act = actions[1]

            production = act.prod

            if debug:
                a_print("Reducing", "by prod '{}'.".format(production),
                        level=1)

            r_length = len(production.rhs)
            if r_length:
                start_reduction_head = parse_stack[-r_length]
                results = [x.results for x in parse_stack[-r_length:]]
                del parse_stack[-r_length:]
                next_state = parse_stack[-1].state.gotos[
                    production.symbol]
                new_head = LRStackNode(
                    self,
                    state=next_state,
                    frontier=head.frontier,
                    position=head.position,
                    production=production,
                    start_position=start_reduction_head.start_position,
                    end_position=head.end_position,
                    token_ahead=head.token_ahead,
                    layout_content=start_reduction_head.layout_content,
                    layout_content_ahead=head.layout_content_ahead)
            else:
                # Empty reduction
                results = []
                next_state = cur_state.gotos[production.symbol]
                new_head = LRStackNode(
                    self,
                    state=next_state,
                    frontier=head.frontier,
                    position=head.position,
                    production=production,
                    start_position=head.end_position,
                    end_position=head.end_position,
                    token_ahead=head.token_ahead,
                    layout_content='',
                    layout_content_ahead=head.layout_content_ahead)

            # Calling reduce action
            new_head.results = self._call_reduce_action(new_head,
                                                        results)
            parse_stack.append(new_head)

        elif act.action is ACCEPT:
            self.accepted_head = head
            break

    if self.accepted_head:
        if debug:
            a_print("SUCCESS!!!")
        if self.return_position:
            return parse_stack[1].results, parse_stack[1].position
        else:
            return parse_stack[1].results
    else:
        raise self.errors[-1]
def _do_reductions(self, head, production, update_parent=None):
    """
    Reduce the given head by the given production. If update_parent is
    given this is update/limited reduction so just traverse the given
    parent instead of all parents of the parent's head.
    """
    debug = self.debug
    if debug:
        h_print("\tFinding reduction paths for head: {}".format(head))
        h_print("\tand production: {}".format(production))
        if update_parent:
            h_print("\tLimited/update reduction due to new path "
                    "addition.")

    states_traversed = self._states_traversed

    prod_len = len(production.rhs)
    if prod_len == 0:
        # Special case, empty reduction
        self._reduce(head, head, production,
                     NodeNonTerm(None, [], production=production),
                     head.position, head.position)
    else:
        # Find roots of possible reductions by going backwards for
        # prod_len steps following all possible paths. Collect
        # subresults along the way to be used with semantic actions
        to_process = [(head, [], prod_len, None, update_parent is None)]
        if debug:
            h_print("Calculate reduction paths of length {}:"
                    .format(prod_len), level=1)
            h_print("start node= {}".format(head), level=2)

        while to_process:
            (node, results, length,
             last_parent, traversed) = to_process.pop()
            length = length - 1
            if debug:
                h_print("node = {}".format(node), level=2,
                        new_line=True)
                h_print("backpath length = {}{}".format(
                    prod_len - length,
                    " - ROOT" if not length else ""), level=2)

            if node.frontier == head.frontier:
                # Cache traversed states for revisit optimization
                states_traversed.setdefault(
                    node.state.state_id, set()).add(head.state.state_id)

            # On limited reduction only the given parent is followed;
            # otherwise all parents of the current node are traversed.
            for parent in [update_parent] \
                    if update_parent and update_parent.head == node \
                    else list(node.parents.values()):
                if debug:
                    h_print("", str(parent.head), level=3)

                new_results = [parent] + results
                if last_parent is None:
                    last_parent = parent

                traversed = traversed or (update_parent
                                          and update_parent.head
                                          == node)

                if length:
                    to_process.append((parent.root, new_results,
                                       length, last_parent, traversed))
                elif traversed:
                    self._reduce(head, parent.root, production,
                                 NodeNonTerm(None, new_results,
                                             production=production),
                                 parent.start_position,
                                 last_parent.end_position)