def test_closures_ofLRValue(self):
    # closure() over the 'lrvalue' grammar fixture must reproduce the
    # hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('S', 0, 0),
        LR0('S', 1, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('L', 0, 0),
        LR0('L', 1, 0),
    }
    expected_from_pair = {
        LR0('S', 0, 0),
        LR0('S', 1, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('R', 1, 1),
        LR0('L', 0, 0),
        LR0('L', 0, 1),
        LR0('L', 1, 0),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(expected_from_start == closure(start_seed, self.lrvalue))

    # Seed made of two items whose position is already 1.
    pair_seed = {LR0('L', 0, 1), LR0('R', 1, 1)}
    self.assertTrue(expected_from_pair == closure(pair_seed, self.lrvalue))
def test_closures_ofLRValue_with_actions(self):
    # closure() over the 'lrvalue_with_actions' grammar fixture must
    # reproduce the hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('S', 0, 0),
        LR0('S', 1, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('L', 0, 0),
        LR0('L', 1, 0),
    }
    # The first expected item is named after the grammar's ACTION_INSIDE
    # template, filled in with (3, '<lambda>').
    action_symbol = self.lrvalue_with_actions.ACTION_INSIDE % (3, '<lambda>')
    expected_from_pair = {
        LR0(action_symbol, 0, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('R', 1, 1),
        LR0('L', 0, 0),
        LR0('L', 0, 1),
        LR0('L', 1, 0),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(
        expected_from_start == closure(start_seed, self.lrvalue_with_actions))

    # Seed made of two items whose position is already 1.
    pair_seed = {LR0('L', 0, 1), LR0('R', 1, 1)}
    self.assertTrue(
        expected_from_pair == closure(pair_seed, self.lrvalue_with_actions))
def generate_spontaneously_lookaheads(grammar, start_item, to_id):
    '''Build the LALR kernels and the goto table, seeding the lookaheads.

    A LR0-like collection of kernels is built using start_item as the
    seed. Then, for each kernel item, the closure of an equivalent LR1
    item carrying the grammar.PROBE lookahead is computed in order to
    determine which terminals are lookaheads of each item:

     - a lookahead different from grammar.PROBE is added directly to the
       target kernel item (it is spontaneously generated);
     - a grammar.PROBE lookahead makes the target kernel item a
       subscriber of the source item, so lookaheads can be propagated
       later.

    The kernel items are LALR items (they are very similar to the LR0
    items but lookaheads can be added to them, see the documentation of
    the LALR class in the item module).

    Parameters:
     - grammar: the grammar, passed to closure, kernel_collection and to
       the item methods.
     - start_item: item providing sym_production, alternative and
       position for the LALR start item.
     - to_id: object indexed with a state (closure) that returns the
       identifier of that state.

    Returns a tuple (kernels_lalr, goto_table).'''
    goto_table = collections.defaultdict(dict)

    lalr_start_item = LALR(start_item.sym_production,
                           start_item.alternative,
                           start_item.position)
    lalr_start_item.add_new(grammar.EOF)

    kernels_lalr = kernel_collection(grammar, lalr_start_item)

    # Index every kernel set by the id of its closure and fill the goto
    # table along the way.
    kernels_by_id = dict()
    for kernels in kernels_lalr:
        closure_lalr = closure(kernels, grammar)
        kernels_by_id[to_id[closure_lalr]] = kernels
        populate_goto_table_from_state(grammar, closure_lalr, goto_table,
                                       to_id)

    # items() instead of iteritems(): works on both Python 2 and Python 3.
    # The dictionary itself is not modified while it is being iterated.
    for id_, kernels in kernels_by_id.items():
        for item_lalr in kernels:
            probe_item = LR1(item_lalr.sym_production,
                             item_lalr.alternative,
                             item_lalr.position,
                             grammar.PROBE)
            closure_lr1 = closure(set([probe_item]), grammar)

            for item_lr1 in closure_lr1:
                next_symbol = item_lr1.next_symbol(grammar)
                if not next_symbol:
                    continue  # nothing to shift from this item

                item_lr1_shifted = item_lr1.item_shifted(grammar)

                assert id_ in goto_table
                goto_set_id = goto_table[id_][next_symbol]
                goto_set = kernels_by_id[goto_set_id]

                item_lalr_from_shifted = LALR(
                    item_lr1_shifted.sym_production,
                    item_lr1_shifted.alternative,
                    item_lr1_shifted.position)
                assert item_lalr_from_shifted in goto_set

                # A set difference keeps the elements of its left operand,
                # so this double difference yields the item object stored
                # in goto_set that is equal to the freshly built one. That
                # stored object is the one that must receive the lookaheads.
                singleton = (goto_set - (goto_set - {item_lalr_from_shifted}))
                assert len(singleton) == 1
                item_lalr_hidden = set(singleton).pop()

                if item_lr1.lookahead != grammar.PROBE:
                    # Spontaneously generated lookahead.
                    item_lalr_hidden.add_new(item_lr1.lookahead)
                else:
                    # The probe reached this item: lookaheads of item_lalr
                    # are to be propagated to the hidden item.
                    item_lalr.subscribe(item_lalr_hidden)

    return kernels_lalr, goto_table
def generate_spontaneously_lookaheads(grammar, start_item, to_id):
    '''Build the LALR kernels and the goto table, seeding the lookaheads.

    A LR0-like collection of kernels is built using start_item as the
    seed. Then, for each kernel item, the closure of an equivalent LR1
    item carrying the grammar.PROBE lookahead is computed in order to
    determine which terminals are lookaheads of each item:

     - a lookahead different from grammar.PROBE is added directly to the
       target kernel item (it is spontaneously generated);
     - a grammar.PROBE lookahead makes the target kernel item a
       subscriber of the source item, so lookaheads can be propagated
       later.

    The kernel items are LALR items (they are very similar to the LR0
    items but lookaheads can be added to them, see the documentation of
    the LALR class in the item module).

    Parameters:
     - grammar: the grammar, passed to closure, kernel_collection and to
       the item methods.
     - start_item: item providing sym_production, alternative and
       position for the LALR start item.
     - to_id: object indexed with a state (closure) that returns the
       identifier of that state.

    Returns a tuple (kernels_lalr, goto_table).'''
    goto_table = collections.defaultdict(dict)

    lalr_start_item = LALR(start_item.sym_production,
                           start_item.alternative,
                           start_item.position)
    lalr_start_item.add_new(grammar.EOF)

    kernels_lalr = kernel_collection(grammar, lalr_start_item)

    # Index every kernel set by the id of its closure and fill the goto
    # table along the way.
    kernels_by_id = dict()
    for kernels in kernels_lalr:
        closure_lalr = closure(kernels, grammar)
        kernels_by_id[to_id[closure_lalr]] = kernels
        populate_goto_table_from_state(grammar, closure_lalr, goto_table,
                                       to_id)

    # items() instead of iteritems(): works on both Python 2 and Python 3.
    # The dictionary itself is not modified while it is being iterated.
    for id_, kernels in kernels_by_id.items():
        for item_lalr in kernels:
            probe_item = LR1(item_lalr.sym_production,
                             item_lalr.alternative,
                             item_lalr.position,
                             grammar.PROBE)
            closure_lr1 = closure(set([probe_item]), grammar)

            for item_lr1 in closure_lr1:
                next_symbol = item_lr1.next_symbol(grammar)
                if not next_symbol:
                    continue  # nothing to shift from this item

                item_lr1_shifted = item_lr1.item_shifted(grammar)

                assert id_ in goto_table
                goto_set_id = goto_table[id_][next_symbol]
                goto_set = kernels_by_id[goto_set_id]

                item_lalr_from_shifted = LALR(
                    item_lr1_shifted.sym_production,
                    item_lr1_shifted.alternative,
                    item_lr1_shifted.position)
                assert item_lalr_from_shifted in goto_set

                # A set difference keeps the elements of its left operand,
                # so this double difference yields the item object stored
                # in goto_set that is equal to the freshly built one. That
                # stored object is the one that must receive the lookaheads.
                singleton = (goto_set - (goto_set - {item_lalr_from_shifted}))
                assert len(singleton) == 1
                item_lalr_hidden = set(singleton).pop()

                if item_lr1.lookahead != grammar.PROBE:
                    # Spontaneously generated lookahead.
                    item_lalr_hidden.add_new(item_lr1.lookahead)
                else:
                    # The probe reached this item: lookaheads of item_lalr
                    # are to be propagated to the hidden item.
                    item_lalr.subscribe(item_lalr_hidden)

    return kernels_lalr, goto_table
def test_closures_ofSome(self):
    # closure() over the 'some' grammar fixture must reproduce the
    # hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('S', 0, 0),
        LR0('C', 0, 0),
        LR0('C', 1, 0),
    }
    expected_from_pair = {
        LR0('S', 0, 1),
        LR0('C', 0, 0),
        LR0('C', 1, 0),
        LR0('C', 1, 1),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(expected_from_start == closure(start_seed, self.some))

    # Seed made of two items whose position is already 1.
    pair_seed = {LR0('S', 0, 1), LR0('C', 1, 1)}
    self.assertTrue(expected_from_pair == closure(pair_seed, self.some))
def test_closures_ofLRValue_with_actions(self):
    # closure() over the 'lrvalue_with_actions' grammar fixture must
    # reproduce the hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('S', 0, 0),
        LR0('S', 1, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('L', 0, 0),
        LR0('L', 1, 0),
    }
    # The first expected item is named after the grammar's ACTION_INSIDE
    # template, filled in with (3, '<lambda>').
    action_symbol = self.lrvalue_with_actions.ACTION_INSIDE % (3, '<lambda>')
    expected_from_pair = {
        LR0(action_symbol, 0, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('R', 1, 1),
        LR0('L', 0, 0),
        LR0('L', 0, 1),
        LR0('L', 1, 0),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(
        expected_from_start == closure(start_seed, self.lrvalue_with_actions))

    # Seed made of two items whose position is already 1.
    pair_seed = {LR0('L', 0, 1), LR0('R', 1, 1)}
    self.assertTrue(
        expected_from_pair == closure(pair_seed, self.lrvalue_with_actions))
def test_closures_ofLRValue(self):
    # closure() over the 'lrvalue' grammar fixture must reproduce the
    # hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('S', 0, 0),
        LR0('S', 1, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('L', 0, 0),
        LR0('L', 1, 0),
    }
    expected_from_pair = {
        LR0('S', 0, 0),
        LR0('S', 1, 0),
        LR0('R', 0, 0),
        LR0('R', 1, 0),
        LR0('R', 1, 1),
        LR0('L', 0, 0),
        LR0('L', 0, 1),
        LR0('L', 1, 0),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(expected_from_start == closure(start_seed, self.lrvalue))

    # Seed made of two items whose position is already 1.
    pair_seed = {LR0('L', 0, 1), LR0('R', 1, 1)}
    self.assertTrue(expected_from_pair == closure(pair_seed, self.lrvalue))
def test_closures_ofArith(self):
    # closure() over the 'arith' grammar fixture must reproduce the
    # hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('E', 0, 0),
        LR0('E', 1, 0),
        LR0('T', 0, 0),
        LR0('T', 1, 0),
        LR0('F', 0, 0),
        LR0('F', 1, 0),
    }
    expected_from_single = {
        LR0('F', 0, 1),
        LR0('E', 0, 0),
        LR0('E', 1, 0),
        LR0('T', 0, 0),
        LR0('T', 1, 0),
        LR0('F', 0, 0),
        LR0('F', 1, 0),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(expected_from_start == closure(start_seed, self.arith))

    # Seed made of a single item whose position is already 1.
    single_seed = {LR0('F', 0, 1)}
    self.assertTrue(expected_from_single == closure(single_seed, self.arith))
def test_closures_ofSome(self):
    # closure() over the 'some' grammar fixture must reproduce the
    # hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('S', 0, 0),
        LR0('C', 0, 0),
        LR0('C', 1, 0),
    }
    expected_from_pair = {
        LR0('S', 0, 1),
        LR0('C', 0, 0),
        LR0('C', 1, 0),
        LR0('C', 1, 1),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(expected_from_start == closure(start_seed, self.some))

    # Seed made of two items whose position is already 1.
    pair_seed = {LR0('S', 0, 1), LR0('C', 1, 1)}
    self.assertTrue(expected_from_pair == closure(pair_seed, self.some))
def test_closures_ofArith(self):
    # closure() over the 'arith' grammar fixture must reproduce the
    # hand-written LR0 item sets below.
    expected_from_start = {
        LR0(self.StartExtendedSymbol, 0, 0),
        LR0('E', 0, 0),
        LR0('E', 1, 0),
        LR0('T', 0, 0),
        LR0('T', 1, 0),
        LR0('F', 0, 0),
        LR0('F', 1, 0),
    }
    expected_from_single = {
        LR0('F', 0, 1),
        LR0('E', 0, 0),
        LR0('E', 1, 0),
        LR0('T', 0, 0),
        LR0('T', 1, 0),
        LR0('F', 0, 0),
        LR0('F', 1, 0),
    }

    # Seed made only of the start item.
    start_seed = {LR0(self.StartExtendedSymbol, 0, 0)}
    self.assertTrue(expected_from_start == closure(start_seed, self.arith))

    # Seed made of a single item whose position is already 1.
    single_seed = {LR0('F', 0, 1)}
    self.assertTrue(expected_from_single == closure(single_seed, self.arith))
def test_action_table(self):
    # For every state built from self.kernel_states, the terminals whose
    # action is a Shift (and then those whose action is a Reduce) must
    # match the expected lists below, position by position.
    states = [closure(kernel, self.grammar) for kernel in self.kernel_states]

    def terminals_with(state, action_name):
        # Terminals of the state's row whose action, rendered with str(),
        # contains action_name. Rows are looked up by the hash of the state.
        row = self.action_table[hash(state)]
        return frozenset(
            terminal for terminal in row.keys()
            if action_name in str(row[terminal]))

    # NOTE: '{}' is an empty dict, which frozenset() turns into an empty
    # set: it stands for "no terminal expected".
    expected_terminal_shift = [
        {'NL', 'NUM'},
        {},
        {'NL', 'NUM'},
        {},
        {'NL', 'NUM'},
        {},
        {},
        {},
        {'NUM', '+', '-'},
        {},
        {},
    ]
    for state, expected in zip(states, expected_terminal_shift):
        self.assertTrue(terminals_with(state, "Shift") == frozenset(expected))

    expected_terminal_reduce = [
        {},
        {},
        {self.grammar.EOF},  # input
        {self.grammar.EOF, 'NL', 'NUM'},  # line
        {},
        {'NL', '+', '-', 'NUM'},  # expr
        {self.grammar.EOF},  # input
        {self.grammar.EOF, 'NL', 'NUM'},  # line
        {},
        {'NL', '+', '-', 'NUM'},  # expr
        {'NL', '+', '-', 'NUM'},  # expr
    ]
    for state, expected in zip(states, expected_terminal_reduce):
        self.assertTrue(terminals_with(state, "Reduce") == frozenset(expected))
def build_parsing_table(grammar, start_item, handle_shift_reduce=True,
                        disable_mapping=False):
    '''Build the Action and Goto tables to be used by a driver.

    Returns a tuple (action_table, goto_table, start_state_id) where the
    tables are plain dictionaries and start_state_id is the id of the
    state that contains start_item: the point where the driver starts to
    parse. If no state contains start_item, start_state_id is None.

    See the documentation of the handler_conflict function for more info
    with respect to the handle_shift_reduce parameter.

    The start_item can be an instance of Item (see the module item).
    This works well with LR0 and LR1 items, but with LALR items the
    algorithm used is build_parsing_table_lalr (see that function, in
    this module), which is called with the same parameters.

    If 'disable_mapping' is True, the internal states are identified by
    their hash. This is only useful for testing and should not be modified
    in the normal case.

    Preconditions: the grammar must be already processed.'''
    if isinstance(start_item, LALR):
        # Forward disable_mapping too, otherwise the LALR builder would
        # silently fall back to its default value.
        return build_parsing_table_lalr(grammar, start_item,
                                        handle_shift_reduce, disable_mapping)

    action_table = collections.defaultdict(dict)
    goto_table = collections.defaultdict(dict)
    kernels = kernel_collection(grammar, start_item)
    start_set_hash = None
    to_id = UserFriendlyMapping(disable_mapping)

    for kernel in kernels:
        state_set = closure(kernel, grammar)

        populate_goto_table_from_state(grammar, state_set, goto_table, to_id)
        populate_action_table_from_state(grammar, state_set, action_table,
                                         handle_shift_reduce, to_id)

        # Compare against None explicitly: an id equal to 0 is a valid
        # answer and must not be treated as "not found yet".
        if start_set_hash is None:
            for item in state_set:
                if item == start_item:
                    start_set_hash = to_id[state_set]
                    break

    return dict(action_table), dict(goto_table), start_set_hash
def build_parsing_table(grammar, start_item, handle_shift_reduce=True,
                        disable_mapping=False):
    '''Build the Action and Goto tables to be used by a driver.

    Returns a tuple (action_table, goto_table, start_state_id) where the
    tables are plain dictionaries and start_state_id is the id of the
    state that contains start_item: the point where the driver starts to
    parse. If no state contains start_item, start_state_id is None.

    See the documentation of the handler_conflict function for more info
    with respect to the handle_shift_reduce parameter.

    The start_item can be an instance of Item (see the module item).
    This works well with LR0 and LR1 items, but with LALR items the
    algorithm used is build_parsing_table_lalr (see that function, in
    this module), which is called with the same parameters.

    If 'disable_mapping' is True, the internal states are identified by
    their hash. This is only useful for testing and should not be modified
    in the normal case.

    Preconditions: the grammar must be already processed.'''
    if isinstance(start_item, LALR):
        # Forward disable_mapping too, otherwise the LALR builder would
        # silently fall back to its default value.
        return build_parsing_table_lalr(grammar, start_item,
                                        handle_shift_reduce, disable_mapping)

    action_table = collections.defaultdict(dict)
    goto_table = collections.defaultdict(dict)
    kernels = kernel_collection(grammar, start_item)
    start_set_hash = None
    to_id = UserFriendlyMapping(disable_mapping)

    for kernel in kernels:
        state_set = closure(kernel, grammar)

        populate_goto_table_from_state(grammar, state_set, goto_table, to_id)
        populate_action_table_from_state(grammar, state_set, action_table,
                                         handle_shift_reduce, to_id)

        # Compare against None explicitly: an id equal to 0 is a valid
        # answer and must not be treated as "not found yet".
        if start_set_hash is None:
            for item in state_set:
                if item == start_item:
                    start_set_hash = to_id[state_set]
                    break

    return dict(action_table), dict(goto_table), start_set_hash
def build_parsing_table_lalr(grammar, start_item, handle_shift_reduce=True,
                             disable_mapping=False):
    '''Build the Action and Goto tables of a LALR parser.

    First, generate_spontaneously_lookaheads builds the kernels and the
    goto table using start_item as the seed and determines which
    terminals are lookaheads of each item (in which case, these
    lookaheads are spontaneously generated).

    In a second stage, propagate_lookaheads propagates the spontaneously
    generated terminals from one item to another.

    Finally, the action table is populated from the closure of each
    kernel of LALR items.

    Returns a tuple (action_table, goto_table, start_state_id) where the
    tables are plain dictionaries and start_state_id is the id of the
    state that contains start_item, or None if no state contains it.

    See the documentation of the handler_conflict function for more info
    with respect to the handle_shift_reduce parameter.

    If 'disable_mapping' is True, the internal states are identified by
    their hash. This is only useful for testing and should not be modified
    in the normal case.
    '''
    to_id = UserFriendlyMapping(disable_mapping)

    action_table = collections.defaultdict(dict)
    start_set_hash = None

    kernels_lalr, goto_table = generate_spontaneously_lookaheads(
        grammar, start_item, to_id)
    propagate_lookaheads(kernels_lalr)

    for kernel in kernels_lalr:
        state_set = closure(kernel, grammar)

        populate_action_table_from_state(grammar, state_set, action_table,
                                         handle_shift_reduce, to_id)

        # Compare against None explicitly: an id equal to 0 is a valid
        # answer and must not be treated as "not found yet".
        if start_set_hash is None:
            for item in state_set:
                if item == start_item:
                    start_set_hash = to_id[state_set]
                    break

    return dict(action_table), dict(goto_table), start_set_hash
def build_parsing_table_lalr(grammar, start_item, handle_shift_reduce=True,
                             disable_mapping=False):
    '''Build the Action and Goto tables of a LALR parser.

    First, generate_spontaneously_lookaheads builds the kernels and the
    goto table using start_item as the seed and determines which
    terminals are lookaheads of each item (in which case, these
    lookaheads are spontaneously generated).

    In a second stage, propagate_lookaheads propagates the spontaneously
    generated terminals from one item to another.

    Finally, the action table is populated from the closure of each
    kernel of LALR items.

    Returns a tuple (action_table, goto_table, start_state_id) where the
    tables are plain dictionaries and start_state_id is the id of the
    state that contains start_item, or None if no state contains it.

    See the documentation of the handler_conflict function for more info
    with respect to the handle_shift_reduce parameter.

    If 'disable_mapping' is True, the internal states are identified by
    their hash. This is only useful for testing and should not be modified
    in the normal case.
    '''
    to_id = UserFriendlyMapping(disable_mapping)

    action_table = collections.defaultdict(dict)
    start_set_hash = None

    kernels_lalr, goto_table = generate_spontaneously_lookaheads(
        grammar, start_item, to_id)
    propagate_lookaheads(kernels_lalr)

    for kernel in kernels_lalr:
        state_set = closure(kernel, grammar)

        populate_action_table_from_state(grammar, state_set, action_table,
                                         handle_shift_reduce, to_id)

        # Compare against None explicitly: an id equal to 0 is a valid
        # answer and must not be treated as "not found yet".
        if start_set_hash is None:
            for item in state_set:
                if item == start_item:
                    start_set_hash = to_id[state_set]
                    break

    return dict(action_table), dict(goto_table), start_set_hash
def test_action_table(self):
    # For every state built from self.kernel_states, the terminals whose
    # action is a Shift (and then those whose action is a Reduce) must
    # match the expected lists below, position by position.
    states = [closure(kernel, self.grammar) for kernel in self.kernel_states]

    def terminals_with(state, action_name):
        # Terminals of the state's row whose action, rendered with str(),
        # contains action_name. Rows are looked up by the hash of the state.
        row = self.action_table[hash(state)]
        return frozenset(
            terminal for terminal in row.keys()
            if action_name in str(row[terminal]))

    # NOTE: '{}' is an empty dict, which frozenset() turns into an empty
    # set: it stands for "no terminal expected".
    expected_terminal_shift = [
        {'NL', 'NUM'},
        {},
        {'NL', 'NUM'},
        {},
        {'NL', 'NUM'},
        {},
        {},
        {},
        {'NUM', '+', '-'},
        {},
        {},
    ]
    for state, expected in zip(states, expected_terminal_shift):
        self.assertTrue(terminals_with(state, "Shift") == frozenset(expected))

    expected_terminal_reduce = [
        {},
        {},
        {self.grammar.EOF},  # input
        {self.grammar.EOF, 'NL', 'NUM'},  # line
        {},
        {'NL', '+', '-', 'NUM'},  # expr
        {self.grammar.EOF},  # input
        {self.grammar.EOF, 'NL', 'NUM'},  # line
        {},
        {'NL', '+', '-', 'NUM'},  # expr
        {'NL', '+', '-', 'NUM'},  # expr
    ]
    for state, expected in zip(states, expected_terminal_reduce):
        self.assertTrue(terminals_with(state, "Reduce") == frozenset(expected))