def closure_1(grammar, state_set):
    """Legacy closure computation over ``state_set`` (currently disabled).

    The first statement is ``assert False``, so in a normal (non ``-O``) run
    this function raises ``AssertionError`` before doing any work.  The code
    below the guard copies every element of ``state_set`` into a new
    ``StateSet``, expands each element whose next symbol is a ``Nonterminal``
    with one ``LR1Element`` per alternative, merges the result and returns it.
    """
    assert False
    closure = StateSet()

    # Step 1: start from the elements that were handed in
    for element in state_set.elements:
        closure.add(element)

    # Step 2: expand every element that has a nonterminal after the dot
    for element in closure:
        symbol = element.next_symbol()
        if not isinstance(symbol, Nonterminal):
            continue

        # lookahead of the new elements: first() of the remaining symbols
        # followed by each lookahead symbol of the current element
        lookahead = set()
        for terminal in element.lookahead:
            tail = list(element.remaining_symbols())
            tail.append(terminal)
            lookahead |= old2_first(grammar, tail)

        for alternative in grammar[symbol].alternatives:
            # create epsilon symbol if alternative is empty
            rhs = [Epsilon()] if alternative == [] else alternative
            item = LR1Element(Production(symbol, rhs), 0, lookahead)
            if rhs == [epsilon]:
                item.d = 1
            closure.add(item)

    # merge states that only differ in their lookahead
    closure.merge()
    return closure
def amount(self):
    """Return how many right-hand-side symbols of ``self.action`` are counted.

    A trailing ``Terminal("<eos>")`` is not counted, a right-hand side that
    consists only of ``Epsilon()`` counts as zero, and otherwise the plain
    length of ``self.action.right`` is returned.
    """
    rhs = self.action.right
    size = len(rhs)
    if size > 0 and rhs[-1] == Terminal("<eos>"):
        return size - 1
    if rhs == [Epsilon()]:
        return 0
    return size
def test_closure_0():
    """Check ``helper1.closure_0`` for three single-element start sets.

    Every scenario seeds a fresh ``StateSet`` with one ``State``, computes the
    closure and expects exactly four elements, each of which is listed.
    """

    def check(seed, members):
        kernel = StateSet()
        kernel.add(seed)
        closure = helper1.closure_0(kernel)
        assert len(closure.elements) == 4
        for member in members:
            assert member in closure

    # first state Z ::= .S
    check(
        State(Production(Nonterminal("Z"), [Nonterminal("S")]), 0),
        [
            State(Production(Z, [S]), 0),
            State(Production(S, [S, b]), 0),
            State(Production(S, [b, A, a]), 0),
            State(Production(S, [a]), 0),
        ],
    )

    # F ::= .C D f
    check(
        State(Production(F, [C, D, f]), 0),
        [
            State(Production(F, [C, D, f]), 0),
            State(Production(C, [D, A]), 0),
            State(Production(D, [d]), 0),
            State(Production(D, [Epsilon()]), 1),
        ],
    )

    # C ::= D .A
    check(
        State(Production(C, [D, A]), 1),
        [
            State(Production(C, [D, A]), 1),
            State(Production(A, [a, S, c]), 0),
            State(Production(A, [a, S, b]), 0),
            State(Production(A, [a]), 0),
        ],
    )
def inc_parse(self, line_indents=[], needs_reparse=False, state=0, stack = []):
    """Run the incremental (or, when ``self.ooc`` is set, out-of-context) parse loop.

    The loop walks lookahead nodes obtained via ``self.pop_lookahead``,
    starting after the first child of ``self.previous_version.parent``, and
    either parses terminals, shifts unchanged nonterminals as a whole, applies
    reductions, or breaks nonterminals down via ``self.left_breakdown``.

    Parameters:
        line_indents: not used anywhere in this body.
        needs_reparse: when true, every nonterminal lookahead is broken down
            instead of being reused; also stored in ``self.needs_reparse``.
        state: parser state to start in (stored in ``self.current_state``).
        stack: initial parse stack; when empty, a new list holding only the
            last child of ``self.previous_version.parent`` is used.

    Returns:
        True if the parse was accepted with no entries in ``self.error_nodes``
        (or the out-of-context target state was reached), False otherwise.
        The same value is stored in ``self.last_status``.

    NOTE(review): ``self.previous_version`` and ``self.prev_version`` are both
    read here; they look like distinct attributes (tree vs. version id?) --
    confirm against the class definition.
    """
    logging.debug("============ NEW %s PARSE ================= ", "OOC" if self.ooc else "INCREMENTAL")
    logging.debug("= starting in state %s ", state)
    self.validating = False
    self.reused_nodes = set()
    self.current_state = state
    self.previous_version.parent.isolated = None
    # First and last child of the previous tree's root delimit the input.
    bos = self.previous_version.parent.children[0]
    eos = self.previous_version.parent.children[-1]
    # An empty (or omitted) stack is replaced by a fresh list, so the shared
    # default list object is never mutated by this method.
    if not stack:
        self.stack = [eos]
    else:
        self.stack = stack
    eos.state = 0
    self.loopcount = 0
    self.needs_reparse = needs_reparse
    self.error_nodes = []
    self.error_pres = []
    # Out-of-context parses hand self.ooc[1] to the recovery manager as root.
    if self.ooc:
        rmroot = self.ooc[1]
    else:
        rmroot = self.previous_version.parent
    self.rm = RecoveryManager(self.prev_version, rmroot, self.stack, self.syntaxtable)

    # Hard-coded switch: while True, the "PARSER WITHOUT OPTIMISATION" branch
    # below is never taken.
    USE_OPT = True

    la = self.pop_lookahead(bos)
    while(True):
        logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
        self.loopcount += 1

        # Abort condition for out-of-context analysis. If we reached the state of the
        # node that is being analysed and the lookahead matches the node's
        # lookahead from the previous parse, we are done
        if self.ooc:
            logging.debug("ooc %s %s", self.ooc, id(self.ooc))
            logging.debug("la %s", la)
            logging.debug("cs %s", self.current_state)
            if la is self.ooc[0]:
                if isinstance(la.symbol, Nonterminal):
                    # if OOC is Nonterminal, use first terminal to apply
                    # reductions
                    first_term = la.find_first_terminal(self.prev_version)
                    lookup = self.get_lookup(first_term)
                else:
                    lookup = self.get_lookup(la)
                while True:
                    # OOC is complete if we reached the expected state and
                    # there are no more reductions left to do
                    if self.current_state == self.ooc[2] and len(self.stack) == 2:
                        logging.debug("======= OOC parse successfull =========")
                        self.last_status = True
                        return True
                    # Otherwise apply more reductions to reach the wanted
                    # state or an error occurs
                    element = self.syntaxtable.lookup(self.current_state, lookup)
                    if not isinstance(element, Reduce):
                        logging.debug("No more reductions")
                        break
                    else:
                        self.reduce(element)
                # Reductions ran out before the expected state was reached.
                logging.debug("======= OOC parse failed =========")
                self.last_status = False
                return False

        if isinstance(la.symbol, Terminal) or isinstance(la.symbol, FinishSymbol) or la.symbol == Epsilon():
            lookup_symbol = self.get_lookup(la)
            result = self.parse_terminal(la, lookup_symbol)
            if result == "Accept":
                logging.debug("============ INCREMENTAL PARSE END (ACCEPT) ================= ")
                # With error recovery we can end up in the accepting
                # state despite errors occurring during the parse.
                if len(self.error_nodes) == 0:
                    self.last_status = True
                    return True
                self.last_status = False
                return False
            elif result == "Error":
                logging.debug("============ INCREMENTAL PARSE END (ERROR) ================= ")
                self.last_status = False
                return False
            elif result != None:
                # Any other non-None result becomes the next lookahead; a
                # None result leaves la unchanged for the next iteration.
                la = result

        else: # Nonterminal
            if la.has_changes() or needs_reparse or la.has_errors() or self.iso_context_changed(la):
                la = self.left_breakdown(la)
            else:
                if USE_OPT:
                    goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                    # Only opt-shift if the nonterminal has children to
                    # avoid a bug in the retainability algorithm. See
                    # test/test_eco.py::Test_RetainSubtree::test_bug1
                    if goto and la.children: # can we shift this Nonterminal in the current state?
                        logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                        follow_id = goto.action
                        self.stack.append(la)
                        la.deleted = False
                        la.state = follow_id #XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                        la.exists = True
                        self.current_state = follow_id
                        logging.debug("USE_OPT: set state to %s", self.current_state)
                        if la.isolated:
                            # When skipping previously isolated subtrees,
                            # traverse their children to find the error
                            # nodes and report them back to the editor.
                            self.find_nested_error(la)
                        la = self.pop_lookahead(la)
                        self.validating = True
                        continue
                    else:
                        #XXX can be made faster by providing more information in syntax tables
                        first_term = la.find_first_terminal(self.prev_version)

                        # Reduce if the subtree's first terminal allows it,
                        # otherwise break the subtree down.
                        lookup_symbol = self.get_lookup(first_term)
                        element = self.syntaxtable.lookup(self.current_state, lookup_symbol)
                        if isinstance(element, Reduce):
                            logging.debug("OPT Reduce: %s", element)
                            self.reduce(element)
                        else:
                            la = self.left_breakdown(la)
                else:
                    # PARSER WITHOUT OPTIMISATION
                    if la.lookup != "":
                        lookup_symbol = Terminal(la.lookup)
                    else:
                        lookup_symbol = la.symbol
                    # NOTE(review): element is assigned but not read in this branch.
                    element = self.syntaxtable.lookup(self.current_state, lookup_symbol)

                    if self.shiftable(la):
                        logging.debug("\x1b[37mis shiftable\x1b[0m")
                        self.stack.append(la)
                        self.current_state = la.state
                        self.right_breakdown()
                        la = self.pop_lookahead(la)
                    else:
                        la = self.left_breakdown(la)
| """ p = Parser(grammar) p.parse() r = p.rules b = Terminal("b") c = Terminal("c") d = Terminal("d") S = Nonterminal("S") A = Nonterminal("A") S_bAd = Production(S, [b, A, d]) A_c = Production(A, [c]) A_None = Production(A, [Epsilon()]) syntaxtable = { (0, b): Shift(2), (0, S): Goto(1), (1, FinishSymbol()): Accept(), (2, c): Shift(4), (2, A): Goto(3), (2, d): Reduce(A_None), (3, d): Shift(5), (4, d): Reduce(A_c), (5, FinishSymbol()): Reduce(S_bAd), } def test_build():
def closure_1(self, state_set):
    """Return the closure of ``state_set`` together with lookahead sets.

    Starting from the elements of ``state_set`` (with the lookaheads reported
    by ``state_set.get_lookahead``), every element whose next symbol is a
    ``Nonterminal`` contributes one ``LR0Element`` per alternative of that
    nonterminal.  Lookaheads are tracked per element in a dictionary and an
    element is revisited whenever its lookahead set grows.  The outcome is a
    new ``StateSet`` holding every element with its accumulated lookahead.
    """
    lookaheads = {}
    closure = set()
    pending = set()

    # Step 1: seed closure, lookaheads and the first working set
    for element in state_set.elements:
        lookaheads[element] = state_set.get_lookahead(element)
        closure.add(element)
        pending.add(element)

    # Step 2: keep expanding until a pass yields nothing new
    while True:
        discovered = set()
        for state in pending:
            if state.isfinal():
                continue
            symbol = state.next_symbol()
            if not isinstance(symbol, Nonterminal):
                continue

            # first() of the remaining symbols followed by each lookahead
            first_set = set()
            for lookahead in lookaheads[state]:
                tail = list(state.remaining_symbols())
                tail.append(lookahead)
                first_set |= self.first(tail)

            rule = self.grammar[symbol]
            for index, alternative in enumerate(rule.alternatives):
                # create epsilon symbol if alternative is empty
                rhs = [Epsilon()] if alternative == [] else alternative
                production = Production(symbol, rhs, rule.annotations[index], rule.precs[index])
                if index in rule.inserts:
                    insert = rule.inserts[index]
                    production.inserts[insert[0]] = insert[1]
                item = LR0Element(production, 0)
                if rhs == [epsilon]:
                    item.d = 1

                # NEW ELEMENT:
                # 1. completely new (+lookahead): add to closure
                # 2. new lookahead: update lookahead in lookaheads
                # -> add to new working set
                # 3. already known: ignore
                if item not in closure:
                    lookaheads[item] = set(first_set)  # completely new
                    closure.add(item)
                elif first_set.issubset(lookaheads[item]):
                    # lookahead in combination with state already known
                    continue
                else:
                    lookaheads[item] |= first_set  # new lookahead
                discovered.add(item)

        pending = discovered
        if len(pending) == 0:
            break

    # add lookaheads
    final_result = StateSet()
    for element in closure:
        final_result.add(element, lookaheads[element])
    return final_result
# all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. from .state import State, StateSet, LR0Element, LR1Element from .production import Production from grammar_parser.gparser import Terminal, Nonterminal, Epsilon from .syntaxtable import FinishSymbol epsilon = Epsilon() def noprint(*args, **kwargs): pass class Helper(object): def __init__(self, grammar): self.grammar = grammar self.closure_time = 0 self.first_dict = {} self.follow_dict = {} self.calculate_first() self.calculate_follow() self.goto_count = {}
def inc_parse(self, line_indents=[], reparse=False):
    """Run the incremental parse loop over the previous parse tree.

    Resets ``self.stack`` (seeded with ``Node(FinishSymbol(), 0, [])``),
    ``self.undo`` and ``self.current_state``, then walks lookahead nodes
    obtained via ``self.pop_lookahead``: terminals go through
    ``self.parse_terminal``; nonterminals are shifted as a whole, trigger a
    reduction, or are broken down with ``self.left_breakdown``.

    Parameters:
        line_indents: not used anywhere in this body.
        reparse: when true, every nonterminal lookahead is broken down
            instead of being reused.

    Returns:
        True when ``parse_terminal`` reports ``"Accept"``, False when it
        reports ``"Error"``; the same value is stored in ``self.last_status``.

    Raises:
        AssertionError: if a terminal lookahead has ``changed`` set.
    """
    logging.debug("============ NEW INCREMENTAL PARSE ================= ")
    self.validating = False
    self.error_node = None
    self.stack = []
    self.undo = []
    self.current_state = 0
    self.stack.append(Node(FinishSymbol(), 0, []))
    bos = self.previous_version.parent.children[0]
    self.loopcount = 0

    # Hard-coded switch: while True, the "PARSER WITHOUT OPTIMISATION" branch
    # below is never taken.
    USE_OPT = True

    self.pm.do_incparse_inc_parse_top()

    la = self.pop_lookahead(bos)
    while (True):
        logging.debug("\x1b[35mProcessing\x1b[0m %s %s %s %s", la, la.changed, id(la), la.indent)
        self.loopcount += 1
        if isinstance(la.symbol, Terminal) or isinstance(
                la.symbol, FinishSymbol) or la.symbol == Epsilon():
            if la.changed:
                assert False # with prelexing you should never end up here!
            else:
                lookup_symbol = self.get_lookup(la)
                result = self.parse_terminal(la, lookup_symbol)
                if result == "Accept":
                    self.last_status = True
                    return True
                elif result == "Error":
                    self.last_status = False
                    return False
                elif result != None:
                    # Any other non-None result becomes the next lookahead; a
                    # None result leaves la unchanged for the next iteration.
                    la = result

        else: # Nonterminal
            if la.changed or reparse:
                # deconstruct the
                #la.changed = False # as all nonterminals that have changed are being rebuild, there is no need to change this flag (this also solves problems with comments)
                self.undo.append((la, 'changed', True))
                la = self.left_breakdown(la)
            else:
                if USE_OPT:
                    #Follow parsing/syntax table
                    goto = self.syntaxtable.lookup(self.current_state, la.symbol)
                    if goto: # can we shift this Nonterminal in the current state?
                        logging.debug("OPTShift: %s in state %s -> %s", la.symbol, self.current_state, goto)
                        self.pm.do_incparse_optshift(la)
                        follow_id = goto.action
                        self.stack.append(la)
                        la.state = follow_id #XXX this fixed goto error (I should think about storing the states on the stack instead of inside the elements)
                        self.current_state = follow_id
                        logging.debug("USE_OPT: set state to %s", self.current_state)
                        la = self.pop_lookahead(la)
                        self.validating = True
                        continue
                    else:
                        #XXX can be made faster by providing more information in syntax tables
                        first_term = la.find_first_terminal()

                        # Reduce if the subtree's first terminal allows it,
                        # otherwise break the subtree down.
                        lookup_symbol = self.get_lookup(first_term)
                        element = self.syntaxtable.lookup(
                            self.current_state, lookup_symbol)
                        if isinstance(element, Reduce):
                            self.reduce(element)
                        else:
                            la = self.left_breakdown(la)
                else:
                    # PARSER WITHOUT OPTIMISATION
                    if la.lookup != "":
                        lookup_symbol = Terminal(la.lookup)
                    else:
                        lookup_symbol = la.symbol
                    # NOTE(review): element is assigned but not read in this branch.
                    element = self.syntaxtable.lookup(
                        self.current_state, lookup_symbol)

                    if self.shiftable(la):
                        logging.debug("\x1b[37mis shiftable\x1b[0m")
                        self.stack.append(la)
                        self.current_state = la.state
                        self.right_breakdown()
                        la = self.pop_lookahead(la)
                    else:
                        la = self.left_breakdown(la)
    # NOTE(review): the loop above contains no break (only return/continue),
    # so this line appears to be unreachable.
    logging.debug("============ INCREMENTAL PARSE END ================= ")