def FindFollowSets(lemp):
    '''Compute all followsets.

    A followset is the set of all symbols which can come immediately
    after a configuration.

    Each configuration's follow-set (``fws``) is merged into the
    follow-set of every configuration reachable through its forward
    propagation links (``fplp``).  Passes over all states are repeated
    until one full pass changes nothing.

    lemp -- grammar object; reads ``nstate`` and ``sorted`` and updates
            the ``status`` and ``fws`` of the configurations found there.
    '''
    # Every configuration starts out as "still has to propagate".
    for n in range(lemp.nstate):
        for config in iterlinks(lemp.sorted[n].cfp):
            config.status = INCOMPLETE

    while True:
        changed = 0
        for n in range(lemp.nstate):
            for config in iterlinks(lemp.sorted[n].cfp):
                if config.status == COMPLETE:
                    continue
                for link in iterlinks(config.fplp):
                    # A truthy SetUnion result is treated as "the target
                    # set changed", so the target has to propagate again.
                    if SetUnion(link.cfp.fws, config.fws):
                        link.cfp.status = INCOMPLETE
                        changed = 1
                config.status = COMPLETE
        if not changed:
            break
def Configlist_closure(lemp):
    '''Compute the closure of the configuration list.

    Walks the module-level list ``current``.  For every configuration
    whose dot sits in front of a nonterminal, a dot-at-zero
    configuration is added (via Configlist_add) for each rule of that
    nonterminal, and its follow-set is seeded from the symbols that
    follow the nonterminal in the originating rule.

    lemp -- grammar object; ``errsym`` and ``filename`` are read and
            ``errorcnt`` is incremented for a nonterminal without rules.
    '''
    from plink import Plink_add

    # NOTE(review): iterlinks is presumably lazy, so configurations that
    # Configlist_add appends to the list during the walk get visited too
    # -- keep this a plain iteration over the live list.
    for config in iterlinks(current):
        rule = config.rp
        dot = config.dot
        if dot >= rule.nrhs:
            continue  # dot is at the far right: nothing to expand
        after_dot = rule.rhs[dot]
        if after_dot.type != NONTERMINAL:
            continue

        if after_dot.rule is None and after_dot != lemp.errsym:
            ErrorMsg(lemp.filename, rule.line,
                     'Nonterminal "%s" has no rules.',
                     after_dot.name)
            lemp.errorcnt += 1

        for expansion in iterlinks(after_dot.rule, 'nextlhs'):
            added = Configlist_add(expansion, 0)
            for pos in range(dot + 1, rule.nrhs):
                trailing = rule.rhs[pos]
                if trailing.type == TERMINAL:
                    SetAdd(added.fws, trailing.index)
                    break
                if trailing.type == MULTITERMINAL:
                    for k in range(trailing.nsubsym):
                        SetAdd(added.fws, trailing.subsym[k].index)
                    break
                SetUnion(added.fws, trailing.firstset)
                if not trailing._lambda:
                    break
            else:
                # The scan ran off the end of the rule without a break,
                # so record a forward propagation link from the
                # originating configuration to the new one.
                config.fplp = Plink_add(config.fplp, added)
def FindActions(lemp):
    '''Compute the reduce actions, and resolve conflicts.

    Works in four steps: add REDUCE actions, add the ACCEPT action to
    state 0, resolve actions that share a lookahead, and report every
    rule that no REDUCE action refers to.

    lemp -- grammar object; the action lists (``ap``) of the states in
            ``sorted`` are rebuilt, and ``nconflict`` / ``errorcnt``
            are increased as conflicts and unreducible rules are found.
    '''
    # Step 1: reduce actions.  A configuration with its dot at the
    # extreme right yields one REDUCE action per terminal found in its
    # follow-set.
    for n in range(lemp.nstate):
        state = lemp.sorted[n]
        for config in iterlinks(state.cfp):
            if config.rp.nrhs != config.dot:
                continue
            for t in range(lemp.nterminal):
                if SetFind(config.fws, t):
                    # Reduce by config.rp when the lookahead symbol
                    # is lemp.symbols[t].
                    state.ap = Action_add(state.ap, REDUCE,
                                          lemp.symbols[t], config.rp)

    # Step 2: the accepting token.  Use the named start symbol when it
    # is set and can be found, otherwise the LHS of the first rule.
    accept_sym = Symbol_find(lemp.start) if lemp.start else None
    if accept_sym is None:
        accept_sym = lemp.rule.lhs

    # The first state is always the starting state of the finite state
    # machine; it ACCEPTs when the lookahead is the start nonterminal.
    first_state = lemp.sorted[0]
    first_state.ap = Action_add(first_state.ap, ACCEPT, accept_sym, None)

    # Step 3: resolve conflicts.  After sorting, every run of actions
    # that follows "head" with the same lookahead is compared with it.
    for n in range(lemp.nstate):
        state = lemp.sorted[n]
        state.ap = Action_sort(state.ap)
        head = state.ap
        while head and head.next:
            rival = head.next
            while rival and rival.sp == head.sp:
                # "head" and "rival" have the same lookahead; decide
                # which one should be used.
                lemp.nconflict += resolve_conflict(head, rival)
                rival = rival.next
            head = head.next

    # Step 4: report an error for each rule that can never be reduced.
    for rule in iterlinks(lemp.rule):
        rule.canReduce = False
    for n in range(lemp.nstate):
        for action in iterlinks(lemp.sorted[n].ap):
            if action.type == REDUCE:
                action.x.rp.canReduce = True
    for rule in iterlinks(lemp.rule):
        if not rule.canReduce:
            ErrorMsg(lemp.filename, rule.ruleline,
                     "This rule can not be reduced.\n")
            lemp.errorcnt += 1
def buildshifts(lemp, stp):
    '''Construct all successor states to the given state.

    A "successor" state is any state which can be reached by a shift
    action.

    lemp -- grammar object, handed on to getstate()
    stp  -- the state from which successors are computed; SHIFT
            actions are added to its action list (``stp.ap``)
    '''
    # Each configuration becomes complete after it contributes to a
    # successor state.  Initially, all configurations are incomplete.
    for config in iterlinks(stp.cfp):
        config.status = INCOMPLETE

    for config in iterlinks(stp.cfp):
        if config.status == COMPLETE:
            continue  # already consumed by an earlier grouping pass
        if config.dot >= config.rp.nrhs:
            continue  # dot at the far right: nothing to shift

        Configlist_reset()  # start a fresh basis set
        shift_sym = config.rp.rhs[config.dot]  # symbol after the dot

        # Gather every configuration, from this one onward, that has
        # the same symbol after its dot.  Each contributes a basis
        # configuration with the dot moved one symbol to the right.
        for peer in iterlinks(config):
            if peer.status == COMPLETE:
                continue
            if peer.dot >= peer.rp.nrhs:
                continue
            peer_sym = peer.rp.rhs[peer.dot]
            if not same_symbol(peer_sym, shift_sym):
                continue
            peer.status = COMPLETE  # mark this configuration as used
            shifted = Configlist_addbasis(peer.rp, peer.dot + 1)
            shifted.bplp = Plink_add(shifted.bplp, peer)

        # Get the state described by the basis set just constructed.
        successor = getstate(lemp)

        # "successor" is reached from "stp" by shifting "shift_sym";
        # a multiterminal gets one SHIFT action per sub-symbol.
        if shift_sym.type == MULTITERMINAL:
            for k in range(shift_sym.nsubsym):
                stp.ap = Action_add(stp.ap, SHIFT,
                                    shift_sym.subsym[k], successor)
        else:
            stp.ap = Action_add(stp.ap, SHIFT, shift_sym, successor)
def FindFirstSets(lemp):
    '''Find all nonterminals which will generate the empty string.
    Then go back and compute the first sets of every nonterminal.

    The first set is the set of all terminal symbols which can begin
    a string generated by that nonterminal.

    lemp -- grammar object; ``_lambda`` is reset on every symbol and a
            new ``firstset`` is created for the symbols at indices
            ``nterminal`` .. ``nsymbol - 1``.
    '''
    for n in range(lemp.nsymbol):
        lemp.symbols[n]._lambda = False
    for n in range(lemp.nterminal, lemp.nsymbol):
        lemp.symbols[n].firstset = SetNew()

    # Phase 1: lambdas.  A rule's LHS is flagged once every symbol on
    # its right-hand side is flagged (trivially so for an empty RHS).
    # Repeat until a pass flags nothing new.
    while True:
        grew = 0
        for rule in iterlinks(lemp.rule):
            if rule.lhs._lambda:
                continue
            for pos in range(rule.nrhs):
                sym = rule.rhs[pos]
                assert sym.type == NONTERMINAL or not sym._lambda
                if not sym._lambda:
                    break
            else:
                rule.lhs._lambda = True
                grew = 1
        if not grew:
            break

    # Phase 2: first sets.  Scan each RHS from the left, feeding the
    # LHS first set, and stop at the first symbol that is not flagged
    # as lambda.  Repeat until no set reports a change.
    while True:
        grew = 0
        for rule in iterlinks(lemp.rule):
            head = rule.lhs
            for pos in range(rule.nrhs):
                sym = rule.rhs[pos]
                if sym.type == TERMINAL:
                    grew += SetAdd(head.firstset, sym.index)
                    break
                if sym.type == MULTITERMINAL:
                    for k in range(sym.nsubsym):
                        grew += SetAdd(head.firstset, sym.subsym[k].index)
                    break
                if head == sym:
                    # The LHS occurring in its own RHS adds nothing;
                    # scanning only continues if it can be empty.
                    if not head._lambda:
                        break
                    continue
                grew += SetUnion(head.firstset, sym.firstset)
                if not sym._lambda:
                    break
        if not grew:
            break
def FindStates(lemp):
    '''Compute all LR(0) states for the grammar.

    Links are added between some states so that the LR(1) follow sets
    can be computed later.

    lemp -- grammar object; ``errorcnt`` is incremented for an unknown
            start symbol and for each occurrence of the start symbol
            on a right-hand side.
    '''
    Configlist_init()

    # Find the start symbol: the one named by lemp.start if it is set
    # and known, otherwise the LHS of the first rule.
    start_sym = Symbol_find(lemp.start) if lemp.start else None
    if start_sym is None:
        if lemp.start:
            # A start symbol was requested but could not be found.
            ErrorMsg(lemp.filename, 0,
                     'The specified start symbol "%s" '
                     'is not in a nonterminal of the grammar. '
                     '"%s" will be used as the start symbol instead.',
                     lemp.start, lemp.rule.lhs.name)
            lemp.errorcnt += 1
        start_sym = lemp.rule.lhs

    # Make sure the start symbol doesn't occur on the right-hand side
    # of any rule.  Report an error if it does.  (YACC would generate
    # a new start symbol in this case.)
    for rule in iterlinks(lemp.rule):
        for pos in range(rule.nrhs):
            # FIX ME: Deal with multiterminals
            if rule.rhs[pos] == start_sym:
                ErrorMsg(lemp.filename, 0,
                         'The start symbol "%s" '
                         'occurs on the right-hand side of a rule. '
                         'This will result in a parser '
                         'which does not work properly.',
                         start_sym.name)
                lemp.errorcnt += 1

    # The basis configuration set for the first state is all rules
    # which have the start symbol as their left-hand side.
    for rule in iterlinks(start_sym.rule, 'nextlhs'):
        rule.lhsStart = 1
        basis = Configlist_addbasis(rule, 0)
        # Symbol index 0 goes into the follow-set
        # (presumably the end-of-input marker -- TODO confirm).
        SetAdd(basis.fws, 0)

    # Compute the first state.  All other states will be computed
    # automatically during the computation of the first one.  The
    # returned first state is not used.
    getstate(lemp)
def FindRulePrecedences(xp):
    '''Find a precedence symbol of every rule in the grammar.

    Rules whose ``precsym`` is already set are left alone.  Any other
    rule takes the first RHS symbol with a defined precedence
    (``prec >= 0``); for a multiterminal, the first sub-symbol with a
    defined precedence is used.  If no RHS symbol has one, ``precsym``
    stays None.

    xp -- grammar object whose ``rule`` list is walked.
    '''
    for rule in iterlinks(xp.rule):
        if rule.precsym is not None:
            continue  # precedence already assigned for this rule
        for pos in range(rule.nrhs):
            sym = rule.rhs[pos]
            if sym.type == MULTITERMINAL:
                for k in range(sym.nsubsym):
                    candidate = sym.subsym[k]
                    if candidate.prec >= 0:
                        rule.precsym = candidate
                        break
            elif sym.prec >= 0:
                rule.precsym = sym
            # Stop scanning as soon as a precedence symbol was found.
            if rule.precsym is not None:
                break
def FindStates(lemp):
    '''Compute all LR(0) states for the grammar.

    Links are added between some states so that the LR(1) follow sets
    can be computed later.

    lemp -- grammar object; ``errorcnt`` is incremented for an unknown
            start symbol and for each occurrence of the start symbol
            on a right-hand side.
    '''
    Configlist_init()

    # Find the start symbol.  A requested name that cannot be resolved
    # is reported; the LHS of the first rule is the fallback.
    start_sym = None
    if lemp.start:
        start_sym = Symbol_find(lemp.start)
        if start_sym is None:
            ErrorMsg(
                lemp.filename, 0,
                'The specified start symbol "%s" '
                'is not in a nonterminal of the grammar. '
                '"%s" will be used as the start symbol instead.',
                lemp.start, lemp.rule.lhs.name)
            lemp.errorcnt += 1
    if start_sym is None:
        start_sym = lemp.rule.lhs

    # Make sure the start symbol doesn't occur on the right-hand side
    # of any rule.  Report an error if it does.  (YACC would generate
    # a new start symbol in this case.)
    for rule in iterlinks(lemp.rule):
        for pos in range(rule.nrhs):
            # FIX ME: Deal with multiterminals
            if rule.rhs[pos] == start_sym:
                ErrorMsg(
                    lemp.filename, 0,
                    'The start symbol "%s" '
                    'occurs on the right-hand side of a rule. '
                    'This will result in a parser '
                    'which does not work properly.',
                    start_sym.name)
                lemp.errorcnt += 1

    # The basis configuration set for the first state is all rules
    # which have the start symbol as their left-hand side.
    for rule in iterlinks(start_sym.rule, 'nextlhs'):
        rule.lhsStart = 1
        basis = Configlist_addbasis(rule, 0)
        # Symbol index 0 goes into the follow-set
        # (presumably the end-of-input marker -- TODO confirm).
        SetAdd(basis.fws, 0)

    # Compute the first state.  All other states will be computed
    # automatically during the computation of the first one.  The
    # returned first state is not used.
    getstate(lemp)
def FindLinks(lemp):
    '''Construct the propagation links.

    First every configuration gets a pointer (``stp``) back to the
    state that owns it.  Then each backward link (``bplp``) is mirrored
    as a forward link (``fplp``) on the configuration it points to.

    lemp -- grammar object; reads ``nstate`` and ``sorted``.
    '''
    # Housekeeping detail: give every configuration a pointer back to
    # the state to which it is attached.
    for n in range(lemp.nstate):
        state = lemp.sorted[n]
        for config in iterlinks(state.cfp):
            config.stp = state

    # Convert all backlinks into forward links.  Only the forward
    # links are used in the follow-set computation.
    for n in range(lemp.nstate):
        for config in iterlinks(lemp.sorted[n].cfp):
            for backlink in iterlinks(config.bplp):
                source = backlink.cfp
                source.fplp = Plink_add(source.fplp, config)