def EPSclosure(stateSet):
    """Return the epsilon closure of ``stateSet`` as an OrderedFrozenSet.

    Starting from the given NFA state ids, epsilon transitions are followed
    repeatedly and every state id reached that way is collected.
    """
    reached = orderedcollections.OrderedSet(stateSet)
    pending = orderedcollections.OrderedSet(stateSet)
    while len(pending) > 0:
        currentId = pending.pop()
        for nextId in nfa.states[currentId].onClassGoTo(epsilon):
            if nextId in reached:
                # Already part of the closure; do not queue it again.
                continue
            reached.add(nextId)
            pending.add(nextId)
    return orderedcollections.OrderedFrozenSet(reached)
def nfaTransTo(fromStates, onClass):
    """Collect the NFA states reachable from ``fromStates`` on ``onClass``.

    The targets of every ``onClass`` transition are merged into one set, the
    epsilon closure of that set is taken, and the result is returned as an
    OrderedSet.
    """
    reachable = orderedcollections.OrderedSet()
    for sourceId in fromStates:
        targets = nfa.states[sourceId].onClassGoTo(onClass)
        reachable.update(targets)
    closure = EPSclosure(reachable)
    return orderedcollections.OrderedSet(closure)
def finer(minStateId):
    """Try to split partition ``minStateId``; return True when it was split.

    One state is taken out of the partition as the reference. Every other
    state whose transition on some class of the reference state leads to a
    different partition is moved into a newly created partition.
    """
    try:
        partition = self.min2dfa[minStateId]
        referenceId = partition.pop()
    except Exception:
        # Nothing could be taken from the partition, so nothing to refine.
        return False

    splitOff = orderedcollections.OrderedSet()
    for onClass in dfa.states[referenceId].getTransitions():
        expected = transToMinPartition(referenceId, onClass)
        for otherId in partition:
            if expected != transToMinPartition(otherId, onClass):
                splitOff.add(otherId)

    # The reference state always stays in the original partition.
    partition.add(referenceId)

    if len(splitOff) == 0:
        return False

    for otherId in splitOff:
        partition.remove(otherId)

    newPartitionId = newState()
    self.min2dfa[newPartitionId] = splitOff
    for otherId in splitOff:
        self.dfa2min[otherId] = newPartitionId

    # Reaching this point means at least one state was moved.
    return True
def __init__(self,
             classes=None,
             states=None,
             keywords=None,
             tokens=None,
             firstTokenId=-1):
    """Initialise the machine, copying each supplied mapping into an OrderedMap.

    Args:
        classes: mapping of class name to character set. When omitted, a
            mapping holding only ``epsilon`` with a fresh, empty OrderedSet
            is used.
        states: mapping of state id to state object; defaults to empty.
        keywords: keyword mapping; defaults to empty.
        tokens: token mapping; defaults to empty.
        firstTokenId: stored unchanged on ``self.firstTokenId``.

    The defaults are created per call. The previous signature built them
    once at definition time, so the epsilon OrderedSet was shared by every
    instance constructed with default arguments (the OrderedMap copy is
    shallow).
    """
    if classes is None:
        classes = {epsilon: orderedcollections.OrderedSet()}
    if states is None:
        states = orderedcollections.OrderedMap()
    if keywords is None:
        keywords = orderedcollections.OrderedMap()
    if tokens is None:
        tokens = orderedcollections.OrderedMap()

    self.classes = orderedcollections.OrderedMap(classes)
    self.states = orderedcollections.OrderedMap(states)
    self.numStates = len(states)
    self.keywords = orderedcollections.OrderedMap(keywords)
    self.tokens = orderedcollections.OrderedMap(tokens)
    self.firstTokenId = firstTokenId
def EPSclosure(stateSet):
    """Return the epsilon closure of ``stateSet`` as an OrderedFrozenSet.

    Every state id in ``stateSet`` belongs to the closure, together with
    every NFA state id reachable from one of them through epsilon
    transitions only.
    """
    closureSet = orderedcollections.OrderedSet(stateSet)
    unexploredStates = orderedcollections.OrderedSet(stateSet)
    while len(unexploredStates) != 0:
        stateID = unexploredStates.pop()
        for toStateID in nfa.states[stateID].onClassGoTo(epsilon):
            # Test against the closure, not the work list: a state that was
            # already popped and explored is no longer in the work list, so
            # testing the work list would queue it again and the loop would
            # never finish on an epsilon cycle.
            if toStateID not in closureSet:
                closureSet.add(toStateID)
                unexploredStates.add(toStateID)
    return orderedcollections.OrderedFrozenSet(closureSet)
def nfaTransTo(fromStates, onClass):
    """Return the epsilon closure of the states reached on ``onClass``.

    States in ``fromStates`` that have no transition on ``onClass`` are
    ignored.
    """
    reachable = orderedcollections.OrderedSet()
    for fromStateId in fromStates:
        transitions = nfa.states[fromStateId].getTransitions()
        if onClass not in transitions:
            continue
        reachable.update(transitions[onClass])
    return EPSclosure(reachable)
def gatherClasses(states):
    """Return the non-epsilon transition classes leaving any state in ``states``."""
    found = orderedcollections.OrderedSet()
    for stateId in states:
        for transClass in nfa.states[stateId].getTransitions():
            if transClass == epsilon:
                # Epsilon moves are not input classes.
                continue
            found.add(transClass)
    return found
def onClasses(minStateId):
    """Return every transition class used by a DFA state of ``minStateId``.

    The DFA state ids are read from ``self.stateMap[minStateId]``.
    """
    collected = orderedcollections.OrderedSet()
    for dfaStateId in self.stateMap[minStateId]:
        transitions = dfa.states[dfaStateId].getTransitions()
        for onClass in transitions:
            collected.add(onClass)
    return collected
def onClasses(minStateId):
    """Return the classes on which any DFA state of a partition has a transition.

    Args:
        minStateId: id of the minimized-DFA partition to inspect.

    Returns:
        An OrderedSet of the entries of ``self.classes`` for which at least
        one DFA state in ``self.min2dfa[minStateId]`` has a transition.
    """
    transitionsOn = orderedcollections.OrderedSet()
    for stateID in self.min2dfa[minStateId]:
        # min2dfa holds ids of states of the DFA being minimized, so they
        # have to be looked up in dfa.states. self.states is keyed by
        # partition id and would give the wrong state (or no state at all).
        dfaState = dfa.states[stateID]
        for classCh in self.classes:
            if dfaState.hasTransition(classCh):
                transitionsOn.add(classCh)
    return transitionsOn
def gatherClasses(states):
    """Collect the transition classes, other than epsilon, of the given NFA states."""
    classes = orderedcollections.OrderedSet()
    for nfaStateId in states:
        for candidate in nfa.states[nfaStateId].getTransitions():
            if candidate == epsilon:
                continue
            classes.add(candidate)
    return classes
def nfaTransTo(fromStates, onClass):
    """Return, as an OrderedFrozenSet, the closure of the ``onClass`` targets.

    The targets of the ``onClass`` transitions of every state in
    ``fromStates`` are gathered, and the epsilon closure of that set is
    returned.
    """
    reachable = orderedcollections.OrderedSet()
    for sourceId in fromStates:
        targets = nfa.states[sourceId].onClassGoTo(onClass)
        for targetId in targets:
            reachable.add(targetId)
    closure = EPSclosure(reachable)
    return orderedcollections.OrderedFrozenSet(closure)
def gatherClasses(states):
    """Return the classes of characters on which this set of NFA states can move.

    Epsilon is left out of the result.
    """
    result = orderedcollections.OrderedSet()
    for nfaStateID in states:
        outgoing = nfa.states[nfaStateID].getTransitions()
        for transitionClass in outgoing:
            if transitionClass == epsilon:
                continue
            result.add(transitionClass)
    return result
def finer(minStateId):
    """Refine one partition of the minimized DFA.

    Each state of the partition is compared with a reference state taken
    from that partition. A state that, on some transition class of the
    reference state, moves to a different partition than the reference does
    is collected as distinguished. If any were found, they are removed from
    the partition and placed in a newly created one (a new minimal state).

    Returns True if a new partition was created and False otherwise.
    (Aho, Sethi, Ullman p. 142.)
    """
    try:
        members = self.min2dfa[minStateId]
        referenceID = members.pop()
    except Exception:
        # No reference state could be taken; nothing to refine.
        return False

    distinguished = orderedcollections.OrderedSet()
    for onClass in dfa.states[referenceID].getTransitions():
        referenceTarget = transToMinPartition(referenceID, onClass)
        for candidateID in members:
            if referenceTarget != transToMinPartition(candidateID, onClass):
                distinguished.add(candidateID)

    # Put the reference state back into its original partition.
    members.add(referenceID)

    if len(distinguished) == 0:
        return False

    # Move the distinguished states out of this partition and into a new one.
    for candidateID in distinguished:
        members.remove(candidateID)
    createdID = newState()
    self.min2dfa[createdID] = distinguished
    for candidateID in distinguished:
        self.dfa2min[candidateID] = createdID

    # At least one state was distinguished if we got here.
    return True
def EPSclosure(stateSet):
    """Return the epsilon closure of ``stateSet`` as an OrderedFrozenSet.

    A stack holds the states whose epsilon transitions still have to be
    followed; each newly discovered state is added to the closure and pushed.
    """
    closure = orderedcollections.OrderedSet(stateSet)
    work = stack.Stack()
    for seedId in stateSet:
        work.push(seedId)
        closure.add(seedId)

    while not work.isEmpty():
        transitions = nfa.states[work.pop()].getTransitions()
        if epsilon not in transitions:
            continue
        for targetId in transitions[epsilon]:
            if targetId in closure:
                continue
            closure.add(targetId)
            work.push(targetId)

    return orderedcollections.OrderedFrozenSet(closure)
def finer(minStateId):
    """Split partition ``minStateId`` where its states disagree.

    The DFA states of the partition are sorted and each one is compared with
    the first. Whenever a state moves, on some class returned by
    ``onClasses``, to a different partition than the first state does, it is
    taken out of this partition and put into a new one (created on the first
    such difference).

    Returns True if a new partition was created.
    """
    orderedIds = sorted(self.stateMap[minStateId])
    firstId = orderedIds[0]
    splitId = None

    for candidateId in orderedIds[1:]:
        for onClass in onClasses(minStateId):
            firstTarget = transToMin(firstId, onClass)
            candidateTarget = transToMin(candidateId, onClass)
            if firstTarget == candidateTarget:
                continue
            if splitId is None:
                # First difference found: create the partition to split into.
                splitId = newState()
                self.stateMap[splitId] = orderedcollections.OrderedSet()
            self.dfa2min[candidateId] = splitId
            self.stateMap[minStateId].discard(candidateId)
            self.stateMap[splitId].add(candidateId)

    return splitId is not None
def nfaTransTo(fromStates, onClass):
    """Return an OrderedSet holding the epsilon closure of the ``onClass`` targets."""
    targets = orderedcollections.OrderedSet()
    for fromId in fromStates:
        step = nfa.states[fromId].onClassGoTo(onClass)
        targets.update(step)
    closed = EPSclosure(targets)
    return orderedcollections.OrderedSet(closed)
def buildFromDFA(self, dfa):
    """Build the states of this machine as partitions of the states of ``dfa``.

    The states of ``dfa`` are first grouped into one partition holding all
    non-accepting states plus one partition per accepted token id. ``finer``
    is then called on every partition until no call reports a change, and
    finally the transitions of the new states are created.

    NOTE(review): transToMinPartition, onClasses and finer are stubs in this
    version, so finer() returns None and no partition is ever split.
    """

    def newState():
        """Create a state, store it in self.states and return its id."""
        aState = state.State(self.numStates)
        self.states[self.numStates] = aState
        self.numStates += 1
        return self.numStates - 1

    # Returns the Minimized DFA Partition Id given
    # a DFA State and Input symbol class.
    def transToMinPartition(fromDFAStateId, onClass):
        pass

    # Returns an ordered set of all the character classes
    # of all DFA states in a minimized DFA partition.
    def onClasses(minStateId):
        pass

    #(********************************************************************************)
    #(* Check each node in the given partition (the one passed as a parameter)       *)
    #(* with the first node in the partition. If a node is found that transitions    *)
    #(* to a different partition than the first node on the same input make a new    *)
    #(* set with this node and put all subsequent nodes that don't have similar      *)
    #(* transitions to the first node into this new set. After going through all     *)
    #(* states, if the new set of distinguished states is not empty then create a    *)
    #(* new partition and then remove all states in the set from the current         *)
    #(* partition and add them to the new partition (i.e. minimal state). Return true*)
    #(* if a new partition was created and false otherwise. Aho, Sethi, Ullman p. 142*)
    #(********************************************************************************)
    def finer(minStateId):
        pass

    # Run through all the states and make transitions
    # in the minimized DFA for all transitions that existed
    # in the unminimized DFA. Also sets the state to be accepting
    # if any state in the unminimized DFA was accepting.
    def constructMinStateTransitions():
        for minStateId in self.states:
            minState = self.states[minStateId]

            # Find the first dfa stateid in the set; it represents the
            # whole partition from here on.
            dfaStateIds = list(self.min2dfa[minStateId])
            dfaStateIds.sort()
            dfaStateId = dfaStateIds[0]

            if dfa.states[dfaStateId].isAccepting():
                minState.setAccepting(
                    dfa.states[dfaStateId].getAcceptsTokenId())

            minState.transitions = []

            # NOTE(review): this unpacking assumes getTransitions() yields
            # (class, target id) pairs for DFA states; confirm.
            for (onClass,
                 toDFAStateId) in dfa.states[dfaStateId].getTransitions():
                dfaState = dfa.states[toDFAStateId]
                toStateId = self.dfa2min[toDFAStateId]
                minState.addTransition(onClass, toStateId)

        # The start state is the partition holding the DFA start state.
        self.startStateId = self.dfa2min[dfa.startStateId]

    self.classes = dfa.classes

    startStateId = newState()
    self.min2dfa = orderedcollections.OrderedMap()  # partition id -> DFA state ids
    self.dfa2min = orderedcollections.OrderedMap()  # DFA state id -> partition id
    self.min2dfa[startStateId] = orderedcollections.OrderedSet()

    # Build state sets. One with all
    # the non-final states in it, and one
    # for each accepting state of the dfa
    # since we want separate accepting states
    # for all the tokens of the dfa.
    for stateId in dfa.states:
        dfaState = dfa.states[stateId]
        if not dfaState.isAccepting():
            self.min2dfa[startStateId].add(stateId)
            self.dfa2min[stateId] = startStateId
        else:
            # Now we have to either add another partition (i.e. state) or
            # find the accepting state that this dfa state belongs to.
            found = False
            for minStateId in self.states:
                minState = self.states[minStateId]
                if minState.getAcceptsTokenId() == dfaState.getAcceptsTokenId():
                    self.min2dfa[minStateId].add(stateId)
                    self.dfa2min[stateId] = minStateId
                    found = True
            if not found:
                finalStateId = newState()
                self.min2dfa[finalStateId] = orderedcollections.OrderedSet(
                    [stateId])
                self.dfa2min[stateId] = finalStateId
                self.states[finalStateId].setAccepting(
                    dfaState.getAcceptsTokenId())

    self.startStateId = self.dfa2min[dfa.startStateId]

    # Now begin partitioning by finding distinguishable states:
    # call finer on every partition of the minimized DFA, and repeat the
    # whole pass until no call reports a change.
    # NOTE(review): a real finer() is expected to add entries to
    # self.min2dfa while this loop iterates over it; confirm OrderedMap
    # tolerates insertion during iteration.
    change = True
    while change:
        change = False
        for partId in self.min2dfa:
            if finer(partId):
                change = True

    # After we are done splitting the states we call constructMinStateTransitions
    # to build the transitions in the new states.
    constructMinStateTransitions()
def buildFromDFA(self, dfa):
    """Build this machine as the minimized form of ``dfa``.

    The states of ``dfa`` are grouped into partitions: one for all
    non-accepting states and one per accepted token id. ``finer`` is then
    applied to every partition until a full pass makes no change, and the
    transitions of the resulting states are built from one representative
    DFA state per partition.

    Args:
        dfa: the DFA to minimize. Its ``states``, ``classes`` and
            ``startStateId`` are read.
    """

    def newState():
        """Create a state, store it in self.states and return its id."""
        aState = state.State(self.numStates)
        self.states[self.numStates] = aState
        self.numStates += 1
        return self.numStates - 1

    def transToMinPartition(fromDFAStateId, onClass):
        """Return the partition id reached from a DFA state on ``onClass``."""
        goesTo = dfa.states[fromDFAStateId].onClassGoTo(onClass)
        return self.dfa2min[goesTo]

    def onClasses(minStateId):
        """Return the classes on which any DFA state of the partition moves."""
        transitionsOn = orderedcollections.OrderedSet()
        for stateID in self.min2dfa[minStateId]:
            # min2dfa holds DFA state ids, so look them up in dfa.states;
            # self.states is keyed by partition id.
            dfaState = dfa.states[stateID]
            for classCh in self.classes:
                if dfaState.hasTransition(classCh):
                    transitionsOn.add(classCh)
        return transitionsOn

    def finer(minStateId):
        """Split the partition if its states disagree; return True if split.

        NOTE(review): only the transition classes of the reference state are
        compared, so a state that has extra transitions the reference lacks
        is not distinguished by this check; confirm that is acceptable.
        """
        distinguishedStates = orderedcollections.OrderedSet()
        try:
            firstStateID = self.min2dfa[minStateId].pop()
        except Exception:
            # No reference state could be taken from the partition.
            return False

        madeAChange = False
        for onClass in dfa.states[firstStateID].getTransitions():
            firstGoesTo = transToMinPartition(firstStateID, onClass)
            for secondaryStateID in self.min2dfa[minStateId]:
                secondGoesTo = transToMinPartition(secondaryStateID, onClass)
                if firstGoesTo != secondGoesTo:
                    distinguishedStates.add(secondaryStateID)
                    madeAChange = True

        # The reference state goes back into its original partition.
        self.min2dfa[minStateId].add(firstStateID)

        if len(distinguishedStates) == 0:
            return False

        for stateID in distinguishedStates:
            self.min2dfa[minStateId].remove(stateID)

        newStateForM2DFA = newState()
        self.min2dfa[newStateForM2DFA] = distinguishedStates
        for stateID in distinguishedStates:
            self.dfa2min[stateID] = newStateForM2DFA
        return madeAChange

    def constructMinStateTransitions():
        """Create accepting flags and transitions for every partition."""
        for minStateId in self.states:
            minState = self.states[minStateId]

            # The lowest DFA state id represents the whole partition.
            dfaStateIds = list(self.min2dfa[minStateId])
            dfaStateIds.sort()
            dfaStateId = dfaStateIds[0]

            if dfa.states[dfaStateId].isAccepting():
                minState.setAccepting(
                    dfa.states[dfaStateId].getAcceptsTokenId())

            minState.transitions = {}
            trans = dfa.states[dfaStateId].getTransitions()
            for onClass in trans:
                toDFAStateId = trans[onClass]
                toStateId = self.dfa2min[toDFAStateId]
                minState.addTransition(onClass, toStateId)

        self.startStateId = self.dfa2min[dfa.startStateId]

    self.classes = dfa.classes

    startStateId = newState()
    self.min2dfa = orderedcollections.OrderedMap()  # partition id -> DFA state ids
    self.dfa2min = orderedcollections.OrderedMap()  # DFA state id -> partition id
    # Lets transToMinPartition map a -1 target onto the pseudo partition -1.
    # NOTE(review): presumably onClassGoTo returns -1 when there is no
    # transition; confirm.
    self.dfa2min[-1] = -1
    self.min2dfa[startStateId] = orderedcollections.OrderedSet()

    # Initial partitions: all non-accepting states together, and one
    # partition per accepted token id.
    for stateId in dfa.states:
        dfaState = dfa.states[stateId]
        if not dfaState.isAccepting():
            self.min2dfa[startStateId].add(stateId)
            self.dfa2min[stateId] = startStateId
        else:
            found = False
            for minStateId in self.states:
                minState = self.states[minStateId]
                if minState.getAcceptsTokenId() == dfaState.getAcceptsTokenId():
                    self.min2dfa[minStateId].add(stateId)
                    self.dfa2min[stateId] = minStateId
                    found = True
            if not found:
                finalStateId = newState()
                self.min2dfa[finalStateId] = orderedcollections.OrderedSet(
                    [stateId])
                self.dfa2min[stateId] = finalStateId
                self.states[finalStateId].setAccepting(
                    dfaState.getAcceptsTokenId())

    self.startStateId = self.dfa2min[dfa.startStateId]

    # Refine until a full pass over the partitions changes nothing. The flag
    # must start out True: it used to be read before any assignment, which
    # raised UnboundLocalError before the first pass.
    changed = True
    while changed:
        changed = False
        for stateID in range(self.numStates):
            if finer(stateID):
                changed = True

    constructMinStateTransitions()
def buildFromNFA(self, nfa):
    """Build this DFA from ``nfa`` with the subset construction.

    Every DFA state stands for a set of NFA state ids. The start state is
    the epsilon closure of the NFA start state. For each unexplored DFA
    state and each transition class leaving its NFA states, the closure of
    the reachable NFA states is looked up (or registered as a new DFA state)
    and a DFA transition to it is added. A DFA state accepts the token of
    the first accepting NFA state found in its set.

    Args:
        nfa: the NFA to convert. Its ``classes``, ``states`` and
            ``startStateId`` are read.

    NOTE(review): earlier code replaced ``self.tokens`` with a map holding a
    placeholder entry. ``self.tokens`` is now left untouched; if the scanner
    needs the NFA's token names they must be copied explicitly. Confirm with
    the caller.
    """

    def newState():
        """Create a DFA state, store it in self.states and return its id."""
        new_State = state.State(self.numStates)
        self.states[self.numStates] = new_State
        self.numStates += 1
        return new_State.getId()

    def getAcceptingTokenId(stateSet):
        """Return the token id of the first accepting NFA state, else None."""
        for stateid in stateSet:
            if nfa.states[stateid].isAccepting():
                return nfa.states[stateid].getAcceptsTokenId()
        return None

    def EPSclosure(stateSet):
        """Return the epsilon closure of ``stateSet`` as an OrderedFrozenSet."""
        closureSet = orderedcollections.OrderedSet(stateSet)
        unexplored = orderedcollections.OrderedSet(stateSet)
        while len(unexplored) != 0:
            stateidx = unexplored.pop()
            for toState in nfa.states[stateidx].onClassGoTo(epsilon):
                if toState not in closureSet:
                    closureSet.add(toState)
                    unexplored.add(toState)
        return orderedcollections.OrderedFrozenSet(closureSet)

    def nfaTransTo(fromStates, onClass):
        """Return the closure of the NFA states reached on ``onClass``."""
        toStates = orderedcollections.OrderedSet()
        for stateidx in fromStates:
            toStates.update(nfa.states[stateidx].onClassGoTo(onClass))
        return orderedcollections.OrderedSet(EPSclosure(toStates))

    def gatherClasses(states):
        """Return the non-epsilon classes leaving any of the NFA states."""
        gathered = orderedcollections.OrderedSet()
        for stateidx in states:
            for onClass in nfa.states[stateidx].getTransitions():
                if onClass != epsilon:
                    gathered.add(onClass)
        return gathered

    def registerDFAState(dfaStateId, nfaStateSet):
        """Record a new DFA state for ``nfaStateSet`` and queue it."""
        stateMap[dfaStateId] = nfaStateSet
        nfa2dfaMap[nfaStateSet] = dfaStateId
        # Compare with None: token id 0 is a valid id but is falsy.
        tokenId = getAcceptingTokenId(nfaStateSet)
        if tokenId is not None:
            self.states[dfaStateId].setAccepting(tokenId)
        unexploredStatesSet.add(dfaStateId)

    # This is the beginning of the buildFromNFA method.
    # Copy over the classes.
    self.classes = nfa.classes

    stateMap = orderedcollections.OrderedMap()    # DFA state id -> NFA id set
    nfa2dfaMap = orderedcollections.OrderedMap()  # NFA id set -> DFA state id
    unexploredStatesSet = orderedcollections.OrderedSet()

    # The DFA start state is the epsilon closure of the NFA start state.
    self.startStateId = newState()
    EPSstartState = EPSclosure(
        orderedcollections.OrderedSet([nfa.startStateId]))
    registerDFAState(self.startStateId, EPSstartState)

    while len(unexploredStatesSet) > 0:
        currentStateId = unexploredStatesSet.pop()
        for letter in gatherClasses(stateMap[currentStateId]):
            transitioning = orderedcollections.OrderedFrozenSet(
                nfaTransTo(stateMap[currentStateId], letter))
            if transitioning in nfa2dfaMap:
                toDFAStateId = nfa2dfaMap[transitioning]
            else:
                toDFAStateId = newState()
                registerDFAState(toDFAStateId, transitioning)
            # DFA transitions must point at DFA state ids, never NFA ids.
            self.states[currentStateId].addTransition(letter, toDFAStateId)
def buildFromNFA(self, nfa):
    """Build this DFA from ``nfa`` with the subset construction.

    Every DFA state stands for a set of NFA state ids. The start state is
    the epsilon closure of the NFA start state. For each unexplored DFA
    state and each transition class leaving its NFA states, the closure of
    the reachable NFA states is looked up in ``nfa2dfa`` (or registered as a
    new DFA state) and a DFA transition to it is added. A DFA state accepts
    the token of the first accepting NFA state found in its set.

    Args:
        nfa: the NFA to convert. Its ``classes``, ``states`` and
            ``startStateId`` are read.
    """

    def newState():
        ''' Add a new state to the map of stateIds to states in the state map.
            Return the new state id.'''
        new_State = state.State(self.numStates)
        # The state has to be stored, otherwise self.states[newId] fails.
        self.states[self.numStates] = new_State
        self.numStates += 1
        return new_State.getId()

    def getAcceptingTokenId(stateSet):
        ''' Return the first accepting tokenId found in the NFA state set.
            Otherwise, return None '''
        for stateid in stateSet:
            if nfa.states[stateid].isAccepting():
                return nfa.states[stateid].getAcceptsTokenId()
        return None

    def EPSclosure(stateSet):
        ''' Add to the closure set all NFA state Ids that are
            in the epsilon closure of this stateSet. Then
            return the OrderedFrozenSet of this closure set.'''
        closureSet = orderedcollections.OrderedSet(stateSet)
        unexplored = orderedcollections.OrderedSet(stateSet)
        while len(unexplored) != 0:
            stateID = unexplored.pop()
            for toStateID in nfa.states[stateID].onClassGoTo(epsilon):
                if toStateID not in closureSet:
                    closureSet.add(toStateID)
                    unexplored.add(toStateID)
        return orderedcollections.OrderedFrozenSet(closureSet)

    def nfaTransTo(fromStates, onClass):
        ''' Return the epsilon closure of the set of NFA states that
            you can get to on the class of characters (i.e. onClass) from
            this set of fromStates.'''
        toStates = orderedcollections.OrderedSet()
        for fromStateID in fromStates:
            toStates.update(nfa.states[fromStateID].onClassGoTo(onClass))
        return orderedcollections.OrderedSet(EPSclosure(toStates))

    def gatherClasses(states):
        ''' Return the set of classes of transitions (i.e. classes of characters)
            that are possible transitions from this set of NFA states.'''
        gatheredClasses = orderedcollections.OrderedSet()
        for stateID in states:
            for onClass in nfa.states[stateID].getTransitions():
                if onClass != epsilon:
                    gatheredClasses.add(onClass)
        return gatheredClasses

    def registerDFAState(dfaStateId, nfaStateSet):
        ''' Record a new DFA state for this frozen set of NFA state ids, mark
            it accepting when the set holds an accepting NFA state, and queue
            it for exploration.'''
        stateMap[dfaStateId] = nfaStateSet
        nfa2dfa[nfaStateSet] = dfaStateId
        # Compare with None: token id 0 is a valid id but is falsy.
        tokenId = getAcceptingTokenId(nfaStateSet)
        if tokenId is not None:
            self.states[dfaStateId].setAccepting(tokenId)
        unexploredStates.add(dfaStateId)

    # This is the beginning of the buildFromNFA method.
    # Copy over the classes.
    self.classes = nfa.classes

    stateMap = orderedcollections.OrderedMap()  # DFA state id -> NFA id set
    nfa2dfa = orderedcollections.OrderedMap()   # NFA id set -> DFA state id
    unexploredStates = orderedcollections.OrderedSet()

    # Create the start state: the epsilon closure of the NFA start state.
    self.startStateId = newState()
    EPSstartState = EPSclosure(
        orderedcollections.OrderedSet([nfa.startStateId]))
    registerDFAState(self.startStateId, EPSstartState)

    # Explore DFA states until no unexplored state is left.
    while len(unexploredStates) > 0:
        currentState = unexploredStates.pop()
        for letter in gatherClasses(stateMap[currentState]):
            transitionsTo = orderedcollections.OrderedFrozenSet(
                nfaTransTo(stateMap[currentState], letter))
            if transitionsTo in nfa2dfa:
                toDFAState = nfa2dfa[transitionsTo]
            else:
                toDFAState = newState()
                registerDFAState(toDFAState, transitionsTo)
            self.states[currentState].addTransition(letter, toDFAState)
def buildMachine(self, instream):
    """Read a scanner description from ``instream`` and build the NFA states.

    The stream is read section by section: an optional ``#CLASSES`` section
    filling ``self.classes``, an optional ``#KEYWORDS`` section filling
    ``self.keywords``, and a ``#TOKENS`` section in which each token is either
    a quoted string or ``name = regular expression``. Every token gets its own
    chain of states; state 0 is created last and linked by epsilon transitions
    to the start state of each token. Reading ends at ``#END``.

    Raises:
        Exception: on a malformed regular expression, or when keywords are
            present and the first token is given as a quoted string.
    """

    def operate(op, opStack, stateStack):
        """Apply the operators on ``opStack`` that bind at least as tightly as ``op``.

        ``stateStack`` holds (first state id, last state id) pairs. Each
        applied operator pops its operand pairs, creates two new states wired
        with epsilon transitions, and pushes the resulting pair.
        """
        if (op.getOpChar() == '('):
            opStack.push(Operator('('))
            return

        while op.precedence() <= opStack.peek().precedence():
            topOp = opStack.pop()
            opChar = topOp.getOpChar()

            if opChar == '|':
                # Alternation: new start branches to both operands, both
                # operands end in the new final state.
                b1, b2 = stateStack.pop()
                a1, a2 = stateStack.pop()
                firstId = newState()
                secondId = newState()
                self.states[firstId].addTransition(epsilon, a1)
                self.states[firstId].addTransition(epsilon, b1)
                self.states[a2].addTransition(epsilon, secondId)
                self.states[b2].addTransition(epsilon, secondId)
                stateStack.push((firstId, secondId))
            elif opChar == '.':
                # Concatenation: the end of a is linked to the start of b.
                b1, b2 = stateStack.pop()
                a1, a2 = stateStack.pop()
                firstId = newState()
                secondId = newState()
                self.states[firstId].addTransition(epsilon, a1)
                self.states[a2].addTransition(epsilon, b1)
                self.states[b2].addTransition(epsilon, secondId)
                stateStack.push((firstId, secondId))
            elif opChar == '*':
                # Closure: the operand may be skipped (first -> second) and
                # repeated (second -> first).
                a1, a2 = stateStack.pop()
                firstId = newState()
                secondId = newState()
                self.states[firstId].addTransition(epsilon, a1)
                self.states[firstId].addTransition(epsilon, secondId)
                self.states[a2].addTransition(epsilon, secondId)
                self.states[secondId].addTransition(epsilon, firstId)
                stateStack.push((firstId, secondId))
            elif opChar == '(':
                # do nothing; a ')' is consumed together with its '('
                if (op.getOpChar() == ')'):
                    return

        opStack.push(op)

    def evaluateRegExpression(reader):
        """Read a regular expression up to ';' and return its (start, stop) ids."""
        opStack = stack.Stack()
        stateStack = stack.Stack()
        # The whole expression is treated as if it were parenthesised.
        opStack.push(Operator("("))

        while not reader.peek(";"):
            token = reader.getToken()
            if token in "(+|.*)":
                op = Operator(token)
                operate(op, opStack, stateStack)
            else:
                # it is a character class set name
                firstId = newState()
                secondId = newState()
                self.states[firstId].addTransition(token, secondId)
                stateStack.push((firstId, secondId))

        operate(Operator(')'), opStack, stateStack)

        if (not opStack.isEmpty()):
            raise Exception("Malformed Regular Expression")

        return stateStack.pop()

    def newState():
        """Create an NFA state, store it in self.states and return its id.

        The counter is incremented first, so ids handed out here start one
        above the current ``self.numStates``.
        """
        self.numStates += 1
        aState = nfastate.NFAState(self.numStates)
        self.states[self.numStates] = aState
        return self.numStates

    reader = streamreader.StreamReader(instream)
    startStates = []  # start state id of every token read

    reader.skipComments()

    if reader.peek("#CLASSES"):
        reader.readUpTo("\n")
        while (not reader.peek("#")):
            # The "#" marks the beginning of the next section. Either KEYWORDS or TOKENS. KEYWORDS are optional.
            reader.skipComments()

            # We could have keywords right after a comment. So if keyword section is found, don't read
            # any more character classes.
            if not reader.peek("#KEYWORDS"):
                className = reader.readIdentifier()
                reader.readUpTo("=")
                if reader.peek("^"):
                    # Anti-class: start from 0..255 and remove the listed values.
                    anticlass = True
                    reader.readUpTo("^")
                    classSet = orderedcollections.OrderedSet(range(256))
                else:
                    anticlass = False
                    classSet = orderedcollections.OrderedSet()

                done = False

                while not done:
                    if reader.peek("'"):
                        # Found a character constant
                        reader.readUpTo("'")
                        character = reader.readUpTo("'")[0]
                        ordVal = ord(character)
                    else:
                        ordVal = reader.readInt()

                    # Add the end of the range if there is a range of characters
                    if reader.peek(".."):
                        reader.readUpTo("..")

                        if reader.peek("'"):
                            reader.readUpTo("'")
                            character = reader.readUpTo("'")[0]
                            lastOrdVal = ord(character)
                        else:
                            lastOrdVal = reader.readInt()
                    else:
                        lastOrdVal = ordVal

                    # Now build the set
                    for i in range(ordVal, lastOrdVal + 1):
                        if anticlass:
                            classSet.remove(i)
                        else:
                            classSet.add(i)

                    if reader.peek(","):
                        reader.readUpTo(",")
                    else:
                        done = True

                # Add the class to the class dictionary
                self.classes[className] = classSet

                reader.readUpTo(";")

    # keyword and token id numbers; keywords and tokens share one counter
    idnum = 0
    keywordsPresent = False

    if reader.peek("#KEYWORDS"):
        reader.readUpTo("#KEYWORDS")
        keywordsPresent = True
        reader.skipComments()

        while (not reader.peek("#TOKENS")):
            reader.readUpTo("'")
            keyword = reader.readUpTo("'")[:-1].strip()
            self.keywords[keyword] = idnum
            idnum += 1
            reader.readUpTo(";")
            reader.skipComments()

    reader.readUpTo("#TOKENS")
    reader.skipComments()
    readingFirstToken = True

    while not (reader.peek("#PRODUCTIONS") or reader.peek("#END")
               or reader.peek("#DEFINITIONS")):
        if reader.peek("'"):
            # Then the token was specified as a string like this:
            # '>=';
            reader.readUpTo("'")
            token = reader.readUpTo("'")[:-1].strip()

            # Build one state per character; each character becomes its own
            # single-element class named after the character.
            previousId = newState()
            startStateId = previousId

            # NOTE(review): for an empty quoted token this loop never runs
            # and nextId below is unbound or left over from an earlier
            # token; confirm empty tokens cannot occur.
            for c in token:
                nextId = newState()
                classSet = orderedcollections.OrderedSet([ord(c)])
                if not (c in self.classes and self.classes[c] == classSet):
                    self.classes[c] = classSet
                self.states[previousId].addTransition(c, nextId)
                previousId = nextId

            self.states[nextId].setAccepting(idnum)
            startStates.append(startStateId)
            reader.readUpTo(";")
            self.tokens[idnum] = token
            idnum += 1
            # NOTE(review): readingFirstToken is not cleared in this branch,
            # so with keywords present any string token read before the
            # first regular-expression token raises here; confirm intent.
            if readingFirstToken and keywordsPresent:
                raise Exception(
                    "First Token must be identifier token for matching keywords!"
                )

        else:
            # The token was specified as a regular expression like this:
            # identifier = letter.(letter|digit)*;
            name = reader.readUpTo("=")[:-1].strip()
            self.tokens[idnum] = name
            if readingFirstToken:
                self.firstTokenId = idnum
                readingFirstToken = False

            startStateId, stopStateId = evaluateRegExpression(reader)

            self.states[stopStateId].setAccepting(idnum)
            idnum += 1
            startStates.append(startStateId)

            reader.readUpTo(";")

        reader.skipComments()

    # Create a 0th State as the start state
    startState = nfastate.NFAState(0)
    self.numStates += 1
    self.states[0] = startState

    # The start state reaches the first state of every token by epsilon.
    for startId in startStates:
        self.states[0].addTransition(epsilon, startId)

    self.startStateId = 0

    reader.readUpTo("#END")
def buildMachine(self, instream):
    """Build this NFA from the scanner specification read from ``instream``.

    The input is read through a ``streamreader.StreamReader`` in this order:

    * an optional ``#CLASSES`` section of ``name = [^] item {, item};``
      entries, where an item is a quoted character or an integer ordinal,
      optionally followed by ``..`` and the last item of a range.  A leading
      ``^`` makes the class the complement of the listed items within
      ``range(256)``;
    * an optional ``#KEYWORDS`` section of quoted keywords, each ended by
      ``;``;
    * a ``#TOKENS`` section, ended by ``#PRODUCTIONS``, ``#DEFINITIONS`` or
      ``#END``, whose entries are either a quoted literal (``'>=';``) or a
      named regular expression (``identifier = letter.(letter|digit)*;``).

    Keywords and tokens share one id counter that starts at 0.  Every token
    gets its own sub-machine; state 0 is created last as the start state and
    receives an epsilon transition to the start of each sub-machine.

    Raises:
        Exception: if keywords are present and a quoted literal token
            appears before the first named regular-expression token, or if
            a regular expression has unbalanced parentheses or operands
            that are not joined by an operator.
    """

    def newState():
        # Every state must be created here so that it is registered in
        # self.states under its own id.
        self.numStates += 1
        self.states[self.numStates] = nfastate.NFAState(self.numStates)
        return self.numStates

    # Binding strength of the binary operators.  ')' is given the lowest
    # value so that it reduces everything back to the matching '('.
    precedence = {')': 0, '|': 1, '.': 2}

    def applyOperator(op, stateStack):
        """Pop the operand(s) of ``op`` and push the combined (start, end)."""
        if op == '*':
            innerStartId, innerEndId = stateStack.pop()
            startId = newState()
            endId = newState()
            # Enter the operand, or skip straight to its end (zero
            # repetitions); the new end loops back for more repetitions.
            self.states[startId].addTransition(epsilon, innerStartId)
            self.states[startId].addTransition(epsilon, innerEndId)
            self.states[innerEndId].addTransition(epsilon, endId)
            self.states[endId].addTransition(epsilon, startId)
        elif op == '.':
            # The top of the stack is the RIGHT operand.  Both operands
            # must be popped and chained left-then-right.
            rightStartId, rightEndId = stateStack.pop()
            leftStartId, leftEndId = stateStack.pop()
            startId = newState()
            endId = newState()
            self.states[startId].addTransition(epsilon, leftStartId)
            self.states[leftEndId].addTransition(epsilon, rightStartId)
            self.states[rightEndId].addTransition(epsilon, endId)
        else:  # '|'
            rightStartId, rightEndId = stateStack.pop()
            leftStartId, leftEndId = stateStack.pop()
            startId = newState()
            endId = newState()
            self.states[rightEndId].addTransition(epsilon, endId)
            self.states[leftEndId].addTransition(epsilon, endId)
            self.states[startId].addTransition(epsilon, rightStartId)
            self.states[startId].addTransition(epsilon, leftStartId)
        stateStack.push((startId, endId))

    def operate(op, opStack, stateStack):
        """Process operator ``op`` of the two-stack calculator algorithm.

        ``opStack`` holds pending operators and ``stateStack`` holds
        (start, end) state-id pairs.  Nothing is returned; both stacks are
        updated in place.
        """
        if op == '(':
            opStack.push(op)
            return
        if op == '*':
            # Postfix and tightest binding: applies to the operand that was
            # just completed, so it never needs to wait on the stack.
            applyOperator(op, stateStack)
            return
        while not opStack.isEmpty():
            topOp = opStack.pop()
            if topOp == '(':
                if op == ')':
                    return  # the parenthesis pair is matched and dropped
                opStack.push(topOp)
                break
            if precedence[topOp] < precedence[op]:
                opStack.push(topOp)
                break
            applyOperator(topOp, stateStack)
        else:
            # The stack ran out without reaching a '(': unbalanced input.
            raise Exception("Malformed Regular Expression")
        opStack.push(op)

    def evaluateRegExpression(reader):
        """Read a regular expression up to ``;`` and build its NFA fragment.

        Everything up to and including the ``=`` has already been consumed
        by the caller.  Returns the (start, stop) state ids; the caller
        marks the stop state as accepting.  The ``;`` is pushed back so the
        caller can consume it.
        """
        operatorStack = stack.Stack()
        operandStack = stack.Stack()
        # Sentinel parenthesis, closed by the final operate(')') below.
        operatorStack.push('(')
        token = reader.getToken()
        while token != ';':
            if token in ('(', ')', '|', '.', '*'):
                operate(token, operatorStack, operandStack)
            else:
                # Any other token is an operand: a two-state machine with a
                # single transition labelled with the token itself.
                startStateID = newState()
                endStateID = newState()
                self.states[startStateID].addTransition(token, endStateID)
                operandStack.push((startStateID, endStateID))
            token = reader.getToken()
        operate(')', operatorStack, operandStack)
        if not operatorStack.isEmpty():
            # An unclosed '(' absorbed the final ')' instead of the sentinel.
            raise Exception("Malformed Regular Expression")
        startStateID, endStateID = operandStack.pop()
        if not operandStack.isEmpty():
            # Operands left over means two of them had no operator between.
            raise Exception("Malformed Regular Expression")
        reader.unreadChar(';')
        return startStateID, endStateID

    def readOrdinal(reader):
        """Read one class item: a quoted character or an integer ordinal."""
        if reader.peek("'"):
            reader.readUpTo("'")
            return ord(reader.readUpTo("'")[0])
        return reader.readInt()

    ####################################################
    # This is the start of the buildMachine code here
    ####################################################
    reader = streamreader.StreamReader(instream)
    startStates = []
    reader.skipComments()

    if reader.peek("#CLASSES"):
        reader.readUpTo("\n")
        # The "#" marks the beginning of the next section, either KEYWORDS
        # or TOKENS.  KEYWORDS are optional.
        while not reader.peek("#"):
            reader.skipComments()
            # A section header may directly follow a comment; in that case
            # there are no more character classes to read.
            if reader.peek("#"):
                break
            className = reader.readIdentifier()
            reader.readUpTo("=")
            anticlass = bool(reader.peek("^"))
            if anticlass:
                reader.readUpTo("^")
                classSet = orderedcollections.OrderedSet(range(256))
            else:
                classSet = orderedcollections.OrderedSet()

            done = False
            while not done:
                ordVal = readOrdinal(reader)
                # Read the end of the range if a range was given.
                if reader.peek(".."):
                    reader.readUpTo("..")
                    lastOrdVal = readOrdinal(reader)
                else:
                    lastOrdVal = ordVal
                for i in range(ordVal, lastOrdVal + 1):
                    if anticlass:
                        classSet.remove(i)
                    else:
                        classSet.add(i)
                if reader.peek(","):
                    reader.readUpTo(",")
                else:
                    done = True

            self.classes[className] = classSet
            reader.readUpTo(";")

    # Keyword and token id numbers come from the same counter.
    idnum = 0
    keywordsPresent = False

    if reader.peek("#KEYWORDS"):
        reader.readUpTo("#KEYWORDS")
        keywordsPresent = True
        reader.skipComments()
        while not reader.peek("#TOKENS"):
            reader.readUpTo("'")
            keyword = reader.readUpTo("'")[:-1].strip()
            self.keywords[keyword] = idnum
            idnum += 1
            reader.readUpTo(";")
            reader.skipComments()

    reader.readUpTo("#TOKENS")
    reader.skipComments()
    readingFirstToken = True

    while not (reader.peek("#PRODUCTIONS") or reader.peek("#END")
               or reader.peek("#DEFINITIONS")):
        if reader.peek("'"):
            # The token was specified as a string like this:
            # '>=';
            reader.readUpTo("'")
            token = reader.readUpTo("'")[:-1].strip()
            previousId = newState()
            startStateId = previousId
            for c in token:
                nextId = newState()
                # Each literal character is its own single-element class.
                classSet = orderedcollections.OrderedSet([ord(c)])
                if not (c in self.classes and self.classes[c] == classSet):
                    self.classes[c] = classSet
                self.states[previousId].addTransition(c, nextId)
                previousId = nextId
            # previousId is the last state of the chain.  Using it instead
            # of the loop variable avoids a stale or undefined id when the
            # literal is empty.
            self.states[previousId].setAccepting(idnum)
            startStates.append(startStateId)
            reader.readUpTo(";")
            self.tokens[idnum] = token
            idnum += 1
            if readingFirstToken and keywordsPresent:
                raise Exception(
                    "First Token must be identifier token for matching keywords!"
                )
        else:
            # The token was specified as a regular expression like this:
            # identifier = letter.(letter|digit)*;
            name = reader.readUpTo("=")[:-1].strip()
            self.tokens[idnum] = name
            if readingFirstToken:
                self.firstTokenId = idnum
                readingFirstToken = False
            startStateId, stopStateId = evaluateRegExpression(reader)
            self.states[stopStateId].setAccepting(idnum)
            idnum += 1
            startStates.append(startStateId)
            reader.readUpTo(";")
        reader.skipComments()

    # Create a 0th state as the start state, with an epsilon transition to
    # the start of every token's machine.
    startState = nfastate.NFAState(0)
    self.numStates += 1
    self.states[0] = startState
    for startId in startStates:
        self.states[0].addTransition(epsilon, startId)
    self.startStateId = 0
    reader.readUpTo("#END")
def buildFromDFA(self, dfa):
    """Build this minimized DFA by partitioning the states of ``dfa``.

    ``self.stateMap`` maps each minimized state id to the set of DFA state
    ids it represents, and ``self.dfa2min`` is the reverse map.  The initial
    partitions are one set holding every non-accepting DFA state plus one
    set per accepted token id; ``finer`` is then applied to every partition
    until no partition can be split any further.  Finally the transitions,
    accepting ids, start state, classes, keywords and tokens are filled in
    from ``dfa``.
    """

    def newState():
        # Register a new minimized state and return its id.
        stateId = self.numStates
        self.states[stateId] = state.State(stateId)
        self.numStates += 1
        return stateId

    def transToMin(fromDFAStateId, onClass):
        # Partition reached from a DFA state on onClass, or -1 when the DFA
        # state has no transition on that class.
        dfaState = dfa.states[fromDFAStateId]
        if not dfaState.hasTransition(onClass):
            return -1
        return self.dfa2min[dfaState.onClassGoTo(onClass)]

    def onClasses(minStateId):
        # Every class on which some DFA state of the partition transitions.
        classes = orderedcollections.OrderedSet()
        for dfaStateId in self.stateMap[minStateId]:
            for onClass in dfa.states[dfaStateId].getTransitions():
                classes.add(onClass)
        return classes

    def finer(minStateId):
        """Split partition ``minStateId`` if it holds distinguishable states.

        Each state is compared with the lowest-numbered state of the
        partition.  A state that reaches a different partition than that
        first state on some class is moved into one new partition, shared
        by all states moved during this call.  Returns True if a state was
        moved.
        """
        dfaStateList = sorted(self.stateMap[minStateId])
        if not dfaStateList:
            # The non-accepting partition is empty when every DFA state
            # accepts; there is nothing to split.
            return False
        firstDFAStateId = dfaStateList[0]
        newMinStateId = None
        for currentDFAStateId in dfaStateList[1:]:
            for onClass in onClasses(minStateId):
                firstPartition = transToMin(firstDFAStateId, onClass)
                currentPartition = transToMin(currentDFAStateId, onClass)
                if firstPartition != currentPartition:
                    if newMinStateId is None:
                        newMinStateId = newState()
                        self.stateMap[newMinStateId] = (
                            orderedcollections.OrderedSet())
                    self.dfa2min[currentDFAStateId] = newMinStateId
                    self.stateMap[minStateId].discard(currentDFAStateId)
                    self.stateMap[newMinStateId].add(currentDFAStateId)
                    # The state has been moved; checking further classes
                    # would only repeat the same move.
                    break
        # Report whether a change occurred.
        return newMinStateId is not None

    def constructStates():
        # Give each minimized state the accepting id and the transitions of
        # the DFA states it represents, then copy the DFA-wide tables.
        for minStateId in self.states:
            minState = self.states[minStateId]
            for dfaStateId in self.stateMap[minStateId]:
                dfaState = dfa.states[dfaStateId]
                if dfaState.isAccepting():
                    minState.setAccepting(dfaState.getAcceptsTokenId())
                trans = dfaState.getTransitions()
                for onClass in trans:
                    if not minState.hasTransition(onClass):
                        minState.addTransition(
                            onClass, self.dfa2min[trans[onClass]])
        self.startStateId = self.dfa2min[dfa.startStateId]
        self.classes = dfa.classes
        self.keywords = dfa.keywords
        self.tokens = dfa.tokens

    startStateId = newState()
    self.stateMap = orderedcollections.OrderedMap()
    self.dfa2min = orderedcollections.OrderedMap()
    self.stateMap[startStateId] = orderedcollections.OrderedSet()

    # Build the initial state sets: one with all the non-final states in
    # it, and one for each accepted token id, since we want separate
    # accepting states for all the tokens of the dfa.
    for stateId in dfa.states:
        dfaState = dfa.states[stateId]
        if not dfaState.isAccepting():
            self.stateMap[startStateId].add(stateId)
            self.dfa2min[stateId] = startStateId
            continue
        # Either find the accepting partition this DFA state belongs to or
        # add another partition (i.e. state) for its token id.
        found = False
        for minStateId in self.states:
            minState = self.states[minStateId]
            if minState.getAcceptsTokenId() == dfaState.getAcceptsTokenId():
                self.stateMap[minStateId].add(stateId)
                self.dfa2min[stateId] = minStateId
                found = True
        if not found:
            finalStateId = newState()
            self.stateMap[finalStateId] = orderedcollections.OrderedSet(
                [stateId])
            self.dfa2min[stateId] = finalStateId
            self.states[finalStateId].setAccepting(
                dfaState.getAcceptsTokenId())

    # Partition by finding distinguishable states until nothing changes.
    # Partitions created during a pass are visited on the next pass.
    changing = True
    while changing:
        changing = False
        for stateId in range(self.numStates):
            if finer(stateId):
                changing = True

    constructStates()
def buildFromNFA(self, nfa):
    """Build this DFA from ``nfa`` using the subset construction.

    Each DFA state stands for a set of NFA state ids.  ``self.stateMap``
    maps a DFA state id to that set; a local ``nfa2dfa`` map goes the other
    way so that an already-seen set is reused instead of duplicated.  A DFA
    state accepts the token id of the first accepting NFA state found in
    its set.
    """

    def newState():
        # Add a new state to the map of stateIds to states and return the
        # new state id.
        stateId = self.numStates
        self.states[stateId] = state.State(stateId)
        self.numStates += 1
        return stateId

    def getAcceptingTokenId(stateSet):
        # Return the first accepting tokenId found in the NFA state set.
        # Otherwise, return None.
        for stateId in stateSet:
            if nfa.states[stateId].isAccepting():
                return nfa.states[stateId].getAcceptsTokenId()
        return None

    def EPSclosure(stateSet):
        # Return, as an OrderedFrozenSet, stateSet plus every NFA state id
        # reachable from it through epsilon transitions alone.
        closureSet = orderedcollections.OrderedSet(stateSet)
        unexploredStates = orderedcollections.OrderedSet(stateSet)
        while len(unexploredStates) != 0:
            stateID = unexploredStates.pop()
            for toStateID in nfa.states[stateID].onClassGoTo(epsilon):
                # Test against the closure, not the work list: a state that
                # was already explored must not be queued again, or an
                # epsilon cycle would never terminate.
                if toStateID not in closureSet:
                    closureSet.add(toStateID)
                    unexploredStates.add(toStateID)
        return orderedcollections.OrderedFrozenSet(closureSet)

    def nfaTransTo(fromStates, onClass):
        # Return the epsilon closure of the set of NFA states that can be
        # reached on onClass from this set of fromStates.
        toStates = orderedcollections.OrderedSet()
        for fromStateID in fromStates:
            for toStateID in nfa.states[fromStateID].onClassGoTo(onClass):
                toStates.add(toStateID)
        return EPSclosure(toStates)

    def gatherClasses(states):
        # Return the set of classes of characters, other than epsilon, that
        # are possible transitions from this set of NFA states.
        gatheredClasses = orderedcollections.OrderedSet()
        for stateID in states:
            for onClass in nfa.states[stateID].getTransitions():
                if onClass != epsilon:
                    gatheredClasses.add(onClass)
        return gatheredClasses

    def markAccepting(dfaStateId, nfaStates):
        # Token id 0 is a valid id, so compare against None rather than
        # relying on truthiness, and record the id itself.
        tokenId = getAcceptingTokenId(nfaStates)
        if tokenId is not None:
            self.states[dfaStateId].setAccepting(tokenId)

    # This is the beginning of the buildFromNFA method.
    # Copy over the classes.
    self.classes = nfa.classes

    # Create the start state and the DFA to NFA stateMap.
    self.startStateId = newState()
    self.stateMap = orderedcollections.OrderedMap()

    # The DFA start state maps to the epsilon closure of the NFA start
    # state, which is NFA state 0.
    startDFASet = EPSclosure(orderedcollections.OrderedSet([0]))
    self.stateMap[self.startStateId] = startDFASet
    markAccepting(self.startStateId, startDFASet)

    unexploredStates = orderedcollections.OrderedSet([self.startStateId])
    nfa2dfa = orderedcollections.OrderedMap()
    nfa2dfa[startDFASet] = self.startStateId

    # Explore DFA states until no unexplored ones remain.
    while len(unexploredStates) > 0:
        currentStateID = unexploredStates.pop()
        currentNFAStates = self.stateMap[currentStateID]
        for onClass in gatherClasses(currentNFAStates):
            toSet = nfaTransTo(currentNFAStates, onClass)
            if toSet not in nfa2dfa:
                # A new set of NFA states: create the DFA state for it.
                toDFAStateId = newState()
                self.stateMap[toDFAStateId] = toSet
                nfa2dfa[toSet] = toDFAStateId
                markAccepting(toDFAStateId, toSet)
                unexploredStates.add(toDFAStateId)
            else:
                toDFAStateId = nfa2dfa[toSet]
            self.states[currentStateID].addTransition(onClass, toDFAStateId)
def buildFromDFA(self, dfa):
    """Build this minimized DFA by partitioning the states of ``dfa``.

    ``self.min2dfa`` maps each minimized state (partition) id to the set of
    DFA state ids it contains and ``self.dfa2min`` is the reverse map.  The
    initial partitions are one set with all non-accepting DFA states plus
    one set per accepted token id.  ``finer`` is applied to every partition
    until none can be split, then the transitions of the minimized states
    are constructed.
    """

    def newState():
        # Register a new minimized state and return its id.
        stateId = self.numStates
        self.states[stateId] = state.State(stateId)
        self.numStates += 1
        return stateId

    # Returns the Minimized DFA Partition Id given
    # a DFA State and Input symbol class.
    def transToMinPartition(fromDFAStateId, onClass):
        # NOTE(review): relies on onClassGoTo yielding -1 when the state has
        # no transition on onClass; self.dfa2min[-1] is seeded below for
        # that case.  Confirm against the State class.
        goesTo = dfa.states[fromDFAStateId].onClassGoTo(onClass)
        return self.dfa2min[goesTo]

    # Returns an ordered set of all the character classes on which any DFA
    # state in a minimized DFA partition has a transition.
    def onClasses(minStateId):
        transitionsOn = orderedcollections.OrderedSet()
        for stateID in self.min2dfa[minStateId]:
            # The partition holds DFA state ids, so look them up in dfa.
            for classCh in dfa.states[stateID].getTransitions():
                transitionsOn.add(classCh)
        return transitionsOn

    def finer(minStateId):
        """Split partition ``minStateId`` if it holds distinguishable states.

        One state of the partition is taken as the reference.  Every other
        state that reaches a different partition than the reference on some
        class is collected; if any were collected they are removed from
        this partition and placed together in a newly created one.  Returns
        True if a new partition was created and False otherwise
        (Aho, Sethi, Ullman p. 142).
        """
        partition = self.min2dfa[minStateId]
        if len(partition) == 0:
            return False

        # Compare on every class used by ANY state of the partition.  Using
        # only the reference state's classes would miss states that have
        # extra transitions the reference lacks.
        classes = onClasses(minStateId)

        # Take the reference out so the inner loop sees only the others.
        firstStateID = partition.pop()
        distinguishedStates = orderedcollections.OrderedSet()
        for onClass in classes:
            firstGoesTo = transToMinPartition(firstStateID, onClass)
            for secondaryStateID in partition:
                secondGoesTo = transToMinPartition(secondaryStateID, onClass)
                if firstGoesTo != secondGoesTo:
                    distinguishedStates.add(secondaryStateID)
        # Put the reference state back into its partition.
        partition.add(firstStateID)

        if len(distinguishedStates) == 0:
            return False

        # Move all distinguishable states into a new partition.
        for stateID in distinguishedStates:
            partition.remove(stateID)
        newStateForM2DFA = newState()
        self.min2dfa[newStateForM2DFA] = distinguishedStates
        for stateID in distinguishedStates:
            self.dfa2min[stateID] = newStateForM2DFA
        return True

    # Run through all the minimized states and give each one the accepting
    # id and the transitions of the lowest-numbered DFA state in its
    # partition.
    def constructMinStateTransitions():
        for minStateId in self.states:
            minState = self.states[minStateId]
            dfaStateIds = sorted(self.min2dfa[minStateId])
            if not dfaStateIds:
                # The non-accepting partition is empty when every DFA state
                # accepts; such a state has nothing to copy.
                continue
            dfaState = dfa.states[dfaStateIds[0]]
            if dfaState.isAccepting():
                minState.setAccepting(dfaState.getAcceptsTokenId())
            minState.transitions = {}
            trans = dfaState.getTransitions()
            for onClass in trans:
                minState.addTransition(onClass, self.dfa2min[trans[onClass]])
        self.startStateId = self.dfa2min[dfa.startStateId]
        self.classes = dfa.classes

    startStateId = newState()
    self.min2dfa = orderedcollections.OrderedMap()
    self.dfa2min = orderedcollections.OrderedMap()
    # Map -1 to -1 to handle the value looked up by transToMinPartition
    # when a DFA state has no transition on a class.
    self.dfa2min[-1] = -1
    self.min2dfa[startStateId] = orderedcollections.OrderedSet()

    # Build the initial state sets: one with all the non-final states in
    # it, and one for each accepted token id, since we want separate
    # accepting states for all the tokens of the dfa.
    for stateId in dfa.states:
        dfaState = dfa.states[stateId]
        if not dfaState.isAccepting():
            self.min2dfa[startStateId].add(stateId)
            self.dfa2min[stateId] = startStateId
            continue
        # Either find the accepting partition this DFA state belongs to or
        # add another partition (i.e. state) for its token id.
        found = False
        for minStateId in self.states:
            minState = self.states[minStateId]
            if minState.getAcceptsTokenId() == dfaState.getAcceptsTokenId():
                self.min2dfa[minStateId].add(stateId)
                self.dfa2min[stateId] = minStateId
                found = True
        if not found:
            finalStateId = newState()
            self.min2dfa[finalStateId] = orderedcollections.OrderedSet(
                [stateId])
            self.dfa2min[stateId] = finalStateId
            self.states[finalStateId].setAccepting(
                dfaState.getAcceptsTokenId())

    self.startStateId = self.dfa2min[dfa.startStateId]

    # Repeatedly call finer on all states of the minimized DFA until no
    # more changes can be made.  Partitions created during a pass are
    # visited on the next pass.
    changed = True
    while changed:
        changed = False
        for stateID in range(self.numStates):
            if finer(stateID):
                changed = True

    # After the splitting is done, build the transitions in the new states.
    constructMinStateTransitions()
def buildFromNFA(self, nfa):
    """Build this DFA from ``nfa`` using the subset construction.

    Each DFA state stands for a set of NFA state ids.  A local ``stateMap``
    maps a DFA state id to that set and a local ``nfa2dfa`` map goes the
    other way so that an already-seen set is reused.  A DFA state accepts
    the token id of the first accepting NFA state found in its set.  The
    character classes and the token table are taken from ``nfa``.
    """

    def newState():
        ''' Add a new state to the map of stateIds to states in the state
        map. Return the new state id.'''
        new_State = state.State(self.numStates)
        self.states[self.numStates] = new_State
        self.numStates += 1
        return new_State.getId()

    def getAcceptingTokenId(stateSet):
        ''' Return the first accepting tokenId found in the NFA state set.
        Otherwise, return None. This is a pure lookup: it must not write
        into self.tokens.'''
        for stateid in stateSet:
            if nfa.states[stateid].isAccepting():
                return nfa.states[stateid].getAcceptsTokenId()
        return None

    def EPSclosure(stateSet):
        ''' Add to the closure set all NFA state Ids that are in the
        epsilon closure of this stateSet. Then return the OrderedFrozenSet
        of this closure set.'''
        closureSet = orderedcollections.OrderedSet(stateSet)
        unexploredStates = orderedcollections.OrderedSet(stateSet)
        while len(unexploredStates) != 0:
            stateID = unexploredStates.pop()
            for toStateID in nfa.states[stateID].onClassGoTo(epsilon):
                if toStateID not in closureSet:
                    closureSet.add(toStateID)
                    unexploredStates.add(toStateID)
        return orderedcollections.OrderedFrozenSet(closureSet)

    def nfaTransTo(fromStates, onClass):
        ''' Return the epsilon closure of the set of NFA states that you
        can get to on the class of characters (i.e. onClass) from this set
        of fromStates.'''
        toStates = orderedcollections.OrderedSet()
        for fromStateID in fromStates:
            toStates.update(nfa.states[fromStateID].onClassGoTo(onClass))
        return EPSclosure(toStates)

    def gatherClasses(states):
        ''' Return the set of classes of transitions (i.e. classes of
        characters) that are possible transitions from this set of NFA
        states.'''
        gatheredClasses = orderedcollections.OrderedSet()
        for stateID in states:
            for onClass in nfa.states[stateID].getTransitions():
                if onClass != epsilon:
                    gatheredClasses.add(onClass)
        return gatheredClasses

    def markAccepting(dfaStateId, nfaStates):
        ''' Make the DFA state accept the token id found in nfaStates, if
        there is one. Token id 0 is a valid id, so the result is compared
        against None instead of being tested for truthiness, and the id
        itself (not True) is recorded.'''
        tokenId = getAcceptingTokenId(nfaStates)
        if tokenId is not None:
            self.states[dfaStateId].setAccepting(tokenId)

    # This is the beginning of the buildFromNFA method.
    # Copy over the classes and the token table.
    self.classes = nfa.classes
    self.tokens = nfa.tokens

    # Create the start state and the DFA to NFA stateMap.
    self.startStateId = newState()
    stateMap = orderedcollections.OrderedMap()

    # Form the epsilon closure of the NFA start state (i.e. state 0) and
    # map the start state of the DFA to that set of NFA states.
    EPSstartState = EPSclosure(orderedcollections.OrderedSet([0]))
    stateMap[self.startStateId] = EPSstartState
    markAccepting(self.startStateId, EPSstartState)

    # Keep track of the new DFA states. The first new DFA state is the
    # start state.
    unexploredStates = orderedcollections.OrderedSet([self.startStateId])

    # Map each set of nfa state ids (as a frozen set) to its DFA state id.
    nfa2dfa = orderedcollections.OrderedMap()
    nfa2dfa[EPSstartState] = self.startStateId

    # Explore DFA states until no unexplored ones remain, creating a DFA
    # state for each new set of NFA states that is found.
    while len(unexploredStates) > 0:
        currentStateID = unexploredStates.pop()
        currentNFAStates = stateMap[currentStateID]
        for letter in gatherClasses(currentNFAStates):
            transitionsTo = nfaTransTo(currentNFAStates, letter)
            if transitionsTo not in nfa2dfa:
                toDFAStateID = newState()
                stateMap[toDFAStateID] = transitionsTo
                nfa2dfa[transitionsTo] = toDFAStateID
                markAccepting(toDFAStateID, transitionsTo)
                unexploredStates.add(toDFAStateID)
            else:
                toDFAStateID = nfa2dfa[transitionsTo]
            self.states[currentStateID].addTransition(letter, toDFAStateID)
def buildFromNFA(self, nfa):
    """Build this DFA from ``nfa`` using the subset construction.

    Each DFA state stands for a set of NFA state ids.  ``self.stateMap``
    maps a DFA state id to that set, while a local ``nfa2dfa`` map goes the
    other way so that an already-seen set is reused.  A DFA state accepts
    the token id of the first accepting NFA state found in its set.  The
    first token id, classes, keywords and tokens are taken from ``nfa``.
    """

    def newState():
        # Register a fresh DFA state under the next free id; return the id.
        createdId = self.numStates
        self.states[createdId] = state.State(createdId)
        self.numStates = createdId + 1
        return createdId

    def getAcceptingTokenId(stateSet):
        # Token id of the first accepting NFA state in the set, else None.
        for memberId in stateSet:
            member = nfa.states[memberId]
            if member.isAccepting():
                return member.getAcceptsTokenId()
        return None

    def EPSclosure(stateSet):
        # Frozen set of stateSet plus everything reachable from it through
        # epsilon transitions, found with an explicit stack.
        reached = orderedcollections.OrderedSet(stateSet)
        pending = stack.Stack()
        for seedId in stateSet:
            pending.push(seedId)
            reached.add(seedId)
        while not pending.isEmpty():
            outgoing = nfa.states[pending.pop()].getTransitions()
            if epsilon not in outgoing:
                continue
            for targetId in outgoing[epsilon]:
                if targetId in reached:
                    continue
                reached.add(targetId)
                pending.push(targetId)
        return orderedcollections.OrderedFrozenSet(reached)

    def nfaTransTo(fromStates, onClass):
        # Epsilon closure of the NFA states reachable on onClass.
        targets = orderedcollections.OrderedSet()
        for sourceId in fromStates:
            outgoing = nfa.states[sourceId].getTransitions()
            if onClass in outgoing:
                targets.update(outgoing[onClass])
        return EPSclosure(targets)

    def gatherClasses(states):
        # All non-epsilon classes labelling a transition out of the set.
        found = orderedcollections.OrderedSet()
        for memberId in states:
            for label in nfa.states[memberId].getTransitions():
                if label == epsilon:
                    continue
                found.add(label)
        return found

    # Tables shared with the NFA.
    self.tokens = nfa.tokens
    self.keywords = nfa.keywords
    self.classes = nfa.classes
    self.firstTokenId = nfa.firstTokenId

    self.startStateId = newState()
    self.stateMap = orderedcollections.OrderedMap()

    # The DFA start state corresponds to the epsilon closure of NFA state 0.
    startClosure = EPSclosure(orderedcollections.OrderedSet([0]))
    self.stateMap[self.startStateId] = startClosure

    worklist = orderedcollections.OrderedSet([self.startStateId])
    nfa2dfa = orderedcollections.OrderedMap()
    nfa2dfa[startClosure] = self.startStateId

    acceptedId = getAcceptingTokenId(startClosure)
    if acceptedId is not None:
        self.states[self.startStateId].setAccepting(acceptedId)

    while len(worklist) > 0:
        sourceDFAId = worklist.pop()
        sourceDFAState = self.states[sourceDFAId]
        # The transition alphabet comes from the underlying NFA states.
        sourceNFAStates = self.stateMap[sourceDFAId]
        for label in gatherClasses(sourceNFAStates):
            targetSet = nfaTransTo(sourceNFAStates, label)
            if targetSet not in nfa2dfa:
                # First time this set is seen: create its DFA state.
                createdId = newState()
                self.stateMap[createdId] = targetSet
                nfa2dfa[targetSet] = createdId
                worklist.add(createdId)
                acceptedId = getAcceptingTokenId(targetSet)
                if acceptedId is not None:
                    self.states[createdId].setAccepting(acceptedId)
            sourceDFAState.addTransition(label, nfa2dfa[targetSet])