class Lexer:
    """Table-driven lexer for the arithmetic grammar.

    Reads one line of *src* at a time, walks each character through a DFA
    until a final state is reached, and classifies the accumulated lexeme
    by matching it against the regex tables listed in ``tokens``.
    """

    # Each entry is (presumably) a dict mapping a regex pattern to its
    # token type — getToken iterates keys and indexes back into the dict.
    # TODO(review): confirm the .value of these enum members is a dict.
    tokens = [
        TokenType.DELIMETERS_ARIT.value,
        TokenType.IDENTIFIER_ARIT.value,
        TokenType.OPERATORS_ARIT.value,
        TokenType.TOKEN_NUM_ARIT.value,
    ]

    def __init__(self, src):
        """Open *src* for reading and prime the first line into the buffer.

        :param src: path to the source file to tokenize.
        """
        self.line = 1          # 1-based line number of the current buffer
        self.col = 0           # index of the next unread char in the buffer
        self.fp = open(src, "r")
        self.buffer = self.fp.readline()
        self.buffer_size = len(self.buffer)
        # DFA(7, ...) — 7 is presumably the number of states; ARIT_GOL
        # supplies (final states, transition table). TODO confirm order.
        self.dfa = DFA(7, ARIT_GOL[1], ARIT_GOL[0])
        # Fix: the original assigned a dead local `token_list = []`; the
        # comment on getToken ("adding them to the symbol") shows an
        # instance-level list was intended.
        self.token_list = []

    def close(self):
        """Release the underlying file handle (the original leaked it)."""
        self.fp.close()

    def hasNextTok(self):
        """Return True while input remains (readline() yields "" at EOF)."""
        return self.buffer != ""

    def getInput(self):
        """Advance the buffer to the next line of the source file.

        At EOF the buffer becomes "" and line/col bookkeeping is left
        untouched; hasNextTok() is the EOF check.
        """
        self.buffer = self.fp.readline()
        if self.buffer != "":
            self.buffer_size = len(self.buffer)
            self.line += 1
            self.col = 0

    def getToken(self):
        """Scan and return the next Token, or None at end of input.

        Resets the DFA, refills the buffer when the current line is
        exhausted, then consumes characters until the DFA reports a final
        state. The char that triggered the final state is NOT consumed
        (col is not advanced past it), matching the original behavior.
        """
        self.dfa.current_state = 0
        # Line exhausted (only the last char remains unread): fetch the next.
        if self.buffer_size == self.col + 1:
            self.getInput()
        if not self.hasNextTok():
            return None
        lexeme = ""
        # Feed chars to the DFA; stop when it enters a final state.
        while True:
            char = self.buffer[self.col]
            self.dfa.transition(char)
            if self.dfa.isFinalState():
                break
            lexeme += char
            self.col += 1
        # Classify the lexeme against each token class's regex table.
        # (Renamed loop vars: the original shadowed the builtin `type`.)
        for token_class in Lexer.tokens:
            for pattern in token_class:
                if re.match(pattern, lexeme):
                    return Token(token_class[pattern], lexeme, self.line)
        # No pattern matched. The original's trailing comment suggests an
        # exception was intended here; preserved as an explicit None so
        # existing callers are unaffected. NOTE(review): callers cannot
        # distinguish this from the EOF None above.
        return None