def __init__(self):
    """Set the lexer to its starting condition and preload the symbol table.

    The state machine starts in state 1, with no character waiting to be
    re-processed and an empty token buffer.
    """
    initial_words = (
        'Private', 'Public', 'Protected', 'Static',
        'Primary', 'Integer', 'Exception', 'Try',
    )

    self.state = 1
    self.put_back = False
    self.current_token = ''
    self.symbol_table = Trie()

    # Set up the initial words in the Trie.
    # NOTE(review): the meaning of the second argument is defined by
    # Trie.process_string (presumably "insert if missing") - confirm there.
    for initial_word in initial_words:
        self.symbol_table.process_string(initial_word, True)
def __init__(self):
    """Initialise the scanner attributes and seed the symbol table.

    Attributes set here:
        state         -- current state number of the state machine (1)
        put_back      -- whether the last character must be re-processed
        current_token -- text collected so far for the token being built
        symbol_table  -- Trie that receives the predefined words below
    """
    self.state, self.put_back, self.current_token = 1, False, ''
    self.symbol_table = Trie()

    predefined = 'Private Public Protected Static Primary Integer Exception Try'
    # NOTE(review): the True flag is interpreted by Trie.process_string;
    # its exact meaning is not visible here - confirm against Trie.
    for entry in predefined.split():
        self.symbol_table.process_string(entry, True)
class Lexer(object):
    """Character-at-a-time lexer driven by a small state machine.

    State numbers used by the machine:
        0 -- error: end of input reached inside a string
        1 -- start / between tokens
        2 -- special character accepted
        3 -- reading digits
        4 -- integer accepted (the terminating character is put back)
        5 -- reading letters
        6 -- identifier accepted (the terminating character is put back)
        7 -- inside a string
        8 -- escape character (~) seen inside a string
        9 -- string accepted
    """

    def __init__(self):
        """Reset the machine and load the predefined words into the Trie."""
        self.state = 1
        self.put_back = False
        self.current_token = ''
        self.symbol_table = Trie()
        default_id = [
            'Private', 'Public', 'Protected', 'Static',
            'Primary', 'Integer', 'Exception', 'Try',
        ]
        # NOTE(review): the second argument is interpreted by
        # Trie.process_string (presumably "insert") - confirm there.
        for word in default_id:
            self.symbol_table.process_string(word, True)

    def putting_back(self, file):
        """Return True once if the last character must be processed again.

        The pending flag is cleared as a side effect. ``file`` is not used;
        the parameter is kept so existing callers keep working.
        """
        if self.put_back:
            self.put_back = False
            return True
        return False

    def driver(self, filename):
        """Read ``filename`` one character at a time and print each token.

        The file is opened in a ``with`` block so it is closed even when
        token creation raises.
        """
        with open(filename, 'r') as source:
            while True:
                # Only fetch a new character when the previous one was
                # consumed; a put-back character is fed to the DFA again.
                if not self.putting_back(source):
                    char = source.read(1)
                if self.dfa(char):
                    # Single-argument print(...) works on Python 2 and 3.
                    print(self.create_token())
                if not char:  # read(1) returns '' at end of file
                    break

    def state_1(self, char):
        """Start state: choose the next state from the first character.

        Characters matching none of the categories leave the state at 1.
        """
        if char == '':
            return 0
        elif char.isspace():
            self.state = 1
        elif self.is_special_char(char):
            self.state = 2
        elif char.isdigit():
            self.state = 3
        elif char.isalpha():
            self.state = 5
        elif self.is_string_flag(char):
            self.state = 7

    def state_3(self, char):
        """Digits continue the integer; anything else accepts it."""
        if not char.isdigit():
            self.state = 4

    def state_5(self, char):
        """Letters continue the identifier; anything else accepts it."""
        if not char.isalpha():
            self.state = 6

    def state_7(self, char):
        """Inside a string: handle escape, closing quote and end of input."""
        if char == '':
            # End of input before the closing quote. This used to be
            # accepted as state 9, and the [1:-1] slice in create_token
            # then dropped the last real character of the string.
            self.state = 0
        elif self.is_tilda(char):
            self.state = 8
        elif self.is_string_flag(char):
            self.state = 9

    def state_8(self, char):
        """After '~': take the next character literally, back to state 7.

        End of input right after the escape is reported as an error
        instead of silently discarding the pending string.
        """
        self.state = 0 if char == '' else 7

    def dfa(self, char):
        """Advance the machine by one character.

        Returns True when an accepting (or error) state has been reached
        and ``create_token`` should be called, False otherwise.
        """
        dispatcher = {
            1: self.state_1,
            3: self.state_3,
            5: self.state_5,
            7: self.state_7,
            8: self.state_8,
        }
        dispatcher.get(self.state)(char)

        if self.state in {2, 3, 5, 7, 9}:
            # The character belongs to the token being built. State 8 is
            # absent on purpose: the escape character itself is dropped.
            self.current_token += char
        if self.state in {1, 3, 5, 7, 8}:
            return False
        if self.state in {4, 6}:
            # The terminating character starts the next token.
            self.put_back = True
        return True

    def create_token(self):
        """Build the Token for the accepted state and reset the machine."""
        if self.state == 2:
            special_char = {'(': 'lpar', ')': 'rpar', ';': 'semicolon'}
            attribute = 'zero'
            token_type = special_char[self.current_token[-1:]]
        elif self.state == 4:
            token_type = 'int'
            attribute = self.current_token
        elif self.state == 6:
            if self.valid_id(self.current_token):
                token_type = 'id'
                attribute = self.current_token
            else:
                token_type = 'error'
                attribute = 'zero'
        elif self.state == 9:
            token_type = 'string'
            # Strip the opening and closing quote.
            attribute = self.current_token[1:-1]
        else:
            # State 0: unterminated string.
            token_type = 'error'
            attribute = 'zero'
        self.current_token = ''
        self.state = 1
        return Token(token_type, attribute)

    def is_special_char(self, char):
        """Return True for '(' (40), ')' (41) or ';' (59)."""
        return ord(char) in {40, 41, 59}

    def is_string_flag(self, char):
        """Return True for the double quote character (code 34)."""
        return ord(char) == 34

    def is_tilda(self, char):
        """Return True for '~' (code 126)."""
        return ord(char) == 126

    def valid_id(self, id):
        """Return True unless the Trie reports -1 for ``id``.

        The flag passed to the Trie is False when ``id`` starts with an
        upper-case letter and True otherwise.
        NOTE(review): flag semantics are defined by Trie.process_string.
        """
        flag = not id[:1].isupper()
        return self.symbol_table.process_string(id, flag) != -1
class Lexer(object):
    """Character-at-a-time lexer driven by a small state machine.

    State numbers used by the machine:
        0 -- error (unexpected character or unterminated string)
        1 -- start / between tokens
        2 -- special character accepted
        3 -- reading digits
        4 -- integer accepted (the terminating character is put back)
        5 -- reading an identifier
        6 -- identifier accepted (the terminating character is put back)
        7 -- inside a string
        8 -- escape character (~) seen inside a string
        9 -- string accepted
    """

    def __init__(self):
        """Reset the machine and load the predefined words into the Trie."""
        self.state = 1
        self.put_back = False
        self.current_token = ''
        self.symbol_table = Trie()
        default_id = [
            'Private', 'Public', 'Protected', 'Static',
            'Primary', 'Integer', 'Exception', 'Try',
        ]
        # Set up the initial words in the Trie.
        # NOTE(review): the second argument is interpreted by
        # Trie.process_string (presumably "insert") - confirm there.
        for word in default_id:
            self.symbol_table.process_string(word, True)

    def is_special_char(self, char):
        """Return True for '(' (40), ')' (41) or ';' (59)."""
        return ord(char) in {40, 41, 59}

    def is_string_flag(self, char):
        """Return True for the double quote character (code 34)."""
        return ord(char) == 34

    def is_tilda(self, char):
        """Return True for '~' (code 126)."""
        return ord(char) == 126

    def putting_back(self):
        """Return True once if the last character must be processed again.

        The pending flag is cleared as a side effect.
        """
        if self.put_back:
            self.put_back = False
            return True
        return False

    def driver(self, filename):
        """Read ``filename`` one character at a time and print each token.

        The file is opened in a ``with`` block so it is closed even when
        token creation raises.
        """
        with open(filename, 'r') as file_pointer:
            while True:
                # Only fetch a new character when the previous one was
                # consumed; a put-back character is fed to the DFA again.
                if not self.putting_back():
                    char = file_pointer.read(1)
                if self.dfa(char):
                    # Single-argument print(...) works on Python 2 and 3.
                    print(self.create_token())
                if not char:  # read(1) returns '' at end of file
                    break

    def dfa(self, char):
        """Advance the machine by one character.

        Returns True when an accepting or error state has been reached
        and ``create_token`` should be called, False otherwise.
        """
        dispatcher = {
            1: self.state_1,
            3: self.state_3,
            5: self.state_5,
            7: self.state_7,
            8: self.state_8,
        }
        dispatcher.get(self.state)(char)

        if self.state in {2, 3, 5, 7, 9}:
            # Add the current character to the token being built. State 8
            # is absent on purpose: the escape character itself is dropped.
            self.current_token += char
        if self.state in {1, 3, 5, 7, 8}:
            # Not an accepting state yet.
            return False
        if self.state in {4, 6}:
            # The terminating character starts the next token.
            self.put_back = True
        return True

    def state_1(self, char):
        """Start state: choose the next state from the first character."""
        if char == '':
            return 0
        elif char.isspace():
            self.state = 1
        elif self.is_special_char(char):
            self.state = 2
        elif char.isdigit():
            self.state = 3
        elif char.isalpha():
            self.state = 5
        elif self.is_string_flag(char):
            self.state = 7
        else:
            self.state = 0

    def state_3(self, char):
        """Digits continue the integer; anything else accepts it."""
        if not char.isdigit():
            self.state = 4

    def state_5(self, char):
        """Lower-case letters continue the identifier; others accept it."""
        if not char.islower():
            self.state = 6

    def state_7(self, char):
        """Inside a string: handle escape, closing quote and end of input."""
        if char == '':
            self.state = 0
        elif self.is_tilda(char):
            self.state = 8
        elif self.is_string_flag(char):
            self.state = 9

    def state_8(self, char):
        """After '~': take the next character literally, back to state 7.

        End of input right after the escape is reported as an error
        (state 0), matching ``state_7``; previously the pending string
        was dropped without producing any token.
        """
        self.state = 0 if char == '' else 7

    def create_token(self):
        """Create the token for the state the DFA stopped in, then reset."""
        dispatcher = {
            0: self.error_token,
            2: self.special_char_token,
            4: self.int_token,
            6: self.id_token,
            9: self.string_token,
        }
        new_token = dispatcher.get(self.state)()
        self.current_token = ''
        self.state = 1
        return new_token

    def error_token(self):
        """Return the generic error token."""
        return Token('error', 'zero')

    def special_char_token(self):
        """Return the token for '(' , ')' or ';'."""
        special_char = {'(': 'lpar', ')': 'rpar', ';': 'semicolon'}
        return Token(special_char[self.current_token], 'zero')

    def int_token(self):
        """Return an 'int' token carrying the result of ``process_int``.

        ``process_int`` yields -1 for a rejected number; that value is
        passed through as the attribute.
        """
        return Token('int', self.process_int(self.current_token))

    def id_token(self):
        """Return an 'id' token, or an error token when the Trie gives -1.

        The Trie is consulted exactly once per identifier; the previous
        code called ``valid_id`` twice for every accepted identifier.
        """
        attribute = self.valid_id(self.current_token)
        if attribute == -1:
            return self.error_token()
        return Token('id', attribute)

    def string_token(self):
        """Return a 'string' token whose attribute is a list of characters.

        The surrounding quotes are stripped before the list is built.
        """
        return Token('string', list(self.current_token[1:-1]))

    def valid_id(self, id):
        """Return whatever ``Trie.process_string`` reports for ``id``.

        The flag passed to the Trie is False when ``id`` starts with an
        upper-case letter and True otherwise.
        NOTE(review): flag semantics are defined by Trie.process_string.
        """
        flag = not id[:1].isupper()
        return self.symbol_table.process_string(id, flag)

    def process_int(self, number):
        """Convert a digit string to an int, or return -1 if it is too big.

        Strings longer than five characters are rejected, as are
        five-character strings greater than '65534'.
        """
        if len(number) > 5:
            return -1
        if len(number) == 5:
            # Compare digit by digit until the outcome is decided.
            for digit, limit in zip(number, '65534'):
                if int(digit) < int(limit):
                    break
                if int(digit) > int(limit):
                    return -1
        processed_int = 0
        for digit in number:
            processed_int = processed_int * 10 + int(digit)
        return processed_int
class Lexer(object):
    """Character-at-a-time lexer driven by a small state machine.

    State numbers used by the machine:
        0 -- error (unexpected character or unterminated string)
        1 -- start / between tokens
        2 -- special character accepted
        3 -- reading digits
        4 -- integer accepted (the terminating character is put back)
        5 -- reading an identifier
        6 -- identifier accepted (the terminating character is put back)
        7 -- inside a string
        8 -- escape character (~) seen inside a string
        9 -- string accepted
    """

    def __init__(self):
        """Reset the machine and load the predefined words into the Trie."""
        self.state = 1
        self.put_back = False
        self.current_token = ''
        self.symbol_table = Trie()
        default_id = [
            'Private', 'Public', 'Protected', 'Static',
            'Primary', 'Integer', 'Exception', 'Try',
        ]
        # Set up the initial words in the Trie.
        # NOTE(review): the second argument is interpreted by
        # Trie.process_string (presumably "insert") - confirm there.
        for word in default_id:
            self.symbol_table.process_string(word, True)

    def is_special_char(self, char):
        """Return True for '(' (40), ')' (41) or ';' (59)."""
        return ord(char) in {40, 41, 59}

    def is_string_flag(self, char):
        """Return True for the double quote character (code 34)."""
        return ord(char) == 34

    def is_tilda(self, char):
        """Return True for '~' (code 126)."""
        return ord(char) == 126

    def putting_back(self):
        """Return True once if the last character must be processed again.

        The pending flag is cleared as a side effect.
        """
        if not self.put_back:
            return False
        self.put_back = False
        return True

    def driver(self, filename):
        """Read ``filename`` one character at a time and print each token.

        The file is opened in a ``with`` block so it is closed even when
        token creation raises.
        """
        with open(filename, 'r') as file_pointer:
            while True:
                # Only fetch a new character when the previous one was
                # consumed; a put-back character is fed to the DFA again.
                if not self.putting_back():
                    char = file_pointer.read(1)
                if self.dfa(char):
                    # Single-argument print(...) works on Python 2 and 3.
                    print(self.create_token())
                if not char:  # read(1) returns '' at end of file
                    break

    def dfa(self, char):
        """Advance the machine by one character.

        Returns True when an accepting or error state has been reached
        and ``create_token`` should be called, False otherwise.
        """
        dispatcher = {
            1: self.state_1,
            3: self.state_3,
            5: self.state_5,
            7: self.state_7,
            8: self.state_8,
        }
        dispatcher.get(self.state)(char)

        if self.state in {2, 3, 5, 7, 9}:
            # Add the current character to the token being built. State 8
            # is absent on purpose: the escape character itself is dropped.
            self.current_token += char
        if self.state in {1, 3, 5, 7, 8}:
            # Not an accepting state yet.
            return False
        if self.state in {4, 6}:
            # The terminating character starts the next token.
            self.put_back = True
        return True

    def state_1(self, char):
        """Start state: choose the next state from the first character."""
        if char == '':
            return 0
        elif char.isspace():
            self.state = 1
        elif self.is_special_char(char):
            self.state = 2
        elif char.isdigit():
            self.state = 3
        elif char.isalpha():
            self.state = 5
        elif self.is_string_flag(char):
            self.state = 7
        else:
            self.state = 0

    def state_3(self, char):
        """Digits continue the integer; anything else accepts it."""
        if not char.isdigit():
            self.state = 4

    def state_5(self, char):
        """Lower-case letters continue the identifier; others accept it."""
        if not char.islower():
            self.state = 6

    def state_7(self, char):
        """Inside a string: handle escape, closing quote and end of input."""
        if char == '':
            self.state = 0
        elif self.is_tilda(char):
            self.state = 8
        elif self.is_string_flag(char):
            self.state = 9

    def state_8(self, char):
        """After '~': take the next character literally, back to state 7.

        End of input right after the escape is reported as an error
        (state 0), matching ``state_7``; previously the pending string
        was dropped without producing any token.
        """
        if char == '':
            self.state = 0
        else:
            self.state = 7

    def create_token(self):
        """Create the token for the state the DFA stopped in, then reset."""
        dispatcher = {
            0: self.error_token,
            2: self.special_char_token,
            4: self.int_token,
            6: self.id_token,
            9: self.string_token,
        }
        new_token = dispatcher.get(self.state)()
        self.current_token = ''
        self.state = 1
        return new_token

    def error_token(self):
        """Return the generic error token."""
        return Token('error', 'zero')

    def special_char_token(self):
        """Return the token for '(' , ')' or ';'."""
        special_char = {'(': 'lpar', ')': 'rpar', ';': 'semicolon'}
        return Token(special_char[self.current_token], 'zero')

    def int_token(self):
        """Return an 'int' token carrying the result of ``process_int``.

        ``process_int`` yields -1 for a rejected number; that value is
        passed through as the attribute.
        """
        return Token('int', self.process_int(self.current_token))

    def id_token(self):
        """Return an 'id' token, or an error token when the Trie gives -1.

        The Trie is consulted exactly once per identifier; the previous
        code called ``valid_id`` twice for every accepted identifier.
        """
        lookup = self.valid_id(self.current_token)
        if lookup != -1:
            return Token('id', lookup)
        return self.error_token()

    def string_token(self):
        """Return a 'string' token whose attribute is a list of characters.

        The surrounding quotes are stripped before the list is built.
        """
        return Token('string', list(self.current_token[1:-1]))

    def valid_id(self, id):
        """Return whatever ``Trie.process_string`` reports for ``id``.

        The flag passed to the Trie is False when ``id`` starts with an
        upper-case letter and True otherwise.
        NOTE(review): flag semantics are defined by Trie.process_string.
        """
        flag = not id[:1].isupper()
        return self.symbol_table.process_string(id, flag)

    def process_int(self, number):
        """Convert a digit string to an int, or return -1 if it is too big.

        Strings longer than five characters are rejected, as are
        five-character strings greater than '65534'.
        """
        num_digits = len(number)
        if num_digits > 5:
            return -1
        if num_digits == 5:
            # Compare digit by digit until the outcome is decided.
            for digit, limit in zip(number, '65534'):
                if int(digit) < int(limit):
                    break
                if int(digit) > int(limit):
                    return -1
        processed_int = 0
        for digit in number:
            processed_int = processed_int * 10 + int(digit)
        return processed_int
class Lexer(object):
    """Character-at-a-time lexer driven by a small state machine.

    State numbers used by the machine:
        0 -- error: end of input reached inside a string
        1 -- start / between tokens
        2 -- special character accepted
        3 -- reading digits
        4 -- integer accepted (the terminating character is put back)
        5 -- reading letters
        6 -- identifier accepted (the terminating character is put back)
        7 -- inside a string
        8 -- escape character (~) seen inside a string
        9 -- string accepted
    """

    def __init__(self):
        """Reset the machine and load the predefined words into the Trie."""
        self.state = 1
        self.put_back = False
        self.current_token = ''
        self.symbol_table = Trie()
        default_id = [
            'Private', 'Public', 'Protected', 'Static',
            'Primary', 'Integer', 'Exception', 'Try',
        ]
        # NOTE(review): the second argument is interpreted by
        # Trie.process_string (presumably "insert") - confirm there.
        for word in default_id:
            self.symbol_table.process_string(word, True)

    def putting_back(self, file):
        """Return True once if the last character must be processed again.

        The pending flag is cleared as a side effect. ``file`` is not used;
        the parameter is kept so existing callers keep working.
        """
        if not self.put_back:
            return False
        self.put_back = False
        return True

    def driver(self, filename):
        """Read ``filename`` one character at a time and print each token.

        The file is opened in a ``with`` block so it is closed even when
        token creation raises.
        """
        with open(filename, 'r') as f:
            while True:
                # Only fetch a new character when the previous one was
                # consumed; a put-back character is fed to the DFA again.
                if not self.putting_back(f):
                    char = f.read(1)
                if self.dfa(char):
                    # Single-argument print(...) works on Python 2 and 3.
                    print(self.create_token())
                if not char:  # read(1) returns '' at end of file
                    break

    def state_1(self, char):
        """Start state: choose the next state from the first character.

        Characters matching none of the categories leave the state at 1.
        """
        if char == '':
            return 0
        elif char.isspace():
            self.state = 1
        elif self.is_special_char(char):
            self.state = 2
        elif char.isdigit():
            self.state = 3
        elif char.isalpha():
            self.state = 5
        elif self.is_string_flag(char):
            self.state = 7

    def state_3(self, char):
        """Digits continue the integer; anything else accepts it."""
        if not char.isdigit():
            self.state = 4

    def state_5(self, char):
        """Letters continue the identifier; anything else accepts it."""
        if not char.isalpha():
            self.state = 6

    def state_7(self, char):
        """Inside a string: handle escape, closing quote and end of input."""
        if char == '':
            # End of input before the closing quote. This used to be
            # accepted as state 9, and the [1:-1] slice in create_token
            # then dropped the last real character of the string.
            self.state = 0
        elif self.is_tilda(char):
            self.state = 8
        elif self.is_string_flag(char):
            self.state = 9

    def state_8(self, char):
        """After '~': take the next character literally, back to state 7.

        End of input right after the escape is reported as an error
        instead of silently discarding the pending string.
        """
        if char == '':
            self.state = 0
        else:
            self.state = 7

    def dfa(self, char):
        """Advance the machine by one character.

        Returns True when an accepting (or error) state has been reached
        and ``create_token`` should be called, False otherwise.
        """
        dispatcher = {
            1: self.state_1,
            3: self.state_3,
            5: self.state_5,
            7: self.state_7,
            8: self.state_8,
        }
        dispatcher.get(self.state)(char)

        if self.state in {2, 3, 5, 7, 9}:
            # The character belongs to the token being built. State 8 is
            # absent on purpose: the escape character itself is dropped.
            self.current_token += char
        if self.state in {1, 3, 5, 7, 8}:
            return False
        if self.state in {4, 6}:
            # The terminating character starts the next token.
            self.put_back = True
        return True

    def create_token(self):
        """Build the Token for the accepted state and reset the machine."""
        if self.state == 2:
            special_char = {'(': 'lpar', ')': 'rpar', ';': 'semicolon'}
            attribute = 'zero'
            token_type = special_char[self.current_token[-1:]]
        elif self.state == 4:
            token_type = 'int'
            attribute = self.current_token
        elif self.state == 6:
            if self.valid_id(self.current_token):
                token_type = 'id'
                attribute = self.current_token
            else:
                token_type = 'error'
                attribute = 'zero'
        elif self.state == 9:
            token_type = 'string'
            # Strip the opening and closing quote.
            attribute = self.current_token[1:-1]
        else:
            # State 0: unterminated string.
            token_type = 'error'
            attribute = 'zero'
        self.current_token = ''
        self.state = 1
        return Token(token_type, attribute)

    def is_special_char(self, char):
        """Return True for '(' (40), ')' (41) or ';' (59)."""
        return ord(char) in {40, 41, 59}

    def is_string_flag(self, char):
        """Return True for the double quote character (code 34)."""
        return ord(char) == 34

    def is_tilda(self, char):
        """Return True for '~' (code 126)."""
        return ord(char) == 126

    def valid_id(self, id):
        """Return True unless the Trie reports -1 for ``id``.

        The flag passed to the Trie is False when ``id`` starts with an
        upper-case letter and True otherwise.
        NOTE(review): flag semantics are defined by Trie.process_string.
        """
        flag = not id[:1].isupper()
        return self.symbol_table.process_string(id, flag) != -1