def number(self) -> Token:
    """Consume a numeric literal and wrap it in a token.

    A run of digits is read from the current position. When that run is
    immediately followed by ``.``, the dot and any digits after it are
    consumed too and the literal is converted with ``float``; otherwise the
    digits are converted with ``int``.

    Returns:
        Token: a ``REAL_CONST`` token holding a ``float`` or an
        ``INTEGER_CONST`` token holding an ``int``, created with
        ``self.t_line`` / ``self.t_column`` as its position.
    """
    token = Token(type=None, value=None, line=self.t_line, column=self.t_column)
    pieces = []

    def take_digits() -> None:
        # Collect consecutive digits, advancing the lexer past each one.
        while self.current_char is not None and self.current_char.isdigit():
            pieces.append(self.current_char)
            self.advance()

    take_digits()

    # No decimal point after the integer part: this is a plain integer.
    if self.current_char != ".":
        token.type = TokenType.INTEGER_CONST
        token.value = int("".join(pieces))
        return token

    # Keep the decimal point, then read the (possibly empty) fraction.
    pieces.append(self.current_char)
    self.advance()
    take_digits()
    token.type = TokenType.REAL_CONST
    token.value = float("".join(pieces))
    return token
def _id(self):
    """Read an identifier or reserved keyword starting at the current char.

    Alphanumeric characters are consumed until a non-alphanumeric character
    or the end of input is reached. The collected text is then looked up,
    upper-cased, in ``RESERVED_KEYWORDS``:

    * found  -> the token takes the keyword's type and the upper-cased text
      as its value (e.g. ``PROGRAM``, ``VAR``, ``BEGIN``);
    * absent -> the token is a ``TokenType.ID`` whose value is the text
      exactly as written (e.g. ``a``, ``b``, ``i``, ``j``).
    """
    token = Token(type=None, value=None, lineno=self.lineno, column=self.column)

    chars = []
    while True:
        ch = self.current_char
        if ch is None or not ch.isalnum():
            break
        chars.append(ch)
        self.advance()

    name = ''.join(chars)
    upper_name = name.upper()
    keyword_type = RESERVED_KEYWORDS.get(upper_name)

    if keyword_type is not None:
        # Reserved keyword: normalise the spelling to upper case.
        token.type = keyword_type
        token.value = upper_name
    else:
        # Ordinary identifier: keep the spelling as written.
        token.type = TokenType.ID
        token.value = name
    return token
def handle_with_id_tokens(self) -> Token:
    """Read an identifier or a reserved keyword.

    Characters are consumed while they are alphanumeric or equal to
    ``TokenType.UNDER_SCORE.value``. The collected text is looked up,
    upper-cased, in ``self.RESERVED_KEYWORDS``.

    Returns:
        Token: a token of the keyword's type with the upper-cased text as
        its value when the text is a reserved keyword; otherwise a
        ``TokenType.ID`` token whose value is the text as written.
    """
    token = Token(type=None, value=None, line=self.t_line, column=self.t_column)

    def is_identifier_char(ch) -> bool:
        # Alphanumerics are always accepted; anything else (including the
        # end-of-input marker None) is accepted only if it is the underscore.
        if ch is not None and ch.isalnum():
            return True
        return ch == TokenType.UNDER_SCORE.value

    collected = []
    while is_identifier_char(self.current_char):
        collected.append(self.current_char)
        self.advance()

    text = "".join(collected)
    upper_text = text.upper()
    keyword_type = self.RESERVED_KEYWORDS.get(upper_text)

    if keyword_type is not None:
        token.type = keyword_type
        token.value = upper_text
    else:
        token.type = TokenType.ID
        token.value = text
    return token
def number(self):
    """Read an integer or floating-point literal and return it as a token.

    The literal is a run of digits, optionally followed by a single ``.``
    and a further run of digits, e.g. ``12345`` or ``32.1213``. Literals
    containing a ``.`` become ``REAL_CONST`` tokens with a ``float`` value;
    all others become ``INTEGER_CONST`` tokens with an ``int`` value.
    """
    token = Token(type=None, value=None, lineno=self.lineno, column=self.column)

    chars = []
    seen_point = False
    while True:
        ch = self.current_char
        is_digit = ch is not None and ch.isdigit()
        # Only the first '.' belongs to the literal; a second one ends it.
        is_first_point = ch == '.' and not seen_point
        if not (is_digit or is_first_point):
            break
        if not is_digit:
            seen_point = True
        chars.append(ch)
        self.advance()

    text = ''.join(chars)
    if seen_point:
        token.type = TokenType.REAL_CONST
        token.value = float(text)
    else:
        token.type = TokenType.INTEGER_CONST
        token.value = int(text)
    return token
def loads(code):
    """Tokenize JSON text into a flat list of ``Token`` objects.

    Recognised input:

    * the six structural characters ``: , [ ] { }`` -> ``Type.auto`` tokens;
    * double-quoted strings, delegated to ``string_end`` -> ``Type.string``;
    * unsigned numbers, delimited by ``number_end`` -> ``Type.number``;
    * the literals ``true`` / ``false`` / ``null`` -> ``Type.keyword``.

    Whitespace between tokens is skipped.

    Args:
        code: the JSON source text.

    Returns:
        The list of tokens, or ``None`` when tokenizing fails (``string_end``
        reports ``-1``, a keyword is misspelled, or an unexpected character
        is met). For the latter two a diagnostic is printed first.
    """
    length = len(code)
    tokens = []
    spaces = ' \b\f\n\r\t'
    digits = '0123456789'
    keywords = dict(
        t='true',
        f='false',
        n='null',
    )
    # True while the next '"' starts a string; False while the next '"' is
    # the closing quote of a string that has already been consumed.
    is_open = True
    i = 0
    while i < length:
        c = code[i]
        i += 1  # from here on, i points just past c
        if c in spaces:
            continue
        elif c in ':,[]{}':
            t = Token(Type.auto, c)
            tokens.append(t)
        elif c == '"' and is_open:
            # Consume the string body.
            result, index = string_end(code, i)
            if index == -1:
                return
            t = Token(Type.string)
            t.value = result
            # Presumably index is the position of the closing quote, which
            # the next iteration then swallows -- confirm against string_end.
            i = index
            tokens.append(t)
            is_open = not is_open
        elif c == '"' and not is_open:
            is_open = not is_open
            continue
        elif c in digits:
            # Consume the number; offset counts the characters after c.
            offset = number_end(code, i)
            text = code[i - 1:i + offset]
            t = Token(Type.number)
            # Integers stay ints; anything int() rejects (fractions or
            # exponents, if number_end spans them) is parsed as a float.
            try:
                t.value = int(text)
            except ValueError:
                t.value = float(text)
            i += offset
            tokens.append(t)
        elif c in 'tfn':
            # true / false / null: verify the full spelling before accepting.
            word = keywords[c]
            start = i - 1
            if code[start:start + len(word)] != word:
                print("*** 错误", c, code[i:i + 10])
                return
            t = Token(Type.keyword)
            t.value = word
            tokens.append(t)
            # The first letter is already consumed, so skip only the rest;
            # skipping len(word) would drop the character after the keyword.
            i += len(word) - 1
        else:
            print("*** 错误", c, code[i:i + 10])
            return
    return tokens
def loads(code):
    """Tokenize JSON text into a flat list of ``Token`` objects.

    Structural characters become ``Type.auto`` tokens, double-quoted
    strings ``Type.string`` tokens (via ``string_end``), unsigned numbers
    ``Type.number`` tokens (via ``number_end``; the value is a ``float``
    when the text contains ``.`` and an ``int`` otherwise), and the literals
    ``true`` / ``false`` / ``null`` become ``Type.keyword`` tokens.
    Whitespace between tokens is skipped.

    Args:
        code: the JSON source text.

    Returns:
        The list of tokens, or ``None`` when tokenizing fails (``string_end``
        reports ``-1``, a keyword is misspelled, or an unexpected character
        is met). For the latter two a diagnostic is printed first.
    """
    i = 0
    length = len(code)
    tokens = []
    spaces = ' \b\f\n\r\t'
    digits = '0123456789'
    keywords = dict(
        t='true',
        f='false',
        n='null',
    )
    # True while the next '"' opens a string; False while the next '"' is
    # the closing quote of a string whose body was already consumed.
    is_open = True
    while i < length:
        c = code[i]
        i += 1  # i now points just past c
        if c in spaces:
            continue
        elif c in ':,[]{}':
            t = Token(Type.auto, c)
            tokens.append(t)
        elif c == '"' and is_open:
            result, index = string_end(code, i)
            if index != -1:
                t = Token(Type.string)
                t.value = result
                # Presumably index is where the closing quote sits; the
                # next iteration consumes it -- confirm against string_end.
                i = index
                tokens.append(t)
                is_open = not is_open
            else:
                return
        elif c == '"' and not is_open:
            is_open = not is_open
            continue
        elif c in digits:
            # offset counts the number's characters that follow c.
            offset = number_end(code, i)
            t = Token(Type.number)
            s = code[i - 1:i + offset]
            # A decimal point marks a float.
            if '.' in s:
                t.value = float(s)
            else:
                t.value = int(s)
            i += offset
            tokens.append(t)
        elif c in 'tfn':
            # true / false / null: check the whole word is really there.
            word = keywords[c]
            start = i - 1
            if code[start:start + len(word)] != word:
                print("*** 错误", c, code[i:i + 10])
                return
            t = Token(Type.keyword)
            t.value = word
            tokens.append(t)
            # c is already consumed: advance past the remaining letters only,
            # otherwise the character right after the keyword is lost.
            i += len(word) - 1
        else:
            print("*** 错误", c, code[i:i + 10])
            return
    return tokens
def string(self) -> Token:
    """Return a literal string token (STRING_CONST).

    The character under the cursor on entry is skipped (presumably the
    opening quote -- confirm with the caller). Alphabetic characters and
    characters listed in ``self.SINGLE_CHARACTERS`` are then collected; each
    run of whitespace inside the literal contributes one space to the value.
    The character that ends the scan is skipped as well (presumably the
    closing quote).

    Reaching the end of input inside the literal ends the scan instead of
    raising on ``None``.

    Returns:
        Token: a token representing a literal string.
    """
    token = Token(type=None, value=None, line=self.t_line, column=self.t_column)
    self.advance()

    value = ""
    # The None check guards both alternatives: `None in SINGLE_CHARACTERS`
    # would raise TypeError when SINGLE_CHARACTERS is a str.
    while self.current_char is not None and (
        self.current_char.isalpha()
        or self.current_char in self.SINGLE_CHARACTERS
    ):
        value += self.current_char
        self.advance()

        # advance() may have hit end of input, so test for None before
        # calling a str method on the current character.
        if self.current_char is not None and self.current_char.isspace():
            value += " "
            self.skip_whitespace()

    self.advance()
    token.type = TokenType.STRING_CONST
    token.value = value
    return token
def json_tokens(code):
    """Split JSON text into a flat list of ``Token`` objects.

    Recognised input:

    * the six structural characters ``: , { } [ ]`` -> ``Type.auto`` tokens;
    * double-quoted strings, delegated to ``string_end`` -> ``Type.string``;
    * runs of decimal digits -> ``Type.number`` tokens whose value is the
      digit text (a ``str``); fractions and negative numbers are not
      supported;
    * words starting with ``t`` / ``f`` / ``n`` -> ``Type.keyword`` tokens
      valued ``true`` / ``false`` / ``null``.

    Whitespace and any unrecognised character are skipped silently.

    Args:
        code: the JSON source text.

    Returns:
        The list of tokens in source order.
    """
    length = len(code)
    tokens = []
    spaces = ' \n\t\r'
    digits = '1234567890'
    keywords = {'t': 'true', 'f': 'false', 'n': 'null'}
    # Index of the next unread character.
    i = 0
    while i < length:
        c = code[i]
        i += 1  # i now points just past c
        if c in spaces:
            # Whitespace (space, tab, newline, carriage return) is skipped.
            continue
        elif c in ':,{}[]':
            # One of the six single-character symbols.
            tokens.append(Token(Type.auto, c))
        elif c == '"':
            # String: string_end returns the text and an index; scanning
            # resumes one past that index (presumably the closing quote).
            s, offset = string_end(code, i)
            i = offset + 1
            tokens.append(Token(Type.string, s))
        elif c in digits:
            # Number: extend to the first non-digit, or to the end of the
            # input when the digits run all the way to the end.
            end = i
            while end < length and code[end] in digits:
                end += 1
            tokens.append(Token(Type.number, code[i - 1:end]))
            i = end
        elif c in keywords:
            # NOTE(review): only the first letter is checked; the rest of
            # the word is assumed to spell the keyword.
            word = keywords[c]
            t = Token(Type.keyword)
            t.value = word
            tokens.append(t)
            # c is already consumed, so skip only the remaining letters;
            # skipping len(word) would drop the character after the keyword.
            i += len(word) - 1
        else:
            # Unrecognised character: ignored (best-effort tokenizing).
            pass
    return tokens
def next_token(self):
    """Scan the source text and return the next token.

    Drives a table-based state machine: each character is classified with
    ``character_look_up`` and the ``next_state`` / ``action`` tables decide
    whether it extends the token being built, ends it, or is illegal.

    Returns:
        Token: a token whose ``type`` is ``int(look_up)`` and whose ``value``
        is the accumulated text. If ``self.eof()`` is true on entry, the
        returned token has ``value`` set to ``None``.

    Raises:
        SyntaxError: when the tables report ERROR for the current character
            and its class code is neither 30 nor 31.
    """
    found = False
    tok = Token()
    # End of input is only checked once, on entry.
    if self.eof():
        tok.value=None
        return tok
    while not found:
        # Read a new character unless one is buffered from the previous
        # token; a buffered space or newline is discarded and replaced.
        # NOTE(review): this read is not bounds-checked inside the loop --
        # confirm the index cannot run past the end of source_text mid-token.
        if (not self.buffered) or (self.current_char == ' ') or (self.current_char == '\n'):
            self.current_char = self.source_text[self.index]
            self.index += 1
            self.log("Current char: "+self.current_char+", EOF Status: "
                     +str(self.index == len(self.source_text)), self.L_DEBUG)
        # Map the character to the class code used to index the tables.
        self.current_read = self.character_look_up(self.current_char)
        # Debug snapshot of the scanner state
        cur_stats = "Current state: "+str(self.state)+", "
        cur_stats += "current_char: "+str(self.current_char)+", "
        cur_stats += "current_read: "+str(self.current_read)+", "
        cur_stats += "token status: "+str(self.token_under_construction)
        self.log(cur_stats, self.L_DEBUG)
        # Adding to token: a valid transition exists and the action is
        # CONTINUE, so the character joins the token and the state advances.
        if ((self.next_state(self.state, self.current_read) != -1) and
                (self.action(self.state, self.current_read) == CONTINUE)):
            self.buffered = False
            self.token_under_construction += self.current_char
            self.state = self.next_state(self.state, self.current_read)
        # Halting: no transition and the action is HALT, so the token is
        # complete. The current character is NOT part of it; it stays
        # buffered for the next call.
        elif ((self.next_state(self.state, self.current_read) == -1) and
                (self.action(self.state, self.current_read) == HALT)):
            look_up = self.look_up(self.state, self.current_read)
            self.log("Inside switch with state "+str(self.state), self.L_DEBUG)
            self.log("The look-up value is "+str(look_up), self.L_DEBUG)
            self.log("We have a buffered char of '"+self.current_char+"'", self.L_DEBUG)
            self.buffered = True
            self.log_token(self.token_look_up(look_up))
            tok.type = int(look_up)
            tok.value = self.token_under_construction
            # Return to S0
            self.state = 0
            # Reset token
            self.token_under_construction = ""
            found = True
        # Syntax Error: no transition and the action is ERROR. Class codes
        # 30 and 31 are exempt (presumably whitespace classes -- confirm
        # against character_look_up); for those the loop simply continues.
        elif ((self.next_state(self.state, self.current_read) == -1) and
                (self.action(self.state, self.current_read) == ERROR) and
                (self.current_read != 30) and (self.current_read != 31)):
            self.log("Illegal character '"+self.current_char+"'", self.L_ERROR)
            raise SyntaxError("Illegal character '"+self.current_char+"'")
    # End while
    return tok