def next_token(self):
    # Emit a token for the current single character, then advance the lexer.
    ch = self.ch
    if ch == '=':
        to = Token(TokenType.ASSIGN, ch)
    elif ch == '+':
        to = Token(TokenType.PLUS, ch)
    elif ch == '-':
        to = Token(TokenType.MINUS, ch)
    elif ch == '(':
        to = Token(TokenType.LPAREN, ch)
    elif ch == ')':
        to = Token(TokenType.RPAREN, ch)
    elif ch == '{':
        to = Token(TokenType.LBRACE, ch)
    elif ch == '}':
        to = Token(TokenType.RBRACE, ch)
    elif ch == ',':
        to = Token(TokenType.COMMA, ch)
    elif ch == ';':
        to = Token(TokenType.SEMICOLON, ch)
    else:
        to = Token(TokenType.ILLEGAL, ch)
    self.read_char()
    return to
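# A minimal driver sketch for next_token() above. The Lexer constructor and
# its priming behavior are assumptions, not part of the snippet: it presumes
# Lexer(source) sets self.ch to the first character and read_char() advances.
def _demo_next_token():
    lexer = Lexer("=+{};")
    for _ in range(5):
        tok = lexer.next_token()
        print(tok.type, tok.literal)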
def __init__(self, name, func, *args):
    Token.__init__(self)
    self.value = name
    self.func = func
    self.args = args
    self.arg_len = self.func.__code__.co_argcount
    # CO_VARARGS (0x04) is set on functions declared with *args. Checking the
    # flag is reliable; comparing len(co_varnames) against co_argcount is not,
    # because co_varnames also counts the function's local variables.
    self.has_unpack_args = bool(self.func.__code__.co_flags & 0x04)
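# Hedged usage sketch for the builtin-function token above; the class name
# Builtin and the surrounding interpreter are assumptions. With the
# CO_VARARGS check, only the variadic wrapper reports has_unpack_args.
def _demo_builtin_flags():
    fixed = Builtin('first', lambda xs: xs[0])  # has_unpack_args == False
    variadic = Builtin('list', lambda *xs: xs)  # has_unpack_args == True
    print(fixed.has_unpack_args, variadic.has_unpack_args)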
def test_simple_tokens(self):
    source = "=+-,(){};"
    tests = [
        Token(TokenType.ASSIGN, '='),
        Token(TokenType.PLUS, '+'),
        Token(TokenType.MINUS, '-'),
        Token(TokenType.COMMA, ','),
        Token(TokenType.LPAREN, '('),
        Token(TokenType.RPAREN, ')'),
        Token(TokenType.LBRACE, '{'),
        Token(TokenType.RBRACE, '}'),
        Token(TokenType.SEMICOLON, ';'),
    ]
    lexer_obj = Lexer(source)
    for t in tests:
        token = lexer_obj.next_token()
        self.assertEqual(token.type, t.type)
        self.assertEqual(token.literal, t.literal)
def __init__(self, name, args, expr, env):
    Token.__init__(self)
    self.value = name
    self.args = [arg.value for arg in args]  # parameter names from arg tokens
    self.expr = expr
    self.env = env
def tokenize(self, code):
    # Relies on module-scope names: re, Token, FA, PythonLexer.
    tokens = []
    last_token = None
    curr_token_pos_start = 0
    acceptors = PythonLexer.get_all_acceptors()
    pos = 0
    while pos < len(code):
        # Feed characters to every live acceptor (maximal-munch scan).
        while acceptors:
            next_acceptors = []
            for acceptor in acceptors:
                if acceptor.move_next(code[pos]):
                    if acceptor.isAccepted():
                        token_type = acceptor.current_state
                        # Look-ahead states accept one character past the
                        # token, so exclude that character from the value.
                        if token_type not in FA.STATE_WITH_LOOK_AHEAD:
                            token_value = code[curr_token_pos_start:pos + 1]
                        else:
                            token_value = code[curr_token_pos_start:pos]
                        last_token = Token(token_type, token_value)
                    else:
                        next_acceptors.append(acceptor)
            acceptors = next_acceptors
            if acceptors:
                pos += 1
                if pos == len(code):
                    if not acceptors[0].isAccepted():
                        last_token = None
                    break
        if last_token:
            if last_token.type not in FA.STATE_WITH_LOOK_AHEAD:
                pos += 1
            tokens.append(last_token)
            last_token = None
            acceptors = PythonLexer.get_all_acceptors()
            curr_token_pos_start = pos
        elif pos < len(code) and (code[pos] == ' ' or code[pos - 1] == '\n'):
            # Skip separators and restart all acceptors on the next character.
            pos += 1
            last_token = None
            acceptors = PythonLexer.get_all_acceptors()
            curr_token_pos_start = pos
        else:
            fragment = code[curr_token_pos_start:pos]
            if re.fullmatch(r"^['\"].*\n$", code[curr_token_pos_start:pos + 1],
                            flags=re.MULTILINE):
                raise ValueError('Tokens: {0}\nUnclosed string starting at {1}\n'
                                 'Value: {2}'.format(tokens, curr_token_pos_start, fragment))
            elif re.fullmatch(r"^('''|\"\"\")(.|\n)*$", code[curr_token_pos_start:pos + 1],
                              flags=re.MULTILINE):
                raise ValueError('Tokens: {0}\nUnclosed multiline string starting at {1}\n'
                                 'Value: {2}'.format(tokens, curr_token_pos_start, fragment))
            else:
                raise ValueError('Tokens: {0}\nUnexpected ID starting at {1}\n'
                                 'Value: {2}'.format(tokens, curr_token_pos_start, fragment))
    return tokens
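# Usage sketch for the acceptor-driven tokenizer above. That PythonLexer takes
# no constructor arguments, and that Token exposes .type and .value, are
# assumptions made for illustration.
def _demo_tokenize():
    lexer = PythonLexer()
    for token in lexer.tokenize("x = 1\n"):
        print(token.type, token.value)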
def create_operator(self, value, str_position):
    return Token(Tokens.OPERATOR, value, str_position)

def create_number(self, value, str_position):
    return Token(Tokens.NUMBER, value, str_position)

def create_right_parenthesis(self, str_position):
    return Token(Tokens.RIGHT_PARENTHESES, ")", str_position)

def create_left_parenthesis(self, str_position):
    return Token(Tokens.LEFT_PARENTHESES, "(", str_position)
def __init__(self, value):
    Token.__init__(self, value)
def generate_tokens(self):
    while self.char is not None:
        if self.char in " \t\n":
            self.advance()  # skip whitespace
        elif self.char in digits:  # digits is assumed imported, e.g. string.digits
            yield Token(TokenTypes.float_, self.generate_num())
        elif self.char == "+":
            self.advance()
            yield Token(TokenTypes.plus)
        elif self.char == "-":
            self.advance()
            yield Token(TokenTypes.minus)
        elif self.char == "*":
            self.advance()
            yield Token(TokenTypes.mul)
        elif self.char == "/":
            self.advance()
            yield Token(TokenTypes.div)
        elif self.char == "%":
            self.advance()
            yield Token(TokenTypes.mod)
        elif self.char == ",":
            self.advance()
            yield Token(TokenTypes.comma)
        elif self.char in "'\"":
            yield Token(TokenTypes.string, self.generate_string(self.char))
        elif self.char == "(":
            yield Token(TokenTypes.lparen)
            self.advance()
        elif self.char == ")":
            yield Token(TokenTypes.rparen)
            self.advance()
        else:
            yield Token(TokenTypes.word, self.generate_word())
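# Consuming the generator above; the Lexer constructor is an assumption.
# Tokens stream lazily, so list() forces the whole scan at once.
def _demo_generate_tokens():
    lexer = Lexer("1 + 2, 'hi'")
    tokens = list(lexer.generate_tokens())
    print(tokens)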
def __init__(self, value):
    # Prefer an exact int; fall back to float when the literal has a
    # fractional part (int('3.5') raises ValueError).
    try:
        value = int(value)
    except ValueError:
        value = float(value)
    Token.__init__(self, value)
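# The try/except order above keeps integral literals as ints while anything
# with a decimal point becomes a float. Class name NumberToken and the
# .value attribute are assumptions for illustration:
# NumberToken('3').value   -> 3 (int)
# NumberToken('3.5').value -> 3.5 (float)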