def handle_with_id_tokens(self) -> Token:
    """Handle identifiers and reserved keywords.

    Consumes a run of alphanumeric/underscore characters starting at the
    current position. If the upper-cased lexeme is a reserved keyword,
    the keyword's token type is used and the value is upper-cased;
    otherwise a plain ID token is produced with the value as written.

    Returns:
        Token: a reserved-keyword token or an identifier token.
    """
    token = Token(type=None, value=None, line=self.t_line,
                  column=self.t_column)
    value = ""
    # Parenthesized so the None-guard covers BOTH character tests. The
    # original read `A and B or C`, which only worked because
    # `None == '_'` happens to be falsy — fragile, now explicit.
    while self.current_char is not None and (
            self.current_char.isalnum()
            or self.current_char == TokenType.UNDER_SCORE.value):
        value += self.current_char
        self.advance()
    token_type = self.RESERVED_KEYWORDS.get(value.upper())
    if token_type is None:
        token.type = TokenType.ID
        token.value = value
    else:
        # Keywords are stored/emitted upper-cased.
        token.type = token_type
        token.value = value.upper()
    return token
def number(self) -> Token:
    """Return a (multidigit) integer or float consumed from the input.

    Returns:
        Token: an INTEGER_CONST token, or a REAL_CONST token when the
        digit run is followed by a '.' and a fractional part.
    """
    tok = Token(type=None, value=None, line=self.t_line,
                column=self.t_column)
    digits = []
    while self.current_char is not None and self.current_char.isdigit():
        digits.append(self.current_char)
        self.advance()
    if self.current_char != ".":
        # No decimal point: plain integer literal.
        tok.type = TokenType.INTEGER_CONST
        tok.value = int("".join(digits))
        return tok
    # Consume the dot and the fractional digits.
    digits.append(self.current_char)
    self.advance()
    while self.current_char is not None and self.current_char.isdigit():
        digits.append(self.current_char)
        self.advance()
    tok.type = TokenType.REAL_CONST
    tok.value = float("".join(digits))
    return tok
def _id(self):
    """Scan an alphanumeric word and return a keyword or identifier token.

    Reserved keywords (e.g. 'PROGRAM', 'VAR', 'BEGIN') yield their
    dedicated token type with an upper-cased value; anything else
    (e.g. 'a', 'b', 'i', 'j') yields an ID token with the value as read.
    """
    tok = Token(type=None, value=None, lineno=self.lineno,
                column=self.column)
    chars = []
    # Accumulate letters and digits.
    while self.current_char is not None and self.current_char.isalnum():
        chars.append(self.current_char)
        self.advance()
    word = ''.join(chars)
    keyword_type = RESERVED_KEYWORDS.get(word.upper())
    if keyword_type is not None:
        # Reserved keyword: canonical upper-case value.
        tok.type, tok.value = keyword_type, word.upper()
    else:
        tok.type, tok.value = TokenType.ID, word
    return tok
def number(self):
    """Scan a multi-digit numeric literal and return its token.

    Produces an INTEGER_CONST token (e.g. '12345') or, when a decimal
    point follows the digits, a REAL_CONST token (e.g. '32.1213').
    """
    token = Token(type=None, value=None, lineno=self.lineno,
                  column=self.column)
    digits = []
    while self.current_char is not None and self.current_char.isdigit():
        digits.append(self.current_char)
        self.advance()
    if self.current_char == '.':
        # Fractional part: take the dot plus the digits after it.
        digits.append(self.current_char)
        self.advance()
        while self.current_char is not None and self.current_char.isdigit():
            digits.append(self.current_char)
            self.advance()
        token.type = TokenType.REAL_CONST
        token.value = float(''.join(digits))
    else:
        token.type = TokenType.INTEGER_CONST
        token.value = int(''.join(digits))
    return token
def string(self) -> Token:
    """Return a literal string token (STRING_CONST).

    Skips the opening quote, accumulates letters and characters from
    SINGLE_CHARACTERS, collapsing each run of internal whitespace into a
    single space, then skips the closing quote.

    Returns:
        Token: a token representing a literal string.

    Fix: the original condition read
    `A and B or C`, so when current_char became None at end of input
    `None in self.SINGLE_CHARACTERS` could raise TypeError, and the
    unguarded `self.current_char.isspace()` raised AttributeError.
    Both checks are now None-guarded; behavior on well-formed input is
    unchanged.
    """
    token = Token(type=None, value=None, line=self.t_line,
                  column=self.t_column)
    self.advance()  # skip the opening quote
    value = ""
    while self.current_char is not None and (
            self.current_char.isalpha()
            or self.current_char in self.SINGLE_CHARACTERS):
        value += self.current_char
        self.advance()
        # Collapse a whitespace run inside the literal into one space.
        if self.current_char is not None and self.current_char.isspace():
            value += " "
            self.skip_whitespace()
    self.advance()  # skip the closing quote
    token.type = TokenType.STRING_CONST
    token.value = value
    return token
def punct(self):
    # Scan an operator / punctuation token starting at the current char.
    # NOTE(review): line breaks/indentation were lost in this source; the
    # layout below is a best-effort, token-preserving reconstruction —
    # confirm against version control before trusting the control flow.
    curr_char = self.curr_char()
    self.forward()
    # Try greedily tagging a second character and if no match then discard it
    next_next = self.curr_char()
    if next_next:
        next_next = curr_char + next_next
        tk = Token(next_next, None, self.line, 1)
        if tk.type(
        ) == "IDENTIFIER" or curr_char == '+' or curr_char == '-':
            # also prevents - -- and + ++ mixup
            tk = Token(curr_char, None, self.line, 1)
        else:
            self.forward()  # adjust for extra character
    # Addop situations: a leading +/- with no preceding operand looks like
    # a unary operator — encoded by doubling the character.
    if not self.addop_flag and (curr_char == '+' or curr_char == '-'):
        tk = Token(curr_char * 2, None, self.line, 1)
    elif curr_char == ')' or curr_char == ']':
        # After a closing bracket a following +/- would be binary.
        self.addop_flag = True
    else:
        # flag is consumed, whether addop or not because it won't be valid
        # after one token.
        # NOTE(review): as reconstructed, this bare `return` yields None
        # for ordinary punctuation, and the `addop_flag = True` set in the
        # elif above is immediately overwritten by the assignment below —
        # one of these placements is almost certainly not what the author
        # wrote; verify the original indentation.
        return
    self.addop_flag = False
    return tk
def lexer_get_next_token(): c = '' t = Token() while True: c = sys.stdin.read(1) if len(c) <= 0: t.type = 'EOF' elif c.isspace(): sys.stdin.
def tokenize(text):
    """Yield Token objects for *text*, one lexeme at a time.

    Dispatches on the current character: whitespace, `--` one-line
    comments, `/* */` multi-line comments, punctuation, single- and
    double-quoted strings, and finally words (re-tagged as keywords when
    they appear in ``keyword_list``).

    Fix: the original built ``map(lambda x: x[0], punctuation)``, which
    in Python 3 is a one-shot iterator — it was exhausted by the first
    membership test, so every later punctuation character fell through to
    the word branch. A set is materialized once instead (also O(1)
    lookups, and fixes the `puntuation` typo in the local name).
    """
    i = 0
    punctuation_start = {p[0] for p in punctuation}
    while i < len(text):
        if text[i] in spacing:
            r = Token(eat_spacing(text[i:]), 'space or comment')
        elif text[i:i+2] == '--':
            r = Token(eat_comment_oneline(text[i:]), 'space or comment')
        elif text[i:i+2] == '/*':
            r = Token(eat_comment_multiline(text[i:]), 'space or comment')
        elif text[i] in punctuation_start:
            r = Token(eat_punctuation(text[i:]), 'punctuation')
        elif text[i] in "'":
            r = Token(eat_string(text[i:]), 'string')
        elif text[i] in '"':
            r = Token(eat_string_doubleq(text[i:]), 'string_doubleq')
        else:
            r = Token(eat_words(text[i:]), "id")
            if r.text.lower() in keyword_list:
                r.type = "keyword"
        # Advance past the consumed lexeme (Token defines its length).
        i += len(r)
        yield r
def letters(self):
    """Scan an identifier: a run of letters and digits.

    Raises LexerError when the identifier exceeds ``max_id_len``. Sets
    ``addop_flag`` True only when the resulting token is a plain
    IDENTIFIER (the flag is consumed after one token otherwise).
    """
    word = ""
    ch = self.curr_char()
    while ch and (ch.isalpha() or ch.isdigit()):
        word += ch
        self.forward()
        ch = self.curr_char()
    if len(word) > self.max_id_len:
        raise LexerError("Identifier too long", self.line, self.char_pos)
    tk = Token(word, None, self.line, 1)
    # The addop flag survives only past a plain identifier; any other
    # token type (e.g. a keyword) consumes it.
    self.addop_flag = tk.type() == "IDENTIFIER"
    return tk
def next_token(self):
    """Return the next token by running the table-driven DFA.

    Walks ``self.state`` through the transition tables exposed by
    ``next_state``/``action``/``look_up``. ``self.buffered`` marks a
    lookahead character that terminated the previous token and must be
    reprocessed before any new input is read.
    """
    found = False
    tok = Token()
    # End of input: hand back an empty token (value None) as EOF marker.
    if self.eof():
        tok.value=None
        return tok
    while not found:
        # Read a fresh character unless one is buffered from the previous
        # token; whitespace is never kept buffered and forces a new read.
        if (not self.buffered) or (self.current_char == ' ') or (self.current_char == '\n'):
            self.current_char = self.source_text[self.index]
            self.index += 1
            self.log("Current char: "+self.current_char+", EOF Status: "
                     +str(self.index == len(self.source_text)), self.L_DEBUG)
            # Map the raw character to its input-class index for the tables.
            self.current_read = self.character_look_up(self.current_char)
        # Stats
        cur_stats = "Current state: "+str(self.state)+", "
        cur_stats += "current_char: "+str(self.current_char)+", "
        cur_stats += "current_read: "+str(self.current_read)+", "
        cur_stats += "token status: "+str(self.token_under_construction)
        self.log(cur_stats, self.L_DEBUG)
        # Adding to token: a valid transition whose action is CONTINUE
        # consumes the character and advances the DFA state.
        if ((self.next_state(self.state, self.current_read) != -1) and
                (self.action(self.state, self.current_read) == CONTINUE)):
            self.buffered = False
            self.token_under_construction += self.current_char
            self.state = self.next_state(self.state, self.current_read)
        # Halting: no transition and HALT — emit the accumulated lexeme and
        # keep current_char buffered for the next call.
        elif ((self.next_state(self.state, self.current_read) == -1) and
                (self.action(self.state, self.current_read) == HALT)):
            look_up = self.look_up(self.state, self.current_read)
            self.log("Inside switch with state "+str(self.state), self.L_DEBUG)
            self.log("The look-up value is "+str(look_up), self.L_DEBUG)
            self.log("We have a buffered char of '"+self.current_char+"'", self.L_DEBUG)
            self.buffered = True
            self.log_token(self.token_look_up(look_up))
            tok.type = int(look_up)
            tok.value = self.token_under_construction
            # Return to S0
            self.state = 0
            # Reset token
            self.token_under_construction = ""
            found = True
        # Syntax Error: dead transition with ERROR action. Reads 30/31 are
        # exempted — presumably whitespace classes that are silently
        # skipped rather than reported (TODO confirm against the tables).
        elif ((self.next_state(self.state, self.current_read) == -1) and
                (self.action(self.state, self.current_read) == ERROR) and
                (self.current_read != 30) and (self.current_read != 31)):
            self.log("Illegal character '"+self.current_char+"'", self.L_ERROR)
            raise SyntaxError("Illegal character '"+self.current_char+"'")
        # End while
    return tok
def to_gforth(self):
    """Convert the tokenized prefix input into a gforth program string.

    Pops operands off ``self.stack``, rewrites each construct as postfix
    gforth text, and pushes the combined text back as a single synthetic
    Token; the finished program accumulates in ``self.output``.

    NOTE(review): this method is Python 2 code (``dict.has_key`` below).
    The ``f_exists``/``s_exists`` flags steer later operators toward the
    float (``f``-prefixed) and string variants once a float/string has
    been seen.
    """
    while self.get_next_token():
        self.convert_symbol()
        # constants
        if is_ints(self.cur_token):
            self.push_stack()
        elif is_floats(self.cur_token):
            # Remember a float was seen so later ops pick the f-variants.
            self.f_exists = True
            self.push_stack()
        elif is_name(self.cur_token):
            self.push_stack()
        elif is_tf(self.cur_token):
            self.push_stack()
        elif is_strings(self.cur_token):
            self.s_exists = True
            # Strip source quotes and re-wrap as a gforth s" ..." literal.
            temp = 's" ' + self.cur_token.value[1:-1] + '"'
            token = Token(self.cur_token.type, temp, self.cur_token.line)
            self.push_stack(token)
        # type
        elif is_type(self.cur_token):
            self.push_stack()
        # let statement (let (varlist)) — record declared variables
        elif is_let(self.cur_token):
            # Pop (type, name) pairs while the stack top is a type token.
            while len(self.stack) > 0 and is_type(self.stack[len(self.stack)-1][0]):
                typ = self.pop_stack()
                var = self.pop_stack()
                self.varlist.update({var.value:typ.value})
        # assign (:= name oper)
        elif is_assign(self.cur_token):
            num = self.pop_stack()
            var = self.pop_stack()
            ##DEBUG##
            # print '===========> Varlist: ',
            # print self.varlist.items()
            #########
            if self.varlist.has_key(var.value):
                # gforth: "<expr> value <name>" defines/sets the value.
                temp = num.value + ' value ' + var.value
                token = Token('Assign', temp, self.cur_token.line)
                if self.f_exists:
                    token.type = 'Assign_f'
                self.push_stack(token)
                self.assign = True
            else:
                self.error('Variable ' + var.value + ' not declared')
        # print statement — choose the printing word by operand kind:
        # f. for floats, type for strings, . for integers.
        elif is_print(self.cur_token):
            temp = self.pop_stack().value
            if self.f_exists:
                temp += ' f.'
            elif self.s_exists:
                temp += ' type'
            else:
                temp += ' .'
            token = Token('Printstmt', temp, self.cur_token.line)
            self.push_stack(token)
            self.stdout = False
        # if statement — with three operands on the stack emit
        # "cond if then-part else else-part endif", otherwise the two-arm
        # "cond if then-part endif" form.
        elif is_if(self.cur_token):
            tmp1 = self.pop_stack()
            tmp2 = self.pop_stack()
            if len(self.stack) > 0:
                tmp3 = self.pop_stack()
                temp = tmp3.value + ' if ' + tmp2.value + ' else ' + tmp1.value
            else:
                temp = tmp2.value + ' if ' + tmp1.value
            temp += ' endif'
            token = Token('Ifstmt', temp, self.cur_token.line)
            self.push_stack(token)
            # Control flow must live inside a colon definition; see tail.
            self.func_flag = True
        # while statement — gforth "begin <body> <cond> until" loop.
        elif is_while(self.cur_token):
            tmp = self.pop_stack()
            temp = 'begin ' + tmp.value + ' ' + self.pop_stack().value + ' until'
            token = Token('Whilestmt', temp, self.cur_token.line)
            self.push_stack(token)
            self.func_flag = True
        # negate — f-variant when floats are in play.
        elif is_negate(self.cur_token):
            oper = self.pop_stack().value
            if self.f_exists:
                temp = oper + ' fnegate'
            else:
                temp = oper + ' negate'
            token = Token('Negateno', temp, self.cur_token.line)
            if self.f_exists:
                token.type = 'Negateno_f'
            self.push_stack(token)
        # arithmetic calculation — pick the operator spelling: s+ for
        # string concat, f-prefixed ops for floats, f** for power (integer
        # power is computed in float then converted back with f>s).
        elif is_binops(self.cur_token):
            if self.s_exists and self.cur_token.value == '+':
                binop = 's+'
            elif self.f_exists and is_pmtd(self.cur_token):
                binop = 'f' + self.cur_token.value
            elif self.f_exists and is_power(self.cur_token):
                binop = 'f**'
            elif self.f_exists == False and is_power(self.cur_token):
                # Integer power: temporarily switch to float mode.
                binop = 'f**'
                self.int_power = True
                self.f_exists = True
            else:
                binop = self.cur_token.value
            oper1 = self.pop_stack()
            oper2 = self.pop_stack()
            # Postfix order: second-popped operand comes first.
            temp = oper2.value + ' ' + oper1.value + ' ' + binop
            if self.int_power == True:
                temp += ' f>s'
            token = Token('Binops', temp, self.cur_token.line)
            if self.f_exists:
                token.type = 'Binops_f'
            self.push_stack(token)
            if self.int_power == True:
                # Leave float mode again after an integer power.
                self.int_power = False
                self.f_exists = False
        # not | sin | cos | tan — trig gets the f-prefix in float mode.
        elif is_unops_2(self.cur_token):
            if self.f_exists and self.cur_token.value != 'not':
                unop = 'f' + self.cur_token.value
            else:
                unop = self.cur_token.value
            oper = self.pop_stack()
            temp = oper.value + ' ' + unop
            token = Token('Unops', temp, self.cur_token.line)
            if self.f_exists:
                token.type = 'Unops_f'
            self.push_stack(token)
        # other
        elif self.cur_token.type == 'Left_Parenthesis':
            self.scope += 1
        elif self.cur_token.type == 'Right_Parenthesis':
            self.scope -= 1
        else:
            self.error(self.cur_token.value)
    # if len(self.varlist) != 0:
    #     self.output += self.declare_var()
    # Wrap the program in a colon definition when control flow was seen.
    if self.func_flag == True:
        self.output += ': func '
    if self.assign == True:
        self.stack.reverse()
    # Drain the stack into the output string.
    while self.stack:
        self.output += self.pop_stack().value + ' '
    if self.func_flag == True:
        self.output += '; func '
def next_token(self):
    """Produce the next token from the input stream.

    Recognizes two-character operators first (peeking one character
    ahead), then single-character punctuation, then EOF, strings,
    identifiers/keywords and numbers. Identifier and number scanning
    leave the read head just past the lexeme, so no trailing
    ``read_char`` is issued for those.
    """
    token = Token()
    self.skip_whitespaces()
    advance_after = True  # consume one more char before returning?

    # Two-character operators: (lead, peeked) -> (type, literal).
    pairs = {
        ('+', '+'): (TokenType.PLUSPLUS, '++'),
        ('/', '*'): (TokenType.START_COMMENT, '/*'),
        ('*', '/'): (TokenType.END_COMMENT, '*/'),
        ('>', '='): (TokenType.GTE, '>='),
        ('<', '='): (TokenType.LTE, '<='),
        ('=', '='): (TokenType.EQ, '=='),
        ('!', '='): (TokenType.NOT_EQ, '!='),
    }
    # Single-character tokens (also the fallbacks for the leads above).
    singles = {
        '+': TokenType.PLUS, '-': TokenType.MINUS, '/': TokenType.SLASH,
        '*': TokenType.ASTERISK, '>': TokenType.GT, '<': TokenType.LT,
        ';': TokenType.SEMICOLON, '(': TokenType.LPAREN,
        ')': TokenType.RPAREN, '{': TokenType.LBRACE,
        '}': TokenType.RBRACE, ',': TokenType.COMMA,
        '[': TokenType.LBRACKET, ']': TokenType.RBRACKET,
        ':': TokenType.COLON, '.': TokenType.DOT, '=': TokenType.ASSIGN,
    }

    ch = self.ch
    pair = None
    # Only peek when the current char can start a two-char operator,
    # preserving the original peek_char() call pattern.
    if ch in ('+', '/', '*', '>', '<', '=', '!'):
        pair = pairs.get((ch, self.peek_char()))
    if pair is not None:
        self.read_char()  # consume the second operator character
        token.type, token.literal = pair
    elif ch == '!':
        token.type, token.literal = TokenType.BANG, '!'
    elif ch in singles:
        token.type, token.literal = singles[ch], ch
    elif ch == 0:
        # 0 is the sentinel for end of input.
        token.type, token.literal = TokenType.EOF, ch
    elif ch == '"':
        token.type = TokenType.STRING
        token.literal = self.read_string()
    elif self.is_letter():
        token.literal = self.read_ident()
        token.type = self.lookup_ident(token.literal)
        advance_after = False
    elif self.is_digit():
        token.literal = self.read_number()
        # A decimal point anywhere in the lexeme makes it a float.
        token.type = TokenType.FLOAT if '.' in token.literal else TokenType.INT
        advance_after = False
    else:
        token.type, token.literal = TokenType.ILLEGAL, ''

    if advance_after:
        self.read_char()
    return token