Exemplo n.º 1
0
    def handle_with_id_tokens(self) -> Token:
        """Handle identifiers and reserved keywords.

        Consumes a run of alphanumeric/underscore characters, then
        classifies the lexeme: a case-insensitive match in
        ``RESERVED_KEYWORDS`` yields that keyword's token type with the
        upper-cased lexeme; anything else becomes an ``ID`` token with
        the original spelling.

        Returns:
            Token: the token representing an identifier or a reserved keyword
        """

        token = Token(type=None,
                      value=None,
                      line=self.t_line,
                      column=self.t_column)

        value = ""
        # Accept letters/digits plus underscores anywhere in the lexeme.
        # Parentheses make the original precedence explicit:
        # (char is alnum) or (char is '_'); both legs are None-safe.
        while ((self.current_char is not None and self.current_char.isalnum())
               or self.current_char == TokenType.UNDER_SCORE.value):
            value += self.current_char
            self.advance()

        token_type = self.RESERVED_KEYWORDS.get(value.upper())
        if token_type is None:
            # Plain identifier: keep the user's original casing.
            token.type = TokenType.ID
            token.value = value
        else:
            # Reserved keyword: normalize the lexeme to upper case.
            token.type = token_type
            token.value = value.upper()

        return token
Exemplo n.º 2
0
    def number(self) -> Token:
        """Consume a run of digits, optionally followed by a fractional part.

        Returns:
            Token: an INTEGER_CONST or REAL_CONST token for the number read.
        """

        token = Token(type=None,
                      value=None,
                      line=self.t_line,
                      column=self.t_column)

        digits = []
        while self.current_char is not None and self.current_char.isdigit():
            digits.append(self.current_char)
            self.advance()

        if self.current_char == ".":
            # Fractional part: keep the dot and any digits that follow.
            digits.append(self.current_char)
            self.advance()
            while self.current_char is not None and self.current_char.isdigit():
                digits.append(self.current_char)
                self.advance()

            token.type = TokenType.REAL_CONST
            token.value = float("".join(digits))
        else:
            token.type = TokenType.INTEGER_CONST
            token.value = int("".join(digits))

        return token
Exemplo n.º 3
0
    def _id(self):
        """Read an identifier or a reserved keyword.

        Consumes a run of alphanumeric characters; if the upper-cased
        lexeme appears in RESERVED_KEYWORDS the matching keyword token
        is returned, otherwise a plain ID token.

        Examples: keywords like 'PROGRAM', 'VAR', 'BEGIN', or plain
        names like 'a', 'b', 'i', 'j'.
        """
        token = Token(type=None,
                      value=None,
                      lineno=self.lineno,
                      column=self.column)

        chars = []
        # Gather every consecutive digit-or-letter character.
        while self.current_char is not None and self.current_char.isalnum():
            chars.append(self.current_char)
            self.advance()
        lexeme = ''.join(chars)

        keyword_type = RESERVED_KEYWORDS.get(lexeme.upper())
        if keyword_type is not None:
            # Reserved keyword: normalize the lexeme to upper case.
            token.type = keyword_type
            token.value = lexeme.upper()
        else:
            # Ordinary identifier: keep the original spelling.
            token.type = TokenType.ID
            token.value = lexeme

        return token
Exemplo n.º 4
0
    def number(self):
        """Read a multi-digit number token, e.g. '12345' or '32.1213'.

        Produces an INTEGER_CONST token unless a '.' follows the integer
        part, in which case a REAL_CONST token is produced.
        """
        token = Token(type=None,
                      value=None,
                      lineno=self.lineno,
                      column=self.column)

        digits = ''
        # Integer part: consume consecutive digit characters.
        while self.current_char is not None and self.current_char.isdigit():
            digits += self.current_char
            self.advance()

        if self.current_char != '.':
            token.type = TokenType.INTEGER_CONST
            token.value = int(digits)
            return token

        # A '.' follows, so this is a float literal; consume the fraction.
        digits += self.current_char
        self.advance()
        while self.current_char is not None and self.current_char.isdigit():
            digits += self.current_char
            self.advance()

        token.type = TokenType.REAL_CONST
        token.value = float(digits)
        return token
Exemplo n.º 5
0
    def string(self) -> Token:
        """Return a literal string token (STRING_CONST).

        Consumes the opening quote, then the string body (letters and
        characters in ``SINGLE_CHARACTERS``, with runs of internal
        whitespace collapsed to a single space), then the closing quote.

        Returns:
            Token: a token representing a literal string.
        """

        token = Token(type=None,
                      value=None,
                      line=self.t_line,
                      column=self.t_column)

        # Skip the opening quote character.
        self.advance()

        value = ""
        # None guard covers the whole condition: the original evaluated
        # `None in self.SINGLE_CHARACTERS` at end of input, which raises
        # TypeError when SINGLE_CHARACTERS is a string.
        while (self.current_char is not None
               and (self.current_char.isalpha()
                    or self.current_char in self.SINGLE_CHARACTERS)):
            value += self.current_char
            self.advance()

            # Collapse a run of whitespace inside the literal to one space.
            # Guard against None: the input may end before the closing
            # quote (previously this raised AttributeError on None).
            if self.current_char is not None and self.current_char.isspace():
                value += " "
                self.skip_whitespace()

        # Skip the closing quote character.
        self.advance()

        token.type = TokenType.STRING_CONST
        token.value = value

        return token
Exemplo n.º 6
0
    def punct(self):
        """Tokenize a punctuation/operator token, preferring two-char forms.

        Reads the current character, then greedily pairs it with the next
        one; the two-char candidate is kept only when Token classifies it
        as something other than "IDENTIFIER" (and the first char is not
        '+'/'-').  A '+'/'-' seen while ``addop_flag`` is unset is doubled
        ('++'/'--') to produce a distinct token kind.
        """
        curr_char = self.curr_char()
        self.forward()

        # Try greedily tagging a second character and if no match then discard it
        next_next = self.curr_char()
        if next_next:
            next_next = curr_char + next_next

        tk = Token(next_next, None, self.line, 1)
        if tk.type(
        ) == "IDENTIFIER" or curr_char == '+' or curr_char == '-':  # also prevents - -- and + ++ mixup
            # Two-char candidate not recognised (or starts with +/-):
            # fall back to the single-char token; no extra forward() needed.
            tk = Token(curr_char, None, self.line, 1)
        else:
            self.forward()  # adjust for extra character

        # Addop situations
        # NOTE(review): addop_flag appears to mean "the previous token can
        # end an operand, so +/- here is a binary addop" — confirm against
        # letters() and the callers before relying on this.
        if not self.addop_flag and (curr_char == '+' or curr_char == '-'):
            # Doubled character ('++'/'--') marks the non-addop variant.
            tk = Token(curr_char * 2, None, self.line, 1)
        elif curr_char == ')' or curr_char == ']':
            self.addop_flag = True
        else:  #flag is consumed, whether addop or not because it won't be valid after one token return
            self.addop_flag = False

        return tk
Exemplo n.º 7
0
# NOTE(review): this snippet is truncated mid-statement by the scrape —
# the final `sys.stdin.` line is incomplete. Recover the rest from the
# original source before using it; left byte-identical here.
def lexer_get_next_token():
	# Reads the input one character at a time from stdin.
	c = ''
	t = Token()

	while True:
		c = sys.stdin.read(1)

		if len(c) <= 0:
			# read(1) returns '' at end of input.
			t.type = 'EOF'
		elif c.isspace():
			sys.stdin.
Exemplo n.º 8
0
def tokenize(text):
    """Yield Token objects for *text*.

    Dispatches on the current character to the matching eat_* helper
    (spacing/comments, punctuation, single- and double-quoted strings,
    words); lexemes found in ``keyword_list`` are re-tagged as keywords.
    The loop advances by each token's length.
    """
    i = 0
    # Build the first-character set ONCE. The original used map(), which in
    # Python 3 returns a one-shot iterator: the first membership test
    # exhausted it, so punctuation silently stopped matching afterwards.
    # A set also makes each membership test O(1).
    punctuation_start = {p[0] for p in punctuation}
    while i < len(text):

        if text[i] in spacing: r = Token(eat_spacing(text[i:]), 'space or comment')
        elif text[i:i+2] == '--': r = Token(eat_comment_oneline(text[i:]), 'space or comment')
        elif text[i:i+2] == '/*': r = Token(eat_comment_multiline(text[i:]), 'space or comment')
        elif text[i] in punctuation_start: r = Token(eat_punctuation(text[i:]), 'punctuation')
        elif text[i] in "'": r = Token(eat_string(text[i:]), 'string')
        elif text[i] in '"': r = Token(eat_string_doubleq(text[i:]), 'string_doubleq')
        else: r = Token(eat_words(text[i:]), "id")

        if r.text.lower() in keyword_list: r.type = "keyword"

        i += len(r)
        yield r
Exemplo n.º 9
0
    def letters(self):
        """Scan an alphanumeric run and return it as a token.

        Raises LexerError when the lexeme exceeds ``max_id_len``.  Sets
        ``addop_flag`` when the resulting token is an identifier and
        clears it otherwise.
        """
        lexeme = ""
        ch = self.curr_char()
        while ch and (ch.isalpha() or ch.isdigit()):
            lexeme += ch
            self.forward()
            ch = self.curr_char()

        if len(lexeme) > self.max_id_len:
            raise LexerError("Identifier too long", self.line, self.char_pos)

        tk = Token(lexeme, None, self.line, 1)
        # Flag is consumed by the next token only: set iff this is an
        # identifier, cleared in every other case.
        self.addop_flag = (tk.type() == "IDENTIFIER")

        return tk
Exemplo n.º 10
0
	def next_token(self):
		"""Drive the lexer's transition-table DFA to produce the next token.

		Repeatedly reads a character (unless one is already buffered from
		the previous call), classifies it via ``character_look_up``, and
		either extends the token under construction (CONTINUE action) or
		emits the finished token (HALT action).  Raises SyntaxError for
		characters with no valid transition (ERROR action).
		"""
		found = False
		tok = Token()

		if self.eof():
			# At end of input return an empty token (value None).
			tok.value=None
			return tok

		while not found:
			# Only consume a fresh character when the buffered one was used
			# up or is whitespace; otherwise re-process the buffered char.
			if (not self.buffered) or (self.current_char == ' ') or (self.current_char == '\n'):
				self.current_char = self.source_text[self.index]
				self.index += 1

			self.log("Current char: "+self.current_char+", EOF Status: "
				+str(self.index == len(self.source_text)), self.L_DEBUG)

			self.current_read = self.character_look_up(self.current_char)

			# Stats
			cur_stats =  "Current state: "+str(self.state)+", "
			cur_stats += "current_char: "+str(self.current_char)+", "
			cur_stats += "current_read: "+str(self.current_read)+", "
			cur_stats += "token status: "+str(self.token_under_construction)
			self.log(cur_stats, self.L_DEBUG)

			# Adding to token
			if ((self.next_state(self.state, self.current_read) != -1) and 
					(self.action(self.state, self.current_read) == CONTINUE)):

				self.buffered = False
				self.token_under_construction += self.current_char
				self.state = self.next_state(self.state, self.current_read)

			# Halting
			elif ((self.next_state(self.state, self.current_read) == -1) and
					(self.action(self.state, self.current_read) == HALT)):

				look_up = self.look_up(self.state, self.current_read)
				self.log("Inside switch with state "+str(self.state), self.L_DEBUG)
				self.log("The look-up value is "+str(look_up), self.L_DEBUG)
				self.log("We have a buffered char of '"+self.current_char+"'", self.L_DEBUG)
				# The halting character belongs to the NEXT token; keep it
				# buffered so the next call re-processes it.
				self.buffered = True

				self.log_token(self.token_look_up(look_up))

				tok.type = int(look_up)
				tok.value = self.token_under_construction

				# Return to S0
				self.state = 0

				# Reset token
				self.token_under_construction = ""

				found = True

			# Syntax Error
			elif ((self.next_state(self.state, self.current_read) == -1) and
					(self.action(self.state, self.current_read) == ERROR) and 
					(self.current_read != 30) and (self.current_read != 31)):
				# NOTE(review): reads 30/31 are exempt from the error —
				# presumably whitespace classes; confirm in character_look_up.
				self.log("Illegal character '"+self.current_char+"'", self.L_ERROR)
				raise SyntaxError("Illegal character '"+self.current_char+"'")


		# End while
		return tok
	def to_gforth(self):
		"""Translate the token stream into a gforth program string.

		Walks tokens from get_next_token(), building gforth code fragments
		on ``self.stack`` and finally concatenating them into
		``self.output``.  Tracks ``f_exists``/``s_exists`` so the float and
		string variants of gforth words are emitted when needed, and
		``func_flag`` so control-flow code is wrapped in a word definition.
		"""
		while self.get_next_token():
			self.convert_symbol()
			# constants
			if is_ints(self.cur_token):
				self.push_stack()
			elif is_floats(self.cur_token):
				self.f_exists = True
				self.push_stack()
			elif is_name(self.cur_token):
				self.push_stack()
			elif is_tf(self.cur_token):
				self.push_stack()
			elif is_strings(self.cur_token):
				self.s_exists = True
				temp = 's" ' + self.cur_token.value[1:-1] + '"'
				token = Token(self.cur_token.type, temp, self.cur_token.line)
				self.push_stack(token)
			# type
			elif is_type(self.cur_token):
				self.push_stack()
			# let statment (let (varlist))
			elif is_let(self.cur_token):
				while len(self.stack) > 0 and is_type(self.stack[len(self.stack)-1][0]):
					typ = self.pop_stack()
					var = self.pop_stack()
					self.varlist.update({var.value:typ.value})
			# assign (:= name oper)
			elif is_assign(self.cur_token):
				num = self.pop_stack()
				var = self.pop_stack()
				# `in` replaces dict.has_key(), which was removed in
				# Python 3 (and `in` works in Python 2 as well).
				if var.value in self.varlist:
					temp = num.value + ' value ' + var.value
					token = Token('Assign', temp, self.cur_token.line)
					if self.f_exists:
						token.type = 'Assign_f'
					self.push_stack(token)
					self.assign = True
				else:
					self.error('Variable ' + var.value + ' not declared')
			# print statment
			elif is_print(self.cur_token):
				temp = self.pop_stack().value
				if self.f_exists:
					temp += ' f.'
				elif self.s_exists:
					temp += ' type'
				else:
					temp += ' .'
				token = Token('Printstmt', temp, self.cur_token.line)
				self.push_stack(token)
				self.stdout = False
			# if statment
			elif is_if(self.cur_token):
				tmp1 = self.pop_stack()
				tmp2 = self.pop_stack()
				if len(self.stack) > 0:
					tmp3 = self.pop_stack()
					temp =  tmp3.value + ' if ' + tmp2.value + ' else ' + tmp1.value
				else:
					temp =  tmp2.value + ' if ' + tmp1.value
				temp += ' endif'
				token = Token('Ifstmt', temp, self.cur_token.line)
				self.push_stack(token)
				self.func_flag = True
			# while statment
			elif is_while(self.cur_token):
				tmp = self.pop_stack()
				temp = 'begin ' + tmp.value + ' ' + self.pop_stack().value + ' until'
				token = Token('Whilestmt', temp, self.cur_token.line)
				self.push_stack(token)
				self.func_flag = True
			# negate 
			elif is_negate(self.cur_token):
				oper = self.pop_stack().value
				if self.f_exists:
					temp = oper + ' fnegate'
				else:
					temp = oper + ' negate'
				token = Token('Negateno', temp, self.cur_token.line)
				if self.f_exists:
					token.type = 'Negateno_f'
				self.push_stack(token)
			# arithmetic calculation
			elif is_binops(self.cur_token):
				if self.s_exists and self.cur_token.value == '+':
					binop = 's+'
				elif self.f_exists and is_pmtd(self.cur_token):
					binop = 'f' + self.cur_token.value
				elif self.f_exists and is_power(self.cur_token):
					binop = 'f**'
				elif self.f_exists == False and is_power(self.cur_token):
					# Integer power still uses f**; the result is converted
					# back to an integer with f>s below.
					binop = 'f**'
					self.int_power = True
					self.f_exists = True
				else:
					binop = self.cur_token.value
				oper1 = self.pop_stack()
				oper2 = self.pop_stack()
				temp = oper2.value + ' ' + oper1.value + ' ' + binop
				if self.int_power == True:
					temp += ' f>s'
				token = Token('Binops', temp, self.cur_token.line)
				if self.f_exists:
					token.type = 'Binops_f'
				self.push_stack(token)
				if self.int_power == True:
					self.int_power = False
					self.f_exists = False
			# not | sin | cos | tan
			elif is_unops_2(self.cur_token):
				if self.f_exists and self.cur_token.value != 'not':
					unop = 'f' + self.cur_token.value
				else:
					unop = self.cur_token.value
				oper = self.pop_stack()
				temp = oper.value + ' ' + unop
				token = Token('Unops', temp, self.cur_token.line)
				if self.f_exists:
					token.type = 'Unops_f'
				self.push_stack(token)
			# other
			elif self.cur_token.type == 'Left_Parenthesis':
				self.scope += 1
			elif self.cur_token.type == 'Right_Parenthesis':
				self.scope -= 1
			else:
				self.error(self.cur_token.value)

		# Flush the fragment stack into the output, wrapping it in a
		# `: func ... ; func` word definition when control flow needs it.
		if self.func_flag == True:
			self.output += ': func ' 
		if self.assign == True:
			self.stack.reverse()
		while self.stack:
			self.output += self.pop_stack().value + ' '
		if self.func_flag == True:
			self.output += '; func '
Exemplo n.º 12
0
 def next_token(self):
     """Scan and return the next token from the input stream.

     Skips whitespace, then dispatches on the current character.  For
     two-character operators (++ /* */ >= <= == !=) a peek_char()
     lookahead decides and read_char() consumes the extra character.
     Identifiers, numbers, and strings consume their own trailing
     character inside their read_* helpers, so the final read_char()
     is skipped for them (skip_read_char).
     """
     token = Token()
     self.skip_whitespaces()
     skip_read_char = False
     if self.ch == '+':
         if self.peek_char() == '+':
             self.read_char()
             token.type = TokenType.PLUSPLUS
             token.literal = '++'
         else:
             token.type = TokenType.PLUS
             token.literal = self.ch
     elif self.ch == '-':
         token.type = TokenType.MINUS
         token.literal = self.ch
     elif self.ch == '/':
         if self.peek_char() == '*':
             self.read_char()
             token.type = TokenType.START_COMMENT
             token.literal = '/*'
         else:
             token.type = TokenType.SLASH
             token.literal = self.ch
     elif self.ch == '*':
         if self.peek_char() == '/':
             self.read_char()
             token.type = TokenType.END_COMMENT
             token.literal = '*/'
         else:
             token.type = TokenType.ASTERISK
             token.literal = self.ch
     elif self.ch == '>':
         if self.peek_char() == '=':
             self.read_char()
             token.type = TokenType.GTE
             token.literal = ">="
         else:
             token.type = TokenType.GT
             token.literal = self.ch
     elif self.ch == '<':
         if self.peek_char() == '=':
             self.read_char()
             token.type = TokenType.LTE
             token.literal = "<="
         else:
             token.type = TokenType.LT
             token.literal = self.ch
     elif self.ch == ';':
         token.type = TokenType.SEMICOLON
         token.literal = self.ch
     elif self.ch == '(':
         token.type = TokenType.LPAREN
         token.literal = self.ch
     elif self.ch == ')':
         token.type = TokenType.RPAREN
         token.literal = self.ch
     elif self.ch == '{':
         token.type = TokenType.LBRACE
         token.literal = self.ch
     elif self.ch == '}':
         token.type = TokenType.RBRACE
         token.literal = self.ch
     elif self.ch == ',':
         token.type = TokenType.COMMA
         token.literal = self.ch
     elif self.ch == 0:
         # NOTE(review): EOF uses the integer sentinel 0, and the literal
         # is set to 0 (not '') — confirm downstream code expects this.
         token.type = TokenType.EOF
         token.literal = self.ch
     elif self.ch == '"':
         token.type = TokenType.STRING
         token.literal = self.read_string()
     elif self.ch == '[':
         token.type = TokenType.LBRACKET
         token.literal = self.ch
     elif self.ch == ']':
         token.type = TokenType.RBRACKET
         token.literal = self.ch
     elif self.ch == ':':
         token.type = TokenType.COLON
         token.literal = self.ch
     elif self.ch == '.':
         token.type = TokenType.DOT
         token.literal = self.ch
     elif self.ch == '=':
         if self.peek_char() == '=':
             self.read_char()
             token.type = TokenType.EQ
             token.literal = '=='
         else:
             token.type = TokenType.ASSIGN
             token.literal = self.ch
     elif self.ch == '!':
         if self.peek_char() == '=':
             self.read_char()
             token.type = TokenType.NOT_EQ
             token.literal = '!='
         else:
             token.type = TokenType.BANG
             token.literal = '!'
     else:
         # Multi-character tokens: the read_* helpers advance past the
         # lexeme themselves, so skip the trailing read_char() below.
         if self.is_letter():
             token.literal = self.read_ident()
             token.type = self.lookup_ident(token.literal)
             skip_read_char = True
         elif self.is_digit():
             token.literal = self.read_number()
             if '.' in token.literal:
                 token.type = TokenType.FLOAT
             else:
                 token.type = TokenType.INT
             skip_read_char = True
         else:
             token.type = TokenType.ILLEGAL
             token.literal = ''
     if not skip_read_char:
         self.read_char()
     return token