Exemplo n.º 1
0
    def number(self) -> Token:
        """Scan a numeric literal beginning at the current character.

        A run of digits is consumed first. If a "." follows, it and any
        digits after it are consumed too and the literal is a real number;
        otherwise it is an integer.

        Returns:
            Token: ``REAL_CONST`` holding a ``float`` when a decimal point
                was read, else ``INTEGER_CONST`` holding an ``int``. The
                token's line/column are ``self.t_line``/``self.t_column``
                as they were when scanning started.
        """

        # Built up front so the position reflects the start of the literal.
        token = Token(type=None,
                      value=None,
                      line=self.t_line,
                      column=self.t_column)

        pieces = []

        # Integer part.
        while True:
            ch = self.current_char
            if ch is None or not ch.isdigit():
                break
            pieces.append(ch)
            self.advance()

        # No decimal point: plain integer.
        if self.current_char != ".":
            token.type = TokenType.INTEGER_CONST
            token.value = int("".join(pieces))
            return token

        # Decimal point followed by the (possibly empty) fractional part.
        pieces.append(".")
        self.advance()
        while True:
            ch = self.current_char
            if ch is None or not ch.isdigit():
                break
            pieces.append(ch)
            self.advance()

        token.type = TokenType.REAL_CONST
        token.value = float("".join(pieces))
        return token
Exemplo n.º 2
0
    def _id(self):
        """Scan an identifier or a reserved keyword.

        Consumes consecutive alphanumeric characters. The upper-cased text
        is looked up in ``RESERVED_KEYWORDS``: on a hit the token gets the
        keyword's type and the upper-cased text as value (e.g. ``PROGRAM``,
        ``VAR``, ``BEGIN``); otherwise it is an ``ID`` token whose value is
        the text exactly as written (e.g. ``a``, ``b``, ``i``).
        """
        # Created first so lineno/column describe where the word starts.
        token = Token(type=None,
                      value=None,
                      lineno=self.lineno,
                      column=self.column)

        letters = []
        while True:
            ch = self.current_char
            if ch is None or not ch.isalnum():
                break
            letters.append(ch)
            self.advance()

        word = ''.join(letters)
        upper_word = word.upper()
        keyword_type = RESERVED_KEYWORDS.get(upper_word)

        if keyword_type is not None:
            # Reserved keyword: normalised to upper case.
            token.type = keyword_type
            token.value = upper_word
        else:
            # Ordinary identifier: original spelling is kept.
            token.type = TokenType.ID
            token.value = word

        return token
Exemplo n.º 3
0
    def handle_with_id_tokens(self) -> Token:
        """Scan an identifier or a reserved keyword.

        Consumes alphanumeric characters and the underscore character
        (``TokenType.UNDER_SCORE.value``), then classifies the word via
        ``self.RESERVED_KEYWORDS`` using its upper-cased form.

        Returns:
            Token: a keyword token (value upper-cased) when the word is
                reserved, otherwise an ``ID`` token with the word as
                written. Line/column are taken from ``self.t_line`` and
                ``self.t_column`` at entry.
        """

        token = Token(type=None,
                      value=None,
                      line=self.t_line,
                      column=self.t_column)

        underscore = TokenType.UNDER_SCORE.value
        collected = []
        while True:
            ch = self.current_char
            is_alnum = ch is not None and ch.isalnum()
            if not (is_alnum or ch == underscore):
                break
            collected.append(ch)
            self.advance()

        word = "".join(collected)
        upper_word = word.upper()
        keyword_type = self.RESERVED_KEYWORDS.get(upper_word)

        if keyword_type is not None:
            token.type = keyword_type
            token.value = upper_word
        else:
            token.type = TokenType.ID
            token.value = word

        return token
Exemplo n.º 4
0
    def number(self):
        """Scan an integer or floating-point literal.

        Examples of accepted input: ``12345`` (integer) or ``32.1213``
        (real). Returns a token typed ``INTEGER_CONST`` with an ``int``
        value, or ``REAL_CONST`` with a ``float`` value when a "." was
        read after the leading digits.
        """
        # Created first so lineno/column describe where the literal starts.
        token = Token(type=None,
                      value=None,
                      lineno=self.lineno,
                      column=self.column)

        def at_digit():
            # True while the scanner is positioned on a decimal digit.
            ch = self.current_char
            return ch is not None and ch.isdigit()

        text = []

        # Leading digits.
        while at_digit():
            text.append(self.current_char)
            self.advance()

        saw_point = self.current_char == '.'
        if saw_point:
            # Decimal point, then any fractional digits.
            text.append('.')
            self.advance()
            while at_digit():
                text.append(self.current_char)
                self.advance()

        literal = ''.join(text)
        if saw_point:
            token.type = TokenType.REAL_CONST
            token.value = float(literal)
        else:
            token.type = TokenType.INTEGER_CONST
            token.value = int(literal)

        return token
Exemplo n.º 5
0
def loads(code):
    """Split JSON text into a flat list of tokens.

    Recognised tokens:
      * the six structural characters ``: , [ ] { }`` (``Type.auto``),
      * strings (``Type.string``), delegated to ``string_end``,
      * unsigned numbers (``Type.number``), delimited by ``number_end``,
      * the literals ``true``, ``false`` and ``null`` (``Type.keyword``).

    ``string_end`` and ``number_end`` are defined elsewhere. From the way
    their results are used here, ``string_end(code, i)`` returns
    ``(text, index)`` with ``index == -1`` on failure, and
    ``number_end(code, i)`` returns how many characters after the first
    digit belong to the number.

    Args:
        code: the JSON source text.

    Returns:
        The list of tokens, or ``None`` when scanning fails (unterminated
        string, malformed keyword, or unexpected character). For the
        latter two a diagnostic is printed first.
    """
    i = 0
    length = len(code)
    tokens = []
    spaces = ' \b\f\n\r\t'
    digits = '0123456789'
    keywords = {'t': 'true', 'f': 'false', 'n': 'null'}

    # True while the next '"' opens a string; False while the next '"'
    # is the closing quote left behind by string_end.
    is_open = True
    while i < length:
        c = code[i]
        i += 1
        if c in spaces:
            continue
        elif c in ':,[]{}':
            tokens.append(Token(Type.auto, c))
        elif c == '"' and is_open:
            # Consume the string body.
            result, index = string_end(code, i)
            if index == -1:
                return
            t = Token(Type.string)
            t.value = result
            # NOTE(review): index presumably points at the closing quote,
            # which the branch below then consumes - confirm in string_end.
            i = index
            tokens.append(t)
            is_open = not is_open
        elif c == '"' and not is_open:
            # Closing quote of the string just read.
            is_open = not is_open
            continue
        elif c in digits:
            # Consume the number; i is already one past its first digit.
            offset = number_end(code, i)
            text = code[i - 1:i + offset]
            t = Token(Type.number)
            try:
                t.value = int(text)
            except ValueError:
                # Not a plain integer (fraction/exponent): parse as float.
                t.value = float(text)
            i += offset
            tokens.append(t)
        elif c in keywords:
            word = keywords[c]
            # Verify the whole literal, not just its first letter.
            if not code.startswith(word, i - 1):
                print("*** 错误", c, code[i:i + 10])
                return
            t = Token(Type.keyword)
            t.value = word
            tokens.append(t)
            # The first letter was already consumed above, so skip only
            # the rest; skipping len(word) would swallow the character
            # that follows the literal (e.g. the ']' in '[true]').
            i += len(word) - 1
        else:
            print("*** 错误", c, code[i:i + 10])
            return
    return tokens
Exemplo n.º 6
0
def loads(code):
    """Tokenize JSON text into a flat list of tokens.

    Emits ``Type.auto`` tokens for ``: , [ ] { }``, ``Type.string`` tokens
    for quoted strings, ``Type.number`` tokens (``int`` or ``float``
    values) for unsigned numbers, and ``Type.keyword`` tokens for the
    literals ``true``, ``false`` and ``null``.

    The helpers ``string_end`` and ``number_end`` live elsewhere. As used
    here, ``string_end(code, i)`` yields ``(text, index)`` and signals
    failure with ``index == -1``; ``number_end(code, i)`` yields the count
    of characters following the first digit that are part of the number.

    Args:
        code: the JSON source text.

    Returns:
        The token list, or ``None`` if scanning fails (unterminated
        string, malformed keyword, or unexpected character). A diagnostic
        is printed for the latter two.
    """
    i = 0
    length = len(code)
    tokens = []
    spaces = ' \b\f\n\r\t'
    digits = '0123456789'
    keywords = {'t': 'true', 'f': 'false', 'n': 'null'}

    # True: the next '"' starts a string. False: the next '"' is the
    # closing quote of the string that was just tokenized.
    is_open = True
    while i < length:
        c = code[i]
        i += 1
        if c in spaces:
            continue
        elif c in ':,[]{}':
            tokens.append(Token(Type.auto, c))
        elif c == '"' and is_open:
            result, index = string_end(code, i)
            if index == -1:
                return
            t = Token(Type.string)
            t.value = result
            # NOTE(review): index presumably addresses the closing quote,
            # consumed by the next branch - confirm against string_end.
            i = index
            tokens.append(t)
            is_open = not is_open
        elif c == '"' and not is_open:
            is_open = not is_open
            continue
        elif c in digits:
            offset = number_end(code, i)
            s = code[i - 1:i + offset]
            t = Token(Type.number)
            # Integer first; anything int() rejects (fraction, exponent)
            # is parsed as a float.
            try:
                t.value = int(s)
            except ValueError:
                t.value = float(s)
            i += offset
            tokens.append(t)
        elif c in keywords:
            word = keywords[c]
            # Check the full literal rather than trusting its first letter.
            if not code.startswith(word, i - 1):
                print("*** 错误", c, code[i:i + 10])
                return
            t = Token(Type.keyword)
            t.value = word
            tokens.append(t)
            # Only the remaining letters are skipped: the first one was
            # consumed by ``i += 1`` above. Skipping len(word) would drop
            # the character right after the literal.
            i += len(word) - 1
        else:
            print("*** 错误", c, code[i:i + 10])
            return
    return tokens
Exemplo n.º 7
0
    def string(self) -> Token:
        """Return a literal string token (STRING_CONST).

        The current character (the opening delimiter) is skipped, then
        characters are collected while they are alphabetic or listed in
        ``self.SINGLE_CHARACTERS``. Whitespace following a collected
        character is recorded as a single space and skipped with
        ``self.skip_whitespace()``. One more character (the closing
        delimiter) is skipped afterwards.

        Reaching end of input inside the literal ends the scan with the
        text gathered so far instead of raising on a ``None`` character.

        Returns:
            Token: a ``STRING_CONST`` token whose value is the collected
                text, positioned at ``self.t_line``/``self.t_column`` as
                they were on entry.
        """

        token = Token(type=None,
                      value=None,
                      line=self.t_line,
                      column=self.t_column)

        # Skip the opening delimiter.
        self.advance()

        value = ""
        # The None check guards both tests: the original precedence
        # evaluated ``None in self.SINGLE_CHARACTERS`` at end of input.
        while self.current_char is not None and (
                self.current_char.isalpha()
                or self.current_char in self.SINGLE_CHARACTERS):
            value += self.current_char
            self.advance()

            # advance() may have hit end of input; only test for
            # whitespace when there is a character to test.
            if self.current_char is not None and self.current_char.isspace():
                value += " "
                self.skip_whitespace()

        # Skip the closing delimiter.
        self.advance()

        token.type = TokenType.STRING_CONST
        token.value = value

        return token
Exemplo n.º 8
0
def json_tokens(code):
    """Split JSON text into a flat list of tokens.

    Produces ``Type.auto`` tokens for ``: , { } [ ]``, ``Type.string``
    tokens for quoted strings (via ``string_end``, defined elsewhere),
    ``Type.number`` tokens whose value is the digit run as a *string*
    (fractions and negative numbers are not supported), and
    ``Type.keyword`` tokens for ``true``/``false``/``null``.

    Keywords are recognised by their first letter only; the remaining
    letters are skipped without being checked. Characters that match no
    rule are ignored.

    Args:
        code: the JSON source text.

    Returns:
        list: the tokens in source order.
    """
    length = len(code)
    tokens = []
    # Whitespace to skip. The space character is listed explicitly; it
    # used to be skipped only by falling through to the final branch.
    spaces = ' \n\t\r'
    digits = '1234567890'
    keywords = {'t': 'true', 'f': 'false', 'n': 'null'}
    # Current index into code.
    i = 0
    while i < length:
        c = code[i]
        i += 1
        if c in spaces:
            continue
        elif c in ':,{}[]':
            # One of the six single-character symbols.
            tokens.append(Token(Type.auto, c))
        elif c == '"':
            # String: string_end returns the text and an index; scanning
            # resumes just after that index.
            s, offset = string_end(code, i)
            i = offset + 1
            tokens.append(Token(Type.string, s))
        elif c in digits:
            # Number: extend over the following digits. Scanning by index
            # also handles a number that runs to the end of the input,
            # which previously was split into one token per digit.
            start = i - 1
            while i < length and code[i] in digits:
                i += 1
            tokens.append(Token(Type.number, code[start:i]))
        elif c in keywords:
            word = keywords[c]
            t = Token(Type.keyword)
            t.value = word
            tokens.append(t)
            # The first letter is already consumed; skip only the rest so
            # the character following the literal is not lost.
            i += len(word) - 1
        else:
            # Unrecognised character: ignored.
            pass
    return tokens
Exemplo n.º 9
0
	def next_token(self):
		"""Drive the scanner's state machine until one token is complete.

		Characters are read from ``self.source_text`` and classified with
		``self.character_look_up``. For each (state, class) pair the
		transition helpers ``self.next_state`` and ``self.action`` decide
		whether to extend the token, finish it, or report an error.

		Returns:
			A ``Token``. If ``self.eof()`` is true on entry its ``value``
			is ``None``; otherwise ``type`` is the integer from
			``self.look_up`` and ``value`` is the accumulated text.

		Raises:
			SyntaxError: when the transition helpers report an error for
				a character whose class is neither 30 nor 31.
		"""
		found = False
		tok = Token()
		
		# Nothing left to scan: return a token whose value is None.
		if self.eof():
			tok.value=None
			return tok
		
		while not found:
			# Read a fresh character unless one is pending from the previous
			# halt; a pending blank or newline is replaced by the next char.
			# NOTE(review): eof() is only checked on entry, so this index can
			# run past the end if the input stops mid-token - confirm the
			# source always ends in a terminating character.
			if (not self.buffered) or (self.current_char == ' ') or (self.current_char == '\n'):
				self.current_char = self.source_text[self.index]
				self.index += 1
			
			self.log("Current char: "+self.current_char+", EOF Status: "
				+str(self.index == len(self.source_text)), self.L_DEBUG)
			
			# Map the raw character to the class used by the transition helpers.
			self.current_read = self.character_look_up(self.current_char)
				
			# Debug dump of the machine state before taking the transition.
			cur_stats =  "Current state: "+str(self.state)+", "
			cur_stats += "current_char: "+str(self.current_char)+", "
			cur_stats += "current_read: "+str(self.current_read)+", "
			cur_stats += "token status: "+str(self.token_under_construction)
			self.log(cur_stats, self.L_DEBUG)
			
			# CONTINUE: the character belongs to the token; append it and
			# move to the next state.
			if ((self.next_state(self.state, self.current_read) != -1) and 
					(self.action(self.state, self.current_read) == CONTINUE)):
				
				self.buffered = False
				self.token_under_construction += self.current_char
				self.state = self.next_state(self.state, self.current_read)
				
			# HALT: the token ended before this character. The character is
			# not consumed; buffered=True keeps it for the next call.
			elif ((self.next_state(self.state, self.current_read) == -1) and
					(self.action(self.state, self.current_read) == HALT)):
					
				look_up = self.look_up(self.state, self.current_read)
				self.log("Inside switch with state "+str(self.state), self.L_DEBUG)
				self.log("The look-up value is "+str(look_up), self.L_DEBUG)
				self.log("We have a buffered char of '"+self.current_char+"'", self.L_DEBUG)
				self.buffered = True
					
				self.log_token(self.token_look_up(look_up))
				
				tok.type = int(look_up)
				tok.value = self.token_under_construction
					
				# Return to the start state (S0) for the next token.
				self.state = 0
				
				# Clear the accumulator for the next token.
				self.token_under_construction = ""
				
				found = True

			# ERROR: illegal character. Classes 30 and 31 are exempt; their
			# meaning is not visible here (presumably whitespace classes -
			# TODO confirm). For them no branch runs and the loop repeats.
			elif ((self.next_state(self.state, self.current_read) == -1) and
					(self.action(self.state, self.current_read) == ERROR) and 
					(self.current_read != 30) and (self.current_read != 31)):
				self.log("Illegal character '"+self.current_char+"'", self.L_ERROR)
				raise SyntaxError("Illegal character '"+self.current_char+"'")

				
		# End while
		return tok