Example #1
    def generate_tokens(self):
        tokens = []

        while self.current_token is not None:

            if self.current_token in " \t":
                self.advance()

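            # An operator maps directly to its token type; after a constant ('0'/'1')
            # or ')', an implicit AND is inserted when a literal, '!' or '(' follows.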
            elif self.current_token in OPERATORS:
                tokens.append(Token(OPERATORS[self.current_token]))
                if self.current_token in "01)":
                    next_ = self.peek()
                    if next_ is not None and next_ in string.ascii_letters + "!(":
                        tokens.append(Token(TokenType.AND))
                self.advance()

            elif self.current_token in string.ascii_letters:
                next_ = self.peek()
                if next_ is not None and next_ not in "+^>=*)":
                    tokens.append(Token(TokenType.LITERAL, self.current_token))
                    tokens.append(Token(TokenType.AND))
                else:
                    tokens.append(Token(TokenType.LITERAL, self.current_token))
                self.advance()

            else:
                print("ERR")
                return None

        return tokens
Example #2
    def nextToken(self):
        global maxLenghtOfIdentifier

        lexeme = self.lexer.token()
        if not lexeme:
            # No token was returned: EOF reached, so emit an EOF token.
            token = Token('tc_EOF', ('', 'op_NONE'), 'dt_NONE', self.lexer.lineno, 0)
            return token

        if lexeme.type == 'tc_ID' and len(lexeme.value) > maxLenghtOfIdentifier:
            TokenCode = 'tc_ID2LONG'
        else:
            TokenCode = lexeme.type

        # Not every lexeme carries an operator or data type; fall back to the NONE markers.
        opType = getattr(lexeme, 'OpType', 'op_NONE')
        DataValue = lexeme.value, opType
        DataType = getattr(lexeme, 'DataType', 'dt_NONE')

        token = Token(TokenCode, DataValue, DataType, lexeme.lineno,
                      self._find_tok_column(lexeme))
        return token
Example #3
    def __init__(self):
        self.ts = {}

        self.ts['if'] = Token(Tag.KW_IF, 'if', 0, 0)
        self.ts['else'] = Token(Tag.KW_ELSE, 'else', 0, 0)
        self.ts['then'] = Token(Tag.KW_THEN, 'then', 0, 0)
        self.ts['print'] = Token(Tag.KW_PRINT, 'print', 0, 0)
Example #4
    def Tokenize(self, source):
        tokens = list()
        token = ''
        state = TokenizeState.DEFAULT

        index = 0

        while index < len(source):
            chr = source[index]
            if state == TokenizeState.DEFAULT:
                opType = self.FindOpType(chr)
                if self.IsOp(chr):
                    tokens.append(Token(str(chr), opType))
                elif self.IsParen(chr):
                    parenType = self.FindParenType(chr)
                    tokens.append(Token(str(chr), parenType))
                elif chr.isdigit():
                    token = token + chr
                    state = TokenizeState.NUMBER

            # Handles multi-digit numbers
            elif state == TokenizeState.NUMBER:
                if chr.isdigit():
                    token = token + chr
                else:
                    tokens.append(Token(token, TokenizeState.NUMBER))
                    token = ""
                    state = TokenizeState.DEFAULT
                    index -= 1

            index += 1

        # Flush a number that runs to the end of the source; otherwise it would be lost
        if state == TokenizeState.NUMBER and token:
            tokens.append(Token(token, TokenizeState.NUMBER))

        return tokens
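The DEFAULT/NUMBER pattern above is easier to study in isolation. Below is a minimal, self-contained sketch of the same two-state idea, with hypothetical Token and TokenizeState stand-ins rather than the project's own classes; note the final flush for a number that ends the source.

from enum import Enum, auto

class TokenizeState(Enum):
    DEFAULT = auto()
    NUMBER = auto()

class Token:
    def __init__(self, text, kind):
        self.text, self.kind = text, kind
    def __repr__(self):
        return f'Token({self.text!r}, {self.kind})'

def tokenize(source):
    tokens, buf, state = [], '', TokenizeState.DEFAULT
    index = 0
    while index < len(source):
        ch = source[index]
        if state == TokenizeState.DEFAULT:
            if ch in '+-*/':
                tokens.append(Token(ch, 'OPERATOR'))
            elif ch in '()':
                tokens.append(Token(ch, 'PAREN'))
            elif ch.isdigit():
                buf += ch
                state = TokenizeState.NUMBER
        elif state == TokenizeState.NUMBER:
            if ch.isdigit():
                buf += ch
            else:
                tokens.append(Token(buf, TokenizeState.NUMBER))
                buf, state = '', TokenizeState.DEFAULT
                index -= 1      # re-read this character in the DEFAULT state
        index += 1
    if buf:                     # flush a number that ends the source
        tokens.append(Token(buf, TokenizeState.NUMBER))
    return tokens

print(tokenize('12+(3*45)'))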
Example #5
    def punct(self):
        curr_char = self.curr_char()
        self.forward()

        # Greedily try a two-character punctuator; if it does not match, fall back to one character
        next_next = self.curr_char()
        if next_next:
            next_next = curr_char + next_next

        tk = Token(next_next, None, self.line, 1)
        # Falling back also prevents '-'/'--' and '+'/'++' mix-ups
        if tk.type() == "IDENTIFIER" or curr_char in ('+', '-'):
            tk = Token(curr_char, None, self.line, 1)
        else:
            self.forward()  # consume the extra character

        # Addop situations
        if not self.addop_flag and (curr_char == '+' or curr_char == '-'):
            tk = Token(curr_char * 2, None, self.line, 1)
        elif curr_char == ')' or curr_char == ']':
            self.addop_flag = True
        else:  # the flag is consumed either way, since it is only valid for one returned token
            self.addop_flag = False

        return tk
Example #6
 def __parse_parentheses(self, par_, buf_):
     buf = self.__parse_buf(buf_)
     if par_ == '(':
         self.tokens.append(Token(TokenType.OpenParentheses))
     else:
         self.tokens.append(Token(TokenType.CloseParentheses))
     return buf
Example #7
 def _scan(self) -> None:
     '''Scan an expression and split it into operand and operator tokens, even without blanks'''
     self.tokens = []
     # index of a number starting at
     tokenStart = 0
     # delete blanks
     withoutBlank = self._sourceStr.replace(' ', '')
     # read the expression
     for index, char in enumerate(withoutBlank):
         # meeting an operator means the end of a number
         if char in Token.operator:
             # negatives
             if char == '-' and index == tokenStart and (
                     index == 0 or self.tokens[-1].isOperator()):
                 continue
             # only append an operand if digits have accumulated (handles '()' and adjacent operators)
             if index != tokenStart:
                 # append the operand
                 self.tokens.append(
                     Token(float(withoutBlank[tokenStart:index])))
             # append the operator or '()'
             self.tokens.append(Token(char))
             # update the start index of the next number
             tokenStart = index + 1
         # append the last number
         elif index == len(withoutBlank) - 1:
             self.tokens.append(
                 Token(float(withoutBlank[tokenStart:index + 1])))
Example #8
     def get_next_token(self):
        """Lexical analyzer (also known as scanner or tokenizer)
        This method is responsible for breaking a sentence
        apart into tokens.
        """
        while self.current_char is not None:

            if self.current_char.isspace():
                self.skip_whitespace()
                continue

            if self.current_char.isdigit():
                return Token(INTEGER, self.integer())

            if self.current_char == '+':
                self.advance()
                return Token(PLUS, '+')

            if self.current_char == '-':
                self.advance()
                return Token(MINUS, '-')

            self.error()

        return Token(EOF, None)
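The examples on this page define get_next_token but never show the calling side. Below is a minimal, self-contained sketch of how such a pull-based lexer is typically driven until EOF; every name in it (Lexer, INTEGER, PLUS, MINUS, EOF) is a hypothetical stand-in rather than a class from the projects above.

INTEGER, PLUS, MINUS, EOF = 'INTEGER', 'PLUS', 'MINUS', 'EOF'

class Token:
    def __init__(self, type_, value):
        self.type, self.value = type_, value
    def __repr__(self):
        return f'Token({self.type}, {self.value!r})'

class Lexer:
    def __init__(self, text):
        self.text, self.pos = text, 0
        self.current_char = text[0] if text else None

    def advance(self):
        self.pos += 1
        self.current_char = self.text[self.pos] if self.pos < len(self.text) else None

    def integer(self):
        digits = ''
        while self.current_char is not None and self.current_char.isdigit():
            digits += self.current_char
            self.advance()
        return int(digits)

    def get_next_token(self):
        while self.current_char is not None:
            if self.current_char.isspace():
                self.advance()
                continue
            if self.current_char.isdigit():
                return Token(INTEGER, self.integer())
            if self.current_char == '+':
                self.advance()
                return Token(PLUS, '+')
            if self.current_char == '-':
                self.advance()
                return Token(MINUS, '-')
            raise ValueError(f'unexpected character {self.current_char!r}')
        return Token(EOF, None)

# The caller pulls tokens one at a time until EOF:
lexer = Lexer('12 + 3 - 4')
token = lexer.get_next_token()
while token.type != EOF:
    print(token)
    token = lexer.get_next_token()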
Example #9
    def __init__(self, lexer):
        self.tokens = lexer.tokens
        self.curToken = Token(0, '', '')
        self.peekToken = Token(0, '', '')
        self.tokenIdx = 1
        self.prefix = 'kp'

        self.vt = {}
        self.ft = {}

        self.ifc = 0
        self.whilec = 0

        self.lparen = 0
        self.rparen = 0
        if len(self.tokens) == 1:
            self.curToken = self.tokens[0]
        elif len(self.tokens) > 1:
            self.curToken = self.tokens[0]
            self.peekToken = self.tokens[1]
        else:
            self.error()
        self.tab = '    '
        self.start()
        #self.S(0)
        print ""
Example #10
    def cria_Token(self, tipoToken, lexema, linha, coluna):

        # If it is one of these valid types, it will be registered in the symbol table.
        if tipoToken in (self.literal["Inteiro_classe"], self.literal["char_classe"],
                         self.literal["cadeia_classe"], self.literal["variavel_classe"],
                         self.literal["Flutuante_classe"]):

            # If the lexeme is not yet in the table, insert it.
            if lexema not in self.sequencia_Simbolos.values():

                # Insert the token into the symbol table and add it to the token stream.
                index = len(self.sequencia_Simbolos)
                self.sequencia_Simbolos[index] = lexema
                token = Token(tipoToken, lexema, linha, coluna, index)
                self.sequencia_Tokens.append(token)
                return
            else:
                # The token already exists, so look up its index and add it to the token stream.
                # Note: this lookup should be done better.
                index = [chave for chave in self.sequencia_Simbolos
                         if self.sequencia_Simbolos[chave] == lexema][0]
                token = Token(tipoToken, lexema, linha, coluna, index)
                self.sequencia_Tokens.append(token)
                return
        else:
            # The lexeme's type does not require a symbol-table entry, so just add it to the token stream.
            token = Token(tipoToken, lexema, linha, coluna)
            self.sequencia_Tokens.append(token)
            return
Example #11
    def __simplify(self):
        output = deque()
        for token in self.tokens:
            if len(output) > 0:
                if (token.type == TokenType.MinusOperator
                        and output[-1].type == TokenType.MinusOperator):
                    output.pop()
                    output.append(Token(TokenType.PlusOperator))
                    continue
                elif (token.type == TokenType.MinusOperator
                        and output[-1].type == TokenType.PlusOperator):
                    output.pop()
                    output.append(Token(TokenType.MinusOperator))
                    continue
                elif (token.type == TokenType.PlusOperator
                        and output[-1].type == TokenType.PlusOperator):
                    output.pop()
                    continue
                elif ((token.type == TokenType.MulOperator
                        and output[-1].type == TokenType.MulOperator)
                        or (token.type == TokenType.DivOperator
                            and output[-1].type == TokenType.DivOperator)):
                    raise ValueError("Invalid expression")

            output.append(token)
        self.tokens = output
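The folding rule is easier to see on bare '+'/'-' characters. The sketch below is a hypothetical, stand-alone version of the same idea (fold_signs is not part of the class above); unlike __simplify it also folds a '+' that follows a '-'.

def fold_signs(chars):
    """Collapse adjacent '+'/'-' signs: '--' -> '+', '+-' and '-+' -> '-'."""
    out = []
    for ch in chars:
        if out and ch in '+-' and out[-1] in '+-':
            prev = out.pop()
            # the result is '-' exactly when the two signs differ
            out.append('-' if (prev == '-') != (ch == '-') else '+')
        else:
            out.append(ch)
    return out

print(fold_signs(list('1--2')))   # ['1', '+', '2']
print(fold_signs(list('1+-2')))   # ['1', '-', '2']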
Example #12
    def make_number(self, pos, line):
        num_str = ''
        dot_count = 0
        cur_pos = 0
        for i in range(pos, len(self.text)):
            if self.text[i] == ".":
                if dot_count == 1:
                    cur_pos = i
                    break
                dot_count += 1
                num_str += '.'
            elif self.text[i] in (' ', '\n') or self.text[i] in OPERATOR:
                cur_pos = i
                break
            elif self.text[i].isalpha():
                self.log.addError('1', line)
                self.log.print()
                self.log.panicMode()
            else:
                num_str += self.text[i]
                cur_pos = i

        self.position = cur_pos
        self.flag = True
        if dot_count == 0:
            return Token(T_INT, int(num_str), line, pos, cur_pos, 'num')
        else:
            return Token(T_FLOAT, float(num_str), line, pos, cur_pos, 'num')
Example #13
def tokenize(line: str, lineno: int, symtable: SymbolTable):
    current = initialNode
    tokens = []
    value = ''
    for col, char in enumerate(line):
        if current is None:
            lexException(line, lineno, col - 1)
        if current is initialNode and char in WHITE:
            continue
        nxt = current.move(char)
        if nxt is not None:
            value += char
            current = nxt
        elif current.is_final:
            if current is intNode or current is floatNode:
                tokens.append(Token(Sym(NUM, value), lineno, col))
            else:
                symbol = symtable.lookup(value)
                tokens.append(Token(symbol, lineno, col))
            current = initialNode if char in WHITE else initialNode.move(char)
            value = '' if char in WHITE else char
        else:
            lexException(line, lineno, col)
    if current.is_final:
        if current is intNode or current is floatNode:
            tokens.append(Token(Sym(NUM, value), lineno, col))
        else:
            symbol = symtable.lookup(value)
            tokens.append(Token(symbol, lineno, col))
    return tokens
Example #14
    def check_content(self, content):
        content_list = Token().analyzeCode(content)
        if len(content_list) == 1 and content_list[0] not in Token().token_dict['key word']:
            return {'number': content_list[0]}
        else:
            d = {}
            content = ' '.join(content_list)
            if any(i in content for i in [':=', '+', '-', '*', '/', 'div']):
                try:
                    if ':=' in content:
                        content_before, content_after = content.split(' := ')
                        if len(Token().analyzeCode(content_before)) == 1:
                            d['identifier'] = content_before
                            d['owner word'] = ':='
                        else:
                            return '\nError in operand\n{}\nUnexpected symbol'.format(content_before)
                    else:
                        content_after = content
                except ValueError:
                    # raised when the split does not yield exactly two parts
                    return '\nError in operand\n{}\nUnexpected symbol'.format(content)

                if len(Token().analyzeCode(content_after)) == 1:
                    d['number'] = content_after
                elif Token().parse_commands(content, 0) is None:
                    d['number'] = content_after
                else:
                    return '\nError in operand\n{}\nUnexpected symbol'.format(content_after)
            else:
                return '\nError in operand\n{}\nUnexpected symbol'.format(content)
            return d
Example #15
 def test_map_parselet_with_non_map(self):
     mp = MapParselet()
     # Each malformed input must be checked on its own: a single assertRaises
     # block stops at the first exception and never runs the remaining calls.
     for source in ["(a, b, c)", "a, b, c", "{a, b, c", "a, b, c}", "<a, b, c>"]:
         with self.assertRaises(Exception):
             mp.parse(None, Token(TokenType.MAP, source))
Example #16
 def test_set_parselet_with_non_set(self):
     sp = SetParselet()
     # Each malformed input must be checked on its own: a single assertRaises
     # block stops at the first exception and never runs the remaining calls.
     for source in ["(a, b, c)", "a, b, c", "[a, b, c", "a, b, c]", "<a, b, c>"]:
         with self.assertRaises(Exception):
             sp.parse(None, Token(TokenType.SET, source))
Example #17
 def number(self):
     """recognizes and returns an integer or float token"""
     integer = self.get_integer()
     if self.current_char == '.':
         self.advance()
         # join the integer and fractional parts with a decimal point
         # (note: leading zeros in the fractional part would still be lost if get_integer returns an int)
         floating = float(str(integer) + '.' + str(self.get_integer()))
         return Token(self.tokentype['FLOAT'], floating)
     return Token(self.tokentype['INT'], integer)
Example #18
def main():
    plus = Token(Token.PLUS, '+')
    one = Token(Token.INT, '1')
    two = Token(Token.INT, '2')
    root = Ast(plus)
    root.add_child(Ast(one))
    root.add_child(Ast(two))
    print("1+2 tree: " + str(root))
Example #19
    def get_next_token(self) -> Token:
        """Here the Lexical Analysis will take place, so the sentences will be broken
        one at a time into smaller parts

        Returns:
            Token: the Token object with all informations about the found token

        Raises:
            ValueError: a TokenizeError when the current_token is not found in the grammar
        """

        while self.current_char is not None:
            if self.current_char.isspace():
                self.skip_whitespace()
                continue

            if self.current_char == "{":
                self.advance()
                self.skip_comment()
                continue

            if self.current_char.isalpha():
                return self.handle_with_id_tokens()

            if self.current_char in ["'", '"']:
                return self.string()

            if self.current_char.isdigit():
                return self.number()

            if self.current_char == ":" and self.tokenize_assign_statements(
            ) == "=":
                token = Token(
                    type=TokenType.ASSIGN,
                    value=TokenType.ASSIGN.value,
                    line=self.t_line,
                    column=self.t_column,
                )
                self.advance()
                self.advance()
                return token

            try:
                token_type = TokenType(self.current_char)
            except ValueError:
                TokenizerErrorHandler.error(self.current_char, self.t_line,
                                            self.t_column)
            else:
                token = Token(
                    type=token_type,
                    value=token_type.value,
                    line=self.t_line,
                    column=self.t_column,
                )
                self.advance()
                return token

        return Token(type=TokenType.EOF, value=None)
Example #20
def loads(code):
    s = 0
    i = 0
    length = len(code)
    tokens = []
    spaces = ' \b\f\n\r\t'
    digits = '0123456789'

    is_open = True
    while i < length:
        c = code[i]
        i += 1
        if c in spaces:
            continue
        elif c in ':,[]{}':
            t = Token(Type.auto, c)
            tokens.append(t)
        elif c == '"' and is_open:
            # consume a string
            result, index = string_end(code, i)
            if index != -1:
                t = Token(Type.string)
                t.value = result
                i = index
                tokens.append(t)
                is_open = not is_open
            else:
                return
        elif c == '"' and not is_open:
            is_open = not is_open
            continue
        elif c in digits:
            # consume a number
            offset = number_end(code, i)
            t = Token(Type.number)
            # TODO: this might be a float; the type should be checked
            t.value = int(code[i - 1:i + offset])
            i += offset
            tokens.append(t)
        elif c in 'tfn':
            # true false null
            kvs = dict(
                t='true',
                f='false',
                n='null',
            )
            # should verify that this really is true/false/null
            t = Token(Type.keyword)
            t.value = kvs[c]
            tokens.append(t)
            i += len(kvs[c])
        else:
            print("*** error", c, code[i:i + 10])
            return
    return tokens
Example #21
 def _getNextToken(self):
     self._skipWhiteSpace()
     if self._currentChar.isdigit():
         self._currentToken = Token(self._getInteger())
     elif self._currentChar == Scanner.EOE:
         self._currentToken = Token(';')
     else:
         self._currentToken = Token(self._currentChar)
         self._nextChar()
Example #22
def __tokenize_strings(token_strings):
    tokens = []
    for token_string in token_strings:
        if token_string.isdigit():
            tokens.append(Token(token_types.INTEGER, int(token_string)))
        elif token_string == '+':
            tokens.append(Token(token_types.PLUS, token_string))

    return tokens
Example #23
 def string(self):
     """recognizes and returns a string token"""
     _string = ''
     # stop at the closing quote; the None check guards against an unterminated string at end of input
     while self.current_char is not None and self.current_char != '"':
         _string += self.current_char
         self.advance()
     # return CHARACTER token if length of string is less than 2
     if len(_string) == 1:
         return Token(self.tokentype['CHAR'], _string)
     return Token(self.tokentype['STRING'], _string)
Example #24
File: link.py  Project: ivenwan/esim
    def checkTag(self):
        r = random.randint(0, 10)
        if r < 6:  # cache hit
            print('%s cache hit' % self.name)
            emitToken = Token(self, 'cacheHit')
        else:  # cache miss
            print('%s cache miss' % self.name)
            emitToken = Token(self, 'cacheMiss')

        tokenPool.insert(emitToken)
Example #25
def loads(code):
    i = 0
    length = len(code)
    tokens = []
    spaces = ' \b\f\n\r\t'
    digits = '0123456789'

    is_open = True
    while i < length:
        c = code[i]
        i += 1
        if c in spaces:
            continue
        elif c in ':,[]{}':
            t = Token(Type.auto, c)
            tokens.append(t)
        elif c == '"' and is_open:
            result, index = string_end(code, i)
            if index != -1:
                t = Token(Type.string)
                t.value = result
                i = index
                tokens.append(t)
                is_open = not is_open
            else:
                return
        elif c == '"' and not is_open:
            is_open = not is_open
            continue
        elif c in digits:
            offset = number_end(code, i)
            t = Token(Type.number)
            s = code[i - 1:i + offset]
            # decide whether the number is a float
            if '.' in s:
                t.value = float(s)
            else:
                t.value = int(s)
            i += offset
            tokens.append(t)
        elif c in 'tfn':
            # true false null
            kvs = dict(
                t='true',
                f='false',
                n='null',
            )
            t = Token(Type.keyword)
            t.value = kvs[c]
            tokens.append(t)
            i += len(kvs[c])
        else:
            print("*** error", c, code[i:i + 10])
            return
    return tokens
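Both loads variants rely on string_end and number_end helpers that are not shown. The sketch below is a hypothetical reconstruction consistent with how they are called above, not the project's actual helpers: string_end(code, i) returns the decoded string and the index of the closing quote (or -1 when unterminated), and number_end(code, i) returns how many number characters follow position i.

def string_end(code, i):
    # scan from the character after the opening quote up to the closing quote
    result = ''
    while i < len(code):
        c = code[i]
        if c == '"':
            return result, i          # index of the closing quote
        if c == '\\' and i + 1 < len(code):
            result += code[i + 1]     # keep the escaped character verbatim (simplified)
            i += 2
            continue
        result += c
        i += 1
    return '', -1                     # unterminated string

def number_end(code, i):
    # count how many digits or '.' follow position i (the first digit sits at i - 1);
    # exponents and negative numbers are not handled in this sketch
    offset = 0
    while i + offset < len(code) and code[i + offset] in '0123456789.':
        offset += 1
    return offset

print(string_end('"abc", 1', 1))   # ('abc', 4)
print(number_end('123,', 1))       # 2  -> the number is code[0:3] == '123'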
Example #26
    def parse(self, tokenizer, state=None):
        from token import Token

        lookahead = None
        lookaheadstack = []

        statestack = [0]
        symstack = [Token("$end", "$end")]

        current_state = 0
        while True:
            if self.lr_table.default_reductions[current_state]:
                t = self.lr_table.default_reductions[current_state]
                current_state = self._reduce_production(
                    t, symstack, statestack, state)
                continue

            if lookahead is None:
                if lookaheadstack:
                    lookahead = lookaheadstack.pop()
                else:
                    try:
                        lookahead = next(tokenizer)
                    except StopIteration:
                        lookahead = None

                if lookahead is None:
                    lookahead = Token("$end", "$end")

            ltype = lookahead.gettokentype()
            if ltype in self.lr_table.lr_action[current_state]:
                t = self.lr_table.lr_action[current_state][ltype]
                if t > 0:
                    statestack.append(t)
                    current_state = t
                    symstack.append(lookahead)
                    lookahead = None
                    continue
                elif t < 0:
                    current_state = self._reduce_production(
                        t, symstack, statestack, state)
                    continue
                else:
                    n = symstack[-1]
                    return n
            else:
                # TODO: actual error handling here
                if self.error_handler is not None:
                    if state is None:
                        self.error_handler(lookahead)
                    else:
                        self.error_handler(state, lookahead)
                    raise AssertionError("For now, error_handler must raise.")
                else:
                    raise ParsingError(None, lookahead.getsourcepos())
Example #27
    def stack_machine_run(self, theard_fl=False, num=-1):
        while self.token_count < len(self.tokens):
            #print_tokens(self.tokens[self.token_count:])
            if self.tokens[self.token_count].get_type() in ('VAR', 'DIGIT'):
                self.stack.append(self.tokens[self.token_count])
            elif self.tokens[self.token_count].get_type() == 'FN_VALUE':
                if theard_fl:
                    out = self.fun_calculate(
                        self.tokens[self.token_count].get_value(),
                        self.tokens[self.token_count + 1], True)
                    self.token_count += 2
                    return 'wait', out
                else:
                    out = self.fun_calculate(
                        self.tokens[self.token_count].get_value(),
                        self.tokens[self.token_count + 1])
                self.stack.append(out)
                self.token_count += 1

            elif self.tokens[self.token_count].get_type() == 'ARI_OP':
                self.stack.append(self.calculate(
                    self.tokens[self.token_count]))
            elif self.tokens[self.token_count].get_type() == 'ASSIGN_OP':
                self.assign_op()
            elif self.tokens[self.token_count].get_type() == 'LOG_OP':
                self.stack.append(self.calculate(
                    self.tokens[self.token_count]))
            elif self.tokens[self.token_count].get_type() == 'GO_F':
                flag = self.stack.pop().get_value()
                if not flag:
                    self.token_count = self.tokens[self.token_count].get_value()
            elif self.tokens[self.token_count].get_type() == 'GO_A':
                self.token_count = self.tokens[self.token_count].get_value()

            if (theard_fl
                    and self.tokens[self.token_count].get_type() in ('ARI_OP', 'ASSIGN_OP', 'LOG_OP')
                    and self.token_count + 2 < len(self.tokens)):
                self.token_count += 1
                return 'ready', []

            self.token_count += 1
        if theard_fl:
            if self.tokens[-1].get_type() != 'RETURN':
                return 'exit', self.value_table
            else:
                out = self.stack.pop()
                if out.get_type() == 'VAR':
                    var = self.find_value(out.get_value())
                    return 'exit', Token('DIGIT', var)
                else:
                    return 'exit', Token('DIGIT', out.get_value())

        print(self.value_table)
Example #28
    def fill(self):

        while len(self.buffer) - self.typePos < self.width / 2:
            n = self.lib.next()

            if n[-2] == "'" and not self.useApostrophes:
                n = n[:-2]

            for s in list(n):
                self.buffer.append(Token(s))
            self.buffer.append(Token(' ', space=True))
Example #29
    def scan_tokens(self):
        while not self.is_at_end():
            # At the beginning of the next lexeme
            self.start = self.current
            self.scan_token()

        # Once all of the tokens are scanned, append DEDENTs and EOF
        for indent in self.indentation_stack:
            if indent > 0:
                self.tokens.append(Token(TT.DEDENT, '', None, self.line))
        self.tokens.append(Token(TT.EOF, '', None, self.line))
        return self.tokens
Example #30
    def get_next_token(self):
        while self.current_char is not None:
            # print(repr(self.current_char))

            if self.current_char.isspace():
                self.skip_whitespace()
                continue

            if self.current_char.isalpha():
                return self._id()

            if self.current_char.isdigit():
                return Token(INTEGER, self.integer())

            if self.current_char == '=':
                self.advance()
                return Token(ASSIGN, '=')

            if self.current_char == ';':
                self.advance()
                return Token(EOL, ';')

            if self.current_char == '+':
                self.advance()
                return Token(PLUS, '+')

            if self.current_char == '-':
                self.advance()
                return Token(MINUS, '-')

            if self.current_char == '*':
                self.advance()
                return Token(MUL, '*')

            if self.current_char == '/':
                self.advance()
                return Token(DIV, '/')

            if self.current_char == '(':
                self.advance()
                return Token(LPAREN, '(')

            if self.current_char == ')':
                self.advance()
                return Token(RPAREN, ')')

            if self.current_char == '"':
                return self.string()

            self.error()

        return Token(EOF, None)