Example #1
    def _generic_builder(self, type_1char, type_2chars):
        if self.source.current_char != type_1char.value:
            return None

        self.source.move_to_next_char()

        if self.source.current_char == type_2chars.value[-1]:
            self.source.move_to_next_char()
            return Token(type=type_2chars)
        return Token(type=type_1char)
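
This helper presumably backs each one-or-two-character operator pair (compare the '<'/'<=' cases in the test examples further down). Below is a self-contained sketch of the same logic with stand-in Token, TokenType, and source classes; the names and enum values are assumptions for illustration, not the project's actual API.

from enum import Enum

class TokenType(Enum):
    LESS = '<'   # member values assumed from the tests below
    LEQ = '<='

class Token:
    def __init__(self, type, value=None):
        self.type, self.value = type, value

class Source:
    """Minimal stand-in for the lexer's character source."""
    def __init__(self, text):
        self.text, self.pos = text, 0

    @property
    def current_char(self):
        # yields ETX ('\x03') once the input is exhausted
        return self.text[self.pos] if self.pos < len(self.text) else '\x03'

    def move_to_next_char(self):
        self.pos += 1

def generic_builder(source, type_1char, type_2chars):
    # Same shape as _generic_builder: consume the first character, then
    # upgrade to the two-character token if the next character completes it.
    if source.current_char != type_1char.value:
        return None
    source.move_to_next_char()
    if source.current_char == type_2chars.value[-1]:
        source.move_to_next_char()
        return Token(type=type_2chars)
    return Token(type=type_1char)

print(generic_builder(Source('<='), TokenType.LESS, TokenType.LEQ).type)  # TokenType.LEQ
print(generic_builder(Source('<3'), TokenType.LESS, TokenType.LEQ).type)  # TokenType.LESS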
Example #2
import sys  # needed for sys.exit below; project-specific imports are omitted here

def run(document):
    tokens = lexer.run(document)
    tokens = cut_of_eos(tokens)
    tokens = remove_repeated_eos(tokens)
    tokens.append(Token("\n", "", tokens[-1].line))

    parse_tree = None

    try:
        parse_tree = parser.parse(tokens)
    except InternalError as e:
        print(bcolors.HEADER + "There is a syntax error in your script!" +
              bcolors.ENDC)
        print(str(e.args[0]))

        if DETAILED_ERROR:
            print("\n\n-------Detailed Parse Tree-------")
            try:
                print(e.args[1])
            except Exception:
                pass
        sys.exit(1)

    ast = None
    try:
        ast = abstract_syntax_tree.generate(parse_tree, None)
    except InternalError as e:
        print(
            bcolors.HEADER +
            "There is a parse tree sequence without a corresponding AST rule, this most likely "
            "isn't an error with your script but with the interpreter." +
            bcolors.ENDC)
        try:
            body = ""
            for t in e.args[1].body:
                body += t.__repr__() + " "

            print(e.args[1].head + " -> " + body)
        except Exception:
            pass
        sys.exit(1)

    state = State()

    try:
        ast.generate(state)
    except InternalError as e:
        msg = bcolors.FAIL + str(e.args[0]) + bcolors.ENDC
        print(bcolors.HEADER +
              "An error occurred while executing your script!" + bcolors.ENDC)
        print(msg)

        if DETAILED_ERROR:
            print("\n\n-------Detailed Abstract Syntax Tree-------")
            try:
                print(ast)
            except Exception:
                pass

        sys.exit(1)
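
run() leans on a bcolors helper for terminal colors. Its definition isn't shown in this example; a typical minimal version of this common ANSI-escape pattern, covering only the attributes used above, is assumed to look like this:

class bcolors:
    # ANSI escape sequences; the exact constants are an assumption
    HEADER = '\033[95m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'   # reset to the default color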
Example #3
    def try_to_build_string(self):
        if self.source.current_char != '"':
            return None

        collected_chars = []

        self.source.move_to_next_char()

        while self.source.current_char != '"':
            # ETX appeared before the closing quote: unterminated string
            if self.source.current_char == TokenType.ETX.value:
                self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)

            # a backslash starts an escape sequence
            if self.source.current_char == '\\':
                self.source.move_to_next_char()

                collected_chars.append({
                    '"': '"',
                    '\\': '\\'
                }.get(self.source.current_char,
                      f'\\{self.source.current_char}'))
            else:
                collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()

        self.source.move_to_next_char()

        # convert to string
        result = ''.join(collected_chars)

        return Token(
            type=TokenType.STRING,
            value=result,
        )
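
Note how narrow the escape handling is: only \" and \\ are translated; any other backslash sequence is kept verbatim, backslash included. A standalone sketch of just that mapping:

def translate_escape(next_char):
    # Mirrors the dict.get() fallback above: unknown escapes pass
    # through as a literal backslash plus the character.
    return {'"': '"', '\\': '\\'}.get(next_char, f'\\{next_char}')

assert translate_escape('"') == '"'
assert translate_escape('\\') == '\\'
assert translate_escape('n') == '\\n'   # not a newline; kept verbatim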
Example #4
    def scan(self):
        self.scanTokens()
        # Append an end-of-file marker to the token stream
        self.tokens.append(Token("FIM", '', self.linha))
        # Walk the token table and retag reserved words with their type
        self.scanReserved()
        return self.tokens
Example #5
    def test_etx_on_comment_line(self):
        string = 'not_comment = 1; # a comment'
        lexer = Lexer(StringSource(string))

        tokens = [
            Token(TokenType.ID, 'not_comment'),
            Token(TokenType.ASSIGN),
            Token(TokenType.SCALAR, 1),
            Token(TokenType.SEMI),
            Token(TokenType.ETX)
        ]

        for expected_token in tokens:
            token = lexer.current_token
            self.assertEqual(expected_token.type, token.type)
            self.assertEqual(expected_token.value, token.value)
            lexer.build_next_token()
Example #6
def handle_number(document):
    global pos, cur_line
    total_str = ""
    while pos < len(document) and (is_number(document[pos])
                                   or document[pos] == "."):
        total_str += document[pos]
        pos += 1

    tokens.append(Token("LITERAL", float(total_str), cur_line))
Example #7
    def try_to_build_neq(self):
        if self.source.current_char != '!':
            return None

        self.source.move_to_next_char()

        if self.source.current_char != '=':
            self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
        self.source.move_to_next_char()
        return Token(type=TokenType.NEQ)
Example #8
    def try_to_build_single_char_token(self):
        # Handle single-character tokens
        try:
            token_type = TokenType(self.source.current_char)
        except ValueError:
            # No enum member with value equal to self.source.current_char.
            return None
        else:
            # No exception occurred.
            # Return the created single-character token.
            self.source.move_to_next_char()
            return Token(type=token_type)
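
This works because calling an Enum class with a value performs a reverse lookup: it returns the member whose value matches, or raises ValueError if none does. A minimal sketch with assumed member values:

from enum import Enum

class TokenType(Enum):
    PLUS = '+'   # values assumed for illustration
    SEMI = ';'

print(TokenType('+'))   # TokenType.PLUS
try:
    TokenType('@')       # no member carries this value
except ValueError:
    print("no single-character token for '@'")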
Example #9
def handle_string(document):
    global pos, tokens, cur_line
    pos += 1
    string = ""
    while pos < len(document) and document[pos] != "\"":
        string += document[pos]
        pos += 1

    if pos >= len(document):
        raise Exception("Unclosed string, in line: " + str(cur_line))

    tokens.append(Token("LITERAL", string, cur_line))
Example #10
    def scanTokens(self):
        while self.atual < len(self.programa):
            self.inicio = self.atual
            char = self.nextChar()

            if char == " " or char == "\t" or char == "\r":
                pass
            elif char == "\n":
                self.linha += 1

            elif char == "(" or char == ")" or char == "{" or char == "}":
                self.tokens.append(
                    Token(
                        self.delimitadoresToken(char),
                        self.programa[self.inicio:self.atual],
                        self.linha,
                    ))

            elif char == "+" or char == "-" or char == "*" or char == "/":
                self.tokens.append(
                    Token(
                        self.opAritmeticaToken(char),
                        self.programa[self.inicio:self.atual],
                        self.linha,
                    ))

            elif char == ":" or char == "=" or char == "!" or char == "<" or char == ">":
                self.tokens.append(
                    Token(
                        self.opBolleanaToken(char),
                        self.programa[self.inicio:self.atual],
                        self.linha,
                    ))

            # Comma
            elif char == ",":
                self.tokens.append(
                    Token("COMMA", self.programa[self.inicio:self.atual],
                          self.linha))

            # Numbers
            elif char >= "0" and char <= "9":
                while self.lookAhead() >= "0" and self.lookAhead() <= "9":
                    self.nextChar()
                self.tokens.append(
                    Token("NUM", self.programa[self.inicio:self.atual],
                          self.linha))

            # Letters / Identifiers / Reserved words
            elif char.isalpha():
                while self.lookAhead().isalnum():
                    self.nextChar()
                self.tokens.append(
                    Token("ID", self.programa[self.inicio:self.atual],
                          self.linha))

            # Other characters
            else:
                print("Caractere inválido na linha ", self.linha)
                exit(2)
Example #11
    def try_to_build_scalar(self):
        if not self.source.current_char.isdigit():
            return None

        collected_chars = []

        # Handle integer part of scalar
        if self.source.current_char == '0':
            collected_chars.append('0')
            self.source.move_to_next_char()
            if self.source.current_char.isdigit():
                self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)
        else:
            while self.source.current_char.isdigit():
                collected_chars.append(self.source.current_char)
                self.source.move_to_next_char()

        # Handle decimal part of scalar
        if self.source.current_char == '.':
            collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()

            if not self.source.current_char.isdigit():
                self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)

            while self.source.current_char.isdigit():
                collected_chars.append(self.source.current_char)
                self.source.move_to_next_char()

            # Handle scientific notation
            if self.source.current_char == 'e' or self.source.current_char == 'E':
                collected_chars.append(self.source.current_char)
                self.source.move_to_next_char()

                if self.source.current_char == '-' or self.source.current_char == '+':
                    collected_chars.append(self.source.current_char)
                    self.source.move_to_next_char()

                if not self.source.current_char.isdigit():
                    self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)

                while self.source.current_char.isdigit():
                    collected_chars.append(self.source.current_char)
                    self.source.move_to_next_char()

        # convert to string
        result = ''.join(collected_chars)

        return Token(
            type=TokenType.SCALAR,
            value=float(result),
        )
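
The accepted grammar is stricter than Python's float(): multi-digit integers may not start with 0, and an exponent is only legal after a fractional part. A regex that sketches roughly the same shape (an approximation, since the method raises an error rather than rejecting):

import re

# 0 or [1-9]digits*, optional .digits+, exponent only after a fraction
SCALAR_RE = re.compile(r'(?:0|[1-9][0-9]*)(?:\.[0-9]+(?:[eE][+-]?[0-9]+)?)?')

for text in ['0', '12', '12.345', '12.345e-6', '0.0001e-2']:
    assert SCALAR_RE.fullmatch(text), text
for text in ['007', '12e5', '1.']:
    assert not SCALAR_RE.fullmatch(text), text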
Example #12
def run(document):
    global pos, tokens, cur_line

    buffer = ""

    while pos < len(document):
        if document[pos] in SPECIAL_CHARS:
            if len(buffer) > 0:
                if is_keyword(buffer):
                    tokens.append(Token(buffer, "", cur_line))
                else:
                    tokens.append(Token("SYMBOL", buffer, cur_line))
                buffer = ""
                continue

            two_chars = document[pos:pos + 2]
            if two_chars in (">=", "==", "<=", "+=", "-="):
                tokens.append(Token(two_chars, "", cur_line))
                pos += 2
            else:
                if document[pos] == "\"":
                    handle_string(document)
                elif document[pos] != " ":
                    tokens.append(Token(document[pos], "", cur_line))
                pos += 1

        elif is_number(document[pos]) and len(buffer) == 0:
            handle_number(document)
        else:
            buffer += document[pos]
            pos += 1

        if pos < len(document) and document[pos] == "\n":
            cur_line += 1
    return tokens
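
This tokenizer relies on module-level state and helpers that the example doesn't show. A plausible minimal set, offered purely as assumptions to make the dependencies explicit:

# Assumed module-level state and helpers (not part of the example above)
pos = 0
cur_line = 1
tokens = []
SPECIAL_CHARS = set(' \n"(){}+-*/<>=,;')   # guessed delimiter set

def is_number(char):
    return char.isdigit()

def is_keyword(word):
    return word in {"if", "else", "while", "return"}   # placeholder keywords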
Example #13
def create_expected_binary_operator(parameters):
    binop = None
    actions = {
        'scalar': lambda value: Scalar(value),
        'op': lambda type: Token(TokenType(type)),
        'id': lambda id: Identifier(id),
        'prev': lambda _: binop
    }

    for param in parameters:
        if param[1][1] == '=':
            binop = Assignment(actions[param[0][0]](param[0][1]),
                               actions[param[2][0]](param[2][1]))
        else:
            binop = BinaryOperator(
                lvalue=actions[param[0][0]](param[0][1]),
                op=actions[param[1][0]](param[1][1]),
                rvalue=actions[param[2][0]](param[2][1])
            )

    return binop
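
Each entry in parameters appears to be a triple of (kind, value) pairs: left operand, operator, and right operand, with the 'prev' kind splicing in the node built on the previous iteration. A hypothetical call (Scalar, Identifier, BinaryOperator, and Assignment come from the project under test):

# Builds (2.0 + x) first, then chains it as ((2.0 + x) * y)
expected = create_expected_binary_operator([
    (('scalar', 2.0), ('op', '+'), ('id', 'x')),
    (('prev', None), ('op', '*'), ('id', 'y')),
])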
Example #14
    def test_double_char_operators(self):
        string = '<= > <= < < >= = < > >= < <= <= < >= <= >= >= != > >='
        tokens = [
            Token(TokenType.LEQ),
            Token(TokenType.GRE),
            Token(TokenType.LEQ),
            Token(TokenType.LESS),
            Token(TokenType.LESS),
            Token(TokenType.GEQ),
            Token(TokenType.ASSIGN),
            Token(TokenType.LESS),
            Token(TokenType.GRE),
            Token(TokenType.GEQ),
            Token(TokenType.LESS),
            Token(TokenType.LEQ),
            Token(TokenType.LEQ),
            Token(TokenType.LESS),
            Token(TokenType.GEQ),
            Token(TokenType.LEQ),
            Token(TokenType.GEQ),
            Token(TokenType.GEQ),
            Token(TokenType.NEQ),
            Token(TokenType.GRE),
            Token(TokenType.GEQ)
        ]
        lexer = Lexer(StringSource(string))

        for expected_token in tokens:
            token = lexer.current_token
            self.assertEqual(expected_token.type, token.type)
            self.assertEqual(expected_token.value, token.value)
            lexer.build_next_token()
Example #15
    def scanTokens(self):  # scan for the tokens
        while self.atual < len(self.programa):  # until the end of input
            self.inicio = self.atual
            char = self.nextChar()
            if char == ' ' or char == '\t' or char == '\r':
                pass
            elif char == '\n':
                self.linha += 1
            elif char == '(':  # Left parenthesis
                self.tokens.append(
                    Token("LBRACK", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == ')':  # Right parenthesis
                self.tokens.append(
                    Token("RBRACK", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '{':  # Left curly bracket
                self.tokens.append(
                    Token("LCBRACK", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '}':  # Right curly bracket
                self.tokens.append(
                    Token("RCBRACK", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '+':  # Addition
                self.tokens.append(
                    Token("SUM", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '-':  # Subtraction
                self.tokens.append(
                    Token("SUB", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '*':  # Multiplication
                self.tokens.append(
                    Token("MUL", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '/':  # Division
                self.tokens.append(
                    Token("DIV", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == '=':  # Equality or assignment
                if self.lookAhead() == '=':
                    self.atual += 1
                    self.tokens.append(
                        Token("EQUAL", self.programa[self.inicio:self.atual],
                              self.linha))
                else:
                    self.tokens.append(
                        Token("ATTR", self.programa[self.inicio:self.atual],
                              self.linha))
            elif char == '<':  # Not-equal, less-or-equal, or less
                if self.lookAhead() == '>':
                    self.atual += 1
                    self.tokens.append(
                        Token("DIFF", self.programa[self.inicio:self.atual],
                              self.linha))
                elif self.lookAhead() == '=':
                    self.atual += 1
                    self.tokens.append(
                        Token("LESSEQUAL",
                              self.programa[self.inicio:self.atual],
                              self.linha))
                else:
                    self.tokens.append(
                        Token("LESS", self.programa[self.inicio:self.atual],
                              self.linha))
            elif char == '>':  # Greater-or-equal or greater
                if self.lookAhead() == '=':
                    self.atual += 1
                    self.tokens.append(
                        Token("GREATEQUAL",
                              self.programa[self.inicio:self.atual],
                              self.linha))
                else:
                    self.tokens.append(
                        Token("GREAT", self.programa[self.inicio:self.atual],
                              self.linha))
            elif char == ',':  # Comma
                self.tokens.append(
                    Token("COMMA", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char == ';':  # Semicolon
                self.tokens.append(
                    Token("SEMICOLON", self.programa[self.inicio:self.atual],
                          self.linha))

            elif char >= '0' and char <= '9':  # Numbers
                while self.lookAhead() >= '0' and self.lookAhead() <= '9':
                    self.nextChar()
                self.tokens.append(
                    Token("NUMBER", self.programa[self.inicio:self.atual],
                          self.linha))
            elif char.isalpha():  # Letters / identifiers / reserved words
                while self.lookAhead().isalnum():
                    self.nextChar()
                self.tokens.append(
                    Token("ID", self.programa[self.inicio:self.atual],
                          self.linha))
            else:
                print('Invalid character on line:', self.linha)
                exit(2)
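
The cursor helpers nextChar and lookAhead aren't shown in this example; from how they're used, they presumably look like the following sketch (field names match the snippet, the bodies are assumptions):

    def nextChar(self):
        # Consume and return the character under the cursor
        char = self.programa[self.atual]
        self.atual += 1
        return char

    def lookAhead(self):
        # Peek at the next character without consuming it
        if self.atual < len(self.programa):
            return self.programa[self.atual]
        return '\0'   # sentinel at end of input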
Example #16
    def test_all_tokens(self):

        tokens = [Token(t) for t in RESERVED_KEYWORDS.values()] + [
            Token(TokenType.ID, 'a'),
            Token(TokenType.ID, 'aaa'),
            Token(TokenType.ID, 'a123'),
            Token(TokenType.ID, 'a_'),
            Token(TokenType.ID, 'a_123'),
            Token(TokenType.ID, 'abc_def_123gh'),
            Token(TokenType.SEMI),
            Token(TokenType.COMMA),
            Token(TokenType.COLON),
            Token(TokenType.PLUS),
            Token(TokenType.MINUS),
            Token(TokenType.MUL),
            Token(TokenType.FLOAT_DIV),
            Token(TokenType.ASSIGN),
            Token(TokenType.LPAREN),
            Token(TokenType.RPAREN),
            Token(TokenType.LBRACK),
            Token(TokenType.RBRACK),
            Token(TokenType.LCURB),
            Token(TokenType.RCURB),
            Token(TokenType.LESS),
            Token(TokenType.GRE),
            Token(TokenType.LEQ),
            Token(TokenType.GEQ),
            Token(TokenType.EQ),
            Token(TokenType.NEQ),
            Token(TokenType.POW),
            Token(TokenType.SCALAR, 0),
            Token(TokenType.SCALAR, 12),
            Token(TokenType.SCALAR, 12.345),
            Token(TokenType.SCALAR, 12.345),
            Token(TokenType.SCALAR, float('12.345e6')),
            Token(TokenType.SCALAR, float('12.345e-6')),
            Token(TokenType.SCALAR, 0),
            Token(TokenType.SCALAR, 0.01),
            Token(TokenType.SCALAR, float('0.001e2')),
            Token(TokenType.SCALAR, float('0.0001e-2')),
            Token(TokenType.ETX)
        ]

        file_source = FileSource('tokens/all_tokens.txt')
        lexer = Lexer(file_source)
        for expected_token in tokens:
            token = lexer.current_token
            self.assertEqual(expected_token.type, token.type)
            self.assertEqual(expected_token.value, token.value)
            lexer.build_next_token()
Example #17
        while len(collected_chars) <= Lexer.MAX_ID_LENGTH and \
                (self.source.current_char.isalnum() or self.source.current_char == '_'):
            collected_chars.append(self.source.current_char)
            self.source.move_to_next_char()

        if len(collected_chars) > Lexer.MAX_ID_LENGTH:
            self.error(error_code=ErrorCode.EXCEED_MAX_ID_SIZE)

        # convert to string
        result = ''.join(collected_chars)

        if not (token_type := RESERVED_KEYWORDS.get(result)):
            token_type = TokenType.ID

        return Token(
            type=token_type,
            value=result,
        )

    def try_to_build_string(self):
        if self.source.current_char != '"':
            return None

        collected_chars = []

        self.source.move_to_next_char()

        while self.source.current_char != '"':
            # ETX appeared before the closing quote: unterminated string
            if self.source.current_char == TokenType.ETX.value:
                self.error(error_code=ErrorCode.TOKEN_BUILD_FAIL)