Example #1
    def parse_input(self, path: str):
        # NOTE: relies on `import sys` at module level (used on lex/parse errors below).
        with open(path, 'r') as f:  # close the file instead of leaking the handle
            InputHandler.set_input_text(f.read())
        Logger.info(
            '-------------------------------------------------------------')
        Logger.info(
            '------------------- Starting parsing phase ------------------')
        Logger.info(
            '-------------------------------------------------------------')
        lexer = Lexer()
        Logger.info('* Starting lexing')
        tokens = lexer.lex_input()
        Logger.info('- Lexing DONE')

        Logger.debug('*** Printing lexed tokens: ***')
        for t in tokens:  # the enumerate index was unused
            Logger.debug(f'{t.token_type}::{t.value}')
        if lexer.lex_errors:
            for e in lexer.lex_errors:
                Logger.error(e)
            sys.exit(1)

        tr = TokenReader(tokens)
        parser = Parser(tr)
        Logger.info('* Starting parsing')
        ast = parser.parse_spl()
        Logger.info('- Parsing DONE')
        Logger.info('*** Pretty printing AST: ***')
        Logger.info('\n' + ast.indented_print())

        if parser.errors:
            for e in parser.errors:
                Logger.error(e)
            sys.exit(1)
        return ast
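
For context, a minimal sketch of how this entry point might be driven from a script. The Compiler class name and module path are assumptions for illustration, not taken from the original code:

import sys

from compiler import Compiler  # hypothetical module/class owning parse_input

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('usage: python main.py <source-file>')
        sys.exit(2)
    # parse_input itself exits with status 1 if lexing or parsing fails.
    ast = Compiler().parse_input(sys.argv[1])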
Example #2
import unittest

# Assumed module layout: adjust the import to wherever Lexer, Token and
# TokenType are actually defined in this project.
from lexer import Lexer, Token, TokenType


class LexerTests(unittest.TestCase):
    def setUp(self):
        self.lexer = Lexer()

    def expected_token_len(self, tokens, num):
        self.assertEqual(len(tokens), num, f'Token list should contain {num} token(s)')

    def expected_token(self, token: Token, token_type: TokenType, token_value: str):
        self.assertEqual(token.token_type, token_type, f'Token should be type {token_type}')
        self.assertEqual(token.value, token_value, f'Token should have value "{token_value}"')

    def test_lex_empty(self):
        self.lexer.input_text = ''
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 1)
        self.assertEqual(tokens[0].token_type, TokenType.EOF, 'Only token should be EOF token')

    def test_lex_whitespaces(self):
        self.lexer.input_text = '  \t     5          60 '
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 3)
        self.expected_token(tokens[0], TokenType.INT, '5')
        self.expected_token(tokens[1], TokenType.INT, '60')

    def test_lex_comments(self):
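        # Single-line, multi-line, and the unterminated trailing '/* to end'
        # comment are all skipped: only the two INT tokens and EOF survive.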
        self.lexer.input_text = '// Single line comment \n 1 /* multi line \n still comment \t  ccccc \n */ 2 /* to end'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 3)
        self.expected_token(tokens[0], TokenType.INT, '1')
        self.expected_token(tokens[1], TokenType.INT, '2')

    def test_lex_brackets(self):
        self.lexer.input_text = '[]{}()'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 7)
        self.expected_token(tokens[0], TokenType.BLOCK_OPEN, '[')
        self.expected_token(tokens[1], TokenType.BLOCK_CLOSE, ']')
        self.expected_token(tokens[2], TokenType.CURLY_OPEN, '{')
        self.expected_token(tokens[3], TokenType.CURLY_CLOSE, '}')
        self.expected_token(tokens[4], TokenType.PAREN_OPEN, '(')
        self.expected_token(tokens[5], TokenType.PAREN_CLOSE, ')')

    def test_lex_punctuation(self):
        self.lexer.input_text = '.,;'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 4)
        self.expected_token(tokens[0], TokenType.DOT, '.')
        self.expected_token(tokens[1], TokenType.COMMA, ',')
        self.expected_token(tokens[2], TokenType.SEMICOLON, ';')

    def test_lex_fun_decl(self):
        self.lexer.input_text = '::   ->'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 3)
        self.expected_token(tokens[0], TokenType.DOUBLECOLON, '::')
        self.expected_token(tokens[1], TokenType.ARROW, '->')

    def test_lex_operators(self):
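        # Whitespace splits would-be multi-char operators: ': :' and '- >'
        # lex as separate OPERATOR tokens, unlike '::' and '->' above.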
        self.lexer.input_text = ': : - > <+*/==<=!>= > = != ! = &&||%'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 21)
        self.expected_token(tokens[0], TokenType.OPERATOR, ':')
        self.expected_token(tokens[1], TokenType.OPERATOR, ':')
        self.expected_token(tokens[2], TokenType.OPERATOR, '-')
        self.expected_token(tokens[3], TokenType.OPERATOR, '>')
        self.expected_token(tokens[4], TokenType.OPERATOR, '<')
        self.expected_token(tokens[5], TokenType.OPERATOR, '+')
        self.expected_token(tokens[6], TokenType.OPERATOR, '*')
        self.expected_token(tokens[7], TokenType.OPERATOR, '/')
        self.expected_token(tokens[8], TokenType.OPERATOR, '==')
        self.expected_token(tokens[9], TokenType.OPERATOR, '<=')
        self.expected_token(tokens[10], TokenType.OPERATOR, '!')
        self.expected_token(tokens[11], TokenType.OPERATOR, '>=')
        self.expected_token(tokens[12], TokenType.OPERATOR, '>')
        self.expected_token(tokens[13], TokenType.OPERATOR, '=')
        self.expected_token(tokens[14], TokenType.OPERATOR, '!=')
        self.expected_token(tokens[15], TokenType.OPERATOR, '!')
        self.expected_token(tokens[16], TokenType.OPERATOR, '=')
        self.expected_token(tokens[17], TokenType.OPERATOR, '&&')
        self.expected_token(tokens[18], TokenType.OPERATOR, '||')
        self.expected_token(tokens[19], TokenType.OPERATOR, '%')

    def test_lex_char(self):
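        # '\n' and '\t' in this double-quoted Python string are real control
        # characters, so the lexer sees an actual newline/tab between quotes.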
        self.lexer.input_text = "a'b'c '\n' '\t'"
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 6)
        self.expected_token(tokens[0], TokenType.IDENTIFIER, 'a')
        self.expected_token(tokens[1], TokenType.CHAR, 'b')
        self.expected_token(tokens[2], TokenType.IDENTIFIER, 'c')
        self.expected_token(tokens[3], TokenType.CHAR, '\n')
        self.expected_token(tokens[4], TokenType.CHAR, '\t')
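
Assuming the tests live in a file such as test_lexer.py (the filename is an assumption), the suite runs under the standard unittest runner:

# Either invoke the runner from the project root:
#   python -m unittest test_lexer -v
# or add the usual entry point at the bottom of the test file:
if __name__ == '__main__':
    unittest.main()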