Exemplo n.º 1
0
class Parser:
    def __init__(self):
        """Create a parser that has no token stream attached yet.

        ``tokens`` and ``current`` stay ``None`` until ``parse`` installs a
        token stream and advances to its first token.
        """
        # Token stream and lookahead token are both unset until parse() runs.
        self.tokens = self.current = None
        # Precedence lookup consulted by parse_binary_op.
        self.operators_precendence = Operators()

    def next(self):
        self.current = next(self.tokens)

    def parse_number_expression(self):
        """
        numberexpr ::= number

        Wraps the current token's ``value`` in a NumberExpression, advances
        past that token and returns the new node.
        """
        literal = self.current.value
        node = NumberExpression(literal)
        self.next()  # consume the number token
        return node

    def parse_parenthesis_expression(self):
        """
        parenexpr ::= '(' expression ')'

        Returns the node of the enclosed expression directly; no separate
        node is created for the parentheses themselves.

        Raises ParserException when the enclosed expression is not followed
        by a closing parenthesis.
        """
        self.next()  # step over '('
        inner = self.parse_expression()

        closing_missing = self.current != ClosedParenthesisToken()
        if closing_missing:
            raise ParserException("Expected ')', got " + str(self.current))

        self.next()  # step over ')'
        return inner

    def parse_expression(self):
        """
        expression ::= primary binoprhs

        Parses one primary expression, then hands it to parse_binary_op with
        a minimum precedence of 0 so that any operator/operand pairs that
        follow are folded into the result.
        """
        return self.parse_binary_op(self.parse_primary_expression(), 0)

    def parse_binary_op(self, left, left_precedence):
        """
        binoprhs ::= (operator primary)*
        """
        while True:
            precedence = self.operators_precendence.get(self.current)
            if precedence < left_precedence:
                return left
            operator = self.current.char
            self.next()
            right = self.parse_primary_expression()
            next_precedence = self.operators_precendence.get(self.current)
            if precedence < next_precedence:
                right = self.parse_binary_op(right, precedence + 1)
            left = BinaryOperatorExpression(operator, left, right)

    def parse_if_expression(self):
        """
        ifexpr ::= 'if' expression 'then' expression 'else' expression

        Returns an IfExpression built from the condition and the two branch
        expressions, in that order.

        Raises ParserException when the 'then' or the 'else' keyword is not
        found where the grammar requires it.
        """
        self.next()  # consume 'if'
        test = self.parse_expression()

        if isinstance(self.current, ThenToken):
            self.next()  # consume 'then'
        else:
            raise ParserException("Expected 'then' after if, got " + str(self.current))
        when_true = self.parse_expression()

        if isinstance(self.current, ElseToken):
            self.next()  # consume 'else'
        else:
            raise ParserException("Expected 'else' after if, got " + str(self.current))
        when_false = self.parse_expression()

        return IfExpression(test, when_true, when_false)

    def parse_for_expression(self):
        """
        forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression

        Returns a ForExpression built from the loop variable name, the start
        and end expressions, the optional step expression (``None`` when the
        second comma is absent) and the body expression.

        Raises ParserException when the identifier, '=', the first ',' or
        'in' is missing.
        """
        self.next()  # consume 'for'
        if not isinstance(self.current, IdentifierToken):
            raise ParserException("Expected identifier after 'for', got " + str(self.current))
        variable_name = self.current.name
        self.next()  # consume the identifier

        if self.current != AssignToken():
            raise ParserException("Expected '=' after variable in for, got " + str(self.current))
        self.next()  # consume '='
        first = self.parse_expression()

        if self.current != CommaToken():
            raise ParserException("Expected ',' after variable start value in for, got " + str(self.current))
        self.next()  # consume ','
        last = self.parse_expression()

        # A second comma introduces the optional step expression.
        increment = None
        if self.current == CommaToken():
            self.next()
            increment = self.parse_expression()

        if not isinstance(self.current, InToken):
            raise ParserException("Expected 'in' after the loop, got " + str(self.current))
        self.next()  # consume 'in'

        loop_body = self.parse_expression()
        return ForExpression(variable_name, first, last, increment, loop_body)

    def parse_identifier_expression(self):
        """
        identifierexpr ::= identifier
                         | identifier '(' (expression (',' expression)*)? ')'

        A bare identifier yields a VariableExpression. An identifier directly
        followed by '(' yields a FunctionCallExpression whose arguments are
        the comma-separated expressions up to the matching ')'.

        Raises ParserException when an argument is followed by anything other
        than ',' or ')'.
        """
        name = self.current.name
        self.next()  # consume the identifier

        if self.current != OpenParenthesisToken():
            return VariableExpression(name)

        self.next()  # consume '('
        call_args = []
        more_arguments = self.current != ClosedParenthesisToken()
        while more_arguments:
            call_args.append(self.parse_expression())
            if self.current == ClosedParenthesisToken():
                more_arguments = False
            elif self.current != CommaToken():
                raise ParserException("Expected ',' or ')' in the argument list")
            else:
                self.next()  # consume ',' and parse the next argument

        self.next()  # consume ')'
        return FunctionCallExpression(name, call_args)

    def parse_primary_expression(self):
        """
        primary ::= identifierexpr | numberexpr | parenexpr | ifexpression | forexpression

        Picks the sub-parser that matches the current token and returns its
        result.

        Raises ParserException when the current token cannot start a primary
        expression.
        """
        # Token classes are checked in this order; the first match wins.
        handlers_by_token_type = (
            (IdentifierToken, self.parse_identifier_expression),
            (NumberToken, self.parse_number_expression),
            (IfToken, self.parse_if_expression),
            (ForToken, self.parse_for_expression),
        )
        for token_type, handler in handlers_by_token_type:
            if isinstance(self.current, token_type):
                return handler()

        # '(' is recognised by equality rather than by type.
        if self.current == OpenParenthesisToken():
            return self.parse_parenthesis_expression()

        raise ParserException("Unknown token when parsing primary: " + str(self.current))

    def parse_prototype_expression(self):
        """
        prototype ::= id '(' id* ')'

        Returns a PrototypeNode holding the function name and the list of
        parameter names. Parameter names are consecutive identifier tokens;
        no separator token is consumed between them.

        Raises ParserException when the function name, the '(' or the ')' is
        missing.
        """
        if not isinstance(self.current, IdentifierToken):
            raise ParserException("Expected function name in prototype")
        name = self.current.name
        self.next()  # consume the function name

        if self.current != OpenParenthesisToken():
            raise ParserException("Expected '(' in function prototype")
        self.next()  # consume '('

        parameters = []
        while isinstance(self.current, IdentifierToken):
            parameters.append(self.current.name)
            self.next()

        if self.current != ClosedParenthesisToken():
            raise ParserException("Expected ')' in function prototype")
        self.next()  # consume ')'

        return PrototypeNode(name, parameters)

    def parse_definition(self):
        """
        definition ::= 'def' prototype expression

        Returns a FunctionNode pairing the parsed prototype with the parsed
        body expression.
        """
        self.next()  # consume 'def'
        signature = self.parse_prototype_expression()
        body = self.parse_expression()
        return FunctionNode(signature, body)

    def parse_external(self):
        """
        external ::= 'extern' prototype

        Returns the prototype node that follows the 'extern' keyword.
        """
        self.next()  # consume 'extern'
        declared = self.parse_prototype_expression()
        return declared

    def parse_toplevel_expression(self):
        """
        toplevelexpr ::= expression

        Parses one expression and returns the result of passing it to
        FunctionNode.create_anonymous.
        """
        return FunctionNode.create_anonymous(self.parse_expression())

    def parse(self, string):
        """
        top ::= definition | external | expression | EOF

        Tokenizes ``string`` with a fresh Lexer and parses the first
        top-level construct found in it. This method does not check that the
        token stream is exhausted afterwards.

        Returns the node produced by the matching sub-parser, or ``None``
        when the first token is EOF.

        A one-line status message is printed to stdout once the construct
        has been parsed successfully. Exceptions raised by the sub-parsers
        propagate unchanged, and no "Parsed ..." message is printed in that
        case.
        """
        self.tokens = Lexer().tokenize(string)
        self.next()  # load the first token into self.current

        if isinstance(self.current, EOFToken):
            # Nothing to parse.
            return None

        # Report success only after the sub-parser has returned, so a failed
        # parse never claims that something was parsed.
        if isinstance(self.current, DefToken):
            node = self.parse_definition()
            print('Parsed a function definition.')
        elif isinstance(self.current, ExternToken):
            node = self.parse_external()
            print('Parsed an extern.')
        else:
            node = self.parse_toplevel_expression()
            print('Parsed a top-level expression.')
        return node

    def _flatten(self, ast):
        """Convert an AST node into a nested list of plain values.

        The first element of each list is a tag ('Number', 'Variable',
        'Binop', 'Call', 'Prototype' or 'Function'); the remaining elements
        are the node's fields, with child nodes flattened recursively. A
        prototype's argument names are joined into one space-separated
        string.

        Raises TypeError for any other node type. Note that IfExpression and
        ForExpression have no case here.
        """
        if isinstance(ast, NumberExpression):
            return ['Number', ast.value]

        if isinstance(ast, VariableExpression):
            return ['Variable', ast.name]

        if isinstance(ast, BinaryOperatorExpression):
            op = ast.operator
            lhs = self._flatten(ast.left)
            rhs = self._flatten(ast.right)
            return ['Binop', op, lhs, rhs]

        if isinstance(ast, FunctionCallExpression):
            flat_args = list(map(self._flatten, ast.arguments))
            return ['Call', ast.function, flat_args]

        if isinstance(ast, PrototypeNode):
            return ['Prototype', ast.name, ' '.join(ast.arguments)]

        if isinstance(ast, FunctionNode):
            flat_prototype = self._flatten(ast.prototype)
            flat_body = self._flatten(ast.body)
            return ['Function', flat_prototype, flat_body]

        raise TypeError('unknown type in _flatten: {0}'.format(type(ast)))