Example #1
def interpret_FunctionDeclaration(code: str, node: Node,
                                  symbol_table: SymbolTable):
    """Registers a function declaration in the symbol table. Raises an error
    if the identifier has already been declared."""
    if symbol_table_symbol_exists(symbol_table, node.id_):
        generate_error_message(
            node, symbol_table.symbols[node.id_], code,
            "Runtime Error, found duplicate function identifier", True)
    return symbol_table_set(symbol_table, node.id_, node)
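The symbol-table helpers called above are not part of this example. A minimal sketch of what they could look like, assuming SymbolTable is an immutable wrapper around a dict (the NamedTuple layout is an assumption; only the helper names and the non-mutating style come from the code above):

from typing import Any, Dict, NamedTuple

class SymbolTable(NamedTuple):
    symbols: Dict[str, Any]

def symbol_table_symbol_exists(symbol_table: SymbolTable, name: str) -> bool:
    # Pure membership test; never mutates the table.
    return name in symbol_table.symbols

def symbol_table_set(symbol_table: SymbolTable, name: str,
                     value: Any) -> SymbolTable:
    # Returns a new table instead of mutating in place, matching the
    # functional style of the interpreter functions in these examples.
    return SymbolTable(symbols={**symbol_table.symbols, name: value})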
Example #2
def interpret_Identifier(code: str, node: Identifier,
                         symbol_table: SymbolTable):
    """Looks up an identifier in the symbol table and registers its value as
    the return symbol. Raises an error if the name is not defined."""
    value = symbol_table_get(symbol_table, node.name_)
    if value is None:  # 'is None' rather than '== None', per PEP 8
        generate_error_message(node, code, f"{node.name_} is not defined",
                               True)
    return symbol_table_add_return_symbol(symbol_table, value)
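Likewise, symbol_table_get is only called here, never defined. A sketch under the same assumptions, with an optional parent field for nested scopes (the scope chain is an assumption, not something visible in this example):

from typing import Any, Optional

def symbol_table_get(symbol_table, name: str) -> Optional[Any]:
    # Walk the (assumed) scope chain outward until the name is found;
    # interpret_Identifier treats a None result as "not defined".
    if name in symbol_table.symbols:
        return symbol_table.symbols[name]
    parent = getattr(symbol_table, "parent", None)
    if parent is not None:
        return symbol_table_get(parent, name)
    return None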
Example #3
def parse_function_call_parameters_loop(
    characters: str,
    tokens : List[Token]
) -> Tuple[List['Node'], List['Token']]:
    """Function is used to parse the parameters when parsing a function call
    
    Note: 
        Follows the following grammar rules:
            1. A function call statement starts with a TokenType.CALL and 
                ends with a TokenType.CALL
            2. A function call parameter must be separated by a '|'
    
    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occured:
            - A list of nodes representing the parameters 
            - A list of tokens that still need to be parsed
        If a grammar error occured:
            - Raises a Syntax Error with a message of where the error occured
    """
    node, tokens = parse_expr.parse_expression(characters, tokens)
    head, *tail = tokens
    if head.tokentype_ == TokenTypes.CALL: 
        return [node], tokens
    if head.tokentype_ != TokenTypes.SEPARATOR:
        generate_error_message(head, characters,
                               "Missing '|' between multiple parameters", True)

    nodes, tokens = parse_function_call_parameters_loop(characters, tail)
    return [node] + nodes, tokens
Example #4
def parse_if_statement_test(
        characters: str,
        tokens: List['Token']) -> Tuple['BinaryExpression', List['Token']]:
    """
    Functies parses a test for an if-statement
    
    Note: 
        The following grammar rules apply:
            1. An if statement must be followed by a binary expression 
            2. The last if statement test must be followed by an '––>'
            3. An '––>' must be followed by a newline

    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - A BinaryExpression node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    test, tokens = parse_expr.parse_expression(characters, tokens)

    head, *tail = tokens
    if head.tokentype_ != TokenTypes.INDENTATION:
        generate_error_message(head, characters,
                               "Expected '––>' after if statement", True)

    head, *tail = tail
    if head.tokentype_ != TokenTypes.NEW_LINE:
        generate_error_message(head, characters,
                               "Expected new line after if statement", True)
    return test, tail
Example #5
def parse_function_params(
        characters: str,
        tokens: List['Token']) -> Tuple[List['Node'], List['Token']]:
    """ Function tries to parse function parameters
    
    Note: 
        Follows the following sequence of grammar rules:
            1. A function body starts when the TokenType.INDENTATION is found
                1a. This token must be followed by a TokenType.NEW_LINE
            2. If a newline is found before a TokenType.INDENTATION, raise an exception
            3. Check for a TokenType.SEPARATOR, raise an exception if not found
            4. Check for a TokenType.PARAMETER, raise an exception if not found
            5. Check for an identifier, raise an exception if not found
            6. Generate an Identifier node and recurse
    
    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - A list of Identifier nodes which make up the function parameters
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    head, *tail = tokens
    if head.tokentype_ == TokenTypes.INDENTATION:
        head, *tail = tail
        if head.tokentype_ != TokenTypes.NEW_LINE:
            generate_error_message(
                head, characters,
                "Expected newline after '––>' in function declaration", True)
        return [], tail
    if head.tokentype_ == TokenTypes.NEW_LINE:
        generate_error_message(head, characters,
                               "Expected '––>' after function declaration",
                               True)
    if head.tokentype_ != TokenTypes.SEPARATOR:
        generate_error_message(
            head, characters,
            "Expected '|' or '––>' after function parameter declaration", True)

    head, *tail = tail
    if head.tokentype_ != TokenTypes.PARAMETER:
        generate_error_message(
            head, characters,
            "Expected 'parameter declaration' after function separator", True)
    head, *tail = tail
    if head.tokentype_ != TokenTypes.IDENTIFIER:
        generate_error_message(
            head, characters,
            "Expected 'identifier' after function parameter declaration", True)

    param = Identifier(loc_=head.loc_, range_=head.range_, name_=head.value_)
    params, tokens = parse_function_params(characters, tail)
    return [param] + params, tokens
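To make the recursion concrete, here is a hypothetical token trace (the surface glyphs are not shown in these examples; only the TokenTypes sequence matters):

# Two parameters 'n' and 'm' followed by the start of the function body:
#   SEPARATOR PARAMETER IDENTIFIER('n')
#   SEPARATOR PARAMETER IDENTIFIER('m')
#   INDENTATION NEW_LINE ...
#
# The first call consumes SEPARATOR PARAMETER IDENTIFIER('n') and recurses;
# the second call does the same for 'm'; the third call hits INDENTATION,
# checks for NEW_LINE, and returns ([], rest), so the overall result is
# ([Identifier('n'), Identifier('m')], rest).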
Example #6
def parse_operand(
        characters: str,
        tokens: List['Token']) -> Optional[Tuple['Node', List['Token']]]:
    """
    Function parses an operand for an expression
    
    Rules:
        - If the operand is either a TokenTypes.PLUS or TokenTypes.MINUS, a UnaryExpression node is created
        - If the operand is either a TokenTypes.INT or TokenTypes.Float, a Literal node is created
        - If the operand is a TokenTypes.CALL, a FunctionDeclaration node is created
        - If the operand is a TokenTypes.IDENTIFIER, an Identifier node is created
        - If the operand is a TokenTypes.LEFT_PARENTHESIES, a new BinaryExpression node is created
            - After the creation of a new BinaryExpression node, a TokenTypes.RIGHT_PARENTHESIES is required 
    
    Args: 
        tokens: The tokens that need to be parsed
        
    Returns:
        If no error occurs:
            - A Node containing the found operand 
            - The leftover tokens that stil need to be parsed
        If no operand was found:
            returns None
    """
    head, *tail = tokens

    if head.tokentype_ in (TokenTypes.PLUS, TokenTypes.MINUS):
        node, tail = parse_operand(characters, tail)
        loc_ = {"start": head.loc_["start"], "end": node.loc_["end"]}
        range_ = [head.range_[0], node.range_[1]]
        return UnaryExpression(loc_=loc_,
                               range_=range_,
                               operator_=head.tokentype_,
                               argument_=node), tail
    if head.tokentype_ in (TokenTypes.INT, TokenTypes.FLOAT):
        # Cast by token type: int() would raise on a float literal like '1.5'
        value = (float(head.value_) if head.tokentype_ == TokenTypes.FLOAT
                 else int(head.value_))
        return Literal(loc_=head.loc_,
                       range_=head.range_,
                       value_=value,
                       raw_=head.value_), tail
    if head.tokentype_ == TokenTypes.CALL:
        result, tokens = pass_func.parse_function_call(
            characters, tokens)  # TODO: pass characters along
        return result, tokens
    if head.tokentype_ == TokenTypes.IDENTIFIER:
        return Identifier(loc_=head.loc_,
                          range_=head.range_,
                          name_=head.value_), tail
    if head.tokentype_ == TokenTypes.LEFT_PARENTHESIES:
        node, tokens = parse_expression(characters, tail)
        head, *tail = tokens
        if head.tokentype_ != TokenTypes.RIGHT_PARENTHESIES:
            generate_error_message(head, characters,
                                   "Missing right parenthesis", True)
        return node, tail
    generate_error_message(head, characters,
                           "Expected expression, literal, or function call",
                           True)
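A short hypothetical trace of the two arithmetic branches (the token values are made up for illustration):

# Unary minus: tokens MINUS INT('3') ...
#   -> UnaryExpression(operator_=TokenTypes.MINUS,
#                      argument_=Literal(value_=3)), rest
# Parenthesized expression: tokens LEFT_PARENTHESIES ... RIGHT_PARENTHESIES
#   -> the inner expression node, with the closing parenthesis consumed.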
Example #7
def parse_variable_declaration(
        characters: str,
        tokens: List['Token']) -> Tuple['VariableDeclaration', List['Token']]:
    """Function parses a variable declaration
    
    Rules:
        A variable declaration is valid in the following sequence:
        1. TokenTypes.VARIABLE_DECLARATION
        2. TokenTypes.IDENTIFIER
        3. TokenTypes.IS
        4. Any Token that indicates an expression:
            - TokenTypes.CALL
            - TokenTypes.IDENTIFIER
            - TokenTypes.MINUS
            - TokenTypes.PLUS
            - TokenTypes.INT
            - TokenTypes.FLOAT
            - TokenTypes.LEFT_PARENTHESIES  
    
    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - A VariableDeclaration node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    variable_declaration, identifier, *tail = tokens
    if identifier.tokentype_ != TokenTypes.IDENTIFIER:
        return generate_error_message(
            identifier, characters,
            "Expected identifier after variable declaration", True)

    head, *tail = tail
    if head.tokentype_ != TokenTypes.IS:
        return generate_error_message(head, characters, "Expected '='", True)

    node, tokens = parse_expr.parse_expression(characters, tail)

    loc_ = {
        "start": variable_declaration.loc_["start"],
        "end": node.loc_["end"]
    }
    range_ = [variable_declaration.range_[0], node.range_[1]]
    node = VariableDeclaration(loc_=loc_,
                               range_=range_,
                               id_=identifier.value_,
                               init_=node)
    return node, tokens
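A hypothetical trace for a declaration such as x = 1 + 2 (the glyph for the declaration keyword is not shown in these examples; only the TokenTypes sequence is implied by the checks above):

# tokens: VARIABLE_DECLARATION IDENTIFIER('x') IS INT('1') PLUS INT('2') ...
# -> VariableDeclaration(id_='x', init_=<expression node for 1 + 2>),
#    with loc_/range_ spanning from the declaration keyword to the end of
#    the expression.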
Example #8
def parse(
    characters: str,
    tokens: List['Token'],
    termination_tokens: List['TokenTypes'] = [],
) -> Tuple[List['Node'], List['Token']]:
    """Function creates an AST from the provided tokens. It raises error 
    messages when it encounters illegal grammar
    
    Args:
        characters          : Characters that are being lexed, parsed and interpreted
        tokens              : List of tokens to create an AST from
        termination_tokens  : A List of termination tokens. If the parser encounters one of these tokens OR an EOF token, stop parsing

    Returns:
        If no errors occurred:
            - A list of AST nodes
            - The remaining tokens (the EOF or termination token is still at the head)
        If a grammar error occurred:
            Raises a Syntax Error with a message of where the error occurred
    """
    if len(tokens) == 0: return [], []
    
    head, *tail = tokens
    if   head.tokentype_ in (TokenTypes.EOF, *termination_tokens) : return [], tokens
    elif head.tokentype_ in (TokenTypes.NEW_LINE, TokenTypes.TAB) : return parse(characters, tail, termination_tokens)
    elif head.tokentype_ == TokenTypes.VARIABLE_DECLARATION       : node, tokens = parse_var_decl.parse_variable_declaration(characters, tokens)
    elif head.tokentype_ == TokenTypes.FUNCTION_DECLARATION       : node, tokens = parse_func_decl.parse_function_declaration(characters, tokens)
    elif head.tokentype_ == TokenTypes.IF                         : node, tokens = parse_if_stmt.parse_if_statement(characters, tokens)
    elif head.tokentype_ == TokenTypes.RETURN                     : node, tokens = parse_func_decl.parse_return_statement(characters, tokens)
    elif head.tokentype_ == TokenTypes.CALL                       : node, tokens = parse_func_call.parse_function_call(characters, tokens)
    else                                                          : return generate_error_message(head, characters, "Invalid Syntax", True)
    nodes, tokens = parse(characters, tokens, termination_tokens)
    return [node] + nodes, tokens
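A minimal driver sketch wiring the lexer of Example #12 to this parser. TOKEN_EXPRESSIONS and search_match are hypothetical stand-ins inferred from the lex() signature; they are not defined anywhere in these examples:

def run_frontend(source: str) -> List['Node']:
    # Lex the raw characters, then parse the resulting token stream.
    # search_match and TOKEN_EXPRESSIONS are assumed names for the
    # project's real matcher function and token table.
    tokens = lex(source, search_match, TOKEN_EXPRESSIONS)
    nodes, rest = parse(source, tokens)
    return nodes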
Example #9
def parse_return_statement(
        characters: str,
        tokens: List['Token']) -> Tuple['ReturnStatement', List['Token']]:
    """Function tries to parse a return statement 
    
    Note:
        Follows the following grammar rules:
            1. A return statement must be followed by an expression
            2. The expression must be followed by a closing TokenTypes.RETURN ('⮐')
    
    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - A ReturnStatement node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    return_statement_start, head, *tail = tokens

    node, tokens = parse_expr.parse_expression(characters, [head] + tail)

    return_statement_end, *tail = tokens
    if return_statement_end.tokentype_ != TokenTypes.RETURN:
        generate_error_message(return_statement_end, characters,
                               "Expected closing '⮐' after return statement",
                               True)

    loc_ = {
        "start": return_statement_start.loc_["start"],
        "end": return_statement_end.loc_["end"]
    }
    range_ = [return_statement_start.range_[0], return_statement_end.range_[1]]
    node = ReturnStatement(loc_=loc_, range_=range_, argument_=node)
    return node, tail
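The token shape this function expects, as a hypothetical trace:

# tokens: RETURN <expression tokens> RETURN ...
# i.e. the '⮐' token both opens and closes the statement; the closing
# token's location becomes the end of the ReturnStatement's loc_/range_.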
Example #10
def parse_function_call(
    characters: str, 
    tokens: List['Token']
) -> Tuple['CallExpression', List['Token']]:
    """Function tries to parse a function call statement
    
    Note: 
        Follows the following grammar rules:
            1. A function call statement starts with a TokenTypes.CALL and
                ends with a TokenTypes.CALL
            2. Function call parameters must be separated by a '|'
    
    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - A CallExpression node representing the function call
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    call_start, identifier, *tail = tokens
    if identifier.tokentype_ not in (TokenTypes.PRINT, TokenTypes.IDENTIFIER):
        generate_error_message(identifier, characters,
                               "Expected identifier after call statement", True)

    callee = Identifier(loc_=identifier.loc_, range_=identifier.range_,
                        name_=identifier.value_)
    arguments, tokens = parse_function_call_parameters(characters, tail)

    call_end, *tail = tokens

    loc_ = {"start": call_start.loc_["start"], "end": call_end.loc_["end"]}
    range_ = [call_start.range_[0], call_end.range_[1]]
    node = CallExpression(loc_=loc_, range_=range_, arguments_=arguments,
                          callee_=callee)
    return node, tail
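A hypothetical trace for a two-argument call (the token values are made up for illustration):

# tokens: CALL IDENTIFIER('f') INT('1') SEPARATOR INT('2') CALL ...
# -> CallExpression(callee_=Identifier('f'),
#                   arguments_=[Literal(1), Literal(2)]),
#    with loc_/range_ spanning the opening and closing CALL tokens.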
Example #11
def parse_function_declaration(
        characters: str,
        tokens: List['Token']) -> Tuple['FunctionDeclaration', List['Token']]:
    """ Function tries to parse a function declaration
    
    Note: 
        Follows the following grammar rules:
            1. A function declaration needs to be followed by a TokenTypes.IDENTIFIER
            2. After the identifier, get the function parameters (if there are any)
            3. Finally, parse the function body just like a normal piece of code, looking for a
                TokenTypes.FUNCTION_DECLARATION_END token to stop parsing at the end of the function
    
    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - A FunctionDeclaration node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    function_declaration_start, identifier, *tail = tokens
    if identifier.tokentype_ != TokenTypes.IDENTIFIER:
        generate_error_message(
            identifier, characters,
            "Expected identifier after function declaration", True)

    function_parameters, tokens = parse_function_params(characters, tail)
    function_body, tokens = parser.parse(
        characters,
        tokens,
        termination_tokens=[TokenTypes.FUNCTION_DECLARATION_END])
    function_declaration_end, *tail = tokens

    if len(function_body) == 0:
        generate_error_message(identifier, characters,
                               "Function body cannot be empty", True)

    loc_ = {
        "start": function_body[0].loc_["start"],
        "end": function_body[-1].loc_["end"]
    }
    range_ = [function_body[0].range_[0], function_body[-1].range_[1]]
    function_body = BlockStatement(
        loc_=loc_, range_=range_,
        body_=function_body)  # Wrap the function body in a BlockStatement

    loc_ = {
        "start": function_declaration_start.loc_["start"],
        "end": function_declaration_end.loc_["end"]
    }
    range_ = [
        function_declaration_start.range_[0],
        function_declaration_end.range_[1]
    ]
    node = FunctionDeclaration(loc_=loc_,
                               range_=range_,
                               id_=identifier.value_,
                               params_=function_parameters,
                               body_=function_body)
    return node, tail
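Putting the pieces together, the overall token shape of a declaration (TokenTypes only; assembled from the checks in this function and in parse_function_params above):

# FUNCTION_DECLARATION IDENTIFIER
#   [SEPARATOR PARAMETER IDENTIFIER]*    # zero or more parameters
# INDENTATION NEW_LINE
#   <body tokens, parsed by parser.parse>
# FUNCTION_DECLARATION_END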
Example #12
def lex(characters: str,
        search_match_f: Callable[[str, List[Tuple[str, TokenTypes]], int],
                                 Tuple[Optional[re.Match],
                                       Optional[TokenTypes]]],
        token_expressions: List[Tuple[str, TokenTypes]],
        line_no: int = 1,
        index: int = 0,
        total_index: int = 0) -> List[Token]:
    """Function converts the provided characters into tokens. Uses a provided function to search for matches in characters

    Args:
        characters          : The characters that need to be lexed
        search_match_f      : A function that matches the provided characters against the provided token expressions
        token_expressions   : A list of (pattern, TokenTypes) pairs to try to match at the current index
        line_no             : The current line number that is being lexed, default=1
        index               : The current index within the line that is being lexed, default=0
        total_index         : The total index in characters that is being lexed (characters[total_index:] is what remains), default=0

    Returns:
        If no errors occurred:
            Returns a list of lexed tokens, terminated by an EOF token
        If no match was found:
            Raises a Syntax Error with a message of where the error occurred
    """
    if total_index == len(characters):
        # The end of characters has been reached; return an EOF token
        return [
        return [
            Token(loc_={
                "start": {
                    "line": line_no,
                    "index": index
                },
                "end": {
                    "line": line_no,
                    "index": index + 3
                }
            },
                  range_=[total_index, total_index + 3],
                  value_="\00",
                  tokentype_=TokenTypes.EOF)
        ]

    match, tokentype = search_match_f(characters, token_expressions,
                                      total_index)
    if not match:
        generate_error_message(line_no, index, characters, "Invalid Syntax",
                               True)

    matched_text = match.group(0)
    offset = match.end(0) - match.start(0)
    token_location = {
        "start": {
            "line": line_no,
            "index": index
        },
        "end": {
            "line": line_no,
            "index": index + offset
        }
    }
    token_range = [match.start(0), match.end(0)]

    token = Token(loc_=token_location,
                  range_=token_range,
                  value_=matched_text,
                  tokentype_=tokentype)

    if tokentype == TokenTypes.NEW_LINE:
        line_no += 1
        index = 0
        offset = 0
    return [token] + lex(characters, search_match_f, token_expressions,
                         line_no, index + offset, match.end(0))
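lex() delegates the actual matching to search_match_f. A sketch of a compatible matcher, assuming token_expressions holds (regex pattern, TokenTypes) pairs; it returns (None, None) on failure so the tuple unpacking in lex() still works:

import re
from typing import List, Optional, Tuple

def search_match(characters: str,
                 token_expressions: List[Tuple[str, 'TokenTypes']],
                 total_index: int):
    # Try each (pattern, tokentype) pair at the current position; the first
    # hit wins, so earlier entries in the table take priority. Matching with
    # pos=total_index keeps match.start()/match.end() absolute, which is how
    # lex() uses them for range_ and for the next total_index.
    for pattern, tokentype in token_expressions:
        match = re.compile(pattern).match(characters, total_index)
        if match:
            return match, tokentype
    return None, None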
Example #13
def parse_if_statement(
        characters: str,
        tokens: List['Token']) -> Tuple['IfStatement', List['Token']]:
    """
    Function parses an if statement 
    
    Note:
        The following grammar rules apply:
            1. An if statement starts with a TokenTypes.IF token
            2. An if statement, elif statement and else statement must each end with a TokenTypes.IF_STATEMENT_END
            3. An if statement must contain a test (a BinaryExpression)
            4. Parse an if statement body just like normal code
                4a. Look out for a TokenTypes.IF_STATEMENT_END to stop parsing the body
            5. If a TokenTypes.ELSE_IF token is found, recurse this function

    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed
    
    Returns:
        If no errors occurred:
            - An IfStatement node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a Syntax Error with a message of where the error occurred
    """
    valid_termination_tokens = [
        TokenTypes.IF_STATEMENT_END, TokenTypes.ELSE, TokenTypes.ELSE_IF
    ]
    if_statement_start, *tail = tokens
    test, tokens = parse_if_statement_test(characters, tail)
    body, tokens = parser.parse(
        characters, tokens, termination_tokens=valid_termination_tokens)
    termination_token, *tail = tokens

    if len(body) == 0:
        generate_error_message(termination_token, characters,
                               "If statement body cannot be empty", True)
    if termination_token.tokentype_ not in valid_termination_tokens:
        generate_error_message(termination_token, characters,
                               "Expected '¿', '⁈', or '⁇' after if statement",
                               True)

    if termination_token.tokentype_ == TokenTypes.ELSE_IF:
        alternative, tokens = parse_if_statement(characters, tokens)

        loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
        range_ = [body[0].range_[0], body[-1].range_[1]]
        consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)

        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": alternative.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], alternative.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tokens
    elif termination_token.tokentype_ == TokenTypes.ELSE:
        head, *tail = tail
        if head.tokentype_ != TokenTypes.INDENTATION:
            generate_error_message(
                head, characters, "Expected '––>' statement after else block",
                True)
        alternative, tokens = parser.parse(
            characters, tail, termination_tokens=[TokenTypes.IF_STATEMENT_END])
        if_statement_end, *tail = tokens

        if if_statement_end.tokentype_ != TokenTypes.IF_STATEMENT_END:
            generate_error_message(if_statement_end, characters,
                                   "Expected '¿' after if statement end", True)
        if len(alternative) == 0:
            generate_error_message(if_statement_end, characters,
                                   "Else statement body cannot be empty", True)

        loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
        range_ = [body[0].range_[0], body[-1].range_[1]]
        consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)

        loc_ = {
            "start": alternative[0].loc_["start"],
            "end": alternative[-1].loc_["end"]
        }
        range_ = [alternative[0].range_[0], alternative[-1].range_[1]]
        alternative = BlockStatement(loc_=loc_,
                                     range_=range_,
                                     body_=alternative)
        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": if_statement_end.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], if_statement_end.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tail

    loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
    range_ = [body[0].range_[0], body[-1].range_[1]]
    consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)

    loc_ = {
        "start": if_statement_start.loc_["start"],
        "end": termination_token.loc_["end"]
    }
    range_ = [if_statement_start.range_[0], termination_token.range_[1]]
    return IfStatement(loc_=loc_,
                       range_=range_,
                       test_=test,
                       consequent_=consequent_,
                       alternate_=[]), tail
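The overall token shape of a full chain, as a hypothetical trace (TokenTypes only; the glyphs '¿', '⁈', and '⁇' appear in the error messages above, but their exact mapping to ELSE_IF/ELSE is not shown in these examples):

# IF <test tokens> INDENTATION NEW_LINE <body>
#   ELSE_IF <test tokens> INDENTATION NEW_LINE <body>   # handled by recursion
#   ELSE INDENTATION <body>
# IF_STATEMENT_END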