Example #1
0
    def open_lex_parse_compare(self, file_path):
        """Lex and parse the source file at *file_path*, write the resulting
        AST as JSON into the test_output folder, and compare it against the
        matching file in the required folder.

        Args:
            file_path: Path to the source file to lex and parse.

        Raises:
            Whatever ``self.compare_files`` raises when the produced JSON
            differs from the required file.
        """
        # Read as bytes and decode explicitly so the source is always UTF-8,
        # independent of the platform's default encoding.
        with open(file_path, 'rb') as f:
            code = f.read().decode("utf-8")

        # Lex, then drop the tokens the parser does not consume.
        tokens = lex(code, search_match, TokenExpressions)
        tokens = [token for token in tokens
                  if token.tokentype_ != TokenTypes.NONE]

        parsed, leftover_token = parse(code, tokens)
        # Root Program node spanning the whole source; the end position is
        # taken from the start of the last lexed token.
        program = Program(loc_={
            'start': {
                'line': 1,
                'index': 0
            },
            "end": {
                "line": tokens[-1].loc_["start"]["line"],
                "index": tokens[-1].loc_["start"]["index"]
            }
        },
                          range_=[0, len(code)],
                          body_=parsed)

        filename, _ = os.path.splitext(os.path.basename(file_path))
        # The test-case folder name (parent directory of the source file) is
        # mirrored under test_output/ and required/.
        test_case_dir = os.path.basename(os.path.dirname(file_path))
        folder_output = root_dir + "/tests/parser_tests/test_output/" + test_case_dir
        folder_required = root_dir + "/tests/parser_tests/required/" + test_case_dir
        output_file = folder_output + "/" + filename + ".json"
        required_file = folder_required + "/" + filename + ".json"

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(folder_output, exist_ok=True)

        with open(output_file, "w") as f:
            f.write(program.jsonify())
        self.compare_files(required_file, output_file)
Example #2
0
def parse_function_declaration(
        characters: str,
        tokens: List['Token']) -> Tuple['FunctionDeclaration', List['Token']]:
    """Parse a function declaration into a FunctionDeclaration node.

    Note:
        Follows these grammar rules:
            1. The declaration token must be followed by a
               TokenTypes.IDENTIFIER.
            2. After the identifier come the function parameters (if any).
            3. The body is parsed like a normal piece of code, terminated by
               a TokenTypes.FUNCTION_DECLARATION_END token.

    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed

    Returns:
        If no errors occured:
            - A FunctionDeclaration node
            - A list of tokens that still need to be parsed
        If a grammar error occured:
            - Raises a Syntax Error with a message of where the error occured
    """
    decl_token, name_token, *remaining = tokens
    if name_token.tokentype_ != TokenTypes.IDENTIFIER:
        generate_error_message(
            name_token, characters,
            "Expected identifier after function declaration", True)

    params, remaining = parse_function_params(characters, remaining)
    body_nodes, remaining = parser.parse(
        characters,
        remaining,
        termination_tokens=[TokenTypes.FUNCTION_DECLARATION_END])
    end_token, *remaining = remaining

    if len(body_nodes) == 0:
        generate_error_message(name_token, characters,
                               "Function body cannot be empty", True)

    # Wrap the parsed statements in a single BlockStatement spanning from the
    # first node's start to the last node's end.
    first_node = body_nodes[0]
    last_node = body_nodes[-1]
    body_block = BlockStatement(
        loc_={
            "start": first_node.loc_["start"],
            "end": last_node.loc_["end"]
        },
        range_=[first_node.range_[0], last_node.range_[1]],
        body_=body_nodes)

    # The declaration node spans from its opening token to its closing token.
    node = FunctionDeclaration(
        loc_={
            "start": decl_token.loc_["start"],
            "end": end_token.loc_["end"]
        },
        range_=[decl_token.range_[0], end_token.range_[1]],
        id_=name_token.value_,
        params_=params,
        body_=body_block)
    return node, remaining
Example #3
0
from misc.token_types import *
from misc.node_types import Program

if __name__ == "__main__":
    # Entry point: expects the source-file path as the first CLI argument.
    if len(sys.argv) < 2:
        print("No source file provided")
        # Exit with a non-zero status so callers/scripts can detect the
        # failure; bare exit() is a site-module convenience, not for programs.
        sys.exit(1)

    # Read as bytes and decode explicitly so the source is always UTF-8,
    # independent of the platform's default encoding.
    with open(sys.argv[1], 'rb') as f:
        code = f.read().decode("utf-8")

    # Lex, then drop the tokens the parser does not consume.
    tokens = lex(code, search_match, TokenExpressions)
    tokens = [token for token in tokens
              if token.tokentype_ != TokenTypes.NONE]

    parsed, leftover_token = parse(code, tokens)
    # Root Program node spanning the whole source; the end position is taken
    # from the start of the last lexed token.
    program = Program(loc_={
        'start': {
            'line': 1,
            'index': 0
        },
        "end": {
            "line": tokens[-1].loc_["start"]["line"],
            "index": tokens[-1].loc_["start"]["index"]
        }
    },
                      range_=[0, len(code)],
                      body_=parsed)

    # Time the interpretation of the parsed program.
    time_start = time.time()
    result = interpret(code, program)
Example #4
0
    return interpret_loop(code, program.body_, symbol_table)


if __name__ == '__main__':
    # Entry point: expects the source-file path as the first CLI argument.
    if len(sys.argv) < 2:
        print("Expected filename")
        # Exit with a non-zero status so callers/scripts can detect the
        # failure; bare exit() is a site-module convenience, not for programs.
        sys.exit(1)

    # Read as bytes and decode explicitly so the source is always UTF-8,
    # independent of the platform's default encoding.
    with open(sys.argv[1], "rb") as f:
        code = f.read().decode("utf-8")

    # Lex, then drop the tokens the parser does not consume.
    lexed = lex(code, search_match, TokenExpressions)
    tokens = [token for token in lexed
              if token.tokentype_ != TokenTypes.NONE]

    parsed, eof_token = parse(code, tokens)
    # Root Program node spanning the whole source; the end position is taken
    # from the start of the last lexed token.
    program = Program(loc_={
        'start': {
            'line': 1,
            'index': 0
        },
        "end": {
            "line": tokens[-1].loc_["start"]["line"],
            "index": tokens[-1].loc_["start"]["index"]
        }
    },
                      range_=[0, len(code)],
                      body_=parsed)

    # Dump the AST to disk as UTF-8 encoded JSON.
    with open("ast_to_interpret.json", "wb") as f:
        f.write(program.jsonify().encode("utf-8"))
Example #5
0
def _body_as_block_statement(body):
    """Wrap a non-empty list of parsed nodes in one BlockStatement spanning
    from the first node's start to the last node's end."""
    loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
    range_ = [body[0].range_[0], body[-1].range_[1]]
    return BlockStatement(loc_=loc_, range_=range_, body_=body)


def parse_if_statement(
        characters: str,
        tokens: List['Token']) -> Tuple['IfStatement', List['Token']]:
    """
    Function parses an if statement

    Note:
        The following grammar rules apply:
            1. An if statement starts with a TokenType.IF token
            2. An if statement, elif statement and else statement must each
               end with a TokenType.IF_STATEMENT_END
            3. An if statement must contain a test (binaryexpression)
            4. Parse an if statement body just like normal code
                4a. Look out for a TokenType.IF_STATEMENT_END to stop parsing
                    the function body
            5. If a TokenTypes.ELSE_IF token is found, recurse this function

    Args:
        characters  : The characters that are being lexed, parsed, interpreted
        tokens      : List of tokens that need to be parsed

    Returns:
        If no errors occured:
            - An IfStatement node
            - A list of tokens that still need to be parsed
        If a grammar error occured:
            - Raises a Syntax Error with a message of where the error occured
    """
    valid_termination_characters = [
        TokenTypes.IF_STATEMENT_END, TokenTypes.ELSE, TokenTypes.ELSE_IF
    ]
    if_statement_start, *tail = tokens
    test, tokens = parse_if_statement_test(characters, tail)
    body, tokens = parser.parse(
        characters, tokens, termination_tokens=valid_termination_characters)
    termination_token, *tail = tokens

    if len(body) == 0:
        generate_error_message(termination_token, characters,
                               "If statement body cannot be empty", True)
    if termination_token.tokentype_ not in valid_termination_characters:
        generate_error_message(termination_token, characters,
                               "Expected '¿', '⁈', or '⁇' after if statement",
                               True)

    if termination_token.tokentype_ == TokenTypes.ELSE_IF:
        # elif: the alternative is itself a (possibly chained) if statement,
        # so recurse; the recursion consumes the remaining tokens.
        alternative, tokens = parse_if_statement(characters, tokens)

        consequent_ = _body_as_block_statement(body)
        # The whole statement spans from the IF token to the end of the
        # recursive alternative.
        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": alternative.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], alternative.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tokens

    if termination_token.tokentype_ == TokenTypes.ELSE:
        # else: an INDENTATION token must follow, then an ordinary code block
        # terminated by IF_STATEMENT_END.
        head, *tail = tail
        if head.tokentype_ != TokenTypes.INDENTATION:
            generate_error_message(
                head, characters, "Expected '––>' statement after else block",
                True)
        alternative, tokens = parser.parse(
            characters, tail, termination_tokens=[TokenTypes.IF_STATEMENT_END])
        if_statement_end, *tail = tokens

        if if_statement_end.tokentype_ != TokenTypes.IF_STATEMENT_END:
            generate_error_message(if_statement_end, characters,
                                   "Expected '¿' after if statement end", True)
        if len(alternative) == 0:
            generate_error_message(if_statement_end, characters,
                                   "Else statement body cannot be empty", True)

        consequent_ = _body_as_block_statement(body)
        alternative = _body_as_block_statement(alternative)
        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": if_statement_end.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], if_statement_end.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tail

    # Plain if: termination_token is TokenTypes.IF_STATEMENT_END, so there is
    # no alternative branch.
    consequent_ = _body_as_block_statement(body)
    loc_ = {
        "start": if_statement_start.loc_["start"],
        "end": termination_token.loc_["end"]
    }
    range_ = [if_statement_start.range_[0], termination_token.range_[1]]
    return IfStatement(loc_=loc_,
                       range_=range_,
                       test_=test,
                       consequent_=consequent_,
                       alternate_=[]), tail