def open_lex_parse_compare(self, file_path):
    # Lex and parse the source file, build a Program node, dump it to JSON,
    # and compare the result against the stored golden file.
    with open(file_path, 'rb') as f:
        code = f.read().decode("utf-8")
    tokens = lex(code, search_match, TokenExpressions)
    tokens = list(
        filter(lambda token: token.tokentype_ != TokenTypes.NONE, tokens))
    parsed, leftover_token = parse(code, tokens)
    program = Program(loc_={
        'start': {
            'line': 1,
            'index': 0
        },
        "end": {
            "line": tokens[-1].loc_["start"]["line"],
            "index": tokens[-1].loc_["start"]["index"]
        }
    },
                      range_=[0, len(code)],
                      body_=parsed)

    # Mirror the input folder structure under test_output/ and required/.
    filename, _ = os.path.splitext(os.path.basename(file_path))
    folder_output = root_dir + "/tests/parser_tests/test_output/" + \
        os.path.basename(os.path.dirname(file_path))
    folder_required = root_dir + "/tests/parser_tests/required/" + \
        os.path.basename(os.path.dirname(file_path))
    output_file = folder_output + "/" + filename + ".json"
    required_file = folder_required + "/" + filename + ".json"
    if not os.path.exists(folder_output):
        os.makedirs(folder_output)
    with open(output_file, "w") as f:
        f.write(program.jsonify())
    self.compare_files(required_file, output_file)
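# `compare_files` itself is not shown in this excerpt. A minimal sketch of
# what such a golden-file assertion could look like, assuming the surrounding
# class is a unittest.TestCase (this implementation is an assumption, not the
# project's code):
def compare_files(self, required_file, output_file):
    # Hypothetical sketch: the generated JSON must match the golden file
    # byte-for-byte.
    with open(required_file, "r", encoding="utf-8") as f:
        required = f.read()
    with open(output_file, "r", encoding="utf-8") as f:
        output = f.read()
    self.assertEqual(required, output)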
def parse_function_declaration(
        characters: str,
        tokens: List['Token']) -> Tuple['FunctionDeclaration', List['Token']]:
    """Tries to parse a function declaration.

    Note:
        Follows these grammar rules:
            1. A function declaration must be followed by a
               TokenTypes.IDENTIFIER token.
            2. After the identifier, parse the function parameters
               (if there are any).
            3. Finally, parse the function body just like a normal piece of
               code, looking for a TokenTypes.FUNCTION_DECLARATION_END token
               to stop parsing at the end of the function.

    Args:
        characters: The characters that are being lexed, parsed, and interpreted.
        tokens:     List of tokens that need to be parsed.

    Returns:
        If no errors occurred:
            - A FunctionDeclaration node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a syntax error with a message indicating where the error occurred
    """
    function_declaration_start, identifier, *tail = tokens
    if identifier.tokentype_ != TokenTypes.IDENTIFIER:
        generate_error_message(
            identifier, characters,
            "Expected identifier after function declaration", True)
    function_parameters, tokens = parse_function_params(characters, tail)
    function_body, tokens = parser.parse(
        characters,
        tokens,
        termination_tokens=[TokenTypes.FUNCTION_DECLARATION_END])
    function_declaration_end, *tail = tokens
    if len(function_body) == 0:
        generate_error_message(identifier, characters,
                               "Function body cannot be empty", True)

    loc_ = {
        "start": function_body[0].loc_["start"],
        "end": function_body[-1].loc_["end"]
    }
    range_ = [function_body[0].range_[0], function_body[-1].range_[1]]
    # Wrap the function body in a BlockStatement
    function_body = BlockStatement(loc_=loc_, range_=range_,
                                   body_=function_body)

    loc_ = {
        "start": function_declaration_start.loc_["start"],
        "end": function_declaration_end.loc_["end"]
    }
    range_ = [
        function_declaration_start.range_[0],
        function_declaration_end.range_[1]
    ]
    node = FunctionDeclaration(loc_=loc_,
                               range_=range_,
                               id_=identifier.value_,
                               params_=function_parameters,
                               body_=function_body)
    return node, tail
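# The parser consumes tokens with Python's starred unpacking: the leading
# names bind the tokens under inspection and the starred name keeps everything
# still to be parsed. A standalone illustration of the idiom (token values
# made up):
#
#   >>> declaration_start, identifier, *rest = ["FN", "name", "(", ")", "END"]
#   >>> declaration_start, identifier
#   ('FN', 'name')
#   >>> rest
#   ['(', ')', 'END']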
import sys
import time

from misc.token_types import *
from misc.node_types import Program

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("No source file provided")
        exit()
    with open(sys.argv[1], 'rb') as f:
        code = f.read().decode("utf-8")
    tokens = lex(code, search_match, TokenExpressions)
    tokens = list(
        filter(lambda token: token.tokentype_ != TokenTypes.NONE, tokens))
    parsed, leftover_token = parse(code, tokens)
    program = Program(loc_={
        'start': {
            'line': 1,
            'index': 0
        },
        "end": {
            "line": tokens[-1].loc_["start"]["line"],
            "index": tokens[-1].loc_["start"]["index"]
        }
    },
                      range_=[0, len(code)],
                      body_=parsed)
    time_start = time.time()
    result = interpret(code, program)
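    # The excerpt ends right after starting the timer; a typical follow-up
    # would report elapsed wall-clock time (a sketch, not the original code):
    time_end = time.time()
    print(f"Executed in {time_end - time_start:.3f}s")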
    return interpret_loop(code, program.body_, symbol_table)


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Expected filename")
        exit()
    with open(sys.argv[1], "rb") as f:
        code = f.read().decode("utf-8")
    lexed = lex(code, search_match, TokenExpressions)
    tokens = list(
        filter(lambda token: token.tokentype_ != TokenTypes.NONE, lexed))
    parsed, eof_token = parse(code, tokens)
    program = Program(loc_={
        'start': {
            'line': 1,
            'index': 0
        },
        "end": {
            "line": tokens[-1].loc_["start"]["line"],
            "index": tokens[-1].loc_["start"]["index"]
        }
    },
                      range_=[0, len(code)],
                      body_=parsed)
    with open("ast_to_interpret.json", "wb") as f:
        f.write(program.jsonify().encode("utf-8"))
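    # A small sketch of how the dumped AST could be inspected afterwards,
    # assuming jsonify() emits standard JSON with ESTree-like "body" and
    # "type" keys (both key names are assumptions):
    import json
    with open("ast_to_interpret.json", "r", encoding="utf-8") as f:
        ast = json.load(f)
    for node in ast.get("body", []):
        print(node.get("type", "<unknown>"))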
def parse_if_statement(
        characters: str,
        tokens: List['Token']) -> Tuple['IfStatement', List['Token']]:
    """Parses an if statement.

    Note:
        The following grammar rules apply:
            1. An if statement starts with a TokenTypes.IF token.
            2. An if statement, elif statement, and else statement must each
               end with a TokenTypes.IF_STATEMENT_END token.
            3. An if statement must contain a test (a binary expression).
            4. Parse the if statement body just like normal code.
                4a. Look out for a TokenTypes.IF_STATEMENT_END token to stop
                    parsing the if statement body.
            5. If a TokenTypes.ELSE_IF token is found, recurse into this
               function.

    Args:
        characters: The characters that are being lexed, parsed, and interpreted.
        tokens:     List of tokens that need to be parsed.

    Returns:
        If no errors occurred:
            - An IfStatement node
            - A list of tokens that still need to be parsed
        If a grammar error occurred:
            - Raises a syntax error with a message indicating where the error occurred
    """
    valid_termination_tokens = [
        TokenTypes.IF_STATEMENT_END, TokenTypes.ELSE, TokenTypes.ELSE_IF
    ]
    if_statement_start, *tail = tokens
    test, tokens = parse_if_statement_test(characters, tail)
    body, tokens = parser.parse(characters,
                                tokens,
                                termination_tokens=valid_termination_tokens)
    termination_token, *tail = tokens
    if len(body) == 0:
        generate_error_message(termination_token, characters,
                               "If statement body cannot be empty", True)
    if termination_token.tokentype_ not in valid_termination_tokens:
        generate_error_message(termination_token, characters,
                               "Expected '¿', '⁈', or '⁇' after if statement",
                               True)

    if termination_token.tokentype_ == TokenTypes.ELSE_IF:
        # An elif behaves like a nested if statement: recurse and attach the
        # result as this statement's alternate.
        alternative, tokens = parse_if_statement(characters, tokens)

        loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
        range_ = [body[0].range_[0], body[-1].range_[1]]
        consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)

        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": alternative.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], alternative.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tokens

    elif termination_token.tokentype_ == TokenTypes.ELSE:
        head, *tail = tail
        if head.tokentype_ != TokenTypes.INDENTATION:
            generate_error_message(
                head, characters,
                "Expected '––>' statement after else block", True)
        alternative, tokens = parser.parse(
            characters,
            tail,
            termination_tokens=[TokenTypes.IF_STATEMENT_END])
        if_statement_end, *tail = tokens
        if if_statement_end.tokentype_ != TokenTypes.IF_STATEMENT_END:
            generate_error_message(if_statement_end, characters,
                                   "Expected '¿' after if statement end",
                                   True)
        if len(alternative) == 0:
            generate_error_message(if_statement_end, characters,
                                   "Else statement body cannot be empty",
                                   True)

        loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
        range_ = [body[0].range_[0], body[-1].range_[1]]
        consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)

        loc_ = {
            "start": alternative[0].loc_["start"],
            "end": alternative[-1].loc_["end"]
        }
        range_ = [alternative[0].range_[0], alternative[-1].range_[1]]
        alternative = BlockStatement(loc_=loc_, range_=range_,
                                     body_=alternative)

        loc_ = {
            "start": if_statement_start.loc_["start"],
            "end": if_statement_end.loc_["end"]
        }
        range_ = [if_statement_start.range_[0], if_statement_end.range_[1]]
        return IfStatement(loc_=loc_,
                           range_=range_,
                           test_=test,
                           consequent_=consequent_,
                           alternate_=alternative), tail

    # Plain if statement: termination_token is the IF_STATEMENT_END token.
    loc_ = {"start": body[0].loc_["start"], "end": body[-1].loc_["end"]}
    range_ = [body[0].range_[0], body[-1].range_[1]]
    consequent_ = BlockStatement(loc_=loc_, range_=range_, body_=body)

    loc_ = {
        "start": if_statement_start.loc_["start"],
        "end": termination_token.loc_["end"]
    }
    range_ = [if_statement_start.range_[0], termination_token.range_[1]]
    return IfStatement(loc_=loc_,
                       range_=range_,
                       test_=test,
                       consequent_=consequent_,
                       alternate_=[]), tail
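# For orientation: because a TokenTypes.ELSE_IF recurses into
# parse_if_statement, a chain of elif branches nests through the alternate_
# field. Illustrative shape only (loc_/range_ omitted, placeholder values):
#
#   IfStatement(test_=<if-test>,
#               consequent_=BlockStatement(<if-body>),
#               alternate_=IfStatement(          # from the ELSE_IF recursion
#                   test_=<elif-test>,
#                   consequent_=BlockStatement(<elif-body>),
#                   alternate_=BlockStatement(<else-body>)))  # or [] if absent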