def basic_statement(token):
    """
    Implements recursive descent for the rule:
    <basic_statement> ==> 19 <return_statement> |
                          20 <if_statement> |
                          21 <while_statement> |
                          22 <declaration_statement> |
                          23 <assignment_or_function_call>
    """
    # Echo the current source line into the generated code as a comment
    CG.code_gen_comment(Parser.file_reader.current_line.strip())

    # Map the lookahead token type to (production number, sub-rule parser)
    dispatch = {
        TokenType.KeywordReturn: (19, Parser.return_statement),
        TokenType.KeywordIf: (20, Parser.if_statement),
        TokenType.KeywordWhile: (21, Parser.while_statement),
        TokenType.KeywordVar: (22, Parser.declaration_statement),
        TokenType.Identifier: (23, Parser.assignment_or_function_call),
    }
    entry = dispatch.get(token.t_type)
    if entry is None:
        Parser.raise_production_not_found_error(token, 'basic_statement')
    else:
        production, sub_rule = entry
        print(production, end=" ")
        sub_rule(token)
def literal(token):
    """
    Implements recursive descent for the rule:
    <literal> ==> 57 TokenType.Float |
                  58 TokenType.Integer |
                  59 TokenType.String |
                  60 TokenType.Char

    NOTE(review): production numbers above follow the order the code
    actually prints them; the original docstring listed 59 Char / 60
    String, which is swapped relative to the code -- confirm against
    the grammar document.

    :return: an ExpressionRecord that holds the literal, or None if the
        token is not a literal
    """
    # (production number, target datatype, lexeme converter or None)
    rules = {
        TokenType.Float: (57, DataTypes.FLOAT, float),
        TokenType.Integer: (58, DataTypes.INT, int),
        TokenType.String: (59, DataTypes.STRING, None),
        TokenType.Char: (60, DataTypes.CHAR, None),
    }
    literal_type = token.t_type
    rule = rules.get(literal_type)
    if rule is None:
        # Not a literal: mirror the original behavior of returning None
        return None
    production, data_type, convert = rule
    print(production, end=" ")
    value = token.lexeme if convert is None else convert(token.lexeme)
    er_literal = CG.create_literal(data_type, value)
    Parser.match(token, literal_type)
    return er_literal
def while_statement(token):
    """
    Implements recursive descent for the rule:
    <while_statement> ==> 34 TokenType.KeywordWhile TokenType.OpenParen
        <expression> TokenType.CloseParen <code_block>
    """
    if token.t_type != TokenType.KeywordWhile:
        Parser.raise_production_not_found_error(token, 'while_statement')
        return
    print(34, end=" ")
    loop_top_label, loop_exit_label = CG.gen_label("while")
    # Label marking the top of the loop -- the condition re-test point
    CG.code_gen_label(loop_top_label)
    Parser.match(token, TokenType.KeywordWhile)
    Parser.match(token, TokenType.OpenParen)
    condition_er = Parser.expression(token)
    Parser.match(token, TokenType.CloseParen)
    # Branch past the body when the condition fails
    CG.code_gen_if(condition_er, loop_exit_label)
    # Emit the loop body
    Parser.code_block(token)
    # Unconditionally branch back to re-evaluate the condition
    CG.code_gen("b", loop_top_label)
    # Execution resumes here once the test fails
    CG.code_gen_label(loop_exit_label)
def return_statement(token):
    """
    Implements recursive descent for the rule:
    <return_statement> ==> 30 TokenType.KeywordReturn TokenType.Semicolon
    """
    if token.t_type == TokenType.KeywordReturn:
        # Fixed: was print(30, end=""), which fused production 30 with
        # the next number in the trace; every other production prints
        # with a trailing space.
        print(30, end=" ")
        Parser.match(token, TokenType.KeywordReturn)
        Parser.match(token, TokenType.Semicolon)
        # Jump back to the caller's return address
        CG.code_gen("jr", "$ra")
    else:
        Parser.raise_production_not_found_error(token, 'return_statement')
def program(token):
    """
    Implements recursive descent for the rule:
    <program> ==> 1 {<func_decl_or_proto>} |
    """
    CG.write_prolog()
    CG.write_epilogue()
    print(1, end=" ")
    # Zero or more function declarations/prototypes; the loop condition
    # alone covers the empty-program case, so no separate guard is needed.
    starters = (TokenType.KeywordFunc, TokenType.KeywordProto)
    while token.t_type in starters:
        Parser.func_decl_or_proto(token)
def declaration_statement(token):
    """
    Implements recursive descent for the rule:
    <declaration_statement> ==> 33 TokenType.KeywordVar
        TokenType.Identifier <datatype> TokenType.Semicolon
    """
    if token.t_type != TokenType.KeywordVar:
        Parser.raise_production_not_found_error(
            token, 'declaration_statement')
        return
    print(33, end=" ")
    Parser.match(token, TokenType.KeywordVar)
    # Capture the variable's name, then parse its declared type
    var_name = token.lexeme
    Parser.match(token, TokenType.Identifier)
    var_type, var_size = Parser.datatype(token)
    # Reject redeclaration of an already-declared identifier
    Parser.error_on_variable_usage(var_name, True)
    # Reserve stack space for the variable, then record it in the
    # symbol table
    var_record = CG.declare_variable(var_type, var_name, var_size)
    Parser.s_table.insert(var_name, var_record)
    Parser.match(token, TokenType.Semicolon)
def call_function(func_identifier, func_signature, er_params):
    """
    Handles built-in and user-defined function calls: checks that the
    callee is a function, checks that the parameters are of the right
    types, and tells the code generator to generate the function call.

    :param func_identifier: The id of the function
    :param func_signature: The FunctionSignature associated with the id
    :param er_params: A list of ExpressionRecords that hold the parameters
    :return: An ExpressionRecord that holds the function's return value
    :raises SemanticError: if the callee is not a function, or the
        parameter count or types do not match the signature
    """
    # Built-in functions bypass signature validation entirely
    if func_identifier in CG.BUILT_IN_FUNCTIONS:
        function, datatype = CG.BUILT_IN_FUNCTIONS[func_identifier]
        return function(datatype, er_params)

    # Input validation: the identifier must refer to a function
    if not isinstance(func_signature, FunctionSignature):
        raise SemanticError(
            "Tried to call %s(), but it wasn't a "
            "function" % func_identifier,
            Parser.file_reader.get_line_data())

    # Fixed: validate the argument count up front so a short parameter
    # list produces a SemanticError rather than an IndexError below.
    if len(er_params) != len(func_signature.param_list_types):
        raise SemanticError(
            "Function %s expects %d parameters, but %d were given"
            % (func_identifier, len(func_signature.param_list_types),
               len(er_params)),
            Parser.file_reader.get_line_data())

    # Verify that each parameter's type matches the signature
    for i, expect_type in enumerate(func_signature.param_list_types):
        if expect_type != er_params[i].data_type:
            # Fixed: pass get_line_data() (not the FileReader object),
            # consistent with every other SemanticError raise site.
            raise SemanticError(
                "Parameter for %s in position %d "
                "has the wrong type: expected %s"
                % (func_identifier, i, expect_type),
                Parser.file_reader.get_line_data())

    return CG.call_function(func_signature, er_params)
def parse(filename, asm_output_filename):
    """
    Uses recursive descent to parse an input file, printing a list of
    productions as it goes. Opens the input file, and calls 'program()',
    which begins recursive descent until an EndOfFile token is reached.
    If no errors occur, it reports success.

    :param filename: The name of the file to parse.
    :param asm_output_filename: The name of the assembly file to write.
    :return: True if compiled successfully; else False
    """
    with FileReader(filename) as reader:
        with open(asm_output_filename, 'w') as asm_file:
            CG.init(asm_file, reader)
            Parser.file_reader = reader
            Parser.s_table = SymbolTable()

            # Start recursive descent at the start symbol; the entire
            # input must be consumed up to the EndOfFile token.
            tok = Scanner.get_token(Parser.file_reader)
            Parser.program(tok)
            Parser.match(tok, TokenType.EndOfFile)

            # A main function is the mandatory point of entry
            if not Parser.s_table.find("main"):
                print("No main function found in program; point of entry "
                      "required")
                CG.is_code_ok = False

            # Every forward-declared prototype must have been defined
            for func_id in Parser.s_table.get_undefined_prototypes():
                print("Function %s was forward declared, but was never "
                      "defined." % func_id)
                CG.is_code_ok = False

            if CG.is_code_ok:
                print("\nSuccessfully compiled %s\n" % filename)
                return True
            print("\nCompilation of %s completed unsuccessfully.\n"
                  % filename)
            # os.remove(asm_output_filename)
            return False
def relfactor(token):
    """
    Implements recursive descent for the rule:
    <relfactor> ==> 40 <factor> [TokenType.RelationalOperator <factor>]

    :return: an ExpressionRecord that holds the result of the relfactor
    """
    factor_starters = (TokenType.OpenParen, TokenType.Identifier,
                       TokenType.Float, TokenType.Integer,
                       TokenType.String, TokenType.Char)
    if token.t_type not in factor_starters:
        Parser.raise_production_not_found_error(token, 'relfactor')
        return
    print(40, end=" ")
    lhs = Parser.factor(token)
    # The relational tail is optional; without it the factor itself is
    # the result.
    if token.t_type != TokenType.RelationalOperator:
        return lhs
    rel_op = token.lexeme
    Parser.match(token, TokenType.RelationalOperator)
    rhs = Parser.factor(token)
    return CG.gen_rel_expression(lhs, rhs, rel_op)
def term(token):
    """
    Implements recursive descent for the rule:
    <term> ==> 39 <relfactor> { TokenType.MulDivModOperator <relfactor> }

    :return: an ExpressionRecord that holds the result of the term
    """
    factor_starters = (TokenType.OpenParen, TokenType.Identifier,
                       TokenType.Float, TokenType.Integer,
                       TokenType.String, TokenType.Char)
    if token.t_type not in factor_starters:
        Parser.raise_production_not_found_error(token, 'term')
        return
    print(39, end=" ")
    result = Parser.relfactor(token)
    # Fold additional relfactors into the result left-to-right
    while token.t_type == TokenType.MulDivModOperator:
        mul_op = token.lexeme
        Parser.match(token, TokenType.MulDivModOperator)
        rhs = Parser.relfactor(token)
        result = CG.gen_expression(result, rhs, mul_op)
    return result
def function_prototype(token):
    """
    Implements recursive descent for the rule:
    <function_prototype> ==> 4 TokenType.KeywordProto TokenType.Identifier
        TokenType.OpenParen <param_list> TokenType.CloseParen
        <return_identifier> <return_datatype> TokenType.Semicolon
    """
    if token.t_type == TokenType.KeywordProto:
        print(4, end=" ")
        Parser.match(token, TokenType.KeywordProto)
        # The prototype's name; checked for redeclaration before we
        # insert it into the symbol table
        function_id = token.lexeme
        Parser.error_on_variable_usage(function_id, is_decl_stmt=True,
                                       is_prototype=True)
        Parser.match(token, TokenType.Identifier)
        Parser.match(token, TokenType.OpenParen)
        param_list = Parser.param_list(token)
        # param_list entries are (identifier, datatype, size); only the
        # datatypes are part of a prototype's signature
        param_types = [param[1] for param in param_list]
        Parser.match(token, TokenType.CloseParen)
        Parser.return_identifier(token)
        return_val_type = Parser.return_datatype(token)
        func_signature = FunctionSignature(
            identifier=function_id,
            label=CG.gen_function_label(function_id),
            param_list_types=param_types,
            return_type=return_val_type,
            is_prototype=True)
        Parser.s_table.insert(function_id, func_signature)
        Parser.match(token, TokenType.Semicolon)
    else:
        # Fixed: every other production raises a parse error when its
        # leading token is absent; this one previously fell through
        # silently.
        Parser.raise_production_not_found_error(token, 'function_prototype')
def assignment_or_function_call(token):
    """
    Implements recursive descent for the rule:
    <assignment_or_function_call> ==>
        24 TokenType.Identifier TokenType.AssignmentOperator <expression>
           TokenType.Semicolon |
        25 TokenType.Identifier TokenType.OpenBracket <expression>
           TokenType.CloseBracket TokenType.AssignmentOperator
           <expression> TokenType.Semicolon |
        26 TokenType.Identifier TokenType.OpenParen <expression_list>
           TokenType.CloseParen TokenType.Semicolon

    The three alternatives are: plain assignment (24), assignment to an
    array element (25), and a statement-level function call (26).
    """
    if token.t_type == TokenType.Identifier:
        # Remember the stack offset so temporaries allocated while
        # evaluating this statement can be reclaimed at the end.
        next_offset_before_statement = CG.next_offset
        # get the param's identifier and look it up
        identifier = token.lexeme
        Parser.error_on_variable_usage(identifier)
        # er_lhs is an ExpressionRecord for a variable, or a
        # FunctionSignature when the identifier names a function
        er_lhs = Parser.s_table.find_in_all_scopes(identifier)
        Parser.match(token, TokenType.Identifier)
        if token.t_type == TokenType.AssignmentOperator:
            # Production 24: identifier = <expression> ;
            print(24, end=" ")
            Parser.match(token, TokenType.AssignmentOperator)
            er_rhs = Parser.expression(token)
            Parser.match(token, TokenType.Semicolon)
            CG.code_gen_assign(er_lhs, er_rhs)
        elif token.t_type == TokenType.OpenBracket:
            # Production 25: identifier [ <expression> ] = <expression> ;
            print(25, end=" ")
            Parser.match(token, TokenType.OpenBracket)
            er_subscript = Parser.expression(token)
            Parser.match(token, TokenType.CloseBracket)
            Parser.match(token, TokenType.AssignmentOperator)
            er_rhs = Parser.expression(token)
            Parser.match(token, TokenType.Semicolon)
            CG.code_gen_assign(er_lhs, er_rhs,
                               dest_subscript=er_subscript)
        elif token.t_type == TokenType.OpenParen:
            # Production 26: identifier ( <expression_list> ) ;
            # The return value (if any) is discarded.
            print(26, end=" ")
            Parser.match(token, TokenType.OpenParen)
            param_list = Parser.expression_list(token)
            Parser.match(token, TokenType.CloseParen)
            Parser.match(token, TokenType.Semicolon)
            # First handle built-in functions
            if identifier in CG.BUILT_IN_FUNCTIONS.keys():
                function, data_type = CG.BUILT_IN_FUNCTIONS[identifier]
                function(data_type, param_list)
            else:
                # er_lhs is the FunctionSignature found above
                Parser.call_function(identifier, er_lhs, param_list)
        else:
            Parser.raise_production_not_found_error(
                token, 'assignment_or_function_call')
        # Reclaim stack space that was used during this statement
        CG.next_offset = next_offset_before_statement
    else:
        Parser.raise_production_not_found_error(
            token, 'assignment_or_function_call')
def if_statement(token):
    """
    Implements recursive descent for the rule:
    <if_statement> ==> 31 TokenType.KeywordIf TokenType.OpenParen
        <expression> TokenType.CloseParen <code_block> [ <else_clause> ]

    Generates a conditional branch around the "then" block; when an else
    clause is present, also generates the branch that skips it after the
    "then" block completes.

    :raises SemanticError: if the condition is not a boolean expression
    """
    if token.t_type == TokenType.KeywordIf:
        print(31, end=" ")
        Parser.match(token, TokenType.KeywordIf)
        Parser.match(token, TokenType.OpenParen)
        er_condition = Parser.expression(token)
        # The condition must be boolean; report before consuming ')'
        if er_condition.data_type != DataTypes.BOOL:
            raise SemanticError(
                "If statement requires boolean expression "
                "as an argument", Parser.file_reader.get_line_data())
        Parser.match(token, TokenType.CloseParen)
        # else_label: target when the test fails;
        # after_else_label: target that skips the else clause
        else_label, after_else_label = CG.gen_label("else")
        CG.code_gen_if(er_condition, else_label)
        # Generate the "then" block
        Parser.code_block(token)
        if token.t_type == TokenType.KeywordElse:
            Parser.match(token, TokenType.KeywordElse)
            # the last code block must branch to after the else clause
            CG.code_gen("b", after_else_label)
            # if test failed, then pick up program execution here
            CG.code_gen_label(else_label)
            # generate the else block
            Parser.code_block(token)
            # make the after_else label
            CG.code_gen_label(after_else_label)
        else:
            # if test failed, then pick up program execution here
            CG.code_gen_label(else_label)
    else:
        Parser.raise_production_not_found_error(token, 'if_statement')
def function_decl(token):
    """
    Implements recursive descent for the rule:
    <function_decl> ==> 5 TokenType.KeywordFunc TokenType.Identifier
        TokenType.OpenParen <param_list> TokenType.CloseParen
        <return_identifier> <return_datatype> TokenType.OpenCurly
        <statement_list> TokenType.CloseCurly

    If the function was previously forward-declared (a prototype), the
    parameter types and return type are checked against the old
    signature and the old signature is marked as defined; otherwise a
    new signature is inserted into the symbol table.

    :raises SemanticError: if the identifier was already a variable, or
        if the definition disagrees with an earlier prototype
    """
    if token.t_type == TokenType.KeywordFunc:
        print(5, end=" ")
        Parser.match(token, TokenType.KeywordFunc)
        # add the function identifier to the symbol table
        function_id = token.lexeme
        # check that the identifier hasn't already been declared
        Parser.error_on_variable_usage(function_id, is_decl_stmt=True)
        func_signature = FunctionSignature(function_id)
        # old_signature is a previously recorded prototype, if any
        old_signature = Parser.s_table.find_in_all_scopes(function_id)
        if not old_signature:
            # we don't need to check that signatures match
            Parser.s_table.insert(function_id, func_signature)
        elif not isinstance(old_signature, FunctionSignature):
            raise SemanticError(
                "Tried to redeclare %s as a function, "
                "but it was already a variable" % function_id,
                Parser.file_reader.get_line_data())
        # open a new scope
        Parser.s_table.open_scope()
        Parser.match(token, TokenType.Identifier)
        Parser.match(token, TokenType.OpenParen)
        # param_list entries are (identifier, datatype, size)
        param_list = Parser.param_list(token)
        param_types = [x[1] for x in param_list]
        if not old_signature:
            func_signature.param_list_types = param_types
        else:
            # verify that param types match
            for i in range(len(param_types)):
                if param_types[i] != old_signature.param_list_types[i]:
                    raise SemanticError(
                        "In declaration of function %s, parameter #%d is "
                        "of type %r, but previous forward declaration was "
                        "of type %r" %
                        (function_id, i, param_types[i],
                         old_signature.param_list_types[i]),
                        Parser.file_reader.get_line_data())
        Parser.match(token, TokenType.CloseParen)
        # The named return variable lives above the params on the stack
        return_val_id = token.lexeme
        Parser.return_identifier(token)
        return_val_type = Parser.return_datatype(token)
        er_return_val = ExpressionRecord(return_val_type,
                                         (4 * len(param_list) + 4),
                                         is_temp=False)
        Parser.s_table.insert(return_val_id, er_return_val)
        if not old_signature:
            func_signature.return_type = return_val_type
            func_signature.label = CG.gen_function_label(function_id)
        else:
            # verify that return types match
            if return_val_type != old_signature.return_type:
                raise SemanticError(
                    "In declaration of function %s, return datatype is "
                    "of type %r, but previous forward declaration was "
                    "of type %r" %
                    (function_id, return_val_type,
                     old_signature.return_type),
                    Parser.file_reader.get_line_data())
            # at this point, we are guaranteed that the return types,
            # param types, and identifier are equal to that of the old
            # signature, so we can use that signature instead
            func_signature = old_signature
            # record that the signature has been defined
            old_signature.is_prototype = False
        CG.code_gen_label(func_signature.label,
                          comment=str(func_signature))
        offset = (4 * len(param_list))
        # In new function, return var is at (4*len(params)+8)($fp),
        # params are at 4($fp) thru (4*len(params)+4)($fp)
        for identifier, data_type, size in param_list:
            er_param = ExpressionRecord(data_type, offset, is_temp=False,
                                        is_reference=True)
            Parser.s_table.insert(identifier, er_param)
            offset -= 4
        Parser.match(token, TokenType.OpenCurly)
        Parser.statement_list(token)
        Parser.match(token, TokenType.CloseCurly)
        # close the function's scope
        Parser.s_table.close_scope()
        # reset the stack offsets
        CG.next_offset = -8
        # Jump back to the caller's return address
        CG.code_gen("jr", "$ra")
    else:
        Parser.raise_production_not_found_error(token, 'function_decl')
def variable_or_function_call(token):
    """
    Implements recursive descent for the rule:
    <variable_or_function_call> ==>
        49 TokenType.Identifier TokenType.OpenBracket <expression>
           TokenType.CloseBracket |
        50 TokenType.OpenParen <expression_list> TokenType.CloseParen |
        51 TokenType.Identifier

    :return: an ExpressionRecord that holds:
        49 the value at array_id[subscript], if it was an array
        50 the return value of the function, if it was a function;
        51 the value of the variable, if it was a variable id
    :raises SemanticError: if a subscript is not an integer, if a
        subscript is applied to a non-array, or if a call is made on a
        non-function identifier
    """
    if token.t_type == TokenType.Identifier:
        identifier = token.lexeme
        # ExpressionRecord for a variable, or FunctionSignature for a
        # function identifier
        exp_rec = Parser.s_table.find_in_all_scopes(identifier)
        Parser.match(token, TokenType.Identifier)
        if token.t_type == TokenType.OpenBracket:
            # Production 49: array element access
            print(49, end=" ")
            Parser.match(token, TokenType.OpenBracket)
            er_subscript = Parser.expression(token)
            # Input validation: verify that the subscript is an integer,
            # and that exp_rec contains an array
            if er_subscript.data_type != DataTypes.INT:
                raise SemanticError("Subscript is not an integer",
                                    Parser.file_reader.get_line_data())
            if not DataTypes.is_array(exp_rec.data_type):
                raise SemanticError(
                    "Subscript applied to variable %s, "
                    "which is not an array" % identifier,
                    Parser.file_reader.get_line_data())
            # Match ]: wait until after potential error messages to do this
            Parser.match(token, TokenType.CloseBracket)
            # TODO: make this a function in CG
            # Make a temp ExpressionRecord to hold the value at
            # array[subscript], and return it
            result_exp_rec = ExpressionRecord(
                DataTypes.array_to_basic(exp_rec.data_type),
                CG.next_offset, is_temp=True, is_reference=False)
            CG.next_offset -= 4
            CG.code_gen_assign(result_exp_rec, exp_rec,
                               src_subscript=er_subscript)
            return result_exp_rec
        elif token.t_type == TokenType.OpenParen:
            # Production 50: function call
            print(50, end=" ")
            # Fixed: "identifier not in ..." instead of
            # "not identifier in ...", and no redundant .keys()
            if not isinstance(exp_rec, FunctionSignature) and \
                    identifier not in CG.BUILT_IN_FUNCTIONS:
                raise SemanticError(
                    "Tried to call %s as a function, "
                    "but it was not a function."
                    % identifier, Parser.file_reader.get_line_data())
            # exp_rec is actually a function signature, so call it that
            func_signature = exp_rec
            Parser.match(token, TokenType.OpenParen)
            er_params = Parser.expression_list(token)
            Parser.match(token, TokenType.CloseParen)
            return Parser.call_function(identifier, func_signature,
                                        er_params)
        else:
            # Production 51: plain variable reference
            print(51, end=" ")
            return exp_rec
    else:
        # Fixed: removed the stray "raise" before this call -- the
        # helper raises internally (as at every other call site), and
        # "raise None" would be a TypeError if it ever returned.
        Parser.raise_production_not_found_error(
            token, "variable_or_function_call")