class CompilationEngine: """ The compilation engine compile the jack code given in the input file into an xml code saved in the out_file """ def __init__(self, in_file, out_file): """ A compilation engine constructor :param in_file: the file we are currently compiling :param out_file: the file where we save the output """ self.tokenizer = JackTokenizer(in_file) self.out_file = open(out_file, 'w') self._indent_count = 0 def compile_class(self): """ compiles a class according to the grammar """ self._write_outer_tag(CLASS_TAG) self.tokenizer.advance() if self.tokenizer.key_word() != CLASS_TAG: print(COMPILE_CLASS_ERROR) sys.exit() self._write_token(self.tokenizer.token_type()) self._check_write_name() self._check_write_symbol("{") # there may be multiple variable declarations while self._check_if_var_dec(): self.compile_class_var_dec() # there may be multiple subroutine declarations while self._check_subroutine_dec(): self.compile_subroutine_dec() self._check_write_symbol("}") self._write_outer_tag(CLASS_TAG, IS_ENDING_TAG) def compile_class_var_dec(self): """ compiles the class's variables declarations """ self._write_outer_tag(CLASS_VAR_DEC_TAG) # we only come in the function if the current token is correct so we # can just write it self._write_token(self.tokenizer.token_type()) self._check_write_type() self._check_write_name() while self._check_if_comma(): # there are more variables self._check_write_symbol(",") self._check_write_name() self._check_write_symbol(";") self._write_outer_tag(CLASS_VAR_DEC_TAG, IS_ENDING_TAG) def compile_subroutine_dec(self): """ compiles the class's subroutine (methods and functions) declarations """ self._write_outer_tag(SUBROUTINE_DEC_TAG) # we only come in the function if the current token is correct so we # can just write it self._write_token(self.tokenizer.token_type()) # the function is either void or has a type if self.tokenizer.key_word() == 'void': self._write_token(self.tokenizer.token_type()) else: self._check_write_type() 
self._check_write_name() self._check_write_symbol("(") self.compile_parameter_list() self._check_write_symbol(")") self.compile_subroutine_body() self._write_outer_tag(SUBROUTINE_DEC_TAG, IS_ENDING_TAG) def compile_parameter_list(self): """ compiles the parameter list for the subroutines """ self._write_outer_tag(PARAMETER_LIST_TAG) # if curr_token is ')' it means the param list is empty if self.tokenizer.symbol() != ')': self._check_write_type() self._check_write_name() while self._check_if_comma(): # there are more params self._check_write_symbol(",") self._check_write_type() self._check_write_name() self._write_outer_tag(PARAMETER_LIST_TAG, IS_ENDING_TAG) def compile_subroutine_body(self): """ compiles the body of the subroutine """ self._write_outer_tag(SUBROUTINE_BODY_TAG) self._check_write_symbol("{") # there may be multiple variable declarations at the beginning of # the subroutine while self.tokenizer.key_word() == 'var': self.compile_var_dec() self.compile_statements() self._check_write_symbol("}") self._write_outer_tag(SUBROUTINE_BODY_TAG, IS_ENDING_TAG) def compile_var_dec(self): """ compiles the variable declarations """ self._write_outer_tag(VAR_DEC_TAG) self._write_token(self.tokenizer.token_type()) self._check_write_type() self._check_write_name() # there may be multiple variable names in the dec while self._check_if_comma(): self._check_write_symbol(",") self._check_write_name() self._check_write_symbol(";") self._write_outer_tag(VAR_DEC_TAG, IS_ENDING_TAG) def compile_statements(self): """ compiles the statements (0 or more statements) """ self._write_outer_tag(STATEMENTS_TAG) while self._check_if_statement(): if self.tokenizer.key_word() == 'let': self.compile_let() elif self.tokenizer.key_word() == 'if': self.compile_if() elif self.tokenizer.key_word() == 'while': self.compile_while() elif self.tokenizer.key_word() == 'do': self.compile_do() elif self.tokenizer.key_word() == 'return': self.compile_return() self._write_outer_tag(STATEMENTS_TAG, 
IS_ENDING_TAG) def compile_do(self): """ compiles the do statement """ self._write_outer_tag(DO_STATEMENT_TAG) self._write_token(self.tokenizer.token_type()) self.compile_subroutine_call() self._check_write_symbol(";") self._write_outer_tag(DO_STATEMENT_TAG, IS_ENDING_TAG) def compile_let(self): """ compiles the let statement """ self._write_outer_tag(LET_STATEMENT_TAG) self._write_token(self.tokenizer.token_type()) self._check_write_name() if self.tokenizer.symbol() == '[': # if there is an array self._check_write_symbol("[") self.compile_expression() self._check_write_symbol("]") self._check_write_symbol("=") self.compile_expression() self._check_write_symbol(";") self._write_outer_tag(LET_STATEMENT_TAG, IS_ENDING_TAG) def compile_if(self): """ compiles the if statements """ self._write_outer_tag(IF_STATEMENT_TAG) self._write_token(self.tokenizer.token_type()) self._check_write_symbol("(") self.compile_expression() self._check_write_symbol(")") self._check_write_symbol("{") self.compile_statements() self._check_write_symbol("}") # there can also be an if else scenario if self.tokenizer.key_word() == 'else': self._write_token(self.tokenizer.token_type()) self._check_write_symbol("{") self.compile_statements() self._check_write_symbol("}") self._write_outer_tag(IF_STATEMENT_TAG, IS_ENDING_TAG) def compile_while(self): """ compiles the while statements """ self._write_outer_tag("whileStatement") self._write_token(self.tokenizer.token_type()) self._check_write_symbol("(") self.compile_expression() self._check_write_symbol(")") self._check_write_symbol("{") self.compile_statements() self._check_write_symbol("}") self._write_outer_tag("whileStatement", IS_ENDING_TAG) def compile_return(self): """ compiles the return statements """ self._write_outer_tag(RETURN_STATEMENT_TAG) self._write_token(self.tokenizer.token_type()) # if cur token is ; we return nothing, otherwise we return something if not self.tokenizer.symbol() == ';': self.compile_expression() 
self._check_write_symbol(";") self._write_outer_tag(RETURN_STATEMENT_TAG, IS_ENDING_TAG) def compile_subroutine_call(self): """ compiles the subroutine calls ( when we actually call a subroutine as opposed to declaring it) """ self._check_write_name() # there may be a '.' if it is a foo.bar() scenario (or Foo.bar()) if self.tokenizer.symbol() == ".": self._check_write_symbol(".") self._check_write_name() self._check_write_symbol("(") self.compile_expression_list() self._check_write_symbol(")") def compile_expression(self): """ compiles expressions which are terms and possibly operators and more terms """ self._write_outer_tag(EXPRESSION_TAG) self.compile_term() # there may be a few operators in one expression while self.tokenizer.symbol() in OPERATIONS: self._write_op() self.compile_term() self._write_outer_tag(EXPRESSION_TAG, IS_ENDING_TAG) def compile_term(self): """ compiles terms according to the grammar """ self._write_outer_tag(TERM_TAG) cur_type = self.tokenizer.token_type() # either a string/int constant if self.tokenizer.token_type() in ["INT_CONST", "STRING_CONST"]: self._write_token(cur_type) # or a constant keyword (true, false, null, this) elif self.tokenizer.key_word() in KEYWORD_CONST: self._write_token(cur_type) # or an expression within brown brackets elif self.tokenizer.symbol() == '(': self._write_token(cur_type) self.compile_expression() self._check_write_symbol(")") # or a unary op and then a term elif self.tokenizer.symbol() in UNARY_OPS: self._write_op() self.compile_term() # or it is an identifier which could be: elif self.tokenizer.identifier(): self._compile_term_identifier() else: print(COMPILE_TERM_ERROR) sys.exit() self._write_outer_tag(TERM_TAG, IS_ENDING_TAG) def _compile_term_identifier(self): """ compiles terms in case of identifier token """ # an array if self.tokenizer.get_next_token() == '[': self._check_write_name() self._check_write_symbol("[") self.compile_expression() self._check_write_symbol("]") # or a subroutine call elif 
self.tokenizer.get_next_token() in [".", "("]: self.compile_subroutine_call() else: self._check_write_name() # or just a variable name def compile_expression_list(self): """ compiles the expression lists """ self._write_outer_tag(EXPRESSION_LIST_TAG) # if it is ')' then the expression list is empty if self.tokenizer.symbol() != ')': self.compile_expression() while self._check_if_comma(): # while there are more expressions self._write_token(self.tokenizer.token_type()) self.compile_expression() self._write_outer_tag(EXPRESSION_LIST_TAG, IS_ENDING_TAG) def _check_if_var_dec(self): """ check if we are currently compiling a variable declaration :return: true iff the current token is either 'static' or 'field' """ return self.tokenizer.key_word() in CLASS_VAR_KEYWORDS def _check_subroutine_dec(self): """ checks if we are currently compiling a subroutine declaration :return: true iff the current token is either 'constructor' or 'function' or 'method' """ return self.tokenizer.key_word() in SUBROUTINE def _check_if_comma(self): """ checks if current token is a comma :return: true iff the current token is a ',' """ return self.tokenizer.symbol() == ',' def _check_if_statement(self): """ checks if we are currently compiling a statement :return: true iff the current token is in ['let', 'if', 'while', 'do', 'return'] """ return self.tokenizer.key_word() in STATEMENTS def _check_write_type(self): """ checks if the current token is a valid type and if so, it writes it to the output file """ if self.tokenizer.key_word() in TYPE_KEYWORDS: self._write_token(self.tokenizer.token_type()) else: self._check_write_name() def _check_write_symbol(self, expected_symbol): """ checks if the current token is the expected symbol, if so it write it to the output file :param expected_symbol: the symbol we are validating is the current token :return: prints illegal statement error if it is not the expected symbol and exits the program """ if self.tokenizer.symbol() != expected_symbol: 
print(ILLEGAL_STATEMENT_ERROR) sys.exit() self._write_token(self.tokenizer.token_type()) def _check_write_name(self): """ checks the current token is a name (identifier), and if so, write it to the output file :return: prints illegal statement error if it is not a name and exits the program """ if self.tokenizer.identifier(): self._write_token("IDENTIFIER") else: print(ILLEGAL_STATEMENT_ERROR) sys.exit() def _write_outer_tag(self, tag_str, end=False): """ writes the outer tags of the different sections we are compiling :param tag_str: the string of the current section we are compiling :param end: true iff it is an end tag """ if end: # we decrease the indent count before the closing tag self._indent_count -= 1 self.out_file.write("\t" * self._indent_count) self.out_file.write("</" + tag_str + ">\n") else: # we increase the indent count after the opening tag self.out_file.write("\t" * self._indent_count) self.out_file.write("<" + tag_str + ">\n") self._indent_count += 1 def _write_op(self): """ writes an op symbol to the out file """ self.out_file.write("\t" * self._indent_count) self.out_file.write("<symbol> ") if self.tokenizer.symbol() == '<': self.out_file.write("<") elif self.tokenizer.symbol() == '>': self.out_file.write(">") elif self.tokenizer.symbol() == '&': self.out_file.write("&") elif self.tokenizer.symbol() == '\"': self.out_file.write(""") else: self.out_file.write(self.tokenizer.symbol()) self.out_file.write(" </symbol>\n") self.tokenizer.advance() def _write_token(self, cur_type): """ writes the current token to the output file :param cur_type: the type of the current token """ self.out_file.write("\t" * self._indent_count) self.out_file.write("<" + TOKEN_TYPE_STR[cur_type] + "> ") self.out_file.write(str(self.tokenizer.get_token_str())) self.out_file.write(" </" + TOKEN_TYPE_STR[cur_type] + ">\n") self.tokenizer.advance()
class CompilationEngine:
    """Recursive-descent compiler for the Jack language (code generation).

    Walks the token stream of a JackTokenizer and emits executable VM
    commands through a VMWriter, using a SymbolTable to resolve
    variables.  Compilation runs to completion inside the constructor.
    Each ``compile_*`` method expects the tokenizer to stand on the
    first token of its construct and leaves it on the first token after
    the construct.
    """

    def __init__(self, input_file, output_file):
        """Builds the engine and immediately compiles ``input_file``
        into VM code written to ``output_file``.

        :param input_file: the Jack source handed to the JackTokenizer
        :param output_file: destination handed to the VMWriter
        """
        self.tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        # name of the subroutine currently being compiled
        self.current_sub_name = None
        # name of the class being compiled (set by compile_class)
        self.class_name = None
        # NOTE(review): func_counter is never used in this class
        self.func_counter = 0
        # running counters used to generate unique while/if labels
        self.while_counter = 0
        self.if_counter = 0
        # starts the process
        self.tokenizer.advance()
        self.compile_class()
        self.vm_writer.close()

    def compile_class(self):
        """Compiles the whole class:
        'class' className '{' classVarDec* subroutineDec* '}'

        :return: none
        """
        # advances a single step to get the class name
        self.tokenizer.advance()
        # set class's name (used later to qualify subroutine names)
        self.class_name = self.tokenizer.current_token
        # moves to the symbol {
        self.tokenizer.advance()
        # move to the next token and check what it is
        self.tokenizer.advance()
        # compiles class variables (static/field declarations)
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles the subroutines
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # we are now at the <symbol> } <symbol> which closes the class

    def compile_class_var_dec(self):
        """Compiles one static/field declaration and records every
        declared variable in the class-level symbol table; no VM code
        is emitted here.

        :return: none
        """
        var_kind = self.tokenizer.key_word()
        # advances the token to the var's type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()  # built-in type
        else:
            var_type = self.tokenizer.identifier()  # class type
        # advances the token to the var's identifier
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_name = self.tokenizer.key_word()
        else:
            var_name = self.tokenizer.identifier()
        # update symbol table
        self.symbol_table.define(var_name, var_type, var_kind)
        # advance to next token, and check if there are more var_names
        self.tokenizer.advance()
        while self.tokenizer.current_token != ";":
            # token is <symbol> , <symbol>
            # advance to var's identifier
            self.tokenizer.advance()
            var_name = self.tokenizer.current_token
            # update symbol table
            self.symbol_table.define(var_name, var_type, var_kind)
            self.tokenizer.advance()
        # the current token is <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """Compiles a single subroutine declaration
        (method / constructor / function).

        :return: none
        """
        # start a fresh subroutine-scope symbol table
        self.symbol_table.start_subroutine()
        # get subroutine kind (method/constructor/function)
        sub_type = self.tokenizer.key_word()
        # advances the token to what the subroutine returns
        self.tokenizer.advance()
        # updates the return type
        # NOTE(review): return_type is captured but never used -- the
        # generated code does not depend on it
        if self.tokenizer.token_type() == KEY_WORD:
            return_type = self.tokenizer.key_word()
        else:
            return_type = self.tokenizer.identifier()
        # advances the token to <identifier> sub_name <identifier>
        self.tokenizer.advance()
        # update the subroutine name
        subroutine_name = self.tokenizer.identifier()
        self.current_sub_name = subroutine_name
        # advance to <symbol> ( <symbol>
        self.tokenizer.advance()
        # if subroutine is a method, 'this' is implicitly argument 0
        if sub_type == METHOD:
            self.symbol_table.define("this", self.class_name, "ARG")
        # compiles the parameter list
        self.compile_parameter_list()
        # we are at <symbol> ) <symbol>
        # advance to subroutine body, and compile it
        self.tokenizer.advance()
        self.compile_subroutine_body(sub_type)

    def compile_subroutine_body(self, sub_type):
        """Compiles the subroutine body and emits the VM function
        prologue appropriate for its kind.

        :param sub_type: the subroutine kind (METHOD / CONSTRUCTOR /
            FUNCTION), which selects the prologue
        :return: none
        """
        # we are at bracket {, advance
        self.tokenizer.advance()
        # compile var decs first, so the local count is known before
        # the 'function' command is written
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()
        # write function label with its number of locals
        self.vm_writer.write_function(
            self.class_name + '.' + self.current_sub_name,
            self.symbol_table.var_count("VAR"))
        # if it is a method, anchor THIS to the object (argument 0)
        if sub_type == METHOD:
            self.vm_writer.write_push(ARG, 0)
            self.vm_writer.write_pop("POINTER", 0)
        # if it is a constructor, allocate memory for the fields and
        # anchor THIS to the freshly allocated object
        if sub_type == CONSTRUCTOR:
            self.vm_writer.write_push("CONST",
                                      self.symbol_table.var_count("FIELD"))
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        if self.tokenizer.current_token != "}":
            self.compile_statements()
        # we are at bracket }, advance
        self.tokenizer.advance()

    def compile_parameter_list(self):
        """Compiles a parameter list, defining each parameter as an ARG
        in the subroutine symbol table; no VM code is emitted here.

        :return: none
        """
        # advance to first parameter
        self.tokenizer.advance()
        # while there are more parameters
        while self.tokenizer.current_token != ')':
            # tests what to put as the type of the object
            if self.tokenizer.token_type() == KEY_WORD:
                var_type = self.tokenizer.key_word()
            else:
                var_type = self.tokenizer.identifier()
            # advance to the variable's name <identifier> var_name <identifier>
            self.tokenizer.advance()
            var_name = self.tokenizer.identifier()
            # define the new variable as an argument
            self.symbol_table.define(var_name, var_type, "ARG")
            # gets the next token
            self.tokenizer.advance()
            # advance to the next token if we are at ','
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()

    def compile_var_dec(self):
        """Compiles one 'var' declaration, defining each name as a VAR
        in the subroutine symbol table; no VM code is emitted here.

        :return: none
        """
        # we are at <keyword> var <keyword>
        # advance to variable type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()
        # advance to the variable's name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            # define variable in symbol table
            self.symbol_table.define(var_name, var_type, "VAR")
            # advance to next token
            self.tokenizer.advance()
            # skip the ',' between consecutive names
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()
        # we are at <symbol> ; <symbol>
        # advance to next token
        self.tokenizer.advance()

    def compile_statements(self):
        """Compiles statements until the enclosing '}' is reached,
        dispatching on each statement's leading keyword.

        :return: none
        """
        # while there are more statements, deal with each one
        while self.tokenizer.current_token != '}':
            statement_type = self.tokenizer.key_word()
            if statement_type == LET:
                self.compile_let()
            elif statement_type == IF:
                self.compile_if()
            elif statement_type == WHILE:
                self.compile_while()
            elif statement_type == DO:
                self.compile_do()
            elif statement_type == RETURN:
                self.compile_return()

    def compile_do(self):
        """Compiles a do statement; the callee's return value is
        discarded into temp 0.

        :return: none
        """
        # we are at <keyword> do <keyword>
        # advance to next token <identifier> name_of_func <identifier>
        self.tokenizer.advance()
        func_name = self.tokenizer.identifier()
        self.tokenizer.advance()
        # compile the subroutine call
        self.compile_subroutine_call(func_name)
        # pop the (ignored) result from the function into temp
        self.vm_writer.write_pop("TEMP", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()

    def compile_let(self):
        """Compiles a let statement, assigning either to a plain
        variable or to an array entry (via the THAT pointer).

        :return: none
        """
        # we are at <keyword> let <keyword>
        # advance to next token (var_name)
        self.tokenizer.advance()
        # we are at <identifier> var_name <identifier>
        var_name = self.tokenizer.identifier()
        # get variable data from the symbol table
        var_index = self.symbol_table.index_of(var_name)
        var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        is_array = False
        if self.tokenizer.current_token == '[':
            is_array = True
            # push arr (the base address)
            self.vm_writer.write_push(var_kind, var_index)
            # advance to the index expression and compile it
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ] <symbol>, advance to next token
            self.tokenizer.advance()
            # add base address and index to get the target location
            self.vm_writer.write_arithmetic("ADD")
        # we are at <symbol> = <symbol>
        # advance to the right-hand expression and compile it
        self.tokenizer.advance()
        self.compile_expression()
        # if var is an array entry: save the value in temp 0, point
        # THAT at the target cell, then store the value through it
        if is_array:
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        # if var is not an array, pop straight into its segment slot
        else:
            self.vm_writer.write_pop(var_kind, var_index)
        # we are at <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()
        return

    def compile_while(self):
        """Compiles a while statement using a pair of uniquely numbered
        labels (While_N / End_While_N).

        :return: none
        """
        while_counter = str(self.while_counter)
        # update the while counter so nested loops get unique labels
        self.while_counter += 1
        # create the label for the start of the while
        self.vm_writer.write_label("While_" + while_counter)
        # we are at <keyword> while <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_expression()
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # negate the condition so if-goto jumps when it does NOT hold
        self.vm_writer.write_arithmetic("NOT")
        # if the condition is not met, go to the end of the while
        self.vm_writer.write_if("End_While_" + while_counter)
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        # compile the loop body
        self.compile_statements()
        # go back to the start of the while
        self.vm_writer.write_goto("While_" + while_counter)
        # create the label for the end of the while
        self.vm_writer.write_label("End_While_" + while_counter)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        return

    def compile_return(self):
        """Compiles a return statement; a void return pushes constant 0
        since every Jack call site expects a return value.

        :return: none
        """
        # we are at <keyword> return <keyword>, advance to next token
        self.tokenizer.advance()
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # if function is void, push const 0 to the stack
            self.vm_writer.write_push("CONST", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()
        self.vm_writer.write_return()
        return

    def compile_if(self):
        """Compiles an if statement (with optional else) using uniquely
        numbered ELSE_N / END_IF_N labels; the ELSE label is emitted
        even when there is no else clause.

        :return: none
        """
        if_count = str(self.if_counter)
        # update if counter so nested ifs get unique labels
        self.if_counter += 1
        # we are at <keyword> if <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        # compile the condition expression
        self.compile_expression()
        # negate the condition so if-goto jumps when it does NOT hold
        self.vm_writer.write_arithmetic("NOT")
        # jump to the else part when the condition is not met
        self.vm_writer.write_if("ELSE_" + if_count)
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_statements()
        # jump over the else part to the end of the if
        self.vm_writer.write_goto("END_IF_" + if_count)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        # create the else label (which may be empty)
        self.vm_writer.write_label("ELSE_" + if_count)
        if self.tokenizer.current_token == 'else':
            # we are at <keyword> else <keyword>, advance
            self.tokenizer.advance()
            # we are at <symbol> { <symbol>, advance
            self.tokenizer.advance()
            self.compile_statements()
            # we are at <symbol> } <symbol>, advance
            self.tokenizer.advance()
        # create the end-of-if label
        self.vm_writer.write_label("END_IF_" + if_count)
        return

    def compile_expression(self):
        """Compiles an expression: term (op term)*.  Binary operators
        are emitted after both operands (postfix), left to right; '*'
        and '/' are compiled as calls to the Math library.

        :return: none
        """
        # compile the first term
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            call_math = False
            # we are at <symbol> op <symbol>
            op = OP_DICT.get(self.tokenizer.current_token)
            # check if the operator needs to call the Math library
            if self.tokenizer.current_token == '*' or self.tokenizer.current_token == '/':
                call_math = True
            # advance to the next term and compile it
            self.tokenizer.advance()
            self.compile_term()
            # output the operator
            if call_math:
                # op is (function name, argument count) -- TODO confirm
                # against OP_DICT
                self.vm_writer.write_call(op[0], op[1])
            else:
                self.vm_writer.write_arithmetic(op)
        return

    def compile_term(self):
        """Compiles a term: constant, string, keyword constant,
        parenthesized expression, unary op, variable, array entry or
        subroutine call.

        :return: none
        """
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            # push the integer constant
            self.vm_writer.write_push("CONST", self.tokenizer.int_val())
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # string value comes without the surrounding quotes
            string_val = self.tokenizer.string_val()
            # push the length of the string and call the String constructor
            self.vm_writer.write_push("CONST", len(string_val))
            self.vm_writer.write_call("String.new", 1)
            # append the characters one by one to the new string
            for char in string_val:
                self.vm_writer.write_push("CONST", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            # keyword constant maps to a (segment, index) pair
            segment, idx = KEY_WORD_CONST.get(self.tokenizer.current_token)
            self.vm_writer.write_push(segment, idx)
            # true is -1: push 0 then bitwise NOT
            if self.tokenizer.current_token == 'true':
                self.vm_writer.write_arithmetic('NOT')
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # we are at <symbol> ( <symbol>, advance to next token
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ) <symbol>, advance to next token
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            op_command = UNARY_OP.get(self.tokenizer.current_token)
            self.tokenizer.advance()
            # compile the operand first, then apply the unary operator
            self.compile_term()
            self.vm_writer.write_arithmetic(op_command)
        # var / var[expression] / subroutine_call
        else:
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            self.tokenizer.advance()
            # if it is var_name[expression]
            if self.tokenizer.current_token == '[':
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                # push arr (the base address)
                self.vm_writer.write_push(var_kind, var_index)
                # we are at <symbol> [ <symbol>, advance to the index
                # expression and compile it
                self.tokenizer.advance()
                self.compile_expression()
                # add base address and index to get the target location
                self.vm_writer.write_arithmetic("ADD")
                # set the THAT pointer to the computed address
                self.vm_writer.write_pop("POINTER", 1)
                # push onto the stack what is in arr[i]
                self.vm_writer.write_push("THAT", 0)
                # we are at <symbol> ] <symbol>, advance
                self.tokenizer.advance()
            # if it is a subroutine call
            elif self.tokenizer.current_token == '(' or self.tokenizer.current_token == '.':
                self.compile_subroutine_call(var_name)
            else:
                # it is just a plain variable: push its value
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)
        return

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated expression list,
        pushing every expression's value onto the stack.

        :return: amount of expressions (call arguments) compiled
        """
        expression_counter = 0
        # check that the list is not empty
        if self.tokenizer.current_token != ')':
            expression_counter += 1
            # compile the first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                expression_counter += 1
                # we are at <symbol> , <symbol>, advance
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        return expression_counter

    def compile_subroutine_call(self, identifier):
        """Compiles a subroutine call, starting after its first name
        token (already consumed by the caller and passed in).

        Three cases: obj.method() where obj is a known variable (push
        the object and call Type.method), Class.function(), and a bare
        method() call on the current object (push pointer 0).

        :param identifier: the first name of the call (variable, class
            or method name)
        :return: none
        """
        func_name = self.class_name + "." + identifier
        num_of_arguments = 0
        if self.tokenizer.current_token == '.':
            # a known variable: rewrite the call as Type.method and
            # pass the object as the implicit first argument
            if self.symbol_table.type_of(identifier) is not None:
                func_name = self.symbol_table.type_of(identifier)
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = func_name + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
                # push the object onto the stack
                segment = TYPE_DICT.get(self.symbol_table.kind_of(identifier))
                idx = self.symbol_table.index_of(identifier)
                self.vm_writer.write_push(segment, idx)
                num_of_arguments += 1
            else:
                # a class-qualified function/constructor call
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = identifier + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
        else:
            # bare call: a method on the current object ('this')
            self.vm_writer.write_push("POINTER", 0)
            num_of_arguments += 1
        # we are at <symbol> ( <symbol>, advance
        self.tokenizer.advance()
        num_of_arguments += self.compile_expression_list()
        # we are at <symbol> ) <symbol>, advance
        self.tokenizer.advance()
        self.vm_writer.write_call(func_name, num_of_arguments)
        return
class CompilationEngine:
    """Syntax analyzer for the Jack language (Nand2Tetris project 10).

    Reads tokens from a JackTokenizer and writes the parse tree as
    indented XML to the output file. Compilation is driven entirely from
    the constructor: creating an instance parses the whole input file and
    closes the output.

    NOTE(review): relies on module-level constants (XML_* tag names,
    KEY_WORDS, OP_LST, OP_DICT, UNARY_OP, KEY_WORD_CONST, ...) and on the
    JackTokenizer API (`current_token`, `advance`, `token_type`,
    `key_word`) defined elsewhere in this project.
    """

    def __init__(self, input_file, output_file):
        """Build the engine and immediately compile the whole input.

        :param input_file: path of the .jack source file to parse
        :param output_file: path of the .xml file to produce
        """
        self.tokenizer = JackTokenizer(input_file)
        self.xml_file = open(output_file, "w")
        # current nesting depth; write_line emits this many tabs
        self.space_depth = 0
        # starts the process: prime the first token, parse, close output
        self.tokenizer.advance()
        self.compile_class()
        self.xml_file.close()

    def compile_class(self):
        """Compile a full class: 'class' name '{' varDecs subroutines '}'.

        :return: none
        """
        # write <class>
        self.non_terminal_open(XML_CLASS)
        # write <keyword> class </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances a single step to get the class name
        self.tokenizer.advance()
        # write <identifier> class_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # moves for the symbol
        self.tokenizer.advance()
        # write <symbol> { </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compiles class variables (zero or more static/field declarations)
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles subroutines (zero or more constructor/method/function)
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # write <symbol> } </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </class>
        self.non_terminal_end(XML_CLASS)

    def non_terminal_end(self, xml_type):
        """Write the closing tag of a non-terminal and decrease the depth.

        :param xml_type: the xml tag name we are closing
        :return: none
        """
        self.space_depth -= 1
        self.write_line(self.terminal_end(xml_type))

    def non_terminal_open(self, xml_type):
        """Write the opening tag of a non-terminal and increase the depth.

        :param xml_type: the xml tag name to open
        :return: none
        """
        self.write_line(self.terminal_opening(xml_type) + "\n")
        self.space_depth += 1

    def terminal_opening(self, word):
        """Return *word* wrapped as an XML opening tag, e.g. '<word>'.

        :param word: the tag name
        :return: the opening tag string (no newline)
        """
        return "<" + word + ">"

    def terminal_end(self, word):
        """Return *word* wrapped as an XML closing tag plus newline.

        :param word: the tag name
        :return: the closing tag string, newline-terminated
        """
        return "</" + word + ">\n"

    def write_line(self, word):
        """Write *word* to the output file at the current indentation.

        :param word: the text to write (tabs are prepended per depth)
        :return: none
        """
        self.xml_file.write("\t" * self.space_depth + word)

    def one_liner(self, xml_type, token):
        """Write a terminal element on one line: '<tag> token </tag>'.

        :param xml_type: the xml tag name
        :param token: the token text to put inside the element
        :return: none
        """
        self.write_line(
            self.terminal_opening(xml_type) + " " + token + " " +
            self.terminal_end(xml_type))

    def compile_class_var_dec(self):
        """Compile one class-level variable declaration line.

        Grammar: ('static'|'field') type varName (',' varName)* ';'
        :return: none
        """
        # write <classVarDec>
        self.non_terminal_open(XML_CLASS_VAR_DEC)
        # write <keyword> static/field </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances the token
        self.tokenizer.advance()
        # the type is either a built-in keyword (int/char/boolean) or a
        # class-name identifier
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <identifier> var_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # check if there are more var_names (comma-separated) until ';'
        while self.tokenizer.current_token != ";":
            # write <symbol> , </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # closes the statement
        self.non_terminal_end(XML_CLASS_VAR_DEC)
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """Compile a single subroutine declaration.

        Grammar: ('constructor'|'function'|'method') ('void'|type)
        subroutineName '(' parameterList ')' subroutineBody
        :return: none
        """
        # writes <subroutineDec>
        self.non_terminal_open(XML_SUBROUTINE_DEC)
        # write <keyword> function/method/constructor </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances the token
        self.tokenizer.advance()
        # return type: keyword (void/int/...) or class-name identifier
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # advances the token
        self.tokenizer.advance()
        # write <identifier> sub_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # compiles the parameter list (advances past '(' internally)
        self.compile_parameter_list()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile subroutine body
        self.compile_subroutine_body()
        # closes the sub routine
        self.non_terminal_end(XML_SUBROUTINE_DEC)

    def compile_subroutine_body(self):
        """Compile a subroutine body: '{' varDec* statements '}'.

        :return: none
        """
        # write <subroutineBody>
        self.non_terminal_open(XML_SUBROUTINE_BODY)
        # opens the bracket {
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile local variable declarations (zero or more 'var' lines)
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()
        # statements are optional — an empty body goes straight to '}'
        if self.tokenizer.current_token != "}":
            self.compile_statements()
        # closes the bracket
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # closes the sub routine body (write </subroutineBody>)
        self.non_terminal_end(XML_SUBROUTINE_BODY)

    def compile_parameter_list(self):
        """Compile a (possibly empty) parameter list, up to the ')'.

        :return: none
        """
        # writes <parameterList>
        self.non_terminal_open(XML_PARAMETER_LIST)
        self.tokenizer.advance()
        while self.tokenizer.current_token != ')':
            # parameter type: keyword or class-name identifier
            if self.tokenizer.token_type() == KEY_WORD:
                self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            else:
                self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the variables name
            self.tokenizer.advance()
            # write <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the next token
            self.tokenizer.advance()
            # a ',' means another parameter follows
            if self.tokenizer.current_token == ",":
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
        # closes the statement
        self.non_terminal_end(XML_PARAMETER_LIST)

    def compile_var_dec(self):
        """Compile one local variable declaration line.

        Grammar: 'var' type varName (',' varName)* ';'
        :return: none
        """
        # writes the opening
        self.non_terminal_open(XML_VAR_DEC)
        # write <keyword> var </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # the type is either a keyword or a class-name identifier
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # gets the variables name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # writes <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the next token
            self.tokenizer.advance()
            # a ',' means another name follows
            if self.tokenizer.current_token == ",":
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
        # writes <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # closes the statement
        self.non_terminal_end(XML_VAR_DEC)

    def compile_statements(self):
        """Compile a sequence of statements until the closing '}'.

        :return: none
        """
        # write <statements>
        self.non_terminal_open(XML_STATEMENTS)
        # while there are more statements, dispatch on the keyword
        while self.tokenizer.current_token != '}':
            statement_type = self.tokenizer.key_word()
            if statement_type == LET:
                self.compile_let()
            elif statement_type == IF:
                self.compile_if()
            elif statement_type == WHILE:
                self.compile_while()
            elif statement_type == DO:
                self.compile_do()
            elif statement_type == RETURN:
                self.compile_return()
        # write </statements>
        self.non_terminal_end(XML_STATEMENTS)

    def compile_do(self):
        """Compile a do statement: 'do' subroutineCall ';'.

        :return: none
        """
        # write <doStatement>
        self.non_terminal_open(XML_DO_STATEMENT)
        # write <keyword> do </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advance to next token (subroutine call)
        self.tokenizer.advance()
        # write <identifier> name_of_func </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile the rest of the subroutine call ('.name(...)' or '(...)')
        self.compile_subroutine_call()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </doStatement>
        self.non_terminal_end(XML_DO_STATEMENT)
        self.tokenizer.advance()

    def compile_let(self):
        """Compile a let statement.

        Grammar: 'let' varName ('[' expression ']')? '=' expression ';'
        :return: none
        """
        # write <letStatement>
        self.non_terminal_open(XML_LET_STATEMENT)
        # write <keyword> let </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advance to next token (var_name)
        self.tokenizer.advance()
        # write <identifier> var_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        if self.tokenizer.current_token == '[':
            # write <symbol> [ </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            # advance to the index expression and compile it
            self.tokenizer.advance()
            self.compile_expression()
            # write <symbol> ] </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> = </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # advance to the right-hand expression and compile it
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </letStatement>
        self.non_terminal_end(XML_LET_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_while(self):
        """Compile a while statement.

        Grammar: 'while' '(' expression ')' '{' statements '}'
        :return: none
        """
        # write <whileStatement>
        self.non_terminal_open(XML_WHILE_STATEMENT)
        # write <keyword> while </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_statements()
        # write <symbol> } </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </whileStatement>
        self.non_terminal_end(XML_WHILE_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_return(self):
        """Compile a return statement: 'return' expression? ';'.

        :return: none
        """
        # write <returnStatement>
        self.non_terminal_open(XML_RETURN_STATEMENT)
        # write <keyword> return </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # an optional return expression precedes the ';'
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </returnStatement>
        self.non_terminal_end(XML_RETURN_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_if(self):
        """Compile an if statement with an optional else clause.

        Grammar: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        :return: none
        """
        # write <ifStatement>
        self.non_terminal_open(XML_IF_STATEMENT)
        # write <keyword> if </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_statements()
        # write <symbol> } </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        if self.tokenizer.current_token == 'else':
            # write <keyword> else </keyword>
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <symbol> { </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_statements()
            # write <symbol> } </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write </ifStatement>
        self.non_terminal_end(XML_IF_STATEMENT)
        return

    def compile_expression(self):
        """Compile an expression: term (op term)*.

        :return: none
        """
        # write <expression>
        self.non_terminal_open(XML_EXPRESSION)
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            # write <symbol> op </symbol>; OP_DICT holds the XML-escaped
            # form of symbols such as < > & (presumably &lt; &gt; &amp; —
            # TODO confirm against the OP_DICT definition)
            if self.tokenizer.current_token in OP_DICT:
                self.one_liner(XML_SYMBOL,
                               OP_DICT.get(self.tokenizer.current_token))
            else:
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
        # write </expression>
        self.non_terminal_end(XML_EXPRESSION)
        return

    def compile_term(self):
        """Compile a single term.

        Handles integer/string constants, keyword constants,
        parenthesized expressions, unary-op terms, and identifiers
        (plain variable, array access, or subroutine call).
        :return: none
        """
        # write <term>
        self.non_terminal_open(XML_TERM)
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            self.one_liner(XML_INT_CONST, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # write without the surrounding "" quotes
            self.one_liner(XML_STRING_CONST,
                           self.tokenizer.current_token[1:-1])
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # write <symbol> ( </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_expression()
            # write <symbol> ) </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            # write <symbol> unary_op </symbol> and compile its operand
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
        # var / var[expression] / subroutine_call
        else:
            # write <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                # write <symbol> [ </symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
                self.compile_expression()
                # write <symbol> ] </symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or \
                    self.tokenizer.current_token == '.':
                self.compile_subroutine_call()
        # write </term>
        self.non_terminal_end(XML_TERM)
        return

    def compile_expression_list(self):
        """Compile a (possibly empty) comma-separated expression list.

        :return: none
        """
        # write <expressionList>
        self.non_terminal_open(XML_EXPRESSION_LIST)
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                # write <symbol> , </symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        # write </expressionList>
        self.non_terminal_end(XML_EXPRESSION_LIST)
        return

    def compile_subroutine_call(self):
        """Compile the tail of a subroutine call, after the first name.

        Handles the optional '.subName' qualifier and then
        '(' expressionList ')'.
        :return: none
        """
        if self.tokenizer.current_token == '.':
            # write <symbol> . </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <identifier> sub_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression_list()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        return
class CompilationEngine:
    """Code generator for the Jack language (Nand2Tetris project 11).

    The compilation engine parses the Jack code from the input file and
    emits VM commands through a VMWriter, using two SymbolTables (class
    scope and subroutine scope) to resolve identifiers.

    NOTE(review): relies on module-level constants (CLASS_TAG,
    OPERATIONS, KEYWORD_CONST, STATEMENTS, SUBROUTINE, TYPE_KEYWORDS,
    CLASS_VAR_KEYWORDS, END_WHILE, error strings, ...) and on the
    JackTokenizer / SymbolTable / VMWriter APIs defined elsewhere.
    """

    def __init__(self, in_file, out_file):
        """A compilation engine constructor.

        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the VM output
        """
        self._tokenizer = JackTokenizer(in_file)
        # class-scope symbols (static/field)
        self._class_table = SymbolTable()
        # subroutine-scope symbols (argument/local); reset per subroutine
        self._method_table = SymbolTable()
        self._cur_class_name = ""
        self._vm_writer = VMWriter(out_file)
        # counters used to generate unique while/if labels
        self._label_count_while = 0
        self._label_count_if = 0

    def compile_class(self):
        """Compile a class according to the grammar.

        Exits the program with COMPILE_CLASS_ERROR if the file does not
        start with the 'class' keyword.
        """
        self._class_table.start_subroutine()
        self._tokenizer.advance()
        # check if the current keyword is the right class tag
        if self._tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._tokenizer.advance()
        self._cur_class_name = self.get_cur_token()
        self._tokenizer.advance()
        self._check_symbol("{")
        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_symbol("}")

    def compile_class_var_dec(self):
        """Compile one class variable declaration and record every name
        in the class symbol table with its type and kind (static/field).
        """
        cur_kind = self.get_cur_token()
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._class_table.define(cur_name, cur_type, cur_kind)
        while self._check_if_comma():
            # there are more variables of the same type/kind
            self._tokenizer.advance()
            cur_name = self.get_cur_token()
            self._check_name()
            self._class_table.define(cur_name, cur_type, cur_kind)
        self._check_symbol(";")

    def get_cur_token(self):
        """Return the current token's text from the tokenizer."""
        return self._tokenizer.get_token_str()

    def compile_subroutine_dec(self):
        """Compile a subroutine (constructor/function/method) declaration."""
        # re-initialize the method symbol table
        self._method_table.start_subroutine()
        key_word = self._tokenizer.key_word()
        # two advances: skip the subroutine kind and the return type
        # (the return type itself is never inspected here)
        self._tokenizer.advance()
        self._tokenizer.advance()
        cur_name = self.get_cur_token()
        self._tokenizer.advance()
        # a method gets as argument 0 the base address of the current
        # object, so 'this' is pre-registered as the first argument
        if key_word == "method":
            self._method_table.define("this", self._cur_class_name,
                                      "argument")
        self._check_symbol("(")
        self.compile_parameter_list()
        self._check_symbol(")")
        subroutine_path = self._cur_class_name + '.' + cur_name
        # the body also emits the 'function' command and kind-specific setup
        self.compile_subroutine_body(subroutine_path, key_word)

    def compile_parameter_list(self):
        """Compile the parameter list for a subroutine, defining each
        parameter as an 'argument' in the method symbol table.
        """
        # if curr_token is ')' it means the param list is empty
        if self._tokenizer.symbol() == ')':
            return
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "argument")
        while self._check_if_comma():
            # there are more params
            self._tokenizer.advance()
            cur_type = self.get_cur_token()
            self._check_type()
            cur_name = self.get_cur_token()
            self._check_name()
            self._method_table.define(cur_name, cur_type, "argument")

    def compile_subroutine_body(self, subroutine_name, subroutine_kind):
        """Compile the body of a subroutine.

        Emits the 'function' VM command (after counting the locals), plus
        constructor memory allocation or method 'this' setup as needed.

        :param subroutine_name: full VM name, e.g. 'ClassName.subName'
        :param subroutine_kind: 'constructor' | 'function' | 'method'
        """
        self._check_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine — they must all be seen before n_locals is known
        while self._tokenizer.key_word() == 'var':
            self.compile_var_dec()
        # define the subroutine
        n_locals = self._method_table.var_count("local")
        self._vm_writer.write_function(subroutine_name, n_locals)
        if subroutine_kind == "constructor":
            # allocating memory for the object's fields
            num_of_fields = self._class_table.var_count("field")
            self._vm_writer.write_push("constant", num_of_fields)
            self._vm_writer.write_call("Memory.alloc", 1)
            # make 'this' point to the address returned by Memory.alloc
            self._vm_writer.write_pop("pointer", 0)
        if subroutine_kind == "method":
            # assign pointer[0] to the object's base address (argument 0)
            # in order to get access to the 'this' segment
            self._vm_writer.write_push("argument", 0)
            self._vm_writer.write_pop("pointer", 0)
        self.compile_statements()
        self._check_symbol("}")

    def compile_var_dec(self):
        """Compile one local variable declaration, recording each name as
        a 'local' in the method symbol table.
        """
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "local")
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._tokenizer.advance()
            self._method_table.define(self.get_cur_token(), cur_type,
                                      "local")
            self._check_name()
        self._check_symbol(";")

    def compile_statements(self):
        """Compile the statements (0 or more), dispatching on keyword."""
        while self._check_if_statement():
            if self._tokenizer.key_word() == 'let':
                self.compile_let()
            elif self._tokenizer.key_word() == 'if':
                self.compile_if()
            elif self._tokenizer.key_word() == 'while':
                self.compile_while()
            elif self._tokenizer.key_word() == 'do':
                self.compile_do()
            elif self._tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """Compile a do statement; the call's return value is discarded
        by popping it into temp 0.
        """
        self._tokenizer.advance()
        self.compile_subroutine_call()
        self._check_symbol(";")
        self._vm_writer.write_pop("temp", 0)

    def compile_let(self):
        """Compile a let statement (plain variable or array element).

        Exits the program if the assigned name is not in any symbol table.
        """
        self._tokenizer.advance()
        name = self.get_cur_token()
        info = self._get_symbol_info(name)
        self._check_if_declared(info)
        s_type, s_kind, s_id = info
        seg = self._get_segment(s_kind)
        is_and_array = False
        if self._tokenizer.get_next_token() == '[':
            # if there is an array target; compile_term leaves the
            # element address computation on the stack
            is_and_array = True
            self.compile_term()
        else:
            self._tokenizer.advance()
        self._check_symbol("=")
        self.compile_expression()
        if is_and_array:
            # save the value created after compiling the expression which
            # appears right after '=' in temp[0]
            self._vm_writer.write_pop("temp", 0)
            # now the top of the stack should be the address of the right
            # cell in the array so we assign it to pointer[1]
            self._vm_writer.write_pop("pointer", 1)
            # re-pushing the value we saved in temp[0]
            self._vm_writer.write_push("temp", 0)
            # the value of the array is located in that[0]
            seg = "that"
            s_id = 0
        # execute the assignment
        self._vm_writer.write_pop(seg, s_id)
        self._check_symbol(";")

    @staticmethod
    def _check_if_declared(info):
        """Exit the program if *info* is None (undeclared symbol)."""
        if info is None:
            print("Unknown Symbol")
            sys.exit()

    def compile_if(self):
        """Compile an if statement (with optional else) using two labels:
        one for the false branch and one for the end of the statement.
        """
        false_label = self._get_if_label()
        end_label = self._get_if_label()
        self._tokenizer.advance()
        self._check_symbol("(")
        self.compile_expression()
        self._check_symbol(")")
        self._check_symbol("{")
        # negate the condition and skip the if-body when it was false
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(false_label)
        self.compile_statements()
        self._check_symbol("}")
        # there can also be an if/else scenario
        self._vm_writer.write_goto(end_label)
        self._vm_writer.write_label(false_label)
        if self._tokenizer.key_word() == 'else':
            self._tokenizer.advance()
            self._check_symbol("{")
            self.compile_statements()
            self._check_symbol("}")
        self._vm_writer.write_label(end_label)

    def compile_while(self):
        """Compile a while statement: loop-top label, negated condition
        with an exit jump, body, and a jump back to the top.
        """
        self._tokenizer.advance()
        first_label = self._get_while_label()
        second_label = self._get_while_label(END_WHILE)
        self._check_symbol("(")
        self._vm_writer.write_label(first_label)
        self.compile_expression()
        # exit the loop when the condition is false
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(second_label)
        self._check_symbol(")")
        self._check_symbol("{")
        self.compile_statements()
        self._vm_writer.write_goto(first_label)
        self._vm_writer.write_label(second_label)
        self._check_symbol("}")

    def compile_return(self):
        """Compile a return statement; void subroutines push constant 0
        as the dummy return value required by the VM calling convention.
        """
        self._tokenizer.advance()
        # if cur token is ';' we return nothing, otherwise something
        if not self._tokenizer.symbol() == ';':
            self.compile_expression()
        else:
            self._vm_writer.write_push("constant", 0)
        self._check_symbol(";")
        self._vm_writer.write_return()

    def compile_subroutine_call(self):
        """Compile a subroutine call site (as opposed to a declaration).

        Resolves 'foo.bar()' through the symbol tables: if 'foo' is a
        known object its base address is pushed and the call targets its
        class; otherwise 'foo' is treated as a class name. An unqualified
        call is a method call on the current object (push pointer 0).
        """
        method_name = self.get_cur_token()
        self._check_name()
        num_of_args = 0
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())
        if self._tokenizer.symbol() == ".":
            self._tokenizer.advance()
            class_name = method_name
            method_name = self.get_cur_token()
            self._check_name()
            symbol_info = self._get_symbol_info(class_name)
            if symbol_info is None:
                # a static call on a class name
                cur_name = class_name + '.' + method_name
            else:
                # a method call on an object: push it as argument 0
                type_of, kind_of, id_of = symbol_info
                num_of_args += 1
                self._vm_writer.write_push(self._get_segment(kind_of),
                                           id_of)
                cur_name = type_of + '.' + method_name
        else:
            # unqualified: method of the current class on 'this'
            cur_name = self._cur_class_name + '.' + method_name
            num_of_args += 1
            self._vm_writer.write_push("pointer", 0)
        self._check_symbol("(")
        num_of_args += self.compile_expression_list()
        self._check_symbol(")")
        self._vm_writer.write_call(cur_name, num_of_args)

    def compile_expression(self):
        """Compile an expression: terms and possibly operators between
        them, emitting each operator after its operands (postfix order).
        """
        symbol = self._tokenizer.symbol()
        self.compile_term()
        # write the 'not' operator if the expression started with '~'
        if symbol == '~':
            self._vm_writer.write_arithmetic("not")
        # there may be a few operators in one expression
        while self._tokenizer.symbol() in OPERATIONS:
            symbol = self._tokenizer.symbol()
            # NOTE(review): compile_term consumes the operator token via
            # its OPERATIONS branch before compiling the right operand —
            # verify against compile_term
            self.compile_term()
            # executing operators after handling the operands in order to
            # evaluate the current expression as a postfix expression
            op = self._get_op(symbol)
            self._vm_writer.write_arithmetic(op)

    def compile_term(self):
        """Compile a term according to the grammar, dispatching on the
        token type. Exits with COMPILE_TERM_ERROR on an unknown token.
        """
        cur_type = self._tokenizer.token_type()
        key_word = self._tokenizer.key_word()
        cur_token = self.get_cur_token()
        # either a string/int constant
        if cur_type in ["INT_CONST", "STRING_CONST"]:
            self._compile_string_int_term(cur_token, cur_type)
        # or a constant keyword (true, false, null, this)
        elif key_word in KEYWORD_CONST:
            self._compile_const_keyword_term(key_word)
        # or an expression within round brackets
        elif self._tokenizer.symbol() == '(':
            self._tokenizer.advance()
            self.compile_expression()
            self._check_symbol(")")
        # or an op and then a term (no VM command is emitted here; the
        # caller handles the operator)
        elif self._tokenizer.symbol() in OPERATIONS:
            self._tokenizer.advance()
            self.compile_term()
        # or it is an identifier which could be:
        elif self._tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()

    def _compile_const_keyword_term(self, key_word):
        """Compile a term whose token is a constant keyword.

        :param key_word: string from {'true', 'false', 'null', 'this'}
        """
        if key_word == "this":
            self._vm_writer.write_push("pointer", 0)
        else:
            # false and null are 0; true is 0 negated bitwise (-1)
            self._vm_writer.write_push("constant", 0)
            if key_word == "true":
                self._vm_writer.write_arithmetic("not")
        self._tokenizer.advance()

    def _compile_string_int_term(self, cur_token, cur_type):
        """Compile a term that is a constant string or constant integer.

        Strings are built at runtime via String.new and String.appendChar.

        :param cur_token: the current token as a string
        :param cur_type: the type of the current token
        """
        if cur_type == "INT_CONST":
            self._vm_writer.write_push("constant", cur_token)
        else:  # is string
            n = len(cur_token)
            self._vm_writer.write_push("constant", n)
            self._vm_writer.write_call("String.new", 1)
            for c in cur_token:
                self._vm_writer.write_push("constant", ord(c))
                self._vm_writer.write_call("String.appendChar", 2)
        self._tokenizer.advance()

    def _compile_term_identifier(self):
        """Compile a term that starts with an identifier: a plain
        variable, an array access, or a subroutine call.
        """
        cur_token = self.get_cur_token()
        info = self._get_symbol_info(cur_token)
        next_token = self._tokenizer.get_next_token()
        # a known variable that is not the receiver of a call: push it
        if info is not None and next_token not in [".", "("]:
            type_of, kind_of, id_of = info
            seg = self._get_segment(kind_of)
            self._vm_writer.write_push(seg, id_of)
        # an array access: compute base + index
        if next_token == '[':
            self._check_name()
            self._check_symbol("[")
            self.compile_expression()
            self._check_symbol("]")
            self._vm_writer.write_arithmetic("add")
            # when reading (not a let target), dereference through 'that'
            if self._tokenizer.symbol() != '=':
                self._vm_writer.write_pop("pointer", 1)
                self._vm_writer.write_push("that", 0)
        # or a subroutine call
        elif next_token in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._tokenizer.advance()

    def compile_expression_list(self):
        """Compile a call's expression list.

        :return: the number of expressions (call arguments) compiled
        """
        # if it is ')' then the expression list is empty
        if self._tokenizer.symbol() == ')':
            return 0
        num_of_args = 1  # at least one argument
        self.compile_expression()
        # while there are more expressions
        while self._check_if_comma():
            self._tokenizer.advance()
            cur_symbol = self._tokenizer.symbol()
            self.compile_expression()
            # negative int literal argument — NOTE(review): unary minus
            # appears to be handled only here and via the '~' check in
            # compile_expression; confirm coverage of nested unary minus
            if cur_symbol == '-':
                self._vm_writer.write_arithmetic("neg")
            num_of_args += 1
        return num_of_args

    def _check_if_var_dec(self):
        """Check if we are currently compiling a variable declaration.

        :return: true iff the current token is either 'static' or 'field'
        """
        return self._tokenizer.key_word() in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """Check if we are currently compiling a subroutine declaration.

        :return: true iff the current token is 'constructor', 'function'
            or 'method'
        """
        return self._tokenizer.key_word() in SUBROUTINE

    def _check_if_comma(self):
        """Check if the current token is a comma.

        :return: true iff the current token is a ','
        """
        return self._tokenizer.symbol() == ','

    def _check_if_statement(self):
        """Check if we are currently compiling a statement.

        :return: true iff the current token is in ['let', 'if', 'while',
            'do', 'return']
        """
        return self._tokenizer.key_word() in STATEMENTS

    def _check_type(self):
        """Validate that the current token is a valid type (built-in
        keyword or class-name identifier) and advance past it.
        """
        if not self._tokenizer.key_word() in TYPE_KEYWORDS:
            self._check_name()
        else:
            self._tokenizer.advance()

    def _check_symbol(self, expected_symbol):
        """Validate the current token is the expected symbol and advance.

        :param expected_symbol: the symbol we are validating is the
            current token
        :return: prints an illegal statement error and exits the program
            if it is not the expected symbol
        """
        if self._tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    def _check_name(self):
        """Validate the current token is a name (identifier) and advance.

        :return: prints an illegal statement error and exits the program
            if it is not a name
        """
        if not self._tokenizer.identifier():
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    @staticmethod
    def _get_op(symbol):
        """Map a Jack operator symbol to its VM arithmetic command.

        '*' and '/' map to OS calls (Math.multiply / Math.divide).

        :param symbol: a Jack operator character
        :return: the corresponding VM command string
        """
        if symbol == '<':
            return "lt"
        elif symbol == '>':
            return "gt"
        elif symbol == '=':
            return "eq"
        elif symbol == '&':
            return "and"
        elif symbol == '|':
            return "or"
        elif symbol == '+':
            return "add"
        elif symbol == '-':
            return "sub"
        elif symbol == '~':
            return "not"
        elif symbol == "*":
            return "call Math.multiply 2"
        elif symbol == "/":
            return "call Math.divide 2"

    def _get_symbol_info(self, symbol_name):
        """Look up a symbol, subroutine scope first, then class scope.

        :param symbol_name: string
        :return: the symbol's (type, kind, id) info, or None if the name
            is in neither table
        """
        info = self._method_table.get_info(symbol_name)
        if info is None:
            info = self._class_table.get_info(symbol_name)
        return info

    @staticmethod
    def _get_segment(cur_kind):
        """Map a Jack variable kind to its VM memory segment.

        :param cur_kind: Jack kind - from the list: ["var", "argument",
            "field", "class", "subroutine", "local", "static"]
        :return: 'this' if the given kind is "field", otherwise the given
            kind unchanged
        """
        if cur_kind == "field":
            return "this"
        else:
            return cur_kind

    def _get_if_label(self):
        """Create a new if label and increment the if label counter.

        :return: an unused if label, e.g. 'IF3'
        """
        curr_counter = str(self._label_count_if)
        self._label_count_if += 1
        return "IF" + curr_counter

    def _get_while_label(self, is_end_while=False):
        """Create a while label according to the given flag.

        The counter is incremented only when the end-while label is
        created, so a WHILE/WHILE_END pair shares one counter value.

        :param is_end_while: if true creates an end-while label,
            otherwise a loop-top while label
        :return: an unused while label or end-while label
        """
        curr_counter = str(self._label_count_while)
        if is_end_while:
            self._label_count_while += 1
            return "WHILE_END" + curr_counter
        return "WHILE" + curr_counter