class CompilationEngine:
    """Compile a single Jack class into VM commands.

    The engine consumes tokens from a ``JackTokenizer``. Every token is an
    XML-like string (for example ``<keyword> class </keyword>``): the first
    whitespace-separated field is the tag and the second one is the value.
    Declarations are recorded in a ``SymbolTable`` and the generated code is
    emitted through a ``VMWriter``.
    """

    def __init__(self, input_file, output_file):
        """Compile ``input_file`` and write the result via ``VMWriter``.

        :param input_file: the jack file that the user wants to compile
        :param output_file: the path handed to ``VMWriter`` for the output
        """
        self.label_count = 0
        self.file_reader = JackFileReader(input_file)
        self.jack_tokens = JackTokenizer(self.file_reader.get_one_liner())
        self.curr_token = self.jack_tokens.advance()
        self.to_output_file = []
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.class_name = None
        # Close the writer even when compilation fails, so the output
        # handle is never leaked.
        try:
            self.compile_class()
        finally:
            self.vm_writer.close()

    def compile_class(self):
        """Compile a complete class: 'class' name '{' varDec* subroutine* '}'."""
        # advance beyond 'class'
        self.next_token()
        # the class name prefixes every subroutine name
        self.class_name = self.next_token()
        # advance beyond '{'
        self.next_token()
        # zero or more class variable declarations
        while self.curr_token.split()[1] in VAR_DECS:
            self.compile_class_var_dec()
        # zero or more subroutines
        while self.curr_token.split()[1] in SUB_ROUTINES:
            self.compile_subroutine_dec()
        # advance beyond '}'
        self.next_token()

    def compile_class_var_dec(self):
        """Compile a static or field declaration.

        Grammar: ('static' | 'field') type varName (',' varName)* ';'
        Only the symbol table is updated; no VM code is emitted.
        """
        var_kind = self.next_token()
        var_type = self.next_token()
        var_name = self.next_token()
        self.symbol_table.define(var_name, var_type, var_kind)
        while self.curr_token.split()[1] == COMMA:
            # advance beyond the comma
            self.next_token()
            var_name = self.next_token()
            self.symbol_table.define(var_name, var_type, var_kind)
        # advance beyond ';'
        self.next_token()

    def compile_subroutine_dec(self):
        """Compile a complete method, function, or constructor."""
        self.symbol_table.start_subroutine()
        # constructor / function / method
        subroutine_type = self.next_token()
        # advance beyond the return type
        self.next_token()
        subroutine_name = self.class_name + "." + self.next_token()
        # advance beyond '('
        self.next_token()
        if subroutine_type == "method":
            # a method receives its object as the first argument
            self.symbol_table.define("this", self.class_name, ARG)
        self.compile_parameters_list()
        # The local count is needed before the body is compiled, so the
        # 'var' declarations are counted by looking ahead.
        self.vm_writer.write_function(subroutine_name, self.count_var_dec())
        if subroutine_type == "constructor":
            # allocate one word per field and anchor 'this' on the block
            field_vars_num = self.get_num_of_field_vars()
            self.vm_writer.write_push("constant", field_vars_num)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("pointer", 0)
        if subroutine_type == "method":
            # anchor 'this' on the object passed as argument 0
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        # advance beyond ')'
        self.next_token()
        self.compile_subroutine_body()

    def get_num_of_field_vars(self):
        """Return how many class-level symbols have the kind "field"."""
        return sum(
            1
            for var in self.symbol_table.class_symbol_table.values()
            if var[1] == "field"
        )

    def compile_parameters_list(self):
        """Compile a (possibly empty) parameter list, excluding the ().

        :return: the number of declared parameters
        """
        num_of_par = 0
        if self.curr_token.split()[1] != RIGHT_BRACKETS:
            num_of_par += 1
            par_type = self.next_token()
            par_name = self.next_token()
            self.symbol_table.define(par_name, par_type, ARG)
            while self.curr_token.split()[1] == COMMA:
                num_of_par += 1
                # advance beyond the comma
                self.next_token()
                par_type = self.next_token()
                par_name = self.next_token()
                self.symbol_table.define(par_name, par_type, ARG)
        return num_of_par

    def count_var_dec(self):
        """Count the local variables of the subroutine by looking ahead.

        Scans the tokenizer's token list without consuming tokens.
        NOTE(review): assumes ``jack_tokens.curr_token + 1`` indexes the
        first token of the subroutine body -- confirm against JackTokenizer.

        :return: the number of variables declared with 'var'
        """
        tokens = self.jack_tokens.list_of_tokens
        var_count = 0
        temp_pointer = self.jack_tokens.curr_token + 1
        # 'var' type varName (',' varName)* ';'
        while tokens[temp_pointer].split()[1] == "var":
            var_count += 1
            # skip 'var' type varName
            temp_pointer += 3
            while tokens[temp_pointer].split()[1] == COMMA:
                var_count += 1
                # skip ',' varName
                temp_pointer += 2
            # skip ';'
            temp_pointer += 1
        return var_count

    def compile_subroutine_body(self):
        """Compile '{' varDec* statements '}'."""
        # advance beyond '{'
        self.next_token()
        while self.curr_token.split()[1] == "var":
            self.compile_var_dec()
        self.compile_statements()
        # advance beyond '}'
        self.next_token()

    def compile_var_dec(self):
        """Compile a var declaration: 'var' type varName (',' varName)* ';'."""
        # advance beyond 'var'
        self.next_token()
        var_type = self.next_token()
        var_name = self.next_token()
        self.symbol_table.define(var_name, var_type, LCL)
        while self.curr_token.split()[1] == COMMA:
            # advance beyond the comma
            self.next_token()
            var_name = self.next_token()
            self.symbol_table.define(var_name, var_type, LCL)
        # advance beyond ';'
        self.next_token()

    def compile_statements(self):
        """Compile statements until a token that starts no statement."""
        while True:
            statement_type = self.curr_token.split()[1]
            if statement_type == "let":
                self.compile_let()
            elif statement_type == "if":
                self.compile_if()
            elif statement_type == "while":
                self.compile_while()
            elif statement_type == "do":
                self.compile_do()
            elif statement_type == "return":
                self.compile_return()
            else:
                return

    def compile_let(self):
        """Compile a let statement, including the ``name[index] = ...`` form."""
        # advance beyond 'let'
        self.next_token()
        var_name = self.next_token()
        var_kind = self.symbol_table.kind_of(var_name)
        if var_kind == "field":
            # fields are addressed through the 'this' segment
            var_kind = "this"
        var_index = self.symbol_table.index_of(var_name)
        # for the varName[expression] case
        list_flag = False
        if self.curr_token.split()[1] == LEFT_SQUARE_BRACKETS:
            list_flag = True
            # advance beyond '['
            self.next_token()
            self.compile_expression()
            # target address = base + index, left on the stack
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("+")
            # advance beyond ']'
            self.next_token()
        # advance beyond '='
        self.next_token()
        self.compile_expression()
        if list_flag:
            # Park the value in temp 0 so the target address can be popped
            # into 'that' without being clobbered by the right-hand side.
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.vm_writer.write_pop(var_kind, var_index)
        # advance beyond ';'
        self.next_token()

    def compile_if(self):
        """Compile an if statement with an optional else clause."""
        # advance beyond 'if' and '('
        self.next_token()
        self.next_token()
        self.compile_expression()
        label_1 = self.next_label()
        self.vm_writer.write_if(label_1)
        label_2 = self.next_label()
        self.vm_writer.write_goto(label_2)
        # label reached when the condition is true
        self.vm_writer.write_label(label_1)
        # advance beyond ')' and '{'
        self.next_token()
        self.next_token()
        self.compile_statements()
        # advance beyond '}'
        self.next_token()
        if self.curr_token.split()[1] == "else":
            label_3 = self.next_label()
            # the true branch jumps over the else branch
            self.vm_writer.write_goto(label_3)
            self.vm_writer.write_label(label_2)
            # advance beyond 'else' and '{'
            self.next_token()
            self.next_token()
            self.compile_statements()
            # advance beyond '}'
            self.next_token()
            self.vm_writer.write_label(label_3)
        else:
            self.vm_writer.write_label(label_2)

    def compile_while(self):
        """Compile a while statement."""
        # advance beyond 'while' and '('
        self.next_token()
        self.next_token()
        label_1 = self.next_label()
        self.vm_writer.write_label(label_1)
        self.compile_expression()
        # leave the loop when the condition is false
        self.vm_writer.write_arithmetic(NOT)
        label_2 = self.next_label()
        self.vm_writer.write_if(label_2)
        # advance beyond ')' and '{'
        self.next_token()
        self.next_token()
        self.compile_statements()
        self.vm_writer.write_goto(label_1)
        self.vm_writer.write_label(label_2)
        # advance beyond '}'
        self.next_token()

    def compile_subroutine_call(self):
        """Compile ``name(args)`` or ``prefix.name(args)``.

        An unqualified call is a method call on the current object, so
        ``pointer 0`` is pushed as the receiver. For a qualified call the
        prefix is looked up in the symbol table: a known variable is pushed
        as the receiver and its type names the class; otherwise the prefix
        itself is used as the class name and no receiver is pushed.
        """
        first_name = self.next_token()
        num_of_arguments = 0
        if self.curr_token.split()[1] == ".":
            # advance beyond the dot
            self.next_token()
            kind = self.symbol_table.kind_of(first_name)
            if kind == "field":
                kind = "this"
            index = self.symbol_table.index_of(first_name)
            type_ = self.symbol_table.type_of(first_name)
            member_name = self.next_token()
            if type_:
                subroutine_name = type_ + "." + member_name
            else:
                subroutine_name = first_name + "." + member_name
            # advance beyond '('
            self.next_token()
            if kind is not None and index is not None:
                self.vm_writer.write_push(kind, index)
                num_of_arguments = 1
        else:
            # The symbol table is deliberately not consulted here: a
            # variable that merely shares the method's name must not be
            # pushed as a second receiver.
            subroutine_name = self.class_name + "." + first_name
            # advance beyond '('
            self.next_token()
            self.vm_writer.write_push("pointer", 0)
            num_of_arguments = 1
        num_of_arguments += self.compile_expression_list()
        # advance beyond ')'
        self.next_token()
        self.vm_writer.write_call(subroutine_name, num_of_arguments)

    def compile_do(self):
        """Compile a do statement and discard the returned value."""
        # advance beyond 'do'
        self.next_token()
        self.compile_subroutine_call()
        self.vm_writer.write_pop("temp", 0)
        # advance beyond ';'
        self.next_token()

    def compile_return(self):
        """Compile a return statement; a bare ``return;`` returns constant 0."""
        # advance beyond 'return'
        self.next_token()
        if self.curr_token.split()[1] != SEMI_COLON:
            if self.curr_token.split()[1] == "this":
                self.vm_writer.write_push("pointer", 0)
                self.next_token()
            else:
                self.compile_expression()
        else:
            # every subroutine must leave a value on the stack
            self.vm_writer.write_push("constant", 0)
        self.vm_writer.write_return()
        # advance beyond ';'
        self.next_token()

    def compile_expression(self):
        """Compile an expression: term (op term)*.

        Operators are applied strictly left to right, in the order in which
        they are encountered.
        """
        self.compile_term()
        while self.curr_token.split()[1] in Syntax.operators:
            op = self.curr_token.split()[1]
            self.next_token()
            self.compile_term()
            self.compile_op(op)

    def compile_op(self, op):
        """Emit the VM code for a binary operator.

        '*' and '/' become calls to ``Math.multiply`` / ``Math.divide``;
        every other operator is handed to ``write_arithmetic``.
        """
        if op == "*":
            self.vm_writer.write_call("Math.multiply", 2)
        elif op == "/":
            self.vm_writer.write_call("Math.divide", 2)
        else:
            self.vm_writer.write_arithmetic(op)

    def compile_term(self):
        """Compile a term.

        If the current token is an identifier, a single look-ahead token
        (one of '[', '(' or '.') distinguishes between a variable, an array
        entry and a subroutine call. Any other token is not part of this
        term and is not advanced over.
        """
        all_ = self.curr_token.split()
        header = all_[0]
        val = all_[1]
        if header == "<integerConstant>":
            self.vm_writer.write_push("constant", val)
            self.next_token()
        # String constants are dispatched on the tag before any test on
        # ``val``: ``val`` is only the first word of the literal, so a string
        # starting with "(", "-" or "~" would otherwise be mistaken for a
        # parenthesised expression or a unary operator.
        elif header == "<stringConstant>":
            # strip "<stringConstant> " and " </stringConstant>"
            the_string = self.curr_token[17:-18]
            self.vm_writer.write_push("constant", len(the_string))
            self.vm_writer.write_call("String.new", 1)
            for char in the_string:
                self.vm_writer.write_push("constant", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.next_token()
        # (expression)
        elif val == LEFT_BRACKETS:
            # advance beyond '('
            self.next_token()
            self.compile_expression()
            # advance beyond ')'
            self.next_token()
        # unary operator followed by a term
        elif val in ONARY_OP:
            self.next_token()
            self.compile_term()
            if val == "-":
                self.vm_writer.write_arithmetic(NEG)
            else:
                self.vm_writer.write_arithmetic(NOT)
        elif header == IDENTIFIER:
            next_token = self.jack_tokens.peek().split()[1]
            if next_token == LEFT_SQUARE_BRACKETS:
                # skip the name and '['
                self.next_token()
                self.next_token()
                self.compile_expression()
                kind = self.symbol_table.kind_of(val)
                if kind == "field":
                    kind = "this"
                self.vm_writer.write_push(kind, self.symbol_table.index_of(val))
                self.vm_writer.write_arithmetic("+")
                # skip ']'
                self.next_token()
                # read the entry through the 'that' segment
                self.vm_writer.write_pop("pointer", 1)
                self.vm_writer.write_push("that", 0)
            elif next_token == LEFT_BRACKETS or next_token == ".":
                self.compile_subroutine_call()
            else:
                kind = self.symbol_table.kind_of(val)
                if kind == "field":
                    kind = "this"
                self.vm_writer.write_push(kind, self.symbol_table.index_of(val))
                self.next_token()
        elif header == "<keyword>":
            if val == "this":
                self.vm_writer.write_push("pointer", 0)
            else:
                # every other keyword pushes 0; 'true' is not(0)
                self.vm_writer.write_push("constant", 0)
                if val == "true":
                    self.vm_writer.write_arithmetic(NOT)
            self.next_token()

    def compile_expression_list(self):
        """Compile a (possibly empty) comma separated list of expressions.

        :return: the number of expressions compiled
        """
        num_of_arguments = 0
        if self.curr_token.split()[1] != RIGHT_BRACKETS:
            num_of_arguments += 1
            self.compile_expression()
            while self.curr_token.split()[1] == COMMA:
                num_of_arguments += 1
                # advance beyond the comma
                self.next_token()
                self.compile_expression()
        return num_of_arguments

    def next_token(self):
        """Advance the tokenizer and return the value of the consumed token."""
        to_return = self.curr_token.split()[1]
        self.curr_token = self.jack_tokens.advance()
        return to_return

    def next_label(self):
        """Return a fresh label name of the form ``LABEL<n>``."""
        count = self.label_count
        self.label_count += 1
        return "LABEL" + str(count)
class CompilationEngine(object):
    """Compile one Jack class into VM code using a validating parser.

    Tokens come from a ``Tokenizer``; ``validator`` checks the current token
    against the expected syntax and raises ``Exception`` on a mismatch.
    Symbols are stored in a ``SymbolTable`` and code is emitted through a
    ``VMWriter`` whose output path is derived from the input path.
    """

    def __init__(self, inFile):
        """Set up the tokenizer, symbol table and VM writer for ``inFile``."""
        self.t = Tokenizer(inFile)
        self.symTable = SymbolTable()
        self.vmName = self._vm_path(inFile)
        self.vm = VMWriter(self.vmName)
        self.className = ''
        self.types = ['int', 'char', 'boolean', 'void']
        self.stmnt = ['do', 'let', 'if', 'while', 'return']
        self.subroutType = ''
        self.whileIndex = 0
        self.ifIndex = 0
        self.fieldNum = 0

    @staticmethod
    def _vm_path(inFile):
        """Return the output path: ``inFile`` with '.jack' replaced by '.vm'.

        ``str.rstrip('.jack')`` must not be used for this: it strips any
        trailing run of the characters '.', 'j', 'a', 'c', 'k', which turns
        'Stack.jack' into 'St'.
        """
        suffix = '.jack'
        if inFile.endswith(suffix):
            inFile = inFile[:-len(suffix)]
        return inFile + '.vm'

    def compile_class(self):
        """Compile the whole class and close the VM writer afterwards."""
        # Close the writer even when a syntax error aborts compilation.
        try:
            self.t.advance()
            self.validator('class')
            self.className = self.t.current_token()
            self.t.advance()
            self.validator('{')
            self.fieldNum = self.compile_class_var_dec()
            while self.t.symbol() != '}':  # subroutines
                self.compile_subroutine()
            self.validator('}')
        finally:
            self.vm.close()
        return

    def compile_class_var_dec(self):
        """Compile all 'field' / 'static' declarations.

        :return: the number of 'field' variables declared
        """
        varKeyWords = ['field', 'static']
        counter = 0
        while self.t.keyword() in varKeyWords:
            kind = self.t.current_token()
            self.validator(varKeyWords)
            # variable type
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            if kind == 'field':
                counter += 1
            while self.t.symbol() != ';':  # further names on the same line
                self.validator(',')
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                if kind == 'field':
                    counter += 1
            self.validator(';')
        return counter

    def compile_subroutine(self):
        """Compile one constructor, function or method.

        A subroutine whose body is empty ('{' directly followed by '}') is
        consumed without emitting any VM code.
        """
        current_subrout_scope = self.symTable.subDict
        self.symTable.start_subroutine()
        subroutKword = self.t.current_token()
        self.validator(['constructor', 'function', 'method'])
        self.subroutType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
        name = self.t.current_token()
        subroutName = self.className + '.' + name
        self.t.advance()
        self.validator('(')
        if subroutKword == 'method':
            self.compile_parameter_list(method=True)
        else:
            self.compile_parameter_list()
        self.validator(')')
        self.validator('{')
        if self.t.symbol() == '}':
            self.t.advance()
            return
        self.validator(['var', 'let', 'do', 'if', 'while', 'return'],
                       advance=False)
        numLocals = 0
        if self.t.keyword() == 'var':
            numLocals = self.compile_var_dec()
        self.vm.write_function(subroutName, numLocals)
        if subroutKword == 'constructor':
            # allocate one word per field and anchor 'this' on the block
            self.vm.write_push('constant', self.fieldNum)
            self.vm.write_call('Memory.alloc', 1)
            self.vm.write_pop('pointer', 0)
        elif subroutKword == 'method':
            # anchor 'this' on the object passed as argument 0
            self.vm.write_push('argument', 0)
            self.vm.write_pop('pointer', 0)
        if self.t.keyword() in self.stmnt:
            self.compile_statements()
        self.validator('}')
        self.symTable.subDict = current_subrout_scope
        # label counters restart for every subroutine
        self.whileIndex = 0
        self.ifIndex = 0
        return

    def compile_parameter_list(self, method=False):
        """Compile a (possibly empty) parameter list, excluding the ().

        :param method: forwarded to ``SymbolTable.define`` for the first
            parameter. NOTE(review): presumably reserves argument 0 for
            'this' -- confirm against SymbolTable.
        :return: the number of declared parameters
        """
        counter = 0
        if self.t.symbol() == ')':
            return counter
        varType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
        kind = 'arg'
        name = self.t.current_token()
        if method:
            self.symTable.define(name, varType, kind, method=True)
        else:
            self.symTable.define(name, varType, kind)
        self.t.advance()
        counter += 1
        while self.t.symbol() == ',':
            self.validator(',')
            # Each parameter carries its own type; record it before the
            # validator consumes the token.
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1
        return counter

    def compile_var_dec(self):
        """Compile all consecutive 'var' declarations.

        :return: the number of local variables declared
        """
        counter = 0
        kind = 'var'
        while self.t.keyword() == 'var':  # several 'var' lines
            self.t.advance()
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1
            while self.t.symbol() == ',':  # several names on one line
                self.t.advance()
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                counter += 1
            self.validator(';')
        return counter

    def compile_statements(self):
        """Compile statements while the current keyword starts one."""
        while self.t.keyword() in self.stmnt:
            if self.t.keyword() == 'let':
                self.compile_let()
            elif self.t.keyword() == 'do':
                self.compile_do()
            elif self.t.keyword() == 'if':
                self.compile_if()
            elif self.t.keyword() == 'while':
                self.compile_while()
            elif self.t.keyword() == 'return':
                self.compile_return()
            else:
                raise Exception(self.t.current_token() + ' is not valid')
        return

    def compile_do(self):
        """Compile a do statement and discard the returned value."""
        self.t.advance()  # do
        lookAhead = self.t.tokens[self.t.tokenIndex + 1]
        if lookAhead == '(':  # subroutineName(exprlist)
            subroutName = self.className + '.' + self.t.current_token()
            self.t.advance()
            self.validator('(')
            self.vm.write_push('pointer', 0)
            numArgs = self.compile_expression_list()
            self.vm.write_call(subroutName, numArgs + 1)  # add 1 for 'this'
            self.validator(')')
            self.validator(';')
            self.vm.write_pop('temp', 0)  # throws away returned value
            return
        className = self.t.current_token()
        self.t.advance()
        self.validator('.')  # name.subroutine(exprList)
        subroutName = self.t.current_token()
        self.t.advance()
        self.validator('(')
        if self.symTable.kind_of(className) in [
                'this', 'static', 'local', 'argument'
        ]:  # used 'this' for 'field'
            # the prefix is a variable: call the method on that object
            typeName = self.symTable.type_of(className)
            subroutName = typeName + '.' + subroutName
            segment = self.symTable.kind_of(className)
            index = self.symTable.index_of(className)
            self.vm.write_push(segment, index)
            numArgs = self.compile_expression_list()
            self.vm.write_call(subroutName, numArgs + 1)
        else:
            # the prefix is a class name: no receiver is pushed
            subroutName = className + '.' + subroutName
            numArgs = self.compile_expression_list()
            self.vm.write_call(subroutName, numArgs)
        self.validator(')')
        self.validator(';')
        self.vm.write_pop('temp', 0)
        return

    def compile_let(self):
        """Compile a let statement, including the ``name[index] = ...`` form.

        :raises Exception: if the assigned name is in neither symbol scope
        """
        array = False
        self.t.advance()  # let
        # For a well-formed statement the body runs once: the expression
        # compiler stops on the terminating ';'.
        while self.t.symbol() != ';':
            name = self.t.identifier()
            kind = self.symTable.kind_of(name)
            index = self.symTable.index_of(name)
            if name in self.symTable.classDict:
                self.t.advance()
            elif name in self.symTable.subDict:
                self.t.advance()
            else:
                raise Exception(self.t.identifier() + ' is not defined')
            if self.t.symbol() == '[':  # array index
                array = True
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()
                self.validator(']')
                # target address = base + index, left on the stack
                self.vm.write_arithmetic('+')
            self.validator('=')
            self.compile_expression()
            if array:
                # Park the value in temp 0 so the target address can be
                # popped into 'that' without being clobbered.
                self.vm.write_pop('temp', 0)
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('temp', 0)
                self.vm.write_pop('that', 0)
            else:
                self.vm.write_pop(kind, index)
        self.validator(';')
        return

    def compile_while(self):
        """Compile a while statement using WHILE<n> / ENDWHILE<n> labels."""
        currentWhile = 'WHILE' + str(self.whileIndex)
        self.vm.write_label(currentWhile)
        self.whileIndex += 1
        self.t.advance()  # while
        self.validator('(')
        self.compile_expression()
        # leave the loop when the condition is false
        self.vm.write_arithmetic('~')
        self.vm.write_if('END' + currentWhile)
        self.validator(')')
        self.validator('{')
        self.compile_statements()
        self.vm.write_goto(currentWhile)
        self.validator('}')
        self.vm.write_label('END' + currentWhile)
        return

    def compile_return(self):
        """Compile a return statement; a bare ``return;`` returns constant 0."""
        self.t.advance()  # return
        if self.t.symbol() == ';':
            self.vm.write_push('constant', '0')
            self.vm.write_return()
            self.t.advance()
        else:
            self.compile_expression()
            self.validator(';')
            self.vm.write_return()
        return

    def compile_if(self):
        """Compile an if statement with an optional else clause."""
        endIf = 'END_IF' + str(self.ifIndex)
        currentElse = 'IF_ELSE' + str(self.ifIndex)
        self.ifIndex += 1
        self.t.advance()  # if
        self.validator('(')
        self.compile_expression()
        # jump to the else label when the condition is false
        self.vm.write_arithmetic('~')
        self.vm.write_if(currentElse)
        self.validator(')')
        self.validator('{')
        self.compile_statements()
        self.vm.write_goto(endIf)
        self.validator('}')
        self.vm.write_label(currentElse)
        if self.t.keyword() == 'else':
            self.t.advance()  # else
            self.validator('{')
            self.compile_statements()
            self.validator('}')
        self.vm.write_label(endIf)
        return

    def compile_expression(self):
        """Compile an expression: term (op term)*, applied left to right."""
        op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.compile_term()
        while self.t.symbol() in op:
            opToken = self.t.current_token()
            self.t.advance()
            self.compile_term()
            self.vm.write_arithmetic(opToken)
        return

    def compile_term(self):
        """Compile a single term.

        Handles integer and string constants, keyword constants, unary
        operators, parenthesised expressions, variables, array entries and
        subroutine calls. One token of look-ahead distinguishes the
        identifier cases.

        :raises Exception: on an unexpected token or an undefined array name
        """
        keyConst = ['true', 'false', 'null', 'this']
        unOps = ['-', '~']
        if self.t.token_type() == 'INT_CONST':
            self.vm.write_push('constant', self.t.int_val())
            self.t.advance()
        elif self.t.token_type() == 'STRING_CONST':
            string = self.t.string_val()
            self.vm.write_push('constant', len(string))
            self.vm.write_call('String.new', 1)
            for char in string:
                # append the character code
                self.vm.write_push('constant', ord(char))
                self.vm.write_call('String.appendChar', 2)
            self.t.advance()
        elif self.t.token_type() == 'KEYWORD':
            self.validator(keyConst, advance=False)
            if self.t.current_token() in ['false', 'null']:
                self.t.advance()
                self.vm.write_push('constant', '0')
            elif self.t.current_token() == 'true':
                # true is pushed as constant 1 followed by a unary '-'
                self.vm.write_push('constant', '1')
                self.vm.write_arithmetic('-', neg=True)
                self.t.advance()
            else:
                self.vm.write_push('pointer', '0')
                self.t.advance()
        elif self.t.token_type() == 'SYMBOL':
            if self.t.symbol() in unOps:  # unary operator
                unOpToken = self.t.current_token()
                self.t.advance()
                self.compile_term()
                self.vm.write_arithmetic(unOpToken, neg=True)
            elif self.t.symbol() == '(':  # (expression)
                self.t.advance()
                self.compile_expression()
                self.t.advance()
            else:
                raise Exception(self.t.current_token() + ' is not valid')
        elif self.t.token_type() == 'IDENTIFIER':  # varName, array, subcall
            lookAhead = self.t.tokens[self.t.tokenIndex + 1]
            if lookAhead == '[':  # array item
                name = self.t.identifier()
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                if name in self.symTable.classDict:
                    self.t.advance()
                elif name in self.symTable.subDict:
                    self.t.advance()
                else:
                    raise Exception(self.t.identifier() + ' is not defined')
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()
                self.vm.write_arithmetic('+')
                # read the entry through the 'that' segment
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('that', 0)
                self.validator(']')
            elif lookAhead == '(':  # subcall on the current object
                current_subrout_scope = self.symTable.subDict
                name = self.className + '.' + self.t.current_token()
                self.t.advance()
                self.validator('(')
                # The call below counts 'this' as an argument, so it must
                # actually be pushed (as compile_do does).
                self.vm.write_push('pointer', 0)
                numArgs = self.compile_expression_list()
                self.vm.write_call(name, numArgs + 1)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope
            elif lookAhead == '.':  # name.subroutName(expressList)
                current_subrout_scope = self.symTable.subDict
                className = self.t.current_token()
                self.t.advance()
                self.validator('.')
                subroutName = self.t.current_token()
                self.validator('IDENTIFIER')
                name = className + '.' + subroutName
                self.validator('(')
                if self.symTable.kind_of(className) in [
                        'this', 'static', 'local', 'argument'
                ]:  # used 'this' for 'field'
                    # the prefix is a variable: call the method on it
                    classType = self.symTable.type_of(className)
                    name = classType + '.' + subroutName
                    kind = self.symTable.kind_of(className)
                    index = self.symTable.index_of(className)
                    self.vm.write_push(kind, index)
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs + 1)
                else:
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope
            else:
                name = self.t.identifier()  # varName
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                self.vm.write_push(kind, index)
                self.t.advance()
        else:
            raise Exception(self.t.current_token() + ' is not valid')
        return

    def compile_expression_list(self):
        """Compile a (possibly empty) comma separated expression list.

        Only used inside subroutine calls.

        :return: the number of expressions compiled
        """
        counter = 0
        if self.t.symbol() == ')':
            return counter
        self.compile_expression()
        counter += 1
        while self.t.symbol() == ',':
            self.t.advance()
            self.compile_expression()
            counter += 1
        return counter

    def validator(self, syntax, advance=True):
        """Check the current token against ``syntax``.

        :param syntax: one expected token / token type, or a list of them
        :param advance: when true, the tokenizer is advanced before the
            check is evaluated (also when the check fails)
        :return: True when the token or its type matches
        :raises Exception: naming the offending token when nothing matches
        """
        tokenType = self.t.token_type()
        token = self.t.current_token()
        if advance:
            self.t.advance()
        if not isinstance(syntax, list):
            syntax = [syntax]
        for item in syntax:
            if item in [tokenType, token]:
                return True
        # Report the token that was checked, not the one the tokenizer has
        # already moved on to.
        raise Exception(token + ' is not valid')
class CompilationEngine(): """ compila un archivo fuente jack desde un tokenizador jack en formato xml en output_file """ SYMBOL_KINDS = { 'parameter_list': 'argument', 'var_dec': 'local' } TOKENS_THAT_NEED_LABELS = ['if', 'while'] TERMINATING_TOKENS = { 'class': ['}'], 'class_var_dec': [';'], 'subroutine': ['}'], 'parameter_list': [')'], 'expression_list': [')'], 'statements': ['}'], 'do': [';'], 'let': [';'], 'while': ['}'], 'if': ['}'], 'var_dec': [';'], 'return': [';'], 'expression': [';', ')', ']', ','], 'array': [']'] } STARTING_TOKENS = { 'var_dec': ['var'], 'parameter_list': ['('], 'subroutine_body': ['{'], 'expression_list': ['('], 'expression': ['=', '[', '('], 'array': ['['], 'conditional': ['if', 'else'] } def __init__(self, tokenizer, output_file): self.tokenizer = tokenizer self.output_file = output_file self.class_symbol_table = SymbolTable() self.subroutine_symbol_table = SymbolTable() self.vm_writer = VMWriter(output_file) self.label_counter = LabelCounter(labels=self.TOKENS_THAT_NEED_LABELS) self.class_name = None def compile_class(self): """ lo basico pa compilar la clase """ # omitimos todo para comenzar la clase while not self.tokenizer.class_token_reached(): self.tokenizer.advance() # variable de instancia self.class_name = self.tokenizer.next_token.text while self.tokenizer.has_more_tokens: self.tokenizer.advance() if self.tokenizer.current_token.starts_class_var_dec(): self.compile_class_var_dec() elif self.tokenizer.current_token.starts_subroutine(): self.compile_subroutine() def compile_class_var_dec(self): symbol_kind = self.tokenizer.keyword() # obtenemos el tipo del simbolo self.tokenizer.advance() symbol_type = self.tokenizer.keyword() # obtenemos todos los identificadores while self._not_terminal_token_for('class_var_dec'): self.tokenizer.advance() if self.tokenizer.identifier(): # agregamos los simbolos de clase symbol_name = self.tokenizer.identifier() self.class_symbol_table.define( name=symbol_name, kind=symbol_kind, 
symbol_type=symbol_type ) def compile_subroutine(self): # nueva subrutina significa nuevo alcance self.subroutine_symbol_table.reset() # obtenemos el nombre de la subrutina self.tokenizer.advance() self.tokenizer.advance() subroutine_name = self.tokenizer.current_token.text # compilamos la lista de parametros self.tokenizer.advance() self.compile_parameter_list() # compilamos el cuerpo self.tokenizer.advance() self.compile_subroutine_body(subroutine_name=subroutine_name) # reset self.label_counter.reset_counts() def compile_subroutine_body(self, subroutine_name): # saltamos el inicio self.tokenizer.advance() # obtenemos todas las locales num_locals = 0 while self._starting_token_for('var_dec'): num_locals += self.compile_var_dec() self.tokenizer.advance() # escribimos el comando de funcion self.vm_writer.write_function( name='{}.{}'.format(self.class_name, subroutine_name), num_locals=num_locals ) # compilamos todas las declaraciones while self._not_terminal_token_for('subroutine'): self.compile_statements() def compile_parameter_list(self): # tabla de simbolos while self._not_terminal_token_for('parameter_list'): self.tokenizer.advance() if self.tokenizer.next_token.is_identifier(): symbol_kind = self.SYMBOL_KINDS['parameter_list'] symbol_type = self.tokenizer.current_token.text symbol_name = self.tokenizer.next_token.text self.subroutine_symbol_table.define( name=symbol_name, kind=symbol_kind, symbol_type=symbol_type ) def compile_var_dec(self): self.tokenizer.advance() symbol_type = self.tokenizer.current_token.text num_vars = 0 # obtenemos todas las variables while self._not_terminal_token_for('var_dec'): self.tokenizer.advance() if self.tokenizer.identifier(): num_vars += 1 symbol_kind = self.SYMBOL_KINDS['var_dec'] symbol_name = self.tokenizer.identifier() self.subroutine_symbol_table.define( name=symbol_name, kind=symbol_kind, symbol_type=symbol_type ) # return a las variables procesadas return num_vars def compile_statements(self): statement_compile_methods 
= { 'if': self.compile_if, 'do': self.compile_do, 'let': self.compile_let, 'while': self.compile_while, 'return': self.compile_return } while self._not_terminal_token_for('subroutine'): if self.tokenizer.current_token.is_statement_token(): statement_type = self.tokenizer.current_token.text statement_compile_methods[statement_type]() self.tokenizer.advance() def compile_do(self): self.tokenizer.advance() caller_name = self.tokenizer.current_token.text symbol = self._find_symbol_in_symbol_tables(symbol_name=caller_name) self.tokenizer.advance() self.tokenizer.advance() subroutine_name = self.tokenizer.current_token.text if symbol: segment = 'local' index = symbol['index'] symbol_type = symbol['type'] self.vm_writer.write_push(segment=segment, index=index) else: # es decir llamada al os symbol_type = caller_name subroutine_call_name = symbol_type + '.' + subroutine_name # iniciamos la lista de expresion self.tokenizer.advance() # obtenemos los argumentos en la lista de expresion num_args = self.compile_expression_list() # method call if symbol: # llamando al objeto pasado como un argumento implicito num_args += 1 # escribimos la llamada self.vm_writer.write_call(name=subroutine_call_name, num_args=num_args) self.vm_writer.write_pop(segment='temp', index='0') def compile_let(self): # obtener símbolo para almacenar evaluación de expresión self.tokenizer.advance() symbol_name = self.tokenizer.current_token.text symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name) array_assignment = self._starting_token_for(keyword_token='array', position='next') if array_assignment: # llegar a la expresión de índice self.tokenizer.advance() self.tokenizer.advance() # lo compilamos self.compile_expression() self.vm_writer.write_push(segment=symbol['kind'], index=symbol['index']) self.vm_writer.write_arithmetic(command='+') while not self.tokenizer.current_token.text == '=': self.tokenizer.advance() # compila todas las expresiones while self._not_terminal_token_for('let'): 
self.tokenizer.advance() self.compile_expression() if not array_assignment: # almacenar evaluación de expresión en la ubicación del símbolo self.vm_writer.write_pop(segment=symbol['kind'], index=symbol['index']) else: self.vm_writer.write_pop(segment='temp', index='0') self.vm_writer.write_pop(segment='pointer', index='1') self.vm_writer.write_push(segment='temp', index='0') self.vm_writer.write_pop(segment='that', index='0') def compile_while(self): # escribimos la etiqueta while self.vm_writer.write_label( label='WHILE_EXP{}'.format(self.label_counter.get('while')) ) # avanzar al inicio ( self.tokenizer.advance() self.tokenizer.advance() # compilamos la expresion dentro () self.compile_expression() # NOT expresión para manejar fácilmente la terminación y if-goto self.vm_writer.write_unary(command='~') self.vm_writer.write_ifgoto( label='WHILE_END{}'.format(self.label_counter.get('while')) ) while self._not_terminal_token_for('while'): self.tokenizer.advance() if self._statement_token(): self.compile_statements() # escribir el goto self.vm_writer.write_goto( label='WHILE_EXP{}'.format(self.label_counter.get('while')) ) # escribimos el fin de la etiqueta self.vm_writer.write_label( label='WHILE_END{}'.format(self.label_counter.get('while')) ) # agregar while al contador de etiquetas self.label_counter.increment('while') def compile_if(self): # avanzamos a la expresion start self.tokenizer.advance() self.tokenizer.advance() # compilamos dentro () self.compile_expression() self.vm_writer.write_ifgoto(label='IF_TRUE{}'.format(self.label_counter.get('if'))) self.vm_writer.write_goto(label='IF_FALSE{}'.format(self.label_counter.get('if'))) self.vm_writer.write_label(label='IF_TRUE{}'.format(self.label_counter.get('if'))) self.compile_conditional_body() if self._starting_token_for(keyword_token='conditional', position='next'): self.tokenizer.advance() self.vm_writer.write_goto( label='IF_END{}'.format(self.label_counter.get('if')) ) self.vm_writer.write_label( 
label='IF_FALSE{}'.format(self.label_counter.get('if')) ) self.compile_conditional_body() self.vm_writer.write_label( label='IF_END{}'.format(self.label_counter.get('if')) ) else: self.vm_writer.write_label( label='IF_FALSE{}'.format(self.label_counter.get('if')) ) def compile_conditional_body(self): while self._not_terminal_token_for('if'): self.tokenizer.advance() if self._statement_token(): if self.tokenizer.current_token.is_if(): self.label_counter.increment('if') self.compile_statements() self.label_counter.decrement('if') else: self.compile_statements() def compile_expression(self): """ many examples..i,e., x = 4 """ # las operaciones se compilan al final en orden inverso al que fueron agregadas ops = [] while self._not_terminal_token_for('expression'): if self._subroutine_call(): self.compile_subroutine_call() elif self._array_expression(): self.compile_array_expression() elif self.tokenizer.current_token.text.isdigit(): self.vm_writer.write_push( segment='constant', index=self.tokenizer.current_token.text ) elif self.tokenizer.identifier(): self.compile_symbol_push() elif self.tokenizer.current_token.is_operator() and not self._part_of_expression_list(): ops.insert(0, Operator(token=self.tokenizer.current_token.text, category='bi')) elif self.tokenizer.current_token.is_unary_operator(): ops.insert(0, Operator(token=self.tokenizer.current_token.text, category='unary')) elif self.tokenizer.string_const(): self.compile_string_const() elif self.tokenizer.boolean(): # caso booleano self.compile_boolean() elif self._starting_token_for('expression'): # expresión anidada # saltamos el inicial ( self.tokenizer.advance() self.compile_expression() elif self.tokenizer.null(): self.vm_writer.write_push(segment='constant', index=0) self.tokenizer.advance() for op in ops: self.compile_op(op) def compile_op(self, op): if op.unary(): self.vm_writer.write_unary(command=op.token) elif op.multiplication(): self.vm_writer.write_call(name='Math.multiply', num_args=2) elif 
op.division(): self.vm_writer.write_call(name='Math.divide', num_args=2) else: self.vm_writer.write_arithmetic(command=op.token) def compile_boolean(self): """ True o False """ self.vm_writer.write_push(segment='constant', index=0) if self.tokenizer.boolean() == 'true': self.vm_writer.write_unary(command='~') def compile_string_const(self): string_length = len(self.tokenizer.string_const()) self.vm_writer.write_push(segment='constant', index=string_length) self.vm_writer.write_call(name='String.new', num_args=1) # construir cadena a partir de caracteres for char in self.tokenizer.string_const(): if not char == self.tokenizer.STRING_CONST_DELIMITER: ascii_value_of_char = ord(char) self.vm_writer.write_push(segment='constant', index=ascii_value_of_char) self.vm_writer.write_call(name='String.appendChar', num_args=2) def compile_symbol_push(self): symbol = self._find_symbol_in_symbol_tables(symbol_name=self.tokenizer.identifier()) segment = symbol['kind'] index = symbol['index'] self.vm_writer.write_push(segment=segment, index=index) def compile_array_expression(self): symbol_name = self.tokenizer.current_token.text symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name) # llegar a la expresión de índice self.tokenizer.advance() self.tokenizer.advance() # compilamos self.compile_expression() self.vm_writer.write_push(segment='local', index=symbol['index']) # agregar dos direcciones: identificador y resultado de expresión self.vm_writer.write_arithmetic(command='+') self.vm_writer.write_pop(segment='pointer', index=1) # agreamos el valor a la pila self.vm_writer.write_push(segment='that', index=0) def compile_subroutine_call(self): """ example: Memory.peek(8000) """ subroutine_name = '' while not self._starting_token_for('expression_list'): subroutine_name += self.tokenizer.current_token.text self.tokenizer.advance() # obtenemos el numero de argumentos num_args = self.compile_expression_list() # después de enviar argumentos a la pila 
self.vm_writer.write_call(name=subroutine_name, num_args=num_args) def compile_expression_list(self): num_args = 0 if self._empty_expression_list(): return num_args # iniciamos las expresiones self.tokenizer.advance() while self._not_terminal_token_for('expression_list'): num_args += 1 self.compile_expression() if self._another_expression_coming(): self.tokenizer.advance() return num_args def compile_return(self): if self._not_terminal_token_for(keyword_token='return', position='next'): self.compile_expression() else: self.vm_writer.write_push(segment='constant', index='0') self.tokenizer.advance() self.vm_writer.write_return() def _not_terminal_token_for(self, keyword_token, position='current'): if position == 'current': return not self.tokenizer.current_token.text in self.TERMINATING_TOKENS[keyword_token] elif position == 'next': return not self.tokenizer.next_token.text in self.TERMINATING_TOKENS[keyword_token] def _starting_token_for(self, keyword_token, position='current'): if position == 'current': return self.tokenizer.current_token.text in self.STARTING_TOKENS[keyword_token] elif position == 'next': return self.tokenizer.next_token.text in self.STARTING_TOKENS[keyword_token] def _statement_token(self): return self.tokenizer.current_token.is_statement_token() def _another_expression_coming(self): return self.tokenizer.current_token.is_expression_list_delimiter() def _find_symbol_in_symbol_tables(self, symbol_name): if self.subroutine_symbol_table.find_symbol_by_name(symbol_name): return self.subroutine_symbol_table.find_symbol_by_name(symbol_name) elif self.class_symbol_table.find_symbol_by_name(symbol_name): return self.class_symbol_table.find_symbol_by_name(symbol_name) def _empty_expression_list(self): return self._start_of_expression_list() and self._next_ends_expression_list() def _start_of_expression_list(self): return self.tokenizer.current_token.text in self.STARTING_TOKENS['expression_list'] def _next_ends_expression_list(self): return 
self.tokenizer.next_token.text in self.TERMINATING_TOKENS['expression_list'] def _subroutine_call(self): return self.tokenizer.identifier() and self.tokenizer.next_token.is_subroutine_call_delimiter() def _array_expression(self): return self.tokenizer.identifier() and self._starting_token_for(keyword_token='array', position='next') def _part_of_expression_list(self): return self.tokenizer.part_of_expression_list()
def compileTerm(self, operation=None):
    """Compile a single term, writing its XML and (partly) its VM code.

    Handles integer/string/keyword constants, identifiers (optionally
    followed by an index or a subroutine call), a parenthesised expression,
    and a unary operator followed by a term.

    :param operation: arithmetic command to write after a constant has been
        pushed; supplied by the unary-operator branch when it recurses.
    :raises RuntimeError: when the current token cannot start a term or a
        closing ``]`` / ``)`` is missing.  (These used to be bare ``raise``
        statements, which surface as an uninformative RuntimeError.)
    """
    def get_condition():
        # True when the current token is any keyword constant.  A list (not a
        # generator) is used so every words_exist() call is still made.
        return any([self.words_exist([k]) for k in KEYWORD_CONSTANTS])

    self.open_tag('term')
    if (self.words_exist(['integerConstant'])
            or self.words_exist(['stringConstant'])
            or get_condition()):
        self.format_and_write_line()
        if self.vm:
            value = self.get_xml_value()
            if value == 'true':
                # 'true' is produced as constant 1 followed by 'neg'.
                value = '1'
                # this might have consequence. PLUM
                operation = 'neg'
            elif value == 'false' or value == 'null':
                value = 0
            self.compiled.write(
                VMWriter.write_push('constant', value)
            )
            if operation:
                self.compiled.write(
                    VMWriter.write_arithmetic(operation)
                )
        self.advance()
    elif self.words_exist(['identifier']):
        name = self.get_xml_value()
        kind = self.SYMBOL_TABLE.kind_of(name)
        index = self.SYMBOL_TABLE.index_of(name)
        self.format_and_write_line({'category': None, 'defined':False, 'kind':kind, 'index':index})
        self.advance()
        # THIS ONLY WORKS FOR SIMPLE IDENTIFIERS, should refactor for indexing arrays
        KIND_LOOKUP = {'static': 'static', 'field': 'this', 'arg': 'argument', 'var': 'local'}
        if kind is not None:
            self.compiled.write(
                VMWriter.write_push(KIND_LOOKUP[kind], index)
            )
        # if there is a [ next
        if self.words_exist(['symbol', '[']):
            self.format_and_write_line()
            self.advance()
            self.compileExpression()
            if self.words_exist(['symbol', ']']):
                self.format_and_write_line()
                self.advance()
            else:
                raise RuntimeError("compileTerm: expected ']' after index expression")
        # if there is a ( next subroutine call, it will leave its value on the stack
        elif self.words_exist(['(']) or self.words_exist(['.']):
            self.compileSubroutineCall(identifier_compiled=True, identifier=name)
    elif self.words_exist(['(', 'symbol']):
        self.format_and_write_line()
        self.advance()
        self.compileExpression()
        if self.words_exist([')', 'symbol']):
            self.format_and_write_line()
            self.advance()
        else:
            raise RuntimeError("compileTerm: expected ')' after expression")
    elif self.words_exist(['-']) or self.words_exist(['~']):
        if self.words_exist(['-']):
            operation = 'neg'
        else:
            # NOTE(review): '-' maps to the command name 'neg' but '~' is
            # passed through as the raw symbol -- confirm write_arithmetic
            # accepts '~'.
            operation = '~'
        self.format_and_write_line()
        self.advance()
        self.compileTerm(operation=operation)
    else:
        raise RuntimeError("compileTerm: current token cannot start a term")
    self.close_tag('term')
class CompilationEngine:
    """Recursive-descent compiler that turns one token stream into VM code.

    Constructing an instance compiles the whole input: tokens come from a
    ``Tokenizer``, identifiers are tracked in a ``SymbolTable`` and output is
    emitted through a ``VMWriter``.  Errors are reported by raising
    ``CompileError`` (see ``_traceback``).
    """

    def __init__(self, inpath, outpath):
        """Compile `inpath` into `outpath`.

        The VM writer is closed even when compilation raises, so the output
        handle is not leaked on a compile error.
        """
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        """Reset the per-subroutine state (kind, name, return type)."""
        self._sub_kind = None
        self._sub_name = None
        self._ret_type = None

    def _advance(self):
        """Move to the next token, failing if the input is exhausted."""
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the field matching its type."""
        t_type = self.tokenizer.token_type
        return (self.tokenizer.keyword if t_type == T_KEYWORD else
                self.tokenizer.symbol if t_type == T_SYMBOL else
                self.tokenizer.identifier if t_type == T_ID else
                self.tokenizer.intval if t_type == T_INTEGER else
                self.tokenizer.stringval)

    @property
    def _current_tok_type(self):
        """Type of the current token."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Tag registered in ``token_tags`` for the current token type."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """return raw next_token in the tokenizer"""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance and check that the new current token meets the requirement.

        When `token` is given the token value must equal it; otherwise the
        token type must equal `tok_type`.  A mismatch is reported through
        ``_error``.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token, ))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))

    def _require_id(self):
        """Require the next token to be an identifier."""
        self._require_token(T_ID)

    def _require_kw(self, token):
        """Require the next token to be the keyword `token`."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Require the next token to be the symbol `token`."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Require `brackets[0]`, run `procedure`, then require `brackets[1]`."""
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """Tell whether the next token starts a class variable declaration."""
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """Tell whether the next token starts a subroutine declaration."""
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """Tell whether the next token starts a local variable declaration."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        """Consume an identifier and record it as the class name."""
        self._require_id()
        self._class_name = self._current_token

    def compile_subroutine_name(self):
        """Consume an identifier and record it as the subroutine name."""
        self._require_id()
        self._sub_name = self._current_token

    def compile_var_name(self, kind=None, type=None, declare=False):
        """Consume a variable name; define it, or check it is already known.

        With ``declare=True`` the name is added to the symbol table using
        `type` and `kind` (both expected to be set).  Otherwise the name is
        validated with ``check_var_name``.
        """
        self._require_id()
        name = self._current_token
        if declare is True:  # kind and type are not None
            self.symboltable.define(name, type, kind)
        else:
            self.check_var_name(name, type)

    def check_var_name(self, name, type=None):
        """Report an error if `name` is undeclared or not of the given `type`."""
        recorded_kind = self.symboltable.kindof(name)
        if recorded_kind is None:
            self._traceback('name used before declared: {0}'.format(name))
        elif type is not None:
            recorded_type = self.symboltable.typeof(name)
            if recorded_type != type:
                get = '{0} "{1}"'.format(recorded_type, name)
                self._error(expect_types=(type, ), get=get)

    def compile_type(self, advanced=False, expect='type'):
        """Check that the token is a built-in type or an identifier.

        :param advanced: True when the caller has already advanced onto the
            type token.
        :param expect: description used in the error message.
        """
        # int, string, boolean or identifier(className)
        if advanced is False:
            self._advance()
        if (self._current_token not in SymbolTable.builtIn_types and
                self._current_tok_type != T_ID):
            return self._error(expect=expect)

    def compile_return_type(self):
        """Consume and record the return type (``void`` or a type).

        A constructor must declare the current class as its return type.
        """
        # void or type
        self._advance()
        if self._current_token != KW_VOID:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if self._sub_kind == KW_CONSTRUCTOR and self._ret_type != self._class_name:
            me = 'constructor expect current class as return type'
            self._traceback(me)

    @record_non_terminal('class')
    def compile_class(self):
        """Compile a whole class and reject anything that follows it."""
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """Compile a declaration list shared by class and local variables."""
        self._advance()
        id_kind = self._current_token  # ('static | field | var')
        # type varName (',' varName)* ';'
        self.compile_type()
        id_type = self._current_token
        self.compile_var_name(id_kind, id_type, declare=True)
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self.compile_var_name(id_kind, id_type, declare=True)
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """Compile a static/field declaration."""
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """Compile one constructor, function or method.

        For a method, ``this`` is defined as the first argument before the
        parameter list is processed.
        """
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        self._sub_kind = self._current_token
        if self._sub_kind == KW_METHOD:
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """Define every declared parameter as an 'argument' symbol."""
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name('argument', self._current_token, True)
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """Compile local declarations, the function header and the statements."""
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        # The header is written only now, once the local count is known.
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """Write the ``function`` command and set up ``this`` where needed."""
        fn_name = '.'.join((self._class_name, self._sub_name))
        num_locals = self.symboltable.varcount(KW_VAR)
        self.vmwriter.write_function(fn_name, num_locals)  # function fn_name num_locals
        # set up pointer this
        if self._sub_kind == KW_CONSTRUCTOR:
            # Allocate one word per field and anchor 'this' at the block.
            num_fields = self.symboltable.varcount(KW_FIELD)
            self.vmwriter.write_push('constant', num_fields)
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
        elif self._sub_kind == KW_METHOD:
            # The receiver arrives as argument 0.
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """Compile a local variable declaration."""
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """Compile statements until the next token is ``}``."""
        # (letStatement | ifStatement | whileStatement | doStatement |
        # returnStatement)*
        handlers = {
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return,
            'if': self.compile_if,
        }
        while self._next_token != '}':
            self._advance()
            handler = handlers.get(self._current_token)
            if handler is None:
                return self._error(expect='statement expression')
            handler()

    @record_non_terminal('doStatement')
    def compile_do(self):
        """Compile a ``do`` statement and discard the call's return value."""
        # 'do' subroutineCall ';'
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # temp[0] store useless value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """Compile an assignment to a variable or to an array element."""
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.compile_var_name()
        var_name = self._current_token
        array = (self._next_token == '[')
        if array:
            self.compile_array_subscript(var_name)  # push (array base + subscript)
        self._require_sym('=')
        self.compile_expression()  # push expression value
        self._require_sym(';')
        if array:
            self.vmwriter.write_pop('temp', 1)  # pop exp value to temp[1]
            self.vmwriter.write_pop('pointer', 1)  # that = array base + subscript
            self.vmwriter.write_push('temp', 1)
            self.vmwriter.write_pop('that', 0)
        else:
            self.assign_variable(var_name)

    # Symbol kind -> VM memory segment.
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        """Pop the stack top into the variable `name`."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        """Push the value of the variable `name`."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_push(self.kind_segment[kind], index)

    # Counter used to build unique while/if labels.
    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """Compile a ``while`` loop."""
        # 'while' '(' expression ')' '{' statements '}'
        start_label = 'WHILE_START_' + str(self.label_num)
        end_label = 'WHILE_END_' + str(self.label_num)
        self.label_num += 1
        self.vmwriter.write_label(start_label)
        self.compile_cond_expression(start_label, end_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """Compile an ``if`` statement with an optional ``else`` clause."""
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        else_label = 'IF_ELSE_' + str(self.label_num)
        end_label = 'IF_END_' + str(self.label_num)
        self.label_num += 1
        self.compile_cond_expression(end_label, else_label)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile ``( condition ) { statements }`` shared by while and if.

        The condition is negated; when that is true control jumps to
        `end_label`, otherwise the body runs and jumps to `goto_label`.
        `end_label` is written right after the body.
        """
        self._require_brackets('()', self.compile_expression)
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)  # meet
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """Compile ``return``; a bare return pushes constant 0 first."""
        # 'return' expression? ';'
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token != ';':
            self.compile_expression()
        else:
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            self.vmwriter.write_push('constant', 0)
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    unary_ops = {'-': 'neg', '~': 'not'}
    # '*' and '/' have no VM command; compile_binaryop turns them into calls.
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        """Compile ``term (op term)*``; each operator follows its operands."""
        # term (op term)*
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        """Emit the VM code for the binary operator `op`."""
        if op == '*':
            self.vmwriter.write_call('Math.multiply', 2)
        elif op == '/':
            self.vmwriter.write_call('Math.divide', 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        """Compile one term and leave its value on the stack."""
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
            return

        self._advance()
        tok = self._current_token
        tok_type = self._current_tok_type
        if tok_type == T_KEYWORD and tok in self.kw_consts:
            self.compile_kw_consts(tok)
        elif tok_type == T_INTEGER:
            self.vmwriter.write_push('constant', tok)
        elif tok_type == T_STRING:
            self.compile_string(tok)
        elif tok_type == T_ID:
            # Tuple membership, not substring: `in '(.'` also matched ''.
            if self._next_token in ('(', '.'):
                self.compile_subroutine_call()
            elif self._next_token == '[':
                self.check_var_name(tok)
                self.compile_array_subscript(tok)
                self.vmwriter.write_pop('pointer', 1)
                self.vmwriter.write_push('that', 0)
            else:
                self.check_var_name(tok)
                self.load_variable(tok)
        elif tok_type == T_SYMBOL and tok in self.unary_ops:
            self.compile_term()
            self.vmwriter.write_arithmetic(self.unary_ops[tok])
        else:
            self._error(expect='term')

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kw_consts(self, kw):
        """Push a keyword constant: this, true (1 then neg), or 0 otherwise."""
        if kw == KW_THIS:
            self.vmwriter.write_push('pointer', 0)
        elif kw == KW_TRUE:
            self.vmwriter.write_push('constant', 1)
            self.vmwriter.write_arithmetic('neg')
        else:
            self.vmwriter.write_push('constant', 0)

    def compile_string(self, string):
        """Build `string` at run time via String.new and String.appendChar."""
        self.vmwriter.write_push('constant', len(string))
        self.vmwriter.write_call('String.new', 1)
        for char in string:
            self.vmwriter.write_push('constant', ord(char))
            self.vmwriter.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        """Compile a call; the first identifier is already the current token."""
        # subroutineName '(' expressionList ')' |
        # (className | varName) '.' subroutineName '(' expressionList ')'
        ## the first element of structure has already been compiled.
        fn_name, num_args = self.compile_call_name()
        self._require_sym('(')
        num_args = self.compile_expressionlist(num_args)
        self._require_sym(')')
        self.vmwriter.write_call(fn_name, num_args)

    def compile_call_name(self):
        """Resolve the called name; return ``(function name, implicit args)``.

        The implicit argument count is 1 when an object (``this`` or a
        variable's value) has been pushed as the first argument, 0 otherwise.
        An error is reported when the name is followed by neither '.' nor
        '(' (previously this fell through and returned None, crashing the
        caller on unpacking).
        """
        # the first name of subroutine call could be (className or varName) if
        # it is followed by '.', or subroutineName if followed by '('.
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types() or
                    name in SymbolTable.builtIn_class or
                    name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            else:
                self.check_var_name(name)  # varName with class type
                type = self.symboltable.typeof(name)
                if type in SymbolTable.builtIn_types:
                    return self._error(expect='class instance or class',
                                       get=type)
                self.load_variable(name)
                return '.'.join((type, sub_name)), 1
        elif self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName
        return self._error(expect_toks=('.', '('), get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        """Compile the arguments; return `num_args` plus how many were found."""
        # (expression (',' expression)*)?
        if self._next_token != ')':
            self.compile_expression()
            num_args += 1
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()
            num_args += 1
        return num_args

    def compile_array_subscript(self, var_name):
        """Push the address ``var_name + subscript`` of an array element."""
        # varName '[' expression ']'
        self.check_var_name(var_name, 'Array')
        self._require_brackets('[]', self.compile_expression)  # push expression value
        self.load_variable(var_name)
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        """Report an error when there are no more tokens."""
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Build an 'Expect ... but get ...' message and raise via _traceback.

        :param expect_toks: token values that would have been accepted.
        :param expect_types: token types that would have been accepted.
        :param expect: ready-made description; overrides the two above.
        :param get: what was actually found; defaults to the current token.
        """
        if expect is None:
            exp_tok = ' or '.join(('"{0}"'.format(t) for t in expect_toks))
            exp_types = ('type {0}'.format(token_tags[t])
                         for t in expect_types)
            exp_type = ' or '.join(exp_types)
            if exp_tok and exp_type:
                # str.join takes ONE iterable; passing two arguments raised
                # TypeError instead of producing the message.
                expect = ' or '.join((exp_tok, exp_type))
            else:
                expect = exp_tok + exp_type
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise CompileError for `message`, prefixed with file and line."""
        if DEBUG:
            print('--------------------------------------------')
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print('--------------------------------------------')
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))
class CompilationEngine:
    """Compilation engine that is part-way through moving from XML to VM output.

    NOTE(review): ``self.__output`` is a ``VMWriter``, yet ``write_xml`` and
    the while/if/return/expression/term compilers still call
    ``self.__output.write(...)`` with XML text.  Confirm that VMWriter exposes
    ``write`` or finish converting those methods.
    """

    XML_LINE = "<{0}> {1} </{0}>\n"
    # Characters that must be escaped in XML mapped to their entities.  The
    # values had been entity-decoded back to the raw characters, which made
    # the '"' entry a syntax error and the table a no-op.
    COMPARE_SYM_REPLACER = {
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        '&': "&amp;"
    }
    KEYWORD_CONSTANT = ("true", "false", "null", "this")

    def __init__(self, input_stream, output_stream):
        """
        constructor of the Compilation Engine object; compiles the class
        immediately
        :param input_stream: the input stream
        :param output_stream: the output stream
        """
        self.__tokenizer = Tokenizer(input_stream)  # Tokenizer object
        self.__output = VMWriter(output_stream)
        self.__symbol = SymbolTable()
        self.__class_name = ""
        # statement keyword -> method that compiles it
        self.__statements = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return
        }
        # compile_class() closes the output itself once it is done.
        self.compile_class()

    def write_xml(self):
        """
        write the current token as one XML line, escaping reserved characters
        """
        if self.__tokenizer.token_type() == "stringConstant":
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.string_val()))
        elif self.__tokenizer.get_value() in self.COMPARE_SYM_REPLACER:
            xml_val = self.COMPARE_SYM_REPLACER[self.__tokenizer.get_value()]
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(), xml_val))
        else:
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.get_value()))

    def compile_class(self):
        """
        compile the program starting from the class definition, then close
        the output
        """
        self.__tokenizer.advance()  # skip "class"
        self.__class_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()  # skip class name
        self.__tokenizer.advance()  # skip {
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            self.compile_class_var_dec()
            current_token = self.__tokenizer.get_value()
        while current_token == "constructor" or current_token == "function" \
                or current_token == "method":
            self.compile_subroutine_dec()
            current_token = self.__tokenizer.get_value()
        self.__output.close()

    def compile_class_var_dec(self):
        """
        compile consecutive static/field declarations, defining each variable
        in the symbol table

        NOTE(review): a push is written for every declared variable, using
        the declaration keyword as the segment -- confirm this is intended.
        """
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            index = self.__symbol.var_count(current_token)
            self.__tokenizer.advance()  # get token type
            token_type = self.__tokenizer.get_value()
            self.__output.write_push(current_token, index)
            self.__tokenizer.advance()  # get token name
            token_name = self.__tokenizer.get_value()
            self.__symbol.define(token_name, token_type, current_token)
            self.__tokenizer.advance()
            while self.__tokenizer.get_value() == ",":
                self.__tokenizer.advance()  # get token name
                token_name = self.__tokenizer.get_value()
                index = self.__symbol.var_count(current_token)  # get new index
                self.__output.write_push(current_token, index)
                self.__symbol.define(token_name, token_type, current_token)
                self.__tokenizer.advance()
            self.__tokenizer.advance()  # move past the terminating token
            current_token = self.__tokenizer.get_value()

    def compile_subroutine_body(self):
        """
        compile a subroutine body: var declarations followed by statements
        """
        self.__tokenizer.advance()  # skip {
        while self.__tokenizer.get_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        self.__tokenizer.advance()  # skip }

    def compile_subroutine_dec(self):
        """
        compile a subroutine declaration: header, parameters and body
        """
        self.__tokenizer.advance()  # skip constructor/function/method
        return_value = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_args = self.compile_parameter_list()
        # NOTE(review): the parameter count is passed as the second argument
        # of write_function -- confirm that is what VMWriter expects.
        self.__output.write_function(func_name, func_args)
        self.compile_subroutine_body()
        if return_value == "void":
            self.__output.write_pop("temp", "0")

    def compile_parameter_list(self):
        """
        skip over a parameter list, including its parentheses
        :return: the number of parameters
        """
        # todo returns the number og args !
        counter = 0
        self.__tokenizer.advance()  # skip (
        if self.__tokenizer.get_value() != ")":
            self.__tokenizer.advance()  # skip type
            self.__tokenizer.advance()  # skip var name
            counter += 1
            while self.__tokenizer.get_value() == ",":
                self.__tokenizer.advance()  # skip ,
                self.__tokenizer.advance()  # skip type
                self.__tokenizer.advance()  # skip varName
                counter += 1
        self.__tokenizer.advance()  # skip )
        return counter

    def compile_var_dec(self):
        """
        compile one local var declaration, defining each name in the symbol
        table

        NOTE(review): as for class variables, a push is written per declared
        name with the declaration keyword as segment -- confirm.
        """
        token_kind = self.__tokenizer.get_value()  # var
        self.__tokenizer.advance()
        token_type = self.__tokenizer.get_value()  # type
        self.__tokenizer.advance()
        token_name = self.__tokenizer.get_value()  # varName
        self.__tokenizer.advance()
        index = self.__symbol.var_count(token_kind)
        self.__output.write_push(token_kind, index)
        self.__symbol.define(token_name, token_type, token_kind)
        while self.__tokenizer.get_value() == ",":
            self.__tokenizer.advance()  # skip ,
            token_name = self.__tokenizer.get_value()
            index = self.__symbol.var_count(token_kind)
            self.__output.write_push(token_kind, index)
            self.__symbol.define(token_name, token_type, token_kind)
            self.__tokenizer.advance()
        self.__tokenizer.advance()  # skip ;

    def compile_statements(self):
        """
        compile statements for as long as the current token is a statement
        keyword
        """
        key = self.__tokenizer.get_value()
        if key != "}":
            while key in self.__statements:
                self.__statements[self.__tokenizer.get_value()]()
                key = self.__tokenizer.get_value()

    def compile_do(self):
        """
        compile a do statement
        """
        self.__tokenizer.advance()  # skip do
        self.subroutine_call()
        self.__tokenizer.advance()  # skip ;

    def compile_let(self):
        """
        compile a let statement and pop the result into the variable
        """
        self.__tokenizer.advance()  # skip let
        var_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # todo handle array: a '[' expression ']' subscript is not supported
        self.__tokenizer.advance()  # skip =
        self.compile_expression()  # todo push the value to the stack
        self.__tokenizer.advance()  # skip ;
        var_kind = self.__symbol.kind_of(var_name)
        var_index = self.__symbol.index_of(var_name)
        self.__output.write_pop(var_kind, var_index)

    def compile_while(self):
        """
        compile a while loop (still emits XML)
        """
        self.__output.write("<whileStatement>\n")
        self.write_xml()  # write while
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        self.__output.write("</whileStatement>\n")

    def compile_return(self):
        """
        compile a return statement (still emits XML)
        """
        self.__output.write("<returnStatement>\n")
        self.write_xml()  # write return
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() != ";":
            self.compile_expression()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</returnStatement>\n")

    def compile_if(self):
        """
        compile an if statement with an optional else (still emits XML)
        """
        self.__output.write("<ifStatement>\n")
        self.write_xml()  # write if
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() == "else":
            self.write_xml()  # write else
            self.__tokenizer.advance()
            self.write_xml()  # write {
            self.__tokenizer.advance()
            self.compile_statements()
            self.write_xml()  # write }
            self.__tokenizer.advance()
        self.__output.write("</ifStatement>\n")

    def compile_expression(self):
        """
        compile an expression: term (op term)* (still emits XML)
        """
        self.__output.write("<expression>\n")
        self.compile_term()
        while self.__tokenizer.is_operator():
            self.write_xml()  # write the operator
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</expression>\n")

    def compile_term(self):
        """
        compile any kind of term (still emits XML)
        """
        self.__output.write("<term>\n")
        curr_type = self.__tokenizer.token_type()
        # handle consts
        if curr_type == "integerConstant" or curr_type == "stringConstant":
            self.write_xml()  # write the int \ string
            self.__tokenizer.advance()
        # handle const keyword
        elif curr_type == "keyword" and \
                self.__tokenizer.get_value() in self.KEYWORD_CONSTANT:
            self.__tokenizer.set_type("keywordConstant")
            self.write_xml()  # write key word
            self.__tokenizer.advance()
        elif curr_type == "identifier":
            # handle var names
            if self.__tokenizer.get_next_token() != "(" and \
                    self.__tokenizer.get_next_token() != ".":
                self.write_xml()  # write the var name
                self.__tokenizer.advance()
                if self.__tokenizer.get_value() == "[":
                    self.write_xml()  # write [
                    self.__tokenizer.advance()
                    self.compile_expression()
                    self.write_xml()  # write ]
                    self.__tokenizer.advance()
            # handle function calls
            else:
                self.subroutine_call()
        # handle expression
        elif curr_type == "symbol" and self.__tokenizer.get_value() == "(":
            self.write_xml()  # write (
            self.__tokenizer.advance()
            self.compile_expression()
            self.write_xml()  # write )
            self.__tokenizer.advance()
        # handle - \ ~
        elif self.__tokenizer.get_value() == "-" or \
                self.__tokenizer.get_value() == "~":
            self.write_xml()  # write -\~
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</term>\n")

    def subroutine_call(self):
        """
        compile a subroutine call, with or without a ``name.`` prefix
        (still emits XML)
        """
        if self.__tokenizer.get_next_token() == ".":
            self.write_xml()  # write name
            self.__tokenizer.advance()
            self.write_xml()  # write .
            self.__tokenizer.advance()
        self.write_xml()  # write name
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression_list()
        self.write_xml()  # write )
        self.__tokenizer.advance()

    def compile_expression_list(self):
        """
        compile a comma-separated expression list (still emits XML)
        """
        self.__output.write("<expressionList>\n")
        if self.__tokenizer.get_value() != ")":
            self.compile_expression()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.compile_expression()
        self.__output.write("</expressionList>\n")
class CompilationEngine:
    """Recursive-descent code generator: reads Jack tokens, writes VM code.

    Every ``compile_*`` method (except ``compile_class``, which loads the
    first token itself) expects ``self.curr_token`` to hold the first token
    of its construct and returns with ``self.curr_token`` on the first token
    after that construct.
    """

    # When True, a trace of the parse is printed to stdout.
    DEBUG = False

    # Maps Jack operators / variable kinds to VM commands, OS calls and
    # VM segment names.
    translate_dict = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or',
        'unary-': 'neg',
        'unary~': 'not',
        'argument': 'argument',
        'static': 'static',
        'var': 'local',
        'field': 'this',
        '*': 'Math.multiply',
        '/': 'Math.divide',
    }

    def __init__(self, input: "JackTokenizer", output_file_path):
        """Store the tokenizer and open a VM writer on *output_file_path*.

        :param input: tokenizer providing ``has_more_tokens``, ``advance``,
            ``token_type`` and ``keyword``
        :param output_file_path: path handed to ``VMWriter``
        """
        self.tokenizer = input
        self.vmwriter = VMWriter(output_file_path)
        self.symbol_table = SymbolTable()
        self.label_index = 0
        self.curr_token = ''
        self.curr_token_type = ''
        self.depth = 0
        self.class_name = None  # set by compile_class

    def compile_class(self):
        """Compile a whole class and close the VM writer."""
        self.print_open('compile_class')
        self.__next_token()  # class
        self.__next_token()  # className
        self.class_name = self.curr_token
        self.__next_token()  # {
        self.__next_token()
        while self.curr_token in ('static', 'field'):
            self.compile_class_var_dec()
        while self.curr_token in ('constructor', 'function', 'method'):
            self.compile_subroutine_dec()
        self.__next_token()  # after }
        self.vmwriter.close()
        self.print_close('compile_class_end')

    def compile_class_var_dec(self):
        """Register the variables of one ``static``/``field`` declaration."""
        self.print_open('compile_class_var_dec')
        kind = self.translate_dict[self.curr_token]  # (static|field)
        self.__next_token()
        var_type = self.curr_token  # type
        self.__next_token()
        self.symbol_table.define(self.curr_token, var_type, kind)  # varName
        self.__next_token()  # , or ;
        while self.curr_token != ';':
            self.__next_token()  # skip ','
            self.symbol_table.define(self.curr_token, var_type, kind)
            self.__next_token()  # , or ;
        self.__next_token()  # after ;
        self.print_close('compile_class_var_dec_end')

    def compile_subroutine_dec(self):
        """Compile one constructor, function or method declaration."""
        self.print_open('compile_subroutine_dec')
        self.symbol_table.start_subroutine()
        kind = self.curr_token  # (constructor|function|method)
        self.__next_token()
        var_type = self.curr_token  # (void|type)
        self.__next_token()
        subroutine_name = self.curr_token  # subroutineName
        self.__next_token()  # '('
        if kind == 'method':
            # The receiver occupies argument 0 of every method.
            self.symbol_table.define('this', self.class_name, 'argument')
        self.__next_token()
        self.compile_parameter_list()
        self.__next_token()  # after ')'
        self.compile_subroutine_body(kind, var_type, subroutine_name)
        self.print_close('compile_subroutine_dec_end')

    def compile_parameter_list(self):
        """Register the parameters; stops with ``curr_token`` on ')'."""
        self.print_open('compile_parameter_list')
        while self.curr_token != ')':
            if self.curr_token == ',':
                self.__next_token()
            var_type = self.curr_token  # type
            self.__next_token()
            self.symbol_table.define(self.curr_token, var_type, 'argument')
            self.__next_token()
        self.print_close('compile_parameter_list_end')

    def compile_subroutine_body(self, kind, var_type, subroutine_name):
        """Compile the body; ``curr_token`` is '{' on entry.

        :param kind: 'constructor', 'function' or 'method'
        :param var_type: declared return type (not used for code generation)
        :param subroutine_name: unqualified subroutine name
        """
        self.print_open('compile_subroutine_body')
        self.__next_token()  # after '{'
        while self.curr_token == 'var':
            self.compile_var_dec()
        # The local count is only known once all var declarations are read.
        self.vmwriter.write_function(self.class_name + '.' + subroutine_name,
                                     self.symbol_table.var_count('local'))
        if kind == 'method':
            # Anchor `this` to the receiver passed as argument 0.
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)
        elif kind == 'constructor':
            # Allocate one word per field and anchor `this` to the block.
            self.vmwriter.write_push('constant',
                                     self.symbol_table.var_count('this'))
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
        self.compile_statements()
        self.__next_token()  # after '}'
        self.print_close('compile_subroutine_body_end')

    def compile_var_dec(self):
        """Register the local variables of one ``var`` declaration."""
        self.print_open('compile_var_dec')
        # curr token is var
        self.__next_token()
        var_type = self.curr_token  # type
        self.__next_token()
        self.symbol_table.define(self.curr_token, var_type, 'local')
        self.__next_token()  # , or ;
        while self.curr_token != ';':
            self.__next_token()  # skip ','
            # Every name in the declaration shares the declared type.
            self.symbol_table.define(self.curr_token, var_type, 'local')
            self.__next_token()
        self.__next_token()  # after ;
        self.print_close('compile_var_dec_end')

    def compile_statements(self):
        """Compile statements until a non-statement token is reached."""
        self.print_open('compile_statements')
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self.curr_token in handlers:
            handlers[self.curr_token]()
        self.print_close('compile_statements_end')

    def compile_let(self):
        """Compile ``let varName ('[' expr ']')? = expr ;``."""
        self.print_open('compile_let')
        # curr_token is let
        self.__next_token()
        var_name = self.curr_token  # varName
        kind = self.symbol_table.kind_of(var_name)
        index = self.symbol_table.index_of(var_name)
        self.__next_token()
        if self.curr_token == '[':
            # Target address = array base + index expression.
            self.vmwriter.write_push(kind, index)
            self.__next_token()
            self.compile_expression()
            self.__next_token()  # ] --> =
            self.vmwriter.write_arithmetic('add')
            self.__next_token()  # after =
            self.compile_expression()
            self.__next_token()  # after ;
            # Park the value in temp 0 so pointer 1 can be set safely: the
            # right-hand side may itself have used pointer 1.
            self.vmwriter.write_pop('temp', 0)
            self.vmwriter.write_pop('pointer', 1)
            self.vmwriter.write_push('temp', 0)
            self.vmwriter.write_pop('that', 0)
        else:
            self.__next_token()  # after =
            self.compile_expression()
            self.__next_token()  # after ;
            self.vmwriter.write_pop(kind, index)
        self.print_close('compile_let_end')

    def compile_if(self):
        """Compile an if statement with an optional else clause."""
        self.print_open('compile_if')
        # curr_token is if
        index_l = self.__next_label_index()
        else_label = 'L1' + str(index_l)
        end_label = 'L2' + str(index_l)
        self.__next_token()  # (
        self.__next_token()  # after (
        self.compile_expression()
        self.vmwriter.write_arithmetic('not')
        self.__next_token()  # ) --> {
        self.__next_token()  # { --> ?
        self.vmwriter.write_if(else_label)
        self.compile_statements()
        self.vmwriter.write_go_to(end_label)
        self.__next_token()  # } --> ?
        self.vmwriter.write_label(else_label)
        if self.curr_token == 'else':
            self.__next_token()  # else --> {
            self.__next_token()  # { --> ?
            self.compile_statements()
            self.__next_token()  # } --> ?
        self.vmwriter.write_label(end_label)
        self.print_close('compile_if_end')

    def compile_while(self):
        """Compile a while loop."""
        self.print_open('compile_while')
        # curr_token is while
        index = self.__next_label_index()
        loop_label = 'L1' + str(index)
        exit_label = 'L2' + str(index)
        self.vmwriter.write_label(loop_label)
        self.__next_token()  # while --> (
        self.__next_token()  # ( --> ?
        self.compile_expression()
        self.__next_token()  # ) --> {
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(exit_label)
        self.__next_token()  # { --> ?
        self.compile_statements()
        self.__next_token()  # } --> ?
        self.vmwriter.write_go_to(loop_label)
        self.vmwriter.write_label(exit_label)
        self.print_close('compile_while_end')

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.print_open('compile do')
        # curr_token is do
        self.__next_token()  # do --> (subroutineName | className | varName)
        self.subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # because of void call
        self.__next_token()  # ; --> ?
        self.print_close('compile do_end')

    def subroutine_call(self, skipped=False, arg_name=''):
        """Compile a subroutine call.

        :param skipped: True when the caller already consumed the leading
            identifier, so ``curr_token`` is '(' or '.'
        :param arg_name: that leading identifier when *skipped* is True
        """
        self.print_open('subroutine_call')
        if skipped:
            name = arg_name
        else:
            name = self.curr_token  # (subroutineName | className | varName)
            self.__next_token()
        function = name
        args = 0
        if self.curr_token == '(':
            # Bare name: a method of the current object.
            function = self.class_name + '.' + name
            self.vmwriter.write_push('pointer', 0)
            args = 1
        elif self.curr_token == '.':
            self.__next_token()  # . --> subroutine_name
            subroutine_name = self.curr_token
            kind = self.symbol_table.kind_of(name)
            if kind is None:
                # Not a known variable: `name` is a class name.
                function = name + '.' + subroutine_name
            else:
                # Method call on a variable: push it as the receiver.
                var_type = self.symbol_table.type_of(name)
                function = var_type + '.' + subroutine_name
                self.vmwriter.write_push(kind,
                                         self.symbol_table.index_of(name))
                args = 1
            self.__next_token()  # subroutine_name --> (
        self.__next_token()  # ( --> ?
        expression_list_len = self.compile_expression_list()
        self.__next_token()  # ) --> next token
        self.vmwriter.write_call(function, args + expression_list_len)
        self.print_close('subroutine_call_end')

    def compile_return(self):
        """Compile a return statement; a bare ``return;`` pushes constant 0."""
        self.print_open('compile_return')
        # curr_token is return
        self.__next_token()  # return --> ?
        if self.curr_token != ';':
            self.compile_expression()
        else:
            self.vmwriter.write_push('constant', 0)
        self.__next_token()  # ; --> ?
        self.vmwriter.write_return()
        self.print_close('compile_return_end')

    def compile_expression(self):
        """Compile ``term (op term)*``, evaluated strictly left to right."""
        self.print_open('compile_expression')
        self.compile_term()
        while self.curr_token in {'+', '-', '*', '/', '&', '|', '<', '>', '='}:
            op = self.curr_token
            self.__next_token()
            self.compile_term()
            if op in ('*', '/'):
                # Multiplication and division are OS calls, not VM commands.
                self.vmwriter.write_call(self.translate_dict[op], 2)
            else:
                self.vmwriter.write_arithmetic(self.translate_dict[op])
        self.print_close('compile_expression_end')

    def compile_term(self):
        """Compile one term and push its value."""
        self.print_open('compile_term')
        if self.curr_token == '(':
            self.__next_token()  # ( --> ?
            self.compile_expression()
            self.__next_token()  # ) --> ?
        elif self.curr_token in {'-', '~'}:
            op = self.curr_token  # (-|~)
            self.__next_token()  # (-|~) --> ?
            self.compile_term()
            self.vmwriter.write_arithmetic(self.translate_dict['unary' + op])
        elif self.curr_token_type == 'stringConstant':
            # Build the string at run time, one character at a time.
            self.vmwriter.write_push('constant', len(self.curr_token))
            self.vmwriter.write_call('String.new', 1)
            for ch in self.curr_token:
                self.vmwriter.write_push('constant', ord(ch))
                self.vmwriter.write_call('String.appendChar', 2)
            self.__next_token()
        elif self.curr_token_type == 'integerConstant':
            self.vmwriter.write_push('constant', self.curr_token)
            self.__next_token()
        elif self.curr_token_type == 'keyword':
            if self.curr_token == 'this':
                self.vmwriter.write_push('pointer', 0)
            else:
                # false / null are 0; true is not(0).
                self.vmwriter.write_push('constant', 0)
                if self.curr_token == 'true':
                    self.vmwriter.write_arithmetic('not')
            self.__next_token()
        else:
            # Identifier: the following token decides what it denotes.
            temp = self.curr_token
            self.__next_token()
            if self.curr_token == '[':
                self.vmwriter.write_push(self.symbol_table.kind_of(temp),
                                         self.symbol_table.index_of(temp))
                self.__next_token()  # [ --> ?
                self.compile_expression()
                self.__next_token()  # ] --> ?
                self.vmwriter.write_arithmetic('add')
                self.vmwriter.write_pop('pointer', 1)
                self.vmwriter.write_push('that', 0)
            elif self.curr_token in ('(', '.'):
                self.subroutine_call(True, temp)
            else:
                # Plain variable.
                self.vmwriter.write_push(self.symbol_table.kind_of(temp),
                                         self.symbol_table.index_of(temp))
        self.print_close('compile_term_end')

    def compile_expression_list(self):
        """Compile a comma-separated expression list up to ')'.

        :return: number of expressions compiled
        """
        self.print_open('compile_expression_list')
        count = 0
        while self.curr_token != ')':
            if self.curr_token == ',':
                self.__next_token()
            self.compile_expression()
            count += 1
        self.print_close('compile_expression_list_end')
        return count

    # -----------private methods----------------

    def __next_token(self):
        """Advance the tokenizer (if possible) and cache the current token."""
        if self.DEBUG:
            print(' ' * self.depth + 'curr_token: ' + self.curr_token)
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()
        self.curr_token_type = self.tokenizer.token_type()
        self.curr_token = self.tokenizer.keyword()

    def __next_label_index(self):
        """Return a fresh index for building unique labels."""
        index = self.label_index
        self.label_index += 1
        return index

    def print_open(self, string):
        """Debug trace: print *string* and increase the indent depth."""
        if self.DEBUG:
            print(' ' * self.depth + string)
            self.depth += 1

    def print_close(self, string):
        """Debug trace: decrease the indent depth and print *string*."""
        if self.DEBUG:
            self.depth -= 1
            print(' ' * self.depth + string)
class CompliationEngine(object):
    """
    Effects the actual compilation output. Gets its input from a
    JackTokenizer and emits VM code through a VMWriter.

    The Compile* methods pass the current token along explicitly: each one
    receives the first token of its construct and returns the first token
    that follows the construct (exceptions are noted per method).
    """

    # XML escapes for characters that cannot appear verbatim in XML text.
    MAP = {'<': "&lt;", '>': "&gt;", '"': "&quot;", '&': "&amp;"}

    def __init__(self, tokenizer, out_file_name):
        """
        Constructor

        :param tokenizer: object providing next_token() and expected_token()
        :param out_file_name: path handed to VMWriter
        """
        self._tokenizer = tokenizer
        self._vm_writer = VMWriter(out_file_name)
        self._class_name = None
        self._symbol_table = SymbolTable()
        self._counter = 0  # running number that keeps labels unique
        self._subroutine_name = None

    def Compile(self):
        """ Compile the input if it starts with the 'class' keyword """
        token = str(self._tokenizer.next_token())
        if token == 'class':
            self.CompileClass(token)

    def CompileClass(self, token):
        """ takes 'class' as token and end the compilation """
        self._class_name = str(self._tokenizer.next_token())  # class name
        self._tokenizer.next_token()  # '{'
        token = str(self._tokenizer.next_token())
        # class level variables
        while token in ('field', 'static'):
            token = self.CompileClassVarDec(token)
        # subroutines
        while token in ('function', 'method', 'constructor'):
            token = self.CompileSubroutine(token)
        self._vm_writer.writer_close()
        self._symbol_table.printSymbolTables()

    def CompileSubroutine(self, token):
        """
        Takes any among 'function', 'method', 'constructor'
        and returns the token that follows the subroutine's closing '}'
        """
        function_modifier = token
        self._tokenizer.next_token()  # return type
        function_name = str(self._tokenizer.next_token())  # name of function
        self._subroutine_name = function_name
        self._symbol_table.startSubRoutine(function_name)
        if function_modifier == 'method':
            # the receiver occupies argument 0 of every method
            self._symbol_table.define(['this', self._class_name, 'argument'])
        self._tokenizer.next_token()  # '('
        token = str(self._tokenizer.next_token())  # first type or ')'
        while token != ')':
            token = self.CompileParamList(token)
        self._tokenizer.next_token()  # '{'
        token = str(self._tokenizer.next_token())  # statements or '}'
        while token == 'var':
            token = self.CompileVarDec(token)
        # the local count is only known after all var declarations
        local_variables = self._symbol_table.varCount('local')
        self._vm_writer.write_subroutine(self._class_name, function_name,
                                         local_variables)
        # Decide by the declared kind, not by the name: a Jack constructor
        # is not required to be called 'new'.
        if function_modifier == 'constructor':
            no_of_fields = self._symbol_table.varCount('field')
            self._vm_writer.write_push('constant', no_of_fields)
            self._vm_writer.write_call('Memory', 'alloc', 1)
            self._vm_writer.write_pop('pointer', 0)
        if function_modifier == 'method':
            self._vm_writer.write_push('argument', 0)
            self._vm_writer.write_pop('pointer', 0)
        while token != '}':
            token = self.CompileStatements(token)
        return str(self._tokenizer.next_token())  # next subroutine or '}'

    def CompileStatements(self, token):
        """
        Compiles the single statement that starts with token and returns
        the token after it. Raises ValueError for any other token, because
        returning nothing would make every caller's loop spin forever.
        """
        if token == 'return':
            return self.CompileReturn(token)
        if token == 'do':
            return self.CompileDo(token)
        if token == 'let':
            return self.CompileLet(token)
        if token == 'while':
            return self.CompileWhile(token)
        if token == 'if':
            return self.CompileIf(token)
        raise ValueError("unexpected token at start of statement: %r"
                         % (token,))

    def CompileIf(self, token):
        """ Takes 'if' keyword and returns next statement token """
        self._counter += 1  # for linear label names
        # Build both labels now: nested statements change the counter.
        prefix = self._class_name + '.' + 'if.' + str(self._counter)
        label = prefix + '.L1'
        goto_label = prefix + '.L2'
        self._tokenizer.next_token()  # '('
        token = str(self._tokenizer.next_token())
        token = self.CompileExpression(token)  # returns ')'
        self._vm_writer.write_arithmatic('~')
        self._vm_writer.write_if_goto(label)
        self._tokenizer.next_token()  # '{'
        token = str(self._tokenizer.next_token())
        while token != '}':
            token = self.CompileStatements(token)
        self._vm_writer.write_goto(goto_label)
        self._vm_writer.write_label(label)
        # optional else
        token = str(self._tokenizer.next_token())
        if token == "else":
            token = self.CompileElse(token)
        self._vm_writer.write_label(goto_label)
        return token

    def CompileElse(self, token):
        """ Takes 'else' token and return next statement token """
        self._tokenizer.next_token()  # '{'
        token = str(self._tokenizer.next_token())
        while token != '}':
            token = self.CompileStatements(token)
        return str(self._tokenizer.next_token())

    def CompileWhile(self, token):
        """ Takes 'while' token and returns next statement token """
        self._counter += 1  # for linear label names
        # Build both labels now: nested statements change the counter.
        prefix = self._class_name + '.' + 'while.' + str(self._counter)
        label = prefix + '.L1'
        if_label = prefix + '.L2'
        self._vm_writer.write_label(label)
        self._tokenizer.next_token()  # '('
        token = str(self._tokenizer.next_token())
        token = self.CompileExpression(token)  # returns ')'
        self._vm_writer.write_arithmatic('~')  # ~cond
        self._vm_writer.write_if_goto(if_label)
        self._tokenizer.next_token()  # '{'
        token = str(self._tokenizer.next_token())
        while token != '}':
            token = self.CompileStatements(token)
        self._vm_writer.write_goto(label)  # back to the condition
        self._vm_writer.write_label(if_label)  # loop exit
        return str(self._tokenizer.next_token())

    def CompileDo(self, token):
        """ Takes 'do' token and returns next statement token """
        identifier = str(self._tokenizer.next_token())  # name before . or (
        token = str(self._tokenizer.next_token())  # '.' or '('
        class_name = identifier
        no_of_arguments = 0
        if token == ".":
            method_or_function = str(self._tokenizer.next_token())
            self._tokenizer.next_token()  # '('
            id_type = self._symbol_table.typeOf(identifier)
        else:
            # bare name: method of the current object
            class_name = self._class_name
            method_or_function = identifier
            no_of_arguments += 1
            self._vm_writer.write_push('pointer', '0')
            id_type = None
        token = str(self._tokenizer.next_token())  # first argument or ')'
        if id_type is not None:
            # identifier is a variable: push it as the receiver
            segment = self._symbol_table.kindOf(identifier)
            index = self._symbol_table.indexOf(identifier)
            self._vm_writer.write_push(segment, index)
            no_of_arguments += 1
            class_name = id_type
        if token != ')':
            token, no_arguments = self.CompilerExpressionList(token)  # ')'
            no_of_arguments += no_arguments
        self._vm_writer.write_call(class_name, method_or_function,
                                   no_of_arguments)
        self._tokenizer.next_token()  # ';'
        # the value returned by the call is not used by a do statement
        self._vm_writer.write_pop('temp', '0')
        return str(self._tokenizer.next_token())

    def CompileLet(self, token):
        """ Takes 'let' token and returns next statement token """
        identifier = str(self._tokenizer.next_token())  # left hand side
        segment = self._symbol_table.kindOf(identifier)
        index = str(self._symbol_table.indexOf(identifier))
        token = str(self._tokenizer.next_token())  # '=' or '['
        if_array = token == '['
        if if_array:
            token = str(self._tokenizer.next_token())
            token = self.CompileExpression(token)  # returns ']'
            self._vm_writer.write_push(segment, index)
            self._vm_writer.write_arithmatic('+')
            token = str(self._tokenizer.next_token())  # '='
        # right hand side expression
        token = str(self._tokenizer.next_token())
        token = self.CompileExpression(token)  # returns ';'
        if if_array:
            # Park the value in temp 0 so pointer 1 can be set safely.
            self._vm_writer.write_pop('temp', 0)
            self._vm_writer.write_pop('pointer', 1)
            self._vm_writer.write_push('temp', 0)
            self._vm_writer.write_pop('that', 0)
        else:
            self._vm_writer.write_pop(segment, index)
        return str(self._tokenizer.next_token())

    def CompileReturn(self, token):
        """
        Takes 'return' token and returns next statement token;
        a bare return pushes a dummy constant 0 first
        """
        token = str(self._tokenizer.next_token())  # ';'?
        if token == ';':
            self._vm_writer.write_push('constant', '0')
        else:
            token = self.CompileExpression(token)  # returns ';'
        self._vm_writer.write_return()
        return str(self._tokenizer.next_token())

    def CompilerExpressionList(self, token):
        """
        Takes the first token of a non-empty expression list and returns
        (token after the list, number of expressions)
        """
        no_of_argument = 1
        token = self.CompileExpression(token)
        while token == ",":
            no_of_argument += 1
            token = str(self._tokenizer.next_token())
            token = self.CompileExpression(token)
        return token, no_of_argument

    def CompileExpression(self, token):
        """
        Takes the first token of an expression and returns the token after
        it. Handles term (op term)*, applied strictly left to right.
        """
        token = self.CompileTerm(token)
        while token in Lexical.OP:
            operator = token
            token = str(self._tokenizer.next_token())  # next term
            token = self.CompileTerm(token)
            self._vm_writer.write_arithmatic(operator)
        return token

    def CompileTerm(self, token):
        """ Takes the term token and returns the token after the term """
        if token.isdigit():
            self._vm_writer.write_push('constant', token)
        elif token.startswith('"'):
            # string constant, still wrapped in its double quotes
            no_of_character = len(token) - 2
            self._vm_writer.write_push('constant', no_of_character)
            self._vm_writer.write_call('String', 'new', 1)
            for idx in range(1, len(token) - 1):
                self._vm_writer.write_push('constant', ord(token[idx]))
                self._vm_writer.write_call('String', 'appendChar', 2)
        elif token == 'true':
            # true is -1
            self._vm_writer.write_push('constant', '1')
            self._vm_writer.write_arithmatic('-', 'NEG')
        elif token in ('false', 'null'):
            self._vm_writer.write_push('constant', '0')
        elif token == 'this':
            self._vm_writer.write_push('pointer', '0')
        elif token == '-':
            return self.CompileNegOperator(token)
        elif token == "~":
            return self.CompileNotOperator(token)
        elif token == "(":
            token = str(self._tokenizer.next_token())
            token = self.CompileExpression(token)  # returns ')'
        elif self._tokenizer.expected_token() == "[":
            # array element: base + index, then read through pointer 1
            identifier = token
            index = self._symbol_table.indexOf(identifier)
            segment = self._symbol_table.kindOf(identifier)
            self._vm_writer.write_push(segment, index)
            self._tokenizer.next_token()  # '['
            token = str(self._tokenizer.next_token())
            token = self.CompileExpression(token)  # returns ']'
            self._vm_writer.write_arithmatic('+')
            self._vm_writer.write_pop('pointer', '1')
            self._vm_writer.write_push('that', '0')
        elif self._tokenizer.expected_token() == ".":
            # qualified subroutine call
            identifier = token
            self._tokenizer.next_token()  # '.'
            method_or_function = str(self._tokenizer.next_token())
            self._tokenizer.next_token()  # '('
            token = str(self._tokenizer.next_token())
            no_of_arguments = 0
            class_name = identifier
            id_type = self._symbol_table.typeOf(identifier)
            if id_type is not None:
                # identifier is a variable: push it as the receiver
                segment = self._symbol_table.kindOf(identifier)
                index = self._symbol_table.indexOf(identifier)
                self._vm_writer.write_push(segment, index)
                no_of_arguments += 1
                class_name = id_type
            if token != ")":
                token, no_arguments = self.CompilerExpressionList(token)
                no_of_arguments += no_arguments
            self._vm_writer.write_call(class_name, method_or_function,
                                       no_of_arguments)
        else:
            # plain variable
            identifier = token
            index = self._symbol_table.indexOf(identifier)
            segment = self._symbol_table.kindOf(identifier)
            self._vm_writer.write_push(segment, index)
        # step past the last token of the term
        return str(self._tokenizer.next_token())

    def CompileNegOperator(self, token):
        """ Takes '-' and returns the token after the negated term """
        token = self.CompileTerm(str(self._tokenizer.next_token()))
        self._vm_writer.write_arithmatic('-', 'NEG')
        return token

    def CompileNotOperator(self, token):
        """ Takes '~' and returns the token after the negated term """
        # CompileTerm already handles a parenthesised operand.
        token = self.CompileTerm(str(self._tokenizer.next_token()))
        self._vm_writer.write_arithmatic('~')
        return token

    def CompileParamList(self, token):
        """
        Takes the type of the first parameter, defines all parameters
        and returns the token after the last one (the closing ')')
        """
        while True:
            identifier = str(self._tokenizer.next_token())  # parameter name
            self._symbol_table.define([identifier, token, 'argument'])
            following = str(self._tokenizer.next_token())
            if following != ',':
                return following
            token = str(self._tokenizer.next_token())  # next type

    def CompileVarDec(self, token):
        """
        Takes 'var' as token, defines the declared locals and returns the
        token after the ';' (either 'var' or the first statement token)
        """
        id_type = str(self._tokenizer.next_token())  # type of the variables
        kind = 'local'
        identifier = str(self._tokenizer.next_token())  # identifier name
        self._symbol_table.define([identifier, id_type, kind])
        token = str(self._tokenizer.next_token())  # ',' or ';'
        while token == ',':
            identifier = str(self._tokenizer.next_token())
            self._symbol_table.define([identifier, id_type, kind])
            token = str(self._tokenizer.next_token())  # ',' or ';'
        return str(self._tokenizer.next_token())

    def CompileClassVarDec(self, token):
        """
        Takes 'field' or 'static' as token, defines every consecutive
        class variable declaration and returns the first token after them
        """
        while True:
            class_var_modifer = str(token)  # 'field' or 'static'
            # primitive or user defined class
            class_var_type = str(self._tokenizer.next_token())
            identifier = str(self._tokenizer.next_token())
            self._symbol_table.define(
                [identifier, class_var_type, class_var_modifer])
            token = str(self._tokenizer.next_token())  # ',' or ';'
            while token == ',':
                identifier = str(self._tokenizer.next_token())
                self._symbol_table.define(
                    [identifier, class_var_type, class_var_modifer])
                token = str(self._tokenizer.next_token())
            token = str(self._tokenizer.next_token())  # token after ';'
            if token not in ('field', 'static'):
                return token
class CompliationEngine(object): """ Effects the actual compilation output. Gets its input from a JackTokenizer and emits its parsed structure into an output file/stream """ MAP = {"<": "<", ">": ">", '"': """, "&": "&"} def __init__(self, tokenizer, out_file_name): """ Constructor """ self._tokenizer = tokenizer self._vm_writer = VMWriter(out_file_name) self._class_name = None self._symbol_table = SymbolTable() self._counter = 0 self._subroutine_name = None def Compile(self): token = str(self._tokenizer.next_token()) if token == "class": self.CompileClass(token) def CompileClass(self, token): """ takes 'class' as token and end the compilation """ self._class_name = self._tokenizer.next_token() # got the class name str(self._tokenizer.next_token()) # '{' token = self._tokenizer.next_token() # field declarations # For declaring Class Level Variable while token in ["field", "static"]: token = self.CompileClassVarDec(token) # Class Methods while token in ["function", "method", "constructor"]: token = self.CompileSubroutine(token) self._vm_writer.writer_close() self._symbol_table.printSymbolTables() def CompileSubroutine(self, token): """ Takes any among 'function', 'method', 'constructor' and return token after end of subroutine '}' or simple next subroutine token """ function_modifier = token str(self._tokenizer.next_token()) # return type function_name = str(self._tokenizer.next_token()) # name of function self._subroutine_name = function_name self._symbol_table.startSubRoutine(function_name) if function_modifier == "method": self._symbol_table.define(["this", self._class_name, "argument"]) str(self._tokenizer.next_token()) # '(' token = str(self._tokenizer.next_token()) # 'arguments' while token != ")": token = self.CompileParamList(token) str(self._tokenizer.next_token()) # '{' token = str(self._tokenizer.next_token()) # Statements or '}' while token == "var": token = self.CompileVarDec(token) local_variables = self._symbol_table.varCount("local") # Writing 
Function VM self._vm_writer.write_subroutine(self._class_name, function_name, local_variables) if function_name == "new": no_of_fields = self._symbol_table.varCount("field") self._vm_writer.write_push("constant", no_of_fields) self._vm_writer.write_call("Memory", "alloc", 1) self._vm_writer.write_pop("pointer", 0) if function_modifier == "method": self._vm_writer.write_push("argument", 0) self._vm_writer.write_pop("pointer", 0) """temp_buffer = "" while local_variables > 0: temp_buffer += 'push constant 0\n' local_variables -= 1 self._out_file_object.write(temp_buffer) self._out_file_object.flush()""" while token != "}": token = self.CompileStatements(token) token = str(self._tokenizer.next_token()) # next subroutine return token def CompileStatements(self, token): if token == "return": return self.CompileReturn(token) if token == "do": return self.CompileDo(token) if token == "let": return self.CompileLet(token) if token == "while": return self.CompileWhile(token) if token == "if": return self.CompileIf(token) def CompileIf(self, token): """ Takes 'if' keyword and returns next statement token """ self._counter += 1 # for linear label names str(self._tokenizer.next_token()) # '(' token = str(self._tokenizer.next_token()) token = self.CompileExpression(token) # returns ')' self._vm_writer.write_arithmatic("~") label = self._class_name + "." + "if." + str(self._counter) + ".L1" self._vm_writer.write_if_goto(label) str(self._tokenizer.next_token()) # '}' token = str(self._tokenizer.next_token()) goto_label = self._class_name + "." + "if." 
+ str(self._counter) + ".L2" while token != "}": token = self.CompileStatements(token) self._vm_writer.write_goto(goto_label) self._vm_writer.write_label(label) # optional else Command token = str(self._tokenizer.next_token()) if token == "else": token = self.CompileElse(token) self._vm_writer.write_label(goto_label) return token def CompileElse(self, token): """ Takes 'else' token and return next statement token """ str(self._tokenizer.next_token()) # '{' token = str(self._tokenizer.next_token()) while token != "}": token = self.CompileStatements(token) token = str(self._tokenizer.next_token()) return token def CompileWhile(self, token): """ Takes 'while' token and returns next statement token """ self._counter += 1 # for linear label names label = self._class_name + "." + "while." + str(self._counter) + ".L1" self._vm_writer.write_label(label) str(self._tokenizer.next_token()) # '(' token = str(self._tokenizer.next_token()) token = self.CompileExpression(token) # 'returns ')' self._vm_writer.write_arithmatic("~") # ~cond if_label = self._class_name + "." + "while." 
+ str(self._counter) + ".L2" self._vm_writer.write_if_goto(if_label) str(self._tokenizer.next_token()) # '{' token = str(self._tokenizer.next_token()) while token != "}": token = self.CompileStatements(token) self._vm_writer.write_goto(label) # 'goto label' self._vm_writer.write_label(if_label) # label for next statement token = str(self._tokenizer.next_token()) return token def CompileDo(self, token): identifier = str(self._tokenizer.next_token()) # identifer or class name token = str(self._tokenizer.next_token()) class_name = identifier no_of_arguments = 0 if token == ".": method_or_function = str(self._tokenizer.next_token()) str(self._tokenizer.next_token()) # '(' id_type = self._symbol_table.typeOf(identifier) else: class_name = self._class_name method_or_function = identifier no_of_arguments += 1 self._vm_writer.write_push("pointer", "0") id_type = None token = str(self._tokenizer.next_token()) if id_type != None: segment = self._symbol_table.kindOf(identifier) index = self._symbol_table.indexOf(identifier) self._vm_writer.write_push(segment, index) no_of_arguments += 1 class_name = id_type no_arguments = 0 if token != ")": token, no_arguments = self.CompilerExpressionList(token) # return value is ')' no_of_arguments += no_arguments self._vm_writer.write_call(class_name, method_or_function, no_of_arguments) str(self._tokenizer.next_token()) # ';' # 'void functions will return constant 0 which should be discarded' self._vm_writer.write_pop("temp", "0") token = str(self._tokenizer.next_token()) return token def CompileLet(self, token): """ Function receiver 'let' and return ';' """ identifier = str(self._tokenizer.next_token()) # left hand side identifier segment = self._symbol_table.kindOf(identifier) index = str(self._symbol_table.indexOf(identifier)) token = str(self._tokenizer.next_token()) # = or [ if_array = False if token == "[": if_array = True token = str(self._tokenizer.next_token()) token = self.CompileExpression(token) # ']' 
self._vm_writer.write_push(segment, index) self._vm_writer.write_arithmatic("+") # Equal Expression token = str(self._tokenizer.next_token()) # Right Hand Side Expression token = str(self._tokenizer.next_token()) token = self.CompileExpression(token) # End Statements if if_array: self._vm_writer.write_pop("temp", 0) self._vm_writer.write_pop("pointer", 1) self._vm_writer.write_push("temp", 0) self._vm_writer.write_pop("that", 0) else: self._vm_writer.write_pop(segment, index) token = str(self._tokenizer.next_token()) return token def CompileReturn(self, token): """ Takes 'return' token if simple return pushes dummy constant and returns 0 """ token = str(self._tokenizer.next_token()) # ';'? if token == ";": self._vm_writer.write_push("constant", "0") else: token = self.CompileExpression(token) # ';' self._vm_writer.write_return() return str(self._tokenizer.next_token()) def CompilerExpressionList(self, token): no_of_argument = 1 token = self.CompileExpression(token) # returns ',' while token == ",": no_of_argument += 1 token = str(self._tokenizer.next_token()) token = self.CompileExpression(token) return token, no_of_argument def CompileExpression(self, token): """ Expression """ token = self.CompileTerm(token) if token in Lexical.OP: operator = token token = str(self._tokenizer.next_token()) # Next term token = self.CompileTerm(token) self._vm_writer.write_arithmatic(operator) return token def CompileTerm(self, token): """ Takes the term token and returns the token after the term """ if token.isdigit(): self._vm_writer.write_push("constant", token) elif token[0] == '"': no_of_character = len(token) - 2 # removing " self._vm_writer.write_push("constant", no_of_character) self._vm_writer.write_call("String", "new", 1) for idx in range(1, len(token) - 1): self._vm_writer.write_push("constant", ord(token[idx])) self._vm_writer.write_call("String", "appendChar", 2) elif token == "true": self._vm_writer.write_push("constant", "1") self._vm_writer.write_arithmatic("-", 
"NEG") elif token in ["false", "null"]: self._vm_writer.write_push("constant", "0") elif token == "this": self._vm_writer.write_push("pointer", "0") elif token == "-": return self.CompileNegOperator(token) elif token == "~": return self.CompileNotOperator(token) elif token == "(": token = str(self._tokenizer.next_token()) # Term token token = self.CompileExpression(token) # Returns ')' elif self._tokenizer.expected_token() == "[": identifier = token index = self._symbol_table.indexOf(identifier) segment = self._symbol_table.kindOf(identifier) self._vm_writer.write_push(segment, index) str(self._tokenizer.next_token()) # '[' token = str(self._tokenizer.next_token()) token = self.CompileExpression(token) # return value is ']' self._vm_writer.write_arithmatic("+") self._vm_writer.write_pop("pointer", "1") self._vm_writer.write_push("that", "0") elif self._tokenizer.expected_token() == ".": identifier = token str(self._tokenizer.next_token()) # '.' method_or_function = str(self._tokenizer.next_token()) str(self._tokenizer.next_token()) # '(' token = str(self._tokenizer.next_token()) no_of_arguments = 0 class_name = identifier id_type = self._symbol_table.typeOf(identifier) print identifier, id_type if id_type != None: segment = self._symbol_table.kindOf(identifier) index = self._symbol_table.indexOf(identifier) self._vm_writer.write_push(segment, index) no_of_arguments += 1 class_name = id_type no_arguments = 0 if token != ")": token, no_arguments = self.CompilerExpressionList(token) no_of_arguments += no_arguments self._vm_writer.write_call(class_name, method_or_function, no_of_arguments) else: identifier = token index = self._symbol_table.indexOf(identifier) segment = self._symbol_table.kindOf(identifier) self._vm_writer.write_push(segment, index) token = str(self._tokenizer.next_token()) return token def CompileNegOperator(self, token): token = str(self._tokenizer.next_token()) token = self.CompileTerm(token) self._vm_writer.write_arithmatic("-", "NEG") return token 
def CompileNotOperator(self, token):
    """Compile a unary ``~`` term and return the token that follows it.

    :param token: the ``~`` token itself (only used as the entry marker).
    :return: the first token after the negated operand, as a string.
    """
    token = str(self._tokenizer.next_token())
    if token == "(":
        # Parenthesised operand: compile the inner expression, which (per the
        # original comments) hands back the closing ')', then step past it.
        # NOTE(review): the collapsed source does not show whether this last
        # advance belongs to this branch only; this placement mirrors how the
        # '(' case of CompileTerm behaves -- confirm against the original file.
        token = str(self._tokenizer.next_token())
        token = self.CompileExpression(token)
        token = str(self._tokenizer.next_token())
    else:
        token = self.CompileTerm(token)
    self._vm_writer.write_arithmatic("~")
    return token


def CompileParamList(self, token):
    """Register every parameter of a subroutine as an ``argument`` symbol.

    :param token: the type of the first parameter.
    :return: the first token after the last parameter name (a string).
    """
    id_type = token
    while True:
        identifier = str(self._tokenizer.next_token())
        self._symbol_table.define([identifier, id_type, "argument"])
        token = str(self._tokenizer.next_token())
        if token != ",":
            return token
        # After a comma the next token is the type of the following parameter.
        id_type = str(self._tokenizer.next_token())


def CompileVarDec(self, token):
    """Register the names of one ``var`` declaration as ``local`` symbols.

    :param token: the keyword that opened the declaration (not inspected).
    :return: the token following the declaration's terminator, as a string.
    """
    id_type = str(self._tokenizer.next_token())
    separator = ","
    while separator == ",":
        identifier = str(self._tokenizer.next_token())
        self._symbol_table.define([identifier, id_type, "local"])
        separator = str(self._tokenizer.next_token())  # ',' or ';'
    return str(self._tokenizer.next_token())


def CompileClassVarDec(self, token):
    """Register consecutive ``field`` / ``static`` declarations.

    Every token read from the tokenizer is normalised with ``str()`` before
    it is compared, matching the other Compile* methods; the original
    compared the raw tokenizer result against ``","`` and the modifiers.

    :param token: ``'field'`` or ``'static'`` opening the first declaration.
    :return: the first token after the last class-variable declaration,
        as a string.
    """
    modifier = str(token)
    while True:
        # Primitive or user-defined class name.
        var_type = str(self._tokenizer.next_token())
        separator = ","
        while separator == ",":
            identifier = str(self._tokenizer.next_token())
            self._symbol_table.define([identifier, var_type, modifier])
            separator = str(self._tokenizer.next_token())  # ',' or ';'
        following = str(self._tokenizer.next_token())
        if following not in ("field", "static"):
            return following
        # Another declaration follows immediately: handle it in this loop
        # instead of recursing once per declaration.
        modifier = following
class CompilationEngine:
    """Recursive-descent compiler from a Jack token stream to VM commands.

    Tokens are consumed from an iterator; one-token lookahead is implemented
    by pushing an already-read token back in front of the iterator with
    ``itertools.chain``.  Output goes through ``self.writer``.
    """

    def __init__(self, _tokens, _in_path, _out_file):
        """Prepare the engine.

        :param _tokens: iterable of token objects exposing ``.token`` and
            ``.type``.
        :param _in_path: path of the source file; its base name (without
            extension) prefixes every emitted function name.
        :param _out_file: destination handed to ``VMWriter``.
        """
        self.tokens = iter(_tokens)
        self.file_name = str(split(_in_path)[1].split('.')[0])
        self.out_file = _out_file
        self.writer = VMWriter(_out_file)
        self.sym_table = SymbolTable()
        self.class_name = ''
        self.curr_subroutine_name = ''
        self.curr_cond_index = 0

    # ------------------------------------------------------------------
    # token-stream helpers
    # ------------------------------------------------------------------
    def _advance(self):
        """Consume and return the next token."""
        return next(self.tokens)

    def _push_back(self, token):
        """Put *token* back so that it is the next one returned."""
        self.tokens = itertools.chain([token], self.tokens)

    def _new_label(self):
        """Return a fresh ``L<n>`` label and bump the label counter."""
        label = 'L' + str(self.curr_cond_index)
        self.curr_cond_index += 1
        return label

    # ------------------------------------------------------------------
    # class level
    # ------------------------------------------------------------------
    def CompileClass(self):
        """Compile a whole class: header, class variables, subroutines."""
        self._advance()  # class
        self.class_name = self._advance().token
        self._advance()  # {
        self.CompileClassVarDec()
        subroutine_kinds = (
            Keyword.FUNCTION.value,
            Keyword.METHOD.value,
            Keyword.CONSTRUCTOR.value,
        )
        curr_token = self._advance()
        while curr_token.token in subroutine_kinds:
            self.CompileSubroutineDEC(curr_token.token)
            curr_token = self._advance()

    def CompileClassVarDec(self):
        """Define every ``field`` / ``static`` variable in the symbol table."""
        sym_kind = self._advance()
        while sym_kind.token in (Keyword.FIELD.value, Keyword.STATIC.value):
            sym_type = self._advance()
            more_vars = True
            while more_vars:
                sym_name = self._advance()
                more_vars = self._advance().token == Symbol.COMMA.value
                self.sym_table.define(sym_name.token, sym_type.token,
                                      sym_kind.token)
            sym_kind = self._advance()
        # The token that ended the loop is not a class-variable keyword.
        self._push_back(sym_kind)

    # ------------------------------------------------------------------
    # subroutines
    # ------------------------------------------------------------------
    def CompileSubroutineDEC(self, sub_type):
        """Compile one subroutine whose kind keyword was already consumed.

        :param sub_type: the kind keyword text that opened the declaration.
        """
        self.sym_table.startSubroutine()
        self._advance()  # return type (not needed for code generation)
        self.curr_subroutine_name = (
            self.file_name + '.' + self._advance().token)
        self._advance()  # (
        if sub_type == constants.METHOD:
            # Methods receive the object as their first argument.
            self.sym_table.define('this', self.class_name, constants.ARG)
        self.CompileParameterList()
        self._advance()  # )
        self.CompileSubroutineBody(sub_type)

    def CompileParameterList(self):
        """Define the parameters as arguments; the closing ')' is left unread."""
        arg_type = self._advance()
        if arg_type.token == Symbol.PAREN_CLOSE.value:
            self._push_back(arg_type)
            return
        more_vars = True
        while more_vars:
            arg_name = self._advance()
            curr_token = self._advance()  # , or )
            more_vars = curr_token.token == Symbol.COMMA.value
            self.sym_table.define(arg_name.token, arg_type.token,
                                  constants.ARG)
            if more_vars:
                arg_type = self._advance()
            else:
                self._push_back(curr_token)

    def CompileSubroutineBody(self, sub_type):
        """Compile ``{ varDec* statements }`` and emit the function prologue."""
        self._advance()  # {
        self.CompileVarDec()
        # Locals are known only after the var declarations were read.
        self.writer.write_function(self.curr_subroutine_name,
                                   self.sym_table.var_count(constants.VAR))
        if sub_type == constants.METHOD:
            self.writer.write_push(constants.ARG, 0)
            self.writer.write_pop(constants.POINTER, 0)
        if sub_type == Keyword.CONSTRUCTOR.value:
            self.CompileCtorAlloc()
        self.CompileStatements()
        self._advance()  # }

    def CompileCtorAlloc(self):
        """Emit the allocation of one word per field and anchor ``pointer 0``."""
        self.writer.write_push(constants.CONST,
                               self.sym_table.var_count(constants.FIELD))
        self.writer.write_call('Memory.alloc', 1)
        self.writer.write_pop(constants.POINTER, 0)

    def CompileVarDec(self):
        """Define all local variables of consecutive ``var`` declarations."""
        curr_token = self._advance()
        while curr_token.token == Keyword.VAR.value:
            var_type = self._advance()  # type
            self.CompileInlineVars(var_type.token)
            curr_token = self._advance()
        self._push_back(curr_token)

    def CompileInlineVars(self, var_type):
        """Define the comma-separated names of one declaration.

        :param var_type: declared type shared by all names in the list.
        """
        has_more = True
        while has_more:
            var_name = self._advance()  # identifier
            separator = self._advance()  # , or ;
            has_more = separator.token == Symbol.COMMA.value
            self.sym_table.define(var_name.token, var_type, constants.VAR)

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def CompileStatements(self):
        """Compile statements until a non-statement token is met.

        The terminating token is pushed back for the caller.
        """
        handlers = {
            Keyword.LET.value: self.CompileLet,
            Keyword.DO.value: self.CompileDo,
            Keyword.RETURN.value: self.CompileReturn,
            Keyword.WHILE.value: self.CompileWhile,
            Keyword.IF.value: self.CompileIf,
        }
        state = self._advance()
        while state.token in handlers:
            handlers[state.token]()
            state = self._advance()
        self._push_back(state)

    def CompileLet(self):
        """Compile ``let name = expr;`` or ``let name[expr] = expr;``."""
        left_value = self._advance()  # var name
        segment = segment_map[self.sym_table.kind_of(left_value.token)]
        index = self.sym_table.index_of(left_value.token)
        curr_token = self._advance()  # [ or =
        if curr_token.token == Symbol.BRACKET_OPEN.value:
            self.CompileArrayAccess(segment, index, True)
        else:
            self.CompileExpression()
            self.writer.write_pop(segment, index)
        self._advance()  # ;

    def CompileWhile(self):
        """Compile a ``while`` loop using two fresh labels."""
        loop_label = self._new_label()
        exit_label = self._new_label()
        self._advance()  # (
        self.writer.write_label(loop_label)
        self.CompileExpression()
        self._advance()  # )
        self.writer.write_arithmetic(constants.NOT)
        self.writer.write_if(exit_label)
        self._advance()  # {
        self.CompileStatements()
        self.writer.write_goto(loop_label)
        self._advance()  # }
        self.writer.write_label(exit_label)

    def CompileIf(self):
        """Compile ``if`` with an optional ``else`` branch."""
        else_label = self._new_label()
        end_label = self._new_label()
        self._advance()  # (
        self.CompileExpression()
        self.writer.write_arithmetic(constants.NOT)
        self.writer.write_if(else_label)
        self._advance()  # )
        self._advance()  # {
        self.CompileStatements()
        self.writer.write_goto(end_label)
        self.writer.write_label(else_label)
        self._advance()  # }
        curr_token = self._advance()
        if curr_token.token == Keyword.ELSE.value:
            self._advance()  # {
            self.CompileStatements()
            self._advance()  # }
        else:
            self._push_back(curr_token)
        self.writer.write_label(end_label)

    def CompileReturn(self):
        """Compile ``return;`` (pushes constant 0) or ``return expr;``."""
        curr_token = self._advance()
        if curr_token.token != Symbol.SEMI_COLON.value:
            self._push_back(curr_token)
            self.CompileExpression()
            self._advance()  # ;
        else:
            self.writer.write_push(constants.CONST, 0)
        self.writer.write_return()

    def CompileDo(self):
        """Compile ``do call;``; the call's result is discarded."""
        self.compileSubroutineCall(True)
        self._advance()  # ;

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def CompileExpression(self):
        """Compile ``term (op term)*``.

        The original handled a single ``op term`` pair only, so an expression
        such as ``a + b + c`` stopped after ``a + b`` and left ``+ c`` in the
        stream for the caller to trip over.  Operators are applied left to
        right in the order they appear.
        """
        self.CompileTerm()
        binary_ops = [operator.value for operator in Operator]
        curr_token = self._advance()
        while curr_token.token in binary_ops:
            self.CompileTerm()
            self.writer.write_arithmetic(curr_token.token)
            curr_token = self._advance()
        self._push_back(curr_token)

    def CompileTerm(self):
        """Compile a single term and emit the code that pushes its value."""
        curr_token = self._advance()
        if curr_token.type == TokenType.IDENTIFIER.value:
            next_token = self._advance()
            if next_token.token == Symbol.BRACKET_OPEN.value:
                arr_segment = segment_map[
                    self.sym_table.kind_of(curr_token.token)]
                arr_index = self.sym_table.index_of(curr_token.token)
                self.CompileArrayAccess(arr_segment, arr_index, False)
            elif next_token.token in (Symbol.PERIOD.value,
                                      Symbol.PAREN_OPEN.value):
                # Restore both tokens; the call compiler re-reads them.
                self._push_back(next_token)
                self._push_back(curr_token)
                self.compileSubroutineCall(False)
            else:
                self._push_back(next_token)
                self.writer.write_push(
                    segment_map[self.sym_table.kind_of(curr_token.token)],
                    self.sym_table.index_of(curr_token.token))
        elif curr_token.type == constants.TokenType.INT_CONST.value:
            self.writer.write_push(constants.CONST, str(curr_token.token))
        elif curr_token.type == constants.TokenType.STRING_CONST.value:
            self.CompileStringConstant(curr_token.token)
        elif curr_token.token == constants.Keyword.THIS.value:
            self.writer.write_push(constants.POINTER, 0)
        elif curr_token.type == constants.TokenType.KEYWORD.value:
            if curr_token.token == 'null' or curr_token.token == 'false':
                self.writer.write_push(constants.CONST, 0)
            elif curr_token.token == 'true':
                # true is emitted as the negation of constant 1.
                self.writer.write_push(constants.CONST, 1)
                self.writer.write_arithmetic('neg')
        else:
            if curr_token.token in [unary.value for unary in UnaryOperator]:
                self.CompileTerm()
                self.writer.write_arithmetic(
                    constants.unary_operators_map[curr_token.token])
            elif curr_token.token == Symbol.PAREN_OPEN.value:
                self.CompileExpression()
                self._advance()  # )

    def CompileStringConstant(self, str_const):
        """Emit code that builds *str_const* one character at a time."""
        self.writer.write_push(constants.CONST, len(str_const))
        self.writer.write_call('String.new', 1)
        for char in str_const:
            self.writer.write_push(constants.CONST, ord(char))
            self.writer.write_call('String.appendChar', 2)

    def CompileArrayAccess(self, arr_kind, arr_index, is_let):
        """Compile ``arr[expr]`` as a value or as a ``let`` target.

        The opening ``[`` has already been consumed by the caller.

        :param arr_kind: VM segment holding the array variable.
        :param arr_index: index of the array variable in that segment.
        :param is_let: True when the access is the left side of a ``let``;
            the ``= expr`` part is then compiled here as well.
        """
        self.writer.write_push(arr_kind, arr_index)  # arr
        self.CompileExpression()  # index expression
        self.writer.write_arithmetic('add')
        self._advance()  # ]
        if is_let:
            self._advance()  # =
            self.CompileExpression()
            # Park the value in temp 0 so the target address can be moved
            # into pointer 1 first.
            self.writer.write_pop(constants.TEMP, 0)
            self.writer.write_pop(constants.POINTER, 1)
            self.writer.write_push(constants.TEMP, 0)
            self.writer.write_pop(constants.THAT, 0)
        else:
            self.writer.write_pop(constants.POINTER, 1)
            self.writer.write_push(constants.THAT, 0)

    def CompileExpressionList(self):
        """Compile a comma-separated expression list.

        The closing ``)`` is left unread.

        :return: number of expressions compiled.
        """
        first = self._advance()
        self._push_back(first)
        if first.token == Symbol.PAREN_CLOSE.value:
            return 0
        count = 0
        while True:
            count += 1
            self.CompileExpression()
            separator = self._advance()
            if separator.token != Symbol.COMMA.value:
                self._push_back(separator)
                return count

    def compileSubroutineCall(self, is_void):
        """Compile ``name(args)``, ``var.name(args)`` or ``Class.name(args)``.

        :param is_void: when True the returned value is popped to temp 0.
        """
        num_express = 0
        prefix = self._advance().token
        next_token = self._advance()  # ( or .
        is_qualified = next_token.token == Symbol.PERIOD.value
        if is_qualified and self.sym_table.type_of(prefix) is not None:
            # Call on a known variable: push it as the hidden first argument.
            function_name = (self.sym_table.type_of(prefix) + '.'
                             + self._advance().token)
            self._advance()  # (
            self.writer.write_push(
                segment_map[self.sym_table.kind_of(prefix)],
                self.sym_table.index_of(prefix))
            num_express = 1
        elif is_qualified:
            # Prefix is not a known variable: use it verbatim as class name.
            function_name = prefix + '.' + self._advance().token
            self._advance()  # (
        else:
            # Unqualified call: treated as a method on the current object.
            function_name = self.class_name + '.' + prefix
            self.writer.write_push(constants.POINTER, 0)
            num_express = 1
        num_express += self.CompileExpressionList()
        self.writer.write_call(function_name, num_express)
        if is_void:
            self.writer.write_pop(constants.TEMP, 0)
        self._advance()  # )
class CompilationEngine:
    """Jack-to-VM compiler driven by an indexed list of ``(type, text)`` tokens.

    ``eat`` consumes the token at the cursor, ``peek*`` inspect upcoming
    tokens without consuming them.  Class-level and subroutine-level symbols
    live in two separate ``SymbolTable`` objects.
    """

    # Jack binary operator -> VM arithmetic command.  '*' and '/' are listed
    # so they are recognised as operators; compile_operation emits OS calls
    # for them instead of these names.
    all_operators = {
        "+": "add",
        "-": "sub",
        "/": "div",
        "*": "mul",
        "&": "and",
        "|": "or",
        ">": "gt",
        "<": "lt",
        "=": "eq"
    }

    # Control characters that are spelled out as backslash sequences before a
    # string constant is emitted.
    _STRING_ESCAPES = str.maketrans(
        {'\t': "\\t", '\n': "\\n", '\r': "\\r", '\b': "\\b"})

    def __init__(self, tokens, out_file):
        """
        initializing a new compile engine object and compiling the class
        :param tokens: the list of tokens created by the tokenizer
        :param out_file: the output file.
        """
        self.__tokens = tokens
        self.__file = out_file
        self.__i = 0
        self.__class_symbol = SymbolTable()
        self.__subroutine_symbol = SymbolTable()
        self.__cur_token = ()
        self.__class_name = ""
        self.__writer = VMWriter(out_file)
        self.__label_count = 0
        try:
            self.compile_class()
        finally:
            # Close the writer even when compilation fails part-way, so the
            # output is not left open.
            self.__writer.close()

    # ------------------------------------------------------------------
    # token cursor
    # ------------------------------------------------------------------
    def eat(self):
        """
        consuming the token under the cursor and moving to the next one
        """
        self.__cur_token = self.__tokens[self.__i]
        self.__i += 1

    def get_token(self):
        """
        :return: the text of the token consumed last
        """
        return self.__cur_token[1]

    def peek(self):
        """
        checking the next token without consuming it
        :return: the token text
        """
        return self.__tokens[self.__i][1]

    def peek_type(self):
        """
        checking the next token type without consuming it
        :return: the token type
        """
        return self.__tokens[self.__i][0]

    def peek_ll2(self):
        """
        checking the token after the next one without consuming anything
        :return: the token text
        """
        return self.__tokens[self.__i + 1][1]

    def _next_label(self):
        """
        :return: a fresh label name "L<n>"
        """
        label = "L%s" % self.__label_count
        self.__label_count += 1
        return label

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compile_while_stat(self):
        """
        compiling while statement
        """
        self.eat()  # while
        self.eat()  # (
        label_true = self._next_label()
        label_continue = self._next_label()
        self.__writer.write_label(label_true)
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        self.__writer.write_if(label_continue)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_true)
        self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_return_stat(self):
        """
        compiling return statement; a bare return pushes constant 0
        """
        self.eat()  # return
        if self.peek() != ";":
            self.compile_expression()
        else:
            self.__writer.write_push("constant", 0)
        self.__writer.write_return()
        self.eat()  # ;

    def compile_do_stat(self):
        """
        compiling do statement; the call result is popped to temp 0
        """
        self.eat()  # do
        self.compile_subroutine_call()
        self.__writer.write_pop("temp", 0)
        self.eat()  # ;

    def compile_if_stat(self):
        """
        compiling if statement with an optional else branch
        """
        self.eat()  # if
        self.eat()  # (
        self.compile_expression()
        self.__writer.write_arithmetic("not")
        label_false = self._next_label()
        label_continue = self._next_label()
        self.__writer.write_if(label_false)
        self.eat()  # )
        self.eat()  # {
        self.compile_statements()
        self.__writer.write_go_to(label_continue)
        self.eat()  # }
        self.__writer.write_label(label_false)
        if self.peek() == "else":
            self.eat()  # else
            self.eat()  # {
            self.compile_statements()
            self.eat()  # }
        self.__writer.write_label(label_continue)

    def compile_let_stat(self):
        """
        compiling let statement
        """
        self.eat()  # let
        self.eat()  # variable name
        name = self.get_token()
        data = self.find(name)
        kind = data[1]
        ind = data[2]
        if kind == "field":
            # fields are written through the "this" segment
            kind = "this"
        if self.peek() == "[":
            self.compile_array(kind, ind)
        else:
            self.eat()  # =
            self.compile_expression()
            self.__writer.write_pop(kind, ind)
        self.eat()  # ;

    def compile_statement(self):
        """
        compiling a single statement
        :return: 1 when a statement was compiled, 0 when the next token does
        not start a statement
        """
        handlers = {
            "if": self.compile_if_stat,
            "while": self.compile_while_stat,
            "do": self.compile_do_stat,
            "return": self.compile_return_stat,
            "let": self.compile_let_stat,
        }
        handler = handlers.get(self.peek())
        if handler is None:
            return 0  # no more statements to compile
        handler()
        return 1

    def compile_statements(self):
        """
        compiling statements until a non-statement token is reached
        """
        while self.compile_statement():
            continue

    # ------------------------------------------------------------------
    # declarations
    # ------------------------------------------------------------------
    def compile_class_var_dec(self):
        """
        compiling class variable declaration
        """
        self.eat()  # static / field
        kind = self.get_token()
        if kind == "var":
            kind = SymbolTable.VAR
        self.var_dec_helper(kind, self.__class_symbol)

    def compile_var_dec(self):
        """
        compiling local variable declaration
        """
        self.eat()  # var
        self.var_dec_helper(SymbolTable.VAR, self.__subroutine_symbol)

    def var_dec_helper(self, kind, symbol_table):
        """
        reading "type name (, name)* ;" and adding every name to a table
        :param kind: the kind recorded for every declared name
        :param symbol_table: the table receiving the names
        """
        self.eat()
        var_type = self.get_token()
        self.eat()
        symbol_table.add(self.get_token(), var_type, kind)
        while self.peek() != ";":
            self.eat()  # ,
            self.eat()  # next name
            symbol_table.add(self.get_token(), var_type, kind)
        self.eat()  # ;

    def compile_subroutine_body(self, func_name, func_type):
        """
        compiling subroutine body
        :param func_name: the full "Class.name" written to the output
        :param func_type: the keyword that opened the declaration
        """
        self.eat()  # {
        while self.peek() == "var":
            self.compile_var_dec()
        # the number of locals is known only after all var declarations
        self.__writer.write_function(
            func_name, self.__subroutine_symbol.var_count(SymbolTable.VAR))
        self.__subroutine_symbol.add("this", self.__class_name, "pointer")
        if func_type == "method":
            self.__writer.write_push(SymbolTable.ARG, 0)
            self.__writer.write_pop("pointer", 0)
        elif func_type == "constructor":
            self.__writer.write_push(
                "constant", self.__class_symbol.var_count(SymbolTable.FIELD))
            self.__writer.write_call("Memory.alloc", 1)
            self.__writer.write_pop("pointer", 0)
        self.compile_statements()
        self.eat()  # }

    def compile_parameter_list(self):
        """
        compiling parameters list; the closing ")" is not consumed
        """
        if self.peek() == ")":
            return
        separator = ","
        first = True
        while first or separator == ",":
            if not first:
                self.eat()  # ,
            first = False
            self.eat()
            par_type = self.get_token()
            self.eat()
            par_name = self.get_token()
            self.__subroutine_symbol.add(par_name, par_type, SymbolTable.ARG)
            separator = self.peek()

    def compile_class(self):
        """
        compiling class
        """
        self.eat()  # class
        self.eat()  # class name
        self.__class_name = self.get_token()
        self.eat()  # {
        while self.peek() in ("static", "field"):
            self.compile_class_var_dec()
        while self.peek() != "}":
            self.compile_subroutine_dec()
        self.eat()  # }

    def compile_subroutine_dec(self):
        """
        compiling subroutine declaration
        """
        self.eat()  # constructor / function / method
        func_type = self.get_token()
        self.eat()  # return type
        self.eat()  # subroutine name
        func_name = self.__class_name + "." + self.get_token()
        self.eat()  # (
        if func_type == "method":
            self.__subroutine_symbol.add("this", self.__class_name,
                                         SymbolTable.ARG)
        self.compile_parameter_list()
        self.eat()  # )
        self.compile_subroutine_body(func_name, func_type)
        # every subroutine starts from an empty local table
        self.__subroutine_symbol = SymbolTable()

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def compile_expression(self):
        """
        compiling expression: term (op term)*
        """
        self.compile_term()
        cur_stat = self.peek()
        while cur_stat in CompilationEngine.all_operators:
            self.eat()
            self.compile_term()
            self.compile_operation(cur_stat)
            cur_stat = self.peek()

    def compile_operation(self, op):
        """
        compiling operation
        :param op: current op
        """
        if op == "*":
            self.__writer.write_call("Math.multiply", 2)
        elif op == "/":
            self.__writer.write_call("Math.divide", 2)
        else:
            self.__writer.write_arithmetic(CompilationEngine.all_operators[op])

    def compile_subroutine_call(self):
        """
        compiling subroutine call
        """
        self.eat()
        name = self.get_token()
        if self.peek() == "(":
            # unqualified call: the current object is the hidden argument
            self.eat()  # (
            self.__writer.write_push("pointer", 0)
            args = self.compile_expression_list()
            self.eat()  # )
            self.__writer.write_call(self.__class_name + "." + name, args + 1)
            return
        self.eat()  # .
        val = self.find(name)
        self.eat()  # subroutine name
        var_name = self.get_token()
        self.eat()  # (
        if not val:
            # not a known variable: the prefix is used as a class name
            args = 0
        else:
            # known variable: push it and call through its recorded type
            self.__writer.push_val(val)
            name = val[0]
            args = 1
        args += self.compile_expression_list()
        self.__writer.write_call(name + "." + var_name, args)
        self.eat()  # )

    def compile_expression_list(self):
        """
        compiling expression list; the closing ")" is not consumed
        :return: the number of expressions compiled
        """
        args = 0
        if self.peek() != ")":
            self.compile_expression()
            args += 1
        while self.peek() == ",":
            self.eat()  # ,
            args += 1
            self.compile_expression()
        return args

    def compile_term(self):
        """
        compiling term
        """
        token_type = self.peek_type()
        if token_type == JackTokenizer.INT_CONST:
            self.__writer.write_push("constant", self.peek())
            self.eat()
            return
        if token_type == JackTokenizer.KEYWORD:
            keyword = self.peek()
            if keyword == "null" or keyword == "false":
                self.__writer.write_push("constant", 0)
            elif keyword == "true":
                # true is emitted as not(0)
                self.__writer.write_push("constant", 0)
                self.__writer.write_arithmetic("not")
            elif keyword == "this":
                self.__writer.write_push("pointer", 0)
            self.eat()
            return
        if token_type == JackTokenizer.STR_CONST:
            string = self.peek().translate(CompilationEngine._STRING_ESCAPES)
            self.__writer.write_push("constant", len(string))
            self.__writer.write_call("String.new", 1)
            for ch in string:
                self.__writer.write_push("constant", ord(ch))
                self.__writer.write_call("String.appendChar", 2)
            self.eat()
            return

        cur_stat = self.peek()
        if cur_stat == "(":
            self.eat()  # (
            self.compile_expression()
            self.eat()  # )
            return
        if cur_stat == "-":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("neg")
            return
        if cur_stat == "~":
            self.eat()
            self.compile_term()
            self.__writer.write_arithmetic("not")
            return

        # an identifier: the token after it decides what kind of term it is
        cur_stat = self.peek_ll2()
        if cur_stat == "[":
            self.eat()  # array name
            name = self.get_token()
            self.__writer.push_val(self.find(name))
            self.eat()  # [
            self.compile_expression()
            self.__writer.write_arithmetic("add")
            self.__writer.write_pop("pointer", 1)
            self.__writer.write_push("that", 0)
            self.eat()  # ]
            return
        if cur_stat == "." or cur_stat == "(":
            self.compile_subroutine_call()
            return
        self.eat()  # varName
        name = self.get_token()
        self.__writer.push_val(self.find(name))
        return

    def find(self, name):
        """
        finding a variable name in symbol tables; the subroutine table is
        searched first, then the class table
        :param name: the variable name
        :return: the data of the first table that knows the name, otherwise
        whatever the class table returns for an unknown name
        """
        val = self.__subroutine_symbol.get_data(name)
        if not val:
            val = self.__class_symbol.get_data(name)
        return val

    def compile_array(self, kind, index):
        """
        compiling array assignment
        :param kind: var kind
        :param index: var index
        """
        self.eat()  # [
        self.compile_expression()
        self.eat()  # ]
        self.__writer.write_push(kind, index)
        self.__writer.write_arithmetic("add")
        self.eat()  # =
        self.compile_expression()
        # keep the value in temp 0 while the target address goes to pointer 1
        self.__writer.write_pop("temp", 0)
        self.__writer.write_pop("pointer", 1)
        self.__writer.write_push("temp", 0)
        self.__writer.write_pop("that", 0)
class CompilationEngine: def __init__(self, tokenizer: JackTokenizer, jack_file): self.tokenizer = tokenizer self.class_name = '' log_file_name = jack_file.name.replace('.jack', '_engine.xml') self.log_file = open(log_file_name, 'w') log_file_name = jack_file.name.replace('.jack', '.vm') self.output_file = open(log_file_name, 'w') self.symbol_table = SymbolTable() self.vm_writer = VMWriter(self.output_file) self.while_label_index = 0 self.if_label_index = 0 def compile(self): self.compile_class(0) def advance(self): """return current token""" return self.tokenizer.advance() def next(self) -> Token: return self.tokenizer.next() def compile_token(self, token, indentation, limits=None): print(token.content, end=' ') if limits is not None: if isinstance(limits, list) and token.token_type not in limits: raise RuntimeError(token, 'can be only', limits) if isinstance(limits, str) and token.content != limits: raise RuntimeError(token, 'can be only', limits) self.log(token, indentation) def log_node(self, msg, indentation): space = '' for i in range(0, indentation): space += ' ' self.log_file.write('{1}<{0}>\n'.format(msg, space)) def log(self, token, indentation): txt = token.content if txt == '<': txt = '<' elif txt == '>': txt = '>' elif txt == '\"': txt = '"' elif txt == '&': txt = '&' space = '' for i in range(0, indentation): space += ' ' # 2 spaces self.log_file.write('{2}<{0}> {1} </{0}>\n'.format( token.token_type, txt, space)) def compile_class(self, indentation): """ Compiles a complete class. 
""" self.log_file.write('<class>\n') # 'class' advance = self.advance() self.compile_token(advance, indentation + 1) # class name advance = self.advance() self.class_name = advance.content self.compile_token(advance, indentation + 1) # set class name to vm-writer self.vm_writer.set_class_name(advance.content) # { advance = self.advance() self.compile_token(advance, indentation + 1, "{") # classVarDec* subroutineDec* advance = self.advance() while advance.content != '}': if (advance.content == 'constructor' or advance.content == 'function' or advance.content == 'method'): self.compile_subroutine(advance, indentation + 1) elif advance.content in ['field', 'static']: self.compile_class_var_dec(advance, indentation + 1) elif advance.content != '}': raise RuntimeError( advance, 'Only subroutine and variable can be declared here') advance = self.advance() # } self.compile_token(advance, indentation + 1, '}') self.log_file.write('</class>\n') self.log_file.flush() print("\ncompilation success") return def compile_class_var_dec(self, token, indentation): """ passing token as an argument, because the caller has already called the advance function once Compiles a static declaration or a field declaration. 
""" self.log_node('classVarDec', indentation) # static or field kind = token.content.upper() self.compile_token(token, indentation + 1) token = self.advance() var_type = token.content self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # var name token = self.advance() var_name = token.content self.compile_token(token, indentation + 1, [IDENTIFIER]) self.symbol_table.define(var_name, var_type, kind) token = self.advance() while token.content == ',': self.compile_token(token, indentation + 1, ',') token = self.advance() var_name = token.content self.symbol_table.define(var_name, var_type, kind) self.compile_token(token, indentation + 1, [IDENTIFIER]) token = self.advance() # ; self.compile_token(token, indentation + 1, ';') self.log_node('/classVarDec', indentation) return def compile_subroutine(self, token, indentation): """ Compiles a complete method, function, or constructor. """ # reset symbol table for subroutine self.symbol_table.start_subroutine() self.log_node('subroutineDec', indentation) # function/method/constructor function_type = token.content self.compile_token(token, indentation + 1) # void | type token = self.advance() self.compile_token(token, indentation + 1) # subroutine name token = self.advance() subroutine_name = token.content self.compile_token(token, indentation + 1) # ( token = self.advance() self.compile_token(token, indentation + 1) # parameter list exists if function_type == 'method': self.symbol_table.define('this_placeholder', "THIS", ARG) pass token = self.advance() self.compile_parameter_list(token, indentation + 1) if token.content != ')': token = self.advance() # ) self.compile_token(token, indentation + 1, ')') # { token = self.advance() self.compile_subroutine_body(token, indentation + 1, subroutine_name, function_type) self.log_node('/subroutineDec', indentation) return def compile_subroutine_body(self, token, indentation, subroutine_name, function_type='function'): self.log_node('subroutineBody', indentation) 
self.compile_token(token, indentation + 1, '{') token = self.advance() n_locals = 0 if token.content == 'var': n_locals = self.compile_var_dec(token, indentation + 1) token = self.advance() self.vm_writer.write_functions(subroutine_name, n_locals) # todo 处理constructor if function_type == 'constructor': # number of fields self.vm_writer.write_push('CONST', self.symbol_table.var_count(FIELD)) self.vm_writer.write_call('Memory.alloc', 1) self.vm_writer.write_pop('POINTER', 0, 'set this pointer') elif function_type == 'method': # if it is a method, always set arg 0 to pointer 0(this) self.vm_writer.write_push(ARG, 0) self.vm_writer.write_pop('POINTER', 0) pass # if this token is '}' means the function has an empty body if token.content == '}': # TODO 空函数体的处理 # empty body print('empty body', token) pass else: self.compile_statements(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, '}') self.log_node('/subroutineBody', indentation) def compile_parameter_list(self, token, indentation): """Compiles a (possibly empty) parameter list, not including the enclosing ‘‘ () ’’.""" self.log_node('parameterList', indentation) while token.content != ')': param_symbol = Symbol() param_symbol.kind = ARG # parameter type self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) param_symbol.symbol_type = token.content # parameter name token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) param_symbol.name = token.content self.symbol_table.define_symbol(param_symbol) if self.next() is not None and self.next().content == ',': # compile , token = self.advance() self.compile_token(token, indentation + 1) token = self.advance() continue elif self.next() is not None and self.next().content == ')': # this function does not consumes ')' so didn't call advance() break else: token = self.advance() self.log_node('/parameterList', indentation) return def compile_var_dec(self, token, indentation) -> int: """ Compiles 
a var declaration.""" # var_symbol = Symbol() # # var # self.compile_token(token, indentation + 1, 'var') # var_symbol.kind = VAR # # var type # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # var_symbol.symbol_type = token.content # # var name # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # var_symbol.name = token.content # # , or ; # token = self.advance() # while token.content != ';': # self.compile_token(token, indentation + 1, ',') # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) # token = self.advance() # self.compile_token(token, indentation + 1, ';') var_count = 0 while token.content == 'var': self.log_node('varDec', indentation) var_count += 1 var_symbol = Symbol() # var self.compile_token(token, indentation + 1, 'var') var_symbol.kind = VAR # var type token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) var_symbol.symbol_type = token.content # var name token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER, KEYWORD]) var_symbol.name = token.content self.symbol_table.define_symbol(var_symbol) # next token may be ',' or ';' token = self.advance() # if next token is ',' while token.content == ',': var_count += 1 self.compile_token(token, indentation + 1, ',') # var name token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER]) # only name differs, types are the same self.symbol_table.define(token.content, var_symbol.symbol_type, VAR) token = self.advance() if token.content == ';': self.compile_token(token, indentation + 1, ';') if self.next().content == 'var': token = self.advance() self.log_node('/varDec', indentation) return var_count def compile_statements(self, token, indentation): """Compiles a sequence of statements, not including the enclosing ‘‘{}’’.""" self.log_node('statements', indentation) while token.content != '}': if token.content 
== 'let': self.compile_let(token, indentation + 1) pass elif token.content == 'if': self.compile_if(token, indentation + 1) pass elif token.content == 'while': self.compile_while(token, indentation + 1) pass elif token.content == 'do': self.compile_do(token, indentation + 1) pass elif token.content == 'return': self.compile_return(token, indentation + 1) pass else: raise RuntimeError('unknown type in statements %s') if self.next() is not None and self.next().content == '}': break else: token = self.advance() self.log_node('/statements', indentation) return def compile_do(self, token: Token, indentation): self.log_node('doStatement', indentation) self.compile_token(token, indentation + 1, 'do') token = self.advance() self.compile_term(token, indentation + 1, do_term=True) self.vm_writer.write_pop('TEMP', 0, 'do call') token = self.advance() self.compile_token(token, indentation + 1, ';') # maybe a local subroutine or someone else's # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER]) # function_class_name = token.content # token = self.advance() # if token.content == '.': # # someone else 's # self.compile_token(token, indentation + 1, '.') # token = self.advance() # self.compile_token(token, indentation + 1, [IDENTIFIER]) # function_name = token.content # token = self.advance() # self.compile_token(token, indentation + 1, '(') # token = self.advance() # n_arg = self.compile_expression_list(token, indentation + 1) # self.vm_writer.write_call(function_class_name + '.' 
+ function_name, n_arg) # # do calls must 'pop temp 0', because void functions always returns 0 # self.vm_writer.write_pop('TEMP', 0, 'do call') # if token.content != ')': # token = self.advance() # self.compile_token(token, indentation + 1, ')') # pass # else: # self.compile_token(token, indentation + 1, '(') # token = self.advance() # self.compile_expression_list(token, indentation + 1) # if token.content != ')': # token = self.advance() # self.compile_token(token, indentation + 1, ')') # # local method # pass # token = self.advance() # self.compile_token(token, indentation + 1, ';') self.log_node('/doStatement', indentation) return def compile_let(self, token: Token, indentation): """let length = Keyboard.readInt("HOW MANY NUMBERS? ");""" self.log_node('letStatement', indentation) # let self.compile_token(token, indentation + 1, 'let') # length token = self.advance() self.compile_token(token, indentation + 1, [IDENTIFIER]) var_name = token.content # = or [ token = self.advance() array = False if token.content == '[': array = True self.compile_token(token, indentation + 1, '[') token = self.advance() # e.g x[y] # push y to stack self.compile_expression(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, ']') token = self.advance() # push x to stack self.write_push(var_name) # add x and y self.vm_writer.write_arithmetic('ADD') # # pop the result to THAT # self.vm_writer.write_pop('POINTER', 1) self.vm_writer.write_pop('TEMP', 2) pass self.compile_token(token, indentation + 1, '=') # expression token = self.advance() self.compile_expression(token, indentation + 1) if array: self.vm_writer.write_push('TEMP', 2) self.vm_writer.write_pop('POINTER', 1) self.vm_writer.write_pop('THAT', 0) pass else: if self.symbol_table.kind_of(var_name) == VAR: self.vm_writer.write_pop('LOCAL', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == ARG: self.vm_writer.write_pop('ARG', 
self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == FIELD: self.vm_writer.write_pop('THIS', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == STATIC: self.vm_writer.write_pop('STATIC', self.symbol_table.index_of(var_name), var_name) pass # ; token = self.advance() self.compile_token(token, indentation + 1, ';') self.log_node('/letStatement', indentation) return def write_push(self, var_name): if self.symbol_table.kind_of(var_name) == VAR: self.vm_writer.write_push('LOCAL', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == ARG: self.vm_writer.write_push('ARG', self.symbol_table.index_of(var_name), var_name) pass elif self.symbol_table.kind_of(var_name) == FIELD: self.vm_writer.write_push('THIS', self.symbol_table.index_of(var_name), var_name) pass def compile_while(self, token: Token, indentation): while_label_pre = 'WHILE_%s' % self.while_label_index # label index++ self.while_label_index += 1 self.vm_writer.write_label('%s_EXP' % while_label_pre) self.log_node('whileStatement', indentation) self.compile_token(token, indentation + 1, 'while') token = self.advance() self.compile_token(token, indentation + 1, '(') token = self.advance() self.vm_writer.write_comment("calculating while condition expression") # expression self.compile_expression(token, indentation + 1) # ) token = self.advance() self.compile_token(token, indentation + 1, ')') self.vm_writer.write_arithmetic('NOT') # checking condition expression self.vm_writer.write_if('%s_END' % while_label_pre) # { token = self.advance() self.compile_token(token, indentation + 1, '{') # statements token = self.advance() if token.content != '}': # not empty statement self.compile_statements(token, indentation + 1) # } token = self.advance() self.compile_token(token, indentation + 1, '}') self.vm_writer.write_goto('%s_EXP' % while_label_pre) self.vm_writer.write_label('%s_END' % 
while_label_pre) self.log_node('/whileStatement', indentation) return def compile_return(self, token: Token, indentation): self.log_node('returnStatement', indentation) self.compile_token(token, indentation + 1, 'return') token = self.advance() if token.content != ';': self.compile_expression(token, indentation + 1) token = self.advance() self.vm_writer.write_return() else: # for functions that return void, it must return an integer 0 self.vm_writer.write_return(True) pass self.compile_token(token, indentation + 1, ';') self.log_node('/returnStatement', indentation) return def compile_if(self, token: Token, indentation): # if_label_pre = 'IF_%s' % self.if_label_index else_label = 'ELSE_%s' % self.if_label_index finish_label = 'FINISH_%s' % self.if_label_index # label index++ self.if_label_index += 1 self.log_node('ifStatement', indentation) self.compile_token(token, indentation + 1, 'if') token = self.advance() self.compile_token(token, indentation + 1, '(') self.vm_writer.write_comment("calculating if condition expression") token = self.advance() # expression self.compile_expression(token, indentation + 1) # ) token = self.advance() self.compile_token(token, indentation + 1, ')') self.vm_writer.write_arithmetic('NOT') self.vm_writer.write_if(else_label) # { token = self.advance() self.compile_token(token, indentation + 1, '{') # statements token = self.advance() if token.content != '}': # not empty statement self.compile_statements(token, indentation + 1) # } token = self.advance() self.compile_token(token, indentation + 1, '}') if self.next().content == 'else': """ if statements... (else vm code) goto FINISH // if statements finished, pass the else code lable ELSE else statements... 
label FINISH """ self.vm_writer.write_goto(finish_label) self.vm_writer.write_label(else_label) token = self.advance() self.compile_token(token, indentation + 1, 'else') token = self.advance() self.compile_token(token, indentation + 1, '{') token = self.advance() self.compile_statements(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, '}') self.vm_writer.write_label(finish_label) else: """ if statements... (no else vm code) label ELSE """ self.vm_writer.write_label(else_label) pass self.log_node('/ifStatement', indentation) return def compile_expression(self, token, indentation): self.log_node('expression', indentation) self.compile_term(token, indentation + 1) while self.next() is not None and self.next( ).content in OP_SYMBOLS.keys(): token = self.advance() self.compile_token(token, indentation + 1, [SYMBOL]) op_symbol = OP_SYMBOLS[token.content] token = self.advance() self.compile_term(token, indentation + 1) # call op function after pushes the second parameter self.vm_writer.write_arithmetic(op_symbol) self.log_node('/expression', indentation) return def compile_term(self, token: Token, indentation, do_term=False): if not do_term: self.log_node('term', indentation) if token.token_type == INT_CONST: self.compile_token(token, indentation + 1, [INT_CONST]) # todo self.vm_writer.write_push('CONST', token.content) pass elif token.token_type == STRING_CONST: """ // construct a string push constant 3 call String.new 1 // the address of string is now on the stack push constant 72 call String.appendChar 2 push constant 73 call String.appendChar 2 push constant 74 call String.appendChar 2 // string construct complete its still on sp """ length = len(token.content) self.vm_writer.write_push('CONST', length) self.vm_writer.write_call('String.new', 1) for c in token.content: self.vm_writer.write_push('CONST', ord(c)) self.vm_writer.write_call('String.appendChar', 2) pass self.compile_token(token, indentation + 1) # keyword constant 
elif token.content == 'true': self.compile_token(token, indentation + 1) self.vm_writer.write_push('CONST', 1) self.vm_writer.write_arithmetic('NEG') pass elif token.content == 'false': self.compile_token(token, indentation + 1) self.vm_writer.write_push('CONST', 0) pass elif token.content == 'this': self.compile_token(token, indentation + 1) self.vm_writer.write_push('POINTER', 0) pass elif token.content == 'null': self.compile_token(token, indentation + 1) self.vm_writer.write_push('CONST', 0) pass elif token.content in ['true', 'false', 'null', 'this']: self.compile_token(token, indentation + 1) self.vm_writer.write_push('POINTER', 0) # self.vm_writer.write_comment('%s not implemented' % token.content) pass elif self.next().content == '[': self.compile_token(token, indentation + 1, [IDENTIFIER]) self.write_push(token.content) token = self.advance() self.compile_token(token, indentation + 1, '[') token = self.advance() self.compile_expression(token, indentation + 1) token = self.advance() self.compile_token(token, indentation + 1, ']') self.vm_writer.write_arithmetic('ADD') self.vm_writer.write_pop('POINTER', 1) self.vm_writer.write_push('THAT', 0) pass elif token.content == '(': self.compile_token(token, indentation + 1, '(') token = self.advance() self.compile_expression(token, indentation + 1) if token.content != ')': token = self.advance() self.compile_token(token, indentation + 1, ')') pass elif token.content in UNARY_OP_SYMBOL.keys(): self.compile_token(token, indentation + 1) unary_op = UNARY_OP_SYMBOL[token.content] token = self.advance() self.compile_term(token, indentation + 1) self.vm_writer.write_arithmetic(unary_op) # elif self.next().content == ';': # # varname # self.compile_token(token, indentation + 1) # pass elif self.next().content == '(': # method call n_arg = 1 self.vm_writer.write_push('POINTER', 0) # self.vm_writer.write_pop(ARG, 0) function_class_name = self.class_name function_name = token.content self.compile_token(token, indentation + 
1, [IDENTIFIER]) token = self.advance() self.compile_token(token, indentation + 1, '(') token = self.advance() n_arg += self.compile_expression_list(token, indentation + 1) self.vm_writer.write_call( function_class_name + '.' + function_name, n_arg) if token.content != ')': token = self.advance() self.compile_token(token, indentation + 1, ')') pass elif self.next().content == '.': # static function call # class name n_arg = 0 function_class_name = token.content if self.symbol_table.index_of(function_class_name) > -1: n_arg += 1 self.vm_writer.write_push( self.symbol_table.kind_of(function_class_name), self.symbol_table.index_of(function_class_name), function_class_name) function_class_name = self.symbol_table.type_of( function_class_name) self.compile_token(token, indentation + 1, [IDENTIFIER]) token = self.advance() self.compile_token(token, indentation + 1, '.') # function name token = self.advance() function_name = token.content self.compile_token(token, indentation + 1, [IDENTIFIER]) token = self.advance() self.compile_token(token, indentation + 1, '(') token = self.advance() n_arg += self.compile_expression_list(token, indentation + 1) self.vm_writer.write_call( function_class_name + '.' 
+ function_name, n_arg) if token.content != ')': token = self.advance() self.compile_token(token, indentation + 1, ')') pass elif token.token_type == IDENTIFIER: # varName self.compile_token(token, indentation + 1, [IDENTIFIER]) # todo 处理不同情形 if self.symbol_table.kind_of(token.content) == VAR: self.vm_writer.write_push( 'LOCAL', self.symbol_table.index_of(token.content), token.content) elif self.symbol_table.kind_of(token.content) == ARG: self.vm_writer.write_push( 'ARG', self.symbol_table.index_of(token.content), token.content) pass elif self.symbol_table.kind_of(token.content) == FIELD: self.vm_writer.write_push( 'FIELD', self.symbol_table.index_of(token.content), token.content) pass elif self.symbol_table.kind_of(token.content) == STATIC: self.vm_writer.write_push( 'STATIC', self.symbol_table.index_of(token.content), token.content) pass pass else: raise RuntimeError("Uncaught situation", token) if not do_term: self.log_node('/term', indentation) return def compile_expression_list(self, token: Token, indentation) -> int: self.log_node('expressionList', indentation) n_expression = 0 while token.content != ')': n_expression += 1 self.compile_expression(token, indentation + 1) if self.next() is not None and self.next().content == ',': # multiple expression list token = self.advance() self.compile_token(token, indentation + 1, ',') token = self.advance() elif self.next() is not None and self.next().content == ')': break else: print('UNEXPECTED token in compile_expression_list', token) token = self.advance() self.log_node('/expressionList', indentation) return n_expression
class CompilationEngine:
    """Recursive-descent code generator that turns Jack tokens into VM commands.

    The engine walks the token stream exposed by a ``JackTokenizer`` and emits
    commands through a ``VMWriter`` created in :meth:`compile_class`.  Tokens
    are read as indexable pairs: element 0 is the token type and element 1 is
    the token text.

    Cursor convention used by the statement compilers: they may be entered
    either *on* their keyword or one token *before* it (the statement loop
    dispatches on a peeked token), so each one first synchronises onto its
    keyword via :meth:`_sync_to_keyword`.
    """

    def __init__(self, input_stream: str, jack_tokenizer: "JackTokenizer"):
        """
        creates a new compilation engine for one jack source file.
        :param input_stream: path of the ``.jack`` file; the ``.jack`` suffix
        is stripped for the VM writer name and swapped for ``.xml`` for the
        output file name
        :param jack_tokenizer: tokenizer already loaded with the file's tokens
        """
        self.tokenizer = jack_tokenizer
        self.tokens = jack_tokenizer.get_tokens()
        self.file_name = input_stream.replace(".jack", "")
        self.output_file_name = input_stream.replace(".jack", ".xml")
        # NOTE(review): the xml file is created (and truncated) here, but no
        # method of this class writes to it; it is only closed in compile().
        self.output_file = open(self.output_file_name, "wb")
        self.current_class_name = None
        self.root = None
        self.label_counter = 0
        self.tree = None
        # how many identifiers of each kind were declared at class level
        self.identifier_counter = {LOCAL: 0, ARGUMENT: 0, STATIC: 0, FIELD: 0}
        self.symbol_table = SymbolTable()
        # created lazily in compile_class()
        self.VMWriter = None

    # ------------------------------------------------------------------ #
    # small private helpers
    # ------------------------------------------------------------------ #

    def _current_text(self):
        """Return the text (element 1) of the tokenizer's current token."""
        return self.tokenizer.get_current_token()[1]

    def _peek_text(self):
        """Return the text (element 1) of the token after the current one."""
        return self.tokenizer.peek_at_next_token()[1]

    def _sync_to_keyword(self, keyword) -> None:
        """Advance once if the cursor is not already on ``keyword``."""
        if self._current_text() != keyword:
            self.tokenizer.advance()

    def _segment_and_index(self, var_name):
        """Look ``var_name`` up and return its ``(segment, index)`` pair.

        Raises ``TypeError`` when the name is in neither symbol table, because
        the lookup result is then ``None`` and cannot be subscripted.
        """
        variable = self.get_variable_of_table(var_name)
        segment = SymbolTable.get_segment(variable[KIND])
        return segment, variable[INDEX]

    def _define_class_variable(self, name, type_var, kind) -> None:
        """Count a class-level variable and register it in the symbol table."""
        # anything that is not STATIC is counted as a FIELD
        counter_key = STATIC if kind == STATIC else FIELD
        self.identifier_counter[counter_key] += 1
        self.symbol_table.define(name, type_var, kind)

    # ------------------------------------------------------------------ #
    # entry points
    # ------------------------------------------------------------------ #

    def compile(self) -> None:
        """
        method to compile jack file and close file afterwards
        :return: none
        """
        try:
            self.tokenizer.advance()
            self.compile_class()
        finally:
            # release the handle opened in __init__ even when compilation fails
            self.output_file.close()

    def compile_class(self) -> None:
        """
        compiles a class: its variable declarations and its subroutines
        :return: None
        """
        # create VMWriter for current class
        self.VMWriter = VMWriter(self.file_name)
        # was 'class', now class name
        self.tokenizer.advance()
        self.current_class_name = self._current_text()
        # was class name, now '{'
        self.tokenizer.advance()
        # was '{', now class body
        self.tokenizer.advance()
        while self.tokenizer.has_more_tokens():
            token_string = self._current_text()
            if CompilationEngine.is_class_field(token_string):
                self.compile_class_var_declaration()
            elif CompilationEngine.is_subroutine(token_string):
                self.compile_subroutine()
            else:
                # neither branch would move the cursor, so looping again
                # could never terminate
                break
        # step over the closing '}' of the class
        self.tokenizer.advance()

    # ------------------------------------------------------------------ #
    # token classification
    # ------------------------------------------------------------------ #

    @staticmethod
    def is_subroutine(token: str) -> bool:
        """
        method to check if token is subroutine
        :param token: string of current token
        :return: true if subroutine declaration, false otherwise
        """
        return token in ("constructor", "function", "method")

    @staticmethod
    def is_var_declare(token: str) -> bool:
        """
        method to check if token opens a local variable declaration
        :param token: string of current token
        :return: true if token is "var", false otherwise
        """
        return token == "var"

    @staticmethod
    def is_class_field(token: str) -> bool:
        """
        method to check if token is class field
        :param token: string of current token
        :return: true if class field declaration, false otherwise
        """
        return token in ("static", "field")

    @staticmethod
    def is_statement(token: str) -> bool:
        """
        method to check if token is a statement keyword
        :param token: string of current token
        :return: true if token is one of LET, IF, WHILE, DO, RETURN
        """
        return token in (LET, IF, WHILE, DO, RETURN)

    def insert_next_token(self, root) -> None:
        """
        appends the current token as a sub element of root, then advances
        :param root: element that receives the new sub element
        :return: none
        """
        current_token = self.tokenizer.get_current_token()
        token_type = current_token[0]
        token_string = current_token[1]
        if token_type == JackTokenizer.STRING_TYPE:
            # drop the first and last character of a string token
            # (presumably the surrounding quotes)
            token_string = token_string[1:-1]
        etree.SubElement(root, token_type).text = " " + token_string + " "
        self.tokenizer.advance()

    # ------------------------------------------------------------------ #
    # declarations
    # ------------------------------------------------------------------ #

    def compile_class_var_declaration(self) -> None:
        """
        compiles a class level variable declaration:
        kind type name (',' name)* ';'
        :return: None
        """
        # field | static
        kind = self._current_text()
        self.tokenizer.advance()
        # variable type
        type_var = self._current_text()
        self.tokenizer.advance()
        # first variable name
        name = self._current_text()
        self.tokenizer.advance()
        self._define_class_variable(name, type_var, kind)
        # further names share the same type and kind
        while self.tokenizer.current_word == COMMA:
            # was ',', now name
            self.tokenizer.advance()
            name = self._current_text()
            self._define_class_variable(name, type_var, kind)
            # was name, now ',' or ';'
            self.tokenizer.advance()
        # step over ';'
        self.tokenizer.advance()

    def compile_subroutine(self) -> None:
        """
        compiles a complete method function or constructor
        :return: None
        """
        # restart as a new subroutine
        self.symbol_table.start_subroutine()
        # constructor | function | method
        subroutine_type = self._current_text()
        if subroutine_type == METHOD:
            # a method gets the object itself as its first argument
            self.symbol_table.define(THIS, self.current_class_name, ARGUMENT)
        # was subroutine type, now return type
        self.tokenizer.advance()
        # was return type, now subroutine name
        self.tokenizer.advance()
        subroutine_name = (self.current_class_name + DOT
                           + self._current_text())
        # was name, now '('
        self.tokenizer.advance()
        self.compile_parameter_list()
        # was ')', now '{'
        self.tokenizer.advance()
        self.compile_subroutine_body(subroutine_name, subroutine_type)
        # step past the end of the subroutine
        self.tokenizer.advance()

    def compile_subroutine_body(self, subroutine_name: str,
                                subroutine_type: str):
        """
        method to compile subroutine body: var declarations, the function
        header with its constructor/method prologue, then the statements
        :param subroutine_name: full "Class.name" used in the function line
        :param subroutine_type: CONSTRUCTOR, METHOD, or anything else
        :return: None
        """
        # number of object fields, used as the allocation size of constructors
        n_fields = self.symbol_table.variable_counter[FIELD]
        var_count = 0
        # was '{', now first token of the body
        self.tokenizer.advance()
        while CompilationEngine.is_var_declare(self._current_text()):
            var_count += self.compile_var_declaration()
        # function declare line
        self.VMWriter.write_function(subroutine_name, var_count)
        if subroutine_type == CONSTRUCTOR:
            # allocate the object and anchor it:
            # push <n_fields>, call the allocation method, pop pointer 0
            self.VMWriter.write_push(CONSTANT, n_fields)
            self.VMWriter.write_call(ALLOCATION_METHOD, ONE_NUM)
            self.VMWriter.write_pop(POINTER, ZERO_NUM)
        elif subroutine_type == METHOD:
            # anchor the receiver: push argument 0, pop pointer 0
            self.VMWriter.write_push(ARGUMENT, ZERO_NUM)
            self.VMWriter.write_pop(POINTER, ZERO_NUM)
        # subroutine statements
        self.compile_statements()
        self.tokenizer.advance()

    def compile_var_declaration(self) -> int:
        """
        method to compile one var declaration line:
        'var' type name (',' name)* ';'
        :return: number of variables declared on the line
        """
        # declaration keyword, stored as the symbol kind
        kind = self._current_text()
        self.tokenizer.advance()
        # variable type
        type_var = self._current_text()
        self.tokenizer.advance()
        # first variable name
        name = self._current_text()
        self.tokenizer.advance()
        self.symbol_table.define(name, type_var, kind)
        var_count = 1
        while self.tokenizer.current_word == COMMA:
            var_count += 1
            # was ',', now name
            self.tokenizer.advance()
            name = self._current_text()
            # was name, now ',' or ';'
            self.tokenizer.advance()
            self.symbol_table.define(name, type_var, kind)
        # step over ';'
        self.tokenizer.advance()
        return var_count

    def compile_parameter_list(self) -> int:
        """
        compiles a (CAN BE EMPTY) parameter list. Entered on the opening
        parenthesis; every parameter is defined as an ARGUMENT symbol.
        :return: var count of parameter list
        """
        var_count = 0
        # was '(', now first parameter type or end of list
        self.tokenizer.advance()
        if self._current_text() != END_OF_PARAM_LIST:
            var_count += 1
            kind = ARGUMENT
            var_type = self._current_text()
            # was type, now name
            self.tokenizer.advance()
            name = self._current_text()
            # was name, now ',' or end of list
            self.tokenizer.advance()
            self.symbol_table.define(name, var_type, kind)
            while self._current_text() == COMMA:
                var_count += 1
                # was ',', now type
                self.tokenizer.advance()
                var_type = self._current_text()
                # was type, now name
                self.tokenizer.advance()
                name = self._current_text()
                self.symbol_table.define(name, var_type, kind)
                # was name, now ',' or end of list
                self.tokenizer.advance()
        return var_count

    # ------------------------------------------------------------------ #
    # statements
    # ------------------------------------------------------------------ #

    def compile_statements(self) -> None:
        """
        compiles a sequence of statements not including the enclosing {}
        :return: None
        """
        upcoming = self._current_text()
        if upcoming == END_OF_CLASS:
            # nothing to compile
            return
        # The first statement is dispatched on the current token; every later
        # one is dispatched on the token *after* the cursor, and the statement
        # compiler itself steps onto its keyword.
        while CompilationEngine.is_statement(upcoming):
            if upcoming == LET:
                self.compile_let()
            elif upcoming == IF:
                self.compile_if()
            elif upcoming == WHILE:
                self.compile_while()
            elif upcoming == DO:
                self.compile_do()
            elif upcoming == RETURN:
                self.compile_return()
            upcoming = self._peek_text()

    def compile_do(self) -> None:
        """
        compiles a do statement:
        do subroutineName(...) ;  or  do name.subroutineName(...) ;
        :return: None
        """
        self._sync_to_keyword(DO)
        # was 'do', now subroutine name, class name or variable name
        self.tokenizer.advance()
        rout_or_class_name = self._current_text()
        if self._peek_text() != START_OF_PARAM_LIST:
            # qualified call: step onto the separator so compile_call sees it
            self.tokenizer.advance()
        self.compile_call(rout_or_class_name)
        # now comes ';'
        self.tokenizer.advance()
        # the value returned by the call is not used: pop it into temp 0
        self.VMWriter.write_pop(TEMP, ZERO_NUM)

    def compile_let(self) -> None:
        """
        compiles a let statement
        --------------------
        let "var_name" = "expression" ;
        let "var_name" [ "expression" ] = "expression" ;
        --------------------
        :return: None
        """
        self._sync_to_keyword(LET)
        # was 'let', now variable name
        self.tokenizer.advance()
        var_name = self._current_text()
        # was variable name, now '=' or array opener
        self.tokenizer.advance()
        is_array_target = self._current_text() == ARRAY_OPENER
        if is_array_target:
            # leaves the target address on the stack and the cursor on '='
            self.calculate_memory_location(var_name)
        # was '=', now first token of the right hand side
        self.tokenizer.advance()
        self.compile_expression()
        # now comes ';'
        self.tokenizer.advance()
        if is_array_target:
            # stack holds: target address, value
            # pop temp 0 ; pop pointer 1 ; push temp 0 ; pop that 0
            self.VMWriter.write_pop(TEMP, ZERO_NUM)
            self.VMWriter.write_pop(POINTER, ONE_NUM)
            self.VMWriter.write_push(TEMP, ZERO_NUM)
            self.VMWriter.write_pop(THAT, ZERO_NUM)
        else:
            segment, var_index = self._segment_and_index(var_name)
            self.VMWriter.write_pop(segment, var_index)

    def calculate_memory_location(self, var_name):
        """
        method to push the address of an array element (index + base).
        Entered on the array opener, leaves the cursor two tokens after the
        index expression.
        :param var_name: name of the array variable
        :return: None
        """
        segment, var_index = self._segment_and_index(var_name)
        # was array opener, now index expression
        self.tokenizer.advance()
        self.compile_expression()
        # push the array base and add it to the index
        self.VMWriter.write_push(segment, var_index)
        self.VMWriter.write_arithmetic(ADD)
        # was end of index expression, now array closer
        self.tokenizer.advance()
        # was array closer, now '='
        self.tokenizer.advance()

    def compile_while(self) -> None:
        """
        compiles a while statement
        --------------------
        while ( "expression" ) { "statements" }
        --------------------
        :return: None
        """
        self._sync_to_keyword(WHILE)
        # label L1
        while_label = self.label_generator()
        self.VMWriter.write_label(while_label)
        # was 'while', now '('
        self.tokenizer.advance()
        # was '(', now condition
        self.tokenizer.advance()
        self.compile_expression()
        # apply the command registered for "~" to the condition so the
        # following if-goto leaves the loop when the condition does not hold
        self.VMWriter.write_arithmetic(BINARY_DICT["~"])
        self.tokenizer.advance()
        # if-goto L2
        after_while_label = self.label_generator()
        self.VMWriter.write_if(after_while_label)
        self.tokenizer.advance()
        # now first token of the loop body
        self.tokenizer.advance()
        self.compile_statements()
        # goto L1
        self.VMWriter.write_goto(while_label)
        # label L2
        self.VMWriter.write_label(after_while_label)
        self.tokenizer.advance()

    def compile_return(self) -> None:
        """
        compiles a return statement. A bare ``return ;`` pushes constant 0
        before the return command.
        :return: None
        """
        self._sync_to_keyword(RETURN)
        if self._peek_text() == COMMA_DOT:
            # no value to return: step onto the terminator and return 0
            self.tokenizer.advance()
            self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            self.VMWriter.write_return()
            return
        # was 'return', now the returned expression
        self.tokenizer.advance()
        self.compile_expression()
        self.VMWriter.write_return()
        # now comes ';'
        self.tokenizer.advance()

    def compile_if(self) -> None:
        """
        compiles an if statement possibly with a trailing else clause
        --------------------
        if ( "expression" ) { "statements" }
        - might be followed by: else { "statements" }
        --------------------
        Emitted shape: cond, negate, if-goto L1, s1, goto L2, label L1,
        [s2], label L2.
        :return: None
        """
        self._sync_to_keyword(IF)
        else_label = self.label_generator()
        end_label = self.label_generator()
        # was 'if', now '('; the parenthesised condition is compiled as a
        # single term by compile_expression
        self.tokenizer.advance()
        self.compile_expression()
        # apply the command registered for "~" to the condition
        self.VMWriter.write_arithmetic(BINARY_DICT["~"])
        self.tokenizer.advance()
        # if-goto L1
        self.VMWriter.write_if(else_label)
        self.tokenizer.advance()
        # VM code for s1
        self.compile_statements()
        # goto L2
        self.VMWriter.write_goto(end_label)
        self.tokenizer.advance()
        # an 'else' may be the current token or the one right after it
        current_token = self._current_text()
        current_peek = self._peek_text()
        # label L1
        self.VMWriter.write_label(else_label)
        if current_peek == ELSE or current_token == ELSE:
            if current_peek == ELSE:
                # step onto 'else'
                self.tokenizer.advance()
            # was 'else', now '{'
            self.tokenizer.advance()
            # was '{', now first token of the else body
            self.tokenizer.advance()
            # VM code for s2
            self.compile_statements()
            self.tokenizer.advance()
        # label L2
        self.VMWriter.write_label(end_label)

    # ------------------------------------------------------------------ #
    # expressions
    # ------------------------------------------------------------------ #

    def compile_expression(self) -> None:
        """
        compiles an expression
        --------------------
        term (binary_operator term)*
        --------------------
        Operators are applied strictly left to right, each one emitted after
        its right hand term.
        :return: None
        """
        self.compile_term()
        operator = self._peek_text()
        while operator in BINARY_OPERATORS:
            # step onto the operator
            self.tokenizer.advance()
            # step onto the first token of the right hand term
            self.tokenizer.advance()
            self.compile_term()
            self.VMWriter.write_arithmetic(BINARY_DICT[operator])
            operator = self._peek_text()

    def compile_term(self) -> None:
        """
        compiles a term. if the current token is an identifier the next
        token decides between
        - an array entry: array opener
        - a subroutine call: opening parenthesis
        - a qualified call: DOT
        - a plain variable: anything else
        :return: None
        """
        current_token = self.tokenizer.get_current_token()
        token_type = current_token[0]
        token_string = current_token[1]
        if token_type == JackTokenizer.INT_TYPE:
            # integerConstant
            self.VMWriter.write_push(CONSTANT, token_string)
        elif token_type == JackTokenizer.STRING_TYPE:
            # stringConstant
            self.construct_string(token_string)
        elif token_type == JackTokenizer.KEYWORD_TYPE:
            # keywordConstant; any other keyword emits nothing
            if token_string == TRUE:
                # push 0 and apply the command registered for "~"
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
                self.VMWriter.write_arithmetic(BINARY_DICT["~"])
            elif token_string == FALSE:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
            elif token_string == THIS:
                self.VMWriter.write_push(POINTER, ZERO_NUM)
            elif token_string == NULL:
                self.VMWriter.write_push(CONSTANT, ZERO_NUM)
        elif token_string in UNARY_OPERATORS:
            # Unary minus is looked up under "!" instead of "-"
            # (presumably because "-" maps to binary subtraction in
            # BINARY_DICT -- confirm against its definition).
            if token_string == "-":
                token_string = "!"
            op = BINARY_DICT[token_string]
            # step onto the operand
            self.tokenizer.advance()
            self.compile_term()
            # the operator is emitted after its operand
            self.VMWriter.write_arithmetic(op)
        elif token_string == START_OF_PARAM_LIST:
            # parenthesised expression: step inside, compile, step onto ')'
            self.tokenizer.advance()
            self.compile_expression()
            self.tokenizer.advance()
        else:
            # some identifier: the following token tells what it is
            following = self._peek_text()
            if following == ARRAY_OPENER:
                self.tokenizer.advance()
                self.array_variable(token_string)
            elif following == START_OF_PARAM_LIST:
                # call of a subroutine of the current class
                self.compile_call(token_string)
            elif following == DOT:
                # step onto the DOT so compile_call sees a qualified call
                self.tokenizer.advance()
                self.compile_call(token_string)
            else:
                self.simple_variable(token_string)

    def simple_variable(self, var_name) -> None:
        """
        method to push simple variable
        :param var_name: var name we push
        :return: None
        """
        segment, var_index = self._segment_and_index(var_name)
        self.VMWriter.write_push(segment, var_index)

    def array_variable(self, var_name):
        """
        method to push the value of an array element. Entered on the array
        opener, leaves the cursor one token after the index expression.
        :param var_name: name of the array variable
        :return: None
        """
        segment, var_index = self._segment_and_index(var_name)
        # was array opener, now index expression
        self.tokenizer.advance()
        self.compile_expression()
        # base + index
        self.VMWriter.write_push(segment, var_index)
        self.VMWriter.write_arithmetic(ADD)
        # pop pointer 1 ; push that 0
        self.VMWriter.write_pop(POINTER, ONE_NUM)
        self.VMWriter.write_push(THAT, ZERO_NUM)
        # step onto the array closer
        self.tokenizer.advance()

    def compile_expression_list(self) -> int:
        """
        compiles (might be empty list) a comma separated list of expression.
        Entered on the opening parenthesis.
        :return: amount of expressions
        """
        # was '(', now first argument or end of list
        self.tokenizer.advance()
        arguments_count = 0
        if self._current_text() != END_OF_PARAM_LIST:
            arguments_count += 1
            self.compile_expression()
            # step past the end of the expression
            self.tokenizer.advance()
            while self._current_text() == COMMA:
                # was ',', now next argument
                self.tokenizer.advance()
                arguments_count += 1
                self.compile_expression()
                # step past the end of the expression
                self.tokenizer.advance()
        return arguments_count

    def label_generator(self) -> str:
        """
        helper method to generate a new label: LABEL followed by a running
        counter that is incremented on every call
        :return: str of new label
        """
        label = LABEL + str(self.label_counter)
        self.label_counter += 1
        return label

    def construct_string(self, token_string):
        """
        method to emit the construction of a string constant: allocate a
        string of the right length, then append the characters one by one
        :param token_string: token text; its first and last character are
        dropped (presumably the surrounding quotes)
        :return: None
        """
        token_string = token_string[1:-1]
        self.VMWriter.write_push(CONSTANT, len(token_string))
        self.VMWriter.write_call(STRING_ALLOC_METHOD, ONE_NUM)
        for char_of_string in token_string:
            self.VMWriter.write_push(CONSTANT, ord(char_of_string))
            self.VMWriter.write_call(STRING_APPENDING, TWO_NUM)

    def compile_call(self, rout_or_class_name) -> None:
        """
        method to compile a subroutine call. The cursor is on the DOT for a
        qualified call, otherwise on the subroutine name.
        :param rout_or_class_name: name of variable, class or subroutine
        :return: none
        """
        variable = self.get_variable_of_table(rout_or_class_name)
        if variable is not None:
            # call on a known variable: the callee class is the variable's
            # type and the variable itself is pushed as first argument
            rout_or_class_name = variable[TYPE]
            subroutine_type = variable[TYPE]
            var_index = variable[INDEX]
            var_kind = SymbolTable.get_segment(variable[KIND])
            self.VMWriter.write_push(var_kind, var_index)
        else:
            subroutine_type = None
        if self._current_text() == DOT:
            # was DOT, now subroutine name
            self.tokenizer.advance()
            subroutine_name = rout_or_class_name + DOT + self._current_text()
        else:
            # unqualified call: a method of the current object, which is
            # pushed as first argument
            self.VMWriter.write_push(POINTER, ZERO_NUM)
            subroutine_name = self.current_class_name + DOT + rout_or_class_name
            subroutine_type = METHOD
        if subroutine_type is None or subroutine_type == VOID:
            # no receiver was pushed
            arguments = 0
        else:
            # the receiver counts as one argument
            arguments = 1
        # was subroutine name, now '('
        self.tokenizer.advance()
        arguments = arguments + self.compile_expression_list()
        self.VMWriter.write_call(subroutine_name, arguments)

    def get_variable_of_table(self, var_name):
        """
        method to get variable of one of tables. The subroutine table wins
        over the class level table when both know the name.
        :param var_name: var name to get
        :return: the stored entry, or None when the name is unknown
        """
        subroutine_table = self.symbol_table.subroutine_table
        if var_name in subroutine_table:
            return subroutine_table[var_name]
        class_table = self.symbol_table.variable_table
        if var_name in class_table:
            return class_table[var_name]
        return None
class CompilationEngine(object):
    """Recursive-descent compiler for one Jack class.

    Tokens are pulled from a ``JackTokenizer``, declarations are recorded in
    a ``SymbolTable`` and VM commands are emitted through a ``VMWriter``.
    While parsing, an XML tree mirroring the grammar nodes is built and
    written to the output buffer when ``compile_class`` finishes.
    """

    # Binary operators of the expression grammar. '-' is treated separately
    # because at the start of a term it is the unary negation operator.
    _BINARY_OPS = frozenset("+*/&|<>=")
    _UNARY_OPS = frozenset("-~")
    # Symbol-table kind -> VM segment used when pushing a call receiver.
    _RECEIVER_SEGMENTS = {
        "arg": "argument",
        "static": "static",
        "var": "local",
        "field": "this",
    }

    def __init__(self, inputfile, outputfile):
        """Open the buffers and create the tokenizer, writer and symbol table.

        :param inputfile: path of the Jack source, or an open file object
        :param outputfile: path of the XML output; the VM file name is derived
            from it by replacing the last four characters with ``.vm``
        """
        self._inputfile = inputfile
        self._outputfile = outputfile
        self._tokenizer: JackTokenizer = None
        self._cur_root = []  # stack of XML elements currently being built
        self._n_args = []  # stack of argument counters, one per nested call
        self._root = None
        self.class_name = None
        self.return_type = None
        self._label_cnt = 0
        self.vm_writer = None  # type:VMWriter
        self._init()
        self.symbol = SymbolTable()
        self.vm_writer.set_engine(self)
        self.method_type = None

    def line_num(self):
        """Return the tokenizer's current line."""
        return self._tokenizer.line

    def _init(self):
        """Create the I/O buffers, the VM writer and the tokenizer."""
        self._inputbuf = self.create_buffer(self._inputfile)
        self._outputbuf = self.create_buffer(self._outputfile, mode="w+")
        self.vm_writer = VMWriter(self._outputfile[:-4] + ".vm")
        self._tokenizer = JackTokenizer(self._inputbuf)

    def create_buffer(self, fn, mode='r'):
        """Return a file object for ``fn``.

        :param fn: a path (opened with ``mode``) or an already open file object
        :param mode: mode used when ``fn`` is a path
        :raises ValueError: if ``fn`` is neither a path nor a file object
        """
        # The Python-2-only names ``unicode`` and ``file`` are not needed:
        # this class already requires Python 3 (variable annotations).
        if isinstance(fn, str):
            return open(fn, mode)
        if isinstance(fn, IOBase):
            return fn
        raise ValueError("expected a file path or a file-like object")

    def compile_class(self):
        """Compile a complete class and write the XML tree and VM output.

        The XML tree built so far is written, and all buffers are closed,
        even when compilation fails part-way.
        """
        parent = self._set_parent("class")
        self._root = parent
        try:
            self._advance()
            self._pop_required(parent, TokenType.keyword, KeywordType.CLASS)
            self.class_name = self._token()[1]
            self._pop_required(parent, TokenType.identifier)
            self._pop_required(parent, TokenType.symbol, "{")
            while self._is_class_var():
                self.compile_class_var_desc()
            while self._is_subroutine():
                self.compile_subroutine()
            self._pop_required(parent, TokenType.symbol, "}")
            print(self.symbol)  # diagnostic dump of the final symbol table
        finally:
            try:
                xml_text = et.tostring(
                    self._root, pretty_print=True,
                    method="c14n2").decode("utf-8")
                self._outputbuf.write(xml_text)
            finally:
                # Close everything even if serialising the tree failed.
                self.vm_writer.close()
                self._outputbuf.close()
                self._inputbuf.close()

    def _required_type(self, token_type, val=None):
        """Check that the current token has the expected type (and value).

        For keywords and symbols the value is compared as well, unless
        ``val`` is None, which accepts any value (as ``is_token`` does).

        :return: the ``(type, value)`` pair of the current token
        :raises ValueError: if the current token does not match
        """
        tp, tv = self._token()
        value_matters = (tp in (TokenType.keyword, TokenType.symbol)
                         and val is not None)
        if token_type != tp or (value_matters and val != tv):
            raise ValueError("token must be %s,%s" % (token_type, val))
        return tp, tv

    def compile_class_var_desc(self):
        """Compile a ``static`` / ``field`` declaration and define its names."""
        parent = self._set_parent("classVarDec")
        # Could be refined with stricter type checks and identifier validation.
        parent.append(self._build_element())
        kind = self.get_kind()
        self._advance()
        itype = self.get_type()
        parent.append(self._build_element())
        self._advance()
        while not self.is_token(TokenType.symbol, ";"):
            parent.append(self._build_element())
            name = self._token()[1]
            if name != "," and name != ";":
                self.symbol.define(name, itype, kind)
            self._advance()
        parent.append(self._build_element())
        self._advance()
        self._remove_parent()

    def get_kind(self):
        """Return the current token's value as a symbol kind string."""
        kind = self._token()[1]
        if isinstance(kind, KeywordType):
            kind = kind.name.lower()
        return kind

    def get_type(self):
        """Return the current token's value as a type name."""
        itype = self._token()[1]
        if isinstance(itype, KeywordType):
            return itype.name.lower()
        return itype

    def compile_subroutine(self):
        """Compile a complete constructor, function or method."""
        print(self.symbol)  # diagnostic dump before the subroutine scope resets
        self.symbol.start_subroutine()
        parent = self._set_parent("subroutineDec")
        method_type = self._token()[1]
        self.method_type = method_type
        self._advance()
        self.return_type = self._token()[1]
        self._advance()
        function_name = self._token()[1]
        self._advance()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_parameter_list()
        full_name = "{}.{}".format(self.class_name, function_name)
        self._pop_required(parent, TokenType.symbol, ")")
        self._compile_body(full_name, method_type)
        self._remove_parent()
        self.vm_writer.write_comment("end function %s" % function_name)
        self.vm_writer.write_comment("")

    def _compile_body(self, full_name, method_type):
        """Compile a subroutine body: var declarations, prologue, statements.

        :param full_name: ``Class.subroutine`` name used in the VM function
        :param method_type: keyword that introduced the subroutine
        """
        parent = self._set_parent("subroutineBody")
        self._pop_required(parent, TokenType.symbol, "{")
        while self._is_var_desc():
            self.compile_var_desc()
        var_cnt = self.symbol.var_count("var")
        field_cnt = self.symbol.var_count("field")
        self.vm_writer.write_function(full_name, var_cnt)
        if method_type == KeywordType.CONSTRUCTOR:
            # constructor: allocate memory for the object and anchor `this`
            self.vm_writer.write_push(SEG_CONSTANT, field_cnt)
            self.vm_writer.write_call("Memory.alloc", "1")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        elif method_type == KeywordType.METHOD:
            # method: set this = argument 0
            self.vm_writer.write_push(SEG_ARG, "0")
            self.vm_writer.write_pop(SEG_POINTER, "0")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self._remove_parent()

    def _remove_parent(self):
        """Leave the XML element that is currently being built."""
        self._cur_root.pop()

    def compile_parameter_list(self):
        """Define every parameter up to (not including) the closing ``)``."""
        kind = "arg"
        while not self.is_token(TokenType.symbol, ")"):
            itype = self.get_type()
            self._advance()
            name = self._token()[1]
            self.symbol.define(name, itype, kind)
            self._advance()
            if self.is_token(TokenType.symbol, ","):
                self._advance()

    def compile_var_desc(self):
        """Compile a ``var`` declaration and define its names as locals."""
        parent = self._set_parent("varDec")
        self._pop_required(parent, TokenType.keyword, KeywordType.VAR)
        kind = "var"
        itype = self.get_type()
        parent.append(self._build_element())
        self._advance()
        while not self.is_token(TokenType.symbol, ";"):
            if not self.is_token(TokenType.symbol, ","):
                self.symbol.define(self._token()[1], itype, kind)
            self._advance()
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_statements(self):
        """Compile statements until the current token starts none."""
        self._set_parent("statements")
        while self._is_statement():
            if self.is_let_statement():
                self.compile_let()
            elif self.is_do_statement():
                self.compile_do()
            elif self.is_return_statement():
                self.compile_return()
            elif self.is_if_statement():
                self.compile_if()
            else:
                self.compile_while()
        self._remove_parent()

    def compile_do(self):
        """Compile a ``do`` statement; the call's return value is discarded."""
        parent = self._set_parent("doStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.DO)
        type1, id1 = self._pop_required(parent, TokenType.identifier)
        self.compile_call(type1, id1)
        self.vm_writer.write_pop(SEG_TEMP, 0)
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_call(self, typ1, id1):
        """Compile a subroutine call whose first identifier was consumed.

        :param typ1: token type of the consumed identifier (unused)
        :param id1: the identifier: a variable, a class name, or the name of
            a subroutine of the current class
        """
        parent = None
        symbol_kind = self.symbol.kind_of(id1)
        n_args = 0
        _, id2 = self._token()
        if id2 == ".":
            if symbol_kind:
                # call on a variable: the target class is the variable's type
                function_type = self.symbol.type_of(id1)
                # push the receiver; it becomes `this` inside the callee
                segment = self._RECEIVER_SEGMENTS.get(symbol_kind)
                if segment is not None:
                    index = self.symbol.index_of(id1)
                    if (symbol_kind == "arg"
                            and self.method_type == KeywordType.METHOD):
                        # argument 0 of a method is `this`, so declared
                        # parameters start at 1 (same as get_var_seg_idx)
                        index += 1
                    self.vm_writer.write_push(segment, index)
                n_args += 1
            else:
                # static call: id1 is a class name
                function_type = id1
            self._advance()
            _, method_name = self._pop_required(parent, TokenType.identifier)
            full_name = "%s.%s" % (function_type, method_name)
        else:
            # bare name: call on the current object
            n_args += 1
            self.vm_writer.write_push("pointer", 0)
            function_type = self.class_name
            full_name = "%s.%s" % (function_type, id1)
        # The counter lives on a stack because argument expressions may
        # themselves contain calls.
        self._n_args.append(n_args)
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression_list()
        self._pop_required(parent, TokenType.symbol, ")")
        n_args = self._n_args.pop(-1)
        self.vm_writer.write_call(full_name, n_args=n_args)

    def compile_let(self):
        """Compile a ``let`` statement, including array-element targets."""
        parent = self._set_parent("letStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.LET)
        tk, val = self._pop_required(parent, TokenType.identifier)
        seg, idx = self.get_var_seg_idx(val)
        is_arr = False
        if self.is_token(TokenType.symbol, "["):
            # the target may be an array element: leave its address on the stack
            is_arr = True
            self._advance()
            self.compile_expression()
            self.vm_writer.write_push(seg, idx)
            self.vm_writer.write_arithmetic("+")
            self._pop_required(parent, TokenType.symbol, "]")
        self._pop_required(parent, TokenType.symbol, "=")
        self.compile_expression()
        if is_arr:
            # park the value in temp 0 so the address can go into pointer 1
            self.vm_writer.write_pop(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_POINTER, "1")
            self.vm_writer.write_push(SEG_TEMP, "0")
            self.vm_writer.write_pop(SEG_THAT, "0")
        else:
            self.vm_writer.write_pop(seg, idx)
        self._pop_required(parent, TokenType.symbol, ";")
        self._remove_parent()

    def compile_while(self):
        """Compile a ``while`` statement."""
        self.vm_writer.write_comment("start while")
        parent = self._set_parent("whileStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.WHILE)
        label1 = self._get_label()
        self.vm_writer.write_label(label1)
        label2 = self._get_label()
        self._pop_required(parent, TokenType.symbol, "(")
        self.compile_expression()
        # negate the condition: leave the loop when it is false
        self.vm_writer.write_arithmetic("~")
        self._pop_required(parent, TokenType.symbol, ")")
        self.vm_writer.write_if(label2)
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_goto(label1)
        self.vm_writer.write_label(label2)
        self._remove_parent()
        self.vm_writer.write_comment("end while")

    def compile_return(self):
        """Compile a ``return`` statement; void subroutines return 0."""
        parent = self._set_parent("returnStatement")
        self._pop_required(parent, TokenType.keyword, KeywordType.RETURN)
        if not self.is_token(TokenType.symbol, ";"):
            self.compile_expression()
        self._pop_required(parent, TokenType.symbol, ";")
        if self.return_type == KeywordType.VOID:
            self.vm_writer.write_push(SEG_CONSTANT, 0)
        self.vm_writer.write_return()
        self._remove_parent()

    def compile_if(self):
        """Compile an ``if`` statement with an optional ``else`` clause."""
        parent = self._set_parent("ifStatement")
        self.vm_writer.write_comment("compile if")
        self._pop_required(parent, TokenType.keyword, KeywordType.IF)
        self._pop_required(parent, TokenType.symbol, "(")
        label1 = self._get_label()  # start of the else part
        label2 = self._get_label()  # end of the whole statement
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        self.vm_writer.write_if(label1)
        self._pop_required(parent, TokenType.symbol, ")")
        self._pop_required(parent, TokenType.symbol, "{")
        self.compile_statements()
        self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_goto(label2)
        self.vm_writer.write_label(label1)
        if self.is_token(TokenType.keyword, KeywordType.ELSE):
            self._pop_required(parent, TokenType.keyword, KeywordType.ELSE)
            self._pop_required(parent, TokenType.symbol, "{")
            self.compile_statements()
            self._pop_required(parent, TokenType.symbol, "}")
        self.vm_writer.write_label(label2)
        self._remove_parent()
        self.vm_writer.write_comment(" if end")

    def compile_expression(self):
        """Compile ``term (op term)*``, evaluating strictly left to right.

        An operator is emitted only after its right-hand term was compiled.
        """
        self._set_parent("expression")
        pending_op = None
        while not self._is_end():
            self.compile_term()
            if pending_op is not None:
                self.vm_writer.write_arithmetic(pending_op)
                pending_op = None
            if self._is_op(False):
                _, pending_op = self._token()
                self._advance()
        self._remove_parent()

    def compile_term(self):
        """Compile a single term of an expression."""
        parent = self._set_parent("term")
        first = True
        while not self._is_op(first) and not self._is_end():
            first = False
            if self.is_token(TokenType.symbol, "("):
                self._advance()
                self.compile_expression()
                self._pop_required(parent, TokenType.symbol, ")")
            elif self._is_unary_op():
                _, op = self._token()
                self._advance()
                op = "neg" if op == "-" else op
                self.compile_term()
                self.vm_writer.write_arithmetic(op)
            elif self.is_token(TokenType.identifier):
                tk, val = self._pop_required(parent, TokenType.identifier)
                if self.is_token(TokenType.symbol, "(") or self.is_token(
                        TokenType.symbol, "."):
                    self.compile_call(tk, val)
                elif self.is_token(TokenType.symbol, "["):
                    self._advance()
                    self.compile_expression()
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
                    # compute the element address and read it via that[0]
                    # FIXME: constant-index accesses such as a[0]
                    self.vm_writer.write_arithmetic("+")
                    self.vm_writer.write_pop(SEG_POINTER, "1")
                    self.vm_writer.write_push(SEG_THAT, "0")
                    self._pop_required(parent, TokenType.symbol, "]")
                else:
                    # plain variable
                    seg, idx = self.get_var_seg_idx(val)
                    self.vm_writer.write_push(seg, idx)
            else:
                self._compile_constant()
                self._advance()
        self._remove_parent()

    def _compile_constant(self):
        """Emit the push sequence for a constant token, if it is one."""
        _, val = self._token()
        if self.is_token(TokenType.integerConstant):
            self.vm_writer.write_push(SEG_CONSTANT, val)
        elif self.is_token(TokenType.keyword, KeywordType.TRUE):
            # true is -1, i.e. not 0
            self.vm_writer.write_push(SEG_CONSTANT, "0")
            self.vm_writer.write_arithmetic("~")
        elif self.is_token(TokenType.keyword, KeywordType.FALSE):
            self.vm_writer.write_push(SEG_CONSTANT, "0")
        elif self.is_token(TokenType.keyword, KeywordType.NULL):
            self.vm_writer.write_push(SEG_CONSTANT, "0")
        elif self.is_token(TokenType.keyword, KeywordType.THIS):
            self.vm_writer.write_push(SEG_POINTER, "0")
        elif self.is_token(TokenType.stringConstant):
            self.vm_writer.write_push(SEG_CONSTANT, str(len(val)))
            self.vm_writer.write_call("String.new", "1")
            for char in val:
                self.vm_writer.write_push(SEG_CONSTANT, str(ord(char)))
                self.vm_writer.write_call("String.appendChar", '2')

    def _pop_required(self, parent, tk, val=None):
        """Validate the current token, advance past it and return it."""
        tk, val = self.required(tk, val)
        self._advance()
        return tk, val

    def _is_op(self, first):
        """Tell whether the current token is a binary operator.

        :param first: True at the start of a term, where '-' is unary
        """
        tk, val = self._token()
        if tk != TokenType.symbol:
            # only symbols can be operators; a string constant "-" is not one
            return False
        return val in self._BINARY_OPS or (val == '-' and not first)

    def _is_unary_op(self):
        """Tell whether the current token is '-' or '~'."""
        tk, val = self._token()
        return tk == TokenType.symbol and val in self._UNARY_OPS

    def compile_expression_list(self):
        """Compile a comma separated expression list up to the closing ``)``.

        The number of expressions is added to the counter on top of the
        ``_n_args`` stack.
        """
        parent = self._set_parent("expressionList")
        n_args = self._n_args[-1]
        while not self.is_token(TokenType.symbol, ")"):
            n_args += 1
            self.compile_expression()
            if self.is_token(TokenType.symbol, ","):
                self._pop_required(parent, TokenType.symbol, ",")
        self._n_args[-1] = n_args
        self._remove_parent()

    def build_identifier(self):
        """Build an ``identifier`` XML element for the current token."""
        e = et.Element("identifier")
        e.text = self._tokenizer.identifier()
        return e

    def build_keyword(self):
        """Build a ``keyword`` XML element for the current token."""
        e = et.Element("keyword")
        e.text = self._tokenizer.keyword().name.lower()
        return e

    def build_symbol(self):
        """Build a ``symbol`` XML element for the current token."""
        e = et.Element("symbol")
        e.text = self._tokenizer.symbol()
        return e

    def _token(self):
        """Return ``(type, value)`` of the current token.

        Returns ``(None, None)`` when the tokenizer reports a type that is
        not one of the five known token types.
        """
        tokenizer = self._tokenizer
        token_type = tokenizer.token_type()
        if token_type == TokenType.keyword:
            return token_type, tokenizer.keyword()
        if token_type == TokenType.symbol:
            return token_type, tokenizer.symbol()
        if token_type == TokenType.identifier:
            return token_type, tokenizer.identifier()
        if token_type == TokenType.integerConstant:
            return token_type, tokenizer.intVal()
        if token_type == TokenType.stringConstant:
            return token_type, tokenizer.stringVal()
        return None, None

    def _advance(self):
        """Move to the next token; does nothing at the end of the input."""
        if self._tokenizer.has_more_tokens():
            self._tokenizer.advance()

    def required(self, token, val=None):
        """Public alias of ``_required_type``."""
        return self._required_type(token, val)

    def _build_element(self):
        """Build an XML element named after the current token's type."""
        a, b = self._token()
        e = et.Element(a.name)
        if isinstance(b, KeywordType):
            e.text = b.name.lower()
        else:
            e.text = b
        return e

    def _is_class_var(self):
        return self.is_token(TokenType.keyword, KeywordType.FIELD) \
            or self.is_token(TokenType.keyword, KeywordType.STATIC)

    def is_token(self, token, val=None):
        """Tell whether the current token has the given type (and value).

        :param token: expected token type
        :param val: expected value; None accepts any value
        """
        t, v = self._token()
        if val is not None:
            return t == token and v == val
        return t == token

    def _get_parent(self):
        """Return the XML element being built, or None outside any element."""
        if len(self._cur_root) > 0:
            return self._cur_root[-1]
        return None

    def _set_parent(self, name):
        """Create a child XML element and make it the current one."""
        parent = self._get_parent()
        ele2 = et.Element(name)
        if parent is not None:
            parent.append(ele2)
        self._cur_root.append(ele2)
        return ele2

    def _is_subroutine(self):
        return self.is_token(TokenType.keyword, KeywordType.FUNCTION) \
            or self.is_token(TokenType.keyword, KeywordType.CONSTRUCTOR) \
            or self.is_token(TokenType.keyword, KeywordType.METHOD)

    def _is_statement(self):
        """Tell whether the current token starts a statement."""
        return (self.is_let_statement()
                or self.is_do_statement()
                or self.is_return_statement()
                or self.is_if_statement()
                or self.is_while_statement())

    def is_while_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.WHILE)

    def is_let_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.LET)

    def is_do_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.DO)

    def is_return_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.RETURN)

    def is_if_statement(self):
        return self.is_token(TokenType.keyword, KeywordType.IF)

    def _is_var_desc(self):
        return self.is_token(TokenType.keyword, KeywordType.VAR)

    def _is_end(self):
        """Tell whether the current token terminates an expression."""
        tk, val = self._token()
        return tk == TokenType.symbol and val in (";", ")", ",", "]")

    def get_var_seg_idx(self, val):
        """Return the ``(segment, index)`` pair used to access a variable.

        Inside a method, argument 0 holds ``this``, so declared parameters
        are shifted by one.

        :param val: variable name
        :raises ValueError: if the name has no known kind in the symbol table
        """
        kind = self.symbol.kind_of(val)
        idx = self.symbol.index_of(val)
        if kind == "static":
            return SEG_STATIC, idx
        if kind == "var":
            return SEG_LOCAL, idx
        if kind == "field":
            return SEG_THIS, idx
        if kind == "arg":
            if self.method_type == KeywordType.METHOD:
                idx += 1
            return SEG_ARG, idx
        raise ValueError("undefined variable %r" % (val,))

    def _get_label(self):
        """Return a fresh, unique label name."""
        label = "label_%s" % self._label_cnt
        self._label_cnt += 1
        return label
class CompilationEngine:
    """
    Effects the actual compilation output.
    Gets its input from a JackTokenizer and emits its parsed structure into
    an output file/stream. The output is generated by a series of
    compilexxx() routines, one for every syntactic element xxx of the Jack
    grammar. The contract between these routines is that each compilexxx()
    routine should read the syntactic construct xxx from the input,
    advance() the tokenizer exactly beyond xxx, and output the parsing of
    xxx. Thus, most of the compilexxx() may only be called if indeed xxx is
    the next syntactic element of the input.
    The module outputs to the output stream, the correspond VM code.
    """

    def __init__(self, input_stream, output_stream):
        """
        Creates a new compilation engine with the given input and output.
        The next routine called must be compile().
        :param input_stream: passed to the JackTokenizer constructor
        :param output_stream: passed to the VMWriter constructor
        """
        self.__prefix = ""
        self.__tokenizer = JackTokenizer(input_stream)
        self.__writer = VMWriter(output_stream)
        self.__symbol_table = SymbolTable()
        self.__label_counter = 0  # source of unique label numbers
        self.__class_name = None

    def compile(self):
        """
        Compiles the whole file
        """
        self.__compile_class()

    def __compile_class(self):
        """
        Compiles a complete class.
        :return: None (this method has no return statement)
        """
        # checks the next parts of the class and writes them to the file
        self.__check_keyword_symbol(KEYWORD_TYPE)  # "class"
        self.__check_keyword_symbol(IDENTIFIER_TYPE)  # className
        # saves the class's name for its type when creating this
        self.__class_name = self.__tokenizer.get_value()
        self.__check_keyword_symbol(SYMBOL_TYPE)  # "{"
        while self.__compile_class_var_dec():
            continue
        while self.__compile_subroutine(False):
            self.__advance_tokenizer()
        self.__check_keyword_symbol(SYMBOL_TYPE,
                                    make_advance=False)  # block closer "}"

    def __compile_class_var_dec(self, make_advance=True):
        """
        Compiles a static declaration or a field declaration
        :param: make_advance: boolean parameter- should make advance before
                the first call or not. Default value is True
        :return: True iff there was a valid class var declaration
        """
        if not self.__check_keyword_symbol(
                KEYWORD_TYPE, CLASS_VAR_DEC_KEYWORDS, make_advance):
            # It is not a class var dec
            return False
        var_kind = self.__tokenizer.get_value()  # saves the variable's kind
        self.__check_type()
        var_type = self.__tokenizer.get_value()  # saves the variable's type
        self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
        var_name = self.__tokenizer.get_value()  # saves the variable's name
        # adds the variable to the symbol table
        self.__symbol_table.define(var_name, var_type, var_kind)
        # adds all additional variables to the symbol table
        while self.__check_keyword_symbol(
                SYMBOL_TYPE,
                [ADDITIONAL_VAR_OPTIONAL_MARK]):  # "," more varName
            self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
            # saves the variable's name
            var_name = self.__tokenizer.get_value()
            # adds the variable to the symbol table
            self.__symbol_table.define(var_name, var_type, var_kind)
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ";"
        return True

    def __compile_subroutine(self, make_advance=True):
        """
        Compiles a complete method, function, or constructor.
        :param: make_advance: boolean parameter- should make advance before
                the first call or not. Default value is True
        :return: True iff there was a valid subroutine declaration
        """
        if not self.__check_keyword_symbol(
                KEYWORD_TYPE, SUBROUTINE_DEC_KEYWORDS, make_advance):
            # It is not a subroutine
            return False
        self.__symbol_table.start_subroutine()  # creates new subroutine table
        is_constructor = False
        # adds this object in case of a method
        if self.__tokenizer.get_value() == METHOD_DEC_KEYWORD:
            self.__symbol_table.define(THIS_CONSTANT, self.__class_name,
                                       ARG_SEGMENT_KEYWORD)
        elif self.__tokenizer.get_value() == CONSTRUCTOR_DEC_KEYWORD:
            is_constructor = True
        if not self.__check_keyword_symbol(KEYWORD_TYPE):  # not void
            self.__check_type(False)
        self.__check_keyword_symbol(IDENTIFIER_TYPE)  # subroutineName
        func_name = self.__tokenizer.get_value()  # saves the function's name
        self.__check_keyword_symbol(SYMBOL_TYPE)  # "("
        self.__compile_parameter_list()
        # advance was made in the compile_parameter_list without use
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ")"
        self.__compile_subroutine_body(func_name, is_constructor)
        return True

    def __compile_subroutine_body(self, subroutine_name, is_constructor):
        """
        Compiles a subroutine body
        :param: subroutine_name: The name of the current subroutine
                (function/method/constructor's name)
        :param: is_constructor: True if the subroutine is a constructor, in
                which case the object allocation code is written first
        """
        self.__check_keyword_symbol(SYMBOL_TYPE)  # '{'
        vars_amount = 0  # number of locals the function needs
        # compiles and writes all variable declarations
        current_dec_var_amount = self.__compile_var_dec()
        while current_dec_var_amount:  # as long there are more declaration
            # adds the last amount of vars that were declared
            vars_amount += current_dec_var_amount
            current_dec_var_amount = self.__compile_var_dec()
        # writes the function's title
        self.__writer.write_function(
            self.__class_name, subroutine_name, vars_amount)
        # creates the object in case of a constructor
        if is_constructor:
            num_of_fields = self.__symbol_table.var_count(
                FIELD_SEGMENT_KEYWORD)
            # push the number of fields needed for the object
            self.__writer.write_push(CONSTANT_SEGMENT, num_of_fields)
            # calls the alloc function
            self.__writer.write_call(ALLOC_FUNCTION, ALLOC_ARGS_NUM)
            # anchors this at the base address
            self.__writer.write_pop(POINTER_SEGMENT, THIS_POINTER_INDEX)
        elif self.__symbol_table.get_index_of(THIS_CONSTANT) is not None:
            # this was pushed for the method - pop it to this segment
            self.__push_var(THIS_CONSTANT)
            self.__writer.write_pop(POINTER_SEGMENT, THIS_POINTER_INDEX)
        # compiles the statements of the subroutine
        self.__compile_statements()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # '}'

    def __compile_parameter_list(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        In any way, the function advance the tokenizer
        """
        if self.__check_type():
            var_type = self.__tokenizer.get_value()  # gets the variable's type
            self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
            var_name = self.__tokenizer.get_value()  # gets the variable's name
            # add the variable to the symbol table
            self.__symbol_table.define(
                var_name, var_type, ARG_SEGMENT_KEYWORD)
            # adds all additional parameters to the symbol table
            while self.__check_keyword_symbol(
                    SYMBOL_TYPE,
                    [ADDITIONAL_VAR_OPTIONAL_MARK]):  # "," more varName
                self.__check_type()
                # gets the variable's type
                var_type = self.__tokenizer.get_value()
                self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
                # gets the variable's name
                var_name = self.__tokenizer.get_value()
                # add the variable to the symbol table
                self.__symbol_table.define(var_name, var_type,
                                           ARG_SEGMENT_KEYWORD)

    def __compile_var_dec(self):
        """
        Checks if the current token is set to variable declaration. If so,
        adds the declared variables to the symbol table.
        :return: number of variables that were declared. If the current token
                 is not set to the beginning of variable declaration,
                 returns 0
        """
        vars_amount = 0
        # checks if the current token is set to 'var', which means it is a
        # var declaration
        if not self.__check_keyword_symbol(KEYWORD_TYPE,
                                           VAR_KEYWORDS):  # 'var'
            return vars_amount
        vars_amount += 1  # first variable declaration
        self.__check_type()
        var_type = self.__tokenizer.get_value()
        self.__check_keyword_symbol(IDENTIFIER_TYPE)  # variableName
        var_name = self.__tokenizer.get_value()
        # add the variable to symbol table
        self.__symbol_table.define(
            var_name, var_type, VAR_SEGMENT_KEYWORD)
        # adds all additional variables to the symbol table
        while self.__check_keyword_symbol(SYMBOL_TYPE,
                                          [ADDITIONAL_VAR_OPTIONAL_MARK]):
            vars_amount += 1  # more variable declarations
            self.__check_keyword_symbol(IDENTIFIER_TYPE)  # variableName
            var_name = self.__tokenizer.get_value()
            self.__symbol_table.define(var_name, var_type,
                                       VAR_SEGMENT_KEYWORD)
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ';'
        return vars_amount

    def __compile_statements(self):
        """
        compiles the statements inside a subroutine.
        Assumes the tokenizer is advanced for the first call.
        """
        # compiling all statements
        while self.__check_keyword_symbol(KEYWORD_TYPE, STATEMENTS_LIST,
                                          False):
            # checking which statement to compile
            if self.__tokenizer.get_value() == LET_KEYWORD:
                self.__compile_let()
            elif self.__tokenizer.get_value() == DO_KEYWORD:
                self.__compile_do()
            elif self.__tokenizer.get_value() == WHILE_KEYWORD:
                self.__compile_while()
            elif self.__tokenizer.get_value() == RETURN_KEYWORD:
                self.__compile_return()
            else:
                self.__compile_if()

    def __compile_do(self):
        """
        Compiles a do statement.
        Assumes the tokenizer is advanced for the first call.
        Advance the tokenizer at the end
        """
        self.__check_keyword_symbol(KEYWORD_TYPE, make_advance=False)  # 'do'
        # advance the tokenizer for the subroutine call
        self.__check_keyword_symbol(
            IDENTIFIER_TYPE)  # identifier that would be operate on
        self.__check_subroutine_call()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ';'
        # the value returned by the call is not used: pop it into temp 0
        self.__writer.write_pop(TEMP_SEGMENT, 0)
        self.__advance_tokenizer()

    def __compile_let(self):
        """
        Compiles a let statement.
        Assumes the tokenizer is advanced for the first call.
        Advance the tokenizer at the end.
        """
        self.__check_keyword_symbol(KEYWORD_TYPE, make_advance=False)  # 'let'
        self.__check_keyword_symbol(IDENTIFIER_TYPE)  # varName
        left_side_var = self.__tokenizer.get_value()
        # mark if the left side variable is an array access
        is_left_side_array_access = False
        # compile the left side of the equation
        if self.__check_keyword_symbol(
                SYMBOL_TYPE,
                [OPEN_ARRAY_ACCESS_BRACKET]):  # array access, if not: =
            is_left_side_array_access = True
            self.__analyze_array_var(left_side_var)
            self.__check_keyword_symbol(SYMBOL_TYPE)  # '='
        # compile the right side of the equation
        self.__advance_tokenizer()  # advance the tokenizer for the expression
        self.__compile_expression()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ';'
        self.__advance_tokenizer()
        # assign the right side of the equation (that is in the stack) into
        # the left side
        if is_left_side_array_access:
            # assign into an array: park the value in temp 0 while the target
            # address is popped into the pointer segment
            self.__writer.write_pop(TEMP_SEGMENT, 0)
            self.__writer.write_pop(POINTER_SEGMENT, THAT_POINTER_INDEX)
            self.__writer.write_push(TEMP_SEGMENT, 0)
            self.__writer.write_pop(THAT_SEGMENT, 0)
        else:
            # assign into any other variable directly
            self.__writer.write_pop(
                self.__symbol_table.get_kind_of(left_side_var),
                self.__symbol_table.get_index_of(left_side_var))

    def __compile_while(self):
        """
        Compiles a while statement.
        Assumes the tokenizer is advanced for the first call.
        Advance the tokenizer at the end.
        """
        self.__check_keyword_symbol(KEYWORD_TYPE,
                                    make_advance=False)  # 'while'
        self.__check_keyword_symbol(SYMBOL_TYPE)  # '('
        # writes the loop label
        start_loop_label = self.__label_counter
        self.__label_counter += 1
        self.__writer.write_label(start_loop_label)
        # advance the tokenizer for the expression
        self.__advance_tokenizer()
        self.__compile_expression()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ')'
        self.__writer.write_arithmetic(NOT_OPERATOR, True)
        # if the expression is false, goto the next label
        end_loop_label = self.__label_counter
        self.__label_counter += 1
        self.__writer.write_if(end_loop_label)
        self.__check_keyword_symbol(SYMBOL_TYPE)  # '{'
        # advance the tokenizer for the statements
        self.__advance_tokenizer()
        self.__compile_statements()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # '}'
        self.__advance_tokenizer()
        # goes back to the top of the label
        self.__writer.write_goto(start_loop_label)
        self.__writer.write_label(end_loop_label)  # writes the end loop label

    def __compile_return(self):
        """
        Compiles a return statement.
        Assumes the tokenizer is advanced for the first call.
        Advance the tokenizer at the end.
        """
        self.__check_keyword_symbol(KEYWORD_TYPE,
                                    make_advance=False)  # 'return'
        if not self.__check_keyword_symbol(SYMBOL_TYPE, [END_LINE_MARK]):
            if self.__tokenizer.get_value() == THIS_CONSTANT and \
                    self.__symbol_table.get_type_of(THIS_CONSTANT) is None:
                # returning this in the constructor - push pointer 0
                self.__writer.write_push(POINTER_SEGMENT, THIS_POINTER_INDEX)
                self.__advance_tokenizer()
            else:
                # returning an expression
                self.__compile_expression()
            self.__check_keyword_symbol(SYMBOL_TYPE,
                                        make_advance=False)  # ';'
        else:
            # return void - push a junk constant 0 for a return value
            self.__writer.write_push(CONSTANT_SEGMENT, 0)
        self.__advance_tokenizer()
        self.__writer.write_return()

    def __compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        Assumes the tokenizer is advanced for the first call.
        Advance the tokenizer at the end.
        """
        self.__check_keyword_symbol(KEYWORD_TYPE, make_advance=False)  # 'if'
        self.__check_keyword_symbol(SYMBOL_TYPE)  # '('
        # advance the tokenizer for the expression
        self.__advance_tokenizer()
        self.__compile_expression()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ')'
        self.__writer.write_arithmetic(NOT_OPERATOR, True)
        # if the expression is false, goto the next label (else label)
        else_label = self.__label_counter
        self.__label_counter += 1
        self.__writer.write_if(else_label)
        self.__check_keyword_symbol(SYMBOL_TYPE)  # '{'
        # advance the tokenizer for the statements
        self.__advance_tokenizer()
        self.__compile_statements()
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # '}'
        end_if_label = self.__label_counter
        self.__label_counter += 1
        # goto the end of the if statement
        self.__writer.write_goto(end_if_label)
        self.__writer.write_label(else_label)  # writes else label
        if self.__check_keyword_symbol(KEYWORD_TYPE,
                                       [ELSE_KEYWORD]):  # 'else'
            self.__check_keyword_symbol(SYMBOL_TYPE)  # '{'
            # advance the tokenizer for the statements
            self.__advance_tokenizer()
            self.__compile_statements()
            self.__check_keyword_symbol(SYMBOL_TYPE,
                                        make_advance=False)  # '}'
            self.__advance_tokenizer()
        # write the end if statement label
        self.__writer.write_label(end_if_label)

    def __compile_expression(self):
        """
        compiles an expression
        Assumes the tokenizer is advanced for the first call.
        Advances the tokenizer at the end
        """
        # compiles the first term
        self.__compile_term()
        # compiles all the op + term that exists; each operator is written
        # after the term that follows it
        while self.__check_op(False):
            op = self.__tokenizer.get_value()
            self.__advance_tokenizer()
            self.__compile_term()
            self.__writer.write_arithmetic(op)

    def __compile_term(self):
        """
        compiles a term
        Assumes the tokenizer is advanced for the first call.
        Advances the tokenizer at the end
        """
        # checks for all the term options:
        # integer constant
        if self.__tokenizer.get_token_type() == INTEGER_CONST_TYPE:
            self.__writer.write_push(CONSTANT_SEGMENT,
                                     int(self.__tokenizer.get_value()))
            self.__advance_tokenizer()
        # string constant
        elif self.__tokenizer.get_token_type() in STRING_CONST_TYPE:
            self.__compile_string_constant()
            self.__advance_tokenizer()
        # keyword constant
        elif self.__check_keyword_symbol(KEYWORD_TYPE, KEYWORD_CONSTANT_LIST,
                                         False):
            if self.__tokenizer.get_value() == THIS_CONSTANT:
                # push this
                self.__writer.write_push(POINTER_SEGMENT, 0)
            elif self.__tokenizer.get_value() == TRUE_CONSTANT:
                # push -1
                self.__writer.write_push(CONSTANT_SEGMENT, 1)
                self.__writer.write_arithmetic(MINUS, True)
            else:
                # false/null- push 0
                self.__writer.write_push(CONSTANT_SEGMENT, 0)
            self.__advance_tokenizer()
        # (expression)
        elif self.__check_keyword_symbol(SYMBOL_TYPE, [OPEN_BRACKET], False):
            self.__advance_tokenizer()
            self.__compile_expression()
            self.__check_keyword_symbol(SYMBOL_TYPE,
                                        make_advance=False)  # ')'
            self.__advance_tokenizer()
        # unaryOp + term
        elif self.__check_unary_op(False):
            op = self.__tokenizer.get_value()
            self.__advance_tokenizer()
            self.__compile_term()
            self.__writer.write_arithmetic(op, True)
        # varName / varName[expression] / subroutineCall- in any case,
        # starts with identifier
        else:
            # self.__check_keyword_symbol(IDENTIFIER_TYPE)
            identifier_name = self.__tokenizer.get_value()
            # checks for function/method call
            if self.__check_subroutine_call():
                return
            # varName[expression]
            if self.__check_keyword_symbol(SYMBOL_TYPE,
                                           [OPEN_ARRAY_ACCESS_BRACKET],
                                           False):
                self.__analyze_array_var(identifier_name)
                self.__writer.write_pop(POINTER_SEGMENT,
                                        THAT_POINTER_INDEX)  # pop pointer 1
                self.__writer.write_push(THAT_SEGMENT, 0)  # push that 0
                self.__advance_tokenizer()
            # varName
            else:
                self.__push_var(identifier_name)  # push the var

    def __analyze_array_var(self, identifier_name):
        """
        varName[expression]
        operate varName + expression
        :param identifier_name: the variable's name
        """
        self.__push_var(identifier_name)  # push the var
        self.__advance_tokenizer()
        self.__compile_expression()  # push the expression
        self.__writer.write_arithmetic(PLUS)  # varName + expression
        self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False)  # ']'

    def __compile_string_constant(self):
        """
        compiles a string constant
        """
        str_const = self.__tokenizer.get_value()
        # fixing escaped characters: each control character is replaced by
        # its two-character backslash form
        str_const = str_const.replace("\t", "\\t")
        str_const = str_const.replace("\n", "\\n")
        str_const = str_const.replace("\b", "\\b")
        str_const = str_const.replace("\r", "\\r")
        str_len = len(str_const)
        self.__writer.write_push(CONSTANT_SEGMENT, str_len)
        self.__writer.write_call(STRING_CONSTRUCTOR,
                                 STRING_CONSTRUCT_NUM_ARGS)
        for char in str_const:
            # push the char ASCII code
            self.__writer.write_push(CONSTANT_SEGMENT, ord(char))
            self.__writer.write_call(STRING_APPEND, STRING_APPEND_NUM_ARGS)

    def __push_var(self, var_name):
        """
        writes a push var command to the output stream
        :param var_name: the variable name to push to the stack
        """
        self.__writer.write_push(self.__symbol_table.get_kind_of(var_name),
                                 self.__symbol_table.get_index_of(var_name))

    def __check_subroutine_call(self):
        """
        checks if the next tokens are subroutine call. If so, writes the vm
        commands for the subroutine call.
        Advances the tokenizer at the end
        :return: true iff the next tokens are subroutine calls
        """
        num_args = 0
        call_name = ""
        identifier = self.__tokenizer.get_value()
        # checks if the next token is '(' : regular method call
        if self.__check_keyword_symbol(SYMBOL_TYPE, [OPEN_BRACKET]):
            call_name += self.__class_name + CALL_CLASS_METHOD_MARK + identifier
            num_args += 1  # the extra 'this' arg
            # push this
            if self.__symbol_table.get_index_of(THIS_CONSTANT) is not None:
                self.__push_var(THIS_CONSTANT)
            else:
                self.__writer.write_push(POINTER_SEGMENT, THIS_POINTER_INDEX)
        # checks if the next token is '.'
: function/method call elif self.__check_keyword_symbol(SYMBOL_TYPE, [CALL_CLASS_METHOD_MARK], False): # a variable- method call if self.__symbol_table.get_index_of(identifier) is not None: var_type = self.__symbol_table.get_type_of(identifier) call_name += var_type num_args += 1 # the extra 'this' arg # push this self.__push_var(identifier) # function/ constructor call else: call_name += identifier self.__advance_tokenizer() func_name = self.__tokenizer.get_value() call_name += CALL_CLASS_METHOD_MARK + func_name self.__check_keyword_symbol(SYMBOL_TYPE) # '(' # if the next token is not ( or . : not a subroutine call else: return False # pushing all args num_args += self.__compile_expression_list() self.__check_keyword_symbol(SYMBOL_TYPE, make_advance=False) # ')' # calling the function self.__writer.write_call(call_name, num_args) self.__advance_tokenizer() return True def __compile_expression_list(self): """ compiles an expression list :return: the number of expressions compiled """ exp_counter = 0 self.__advance_tokenizer() # if the expression list is not empty: compile all the expression if self.__tokenizer.get_value() != CLOSE_BRACKET: exp_counter += 1 # compiles the first expression self.__compile_expression() # checks for more expressions separated with comma while self.__check_keyword_symbol(SYMBOL_TYPE, [ADDITIONAL_VAR_OPTIONAL_MARK], False): exp_counter += 1 # advances the tokenizer self.__advance_tokenizer() # compiles the next expression self.__compile_expression() return exp_counter def __check_keyword_symbol(self, token_type, value_list=None, make_advance=True): """ checks if the current token is from token_type (which is keyword or symbol), and it's value is one of the given optional values (in the value_list). 
:param token_type: the wanted type of the current token: keyword or symbol :param value_list: a list of optional values for the current token :param make_advance: whether or not the method should call tokenizer.advance() at the beginning :return: True if the current token is from Keyword type, and it's value exists in the keyword list, and false otherwise """ if make_advance: if self.__tokenizer.has_more_tokens(): self.__tokenizer.advance() else: return False if self.__tokenizer.get_token_type() == token_type: if value_list is None or self.__tokenizer.get_value( ) in value_list: return True return False def __check_type(self, make_advance=True): """ checks if the current token is a type. :param make_advance: whether or not the method should call tokenizer.advance() at the beginning :return: true iff the current token is a type """ # checks for builtin types if self.__check_keyword_symbol(KEYWORD_TYPE, TYPE_LIST, make_advance): return True # checks for user-defined class types if not self.__check_keyword_symbol(IDENTIFIER_TYPE, make_advance=False): return False return True def __check_op(self, make_advance=True): """ :return: true iff the current token is a symbol containing an operation """ return self.__check_keyword_symbol(SYMBOL_TYPE, OP_LIST, make_advance) def __check_unary_op(self, make_advance=True): """ :return: true iff the current token is a symbol containing an unary operation """ return self.__check_keyword_symbol(SYMBOL_TYPE, UNARY_OP_LIST, make_advance) def __advance_tokenizer(self): """ advances the inner tokenizer in case when there must be more tokens """ self.__tokenizer.has_more_tokens( ) # when there must be more tokens, otherwise the input is invalid self.__tokenizer.advance()
class CompilationEngine:
    """Compile one Jack class into VM commands in a single pass.

    Tokens are pulled from a ``JackTokenizer``, identifiers are tracked in a
    ``SymbolTable`` and the resulting commands are emitted through a
    ``VMWriter``.  Every ``compile_*`` method expects the tokenizer to be
    positioned as described in its docstring and documents where it leaves it.
    """

    # Binary operators of the Jack grammar.
    _BINARY_OPS = ("+", "-", "*", "/", "&", "|", "<", ">", "=")
    # Symbols that may directly follow a complete term.
    _TERM_END_SYMBOLS = (",", ";", ")", "}", "]") + _BINARY_OPS

    def __init__(self, input_file, output_file):
        """Create the tokenizer, symbol table and VM writer for one class.

        :param input_file: passed unchanged to ``JackTokenizer``
        :param output_file: passed unchanged to ``VMWriter``
        """
        self.jack_tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.writer = VMWriter(output_file)
        self.class_name = ""
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0
        # Argument count of the most recently compiled expression list.
        self.num_args_called_function = 0
        # Set when the upcoming term starts with a unary '-', so that
        # compile_term does not mistake the '-' for an "end of term" symbol.
        self.is_unary = False
        self.dic_arithmetic = {"+": "add", "-": "sub",
                               "*": "call Math.multiply 2",
                               "/": "call Math.divide 2",
                               "&": "and", "|": "or",
                               "<": "lt", ">": "gt", "=": "eq"}

    # ------------------------------------------------------------------ #
    # small tokenizer helpers
    # ------------------------------------------------------------------ #
    def _advance(self):
        """Move to the next token.

        The original code always calls ``has_more_tokens()`` right before
        ``advance()`` and ignores the result; the pairing is preserved here
        because the tokenizer may depend on it (not visible from this class).
        """
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()

    def _at_symbol(self, *symbols):
        """Return True iff the current token is a symbol listed in *symbols*."""
        tokenizer = self.jack_tokenizer
        return (tokenizer.token_type() == SYMBOL
                and tokenizer.symbol() in symbols)

    def _at_keyword(self, *keywords):
        """Return True iff the current token is a keyword listed in *keywords*."""
        tokenizer = self.jack_tokenizer
        return (tokenizer.token_type() == KEYWORD
                and tokenizer.key_word() in keywords)

    # ------------------------------------------------------------------ #
    # program structure
    # ------------------------------------------------------------------ #
    def compile_class(self):
        """Compile ``class className { classVarDec* subroutineDec* }``."""
        tokenizer = self.jack_tokenizer
        # 'class' className '{' -- remember the identifier among them.
        for _ in range(NUM_TOKENS_CLASS_DEC):
            self._advance()
            if tokenizer.token_type() == IDENTIFIER:
                self.class_name = tokenizer.identifier()

        while tokenizer.has_more_tokens():
            tokenizer.advance()
            # elif on purpose: compile_subroutine returns positioned on the
            # subroutine's own '}', which must not be taken for the class end.
            if self._at_keyword("static", "field"):
                self.compile_class_var_dec()
            elif self._at_keyword("function", "method", "constructor"):
                self.compile_subroutine()
            elif self._at_symbol("}"):
                break

    def _compile_declaration(self):
        """Register ``kind type name (',' name)* ';'`` in the symbol table.

        Starts on the kind keyword and stops on the closing ';'.
        """
        tokenizer = self.jack_tokenizer
        kind = tokenizer.key_word()
        self._advance()
        if tokenizer.token_type() == KEYWORD:
            var_type = tokenizer.key_word()
        else:
            var_type = tokenizer.identifier()

        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()
            if token_type == IDENTIFIER:
                self.symbol_table.define(tokenizer.identifier(), var_type, kind)
            elif token_type == SYMBOL and tokenizer.symbol() == ";":
                break

    def compile_class_var_dec(self):
        """Compile a ``static``/``field`` declaration (current token: the kind)."""
        self._compile_declaration()

    def compile_var_dec(self):
        """Compile a ``var`` declaration (current token: ``var``)."""
        self._compile_declaration()

    def compile_subroutine(self):
        """Compile a constructor, function or method.

        Starts on the subroutine keyword and stops on the '}' that closes the
        subroutine body.  Label counters are reset for every subroutine.
        """
        tokenizer = self.jack_tokenizer
        self.symbol_table.start_subroutine()
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0

        subroutine_kind = tokenizer.key_word()
        self._advance()
        if tokenizer.token_type() == KEYWORD:
            self.return_type = tokenizer.key_word()
        else:
            self.return_type = tokenizer.identifier()
        self._advance()
        self.subroutine_name = tokenizer.identifier()

        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if tokenizer.symbol() == "(":
                if subroutine_kind == "method":
                    # The receiver occupies argument 0 of every method.
                    self.symbol_table.define(THIS, self.class_name, ARG)
                self.compile_parameter_list()  # stops on ')'
            if tokenizer.symbol() == "{":
                self._compile_subroutine_body(subroutine_kind)
                break

    def _compile_subroutine_body(self, subroutine_kind):
        """Compile ``varDec* statements`` after the opening '{'.

        The ``function`` command is written only once all ``var``
        declarations have been seen, because it needs the local count.
        """
        tokenizer = self.jack_tokenizer
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if tokenizer.token_type() != KEYWORD:
                continue
            if tokenizer.key_word() == "var":
                self.compile_var_dec()
                continue

            self.writer.write_function(
                self.class_name + "." + self.subroutine_name,
                self.symbol_table.var_count(VAR))
            if subroutine_kind == "constructor":
                # Allocate one word per field and make it the current object.
                self.writer.write_push(CONST,
                                       self.symbol_table.var_count(FIELD))
                self.writer.write_call("Memory.alloc", 1)
                self.writer.write_pop("pointer", 0)
            elif subroutine_kind == "method":
                # Bind 'this' to the receiver passed as argument 0.
                self.writer.write_push(ARGUMENT, 0)
                self.writer.write_pop("pointer", 0)
            self.compile_statements()  # stops on the body's '}'
            break

    def compile_parameter_list(self):
        """Register the parameters; starts on '(' and stops on ')'."""
        tokenizer = self.jack_tokenizer
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()
            if token_type in (KEYWORD, IDENTIFIER):
                if token_type == KEYWORD:
                    param_type = tokenizer.key_word()   # int, boolean, ...
                else:
                    param_type = tokenizer.identifier()  # class name
                self._advance()
                self.symbol_table.define(tokenizer.identifier(),
                                         param_type, ARG)
            if token_type == SYMBOL and tokenizer.symbol() == ")":
                break

    # ------------------------------------------------------------------ #
    # statements
    # ------------------------------------------------------------------ #
    def compile_statements(self):
        """Compile statements until the enclosing '}' is the current token.

        :raises ValueError: if the current token is neither a statement
            keyword nor '}' (the original looped forever in that case).
        """
        tokenizer = self.jack_tokenizer
        # Each of these leaves its last token current, so we advance after.
        handlers = {"do": self.compile_do, "let": self.compile_let,
                    "while": self.compile_while,
                    "return": self.compile_return}
        while True:
            if self._at_symbol("}"):
                break
            keyword = None
            if tokenizer.token_type() == KEYWORD:
                keyword = tokenizer.key_word()
            if keyword == "if":
                # compile_if has to look one token ahead for 'else' and
                # therefore returns already advanced.
                self.compile_if()
            elif keyword in handlers:
                handlers[keyword]()
                self._advance()
            else:
                raise ValueError("expected a statement or '}'")

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and stop on the ';'."""
        self.num_args_called_function = 0
        self.compile_subroutine_call()   # stops on ')'
        self.writer.write_pop(TEMP, 0)   # discard the returned value
        self._advance()                  # ';'

    def compile_let(self):
        """Compile ``let name ('[' expr ']')? = expr ;`` and stop on ';'.

        For an array target the element address stays on the stack while the
        right-hand side is compiled and ``pointer 1`` is set only afterwards;
        setting it earlier is overwritten by any array read on the right-hand
        side (``let a[i] = b[j];``).
        """
        tokenizer = self.jack_tokenizer
        array_target = False
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = tokenizer.identifier()
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
            elif token_type == SYMBOL:
                if tokenizer.symbol() == "[":
                    self._advance()
                    self.compile_expression()   # stops on ']'
                    self.writer.write_push(self.find_segment(kind), index)
                    self.writer.write_arithmetic("add")
                    array_target = True
                if tokenizer.symbol() == "=":
                    self._advance()
                    self.compile_expression()   # stops on ';'
                    if array_target:
                        # stack: element address, value
                        self.writer.write_pop(TEMP, 0)
                        self.writer.write_pop("pointer", 1)
                        self.writer.write_push(TEMP, 0)
                        self.writer.write_pop(THAT, 0)
                    else:
                        self.writer.write_pop(self.find_segment(kind), index)
                if tokenizer.symbol() == ";":
                    break

    def compile_while(self):
        """Compile ``while ( expr ) { statements }`` and stop on the '}'."""
        tokenizer = self.jack_tokenizer
        suffix = str(self.label_counter_while)
        self.label_counter_while += 1
        self.writer.write_label("WHILE_EXP" + suffix)
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if self._at_symbol("("):
                self._advance()
                self.compile_expression()   # stops on ')'
                self.writer.write_arithmetic("not")
                self.writer.write_if("WHILE_END" + suffix)
            if self._at_symbol("{"):
                self._advance()
                self.compile_statements()   # stops on '}'
                self.writer.write_go_to("WHILE_EXP" + suffix)
                self.writer.write_label("WHILE_END" + suffix)
            if self._at_symbol("}"):
                break

    def compile_return(self):
        """Compile ``return expr? ;`` and stop on the ';'."""
        self._advance()
        if self._at_symbol(";"):
            # A return without a value still pushes a dummy 0.
            self.writer.write_push(CONST, "0")
        else:
            self.compile_expression()   # stops on ';'
        self.writer.write_return()

    def compile_if(self):
        """Compile ``if ( expr ) { ... } (else { ... })?``.

        Unlike the other statement compilers this one returns with the
        tokenizer already on the token that follows the statement.
        """
        tokenizer = self.jack_tokenizer
        suffix = str(self.label_counter_if)
        self.label_counter_if += 1
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if self._at_symbol("("):
                self._advance()
                self.compile_expression()   # stops on ')'
                self.writer.write_if("IF_TRUE" + suffix)
                self.writer.write_go_to("IF_FALSE" + suffix)
            if self._at_symbol("{"):
                self.writer.write_label("IF_TRUE" + suffix)
                self._advance()
                self.compile_statements()   # stops on '}'
            if self._at_symbol("}"):
                break

        self._advance()
        if self._at_keyword("else"):
            self.writer.write_go_to("IF_END" + suffix)
            self.writer.write_label("IF_FALSE" + suffix)
            self._advance()                 # '{'
            self._advance()                 # first token of the else body
            self.compile_statements()       # stops on '}'
            self._advance()                 # step past '}'
            self.writer.write_label("IF_END" + suffix)
        else:
            self.writer.write_label("IF_FALSE" + suffix)

    def compile_subroutine_call(self):
        """Compile the call of a ``do`` statement; stops on the closing ')'.

        Handles ``name(args)`` (method on the current object),
        ``var.name(args)`` (method on *var*) and ``Class.name(args)``.
        """
        tokenizer = self.jack_tokenizer
        pushes_receiver = False
        self._advance()
        target = tokenizer.identifier()
        target_type = self.symbol_table.type_of(target)
        kind = self.symbol_table.kind_of(target)
        index = self.symbol_table.index_of(target)
        self._advance()

        if self._at_symbol("("):
            pushes_receiver = True
            target = self.class_name + "." + target
            self.writer.write_push(POINTER, 0)
            self.compile_expression_list()
        elif self._at_symbol("."):
            self._advance()                 # subroutine name
            if kind != NONE:                # was the Python-2-only '<>'
                pushes_receiver = True
                self.writer.write_push(self.find_segment(kind), index)
                target = target_type + "." + tokenizer.identifier()
            else:
                target = target + "." + tokenizer.identifier()
            self._advance()                 # '('
            self.compile_expression_list()

        extra = 1 if pushes_receiver else 0
        self.writer.write_call(target, self.num_args_called_function + extra)

    # ------------------------------------------------------------------ #
    # expressions
    # ------------------------------------------------------------------ #
    def compile_expression(self):
        """Compile ``term (op term)*`` strictly left to right.

        Starts on the first token of the expression and stops on the first
        token that is not part of it.
        """
        tokenizer = self.jack_tokenizer
        if self._at_symbol("-"):
            self.is_unary = True
        self.compile_term()
        while self._at_symbol(*self._BINARY_OPS):
            operator = tokenizer.symbol()
            self._advance()
            if self._at_symbol("-"):
                self.is_unary = True
            self.compile_term()
            self.writer.write_arithmetic(self.dic_arithmetic[operator])

    def compile_term(self):
        """Compile a single term and stop on the token that follows it.

        :raises ValueError: if the current token cannot start a term (the
            original looped forever in that case).
        """
        tokenizer = self.jack_tokenizer
        token_type = tokenizer.token_type()

        # Nothing to compile when we already sit on a terminator/operator,
        # unless the caller announced that a leading '-' is a unary minus.
        if (token_type == SYMBOL and not self.is_unary
                and tokenizer.symbol() in self._TERM_END_SYMBOLS):
            return

        if token_type == INT_CONST:
            self.writer.write_push(CONST, tokenizer.int_val())
            self._advance()
        elif token_type == STRING_CONST:
            self.compile_string()
            self._advance()
        elif self._at_keyword("true", "false", "null"):
            self.writer.write_push(CONST, 0)
            if tokenizer.key_word() == "true":
                self.writer.write_arithmetic("not")   # not 0 == -1
            self._advance()
        elif self._at_keyword("this"):
            self.writer.write_push(POINTER, 0)
            self._advance()
        elif self._at_symbol("~", "-"):
            unary_op = tokenizer.symbol()
            self.is_unary = False
            self._advance()
            self.compile_term()
            self.writer.write_arithmetic("not" if unary_op == "~" else "neg")
        elif self._at_symbol("("):
            self._advance()
            self.compile_expression()   # stops on ')'
            self._advance()
        elif token_type == IDENTIFIER:
            self._compile_identifier_term()
        else:
            raise ValueError("unexpected token at the start of a term")

    def _compile_identifier_term(self):
        """Compile a term that starts with an identifier.

        Covers a plain variable, ``name[expr]``, ``name(args)`` and
        ``(Class | var).name(args)``.  Whether the prefix of a dotted call is
        a variable is decided by the symbol table, exactly as
        ``compile_subroutine_call`` does, rather than by the case of its
        first letter.
        """
        tokenizer = self.jack_tokenizer
        name = tokenizer.identifier()
        kind = self.symbol_table.kind_of(name)
        index = self.symbol_table.index_of(name)
        is_variable = kind != NONE
        self._advance()

        if self._at_symbol(*self._TERM_END_SYMBOLS):
            # plain variable
            self.writer.write_push(self.find_segment(kind), index)
        elif self._at_symbol("["):
            self._advance()
            self.compile_expression()   # stops on ']'
            self.writer.write_push(self.find_segment(kind), index)
            self.writer.write_arithmetic("add")
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(THAT, 0)
            self._advance()
        elif self._at_symbol("("):
            # method call on the current object
            self.writer.write_push(POINTER, 0)
            self.compile_expression_list()   # stops on ')'
            self._advance()
            self.writer.write_call(self.class_name + "." + name,
                                   self.num_args_called_function + 1)
        elif self._at_symbol("."):
            self._advance()                  # subroutine name
            if is_variable:
                callee = (self.symbol_table.type_of(name) + "."
                          + tokenizer.identifier())
            else:
                callee = name + "." + tokenizer.identifier()
            self._advance()                  # '('
            if is_variable:
                self.writer.write_push(self.find_segment(kind), index)
            self.compile_expression_list()   # stops on ')'
            extra = 1 if is_variable else 0
            self.writer.write_call(callee,
                                   self.num_args_called_function + extra)
            self._advance()
        else:
            raise ValueError("unexpected token after identifier " + name)

    def compile_expression_list(self):
        """Compile a comma separated argument list.

        Starts on '(' and stops on the matching ')'.  The number of compiled
        expressions is stored in ``num_args_called_function`` and returned.
        """
        tokenizer = self.jack_tokenizer
        num_args = 0
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            if self._at_symbol(")"):
                break
            num_args += 1
            self.compile_expression()
            if tokenizer.symbol() == ")":
                break
            # otherwise the current token is ','
        self.num_args_called_function = num_args
        return num_args

    def find_segment(self, kind):
        """Map a symbol-table kind to its VM segment (None if unknown)."""
        if kind == ARG:
            return ARGUMENT
        if kind == VAR:
            return LCL
        if kind == FIELD:
            return THIS
        if kind == STATIC:
            return STATIC

    def compile_string(self):
        """Build the current string constant with String.new/appendChar."""
        text = self.jack_tokenizer.string_val()
        self.writer.write_push(CONST, len(text))
        self.writer.write_call("String.new", 1)
        for char in text:
            self.writer.write_push(CONST, ord(char))
            self.writer.write_call("String.appendChar", 2)
class JackCompiler:
    """Compile a single ``.jack`` file into VM code.

    The constructor drives the whole compilation: it builds the tokenizer,
    the VM writer (output path: input path with ``.jack`` replaced by
    ``Compiled.vm``) and the symbol table, then calls ``compile_class``.

    Tokenizer protocol used throughout: ``next()`` advances and returns the
    new token's value, ``go_back()`` undoes one ``next()`` and
    ``return_token_value()`` returns the current token's value.
    """

    def __init__(self, file_path):
        """Compile the Jack class stored at *file_path*."""
        self._tokenizer = JackTokenizer(file_path)
        self._vm_writer = VMWriter(file_path.replace(".jack", "Compiled.vm"))
        self._symbol_table = SymbolTable()
        self.class_name = ""
        # Next unused label number; labels are handed out in pairs.
        self.label_value = 1
        self.compile_class()

    # ------------------------------------------------------------------ #
    # helpers
    # ------------------------------------------------------------------ #
    def _new_label_pair(self):
        """Reserve and return two fresh label numbers.

        The numbers are reserved *before* any nested statement is compiled so
        that nested if/while statements can never reuse or shift them.
        """
        first = self.label_value
        self.label_value += 2
        return first, first + 1

    def _segment_for(self, kind):
        """Return the VM segment used for a symbol of the given kind.

        Fields live in the ``this`` segment; every other kind is passed to
        the writer unchanged, as the original push paths did.
        NOTE(review): kind ``"var"`` is forwarded as-is -- presumably
        VMWriter translates it to ``local``; confirm against VMWriter.
        """
        if kind == "field":
            return VMWriter.THIS_SEGMENT
        return kind

    # ------------------------------------------------------------------ #
    # program structure
    # ------------------------------------------------------------------ #
    def compile_class(self):
        """Compile ``class name { classVarDec* subroutineDec* }``."""
        self._tokenizer.next()  # ignore 'class'
        self.class_name = self._tokenizer.next()
        self._tokenizer.next()  # ignore '{'
        while self._tokenizer.next() in ("static", "field"):
            self.compile_class_var_dec()
        self._tokenizer.go_back()
        # The token that ends this loop is the class's closing '}'.
        while self._tokenizer.next() in ("constructor", "method", "function"):
            self.compile_subroutine()

    def compile_class_var_dec(self):
        """Register ``(static|field) type name (',' name)* ';'``."""
        var_kind = self._tokenizer.return_token_value()
        var_type = self._tokenizer.next()
        while True:
            self._tokenizer.next()
            var_name = self._tokenizer.return_token_value()
            self._symbol_table.define(var_name, var_type, var_kind)
            if self._tokenizer.next() == ';':   # otherwise it was ','
                break

    def compile_subroutine(self):
        """Compile one constructor, function or method including its body."""
        self._symbol_table.start_subroutine()
        subroutine_type = self._tokenizer.return_token_value()
        self._tokenizer.next()  # ignore return type
        subroutine_name = self._tokenizer.next()

        # A method receives the object it operates on as argument 0.
        if subroutine_type == "method":
            self._symbol_table.define("this", self.class_name, "argument")

        self._tokenizer.next()  # ignore '('
        self.compile_parameter_list()
        self._tokenizer.next()  # ignore ')'
        self._tokenizer.next()  # ignore '{'

        while self._tokenizer.next() == "var":
            self.compile_var_dec()
        self._tokenizer.go_back()

        self._vm_writer.write_function(self.class_name + "." + subroutine_name,
                                       self._symbol_table.var_count("var"))

        # TODO: handle constructor inside constructor as in SquareGame.new()
        if subroutine_type == "constructor":
            # Allocate one word per field and make the block the current object.
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT,
                                       self._symbol_table.var_count("field"))
            self._vm_writer.write_call("Memory.alloc", 1)
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 0)
        if subroutine_type == "method":
            # Bind 'this' to the receiver.  The remaining arguments are read
            # from the argument segment on use; copying them into the 'this'
            # segment (as the previous version did) overwrote the fields.
            self._vm_writer.write_push(VMWriter.ARG_SEGMENT, 0)
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 0)

        self.compile_statements()
        self._tokenizer.next()  # ignore closing '}'

    def compile_parameter_list(self):
        """Register the parameters; leaves the closing ')' unconsumed."""
        if self._tokenizer.next() == ')':
            self._tokenizer.go_back()
            return
        self._tokenizer.go_back()
        parameter_type = self._tokenizer.next()
        parameter_name = self._tokenizer.next()
        self._symbol_table.define(parameter_name, parameter_type, "argument")
        while self._tokenizer.next() != ')':    # consumes the ','
            parameter_type = self._tokenizer.next()
            parameter_name = self._tokenizer.next()
            self._symbol_table.define(parameter_name, parameter_type,
                                      "argument")
        self._tokenizer.go_back()

    def compile_var_dec(self):
        """Register ``var type name (',' name)* ';'``; consumes the ';'.

        Separator commas are skipped; previously each ',' was defined as a
        variable of its own, inflating the local count.
        """
        var_kind = self._tokenizer.return_token_value()
        self._tokenizer.next()
        var_type = self._tokenizer.return_token_value()
        while self._tokenizer.next() != ';':
            var_name = self._tokenizer.return_token_value()
            if var_name == ',':
                continue
            self._symbol_table.define(var_name, var_type, var_kind)

    # ------------------------------------------------------------------ #
    # statements
    # ------------------------------------------------------------------ #
    def compile_statements(self):
        """Compile statements; leaves the closing '}' unconsumed."""
        handlers = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return,
        }
        while self._tokenizer.next() != "}":
            handler = handlers.get(self._tokenizer.return_token_value())
            if handler is not None:
                handler()
        self._tokenizer.go_back()

    def compile_let(self):
        """Compile ``let name ('[' expr ']')? = expr ;``."""
        name = self._tokenizer.next()
        index = self._symbol_table.index_of(name)
        kind = self._symbol_table.kind_of(name)
        array_access = False

        if self._tokenizer.next() == "[":
            self._vm_writer.write_push(self._segment_for(kind), index)
            self.compile_expression()
            # Same operator token as every other arithmetic call in this
            # class (the previous "add" was the only command-name spelling).
            self._vm_writer.write_arithmetic("+")   # element address
            self._tokenizer.next()  # ignore ']'
            array_access = True
        else:
            self._tokenizer.go_back()

        self._tokenizer.next()  # ignore '='
        self.compile_expression()
        self._tokenizer.next()  # ignore ';'

        if array_access:
            # stack: element address, value.  Park the value in temp 0 so
            # the address can be loaded into THAT after the right-hand side
            # (which may itself use THAT) has been evaluated.
            self._vm_writer.write_pop(VMWriter.TEMP_SEGMENT, 0)
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 1)
            self._vm_writer.write_push(VMWriter.TEMP_SEGMENT, 0)
            self._vm_writer.write_pop(VMWriter.THAT_SEGMENT, 0)
        else:
            self._vm_writer.write_pop(self._segment_for(kind), index)

    def compile_if(self):
        """Compile ``if ( expr ) { ... } (else { ... })?``."""
        false_label, end_label = self._new_label_pair()
        self._tokenizer.next()  # ignore '('
        self.compile_expression()
        self._vm_writer.write_arithmetic("~")
        self._vm_writer.write_if(false_label)   # skip the body when false
        self._tokenizer.next()  # ignore ')'
        self._tokenizer.next()  # ignore '{'
        self.compile_statements()
        self._tokenizer.next()  # ignore '}'

        if self._tokenizer.next() == "else":
            self._tokenizer.next()  # ignore '{'
            self._vm_writer.write_goto(end_label)
            self._vm_writer.write_label(false_label)
            self.compile_statements()
            self._vm_writer.write_label(end_label)
            self._tokenizer.next()  # ignore '}'
        else:
            self._tokenizer.go_back()
            self._vm_writer.write_label(false_label)

    def compile_while(self):
        """Compile ``while ( expr ) { statements }``."""
        loop_label, exit_label = self._new_label_pair()
        self._tokenizer.next()  # ignore '('
        self._vm_writer.write_label(loop_label)
        self.compile_expression()
        self._vm_writer.write_arithmetic("~")
        self._vm_writer.write_if(exit_label)
        self._tokenizer.next()  # ignore ')'
        self._tokenizer.next()  # ignore '{'
        self.compile_statements()
        self._vm_writer.write_goto(loop_label)
        self._vm_writer.write_label(exit_label)
        self._tokenizer.next()  # ignore '}'

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.compile_subroutine_call()
        self._vm_writer.write_pop(VMWriter.TEMP_SEGMENT, 0)
        self._tokenizer.next()  # ignore ';'

    def compile_return(self):
        """Compile ``return expr? ;``."""
        if self._tokenizer.next() == ";":
            # void subroutines still have to return a value
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, 0)
        else:
            self._tokenizer.go_back()
            self.compile_expression()
            self._tokenizer.next()  # ignore ';'
        self._vm_writer.write_return()

    def compile_subroutine_call(self, sub_name=None):
        """Compile a subroutine call including its argument list.

        :param sub_name: first identifier of the call when the caller
            (``compile_term``) has already read it; otherwise it is taken
            from the next token.
        """
        if not sub_name:
            sub_name = self._tokenizer.next()

        args = 0
        if self._tokenizer.next() == '.':
            callee_name = sub_name
            sub_name = self._tokenizer.next()
            kind = self._symbol_table.kind_of(callee_name)
            if kind is None:
                # function call of the form Math.multiply(...)
                name = callee_name + "." + sub_name
            else:
                # method call of the form object.draw(...): push the receiver
                name = self._symbol_table.type_of(callee_name) + "." + sub_name
                self._vm_writer.write_push(
                    self._segment_for(kind),
                    self._symbol_table.index_of(callee_name))
                args = 1
        else:
            # implicit method call on the current object
            self._tokenizer.go_back()
            name = self.class_name + "." + sub_name
            self._vm_writer.write_push(VMWriter.POINTER_SEGMENT, 0)
            args = 1

        self._tokenizer.next()  # ignore '('
        args += self.compile_expression_list()
        self._tokenizer.next()  # ignore ')'
        self._vm_writer.write_call(name, args)

    # ------------------------------------------------------------------ #
    # expressions
    # ------------------------------------------------------------------ #
    def compile_expression_list(self):
        """Compile the call arguments and return how many there were.

        Leaves the closing ')' unconsumed.
        """
        if self._tokenizer.next() == ')':
            self._tokenizer.go_back()
            return 0
        self._tokenizer.go_back()
        self.compile_expression()
        args_count = 1
        while self._tokenizer.next() != ')':    # consumes the ','
            self.compile_expression()
            args_count += 1
        self._tokenizer.go_back()
        return args_count

    def compile_expression(self):
        """Compile ``term (op term)*``; leaves the terminator unconsumed."""
        self.compile_term()
        while True:
            op = self._tokenizer.next()
            if op in (")", "]", ",", ";"):      # end of the expression
                self._tokenizer.go_back()
                break
            self.compile_term()
            # '*' and '/' are OS calls; a VM function name has no "()".
            if op == "/":
                self._vm_writer.write_call("Math.divide", 2)
            elif op == "*":
                self._vm_writer.write_call("Math.multiply", 2)
            else:
                self._vm_writer.write_arithmetic(op)

    def compile_term(self):
        """Compile one term, consuming exactly the tokens that belong to it."""
        self._tokenizer.next()
        token_type = self._tokenizer.token_type()
        token_value = self._tokenizer.return_token_value()

        if token_type == JackTokenizer.INT_CONST_TOKEN:
            self.compile_integer(token_value)
        elif token_type == JackTokenizer.STRING_CONST_TOKEN:
            self.compile_string(token_value)
        elif token_type == JackTokenizer.KEYWORD_TOKEN:
            # only true, false, null and this
            self.compile_keyword(token_value)
        elif token_type == JackTokenizer.IDENTIFIER_TOKEN:
            # peek one token to tell a call from a variable / array access
            is_call = self._tokenizer.next() in ("(", ".")
            self._tokenizer.go_back()
            if is_call:
                self.compile_subroutine_call(token_value)
            else:
                self.compile_var_name(token_value)
        elif token_value == "(":
            self.compile_expression()
            self._tokenizer.next()  # ignore ')'
        elif (token_type == JackTokenizer.SYMBOL_TOKEN
              and token_value in ("-", "~")):
            self.compile_term()
            # NOTE(review): unary '-' is written with the same token as
            # binary minus; confirm that VMWriter can emit 'neg' for it.
            self._vm_writer.write_arithmetic(token_value)
        else:
            raise SystemExit("Invalid term")

    def compile_string(self, string):
        """Build *string* at run time with String.new and String.appendChar."""
        self._vm_writer.write_push(VMWriter.CONST_SEGMENT, len(string))
        self._vm_writer.write_call("String.new", 1)
        for char in string:
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, ord(char))
            # appendChar is *called* with two arguments: the string, the char
            self._vm_writer.write_call("String.appendChar", 2)

    def compile_integer(self, value):
        """Push an integer constant."""
        self._vm_writer.write_push(VMWriter.CONST_SEGMENT, value)

    def compile_keyword(self, keyword):
        """Push the value of ``true``, ``false``, ``null`` or ``this``."""
        if keyword == "true":
            # true is "not 0"; use the same '~' token as compile_if/while
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, 0)
            self._vm_writer.write_arithmetic("~")
        elif keyword == "this":
            self._vm_writer.write_push(VMWriter.POINTER_SEGMENT, 0)
        else:  # false and null
            self._vm_writer.write_push(VMWriter.CONST_SEGMENT, 0)

    def compile_var_name(self, name):
        """Push a variable, or the element of ``name[expr]``."""
        index = self._symbol_table.index_of(name)
        kind = self._symbol_table.kind_of(name)
        if not kind:  # index can be 0, so the kind decides whether it exists
            raise SystemExit("access to undefined variable")
        self._vm_writer.write_push(self._segment_for(kind), index)

        if self._tokenizer.next() == "[":
            self.compile_expression()
            self._tokenizer.next()  # ignore ']'
            self._vm_writer.write_arithmetic("+")
            self._vm_writer.write_pop(VMWriter.POINTER_SEGMENT, 1)
            self._vm_writer.write_push(VMWriter.THAT_SEGMENT, 0)
        else:
            self._tokenizer.go_back()
class CompilationEngine:
    """Compile a tokenized Jack class into VM code.

    Input is a list of ``(token, token_type)`` pairs that is consumed from the
    front.  While parsing, an element tree mirroring the grammar is built under
    ``self.compiled_output_root`` and VM commands are emitted through
    ``self.vm_writer``.
    """

    def __init__(self, tokens_with_tokenType, out_vm_file):
        """Set up the symbol table, the VM writer and the label counters.

        :param tokens_with_tokenType: list of ``(token, token_type)`` pairs;
            it is mutated (tokens are popped from the front) during compilation.
        :param out_vm_file: output path object; its ``stem`` is used as the
            class name and the object itself is handed to ``VMWriter``.
        """
        self.tokens_with_tokenType = tokens_with_tokenType
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(out_vm_file)
        self.class_name = out_vm_file.stem
        self.construct_op_dict()
        self.construct_segment_dict()
        self.while_label_index = 0
        self.if_else_label_index = 0

    def construct_op_dict(self):
        """Map Jack binary operators to VM arithmetic commands.

        ``*`` and ``/`` are absent on purpose: they are compiled to OS calls.
        """
        self.op_dict = {
            '+': 'add',
            '-': 'sub',
            '&': 'and',
            '|': 'or',
            '<': 'lt',
            '>': 'gt',
            '=': 'eq',
        }

    def construct_segment_dict(self):
        """Translate the kind of variable to related memory segment name"""
        self.segment_dict = {
            'STATIC': 'static',
            'FIELD': 'this',
            'ARG': 'argument',
            'VAR': 'local',
        }

    def compile(self):
        """Compile all tokens and close the VM writer.

        The writer is closed even when compilation raises, so the output
        handle is not leaked on malformed input.
        """
        try:
            self.compile_tokens()
        finally:
            self.vm_writer.close()

    def compile_tokens(self):
        """Compile the class and return the resulting element tree."""
        self.compiled_output_root = etree.Element('class')
        self.compile_class()
        return etree.ElementTree(self.compiled_output_root)

    # ------------------------------------------------------------------
    # Token helpers
    # ------------------------------------------------------------------
    def compile_new_token_ensure_token_type(self, correct_token_type, parent):
        """Consume the next token and assert that its type is as expected."""
        token, token_type = self.compile_new_token(parent)
        assert token_type == correct_token_type, '{} with token_type {} not expected'.format(
            token, token_type)

    def compile_new_token_ensure_token(self, correct_token, parent):
        """Consume the next token and assert that its text is as expected."""
        token, token_type = self.compile_new_token(parent)
        assert token == correct_token, '{} with token_type {} not expected'.format(
            token, token_type)

    def compile_new_token(self, parent):
        """Consume the next token, record it under ``parent`` and return it.

        :return: the consumed ``(token, token_type)`` pair.
        """
        token, token_type = self.next_token_and_type()
        self.add_sub_element(parent, token_type, token)
        return token, token_type

    def add_sub_element(self, parent, element_tag, element_text):
        """Append ``<element_tag> element_text </element_tag>`` to ``parent``."""
        new_element = etree.SubElement(parent, element_tag)
        new_element.text = ' ' + element_text + ' '

    def next_token_and_type(self):
        """Remove and return the first ``(token, token_type)`` pair."""
        return self.tokens_with_tokenType.pop(0)

    def show_next_token(self):
        """Return the text of the next token without consuming it."""
        token, _ = self.tokens_with_tokenType[0]
        return token

    def show_next_token_and_type(self):
        """Return the next ``(token, token_type)`` pair without consuming it."""
        return self.tokens_with_tokenType[0]

    # ------------------------------------------------------------------
    # Program structure
    # ------------------------------------------------------------------
    def compile_class(self):
        """
        Compiles a complete class.
        class: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        root = self.compiled_output_root
        self.compile_new_token_ensure_token('class', root)
        self.compile_new_token_ensure_token_type('identifier', root)
        self.compile_new_token_ensure_token('{', root)
        self.compile_classVarDec()
        self.compile_subroutineDec()
        self.compile_new_token_ensure_token('}', root)

    def compile_classVarDec(self):
        """
        Compiles every consecutive static or field declaration.
        classVarDec: ('static' | 'field') type varName (',' varName)* ';'
        """
        # Iterative on purpose: one Python frame per declaration would
        # overflow the recursion limit on large classes.
        while True:
            token = self.show_next_token()
            if token not in {'static', 'field'}:
                return
            class_var_dec = etree.SubElement(
                self.compiled_output_root, 'classVarDec')
            symbol_kind = token.upper()
            self.compile_new_token(class_var_dec)  # 'static' or 'field'
            symbol_type = self.compile_type(class_var_dec)
            self.compile_one_or_more_varName(class_var_dec, symbol_type,
                                             symbol_kind)
            self.compile_new_token_ensure_token(';', class_var_dec)

    def compile_one_or_more_varName(self, parent, symbol_type, symbol_kind):
        """Compile ``varName (',' varName)*`` and define each name."""
        self.add_new_symbol(symbol_type, symbol_kind)
        self.compile_new_token_ensure_token_type('identifier', parent)
        self.compile_more_varName_if_exist(parent, symbol_type, symbol_kind)

    def add_new_symbol(self, symbol_type, symbol_kind):
        """Next token is symbol_name, add this symbol_name and its symbol_type
        and symbol_kind to self.symbol_table"""
        symbol_name = self.show_next_token()
        self.symbol_table.define(symbol_name, symbol_type, symbol_kind)

    def compile_more_varName_if_exist(self, parent, symbol_type, symbol_kind):
        """Compile every remaining ``',' varName`` of a declaration."""
        while self.show_next_token() == ',':
            self.compile_new_token(parent)  # ','
            self.add_new_symbol(symbol_type, symbol_kind)
            self.compile_new_token_ensure_token_type('identifier', parent)

    def compile_type(self, parent):
        """
        Compiles type for var and add token element to parent.
        type: 'int' | 'char' | 'boolean' | className

        :return: the type token.
        """
        token, token_type = self.compile_new_token(parent)
        assert token in {'int', 'char', 'boolean'
                         } or token_type == 'identifier'
        return token

    def compile_void_or_type(self, parent):
        """
        Compiles type or 'void' for var and add token element to parent.
        """
        token, token_type = self.compile_new_token(parent)
        assert token in {'void', 'int', 'char', 'boolean'
                         } or token_type == 'identifier'

    def compile_subroutineDec(self):
        """
        Compiles every consecutive method, function, or constructor.
        subroutineDec: ('constructor' | 'function' | 'method') ('void' | type)
                       subroutineName '(' parameterList ')' subroutineBody
        """
        # Iterative for the same reason as compile_classVarDec.
        while True:
            function_kind = self.show_next_token()
            if function_kind not in {'constructor', 'function', 'method'}:
                return
            # Reset the subroutine's symbol table
            self.symbol_table.start_subroutine()
            subroutine_dec = etree.SubElement(
                self.compiled_output_root, 'subroutineDec')
            self.compile_new_token(subroutine_dec)  # subroutine kind keyword
            self.compile_void_or_type(subroutine_dec)
            # subroutineName
            function_name = self.class_name + '.' + self.show_next_token()
            self.compile_new_token_ensure_token_type('identifier',
                                                     subroutine_dec)
            self.compile_new_token_ensure_token('(', subroutine_dec)
            # parameterList
            if function_kind == 'method':
                # Dummy ARG symbol so that the declared parameters start at
                # argument 1: a method with k arguments really operates on
                # k+1 arguments, argument 0 always being the object itself.
                self.symbol_table.define('this', 'int', 'ARG')
            self.compile_parameterList(subroutine_dec)
            self.compile_new_token_ensure_token(')', subroutine_dec)
            # subroutineBody
            self.compile_subroutineBody(subroutine_dec, function_name,
                                        function_kind)

    def compile_parameterList(self, parent):
        """
        ((type varName) (',' type varName)*)?
        """
        parameter_list = etree.SubElement(parent, 'parameterList')
        token, token_type = self.show_next_token_and_type()
        if token == ')':
            # Empty list: only adjust how the empty element is printed.
            parameter_list.text = '\n\t'
            return
        assert token in {'int', 'char', 'boolean'
                         } or token_type == 'identifier'
        self.compile_new_token(parameter_list)  # type
        self.add_new_symbol(token, 'ARG')
        self.compile_new_token_ensure_token_type('identifier', parameter_list)
        self.compile_more_parameter(parameter_list)

    def compile_subroutineBody(self, parent, function_name, function_kind):
        """
        subroutineBody: '{' varDec* statements '}'

        The ``function`` command is written after the varDecs so that the
        number of locals is known.
        """
        body = etree.SubElement(parent, 'subroutineBody')
        self.compile_new_token_ensure_token('{', body)
        self.compile_varDec(body)
        local_vars_num = self.symbol_table.count_symbol_by_kind('VAR')
        self.vm_writer.write_function(function_name, local_vars_num)
        if function_kind == 'constructor':
            # this = Memory.alloc(fields_num)
            fields_num = self.symbol_table.count_symbol_by_kind('FIELD')
            self.vm_writer.write_push('constant', fields_num)
            self.vm_writer.write_call('Memory.alloc', 1)
            self.vm_writer.write_pop('pointer', 0)
        elif function_kind == 'method':
            # Align the this segment with the object passed as argument 0.
            self.vm_writer.write_push('argument', 0)
            self.vm_writer.write_pop('pointer', 0)
        statements = etree.SubElement(body, 'statements')
        self.compile_statements(statements)
        self.compile_new_token_ensure_token('}', body)

    def compile_more_parameter(self, parent):
        """Compile every remaining ``',' type varName`` of a parameter list."""
        while self.show_next_token() == ',':
            self.compile_new_token(parent)  # ','
            symbol_type = self.compile_type(parent)
            self.add_new_symbol(symbol_type, 'ARG')
            self.compile_new_token_ensure_token_type('identifier', parent)

    def compile_varDec(self, parent):
        """varDec: 'var' type varName (',' varName)* ';' (zero or more)"""
        while True:
            token = self.show_next_token()
            if token != 'var':
                return
            var_dec = etree.SubElement(parent, 'varDec')
            symbol_kind = token.upper()
            self.compile_new_token(var_dec)  # 'var'
            symbol_type = self.compile_type(var_dec)
            self.compile_one_or_more_varName(var_dec, symbol_type, symbol_kind)
            self.compile_new_token_ensure_token(';', var_dec)

    # ------------------------------------------------------------------
    # Statements
    # ------------------------------------------------------------------
    def compile_statements(self, parent):
        """statement: letStatement | ifStatement | whileStatement |
        doStatement | returnStatement (zero or more)"""
        handlers = {
            'let': self.compile_statement_let,
            'if': self.compile_statement_if,
            'while': self.compile_statement_while,
            'do': self.compile_statement_do,
            'return': self.compile_statement_return,
        }
        # A loop instead of self-recursion: recursion used one extra frame per
        # statement and failed on blocks with roughly a thousand statements.
        while True:
            handler = handlers.get(self.show_next_token())
            if handler is None:
                return
            handler(parent)

    def compile_statement_let(self, parent):
        """
        letStatement: 'let' varName ('[' expression ']')? '=' expression ';'

        Plain variable: the expression value is popped into varName.
        Array element ``arr[e1] = e2``:
            push arr / push e1 / add / push e2
            pop temp 0 / pop pointer 1 / push temp 0 / pop that 0
        ``pointer 1`` is set only after e2 is evaluated because e2 may itself
        access arrays (``a[i] = b[j]``) and would overwrite it.
        """
        statement = etree.SubElement(parent, 'letStatement')
        self.compile_new_token_ensure_token('let', statement)
        symbol_name = self.show_next_token()
        self.compile_new_token_ensure_token_type('identifier', statement)
        is_array = self.show_next_token() == '['
        if is_array:
            self.write_push_variable(symbol_name)
            self.compile_new_token(statement)  # '['
            self.compile_expression(statement)
            self.vm_writer.write_arithmetic('add')
            self.compile_new_token_ensure_token(']', statement)
        self.compile_new_token_ensure_token('=', statement)
        self.compile_expression(statement)
        if is_array:
            self.vm_writer.write_pop('temp', 0)
            self.vm_writer.write_pop('pointer', 1)
            self.vm_writer.write_push('temp', 0)
            self.vm_writer.write_pop('that', 0)
        else:
            self.write_pop_variable(symbol_name)
        self.compile_new_token_ensure_token(';', statement)

    def compile_statement_if(self, parent):
        """
        ifStatement: 'if' '(' expression ')' '{' statements '}'
                     ('else' '{' statements '}')?

        code: if (cond) s1 else s2
        vm:   cond / not / if-goto L1 / s1 / goto L2 / label L1 / s2 / label L2
        """
        statement = etree.SubElement(parent, 'ifStatement')
        self.compile_new_token_ensure_token('if', statement)
        self.if_else_label_index += 1
        # Labels are fixed before compiling the body so nested ifs, which
        # bump the counter, cannot change them.
        else_start_label_name = 'ELSE_START_{}_{}'.format(
            self.class_name.upper(), self.if_else_label_index)
        if_else_end_label_name = 'IF_ELSE_END_{}_{}'.format(
            self.class_name.upper(), self.if_else_label_index)
        self.compile_new_token_ensure_token('(', statement)
        self.compile_expression(statement)
        self.vm_writer.write_arithmetic('not')
        self.vm_writer.write_if_goto(else_start_label_name)
        self.compile_new_token_ensure_token(')', statement)
        self.compile_new_token_ensure_token('{', statement)
        if_statements = etree.SubElement(statement, 'statements')
        self.compile_statements(if_statements)
        self.vm_writer.write_goto(if_else_end_label_name)
        self.compile_new_token_ensure_token('}', statement)
        self.vm_writer.write_label(else_start_label_name)
        if self.show_next_token() == 'else':
            self.compile_new_token_ensure_token('else', statement)
            self.compile_new_token_ensure_token('{', statement)
            else_statements = etree.SubElement(statement, 'statements')
            self.compile_statements(else_statements)
            self.compile_new_token_ensure_token('}', statement)
        self.vm_writer.write_label(if_else_end_label_name)

    def compile_statement_while(self, parent):
        """
        whileStatement: 'while' '(' expression ')' '{' statements '}'

        code: while (cond) s1
        vm:   label L1 / cond / not / if-goto L2 / s1 / goto L1 / label L2
        """
        statement = etree.SubElement(parent, 'whileStatement')
        self.compile_new_token_ensure_token('while', statement)
        self.while_label_index += 1
        while_start_label_name = 'WHILE_START_{}_{}'.format(
            self.class_name.upper(), self.while_label_index)
        while_end_label_name = 'WHILE_END_{}_{}'.format(
            self.class_name.upper(), self.while_label_index)
        self.vm_writer.write_label(while_start_label_name)
        self.compile_new_token_ensure_token('(', statement)
        self.compile_expression(statement)
        self.vm_writer.write_arithmetic('not')
        self.vm_writer.write_if_goto(while_end_label_name)
        self.compile_new_token_ensure_token(')', statement)
        self.compile_new_token_ensure_token('{', statement)
        body_statements = etree.SubElement(statement, 'statements')
        self.compile_statements(body_statements)
        self.vm_writer.write_goto(while_start_label_name)
        self.vm_writer.write_label(while_end_label_name)
        self.compile_new_token_ensure_token('}', statement)

    def compile_statement_do(self, parent):
        """
        doStatement: 'do' subroutineCall ';'
        """
        statement = etree.SubElement(parent, 'doStatement')
        self.compile_new_token_ensure_token('do', statement)
        self.compile_subroutineCall(statement)
        # A do statement ignores the callee's return value, so it is popped
        # off the stack and discarded.
        self.vm_writer.write_pop('temp', 0)
        self.compile_new_token_ensure_token(';', statement)

    def compile_subroutineCall(self, parent):
        """
        subroutineCall: subroutineName '(' expressionList ')' |
                        (className | varName) '.' subroutineName
                        '(' expressionList ')'
        """
        name = self.show_next_token()
        # subroutineName or className or varName
        self.compile_new_token_ensure_token_type('identifier', parent)
        if self.show_next_token() == '.':
            self.compile_new_token_ensure_token('.', parent)
            symbol_type = self.symbol_table.get_symbol_type(name)
            if not symbol_type:
                # Unknown to the symbol table, so ``name`` is a class name:
                # the call targets className.subroutineName as written.
                function_name = name + '.' + self.show_next_token()
                args_num_should_add_1 = False
            else:
                # ``name`` is a variable: push the object it refers to as the
                # hidden first argument and call its type's subroutine.
                args_num_should_add_1 = True
                self.write_push_variable(name)
                function_name = symbol_type + '.' + self.show_next_token()
            # subroutineName
            self.compile_new_token_ensure_token_type('identifier', parent)
        else:
            # Bare subroutineName: a method of the current class, called on
            # the current object (pointer 0).
            self.vm_writer.write_push('pointer', 0)
            function_name = self.class_name + '.' + name
            args_num_should_add_1 = True
        self.compile_new_token_ensure_token('(', parent)
        self.compile_expressionList(parent, function_name,
                                    args_num_should_add_1)
        self.compile_new_token_ensure_token(')', parent)

    def compile_statement_return(self, parent):
        """
        ReturnStatement 'return' expression? ';'
        """
        statement = etree.SubElement(parent, 'returnStatement')
        self.compile_new_token_ensure_token('return', statement)
        if self.show_next_token() != ';':
            self.compile_expression(statement)
        else:
            # void functions return the constant 0
            self.vm_writer.write_push('constant', 0)
        self.vm_writer.write_return()
        self.compile_new_token_ensure_token(';', statement)

    # ------------------------------------------------------------------
    # Expressions
    # ------------------------------------------------------------------
    def compile_expression(self, parent):
        """
        expression: term (op term)*
        """
        expression = etree.SubElement(parent, 'expression')
        self.compile_term(expression)
        self.compile_zero_or_more_op_and_term(expression)

    def compile_term(self, parent):
        """
        term: integerConstant | stringConstant | keywordConstant | varName |
              varName '[' expression ']' | subroutineCall |
              '(' expression ')' | unaryOp term

        :raises ValueError: if the next token cannot start a term.
        """
        term = etree.SubElement(parent, 'term')
        next_token, token_type = self.show_next_token_and_type()
        if token_type == 'integerConstant' or next_token in {
                'true', 'false', 'null', 'this'
        }:
            if token_type == 'integerConstant':
                self.vm_writer.write_push('constant', next_token)
            elif next_token == 'true':
                # true is -1: every one of the 16 bits set
                self.vm_writer.write_push('constant', 1)
                self.vm_writer.write_arithmetic('neg')
            elif next_token == 'false' or next_token == 'null':
                self.vm_writer.write_push('constant', 0)
            else:
                # 'this' is the content of pointer 0
                self.vm_writer.write_push('pointer', 0)
            self.compile_new_token(term)
        elif token_type == 'stringConstant':
            token, token_type = self.next_token_and_type()
            # Drop the first and last character (the surrounding quotes).
            string = token[1:-1]
            # Build the string with the OS: String.new(length), then one
            # String.appendChar(char) per character.
            self.vm_writer.write_push('constant', len(string))
            self.vm_writer.write_call('String.new', 1)
            for char in string:
                self.vm_writer.write_push('constant', ord(char))
                self.vm_writer.write_call('String.appendChar', 2)
            self.add_sub_element(term, token_type, string)
        elif token_type == 'identifier':
            # One more token of lookahead tells the identifier forms apart.
            next_next_token, _ = self.tokens_with_tokenType[1]
            if next_next_token == '[':
                # a[i]: push a / push i / add / pop pointer 1 / push that 0
                self.write_push_variable(next_token)
                self.compile_new_token_ensure_token_type('identifier', term)
                self.compile_new_token_ensure_token('[', term)
                self.compile_expression(term)
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('pointer', 1)
                self.vm_writer.write_push('that', 0)
                self.compile_new_token_ensure_token(']', term)
            elif next_next_token == '(' or next_next_token == '.':
                self.compile_subroutineCall(term)
            else:
                # A single varName
                self.write_push_variable(next_token)
                self.compile_new_token_ensure_token_type('identifier', term)
        elif next_token == '(':
            self.compile_new_token(term)
            self.compile_expression(term)
            self.compile_new_token_ensure_token(')', term)
        elif next_token in {'-', '~'}:
            # unaryOp term
            self.compile_new_token(term)
            self.compile_term(term)
            if next_token == '-':
                self.vm_writer.write_arithmetic('neg')
            else:
                self.vm_writer.write_arithmetic('not')
        else:
            # Raising a plain string is itself a TypeError in Python 3, so a
            # real exception carrying the same message is raised instead.
            raise ValueError('Not a valid expression')

    def compile_zero_or_more_op_and_term(self, parent):
        """
        op: '+' | '-' | '*' | '/' | '&' | '|' | '<' | '>' | '='

        Compiles every ``op term`` pair that follows, emitting the operator
        right after its right-hand term.
        """
        operators = {'+', '-', '*', '/', '&', '|', '<', '>', '='}
        while True:
            next_token = self.show_next_token()
            if next_token not in operators:
                return
            self.compile_new_token(parent)  # op
            self.compile_term(parent)
            if next_token == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif next_token == '/':
                self.vm_writer.write_call('Math.divide', 2)
            else:
                self.vm_writer.write_arithmetic(self.op_dict[next_token])

    def compile_expressionList(self, parent, function_name,
                               args_num_should_add_1):
        """
        expressionList: (expression (',' expression)* )?

        Compiles the arguments and then writes ``call function_name n``.

        The argument count is kept in a local variable.  It used to live only
        in ``self.args_num``, which a nested call inside an argument (for
        example ``f(g(a, b))``) overwrote, producing a wrong count for the
        outer call.  ``self.args_num`` is still set, to the count of the list
        that was completed last.

        :param args_num_should_add_1: true when the object the subroutine
            operates on was already pushed and counts as an argument.
        """
        expression_list = etree.SubElement(parent, 'expressionList')
        args_num = 1 if args_num_should_add_1 else 0
        if self.show_next_token() == ')':
            # No expression: only adjust how the empty element is printed.
            expression_list.text = '\n\t'
        else:
            self.compile_expression(expression_list)
            args_num += 1
            args_num += self.compile_comma_and_expression(expression_list)
        self.args_num = args_num
        self.vm_writer.write_call(function_name, args_num)

    def compile_comma_and_expression(self, parent):
        """Compile every remaining ``',' expression`` of an expression list.

        :return: the number of expressions compiled by this call.
        """
        compiled = 0
        while self.show_next_token() == ',':
            self.compile_new_token_ensure_token(',', parent)
            self.compile_expression(parent)
            compiled += 1
        return compiled

    # ------------------------------------------------------------------
    # Variable access
    # ------------------------------------------------------------------
    def write_push_variable(self, symbol_name):
        """Push the value of variable to working stack"""
        index = self.symbol_table.get_symbol_index(symbol_name)
        symbol_kind = self.symbol_table.get_symbol_kind(symbol_name)
        self.vm_writer.write_push(self.segment_dict[symbol_kind], index)

    def write_pop_variable(self, symbol_name):
        """Pop the top value of the working stack to variable"""
        index = self.symbol_table.get_symbol_index(symbol_name)
        symbol_kind = self.symbol_table.get_symbol_kind(symbol_name)
        self.vm_writer.write_pop(self.segment_dict[symbol_kind], index)
class CompilationEngine:
    """Compile the statements and expressions of a Jack class to VM code.

    Tokens are read from ``self.tokenizer`` (always positioned on the current,
    not yet consumed token) and VM commands are emitted through
    ``self.writer``.  Each ``compile_*`` method also receives the XML element
    (``caller``) under which it records sub-elements.
    """

    def __init__(self, source):
        """Create tokenizer, symbol table and writer, then compile the class.

        :param source: passed unchanged to both ``Tokenizer`` and ``VMWriter``.
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        # NOTE(review): compile_class is not defined in the visible part of
        # this class -- confirm it is provided elsewhere.
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """Fill the table mapping Jack binary operators to VM commands."""
        self.arithmetic_op = {
            '+': "add",
            '-': "sub",
            '*': "call Math.multiply 2",
            '/': "call Math.divide 2",
            '&': "and",
            '|': "or",
            '<': "lt",
            '>': "gt",
            '=': "eq",
        }

    def next(self):
        """
        Proceed to the next token, if the tokenizer has one.
        :return:
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*

        Each operator is emitted immediately after its right-hand term, so a
        chain is evaluated left to right (``a - b - c`` is ``(a - b) - c``).
        Previously all operators were stacked and emitted after the last term,
        which evaluated chains right to left (``a - (b - c)``).

        :param caller: XML element receiving one TERM sub-element per term.
        :return:
        """
        self.compile_term(SubElement(caller, TERM))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() in OPERATORS):
            operator = self.tokenizer.symbol()
            self.next()
            self.compile_term(SubElement(caller, TERM))
            self.writer.write_arithmetic(self.arithmetic_op[operator])

    def compile_expressionList(self, caller):
        """
        Compiles a possibly empty, comma separated list of expressions.

        :param caller: XML element receiving one EXPRESSION per argument.
        :return: the number of expressions compiled.
        """
        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            # Empty list: nothing to compile.
            caller.text = " "
            return 0
        num_of_args = 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            num_of_args += 1
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """
        Compiles a subroutine call whose first identifier is already consumed.

        :param caller: XML element receiving the expression list.
        :param first_token: the identifier that preceded the current token
            (a subroutine, class or variable name).
        :return:
        """
        func_name = first_token
        is_method = 0
        if self.tokenizer.symbol() == '.':
            self.next()
            if self.symbols.kind_of(func_name):
                # Known variable: push the object it holds as the hidden
                # first argument and call the subroutine of its type.
                segment = Kind.get_segment(self.symbols.kind_of(func_name))
                index = self.symbols.index_of(func_name)
                self.writer.write_push(segment, index)
                func_name = self.symbols.type_of(func_name)
                is_method = 1
            func_name = func_name + "." + self.tokenizer.identifier()
            self.next()
        else:
            # Unqualified name: a method of this class on the current object.
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1
        self.next()  # past '('
        num_of_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST)) + is_method
        self.writer.write_call(func_name, num_of_args)
        self.next()  # past ')'

    def _push_variable(self, name):
        """Push the variable ``name``, or report it when it is undefined."""
        kind = self.symbols.kind_of(name)
        index = self.symbols.index_of(name)
        if kind is not None:
            self.writer.write_push(kind.get_segment(), index)
        else:
            print("unexpected")

    def compile_term(self, caller):
        """
        Compiles a term: a constant, a keyword constant, a variable, an array
        element, a subroutine call, a parenthesised expression or a unary
        operator applied to a term.

        :param caller: XML element receiving nested elements.
        :return:
        """
        token_type = self.tokenizer.token_type()
        if token_type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()
        elif token_type is JTok.STRING_CONST:
            string_val = self.tokenizer.string_val()
            # String.new(length), then one String.appendChar per character.
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for char in string_val:
                self.writer.write_push(CONSTANT, ord(char))
                self.writer.write_call("String.appendChar", 2)
            self.next()
        elif token_type is JTok.KEYWORD:
            keyword = self.tokenizer.key_word()
            if keyword in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif keyword == "true":
                # true is -1, written as "push 1, neg"
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif keyword == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")
            self.next()
        elif token_type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()
            self.next()
            # The token after the identifier tells the three forms apart.
            follows_symbol = self.tokenizer.token_type() is JTok.SYMBOL
            if follows_symbol and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)
            elif follows_symbol and self.tokenizer.symbol() == '[':
                # name[expr]: index, base, add, then read through 'that'.
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                self._push_variable(name)
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)
                self.next()  # past ']'
            else:
                self._push_variable(name)
        elif token_type is JTok.SYMBOL:
            symbol = self.tokenizer.symbol()
            if symbol == '(':
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()  # past ')'
            elif symbol in {'-', '~'}:
                self.next()
                self.compile_term(SubElement(caller, TERM))
                if symbol == "-":
                    self.writer.write_arithmetic("neg")
                else:
                    self.writer.write_arithmetic("not")
            else:
                # Was a bare string expression (a no-op); report it like the
                # other unexpected-token branches do.
                print("unexpected")

    def compile_do(self, caller):
        """Compiles a do statement and discards the call's return value."""
        self.next()  # past 'do'
        name = self.tokenizer.identifier()
        self.next()
        self.compile_subroutineCall(caller, name)
        self.writer.write_pop(TEMP, 0)
        self.next()  # past ';'

    def compile_let(self, caller):
        """Compiles a let statement for a variable or an array element."""
        self.next()  # past 'let'
        varName = self.tokenizer.identifier()
        self.next()
        segment = self.symbols.kind_of(varName).get_segment()
        index = self.symbols.index_of(varName)
        if self.tokenizer.symbol() == '[':
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(segment, index)
            self.writer.write_arithmetic("add")
            self.next()  # past ']'
            self.next()  # past '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            # The value is parked in temp 0 so that pointer 1 is set only
            # after the right-hand side has been evaluated.
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)
        else:
            self.next()  # past '='
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(segment, index)
        self.next()  # past ';'

    def compile_return(self, caller):
        """Compiles a return statement; a bare ``return;`` pushes constant 0."""
        self.next()  # past 'return'
        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ";":
            self.writer.write_push(CONSTANT, 0)
        else:
            self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        self.next()  # past ';'

    def compile_while(self, caller):
        """Compiles a while statement using WHILE_EXP<n> / WHILE_END<n>."""
        while_index = self.while_counter
        self.while_counter += 1
        start_label = "WHILE_EXP" + str(while_index)
        end_label = "WHILE_END" + str(while_index)
        self.writer.write_label(start_label)
        self.next()  # past 'while'
        self.next()  # past '('
        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_arithmetic("not")
        self.writer.write_if(end_label)
        self.next()  # past ')'
        self.next()  # past '{'
        self.compile_statements(SubElement(caller, STATEMENTS))
        self.writer.write_goto(start_label)
        self.writer.write_label(end_label)
        self.next()  # past '}'

    def compile_statements(self, caller):
        """Compiles statements for as long as a statement keyword is current."""
        handlers = {
            'do': (self.compile_do, 'doStatement'),
            'while': (self.compile_while, 'whileStatement'),
            'let': (self.compile_let, 'letStatement'),
            'return': (self.compile_return, 'returnStatement'),
            'if': (self.compile_if, 'ifStatement'),
        }
        caller.text = " "
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() in handlers):
            handler, tag = handlers[self.tokenizer.key_word()]
            handler(SubElement(caller, tag))

    def compile_if(self, caller):
        """Compiles an if statement with an optional else clause.

        Emits ``if-goto IF_TRUE<n>`` / ``goto IF_FALSE<n>`` after the
        condition; ``IF_END<n>`` is only used when an else clause exists.
        """
        self.next()  # now on '('
        # The parenthesised condition is consumed as a single '(' expr ')' term.
        self.compile_expression(caller)
        self.next()  # past '{'
        if_index = self.if_counter
        self.if_counter += 1
        true_label = "IF_TRUE" + str(if_index)
        false_label = "IF_FALSE" + str(if_index)
        self.writer.write_if(true_label)
        self.writer.write_goto(false_label)
        self.writer.write_label(true_label)
        self.compile_statements(caller)
        self.next()  # past '}'
        if self.tokenizer.key_word() == 'else':
            end_label = "IF_END" + str(if_index)
            self.writer.write_goto(end_label)
            self.writer.write_label(false_label)
            self.next()  # past 'else'
            self.next()  # past '{'
            self.compile_statements(caller)
            self.next()  # past '}'
            self.writer.write_label(end_label)
        else:
            self.writer.write_label(false_label)
        return
class CompilationEngine:
    """
    Compiles one Jack class, read token by token from a tokenizer, into VM
    commands emitted through a ``VMWriter`` bound to ``output_file``.

    Declarations are recorded in two symbol tables: one for the class scope
    and one that is reset for every subroutine.

    NOTE: ASSUMES ERROR FREE CODE -> a todo could be to add error handling
    """

    # VM segment used for symbols declared in each kind of declaration
    SYMBOL_KINDS = {'parameter_list': 'argument', 'var_dec': 'local'}
    CLASS_VAR_DEC_TOKENS = ["static", "field"]
    SUBROUTINE_TOKENS = ["function", "method", "constructor"]
    STATEMENT_TOKENS = ['do', 'let', 'while', 'return', 'if']
    # tokens that open a construct
    STARTING_TOKENS = {
        'var_dec': ['var'],
        'parameter_list': ['('],
        'subroutine_body': ['{'],
        'expression_list': ['('],
        'expression': ['=', '[', '('],
        'array': ['['],
        'conditional': ['if', 'else']
    }
    # tokens that close a construct
    TERMINATING_TOKENS = {
        'class': ['}'],
        'class_var_dec': [';'],
        'subroutine': ['}'],
        'parameter_list': [')'],
        'expression_list': [')'],
        'statements': ['}'],
        'do': [';'],
        'let': [';'],
        'while': ['}'],
        'if': ['}'],
        'var_dec': [';'],
        'return': [';'],
        'expression': [';', ')', ']', ','],
        'array': [']']
    }
    OPERATORS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    UNARY_OPERATORS = ['-', '~']
    TOKENS_THAT_NEED_LABELS = ['if', 'while']

    def __init__(self, tokenizer, output_file):
        """
        :param tokenizer: token source for the class being compiled
        :param output_file: destination handed to the VMWriter
        """
        self.tokenizer = tokenizer
        self.output_file = output_file
        self.class_symbol_table = SymbolTable()
        self.subroutine_symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.label_counter = LabelCounter(labels=self.TOKENS_THAT_NEED_LABELS)
        self.class_name = None

    def compile_class(self):
        """
        everything needed to compile a class, the basic unit of compilation
        """
        # skip everything up to class start
        while not self.tokenizer.class_token_reached():
            self.tokenizer.advance()
        # since compilation unit is a class makes sense to store this as
        # instance variable
        self.class_name = self.tokenizer.next_token

        # NOTE(review): has_more_tokens is read without being called, so it
        # must be a property/attribute of the tokenizer -- confirm.
        while self.tokenizer.has_more_tokens:
            self.tokenizer.advance()
            if self.tokenizer.current_token in self.CLASS_VAR_DEC_TOKENS:
                self.compile_class_var_dec()
            elif self.tokenizer.current_token in self.SUBROUTINE_TOKENS:
                self.compile_subroutine()

    def compile_class_var_dec(self):
        """
        example: field int x;

        Defines every declared name in the class symbol table.
        """
        symbol_kind = self.tokenizer.keyword()
        # get symbol type
        self.tokenizer.advance()
        # Fall back to the raw token when the type is not reported as a
        # keyword (a class name), mirroring compile_var_dec which always
        # reads current_token for the type.
        symbol_type = self.tokenizer.keyword() or self.tokenizer.current_token
        # get all identifiers
        while self._not_terminal_token_for('class_var_dec'):
            self.tokenizer.advance()
            symbol_name = self.tokenizer.identifier()
            if symbol_name:
                # add symbol to class
                self.class_symbol_table.define(name=symbol_name,
                                               kind=symbol_kind,
                                               symbol_type=symbol_type)

    def compile_subroutine(self):
        """
        example: method void dispose() { ...
        """
        # new subroutine means new subroutine scope
        self.subroutine_symbol_table.reset()
        # get subroutine name: skip the subroutine kind and the return type
        self.tokenizer.advance()
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token
        # compile parameter list
        self.tokenizer.advance()
        self.compile_parameter_list()
        # compile body
        self.tokenizer.advance()
        self.compile_subroutine_body(subroutine_name=subroutine_name)
        # reset label counts once the subroutine is finished
        self.label_counter.reset_counts()

    def compile_subroutine_body(self, subroutine_name):
        """
        Compiles the local declarations and statements of a subroutine and
        writes its ``function`` command.

        :param subroutine_name: bare subroutine name (class name is prepended)
        """
        # skip start
        self.tokenizer.advance()
        # get all locals
        num_locals = 0
        while self._starting_token_for('var_dec'):
            num_locals += self.compile_var_dec()
            self.tokenizer.advance()
        # write function command
        self.vm_writer.write_function(
            name='{}.{}'.format(self.class_name, subroutine_name),
            num_locals=num_locals)
        # compile all statements
        while self._not_terminal_token_for('subroutine'):
            self.compile_statements()

    def compile_parameter_list(self):
        """
        example: dispose(int a, int b)

        Defines each parameter in the subroutine symbol table. Returns None.
        """
        while self._not_terminal_token_for('parameter_list'):
            self.tokenizer.advance()
            # a "type name" pair is recognised by the name that follows
            upcoming = self.tokenizer.next_token
            if self.tokenizer.token_type_of(upcoming) == "IDENTIFIER":
                self.subroutine_symbol_table.define(
                    name=upcoming,
                    kind=self.SYMBOL_KINDS['parameter_list'],
                    symbol_type=self.tokenizer.current_token)

    # 'var' type varName (',' varName)* ';'
    def compile_var_dec(self):
        """
        example: var int a;

        :return: number of variables declared, i.e., var int i, sum = 2
        """
        # skip var
        self.tokenizer.advance()
        # get symbol type
        symbol_type = self.tokenizer.current_token
        num_vars = 0
        # get all vars
        while self._not_terminal_token_for('var_dec'):
            self.tokenizer.advance()
            symbol_name = self.tokenizer.identifier()
            if symbol_name:
                num_vars += 1
                self.subroutine_symbol_table.define(
                    name=symbol_name,
                    kind=self.SYMBOL_KINDS['var_dec'],
                    symbol_type=symbol_type)
        return num_vars

    def compile_statements(self):
        """
        Dispatches every statement up to the next closing '}' to its
        compile method.
        """
        statement_compile_methods = {
            'if': self.compile_if,
            'do': self.compile_do,
            'let': self.compile_let,
            'while': self.compile_while,
            'return': self.compile_return
        }
        while self._not_terminal_token_for('subroutine'):
            compile_method = statement_compile_methods.get(
                self.tokenizer.current_token)
            if compile_method is not None:
                compile_method()
            self.tokenizer.advance()

    def compile_do(self):
        """
        example: do square.dispose();
        """
        # get to caller
        self.tokenizer.advance()
        caller_name = self.tokenizer.current_token
        # look up in symbol table
        symbol = self._find_symbol_in_symbol_tables(symbol_name=caller_name)
        # skip .
        self.tokenizer.advance()
        # subroutine name
        self.tokenizer.advance()
        subroutine_name = self.tokenizer.current_token

        if symbol:
            # method call on a declared object: push the object itself.
            # Use the symbol's own segment (not a hard-coded 'local') so
            # arguments and class-level variables are read from the right
            # place, as compile_let and compile_symbol_push already do.
            symbol_type = symbol['type']
            self.vm_writer.write_push(segment=symbol['kind'],
                                      index=symbol['index'])
        else:
            # i.e, OS call
            symbol_type = caller_name
        subroutine_call_name = symbol_type + '.' + subroutine_name

        # start expression list
        self.tokenizer.advance()
        # get arguments in expression list
        num_args = self.compile_expression_list()
        if symbol:
            # calling object passed as implicit argument
            num_args += 1
        # write call
        self.vm_writer.write_call(name=subroutine_call_name,
                                  num_args=num_args)
        # pop off return of previous call we don't care about
        self.vm_writer.write_pop(segment='temp', index='0')

    # 'let' varName ('[' expression ']')? '=' expression ';'
    def compile_let(self):
        """
        example: let direction = 0;
        """
        # get symbol to store expression evaluation
        self.tokenizer.advance()
        symbol_name = self.tokenizer.current_token
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)

        # array assignment?
        array_assignment = self._starting_token_for(keyword_token='array',
                                                    position='next')
        if array_assignment:
            # get to index expression
            self.tokenizer.advance()
            self.tokenizer.advance()
            # compile it
            self.compile_expression()
            self.vm_writer.write_push(segment=symbol['kind'],
                                      index=symbol['index'])
            # add two addresses
            self.vm_writer.write_arithmetic(command='+')

        # go past =
        while self.tokenizer.current_token != '=':
            self.tokenizer.advance()
        # compile all expressions
        while self._not_terminal_token_for('let'):
            self.tokenizer.advance()
            self.compile_expression()

        if not array_assignment:
            # store expression evaluation in symbol location
            self.vm_writer.write_pop(segment=symbol['kind'],
                                     index=symbol['index'])
        else:
            # array unloading
            # pop return value onto temp
            self.vm_writer.write_pop(segment='temp', index='0')
            # pop address of array slot onto THAT
            self.vm_writer.write_pop(segment='pointer', index='1')
            # push value on temp back onto stack
            self.vm_writer.write_push(segment='temp', index='0')
            # set that
            self.vm_writer.write_pop(segment='that', index='0')

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """
        example: while (x > 0) { ... }
        """
        # Reserve this loop's label number before compiling the body so a
        # nested while receives a different number.
        label_index = self.label_counter.get('while')
        self.label_counter.increment('while')

        # write while label
        self.vm_writer.write_label(label='WHILE_EXP{}'.format(label_index))
        # advance to expression start (
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile expression in ()
        self.compile_expression()
        # NOT expression so for easily handling of termination and if-goto
        self.vm_writer.write_unary(command='~')
        self.vm_writer.write_ifgoto(label='WHILE_END{}'.format(label_index))

        while self._not_terminal_token_for('while'):
            self.tokenizer.advance()
            if self._statement_token():
                self.compile_statements()

        # write goto
        self.vm_writer.write_goto(label='WHILE_EXP{}'.format(label_index))
        # write end label
        self.vm_writer.write_label(label='WHILE_END{}'.format(label_index))

    def compile_if(self):
        """
        example: if (True) { ... } else { ... }
        """
        # Reserve this statement's label number up front so that nested and
        # subsequent if statements never reuse it.
        label_index = self.label_counter.get('if')
        self.label_counter.increment('if')

        # advance to expression start
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile expression in ()
        self.compile_expression()
        # write ifgoto to if statement
        self.vm_writer.write_ifgoto(label='IF_TRUE{}'.format(label_index))
        # write goto if false (else)
        self.vm_writer.write_goto(label='IF_FALSE{}'.format(label_index))
        # write if label
        self.vm_writer.write_label(label='IF_TRUE{}'.format(label_index))
        # body of if
        self.compile_conditional_body()

        # else? Only an 'else' continues this statement; an 'if' that follows
        # the closing brace is a new statement, not a branch of this one.
        if self.tokenizer.next_token == 'else':
            # past closing }
            self.tokenizer.advance()
            # goto if end if this path wasn't hit
            self.vm_writer.write_goto(label='IF_END{}'.format(label_index))
            # if false
            self.vm_writer.write_label(label='IF_FALSE{}'.format(label_index))
            # compile else
            self.compile_conditional_body()
            # define IF_END
            self.vm_writer.write_label(label='IF_END{}'.format(label_index))
        else:
            # no else present: the false label marks the end of the if
            self.vm_writer.write_label(label='IF_FALSE{}'.format(label_index))

    def compile_conditional_body(self):
        """
        Compiles the statements of an if/else branch up to its closing '}'.

        Label numbering is owned by compile_if / compile_while, so nested
        statements need no counter adjustment here.
        """
        while self._not_terminal_token_for('if'):
            self.tokenizer.advance()
            if self._statement_token():
                self.compile_statements()

    # term (op term)*
    def compile_expression(self):
        """
        many examples..i,e., x = 4

        Operands are pushed as they are met; operators are collected and
        emitted after the whole expression, newest first.
        """
        # ops get compiled at end in reverse order in which they were added
        ops = []
        while self._not_terminal_token_for('expression'):
            current = self.tokenizer.current_token
            if self._subroutine_call():
                self.compile_subroutine_call()
            elif self._array_expression():
                self.compile_array_expression()
            elif current.isdigit():
                self.vm_writer.write_push(segment='constant', index=current)
            elif self.tokenizer.identifier():
                self.compile_symbol_push()
            elif (current in self.OPERATORS
                  and not self._part_of_expression_list()):
                ops.insert(0, Operator(token=current, category='bi'))
            elif current in self.UNARY_OPERATORS:
                ops.insert(0, Operator(token=current, category='unary'))
            elif self.tokenizer.string_const():
                self.compile_string_const()
            elif self.tokenizer.boolean():
                # boolean case
                self.compile_boolean()
            elif self._starting_token_for('expression'):
                # nested expression: skip starting (
                self.tokenizer.advance()
                self.compile_expression()
            elif self.tokenizer.null():
                self.vm_writer.write_push(segment='constant', index=0)
            self.tokenizer.advance()

        # compile_ops
        for op in ops:
            self.compile_op(op)

    def compile_op(self, op):
        """
        example: +, /, etc.
        """
        if op.unary():
            self.vm_writer.write_unary(command=op.token)
        elif op.multiplication():
            self.vm_writer.write_call(name='Math.multiply', num_args=2)
        elif op.division():
            self.vm_writer.write_call(name='Math.divide', num_args=2)
        else:
            self.vm_writer.write_arithmetic(command=op.token)

    def compile_boolean(self):
        """
        'true' and 'false'
        """
        self.vm_writer.write_push(segment='constant', index=0)
        if self.tokenizer.boolean() == 'true':
            # negate true
            self.vm_writer.write_unary(command='~')

    def compile_string_const(self):
        """
        example: "Hello World"
        """
        delimiter = self.tokenizer.STRING_CONST_DELIMITER
        # Drop the delimiters first so the allocated length matches the
        # number of characters actually appended.
        chars = [char for char in self.tokenizer.string_const()
                 if char != delimiter]
        self.vm_writer.write_push(segment='constant', index=len(chars))
        self.vm_writer.write_call(name='String.new', num_args=1)
        # build string from chars
        for char in chars:
            self.vm_writer.write_push(segment='constant', index=ord(char))
            self.vm_writer.write_call(name='String.appendChar', num_args=2)

    def compile_symbol_push(self):
        """
        example: x
        """
        symbol = self._find_symbol_in_symbol_tables(
            symbol_name=self.tokenizer.identifier())
        self.vm_writer.write_push(segment=symbol['kind'],
                                  index=symbol['index'])

    def compile_array_expression(self):
        """
        example: let x = a[j], a[4]
        """
        symbol_name = self.tokenizer.current_token
        symbol = self._find_symbol_in_symbol_tables(symbol_name=symbol_name)
        # get to index expression
        self.tokenizer.advance()
        self.tokenizer.advance()
        # compile
        self.compile_expression()
        # push the array base from the segment it was declared in (it is not
        # necessarily a local), matching the array branch of compile_let
        self.vm_writer.write_push(segment=symbol['kind'],
                                  index=symbol['index'])
        # add two addresses: identifier and expression result
        self.vm_writer.write_arithmetic(command='+')
        # pop address onto pointer 1 / THAT
        self.vm_writer.write_pop(segment='pointer', index=1)
        # push value onto stack
        self.vm_writer.write_push(segment='that', index=0)

    def compile_subroutine_call(self):
        """
        example: Memory.peek(8000)
        """
        # the call name is every token up to the opening parenthesis
        name_parts = []
        while not self._starting_token_for('expression_list'):
            name_parts.append(self.tokenizer.current_token)
            self.tokenizer.advance()
        # get num of args
        num_args = self.compile_expression_list()
        # write_call after pushing arguments onto stack
        self.vm_writer.write_call(name=''.join(name_parts),
                                  num_args=num_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        """
        separated out of compile_expression because of edge cases from
        normal expression
        example: (x, y, x + 5)

        :return: number of expressions compiled
        """
        num_args = 0
        if self._empty_expression_list():
            return num_args
        # start expressions
        self.tokenizer.advance()
        while self._not_terminal_token_for('expression_list'):
            num_args += 1
            self.compile_expression()
            if self._another_expression_coming():
                # would be , after compile expression
                self.tokenizer.advance()
        return num_args

    def compile_return(self):
        """
        example: return x; or return;
        """
        if self._not_terminal_token_for(keyword_token='return',
                                        position='next'):
            self.compile_expression()
        else:
            # push constant for void
            self.vm_writer.write_push(segment='constant', index='0')
            self.tokenizer.advance()
        self.vm_writer.write_return()

    def _token_at(self, position):
        """Return the current or next token; None for any other position."""
        if position == 'current':
            return self.tokenizer.current_token
        if position == 'next':
            return self.tokenizer.next_token
        return None

    def _not_terminal_token_for(self, keyword_token, position='current'):
        """True when the token at ``position`` does not close the construct."""
        if position not in ('current', 'next'):
            return None
        return (self._token_at(position)
                not in self.TERMINATING_TOKENS[keyword_token])

    def _starting_token_for(self, keyword_token, position='current'):
        """True when the token at ``position`` opens the construct."""
        if position not in ('current', 'next'):
            return None
        return self._token_at(position) in self.STARTING_TOKENS[keyword_token]

    def _statement_token(self):
        return self.tokenizer.current_token in self.STATEMENT_TOKENS

    def _operator_token(self, position='current'):
        if position not in ('current', 'next'):
            return None
        return self._token_at(position) in self.OPERATORS

    def _another_expression_coming(self):
        return self.tokenizer.current_token == ","

    def _find_symbol_in_symbol_tables(self, symbol_name):
        """
        Looks a name up in the subroutine scope first, then the class scope.

        :return: the symbol record, or None when the name is not declared
        """
        return (self.subroutine_symbol_table.find_symbol_by_name(symbol_name)
                or self.class_symbol_table.find_symbol_by_name(symbol_name)
                or None)

    def _empty_expression_list(self):
        return (self._start_of_expression_list()
                and self._next_ends_expression_list())

    def _start_of_expression_list(self):
        return (self.tokenizer.current_token
                in self.STARTING_TOKENS['expression_list'])

    def _next_ends_expression_list(self):
        return (self.tokenizer.next_token
                in self.TERMINATING_TOKENS['expression_list'])

    def _subroutine_call(self):
        return self.tokenizer.identifier() and self.tokenizer.next_token == '.'

    def _array_expression(self):
        return self.tokenizer.identifier() and self._starting_token_for(
            keyword_token='array', position='next')

    def _part_of_expression_list(self):
        # distinguish neg from sub
        return self.tokenizer.tokens_found[-3] in [',', '(']
class CompilationEngine:
    """
    Compiles the Jack class found in ``source`` into VM commands.

    The whole compilation runs from the constructor. While compiling, a
    partial XML parse tree is still built under ``self.root``; nothing in
    this class writes that tree out.
    """

    def __init__(self, source):
        """
        :param source: handed to both the Tokenizer and the VMWriter
        """
        self.if_counter = 0
        self.while_counter = 0
        self.tokenizer = Tokenizer(source)
        # NOTE(review): the result is discarded; presumably this primes the
        # tokenizer before the first advance() -- confirm against Tokenizer.
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """
        Fills the table mapping each binary operator to the text handed to
        the writer's write_arithmetic.
        """
        self.arithmetic_op = {
            '+': "add",
            '-': "sub",
            '*': "call Math.multiply 2",
            '/': "call Math.divide 2",
            '&': "and",
            '|': "or",
            '<': "lt",
            '>': "gt",
            '=': "eq"
        }

    def next(self):
        """
        Proceed to the next token, if there is one.
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression: term (op term)*

        :param caller: XML element the term nodes are attached to
        """
        op_stack = []
        self.compile_term(SubElement(caller, TERM))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() in OPERATORS):
            op_stack.append(self.tokenizer.symbol())
            self.next()
            self.compile_term(SubElement(caller, TERM))
        # NOTE(review): operators are emitted only after every term, newest
        # first, so "a - b - c" is evaluated as a - (b - c).
        while op_stack:
            self.writer.write_arithmetic(self.arithmetic_op[op_stack.pop()])

    def compile_expressionList(self, caller):
        """
        Compiles a (possibly empty) comma separated list of expressions.

        :param caller: XML element the expression nodes are attached to
        :return: number of expressions in the list, used by function calls
        """
        # if expression list is empty
        if (self.tokenizer.token_type() is JTok.SYMBOL
                and self.tokenizer.symbol() == ")"):
            caller.text = " "
            return 0

        num_of_args = 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() == ","):
            num_of_args += 1
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """
        First token, the first identifier must be sent manually, so the
        method expects the current token to be the second in the
        specification ('.' or '(').

        :param caller: XML element the expression list is attached to
        :param first_token: the identifier that started the call
        """
        func_name = first_token
        is_method = 0
        if self.tokenizer.symbol() == '.':
            self.next()
            kind = self.symbols.kind_of(func_name)
            # Compare against None, as every other lookup in this class
            # does, so a declared variable whose kind happens to be falsy is
            # still treated as an object and not as a class name.
            if kind is not None:
                # first token is a variable: push the object, call its type
                self.writer.write_push(kind.get_segment(),
                                       self.symbols.index_of(func_name))
                func_name = self.symbols.type_of(func_name)
                is_method = 1
            func_name = func_name + "." + self.tokenizer.identifier()
            self.next()
        else:
            # bare name: a method of the current class, called on 'this'
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1
        self.next()  # skip (
        num_of_args = self.compile_expressionList(
            SubElement(caller, EXPRESSION_LIST)) + is_method
        self.writer.write_call(func_name, num_of_args)
        self.next()  # skip )

    def compile_term(self, caller):
        """
        Compiles a single term and pushes its value.

        :param caller: XML element nested nodes are attached to
        """
        token_type = self.tokenizer.token_type()
        if token_type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()
        elif token_type is JTok.STRING_CONST:
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()
        elif token_type is JTok.KEYWORD:
            key_word = self.tokenizer.key_word()
            if key_word in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif key_word == "true":
                # true is emitted as -1
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif key_word == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")
            self.next()
        elif token_type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()
            self.next()
            is_symbol = self.tokenizer.token_type() is JTok.SYMBOL
            if is_symbol and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)
            elif is_symbol and self.tokenizer.symbol() == '[':
                # array element: index expression + base address -> THAT
                self.next()  # skip [
                self.compile_expression(SubElement(caller, EXPRESSION))
                self._push_variable(name)
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)
                self.next()  # skip ]
            else:
                self._push_variable(name)
        elif token_type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                self.next()  # skip (
                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()  # skip )
            elif self.tokenizer.symbol() in {'-', '~'}:
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                if unary_op == "-":
                    self.writer.write_arithmetic("neg")
                elif unary_op == "~":
                    self.writer.write_arithmetic("not")
            # any other symbol is ignored here

    def _push_variable(self, name):
        """Pushes the named variable, or reports it when it is undeclared."""
        kind = self.symbols.kind_of(name)
        index = self.symbols.index_of(name)
        if kind is not None:
            self.writer.write_push(kind.get_segment(), index)
        else:
            print("unexpected")

    def compile_do(self, caller):
        """
        format : 'do' subroutineCall ';'

        :param caller: XML element for this statement
        """
        self.next()  # skip 'do'
        name = self.tokenizer.identifier()
        self.next()
        self.compile_subroutineCall(caller, name)
        # the returned value is not used by a do statement
        self.writer.write_pop(TEMP, 0)
        self.next()  # skip ;

    def compile_let(self, caller):
        """
        format : 'let' varName ( '[' expression ']' )? '=' expression ';'

        :param caller: XML element for this statement
        """
        self.next()  # skip 'let'
        var_name = self.tokenizer.identifier()
        self.next()
        segment = self.symbols.kind_of(var_name).get_segment()
        index = self.symbols.index_of(var_name)
        if self.tokenizer.symbol() == '[':  # if array
            self.next()  # skip [
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(segment, index)
            self.writer.write_arithmetic("add")
            self.next()  # skip ]
            self.next()  # skip =
            self.compile_expression(SubElement(caller, EXPRESSION))
            # park the value in temp while the target address goes to THAT
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)
        else:
            self.next()  # skip =
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(segment, index)
        self.next()  # skip ;

    def compile_return(self, caller):
        """
        format : 'return' expression? ';'

        :param caller: XML element for this statement
        """
        self.next()  # skip 'return'
        if (self.tokenizer.token_type() is JTok.SYMBOL
                and self.tokenizer.symbol() == ";"):
            # no expression: a constant 0 is pushed before returning
            self.writer.write_push(CONSTANT, 0)
        else:
            self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        self.next()  # skip ;

    def compile_while(self, caller):
        """
        format : 'while' '(' expression ')' '{' statements '}'

        :param caller: XML element for this statement
        """
        # take the label number before the body so nested loops differ
        while_index = self.while_counter
        self.while_counter += 1
        self.writer.write_label("WHILE_EXP" + str(while_index))
        self.next()  # skip while
        self.next()  # skip (
        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_arithmetic("not")
        self.writer.write_if("WHILE_END" + str(while_index))
        self.next()  # skip )
        self.next()  # skip {
        self.compile_statements(SubElement(caller, STATEMENTS))
        self.writer.write_goto("WHILE_EXP" + str(while_index))
        self.writer.write_label("WHILE_END" + str(while_index))
        self.next()  # skip }

    def compile_statements(self, caller):
        """
        Compiles consecutive statements until a token that does not start
        one is reached.

        :param caller: XML element the statement nodes are attached to
        """
        handlers = {
            'do': (self.compile_do, 'doStatement'),
            'while': (self.compile_while, 'whileStatement'),
            'let': (self.compile_let, 'letStatement'),
            'return': (self.compile_return, 'returnStatement'),
            'if': (self.compile_if, 'ifStatement'),
        }
        caller.text = " "
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() in handlers):
            compile_statement, tag = handlers[self.tokenizer.key_word()]
            compile_statement(SubElement(caller, tag))

    def compile_if(self, caller):
        """
        format : 'if' '(' expression ')' '{' statements '}'
                 ( 'else' '{' statements '}' )?

        :param caller: XML element for this statement
        """
        self.next()  # skip 'if', now at (
        # the parenthesised condition is consumed as one "( expression )" term
        self.compile_expression(caller)
        self.next()  # skip {
        if_index = self.if_counter
        self.if_counter += 1
        self.writer.write_if("IF_TRUE" + str(if_index))
        self.writer.write_goto("IF_FALSE" + str(if_index))
        self.writer.write_label("IF_TRUE" + str(if_index))
        self.compile_statements(caller)
        self.next()  # skip }
        if self.tokenizer.key_word() == 'else':
            self.writer.write_goto("IF_END" + str(if_index))
            self.writer.write_label("IF_FALSE" + str(if_index))
            self.next()  # skip else
            self.next()  # skip {
            self.compile_statements(caller)
            self.next()  # skip }
            self.writer.write_label("IF_END" + str(if_index))
        else:
            self.writer.write_label("IF_FALSE" + str(if_index))

    def compile_var_dec(self, caller):
        """
        format: 'var' type varName ( ',' varName)* ';'

        :param caller: XML element for this declaration
        :return: number of variables declared
        """
        kind = self.tokenizer.key_word()
        self.next()  # skip 'var'
        return self.compile_list_of_vars(caller, "var", Kind[kind])

    def compile_class(self, caller):
        """
        format: 'class' className '{' classVarDec* subroutineDec* '}'

        :param caller: root XML element of the class
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()
        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.class_name = self.tokenizer.identifier()
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # {
        self.next()
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() in {'static', 'field'}):
            self.compile_classVarDec(SubElement(caller, "classVarDec"))
        # subroutines follow until the next symbol token is reached
        while self.tokenizer.token_type() is not JTok.SYMBOL:
            self.compile_subroutine(SubElement(caller, "subroutineDec"))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # }
        self.next()

    def compile_list_of_vars(self, caller, category, kind):
        """
        Helper method to compile lists of variables according to
        type varName (',' varName)* ';'

        :param caller: XML element the type node is attached to
        :param category: declaration category; not used by this method
        :param kind: symbol kind every declared name is defined with
        :return: number of variables declared
        """
        var_type = self.compile_type(caller)
        self.symbols.define(self.tokenizer.identifier(), var_type, kind)
        num_of_vars = 1
        self.next()
        while self.tokenizer.symbol() != ';':
            self.next()  # skip ,
            self.symbols.define(self.tokenizer.identifier(), var_type, kind)
            num_of_vars += 1
            self.next()
        self.next()  # skip ;
        return num_of_vars

    def compile_classVarDec(self, caller):
        """
        format: ('static' | 'field') type varName (',' varName)* ';'

        :param caller: XML element for this declaration
        """
        kind = self.tokenizer.key_word()
        self.next()
        self.compile_list_of_vars(caller, kind, Kind[kind])

    def compile_type(self, caller):
        """
        Compiles a tag according to type, for variables, and moves past it.

        :param caller: XML element the type node is attached to
        :return: the type text (keyword or identifier)
        """
        if self.tokenizer.token_type() is JTok.KEYWORD:
            tag = KEYWORD
            text = self.tokenizer.key_word()
        else:
            tag = IDENTIFIER
            text = self.tokenizer.identifier()
        SubElement(caller, tag).text = text
        self.next()
        return text

    def compile_subroutine(self, caller):
        """
        Compiles a complete constructor, function or method.

        :param caller: XML element for this subroutine
        """
        subroutine_type = self.tokenizer.key_word()
        self.next()
        # Just to skip void or type
        if (self.tokenizer.token_type() is JTok.KEYWORD
                and self.tokenizer.key_word() == "void"):
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()
        else:
            self.compile_type(caller)
        name = self.class_name + "." + self.tokenizer.identifier()
        self.symbols.start_subroutine()
        self.next()
        self.next()  # Skips (
        if subroutine_type == "method":
            # defined before the parameters so it is the first argument
            self.symbols.define("this", "", Kind.arg)
        self.compile_parameterList(SubElement(caller, "parameterList"))
        self.next()  # Skips )
        self.next()  # Skips {
        num_of_locals = 0
        while (self.tokenizer.token_type() is JTok.KEYWORD
               and self.tokenizer.key_word() == "var"):
            num_of_locals += self.compile_var_dec(SubElement(caller, "varDec"))
        self.writer.write_function(name, num_of_locals)
        if subroutine_type == "constructor":
            # allocate one word per field and make the block 'this'
            self.writer.write_push(CONSTANT,
                                   self.symbols.var_count(Kind.field))
            self.writer.write_call("Memory.alloc", 1)
            self.writer.write_pop(POINTER, 0)
        elif subroutine_type == "method":
            # the receiving object arrives as argument 0
            self.writer.write_push(ARGUMENT, 0)
            self.writer.write_pop(POINTER, 0)
        self.compile_statements(SubElement(caller, "statements"))
        self.next()  # Skips }

    def compile_parameterList(self, caller):
        """
        Compiles a (possibly empty) parameter list, without the enclosing
        parentheses, defining every parameter as an argument.

        :param caller: XML element for the parameter list
        """
        if (self.tokenizer.token_type() is JTok.SYMBOL
                and self.tokenizer.symbol() == ")"):
            caller.text = " "
            return
        param_type = self.compile_type(caller)
        self.symbols.define(self.tokenizer.identifier(), param_type, Kind.arg)
        self.next()
        while (self.tokenizer.token_type() is JTok.SYMBOL
               and self.tokenizer.symbol() == ","):
            self.next()  # skip ,
            param_type = self.compile_type(caller)
            self.symbols.define(self.tokenizer.identifier(), param_type,
                                Kind.arg)
            self.next()
class CompilationEngine:
    """Recursive-descent compiler that turns one Jack class into VM code.

    Tokens come from a ``Tokenizer``; identifiers are tracked in a
    ``SymbolTable``; VM commands are emitted through a ``VMWriter``.  Terminal
    tokens are matched with regular expressions (see ``eat``/``peek_token``),
    so symbols that are regex metacharacters are kept pre-escaped in the class
    constants below.
    """

    # Pre-escaped regex fragments (raw strings: "\(" in a normal string is an
    # invalid escape sequence and warns on modern Python).
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = r"-|~"

    def __init__(self, in_address):
        """Build the engine for ``in_address`` and compile the class at once.

        :param in_address: path of the ``.jack`` source; the VM output path is
            derived by replacing ``.jack`` with ``.vm``.
        """
        self.tokenizer = Tokenizer(in_address)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        # Starts at -1 so the first generated label gets suffix 0.
        self.label_count = -1
        self.class_name = ""
        self.compile_class()

    def write_file(self):
        """Flush the generated VM code through the VM writer.

        The XML trace accumulated in ``self.output`` is not written anywhere.
        """
        self.vm_writer.write_file()

    def write(self, to_write):
        """
        Appends a line to the in-memory XML trace, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class:
        'class' className '{' classVarDec* subroutineDec* '}'
        """

        def comp_class():
            self.eat("class")
            self.class_name = self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every consecutive static or field declaration.
        Recurses once per declaration until the current token is neither.
        """
        var_type_reg = "static|field"
        if self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)
            self.compile_class_var_dec()

    def compile_subroutine(self):
        """
        Compiles a complete method, function or constructor, then any
        subroutines that follow it.  Does nothing when the current token does
        not start a subroutine, so a class without subroutines is accepted.
        """
        sub_regex = "(constructor|function|method)"
        if not self.peek_token(sub_regex):
            return

        self.symbol_table.start_subroutine()
        kind = self.eat(sub_regex)
        # Return type (may be 'void'); the value itself is not needed.
        self.__compile_type(True)
        name = self.__compile_name()
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_parameter_list(kind)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        if self.peek_token("var"):
            self.compile_var_dec()
        # The 'function' command needs the local count, so it can only be
        # emitted after all var declarations have been read.
        num_locals = self.symbol_table.var_count("local")
        self.vm_writer.write_function(
            "{}.{}".format(self.class_name, name), num_locals)
        self.__set_pointer(kind)
        self.compile_statements()
        self.eat("}")

        # Handle next subroutine if there is one
        if self.peek_token(sub_regex):
            self.compile_subroutine()

    def compile_parameter_list(self, kind):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ().
        :param kind: the subroutine kind; for "method" the implicit ``this``
            is registered as the first argument before the declared ones.
        """
        if kind == "method":
            self.symbol_table.define("this", self.class_name, "argument")
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__params()

    def compile_var_dec(self):
        """
        Compiles a var declaration and every var declaration following it:
        'var' type varName (',' varName)* ';'
        """
        self.eat("var")
        var_type = self.__compile_type(False)
        self.__var_declare(var_type, "var")
        self.eat(";")
        if self.peek_token("var"):
            self.compile_var_dec()

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}.
        Recurses once per statement until the current token starts none.
        """
        statement_reg = "let|if|while|do|return"
        if self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
            elif self.peek_token("if"):
                self.compile_if()
            elif self.peek_token("while"):
                self.compile_while()
            elif self.peek_token("do"):
                self.compile_do()
            elif self.peek_token("return"):
                self.compile_return()
            self.compile_statements()

    def compile_do(self):
        """
        Compiles a do statement: 'do' subroutineCall ';'
        """
        self.eat("do")
        self.__subroutine_call()
        # Since we don't use the return value, we pop it to temp
        self.vm_writer.write_pop("temp", 0)
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement:
        'let' varName ('[' expression ']')? '=' expression ';'
        """
        self.eat("let")
        name = self.__compile_name()
        is_array = False
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            is_array = True
            # Leaves the target address (base + index) on the stack.
            self.__handle_array(name)
        self.eat("=")
        self.compile_expression()
        # Pop the value to the spot in the memory
        if is_array:
            # Park the value in temp 0 so the address underneath it can be
            # moved into pointer 1 (THAT) before the store.
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.__write_pop(name)
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement:
        'while' '(' expression ')' '{' statements '}'
        """
        self.eat("while")
        loop_label = self.__get_label("WHILE_START")
        exit_label = self.__get_label("WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # Compute ~condition
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        # if ~condition exit loop
        self.vm_writer.write_if(exit_label)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.eat("}")

    def compile_return(self):
        """
        Compiles a return statement: 'return' expression? ';'
        """
        self.eat("return")
        if self.__is_term():
            self.compile_expression()
        else:
            # Void function - push 0
            self.vm_writer.write_push(CONSTANT, 0)
        self.vm_writer.write_return()
        self.eat(";")

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause:
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        self.eat("if")
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        # Condition is used as-is: jump to the true branch, otherwise fall
        # through to the goto that skips it.
        if_true = self.__get_label("IF_TRUE")
        self.vm_writer.write_if(if_true)
        if_false = self.__get_label("IF_FALSE")
        self.vm_writer.write_goto(if_false)
        self.vm_writer.write_label(if_true)
        self.compile_statements()
        self.eat("}")
        # Handle else:
        if self.peek_token("else"):
            if_end = self.__get_label("IF_END")
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)
            self.eat("else")
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            self.vm_writer.write_label(if_end)
        else:
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*

        Operators are applied strictly left to right (no precedence): each
        operator is emitted right after its right-hand term.
        """

        def comp_expression():
            self.compile_term()
            # A loop, not a single check: the grammar allows any number of
            # trailing "op term" pairs (e.g. a + b + c).
            while self.peek_token(CompilationEngine._OPS):
                operation = self.eat(CompilationEngine._OPS)
                self.compile_term()
                self.vm_writer.write_arithmetic(operation)

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.  Prints an error and exits with status -1 when the
        current token cannot start a term.
        """
        curr_type = self.peek_type()
        val = self.curr_token.get_token()
        # Handle integer constant
        if curr_type == INT_CONST:
            self.vm_writer.write_push(CONSTANT, int(val))
            self.__advance_token()
        # Handle String constant
        elif curr_type == STRING_CONST:
            self.__handle_string_constant(val)
            self.__advance_token()
        # Handle Keyword constant
        elif curr_type == KEYWORD:
            self.__handle_keyword_constant(val)
            self.__advance_token()
        # Case: token is a varName or a subroutineName
        elif curr_type == IDENTIFIER:
            self.__handle_identifier()
        # Case: ( expression )
        elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        # Case: unaryOp term
        elif self.peek_token(CompilationEngine._UNARY_OPS):
            self.__handle_unary_op()
        else:
            print("Error: Incorrect Term")
            exit(-1)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions.
        :return: the number of expressions compiled
        """
        count = 0
        if self.__is_term():
            self.compile_expression()
            count += 1
            while self.peek_token(","):
                self.eat(",")
                self.compile_expression()
                count += 1
        return count

    # ========== Compilation Helper ========== #

    def __class_var_dec(self):
        """
        Compiles a single class var declaration:
        ('static' | 'field') type varName (',' varName)* ';'
        """
        var_type_reg = "static|field"
        kind = self.eat(var_type_reg)
        var_type = self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__var_declare(var_type, kind)
        self.eat(";")

    def __var_declare(self, var_type, kind):
        """
        Defines one variable name, then recurses for each ',' varName.
        :param var_type: declared type shared by all names in the list
        :param kind: kind passed through to the symbol table
        """
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, kind)
        if self.peek_token(","):
            self.eat(",")
            self.__var_declare(var_type, kind)

    def __compile_type(self, for_function):
        """
        Consumes a type token for a function or variable.
        :param for_function: True if is type of function (then 'void' is
            accepted as well), false otherwise.
        :return: the consumed type token
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        return self.eat(type_reg)

    def __set_pointer(self, kind):
        """
        Emits the subroutine prologue that sets up ``this``.
        :param kind: "method", "constructor" or "function" (no prologue)
        """
        if kind == "method":
            # The object reference arrives as argument 0.
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        elif kind == "constructor":
            self.__handle_constructor()

    def __handle_constructor(self):
        """Allocates the new object and anchors ``this`` at it."""
        # Allocate memory for the new object
        var_num = self.symbol_table.var_count("this")
        self.vm_writer.write_push(CONSTANT, var_num)
        self.vm_writer.write_call("Memory.alloc", 1)
        # Set the new memory spot to this
        self.vm_writer.write_pop("pointer", 0)

    def __compile_name(self):
        """
        Consumes an identifier token and returns it; prints an error and
        exits with status -1 if the current token is not an identifier.
        """
        if self.peek_type() == IDENTIFIER:
            return self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def __params(self):
        """Compiles one 'type varName' parameter and a trailing ',' if any."""
        var_type = self.__compile_type(False)
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, "argument")
        if self.peek_token(","):
            self.eat(",")

    def __handle_unary_op(self):
        """Compiles 'unaryOp term'; the operand is emitted before the op."""
        command = self.eat(CompilationEngine._UNARY_OPS)
        self.compile_term()
        if command == "-":
            # "-" alone would be read as subtraction by the writer.
            self.vm_writer.write_arithmetic("neg")
        else:
            self.vm_writer.write_arithmetic(command)

    def __handle_identifier(self):
        """
        Handles the case of an identifier given as a term
        """
        # Case: varName [ expression ]
        if self.peek_next(CompilationEngine._OPEN_BRACKET):
            name = self.__compile_name()
            self.__handle_array(name)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("that", 0)
        # Case: subroutineCall:
        elif self.peek_next(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_next(CompilationEngine._DOT):
            self.__subroutine_call()
        else:
            name = self.eat(NAME_REG)
            self.__write_push(name)

    def __handle_string_constant(self, string):
        """
        Handles the case of a string constant in a term
        :param string: the constant
        """
        self.vm_writer.write_push(CONSTANT, len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def __handle_keyword_constant(self, word):
        """
        Handles the case of a keyword constant given in a term.
        "this" pushes pointer 0; every other keyword pushes constant 0, and
        "true" additionally negates it bitwise.
        :param word: The keyword
        """
        if word == "this":
            self.vm_writer.write_push("pointer", 0)
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            if word == "true":
                self.vm_writer.write_arithmetic("~")

    def __is_term(self):
        """
        :return: True if the current token can start a term (and therefore
            an expression), False otherwise.
        """
        curr_type = self.peek_type()
        return bool(
            curr_type == STRING_CONST or curr_type == INT_CONST or
            curr_type == KEYWORD or curr_type == IDENTIFIER or
            self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or
            self.peek_token(CompilationEngine._OPS) or
            # "~" is a unary operator but not a binary one, so it is not
            # covered by _OPS and must be checked separately.
            self.peek_token(CompilationEngine._UNARY_OPS))

    def __subroutine_call(self):
        """
        Compiles a subroutine call whose first token is an identifier.
        Exits with status -1 if that identifier is followed by neither
        '(' nor '.'.
        """
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                # Bare name(...): a method call on the current object.
                self.vm_writer.write_push("pointer", 0)
                self.__subroutine_name(self.class_name, 1)
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        """
        Compiles 'name.subroutine(...)' where name is either a variable
        (method call on that object) or a class name.
        """
        name = self.eat(NAME_REG)
        n_args = 0
        # Push the object reference to the stack
        if self.symbol_table.kind_of(name):
            self.__write_push(name)
            # Dispatch on the variable's declared type, not on its name.
            name = self.symbol_table.type_of(name)
            n_args = 1
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name(name, n_args)

    def __subroutine_name(self, type_name, n_args):
        """
        Handles the case of subroutineName(expressionList)
        :param type_name: class-name prefix for the emitted call
        :param n_args: arguments already pushed by the caller (0 or 1)
        """
        name = self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        nargs = self.compile_expression_list()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.vm_writer.write_call("{}.{}".format(type_name, name),
                                  nargs + n_args)

    def __handle_array(self, name):
        """
        Compiles '[' expression ']' and leaves base-of-name + index on the
        stack.
        """
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)
        self.__write_push(name)
        self.vm_writer.write_arithmetic("+")

    # ========== XML Handling ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    # ========== Token Handling ========== #

    def eat(self, token):
        """
        Consumes the current token if it matches.
        :param token: The regex of the token to compare (matched with
            ``re.match``, i.e. anchored at the start only)
        :return: the consumed token text, or None - without advancing -
            when the current token does not match.
        """
        ctoken = self.curr_token.get_token()
        if re.match(token, ctoken):
            self.__advance_token()
            return ctoken

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: A truthy match if the current token matches the regex, a
            falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: A truthy match if the token after the current one matches
            the regex, a falsy value otherwise (also when there is none).
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer; ``curr_token`` is left unchanged once the
        tokenizer reports no more tokens.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()

    # ========== VM Helper ========== #

    def __get_label(self, label):
        """Returns ``label`` suffixed with a fresh, engine-wide counter."""
        self.label_count += 1
        return "{}{}".format(label, str(self.label_count))

    def __write_pop(self, name):
        """Pops the stack top into the variable ``name``."""
        self.vm_writer.write_pop(self.symbol_table.kind_of(name),
                                 self.symbol_table.index_of(name))

    def __write_push(self, name):
        """Pushes the value of the variable ``name``."""
        self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
class CompilationEngine:
    """Recursive-descent compiler that turns one Jack class into VM code.

    The engine drives a ``JackTokenizer`` (``self.tkx``) whose ``advance()``
    is relied upon to return the new current token.  Class-level and
    subroutine-level identifiers live in two separate symbol tables; lookups
    try the subroutine table first (see ``get_kind``/``get_type``/
    ``get_index``).
    """

    def __init__(self, input_path, output_path):
        """Set up tokenizer, tables and writer, then compile the class.

        :param input_path: path handed to ``JackTokenizer``
        :param output_path: path handed to ``VMWriter``
        """
        self.class_name = ''
        self.subroutine_name = ''
        # Label counters start at -1 so the first label gets suffix 0.
        self.if_counter = -1
        self.while_counter = -1
        self.subroutine_num_arg = 0
        self.tkx = JackTokenizer(input_path)
        self.class_table = symbolTable()
        self.subroutine_table = symbolTable()
        self.vm_writer = VMWriter(output_path)
        self.compile_class(output_path)

    def compile_class(self, output_path):
        """
        complete class
        'class' className '{' classVarDec* subroutineDec* '}'

        :param output_path: not used here; kept for existing callers.
        """
        # 'class'
        self.tkx.advance()
        # className
        self.subroutine_table.class_name = self.tkx.advance()
        self.class_name = self.tkx.current_token()
        # '{'
        self.tkx.advance()
        # first token of the class body
        self.tkx.advance()
        while self.tkx.current_token() in ('static', 'field'):
            self.compile_class_var_dec()
            # step past ';'
            self.tkx.advance()
        while self.tkx.current_token() in ('constructor', 'function',
                                           'method'):
            self.compile_subroutine_dec()
            # step past the subroutine's closing '}'
            self.tkx.advance()

    def compile_subroutine_dec(self):
        """
        subroutine declaration (constructor, function or method)
        Resets the per-subroutine label counters and symbol table, records
        the full ``Class.name`` and compiles parameters and body.
        """
        self.if_counter = -1
        self.while_counter = -1
        was_constructor = False
        was_method = False
        # constructor or function or method
        subroutine = self.tkx.current_token()
        # void or type
        self.tkx.advance()
        self.subroutine_table.start_subroutine()
        if subroutine == 'constructor':
            was_constructor = True
        elif subroutine == 'method':
            was_method = True
            # A method receives the object as its implicit first argument.
            self.subroutine_table.define(THIS, self.class_name, 'argument')
            self.subroutine_num_arg = 1
        self.subroutine_name = self.class_name + '.'
        # subroutine name
        self.tkx.advance()
        self.subroutine_name += self.tkx.current_token()
        # '('
        self.tkx.advance()
        self.compile_parameter_list()
        self.compile_subroutine_body(was_constructor, was_method)

    def compile_parameter_list(self):
        """
        parameter list
        Entered with '(' as the current token; returns with ')' current.
        """
        if self.tkx.advance() != ')':
            # type
            par_type = self.tkx.current_token()
            # var name
            name = self.tkx.advance()
            self.subroutine_table.define(name, par_type, 'argument')
        else:
            return
        # ',' or ')'
        self.tkx.advance()
        while self.tkx.current_token() != ')':
            # type
            par_type = self.tkx.advance()
            # var name
            name = self.tkx.advance()
            self.subroutine_table.define(name, par_type, 'argument')
            self.tkx.advance()

    def compile_subroutine_body(self, was_constructor, was_method):
        """
        subroutine body
        Inside declaration

        :param was_constructor: emit object allocation before the statements
        :param was_method: emit the ``this`` setup before the statements
        """
        # '{'
        self.tkx.advance()
        # var declaration
        while self.tkx.get_next_token() == 'var':
            self.compile_var_dec()
        # The 'function' command needs the local count, so it is emitted only
        # after every var declaration has been read.
        self.vm_writer.write_function(self.subroutine_name,
                                      self.subroutine_table.var_count(LOCAL))
        if was_constructor:
            self.vm_writer.write_push(CONSTANT,
                                      self.class_table.var_count(FIELD))
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop(POINTER, 0)
        elif was_method:
            self.vm_writer.write_push(ARGUMENT, 0)
            self.vm_writer.write_pop(POINTER, 0)
        self.compile_statements()

    def compile_class_var_dec(self):
        """
        class variable declaration
        ('static' | 'field') type varName (',' varName)* ';'
        Entered on the kind keyword; returns with ';' current.
        """
        self.subroutine_num_arg = 0
        # static or field
        kind = self.tkx.current_token()
        # type
        var_type = self.tkx.advance()
        # var name
        name = self.tkx.advance()
        self.class_table.define(name, var_type, kind)
        self.tkx.advance()
        while self.tkx.current_token() != ';':
            # var name (current token is ',')
            name = self.tkx.advance()
            self.class_table.define(name, var_type, kind)
            self.tkx.advance()

    def compile_var_dec(self):
        """
        variable declaration
        'var' type varName (',' varName)* ';'
        Entered with 'var' as the *next* token; returns with ';' current.
        """
        # var
        self.tkx.advance()
        # type
        var_type = self.tkx.advance()
        # var name
        name = self.tkx.advance()
        self.subroutine_table.define(name, var_type, LOCAL)
        self.tkx.advance()
        while self.tkx.current_token() != ';':
            # var name (current token is ',')
            name = self.tkx.advance()
            self.subroutine_table.define(name, var_type, LOCAL)
            self.tkx.advance()

    def compile_statements(self):
        """
        statements
        Steps onto the first statement and compiles until '}' is current.
        """
        # for each statement in statements
        self.tkx.advance()
        while self.tkx.current_token() != '}':
            self.compile_statement()
            self.tkx.advance()

    def compile_statement(self):
        """Dispatch on the current statement keyword; other tokens are
        ignored."""
        keyword = self.tkx.current_token()
        if keyword == 'let':
            self.compile_let()
        elif keyword == 'if':
            self.compile_if()
        elif keyword == 'do':
            self.compile_do()
        elif keyword == 'while':
            self.compile_while()
        elif keyword == 'return':
            self.compile_return()

    def compile_let(self):
        """
        let statement
        After that this is a var declaration
        'let' varName ('['expression']')? '=' expression ';'
        """
        was_array = False
        # identifier
        name = self.tkx.advance()
        if self.tkx.advance() == '[':
            self.compile_expression()
            kind = self.get_kind(name)
            index = self.get_index(name)
            self.vm_writer.write_push(kind, index)
            # Target address (base + index) stays on the stack.
            self.vm_writer.write_arithmetic('add')
            was_array = True
            # ']' is current; step onto '='
            self.tkx.advance()
        # compile_expression steps past the '=' itself
        self.compile_expression()
        if not was_array:
            kind = self.get_kind(name)
            index = self.get_index(name)
            self.vm_writer.write_pop(kind, index)
        else:
            # Park the value in temp 0 so the address underneath it can be
            # moved into pointer 1 (THAT) before the store.
            self.vm_writer.write_pop(TEMP, 0)
            self.vm_writer.write_pop(POINTER, 1)
            self.vm_writer.write_push(TEMP, 0)
            self.vm_writer.write_pop(THAT, 0)

    def compile_if(self):
        """
        if statement
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        isElse = False
        self.if_counter += 1
        # Captured locally: nested ifs bump the shared counter.
        label_if = self.if_counter
        # '(' symbol
        self.tkx.advance()
        self.compile_expression()
        self.vm_writer.write_if_goto(IF_TRUE + str(label_if))
        self.vm_writer.write_goto(IF_FALSE + str(label_if))
        self.vm_writer.write_label(IF_TRUE + str(label_if))
        # ')' is current; step onto '{'
        self.tkx.advance()
        self.compile_statements()
        # '}' symbol
        if self.tkx.get_next_token() == 'else':
            isElse = True
            self.vm_writer.write_goto(IF_END + str(label_if))
            self.vm_writer.write_label(IF_FALSE + str(label_if))
            # else
            self.tkx.advance()
            # '{' symbol
            self.tkx.advance()
            self.compile_statements()
            # '}' symbol
        if isElse:
            self.vm_writer.write_label(IF_END + str(label_if))
        else:
            self.vm_writer.write_label(IF_FALSE + str(label_if))

    def compile_do(self):
        """
        do statement
        'do' subroutineCall ';'
        """
        # name
        name = self.tkx.advance()
        # '(' or '.'
        self.tkx.advance()
        self.compile_subroutine_call(name, True)
        # ;
        self.tkx.advance()

    def compile_while(self):
        """
        while statement
        'while' '(' expression ')' '{' statements '}'
        """
        self.while_counter += 1
        # Captured locally: nested whiles bump the shared counter.
        while_label = self.while_counter
        self.vm_writer.write_label(WHILE_LABEL + str(while_label))
        # '(' symbol
        self.tkx.advance()
        self.compile_expression()
        # Leave the loop when the condition is false.
        self.vm_writer.write_arithmetic('not')
        self.vm_writer.write_if_goto(END_WHILE + str(while_label))
        # ')' is current; step onto '{'
        self.tkx.advance()
        self.compile_statements()
        # '}' symbol
        self.vm_writer.write_goto(WHILE_LABEL + str(while_label))
        self.vm_writer.write_label(END_WHILE + str(while_label))

    def compile_return(self):
        """
        return statement
        'return' expression? ';'
        A bare return pushes constant 0 before the VM return.
        """
        if self.tkx.get_next_token() != ';':
            self.compile_expression()
            self.vm_writer.write_return()
            # ';' is already current
            return
        self.vm_writer.write_push(CONSTANT, 0)
        # ;
        self.tkx.advance()
        self.vm_writer.write_return()

    def compile_expression(self):
        """
        expression
        Maybe after all "()"
        term (op term)*; first steps past the current token (the delimiter
        in front of the expression).  Operators apply left to right.
        """
        self.tkx.advance()
        self.compile_term()
        while self.tkx.current_token() in OP:
            op = self.tkx.current_token()
            self.tkx.advance()
            self.compile_term()
            self.vm_writer.write_arithmetic(OP_TRANSLATOR[op])

    def compile_expression_list(self):
        """
        expression list
        Maybe after all "()" that in call to function

        Entered with '(' current; returns with ')' current.

        :return: the number of expressions compiled.  The same value is also
            stored in ``self.subroutine_num_arg``.
        """
        # Count in a local: an argument that is itself a subroutine call
        # re-enters this method and resets the shared attribute, which used
        # to lose the arguments counted so far.
        count = 0
        if self.tkx.get_next_token() != ')':
            self.compile_expression()
            count += 1
            while self.tkx.current_token() != ')':
                self.compile_expression()
                count += 1
        else:
            self.tkx.advance()
        self.subroutine_num_arg = count
        return count

    def compile_term(self):
        """
        term
        Distinguish between the kinds by "(", "." and "["
        (See the explanation in the book)
        Returns with the token following the term as the current token.
        """
        token_type = self.tkx.token_type()
        if token_type == TERM_INT_CONST:
            self.vm_writer.write_push(CONSTANT, str(self.tkx.current_token()))
            self.tkx.advance()
        elif token_type == TERM_STRING_CONST:
            self.write_string_const(self.tkx.current_token())
            self.tkx.advance()
        elif token_type == TERM_KEYWORD:
            if self.tkx.current_token() in {'true', 'false', 'null'}:
                self.vm_writer.write_push(CONSTANT, 0)
                if self.tkx.current_token() == 'true':
                    self.vm_writer.write_arithmetic('not')
            else:
                # this
                self.vm_writer.write_push(POINTER, 0)
            self.tkx.advance()
        elif token_type == TERM_SYMBOL:
            if self.tkx.current_token() == '(':
                self.compile_expression()
                # step past ')'
                self.tkx.advance()
            else:
                unary_op = self.tkx.current_token()
                self.tkx.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(UNARY_OP_TRANSLATOR[unary_op])
        elif token_type == TERM_IDENTIFIER:
            name = self.tkx.current_token()
            kind = self.get_kind(name)
            index = self.get_index(name)
            self.tkx.advance()
            if self.tkx.current_token() == '[':
                self.compile_expression()
                self.vm_writer.write_push(kind, index)
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop(POINTER, 1)
                self.vm_writer.write_push(THAT, 0)
                # step past ']'
                self.tkx.advance()
            elif self.tkx.current_token() in ('(', '.'):
                self.compile_subroutine_call(name, False)
                # step past ')'
                self.tkx.advance()
            else:
                self.vm_writer.write_push(kind, index)

    def compile_subroutine_call(self, name, isDo):
        """
        subroutine call
        Entered with '(' or '.' current; returns with ')' current.

        :param name: the identifier in front of '(' or '.'
        :param isDo: True when called from a do statement; the returned value
            is then discarded into temp 0.
        """
        was_method = False
        if self.tkx.current_token() == '.':
            kind = self.get_kind(name)
            if kind:
                # Known variable: method call on that object.
                index = self.get_index(name)
                self.vm_writer.write_push(kind, index)
                was_method = True
                # subroutine name, qualified by the variable's type
                name = self.get_type(name) + '.' + self.tkx.advance()
            else:
                # Unknown name: treated as a class name.
                name += '.' + self.tkx.advance()
            # '('
            self.tkx.advance()
        elif '.' not in name:
            # Bare name(...): method call on the current object.
            name = self.class_name + '.' + name
            was_method = True
            self.vm_writer.write_push(POINTER, 0)
        n_args = self.compile_expression_list()
        if was_method:
            # Account for the object reference pushed above.
            n_args += 1
        self.vm_writer.write_call(name, n_args)
        self.subroutine_num_arg = 0
        if isDo:
            self.vm_writer.write_pop(TEMP, 0)

    def get_kind(self, name):
        """Kind of ``name``; subroutine scope wins over class scope."""
        kind = self.subroutine_table.kind_of(name)
        if kind is not None:
            return kind
        return self.class_table.kind_of(name)

    def get_type(self, name):
        """Type of ``name``; subroutine scope wins over class scope."""
        var_type = self.subroutine_table.type_of(name)
        if var_type is not None:
            return var_type
        return self.class_table.type_of(name)

    def get_index(self, name):
        """Index of ``name``; subroutine scope wins over class scope."""
        index = self.subroutine_table.index_of(name)
        if index is not None:
            return index
        return self.class_table.index_of(name)

    def write_string_const(self, str):
        """
        Emits the code that builds a string constant: String.new with the
        length, then one String.appendChar per character.

        :param str: the string token; every '"' character is dropped first.
        """
        re_str = str.replace('"', '')
        self.vm_writer.write_push(CONSTANT, len(re_str))
        self.vm_writer.write_call('String.new', 1)
        for char in re_str:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)