def __init__(self, tokenizer):
    """Compile the single Jack class supplied by ``tokenizer``.

    The output file name is the tokenizer's file name with a trailing
    ``.jack`` extension replaced by ``.vm``.  Construction advances to the
    first token, runs ``compile_class()`` and then calls ``close()``.

    tokenizer -- object providing ``get_filename()``, ``has_more_tokens()``
                 and ``advance()``.
    """
    # Strip only a trailing ".jack".  A blanket str.replace would also
    # remove ".jack" from the middle of the path (e.g. a directory named
    # "my.jackfiles") and produce the wrong output location.
    extension = '.jack'
    base_name = tokenizer.get_filename()
    if base_name.endswith(extension):
        base_name = base_name[:-len(extension)]
    self._name = base_name

    # tokenizer for input
    self._tokenizer = tokenizer
    # symbol table
    self._symbols = SymbolTable()
    # vm output file
    self._writer = VMWriter(self._name + '.vm')

    # Input should be a tokenized .jack file containing one class
    assert self._tokenizer.has_more_tokens()
    self._tokenizer.advance()

    # compilation state, filled in while compiling
    self._class = None
    self._subroutine = None
    self._counter = 0

    self.compile_class()
    self.close()
def __init__(self, source_filename):
    """Prepare to compile the Jack source file ``source_filename``.

    The destination file name is derived from the source name: a ``.jack``
    extension (case-insensitive) is replaced by ``.vm``; any other name
    simply has ``.vm`` appended.

    source_filename -- path of a single source file.
    """
    # destination filename
    if source_filename.lower().endswith(".jack"):
        destination_filename = source_filename[:-5] + ".vm"
    else:
        destination_filename = source_filename + ".vm"

    # Build the tokenizer before touching the output: if the source cannot
    # be tokenized we must not truncate an existing .vm file or leave an
    # open handle behind.
    self.tokenizer = JackTokenizer(source_filename)

    # open the destination filename for writing
    self.destination_file = open(destination_filename, 'w')

    # create the symbol table
    self.symbol_table = SymbolTable()

    # create the vm writer
    self.vm_writer = VMWriter(self.destination_file)
class CompilationEngine(object):
    """Recursive-descent compiler turning one Jack class into VM commands.

    Tokens are read from ``self.tokenizer``, identifiers are tracked in
    ``self.symbol_table`` and VM commands are emitted through
    ``self.vm_writer``.  After construction the next call must be
    ``compile_class``.

    Convention used by the ``compile_*`` methods: each one is entered with
    the tokenizer positioned ON the first token of its construct and leaves
    the tokenizer positioned on the first token AFTER it, unless its
    docstring says otherwise.
    """

    # the destination file for writing
    destination_file = None

    # the tokenizer for the input file
    tokenizer = None

    # symbol table
    symbol_table = None

    # vm writer
    vm_writer = None

    # the class name
    class_name = ""

    # indices for if and while loops
    # start at -1 because we increment before use
    while_index = -1
    if_index = -1

    # binary operators that map directly onto a VM arithmetic command
    _BINARY_ARITHMETIC = {
        "+": "add",
        "-": "sub",
        "=": "eq",
        ">": "gt",
        "<": "lt",
        "&": "and",
        "|": "or",
    }

    # binary operators implemented as two-argument OS calls
    _BINARY_CALLS = {
        "*": "Math.multiply",
        "/": "Math.divide",
    }

    def __init__(self, source_filename):
        """Set up compilation of the single file ``source_filename``.

        A ``.jack`` extension (case-insensitive) is replaced by ``.vm`` to
        form the destination name; otherwise ``.vm`` is appended.
        ``source_filename`` must be a single file, not a directory.
        """
        if source_filename.lower().endswith(".jack"):
            destination_filename = source_filename[:-5] + ".vm"
        else:
            destination_filename = source_filename + ".vm"

        # Build the tokenizer first so that a failure there neither
        # truncates an existing .vm file nor leaks an open handle.
        self.tokenizer = JackTokenizer(source_filename)

        # open the destination filename for writing
        self.destination_file = open(destination_filename, 'w')

        # create the symbol table
        self.symbol_table = SymbolTable()

        # create the vm writer
        self.vm_writer = VMWriter(self.destination_file)

    def compile_class(self):
        """Compile a complete class and close the output file.

        The output file is closed even when compilation aborts.
        """
        try:
            # class keyword
            self._token_next(True, "KEYWORD", "class")

            # name of class
            _, self.class_name = self._token_next(True, "IDENTIFIER")

            # open brace
            self._token_next(True, "SYMBOL", "{")

            # zero or more class variable declarations
            self.tokenizer.advance()
            while True:
                tt, t = self._token_next(False)
                if tt == "KEYWORD" and t in ["field", "static"]:
                    self.compile_class_var_dec()
                else:
                    break

            # zero or more subroutine declarations
            while True:
                tt, t = self._token_next(False)
                if tt == "KEYWORD" and t in ["constructor", "function",
                                             "method"]:
                    self.compile_subroutine()
                else:
                    break

            # close brace
            # do not advance because we already advanced upon exiting the
            # last loop
            self._token_next(False, "SYMBOL", "}")
        finally:
            # done with compilation; close the output file
            self.destination_file.close()

    def compile_class_var_dec(self):
        """Compile a static declaration or field declaration."""
        # False means this is a class (not a subroutine) declaration
        self.compile_var_dec(False)

    def compile_subroutine(self):
        """Compile a complete method, function, or constructor."""
        # start of subroutine
        self.symbol_table.start_subroutine()

        # constructor, function, or method keyword
        _, subroutine_kind = self._token_next(False, "KEYWORD")

        # type of the return value
        # can be either keyword (void) or an identifier (any type)
        self._token_next(True)

        # name of the method/function/constructor
        _, name = self._token_next(True)
        name = self.class_name + "." + name

        # if this is a method, "define" this as an argument, so the other
        # argument indexes work correctly
        if subroutine_kind == "method":
            self.symbol_table.define("this", self.class_name, SymbolTable.ARG)

        # opening parenthesis
        self._token_next(True, "SYMBOL", "(")

        # arguments
        self.tokenizer.advance()
        self.compile_parameter_list()

        # closing parenthesis
        self._token_next(False, "SYMBOL", ")")

        # opening brace
        self._token_next(True, "SYMBOL", "{")

        # variable declarations
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t == "var":
                self.compile_var_dec()
            else:
                break

        # write the function; the local count is only known once every
        # var declaration has been seen
        num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
        self.vm_writer.write_function(name, num_locals)

        # write any special code at the top of the function
        if subroutine_kind == "constructor":
            # allocate memory for the fields and set "this"
            size = self.symbol_table.var_count(self.symbol_table.FIELD)
            self.vm_writer.write_push(self.vm_writer.CONST, size)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
        elif subroutine_kind == "function":
            # nothing special
            pass
        elif subroutine_kind == "method":
            # put argument 0 into pointer 0 (this)
            self.vm_writer.write_push(self.vm_writer.ARG, 0)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
        else:
            print("WARNING: Expected constructor, function, or name; got %s"
                  % (subroutine_kind,))

        # statements
        self.compile_statements()

        # closing brace
        self._token_next(False, "SYMBOL", "}")
        self.tokenizer.advance()

    def compile_parameter_list(self):
        """Compile a (possibly empty) parameter list.

        The enclosing parentheses are not consumed; on return the tokenizer
        is on the first token that is not part of the list.
        """
        tt, t = self._token_next(False)
        if tt == "SYMBOL" and t == ")":
            # the parameter list was empty
            return

        while True:
            # keyword or identifier (variable type)
            _, var_type = self._token_next(False)

            # identifier (variable name)
            _, name = self._token_next(True)

            # parameters are always of kind ARG
            self.symbol_table.define(name, var_type, SymbolTable.ARG)

            # possible comma
            tt, t = self._token_next(True)
            if tt != "SYMBOL" or t != ",":
                # not a comma; stop processing parameters
                break
            self.tokenizer.advance()

    def compile_var_dec(self, subroutine=True):
        """Compile a variable declaration, registering every declared name.

        subroutine -- if true, only the ``var`` keyword is expected;
                      if false, only ``static`` and ``field`` are expected.
                      An unexpected keyword only produces a warning.
        """
        # the keyword to start the declaration
        _, kind = self._token_next(False, "KEYWORD")

        # check for required kinds
        if subroutine:
            if kind == "var":
                kind = SymbolTable.VAR
            else:
                print("WARNING: expecting var, but received %s" % (str(kind)))
        else:
            if kind == "static":
                kind = SymbolTable.STATIC
            elif kind == "field":
                kind = SymbolTable.FIELD
            else:
                print("WARNING: expecting static or field, but received %s"
                      % (str(kind)))

        # type of the declaration
        # could be an identifier or a keyword (int, etc)
        _, var_type = self._token_next(True)

        # name of the declaration
        _, name = self._token_next(True, "IDENTIFIER")
        self.symbol_table.define(name, var_type, kind)

        # further comma-separated names share the same type and kind
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
                _, name = self._token_next(True, "IDENTIFIER")
                self.symbol_table.define(name, var_type, kind)
                self.tokenizer.advance()
            else:
                break

        # should be on the semicolon at the end of the line
        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_statements(self):
        """Compile a sequence of statements, not including the enclosing {}."""
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ["do", "let", "while", "return",
                                         "if"]:
                # dispatch to compile_<keyword>
                getattr(self, "compile_" + t)()
            else:
                # not a statement; stop processing statements
                break

    def compile_do(self):
        """Compile a do statement."""
        # do keyword
        self._token_next(False, "KEYWORD", "do")

        # subroutine call
        self.tokenizer.advance()
        self.compile_subroutine_call()

        # do statements ignore the return value, so drop it from the stack
        self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

        # semicolon
        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_let(self):
        """Compile a let statement (plain variable or array element)."""
        # let keyword
        self._token_next(False, "KEYWORD", "let")

        # variable name
        _, name = self._token_next(True, "IDENTIFIER")

        # possible brackets for array
        tt, t = self._token_next(True)
        is_array = tt == "SYMBOL" and t == "["
        if is_array:
            # compile the offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # write the base address onto the stack
            segment, index = self._resolve_symbol(name)
            self.vm_writer.write_push(segment, index)

            # add base and offset
            self.vm_writer.write_arithmetic("add")

            # we cannot yet put the result into pointer 1, since the
            # right-hand side (not compiled yet) may itself use pointer 1
            # to read an array value

            # closing bracket, then move onto the "="
            self._token_next(False, "SYMBOL", "]")
            self.tokenizer.advance()

        # equals sign
        self._token_next(False, "SYMBOL", "=")

        # expression
        self.tokenizer.advance()
        self.compile_expression()

        if is_array:
            # stack is now: [..., target address, computed value]
            # park the value, load the address into "that", store the value
            self.vm_writer.write_pop(self.vm_writer.TEMP, 0)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)
            self.vm_writer.write_push(self.vm_writer.TEMP, 0)
            self.vm_writer.write_pop(self.vm_writer.THAT, 0)
        else:
            # not an array - pop the expression to the variable
            segment, index = self._resolve_symbol(name)
            self.vm_writer.write_pop(segment, index)

        # semicolon
        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_while(self):
        """Compile a while statement."""
        # labels for this while loop
        self.while_index += 1
        while_start = "WHILE_START_%d" % (self.while_index)
        while_end = "WHILE_END_%d" % (self.while_index)

        # while keyword
        self._token_next(False, "KEYWORD", "while")

        # opening parenthesis
        self._token_next(True, "SYMBOL", "(")

        # label for the start of the while statement
        self.vm_writer.write_label(while_start)

        # the condition
        self.tokenizer.advance()
        self.compile_expression()

        # the closing parenthesis
        self._token_next(False, "SYMBOL", ")")

        # leave the loop when the condition is false: negate, then if-goto
        self.vm_writer.write_arithmetic("not")
        self.vm_writer.write_if(while_end)

        # the opening brace
        self._token_next(True, "SYMBOL", "{")

        # the body of the loop
        self.tokenizer.advance()
        self.compile_statements()

        # the closing brace
        self._token_next(False, "SYMBOL", "}")

        # jump back to re-evaluate the condition
        self.vm_writer.write_goto(while_start)

        # label at the end of the loop
        self.vm_writer.write_label(while_end)

        self.tokenizer.advance()

    def compile_return(self):
        """Compile a return statement.

        A bare ``return;`` pushes constant 0 so that every subroutine
        leaves a value on the stack.
        """
        # return keyword
        self._token_next(False, "KEYWORD", "return")

        # possible expression to return
        tt, t = self._token_next(True)
        # Only the exact token SYMBOL ";" means "no expression".  Other
        # symbols such as "(", "-" or "~" legitimately start an expression.
        if not (tt == "SYMBOL" and t == ";"):
            self.compile_expression()
        else:
            # no return expression; return 0
            self.vm_writer.write_push(self.vm_writer.CONST, 0)

        # ending semicolon
        self._token_next(False, "SYMBOL", ";")

        self.vm_writer.write_return()

        self.tokenizer.advance()

    def compile_if(self):
        """Compile an if statement, including a possible trailing else."""
        # it is more efficient in an if-else case to have the else portion
        # first in the code when testing, but we use the less-efficient but
        # easier-to-write true-false pattern here

        # labels for this if statement
        self.if_index += 1
        if_false = "IF_FALSE_%d" % (self.if_index)
        if_end = "IF_END_%d" % (self.if_index)

        # if keyword
        self._token_next(False, "KEYWORD", "if")

        # opening parenthesis
        self._token_next(True, "SYMBOL", "(")

        # condition
        self.tokenizer.advance()
        self.compile_expression()

        # closing parenthesis
        self._token_next(False, "SYMBOL", ")")

        # if the condition is false jump to the false label, otherwise
        # fall through into the true branch
        self.vm_writer.write_arithmetic("not")
        self.vm_writer.write_if(if_false)

        # opening brace
        self._token_next(True, "SYMBOL", "{")

        # statements for true portion
        self.tokenizer.advance()
        self.compile_statements()

        # closing brace
        self._token_next(False, "SYMBOL", "}")

        # this advance doubles as the "move past }" step when no else follows
        tt, t = self._token_next(True)
        if tt == "KEYWORD" and t == "else":
            # skip the false portion at the end of the true portion
            self.vm_writer.write_goto(if_end)

            # label for the start of the false portion
            self.vm_writer.write_label(if_false)

            # opening brace
            self._token_next(True, "SYMBOL", "{")

            # statements
            self.tokenizer.advance()
            self.compile_statements()

            # closing brace
            self._token_next(False, "SYMBOL", "}")

            # end label
            self.vm_writer.write_label(if_end)

            # advance only here; in the no-else case the else check above
            # already moved past the closing brace
            self.tokenizer.advance()
        else:
            # no else portion; only the false label is needed
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """Compile an expression: one or more terms joined by operators.

        Operators are emitted in postfix order and applied left to right
        (no operator precedence).
        """
        # the first term
        self.compile_term()

        # any number of operators followed by terms
        while True:
            tt, op = self._token_next(False)
            is_operator = tt == "SYMBOL" and (
                op in self._BINARY_ARITHMETIC or op in self._BINARY_CALLS)
            if not is_operator:
                # no operator found; done parsing the expression
                break

            # postfix order - compile the next term, then the operator
            self.tokenizer.advance()
            self.compile_term()

            if op in self._BINARY_ARITHMETIC:
                self.vm_writer.write_arithmetic(self._BINARY_ARITHMETIC[op])
            else:
                self.vm_writer.write_call(self._BINARY_CALLS[op], 2)

    def compile_term(self):
        """Compile a single term.

        term: integer_constant | string_constant | keyword_constant |
              varname | varname[expression] | subroutine_call |
              (expression) | unary_op term

        When the current token is an identifier, one token of lookahead
        ("[", "(" or ".") distinguishes an array entry and a subroutine
        call from a plain variable.  Any other lookahead token is not part
        of this term and is left as the current token.
        """
        tt, t = self._token_next(False)
        if tt == "INT_CONST":
            self.vm_writer.write_push(self.vm_writer.CONST, t)

            # advance for the next statement
            self.tokenizer.advance()
        elif tt == "STRING_CONST":
            # leave a pointer to a new String on the stack: allocate it
            # with the right length, then append the characters one by one
            # (each append returns the same string pointer)
            self.vm_writer.write_push(self.vm_writer.CONST, len(t))
            self.vm_writer.write_call("String.new", 1)

            for char in t:
                self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
                self.vm_writer.write_call("String.appendChar", 2)

            # advance for the next statement
            self.tokenizer.advance()
        elif tt == "KEYWORD":
            if t == "true":
                # true is -1, which is 0 negated
                self.vm_writer.write_push(self.vm_writer.CONST, 0)
                self.vm_writer.write_arithmetic("not")
            elif t == "false" or t == "null":
                self.vm_writer.write_push(self.vm_writer.CONST, 0)
            elif t == "this":
                self.vm_writer.write_push(self.vm_writer.POINTER, 0)

            # advance for the next statement
            self.tokenizer.advance()
        elif tt == "SYMBOL" and t == "(":
            # ( expression )
            self.tokenizer.advance()
            self.compile_expression()

            # closing parenthesis
            self._token_next(False, "SYMBOL", ")")

            # advance for the next statement
            self.tokenizer.advance()
        elif tt == "SYMBOL" and t in ("-", "~"):
            # unary_op term
            # postfix order - compile the operand, then the operator
            self.tokenizer.advance()
            self.compile_term()

            if t == "-":
                self.vm_writer.write_arithmetic("neg")
            else:
                self.vm_writer.write_arithmetic("not")
        elif tt == "IDENTIFIER":
            # varname, varname[expression] or subroutine_call: look at the
            # following token to decide
            tt2, t2 = self._token_next(True)
            if tt2 == "SYMBOL" and t2 in ("(", "."):
                # subroutine call
                # back up onto the identifier and compile the call
                self.tokenizer.retreat()
                self.compile_subroutine_call()
            elif tt2 == "SYMBOL" and t2 == "[":
                # array - read operation
                # write the base address onto the stack
                segment, index = self._resolve_symbol(t)
                self.vm_writer.write_push(segment, index)

                # compile the offset expression
                self.tokenizer.advance()
                self.compile_expression()

                # add base and offset
                self.vm_writer.write_arithmetic("add")

                # put the resulting address into pointer 1 (that)
                self.vm_writer.write_pop(self.vm_writer.POINTER, 1)

                # read from that 0 onto the stack
                self.vm_writer.write_push(self.vm_writer.THAT, 0)

                # closing bracket
                self._token_next(False, "SYMBOL", "]")

                # advance for the next statement
                self.tokenizer.advance()
            else:
                # none of above - just a single identifier; the lookahead
                # token stays current for the caller
                segment, index = self._resolve_symbol(t)
                self.vm_writer.write_push(segment, index)
        else:
            # unknown
            print("WARNING: Unknown term expression object: %s %s" % (tt, t))

    def compile_expression_list(self):
        """Compile a (possibly empty) comma-separated list of expressions.

        Returns the number of expressions compiled.
        """
        num_args = 0

        # check for empty list
        tt, t = self._token_next(False)
        if tt == "SYMBOL" and t == ")":
            return num_args

        while True:
            # expression to pass
            self.compile_expression()
            num_args += 1

            # possible comma
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
                self.tokenizer.advance()
            else:
                # not a comma; stop processing arguments
                break

        return num_args

    def compile_subroutine_call(self):
        """Compile a subroutine call.

        Two syntactic cases:
          - subroutineName(expressionList)
          - (class|var).subroutineName(expressionList)
        """
        # first part of name
        _, name1 = self._token_next(False, "IDENTIFIER")

        # a dot and another name may exist, or it could be a parenthesis
        name2 = None
        tt, t = self._token_next(True)
        if tt == "SYMBOL" and t == ".":
            # the name after the dot
            _, name2 = self._token_next(True, "IDENTIFIER")

            # advance so that we are on the parenthesis
            self.tokenizer.advance()

        # Three possibilities:
        #   func()       - method call on the current object
        #   var.func()   - method call on the object held in var
        #   class.func() - plain function call
        # The undotted form is tested first: it is always a call on the
        # current object, even if a variable happens to share the name.
        if name2 is None:
            is_method = True
            # push the current object as the hidden first argument
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)
            vm_function_name = self.class_name + "." + name1
        elif self.symbol_table.contains(name1):
            is_method = True
            # push the variable as the hidden first argument
            segment, index = self._resolve_symbol(name1)
            self.vm_writer.write_push(segment, index)
            # the callee's class is the variable's declared type
            vm_function_name = self.symbol_table.get(name1)[1] + "." + name2
        else:
            is_method = False
            vm_function_name = name1 + "." + name2

        # opening parenthesis
        self._token_next(False, "SYMBOL", "(")

        # expression list
        self.tokenizer.advance()
        num_args = self.compile_expression_list()

        # closing parenthesis
        self._token_next(False, "SYMBOL", ")")

        # account for the hidden "this" argument
        if is_method:
            num_args += 1

        # make the call
        self.vm_writer.write_call(vm_function_name, num_args)

        self.tokenizer.advance()

    def _token_next(self, advance=False, expected_type=None,
                    expected_value=None):
        """Return ``(token_type, token)`` for the current token.

        advance        -- advance the tokenizer before reading.
        expected_type  -- if given, the required token type.
        expected_value -- if given, the required token text.

        The token is always returned as a string.  On a mismatch a warning
        and the call stack are printed and the process exits with status 1.
        """
        # advance the tokenizer, if requested
        if advance:
            self.tokenizer.advance()

        # the tokenizer exposes one accessor per token type, named after
        # the lower-cased type
        token_type = self.tokenizer.token_type()
        token = str(getattr(self.tokenizer, token_type.lower())())

        if expected_type and token_type != expected_type:
            print("WARNING: Type %s found; expected %s"
                  % (token_type, expected_type))
            import traceback, sys
            traceback.print_stack()
            sys.exit(1)
        if expected_value and token != expected_value:
            print("WARNING: Value %s found; expected %s"
                  % (token, expected_value))
            import traceback, sys
            traceback.print_stack()
            sys.exit(1)

        return token_type, token

    def _type_to_segment(self, type):
        """Convert a symbol table kind into a VM segment.

        An unknown kind prints an error and yields None.
        """
        if type == self.symbol_table.STATIC:
            return self.vm_writer.STATIC
        elif type == self.symbol_table.FIELD:
            return self.vm_writer.THIS
        elif type == self.symbol_table.ARG:
            return self.vm_writer.ARG
        elif type == self.symbol_table.VAR:
            return self.vm_writer.LOCAL
        else:
            print("ERROR: Bad type %s" % (str(type)))

    def _resolve_symbol(self, name):
        """Look ``name`` up in the symbol table.

        Returns the ``(segment, index)`` 2-tuple to push from or pop to.
        """
        kind, _, index = self.symbol_table.get(name)
        return self._type_to_segment(kind), index
def __init__(self, infile, outfile):
    """Create the collaborators used while compiling one input file.

    infile  -- passed to JackToken, which supplies the tokens to compile.
    outfile -- passed to VMWriter, which receives the generated output.
    """
    # output side first, then the token source, then a fresh symbol table
    self.writer = VMWriter(outfile)
    self.token = JackToken(infile)
    self.table = SymbolTable()
class CompilationEngine: #------------------------------------------------------------------------------ # Var Declar: #------------------------------------------------------------------------------ #stores all the different key words key_class='CLASS' key_method='METHOD' key_function='FUNCTION' key_constructor='CONSTRUCTOR' key_int='INT' key_boolean='BOOLEAN' key_char='CHAR' key_void='VOID' key_var='VAR' key_static='STATIC' key_field='FIELD' key_let='LET' key_do='DO' key_if='IF' key_else='ELSE' key_while='WHILE' key_return='RETURN' key_true='TRUE' key_false='FALSE' key_null='NULL' key_this='THIS' #stores all the token types keyword='KEYWORD' sym='SYMBOL' ident='IDENTIFIER' intc='INT_CONST' string_c='STRING_CONST' #This stores the convertions from the jack kind to the appropriate segment field segment = {'VAR':'local', 'STATIC':'static', 'FIELD':'this', 'ARG':'argument'} #Stores counters for lables of loops and if/else statments loopCounter = 0 ifCounter = 0 #-------------------------------------------------------------------------- # Class declaration: #-------------------------------------------------------------------------- #------------------------------------------------------------------------------ # This is the constructor def __init__(self,infile,outfile): self.writer = VMWriter(outfile) self.token = JackToken(infile) self.table = SymbolTable() #------------------------------------------------------------------------------ # This method compiles the entire class contained in the input file def compileClass(self): self.token.advance() while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_class in tempkey: s = "Place holder nothing to do here" #if the keyword is static or field then it is known that it is a class var dec #at this level of compilation elif self.key_static in tempkey or self.key_field in tempkey: self.compileClassVarDec() continue #continue because there 
maybe more then one class var and don't want to advane tokenizer #if the keyword is a subroutine type elif self.key_constructor in tempkey or self.key_method in tempkey or self.key_function in tempkey: self.compileSubroutine() elif self.sym in tokentype: tempsym = self.token.symbol() #if we run into } at this level then we are at the end of the class if '}' in tempsym: break elif self.ident in tokentype: tempident = self.token.identifier() #stores the name of the class we are in for calling methods from #with in this class and for other things as well self.currClassName = tempident self.token.advance() self.writer.close() #------------------------------------------------------------------------------ # This method compiles class var dec def compileClassVarDec(self): curtype = "" curkind = "" curname = "" while self.token.hasMoreTokens: tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey: curtype = tempkey elif self.key_static in tempkey or self.key_field in tempkey: curkind = tempkey #if we run into a subroutine declaration then we break elif self.key_function in tempkey or self.key_method in tempkey or self.key_constructor in tempkey: break elif self.ident in tokentype: tempident = self.token.identifier() #if the curtype string is empty then its type is an object if len(curtype) == 0: curtype = tempident else: curname = tempident elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into any of the below symboles then it is an invalid var decleration if re.search('[\(\)\{\}\[\]\.\+\-\*\/\&\<\>\=\~]{1}',tempsym) is not None: print(self.token.errorMsg()) sys.exit(0) #if we run into a ; then it is the end of this particular class var dec if ';' in tempsym: #want to advance past ; so the calling method can do the proper checks self.token.advance() self.table.Define(curname,curtype,curkind) break 
self.table.Define(curname,curtype,curkind) #clears the curname for cases like 'FIELD int haberdash, x, y' all have same #type and kind but different names curname = '' self.token.advance() #------------------------------------------------------------------------------ # This method compiles the subroutines def compileSubroutine(self): self.table.startSubroutine() self.curSubType = '' if_param = False #ensures that at least an empty param list is discovered #this is to tell other methods that the current block being read in is a constructor and to take #the appropriate actions self.isConstruct = False isFunct = False while self.token.hasMoreTokens: tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_method in tempkey or self.key_function in tempkey or self.key_constructor in tempkey: #sets isConstruct to true if the keyword is constructor or false other wise self.isConstruct = True if self.key_constructor in tempkey else False #sets isFunct to true if the keyword is function or false other wise isFunct = True if self.key_function in tempkey else False elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey or self.key_void in tempkey: self.curSubType = tempkey #if the keyward var is in tempkey then we need to compile a vardeck elif self.key_var in tempkey: self.compileVarDec() #if it runs into any keywords that aren't caught by the above statements then it is no longer #in a subroutine else: self.writer.writeFunction(self.currClassName+'.'+self.curSubName,self.table.varCount('VAR')) break elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into a ( then it is descovering a parameter list if '(' in tempsym: self.token.advance() self.compileParameterList(self.isConstruct or isFunct) if_param = True #set param list discovered to true #if it has fond at lest an empty paramlist then it can print the next symboles elif if_param: s = "this is does nothing just place 
holeder" #error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: #if cursubtype is empty then the return type is an object #used for compiling returns and type checking if len(self.curSubType) == 0: self.curSubType = self.token.identifier() else: self.curSubName = self.token.identifier() self.token.advance() #If this was defined as an argument then the subroutine is not a function or constructor #thus we need to set the this pointer in the subroutine to the first argument passed in if 'NONE' not in self.table.kindOf('this'): self.writer.writePush(self.segment[self.table.kindOf('this')],repr(self.table.indexOf('this'))) self.writer.writePop('pointer','0') #if it is a constructor then we need to allocate memory for the object if self.isConstruct: self.writer.writePush('constant',repr(self.table.varCount('FIELD'))) self.writer.writeCall('Memory.alloc',1) self.writer.writePop('pointer','0') #compile the body of the subroutine self.compileStatements() self.loopCounter = 0 self.ifCounter = 0 self.curSubName = '' #------------------------------------------------------------------------------ # This method compiles the parameter list def compileParameterList(self,isConstruct): curname = '' curtype = '' curkind = '' #If it isn't a constructor then we need to define this as the #first argument if not isConstruct: self.table.Define('this',self.currClassName,'ARG') while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() curtype = tempkey elif self.ident in tokentype: tempident = self.token.identifier() #if the curtype is empty then its type is an object if len(curtype) == 0: curtype = tempident else: curname = tempident elif self.sym in tokentype: tempsym = self.token.symbol() #if it runs into a ) means the end of the parameter list so break if ')' in tempsym: self.table.Define(curname, curtype, 'ARG') break #seperation of the parameters elif ',' in tempsym: 
self.table.Define(curname, curtype, 'ARG') curname = '' curtype = '' #any other symbol results in a an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #advance twice because we are at ( so need to getpast that and need to get the next symbol self.token.advance() self.token.advance() #------------------------------------------------------------------------------ # This method compiles the var decliration def compileVarDec(self): curname = '' curtype = '' while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_var in tempkey: s = 'Place holder does nothing just ensures that a var is seen' elif self.key_int in tempkey or self.key_char in tempkey or self.key_boolean in tempkey: curtype = tempkey #if any keyword is docovered than what is above then the vardec is over else: break elif self.ident in tokentype: tempident = self.token.identifier() #if the curtype is empty then its type is an object if len(curtype) == 0: curtype = tempident else: curname = tempident elif self.sym in tokentype: tempsym = self.token.symbol() if ',' in tempsym: self.table.Define(curname,curtype, 'VAR') curname = '' #once ; is found then at the end of a vardec elif ';' in tempsym: self.table.Define(curname,curtype, 'VAR') break self.token.advance() #------------------------------------------------------------------------------ # This method compiles the statements def compileStatements(self): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() #if 'let' is found then compilelet if self.key_let in tempkey: self.compileLet() elif self.key_if in tempkey: self.compileIf() #continue because we could have multiple if statements found and #the current token could be the key word if so we don't want to advance #the tokenizer prematurely continue elif self.key_while in tempkey: self.compileWhile() elif self.key_do in 
tempkey: self.compileDo() elif self.key_return in tempkey: self.compileReturn() #incorrect key word at this level of compilation else: print(self.token.errorMsg()) sys.exit(0) elif self.sym in tokentype: tempsym = self.token.symbol() #once we run into } thats the endof statments if '}' in tempsym: break #any other symbol discovered at this stage is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #------------------------------------------------------------------------------ # This method compiles the do def compileDo(self): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_do in tempkey: s = 'Place holder this does nothing' #if any keyword other then do is discovered at this level it results in an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: #compiles the expression with the value for a subroutine call passed in being true self.compileExpression(True) self.token.advance() break self.token.advance() #need to pop the return value of the stack so that it doesn't interfeer #with other operations self.writer.writePop('temp','0') #------------------------------------------------------------------------------ # This method compiles the letStatement def compileLet(self): isArray = False leftSideEq = '' while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_let in tempkey: s = 'Place holder this does nothing' #if any other keyword is discovered it is an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: tempident = self.token.identifier() peak = self.token.peak() #if [ is discovered it means that it is an array access if '[' in peak: self.token.advance() self.token.advance() kind = self.table.kindOf(tempident) #if the identifiers kind is non then it is an udefined variable if "NONE" in kind: 
print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) #pushs the arrays location on to the stack self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident))) #compiles the expression for the index self.compileExpression(False) #adds the result of the expression to the base location self.writer.writeArithmetic('+') isArray = True self.token.advance() #continue so that the bellow error catching isn't accidently triped hence the advance command #before this continue else: kind = self.table.kindOf(tempident) if "NONE" in kind: print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) #stores the lefside idetifier if it isn't an array leftSideEq = tempident elif self.sym in tokentype: tempsym = self.token.symbol() #this means that we compile th expression on the other side of the = sign if '=' in tempsym: self.token.advance() self.compileExpression(False) #if we are setting an array location (left side of =) to the expressions result if isArray: #pop expressions result into temp 0 self.writer.writePop('temp','0') #sets that to what the left side resulted in self.writer.writePop('pointer','1') #pushs temp back on to stack and pops it to that at 0 self.writer.writePush('temp','0') self.writer.writePop('that','0') #other wise pop it to the variables location else: kind = self.table.kindOf(leftSideEq) self.writer.writePop(self.segment[kind],repr(self.table.indexOf(leftSideEq))) #sets tempsym to the current symbole tempsym = self.token.symbol() #if tempsym at this point is ; then end of let statement if ';' in self.token.symbol(): break #othre wise it is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #------------------------------------------------------------------------------ # This method compiles the whileStatement def compileWhile(self): #lables for the begenning and the exit of a loop curLoop = self.curSubName+'.loop.'+repr(self.loopCounter) curLoopExit = curLoop+'.EXIT' #incremets loop counter so that all 
loop for this subroutine will have #unique exit and begin label self.loopCounter += 1 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_while in tempkey: self.writer.writeLabel(curLoop) #if any other keyword is discovered at this level it is an error else: print(self.token.errorMsg()) sys.exit(0) elif self.sym in tokentype: tempsym = self.token.symbol() #the condition of the while loop if '(' in tempsym: self.token.advance() self.compileExpression(False) #not the result of the exprssion that if the expression #is false we jump the loops exit self.writer.writeArithmetic('~') self.writer.writeIf(curLoopExit) #body of the while loop elif '{' in tempsym: self.token.advance() self.compileStatements() #bottom of loop need to go back to the top self.writer.writeGoto(curLoop) #once the statments are compiled the whilestatment is done break #any other symbol at this level results in an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.writer.writeLabel(curLoopExit) #------------------------------------------------------------------------------ # This method compiles the ReturnStatement def compileReturn(self): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_return in tempkey: s = "Place holder does nothing" #Any other keyword means that an exprssion is to be compiled and return is done else: self.compileExpression(False) self.token.advance() break #other wise compile expression elif self.ident in tokentype or self.string_c in tokentype or self.intc in tokentype: self.compileExpression(False) self.token.advance() break elif self.sym in tokentype: tempsym = self.token.symbol() #denotes the end of a return statment if ';' in tempsym: #if the current subroutines type is the same as the class #then it is a constructor and needs to return the this pointer if 
self.curSubType == self.currClassName: self.writer.writePush('pointer','0') #if we reach this point and void is not the subroutines type #then the user must need to return a value elif self.key_void not in self.curSubType: print(self.token.errorMsg()+'must return something\n') sys.exit(0) #if void is the subroutines type return 0 else: self.writer.writePush('constant','0') break #any other symbol at this level is an error else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() self.writer.writeReturn() #------------------------------------------------------------------------------ # This method compiles the ifStatement def compileIf(self): #labels for the else part of if and the exit of both if and else statents currIf = self.curSubName+'.else.'+repr(self.ifCounter) currIfExit = self.curSubName+'.if.'+repr(self.ifCounter)+'.EXIT' #ensurest that all future if|else blocks have unique labels for this #subroutine self.ifCounter += 1 ifElse = False #this means that keyword if has been seen only once so if it seen again #that means it is a seperate if statment seen_once = True while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_if in tempkey and seen_once: s = 'Place holeder does nothing' elif self.key_else in tempkey and not ifElse: ifElse = True #write the jump to the exit of the if/else block self.writer.writeGoto(currIfExit) #Else part of the block self.writer.writeLabel(currIf) #if any other keyword is seen then it is the end of an if statement else: break elif self.sym in tokentype: tempsym = self.token.symbol() #The condition of an if statment if '(' in tempsym: self.token.advance() self.compileExpression(False) self.writer.writeArithmetic('~') self.writer.writeIf(currIf) #body of an if|else statment elif '{' in tempsym: self.token.advance() self.compileStatements() seen_once = False #if part of an if else block then break if ifElse: self.token.advance() break 
#just incase this catches } which means that its #the end of an if else block that isn't this one elif '}' in tempsym: break self.token.advance() #if an if/else block write the exit label if ifElse: self.writer.writeLabel(currIfExit) else: self.writer.writeLabel(currIf) #------------------------------------------------------------------------------ # This method compiles the expression # @param: if this is part of an enclosed statment meanig args to another sub # routine def compileExpression(self,enclosed): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.sym in tokentype: tempsym = self.token.symbol() #this means that we have term to compile with a potential unary op if tempsym in '(~-': self.compileTerm(enclosed,True,False,'') #signifies the end of an expression elif tempsym in ';)],': break else: self.compileTerm(enclosed,False,False,'') self.token.advance() #------------------------------------------------------------------------------ # This method compiles the term # @param: if argument or array expression # @param: if the term contains a unary operator # @param: if the method was recursively called # @param: the previous sumbol if recursively called def compileTerm(self,enclosed,isUnary,callfromTerm,prevSym): while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.keyword in tokentype: tempkey = self.token.keyWord() if self.key_true in tempkey: #pushes -1 onto the stack self.writer.writePush('constant','1') self.writer.writeArithmetic('NEG') elif self.key_false in tempkey: self.writer.writePush('constant','0') elif self.key_null in tempkey: self.writer.writePush('constant','0') elif self.key_this in tempkey: self.writer.writePush('pointer','0') #any other keyword than the ones above results in an error else: print(self.token.errorMsg()) sys.exit(0) elif self.ident in tokentype: tempident = self.token.identifier() #peaks at the next token to determine the type of call peaks = self.token.peak() #means that it 
as a call to a var or class method if '.' in peaks: callName = '' numArgs = 0 typeof = self.table.typeOf(tempident) #if the type is none then we are calling a function or constructor not a method if 'NONE' in typeof: callName = tempident else: callName = typeof numArgs += 1 #push the objects location value as the first argument self.writer.writePush(self.segment[self.table.kindOf(tempident)],repr(self.table.indexOf(tempident))) self.token.advance() callName += self.token.symbol() self.token.advance() #checks to see if the next token is an identifier if not error if self.ident in self.token.tokenType(): callName += self.token.identifier() else: print(self.token.errorMsg()) sys.exit(0) self.token.advance() #if the token type is not a symbol then error if self.sym not in self.token.tokenType(): print(self.token.errorMsg()) sys.exit() self.token.advance() #then compiles the expression list and gets the number of arguments numArgs += self.compileExpressionList() self.writer.writeCall(callName,numArgs) #this means that it is a subroutine call to one of its own methods elif '(' in peaks: #calling one of its own methods so push this pointer onto the stack as the first argument #to the function self.writer.writePush('pointer','0') self.token.advance() self.token.advance() #gets the number of arguments from the expression list and adds 1 for the this pointer pushed #on earlier numArgs = self.compileExpressionList()+1 self.writer.writeCall(self.currClassName+'.'+tempident,numArgs if numArgs != 0 else 1) #this means that it is accessing an array element elif '[' in peaks: self.token.advance() self.token.advance() kind = self.table.kindOf(tempident) #if the kind of the identifier is none then it wasn't defined if "NONE" in kind: print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) #push base location of the array onto the stack self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident))) #calc offset self.compileExpression(enclosed) #add offset to 
base self.writer.writeArithmetic('+') #set that to the new value self.writer.writePop('pointer','1') #get the value at the offset self.writer.writePush('that','0') #other wise it is just an identifier else: kind = self.table.kindOf(tempident) if "NONE" in kind: print(self.token.errorMsg()+"Undefined Variable\n") sys.exit(0) self.writer.writePush(self.segment[kind],repr(self.table.indexOf(tempident))) elif self.intc in tokentype: self.writer.writePush('constant',self.token.intVal()) elif self.string_c in tokentype: string = self.token.stringVal() #creates a new string of the appropriate length self.writer.writePush('constant', repr(len(string))) self.writer.writeCall('String.new',1) #appends each new character to the string for c in string: self.writer.writePush('constant',repr(ord(c))) self.writer.writeCall('String.appendChar',2) elif self.sym in tokentype: tempsym = self.token.symbol() #this means that it is and expression surrounded by () if '(' in tempsym: self.token.advance() self.compileExpression(True) enclosed = True #not unary operator elif '~' in tempsym: self.token.advance() self.compileTerm(enclosed,False,False,prevSym) self.writer.writeArithmetic(tempsym) elif '-' in tempsym and isUnary and not enclosed: self.token.advance() self.compileTerm(enclosed,False,False,prevSym) self.writer.writeArithmetic('NEG') #operator elif tempsym in '+-*/&|<>=': self.token.advance() #if this was recursivelly called then need to print symble #of previous call ensures that the correct values on the stack #are used if callfromTerm: if '*' in prevSym: self.writer.writeCall('Math.multiply',2) elif '/' in prevSym: self.writer.writeCall('Math.divide',2) else: self.writer.writeArithmetic(prevSym) what = self.compileTerm(enclosed,False,True,tempsym) #if the return value is true and is the end of the expression if what and self.token.peak() in ']);,': if '*' in tempsym: self.writer.writeCall('Math.multiply',2) elif '/' in tempsym: self.writer.writeCall('Math.divide',2) else: 
self.writer.writeArithmetic(tempsym) #return false becuase we don't want to write anything #more from this block return False #if what is false and at the end of the expression #return false elif not what and self.token.peak() in ']);,': return False #if the next token is ]);, means the end of a term if self.token.peak() in ']);,': break self.token.advance() return True #------------------------------------------------------------------------------ # This method compiles the expressionList def compileExpressionList(self): expressCount = 0 while self.token.hasMoreTokens(): tokentype = self.token.tokenType() if self.sym in tokentype: tempsym = self.token.symbol() #indicates teh start of another expression if ',' in tempsym: self.token.advance() self.compileExpression(False) expressCount += 1 #indicates that end of expression list elif ')' in tempsym: break else: self.compileExpression(False) expressCount += 1 else: self.compileExpression(False) expressCount += 1 return expressCount #-------------------End Class--------------------------------------------------
def __init__(self, symbolTable, tokenizer, filename):
    """Store the collaborators and reset the per-compilation state.

    Args:
        symbolTable: symbol table object kept as ``self.symbolTable``.
        tokenizer: token source kept as ``self.tokenizer``.
        filename: passed unchanged to ``VMWriter``; the resulting writer
            is kept as ``self.writer``.
    """
    self.symbolTable, self.tokenizer = symbolTable, tokenizer
    self.writer = VMWriter(filename)
    # Accumulates the XML-style trace lines produced while parsing.
    self.lines = []
    # Counter used to make generated labels unique.
    self.label_count = 0
class CompileEngine:
    """Recursive-descent compiler for a single Jack class.

    The engine pulls tokens from ``tokenizer`` (whose current token is read
    through the attributes ``tokenType``, ``keyWord``, ``identifier``,
    ``symbol``, ``intVal`` and ``stringVal``), emits VM commands through a
    ``VMWriter`` and records an XML-style parse trace in ``self.lines``.
    """

    def __init__(self, symbolTable, tokenizer, filename):
        """Store the collaborators and create the VM writer for *filename*."""
        self.symbolTable = symbolTable
        self.tokenizer = tokenizer
        self.writer = VMWriter(filename)
        self.lines = []
        self.label_count = 0
        # Set by compileClass() once the class name token has been read.
        self.classname = None

    def writeFile(self):
        """Close the VM writer."""
        self.writer.close()

    def writeOpen(self, tag):
        """Append an opening ``<tag>`` line to the parse trace."""
        self.lines.append("<{}>".format(tag))

    def writeClose(self, tag):
        """Append a closing ``</tag>`` line to the parse trace."""
        self.lines.append("</{}>".format(tag))

    def writeTerminal(self, tag, value):
        """Append a ``<tag> value </tag>`` line to the parse trace."""
        self.lines.append("<{}> {} </{}>".format(tag, value, tag))

    def advance(self):
        """Move to the next token, if the tokenizer reports there is one."""
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def uniqueLabel(self, label):
        """Return *label* suffixed with a freshly incremented counter."""
        self.label_count += 1
        return label + str(self.label_count)

    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self.advance()
        self.writeOpen("class")
        self.compileItem()  # class
        self.classname = self.tokenizer.identifier
        self.compileItem()  # name
        self.compileItem()  # {
        while self.tokenizer.keyWord in ("STATIC", "FIELD"):
            self.compileClassVarDec()
        while self.tokenizer.keyWord in ("CONSTRUCTOR", "FUNCTION", "METHOD"):
            self.compileSubroutine()
        self.compileItem()  # }
        self.writeClose("class")

    def compileClassVarDec(self):
        """Compile one ``static``/``field`` declaration."""
        self.writeOpen("classVarDec")
        self.compileVarDecList()
        self.writeClose("classVarDec")

    def compileVarDecList(self):
        """Compile ``kind type name (',' name)* ';'``.

        The leading keyword (static/field/var) is used as the symbol kind.

        Returns:
            The number of names that were defined.
        """
        kind = self.tokenizer.keyWord
        self.compileItem()  # static/field/var
        # A class type arrives as an identifier, a builtin type as a keyword.
        thetype = self.tokenizer.identifier or self.tokenizer.keyWord
        self.compileItem()  # type
        count = 0
        while self.tokenizer.symbol != ";":
            count += 1
            self.compileNewIdentifier(thetype, kind)  # name
            if self.tokenizer.symbol == ",":
                self.compileItem()  # ,
        self.compileItem()  # ;
        return count

    def compileSubroutine(self):
        """Compile one constructor, function or method declaration.

        The ``function`` VM command is written only after the local variable
        declarations have been parsed, because it needs the local count.
        """
        self.writeOpen("subroutineDec")
        funtype = self.tokenizer.keyWord
        self.symbolTable.startSubroutine(funtype == "METHOD")
        self.compileItem()  # constructor/function/method
        self.compileItem()  # return type
        label = "{}.{}".format(self.classname, self.tokenizer.identifier)
        self.compileItem()  # name
        self.compileItem()  # (
        self.compileParameterList()
        self.compileItem()  # )
        self.writeOpen("subroutineBody")
        self.compileItem()  # {
        while self.tokenizer.keyWord == "VAR":
            self.compileVarDec()
        nLocals = self.symbolTable.varCount("VAR")
        self.writer.writeFunction(label, nLocals)
        if funtype == "METHOD":
            # Anchor the receiver: argument 0 becomes pointer 0.
            self.writer.writePush("ARG", 0)
            self.writer.writePop("POINTER", 0)
        elif funtype == "CONSTRUCTOR":
            # Allocate one word per field and anchor the result in pointer 0.
            self.writer.writePush("CONST", self.symbolTable.varCount("FIELD"))
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop("POINTER", 0)
        self.compileStatements()
        self.compileItem()  # }
        self.writeClose("subroutineBody")
        self.writeClose("subroutineDec")

    def compileParameterList(self):
        """Compile a possibly empty parameter list, defining each as ``ARG``.

        The surrounding parentheses are consumed by the caller.
        """
        self.writeOpen("parameterList")
        kind = "ARG"
        while self.tokenizer.symbol != ")":
            thetype = self.tokenizer.identifier or self.tokenizer.keyWord
            self.compileItem()  # type
            self.compileNewIdentifier(thetype, kind)  # name
            if self.tokenizer.symbol == ",":
                self.compileItem()  # ,
        self.writeClose("parameterList")

    def compileVarDec(self):
        """Compile one ``var`` declaration and return how many names it defines."""
        self.writeOpen("varDec")
        count = self.compileVarDecList()
        self.writeClose("varDec")
        return count

    def compileStatements(self):
        """Compile statements until the closing ``}`` (which is not consumed).

        Raises:
            Exception: if the current token does not start a statement.
        """
        handlers = {
            "DO": self.compileDo,
            "IF": self.compileIf,
            "LET": self.compileLet,
            "RETURN": self.compileReturn,
            "WHILE": self.compileWhile,
        }
        self.writeOpen("statements")
        while self.tokenizer.symbol != "}":
            handler = handlers.get(self.tokenizer.keyWord)
            if handler is None:
                raise Exception(
                    "unexpected token at start of statement: {!r}".format(
                        self.tokenizer.tokenType))
            handler()
        self.writeClose("statements")

    def _resolveCall(self, name):
        """Resolve the callee written as *name* and push its receiver, if any.

        *name* is either ``sub`` (a method of the class being compiled) or
        ``prefix.sub``. When ``prefix`` is a known variable the call is a
        method call on that object and the label uses the variable's type;
        otherwise ``prefix`` is taken to be a class name.

        Only the receiver part is rewritten. The previous implementation used
        ``str.replace`` over the whole label, which also mangled the
        subroutine name whenever it contained the variable name (``a.add``
        became ``Array.Arraydd``). An unqualified name is now always treated
        as a method of the current class, even if a variable shares it.

        Returns:
            ``(label, implicit_args)`` where ``implicit_args`` is 1 when a
            receiver was pushed and 0 otherwise.
        """
        receiver, _, subroutine = name.partition(".")
        if not subroutine:
            self.writer.writePush("POINTER", 0)
            return self.classname + "." + name, 1
        sym = self.symbolTable.getSymbol(receiver)
        if sym is not None:
            self.writer.writePush(sym.kind, sym.index)
            return sym.thetype + "." + subroutine, 1
        return name, 0

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.writeOpen("doStatement")
        self.compileItem()  # do
        label = ""
        while self.tokenizer.symbol != "(":
            label += self.compileItem()[0]  # Main . method
        label, argCount = self._resolveCall(label)
        self.compileItem()  # (
        argCount += self.compileExpressionList()
        self.compileItem()  # )
        self.compileItem()  # ;
        self.writer.writeCall(label, argCount)
        # A do-statement ignores the result, so drop it from the stack.
        self.writer.writePop("TEMP", 0)
        self.writeClose("doStatement")

    def compileLet(self):
        """Compile ``let name ('[' expression ']')? = expression ;``."""
        self.writeOpen("letStatement")
        self.compileItem()  # let
        is_array = False
        if self.tokenizer.symbol != "=":
            _, sym = self.compileItem()  # a
            if self.tokenizer.symbol == "[":
                is_array = True
                self.compileItem()  # [
                self.compileExpression()  # 0
                # index + base address = address of the target element
                self.writer.writePush(sym.kind, sym.index)
                self.writer.writeArithmetic("ADD")
                self.compileItem()  # ]
        self.compileItem()  # =
        self.compileExpression()
        self.compileItem()  # ;
        if is_array:
            # Park the value so the address underneath it can be loaded into
            # pointer 1 (the right-hand side may itself have used pointer 1).
            self.writer.writePop("TEMP", 0)
            self.writer.writePop("POINTER", 1)
            self.writer.writePush("TEMP", 0)
            self.writer.writePop("THAT", 0)
        else:
            self.writer.writePop(sym.kind, sym.index)
        self.writeClose("letStatement")

    def compileWhile(self):
        """Compile ``while ( expression ) { statements }``."""
        self.writeOpen("whileStatement")
        start = self.uniqueLabel("LOOPSTART")
        end = self.uniqueLabel("LOOPEND")
        self.writer.writeLabel(start)
        self.compileItem()  # while
        self.compileItem()  # (
        self.compileExpression()
        # Leave the loop when the condition is false.
        self.writer.writeArithmetic("NOT")
        self.writer.writeIf(end)
        self.compileItem()  # )
        self.compileItem()  # {
        self.compileStatements()
        self.compileItem()  # }
        self.writer.writeGoto(start)
        self.writer.writeLabel(end)
        self.writeClose("whileStatement")

    def compileReturn(self):
        """Compile ``return expression? ;``.

        A bare ``return;`` pushes constant 0 first: every call site expects
        one value on the stack (``compileDo`` pops it), so a subroutine must
        always return something.
        """
        self.writeOpen("returnStatement")
        self.compileItem()  # return
        if self.tokenizer.symbol != ";":
            self.compileExpression()
        else:
            self.writer.writePush("CONST", 0)
        self.compileItem()  # ;
        self.writer.writeReturn()
        self.writeClose("returnStatement")

    def compileIf(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``."""
        self.writeOpen("ifStatement")
        self.compileItem()  # if
        self.compileItem()  # (
        self.compileExpression()
        iftrue = self.uniqueLabel("IFTRUE")
        iffalse = self.uniqueLabel("IFFALSE")
        ifend = self.uniqueLabel("IFEND")
        self.writer.writeIf(iftrue)
        self.writer.writeGoto(iffalse)
        self.writer.writeLabel(iftrue)
        self.compileItem()  # )
        self.compileItem()  # {
        self.compileStatements()
        self.compileItem()  # }
        self.writer.writeGoto(ifend)
        self.writer.writeLabel(iffalse)
        if self.tokenizer.keyWord == "ELSE":
            self.compileItem()  # else
            self.compileItem()  # {
            self.compileStatements()
            self.compileItem()  # }
        self.writer.writeLabel(ifend)
        self.writeClose("ifStatement")

    def compileExpression(self):
        """Compile ``term (op term)*``, applying operators left to right."""
        self.writeOpen("expression")
        self.compileTerm()
        while self.tokenizer.symbol in OPS:
            op, _ = self.compileItem()  # & | + etc
            self.compileTerm()
            if op in ("*", "/"):
                # Multiplication and division are emitted as calls.
                self.writer.writeCall(OPS[op], 2)
            else:
                self.writer.writeArithmetic(OPS[op])
        self.writeClose("expression")

    def compileNewIdentifier(self, thetype, kind):
        """Define the current identifier in the symbol table and consume it."""
        sym = self.symbolTable.define(self.tokenizer.identifier, thetype, kind)
        self.writeTerminal(
            "identifier",
            "{} DEFINE {} {} {}".format(sym.kind, sym.thetype, sym.name, sym.index))
        self.advance()

    def compileItem(self):
        """Record the current token in the parse trace and advance past it.

        Returns:
            ``(value, sym)``: the token's value and, for an identifier found
            in the symbol table, its symbol entry (otherwise ``None``).
        """
        # Local import keeps this class self-contained within the file.
        from xml.sax.saxutils import escape

        sym = None
        ret = None
        tokenType = self.tokenizer.tokenType
        if tokenType == "KEYWORD":
            ret = self.tokenizer.keyWord
            self.writeTerminal("keyword", ret.lower())
        elif tokenType == "IDENTIFIER":
            ret = self.tokenizer.identifier
            sym = self.symbolTable.getSymbol(ret)
            if sym:
                self.writeTerminal(
                    "identifier",
                    "{} EXISTING {} {} {}".format(
                        sym.kind, sym.thetype, sym.name, sym.index))
            else:
                # Not a variable: class or subroutine name.
                self.writeTerminal("identifier", "CLASS/SUBROUTINE " + ret)
        elif tokenType == "SYMBOL":
            ret = self.tokenizer.symbol
            # The ampersand and angle brackets must be XML-escaped so the
            # trace stays well formed.
            self.writeTerminal("symbol", escape(ret))
        elif tokenType == "INT_CONST":
            ret = self.tokenizer.intVal
            self.writeTerminal("integerConstant", ret)
        elif tokenType == "STRING_CONST":
            ret = self.tokenizer.stringVal
            self.writeTerminal("stringConstant", ret)
        self.advance()
        return ret, sym

    def compileTerm(self):
        """Compile a single term and leave its value on the stack.

        Handles parenthesised expressions, unary operators, constants,
        keyword constants, variables, array elements and subroutine calls.

        Raises:
            Exception: if a keyword that is not a constant appears as a term.
        """
        self.writeOpen("term")
        if self.tokenizer.symbol == "(":
            self.compileItem()  # (
            self.compileExpression()
            self.compileItem()  # )
        elif self.tokenizer.symbol in UNARY_OPS:
            unary_op, _ = self.compileItem()  # - ~
            self.compileTerm()
            self.writer.writeArithmetic(UNARY_OPS[unary_op])
        else:
            tokenType = self.tokenizer.tokenType
            value, sym = self.compileItem()  # any value
            if self.tokenizer.symbol == ".":
                value += self.compileItem()[0]  # .
                value += self.compileItem()[0]  # subroutineName
            if self.tokenizer.symbol == "[":
                self.writer.writePush(sym.kind, sym.index)
                self.compileItem()  # [
                self.compileExpression()
                # base + index, then read the element through pointer 1
                self.writer.writeArithmetic("ADD")
                self.writer.writePop("POINTER", 1)
                self.writer.writePush("THAT", 0)
                self.compileItem()  # ]
            elif self.tokenizer.symbol == "(":
                value, nArgs = self._resolveCall(value)
                self.compileItem()  # (
                nArgs += self.compileExpressionList()
                self.writer.writeCall(value, nArgs)
                self.compileItem()  # )
            elif tokenType == "INT_CONST":
                self.writer.writePush("CONST", value)
            elif tokenType == "STRING_CONST":
                self.writer.writePush("CONST", len(value))
                self.writer.writeCall("String.new", 1)
                for char in value:
                    # Keep a copy of the string reference in temp 1, push it
                    # twice (one stays as the result, one is the receiver),
                    # append the character and drop the call's return value.
                    self.writer.writePop("TEMP", 1)
                    self.writer.writePush("TEMP", 1)
                    self.writer.writePush("TEMP", 1)
                    self.writer.writePush("CONST", ord(char))
                    self.writer.writeCall("String.appendChar", 2)
                    self.writer.writePop("TEMP", 0)
            elif tokenType == "KEYWORD":
                if value == "TRUE":
                    # true is represented as -1
                    self.writer.writePush("CONST", 1)
                    self.writer.writeArithmetic("NEG")
                elif value == "THIS":
                    self.writer.writePush("POINTER", 0)
                elif value in ("FALSE", "NULL"):
                    self.writer.writePush("CONST", 0)
                else:
                    raise Exception(
                        "unexpected keyword in term: {}".format(value))
            else:
                self.writer.writePush(sym.kind, sym.index)
        self.writeClose("term")

    def compileExpressionList(self):
        """Compile a comma-separated expression list up to (not including) ``)``.

        Returns:
            The number of expressions compiled.
        """
        self.writeOpen("expressionList")
        count = 0
        while self.tokenizer.symbol != ")":
            count += 1
            self.compileExpression()
            if self.tokenizer.symbol == ",":
                self.compileItem()  # ,
        self.writeClose("expressionList")
        return count
class CompilationEngine(): ''' Parses a stream of jack tokens recursively. ''' def __init__(self, tokenizer): self._name = tokenizer.get_filename().replace('.jack','') # tokenizer for input self._tokenizer = tokenizer # symbol table self._symbols = SymbolTable() # vm output fiole self._writer = VMWriter(self._name + '.vm') # Input should be a tokenized .jack file containing one class assert self._tokenizer.has_more_tokens() self._tokenizer.advance() self._class = None self._subroutine = None self._counter = 0 self.compile_class() self.close() def change_name(self, name): self._name = name def get_name(self, name): return self._name def get_token(self): return self._tokenizer._token def get_type(self): return self._tokenizer._type def close(self): # close the output file at the end self._writer.close() def compile_class(self): # 'class' className '{' classVarDec* subroutineDec* '}' # keyword - class assert self._tokenizer.keyword() == 'class' self._tokenizer.advance() # identifier - className assert self._tokenizer.identifier() self._class = self._tokenizer.identifier() self._tokenizer.advance() # sybmol - '{' assert self._tokenizer.symbol() == '{', "expected '{' but got " + self.get_token() self._tokenizer.advance() # classVarDec* while self._tokenizer.is_valid_class_variable(): self.compile_class_var() # subroutineBody* while self._tokenizer.is_valid_subroutine(): self.compile_subroutine() # sybmol - '}' assert self._tokenizer.symbol() == '}', "expected '}' but got " + self.get_token() self._tokenizer.advance() # assuming .jack file is properly formatted, there should be no more tokens assert not self._tokenizer.has_more_tokens() def compile_class_var(self): # ('static'|'field') type varName (',' varName)* ';' assert self._tokenizer.is_valid_class_variable() # keyword - 'static' or 'field' temp_kind = self._tokenizer.get_token() self._tokenizer.advance() # type - 'int' or 'char' or 'boolean' or className assert self._tokenizer.is_valid_type() temp_type = 
self._tokenizer.get_token() self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() temp_name = self._tokenizer.get_token() self._symbols.define(temp_name, temp_type, temp_kind) self._tokenizer.advance() # recursively check for (',' varName)* structure while self._tokenizer.symbol() == ',': self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() temp_name = self._tokenizer.get_token() self._symbols.define(temp_name, temp_type, temp_kind) # symbol - ',' or ';' self._tokenizer.advance() # next token should be a ';' assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token() self._tokenizer.advance() def compile_subroutine(self): # ('constructor'|'method'|'function') ('void'| type) subroutineName '(' parameterList ')' subroutineBody assert self._tokenizer.is_valid_subroutine() self._symbols.start_subroutine() # keyword - constructor or method or function self._subroutine = self._tokenizer.get_token() if self._subroutine == 'method': # in the case of method, add 'this' to symbol table self._symbols.define('this', self._class, 'argument') self._tokenizer.advance() # keyword - type or void assert self._tokenizer.is_valid_subroutine_type() self._tokenizer.advance() # identifier - subroutineName assert self._tokenizer.identifier() temp_name = self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' assert self._tokenizer.symbol() == '(' self._tokenizer.advance() # parameterList if self._tokenizer.is_valid_type(): self.compile_parameter_list() # symbol - '(' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() temp_name = self._class + '.' + temp_name # symbol - '{' assert self._tokenizer.symbol() == '{' # subroutineBody self.compile_subroutine_body(temp_name) self._writer.write_comment('end subroutine ' + temp_name) def compile_parameter_list(self): # ( (type varName) (',' type varName)* )? 
# only called if non-empty parameter list assert self._tokenizer.is_valid_type() # type - int or char or boolean or className temp_type = self._tokenizer.get_token() self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() temp_name = self._tokenizer.get_token() self._symbols.define(temp_name, temp_type, 'argument') self._tokenizer.advance() while self._tokenizer.symbol() == ',': # symbol - ',' self._tokenizer.advance() assert self._tokenizer.is_valid_type() # type - int or char or boolean or className temp_type = self._tokenizer.get_token() self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() temp_name = self._tokenizer.get_token() self._symbols.define(temp_name, temp_type, 'argument') self._tokenizer.advance() # symbol - ')' assert self._tokenizer.symbol() == ')' def compile_subroutine_body(self, name): # '{' varDec* statements '}' # symbol - '{' assert self._tokenizer.symbol() == '{' self._tokenizer.advance() # varDec num_locals = 0 while self._tokenizer.keyword() == 'var': # remember that compiling variables writes NO vm code num_locals += self.compile_var() self._writer.write_function(name, num_locals) if self._subroutine == 'method': # set this, in the case of a method self._writer.write_push('argument',0) self._writer.write_pop('pointer',0) elif self._subroutine == 'constructor': # allocate object self._writer.write_object_alloc(self._symbols.var_count('field')) # statements self.compile_statements() # symbol - '{' assert self._tokenizer.symbol() == '}' self._tokenizer.advance() def compile_var(self): # 'var' type varName (',' varName)* ';' assert self._tokenizer.is_valid_variable() # keyword - 'var' self._tokenizer.advance() # type - int or char or boolean or className assert self._tokenizer.is_valid_type() temp_type = self._tokenizer.get_token() self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() temp_name = self._tokenizer.get_token() 
self._symbols.define(temp_name, temp_type, 'local') num_locals = 1 self._tokenizer.advance() while self._tokenizer.symbol() == ',': # symbol - ',' self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() temp_name = self._tokenizer.get_token() self._symbols.define(temp_name, temp_type, 'local') num_locals += 1 self._tokenizer.advance() # symbol - ';' assert self._tokenizer.symbol() == ';' self._tokenizer.advance() return num_locals def compile_statements(self): # statement* while self._tokenizer.is_valid_statement(): if self._tokenizer.keyword() == 'let': # letStatement self.compile_let() elif self._tokenizer.keyword() == 'if': # ifStatement self.compile_if() elif self._tokenizer.keyword() == 'while': # whileStatement self.compile_while() elif self._tokenizer.keyword() == 'do': # doStatement self.compile_do() elif self._tokenizer.keyword() == 'return': # returnStatement self.compile_return() # symbol - '}' assert self._tokenizer.symbol() == '}' def compile_let(self): # 'let' varName ('[' expression ']')? 
'=' expression ';' # keyword - 'let' assert self._tokenizer.keyword() == 'let' self._tokenizer.advance() # identifier - varName assert self._tokenizer.identifier() if self._tokenizer.peek() == '=': # varName '=' expression ';' var_kind = self._symbols.kind_of(self._tokenizer.identifier()) var_index = self._symbols.index_of(self._tokenizer.identifier()) self._tokenizer.advance() # next token is '=' self._tokenizer.advance() # evaluate RHS expression, pop into variable self.compile_expression() if var_kind == 'field': self._writer.write_pop('this', var_index) else: self._writer.write_pop(var_kind, var_index) # expression ends with a ';' assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token() self._tokenizer.advance() elif self._tokenizer.peek() == '[': # varName '[' expression ']' '=' expression ';' # write base address to stack self._writer.write_push(self._symbols.kind_of(self._tokenizer.identifier()), self._symbols.index_of(self._tokenizer.identifier())) self._tokenizer.advance() # symbol - '[' self._tokenizer.advance() # expression - represents array index self.compile_expression() # base address + array index self._writer.write_arithmetic('add') # symbol - '[' assert self._tokenizer.symbol() == ']' self._tokenizer.advance() # symbol - '=' assert self._tokenizer.symbol() == '=' self._tokenizer.advance() # expression self.compile_expression() # pop RHS value into temp segment self._writer.write_pop('temp', 1) # align that with array[i] self._writer.write_pop('pointer', 1) # push value of RHS expression onto stack self._writer.write_push('temp', 1) # pop value into correct array index self._writer.write_pop('that', 0) # symbol - ';' assert self._tokenizer.symbol() == ';', "expected ';' but got " + self.get_token() self._tokenizer.advance() def compile_if(self): # 'if' '(' expression ')' ('else' '{' statements '}')? 
# keyword - if assert self._tokenizer.keyword() == 'if' self._writer.write_comment('if statement') self._tokenizer.advance() # symbol - ( assert self._tokenizer.symbol() == '(', "expected '(' but got " + self.get_token() self._tokenizer.advance() # expression self.compile_expression() self._writer.write_arithmetic('not') label_num = str(self._counter) self._counter += 1 self._writer.write_if('ELSE'+label_num) # symbol - ) assert self._tokenizer.symbol() == ')', "expected '(' but got " + self.get_token() self._tokenizer.advance() # symbol - '{' assert self._tokenizer.symbol() == '{' self._tokenizer.advance() # statements self.compile_statements() # symbol - '}' assert self._tokenizer.symbol() == '}' self._tokenizer.advance() self._writer.write_goto('IF'+label_num) self._writer.write_label('ELSE'+label_num) # check for else if self._tokenizer.keyword() == 'else': # 'else' '{' statements '}' # keyword - 'else' self._tokenizer.advance() # symbol - '{' assert self._tokenizer.symbol() == '{', "expected '{' but got " + self.get_token() self._tokenizer.advance() # statements self.compile_statements() # symbol - '}' assert self._tokenizer.symbol() == '}', "expected '}' but got " + self.get_token() self._tokenizer.advance() self._writer.write_label('IF'+label_num) def compile_while(self): # 'while' '(' expression ')' '{' statements '}' # keyword - 'while' assert self._tokenizer.keyword() == 'while' # labels for ifgoto and goto vm commands label_num = str(self._counter) self._counter += 1 self._tokenizer.advance() # symbol - '(' assert self._tokenizer.symbol() == '(' self._tokenizer.advance() self._writer.write_label('WHILE'+label_num) # expression self.compile_expression() self._writer.write_arithmetic('not') self._writer.write_if('ELSE'+label_num) # symbol - ')' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() # symbol - '{' assert self._tokenizer.symbol() == '{' self._tokenizer.advance() # statements self.compile_statements() 
self._writer.write_goto('WHILE'+label_num) self._writer.write_label('ELSE'+label_num) # symbol - '}' assert self._tokenizer.symbol() == '}' self._tokenizer.advance() def compile_do(self): # 'do' subroutineCall ';' assert self._tokenizer.keyword() == 'do' # keyword - 'do' self._tokenizer.advance() # identifier - subroutineCall assert self._tokenizer.identifier() # outer subroutine must be void function self.compile_subroutine_call() # symbol - ';' assert self._tokenizer.symbol() == ';' # discard void function default return value self._writer.write_pop('temp',0) self._tokenizer.advance() def compile_return(self): # 'return' expression? ';' # keyword - 'return' assert self._tokenizer.keyword() == 'return' self._writer.write_comment('return statement') self._tokenizer.advance() # expression? if self._tokenizer.symbol() == ';': # symbol - ';' (void function) self._writer.write_push('constant', 0) self._tokenizer.advance() else: # expression (not void) self.compile_expression() # symbol - ';' assert self._tokenizer.symbol() == ';' self._tokenizer.advance() self._writer.write_return() def compile_expression(self): # term (op term)* # term self.compile_term() # check for op while self._tokenizer.is_valid_operator(): # op temp_op = self._tokenizer.symbol() self._tokenizer.advance() # term self.compile_term() # write operator vm command, postfix order self._writer.write_operator(temp_op) def compile_term(self): # integerConstant | stringConstant | keywordConstant | varName | # varName '[' expression']' | subroutineCall | '(' expression ')' | unaryOp term if self._tokenizer.int_value() is not None: # integerConstant self._writer.write_push('constant', self._tokenizer.int_value()) self._tokenizer.advance() elif self._tokenizer.string_value() is not None: # stringConstant self._writer.write_string_constant(self._tokenizer.string_value()) self._tokenizer.advance() elif self._tokenizer.keyword() is not None: # keywordConstant 
self._writer.write_keyword_constant(self._tokenizer.keyword()) self._tokenizer.advance() elif self._tokenizer.symbol() == '(': # '(' expression ')' self._tokenizer.advance() self.compile_expression() assert self._tokenizer.symbol() == ')' self._tokenizer.advance() elif self._tokenizer.is_valid_unary(): # unaryOp term temp_op = self._tokenizer.symbol() self._tokenizer.advance() # term self.compile_term() # write operator vm command, postfix order self._writer.write_unary(temp_op) elif self._tokenizer.identifier() and self._tokenizer.peek() == '[': # varName '[' expression']' # process array name, push associated value onto stack self._writer.write_push(self._symbols.kind_of(self._tokenizer.identifier()), self._symbols.index_of(self._tokenizer.identifier())) self._tokenizer.advance() # process [ symbol self._tokenizer.advance() # expects expression, value is pushed onto the stack self.compile_expression() # setup pointer to array element self._writer.write_operator('+') self._writer.write_pop('pointer', 1) # push array value onto stack self._writer.write_push('that', 0) # expects closing square bracket assert self._tokenizer.symbol() == ']' self._tokenizer.advance() elif self._tokenizer.identifier() and self._tokenizer.peek() in ['(','.']: # subroutineCall self.compile_subroutine_call() elif self._symbols.exists(self._tokenizer.identifier()): # varName var_name = self._tokenizer.identifier() var_kind = self._symbols.kind_of(var_name) var_index = self._symbols.index_of(var_name) if var_kind == 'field': # push field var onto stack self._writer.write_push('this', var_index) else: self._writer.write_push(var_kind, var_index) self._tokenizer.advance() else: assert False, "unknown token: " + self.get_token() + " with type " + self.get_type() def compile_subroutine_call(self): # subroutineName '(' expressionList ')'| (className | varName) '.' 
subroutineName '(' expressionList ')' assert self._tokenizer.identifier() and self._tokenizer.peek() in ['(','.'] if self._tokenizer.identifier() and self._tokenizer.peek() == '(': # subroutineName '(' expressionList ')' # method (in current class) temp_name = self._class + '.' + self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' self._tokenizer.advance() # push this onto the stack self._writer.write_push('pointer',0) temp_nargs = 1 # expressionList temp_nargs += self.compile_expression_list() self._writer.write_call(temp_name, temp_nargs) # symbol - ')' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() elif self._symbols.exists(self._tokenizer.identifier()) and self._tokenizer.peek() == '.': # varName '.' subroutineName '(' expressionList ')' # varName (object) temp_name = self._tokenizer.identifier() # push object address onto stack, this is an implicit argument if self._symbols.kind_of(temp_name) == 'field': self._writer.write_push('this', self._symbols.index_of(temp_name)) else: self._writer.write_push(self._symbols.kind_of(temp_name), self._symbols.index_of(temp_name)) # change name to class name temp_name = self._symbols.type_of(temp_name) temp_nargs = 1 self._tokenizer.advance() # symbol - '.' temp_name += self._tokenizer.get_token() self._tokenizer.advance() # subroutineName assert self._tokenizer.identifier() temp_name += self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' assert self._tokenizer.symbol() == '(' self._tokenizer.advance() # expressionList temp_nargs += self.compile_expression_list() self._writer.write_call(temp_name, temp_nargs) # symbol - '(' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() elif self._tokenizer.identifier() and self._tokenizer.peek() == '.': # className . subroutineName '(' expressionList ')' # className temp_name = self._tokenizer.identifier() self._tokenizer.advance() # symbol - '.' 
temp_name += self._tokenizer.get_token() self._tokenizer.advance() # subroutineName assert self._tokenizer.identifier(), print(self._tokenizer._tokens) temp_name += self._tokenizer.identifier() self._tokenizer.advance() # symbol - '(' assert self._tokenizer.symbol() == '(' self._tokenizer.advance() # expressionList temp_nargs = self.compile_expression_list() self._writer.write_call(temp_name, temp_nargs) # symbol - ')' assert self._tokenizer.symbol() == ')' self._tokenizer.advance() def compile_expression_list(self): # (expression ( ',' expression)* )? temp_nargs = 0 while self._tokenizer.symbol() != ')': self.compile_expression() temp_nargs += 1 if self._tokenizer.symbol() == ',': # there is another expression in the list self._tokenizer.advance() return temp_nargs