def __processFile__(self, filePath):
    """Tokenize one .jack file, dump the raw tokens to <stem>T.xml for
    debugging, then run the tagged token list through CompilationEngine
    and write the compiled XML to <stem>.xml.
    """
    # --- Phase 1: tokenize / analyze ---------------------------------
    tokenizer = JackTokenizer(filePath)
    print(filePath)
    xml_lines = ["<tokens>"]
    tagged = [("listStart", "tokens", xml_lines[0])]
    token = tokenizer.advance()
    while token:
        wrapped = self.__wrapTokenInXML__(token)
        tagged.append(wrapped)
        xml_lines.append(wrapped[TT_XML])
        token = tokenizer.advance()
    xml_lines.append("</tokens>")
    tagged.append(("listEnd", "tokens", xml_lines[-1]))
    t_filename = str(filePath.parent) + '/' + filePath.stem + "T.xml"
    self.__output__(t_filename, xml_lines)
    # --- Phase 2: compile / translate --------------------------------
    compiler = CompilationEngine(tagged)
    c_filename = str(filePath.parent) + '/' + filePath.stem + ".xml"
    self.__output__(c_filename, compiler.compileTokens())
def __processFile__(self, filePath):
    """Tokenize one .jack file (raw tokens written to <stem>T.xml) and
    compile the token list into full XML written to <stem>.xml.
    """
    #TODO make it work
    # Tokenize every line of the .jack source between <tokens> tags.
    tokenizer = JackTokenizer(filePath)
    raw_tokens = ["<tokens>"]
    current = tokenizer.advance()
    while current:
        raw_tokens.append(self.__wrapTokenInXML__(current))
        current = tokenizer.advance()
    raw_tokens.append("</tokens>")
    # Run the tokens through the compilation engine.
    compiled_tokens = []
    engine = CompilationEngine(raw_tokens)
    compiled_tokens += engine.compileTokens()
    # Build output paths, then write the raw and compiled token lists.
    token_xml_path = Path(filePath.parent / (filePath.stem + 'T.xml'))
    final_xml_path = Path(filePath.parent / (filePath.stem + '.xml'))
    self.__output__(token_xml_path, raw_tokens)
    self.__output__(final_xml_path, compiled_tokens)
def compileParameterList(rfile, wfile):
    """Emit <parameterList>, copying every token up to (but not
    including) the closing ')'."""
    wfile.write('<parameterList>\n')
    while True:
        token = JackTokenizer.advance(rfile)
        if token == ')':
            break
        JackTokenizer.writeToken(wfile, token)
    wfile.write('</parameterList>\n')
def compileIf(rfile, wfile):
    # Compiles: 'if' '(' expression ')' '{' statements '}'
    #           ('else' '{' statements '}')?
    # The 'if' keyword itself was already consumed by the caller.
    wfile.write('<ifStatement>\n')
    wfile.write('<keyword> if </keyword>\n')
    #(expression)
    token = JackTokenizer.advance(rfile)
    JackTokenizer.writeToken(wfile, token)
    compileExpression(rfile, wfile)
    wfile.write('<symbol> ) </symbol>\n')
    #{statements}
    token = JackTokenizer.advance(rfile)
    JackTokenizer.writeToken(wfile, token)
    compileStatements(rfile, wfile)
    wfile.write('<symbol> } </symbol>\n')
    #(else {statements})?
    token = JackTokenizer.advance(rfile)
    if token == 'else':
        JackTokenizer.writeToken(wfile, token)
        token = JackTokenizer.advance(rfile)
        JackTokenizer.writeToken(wfile, token)
        compileStatements(rfile, wfile)
        wfile.write('<symbol> } </symbol>\n')
    else:
        # Not an else clause: undo the lookahead by seeking the file
        # pointer back over the token's characters.
        lennum = -len(token)
        rfile.seek(lennum, 1)
    wfile.write('</ifStatement>\n')
def compileClassVarDec(rfile, wfile, tokenAtt):
    """Emit <classVarDec>; tokenAtt is the already-read 'static' or
    'field' keyword. Copies tokens through the terminating ';'."""
    wfile.write('<classVarDec>\n<keyword> ' + tokenAtt + ' </keyword>\n')
    while True:
        token = JackTokenizer.advance(rfile)
        if token == ';':
            break
        JackTokenizer.writeToken(wfile, token)
    # Emit the terminating ';' as a token too.
    JackTokenizer.writeToken(wfile, token)
    wfile.write('</classVarDec>\n')
def compileVarDec(rfile, wfile):
    """Emit <varDec>; the 'var' keyword was already consumed by the
    caller. Copies tokens through the terminating ';'."""
    wfile.write('<varDec>\n<keyword> var </keyword>\n')
    while True:
        token = JackTokenizer.advance(rfile)
        if token == ';':
            break
        JackTokenizer.writeToken(wfile, token)
    # Emit the terminating ';' as a token too.
    JackTokenizer.writeToken(wfile, token)
    wfile.write('</varDec>\n')
def compileExpression(rfile, wfile):
    """Emit <expression>: term (op term)*.

    Stops when (and consumes) one of the delimiters ')', ']', ';', ','.
    """
    terminators = (')', ']', ';', ',')
    wfile.write('<expression>\n')
    compileTerm(rfile, wfile)
    token = JackTokenizer.advance(rfile)
    while token not in terminators:
        JackTokenizer.writeToken(wfile, token)   # the operator
        compileTerm(rfile, wfile)
        token = JackTokenizer.advance(rfile)
    wfile.write('</expression>\n')
def compileSubroutine(rfile, wfile, tokenAtt):
    """Emit <subroutineDec>; tokenAtt is the already-read
    'constructor'/'function'/'method' keyword."""
    wfile.write('<subroutineDec>\n<keyword> ' + tokenAtt + ' </keyword>\n')
    # (void|type) subroutineName — copy tokens until the opening '('.
    while True:
        token = JackTokenizer.advance(rfile)
        if token == '(':
            break
        JackTokenizer.writeToken(wfile, token)
    wfile.write('<symbol> ( </symbol>\n')
    compileParameterList(rfile, wfile)
    wfile.write('<symbol> ) </symbol>\n')
    # subroutineBody
    compileSubroutineBody(rfile, wfile)
    wfile.write('</subroutineDec>\n')
def compileExpressionList(rfile, wfile):
    # Compiles: (expression (',' expression)*)?
    # The enclosing '(' / ')' symbols are handled by the caller.
    wfile.write('<expressionList>\n')
    token = JackTokenizer.advance(rfile)
    if token != ')':
        # Non-empty list: push the lookahead token back onto the stream.
        lennum = -len(token)
        rfile.seek(lennum, 1)
    while token != ')':
        compileExpression(rfile, wfile)
        # compileExpression consumed the delimiter; step back one
        # character and re-read it to decide between ',' and ')'.
        rfile.seek(-1, 1)
        token = JackTokenizer.advance(rfile)
        if token == ',':
            wfile.write('<symbol> , </symbol>\n')
    wfile.write('</expressionList>\n')
def compileWhile(rfile, wfile):
    """Emit <whileStatement>: 'while' '(' expression ')' '{' statements
    '}'. The 'while' keyword was already consumed by the caller."""
    wfile.write('<whileStatement>\n')
    wfile.write('<keyword> while </keyword>\n')
    # '(' expression ')'
    JackTokenizer.writeToken(wfile, JackTokenizer.advance(rfile))
    compileExpression(rfile, wfile)
    wfile.write('<symbol> ) </symbol>\n')
    # '{' statements '}'
    JackTokenizer.writeToken(wfile, JackTokenizer.advance(rfile))
    compileStatements(rfile, wfile)
    wfile.write('<symbol> } </symbol>\n')
    wfile.write('</whileStatement>\n')
def compileSubroutineBody(rfile, wfile): wfile.write('<subroutineBody>\n') #{varDec* statements} token = JackTokenizer.advance(rfile) JackTokenizer.writeToken(wfile, token) token = JackTokenizer.advance(rfile) while token == 'var': compileVarDec(rfile, wfile) token = JackTokenizer.advance(rfile) lennum = -len(token) rfile.seek(lennum, 1) compileStatements(rfile, wfile) wfile.write('<symbol> } </symbol>\n') wfile.write('</subroutineBody>\n')
def subroutinCall(rfile, wfile):
    # Compiles: subroutineName '(' expressionList ')'
    #         | (className | varName) '.' subroutineName '(' expressionList ')'
    token = JackTokenizer.advance(rfile)
    JackTokenizer.writeToken(wfile, token)
    # Read one raw character to distinguish a qualified call ('.')
    # from a direct call ('(').
    temp = rfile.read(1)
    if temp == '.':
        wfile.write('<symbol> . </symbol>\n')
        token = JackTokenizer.advance(rfile)
        JackTokenizer.writeToken(wfile, token)
        # Consume the '(' character following the subroutine name.
        rfile.read(1)
        wfile.write('<symbol> ( </symbol>\n')
        compileExpressionList(rfile, wfile)
        wfile.write('<symbol> ) </symbol>\n')
    elif temp == '(':
        wfile.write('<symbol> ( </symbol>\n')
        compileExpressionList(rfile, wfile)
        wfile.write('<symbol> ) </symbol>\n')
def compileTerm(rfile, wfile):
    # Compiles a term: integer/string/keyword constant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' |
    # unaryOp term.
    wfile.write('<term>\n')
    token = JackTokenizer.advance(rfile)
    tType = JackTokenizer.tokenType(token)
    if tType == 'IDENTIFIER':
        # One-character lookahead decides between method call, array
        # entry, direct call and plain variable.
        temp = rfile.read(1)
        if temp == '.':
            # Qualified call: rewind over the identifier and the '.'
            # so subroutinCall can re-read them from the stream.
            lennum = -len(token) - 1
            rfile.seek(lennum, 1)
            subroutinCall(rfile, wfile)
        elif temp == '[':
            JackTokenizer.writeToken(wfile, token)
            wfile.write('<symbol> [ </symbol>\n')
            compileExpression(rfile, wfile)
            wfile.write('<symbol> ] </symbol>\n')
        elif temp == '(':
            JackTokenizer.writeToken(wfile, token)
            wfile.write('<symbol> ( </symbol>\n')
            compileExpression(rfile, wfile)
            wfile.write('<symbol> ) </symbol>\n')
        else:
            # Plain variable: undo the one-character lookahead.
            rfile.seek(-1, 1)
            JackTokenizer.writeToken(wfile, token)
    elif token in ('-', '~'):
        # unaryOp term
        JackTokenizer.writeToken(wfile, token)
        compileTerm(rfile, wfile)
    elif token == '(':
        # '(' expression ')'
        wfile.write('<symbol> ( </symbol>\n')
        compileExpression(rfile, wfile)
        wfile.write('<symbol> ) </symbol>\n')
    else:
        # Constant (integer, string or keyword).
        JackTokenizer.writeToken(wfile, token)
    wfile.write('</term>\n')
def compileDo(rfile, wfile):
    """Emit <doStatement>: 'do' subroutineCall ';'. The 'do' keyword
    was already consumed by the caller."""
    wfile.write('<doStatement>\n')
    wfile.write('<keyword> do </keyword>\n')
    subroutinCall(rfile, wfile)
    # Trailing ';'
    JackTokenizer.writeToken(wfile, JackTokenizer.advance(rfile))
    wfile.write('</doStatement>\n')
def compileLet(rfile, wfile):
    """Emit <letStatement>: 'let' varName ('[' expression ']')? '='
    expression ';'. The 'let' keyword was already consumed by the caller.
    """
    wfile.write('<letStatement>\n')
    wfile.write('<keyword> let </keyword>\n')
    token = JackTokenizer.advance(rfile)
    JackTokenizer.writeToken(wfile, token)            # varName
    temp = JackTokenizer.advance(rfile)
    if temp == '[':
        # Array target: varName '[' expression ']' '=' expression ';'
        wfile.write('<symbol> [ </symbol>\n')
        compileExpression(rfile, wfile)               # consumes ']'
        wfile.write('<symbol> ] </symbol>\n')
        token = JackTokenizer.advance(rfile)          # '='
        JackTokenizer.writeToken(wfile, token)
        # BUGFIX: the right-hand expression (and the terminating ';')
        # was never compiled for array targets, mirroring the plain
        # '=' branch below.
        compileExpression(rfile, wfile)               # consumes ';'
        wfile.write('<symbol> ; </symbol>\n')
    elif temp == '=':
        wfile.write('<symbol> = </symbol>\n')
        compileExpression(rfile, wfile)               # consumes ';'
        wfile.write('<symbol> ; </symbol>\n')
    wfile.write('</letStatement>\n')
def compileClass(rfile, wfile):
    """Emit <class>: 'class' className '{' classVarDec* subroutineDec*
    '}'."""
    wfile.write('<class>\n')
    # Copy 'class' and the class name straight through until '{'.
    tok = JackTokenizer.advance(rfile)
    while tok != '{':
        JackTokenizer.writeToken(wfile, tok)
        tok = JackTokenizer.advance(rfile)
    wfile.write('<symbol> { </symbol>\n')
    tok = JackTokenizer.advance(rfile)
    # classVarDec* — each begins with 'static' or 'field'.
    while tok in ('static', 'field'):
        compileClassVarDec(rfile, wfile, tok)
        tok = JackTokenizer.advance(rfile)
    # subroutineDec* — each begins with a subroutine-kind keyword.
    while tok in ('constructor', 'function', 'method'):
        compileSubroutine(rfile, wfile, tok)
        tok = JackTokenizer.advance(rfile)
    wfile.write('<symbol> } </symbol>\n')
    wfile.write('</class>\n')
def compileStatements(rfile, wfile):
    """Emit <statements>, dispatching on the leading keyword of each
    statement until the closing '}' is reached (and consumed)."""
    handlers = {
        'let': compileLet,
        'if': compileIf,
        'while': compileWhile,
        'do': compileDo,
        'return': compileReturn,
    }
    wfile.write('<statements>\n')
    token = JackTokenizer.advance(rfile)
    while token != '}':
        handler = handlers.get(token)
        if handler is None:
            # Unknown statement keyword: report and abort, as before.
            print('Error!' + token)
            exit()
        handler(rfile, wfile)
        token = JackTokenizer.advance(rfile)
    wfile.write('</statements>\n')
def compileReturn(rfile, wfile):
    """Emit <returnStatement>: 'return' expression? ';'. The 'return'
    keyword was already consumed by the caller."""
    wfile.write('<returnStatement>\n')
    wfile.write('<keyword> return </keyword>\n')
    #expression?
    token = JackTokenizer.advance(rfile)
    if token == ';':
        # Bare 'return;'
        JackTokenizer.writeToken(wfile, token)
    else:
        # Push the lookahead token back and compile the expression.
        rfile.seek(-len(token), 1)
        compileExpression(rfile, wfile)
        wfile.write('<symbol> ; </symbol>\n')
    wfile.write('</returnStatement>\n')
def main():
    """Drives the Jack-to-VM translation process.

    Usage: program <file.jack | directory>
    Builds one JackTokenizer per .jack file (a single file, or every
    .jack file under a directory tree), then drains each tokenizer,
    classifying every token by type. Note: output_files is computed but
    not yet written to — output generation is not implemented here.
    """
    file_name = sys.argv[1]
    tokenizers = []
    output_files = []
    abs_path = os.path.abspath(file_name)
    # Idiom fix: str.endswith replaces the redundant
    # "'.jack' in name and name[-5:] == '.jack'" double test.
    if file_name.endswith('.jack'):
        tokenizers.append(JackTokenizer(abs_path))
        output_files.append(os.path.splitext(abs_path)[0] + '.xml')
    else:
        # Directory input: pick up every .jack file in the tree.
        for _dirpath, _dirnames, filenames in os.walk(abs_path):
            for jack_file in filenames:
                if jack_file.endswith('.jack'):
                    tokenizers.append(JackTokenizer(abs_path + '/' + jack_file))
                    output_files.append(
                        abs_path + '/' + jack_file[:-5] + '.xml')
    for tokenizer in tokenizers:
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()
            if token_type == 'KEYWORD':
                keyword = tokenizer.keyword()
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
            elif token_type == 'IDENTIFIER':
                identifier = tokenizer.identifier()
            elif token_type == 'INT_CONST':
                int_val = tokenizer.int_val()
            elif token_type == 'STRING_CONST':
                string_val = tokenizer.string_val()
import CompilationEngine as ce

# Debug switch. NOTE(review): both branches currently select the same
# test file, so the flag only affects the write/skip choice below.
debug = 0
if debug == 1:
    jack_file_name = 'ArrayTest/Main.jack'
else:
    jack_file_name = 'ArrayTest/Main.jack'
token_xml_file_name = jack_file_name.replace('.jack', 'T_gen.xml')

# part 1, tokenize
# NOTE(review): 'jt' (the JackTokenizer module) is assumed to be
# imported elsewhere in this file — not visible in this chunk.
jt.Constructor(jack_file_name)
token_xml = '<tokens>\n'
while jt.hasMoreTokens():
    jt.advance()
    token_type = jt.tokenType()
    # func_list maps a token type to the handler that renders its XML.
    token = jt.func_list[token_type]()
    token_xml = token_xml + token
token_xml = token_xml + '</tokens>\n'
elements = token_xml.split('\n')
if debug:
    # print(token_xml)
    pass
else:
    # Write the generated token XML next to the source file.
    of = open(token_xml_file_name, 'w+')
    of.write(token_xml)
    of.close()
class CompilationEngine:
    """Recursive-descent compiler for Jack: emits both an XML parse tree
    (out_xml) and VM code (out_vm) while consuming tokens from a
    JackTokenizer. Call compileClass() first.
    """

    ###############
    # CONSTRUCTOR #
    ###############
    def __init__(self, in_filename, in_file, out_xml, out_vm):
        """
        Creates a new compilation engine with the given input and
        output. The next routine called must be compileClass().
        :param in_filename: name of the source Jack file.
        :param in_file: Open source Jack file.
        :param out_xml: Open XML file.
        :param out_vm: Open VM file.
        """
        self.__in_filename = in_filename
        self.__in_file, self.__out_xml = in_file, out_xml
        self.__tokenizer = JackTokenizer(in_file)
        self.__symbolTable = SymbolTable()
        self.__vmWriter = VMWriter(in_filename, out_vm)
        # Stack of currently-open XML tag names; depth = indent level.
        self.__stack = list()
        self.__tokenizer.advance()
        self.__resetUniqueLabels()

    ###################
    # PRIVATE METHODS #
    ###################
    def __resetUniqueLabels(self):
        # Per-subroutine counters used to make VM labels unique.
        self.__unique_id_if = 0
        self.__unique_id_while = 0

    def __uniqueWhileLabels(self):
        """
        Return (WHILE_EXP, WHILE_END) labels carrying a unique id to
        prevent collisions with other labels carrying the same name.
        Example:
            while_exp, while_end = __uniqueWhileLabels() -->
            while_exp = "WHILE_EXP123"
            while_end = "WHILE_END123"
        """
        unique_labels = []
        for label in [WHILE_EXP, WHILE_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_while))
        self.__unique_id_while += 1
        return unique_labels

    def __uniqueIfLabels(self):
        """
        Return (IF_TRUE, IF_FALSE, IF_END) labels carrying a unique id
        to prevent collisions with other labels carrying the same name.
        Example:
            if_true, if_false, if_end = __uniqueIfLabels() -->
            if_true = "IF_TRUE123"
            if_false = "IF_FALSE123"
            if_end = "IF_END123"
        """
        unique_labels = []
        for label in [IF_TRUE, IF_FALSE, IF_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_if))
        self.__unique_id_if += 1
        return unique_labels

    def __writeToken(self, token, token_type):
        """
        Writes the given token as an xml tag to the output.
        :param token: token tag value
        :param token_type: token tag type
        """
        tag = self.__getIndentedTag("<{0}>{1}{2}{1}</{0}>\n".format(
            token_type, XML_DELIM_TERMINAL, token))
        self.__out_xml.write(tag)

    def __writeTokenAndAdvance(self, token, token_type):
        """
        Writes the given token as an xml tag to the output and extracts
        the next token from the code.
        :param token: token tag value
        :param token_type: token tag type
        """
        # Build XML tag
        self.__writeToken(token, token_type)
        self.__tokenizer.advance()

    def __getIndentedTag(self, tag):
        """
        Return the given tag with trailing tabs according to current
        indentation level.
        :param tag: tag to indent
        :return: tag indented with trailing tabs.
        """
        return XML_INDENT_CHAR * len(self.__stack) + tag

    def __openTag(self, tagName):
        """
        Open an XML tag with the given name. All following tags will be
        written as inner tags until __closeTag() is called.
        :param tagName: name of the tag to open
        """
        tag = self.__getIndentedTag("<{}>\n".format(tagName))
        self.__out_xml.write(tag)
        self.__stack.append(tagName)

    def __closeTag(self):
        """
        Close the current open XML tag. All following tags will be
        written as outer tags in the previous indentation level.
        """
        tagName = self.__stack.pop()
        tag = self.__getIndentedTag("</{}>\n".format(tagName))
        self.__out_xml.write(tag)

    def __compileKeyWord(self):
        """ Compile a keyword token """
        keyword = self.__tokenizer.keyWord()
        self.__writeTokenAndAdvance(keyword, TOKEN_TYPE_KEYWORD)
        return keyword

    def __compileSymbol(self):
        """ Compile a symbol token """
        symbol = self.__tokenizer.symbol()
        self.__writeTokenAndAdvance(symbol, TOKEN_TYPE_SYMBOL)
        return symbol

    def __compileIdentifier(self, category, status, kind=KIND_NONE,
                            index=INDEX_NONE):
        """ Compile an identifier token, annotated with its category,
        status and (when known) VM segment and index. """
        info = "{} {}".format(category, status)
        if kind != KIND_NONE:
            info += " " + KIND_2_SEGMENT[kind]
        if index != INDEX_NONE:
            info += " " + str(index)
        info = "[{}] ".format(info)
        identifier = self.__tokenizer.identifier()
        self.__writeTokenAndAdvance(info + identifier,
                                    TOKEN_TYPE_IDENTIFIER)
        return identifier

    def __compileIntVal(self):
        """ Compile an intVal token """
        intval = self.__tokenizer.intVal()
        self.__writeTokenAndAdvance(intval, TOKEN_TYPE_INTEGER)
        # Integer constants are pushed directly.
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, intval)
        return intval

    def __compileStringVal(self):
        """ Compile a stringVal token """
        string = self.__tokenizer.stringVal()
        self.__writeTokenAndAdvance(string, TOKEN_TYPE_STRING)
        # Strings are built at runtime: String.new + appendChar per char.
        corrected = self.__correctString(string)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, len(corrected))
        self.__vmWriter.writeCall(OS_STRING_NEW, 1)
        for char in corrected:
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, ord(char))
            self.__vmWriter.writeCall(OS_STRING_APPEND_CHAR, 2)

    def __compileClassName(self, status):
        """ Compiles a class name identifier. """
        return self.__compileIdentifier(CATEGORY_CLASS, status)

    def __compileSubroutineName(self, status):
        """ Compiles a subroutine name identifier. """
        return self.__compileIdentifier(CATEGORY_SUBROUTINE, status)

    def __compileSubroutineCall(self):
        """ Compiles a subroutine call.
        Syntax: ( className | varName) '.' subroutineName
                '(' expressionList ')' |
                subroutineName '(' expressionList ')'
        """
        # Compile XML
        callName = ""
        exp_count = 0
        if self.__tokenizer.lookahead() == RE_DOT:  # className | varName
            # extract var\class name
            callName = self.__tokenizer.peek()
            # className or varName?
            kind = self.__symbolTable.kindOf(callName)
            if (kind != KIND_NONE):  # varName
                # Use class name instead of object name
                varName = callName
                callName = self.__symbolTable.typeOf(callName)
                # Push variable (this) and call class method
                index = self.__symbolTable.indexOf(varName)
                segment = self.__symbolTable.segmentOf(varName)
                self.__vmWriter.writePush(segment, index)
                # Include self as argument 0
                exp_count += 1
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
            else:  # className
                self.__compileIdentifier(CATEGORY_CLASS, STATUS_USE)
            callName += self.__compileSymbol()       # '.'
        else:  # subroutineName
            # Subroutine -> className.Subroutine
            self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
            callName += self.__className + FUNC_NAME_DELIMITER
            exp_count += 1
        callName += self.__compileSubroutineName(STATUS_USE)
        self.__compileSymbol()                       # '('
        exp_count += self.CompileExpressionList()    # expressionList
        self.__compileSymbol()                       # ')'
        # Compile VM
        self.__vmWriter.writeCall(callName, exp_count)

    def __compileVarName(self, status):
        """ Compiles a variable name. """
        name = self.__tokenizer.peek()
        index = INDEX_NONE
        if status != STATUS_DEFINE:
            index = self.__symbolTable.indexOf(name)
        varName = self.__compileIdentifier(CATEGORY_VAR, status,
                                           KIND_VAR, index)
        return varName

    def __compileType(self):
        """ Compiles a type.
        Syntax: 'int' | 'char' | 'boolean' | className
        """
        # 'int' | 'char' | 'boolean'
        if self.__tokenizer.peek() in {RE_INT, RE_CHAR, RE_BOOLEAN}:
            type = self.__compileKeyWord()
        # className
        else:
            type = self.__compileClassName(STATUS_USE)
        return type

    def __compileSubroutineBody(self, funcType, name):
        """ Compiles a subroutine body.
        Syntax: '{' varDec* statements '}'
        """
        self.__openTag('subroutineBody')         # <subroutineBody>
        self.__compileSymbol()                   # '{'
        # varDec*
        while self.__tokenizer.peek() == RE_VAR:
            self.compileVarDec()                 # varDec*
        vars = self.__symbolTable.varCount(KIND_VAR)
        self.__vmWriter.writeFunction(name, vars)
        if funcType == RE_METHOD:
            # Hold self at pointer
            self.__vmWriter.writePush(VM_SEGMENT_ARGUMENT, 0)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        if funcType == RE_CONSTRUCTOR:
            # Allocate memory for all fields
            fields = self.__symbolTable.varCount(KIND_FIELD)
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, fields)
            self.__vmWriter.writeCall(OS_MEMORY_ALLOC, 1)
            # Hold allocated memory at pointer
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        self.compileStatements()                 # statements
        self.__compileSymbol()                   # '}'
        self.__closeTag()                        # </subroutineBody>
        return vars

    ##################
    # PUBLIC METHODS #
    ##################
    def compileClass(self):
        """ Compiles a complete class.
        Syntax: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.__openTag('class')                  # <class>
        self.__compileKeyWord()                  # 'class'
        className = self.__compileClassName(     # className
            STATUS_DEFINE)
        self.__className = className
        self.__compileSymbol()                   # '{'
        # classVarDec*
        while self.__tokenizer.peek() in {RE_STATIC, RE_FIELD}:
            self.CompileClassVarDec()
        # subroutineDec*
        while self.__tokenizer.peek() in {
            RE_CONSTRUCTOR, RE_FUNCTION, RE_METHOD
        }:
            self.CompileSubroutine()
        self.__compileSymbol()                   # '}'
        self.__closeTag()                        # </class>

    def CompileClassVarDec(self):
        """ Compiles a static declaration or a field declaration.
        Syntax: ('static' | 'field') type varName (',' varName)* ';'
        """
        self.__openTag('classVarDec')            # <classVarDec>
        kind = self.__compileKeyWord()           # ('static' | 'field')
        type = self.__compileType()              # type
        moreVars = True
        while moreVars:                          # (',' varName)*
            name = self.__compileVarName(        # varName
                STATUS_DEFINE)
            self.__symbolTable.define(name, type, kind)
            if self.__tokenizer.peek() != RE_COMMA:
                moreVars = False
            else:
                self.__compileSymbol()           # ','
        self.__compileSymbol()                   # ';'
        self.__closeTag()                        # </classVarDec>

    def CompileSubroutine(self):
        """ Compiles a complete method, function, or constructor.
        Syntax: ('constructor' | 'function' | 'method')
                ('void' | type) subroutineName '(' parameterList ')'
                subroutineBody
        """
        # Start subroutine in symbol table
        self.__resetUniqueLabels()
        self.__symbolTable.startSubroutine()
        # Compile XML
        self.__openTag('subroutineDec')          # <subroutineDec>
        funcType = self.__compileKeyWord()       # ('constructor' |
                                                 # 'function' | 'method')
        if funcType in {RE_METHOD}:
            # +1 var count for this method (+1 for self)
            self.__symbolTable.define(VM_SELF, self.__className,
                                      KIND_ARG)
        if self.__tokenizer.peek() == RE_VOID:
            type = self.__compileKeyWord()       # 'void'
        else:
            type = self.__compileType()          # type
        subName = self.__compileSubroutineName(  # subroutineName
            STATUS_DEFINE)
        name = self.__className + FUNC_NAME_DELIMITER + subName
        self.__compileSymbol()                   # '('
        self.compileParameterList()              # parameterList
        self.__compileSymbol()                   # ')'
        self.__compileSubroutineBody(funcType, name)  # subroutineBody
        self.__closeTag()                        # </subroutineDec>

    def compileParameterList(self):
        """ Compiles a (possibly empty) parameter list, not including
        the enclosing "()".
        Syntax: ( (type varName) (',' type varName)*)?
        """
        parameters = 0                           # no parameters?
        self.__openTag('parameterList')          # <parameterList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            moreVars = True
            while moreVars:
                parameters += 1                  # yes parameters!
                type = self.__compileType()      # type
                name = self.__compileVarName(    # varName
                    STATUS_DEFINE)
                self.__symbolTable.define(name, type, KIND_ARG)
                if self.__tokenizer.peek() == RE_COMMA:
                    self.__compileSymbol()       # ','
                else:
                    moreVars = False
        self.__closeTag()                        # </parameterList>
        return parameters

    def compileVarDec(self):
        """ Compiles a var declaration.
        Syntax: 'var' type varName (',' varName)* ';'
        """
        self.__openTag('varDec')                 # <varDec>
        moreVars = True
        self.__compileKeyWord()                  # 'var'
        type = self.__compileType()              # type
        while moreVars:
            name = self.__tokenizer.peek()       # varName
            self.__symbolTable.define(name, type, KIND_VAR)
            self.__compileVarName(STATUS_DEFINE)
            if self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()           # ','
            else:
                moreVars = False
        self.__compileSymbol()                   # ';'
        self.__closeTag()                        # </varDec>

    def compileStatements(self):
        """ Compiles a sequence of statements, not including the
        enclosing "{}".
        Syntax: statement* where statement is in:
        letStatement | ifStatement | whileStatement | doStatement |
        returnStatement
        """
        self.__openTag('statements')             # <statements>
        statement = self.__tokenizer.peek()
        while statement in {
            RE_LET, RE_IF, RE_WHILE, RE_DO, RE_RETURN_NOTHING,
            RE_RETURN_SOMETHING
        }:
            if statement == RE_LET:
                self.compileLet()
            elif statement == RE_IF:
                self.compileIf()
            elif statement == RE_WHILE:
                self.compileWhile()
            elif statement == RE_DO:
                self.compileDo()
            elif statement == RE_RETURN_NOTHING:
                self.compileReturnNothing()
            elif statement == RE_RETURN_SOMETHING:
                self.compileReturnSomething()
            statement = self.__tokenizer.peek()
        self.__closeTag()                        # </statements>

    def compileDo(self):
        """ Compiles a do statement.
        Syntax: 'do' subroutineCall ';'
        """
        self.__openTag('doStatement')            # <doStatement>
        self.__compileKeyWord()                  # 'do'
        self.__compileSubroutineCall()           # subroutineCall
        # Discard the (unused) return value of the called subroutine.
        self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
        self.__compileSymbol()                   # ';'
        self.__closeTag()                        # </doStatement>

    def compileLet(self):
        """ Compiles a let statement.
        Syntax: 'let' varName ('[' expression ']')? '=' expression ';'
        """
        isArray = False
        self.__openTag('letStatement')           # <letStatement>
        self.__compileKeyWord()                  # 'let'
        varName = self.__tokenizer.peek()
        index = self.__symbolTable.indexOf(varName)
        segment = self.__symbolTable.segmentOf(varName)
        self.__compileVarName(STATUS_USE)        # varName
        if self.__tokenizer.peek() == RE_BRACKETS_SQUARE_LEFT:
            isArray = True
            self.__compileSymbol()               # '['
            self.CompileExpression()             # expression
            self.__compileSymbol()               # ']'
            # Add the offset to the variable address
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            # Address of array element is at stack top
        self.__compileSymbol()                   # '='
        self.CompileExpression()                 # expression
        self.__compileSymbol()                   # ';'
        self.__closeTag()                        # </letStatement>
        if isArray:
            # Pop rh-expression to temp
            self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
            # Get address of array element
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            # Push rh-expression to stack
            self.__vmWriter.writePush(VM_SEGMENT_TEMP, 0)
            # Pop rh-expression to address of element
            self.__vmWriter.writePop(VM_SEGMENT_THAT, 0)
        else:
            # Compile only if the varName was defined
            # (unlike class name of subroutine name)
            # if segment != KIND_NONE:  # varName was defined
            index = self.__symbolTable.indexOf(varName)
            self.__vmWriter.writePop(segment, index)

    def compileWhile(self):
        """ Compiles a while statement.
        Syntax: 'while' '(' expression ')' '{' statements '}'
        """
        LABEL_EXP, LABEL_END = self.__uniqueWhileLabels()
        self.__openTag('whileStatement')         # <whileStatement>
        self.__compileKeyWord()                  # 'while'
        self.__compileSymbol()                   # '('
        self.__vmWriter.writeLabel(              # label WHILE_EXP
            LABEL_EXP)
        self.CompileExpression()                 # expression
        # Negate the expression
        # (jump out of while if *NOT* expression)
        self.__vmWriter.writeArithmetic(RE_TILDA, False)
        self.__compileSymbol()                   # ')'
        self.__vmWriter.writeIf(LABEL_END)       # if-goto WHILE_END
        self.__compileSymbol()                   # '{'
        self.compileStatements()                 # statements
        self.__compileSymbol()                   # '}'
        self.__vmWriter.writeGoto(LABEL_EXP)     # goto WHILE_EXP
        self.__vmWriter.writeLabel(LABEL_END)    # label WHILE_END
        self.__closeTag()                        # </whileStatement>

    def compileReturnNothing(self):
        """ Compiles a 'return;' statement.
        Syntax: 'return;'
        """
        # Compile XML
        self.__openTag('returnStatement')        # <returnStatement>
        self.__writeToken(
            'return',                            # 'return'
            TOKEN_TYPE_KEYWORD)
        self.__writeTokenAndAdvance(
            ';',                                 # ';'
            TOKEN_TYPE_SYMBOL)
        self.__vmWriter.writeReturn(True)
        self.__closeTag()                        # </returnStatement>

    def compileReturnSomething(self):
        """ Compiles a return statement.
        Syntax: 'return' expression? ';'
        """
        # Compile XML
        self.__openTag('returnStatement')        # <returnStatement>
        self.__writeTokenAndAdvance(
            'return',                            # 'return'
            TOKEN_TYPE_KEYWORD)
        self.CompileExpression()                 # expression
        self.__compileSymbol()                   # ';'
        self.__vmWriter.writeReturn()
        self.__closeTag()                        # </returnStatement>

    def compileIf(self):
        """ Compiles an if statement, possibly with a trailing else
        clause.
        Syntax: 'if' '(' expression ')' '{' statements '}'
                ( 'else' '{' statements '}' )?
        """
        LABEL_TRUE, LABEL_FALSE, LABEL_END = self.__uniqueIfLabels()
        self.__openTag('ifStatement')            # <ifStatement>
        self.__compileKeyWord()                  # 'if'
        self.__compileSymbol()                   # '('
        # VM Code for computing ~(cond)
        self.CompileExpression()                 # expression
        self.__compileSymbol()                   # ')'
        self.__vmWriter.writeIf(LABEL_TRUE)      # if-goto LABEL_TRUE
        self.__vmWriter.writeGoto(LABEL_FALSE)   # goto LABEL_FALSE
        self.__vmWriter.writeLabel(LABEL_TRUE)   # label LABEL_TRUE
        self.__compileSymbol()                   # '{'
        # VM Code for executing TRUE
        self.compileStatements()                 # statements
        self.__compileSymbol()                   # '}'
        if self.__tokenizer.peek() == RE_ELSE:   #
            self.__vmWriter.writeGoto(LABEL_END)  # goto LABEL_END
            self.__vmWriter.writeLabel(          # label LABEL_FALSE
                LABEL_FALSE)
            self.__compileKeyWord()              # 'else'
            self.__compileSymbol()               # '{'
            # VM Code for executing ELSE
            self.compileStatements()             # statements
            self.__compileSymbol()               # '}'
            self.__vmWriter.writeLabel(          # label END
                LABEL_END)
        else:
            self.__vmWriter.writeLabel(          # label FALSE
                LABEL_FALSE)
        self.__closeTag()                        # </ifStatement>

    def CompileExpression(self):
        """ Compiles an expression.
        Syntax: term (op term)*
        """
        self.__openTag('expression')             # <expression>
        self.CompileTerm()                       # term
        while self.__tokenizer.peek() in {
            RE_PLUS, RE_BAR, RE_ASTERISK, RE_SLASH, RE_AMPERSAND,
            RE_VBAR, RE_LT, RE_GT, RE_EQ
        }:
            symbol = self.__compileSymbol()      # op
            self.CompileTerm()                   # term
            # Emit the operator after both operands (postfix VM order).
            self.__vmWriter.writeSymbol(symbol)
        self.__closeTag()                        # </expression>

    def __correctString(self, string):
        """ Convert escape characters in a string to valid chars
        :param string: string to correct
        :return: corrected strings with escaped characters corrected
        """
        correct = string.replace('\t', '\\t')
        correct = correct.replace('\n', '\\n')
        correct = correct.replace('\r', '\\r')
        return correct

    def CompileTerm(self):
        """ Compiles a term. This routine is faced with a slight
        difficulty when trying to decide between some of the
        alternative parsing rules. Specifically, if the current token
        is an identifier, the routine must distinguish between a
        variable, an array entry, and a subroutine call. A single
        look-ahead token, which may be one of "[", "(", or "."
        suffices to distinguish between the three possibilities. Any
        other token is not part of this term and should not be
        advanced over.
        Syntax: integerConstant | stringConstant | keywordConstant |
                varName | varName '[' expression ']' | subroutineCall |
                '(' expression ')' | unaryOp term
        """
        self.__openTag('term')                   # <term>
        lookahead = self.__tokenizer.lookahead()
        if self.__tokenizer.peek() == RE_BRACKETS_LEFT:
            self.__compileSymbol()               # '('
            self.CompileExpression()             # expression
            self.__compileSymbol()               # ')'
        elif self.__tokenizer.peek() in {RE_TILDA, RE_BAR}:
            symbol = self.__compileSymbol()      # unaryOp
            self.CompileTerm()                   # term
            self.__vmWriter.writeArithmetic(symbol, False)
        elif lookahead == RE_BRACKETS_SQUARE_LEFT:
            varName = self.__tokenizer.peek()
            self.__compileVarName(STATUS_USE)    # varName
            self.__compileSymbol()               # '['
            self.CompileExpression()             # expression
            self.__compileSymbol()               # ']'
            # Compile array indexing
            kind = self.__symbolTable.kindOf(varName)
            index = self.__symbolTable.indexOf(varName)
            segment = KIND_2_SEGMENT[kind]
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            self.__vmWriter.writePush(VM_SEGMENT_THAT, 0)
        elif lookahead in {RE_BRACKETS_LEFT, RE_DOT}:
            self.__compileSubroutineCall()       # subroutineCall |
                                                 # (varName | className)
                                                 # '.' subroutineCall
        else:
            if self.__tokenizer.tokenType() == TOKEN_TYPE_INTEGER:
                self.__compileIntVal()           # integerConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_STRING:
                self.__compileStringVal()        # stringConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_KEYWORD:
                # true | false | null | this
                # true | false | null - pushed to stack as constants
                keyword = self.__tokenizer.peek()
                if keyword in {RE_FALSE, RE_NULL, RE_TRUE}:
                    self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, 0)
                    if keyword == RE_TRUE:
                        self.__vmWriter.writeArithmetic(RE_TILDA, False)
                # this - pushes pointer
                elif keyword == RE_THIS:
                    self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
                self.__compileKeyWord()          # keywordConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_IDENTIFIER:
                name = self.__tokenizer.peek()
                kind = self.__symbolTable.kindOf(name)
                index = self.__symbolTable.indexOf(name)
                segment = self.__symbolTable.segmentOf(name)
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
                self.__vmWriter.writePush(segment, index)
        self.__closeTag()                        # </term>

    def CompileExpressionList(self):
        """ Compiles a (possibly empty) comma-separated list of
        expressions.
        Syntax: (expression (',' expression)* )?
        """
        exp_count = 0
        self.__openTag('expressionList')         # <expressionList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            self.CompileExpression()
            exp_count += 1                       # expression
            while self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()           # ','
                self.CompileExpression()
                exp_count += 1
        self.__closeTag()                        # </expressionList>
        return exp_count
class CompilationEngine:
    """Recursive-descent parser that reads Jack tokens from a JackTokenizer
    and writes the parse tree as indented XML to an output file.

    The whole compilation runs from the constructor: each compile_* method
    consumes exactly the tokens of its grammar rule and leaves the tokenizer
    positioned on the first token of the next rule.
    """

    def __init__(self, input_file, output_file):
        # tokenizer supplies one token at a time via advance()/current_token
        self.tokenizer = JackTokenizer(input_file)
        self.xml_file = open(output_file, "w")
        # current indentation depth (tabs) for nested XML elements
        self.space_depth = 0
        # starts the process: load the first token and compile the class
        self.tokenizer.advance()
        self.compile_class()
        self.xml_file.close()

    def compile_class(self):
        """Compiles a complete class: 'class' className '{' classVarDec*
        subroutineDec* '}'.

        :return: none
        """
        # write <class>
        self.non_terminal_open(XML_CLASS)
        # write <keyword> class </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances a single step to get the class name
        self.tokenizer.advance()
        # write <identifier> class_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compiles class variable declarations (static | field)
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles subroutine declarations
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # write <symbol> } </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </class>
        self.non_terminal_end(XML_CLASS)

    def non_terminal_end(self, xml_type):
        """Writes the closing tag of a non-terminal element and dedents.

        :param xml_type: the xml element name we are closing
        :return: none
        """
        self.space_depth -= 1
        self.write_line(self.terminal_end(xml_type))

    def non_terminal_open(self, xml_type):
        """Writes the opening tag of a non-terminal element and indents.

        :param xml_type: the xml element name we are opening
        :return: none
        """
        self.write_line(self.terminal_opening(xml_type) + "\n")
        self.space_depth += 1

    def terminal_opening(self, word):
        """Returns the word wrapped as an XML opening tag.

        :param word: the element name
        :return: '<word>'
        """
        return "<" + word + ">"

    def terminal_end(self, word):
        """Returns the word wrapped as an XML closing tag.

        :param word: the element name
        :return: '</word>\\n'
        """
        return "</" + word + ">\n"

    def write_line(self, word):
        """Writes the line to the file with the current indentation depth.

        :param word: the text to write
        :return: none
        """
        self.xml_file.write("\t" * self.space_depth + word)

    def one_liner(self, xml_type, token):
        """Writes a terminal element on a single line: <type> token </type>.

        :param xml_type: the element name
        :param token: the token to put in the xml
        :return: none
        """
        self.write_line(
            self.terminal_opening(xml_type) + " " + token + " " +
            self.terminal_end(xml_type))

    def compile_class_var_dec(self):
        """Compiles a class variable declaration:
        ('static' | 'field') type varName (',' varName)* ';'.

        :return: none
        """
        # write <classVarDec>
        self.non_terminal_open(XML_CLASS_VAR_DEC)
        # we only come in the function if the current token is correct so we
        # can just write it: <keyword> static/field </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # the type is either a builtin keyword or a class-name identifier
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <identifier> var_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # check if there are more var_names, separated by commas
        while self.tokenizer.current_token != ";":
            # write <symbol> , </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # closes the statement
        self.non_terminal_end(XML_CLASS_VAR_DEC)
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """Compiles a single subroutine declaration:
        ('constructor' | 'function' | 'method') (type | 'void')
        subroutineName '(' parameterList ')' subroutineBody.

        :return: none
        """
        # writes <subroutineDec>
        self.non_terminal_open(XML_SUBROUTINE_DEC)
        # write <keyword> function/method/constructor </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # return type: builtin keyword (incl. void) or class identifier
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <identifier> sub_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # compiles the parameter list (advances past '(' itself)
        self.compile_parameter_list()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile subroutine body
        self.compile_subroutine_body()
        # closes the sub routine
        self.non_terminal_end(XML_SUBROUTINE_DEC)

    def compile_subroutine_body(self):
        """Compiles a subroutine body: '{' varDec* statements '}'.

        :return: none
        """
        # write <subroutineBody>
        self.non_terminal_open(XML_SUBROUTINE_BODY)
        # opens the bracket {
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile local variable declarations
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()
        # the body may be empty -- only compile statements if any remain
        if self.tokenizer.current_token != "}":
            self.compile_statements()
        # closes the bracket
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # closes the sub routine body (write </subroutineBody>)
        self.non_terminal_end(XML_SUBROUTINE_BODY)

    def compile_parameter_list(self):
        """Compiles a (possibly empty) parameter list:
        ((type varName) (',' type varName)*)?.

        :return: none
        """
        # writes <parameterList>
        self.non_terminal_open(XML_PARAMETER_LIST)
        self.tokenizer.advance()
        while self.tokenizer.current_token != ')':
            # parameter type: builtin keyword or class identifier
            if self.tokenizer.token_type() == KEY_WORD:
                self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            else:
                self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the variable's name
            self.tokenizer.advance()
            # write <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the next token
            self.tokenizer.advance()
            # a comma means another parameter follows
            if self.tokenizer.current_token == ",":
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
        # closes the statement
        self.non_terminal_end(XML_PARAMETER_LIST)

    def compile_var_dec(self):
        """Compiles a local variable declaration:
        'var' type varName (',' varName)* ';'.

        :return: none
        """
        # writes the opening <varDec>
        self.non_terminal_open(XML_VAR_DEC)
        # write <keyword> var </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # the type is either a builtin keyword or a class identifier
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # gets the variable's name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # writes <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the next token
            self.tokenizer.advance()
            # a comma means another variable name follows
            if self.tokenizer.current_token == ",":
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
        # writes <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # closes the statement
        self.non_terminal_end(XML_VAR_DEC)

    def compile_statements(self):
        """Compiles a sequence of statements, not including the enclosing {}.

        :return: none
        """
        # write <statements>
        self.non_terminal_open(XML_STATEMENTS)
        # while there are more statements, dispatch on the leading keyword
        while self.tokenizer.current_token != '}':
            statement_type = self.tokenizer.key_word()
            if statement_type == LET:
                self.compile_let()
            elif statement_type == IF:
                self.compile_if()
            elif statement_type == WHILE:
                self.compile_while()
            elif statement_type == DO:
                self.compile_do()
            elif statement_type == RETURN:
                self.compile_return()
        # write </statements>
        self.non_terminal_end(XML_STATEMENTS)

    def compile_do(self):
        """Compiles a do statement: 'do' subroutineCall ';'.

        :return: none
        """
        # write <doStatement>
        self.non_terminal_open(XML_DO_STATEMENT)
        # write <keyword> do </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advance to next token (subroutine call)
        self.tokenizer.advance()
        # write <identifier> name_of_func </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile the rest of the subroutine call ('.' ... or '(' ...)
        self.compile_subroutine_call()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </doStatement>
        self.non_terminal_end(XML_DO_STATEMENT)
        self.tokenizer.advance()

    def compile_let(self):
        """Compiles a let statement:
        'let' varName ('[' expression ']')? '=' expression ';'.

        :return: none
        """
        # write <letStatement>
        self.non_terminal_open(XML_LET_STATEMENT)
        # write <keyword> let </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advance to next token (var_name)
        self.tokenizer.advance()
        # write <identifier> var_name </identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        if self.tokenizer.current_token == '[':
            # optional array subscript: '[' expression ']'
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_expression()
            # write <symbol> ] </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> = </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # advance to expression and compile it
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </letStatement>
        self.non_terminal_end(XML_LET_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_while(self):
        """Compiles a while statement:
        'while' '(' expression ')' '{' statements '}'.

        :return: none
        """
        # write <whileStatement>
        self.non_terminal_open(XML_WHILE_STATEMENT)
        # write <keyword> while </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_statements()
        # write <symbol> } </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </whileStatement>
        self.non_terminal_end(XML_WHILE_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_return(self):
        """Compiles a return statement: 'return' expression? ';'.

        :return: none
        """
        # write <returnStatement>
        self.non_terminal_open(XML_RETURN_STATEMENT)
        # write <keyword> return </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # optional return expression
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        # write <symbol> ; </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write </returnStatement>
        self.non_terminal_end(XML_RETURN_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_if(self):
        """Compiles an if statement, possibly with a trailing else clause.

        :return: none
        """
        # write <ifStatement>
        self.non_terminal_open(XML_IF_STATEMENT)
        # write <keyword> if </keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_statements()
        # write <symbol> } </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        if self.tokenizer.current_token == 'else':
            # write <keyword> else </keyword>
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <symbol> { </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_statements()
            # write <symbol> } </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write </ifStatement>
        self.non_terminal_end(XML_IF_STATEMENT)
        return

    def compile_expression(self):
        """Compiles an expression: term (op term)*.

        :return: none
        """
        # write <expression>
        self.non_terminal_open(XML_EXPRESSION)
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            # write <symbol> op </symbol>, escaping XML-special operators
            if self.tokenizer.current_token in OP_DICT:
                self.one_liner(XML_SYMBOL,
                               OP_DICT.get(self.tokenizer.current_token))
            else:
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
        # write </expression>
        self.non_terminal_end(XML_EXPRESSION)
        return

    def compile_term(self):
        """Compiles a term: integerConstant | stringConstant |
        keywordConstant | varName | varName '[' expression ']' |
        subroutineCall | '(' expression ')' | unaryOp term.

        :return: none
        """
        # write <term>
        self.non_terminal_open(XML_TERM)
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            self.one_liner(XML_INT_CONST, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # write without the surrounding ""
            self.one_liner(XML_STRING_CONST,
                           self.tokenizer.current_token[1:-1])
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # '(' expression ')'
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_expression()
            # write <symbol> ) </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            # unaryOp term
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
        # var / var[expression] / subroutine_call
        else:
            # write <identifier> var_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                # write <symbol> [ </symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
                self.compile_expression()
                # write <symbol> ] </symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or \
                    self.tokenizer.current_token == '.':
                self.compile_subroutine_call()
        # write </term>
        self.non_terminal_end(XML_TERM)
        return

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated list of expressions.

        :return: none
        """
        # write <expressionList>
        self.non_terminal_open(XML_EXPRESSION_LIST)
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                # write <symbol> , </symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        # write </expressionList>
        self.non_terminal_end(XML_EXPRESSION_LIST)
        return

    def compile_subroutine_call(self):
        """Compiles a subroutine call, not including the subroutine's
        first varName (already written by the caller).

        :return: none
        """
        if self.tokenizer.current_token == '.':
            # write <symbol> . </symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <identifier> sub_name </identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> ( </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression_list()
        # write <symbol> ) </symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        return
class CompilationEngine:
    """Compiles Jack tokens straight to VM code via a VMWriter, tracking
    identifiers in a SymbolTable.  The whole compilation runs from the
    constructor.
    """
    binaryOp = {'+', '-', '*', '/', '|', '=', '<', '>', '&'}
    unaryOp = {'-', '~'}
    keywordConstant = {'true', 'false', 'null', 'this'}

    def __init__(self, file):
        """Builds the tokenizer/symbol-table/writer pipeline and compiles
        *file* immediately.
        """
        self.label_num = 0  # monotonically increasing unique-label counter
        self.tokenizer = JackTokenizer(file)
        self.advance()
        self.symbols = SymbolTable()
        self.vm = VMWriter()
        self.open_outfile(file)
        self.compile_class()
        self.close_outfile()

    def open_outfile(self, file):
        """Opens the VM output file corresponding to *file*."""
        self.vm.open_outfile(file)

    def close_outfile(self):
        """Closes the VM output file."""
        self.vm.close_outfile()

    def advance(self):
        """Advances the tokenizer by one token."""
        return self.tokenizer.advance()

    def compile_class(self):
        """Compiles a complete class."""
        self.advance()  # skip 'class' keyword
        self.cur_class = self.tokenizer.get_token()  # get class name
        self.advance()  # skip class name
        self.advance()  # skip '{'
        while self.is_class_var_dec():
            self.compile_class_var_dec()
        while self.is_subroutine():
            self.compile_subroutine()
        self.advance()  # skip '}'

    def is_subroutine(self):
        """True if the next token opens a subroutine declaration."""
        token_type, token = self.tokenizer.peek()
        return token_type == KEYWORD and token in ('constructor',
                                                   'function', 'method')

    def is_class_var_dec(self):
        """True if the next token opens a class variable declaration."""
        return self.is_token(KEYWORD, 'static') or \
            self.is_token(KEYWORD, 'field')

    def is_token(self, token_type, token):
        """True if the next token matches (token_type, token) exactly."""
        _token_type, _token = self.tokenizer.peek()
        return (token_type, token) == (_token_type, _token)

    def compile_class_var_dec(self):
        """Compiles a static declaration or a field declaration."""
        kind = KIND_MAP[self.tokenizer.get_token()]  # 'static' | 'field'
        self.advance()
        type = self.tokenizer.get_token()  # get var type
        self.advance()
        name = self.tokenizer.get_token()  # get var name
        self.advance()
        self.symbols.define(name, type, kind)
        while self.is_token(SYMBOL, ','):
            self.advance()
            name = self.tokenizer.get_token()  # get var name
            self.symbols.define(name, type, kind)
        self.advance()  # skip ';'

    def compile_subroutine(self):
        """Compiles a complete method, function, or constructor."""
        # get subroutine kind ('constructor' | 'function' | 'method')
        token_type, token = self.tokenizer.get_token_type(), \
            self.tokenizer.get_token()
        self.advance()  # at return type
        self.advance()  # at subroutine name
        self.cur_subroutine = self.tokenizer.get_token()  # name | 'new'
        self.symbols.start_subroutine()
        if token == 'method':
            # methods receive 'this' as implicit argument 0
            self.symbols.define('this', self.cur_class, ARG)
        self.advance()
        self.advance()  # skip '('
        self.compile_parameter_list()
        self.advance()  # skip ')'
        self.compile_subroutine_body(token)

    def compile_subroutine_body(self, token):
        """Compiles '{' varDec* statements '}' of a subroutine whose kind
        is *token*.
        """
        self.advance()  # skip '{'
        while self.is_token(KEYWORD, 'var'):
            self.compile_var_dec()
        self.compile_statements()
        self.advance()  # skip '}'

    def compile_parameter_list(self):
        """Compiles a (possibly empty) parameter list, not including the
        enclosing ().
        """
        while not self.is_token(SYMBOL, ')'):
            type = self.tokenizer.get_token()  # get parameter type
            self.advance()
            name = self.tokenizer.get_token()  # get parameter name
            self.advance()
            self.symbols.define(name, type, ARG)
            if self.is_token(SYMBOL, ','):
                self.advance()  # skip ','

    def compile_var_dec(self):
        """Compiles a local var declaration."""
        kind = KIND_MAP[self.tokenizer.get_token()]  # 'var' keyword
        self.advance()
        type = self.tokenizer.get_token()  # get var type
        self.advance()
        name = self.tokenizer.get_token()  # get var name
        self.advance()
        self.symbols.define(name, type, kind)
        while self.is_token(SYMBOL, ','):
            self.advance()  # skip ','
            name = self.tokenizer.get_token()  # read var name
            self.symbols.define(name, type, kind)
        self.advance()  # skip ';'

    def compile_statements(self):
        """Compiles a sequence of statements, not including the
        enclosing {}.
        """
        while True:
            if self.is_token(KEYWORD, 'do'):
                self.compile_do()
            elif self.is_token(KEYWORD, 'let'):
                self.compile_let()
            elif self.is_token(KEYWORD, 'if'):
                self.compile_if()
            elif self.is_token(KEYWORD, 'while'):
                self.compile_while()
            elif self.is_token(KEYWORD, 'return'):
                self.compile_return()
            else:
                break

    def compile_do(self):
        """Compiles a do statement; the call's return value is discarded."""
        self.advance()  # skip 'do' keyword
        name = self.tokenizer.get_token()
        self.compile_subroutine_call(name)
        self.vm.write_pop('temp', 0)  # discard the returned value
        self.advance()  # skip ';'

    def compile_subroutine_call(self, name):
        """Compiles a subroutine call whose leading identifier *name* was
        already read, pushing the receiver (if any) and the arguments and
        emitting the call.
        """
        num_args = 0
        if self.is_token(SYMBOL, '.'):
            self.advance()  # skip '.'
            method_name = self.tokenizer.get_token()
            self.advance()  # skip method name
            if name in self.symbols.subroutine_symbol or \
                    name in self.symbols.global_symbol:
                # obj.method(...) -- push the object as implicit argument 0
                self.write_push(name)
                full_name = self.symbols.type_of(name) + '.' + method_name
                num_args += 1
            else:
                # ClassName.function(...) -- plain function/constructor call
                full_name = name + '.' + method_name
        else:
            # bare method(...) -- a method of the current class on 'this'
            self.vm.write_push('pointer', 0)
            num_args += 1
            full_name = self.cur_class + '.' + name
        self.advance()  # skip '('
        # BUGFIX: compile_expression_list now returns the expression count;
        # previously it returned None and this addition raised a TypeError.
        num_args += self.compile_expression_list()
        self.vm.write_call(full_name, num_args)
        self.advance()  # skip ')'

    def compile_let(self):
        """Compiles a let statement."""
        self.advance()  # skip 'let'
        name = self.tokenizer.get_token()
        self.advance()  # skip name
        # BUGFIX: was is_token(SYMBOLS, '[') -- undefined name; every other
        # call site in this class uses SYMBOL.
        subscript = self.is_token(SYMBOL, '[')
        if subscript:
            self.compile_base_plus_index(name)
        self.advance()  # skip '='
        self.compile_expression()  # calculate expression
        self.advance()  # skip ';'
        if subscript:
            self.pop_array_element()  # *(base+index) = expr
        else:
            self.write_pop(name)

    def pop_array_element(self):
        """Stores the expression on top of the stack into the array cell
        whose address (base+index) sits just below it.
        """
        self.vm.write_pop('temp', 1)  # pop expr value to temp register
        self.vm.write_pop('pointer', 1)  # pop base+index into 'that'
        self.vm.write_push('temp', 1)  # push expr back into stack
        self.vm.write_pop('that', 0)  # pop value into *(base+index)

    def compile_base_plus_index(self, name):
        """Leaves base+index of `name[expression]` on the stack."""
        self.write_push(name)
        self.advance()  # skip '['
        self.compile_expression()  # push index into stack
        self.advance()  # skip ']'
        self.vm.write_vm_cmd('add')  # base+index

    def compile_while(self):
        """Compiles a while statement."""
        L1 = self.new_label()
        L2 = self.new_label()
        self.vm.write_label(L1)
        self.advance()  # skip 'while'
        self.advance()  # skip '('
        self.compile_expression()
        self.advance()  # skip ')'
        self.vm.write_vm_cmd('not')  # ~(cond)
        self.vm.write_if(L2)  # exit loop when condition fails
        self.advance()  # skip '{'
        self.compile_statements()
        self.advance()  # skip '}'
        self.vm.write_goto(L1)  # goto L1
        self.vm.write_label(L2)

    def compile_if(self):
        """Compiles an if statement, possibly with a trailing else
        clause.
        """
        L1 = self.new_label()
        L2 = self.new_label()
        self.advance()  # skip 'if'
        self.advance()  # skip '('
        self.compile_expression()
        self.advance()  # skip ')'
        self.vm.write_vm_cmd('not')  # ~(cond)
        self.vm.write_if(L1)  # jump to else/end when condition fails
        self.advance()  # skip '{'
        self.compile_statements()
        self.advance()  # skip '}'
        self.vm.write_goto(L2)  # goto L2
        self.vm.write_label(L1)
        if self.is_token(KEYWORD, 'else'):
            self.advance()  # skip 'else'
            self.advance()  # skip '{'
            self.compile_statements()
            self.advance()  # skip '}'
        self.vm.write_label(L2)

    def compile_return(self):
        """Compiles a return statement; void subroutines return constant
        0.
        """
        self.advance()  # skip 'return'
        # BUGFIX: was is_token(SYMBOLS, ';') -- undefined name.
        if not self.is_token(SYMBOL, ';'):
            self.compile_expression()
        else:
            self.vm.write_push('constant', 0)
        self.advance()  # skip ';'
        self.vm.write_return()

    def compile_expression(self):
        """Compiles an expression: term (op term)*; binary operators are
        emitted post-order for the stack machine.
        """
        self.compile_term()
        while self.is_binary_op():
            binary_op = self.tokenizer.get_token()
            self.advance()  # skip op
            self.compile_term()
            self.vm.write_vm_cmd(VM_BINORY_CMDS[binary_op])

    def compile_term(self):
        """Compiles a term."""
        token_type, token = self.tokenizer.peek()
        if self.is_const():
            self.compile_const()
        elif self.is_unary_op():
            self.advance()  # skip unaryOp
            self.compile_term()
        elif self.is_token(SYMBOL, '('):
            self.advance()  # skip '('
            self.compile_expression()
            self.advance()  # skip ')'
        elif token_type is IDENTIFIER:
            self.advance()  # skip the identifier
            # BUGFIX: both is_token checks used the undefined name SYMBOLS.
            if self.is_token(SYMBOL, '['):
                self.compile_array_subscript(token)
            elif self.is_token(SYMBOL, '.'):
                self.compile_subroutine_call(token)
            else:
                self.write_push(token)

    def compile_array_subscript(self, name):
        """Pushes the value of `name[expression]`; the identifier itself
        was already consumed by the caller.
        """
        self.write_push(name)
        # BUGFIX: removed a duplicate advance() "skip name" -- the caller
        # (compile_term) already consumed the identifier, so the extra
        # advance skipped the '[' and desynchronized the token stream
        # (compare compile_base_plus_index, which has no such advance).
        self.advance()  # skip '['
        self.compile_expression()  # push index into stack
        self.advance()  # skip ']'
        self.vm.write_vm_cmd('add')
        self.vm.write_pop('pointer', 1)  # pop into 'that' ptr
        self.vm.write_push('that', 0)  # push *(base+index) into stack

    def compile_const(self):
        """Compiles an integer, string, or keyword constant."""
        token_type, token = self.tokenizer.peek()
        if token_type == INT_CONST:
            self.vm.write_push('constant', token)
        elif token_type == STRING_CONST:
            self.write_string_const(token)
        # BUGFIX: was `token_type == CompilationEngine.keywordConstant`,
        # which compares a type tag to a set and is always False, so
        # keyword constants were never emitted.
        elif token in CompilationEngine.keywordConstant:
            self.compile_kew_const(token)
        # BUGFIX: consume the constant token; every other compile_term
        # branch advances past the tokens it handles, and without this the
        # parser never moved past a constant.
        self.advance()

    def compile_kew_const(self, kwd):
        """Pushes a keyword constant: this, true (-1), false/null (0)."""
        if kwd == 'this':
            self.vm.write_push('pointer', 0)
        elif kwd == 'true':
            self.vm.write_push('constant', 1)
            self.vm.write_vm_cmd('neg')  # true is -1
        else:
            self.vm.write_push('constant', 0)

    def write_string_const(self, token):
        """Builds a String object from *token* one character at a time."""
        self.vm.write_push('constant', len(token))
        self.vm.write_call('String.new', 1)  # String.new(len(str))
        for c in token:
            self.vm.write_push('constant', ord(c))
            self.vm.write_call('String.appendChar', 2)

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated list of expressions.

        :return: the number of expressions compiled (the argument count
                 used by compile_subroutine_call).
        """
        # BUGFIX: this method previously returned None, which made
        # `num_args += self.compile_expression_list()` in
        # compile_subroutine_call raise a TypeError.
        num_exprs = 0
        if self.is_term():
            self.compile_expression()
            num_exprs += 1
            while self.is_token(SYMBOL, ','):
                self.advance()  # skip ','
                self.compile_expression()
                num_exprs += 1
        return num_exprs

    def is_const(self):
        """True if the next token is an int/string/keyword constant."""
        token_type, token = self.tokenizer.peek()
        return token_type in [INT_CONST, STRING_CONST] or \
            token in CompilationEngine.keywordConstant

    def is_binary_op(self):
        """True if the next token is a binary operator symbol."""
        token_type, token = self.tokenizer.peek()
        return token_type == SYMBOL and token in CompilationEngine.binaryOp

    def is_unary_op(self):
        """True if the next token is a unary operator symbol."""
        token_type, token = self.tokenizer.peek()
        return token_type == SYMBOL and token in CompilationEngine.unaryOp

    def is_keyword_constant(self):
        """True if the next token is true/false/null/this."""
        token_type, token = self.tokenizer.peek()
        return token_type == KEYWORD and \
            token in CompilationEngine.keywordConstant

    def is_term(self):
        """True if the next token can start a term."""
        token_type, token = self.tokenizer.peek()
        return token_type in [INT_CONST, STRING_CONST] or \
            self.is_keyword_constant() or token_type is IDENTIFIER or \
            self.is_unary_op() or self.is_token(SYMBOL, '(')

    def new_label(self):
        """Returns a fresh, unique VM label."""
        self.label_num += 1
        return 'label' + str(self.label_num)

    def write_push(self, name):
        """Pushes the value of symbol *name* from its VM segment."""
        (type, kind, index) = self.symbols.look_up(name)
        self.vm.write_push(SEGMENTS[kind], index)

    def write_pop(self, name):
        """Pops the top of the stack into symbol *name*'s VM segment."""
        (type, kind, index) = self.symbols.look_up(name)
        self.vm.write_pop(SEGMENTS[kind], index)

    def load_pointer(self, func_type):
        """Sets up 'this' at subroutine entry: methods load argument 0,
        constructors allocate the object.

        NOTE(review): func_type is indexed as func_type[1] -- apparently a
        (type, kind) pair; confirm against the caller.
        """
        if func_type[1] == 'method':
            self.vm.write_push('argument', 0)
            self.vm.write_pop('pointer', 0)
        elif func_type[1] == 'constructor':
            global_vars = self.symbols.index[FIELD]
            self.vm.write_push('constant', global_vars)
            self.vm.write_call('Memory.alloc', 1)
            self.vm.write_pop('pointer', 0)
class CompilationEngine:
    """
    The compilation engine compiles the jack code given in the input file
    into an xml code saved in the out_file.

    NOTE(review): this region of the file was whitespace-collapsed; the code
    below is the same logic restored to conventional formatting. The one
    behavioral fix is in _write_op: the XML escape entities had been decoded
    to raw characters (which emits ill-formed XML, and the quote case was a
    literal syntax error) — they are restored to &lt; &gt; &amp; &quot;.
    """

    def __init__(self, in_file, out_file):
        """
        A compilation engine constructor
        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        self.tokenizer = JackTokenizer(in_file)
        self.out_file = open(out_file, 'w')
        self._indent_count = 0  # current XML nesting depth (tabs per line)

    def compile_class(self):
        """ compiles a class according to the grammar """
        self._write_outer_tag(CLASS_TAG)
        self.tokenizer.advance()
        if self.tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        self._check_write_symbol("{")
        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_write_symbol("}")
        self._write_outer_tag(CLASS_TAG, IS_ENDING_TAG)

    def compile_class_var_dec(self):
        """ compiles the class's variables declarations """
        self._write_outer_tag(CLASS_VAR_DEC_TAG)
        # we only come in the function if the current token is correct so we
        # can just write it
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        while self._check_if_comma():  # there are more variables
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(CLASS_VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_subroutine_dec(self):
        """ compiles the class's subroutine (methods and functions)
        declarations """
        self._write_outer_tag(SUBROUTINE_DEC_TAG)
        # we only come in the function if the current token is correct so we
        # can just write it
        self._write_token(self.tokenizer.token_type())
        # the function is either void or has a type
        if self.tokenizer.key_word() == 'void':
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_type()
        self._check_write_name()
        self._check_write_symbol("(")
        self.compile_parameter_list()
        self._check_write_symbol(")")
        self.compile_subroutine_body()
        self._write_outer_tag(SUBROUTINE_DEC_TAG, IS_ENDING_TAG)

    def compile_parameter_list(self):
        """ compiles the parameter list for the subroutines """
        self._write_outer_tag(PARAMETER_LIST_TAG)
        # if curr_token is ')' it means the param list is empty
        if self.tokenizer.symbol() != ')':
            self._check_write_type()
            self._check_write_name()
            while self._check_if_comma():  # there are more params
                self._check_write_symbol(",")
                self._check_write_type()
                self._check_write_name()
        self._write_outer_tag(PARAMETER_LIST_TAG, IS_ENDING_TAG)

    def compile_subroutine_body(self):
        """ compiles the body of the subroutine """
        self._write_outer_tag(SUBROUTINE_BODY_TAG)
        self._check_write_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine
        while self.tokenizer.key_word() == 'var':
            self.compile_var_dec()
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag(SUBROUTINE_BODY_TAG, IS_ENDING_TAG)

    def compile_var_dec(self):
        """ compiles the variable declarations """
        self._write_outer_tag(VAR_DEC_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_statements(self):
        """ compiles the statements (0 or more statements) """
        self._write_outer_tag(STATEMENTS_TAG)
        while self._check_if_statement():
            if self.tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.tokenizer.key_word() == 'return':
                self.compile_return()
        self._write_outer_tag(STATEMENTS_TAG, IS_ENDING_TAG)

    def compile_do(self):
        """ compiles the do statement """
        self._write_outer_tag(DO_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self.compile_subroutine_call()
        self._check_write_symbol(";")
        self._write_outer_tag(DO_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_let(self):
        """ compiles the let statement """
        self._write_outer_tag(LET_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        if self.tokenizer.symbol() == '[':  # if there is an array
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        self._check_write_symbol("=")
        self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(LET_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_if(self):
        """ compiles the if statements """
        self._write_outer_tag(IF_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        # there can also be an if else scenario
        if self.tokenizer.key_word() == 'else':
            self._write_token(self.tokenizer.token_type())
            self._check_write_symbol("{")
            self.compile_statements()
            self._check_write_symbol("}")
        self._write_outer_tag(IF_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_while(self):
        """ compiles the while statements """
        self._write_outer_tag("whileStatement")
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag("whileStatement", IS_ENDING_TAG)

    def compile_return(self):
        """ compiles the return statements """
        self._write_outer_tag(RETURN_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        # if cur token is ; we return nothing, otherwise we return something
        if not self.tokenizer.symbol() == ';':
            self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(RETURN_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_subroutine_call(self):
        """ compiles the subroutine calls (when we actually call a
        subroutine as opposed to declaring it) """
        self._check_write_name()
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())
        if self.tokenizer.symbol() == ".":
            self._check_write_symbol(".")
            self._check_write_name()
        self._check_write_symbol("(")
        self.compile_expression_list()
        self._check_write_symbol(")")

    def compile_expression(self):
        """ compiles expressions which are terms and possibly operators and
        more terms """
        self._write_outer_tag(EXPRESSION_TAG)
        self.compile_term()
        # there may be a few operators in one expression
        while self.tokenizer.symbol() in OPERATIONS:
            self._write_op()
            self.compile_term()
        self._write_outer_tag(EXPRESSION_TAG, IS_ENDING_TAG)

    def compile_term(self):
        """ compiles terms according to the grammar """
        self._write_outer_tag(TERM_TAG)
        cur_type = self.tokenizer.token_type()
        # either a string/int constant
        if self.tokenizer.token_type() in ["INT_CONST", "STRING_CONST"]:
            self._write_token(cur_type)
        # or a constant keyword (true, false, null, this)
        elif self.tokenizer.key_word() in KEYWORD_CONST:
            self._write_token(cur_type)
        # or an expression within round brackets
        elif self.tokenizer.symbol() == '(':
            self._write_token(cur_type)
            self.compile_expression()
            self._check_write_symbol(")")
        # or a unary op and then a term
        elif self.tokenizer.symbol() in UNARY_OPS:
            self._write_op()
            self.compile_term()
        # or it is an identifier which could be:
        elif self.tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()
        self._write_outer_tag(TERM_TAG, IS_ENDING_TAG)

    def _compile_term_identifier(self):
        """ compiles terms in case of identifier token """
        # an array
        if self.tokenizer.get_next_token() == '[':
            self._check_write_name()
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        # or a subroutine call
        elif self.tokenizer.get_next_token() in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._check_write_name()  # or just a variable name

    def compile_expression_list(self):
        """ compiles the expression lists """
        self._write_outer_tag(EXPRESSION_LIST_TAG)
        # if it is ')' then the expression list is empty
        if self.tokenizer.symbol() != ')':
            self.compile_expression()
            while self._check_if_comma():  # while there are more expressions
                self._write_token(self.tokenizer.token_type())
                self.compile_expression()
        self._write_outer_tag(EXPRESSION_LIST_TAG, IS_ENDING_TAG)

    def _check_if_var_dec(self):
        """
        check if we are currently compiling a variable declaration
        :return: true iff the current token is either 'static' or 'field'
        """
        return self.tokenizer.key_word() in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        checks if we are currently compiling a subroutine declaration
        :return: true iff the current token is either 'constructor' or
        'function' or 'method'
        """
        return self.tokenizer.key_word() in SUBROUTINE

    def _check_if_comma(self):
        """
        checks if current token is a comma
        :return: true iff the current token is a ','
        """
        return self.tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        checks if we are currently compiling a statement
        :return: true iff the current token is in ['let', 'if', 'while',
        'do', 'return']
        """
        return self.tokenizer.key_word() in STATEMENTS

    def _check_write_type(self):
        """
        checks if the current token is a valid type and if so, it writes it
        to the output file
        """
        if self.tokenizer.key_word() in TYPE_KEYWORDS:
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_name()

    def _check_write_symbol(self, expected_symbol):
        """
        checks if the current token is the expected symbol, if so it writes
        it to the output file
        :param expected_symbol: the symbol we are validating is the current
        token
        :return: prints illegal statement error if it is not the expected
        symbol and exits the program
        """
        if self.tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())

    def _check_write_name(self):
        """
        checks the current token is a name (identifier), and if so, writes
        it to the output file
        :return: prints illegal statement error if it is not a name and
        exits the program
        """
        if self.tokenizer.identifier():
            self._write_token("IDENTIFIER")
        else:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()

    def _write_outer_tag(self, tag_str, end=False):
        """
        writes the outer tags of the different sections we are compiling
        :param tag_str: the string of the current section we are compiling
        :param end: true iff it is an end tag
        """
        if end:
            # we decrease the indent count before the closing tag
            self._indent_count -= 1
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("</" + tag_str + ">\n")
        else:
            # we increase the indent count after the opening tag
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("<" + tag_str + ">\n")
            self._indent_count += 1

    def _write_op(self):
        """
        writes an op symbol to the out file.

        Fix: '<', '>', '&' and '"' must be emitted as the XML entities
        &lt; &gt; &amp; &quot; — the raw characters are XML metacharacters
        and produce ill-formed output.
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<symbol> ")
        if self.tokenizer.symbol() == '<':
            self.out_file.write("&lt;")
        elif self.tokenizer.symbol() == '>':
            self.out_file.write("&gt;")
        elif self.tokenizer.symbol() == '&':
            self.out_file.write("&amp;")
        elif self.tokenizer.symbol() == '\"':
            self.out_file.write("&quot;")
        else:
            self.out_file.write(self.tokenizer.symbol())
        self.out_file.write(" </symbol>\n")
        self.tokenizer.advance()

    def _write_token(self, cur_type):
        """
        writes the current token to the output file
        :param cur_type: the type of the current token
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<" + TOKEN_TYPE_STR[cur_type] + "> ")
        self.out_file.write(str(self.tokenizer.get_token_str()))
        self.out_file.write(" </" + TOKEN_TYPE_STR[cur_type] + ">\n")
        self.tokenizer.advance()
class CompilationEngine:
    """VM-emitting compilation engine: walks the token stream produced by
    JackTokenizer (via revealNext/advance/getToken) and writes VM commands
    through a VMWriter, using a SymbolTable for variable resolution.

    NOTE(review): this region of the file was whitespace-collapsed; the code
    below is the same logic restored to conventional formatting. Statement
    order and every runtime string are preserved byte-for-byte.
    """

    op_list = ["+", "-", "*", "/", "&", "|", "<", ">", "="]
    un_op_list = ["-", "~"]
    un_op_dic = {"-": "NEG", "~": "NOT"}
    kind_dic = {
        "VAR": "LOCAL",
        "ARG": "ARG",
        "STATIC": "STATIC",
        "FIELD": "THIS"
    }
    op_dic = {
        "+": "ADD",
        "-": "SUB",
        "=": "EQ",
        ">": "GT",
        "<": "LT",
        "|": "OR",
        "&": "AND",
        "*": "MUL",
        "/": "DIV"
    }
    # NOTE(review): these identity mappings look like XML escape entities
    # (&lt; &gt; &amp;) that were decoded by the same mangling that broke the
    # whitespace — confirm against the original; values left as found.
    special_chars = dict()
    special_chars["<"] = "<"
    # special_chars['"'] = '"'
    special_chars[">"] = ">"
    special_chars["&"] = "&"
    INDENT = " "

    def __init__(self, inputFile, outputFile):
        # self.outFile = outputFile
        self.tokenizer = JackTokenizer(inputFile)
        self.vmwriter = VMWriter(outputFile)
        self.outFile = outputFile
        self.currToken = ""
        self.tabber = ""
        self.argumentsCounter = 0

    def compileClass(self):
        """Compile a whole class: varDecs then subroutines, then close the
        VM writer."""
        self.symbolTable = SymbolTable()
        self.currToken = self.tokenizer.getToken()
        # assuming first token is keyword class
        self.__advanceToken()
        # assuming next token is the class name
        self.thisType = self.currToken
        self.__advanceToken()
        # assuming next token is '{'
        if match("^[^}]+", self.tokenizer.revealNext()):
            self.compileClassVarDec()
            self.compileSubroutine()
        # assuming next token is '}'
        self.__advanceToken()
        self.vmwriter.close()

    def compileClassVarDec(self):
        """Record every static/field declaration in the symbol table."""
        while match("^(static|field)", self.tokenizer.revealNext()):
            # we know next token is static or field
            self.__advanceToken()
            kind = self.currToken.upper()
            # we assume this will be type of the var
            self.__advanceToken()
            typeVar = self.currToken
            while match("[^;]+", self.tokenizer.revealNext()):
                if match("[,]", self.tokenizer.revealNext()):
                    self.__advanceToken()
                # we assume this will be the var name
                self.__advanceToken()
                name = self.currToken
                self.symbolTable.define(name, typeVar, kind)
            self.__advanceToken()

    def compileSubroutine(self):
        """Compile each constructor/function/method declaration in turn."""
        while match("(constructor|function|method)",
                    self.tokenizer.revealNext()):
            self.ifCounter = 0
            self.whileCounter = 0
            self.symbolTable.startSubroutine()
            self.__advanceToken()
            self.subroutineKind = self.currToken
            if match("method", self.currToken):
                # methods receive the object as implicit argument 0
                self.symbolTable.define("this", self.thisType, "ARG")
            self.__advanceToken()
            self.subroutineType = self.currToken
            # assuming this will be subroutine name
            self.__advanceToken()
            self.subroutineName = self.currToken
            # assuming this will be '('
            self.__advanceToken()
            self.compileParameterList()
            # assuming this will be ')'
            self.__advanceToken()
            self.compileSubroutineBody()

    def compileParameterList(self):
        """Record each `type name` parameter pair as an ARG symbol."""
        while match("[^)]+", self.tokenizer.revealNext()):
            self.__advanceToken()
            typeArg = self.currToken
            # assuming this will be var name
            self.__advanceToken()
            nameArg = self.currToken
            self.symbolTable.define(nameArg, typeArg, "ARG")
            if match("[,]", self.tokenizer.revealNext()):
                self.__advanceToken()

    def compileSubroutineBody(self):
        """Compile `{ varDec* statements }`, emitting the function header and
        the method/constructor `this` setup."""
        if match("[{]", self.tokenizer.revealNext()):
            self.__advanceToken()
        if match("[^}]+", self.tokenizer.revealNext()):
            self.compileVarDec()
        nLocal = self.symbolTable.varCount("VAR")
        self.vmwriter.writeFunction(
            self.thisType + '.' + self.subroutineName, nLocal)
        if match("method", self.subroutineKind):
            self.vmwriter.writePush("ARG", 0)
            self.vmwriter.writePop("POINTER", 0)
        elif match("constructor", self.subroutineKind):
            fieldCounter = self.symbolTable.varCount("FIELD")
            self.vmwriter.writePush("CONST", fieldCounter)
            self.vmwriter.writeCall("Memory.alloc", 1)
            self.vmwriter.writePop("POINTER", 0)
        self.compileStatements()
        # assuming this will be '}'
        self.__advanceToken()

    def compileVarDec(self):
        """Record every `var type name (, name)* ;` in the symbol table."""
        while match("var", self.tokenizer.revealNext()):
            self.__advanceToken()
            kind = self.currToken.upper()
            # assuming this will be type
            self.__advanceToken()
            typeVar = self.currToken
            while match("[^;]+", self.tokenizer.revealNext()):
                if match("[,]", self.tokenizer.revealNext()):
                    self.__advanceToken()
                # assuming this will be the var name
                self.__advanceToken()
                nameVar = self.currToken
                self.symbolTable.define(nameVar, typeVar, kind)
            # assuming this will be ;
            self.__advanceToken()

    def compileStatements(self):
        """Dispatch let/do/if/while/return statements until none remain."""
        while match("(let|do|if|while|return)", self.tokenizer.revealNext()):
            if match("(let)", self.tokenizer.revealNext()):
                self.compileLet()
            elif match("(do)", self.tokenizer.revealNext()):
                self.compileDo()
            elif match("(if)", self.tokenizer.revealNext()):
                self.compileIf()
            elif match("(while)", self.tokenizer.revealNext()):
                self.compileWhile()
            elif match("(return)", self.tokenizer.revealNext()):
                self.compileReturn()

    def subRoutineCall(self):
        """Compile a subroutine call; the leading identifier has already been
        advanced into currToken. `remember` counts the implicit object
        argument pushed for method calls."""
        numArg = 0
        name = self.currToken
        remember = 0
        if match("[.]", self.tokenizer.revealNext()):
            if self.symbolTable.kindOf(name):
                # method call on a variable: push the object, dispatch on its
                # declared type
                kind = self.kind_dic[self.symbolTable.kindOf(name)]
                index = self.symbolTable.indexOf(name)
                self.vmwriter.writePush(kind, index)
                remember += 1
                name = self.symbolTable.typeOf(name)
            # assuming this will be '.'
            self.__advanceToken()
            name += self.currToken
            # assuming this will be subroutineName
            self.__advanceToken()
            name += self.currToken
        else:
            # bare call: method on the current object
            self.vmwriter.writePush("POINTER", 0)
            remember += 1
            name = self.thisType + '.' + name
        # assuming this will be '('
        self.__advanceToken()
        numArg = self.compileExpressionList()
        self.vmwriter.writeCall(name, numArg + remember)
        # assuming this will be ')'
        self.__advanceToken()

    def compileDo(self):
        """Compile `do subroutineCall ;`, discarding the return value."""
        self.__advanceToken()
        # assuming this will be varName/className/subroutineName
        self.__advanceToken()
        self.subRoutineCall()
        self.vmwriter.writePop("TEMP", 0)
        # assuming this will be ;
        self.__advanceToken()

    def compileLet(self):
        """Compile `let name ([expr])? = expr ;`, with the temp/that dance
        for array targets."""
        self.__advanceToken()
        # assuming this will be var name
        self.__advanceToken()
        name = self.currToken
        kind = self.kind_dic[self.symbolTable.kindOf(name)]
        index = self.symbolTable.indexOf(name)
        arrayExp = False
        if match("[[]", self.tokenizer.revealNext()):
            arrayExp = True
            self.__advanceToken()
            self.compileExpression()
            self.vmwriter.writePush(kind, index)
            self.vmwriter.writeArithmetic("ADD")
            # assuming this will be ']'
            self.__advanceToken()
        # assuming this will be '='
        self.__advanceToken()
        self.compileExpression()
        if arrayExp:
            self.vmwriter.writePop("TEMP", 0)
            self.vmwriter.writePop("POINTER", 1)
            self.vmwriter.writePush("TEMP", 0)
            self.vmwriter.writePop("THAT", 0)
        else:
            self.vmwriter.writePop(kind, index)
        # assuming this will be ;
        self.__advanceToken()

    def compileWhile(self):
        # 'while' '(' expression ')' '{' statements '}'
        currWhile = self.whileCounter
        self.whileCounter += 1
        self.__advanceToken()
        self.vmwriter.writeLabel("WHILE_EXP" + str(currWhile))
        # assuming this will be '('
        self.__advanceToken()
        self.compileExpression()
        # assuming this will be ')'
        self.__advanceToken()
        self.vmwriter.writeArithmetic("NOT")
        self.vmwriter.writeIf("WHILE_END" + str(currWhile))
        # assuming this will be '{'
        self.__advanceToken()
        self.compileStatements()
        # assuming this will be '}'
        self.__advanceToken()
        self.vmwriter.writeGoTo("WHILE_EXP" + str(currWhile))
        self.vmwriter.writeLabel("WHILE_END" + str(currWhile))

    def compileReturn(self):
        # 'return' expression? ';' — void subroutines push constant 0
        self.__advanceToken()
        if match("[^;]+", self.tokenizer.revealNext()):
            self.compileExpression()
        # assuming this will be ;
        if match("void", self.subroutineType):
            self.vmwriter.writePush("CONST", 0)
        self.vmwriter.writeReturn()
        self.__advanceToken()

    def compileIf(self):
        # if ( expression ) { statements } (else { statements })?
        curr_if = self.ifCounter
        self.ifCounter += 1
        self.__advanceToken()
        # assuming this will be '('
        self.__advanceToken()
        self.compileExpression()
        # assuming this will be ')'
        self.__advanceToken()
        self.vmwriter.writeIf("IF_TRUE" + str(curr_if))
        self.vmwriter.writeGoTo("IF_FALSE" + str(curr_if))
        self.vmwriter.writeLabel("IF_TRUE" + str(curr_if))
        # assuming this will be '{'
        self.__advanceToken()
        self.compileStatements()
        # assuming this will be '}'
        self.__advanceToken()
        if match("(else)", self.tokenizer.revealNext()):
            self.vmwriter.writeGoTo("IF_END" + str(curr_if))
            self.vmwriter.writeLabel("IF_FALSE" + str(curr_if))
            self.__advanceToken()
            # assuming this will be '{'
            self.__advanceToken()
            self.compileStatements()
            # assuming this will be '}'
            self.__advanceToken()
            self.vmwriter.writeLabel("IF_END" + str(curr_if))
        else:
            self.vmwriter.writeLabel("IF_FALSE" + str(curr_if))

    def compileExpression(self):
        """Compile term (op term)*; each operator is emitted after the term
        that follows it (left-associative evaluation)."""
        relevantOp = None
        counter = 0
        while match("[^,)}\];]+", self.tokenizer.revealNext()):
            self.compileTerm()
            if counter != 0:
                self.vmwriter.writeArithmetic(self.op_dic[relevantOp])
            if self.tokenizer.revealNext() in self.op_list:
                self.__advanceToken()
                relevantOp = self.currToken
                counter += 1

    def compileTerm(self):
        """Compile one term: keyword/int/string constant, variable, array
        access, subroutine call, parenthesised expression or unary op."""
        self.__advanceToken()
        if match("KEYWORD", self.tokenizer.tokenType()):
            if match("(false|null)", self.currToken):
                self.vmwriter.writePush("CONST", 0)
            elif match("true", self.currToken):
                # true is -1 (NOT 0)
                self.vmwriter.writePush("CONST", 0)
                self.vmwriter.writeArithmetic("NOT")
            else:
                # 'this'
                self.vmwriter.writePush("POINTER", 0)
        elif match("STRING_CONST", self.tokenizer.tokenType()):
            constString = self.currToken.replace('\"', "")
            constString = constString.replace('\t', "\\t")
            constString = constString.replace('\b', "\\b")
            constString = constString.replace('\r', "\\r")
            constString = constString.replace('\n', "\\n")
            self.vmwriter.writeString(constString)
        elif match("INT_CONST", self.tokenizer.tokenType()):
            self.vmwriter.writePush("CONST", int(self.currToken))
        elif match("IDENTIFIER", self.tokenizer.tokenType()):
            if match("[\[]", self.tokenizer.revealNext()):
                # array access: push *(base + index)
                name = self.currToken
                kind = self.kind_dic[self.symbolTable.kindOf(name)]
                index = self.symbolTable.indexOf(name)
                self.__advanceToken()
                self.compileExpression()
                self.vmwriter.writePush(kind, index)
                self.vmwriter.writeArithmetic("ADD")
                self.vmwriter.writePop("POINTER", 1)
                self.vmwriter.writePush("THAT", 0)
                # assuming this will be ']'
                self.__advanceToken()
            elif match("([.]|[(])", self.tokenizer.revealNext()):
                self.subRoutineCall()
            else:
                seg = self.kind_dic[self.symbolTable.kindOf(self.currToken)]
                index = self.symbolTable.indexOf(self.currToken)
                self.vmwriter.writePush(seg, index)
        elif match("SYMBOL", self.tokenizer.tokenType()):
            if match("[(]", self.currToken):
                self.compileExpression()
                # assuming this will be ')'
                self.__advanceToken()
            else:
                un_op = self.currToken
                self.compileTerm()
                self.vmwriter.writeArithmetic(self.un_op_dic[un_op])

    def compileExpressionList(self):
        """Compile a comma-separated expression list; return its length."""
        argsCount = 0
        while match("[^)]+", self.tokenizer.revealNext()):
            if match("[,]", self.tokenizer.revealNext()):
                self.__advanceToken()
            self.compileExpression()
            argsCount += 1
        return argsCount

    def __advanceToken(self):
        """Advance the tokenizer (if possible) and cache the current token."""
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        self.currToken = self.tokenizer.getToken()
class CompilationEngine:
    """Skeleton recursive-descent compilation engine that prints the parse
    tree as XML (via print / the module-level ``xmlprint`` helper).

    Work in progress: most statement/expression handlers are still stubs.
    NOTE(review): this region of the file was whitespace-collapsed; the code
    below restores conventional formatting and fixes several NameError /
    TypeError-level defects, noted on the methods concerned.
    """

    def __init__(self, filename):
        self.tokenizer = JackTokenizer(filename)

    def compile(self, filename):
        """Compile *filename*, starting from the class grammar rule.

        Fix: the original body called the undefined global ``initialize``
        and a bare ``compileClass()`` without ``self`` (both NameErrors).
        NOTE(review): re-pointing the tokenizer at *filename* is the
        presumed intent of the lost ``initialize`` call — confirm.
        """
        self.tokenizer = JackTokenizer(filename)
        self.compileClass()

    def xml_print_el(self):
        """Print the current token as an XML element via ``xmlprint``."""
        xmlprint(self.tokenizer.token_type, self.tokenizer.token)

    def advanceSymbol(self, symbol):
        """Advance one token and require it to equal *symbol*.

        :raises SyntaxError: on end of stream or a different symbol.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Symbol expected:' + symbol +
                              ', found end of stream')
        if self.tokenizer.symbol() != symbol:
            raise SyntaxError('Symbol expected:' + symbol)

    def advanceKeyword(self, keyword):
        """Advance one token and require it to be the keyword *keyword*.

        :raises SyntaxError: on end of stream or a different keyword.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keyword expected:' + keyword +
                              ', found end of stream')
        if self.tokenizer.keyword() != keyword:
            raise SyntaxError('Keyword expected:' + keyword)

    def advanceTokenType(self, tokenType):
        """Advance one token and require its type to be *tokenType*.

        Fix: the original ignored the *tokenType* parameter and hard-coded
        'identifier' (and said "Identifier expected" for every type).
        :raises SyntaxError: on end of stream or a different token type.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError(tokenType + ' expected, found end of stream')
        if self.tokenizer.token_type != tokenType:
            raise SyntaxError(tokenType + ' expected')

    def advanceKeywords(self, *args):
        """Advance one token and require it to be one of the keywords *args*.

        Fix: the original concatenated the ``args`` tuple into the message
        (TypeError) and compared against the undefined name ``keyword``
        (NameError); it now checks membership in *args*.
        :raises SyntaxError: on end of stream or an unexpected keyword.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keywords expected:' + ', '.join(args) +
                              ', found end of stream')
        if self.tokenizer.keyword() not in args:
            raise SyntaxError('Keywords expected:' + ', '.join(args))

    def advanceAndGetType(self):
        """Advance one token, require it to be a type, and return it.

        :raises SyntaxError: on end of stream or a non-type token.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('type expected, found end of stream')
        if self.is_type():
            return self.tokenizer.token
        else:
            raise SyntaxError('type expected')

    def is_type(self):
        """True iff the current token is a primitive type or a class name."""
        return self.tokenizer.keyword() in [
            'int', 'char', 'boolean'
        ] or self.tokenizer.token_type == 'identifier'

    def advanceAndGetReturnType(self):
        """Advance one token and return it if it is a type or 'void'.

        :raises SyntaxError: if the token is neither.
        """
        self.advance()
        if self.is_type() or self.tokenizer.keyword() == 'void':
            return self.tokenizer.token
        else:
            raise SyntaxError('type expected')

    def advanceToClassName(self):
        """Advance to and return a class-name identifier."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToVarName(self):
        """Advance to and return a variable-name identifier."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToSubroutineName(self):
        """Advance to and return a subroutine-name identifier."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def hasClassVarDec(self):
        pass

    def advance(self):
        """Advance one token unconditionally.

        :raises SyntaxError: on end of stream.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('found end of stream!')

    def compileClass(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        print('<class>')
        self.advanceKeyword('class')
        self.xml_print_el()
        # classname
        self.advanceToClassName()
        className = self.tokenizer.identifier()
        self.xml_print_el()
        # {
        self.advanceSymbol('{')
        self.xml_print_el()
        self.advance()
        # classVarDec*
        while (self.tokenizer.keyword() in ['static', 'field']):
            self.compileClassVarDec()
        # subroutineDec*
        while (self.tokenizer.keyword() in
               ['constructor', 'function', 'method']):
            self.compileSubroutine()
        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</class>')

    def compileClassVarDec(self):
        # ('static'|'field') type varName (',' varName)* ';'
        # TODO(review): the (',' varName)* repetition is not yet handled —
        # only a single varName is consumed before ';'.
        print('<classVarDec>')
        # ('static'|'field')
        self.xml_print_el()
        # type
        type = self.advanceAndGetType()
        self.xml_print_el()
        # varName
        varName = self.advanceToVarName()
        self.xml_print_el()
        # ;
        self.advanceSymbol(';')
        self.xml_print_el()
        print('</classVarDec>')
        self.advance()

    def compileSubroutine(self):
        print('<subroutineDec>')
        kind = self.tokenizer.keyword()
        self.xml_print_el()
        # ( 'void' | type )
        return_type = self.advanceAndGetReturnType()
        self.xml_print_el()
        # subroutineName
        name = self.advanceToSubroutineName()
        self.xml_print_el()
        # (
        self.advanceSymbol('(')
        self.xml_print_el()
        # TODO parameterList
        self.compileParameterList()
        # )
        self.advanceSymbol(')')
        self.xml_print_el()
        # subroutineBody
        self.compileSubroutineBody()
        print('</subroutineDec>')
        self.advance()

    def compileSubroutineBody(self):
        print('<subroutineBody>')
        # {
        self.advanceSymbol('{')
        self.xml_print_el()
        # varDec*
        # TODO a structure to represent the *
        # Fix: the original called the nonexistent ``self.varDec()``
        # (AttributeError); ``compileVarDec`` is the method that exists.
        self.compileVarDec()
        # statements
        self.compileStatements()
        # }
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</subroutineBody>')

    def compileParameterList(self):
        print('<parameterList>')
        print('</parameterList>')

    def compileVarDec(self):
        pass

    def compileStatements(self):
        pass

    # NOTE(review): the stubs below lacked the ``self`` parameter, so any
    # instance call would raise TypeError once implemented — fixed.
    def compileDo(self):
        pass

    def compileLet(self):
        pass

    def compileWhile(self):
        pass

    def compileReturn(self):
        pass

    def compileIf(self):
        pass

    def compileExpression(self):
        pass

    # if identifier: variable, array entry, subroutine call
    def compileTerm(self):
        # single lookahead token - can be [ ( or .
        pass

    # comma separated list of expressions
    def compileExpressionList(self):
        pass
class CompilationEngine:
    """Recursive-descent parser that turns a Jack token stream into an XML
    parse tree (Nand2Tetris project 10 output format).

    NOTE(review): `remove_blank_text` and `pretty_print=True` are lxml.etree
    options, not xml.etree.ElementTree — ET is presumably lxml; confirm the
    import at the top of the file.
    """

    def __init__(self, input_path, output_path):
        """
        creates a new compilation engine with the given input and output.
        the next routine called must be compileClass()
        :param input_path: input stream/file
        :param output_path: output stream/file
        """
        self._root = None           # root <class> element of the XML tree
        self._current_node = None   # element new children are attached to
        self.tokenizer = JackTokenizer(input_path)
        self.CompileClass()
        # Force empty <expressionList>/<parameterList> elements to render as
        # open/close tag pairs instead of self-closing tags ("/>").
        for elem in self._root.iter():
            if elem.tag == 'expressionList' or elem.tag == 'parameterList':
                if "/>" in str(ET.tostring(elem)):
                    elem.text = '\n'
        p = ET.XMLParser(remove_blank_text=True)
        tree = ET.ElementTree(self._root, parser=p)
        tree.write(output_path, method='xml', pretty_print=True)

    def CompileClass(self):
        """
        Compiles a complete class: 'class' className '{' ... '}'.
        """
        self._root = ET.Element('class')
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.keyWord())      # 'class'
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.identifier())   # className
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.symbol())       # '{'
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.symbol())       # '}'

    def _write_line(self, node, name):
        """
        Appends one terminal token element under `node`.
        The tag comes from TYPES keyed by the tokenizer's current token type;
        `name` is the token text, padded with spaces per the reference format.
        :param name: the text of the token
        """
        _ = ET.SubElement(node, TYPES[self.tokenizer.tokenType()])
        _.text = ' ' + name + ' '

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Grammar: ('static'|'field') type varName (',' varName)* ';'
        """
        # peek() presumably returns the next token's text without consuming
        # it — TODO confirm against JackTokenizer.
        peek = self.tokenizer.peek()
        if 'static' in peek or 'field' in peek:
            _classVarNode = ET.SubElement(self._root, 'classVarDec')
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.keyWord())     # field/static
            self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.keyWord())     # type
            self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.identifier())  # name
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self._write_line(_classVarNode, self.tokenizer.symbol())      # ,
                self.tokenizer.advance()
                self._write_line(_classVarNode, self.tokenizer.identifier())  # name
                self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.symbol())      # ;
            peek = self.tokenizer.peek()
            # Each classVarDec gets its own element; start a fresh one if
            # another declaration follows.
            if 'static' in peek or 'field' in peek:
                _classVarNode = ET.SubElement(self._root, 'classVarDec')

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        _last_node = self._current_node
        _subroutineNode = ET.SubElement(self._root, 'subroutineDec')
        self._current_node = _subroutineNode
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.keyWord())       # const/func/method
            self.tokenizer.advance()
            # current_token used directly: return type may be a keyword
            # ('void'/'int'...) or a class-name identifier
            self._write_line(_subroutineNode, self.tokenizer.current_token)   # void/type
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.identifier())    # name
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.symbol())        # '('
            self.CompileParameterList()
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.symbol())        # ')'
            self.tokenizer.advance()
            self._current_node = ET.SubElement(_subroutineNode, 'subroutineBody')
            self._write_line(self._current_node, self.tokenizer.symbol())     # '{'
            peek = self.tokenizer.peek()
            if 'var' in peek:
                self.CompileVarDec()
            self.CompileStatements()
            self.tokenizer.advance()
            self._write_line(self._current_node, self.tokenizer.symbol())     # '}'
            peek = self.tokenizer.peek()
            # Another subroutine follows: open a fresh subroutineDec element.
            if 'function' in peek or 'constructor' in peek or 'method' in peek:
                _subroutineNode = ET.SubElement(self._root, 'subroutineDec')
                self._current_node = _subroutineNode

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing ().  Grammar: ((type varName) (',' type varName)*)?
        """
        param_list = ET.SubElement(self._current_node, 'parameterList')
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.keyWord())      # type
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.identifier())   # name
            peek = self.tokenizer.peek()
            while peek == ',':
                self.tokenizer.advance()
                self._write_line(param_list, self.tokenizer.symbol())       # ','
                self.tokenizer.advance()
                self._write_line(param_list, self.tokenizer.keyWord())      # type
                self.tokenizer.advance()
                self._write_line(param_list, self.tokenizer.identifier())   # name
                peek = self.tokenizer.peek()
        # if not param_list.text:
        #     param_list.text = '\n'

    def CompileVarDec(self):
        """
        Compiles a var declaration: 'var' type varName (',' varName)* ';'
        """
        _varDecNode = ET.SubElement(self._current_node, 'varDec')
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.keyWord())       # 'var'
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.keyWord())       # type
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.identifier())    # name
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self._write_line(_varDecNode, self.tokenizer.symbol())        # ,
                self.tokenizer.advance()
                self._write_line(_varDecNode, self.tokenizer.identifier())    # name
                self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.symbol())        # ;
            peek = self.tokenizer.peek()
            if peek == 'var':
                _varDecNode = ET.SubElement(self._current_node, 'varDec')

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        Dispatches on the leading keyword of each statement.
        """
        peek = self.tokenizer.peek()
        _parent = self._current_node
        self._current_node = ET.SubElement(self._current_node, 'statements')
        while 'let' in peek or 'if' in peek or 'while' in peek or 'do' in peek or 'return' in peek:
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()
        self._current_node = _parent

    def CompileDo(self):
        """
        Compiles a do statement: 'do' subroutineCall ';'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'doStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())        # 'do'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.identifier())     # name
        peek = self.tokenizer.peek()
        # zero or more '.subName' qualifiers (Class.method / obj.method)
        while peek == '.':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())     # '.'
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.identifier())
            peek = self.tokenizer.peek()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '('
        self.CompileExpressionList()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # ';'
        self._current_node = _last_node

    def CompileLet(self):
        """
        Compiles a let statement:
        'let' varName ('[' expression ']')? '=' expression ';'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'letStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())        # 'let'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.identifier())     # varName
        peek = self.tokenizer.peek()
        if peek == '[':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())     # '['
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())     # ']'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '='
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # ';'
        self._current_node = _last_node

    def CompileWhile(self):
        """
        Compiles a while statement:
        'while' '(' expression ')' '{' statements '}'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'whileStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())        # while
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '('
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '{'
        self.CompileStatements()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '}'
        self._current_node = _last_node

    def CompileReturn(self):
        """
        Compiles a return statement: 'return' expression? ';'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'returnStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())        # return
        peek = self.tokenizer.peek()
        if peek != ';':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            self.tokenizer.advance()
        self._write_line(self._current_node, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'ifStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())        # if
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '('
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '{'
        self.CompileStatements()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())         # '}'
        peek = self.tokenizer.peek()
        if peek == 'else':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.keyWord())    # else
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())     # '{'
            self.CompileStatements()
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())     # '}'
        self._current_node = _last_node

    def CompileExpression(self):
        """
        Compiles an expression: term (op term)*
        """
        _last_node = self._current_node
        self._current_node = ET.SubElement(self._current_node, 'expression')
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            self._write_line(self._current_node, self.tokenizer.symbol())  # op
            self.tokenizer.advance()
            self.CompileTerm()
            peek = self.tokenizer.peek()
        self._current_node = _last_node

    def CompileTerm(self):
        """
        Compiles a term.  This routine is faced with a slight difficulty when
        trying to decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine must
        distinguish between a variable, an array entry, and a subroutine call.
        A single look-ahead token, which may be one of [, (, or . suffices to
        distinguish between the three possibilities.  Any other token is not
        part of this term and should not be advanced over.
        """
        term_branch = ET.SubElement(self._current_node, 'term')
        # self.tokenizer.advance()
        if self.tokenizer.tokenType(
        ) == 'INT_CONST' or self.tokenizer.tokenType() == 'KEYWORD':
            # integerConstant or keywordConstant (true/false/null/this)
            self._write_line(term_branch, self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            self._write_line(term_branch, self.tokenizer.stringVal())
        elif self.tokenizer.current_token in UNARY_OP:
            # unaryOp term — recurse with term_branch as the current node
            self._write_line(term_branch, self.tokenizer.symbol())
            last_node = self._current_node
            self._current_node = term_branch
            self.tokenizer.advance()
            self.CompileTerm()
            self._current_node = last_node
        elif self.tokenizer.current_token in SYMBOLS:
            # '(' expression ')'
            self._write_line(term_branch, self.tokenizer.symbol())
            self.tokenizer.advance()
            last_node = self._current_node
            self._current_node = term_branch
            self.CompileExpression()
            self._current_node = last_node
            self.tokenizer.advance()
            self._write_line(term_branch, self.tokenizer.symbol())
        else:
            # identifier: plain var, array entry, or subroutine call —
            # disambiguated by one token of lookahead
            self._write_line(term_branch, self.tokenizer.identifier())
            peek = self.tokenizer.peek()
            if '[' in peek or '(' in peek:
                # varName '[' expression ']'  or  subName '(' exprList ')'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())
                self.tokenizer.advance()
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpression()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())
            elif '.' in peek:
                # (className|varName) '.' subName '(' expressionList ')'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())        # '.'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.identifier())    # subName
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())        # '('
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpressionList()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())        # ')'

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        """
        last_node = self._current_node
        self._current_node = ET.SubElement(self._current_node,
                                           'expressionList')
        peek = self.tokenizer.peek()
        while peek != ')':
            self.tokenizer.advance()
            if peek == ',':
                self._write_line(self._current_node, self.tokenizer.symbol())
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        self._current_node = last_node
    def test_advance(self):
        """Tests all parts of the tokenizer using this Jack code:

        /** Multi-line comment for some class. */
        class A{
            // Single-line comment
            let x = -4;
            do Output.printString("Ring Constants!");
        }

        Walks the token stream one advance() at a time and checks both the
        token value and its reported type; comments must be skipped entirely.
        """
        tokenizer = JackTokenizer("test.jack")
        # class A {
        tokenizer.advance()
        self.assertEqual(tokenizer.keyword(), CLASS)
        self.assertEqual(tokenizer.token_type(), KEYWORD)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'A')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '{')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        # let x = -4 ;   (note: '-' and '4' must come out as two tokens)
        tokenizer.advance()
        self.assertEqual(tokenizer.keyword(), LET)
        self.assertEqual(tokenizer.token_type(), KEYWORD)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'x')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '=')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '-')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.int_val(), 4)
        self.assertEqual(tokenizer.token_type(), INT_CONST)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), ';')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        # do Output.printString("Ring Constants!") ;
        tokenizer.advance()
        self.assertEqual(tokenizer.keyword(), DO)
        self.assertEqual(tokenizer.token_type(), KEYWORD)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'Output')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '.')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.identifier(), 'printString')
        self.assertEqual(tokenizer.token_type(), IDENTIFIER)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '(')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        # string constant comes back without the surrounding quotes
        tokenizer.advance()
        self.assertEqual(tokenizer.string_val(), 'Ring Constants!')
        self.assertEqual(tokenizer.token_type(), STRING_CONST)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), ')')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), ';')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
        # closing brace of the class
        tokenizer.advance()
        self.assertEqual(tokenizer.symbol(), '}')
        self.assertEqual(tokenizer.token_type(), SYMBOL)
class CompilationEngine:
    """Recursive-descent Jack-to-VM compiler (Nand2Tetris project 11).

    Drives a JackTokenizer over the input, tracks variables in a
    SymbolTable, and emits VM commands through a VMWriter.  Convention used
    throughout: each compile_* method is entered with the tokenizer standing
    on the construct's first token and leaves it standing on the first token
    AFTER the construct.
    """

    def __init__(self, input_file, output_file):
        self.tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.current_sub_name = None   # name of the subroutine being compiled
        self.class_name = None         # name of the class being compiled
        self.func_counter = 0
        self.while_counter = 0         # makes while-loop labels unique
        self.if_counter = 0            # makes if/else labels unique
        # starts the process
        self.tokenizer.advance()
        self.compile_class()
        self.vm_writer.close()

    def compile_class(self):
        """
        compiles the class function
        :return: none
        """
        # advances a single step to get the class name
        self.tokenizer.advance()
        # set class's name
        self.class_name = self.tokenizer.current_token
        # moves to the symbol {
        self.tokenizer.advance()
        # move to the next symbol and check what it is
        self.tokenizer.advance()
        # compiles class variable
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles subroutine
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # we are now at the <symbol> } <symbol> which closes the class

    def compile_class_var_dec(self):
        """
        compiles a var dec (static/field) into the class-level symbol table;
        emits no VM code.
        :return: none
        """
        var_kind = self.tokenizer.key_word()
        # advances the token to the var's type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()
        # advances the token to the var's identifier
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_name = self.tokenizer.key_word()
        else:
            var_name = self.tokenizer.identifier()
        # update symbol table
        self.symbol_table.define(var_name, var_type, var_kind)
        # advance to next token, and check if there are more var_names
        self.tokenizer.advance()
        while self.tokenizer.current_token != ";":
            # token is <symbol> , <symbol>
            # advance to var's identifier
            self.tokenizer.advance()
            var_name = self.tokenizer.current_token
            # update symbol table
            self.symbol_table.define(var_name, var_type, var_kind)
            self.tokenizer.advance()
        # the current token is <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """
        compiles a single sub routine
        :return: none
        """
        # start new subroutine symbol table
        self.symbol_table.start_subroutine()
        # get subroutine type (method/construction/function)
        sub_type = self.tokenizer.key_word()
        # advances the token to what the subroutine returns
        self.tokenizer.advance()
        # updates the return type
        # (NOTE: return_type is captured but not used by code generation —
        # void vs non-void is handled by the caller popping to temp)
        if self.tokenizer.token_type() == KEY_WORD:
            return_type = self.tokenizer.key_word()
        else:
            return_type = self.tokenizer.identifier()
        # advances the token to <identifier> sub_name <identifier>
        self.tokenizer.advance()
        # update the subroutine name
        subroutine_name = self.tokenizer.identifier()
        self.current_sub_name = subroutine_name
        # advance to <symbol> ( <symbol>
        self.tokenizer.advance()
        # if subroutine is a method, add 'this' to the symbol table as argument 0
        if sub_type == METHOD:
            self.symbol_table.define("this", self.class_name, "ARG")
        # compiles the parameter list
        self.compile_parameter_list()
        # we are at <symbol> ) <symbol>
        # advance to subroutine body, and compile it
        self.tokenizer.advance()
        self.compile_subroutine_body(sub_type)

    def compile_subroutine_body(self, sub_type):
        """
        the method compiles the subroutine body: var decs, the `function`
        header, the method/constructor prologue, then the statements.
        :return: none
        """
        # we are at bracket {, advance
        self.tokenizer.advance()
        # compile var dec (must precede write_function — the local count is
        # only known after all `var` lines are processed)
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()
        # write function label
        self.vm_writer.write_function(
            self.class_name + '.' + self.current_sub_name,
            self.symbol_table.var_count("VAR"))
        # if is method, update THIS to the object
        if sub_type == METHOD:
            self.vm_writer.write_push(ARG, 0)
            self.vm_writer.write_pop("POINTER", 0)
        # if is constructor, allocate memory, and put in this
        if sub_type == CONSTRUCTOR:
            self.vm_writer.write_push("CONST",
                                      self.symbol_table.var_count("FIELD"))
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        if self.tokenizer.current_token != "}":
            self.compile_statements()
        # we are at bracket }, advance
        self.tokenizer.advance()

    def compile_parameter_list(self):
        """
        compiles a parameter list into the subroutine symbol table as ARG
        entries; emits no VM code.
        :return: none
        """
        # advance to first parameter
        self.tokenizer.advance()
        # while there are more parameters
        while self.tokenizer.current_token != ')':
            # tests what to put as the type of the object
            if self.tokenizer.token_type() == KEY_WORD:
                var_type = self.tokenizer.key_word()
            else:
                var_type = self.tokenizer.identifier()
            # advance to variables name <identifier> var_name <identifier>
            self.tokenizer.advance()
            var_name = self.tokenizer.identifier()
            # define new variable
            self.symbol_table.define(var_name, var_type, "ARG")
            # gets the next token
            self.tokenizer.advance()
            # advance to next token if we are at ','
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()

    def compile_var_dec(self):
        """
        compiles a declaration of a variable ('var' type name (',' name)* ';')
        into the symbol table as VAR entries; emits no VM code.
        :return: none
        """
        # we are at <keyword> var <keyword>
        # advance to variable type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()
        # advance to the variables name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            # define variable in symbol table
            self.symbol_table.define(var_name, var_type, "VAR")
            # advance to next token
            self.tokenizer.advance()
            # tests what to put as the type of the object
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()
        # we are at <symbol> ; <symbol>
        # advance to next token
        self.tokenizer.advance()

    def compile_statements(self):
        """
        the method compiles statements until the closing '}' of the block
        :return: none
        """
        # while there are more statements, deal with each one
        while self.tokenizer.current_token != '}':
            statement_type = self.tokenizer.key_word()
            if statement_type == LET:
                self.compile_let()
            elif statement_type == IF:
                self.compile_if()
            elif statement_type == WHILE:
                self.compile_while()
            elif statement_type == DO:
                self.compile_do()
            elif statement_type == RETURN:
                self.compile_return()

    def compile_do(self):
        """
        the method compiles a do command
        :return: none
        """
        # we are at <keyword> do <keyword>
        # advance to next token <identifier> name_of_func <identifier>
        self.tokenizer.advance()
        func_name = self.tokenizer.identifier()
        self.tokenizer.advance()
        # compile the subroutine call
        self.compile_subroutine_call(func_name)
        # pop the result from the function into temp (do discards the value)
        self.vm_writer.write_pop("TEMP", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()

    def compile_let(self):
        """
        the method compiles a let statement
        :return: none
        """
        # we are at <keyword> let <keyword>
        # advance to next token (var_name)
        self.tokenizer.advance()
        # we are at <identifier> var_name <identifier>
        var_name = self.tokenizer.identifier()
        # get variable data
        var_index = self.symbol_table.index_of(var_name)
        var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        is_array = False
        if self.tokenizer.current_token == '[':
            is_array = True
            # push arr
            self.vm_writer.write_push(var_kind, var_index)
            # advance to expression and compile it
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ] <symbol>, advance to next token
            self.tokenizer.advance()
            # add the index of array and the expression to get the correct location
            self.vm_writer.write_arithmetic("ADD")
        # we are at <symbol> = <symbol>
        # advance to expression and compile it
        self.tokenizer.advance()
        self.compile_expression()
        # if var is an array: stash the value, point THAT at arr+i, store
        if is_array:
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        # if var is not an array
        else:
            self.vm_writer.write_pop(var_kind, var_index)
        # we are at <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()
        return

    def compile_while(self):
        """
        the method compiles a while statement
        :return: none
        """
        while_counter = str(self.while_counter)
        # update the while counter (counter is snapshotted first so nested
        # whiles compiled inside the body get fresh labels)
        self.while_counter += 1
        # create new label for the start of the while
        self.vm_writer.write_label("While_" + while_counter)
        # we are at <keyword> while <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_expression()
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # negate expression
        self.vm_writer.write_arithmetic("NOT")
        # if condition is not met, go to the end of the while
        self.vm_writer.write_if("End_While_" + while_counter)
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        # compile statements
        self.compile_statements()
        # go back to the start of the while
        self.vm_writer.write_goto("While_" + while_counter)
        # create new label for the end of the while
        self.vm_writer.write_label("End_While_" + while_counter)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        return

    def compile_return(self):
        """
        the method compiles a return statement
        :return: none
        """
        # we are at <keyword> return <keyword>, advance to next token
        self.tokenizer.advance()
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # if function is void, push const 0 to the stack
            # (Jack calling convention: every call leaves a return value)
            self.vm_writer.write_push("CONST", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()
        self.vm_writer.write_return()
        return

    def compile_if(self):
        """
        the method compiles an if statement (with optional else clause)
        :return: none
        """
        if_count = str(self.if_counter)
        # update if counter (snapshot first so nested ifs get fresh labels)
        self.if_counter += 1
        # we are at <keyword> if <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        # compile expression
        self.compile_expression()
        # negate the expression
        self.vm_writer.write_arithmetic("NOT")
        # check if condition is met
        self.vm_writer.write_if("ELSE_" + if_count)
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_statements()
        # jump to the end of the if
        self.vm_writer.write_goto("END_IF_" + if_count)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        # create else label (which may be empty)
        self.vm_writer.write_label("ELSE_" + if_count)
        if self.tokenizer.current_token == 'else':
            # we are at <keyword> else <keyword>, advance
            self.tokenizer.advance()
            # we are at <symbol> { <symbol>, advance
            self.tokenizer.advance()
            self.compile_statements()
            # we are at <symbol> } <symbol>, advance
            self.tokenizer.advance()
        # create new label
        self.vm_writer.write_label("END_IF_" + if_count)
        return

    def compile_expression(self):
        """
        the method compiles an expression: term (op term)*
        :return:
        """
        # compile the term
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            call_math = False
            # we are at <symbol> op <symbol>
            op = OP_DICT.get(self.tokenizer.current_token)
            # check if operator needs to call math ('*' and '/' have no VM
            # instruction and compile to Math.multiply / Math.divide calls)
            if self.tokenizer.current_token == '*' or \
                    self.tokenizer.current_token == '/':
                call_math = True
            # advance to next term and compile term
            self.tokenizer.advance()
            self.compile_term()
            # output the operator (postfix: both operands already pushed)
            if call_math:
                self.vm_writer.write_call(op[0], op[1])
            else:
                self.vm_writer.write_arithmetic(op)
        return

    def compile_term(self):
        """
        the method compiles a term
        :return: none
        """
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            # push the const int
            self.vm_writer.write_push("CONST", self.tokenizer.int_val())
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # write without the ""
            string_val = self.tokenizer.string_val()
            # push the len of the string and call the string constructor
            self.vm_writer.write_push("CONST", len(string_val))
            self.vm_writer.write_call("String.new", 1)
            # update new string (appendChar returns the string, so the
            # object stays on the stack between iterations)
            for char in string_val:
                self.vm_writer.write_push("CONST", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            # true/false/null/this
            segment, idx = KEY_WORD_CONST.get(self.tokenizer.current_token)
            self.vm_writer.write_push(segment, idx)
            if self.tokenizer.current_token == 'true':
                # true is -1: push 0 then NOT
                self.vm_writer.write_arithmetic('NOT')
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # we are at <symbol> ( <symbol>, advance to next token
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ) <symbol>, advance to next token
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            # unaryOp term: compile the operand first, then emit the op
            op_command = UNARY_OP.get(self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
            self.vm_writer.write_arithmetic(op_command)
        # var/var[expression]/subroutine_call
        else:
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                # push arr
                self.vm_writer.write_push(var_kind, var_index)
                # we are at <symbol> [ <symbol>, advance to expression and compile it
                self.tokenizer.advance()
                self.compile_expression()
                # add the index of array and the expression to get the correct location
                self.vm_writer.write_arithmetic("ADD")
                # set the that pointer
                self.vm_writer.write_pop("POINTER", 1)
                # push to the stack what is in the arr[i]
                self.vm_writer.write_push("THAT", 0)
                # we are at <symbol> ] <symbol>, advance
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or \
                    self.tokenizer.current_token == '.':
                self.compile_subroutine_call(var_name)
            else:
                # if is just 'var'
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)
        return

    def compile_expression_list(self):
        """
        the method compiles a list of expressions
        :return: amount of arguments in the expression list
        """
        expression_counter = 0
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            expression_counter += 1
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                expression_counter += 1
                # we are at <symbol> , <symbol>, advance
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        return expression_counter

    def compile_subroutine_call(self, identifier):
        """
        the method compiles a subroutine call (not including the subroutine
        first varName).  Resolves `identifier` as either a variable holding
        an object (method call — push the object as implicit arg 0), a class
        name (static call), or a bare name (method on the current object).
        :return: none
        """
        func_name = self.class_name + "." + identifier
        num_of_arguments = 0
        if self.tokenizer.current_token == '.':
            # change func name to its class name
            if self.symbol_table.type_of(identifier) is not None:
                # identifier is a variable: method call on that object
                func_name = self.symbol_table.type_of(identifier)
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = func_name + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
                # push the object to the stack (implicit argument 0)
                segment = TYPE_DICT.get(self.symbol_table.kind_of(identifier))
                idx = self.symbol_table.index_of(identifier)
                self.vm_writer.write_push(segment, idx)
                num_of_arguments += 1
            else:
                # identifier is a class name: plain function/constructor call
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = identifier + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
        else:
            # unqualified call: method on the current object — push this
            self.vm_writer.write_push("POINTER", 0)
            num_of_arguments += 1
        # we are at <symbol> ( <symbol>, advance
        self.tokenizer.advance()
        num_of_arguments += self.compile_expression_list()
        # we are at <symbol> ) <symbol>, advance
        self.tokenizer.advance()
        self.vm_writer.write_call(func_name, num_of_arguments)
        return
import sys

from JackTokenizer import *
from CompilationEngine import *

if __name__ == "__main__":
    # Driver: tokenize the .jack file given as argv[1] and compile it to
    # Output.xml.  (FIX: files are now opened via context managers so the
    # handles are closed even on error; the unused `splitter` local was
    # removed.)
    root = sys.argv[1]
    print('[ARGUMENT]:', root)
    with open(root, 'r') as sourceFile, open('Output.xml', 'w') as opFile:
        tokenizer = JackTokenizer(sourceFile)
        compiler = CompilationEngine(tokenizer, opFile)
        # consume the whole input token stream
        while not tokenizer.endOfFile():
            tokenizer.advance()
        tokenizer.end()
        # tokenizer.feed()
        print()
        # feedFinish presumably flags a tokenizing error — TODO confirm
        # against JackTokenizer
        if not tokenizer.feedFinish:
            compiler.compile()
class CompilationEngine:
    """Recursive-descent compiler for the Jack language.

    Walks the token stream produced by ``JackTokenizer`` and emits VM
    commands through a ``VMWriter``.  Symbol bookkeeping is delegated to
    ``JackClass`` (class scope) and ``JackSubroutine`` (subroutine scope).
    Compilation starts immediately on construction via ``CompileClass``.
    """

    def __init__(self, input_path, output_path):
        """Create a compilation engine and compile the whole input.

        :param input_path: input stream/file with Jack source
        :param output_path: output stream/file for the generated VM code
        """
        # Running counter used to make flow-control labels unique.
        self.labels = 0
        # Current class / subroutine scopes (set during compilation).
        self.jack_class = None
        self.class_subroutine = None
        self.tokenizer = JackTokenizer(input_path)
        self._writer = VMWriter(output_path)
        self.CompileClass()

    def CompileClass(self):
        """Compile a complete class: 'class' <name> '{' decls '}'."""
        self.tokenizer.advance()  # 'class'
        self.tokenizer.advance()  # class name
        self.jack_class = JackClass(self.tokenizer.current_token)
        self.tokenizer.advance()  # '{'
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()  # '}'

    def CompileClassVarDec(self):
        """Compile zero or more 'static'/'field' variable declarations."""
        peek = self.tokenizer.peek()
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()        # field/static
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()    # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()     # name
            self.tokenizer.advance()
            self.jack_class.add_var(name, var_type, kind)
            # Additional names in the same declaration: ', name'*
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.tokenizer.advance()
                self.jack_class.add_var(name, var_type, kind)
            peek = self.tokenizer.peek()

    def CompileSubroutine(self):
        """Compile every method, function, and constructor of the class."""
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()             # constructor/function/method
            self.tokenizer.advance()
            ret_type = self.tokenizer.current_token     # void / type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()          # subroutine name
            self.tokenizer.advance()
            self.class_subroutine = JackSubroutine(name, kind, ret_type,
                                                   self.jack_class)
            self.CompileParameterList()
            self.tokenizer.advance()  # ')'
            self.tokenizer.advance()  # '{'
            peek = self.tokenizer.peek()
            if 'var' in peek:
                self.CompileVarDec()
            # Declare the VM function with its local-variable count.
            full_name = '{}.{}'.format(self.jack_class.class_name,
                                       self.class_subroutine.name)
            self._writer.write_function(full_name, self.class_subroutine.var_c)
            if kind == 'constructor':
                # Allocate the object and anchor 'this' (pointer 0) to it.
                fields = self.jack_class.counters[0]
                self._writer.push('constant', str(fields))
                self._writer.write_call('Memory.alloc', '1')
                self._writer.pop('pointer', '0')
            elif kind == 'method':
                # Argument 0 is the receiver; anchor 'this' to it.
                self._writer.push('argument', '0')
                self._writer.pop('pointer', '0')
            self.CompileStatements()
            self.tokenizer.advance()  # '}'
            peek = self.tokenizer.peek()

    def CompileParameterList(self):
        """Compile a (possibly empty) parameter list, excluding the ()."""
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            param_type = self.tokenizer.keyWord()   # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()      # name
            peek = self.tokenizer.peek()
            self.class_subroutine.add_arg(name, param_type)
            while peek == ',':
                self.tokenizer.advance()  # ','
                self.tokenizer.advance()
                param_type = self.tokenizer.keyWord()
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.class_subroutine.add_arg(name, param_type)
                peek = self.tokenizer.peek()

    def CompileVarDec(self):
        """Compile zero or more 'var' declarations."""
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()  # 'var'
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.class_subroutine.add_var(name, var_type)
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.class_subroutine.add_var(name, var_type)
                self.tokenizer.advance()
            peek = self.tokenizer.peek()

    def CompileStatements(self):
        """Compile a sequence of statements, excluding the enclosing {}."""
        peek = self.tokenizer.peek()
        while ('let' in peek or 'if' in peek or 'while' in peek
               or 'do' in peek or 'return' in peek):
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()

    def CompileDo(self):
        """Compile a do statement; the call's return value is discarded."""
        self.tokenizer.advance()  # 'do'
        self.tokenizer.advance()  # first token of the call
        self.CompileTerm()
        self._writer.pop('temp', '0')  # discard the returned value
        self.tokenizer.advance()  # ';'
        if self.tokenizer.current_token != ';':
            self.tokenizer.advance()

    def CompileLet(self):
        """Compile a let statement (plain variable or array element)."""
        self.tokenizer.advance()  # 'let'
        self.tokenizer.advance()
        name = self.tokenizer.identifier()
        symbol = self.class_subroutine.get_symbol(name)
        peek = self.tokenizer.peek()
        if peek == '[':
            # Array target: compute base + index, then store via 'that'.
            self.tokenizer.advance()  # '['
            self.tokenizer.advance()
            self.CompileExpression()  # index
            self.tokenizer.advance()  # ']'
            self.tokenizer.advance()  # '='
            self._writer.push(symbol)
            self._writer.write_cmd('add')
            self.tokenizer.advance()
            self.CompileExpression()  # value
            # Stash the value so 'pointer 1' can be set without losing it.
            self._writer.pop('temp', '0')
            self._writer.pop('pointer', '1')
            self._writer.push('temp', '0')
            self._writer.pop('that', '0')
        else:
            self.tokenizer.advance()  # '='
            self.tokenizer.advance()
            self.CompileExpression()
            self._writer.pop(symbol)
        self.tokenizer.advance()  # ';'

    def CompileWhile(self):
        """Compile a while statement.

        Layout: WHILE_EXP: <cond>; if-goto WHILE_END on false; <body>;
        goto WHILE_EXP; WHILE_END:.
        """
        label_c = self.labels
        # Reserve this label index; without this, consecutive/nested while
        # loops reused the same index and produced duplicate labels.
        self.labels += 1
        self.tokenizer.advance()  # 'while'
        self.tokenizer.advance()  # '('
        self.tokenizer.advance()
        self._writer.write_label(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self.CompileExpression()
        self.tokenizer.advance()  # ')'
        self.tokenizer.advance()  # '{'
        self._writer.write_if(LABEL_FORMAT.format('WHILE_END', label_c))
        self.CompileStatements()
        # Fixed: loop back to the condition and declare the exit label.
        # (Previously emitted 'goto WHILE_END' plus a duplicate WHILE_EXP
        # label, so the loop never iterated and WHILE_END was undefined.)
        self._writer.write_goto(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self._writer.write_label(LABEL_FORMAT.format('WHILE_END', label_c))
        self.tokenizer.advance()  # '}'

    def CompileReturn(self):
        """Compile a return statement; void returns push constant 0."""
        self.tokenizer.advance()  # 'return'
        peek = self.tokenizer.peek()
        if peek != ';':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            self._writer.push('constant', '0')
            self.tokenizer.advance()
        self._writer.write_return()

    def CompileIf(self):
        """Compile an if statement, possibly with a trailing else clause."""
        label_c = self.labels
        self.tokenizer.advance()  # 'if'
        self.tokenizer.advance()
        self.tokenizer.advance()  # '('
        self.CompileExpression()
        self.tokenizer.advance()  # ')'
        self.tokenizer.advance()  # '{'
        self._writer.write_if(LABEL_FORMAT.format('IF_TRUE', label_c))
        self._writer.write_goto(LABEL_FORMAT.format('IF_FALSE', label_c))
        self._writer.write_label(LABEL_FORMAT.format('IF_TRUE', label_c))
        self.labels += 1  # reserve before recursing so nested ifs differ
        self.CompileStatements()
        self.tokenizer.advance()  # '}'
        peek = self.tokenizer.peek()
        if peek == 'else':
            self._writer.write_goto(LABEL_FORMAT.format('IF_END', label_c))
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))
            self.tokenizer.advance()  # 'else'
            self.tokenizer.advance()  # '{'
            self.CompileStatements()
            self.tokenizer.advance()  # '}'
            self._writer.write_label(LABEL_FORMAT.format('IF_END', label_c))
        else:
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))

    def CompileExpression(self):
        """Compile an expression: term (op term)*, left to right."""
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            self._writer.write_cmd(OP_DIC[op])
            peek = self.tokenizer.peek()

    def CompileTerm(self):
        """Compile a term.

        For an identifier, one token of look-ahead ('[', '(', '.')
        distinguishes a variable, an array entry, and a subroutine call.
        """
        if self.tokenizer.current_token in UNARY_OP:
            # Fixed: on the stack VM the unary command must be emitted
            # AFTER its operand (previously it was written first).
            unary_cmd = UNARY_DIC[self.tokenizer.current_token]
            self.tokenizer.advance()
            self.CompileTerm()
            self._writer.write_cmd(unary_cmd)
        elif self.tokenizer.current_token == '(':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()  # ')'
        elif self.tokenizer.tokenType() == 'INT_CONST':
            self._writer.push('constant', self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            # Build the string at runtime, one appendChar call per char.
            text = self.tokenizer.stringVal()
            self._writer.push('constant', len(text))
            self._writer.write_call('String.new', '1')
            for char in text:
                self._writer.push('constant', ord(char))
                self._writer.write_call('String.appendChar', '2')
        elif self.tokenizer.tokenType() == 'KEYWORD':
            if self.tokenizer.current_token == 'this':
                self._writer.push('pointer', '0')
            else:
                # null/false -> 0; true -> ~0 (i.e. -1).
                self._writer.push('constant', '0')
                if self.tokenizer.current_token == 'true':
                    # Fixed: use write_cmd like every other VM command
                    # (was self._writer.write('not')).
                    self._writer.write_cmd('not')
        elif self.tokenizer.tokenType() == 'IDENTIFIER':
            value = self.tokenizer.identifier()
            var = self.class_subroutine.get_symbol(value)
            peek = self.tokenizer.peek()
            if peek == '[':
                # Array read: push base + index, dereference via 'that'.
                self.tokenizer.advance()
                self.tokenizer.advance()  # '['
                self.CompileExpression()
                self._writer.push(var)
                self._writer.write_cmd('add')
                self._writer.pop('pointer', '1')
                self._writer.push('that', '0')
                self.tokenizer.advance()  # ']'
            else:
                function_name = value
                functions_class = self.class_subroutine.jack_class
                is_default = True  # call on the current object ('this')
                args = 0
                if peek == '.':
                    is_default = False
                    self.tokenizer.advance()
                    self.tokenizer.advance()
                    function_object = self.class_subroutine.get_symbol(
                        function_name)
                    function_name = self.tokenizer.current_token
                    if function_object:
                        # Method call on a variable: push the receiver.
                        functions_class = var.type
                        args = 1
                        self._writer.push(var)
                    else:
                        # Plain function/constructor of another class.
                        functions_class = value
                peek = self.tokenizer.peek()
                if peek == '(':
                    if is_default:
                        # Implicit method call on 'this'.
                        args = 1
                        self._writer.push('pointer', '0')
                    self.tokenizer.advance()  # '('
                    args += self.CompileExpressionList()
                    if not isinstance(functions_class, str):
                        functions_class = functions_class.class_name
                    full_name = '{}.{}'.format(functions_class, function_name)
                    self._writer.write_call(full_name, args)
                    if self.tokenizer.current_token != ')':
                        self.tokenizer.advance()  # ')'
                elif var:
                    # Plain variable reference.
                    self._writer.push(var)

    def CompileExpressionList(self):
        """Compile a (possibly empty) comma-separated expression list.

        :return: the number of expressions compiled
        """
        expressions_counter = 0
        peek = self.tokenizer.peek()
        while peek != ')' and peek != ';':
            self.tokenizer.advance()
            expressions_counter += 1
            if self.tokenizer.current_token == ',':
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        return expressions_counter