class CompilationEngine:
    """Recursive-descent parser for Jack source (nand2tetris, project 10).

    Drives a JackTokenizer over the input and records the parse tree as an
    XML document.  NOTE(review): ``pretty_print`` / ``remove_blank_text``
    are lxml extensions, so ``ET`` is presumably ``lxml.etree`` — confirm
    against the module imports.
    """

    def __init__(self, input_path, output_path):
        """
        Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass().

        Compilation runs to completion inside the constructor: the whole
        input is parsed and the XML tree is written to ``output_path``.

        :param input_path: input stream/file
        :param output_path: output stream/file
        """
        self._root = None          # root <class> element, set by CompileClass()
        self._current_node = None  # element that new sub-elements attach to
        self.tokenizer = JackTokenizer(input_path)
        self.CompileClass()
        # Empty <expressionList/> / <parameterList/> elements would serialize
        # as self-closing tags; give them newline text so they serialize as an
        # open/close tag pair (matching the reference comparer's format).
        for elem in self._root.iter():
            if elem.tag == 'expressionList' or elem.tag == 'parameterList':
                if "/>" in str(ET.tostring(elem)):
                    elem.text = '\n'
        p = ET.XMLParser(remove_blank_text=True)
        tree = ET.ElementTree(self._root, parser=p)
        tree.write(output_path, method='xml', pretty_print=True)

    def CompileClass(self):
        """
        Compiles a complete class:
        'class' className '{' classVarDec* subroutineDec* '}'
        """
        self._root = ET.Element('class')
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.keyWord())      # 'class'
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.identifier())   # className
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.symbol())       # '{'
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()
        self._write_line(self._root, self.tokenizer.symbol())       # '}'

    def _write_line(self, node, name):
        """
        Appends the current token under ``node`` as a terminal XML element.

        The element tag is looked up from the current token type via TYPES;
        the token text is padded with single spaces, matching the reference
        output format.

        :param node: parent XML element to attach the token to
        :param name: the text of the current token
        """
        _ = ET.SubElement(node, TYPES[self.tokenizer.tokenType()])
        _.text = ' ' + name + ' '

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration:
        ('static' | 'field') type varName (',' varName)* ';'

        Handles zero or more consecutive declarations; each declaration gets
        its own <classVarDec> element under the class root.
        """
        peek = self.tokenizer.peek()
        if 'static' in peek or 'field' in peek:
            _classVarNode = ET.SubElement(self._root, 'classVarDec')
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.keyWord())  # field/static
            self.tokenizer.advance()
            # NOTE(review): keyWord() is used even though a type may be a
            # class-name identifier — confirm the tokenizer tolerates this.
            self._write_line(_classVarNode, self.tokenizer.keyWord())  # type
            self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.identifier())  # name
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self._write_line(_classVarNode, self.tokenizer.symbol())  # ','
                self.tokenizer.advance()
                self._write_line(_classVarNode, self.tokenizer.identifier())  # name
                self.tokenizer.advance()
            self._write_line(_classVarNode, self.tokenizer.symbol())  # ';'
            peek = self.tokenizer.peek()
            # Another declaration follows: open a fresh <classVarDec>.
            if 'static' in peek or 'field' in peek:
                _classVarNode = ET.SubElement(self._root, 'classVarDec')

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor:
        ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody

        Loops over all consecutive subroutine declarations.
        """
        _last_node = self._current_node  # NOTE(review): saved but never restored
        _subroutineNode = ET.SubElement(self._root, 'subroutineDec')
        self._current_node = _subroutineNode
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.keyWord())  # const/func/method
            self.tokenizer.advance()
            # current_token is used directly here because the return type may
            # be a keyword ('void', 'int', ...) or a class-name identifier.
            self._write_line(_subroutineNode, self.tokenizer.current_token)  # void/type
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.identifier())  # name
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.symbol())  # '('
            self.CompileParameterList()
            self.tokenizer.advance()
            self._write_line(_subroutineNode, self.tokenizer.symbol())  # ')'
            self.tokenizer.advance()
            self._current_node = ET.SubElement(_subroutineNode, 'subroutineBody')
            self._write_line(self._current_node, self.tokenizer.symbol())  # '{'
            peek = self.tokenizer.peek()
            if 'var' in peek:
                self.CompileVarDec()
            self.CompileStatements()
            self.tokenizer.advance()
            self._write_line(self._current_node, self.tokenizer.symbol())  # '}'
            peek = self.tokenizer.peek()
            # Another subroutine follows: open a fresh <subroutineDec>.
            if 'function' in peek or 'constructor' in peek or 'method' in peek:
                _subroutineNode = ET.SubElement(self._root, 'subroutineDec')
                self._current_node = _subroutineNode

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing '()': ((type varName) (',' type varName)*)?
        """
        param_list = ET.SubElement(self._current_node, 'parameterList')
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.keyWord())  # type
            self.tokenizer.advance()
            self._write_line(param_list, self.tokenizer.identifier())  # name
            peek = self.tokenizer.peek()
            while peek == ',':
                self.tokenizer.advance()
                self._write_line(param_list, self.tokenizer.symbol())  # ','
                self.tokenizer.advance()
                self._write_line(param_list, self.tokenizer.keyWord())  # type
                self.tokenizer.advance()
                self._write_line(param_list, self.tokenizer.identifier())  # name
                peek = self.tokenizer.peek()

    def CompileVarDec(self):
        """
        Compiles a var declaration:
        'var' type varName (',' varName)* ';'

        Handles all consecutive declarations; each one gets its own <varDec>
        element under the current subroutine body.
        """
        _varDecNode = ET.SubElement(self._current_node, 'varDec')
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.keyWord())  # 'var'
            self.tokenizer.advance()
            # NOTE(review): keyWord() used for the type — see CompileClassVarDec.
            self._write_line(_varDecNode, self.tokenizer.keyWord())  # type
            self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.identifier())  # name
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self._write_line(_varDecNode, self.tokenizer.symbol())  # ','
                self.tokenizer.advance()
                self._write_line(_varDecNode, self.tokenizer.identifier())  # name
                self.tokenizer.advance()
            self._write_line(_varDecNode, self.tokenizer.symbol())  # ';'
            peek = self.tokenizer.peek()
            if peek == 'var':
                _varDecNode = ET.SubElement(self._current_node, 'varDec')

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        Dispatches on the next keyword to the per-statement compilers.
        """
        peek = self.tokenizer.peek()
        _parent = self._current_node
        self._current_node = ET.SubElement(self._current_node, 'statements')
        while 'let' in peek or 'if' in peek or 'while' in peek or 'do' in peek or 'return' in peek:
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()
        self._current_node = _parent

    def CompileDo(self):
        """
        Compiles a do statement:
        'do' subroutineCall ';'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'doStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # 'do'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.identifier())  # name
        peek = self.tokenizer.peek()
        # Consume a dotted call chain: (class|var) '.' subroutineName
        while peek == '.':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '.'
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.identifier())  # name
            peek = self.tokenizer.peek()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '('
        self.CompileExpressionList()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileLet(self):
        """
        Compiles a let statement:
        'let' varName ('[' expression ']')? '=' expression ';'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'letStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # 'let'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.identifier())  # varName
        peek = self.tokenizer.peek()
        if peek == '[':  # optional array indexing
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '['
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # ']'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '='
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileWhile(self):
        """
        Compiles a while statement:
        'while' '(' expression ')' '{' statements '}'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'whileStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # 'while'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '('
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '{'
        self.CompileStatements()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '}'
        self._current_node = _last_node

    def CompileReturn(self):
        """
        Compiles a return statement:
        'return' expression? ';'
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'returnStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # 'return'
        peek = self.tokenizer.peek()
        if peek != ';':  # optional return-value expression
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            self.tokenizer.advance()
        self._write_line(self._current_node, self.tokenizer.symbol())  # ';'
        self._current_node = _last_node

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause:
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        _last_node = self._current_node
        _statement = ET.SubElement(self._current_node, 'ifStatement')
        self._current_node = _statement
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.keyWord())  # 'if'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '('
        self.tokenizer.advance()
        self.CompileExpression()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # ')'
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '{'
        self.CompileStatements()
        self.tokenizer.advance()
        self._write_line(_statement, self.tokenizer.symbol())  # '}'
        peek = self.tokenizer.peek()
        if peek == 'else':
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.keyWord())  # 'else'
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '{'
            self.CompileStatements()
            self.tokenizer.advance()
            self._write_line(_statement, self.tokenizer.symbol())  # '}'
        self._current_node = _last_node

    def CompileExpression(self):
        """
        Compiles an expression:
        term (op term)*
        """
        _last_node = self._current_node
        self._current_node = ET.SubElement(self._current_node, 'expression')
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            self._write_line(self._current_node, self.tokenizer.symbol())  # op
            self.tokenizer.advance()
            self.CompileTerm()
            peek = self.tokenizer.peek()
        self._current_node = _last_node

    def CompileTerm(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when
        trying to decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine must
        distinguish between a variable, an array entry, and a subroutine
        call. A single look-ahead token, which may be one of [, (, or .
        suffices to distinguish between the three possibilities. Any other
        token is not part of this term and should not be advanced over.
        """
        term_branch = ET.SubElement(self._current_node, 'term')
        if self.tokenizer.tokenType(
        ) == 'INT_CONST' or self.tokenizer.tokenType() == 'KEYWORD':
            # integerConstant | keywordConstant
            self._write_line(term_branch, self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            self._write_line(term_branch, self.tokenizer.stringVal())
        elif self.tokenizer.current_token in UNARY_OP:
            # unaryOp term
            self._write_line(term_branch, self.tokenizer.symbol())
            last_node = self._current_node
            self._current_node = term_branch
            self.tokenizer.advance()
            self.CompileTerm()
            self._current_node = last_node
        elif self.tokenizer.current_token in SYMBOLS:
            # '(' expression ')'
            self._write_line(term_branch, self.tokenizer.symbol())  # '('
            self.tokenizer.advance()
            last_node = self._current_node
            self._current_node = term_branch
            self.CompileExpression()
            self._current_node = last_node
            self.tokenizer.advance()
            self._write_line(term_branch, self.tokenizer.symbol())  # ')'
        else:
            # varName | varName '[' expression ']' | subroutineCall
            self._write_line(term_branch, self.tokenizer.identifier())
            peek = self.tokenizer.peek()
            if '[' in peek or '(' in peek:
                # array entry or direct call: '[' expression ']' / '(' ... ')'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # '[' / '('
                self.tokenizer.advance()
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpression()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # ']' / ')'
            elif '.' in peek:
                # qualified call: '.' subroutineName '(' expressionList ')'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # '.'
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.identifier())  # name
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # '('
                last_node = self._current_node
                self._current_node = term_branch
                self.CompileExpressionList()
                self._current_node = last_node
                self.tokenizer.advance()
                self._write_line(term_branch, self.tokenizer.symbol())  # ')'

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions:
        (expression (',' expression)*)?
        """
        last_node = self._current_node
        self._current_node = ET.SubElement(self._current_node,
                                           'expressionList')
        peek = self.tokenizer.peek()
        while peek != ')':
            self.tokenizer.advance()
            if peek == ',':
                self._write_line(self._current_node, self.tokenizer.symbol())
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        self._current_node = last_node
class CompilationEngine:
    """Recursive-descent compiler for Jack source (nand2tetris, project 11).

    Parses tokens from a JackTokenizer, maintains a SymbolTable, writes an
    annotated XML parse tree to ``out_xml`` and VM code to ``out_vm`` via a
    VMWriter.
    """

    ###############
    # CONSTRUCTOR #
    ###############
    def __init__(self, in_filename, in_file, out_xml, out_vm):
        """
        Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass().

        :param in_filename: name of the source file (passed to the VMWriter)
        :param in_file: Open source Jack file.
        :param out_xml: Open XML file.
        :param out_vm: Open VM file.
        """
        self.__in_filename = in_filename
        self.__in_file, self.__out_xml = in_file, out_xml
        self.__tokenizer = JackTokenizer(in_file)
        self.__symbolTable = SymbolTable()
        self.__vmWriter = VMWriter(in_filename, out_vm)
        self.__stack = list()  # stack of currently-open XML tag names
        self.__tokenizer.advance()  # prime the tokenizer with the first token
        self.__resetUniqueLabels()

    ###################
    # PRIVATE METHODS #
    ###################
    def __resetUniqueLabels(self):
        """Reset the per-subroutine counters used to make labels unique."""
        self.__unique_id_if = 0
        self.__unique_id_while = 0

    def __uniqueWhileLabels(self):
        """
        Return (WHILE_EXP, WHILE_END) labels carrying a unique id to prevent
        collisions with other labels carrying the same name.
        Example:
            while_exp, while_end = __uniqueWhileLabels()
            --> while_exp = "WHILE_EXP123"
                while_end = "WHILE_END123"
        """
        unique_labels = []
        for label in [WHILE_EXP, WHILE_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_while))
        # Both labels share one id; bump the counter once per while statement.
        self.__unique_id_while += 1
        return unique_labels

    def __uniqueIfLabels(self):
        """
        Return (IF_TRUE, IF_FALSE, IF_END) labels carrying a unique id to
        prevent collisions with other labels carrying the same name.
        Example:
            if_true, if_false, if_end = __uniqueIfLabels()
            --> if_true  = "IF_TRUE123"
                if_false = "IF_FALSE123"
                if_end   = "IF_END123"
        """
        unique_labels = []
        for label in [IF_TRUE, IF_FALSE, IF_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_if))
        # All three labels share one id; bump the counter once per if statement.
        self.__unique_id_if += 1
        return unique_labels

    def __writeToken(self, token, token_type):
        """
        Writes the given token as an XML tag to the output (no advance).

        :param token: token tag value
        :param token_type: token tag type
        """
        tag = self.__getIndentedTag("<{0}>{1}{2}{1}</{0}>\n".format(
            token_type, XML_DELIM_TERMINAL, token))
        self.__out_xml.write(tag)

    def __writeTokenAndAdvance(self, token, token_type):
        """
        Writes the given token as an XML tag to the output and extracts the
        next token from the code.

        :param token: token tag value
        :param token_type: token tag type
        """
        # Build XML tag
        self.__writeToken(token, token_type)
        self.__tokenizer.advance()

    def __getIndentedTag(self, tag):
        """
        Return the given tag with leading indentation according to the
        current nesting depth (one XML_INDENT_CHAR per open tag).

        :param tag: tag to indent
        :return: tag indented with leading indent characters.
        """
        return XML_INDENT_CHAR * len(self.__stack) + tag

    def __openTag(self, tagName):
        """
        Open an XML tag with the given name. All following tags will be
        written as inner tags until __closeTag() is called.

        :param tagName: name of the tag to open
        """
        tag = self.__getIndentedTag("<{}>\n".format(tagName))
        self.__out_xml.write(tag)
        self.__stack.append(tagName)

    def __closeTag(self):
        """
        Close the current open XML tag. All following tags will be written as
        outer tags in the previous indentation level.
        """
        tagName = self.__stack.pop()
        tag = self.__getIndentedTag("</{}>\n".format(tagName))
        self.__out_xml.write(tag)

    def __compileKeyWord(self):
        """ Compile a keyword token; returns the keyword text. """
        keyword = self.__tokenizer.keyWord()
        self.__writeTokenAndAdvance(keyword, TOKEN_TYPE_KEYWORD)
        return keyword

    def __compileSymbol(self):
        """ Compile a symbol token; returns the symbol text. """
        symbol = self.__tokenizer.symbol()
        self.__writeTokenAndAdvance(symbol, TOKEN_TYPE_SYMBOL)
        return symbol

    def __compileIdentifier(self, category, status, kind=KIND_NONE,
                            index=INDEX_NONE):
        """
        Compile an identifier token, annotated with symbol-table info.

        The XML value is prefixed with "[category status [segment] [index]]"
        so identifier usage can be inspected in the output.

        :param category: identifier category (class/subroutine/var/...)
        :param status: defined vs. used
        :param kind: symbol-table kind, or KIND_NONE to omit the segment
        :param index: symbol-table index, or INDEX_NONE to omit it
        :return: the raw identifier text
        """
        info = "{} {}".format(category, status)
        if kind != KIND_NONE:
            info += " " + KIND_2_SEGMENT[kind]
        if index != INDEX_NONE:
            info += " " + str(index)
        info = "[{}] ".format(info)
        identifier = self.__tokenizer.identifier()
        self.__writeTokenAndAdvance(info + identifier, TOKEN_TYPE_IDENTIFIER)
        return identifier

    def __compileIntVal(self):
        """ Compile an intVal token and push it as a VM constant. """
        intval = self.__tokenizer.intVal()
        self.__writeTokenAndAdvance(intval, TOKEN_TYPE_INTEGER)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, intval)
        return intval

    def __compileStringVal(self):
        """
        Compile a stringVal token.

        Emits VM code that builds the string at run time: String.new(len)
        followed by one String.appendChar call per character.
        """
        string = self.__tokenizer.stringVal()
        self.__writeTokenAndAdvance(string, TOKEN_TYPE_STRING)
        corrected = self.__correctString(string)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, len(corrected))
        self.__vmWriter.writeCall(OS_STRING_NEW, 1)
        for char in corrected:
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, ord(char))
            self.__vmWriter.writeCall(OS_STRING_APPEND_CHAR, 2)

    def __compileClassName(self, status):
        """ Compiles a class name identifier. """
        return self.__compileIdentifier(CATEGORY_CLASS, status)

    def __compileSubroutineName(self, status):
        """ Compiles a subroutine name identifier. """
        return self.__compileIdentifier(CATEGORY_SUBROUTINE, status)

    def __compileSubroutineCall(self):
        """
        Compiles a subroutine call.
        Syntax: (className | varName) '.' subroutineName
                '(' expressionList ')' |
                subroutineName '(' expressionList ')'

        Emits the VM `call`, counting the pushed `this` (when calling a
        method) plus the expression-list arguments.
        """
        # Compile XML
        callName = ""
        exp_count = 0
        if self.__tokenizer.lookahead() == RE_DOT:  # className | varName
            # extract var\class name
            callName = self.__tokenizer.peek()
            # className or varName?
            kind = self.__symbolTable.kindOf(callName)
            if (kind != KIND_NONE):  # varName
                # Use class name instead of object name
                varName = callName
                callName = self.__symbolTable.typeOf(callName)
                # Push variable (this) and call class method
                index = self.__symbolTable.indexOf(varName)
                segment = self.__symbolTable.segmentOf(varName)
                self.__vmWriter.writePush(segment, index)
                # Include self as argument 0
                exp_count += 1
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
            else:  # className
                self.__compileIdentifier(CATEGORY_CLASS, STATUS_USE)
            callName += self.__compileSymbol()  # '.'
        else:  # subroutineName
            # Subroutine -> className.Subroutine
            self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
            callName += self.__className + FUNC_NAME_DELIMITER
            exp_count += 1
        callName += self.__compileSubroutineName(STATUS_USE)
        self.__compileSymbol()                          # '('
        exp_count += self.CompileExpressionList()       # expressionList
        self.__compileSymbol()                          # ')'
        # Compile VM
        self.__vmWriter.writeCall(callName, exp_count)

    def __compileVarName(self, status):
        """ Compiles a variable name, annotated with its table index. """
        name = self.__tokenizer.peek()
        index = INDEX_NONE
        # Only an already-defined variable has an index to report.
        if status != STATUS_DEFINE:
            index = self.__symbolTable.indexOf(name)
        varName = self.__compileIdentifier(CATEGORY_VAR, status, KIND_VAR,
                                           index)
        return varName

    def __compileType(self):
        """
        Compiles a type.
        Syntax: 'int' | 'char' | 'boolean' | className
        :return: the type text
        """
        # 'int' | 'char' | 'boolean'
        if self.__tokenizer.peek() in {RE_INT, RE_CHAR, RE_BOOLEAN}:
            type = self.__compileType_keyword_helper = self.__compileKeyWord()
        # className
        else:
            type = self.__compileClassName(STATUS_USE)
        return type

    def __compileSubroutineBody(self, funcType, name):
        """
        Compiles a subroutine body.
        Syntax: '{' varDec* statements '}'

        Writes the VM `function` line (local count known only after the
        varDecs) and the method/constructor preamble.

        :param funcType: 'constructor' | 'function' | 'method'
        :param name: fully-qualified VM function name (Class.subroutine)
        :return: number of local variables
        """
        self.__openTag('subroutineBody')                # <subroutineBody>
        self.__compileSymbol()                          # '{'
        # varDec*
        while self.__tokenizer.peek() == RE_VAR:
            self.compileVarDec()                        # varDec*
        vars = self.__symbolTable.varCount(KIND_VAR)
        self.__vmWriter.writeFunction(name, vars)
        if funcType == RE_METHOD:
            # Hold self at pointer
            self.__vmWriter.writePush(VM_SEGMENT_ARGUMENT, 0)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        if funcType == RE_CONSTRUCTOR:
            # Allocate memory for all fields
            fields = self.__symbolTable.varCount(KIND_FIELD)
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, fields)
            self.__vmWriter.writeCall(OS_MEMORY_ALLOC, 1)
            # Hold allocated memory at pointer
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        self.compileStatements()                        # statements
        self.__compileSymbol()                          # '}'
        self.__closeTag()                               # </subroutineBody>
        return vars

    ##################
    # PUBLIC METHODS #
    ##################
    def compileClass(self):
        """
        Compiles a complete class.
        Syntax: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.__openTag('class')                         # <class>
        self.__compileKeyWord()                         # 'class'
        className = self.__compileClassName(            # className
            STATUS_DEFINE)
        self.__className = className
        self.__compileSymbol()                          # '{'
        # classVarDec*
        while self.__tokenizer.peek() in {RE_STATIC, RE_FIELD}:
            self.CompileClassVarDec()
        # subroutineDec*
        while self.__tokenizer.peek() in {
                RE_CONSTRUCTOR, RE_FUNCTION, RE_METHOD
        }:
            self.CompileSubroutine()
        self.__compileSymbol()                          # '}'
        self.__closeTag()                               # </class>

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Syntax: ('static' | 'field') type varName (',' varName)* ';'
        """
        self.__openTag('classVarDec')                   # <classVarDec>
        kind = self.__compileKeyWord()                  # ('static' | 'field')
        type = self.__compileType()                     # type
        moreVars = True
        while moreVars:                                 # (',' varName)*
            name = self.__compileVarName(               # varName
                STATUS_DEFINE)
            self.__symbolTable.define(name, type, kind)
            if self.__tokenizer.peek() != RE_COMMA:
                moreVars = False
            else:
                self.__compileSymbol()                  # ','
        self.__compileSymbol()                          # ';'
        self.__closeTag()                               # </classVarDec>

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        Syntax: ('constructor' | 'function' | 'method')
                ('void' | type) subroutineName '(' parameterList ')'
                subroutineBody
        """
        # Start subroutine in symbol table
        self.__resetUniqueLabels()
        self.__symbolTable.startSubroutine()
        # Compile XML
        self.__openTag('subroutineDec')                 # <subroutineDec>
        funcType = self.__compileKeyWord()              # ('constructor' |
                                                        # 'function' | 'method')
        if funcType in {RE_METHOD}:
            # +1 var count for this method (+1 for self)
            self.__symbolTable.define(VM_SELF, self.__className, KIND_ARG)
        if self.__tokenizer.peek() == RE_VOID:
            type = self.__compileKeyWord()              # 'void'
        else:
            type = self.__compileType()                 # type
        subName = self.__compileSubroutineName(         # subroutineName
            STATUS_DEFINE)
        name = self.__className + FUNC_NAME_DELIMITER + subName
        self.__compileSymbol()                          # '('
        self.compileParameterList()                     # parameterList
        self.__compileSymbol()                          # ')'
        self.__compileSubroutineBody(funcType, name)    # subroutineBody
        self.__closeTag()                               # </subroutineDec>

    def compileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        Syntax: ((type varName) (',' type varName)*)?
        :return: number of parameters compiled
        """
        parameters = 0                                  # no parameters?
        self.__openTag('parameterList')                 # <parameterList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            moreVars = True
            while moreVars:
                parameters += 1                         # yes parameters!
                type = self.__compileType()             # type
                name = self.__compileVarName(           # varName
                    STATUS_DEFINE)
                self.__symbolTable.define(name, type, KIND_ARG)
                if self.__tokenizer.peek() == RE_COMMA:
                    self.__compileSymbol()              # ','
                else:
                    moreVars = False
        self.__closeTag()                               # </parameterList>
        return parameters

    def compileVarDec(self):
        """
        Compiles a var declaration.
        Syntax: 'var' type varName (',' varName)* ';'
        """
        self.__openTag('varDec')                        # <varDec>
        moreVars = True
        self.__compileKeyWord()                         # 'var'
        type = self.__compileType()                     # type
        while moreVars:
            name = self.__tokenizer.peek()              # varName
            self.__symbolTable.define(name, type, KIND_VAR)
            self.__compileVarName(STATUS_DEFINE)
            if self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()                  # ','
            else:
                moreVars = False
        self.__compileSymbol()                          # ';'
        self.__closeTag()                               # </varDec>

    def compileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        Syntax: statement* where statement is in:
        letStatement | ifStatement | whileStatement |
        doStatement | returnStatement
        """
        self.__openTag('statements')                    # <statements>
        statement = self.__tokenizer.peek()
        while statement in {
                RE_LET, RE_IF, RE_WHILE, RE_DO, RE_RETURN_NOTHING,
                RE_RETURN_SOMETHING
        }:
            if statement == RE_LET:
                self.compileLet()
            elif statement == RE_IF:
                self.compileIf()
            elif statement == RE_WHILE:
                self.compileWhile()
            elif statement == RE_DO:
                self.compileDo()
            elif statement == RE_RETURN_NOTHING:
                self.compileReturnNothing()
            elif statement == RE_RETURN_SOMETHING:
                self.compileReturnSomething()
            statement = self.__tokenizer.peek()
        self.__closeTag()                               # </statements>

    def compileDo(self):
        """
        Compiles a do statement.
        Syntax: 'do' subroutineCall ';'

        The called subroutine's return value is discarded (popped to temp).
        """
        self.__openTag('doStatement')                   # <doStatement>
        self.__compileKeyWord()                         # 'do'
        self.__compileSubroutineCall()                  # subroutineCall
        self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
        self.__compileSymbol()                          # ';'
        self.__closeTag()                               # </doStatement>

    def compileLet(self):
        """
        Compiles a let statement.
        Syntax: 'let' varName ('[' expression ']')? '=' expression ';'
        """
        isArray = False
        self.__openTag('letStatement')                  # <letStatement>
        self.__compileKeyWord()                         # 'let'
        varName = self.__tokenizer.peek()
        index = self.__symbolTable.indexOf(varName)
        segment = self.__symbolTable.segmentOf(varName)
        self.__compileVarName(STATUS_USE)               # varName
        if self.__tokenizer.peek() == RE_BRACKETS_SQUARE_LEFT:
            isArray = True
            self.__compileSymbol()                      # '['
            self.CompileExpression()                    # expression
            self.__compileSymbol()                      # ']'
            # Add the offset to the variable address
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            # Address of array element is at stack top
        self.__compileSymbol()                          # '='
        self.CompileExpression()                        # expression
        self.__compileSymbol()                          # ';'
        self.__closeTag()                               # </letStatement>
        if isArray:
            # Pop rh-expression to temp
            self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
            # Get address of array element
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            # Push rh-expression to stack
            self.__vmWriter.writePush(VM_SEGMENT_TEMP, 0)
            # Pop rh-expression to address of element
            self.__vmWriter.writePop(VM_SEGMENT_THAT, 0)
        else:
            # Compile only if the varName was defined
            # (unlike class name of subroutine name)
            # if segment != KIND_NONE:  # varName was defined
            index = self.__symbolTable.indexOf(varName)
            self.__vmWriter.writePop(segment, index)

    def compileWhile(self):
        """
        Compiles a while statement.
        Syntax: 'while' '(' expression ')' '{' statements '}'
        """
        LABEL_EXP, LABEL_END = self.__uniqueWhileLabels()
        self.__openTag('whileStatement')                # <whileStatement>
        self.__compileKeyWord()                         # 'while'
        self.__compileSymbol()                          # '('
        self.__vmWriter.writeLabel(                     # label WHILE_EXP
            LABEL_EXP)
        self.CompileExpression()                        # expression
        # Negate the expression
        # (jump out of while if *NOT* expression)
        self.__vmWriter.writeArithmetic(RE_TILDA, False)
        self.__compileSymbol()                          # ')'
        self.__vmWriter.writeIf(LABEL_END)              # if-goto WHILE_END
        self.__compileSymbol()                          # '{'
        self.compileStatements()                        # statements
        self.__compileSymbol()                          # '}'
        self.__vmWriter.writeGoto(LABEL_EXP)            # goto WHILE_EXP
        self.__vmWriter.writeLabel(LABEL_END)           # label WHILE_END
        self.__closeTag()                               # </whileStatement>

    def compileReturnNothing(self):
        """
        Compiles a 'return;' statement.
        Syntax: 'return;'

        NOTE(review): writes 'return' and ';' as two tags but advances only
        once — presumably the tokenizer yields "return;" as a single token;
        confirm against RE_RETURN_NOTHING.
        """
        # Compile XML
        self.__openTag('returnStatement')               # <returnStatement>
        self.__writeToken(
            'return',                                   # 'return'
            TOKEN_TYPE_KEYWORD)
        self.__writeTokenAndAdvance(
            ';',                                        # ';'
            TOKEN_TYPE_SYMBOL)
        self.__vmWriter.writeReturn(True)
        self.__closeTag()                               # </returnStatement>

    def compileReturnSomething(self):
        """
        Compiles a return statement.
        Syntax: 'return' expression? ';'
        """
        # Compile XML
        self.__openTag('returnStatement')               # <returnStatement>
        self.__writeTokenAndAdvance(
            'return',                                   # 'return'
            TOKEN_TYPE_KEYWORD)
        self.CompileExpression()                        # expression
        self.__compileSymbol()                          # ';'
        self.__vmWriter.writeReturn()
        self.__closeTag()                               # </returnStatement>

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        Syntax: 'if' '(' expression ')' '{' statements '}'
                ('else' '{' statements '}')?
        """
        LABEL_TRUE, LABEL_FALSE, LABEL_END = self.__uniqueIfLabels()
        self.__openTag('ifStatement')                   # <ifStatement>
        self.__compileKeyWord()                         # 'if'
        self.__compileSymbol()                          # '('
        # VM Code for computing ~(cond)
        self.CompileExpression()                        # expression
        self.__compileSymbol()                          # ')'
        self.__vmWriter.writeIf(LABEL_TRUE)             # if-goto LABEL_TRUE
        self.__vmWriter.writeGoto(LABEL_FALSE)          # goto LABEL_FALSE
        self.__vmWriter.writeLabel(LABEL_TRUE)          # label LABEL_TRUE
        self.__compileSymbol()                          # '{'
        # VM Code for executing TRUE
        self.compileStatements()                        # statements
        self.__compileSymbol()                          # '}'
        if self.__tokenizer.peek() == RE_ELSE:
            self.__vmWriter.writeGoto(LABEL_END)        # goto LABEL_END
            self.__vmWriter.writeLabel(                 # label LABEL_FALSE
                LABEL_FALSE)
            self.__compileKeyWord()                     # 'else'
            self.__compileSymbol()                      # '{'
            # VM Code for executing ELSE
            self.compileStatements()                    # statements
            self.__compileSymbol()                      # '}'
            self.__vmWriter.writeLabel(                 # label END
                LABEL_END)
        else:
            self.__vmWriter.writeLabel(                 # label FALSE
                LABEL_FALSE)
        self.__closeTag()                               # </ifStatement>

    def CompileExpression(self):
        """
        Compiles an expression.
        Syntax: term (op term)*

        Operators are emitted after both operands (postfix), matching the
        stack-based VM.
        """
        self.__openTag('expression')                    # <expression>
        self.CompileTerm()                              # term
        while self.__tokenizer.peek() in {
                RE_PLUS, RE_BAR, RE_ASTERISK, RE_SLASH, RE_AMPERSAND,
                RE_VBAR, RE_LT, RE_GT, RE_EQ
        }:
            symbol = self.__compileSymbol()             # op
            self.CompileTerm()                          # term
            self.__vmWriter.writeSymbol(symbol)
        self.__closeTag()                               # </expression>

    def __correctString(self, string):
        """
        Convert escape characters in a string to valid chars.
        :param string: string to correct
        :return: corrected strings with escaped characters corrected
        """
        correct = string.replace('\t', '\\t')
        correct = correct.replace('\n', '\\n')
        correct = correct.replace('\r', '\\r')
        return correct

    def CompileTerm(self):
        """
        Compiles a term. This routine is faced with a slight difficulty when
        trying to decide between some of the alternative parsing rules.
        Specifically, if the current token is an identifier, the routine must
        distinguish between a variable, an array entry, and a subroutine
        call. A single look-ahead token, which may be one of "[", "(", or "."
        suffices to distinguish between the three possibilities. Any other
        token is not part of this term and should not be advanced over.
        Syntax: integerConstant | stringConstant | keywordConstant |
                varName | varName '[' expression ']' | subroutineCall |
                '(' expression ')' | unaryOp term
        """
        self.__openTag('term')                          # <term>
        lookahead = self.__tokenizer.lookahead()
        if self.__tokenizer.peek() == RE_BRACKETS_LEFT:
            self.__compileSymbol()                      # '('
            self.CompileExpression()                    # expression
            self.__compileSymbol()                      # ')'
        elif self.__tokenizer.peek() in {RE_TILDA, RE_BAR}:
            symbol = self.__compileSymbol()             # unaryOp
            self.CompileTerm()                          # term
            self.__vmWriter.writeArithmetic(symbol, False)
        elif lookahead == RE_BRACKETS_SQUARE_LEFT:
            varName = self.__tokenizer.peek()
            self.__compileVarName(STATUS_USE)           # varName
            self.__compileSymbol()                      # '['
            self.CompileExpression()                    # expression
            self.__compileSymbol()                      # ']'
            # Compile array indexing
            kind = self.__symbolTable.kindOf(varName)
            index = self.__symbolTable.indexOf(varName)
            segment = KIND_2_SEGMENT[kind]
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            self.__vmWriter.writePush(VM_SEGMENT_THAT, 0)
        elif lookahead in {RE_BRACKETS_LEFT, RE_DOT}:
            self.__compileSubroutineCall()              # subroutineCall |
                                                        # (varName|className)
                                                        # '.' subroutineCall
        else:
            if self.__tokenizer.tokenType() == TOKEN_TYPE_INTEGER:
                self.__compileIntVal()                  # integerConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_STRING:
                self.__compileStringVal()               # stringConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_KEYWORD:
                # true | false | null | this
                # true | false | null - pushed to stack as constants
                keyword = self.__tokenizer.peek()
                if keyword in {RE_FALSE, RE_NULL, RE_TRUE}:
                    self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, 0)
                    # true is ~0 (all ones)
                    if keyword == RE_TRUE:
                        self.__vmWriter.writeArithmetic(RE_TILDA, False)
                # this - pushes pointer
                elif keyword == RE_THIS:
                    self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
                self.__compileKeyWord()                 # keywordConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_IDENTIFIER:
                # plain varName: push its value
                name = self.__tokenizer.peek()
                kind = self.__symbolTable.kindOf(name)
                index = self.__symbolTable.indexOf(name)
                segment = self.__symbolTable.segmentOf(name)
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
                self.__vmWriter.writePush(segment, index)
        self.__closeTag()                               # </term>

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        Syntax: (expression (',' expression)*)?
        :return: number of expressions compiled
        """
        exp_count = 0
        self.__openTag('expressionList')                # <expressionList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            self.CompileExpression()
            exp_count += 1                              # expression
            while self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()                  # ','
                self.CompileExpression()
                exp_count += 1
        self.__closeTag()                               # </expressionList>
        return exp_count
class CompilationEngine:
    """Recursive-descent compiler for the Jack language.

    Pulls tokens from a JackTokenizer and emits VM commands through a
    VMWriter, resolving identifiers via the JackClass / JackSubroutine
    symbol tables.  Compilation of the whole input is driven from
    __init__.
    """

    def __init__(self, input_path, output_path):
        """Create a new compilation engine with the given input and
        output and immediately compile the input class.

        :param input_path: input stream/file with Jack source code
        :param output_path: output stream/file receiving VM code
        """
        self.labels = 0               # running counter keeping flow-control labels unique
        self.jack_class = None        # class-level symbol table, set by CompileClass
        self.class_subroutine = None  # symbol table of the subroutine being compiled
        self.tokenizer = JackTokenizer(input_path)
        self._writer = VMWriter(output_path)
        self.CompileClass()

    def CompileClass(self):
        """Compile a complete class: 'class' className '{' ... '}'."""
        self.tokenizer.advance()   # 'class'
        self.tokenizer.advance()   # className
        self.jack_class = JackClass(self.tokenizer.current_token)
        self.tokenizer.advance()   # '{'
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()   # '}'

    def CompileClassVarDec(self):
        """Compile zero or more static/field declarations, recording each
        variable in the class symbol table.  Emits no VM code.
        """
        peek = self.tokenizer.peek()
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()      # 'field' or 'static'
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()  # declared type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()   # first variable name
            self.tokenizer.advance()
            self.jack_class.add_var(name, var_type, kind)
            # further names in the same declaration: (',' name)*
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.tokenizer.advance()
                self.jack_class.add_var(name, var_type, kind)
            peek = self.tokenizer.peek()

    def CompileSubroutine(self):
        """Compile every method, function, and constructor of the class.

        For each subroutine this writes the VM 'function' header plus the
        kind-specific prologue (constructor: allocate the object and
        anchor 'this'; method: anchor 'this' to argument 0), then
        compiles the body's statements.
        """
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()          # constructor/function/method
            self.tokenizer.advance()
            ret_type = self.tokenizer.current_token  # 'void' or a return type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()       # subroutine name
            self.tokenizer.advance()                 # '('
            self.class_subroutine = JackSubroutine(name, kind, ret_type,
                                                   self.jack_class)
            self.CompileParameterList()
            self.tokenizer.advance()                 # ')'
            self.tokenizer.advance()                 # '{'
            peek = self.tokenizer.peek()
            if 'var' in peek:
                self.CompileVarDec()
            full_name = '{}.{}'.format(self.jack_class.class_name,
                                       self.class_subroutine.name)
            self._writer.write_function(full_name, self.class_subroutine.var_c)
            if kind == 'constructor':
                # allocate one word per field and point 'this' at the block
                fields = self.jack_class.counters[0]
                self._writer.push('constant', str(fields))
                self._writer.write_call('Memory.alloc', '1')
                self._writer.pop('pointer', '0')
            elif kind == 'method':
                # a method receives the target object as hidden argument 0
                self._writer.push('argument', '0')
                self._writer.pop('pointer', '0')
            self.CompileStatements()
            self.tokenizer.advance()                 # '}'
            peek = self.tokenizer.peek()

    def CompileParameterList(self):
        """Compile a (possibly empty) parameter list, not including the
        enclosing parentheses, adding each parameter to the subroutine's
        symbol table.  Emits no VM code.
        """
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()    # parameter type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()     # parameter name
            peek = self.tokenizer.peek()
            self.class_subroutine.add_arg(name, var_type)
            while peek == ',':
                self.tokenizer.advance()           # ','
                self.tokenizer.advance()
                var_type = self.tokenizer.keyWord()
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.class_subroutine.add_arg(name, var_type)
                peek = self.tokenizer.peek()

    def CompileVarDec(self):
        """Compile zero or more 'var' declarations, adding each local to
        the subroutine symbol table.  Emits no VM code.
        """
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()               # 'var'
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()    # declared type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()     # first variable name
            self.class_subroutine.add_var(name, var_type)
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.class_subroutine.add_var(name, var_type)
                self.tokenizer.advance()
            peek = self.tokenizer.peek()

    def CompileStatements(self):
        """Compile a sequence of statements, not including the enclosing
        curly braces, dispatching on the statement's leading keyword.
        """
        peek = self.tokenizer.peek()
        while ('let' in peek or 'if' in peek or 'while' in peek or
               'do' in peek or 'return' in peek):
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()

    def CompileDo(self):
        """Compile a do statement.

        The called subroutine's return value is discarded into temp 0,
        as the VM calling convention requires every call to return one.
        """
        self.tokenizer.advance()   # 'do'
        self.tokenizer.advance()   # position on the callee identifier for CompileTerm
        self.CompileTerm()         # compiles the subroutine call itself
        self._writer.pop('temp', '0')   # discard the ignored return value
        self.tokenizer.advance()   # ';'
        if self.tokenizer.current_token != ';':
            self.tokenizer.advance()

    def CompileLet(self):
        """Compile a let statement, including array-element targets."""
        self.tokenizer.advance()   # 'let'
        self.tokenizer.advance()
        name = self.tokenizer.identifier()
        symbol = self.class_subroutine.get_symbol(name)
        peek = self.tokenizer.peek()
        if peek == '[':
            # let name[index] = expr : compute (index + base), then store
            # the value through pointer 1 / 'that'.
            self.tokenizer.advance()   # '['
            self.tokenizer.advance()
            self.CompileExpression()   # index
            self.tokenizer.advance()   # ']'
            self.tokenizer.advance()   # '='
            self._writer.push(symbol)  # array base ('add' is commutative)
            self._writer.write_cmd('add')
            self.tokenizer.advance()
            self.CompileExpression()   # right-hand side
            self._writer.pop('temp', '0')     # park RHS so 'that' can be set
            self._writer.pop('pointer', '1')  # that = index + base
            self._writer.push('temp', '0')
            self._writer.pop('that', '0')
        else:
            self.tokenizer.advance()   # '='
            self.tokenizer.advance()
            self.CompileExpression()
            self._writer.pop(symbol)
        self.tokenizer.advance()       # ';'

    def CompileWhile(self):
        """Compile a while statement.

        Emits the canonical pattern:
            label WHILE_EXP{n}
              <expression> ; not
              if-goto WHILE_END{n}
              <statements>
              goto WHILE_EXP{n}
            label WHILE_END{n}

        BUG FIX vs. the original: it wrote 'goto WHILE_END' after the
        body and re-emitted the WHILE_EXP label at the end, so the loop
        never repeated and WHILE_EXP was defined twice; it never advanced
        self.labels, so multiple whiles in one subroutine reused label
        numbers; and it exited while the condition was *true* — write_if
        emits a plain if-goto (see CompileIf's IF_TRUE usage), so the
        condition must be negated before the exit jump.
        """
        label_c = self.labels
        self.labels += 1           # reserve this label pair for this loop
        self.tokenizer.advance()   # 'while'
        self.tokenizer.advance()   # '('
        self.tokenizer.advance()
        self._writer.write_label(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self.CompileExpression()
        self.tokenizer.advance()   # ')'
        self.tokenizer.advance()   # '{'
        self._writer.write_cmd('not')   # leave the loop when the condition is false
        self._writer.write_if(LABEL_FORMAT.format('WHILE_END', label_c))
        self.CompileStatements()
        self._writer.write_goto(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self._writer.write_label(LABEL_FORMAT.format('WHILE_END', label_c))
        self.tokenizer.advance()   # '}'

    def CompileReturn(self):
        """Compile a return statement.

        A bare 'return;' pushes constant 0 first, since every VM function
        must leave a return value on the stack.
        """
        self.tokenizer.advance()   # 'return'
        peek = self.tokenizer.peek()
        if peek != ';':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()   # ';'
        else:
            self._writer.push('constant', '0')   # dummy value for void return
            self.tokenizer.advance()             # ';'
        self._writer.write_return()

    def CompileIf(self):
        """Compile an if statement, possibly with a trailing else clause.

        Emits the IF_TRUE / IF_FALSE / IF_END label pattern; self.labels
        is advanced so nested ifs get distinct label numbers.
        """
        label_c = self.labels
        self.tokenizer.advance()   # 'if'
        self.tokenizer.advance()
        self.tokenizer.advance()   # '('
        self.CompileExpression()
        self.tokenizer.advance()   # ')'
        self.tokenizer.advance()   # '{'
        self._writer.write_if(LABEL_FORMAT.format('IF_TRUE', label_c))
        self._writer.write_goto(LABEL_FORMAT.format('IF_FALSE', label_c))
        self._writer.write_label(LABEL_FORMAT.format('IF_TRUE', label_c))
        self.labels += 1
        self.CompileStatements()
        self.tokenizer.advance()   # '}'
        peek = self.tokenizer.peek()
        if peek == 'else':
            self._writer.write_goto(LABEL_FORMAT.format('IF_END', label_c))
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))
            self.tokenizer.advance()   # 'else'
            self.tokenizer.advance()   # '{'
            self.CompileStatements()
            self.tokenizer.advance()   # '}'
            self._writer.write_label(LABEL_FORMAT.format('IF_END', label_c))
        else:
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))

    def CompileExpression(self):
        """Compile an expression: term (op term)*.

        Both operands are compiled before the operator command is
        written, matching the stack machine's evaluation order.
        """
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            self._writer.write_cmd(OP_DIC[op])   # op after both operands
            peek = self.tokenizer.peek()

    def CompileTerm(self):
        """Compile a term.

        When the current token is an identifier, a single look-ahead
        token — one of '[', '(' or '.' — distinguishes a plain variable,
        an array entry, and a subroutine call; any other token is not
        part of this term and is not advanced over.

        BUG FIX vs. the original: the unary-operator command was written
        *before* its operand was compiled (emitting e.g. 'neg' ahead of
        the push); it is now emitted after the operand, consistent with
        binary operators in CompileExpression.  The 'true' keyword now
        uses write_cmd('not') like every other arithmetic emission in
        this class instead of the inconsistent writer.write call.
        """
        if self.tokenizer.current_token in UNARY_OP:
            unary = self.tokenizer.current_token
            self.tokenizer.advance()
            self.CompileTerm()                        # operand first ...
            self._writer.write_cmd(UNARY_DIC[unary])  # ... then the unary op
        elif self.tokenizer.current_token == '(':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()   # ')'
        elif self.tokenizer.tokenType() == 'INT_CONST':
            self._writer.push('constant', self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            # build the string at run time, one appendChar per character
            text = self.tokenizer.stringVal()
            self._writer.push('constant', len(text))
            self._writer.write_call('String.new', '1')
            for char in text:
                self._writer.push('constant', ord(char))
                self._writer.write_call('String.appendChar', '2')
        elif self.tokenizer.tokenType() == 'KEYWORD':
            if self.tokenizer.current_token == 'this':
                self._writer.push('pointer', '0')
            else:
                # false/null compile to 0; true is ~0 == -1
                self._writer.push('constant', '0')
                if self.tokenizer.current_token == 'true':
                    self._writer.write_cmd('not')
        elif self.tokenizer.tokenType() == 'IDENTIFIER':
            value = self.tokenizer.identifier()
            var = self.class_subroutine.get_symbol(value)
            peek = self.tokenizer.peek()
            if peek == '[':
                # array access: push (index + base), point 'that' at it
                self.tokenizer.advance()
                self.tokenizer.advance()   # '['
                self.CompileExpression()
                self._writer.push(var)
                self._writer.write_cmd('add')
                self._writer.pop('pointer', '1')
                self._writer.push('that', '0')
                self.tokenizer.advance()   # ']'
            else:
                function_name = value
                functions_class = self.class_subroutine.jack_class
                is_default = True   # True => implicit method call on 'this'
                args = 0
                if peek == '.':
                    is_default = False
                    self.tokenizer.advance()
                    self.tokenizer.advance()
                    function_object = self.class_subroutine.get_symbol(
                        function_name)
                    function_name = self.tokenizer.current_token
                    if function_object:
                        # var.method(...): the object is the hidden argument
                        functions_class = var.type
                        args = 1
                        self._writer.push(var)
                    else:
                        # ClassName.function(...): a static/constructor call
                        functions_class = value
                    peek = self.tokenizer.peek()
                if peek == '(':
                    if is_default:
                        # bare foo(...) is a method call on 'this'
                        args = 1
                        self._writer.push('pointer', '0')
                    self.tokenizer.advance()   # '('
                    args += self.CompileExpressionList()
                    if not isinstance(functions_class, str):
                        functions_class = functions_class.class_name
                    full_name = '{}.{}'.format(functions_class, function_name)
                    self._writer.write_call(full_name, args)
                    if self.tokenizer.current_token != ')':
                        self.tokenizer.advance()   # ')'
                elif var:
                    # plain variable reference
                    self._writer.push(var)

    def CompileExpressionList(self):
        """Compile a (possibly empty) comma-separated list of expressions.

        :return: the number of expressions compiled (call-argument count)
        """
        expressions_counter = 0
        peek = self.tokenizer.peek()
        while peek != ')' and peek != ';':
            self.tokenizer.advance()
            expressions_counter += 1
            if self.tokenizer.current_token == ',':
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        return expressions_counter