def main():
    """Drives the Jack-to-VM translation process.

    The first command-line argument is either a single ``.jack`` file or a
    directory that is searched recursively for ``.jack`` files.  Each source
    file gets a JackTokenizer and a matching ``.xml`` output path, and every
    token is pulled once through the tokenizer's typed accessors.
    """
    file_name = sys.argv[1]
    tokenizers = []
    output_files = []
    abs_path = os.path.abspath(file_name)
    if file_name.endswith('.jack'):
        # Single-file mode: tokenize just this file.
        tokenizers.append(JackTokenizer(abs_path))
        output_files.append(os.path.splitext(abs_path)[0] + '.xml')
    else:
        # Directory mode: collect every .jack file in the tree.
        for dir_path, _dir_names, file_names in os.walk(abs_path):
            for jack_file in file_names:
                if not jack_file.endswith('.jack'):
                    continue
                # Bug fix: join against the directory actually being walked.
                # The original joined against the top-level abs_path, which
                # produced wrong paths for files in nested sub-directories.
                jack_path = os.path.join(dir_path, jack_file)
                tokenizers.append(JackTokenizer(jack_path))
                output_files.append(os.path.splitext(jack_path)[0] + '.xml')
    for tokenizer in tokenizers:
        # Drain the token stream, reading each token through the accessor
        # matching its reported type (mirrors the JackTokenizer API).
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            token_type = tokenizer.token_type()
            if token_type == 'KEYWORD':
                keyword = tokenizer.keyword()
            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()
            elif token_type == 'IDENTIFIER':
                identifier = tokenizer.identifier()
            elif token_type == 'INT_CONST':
                int_val = tokenizer.int_val()
            elif token_type == 'STRING_CONST':
                string_val = tokenizer.string_val()
def test_advance(self):
    """Exercises the whole tokenizer API against this Jack code:

    /** Multi-line comment for some class. */
    class A{ // Single-line comment
        let x = -4;
        do Output.printString("Ring Constants!");
    }
    """
    tokenizer = JackTokenizer("test.jack")
    # Each entry: (expected token type, accessor method name, expected value).
    # The order matches the token stream produced by the code above.
    expected_stream = [
        (KEYWORD, 'keyword', CLASS),
        (IDENTIFIER, 'identifier', 'A'),
        (SYMBOL, 'symbol', '{'),
        (KEYWORD, 'keyword', LET),
        (IDENTIFIER, 'identifier', 'x'),
        (SYMBOL, 'symbol', '='),
        (SYMBOL, 'symbol', '-'),
        (INT_CONST, 'int_val', 4),
        (SYMBOL, 'symbol', ';'),
        (KEYWORD, 'keyword', DO),
        (IDENTIFIER, 'identifier', 'Output'),
        (SYMBOL, 'symbol', '.'),
        (IDENTIFIER, 'identifier', 'printString'),
        (SYMBOL, 'symbol', '('),
        (STRING_CONST, 'string_val', 'Ring Constants!'),
        (SYMBOL, 'symbol', ')'),
        (SYMBOL, 'symbol', ';'),
        (SYMBOL, 'symbol', '}'),
    ]
    for token_type, accessor, value in expected_stream:
        tokenizer.advance()
        # Same assertion order as before: value first, then token type.
        self.assertEqual(getattr(tokenizer, accessor)(), value)
        self.assertEqual(tokenizer.token_type(), token_type)
class CompilationEngine:
    """Recursive-descent compiler for a single Jack class.

    Emits two artifacts in lockstep: an XML parse tree (via ``out_xml``)
    and VM code (via a VMWriter on ``out_vm``).  Drive it by calling
    ``compileClass()`` once after construction.
    """

    ###############
    # CONSTRUCTOR #
    ###############

    def __init__(self, in_filename, in_file, out_xml, out_vm):
        """
        Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass().

        :param in_filename: name of the source file (forwarded to VMWriter).
        :param in_file: Open source Jack file.
        :param out_xml: Open XML file.
        :param out_vm: Open VM file.
        """
        self.__in_filename = in_filename
        self.__in_file, self.__out_xml = in_file, out_xml
        self.__tokenizer = JackTokenizer(in_file)
        self.__symbolTable = SymbolTable()
        self.__vmWriter = VMWriter(in_filename, out_vm)
        # Stack of currently-open XML tag names; its depth drives indentation.
        self.__stack = list()
        # Prime the tokenizer so peek()/lookahead() see the first token.
        self.__tokenizer.advance()
        self.__resetUniqueLabels()

    ###################
    # PRIVATE METHODS #
    ###################

    def __resetUniqueLabels(self):
        # Restart the per-subroutine counters used to make VM labels unique.
        self.__unique_id_if = 0
        self.__unique_id_while = 0

    def __uniqueWhileLabels(self):
        """
        Return (WHILE_EXP, WHILE_END) labels carrying a unique id to
        prevent collisions with other labels carrying the same name.
        Example:
            while_exp, while_end = __uniqueWhileLabels()
            --> while_exp = "WHILE_EXP123"
                while_end = "WHILE_END123"
        """
        unique_labels = []
        for label in [WHILE_EXP, WHILE_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_while))
        self.__unique_id_while += 1
        return unique_labels

    def __uniqueIfLabels(self):
        """
        Return (IF_TRUE, IF_FALSE, IF_END) labels carrying a unique id to
        prevent collisions with other labels carrying the same name.
        Example:
            if_true, if_false, if_end = __uniqueIfLabels()
            --> if_true = "IF_TRUE123"
                if_false = "IF_FALSE123"
                if_end = "IF_END123"
        """
        unique_labels = []
        for label in [IF_TRUE, IF_FALSE, IF_END]:
            unique_labels.append("{}{}{}".format(label, UNIQUE_DELIMITER,
                                                 self.__unique_id_if))
        self.__unique_id_if += 1
        return unique_labels

    def __writeToken(self, token, token_type):
        """
        Writes the given token as an XML tag to the output.

        :param token: token tag value
        :param token_type: token tag type (becomes the XML element name)
        """
        tag = self.__getIndentedTag("<{0}>{1}{2}{1}</{0}>\n".format(
            token_type, XML_DELIM_TERMINAL, token))
        self.__out_xml.write(tag)

    def __writeTokenAndAdvance(self, token, token_type):
        """
        Writes the given token as an XML tag to the output and extracts
        the next token from the code.

        :param token: token tag value
        :param token_type: token tag type
        """
        # Build XML tag, then consume the token just written.
        self.__writeToken(token, token_type)
        self.__tokenizer.advance()

    def __getIndentedTag(self, tag):
        """
        Return the given tag with leading indentation according to the
        current XML nesting level (depth of the open-tag stack).

        :param tag: tag to indent
        :return: tag indented with leading indent characters.
        """
        return XML_INDENT_CHAR * len(self.__stack) + tag

    def __openTag(self, tagName):
        """
        Open an XML tag with the given name. All following tags will be
        written as inner tags until __closeTag() is called.

        :param tagName: name of the tag to open
        """
        tag = self.__getIndentedTag("<{}>\n".format(tagName))
        self.__out_xml.write(tag)
        self.__stack.append(tagName)

    def __closeTag(self):
        """
        Close the current open XML tag. All following tags will be written
        as outer tags in the previous indentation level.
        """
        tagName = self.__stack.pop()
        tag = self.__getIndentedTag("</{}>\n".format(tagName))
        self.__out_xml.write(tag)

    def __compileKeyWord(self):
        """ Compile a keyword token; returns the keyword text. """
        keyword = self.__tokenizer.keyWord()
        self.__writeTokenAndAdvance(keyword, TOKEN_TYPE_KEYWORD)
        return keyword

    def __compileSymbol(self):
        """ Compile a symbol token; returns the symbol text. """
        symbol = self.__tokenizer.symbol()
        self.__writeTokenAndAdvance(symbol, TOKEN_TYPE_SYMBOL)
        return symbol

    def __compileIdentifier(self, category, status, kind=KIND_NONE,
                            index=INDEX_NONE):
        """ Compile an identifier token, annotating the XML value with
        "[category status [segment] [index]]" debug info. Returns the
        identifier text. """
        info = "{} {}".format(category, status)
        if kind != KIND_NONE:
            info += " " + KIND_2_SEGMENT[kind]
        if index != INDEX_NONE:
            info += " " + str(index)
        info = "[{}] ".format(info)
        identifier = self.__tokenizer.identifier()
        self.__writeTokenAndAdvance(info + identifier, TOKEN_TYPE_IDENTIFIER)
        return identifier

    def __compileIntVal(self):
        """ Compile an intVal token and push it as a VM constant. """
        intval = self.__tokenizer.intVal()
        self.__writeTokenAndAdvance(intval, TOKEN_TYPE_INTEGER)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, intval)
        return intval

    def __compileStringVal(self):
        """ Compile a stringVal token: build the string at run time via
        String.new / String.appendChar, one OS call per character. """
        string = self.__tokenizer.stringVal()
        self.__writeTokenAndAdvance(string, TOKEN_TYPE_STRING)
        # Escape sequences are expanded so the run-time length matches.
        corrected = self.__correctString(string)
        self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, len(corrected))
        self.__vmWriter.writeCall(OS_STRING_NEW, 1)
        for char in corrected:
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, ord(char))
            self.__vmWriter.writeCall(OS_STRING_APPEND_CHAR, 2)

    def __compileClassName(self, status):
        """ Compiles a class name identifier. """
        return self.__compileIdentifier(CATEGORY_CLASS, status)

    def __compileSubroutineName(self, status):
        """ Compiles a subroutine name identifier. """
        return self.__compileIdentifier(CATEGORY_SUBROUTINE, status)

    def __compileSubroutineCall(self):
        """
        Compiles a subroutine call.
        Syntax: (className | varName) '.' subroutineName
                    '(' expressionList ')' |
                subroutineName '(' expressionList ')'
        """
        # Compile XML
        callName = ""
        exp_count = 0
        if self.__tokenizer.lookahead() == RE_DOT:    # className | varName
            # extract var\class name
            callName = self.__tokenizer.peek()
            # className or varName?
            kind = self.__symbolTable.kindOf(callName)
            if (kind != KIND_NONE):                   # varName
                # Method call on an object: use its class name instead of
                # the object name when building the VM function name.
                varName = callName
                callName = self.__symbolTable.typeOf(callName)
                # Push variable (this) and call class method
                index = self.__symbolTable.indexOf(varName)
                segment = self.__symbolTable.segmentOf(varName)
                self.__vmWriter.writePush(segment, index)
                # Include self as argument 0
                exp_count += 1
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
            else:                                     # className
                self.__compileIdentifier(CATEGORY_CLASS, STATUS_USE)
            callName += self.__compileSymbol()        # '.'
        else:                                         # subroutineName
            # Bare call = method on this class: Subroutine -> className.Subroutine
            self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
            callName += self.__className + FUNC_NAME_DELIMITER
            exp_count += 1
        callName += self.__compileSubroutineName(STATUS_USE)
        self.__compileSymbol()                        # '('
        exp_count += self.CompileExpressionList()     # expressionList
        self.__compileSymbol()                        # ')'
        # Compile VM
        self.__vmWriter.writeCall(callName, exp_count)

    def __compileVarName(self, status):
        """ Compiles a variable name (index looked up only when the
        variable is already defined). """
        name = self.__tokenizer.peek()
        index = INDEX_NONE
        if status != STATUS_DEFINE:
            index = self.__symbolTable.indexOf(name)
        varName = self.__compileIdentifier(CATEGORY_VAR, status, KIND_VAR,
                                           index)
        return varName

    def __compileType(self):
        """
        Compiles a type.
        Syntax: 'int' | 'char' | 'boolean' | className
        """
        # 'int' | 'char' | 'boolean'
        if self.__tokenizer.peek() in {RE_INT, RE_CHAR, RE_BOOLEAN}:
            type = self.__compileKeyWord()
        # className
        else:
            type = self.__compileClassName(STATUS_USE)
        return type

    def __compileSubroutineBody(self, funcType, name):
        """
        Compiles a subroutine body.
        Syntax: '{' varDec* statements '}'

        :param funcType: 'constructor' | 'function' | 'method'
        :param name: fully-qualified VM function name (Class.sub)
        :return: number of local variables declared.
        """
        self.__openTag('subroutineBody')          # <subroutineBody>
        self.__compileSymbol()                    # '{'
        # varDec* — must precede writeFunction so the local count is known.
        while self.__tokenizer.peek() == RE_VAR:
            self.compileVarDec()
        vars = self.__symbolTable.varCount(KIND_VAR)
        self.__vmWriter.writeFunction(name, vars)
        if funcType == RE_METHOD:
            # Hold self (argument 0) at pointer 0 so field access works.
            self.__vmWriter.writePush(VM_SEGMENT_ARGUMENT, 0)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        if funcType == RE_CONSTRUCTOR:
            # Allocate memory for all fields
            fields = self.__symbolTable.varCount(KIND_FIELD)
            self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, fields)
            self.__vmWriter.writeCall(OS_MEMORY_ALLOC, 1)
            # Hold allocated memory at pointer
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 0)
        self.compileStatements()                  # statements
        self.__compileSymbol()                    # '}'
        self.__closeTag()                         # </subroutineBody>
        return vars

    ##################
    # PUBLIC METHODS #
    ##################

    def compileClass(self):
        """
        Compiles a complete class.
        Syntax: 'class' className '{' classVarDec* subroutineDec* '}'
        """
        self.__openTag('class')                   # <class>
        self.__compileKeyWord()                   # 'class'
        className = self.__compileClassName(      # className
            STATUS_DEFINE)
        self.__className = className
        self.__compileSymbol()                    # '{'
        # classVarDec*
        while self.__tokenizer.peek() in {RE_STATIC, RE_FIELD}:
            self.CompileClassVarDec()
        # subroutineDec*
        while self.__tokenizer.peek() in {
            RE_CONSTRUCTOR, RE_FUNCTION, RE_METHOD
        }:
            self.CompileSubroutine()
        self.__compileSymbol()                    # '}'
        self.__closeTag()                         # </class>

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        Syntax: ('static' | 'field') type varName (',' varName)* ';'
        """
        self.__openTag('classVarDec')             # <classVarDec>
        kind = self.__compileKeyWord()            # ('static' | 'field')
        type = self.__compileType()               # type
        moreVars = True
        while moreVars:                           # varName (',' varName)*
            name = self.__compileVarName(
                STATUS_DEFINE)
            self.__symbolTable.define(name, type, kind)
            if self.__tokenizer.peek() != RE_COMMA:
                moreVars = False
            else:
                self.__compileSymbol()            # ','
        self.__compileSymbol()                    # ';'
        self.__closeTag()                         # </classVarDec>

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        Syntax: ('constructor' | 'function' | 'method')
                ('void' | type) subroutineName '(' parameterList ')'
                subroutineBody
        """
        # Start subroutine in symbol table; label ids restart per subroutine.
        self.__resetUniqueLabels()
        self.__symbolTable.startSubroutine()
        # Compile XML
        self.__openTag('subroutineDec')           # <subroutineDec>
        funcType = self.__compileKeyWord()        # ('constructor' |
                                                  #  'function' | 'method')
        if funcType in {RE_METHOD}:
            # +1 var count for this method (+1 for self as argument 0)
            self.__symbolTable.define(VM_SELF, self.__className, KIND_ARG)
        if self.__tokenizer.peek() == RE_VOID:
            type = self.__compileKeyWord()        # 'void'
        else:
            type = self.__compileType()           # type
        subName = self.__compileSubroutineName(   # subroutineName
            STATUS_DEFINE)
        name = self.__className + FUNC_NAME_DELIMITER + subName
        self.__compileSymbol()                    # '('
        self.compileParameterList()               # parameterList
        self.__compileSymbol()                    # ')'
        self.__compileSubroutineBody(funcType, name)  # subroutineBody
        self.__closeTag()                         # </subroutineDec>

    def compileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        Syntax: ( (type varName) (',' type varName)*)?

        :return: number of parameters compiled.
        """
        parameters = 0                            # no parameters?
        self.__openTag('parameterList')           # <parameterList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            moreVars = True
            while moreVars:
                parameters += 1                   # yes parameters!
                type = self.__compileType()       # type
                name = self.__compileVarName(     # varName
                    STATUS_DEFINE)
                self.__symbolTable.define(name, type, KIND_ARG)
                if self.__tokenizer.peek() == RE_COMMA:
                    self.__compileSymbol()        # ','
                else:
                    moreVars = False
        self.__closeTag()                         # </parameterList>
        return parameters

    def compileVarDec(self):
        """
        Compiles a var declaration.
        Syntax: 'var' type varName (',' varName)* ';'
        """
        self.__openTag('varDec')                  # <varDec>
        moreVars = True
        self.__compileKeyWord()                   # 'var'
        type = self.__compileType()               # type
        while moreVars:
            name = self.__tokenizer.peek()        # varName
            self.__symbolTable.define(name, type, KIND_VAR)
            self.__compileVarName(STATUS_DEFINE)
            if self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()            # ','
            else:
                moreVars = False
        self.__compileSymbol()                    # ';'
        self.__closeTag()                         # </varDec>

    def compileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing
        "{}".
        Syntax: statement* where statement is in:
            letStatement | ifStatement | whileStatement | doStatement |
            returnStatement
        """
        self.__openTag('statements')              # <statements>
        statement = self.__tokenizer.peek()
        while statement in {
            RE_LET, RE_IF, RE_WHILE, RE_DO, RE_RETURN_NOTHING,
            RE_RETURN_SOMETHING
        }:
            if statement == RE_LET:
                self.compileLet()
            elif statement == RE_IF:
                self.compileIf()
            elif statement == RE_WHILE:
                self.compileWhile()
            elif statement == RE_DO:
                self.compileDo()
            elif statement == RE_RETURN_NOTHING:
                self.compileReturnNothing()
            elif statement == RE_RETURN_SOMETHING:
                self.compileReturnSomething()
            statement = self.__tokenizer.peek()
        self.__closeTag()                         # </statements>

    def compileDo(self):
        """
        Compiles a do statement.
        Syntax: 'do' subroutineCall ';'
        """
        self.__openTag('doStatement')             # <doStatement>
        self.__compileKeyWord()                   # 'do'
        self.__compileSubroutineCall()            # subroutineCall
        # A do-statement discards the callee's return value.
        self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
        self.__compileSymbol()                    # ';'
        self.__closeTag()                         # </doStatement>

    def compileLet(self):
        """
        Compiles a let statement.
        Syntax: 'let' varName ('[' expression ']')? '=' expression ';'
        """
        isArray = False
        self.__openTag('letStatement')            # <letStatement>
        self.__compileKeyWord()                   # 'let'
        varName = self.__tokenizer.peek()
        index = self.__symbolTable.indexOf(varName)
        segment = self.__symbolTable.segmentOf(varName)
        self.__compileVarName(STATUS_USE)         # varName
        if self.__tokenizer.peek() == RE_BRACKETS_SQUARE_LEFT:
            isArray = True
            self.__compileSymbol()                # '['
            self.CompileExpression()              # expression
            self.__compileSymbol()                # ']'
            # Add the offset to the variable address
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            # Address of array element is at stack top
        self.__compileSymbol()                    # '='
        self.CompileExpression()                  # expression
        self.__compileSymbol()                    # ';'
        self.__closeTag()                         # </letStatement>
        if isArray:
            # Pop rh-expression to temp
            self.__vmWriter.writePop(VM_SEGMENT_TEMP, 0)
            # Get address of array element
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            # Push rh-expression to stack
            self.__vmWriter.writePush(VM_SEGMENT_TEMP, 0)
            # Pop rh-expression to address of element
            self.__vmWriter.writePop(VM_SEGMENT_THAT, 0)
        else:
            # Compile only if the varName was defined
            # (unlike class name of subroutine name)
            # if segment != KIND_NONE:  # varName was defined
            index = self.__symbolTable.indexOf(varName)
            self.__vmWriter.writePop(segment, index)

    def compileWhile(self):
        """
        Compiles a while statement.
        Syntax: 'while' '(' expression ')' '{' statements '}'
        """
        LABEL_EXP, LABEL_END = self.__uniqueWhileLabels()
        self.__openTag('whileStatement')          # <whileStatement>
        self.__compileKeyWord()                   # 'while'
        self.__compileSymbol()                    # '('
        self.__vmWriter.writeLabel(               # label WHILE_EXP
            LABEL_EXP)
        self.CompileExpression()                  # expression
        # Negate the expression
        # (jump out of while if *NOT* expression)
        self.__vmWriter.writeArithmetic(RE_TILDA, False)
        self.__compileSymbol()                    # ')'
        self.__vmWriter.writeIf(LABEL_END)        # if-goto WHILE_END
        self.__compileSymbol()                    # '{'
        self.compileStatements()                  # statements
        self.__compileSymbol()                    # '}'
        self.__vmWriter.writeGoto(LABEL_EXP)      # goto WHILE_EXP
        self.__vmWriter.writeLabel(LABEL_END)     # label WHILE_END
        self.__closeTag()                         # </whileStatement>

    def compileReturnNothing(self):
        """
        Compiles a 'return;' statement.
        Syntax: 'return;'

        NOTE(review): only one advance() happens here (via the ';' write),
        which presumably means the tokenizer delivers 'return;' as a single
        RE_RETURN_NOTHING token — confirm against JackTokenizer.
        """
        # Compile XML
        self.__openTag('returnStatement')         # <returnStatement>
        self.__writeToken(
            'return',                             # 'return'
            TOKEN_TYPE_KEYWORD)
        self.__writeTokenAndAdvance(
            ';',                                  # ';'
            TOKEN_TYPE_SYMBOL)
        # True presumably selects the void-return form — verify VMWriter.
        self.__vmWriter.writeReturn(True)
        self.__closeTag()                         # </returnStatement>

    def compileReturnSomething(self):
        """
        Compiles a return statement.
        Syntax: 'return' expression? ';'
        """
        # Compile XML
        self.__openTag('returnStatement')         # <returnStatement>
        self.__writeTokenAndAdvance(
            'return',                             # 'return'
            TOKEN_TYPE_KEYWORD)
        self.CompileExpression()                  # expression
        self.__compileSymbol()                    # ';'
        self.__vmWriter.writeReturn()
        self.__closeTag()                         # </returnStatement>

    def compileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        Syntax: 'if' '(' expression ')' '{' statements '}'
                ( 'else' '{' statements '}' )?
        """
        LABEL_TRUE, LABEL_FALSE, LABEL_END = self.__uniqueIfLabels()
        self.__openTag('ifStatement')             # <ifStatement>
        self.__compileKeyWord()                   # 'if'
        self.__compileSymbol()                    # '('
        # VM Code for computing ~(cond)
        self.CompileExpression()                  # expression
        self.__compileSymbol()                    # ')'
        self.__vmWriter.writeIf(LABEL_TRUE)       # if-goto LABEL_TRUE
        self.__vmWriter.writeGoto(LABEL_FALSE)    # goto LABEL_FALSE
        self.__vmWriter.writeLabel(LABEL_TRUE)    # label LABEL_TRUE
        self.__compileSymbol()                    # '{'
        # VM Code for executing TRUE
        self.compileStatements()                  # statements
        self.__compileSymbol()                    # '}'
        if self.__tokenizer.peek() == RE_ELSE:
            # NOTE(review): the goto below is commented out in the source;
            # without it the true-branch appears to fall through into the
            # else-branch — verify against the generated VM output.
            # self.__vmWriter.writeGoto(LABEL_END)  # goto LABEL_END
            self.__vmWriter.writeLabel(           # label LABEL_FALSE
                LABEL_FALSE)
            self.__compileKeyWord()               # 'else'
            self.__compileSymbol()                # '{'
            # VM Code for executing ELSE
            self.compileStatements()              # statements
            self.__compileSymbol()                # '}'
            self.__vmWriter.writeLabel(           # label END
                LABEL_END)
        else:
            self.__vmWriter.writeLabel(           # label FALSE
                LABEL_FALSE)
        self.__closeTag()                         # </ifStatement>

    def CompileExpression(self):
        """
        Compiles an expression.
        Syntax: term (op term)*
        """
        self.__openTag('expression')              # <expression>
        self.CompileTerm()                        # term
        while self.__tokenizer.peek() in {
            RE_PLUS, RE_BAR, RE_ASTERISK, RE_SLASH, RE_AMPERSAND,
            RE_VBAR, RE_LT, RE_GT, RE_EQ
        }:
            # Postfix emission: operands first, then the operator (RPN).
            symbol = self.__compileSymbol()       # op
            self.CompileTerm()                    # term
            self.__vmWriter.writeSymbol(symbol)
        self.__closeTag()                         # </expression>

    def __correctString(self, string):
        """
        Convert escape characters in a string to valid chars.

        :param string: string to correct
        :return: corrected string with escaped characters corrected
        """
        correct = string.replace('\t', '\\t')
        correct = correct.replace('\n', '\\n')
        correct = correct.replace('\r', '\\r')
        return correct

    def CompileTerm(self):
        """
        Compiles a term. When the current token is an identifier this must
        distinguish a plain variable, an array entry, and a subroutine
        call; a single look-ahead token ("[", "(", or ".") suffices. Any
        other token is not part of this term and is not advanced over.
        Syntax: integerConstant | stringConstant | keywordConstant |
                varName | varName '[' expression ']' | subroutineCall |
                '(' expression ')' | unaryOp term
        """
        self.__openTag('term')                    # <term>
        lookahead = self.__tokenizer.lookahead()
        if self.__tokenizer.peek() == RE_BRACKETS_LEFT:
            self.__compileSymbol()                # '('
            self.CompileExpression()              # expression
            self.__compileSymbol()                # ')'
        elif self.__tokenizer.peek() in {RE_TILDA, RE_BAR}:
            symbol = self.__compileSymbol()       # unaryOp
            self.CompileTerm()                    # term
            self.__vmWriter.writeArithmetic(symbol, False)
        elif lookahead == RE_BRACKETS_SQUARE_LEFT:
            varName = self.__tokenizer.peek()
            self.__compileVarName(STATUS_USE)     # varName
            self.__compileSymbol()                # '['
            self.CompileExpression()              # expression
            self.__compileSymbol()                # ']'
            # Compile array indexing: base + offset -> pointer 1, read that 0
            kind = self.__symbolTable.kindOf(varName)
            index = self.__symbolTable.indexOf(varName)
            segment = KIND_2_SEGMENT[kind]
            self.__vmWriter.writePush(segment, index)
            self.__vmWriter.writeArithmetic(RE_PLUS, True)
            self.__vmWriter.writePop(VM_SEGMENT_POINTER, 1)
            self.__vmWriter.writePush(VM_SEGMENT_THAT, 0)
        elif lookahead in {RE_BRACKETS_LEFT, RE_DOT}:
            self.__compileSubroutineCall()        # subroutineCall |
                                                  # (varName | className)
                                                  # '.' subroutineCall
        else:
            if self.__tokenizer.tokenType() == TOKEN_TYPE_INTEGER:
                self.__compileIntVal()            # integerConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_STRING:
                self.__compileStringVal()         # stringConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_KEYWORD:
                # true | false | null | this
                # true | false | null - pushed to stack as constants
                keyword = self.__tokenizer.peek()
                if keyword in {RE_FALSE, RE_NULL, RE_TRUE}:
                    self.__vmWriter.writePush(VM_SEGMENT_CONSTANT, 0)
                    # true is represented as ~0 (all bits set)
                    if keyword == RE_TRUE:
                        self.__vmWriter.writeArithmetic(RE_TILDA, False)
                # this - pushes pointer
                elif keyword == RE_THIS:
                    self.__vmWriter.writePush(VM_SEGMENT_POINTER, 0)
                self.__compileKeyWord()           # keywordConstant
            elif self.__tokenizer.tokenType() == TOKEN_TYPE_IDENTIFIER:
                # Plain variable reference
                name = self.__tokenizer.peek()
                kind = self.__symbolTable.kindOf(name)
                index = self.__symbolTable.indexOf(name)
                segment = self.__symbolTable.segmentOf(name)
                self.__compileIdentifier(kind, STATUS_USE, kind, index)
                self.__vmWriter.writePush(segment, index)
        self.__closeTag()                         # </term>

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        Syntax: (expression (',' expression)* )?

        :return: number of expressions compiled.
        """
        exp_count = 0
        self.__openTag('expressionList')          # <expressionList>
        if self.__tokenizer.peek() != RE_BRACKETS_RIGHT:
            self.CompileExpression()              # expression
            exp_count += 1
            while self.__tokenizer.peek() == RE_COMMA:
                self.__compileSymbol()            # ','
                self.CompileExpression()
                exp_count += 1
        self.__closeTag()                         # </expressionList>
        return exp_count
class CompilationEngine: def __init__(self, input_path, output_path): """ creates a new compilation engine with the given input and output. the next routine called must be compileClass() :param input_path: input stream/file :param output_path: output stream/file """ self._root = None self._current_node = None self.tokenizer = JackTokenizer(input_path) self.CompileClass() for elem in self._root.iter(): if elem.tag == 'expressionList' or elem.tag == 'parameterList': if "/>" in str(ET.tostring(elem)): elem.text = '\n' p = ET.XMLParser(remove_blank_text=True) tree = ET.ElementTree(self._root, parser=p) tree.write(output_path, method='xml', pretty_print=True) def CompileClass(self): """ Compiles a complete class. """ self._root = ET.Element('class') self.tokenizer.advance() self._write_line(self._root, self.tokenizer.keyWord()) self.tokenizer.advance() self._write_line(self._root, self.tokenizer.identifier()) self.tokenizer.advance() self._write_line(self._root, self.tokenizer.symbol()) self.CompileClassVarDec() self.CompileSubroutine() self.tokenizer.advance() self._write_line(self._root, self.tokenizer.symbol()) def _write_line(self, node, name): """ writes the current node to the output file :param name: the name of the node """ _ = ET.SubElement(node, TYPES[self.tokenizer.tokenType()]) _.text = ' ' + name + ' ' def CompileClassVarDec(self): """ Compiles a static declaration or a field declaration. 
""" peek = self.tokenizer.peek() if 'static' in peek or 'field' in peek: _classVarNode = ET.SubElement(self._root, 'classVarDec') while 'static' in peek or 'field' in peek: self.tokenizer.advance() self._write_line(_classVarNode, self.tokenizer.keyWord()) # field/static self.tokenizer.advance() self._write_line(_classVarNode, self.tokenizer.keyWord()) # type self.tokenizer.advance() self._write_line(_classVarNode, self.tokenizer.identifier()) # name self.tokenizer.advance() while self.tokenizer.symbol() == ',': self._write_line(_classVarNode, self.tokenizer.symbol()) # , self.tokenizer.advance() self._write_line(_classVarNode, self.tokenizer.identifier()) # name self.tokenizer.advance() self._write_line(_classVarNode, self.tokenizer.symbol()) # ; peek = self.tokenizer.peek() if 'static' in peek or 'field' in peek: _classVarNode = ET.SubElement(self._root, 'classVarDec') def CompileSubroutine(self): """ Compiles a complete method, function, or constructor. """ _last_node = self._current_node _subroutineNode = ET.SubElement(self._root, 'subroutineDec') self._current_node = _subroutineNode peek = self.tokenizer.peek() while 'function' in peek or 'constructor' in peek or 'method' in peek: self.tokenizer.advance() self._write_line(_subroutineNode, self.tokenizer.keyWord()) # const/func/method self.tokenizer.advance() self._write_line(_subroutineNode, self.tokenizer.current_token) # void/type self.tokenizer.advance() self._write_line(_subroutineNode, self.tokenizer.identifier()) # name self.tokenizer.advance() self._write_line(_subroutineNode, self.tokenizer.symbol()) # '(' self.CompileParameterList() self.tokenizer.advance() self._write_line(_subroutineNode, self.tokenizer.symbol()) # ')' self.tokenizer.advance() self._current_node = ET.SubElement(_subroutineNode, 'subroutineBody') self._write_line(self._current_node, self.tokenizer.symbol()) # '{' peek = self.tokenizer.peek() if 'var' in peek: self.CompileVarDec() self.CompileStatements() self.tokenizer.advance() 
self._write_line(self._current_node, self.tokenizer.symbol()) # '}' peek = self.tokenizer.peek() if 'function' in peek or 'constructor' in peek or 'method' in peek: _subroutineNode = ET.SubElement(self._root, 'subroutineDec') self._current_node = _subroutineNode def CompileParameterList(self): """ Compiles a (possibly empty) parameter list, not including the enclosing () """ param_list = ET.SubElement(self._current_node, 'parameterList') peek = self.tokenizer.peek() if peek != ')': self.tokenizer.advance() self._write_line(param_list, self.tokenizer.keyWord()) # type self.tokenizer.advance() self._write_line(param_list, self.tokenizer.identifier()) # name peek = self.tokenizer.peek() while peek == ',': self.tokenizer.advance() self._write_line(param_list, self.tokenizer.symbol()) # ',' self.tokenizer.advance() self._write_line(param_list, self.tokenizer.keyWord()) # type self.tokenizer.advance() self._write_line(param_list, self.tokenizer.identifier()) # name peek = self.tokenizer.peek() # if not param_list.text: # param_list.text = '\n' def CompileVarDec(self): """ Compiles a var declaration. 
""" _varDecNode = ET.SubElement(self._current_node, 'varDec') peek = self.tokenizer.peek() while 'var' in peek: self.tokenizer.advance() self._write_line(_varDecNode, self.tokenizer.keyWord()) self.tokenizer.advance() self._write_line(_varDecNode, self.tokenizer.keyWord()) self.tokenizer.advance() self._write_line(_varDecNode, self.tokenizer.identifier()) self.tokenizer.advance() while self.tokenizer.symbol() == ',': self._write_line(_varDecNode, self.tokenizer.symbol()) # , self.tokenizer.advance() self._write_line(_varDecNode, self.tokenizer.identifier()) # name self.tokenizer.advance() self._write_line(_varDecNode, self.tokenizer.symbol()) # ; peek = self.tokenizer.peek() if peek == 'var': _varDecNode = ET.SubElement(self._current_node, 'varDec') def CompileStatements(self): """ Compiles a sequence of statements, not including the enclosing "{}" """ peek = self.tokenizer.peek() _parent = self._current_node self._current_node = ET.SubElement(self._current_node, 'statements') while 'let' in peek or 'if' in peek or 'while' in peek or 'do' in peek or 'return' in peek: if 'let' in peek: self.CompileLet() elif 'if' in peek: self.CompileIf() elif 'while' in peek: self.CompileWhile() elif 'do' in peek: self.CompileDo() elif 'return' in peek: self.CompileReturn() peek = self.tokenizer.peek() self._current_node = _parent def CompileDo(self): """ Compiles a do statement. 
""" _last_node = self._current_node _statement = ET.SubElement(self._current_node, 'doStatement') self._current_node = _statement self.tokenizer.advance() self._write_line(_statement, self.tokenizer.keyWord()) self.tokenizer.advance() self._write_line(_statement, self.tokenizer.identifier()) peek = self.tokenizer.peek() while peek == '.': self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) self.tokenizer.advance() self._write_line(_statement, self.tokenizer.identifier()) peek = self.tokenizer.peek() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '(' self.CompileExpressionList() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # ')' self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # ';' self._current_node = _last_node def CompileLet(self): """ Compiles a let statement. """ _last_node = self._current_node _statement = ET.SubElement(self._current_node, 'letStatement') self._current_node = _statement self.tokenizer.advance() self._write_line(_statement, self.tokenizer.keyWord()) self.tokenizer.advance() self._write_line(_statement, self.tokenizer.identifier()) peek = self.tokenizer.peek() if peek == '[': self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '[' self.tokenizer.advance() self.CompileExpression() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # ']' self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '=' self.tokenizer.advance() self.CompileExpression() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # ';' self._current_node = _last_node def CompileWhile(self): """ Compiles a while statement. 
""" _last_node = self._current_node _statement = ET.SubElement(self._current_node, 'whileStatement') self._current_node = _statement self.tokenizer.advance() self._write_line(_statement, self.tokenizer.keyWord()) # while self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '(' self.tokenizer.advance() self.CompileExpression() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # ')' self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '{' self.CompileStatements() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '}' self._current_node = _last_node def CompileReturn(self): """ Compiles a return statement. """ _last_node = self._current_node _statement = ET.SubElement(self._current_node, 'returnStatement') self._current_node = _statement self.tokenizer.advance() self._write_line(_statement, self.tokenizer.keyWord()) # return peek = self.tokenizer.peek() if peek != ';': self.tokenizer.advance() self.CompileExpression() self.tokenizer.advance() else: self.tokenizer.advance() self._write_line(self._current_node, self.tokenizer.symbol()) # ';' self._current_node = _last_node def CompileIf(self): """ Compiles an if statement, possibly with a trailing else clause. 
""" _last_node = self._current_node _statement = ET.SubElement(self._current_node, 'ifStatement') self._current_node = _statement self.tokenizer.advance() self._write_line(_statement, self.tokenizer.keyWord()) # if self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '(' self.tokenizer.advance() self.CompileExpression() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # ')' self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '{' self.CompileStatements() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '}' peek = self.tokenizer.peek() if peek == 'else': self.tokenizer.advance() self._write_line(_statement, self.tokenizer.keyWord()) # else self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '{' self.CompileStatements() self.tokenizer.advance() self._write_line(_statement, self.tokenizer.symbol()) # '}' self._current_node = _last_node def CompileExpression(self): """ Compiles an expression. """ _last_node = self._current_node self._current_node = ET.SubElement(self._current_node, 'expression') self.CompileTerm() peek = self.tokenizer.peek() while peek in OPS: self.tokenizer.advance() self._write_line(self._current_node, self.tokenizer.symbol()) self.tokenizer.advance() self.CompileTerm() peek = self.tokenizer.peek() self._current_node = _last_node def CompileTerm(self): """ Compiles a term. This routine is faced with a slight difficulty when trying to decide between some of the alternative parsing rules. Specifically, if the current token is an identifier, the routine must distinguish between a variable, an array entry, and a subroutine call. A single look-ahead token, which may be one of [, (, or . suffices to distinguish between the three possibilities. Any other token is not part of this term and should not be advanced over. 
""" term_branch = ET.SubElement(self._current_node, 'term') # self.tokenizer.advance() if self.tokenizer.tokenType( ) == 'INT_CONST' or self.tokenizer.tokenType() == 'KEYWORD': self._write_line(term_branch, self.tokenizer.current_token) elif self.tokenizer.tokenType() == 'STRING_CONST': self._write_line(term_branch, self.tokenizer.stringVal()) elif self.tokenizer.current_token in UNARY_OP: self._write_line(term_branch, self.tokenizer.symbol()) last_node = self._current_node self._current_node = term_branch self.tokenizer.advance() self.CompileTerm() self._current_node = last_node elif self.tokenizer.current_token in SYMBOLS: self._write_line(term_branch, self.tokenizer.symbol()) self.tokenizer.advance() last_node = self._current_node self._current_node = term_branch self.CompileExpression() self._current_node = last_node self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.symbol()) else: self._write_line(term_branch, self.tokenizer.identifier()) peek = self.tokenizer.peek() if '[' in peek or '(' in peek: self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.symbol()) self.tokenizer.advance() last_node = self._current_node self._current_node = term_branch self.CompileExpression() self._current_node = last_node self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.symbol()) elif '.' in peek: self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.symbol()) self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.identifier()) self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.symbol()) last_node = self._current_node self._current_node = term_branch self.CompileExpressionList() self._current_node = last_node self.tokenizer.advance() self._write_line(term_branch, self.tokenizer.symbol()) def CompileExpressionList(self): """ Compiles a (possibly empty) comma-separated list of expressions. 
""" last_node = self._current_node self._current_node = ET.SubElement(self._current_node, 'expressionList') peek = self.tokenizer.peek() while peek != ')': self.tokenizer.advance() if peek == ',': self._write_line(self._current_node, self.tokenizer.symbol()) self.tokenizer.advance() self.CompileExpression() peek = self.tokenizer.peek() self._current_node = last_node
class CompilationEngine:
    """Recursive-descent analyzer skeleton that prints a Jack parse tree as XML.

    The token-advancing helpers (advanceSymbol / advanceKeyword / ...) are the
    working core; most compileXxx statement handlers are still stubs.
    """

    def __init__(self, filename):
        self.tokenizer = JackTokenizer(filename)

    def compile(self, filename):
        # NOTE(review): `initialize` is not defined in this file — confirm it
        # exists elsewhere or replace with a JackTokenizer re-initialization.
        input_stream = initialize(filename)
        # Fixed: was a bare `compileClass()` call, a NameError at runtime.
        self.compileClass()

    def xml_print_el(self):
        # NOTE(review): accesses token_type/token as attributes while the
        # helpers below call tokenizer methods — verify the tokenizer API.
        xmlprint(self.tokenizer.token_type, self.tokenizer.token)

    def advanceSymbol(self, symbol):
        """Advance one token and require it to be the given symbol.

        :raises SyntaxError: on end of stream or symbol mismatch.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Symbol expected:' + symbol +
                              ', found end of stream')
        if self.tokenizer.symbol() != symbol:
            raise SyntaxError('Symbol expected:' + symbol)

    def advanceKeyword(self, keyword):
        """Advance one token and require it to be the given keyword.

        :raises SyntaxError: on end of stream or keyword mismatch.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keyword expected:' + keyword +
                              ', found end of stream')
        if self.tokenizer.keyword() != keyword:
            raise SyntaxError('Keyword expected:' + keyword)

    def advanceTokenType(self, tokenType):
        """Advance one token and require it to have the given token type.

        Fixed: the comparison was hard-coded to 'identifier', silently
        ignoring the ``tokenType`` parameter.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Identifier expected, found end of stream')
        if self.tokenizer.token_type != tokenType:
            raise SyntaxError('Identifier expected')

    def advanceKeywords(self, *args):
        """Advance one token and require it to be one of ``args``.

        Fixed: the original concatenated the ``args`` tuple into a string
        (TypeError) and compared against an undefined name ``keyword``
        (NameError).
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('Keywords expected:' + ', '.join(args) +
                              ', found end of stream')
        if self.tokenizer.keyword() not in args:
            raise SyntaxError('Keywords expected:' + ', '.join(args))

    def advanceAndGetType(self):
        """Advance one token and return it if it can start a type.

        :raises SyntaxError: on end of stream or when the token is not a type.
        """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('type expected, found end of stream')
        if self.is_type():
            return self.tokenizer.token
        raise SyntaxError('type expected')

    def is_type(self):
        """Return True if the current token is a Jack type (primitive or class)."""
        return self.tokenizer.keyword() in [
            'int', 'char', 'boolean'
        ] or self.tokenizer.token_type == 'identifier'

    def advanceAndGetReturnType(self):
        """Advance one token and return a subroutine return type ('void' or a type)."""
        self.advance()
        if self.is_type() or self.tokenizer.keyword() == 'void':
            return self.tokenizer.token
        raise SyntaxError('type expected')

    def advanceToClassName(self):
        """Advance to and return a className identifier."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToVarName(self):
        """Advance to and return a varName identifier."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def advanceToSubroutineName(self):
        """Advance to and return a subroutineName identifier."""
        self.advanceTokenType('identifier')
        return self.tokenizer.identifier()

    def hasClassVarDec(self):
        # Stub: not implemented yet.
        pass

    def advance(self):
        """Advance one token; raise SyntaxError if the stream is exhausted."""
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        else:
            raise SyntaxError('found end of stream!')

    def compileClass(self):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'."""
        print('<class>')
        self.advanceKeyword('class')
        self.xml_print_el()
        # className
        self.advanceToClassName()
        className = self.tokenizer.identifier()
        self.xml_print_el()
        # '{'
        self.advanceSymbol('{')
        self.xml_print_el()
        self.advance()
        # classVarDec*
        while self.tokenizer.keyword() in ['static', 'field']:
            self.compileClassVarDec()
        # subroutineDec*
        while self.tokenizer.keyword() in ['constructor', 'function',
                                           'method']:
            self.compileSubroutine()
        # '}'
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</class>')

    def compileClassVarDec(self):
        """Compile: ('static'|'field') type varName (',' varName)* ';'.

        NOTE(review): the (',' varName)* repetition is not handled yet.
        """
        print('<classVarDec>')
        # ('static'|'field')
        self.xml_print_el()
        # type — renamed local: `type` shadowed the builtin
        var_type = self.advanceAndGetType()
        self.xml_print_el()
        # varName
        var_name = self.advanceToVarName()
        self.xml_print_el()
        # ';'
        self.advanceSymbol(';')
        self.xml_print_el()
        print('</classVarDec>')
        self.advance()

    def compileSubroutine(self):
        """Compile a subroutineDec: kind, return type, name, params and body."""
        print('<subroutineDec>')
        kind = self.tokenizer.keyword()
        self.xml_print_el()
        # ('void' | type)
        return_type = self.advanceAndGetReturnType()
        self.xml_print_el()
        # subroutineName
        name = self.advanceToSubroutineName()
        self.xml_print_el()
        # '('
        self.advanceSymbol('(')
        self.xml_print_el()
        self.compileParameterList()
        # ')'
        self.advanceSymbol(')')
        self.xml_print_el()
        # subroutineBody
        self.compileSubroutineBody()
        print('</subroutineDec>')
        self.advance()

    def compileSubroutineBody(self):
        """Compile: '{' varDec* statements '}'."""
        print('<subroutineBody>')
        # '{'
        self.advanceSymbol('{')
        self.xml_print_el()
        # varDec* — fixed: called nonexistent self.varDec() (AttributeError).
        # TODO: handle the * repetition
        self.compileVarDec()
        # statements
        self.compileStatements()
        # '}'
        self.advanceSymbol('}')
        self.xml_print_el()
        print('</subroutineBody>')

    def compileParameterList(self):
        # Stub: emits an empty parameterList element only.
        print('<parameterList>')
        print('</parameterList>')

    # The statement/expression handlers below are stubs; `self` was added so
    # they are callable as instance methods (bare defs raised TypeError).
    def compileVarDec(self):
        pass

    def compileStatements(self):
        pass

    def compileDo(self):
        pass

    def compileLet(self):
        pass

    def compileWhile(self):
        pass

    def compileReturn(self):
        pass

    def compileIf(self):
        pass

    def compileExpression(self):
        pass

    def compileTerm(self):
        # If identifier: variable, array entry, or subroutine call —
        # one look-ahead token ('[', '(' or '.') disambiguates.
        pass

    def compileExpressionList(self):
        # Comma-separated list of expressions.
        pass
class CompilationEngine:
    """
    The compilation engine compiles the Jack code given in the input file
    into XML saved in out_file.
    """

    def __init__(self, in_file, out_file):
        """
        A compilation engine constructor.

        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        self.tokenizer = JackTokenizer(in_file)
        self.out_file = open(out_file, 'w')
        # Current nesting depth; one tab of indentation per level.
        self._indent_count = 0

    def compile_class(self):
        """Compiles a class according to the grammar; exits on a bad header."""
        self._write_outer_tag(CLASS_TAG)
        self.tokenizer.advance()
        if self.tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        self._check_write_symbol("{")
        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_write_symbol("}")
        self._write_outer_tag(CLASS_TAG, IS_ENDING_TAG)

    def compile_class_var_dec(self):
        """Compiles the class's variable declarations."""
        self._write_outer_tag(CLASS_VAR_DEC_TAG)
        # we only enter this function when the current token is correct,
        # so we can just write it
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        while self._check_if_comma():  # there are more variables
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(CLASS_VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_subroutine_dec(self):
        """Compiles the class's subroutine (method/function) declarations."""
        self._write_outer_tag(SUBROUTINE_DEC_TAG)
        # we only enter this function when the current token is correct,
        # so we can just write it
        self._write_token(self.tokenizer.token_type())
        # the subroutine is either void or has a return type
        if self.tokenizer.key_word() == 'void':
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_type()
        self._check_write_name()
        self._check_write_symbol("(")
        self.compile_parameter_list()
        self._check_write_symbol(")")
        self.compile_subroutine_body()
        self._write_outer_tag(SUBROUTINE_DEC_TAG, IS_ENDING_TAG)

    def compile_parameter_list(self):
        """Compiles the (possibly empty) parameter list of a subroutine."""
        self._write_outer_tag(PARAMETER_LIST_TAG)
        # if the current token is ')' the parameter list is empty
        if self.tokenizer.symbol() != ')':
            self._check_write_type()
            self._check_write_name()
            while self._check_if_comma():  # there are more params
                self._check_write_symbol(",")
                self._check_write_type()
                self._check_write_name()
        self._write_outer_tag(PARAMETER_LIST_TAG, IS_ENDING_TAG)

    def compile_subroutine_body(self):
        """Compiles the body of a subroutine: '{' varDec* statements '}'."""
        self._write_outer_tag(SUBROUTINE_BODY_TAG)
        self._check_write_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine
        while self.tokenizer.key_word() == 'var':
            self.compile_var_dec()
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag(SUBROUTINE_BODY_TAG, IS_ENDING_TAG)

    def compile_var_dec(self):
        """Compiles a local variable declaration: 'var' type name (',' name)* ';'."""
        self._write_outer_tag(VAR_DEC_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        # there may be multiple variable names in the declaration
        while self._check_if_comma():
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_statements(self):
        """Compiles zero or more statements, dispatching on the keyword."""
        self._write_outer_tag(STATEMENTS_TAG)
        while self._check_if_statement():
            if self.tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.tokenizer.key_word() == 'return':
                self.compile_return()
        self._write_outer_tag(STATEMENTS_TAG, IS_ENDING_TAG)

    def compile_do(self):
        """Compiles a do statement: 'do' subroutineCall ';'."""
        self._write_outer_tag(DO_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self.compile_subroutine_call()
        self._check_write_symbol(";")
        self._write_outer_tag(DO_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_let(self):
        """Compiles a let statement, with an optional array subscript."""
        self._write_outer_tag(LET_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        if self.tokenizer.symbol() == '[':  # if there is an array
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        self._check_write_symbol("=")
        self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(LET_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_if(self):
        """Compiles an if statement, possibly with a trailing else clause."""
        self._write_outer_tag(IF_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        # there can also be an if/else scenario
        if self.tokenizer.key_word() == 'else':
            self._write_token(self.tokenizer.token_type())
            self._check_write_symbol("{")
            self.compile_statements()
            self._check_write_symbol("}")
        self._write_outer_tag(IF_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_while(self):
        """Compiles a while statement."""
        self._write_outer_tag("whileStatement")
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag("whileStatement", IS_ENDING_TAG)

    def compile_return(self):
        """Compiles a return statement: 'return' expression? ';'."""
        self._write_outer_tag(RETURN_STATEMENT_TAG)
        self._write_token(self.tokenizer.token_type())
        # if the current token is ';' we return nothing,
        # otherwise we return an expression
        if not self.tokenizer.symbol() == ';':
            self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(RETURN_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_subroutine_call(self):
        """
        Compiles a subroutine call (an actual invocation, as opposed to a
        declaration): name ('.' name)? '(' expressionList ')'.
        """
        self._check_write_name()
        # there may be a '.' in the foo.bar() (or Foo.bar()) scenario
        if self.tokenizer.symbol() == ".":
            self._check_write_symbol(".")
            self._check_write_name()
        self._check_write_symbol("(")
        self.compile_expression_list()
        self._check_write_symbol(")")

    def compile_expression(self):
        """Compiles an expression: term (op term)*."""
        self._write_outer_tag(EXPRESSION_TAG)
        self.compile_term()
        # there may be several operators in one expression
        while self.tokenizer.symbol() in OPERATIONS:
            self._write_op()
            self.compile_term()
        self._write_outer_tag(EXPRESSION_TAG, IS_ENDING_TAG)

    def compile_term(self):
        """Compiles a term according to the grammar; exits on a bad token."""
        self._write_outer_tag(TERM_TAG)
        cur_type = self.tokenizer.token_type()
        # either a string/int constant
        if self.tokenizer.token_type() in ["INT_CONST", "STRING_CONST"]:
            self._write_token(cur_type)
        # or a constant keyword (true, false, null, this)
        elif self.tokenizer.key_word() in KEYWORD_CONST:
            self._write_token(cur_type)
        # or an expression within round brackets
        elif self.tokenizer.symbol() == '(':
            self._write_token(cur_type)
            self.compile_expression()
            self._check_write_symbol(")")
        # or a unary op and then a term
        elif self.tokenizer.symbol() in UNARY_OPS:
            self._write_op()
            self.compile_term()
        # or it is an identifier, which could be several things
        elif self.tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()
        self._write_outer_tag(TERM_TAG, IS_ENDING_TAG)

    def _compile_term_identifier(self):
        """Compiles a term whose current token is an identifier."""
        # an array entry
        if self.tokenizer.get_next_token() == '[':
            self._check_write_name()
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        # or a subroutine call
        elif self.tokenizer.get_next_token() in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._check_write_name()  # or just a variable name

    def compile_expression_list(self):
        """Compiles a (possibly empty) comma-separated expression list."""
        self._write_outer_tag(EXPRESSION_LIST_TAG)
        # if it is ')' then the expression list is empty
        if self.tokenizer.symbol() != ')':
            self.compile_expression()
            while self._check_if_comma():  # while there are more expressions
                self._write_token(self.tokenizer.token_type())
                self.compile_expression()
        self._write_outer_tag(EXPRESSION_LIST_TAG, IS_ENDING_TAG)

    def _check_if_var_dec(self):
        """
        Check if we are currently compiling a variable declaration.

        :return: true iff the current token is either 'static' or 'field'
        """
        return self.tokenizer.key_word() in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        Check if we are currently compiling a subroutine declaration.

        :return: true iff the current token is 'constructor', 'function'
            or 'method'
        """
        return self.tokenizer.key_word() in SUBROUTINE

    def _check_if_comma(self):
        """
        Check if the current token is a comma.

        :return: true iff the current token is a ','
        """
        return self.tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        Check if we are currently compiling a statement.

        :return: true iff the current token is in
            ['let', 'if', 'while', 'do', 'return']
        """
        return self.tokenizer.key_word() in STATEMENTS

    def _check_write_type(self):
        """
        Check that the current token is a valid type and, if so, write it
        to the output file.
        """
        if self.tokenizer.key_word() in TYPE_KEYWORDS:
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_name()

    def _check_write_symbol(self, expected_symbol):
        """
        Check that the current token is the expected symbol and, if so,
        write it to the output file.

        :param expected_symbol: the symbol we are validating
        :return: prints an illegal-statement error and exits the program
            if it is not the expected symbol
        """
        if self.tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())

    def _check_write_name(self):
        """
        Check that the current token is a name (identifier) and, if so,
        write it to the output file.

        :return: prints an illegal-statement error and exits the program
            if it is not a name
        """
        if self.tokenizer.identifier():
            self._write_token("IDENTIFIER")
        else:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()

    def _write_outer_tag(self, tag_str, end=False):
        """
        Write the outer tag of the section we are compiling.

        :param tag_str: the tag name of the current section
        :param end: true iff it is a closing tag
        """
        if end:
            # decrease the indent count before the closing tag
            self._indent_count -= 1
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("</" + tag_str + ">\n")
        else:
            # increase the indent count after the opening tag
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("<" + tag_str + ">\n")
            self._indent_count += 1

    def _write_op(self):
        """
        Write an op symbol to the out file.

        Fixed: '<', '>', '&' and '"' must be written as the XML entities
        &lt;, &gt;, &amp; and &quot; — the original wrote the raw characters
        (entity-decoded residue), which produces unparsable XML and contained
        an unterminated string literal for the quote case.
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<symbol> ")
        symbol = self.tokenizer.symbol()
        xml_escapes = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '\"': '&quot;'}
        self.out_file.write(xml_escapes.get(symbol, symbol))
        self.out_file.write(" </symbol>\n")
        self.tokenizer.advance()

    def _write_token(self, cur_type):
        """
        Write the current token to the output file.

        :param cur_type: the type of the current token
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<" + TOKEN_TYPE_STR[cur_type] + "> ")
        self.out_file.write(str(self.tokenizer.get_token_str()))
        self.out_file.write(" </" + TOKEN_TYPE_STR[cur_type] + ">\n")
        self.tokenizer.advance()
class CompilationEngine: def __init__(self, input_file, output_file): self.tokenizer = JackTokenizer(input_file) self.symbol_table = SymbolTable() self.vm_writer = VMWriter(output_file) self.current_sub_name = None self.class_name = None self.func_counter = 0 self.while_counter = 0 self.if_counter = 0 # starts the process self.tokenizer.advance() self.compile_class() self.vm_writer.close() def compile_class(self): """ compiles the class function :return: none """ # advances a single step to get the class name self.tokenizer.advance() # set class's name self.class_name = self.tokenizer.current_token # moves to the symbol { self.tokenizer.advance() # move to the next symbol and check what it is self.tokenizer.advance() # compiles class variable while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \ KEY_WORDS.get(self.tokenizer.current_token) == FIELD: self.compile_class_var_dec() # compiles subroutine while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \ KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \ KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION: self.compile_sub_routine() # we are now at the <symbol> } <symbol> which closes the class def compile_class_var_dec(self): """ compiles a var dec :return: none """ var_kind = self.tokenizer.key_word() # advances the token to the var's type self.tokenizer.advance() if self.tokenizer.token_type() == KEY_WORD: var_type = self.tokenizer.key_word() else: var_type = self.tokenizer.identifier() # advances the token to the var's identifier self.tokenizer.advance() if self.tokenizer.token_type() == KEY_WORD: var_name = self.tokenizer.key_word() else: var_name = self.tokenizer.identifier() # update symbol table self.symbol_table.define(var_name, var_type, var_kind) # advance to next token, and check if there are more var_names self.tokenizer.advance() while self.tokenizer.current_token != ";": # token is <symbol> , <symbol> # advance to var's identifier self.tokenizer.advance() var_name 
= self.tokenizer.current_token # update symbol table self.symbol_table.define(var_name, var_type, var_kind) self.tokenizer.advance() # the current token is <symbol> ; <symbol>, advance to next self.tokenizer.advance() def compile_sub_routine(self): """ compiles a single sub routine :return: none """ # start new subroutine symbol table self.symbol_table.start_subroutine() # get subroutine type (method/construction/function) sub_type = self.tokenizer.key_word() # advances the token to what the subroutine returns self.tokenizer.advance() # updates the return type if self.tokenizer.token_type() == KEY_WORD: return_type = self.tokenizer.key_word() else: return_type = self.tokenizer.identifier() # advances the token to <identifier> sub_name <identifier> self.tokenizer.advance() # update the subroutine name subroutine_name = self.tokenizer.identifier() self.current_sub_name = subroutine_name # advance to <symbol> ( <symbol> self.tokenizer.advance() # if subroutine is a method, add 'this' to the symbol table as argument 0 if sub_type == METHOD: self.symbol_table.define("this", self.class_name, "ARG") # compiles the parameter list self.compile_parameter_list() # we are at <symbol> ) <symbol> # advance to subroutine body, and compile it self.tokenizer.advance() self.compile_subroutine_body(sub_type) def compile_subroutine_body(self, sub_type): """ the method compiles the subroutine body :return: none """ # we are at bracket {, advance self.tokenizer.advance() # compile var dec while KEY_WORDS.get(self.tokenizer.current_token) == VAR: self.compile_var_dec() # write function label self.vm_writer.write_function( self.class_name + '.' 
+ self.current_sub_name, self.symbol_table.var_count("VAR")) # if is method, update THIS to the object if sub_type == METHOD: self.vm_writer.write_push(ARG, 0) self.vm_writer.write_pop("POINTER", 0) # if is constructor, allocate memory, and put in this if sub_type == CONSTRUCTOR: self.vm_writer.write_push("CONST", self.symbol_table.var_count("FIELD")) self.vm_writer.write_call("Memory.alloc", 1) self.vm_writer.write_pop("POINTER", 0) if self.tokenizer.current_token != "}": self.compile_statements() # we are at bracket }, advance self.tokenizer.advance() def compile_parameter_list(self): """ compiles a parameter list :return: none """ # advance to first parameter self.tokenizer.advance() # while there are more parameters while self.tokenizer.current_token != ')': # tests what to put as the type of the object if self.tokenizer.token_type() == KEY_WORD: var_type = self.tokenizer.key_word() else: var_type = self.tokenizer.identifier() # advance to variables name <identifier> var_name <identifier> self.tokenizer.advance() var_name = self.tokenizer.identifier() # define new variable self.symbol_table.define(var_name, var_type, "ARG") # gets the next token self.tokenizer.advance() # advance to next token if we are at ',' if self.tokenizer.current_token == ",": self.tokenizer.advance() def compile_var_dec(self): """ compiles a declaration of a variable :return: none """ # we are at <keyword> var <keyword> # advance to variable type self.tokenizer.advance() if self.tokenizer.token_type() == KEY_WORD: var_type = self.tokenizer.key_word() else: var_type = self.tokenizer.identifier() # advance to the variables name self.tokenizer.advance() while self.tokenizer.current_token != ';': # we are at <identifier> var_name <identifier> var_name = self.tokenizer.identifier() # define variable in symbol table self.symbol_table.define(var_name, var_type, "VAR") # advance to next token self.tokenizer.advance() # tests what to put as the type of the object if self.tokenizer.current_token 
== ",": self.tokenizer.advance() # we are at <symbol> ; <symbol> # advance to next token self.tokenizer.advance() def compile_statements(self): """ the method compiles statements :return: none """ # while there are more statements, deal with each one while self.tokenizer.current_token != '}': statement_type = self.tokenizer.key_word() if statement_type == LET: self.compile_let() elif statement_type == IF: self.compile_if() elif statement_type == WHILE: self.compile_while() elif statement_type == DO: self.compile_do() elif statement_type == RETURN: self.compile_return() def compile_do(self): """ the method compiles a do command :return: none """ # we are at <keyword> do <keyword> # advance to next token <identifier> name_of_func <identifier> self.tokenizer.advance() func_name = self.tokenizer.identifier() self.tokenizer.advance() # compile the subroutine call self.compile_subroutine_call(func_name) # pop the result from the function into temp self.vm_writer.write_pop("TEMP", 0) # we are at <symbol> ; <symbol>, advance to next token self.tokenizer.advance() def compile_let(self): """ the method compiles a let statement :return: none """ # we are at <keyword> let <keyword> # advance to next token (var_name) self.tokenizer.advance() # we are at <identifier> var_name <identifier> var_name = self.tokenizer.identifier() # get variable data var_index = self.symbol_table.index_of(var_name) var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name)) # advance to next token ('[' | '=') self.tokenizer.advance() is_array = False if self.tokenizer.current_token == '[': is_array = True # push arr self.vm_writer.write_push(var_kind, var_index) # advance to expression and compile it self.tokenizer.advance() self.compile_expression() # we are at <symbol> ] <symbol>, advance to next token self.tokenizer.advance() # add the index of array and the expression to get the correct location self.vm_writer.write_arithmetic("ADD") # we are at <symbol> = <symbol> # advance to expression and 
# compile it
        self.tokenizer.advance()
        self.compile_expression()
        # if var is an array: the address arr+i is on the stack below the
        # RHS value, so park the value in temp 0, aim THAT, and store
        if is_array:
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        # if var is not an array: pop straight into the variable's segment
        else:
            self.vm_writer.write_pop(var_kind, var_index)
        # we are at <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()
        return

    def compile_while(self):
        """
        Compiles a while statement.

        Emits the canonical pattern:
            label While_N / <expression> / not / if-goto End_While_N /
            <statements> / goto While_N / label End_While_N
        :return: none
        """
        while_counter = str(self.while_counter)
        # update the while counter so nested/sequential whiles get unique labels
        self.while_counter += 1
        # create new label for the start of the while
        self.vm_writer.write_label("While_" + while_counter)
        # we are at <keyword> while <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_expression()
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # negate expression so a false condition jumps out of the loop
        self.vm_writer.write_arithmetic("NOT")
        # if condition is not met, go to the end of the while
        self.vm_writer.write_if("End_While_" + while_counter)
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        # compile statements
        self.compile_statements()
        # go back to the start of the while
        self.vm_writer.write_goto("While_" + while_counter)
        # create new label for the end of the while
        self.vm_writer.write_label("End_While_" + while_counter)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        return

    def compile_return(self):
        """
        Compiles a return statement.

        A void subroutine (no expression before ';') pushes constant 0 so
        every subroutine leaves exactly one value on the stack.
        :return: none
        """
        # we are at <keyword> return <keyword>, advance to next token
        self.tokenizer.advance()
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # if function is void, push const 0 to the stack
            self.vm_writer.write_push("CONST", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()
        self.vm_writer.write_return()
        return

    def compile_if(self):
        """
        Compiles an if statement (with an optional else clause).

        Emits: <expression> / not / if-goto ELSE_N / <then statements> /
        goto END_IF_N / label ELSE_N / [<else statements>] / label END_IF_N
        :return: none
        """
        if_count = str(self.if_counter)
        # update if counter so nested ifs get unique labels
        self.if_counter += 1
        # we are at <keyword> if <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        # compile expression
        self.compile_expression()
        # negate the expression so a false condition jumps to the else part
        self.vm_writer.write_arithmetic("NOT")
        # check if condition is met
        self.vm_writer.write_if("ELSE_" + if_count)
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_statements()
        # jump to the end of the if
        self.vm_writer.write_goto("END_IF_" + if_count)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        # create else label (which may be empty)
        self.vm_writer.write_label("ELSE_" + if_count)
        if self.tokenizer.current_token == 'else':
            # we are at <keyword> else <keyword>, advance
            self.tokenizer.advance()
            # we are at <symbol> { <symbol>, advance
            self.tokenizer.advance()
            self.compile_statements()
            # we are at <symbol> } <symbol>, advance
            self.tokenizer.advance()
        # create new label
        self.vm_writer.write_label("END_IF_" + if_count)
        return

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*.

        Operators are emitted after both operands (postfix), so Jack's
        left-to-right, no-precedence evaluation order is preserved.
        '*' and '/' become calls to Math.multiply / Math.divide
        (OP_DICT holds a (name, n_args) pair for them — assumed from usage;
        TODO confirm against OP_DICT's definition).
        :return:
        """
        # compile the term
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            call_math = False
            # we are at <symbol> op <symbol>
            op = OP_DICT.get(self.tokenizer.current_token)
            # check if operator needs to call math
            if self.tokenizer.current_token == '*' or self.tokenizer.current_token == '/':
                call_math = True
            # advance to next term and compile term
            self.tokenizer.advance()
            self.compile_term()
            # output the operator
            if call_math:
                self.vm_writer.write_call(op[0], op[1])
            else:
                self.vm_writer.write_arithmetic(op)
        return

    def compile_term(self):
        """
        Compiles a term: integer/string/keyword constant, parenthesized
        expression, unary op + term, variable, array entry, or subroutine
        call.  One token of look-ahead (the token after an identifier)
        distinguishes the last three cases.
        :return: none
        """
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            # push the const int
            self.vm_writer.write_push("CONST", self.tokenizer.int_val())
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # write without the ""
            string_val = self.tokenizer.string_val()
            # push the len of the string and call the string constructor
            self.vm_writer.write_push("CONST", len(string_val))
            self.vm_writer.write_call("String.new", 1)
            # update new string, one appendChar call per character
            for char in string_val:
                self.vm_writer.write_push("CONST", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            # true/false/null/this map to a (segment, index) pair
            segment, idx = KEY_WORD_CONST.get(self.tokenizer.current_token)
            self.vm_writer.write_push(segment, idx)
            # true is NOT(0), i.e. -1
            if self.tokenizer.current_token == 'true':
                self.vm_writer.write_arithmetic('NOT')
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # we are at <symbol> ( <symbol>, advance to next token
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ) <symbol>, advance to next token
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            op_command = UNARY_OP.get(self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
            # unary op is emitted after its operand (postfix)
            self.vm_writer.write_arithmetic(op_command)
        # var/var[expression]/subroutine_call
        else:
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                # push arr
                self.vm_writer.write_push(var_kind, var_index)
                # we are at <symbol> [ <symbol>, advance to expression and
                # compile it
                self.tokenizer.advance()
                self.compile_expression()
                # add the index of array and the expression to get the
                # correct location
                self.vm_writer.write_arithmetic("ADD")
                # set the that pointer
                self.vm_writer.write_pop("POINTER", 1)
                # push to the stack what is in the arr[i]
                self.vm_writer.write_push("THAT", 0)
                # we are at <symbol> ] <symbol>, advance
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or self.tokenizer.current_token == '.':
                self.compile_subroutine_call(var_name)
            else:
                # if is just 'var'
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)
        return

    def compile_expression_list(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        :return: amount of arguments in the expression list
        """
        expression_counter = 0
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            expression_counter += 1
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                expression_counter += 1
                # we are at <symbol> , <symbol>, advance
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        return expression_counter

    def compile_subroutine_call(self, identifier):
        """
        Compiles a subroutine call (not including the subroutine's first
        varName, which the caller already consumed and passes in).

        Three cases: obj.method(...) where obj is a known symbol (push the
        object, call Type.method), Class.function(...) where the identifier
        is unknown to the symbol table (plain call), and method(...) with no
        dot (implicit call on `this`).
        :param identifier: the identifier token that preceded '(' or '.'
        :return: none
        """
        func_name = self.class_name + "." + identifier
        num_of_arguments = 0
        if self.tokenizer.current_token == '.':
            # change func name to its class name
            if self.symbol_table.type_of(identifier) is not None:
                # identifier is an object variable: call on its class type
                func_name = self.symbol_table.type_of(identifier)
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = func_name + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
                # push the object to the stack as the implicit first argument
                segment = TYPE_DICT.get(self.symbol_table.kind_of(identifier))
                idx = self.symbol_table.index_of(identifier)
                self.vm_writer.write_push(segment, idx)
                num_of_arguments += 1
            else:
                # identifier is a class name (static call, no object pushed)
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = identifier + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
        else:
            # no dot: method call on the current object, push `this`
            self.vm_writer.write_push("POINTER", 0)
            num_of_arguments += 1
        # we are at <symbol> ( <symbol>, advance
        self.tokenizer.advance()
        num_of_arguments += self.compile_expression_list()
        # we are at <symbol> ) <symbol>, advance
        self.tokenizer.advance()
        self.vm_writer.write_call(func_name, num_of_arguments)
        return
class CompilationEngine:
    """Peek-driven Jack-to-VM compilation engine.

    Walks the token stream of a JackTokenizer and emits VM commands
    through a VMWriter.  Most compile methods use ``tokenizer.peek()`` to
    decide which construct comes next and ``advance()`` to consume tokens.

    Fixes applied in this revision:
    * ``CompileWhile`` emitted ``goto WHILE_END`` after the body and wrote
      ``label WHILE_EXP`` at the end — the loop never looped and the
      WHILE_END target was never defined.  It also never negated the
      condition before ``write_if`` and never incremented ``self.labels``,
      so every while in a file collided on the same label number.
    * ``CompileTerm`` emitted the unary command before its operand
      (prefix); VM code is postfix, so the operand must be pushed first.
    * ``CompileTerm`` used ``self._writer.write('not')`` where every other
      arithmetic emission in this class uses ``write_cmd``.
    """

    def __init__(self, input_path, output_path):
        """Create the engine and immediately compile the class.

        :param input_path: input .jack stream/file
        :param output_path: output .vm stream/file
        """
        self.labels = 0  # shared counter, keeps jump labels unique
        self.jack_class = None
        self.class_subroutine = None
        self.tokenizer = JackTokenizer(input_path)
        self._writer = VMWriter(output_path)
        self.CompileClass()

    def CompileClass(self):
        """Compiles a complete class: 'class' name '{' decls '}'."""
        self.tokenizer.advance()  # class
        self.tokenizer.advance()  # class name
        self.jack_class = JackClass(self.tokenizer.current_token)
        self.tokenizer.advance()  # {
        self.CompileClassVarDec()
        self.CompileSubroutine()
        self.tokenizer.advance()  # }

    def CompileClassVarDec(self):
        """Compiles zero or more static/field declarations."""
        peek = self.tokenizer.peek()
        while 'static' in peek or 'field' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()  # field/static
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()  # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # name
            self.tokenizer.advance()
            self.jack_class.add_var(name, var_type, kind)
            # additional names in the same declaration: ', name'*
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()
                self.tokenizer.advance()
                self.jack_class.add_var(name, var_type, kind)
            peek = self.tokenizer.peek()

    def CompileSubroutine(self):
        """Compiles every method, function, and constructor of the class."""
        peek = self.tokenizer.peek()
        while 'function' in peek or 'constructor' in peek or 'method' in peek:
            self.tokenizer.advance()
            kind = self.tokenizer.keyWord()  # constructor/function/method
            self.tokenizer.advance()
            ret_type = self.tokenizer.current_token  # void or a type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # subroutine name
            self.tokenizer.advance()
            self.class_subroutine = JackSubroutine(name, kind, ret_type,
                                                  self.jack_class)
            self.CompileParameterList()
            self.tokenizer.advance()  # )
            self.tokenizer.advance()  # {
            peek = self.tokenizer.peek()
            # local declarations must be registered before write_function
            # so var_c reflects the real local count
            if 'var' in peek:
                self.CompileVarDec()
            full_name = '{}.{}'.format(self.jack_class.class_name,
                                       self.class_subroutine.name)
            self._writer.write_function(full_name, self.class_subroutine.var_c)
            if kind == 'constructor':
                # allocate the object and aim `this` at it
                fields = self.jack_class.counters[0]
                self._writer.push('constant', str(fields))
                self._writer.write_call('Memory.alloc', '1')
                self._writer.pop('pointer', '0')
            elif kind == 'method':
                # argument 0 is the receiver; make it `this`
                self._writer.push('argument', '0')
                self._writer.pop('pointer', '0')
            self.CompileStatements()
            self.tokenizer.advance()  # }
            peek = self.tokenizer.peek()

    def CompileParameterList(self):
        """Compiles a (possibly empty) parameter list, excluding the ()."""
        peek = self.tokenizer.peek()
        if peek != ')':
            self.tokenizer.advance()
            param_type = self.tokenizer.keyWord()  # type
            self.tokenizer.advance()
            name = self.tokenizer.identifier()  # name
            peek = self.tokenizer.peek()
            self.class_subroutine.add_arg(name, param_type)
            while peek == ',':
                self.tokenizer.advance()  # ,
                self.tokenizer.advance()
                param_type = self.tokenizer.keyWord()  # type
                self.tokenizer.advance()
                name = self.tokenizer.identifier()  # name
                self.class_subroutine.add_arg(name, param_type)
                peek = self.tokenizer.peek()

    def CompileVarDec(self):
        """Compiles zero or more 'var type name (, name)* ;' declarations."""
        peek = self.tokenizer.peek()
        while 'var' in peek:
            self.tokenizer.advance()  # var
            self.tokenizer.advance()
            var_type = self.tokenizer.keyWord()
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.class_subroutine.add_var(name, var_type)
            self.tokenizer.advance()
            while self.tokenizer.symbol() == ',':
                self.tokenizer.advance()
                name = self.tokenizer.identifier()  # name
                self.class_subroutine.add_var(name, var_type)
                self.tokenizer.advance()
            peek = self.tokenizer.peek()

    def CompileStatements(self):
        """Compiles a statement sequence, excluding the enclosing {}."""
        peek = self.tokenizer.peek()
        while ('let' in peek or 'if' in peek or 'while' in peek
               or 'do' in peek or 'return' in peek):
            if 'let' in peek:
                self.CompileLet()
            elif 'if' in peek:
                self.CompileIf()
            elif 'while' in peek:
                self.CompileWhile()
            elif 'do' in peek:
                self.CompileDo()
            elif 'return' in peek:
                self.CompileReturn()
            peek = self.tokenizer.peek()

    def CompileDo(self):
        """Compiles a do statement; the ignored return value goes to temp 0."""
        self.tokenizer.advance()  # do
        self.tokenizer.advance()  # first token of the call
        self.CompileTerm()
        self._writer.pop('temp', '0')  # discard the call's return value
        self.tokenizer.advance()  # ;
        # CompileTerm may or may not have consumed the ')'; re-sync on ';'
        if self.tokenizer.current_token != ';':
            self.tokenizer.advance()

    def CompileLet(self):
        """Compiles 'let name ([expr])? = expr ;'."""
        self.tokenizer.advance()  # let
        self.tokenizer.advance()
        name = self.tokenizer.identifier()
        symbol = self.class_subroutine.get_symbol(name)
        peek = self.tokenizer.peek()
        if peek == '[':
            self.tokenizer.advance()  # [
            self.tokenizer.advance()
            self.CompileExpression()  # index expression
            self.tokenizer.advance()  # ]
            self.tokenizer.advance()  # =
            # stack: index; add the array base to get the target address
            self._writer.push(symbol)
            self._writer.write_cmd('add')
            self.tokenizer.advance()
            self.CompileExpression()  # RHS value
            # save the value, aim THAT at the cell, store
            self._writer.pop('temp', '0')
            self._writer.pop('pointer', '1')
            self._writer.push('temp', '0')
            self._writer.pop('that', '0')
        else:
            self.tokenizer.advance()  # =
            self.tokenizer.advance()
            self.CompileExpression()
            self._writer.pop(symbol)
        self.tokenizer.advance()  # ;

    def CompileWhile(self):
        """Compiles a while statement.

        Emits the canonical pattern:
            label WHILE_EXP / expr / not / if-goto WHILE_END /
            statements / goto WHILE_EXP / label WHILE_END
        """
        label_c = self.labels
        # fix: reserve this label number so nested/sequential whiles differ
        self.labels += 1
        self.tokenizer.advance()  # while
        self.tokenizer.advance()  # (
        self.tokenizer.advance()
        self._writer.write_label(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self.CompileExpression()
        self.tokenizer.advance()  # )
        self.tokenizer.advance()  # {
        # fix: negate so a FALSE condition exits the loop
        self._writer.write_cmd('not')
        self._writer.write_if(LABEL_FORMAT.format('WHILE_END', label_c))
        self.CompileStatements()
        # fix: loop back to the condition, then define the exit label
        self._writer.write_goto(LABEL_FORMAT.format('WHILE_EXP', label_c))
        self._writer.write_label(LABEL_FORMAT.format('WHILE_END', label_c))
        self.tokenizer.advance()  # }

    def CompileReturn(self):
        """Compiles a return statement; void subroutines push constant 0."""
        self.tokenizer.advance()  # return
        peek = self.tokenizer.peek()
        if peek != ';':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()  # ;
        else:
            self._writer.push('constant', '0')
            self.tokenizer.advance()  # ;
        self._writer.write_return()

    def CompileIf(self):
        """Compiles an if statement, possibly with a trailing else clause."""
        label_c = self.labels
        self.tokenizer.advance()  # if
        self.tokenizer.advance()
        self.tokenizer.advance()  # (
        self.CompileExpression()
        self.tokenizer.advance()  # )
        self.tokenizer.advance()  # {
        # if-goto TRUE / goto FALSE pattern: no negation needed
        self._writer.write_if(LABEL_FORMAT.format('IF_TRUE', label_c))
        self._writer.write_goto(LABEL_FORMAT.format('IF_FALSE', label_c))
        self._writer.write_label(LABEL_FORMAT.format('IF_TRUE', label_c))
        self.labels += 1  # nested ifs get fresh label numbers
        self.CompileStatements()
        self.tokenizer.advance()  # }
        peek = self.tokenizer.peek()
        if peek == 'else':
            self._writer.write_goto(LABEL_FORMAT.format('IF_END', label_c))
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))
            self.tokenizer.advance()  # else
            self.tokenizer.advance()  # {
            self.CompileStatements()
            self.tokenizer.advance()  # }
            self._writer.write_label(LABEL_FORMAT.format('IF_END', label_c))
        else:
            self._writer.write_label(LABEL_FORMAT.format('IF_FALSE', label_c))

    def CompileExpression(self):
        """Compiles 'term (op term)*'; operators are emitted postfix."""
        self.CompileTerm()
        peek = self.tokenizer.peek()
        while peek in OPS:
            self.tokenizer.advance()
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            self._writer.write_cmd(OP_DIC[op])
            peek = self.tokenizer.peek()

    def CompileTerm(self):
        """Compiles a term.

        An identifier needs one token of look-ahead ('[', '(', '.') to
        distinguish a variable, an array entry, and a subroutine call.
        Any other token is not part of this term and is not advanced over.
        """
        if self.tokenizer.current_token in UNARY_OP:
            # fix: VM code is postfix — push the operand first, then the op
            unary_cmd = UNARY_DIC[self.tokenizer.current_token]
            self.tokenizer.advance()
            self.CompileTerm()
            self._writer.write_cmd(unary_cmd)
        elif self.tokenizer.current_token == '(':
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()  # )
        elif self.tokenizer.tokenType() == 'INT_CONST':
            self._writer.push('constant', self.tokenizer.current_token)
        elif self.tokenizer.tokenType() == 'STRING_CONST':
            text = self.tokenizer.stringVal()
            # build the string object one appendChar call per character
            self._writer.push('constant', len(text))
            self._writer.write_call('String.new', '1')
            for char in text:
                self._writer.push('constant', ord(char))
                self._writer.write_call('String.appendChar', '2')
        elif self.tokenizer.tokenType() == 'KEYWORD':
            if self.tokenizer.current_token == 'this':
                self._writer.push('pointer', '0')
            else:
                # false/null are 0; true is not(0) == -1
                self._writer.push('constant', '0')
                if self.tokenizer.current_token == 'true':
                    # fix: was self._writer.write('not') — every other
                    # arithmetic emission here goes through write_cmd
                    self._writer.write_cmd('not')
        elif self.tokenizer.tokenType() == 'IDENTIFIER':
            value = self.tokenizer.identifier()
            var = self.class_subroutine.get_symbol(value)
            peek = self.tokenizer.peek()
            if peek == '[':
                self.tokenizer.advance()
                self.tokenizer.advance()  # [
                self.CompileExpression()  # index
                self._writer.push(var)  # array base
                self._writer.write_cmd('add')
                self._writer.pop('pointer', '1')
                self._writer.push('that', '0')
                self.tokenizer.advance()  # ]
            else:
                function_name = value
                functions_class = self.class_subroutine.jack_class
                is_default = True  # call on `this` unless a '.' says otherwise
                args = 0
                if peek == '.':
                    is_default = False
                    self.tokenizer.advance()
                    self.tokenizer.advance()
                    function_object = self.class_subroutine.get_symbol(
                        function_name)
                    function_name = self.tokenizer.current_token
                    if function_object:
                        # obj.method(...): push the object, call Type.method
                        functions_class = var.type
                        args = 1
                        self._writer.push(var)
                    else:
                        # Class.function(...): static call, no receiver
                        functions_class = value
                peek = self.tokenizer.peek()
                if peek == '(':
                    if is_default:
                        # implicit method call on the current object
                        args = 1
                        self._writer.push('pointer', '0')
                    self.tokenizer.advance()  # (
                    args += self.CompileExpressionList()
                    # functions_class is either a class name or a JackClass
                    if not isinstance(functions_class, str):
                        functions_class = functions_class.class_name
                    full_name = '{}.{}'.format(functions_class, function_name)
                    self._writer.write_call(full_name, args)
                    if self.tokenizer.current_token != ')':
                        self.tokenizer.advance()  # ')'
                elif var:
                    # plain variable reference
                    self._writer.push(var)

    def CompileExpressionList(self):
        """Compiles a (possibly empty) comma-separated expression list.

        :return: the number of expressions compiled
        """
        expressions_counter = 0
        peek = self.tokenizer.peek()
        while peek != ')' and peek != ';':
            self.tokenizer.advance()
            expressions_counter += 1
            if self.tokenizer.current_token == ',':
                self.tokenizer.advance()
            self.CompileExpression()
            peek = self.tokenizer.peek()
        return expressions_counter
class CompilationEngine:
    """
    The compilation engine compiles the Jack code given in the input file
    into VM code saved in out_file.  Syntax is validated as it goes via the
    _check_* helpers, which print an error and exit on a mismatch.
    """
    def __init__(self, in_file, out_file):
        """
        A compilation engine constructor
        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        self._tokenizer = JackTokenizer(in_file)
        self._class_table = SymbolTable()   # static/field symbols
        self._method_table = SymbolTable()  # argument/local symbols
        self._cur_class_name = ""
        self._vm_writer = VMWriter(out_file)
        self._label_count_while = 0
        self._label_count_if = 0

    def compile_class(self):
        """ compiles a class according to the grammar """
        self._class_table.start_subroutine()
        self._tokenizer.advance()
        # check if the current keyword is the right class tag
        if self._tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._tokenizer.advance()
        self._cur_class_name = self.get_cur_token()
        self._tokenizer.advance()
        self._check_symbol("{")
        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_symbol("}")

    def compile_class_var_dec(self):
        """ compiles the class's variables declarations """
        cur_kind = self.get_cur_token()  # 'static' or 'field'
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._class_table.define(cur_name, cur_type, cur_kind)
        while self._check_if_comma():  # there are more variables
            self._tokenizer.advance()
            cur_name = self.get_cur_token()
            self._check_name()
            self._class_table.define(cur_name, cur_type, cur_kind)
        self._check_symbol(";")

    def get_cur_token(self):
        """ :return: the current token as a string """
        return self._tokenizer.get_token_str()

    def compile_subroutine_dec(self):
        """
        compiles the class's subroutine (methods and functions) declarations
        """
        # re-initialize the method symbol table
        self._method_table.start_subroutine()
        key_word = self._tokenizer.key_word()  # constructor/function/method
        self._tokenizer.advance()
        self._tokenizer.advance()  # skip the return type ('void' or a type)
        cur_name = self.get_cur_token()
        self._tokenizer.advance()
        # a method gets as argument the base address of the current object
        if key_word == "method":
            self._method_table.define("this", self._cur_class_name, "argument")
        self._check_symbol("(")
        self.compile_parameter_list()
        self._check_symbol(")")
        subroutine_path = self._cur_class_name + '.' + cur_name
        # the function is either void or has a type
        self.compile_subroutine_body(subroutine_path, key_word)

    def compile_parameter_list(self):
        """ compiles the parameter list for the subroutines """
        # if curr_token is ')' it means the param list is empty
        if self._tokenizer.symbol() == ')':
            return
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "argument")
        while self._check_if_comma():  # there are more params
            self._tokenizer.advance()
            cur_type = self.get_cur_token()
            self._check_type()
            cur_name = self.get_cur_token()
            self._check_name()
            self._method_table.define(cur_name, cur_type, "argument")

    def compile_subroutine_body(self, subroutine_name, subroutine_kind):
        """
        compiles the body of the subroutine
        :param subroutine_name: fully qualified 'Class.sub' name
        :param subroutine_kind: 'constructor', 'function' or 'method'
        """
        self._check_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine; they must be registered before write_function so
        # the local count is correct
        while self._tokenizer.key_word() == 'var':
            self.compile_var_dec()
        # define the subroutine
        n_locals = self._method_table.var_count("local")
        self._vm_writer.write_function(subroutine_name, n_locals)
        if subroutine_kind == "constructor":
            # allocating memory for the object's fields
            num_of_fields = self._class_table.var_count("field")
            self._vm_writer.write_push("constant", num_of_fields)
            self._vm_writer.write_call("Memory.alloc", 1)
            # make 'this' point to the address returned by Memory.alloc
            self._vm_writer.write_pop("pointer", 0)
        if subroutine_kind == "method":
            # assign pointer[0] to the object's base address in order to
            # get access to the 'this' segment
            self._vm_writer.write_push("argument", 0)
            self._vm_writer.write_pop("pointer", 0)
        self.compile_statements()
        self._check_symbol("}")

    def compile_var_dec(self):
        """ compiles the variable declarations """
        self._tokenizer.advance()  # past 'var'
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "local")
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._tokenizer.advance()
            self._method_table.define(self.get_cur_token(), cur_type, "local")
            self._check_name()
        self._check_symbol(";")

    def compile_statements(self):
        """ compiles the statements (0 or more statements) """
        while self._check_if_statement():
            if self._tokenizer.key_word() == 'let':
                self.compile_let()
            elif self._tokenizer.key_word() == 'if':
                self.compile_if()
            elif self._tokenizer.key_word() == 'while':
                self.compile_while()
            elif self._tokenizer.key_word() == 'do':
                self.compile_do()
            elif self._tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """ compiles the do statement """
        self._tokenizer.advance()  # past 'do'
        self.compile_subroutine_call()
        self._check_symbol(";")
        # discard the call's return value
        self._vm_writer.write_pop("temp", 0)

    def compile_let(self):
        """ compiles the let statement """
        self._tokenizer.advance()  # past 'let'
        name = self.get_cur_token()
        info = self._get_symbol_info(name)
        self._check_if_declared(info)
        s_type, s_kind, s_id = info
        seg = self._get_segment(s_kind)
        is_and_array = False
        if self._tokenizer.get_next_token() == '[':  # if there is an array
            is_and_array = True
            # compile_term pushes base + index and leaves the address on
            # the stack (it skips the pointer-1 deref when '=' follows)
            self.compile_term()
        else:
            self._tokenizer.advance()
        self._check_symbol("=")
        self.compile_expression()
        if is_and_array:
            # save the value created after compiling the expression which
            # appears right after '=' in temp[0]
            self._vm_writer.write_pop("temp", 0)
            # now the top of the stack should be the address of the right cell
            # in the array so we assign it to pointer[1]
            self._vm_writer.write_pop("pointer", 1)
            # re-pushing the value we saved in temp[0]
            self._vm_writer.write_push("temp", 0)
            # the value of the array is located in that[0]
            seg = "that"
            s_id = 0
        # execute the assignment
        self._vm_writer.write_pop(seg, s_id)
        self._check_symbol(";")

    @staticmethod
    def _check_if_declared(info):
        """ exits with an error if the symbol was never declared """
        if info is None:
            print("Unknown Symbol")
            sys.exit()

    def compile_if(self):
        """ compiles the if statements """
        false_label = self._get_if_label()
        end_label = self._get_if_label()
        self._tokenizer.advance()  # past 'if'
        self._check_symbol("(")
        self.compile_expression()
        self._check_symbol(")")
        self._check_symbol("{")
        # a false condition jumps over the then-block
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(false_label)
        self.compile_statements()
        self._check_symbol("}")
        # there can also be an if else scenario
        self._vm_writer.write_goto(end_label)
        self._vm_writer.write_label(false_label)
        if self._tokenizer.key_word() == 'else':
            self._tokenizer.advance()
            self._check_symbol("{")
            self.compile_statements()
            self._check_symbol("}")
        self._vm_writer.write_label(end_label)

    def compile_while(self):
        """ compiles the while statements """
        self._tokenizer.advance()  # past 'while'
        first_label = self._get_while_label()
        second_label = self._get_while_label(END_WHILE)
        self._check_symbol("(")
        self._vm_writer.write_label(first_label)
        self.compile_expression()
        # a false condition exits the loop
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(second_label)
        self._check_symbol(")")
        self._check_symbol("{")
        self.compile_statements()
        self._vm_writer.write_goto(first_label)
        self._vm_writer.write_label(second_label)
        self._check_symbol("}")

    def compile_return(self):
        """ compiles the return statements """
        self._tokenizer.advance()  # past 'return'
        # if cur token is ; we return nothing, otherwise we return something
        if not self._tokenizer.symbol() == ';':
            self.compile_expression()
        else:
            # void subroutines still return one value: constant 0
            self._vm_writer.write_push("constant", 0)
        self._check_symbol(";")
        self._vm_writer.write_return()

    def compile_subroutine_call(self):
        """
        compiles the subroutine calls (when we actually call a subroutine,
        as opposed to declaring it)
        """
        method_name = self.get_cur_token()
        self._check_name()
        num_of_args = 0
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())
        if self._tokenizer.symbol() == ".":
            self._tokenizer.advance()
            class_name = method_name
            method_name = self.get_cur_token()
            self._check_name()
            symbol_info = self._get_symbol_info(class_name)
            if symbol_info is None:
                # Foo.bar(): static call, no receiver pushed
                cur_name = class_name + '.' + method_name
            else:
                # foo.bar(): push the object as the implicit first argument
                type_of, kind_of, id_of = symbol_info
                num_of_args += 1
                self._vm_writer.write_push(self._get_segment(kind_of), id_of)
                cur_name = type_of + '.' + method_name
        else:
            # bar(): implicit method call on the current object
            cur_name = self._cur_class_name + '.' + method_name
            num_of_args += 1
            self._vm_writer.write_push("pointer", 0)
        self._check_symbol("(")
        num_of_args += self.compile_expression_list()
        self._check_symbol(")")
        self._vm_writer.write_call(cur_name, num_of_args)

    def compile_expression(self):
        """
        compiles expressions which are terms and possibly operators and
        more terms
        """
        symbol = self._tokenizer.symbol()
        self.compile_term()
        # write the 'not' operator if necessary ('~' prefixes its term)
        # NOTE(review): a leading unary '-' is not handled here; it only
        # gets a 'neg' inside compile_expression_list — confirm that
        # 'let x = -y;' compiles correctly
        if symbol == '~':
            self._vm_writer.write_arithmetic("not")
        # there may be a few operators in one expression
        # (compile_term consumes the operator token itself when the current
        # token is an operator, so no explicit advance is needed here)
        while self._tokenizer.symbol() in OPERATIONS:
            symbol = self._tokenizer.symbol()
            self.compile_term()
            # executing operators after handling the operands
            # in order to evaluate the current expression as postfix expression
            op = self._get_op(symbol)
            self._vm_writer.write_arithmetic(op)

    def compile_term(self):
        """ compiles terms according to the grammar """
        cur_type = self._tokenizer.token_type()
        key_word = self._tokenizer.key_word()
        cur_token = self.get_cur_token()
        # either a string/int constant
        if cur_type in ["INT_CONST", "STRING_CONST"]:
            self._compile_string_int_term(cur_token, cur_type)
        # or a constant keyword (true, false, null, this)
        elif key_word in KEYWORD_CONST:
            self._compile_const_keyword_term(key_word)
        # or an expression within round brackets
        elif self._tokenizer.symbol() == '(':
            self._tokenizer.advance()
            self.compile_expression()
            self._check_symbol(")")
        # or a unary op and then a term (the op token itself is consumed
        # here; its VM command is emitted by the caller — see
        # compile_expression / compile_expression_list)
        elif self._tokenizer.symbol() in OPERATIONS:
            self._tokenizer.advance()
            self.compile_term()
        # or it is an identifier which could be:
        elif self._tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()

    def _compile_const_keyword_term(self, key_word):
        """
        compile term in case the current token type is constant keyword
        :param key_word: string from {'true', 'false', 'null', 'this'}
        """
        if key_word == "this":
            self._vm_writer.write_push("pointer", 0)
        else:
            # false/null are 0; true is not(0) == -1
            self._vm_writer.write_push("constant", 0)
            if key_word == "true":
                self._vm_writer.write_arithmetic("not")
        self._tokenizer.advance()

    def _compile_string_int_term(self, cur_token, cur_type):
        """
        compile term in case the given token type is constant string or
        constant integer
        :param cur_token: the current token as a string
        :param cur_type: the type of the current token
        """
        if cur_type == "INT_CONST":
            self._vm_writer.write_push("constant", cur_token)
        else:  # is string
            # build the String object one appendChar call per character
            n = len(cur_token)
            self._vm_writer.write_push("constant", n)
            self._vm_writer.write_call("String.new", 1)
            for c in cur_token:
                self._vm_writer.write_push("constant", ord(c))
                self._vm_writer.write_call("String.appendChar", 2)
        self._tokenizer.advance()

    def _compile_term_identifier(self):
        """ compiles terms in case of identifier token """
        cur_token = self.get_cur_token()
        info = self._get_symbol_info(cur_token)
        next_token = self._tokenizer.get_next_token()
        # a plain variable or array base: push its value now
        if info is not None and next_token not in [".", "("]:
            type_of, kind_of, id_of = info
            seg = self._get_segment(kind_of)
            self._vm_writer.write_push(seg, id_of)
        # an array
        if next_token == '[':
            self._check_name()
            self._check_symbol("[")
            self.compile_expression()
            self._check_symbol("]")
            self._vm_writer.write_arithmetic("add")
            # when '=' follows, compile_let needs the raw address on the
            # stack, so only dereference when reading
            if self._tokenizer.symbol() != '=':
                self._vm_writer.write_pop("pointer", 1)
                self._vm_writer.write_push("that", 0)
        # or a subroutine call
        elif next_token in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._tokenizer.advance()

    def compile_expression_list(self):
        """ compiles the expression lists """
        # if it is ')' then the expression list is empty
        if self._tokenizer.symbol() == ')':
            return 0
        num_of_args = 1  # at least one argument
        self.compile_expression()
        # while there are more expressions
        while self._check_if_comma():
            self._tokenizer.advance()
            cur_symbol = self._tokenizer.symbol()
            self.compile_expression()
            if cur_symbol == '-':  # negative int
                # NOTE(review): this 'neg' for a leading '-' exists only for
                # arguments after a comma, not for the first argument or for
                # expressions outside argument lists — verify
                self._vm_writer.write_arithmetic("neg")
            num_of_args += 1
        return num_of_args

    def _check_if_var_dec(self):
        """
        check if we are currently compiling a variable declaration
        :return: true iff the current token is either 'static' or 'field'
        """
        return self._tokenizer.key_word() in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        checks if we are currently compiling a subroutine declaration
        :return: true iff the current token is either 'constructor' or
        'function' or 'method'
        """
        return self._tokenizer.key_word() in SUBROUTINE

    def _check_if_comma(self):
        """
        checks if current token is a comma
        :return: true iff the current token is a ','
        """
        return self._tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        checks if we are currently compiling a statement
        :return: true iff the current token is in ['let', 'if', 'while',
        'do', 'return']
        """
        return self._tokenizer.key_word() in STATEMENTS

    def _check_type(self):
        """
        checks if the current token is a valid type and if so advances
        past it (a non-keyword type must be a class-name identifier)
        """
        if not self._tokenizer.key_word() in TYPE_KEYWORDS:
            self._check_name()
        else:
            self._tokenizer.advance()

    def _check_symbol(self, expected_symbol):
        """
        checks if the current token is the expected symbol and advances
        past it
        :param expected_symbol: the symbol we are validating is the
        current token
        :return: prints illegal statement error if it is not the expected
        symbol and exits the program
        """
        if self._tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    def _check_name(self):
        """
        checks the current token is a name (identifier) and advances past it
        :return: prints illegal statement error if it is not a name and
        exits the program
        """
        if not self._tokenizer.identifier():
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    @staticmethod
    def _get_op(symbol):
        """ maps an op symbol to its VM command string """
        if symbol == '<':
            return "lt"
        elif symbol == '>':
            return "gt"
        elif symbol == '=':
            return "eq"
        elif symbol == '&':
            return "and"
        elif symbol == '|':
            return "or"
        elif symbol == '+':
            return "add"
        elif symbol == '-':
            return "sub"
        elif symbol == '~':
            return "not"
        elif symbol == "*":
            # * and / have no VM primitive; they call the OS Math class
            return "call Math.multiply 2"
        elif symbol == "/":
            return "call Math.divide 2"

    def _get_symbol_info(self, symbol_name):
        """
        first checks if the given symbol is in the method symbol table;
        if the method table contains the symbol it returns its information:
        (type, kind, id).  otherwise checks if the class symbol table
        contains the symbol; if it does it returns the symbol information
        from the class table, else returns None
        :param symbol_name: string
        """
        info = self._method_table.get_info(symbol_name)
        if info is None:
            info = self._class_table.get_info(symbol_name)
        return info

    @staticmethod
    def _get_segment(cur_kind):
        """
        :param cur_kind: Jack kind - from the list: ["var", "argument",
        "field", "class", "subroutine", "local", "static"]
        :return: if the given kind is "field" it returns 'this' otherwise
        returns the given kind
        """
        if cur_kind == "field":
            return "this"
        else:
            return cur_kind

    def _get_if_label(self):
        """
        create new if label and increment the if label counter
        :return: unused if label
        """
        curr_counter = str(self._label_count_if)
        self._label_count_if += 1
        return "IF" + curr_counter

    def _get_while_label(self, is_end_while=False):
        """
        creates a label according to the given flag; when the method creates
        an end-while label it also increments the while label counter (the
        start label of the same loop was created with the same counter value)
        :param is_end_while: if true creates end while label otherwise
        creates while label
        :return: unused while label or end while label according to the flag
        """
        curr_counter = str(self._label_count_while)
        if is_end_while:
            self._label_count_while += 1
            return "WHILE_END" + curr_counter
        return "WHILE" + curr_counter