def main():
    """Ad-hoc driver: dump every token of 'Mytestfor10.jack', then run
    CompilationEngine.compile_if against a fresh 'do' root element.

    NOTE(review): relies on module-level Tokenizer / CompilationEngine /
    Element definitions; which CompilationEngine wins depends on
    definition order in this file.
    """
    tk = Tokenizer('Mytestfor10.jack')
    while(tk.has_more_tokens()):
        tk.advance()
        # NOTE(review): identifier() is called for every token, even
        # keywords/symbols — confirm the Tokenizer tolerates that.
        print(tk.token_type(),tk.identifier())
    ce = CompilationEngine('Mytestfor10.jack')
    root = Element('do')
    ce.compile_if(root)
def test_issues__semicolon_missing_from_outfile():
    """Regression test: ';' symbols must appear in the token stream,
    both after declarations/statements and when glued to ')'."""
    fake_file = BytesIO(b"""
    var char key;  // the key currently pressed by the user
    var boolean exit;
    let exit = false;
    """)
    t = Tokenizer(fake_file)
    expected = [
        'var', 'char', 'key', ';',
        'var', 'boolean', 'exit', ';',
        'let', 'exit', '=', 'false', ';',
    ]
    for want in expected:
        assert t.advance().token == want

    fake_file = BytesIO(b" return();")
    t = Tokenizer(fake_file)
    for want in ('return', '(', ')', ';'):
        assert t.advance().token == want
def test_base():
    """Smoke test: tokenizing 'method void dispose();' yields the
    expected (token, type) pairs in order."""
    fake_file = BytesIO(b"method void dispose();")
    t = Tokenizer(fake_file)
    expected = [
        ('method', 'keyword'),
        ('void', 'keyword'),
        ('dispose', 'identifier'),
        ('(', 'symbol'),
        (')', 'symbol'),
        (';', 'symbol'),
    ]
    for want_token, want_type in expected:
        token = t.advance()
        assert token.token == want_token
        assert token.type == want_type
def test_compile_subroutine_desc():
    """compile_subroutine_body must consume exactly '{ return this; }'
    and leave the following token unread."""
    fake_file = BytesIO(b"""
    {
        return this;
    }
    NEXTTOKEN
    """)
    t = Tokenizer(fake_file)
    c = CompilationEngine(t)
    body = c.compile_subroutine_body()

    assert body.name == 'subroutineBody'
    assert body.value[0].value == '{'
    statements = body.value[1].value
    assert body.value[2].value == '}'

    ret_stmt = statements[0]
    assert ret_stmt.name == 'returnStatement'
    assert ret_stmt.value[0].value == 'return'
    assert ret_stmt.value[1].name == 'expression'
    assert ret_stmt.value[2].value == ';'

    # The engine must stop after '}', leaving NEXTTOKEN unconsumed.
    assert t.advance().token == 'NEXTTOKEN'
class CompilationEngine:
    """Recursive-descent compiler for Jack source, mid-migration from an
    XML emitter to a VM-code emitter.

    NOTE(review): the class is in a transitional state — most XML writes
    are commented out, but compile_while / compile_return / compile_if /
    compile_expression / compile_term / compile_expression_list still
    emit XML tags through the VMWriter's write(); confirm intent.
    """

    # Template for one XML token line: <type> value </type>
    XML_LINE = "<{0}> {1} </{0}>\n"

    # XML escaping for symbols that are meta-characters in XML.
    # FIX: these entity strings had been HTML-decoded into themselves
    # ('<': "<", '"': """ — the latter is not even valid Python);
    # restored to the standard XML predefined entities.
    COMPARE_SYM_REPLACER = {
        '<': "&lt;",
        '>': "&gt;",
        '"': "&quot;",
        '&': "&amp;"
    }

    # Jack keyword constants that may appear as a term in an expression.
    KEYWORD_CONSTANT = ("true", "false", "null", "this")

    def __init__(self, input_stream, output_stream):
        """
        constructor of the Compilation Engine object
        :param input_stream: the input stream
        :param output_stream: the output stream
        """
        self.__tokenizer = Tokenizer(input_stream)  # Tokenizer object
        self.__output = VMWriter(output_stream)
        self.__symbol = SymbolTable()
        self.__class_name = ""
        # Dispatch table: statement keyword -> compile method.
        self.__statements = {
            "let": self.compile_let,
            "if": self.compile_if,
            "while": self.compile_while,
            "do": self.compile_do,
            "return": self.compile_return
        }
        # Compilation is driven entirely from the constructor.
        self.compile_class()
        # self.__output.close()

    def write_xml(self):
        """Write one XML line for the current token, escaping XML
        meta-characters and using string_val() for string constants."""
        if self.__tokenizer.token_type() == "stringConstant":
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.string_val()))
        elif self.__tokenizer.get_value() in self.COMPARE_SYM_REPLACER:
            xml_val = self.COMPARE_SYM_REPLACER[self.__tokenizer.get_value()]
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(), xml_val))
        else:
            self.__output.write(
                self.XML_LINE.format(self.__tokenizer.token_type(),
                                     self.__tokenizer.get_value()))

    def compile_class(self):
        """Compile the whole program starting at the class definition,
        then close the output stream."""
        # self.__output.write("<class>\n")
        # self.write_xml()
        self.__tokenizer.advance()  # skip "class"
        self.__class_name = self.__tokenizer.get_value()
        # self.write_xml()
        self.__tokenizer.advance()  # skip class name
        # self.write_xml()
        self.__tokenizer.advance()  # skip {
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            self.compile_class_var_dec()
            current_token = self.__tokenizer.get_value()
        while current_token == "constructor" or current_token == "function" \
                or current_token == "method":
            self.compile_subroutine_dec()
            current_token = self.__tokenizer.get_value()
        # self.write_xml()
        # self.__output.write("</class>\n")
        self.__output.close()

    def compile_class_var_dec(self):
        """Compile the class-level variable declarations
        ('static'|'field' type name (',' name)* ';')."""
        current_token = self.__tokenizer.get_value()
        while current_token == "static" or current_token == "field":
            # self.__output.write("<classVarDec>\n")
            # self.write_xml()
            index = self.__symbol.var_count(current_token)
            self.__tokenizer.advance()  # get token type
            token_type = self.__tokenizer.get_value()
            # NOTE(review): emitting a push for a *declaration* looks like
            # a migration leftover — confirm against the VM spec.
            self.__output.write_push(current_token, index)
            self.__tokenizer.advance()  # get token name
            token_name = self.__tokenizer.get_value()
            self.__symbol.define(token_name, token_type, current_token)
            self.__tokenizer.advance()
            # self.write_xml()
            # self.__tokenizer.advance()
            # self.write_xml()
            # self.__tokenizer.advance()
            while self.__tokenizer.get_value() == ",":
                # self.write_xml()  # write ,
                self.__tokenizer.advance()  # get token name
                token_name = self.__tokenizer.get_value()
                index = self.__symbol.var_count(current_token)  # get new index
                self.__output.write_push(current_token, index)
                self.__symbol.define(token_name, token_type, current_token)
                self.__tokenizer.advance()
                # self.write_xml()  # write value
                # self.__tokenizer.advance()
            # self.write_xml()
            self.__tokenizer.advance()
            current_token = self.__tokenizer.get_value()
        # self.__output.write("</classVarDec>\n")

    def compile_subroutine_body(self):
        """Compile a subroutine body: '{' varDec* statements '}'."""
        # self.__output.write("<subroutineBody>\n")
        # self.write_xml()  # write {
        self.__tokenizer.advance()  # skip {
        while self.__tokenizer.get_value() == "var":
            self.compile_var_dec()
        self.compile_statements()
        # self.write_xml()  # write }
        self.__tokenizer.advance()  # skip }
        # self.__output.write("</subroutineBody>\n")

    def compile_subroutine_dec(self):
        """Compile a subroutine declaration and emit its VM 'function'
        line; void subroutines discard the return value (pop temp 0)."""
        # self.__output.write("<subroutineDec>\n")
        # self.write_xml()  # write constructor/function/method
        self.__tokenizer.advance()  # skip constructor/function/method
        return_value = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        func_args = self.compile_parameter_list()
        self.__output.write_function(func_name, func_args)
        self.compile_subroutine_body()
        if return_value == "void":
            self.__output.write_pop("temp", "0")
        # self.__output.write("</subroutineDec>\n")

    def compile_parameter_list(self):
        """Compile a parameter list and return the number of parameters.

        :return: the parameter count (int)
        """
        # todo returns the number og args !
        # self.write_xml()  # write (
        counter = 0
        self.__tokenizer.advance()  # skip (
        # self.__output.write("<parameterList>\n")
        if self.__tokenizer.get_value() != ")":
            # self.write_xml()  # write type
            self.__tokenizer.advance()  # skip type
            # self.write_xml()  # write varName
            self.__tokenizer.advance()  # skip var name
            counter += 1
            while self.__tokenizer.get_value() == ",":
                # self.write_xml()  # write ,
                self.__tokenizer.advance()  # skip ,
                # self.write_xml()  # type
                self.__tokenizer.advance()  # skip type
                # self.write_xml()  # varName
                self.__tokenizer.advance()  # skip varName
                counter += 1
        # self.__output.write("</parameterList>\n")
        # self.write_xml()  # write )
        self.__tokenizer.advance()
        return counter

    def compile_var_dec(self):
        """Compile a local variable declaration
        ('var' type name (',' name)* ';') and record it in the symbol
        table."""
        # self.__output.write("<varDec>\n")
        # self.write_xml()  # write var
        token_kind = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # self.write_xml()  # write type
        token_type = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # self.write_xml()  # write varName
        token_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        index = self.__symbol.var_count(token_kind)
        # NOTE(review): push on declaration — see compile_class_var_dec.
        self.__output.write_push(token_kind, index)
        self.__symbol.define(token_name, token_type, token_kind)
        while self.__tokenizer.get_value() == ",":
            # self.write_xml()  # write ,
            self.__tokenizer.advance()  # skip ,
            # self.write_xml()
            token_name = self.__tokenizer.get_value()
            index = self.__symbol.var_count(token_kind)
            self.__output.write_push(token_kind, index)
            self.__symbol.define(token_name, token_type, token_kind)
            self.__tokenizer.advance()
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</varDec>\n")

    def compile_statements(self):
        """Compile a sequence of statements by dispatching on the
        leading keyword until a non-statement token is seen."""
        key = self.__tokenizer.get_value()
        # self.__output.write("<statements>\n")
        if key != "}":
            while key in self.__statements:
                self.__statements[self.__tokenizer.get_value()]()
                key = self.__tokenizer.get_value()
        # self.__output.write("</statements>\n")

    def compile_do(self):
        """Compile a 'do' statement: 'do' subroutineCall ';'."""
        # self.__output.write("<doStatement>\n")
        # self.write_xml()  # write do
        self.__tokenizer.advance()  # skip do
        self.subroutine_call()
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</doStatement>\n")

    def compile_let(self):
        """Compile a 'let' statement and pop the expression result into
        the target variable's segment/index."""
        # self.__output.write("<letStatement>\n")
        # self.write_xml()  # write let
        self.__tokenizer.advance()  # skip let
        # self.write_xml()  # write varName
        var_name = self.__tokenizer.get_value()
        self.__tokenizer.advance()
        # if self.__tokenizer.get_value() == "[":  # todo handle array
        #     self.write_xml()  # write [
        #     self.__tokenizer.advance()
        #     self.compile_expression()
        #     self.write_xml()  # write ]
        #     self.__tokenizer.advance()
        # self.write_xml()  # write =
        self.__tokenizer.advance()  # skip =
        self.compile_expression()
        # todo push the value to the stack
        # self.write_xml()  # write ;
        self.__tokenizer.advance()  # skip ;
        # self.__output.write("</letStatement>\n")
        var_kind = self.__symbol.kind_of(var_name)
        var_index = self.__symbol.index_of(var_name)
        self.__output.write_pop(var_kind, var_index)

    def compile_while(self):
        """Compile a while statement (still emits XML — see class note)."""
        self.__output.write("<whileStatement>\n")
        self.write_xml()  # write while
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        self.__output.write("</whileStatement>\n")

    def compile_return(self):
        """Compile a return statement (still emits XML — see class note)."""
        self.__output.write("<returnStatement>\n")
        self.write_xml()  # write return
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() != ";":
            self.compile_expression()
        self.write_xml()  # write ;
        self.__tokenizer.advance()
        self.__output.write("</returnStatement>\n")

    def compile_if(self):
        """Compile an if(/else) statement (still emits XML — see class
        note)."""
        self.__output.write("<ifStatement>\n")
        self.write_xml()  # write if
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression()
        self.write_xml()  # write )
        self.__tokenizer.advance()
        self.write_xml()  # write {
        self.__tokenizer.advance()
        self.compile_statements()
        self.write_xml()  # write }
        self.__tokenizer.advance()
        if self.__tokenizer.get_value() == "else":
            self.write_xml()  # write else
            self.__tokenizer.advance()
            self.write_xml()  # write {
            self.__tokenizer.advance()
            self.compile_statements()
            self.write_xml()  # write }
            self.__tokenizer.advance()
        self.__output.write("</ifStatement>\n")

    def compile_expression(self):
        """Compile an expression: term (op term)*."""
        self.__output.write("<expression>\n")
        self.compile_term()
        while self.__tokenizer.is_operator():
            self.write_xml()  # write the operator
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</expression>\n")

    def compile_term(self):
        """Compile one term: constant, keyword constant, variable
        (with optional indexing), subroutine call, parenthesized
        expression, or unary -/~ applied to a term."""
        # dealing with unknown token
        self.__output.write("<term>\n")
        curr_type = self.__tokenizer.token_type()
        # handle consts
        if curr_type == "integerConstant" or curr_type == "stringConstant":
            self.write_xml()  # write the int \ string
            self.__tokenizer.advance()
        # handle const keyword
        elif curr_type == "keyword" and self.__tokenizer.get_value(
        ) in self.KEYWORD_CONSTANT:
            self.__tokenizer.set_type("keywordConstant")
            self.write_xml()  # write key word
            self.__tokenizer.advance()
        elif curr_type == "identifier":
            # handle var names (lookahead: not a call if next is not ( or .)
            if self.__tokenizer.get_next_token() != "(" and \
                    self.__tokenizer.get_next_token() != ".":
                self.write_xml()  # write the var name
                self.__tokenizer.advance()
                if self.__tokenizer.get_value() == "[":
                    self.write_xml()  # write [
                    self.__tokenizer.advance()
                    self.compile_expression()
                    self.write_xml()  # write ]
                    self.__tokenizer.advance()
            # handle function calls
            else:
                self.subroutine_call()
        # handle expression
        elif curr_type == "symbol" and self.__tokenizer.get_value() == "(":
            self.write_xml()  # write (
            self.__tokenizer.advance()
            self.compile_expression()
            self.write_xml()  # write )
            self.__tokenizer.advance()
        # handle - \ ~
        elif self.__tokenizer.get_value() == "-" or self.__tokenizer.get_value(
        ) == "~":
            self.write_xml()  # write -\~
            self.__tokenizer.advance()
            self.compile_term()
        self.__output.write("</term>\n")

    def subroutine_call(self):
        """Compile a subroutine call: (name '.')? name '(' exprList ')'.

        The qualifier ('receiver.' prefix) is consumed only when the
        lookahead token is '.'; the call tail is common to both forms.
        """
        if self.__tokenizer.get_next_token() == ".":
            self.write_xml()  # write name
            self.__tokenizer.advance()
            self.write_xml()  # write .
            self.__tokenizer.advance()
        self.write_xml()  # write name
        self.__tokenizer.advance()
        self.write_xml()  # write (
        self.__tokenizer.advance()
        self.compile_expression_list()
        self.write_xml()  # write )
        self.__tokenizer.advance()

    def compile_expression_list(self):
        """Compile a (possibly empty) comma-separated expression list."""
        self.__output.write("<expressionList>\n")
        if self.__tokenizer.get_value() != ")":
            self.compile_expression()
            while self.__tokenizer.get_value() == ",":
                self.write_xml()  # write ,
                self.__tokenizer.advance()
                self.compile_expression()
        self.__output.write("</expressionList>\n")
class CompilationEngine:
    """Recursive-descent parser for Jack that builds an
    xml.etree.ElementTree parse tree rooted at a 'class' element.

    Relies on module-level tag constants (CLASS, TERM, SYMBOL, KEYWORD,
    IDENTIFIER, EXPRESSION, EXPRESSION_LIST, STATEMENTS, ...), the JTok
    token-type enum, OPERATORS, Tokenizer and prettify().
    """

    def __init__(self, source):
        """Tokenize *source*, prime the first token and compile the
        whole class into self.root."""
        self.tokenizer = Tokenizer(source)
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()
        self.root = Element(CLASS)
        self.compile_class(self.root)

    def next(self):
        """Advance to the next token if one exists."""
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def get_xml(self):
        """
        Returns a textual XML representation of the program structure.
        :return:
        """
        # NOTE(review): [23:] presumably strips the XML declaration that
        # prettify() prepends — confirm against prettify's output.
        return prettify(self.root)[23:]

    def compile_expression(self, caller):
        """Compile an expression: term (op term)*.

        :param caller: parent Element to attach children to
        """
        self.compile_term(SubElement(caller, TERM))
        while self.tokenizer.token_type() is JTok.SYMBOL \
                and self.tokenizer.symbol() in OPERATORS:
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
            self.next()
            self.compile_term(SubElement(caller, TERM))

    def compile_expressionList(self, caller):
        """Compile a (possibly empty) comma-separated expression list.

        :param caller: parent Element to attach children to
        """
        # if expression list is empty
        if self.tokenizer.token_type() is JTok.SYMBOL \
                and self.tokenizer.symbol() == ")":
            caller.text = "\n"
            return
        self.compile_expression(SubElement(caller, EXPRESSION))
        while self.tokenizer.token_type() is JTok.SYMBOL \
                and self.tokenizer.symbol() == ",":
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))

    def compile_subroutineCall(self, caller, first_token):
        """Compile a subroutine call.

        The first identifier must be sent manually, so the method
        expects the current token to be the second in the specification.

        :param caller: parent Element to attach children to
        :param first_token: the already-consumed leading identifier
        """
        SubElement(caller, IDENTIFIER).text = first_token
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        if self.tokenizer.symbol() == '.':
            self.next()
            SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()
        self.compile_expressionList(SubElement(caller, EXPRESSION_LIST))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()

    def compile_term(self, caller):
        """Compile one term: constant, keyword, variable (with optional
        indexing), subroutine call, '(' expression ')', or unary -/~.

        :param caller: parent Element to attach children to
        """
        tok_type = self.tokenizer.token_type()  # renamed from 'type' (builtin)
        if tok_type is JTok.INT_CONST:
            SubElement(caller, INTEGER_CONSTANT).text = \
                str(self.tokenizer.intVal())
            self.next()
        elif tok_type is JTok.STRING_CONST:
            SubElement(caller, STRING_CONSTANT).text = \
                self.tokenizer.string_val()
            self.next()
        elif tok_type is JTok.KEYWORD:
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()
        elif tok_type is JTok.IDENTIFIER:
            # One token of lookahead decides: call, array access, or var.
            name = self.tokenizer.identifier()
            self.next()
            tok_type = self.tokenizer.token_type()
            if tok_type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)
            elif tok_type is JTok.SYMBOL and self.tokenizer.symbol() == '[':
                SubElement(caller, IDENTIFIER).text = name
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()
            else:
                SubElement(caller, IDENTIFIER).text = name
        elif tok_type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()
            elif self.tokenizer.symbol() in {'-', '~'}:
                SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))

    def compile_do(self, caller):
        """
        format : 'do' subroutineCall ';'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()
        name = self.tokenizer.identifier()
        self.next()
        self.compile_subroutineCall(caller, name)
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()

    def compile_let(self, caller):
        """
        format : 'let' varName ( '[' expression ']' )? '=' expression ';'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # 'let'
        self.next()
        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.next()
        if self.tokenizer.symbol() == '[':
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # '['
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # ']'
            self.next()
        # If there is no expression to compile:
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '='
        self.next()
        self.compile_expression(SubElement(caller, EXPRESSION))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()

    def compile_return(self, caller):
        """
        format : 'return' expression? ';'
        :param caller:
        :return:
        """
        # BUG FIX: was self.tokenizer.identifier() — 'return' is a keyword,
        # so read it with key_word() like every sibling statement compiler.
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()
        if self.tokenizer.token_type() is JTok.SYMBOL \
                and self.tokenizer.symbol() == ";":
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
            self.next()
            return
        self.compile_expression(SubElement(caller, EXPRESSION))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()

    def compile_while(self, caller):
        """
        format : 'while' '(' expression ')' '{' statements '}'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # 'while'
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '('
        self.next()
        self.compile_expression(SubElement(caller, EXPRESSION))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ')'
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '{'
        self.next()
        self.compile_statements(SubElement(caller, STATEMENTS))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '}'
        self.next()

    def compile_statements(self, caller):
        """Compile statement* by dispatching on the leading keyword;
        emits a bare newline text node when there are no statements.

        :param caller: parent Element to attach children to
        """
        # renamed from STATEMENTS: that name shadowed the module-level
        # tag constant used by compile_while / compile_if.
        statement_kinds = {'do', 'while', 'let', 'return', 'if'}
        run_once = False
        while self.tokenizer.token_type() is JTok.KEYWORD \
                and self.tokenizer.key_word() in statement_kinds:
            run_once = True
            if self.tokenizer.key_word() == 'do':
                self.compile_do(SubElement(caller, 'doStatement'))
            elif self.tokenizer.key_word() == 'while':
                self.compile_while(SubElement(caller, 'whileStatement'))
            elif self.tokenizer.key_word() == 'let':
                self.compile_let(SubElement(caller, 'letStatement'))
            elif self.tokenizer.key_word() == 'return':
                self.compile_return(SubElement(caller, 'returnStatement'))
            elif self.tokenizer.key_word() == 'if':
                self.compile_if(SubElement(caller, 'ifStatement'))
        if not run_once:
            caller.text = "\n"

    def compile_if(self, caller):
        """
        format : 'if' '(' expression ')' '{' statements '}'
                 ( 'else' '{' statements '}' )?
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # 'if'
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '('
        self.next()
        self.compile_expression(SubElement(caller, EXPRESSION))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ')'
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '{'
        self.next()
        self.compile_statements(SubElement(caller, STATEMENTS))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set '}'
        self.next()
        if self.tokenizer.token_type() is JTok.KEYWORD \
                and self.tokenizer.key_word() == 'else':
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # '{'
            self.next()
            self.compile_statements(SubElement(caller, STATEMENTS))
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # '}'
            self.next()

    def compile_var_dec(self, caller):
        """
        format: 'var' type varName ( ',' varName)* ';'
        :param caller:
        :return:
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()  # 'var'
        self.next()
        self.compile_list_of_vars(caller)

    def compile_class(self, caller):
        """Compile: 'class' className '{' classVarDec* subroutineDec* '}'.

        :param caller: parent Element to attach children to
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()
        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # {
        self.next()
        while self.tokenizer.token_type() is JTok.KEYWORD \
                and self.tokenizer.key_word() in {'static', 'field'}:
            self.compile_classVarDec(SubElement(caller, "classVarDec"))
        # subroutines until the closing '}' of the class
        while self.tokenizer.token_type() is not JTok.SYMBOL:
            self.compile_subroutine(SubElement(caller, "subroutineDec"))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # }
        self.next()

    def compile_list_of_vars(self, caller):
        """Helper method to compile lists of variables according to
        type varName (',' varName)*.

        :param caller: parent Element to attach children to
        """
        self.compile_type(caller)
        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.next()
        while self.tokenizer.symbol() != ';':
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # ','
            self.next()
            SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # set ';'
        self.next()

    def compile_classVarDec(self, caller):
        """Compile ('static'|'field') type varName (',' varName)* ';'.

        :param caller: parent Element to attach children to
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()
        self.compile_list_of_vars(caller)

    def compile_type(self, caller):
        """Compiles a tag according to type, for variables: a keyword
        tag for builtin types, an identifier tag for class types.

        :param caller: parent Element to attach children to
        """
        tag = KEYWORD if self.tokenizer.token_type() is JTok.KEYWORD \
            else IDENTIFIER
        text = self.tokenizer.key_word() if tag is KEYWORD \
            else self.tokenizer.identifier()
        SubElement(caller, tag).text = text
        self.next()

    def compile_subroutine(self, caller):
        """Compile a subroutine declaration:
        kind ('void'|type) name '(' parameterList ')' subroutineBody.

        :param caller: parent Element to attach children to
        """
        SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
        self.next()
        if self.tokenizer.token_type() is JTok.KEYWORD \
                and self.tokenizer.key_word() == "void":
            SubElement(caller, KEYWORD).text = self.tokenizer.key_word()
            self.next()
        else:
            self.compile_type(caller)
        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.next()
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()
        self.compile_parameterList(SubElement(caller, "parameterList"))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
        self.next()
        self.compile_subroutineBody(SubElement(caller, "subroutineBody"))

    def compile_subroutineBody(self, caller):
        """Compiles a subroutine body: '{' varDec* statements '}'.

        :param caller: parent Element to attach children to
        """
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # {
        self.next()
        while self.tokenizer.token_type() is JTok.KEYWORD \
                and self.tokenizer.key_word() == "var":
            self.compile_var_dec(SubElement(caller, "varDec"))
        self.compile_statements(SubElement(caller, "statements"))
        SubElement(caller, SYMBOL).text = self.tokenizer.symbol()  # }
        self.next()

    def compile_parameterList(self, caller):
        """Compile a (possibly empty) parameter list:
        (type varName (',' type varName)*)?.

        :param caller: parent Element to attach children to
        """
        if self.tokenizer.token_type() is JTok.SYMBOL \
                and self.tokenizer.symbol() == ")":
            caller.text = "\n"
            return
        self.compile_type(caller)
        SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
        self.next()
        while self.tokenizer.token_type() is JTok.SYMBOL \
                and self.tokenizer.symbol() == ",":
            SubElement(caller, SYMBOL).text = self.tokenizer.symbol()
            self.next()
            self.compile_type(caller)
            SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()
            self.next()
class JackAnalyzer: """ A jack parser class """ Operators = ['+', '-', '*', '/', '|', '=', '>', '&', '<'] statements_keyword = ["let", "do", "if", "while", "return"] Symbols = [ '(', ')', '{', '}', '[', ']', ',', ';', '.', '+', '-', '*', '/', '&', '|', '>', '<', '=', '~' ] def __init__(self, file): """ A constructor which initializes the members of the class """ self.tekonizer = Tokenizer(file) self.output_file = self.openfile(file) self.taps = '' self.two_taps = ' ' def openfile(self, file): """ This function opens a file to write """ point = file.find('.') new_path = file[:point] + '.xml' output_file = open(new_path, 'w') return output_file def compileClass(self): """ This function compiles a class state """ if (self.tekonizer.current_token == "class"): self.output_file.write("<class>\n") self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileClassVarDec() self.compileSubroutineDec() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "class" '>\n') def compileVarName(self): """ This function compile the names of the var declaration of a class """ self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == ','): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileVarName() elif (self.tekonizer.current_token == ';'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "classVarDec" + '>' + '\n') self.tekonizer.advance() def compileClassVarDec(self): """ This function compile the names of the var 
declaration of a class """ if (self.tekonizer.current_token == "static" or self.tekonizer.current_token == "field"): self.output_file.write(self.taps + '<' + "classVarDec" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileVarName() self.compileClassVarDec() def compileSubroutineDec(self): """ This function compile the method \ function \ constructor case """ if (self.tekonizer.current_token == "method" or self.tekonizer.current_token == "constructor" or self.tekonizer.current_token == "function"): self.output_file.write(self.taps + '<subroutineDec>' + '\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.compileParameterList() self.compileSubrotineBody() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + 'subroutineDec' + '>\n') if (self.tekonizer.current_token != '}'): self.compileSubroutineDec() def compileParameterList(self): """ This function compiles the parameterlist of the method\function\constructor """ self.output_file.write(self.taps + '<' + "parameterList" + '>\n') self.taps += self.two_taps self.tekonizer.advance() self.compileParametrs() def compileParametrs(self): """ This function compiles the arguments of the method\function\constructor """ if (self.tekonizer.current_token == ')'): self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "parameterList" + '>\n') self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() else: self.output_file.write(self.taps + 
# NOTE(review): this whole region is a flattened (whitespace-mangled) dump of a
# Jack-analyzer CompilationEngine that writes XML tags through self.output_file,
# driven by a tokenizer held in the misspelled attribute `self.tekonizer`.
# Physical line breaks and indentation were lost in extraction, so the code is
# preserved byte-for-byte below; the comments only label what each run of
# statements implements. Nesting of several if/else bodies cannot be recovered
# from this dump — do not re-indent without the original file.
# Tail of compileParametrs (recursive parameter-list compiler; its head is
# above this excerpt), then compileSubrotineBody, compileVarDec and
# compileVarDecName; the dangling `def ` at the end continues on the next line.
self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == ','): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileParametrs() else: self.compileParametrs() def compileSubrotineBody(self): """ This function compiles the subroutinebody of the method\function\constructor """ self.output_file.write(self.taps + '<' + "subroutineBody" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileVarDec() self.compileStatements() self.tekonizer.advance() if (self.tekonizer.current_token == '}'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "subroutineBody" + '>\n') def compileVarDec(self): """ This function compile the var declaration for a method """ if (self.tekonizer.current_token == "var"): self.output_file.write(self.taps + '<' + "varDec" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileVarDecName() self.compileVarDec() def compileVarDecName(self): """ This function compile the name of the var declaration for a method """ self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == ','): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileVarDecName() if (self.tekonizer.current_token == ';'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "varDec" + '>\n') self.tekonizer.advance() def 
# compileStatements wrapper (<statements> tag), the compile_Statements keyword
# dispatcher (if/while/do/let/return, recursing until no statement keyword
# remains), and if_statement (<ifStatement>, '(' expression ')', '{' body '}',
# optional else clause); its closing-tag write spills onto the next line.
compileStatements(self): """ This function compiles the statements declaration """ self.output_file.write(self.taps + '<' + "statements" + '>\n') self.taps += self.two_taps self.compile_Statements() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "statements" + '>\n') def compile_Statements(self): """ This function checks the current statements and calls the appropriate function """ if (self.tekonizer.current_token == "if"): self.if_statement() self.compile_Statements() elif (self.tekonizer.current_token == "while"): self.while_statement() self.compile_Statements() elif (self.tekonizer.current_token == "do"): self.do_statement() self.compile_Statements() elif (self.tekonizer.current_token == "let"): self.let_statement() self.compile_Statements() elif (self.tekonizer.current_token == "return"): self.return_statement() self.compile_Statements() def if_statement(self): """ This function compile the if statement """ self.output_file.write(self.taps + '<' + "ifStatement" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileStatements() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == 'else'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileStatements() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + 
# End of if_statement's closing tag, then while_statement, do_statement
# (do + subroutineCall + ';'), and the start of let_statement — its '=' and
# '[' (array assignment) branches continue on the next line.
"ifStatement" + '>\n') def while_statement(self): """ This function compile the while statement """ self.output_file.write(self.taps + '<' + "whileStatement" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileStatements() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "whileStatement" + '>\n') self.tekonizer.advance() def do_statement(self): """ This function compile the do statement """ self.output_file.write(self.taps + '<' + "doStatement" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.subroutineCall() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "doStatement" + '>\n') def let_statement(self): """ This function compile the let statement """ self.output_file.write(self.taps + '<' + "letStatement" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileLetVarName() if (self.tekonizer.current_token == '='): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() elif (self.tekonizer.current_token == '['): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() 
# Remainder of let_statement, return_statement (expression only when the next
# token is not ';'), compileLetVarName, compile_expression (term, optional
# binary operator + term, then comma-recursion for expression lists), and the
# opening of compile_term; the dangling `if ` continues on the next line.
self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() if (self.tekonizer.current_token == ';'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "letStatement" + '>\n') def return_statement(self): """ This function compiles the return statement """ self.output_file.write(self.taps + '<' + "returnStatement" + '>\n') self.taps += self.two_taps self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token != ';'): self.compile_expression() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "returnStatement" + '>\n') def compileLetVarName(self): """ This function compiles the let var name """ self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == ','): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compileVarName() def compile_expression(self): """ This function compiles the expression case's """ self.output_file.write(self.taps + '<' + "expression" + '>\n') self.taps += self.two_taps self.compile_term() if (self.tekonizer.current_token in self.Operators): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_term() self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "expression" + '>\n') if (self.tekonizer.current_token == ','): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() def compile_term(self): """ This function compiles the term case's """ self.output_file.write(self.taps + '<' + "term" + '>\n') self.taps += self.two_taps term_flag = True if 
# Body of compile_term: integer/string/keyword constants, unary - and ~
# (recursing into compile_term), parenthesised sub-expressions, and a one-token
# lookahead (all_tokens[counter + 1]) to distinguish subroutine calls ('.'/'('),
# array indexing ('['), and plain identifiers.
(self.tekonizer.current_token.isdigit() or self.tekonizer.current_token.startswith('"') or self.tekonizer.current_token == 'this' or self.tekonizer.current_token == 'null' or self.tekonizer.current_token == 'true' or self.tekonizer.current_token == 'false'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() elif (self.tekonizer.current_token == '-' or self.tekonizer.current_token == '~'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_term() elif (self.tekonizer.current_token == '('): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() if (self.tekonizer.current_token == ')'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() else: if (self.tekonizer.all_tokens[self.tekonizer.counter + 1] == '.' or self.tekonizer.all_tokens[self.tekonizer.counter + 1] == '('): #identefier self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.subroutineCall() elif (self.tekonizer.all_tokens[self.tekonizer.counter + 1] == '['): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_expression() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') #term->expreession->current==] self.tekonizer.advance() elif (self.tekonizer.all_tokens[self.tekonizer.counter + 1] == ')'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() else: self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() term_flag = False self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "term" + '>\n') if (self.tekonizer.current_token in self.Operators and not term_flag): 
# Tail of compile_term's trailing-operator handling, compile_exclist
# (<expressionList>, non-empty only when the next token is not ')'), and
# subroutineCall: identifier, optional '.name', then '(' expressionList ')'
# — with a special empty-list path that emits the expressionList tags directly.
self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_term() def compile_exclist(self): """ This function compiles the expression list case """ self.output_file.write(self.taps + '<' + "expressionList" + '>' + '\n') self.taps += self.two_taps if (self.tekonizer.current_token != ')'): self.compile_expression() # if item != ) self.taps = self.taps[:-2] self.output_file.write(self.taps + '</' + "expressionList" + '>\n') def subroutineCall(self): """ This function compiles the call of a function """ if (self.tekonizer.return_typetoken() == "identifier"): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == '.'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() elif (self.tekonizer.current_token == '('): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() self.compile_exclist() if (self.tekonizer.current_token == ')'): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == '('): self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() if (self.tekonizer.current_token == ')'): self.output_file.write(self.taps + '<' + "expressionList" + '>\n') self.output_file.write(self.taps + '</' + "expressionList" + '>\n') self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance() else: self.compile_exclist() self.output_file.write(self.taps + self.tekonizer.tokenType() + '\n') self.tekonizer.advance()
# NOTE(review): a second, independent XML-emitting CompilationEngine (flattened
# dump — physical line structure lost; code preserved byte-for-byte, comments
# added between the original lines only). It drives a private Tokenizer and
# writes one "<type> value </type>" line per token via write_xml, compiling the
# whole class eagerly from __init__.
# NOTE(review): COMPARE_SYM_REPLACER maps every symbol to itself ('<' -> "<",
# etc.) — as written the table is a no-op. The XML entity escapes (&lt; &gt;
# &quot; &amp;) were almost certainly lost when this file was extracted;
# confirm against the original source before trusting the emitted XML.
class CompilationEngine: XML_LINE = "<{0}> {1} </{0}>\n" COMPARE_SYM_REPLACER = { '<': "<", '>': ">", '"': """, '&': "&" } KEYWORD_CONSTANT = ("true", "false", "null", "this") def __init__(self, input_stream, output_stream): """ constructor of the Compilation Engine object :param input_stream: the input stream :param output_stream: the output stream """ self.__tokenizer = Tokenizer(input_stream) # Tokenizer object self.__output = open(output_stream, "w") self.__statements = { "let": self.compile_let, "if": self.compile_if, "while": self.compile_while, "do": self.compile_do, "return": self.compile_return } self.compile_class() self.__output.close() def write_xml(self): """ writing xml line """ if self.__tokenizer.token_type() == "stringConstant": self.__output.write( self.XML_LINE.format(self.__tokenizer.token_type(), self.__tokenizer.string_val())) elif self.__tokenizer.get_value() in self.COMPARE_SYM_REPLACER: xml_val = self.COMPARE_SYM_REPLACER[self.__tokenizer.get_value()] self.__output.write( self.XML_LINE.format(self.__tokenizer.token_type(), xml_val)) else: self.__output.write( self.XML_LINE.format(self.__tokenizer.token_type(), self.__tokenizer.get_value())) def compile_class(self): """ compiling the program from the class definition """ self.__output.write("<class>\n") self.write_xml() self.__tokenizer.advance() self.write_xml() self.__tokenizer.advance() self.write_xml() self.__tokenizer.advance() current_token = self.__tokenizer.get_value() while current_token == "static" or current_token == "field": self.compile_class_var_dec() current_token = self.__tokenizer.get_value() while current_token == "constructor" or current_token == "function" or current_token == "method": self.compile_subroutine_dec() current_token = self.__tokenizer.get_value() self.write_xml() self.__output.write("</class>\n") def compile_class_var_dec(self): """ compiling the program from the class's declaration on vars """ current_token = self.__tokenizer.get_value() while current_token 
# Remainder of compile_class_var_dec's while loop (static|field type name
# (',' name)* ';'), then compile_subroutine_body (var decs + statements inside
# braces), compile_subroutine_dec, compile_parameter_list, and the head of
# compile_var_dec (continues on the next line).
== "static" or current_token == "field": self.__output.write("<classVarDec>\n") self.write_xml() self.__tokenizer.advance() self.write_xml() self.__tokenizer.advance() self.write_xml() self.__tokenizer.advance() while self.__tokenizer.get_value() == ",": self.write_xml() # write , self.__tokenizer.advance() self.write_xml() # write value self.__tokenizer.advance() self.write_xml() self.__tokenizer.advance() current_token = self.__tokenizer.get_value() self.__output.write("</classVarDec>\n") def compile_subroutine_body(self): """ compiling the program's subroutine body """ self.__output.write("<subroutineBody>\n") self.write_xml() # write { self.__tokenizer.advance() while self.__tokenizer.get_value() == "var": self.compile_var_dec() self.compile_statements() self.write_xml() # write } self.__tokenizer.advance() self.__output.write("</subroutineBody>\n") def compile_subroutine_dec(self): """ compiling the program's subroutine declaration """ self.__output.write("<subroutineDec>\n") self.write_xml() # write constructor/function/method self.__tokenizer.advance() self.write_xml() # write return type self.__tokenizer.advance() self.write_xml() # write identifier name self.__tokenizer.advance() self.compile_parameter_list() self.compile_subroutine_body() self.__output.write("</subroutineDec>\n") def compile_parameter_list(self): """ compiling a parameter list """ self.write_xml() # write ( self.__tokenizer.advance() self.__output.write("<parameterList>\n") if self.__tokenizer.get_value() != ")": self.write_xml() # write type self.__tokenizer.advance() self.write_xml() # write varName self.__tokenizer.advance() while self.__tokenizer.get_value() == ",": self.write_xml() # write , self.__tokenizer.advance() self.write_xml() # type self.__tokenizer.advance() self.write_xml() # varName self.__tokenizer.advance() self.__output.write("</parameterList>\n") self.write_xml() # write ) self.__tokenizer.advance() def compile_var_dec(self): """ compiling function's var declaration 
# Rest of compile_var_dec, compile_statements (dispatches on the current
# keyword through the self.__statements callable table until '}'), compile_do,
# compile_let (with the optional '[' expression ']' index form), and
# compile_while — its closing tag write spills onto the next line.
""" self.__output.write("<varDec>\n") self.write_xml() # write var self.__tokenizer.advance() self.write_xml() # write type self.__tokenizer.advance() self.write_xml() # write varName self.__tokenizer.advance() while self.__tokenizer.get_value() == ",": self.write_xml() # write , self.__tokenizer.advance() self.write_xml() self.__tokenizer.advance() self.write_xml() # write ; self.__tokenizer.advance() self.__output.write("</varDec>\n") def compile_statements(self): """ compiling statements """ key = self.__tokenizer.get_value() self.__output.write("<statements>\n") if key != "}": while key in self.__statements: self.__statements[self.__tokenizer.get_value()]() key = self.__tokenizer.get_value() self.__output.write("</statements>\n") def compile_do(self): """ compiling do call """ self.__output.write("<doStatement>\n") self.write_xml() # write do self.__tokenizer.advance() self.subroutine_call() self.write_xml() # write ; self.__tokenizer.advance() self.__output.write("</doStatement>\n") def compile_let(self): """ compiling let call """ self.__output.write("<letStatement>\n") self.write_xml() # write let self.__tokenizer.advance() self.write_xml() # write varName self.__tokenizer.advance() if self.__tokenizer.get_value() == "[": self.write_xml() # write [ self.__tokenizer.advance() self.compile_expression() self.write_xml() # write ] self.__tokenizer.advance() self.write_xml() # write = self.__tokenizer.advance() self.compile_expression() self.write_xml() # write ; self.__tokenizer.advance() self.__output.write("</letStatement>\n") def compile_while(self): """ compiling while loop call """ self.__output.write("<whileStatement>\n") self.write_xml() # write while self.__tokenizer.advance() self.write_xml() # write ( self.__tokenizer.advance() self.compile_expression() self.write_xml() # write ) self.__tokenizer.advance() self.write_xml() # write { self.__tokenizer.advance() self.compile_statements() self.write_xml() # write } self.__tokenizer.advance() 
# End of compile_while, compile_return (expression only when next token is not
# ';'), compile_if (with optional else clause), compile_expression (term
# (op term)*), and compile_term — the identifier branch's one-token lookahead
# condition (get_next_token) is split across the line break.
self.__output.write("</whileStatement>\n") def compile_return(self): """ compiling return statement """ self.__output.write("<returnStatement>\n") self.write_xml() # write return self.__tokenizer.advance() if self.__tokenizer.get_value() != ";": self.compile_expression() self.write_xml() # write ; self.__tokenizer.advance() self.__output.write("</returnStatement>\n") def compile_if(self): """ compiling if condition """ self.__output.write("<ifStatement>\n") self.write_xml() # write if self.__tokenizer.advance() self.write_xml() # write ( self.__tokenizer.advance() self.compile_expression() self.write_xml() # write ) self.__tokenizer.advance() self.write_xml() # write { self.__tokenizer.advance() self.compile_statements() self.write_xml() # write } self.__tokenizer.advance() if self.__tokenizer.get_value() == "else": self.write_xml() # write else self.__tokenizer.advance() self.write_xml() # write { self.__tokenizer.advance() self.compile_statements() self.write_xml() # write } self.__tokenizer.advance() self.__output.write("</ifStatement>\n") def compile_expression(self): """ compiling expressions """ self.__output.write("<expression>\n") self.compile_term() while self.__tokenizer.is_operator(): self.write_xml() # write the operator self.__tokenizer.advance() self.compile_term() self.__output.write("</expression>\n") def compile_term(self): """ compiling any kind of terms """ # dealing with unknown token self.__output.write("<term>\n") curr_type = self.__tokenizer.token_type() # handle consts if curr_type == "integerConstant" or curr_type == "stringConstant": self.write_xml() # write the int \ string self.__tokenizer.advance() # handle const keyword elif curr_type == "keyword" and self.__tokenizer.get_value( ) in self.KEYWORD_CONSTANT: self.__tokenizer.set_type("keywordConstant") self.write_xml() # write key word self.__tokenizer.advance() elif curr_type == "identifier": # handle var names if self.__tokenizer.get_next_token( ) != "(" and 
# Tail of compile_term (plain var names with optional '[' indexing, '('
# expression ')', unary -/~), subroutine_call, and compile_expression_list.
# NOTE(review): in subroutine_call as flattened, only the dotted-call writes
# are guarded by get_next_token() == "." — the indentation of the later
# name/'('/expressionList/')' writes is ambiguous in this dump; verify nesting
# against the original file.
self.__tokenizer.get_next_token() != ".": self.write_xml() # write the var name self.__tokenizer.advance() if self.__tokenizer.get_value() == "[": self.write_xml() # write [ self.__tokenizer.advance() self.compile_expression() self.write_xml() # write ] self.__tokenizer.advance() # handle function calls else: self.subroutine_call() # handle expression elif curr_type == "symbol" and self.__tokenizer.get_value() == "(": self.write_xml() # write ( self.__tokenizer.advance() self.compile_expression() self.write_xml() # write ) self.__tokenizer.advance() # handle - \ ~ elif self.__tokenizer.get_value() == "-" or self.__tokenizer.get_value( ) == "~": self.write_xml() # write -\~ self.__tokenizer.advance() self.compile_term() self.__output.write("</term>\n") def subroutine_call(self): """ compiling the program's subroutine call """ if self.__tokenizer.get_next_token() == ".": self.write_xml() # write name self.__tokenizer.advance() self.write_xml() # write . self.__tokenizer.advance() self.write_xml() # write name self.__tokenizer.advance() self.write_xml() # write ( self.__tokenizer.advance() self.compile_expression_list() self.write_xml() # write ) self.__tokenizer.advance() def compile_expression_list(self): """ compiling expression list """ self.__output.write("<expressionList>\n") if self.__tokenizer.get_value() != ")": self.compile_expression() while self.__tokenizer.get_value() == ",": self.write_xml() # write , self.__tokenizer.advance() self.compile_expression() self.__output.write("</expressionList>\n")
# NOTE(review): third CompilationEngine — a VM-code generator (the code-
# generation stage of a Jack compiler) built on Tokenizer, SymbolTable and
# VMWriter, with ElementTree Element/SubElement nodes kept over from the
# earlier XML stage (most SubElement writes are commented out). Flattened dump:
# physical line structure lost; code preserved byte-for-byte, comments added
# between the original lines only.
class CompilationEngine: def __init__(self, source): self.if_counter = 0 self.while_counter = 0 self.tokenizer = Tokenizer(source) self.tokenizer.has_more_tokens() self.tokenizer.advance() self.symbols = SymbolTable() self.writer = VMWriter(source) self.arithmetic_op = {} self.init_op() self.root = Element(CLASS) self.class_name = "" self.compile_class(self.root) self.writer.close() def init_op(self): self.arithmetic_op = { '+': "add", '-': "sub", '*': "call Math.multiply 2", '/': "call Math.divide 2", '&': "and", '|': "or", '<': "lt", '>': "gt", '=': "eq" } def next(self): """ Proceed to the next token. :return: """ if self.tokenizer.has_more_tokens(): self.tokenizer.advance() def compile_expression(self, caller): """ Compiles an expression. :param caller: :return: """ op_stack = [] self.compile_term(SubElement(caller, TERM)) while self.tokenizer.token_type( ) is JTok.SYMBOL and self.tokenizer.symbol() in OPERATORS: op_stack.append(self.tokenizer.symbol()) self.next() self.compile_term(SubElement(caller, TERM)) while op_stack: self.writer.write_arithmetic(self.arithmetic_op[op_stack.pop()]) def compile_expressionList(self, caller): """ compiles a list of expressions :param caller: :return: num_of_args - number of expressions in expressions list. used by function call """ num_of_args = 0 # if expression list is empty if self.tokenizer.token_type( ) is JTok.SYMBOL and self.tokenizer.symbol() == ")": caller.text = " " return num_of_args num_of_args += 1 self.compile_expression(SubElement(caller, EXPRESSION)) while self.tokenizer.token_type( ) is JTok.SYMBOL and self.tokenizer.symbol() == ",": #SubElement(caller,SYMBOL).text = self.tokenizer.symbol() num_of_args += 1 self.next() self.compile_expression(SubElement(caller, EXPRESSION)) return num_of_args def compile_subroutineCall(self, caller, first_token): """ First token, the first identifier must be sent manually, so the method expects the current token to be the second in the specification. 
# Continuation of compile_subroutineCall: dotted calls on a known variable push
# the object and call Type.method with one extra arg (is_method = 1); bare
# calls are prefixed with the class name and push `pointer 0` (this). Then the
# head of compile_term: int constants, string constants built char-by-char via
# String.new / String.appendChar, and keyword constants (true compiled as
# push constant 1 followed by neg).
:param caller: :param first_token: :return: """ #SubElement(caller, IDENTIFIER).text = first_token func_name = first_token #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() is_method = 0 if self.tokenizer.symbol() == '.': self.next() if self.symbols.kind_of(func_name): # If first token is var name segment = self.symbols.kind_of(func_name) segment = Kind.get_segment(segment) index = self.symbols.index_of(func_name) self.writer.write_push(segment, index) func_name = self.symbols.type_of(func_name) is_method = 1 func_name = func_name + "." + self.tokenizer.identifier() #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier() self.next() #SubElement(caller,SYMBOL).text = self.tokenizer.symbol() else: func_name = self.class_name + "." + func_name self.writer.write_push(POINTER, 0) is_method = 1 self.next() num_of_args = self.compile_expressionList( SubElement(caller, EXPRESSION_LIST)) + is_method self.writer.write_call(func_name, num_of_args) #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() self.next() def compile_term(self, caller): """ :param caller: :return: """ type = self.tokenizer.token_type() if type is JTok.INT_CONST: #SubElement(caller, INTEGER_CONSTANT).text = str(self.tokenizer.intVal()) self.writer.write_push(CONSTANT, self.tokenizer.intVal()) self.next() elif type is JTok.STRING_CONST: string_val = self.tokenizer.string_val() self.writer.write_push(CONSTANT, len(string_val)) self.writer.write_call("String.new", 1) for c in string_val: self.writer.write_push(CONSTANT, ord(c)) self.writer.write_call("String.appendChar", 2) self.next() elif type is JTok.KEYWORD: #SubElement(caller, KEYWORD).text = self.tokenizer.key_word() if self.tokenizer.key_word() in {"null", "false"}: self.writer.write_push(CONSTANT, 0) elif self.tokenizer.key_word( ) == "true": # Assuming valid input, it must be true self.writer.write_push(CONSTANT, 1) self.writer.write_arithmetic("neg") elif self.tokenizer.key_word() == "this": 
# compile_term continued: identifier lookahead ('.'/'(' -> subroutine call,
# '[' -> array access compiled as base+index, pop pointer 1, push that 0,
# otherwise a plain variable push), the symbol branch for '(' expressions and
# unary -/~ (neg/not), then compile_do — which discards the called function's
# return value with pop temp 0.
self.writer.write_push(POINTER, 0) else: print("unexpected") self.next() elif type is JTok.IDENTIFIER: name = self.tokenizer.identifier() self.next() type = self.tokenizer.token_type() if type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}: self.compile_subroutineCall(caller, name) elif type is JTok.SYMBOL and self.tokenizer.symbol( ) == '[': #TODO: Arrays, later # SubElement(caller, IDENTIFIER).text = name # SubElement(caller, SYMBOL).text = self.tokenizer.symbol() self.next() self.compile_expression(SubElement(caller, EXPRESSION)) kind = self.symbols.kind_of(name) index = self.symbols.index_of(name) if kind is not None: self.writer.write_push(kind.get_segment(), index) else: print("unexpected") self.writer.write_arithmetic("add") self.writer.write_pop(POINTER, 1) self.writer.write_push("that", 0) #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() self.next() else: #SubElement(caller, IDENTIFIER).text = name kind = self.symbols.kind_of(name) index = self.symbols.index_of(name) if kind is not None: self.writer.write_push(kind.get_segment(), index) else: print("unexpected") elif type is JTok.SYMBOL: if self.tokenizer.symbol() == '(': #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() self.next() self.compile_expression(SubElement(caller, EXPRESSION)) #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() self.next() elif self.tokenizer.symbol() in {'-', '~'}: #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() unary_op = self.tokenizer.symbol() self.next() self.compile_term(SubElement(caller, TERM)) if unary_op == "-": self.writer.write_arithmetic("neg") elif unary_op == "~": self.writer.write_arithmetic("not") else: "unexpected" def compile_do(self, caller): """ format : 'do' subroutineCall ';' :param caller: :return: """ #SubElement(caller, KEYWORD).text = self.tokenizer.key_word() self.next() name = self.tokenizer.identifier() self.next() self.compile_subroutineCall(caller, name) self.writer.write_pop(TEMP, 0) 
# compile_let: the array-target form evaluates base+index, then the RHS, and
# uses pop temp 0 / pop pointer 1 / push temp 0 / pop that 0 so the RHS
# expression cannot clobber the `that` pointer; the plain form is a single
# pop to the variable's segment. compile_return starts here and continues on
# the next line.
#SubElement(caller, SYMBOL).text = self.tokenizer.symbol() # set ';' self.next() def compile_let(self, caller): """ format : 'let' varName ( '[' expression ']' )? '=' expression ';' :param caller: :return: """ self.next() # skip 'let' varName = self.tokenizer.identifier() self.next() kind = self.symbols.kind_of(varName) kind = kind.get_segment() index = self.symbols.index_of(varName) if self.tokenizer.symbol() == '[': # if array self.next() # skip [ self.compile_expression(SubElement(caller, EXPRESSION)) self.writer.write_push(kind, index) self.writer.write_arithmetic("add") self.next() # skip ] self.next() # skip = self.compile_expression(SubElement(caller, EXPRESSION)) self.writer.write_pop(TEMP, 0) self.writer.write_pop(POINTER, 1) self.writer.write_push(TEMP, 0) self.writer.write_pop("that", 0) else: self.next() # skip = self.compile_expression(SubElement(caller, EXPRESSION)) self.writer.write_pop(kind, index) self.next() # skip ; def compile_return(self, caller): """ format : 'return' expression? 
# compile_return (void returns push constant 0 before `return`), compile_while
# (WHILE_EXP/WHILE_END labels, condition negated with `not` and tested via
# if-goto), compile_statements (keyword dispatch over do/while/let/return/if),
# and the start of compile_if.
';' :param caller: :return: """ #SubElement(caller,KEYWORD).text = self.tokenizer.identifier() self.next() if self.tokenizer.token_type( ) is JTok.SYMBOL and self.tokenizer.symbol() == ";": #SubElement(caller,SYMBOL).text = self.tokenizer.symbol() self.writer.write_push(CONSTANT, 0) self.writer.write_return() self.next() return self.compile_expression(SubElement(caller, EXPRESSION)) self.writer.write_return() #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() self.next() def compile_while(self, caller): """ format : 'while' '(' expression ')' '{' statements '}' :param caller: :return: """ while_index = self.while_counter self.while_counter += 1 self.writer.write_label("WHILE_EXP" + str(while_index)) self.next() # skip while self.next() # skip ( self.compile_expression(SubElement(caller, EXPRESSION)) self.writer.write_arithmetic("not") self.writer.write_if("WHILE_END" + str(while_index)) self.next() # skip ) self.next() # skip { self.compile_statements(SubElement(caller, STATEMENTS)) self.writer.write_goto("WHILE_EXP" + str(while_index)) self.writer.write_label("WHILE_END" + str(while_index)) self.next() # skip } def compile_statements(self, caller): """ :param caller: :return: """ STATEMENTS = {'do', 'while', 'let', 'return', 'if'} caller.text = " " while self.tokenizer.token_type( ) is JTok.KEYWORD and self.tokenizer.key_word() in STATEMENTS: if self.tokenizer.key_word() == 'do': self.compile_do(SubElement(caller, 'doStatement')) elif self.tokenizer.key_word() == 'while': self.compile_while(SubElement(caller, 'whileStatement')) elif self.tokenizer.key_word() == 'let': self.compile_let(SubElement(caller, 'letStatement')) elif self.tokenizer.key_word() == 'return': self.compile_return(SubElement(caller, 'returnStatement')) elif self.tokenizer.key_word() == 'if': self.compile_if(SubElement(caller, 'ifStatement')) def compile_if(self, caller): """ format : 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )? 
# compile_if body (IF_TRUE / IF_FALSE / IF_END label scheme; IF_END is only
# emitted when an else clause exists), compile_var_dec (delegates to
# compile_list_of_vars and returns the var count), compile_class (classVarDec*
# then subroutineDec* until the closing symbol), and the head of
# compile_list_of_vars, which records each name in the symbol table.
:param caller: :return: """ self.next() # ( self.compile_expression(caller) self.next() # { if_index = self.if_counter self.if_counter += 1 self.writer.write_if("IF_TRUE" + str(if_index)) self.writer.write_goto("IF_FALSE" + str(if_index)) self.writer.write_label("IF_TRUE" + str(if_index)) self.compile_statements(caller) self.next() if self.tokenizer.key_word() == 'else': self.writer.write_goto("IF_END" + str(if_index)) self.writer.write_label("IF_FALSE" + str(if_index)) self.next() # else self.next() # { self.compile_statements(caller) self.next() # } self.writer.write_label("IF_END" + str(if_index)) else: self.writer.write_label("IF_FALSE" + str(if_index)) return def compile_var_dec(self, caller): """ format: 'var' type varName ( ',' varName)* ';' :param caller: :return: """ kind = self.tokenizer.key_word() #SubElement(caller, KEYWORD).text = kind # set var as keyword self.next() return self.compile_list_of_vars(caller, "var", Kind[kind]) def compile_class(self, caller): """ :param caller: :return: """ SubElement(caller, KEYWORD).text = self.tokenizer.key_word() self.next() SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier() self.class_name = self.tokenizer.identifier() self.next() SubElement(caller, SYMBOL).text = self.tokenizer.symbol() #{ self.next() while self.tokenizer.token_type( ) is JTok.KEYWORD and self.tokenizer.key_word() in {'static', 'field'}: self.compile_classVarDec(SubElement(caller, "classVarDec")) while not self.tokenizer.token_type() is JTok.SYMBOL: self.compile_subroutine(SubElement(caller, "subroutineDec")) SubElement(caller, SYMBOL).text = self.tokenizer.symbol() #} self.next() def compile_list_of_vars(self, caller, category, kind): """ Helper method to compile lists of variables according to type varName (',' varName)* :param caller: :return: """ num_of_vars = 0 type = self.compile_type(caller) self.symbols.define(self.tokenizer.identifier(), type, kind) num_of_vars += 1 #text = category+", defined, "+type+", "+kind.name+", 
# compile_list_of_vars tail (comma loop until ';'), compile_classVarDec,
# compile_type (keyword vs identifier type tags; returns the type text), and
# compile_subroutine's prologue — note the function-name expression is split
# mid-token across the line break ("self.class_name + '.'" here,
# "+ self.tokenizer.identifier()" on the next line).
"+str(self.symbols.index_of(self.tokenizer.identifier())) #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()+", "+text # set var name as identifier self.next() while self.tokenizer.symbol() != ';': #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() # set ',' self.next() self.symbols.define(self.tokenizer.identifier(), type, kind) num_of_vars += 1 #text = category + ", defined, " + type + ", " + kind.name + ", " + str( # self.symbols.index_of(self.tokenizer.identifier())) #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier()+", "+text # set var name self.next() #SubElement(caller, SYMBOL).text = self.tokenizer.symbol() # set ';' self.next() return num_of_vars def compile_classVarDec(self, caller): """ :param caller: :return: """ kind = self.tokenizer.key_word() #SubElement(caller,KEYWORD).text = kind self.next() self.compile_list_of_vars(caller, kind, Kind[kind]) def compile_type(self, caller): """ Compiles a tag according to type, for variables :param caller: :return: """ tag = KEYWORD if self.tokenizer.token_type( ) is JTok.KEYWORD else IDENTIFIER text = self.tokenizer.key_word( ) if tag is KEYWORD else self.tokenizer.identifier() SubElement(caller, tag).text = text self.next() return text def compile_subroutine(self, caller): """ :param caller: :return: """ subroutine_type = self.tokenizer.key_word() self.next() # Just to skip void or type if self.tokenizer.token_type( ) is JTok.KEYWORD and self.tokenizer.key_word() == "void": SubElement(caller, KEYWORD).text = self.tokenizer.key_word() self.next() else: self.compile_type(caller) name = self.class_name + "." 
# compile_subroutine continued: methods predefine 'this' as an argument,
# locals are counted from the varDec blocks before write_function is emitted,
# constructors allocate field-count words via Memory.alloc and set pointer 0,
# methods set pointer 0 from argument 0; then compile_parameterList records
# each parameter in the symbol table as Kind.arg.
+ self.tokenizer.identifier() self.symbols.start_subroutine() self.next() self.next() # Skips ( if subroutine_type == "method": self.symbols.define("this", "", Kind.arg) self.compile_parameterList(SubElement(caller, "parameterList")) self.next() # Skips ) self.next() # Skips { num_of_locals = 0 while self.tokenizer.token_type( ) is JTok.KEYWORD and self.tokenizer.key_word() == "var": num_of_locals += self.compile_var_dec(SubElement(caller, "varDec")) self.writer.write_function(name, num_of_locals) if subroutine_type == "constructor": self.writer.write_push(CONSTANT, self.symbols.var_count(Kind.field)) self.writer.write_call("Memory.alloc", 1) self.writer.write_pop(POINTER, 0) elif subroutine_type == "method": self.writer.write_push(ARGUMENT, 0) self.writer.write_pop(POINTER, 0) self.compile_statements(SubElement(caller, "statements")) self.next() # Skips } def compile_parameterList(self, caller): """ :param caller: :return: """ if self.tokenizer.token_type( ) is JTok.SYMBOL and self.tokenizer.symbol() == ")": caller.text = " " return type = self.compile_type(caller) name = self.tokenizer.identifier() # SubElement(caller,IDENTIFIER).text = self.tokenizer.identifier() self.symbols.define(name, type, Kind.arg) self.next() while self.tokenizer.token_type( ) is JTok.SYMBOL and self.tokenizer.symbol() == ",": # SubElement(caller,SYMBOL).text = self.tokenizer.symbol() self.next() type = self.compile_type(caller) name = self.tokenizer.identifier() self.symbols.define(name, type, Kind.arg) #SubElement(caller, IDENTIFIER).text = self.tokenizer.identifier() self.next()
class CompilationEngine:
    """Compiles a Jack source straight to VM code.

    Recursive-descent walk over the token stream, using a SymbolTable for
    variable resolution and a VMWriter for output. An XML Element tree is
    still built alongside (legacy of the syntax-analysis stage).
    """

    def __init__(self, source):
        self.if_counter = 0      # running index for unique if-labels
        self.while_counter = 0   # running index for unique while-labels
        self.tokenizer = Tokenizer(source)
        self.tokenizer.has_more_tokens()
        self.tokenizer.advance()  # prime the first token
        self.symbols = SymbolTable()
        self.writer = VMWriter(source)
        self.arithmetic_op = {}
        self.init_op()
        self.root = Element(CLASS)
        self.class_name = ""
        # The whole compilation runs to completion inside the constructor.
        self.compile_class(self.root)
        self.writer.close()

    def init_op(self):
        """Map each Jack binary operator to its VM command."""
        self.arithmetic_op = {'+': "add",
                              '-': "sub",
                              '*': "call Math.multiply 2",
                              '/': "call Math.divide 2",
                              '&': "and",
                              '|': "or",
                              '<': "lt",
                              '>': "gt",
                              '=': "eq"
                              }

    def next(self):
        """
        Proceed to the next token.
        :return:
        """
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()

    def compile_expression(self, caller):
        """
        Compiles an expression.
        :param caller:
        :return:
        """
        # NOTE(review): operators are stacked and only emitted after ALL
        # terms are compiled, so a chain like a-b+c is evaluated as
        # a-(b+c) rather than left-to-right — confirm this is intended.
        op_stack = []
        self.compile_term(SubElement(caller, TERM))
        while self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() in OPERATORS:
            op_stack.append(self.tokenizer.symbol())
            self.next()
            self.compile_term(SubElement(caller, TERM))
        while op_stack:
            self.writer.write_arithmetic(self.arithmetic_op[op_stack.pop()])

    def compile_expressionList(self, caller):
        """Compile a comma-separated expression list.

        :param caller: XML element to attach child nodes to.
        :return: the number of expressions compiled
        """
        num_of_args = 0
        # if expression list is empty
        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ")":
            caller.text = " "
            return num_of_args
        num_of_args += 1
        self.compile_expression(SubElement(caller, EXPRESSION))
        while self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ",":
            # SubElement(caller,SYMBOL).text = self.tokenizer.symbol()
            num_of_args += 1
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
        return num_of_args

    def compile_subroutineCall(self, caller, first_token):
        """Compile a subroutine call whose first identifier is already consumed.

        :param caller: XML element to attach child nodes to.
        :param first_token: identifier read before dispatch (class name,
            object variable name, or bare subroutine name).
        :return: None
        """
        func_name = first_token
        is_method = 0  # becomes 1 when an implicit 'this' arg is pushed
        if self.tokenizer.symbol() == '.':
            self.next()
            if self.symbols.kind_of(func_name):
                # Known variable: push the object, call Type.method on it.
                segment = self.symbols.kind_of(func_name)
                segment = Kind.get_segment(segment)
                index = self.symbols.index_of(func_name)
                self.writer.write_push(segment, index)
                func_name = self.symbols.type_of(func_name)
                is_method = 1
            func_name = func_name + "." + self.tokenizer.identifier()
            self.next()
        else:
            # Bare name: a method call on the current object.
            func_name = self.class_name + "." + func_name
            self.writer.write_push(POINTER, 0)
            is_method = 1
        self.next()
        num_of_args = self.compile_expressionList(SubElement(caller, EXPRESSION_LIST)) + is_method
        self.writer.write_call(func_name, num_of_args)
        self.next()

    def compile_term(self, caller):
        """Compile one term: int/string/keyword constant, variable, array
        entry, subroutine call, parenthesized expression, or unary-op term.

        :param caller: XML element to attach child nodes to.
        :return: None
        """
        type = self.tokenizer.token_type()
        if type is JTok.INT_CONST:
            self.writer.write_push(CONSTANT, self.tokenizer.intVal())
            self.next()
        elif type is JTok.STRING_CONST:
            # Build the string at runtime: String.new(len), then one
            # appendChar call per character.
            string_val = self.tokenizer.string_val()
            self.writer.write_push(CONSTANT, len(string_val))
            self.writer.write_call("String.new", 1)
            for c in string_val:
                self.writer.write_push(CONSTANT, ord(c))
                self.writer.write_call("String.appendChar", 2)
            self.next()
        elif type is JTok.KEYWORD:
            if self.tokenizer.key_word() in {"null", "false"}:
                self.writer.write_push(CONSTANT, 0)
            elif self.tokenizer.key_word() == "true":
                # true is -1: push 1 then arithmetic negate.
                self.writer.write_push(CONSTANT, 1)
                self.writer.write_arithmetic("neg")
            elif self.tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
            else:
                print("unexpected")
            self.next()
        elif type is JTok.IDENTIFIER:
            name = self.tokenizer.identifier()
            self.next()
            type = self.tokenizer.token_type()
            if type is JTok.SYMBOL and self.tokenizer.symbol() in {".", "("}:
                self.compile_subroutineCall(caller, name)
            elif type is JTok.SYMBOL and self.tokenizer.symbol() == '[':
                # Array read: push index expr, push base, add, then read
                # through the 'that' segment.
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")
                self.writer.write_arithmetic("add")
                self.writer.write_pop(POINTER, 1)
                self.writer.write_push("that", 0)
                self.next()
            else:
                # Plain variable reference.
                kind = self.symbols.kind_of(name)
                index = self.symbols.index_of(name)
                if kind is not None:
                    self.writer.write_push(kind.get_segment(), index)
                else:
                    print("unexpected")
        elif type is JTok.SYMBOL:
            if self.tokenizer.symbol() == '(':
                # Parenthesized sub-expression.
                self.next()
                self.compile_expression(SubElement(caller, EXPRESSION))
                self.next()
            elif self.tokenizer.symbol() in {'-', '~'}:
                unary_op = self.tokenizer.symbol()
                self.next()
                self.compile_term(SubElement(caller, TERM))
                if unary_op == "-":
                    self.writer.write_arithmetic("neg")
                elif unary_op == "~":
                    self.writer.write_arithmetic("not")
        else:
            # NOTE(review): bare string literal — has no effect at runtime;
            # probably meant print("unexpected") or an exception.
            "unexpected"

    def compile_do(self, caller):
        """Compile a do-statement; the call's return value is discarded."""
        self.next()
        name = self.tokenizer.identifier()
        self.next()
        self.compile_subroutineCall(caller, name)
        self.writer.write_pop(TEMP, 0)  # discard returned value
        self.next()

    def compile_let(self, caller):
        """Compile a let-statement, with or without array indexing."""
        self.next()
        varName = self.tokenizer.identifier()
        self.next()
        kind = self.symbols.kind_of(varName)
        kind = kind.get_segment()
        index = self.symbols.index_of(varName)
        if self.tokenizer.symbol() == '[':
            # let arr[expr] = expr2: compute target address, evaluate RHS,
            # then store through 'that' using temp 0 as a scratch slot.
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_push(kind, index)
            self.writer.write_arithmetic("add")
            self.next()
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(TEMP, 0)
            self.writer.write_pop(POINTER, 1)
            self.writer.write_push(TEMP, 0)
            self.writer.write_pop("that", 0)
        else:
            # Simple assignment to a variable's segment/index.
            self.next()
            self.compile_expression(SubElement(caller, EXPRESSION))
            self.writer.write_pop(kind, index)
        self.next()

    def compile_return(self, caller):
        """Compile a return-statement; void returns push constant 0."""
        self.next()
        if self.tokenizer.token_type() is JTok.SYMBOL and self.tokenizer.symbol() == ";":
            # Void return: callers still expect a value on the stack.
            self.writer.write_push(CONSTANT, 0)
            self.writer.write_return()
            self.next()
            return
        self.compile_expression(SubElement(caller, EXPRESSION))
        self.writer.write_return()
        self.next()

    def compile_while(self, caller):
        """Compile a while-statement using WHILE_EXP/WHILE_END label pairs."""
        while_index = self.while_counter
        self.while_counter += 1  # reserve a unique label index (nesting-safe)
        self.writer.write_label("WHILE_EXP" + str(while_index))
        self.next()
        self.next()
        self.compile_expression(SubElement(caller, EXPRESSION))
        # Jump out when the condition is false (not cond).
        self.writer.write_arithmetic("not")
        self.writer.write_if("WHILE_END" + str(while_index))
        self.next()
        self.next()
        self.compile_statements(SubElement(caller, STATEMENTS))
        self.writer.write_goto("WHILE_EXP" + str(while_index))
        self.writer.write_label("WHILE_END" + str(while_index))
        self.next()

    def compile_statements(self, caller):
        """Compile a run of statements, dispatching on the leading keyword."""
        # NOTE(review): this local set shadows any module-level STATEMENTS.
        STATEMENTS = {'do', 'while', 'let', 'return', 'if'}
        caller.text = " "
        while self.tokenizer.token_type() is JTok.KEYWORD and self.tokenizer.key_word() in STATEMENTS:
            if self.tokenizer.key_word() == 'do':
                self.compile_do(SubElement(caller, 'doStatement'))
            elif self.tokenizer.key_word() == 'while':
                self.compile_while(SubElement(caller, 'whileStatement'))
            elif self.tokenizer.key_word() == 'let':
                self.compile_let(SubElement(caller, 'letStatement'))
            elif self.tokenizer.key_word() == 'return':
                self.compile_return(SubElement(caller, 'returnStatement'))
            elif self.tokenizer.key_word() == 'if':
                self.compile_if(SubElement(caller, 'ifStatement'))

    def compile_if(self, caller):
        """Compile an if(/else)-statement using IF_TRUE/IF_FALSE/IF_END labels."""
        self.next()  # (
        self.compile_expression(caller)
        self.next()  # {
        if_index = self.if_counter
        self.if_counter += 1  # reserve a unique label index (nesting-safe)
        self.writer.write_if("IF_TRUE" + str(if_index))
        self.writer.write_goto("IF_FALSE" + str(if_index))
        self.writer.write_label("IF_TRUE" + str(if_index))
        self.compile_statements(caller)
        self.next()
        if self.tokenizer.key_word() == 'else':
            self.writer.write_goto("IF_END" + str(if_index))
            self.writer.write_label("IF_FALSE" + str(if_index))
            self.next()
            self.next()
            self.compile_statements(caller)
            self.next()
            self.writer.write_label("IF_END" + str(if_index))
        else:
            # No else-clause: the false branch just falls through.
            self.writer.write_label("IF_FALSE" + str(if_index))
        return
# Driver: run the tokenizer and the compilation engine over one .jack file,
# writing <name>T.xml (token stream) and <name>.xml (parse tree).
# NOTE(review): 'source' is not defined in this chunk — presumably the
# enclosing function's parameter; confirm against the full file.
base_name = source[:-len(".jack")]
in_file = source
tokenizer_outfile = "{}T.xml".format(base_name)
compilation_engine_outfile = "{}.xml".format(base_name)
# First pass: dump every token until the tokenizer signals end-of-file.
with open(tokenizer_outfile, 'w') as tokenizer_file_out:
    tokenizer_xml_writer = XMLWriter(tokenizer_file_out)
    tokenizer_xml_writer.open_tag('tokens')
    with open(in_file, 'rb') as f_in:
        tokenizer = Tokenizer(f_in)
        while True:
            try:
                tokenizer_xml_writer.write_token(tokenizer.advance())
            except TokenizerReachedEndOfFileException:
                print('Reached end')
                break
    tokenizer_xml_writer.close_tag('tokens')
# Second pass: re-tokenize and run the full parse, writing the tree as XML.
with open(compilation_engine_outfile, 'w') as ce_file_out:
    ce_xml_writer = XMLWriter(ce_file_out)
    with open(in_file, 'rb') as f_in:
        tokenizer = Tokenizer(f_in)
        ce = CompilationEngine(tokenizer)
        ce_xml_writer.write_node(ce.compile())