def tokens_to_xml(path):
    """Write the tokens of every source file found for ``path`` to XML files.

    Each file returned by ``retrive_files(path)`` is tokenized, and every
    token is written to ``<basename>T.xml`` using the tag that ``token_tags``
    assigns to its token type. The output files are placed in an
    ``Xmlresult`` directory below ``path`` so they cannot clash with other
    files next to the sources.

    :param path: The location passed to ``retrive_files``; also the parent
        of the ``Xmlresult`` output directory.
    """
    out_dirpath = os.path.join(path, 'Xmlresult')

    # Tokenizer attribute that holds the value for each token type. The
    # attribute is resolved with getattr only for the current token's type,
    # so no other value accessor is touched.
    value_attr = {
        T_KEYWORD: 'keyword',
        T_SYMBOL: 'symbol',
        T_ID: 'identifier',
        T_INTEGER: 'intval',
        T_STRING: 'stringval',
    }

    # A distinct loop name keeps the ``path`` argument intact.
    for src_path in retrive_files(path):
        outfile = os.path.basename(src_path).replace('.jack', 'T.xml')
        outpath = os.path.join(out_dirpath, outfile)
        tokenizer = Tokenizer(src_path)
        analyzer = TokenAnalyzer(outpath)
        try:
            while tokenizer.has_more_tokens():
                tokenizer.advance()
                t_type = tokenizer.token_type
                tag = token_tags[t_type]
                attr = value_attr.get(t_type)
                if attr is not None:
                    analyzer.write_info(getattr(tokenizer, attr), tag)
        finally:
            # Close the analyzer even when tokenizing fails part-way, so
            # the output it holds is always released.
            analyzer.close()
class CompilationEngine:
    """Recursive-descent compiler that turns one Jack class into VM commands.

    Tokens are read from a ``Tokenizer``, identifiers are tracked in a
    ``SymbolTable`` and the generated commands are handed to a ``VMWriter``.
    Constructing an instance immediately compiles the whole class.

    Token matching is regex based: ``eat`` and ``peek_token`` apply
    ``re.match`` to the text of the current token.
    """

    # Regular expressions for single tokens. Raw strings keep the
    # backslashes literal without relying on invalid escape sequences.
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = r"-|~"

    def __init__(self, in_address):
        """
        Sets up the tokenizer, symbol table and VM writer and compiles the
        class found in the input file.
        :param in_address: Path of the .jack file; the VM writer receives the
        same path with ".jack" replaced by ".vm"
        """
        self.tokenizer = Tokenizer(in_address)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        # Incremented before use, so the first generated label ends in 0
        self.label_count = -1
        self.class_name = ""
        self.compile_class()

    def write_file(self):
        """
        Asks the VM writer to write its file. The XML text collected in
        self.output is not written anywhere.
        """
        self.vm_writer.write_file()

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class.
        """

        def comp_class():
            self.eat("class")
            self.class_name = self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            # NOTE: a subroutine is compiled unconditionally here.
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles a static or field declaration, then recurses to handle any
        further declarations that follow.
        """
        var_type_reg = "static|field"
        if self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)
            self.compile_class_var_dec()

    def compile_subroutine(self):
        """
        Compiles a complete method, function or constructor, then recurses
        if another subroutine follows.
        """
        sub_regex = "(constructor|function|method)"
        self.symbol_table.start_subroutine()
        kind = self.eat(sub_regex)
        self.__compile_type(True)
        # subroutine name
        name = self.__compile_name()
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_parameter_list(kind)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        if self.peek_token("var"):
            self.compile_var_dec()
        # The function command needs the number of locals, so it can only
        # be written once all var declarations have been processed.
        num_locals = self.symbol_table.var_count("local")
        self.vm_writer.write_function(
            "{}.{}".format(self.class_name, name), num_locals)
        self.__set_pointer(kind)
        self.compile_statements()
        self.eat("}")

        # Handle next subroutine if there is one
        if self.peek_token(sub_regex):
            self.compile_subroutine()

    def compile_parameter_list(self, kind):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :param kind: The subroutine kind; for "method" an argument named
        "this" of the class type is defined before the declared parameters
        """
        if kind == "method":
            self.symbol_table.define("this", self.class_name, "argument")
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__params()

    def compile_var_dec(self):
        """
        Compiles a var declaration, then recurses if another one follows.
        """
        self.eat("var")
        var_type = self.__compile_type(False)
        self.__var_declare(var_type, "var")
        self.eat(";")
        if self.peek_token("var"):
            self.compile_var_dec()

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        """
        statement_reg = "let|if|while|do|return"
        if self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
            elif self.peek_token("if"):
                self.compile_if()
            elif self.peek_token("while"):
                self.compile_while()
            elif self.peek_token("do"):
                self.compile_do()
            elif self.peek_token("return"):
                self.compile_return()
            self.compile_statements()

    def compile_do(self):
        """
        Compiles a do statement
        """
        self.eat("do")
        self.__subroutine_call()
        # Since we don't use the return value, we pop it to temp
        self.vm_writer.write_pop("temp", 0)
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        """
        self.eat("let")
        name = self.__compile_name()
        is_array = False
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            is_array = True
            self.__handle_array(name)

        self.eat("=")
        self.compile_expression()
        # Pop the value to the spot in the memory
        if is_array:
            # The target address was pushed before the value: park the
            # value in temp 0, load the address into pointer 1, then store.
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.__write_pop(name)
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        """
        self.eat("while")
        loop_label = self.__get_label("WHILE_START")
        exit_label = self.__get_label("WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # Compute ~condition
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        # if ~condition exit loop
        self.vm_writer.write_if(exit_label)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.eat("}")

    def compile_return(self):
        """
        Compiles a return statement.
        """
        self.eat("return")
        # if next is expression:
        if self.__is_term():
            self.compile_expression()
        else:
            # Void function - push 0
            self.vm_writer.write_push(CONSTANT, 0)
        self.vm_writer.write_return()
        self.eat(";")

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        self.eat("if")
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        # The condition is used un-negated: jump to the true branch when
        # it holds, otherwise fall through to the jump past it.
        if_true = self.__get_label("IF_TRUE")
        self.vm_writer.write_if(if_true)
        if_false = self.__get_label("IF_FALSE")
        self.vm_writer.write_goto(if_false)
        self.vm_writer.write_label(if_true)
        self.compile_statements()
        self.eat("}")
        # Handle else:
        if self.peek_token("else"):
            if_end = self.__get_label("IF_END")
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)
            self.eat("else")
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            self.vm_writer.write_label(if_end)
        else:
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*. Operators are applied
        left to right, each one right after its right-hand term.
        """

        def comp_expression():
            self.compile_term()
            # Keep consuming "op term" pairs; a single check here would
            # leave the tail of "a + b + c" unparsed.
            while self.peek_token(CompilationEngine._OPS):
                operation = self.eat(CompilationEngine._OPS)
                self.compile_term()
                self.vm_writer.write_arithmetic(operation)

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term. Prints an error and exits if the current token
        cannot start a term.
        """
        curr_type = self.peek_type()
        val = self.curr_token.get_token()
        # Handle integer constant
        if curr_type == INT_CONST:
            self.vm_writer.write_push(CONSTANT, int(val))
            self.__advance_token()
        # Handle String constant
        elif curr_type == STRING_CONST:
            self.__handle_string_constant(val)
            self.__advance_token()
        # Handle Keyword constant
        elif curr_type == KEYWORD:
            self.__handle_keyword_constant(val)
            self.__advance_token()
        # Case: token is a varName or a subroutineName
        elif curr_type == IDENTIFIER:
            self.__handle_identifier()
        # Case: ( expression )
        elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        # Case: unaryOp term
        elif self.peek_token(CompilationEngine._UNARY_OPS):
            self.__handle_unary_op()
        else:
            print("Error: Incorrect Term")
            exit(-1)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return: The number of expressions compiled
        """
        count = 0
        if self.__is_term():
            self.compile_expression()
            count += 1
        while self.peek_token(","):
            self.eat(",")
            self.compile_expression()
            count += 1
        return count

    # ========== Compilation Helper ========== #

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        var_type_reg = "static|field"
        # (static|field)
        kind = self.eat(var_type_reg)
        # type
        var_type = self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__var_declare(var_type, kind)
        self.eat(";")

    def __var_declare(self, var_type, kind):
        """
        Defines one variable name in the symbol table and recurses over the
        remaining comma separated names.
        :param var_type: The declared type of the variables
        :param kind: The kind passed to the symbol table
        """
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, kind)
        if self.peek_token(","):
            self.eat(",")
            self.__var_declare(var_type, kind)

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by a received
        boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return: The type token, as returned by eat
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        return self.eat(type_reg)

    def __set_pointer(self, kind):
        """
        Writes the commands that set up pointer 0 at the start of a
        subroutine body.
        :param kind: The subroutine kind; nothing is written unless it is
        "method" or "constructor"
        """
        if kind == "method":
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        elif kind == "constructor":
            self.__handle_constructor()

    def __handle_constructor(self):
        """
        Writes the allocation of a new object and stores the result in
        pointer 0.
        """
        # Allocate memory for the new object
        var_num = self.symbol_table.var_count("this")
        self.vm_writer.write_push(CONSTANT, var_num)
        self.vm_writer.write_call("Memory.alloc", 1)
        # Set the new memory spot to this
        self.vm_writer.write_pop("pointer", 0)

    def __compile_name(self):
        """
        Consumes an identifier. Prints an error and exits if the current
        token is not an identifier.
        :return: The identifier text
        """
        if self.peek_type() == IDENTIFIER:
            return self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def __params(self):
        """
        Compiles one "type name" parameter, defines it as an argument and
        consumes a following comma if there is one.
        """
        var_type = self.__compile_type(False)
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, "argument")
        if self.peek_token(","):
            self.eat(",")

    def __handle_unary_op(self):
        """
        Compiles "unaryOp term": the term first, then the operation.
        """
        command = self.eat(CompilationEngine._UNARY_OPS)
        self.compile_term()
        if command == "-":
            # "-" is written as "neg" here; in compile_expression the same
            # symbol is passed through unchanged as a binary operator.
            self.vm_writer.write_arithmetic("neg")
        else:
            self.vm_writer.write_arithmetic(command)

    def __handle_identifier(self):
        """
        Handles the case of an identifier given as a term
        """
        # Case: varName [ expression ]
        if self.peek_next(CompilationEngine._OPEN_BRACKET):
            name = self.__compile_name()
            self.__handle_array(name)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("that", 0)
        # Case: subroutineCall:
        elif self.peek_next(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_next(CompilationEngine._DOT):
            self.__subroutine_call()
        else:
            name = self.eat(NAME_REG)
            self.__write_push(name)

    def __handle_string_constant(self, string):
        """
        Handles the case of a string constant in a term
        :param string: the constant
        """
        self.vm_writer.write_push(CONSTANT, len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def __handle_keyword_constant(self, word):
        """
        Handles the case of a keyword constant given in a term. "this"
        pushes pointer 0; every other word pushes constant 0, and "true"
        additionally applies "~" to it. The word is not validated.
        :param word: The keyword
        """
        if word == "this":
            self.vm_writer.write_push("pointer", 0)
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            if word == "true":
                self.vm_writer.write_arithmetic("~")

    def __is_term(self):
        """
        :return: A truthy value if the current token can start a term
        """
        curr_type = self.peek_type()
        # The unary operators must be probed explicitly because "~" is not
        # part of _OPS. The _OPS probe is kept so a stray binary operator
        # still reaches compile_term, which reports it.
        return curr_type == STRING_CONST or curr_type == INT_CONST or \
            curr_type == KEYWORD or curr_type == IDENTIFIER or \
            self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
            self.peek_token(CompilationEngine._UNARY_OPS) or \
            self.peek_token(CompilationEngine._OPS)

    def __subroutine_call(self):
        """
        Compiles a subroutine call that starts at the current identifier.
        Prints an error and exits if the identifier is followed by neither
        "(" nor ".". Nothing happens if the current token is not an
        identifier.
        """
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                # name(...) is a call on the current object: pass pointer 0
                # as the extra first argument.
                self.vm_writer.write_push("pointer", 0)
                self.__subroutine_name(self.class_name, 1)
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        """
        Compiles name.subroutineName(expressionList). If name is known to
        the symbol table it is pushed as the first argument and the call
        targets its type; otherwise name itself is used as the call prefix.
        """
        name = self.eat(NAME_REG)
        n_args = 0
        # Push the object reference to the stack
        if self.symbol_table.kind_of(name):
            self.__write_push(name)
            name = self.symbol_table.type_of(name)
            n_args = 1
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name(name, n_args)

    def __subroutine_name(self, type_name, n_args):
        """
        Handles the case of subroutineName(expressionList)
        :param type_name: The prefix of the called function's name
        :param n_args: Arguments already pushed before the expression list
        """
        name = self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        nargs = self.compile_expression_list()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.vm_writer.write_call("{}.{}".format(type_name, name),
                                  nargs + n_args)

    def __handle_array(self, name):
        """
        Compiles [expression] and writes the addition of the pushed
        variable to the expression value.
        :param name: The name of the indexed variable
        """
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)
        self.__write_push(name)
        self.vm_writer.write_arithmetic("+")

    # ========== XML Handling ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    # ========== Token Handling ========== #

    def eat(self, token):
        """
        Advances past the current token if it matches.
        NOTE: a mismatch is not reported; the token stays in place and None
        is returned.
        :param token: The regex of the token to compare
        :return: The text of the consumed token, or None on a mismatch
        """
        ctoken = self.curr_token.get_token()
        if re.match(token, ctoken):
            self.__advance_token()
            return ctoken

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: The match (truthy) if the current token matches the regex,
        a falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: The match (truthy) if there is a next token and it matches
        the regex, a falsy value otherwise.
        """
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer. The current token is only replaced while
        the tokenizer reports more tokens.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()

    # ========== VM Helper ========== #

    def __get_label(self, label):
        """
        :param label: The label prefix
        :return: The prefix followed by the next value of the label counter
        """
        self.label_count += 1
        return "{}{}".format(label, str(self.label_count))

    def __write_pop(self, name):
        """
        Writes a pop into the variable, using its symbol table kind and
        index.
        :param name: The variable name
        """
        self.vm_writer.write_pop(self.symbol_table.kind_of(name),
                                 self.symbol_table.index_of(name))

    def __write_push(self, name):
        """
        Writes a push of the variable, using its symbol table kind and
        index.
        :param name: The variable name
        """
        self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
class CompilationEngine:
    """Compile one Jack class into VM commands.

    Tokens come from a ``Tokenizer``, identifiers are recorded in a
    ``SymbolTable`` and commands are emitted through a ``VMWriter``.
    Creating an instance compiles the input, closes the writer and prints a
    completion message. Every syntax or semantic problem is raised as a
    ``CompileError`` carrying the file name and line number.

    The ``compile_*`` methods generally start with the tokenizer positioned
    *before* the construct they compile and look ahead via ``_next_token``.
    """

    # Symbol-table kind -> VM memory segment.
    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    # Suffix for generated labels; the first increment on an instance
    # shadows this class-level default with a per-instance counter.
    label_num = 0

    # Jack operator -> VM arithmetic command. '*' and '/' have no command;
    # compile_binaryop turns them into calls.
    unary_ops = {'-': 'neg', '~': 'not'}
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def __init__(self, inpath, outpath):
        """Compile ``inpath`` and write the result through a VMWriter.

        :param inpath: path handed to the ``Tokenizer``.
        :param outpath: path handed to the ``VMWriter``.
        :raises CompileError: when the input cannot be compiled.
        """
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        try:
            if self.tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # Release the writer even when compilation raises.
            self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        """Reset the per-subroutine state (kind, name, return type)."""
        self._sub_kind = None
        self._sub_name = None
        self._ret_type = None

    def _advance(self):
        """Move to the next token; raise ``CompileError`` at end of input."""
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the attribute of its type."""
        t_type = self.tokenizer.token_type
        if t_type == T_KEYWORD:
            return self.tokenizer.keyword
        if t_type == T_SYMBOL:
            return self.tokenizer.symbol
        if t_type == T_ID:
            return self.tokenizer.identifier
        if t_type == T_INTEGER:
            return self.tokenizer.intval
        return self.tokenizer.stringval

    @property
    def _current_tok_type(self):
        """Type of the current token."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Entry of ``token_tags`` for the current token type."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """Return the raw next token of the tokenizer, converted to str."""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance and check that the new current token is the expected one.

        :param tok_type: required token type.
        :param token: required token value; when given (and truthy) it is
            checked instead of the type.
        :raises CompileError: when the requirement is not met.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token, ))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))

    def _require_id(self):
        """Advance and require an identifier."""
        self._require_token(T_ID)

    def _require_kw(self, token):
        """Advance and require the keyword ``token``."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Advance and require the symbol ``token``."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Run ``procedure`` between a required pair of bracket symbols.

        :param brackets: two-item sequence: opening and closing symbol.
        :param procedure: zero-argument callable compiling the content.
        """
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """Return True if the next token starts a class variable declaration."""
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """Return True if the next token starts a subroutine declaration."""
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """Return True if the next token starts a local variable declaration."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        """Require an identifier and remember it as the class name."""
        self._require_id()
        self._class_name = self._current_token

    def compile_subroutine_name(self):
        """Require an identifier and store it in ``_sub_name``.

        NOTE(review): compile_call_name also goes through this method, so
        ``_sub_name`` is overwritten by the callee name during a call.
        """
        self._require_id()
        self._sub_name = self._current_token

    def compile_var_name(self, kind=None, type=None, declare=False):
        """Require an identifier and either declare or check it.

        :param kind: symbol-table kind; needed when ``declare`` is True.
        :param type: declared type when declaring, expected type (or None
            for no type check) otherwise.
        :param declare: True to define the name, False to verify it.
        """
        self._require_id()
        name = self._current_token
        if declare is True:  # kind and type are not None
            self.symboltable.define(name, type, kind)
        else:
            self.check_var_name(name, type)

    def check_var_name(self, name, type=None):
        """Verify that ``name`` is declared and, optionally, of ``type``.

        :raises CompileError: for an undeclared name or a type mismatch.
        """
        recorded_kind = self.symboltable.kindof(name)
        if recorded_kind is None:
            self._traceback('name used before declared: {0}'.format(name))
        elif type is not None:
            recorded_type = self.symboltable.typeof(name)
            if recorded_type != type:
                get = '{0} "{1}"'.format(recorded_type, name)
                # ``type`` is a declared variable type, not a token type,
                # so it is formatted here rather than passed through
                # ``expect_types``, which _error looks up in token_tags.
                self._error(expect='type {0}'.format(type), get=get)

    def compile_type(self, advanced=False, expect='type'):
        """Check that the current token is a built-in type or an identifier.

        :param advanced: True when the caller has already advanced onto the
            type token; otherwise this method advances first.
        :param expect: description used in the error message.
        """
        # int, string, boolean or identifier(className)
        if advanced is False:
            self._advance()
        if (self._current_token not in SymbolTable.builtIn_types
                and self._current_tok_type != T_ID):
            return self._error(expect=expect)

    def compile_return_type(self):
        """Compile ``'void' | type`` and store it in ``_ret_type``.

        :raises CompileError: when a constructor does not declare the
            current class as its return type.
        """
        # void or type
        self._advance()
        if self._current_token != KW_VOID:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if (self._sub_kind == KW_CONSTRUCTOR
                and self._ret_type != self._class_name):
            me = 'constructor expect current class as return type'
            self._traceback(me)

    @record_non_terminal('class')
    def compile_class(self):
        """Compile a whole class and reject anything that follows it."""
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        """Compile a declaration shared by class and local variables.

        The leading keyword is used as the symbol-table kind of every
        declared name.
        """
        self._advance()
        id_kind = self._current_token  # ('static | field | var')
        # type varName (',' varName)* ';'
        self.compile_type()
        id_type = self._current_token
        self.compile_var_name(id_kind, id_type, declare=True)
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self.compile_var_name(id_kind, id_type, declare=True)
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        """Compile one static or field declaration."""
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        """Compile one constructor, function or method."""
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        self._sub_kind = self._current_token
        if self._sub_kind == KW_METHOD:
            # Methods get an argument named 'this' before their declared
            # parameters.
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        """Compile a possibly empty parameter list (without the parentheses)."""
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name('argument', self._current_token, True)
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        """Compile the body of a subroutine."""
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        # The function command needs the local count, so it is written
        # only after all var declarations.
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        """Write the function command and the set-up of pointer 0."""
        fn_name = '.'.join((self._class_name, self._sub_name))
        num_locals = self.symboltable.varcount(KW_VAR)
        # function fn_name num_locals
        self.vmwriter.write_function(fn_name, num_locals)
        # set up pointer this
        if self._sub_kind == KW_CONSTRUCTOR:
            num_fields = self.symboltable.varcount(KW_FIELD)
            self.vmwriter.write_push('constant', num_fields)
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
        elif self._sub_kind == KW_METHOD:
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        """Compile one local variable declaration."""
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        """Compile statements until the next token is ``'}'``."""
        # (letStatement | ifStatement | whileStatement | doStatement |
        # returnStatement)*
        while self._next_token != '}':
            self._advance()
            statement = self._current_token
            if statement == 'do':
                self.compile_do()
            elif statement == 'let':
                self.compile_let()
            elif statement == 'while':
                self.compile_while()
            elif statement == 'return':
                self.compile_return()
            elif statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')

    @record_non_terminal('doStatement')
    def compile_do(self):
        """Compile a do statement and discard the call's return value."""
        # 'do' subroutineCall ';'
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # temp[0] store useless value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        """Compile a let statement for a plain variable or an array element."""
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.compile_var_name()
        var_name = self._current_token
        array = (self._next_token == '[')
        if array:
            # push (array base + subscript)
            self.compile_array_subscript(var_name)
        self._require_sym('=')
        self.compile_expression()  # push expression value
        self._require_sym(';')
        if array:
            self.vmwriter.write_pop('temp', 1)  # pop exp value to temp[1]
            # that = array base + subscript
            self.vmwriter.write_pop('pointer', 1)
            self.vmwriter.write_push('temp', 1)
            self.vmwriter.write_pop('that', 0)
        else:
            self.assign_variable(var_name)

    def assign_variable(self, name):
        """Write a pop into the segment and index recorded for ``name``."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        """Write a push from the segment and index recorded for ``name``."""
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_push(self.kind_segment[kind], index)

    @record_non_terminal('whileStatement')
    def compile_while(self):
        """Compile a while statement."""
        # 'while' '(' expression ')' '{' statements '}'
        start_label = 'WHILE_START_' + str(self.label_num)
        end_label = 'WHILE_END_' + str(self.label_num)
        self.label_num += 1
        self.vmwriter.write_label(start_label)
        self.compile_cond_expression(start_label, end_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        """Compile an if statement with an optional else clause."""
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        else_label = 'IF_ELSE_' + str(self.label_num)
        end_label = 'IF_END_' + str(self.label_num)
        self.label_num += 1
        # After the if-body control jumps to end_label; a false condition
        # jumps to else_label, which is emitted right after the body.
        self.compile_cond_expression(end_label, else_label)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        """Compile ``'(' expression ')' '{' statements '}'`` with its jumps.

        :param goto_label: jump target written after the statements.
        :param end_label: jump target when the condition is false; the
            label itself is written after that goto.
        """
        self._require_brackets('()', self.compile_expression)
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        """Compile a return statement.

        :raises CompileError: when a constructor returns something other
            than ``this`` or a non-void subroutine returns nothing.
        """
        # 'return' expression? ';'
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token != ';':
            self.compile_expression()
        else:
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            # A bare return still pushes a value.
            self.vmwriter.write_push('constant', 0)
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    @record_non_terminal('expression')
    def compile_expression(self):
        """Compile ``term (op term)*``; operators apply left to right."""
        # term (op term)*
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        """Write the command or call implementing binary operator ``op``."""
        if op == '*':
            self.vmwriter.write_call('Math.multiply', 2)
        elif op == '/':
            self.vmwriter.write_call('Math.divide', 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    @record_non_terminal('term')
    def compile_term(self):
        """Compile a single term.

        :raises CompileError: when the token cannot start a term.
        """
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok_type == T_KEYWORD and tok in self.kw_consts:
                self.compile_kw_consts(tok)
            elif tok_type == T_INTEGER:
                self.vmwriter.write_push('constant', tok)
            elif tok_type == T_STRING:
                self.compile_string(tok)
            elif tok_type == T_ID:
                # Tuple membership: a substring test against '(.' would
                # also accept '' and '(.'.
                if self._next_token in ('(', '.'):
                    self.compile_subroutine_call()
                elif self._next_token == '[':
                    self.check_var_name(tok)
                    self.compile_array_subscript(tok)
                    self.vmwriter.write_pop('pointer', 1)
                    self.vmwriter.write_push('that', 0)
                else:
                    self.check_var_name(tok)
                    self.load_variable(tok)
            elif tok_type == T_SYMBOL and tok in self.unary_ops:
                self.compile_term()
                self.vmwriter.write_arithmetic(self.unary_ops[tok])
            else:
                self._error(expect='term')

    def compile_kw_consts(self, kw):
        """Push the value of a keyword constant.

        ``this`` pushes pointer 0, ``true`` pushes 1 followed by ``neg``,
        anything else pushes 0.
        """
        # keywordConstant: 'true' | 'false' | 'null' | 'this'
        if kw == KW_THIS:
            self.vmwriter.write_push('pointer', 0)
        elif kw == KW_TRUE:
            self.vmwriter.write_push('constant', 1)
            self.vmwriter.write_arithmetic('neg')
        else:
            self.vmwriter.write_push('constant', 0)

    def compile_string(self, string):
        """Build a string constant: String.new, then one append per char."""
        self.vmwriter.write_push('constant', len(string))
        self.vmwriter.write_call('String.new', 1)
        for char in string:
            self.vmwriter.write_push('constant', ord(char))
            self.vmwriter.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        """Compile a subroutine call whose first name is the current token."""
        # subroutineName '(' expressionList ')' |
        # (className | varName) '.' subroutineName '(' expressionList ')'
        # the first element of structure has already been compiled.
        fn_name, num_args = self.compile_call_name()
        self._require_sym('(')
        num_args = self.compile_expressionlist(num_args)
        self._require_sym(')')
        self.vmwriter.write_call(fn_name, num_args)

    def compile_call_name(self):
        """Resolve the callee of a subroutine call.

        The current identifier is a class name or variable name when
        followed by ``'.'``, or a subroutine of the current class when
        followed by ``'('``.

        :return: ``(function_name, num_args)`` where ``num_args`` is 1 when
            an object reference was pushed as the first argument, else 0.
        :raises CompileError: when the name is not an identifier, is
            followed by neither ``'.'`` nor ``'('``, or is a variable of a
            built-in type.
        """
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            # varName with class type
            self.check_var_name(name)
            var_type = self.symboltable.typeof(name)
            if var_type in SymbolTable.builtIn_types:
                return self._error(
                    expect='class instance or class', get=var_type)
            self.load_variable(name)
            return '.'.join((var_type, sub_name)), 1
        elif self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName
        # Report the malformed call instead of returning None, which the
        # caller would fail to unpack.
        return self._error(expect_toks=('.', '('), get=self._next_token)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        """Compile a possibly empty, comma separated expression list.

        :param num_args: arguments already pushed before the list.
        :return: ``num_args`` plus the number of expressions compiled.
        """
        # (expression (',' expression)*)?
        if self._next_token != ')':
            self.compile_expression()
            num_args += 1
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()
            num_args += 1
        return num_args

    def compile_array_subscript(self, var_name):
        """Compile ``'[' expression ']'`` and push base + subscript.

        :raises CompileError: when ``var_name`` is undeclared or its
            recorded type is not ``'Array'``.
        """
        # varName '[' expression ']'
        self.check_var_name(var_name, 'Array')
        # push expression value
        self._require_brackets('[]', self.compile_expression)
        self.load_variable(var_name)
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        """Raise ``CompileError`` if the tokenizer has no more tokens."""
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Raise a ``CompileError`` of the form 'Expect X but get "Y"'.

        :param expect_toks: acceptable token values.
        :param expect_types: acceptable token types (keys of
            ``token_tags``).
        :param expect: ready-made description; overrides the two above.
        :param get: what was found; defaults to the current token.
        """
        if expect is None:
            exp_tok = ' or '.join('"{0}"'.format(t) for t in expect_toks)
            exp_type = ' or '.join(
                'type {0}'.format(token_tags[t]) for t in expect_types)
            if exp_tok and exp_type:
                # str.join takes a single iterable, hence the tuple.
                expect = ' or '.join((exp_tok, exp_type))
            else:
                expect = exp_tok + exp_type
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` with file and line information.

        When ``DEBUG`` is set, the symbol table and the known class types
        are printed first.
        """
        if DEBUG:
            print('--------------------------------------------')
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print('--------------------------------------------')
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))
class CompilationEngine:
    """
    Recursive-descent syntax analyzer for a single Jack class.

    Tokens are pulled from a Tokenizer and the parse tree is written to the
    output file as XML, one tag per line, indented by nesting depth.
    """

    def __init__(self, inputPath, outputPath):
        """
        Creates the tokenizer, opens the output file and loads the first
        token.

        :param inputPath: Path handed to Tokenizer.
        :param outputPath: Path of the XML file to (over)write.
        """
        self.tokenizer = Tokenizer(inputPath)
        self.outputFile = open(outputPath, 'w')
        self.indentLevel = 0
        try:
            self.tokenizer.advance()
        except Exception:
            # Do not leak the freshly opened handle if the first token
            # cannot be read.
            self.outputFile.close()
            raise

    def CompileClass(self):
        """
        Compiles a complete class and closes the output file, whether or not
        compilation succeeds.
        """
        try:
            self.EnterScope("class")
            self.ConsumeKeyword([Keyword.CLASS])
            self.ConsumeIdentifier()  # className
            self.ConsumeSymbol('{')
            # classVarDec*
            while self.IsKeyword([Keyword.STATIC, Keyword.FIELD]):
                self.CompileClassVarDec()
            # subroutineDec*
            while self.IsKeyword([Keyword.CONSTRUCTOR, Keyword.FUNCTION,
                                  Keyword.METHOD]):
                self.CompileSubroutine()
            self.ConsumeSymbol('}')
            self.ExitScope("class")
        finally:
            # Previously the file was closed only on success, so a syntax
            # error left the handle open (and the partial output unflushed).
            self.outputFile.close()

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        """
        self.EnterScope("classVarDec")
        self.ConsumeKeyword([Keyword.STATIC, Keyword.FIELD])
        self.ConsumeType()
        self._CompileVarNameList()
        self.ExitScope("classVarDec")

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        self.EnterScope("subroutineDec")
        self.ConsumeKeyword([Keyword.CONSTRUCTOR, Keyword.FUNCTION,
                             Keyword.METHOD])
        # Return type: 'void' or a regular type.
        if self.IsKeyword([Keyword.VOID]):
            self.ConsumeKeyword([Keyword.VOID])
        else:
            self.ConsumeType()
        self.ConsumeIdentifier()  # subroutineName
        self.ConsumeSymbol('(')
        self.CompileParameterList()
        self.ConsumeSymbol(')')
        self.CompileSubroutineBody()
        self.ExitScope("subroutineDec")

    def CompileSubroutineBody(self):
        """
        Compiles a subroutine body: '{' varDec* statements '}'.
        """
        self.EnterScope("subroutineBody")
        self.ConsumeSymbol('{')
        while self.IsKeyword([Keyword.VAR]):
            self.CompileVarDec()
        self.CompileStatements()
        self.ConsumeSymbol('}')
        self.ExitScope("subroutineBody")

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".
        """
        self.EnterScope("parameterList")
        if not self.IsSymbol([')']):
            self.ConsumeType()
            self.ConsumeIdentifier()
            while self.IsSymbol([',']):
                self.ConsumeSymbol(',')
                self.ConsumeType()
                self.ConsumeIdentifier()
        self.ExitScope("parameterList")

    def CompileVarDec(self):
        """
        Compiles a var declaration.
        """
        self.EnterScope("varDec")
        self.ConsumeKeyword([Keyword.VAR])
        self.ConsumeType()
        self._CompileVarNameList()
        self.ExitScope("varDec")

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        """
        self.EnterScope("statements")
        while True:
            if self.IsKeyword([Keyword.LET]):
                self.CompileLet()
            elif self.IsKeyword([Keyword.IF]):
                self.CompileIf()
            elif self.IsKeyword([Keyword.WHILE]):
                self.CompileWhile()
            elif self.IsKeyword([Keyword.DO]):
                self.CompileDo()
            elif self.IsKeyword([Keyword.RETURN]):
                self.CompileReturn()
            else:
                break
        self.ExitScope("statements")

    def CompileDo(self):
        """
        Compiles a do statement.
        """
        self.EnterScope("doStatement")
        self.ConsumeKeyword([Keyword.DO])
        self.ConsumeIdentifier()
        self._CompileCallSuffix()
        self.ConsumeSymbol(';')
        self.ExitScope("doStatement")

    def CompileLet(self):
        """
        Compiles a let statement.
        """
        self.EnterScope("letStatement")
        self.ConsumeKeyword([Keyword.LET])
        self.ConsumeIdentifier()
        if self.IsSymbol(['[']):
            # Array element on the left-hand side.
            self.ConsumeSymbol('[')
            self.CompileExpression()
            self.ConsumeSymbol(']')
        self.ConsumeSymbol('=')
        self.CompileExpression()
        self.ConsumeSymbol(';')
        self.ExitScope("letStatement")

    def CompileWhile(self):
        """
        Compiles a while statement.
        """
        self.EnterScope("whileStatement")
        self.ConsumeKeyword([Keyword.WHILE])
        self._CompileCondition()
        self._CompileBlock()
        self.ExitScope("whileStatement")

    def CompileReturn(self):
        """
        Compiles a return statement.
        """
        self.EnterScope("returnStatement")
        self.ConsumeKeyword([Keyword.RETURN])
        if not self.IsSymbol([';']):
            self.CompileExpression()
        self.ConsumeSymbol(';')
        self.ExitScope("returnStatement")

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        self.EnterScope("ifStatement")
        self.ConsumeKeyword([Keyword.IF])
        self._CompileCondition()
        self._CompileBlock()
        if self.IsKeyword([Keyword.ELSE]):
            self.ConsumeKeyword([Keyword.ELSE])
            self._CompileBlock()
        self.ExitScope("ifStatement")

    def CompileExpression(self):
        """
        Compiles an expression: term (op term)*.
        """
        op_symbols = ['+', '-', '*', '/', '&', '|', "<", ">", '=']
        self.EnterScope("expression")
        self.CompileTerm()
        while self.IsSymbol(op_symbols):
            self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
        self.ExitScope("expression")

    def CompileTerm(self):
        """
        Compiles a term.
        """
        keyword_constants = [Keyword.TRUE, Keyword.FALSE, Keyword.NULL,
                             Keyword.THIS]
        unary_symbols = ['-', '~']

        self.EnterScope("term")
        if self.IsType(TokenType.INT_CONST):
            self.ConsumeIntegerConstant()
        elif self.IsType(TokenType.STRING_CONST):
            self.ConsumeStringConstant()
        elif self.IsKeyword(keyword_constants):
            self.ConsumeKeyword(keyword_constants)
        elif self.IsSymbol(['(']):
            # '(' expression ')'
            self.ConsumeSymbol('(')
            self.CompileExpression()
            self.ConsumeSymbol(')')
        elif self.IsSymbol(unary_symbols):
            # unaryOp term
            self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
        else:
            # Anything else must start with an identifier; ConsumeIdentifier
            # raises if it does not.
            self.ConsumeIdentifier()
            if self.IsSymbol(['[']):
                # varName '[' expression ']'
                self.ConsumeSymbol('[')
                self.CompileExpression()
                self.ConsumeSymbol(']')
            elif self.IsSymbol(['(', '.']):
                # subroutineCall
                self._CompileCallSuffix()
        self.ExitScope("term")

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.
        """
        self.EnterScope("expressionList")
        # IsSymbol expects a list like every other call site; the bare string
        # only worked through substring matching.
        if not self.IsSymbol([')']):
            self.CompileExpression()
            while self.IsSymbol([',']):
                self.ConsumeSymbol(',')
                self.CompileExpression()
        self.ExitScope("expressionList")

    # ---------- shared grammar fragments ----------

    def _CompileVarNameList(self):
        """ Compiles varName (',' varName)* ';'. """
        self.ConsumeIdentifier()  # varName
        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            self.ConsumeIdentifier()  # varName
        self.ConsumeSymbol(';')

    def _CompileCallSuffix(self):
        """
        Compiles the rest of a subroutine call after its first identifier:
        ('.' subroutineName)? '(' expressionList ')'.
        """
        if self.IsSymbol(['.']):
            self.ConsumeSymbol('.')
            self.ConsumeIdentifier()
        self.ConsumeSymbol('(')
        self.CompileExpressionList()
        self.ConsumeSymbol(')')

    def _CompileCondition(self):
        """ Compiles '(' expression ')'. """
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

    def _CompileBlock(self):
        """ Compiles '{' statements '}'. """
        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

    # ---------- token predicates ----------

    def IsKeyword(self, keyword_list):
        """ True if the current token is a keyword contained in the list. """
        return (self.IsType(TokenType.KEYWORD) and
                self.tokenizer.keyword() in keyword_list)

    def IsSymbol(self, symbol_list):
        """ True if the current token is a symbol contained in the list. """
        return (self.IsType(TokenType.SYMBOL) and
                self.tokenizer.symbol() in symbol_list)

    def IsType(self, tokenType):
        """ True if the current token has the given token type. """
        return self.tokenizer.tokenType() == tokenType

    # ---------- terminal consumers ----------

    def ConsumeType(self):
        """ Consumes a type: a class name or one of int / char / boolean. """
        if self.tokenizer.tokenType() == TokenType.IDENTIFIER:
            self.ConsumeIdentifier()
        else:
            self.ConsumeKeyword([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN])

    def ConsumeKeyword(self, keywordList):
        """
        Writes the current keyword and advances.

        :raises Exception: if the current token is not a keyword from
            keywordList.
        """
        self.VerifyTokenType(TokenType.KEYWORD)
        actual = self.tokenizer.keyword()
        if actual not in keywordList:
            raise Exception("Expected keywords: {}, Actual: {}".
                            format(keywordList, actual))
        self.OutputTag("keyword", actual)
        self._Advance()

    def ConsumeSymbol(self, symbol):
        """
        Writes the current symbol and advances.

        :raises Exception: if the current token is not exactly this symbol.
        """
        self.VerifyTokenType(TokenType.SYMBOL)
        actual = self.tokenizer.symbol()
        if actual != symbol:
            raise Exception("Expected symbol: {}, Actual: {}".
                            format(symbol, actual))
        self.OutputTag("symbol", actual)
        self._Advance()

    def ConsumeIntegerConstant(self):
        """ Writes the current integer constant and advances. """
        self.VerifyTokenType(TokenType.INT_CONST)
        self.OutputTag("integerConstant", self.tokenizer.intVal())
        self._Advance()

    def ConsumeStringConstant(self):
        """ Writes the current string constant and advances. """
        self.VerifyTokenType(TokenType.STRING_CONST)
        self.OutputTag("stringConstant", self.tokenizer.stringVal())
        self._Advance()

    def ConsumeIdentifier(self):
        """ Writes the current identifier and advances. """
        self.VerifyTokenType(TokenType.IDENTIFIER)
        self.OutputTag("identifier", self.tokenizer.identifier())
        self._Advance()

    def VerifyTokenType(self, tokenType):
        """
        :raises Exception: if the current token is not of the given type.
        """
        actual = self.tokenizer.tokenType()
        if actual != tokenType:
            raise Exception("Expected token type: {}, Actual: {}".
                            format(tokenType, actual))

    def _Advance(self):
        """ Moves to the next token; stays on the last one at end of input. """
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    # ---------- XML output ----------

    def EnterScope(self, name):
        """ Writes an opening tag and indents everything that follows. """
        self.Output("<{}>".format(name))
        self.indentLevel += 1

    def ExitScope(self, name):
        """ Dedents and writes the matching closing tag. """
        self.indentLevel -= 1
        self.Output("</{}>".format(name))

    def OutputTag(self, tag, value):
        """ Writes a terminal element on a single line. """
        self.Output("<{}> {} </{}>".format(tag, value, tag))

    def Output(self, text):
        """ Writes one line of output at the current indentation. """
        self.outputFile.write((" " * self.indentLevel) + text + '\n')
class CompilationEngine:
    """
    Syntax analyzer that turns the token stream of one Jack class into an
    XML parse tree.

    Terminals are matched with regular expressions against the text of the
    current token. The XML is accumulated in `self.output`; call
    `write_file` to save it next to the input (".jack" replaced by ".xml").
    A syntax error prints a message and terminates the program.
    """

    # Raw strings: these are regex fragments, and the backslashes must reach
    # the regex engine unchanged (non-raw "\(" is an invalid escape sequence).
    _OPEN_PARENTHESIS = r"\("
    _CLOSE_PARENTHESIS = r"\)"
    _OPEN_BRACKET = r"\["
    _CLOSE_BRACKET = r"\]"
    _DOT = r"\."
    _OPS = r"\+|-|\*|\/|&|\||<|>|="
    _UNARY_OPS = "-|~"

    def __init__(self, in_address):
        """
        Tokenizes the file and immediately compiles the class it contains.
        :param in_address: Path of the .jack source file
        """
        self.tokenizer = Tokenizer(in_address)
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        self.compile_class()

    def write_file(self):
        """
        Saves the accumulated XML to the output address.
        """
        with open(self.out_address, 'w') as f:
            f.write(self.output)

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Program structure ========== #

    def compile_class(self):
        """
        Compiles a complete class.
        """

        def comp_class():
            self.eat("class")
            self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles every consecutive static or field declaration.
        :return:
        """
        var_type_reg = "static|field"
        while self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        # (static|field)
        self.eat("static|field")
        # type
        self.__compile_type(False)
        # varName (',' varName)*
        self.__single_var()
        self.eat(";")

    def __single_var(self):
        """
        Compiles a single set of variables separated by commas.
        """
        self.eat(NAME_REG)
        while self.peek_token(","):
            self.eat(",")
            self.eat(NAME_REG)

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by a received
        boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return:
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        self.eat(type_reg)

    def compile_subroutine(self):
        """
        Compiles every consecutive method, function or constructor.
        :return:
        """
        sub_regex = "(constructor|function|method)"

        def subroutine_body():
            self.eat("{")
            if self.peek_token("var"):
                self.compile_var_dec()
            self.compile_statements()
            self.eat("}")

        def subroutine_dec():
            self.eat(sub_regex)
            self.__compile_type(True)
            # subroutine name
            self.__compile_name()
            self.eat(self._OPEN_PARENTHESIS)
            self.compile_parameter_list()
            self.eat(self._CLOSE_PARENTHESIS)
            self.wrap("subroutineBody", subroutine_body)

        # A loop instead of self-recursion: one stack frame no matter how
        # many subroutines the class declares.
        while self.peek_token(sub_regex):
            self.wrap("subroutineDec", subroutine_dec)

    def __compile_name(self):
        """
        Eats an identifier; exits the program if the current token is not
        one.
        """
        if self.peek_type() == IDENTIFIER:
            self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def compile_parameter_list(self):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :return:
        """
        self.wrap("parameterList", self.__params)

    def __params(self):
        """
        Compiles (type varName (',' type varName)*)?
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__compile_type(False)
            self.eat(NAME_REG)
            if not self.peek_token(","):
                break
            self.eat(",")

    def compile_var_dec(self):
        """
        Compiles a var declaration and any var declarations that directly
        follow it.
        :return:
        """
        self.wrap("varDec", self.__comp_var_dec)
        while self.peek_token("var"):
            self.wrap("varDec", self.__comp_var_dec)

    def __comp_var_dec(self):
        """
        Compiles 'var' type varName (',' varName)* ';'
        """
        self.eat("var")
        self.__compile_type(False)
        self.__single_var()
        self.eat(";")

    # ========== Statements ========== #

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return:
        """
        handlers = (
            ("let", self.compile_let),
            ("if", self.compile_if),
            ("while", self.compile_while),
            ("do", self.compile_do),
            ("return", self.compile_return),
        )

        def statements():
            # Iterative on purpose: the recursive formulation used one stack
            # frame per statement and could hit the recursion limit on long
            # subroutines.
            while True:
                for keyword, handler in handlers:
                    if self.peek_token(keyword):
                        handler()
                        break
                else:
                    return

        self.wrap("statements", statements)

    def compile_do(self):
        """
        Compiles a do statement
        :return:
        """
        self.wrap("doStatement", self.__comp_do)

    def __comp_do(self):
        """
        Compiles 'do' subroutineCall ';'
        """
        self.eat("do")
        self.__subroutine_call()
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        :return:
        """
        self.wrap("letStatement", self.__comp_let)

    def __comp_let(self):
        """
        Compiles 'let' varName ('[' expression ']')? '=' expression ';'
        """
        self.eat("let")
        self.__compile_name()
        # Optional [expression]
        if self.peek_token(self._OPEN_BRACKET):
            self.eat(self._OPEN_BRACKET)
            self.compile_expression()
            self.eat(self._CLOSE_BRACKET)
        self.eat("=")
        self.compile_expression()
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """

        def comp_while():
            self.eat("while")
            self.eat(self._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(self._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")

        self.wrap("whileStatement", comp_while)

    def compile_return(self):
        """
        Compiles a return statement.
        :return:
        """

        def comp_return():
            self.eat("return")
            # Optional return value
            if self.is_term():
                self.compile_expression()
            self.eat(";")

        self.wrap("returnStatement", comp_return)

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return:
        """

        def comp_if():
            self.eat("if")
            self.eat(self._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(self._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            # Optional else clause
            if self.peek_token("else"):
                self.eat("else")
                self.eat("{")
                self.compile_statements()
                self.eat("}")

        self.wrap("ifStatement", comp_if)

    # ========== Expressions ========== #

    def compile_expression(self):
        """
        Compiles an expression: term (op term)*
        :return:
        """

        def comp_expression():
            self.compile_term()
            # The grammar allows any number of "op term" pairs; handling only
            # one made "a + b + c" a syntax error.
            while self.peek_token(self._OPS):
                self.eat(self._OPS)
                self.compile_term()

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.
        :return:
        """

        def term():
            curr_type = self.peek_type()
            # Case: integerConstant, stringConstant or keywordConstant
            if curr_type in (STRING_CONST, INT_CONST, KEYWORD):
                self.write(self.tokenizer.get_current_token().get_xml_wrap())
                self.__advance_token()
            # Case: token is a varName or a subroutineName
            elif curr_type == IDENTIFIER:
                if self.peek_next(self._OPEN_BRACKET):
                    # varName [ expression ]
                    self.__var_name_array()
                elif (self.peek_next(self._OPEN_PARENTHESIS) or
                      self.peek_next(self._DOT)):
                    self.__subroutine_call()
                else:
                    self.eat(NAME_REG)
            # Case: ( expression )
            elif self.peek_token(self._OPEN_PARENTHESIS):
                self.eat(self._OPEN_PARENTHESIS)
                self.compile_expression()
                self.eat(self._CLOSE_PARENTHESIS)
            # Case: unaryOp term
            elif self.peek_token(self._UNARY_OPS):
                self.eat(self._UNARY_OPS)
                self.compile_term()
            else:
                print("Error: Incorrect Term")
                exit(-1)

        self.wrap("term", term)

    def __var_name_array(self):
        """
        Handles the case of varName[expression]
        :return:
        """
        self.eat(NAME_REG)
        self.eat(self._OPEN_BRACKET)
        self.compile_expression()
        self.eat(self._CLOSE_BRACKET)

    def is_term(self):
        """
        :return: a truthy value if the current token can start a term
        (constant, keyword, identifier, "(" or a unary operator), a falsy
        value otherwise.
        """
        curr_type = self.peek_type()
        # Only the unary operators can open a term. The binary operator set
        # used here before lacks "~", so "return ~x;" and "f(~x)" were
        # rejected.
        return curr_type in (STRING_CONST, INT_CONST, KEYWORD, IDENTIFIER) or \
            self.peek_token(self._OPEN_PARENTHESIS) or \
            self.peek_token(self._UNARY_OPS)

    def __subroutine_call(self):
        """
        Compiles subroutineName(expressionList) or
        (className|varName).subroutineName(expressionList). Does nothing if
        the current token is not an identifier.
        """
        if self.curr_token.get_type() != IDENTIFIER:
            return
        if self.peek_next(self._OPEN_PARENTHESIS):
            self.__subroutine_name()
        elif self.peek_next(self._DOT):
            self.__object_subroutine_call()
        else:
            print("Error: ( or . expected")
            exit(-1)

    def __object_subroutine_call(self):
        """
        Handles the case of name.subroutineName(expressionList)
        """
        self.eat(NAME_REG)
        self.eat(self._DOT)
        self.__subroutine_name()

    def __subroutine_name(self):
        """
        Handles the case of subroutineName(expressionList)
        :return:
        """
        # The subroutine name must be an identifier; report it right here
        # instead of failing later on an unrelated token.
        self.__compile_name()
        self.eat(self._OPEN_PARENTHESIS)
        self.compile_expression_list()
        self.eat(self._CLOSE_PARENTHESIS)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return:
        """

        def exp_list():
            if self.is_term():
                self.compile_expression()
                while self.peek_token(","):
                    self.eat(",")
                    self.compile_expression()

        self.wrap("expressionList", exp_list)

    # ========== Helpers ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return:
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    def eat(self, token):
        """
        Handles advancing and writing terminal tokens. Will exit the program
        if an error occurs.
        :param token: The regex of the token to compare
        :return:
        """
        if not re.match(token, self.curr_token.get_token()):
            print("Error: Expected " + token)
            exit(-1)
        self.write(self.curr_token.get_xml_wrap())
        self.__advance_token()

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: A match object (truthy) if the current token matches the
        regex, a falsy value otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        """
        :param comp: The regex to compare.
        :return: A match object (truthy) if there is a next token and it
        matches the regex, a falsy value otherwise.
        """
        next_token = self.tokenizer.get_next_token()
        if next_token:
            return re.match(comp, next_token.get_token())
        return False

    def __advance_token(self):
        """
        Advances the tokenizer; the current token is left unchanged once the
        tokenizer reports that no tokens remain.
        """
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()
class CompilationEngine(object):
    """Compile one Jack class straight to VM code.

    Tokens come from a Tokenizer, identifiers are resolved through a
    SymbolTable and VM commands are emitted through a VMWriter whose output
    path is the input path with ".jack" replaced by ".vm". Syntax errors are
    raised as plain Exception.
    """

    def __init__(self, inFile):
        self.t = Tokenizer(inFile)
        self.symTable = SymbolTable()
        self.vmName = self._vm_path(inFile)
        self.vm = VMWriter(self.vmName)
        self.className = ''
        self.types = ['int', 'char', 'boolean', 'void']
        self.stmnt = ['do', 'let', 'if', 'while', 'return']
        self.subroutType = ''
        self.whileIndex = 0   # per-subroutine counter for while labels
        self.ifIndex = 0      # per-subroutine counter for if/else labels
        self.fieldNum = 0     # number of fields, i.e. object size in words

    @staticmethod
    def _vm_path(inFile):
        """Return the output path: `inFile` with a trailing '.jack' replaced
        by '.vm'.

        str.rstrip('.jack') strips any trailing run of the characters
        '.', 'j', 'a', 'c', 'k' rather than the suffix, which turned
        'Stack.jack' into 'St.vm'.
        """
        suffix = '.jack'
        base = inFile[:-len(suffix)] if inFile.endswith(suffix) else inFile
        return base + '.vm'

    def compile_class(self):
        """Compile `'class' className '{' classVarDec* subroutineDec* '}'`.

        The VM writer is closed when compilation ends, including when it
        ends with an exception.
        """
        try:
            self.t.advance()
            self.validator('class')
            self.className = self.t.current_token()
            self.t.advance()
            self.validator('{')
            self.fieldNum = self.compile_class_var_dec()
            while self.t.symbol() != '}':  # subroutines
                self.compile_subroutine()
            self.validator('}')
        finally:
            self.vm.close()
        return

    def compile_class_var_dec(self):
        """Compile all field/static declarations; return the field count."""
        varKeyWords = ['field', 'static']
        counter = 0
        while self.t.keyword() in varKeyWords:
            kind = self.t.current_token()
            self.validator(varKeyWords)
            # variable type
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            if kind == 'field':
                counter += 1
            while self.t.symbol() != ';':  # checks multiple vars
                self.validator(',')
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                if kind == 'field':
                    counter += 1
            self.validator(';')
        return counter

    def compile_subroutine(self):
        """Compile one constructor, function or method declaration."""
        current_subrout_scope = self.symTable.subDict
        self.symTable.start_subroutine()
        subroutKword = self.t.current_token()
        self.validator(['constructor', 'function', 'method'])
        self.subroutType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
        name = self.t.current_token()
        subroutName = self.className + '.' + name
        self.t.advance()
        self.validator('(')
        if subroutKword == 'method':
            self.compile_parameter_list(method=True)
        else:
            self.compile_parameter_list()
        self.validator(')')
        self.validator('{')
        if self.t.symbol() == '}':
            # Empty body: nothing is emitted for this subroutine.
            self.t.advance()
            return
        self.validator(['var', 'let', 'do', 'if', 'while', 'return'],
                       advance=False)

        numLocals = 0
        if self.t.keyword() == 'var':
            numLocals = self.compile_var_dec()
        self.vm.write_function(subroutName, numLocals)

        if subroutKword == 'constructor':
            # Allocate the object and anchor `this` to it.
            self.vm.write_push('constant', self.fieldNum)
            self.vm.write_call('Memory.alloc', 1)
            self.vm.write_pop('pointer', 0)
        elif subroutKword == 'method':
            # The receiver arrives as argument 0.
            self.vm.write_push('argument', 0)
            self.vm.write_pop('pointer', 0)

        if self.t.keyword() in self.stmnt:
            self.compile_statements()
        self.validator('}')
        self.symTable.subDict = current_subrout_scope
        self.whileIndex = 0
        self.ifIndex = 0
        return

    def compile_parameter_list(self, method=False):
        """Compile a possibly empty parameter list; return its length.

        `method=True` is forwarded to the symbol table when the first
        parameter is defined.
        """
        counter = 0
        if self.t.symbol() == ')':
            return counter
        paramTypes = ['int', 'char', 'boolean', 'void', 'IDENTIFIER']
        kind = 'arg'

        varType = self.t.current_token()
        self.validator(paramTypes)
        name = self.t.current_token()
        if method:
            self.symTable.define(name, varType, kind, method=True)
        else:
            self.symTable.define(name, varType, kind)
        self.t.advance()
        counter += 1

        while self.t.symbol() == ',':
            self.validator(',')
            # Each parameter carries its own type; previously every later
            # parameter was recorded with the type of the first one.
            varType = self.t.current_token()
            self.validator(paramTypes)
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1
        return counter

    def compile_var_dec(self):
        """Compile all consecutive var declarations; return the local count."""
        kind = 'var'
        counter = 0
        while self.t.keyword() == 'var':  # check multiple lines of var
            self.t.advance()
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1
            while self.t.symbol() == ',':  # multiple varNames
                self.t.advance()
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                counter += 1
            self.validator(';')
        return counter

    def compile_statements(self):
        """Compile statements until the current token is not a statement
        keyword."""
        handlers = {
            'let': self.compile_let,
            'do': self.compile_do,
            'if': self.compile_if,
            'while': self.compile_while,
            'return': self.compile_return,
        }
        while self.t.keyword() in self.stmnt:
            handler = handlers.get(self.t.keyword())
            if handler is None:
                raise Exception(self.t.current_token() + ' is not valid')
            handler()
        return

    def _compile_subroutine_call(self):
        """Compile `name(args)` or `qualifier.name(args)`.

        Starts on the first identifier and finishes just past the closing
        ')'. A bare `name(...)` is a method call on the current object, so
        `this` is pushed as the hidden first argument. For
        `qualifier.name(...)` the qualifier is treated as an object when the
        symbol table knows it as a variable (the object is pushed first),
        otherwise as a class name.
        """
        firstName = self.t.current_token()
        self.t.advance()
        if self.t.symbol() == '(':  # subroutineName(exprlist)
            self.validator('(')
            callee = self.className + '.' + firstName
            self.vm.write_push('pointer', 0)
            numArgs = self.compile_expression_list() + 1  # add 1 for 'this'
        else:  # name.subroutine(exprList)
            self.validator('.')
            subroutName = self.t.current_token()
            self.validator('IDENTIFIER')
            self.validator('(')
            kind = self.symTable.kind_of(firstName)
            if kind in ['this', 'static', 'local', 'argument']:
                # used 'this' for 'field'
                callee = self.symTable.type_of(firstName) + '.' + subroutName
                self.vm.write_push(kind, self.symTable.index_of(firstName))
                numArgs = self.compile_expression_list() + 1
            else:
                callee = firstName + '.' + subroutName
                numArgs = self.compile_expression_list()
        self.vm.write_call(callee, numArgs)
        self.validator(')')

    def compile_do(self):
        """Compile `'do' subroutineCall ';'`, discarding the result."""
        self.t.advance()  # do
        self._compile_subroutine_call()
        self.validator(';')
        self.vm.write_pop('temp', 0)  # throws away returned value
        return

    def compile_let(self):
        """Compile `'let' varName ('[' expression ']')? '=' expression ';'`."""
        self.t.advance()  # let
        name = self.t.identifier()
        kind = self.symTable.kind_of(name)
        index = self.symTable.index_of(name)
        if (name not in self.symTable.classDict and
                name not in self.symTable.subDict):
            raise Exception(self.t.identifier() + ' is not defined')
        self.t.advance()

        isArray = self.t.symbol() == '['
        if isArray:  # array index: leave base + index on the stack
            self.vm.write_push(kind, index)
            self.validator('[')
            self.compile_expression()
            self.validator(']')
            self.vm.write_arithmetic('+')

        self.validator('=')
        self.compile_expression()

        if isArray:
            # Park the value so the target address can be moved into THAT.
            self.vm.write_pop('temp', 0)
            self.vm.write_pop('pointer', 1)
            self.vm.write_push('temp', 0)
            self.vm.write_pop('that', 0)
        else:
            self.vm.write_pop(kind, index)
        self.validator(';')
        return

    def compile_while(self):
        """Compile `'while' '(' expression ')' '{' statements '}'`."""
        currentWhile = 'WHILE' + str(self.whileIndex)
        self.vm.write_label(currentWhile)
        self.whileIndex += 1
        self.t.advance()  # while
        self.validator('(')
        self.compile_expression()
        self.vm.write_arithmetic('~')
        self.vm.write_if('END' + currentWhile)  # leave when condition fails
        self.validator(')')
        self.validator('{')
        self.compile_statements()
        self.vm.write_goto(currentWhile)
        self.validator('}')
        self.vm.write_label('END' + currentWhile)
        return

    def compile_return(self):
        """Compile `'return' expression? ';'`; a bare return pushes 0."""
        self.t.advance()  # return
        if self.t.symbol() == ';':
            self.vm.write_push('constant', '0')
            self.vm.write_return()
            self.t.advance()
        else:
            self.compile_expression()
            self.validator(';')
            self.vm.write_return()
        return

    def compile_if(self):
        """Compile an if statement with an optional else clause."""
        endIf = 'END_IF' + str(self.ifIndex)
        currentElse = 'IF_ELSE' + str(self.ifIndex)
        self.ifIndex += 1
        self.t.advance()  # if
        self.validator('(')
        self.compile_expression()
        self.vm.write_arithmetic('~')
        self.vm.write_if(currentElse)  # skip the then-branch when false
        self.validator(')')
        self.validator('{')
        self.compile_statements()
        self.vm.write_goto(endIf)
        self.validator('}')
        self.vm.write_label(currentElse)
        if self.t.keyword() == 'else':
            self.t.advance()  # else
            self.validator('{')
            self.compile_statements()
            self.validator('}')
        self.vm.write_label(endIf)
        return

    def compile_expression(self):
        """Compile `term (op term)*`, applying operators left to right."""
        op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.compile_term()
        while self.t.symbol() in op:
            opToken = self.t.current_token()
            self.t.advance()
            self.compile_term()
            self.vm.write_arithmetic(opToken)
        return

    def compile_term(self):
        """Compile a single term and leave its value on the stack."""
        keyConst = ['true', 'false', 'null', 'this']
        unOps = ['-', '~']
        tokenType = self.t.token_type()

        if tokenType == 'INT_CONST':
            self.vm.write_push('constant', self.t.int_val())
            self.t.advance()

        elif tokenType == 'STRING_CONST':
            # Build the string at run time, one character at a time.
            string = self.t.string_val()
            self.vm.write_push('constant', len(string))
            self.vm.write_call('String.new', 1)
            for char in string:
                self.vm.write_push('constant', ord(char))
                self.vm.write_call('String.appendChar', 2)
            self.t.advance()

        elif tokenType == 'KEYWORD':
            self.validator(keyConst, advance=False)
            if self.t.current_token() in ['false', 'null']:
                self.t.advance()
                self.vm.write_push('constant', '0')
            elif self.t.current_token() == 'true':
                # true is -1: push 1 and negate it.
                self.vm.write_push('constant', '1')
                self.vm.write_arithmetic('-', neg=True)
                self.t.advance()
            else:  # this
                self.vm.write_push('pointer', '0')
                self.t.advance()

        elif tokenType == 'SYMBOL':
            if self.t.symbol() in unOps:  # unary operator
                unOpToken = self.t.current_token()
                self.t.advance()
                self.compile_term()
                self.vm.write_arithmetic(unOpToken, neg=True)
            elif self.t.symbol() == '(':  # (expression)
                self.validator('(')
                self.compile_expression()
                # Check the closing parenthesis instead of skipping whatever
                # token happens to follow the expression.
                self.validator(')')
            else:
                raise Exception(self.t.current_token() + ' is not valid')

        elif tokenType == 'IDENTIFIER':  # varName, array, or subcall
            lookAhead = self.t.tokens[self.t.tokenIndex + 1]
            if lookAhead == '[':  # array item
                name = self.t.identifier()
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                if (name not in self.symTable.classDict and
                        name not in self.symTable.subDict):
                    raise Exception(self.t.identifier() + ' is not defined')
                self.t.advance()
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()
                self.vm.write_arithmetic('+')
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('that', 0)
                self.validator(']')
            elif lookAhead in ('(', '.'):  # subroutine call
                # Shared with compile_do, so a bare foo(...) inside an
                # expression now pushes `this` before its arguments; before,
                # it was called with numArgs + 1 but one value short.
                self._compile_subroutine_call()
            else:
                name = self.t.identifier()  # varName
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                self.vm.write_push(kind, index)
                self.t.advance()

        else:
            raise Exception(self.t.current_token() + ' is not valid')
        return

    def compile_expression_list(self):  # only in subroutineCall
        """Compile a possibly empty argument list; return its length."""
        counter = 0
        if self.t.symbol() == ')':
            return counter
        self.compile_expression()
        counter += 1
        while self.t.symbol() == ',':
            self.t.advance()
            self.compile_expression()
            counter += 1
        return counter

    def validator(self, syntax, advance=True):
        """Check the current token against `syntax` and optionally advance.

        `syntax` is one accepted item or a list/tuple of them; each item is
        either a literal token or a token type name such as 'IDENTIFIER'.
        Returns True on a match (after advancing when `advance` is true) and
        raises Exception naming the offending token otherwise.
        """
        tokenType = self.t.token_type()
        token = self.t.current_token()
        if not isinstance(syntax, (list, tuple)):
            syntax = [syntax]
        if not any(item in (tokenType, token) for item in syntax):
            # Report the token that was checked, not the one after it.
            raise Exception(token + ' is not valid')
        if advance:
            self.t.advance()
        return True
class Compiler:
    """Recursive-descent syntax analyzer for a single source file.

    The constructor tokenizes ``inpath`` and, if the file contains any
    token, parses one class.  Every terminal that is accepted is emitted
    through ``XMLWriter.write_terminal``; the grammar production handled by
    each ``compile_*`` method is given in the comment at its top.

    NOTE(review): ``record_non_terminal`` and ``STACK`` are defined outside
    this class; presumably the decorator wraps a method's output in the named
    non-terminal element and tracks nesting on ``STACK`` -- confirm against
    their definitions.
    """

    # Keywords accepted where a primitive type is expected.
    type_kws = (KW_INT, KW_CHAR, KW_BOOLEAN)
    # Keywords accepted as constant terms inside an expression.
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    def __init__(self, inpath, outpath):
        """Parse the file at ``inpath`` and write the result to ``outpath``."""
        self.tokenizer = Tokenizer(inpath)
        XMLWriter.set_filepath(outpath)
        if self.tokenizer.has_more_tokens():
            self.compile_class()
        XMLWriter.close()

    def _write_current_terminal(self):
        """Emit the current token under the tag of its token type."""
        XMLWriter.write_terminal(self._current_token, self._current_tok_tag)

    def _advance(self):
        """Move to the next token; raise ``CompileError`` at end of input."""
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        """Value of the current token, read from the type-specific field."""
        t_type = self.tokenizer.token_type
        return (self.tokenizer.keyword if t_type == T_KEYWORD else
                self.tokenizer.symbol if t_type == T_SYMBOL else
                self.tokenizer.identifier if t_type == T_ID else
                self.tokenizer.intval if t_type == T_INTEGER else
                self.tokenizer.stringval)

    @property
    def _current_tok_type(self):
        """Token type of the current token."""
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        """Output tag associated with the current token type."""
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """Return the raw next_token of the tokenizer, as a string."""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Advance and require the new current token to match.

        If ``token`` is given the token value must equal it; otherwise only
        the token type must equal ``tok_type``.  On success the terminal is
        written to the output; on failure a ``CompileError`` is raised.
        """
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token,))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type,))
        self._write_current_terminal()

    def _require_id(self):
        """Require an identifier as the next token."""
        return self._require_token(T_ID)

    def _require_kw(self, token):
        """Require the keyword ``token`` as the next token."""
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        """Require the symbol ``token`` as the next token."""
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        """Parse ``procedure()`` enclosed by the two symbols in ``brackets``."""
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        """True when the next token starts a class variable declaration."""
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        """True when the next token starts a subroutine declaration."""
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        """True when the next token starts a local variable declaration."""
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    # the compilation of three types of name might seem redundant here, but
    # it was for abstraction and later code generation in project 11.
    def compile_class_name(self):
        self._require_id()

    def compile_subroutine_name(self):
        self._require_id()

    def compile_var_name(self):
        self._require_id()

    def compile_type(self, advanced=False, expect='type'):
        """Parse a type.

        :param advanced: True when the caller has already advanced onto the
            type token, so no further advance is done here.
        :param expect: description used in the error message on mismatch.
        """
        # int, char, boolean or identifier(className)
        if advanced is False:
            self._advance()
        if self._current_token in self.type_kws:
            return self._write_current_terminal()
        elif self._current_tok_type == T_ID:
            return self._write_current_terminal()
        else:
            return self._error(expect=expect)

    def compile_void_or_type(self):
        # void or type
        self._advance()
        if self._current_token == KW_VOID:
            self._write_current_terminal()
        else:
            # Already positioned on the token, hence advanced=True.
            self.compile_type(True, '"void" or type')

    @record_non_terminal('class')
    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec or subroutineDec.")
        self._write_current_terminal()

    def compile_declare(self):
        """Parse the shared shape of classVarDec and varDec."""
        # Leading keyword ('static' | 'field' | 'var'); the caller has already
        # checked it via the lookahead helpers.
        self._advance()
        self._write_current_terminal()
        # type varName (',' varName)* ';'
        self.compile_type()
        self.compile_var_name()
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self._write_current_terminal()
            self.compile_var_name()
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))
        self._write_current_terminal()

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._advance()
        self._write_current_terminal()  # ('constructor'|'function'|'method')
        self.compile_void_or_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name()
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name()

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        self.compile_statements()
        self._require_sym('}')

    @record_non_terminal('varDec')
    def compile_vardec(self):
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        # (letStatement | ifStatement | whileStatement | doStatement |
        # returnStatement)*
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            if last_statement == 'do':
                self.compile_do()
            elif last_statement == 'let':
                self.compile_let()
            elif last_statement == 'while':
                self.compile_while()
            elif last_statement == 'return':
                self.compile_return()
            elif last_statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')
        # A statement list that is the direct body of a subroutine must end
        # with a return statement.
        if STACK[-2] == 'subroutineBody' and last_statement != 'return':
            self._error(expect='return statement', get=last_statement)

    @record_non_terminal('doStatement')
    def compile_do(self):
        # 'do' subroutineCall ';'
        self._write_current_terminal()
        # compile identifier first
        self._advance()
        self.compile_subroutine_call()
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._write_current_terminal()
        self.compile_var_name()
        if self._next_token == '[':
            self._compile_array_subscript()
        self._require_sym('=')
        self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('whileStatement')
    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        # 'return' expression? ';'
        self._write_current_terminal()
        if self._next_token != ';':
            self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('ifStatement')
    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)

    ##########################
    # expression compilation #
    ##########################

    @record_non_terminal('expression')
    def compile_expression(self):
        # term (op term)*
        self.compile_term()
        while is_op(self._next_token):
            self.compile_op()
            self.compile_term()

    @record_non_terminal('term')
    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        elif self._next_token in ('-', '~'):
            self.compile_unaryop()
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok in self.kw_consts or tok_type in (T_INTEGER, T_STRING):
                self._write_current_terminal()
            elif tok_type == T_ID:
                # Tuple membership rather than a substring test, so that an
                # empty lookahead string cannot be mistaken for a call.
                if self._next_token in ('(', '.'):
                    self.compile_subroutine_call()
                else:
                    self._write_current_terminal()
                    if self._next_token == '[':
                        self._compile_array_subscript()
            else:
                self._error(expect='term')

    def compile_call_name(self):
        # the first name of subroutine call could be (className or varName) if
        # it is followed by '.', or subroutineName if followed by '('.
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID,))
        self._write_current_terminal()  # just write it without analysis.
        # this method will be extended to decide which kind the name is.

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')' | (className |
        # varName) '.' subroutineName '(' expressionList ')'
        ## the first element of structure has already been compiled.
        self.compile_call_name()
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
        self._require_brackets('()', self.compile_expressionlist)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self):
        # (expression (',' expression)*)?
        if self._next_token != ')':
            self.compile_expression()
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()

    def compile_op(self):
        # exclude '~'
        self._advance()
        if self._current_token == '~':
            self._traceback('Unexpected operator: ~')
        self._write_current_terminal()

    def compile_unaryop(self):
        self._advance()
        self._write_current_terminal()  # symbol: - or ~
        self.compile_term()

    def _compile_array_subscript(self):
        # '[' expression ']'
        self._require_brackets('[]', self.compile_expression)

    def _check_EOF(self):
        """Raise ``CompileError`` when the tokenizer has no more tokens."""
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        """Raise a ``CompileError`` describing an expectation mismatch.

        :param expect_toks: token values that would have been accepted.
        :param expect_types: token types that would have been accepted.
        :param expect: ready-made description; when given it replaces the
            text derived from ``expect_toks`` / ``expect_types``.
        :param get: what was found instead; defaults to the current token.
        """
        if expect is None:
            exp_tok = ' or '.join('"{0}"'.format(t) for t in expect_toks)
            exp_type = ' or '.join('type {0}'.format(token_tags[t])
                                   for t in expect_types)
            # Join only the non-empty halves so that a call which supplies
            # just tokens or just types yields a clean message.
            expect = ' or '.join(part for part in (exp_tok, exp_type) if part)
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        """Raise ``CompileError`` prefixed with the file name and line."""
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))
class CompilationEngine:
    """Single-pass compiler that emits both a parse tree and VM code.

    ``SetClass`` binds the engine to one input file and opens two outputs:
    the VM code file at ``output_path`` and an XML parse tree at
    ``output_path + ".xml"``.  ``CompileClass`` then parses the class and
    closes both outputs.  Class-level symbol tables are kept per class name
    across calls; the local table is replaced for every subroutine.
    """

    def __init__(self):
        self.local_symbol_table = None
        self.class_symbol_tables = {}
        # Class names are added here, mapped to the number of class
        # variables declared, once their declarations have been parsed.
        self.type_size_map = {"int": 1, "bool": 1, "char": 1}
        self.unique_label_index = 0

    def SetClass(self, input_path, output_path):
        """Open the input and both output files and load the first token."""
        self.tokenizer = Tokenizer(input_path)
        self.output_file = open("{0}.xml".format(output_path), 'w')
        self.code_file = open(output_path, 'w')
        self.tokenizer.advance()
        self.indent_level = 0
        self.current_class_name = None
        self.current_sub_name = None

    def CompileClass(self):
        """
        Compiles a complete class, then closes both output files.
        """
        try:
            self.EnterScope("class")
            self.ConsumeKeyword([Keyword.CLASS])
            self.ConsumeDeclaration("class", None)
            self.class_symbol_tables[self.current_class_name] = SymbolTable()
            self.ConsumeSymbol('{')

            totalSize = 0
            while self.IsKeyword([Keyword.STATIC, Keyword.FIELD]):
                totalSize += self.CompileClassVarDec()
            self.type_size_map[self.current_class_name] = totalSize

            # subroutineDec*
            while self.IsKeyword(subroutine_types):
                self.CompileSubroutine()

            self.ConsumeSymbol('}')
            self.ExitScope("class")
        finally:
            # Both files are opened per class by SetClass, so both are
            # released here; otherwise the VM output may never be flushed.
            self.output_file.close()
            self.code_file.close()

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.

        :return: the number of variables declared by the statement.
        """
        self.EnterScope("classVarDec")
        amount = 0

        category = self.tokenizer.keyword()
        self.ConsumeKeyword([Keyword.STATIC, Keyword.FIELD])
        varType = self.ConsumeType()
        self.ConsumeDeclaration(category, varType)
        amount += 1

        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            self.ConsumeDeclaration(category, varType)
            amount += 1

        self.ConsumeSymbol(';')
        self.ExitScope("classVarDec")
        return amount

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        self.EnterScope("subroutineDec")
        # Every subroutine starts with a fresh local scope.
        self.local_symbol_table = SymbolTable()

        subType = self.tokenizer.keyword()
        self.ConsumeKeyword([Keyword.CONSTRUCTOR, Keyword.FUNCTION,
                             Keyword.METHOD])
        if self.IsKeyword([Keyword.VOID]):
            self.ConsumeKeyword([Keyword.VOID])
        else:
            self.ConsumeType()

        # The first param is converted to internal rep. the second is
        # preserved
        self.ConsumeDeclaration(subType, subType)
        if subType == "method":
            # Skip one argument slot: the body of a method reads argument 0
            # as the receiver (see CompileSubroutineBody).
            self.local_symbol_table.indexList[Categories.ARGUMENT[0]] += 1

        self.ConsumeSymbol('(')
        self.CompileParameterList()
        self.ConsumeSymbol(')')
        self.CompileSubroutineBody()
        self.ExitScope("subroutineDec")

    def CompileSubroutineBody(self):
        """Compiles '{' varDec* statements '}' and the function prologue."""
        self.EnterScope("subroutineBody")
        nVars = 0
        self.ConsumeSymbol('{')
        while self.IsKeyword([Keyword.VAR]):
            nVars += self.CompileVarDec()

        # The function header can only be written once the number of local
        # variables is known.
        self.WriteCode("function {0}.{1} {2}".format(self.current_class_name,
                                                     self.current_sub_name,
                                                     str(nVars)))
        entry = self.SymbolTableLookup(self.current_sub_name)
        if entry.type == "constructor":
            # Allocate the object and anchor `this` to it.
            self.WriteCode("push constant {0}".
                           format(self.type_size_map[self.current_class_name]))
            self.WriteCode("call Memory.alloc 1")
            self.WriteCode("pop pointer 0")
        elif entry.type == "method":
            # Anchor `this` to the receiver passed as argument 0.
            self.WriteCode("push argument 0")
            self.WriteCode("pop pointer 0")

        self.CompileStatements()
        self.ConsumeSymbol('}')
        self.ExitScope("subroutineBody")

    def ConsumeDeclaration(self, category, entry_type):
        """Consume an identifier and record it as a new symbol.

        :param category: category name, converted by
            ``SymbolTableEntry.SetCategory``.
        :param entry_type: stored unchanged as the entry's type.
        """
        entry = SymbolTableEntry()
        entry.SetCategory(category)
        entry.name = self.ConsumeIdentifier()
        entry.type = entry_type

        local_categories = [Categories.VAR, Categories.ARGUMENT]
        class_categories = [Categories.SUBROUTINE, Categories.FIELD,
                            Categories.STATIC]

        # Updating current class / subroutine names
        if entry.category == Categories.CLASS:
            self.current_class_name = entry.name
        elif entry.category == Categories.SUBROUTINE:
            self.current_sub_name = entry.name

        # Updating local / class symbol tables
        if entry.category in local_categories:
            self.local_symbol_table.InsertEntry(entry)
        elif entry.category in class_categories:
            self.class_symbol_tables[self.current_class_name].\
                InsertEntry(entry)

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list, not including the
        enclosing "()".

        :return: the number of parameters declared.
        """
        self.EnterScope("parameterList")
        nVars = 0
        if not self.IsSymbol([')']):
            varType = self.ConsumeType()
            self.ConsumeDeclaration("argument", varType)
            nVars += 1

        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            varType = self.ConsumeType()
            self.ConsumeDeclaration("argument", varType)
            nVars += 1

        self.ExitScope("parameterList")
        return nVars

    def CompileVarDec(self):
        """
        Compiles a var declaration.

        :return: the number of local variables declared.
        """
        self.EnterScope("varDec")
        nVars = 0

        self.ConsumeKeyword([Keyword.VAR])
        varType = self.ConsumeType()
        self.ConsumeDeclaration("var", varType)
        nVars += 1

        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            self.ConsumeDeclaration("var", varType)
            nVars += 1

        self.ConsumeSymbol(';')
        self.ExitScope("varDec")
        return nVars

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the enclosing "{}".
        """
        self.EnterScope("statements")
        while self.IsKeyword([Keyword.LET, Keyword.IF, Keyword.WHILE,
                              Keyword.DO, Keyword.RETURN]):
            if self.IsKeyword([Keyword.LET]):
                self.CompileLet()
            elif self.IsKeyword([Keyword.IF]):
                self.CompileIf()
            elif self.IsKeyword([Keyword.WHILE]):
                self.CompileWhile()
            elif self.IsKeyword([Keyword.DO]):
                self.CompileDo()
            else:
                self.CompileReturn()
        self.ExitScope("statements")

    def CompileDo(self):
        """
        Compiles a do statement.
        """
        self.EnterScope("doStatement")
        self.ConsumeKeyword([Keyword.DO])

        prefix = self.ConsumeIdentifier()
        calleeLocation = None
        subName = None
        if self.IsSymbol(['.']):
            self.ConsumeSymbol('.')
            entry = self.SymbolTableLookup(prefix)
            if entry is not None and entry.category != Categories.CLASS:
                # prefix names a variable: call a method of its type on it.
                calleeLocation = "{0} {1}".format(entry.segment, entry.index)
                prefix = entry.type
            postfix = self.ConsumeIdentifier()
            subName = "{0}.{1}".format(prefix, postfix)
        else:
            # Unqualified call: a method of the current object.
            subName = "{0}.{1}".format(self.current_class_name, prefix)
            calleeLocation = "pointer 0"

        nArgs = 0
        # This means we are calling an instance method, so we push it first
        if calleeLocation is not None:
            self.WriteCode("push {0} //Pushing callee".format(calleeLocation))
            nArgs += 1

        self.ConsumeSymbol('(')
        nArgs += self.CompileExpressionList()
        self.ConsumeSymbol(')')
        self.ConsumeSymbol(';')
        self.WriteCode("call {0} {1}".format(subName, nArgs))
        # Get rid of the return value (garbage)
        self.WriteCode("pop temp 0")
        self.ExitScope("doStatement")

    def CompileLet(self):
        """
        Compiles a let statement.

        :raises Exception: if the assigned variable is not declared.
        """
        self.EnterScope("letStatement")
        self.ConsumeKeyword([Keyword.LET])
        varName = self.ConsumeIdentifier()
        entry = self.SymbolTableLookup(varName)
        if entry is None:
            raise Exception("Undeclared variable: {}".format(varName))

        isArray = False
        if self.IsSymbol(['[']):
            isArray = True
            self.ConsumeSymbol('[')
            self.CompileExpression()
            self.WriteCode("push {0} {1}".
                           format(entry.segment, entry.index))  # array base
            self.WriteCode("add")  # Add offset
            self.ConsumeSymbol(']')

        self.ConsumeSymbol('=')
        self.CompileExpression()
        self.ConsumeSymbol(';')

        if isArray:
            # The target address sits below the value on the stack, so the
            # value is parked in temp 0 while THAT is aligned.
            self.WriteCode("pop temp 0")  # Save the expression result
            self.WriteCode("pop pointer 1")  # Align THAT
            self.WriteCode("push temp 0")  # Push the exp result
            # Put the exp result in the array position
            self.WriteCode("pop that 0")
        else:
            self.WriteCode("pop {0} {1}".format(entry.segment, entry.index))
        self.ExitScope("letStatement")

    def CompileWhile(self):
        """
        Compiles a while statement.
        """
        self.EnterScope("whileStatement")
        self.ConsumeKeyword([Keyword.WHILE])
        L1 = self.GenerateUniqueLabel()
        L2 = self.GenerateUniqueLabel()

        # While entry point
        self.WriteCode("label {0}".format(L1))

        # while loop condition
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

        # Jump to L2 if condition doesn't hold
        self.WriteCode("not")
        self.WriteCode("if-goto {0}".format(L2))

        # While loop logic
        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

        # Go back to L1 for another iteration
        self.WriteCode("goto {0}".format(L1))
        # While termination point
        self.WriteCode("label {0}".format(L2))
        self.ExitScope("whileStatement")

    def CompileReturn(self):
        """
        Compiles a return statement.
        """
        self.EnterScope("returnStatement")
        self.ConsumeKeyword([Keyword.RETURN])
        if not self.IsSymbol([';']):
            self.CompileExpression()
        else:
            # A bare `return;` still has to leave a value on the stack.
            self.WriteCode("push constant 0")
        self.ConsumeSymbol(';')
        self.WriteCode("return")
        self.ExitScope("returnStatement")

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        """
        self.EnterScope("ifStatement")
        self.ConsumeKeyword([Keyword.IF])
        IF_TRUE = self.GenerateUniqueLabel()
        IF_FALSE = self.GenerateUniqueLabel()
        IF_END = self.GenerateUniqueLabel()

        # The if statement condition
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

        # Enter the if-body when the condition holds, otherwise skip to the
        # (possibly empty) else part.
        self.WriteCode("if-goto {0}".format(IF_TRUE))
        self.WriteCode("goto {0}".format(IF_FALSE))
        self.WriteCode("label {0}".format(IF_TRUE))

        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

        self.WriteCode("goto {0}".format(IF_END))
        self.WriteCode("label {0}".format(IF_FALSE))
        if self.IsKeyword([Keyword.ELSE]):
            self.ConsumeKeyword([Keyword.ELSE])
            self.ConsumeSymbol('{')
            self.CompileStatements()
            self.ConsumeSymbol('}')
        self.WriteCode("label {0}".format(IF_END))
        self.ExitScope("ifStatement")

    def CompileExpression(self):
        """
        Compiles an expression.
        """
        self.EnterScope("expression")
        self.CompileTerm()
        while self.IsSymbol(op_symbols.keys()):
            op = self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
            # Both operands are on the stack; emit the operator last.
            self.WriteCode(op_symbols[op])
        self.ExitScope("expression")

    def CompileTerm(self):
        """
        Compiles a term.

        :return: for a term that starts with an identifier, that identifier
            (or the declared type of the variable when the term is a method
            call on a variable); ``None`` for every other kind of term.
        """
        self.EnterScope("term")
        keyword_constants = [Keyword.TRUE, Keyword.FALSE, Keyword.NULL,
                             Keyword.THIS]
        termName = None

        if self.IsType(TokenType.INT_CONST):
            self.WriteCode("push constant {0}".
                           format(self.ConsumeIntegerConstant()))
        elif self.IsType(TokenType.STRING_CONST):
            self.ConsumeStringConstant()
        elif self.IsKeyword(keyword_constants):
            keyword = self.ConsumeKeyword(keyword_constants)
            if keyword == "false":
                self.WriteCode("push constant 0")
            elif keyword == "true":
                self.WriteCode("push constant 0")
                self.WriteCode("not")
            elif keyword == "this":
                self.WriteCode("push pointer 0")
            elif keyword == "null":
                self.WriteCode("push constant 0")
        elif self.IsSymbol(['(']):
            self.ConsumeSymbol('(')
            self.CompileExpression()
            self.ConsumeSymbol(')')
        elif self.IsSymbol(unary_symbols.keys()):
            symbol = self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
            self.WriteCode(unary_symbols[symbol])
        else:
            termName = self.ConsumeIdentifier()
            entry = self.SymbolTableLookup(termName)
            isVariable = (entry is not None and
                          CategoryUtils.IsIndexed(entry.category))

            if self.IsSymbol(['(']):
                # subroutineName '(' expressionList ')': an unqualified call
                # is a method call on the current object, exactly as in
                # CompileDo, so the receiver goes first and the callee is
                # qualified with the current class name.
                self.WriteCode("push pointer 0")
                self.ConsumeSymbol('(')
                nArgs = self.CompileExpressionList() + 1
                self.WriteCode("call {0}.{1} {2}".
                               format(self.current_class_name, termName,
                                      nArgs))
                self.ConsumeSymbol(')')
            else:
                if isVariable:
                    # Value of the variable: the whole term, the array base,
                    # or the receiver of a method call, depending on what
                    # follows.
                    self.WriteCode("push {0} {1} //{2}".
                                   format(CategoryUtils.
                                          GetSegment(entry.category),
                                          entry.index, termName))

                if self.IsSymbol(['[']):
                    # varName '[' expression ']'
                    self.ConsumeSymbol('[')
                    self.CompileExpression()
                    self.WriteCode("add")
                    self.WriteCode("pop pointer 1")
                    self.WriteCode("push that 0")
                    self.ConsumeSymbol(']')
                elif self.IsSymbol(['.']):
                    # (className | varName) '.' subroutineName
                    # '(' expressionList ')'
                    self.ConsumeSymbol('.')
                    funcName = self.ConsumeIdentifier()
                    extraParam = 0
                    if isVariable:
                        # Method call on a variable: dispatch on its declared
                        # type and count the receiver pushed above.  This
                        # does not need the callee's class to have been
                        # compiled already.
                        termName = entry.type
                        extraParam = 1
                    self.ConsumeSymbol('(')
                    self.WriteCode("call {0}.{1} {2}".
                                   format(termName, funcName,
                                          self.CompileExpressionList() +
                                          extraParam))
                    self.ConsumeSymbol(')')

        self.ExitScope("term")
        return termName

    def GetSubroutineEntry(self, prefix, postfix):
        """Look up the subroutine ``postfix``, qualified by ``prefix``.

        ``postfix`` is first searched in the current scopes; failing that,
        and if ``prefix`` names a known symbol, in the class table of that
        symbol's type.

        :return: the matching entry, or ``None``.
        """
        entry = self.SymbolTableLookup(postfix)
        if entry is not None:
            return entry

        varEntry = self.SymbolTableLookup(prefix)
        if varEntry is not None:
            return self.ClassSymbolTableLookup(postfix, varEntry.type)
        return None

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated list of expressions.

        :return: the number of expressions compiled.
        """
        self.EnterScope("expressionList")
        nArgs = 0
        if not self.IsSymbol([')']):
            self.CompileExpression()
            nArgs += 1

        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            self.CompileExpression()
            nArgs += 1

        self.ExitScope("expressionList")
        return nArgs

    def IsKeyword(self, keyword_list):
        """True if the current token is a keyword in ``keyword_list``."""
        return (self.IsType(TokenType.KEYWORD) and
                self.tokenizer.keyword() in keyword_list)

    def IsSymbol(self, symbol_list):
        """True if the current token is a symbol in ``symbol_list``."""
        return (self.IsType(TokenType.SYMBOL) and
                self.tokenizer.symbol() in symbol_list)

    def IsType(self, tokenType):
        """True if the current token has the given token type."""
        return self.tokenizer.tokenType() == tokenType

    def ConsumeType(self):
        """Consume a type (class name or primitive keyword); return it."""
        if self.tokenizer.tokenType() == TokenType.IDENTIFIER:
            return self.ConsumeIdentifier()
        else:
            return self.ConsumeKeyword([Keyword.INT, Keyword.CHAR,
                                        Keyword.BOOLEAN])

    def ConsumeKeyword(self, keywordList):
        """Consume a keyword that must be in ``keywordList``; return it."""
        self.VerifyTokenType(TokenType.KEYWORD)
        actual = self.tokenizer.keyword()
        if actual not in keywordList:
            raise Exception("Expected keywords: {}, Actual: {}".
                            format(keywordList, actual))
        self.OutputTag("keyword", actual)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        return actual

    def ConsumeSymbol(self, symbol):
        """Consume the symbol ``symbol``; return it."""
        self.VerifyTokenType(TokenType.SYMBOL)
        actual = self.tokenizer.symbol()
        if actual != symbol:
            raise Exception("Expected symbol: {}, Actual: {}".
                            format(symbol, actual))
        self.OutputTag("symbol", actual)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        return actual

    def ConsumeIntegerConstant(self):
        """Consume an integer constant; return its value."""
        self.VerifyTokenType(TokenType.INT_CONST)
        actual = self.tokenizer.intVal()
        self.OutputTag("integerConstant", self.tokenizer.intVal())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        return actual

    def ConsumeStringConstant(self):
        """Consume a string constant and emit code that builds it.

        :return: the text of the constant.
        """
        self.VerifyTokenType(TokenType.STRING_CONST)
        actual = self.tokenizer.stringVal()

        # Build the string at run time, one character at a time.
        self.WriteCode("push constant {0}".format(len(actual)))
        self.WriteCode("call String.new 1")
        for c in actual:
            self.WriteCode("push constant {0}".format(ord(c)))
            self.WriteCode("call String.appendChar 2")

        self.OutputTag("stringConstant", self.tokenizer.stringVal())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        return actual

    def ConsumeIdentifier(self):
        """Consume an identifier; return its name."""
        self.VerifyTokenType(TokenType.IDENTIFIER)
        actual = self.tokenizer.identifier()
        self.OutputTag("identifierName", self.tokenizer.identifier())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()
        return actual

    def VerifyTokenType(self, tokenType):
        """Raise if the current token is not of type ``tokenType``."""
        actual = self.tokenizer.tokenType()
        if actual != tokenType:
            raise Exception("Expected token type: {}, Actual: {}".
                            format(tokenType, actual))

    def EnterScope(self, name):
        """Write the opening tag ``name`` and indent what follows."""
        self.Output("<{}>".format(name))
        self.indent_level += 1

    def ExitScope(self, name):
        """Dedent and write the closing tag ``name``."""
        self.indent_level -= 1
        self.Output("</{}>".format(name))

    def ClassSymbolTableLookup(self, name, containingClass):
        """Look ``name`` up in the class-level table of ``containingClass``."""
        return self.class_symbol_tables[containingClass].GetEntry(name)

    def SymbolTableLookup(self, name):
        """Look ``name`` up locally, then in the current class table."""
        entry = self.local_symbol_table.GetEntry(name)
        if entry is not None:
            return entry
        else:
            return self.ClassSymbolTableLookup(name, self.current_class_name)

    def WriteCode(self, line):
        """Append one line to the VM code file."""
        self.code_file.write(line + '\n')

    def OutputTag(self, tag, value):
        """Write ``value`` as a one-line element ``tag`` to the parse tree."""
        self.Output("<{}> {} </{}>".format(tag, value, tag))

    def Output(self, text):
        """Write ``text`` to the parse tree at the current indentation."""
        self.output_file.write((" " * self.indent_level) + text + '\n')

    def GenerateUniqueLabel(self):
        """Return a label name not handed out before by this engine."""
        self.unique_label_index += 1
        return "pfl{0}".format(self.unique_label_index - 1)