Пример #1
0
 def __init__(self, in_address):
     self.tokenizer = Tokenizer(in_address)
     self.curr_token = self.tokenizer.get_current_token()
     self.out_address = in_address.replace(".jack", ".xml")
     self.output = ""
     self.indent = 0
     self.compile_class()
Пример #2
0
def tokens_to_xml(path):
    """Write the tokens into a xml file with its type as tags. The outpath
    is the dirpath of the a new directory in the module path to avoid name
    clashes."""
    paths = retrive_files(path)
    out_dirpath = os.path.join(path, 'Xmlresult')
    for path in paths:
        outfile = os.path.basename(path).replace('.jack', 'T.xml')
        outpath = os.path.join(out_dirpath, outfile)
        tokenizer = Tokenizer(path)
        analyzer = TokenAnalyzer(outpath)
        while tokenizer.has_more_tokens():
            tokenizer.advance()
            t_type = tokenizer.token_type
            tag = token_tags[t_type]
            if t_type == T_KEYWORD:
                analyzer.write_info(tokenizer.keyword, tag)
            elif t_type == T_SYMBOL:
                analyzer.write_info(tokenizer.symbol, tag)
            elif t_type == T_ID:
                analyzer.write_info(tokenizer.identifier, tag)
            elif t_type == T_INTEGER:
                analyzer.write_info(tokenizer.intval, tag)
            elif t_type == T_STRING:
                analyzer.write_info(tokenizer.stringval, tag)
        analyzer.close()
Пример #3
0
 def __init__(self, inpath, outpath):
     self.tokenizer = Tokenizer(inpath)
     self.symboltable = SymbolTable()
     self.vmwriter = VMWriter(outpath)
     self._class_name = None
     if self.tokenizer.has_more_tokens():
         self.compile_class()
     self.vmwriter.close()
     print("{0} completed.".format(outpath))
Пример #4
0
 def __init__(self, in_address):
     self.tokenizer = Tokenizer(in_address)
     self.symbol_table = SymbolTable()
     self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
     self.curr_token = self.tokenizer.get_current_token()
     self.out_address = in_address.replace(".jack", ".xml")
     self.output = ""
     self.indent = 0
     self.label_count = -1
     self.class_name = ""
     self.compile_class()
 def __init__(self, inFile):
     self.t = Tokenizer(inFile)
     self.symTable = SymbolTable()
     self.vmName = inFile.rstrip('.jack') + '.vm'
     self.vm = VMWriter(self.vmName)
     self.className = ''
     self.types = ['int', 'char', 'boolean', 'void']
     self.stmnt = ['do', 'let', 'if', 'while', 'return']
     self.subroutType = ''
     self.whileIndex = 0
     self.ifIndex = 0
     self.fieldNum = 0
Пример #6
0
 def __init__(self, filepath):
     self.jackfiles = []
     self.fileOrDir(filepath)
     for file in self.jackfiles:
         print(file)
         tokenizer = Tokenizer(file)
         outputname = filepath.split('.')[0]+".0000.xml"
         print(outputname)
         print(tokenizer.file)
Пример #7
0
    def SetClass(self, input_path, output_path):
        self.tokenizer = Tokenizer(input_path)
        self.output_file = open("{0}.xml".format(output_path), 'w')
        self.code_file = open(output_path, 'w')
        self.tokenizer.advance()
        self.indent_level = 0

        self.current_class_name = None
        self.current_sub_name = None
Пример #8
0
def run_jack_compiler(file_path):
    jack_files = []
    if file_path.endswith(".jack"):
        jack_files.append(file_path)
    else:
        file_list = listdir(file_path)
        for file in file_list:
            if file.endswith(".jack"):
                jack_files.append(file_path + '/' + file)

    for jack_file in jack_files:
        # print("   Now processing: ", jack_file)
        curr_tokenizer = Tokenizer(jack_file)
        curr_compilation_engine = CompilationEngine(curr_tokenizer, jack_file)
        curr_compilation_engine.compile_class()
Пример #9
0
def run_jack_tokenizer(file_path):
    jack_files = []
    if file_path.endswith(".jack"):
        jack_files.append(file_path)
    else:
        file_list = listdir(file_path)
        for file in file_list:
            if file.endswith(".jack"):
                jack_files.append(file_path + '/' + file)

    for jack_file in jack_files:
        print("   Now processing: ", jack_file)
        curr_tokenizer = Tokenizer(jack_file)
        print("writing into", jack_file[:-5]+"test.xml")
        xml_stream = open(jack_file[:-5]+"test.xml", "w+")
        xml_stream.write("<tokens>\n")
        single_file_tokenize(curr_tokenizer, xml_stream)
        xml_stream.write("</tokens>\n")
        xml_stream.close()
Пример #10
0
class CompilationEngine:
    _OPEN_PARENTHESIS = "\("
    _CLOSE_PARENTHESIS = "\)"
    _OPEN_BRACKET = "\["
    _CLOSE_BRACKET = "\]"
    _DOT = "\."
    _OPS = "\+|-|\*|\/|&|\||<|>|="

    def __init__(self, in_address):
        self.tokenizer = Tokenizer(in_address)
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        self.compile_class()

    def write_file(self):
        with open(self.out_address, 'w') as f:
            f.write(self.output)

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    def compile_class(self):
        """
        Compiles a complete class.
        """
        def comp_class():
            self.eat("class")
            self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles a static or field declaration.
        :return:
        """
        var_type_reg = "static|field"
        if self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)
            self.compile_class_var_dec()

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        var_type_reg = "static|field"
        # (static|field)
        var_type = self.curr_token.get_token()
        self.eat(var_type_reg)
        # type
        self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__single_var()
        self.eat(";")

    def __single_var(self):
        """
        Compiles a single set of variables separated by commas.
        """
        # varName
        self.eat(NAME_REG)
        if self.peek_token(","):
            self.eat(",")
            self.__single_var()

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by
        a received boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return:
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        self.eat(type_reg)

    def compile_subroutine(self):
        """
        Compiles a complete method, function or constructor.
        :return:
        """
        sub_regex = "(constructor|function|method)"

        def subroutine_dec():
            self.eat(sub_regex)
            self.__compile_type(True)
            # subroutine name
            self.__compile_name()
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_parameter_list()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.wrap("subroutineBody", subroutine_body)

        def subroutine_body():
            self.eat("{")
            if self.peek_token("var"):
                self.compile_var_dec()
            self.compile_statements()
            self.eat("}")

        if self.peek_token(sub_regex):
            self.wrap("subroutineDec", subroutine_dec)
            # Handle next subroutine if there is one
            self.compile_subroutine()

    def __compile_name(self):

        if self.peek_type() == IDENTIFIER:
            self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def compile_parameter_list(self):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :return:
        """
        self.wrap("parameterList", self.__params)

    def __params(self):
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if self.peek_token(type_reg):
            self.__compile_type(False)
            self.eat(NAME_REG)
            if self.peek_token(","):
                self.eat(",")
                self.__params()

    def compile_var_dec(self):
        """
        Compiles a var declaration.
        :return:
        """
        self.wrap("varDec", self.__comp_var_dec)
        if self.peek_token("var"):
            self.compile_var_dec()

    def __comp_var_dec(self):
        self.eat("var")
        self.__compile_type(False)
        self.__single_var()
        self.eat(";")

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return:
        """
        def statement():
            """
            Determines the type of statement and compiles it. Calls itself
            afterwards to check for more statements.
            :return:
            """
            # statement_reg = "let|if|while|do|return"
            # if self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
                statement()
            if self.peek_token("if"):
                self.compile_if()
                statement()
            if self.peek_token("while"):
                self.compile_while()
                statement()
            if self.peek_token("do"):
                self.compile_do()
                statement()
            if self.peek_token("return"):
                self.compile_return()
                statement()

        self.wrap("statements", statement)

    def compile_do(self):
        """
        Compiles a do statement
        :return:
        """
        def do():
            self.eat("do")
            self.__subroutine_call()
            self.eat(";")

        self.wrap("doStatement", do)

    def __comp_do(self):
        self.eat("do")
        self.__subroutine_call()
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        :return:
        """
        self.wrap("letStatement", self.__comp_let)

    def __comp_let(self):
        self.eat("let")
        self.__compile_name()
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            self.eat(CompilationEngine._OPEN_BRACKET)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_BRACKET)
        self.eat("=")
        self.compile_expression()
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """
        def comp_while():
            self.eat("while")
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            self.compile_statements()
            self.eat("}")

        self.wrap("whileStatement", comp_while)

    def compile_return(self):
        """
        Compiles a return statement.
        :return:
        """
        def comp_return():
            self.eat("return")
            # if next is expression:
            if self.is_term():
                self.compile_expression()
            self.eat(";")

        self.wrap("returnStatement", comp_return)

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return:
        """
        def comp_if():
            self.eat("if")
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            self.eat("{")
            # self.indent += 1
            self.compile_statements()
            self.eat("}")
            # Handle else:
            if self.peek_token("else"):
                self.eat("else")
                self.eat("{")
                self.compile_statements()
                self.eat("}")

        self.wrap("ifStatement", comp_if)

    def compile_expression(self):
        """
        Compiles an expression.
        :return:
        """
        def comp_expression():
            self.compile_term()
            # Case: term op term
            if self.peek_token(CompilationEngine._OPS):
                self.eat(CompilationEngine._OPS)
                self.compile_term()

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.
        :return:
        """
        def term():
            curr_type = self.peek_type()
            is_const = curr_type == STRING_CONST or \
                       curr_type == INT_CONST or \
                       curr_type == KEYWORD
            # Case: term is integerConstant or stringConstant or
            # keywordConstant
            if is_const:
                self.write(self.tokenizer.get_current_token().get_xml_wrap())
                self.__advance_token()
            # Case: token is a varName or a subroutineName
            elif curr_type == IDENTIFIER:
                # self.write(self.tokenizer.get_current_token().get_xml_wrap())
                # self.tokenizer.advance()
                # Case: varName [ expression ]
                if self.peek_next(CompilationEngine._OPEN_BRACKET):
                    self.__var_name_array()
                # Case: subroutineCall:
                elif self.peek_next(
                        CompilationEngine._OPEN_PARENTHESIS) or self.peek_next(
                            CompilationEngine._DOT):
                    self.__subroutine_call()
                else:
                    self.eat(NAME_REG)
            # Case: ( expression )
            elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
                self.eat(CompilationEngine._OPEN_PARENTHESIS)
                self.compile_expression()
                self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            # Case: unaryOp term
            elif self.peek_token("-|~"):
                self.eat("-|~")
                self.compile_term()
            else:
                print("Error: Incorrect Term")
                exit(-1)

        self.wrap("term", term)

    def __var_name_array(self):
        """
        Handles the case of varName[expression]
        :return:
        """
        self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)

    def is_term(self):
        curr_type = self.peek_type()
        return curr_type == STRING_CONST or curr_type == INT_CONST or \
               curr_type == KEYWORD or curr_type == IDENTIFIER or \
               self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
               self.peek_token(CompilationEngine._OPS)

    def __subroutine_call(self):
        if self.curr_token.get_type() == IDENTIFIER:
            # self.write(self.curr_token.get_xml_wrap())
            # self.__advance_token()
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                self.__subroutine_name()
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        self.eat(NAME_REG)
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name()

    def __subroutine_name(self):
        """
        Handles the case of subroutineName(expressionList)
        :return:
        """
        if self.curr_token.get_type() == IDENTIFIER:
            self.eat(NAME_REG)
            self.eat(CompilationEngine._OPEN_PARENTHESIS)
            self.compile_expression_list()
            self.eat(CompilationEngine._CLOSE_PARENTHESIS)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return:
        """
        def exp_list():
            if self.is_term():
                self.compile_expression()
                while self.peek_token(","):
                    self.eat(",")
                    self.compile_expression()

        self.wrap("expressionList", exp_list)

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return:
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    def eat(self, token):
        """
        Handles advancing and writing terminal tokens.
        Will exit the program if an error occurs.
        :param token: The regex of the token to compare
        :return:
        """
        if re.match(token, self.curr_token.get_token()):
            self.write(self.curr_token.get_xml_wrap())
            self.__advance_token()
        else:
            # if self.tokenizer.get_current_token() != token:
            print("Error: Expected " + token)
            exit(-1)

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: True if the current token matches the regex, False otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, self.tokenizer.get_next_token().get_token())
        return False

    def __advance_token(self):
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()
Пример #11
0
class CompilationEngine:
    _OPEN_PARENTHESIS = "\("
    _CLOSE_PARENTHESIS = "\)"
    _OPEN_BRACKET = "\["
    _CLOSE_BRACKET = "\]"
    _DOT = "\."
    _OPS = "\+|-|\*|\/|&|\||<|>|="

    def __init__(self, in_address):
        self.tokenizer = Tokenizer(in_address)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(in_address.replace(".jack", ".vm"))
        self.curr_token = self.tokenizer.get_current_token()
        self.out_address = in_address.replace(".jack", ".xml")
        self.output = ""
        self.indent = 0
        self.label_count = -1
        self.class_name = ""
        self.compile_class()

    def write_file(self):
        # with open(self.out_address, 'w') as f:
        #     f.write(self.output)
        self.vm_writer.write_file()

    def write(self, to_write):
        """
        Writes to the output, with indentation.
        :param to_write: The string to write
        """
        self.output += (self.indent * " ") + to_write + "\n"

    # ========== Compilation Methods ========== #

    def compile_class(self):
        """
        Compiles a complete class.
        """
        def comp_class():
            self.eat("class")
            self.class_name = self.eat(NAME_REG)
            self.eat("{")
            self.compile_class_var_dec()
            self.compile_subroutine()
            self.eat("}")

        self.wrap("class", comp_class)

    def compile_class_var_dec(self):
        """
        Compiles a static or field declaration.
        :return:
        """
        var_type_reg = "static|field"
        if self.peek_token(var_type_reg):
            self.wrap("classVarDec", self.__class_var_dec)
            self.compile_class_var_dec()

    def compile_subroutine(self):
        """
        Compiles a complete method, function or constructor.
        :return:
        """
        sub_regex = "(constructor|function|method)"
        self.symbol_table.start_subroutine()
        kind = self.eat(sub_regex)
        self.__compile_type(True)
        # subroutine name
        name = self.__compile_name()
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        self.compile_parameter_list(kind)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        if self.peek_token("var"):
            self.compile_var_dec()
        num_locals = self.symbol_table.var_count("local")
        self.vm_writer.write_function("{}.{}".format(self.class_name, name),
                                      num_locals)
        self.__set_pointer(kind)
        self.compile_statements()
        self.eat("}")

        # def subroutine_dec():
        #     kind = self.eat(sub_regex)
        #     self.__compile_type(True)
        #     # subroutine name
        #     name = self.__compile_name()
        #     self.eat(CompilationEngine._OPEN_PARENTHESIS)
        #     self.compile_parameter_list(kind)
        #     self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        #     subroutine_body(name)
        #     # self.wrap("subroutineBody", subroutine_body)
        #
        # def subroutine_body(name):
        #     self.eat("{")
        #     num_locals = 0
        #     if self.peek_token("var"):
        #         num_locals = self.compile_var_dec()
        #     self.vm_writer.write_function("{}.{}".format(self.class_name,
        #                                                  name), num_locals)
        #
        #     self.compile_statements()
        #     # if sub_type == "void":
        #     #     self.vm_writer.write_push("constant", 0)
        #     self.eat("}")
        # Handle next subroutine if there is one
        if self.peek_token(sub_regex):
            self.compile_subroutine()

    def compile_parameter_list(self, kind):
        """
        Compiles a possibly empty parameter list, not including the
        enclosing ()
        :return:
        """
        if kind == "method":
            self.symbol_table.define("this", self.class_name, "argument")
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        while self.peek_token(type_reg):
            self.__params()

    def compile_var_dec(self):
        """
        Compiles a var declaration.
        :return:
        """
        # self.wrap("varDec", self.__comp_var_dec)
        self.eat("var")
        var_type = self.__compile_type(False)
        self.__var_declare(var_type, "var")
        self.eat(";")
        if self.peek_token("var"):
            self.compile_var_dec()

    def compile_statements(self):
        """
        Compiles a sequence of statements, not including the enclosing {}
        :return:
        """
        statement_reg = "let|if|while|do|return"
        if self.peek_token(statement_reg):
            if self.peek_token("let"):
                self.compile_let()
            elif self.peek_token("if"):
                self.compile_if()
            elif self.peek_token("while"):
                self.compile_while()
            elif self.peek_token("do"):
                self.compile_do()
            elif self.peek_token("return"):
                self.compile_return()
            self.compile_statements()

    def compile_do(self):
        """
        Compiles a do statement
        """
        self.eat("do")
        self.__subroutine_call()
        # Since we don't use the return value, we pop it to temp
        self.vm_writer.write_pop("temp", 0)
        self.eat(";")

    def compile_let(self):
        """
        Compiles a let statement
        """
        self.eat("let")
        name = self.__compile_name()
        is_array = False
        # Determine [expression]
        if self.peek_token(CompilationEngine._OPEN_BRACKET):
            is_array = True
            self.__handle_array(name)
        self.eat("=")
        self.compile_expression()
        # Pop the value to the spot in the memory
        if is_array:
            self.vm_writer.write_pop("temp", 0)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("temp", 0)
            self.vm_writer.write_pop("that", 0)
        else:
            self.__write_pop(name)
        self.eat(";")

    def compile_while(self):
        """
        Compiles a while statement.
        :return:
        """
        self.eat("while")
        loop_label = self.__get_label("WHILE_START")
        exit_label = self.__get_label("WHILE_END")
        self.vm_writer.write_label(loop_label)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # Compute ~condition
        self.compile_expression()
        self.vm_writer.write_arithmetic("~")
        # if ~condition exit loop
        self.vm_writer.write_if(exit_label)
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.eat("}")

    def compile_return(self):
        """
        Compiles a return statement.
        """
        self.eat("return")
        # if next is expression:
        if self.__is_term():
            self.compile_expression()
        else:
            # Void function - push 0
            self.vm_writer.write_push(CONSTANT, 0)
        self.vm_writer.write_return()
        self.eat(";")

    def compile_if(self):
        """
        Compiles an if statement, possibly with a trailing else clause.
        :return:
        """
        self.eat("if")
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        # ~cond
        self.compile_expression()
        # self.vm_writer.write_arithmetic("~")
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.eat("{")
        if_true = self.__get_label("IF_TRUE")
        self.vm_writer.write_if(if_true)
        if_false = self.__get_label("IF_FALSE")
        self.vm_writer.write_goto(if_false)
        self.vm_writer.write_label(if_true)
        self.compile_statements()
        self.eat("}")
        # Handle else:
        if self.peek_token("else"):
            if_end = self.__get_label("IF_END")
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)
            self.eat("else")
            self.eat("{")
            self.compile_statements()
            self.eat("}")
            self.vm_writer.write_label(if_end)
        else:
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """
        Compiles an expression.
        :return:
        """
        def comp_expression():
            self.compile_term()
            # Case: term op term
            if self.peek_token(CompilationEngine._OPS):
                operation = self.eat(CompilationEngine._OPS)
                self.compile_term()
                self.vm_writer.write_arithmetic(operation)

        self.wrap("expression", comp_expression)

    def compile_term(self):
        """
        Compiles a term.
        :return:
        """
        def term():
            curr_type = self.peek_type()
            val = self.curr_token.get_token()
            # Handle integer constant
            if curr_type == INT_CONST:
                self.vm_writer.write_push(CONSTANT, int(val))
                self.__advance_token()
            # Handle String constant
            elif curr_type == STRING_CONST:
                self.__handle_string_constant(val)
                self.__advance_token()
            # Handle Keyword constant
            elif curr_type == KEYWORD:
                self.__handle_keyword_constant(val)
                self.__advance_token()
            # Case: token is a varName or a subroutineName
            elif curr_type == IDENTIFIER:
                self.__handle_identifier()
            # Case: ( expression )
            elif self.peek_token(CompilationEngine._OPEN_PARENTHESIS):
                self.eat(CompilationEngine._OPEN_PARENTHESIS)
                self.compile_expression()
                self.eat(CompilationEngine._CLOSE_PARENTHESIS)
            # Case: unaryOp term
            elif self.peek_token("-|~"):
                self.__handle_unary_op()
            else:
                print("Error: Incorrect Term")
                exit(-1)

        term()
        # self.wrap("term", term)

    def compile_expression_list(self):
        """
        Compiles a possibly empty list of comma separated expressions
        :return:
        """
        def exp_list():
            count = 0
            if self.__is_term():
                self.compile_expression()
                count += 1
                while self.peek_token(","):
                    self.eat(",")
                    self.compile_expression()
                    count += 1
            return count

        return exp_list()
        # self.wrap("expressionList", exp_list)

    # ========== Compilation Helper ========== #

    def __class_var_dec(self):
        """
        Compiles a single class var declaration.
        """
        var_type_reg = "static|field"
        # (static|field)
        kind = self.eat(var_type_reg)
        # type
        var_type = self.__compile_type(False)
        # Compile varName combo until no more ","
        self.__var_declare(var_type, kind)
        self.eat(";")

    def __var_declare(self, var_type, kind):
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, kind)
        if self.peek_token(","):
            self.eat(",")
            self.__var_declare(var_type, kind)

    def __compile_type(self, for_function):
        """
        Compiles a type for a function or variable, determined by
        a received boolean value.
        :param for_function: True if is type of function, false otherwise.
        :return:
        """
        type_reg = r"int|char|boolean|[A-Za-z_]\w*"
        if for_function:
            type_reg += "|void"
        return self.eat(type_reg)

    def __set_pointer(self, kind):
        if kind == "method":
            self.vm_writer.write_push("argument", 0)
            self.vm_writer.write_pop("pointer", 0)
        elif kind == "constructor":
            self.__handle_constructor()

    def __handle_constructor(self):
        # Allocate memory for the new object
        var_num = self.symbol_table.var_count("this")
        self.vm_writer.write_push(CONSTANT, var_num)
        self.vm_writer.write_call("Memory.alloc", 1)
        # Set the new memory spot to this
        self.vm_writer.write_pop("pointer", 0)

    def __compile_name(self):
        if self.peek_type() == IDENTIFIER:
            return self.eat(NAME_REG)
        else:
            print("ERROR: Identifier Expected")
            exit(-1)

    def __params(self):
        var_type = self.__compile_type(False)
        name = self.eat(NAME_REG)
        self.symbol_table.define(name, var_type, "argument")
        if self.peek_token(","):
            self.eat(",")

    def __handle_unary_op(self):
        command = self.eat("-|~")
        self.compile_term()
        if command == "-":
            self.vm_writer.write_arithmetic("neg")
        else:
            self.vm_writer.write_arithmetic(command)

    def __handle_identifier(self):
        """
        Handles the case of an identifier given as a term
        """
        # Case: varName [ expression ]
        if self.peek_next(CompilationEngine._OPEN_BRACKET):
            name = self.__compile_name()
            self.__handle_array(name)
            self.vm_writer.write_pop("pointer", 1)
            self.vm_writer.write_push("that", 0)
            # self.__var_name_array()
        # Case: subroutineCall:
        elif self.peek_next(CompilationEngine._OPEN_PARENTHESIS) or \
                self.peek_next(CompilationEngine._DOT):
            self.__subroutine_call()
        else:
            name = self.eat(NAME_REG)
            self.__write_push(name)

    def __handle_string_constant(self, string):
        """
        Handles the case of a string constant in a term
        :param string: the constant
        """
        self.vm_writer.write_push(CONSTANT, len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push(CONSTANT, ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def __handle_keyword_constant(self, word):
        """
        Handles the case of a keyword constant given in a term.
        If the word is not valid the program prints a relevant message and
        exits.
        :param word: The keyword
        """
        if word == "this":
            self.vm_writer.write_push("pointer", 0)
        else:
            self.vm_writer.write_push(CONSTANT, 0)
            if word == "true":
                self.vm_writer.write_arithmetic("~")

    def __is_term(self):
        curr_type = self.peek_type()
        return curr_type == STRING_CONST or curr_type == INT_CONST or \
               curr_type == KEYWORD or curr_type == IDENTIFIER or \
               self.peek_token(CompilationEngine._OPEN_PARENTHESIS) or \
               self.peek_token(CompilationEngine._OPS)

    def __subroutine_call(self):
        if self.curr_token.get_type() == IDENTIFIER:
            if self.peek_next(CompilationEngine._OPEN_PARENTHESIS):
                self.vm_writer.write_push("pointer", 0)
                self.__subroutine_name(self.class_name, 1)
            elif self.peek_next(CompilationEngine._DOT):
                self.__object_subroutine_call()
            else:
                print("Error: ( or . expected")
                exit(-1)

    def __object_subroutine_call(self):
        name = self.eat(NAME_REG)

        n_args = 0
        # Push the object reference to the stack
        if self.symbol_table.kind_of(name):
            self.__write_push(name)
            name = self.symbol_table.type_of(name)
            n_args = 1
        self.eat(CompilationEngine._DOT)
        self.__subroutine_name(name, n_args)

    def __subroutine_name(self, type_name, n_args):
        """
        Handles the case of subroutineName(expressionList)
        :return:
        """
        name = self.eat(NAME_REG)
        self.eat(CompilationEngine._OPEN_PARENTHESIS)
        nargs = self.compile_expression_list()
        self.eat(CompilationEngine._CLOSE_PARENTHESIS)
        self.vm_writer.write_call("{}.{}".format(type_name, name),
                                  nargs + n_args)

    def __handle_array(self, name):
        self.eat(CompilationEngine._OPEN_BRACKET)
        self.compile_expression()
        self.eat(CompilationEngine._CLOSE_BRACKET)
        self.__write_push(name)
        self.vm_writer.write_arithmetic("+")

    # ========== XML Handling ========== #

    def wrap(self, section_name, func):
        """
        Wraps a program structure block with the section_name, and executes
        its function
        :param section_name: The name of the section
        :param func: The function to perform
        :return:
        """
        self.write("<{}>".format(section_name))
        self.indent += 2
        func()
        self.indent -= 2
        self.write("</{}>".format(section_name))

    # ========== Token Handling ========== #

    def eat(self, token):
        """
        Handles advancing and writing terminal tokens.
        Will exit the program if an error occurs.
        :param token: The regex of the token to compare
        :return:
        """
        ctoken = self.curr_token.get_token()
        if re.match(token, self.curr_token.get_token()):
            # self.write(self.curr_token.get_xml_wrap())
            self.__advance_token()
            return ctoken
            # else:
            #     # if self.tokenizer.get_current_token() != token:
            #     print("Error: Expected " + token)
            #     exit(-1)

    def peek_token(self, compare_next):
        """
        :param compare_next: The regex to compare.
        :return: True if the current token matches the regex, False otherwise.
        """
        if self.curr_token:
            return re.match(compare_next, self.curr_token.get_token())
        return False

    def peek_type(self):
        """
        :return: the type of the current token
        """
        return self.curr_token.get_type()

    def peek_next(self, comp):
        next_token = self.tokenizer.get_next_token()
        # Case: There actually is a next token
        if next_token:
            return re.match(comp, self.tokenizer.get_next_token().get_token())
        return False

    def __advance_token(self):
        self.tokenizer.advance()
        if self.tokenizer.has_more_tokens():
            self.curr_token = self.tokenizer.get_current_token()

    # ========== VM Helper ========== #

    def __get_label(self, label):
        self.label_count += 1
        return "{}{}".format(label, str(self.label_count))

    def __write_pop(self, name):
        self.vm_writer.write_pop(self.symbol_table.kind_of(name),
                                 self.symbol_table.index_of(name))

    def __write_push(self, name):
        self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                  self.symbol_table.index_of(name))
Пример #12
0
# Developed for project 10 of nand2tetris course
from JackParser import Parser
from JackTokenizer import Tokenizer

from pathlib import Path
import sys

if __name__ == '__main__':
    input_arg = sys.argv[1]

    input_path = Path(input_arg).resolve()

    files = input_path.glob('*.jack')
    for file in files:
        print(file)
        output_path = str(file.with_name(file.stem + '.xml'))

        tokenizer = Tokenizer(input_file=str(file)).tokenize()

        parser = Parser(tokenizer)
        parser.parse_to_file(output_path)
Пример #13
0
class CompilationEngine:
    def __init__(self):
        self.local_symbol_table = None
        self.class_symbol_tables = {}
        self.type_size_map = {"int": 1, "bool": 1, "char": 1}
        self.unique_label_index = 0

    def SetClass(self, input_path, output_path):
        self.tokenizer = Tokenizer(input_path)
        self.output_file = open("{0}.xml".format(output_path), 'w')
        self.code_file = open(output_path, 'w')
        self.tokenizer.advance()
        self.indent_level = 0

        self.current_class_name = None
        self.current_sub_name = None

    def CompileClass(self):
        """
        Compiles a complete class.
        """
        self.EnterScope("class")

        self.ConsumeKeyword([Keyword.CLASS])
        self.ConsumeDeclaration("class", None)

        self.class_symbol_tables[self.current_class_name] = SymbolTable()

        self.ConsumeSymbol('{')

        totalSize = 0
        while (self.IsKeyword([Keyword.STATIC, Keyword.FIELD])):
            totalSize += self.CompileClassVarDec()

        self.type_size_map[self.current_class_name] = totalSize

        # subroutineDec*
        while (self.IsKeyword(subroutine_types)):
            self.CompileSubroutine()

        self.ConsumeSymbol('}')

        self.ExitScope("class")
        self.output_file.close()

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        """
        self.EnterScope("classVarDec")
        amount = 0
        category = self.tokenizer.keyword()
        self.ConsumeKeyword([Keyword.STATIC, Keyword.FIELD])
        varType = self.ConsumeType()
        self.ConsumeDeclaration(category, varType)
        amount += 1
        while (self.IsSymbol([','])):
            self.ConsumeSymbol(',')
            self.ConsumeDeclaration(category, varType)
            amount += 1

        self.ConsumeSymbol(';')

        self.ExitScope("classVarDec")

        return amount

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        self.EnterScope("subroutineDec")
        self.local_symbol_table = SymbolTable()

        subType = self.tokenizer.keyword()
        self.ConsumeKeyword([Keyword.CONSTRUCTOR, Keyword.FUNCTION,
                             Keyword.METHOD])
        if (self.IsKeyword([Keyword.VOID])):
            self.ConsumeKeyword([Keyword.VOID])
        else:
            self.ConsumeType()

        # The first param is converted to internal rep. the second is preserved
        self.ConsumeDeclaration(subType, subType)

        if subType == "method":
            self.local_symbol_table.indexList[Categories.ARGUMENT[0]] += 1

        self.ConsumeSymbol('(')
        self.CompileParameterList()
        self.ConsumeSymbol(')')

        self.CompileSubroutineBody()

        self.ExitScope("subroutineDec")

    def CompileSubroutineBody(self):
        self.EnterScope("subroutineBody")

        nVars = 0
        self.ConsumeSymbol('{')
        while (self.IsKeyword([Keyword.VAR])):
            nVars += self.CompileVarDec()

        self.WriteCode("function {0}.{1} {2}".format(self.current_class_name,
                                                     self.current_sub_name,
                                                     str(nVars)))

        entry = self.SymbolTableLookup(self.current_sub_name)

        if entry.type == "constructor":
            self.WriteCode("push constant {0}".
                           format(self.type_size_map[self.current_class_name]))
            self.WriteCode("call Memory.alloc 1")
            self.WriteCode("pop pointer 0")
        elif entry.type == "method":
            self.WriteCode("push argument 0")
            self.WriteCode("pop pointer 0")

        self.CompileStatements()
        self.ConsumeSymbol('}')

        self.ExitScope("subroutineBody")

    def ConsumeDeclaration(self, category, entry_type):
        entry = SymbolTableEntry()
        entry.SetCategory(category)
        entry.name = self.ConsumeIdentifier()
        entry.type = entry_type

        local_categories = [Categories.VAR, Categories.ARGUMENT]
        class_categories = [Categories.SUBROUTINE, Categories.FIELD,
                            Categories.STATIC]

        # Updating current class / subroutine names
        if entry.category == Categories.CLASS:
            self.current_class_name = entry.name
        elif entry.category == Categories.SUBROUTINE:
            self.current_sub_name = entry.name

        # Updating local / class symbol tables
        if entry.category in local_categories:
            self.local_symbol_table.InsertEntry(entry)
        elif entry.category in class_categories:
            self.class_symbol_tables[self.current_class_name].\
                InsertEntry(entry)

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list,
        not including the enclosing "()".
        """
        self.EnterScope("parameterList")
        nVars = 0

        if (not self.IsSymbol([')'])):
            varType = self.ConsumeType()
            self.ConsumeDeclaration("argument", varType)
            nVars += 1

        while(self.IsSymbol([','])):
            self.ConsumeSymbol(',')
            varType = self.ConsumeType()
            self.ConsumeDeclaration("argument", varType)
            nVars += 1

        self.ExitScope("parameterList")

        return nVars

    def CompileVarDec(self):
        """
        Compiles a var declaration.
        """
        self.EnterScope("varDec")
        nVars = 0
        self.ConsumeKeyword([Keyword.VAR])
        varType = self.ConsumeType()
        self.ConsumeDeclaration("var", varType)
        nVars += 1
        while (self.IsSymbol([','])):
            self.ConsumeSymbol(',')
            self.ConsumeDeclaration("var", varType)
            nVars += 1

        self.ConsumeSymbol(';')

        self.ExitScope("varDec")

        return nVars

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the
        enclosing "{}".
        """
        self.EnterScope("statements")

        while self.IsKeyword([Keyword.LET, Keyword.IF, Keyword.WHILE,
                              Keyword.DO, Keyword.RETURN]):
            if self.IsKeyword([Keyword.LET]):
                self.CompileLet()

            if self.IsKeyword([Keyword.IF]):
                self.CompileIf()

            if self.IsKeyword([Keyword.WHILE]):
                self.CompileWhile()

            if self.IsKeyword([Keyword.DO]):
                self.CompileDo()

            if self.IsKeyword([Keyword.RETURN]):
                self.CompileReturn()

        self.ExitScope("statements")

    def CompileDo(self):
        """
        Compiles a do statement.
        """
        self.EnterScope("doStatement")
        self.ConsumeKeyword([Keyword.DO])
        prefix = self.ConsumeIdentifier()
        calleeLocation = None
        subName = None

        if self.IsSymbol(['.']):
            self.ConsumeSymbol('.')
            entry = self.SymbolTableLookup(prefix)
            if entry is not None and entry.category != Categories.CLASS:
                calleeLocation = "{0} {1}".format(entry.segment, entry.index)
                prefix = entry.type
            postfix = self.ConsumeIdentifier()
            subName = "{0}.{1}".format(prefix, postfix)
        else:
            subName = "{0}.{1}".format(self.current_class_name, prefix)
            calleeLocation = "pointer 0"

        nArgs = 0
        # This means we are calling an instance method, so we push it first
        if calleeLocation is not None:
            self.WriteCode("push {0} //Pushing callee".format(calleeLocation))
            nArgs += 1

        self.ConsumeSymbol('(')
        nArgs += self.CompileExpressionList()
        self.ConsumeSymbol(')')
        self.ConsumeSymbol(';')

        self.WriteCode("call {0} {1}".format(subName, nArgs))

        # Get rid of the return value (garbage)
        self.WriteCode("pop temp 0")

        self.ExitScope("doStatement")

    def CompileLet(self):
        """
        Compiles a let statement.
        """
        self.EnterScope("letStatement")

        self.ConsumeKeyword([Keyword.LET])
        varName = self.ConsumeIdentifier()
        entry = self.SymbolTableLookup(varName)
        isArray = False
        if self.IsSymbol(['[']):
            isArray = True
            self.ConsumeSymbol('[')
            self.CompileExpression()
            self.WriteCode("push {0} {1}".
                           format(entry.segment, entry.index))  # array base
            self.WriteCode("add")  # Add offset
            self.ConsumeSymbol(']')
        self.ConsumeSymbol('=')
        self.CompileExpression()
        self.ConsumeSymbol(';')

        if isArray:
            self.WriteCode("pop temp 0")  # Save the expression result
            self.WriteCode("pop pointer 1")  # Align THAT
            self.WriteCode("push temp 0")  # Push the exp result
            # Put the exp result in the array position
            self.WriteCode("pop that 0")
        else:
            self.WriteCode("pop {0} {1}".format(entry.segment, entry.index))

        self.ExitScope("letStatement")

    def CompileWhile(self):
        """
        Compiles a while statement.
        """
        self.EnterScope("whileStatement")

        self.ConsumeKeyword([Keyword.WHILE])
        L1 = self.GenerateUniqueLabel()
        L2 = self.GenerateUniqueLabel()

        # While entry point
        self.WriteCode("label {0}".format(L1))

        # while loop condition
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

        # Jump to L2 if condition doesn't hold
        self.WriteCode("not")
        self.WriteCode("if-goto {0}".format(L2))

        # While loop logic
        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

        # Go back to L1 for another iteration
        self.WriteCode("goto {0}".format(L1))

        # While termination point
        self.WriteCode("label {0}".format(L2))

        self.ExitScope("whileStatement")

    def CompileReturn(self):
        """
        Compiles a return statement.
        """
        self.EnterScope("returnStatement")

        self.ConsumeKeyword([Keyword.RETURN])
        if not self.IsSymbol([';']):
            self.CompileExpression()
        else:
            self.WriteCode("push constant 0")
        self.ConsumeSymbol(';')

        self.WriteCode("return")

        self.ExitScope("returnStatement")

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing
        else clause.
        """
        self.EnterScope("ifStatement")

        self.ConsumeKeyword([Keyword.IF])
        IF_TRUE = self.GenerateUniqueLabel()
        IF_FALSE = self.GenerateUniqueLabel()
        IF_END = self.GenerateUniqueLabel()

        # The if statement condition
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

        # Jump to L1 if condition doesn't hold
        self.WriteCode("if-goto {0}".format(IF_TRUE))
        self.WriteCode("goto {0}".format(IF_FALSE))
        self.WriteCode("label {0}".format(IF_TRUE))

        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

        self.WriteCode("goto {0}".format(IF_END))
        self.WriteCode("label {0}".format(IF_FALSE))
        if self.IsKeyword([Keyword.ELSE]):
            self.ConsumeKeyword([Keyword.ELSE])
            self.ConsumeSymbol('{')
            self.CompileStatements()
            self.ConsumeSymbol('}')

        self.WriteCode("label {0}".format(IF_END))
        self.ExitScope("ifStatement")

    def CompileExpression(self):
        """
        Compiles an expression.
        """
        self.EnterScope("expression")

        self.CompileTerm()
        while (self.IsSymbol(op_symbols.keys())):
            op = self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
            self.WriteCode(op_symbols[op])

        self.ExitScope("expression")

    def CompileTerm(self):
        """
        Compiles a term.
        """
        self.EnterScope("term")

        keyword_constants = [Keyword.TRUE, Keyword.FALSE, Keyword.NULL,
                             Keyword.THIS]
        termName = None

        if self.IsType(TokenType.INT_CONST):
            self.WriteCode("push constant {0}".
                           format(self.ConsumeIntegerConstant()))

        elif self.IsType(TokenType.STRING_CONST):
            self.ConsumeStringConstant()

        elif self.IsKeyword(keyword_constants):
                keyword = self.ConsumeKeyword(keyword_constants)
                if keyword == "false":
                    self.WriteCode("push constant 0")
                elif keyword == "true":
                    self.WriteCode("push constant 0")
                    self.WriteCode("not")
                elif keyword == "this":
                    self.WriteCode("push pointer 0")
                elif keyword == "null":
                    self.WriteCode("push constant 0")

        elif self.IsSymbol(['(']):
            self.ConsumeSymbol('(')
            self.CompileExpression()
            self.ConsumeSymbol(')')

        elif self.IsSymbol(unary_symbols.keys()):
            symbol = self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
            self.WriteCode(unary_symbols[symbol])
        else:
            termName = self.ConsumeIdentifier()
            entry = self.SymbolTableLookup(termName)
            if entry is not None:
                if CategoryUtils.IsIndexed(entry.category):
                    self.WriteCode("push {0} {1} //{2}".
                                   format(CategoryUtils.
                                          GetSegment(entry.category),
                                          entry.index, termName))

            if self.IsSymbol(['[']):  # varName '[' expression ']'
                self.ConsumeSymbol('[')
                self.CompileExpression()
                self.WriteCode("add")
                self.WriteCode("pop pointer 1")
                self.WriteCode("push that 0")
                self.ConsumeSymbol(']')
            elif self.IsSymbol(['(']):  # subroutineCall
                self.ConsumeSymbol('(')
                self.WriteCode("call {0} {1}".
                               format(termName, self.CompileExpressionList()))
                self.ConsumeSymbol(')')
            elif self.IsSymbol(['.']):
                self.ConsumeSymbol('.')
                funcName = self.ConsumeIdentifier()
                entry = self.GetSubroutineEntry(termName, funcName)
                extraParam = 0
                if entry is not None and entry.type == "method":
                    termName = self.SymbolTableLookup(termName).type
                    extraParam = 1

                self.ConsumeSymbol('(')
                self.WriteCode("call {0}.{1} {2}".
                               format(termName, funcName,
                                      self.CompileExpressionList() +
                                      extraParam))
                self.ConsumeSymbol(')')

        self.ExitScope("term")

        return termName

    def GetSubroutineEntry(self, prefix, postfix):
        entry = self.SymbolTableLookup(postfix)
        if entry is not None:
            return entry

        varEntry = self.SymbolTableLookup(prefix)
        if varEntry is not None:
            return self.ClassSymbolTableLookup(postfix, varEntry.type)

        return None

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated
        list of expressions.
        """
        self.EnterScope("expressionList")
        nArgs = 0
        if not self.IsSymbol(')'):
            self.CompileExpression()
            nArgs += 1

        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            self.CompileExpression()
            nArgs += 1

        self.ExitScope("expressionList")

        return nArgs

    def IsKeyword(self, keyword_list):
        return (self.IsType(TokenType.KEYWORD) and
                self.tokenizer.keyword() in keyword_list)

    def IsSymbol(self, symbol_list):
        return (self.IsType(TokenType.SYMBOL) and
                self.tokenizer.symbol() in symbol_list)

    def IsType(self, tokenType):
        return self.tokenizer.tokenType() == tokenType

    def ConsumeType(self):
        if (self.tokenizer.tokenType() == TokenType.IDENTIFIER):
            return self.ConsumeIdentifier()
        else:
            return self.ConsumeKeyword([Keyword.INT, Keyword.CHAR,
                                        Keyword.BOOLEAN])

    def ConsumeKeyword(self, keywordList):
        self.VerifyTokenType(TokenType.KEYWORD)
        actual = self.tokenizer.keyword()
        if actual not in keywordList:
            raise Exception("Expected keywords: {}, Actual: {}".
                            format(keywordList, actual))

        self.OutputTag("keyword", actual)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

        return actual

    def ConsumeSymbol(self, symbol):
        self.VerifyTokenType(TokenType.SYMBOL)
        actual = self.tokenizer.symbol()
        if actual != symbol:
            raise Exception("Expected symbol: {}, Actual: {}".
                            format(symbol, actual))
        self.OutputTag("symbol", actual)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

        return actual

    def ConsumeIntegerConstant(self):
        self.VerifyTokenType(TokenType.INT_CONST)
        actual = self.tokenizer.intVal()
        self.OutputTag("integerConstant", self.tokenizer.intVal())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

        return actual

    def ConsumeStringConstant(self):
        self.VerifyTokenType(TokenType.STRING_CONST)
        actual = self.tokenizer.stringVal()
        self.WriteCode("push constant {0}".format(len(actual)))
        self.WriteCode("call String.new 1")
        for c in actual:
            self.WriteCode("push constant {0}".format(ord(c)))
            self.WriteCode("call String.appendChar 2")
        self.OutputTag("stringConstant", self.tokenizer.stringVal())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

        return actual

    def ConsumeIdentifier(self):
        self.VerifyTokenType(TokenType.IDENTIFIER)
        actual = self.tokenizer.identifier()
        self.OutputTag("identifierName", self.tokenizer.identifier())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

        return actual

    def VerifyTokenType(self, tokenType):
        actual = self.tokenizer.tokenType()
        if actual != tokenType:
            raise Exception("Expected token type: {}, Actual: {}".
                            format(tokenType, actual))

    def EnterScope(self, name):
        self.Output("<{}>".format(name))
        self.indent_level += 1

    def ExitScope(self, name):
        self.indent_level -= 1
        self.Output("</{}>".format(name))

    def ClassSymbolTableLookup(self, name, containingClass):
        return self.class_symbol_tables[containingClass].GetEntry(name)

    def SymbolTableLookup(self, name):
        entry = self.local_symbol_table.GetEntry(name)
        if entry is not None:
            return entry
        else:
            return self.ClassSymbolTableLookup(name, self.current_class_name)

    def WriteCode(self, line):
        self.code_file.write(line + '\n')

    def OutputTag(self, tag, value):
        self.Output("<{}> {} </{}>".format(tag, value, tag))

    def Output(self, text):
        self.output_file.write(("  " * self.indent_level) + text + '\n')

    def GenerateUniqueLabel(self):
        self.unique_label_index += 1
        return "pfl{0}".format(self.unique_label_index - 1)
Пример #14
0
class CompilationEngine:
    def __init__(self, inpath, outpath):
        self.tokenizer = Tokenizer(inpath)
        self.symboltable = SymbolTable()
        self.vmwriter = VMWriter(outpath)
        self._class_name = None
        if self.tokenizer.has_more_tokens():
            self.compile_class()
        self.vmwriter.close()
        print("{0} completed.".format(outpath))

    def _subroutine_init(self):
        self._sub_kind = None
        self._sub_name = None
        self._ret_type = None

    def _advance(self):
        self._check_EOF()
        self.tokenizer.advance()

    @property
    def _current_token(self):
        t_type = self.tokenizer.token_type
        return (self.tokenizer.keyword if t_type == T_KEYWORD else
                self.tokenizer.symbol if t_type == T_SYMBOL else
                self.tokenizer.identifier if t_type == T_ID else self.tokenizer
                .intval if t_type == T_INTEGER else self.tokenizer.stringval)

    @property
    def _current_tok_type(self):
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """return raw next_token in the tokenizer"""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Check whether the next_token(terminal) in the tokenizer meets the 
        requirement (specific token or just token type). If meets, tokenizer
        advances (update current_token and next_token)  and terminal will be 
        writed into outfile; If not, report an error."""
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token, ))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type, ))

    def _require_id(self):
        self._require_token(T_ID)

    def _require_kw(self, token):
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    def compile_class_name(self):
        self._require_id()
        self._class_name = self._current_token

    def compile_subroutine_name(self):
        self._require_id()
        self._sub_name = self._current_token

    def compile_var_name(self, kind=None, type=None, declare=False):
        self._require_id()
        name = self._current_token
        if declare is True:  # kind and type are not None
            self.symboltable.define(name, type, kind)
        else:
            self.check_var_name(name, type)

    def check_var_name(self, name, type=None):
        recorded_kind = self.symboltable.kindof(name)
        if recorded_kind is None:
            self._traceback('name used before declared: {0}'.format(name))
        elif type is not None:
            recorded_type = self.symboltable.typeof(name)
            if recorded_type != type:
                get = '{0} "{1}"'.format(recorded_type, name)
                self._error(expect_types=(type, ), get=get)

    def compile_type(self, advanced=False, expect='type'):
        # int, string, boolean or identifier(className)
        if advanced is False:
            self._advance()
        if (self._current_token not in SymbolTable.builtIn_types
                and self._current_tok_type != T_ID):
            return self._error(expect=expect)

    def compile_return_type(self):
        # void or type
        self._advance()
        if self._current_token != KW_VOID:
            self.compile_type(True, '"void" or type')
        self._ret_type = self._current_token
        if self._sub_kind == KW_CONSTRUCTOR and self._ret_type != self._class_name:
            me = 'constructor expect current class as return type'
            self._traceback(me)

    @record_non_terminal('class')
    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec first, subroutineDec second.")
        if self.tokenizer.has_more_tokens():
            if self._next_token == KW_CLASS:
                self._traceback('Only expect one classDec.')
            self._traceback('Unexpected extra tokens.')

    def compile_declare(self):
        self._advance()
        id_kind = self._current_token  # ('static | field | var')
        # type varName (',' varName)* ';'
        self.compile_type()
        id_type = self._current_token
        self.compile_var_name(id_kind, id_type, declare=True)
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self.compile_var_name(id_kind, id_type, declare=True)
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._subroutine_init()
        self.symboltable.start_subroutine()
        self._advance()
        self._sub_kind = self._current_token
        if self._sub_kind == KW_METHOD:
            self.symboltable.define('this', self._class_name, 'argument')
        self.compile_return_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name('argument', self._current_token, True)
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name('argument', self._current_token, True)

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        self.compile_function()
        self.compile_statements()
        self._require_sym('}')

    def compile_function(self):
        fn_name = '.'.join((self._class_name, self._sub_name))
        num_locals = self.symboltable.varcount(KW_VAR)
        self.vmwriter.write_function(fn_name,
                                     num_locals)  # function fn_name num_locals
        # set up pointer this
        if self._sub_kind == KW_CONSTRUCTOR:
            num_fields = self.symboltable.varcount(KW_FIELD)
            self.vmwriter.write_push('constant', num_fields)
            self.vmwriter.write_call('Memory.alloc', 1)
            self.vmwriter.write_pop('pointer', 0)
        elif self._sub_kind == KW_METHOD:
            self.vmwriter.write_push('argument', 0)
            self.vmwriter.write_pop('pointer', 0)

    @record_non_terminal('varDec')
    def compile_vardec(self):
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        # (letStatement | ifStatement | whileStatement | doStatement |
        # returnStatement)*
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            if last_statement == 'do':
                self.compile_do()
            elif last_statement == 'let':
                self.compile_let()
            elif last_statement == 'while':
                self.compile_while()
            elif last_statement == 'return':
                self.compile_return()
            elif last_statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')
        #if STACK[-2] == 'subroutineBody' and last_statement != 'return':
        #    self._error(expect='return statement', get=last_statement)

    @record_non_terminal('doStatement')
    def compile_do(self):
        # 'do' subroutineCall ';'
        self._advance()
        self.compile_subroutine_call()
        self.vmwriter.write_pop('temp', 0)  # temp[0] store useless value
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self.compile_var_name()
        var_name = self._current_token
        array = (self._next_token == '[')
        if array:
            self.compile_array_subscript(
                var_name)  # push (array base + subscript)
        self._require_sym('=')
        self.compile_expression()  # push expression value
        self._require_sym(';')
        if array:
            self.vmwriter.write_pop('temp', 1)  # pop exp value to temp[1]
            self.vmwriter.write_pop('pointer',
                                    1)  # that = array base + subscript
            self.vmwriter.write_push('temp', 1)
            self.vmwriter.write_pop('that', 0)
        else:
            self.assign_variable(var_name)

    kind_segment = {
        'static': 'static',
        'field': 'this',
        'argument': 'argument',
        'var': 'local'
    }

    def assign_variable(self, name):
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_pop(self.kind_segment[kind], index)

    def load_variable(self, name):
        kind = self.symboltable.kindof(name)
        index = self.symboltable.indexof(name)
        self.vmwriter.write_push(self.kind_segment[kind], index)

    label_num = 0

    @record_non_terminal('whileStatement')
    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        start_label = 'WHILE_START_' + str(self.label_num)
        end_label = 'WHILE_END_' + str(self.label_num)
        self.label_num += 1
        self.vmwriter.write_label(start_label)
        self.compile_cond_expression(start_label, end_label)

    @record_non_terminal('ifStatement')
    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        else_label = 'IF_ELSE_' + str(self.label_num)
        end_label = 'IF_END_' + str(self.label_num)
        self.label_num += 1
        self.compile_cond_expression(end_label, else_label)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_label(end_label)

    def compile_cond_expression(self, goto_label, end_label):
        self._require_brackets('()', self.compile_expression)
        self.vmwriter.write_arithmetic('not')
        self.vmwriter.write_if(end_label)
        self._require_brackets('{}', self.compile_statements)
        self.vmwriter.write_goto(goto_label)  # meet
        self.vmwriter.write_label(end_label)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        # 'return' expression? ';'
        if self._sub_kind == KW_CONSTRUCTOR:
            self._require_kw(KW_THIS)  # constructor must return 'this'
            self.vmwriter.write_push('pointer', 0)
        elif self._next_token != ';':
            self.compile_expression()
        else:
            if self._ret_type != KW_VOID:
                self._traceback('expect return ' + self._ret_type)
            self.vmwriter.write_push('constant', 0)
        self._require_sym(';')
        self.vmwriter.write_return()

    ##########################
    # expression compilation #
    ##########################

    unary_ops = {'-': 'neg', '~': 'not'}
    binary_ops = {
        '+': 'add',
        '-': 'sub',
        '*': None,
        '/': None,
        '&': 'and',
        '|': 'or',
        '<': 'lt',
        '>': 'gt',
        '=': 'eq'
    }

    @record_non_terminal('expression')
    def compile_expression(self):
        # term (op term)*
        self.compile_term()
        while self._next_token in self.binary_ops:
            self._advance()
            if self._current_tok_type != T_SYMBOL:
                self._error(expect_types=(T_SYMBOL, ))
            op = self._current_token
            self.compile_term()
            self.compile_binaryop(op)

    def compile_binaryop(self, op):
        if op == '*':
            self.vmwriter.write_call('Math.multiply', 2)
        elif op == '/':
            self.vmwriter.write_call('Math.divide', 2)
        else:
            self.vmwriter.write_arithmetic(self.binary_ops[op])

    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @record_non_terminal('term')
    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok_type == T_KEYWORD and tok in self.kw_consts:
                self.compile_kw_consts(tok)
            elif tok_type == T_INTEGER:
                self.vmwriter.write_push('constant', tok)
            elif tok_type == T_STRING:
                self.compile_string(tok)
            elif tok_type == T_ID:
                if self._next_token in '(.':
                    self.compile_subroutine_call()
                elif self._next_token == '[':
                    self.check_var_name(tok)
                    self.compile_array_subscript(tok)
                    self.vmwriter.write_pop('pointer', 1)
                    self.vmwriter.write_push('that', 0)
                else:
                    self.check_var_name(tok)
                    self.load_variable(tok)
            elif tok_type == T_SYMBOL and tok in self.unary_ops:
                self.compile_term()
                self.vmwriter.write_arithmetic(self.unary_ops[tok])
            else:
                self._error(expect='term')

    # keywordConstant: 'true' | 'false' | 'null' | 'this'
    def compile_kw_consts(self, kw):
        if kw == KW_THIS:
            self.vmwriter.write_push('pointer', 0)
        elif kw == KW_TRUE:
            self.vmwriter.write_push('constant', 1)
            self.vmwriter.write_arithmetic('neg')
        else:
            self.vmwriter.write_push('constant', 0)

    def compile_string(self, string):
        self.vmwriter.write_push('constant', len(string))
        self.vmwriter.write_call('String.new', 1)
        for char in string:
            self.vmwriter.write_push('constant', ord(char))
            self.vmwriter.write_call('String.appendChar', 2)

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')' |
        # (className | varName) '.' subroutineName '(' expressionList ')'
        ## the first element of structure has already been compiled.
        fn_name, num_args = self.compile_call_name()
        self._require_sym('(')
        num_args = self.compile_expressionlist(num_args)
        self._require_sym(')')
        self.vmwriter.write_call(fn_name, num_args)

    def compile_call_name(self):
        # the fisrt name of subroutine call could be (className or varName) if
        # it is followed by '.', or subroutineName if followed by '('.
        # return name of function call and num_args (1: means pushing this, 0:
        # means don't)
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID, ))
        name = self._current_token
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
            sub_name = self._current_token
            if (name in self.symboltable.all_class_types()
                    or name in SymbolTable.builtIn_class
                    or name == self._class_name):
                return '.'.join((name, sub_name)), 0  # className
            else:
                self.check_var_name(name)  # varName with class type
                type = self.symboltable.typeof(name)
                if type in SymbolTable.builtIn_types:
                    return self._error(expect='class instance or class',
                                       get=type)
                self.load_variable(name)
                return '.'.join((type, sub_name)), 1
        elif self._next_token == '(':
            self.vmwriter.write_push('pointer', 0)  # push this to be 1st arg
            return '.'.join((self._class_name, name)), 1  # subroutineName

    @record_non_terminal('expressionList')
    def compile_expressionlist(self, num_args):
        # (expression (',' expression)*)?
        if self._next_token != ')':
            self.compile_expression()
            num_args += 1
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()
            num_args += 1
        return num_args

    def compile_array_subscript(self, var_name):
        # varName '[' expression ']'
        self.check_var_name(var_name, 'Array')
        self._require_brackets(
            '[]', self.compile_expression)  # push expression value
        self.load_variable(var_name)
        self.vmwriter.write_arithmetic('add')  # base + subscript

    def _check_EOF(self):
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        if expect is None:
            exp_tok = ' or '.join(('"{0}"'.format(t) for t in expect_toks))
            exp_types = ('type {0}'.format(token_tags[t])
                         for t in expect_types)
            exp_type = ' or '.join(exp_types)
            if exp_tok and exp_type:
                expect = ' or '.join(exp_tok, exp_type)
            else:
                expect = exp_tok + exp_type
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        if DEBUG:
            print('--------------------------------------------')
            print(self.symboltable)
            print(self.symboltable.all_class_types())
            print('--------------------------------------------')
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info,
                                                  message))
Пример #15
0
 def __init__(self, inputPath, outputPath):
     self.tokenizer = Tokenizer(inputPath)
     self.outputFile = open(outputPath, 'w')
     self.tokenizer.advance()
     self.indentLevel = 0
Пример #16
0
class CompilationEngine:
    def __init__(self, inputPath, outputPath):
        self.tokenizer = Tokenizer(inputPath)
        self.outputFile = open(outputPath, 'w')
        self.tokenizer.advance()
        self.indentLevel = 0

    def CompileClass(self):
        """
        Compiles a complete class.
        """
        self.EnterScope("class")

        self.ConsumeKeyword([Keyword.CLASS])
        self.ConsumeIdentifier()  # className
        self.ConsumeSymbol('{')

        while (self.IsKeyword([Keyword.STATIC, Keyword.FIELD])):
            self.CompileClassVarDec()

        # subroutineDec*
        while (self.IsKeyword([Keyword.CONSTRUCTOR, Keyword.FUNCTION,
                               Keyword.METHOD])):
            self.CompileSubroutine()

        self.ConsumeSymbol('}')

        self.ExitScope("class")
        self.outputFile.close()

    def CompileClassVarDec(self):
        """
        Compiles a static declaration or a field declaration.
        """
        self.EnterScope("classVarDec")

        self.ConsumeKeyword([Keyword.STATIC, Keyword.FIELD])
        self.ConsumeType()
        self.ConsumeIdentifier()  # varName

        while (self.IsSymbol([','])):
            self.ConsumeSymbol(',')
            self.ConsumeIdentifier()  # varName

        self.ConsumeSymbol(';')

        self.ExitScope("classVarDec")

    def CompileSubroutine(self):
        """
        Compiles a complete method, function, or constructor.
        """
        self.EnterScope("subroutineDec")

        self.ConsumeKeyword([Keyword.CONSTRUCTOR, Keyword.FUNCTION,
                             Keyword.METHOD])
        if (self.IsKeyword([Keyword.VOID])):
            self.ConsumeKeyword([Keyword.VOID])
        else:
            self.ConsumeType()

        self.ConsumeIdentifier()  # subroutineName

        self.ConsumeSymbol('(')
        self.CompileParameterList()
        self.ConsumeSymbol(')')

        self.CompileSubroutineBody()

        self.ExitScope("subroutineDec")

    def CompileSubroutineBody(self):
        self.EnterScope("subroutineBody")

        self.ConsumeSymbol('{')
        while (self.IsKeyword([Keyword.VAR])):
            self.CompileVarDec()
        self.CompileStatements()
        self.ConsumeSymbol('}')

        self.ExitScope("subroutineBody")

    def CompileParameterList(self):
        """
        Compiles a (possibly empty) parameter list,
        not including the enclosing "()".
        """
        self.EnterScope("parameterList")

        if (not self.IsSymbol([')'])):
            self.ConsumeType()
            self.ConsumeIdentifier()

        while(self.IsSymbol([','])):
            self.ConsumeSymbol(',')
            self.ConsumeType()
            self.ConsumeIdentifier()

        self.ExitScope("parameterList")

    def CompileVarDec(self):
        """
        Compiles a var declaration.
        """
        self.EnterScope("varDec")

        self.ConsumeKeyword([Keyword.VAR])
        self.ConsumeType()
        self.ConsumeIdentifier()  # varName
        while (self.IsSymbol([','])):
            self.ConsumeSymbol(',')
            self.ConsumeIdentifier()  # varName

        self.ConsumeSymbol(';')

        self.ExitScope("varDec")

    def CompileStatements(self):
        """
        Compiles a sequence of statements, not including the
        enclosing "{}".
        """
        self.EnterScope("statements")

        while self.IsKeyword([Keyword.LET, Keyword.IF, Keyword.WHILE,
                              Keyword.DO, Keyword.RETURN]):
            if self.IsKeyword([Keyword.LET]):
                self.CompileLet()

            if self.IsKeyword([Keyword.IF]):
                self.CompileIf()

            if self.IsKeyword([Keyword.WHILE]):
                self.CompileWhile()

            if self.IsKeyword([Keyword.DO]):
                self.CompileDo()

            if self.IsKeyword([Keyword.RETURN]):
                self.CompileReturn()

        self.ExitScope("statements")

    def CompileDo(self):
        """
        Compiles a do statement.
        """
        self.EnterScope("doStatement")

        self.ConsumeKeyword([Keyword.DO])
        self.ConsumeIdentifier()
        if self.IsSymbol(['.']):
            self.ConsumeSymbol('.')
            self.ConsumeIdentifier()
        self.ConsumeSymbol('(')
        self.CompileExpressionList()
        self.ConsumeSymbol(')')
        self.ConsumeSymbol(';')

        self.ExitScope("doStatement")

    def CompileLet(self):
        """
        Compiles a let statement.
        """
        self.EnterScope("letStatement")

        self.ConsumeKeyword([Keyword.LET])
        self.ConsumeIdentifier()
        if self.IsSymbol(['[']):
            self.ConsumeSymbol('[')
            self.CompileExpression()
            self.ConsumeSymbol(']')
        self.ConsumeSymbol('=')
        self.CompileExpression()
        self.ConsumeSymbol(';')

        self.ExitScope("letStatement")

    def CompileWhile(self):
        """
        Compiles a while statement.
        """
        self.EnterScope("whileStatement")

        self.ConsumeKeyword([Keyword.WHILE])
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

        self.ExitScope("whileStatement")

    def CompileReturn(self):
        """
        Compiles a return statement.
        """
        self.EnterScope("returnStatement")

        self.ConsumeKeyword([Keyword.RETURN])
        if not self.IsSymbol([';']):
            self.CompileExpression()
        self.ConsumeSymbol(';')

        self.ExitScope("returnStatement")

    def CompileIf(self):
        """
        Compiles an if statement, possibly with a trailing
        else clause.
        """
        self.EnterScope("ifStatement")

        self.ConsumeKeyword([Keyword.IF])
        self.ConsumeSymbol('(')
        self.CompileExpression()
        self.ConsumeSymbol(')')

        self.ConsumeSymbol('{')
        self.CompileStatements()
        self.ConsumeSymbol('}')

        if self.IsKeyword([Keyword.ELSE]):
            self.ConsumeKeyword([Keyword.ELSE])
            self.ConsumeSymbol('{')
            self.CompileStatements()
            self.ConsumeSymbol('}')

        self.ExitScope("ifStatement")

    def CompileExpression(self):
        """
        Compiles an expression.
        """
        self.EnterScope("expression")

        op_symbols = ['+', '-', '*', '/', '&amp;', '|', "&lt;", "&gt;", '=']
        self.CompileTerm()
        while (self.IsSymbol(op_symbols)):
            self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()

        self.ExitScope("expression")

    def CompileTerm(self):
        """
        Compiles a term.
        """
        self.EnterScope("term")

        keyword_constants = [Keyword.TRUE, Keyword.FALSE, Keyword.NULL,
                             Keyword.THIS]
        unary_symbols = ['-', '~']

        if self.IsType(TokenType.INT_CONST):
            self.ConsumeIntegerConstant()

        elif self.IsType(TokenType.STRING_CONST):
            self.ConsumeStringConstant()

        elif self.IsKeyword(keyword_constants):
                self.ConsumeKeyword(keyword_constants)

        elif self.IsSymbol(['(']):
            self.ConsumeSymbol('(')
            self.CompileExpression()
            self.ConsumeSymbol(')')

        elif self.IsSymbol(unary_symbols):
            self.ConsumeSymbol(self.tokenizer.symbol())
            self.CompileTerm()
        else:
            self.ConsumeIdentifier()
            if self.IsSymbol(['[']):    # varName '[' expression ']'
                self.ConsumeSymbol('[')
                self.CompileExpression()
                self.ConsumeSymbol(']')
            elif self.IsSymbol(['(']):  # subroutineCall
                self.ConsumeSymbol('(')
                self.CompileExpressionList()
                self.ConsumeSymbol(')')
            elif self.IsSymbol(['.']):
                self.ConsumeSymbol('.')
                self.ConsumeIdentifier()
                self.ConsumeSymbol('(')
                self.CompileExpressionList()
                self.ConsumeSymbol(')')

        self.ExitScope("term")

    def CompileExpressionList(self):
        """
        Compiles a (possibly empty) comma-separated
        list of expressions.
        """
        self.EnterScope("expressionList")

        if not self.IsSymbol(')'):
            self.CompileExpression()

        while self.IsSymbol([',']):
            self.ConsumeSymbol(',')
            self.CompileExpression()

        self.ExitScope("expressionList")

    def IsKeyword(self, keyword_list):
        return (self.IsType(TokenType.KEYWORD) and
                self.tokenizer.keyword() in keyword_list)

    def IsSymbol(self, symbol_list):
        return (self.IsType(TokenType.SYMBOL) and
                self.tokenizer.symbol() in symbol_list)

    def IsType(self, tokenType):
        return self.tokenizer.tokenType() == tokenType

    def ConsumeType(self):
        if (self.tokenizer.tokenType() == TokenType.IDENTIFIER):
            self.ConsumeIdentifier()
        else:
            self.ConsumeKeyword([Keyword.INT, Keyword.CHAR, Keyword.BOOLEAN])

    def ConsumeKeyword(self, keywordList):
        self.VerifyTokenType(TokenType.KEYWORD)
        actual = self.tokenizer.keyword()
        if actual not in keywordList:
            raise Exception("Expected keywords: {}, Actual: {}".
                            format(keywordList, actual))
        self.OutputTag("keyword", actual)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def ConsumeSymbol(self, symbol):
        self.VerifyTokenType(TokenType.SYMBOL)
        actual = self.tokenizer.symbol()
        if actual != symbol:
            raise Exception("Expected symbol: {}, Actual: {}".
                            format(symbol, actual))
        self.OutputTag("symbol", actual)
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def ConsumeIntegerConstant(self):
        self.VerifyTokenType(TokenType.INT_CONST)
        self.OutputTag("integerConstant", self.tokenizer.intVal())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def ConsumeStringConstant(self):
        self.VerifyTokenType(TokenType.STRING_CONST)
        self.OutputTag("stringConstant", self.tokenizer.stringVal())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def ConsumeIdentifier(self):
        self.VerifyTokenType(TokenType.IDENTIFIER)
        self.OutputTag("identifier", self.tokenizer.identifier())
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def VerifyTokenType(self, tokenType):
        actual = self.tokenizer.tokenType()
        if actual != tokenType:
            raise Exception("Expected token type: {}, Actual: {}".
                            format(tokenType, actual))

    def EnterScope(self, name):
        self.Output("<{}>".format(name))
        self.indentLevel += 1

    def ExitScope(self, name):
        self.indentLevel -= 1
        self.Output("</{}>".format(name))

    def OutputTag(self, tag, value):
        self.Output("<{}> {} </{}>".format(tag, value, tag))

    def Output(self, text):
        self.outputFile.write(("  " * self.indentLevel) + text + '\n')
Пример #17
0
            token.text = token.text.strip()
        value, type = token.text, token.tag
        return value, type

    def _format_string(self, s):
        if s[0] == '<':
            if s[1] == '/':
                return s
            else:
                return '\n' + s
        elif s[0] == '>':
            return s
        else:
            return ' {} '.format(s)

    def _dump_tree(self, filename='/Users/rsenseman/Desktop/dump.xml'):
        with open(filename, 'w') as f:
            f.write(tostring(self.tree.getroot(), encoding='utf8').decode('utf8'))
        return None

if __name__ == '__main__':
    input_arg = sys.argv[1]

    input_path = Path(input_arg).resolve()
    output_path = input_path.with_name(input_path.stem + '_parsed.xml')

    tokenizer = Tokenizer(input_file=input_path).tokenize()

    parser = Parser(tokenizer)
    parser.parse_to_file(output_path)
class CompilationEngine(object):
    def __init__(self, inFile):
        self.t = Tokenizer(inFile)
        self.symTable = SymbolTable()
        self.vmName = inFile.rstrip('.jack') + '.vm'
        self.vm = VMWriter(self.vmName)
        self.className = ''
        self.types = ['int', 'char', 'boolean', 'void']
        self.stmnt = ['do', 'let', 'if', 'while', 'return']
        self.subroutType = ''
        self.whileIndex = 0
        self.ifIndex = 0
        self.fieldNum = 0

    def compile_class(self):

        self.t.advance()
        self.validator('class')
        self.className = self.t.current_token()
        self.t.advance()
        self.validator('{')
        self.fieldNum = self.compile_class_var_dec()
        while self.t.symbol() != '}':  # subroutines
            self.compile_subroutine()
        self.validator('}')
        self.vm.close()

        return

    def compile_class_var_dec(self):
        varKeyWords = ['field', 'static']
        name = ''
        kind = ''
        varType = ''
        counter = 0
        while self.t.keyword() in varKeyWords:
            kind = self.t.current_token()
            self.validator(varKeyWords)
            # variable type
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            if kind == 'field':
                counter += 1

            while self.t.symbol() != ';':  # checks multiple vars
                self.validator(',')
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                if kind == 'field':
                    counter += 1
            self.validator(';')

        return counter

    def compile_subroutine(self):
        current_subrout_scope = self.symTable.subDict
        self.symTable.start_subroutine()

        subroutKword = self.t.current_token()
        self.validator(['constructor', 'function', 'method'])

        self.subroutType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])

        name = self.t.current_token()
        subroutName = self.className + '.' + name
        self.t.advance()
        self.validator('(')

        if subroutKword == 'method':
            self.compile_parameter_list(method=True)
        else:
            self.compile_parameter_list()

        self.validator(')')
        self.validator('{')

        if self.t.symbol() == '}':
            self.t.advance()

            return

        self.validator(['var', 'let', 'do', 'if', 'while', 'return'],
                       advance=False)
        numLocals = 0
        if self.t.keyword() == 'var':
            numLocals = self.compile_var_dec()

        self.vm.write_function(subroutName, numLocals)

        if subroutKword == 'constructor':
            self.vm.write_push('constant', self.fieldNum)
            self.vm.write_call('Memory.alloc', 1)
            self.vm.write_pop('pointer', 0)
        elif subroutKword == 'method':
            self.vm.write_push('argument', 0)
            self.vm.write_pop('pointer', 0)

        if self.t.keyword() in self.stmnt:
            self.compile_statements()

        self.validator('}')
        self.symTable.subDict = current_subrout_scope
        self.whileIndex = 0
        self.ifIndex = 0

        return

    def compile_parameter_list(self, method=False):
        name = ''
        varType = ''
        kind = ''
        counter = 0

        if self.t.symbol() == ')':
            return counter
        varType = self.t.current_token()
        self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
        kind = 'arg'
        name = self.t.current_token()
        if method:
            self.symTable.define(name, varType, kind, method=True)
        else:
            self.symTable.define(name, varType, kind)

        self.t.advance()
        counter += 1
        while self.t.symbol() == ',':
            self.validator(',')
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            kind = 'arg'
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1

        return counter

    def compile_var_dec(self):
        name = ''
        kind = ''
        varType = ''
        counter = 0

        while self.t.keyword() == 'var':  # check multiple lines of var
            kind = 'var'
            self.t.advance()
            varType = self.t.current_token()
            self.validator(['int', 'char', 'boolean', 'void', 'IDENTIFIER'])
            name = self.t.current_token()
            self.symTable.define(name, varType, kind)
            self.t.advance()
            counter += 1

            while self.t.symbol() == ',':  # multiple varNames
                self.t.advance()
                name = self.t.current_token()
                self.symTable.define(name, varType, kind)
                self.t.advance()
                counter += 1
            self.validator(';')

        return counter

    def compile_statements(self):

        while self.t.keyword() in self.stmnt:
            if self.t.keyword() == 'let':
                self.compile_let()
            elif self.t.keyword() == 'do':
                self.compile_do()
            elif self.t.keyword() == 'if':
                self.compile_if()
            elif self.t.keyword() == 'while':
                self.compile_while()
            elif self.t.keyword() == 'return':
                self.compile_return()
            else:
                raise Exception(self.t.current_token() + ' is not valid')

        return

    def compile_do(self):
        lookAhead = ''
        self.t.advance()  # do
        lookAhead = self.t.tokens[self.t.tokenIndex + 1]

        if lookAhead == '(':  # subroutineName(exprlist)
            subroutName = self.className + '.' + self.t.current_token()
            self.t.advance()
            self.validator('(')

            self.vm.write_push('pointer', 0)
            numArgs = self.compile_expression_list()
            self.vm.write_call(subroutName, numArgs + 1)  # add 1 for 'this'

            self.validator(')')
            self.validator(';')
            self.vm.write_pop('temp', 0)  # throws away returned value

            return
        else:
            className = self.t.current_token()
            self.t.advance()
            self.validator('.')  # name.subroutine(exprList)
            subroutName = self.t.current_token()
            self.t.advance()
            self.validator('(')

            if self.symTable.kind_of(className) in [
                    'this', 'static', 'local', 'argument'
            ]:
                # used 'this' for 'field'
                typeName = self.symTable.type_of(className)
                subroutName = typeName + '.' + subroutName
                segment = self.symTable.kind_of(className)
                index = self.symTable.index_of(className)
                self.vm.write_push(segment, index)
                numArgs = self.compile_expression_list()
                self.vm.write_call(subroutName, numArgs + 1)
            else:
                subroutName = className + '.' + subroutName
                numArgs = self.compile_expression_list()
                self.vm.write_call(subroutName, numArgs)

            self.validator(')')
            self.validator(';')
            self.vm.write_pop('temp', 0)

            return

    def compile_let(self):
        name = ''
        kind = ''
        array = False
        self.t.advance()  # let
        while self.t.symbol() != ';':
            name = self.t.identifier()
            kind = self.symTable.kind_of(name)
            index = self.symTable.index_of(name)
            if name in self.symTable.classDict:
                self.t.advance()
            elif name in self.symTable.subDict:
                self.t.advance()
            else:
                raise Exception(self.t.identifier() + ' is not defined')
            if self.t.symbol() == '[':  # array index
                array = True
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()
                self.validator(']')
                self.vm.write_arithmetic('+')

            self.validator('=')
            self.compile_expression()
            if array:
                self.vm.write_pop('temp', 0)
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('temp', 0)
                self.vm.write_pop('that', 0)
            else:
                self.vm.write_pop(kind, index)
        self.validator(';')

        return

    def compile_while(self):
        currentWhile = 'WHILE' + str(self.whileIndex)
        self.vm.write_label(currentWhile)
        self.whileIndex += 1
        self.t.advance()  # while
        self.validator('(')

        self.compile_expression()
        self.vm.write_arithmetic('~')
        self.vm.write_if('END' + currentWhile)

        self.validator(')')
        self.validator('{')

        self.compile_statements()
        self.vm.write_goto(currentWhile)

        self.validator('}')
        self.vm.write_label('END' + currentWhile)

        return

    def compile_return(self):
        self.t.advance()  # return
        if self.t.symbol() == ';':
            self.vm.write_push('constant', '0')
            self.vm.write_return()
            self.t.advance()
        else:
            self.compile_expression()
            self.validator(';')
            self.vm.write_return()

        return

    def compile_if(self):
        endIf = 'END_IF' + str(self.ifIndex)
        currentElse = 'IF_ELSE' + str(self.ifIndex)
        self.ifIndex += 1
        self.t.advance()  # if
        self.validator('(')
        self.compile_expression()
        self.vm.write_arithmetic('~')
        self.vm.write_if(currentElse)

        self.validator(')')
        self.validator('{')

        self.compile_statements()
        self.vm.write_goto(endIf)
        self.validator('}')
        self.vm.write_label(currentElse)

        if self.t.keyword() == 'else':
            self.t.advance()  # else
            self.validator('{')

            self.compile_statements()

            self.validator('}')
        self.vm.write_label(endIf)

        return

    def compile_expression(self):
        op = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.compile_term()
        while self.t.symbol() in op:
            opToken = self.t.current_token()
            self.t.advance()
            self.compile_term()
            self.vm.write_arithmetic(opToken)

        return

    def compile_term(self):

        keyConst = ['true', 'false', 'null', 'this']
        unOps = ['-', '~']
        lookAhead = ''
        name = ''
        current_subrout_scope = ''

        if self.t.token_type() == 'INT_CONST':
            self.vm.write_push('constant', self.t.int_val())
            self.t.advance()
        elif self.t.token_type() == 'STRING_CONST':
            string = self.t.string_val()
            length = len(string)
            self.vm.write_push('constant', length)
            self.vm.write_call('String.new', 1)
            for char in string:
                char = ord(char)  # gives the ASCII number
                self.vm.write_push('constant', char)
                self.vm.write_call('String.appendChar', 2)
            self.t.advance()

        elif self.t.token_type() == 'KEYWORD':
            self.validator(keyConst, advance=False)
            if self.t.current_token() in ['false', 'null']:
                self.t.advance()
                self.vm.write_push('constant', '0')
            elif self.t.current_token() == 'true':
                self.vm.write_push('constant', '1')
                self.vm.write_arithmetic('-', neg=True)
                self.t.advance()
            else:
                self.vm.write_push('pointer', '0')
                self.t.advance()

        elif self.t.token_type() == 'SYMBOL':
            if self.t.symbol() in unOps:  # unary operator
                unOpToken = self.t.current_token()
                self.t.advance()
                self.compile_term()
                self.vm.write_arithmetic(unOpToken, neg=True)
            elif self.t.symbol() == '(':  # (expression))
                self.t.advance()
                self.compile_expression()
                self.t.advance()
            else:
                raise Exception(self.t.current_token() + ' is not valid')
        elif self.t.token_type() == 'IDENTIFIER':  # varName, array, or subcall
            lookAhead = self.t.tokens[self.t.tokenIndex + 1]
            if lookAhead == '[':  # array item
                name = self.t.identifier()
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                if name in self.symTable.classDict:
                    self.t.advance()
                elif name in self.symTable.subDict:
                    self.t.advance()
                else:
                    raise Exception(self.t.identifier() + ' is not defined')
                self.vm.write_push(kind, index)
                self.validator('[')
                self.compile_expression()

                self.vm.write_arithmetic('+')
                self.vm.write_pop('pointer', 1)
                self.vm.write_push('that', 0)

                self.validator(']')

            elif lookAhead == '(':  # subcall
                current_subrout_scope = self.symTable.subDict
                name = self.className + '.' + self.t.current_token()
                self.t.advance()
                self.validator('(')
                numArgs = self.compile_expression_list()
                self.vm.write_call(name, numArgs + 1)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope

            elif lookAhead == '.':  # name.subroutName(expressList)
                current_subrout_scope = self.symTable.subDict
                className = self.t.current_token()
                self.t.advance()
                self.validator('.')
                subroutName = self.t.current_token()
                self.validator('IDENTIFIER')
                name = className + '.' + subroutName
                self.validator('(')
                if self.symTable.kind_of(className) in [
                        'this', 'static', 'local', 'argument'
                ]:
                    # used 'this' for 'field'
                    classType = self.symTable.type_of(className)
                    name = classType + '.' + subroutName
                    kind = self.symTable.kind_of(className)
                    index = self.symTable.index_of(className)
                    self.vm.write_push(kind, index)
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs + 1)
                else:
                    numArgs = self.compile_expression_list()
                    self.vm.write_call(name, numArgs)
                self.validator(')')
                self.symTable.subDict = current_subrout_scope
            else:
                name = self.t.identifier()  # varName
                kind = self.symTable.kind_of(name)
                index = self.symTable.index_of(name)
                self.vm.write_push(kind, index)
                self.t.advance()
        else:
            raise Exception(self.t.current_token() + ' is not valid')

        return

    def compile_expression_list(self):  # only in subroutineCall
        counter = 0
        if self.t.symbol() == ')':
            return counter
        else:
            self.compile_expression()
            counter += 1
            while self.t.symbol() == ',':
                self.t.advance()
                self.compile_expression()
                counter += 1

        return counter

    def validator(self, syntax, advance=True):
        tokenType = self.t.token_type()
        token = self.t.current_token()
        if advance:
            self.t.advance()
        if type(syntax) != list:
            syntax = [syntax]
        for item in syntax:
            if item in [tokenType, token]:
                return True
        raise Exception(self.t.current_token() + ' is not valid')
Пример #19
0
 def __init__(self, inpath, outpath):
     self.tokenizer = Tokenizer(inpath)
     XMLWriter.set_filepath(outpath)
     if self.tokenizer.has_more_tokens():
         self.compile_class()
     XMLWriter.close()
Пример #20
0
class Compiler:
    def __init__(self, inpath, outpath):
        self.tokenizer = Tokenizer(inpath)
        XMLWriter.set_filepath(outpath)
        if self.tokenizer.has_more_tokens():
            self.compile_class()
        XMLWriter.close()

    def _write_current_terminal(self):
        XMLWriter.write_terminal(self._current_token, self._current_tok_tag)

    def _advance(self):
        self._check_EOF()
        self.tokenizer.advance()

    type_kws = (KW_INT, KW_CHAR, KW_BOOLEAN)
    kw_consts = (KW_TRUE, KW_FALSE, KW_NULL, KW_THIS)

    @property
    def _current_token(self):
        t_type = self.tokenizer.token_type
        return (self.tokenizer.keyword    if t_type == T_KEYWORD else
                self.tokenizer.symbol     if t_type == T_SYMBOL  else
                self.tokenizer.identifier if t_type == T_ID      else
                self.tokenizer.intval     if t_type == T_INTEGER else
                self.tokenizer.stringval)

    @property
    def _current_tok_type(self):
        return self.tokenizer.token_type

    @property
    def _current_tok_tag(self):
        return token_tags[self._current_tok_type]

    @property
    def _next_token(self):
        """return raw next_token in the tokenizer"""
        return str(self.tokenizer.next_token)

    def _require_token(self, tok_type, token=None):
        """Check whether the next_token(terminal) in the tokenizer meets the 
        requirement (specific token or just token type). If meets, tokenizer
        advances (update current_token and next_token)  and terminal will be 
        writed into outfile; If not, report an error."""
        self._advance()
        if token and self._current_token != token:
            return self._error(expect_toks=(token,))
        elif self._current_tok_type != tok_type:
            return self._error(expect_types=(tok_type,))
        self._write_current_terminal()

    def _require_id(self):
        return self._require_token(T_ID)

    def _require_kw(self, token):
        return self._require_token(T_KEYWORD, token=token)

    def _require_sym(self, token):
        return self._require_token(T_SYMBOL, token=token)

    def _require_brackets(self, brackets, procedure):
        front, back = brackets
        self._require_sym(front)
        procedure()
        self._require_sym(back)

    def _fol_by_class_vardec(self):
        return self._next_token in (KW_STATIC, KW_FIELD)

    def _fol_by_subroutine(self):
        return self._next_token in (KW_CONSTRUCTOR, KW_FUNCTION, KW_METHOD)

    def _fol_by_vardec(self):
        return self._next_token == KW_VAR

    #########################
    # structure compilation #
    #########################

    # the compilation of three types of name might seem redundant here, but
    # it was for abstraction and later code generation in project 11.
    def compile_class_name(self):
        self._require_id()

    def compile_subroutine_name(self):
        self._require_id()

    def compile_var_name(self):
        self._require_id()

    def compile_type(self, advanced=False, expect='type'):
        # int, string, boolean or identifier(className)
        if advanced is False:
            self._advance()
        if self._current_token in self.type_kws:
            return self._write_current_terminal()
        elif self._current_tok_type == T_ID:
            return self._write_current_terminal()
        else:
            return self._error(expect=expect)

    def compile_void_or_type(self):
        # void or type
        self._advance()
        if self._current_token == KW_VOID:
            self._write_current_terminal()
        else:
            self.compile_type(True, '"void" or type')

    @record_non_terminal('class')
    def compile_class(self):
        # 'class' className '{' classVarDec* subroutineDec* '}'
        self._require_kw(KW_CLASS)
        self.compile_class_name()
        self._require_sym('{')
        while self._fol_by_class_vardec():
            self.compile_class_vardec()
        while self._fol_by_subroutine():
            self.compile_subroutine()
        self._advance()
        if self._current_token != '}':
            self._traceback("Except classVarDec or subroutineDec.")
        self._write_current_terminal()

    def compile_declare(self):
        self._advance()
        self._write_current_terminal()
        # type varName (',' varName)* ';'
        self.compile_type()
        self.compile_var_name()
        # compile ',' or ';'
        self._advance()
        while self._current_token == ',':
            self._write_current_terminal()
            self.compile_var_name()
            self._advance()
        if self._current_token != ';':
            return self._error((',', ';'))
        self._write_current_terminal()

    @record_non_terminal('classVarDec')
    def compile_class_vardec(self):
        # ('static|field') type varName (',' varName)* ';'
        self.compile_declare()

    @record_non_terminal('subroutineDec')
    def compile_subroutine(self):
        # ('constructor'|'function'|'method')
        # ('void'|type) subroutineName '(' parameterList ')' subroutineBody
        self._advance()
        self._write_current_terminal()      # ('constructor'|'function'|'method')
        self.compile_void_or_type()
        self.compile_subroutine_name()
        self._require_brackets('()', self.compile_parameter_list)
        self.compile_subroutine_body()

    @record_non_terminal('parameterList')
    def compile_parameter_list(self):
        # ((type varName) (',' type varName)*)?
        if self._next_token == ')':
            return
        self.compile_type()
        self.compile_var_name()
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_type()
            self.compile_var_name()

    @record_non_terminal('subroutineBody')
    def compile_subroutine_body(self):
        # '{' varDec* statements '}'
        self._require_sym('{')
        while self._fol_by_vardec():
            self.compile_vardec()
        self.compile_statements()
        self._require_sym('}')

    @record_non_terminal('varDec')
    def compile_vardec(self):
        # 'var' type varName (',' varName)* ';'
        self.compile_declare()

    #########################
    # statement compilation #
    #########################

    @record_non_terminal('statements')
    def compile_statements(self):
        # (letStatement | ifStatement | whileStatement | doStatement | 
        # returnStatement)*
        last_statement = None
        while self._next_token != '}':
            self._advance()
            last_statement = self._current_token
            if last_statement == 'do':
                self.compile_do()
            elif last_statement == 'let':
                self.compile_let()
            elif last_statement == 'while':
                self.compile_while()
            elif last_statement == 'return':
                self.compile_return()
            elif last_statement == 'if':
                self.compile_if()
            else:
                return self._error(expect='statement expression')
        if STACK[-2] == 'subroutineBody' and last_statement != 'return':
            self._error(expect='return statement', get=last_statement)

    @record_non_terminal('doStatement')
    def compile_do(self):
        # 'do' subroutineCall ';'
        self._write_current_terminal()
        # compile identifier first
        self._advance()
        self.compile_subroutine_call()
        self._require_sym(';')

    @record_non_terminal('letStatement')
    def compile_let(self):
        # 'let' varName ('[' expression ']')? '=' expression ';'
        self._write_current_terminal()
        self.compile_var_name()
        if self._next_token == '[':
            self._compile_array_subscript()
        self._require_sym('=')
        self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('whileStatement')
    def compile_while(self):
        # 'while' '(' expression ')' '{' statements '}'
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)

    @record_non_terminal('returnStatement')
    def compile_return(self):
        # 'return' expression? ';'
        self._write_current_terminal()
        if self._next_token != ';':
            self.compile_expression()
        self._require_sym(';')

    @record_non_terminal('ifStatement')
    def compile_if(self):
        # 'if' '(' expression ')' '{' statements '}'
        # ('else' '{' statements '}')?
        self._write_current_terminal()
        self._require_brackets('()', self.compile_expression)
        self._require_brackets('{}', self.compile_statements)
        # else clause
        if self._next_token == KW_ELSE:
            self._require_kw(KW_ELSE)
            self._require_brackets('{}', self.compile_statements)

    ##########################
    # expression compilation #
    ##########################

    @record_non_terminal('expression')
    def compile_expression(self):
        # term (op term)*
        self.compile_term()
        while is_op(self._next_token):
            self.compile_op()
            self.compile_term()

    @record_non_terminal('term')
    def compile_term(self):
        # integerConstant | stringConstant | keywordConstant |
        # varName | varName '[' expression ']' | subroutineCall |
        # '(' expression ')' | unaryOp term
        if self._next_token == '(':
            self._require_brackets('()', self.compile_expression)
        elif self._next_token in set('-~'):
            self.compile_unaryop()
        else:
            self._advance()
            tok = self._current_token
            tok_type = self._current_tok_type
            if tok in self.kw_consts or tok_type in (T_INTEGER, T_STRING):
                self._write_current_terminal()
            elif tok_type == T_ID:
                if self._next_token in '(.':
                    self.compile_subroutine_call()
                else:
                    self._write_current_terminal()
                    if self._next_token == '[':
                        self._compile_array_subscript()
            else:
                self._error(expect='term')

    def compile_call_name(self):
        # the fisrt name of subroutine call could be (className or varName) if
        # it is followed by '.', or subroutineName if followed by '('.
        if self._current_tok_type != T_ID:
            self._error(expect_types=(T_ID,))
        self._write_current_terminal()
        # just write it without analysis.
        # this method will be extended to decide which kind the name is.

    def compile_subroutine_call(self):
        # subroutineName '(' expressionList ')' | (className |
        # varName) '.' subroutineName '(' expressionList ')'
        ## the first element of structure has already been compiled.
        self.compile_call_name()
        if self._next_token == '.':
            self._require_sym('.')
            self.compile_subroutine_name()
        self._require_brackets('()', self.compile_expressionlist)

    @record_non_terminal('expressionList')
    def compile_expressionlist(self):
        # (expression (',' expression)*)?
        if self._next_token != ')':
            self.compile_expression()
        while self._next_token != ')':
            self._require_sym(',')
            self.compile_expression()

    def compile_op(self):
        # exclude '~'
        self._advance()
        if self._current_token == '~':
            self._traceback('Unexpected operator: ~')
        self._write_current_terminal()

    def compile_unaryop(self):
        self._advance()
        self._write_current_terminal()      # symbol: - or ~
        self.compile_term()

    def _compile_array_subscript(self):
        # '[' expression ']'
        self._require_brackets('[]', self.compile_expression)

    def _check_EOF(self):
        if not self.tokenizer.has_more_tokens():
            self._traceback("Unexpected EOF.")

    def _error(self, expect_toks=(), expect_types=(), expect=None, get=None):
        if expect is None:
            exp_tok = ' or '.join(('"{0}"'.format(t) for t in expect_toks))
            exp_types = ('type {0}'.format(token_tags[t]) for t in expect_types)
            exp_type = ' or '.join(exp_types)
            if exp_tok and exp_type:
                expect = ' or '.join(expect_tok, expect_type)
            else:
                expect = exp_toks + exp_types
        if get is None:
            get = self._current_token
        me = 'Expect {0} but get "{1}"'.format(expect, get)
        return self._traceback(me)

    def _traceback(self, message):
        file_info = 'file: "{0}"'.format(self.tokenizer.filename)
        line_info = 'line {0}'.format(self.tokenizer.line_count)
        raise CompileError("{0}, {1}: {2}".format(file_info, line_info, message))