Пример #1
0
def analyze(src_jack_file):
    """Print every token of a Jack source, one token per line.

    Args:
        src_jack_file: Value handed unchanged to ``JackTokenizer``
            (presumably a path to a ``.jack`` file).
    """
    token_stream = JackTokenizer(src_jack_file)
    while True:
        if not token_stream.has_more_tokens():
            break
        token_stream.advance()
        print(token_stream.get_current_token())
class CompilationEngine:
    """Compile one Jack class by walking its token stream.

    Tokens are read from a ``JackTokenizer``, identifiers are recorded in a
    ``SymbolTable``, and the generated commands are sent to the VM writer
    passed to the constructor.
    """

    # Keywords that open a class-level variable declaration.
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    # Keywords that open a subroutine declaration.
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    # Symbols accepted as binary operators inside an expression.
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    # Symbols accepted as unary (prefix) operators in front of a term.
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # Binary operators that map to a single arithmetic command.
    # '*' and '/' are absent on purpose: compile_expression() emits calls to
    # Math.multiply / Math.divide for them instead.
    BINARY_OPERATORS_TO_COMMAND = {
        '+': 'add',
        '-': 'sub',
        '=': 'eq',
        '>': 'gt',
        '<': 'lt',
        '&': 'and',
        '|': 'or'
    }
    # Unary operators and the arithmetic command emitted for each.
    UNARY_OPERATORS_TO_COMMAND = {'-': 'neg', '~': 'not'}
    # Tokenizer token type -> XML tag name; only write_token() reads this.
    TYPE_TO_TAG = {
        'STRING_CONST': 'stringConstant',
        'INT_CONST': 'integerConstant',
        'KEYWORD': 'keyword',
        'IDENTIFIER': 'identifier',
        'SYMBOL': 'symbol'
    }

    # XML entity for each symbol that cannot appear literally in XML text.
    # NOTE(review): nothing in the visible part of this class reads it.
    SYMBOLS_TO_XML_CONVENTION = {
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '"': '&quot;'
    }

    def __init__(self, input_file_path, vm_writer: VMWriter):
        """Create the tokenizer and symbol table, then compile right away.

        Args:
            input_file_path: Passed unchanged to ``JackTokenizer``.
            vm_writer: Object that receives every emitted VM command.
        """
        self.jack_tokenizer = JackTokenizer(input_file_path)
        self.symbol_table = SymbolTable()
        self.vm_writer = vm_writer
        # Nothing to compile when the tokenizer reports no tokens at all.
        if not self.jack_tokenizer.has_more_tokens():
            return
        self.compile_class()

    def compile_class(self):
        """Compile a whole class and close the VM writer afterwards.

        The token positions below are fixed by the advance() calls; the
        token names in the comments assume a well-formed
        ``class Name { ... }`` source.
        """
        # Two advances: presumably 'class', then the class name.
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        # Register the current identifier with kind 'CLASS' and type 'NONE'.
        self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                 'CLASS')
        # Two more advances: presumably '{', then the first body token.
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()

        # Each helper loops over all consecutive declarations of its kind,
        # so a single `if` is enough here.
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        # Step past what is presumably the closing '}' of the class.
        self.jack_tokenizer.advance()
        self.vm_writer.close()

    def write_token(self, token_name):
        type_tag = CompilationEngine.TYPE_TO_TAG[
            self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Record every consecutive ``static`` / ``field`` declaration.

        Each declared name is added to the symbol table with the declared
        type and kind 'STATIC' or 'FIELD'. No VM command is written.
        """
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            kind = ''
            if self.jack_tokenizer.key_word() == 'field':
                kind = 'FIELD'
            elif self.jack_tokenizer.key_word() == 'static':
                kind = 'STATIC'
            # Move from the kind keyword to the type token.
            self.jack_tokenizer.advance()
            field_type = self.get_type()
            # Move from the type to the first variable name.
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     field_type, kind)

            self.jack_tokenizer.advance()

            # Until ';': skip the current token (presumably ','), then
            # define the following name with the same type and kind.
            while self.jack_tokenizer.symbol() != ';':
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         field_type, kind)
                self.jack_tokenizer.advance()

            # Step past the terminating ';'.
            self.jack_tokenizer.advance()

    def write_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Compile every consecutive constructor / function / method.

        For each subroutine the symbol table is reset, parameters and local
        variables are recorded, a ``function`` command is written, and the
        body statements are compiled.
        """
        # Called once per invocation, i.e. before the loop, not once per
        # subroutine.
        self.vm_writer.zero_branching_indexes()
        while self.jack_tokenizer.key_word(
        ) in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.symbol_table.start_subroutine()
            constructor = True if self.jack_tokenizer.key_word(
            ) == 'constructor' else False

            method = False
            if self.jack_tokenizer.key_word() == 'method':
                method = True
                # Defined before any parameter; presumably this makes
                # 'this' the first 'ARG' entry - confirm in SymbolTable.
                self.symbol_table.define('this',
                                         self.symbol_table.get_class_name(),
                                         'ARG')

            # Two advances: presumably onto the return type, then onto the
            # subroutine name.
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(), 'NONE',
                                     'SUBROUTINE')
            # Name passed to write_function: "<class name>.<identifier>".
            name = self.symbol_table.get_class_name(
            ) + '.' + self.jack_tokenizer.identifier()
            # Two advances: presumably onto '(', then onto the first token
            # of the parameter list.
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_parameter_list()
            # Presumably steps past ')'.
            self.jack_tokenizer.advance()

            # Presumably steps past '{' onto the first body token.
            self.jack_tokenizer.advance()
            var_num = 0
            # The local count must be known before 'function' is written.
            while self.jack_tokenizer.key_word() == 'var':
                var_num += self.compile_var_dec()
            self.vm_writer.write_function(name, var_num)
            if method:
                # Copy argument 0 into POINTER 0.
                self.vm_writer.write_push('ARG', 0)
                self.vm_writer.write_pop('POINTER', 0)
            elif constructor:
                # Call Memory.alloc with the FIELD count and store the
                # result in POINTER 0.
                field_count = self.symbol_table.var_count('FIELD')
                self.vm_writer.write_push('CONST', field_count)
                self.vm_writer.write_call('Memory.alloc', 1)
                self.vm_writer.write_pop('POINTER', 0)
            self.compile_statements()
            # Presumably steps past the closing '}' of the body.
            self.jack_tokenizer.advance()

    def compile_parameter_list(self):
        if self.jack_tokenizer.symbol() != ')':
            parameter_type = self.get_type()
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     parameter_type, 'ARG')
            self.jack_tokenizer.advance()
            while self.jack_tokenizer.symbol() == ",":
                self.jack_tokenizer.advance()
                parameter_type = self.get_type()
                self.jack_tokenizer.advance()
                self.symbol_table.define(self.jack_tokenizer.identifier(),
                                         parameter_type, 'ARG')
                self.jack_tokenizer.advance()

    def get_type(self):
        if self.jack_tokenizer.token_type() == 'KEYWORD':
            parameter_type = self.jack_tokenizer.key_word()
        elif self.jack_tokenizer.token_type() == 'IDENTIFIER':
            parameter_type = self.jack_tokenizer.identifier()
        return parameter_type

    def compile_var_dec(self):
        """Record one ``var`` declaration in the symbol table.

        Returns:
            The number of names declared (1 plus one per ',' seen).
        """
        var_num = 1
        # Move from 'var' to the type token.
        self.jack_tokenizer.advance()
        var_type = self.get_type()
        # Move from the type to the first variable name.
        self.jack_tokenizer.advance()
        self.symbol_table.define(self.jack_tokenizer.identifier(), var_type,
                                 'VAR')
        self.jack_tokenizer.advance()

        # Every ',' introduces one more name of the same type.
        while self.jack_tokenizer.symbol() == ",":
            var_num += 1
            self.jack_tokenizer.advance()
            self.symbol_table.define(self.jack_tokenizer.identifier(),
                                     var_type, 'VAR')
            self.jack_tokenizer.advance()
        # Presumably steps past the terminating ';'.
        self.jack_tokenizer.advance()
        return var_num

    def compile_statements(self):
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            if self.jack_tokenizer.key_word() == 'let':
                self.compile_let()
            elif self.jack_tokenizer.key_word() == 'if':
                self.compile_if()
            elif self.jack_tokenizer.key_word() == 'while':
                self.compile_while()
            elif self.jack_tokenizer.key_word() == 'do':
                self.compile_do()
            elif self.jack_tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """Compile a ``do`` statement: a call whose result is discarded."""
        # Move from 'do' to the first identifier of the call.
        self.jack_tokenizer.advance()

        name = self.jack_tokenizer.identifier()
        # compile_subroutine_call() expects the token after the identifier
        # ('(' or '.') to be current.
        self.jack_tokenizer.advance()
        self.compile_subroutine_call(name)

        # must dispose of void function return value
        self.vm_writer.write_pop('TEMP', 0)
        # Presumably steps past the terminating ';'.
        self.jack_tokenizer.advance()

    def compile_subroutine_call(self, prefix_call=''):
        if self.jack_tokenizer.symbol() == '(':
            subroutine = False
            # If not in symbol table - then subroutine
            if not self.symbol_table.kind_of(
                    prefix_call) or self.symbol_table.kind_of(
                        prefix_call) == 'SUBROUTINE':
                subroutine = True
            self.jack_tokenizer.advance()

            args_count = 0
            if subroutine:
                self.vm_writer.write_push('POINTER', 0)
                args_count += 1
            args_count += self.compile_expression_list()

            if subroutine:
                self.vm_writer.write_call(
                    self.symbol_table.get_class_name() + '.' + prefix_call,
                    args_count)
            else:
                self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()
        elif self.jack_tokenizer.symbol() == '.':
            variable = False
            self.jack_tokenizer.advance()
            if self.symbol_table.kind_of(prefix_call) in ['VAR', 'FIELD']:
                variable = True
                variable_name = prefix_call
                prefix_call = self.symbol_table.type_of(prefix_call)
            prefix_call += '.{0}'.format(self.jack_tokenizer.identifier())
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()

            args_count = 0
            if variable:
                self.vm_writer.write_push(
                    self.symbol_table.kind_of(variable_name),
                    self.symbol_table.index_of(variable_name))
                args_count += 1
            args_count += self.compile_expression_list()

            self.vm_writer.write_call(prefix_call, args_count)
            self.jack_tokenizer.advance()

    def compile_let(self):
        """Compile a ``let`` statement, with or without an array index."""
        # Move from 'let' to the variable name.
        self.jack_tokenizer.advance()
        var_name = self.jack_tokenizer.identifier()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() == '[':
            # Push the variable, compile the index expression, add them.
            self.vm_writer.write_push(self.symbol_table.kind_of(var_name),
                                      self.symbol_table.index_of(var_name))
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_arithmetic("add")
            # Two advances: presumably past ']' and past '='.
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_expression()
            # Park the right-hand value in TEMP 0 so the address computed
            # above can be popped into POINTER 1 first; then store the
            # value through THAT 0.
            self.vm_writer.write_pop('TEMP', 0)
            self.vm_writer.write_pop('POINTER', 1)
            self.vm_writer.write_push('TEMP', 0)
            self.vm_writer.write_pop('THAT', 0)
        else:
            # Presumably steps past '='.
            self.jack_tokenizer.advance()
            self.compile_expression()
            self.vm_writer.write_pop(self.symbol_table.kind_of(var_name),
                                     self.symbol_table.index_of(var_name))

        # Presumably steps past the terminating ';'.
        self.jack_tokenizer.advance()

    def compile_while(self):
        """Compile a ``while`` loop using a pair of uniquely indexed labels."""
        while_idx = self.vm_writer.get_next_label_index('while')
        if_label = 'WHILE_IF_{0}'.format(while_idx)
        end_label = 'WHILE_END_{0}'.format(while_idx)

        # Loop head: the condition is re-evaluated on every iteration.
        self.vm_writer.write_label(if_label)
        # Two advances: presumably onto '(' and onto the condition.
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        # Negate so that the if-goto below leaves the loop when the
        # condition is false.
        self.vm_writer.write_arithmetic('not')
        # Two advances: presumably past ')' and past '{'.
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.vm_writer.write_if(end_label)
        self.compile_statements()
        self.vm_writer.write_goto(if_label)
        # Presumably steps past the closing '}'.
        self.jack_tokenizer.advance()
        self.vm_writer.write_label(end_label)

    def compile_return(self):
        """Compile a ``return`` statement.

        When no expression follows ``return``, constant 0 is pushed so that
        a value is always on the stack before the return command.
        """
        # Move from 'return' to the expression or ';'.
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        else:
            self.vm_writer.write_push('CONST', 0)
        self.vm_writer.write_return()
        # Presumably steps past the terminating ';'.
        self.jack_tokenizer.advance()

    def compile_if(self):
        """Compile an ``if`` statement with an optional ``else`` branch."""
        if_idx = self.vm_writer.get_next_label_index('if')
        else_label = 'IF_ELSE_{0}'.format(if_idx)
        end_label = 'IF_END_{0}'.format(if_idx)

        # Two advances: presumably onto '(' and onto the condition.
        self.jack_tokenizer.advance()
        self.jack_tokenizer.advance()
        self.compile_expression()
        # Negate so that the if-goto below skips the 'then' branch when the
        # condition is false.
        self.vm_writer.write_arithmetic('not')
        # Presumably steps past ')'.
        self.jack_tokenizer.advance()

        # Presumably steps past '{'.
        self.jack_tokenizer.advance()
        self.vm_writer.write_if(else_label)
        self.compile_statements()
        # Presumably steps past the '}' closing the 'then' branch.
        self.jack_tokenizer.advance()
        self.vm_writer.write_goto(end_label)

        # The else label is written even when there is no 'else' branch.
        self.vm_writer.write_label(else_label)
        if self.jack_tokenizer.key_word() == 'else':
            # Two advances: presumably past 'else' and past '{'.
            self.jack_tokenizer.advance()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # Presumably steps past the closing '}'.
            self.jack_tokenizer.advance()

        self.vm_writer.write_label(end_label)

    def compile_expression(self):
        self.compile_term()

        while self.jack_tokenizer.symbol(
        ) in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.advance()

            self.compile_term()

            if symbol in self.BINARY_OPERATORS_TO_COMMAND:
                self.vm_writer.write_arithmetic(
                    self.BINARY_OPERATORS_TO_COMMAND[symbol])
            elif symbol == '*':
                self.vm_writer.write_call('Math.multiply', 2)
            elif symbol == '/':
                self.vm_writer.write_call('Math.divide', 2)

    def compile_term(self):
        """Compile a single term, dispatching on the current token's type.

        Handles identifiers (plain variable, array element, subroutine
        call), string constants, keyword constants, parenthesised
        expressions, unary operators and integer constants.
        """
        token_type = self.jack_tokenizer.token_type()

        if token_type == 'IDENTIFIER':
            name = self.jack_tokenizer.identifier()
            # Look at the token after the identifier to decide its role.
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol(
            ) == '(' or self.jack_tokenizer.symbol() == '.':
                self.compile_subroutine_call(name)
            elif self.jack_tokenizer.symbol() == '[':
                # Array element: push the variable, add the index, pop the
                # sum into POINTER 1 and read THAT 0.
                self.vm_writer.write_push(self.symbol_table.kind_of(name),
                                          self.symbol_table.index_of(name))
                self.jack_tokenizer.advance()
                self.compile_expression()
                # Presumably steps past ']'.
                self.jack_tokenizer.advance()
                self.vm_writer.write_arithmetic('add')
                self.vm_writer.write_pop('POINTER', 1)
                self.vm_writer.write_push('THAT', 0)
            else:
                # Plain variable: push it by kind and index.
                kind = self.symbol_table.kind_of(name)
                idx = self.symbol_table.index_of(name)
                self.vm_writer.write_push(kind, idx)
        elif token_type == 'STRING_CONST':
            string_const = self.jack_tokenizer.string_val()

            # String.new(length), then one String.appendChar per character.
            self.vm_writer.write_push("CONST", len(string_const))
            self.vm_writer.write_call("String.new", 1)
            for char in string_const:
                self.vm_writer.write_push('CONST', ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.jack_tokenizer.advance()
        elif token_type == 'KEYWORD':
            keyword = self.jack_tokenizer.key_word()
            if keyword == 'true':
                # 'true' is emitted as the negation of constant 1.
                self.vm_writer.write_push('CONST', 1)
                self.vm_writer.write_arithmetic('neg')
            elif keyword == 'false' or keyword == 'null':
                self.vm_writer.write_push('CONST', 0)
            elif keyword == 'this':
                self.vm_writer.write_push('POINTER', 0)
            # Any other keyword emits nothing but is still consumed.
            self.jack_tokenizer.advance()
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                # Parenthesised expression: skip '(', compile, skip ')'.
                self.jack_tokenizer.advance()
                self.compile_expression()
                self.jack_tokenizer.advance()
            elif self.jack_tokenizer.symbol(
            ) in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                # Look the command up before advancing; it is written after
                # the operand has been compiled.
                command = CompilationEngine.UNARY_OPERATORS_TO_COMMAND[
                    self.jack_tokenizer.symbol()]
                self.jack_tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(command)
        elif token_type == 'INT_CONST':
            self.vm_writer.write_push('CONST', self.jack_tokenizer.int_val())
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        expression_count = 0
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            expression_count += 1
            while self.jack_tokenizer.symbol() == ',':
                self.jack_tokenizer.advance()
                self.compile_expression()
                expression_count += 1
        return expression_count
Пример #3
0
class CompilationEngine:
    """Compile a Jack class to VM code using one token of look-ahead.

    Conventions used by every method:

    * The ``is_xxx()`` and ``check_next_xxx()`` helpers only inspect the
      tokenizer's *next* token; they never consume it.
    * ``load_next_token()`` makes the next token the current one and
      returns its value, so call it before using a token's value.
    """
    def __init__(self, jack_file):
        """Create the writer, tokenizer and symbol table for one source.

        Args:
            jack_file: Passed unchanged to both ``VMWriter`` and
                ``JackTokenizer``.
        """
        self.vm_writer = VMWriter(jack_file)
        self.tokenizer = JackTokenizer(jack_file)
        self.symbol_table = SymbolTable()

        # Label counters; each is incremented before use, so the first
        # label of either kind gets index 0.
        self.if_index = self.while_index = -1

    # 'class' className '{' classVarDec* subroutineDec* '}'
    def compile_class(self):
        """Compile the whole class and close the VM writer.

        Entry point of the compilation. Stores the class name in
        ``self.class_name`` for later use when building function names.
        """
        self.load_next_token()  # 'class'
        self.class_name = self.load_next_token()  # className
        self.load_next_token()  # curr_token = '{'

        # classVarDec*: loop while the next token is 'static' or 'field'.
        while self.is_class_var_dec():
            self.compile_class_var_dec()
        # subroutineDec*: loop while the next token is 'constructor',
        # 'function' or 'method'.
        while self.is_subroutine_dec():
            self.compile_subroutine()
        # The closing '}' of the class is never loaded before closing.
        self.vm_writer.close()

    # ('static' | 'field' ) type varName (',' varName)* ';'
    def compile_class_var_dec(self):
        kind = self.load_next_token()  # curr_token = static | field
        type = self.load_next_token()  # curr_token = type
        name = self.load_next_token()  # curr_token = varName
        self.symbol_table.define(name, type, kind.upper())
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, kind.upper())
        self.load_next_token()  # ';'
        # next_token = 'constructor' | 'function' | 'method'

    # subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
    # subroutineBody: '{' varDec* statements '}'
    def compile_subroutine(self):
        """Compile one constructor, function or method declaration.

        Records parameters and locals in a fresh subroutine scope, writes
        the ``function`` command plus the constructor / method prologue,
        then compiles the body statements.
        """
        subroutine_kind = (self.load_next_token()
                           )  # 'constructor' | 'function' | 'method'
        self.load_next_token()  # 'void' | type (value not used)
        subroutine_name = self.load_next_token()  # subroutineName

        self.symbol_table.start_subroutine()  # reset the subroutine scope
        if subroutine_kind == "method":
            # Defined before any declared parameter; presumably this makes
            # it ARG 0 - confirm in SymbolTable.
            self.symbol_table.define("instance", self.class_name, "ARG")

        self.load_next_token()  # '('
        self.compile_parameter_list()  # parameterList
        # compile_parameter_list() returns with ')' as the next token.
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        while self.check_next_token() == "var":
            self.compile_var_dec()  # varDec*
        # The next token now starts the statements (possibly none).

        # The function command needs the local count, so it is written only
        # after all var declarations have been processed.
        func_name = f"{self.class_name}.{subroutine_name}"  # e.g. Main.main
        num_locals = self.symbol_table.counts["VAR"]
        self.vm_writer.write_function(func_name, num_locals)
        if subroutine_kind == "constructor":
            # Call Memory.alloc with the FIELD count and store the result
            # in POINTER 0.
            num_fields = self.symbol_table.counts["FIELD"]
            self.vm_writer.write_push("CONST", num_fields)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)
        elif subroutine_kind == "method":
            # Copy argument 0 into POINTER 0.
            self.vm_writer.write_push("ARG", 0)
            self.vm_writer.write_pop("POINTER", 0)

        self.compile_statements()  # statements
        self.load_next_token()  # '}'

        # ( (type varName) (',' type varName)*)?

    def compile_parameter_list(self):
        # curr_token == '('
        if self.check_next_token() != ")":
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        while self.check_next_token() != ")":
            self.load_next_token()  # ','
            type = self.load_next_token()  # type
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "ARG")
        # NOTE param compilation finishes when next_token == ')'

        # 'var' type varName (',' varName)* ';'

    def compile_var_dec(self):
        self.load_next_token()  # 'var'
        type = self.load_next_token()  # type
        name = self.load_next_token()  #  # varName
        self.symbol_table.define(name, type, "VAR")
        while self.check_next_token() != ";":  # (',' varName)*
            self.load_next_token()  # ','
            name = self.load_next_token()  # varName
            self.symbol_table.define(name, type, "VAR")
        self.load_next_token()  # ';'

    # statement*
    # letStatement | ifStatement | whileStatement | doStatement | returnStatement
    def compile_statements(self):
        # if next_token == let | if | while | do | return
        while self.is_statement():
            statement = (self.load_next_token()
                         )  # curr_token == let | if | while | do | return
            if statement == "let":
                self.compile_let()
            elif statement == "if":
                self.compile_if()
            elif statement == "while":
                self.compile_while()
            elif statement == "do":
                self.compile_do()
            elif statement == "return":
                self.compile_return()

        # 'let' varName ('[' expression ']')? '=' expression ';'

    def compile_let(self):
        """Compile a ``let`` statement; 'let' is already the current token."""
        var_name = self.load_next_token()  # varName
        # CONVERT_KIND is defined elsewhere in the module; it maps the
        # symbol-table kind to the name passed to the VM writer.
        var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
        var_index = self.symbol_table.index_of(var_name)
        if self.is_array():  # next token is '[': array assignment
            self.load_next_token()  # '['
            self.compile_expression()  # index expression
            self.load_next_token()  # ']'
            # Add the variable to the index already on the stack.
            self.vm_writer.write_push(var_kind, var_index)
            self.vm_writer.write_arithmetic("ADD")

            self.load_next_token()  # '='
            self.compile_expression()  # right-hand side
            self.load_next_token()  # ';'
            # Park the value in TEMP 0 so the address computed above can be
            # popped into POINTER 1 first; then store through THAT 0.
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        else:  # regular assignment
            self.load_next_token()  # '='
            self.compile_expression()  # right-hand side
            self.load_next_token()  # ';'
            self.vm_writer.write_pop(var_kind, var_index)

    # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
    def compile_if(self):
        """Compile an ``if`` statement; 'if' is already the current token.

        Emitted layout (N is this statement's index)::

            <condition>
            if-goto IF_TRUEN
            goto IF_FALSEN
            label IF_TRUEN
            <then statements>
            goto IF_ENDN
            label IF_FALSEN
            <else statements, if any>
            label IF_ENDN
        """
        # Take the index before compiling nested statements, which may
        # advance self.if_index further.
        self.if_index += 1
        if_index = self.if_index
        self.load_next_token()  # '('
        self.compile_expression()  # condition
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"IF_TRUE{if_index}")
        self.vm_writer.write_goto(f"IF_FALSE{if_index}")
        self.vm_writer.write_label(f"IF_TRUE{if_index}")
        self.compile_statements()  # 'then' branch
        self.vm_writer.write_goto(f"IF_END{if_index}")
        self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_FALSE{if_index}")
        if self.check_next_token() == "else":  # optional else branch
            self.load_next_token()  # 'else'
            self.load_next_token()  # '{'
            self.compile_statements()  # 'else' branch
            self.load_next_token()  # '}'
        self.vm_writer.write_label(f"IF_END{if_index}")

    # 'while' '(' expression ')' '{' statements '}'
    def compile_while(self):
        """Compile a ``while`` loop; 'while' is already the current token."""
        # Take the index before compiling nested statements, which may
        # advance self.while_index further.
        self.while_index += 1
        while_index = self.while_index
        # Loop head: the condition is re-evaluated on every iteration.
        self.vm_writer.write_label(f"WHILE{while_index}")
        self.load_next_token()  # '('
        self.compile_expression()  # condition
        # Negate so that the if-goto below exits when the condition fails.
        self.vm_writer.write_arithmetic("NOT")
        self.load_next_token()  # ')'
        self.load_next_token()  # '{'
        self.vm_writer.write_if(f"WHILE_END{while_index}")
        self.compile_statements()  # loop body
        self.vm_writer.write_goto(f"WHILE{while_index}")
        self.vm_writer.write_label(f"WHILE_END{while_index}")
        self.load_next_token()  # '}'

        # 'do' subroutineCall ';'

    def compile_do(self):
        """Compile a ``do`` statement; 'do' is already the current token."""
        # Load the leading identifier: compile_subroutine_call() reads the
        # *current* token as the caller name, as compile_term() arranges.
        self.load_next_token()
        self.compile_subroutine_call()
        # Discard the call's return value.
        self.vm_writer.write_pop("TEMP", 0)
        self.load_next_token()  # ';'

        # 'return' expression? ';'

    def compile_return(self):
        # curr_token == return
        if self.check_next_token() != ";":
            self.compile_expression()
        else:
            self.vm_writer.write_push("CONST", 0)
        self.vm_writer.write_return()
        self.load_next_token()  # ';'

    # term (op term)*
    def compile_expression(self):
        """Compile ``term (op term)*`` strictly left to right.

        Both operands are compiled before the operator is written.
        Operators present in the module-level ``ARITHMETIC`` mapping become
        arithmetic commands; ``*`` and ``/`` become two-argument calls to
        ``Math.multiply`` and ``Math.divide``.
        """
        os_routines = {"*": "Math.multiply", "/": "Math.divide"}
        self.compile_term()  # first term
        while self.is_op():
            operator = self.load_next_token()
            self.compile_term()  # right operand
            if operator in ARITHMETIC:
                self.vm_writer.write_arithmetic(ARITHMETIC[operator])
                continue
            routine = os_routines.get(operator)
            if routine is not None:
                self.vm_writer.write_call(routine, 2)

    # integerConstant | stringConstant | keywordConstant | varName |
    # varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
    def compile_term(self):
        # if next_token == '~' | '-'
        if self.is_unary_op_term():
            unary_op = self.load_next_token()  # curr_token == '~' | '-'
            self.compile_term()  # term (recursive)
            self.vm_writer.write_arithmetic(ARITHMETIC_UNARY[unary_op])
        # if next_token == '(' => '(' expression ')'
        elif self.check_next_token() == "(":
            self.load_next_token()  # '('
            self.compile_expression()  # expression
            self.load_next_token()  # ')'
        # if next_token == INTEGER(const)
        elif self.check_next_type() == "INT_CONST":  # integerConstant
            self.vm_writer.write_push("CONST", self.load_next_token())  # )
        # if next_token == STRING(const)
        elif self.check_next_type() == "STRING_CONST":  # stringConstant
            self.compile_string()
        # if next_token == KEYWORD(const)
        elif self.check_next_type() == "KEYWORD":  # keywordConstant
            self.compile_keyword()
        # varName | varName '[' expression ']' | subroutineCall
        else:
            #! (varName | varName for expression | subroutine)'s base
            var_name = self.load_next_token(
            )  # curr_token = varName | subroutineCall
            # (e.g. Screen.setColor | show() )
            #! next_token == '[' | '(' or '.' | just varName
            # varName '[' expression ']'
            if self.is_array():  # if next_token == '['
                self.load_next_token()  # '['
                self.compile_expression()  # expression
                self.load_next_token()  # ']'
                array_kind = self.symbol_table.kind_of(var_name)
                array_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(CONVERT_KIND[array_kind],
                                          array_index)
                self.vm_writer.write_arithmetic("ADD")
                self.vm_writer.write_pop("POINTER", 1)
                self.vm_writer.write_push("THAT", 0)
            # if next_token == "(" | "." => curr_token == subroutineCall

            #! if varName is not found, assume class or function name
            elif self.is_subroutine_call():
                # NOTE curr_token == subroutineName | className | varName
                self.compile_subroutine_call()
            # varName
            else:
                # curr_token == varName
                # FIXME cannot catch subroutine call and pass it to 'else' below
                # TODO error caught on Math.abs() part on Ball.vm
                var_kind = CONVERT_KIND[self.symbol_table.kind_of(var_name)]
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)

    # subroutineCall: subroutineName '(' expressionList ')' |
    # ( className | varName) '.' subroutineName '(' expressionList ')'

    # e.g.) (do) game.run()
    # ! in case of 'do' order is different from 'let game = Class.new()'
    def compile_subroutine_call(self):
        """Compile a subroutine call and write the ``call`` command.

        On entry the *current* token is the leading identifier: a
        subroutine name, a class name or a variable name. The closing ``)``
        is consumed before returning.
        """
        subroutine_caller = self.get_curr_token()
        function_name = subroutine_caller
        number_args = 0
        # Two forms: caller '.' name '(' ... ')'  or  name '(' ... ')'.
        if self.check_next_token() == ".":
            self.load_next_token()  # '.'
            subroutine_name = self.load_next_token(
            )  # subroutineName
            type = self.symbol_table.type_of(subroutine_caller)
            if type != "NONE":  # caller is a variable holding an object
                # Push the variable as an extra leading argument and
                # qualify the call with the variable's type.
                kind = self.symbol_table.kind_of(subroutine_caller)
                index = self.symbol_table.index_of(subroutine_caller)
                self.vm_writer.write_push(CONVERT_KIND[kind], index)
                function_name = f"{type}.{subroutine_name}"
                number_args += 1
            else:  # caller is a class name
                class_name = subroutine_caller
                function_name = f"{class_name}.{subroutine_name}"
        elif self.check_next_token() == "(":
            # Unqualified call: target is "<class being compiled>.<name>",
            # with POINTER 0 pushed as an extra leading argument.
            subroutine_name = subroutine_caller
            function_name = f"{self.class_name}.{subroutine_name}"
            number_args += 1
            self.vm_writer.write_push("POINTER", 0)
        self.load_next_token()  # '('
        number_args += self.compile_expression_list()  # expressionList
        self.load_next_token()  # ')'
        self.vm_writer.write_call(function_name, number_args)

    # (expression (',' expression)* )?
    def compile_expression_list(self):
        number_args = 0
        if self.check_next_token() != ")":
            number_args += 1
            self.compile_expression()
        while self.check_next_token() != ")":
            number_args += 1
            self.load_next_token()  # curr_token == ','
            self.compile_expression()
        return number_args

    def compile_string(self):
        string = self.load_next_token()  # curr_token == stringConstant
        self.vm_writer.write_push("CONST", len(string))
        self.vm_writer.write_call("String.new", 1)
        for char in string:
            self.vm_writer.write_push("CONST", ord(char))
            self.vm_writer.write_call("String.appendChar", 2)

    def compile_keyword(self):
        keyword = self.load_next_token()  # curr_token == keywordConstant
        if keyword == "this":
            self.vm_writer.write_push("POINTER", 0)
        else:
            self.vm_writer.write_push("CONST", 0)
            if keyword == "true":
                self.vm_writer.write_arithmetic("NOT")

    def is_subroutine_call(self):
        return self.check_next_token() in [".", "("]

    def is_array(self):
        return self.check_next_token() == "["

    def is_class_var_dec(self):
        return self.check_next_token() in ["static", "field"]

    def is_subroutine_dec(self):
        return self.check_next_token() in ["constructor", "function", "method"]

    def is_statement(self):
        return self.check_next_token() in [
            "let", "if", "while", "do", "return"
        ]

    def is_op(self):
        return self.check_next_token() in [
            "+", "-", "*", "/", "&", "|", "<", ">", "="
        ]

    def is_unary_op_term(self):
        return self.check_next_token() in ["~", "-"]

    def check_next_token(self):
        return self.tokenizer.next_token[1]

    def check_next_type(self):
        return self.tokenizer.next_token[0]

    def get_curr_token(self):
        return self.tokenizer.curr_token[1]

    def load_next_token(self):
        if self.tokenizer.has_more_tokens():
            self.tokenizer.advance()  # curr_token = next_token
            return self.tokenizer.curr_token[1]
        else:
            return ""
class CompilationEngine:
    """Recursive-descent parser that writes a Jack class as an XML parse tree.

    Tokens are pulled from a ``JackTokenizer``.  Every terminal is written as
    ``<tag> text </tag>`` on its own line and every grammar rule as an
    enclosing element.  Each ``compile_*`` method expects the tokenizer to be
    positioned on the first token of its construct and, because every written
    token also advances the tokenizer, leaves it on the token that follows.
    """

    # Keywords that open a class-level variable declaration.
    CLASS_VAR_DEC_KEYWORDS = ['static', 'field']
    # Keywords that open a subroutine declaration.
    SUBROUTINE_DEC_KEYWORDS = ['constructor', 'function', 'method']
    # Symbols that may join two terms inside an expression.
    BINARY_OPERATOR_SYMBOLS = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
    # Symbols that may prefix a term.
    UNARY_OPERATOR_SYMBOLS = ['-', '~']
    # Tokenizer token type -> XML element name used for that terminal.
    TYPE_TO_TAG = {'STRING_CONST': 'stringConstant', 'INT_CONST': 'integerConstant', 'KEYWORD': 'keyword',
                   'IDENTIFIER': 'identifier', 'SYMBOL': 'symbol'}

    # Characters that must be replaced by XML entities in the output.
    SYMBOLS_TO_XML_CONVENTION = {'<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;'}

    def __init__(self, input_file_path, output_file_path):
        """Open the output file, tokenize the input and compile it.

        :param input_file_path: path handed to ``JackTokenizer``.
        :param output_file_path: path of the XML file to (over)write.

        The output file is always closed before the constructor returns,
        including when the input has no tokens or compilation raises.
        """
        self.output_file = open(output_file_path, 'w')
        try:
            self.jack_tokenizer = JackTokenizer(input_file_path)
            if self.jack_tokenizer.has_more_tokens():
                self.compile_class()
        finally:
            # compile_class() closes the file on success; closing again is a
            # no-op, and this also covers the empty-input and error paths.
            self.output_file.close()

    def compile_class(self):
        """Compile a whole class: ``class Name { classVarDec* subroutineDec* }``.

        Writes the ``<class>`` element and closes the output file afterwards.
        """
        self.output_file.write('<class>\n')
        self.jack_tokenizer.advance()  # get first token
        self.write_token(self.jack_tokenizer.key_word())    # 'class'
        self.write_token(self.jack_tokenizer.identifier())  # class name
        self.write_token(self.jack_tokenizer.symbol())      # '{'

        # Both helpers loop internally over consecutive declarations.
        if self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.compile_class_var_dec()
        if self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.compile_subroutine()

        self.write_token(self.jack_tokenizer.symbol())      # '}'
        self.output_file.write('</class>')
        self.output_file.close()

    def write_token(self, token_name):
        """Write the current token as one XML element, then advance.

        :param token_name: text placed inside the element; callers pass it
            already XML-escaped where that is needed.
        :raises KeyError: if the tokenizer reports a token type that has no
            entry in ``TYPE_TO_TAG``.

        The element name is derived from the tokenizer's current token type,
        not from ``token_name``.
        """
        type_tag = CompilationEngine.TYPE_TO_TAG[self.jack_tokenizer.token_type()]
        self.output_file.write('<{0}> {1} </{0}>\n'.format(
            type_tag, token_name))
        self.jack_tokenizer.advance()

    def compile_class_var_dec(self):
        """Compile consecutive ``static``/``field`` declarations.

        Each declaration ``(static|field) type name (, name)* ;`` becomes one
        ``<classVarDec>`` element.
        """
        while self.jack_tokenizer.key_word() in CompilationEngine.CLASS_VAR_DEC_KEYWORDS:
            self.output_file.write('<classVarDec>\n')

            self.write_token(self.jack_tokenizer.key_word())    # static | field
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())  # first name

            # Loop on ',' (as compile_var_dec does) rather than on "not ';'",
            # so malformed input cannot make this loop swallow arbitrary
            # tokens or spin once the tokens run out.
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())
                self.write_token(self.jack_tokenizer.identifier())
            self.write_token(self.jack_tokenizer.symbol())      # ';'

            self.output_file.write('</classVarDec>\n')

    def write_type(self):
        """Write a type token, which is either a keyword or an identifier.

        Tokens of any other type are left untouched (nothing is written and
        the tokenizer does not advance).
        """
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())

    def compile_subroutine(self):
        """Compile consecutive constructor/function/method declarations.

        Each one becomes a ``<subroutineDec>`` element containing the header
        tokens, a ``<parameterList>`` and a ``<subroutineBody>``.
        """
        while self.jack_tokenizer.key_word() in CompilationEngine.SUBROUTINE_DEC_KEYWORDS:
            self.output_file.write('<subroutineDec>\n')
            self.write_token(self.jack_tokenizer.key_word())    # subroutine kind
            self.write_type()                                   # return type
            self.write_token(self.jack_tokenizer.identifier())  # subroutine name
            self.write_token(self.jack_tokenizer.symbol())      # '('
            self.compile_parameter_list()
            self.write_token(self.jack_tokenizer.symbol())      # ')'

            self.output_file.write('<subroutineBody>\n')
            self.write_token(self.jack_tokenizer.symbol())      # '{'
            while self.jack_tokenizer.key_word() == 'var':
                self.compile_var_dec()
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())      # '}'
            self.output_file.write('</subroutineBody>\n')

            self.output_file.write('</subroutineDec>\n')

    def compile_parameter_list(self):
        """Compile a possibly empty parameter list (without the parentheses)."""
        self.output_file.write('<parameterList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.write_type()
            self.write_token(self.jack_tokenizer.identifier())
            while self.jack_tokenizer.symbol() == ",":
                self.write_token(self.jack_tokenizer.symbol())
                self.write_type()
                self.write_token(self.jack_tokenizer.identifier())
        self.output_file.write('</parameterList>\n')

    def compile_var_dec(self):
        """Compile one local declaration: ``var type name (, name)* ;``."""
        self.output_file.write('<varDec>\n')
        self.write_token(self.jack_tokenizer.key_word())        # 'var'
        self.write_type()
        self.write_token(self.jack_tokenizer.identifier())
        while self.jack_tokenizer.symbol() == ",":
            self.write_token(self.jack_tokenizer.symbol())
            self.write_token(self.jack_tokenizer.identifier())
        self.write_token(self.jack_tokenizer.symbol())          # ';'
        self.output_file.write('</varDec>\n')

    def compile_statements(self):
        """Compile a run of statements into a ``<statements>`` element.

        Stops at the first token that does not start a statement.  A keyword
        that is not a statement keyword also ends the run; without that check
        the loop would never advance and would spin forever.
        """
        self.output_file.write('<statements>\n')

        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self.jack_tokenizer.token_type() == 'KEYWORD':
            handler = handlers.get(self.jack_tokenizer.key_word())
            if handler is None:
                break
            handler()

        self.output_file.write('</statements>\n')

    def compile_do(self):
        """Compile ``do subroutineCall ;`` into a ``<doStatement>`` element."""
        self.output_file.write('<doStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())        # 'do'

        self.write_token(self.jack_tokenizer.identifier())      # first name
        self.compile_subroutine_call()

        self.write_token(self.jack_tokenizer.symbol())          # ';'
        self.output_file.write('</doStatement>\n')

    def compile_subroutine_call(self):
        """Compile the remainder of a call after its first identifier.

        Handles ``( expressionList )`` and ``. name ( expressionList )``.
        Any other current symbol leaves the tokenizer untouched.
        """
        current = self.jack_tokenizer.symbol()
        if current == '(':
            self.write_token(self.jack_tokenizer.symbol())      # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())      # ')'
        elif current == '.':
            self.write_token(self.jack_tokenizer.symbol())      # '.'
            self.write_token(self.jack_tokenizer.identifier())  # subroutine name
            self.write_token(self.jack_tokenizer.symbol())      # '('
            self.compile_expression_list()
            self.write_token(self.jack_tokenizer.symbol())      # ')'

    def compile_let(self):
        """Compile ``let name ([ expression ])? = expression ;``."""
        self.output_file.write('<letStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())        # 'let'
        self.write_token(self.jack_tokenizer.identifier())      # target name
        if self.jack_tokenizer.symbol() == '[':
            self.write_token(self.jack_tokenizer.symbol())      # '['
            self.compile_expression()
            self.write_token(self.jack_tokenizer.symbol())      # ']'
        self.write_token(self.jack_tokenizer.symbol())          # '='
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())          # ';'
        self.output_file.write('</letStatement>\n')

    def compile_while(self):
        """Compile ``while ( expression ) { statements }``."""
        self.output_file.write('<whileStatement>\n')
        # 'while' is a keyword; read it through key_word() like every other
        # statement does instead of identifier().
        self.write_token(self.jack_tokenizer.key_word())        # 'while'
        self.write_token(self.jack_tokenizer.symbol())          # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())          # ')'
        self.write_token(self.jack_tokenizer.symbol())          # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())          # '}'
        self.output_file.write('</whileStatement>\n')

    def compile_return(self):
        """Compile ``return expression? ;``."""
        self.output_file.write('<returnStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())        # 'return'
        if self.jack_tokenizer.symbol() != ';':
            self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())          # ';'
        self.output_file.write('</returnStatement>\n')

    def compile_if(self):
        """Compile ``if ( expression ) { statements } (else { statements })?``."""
        self.output_file.write('<ifStatement>\n')
        self.write_token(self.jack_tokenizer.key_word())        # 'if'
        self.write_token(self.jack_tokenizer.symbol())          # '('
        self.compile_expression()
        self.write_token(self.jack_tokenizer.symbol())          # ')'

        self.write_token(self.jack_tokenizer.symbol())          # '{'
        self.compile_statements()
        self.write_token(self.jack_tokenizer.symbol())          # '}'
        if self.jack_tokenizer.key_word() == 'else':
            self.write_token(self.jack_tokenizer.key_word())    # 'else'
            self.write_token(self.jack_tokenizer.symbol())      # '{'
            self.compile_statements()
            self.write_token(self.jack_tokenizer.symbol())      # '}'

        self.output_file.write('</ifStatement>\n')

    def compile_expression(self):
        """Compile ``term (op term)*`` into an ``<expression>`` element.

        Operators listed in ``SYMBOLS_TO_XML_CONVENTION`` are written as
        their XML entity.
        """
        self.output_file.write('<expression>\n')
        self.compile_term()
        while self.jack_tokenizer.symbol() in CompilationEngine.BINARY_OPERATOR_SYMBOLS:
            symbol = self.jack_tokenizer.symbol()
            escaped = CompilationEngine.SYMBOLS_TO_XML_CONVENTION.get(symbol, symbol)
            self.write_token(escaped)
            self.compile_term()
        self.output_file.write('</expression>\n')

    def compile_term(self):
        """Compile a single term into a ``<term>`` element.

        Handles identifiers (optionally followed by an array index or a
        subroutine call), string/integer/keyword constants, parenthesised
        expressions and unary-operator terms.  Any other token produces an
        empty ``<term>`` element and is not consumed.
        """
        self.output_file.write('<term>\n')
        token_type = self.jack_tokenizer.token_type()
        if token_type == 'IDENTIFIER':
            self.write_token(self.jack_tokenizer.identifier())
            following = self.jack_tokenizer.symbol()
            if following == '[':
                self.write_token(self.jack_tokenizer.symbol())  # '['
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())  # ']'
            elif following in ('(', '.'):
                self.compile_subroutine_call()
        elif token_type == 'STRING_CONST':
            self.write_token(self.jack_tokenizer.string_val())
        elif token_type == 'INT_CONST':
            self.write_token(self.jack_tokenizer.int_val())
        elif token_type == 'KEYWORD':
            self.write_token(self.jack_tokenizer.key_word())
        elif token_type == 'SYMBOL':
            if self.jack_tokenizer.symbol() == '(':
                self.write_token(self.jack_tokenizer.symbol())  # '('
                self.compile_expression()
                self.write_token(self.jack_tokenizer.symbol())  # ')'
            elif self.jack_tokenizer.symbol() in CompilationEngine.UNARY_OPERATOR_SYMBOLS:
                self.write_token(self.jack_tokenizer.symbol())  # unary operator
                self.compile_term()
        self.output_file.write('</term>\n')

    def compile_expression_list(self):
        """Compile a possibly empty, comma-separated list of expressions.

        The surrounding parentheses are written by the caller.
        """
        self.output_file.write('<expressionList>\n')
        if self.jack_tokenizer.symbol() != ')':
            self.compile_expression()
            while self.jack_tokenizer.symbol() == ',':
                self.write_token(self.jack_tokenizer.symbol())
                self.compile_expression()
        self.output_file.write('</expressionList>\n')