Exemplo n.º 1
0
class CompilationEngine:
    """
    The compilation engine compile the jack code given in the input file
    into an xml code saved in the out_file
    """
    def __init__(self, in_file, out_file):
        """
        A compilation engine constructor
        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        self.tokenizer = JackTokenizer(in_file)
        # NOTE(review): out_file is never explicitly closed by this class —
        # confirm the caller is responsible for closing/flushing it
        self.out_file = open(out_file, 'w')
        # current indentation depth (in tabs) of the emitted XML
        self._indent_count = 0

    def compile_class(self):
        """
        compiles a class according to the grammar:
        'class' className '{' classVarDec* subroutineDec* '}'
        Prints COMPILE_CLASS_ERROR and exits if the first token is not the
        'class' keyword.
        """
        self._write_outer_tag(CLASS_TAG)
        self.tokenizer.advance()
        if self.tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        self._check_write_symbol("{")
        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_write_symbol("}")
        self._write_outer_tag(CLASS_TAG, IS_ENDING_TAG)

    def compile_class_var_dec(self):
        """
        compiles the class's variables declarations:
        ('static' | 'field') type varName (',' varName)* ';'
        """
        self._write_outer_tag(CLASS_VAR_DEC_TAG)
        # we only come in the function if the current token is correct so we
        # can just write it
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        while self._check_if_comma():  # there are more variables
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(CLASS_VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_subroutine_dec(self):
        """
        compiles the class's subroutine (methods and functions) declarations:
        ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody
        """
        self._write_outer_tag(SUBROUTINE_DEC_TAG)
        # we only come in the function if the current token is correct so we
        # can just write it
        self._write_token(self.tokenizer.token_type())
        # the function is either void or has a type
        if self.tokenizer.key_word() == 'void':
            self._write_token(self.tokenizer.token_type())
        else:
            self._check_write_type()
        self._check_write_name()
        self._check_write_symbol("(")
        self.compile_parameter_list()
        self._check_write_symbol(")")
        self.compile_subroutine_body()
        self._write_outer_tag(SUBROUTINE_DEC_TAG, IS_ENDING_TAG)

    def compile_parameter_list(self):
        """
        compiles the (possibly empty) parameter list for the subroutines:
        ((type varName) (',' type varName)*)?
        """
        self._write_outer_tag(PARAMETER_LIST_TAG)
        # if curr_token is ')' it means the param list is empty
        if self.tokenizer.symbol() != ')':
            self._check_write_type()
            self._check_write_name()
            while self._check_if_comma():  # there are more params
                self._check_write_symbol(",")
                self._check_write_type()
                self._check_write_name()
        self._write_outer_tag(PARAMETER_LIST_TAG, IS_ENDING_TAG)

    def compile_subroutine_body(self):
        """
        compiles the body of the subroutine: '{' varDec* statements '}'
        """
        self._write_outer_tag(SUBROUTINE_BODY_TAG)
        self._check_write_symbol("{")
        # there may be multiple variable declarations at the beginning of
        # the subroutine
        while self.tokenizer.key_word() == 'var':
            self.compile_var_dec()
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag(SUBROUTINE_BODY_TAG, IS_ENDING_TAG)

    def compile_var_dec(self):
        """
        compiles the variable declarations:
        'var' type varName (',' varName)* ';'
        """
        self._write_outer_tag(VAR_DEC_TAG)
        # the caller already verified the 'var' keyword, so just write it
        self._write_token(self.tokenizer.token_type())
        self._check_write_type()
        self._check_write_name()
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._check_write_symbol(",")
            self._check_write_name()
        self._check_write_symbol(";")
        self._write_outer_tag(VAR_DEC_TAG, IS_ENDING_TAG)

    def compile_statements(self):
        """
        compiles zero or more statements, stopping at the first token that
        does not open a statement
        """
        self._write_outer_tag(STATEMENTS_TAG)
        # each statement keyword has a dedicated compile method
        dispatch = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self._check_if_statement():
            handler = dispatch.get(self.tokenizer.key_word())
            if handler is not None:
                handler()
        self._write_outer_tag(STATEMENTS_TAG, IS_ENDING_TAG)

    def compile_do(self):
        """
        compiles the do statement: 'do' subroutineCall ';'
        """
        self._write_outer_tag(DO_STATEMENT_TAG)
        # write the 'do' keyword itself
        self._write_token(self.tokenizer.token_type())
        self.compile_subroutine_call()
        self._check_write_symbol(";")
        self._write_outer_tag(DO_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_let(self):
        """
        compiles the let statement:
        'let' varName ('[' expression ']')? '=' expression ';'
        """
        self._write_outer_tag(LET_STATEMENT_TAG)
        # write the 'let' keyword itself
        self._write_token(self.tokenizer.token_type())
        self._check_write_name()
        if self.tokenizer.symbol() == '[':  # if there is an array
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        self._check_write_symbol("=")
        self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(LET_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_if(self):
        """
        compiles the if statements:
        'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?
        """
        self._write_outer_tag(IF_STATEMENT_TAG)
        # write the 'if' keyword itself
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        # there can also be an if else scenario
        if self.tokenizer.key_word() == 'else':
            self._write_token(self.tokenizer.token_type())
            self._check_write_symbol("{")
            self.compile_statements()
            self._check_write_symbol("}")
        self._write_outer_tag(IF_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_while(self):
        """
        compiles the while statements:
        'while' '(' expression ')' '{' statements '}'
        """
        # NOTE(review): sibling sections use *_TAG constants; this method
        # hard-codes "whileStatement" — consider extracting a constant
        self._write_outer_tag("whileStatement")
        # write the 'while' keyword itself
        self._write_token(self.tokenizer.token_type())
        self._check_write_symbol("(")
        self.compile_expression()
        self._check_write_symbol(")")
        self._check_write_symbol("{")
        self.compile_statements()
        self._check_write_symbol("}")
        self._write_outer_tag("whileStatement", IS_ENDING_TAG)

    def compile_return(self):
        """
        compiles the return statements: 'return' expression? ';'
        """
        self._write_outer_tag(RETURN_STATEMENT_TAG)
        # write the 'return' keyword itself
        self._write_token(self.tokenizer.token_type())
        # if cur token is ; we return nothing, otherwise we return something
        if not self.tokenizer.symbol() == ';':
            self.compile_expression()
        self._check_write_symbol(";")
        self._write_outer_tag(RETURN_STATEMENT_TAG, IS_ENDING_TAG)

    def compile_subroutine_call(self):
        """
        compiles the subroutine calls ( when we actually call a subroutine
        as  opposed to declaring it):
        (className | varName '.')? subroutineName '(' expressionList ')'
        """
        self._check_write_name()
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())
        if self.tokenizer.symbol() == ".":
            self._check_write_symbol(".")
            self._check_write_name()
        self._check_write_symbol("(")
        self.compile_expression_list()
        self._check_write_symbol(")")

    def compile_expression(self):
        """
        compiles expressions which are terms and possibly operators and more
        terms: term (op term)*
        """
        self._write_outer_tag(EXPRESSION_TAG)
        self.compile_term()
        # there may be a few operators in one expression
        while self.tokenizer.symbol() in OPERATIONS:
            self._write_op()  # writes the (xml-escaped) operator symbol
            self.compile_term()
        self._write_outer_tag(EXPRESSION_TAG, IS_ENDING_TAG)

    def compile_term(self):
        """
        compiles terms according to the grammar; prints COMPILE_TERM_ERROR
        and exits when the current token cannot start a term
        """
        self._write_outer_tag(TERM_TAG)
        cur_type = self.tokenizer.token_type()
        # either a string/int constant
        if self.tokenizer.token_type() in ["INT_CONST", "STRING_CONST"]:
            self._write_token(cur_type)
        # or a constant keyword (true, false, null, this)
        elif self.tokenizer.key_word() in KEYWORD_CONST:
            self._write_token(cur_type)
        # or an expression within brown brackets
        elif self.tokenizer.symbol() == '(':
            self._write_token(cur_type)
            self.compile_expression()
            self._check_write_symbol(")")
        # or a unary op and then a term
        elif self.tokenizer.symbol() in UNARY_OPS:
            self._write_op()
            self.compile_term()
        # or it is an identifier which could be:
        # an array entry, a subroutine call, or a plain variable name
        elif self.tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()
        self._write_outer_tag(TERM_TAG, IS_ENDING_TAG)

    def _compile_term_identifier(self):
        """
        compiles terms in case of identifier token; uses one token of
        lookahead (get_next_token) to distinguish the three identifier forms
        """
        # an array: varName '[' expression ']'
        if self.tokenizer.get_next_token() == '[':
            self._check_write_name()
            self._check_write_symbol("[")
            self.compile_expression()
            self._check_write_symbol("]")
        # or a subroutine call
        elif self.tokenizer.get_next_token() in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._check_write_name()  # or just a variable name

    def compile_expression_list(self):
        """
        compiles a (possibly empty) expression list:
        (expression (',' expression)*)?
        """
        self._write_outer_tag(EXPRESSION_LIST_TAG)
        # if it is ')' then the expression list is empty
        if self.tokenizer.symbol() != ')':
            self.compile_expression()
            while self._check_if_comma():  # while there are more expressions
                # consistency fix: validate-and-write the comma like every
                # other comma loop in this class, instead of writing the raw
                # token (behavior is identical since the comma was already
                # checked, but the intent is now explicit)
                self._check_write_symbol(",")
                self.compile_expression()
        self._write_outer_tag(EXPRESSION_LIST_TAG, IS_ENDING_TAG)

    def _check_if_var_dec(self):
        """
        :return: True iff the current keyword opens a class-level variable
        declaration, i.e. it is 'static' or 'field'
        """
        current_keyword = self.tokenizer.key_word()
        return current_keyword in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        :return: True iff the current keyword opens a subroutine
        declaration ('constructor' / 'function' / 'method')
        """
        current_keyword = self.tokenizer.key_word()
        return current_keyword in SUBROUTINE

    def _check_if_comma(self):
        """
        checks if current token is a comma
        :return: true iff the current token is a ','
        """
        return self.tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        :return: True iff the current keyword starts a statement
        ('let' / 'if' / 'while' / 'do' / 'return')
        """
        current_keyword = self.tokenizer.key_word()
        return current_keyword in STATEMENTS

    def _check_write_type(self):
        """
        writes the current token as a type: either one of the builtin type
        keywords, or (otherwise) a class name identifier
        """
        if self.tokenizer.key_word() not in TYPE_KEYWORDS:
            # not a builtin type keyword, so it must be a class name
            self._check_write_name()
        else:
            self._write_token(self.tokenizer.token_type())

    def _check_write_symbol(self, expected_symbol):
        """
        checks if the current token is the expected symbol, if so it write
        it to the output file
        :param expected_symbol: the symbol we are validating is the current
        token
        :return: prints illegal statement error if it is not the expected
        symbol and exits the program
        """
        if self.tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._write_token(self.tokenizer.token_type())

    def _check_write_name(self):
        """
        checks the current token is a name (identifier), and if so, write
        it to the output file
        :return: prints illegal statement error if it is not a name and
        exits the program
        """
        if self.tokenizer.identifier():
            self._write_token("IDENTIFIER")
        else:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()

    def _write_outer_tag(self, tag_str, end=False):
        """
        writes the outer tags of the different sections we are compiling
        :param tag_str: the string of the current section we are compiling
        :param end: true iff it is an end tag
        """
        if end:  # we decrease the indent count before the closing tag
            self._indent_count -= 1
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("</" + tag_str + ">\n")
        else:  # we increase the indent count after the opening tag
            self.out_file.write("\t" * self._indent_count)
            self.out_file.write("<" + tag_str + ">\n")
            self._indent_count += 1

    def _write_op(self):
        """
        writes an op symbol to the out file
        """
        self.out_file.write("\t" * self._indent_count)
        self.out_file.write("<symbol> ")
        if self.tokenizer.symbol() == '<':
            self.out_file.write("&lt;")
        elif self.tokenizer.symbol() == '>':
            self.out_file.write("&gt;")
        elif self.tokenizer.symbol() == '&':
            self.out_file.write("&amp;")
        elif self.tokenizer.symbol() == '\"':
            self.out_file.write("&quot;")
        else:
            self.out_file.write(self.tokenizer.symbol())
        self.out_file.write(" </symbol>\n")
        self.tokenizer.advance()

    def _write_token(self, cur_type):
        """
        writes the current token as a one-line xml element and advances
        :param cur_type: the token-type key used to look up the tag name
        """
        tag = TOKEN_TYPE_STR[cur_type]
        indent = "\t" * self._indent_count
        token_text = str(self.tokenizer.get_token_str())
        self.out_file.write(indent + "<" + tag + "> " + token_text + " </" + tag + ">\n")
        self.tokenizer.advance()
Exemplo n.º 2
0
class CompilationEngine:
    def __init__(self, input_file, output_file):
        """
        compiles the jack code in input_file into vm code written to
        output_file; the entire compilation runs inside the constructor
        """
        self.tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(output_file)
        self.current_sub_name = None
        self.class_name = None
        # NOTE(review): func_counter appears unused in this class
        self.func_counter = 0
        # counters used to generate unique while/if labels
        self.while_counter = 0
        self.if_counter = 0

        # starts the process
        self.tokenizer.advance()
        self.compile_class()
        self.vm_writer.close()

    def compile_class(self):
        """
        compiles the class function: records the class name and compiles
        every class-level variable declaration and subroutine
        :return: none
        """
        # advances a single step to get the class name
        self.tokenizer.advance()
        # set class's name
        self.class_name = self.tokenizer.current_token
        # moves to the symbol {
        self.tokenizer.advance()

        # move to the next symbol and check what it is
        self.tokenizer.advance()

        # compiles class variable
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles subroutine
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # we are now at the <symbol> } <symbol> which closes the class

    def compile_class_var_dec(self):
        """
        compiles a var dec: records every declared class variable
        (static/field) in the symbol table; emits no vm code
        :return: none
        """
        var_kind = self.tokenizer.key_word()
        # advances the token to the var's type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()
        # advances the token to the var's identifier
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_name = self.tokenizer.key_word()
        else:
            var_name = self.tokenizer.identifier()

        # update symbol table
        self.symbol_table.define(var_name, var_type, var_kind)

        # advance to next token, and check if there are more var_names
        self.tokenizer.advance()
        while self.tokenizer.current_token != ";":
            # token is <symbol> , <symbol>
            # advance to var's identifier
            self.tokenizer.advance()
            var_name = self.tokenizer.current_token
            # update symbol table
            self.symbol_table.define(var_name, var_type, var_kind)
            self.tokenizer.advance()

        # the current token is <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """
        compiles a single sub routine: resets the subroutine symbol table,
        records the parameters, then compiles the body
        :return: none
        """
        # start new subroutine symbol table
        self.symbol_table.start_subroutine()
        # get subroutine type (method/construction/function)
        sub_type = self.tokenizer.key_word()

        # advances the token to what the subroutine returns
        self.tokenizer.advance()
        # updates the return type
        # NOTE(review): return_type is collected but never used afterwards
        if self.tokenizer.token_type() == KEY_WORD:
            return_type = self.tokenizer.key_word()
        else:
            return_type = self.tokenizer.identifier()

        # advances the token to <identifier> sub_name <identifier>
        self.tokenizer.advance()
        # update the subroutine name
        subroutine_name = self.tokenizer.identifier()
        self.current_sub_name = subroutine_name

        # advance to <symbol> ( <symbol>
        self.tokenizer.advance()
        # if subroutine is a method, add 'this' to the symbol table as argument 0
        if sub_type == METHOD:
            self.symbol_table.define("this", self.class_name, "ARG")
        # compiles the parameter list
        self.compile_parameter_list()
        # we are at <symbol> ) <symbol>
        # advance to subroutine body, and compile it
        self.tokenizer.advance()
        self.compile_subroutine_body(sub_type)

    def compile_subroutine_body(self, sub_type):
        """
        the method compiles the subroutine body: declares locals, emits the
        vm 'function' line, sets up 'this' for methods/constructors, and
        compiles the statements
        :param sub_type: the subroutine kind (method/constructor/function)
        :return: none
        """
        # we are at bracket {, advance
        self.tokenizer.advance()

        # compile var dec (must happen first so the local count below is
        # complete before the function line is written)
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()

        # write function label
        self.vm_writer.write_function(
            self.class_name + '.' + self.current_sub_name,
            self.symbol_table.var_count("VAR"))

        # if is method, update THIS to the object
        if sub_type == METHOD:
            self.vm_writer.write_push(ARG, 0)
            self.vm_writer.write_pop("POINTER", 0)

        # if is constructor, allocate memory, and put in this
        if sub_type == CONSTRUCTOR:
            self.vm_writer.write_push("CONST",
                                      self.symbol_table.var_count("FIELD"))
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop("POINTER", 0)

        if self.tokenizer.current_token != "}":
            self.compile_statements()

        # we are at bracket }, advance
        self.tokenizer.advance()

    def compile_parameter_list(self):
        """
        compiles a parameter list: defines every parameter in the symbol
        table as an ARG; emits no vm code
        :return: none
        """
        # advance to first parameter
        self.tokenizer.advance()
        # while there are more parameters
        while self.tokenizer.current_token != ')':
            # tests what to put as the type of the object
            if self.tokenizer.token_type() == KEY_WORD:
                var_type = self.tokenizer.key_word()
            else:
                var_type = self.tokenizer.identifier()

            # advance to variables name <identifier> var_name <identifier>
            self.tokenizer.advance()
            var_name = self.tokenizer.identifier()

            # define new variable
            self.symbol_table.define(var_name, var_type, "ARG")

            # gets the next token
            self.tokenizer.advance()

            # advance to next token if we are at ','
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()

    def compile_var_dec(self):
        """
        compiles a declaration of a variable: defines every declared local
        in the symbol table as a VAR; emits no vm code
        :return: none
        """
        # we are at <keyword> var <keyword>
        # advance to variable type
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            var_type = self.tokenizer.key_word()
        else:
            var_type = self.tokenizer.identifier()

        # advance to the variables name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            # define variable in symbol table
            self.symbol_table.define(var_name, var_type, "VAR")
            # advance to next token
            self.tokenizer.advance()
            # skip over a ',' separating two variable names
            if self.tokenizer.current_token == ",":
                self.tokenizer.advance()
        # we are at <symbol> ; <symbol>
        # advance to next token
        self.tokenizer.advance()

    def compile_statements(self):
        """
        the method compiles statements until the closing '}' of the block
        :return: none
        """
        # each statement keyword has its own compile method
        handlers = {
            LET: self.compile_let,
            IF: self.compile_if,
            WHILE: self.compile_while,
            DO: self.compile_do,
            RETURN: self.compile_return,
        }
        # while there are more statements, deal with each one
        while self.tokenizer.current_token != '}':
            handler = handlers.get(self.tokenizer.key_word())
            if handler is not None:
                handler()

    def compile_do(self):
        """
        the method compiles a do command; the call's return value is
        discarded (popped into temp 0)
        :return: none
        """
        # we are at <keyword> do <keyword>
        # advance to next token <identifier> name_of_func <identifier>
        self.tokenizer.advance()
        func_name = self.tokenizer.identifier()
        self.tokenizer.advance()
        # compile the subroutine call
        self.compile_subroutine_call(func_name)
        # pop the result from the function into temp
        self.vm_writer.write_pop("TEMP", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()

    def compile_let(self):
        """
        the method compiles a let statement, handling both plain variables
        and array entries (via the THAT segment)
        :return: none
        """
        # we are at <keyword> let <keyword>
        # advance to next token (var_name)
        self.tokenizer.advance()
        # we are at <identifier> var_name <identifier>
        var_name = self.tokenizer.identifier()
        # get variable data
        var_index = self.symbol_table.index_of(var_name)
        var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        is_array = False
        if self.tokenizer.current_token == '[':
            is_array = True
            # push arr
            self.vm_writer.write_push(var_kind, var_index)
            # advance to expression and compile it
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ] <symbol>, advance to next token
            self.tokenizer.advance()
            # add the index of array and the expression to get the correct location
            self.vm_writer.write_arithmetic("ADD")
        # we are at <symbol> = <symbol>
        # advance to expression and compile it
        self.tokenizer.advance()
        self.compile_expression()

        # if var is an array: stash the rhs in temp 0 so the target address
        # can be popped into pointer 1 (THAT) before storing
        if is_array:
            self.vm_writer.write_pop("TEMP", 0)
            self.vm_writer.write_pop("POINTER", 1)
            self.vm_writer.write_push("TEMP", 0)
            self.vm_writer.write_pop("THAT", 0)
        # if var is not an array
        else:
            self.vm_writer.write_pop(var_kind, var_index)

        # we are at <symbol> ; <symbol>, advance to next
        self.tokenizer.advance()
        return

    def compile_while(self):
        """
        the method compiles a while statement using a pair of labels:
        While_N (loop top) and End_While_N (exit)
        :return: none
        """
        while_counter = str(self.while_counter)
        # update the while counter (so nested/later loops get fresh labels)
        self.while_counter += 1
        # create new label for the start of the while
        self.vm_writer.write_label("While_" + while_counter)
        # we are at <keyword> while <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_expression()
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # negate expression
        self.vm_writer.write_arithmetic("NOT")
        # if condition is not met, go to the end of the while
        self.vm_writer.write_if("End_While_" + while_counter)
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        # compile statements
        self.compile_statements()
        # go back to the start of the while
        self.vm_writer.write_goto("While_" + while_counter)
        # create new label for the end of the while
        self.vm_writer.write_label("End_While_" + while_counter)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        return

    def compile_return(self):
        """
        the method compiles a return statement; every vm function must
        return a value, so void subroutines push constant 0
        :return: none
        """
        # we are at <keyword> return <keyword>, advance to next token
        self.tokenizer.advance()
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        else:
            # if function is void, push const 0 to the stack
            self.vm_writer.write_push("CONST", 0)
        # we are at <symbol> ; <symbol>, advance to next token
        self.tokenizer.advance()
        self.vm_writer.write_return()
        return

    def compile_if(self):
        """
        the method compiles an if statement using a pair of labels:
        ELSE_N (taken when the condition fails) and END_IF_N
        :return: none
        """
        if_count = str(self.if_counter)
        # update if counter (so nested/later ifs get fresh labels)
        self.if_counter += 1
        # we are at <keyword> if <keyword>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> ( <symbol>, advance to next token
        self.tokenizer.advance()
        # compile expression
        self.compile_expression()
        # negate the expression
        self.vm_writer.write_arithmetic("NOT")
        # check if condition is met
        self.vm_writer.write_if("ELSE_" + if_count)
        # we are at <symbol> ) <symbol>, advance to next token
        self.tokenizer.advance()
        # we are at <symbol> { <symbol>, advance to next token
        self.tokenizer.advance()
        self.compile_statements()
        # jump to the end of the if
        self.vm_writer.write_goto("END_IF_" + if_count)
        # we are at <symbol> } <symbol>, advance to next token
        self.tokenizer.advance()
        # create else label (which may be empty)
        self.vm_writer.write_label("ELSE_" + if_count)
        if self.tokenizer.current_token == 'else':
            # we are at <keyword> else <keyword>, advance
            self.tokenizer.advance()
            # we are at <symbol> { <symbol>, advance
            self.tokenizer.advance()
            self.compile_statements()
            # we are at <symbol> } <symbol>, advance
            self.tokenizer.advance()
        # create new label
        self.vm_writer.write_label("END_IF_" + if_count)
        return

    def compile_expression(self):
        """
        the method compiles an expression: term (op term)*; operators are
        emitted after both operands (postfix), matching the vm stack model
        :return: none
        """
        # compile the term
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            call_math = False
            # we are at <symbol> op <symbol>
            # for '*' and '/' OP_DICT holds a (function name, n_args) pair
            # used with write_call; otherwise it holds an arithmetic command
            op = OP_DICT.get(self.tokenizer.current_token)
            # check if operator needs to call math
            if self.tokenizer.current_token == '*' or self.tokenizer.current_token == '/':
                call_math = True
            # advance to next term and compile term
            self.tokenizer.advance()
            self.compile_term()
            # output the operator
            if call_math:
                self.vm_writer.write_call(op[0], op[1])
            else:
                self.vm_writer.write_arithmetic(op)
        return

    def compile_term(self):
        """
        the method compiles a term: integer/string/keyword constant,
        parenthesised expression, unary op, array entry, subroutine call
        or plain variable
        :return: none
        """
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            # push the const int
            self.vm_writer.write_push("CONST", self.tokenizer.int_val())
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # write without the ""
            string_val = self.tokenizer.string_val()
            # push the len of the string and call the string constructor
            self.vm_writer.write_push("CONST", len(string_val))
            self.vm_writer.write_call("String.new", 1)
            # update new string, one character at a time
            for char in string_val:
                self.vm_writer.write_push("CONST", ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            segment, idx = KEY_WORD_CONST.get(self.tokenizer.current_token)
            self.vm_writer.write_push(segment, idx)
            # 'true' is represented as -1: push 0 and negate it
            if self.tokenizer.current_token == 'true':
                self.vm_writer.write_arithmetic('NOT')
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # we are at <symbol> ( <symbol>, advance to next token
            self.tokenizer.advance()
            self.compile_expression()
            # we are at <symbol> ) <symbol>, advance to next token
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            op_command = UNARY_OP.get(self.tokenizer.current_token)
            self.tokenizer.advance()
            # the operand is compiled first, then the unary op applied
            self.compile_term()
            self.vm_writer.write_arithmetic(op_command)
        # var/var[expression]/subroutine_call
        else:
            # we are at <identifier> var_name <identifier>
            var_name = self.tokenizer.identifier()
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                # push arr
                self.vm_writer.write_push(var_kind, var_index)
                # we are at <symbol> [ <symbol>, advance to expression and compile it
                self.tokenizer.advance()
                self.compile_expression()
                # add the index of array and the expression to get the correct location
                self.vm_writer.write_arithmetic("ADD")
                # set the that pointer
                self.vm_writer.write_pop("POINTER", 1)
                # push to the stack what is in the arr[i]
                self.vm_writer.write_push("THAT", 0)
                # we are at <symbol> ] <symbol>, advance
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or self.tokenizer.current_token == '.':
                self.compile_subroutine_call(var_name)
            else:
                # if is just 'var'
                var_kind = TYPE_DICT.get(self.symbol_table.kind_of(var_name))
                var_index = self.symbol_table.index_of(var_name)
                self.vm_writer.write_push(var_kind, var_index)
        return

    def compile_expression_list(self):
        """
        the method compiles a list of expressions
        :return: amount of arguments in the expression list
        """
        expression_counter = 0
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            expression_counter += 1
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                expression_counter += 1
                # we are at <symbol> , <symbol>, advance
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        return expression_counter

    def compile_subroutine_call(self, identifier):
        """
        the method compiles a subroutine call (not including the subroutine
        first varName); handles obj.method(), ClassName.function() and bare
        method() calls on the current object
        :param identifier: the name token already consumed by the caller
        :return: none
        """
        # default: a bare method() call on the current class
        func_name = self.class_name + "." + identifier
        num_of_arguments = 0
        if self.tokenizer.current_token == '.':
            # change func name to its class name
            # identifier found in the symbol table => it is an object, so
            # this is a method call and the object is the hidden first arg
            if self.symbol_table.type_of(identifier) is not None:
                func_name = self.symbol_table.type_of(identifier)
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = func_name + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
                # push the object to the stack
                segment = TYPE_DICT.get(self.symbol_table.kind_of(identifier))
                idx = self.symbol_table.index_of(identifier)
                self.vm_writer.write_push(segment, idx)
                num_of_arguments += 1
            else:
                # not in the symbol table => identifier is a class name
                # we are at <symbol> . <symbol>, advance
                self.tokenizer.advance()
                # we are at <identifier> sub_name <identifier>
                func_name = identifier + "." + self.tokenizer.identifier()
                self.tokenizer.advance()
        else:
            # bare call: the current object ('this') is the first argument
            self.vm_writer.write_push("POINTER", 0)
            num_of_arguments += 1
        # we are at <symbol> ( <symbol>, advance
        self.tokenizer.advance()
        num_of_arguments += self.compile_expression_list()
        # we are at <symbol> ) <symbol>, advance
        self.tokenizer.advance()
        self.vm_writer.write_call(func_name, num_of_arguments)
        return
Exemplo n.º 3
0
class CompilationEngine:
    """
    Syntax analyzer for the Jack language (nand2tetris project 10): reads
    tokens from a JackTokenizer and writes the parse tree of the input as
    indented XML to the output file. Compilation starts immediately on
    construction.
    """

    def __init__(self, input_file, output_file):
        """
        Builds the engine and compiles the whole input file right away.
        :param input_file: path of the .jack file to analyze
        :param output_file: path of the .xml file to produce
        """
        self.tokenizer = JackTokenizer(input_file)
        self.xml_file = open(output_file, "w")
        # current nesting depth; one tab per level in the XML output
        self.space_depth = 0

        # starts the process
        self.tokenizer.advance()
        self.compile_class()
        self.xml_file.close()

    def compile_class(self):
        """
        compiles the class function
        :return: none
        """
        # write <class>
        self.non_terminal_open(XML_CLASS)
        # write <keyword> class <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances a single step to get the class name
        self.tokenizer.advance()
        # write <identifier> class_name <identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # moves for the symbol
        self.tokenizer.advance()
        # write <symbol> { <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()

        # compiles class variable
        while KEY_WORDS.get(self.tokenizer.current_token) == STATIC or \
                KEY_WORDS.get(self.tokenizer.current_token) == FIELD:
            self.compile_class_var_dec()
        # compiles subroutine
        while KEY_WORDS.get(self.tokenizer.current_token) == CONSTRUCTOR or \
                KEY_WORDS.get(self.tokenizer.current_token) == METHOD or \
                KEY_WORDS.get(self.tokenizer.current_token) == FUNCTION:
            self.compile_sub_routine()
        # write <symbol> } <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write <class>
        self.non_terminal_end(XML_CLASS)

    def non_terminal_end(self, xml_type):
        """
        closes a non terminal function (dedents, then writes </xml_type>)
        :param xml_type: the xml type we are working with
        :return: none
        """
        self.space_depth -= 1
        self.write_line(self.terminal_end(xml_type))

    def non_terminal_open(self, xml_type):
        """
        an opening for a non terminal (writes <xml_type>, then indents)
        :param xml_type: the xml type
        :return: none
        """
        self.write_line(self.terminal_opening(xml_type) + "\n")
        self.space_depth += 1

    def terminal_opening(self, word):
        """
        makes the word a starts of a function
        :param word: the word to make a start
        :return: the word as a start, e.g. "<word>"
        """

        return "<" + word + ">"

    def terminal_end(self, word):
        """
        makes the word a start and end
        :param word: the word to work with
        :return: the word as an end, e.g. "</word>" plus a newline
        """

        return "</" + word + ">\n"

    def write_line(self, word):
        """
        writes the line to the file with the correct depth
        (one tab per nesting level)
        :param word: the word we are writing
        :return: none
        """

        self.xml_file.write("\t" * self.space_depth + word)

    def one_liner(self, xml_type, token):
        """
        writes the one liner function: "<xml_type> token </xml_type>"
        :param xml_type: the type
        :param token: thw token to put in the xml
        :return:
        """

        self.write_line(
            self.terminal_opening(xml_type) + " " + token + " " +
            self.terminal_end(xml_type))

    def compile_class_var_dec(self):
        """
        compiles a var dec of the form:
        ('static'|'field') type varName (',' varName)* ';'
        :return: none
        """
        # write <class_var_dict>
        self.non_terminal_open(XML_CLASS_VAR_DEC)
        # write <keyword> static/field <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances the token
        self.tokenizer.advance()
        # tests what to put as the type of the object
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <identifier> var_name <identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()

        # check if there are more var_names
        while self.tokenizer.current_token != ";":
            # write <symbol> , <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <identifier> var_name <identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()

        # write <symbol> ; <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # closes the statement
        self.non_terminal_end(XML_CLASS_VAR_DEC)
        self.tokenizer.advance()

    def compile_sub_routine(self):
        """
        compiles a single sub routine:
        ('constructor'|'function'|'method') (type|'void') name '(' params ')'
        body
        :return: none
        """
        # writes <subroutine_dec>
        self.non_terminal_open(XML_SUBROUTINE_DEC)
        # write <keyword> function/method/const <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advances the token
        self.tokenizer.advance()
        # tests what to put as the type of the object
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # advances the token
        self.tokenizer.advance()
        # write <identifier> sub_name <identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # compiles the parameter list (it advances past '(' itself)
        self.compile_parameter_list()
        # write <symbol> ) <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile subroutine body
        self.compile_subroutine_body()
        # closes the sub routine
        self.non_terminal_end(XML_SUBROUTINE_DEC)

    def compile_subroutine_body(self):
        """
        the method compiles the subroutine body: '{' varDec* statements '}'
        :return: none
        """
        # write <sub routine>
        self.non_terminal_open(XML_SUBROUTINE_BODY)

        # opens the bracket {
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()

        # compile var dec
        while KEY_WORDS.get(self.tokenizer.current_token) == VAR:
            self.compile_var_dec()

        if self.tokenizer.current_token != "}":
            self.compile_statements()

        # closes the bracket
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # closes the sub routine body (write <sub routine>)
        self.non_terminal_end(XML_SUBROUTINE_BODY)

    def compile_parameter_list(self):
        """
        compiles a (possibly empty) parameter list:
        ((type varName) (',' type varName)*)?
        :return: none
        """
        # writes <parameter_list>
        self.non_terminal_open(XML_PARAMETER_LIST)
        self.tokenizer.advance()

        while self.tokenizer.current_token != ')':
            # tests what to put as the type of the object
            if self.tokenizer.token_type() == KEY_WORD:
                self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            else:
                self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)

            # gets the variables name
            self.tokenizer.advance()
            # write <identifier> var_name <identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)

            # gets the next token
            self.tokenizer.advance()

            # tests what to put as the type of the object
            if self.tokenizer.current_token == ",":
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()

        # closes the statement
        self.non_terminal_end(XML_PARAMETER_LIST)

    def compile_var_dec(self):
        """
        compiles a declaration of a variable:
        'var' type varName (',' varName)* ';'
        :return: none
        """
        # writes the opening
        self.non_terminal_open(XML_VAR_DEC)
        # write <keyword> var <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # tests what to put as the type of the object
        self.tokenizer.advance()
        if self.tokenizer.token_type() == KEY_WORD:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        else:
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # gets the variables name
        self.tokenizer.advance()
        while self.tokenizer.current_token != ';':
            # writes <identifier> var_name <identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            # gets the next token
            self.tokenizer.advance()
            # tests what to put as the type of the object
            if self.tokenizer.current_token == ",":
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
        # writes <symbol> ; <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # closes the statement
        self.non_terminal_end(XML_VAR_DEC)

    def compile_statements(self):
        """
        the method compiles statements until the enclosing '}' is reached
        :return: none
        """
        # write <statements>
        self.non_terminal_open(XML_STATEMENTS)
        # while there are more statements, deal with each one
        # NOTE(review): a token that is not a statement keyword (and not
        # '}') would loop forever here — assumes well-formed Jack input.
        while self.tokenizer.current_token != '}':
            statement_type = self.tokenizer.key_word()
            if statement_type == LET:
                self.compile_let()
            elif statement_type == IF:
                self.compile_if()
            elif statement_type == WHILE:
                self.compile_while()
            elif statement_type == DO:
                self.compile_do()
            elif statement_type == RETURN:
                self.compile_return()
        # write <statements>
        self.non_terminal_end(XML_STATEMENTS)

    def compile_do(self):
        """
        the method compiles a do command: 'do' subroutineCall ';'
        :return: none
        """
        # write <do_statement>
        self.non_terminal_open(XML_DO_STATEMENT)
        # write <keyword> do <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advance to next token (subroutine call)
        self.tokenizer.advance()
        # write <identifier> name_of_func <identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        self.tokenizer.advance()
        # compile the subroutine call
        self.compile_subroutine_call()
        # write <symbol> ; <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write <do_statement>
        self.non_terminal_end(XML_DO_STATEMENT)
        self.tokenizer.advance()

    def compile_let(self):
        """
        the method compiles a let statement:
        'let' varName ('[' expression ']')? '=' expression ';'
        :return: none
        """
        # write <let_statement>
        self.non_terminal_open(XML_LET_STATEMENT)
        # write <keyword> let <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        # advance to next token (var_name)
        self.tokenizer.advance()
        # write <identifier> var_name <identifier>
        self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
        # advance to next token ('[' | '=')
        self.tokenizer.advance()
        if self.tokenizer.current_token == '[':
            # write <symbol> [ <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            # advance to expression and compile it
            self.tokenizer.advance()
            self.compile_expression()
            # write <symbol> ] <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> = <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # advance to expression and compile it
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ; <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write <let_statement>
        self.non_terminal_end(XML_LET_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_while(self):
        """
        the method compiles a while statement:
        'while' '(' expression ')' '{' statements '}'
        :return: none
        """
        # write <while_statement>
        self.non_terminal_open(XML_WHILE_STATEMENT)
        # write <keyword> while <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ) <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_statements()
        # write <symbol> } <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write <while_statement>
        self.non_terminal_end(XML_WHILE_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_return(self):
        """
        the method compiles a return statement: 'return' expression? ';'
        :return: none
        """
        # write <return_statement>
        self.non_terminal_open(XML_RETURN_STATEMENT)
        # write <keyword> return <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        if self.tokenizer.current_token != ';':
            self.compile_expression()
        # write <symbol> ; <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        # write <return_statement>
        self.non_terminal_end(XML_RETURN_STATEMENT)
        self.tokenizer.advance()
        return

    def compile_if(self):
        """
        the method compiles an if statement with an optional else clause
        :return: none
        """
        # write <if_statement>
        self.non_terminal_open(XML_IF_STATEMENT)
        # write <keyword> if <keyword>
        self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> ( <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression()
        # write <symbol> ) <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        # write <symbol> { <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_statements()
        # write <symbol> } <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        if self.tokenizer.current_token == 'else':
            # write <keyword> else <keyword>
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <symbol> { <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_statements()
            # write <symbol> } <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <if_statement>
        self.non_terminal_end(XML_IF_STATEMENT)
        return

    def compile_expression(self):
        """
        the method compiles an expression: term (op term)*
        :return:
        """
        # write <expression>
        self.non_terminal_open(XML_EXPRESSION)
        self.compile_term()
        while self.tokenizer.current_token in OP_LST:
            # write <symbol> op <symbol>
            # operators with XML-reserved characters (e.g. <, >, &) are
            # written via their escaped form from OP_DICT
            if self.tokenizer.current_token in OP_DICT:
                self.one_liner(XML_SYMBOL,
                               OP_DICT.get(self.tokenizer.current_token))
            else:
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
        # write <expression>
        self.non_terminal_end(XML_EXPRESSION)
        return

    def compile_term(self):
        """
        the method compiles a term: a constant, keyword constant,
        parenthesized expression, unary op + term, variable, array entry
        or subroutine call
        :return: none
        """
        # write <term>
        self.non_terminal_open(XML_TERM)
        token_type = self.tokenizer.token_type()
        if token_type == INT_CONST:
            self.one_liner(XML_INT_CONST, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif token_type == STRING_CONST:
            # write without the ""
            self.one_liner(XML_STRING_CONST,
                           self.tokenizer.current_token[1:-1])
            self.tokenizer.advance()
        elif self.tokenizer.current_token in KEY_WORD_CONST:
            self.one_liner(XML_KEY_WORD, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif self.tokenizer.current_token == '(':
            # write <symbol> ( <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_expression()
            # write <symbol> ) <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
        elif self.tokenizer.current_token in UNARY_OP:
            # write <symbol> unary_op <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            self.compile_term()
        # var/var[expression]/subroutine_call
        else:
            # write <identifier> var_name <identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
            # if is var_name[expression]
            if self.tokenizer.current_token == '[':
                # write <symbol> [ <symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
                self.compile_expression()
                # write <symbol> ] <symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
            # if is a subroutine call
            elif self.tokenizer.current_token == '(' or self.tokenizer.current_token == '.':
                self.compile_subroutine_call()
            # write <term>
        self.non_terminal_end(XML_TERM)
        return

    def compile_expression_list(self):
        """
        the method compiles a (possibly empty) comma separated list of
        expressions
        :return: none
        """
        # write <expression_list>
        self.non_terminal_open(XML_EXPRESSION_LIST)
        # check that list is not empty
        if self.tokenizer.current_token != ')':
            # compile first expression
            self.compile_expression()
            # if there are more expressions, compile them
            while self.tokenizer.current_token == ',':
                # write <symbol> , <symbol>
                self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
                self.tokenizer.advance()
                # compile expression
                self.compile_expression()
        # write <expression_list>
        self.non_terminal_end(XML_EXPRESSION_LIST)
        return

    def compile_subroutine_call(self):
        """
        the method compiles a subroutine call (not including the subroutine
        first varName, which the caller has already written)
        :return: none
        """
        if self.tokenizer.current_token == '.':
            # write <symbol> . <symbol>
            self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
            self.tokenizer.advance()
            # write <identifier> sub_name <identifier>
            self.one_liner(XML_IDENTIFIER, self.tokenizer.current_token)
            self.tokenizer.advance()
        # write <symbol> ( <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        self.compile_expression_list()
        # write <symbol> ) <symbol>
        self.one_liner(XML_SYMBOL, self.tokenizer.current_token)
        self.tokenizer.advance()
        return
# Exemplo n.º 4
class CompilationEngine:
    """
    The compilation engine compile the jack code given in the input file
    into an xml code saved in the out_file
    """
    def __init__(self, in_file, out_file):
        """
        Sets up a compilation engine that reads *in_file* and writes the
        generated VM commands to *out_file*.
        :param in_file: the file we are currently compiling
        :param out_file: the file where we save the output
        """
        # token stream in, VM commands out
        self._tokenizer = JackTokenizer(in_file)
        self._vm_writer = VMWriter(out_file)
        # symbol tables: class scope plus one rebuilt per subroutine
        self._class_table = SymbolTable()
        self._method_table = SymbolTable()
        # running counters used to generate unique branch labels
        self._label_count_while = 0
        self._label_count_if = 0
        # name of the class currently being compiled
        self._cur_class_name = ""

    def compile_class(self):
        """
        compiles a class according to the grammar:
        'class' className '{' classVarDec* subroutineDec* '}'
        Exits the program if the file does not start with 'class'.
        """
        self._class_table.start_subroutine()
        self._tokenizer.advance()
        # check if the current keyword is the right class tag
        if self._tokenizer.key_word() != CLASS_TAG:
            print(COMPILE_CLASS_ERROR)
            sys.exit()
        self._tokenizer.advance()
        # remember the class name; it prefixes every VM function name
        self._cur_class_name = self.get_cur_token()
        self._tokenizer.advance()
        self._check_symbol("{")

        # there may be multiple variable declarations
        while self._check_if_var_dec():
            self.compile_class_var_dec()
        # there may be multiple subroutine declarations
        while self._check_subroutine_dec():
            self.compile_subroutine_dec()
        self._check_symbol("}")

    def compile_class_var_dec(self):
        """
        compiles the class's variables declarations
        """

        cur_kind = self.get_cur_token()
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._class_table.define(cur_name, cur_type, cur_kind)
        while self._check_if_comma():  # there are more variables
            self._tokenizer.advance()
            cur_name = self.get_cur_token()
            self._check_name()
            self._class_table.define(cur_name, cur_type, cur_kind)

        self._check_symbol(";")

    def get_cur_token(self):
        return self._tokenizer.get_token_str()

    def compile_subroutine_dec(self):
        """
        Compiles one subroutine declaration header
        ('constructor'|'function'|'method') ('void'|type) name '(' params ')'
        and hands the body off to compile_subroutine_body.
        """
        # every subroutine starts with a fresh local symbol table
        self._method_table.start_subroutine()
        sub_kind = self._tokenizer.key_word()
        # skip the kind keyword and the ('void'|type) token
        self._tokenizer.advance()
        self._tokenizer.advance()
        sub_name = self.get_cur_token()
        self._tokenizer.advance()

        # a method implicitly receives the current object as argument 0
        if sub_kind == "method":
            self._method_table.define("this", self._cur_class_name,
                                      "argument")

        self._check_symbol("(")
        self.compile_parameter_list()
        self._check_symbol(")")

        # VM functions are named <ClassName>.<subroutineName>
        full_name = self._cur_class_name + '.' + sub_name
        self.compile_subroutine_body(full_name, sub_kind)

    def compile_parameter_list(self):
        """
        compiles the parameter list for the subroutines
        """

        # if curr_token is ')' it means the param list is empty
        if self._tokenizer.symbol() == ')':
            return
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "argument")
        while self._check_if_comma():  # there are more params
            self._tokenizer.advance()
            cur_type = self.get_cur_token()
            self._check_type()
            cur_name = self.get_cur_token()
            self._check_name()
            self._method_table.define(cur_name, cur_type, "argument")

    def compile_subroutine_body(self, subroutine_name, subroutine_kind):
        """
        Compiles the '{' varDec* statements '}' body of a subroutine and
        emits its VM 'function' declaration.
        :param subroutine_name: full VM name (ClassName.subroutineName)
        :param subroutine_kind: 'constructor', 'function' or 'method'
        """
        self._check_symbol("{")
        # all local declarations come first; collecting them up front lets
        # us know the local count before writing the function command
        while self._tokenizer.key_word() == 'var':
            self.compile_var_dec()
        local_count = self._method_table.var_count("local")
        self._vm_writer.write_function(subroutine_name, local_count)

        if subroutine_kind == "constructor":
            # allocate space for all fields and anchor 'this' at the
            # address Memory.alloc returns
            field_count = self._class_table.var_count("field")
            self._vm_writer.write_push("constant", field_count)
            self._vm_writer.write_call("Memory.alloc", 1)
            self._vm_writer.write_pop("pointer", 0)
        elif subroutine_kind == "method":
            # anchor 'this' at the object base address passed as argument 0
            self._vm_writer.write_push("argument", 0)
            self._vm_writer.write_pop("pointer", 0)

        self.compile_statements()
        self._check_symbol("}")

    def compile_var_dec(self):
        """
        compiles the variable declarations
        """
        self._tokenizer.advance()
        cur_type = self.get_cur_token()
        self._check_type()
        cur_name = self.get_cur_token()
        self._check_name()
        self._method_table.define(cur_name, cur_type, "local")
        # there may be multiple variable names in the dec
        while self._check_if_comma():
            self._tokenizer.advance()
            self._method_table.define(self.get_cur_token(), cur_type, "local")
            self._check_name()
        self._check_symbol(";")

    def compile_statements(self):
        """
        compiles the statements (0 or more statements)
        """
        while self._check_if_statement():
            if self._tokenizer.key_word() == 'let':
                self.compile_let()
            elif self._tokenizer.key_word() == 'if':
                self.compile_if()
            elif self._tokenizer.key_word() == 'while':
                self.compile_while()
            elif self._tokenizer.key_word() == 'do':
                self.compile_do()
            elif self._tokenizer.key_word() == 'return':
                self.compile_return()

    def compile_do(self):
        """
        compiles the do statement: 'do' subroutineCall ';'
        The called subroutine's return value is discarded.
        """
        self._tokenizer.advance()
        self.compile_subroutine_call()
        self._check_symbol(";")
        # the call left its return value on the stack; throw it away
        self._vm_writer.write_pop("temp", 0)

    def compile_let(self):
        """
        compiles the let statement:
        'let' varName ('[' expression ']')? '=' expression ';'
        Emits VM code that evaluates the right-hand side and pops it into
        the target variable (or array cell via the 'that' segment).
        """
        self._tokenizer.advance()
        name = self.get_cur_token()
        info = self._get_symbol_info(name)
        # abort compilation on an undeclared variable
        self._check_if_declared(info)
        s_type, s_kind, s_id = info
        seg = self._get_segment(s_kind)
        is_and_array = False

        if self._tokenizer.get_next_token() == '[':  # if there is an array
            is_and_array = True
            # NOTE(review): compile_term is expected to leave the address
            # of the target cell (base + index) on the stack — confirm in
            # _compile_term_identifier.
            self.compile_term()
        else:
            self._tokenizer.advance()
        self._check_symbol("=")
        self.compile_expression()

        if is_and_array:
            # save the value created after compiling the expression which
            # appears right after '=' in temp[0]
            self._vm_writer.write_pop("temp", 0)
            # now the top of the stack should be the address of the right cell
            # in the array so we assign it to pointer[1]
            self._vm_writer.write_pop("pointer", 1)
            # re-pushing the value we saved in temp[0]
            self._vm_writer.write_push("temp", 0)
            # the value of the array is located in that[0]
            seg = "that"
            s_id = 0
        # execute the assignment
        self._vm_writer.write_pop(seg, s_id)
        self._check_symbol(";")

    @staticmethod
    def _check_if_declared(info):
        if info is None:
            print("Unknown Symbol")
            sys.exit()

    def compile_if(self):
        """
        Compiles an if(-else) statement using two generated labels: one
        that skips the if-block on a false condition, one that skips the
        else-block after the if-block ran.
        """
        # label creation order matters: the counter increments per call
        skip_label = self._get_if_label()
        done_label = self._get_if_label()

        self._tokenizer.advance()
        self._check_symbol("(")
        self.compile_expression()
        self._check_symbol(")")
        self._check_symbol("{")
        # negate the condition and jump over the if-block when false
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(skip_label)
        self.compile_statements()
        self._check_symbol("}")
        # a taken if-block falls through here and skips the else-block
        self._vm_writer.write_goto(done_label)
        self._vm_writer.write_label(skip_label)

        if self._tokenizer.key_word() == 'else':
            self._tokenizer.advance()
            self._check_symbol("{")
            self.compile_statements()
            self._check_symbol("}")

        self._vm_writer.write_label(done_label)

    def compile_while(self):
        """
        Compiles a while statement: the condition is re-evaluated at
        cond_label and a false result jumps to done_label.
        """
        self._tokenizer.advance()
        # label creation order matters: the counter increments per call
        cond_label = self._get_while_label()
        done_label = self._get_while_label(END_WHILE)
        self._check_symbol("(")
        self._vm_writer.write_label(cond_label)
        self.compile_expression()
        # negate the condition so a false result exits the loop
        self._vm_writer.write_arithmetic("not")
        self._vm_writer.write_if_goto(done_label)
        self._check_symbol(")")
        self._check_symbol("{")
        self.compile_statements()
        self._vm_writer.write_goto(cond_label)
        self._vm_writer.write_label(done_label)
        self._check_symbol("}")

    def compile_return(self):
        """
        Compiles a return statement: 'return' expression? ';'
        A void return still pushes constant 0, since the VM calling
        convention expects a value on the stack.
        """
        self._tokenizer.advance()
        if self._tokenizer.symbol() == ';':
            # nothing to return: push the dummy 0
            self._vm_writer.write_push("constant", 0)
        else:
            self.compile_expression()
        self._check_symbol(";")
        self._vm_writer.write_return()

    def compile_subroutine_call(self):
        """
        compiles the subroutine calls ( when we actually call a subroutine
        as  opposed to declaring it); resolves the full VM name and pushes
        the hidden 'this' argument when the callee is a method
        """
        method_name = self.get_cur_token()
        self._check_name()
        num_of_args = 0
        # there may be a '.' if it is a foo.bar() scenario (or Foo.bar())

        if self._tokenizer.symbol() == ".":

            self._tokenizer.advance()
            class_name = method_name
            method_name = self.get_cur_token()
            self._check_name()
            symbol_info = self._get_symbol_info(class_name)

            if symbol_info is None:
                # unknown symbol: treat the prefix as a class name
                # (static function / constructor call)
                cur_name = class_name + '.' + method_name
            else:
                # known variable: method call on that object; push the
                # object as hidden argument 0
                type_of, kind_of, id_of = symbol_info
                num_of_args += 1
                self._vm_writer.write_push(self._get_segment(kind_of), id_of)
                cur_name = type_of + '.' + method_name
        else:
            # bare name: method of the current class, 'this' is argument 0
            cur_name = self._cur_class_name + '.' + method_name
            num_of_args += 1
            self._vm_writer.write_push("pointer", 0)

        self._check_symbol("(")
        num_of_args += self.compile_expression_list()
        self._check_symbol(")")
        self._vm_writer.write_call(cur_name, num_of_args)

    def compile_expression(self):
        """
        compiles expressions which are terms and possibly operators and more
        terms; operators are emitted after their operands (postfix order)
        """
        symbol = self._tokenizer.symbol()
        self.compile_term()
        # write the 'not' operator if necessary
        # NOTE(review): only a leading '~' is special-cased here; a leading
        # unary '-' does not appear to emit a 'neg' command in this method —
        # confirm it is handled elsewhere (e.g. in _get_op / compile_term).
        if symbol == '~':
            self._vm_writer.write_arithmetic("not")

        # there may be a few operators in one expression
        while self._tokenizer.symbol() in OPERATIONS:
            symbol = self._tokenizer.symbol()
            self.compile_term()
            # executing operators after handling the the operands
            # in order to evaluate the current expression as postfix expression
            op = self._get_op(symbol)
            self._vm_writer.write_arithmetic(op)

    def compile_term(self):
        """
        compiles terms according to the grammar

        A term is one of: an integer/string constant, a keyword constant
        (true/false/null/this), a parenthesized expression, a unary operator
        followed by a term, or an identifier (variable, array entry or
        subroutine call). Prints an error and exits when the current token
        cannot open a term.
        """
        cur_type = self._tokenizer.token_type()
        key_word = self._tokenizer.key_word()
        cur_token = self.get_cur_token()

        # either a string/int constant
        if cur_type in ["INT_CONST", "STRING_CONST"]:
            self._compile_string_int_term(cur_token, cur_type)

        # or a constant keyword (true, false, null, this)
        elif key_word in KEYWORD_CONST:
            self._compile_const_keyword_term(key_word)

        # or an expression within round brackets
        elif self._tokenizer.symbol() == '(':
            self._tokenizer.advance()
            self.compile_expression()
            self._check_symbol(")")

        # or a unary op and then a term
        elif self._tokenizer.symbol() in OPERATIONS:
            # NOTE(review): the operator token is consumed here without
            # emitting any VM code; the caller is expected to emit it
            # (compile_expression writes 'not' for '~') - verify that a
            # unary '-' ('neg') is covered by some caller as well
            self._tokenizer.advance()
            self.compile_term()

        # or it is an identifier which could be:
        elif self._tokenizer.identifier():
            self._compile_term_identifier()
        else:
            print(COMPILE_TERM_ERROR)
            sys.exit()

    def _compile_const_keyword_term(self, key_word):
        """
       compile term in case the current token type is constant keyword
       :param key_word: string from {'true', 'false', 'null', 'this'}
       """
        if key_word == "this":
            self._vm_writer.write_push("pointer", 0)
        else:
            self._vm_writer.write_push("constant", 0)
        if key_word == "true":
            self._vm_writer.write_arithmetic("not")
        self._tokenizer.advance()

    def _compile_string_int_term(self, cur_token, cur_type):
        """
        compile term in case the given token type is constant string
        or constant integer
        :param cur_token: the current token as a string
        :param cur_type:  the type of the current token
        """
        if cur_type == "INT_CONST":
            self._vm_writer.write_push("constant", cur_token)

        else:  # is string
            n = len(cur_token)
            self._vm_writer.write_push("constant", n)
            self._vm_writer.write_call("String.new", 1)
            for c in cur_token:
                self._vm_writer.write_push("constant", ord(c))
                self._vm_writer.write_call("String.appendChar", 2)
        self._tokenizer.advance()

    def _compile_term_identifier(self):
        """
        compiles terms in case of identifier token

        Distinguishes by the token that follows the identifier:
        * '['        - an array entry: base + index are computed; unless it
          is an assignment target (next symbol '='), it is dereferenced
          through the 'that' segment
        * '.' or '(' - a subroutine call
        * otherwise  - a plain variable (its value is pushed above)
        """
        cur_token = self.get_cur_token()
        info = self._get_symbol_info(cur_token)
        next_token = self._tokenizer.get_next_token()
        # push the variable's value now, unless this is a call -
        # compile_subroutine_call does its own push of the object if needed
        if info is not None and next_token not in [".", "("]:
            type_of, kind_of, id_of = info
            seg = self._get_segment(kind_of)
            self._vm_writer.write_push(seg, id_of)

        # an array
        if next_token == '[':

            self._check_name()
            self._check_symbol("[")
            self.compile_expression()
            self._check_symbol("]")
            # entry address = base (pushed above) + index expression
            self._vm_writer.write_arithmetic("add")
            if self._tokenizer.symbol() != '=':
                # reading arr[i]: point 'that' at the entry and push it;
                # for 'arr[i] = ...' the address stays on the stack for
                # the let-statement compiler to consume
                self._vm_writer.write_pop("pointer", 1)
                self._vm_writer.write_push("that", 0)
        # or a subroutine call
        elif next_token in [".", "("]:
            self.compile_subroutine_call()
        else:
            self._tokenizer.advance()

    def compile_expression_list(self):
        """
        compiles the expression lists
        """
        # if it is ')' then the expression list is empty
        if self._tokenizer.symbol() == ')':
            return 0
        num_of_args = 1  # at least one argument
        self.compile_expression()
        # while there are more expressions
        while self._check_if_comma():
            self._tokenizer.advance()
            cur_symbol = self._tokenizer.symbol()
            self.compile_expression()
            if cur_symbol == '-':  # negative int
                self._vm_writer.write_arithmetic("neg")
            num_of_args += 1
        return num_of_args

    def _check_if_var_dec(self):
        """
        Tells whether the token under the cursor opens a class-level
        variable declaration.
        :return: True iff the current keyword is one of CLASS_VAR_KEYWORDS
        ('static' / 'field')
        """
        cur_key_word = self._tokenizer.key_word()
        return cur_key_word in CLASS_VAR_KEYWORDS

    def _check_subroutine_dec(self):
        """
        Tells whether the token under the cursor opens a subroutine
        declaration.
        :return: True iff the current keyword is one of SUBROUTINE
        ('constructor' / 'function' / 'method')
        """
        cur_key_word = self._tokenizer.key_word()
        return cur_key_word in SUBROUTINE

    def _check_if_comma(self):
        """
        checks if current token is a comma
        :return: true iff the current token is a ','
        """
        return self._tokenizer.symbol() == ','

    def _check_if_statement(self):
        """
        Tells whether the token under the cursor opens a statement.
        :return: True iff the current keyword is one of STATEMENTS
        ('let', 'if', 'while', 'do', 'return')
        """
        cur_key_word = self._tokenizer.key_word()
        return cur_key_word in STATEMENTS

    def _check_type(self):
        """
        Validates that the current token is a legal type - either one of
        the built-in type keywords or a class name (identifier) - and
        advances past it.
        """
        if self._tokenizer.key_word() in TYPE_KEYWORDS:
            self._tokenizer.advance()
        else:
            # not a keyword type, so it must be a class-name identifier
            self._check_name()

    def _check_symbol(self, expected_symbol):
        """
        checks if the current token is the expected symbol, if so it write
        it to the output file
        :param expected_symbol: the symbol we are validating is the current
        token
        :return: prints illegal statement error if it is not the expected
        symbol and exits the program
        """
        if self._tokenizer.symbol() != expected_symbol:
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    def _check_name(self):
        """
        checks the current token is a name (identifier), and if so, write
        it to the output file
        :return: prints illegal statement error if it is not a name and
        exits the program
        """
        if not self._tokenizer.identifier():
            print(ILLEGAL_STATEMENT_ERROR)
            sys.exit()
        self._tokenizer.advance()

    @staticmethod
    def _get_op(symbol):
        """
       writes an op symbol to the out file
       """
        if symbol == '<':
            return "lt"
        elif symbol == '>':
            return "gt"
        elif symbol == '=':
            return "eq"
        elif symbol == '&':
            return "and"
        elif symbol == '|':
            return "or"
        elif symbol == '+':
            return "add"
        elif symbol == '-':
            return "sub"
        elif symbol == '~':
            return "not"
        elif symbol == "*":
            return "call Math.multiply 2"
        elif symbol == "/":
            return "call Math.divide 2"

    def _get_symbol_info(self, symbol_name):
        """
        first checks if the given symbol in the method symbol table
        if the method table contains the symbol it returns it's information:
        (type,kind,id)
        otherwise check if the class symbol table contains the symbol
        if it does it return the symbol information from the class table
        else returns None
        :param symbol_name: string
        """
        info = self._method_table.get_info(symbol_name)
        if info is None:
            info = self._class_table.get_info(symbol_name)
        return info

    @staticmethod
    def _get_segment(cur_kind):
        """
        :param cur_kind: Jack kind - from the list:
         ["var", "argument", "field", "class", "subroutine", "local", "static"]
        :return: if the given kind is "field" it returns 'this'
        otherwise returns the given kind
        """
        if cur_kind == "field":
            return "this"
        else:
            return cur_kind

    def _get_if_label(self):
        """
        create new if label and increment the if label counter
        :return: if unused label
        """
        curr_counter = str(self._label_count_if)
        self._label_count_if += 1
        return "IF" + curr_counter

    def _get_while_label(self, is_end_while=False):
        """
        creates label according to the given flag, if the method creates
        end while label it increments the while label counter
        :param is_end_while: if true creates end while label
        otherwise creates while label
        :return: unused while label or end while label according to the flag
        """
        curr_counter = str(self._label_count_while)
        if is_end_while:
            self._label_count_while += 1
            return "WHILE_END" + curr_counter
        return "WHILE" + curr_counter