Example #1
class JackAnalyzer:
    def __init__(self, infile):
        self.infile = infile

    def run(self):
        infile = self.infile
        if infile.find('.jack') != -1:
            outfile = os.path.splitext(infile)[0] + '.xml'
            print()
            print(infile, '->', outfile)
            self.tk = JackTokenizer(infile)
            self.ce = CompilationEngine(outfile)
            try:
                self.ce.compileClass(self.tk, 0)
            except UserWarning:
                print('Syntax Error: \'', self.tk.getToken(),
                      '\' is unexpected')
                print('Source Code:', self.tk.curLine)
        else:
            for root, dirs, files in os.walk(infile):
                for name in files:
                    if name.find('.jack') != -1:
                        inf = os.path.join(root, name)
                        outf = os.path.splitext(inf)[0] + '.xml'
                        print()
                        print(inf, '->', outf)
                        self.tk = JackTokenizer(inf)
                        self.ce = CompilationEngine(outf)
                        try:
                            self.ce.compileClass(self.tk, 0)
                        except UserWarning:
                            print('Syntax Error: \'', self.tk.getToken(),
                                  '\' is unexpected')
                            print('Source Code:', self.tk.curLine)
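A minimal driver sketch for the class above, assuming it lives in a module named JackAnalyzer alongside JackTokenizer and CompilationEngine; the entry point and argument handling below are illustrative, not part of the original example.

import sys
from JackAnalyzer import JackAnalyzer  # assumption: the class above is importable from this module

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print('Usage: python JackAnalyzer.py <file.jack | directory>')
        sys.exit(1)
    JackAnalyzer(sys.argv[1]).run()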
Example #2
 def __init__(self, file_path):
     self._tokenizer = JackTokenizer(file_path)
     self._vm_writer = VMWriter(file_path.replace(".jack", "Compiled.vm"))
     self._symbol_table = SymbolTable()
     self.class_name = ""
     self.label_value = 1
     self.compile_class()
Example #3
def main():
    # Quit if no file or directory name has been provided
    if len(sys.argv) == 1:
        print("You must provide a valid jack file or directory name.")
        sys.exit(1)

    user_input = sys.argv[1]

    files_to_translate = []
    is_directory = ".jack" not in user_input
    # If the user provided a directory name, iterate over each jack file in it and add its name to the list
    if is_directory:
        try:
            for file_name in glob.glob(f"{user_input}/*.jack"):
                files_to_translate.append(file_name)
        except FileNotFoundError:
            sys.exit(1)
    else:
        files_to_translate.append(user_input)

    for file in files_to_translate:
        tokenizer = JackTokenizer(file)
        symbol_table = SymbolTable()
        vmwriter = VMWriter(file)
        compilator = CompilationEngine(tokenizer, symbol_table, vmwriter)
        # compilator = CompilationEngineCOPY(file, tokenizer, symbol_table)

        tokenizer.close()
Example #4
def main():
    if len(sys.argv) != 2:
        print("Usage: python3 JackAnalyzer.py path/file.jack\nor...\
        \nUsage: python3 JackAnalyzer.py path/dir")
        sys.exit(1)

    #check path is valid
    #and return list of in_path(s)
    in_f_paths = check_path_type()

    #Generate Tokens...
    token_xml_files = []
    for fp in in_f_paths:
        #setup outpath:
        token_fp = change_fp_name(fp, ".jack", "Tokens.xml")
        token_xml_files.append(token_fp)  #use this later
        with open(token_fp, 'w') as f:
            f.write("<tokens>\n")
            #create Tokenizer
            tokenizer = JackTokenizer(fp)
            while tokenizer.hasMoreTokens():
                crnt_tkn, tkn_type = tokenizer.advance()
                if crnt_tkn:
                    out_string = "<{}> {} </{}>\n".format(tkn_type, crnt_tkn, tkn_type)
                    f.write(out_string)
            f.write("</tokens>\n")

    #Create compiler:
    for fp in token_xml_files:
        out_fp = change_fp_name(fp, "Tokens.xml", "new.xml")
        Compiler(fp, out_fp)
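The helper change_fp_name used above is not shown in this example; a plausible sketch, assuming it simply swaps one trailing filename suffix for another:

def change_fp_name(fp, old_suffix, new_suffix):
    # Hypothetical helper: replace a trailing suffix such as ".jack" with "Tokens.xml".
    if fp.endswith(old_suffix):
        return fp[:-len(old_suffix)] + new_suffix
    return fp + new_suffix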
Example #5
 def writeTokenizerFile(self, inputFile, inputDirName):
     from JackTokenizer import JackTokenizer
     import os
     outputFileName = os.path.join(inputDirName, "output", 
         os.path.splitext(os.path.basename(inputFile.name))[0] + ".xml")
     if(not os.path.exists(os.path.dirname(outputFileName))):
         os.makedirs(os.path.dirname(outputFileName))
     outputFile = open(outputFileName, 'w')
     outputFile.write("<tokens>\n")
     tokenizer = JackTokenizer(inputFile)
     while(tokenizer.hasMoreTokens()):
         tokenizer.advance()
         if tokenizer.tokenType == JackTokenizer.KEYWORD:
             outputFile.write("\t<keyword>" + tokenizer.currentToken + "</keyword>\n")
         elif tokenizer.tokenType == JackTokenizer.SYMBOL:
             outputFile.write("\t<symbol>")
             if tokenizer.currentToken == "&":
                 outputFile.write("&amp;")
             elif tokenizer.currentToken == "<":
                 outputFile.write("&lt;")
             elif tokenizer.currentToken == ">":
                 outputFile.write("&gt;")
             elif tokenizer.currentToken == "\"":
                 outputFile.write("&quot;")
             else:
                 outputFile.write(tokenizer.currentToken)
             outputFile.write("</symbol>\n")
         elif tokenizer.tokenType == JackTokenizer.IDENTIFIER:
             outputFile.write("\t<identifier>" + tokenizer.currentToken + "</identifier>\n")
         elif tokenizer.tokenType == JackTokenizer.INT_CONST:
             outputFile.write("\t<integerConstant>" + tokenizer.currentToken + "</integerConstant>\n")
         elif tokenizer.tokenType == JackTokenizer.STRING_CONST:
             outputFile.write("\t<stringConstant>" + tokenizer.currentToken + "</stringConstant>\n")
     
     outputFile.write("</tokens>\n")
Example #6
    def __init__(self, input_file_path, output_path):
        """

        :param fileToRead:
        """
        self._indentation = 0
        self._tokenizer = JackTokenizer(input_file_path)
        self._output = open(output_path, "w+")
Example #7
 def __init__(self, inputFile, outputFile):
     self.tokenizer = JackTokenizer(inputFile)
     self.vmWriter = VMWriter(outputFile)
     self.symbolTable = SymbolTable()
     self.classname = ""
     self.whilecounter = 0
     self.ifcounter = 0
     self.CompileClass()
Example #8
 def __init__(self, filename):
     self.tokenizer = JackTokenizer(filename)
     self.token = self.tokenizer.advance()
     self.xml = []
     self.vcode = []
     self.classcode = []
     self.labelIndex = 1
     self.compileClass()
Example #9
 def tokenize(self):
     self._tokenizer = JackTokenizer(self._file_name)
     self._tokenizer.makeCommentAndWhiteSpaceFree()
     self._tokenizer.prepare_tokens()
     print "###################################"
     compilation_engine = CompliationEngine(
         self._tokenizer, string.replace(self._file_name, '.jack', '.xml'))
     compilation_engine.Compile()
Example #10
 def __init__(self, input_file):
     self.st=SymbolTable()
     self.vmW = VMWriter()
     self.tknz = JackTokenizer(input_file)
     self._vm_string = ''
     self.tknz.advance()
     self.Op=[]
     self.Function=[]
Example #11
 def __init__(self, input_stream, output_stream):
     """
     Creates a new compilation engine with the
     given input and output. The next routine
     called must be compileClass().
     """
     self.__prefix = ""
     self.__tokenizer = JackTokenizer(input_stream)
     self.__output_stream = output_stream
Example #12
    def __init__(self, input_file, output_file, use_text_as_input=False):
        """Creates a new compilation engine with the
        given input and output.

        The next routine called must be compile_class
        """
        self.tokenizer = JackTokenizer(input_file, use_text_as_input)
        self.output_file = output_file
        self.output = []
Example #13
 def extract_instruction(self):
     extractor = CodeExtractor(self.src)
     instructions = extractor.get_instruction()
     self.codes.append('<tokens>')
     for instruction in instructions:
         tokenizer = JackTokenizer(instruction)
         # print(len(tokenizer.get_tokens()),instruction)
         self.codes.extend(tokenizer.get_tokens())
     self.codes.append('</tokens>')
Example #14
def compile_jack(jackfile):
    out_vm_file = jackfile.with_suffix('.vm')
    # Tokenize
    jack_tokenizer = JackTokenizer(jackfile)
    tokens_with_tokenType = jack_tokenizer.tokenize()
    # Compile
    jack_compilation_engine = CompilationEngine(tokens_with_tokenType,
                                                out_vm_file)
    jack_compilation_engine.compile()
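compile_jack expects a pathlib.Path (it relies on with_suffix); a small driver sketch for it, where the argument handling below is an assumption rather than part of the original:

import sys
from pathlib import Path

def main():
    target = Path(sys.argv[1])
    # Compile a single .jack file, or every .jack file in a directory.
    jack_files = [target] if target.suffix == '.jack' else sorted(target.glob('*.jack'))
    for jack_file in jack_files:
        compile_jack(jack_file)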
Example #15
def compile_jack(jackfile):
    out_xml_file = jackfile.with_suffix('.xml')
    out_token_xml_file = jackfile.parent / (jackfile.stem + 'T.xml')
    # Tokenize
    jack_tokenizer = JackTokenizer(jackfile, out_token_xml_file)
    tokens_with_tokenType = jack_tokenizer.tokenize()
    # Compile
    jack_compilation_engine = CompilationEngine(tokens_with_tokenType,
                                                out_xml_file)
    jack_compilation_engine.compile()
Example #16
 def __init__(self, filepath):
     self._tokenizer = JackTokenizer(filepath) 
     self._writer = VMWriter(filepath)
     self._classVariables = SymbolTable()
     self._subroutineVariables = SymbolTable()
     self._currentToken = None
     self._preserveCurrentToken = False
     self._className = ''
     self._currentCompilingFunction = {'kind': '', 'name': ''}
     self._numberConditionalsStatementsCurrentFunction = 0
Example #17
 def tokenize(self):
     self._tokenizer = JackTokenizer(self._file_name)
     self._tokenizer.makeCommentAndWhiteSpaceFree()
     self._tokenizer.prepare_tokens()
     print "###################################"
     output_filename = string.replace(self._file_name, '.jack', '.vm')
     #output_filename = 'E:\\Nand2Tetris\\nand2tetris\\projects\\test\Main.vm'
     compilation_engine = CompliationEngine(self._tokenizer,
                                            output_filename)
     compilation_engine.Compile()
Example #18
def compile():
    fileDest = open(tokenizerDestFilePath, "w")
    tokens = JackTokenizer(filePath)

    lines = []
    fileDest.write("<tokens>\n")
    while (tokens.hasMoreTokens()):
        tokens.advance()
        curType = tokens.tokenType()
        if curType == "stringConstant":
            curToken = tokens.stringVal()
        elif curType == "symbol":
            curToken = tokens.symbol()
        else:
            curToken = tokens.getCurrentToken()

        toWrite = "<" + curType + ">" + " " + curToken + " " + "</" + curType + ">\n"
        lines.append(toWrite)
        fileDest.write(toWrite)
    fileDest.write("</tokens>")
    fileDest.close()

    finalDestFile = open(finalPath, "w")
    engine = CompilationEngine(lines, finalDestFile)

    engine.CompileClass()
    finalDestFile.close()
Example #19
    def __init__(self, filename):

        self.tokenizer = JackTokenizer(filename)
        self.types = ['int', 'char', 'boolean']
        self.operators = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
        self.keywordsConstant = ['true', 'false', 'null', 'this']
        self.fileName = splitext(filename)[0]
        self.symbolTable = SymbolTable()
        self.vm = VMWriter(splitext(filename)[0])
        self.whileLabelNum = 0
        self.ifLabelNum = 0
Example #20
def handle_file(file_name):
    """
    this function is handling a single jack file
    :param file_name:
    :return:
    """
    with open(file_name, "r") as file:
        tokenizer = JackTokenizer(file)
        file_name = file_name.replace(".jack", ".xml")
    with open(file_name, "w") as out_file:
        CompilationEngine(tokenizer.get_tokens(), out_file)
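A possible companion for handling a whole directory, assuming handle_file above is called once per .jack file; the glue code below is illustrative only:

import glob
import os

def handle_dir(dir_name):
    # Run the single-file handler over every .jack file in the directory.
    for file_name in glob.glob(os.path.join(dir_name, "*.jack")):
        handle_file(file_name)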
Example #21
 def __init__(self, input_path, output_path):
     self.class_name = ''
     self.subroutine_name = ''
     self.if_counter = -1
     self.while_counter = -1
     self.subroutine_num_arg = 0
     self.tkx = JackTokenizer(input_path)
     self.class_table = symbolTable()
     self.subroutine_table = symbolTable()
     self.vm_writer = VMWriter(output_path)
     self.compile_class(output_path)
Example #22
def processFiles(folder):
    if '.jack' in folder:
        tokenizer = JackTokenizer(folder)
        engine = CompilationEngine(tokenizer)
        analyze(tokenizer, engine)
    else:
        if os.getcwd().split('/')[-1] != folder:
            os.chdir(folder)
        for fileName in glob.glob('*.jack'):
            tokenizer = JackTokenizer(fileName)
            engine = CompilationEngine(tokenizer)
            analyze(tokenizer, engine)
Example #23
 def __init__(self, input_stream, output_stream):
     """
     Creates a new compilation engine with the
     given input and output. The next routine
     called must be compileClass().
     """
     self.__prefix = ""
     self.__tokenizer = JackTokenizer(input_stream)
     self.__writer = VMWriter(output_stream)
     self.__symbol_table = SymbolTable()
     self.__label_counter = 0
     self.__class_name = None
Example #24
 def handle_file(self):
     """
     --------------------------------------------------------------
     WE WILL CREATE ONE .XML FILE FOR OUR .JACK FILE NAME file_name
                       INSIDE WHERE file_name is
     --------------------------------------------------------------
     handling a file of jack
     :return: none
     """
     jack_tokenizer = JackTokenizer(self.jack_input)
     jack_tokenizer.tokenize()
     compilation_engine = CompilationEngine(self.jack_input, jack_tokenizer)
     compilation_engine.compile()
Example #25
 def __init__(self, input_file, output_file):
     """
     the constructor of the class
     :param input_file: the jack file that the user wants to compile
     :param output_file: the path for the output xml file
     """
     self.file_reader = JackFileReader(input_file)
     self.jack_tokens = JackTokenizer(self.file_reader.get_one_liner())
     self.curr_token = self.jack_tokens.advance()
     self.to_output_file = []
     self.depth = 0
     self.compile_class()
     self.export_file(output_file)
Example #26
    def create_token(self, path):
        token_file_name = path.replace('.jack', '.token.xml')
        token_file = open(token_file_name, 'w')
        tokenizer = JackTokenizer(path)

        token_file.write('<tokens>\n')
        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            token_file.write(self.xml_token(tokenizer.current_token))

        token_file.write('</tokens>\n')
        token_file.close()

        return token_file_name
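The xml_token helper called above is not shown here; one plausible sketch, under the assumption that the tokenizer's current_token is a (tag, text) pair such as ('keyword', 'class') — both that shape and the helper body are assumptions:

def xml_token(self, current_token):
    # Hypothetical helper: wrap one token in its XML tag, escaping markup characters.
    from xml.sax.saxutils import escape
    tag, text = current_token
    return '<{0}> {1} </{0}>\n'.format(tag, escape(str(text), {'"': '&quot;'}))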
Example #27
 def writeFile(self, inputFile, inputDirName):
     from JackTokenizer import JackTokenizer
     from CompilationEngine import CompilationEngine
     import os
     outputFileName = os.path.join(inputDirName, "output",
         os.path.splitext(os.path.basename(inputFile.name))[0] + ".xml")
     
     if(not os.path.exists(os.path.dirname(outputFileName))):
         os.makedirs(os.path.dirname(outputFileName))
     outputFile = open(outputFileName, 'w')
     tokenizer = JackTokenizer(inputFile)
     engine = CompilationEngine(tokenizer, outputFile)
     tokenizer.advance()
     engine.compileClass()
Example #28
    def __init__(self, inFile):
        super(CompilationEngine, self).__init__()
        # create an internal tokenizer to iterate through
        self.tokenizer = JackTokenizer(inFile)

        # spacing so I can make nicely formatted xml, this will increase by
        # 4 spaces every time I recurse
        self.spacing = ""

        # setup the output file
        self.outputPath = inFile.name.replace(".jack", ".xml")
        self.outputFile = open(self.outputPath, 'w')
        self.outputFile.close()
        self.outputFile = open(self.outputPath, 'a')
Example #29
def main(argv):
    print(argv)

    opts, args = getopt.getopt(argv, "")

    if not args:
        print(
            'JackAnalyzer.py <inputfile.jack> or <directory> containing jack files'
        )
        sys.exit(2)
    else:
        filename = args[0]

        if os.path.isdir(filename):
            for f in os.listdir(filename):
                file_n, file_ext = os.path.splitext(f)
                if file_ext == '.jack':
                    tokenizer = JackTokenizer(
                        os.path.join(filename, f),
                        os.path.join(filename, file_n + 'T' + '.xml'))
                    tokenizer.write_file()

        else:
            file_n, file_ext = os.path.splitext(filename)
            tokenizer = JackTokenizer(
                filename, file_n + 'T' + '.xml')
            tokenizer.test()
Example #30
 def compileOneFile(self):
     if len(self.file_list) == 0:
         print("No more file to be compiled!")
         return False
     input_file_name = self.file_list.pop()
     output_file = open(
         self.output_dir + input_file_name.split("/")[-1].split(".")[0] +
         ".xml", "w")
     tokenizer = JackTokenizer(input_file_name)
     compeng = CompilationEngine(tokenizer, output_file)
     while tokenizer.hasMoreTokens():
         tokenizer.advance()
         compeng.compileClass()
     output_file.close()
     print("done:  " + input_file_name)
Example #31
    def _file_processing(self, filename, filepath=None):
        # If target path is a file
        if (filename.split('.')[1] == 'jack'):
            if (filepath):

                # MethodMap(filepath+'\\'+filename).map_funcs()
                self.tokenizer = JackTokenizer(filepath + '\\' + filename)

                comp_engine = CompilationEngine(filepath + '\\' + filename,
                                                self.tokenizer)
                comp_engine.create_file()
            else:
                self.tokenizer = JackTokenizer(filename)
                # MethodMap(filepath).map_funcs()

                comp_engine = CompilationEngine(filename, self.tokenizer)
Example #32
def start(arg_content):
    files = load_files(arg_content)
    for file in files:
        file.token = JackTokenizer(file.content)
        file.token.tokenize()
        file.engine = CompilationEngine(file.token, file.filename)
        file.engine.compile()
Example #33
 def tokenize(self):
     self._tokenizer = JackTokenizer(self._file_name)
     self._tokenizer.makeCommentAndWhiteSpaceFree()
     self._tokenizer.prepare_tokens()
     print "###################################"
     compilation_engine = CompliationEngine(self._tokenizer, string.replace(self._file_name, '.jack', '.xml'))
     compilation_engine.Compile()        
Example #34
    def writeFile(self, inputFile, inputDirName):
        from JackTokenizer import JackTokenizer
        from CompilationEngine import CompilationEngine
        import os

        basename = os.path.splitext(os.path.basename(inputFile.name))[0]
        xmlFileName = os.path.join(inputDirName, "output", basename + ".xml")
        vmFileName = os.path.join(inputDirName, "output", basename + ".vm")

        if not os.path.exists(os.path.dirname(xmlFileName)):
            os.makedirs(os.path.dirname(xmlFileName))
        xmlFile = open(xmlFileName, "w")
        vmFile = open(vmFileName, "w")
        tokenizer = JackTokenizer(inputFile)
        engine = CompilationEngine(tokenizer, xmlFile, vmFile)
        tokenizer.advance()
        engine.compileClass()
Example #35
 def tokenize(self):
     self._tokenizer = JackTokenizer(self._file_name)
     self._tokenizer.makeCommentAndWhiteSpaceFree()
     self._tokenizer.prepare_tokens()
     print "###################################"
     output_filename = string.replace(self._file_name, '.jack', '.vm')
     #output_filename = 'E:\\Nand2Tetris\\nand2tetris\\projects\\test\Main.vm'
     compilation_engine = CompliationEngine(self._tokenizer, output_filename)
     compilation_engine.Compile()        
Example #36
class JackAnalyzer(object):
    '''
    JackAnalyzer, top-level driver that sets up and invokes the other modules
    '''


    def __init__(self, file_name):
        '''
        Constructor
        '''
        self._file_name = file_name
        self._tokenizer = None
    
    def tokenize(self):
        self._tokenizer = JackTokenizer(self._file_name)
        self._tokenizer.makeCommentAndWhiteSpaceFree()
        self._tokenizer.prepare_tokens()
        print "###################################"
        compilation_engine = CompliationEngine(self._tokenizer, string.replace(self._file_name, '.jack', '.xml'))
        compilation_engine.Compile()        
Example #37
 def __init__(self, inFile, outFile):
     """Creates a new compilation engine with the given input and output.
     The next routine called must be compileClass()"""
     self.tokenizer = JackTokenizer(inFile)
     self.targetFile = open(outFile, 'w')
     self.getNext()
     self.classTable = None
     self.className = ''
     self.writer = VMWriter(outFile)
     self.labelWhile = 1
     self.labelIf = 1
Example #38
class JackAnalyzer(object):
    '''
    JackAnalyzer, top-level driver that sets up and invokes the other modules
    '''


    def __init__(self, file_name):
        '''
        Constructor
        '''
        self._file_name = file_name
        self._tokenizer = None
    
    def tokenize(self):
        self._tokenizer = JackTokenizer(self._file_name)
        self._tokenizer.makeCommentAndWhiteSpaceFree()
        self._tokenizer.prepare_tokens()
        print "###################################"
        output_filename = string.replace(self._file_name, '.jack', '.vm')
        #output_filename = 'E:\\Nand2Tetris\\nand2tetris\\projects\\test\Main.vm'
        compilation_engine = CompliationEngine(self._tokenizer, output_filename)
        compilation_engine.Compile()        
Example #39
 def __init__(self, input_file, output_file):
     self.jack_tokenizer = JackTokenizer(input_file)
     self.symbol_table = SymbolTable()
     self.writer = VMWriter(output_file)
     self.class_name = ""
     self.subroutine_name = ""
     self.return_type = ""
     self.label_counter_if = 0
     self.label_counter_while = 0
     self.num_args_called_function = 0
     self.is_unary = False
     self.dic_arithmetic = {"+": "add", "-": "sub", "*": "call Math.multiply 2",
                            "/": "call Math.divide 2", "&": "and", "|": "or",
                            "<": "lt", ">": "gt", "=": "eq"}
Example #40
 def __init__(self, input_file, output_file):
     self.jack_tokenizer = JackTokenizer(input_file)
     self.output = open(output_file, "w")
     self.level = 0
     self.is_unary = False
Example #41
class CompilationEngine:

    def __init__(self, input_file, output_file):
        self.jack_tokenizer = JackTokenizer(input_file)
        self.output = open(output_file, "w")
        self.level = 0
        self.is_unary = False

    def compile_class(self):
        self.print_title("class", True)
        self.level += 1
        # "class className {
        for i in range(NUM_TOKENS_CLASS_DEC):
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(token_type, self.jack_tokenizer.key_word())
            if token_type == IDENTIFIER:
                self.print_tag(token_type, self.jack_tokenizer.identifier())
            if token_type == SYMBOL:
                self.print_tag(token_type, self.jack_tokenizer.symbol())
        # classVarDec* or SubroutineDec*
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            curr_keyword = self.jack_tokenizer.key_word()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "static" or
                                                  self.jack_tokenizer.key_word() == "field"):
                self.compile_class_var_dec()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "function" or
                                                  self.jack_tokenizer.key_word() == "method" or
                                                  self.jack_tokenizer.key_word() == "constructor"):
                self.compile_subroutine()
                # self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                break
        self.level -= 1
        self.print_title("class", False)

    def compile_class_var_dec(self):
        self.print_title("classVarDec", True)
        self.level += 1
        # "static" or "field"
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(token_type, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(token_type, self.jack_tokenizer.identifier())
            elif token_type == SYMBOL:
                self.print_tag(token_type, self.jack_tokenizer.symbol())
                if self.jack_tokenizer.symbol() == ";":
                    break
        self.level -= 1
        self.print_title("classVarDec", False)


    def compile_subroutine(self):
        self.print_title("subroutineDec", True)
        self.level += 1
        # "constructor" or "function" or "method"
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            elif token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(token_type, self.jack_tokenizer.symbol())
                    self.compile_parameter_list()
                    # should print ")"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # if self.jack_tokenizer.symbol() == "}":
                #     break
                if self.jack_tokenizer.symbol() == '{':
                    self.compile_subroutine_body()
                    break
        self.level -= 1
        self.print_title("subroutineDec", False)

    def compile_subroutine_body(self):
        self.print_title("subroutineBody", True)
        self.level += 1
        self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            if token_type == KEYWORD:
                if self.jack_tokenizer.key_word() == "var":
                    self.compile_var_dec()
                    continue
                else:
                    self.compile_statements()
                    # print "}"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    break
        self.level -= 1
        self.print_title("subroutineBody", False)


    def compile_parameter_list(self):
        self.print_title("parameterList", True)
        self.level += 1
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            elif token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            else:
                if self.jack_tokenizer.symbol() == ")":
                    break
                else:
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("parameterList", False)


    def compile_var_dec(self):
        self.print_title("varDec", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            if token_type == SYMBOL:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if self.jack_tokenizer.symbol() == ";":
                    break
        self.level -= 1
        self.print_title("varDec", False)


    def compile_statements(self):
        self.print_title("statements", True)
        self.level += 1
        while True:
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "do":
                self.compile_do()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "let":
                self.compile_let()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "while":
                self.compile_while()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "return":
                self.compile_return()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "if":
                self.compile_if()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break
        self.level -= 1
        self.print_title("statements", False)


    def compile_do(self):
        self.print_title("doStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        self.compile_subroutine_call()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # return from compile_subroutine_call with ";"
        self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("doStatement", False)

    def compile_let(self):
        self.print_title("letStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                #continue
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "[":# or self.jack_tokenizer.symbol() == "=":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # print "]"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                # should return from the compile_expression only with ";" or "]"
                if self.jack_tokenizer.symbol() == "=":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == ";":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    break
        self.level -= 1
        self.print_title("letStatement", False)

    def compile_while(self):
        self.print_title("whileStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == "{":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        self.level -= 1
        self.print_title("whileStatement", False)


    def compile_return(self):
        self.print_title("returnStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ";":
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        else:
            self.compile_expression()
            # should return from "compile_expression" only with ";"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("returnStatement", False)

    def compile_if(self):
        self.print_title("ifStatement", True)
        self.level += 1
        self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                if self.jack_tokenizer.symbol() == "{":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "else":
            # print "else"
            self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # print "{"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # print "}"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
        self.level -= 1
        self.print_title("ifStatement", False)

    def compile_subroutine_call(self):
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.compile_expression_list()
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        # (className | varName).subroutineName(expressionList)
        elif self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # subroutineName
            self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # "("
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            # expressionList
            self.compile_expression_list()
            # ")"
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())


    def compile_expression(self):
        self.print_title("expression", True)
        self.level += 1
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
            self.is_unary = True
        self.compile_term()
        while self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                ["+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
            self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
                self.is_unary = True
            self.compile_term()
        self.level -= 1
        self.print_title("expression", False)

    def compile_term(self):
        keywords_list = ["true", "false", "null", "this"]
        self.print_title("term", True)
        self.level += 1
        while True:
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in\
                    [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                break
            if token_type == INT_CONST:
                self.print_tag(INT_CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                self.print_tag(STRING_CONST, self.jack_tokenizer.string_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in keywords_list:
                self.print_tag(KEYWORD, self.jack_tokenizer.key_word())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # should return from compile_expression only with ")"
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                        [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # should print only "]"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.compile_expression_list()
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                # (className | varName).subroutineName(expressionList)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # subroutineName
                    self.print_tag(IDENTIFIER, self.jack_tokenizer.identifier())
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # "("
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
                    # expressionList
                    self.compile_expression_list()
                    # ")"
                    self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
        self.level -= 1
        self.print_title("term", False)

    def compile_expression_list(self):
        self.print_title("expressionList", True)
        self.level += 1
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break
            else:
                self.compile_expression()
                if self.jack_tokenizer.symbol() == ")":
                    break
                # print ","
                self.print_tag(SYMBOL, self.jack_tokenizer.symbol())
        self.level -= 1
        self.print_title("expressionList", False)


    def print_tag(self, token_type, value):
        tabs = ""
        tag = ""
        for i in range(self.level):
            tabs += "\t"
        if token_type == KEYWORD:
           tag = "<keyword> " + value + " </keyword>\n"
        elif token_type == SYMBOL:
            tag = "<symbol> " + value + " </symbol>\n"
        elif token_type == IDENTIFIER:
            tag = "<identifier> " + value + " </identifier>\n"
        elif token_type == INT_CONST:
            tag = "<integerConstant> " + value + " </integerConstant>\n"
        elif token_type == STRING_CONST:
            tag = "<stringConstant> " + value + " </stringConstant>\n"
        else:
            tag = "<" + value + ">" + " </" + value + ">\n"
        self.output.write(tabs + tag)

    def print_title(self, title, is_title):
        tabs = ""
        for i in range(self.level):
            tabs += "\t"
        if is_title:
            self.output.write(tabs + "<" + title + ">\n")
        # print closer
        else:
            self.output.write(tabs + "</" + title + ">\n")
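A minimal driver sketch for the engine above, assuming the module-level constants it relies on (KEYWORD, SYMBOL, IDENTIFIER, INT_CONST, STRING_CONST, NUM_TOKENS_CLASS_DEC) and JackTokenizer are defined alongside it; the explicit close() is an assumption, since the class never closes self.output itself:

def analyze_file(jack_path):
    # Compile one .jack file into an .xml parse tree next to it.
    engine = CompilationEngine(jack_path, jack_path.replace(".jack", ".xml"))
    engine.compile_class()
    engine.output.close()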
Example #42
class CompilationEngine:

    def __init__(self, inputFile, outputFile):
        self.tokenizer = JackTokenizer(inputFile)
        self.vmWriter = VMWriter(outputFile)
        self.symbolTable = SymbolTable()
        self.classname = ""
        self.whilecounter = 0
        self.ifcounter = 0
        self.CompileClass()

    def CompileClass(self):
        #classname
        self.tokenizer.advance()
        self.classname = self.tokenizer.identifier()
        self.tokenizer.advance()
        # ignore {
        self.tokenizer.advance()

        while self.tokenizer.keyWord() == "static" or self.tokenizer.keyWord() == "field":
            self.CompileClassVarDec()

        while self.tokenizer.keyWord() == "constructor" or self.tokenizer.keyWord() == "function" or self.tokenizer.keyWord() == "method":
            self.CompileSubroutine()

        #ignore }
        self.tokenizer.advance()



    def CompileClassVarDec(self):

        kind = self.tokenizer.keyWord()
        self.tokenizer.advance()
        type = self.compileType()
        name = self.tokenizer.identifier()
        self.symbolTable.define(name, type, kind)
        self.tokenizer.advance()

        # add the rest of var names, if there are
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            name = self.tokenizer.identifier()
            self.symbolTable.define(name, type, kind)
            self.tokenizer.advance()

        # ignore ;
        self.tokenizer.advance()

    def CompileSubroutine(self):

        self.symbolTable.startSubroutine()
        self.ifcounter = 0
        self.whilecounter = 0
        # constructor | function | method
        functype = self.tokenizer.keyWord()
        self.tokenizer.advance()

        if functype == "method":
            self.symbolTable.define("this", self.classname, "arg")

        self.tokenizer.advance()

        subroutineName = self.classname + "." + self.tokenizer.identifier()
        self.tokenizer.advance()

        # ( parameterList )
        self.tokenizer.advance()
        self.compileParameterList()
        self.tokenizer.advance()

        # subroutineBody
        # ignore {
        self.tokenizer.advance()
        # varDec*
        while self.tokenizer.keyWord() == "var":
            self.compileVarDec()

        self.vmWriter.writeFunction(subroutineName, self.symbolTable.varCount("var"))
        # allocate memory for constructor
        # if functype == "constructor":
        #     self.vmWriter.writePush("constant" , self.symbolTable.varCount("field"))
        #     self.vmWriter.writeCall("Memory.alloc", "1")

        if functype == "constructor" or functype == "method":
            if functype == "constructor":
                self.vmWriter.writePush("constant" , self.symbolTable.varCount("field"))
                self.vmWriter.writeCall("Memory.alloc", "1")
            else:
                self.vmWriter.writePush("argument", "0")
            self.vmWriter.writePop("pointer", "0")


        # statements
        self.compileStatements()

        # ignore }
        self.tokenizer.advance()

    def compileParameterList(self):
        # if not )
        if self.tokenizer.tokenType() != 1:

            # type varName
            argtype = self.compileType()
            argname = self.tokenizer.identifier()
            self.symbolTable.define(argname, argtype, "arg")
            self.tokenizer.advance()

            # (, type varName)*
            while self.tokenizer.symbol() == ",":
                self.tokenizer.advance()
                argtype = self.compileType()
                argname = self.tokenizer.identifier()
                self.symbolTable.define(argname, argtype, "arg")
                self.tokenizer.advance()

    def compileVarDec(self):

        # var
        self.tokenizer.advance()

        # type
        type = self.compileType()

        # varName
        varname = self.tokenizer.identifier()
        self.symbolTable.define(varname, type, "var")
        self.tokenizer.advance()

        # (, varName)*
        while self.tokenizer.symbol() == ",":
            self.tokenizer.advance()
            varname = self.tokenizer.identifier()
            self.symbolTable.define(varname, type, "var")

            self.tokenizer.advance()

        # ignore ;
        self.tokenizer.advance()


    def compileStatements(self):

        while self.tokenizer.tokenType() == 0:
            if self.tokenizer.keyWord() == "let":
                self.compileLet()
            elif self.tokenizer.keyWord() == "if":
                self.compileIf()
            elif self.tokenizer.keyWord() == "while":
                self.compileWhile()
            elif self.tokenizer.keyWord() == "do":
                self.compileDo()
            elif self.tokenizer.keyWord() == "return":
                self.compileReturn()


    def compileDo(self):

        self.tokenizer.advance()
        self.compileSubRoutineCall()
        self.vmWriter.writePop("temp", "0")

        # ignore ;
        self.tokenizer.advance()

    def compileLet(self):

        # let
        self.tokenizer.advance()
        # varName
        varname = self.tokenizer.identifier()
        varkind = self.symbolTable.kindOf(varname)

        self.tokenizer.advance()

        # ([ expression ])?
        if self.tokenizer.symbol() == "[":
            self.tokenizer.advance()
            self.CompileExpression()
            if varkind == "field":
                self.vmWriter.writePush("this", self.symbolTable.indexOf(varname))
            elif varkind == "var":
                self.vmWriter.writePush("local", self.symbolTable.indexOf(varname))
            elif varkind == "arg":
                self.vmWriter.writePush("argument", self.symbolTable.indexOf(varname))
            elif varkind == "static":
                self.vmWriter.writePush("static", self.symbolTable.indexOf(varname))
            self.vmWriter.writeArithmetic("add")

            #ignore ]
            self.tokenizer.advance()
            #ignore =
            self.tokenizer.advance()
            self.CompileExpression()
            self.vmWriter.writePop("temp", "0")

            # that
            self.vmWriter.writePop("pointer", "1")
            self.vmWriter.writePush("temp", "0")
            self.vmWriter.writePop("that", "0")
            self.tokenizer.advance()


        else:

            # ignore =
            self.tokenizer.advance()

            # expression
            self.CompileExpression()

            if varkind == "field":
                self.vmWriter.writePop("this", self.symbolTable.indexOf(varname))
            elif varkind == "var":
                self.vmWriter.writePop("local", self.symbolTable.indexOf(varname))
            elif varkind == "arg":
                self.vmWriter.writePop("argument", self.symbolTable.indexOf(varname))
            elif varkind == "static":
                self.vmWriter.writePop("static", self.symbolTable.indexOf(varname))

            #ignore ;
            self.tokenizer.advance()


    def compileWhile(self):

        # while
        self.tokenizer.advance()

        # ( expression )
        self.tokenizer.advance()
        whileindex = self.whilecounter
        self.whilecounter += 1
        self.vmWriter.writeLabel("WHILE_EXP" + str(whileindex))
        self.CompileExpression()
        self.vmWriter.writeArithmetic("not")
        self.vmWriter.writeIf("WHILE_END" + str(whileindex))
        self.tokenizer.advance()

        # ignore {
        self.tokenizer.advance()

        # statements
        self.compileStatements()

        # ignore }
        self.tokenizer.advance()
        self.vmWriter.writeGoto("WHILE_EXP" + str(whileindex))
        self.vmWriter.writeLabel("WHILE_END" + str(whileindex))

    def compileReturn(self):

        # return
        self.tokenizer.advance()

        # expression?
        if self.isTerm():
            self.CompileExpression()
            self.vmWriter.writeReturn()
        else:
            self.vmWriter.writePush("constant", "0")
            self.vmWriter.writeReturn()

        # ignore;
        self.tokenizer.advance()


    def compileIf(self):
        #if
        self.tokenizer.advance()
        # ( expression )
        self.tokenizer.advance()
        self.CompileExpression()
        ifindex = self.ifcounter
        self.ifcounter += 1
        self.vmWriter.writeIf("IF_TRUE" + str(ifindex))
        self.vmWriter.writeGoto("IF_FALSE" + str(ifindex))
        self.vmWriter.writeLabel("IF_TRUE" + str(ifindex))
        self.tokenizer.advance()

        # { statements }
        self.tokenizer.advance()
        self.compileStatements()
        self.tokenizer.advance()

        if self.tokenizer.tokenType() == 0 and self.tokenizer.keyWord() == "else":
            # else
            self.vmWriter.writeGoto("IF_END" + str(ifindex))
            self.vmWriter.writeLabel("IF_FALSE" + str(ifindex))

            self.tokenizer.advance()

            # { statements }
            self.tokenizer.advance()
            self.compileStatements()
            self.tokenizer.advance()

            self.vmWriter.writeLabel("IF_END" + str(ifindex))

        else:
            self.vmWriter.writeLabel("IF_FALSE" + str(ifindex))


    def CompileExpression(self):
        #term
        self.CompileTerm()
        # (op term)*
        op = self.tokenizer.symbol()
        while self.tokenizer.tokenType() == 1 and op in operators:
            self.tokenizer.advance()
            self.CompileTerm()
            if op == "=":
                self.vmWriter.writeArithmetic("eq")
            elif op == "+":
                self.vmWriter.writeArithmetic("add")
            elif op == "-":
                self.vmWriter.writeArithmetic("sub")
            elif op == "*":
                self.vmWriter.writeCall("Math.multiply", "2")
            elif op == "/":
                self.vmWriter.writeCall("Math.divide", "2")
            elif op == "&amp;":
                self.vmWriter.writeArithmetic("and")
            elif op == "|":
                self.vmWriter.writeArithmetic("or")
            elif op == "&lt;":
                self.vmWriter.writeArithmetic("lt")
            elif op == "&gt;":
                self.vmWriter.writeArithmetic("gt")
            op = self.tokenizer.symbol()

    def CompileTerm(self):
        if self.tokenizer.tokenType() == 3:
            self.vmWriter.writePush("constant", self.tokenizer.intVal())
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 4:
            conststring = self.tokenizer.stringVal()
            self.vmWriter.writePush("constant", str(len(conststring)))
            self.vmWriter.writeCall("String.new", "1")
            for i in range(len(conststring)):
                self.vmWriter.writePush("constant", str(ord(conststring[i])))
                self.vmWriter.writeCall("String.appendChar", "2")

            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 0:
            keywordconst = self.tokenizer.keyWord()
            if keywordconst == "true":
                self.vmWriter.writePush("constant", "0")
                self.vmWriter.writeArithmetic("not")
            elif keywordconst == "false" or keywordconst == "null":
                self.vmWriter.writePush("constant", "0")
            elif keywordconst == "this":
                self.vmWriter.writePush("pointer", "0")
            self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 2:
            # varName [ expression]
            if self.tokenizer.tokens[self.tokenizer.currentToken +1] == '[':
                varname = self.tokenizer.identifier()
                varkind = self.symbolTable.kindOf(varname)
                self.tokenizer.advance()
                # [ expression ]
                self.tokenizer.advance()
                self.CompileExpression()
                if varkind == "field":
                    self.vmWriter.writePush("this", self.symbolTable.indexOf(varname))
                elif varkind == "var":
                    self.vmWriter.writePush("local", self.symbolTable.indexOf(varname))
                elif varkind == "arg":
                    self.vmWriter.writePush("argument", self.symbolTable.indexOf(varname))
                elif varkind == "static":
                    self.vmWriter.writePush("static", self.symbolTable.indexOf(varname))
                self.vmWriter.writeArithmetic("add")
                # that
                self.vmWriter.writePop("pointer", "1")
                self.vmWriter.writePush("that", "0")
                self.tokenizer.advance()
            # subroutine call
            elif self.tokenizer.tokens[self.tokenizer.currentToken +1] == '(' or self.tokenizer.tokens[self.tokenizer.currentToken +1] == '.':
                self.compileSubRoutineCall()
            # varname
            else:
                varname = self.tokenizer.identifier()
                varkind = self.symbolTable.kindOf(varname)
                if varkind == "field":
                    self.vmWriter.writePush("this", self.symbolTable.indexOf(varname))
                elif varkind == "var":
                    self.vmWriter.writePush("local", self.symbolTable.indexOf(varname))
                elif varkind == "arg":
                    self.vmWriter.writePush("argument", self.symbolTable.indexOf(varname))
                elif varkind == "static":
                    self.vmWriter.writePush("static", self.symbolTable.indexOf(varname))
                self.tokenizer.advance()

        elif self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(':
            # ( expression )
            self.tokenizer.advance()
            self.CompileExpression()
            self.tokenizer.advance()
        else:
            # unary op
            op = self.tokenizer.symbol()
            self.tokenizer.advance()
            self.CompileTerm()
            if op == "-":
                self.vmWriter.writeArithmetic("neg")
            elif op == "~":
                self.vmWriter.writeArithmetic("not")

    def compileSubRoutineCall(self):
        # subroutineName  | (className | varName)
        identifier = self.tokenizer.identifier()
        self.tokenizer.advance()
        # no "." -- a plain subroutineName call on the current object
        if self.tokenizer.symbol() == '(':
            # ( expressionList ) -- subroutine of type method
            self.tokenizer.advance()
            self.vmWriter.writePush("pointer", "0")
            argnum = self.CompileExpressionList()
            self.vmWriter.writeCall(self.classname + "." + identifier, str(argnum +1))

            self.tokenizer.advance()
        else:
            # . -- class.function or var.method
            self.tokenizer.advance()
            # subroutineName
            subname = self.tokenizer.identifier()
            self.tokenizer.advance()

            self.tokenizer.advance()
            if identifier in self.symbolTable.classtable or identifier in self.symbolTable.subroutinetable:
                # varName -- calling a method on an object variable
                if identifier in self.symbolTable.subroutinetable:
                    if self.symbolTable.kindOf(identifier) == "var":
                        self.vmWriter.writePush("local", self.symbolTable.indexOf(identifier))
                    else:
                        self.vmWriter.writePush("argument", self.symbolTable.indexOf(identifier))
                else:
                    if self.symbolTable.kindOf(identifier) == "static":
                        self.vmWriter.writePush("static", self.symbolTable.indexOf(identifier))
                    else:
                        self.vmWriter.writePush("this", self.symbolTable.indexOf(identifier))


                argnum = self.CompileExpressionList()
                identifierclass = self.symbolTable.typeOf(identifier)
                self.vmWriter.writeCall(identifierclass + "." + subname, str(argnum +1))
            else:
                argnum = self.CompileExpressionList()
                self.vmWriter.writeCall(identifier + "." + subname, str(argnum))
            self.tokenizer.advance()

    def CompileExpressionList(self):
        # (expression
        i = 0
        if self.isTerm():
            i += 1
            # (, expression)
            self.CompileExpression()
            while self.tokenizer.symbol() == ',':
                i+= 1
                self.tokenizer.advance()
                self.CompileExpression()
        return i

    def isTerm(self):
        if self.tokenizer.tokenType() == 3 or self.tokenizer.tokenType() == 4:
            return True
        if self.tokenizer.tokenType() == 0 and self.tokenizer.keyWord() in keyword_const:
            return True
        if self.tokenizer.tokenType() == 1 and self.tokenizer.symbol() == '(' :
            return True
        if self.tokenizer.tokenType() == 1 and (self.tokenizer.symbol() == '-' or self.tokenizer.symbol() == '~'):
            return True
        if self.tokenizer.tokenType() == 2:
            return True
        return False

    def compileType(self):
        if self.tokenizer.tokenType() == 0:
            typen = self.tokenizer.keyWord()
        else:
            typen = self.tokenizer.identifier()
        self.tokenizer.advance()
        return typen
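
All of these engines write their output through a VMWriter that is not included in the examples. A minimal sketch, assuming nothing beyond the calls made above (writePush, writePop, writeArithmetic, writeCall, writeFunction, writeLabel, writeGoto, writeIf, writeReturn), might look like the following; each project's real class may differ in naming and file handling.

class VMWriter:
    """Minimal VM-command emitter (an illustrative sketch, not any project's actual class)."""

    def __init__(self, outfile):
        self.out = open(outfile, 'w')

    def writePush(self, segment, index):
        self.out.write('push {} {}\n'.format(segment, index))

    def writePop(self, segment, index):
        self.out.write('pop {} {}\n'.format(segment, index))

    def writeArithmetic(self, command):
        # expects a VM command such as add, sub, neg, eq, gt, lt, and, or, not
        self.out.write('{}\n'.format(command))

    def writeLabel(self, label):
        self.out.write('label {}\n'.format(label))

    def writeGoto(self, label):
        self.out.write('goto {}\n'.format(label))

    def writeIf(self, label):
        self.out.write('if-goto {}\n'.format(label))

    def writeCall(self, name, nArgs):
        self.out.write('call {} {}\n'.format(name, nArgs))

    def writeFunction(self, name, nLocals):
        self.out.write('function {} {}\n'.format(name, nLocals))

    def writeReturn(self):
        self.out.write('return\n')

    def close(self):
        self.out.close()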
Example #43
class JackCompiler:

    def __init__(self, program):
        self._tokens = JackTokenizer(program).get_tokens()


    def array_access(self):
        return self._tokens[1].value == '['

    def subroutine_call(self):
        tokenVal = self._tokens[1].value
        return  ( tokenVal == '.' or tokenVal == '(')

    def end_of_term(self):
        if len(self._tokens) == 1:
            return True
        elif (self.array_access() or
              self.subroutine_call()):
            return False
        else:
            return True

    def eat(self, type=None, value=None):
        token = self._tokens.pop(0)
        if type is not None and type != token.type:
            raise UnexpectedToken("Unexpected token type: expected {0}, got {1}".format(type, token.type))
        if value is not None and value != token.value:
            raise UnexpectedToken("Unexpected token val: expected {0}, got {1}".format(value, token.value))

        return token.value


    def end_of_list(self):
        return (len(self._tokens) == 0 or self._tokens[0].value == ')')

    def compile_expression_list(self):
        exps = []
        while (True):
            if self.end_of_list():
                break
            else:
                exp = self.compile_expression()
                exps.append(exp)
            if self.end_of_list():
                break
            else:
                self.eat(value=',')
        base = TEMPLATES["expression_list"]
        expressions = "<symbol> , </symbol>".join(exps)
        return base.format(expressions)

    def compile_expression(self):
        base = TEMPLATES["expression"]
        term1 =  self.compile_term()
        # compile term can return none if there is no term to compile
        if term1 is None:
            return ""
        elif self.has_op():
            sym_char = self.eat('symbol')
            sym_tag = TEMPLATES["symbol"].format(sym_char)
            term2 = self.compile_term()
            return base.format(term1, sym_tag, term2)
        return base.format(term1, "", "") # TODO: refactor to use default

    def has_op(self):
        if len(self._tokens) < 1:
            return False
        next_token = self._tokens[0].value
        regexp = re.compile(r'[+\-*/&|<>=]')
        return not (re.match(regexp, next_token) is None)

    def compile_integer_constant(self):
        base = TEMPLATES["integer_constant"]
        n = self.eat() # pass optional value/type for error correction
        return  base.format(n)

    def compile_string_constant(self):
        base = TEMPLATES["string_constant"]
        s = self.eat(type="stringConstant")
        return base.format(s)

    def compile_keyword_constant(self):
        base = TEMPLATES["keyword_constant"]
        s = self.eat(type="keyword")
        return base.format(s)

    def compile_var_name(self):
        base = TEMPLATES["var_name"]
        var_name = self.eat(type='identifier')
        return base.format(var_name)

    def compile_unary_op(self):
        base = TEMPLATES["unary_op"]
        sym = self.eat(type="symbol")
        term = self.compile_term()
        return base.format(sym, term)

    def compile_array_access(self):
        base = TEMPLATES["array_access"]
        array_name = self.eat(type='identifier')
        self.eat(value='[')
        array_index = self.compile_expression()
        self.eat(value=']')
        return base.format(array_name, array_index)

    def compile_paren(self):
        base = TEMPLATES["parens"]
        self.eat(value='(')
        expression = self.compile_expression()
        self.eat(value=')')
        return base.format(expression)

    def compile_call_prefix(self):
        if self._tokens[1].value == '.':
            cls_base = TEMPLATES["subroutine_call"]["class"]
            cls_name = self.eat(type='identifier')
            cls_tree = cls_base.format(cls_name)
            self.eat(value='.')
            return cls_tree
        else:
            return ""

    def compile_call_suffix(self):
        base = TEMPLATES["subroutine_call"]["fun"]
        fun_name = self.eat(type='identifier')
        return base.format(fun_name)


    def compile_fun_call(self):
        cls_tree = self.compile_call_prefix()
        fun_tree = self.compile_call_suffix()
        self.eat(value='(')
        exp_list = self.compile_expression_list()
        self.eat(value=')')
        base = TEMPLATES["subroutine_call"]["base"]
        return base.format(cls_tree, fun_tree, exp_list)


    def compile_term(self):
        # can return None 
        next_token = self._tokens[0]
        if next_token.type == 'integerConstant':
            return self.compile_integer_constant()
        elif next_token.type == 'stringConstant':
            return self.compile_string_constant()
        elif next_token.type == 'keyword':
            return self.compile_keyword_constant()
        elif next_token.type == 'symbol':
            if next_token.value == '(':
                return self.compile_paren()
            elif next_token.value == '~' or next_token.value == '-':
                return self.compile_unary_op()
        elif next_token.type == 'identifier':
            if self.end_of_term():
                return self.compile_var_name()
            else:
                token = self._tokens[1]
                if token.value == '.' or token.value == '(':
                    return "<term>\n" + self.compile_fun_call() + "</term>\n"
                elif token.value == '[':
                    return  "<term>\n" + self.compile_array_access() + "</term>\n"


    def compile_statement(self):
        next_token = self._tokens[0]
        if next_token.value == 'do':
            return self.compile_do()
        elif next_token.value == 'let':
            return self.compile_let()
        elif next_token.value == 'if':
            return self.compile_if()
        elif next_token.value == "while":
            return self.compile_while()
        elif next_token.value == "return":
            return self.compile_return()
        else:
            return False

    def compile_statements(self):
        statements = []
        while (len(self._tokens) > 0):
            statement = self.compile_statement()
            if not statement:
                break
            else:
                statements.append(statement)
                
        statements = "\n".join(statements)
        base = STATEMENTS["statements"]
        return base.format(statements)
    
    def compile_if(self):
        self.eat(value='if')
        self.eat(value='(')
        condition = self.compile_expression()
        self.eat(value=')')
        self.eat(value='{')
        statements = self.compile_statements()
        self.eat(value='}')
        base = STATEMENTS["if"]
        return base.format(condition, statements)
        
        
    def compile_do(self):
        self.eat(value='do')
        call = self.compile_fun_call()
        self.eat(value=';')
        base = STATEMENTS["do"]
        return base.format(call)

    def compile_let(self):
        self.eat(value='let')
        token = self._tokens[1]
        if token.value == '[':
            # array access
            var_name = self.compile_array_access()
        elif token.value == '=':
            var_name = self.compile_identifier()
            #identifier
        
        self.eat(value='=')
        expr = self.compile_expression()
        self.eat(value=';')
        base = STATEMENTS["let"]
        return base.format(var_name, expr)

    def compile_while(self):
        self.eat(value='while')
        self.eat(value='(')
        condition = self.compile_expression()
        self.eat(value=')')
        self.eat(value='{')
        statements = self.compile_statements()
        self.eat(value='}')
        base = STATEMENTS["while"]
        return base.format(condition, statements)

    def compile_return(self):
        self.eat(value="return")
        # optional: with no expression, compile_expression returns an empty string (same pattern as local var decs)
        expression = self.compile_expression() 
        base = STATEMENTS["return"]
        self.eat(value=';')
        return base.format(expression)

    def compile_class(self):
        self.eat(value="class")
        class_name = self.compile_identifier()
        self.eat(value='{')
        class_var_decs = self.compile_class_var_decs()
        subroutines = self.compile_subroutine_declarations()
        base = STRUCTURE["class"]
        return base.format(class_name, class_var_decs, subroutines)
        
    
    def compile_class_var_decs(self):
        # same pattern as in compile_local_var_decs
        class_var_decs = []
        while (len(self._tokens) > 0):
            class_var_dec = self.compile_class_var_dec()
            if not class_var_dec:
                break
            else:
                class_var_decs.append(class_var_dec)
        declarations = "\n".join(class_var_decs)
        return declarations

    def compile_class_var_dec(self):
        next_token = self._tokens[0]
        if (next_token.value != "field" and next_token.value != "static" ):
            return False
        field = self.eat(type="keyword")
        typ = self.compile_type()
        ident = self.compile_identifier()
        idents = [ident]
        
        while (self._tokens[0].value == ','):
            self.eat(value=',')
            ident = self.compile_identifier()
            idents.append(ident)
        self.eat(value=';')
        variables = "<symbol> , </symbol>".join(idents)
        return STRUCTURE["class_variable_declaration"].format(field, typ, variables)
    
    def compile_local_var_decs(self):
        # follow same pattern as compile_statements
        local_var_decs = []
        while (len(self._tokens) > 0):
            local_var_dec = self.compile_local_var_dec()
            if not local_var_dec:
                break
            else:
                local_var_decs.append(local_var_dec)
        declarations = "\n".join(local_var_decs)
        return declarations
            
    def compile_local_var_dec(self):        
        if (self._tokens[0].value != "var"):
            return False
        self.eat(value="var")
        typ = self.compile_type()
        ident = self.compile_identifier()
        idents =  [ident]
        while (self._tokens[0].value == ','):
            self.eat(value=',')
            ident = self.compile_identifier()
            idents.append(ident)
        self.eat(value=';')
        variables = "<symbol> , </symbol>".join(idents)
        return STRUCTURE["variable_declaration"].format(typ, variables)


    def compile_identifier(self):
        return STRUCTURE["tag"].format('identifier', self.eat(type='identifier'))
    
    def compile_type(self):
        base = STRUCTURE["tag"]
        next_token_type = self._tokens[0].type
        next_token_val = self.eat()
        return base.format(next_token_type, next_token_val)


    # same pattern as in compile_expression_list
    def compile_param(self):
        typ = self.compile_type()
        param_name = self.compile_identifier()
        return typ + "\n" + param_name

    def compile_param_list(self):
        params = []
        while (True):
            if self.end_of_list():
                break
            else:
                param = self.compile_param()
                params.append(param)
                if self.end_of_list():
                    break
                else:
                    self.eat(value=',')
        base = STRUCTURE["param_list"]
        parameters = "<symbol> , </symbol>".join(params)
        return base.format(parameters)                    
        
        
    def compile_subroutine_body(self):
        self.eat(value='{')
        variable_declarations = self.compile_local_var_decs() # only one!
        statements = self.compile_statements()
        self.eat(value='}')
        base = STRUCTURE["subroutine_body"]
        return base.format(variable_declarations, statements)


    def compile_subroutine_declarations(self):
        # same pattern as compile class_var_decs
        subroutines = []
        while (len(self._tokens) > 0):
            subroutine = self.compile_subroutine_declaration()
            if not subroutine:
                break
            else:
                subroutines.append(subroutine)
        declarations = "\n".join(subroutines)
        return declarations

    def compile_subroutine_declaration(self):
        if (self._tokens[0].type != "keyword"):
            return False
        subroutine_type = self.compile_type()
        return_type = self.compile_type()
        fun_name = self.compile_identifier()
        
        self.eat(value='(')
        parameter_list = self.compile_param_list()
        self.eat(value=')')
        signature = STRUCTURE["signature"].format(subroutine_type, return_type, fun_name, parameter_list)
        body = self.compile_subroutine_body()
        return STRUCTURE["subroutine"].format(signature, body)
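
Example #43 assembles an XML parse tree from TEMPLATES, STATEMENTS and STRUCTURE dictionaries that are not included in the snippet. Their exact contents are an assumption; entries shaped to match the .format() calls above might look roughly like this:

# Hypothetical template fragments (illustrative only, not from the original source).
TEMPLATES = {
    "symbol": "<symbol> {} </symbol>\n",
    "integer_constant": "<term>\n<integerConstant> {} </integerConstant>\n</term>\n",
    "var_name": "<term>\n<identifier> {} </identifier>\n</term>\n",
    "expression": "<expression>\n{0}{1}{2}</expression>\n",  # term, op tag, term
}
STATEMENTS = {
    "do": "<doStatement>\n<keyword> do </keyword>\n{}<symbol> ; </symbol>\n</doStatement>\n",
}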
class CompilationEngine:

    def __init__(self, input_file, output_file):
        self.jack_tokenizer = JackTokenizer(input_file)
        self.symbol_table = SymbolTable()
        self.writer = VMWriter(output_file)
        self.class_name = ""
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0
        self.num_args_called_function = 0
        self.is_unary = False
        self.dic_arithmetic = {"+" : "add" , "-" : "sub", "*" : "call Math.multiply 2",
                               "/" : "call Math.divide 2", "&" : "and", "|" : "or", "<" : "lt", ">" : "gt", "=" : "eq"}

    def compile_class(self):
        # "class className {
        for i in range(NUM_TOKENS_CLASS_DEC):
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # saves the className
            if self.jack_tokenizer.token_type() == IDENTIFIER:
                self.class_name = self.jack_tokenizer.identifier()
        # classVarDec* or SubroutineDec*
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "static" or
                                                  self.jack_tokenizer.key_word() == "field"):
                self.compile_class_var_dec()
            if token_type == KEYWORD and (self.jack_tokenizer.key_word() == "function" or
                                                  self.jack_tokenizer.key_word() == "method" or
                                                  self.jack_tokenizer.key_word() == "constructor"):
                self.compile_subroutine()
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break

    def compile_class_var_dec(self):
        # "static" of "field"
        kind = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # type
        if self.jack_tokenizer.token_type() == KEYWORD:
            type = self.jack_tokenizer.key_word()
        else:
            type = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name,type,kind)
            elif token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == ";":
                    break


    def compile_subroutine(self):
        self.symbol_table.start_subroutine()
        self.subroutine_name = ""
        self.return_type = ""
        self.label_counter_if = 0
        self.label_counter_while = 0
        # the curr token - "constructor" or "function" or "method"
        type_of_subroutine = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # the curr token : return type of the subroutine
        if self.jack_tokenizer.token_type() == KEYWORD:
            self.return_type = self.jack_tokenizer.key_word()
        else:
            self.return_type = self.jack_tokenizer.identifier()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        self.subroutine_name = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.symbol() == "(":
                if type_of_subroutine == "method":
                    self.symbol_table.define(THIS, self.class_name, ARG)
                self.compile_parameter_list()
                # the curr token should be -  ")"
            if self.jack_tokenizer.symbol() == '{':
                while self.jack_tokenizer.has_more_tokens():
                    self.jack_tokenizer.advance()
                    token_type = self.jack_tokenizer.token_type()
                    if token_type == KEYWORD:
                        if self.jack_tokenizer.key_word() == "var":
                            self.compile_var_dec()
                            continue
                        else:
                            self.writer.write_function(self.class_name +
                                                       "." + self.subroutine_name, self.symbol_table.var_count(VAR))
                            if type_of_subroutine == "constructor":
                                self.writer.write_push(CONST, self.symbol_table.var_count(FIELD))
                                self.writer.write_call("Memory.alloc", 1)
                                self.writer.write_pop("pointer", 0)
                            elif type_of_subroutine == "method":
                                self.writer.write_push(ARGUMENT, 0)
                                self.writer.write_pop("pointer", 0)
                            self.compile_statements()
                            # the curr token should be -  "}"
                            break
                break


    def compile_parameter_list(self):
        kind = ARG
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            # int, bool....
            if token_type == KEYWORD:
                type = self.jack_tokenizer.key_word()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            # className
            elif token_type == IDENTIFIER:
                type = self.jack_tokenizer.identifier()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            # end of parameter list
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break


    def compile_var_dec(self):
        # should be "var"
        kind = self.jack_tokenizer.key_word()
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # type
        if self.jack_tokenizer.token_type() == KEYWORD:
            type = self.jack_tokenizer.key_word()
        else:
            type = self.jack_tokenizer.identifier()
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                self.symbol_table.define(name, type, kind)
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == ";":
                    break


    def compile_statements(self):
        while True:
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "do":
                self.compile_do()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "let":
                self.compile_let()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "while":
                self.compile_while()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "return":
                self.compile_return()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
            # compile_if returns advanced
            if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "if":
                self.compile_if()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                break


    def compile_do(self):
        self.num_args_called_function = 0
        self.compile_subroutine_call()
        self.writer.write_pop(TEMP , 0)
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # return from compile_subroutine_call with ";"

    def compile_let(self):
        init = True
         # the curr token - "let"
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == IDENTIFIER:
                name = self.jack_tokenizer.identifier()
                type = self.symbol_table.type_of(name)
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
            if token_type == SYMBOL:
                # there is an assignment to an array
                if self.jack_tokenizer.symbol() == "[":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    #  handle  - [expression]
                    self.compile_expression()
                    # the curr token -  "]"
                    self.writer.write_push(self.find_segment(kind), index)
                    self.writer.write_arithmetic("add")
                    self.writer.write_pop("pointer", 1)
                    init = False
                # should return from the compile_expression only with ";" or "]"
                if self.jack_tokenizer.symbol() == "=":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # handle the = expression
                    self.compile_expression()
                    # that is only for array
                    if init == False: # was also if type == "Array"
                        self.writer.write_pop(THAT, 0)
                    else:
                        self.writer.write_pop(self.find_segment(kind), index)
                # end of let statement
                if self.jack_tokenizer.symbol() == ";":
                    break


    def compile_while(self):
        while_counter = self.label_counter_while
        self.label_counter_while += 1
        # the curr token - "while"
        self.writer.write_label("WHILE_EXP" + str(while_counter))
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # the curr token - ")"
                    self.writer.write_arithmetic("not")
                    self.writer.write_if("WHILE_END" + str(while_counter))
                if self.jack_tokenizer.symbol() == "{":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                    # the curr token - "}"
                    self.writer.write_go_to("WHILE_EXP" + str(while_counter))
                    self.writer.write_label("WHILE_END" + str(while_counter))
                if token_type == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break


    def compile_return(self):
        # the curr token - "return"
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ";":
            self.writer.write_push(CONST, "0")
        else:
            self.compile_expression()
            # should return from "compile_expression" only with ";"
        self.writer.write_return()

    def compile_if(self):
        if_counter = self.label_counter_if
        self.label_counter_if += 1
        # the curr token - "if"
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL:
                if self.jack_tokenizer.symbol() == "(":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # the curr token - ")"
                    self.writer.write_if("IF_TRUE" + str(if_counter))
                    self.writer.write_go_to("IF_FALSE" + str(if_counter))
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "{":
                    self.writer.write_label("IF_TRUE" + str(if_counter))
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_statements()
                # re-read the token type: compile_statements may have advanced past the cached token_type
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "}":
                    break
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        if self.jack_tokenizer.token_type() == KEYWORD and self.jack_tokenizer.key_word() == "else":
            # the curr token - "else"
            self.writer.write_go_to("IF_END" + str(if_counter))
            self.writer.write_label("IF_FALSE" + str(if_counter))
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # the curr token - "{"
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.compile_statements()
            # the curr token - "}"
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            self.writer.write_label("IF_END" + str(if_counter))
        else:
            self.writer.write_label("IF_FALSE" + str(if_counter))


    def compile_subroutine_call(self):
        to_add = False
        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # "subRoutineName" or ("className" | "varName", as part of className.subRoutineName)
        called_statement = self.jack_tokenizer.identifier()
        type = self.symbol_table.type_of(called_statement)
        kind = self.symbol_table.kind_of(called_statement)
        index = self.symbol_table.index_of(called_statement)


        self.jack_tokenizer.has_more_tokens()
        self.jack_tokenizer.advance()
        # case of "subRoutineCall(expressionList)
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
            to_add = True
            called_statement = self.class_name + "." + called_statement
            self.writer.write_push(POINTER, 0)
            self.compile_expression_list()
            # the curr token - ")"
        # (className | varName).subroutineName(expressionList)
        elif self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # subroutineName
            if kind != NONE:
                to_add = True
                self.writer.write_push(self.find_segment(kind), index)
                called_statement = type + "." + self.jack_tokenizer.identifier()
            else:
               called_statement = called_statement + "." + self.jack_tokenizer.identifier()
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            # "("
            # expressionList
            self.compile_expression_list()
            # ")"
        if to_add:
            self.writer.write_call(called_statement, self.num_args_called_function + 1)
        else:
            self.writer.write_call(called_statement, self.num_args_called_function)

    def compile_expression(self):
        is_print_unary = False
        if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
            self.is_unary = True
        self.compile_term()
        while self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                ["+", "-", "*", "/", "&", "|", "<", ">", "="]:
            arit_symbol = self.jack_tokenizer.symbol()
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "-":
                self.is_unary = True
                is_print_unary = True
            self.compile_term()
            # if not is_print_unary and
            self.writer.write_arithmetic(self.dic_arithmetic[arit_symbol])


    def compile_term(self):
        while True:
            token_type = self.jack_tokenizer.token_type()
            if token_type == SYMBOL and not self.is_unary and self.jack_tokenizer.symbol() in\
                    [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "="]:
                break
            if token_type == INT_CONST:
                self.writer.write_push(CONST, self.jack_tokenizer.int_val())
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == STRING_CONST:
                self.compile_string()
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == KEYWORD and self.jack_tokenizer.key_word() in ["true", "false", "null"]:
                self.writer.write_push(CONST, 0)
                if self.jack_tokenizer.key_word() == "true":
                    self.writer.write_arithmetic("not")
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            # like in return this
            if token_type == KEYWORD and self.jack_tokenizer.key_word() == "this":
                self.writer.write_push(POINTER, 0)
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() in ["~", "-"]:
                symbol = self.jack_tokenizer.symbol()
                self.is_unary = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_term()
                if symbol == "~":
                    self.writer.write_arithmetic("not")
                else:
                    self.writer.write_arithmetic("neg")
                break
            if token_type == SYMBOL and self.jack_tokenizer.symbol() == "(":
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                self.compile_expression()
                # should return from compile_expression only with ")"
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                break
            if token_type == IDENTIFIER:
                is_add = True
                name = self.jack_tokenizer.identifier()
                kind = self.symbol_table.kind_of(name)
                index = self.symbol_table.index_of(name)
                if name[0].isupper():
                    is_add = False
                self.jack_tokenizer.has_more_tokens()
                self.jack_tokenizer.advance()
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() in\
                        [",", ";", ")", "}","]", "+", "-", "*", "/", "&", "|", "<", ">", "=", "&amp;", "&lt;","&gt;"]:
                    # in case of a > ...or b;
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "[":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    self.compile_expression()
                    # should return only "]"
                    self.writer.write_push(self.find_segment(kind), self.symbol_table.index_of(name))
                    self.writer.write_arithmetic("add")
                    self.writer.write_pop(POINTER, 1)
                    self.writer.write_push(THAT, 0)
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    break
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == "(":
                    self.writer.write_push(POINTER, 0)
                    self.compile_expression_list()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # case of a = ... bar()
                    self.writer.write_call(self.class_name + "." + name,self.num_args_called_function + 1)
                    break
                # (className | varName).subroutineName(expressionList)
                if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ".":
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # subroutineName
                    if is_add:
                        type = self.symbol_table.type_of(name)
                        name = type + "." + self.jack_tokenizer.identifier()
                    else:
                        name = name + "." + self.jack_tokenizer.identifier()
                    self.jack_tokenizer.has_more_tokens()
                    self.jack_tokenizer.advance()
                    # "("
                    # expressionList
                    if is_add:
                        self.writer.write_push(self.find_segment(kind), index)
                    self.compile_expression_list()
                    # ")"
                    if is_add:
                        self.writer.write_call(name, self.num_args_called_function + 1)
                    else:
                        self.writer.write_call(name, self.num_args_called_function)
            self.jack_tokenizer.has_more_tokens()
            self.jack_tokenizer.advance()

    def compile_expression_list(self):
        num_args = 0
        while self.jack_tokenizer.has_more_tokens():
            self.jack_tokenizer.advance()
            if self.jack_tokenizer.token_type() == SYMBOL and self.jack_tokenizer.symbol() == ")":
                break
            else:
                num_args += 1
                self.compile_expression()
                if self.jack_tokenizer.symbol() == ")":
                    break
                # the curr token - ","
        self.num_args_called_function = num_args

    def find_segment(self, kind):
        if kind == ARG:
            return ARGUMENT
        if kind == VAR:
            return LCL
        if kind == FIELD:
            return THIS
        if kind == STATIC:
            return STATIC

    def compile_string(self):
        length = len(self.jack_tokenizer.string_val())
        self.writer.write_push(CONST, length)
        self.writer.write_call("String.new", 1)
        for i in range(len(self.jack_tokenizer.string_val())):
            uni = ord(self.jack_tokenizer.string_val()[i])
            self.writer.write_push(CONST, uni)
            self.writer.write_call("String.appendChar", 2)
class CompilationEngine:
    """Recursive top-down parser"""

    def __init__(self, inFile, outFile):
        """Creates a new compilation engine with the given input and output.
        The next routine called must be compileClass()"""
        self.tokenizer = JackTokenizer(inFile)
        self.targetFile = open(outFile, 'w')
        self.getNext()
        self.classTable = None
        self.className = ''
        self.writer = VMWriter(outFile)
        self.labelWhile = 1
        self.labelIf = 1

    def getNext(self):
        if self.tokenizer.hasMoreTokens():
            self.tokenizer.advance()

    def compileClass(self):
        """Compiles a complete class"""
        self.classTable = SymbolTable()
        # 'class' className '{' classVarDec* subroutineDec* '}'
        # class
        self.getNext()
        # className
        self.className = self.tokenizer.getToken()
        self.getNext()
        # {
        self.getNext()

        token = self.tokenizer.getToken()
        while token in ["static", "field"]:
            self.compileDec()
            token = self.tokenizer.getToken()

        token = self.tokenizer.getToken()
        while token in ["constructor", "function", "method"]:
            self.compileSubroutine()
            token = self.tokenizer.getToken()
        # }
        self.getNext()


    def compileSubroutine(self):
        """Compiles a complete method, function, or constructor."""
        # subroutine dec
        self.classTable.startSubroutine()
        # ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
        # ('constructor' | 'function' | 'method')
        subroutineType = self.tokenizer.getToken()
        self.getNext()
        # ('void' | type)
        self.getNext()

        # subroutineName
        name = self.tokenizer.getToken()
        self.getNext()
        # (
        self.getNext()
        # parameterList
        self.compileParameterList(subroutineType == 'method')
        # )
        self.getNext()

        # subroutine body
        # '{' varDec* statements '}'
        # {
        self.getNext()

        # varDec*
        while self.tokenizer.getToken() == 'var':
            self.compileDec()
        numOfVars = self.classTable.varCount(Toolbox.VAR)

        if subroutineType == 'function':
            self.writer.writeFunction(self.className + "." + name, numOfVars)
        elif subroutineType == 'constructor':
            self.writer.writeFunction(self.className + "." + name, numOfVars)
            # push constant (num of fields)
            # call Memory.alloc 1
            # pop pointer 0
            fields = self.classTable.varCount(Toolbox.FIELD)
            self.writer.writePush(Toolbox.CONST, fields)
            self.writer.writeCall('Memory.alloc', 1)
            self.writer.writePop(Toolbox.POINTER, 0)
        else:  # method
            self.writer.writeFunction(self.className + "." + name, numOfVars)
            # push argument 0
            # pop pointer 0
            self.writer.writePush(Toolbox.SEG_ARG, 0)
            self.writer.writePop(Toolbox.POINTER, 0)

        # statements
        self.compileStatements()
        # }
        self.getNext()

    def compileParameterList(self, method=False):
        """Compiles a (possibly empty) parameter list,
        not including the enclosing "()"."""
        tokenType, name = '', ''

        if method:  # Add this to method's var list.
            self.classTable.define(None, None, Toolbox.ARG)

        if self.tokenizer.tokenType() != self.tokenizer.SYMBOL:  # param list not empty
            while True:
                tokenType = self.tokenizer.getToken()
                self.getNext()

                name = self.tokenizer.getToken()
                self.classTable.define(name, tokenType, Toolbox.ARG)
                self.getNext()

                if self.tokenizer.getToken() == ')':
                    break

                self.getNext()  # ','

    def compileStatements(self):  # (letStatement | ifStatement | whileStatement | doStatement | returnStatement)*
        """Compiles a sequence of statements,
        not including the enclosing "{}"."""
        token = self.tokenizer.getToken()
        while token in ["let", "if", "while", "do", "return"]:
            if token == 'let':
                self.compileLet()
            elif token == 'if':
                self.compileIf()
            elif token == 'while':
                self.compileWhile()
            elif token == 'do':
                self.compileDo()
            elif token == 'return':
                self.compileReturn()
            token = self.tokenizer.getToken()

    def compileSubroutineCall(self, name, printIdentifier=True):
        # subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')'

        var = None
        nArgs = 0
        if printIdentifier:
            # subroutineName | ( className | varName)
            self.getNext()

        var = self.classTable.searchScope(name)

        if self.tokenizer.getToken() == '.':
            if var:
                # push <this>
                self.writer.writePush(var[0], var[1])
                nArgs += 1
                className = var[2]  # Use the type instead of the variable name
            else:
                className = name
            self.getNext()
            subroutineName = self.tokenizer.getToken()
            self.getNext()
        else:
            # push <this>
            self.writer.writePush(Toolbox.POINTER, 0)
            nArgs += 1
            className = self.className
            subroutineName = name

        name = className + '.' + subroutineName
        # '('
        self.getNext()
        nArgs += self.compileExpressionList()

        self.writer.writeCall(name, nArgs)
        # ')'
        self.getNext()

    def compileDo(self):  # 'do' subroutineCall ';'
        """Compiles a do statement"""
        # do
        self.getNext()
        # subroutineCall
        self.compileSubroutineCall(self.tokenizer.getToken())
        self.writer.writePop(Toolbox.TEMP, 0)
        # ;
        if self.tokenizer.getToken() == ';':
            self.getNext()

    def compileLet(self):  # 'let' varName ('[' expression ']')? '=' expression ';'
        """Compiles a let statement"""
        # let
        # self.targetFile.write(T_LET)
        self.getNext()
        # var name
        name = self.tokenizer.getToken()
        # search scope
        segment, index, type = self.classTable.searchScope(name)

        self.getNext()
        # [
        array = False
        if self.tokenizer.getToken() == '[':
            array = True
            self.writer.writePush(segment, index)
            self.getNext()
            # expression
            self.compileExpression()
            # ]
            self.getNext()
            self.writer.writeArithmetic('add')
        # =
        self.getNext()
        # expression
        self.compileExpression()

        if array:
            self.writer.writePop(Toolbox.TEMP, 0)
            self.writer.writePop(Toolbox.TEMP, 1)
            self.writer.writePush(Toolbox.TEMP, 0)
            self.writer.writePush(Toolbox.TEMP, 1)

            self.writer.writePop(Toolbox.POINTER, 1)
            self.writer.writePop(Toolbox.THAT, 0)
        else:
            self.writer.writePop(segment, index)

        # ;
        token = self.tokenizer.getToken()
        if token == ';':
            self.getNext()

    def compileWhile(self):  # while' '(' expression ')' '{' statements '}'
        """Compiles a while statement"""
        # while
        label = str(self.labelWhile)
        self.labelWhile += 1
        self.writer.writeLabel('while' + label)
        self.getNext()
        # (
        self.getNext()
        # expression
        self.compileExpression()
        # )
        self.getNext()
        self.writer.writeArithmetic('not')
        self.writer.writeIf('endwhile' + label)
        # {
        self.getNext()
        # statements
        self.compileStatements()
        # }
        self.getNext()
        self.writer.writeGoto('while' + label)
        self.writer.writeLabel('endwhile' + label)

    def compileReturn(self):  # 'return' expression? ';'
        """Compiles a return statement"""
        # return
        self.getNext()
        # expression
        if not (self.tokenizer.getToken() == ";"):
            self.compileExpression()
        else:
            self.writer.writePush(Toolbox.CONST, 0)
        self.writer.writeReturn()
        # ;
        self.getNext()

    def compileIf(self):  # 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
        """Compiles an if statement, possibly with a trailing else clause"""
        # if
        label = 'if' + str(self.labelIf)
        self.labelIf += 1

        self.getNext()
        # (
        self.getNext()
        # expression
        self.compileExpression()
        # )
        self.getNext()

        self.writer.writeArithmetic('not')
        self.writer.writeIf('else' + label)

        # {
        self.getNext()
        # statements
        self.compileStatements()
        # }
        self.getNext()

        self.writer.writeGoto('end' + label)
        self.writer.writeLabel('else' + label)

        # else
        if self.tokenizer.getToken() == 'else':
            self.getNext()
            # {
            self.getNext()
            # expression
            self.compileStatements()
            # }
            self.getNext()
        self.writer.writeLabel('end' + label)

    def compileExpression(self):
        """Compiles an expression"""
        # term (op term)*

        self.compileTerm()
        token = self.tokenizer.getToken()
        while token in ['+', '/', '-', '*', '&', '|', '>', '<', '=']:
            self.getNext()
            self.compileTerm()
            self.writer.writeArithmetic(token)

            token = self.tokenizer.getToken()


    def compileTerm(self):  #integerConstant | stringConstant | keywordConstant | varName | varName '[' expression']' |
                            # subroutineCall | '(' expression ')' | unaryOp term
        """Compiles a term"""

        token = self.tokenizer.getToken()
        tokenType = self.tokenizer.tokenType()

        if tokenType == self.tokenizer.INT_CONST:
            self.writer.writePush(Toolbox.CONST, token)
            self.getNext()
        elif tokenType == self.tokenizer.STRING_CONST:
            self.writer.writePush(Toolbox.CONST, len(token))
            self.writer.writeCall('String.new', 1)

            for c in token:
                self.writer.writePush(Toolbox.CONST, ord(c))
                self.writer.writeCall('String.appendChar', 2)

            self.getNext()
        elif tokenType == self.tokenizer.KEYWORD:  # true | false | null | this
            self.compileKeywordConstant(token)
        elif tokenType == self.tokenizer.IDENTIFIER:
            name = token
            self.getNext()
            token = self.tokenizer.getToken()
            if token == '[':
                self.compileVarName(name)
                self.getNext()
                self.compileExpression()
                self.getNext()
                self.writer.writeArithmetic('add')
                self.writer.writePop(Toolbox.POINTER, 1)
                self.writer.writePush(Toolbox.THAT, 0)
            elif token in ['(', '.']:
                self.compileSubroutineCall(name, False)
            else:
                self.compileVarName(name)

        elif token == '(':
            self.getNext()
            self.compileExpression()
            self.getNext()
        elif token in ['-', '~']:
            self.compileUnary(token)


    def compileExpressionList(self):
        """Compiles a (possibly empty) comma separated list of expressions"""
        nArgs = 0

        if self.tokenizer.getToken() != ')':
            self.compileExpression()
            nArgs += 1

            while self.tokenizer.getToken() == ',':
                self.getNext()
                self.compileExpression()
                nArgs += 1

        return nArgs

    def compileDec(self):  # 'var' type varName (',' varName)* ';'
        """Compiles a var declaration"""
        # keyword 'var'
        token = self.tokenizer.getToken()
        kind = None
        if token == 'var':
            kind = Toolbox.VAR
        elif token == 'field':
            kind = Toolbox.FIELD
        elif token == 'static':
            kind = Toolbox.STATIC
        self.getNext()
        tokenType = self.tokenizer.getToken()

        # type can be an identifier or a keyword
        self.getNext()

        # var name
        name = self.tokenizer.getToken()
        self.classTable.define(name, tokenType, kind)
        self.getNext()
        while self.tokenizer.tokenType() == self.tokenizer.SYMBOL and self.tokenizer.getToken() == ',':
            # ,
            self.getNext()
            name = self.tokenizer.getToken()
            self.classTable.define(name, tokenType, kind)
            # var name
            self.getNext()
        # ;
        self.getNext()

    def compileVarName(self, name):
        segment, index, type = self.classTable.searchScope(name)
        self.writer.writePush(segment, index)

    def compileKeywordConstant(self, keyword):
        if keyword == 'false' or keyword == 'null':
            self.writer.writePush(Toolbox.CONST, 0)
        if keyword == 'true':
            self.writer.writePush(Toolbox.CONST, 0)
            self.writer.writeArithmetic('not')
        if keyword == 'this':
            self.writer.writePush(Toolbox.POINTER, 0)
        self.getNext()

    def compileUnary(self, token):
        """
        Compiles an unary operator with its operand (term)
        :param token: unary token
        """
        self.getNext()  # '~' or '-'
        self.compileTerm()  # operand

        if token == '-':
            self.writer.writeArithmetic('neg')
        else:  # token is '~'
            self.writer.writeArithmetic('not')
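
The engine above takes its segment and kind names from a Toolbox module that is not shown. The constants below are an assumption, inferred only from how they are paired with writePush/writePop and with the symbol table; the real module may differ.

# Hypothetical Toolbox constants (inferred from usage, not from the original source).
class Toolbox:
    # VM memory segments, as used with writePush / writePop
    CONST = 'constant'
    SEG_ARG = 'argument'
    POINTER = 'pointer'
    TEMP = 'temp'
    THAT = 'that'
    # symbol-table kinds; mapping these to segments is left to the SymbolTable here
    VAR, ARG, FIELD, STATIC = 'var', 'arg', 'field', 'static'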
Example #46
  def create_token_file(jack_file_name):
    token_file_name = jack_file_name.replace('.jack', 'T.xml')
    token_file      = open(token_file_name, 'w')
    jack_file       = open(jack_file_name, 'r')
    tokenizer       = JackTokenizer(jack_file)

    token_file.write('<tokens>\n')

    while tokenizer.hasMoreTokens():
      tokenizer.advance()

      if tokenizer.tokenType() == 'KEYWORD':
        token_file.write('<keyword> {} </keyword>\n'.format(tokenizer.keyWord().lower()))
      elif tokenizer.tokenType() == 'SYMBOL':
        symbol = tokenizer.symbol()

        if symbol in ['<', '>', '&']:
          symbol = Main.XML_CONVSERSIONS[symbol]

        token_file.write('<symbol> {} </symbol>\n'.format(symbol))
      elif tokenizer.tokenType() == 'IDENTIFIER':
        token_file.write('<identifier> {} </identifier>\n'.format(tokenizer.identifier()))
      elif tokenizer.tokenType() == 'INT_CONST':
        token_file.write('<integerConstant> {} </integerConstant>\n'.format(tokenizer.intVal()))
      elif tokenizer.tokenType() is 'STRING_CONST':
        token_file.write('<stringConstant> {} </stringConstant>\n'.format(tokenizer.stringVal()))

    token_file.write('</tokens>\n')
    token_file.close()

    return token_file_name
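
The '<', '>' and '&' characters are escaped through Main.XML_CONVSERSIONS, which is defined elsewhere in that project. A plausible definition, keeping the attribute name used above (the exact contents are an assumption):

# mapping of XML-special characters to their entity references
XML_CONVSERSIONS = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}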
Example #47
0
	def __init__(self, src, output):
		self.tokenizer = JackTokenizer(src)
		self.writer = VMWriter(output)
		self.symbolTable = SymbolTable()
		self.labelIndex = 0
Example #48
0
class CompilationEngine(object):
	def __init__(self, src, output):
		self.tokenizer = JackTokenizer(src)
		self.writer = VMWriter(output)
		self.symbolTable = SymbolTable()
		self.labelIndex = 0

	def _acceptNextToken(self, token):
		if self.tokenizer.hasMoreToken():
			self.tokenizer.advance()
			typ = self.tokenizer.tokenType()
			tok = self.tokenizer.tokenValue()
			if type(token) != list:
				token = [token]
			if typ in token or tok in token:
				return tok
		raise SyntaxError('Parse Error')

	def _tryNextToken(self, token):
		if self.tokenizer.hasMoreToken():
			typ, tok = self.tokenizer.next()
			if type(token) != list:
				token = [token]
			if typ in token or tok in token:
				return True
		return False

	def compileClass(self):
		#'class' className '{' classVarDec* subroutineDec* '}'
		self._acceptNextToken('class')
		self.classname = self._acceptNextToken('identifier')
		self._acceptNextToken('{')

		while self._tryNextToken(['static', 'field']):
			self.compileClassVarDec()
		while self._tryNextToken(['constructor', 'function', 'method']):
			self.compileSubroutine()
		self._acceptNextToken('}')

		self.writer.close()

	def compileClassVarDec(self):
		#('static'|'field') type varName (','varName)* ';'
		kind = self._acceptNextToken(['static', 'field'])
		type = self._acceptNextToken(['int', 'char', 'boolean', 'identifier'])
		self.symbolTable.define(self._acceptNextToken('identifier'), type, kind)

		while self._tryNextToken(','):
			self._acceptNextToken(',')
			self.symbolTable.define(self._acceptNextToken('identifier'), type, kind)
		self._acceptNextToken(';')

	def compileSubroutine(self):
		#('constructor'|'function'|'method')
		#('void'|type)subroutineName'('parameterList')'
		#subroutineBody
		self.labelIndex = 0

		self.symbolTable.startSubroutine()
		subroutine = self._acceptNextToken(['constructor', 'function', 'method'])
		self._acceptNextToken(['void', 'int', 'char', 'boolean', 'identifier'])
		functionname = self._acceptNextToken('identifier')

		if subroutine == 'method':
			self.symbolTable.define('this', self.classname, 'argument')

		self._acceptNextToken('(')
		self.compileParameterList()
		self._acceptNextToken(')')
		self._acceptNextToken('{')

		nLocals = 0
		while self._tryNextToken('var'):
			nLocals += self.compileVarDec()
		self.writer.writeFunction(self.classname + '.' + functionname, nLocals)

		if subroutine == 'constructor':
			self.writer.writePush('constant', self.symbolTable.varCount('field'))
			self.writer.writeCall('Memory.alloc', 1)
			self.writer.writePop('pointer', 0)
		elif subroutine == 'method':
			self.writer.writePush('argument', 0)
			self.writer.writePop('pointer', 0)
		while self._tryNextToken(STATEMENT):
			self.compileStatements()
		self._acceptNextToken('}')

	def compileParameterList(self):
		#((type varName)(','type varName)*)?
		if self._tryNextToken(TYPE):
			type = self._acceptNextToken(TYPE)
			self.symbolTable.define(self._acceptNextToken('identifier'), type, 'argument')
			while self._tryNextToken(','):
				self._acceptNextToken(',')
				type = self._acceptNextToken(TYPE)
				self.symbolTable.define(self._acceptNextToken('identifier'), type, 'argument')

	def compileVarDec(self):
		#'var' type varName (',' varName)*';'
		nVars = 1
		self._acceptNextToken('var')
		type = self._acceptNextToken(TYPE)
		self.symbolTable.define(self._acceptNextToken('identifier'), type, 'local')

		while self._tryNextToken(','):
			self._acceptNextToken(',')
			nVars += 1
			self.symbolTable.define(self._acceptNextToken('identifier'), type, 'local')
		self._acceptNextToken(';')
		return nVars

	def compileStatements(self):
		#statement*
		#letStatement|ifStatement|whileStatement|doStatement|returnStatement
		while self._tryNextToken(STATEMENT):
			if self._tryNextToken('let'):
				self.compileLet()
			elif self._tryNextToken('if'):
				self.compileIf()
			elif self._tryNextToken('while'):
				self.compileWhile()
			elif self._tryNextToken('do'):
				self.compileDo()
			elif self._tryNextToken('return'):
				self.compileReturn()

	def compileDo(self):
		#'do' subroutineCall ';'
		#subroutineName '(' expressionList ')' | (className | varName) '.' subroutineName '(' expressionList ')'
		self._acceptNextToken('do')
		funcname = self._acceptNextToken('identifier')

		argc = 0
		if self._tryNextToken('.'):
			self._acceptNextToken('.')
			type = self.symbolTable.typeOf(funcname)
			if type is not None:
				argc += 1
				self.writer.writePush(self.symbolTable.kindOf(funcname), self.symbolTable.indexOf(funcname))
				funcname = type + '.' + self._acceptNextToken('identifier')				#game.run()
			else:
				funcname = funcname + '.' + self._acceptNextToken('identifier')			#Game.run()
		else:
			argc += 1
			funcname = self.classname + '.' + funcname 										#run()
			self.writer.writePush('pointer', 0)
	
		self._acceptNextToken('(')
		argc += self.compileExpressionList()
		self._acceptNextToken(')')
		self._acceptNextToken(';')

		self.writer.writeCall(funcname, argc)
		self.writer.writePop('temp', 0)
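
	# Summary of the three call forms handled above (as generated VM code):
	#   do obj.run()   -> push the object, call Type.run with nArgs+1
	#   do Game.run()  -> call Game.run with nArgs (function/constructor call)
	#   do run()       -> push pointer 0 (this), call ThisClass.run with nArgs+1
	# The final "pop temp 0" discards the return value, since a do statement
	# ignores it.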

	def compileLet(self):
		#'let' varName ('[' expression ']')? '=' expression ';'
		self._acceptNextToken('let')
		varName = self._acceptNextToken('identifier')
		if self._tryNextToken('['):
			self.writer.writePush(self.symbolTable.kindOf(varName), self.symbolTable.indexOf(varName))
			self._acceptNextToken('[')
			self.compileExpression()
			self._acceptNextToken(']')
			self.writer.writeArithmetic('add')
			self._acceptNextToken('=')
			self.compileExpression()
			self._acceptNextToken(';')
			self.writer.writePop('temp', 0)     # save the assigned value
			self.writer.writePop('pointer', 1)  # that = base + index
			self.writer.writePush('temp', 0)
			self.writer.writePop('that', 0)     # *(base + index) = value
		else:
			self._acceptNextToken('=')
			self.compileExpression()
			self._acceptNextToken(';')
			self.writer.writePop(self.symbolTable.kindOf(varName), self.symbolTable.indexOf(varName))

	def compileWhile(self):
		#'while' '(' expression ')''{' statements '}'
		index = str(self.labelIndex)
		self.labelIndex += 1

		self.writer.writeLabel('WHILE' + index)
		self._acceptNextToken('while')
		self._acceptNextToken('(')
		self.compileExpression()
		self._acceptNextToken(')')
		self.writer.writeArithmetic('not')

		self.writer.writeIf('WHILE_END' + index)
		self._acceptNextToken('{')
		self.compileStatements()
		self._acceptNextToken('}')
		self.writer.writeGoto('WHILE' + index)
		self.writer.writeLabel('WHILE_END' + index)
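
	# For reference, the VM code generated for "while (cond) { body }" has the
	# shape (label index suffix omitted):
	#   label WHILE
	#   ...cond...
	#   not
	#   if-goto WHILE_END
	#   ...body...
	#   goto WHILE
	#   label WHILE_END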

	def compileReturn(self):
		#'return' expression? ';'
		self._acceptNextToken('return')

		if self._tryNextToken(';'):
			self._acceptNextToken(';')
			self.writer.writePush('constant', 0)
		else:
			self.compileExpression()
			self._acceptNextToken(';')
		self.writer.writeReturn()

	def compileIf(self):
		#'if' '(' expression ')' '{' statements '}'
		#('else' '{' statements '}')?
		index = str(self.labelIndex)
		self.labelIndex += 1

		self._acceptNextToken('if')
		self._acceptNextToken('(')
		self.compileExpression()
		self._acceptNextToken(')')
		self.writer.writeArithmetic('not')
		self.writer.writeIf('IF_TRUE' + index)

		self._acceptNextToken('{')
		self.compileStatements()
		self._acceptNextToken('}')
		self.writer.writeGoto('IF_FALSE' + index)
		self.writer.writeLabel('IF_TRUE' + index)

		if self._tryNextToken('else'):
			self._acceptNextToken('else')
			self._acceptNextToken('{')
			self.compileStatements()
			self._acceptNextToken('}')
		self.writer.writeLabel('IF_FALSE' + index)
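
	# Note on the label names: the condition is negated before the jump, so
	# if-goto IF_TRUE is taken when the condition is false. IF_TRUE therefore
	# marks the start of the else branch and IF_FALSE marks the end of the
	# whole statement:
	#   ...cond...
	#   not
	#   if-goto IF_TRUE    (taken when cond is false)
	#   ...then-branch...
	#   goto IF_FALSE
	#   label IF_TRUE
	#   ...else-branch...
	#   label IF_FALSE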

	def compileExpression(self):
		#term(op term)*
		self.compileTerm()
		while self._tryNextToken(OP):
			op = self._acceptNextToken(OP)
			self.compileTerm()
			if op == '*':
				self.writer.writeCall('Math.multiply', 2)
			elif op == '/':
				self.writer.writeCall('Math.divide', 2)
			else:
				self.writer.writeArithmetic(OP_COMMAND[op])

	def compileTerm(self):
		#integerConstant|stringConstant|keywordConstant|varName|
		#varName'['expression']'|subroutineCall|'('expression')'|unaryOp term

		if self._tryNextToken('('):										#'('expression')'
			self._acceptNextToken('(')
			self.compileExpression()
			self._acceptNextToken(')')
		elif self._tryNextToken(['-', '~']):							#unaryOp term
			unaryOp = self._acceptNextToken(['-', '~'])
			self.compileTerm()
			if unaryOp == '-':
				self.writer.writeArithmetic('neg')
			else:
				self.writer.writeArithmetic('not')
		else:
			first_s = self._acceptNextToken(TERM)
			if self._tryNextToken('['):									#varName'['expression']'
				self.writer.writePush(self.symbolTable.kindOf(first_s), self.symbolTable.indexOf(first_s))
				self._acceptNextToken('[')
				self.compileExpression()
				self._acceptNextToken(']')
				self.writer.writeArithmetic('add')
				self.writer.writePop('pointer', 1)  # that = base + index
				self.writer.writePush('that', 0)    # push the array element
			elif self._tryNextToken('('):								#subroutineCall run()
				self.writer.writePush('pointer', 0)
				self._acceptNextToken('(')
				argc = self.compileExpressionList() + 1
				self._acceptNextToken(')')
				self.writer.writeCall(self.classname + '.' + first_s, argc)
			elif self._tryNextToken('.'):								#subroutineCall game.run()
				self._acceptNextToken('.')
				identifier = self._acceptNextToken('identifier')
				type = self.symbolTable.typeOf(first_s)
				argc = 0
				callname = first_s
				if type is not None:
					argc += 1
					callname = type
					self.writer.writePush(self.symbolTable.kindOf(first_s), self.symbolTable.indexOf(first_s))
				self._acceptNextToken('(')
				argc += self.compileExpressionList()
				self._acceptNextToken(')')
				self.writer.writeCall(callname + '.' + identifier, argc)
			else:
				tokenType = self.tokenizer.tokenType()
				if tokenType == 'integerConstant':
					self.writer.writePush('constant', int(first_s))
				elif tokenType == 'stringConstant':
					self.writer.writePush('constant', len(first_s))
					self.writer.writeCall('String.new', 1)
					for c in first_s:
						self.writer.writePush('constant', ord(c))
						self.writer.writeCall('String.appendChar', 2)
				elif tokenType == 'identifier':
					self.writer.writePush(self.symbolTable.kindOf(first_s), self.symbolTable.indexOf(first_s))
				else:
					if first_s == 'null' or first_s == 'false':
						self.writer.writePush('constant', 0)
					elif first_s == 'true':
						self.writer.writePush('constant', 1)
						self.writer.writeArithmetic('neg')
					elif first_s == 'this':
						self.writer.writePush('pointer', 0)

	def compileExpressionList(self):
		#(expression(','expression)*)?
		argc = 0
		if self._tryNextToken(TERM):
			self.compileExpression()
			argc += 1
			while self._tryNextToken(','):
				self._acceptNextToken(',')
				self.compileExpression()
				argc += 1
		return argc
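
The class above refers to several token-set constants (STATEMENT, TYPE, OP, TERM, OP_COMMAND) defined elsewhere in the same project. Definitions consistent with the Jack grammar and with how the class uses them would look roughly like this; the names come from the code above, but the exact values are an assumption:

STATEMENT = ['let', 'if', 'while', 'do', 'return']
TYPE = ['int', 'char', 'boolean', 'identifier']
OP = ['+', '-', '*', '/', '&', '|', '<', '>', '=']
TERM = ['integerConstant', 'stringConstant', 'identifier',
        'true', 'false', 'null', 'this']
OP_COMMAND = {'+': 'add', '-': 'sub', '&': 'and', '|': 'or',
              '<': 'lt', '>': 'gt', '=': 'eq'}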
Example #49
0
# specifying a dir name
else:
    files = glob.glob('*.jack')

# remove any previously generated xml output files
for filename in files:
    tmp = re.sub(r'\.jack', '.xml', filename)
    if os.path.exists(tmp):
        os.remove(tmp)
        if debug: print("rm : ", tmp)
    else:
        if debug: print("not exist : ", tmp)
    tmp = re.sub(r'\.jack', 'T.xml', filename)
    if os.path.exists(tmp):
        os.remove(tmp)
        if debug: print("rm : ", tmp)
    else:
        if debug: print("not exist : ", tmp)


while len(files):
    filename = files.pop(0)
    if debug: print("load source filename : ", filename)
    J = JackTokenizer(filename)
    J.write_all(re.sub(r'\.jack', 'T.xml', filename))

    J = JackTokenizer(filename)
    C = CompilationEngine(re.sub(r'\.jack', '.xml', filename), J)
    C.compileClass()

Example #50
0
#Author: Josh Wretlind
#Class: CSCI 410 - Elements of Computing Systems
#Project: ECS 10 - Compiler part #1
#Date: 04/07/13

import sys,string,os
from JackTokenizer import JackTokenizer
from CompilationEngine import CompilationEngine
from Parse import Parse

infile = sys.argv[1]  # sys.argv is the list of command-line arguments

outfile = infile.replace(".jack",".xml")
parse = Parse(infile)
infileText = ""
jtok = JackTokenizer()

tokenList = []

while parse.hasMoreCommands():
    parse.advance()
    blah = parse.output()
    infileText += blah
    jtok.advance(blah)

tokenList.extend(jtok.listOfTokens)

ce = CompilationEngine()
ce.setListOfTokens(tokenList)
ce.run()
Example #51
0
 def __init__(self, program):
     self._tokens = JackTokenizer(program).get_tokens()