def test_is_local_label(self):
    """Check ``SymbolTable.is_local_label`` on local labels and on non-labels.

    Every single digit followed by ``H`` or ``h`` must be reported as a local
    label; the remaining sample strings must not.
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for digit in range(10):
        self.assertEqual(SymbolTable.is_local_label('%dH' % digit), True)
        self.assertEqual(SymbolTable.is_local_label('%dh' % digit), True)
    for candidate in '1F 1B blah 10H 123 4Q4'.split():
        self.assertEqual(SymbolTable.is_local_label(candidate), False)
def generate_symbols(file):
    """Build a symbol table mapping label symbols to instruction numbers.

    Walks every command of the file at path ``file``. Each ``L_COMMAND`` whose
    symbol is not yet in the table is recorded with the number of A/C commands
    seen so far; every ``A_COMMAND`` or ``C_COMMAND`` advances that counter.

    Returns the populated ``SymbolTable``.
    """
    symbol_table = SymbolTable()
    instruction_number = 0
    with open(file) as f:
        parser = Parser(f)
        while parser.has_more_commands():
            # Compare by value: ``is`` on string literals only works by
            # accident of interning and warns on modern Python.
            command_type = parser.command_type()
            if command_type == 'L_COMMAND':
                symbol = parser.symbol()
                if not symbol_table.contains(symbol):
                    symbol_table.add_entry(symbol, instruction_number)
            elif command_type in ('A_COMMAND', 'C_COMMAND'):
                instruction_number += 1
            parser.advance()
    return symbol_table
def __init__(self, file_name):
    """Initialise the base parser on ``file_name`` and collect its symbols.

    After the base class is set up, a ``SymbolTable`` is built from
    ``self.buff`` and asked to find its symbols.
    """
    super(AssmParser, self).__init__(file_name)
    self.RAM = 16
    table = SymbolTable(self.buff)
    self.symbol_table = table
    table.find_symbols()
def __init__(self, file_name):
    """Load ``file_name`` and run the label and variable passes over it."""
    # Fresh state before any pass runs.
    self.lines = []
    self.current_position = 0
    self.symbol_table = SymbolTable()

    # The three passes run in this fixed order.
    for step in (
        lambda: self.readInFile(file_name),
        self.addLabelsToSymbolTable,
        self.substituteVars,
    ):
        step()
def __init__(self, filepath, vm_writer):
    """Open the XML dump file and create the tokenizer and symbol table.

    The dump path is ``filepath`` with its last five characters replaced by
    ``.myImpl.xml``. ``vm_writer`` is stored as ``self.vmw``.
    """
    xml_path = filepath[:-5] + ".myImpl.xml"
    self.wf = open(xml_path, 'w')
    self.tokenizer = JackTokenizer(filepath)
    self.symbol_table = SymbolTable()
    self.vmw = vm_writer
    self.compiled_class_name = None
    self.label_num = 0
def parse_line(text_line):
    """Split one source line into label, operation and argument.

    Returns ``None`` for a line with no words or one starting with ``*``;
    otherwise returns ``Line(label, operation, argument)``, where a missing
    label or argument is ``None``.

    Raises MissingOperationError, InvalidLabelError, TooLongLabelError,
    UnknownOperationError or ArgumentRequiredError when the corresponding
    check fails.
    """
    tokens = split_line(text_line.upper())

    # Nothing to parse: empty line or comment line.
    if not tokens or text_line[0] == '*':
        return None

    # Leading whitespace means there is no label; pad its slot.
    if text_line[0].isspace():
        tokens.insert(0, None)
    if len(tokens) < 2:
        raise MissingOperationError
    # No operand given; pad its slot.
    if len(tokens) < 3:
        tokens.append(None)

    label, operation, argument = tokens[:3]

    # Label checks only apply when a label is present.
    if label is not None and not SymbolTable.is_label(label):
        raise InvalidLabelError(label)
    if label is not None and len(label) > 10:
        raise TooLongLabelError(label)

    if not operations.is_valid_operation(operation):
        raise UnknownOperationError(operation)

    if argument is None and operations.is_arg_required(operation):
        raise ArgumentRequiredError(operation)

    return Line(label, operation, argument)
def __init__(self, code_string=None):
    """Create the tokenizer for ``code_string`` and an empty symbol table."""
    # presumably operator lexemes and whitespace separators — confirm
    # against the Tokenizer signature.
    second_arg = ['+', '-', '/', '*', '(', ')', ':=']
    third_arg = ['\n', ' ']

    self._KEYWORDS = ['read', 'write']
    self._token = None
    self._line = 0
    self._tokenizer = Tokenizer(code_string, second_arg, third_arg)
    self._symboltable = SymbolTable()
def __init__(self, filename):
    """Remember ``filename`` and set up the symbol table and dispatch map."""
    self.filename = filename
    self.next_output_line_num = 0
    self.symbol_table = SymbolTable()

    # TODO: it would be great if this could be defined globally instead of
    # in the class init
    generators = {}
    generators[CommandType.A_COMMAND] = self.build_a_command
    generators[CommandType.C_COMMAND] = CCommandBuilder.build_c_command
    self.command_type_to_code_generator = generators
def __init__(self, filename=None):
    """Start with one ``'global'`` scope; load ``filename`` when one is given."""
    super(TableManager, self).__init__()
    self.table = SymbolTable()
    self.line_counts = [0]
    self.scope_names = ['global']
    self.numscopes = {}
    if not filename:
        return
    self.load_file(filename)
def __init__(self, tokens):
    """Prime the two-token lookahead and create the root symbol table.

    ``tokens`` must support ``len()`` and iteration and hold at least two
    items, because two tokens are read immediately (``StopIteration``
    propagates otherwise).
    """
    self.index = len(tokens)
    self.tokens = iter(tokens)
    # Builtin next() works on Python 2.6+ and 3; the ``.next()`` method is
    # Python 2 only.
    self.cur_token = next(self.tokens)   # Default token holder
    self.next_token = next(self.tokens)  # LL2 lookahead token holder for when needed
    self.cur_symbol_table = None
    self.root_table = SymbolTable(None)
    self.sem_analyzer = SemanticAnalyzer(self.root_table)
    self.program_name = ''
    self.cur_proc_name = ''
    self.cur_func_name = ''
def main():
    """Assemble the ``.asm`` file named on the command line into a ``.hack`` file.

    Pass 1 records every label (``L_COMMAND``) at the address of the next
    A/C command. Pass 2 re-reads the file and writes one line of binary text
    per A/C command; unknown ``@symbol`` references are registered through
    ``SymbolTable.add_variable`` before their address is read back.
    """
    parser = argparse.ArgumentParser(description='Process some integers.')
    parser.add_argument('asm_file', type=str, help='asm file')
    args = parser.parse_args()

    asm_file = args.asm_file
    save_file = os.path.splitext(asm_file)[0] + ".hack"
    st = SymbolTable()

    # Pass 1: collect label addresses.
    with HackParser(asm_file) as hp:
        op_address = 0
        while hp.advance() is not None:
            cmd_type = hp.command_type()
            if cmd_type == A_COMMAND or cmd_type == C_COMMAND:
                op_address += 1
            elif cmd_type == L_COMMAND:
                st.add_entry(hp.symbol(), op_address)

    # Pass 2: emit machine code.
    with HackParser(asm_file) as hp:
        with open(save_file, 'w') as wf:
            while hp.advance() is not None:
                cmd_type = hp.command_type()
                if cmd_type == A_COMMAND:
                    m = symbol_pattern.match(hp.symbol())
                    if m.group(1):  # @value
                        bincode = "0" + int2bin(int(m.group(1)), 15)
                    elif m.group(2):  # @symbol
                        symbol = m.group(2)
                        # First sighting of a variable: allocate it, then
                        # encode its address like any known symbol.
                        if not st.contains(symbol):
                            st.add_variable(symbol)
                        bincode = "0" + int2bin(st.get_address(symbol), 15)
                elif cmd_type == C_COMMAND:
                    bincode = ('111' + code_writer.comp(hp.comp())
                               + code_writer.dest(hp.dest())
                               + code_writer.jump(hp.jump()))
                if cmd_type != L_COMMAND:
                    wf.write(bincode + '\n')
def __init__(self, scanner, output_file=None):
    """Bind the scanner's tokens, configure the ``parser`` logger, create the writer.

    A DEBUG-level file handler writing to ``/tmp/myapp.log`` is attached only
    when the logger has no handlers yet.
    """
    self.scanner = scanner
    self.tokens = scanner.tokens
    self.log = logging.getLogger("parser")
    self.table = SymbolTable()

    if len(self.log.handlers) == 0:
        self.log.setLevel(logging.DEBUG)
        file_handler = logging.FileHandler("/tmp/myapp.log")
        file_handler.setFormatter(
            logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
        self.log.addHandler(file_handler)

    self.writer = CodeWriter(out_file=output_file)
def test_is_local_label_reference(self):
    """Check ``SymbolTable.is_local_label_reference`` for every single digit.

    ``<digit>f/F/b/B`` must be reported as references; ``<digit>H/h`` must not.
    """
    # range() instead of xrange() so the test also runs on Python 3.
    for digit in range(10):
        for suffix in ('f', 'F', 'b', 'B'):
            self.assertEqual(
                SymbolTable.is_local_label_reference('%d%s' % (digit, suffix)),
                True)
        for suffix in ('H', 'h'):
            self.assertEqual(
                SymbolTable.is_local_label_reference('%d%s' % (digit, suffix)),
                False)
def __init__(self, src, include_dirs=()):
    """Run one symbol pass over ``src`` and generate its instructions.

    ``include_dirs`` is accepted but not used here.
    """
    self.gen = gen.Generation(src)
    self.cur_byte = 0
    self.symtab = SymbolTable()
    self.pass1 = []
    self.asm_inst = []

    resolved = self.gen.symbol_pass(self.symtab)
    self.symtab = resolved
    # A multi-pass replace_idents/symbol_pass loop used to sit here; it is
    # disabled, so only the single pass above runs.
    self.asm_inst = self.gen.generate(resolved)
def test_compile_continue_break(self):
    """Compile ``continue`` and ``break`` opcodes and compare the emitted C."""
    opcodes = [
        OpCode("continue", "", ""),
        OpCode("break", "", ""),
    ]
    symbols = SymbolTable()

    compile(opcodes, "testing.c", symbols)

    with open("testing.c", "r") as handle:
        emitted = handle.read().split("\n")
    os.remove("testing.c")

    expected = ["", "\tcontinue;", "\tbreak;", ""]
    self.assertEqual(emitted, expected)
def setUp(self):
    """Reset the counters and build a transformer over four known symbols."""
    TemporaryVariables.reset()
    Label.reset()

    symbol_table = SymbolTable()
    declarations = (
        ("a_int", Types.INT, 1),
        ("b_int", Types.INT, 1),
        ("c_float", Types.FLOAT, 2),
        ("d_float", Types.FLOAT, 2),
    )
    for name, symbol_type, third_arg in declarations:
        symbol_table.add_symbol(name, symbol_type, third_arg)

    self.transformer = CPLTransformer(symbol_table)
def test_lexical_analyze_address_of(self):
    """Lex a pointer declaration; the third-from-last token must be ``address_of``."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """var a = 1 var *n = &a """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    self.assertEqual(tokens[-3], Token("address_of", "", 2))
def test_compile_do_while_do(self):
    """Compile a do/while loop (both halves) and compare the emitted C lines."""
    opcodes = [
        OpCode("var_assign", "i---0", "int"),
        OpCode("do", "", ""),
        OpCode("scope_begin", "", ""),
        OpCode("print", '"%d", i', None),
        OpCode("scope_over", "", ""),
        OpCode("while_do", "i <= 0", None),
    ]
    symbols = SymbolTable()
    symbols.symbol_table = {
        1: ["i", "int", "variable"],
        2: ["0", "int", "constant"],
        3: ["0", "int", "constant"],
    }

    compile(opcodes, "testing.c", symbols)

    with open("testing.c", "r") as handle:
        emitted = handle.read().split("\n")
    os.remove("testing.c")

    expected = [
        "#include <stdio.h>",
        "\tint i = 0;",
        "\tdo {",
        '\tprintf("%d", i);',
        "}",
        "\twhile(i <= 0);",
    ]
    self.assertEqual(emitted, expected)
def test_compile_ptr_only_assign(self):
    """Compile a pointer-only assignment and compare the emitted C lines."""
    ## TODO: Fix this test after bug #23 gets fixed
    opcodes = [
        OpCode("var_assign", "a---1", "int"),
        OpCode("ptr_assign", "n---&a---1", "int"),
        OpCode("ptr_only_assign", "n---=---2---1", ""),
    ]
    symbols = SymbolTable()
    symbols.symbol_table = {
        1: ["a", "int", "variable"],
        2: ["1", "int", "constant"],
        3: ["n", "int", "variable"],
        4: ["2", "int", "constant"],
    }

    compile(opcodes, "testing.c", symbols)

    with open("testing.c", "r") as handle:
        emitted = handle.read().split("\n")
    os.remove("testing.c")

    expected = ["", "\tint a = 1;", "\tint *n = &a;", "\t**n = =;", ""]
    self.assertEqual(emitted, expected)
def test_keyword_identifier_identifier(self):
    """A bare identifier yields an ``id`` token and one symbol-table entry."""
    symbols = SymbolTable()

    # Positional arguments: source text, start index, table, line number.
    token, _ = keyword_identifier("a\\0", 0, symbols, 1)

    self.assertEqual(token, Token("id", 1, 1))
    self.assertEqual(symbols.symbol_table, {1: ["a", "var", "variable"]})
def main():
    """Translate every ``*.asm`` file found under ``./../`` into a ``.hack`` file.

    Each source is read whole, passed through ``Parser``, ``SymbolTable`` and
    ``Code``, and the resulting text is written next to the source with the
    ``asm`` suffix replaced by ``hack``.
    """
    for path in Path('./../').rglob('*.asm'):
        filename = './' + str(path)
        new_filename = filename[0:-3] + 'hack'

        # Context managers close the handles even if a later step raises.
        with open(filename, 'r') as source:
            text = source.read()

        parser = Parser(text)
        symbol_table = SymbolTable(parser.text)
        code = Code(parser.text, symbol_table.symbols)

        # NOTE(review): append mode is kept from the original; re-running
        # adds to an existing .hack file instead of replacing it — confirm
        # this is intended.
        with open(new_filename, 'a') as target:
            target.write(code.text)
def main(argv):
    """Translate the asm file named by ``argv[1]`` into a sibling ``.hack`` file.

    Returns without doing anything when ``check_args(argv)`` is falsy.
    """
    if not check_args(argv):
        return

    # The asm file to process; the output swaps its extension for .hack.
    asm_file_path = argv[1]
    hack_file_path = os.path.splitext(asm_file_path)[0] + '.hack'

    with open(hack_file_path, 'w') as hack_file:
        with open(asm_file_path) as asm_file:
            symbol_table = SymbolTable()
            first_pass(asm_file, symbol_table)
            second_pass(asm_file, hack_file, symbol_table)
def test_assn4():
    """Resolve a chain of dependent assignments one symbol pass at a time."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    src = ''' a = b + 1 b = c + 1 c = 1 '''
    a = gen.Generation(src)
    symtab = a.symbol_pass(SymbolTable())

    # Each extra replace/symbol pass resolves the next name up the chain.
    expected_chain = (('c', 1), ('b', 2), ('a', 3))
    for position, (name, value) in enumerate(expected_chain):
        if position > 0:
            a.replace_idents(symtab)
            symtab = a.symbol_pass(symtab)
        assert symtab.resolveName(name) == ast.Number(value)
def call(self, context: Context, args: List[LangType]) -> LangType:
    """Apply this callable to the last element of ``args``.

    One value is popped from ``args`` (the caller's list is mutated), bound
    through ``self.arg`` in a fresh context, and the body is evaluated there.
    If arguments remain, the copied result is itself called with them.
    Returns a copy of the final value.
    """
    call_ctx = Context(self.name, SymbolTable(), context, self.pos_start)
    call_ctx.add_parent(self.context)

    self.arg.set_value(args.pop())
    self.arg.visit(call_ctx)

    outcome = self.body_node.visit(call_ctx).copy()
    if not args:
        return outcome
    # Leftover arguments are fed to the returned value.
    return outcome.call(call_ctx, args).copy()
def test_lexical_analyze_assignment_equal(self):
    """Lex ``var a = 1 == 1`` and check the assignment and equal tokens."""
    with open("testing.simc", "w") as handle:
        handle.write("var a = 1 == 1")

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (2, Token("assignment", "", 1)),
        (-2, Token("equal", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def test_lexical_analyze_left_right_paren_call_end(self):
    """Lex ``var a = (1)`` and check left_paren, right_paren and call_end."""
    with open("testing.simc", "w") as handle:
        handle.write("var a = (1)")

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (3, Token("left_paren", "", 1)),
        (5, Token("right_paren", "", 1)),
        (6, Token("call_end", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def _handle_tuple(self, context: Context) -> LangType:
    """Match ``self.var`` (a ``LangTuple``) against this case's type list.

    Returns ``LangNoMatchType.instance()`` when the tuple is not of the listed
    types. Otherwise the tuple's values are assigned through a
    ``VariableAssignmentNode`` visited in ``context``, and the case expression
    is evaluated in a new child context whose result is returned.
    """
    assert isinstance(self.var, LangTuple)

    # Materialise the names: a lazy ``map`` would be consumed by is_of_type
    # and leave the context name below as "Case []".
    type_names = [type_desc[0].value for type_desc in self.types]
    if not self.var.is_of_type(type_names):
        return LangNoMatchType.instance()

    new_ctx = Context(
        f"Case {type_names}",
        SymbolTable(),
        context,
        self.pos_start,
    )
    node = VariableAssignmentNode([
        self._convert_nth_value_to_assignment_node(value)
        for value in self.types
    ])
    node.set_value(self.var)
    # NOTE(review): the assignment is visited in ``context``, not ``new_ctx``
    # — confirm this is intended.
    node.visit(context)
    return self.expr_node.visit(new_ctx)
def test_lexical_analyze_multiply_equal_multiply(self):
    """Lex ``*`` and ``*=`` and check the multiply and multiply_equal tokens."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """var a = 1 * 2 a *= 1 """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (8, Token("multiply_equal", "", 2)),
        (4, Token("multiply", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def test_lexical_analyze_modulus_equal_modulus(self):
    """Lex ``%`` and ``%=`` and check the modulus and modulus_equal tokens."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """var a = 1 % 2 a %= 3 """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (8, Token("modulus_equal", "", 2)),
        (4, Token("modulus", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def eval(self, st):
    """Call the function named ``self.value`` in a child scope of ``st``.

    Each call argument is evaluated in the new scope and bound to the matching
    declared parameter, the function statements are evaluated, and the value
    stored under the declaration's ``value`` name is returned.

    Raises ValueError when the argument count differs from the declaration.
    """
    scope = SymbolTable(st)
    declaration = scope.getter(self.value)

    expected_count = len(declaration.args)
    if expected_count != len(self.args):
        raise ValueError(
            f"error: Expected {len(declaration.args)} arguments. got {len(self.args)}"
        )

    for parameter, argument in zip(declaration.args, self.args):
        scope.setter(parameter.getstr(), argument.eval(scope))

    declaration.func_statements.eval(scope)
    return scope.getter(declaration.value)
def eval(self, st):
    """Call the function named ``self.value`` in a child scope of ``st``.

    Each call argument is evaluated in the new scope and bound to the matching
    declared parameter, the function statements are evaluated, and the value
    stored under the declaration's ``value`` name is returned.

    Raises ValueError when the argument count differs from the declaration.
    """
    scope = SymbolTable(st)
    declaration = scope.getter(self.value)

    declared_count = len(declaration.args)
    given_count = len(self.args)
    if declared_count != given_count:
        message = "AST Error (FuncCall): Expected {} arguments. got {}".format(
            declared_count, given_count)
        raise ValueError(message)

    for parameter, argument in zip(declaration.args, self.args):
        scope.setter(parameter.getstr(), argument.eval(scope))

    declaration.func_statements.eval(scope)
    return scope.getter(declaration.value)
def main():
    """Assemble the program named by ``sys.argv[1]`` into ``<name>.hack``.

    The extension is stripped from the argument and the remainder is handed to
    ``Parser``. C-commands are encoded through ``Code``. For any other command
    the symbol is emitted as a 16-bit binary number: numeric symbols directly,
    names via the symbol table, with unseen names allocated consecutive
    addresses starting at 16.
    """
    import os

    # splitext only strips the final extension, so paths such as
    # "./prog.asm" or "dir.v2/prog.asm" keep their directory part.
    filename = os.path.splitext(sys.argv[1])[0]

    symbol_table = SymbolTable()
    first_iter(symbol_table)
    parser = Parser(filename)
    code = Code()
    output = []
    address = 16

    while parser.has_more_commands():
        parser.advance()
        if parser.command_type() == 'C_COMMAND':
            dest = parser.dest()
            comp = parser.comp()
            jump = parser.jump()
            output.append("111" + code.comp(comp) + code.dest(dest) + code.jump(jump))
        else:
            symbol = parser.symbol()
            try:
                symbol_address = int(symbol)
            except ValueError:
                # Not a number: look the name up, allocating it on first use.
                if not symbol_table.contains(symbol):
                    symbol_table.add_entry(symbol, address)
                    address += 1
                symbol_address = symbol_table.get_address(symbol)
            output.append(bin(symbol_address)[2:].zfill(16))
    parser.close()

    with open(filename + '.hack', 'w') as hack_file:
        for line in output:
            hack_file.write(line + '\n')
def __init__(self, source_filename):
    """Open the ``.vm`` output and create the tokenizer, symbol table and writer.

    A trailing ``.jack`` (any case) is replaced by ``.vm``; any other name
    simply gets ``.vm`` appended.
    """
    has_jack_suffix = source_filename.lower().endswith(".jack")
    stem = source_filename[:-5] if has_jack_suffix else source_filename
    destination_filename = stem + ".vm"

    # Output file first, then the tokenizer over the input file.
    self.destination_file = open(destination_filename, 'w')
    self.tokenizer = JackTokenizer(source_filename)
    self.symbol_table = SymbolTable()
    # The VM writer emits into the destination file opened above.
    self.vm_writer = VMWriter(self.destination_file)
def compiler(cpl_string):
    """
    Simulate a CPL compiler.
    :param cpl_string: String which represents the CPL program.
    :return: pair of two lists (errors, quad).
    """
    errors, ast = build_ast(CPLTokenizer(cpl_string))
    # Errors and no tree at all: nothing further can be built.
    if errors and not ast:
        return errors, []

    quad = []
    try:
        table_errors, symbol_table = SymbolTable.build_form_ast(ast)
        errors.extend(table_errors)
        intermediate = get_ir(ast, symbol_table)
        quad = get_quad(intermediate)
    except CPLCompoundException as exception:
        errors.extend(exception.exceptions)
    return errors, quad
def _build_symbol_table(self):
    """Return a symbol table holding label and variable addresses.

    The first scan of ``self._in_fname`` maps each symbolic label declaration
    to the count of non-label lines seen before it. The second scan gives each
    unseen symbolic A-instruction operand the next address, starting at 16.
    """
    table = SymbolTable()

    # Scan 1: labels do not advance the line counter.
    next_line = 0
    for tokens, kind in Parser.parse_lines(self._in_fname):
        if kind != Parser.LABEL_DECLARATION:
            next_line += 1
        elif self._is_symbol(tokens[0]):
            table[tokens[0]] = next_line

    # Scan 2: allocate variables.
    next_address = 16
    for tokens, kind in Parser.parse_lines(self._in_fname):
        if kind != Parser.A_INSTRUCTION:
            continue
        name = tokens[0]
        if self._is_symbol(name) and name not in table:
            table[name] = next_address
            next_address += 1
    return table
def __init__(self):
    """Create the debug printer, token cursor, symbol table and stack machine."""
    self.debugger = DebugPrinter()

    # Token cursor; the list itself is not set here.
    self.tk_list = None
    self.token_index = 1
    self.token_list_length = None
    self.current_token = None
    self.next_token = None

    # debug stuff
    self.debug_mode_on = True
    self.state = None

    # The stack machine shares the parser's symbol table.
    table = SymbolTable()
    self.symbol_table = table
    self.temp_store = []
    self.stack_machine = StackMachine(table)

    # machine instructions
    self.instructions = None
def test_lexical_analyze_left_right_brace_newline(self):
    """Lex an ``if`` block and check left_brace, right_brace and newline."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """if(1 == 1) { print(1) } """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (7, Token("left_brace", "", 1)),
        (-2, Token("right_brace", "", 3)),
        (8, Token("newline", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def two_pass_assembly(self):
    """Translate the parser's commands to binary text in two passes.

    Pass 1 records each label (``L_COMMAND``) at the ROM address of the next
    instruction and counts every other command. The parser is then reset.
    Pass 2 writes one line per A/C command to ``self.parser.output``; label
    lines produce no output.
    """
    symbol_table = SymbolTable()
    commands = self.parser.commands

    rom_address = 0
    print("starting first pass")
    while self.parser.has_more_commands():
        if commands.L_COMMAND == self.parser.command_type():
            # Parenthesised single-argument print works on Python 2 and 3.
            print("L: " + self.parser.symbol())
            symbol_table.add_entry(self.parser.symbol(), rom_address)
        else:
            rom_address += 1
        self.parser.advance()
    self.parser.reset()

    # Variables live in RAM from address 16 (Hack convention), independent
    # of how many ROM instructions pass 1 counted.
    ram_address = 16
    print("starting second pass")
    while self.parser.has_more_commands():
        command_type = self.parser.command_type()
        if commands.A_COMMAND == command_type:
            sym = self.parser.symbol()
            if sym.isdigit():
                val = sym
            else:
                if not symbol_table.contains(sym):
                    symbol_table.add_entry(sym, ram_address)
                    ram_address += 1
                # Always emit the stored address, so the first use of a
                # variable matches every later use.
                val = symbol_table.get_address(sym)
            self.parser.output.write("0" + '{0:015b}'.format(int(val)) + "\n")
        elif commands.C_COMMAND == command_type:
            self.parser.output.write(
                "111"
                + self.code.comp(self.parser.comp())
                + self.code.dest(self.parser.dest())
                + self.code.jump(self.parser.jump())
                + "\n")
        self.parser.advance()
def do_test_generate(src, expected):
    """Generate words for ``src`` and die if they differ from ``expected``.

    Symbol passes are repeated (with identifier replacement in between) until
    the number of symbols stops changing; then the words are generated,
    printed one per line and compared against ``expected``. On a mismatch both
    lists are printed and ``util.die()`` is called.
    """
    a = gen.Generation(src)
    symtab = a.symbol_pass(SymbolTable())
    numNames = symtab.num_symbols()
    while True:
        a.replace_idents(symtab)
        symtab = a.symbol_pass(symtab)
        if symtab.num_symbols() == numNames:
            break
        numNames = symtab.num_symbols()

    words = a.generate(symtab)
    # Single-argument print(...) gives identical output on Python 2 and 3.
    for w in words:
        print(w)
    if words != expected:
        print("GENERATED: %s" % (words,))
        print("EXPECTED : %s" % (expected,))
        util.die()
def test_lexical_analyze_less_than_less_than_equal_left_shift(self):
    """Lex ``<``, ``<=`` and ``<<`` and check the three operator tokens."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """1 < 2 1 <= 2 1 << 2 """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (1, Token("less_than", "", 1)),
        (5, Token("less_than_equal", "", 2)),
        (9, Token("left_shift", "", 3)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def test_lexical_analyze_plus_equal_increment_plus(self):
    """Lex ``+``, ``+=`` and ``++`` and check the three operator tokens."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """var a = 1 + 2 a += 1 a++ """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (8, Token("plus_equal", "", 2)),
        (12, Token("increment", "", 3)),
        (4, Token("plus", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def test_lexical_analyze_greater_than_greater_than_equal_right_shift(self):
    """Lex ``>``, ``>=`` and ``>>`` and check the three operator tokens."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """1 > 2 1 >= 2 1 >> 2 """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (1, Token("greater_than", "", 1)),
        (5, Token("greater_than_equal", "", 2)),
        (9, Token("right_shift", "", 3)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def test_lexical_analyze_minus_equal_decrement_minus(self):
    """Lex ``-``, ``-=`` and ``--`` and check the three operator tokens."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    source_code = """var a = 1 - 2 a -= 1 a-- """
    with open("testing.simc", "w") as handle:
        handle.write(source_code)

    symbols = SymbolTable()
    tokens = lexical_analyze("testing.simc", symbols)

    expectations = (
        (8, Token("minus_equal", "", 2)),
        (12, Token("decrement", "", 3)),
        (4, Token("minus", "", 1)),
    )
    for position, expected in expectations:
        self.assertEqual(tokens[position], expected)
def main():
    """Run the lexer, symbol table, parser and code generator on one source file.

    The source path comes from ``sys.argv[1]``. Artefacts are written to a
    ``dumps`` directory beside this module's parent: ``tokens.txt``,
    ``symtable.json`` and ``output.c``.
    """
    print("Mini Java Compiler")
    if len(sys.argv) < 2:
        sys.exit("Error: Compiler needs source file as argument.")

    source_path = pathlib.Path(sys.argv[1]).resolve()
    with source_path.open(mode="r") as f:
        buffer = f.read()

    target_dir = pathlib.Path(__file__).parent.joinpath("../dumps")
    target_dir.mkdir(parents=True, exist_ok=True)
    rule = f"{'':-<50}"

    # Stage 1: lexer, one token per line under a header row.
    print(f"{rule}\nLexer Test")
    lexer = Lexer(buffer)
    tokens_path = target_dir.joinpath("./tokens.txt")
    with tokens_path.open("w") as f:
        print(f"{'Position':10}{'Stream':<10}{'Token name':20}{'Value':20}", file=f)
        for token in lexer.tokens():
            print(token, file=f)
    print("Lexing completed.")

    # Stage 2: symbol table dumped as JSON.
    print(f"{rule}\nSymbol Table Test")
    symtable = SymbolTable(lexer)
    symtable_path = target_dir.joinpath("./symtable.json")
    with symtable_path.open("w") as f:
        json.dump(symtable.data, f, indent=4)
    print("Symbol table completed.")

    # Stage 3: parser.
    print(f"{rule}\nParser Test")
    ast = Parser(lexer).program()
    print("Parsing completed.")

    # Stage 4: code generation.
    print(f"{rule}\nCode Generator Test")
    code = CodeGen(ast).generate_code()
    output_path = target_dir.joinpath("./output.c")
    with output_path.open("w") as f:
        print(code, file=f)
    print("Code generation completed.")
def test_assn5():
    """Iterate symbol passes to a fixed point and check all four resolved names."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    src = ''' x = a + b a = b + 1 b = c + 1 c = 1 '''
    a = gen.Generation(src)
    symtab = a.symbol_pass(SymbolTable())

    # Repeat until a pass no longer changes the symbol count.
    previous_count = symtab.num_symbols()
    settled = False
    while not settled:
        a.replace_idents(symtab)
        symtab = a.symbol_pass(symtab)
        current_count = symtab.num_symbols()
        settled = current_count == previous_count
        previous_count = current_count

    for name, value in (('c', 1), ('b', 2), ('a', 3), ('x', 5)):
        assert symtab.resolveName(name) == ast.Number(value)
    assert symtab.num_symbols() == 4
def test_switch(self):
    """Generate IR for nested ``switch`` statements and compare it line by line."""
    # NOTE(review): literal reproduced as seen; confirm its original line breaks.
    cpl_program = """ a, b: int; { switch(a) { case 1: { write(1); break; } case 2: write(2); case 3: { switch(b) { case 5: write(5); default: break; } } case 4: write(4); default: write(0); } } """
    ast = build_ast(CPLTokenizer(cpl_program))
    sym = SymbolTable.build_form_ast(ast)

    generated = []
    for instruction in get_ir(ast, sym):
        generated.append(instruction.code)

    expected = [
        # outer case 1
        'case_1_label_3:',
        'IEQL t2 a 1',
        'JMPZ case_2_label_4 t2',
        'IPRT 1',
        'JUMP end_switch_label_7',
        'JUMP end_switch_label_7',
        # outer case 2
        'case_2_label_4:',
        'IEQL t3 a 2',
        'JMPZ case_3_label_5 t3',
        'IPRT 2',
        'JUMP end_switch_label_7',
        # outer case 3, holding the inner switch
        'case_3_label_5:',
        'IEQL t4 a 3',
        'JMPZ case_4_label_6 t4',
        'case_5_label_0:',
        'IEQL t1 b 5',
        'JMPZ default_label_2 t1',
        'IPRT 5',
        'JUMP end_switch_label_1',
        'default_label_2:',
        'JUMP end_switch_label_1',
        'end_switch_label_1:',
        'JUMP end_switch_label_7',
        # outer case 4
        'case_4_label_6:',
        'IEQL t5 a 4',
        'JMPZ default_label_8 t5',
        'IPRT 4',
        'JUMP end_switch_label_7',
        # outer default
        'default_label_8:',
        'IPRT 0',
        'end_switch_label_7:',
        'HALT',
    ]
    self.assertEqual(generated, expected)
def function_declaration(self):
    """
    Expanding Rule 16:
    FunctionDeclaration -> FunctionHeading ";" Block ";"
    """
    if self.t_type() != 'MP_FUNCTION':
        self.error('MP_FUNCTION')
        return

    saved_name = self.cur_func_name

    # Enter a fresh scope nested in the current table.
    scope = SymbolTable(self.cur_symbol_table)
    scope.create()
    self.cur_symbol_table = scope
    self.sem_analyzer.sym_table = self.cur_symbol_table

    Parser.print_tree('16')
    self.function_heading()
    scope.name = self.cur_func_name
    self.match(';')
    self.block()
    self.match(';')
    self.print_symbol_table("Function", scope)

    # Leave the scope: restore the name and step back to the parent table.
    self.cur_func_name = saved_name
    self.cur_symbol_table = self.cur_symbol_table.parent_table
    scope.destroy()
def procedure_declaration(self):
    """
    Expanding Rule 15:
    ProcedureDeclaration -> ProcedureHeading ";" Block ";"
    """
    if self.t_type() != 'MP_PROCEDURE':
        self.error('MP_PROCEDURE')
        return

    saved_name = self.cur_proc_name

    # Enter a fresh scope nested in the current table.
    scope = SymbolTable(self.cur_symbol_table)
    scope.create()
    self.cur_symbol_table = scope
    self.sem_analyzer.sym_table = self.cur_symbol_table

    Parser.print_tree('15')
    self.procedure_heading()
    scope.name = self.cur_proc_name
    self.match(';')
    self.block()
    self.match(';')
    self.print_symbol_table("Procedure", scope)

    # Leave the scope: step back to the parent table and restore the name.
    self.cur_symbol_table = self.cur_symbol_table.parent_table
    self.cur_proc_name = saved_name
    scope.destroy()
class CompilationEngine(object):
    """Compile one Jack class from a token stream into VM commands.

    The engine pulls tokens from a ``JackTokenizer``, records declarations
    in a ``SymbolTable`` and emits code through a ``VMWriter``.  After
    construction the next call must be :meth:`compile_class`.

    Parsing convention: unless a docstring says otherwise, every
    ``compile_*`` method expects the tokenizer to be positioned ON the first
    token of its construct and leaves it on the first token AFTER it.

    A token that does not match what the grammar requires is fatal: see
    :meth:`_token_next`.
    """

    # the destination file for writing
    destination_file = None
    # the tokenizer for the input file
    tokenizer = None
    # symbol table
    symbol_table = None
    # vm writer
    vm_writer = None
    # the class name
    class_name = ""

    # indices used to build unique labels for while loops and if statements
    # start at -1 because we increment before use
    while_index = -1
    if_index = -1

    # binary operators that map directly onto one VM arithmetic command
    _ARITHMETIC_OPS = {
        "+": "add",
        "-": "sub",
        "=": "eq",
        ">": "gt",
        "<": "lt",
        "&": "and",
        "|": "or",
    }
    # binary operators implemented as two-argument calls
    _CALL_OPS = {
        "*": "Math.multiply",
        "/": "Math.divide",
    }

    def __init__(self, source_filename):
        """Open the output file and set up tokenizer, symbol table and writer.

        ``source_filename`` must be a single file, not a directory.  The
        output name is the source name with a trailing ``.jack`` (any case)
        replaced by ``.vm``; for any other name ``.vm`` is appended.
        """
        if source_filename.lower().endswith(".jack"):
            destination_filename = source_filename[:-5] + ".vm"
        else:
            destination_filename = source_filename + ".vm"

        self.destination_file = open(destination_filename, 'w')
        self.tokenizer = JackTokenizer(source_filename)
        self.symbol_table = SymbolTable()
        self.vm_writer = VMWriter(self.destination_file)

    def compile_class(self):
        """Compile a complete class and close the output file.

        Unlike the other ``compile_*`` methods this one advances the
        tokenizer first, i.e. it expects to be called before any token has
        been read.  The output file is closed even if compilation aborts.
        """
        try:
            # class keyword
            self._token_next(True, "KEYWORD", "class")

            # name of class
            tt, name = self._token_next(True, "IDENTIFIER")
            self.class_name = name

            # open brace
            self._token_next(True, "SYMBOL", "{")

            # zero or more variable declarations
            self.tokenizer.advance()
            while True:
                tt, t = self._token_next(False)
                if tt == "KEYWORD" and t in ("field", "static"):
                    self.compile_class_var_dec()
                else:
                    break

            # zero or more subroutine declarations
            while True:
                tt, t = self._token_next(False)
                if tt == "KEYWORD" and t in ("constructor", "function",
                                             "method"):
                    self.compile_subroutine()
                else:
                    break

            # close brace; no advance, the last loop already moved onto it
            self._token_next(False, "SYMBOL", "}")
        finally:
            # done (or aborted); never leave the output file open
            self.destination_file.close()

    def compile_class_var_dec(self):
        """Compile a ``static`` or ``field`` declaration."""
        # False means this is a class-level (not a subroutine) declaration
        self.compile_var_dec(False)

    def compile_subroutine(self):
        """Compile a complete method, function, or constructor."""
        self.symbol_table.start_subroutine()

        # constructor, function, or method keyword
        tt, subroutine_kind = self._token_next(False, "KEYWORD")

        # return type: a keyword (void, int, ...) or an identifier
        self._token_next(True)

        # name of the method/function/constructor
        tt, name = self._token_next(True)
        name = self.class_name + "." + name

        # a method receives the object as a hidden first argument; define it
        # so that the indexes of the declared arguments come out right
        if subroutine_kind == "method":
            self.symbol_table.define("this", self.class_name,
                                     self.symbol_table.ARG)

        # ( parameters )
        self._token_next(True, "SYMBOL", "(")
        self.tokenizer.advance()
        self.compile_parameter_list()
        self._token_next(False, "SYMBOL", ")")

        # opening brace
        self._token_next(True, "SYMBOL", "{")

        # local variable declarations
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t == "var":
                self.compile_var_dec()
            else:
                break

        # the function header needs the number of locals, which is only
        # known now that all var declarations have been read
        num_locals = self.symbol_table.var_count(self.symbol_table.VAR)
        self.vm_writer.write_function(name, num_locals)

        # prologue
        if subroutine_kind == "constructor":
            # allocate one word per field and make the block "this"
            size = self.symbol_table.var_count(self.symbol_table.FIELD)
            self.vm_writer.write_push(self.vm_writer.CONST, size)
            self.vm_writer.write_call("Memory.alloc", 1)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
        elif subroutine_kind == "function":
            # nothing special
            pass
        elif subroutine_kind == "method":
            # put argument 0 into pointer 0 (this)
            self.vm_writer.write_push(self.vm_writer.ARG, 0)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 0)
        else:
            print("WARNING: Expected constructor, function, or name; got %s"
                  % (subroutine_kind,))

        # statements
        self.compile_statements()

        # closing brace
        self._token_next(False, "SYMBOL", "}")
        self.tokenizer.advance()

    def compile_parameter_list(self):
        """Compile a (possibly empty) parameter list.

        The enclosing parentheses are not consumed: on return the tokenizer
        is on the token that ended the list.
        """
        tt, t = self._token_next(False)
        if tt == "SYMBOL" and t == ")":
            # the parameter list was empty
            return

        while True:
            # variable type (keyword or class name)
            tt, var_type = self._token_next(False)
            # variable name
            tt, name = self._token_next(True)
            # parameters are always of kind ARG
            self.symbol_table.define(name, var_type, self.symbol_table.ARG)

            # possible comma
            tt, t = self._token_next(True)
            if tt != "SYMBOL" or t != ",":
                # not a comma; stop processing parameters
                break
            self.tokenizer.advance()

    def compile_var_dec(self, subroutine=True):
        """Compile a variable declaration, up to and including its ``;``.

        If ``subroutine`` is true only the ``var`` keyword is expected; if it
        is false only ``static`` and ``field`` are.  An unexpected keyword
        produces a warning and is passed to the symbol table unchanged.
        """
        # the keyword that starts the declaration
        tt, kind = self._token_next(False, "KEYWORD")

        if subroutine:
            if kind == "var":
                kind = self.symbol_table.VAR
            else:
                print("WARNING: expecting var, but received %s"
                      % (str(kind)))
        else:
            if kind == "static":
                kind = self.symbol_table.STATIC
            elif kind == "field":
                kind = self.symbol_table.FIELD
            else:
                print("WARNING: expecting static or field, but received %s"
                      % (str(kind)))

        # type of the declaration: an identifier or a keyword (int, etc)
        tt, var_type = self._token_next(True)

        # first name
        tt, name = self._token_next(True, "IDENTIFIER")
        self.symbol_table.define(name, var_type, kind)

        # any further comma-separated names share the same type and kind
        self.tokenizer.advance()
        while True:
            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
                tt, name = self._token_next(True, "IDENTIFIER")
                self.symbol_table.define(name, var_type, kind)
                self.tokenizer.advance()
            else:
                break

        # should be on the semicolon at the end of the declaration
        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_statements(self):
        """Compile a sequence of statements, not including the enclosing {}."""
        while True:
            tt, t = self._token_next(False)
            if tt == "KEYWORD" and t in ("do", "let", "while", "return",
                                         "if"):
                # dispatch to compile_do, compile_let, ...
                getattr(self, "compile_" + t)()
            else:
                # not a statement; stop processing statements
                break

    def compile_do(self):
        """Compile a ``do`` statement."""
        self._token_next(False, "KEYWORD", "do")

        self.tokenizer.advance()
        self.compile_subroutine_call()

        # a do statement ignores the call's result, so drop it from the stack
        self.vm_writer.write_pop(self.vm_writer.TEMP, 0)

        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_let(self):
        """Compile a ``let`` statement (plain variable or array element)."""
        self._token_next(False, "KEYWORD", "let")

        # variable name
        tt, name = self._token_next(True, "IDENTIFIER")

        # possible brackets for an array element
        tt, t = self._token_next(True)
        if tt == "SYMBOL" and t == "[":
            array = True

            # offset expression
            self.tokenizer.advance()
            self.compile_expression()

            # base address
            segment, index = self._resolve_symbol(name)
            self.vm_writer.write_push(segment, index)

            # target address = base + offset, left on the stack.  It cannot
            # go into pointer 1 yet: the right-hand side has not been
            # compiled and may itself use pointer 1 to read an array value.
            self.vm_writer.write_arithmetic("add")

            self._token_next(False, "SYMBOL", "]")
            # move onto the = expected next
            self.tokenizer.advance()
        else:
            array = False

        # equals sign
        self._token_next(False, "SYMBOL", "=")

        # right-hand side
        self.tokenizer.advance()
        self.compile_expression()

        if array:
            # stack (top first): computed value, target address, ...
            # park the value in temp 0
            self.vm_writer.write_pop(self.vm_writer.TEMP, 0)
            # target address into pointer 1 (that)
            self.vm_writer.write_pop(self.vm_writer.POINTER, 1)
            # value back onto the stack ...
            self.vm_writer.write_push(self.vm_writer.TEMP, 0)
            # ... and into the array element
            self.vm_writer.write_pop(self.vm_writer.THAT, 0)
        else:
            # not an array: pop the value straight into the variable
            segment, index = self._resolve_symbol(name)
            self.vm_writer.write_pop(segment, index)

        self._token_next(False, "SYMBOL", ";")
        self.tokenizer.advance()

    def compile_while(self):
        """Compile a ``while`` statement."""
        # labels for this loop
        self.while_index += 1
        while_start = "WHILE_START_%d" % (self.while_index)
        while_end = "WHILE_END_%d" % (self.while_index)

        self._token_next(False, "KEYWORD", "while")
        self._token_next(True, "SYMBOL", "(")

        # top of the loop: the condition is re-evaluated on every pass
        self.vm_writer.write_label(while_start)

        self.tokenizer.advance()
        self.compile_expression()
        self._token_next(False, "SYMBOL", ")")

        # leave the loop when the condition is false: negate, then if-goto
        self.vm_writer.write_arithmetic("not")
        self.vm_writer.write_if(while_end)

        # { body }
        self._token_next(True, "SYMBOL", "{")
        self.tokenizer.advance()
        self.compile_statements()
        self._token_next(False, "SYMBOL", "}")

        # back to the top to evaluate the condition again
        self.vm_writer.write_goto(while_start)
        self.vm_writer.write_label(while_end)

        self.tokenizer.advance()

    def compile_return(self):
        """Compile a ``return`` statement.

        A return without an expression pushes constant 0 before returning.
        """
        self._token_next(False, "KEYWORD", "return")

        # Anything other than the terminating ";" starts an expression.
        # This must be "not (SYMBOL and ';')": expressions may themselves
        # begin with a symbol, e.g. "return -x;" or "return (a + b);".
        tt, t = self._token_next(True)
        if not (tt == "SYMBOL" and t == ";"):
            self.compile_expression()
        else:
            # no return expression; return 0
            self.vm_writer.write_push(self.vm_writer.CONST, 0)

        self._token_next(False, "SYMBOL", ";")
        self.vm_writer.write_return()
        self.tokenizer.advance()

    def compile_if(self):
        """Compile an ``if`` statement, including a possible ``else`` clause.

        The simple true-block-first layout is used: the negated condition
        jumps to the false label, otherwise execution falls through.
        """
        # labels for this if statement
        self.if_index += 1
        if_false = "IF_FALSE_%d" % (self.if_index)
        if_end = "IF_END_%d" % (self.if_index)

        self._token_next(False, "KEYWORD", "if")
        self._token_next(True, "SYMBOL", "(")

        # condition
        self.tokenizer.advance()
        self.compile_expression()
        self._token_next(False, "SYMBOL", ")")

        # if false, goto the false label; if true, fall through
        self.vm_writer.write_arithmetic("not")
        self.vm_writer.write_if(if_false)

        # { true statements }
        self._token_next(True, "SYMBOL", "{")
        self.tokenizer.advance()
        self.compile_statements()
        self._token_next(False, "SYMBOL", "}")

        tt, t = self._token_next(True)
        if tt == "KEYWORD" and t == "else":
            # skip the false portion at the end of the true portion
            self.vm_writer.write_goto(if_end)
            self.vm_writer.write_label(if_false)

            # { false statements }
            self._token_next(True, "SYMBOL", "{")
            self.tokenizer.advance()
            self.compile_statements()
            self._token_next(False, "SYMBOL", "}")

            self.vm_writer.write_label(if_end)

            # advance only here: without an else, the else check above has
            # already moved past the closing brace
            self.tokenizer.advance()
        else:
            # no else portion; the end label is not needed
            self.vm_writer.write_label(if_false)

    def compile_expression(self):
        """Compile an expression: one or more terms joined by operators.

        Operators are applied strictly left to right, in postfix order (both
        operands first, then the operation).
        """
        self.compile_term()

        while True:
            tt, op = self._token_next(False)
            if tt != "SYMBOL":
                break

            if op in self._ARITHMETIC_OPS:
                self.tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(self._ARITHMETIC_OPS[op])
            elif op in self._CALL_OPS:
                self.tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_call(self._CALL_OPS[op], 2)
            else:
                # not an operator; done parsing the expression
                break

    def compile_term(self):
        """Compile a term.

        term: integer_constant | string_constant | keyword_constant |
              varname | varname[expression] | subroutine_call |
              (expression) | unary_op term

        When the current token is an identifier, one token of lookahead
        ( ``[``, ``(`` or ``.`` ) distinguishes an array entry and a
        subroutine call from a plain variable.  Any other token is not part
        of this term and is left for the caller.
        """
        tt, t = self._token_next(False)

        if tt == "INT_CONST":
            self.vm_writer.write_push(self.vm_writer.CONST, t)
            self.tokenizer.advance()

        elif tt == "STRING_CONST":
            # build the string at run time: allocate it, then append the
            # characters one by one; each append returns the same string
            # pointer, so the pointer ends up on the stack
            self.vm_writer.write_push(self.vm_writer.CONST, len(t))
            self.vm_writer.write_call("String.new", 1)
            for char in t:
                self.vm_writer.write_push(self.vm_writer.CONST, ord(char))
                self.vm_writer.write_call("String.appendChar", 2)
            self.tokenizer.advance()

        elif tt == "KEYWORD":
            if t == "true":
                # true is -1, which is 0 negated
                self.vm_writer.write_push(self.vm_writer.CONST, 0)
                self.vm_writer.write_arithmetic("not")
            elif t == "false" or t == "null":
                self.vm_writer.write_push(self.vm_writer.CONST, 0)
            elif t == "this":
                self.vm_writer.write_push(self.vm_writer.POINTER, 0)
            self.tokenizer.advance()

        elif tt == "SYMBOL" and t == "(":
            # ( expression )
            self.tokenizer.advance()
            self.compile_expression()
            self._token_next(False, "SYMBOL", ")")
            self.tokenizer.advance()

        elif tt == "SYMBOL" and t in ("-", "~"):
            # unary_op term, postfix order: operand first, then the operator
            self.tokenizer.advance()
            self.compile_term()
            if t == "-":
                self.vm_writer.write_arithmetic("neg")
            else:
                self.vm_writer.write_arithmetic("not")

        elif tt == "IDENTIFIER":
            # look one token ahead before emitting anything
            tt2, t2 = self._token_next(True)
            if tt2 == "SYMBOL" and t2 in ("(", "."):
                # subroutine call: back up onto the identifier and delegate
                self.tokenizer.retreat()
                self.compile_subroutine_call()
            elif tt2 == "SYMBOL" and t2 == "[":
                # array read: base address ...
                segment, index = self._resolve_symbol(t)
                self.vm_writer.write_push(segment, index)
                # ... plus offset ...
                self.tokenizer.advance()
                self.compile_expression()
                self.vm_writer.write_arithmetic("add")
                # ... into pointer 1 (that), then read that 0
                self.vm_writer.write_pop(self.vm_writer.POINTER, 1)
                self.vm_writer.write_push(self.vm_writer.THAT, 0)
                self._token_next(False, "SYMBOL", "]")
                self.tokenizer.advance()
            else:
                # just a single identifier; the lookahead already moved on
                segment, index = self._resolve_symbol(t)
                self.vm_writer.write_push(segment, index)

        else:
            print("WARNING: Unknown term expression object: %s %s" % (tt, t))

    def compile_expression_list(self):
        """Compile a (possibly empty) comma-separated list of expressions.

        Returns the number of expressions compiled.  The tokenizer is left
        on the token that ended the list.
        """
        num_args = 0

        tt, t = self._token_next(False)
        if tt == "SYMBOL" and t == ")":
            # the list was empty
            return num_args

        while True:
            self.compile_expression()
            num_args += 1

            tt, t = self._token_next(False)
            if tt == "SYMBOL" and t == ",":
                self.tokenizer.advance()
            else:
                # not a comma; stop processing expressions
                break

        return num_args

    def compile_subroutine_call(self):
        """Compile a subroutine call.

        Three forms are distinguished:

        - ``func(...)``        method call on the current object
        - ``var.func(...)``    method call on ``var`` (a known symbol)
        - ``Class.func(...)``  function/constructor call

        For both method forms the receiver is pushed as a hidden first
        argument and counted in the call's argument count.
        """
        # first part of name
        tt, name1 = self._token_next(False, "IDENTIFIER")

        # a dot and another name may follow, or the opening parenthesis
        name2 = None
        tt, t = self._token_next(True)
        if tt == "SYMBOL" and t == ".":
            tt, name2 = self._token_next(True, "IDENTIFIER")
            # advance so that we are on the parenthesis
            self.tokenizer.advance()

        # An unqualified name is always a call on the current object, even
        # if a variable of the same name exists; deciding that first avoids
        # building "Class." + None for such a collision.
        local_call = name2 is None
        method_call = local_call or self.symbol_table.contains(name1)

        # hidden "this" argument
        if local_call:
            self.vm_writer.write_push(self.vm_writer.POINTER, 0)
        elif method_call:
            segment, index = self._resolve_symbol(name1)
            self.vm_writer.write_push(segment, index)

        # ( expression list )
        self._token_next(False, "SYMBOL", "(")
        self.tokenizer.advance()
        num_args = self.compile_expression_list()
        self._token_next(False, "SYMBOL", ")")

        # name of the vm function to call
        if local_call:
            vm_function_name = self.class_name + "." + name1
            num_args += 1
        elif method_call:
            # the variable's declared type is the class that owns the method
            classname = self.symbol_table.get(name1)[1]
            vm_function_name = classname + "." + name2
            num_args += 1
        else:
            vm_function_name = name1 + "." + name2

        self.vm_writer.write_call(vm_function_name, num_args)
        self.tokenizer.advance()

    def _token_next(self, advance=False, expected_type=None,
                    expected_value=None):
        """Return ``(token_type, token)`` for the current token.

        If ``advance`` is true the tokenizer is advanced before reading.  The
        token is always returned as a string.  If ``expected_type`` or
        ``expected_value`` is given and does not match, a warning and a stack
        trace are printed and the process exits with status 1.
        """
        # local imports: only needed on the fatal path
        import sys
        import traceback

        if advance:
            self.tokenizer.advance()

        # the tokenizer exposes one accessor per token type, named after the
        # lower-cased type (keyword(), symbol(), int_const(), ...)
        token_type = self.tokenizer.token_type()
        token = str(getattr(self.tokenizer, token_type.lower())())

        if expected_type and token_type != expected_type:
            print("WARNING: Type %s found; expected %s"
                  % (token_type, expected_type))
            traceback.print_stack()
            sys.exit(1)

        if expected_value and token != expected_value:
            print("WARNING: Value %s found; expected %s"
                  % (token, expected_value))
            traceback.print_stack()
            sys.exit(1)

        return token_type, token

    def _type_to_segment(self, type):
        """Convert a symbol table kind into a VM segment.

        Prints an error and returns None for an unknown kind.
        """
        if type == self.symbol_table.STATIC:
            return self.vm_writer.STATIC
        elif type == self.symbol_table.FIELD:
            return self.vm_writer.THIS
        elif type == self.symbol_table.ARG:
            return self.vm_writer.ARG
        elif type == self.symbol_table.VAR:
            return self.vm_writer.LOCAL
        else:
            print("ERROR: Bad type %s" % (str(type)))

    def _resolve_symbol(self, name):
        """Look ``name`` up and return its ``(segment, index)`` 2-tuple."""
        kind, type, index = self.symbol_table.get(name)
        return self._type_to_segment(kind), index
def translate(node, st = None, strings = None, funcName = False):
    """Recursively translate an ``oast`` node into the ``ast`` representation.

    node     -- the ``oast`` node to translate.
    st       -- the SymbolTable in effect for ``node``.  Ignored for
                ``oast.Module``, which creates its own; ``Function`` and
                ``Lambda`` bodies are translated with a child table.
    strings  -- dict mapping attribute names to their ``ast.String`` node,
                shared by the whole module so each name gets one node.
                Ignored for ``oast.Module``, which creates its own.
    funcName -- flag forwarded through the recursion; it is set to True for
                the callee of a ``CallFunc``.  Nothing in this function
                reads it.

    Returns the translated node; for ``oast.Stmt`` a flattened list of
    translated statements.  The input tree is not modified.

    Raises ``Exception`` for an unsupported node class or an unsupported
    comparison operator.

    NOTE(review): ``oast`` is presumably Python 2's ``compiler.ast`` --
    confirm against this file's imports.
    """
    if isinstance(node, oast.Add):
        left = translate(node.left, st, strings, funcName)
        right = translate(node.right, st, strings, funcName)

        return ast.Add(left, right)

    elif isinstance(node, oast.And):
        left = translate(node.nodes[0], st, strings, funcName)
        right = translate(node.nodes[1], st, strings, funcName)

        return ast.And(left, right)

    elif isinstance(node, oast.Assign):
        # Translate the right hand side first so it can use the older version
        # of the left hand side.
        exp = translate(node.expr, st, strings, funcName)

        # Only the last target is used; read it without popping so the input
        # tree is left intact.
        var = node.nodes[-1]

        if isinstance(var, oast.AssAttr):
            string = strings.setdefault(var.attrname, ast.String(var.attrname))
            var = translate(var.expr, st, strings, funcName)

            return ast.SetAttr(var, string, exp)

        else:
            var = translate(var, st, strings, funcName)

            return ast.Assign(var, exp)

    elif isinstance(node, oast.AssName):
        return st.getSymbol(node.name, True)

    elif isinstance(node, oast.CallFunc):
        name = translate(node.node, st, strings, True)
        args = [translate(a, st, strings) for a in node.args]

        return ast.FunctionCall(name, *args)

    elif isinstance(node, oast.Class):
        bases = [translate(base, st, strings, funcName) for base in node.bases]

        body = translate(node.code, st, strings, funcName)
        body = ast.BasicBlock(body)

        sym = st.getSymbol(node.name, True)
        name = st.getName(node.name, True)

        # This is here temporarily.  It will be moved to the typify pass
        # later.
        sym['type'] = 'class'

        klass = ast.Class(name, bases, body)

        return ast.Assign(sym, klass)

    elif isinstance(node, oast.Compare):
        left = translate(node.expr, st, strings, funcName)

        # Only the first (operator, operand) pair is translated.
        op, right = node.ops[0]

        right = translate(right, st, strings, funcName)

        if op == '==':
            return ast.Eq(left, right)

        elif op == '!=':
            return ast.Ne(left, right)

        elif op == 'is':
            return ast.Is(left, right)

        else:
            # Fail loudly instead of returning None into the new tree.
            raise Exception("Unsupported comparison operator encountered: {}".format(op))

    elif isinstance(node, oast.Const):
        return ast.Integer(node.value)

    elif isinstance(node, oast.Dict):
        pairs = {}

        for key, value in node.items:
            key = translate(key, st, strings, funcName)
            value = translate(value, st, strings, funcName)

            pairs[key] = value

        return ast.Dictionary(pairs)

    elif isinstance(node, oast.Discard):
        return translate(node.expr, st, strings, funcName)

    elif isinstance(node, oast.Div):
        left = translate(node.left, st, strings, funcName)
        right = translate(node.right, st, strings, funcName)

        return ast.Div(left, right)

    elif isinstance(node, oast.Function):
        sym = st.getSymbol(node.name, True)
        name = st.getName(node.name, True)

        sym['type'] = 'function'

        # Arguments and body live in a child table of the current one.
        newST = SymbolTable(st)

        argSymbols = [newST.getSymbol(argName, True) for argName in node.argnames]

        body = translate(node.code, newST, strings, funcName)
        body = ast.BasicBlock(body)

        fun = ast.Function(name, argSymbols, body, newST)
        fun['simplified'] = False

        st.update(newST)

        return ast.Assign(sym, fun)

    elif isinstance(node, oast.Getattr):
        exp = translate(node.expr, st, strings, funcName)
        name = strings.setdefault(node.attrname, ast.String(node.attrname))

        return ast.GetAttr(exp, name)

    elif isinstance(node, oast.If):
        # Take the first (condition, body) pair; the rest, if any, become a
        # nested If in the else position.  Slicing instead of pop() keeps
        # the input tree intact.
        cond, then = node.tests[0]
        remaining = node.tests[1:]

        # Translate the conditional expression.
        cond = translate(cond, st, strings)

        # Snapshot the SymbolTable
        st.snapshot()

        # Translate the 'then' clause.
        then = translate(then, st, strings, funcName)
        then = ast.BasicBlock(then)

        # Roll-back the SymbolTable for the 'else' clause.
        st.rollback()

        # Translate the 'else' clause.
        if len(remaining) > 0:
            # 'strings' must be passed on here; dropping it would put
            # funcName in its place and lose the shared string table.
            els = [translate(oast.If(remaining, node.else_), st, strings, funcName)]
        else:
            els = translate(node.else_, st, strings, funcName)

        els = ast.BasicBlock(els)

        return ast.If(cond, then, els, st)

    elif isinstance(node, oast.IfExp):
        cond = translate(node.test, st, strings, funcName)
        then = translate(node.then, st, strings, funcName)
        els = translate(node.else_, st, strings, funcName)

        return ast.IfExp(cond, then, els)

    elif isinstance(node, oast.Lambda):
        name = st.getName('lambda', True)

        newST = SymbolTable(st)

        # The loop variable must not be called 'name': in Python 2 a list
        # comprehension variable leaks into the enclosing scope.
        argSymbols = [newST.getSymbol(argName, True) for argName in node.argnames]

        # A lambda becomes a function whose body returns its expression.
        code = ast.Return(translate(node.code, newST, strings, funcName))
        block = ast.BasicBlock([code])

        fun = ast.Function(name, argSymbols, block, newST)
        fun['simplified'] = False

        st.update(newST)

        return fun

    elif isinstance(node, oast.List):
        elements = []

        for n in node.nodes:
            elements.append(translate(n, st, strings, funcName))

        return ast.List(elements)

    elif isinstance(node, oast.Module):
        # Create a new SymbolTable for this module.
        st = SymbolTable()
        strings = {}

        children = translate(node.node, st, strings, funcName)

        block = ast.BasicBlock(children)
        fun = ast.Function(st.getBIF('main'), [], block, st)

        # Mark the main function as migrated so that it doesn't get moved
        # later.
        fun['simplified'] = True

        return ast.Module([fun], strings)

    elif isinstance(node, oast.Mul):
        left = translate(node.left, st, strings, funcName)
        right = translate(node.right, st, strings, funcName)

        return ast.Mul(left, right)

    elif isinstance(node, oast.Name):
        # 'input' is an alias for the built-in 'input_int'.
        if node.name in ('input', 'input_int'):
            return st.getBIF('input_int')

        elif node.name == 'True':
            return ast.Tru()

        elif node.name == 'False':
            return ast.Fals()

        else:
            return st.getSymbol(node.name)

    elif isinstance(node, oast.Not):
        operand = translate(node.expr, st, strings, funcName)

        return ast.Not(operand)

    elif isinstance(node, oast.Or):
        left = translate(node.nodes[0], st, strings, funcName)
        right = translate(node.nodes[1], st, strings, funcName)

        return ast.Or(left, right)

    elif isinstance(node, oast.Printnl):
        children = [translate(e, st, strings, funcName) for e in node.getChildNodes()]
        children = util.flatten(children)

        return ast.FunctionCall(st.getBIF('print_any'), *children)

    elif isinstance(node, oast.Return):
        return ast.Return(translate(node.value, st, strings, funcName))

    elif isinstance(node, oast.Stmt):
        stmts = [translate(s, st, strings, funcName) for s in node.getChildNodes()]

        return util.flatten(stmts)

    elif isinstance(node, oast.Sub):
        left = translate(node.left, st, strings, funcName)
        right = translate(node.right, st, strings, funcName)

        return ast.Sub(left, right)

    elif isinstance(node, oast.Subscript):
        sym = translate(node.expr, st, strings, funcName)
        sub = translate(node.subs[0], st, strings, funcName)

        return ast.Subscript(sym, sub)

    elif isinstance(node, oast.While):
        cond = translate(node.test, st, strings, funcName)

        body = translate(node.body, st, strings, funcName)
        body = ast.BasicBlock(body)

        return ast.While(cond, body, st)

    elif isinstance(node, oast.UnarySub):
        operand = translate(node.expr, st, strings, funcName)

        return ast.Negate(operand)

    else:
        raise Exception("Unsupported AST node encountered: {}".format(node.__class__.__name__))
def main():
    """Assemble the file named by the first command-line argument.

    Two passes are made over the source, each with its own Parser:

    1. Every label (L_COMMAND) is recorded with the ROM address of the
       instruction that follows it, as kind 'LAB'.
    2. A- and C-commands are translated.  A symbol that is not yet in the
       table is allocated the next free RAM address, starting at 16, as
       kind 'VAR'.

    Two files are written next to the source: ``<name>.hack`` with one
    16-bit binary line per instruction, and ``<name>.ann`` with an annotated
    listing followed by a dump of the symbol table.  ``<name>`` is the
    source path with a trailing ``.asm`` removed, if it has one.
    """
    filename = os.path.join(os.getcwd(), Util.getCommandLineArg(1))

    first_parser = Parser(filename)
    second_parser = Parser(filename)
    symbol_table = SymbolTable()

    # Swap only a trailing '.asm' for the output extension.  A blanket
    # str.replace would also rewrite 'asm' inside directory names and, for a
    # path without 'asm', hand back the input path to be opened for writing.
    if filename.lower().endswith('.asm'):
        base_name = filename[:-4]
    else:
        base_name = filename
    hack_filename = base_name + '.hack'
    ann_filename = base_name + '.ann'

    # 'with' closes both outputs even if a pass raises.
    with open(hack_filename, 'w') as hack_file, \
            open(ann_filename, 'w') as ann_file:

        # ---- pass 1: label addresses --------------------------------------
        rom_address = 0
        while first_parser.has_more_commands():
            first_parser.advance()
            command_type = first_parser.command_type()

            if command_type == 'A_COMMAND' or command_type == 'C_COMMAND':
                rom_address += 1
            elif command_type == 'L_COMMAND':
                # a label does not occupy ROM; it names the next instruction
                symbol_table.add_entry(first_parser.symbol(), rom_address, 'LAB')

        # ---- pass 2: code generation and annotated listing ----------------
        ram_address = 16
        while second_parser.has_more_commands():
            second_parser.advance()
            command_type = second_parser.command_type()

            # stays empty for commands that emit no code (labels)
            machine_command = ''

            if command_type == 'A_COMMAND':
                symbol = second_parser.symbol()
                if symbol[0].isdigit():
                    # numeric constant
                    binary = symbol
                elif symbol_table.contains(symbol):
                    binary = symbol_table.get_address(symbol)
                else:
                    # first use of a variable: allocate the next RAM slot
                    binary = ram_address
                    symbol_table.add_entry(symbol, ram_address, 'VAR')
                    ram_address += 1

                machine_command = '{0:016b}\n'.format(int(binary))
                hack_file.write(machine_command)

            elif command_type == 'C_COMMAND':
                dest = Code.dest(second_parser.dest())
                comp = Code.comp(second_parser.comp())
                jump = Code.jump(second_parser.jump())

                machine_command = '111{0}{1}{2}\n'.format(comp, dest, jump)
                hack_file.write(machine_command)

            assembly = second_parser.original_command().strip()

            # the 16 bits in groups of four for readability
            mc = machine_command.strip()
            annotated_machine = '{} {} {} {}'.format(mc[0:4], mc[4:8], mc[8:12], mc[12:16])

            # the command with its symbol replaced by the resolved address
            if command_type == 'L_COMMAND':
                symbolless_command = symbol_table.get_address(second_parser.symbol())
            elif command_type == 'A_COMMAND' and not second_parser.symbol().isdigit():
                symbolless_command = '@{}'.format(symbol_table.get_address(second_parser.symbol()))
            else:
                symbolless_command = second_parser.command

            annotated_command = '{:<39} {} {:<11} {}\n'.format(
                assembly,
                '//' if command_type else '',
                symbolless_command,
                annotated_machine)
            ann_file.write(annotated_command)

        # ---- symbol table dump --------------------------------------------
        ann_file.write('\n// Symbol Table:\n')
        for symbol, address in symbol_table.symbol_table.items():
            # each entry is indexed as (address, kind)
            ann_file.write('// {}: {:<30} -> {}\n'.format(address[1], symbol, address[0]))
class Parser(object):
    """Recursive-descent parser for the MicroPascal grammar.

    Each grammar non-terminal has one method.  A method inspects the current
    token type (``t_type``), expands the matching rule and calls ``error``
    (which terminates the process) when no rule applies.  Symbol tables are
    created per program/procedure/function and semantic actions are delegated
    to a ``SemanticAnalyzer``.
    """

    # Token types that may start an expression (rules 50, 60, 63, 69, 70, 79,
    # 104 and 105 all share this set).  Never mutated.
    _EXPRESSION_FIRST = ['MP_LPAREN', 'MP_PLUS', 'MP_MINUS', 'MP_IDENTIFIER',
                         'MP_INTEGER', 'MP_NOT']

    def __init__(self, tokens):
        """Prime the current and look-ahead tokens from ``tokens``.

        ``tokens`` must support ``len()`` and iteration and contain at least
        two tokens.
        """
        self.index = len(tokens)
        self.tokens = iter(tokens)
        # The next() builtin works for both Python 2 and Python 3 iterators,
        # unlike the Python-2-only ``.next()`` method.
        self.cur_token = next(self.tokens)   # default token holder
        self.next_token = next(self.tokens)  # LL(2) look-ahead token holder
        self.cur_symbol_table = None
        self.root_table = SymbolTable(None)
        self.sem_analyzer = SemanticAnalyzer(self.root_table)
        self.program_name = ''
        self.cur_proc_name = ''
        self.cur_func_name = ''

    ############### Utility Functions ###############

    def error(self, expected=None):
        """Log a syntax error at the current token and terminate via exit().

        ``expected`` is the token type (or list of token types) that would
        have been accepted; it is only used for the log message.
        """
        stack = inspect.stack()
        logging.error("Couldn't match: \"%s\" near line: %s, col: %s in %s(). Received %s" %
                      (self.t_lexeme(), self.cur_token.line,
                       self.cur_token.column, stack[1][3], self.t_type()))
        # Wrap in a 1-tuple so any kind of ``expected`` value formats safely.
        logging.error('Expected tokens: %s' % (expected,))
        logging.error("Three level parse tree (stack) trace, most recent call last.\n\t^ %s()\n\t^ %s()\n\t> %s()" %
                      (stack[3][3], stack[2][3], stack[1][3]))
        exit()

    def t_type(self):
        """Return the token type of the current token."""
        return self.cur_token.token_type

    def t_lexeme(self):
        """Return the lexeme (token value) of the current token."""
        return self.cur_token.token_value

    @classmethod
    def print_tree(cls, level):
        """Log (at debug level) the number of the rule being expanded."""
        logging.debug(level)

    def match(self, lexeme):
        """Advance to the next token.

        ``lexeme`` is used only for the log message; the current token is not
        compared against it.  At end of input the look-ahead token is left
        unchanged.  Always returns False.
        """
        self.cur_token = self.next_token
        try:
            self.next_token = next(self.tokens)
        except StopIteration:
            pass
        logging.info("Matched '%s' in %s()" % (lexeme, inspect.stack()[1][3]))
        return False

    def print_symbol_table(self, type, table):
        """Log ``table`` at debug level, temporarily lowering the log level.

        ``type`` is a label such as "Program", "Procedure" or "Function".
        """
        level = log.getEffectiveLevel()
        log.setLevel(logging.DEBUG)
        try:
            logging.debug(type + " Symbol Table " + table.name + repr(table) + '\n')
        finally:
            # Restore the level even if formatting/logging raises.
            log.setLevel(level)

    def _declare_variables(self, names, var_type):
        """Insert one "var" record of ``var_type`` per name into the current table."""
        for name in names:
            record = SemanticRecord()
            record.type = var_type
            record.lexeme = name
            record.set_size(var_type)
            record.kind = "var"
            record.depth = self.cur_symbol_table.cur_depth
            self.cur_symbol_table.insert(record)

    ############### Rule handling functions ###############

    def system_goal(self):
        """
        Expanding Rule 1:
        System Goal -> Program $

        Returns a success message when the program is followed by MP_EOF.
        """
        if self.t_type() == 'MP_PROGRAM':
            Parser.print_tree('1')
            self.program()
            if self.t_type() == 'MP_EOF':
                self.match('EOF')
                self.print_symbol_table("Program", self.root_table)
                self.sem_analyzer.write_IR()
                return "The input program parses!"
        else:
            self.error('MP_PROGRAM')

    def program(self):
        """
        Expanding Rule 2:
        Program -> ProgramHeading ";" Block "."
        """
        if self.t_type() == 'MP_PROGRAM':
            Parser.print_tree('2')
            self.program_heading()
            self.match(';')
            self.root_table.create_root()
            self.root_table.name = self.program_name
            self.cur_symbol_table = self.root_table
            self.sem_analyzer.sym_table = self.cur_symbol_table
            self.sem_analyzer.gen_begin()
            self.block()
            self.sem_analyzer.gen_end()
            self.match('.')
        else:
            self.error('MP_PROGRAM')

    def program_heading(self):
        """
        Expanding Rule 3:
        ProgramHeading -> "program" Identifier
        """
        if self.t_type() == 'MP_PROGRAM':
            Parser.print_tree('3')
            self.match('program')
            self.program_identifier()
        else:
            self.error('MP_PROGRAM')

    def block(self):
        """
        Expanding Rule 4:
        Block -> VariableDeclarationPart ProcedureAndFunctionDeclarationPart StatementPart
        """
        accepted_list = ['MP_VAR', 'MP_PROCEDURE', 'MP_BEGIN', 'MP_FUNCTION']
        if self.t_type() in accepted_list:
            Parser.print_tree('4')
            self.variable_declaration_part()
            self.procedure_and_function_declaration_part()
            self.statement_part()
        else:
            self.error(accepted_list)

    def variable_declaration_part(self):
        """
        Expanding Rules 5, 6:
        VariableDeclarationPart -> "var" VariableDeclaration ";" VariableDeclarationTail
                                -> epsilon
        """
        eps_list = ['MP_BEGIN', 'MP_FUNCTION', 'MP_PROCEDURE']
        if self.t_type() == 'MP_VAR':
            Parser.print_tree('5')
            self.match('var')
            self.variable_declaration()
            self.match(';')
            self.variable_declaration_tail()
        elif self.t_type() in eps_list:
            Parser.print_tree('6')
            self.epsilon()
        else:
            # list.append() returns None; build the expected list explicitly.
            self.error(eps_list + ['MP_VAR'])
        self.sem_analyzer.gen_add_sp()

    def epsilon(self):
        """Branch went to epsilon - nothing to consume."""
        pass

    def variable_declaration_tail(self):
        """
        Expanding Rules 7, 8:
        VariableDeclarationTail -> VariableDeclaration ";" VariableDeclarationTail
                                -> epsilon
        """
        eps_list = ['MP_BEGIN', 'MP_FUNCTION', 'MP_PROCEDURE']
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('7')
            self.variable_declaration()
            self.match(';')
            self.variable_declaration_tail()
        elif self.t_type() in eps_list:
            Parser.print_tree('8')
            self.epsilon()
        else:
            self.error(eps_list + ['MP_IDENTIFIER'])

    def variable_declaration(self):
        """
        Expanding Rule 9:
        VariableDeclaration -> IdentifierList ":" Type

        Every declared identifier is inserted into the current symbol table.
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('9')
            var_list = self.identifier_list([])
            self.match(':')
            var_type = self.type()
            self._declare_variables(var_list, var_type)
        else:
            self.error('MP_IDENTIFIER')

    def type(self):
        """
        Expanding Rules 10, 11:
        Type -> "Integer"
             -> "Float"

        Returns the lexeme of the matched type token.
        """
        lexeme = ''
        if self.t_type() == 'MP_FLOAT':
            Parser.print_tree('11')
            lexeme = self.t_lexeme()
            self.match(lexeme)
        elif self.t_type() == 'MP_INTEGER':
            Parser.print_tree('10')
            lexeme = self.t_lexeme()
            self.match(lexeme)
        else:
            self.error(['MP_FLOAT', 'MP_INTEGER'])
        return lexeme

    def procedure_and_function_declaration_part(self):
        """
        Expanding Rules 12, 13, 14:
        ProcedureAndFunctionDeclarationPart -> ProcedureDeclaration ProcedureAndFunctionDeclarationPart
                                            -> FunctionDeclaration ProcedureAndFunctionDeclarationPart
                                            -> epsilon
        """
        if self.t_type() == 'MP_PROCEDURE':
            Parser.print_tree('12')
            self.procedure_declaration()
            self.procedure_and_function_declaration_part()
        elif self.t_type() == 'MP_FUNCTION':
            Parser.print_tree('13')
            self.function_declaration()
            self.procedure_and_function_declaration_part()
        elif self.t_type() == 'MP_BEGIN':
            Parser.print_tree('14')
            self.epsilon()
        else:
            self.error(['MP_PROCEDURE', 'MP_FUNCTION', 'MP_BEGIN'])

    def procedure_declaration(self):
        """
        Expanding Rule 15:
        ProcedureDeclaration -> ProcedureHeading ";" Block ";"

        A child symbol table is created for the procedure body and destroyed
        once the body has been parsed.
        """
        if self.t_type() == 'MP_PROCEDURE':
            old_proc_name = self.cur_proc_name
            proc_sym_table = SymbolTable(self.cur_symbol_table)
            proc_sym_table.create()
            self.cur_symbol_table = proc_sym_table
            self.sem_analyzer.sym_table = self.cur_symbol_table
            Parser.print_tree('15')
            self.procedure_heading()
            proc_sym_table.name = self.cur_proc_name
            self.match(';')
            self.block()
            self.match(';')
            self.print_symbol_table("Procedure", proc_sym_table)
            self.cur_symbol_table = self.cur_symbol_table.parent_table
            self.cur_proc_name = old_proc_name
            proc_sym_table.destroy()
        else:
            self.error('MP_PROCEDURE')

    def function_declaration(self):
        """
        Expanding Rule 16:
        FunctionDeclaration -> FunctionHeading ";" Block ";"

        A child symbol table is created for the function body and destroyed
        once the body has been parsed.
        """
        if self.t_type() == 'MP_FUNCTION':
            old_func_name = self.cur_func_name
            func_sym_table = SymbolTable(self.cur_symbol_table)
            func_sym_table.create()
            self.cur_symbol_table = func_sym_table
            self.sem_analyzer.sym_table = self.cur_symbol_table
            Parser.print_tree('16')
            self.function_heading()
            func_sym_table.name = self.cur_func_name
            self.match(';')
            self.block()
            self.match(';')
            self.print_symbol_table("Function", func_sym_table)
            self.cur_func_name = old_func_name
            self.cur_symbol_table = self.cur_symbol_table.parent_table
            func_sym_table.destroy()
        else:
            self.error('MP_FUNCTION')

    def procedure_heading(self):
        """
        Expanding Rule 17:
        ProcedureHeading -> "procedure" ProcedureIdentifier OptionalFormalParameterList
        """
        if self.t_type() == 'MP_PROCEDURE':
            Parser.print_tree('17')
            self.match('procedure')
            self.procedure_identifier()
            self.optional_formal_parameter_list()
        else:
            self.error('MP_PROCEDURE')

    def function_heading(self):
        """
        Expanding Rule 18:
        FunctionHeading -> "function" FunctionIdentifier OptionalFormalParameterList ":" Type

        Returns the lexeme of the function's return type.
        """
        if self.t_type() == 'MP_FUNCTION':
            Parser.print_tree('18')
            self.match('function')
            self.function_identifier()
            self.optional_formal_parameter_list()
            self.match(':')
            return_type = self.type()
            return return_type
        else:
            self.error('MP_FUNCTION')

    def optional_formal_parameter_list(self):
        """
        Expanding Rules 19, 20:
        OptionalFormalParameterList -> "(" FormalParameterSection FormalParameterSectionTail ")"
                                    -> epsilon
        """
        # MP_COLON must be accepted here: function_heading matches ":" right
        # after this rule, so a parameterless function reaches it with ":".
        eps_list = ['MP_FLOAT', 'MP_INTEGER', 'MP_SCOLON', 'MP_COLON']
        if self.t_type() == 'MP_LPAREN':
            Parser.print_tree('19')
            self.match('(')
            self.formal_parameter_section()
            self.formal_parameter_section_tail()
            self.match(')')
        elif self.t_type() in eps_list:
            Parser.print_tree('20')
            self.epsilon()
        else:
            self.error(eps_list + ['MP_LPAREN'])

    def formal_parameter_section_tail(self):
        """
        Expanding Rules 21, 22:
        FormalParameterSectionTail -> ";" FormalParameterSection FormalParameterSectionTail
                                   -> epsilon
        """
        if self.t_type() == 'MP_SCOLON':
            Parser.print_tree('21')
            self.match(';')
            self.formal_parameter_section()
            self.formal_parameter_section_tail()
        elif self.t_type() == 'MP_RPAREN':
            Parser.print_tree('22')
            self.epsilon()
        else:
            self.error(['MP_SCOLON', 'MP_RPAREN'])

    def formal_parameter_section(self):
        """
        Expanding Rules 23, 24:
        FormalParameterSection -> ValueParameterSection
                               -> VariableParameterSection
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('23')
            self.value_parameter_section()
        elif self.t_type() == 'MP_VAR':
            Parser.print_tree('24')
            self.variable_parameter_section()
        else:
            self.error(['MP_IDENTIFIER', 'MP_VAR'])

    def value_parameter_section(self):
        """
        Expanding Rule 25:
        ValueParameterSection -> IdentifierList ":" Type
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('25')
            val_param_list = self.identifier_list([])
            self.match(':')
            param_type = self.type()
            self._declare_variables(val_param_list, param_type)
        else:
            self.error('MP_IDENTIFIER')

    def variable_parameter_section(self):
        """
        Expanding Rule 26:
        VariableParameterSection -> "var" IdentifierList ":" Type
        """
        if self.t_type() == 'MP_VAR':
            Parser.print_tree('26')
            self.match(self.t_type())
            var_param_list = self.identifier_list([])
            self.match(':')
            param_type = self.type()
            self._declare_variables(var_param_list, param_type)
        else:
            self.error('MP_VAR')

    def statement_part(self):
        """
        Expanding Rule 27:
        StatementPart -> CompoundStatement
        """
        if self.t_type() == 'MP_BEGIN':
            Parser.print_tree('27')
            self.compound_statement()
        else:
            self.error('MP_BEGIN')

    def compound_statement(self):
        """
        Expanding Rule 28:
        CompoundStatement -> "begin" StatementSequence "end"
        """
        if self.t_type() == 'MP_BEGIN':
            Parser.print_tree('28')
            self.match('begin')
            self.statement_sequence()
            self.match('end')
        else:
            self.error('MP_BEGIN')

    def statement_sequence(self):
        """
        Expanding Rule 29:
        StatementSequence -> Statement StatementTail
        """
        accepted_list = ['MP_BEGIN', 'MP_END', 'MP_READ', 'MP_WRITE', 'MP_IF',
                         'MP_WHILE', 'MP_REPEAT', 'MP_FOR', 'MP_IDENTIFIER']
        if self.t_type() in accepted_list:
            Parser.print_tree('29')
            self.statement()
            self.statement_tail()
        else:
            self.error(accepted_list)

    def statement_tail(self):
        """
        Expanding Rules 30, 31:
        StatementTail -> ";" Statement StatementTail
                      -> epsilon
        """
        eps_list = ['MP_END', 'MP_UNTIL']
        if self.t_type() == 'MP_SCOLON':
            Parser.print_tree('30')
            self.match(';')
            self.statement()
            self.statement_tail()
        elif self.t_type() in eps_list:
            Parser.print_tree('31')
            self.epsilon()
        else:
            self.error(eps_list + ['MP_SCOLON'])

    def statement(self):
        """
        Expanding Rules 32 - 41:
        Statement -> EmptyStatement
                  -> CompoundStatement
                  -> ReadStatement
                  -> WriteStatement
                  -> AssignmentStatement
                  -> IfStatement
                  -> WhileStatement
                  -> RepeatStatement
                  -> ForStatement
                  -> ProcedureStatement

        An identifier is disambiguated with the look-ahead token: it starts a
        procedure call or an assignment.
        """
        if self.t_type() == 'MP_END':
            Parser.print_tree('32')
            self.empty_statement()
        elif self.t_type() == 'MP_BEGIN':
            Parser.print_tree('33')
            self.compound_statement()
        elif self.t_type() == 'MP_READ':
            Parser.print_tree('34')
            self.read_statement()
        elif self.t_type() == 'MP_WRITE':
            Parser.print_tree('35')
            self.write_statement()
        elif self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('36')
            procedure_list = ['MP_END', 'MP_SCOLON', 'MP_LPAREN']
            procedure_list_2 = ['MP_COLON', 'MP_ASSIGN']
            if self.next_token.token_type in procedure_list:
                self.procedure_statement()
            elif self.next_token.token_type in procedure_list_2:
                self.assignment_statement()
            else:
                self.error(procedure_list + procedure_list_2)
        elif self.t_type() == 'MP_IF':
            Parser.print_tree('37')
            self.if_statement()
        elif self.t_type() == 'MP_WHILE':
            Parser.print_tree('38')
            self.while_statement()
        elif self.t_type() == 'MP_REPEAT':
            Parser.print_tree('39')
            self.repeat_statement()
        elif self.t_type() == 'MP_FOR':
            Parser.print_tree('40')
            self.for_statement()
        elif self.t_type() == 'MP_UNTIL':
            # Empty statement before "until": nothing to consume.
            pass
        elif self.t_type() == 'MP_ELSE':
            # Empty statement before "else": nothing to consume.
            pass
        else:
            self.error(['MP_END', 'MP_BEGIN', 'MP_READ', 'MP_WRITE',
                        'MP_IDENTIFIER', 'MP_IF', 'MP_WHILE', 'MP_REPEAT',
                        'MP_FOR'])

    def empty_statement(self):
        """
        Expanding Rule 42:
        EmptyStatement -> epsilon
        """
        accepted_list = ['MP_SCOLON', 'MP_ELSE', 'MP_END', 'MP_UNTIL']
        if self.t_type() in accepted_list:
            Parser.print_tree('42')
            self.epsilon()
        else:
            self.error(accepted_list)

    def read_statement(self):
        """
        Expanding Rule 43:
        ReadStatement -> "read" "(" ReadParameter ReadParameterTail ")"
        """
        if self.t_type() == 'MP_READ':
            Parser.print_tree('43')
            self.match('read')
            self.match('(')
            self.read_parameter()
            self.read_parameter_tail()
            self.match(')')
        else:
            self.error('MP_READ')

    def read_parameter_tail(self):
        """
        Expanding Rules 44, 45:
        ReadParameterTail -> "," ReadParameter ReadParameterTail
                          -> epsilon
        """
        if self.t_type() == 'MP_COMMA':
            Parser.print_tree('44')
            self.match(',')
            self.read_parameter()
            self.read_parameter_tail()
        elif self.t_type() == 'MP_RPAREN':
            Parser.print_tree('45')
            self.epsilon()
        else:
            self.error(['MP_COMMA', 'MP_RPAREN'])

    def read_parameter(self):
        """
        Expanding Rule 46:
        ReadParameter -> VariableIdentifier
        """
        read_param_rec = SemanticRecord()
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('46')
            self.variable_identifier(read_param_rec)
            self.sem_analyzer.gen_read(read_param_rec)
        else:
            self.error('MP_IDENTIFIER')

    def write_statement(self):
        """
        Expanding Rule 47:
        WriteStatement -> "write" "(" WriteParameter WriteParameterTail ")"
        """
        write_param_rec = SemanticRecord()
        if self.t_type() == 'MP_WRITE':
            Parser.print_tree('47')
            self.match('write')
            self.match('(')
            self.write_parameter()       # this is an expression
            self.write_parameter_tail()  # this too is an expression of some sort
            self.match(')')
            # TODO: handle combining the two returns from param and
            # param_tail, push on the stack
        else:
            self.error('MP_WRITE')

    def write_parameter_tail(self):
        """
        Expanding Rules 48, 49:
        WriteParameterTail -> "," WriteParameter WriteParameterTail
                           -> epsilon
        """
        if self.t_type() == 'MP_COMMA':
            Parser.print_tree('48')
            self.match(',')
            self.write_parameter()
            self.write_parameter_tail()
        elif self.t_type() == 'MP_RPAREN':
            Parser.print_tree('49')
            self.epsilon()
        else:
            self.error(['MP_COMMA', 'MP_RPAREN'])

    def write_parameter(self):
        """
        Expanding Rule 50:
        WriteParameter -> OrdinalExpression
        """
        write_param_rec = SemanticRecord()
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('50')
            self.ordinal_expression(write_param_rec)
            self.sem_analyzer.gen_write(write_param_rec)
        else:
            self.error(accepted_list)

    def assignment_statement(self):
        """
        Expanding Rules 51, 52:
        AssignmentStatement -> VariableIdentifier ":=" Expression
                            -> FunctionIdentifier ":=" Expression
        """
        # The conflict between the two rules is considered resolved, because
        # in the end both alternatives lead to an identifier.
        ident_rec = SemanticRecord()
        express_rec = SemanticRecord()
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('51')
            self.variable_identifier(ident_rec)
            self.match(':=')
            self.expression(express_rec)
            self.sem_analyzer.gen_ass_statement(ident_rec, express_rec)
        else:
            self.error('MP_IDENTIFIER')

    def if_statement(self):
        """
        Expanding Rule 53:
        IfStatement -> "if" BooleanExpression "then" Statement OptionalElsePart
        """
        if_rec = SemanticRecord()
        if self.t_type() == 'MP_IF':
            Parser.print_tree('53')
            self.match('if')
            self.boolean_expression(if_rec)
            self.sem_analyzer.begin_if(if_rec)
            self.match('then')
            self.statement()
            self.optional_else_part(if_rec)
            self.sem_analyzer.end_if(if_rec)
        else:
            self.error('MP_IF')

    def optional_else_part(self, if_rec):
        """
        Expanding Rules 54, 55:
        OptionalElsePart -> "else" Statement
                         -> epsilon
        """
        eps_list = ['MP_SCOLON', 'MP_END', 'MP_UNTIL']
        self.sem_analyzer.opt_else(if_rec)
        if self.t_type() == 'MP_ELSE':
            Parser.print_tree('54')
            self.match('else')
            self.statement()
        elif self.t_type() in eps_list:
            Parser.print_tree('55')
            self.epsilon()
        else:
            # extend('MP_ELSE') would splice in single characters and
            # return None; build the list explicitly instead.
            self.error(eps_list + ['MP_ELSE'])

    def repeat_statement(self):
        """
        Expanding Rule 56:
        RepeatStatement -> "repeat" StatementSequence "until" BooleanExpression
        """
        rep_rec = SemanticRecord()
        if self.t_type() == 'MP_REPEAT':
            Parser.print_tree('56')
            self.match('repeat')
            self.sem_analyzer.begin_repeat(rep_rec)
            self.statement_sequence()
            self.match('until')
            self.boolean_expression(rep_rec)
            self.sem_analyzer.end_repeat(rep_rec)
        else:
            self.error('MP_REPEAT')

    def while_statement(self):
        """
        Expanding Rule 57:
        WhileStatement -> "while" BooleanExpression "do" Statement
        """
        while_rec = SemanticRecord()
        expr_rec = SemanticRecord()
        if self.t_type() == 'MP_WHILE':
            Parser.print_tree('57')
            self.match('while')
            self.sem_analyzer.begin_while(while_rec)
            self.boolean_expression(expr_rec)
            self.sem_analyzer.gen_while(while_rec, expr_rec)
            self.match('do')
            self.statement()
            self.sem_analyzer.end_while(while_rec)
        else:
            self.error('MP_WHILE')

    def for_statement(self):
        """
        Expanding Rule 58:
        ForStatement -> "for" ControlVariable ":=" InitialValue StepValue FinalValue "do" Statement
        """
        control_var_rec = SemanticRecord()
        initial_rec = SemanticRecord()
        final_rec = SemanticRecord()
        for_rec = SemanticRecord()
        if self.t_type() == 'MP_FOR':
            Parser.print_tree('58')
            self.match('for')
            self.control_variable(control_var_rec)
            self.match(':=')
            self.initial_value(initial_rec)
            self.step_value(for_rec)
            self.final_value(final_rec)
            self.match('do')
            self.sem_analyzer.begin_for(for_rec)
            self.sem_analyzer.gen_for(for_rec, control_var_rec, initial_rec)
            self.statement()
            self.sem_analyzer.end_for(for_rec, control_var_rec, final_rec)
        else:
            self.error('MP_FOR')

    def control_variable(self, control_var_rec):
        """
        Expanding Rule 59:
        ControlVariable -> VariableIdentifier
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('59')
            self.variable_identifier(control_var_rec)
        else:
            self.error('MP_IDENTIFIER')

    def initial_value(self, initial_rec):
        """
        Expanding Rule 60:
        InitialValue -> OrdinalExpression
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('60')
            self.ordinal_expression(initial_rec)
        else:
            self.error(accepted_list)

    def step_value(self, step_rec):
        """
        Expanding Rules 61, 62:
        StepValue -> "to"
                  -> "downto"

        Stores the loop comparison ('lte' or 'gte') in ``step_rec.lexeme``.
        """
        if self.t_type() == 'MP_TO':
            Parser.print_tree('61')
            self.match(self.t_lexeme())
            step_rec.lexeme = 'lte'
        elif self.t_type() == 'MP_DOWNTO':
            Parser.print_tree('62')
            self.match(self.t_lexeme())
            step_rec.lexeme = 'gte'
        else:
            self.error(['MP_TO', 'MP_DOWNTO'])

    def final_value(self, final_rec):
        """
        Expanding Rule 63:
        FinalValue -> OrdinalExpression
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('63')
            self.ordinal_expression(final_rec)
        else:
            self.error(accepted_list)

    def procedure_statement(self):
        """
        Expanding Rule 64:
        ProcedureStatement -> ProcedureIdentifier OptionalActualParameterList
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('64')
            self.procedure_identifier()
            self.optional_actual_parameter_list()
        else:
            self.error('MP_IDENTIFIER')

    def optional_actual_parameter_list(self):
        """
        Expanding Rules 65, 66:
        OptionalActualParameterList -> "(" ActualParameter ActualParameterTail ")"
                                    -> epsilon
        """
        eps_list = ['MP_TIMES', 'MP_RPAREN', 'MP_PLUS', 'MP_COMMA', 'MP_MINUS',
                    'MP_SCOLON', 'MP_LTHAN', 'MP_LEQUAL', 'MP_GTHAN',
                    'MP_GEQUAL', 'MP_EQUAL', 'MP_NEQUAL', 'MP_AND', 'MP_DIV',
                    'MP_DO', 'MP_DOWNTO', 'MP_ELSE', 'MP_END', 'MP_MOD',
                    'MP_OR', 'MP_THEN', 'MP_TO', 'MP_UNTIL']
        actual_rec = SemanticRecord()
        if self.t_type() == 'MP_LPAREN':
            Parser.print_tree('65')
            self.match('(')
            self.actual_parameter(actual_rec)
            self.actual_parameter_tail(actual_rec)
            self.match(')')
        elif self.t_type() in eps_list:
            Parser.print_tree('66')
            self.epsilon()
        else:
            self.error(eps_list + ['MP_LPAREN'])

    def actual_parameter_tail(self, act_rec):
        """
        Expanding Rules 67, 68:
        ActualParameterTail -> "," ActualParameter ActualParameterTail
                            -> epsilon
        """
        if self.t_type() == 'MP_COMMA':
            Parser.print_tree('67')
            self.match(',')
            self.actual_parameter(act_rec)
            self.actual_parameter_tail(act_rec)
        elif self.t_type() == 'MP_RPAREN':
            Parser.print_tree('68')
            self.epsilon()
        else:
            self.error(['MP_COMMA', 'MP_RPAREN'])

    def actual_parameter(self, act_rec):
        """
        Expanding Rule 69:
        ActualParameter -> OrdinalExpression
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('69')
            self.ordinal_expression(act_rec)
        else:
            self.error(accepted_list)

    def expression(self, sem_rec):
        """
        Expanding Rule 70:
        Expression -> SimpleExpression OptionalRelationalPart
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('70')
            self.simple_expression(sem_rec)
            self.optional_relational_part(sem_rec)
        else:
            self.error(accepted_list)

    def optional_relational_part(self, rel_rec):
        """
        Expanding Rules 71, 72:
        OptionalRelationalPart -> RelationalOperator SimpleExpression
                               -> epsilon
        """
        accepted_list = ['MP_LTHAN', 'MP_LEQUAL', 'MP_GTHAN', 'MP_GEQUAL',
                         'MP_EQUAL', 'MP_NEQUAL']
        eps_list = ['MP_RPAREN', 'MP_COMMA', 'MP_SCOLON', 'MP_DO', 'MP_DOWNTO',
                    'MP_ELSE', 'MP_END', 'MP_THEN', 'MP_TO', 'MP_UNTIL']
        if self.t_type() in accepted_list:
            Parser.print_tree('71')
            self.relational_operator(rel_rec)
            # The right-hand operand is parsed into a fresh record that is
            # not kept afterwards.
            self.simple_expression(SemanticRecord())
        elif self.t_type() in eps_list:
            Parser.print_tree('72')
            self.epsilon()
        else:
            self.error(accepted_list + eps_list)

    def relational_operator(self, expr_rec):
        """
        Expanding Rules 73 - 78:
        RelationalOperator -> "="
                           -> "<"
                           -> ">"
                           -> "<="
                           -> ">="
                           -> "<>"

        Stores the operator mnemonic in ``expr_rec.lexeme``.
        """
        if self.t_type() == 'MP_EQUAL':
            Parser.print_tree('73')
            self.match(self.t_lexeme())
            expr_rec.lexeme = 'eq'
        elif self.t_type() == 'MP_LTHAN':
            Parser.print_tree('74')
            self.match(self.t_lexeme())
            expr_rec.lexeme = 'lt'
        elif self.t_type() == 'MP_GTHAN':
            Parser.print_tree('75')
            self.match(self.t_lexeme())
            expr_rec.lexeme = 'gt'
        elif self.t_type() == 'MP_LEQUAL':
            Parser.print_tree('76')
            self.match(self.t_lexeme())
            expr_rec.lexeme = 'lte'
        elif self.t_type() == 'MP_GEQUAL':
            Parser.print_tree('77')
            self.match(self.t_lexeme())
            expr_rec.lexeme = 'gte'
        elif self.t_type() == 'MP_NEQUAL':
            Parser.print_tree('78')
            self.match(self.t_lexeme())
            expr_rec.lexeme = 'ne'
        else:
            self.error(['MP_EQUAL', 'MP_LTHAN', 'MP_GTHAN', 'MP_LEQUAL',
                        'MP_GEQUAL', 'MP_NEQUAL'])

    def simple_expression(self, sem_rec):
        """
        Expanding Rule 79:
        SimpleExpression -> OptionalSign Term TermTail
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('79')
            self.optional_sign(sem_rec)
            self.term(sem_rec)
            self.term_tail(sem_rec)
        else:
            self.error(accepted_list)

    def term_tail(self, term_tail_rec):
        """
        Expanding Rules 80, 81:
        TermTail -> AddingOperator Term TermTail
                 -> epsilon

        ``term_tail_rec`` contains information about the left operand of the
        expression.
        """
        result_sem_rec = SemanticRecord()
        term_sem_rec = SemanticRecord()
        add_op_sem_rec = SemanticRecord()
        # MP_END and MP_THEN belong to this follow set as well: both
        # factor_tail and optional_relational_part (which run right before
        # and right after this rule) accept them as epsilon.
        eps_list = ['MP_RPAREN', 'MP_COMMA', 'MP_SCOLON', 'MP_LTHAN',
                    'MP_LEQUAL', 'MP_NEQUAL', 'MP_EQUAL', 'MP_GTHAN',
                    'MP_GEQUAL', 'MP_DO', 'MP_DOWNTO', 'MP_ELSE', 'MP_END',
                    'MP_THEN', 'MP_TO', 'MP_UNTIL']
        accepted_list = ['MP_PLUS', 'MP_MINUS', 'MP_OR']
        if self.t_type() in accepted_list:
            Parser.print_tree('80')
            self.adding_operator(add_op_sem_rec)
            self.term(term_sem_rec)
            self.sem_analyzer.gen_arithmetic(term_tail_rec, add_op_sem_rec,
                                             term_sem_rec, result_sem_rec)
            self.term_tail(result_sem_rec)
        elif self.t_type() in eps_list:
            Parser.print_tree('81')
            self.epsilon()
        else:
            self.error(accepted_list + eps_list)

    def optional_sign(self, sem_rec):
        """
        Expanding Rules 82, 83, 84:
        OptionalSign -> "+"
                     -> "-"
                     -> epsilon

        A leading minus sets ``sem_rec.negative`` to 1.
        """
        eps_list = ['MP_IDENTIFIER', 'MP_INTEGER', 'MP_NOT', 'MP_LPAREN']
        if self.t_type() == 'MP_PLUS':
            Parser.print_tree('82')
            self.match(self.t_lexeme())
        elif self.t_type() == 'MP_MINUS':
            Parser.print_tree('83')
            self.match(self.t_lexeme())
            sem_rec.negative = 1
        elif self.t_type() in eps_list:
            Parser.print_tree('84')
            self.epsilon()
        else:
            self.error(eps_list + ['MP_PLUS', 'MP_MINUS'])

    def adding_operator(self, sem_rec):
        """
        Expanding Rules 85, 86, 87:
        AddingOperator -> "+"
                       -> "-"
                       -> "or"

        Stores the operator lexeme in ``sem_rec.lexeme``.
        """
        accepted_list = ['MP_PLUS', 'MP_MINUS', 'MP_OR']
        if self.t_type() in accepted_list:
            in_lexeme = self.t_lexeme()
            self.match(in_lexeme)
            sem_rec.lexeme = in_lexeme
        else:
            self.error(accepted_list)

    def term(self, sem_rec):
        """
        Expanding Rule 88:
        Term -> Factor FactorTail
        """
        accepted_list = ['MP_LPAREN', 'MP_IDENTIFIER', 'MP_INTEGER', 'MP_NOT']
        if self.t_type() in accepted_list:
            Parser.print_tree('88')
            self.factor(sem_rec)
            self.factor_tail(sem_rec)
        else:
            self.error(accepted_list)

    def factor_tail(self, sem_rec):
        """
        Expanding Rules 89, 90:
        FactorTail -> MultiplyingOperator Factor FactorTail
                   -> epsilon
        """
        result_sem_rec = SemanticRecord()
        factor_sem_rec = SemanticRecord()
        mul_op_sem_rec = SemanticRecord()
        accepted_list = ['MP_TIMES', 'MP_AND', 'MP_DIV', 'MP_MOD']
        eps_list = ['MP_RPAREN', 'MP_PLUS', 'MP_COMMA', 'MP_MINUS',
                    'MP_SCOLON', 'MP_LTHAN', 'MP_LEQUAL', 'MP_NEQUAL',
                    'MP_EQUAL', 'MP_GTHAN', 'MP_GEQUAL', 'MP_DO', 'MP_DOWNTO',
                    'MP_ELSE', 'MP_END', 'MP_OR', 'MP_THEN', 'MP_TO',
                    'MP_UNTIL']
        if self.t_type() in accepted_list:
            Parser.print_tree('89')
            self.multiplying_operator(mul_op_sem_rec)
            self.factor(factor_sem_rec)
            self.sem_analyzer.gen_arithmetic(sem_rec, mul_op_sem_rec,
                                             factor_sem_rec, result_sem_rec)
            self.factor_tail(result_sem_rec)
        elif self.t_type() in eps_list:
            Parser.print_tree('90')
            self.epsilon()
        else:
            self.error(accepted_list + eps_list)

    def multiplying_operator(self, mul_op_sem_rec):
        """
        Expanding Rules 91, 92, 93, 94:
        MultiplyingOperator -> "*"
                            -> "div"
                            -> "mod"
                            -> "and"

        Stores the operator mnemonic in ``mul_op_sem_rec.lexeme``.
        """
        if self.t_type() == 'MP_TIMES':
            Parser.print_tree('91')
            self.match(self.t_lexeme())
            mul_op_sem_rec.lexeme = '*'
        elif self.t_type() == 'MP_DIV':
            Parser.print_tree('92')
            self.match(self.t_lexeme())
            mul_op_sem_rec.lexeme = 'div'
        elif self.t_type() == 'MP_MOD':
            Parser.print_tree('93')
            self.match(self.t_lexeme())
            mul_op_sem_rec.lexeme = 'mod'
        elif self.t_type() == 'MP_AND':
            Parser.print_tree('94')
            self.match(self.t_lexeme())
            mul_op_sem_rec.lexeme = 'and'
        else:
            self.error(['MP_TIMES', 'MP_DIV', 'MP_MOD', 'MP_AND'])

    def factor(self, sem_rec):
        """
        Expanding Rules 95, 96, 97, 98, 99:
        Factor -> UnsignedInteger
               -> VariableIdentifier
               -> "not" Factor
               -> "(" Expression ")"
               -> FunctionIdentifier OptionalActualParameterList

        Returns ``sem_rec``.
        """
        if self.t_type() == 'MP_INTEGER':
            Parser.print_tree('95')
            sem_rec.lexeme = self.t_lexeme()
            sem_rec.type = 'Integer'
            self.sem_analyzer.gen_push_int(sem_rec)
            self.match(self.t_lexeme())
        elif self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('96')
            sem_rec.lexeme = self.t_lexeme()
            self.match(self.t_lexeme())
            self.sem_analyzer.gen_push_id(sem_rec, SemanticRecord())
        elif self.t_type() == 'MP_NOT':
            Parser.print_tree('97')
            self.match(self.t_lexeme())
            self.factor(sem_rec)
        elif self.t_type() == 'MP_LPAREN':
            Parser.print_tree('98')
            self.match(self.t_lexeme())
            self.expression(sem_rec)
            if self.t_type() == 'MP_RPAREN':
                self.match(self.t_lexeme())
            else:
                self.error('MP_RPAREN')
        else:
            self.error(['MP_INTEGER', 'MP_IDENTIFIER', 'MP_NOT', 'MP_LPAREN'])
        return sem_rec

    def program_identifier(self):
        """
        Expanding Rule 100:
        ProgramIdentifier -> Identifier

        Records the identifier in ``self.program_name``.
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('100')
            self.program_name = self.t_lexeme()
            self.match(self.program_name)
        else:
            self.error('MP_IDENTIFIER')

    def variable_identifier(self, sem_rec):
        """
        Expanding Rule 101:
        VariableIdentifier -> Identifier

        Stores the identifier in ``sem_rec.lexeme``; the record type is
        always set to 'Integer'.
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('101')
            in_lexeme = self.t_lexeme()
            self.match(in_lexeme)
            sem_rec.lexeme = in_lexeme
            sem_rec.type = 'Integer'
        else:
            self.error('MP_IDENTIFIER')

    def procedure_identifier(self):
        """
        Expanding Rule 102:
        ProcedureIdentifier -> Identifier

        Records the identifier in ``self.cur_proc_name``.
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('102')
            self.cur_proc_name = self.t_lexeme()
            self.match(self.cur_proc_name)
        else:
            self.error('MP_IDENTIFIER')

    def function_identifier(self):
        """
        Expanding Rule 103:
        FunctionIdentifier -> Identifier

        Records the identifier in ``self.cur_func_name``.
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('103')
            self.cur_func_name = self.t_lexeme()
            self.match(self.cur_func_name)
        else:
            self.error('MP_IDENTIFIER')

    def boolean_expression(self, expr_rec):
        """
        Expanding Rule 104:
        BooleanExpression -> Expression
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('104')
            # NOTE(review): one token is consumed unconditionally before and
            # after the expression - presumably the enclosing parentheses;
            # confirm against the language definition.
            self.match(self.t_lexeme())
            self.expression(expr_rec)
            self.match(self.t_lexeme())
        else:
            self.error(accepted_list)

    def ordinal_expression(self, sem_rec):
        """
        Expanding Rule 105:
        OrdinalExpression -> Expression
        """
        accepted_list = self._EXPRESSION_FIRST
        if self.t_type() in accepted_list:
            Parser.print_tree('105')
            self.expression(sem_rec)
        else:
            self.error(accepted_list)

    def identifier_list(self, id_list):
        """
        Expanding Rule 106:
        IdentifierList -> Identifier IdentifierTail

        Appends every identifier to ``id_list`` and returns it.
        """
        if self.t_type() == 'MP_IDENTIFIER':
            Parser.print_tree('106')
            id_list.append(self.identifier())
            self.identifier_tail(id_list)
        else:
            self.error('MP_IDENTIFIER')
        return id_list

    def identifier_tail(self, id_list):
        """
        Expanding Rules 107, 108:
        IdentifierTail -> "," Identifier IdentifierTail
                       -> epsilon

        Appends every identifier to ``id_list`` and returns it.
        """
        if self.t_type() == 'MP_COMMA':
            Parser.print_tree('107')
            self.match(',')
            id_list.append(self.identifier())
            self.identifier_tail(id_list)
        elif self.t_type() == 'MP_COLON':
            Parser.print_tree('108')
            self.epsilon()
        else:
            self.error(['MP_COMMA', 'MP_COLON'])
        return id_list

    def identifier(self):
        """Consume the current token and return its lexeme."""
        name = self.t_lexeme()
        self.match(name)
        return name
class Parser:
    """Recursive-descent parser that drives a StackMachine code generator.

    Tokens are supplied through ``load_tokens``; ``run`` then walks the
    grammar quoted in the method docstrings and stores the instruction list
    exported by the stack machine in ``self.instructions``.

    NOTE(review): some productions call helpers that are not defined in this
    class (``_parse_designator_stuff``, ``_parse_designator_list``,
    ``_parse_const_expression``); reaching them raises AttributeError.
    """

    def __init__(self):
        self.debugger = DebugPrinter()
        self.tk_list = None
        self.token_index = 1
        self.token_list_length = None
        self.current_token = None
        self.next_token = None
        # debug stuff
        self.debug_mode_on = True
        self.state = None
        # some productions record their name here instead of in ``state``
        self.parse_state = None
        # initialize symbol table
        self.symbol_table = SymbolTable()
        self.temp_store = []
        # initialize stack machine
        self.stack_machine = StackMachine(self.symbol_table)
        # machine instructions
        self.instructions = None

    def run(self):
        """ CompilationUnit --> ProgramModule """
        self.parse_state = 'run'
        self._program_module()
        # get list of instructions to be sent to VirtualRunTime module.
        self.instructions = self.stack_machine.export_instructions()

    def _program_module(self):
        """ ProgramModule --> yprogram yident ProgramParameters ';' Block '.' """
        self.parse_state = 'program_module'
        self._match('TK_PROGRAM')
        self._match('TK_IDENTIFIER')
        # ignoring program parameters for now, dont know what its used for.
        self._parse_program_parameters()
        self._match('TK_SEMICOLON')
        # parse tokens between tk_begin and tk_end
        self._parse_block()
        self._match('TK_PERIOD')
        self.stack_machine.generate_halt()

    def _parse_program_parameters(self):
        """ ProgramParameters --> '(' IdentList ')' """
        self.parse_state = 'program_parameters'
        if self._current_tk_type() == 'TK_L_PAREN':
            self._match('TK_L_PAREN')
            self._parse_identifier_list()
            self._match('TK_R_PAREN')

    def _parse_block(self):
        """ Block --> [Declarations] StatementSequence """
        self.parse_state = 'block'
        # only variable declarations are currently implemented so this works.
        if self._current_tk_type() == 'TK_VAR':
            self._parse_declarations()
        self._parse_statement_sequence()

    def _parse_declarations(self):
        """Declarations -> [ConstantDefBlock]   # not implemented yet.
                         | [TypeDefBlock]       # not implemented yet.
                         | [VariableDeclBlock]  # implemented
                         | [SubprogDeclList]    # not implemented yet.
        """
        self.parse_state = 'declarations'
        self._parse_variable_decl_block()

    def _parse_variable_decl_block(self):
        """ VariableDeclBlock --> yvar VariableDecl ';' {VariableDecl ';'}

        A repeated ``var`` keyword in front of a further declaration is
        still accepted, as it was before.
        """
        self._match('TK_VAR')
        self._parse_variable_decl()
        self._match('TK_SEMICOLON')
        # Per the grammar further declarations start with an identifier; the
        # block is followed by ``begin``, so an identifier here is unambiguous.
        while self._current_tk_type() in ('TK_VAR', 'TK_IDENTIFIER'):
            if self._current_tk_type() == 'TK_VAR':
                self._match('TK_VAR')
            self._parse_variable_decl()
            self._match('TK_SEMICOLON')

    def _parse_variable_decl(self):
        """ VariableDecl --> IdentList ':' Type """
        self.parse_state = 'variable_declaration'
        self._parse_identifier_list()
        self._match('TK_COLON')
        self._parse_type()

    def _parse_identifier_list(self):
        """IdentList --> yident {',' yident}

        Every identifier token is buffered in ``self.temp_store``.
        """
        self.parse_state = 'identifier_list'
        self.temp_store.append(self.current_token)
        self._match('TK_IDENTIFIER')
        while self._current_tk_type() == 'TK_COMMA':
            self._match('TK_COMMA')
            self.temp_store.append(self.current_token)
            self._match('TK_IDENTIFIER')

    def _parse_constant_def_block(self):
        """Placeholder: constant definitions are not implemented."""
        pass

    def _parse_type_def_block(self):
        """Placeholder: type definitions are not implemented."""
        pass

    def _parse_type(self):
        """ Type --> yident
                   | ArrayType    # not implemented yet.
                   | PointerType  # not implemented yet.
                   | RecordType   # not implemented yet.
                   | SetType      # not implemented yet.

        Hands the buffered identifier tokens plus the type token to the
        symbol table, then clears the buffer.
        """
        self.parse_state = 'type'
        self.temp_store.append(self.current_token)
        self.symbol_table.parse_variable_declaration(self.temp_store)
        # clear the temp symbol buffer after this iteration
        self.temp_store = []
        self._match('TK_IDENTIFIER')

    def _parse_statement_sequence(self):
        """ StatementSequence --> ybegin Statement {';' Statement} yend """
        self.state = 'statement_sequence'
        self._match('TK_BEGIN')
        self._parse_statement()
        while self._current_tk_type() == 'TK_SEMICOLON':
            self._match('TK_SEMICOLON')
            self._parse_statement()
        self._match('TK_END')

    def _parse_statement(self):
        """ Statement --> Assignment       # implemented
                        | ProcedureCall
                        | IfStatement      # implemented
                        | CaseStatement    # partial
                        | WhileStatement   # partial
                        | RepeatStatement
                        | ForStatement
                        | IOStatement      # implemented
                        | MemoryStatement
                        | StatementSequence
                        | empty
        """
        self.state = 'parse_statement'
        tk_type = self._current_tk_type()
        # The alternatives are mutually exclusive: exactly one statement is
        # parsed per call, so two statements always need a ';' between them.
        if tk_type == 'TK_IDENTIFIER':
            self._parse_assignment()
        elif tk_type == 'TK_IF':
            self._parse_if_statement()
        elif self.current_token.is_io_operator():
            self._parse_io_statement()
        # since there is a EMPTY, we do not need to raise exception.

    def _parse_if_statement(self):
        """ IfStatement --> yif Expression ythen Statement [yelse Statement] """
        if self._current_tk_type() == 'TK_IF':
            self._match('TK_IF')
            self._parse_expression()
            self._match('TK_THEN')
            self._parse_statement()
            if self._current_tk_type() == 'TK_ELSE':
                self._match('TK_ELSE')
                self._parse_statement()

    def _parse_case_statement(self):
        """ CaseStatement --> ycase Expression yof Case {';' Case} yend """
        self._match('TK_CASE')
        self._parse_expression()
        self._match('TK_OF')
        self._parse_case()
        while self._current_tk_type() == 'TK_SEMICOLON':
            # consume the separator, otherwise the loop never terminates
            self._match('TK_SEMICOLON')
            self._parse_case()
        self._match('TK_END')

    def _parse_case(self):
        """ Case --> CaseLabelList ':' Statement """
        self._parse_case_label_list()
        self._match('TK_COLON')
        self._parse_statement()

    def _parse_case_label_list(self):
        """CaseLabelList --> ConstExpression {',' ConstExpression } """
        self._parse_const_expression()  # not implemented yet.
        while self._current_tk_type() == 'TK_COMMA':
            # consume the separator, otherwise the loop never terminates
            self._match('TK_COMMA')
            self._parse_const_expression()  # again, not implemented yet.

    # Former (mis-capitalised) name, kept so existing references still work.
    _parse_case_label_List = _parse_case_label_list

    def _parse_while_statement(self):
        """ WhileStatement --> ywhile Expression ydo Statement """
        self._match('TK_WHILE')
        self._parse_expression()
        self._match('TK_DO')
        self._parse_statement()

    def _parse_assignment(self):
        """ Assignment --> Designator ':=' Expression

        After the expression has been parsed a pop into the designated
        identifier is generated.
        """
        self.state = 'parse_assignment'
        identifier = self._parse_designator()
        self._match('TK_ASSIGNMENT')
        self._parse_expression()
        self.stack_machine.generate_pop(identifier)

    def _parse_designator(self):
        """ Designator --> yident [DesignatorStuff]

        Returns the value of the identifier token.
        """
        self.state = 'parse_designator'
        print('desig')
        # call form works as a statement (py2) and as a function (py3)
        print(self.current_token.get_value())
        identifier = self.current_token.get_value()
        self._match('TK_IDENTIFIER')
        if self._current_tk_type() == 'TK_PERIOD':
            self._parse_designator_stuff()
        return identifier

    def _parse_expression(self):
        """ Expression --> SimpleExpression [ Relation SimpleExpression ]

        Returns the value produced by the first SimpleExpression.
        """
        self.state = 'parse_expression'
        val = self._parse_simple_expression()
        if self.current_token.is_relation_operator():
            self._parse_relation()
            self._parse_simple_expression()
        print("the val is", val)
        return val

    def _parse_relation(self):
        """ Relation --> '=' | '<>' | '<' | '>' | '<=' | '>=' | in

        Only '=', '>' and '<' are consumed at present.
        """
        tk_type = self._current_tk_type()
        if tk_type == 'TK_EQUALS':
            self._match('TK_EQUALS')
        elif tk_type == 'TK_GREATER_THAN':
            self._match('TK_GREATER_THAN')
        elif tk_type == 'TK_LESS_THAN':
            self._match('TK_LESS_THAN')

    def _parse_simple_expression(self):
        """ SimpleExpression --> [UnaryOperator] Term {AddOperator Term} """
        self.state = 'parse_simple_exression'
        op = None
        if self.current_token.is_unary_operator():
            op = self._parse_unary_operator()
        t1 = self._parse_term()
        while self.current_token.is_add_operator():
            tk_type = self._current_tk_type()
            # Exactly one operator per iteration; independent ``if``s would
            # also swallow a second operator (e.g. ``a + - b``).
            if tk_type == 'TK_ADDITION':
                op = tk_type
                self._match('TK_ADDITION')
            elif tk_type == 'TK_SUBTRACTION':
                op = tk_type
                self._match('TK_SUBTRACTION')
            elif tk_type == 'TK_OR':
                # NOTE(review): ``op`` is left unchanged for 'or', as before.
                self._match('TK_OR')
            t2 = self._parse_term()
            t1 = self.stack_machine.generate(self.state, op, t1, t2)
        return t1

    def _parse_unary_operator(self):
        """ UnaryOperator --> '+' | '-'

        Returns the matched token type, or None if neither matches.
        """
        self.state = 'parse_unary_operator'
        tk_type = self._current_tk_type()
        if tk_type in ('TK_ADDITION', 'TK_SUBTRACTION'):
            self._match(tk_type)
            return tk_type

    def _parse_term(self):
        """ Term --> Factor {MultOperator Factor} """
        self.state = 'parse_term'
        f1 = self._parse_factor()
        while self.current_token.is_mult_operator():
            op = self._parse_mult_operator()
            f2 = self._parse_factor()
            f1 = self.stack_machine.generate(self.state, op, f1, f2)
        return f1

    def _parse_mult_operator(self):
        """ MultOperator --> '*' | '/' | div | mod | and

        Returns the matched token type; 'mod' and 'and' are not handled and
        yield None without consuming a token.
        """
        self.state = 'parse_mult_operator'
        tk_type = self._current_tk_type()
        if tk_type in ('TK_MULTIPLICATION', 'TK_DIVISION', 'TK_DIV'):
            self._match(tk_type)
            return tk_type

    def _parse_factor(self):
        """ Factor --> ynumber | ystring | ytrue | yfalse | ynil
                     | Designator
                     | '(' Expression ')'
                     | ynot Factor
                     | Setvalue      # not implemented
                     | FunctionCall  # not implemented

        Returns the token type for an integer literal, the identifier for a
        designator, and None for every other alternative.
        """
        self.state = 'parse_factor'
        val = None
        tk_type = self._current_tk_type()
        if tk_type == 'TK_INT_LITERAL':
            val = self.current_token.get_type()
            self.stack_machine.generate_pushi(self.current_token)
            self._match('TK_INT_LITERAL')
            return val
        elif tk_type == 'TK_STRING_LITERAL':
            self._match('TK_STRING_LITERAL')
        elif tk_type == 'TK_TRUE':
            self._match('TK_TRUE')
        elif tk_type == 'TK_NIL':
            self._match('TK_NIL')
        elif tk_type == 'TK_L_PAREN':
            self._match('TK_L_PAREN')
            self._parse_expression()
            self._match('TK_R_PAREN')
        elif tk_type == 'TK_NOT':
            self._match('TK_NOT')
            self._parse_factor()
        elif tk_type == 'TK_IDENTIFIER':
            print("in parse_factor")
            val = self._parse_designator()
            return val
        else:
            # unknown factor start: deliberately tolerated (no exception)
            pass

    def _parse_io_statement(self):
        """ IOStatement --> yread '(' DesignatorList ')'
                          | yreadln [ '(' DesignatorList ')' ]
                          | ywrite '(' ExpList ')'
                          | ywriteln [ '(' ExpList ')' ]
        """
        self.state = 'parse_io_statement'
        tk_type = self._current_tk_type()
        if tk_type == 'TK_READ':
            # the keyword itself has to be consumed before the '('
            self._match('TK_READ')
            self._match('TK_L_PAREN')
            self._parse_designator_list()  # not implemented
            self._match('TK_R_PAREN')
        elif tk_type == 'TK_READLN':
            self._match('TK_READLN')  # incomplete.
        elif tk_type == 'TK_WRITELN':
            self._match('TK_WRITELN')
            if self._current_tk_type() == 'TK_L_PAREN':
                self._match('TK_L_PAREN')
                val = self._parse_exp_list()
                self.stack_machine.generate_writeln(val)
                self._match('TK_R_PAREN')
        else:
            raise Exception('no match in _parse_io_statement')

    def _parse_exp_list(self):
        """ExpList --> Expression { ',' Expression }

        Returns the value of the first expression only.
        """
        val = self._parse_expression()
        while self._current_tk_type() == 'TK_COMMA':
            self._match('TK_COMMA')
            self._parse_expression()
        return val

    def load_tokens(self, list_of_tokens):
        """Install the token list and make its first token current."""
        self.tk_list = deque(list_of_tokens)
        self.current_token = self.tk_list.popleft()
        # +1 accounts for the token that was just popped
        self.token_list_length = len(self.tk_list) + 1

    def _current_tk_type(self):
        """Return the type of the current token."""
        return self.current_token.get_type()

    def _get_next_token(self):
        """Advance to the next token; at the end the current token is kept."""
        try:
            self.current_token = self.tk_list.popleft()
            self.token_index += 1
        except IndexError:
            print("Finished parsing all tokens.\n")

    def _match(self, expected_tk_type):
        """matches the current token with an expected token.

        A match is reported to the debug printer, a mismatch to its
        ``raise_match_tk_err``; afterwards the parser advances by one token.
        """
        debugger = self.debugger
        current_token = self.current_token
        if expected_tk_type == current_token.get_type():
            # pass token to debugger and print debug statement
            debugger.print_debug(
                current_token, self.token_index, self.token_list_length
            )
        else:
            debugger.raise_match_tk_err(
                current_token, self.token_index, self.token_list_length,
                expected_tk_type, current_token.get_type()
            )
        self._get_next_token()

    def get_instructions(self):
        """Return the instruction list stored by ``run``."""
        return self.instructions

    def get_symbol_table(self):
        """reaches in to symbol table and returns data segment. probably
        shouldn't return the entire symbol table but idk, screw it."""
        return self.symbol_table

    # The four methods below are unfinished scaffolding for an alternative
    # expression grammar; nothing in this class calls them.

    def _E1(self):
        """ E1 -> empty | tk_plus T tk_plus E1 | tk_minus T tk_minus E1 """
        if self._current_tk_type() != 'TK_PLUS':
            return  # empty production
        self._match('TK_PLUS')
        self._T()
        self._E1()

    def _T(self):
        """ T -> F T1 """
        pass

    def _T1(self):
        """ T1 -> empty | tk_mult F tk_mult T1 | tk_div F tk_div T1 """
        pass

    def _F(self):
        """ F -> literal(down_arrow) | ident(down_arrow) | tk_minus F
               | tk_plus F | not F | '(' F ') """
        pass
class Parser(object):
    """Line-oriented parser for Hack assembly.

    On construction the file is read, labels are entered into the symbol
    table and symbolic A-commands are rewritten to numeric addresses.
    Afterwards ``hasMoreCommands``/``advance`` iterate over the stored lines
    and the accessor methods decode the current command.
    """

    A_CMD = 'A_COMMAND'
    C_CMD = 'C_COMMAND'
    L_CMD = 'L_COMMAND'

    def __init__(self, file_name):
        self.lines = []
        self.current_position = 0
        # defined up front so commandType() before advance() fails cleanly
        self.current_command = None
        self.symbol_table = SymbolTable()
        self.readInFile(file_name)
        self.addLabelsToSymbolTable()
        self.substituteVars()

    def readInFile(self, file_name):
        """Load the commands of *file_name* into ``self.lines``.

        Blank lines and comment lines are skipped; a trailing ``//`` comment
        and any whitespace inside the instruction are removed.
        """
        with open(file_name, "r") as fp:
            for line in fp:
                line = line.strip()
                # skip empty newlines and comments
                if not line or line.startswith("//"):
                    continue
                code = line.split("//")[0]
                self.lines.append("".join(code.split()))

    def addLabelsToSymbolTable(self):
        """Record every ``(LABEL)`` with the address of the next instruction."""
        # Labels occupy no instruction slot, so only A- and C-commands
        # advance the address.
        address = 0
        while self.hasMoreCommands():
            self.advance()
            if self.commandType() == self.L_CMD:
                self.symbol_table.addEntry(self.symbol(), address)
            else:
                address += 1
        # reset current_position after reading
        self.current_position = 0

    def substituteVars(self):
        """Rewrite each symbolic ``@name`` line to the address the symbol
        table reports for it, adding unknown names to the table first."""
        while self.hasMoreCommands():
            self.advance()
            if self.commandType() != self.A_CMD:
                continue
            var = self.symbol()
            if var.isdigit():
                continue
            if not self.symbol_table.contains(var):
                self.symbol_table.addEntry(var)
            # advance() already moved on, hence the -1
            self.lines[self.current_position - 1] = "@{}".format(
                self.symbol_table.getAddress(var))
        # reset current_position after reading
        self.current_position = 0

    def commandType(self):
        """Return A_CMD, L_CMD or C_CMD for the current command.

        Raises Exception if there is no current command.
        """
        if not self.current_command:
            raise Exception("current_command is empty")
        if self.current_command.startswith("@"):
            return self.A_CMD
        if self.current_command.startswith("("):
            return self.L_CMD
        return self.C_CMD

    def hasMoreCommands(self):
        """Return True while unread lines remain."""
        return self.current_position < len(self.lines)

    def advance(self):
        """Make the next stored line the current command."""
        self.current_command = self.lines[self.current_position]
        self.current_position += 1

    def symbol(self):
        """Return the current command without its ``@`` / ``(`` / ``)``."""
        return self.current_command.strip("@()")

    def dest(self):
        """Return the part before ``=``, or None if there is no ``=``."""
        head, sep, _ = self.current_command.partition("=")
        return head if sep else None

    def comp(self):
        """Return the computation field of ``dest=comp;jump``.

        Returns None when the command has neither ``=`` nor ``;``.
        """
        command = self.current_command
        if "=" not in command and ";" not in command:
            return None
        # drop an optional ``dest=`` prefix, then an optional ``;jump`` suffix
        body = command.partition("=")[2] if "=" in command else command
        return body.partition(";")[0]

    def jump(self):
        """Return the part after ``;``, or None if there is no ``;``."""
        _, sep, tail = self.current_command.partition(";")
        return tail if sep else None
def __init__(self, source, destination):
    """Set up the collaborators for one compilation run.

    Keeps *source* and *destination*, opens a VMWriter on *destination*,
    wraps a tokenizer over *source* in a Lookahead iterator and starts with
    a fresh SymbolTable.
    """
    self.src, self.dst = source, destination
    self.writer = VMWriter(destination)
    token_stream = tokenizor.newTokenizor(source)
    self.iter = Lookahead(token_stream)
    self._symbol_table = SymbolTable()
class Assembler:
    """Two-pass assembler: pass one collects labels, pass two emits code."""

    def __init__(self):
        self.table = SymbolTable()
        # next free address handed to a newly seen variable symbol
        self.address = 16

    def first_pass(self, filename):
        """Walk *filename* once and add every label to the symbol table.

        A label is stored with the count of A- and C-commands that
        precede it.
        """
        p = Parser(filename)
        current_address = 0
        while p.has_more_commands():
            p.advance()
            c_type = p.command_type()
            if c_type == Commands.C_COMMAND or c_type == Commands.A_COMMAND:
                # incrementing the instruction address
                current_address += 1
            elif c_type == Commands.L_COMMAND:
                # adding new symbol/label to table
                self.table.add_entry(p.symbol(), current_address)

    @staticmethod
    def _output_path(filename):
        """Return *filename* with its extension replaced by ``.hack``."""
        import os
        # splitext only strips the final extension, so dots elsewhere in the
        # path (``./dir/Prog.asm``, ``my.prog.asm``) are left intact.
        return os.path.splitext(filename)[0] + '.hack'

    def second_pass(self, filename):
        """Translate *filename* and write one line per A-/C-command to the
        sibling ``.hack`` file; other command types produce no output."""
        p = Parser(filename)
        newfile = self._output_path(filename)
        with open(newfile, 'w') as f:
            while p.has_more_commands():
                p.advance()
                c_type = p.command_type()
                if c_type == Commands.C_COMMAND:
                    d, c, j = p.tokenize_C_inst()
                    line = code.get_C_inst(d, c, j)
                elif c_type == Commands.A_COMMAND:
                    # resolve the symbol to a value before encoding
                    line = code.get_A_inst(self.check_symbol(p.symbol()))
                else:
                    # anything else (labels): write nothing
                    continue
                f.write(line + '\n')

    def check_symbol(self, symbol):
        """Return the value an A-command operand stands for.

        A numeric operand is returned unchanged (as a string).  Any other
        symbol is looked up in the table; an unknown symbol is first added
        at ``self.address``, which is then incremented.
        """
        if symbol.isdigit():
            return symbol
        if symbol not in self.table.table:
            self.table.add_entry(symbol, self.address)
            self.address += 1
        return self.table.get_address(symbol)

    def assemble(self, filename):
        """Run both passes over *filename*."""
        self.first_pass(filename)
        self.second_pass(filename)
class CompilationEngine:
    """Single-pass compiler front end for one class.

    While reading tokens it builds an element tree of the parsed source and
    emits VM commands through ``self.writer``.
    """

    def __init__(self, source, destination):
        self.src = source
        self.dst = destination
        self.writer = VMWriter(destination)
        self.iter = Lookahead(tokenizor.newTokenizor(self.src))
        self._symbol_table = SymbolTable()
        # parser state, defined up front so every attribute always exists
        self.tok = None
        self.nextTok = None
        self._uid = -1

    def compile(self):
        """Compile the class and return the root element of its parse tree."""
        root = self._compileClass()
        return root

    def _compileClass(self):
        """class: 'class' name '{' classVarDec* subroutineDec* '}'"""
        classE = Element(ELEMENTS.CLASS)
        self._readKeyword(classE, ELEMENTS.CLASS)
        self.className = self._readIdentifier(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileClassVarDec(classE)
        self._compileSubroutine(classE)
        self._readSymbol(classE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        return classE

    def _compileClassVarDec(self, parent):
        """Parse all class-level variable declarations into *parent*."""
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _CLASSVARDEC.FIELD_TYPES):
            classVarDecE = Element(ELEMENTS.CLASSVARDEC)
            self._readKeyword(classVarDecE)
            self._readType(classVarDecE)
            self._readIdentifier(classVarDecE)
            while self._readSymbolOptional(classVarDecE, _SYMBOLS.COMMA):
                self._readIdentifier(classVarDecE)
            self._readSymbol(classVarDecE, _SYMBOLS.SEMI_COLON)
            parent.append(classVarDecE)

    def _compileSubroutine(self, parent):
        """Parse and compile every subroutine declaration of the class."""
        while (self.nextTok and self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _SUBROUTINEDEC.TYPES):
            subroutineDecE = Element(ELEMENTS.SUBROUTINEDEC)
            function_type = self._readKeyword(subroutineDecE)
            self._readReturnType(subroutineDecE)
            self.methodName = self._readIdentifier(subroutineDecE)
            self._symbol_table.startSubroutine(self.className, self.methodName)
            if function_type == _SUBROUTINEDEC.METHOD:
                # a method receives its object as an argument named "this"
                self._symbol_table.define("this", self.className, SYM_KINDS.ARG)
            # label numbering restarts for every subroutine
            self._uid = -1
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_OPEN)
            self._compileParameters(subroutineDecE)
            self._readSymbol(subroutineDecE, _SYMBOLS.PARENTHESES_CLOSE)
            self._compileSubroutineBody(subroutineDecE, function_type)
            parent.append(subroutineDecE)

    def _gen_label(self, type_):
        """Return a fresh label ``Class.method.<type_>.<n>``."""
        self._uid += 1
        return "%s.%s.%s.%d" % (self.className, self.methodName, type_, self._uid)

    def _gen_labels(self, *parts):
        """Return one label per part, all sharing the same fresh number."""
        self._uid += 1
        return ["%s.%s.%s.%d" % (self.className, self.methodName, part, self._uid)
                for part in parts]

    def _compileSubroutineBody(self, parent, function_type):
        """subroutineBody: '{' varDec* statements '}'

        Emits the ``function`` command and, for constructors and methods,
        the code that sets up ``pointer 0``.
        """
        bodyE = Element(ELEMENTS.SUBROUTINEBODY)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_OPEN)
        nLocals = self._compileVarDec(bodyE)
        function_name = parent[2].text
        function_full_name = "%s.%s" % (self.className, function_name)
        self.writer.writeFunction(function_full_name, nLocals)
        if function_type == _SUBROUTINEDEC.CONSTRUCTOR:
            # allocate one word per field and anchor `this` at the new block
            field_count = self._symbol_table.varCount(SYM_KINDS.FIELD)
            self.writer.writePush(SEGMENT.CONST, field_count)
            self.writer.writeCall("Memory.alloc", 1)
            self.writer.writePop(SEGMENT.POINTER, 0)
        elif function_type == _SUBROUTINEDEC.METHOD:
            # argument 0 carries the object the method was invoked on
            self.writer.writePush(SEGMENT.ARG, 0)
            self.writer.writePop(SEGMENT.POINTER, 0)
        self._compileStatements(bodyE)
        self._readSymbol(bodyE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(bodyE)

    def _compileStatements(self, parent):
        """Compile statements until the next token starts no statement."""
        statementsE = Element(ELEMENTS.STATEMENTS)
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value in _STATEMENTS.TYPE_NAMES):
            keyword = self.nextTok.value
            if keyword == _STATEMENTS.LET:
                self._compileLet(statementsE)
            elif keyword == _STATEMENTS.IF:
                self._compileIf(statementsE)
            elif keyword == _STATEMENTS.WHILE:
                self._compileWhile(statementsE)
            elif keyword == _STATEMENTS.DO:
                self._compileDo(statementsE)
            elif keyword == _STATEMENTS.RETURN:
                self._compileReturn(statementsE)
        if len(statementsE) == 0:
            statementsE.text = "\n"
        parent.append(statementsE)

    def _compileLet(self, parent):
        """let: 'let' name ('[' expression ']')? '=' expression ';'"""
        statementE = Element(ELEMENTS.STATEMENT_LET)
        self._readKeyword(statementE)
        identifier = self._readIdentifier(statementE)
        is_array = self._readSymbolOptional(statementE, _SYMBOLS.BRACKET_OPEN)
        if is_array:
            # leave (index + base) on the stack as the target address
            self._compileExpression(statementE)
            self.writer.writePush(*self._identifier_data(identifier))
            self.writer.writeArithmetic("add")
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CLOSE)
        self._readSymbol(statementE, _SYMBOLS.EQUAL)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        if is_array:
            # park the value in temp 0 while the address goes to pointer 1
            self.writer.writePop(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.POINTER, 1)
            self.writer.writePush(SEGMENT.TEMP, 0)
            self.writer.writePop(SEGMENT.THAT, 0)
        else:
            self.writer.writePop(*self._identifier_data(identifier))
        parent.append(statementE)

    def _compileIf(self, parent):
        """if: 'if' '(' expression ')' '{' statements '}'
        ('else' '{' statements '}')?"""
        label_else, label_end = self._gen_labels("if.else", "if.end")
        statementE = Element(ELEMENTS.STATEMENT_IF)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
        # jump to the else part when the condition is false
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_else)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeGoto(label_end)
        self.writer.writeLabel(label_else)
        if self._readKeywordOptional(statementE, _KEYWORDS.ELSE):
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
            self._compileStatements(statementE)
            self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        self.writer.writeLabel(label_end)
        parent.append(statementE)

    def _compileWhile(self, parent):
        """while: 'while' '(' expression ')' '{' statements '}'"""
        label_start, label_end = self._gen_labels("while.start", "while.end")
        self.writer.writeLabel(label_start)
        statementE = Element(ELEMENTS.STATEMENT_WHILE)
        self._readKeyword(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_OPEN)
        self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.PARENTHESES_CLOSE)
        # leave the loop when the condition is false
        self.writer.writeArithmetic("not")
        self.writer.writeIf(label_end)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_OPEN)
        self._compileStatements(statementE)
        self._readSymbol(statementE, _SYMBOLS.BRACKET_CURLY_CLOSE)
        parent.append(statementE)
        self.writer.writeGoto(label_start)
        self.writer.writeLabel(label_end)

    def _compileReturn(self, parent):
        """return: 'return' expression? ';' (pushes constant 0 when bare)"""
        statementE = Element(ELEMENTS.STATEMENT_RETURN)
        self._readKeyword(statementE)
        bare = (self.nextTok.type == tokenizor.SYMBOL
                and self.nextTok.value == _SYMBOLS.SEMI_COLON)
        if bare:
            self.writer.writePush(SEGMENT.CONST, 0)
        else:
            self._compileExpression(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeReturn()
        parent.append(statementE)

    def _compileExpression(self, parent):
        """expression: term (op term)*, operators applied left to right."""
        expressionE = Element(ELEMENTS.EXPRESSION)
        self._readTerm(expressionE)
        while (self.nextTok.type == tokenizor.SYMBOL
               and self.nextTok.value in _SYMBOLS.OPERATORS):
            symbol = self._readSymbol(expressionE)
            self._readTerm(expressionE)
            self.writer.writeArithmetic(symbol)
        parent.append(expressionE)

    def _compileExpressionList(self, parent):
        """Parse '(' (expression (',' expression)*)? ')' including both
        parentheses and return the number of expressions compiled."""
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_OPEN)
        expListE = Element(ELEMENTS.EXPRESSION_LIST)
        nArgs = 0
        while not (self.nextTok.type == tokenizor.SYMBOL
                   and self.nextTok.value == _SYMBOLS.PARENTHESES_CLOSE):
            self._compileExpression(expListE)
            self._readSymbolOptional(expListE, _SYMBOLS.COMMA)
            nArgs += 1
        # hack for TextComparer
        if len(expListE) == 0:
            expListE.text = "\n"
        parent.append(expListE)
        self._readSymbol(parent, _SYMBOLS.PARENTHESES_CLOSE)
        return nArgs

    def _compileDo(self, parent):
        """do: 'do' subroutineCall ';' (the returned value is discarded)"""
        statementE = Element(ELEMENTS.STATEMENT_DO)
        self._readKeyword(statementE, _STATEMENTS.DO)
        identifier = self._readIdentifier(statementE)
        nArgs = 0
        if self._readSymbolOptional(statementE, _SYMBOLS.DOT):
            type_ = self._symbol_table.typeOf(identifier)
            if type_:
                # known variable: call on that object, passed as argument 0
                segment, index = self._identifier_data(identifier)
                self.writer.writePush(segment, index)
                nArgs += 1
                identifier = "%s.%s" % (type_, self._readIdentifier(statementE))
            else:
                # unknown name: treated as a class name
                identifier = "%s.%s" % (identifier, self._readIdentifier(statementE))
        else:
            # unqualified call: method of the current object
            identifier = "%s.%s" % (self.className, identifier)
            self.writer.writePush(SEGMENT.POINTER, 0)
            nArgs += 1
        nArgs += self._compileExpressionList(statementE)
        self._readSymbol(statementE, _SYMBOLS.SEMI_COLON)
        self.writer.writeCall(identifier, nArgs)
        self.writer.writePop(SEGMENT.TEMP, 0)
        parent.append(statementE)

    def _compileVarDec(self, parent):
        """Parse all 'var' declarations; return how many names were declared."""
        nVars = 0
        while (self.nextTok.type == tokenizor.KEYWORD
               and self.nextTok.value == _KEYWORDS.VAR):
            varDecE = Element(ELEMENTS.VAR_DEC)
            self._readKeyword(varDecE, _KEYWORDS.VAR)
            self._readType(varDecE)
            self._readIdentifier(varDecE)
            nVars += 1
            while self._readSymbolOptional(varDecE, _SYMBOLS.COMMA):
                self._readIdentifier(varDecE)
                nVars += 1
            self._readSymbol(varDecE, _SYMBOLS.SEMI_COLON)
            parent.append(varDecE)
        return nVars

    def _compileParameters(self, parent):
        """parameterList: ((type name) (',' type name)*)?"""
        paramListE = Element(ELEMENTS.PARAM_LIST)
        while ((self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _CLASSVARDEC.VAR_TYPES)
               or self.nextTok.type == tokenizor.IDENTIFIER):
            self._readType(paramListE)
            self._readIdentifier(paramListE)
            self._readSymbolOptional(paramListE, _SYMBOLS.COMMA)
        if len(paramListE) == 0:
            paramListE.text = "\n"
        parent.append(paramListE)

    ##############################
    ########## READ ##############
    ##############################

    def _readTerm(self, parent):
        """Parse one term and emit the code that pushes its value."""
        termE = Element(ELEMENTS.TERM)
        if self.nextTok.type == tokenizor.INTEGER:
            self.next()
            termE.append(_leafElement(ELEMENTS.INTEGER_CONSTANT, self.tok.value))
            self.writer.writePush(SEGMENT.CONST, self.tok.value)
        elif self.nextTok.type == tokenizor.STRING:
            self.next()
            termE.append(_leafElement(ELEMENTS.STRING_CONSTANT, self.tok.value))
            string_value = self.tok.value
            # build the string at run time, one character at a time
            self.writer.writePush(SEGMENT.CONST, len(string_value))
            self.writer.writeCall("String.new", 1)
            for char in string_value:
                self.writer.writePush(SEGMENT.CONST, ord(char))
                self.writer.writeCall("String.appendChar", 2)
        elif (self.nextTok.type == tokenizor.KEYWORD
              and self.nextTok.value in _KEYWORDS.CONSTANTS):
            self.next()
            termE.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            _KW_CONT_WRITE[self.tok.value](self.writer)
        elif self.nextTok.type == tokenizor.IDENTIFIER:
            identifier = self._readIdentifier(termE)
            nArgs = 0
            if self._readSymbolOptional(termE, _SYMBOLS.BRACKET_OPEN):
                # array read: that 0 after pointing `that` at base + index
                self._compileExpression(termE)
                self.writer.writePush(*self._identifier_data(identifier))
                self.writer.writeArithmetic("add")
                self.writer.writePop(SEGMENT.POINTER, 1)
                self.writer.writePush(SEGMENT.THAT, 0)
                self._readSymbol(termE, _SYMBOLS.BRACKET_CLOSE)
            elif (self.nextTok.type == tokenizor.SYMBOL
                  and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN):
                # Unqualified call: handled like the same case in _compileDo
                # (method of the current object).  _compileExpressionList
                # already consumes the closing ')', so it is not read again.
                identifier = "%s.%s" % (self.className, identifier)
                self.writer.writePush(SEGMENT.POINTER, 0)
                nArgs = 1 + self._compileExpressionList(termE)
                self.writer.writeCall(identifier, nArgs)
            elif self._readSymbolOptional(termE, _SYMBOLS.DOT):
                type_ = self._symbol_table.typeOf(identifier)
                if type_:
                    segment, index = self._identifier_data(identifier)
                    self.writer.writePush(segment, index)
                    nArgs += 1
                    identifier = "%s.%s" % (type_, self._readIdentifier(termE))
                else:
                    identifier = "%s.%s" % (identifier, self._readIdentifier(termE))
                nArgs += self._compileExpressionList(termE)
                self.writer.writeCall(identifier, nArgs)
            else:
                self.writer.writePush(*self._identifier_data(identifier))
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value == _SYMBOLS.PARENTHESES_OPEN):
            self.next()
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._compileExpression(termE)
            self._readSymbol(termE, _SYMBOLS.PARENTHESES_CLOSE)
        elif (self.nextTok.type == tokenizor.SYMBOL
              and self.nextTok.value in _SYMBOLS.UNARY_OPARTORS):
            self.next()
            sym = self.tok.value
            termE.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            self._readTerm(termE)
            self.writer.writeArithmeticUnary(sym)
        else:
            # report the offending (upcoming) token, not the last one consumed
            raise self._syntaxError("Unexpected %s." % self.nextTok.value,
                                    self.nextTok)
        parent.append(termE)

    def _identifier_data(self, identifier):
        """Return ``(segment, index)`` for a declared identifier."""
        return (_SEG_TRANSLATE[self._symbol_table.kindOf(identifier)],
                self._symbol_table.indexOf(identifier))

    def _readIdentifier(self, parent):
        """Consume an identifier, append its element and return its name.

        An unknown name read inside a declaration element is defined in the
        symbol table; a known name is annotated with its type, kind and index.
        """
        self.next()
        self._assertToken(self.tok, ELEMENTS.IDENTIFIER, type_=tokenizor.IDENTIFIER)
        name = self.tok.value
        element = _leafElement(ELEMENTS.IDENTIFIER, name)
        type_ = self._symbol_table.typeOf(name)
        kind = None
        index = None
        if type_ is None:
            # type and kind are taken from the elements already read into
            # the enclosing declaration
            if (parent.tag in (ELEMENTS.CLASSVARDEC, ELEMENTS.VAR_DEC)
                    and len(parent) > 1):
                type_ = parent[1].text
                kind = _SYM_KIND_MAP[parent[0].text]
            elif parent.tag == ELEMENTS.PARAM_LIST and len(parent) > 0:
                type_ = parent[-1].text
                kind = SYM_KINDS.ARG
            if kind is not None:
                index = self._symbol_table.define(name, type_, kind)
        else:
            kind = self._symbol_table.kindOf(name)
            index = self._symbol_table.indexOf(name)
        if kind is not None:
            element.set("type", type_)
            element.set("kind", str(kind))
            element.set("index", str(index))
        parent.append(element)
        return name

    def _readType(self, parent):
        """Consume a built-in type keyword or a class-name identifier."""
        if (self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _CLASSVARDEC.VAR_TYPES):
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readReturnType(self, parent):
        """Consume a return-type keyword or a class-name identifier."""
        if (self.nextTok.type == tokenizor.KEYWORD
                and self.nextTok.value in _SUBROUTINEDEC.RETURN_TYPES):
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        else:
            self._readIdentifier(parent)

    def _readSymbol(self, parent, expected = None):
        """Consume a symbol (optionally a specific one) and return its value.

        Raises SyntaxError if the token is not the expected symbol.
        """
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.SYMBOL
        self._assertToken(self.tok, expectedStr, type_=tokenizor.SYMBOL)
        if expected is not None:
            self._assertToken(self.tok, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
        return self.tok.value

    def _readKeyword(self, parent, expected = None):
        """Consume a keyword (optionally a specific one) and return its value.

        Raises SyntaxError if the token is not the expected keyword.
        """
        self.next()
        expectedStr = expected if expected is not None else ELEMENTS.KEYWORD
        self._assertToken(self.tok, expectedStr, type_=tokenizor.KEYWORD)
        if expected is not None:
            self._assertToken(self.tok, expected, value_=expected)
        parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
        return self.tok.value

    def _readSymbolOptional(self, parent, expected):
        """Consume *expected* if it is the next symbol; report whether it was."""
        if self.nextTok.type == tokenizor.SYMBOL and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.SYMBOL, self.tok.value))
            return True
        return False

    def _readKeywordOptional(self, parent, expected):
        """Consume *expected* if it is the next keyword; report whether it was."""
        if self.nextTok.type == tokenizor.KEYWORD and self.nextTok.value == expected:
            self.next()
            parent.append(_leafElement(ELEMENTS.KEYWORD, self.tok.value))
            return True
        return False

    def next(self):
        """Advance: ``tok`` becomes the next token, ``nextTok`` the lookahead."""
        self.tok = self.iter.next()
        self.nextTok = self.iter.lookahead()

    def _assertToken(self, tok, expected_str, type_ = None, value_ = None):
        """Raise SyntaxError unless *tok* has the given type and/or value."""
        wrong_type = type_ is not None and tok.type != type_
        wrong_value = value_ is not None and tok.value != value_
        if wrong_type or wrong_value:
            raise self._syntaxError(
                "Expected %s but found %s" % (expected_str, tok.value), tok)

    def _syntaxError(self, msg, tok = None):
        """Build (not raise) a SyntaxError located at *tok* (default: the
        current token)."""
        if tok is None:
            tok = self.tok
        return SyntaxError(msg, (None, tok.srow, tok.scol, tok.line))
class Interpreter:
    """Recursive-descent recognizer for a small calculator language.

    The grammar implemented by the methods below (``$$`` ends the input):

        program     -> stmt_list $$
        stmt_list   -> stmt stmt_list | <empty>
        stmt        -> id := expr | read id | write expr
        expr        -> term term_tail
        term_tail   -> add_op term term_tail | <empty>
        term        -> factor factor_tail
        factor_tail -> mult_op factor factor_tail | <empty>
        factor      -> ( expr ) | id | number
        add_op      -> + | -
        mult_op     -> * | /

    Identifiers matched through the ``id`` category are recorded in the
    symbol table together with the current statement number.
    """

    def __init__(self, code_string=None):
        """Set up the tokenizer (optionally primed with ``code_string``) and
        an empty symbol table."""
        self._KEYWORDS = ['read', 'write']
        self._token = None  # current lookahead token, None before start / at EOF
        self._line = 0      # incremented once per statement in _stmt_list
        self._tokenizer = Tokenizer(code_string, ['+','-','/','*','(',')',':='], ['\n',' '])
        self._symboltable = SymbolTable()

    def reset(self):
        """Forget the current token and statement count and clear the tokenizer."""
        self._line = 0
        self._token = None
        self._tokenizer.clear()

    def interpret(self, code_string=None):
        """Parse the pending input, appending ``code_string`` to it first if given.

        Raises ParseError / TokenError when the input does not match the grammar.
        """
        if code_string is not None:
            self._tokenizer.append(code_string)
        self._consume()
        self.program()

    def _consume(self, _nomable=None):
        """Advance past the current token.

        ``_nomable`` is the category that was just matched:
        ``'$$'`` resets the interpreter instead of advancing (returns True);
        ``'id'`` records the current token in the symbol table first;
        anything else simply advances.
        """
        if _nomable == '$$':
            self.reset()
            return True
        if _nomable == 'id':
            self._symboltable.add(self._token, self._line)
        # TODO: add current token to AST
        self._token = self._tokenizer.next()

    def _is_token_id(self, _id=None):
        """Return True if ``_id`` (default: the current token) is an identifier.

        A token is an identifier if the symbol table already has it, or if it
        is purely alphabetic and not a keyword.  Raises ParseError when the
        current token is None (end of input).
        """
        if self._token is None:
            raise ParseError(self._line, 'unexpected EOF')
        if _id is None:
            _id = self._token
        if self._symboltable.has(_id):
            return True
        return _id.isalpha() and _id not in self._KEYWORDS

    def _is_token_num(self, _num=None):
        """Return True if ``_num`` (default: the current token) is all digits.

        Raises ParseError when the current token is None (end of input).
        """
        if self._token is None:
            raise ParseError(self._line, 'unexpected EOF')
        if _num is None:
            _num = self._token
        return _num.isdigit()

    def _is_token_id_or_num(self, _token=None):
        """Return True if the token is an identifier or a number."""
        if _token is None:
            _token = self._token
        return self._is_token_id(_token) or self._is_token_num(_token)

    def _match(self, expected):
        """Consume the current token if it matches ``expected``.

        ``expected`` is either a literal token or one of the categories
        ``'id'`` / ``'number'``.  The categories are accepted without further
        checking here; callers test the token class before calling.

        Raises TokenError when a literal does not match.
        """
        if expected == self._token or expected in ['id', 'number']:
            # Pass the matched category, not the raw token, so that _consume
            # can tell an identifier from any other token and record it.
            # For literals (including '$$') expected equals the token anyway.
            self._consume(expected)
        else:
            raise TokenError(self._line, self._token, expected)

    def _skip(self):
        """Take an empty production: consume nothing."""
        pass

    def program(self):
        """program -> stmt_list $$"""
        if self._token in ['read', 'write', '$$'] or self._is_token_id():
            self._stmt_list()
            self._match('$$')
        else:
            raise ParseError(self._line, 'program')

    def _stmt_list(self):
        """stmt_list -> stmt stmt_list | <empty>; counts statements in _line."""
        if self._token == '$$':
            self._skip()
        elif self._token in ['read', 'write'] or self._is_token_id():
            self._line += 1
            self._stmt()
            self._stmt_list()
        else:
            raise ParseError(self._line, 'stmt_list')

    def _stmt(self):
        """stmt -> read id | write expr | id := expr"""
        if self._token == 'read':
            self._match('read')
            self._match('id')
        elif self._token == 'write':
            self._match('write')
            self._expr()
        elif self._is_token_id():
            self._match('id')
            self._match(':=')
            self._expr()
        else:
            raise ParseError(self._line, 'stmt')

    def _expr(self):
        """expr -> term term_tail"""
        if self._token == '(' or self._is_token_id_or_num():
            self._term()
            self._term_tail()
        else:
            raise ParseError(self._line, 'expr')

    def _term_tail(self):
        """term_tail -> add_op term term_tail | <empty>"""
        if self._token in ['+', '-']:
            self._add_op()
            self._term()
            self._term_tail()
        elif self._token in [')', 'read', 'write', '$$'] or self._is_token_id():
            self._skip()
        else:
            raise ParseError(self._line, 'term_tail')

    def _term(self):
        """term -> factor factor_tail"""
        if self._token == '(' or self._is_token_id_or_num():
            self._factor()
            self._factor_tail()
        else:
            raise ParseError(self._line, 'term')

    def _factor_tail(self):
        """factor_tail -> mult_op factor factor_tail | <empty>"""
        if self._token in ['*', '/']:
            self._mult_op()
            self._factor()
            self._factor_tail()
        elif self._token in ['+', '-', ')', 'read', 'write', '$$'] or self._is_token_id():
            self._skip()
        else:
            raise ParseError(self._line, 'factor_tail')

    def _factor(self):
        """factor -> ( expr ) | id | number"""
        if self._token == '(':
            self._match('(')
            self._expr()
            self._match(')')
        elif self._is_token_id():
            self._match('id')
        elif self._is_token_num():
            self._match('number')
        else:
            raise ParseError(self._line, 'factor')

    def _add_op(self):
        """add_op -> + | -"""
        if self._token == '+':
            self._match('+')
        elif self._token == '-':
            self._match('-')
        else:
            raise ParseError(self._line, 'add_op')

    def _mult_op(self):
        """mult_op -> * | /"""
        if self._token == '*':
            self._match('*')
        elif self._token == '/':
            self._match('/')
        else:
            raise ParseError(self._line, 'mult_op')
def __init__(self):
    """Start with a fresh symbol table and the address counter at 16."""
    symbols = SymbolTable()
    self.table = symbols
    # presumably the next free variable address -- confirm against the users
    # of ``self.address``
    self.address = 16
def assemble(self):
    """Assemble ``self.source_filename`` into ``self.destination_file``.

    The source is parsed three times:

    1. labels ("L" commands) are bound to the number of the instruction
       that follows them;
    2. every non-numeric "A" symbol not yet in the table is bound to the
       next variable address, starting at 16;
    3. one output line is written per "A" or "C" command.

    The destination file is closed when the method finishes, whether it
    succeeds or raises.

    Raises Exception if the parser reports an unknown command type.
    """
    digits = "0123456789"

    try:
        #
        # first pass - build the symbol table for labels
        #
        parser = Parser(self.source_filename)
        symbol_table = SymbolTable()

        # number of the instruction the next label would refer to
        instruction = 0

        while parser.hasMoreCommands():
            parser.advance()
            command_type = parser.commandType()

            if command_type == "L":
                # a label does not occupy an instruction slot itself
                symbol = parser.symbol()
                if symbol not in symbol_table:
                    symbol_table.addEntry(symbol, instruction)
            else:
                instruction += 1

        #
        # second pass - build the symbol table for variables
        #
        parser = Parser(self.source_filename)

        # the memory location for the next variable
        variable_address = 16

        while parser.hasMoreCommands():
            parser.advance()
            command_type = parser.commandType()

            if command_type == "A":
                symbol = parser.symbol()
                # a leading digit means a literal number, not a symbol
                if symbol[0] not in digits:
                    if symbol not in symbol_table:
                        symbol_table.addEntry(symbol, variable_address)
                        variable_address += 1

        #
        # third pass - generate the output
        #
        parser = Parser(self.source_filename)
        code = Code()

        while parser.hasMoreCommands():
            parser.advance()
            command_type = parser.commandType()

            if command_type == "A":
                # a command: resolve symbols, literals are passed through
                symbol = parser.symbol()
                if symbol in symbol_table:
                    symbol = symbol_table.getAddress(symbol)

                symbol_binary = code.decimalToBinary(symbol)
                self.destination_file.write("0" + symbol_binary + "\n")

            elif command_type == "C":
                # c command
                comp = code.comp(parser.comp())
                dest = code.dest(parser.dest())
                jump = code.jump(parser.jump())
                self.destination_file.write("111" + comp + dest + jump + "\n")

            elif command_type == "L":
                # label - nothing to emit in this stage
                pass

            else:
                # unknown command
                raise Exception("ERROR: Unknown command type encountered")
    finally:
        # close the output file even if a pass failed, so the handle is
        # never leaked
        self.destination_file.close()