def Evaluate(self, table):
    """Call the function or sub named by ``self.value``.

    The declaration node and its kind are fetched from ``table``; a child
    scope is created; every call argument is evaluated in the caller's
    scope, type-checked against the matching parameter and bound in the
    child scope; finally the body (last child of the declaration) runs.

    Args:
        table: symbol table of the calling scope.

    Returns:
        For a ``"FUNCAO"`` declaration, whatever the child scope holds
        under the function's own name after the body ran; otherwise None.

    Raises:
        Exception: ``"quantidade de argumentos distintos"`` when the number
            of call arguments differs from the number of parameters, or
            ``"tipos errados"`` when an argument type does not match.
    """
    # One lookup yields both the declaration node and its kind.
    entry = table.get_value2(self.value)
    dec = entry[0]
    tipo = entry[1]
    is_function = tipo == "FUNCAO"

    # For a function, child 0 is the return type; the last child is always
    # the body. Everything in between is a parameter declaration.
    first_param = 1 if is_function else 0
    params = dec.children[first_param:-1]

    # Validate arity before touching anything: previously a call with too
    # few arguments died with IndexError inside the binding loop.
    if len(params) != len(self.children):
        raise Exception("quantidade de argumentos distintos")

    new_table = SymbolTable(table)
    if is_function:
        # Reserve the slot that carries the return value, typed by child 0.
        new_table.table[self.value] = [None, dec.children[0].Evaluate(table)]

    for param, arg_node in zip(params, self.children):
        param.Evaluate(new_table)  # declares the parameter in the new scope
        argument = arg_node.Evaluate(table)
        argument_value = argument[0]
        argument_type = argument[1]
        parameter_type = param.children[1].Evaluate(table)
        if argument_type != parameter_type:
            raise Exception("tipos errados")
        new_table.set_value(param.children[0].value, argument_value)

    dec.children[-1].Evaluate(new_table)
    if is_function:
        return new_table.get_value(self.value)
    return None
def __init__(self, input_stream, output_file):
    """Prepare the engine's state for compiling one class.

    Args:
        input_stream: tokenizer that supplies the tokens to compile.
        output_file: destination path, stored for later use.
    """
    # Input / output endpoints.
    self.tokenizer = input_stream
    self.outfile = output_file

    # Mutable compilation state.
    self.class_name = None
    self.out_stream = []
    self.buffer = []
    self.if_count = 0
    self.while_count = 0

    # The VM writer appends to the shared out_stream list.
    self.generator = VMWriter(self.out_stream)
    self.symbol_table = SymbolTable()

    # Binary operator symbol -> VM arithmetic command.
    self.op_table = dict(
        zip('+-&|<>=', ('ADD', 'SUB', 'AND', 'OR', 'LT', 'GT', 'EQ')))
    # Symbol-table kind -> VM memory segment.
    self.convert_kind = dict(
        ARG='ARG', STATIC='STATIC', VAR='LOCAL', FIELD='THIS')
def __init__(self):
    """Initialise counters, collaborators and scratch state of the parser."""
    # Collaborators (created in this order).
    self.codeGenerator = CodeGenerator()
    self.symbolTable = SymbolTable()

    # Counters used when naming temporaries.
    self.tempCount = 0
    self.tempCountBool = 0

    # Scratch state updated while rules are reduced.
    self.array_length = 0
    self.var_stack = []
    self.latest_bool = None
def __init__(self, inp_file, out_file):
    """Build the engine and immediately compile the class in ``inp_file``.

    Args:
        inp_file: source handed to the ``Tokenizer``.
        out_file: path handed to the ``VMWriter`` and also opened here in
            append mode.
    """
    # Collaborators, created in the same order as before.
    self.tokenizer = Tokenizer(inp_file)
    self.sym_tbl = SymbolTable()
    self.vm_writer = VMWriter(out_file)
    self.out_file = open(out_file, "a")

    # Cursor over the token stream.
    self.current_token = self.current_token_type = ""
    self.curr_token_ptr = -1

    # Per-construct counters used to build unique labels.
    self.label_counter = dict.fromkeys(("if", "while"), 0)

    # Load the first token, then compile the whole class.
    self.advance()
    self.compileClass()
def assemble(fileName):
    """Assemble the source file ``fileName`` into a sibling ``.hack`` file.

    Two passes are made over the source. The first records the instruction
    address of every label (L-command) in the symbol table. The second
    emits one 16-character binary line per A- or C-command; symbols seen
    for the first time there are allocated data addresses starting at 16.

    Args:
        fileName: path of the assembly source file.

    The output path is ``fileName`` with its extension replaced by
    ``.hack``.
    """
    import os

    import code
    from assemblerParser import Parser
    from symbolTable import SymbolTable

    WORD_SIZE = 16  # words are 16 bits long
    next_data_addr = 16
    table = SymbolTable()

    # Pass 1: labels. Only real instructions advance the address counter.
    with open(fileName, 'r') as source:
        parser = Parser(source)
        counter = 0
        while parser.hasMoreCommands():
            parser.advance()
            if parser.commandType() == Parser.L_COMMAND:
                table.addEntry(parser.symbol(), counter)
            else:
                counter += 1

    # splitext strips only the final extension, so "../prog.asm" or a name
    # without any dot no longer yields a mangled output path.
    newFileName = os.path.splitext(fileName)[0] + ".hack"

    # Pass 2: code generation. Both files are closed even on error.
    with open(fileName, 'r') as source, open(newFileName, 'w') as outputFile:
        parser = Parser(source)
        while parser.hasMoreCommands():
            parser.advance()
            output = "BLANK"
            kind = parser.commandType()

            if kind == Parser.C_COMMAND:
                output = ("111" + code.comp(parser.comp())
                          + code.dest(parser.dest())
                          + code.jump(parser.jump()))
                outputFile.write(output + "\n")
            elif kind == Parser.A_COMMAND:
                symbol = parser.symbol()
                try:
                    address = int(symbol)  # numeric literal such as @42
                except (TypeError, ValueError):
                    if table.contains(symbol):
                        address = table.getAddress(symbol)
                    else:
                        # First sighting of a variable: allocate a slot.
                        table.addEntry(symbol, next_data_addr)
                        address = next_data_addr
                        next_data_addr += 1
                output = format(address, "0{}b".format(WORD_SIZE))
                outputFile.write(output + "\n")
            elif kind == Parser.L_COMMAND:
                pass  # labels emit no code
            else:
                print("Bad Munkey!")
                # NOTE(review): assumed to belong to this branch; the
                # original layout did not make its nesting level clear.
                print("Original is " + parser.current() + " BIN: " + output
                      + "COMP: " + parser.comp())
def p_oscope(p):
    '''oscope : LEFTCURLYBRACKET'''
    # The string above is the grammar rule and must stay exactly as is.
    # An opening brace starts a new scope: bump the scope counters and
    # register a fresh symbol table numbered with the new outScope.
    entered_scope = main_table.prev_inScope + 1
    main_table.inScope = entered_scope
    main_table.prev_inScope += 1
    main_table.outScope += 1
    main_table.add_table(SymbolTable(main_table.outScope))
def assemble(fileName):
    """Translate the assembly file ``fileName`` into a ``.hack`` binary file.

    Pass one walks the source and stores the instruction address of each
    label (L-command) in the symbol table. Pass two writes one 16-character
    binary line for each A- or C-command, allocating data addresses from 16
    upwards for symbols that are not yet in the table.

    Args:
        fileName: path of the assembly source file.

    The result is written next to the source, with the extension replaced
    by ``.hack``.
    """
    import os

    import code
    from assemblerParser import Parser
    from symbolTable import SymbolTable

    WORD_SIZE = 16  # words are 16 bits long
    currentDataAddr = 16
    table = SymbolTable()

    # Symbol generating pass: labels do not occupy an instruction slot.
    with open(fileName, 'r') as source:
        parser = Parser(source)
        counter = 0
        while parser.hasMoreCommands():
            parser.advance()
            if parser.commandType() == Parser.L_COMMAND:
                table.addEntry(parser.symbol(), counter)
            else:
                counter += 1

    # Replace only the last extension; slicing at the first "." broke on
    # paths like "../prog.asm" and on names without a dot.
    newFileName = os.path.splitext(fileName)[0] + ".hack"

    # Code generating pass; the context manager closes both handles.
    with open(fileName, 'r') as source, open(newFileName, 'w') as outputFile:
        parser = Parser(source)
        while parser.hasMoreCommands():
            parser.advance()
            output = "BLANK"
            command = parser.commandType()

            if command == Parser.C_COMMAND:
                output = ("111" + code.comp(parser.comp())
                          + code.dest(parser.dest())
                          + code.jump(parser.jump()))
                outputFile.write(output + "\n")
            elif command == Parser.A_COMMAND:
                symbol = parser.symbol()
                try:
                    value = int(symbol)  # plain number
                except (TypeError, ValueError):
                    if table.contains(symbol):
                        value = table.getAddress(symbol)
                    else:
                        # Unknown symbol: it is a new variable.
                        table.addEntry(symbol, currentDataAddr)
                        value = currentDataAddr
                        currentDataAddr += 1
                # Zero-padded binary of WORD_SIZE digits.
                output = format(value, "0{}b".format(WORD_SIZE))
                outputFile.write(output + "\n")
            elif command == Parser.L_COMMAND:
                pass  # nothing to emit for a label
            else:
                print("Bad Munkey!")
                # NOTE(review): assumed to be part of this branch; the
                # original layout left its nesting level ambiguous.
                print("Original is " + parser.current() + " BIN: " + output
                      + "COMP: " + parser.comp())
def main():
    """Parse ``input3.txt``, evaluate it and dump the generated assembly.

    The assembly is printed and then written to ``teste.asm``. A
    ``ValueError`` raised anywhere in the pipeline is printed instead of
    propagating.
    """
    source_text = read_file("input3.txt")
    try:
        program_parser = Parser(source_text)
        root_scope = SymbolTable(None)
        syntax_tree = program_parser.parseProgram()
        # percorrer_arvore(syntax_tree)
        syntax_tree.Evaluate(root_scope)
        print(AssemblyCode.assembly_code)
        AssemblyCode.writeFile("teste.asm")
    except ValueError as problem:
        print(problem)
def main():
    """Parse ``input3.txt`` and evaluate the resulting tree.

    A ``ValueError`` raised while parsing or evaluating is printed instead
    of propagating.
    """
    source_text = read_file("input3.txt")
    try:
        syntax_tree = Parser(source_text).parseProgram()
        root_scope = SymbolTable(None)
        syntax_tree.Evaluate(root_scope)
    except ValueError as problem:
        print(problem)
def Evaluate(self, SymbolTable, whileFlag=0):
    """Call the function named by ``self.value`` and return its result symbol.

    The function node is looked up in the caller's table. Its first child
    is the parameter declaration block and the remaining children are the
    body statements. A child table is created, the parameters are declared
    in it, every argument is evaluated in the caller's table, type-checked
    and bound, and then the body runs.

    Args:
        SymbolTable: symbol table of the calling scope.
        whileFlag: passed through unchanged to every nested ``Evaluate``.

    Returns:
        The symbol stored under the function's own name in the child table.

    Raises:
        ValueError: if the argument count differs from the declaration or
            an argument's type differs from its parameter's type.
    """
    func_name = self.value
    func_node = SymbolTable.getSymbol(func_name, "func").getValue()
    var_dec = func_node.children[0]
    params = [declaration.children[0] for declaration in var_dec.children]

    # Check arity before doing any work. The message is a single literal:
    # the former backslash continuation leaked indentation into the text.
    if len(params) != len(self.children):
        raise ValueError("Number of arguments must be the same as declaration")

    funcSymbolTable = SymbolTableClass(SymbolTable)
    var_dec.Evaluate(funcSymbolTable, whileFlag)

    for param, arg_node in zip(params, self.children):
        symbol = param.Evaluate(funcSymbolTable, whileFlag).getValue()
        symbol_type = funcSymbolTable.getSymbol(symbol).getType()
        # Arguments are evaluated in the caller's scope, not the callee's.
        value_obj = arg_node.Evaluate(SymbolTable, whileFlag)
        if symbol_type != value_obj.getType():
            raise ValueError("Function argument must be the same as declared")
        funcSymbolTable.setSymbol(symbol, value_obj.getValue())

    # Children after the declaration block are the body statements.
    for statement in func_node.children[1:]:
        statement.Evaluate(funcSymbolTable, whileFlag)

    return funcSymbolTable.getSymbol(func_name)
def main(*args):
    """Scan every token and link identifiers and numbers to the symbol table.

    Each token's ``__repr__`` is invoked as it is read. Identifier and
    number tokens are looked up by their lower-cased first data value and
    inserted when missing; either way the token is associated with the
    entry. The loop stops at the end-of-file token, after which the
    table's ``__repr__`` is invoked.
    """
    scanner = Scanner()
    symbolTable = SymbolTable()
    tracked_codes = ('tc_ID', 'tc_NUMBER')

    while True:
        token = scanner.nextToken()
        token.__repr__()  # pretty print the token

        token_code = token.TokenCode
        if token_code == 'tc_EOF':
            break  # end of input
        if token_code not in tracked_codes:
            continue

        lexeme = token.DataValue[0].lower()
        entry = symbolTable.lookup(lexeme)
        if entry == -1:
            # -1 means "not in the table yet": add it and use the new slot.
            entry = symbolTable.insert(lexeme)
        token.setSymTabEntry(entry)

    symbolTable.__repr__()  # pretty print the table
def main(*args):
    """Drive the scanner to end of input, filling the symbol table.

    Every token has its ``__repr__`` invoked. For identifier and number
    tokens the lower-cased first data value is looked up in the table and
    inserted if absent, and the token is associated with the resulting
    entry. After the end-of-file token the table's ``__repr__`` is invoked.
    """
    scanner = Scanner()
    symbolTable = SymbolTable()

    reached_eof = False
    while not reached_eof:
        token = scanner.nextToken()
        token.__repr__()  # pretty print the token

        if token.TokenCode in ('tc_ID', 'tc_NUMBER'):
            key = token.DataValue[0].lower()
            found = symbolTable.lookup(key)
            # lookup() answers -1 for an unknown key; insert() then returns
            # the entry to associate with the token.
            token.setSymTabEntry(
                symbolTable.insert(key) if found == -1 else found)
        elif token.TokenCode == 'tc_EOF':
            reached_eof = True

    symbolTable.__repr__()  # pretty print the table
class CompilationEngine:
    """Recursive-descent compiler from Jack tokens to VM commands.

    Tokens are pulled from ``self.tokenizer``; VM commands are emitted
    through ``self.generator`` into ``self.out_stream`` and written to
    ``self.outfile`` once the whole class has been compiled. No syntax
    tree is built.
    """

    # Error templates, kept verbatim (including the historical spelling)
    # because callers may match on the message text.
    _MSG_IDENT = '{} is not a valid Jack identifier'
    _MSG_IDENT_ALT = '{} is not a valid Jack identifer.'

    def __init__(self, input_stream, output_file):
        """Store the tokenizer and output path and reset compilation state.

        Args:
            input_stream: tokenizer supplying the tokens.
            output_file: path the VM code is written to by compile_class.
        """
        self.tokenizer = input_stream
        self.outfile = output_file
        self.class_name = None
        self.out_stream = []
        self.buffer = []
        self.if_count = 0
        self.while_count = 0
        self.generator = VMWriter(self.out_stream)
        self.symbol_table = SymbolTable()
        # Binary operator symbol -> VM arithmetic command.
        self.op_table = {
            '+': 'ADD', '-': 'SUB', '&': 'AND', '|': 'OR',
            '<': 'LT', '>': 'GT', '=': 'EQ'
        }
        # Symbol-table kind -> VM memory segment.
        self.convert_kind = {
            'ARG': 'ARG',
            'STATIC': 'STATIC',
            'VAR': 'LOCAL',
            'FIELD': 'THIS'
        }

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _require_identifier(self, template):
        """Return the current token, or raise SyntaxError if not an identifier."""
        tk = self.tokenizer
        if tk.token_type() != 'IDENTIFIER':
            raise SyntaxError(template.format(tk.curr_token))
        return tk.curr_token

    def _define_current(self, _type, cat, template):
        """Define the current identifier in the symbol table and consume it."""
        name = self._require_identifier(template)
        self.symbol_table.define(name, _type, cat)
        self.tokenizer.advance()

    def _compile_name_list(self, _type, cat, first_template, rest_template):
        """Compile ``name (',' name)* ';'`` defining every name as (_type, cat)."""
        tk = self.tokenizer
        self._define_current(_type, cat, first_template)
        while tk.curr_token != ';':
            tk.advance()  # ","
            self._define_current(_type, cat, rest_template)
        tk.advance()  # ";"

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------
    def compile_class(self):
        """Compile a Jack class and write the VM code to ``self.outfile``.

        Raises:
            SyntaxError: If the class body is not closed by ``}``.

        Returns:
            list: Output stream containing the commands.
        """
        tk = self.tokenizer
        tk.advance()  # "class"
        self.class_name = tk.curr_token
        tk.advance()
        tk.advance()  # "{"

        while tk.curr_token in ('static', 'field'):
            self.compile_class_var_dec()
        while tk.curr_token in ('constructor', 'function', 'method'):
            self.compile_subroutine()

        if tk.curr_token != '}':
            raise SyntaxError('} expected at end.')

        with open(self.outfile, 'w') as f:
            f.write('\n'.join(self.out_stream))
        # The docstring always promised the stream; it is now returned.
        return self.out_stream

    def compile_class_var_dec(self):
        """Compile one ``static``/``field`` declaration.

        Raises:
            SyntaxError: If a declared name is not an identifier.
        """
        tk = self.tokenizer
        cat = tk.curr_token.upper()
        tk.advance()  # "static" or "field"
        _type = tk.curr_token
        tk.advance()
        self._compile_name_list(_type, cat, self._MSG_IDENT,
                                self._MSG_IDENT_ALT)

    def compile_subroutine(self):
        """Compile a constructor, function or method.

        Raises:
            SyntaxError: If the subroutine name is not an identifier.
        """
        tk = self.tokenizer
        self.symbol_table.reset()
        subroutine_type = tk.curr_token
        if subroutine_type == 'method':
            # A method receives the object as hidden argument 0.
            self.symbol_table.define('this', self.class_name, 'ARG')
        tk.advance()
        tk.advance()  # ("void" | type)

        sub_name = self._require_identifier(
            "Subroutine name ({}) not a valid identifier")
        func_name = "{}.{}".format(self.class_name, sub_name)
        tk.advance()
        tk.advance()  # "("
        self.compile_parameter_list()
        tk.advance()  # ")"
        tk.advance()  # "{"

        while 'var' == tk.curr_token:
            self.compile_var_dec()

        # The function header needs the number of locals, known only now.
        n_locals = self.symbol_table.var_count('VAR')
        self.generator.write_function(func_name, n_locals)

        if subroutine_type == 'constructor':
            # Allocate one word per field and anchor THIS at the block.
            n_fields = self.symbol_table.var_count('FIELD')
            self.generator.write_push_pop('push', 'CONST', n_fields)
            self.generator.write_call('Memory.alloc', 1)
            self.generator.write_push_pop('pop', 'POINTER', 0)
        elif subroutine_type == 'method':
            # Anchor THIS at the object passed as argument 0.
            self.generator.write_push_pop('push', 'ARG', 0)
            self.generator.write_push_pop('pop', 'POINTER', 0)

        self.compile_statements()
        tk.advance()  # "}"

    def compile_parameter_list(self):
        """Compile a (possibly empty) parameter list, defining each as ARG.

        Raises:
            SyntaxError: If a parameter name is not an identifier.
        """
        tk = self.tokenizer
        if tk.curr_token == ')':
            return

        template = self._MSG_IDENT
        while True:
            _type = tk.curr_token
            tk.advance()
            self._define_current(_type, 'ARG', template)
            if tk.curr_token == ')':
                break
            tk.advance()  # ","
            template = self._MSG_IDENT_ALT

    def compile_var_dec(self):
        """Compile one ``var`` declaration, defining each name as VAR.

        Raises:
            SyntaxError: If a declared name is not an identifier.
        """
        tk = self.tokenizer
        tk.advance()  # "var"
        _type = tk.curr_token
        tk.advance()
        self._compile_name_list(_type, 'VAR', self._MSG_IDENT_ALT,
                                self._MSG_IDENT_ALT)

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compile_statements(self):
        """Compile consecutive if/while/do/let/return statements."""
        tk = self.tokenizer
        handlers = {
            'if': self.compile_if_statement,
            'let': self.compile_let_statement,
            'do': self.compile_do_statement,
            'while': self.compile_while_statement,
            'return': self.compile_return_statement
        }
        while tk.curr_token in handlers:
            handlers[tk.curr_token]()

    def compile_let_statement(self):
        """Compile a ``let`` statement (plain or array assignment).

        Raises:
            SyntaxError: If the assignment target is not an identifier.
        """
        tk = self.tokenizer
        tk.advance()  # "let"
        target = self._require_identifier(self._MSG_IDENT_ALT)
        _type, cat, i = self.symbol_table.get(target)
        cat = self.convert_kind[cat]
        tk.advance()

        if tk.curr_token == '[':  # array assignment
            tk.advance()  # "["
            self.compile_expression()
            tk.advance()  # "]"
            # base + index; parked in TEMP 0 because evaluating the right
            # hand side may itself repoint THAT.
            self.generator.write_push_pop('push', cat, i)
            self.generator.write_arithmetic('ADD')
            self.generator.write_push_pop('pop', 'TEMP', 0)
            tk.advance()  # "="
            self.compile_expression()
            self.generator.write_push_pop('push', 'TEMP', 0)
            self.generator.write_push_pop('pop', 'POINTER', 1)
            self.generator.write_push_pop('pop', 'THAT', 0)
        else:
            tk.advance()  # "="
            self.compile_expression()
            self.generator.write_push_pop('pop', cat, i)

        tk.advance()  # ";"

    def compile_if_statement(self):
        """Compile an ``if`` statement with optional ``else``."""
        tk = self.tokenizer
        tk.advance()  # "if"
        tk.advance()  # "("
        self.compile_expression()
        tk.advance()  # ")"

        l1 = "IF_TRUE{}".format(self.if_count)
        l2 = "IF_FALSE{}".format(self.if_count)
        l3 = "IF_END{}".format(self.if_count)
        self.generator.write_ifgoto(l1)
        self.generator.write_goto(l2)
        self.generator.write_label(l1)
        # Bump before compiling the body so nested ifs get fresh labels.
        self.if_count += 1

        tk.advance()  # "{"
        self.compile_statements()
        self.generator.write_goto(l3)
        tk.advance()  # "}"
        self.generator.write_label(l2)

        if tk.curr_token == 'else':
            tk.advance()  # "else"
            tk.advance()  # "{"
            self.compile_statements()
            tk.advance()  # "}"

        self.generator.write_label(l3)

    def compile_while_statement(self):
        """Compile a ``while`` statement."""
        tk = self.tokenizer
        tk.advance()  # "while"
        l1 = "WHILE_EXP{}".format(self.while_count)
        l2 = "WHILE_END{}".format(self.while_count)
        self.while_count += 1

        self.generator.write_label(l1)
        tk.advance()  # "("
        self.compile_expression()
        # Leave the loop when the condition is false.
        self.generator.write_arithmetic("NOT")
        tk.advance()  # ")"
        tk.advance()  # "{"
        self.generator.write_ifgoto(l2)
        self.compile_statements()
        self.generator.write_goto(l1)
        self.generator.write_label(l2)
        tk.advance()  # "}"

    def compile_do_statement(self):
        """Compile a ``do`` statement.

        Raises:
            SyntaxError: If the call does not start with an identifier.
        """
        tk = self.tokenizer
        tk.advance()  # "do"
        var_name = self._require_identifier('{} is not a proper identifier.')
        tk.advance()
        self.compile_subroutine_call(var_name)
        # The call's return value is unused: discard it.
        self.generator.write_push_pop('pop', 'TEMP', 0)
        tk.advance()  # ";"

    def compile_return_statement(self):
        """Compile a ``return`` statement."""
        tk = self.tokenizer
        tk.advance()  # "return"
        if tk.curr_token != ';':
            self.compile_expression()
        else:
            # No value to return: push 0 so the caller can pop something.
            self.generator.write_push_pop('push', 'CONST', 0)
        self.generator.write_return()
        tk.advance()  # ";"

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def compile_expression_list(self):
        """Compile a comma separated expression list.

        Returns:
            n_args (int): Number of expressions compiled.
        """
        tk = self.tokenizer
        n_args = 0
        if tk.curr_token == ')':
            return n_args

        self.compile_expression()
        n_args += 1
        while tk.curr_token != ')':
            tk.advance()  # ","
            self.compile_expression()
            n_args += 1
        return n_args

    def compile_expression(self):
        """Compile ``term (op term)*`` strictly left to right.

        Raises:
            ValueError: If an operator has no translation.
        """
        tk = self.tokenizer
        self.compile_term()
        while tk.curr_token in ('+', '-', '*', '/', '&', '|', '<', '>', '='):
            op = tk.curr_token
            tk.advance()
            self.compile_term()
            if op in self.op_table:
                self.generator.write_arithmetic(self.op_table.get(op))
            elif op == '*':
                self.generator.write_call('Math.multiply', 2)
            elif op == '/':
                self.generator.write_call('Math.divide', 2)
            else:
                raise ValueError("{} not supported op.".format(op))

    def compile_term(self):
        """Compile a single term.

        Raises:
            SyntaxError: If the term starts with an unexpected token.
        """
        tk = self.tokenizer
        if tk.token_type() == 'STRING_CONST':
            self.compile_string()
        elif tk.token_type() == 'INT_CONST':
            self.generator.write_push_pop('push', 'CONST', int(tk.curr_token))
            tk.advance()
        elif tk.curr_token in ('true', 'false', 'null'):
            # false/null are 0; true is its bitwise negation.
            self.generator.write_push_pop('push', 'CONST', 0)
            if tk.curr_token == 'true':
                self.generator.write_arithmetic("NOT")
            tk.advance()
        elif tk.curr_token == 'this':
            self.generator.write_push_pop('push', 'POINTER', 0)
            tk.advance()
        elif tk.curr_token in ('-', '~'):
            op = tk.curr_token
            tk.advance()
            self.compile_term()
            self.generator.write_arithmetic('NEG' if op == '-' else 'NOT')
        elif tk.curr_token == '(':
            tk.advance()  # "("
            self.compile_expression()
            tk.advance()  # ")"
        else:
            var_name = self._require_identifier(
                '{} is not a valid identifier.')
            tk.advance()
            if tk.curr_token == '[':
                tk.advance()  # "["
                self.compile_expression()
                tk.advance()  # "]"
                _type, cat, i = self.symbol_table.get(var_name)
                cat = self.convert_kind[cat]
                # base + index, then read through THAT.
                self.generator.write_push_pop('push', cat, i)
                self.generator.write_arithmetic('ADD')
                self.generator.write_push_pop('pop', 'POINTER', 1)
                self.generator.write_push_pop('push', 'THAT', 0)
            elif tk.curr_token in ('.', '('):
                self.compile_subroutine_call(var_name)
            else:
                _type, cat, i = self.symbol_table.get(var_name)
                cat = self.convert_kind[cat]
                self.generator.write_push_pop('push', cat, i)

    def compile_subroutine_call(self, var_name):
        """Compile a call whose leading identifier was already consumed.

        Args:
            var_name: The identifier preceding ``.`` or ``(``.
        """
        tk = self.tokenizer
        func_name = var_name
        n_args = 0

        if tk.curr_token == '.':
            tk.advance()  # "."
            sub_name = tk.curr_token  # subroutine name
            tk.advance()
            _type, cat, i = self.symbol_table.get(var_name)
            if _type is not None:
                # Known variable: method call on that instance, which is
                # passed as the hidden first argument.
                cat = self.convert_kind[cat]
                self.generator.write_push_pop('push', cat, i)
                func_name = "{}.{}".format(_type, sub_name)
                n_args += 1
            else:
                # Not a variable: treated as a class name.
                func_name = "{}.{}".format(var_name, sub_name)
        elif tk.curr_token == '(':
            # Bare call: method on the current object.
            func_name = "{}.{}".format(self.class_name, var_name)
            n_args += 1
            self.generator.write_push_pop('push', 'POINTER', 0)

        tk.advance()  # "("
        n_args += self.compile_expression_list()
        tk.advance()  # ")"
        self.generator.write_call(func_name, n_args)

    def compile_string(self):
        """Compile a string constant via String.new / String.appendChar."""
        tk = self.tokenizer
        string = tk.curr_token[1:]  # drop the first character of the token
        self.generator.write_push_pop('push', 'CONST', len(string))
        self.generator.write_call('String.new', 1)
        for char in string:
            self.generator.write_push_pop('push', 'CONST', ord(char))
            self.generator.write_call('String.appendChar', 2)
        tk.advance()
t_DEREF_ONE =r'\.' t_DEREF_TWO =r'–>' t_QUES_MARK = r'\?' t_COMMA = r',' t_CHAR_CONST = r"\'.\'" t_DOLAR='@' def t_COMMENT(t): r"(/\*([^*]|[\r\n]|(\*+([^*/]|[\r\n])))*\*+/)|(//.*)" pass # No return value. Token discarded # A regular expression rule with some action code main_table = MainSymbolTable() main_table.add_table(SymbolTable(main_table.outScope)) def t_FLOATNUM(t): r'[0-9]+\.[0-9]+' t.value = float(t.value) return t def t_INTNUM(t): r'[0-9]+' t.value = int(t.value) return t # Define a rule so we can track line numbers def t_newline(t):
from semanticCube import SemanticCube from symbolTable import SymbolTable, Variable from quad import Quad import condHelpers import helpers quadruple = Quad.instantiate() symbolTable = SymbolTable.instantiate() varPointer = None tempCounter = 0 # getPointingScope # What: Gets the pointer to either global or class memory # Parameters: The current Scope (function or class) # Returns the Scope from the sent function or class # When is it used: To get the Scope for the quad to be saved def getPointingScope(currentScope): if currentScope.getContext() == 'classFunction': return symbolTable.getGlobalScope().getScopeClasses()[symbolTable.getStack()] else: return symbolTable.getGlobalScope() # check_multdiv_operator # What: Gets and checks left and right operators for multiplication and division # Parameters: A quadruple object # When is it used: When PLY reads a multiplication or division def check_multdiv_operator(quadruple): workingStack = quadruple.getWorkingStack() if workingStack: if workingStack[-1] == '*' or workingStack[-1] == '/':
from parser import Parser
from node import Node
from symbolTable import SymbolTable
import sys

# Interpreter entry point: parse the file named on the command line and
# evaluate the resulting tree against a fresh root symbol table.
if len(sys.argv) == 1:
    raise Exception("didn't pass the filename")

file = sys.argv[1]
# The context manager closes the handle even if reading fails.
with open(file, "r") as f:
    code = f.read()

tree = Parser.run(code)
table = SymbolTable(None)
tree.Evaluate(table)
import myLanguageSpecs
from programInternalForm import ProgramInternalForm
from symbolTable import SymbolTable
import re

# Shared scanner state: symbol table, program internal form, input file.
st = SymbolTable()
pif = ProgramInternalForm()
filename = 'input.txt'

# Planned token detection (not implemented here):
#   keyword / operator / separator -> addToPif(code_of_token, -1)
#   identifier -> pos = pos(token, symTable); addToPif(token, st)
#   constant   -> pos + addToPif(code_of_constant, pos)


def isSeparator(char):
    """Tell whether ``char`` is listed in ``myLanguageSpecs.separators``."""
    separators = myLanguageSpecs.separators
    return char in separators


def isOperator(char):
    """Tell whether ``char`` is listed in ``myLanguageSpecs.operators``."""
    operators = myLanguageSpecs.operators
    return char in operators


def isReserved(word):
    """Tell whether ``word`` is listed in ``myLanguageSpecs.reservedWords``."""
    reserved = myLanguageSpecs.reservedWords
    return word in reserved


def isIdentifier(word):
    """Tell whether ``word`` is a letter followed by up to 8 letters/digits."""
    matched = re.match(r'^[a-zA-Z]([a-zA-Z]|[0-9]){0,8}$', word)
    return bool(matched)
class CompilationEngine:
    """Recursive-descent compiler that turns Jack tokens into VM code.

    Tokens come from ``self.tokenizer`` and VM commands are emitted through
    ``self.vm_writer``. The XML output of an earlier version is disabled;
    ``appendToOutFile`` and ``xmlify`` are kept as no-ops for callers.
    Constructing an instance compiles the whole class immediately.
    """

    # Character <-> XML entity tables. The source had lost the entity side
    # ("<" mapped to "<", and a stray triple quote), which did not parse.
    _XML_ESCAPES = {"<": "&lt;", ">": "&gt;", '"': "&quot;", "&": "&amp;"}
    _XML_UNESCAPES = {"&lt;": "<", "&gt;": ">", "&quot;": '"', "&amp;": "&"}

    # Binary operator symbol -> VM arithmetic command.
    _OP_TABLE = {
        '+': 'add', '-': 'sub', '&': 'and', '|': 'or',
        '<': 'lt', '>': 'gt', '=': 'eq'
    }

    def __init__(self, inp_file, out_file):
        """Create the collaborators and compile the class right away.

        Args:
            inp_file: source handed to the ``Tokenizer``.
            out_file: path handed to the ``VMWriter``; also opened here in
                append mode and kept as ``self.out_file``.
        """
        self.tokenizer = Tokenizer(inp_file)
        self.sym_tbl = SymbolTable()
        self.vm_writer = VMWriter(out_file)
        # NOTE(review): this handle is never written to or closed in this
        # class; kept because it is a public attribute.
        self.out_file = open(out_file, "a")
        self.current_token = ""
        self.current_token_type = ""
        self.curr_token_ptr = -1
        self.label_counter = {
            "if": 0,
            "while": 0
        }
        self.advance()
        self.compileClass()

    def appendToOutFile(self, content):
        """Disabled XML sink: accepts ``content`` and does nothing."""
        # self.out_file.write(content)
        pass

    def xmlify(self, tag=None, content=None):
        """Disabled XML emitter: resolves its defaults and emits nothing.

        Args:
            tag: element name; defaults to the current token type.
            content: element text; defaults to the current token.
        """
        if tag is None:
            tag = self.current_token_type
        if content is None:
            content = self.current_token
        content = self._XML_ESCAPES.get(content, content)
        # self.appendToOutFile(f"<{tag}> {content} </{tag}>\n")

    # ------------------------------------------------------------------
    # program structure
    # ------------------------------------------------------------------
    def compileClass(self):
        """Compile ``class Name { classVarDec* subroutineDec* }``."""
        self.eat("class")
        self.class_name = self.current_token
        self.advance()
        self.eat("{")
        self.compileClassVarDec()
        self.compileSubroutineDec()
        self.eat("}")

    def compileClassVarDec(self):
        """Compile all ``field``/``static`` declarations.

        Fields are stored under kind ``"this"``; statics keep ``"static"``.
        """
        while self.current_token in ("field", "static"):
            var_kind = "this" if self.current_token == "field" \
                else self.current_token
            self.advance()
            var_type = self.current_token
            self.advance()
            var_name = self.current_token
            self.advance()
            self.sym_tbl.define(var_name, var_type, var_kind)
            while self.current_token == ",":
                self.eat(",")
                self.sym_tbl.define(self.current_token, var_type, var_kind)
                self.advance()
            self.eat(";")

    def compileSubroutineDec(self):
        """Compile every constructor, function and method of the class."""
        while self.current_token in ("constructor", "function", "method"):
            subroutine = ""
            subroutine_name = ""
            if self.current_token == "constructor":
                # Constructors must read "constructor <ClassName> new".
                self.eat("constructor")
                self.eat(self.class_name)
                self.eat("new")
                subroutine = "constructor"
                subroutine_name = "new"
            elif self.current_token in ("function", "method"):
                subroutine = self.current_token
                self.advance()
                self.advance()  # return type, not needed for code generation
                subroutine_name = self.current_token
                self.advance()
            self.eat("(")
            self.compileParamList(subroutine)
            self.eat(")")
            self.compileSubroutineBody(subroutine, subroutine_name)

    def compileParamList(self, subroutine):
        """Start a new subroutine scope and define its arguments.

        Args:
            subroutine: ``"constructor"``, ``"function"`` or ``"method"``;
                a method gets ``this`` as hidden argument 0.
        """
        self.sym_tbl.startSubroutine()
        if subroutine == "method":
            self.sym_tbl.define("this", self.class_name, "argument")
        if self.current_token != ")":
            param_type = self.current_token
            self.advance()
            param_name = self.current_token
            self.advance()
            self.sym_tbl.define(param_name, param_type, "argument")
            while self.current_token == ",":
                self.eat(",")
                param_type = self.current_token
                self.advance()
                param_name = self.current_token
                self.advance()
                self.sym_tbl.define(param_name, param_type, "argument")

    def compileSubroutineBody(self, subroutine, subroutine_name):
        """Compile ``{ varDec* statements }`` and emit the function header.

        Args:
            subroutine: kind of subroutine being compiled.
            subroutine_name: its unqualified name.
        """
        self.eat("{")
        while self.current_token == "var":
            self.compilevarDec()
        func_name = f"{self.class_name}.{subroutine_name}"
        print(func_name)
        # The header needs the local count, which is known only now.
        n_locals = self.sym_tbl.varCount("local")
        self.vm_writer.writeFunction(func_name, n_locals)
        if subroutine == "constructor":
            # Allocate one word per field and anchor "this" at the block.
            fields = self.sym_tbl.varCount("this")
            self.vm_writer.writePushPop("push", "constant", fields)
            self.vm_writer.writeCall("Memory.alloc", 1)
            self.vm_writer.writePushPop("pop", "pointer", 0)
        elif subroutine == "method":
            # Anchor "this" at the object passed as argument 0.
            self.vm_writer.writePushPop("push", "argument", 0)
            self.vm_writer.writePushPop("pop", "pointer", 0)
        self.compileStatements()
        self.eat("}")

    def compilevarDec(self):
        """Compile one ``var type name (, name)* ;`` declaration."""
        self.eat("var")
        var_type = self.current_token
        self.advance()
        var_name = self.current_token
        self.advance()
        self.sym_tbl.define(var_name, var_type, "local")
        while self.current_token == ",":
            self.eat(",")
            var_name_cont = self.current_token
            self.advance()
            self.sym_tbl.define(var_name_cont, var_type, "local")
        self.eat(";")

    # ------------------------------------------------------------------
    # statements
    # ------------------------------------------------------------------
    def compileStatements(self):
        """Compile consecutive let/if/while/do/return statements."""
        handlers = {
            "let": self.compileLet,
            "if": self.compileIf,
            "while": self.compileWhile,
            "do": self.compileDo,
            "return": self.compileReturn,
        }
        while self.current_token in handlers:
            handlers[self.current_token]()

    def compileLet(self):
        """Compile ``let name ([expr])? = expr ;``."""
        self.eat("let")
        var_name = self.current_token
        (_type, kind, index) = self.sym_tbl.getVariable(var_name)
        self.advance()
        if self.current_token == "[":
            self.eat("[")
            self.compileExpression()
            self.eat("]")
            # base + index; parked in temp 0 because evaluating the right
            # hand side may itself repoint "that".
            self.vm_writer.writePushPop("push", kind, index)
            self.vm_writer.writeArithmetic("add")
            self.vm_writer.writePushPop("pop", "temp", 0)
            self.eat("=")
            self.compileExpression()
            self.vm_writer.writePushPop("push", "temp", 0)
            self.vm_writer.writePushPop("pop", "pointer", 1)
            self.vm_writer.writePushPop("pop", "that", 0)
        else:
            self.eat("=")
            self.compileExpression()
            self.vm_writer.writePushPop("pop", kind, index)
        self.eat(";")

    def compileIf(self):
        """Compile ``if (expr) { ... } (else { ... })?``."""
        self.eat("if")
        self.eat("(")
        self.compileExpression()
        self.eat(")")
        suffix = self.label_counter['if']
        label_true = f"IF_TRUE{suffix}"
        label_false = f"IF_FALSE{suffix}"
        label_end = f"IF_END{suffix}"
        # Bump before compiling the body so nested ifs get fresh labels.
        self.label_counter["if"] += 1
        self.vm_writer.writeIf(label_true)
        self.vm_writer.writeGoto(label_false)
        self.vm_writer.writeLabel(label_true)
        self.eat("{")
        self.compileStatements()
        self.vm_writer.writeGoto(label_end)
        self.eat("}")
        self.vm_writer.writeLabel(label_false)
        if self.current_token == "else":
            self.eat("else")
            self.eat("{")
            self.compileStatements()
            self.eat("}")
        self.vm_writer.writeLabel(label_end)

    def compileWhile(self):
        """Compile ``while (expr) { ... }``."""
        suffix = self.label_counter['while']
        label_while = f"WHILE_EXP{suffix}"
        label_end = f"WHILE_END{suffix}"
        self.label_counter['while'] += 1
        self.eat("while")
        self.vm_writer.writeLabel(label_while)
        self.eat("(")
        self.compileExpression()
        # Leave the loop when the condition is false.
        self.vm_writer.writeArithmetic("not")
        self.vm_writer.writeIf(label_end)
        self.eat(")")
        self.eat("{")
        self.compileStatements()
        self.vm_writer.writeGoto(label_while)
        self.eat("}")
        self.vm_writer.writeLabel(label_end)

    def compileDo(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.eat("do")
        func_name = self.current_token
        self.advance()
        if self.current_token == ".":
            self.eat(".")
            name2 = self.current_token  # method_name
            func_name = f"{func_name}.{name2}"
            self.advance()
        self.handleSubroutineCall(func_name)
        self.vm_writer.writePushPop("pop", "temp", 0)
        self.eat(";")

    def compileReturn(self):
        """Compile ``return expr? ;``; a bare return pushes constant 0."""
        self.eat("return")
        if self.current_token != ";":
            self.compileExpression()
        else:
            self.vm_writer.writePushPop("push", "constant", 0)
        self.vm_writer.writeReturn()
        self.eat(";")

    # ------------------------------------------------------------------
    # expressions
    # ------------------------------------------------------------------
    def compileExpression(self):
        """Compile ``term (op term)*`` strictly left to right.

        Returns:
            str: a textual rendering of the expression.

        Raises:
            SyntaxError: if an operator has no translation.
        """
        exp = str(self.compileTerm())
        while self.current_token in ("+", "-", "*", "/", "&", "|", "<", ">", "="):
            op = self.current_token
            self.advance()
            term_cont = self.compileTerm()
            if op in self._OP_TABLE:
                self.vm_writer.writeArithmetic(self._OP_TABLE[op])
            elif op == "*":
                self.vm_writer.writeCall("Math.multiply", 2)
            elif op == "/":
                self.vm_writer.writeCall("Math.divide", 2)
            else:
                raise SyntaxError("Invalid Operator")
            exp = exp + f" {op} {term_cont}"
        return exp

    def compileExpressionList(self):
        """Compile a comma separated expression list.

        Returns:
            int: number of expressions compiled.
        """
        args = 0
        if self.current_token != ")":
            self.compileExpression()
            args += 1
            while self.current_token == ",":
                self.eat(",")
                self.compileExpression()
                args += 1
        return args

    def compileTerm(self):
        """Compile one term and return its textual rendering.

        Covers constants, keyword constants, variables, array reads,
        subroutine calls, parenthesised expressions and unary operators.
        """
        full_term = ""
        if self.current_token_type in ("identifier", "stringConstant", "integerConstant") \
                or self.current_token in ("true", "false", "null", "this"):
            term = self.current_token
            if self.current_token_type == "stringConstant":
                self.compileString(term)
            elif self.current_token_type == "integerConstant":
                self.vm_writer.writePushPop("push", "constant", term)
                print(f"push constant {term}")
            elif self.current_token in ("true", "false", "null"):
                # false/null are 0; true is its bitwise negation.
                self.vm_writer.writePushPop("push", "constant", 0)
                if self.current_token == "true":
                    self.vm_writer.writeArithmetic("not")
            elif self.current_token == "this":
                self.vm_writer.writePushPop("push", "pointer", 0)
            full_term = str(term)

            if self.current_token_type == "identifier":
                (_type, kind, index) = self.sym_tbl.getVariable(self.current_token)
                self.advance()
                # Explicit flags replace the former substring tests on the
                # rendered term ('"[" in full_term', '"(" in full_term').
                is_array = False
                is_call = False
                if self.current_token == "[":
                    self.eat("[")
                    exp = self.compileExpression()
                    self.eat("]")
                    full_term = full_term + f"[{exp}]"
                    is_array = True
                elif self.current_token == ".":
                    self.eat(".")
                    name2 = self.current_token  # method_name
                    full_term = full_term + f".{name2}"
                    self.advance()
                if self.current_token == "(":
                    full_term = full_term + "()"
                    is_call = True

                if is_array:
                    # base + index, then read through "that".
                    self.vm_writer.writePushPop("push", kind, index)
                    self.vm_writer.writeArithmetic("add")
                    self.vm_writer.writePushPop("pop", "pointer", 1)
                    self.vm_writer.writePushPop("push", "that", 0)
                elif is_call:
                    self.handleSubroutineCall(full_term)
                else:
                    self.vm_writer.writePushPop("push", kind, index)
            else:
                self.advance()
        elif self.current_token == "(":
            self.eat("(")
            exp = self.compileExpression()
            self.eat(")")
            full_term = full_term + f"({exp})"
        elif self.current_token in ("-", "~"):
            uop = self.current_token
            self.advance()
            term_cont = self.compileTerm()
            self.vm_writer.writeArithmetic("neg" if uop == "-" else "not")
            full_term = full_term + f"{uop}{term_cont}"
        return full_term

    def handleSubroutineCall(self, func_name):
        """Compile the argument list of a call and emit the call itself.

        Args:
            func_name: ``name`` or ``owner.name``, optionally followed by a
                trailing ``"()"`` marker, which is stripped.
        """
        args = 0
        if "(" in func_name:
            func_name = func_name[0:-2]
        if "." not in func_name:
            # Bare call: method on the current object.
            func_name = f"{self.class_name}.{func_name}"
            args += 1
            self.vm_writer.writePushPop("push", "pointer", 0)
        else:
            parts = func_name.split(".")
            c_name, s_name = parts[0], parts[1]
            (_type, kind, index) = self.sym_tbl.getVariable(c_name)
            if _type is not None:
                # Owner is a variable: method call on that instance, which
                # is passed as the hidden first argument.
                self.vm_writer.writePushPop("push", kind, index)
                func_name = f"{_type}.{s_name}"
                args += 1
        self.eat("(")
        args += self.compileExpressionList()
        self.eat(")")
        print(func_name, args)
        self.vm_writer.writeCall(func_name, args)

    def compileString(self, string):
        """Emit code building ``string`` via String.new / String.appendChar."""
        self.vm_writer.writePushPop("push", "constant", len(string))
        self.vm_writer.writeCall("String.new", 1)
        for char in string:
            self.vm_writer.writePushPop("push", "constant", ord(char))
            self.vm_writer.writeCall("String.appendChar", 2)

    # ------------------------------------------------------------------
    # token cursor
    # ------------------------------------------------------------------
    def eat(self, string):
        """Consume the current token, which must equal ``string``.

        Raises:
            SyntaxError: if the current token differs.
        """
        if self.current_token != string:
            raise SyntaxError(
                f"Expected {string} in place of {self.current_token}")
        self.advance()

    def advance(self):
        """Load the next token, decoding XML entities back to characters.

        Returns:
            bool: True if a token was loaded, False when the tokenizer has
            none left.
        """
        if self.tokenizer.advance():
            (token, token_type) = self.tokenizer.tokenWithType()
            token = self._XML_UNESCAPES.get(token, token)
            self.current_token = token
            self.current_token_type = token_type
            self.curr_token_ptr += 1
            return True
        return False
output = '\n'.join(compiler.output_line_list) + '\n' self.path_w = f'{self.p_file.parent}/{self.p_file.stem}.vm' with open(self.path_w, mode='w') as f: f.write(output) if __name__ == '__main__': path: str = sys.argv[1] p_path: pathlib.Path = pathlib.Path(path) p_file_list: 'list[pathlib.Path]' = [] if p_path.is_dir(): p_file_list = list(p_path.glob('**/*.jack')) else: p_file_list = [p_path] symbol_table = SymbolTable() # for p_file in p_file_list: # print(p_file) # compiler = CompilationEngine(symbol_table, p_file) # compiler.compile(is_first_run=True) # print(compiler.symbol_table.subroutine_dic) # print(compiler.symbol_table.class_scope) for p_file in p_file_list: print(p_file) compiler = CompilationEngine(symbol_table, p_file) compiler.compile() compiler.saveToFile()
from parser import Parser
from node import Node
from symbolTable import SymbolTable
from writer import *
import sys

# Driver script: parse the source file named on the command line, evaluate
# the resulting tree against a fresh symbol table, then call Writer.file()
# (presumably emits the generated output -- confirm in the writer module).
if len(sys.argv) == 1:
    raise Exception("didn't pass the filename")

file = sys.argv[1]

# Read through a context manager so the handle is closed even when read()
# raises; the original left the file open for the life of the process.
with open(file, "r") as f:
    code = f.read()

tree = Parser.run(code)
table = SymbolTable()
tree.Evaluate(table)
Writer.file()
class Parser:
    # Yacc-style grammar driver (``build`` hands this object to ``yacc.yacc``;
    # presumably ply.yacc -- the import is not visible in this excerpt).
    #
    # IMPORTANT: the docstring of every ``p_*`` method IS the grammar
    # production consumed by yacc, and the first rule defined is the start
    # symbol.  Do not reword those docstrings or reorder the methods; notes
    # are therefore kept in ``#`` comments.
    #
    # Most rule actions only print the production being reduced (trace
    # output); the ones that do more are commented individually.

    # Token names exported by the lexer.
    tokens = Lexer().tokens

    def __init__(self):
        # Counters backing new_temp() / new_bool_temp() ("T<n>" / "B<n>").
        self.tempCount = 0
        self.tempCountBool = 0
        self.codeGenerator = CodeGenerator()
        self.symbolTable = SymbolTable()
        # Last array index/length seen; written by several exp/lvalue rules
        # and read by p_lvalue_id_array.
        self.array_length = 0
        # Names of lvalues seen so far; find_place() pops from it.
        self.var_stack = []
        # Operand carried between comparison reductions (see p_exp_gt_exp).
        self.latest_bool = None

    def p_program(self, p):
        """program : declist MAIN LRB RRB block"""
        print("program : declist MAIN LRB RRB block")
        # Whole program reduced: close the symbol table and code generator
        # and dump their state.
        self.symbolTable.end()
        self.symbolTable.print_symbolTable()
        self.codeGenerator.end()
        print(NonTerminal.nonTerminals_list)

    def p_program_simple(self, p):
        """program : MAIN LRB RRB block"""
        print("program : MAIN LRB RRB block")
        # Same finalisation as p_program, for a program without declarations.
        self.symbolTable.end()
        self.symbolTable.print_symbolTable()
        self.codeGenerator.end()
        print(NonTerminal.nonTerminals_list)

    def p_declist_dec(self, p):
        """declist : dec"""
        print("declist : dec")

    def p_declist_declist(self, p):
        """declist : declist dec"""
        print("declist : declist dec")

    def p_dec_vardec(self, p):
        """dec : vardec"""
        print("dec : vardec")

    def p_dec_funcdec(self, p):
        """dec : funcdec"""
        print("dec : funcdec")

    def p_type_integer(self, p):
        """type : INTEGER"""
        print("type : INTEGER")
        # A type terminates a declaration list: drop the pending lvalue names
        # and record the type in the symbol table.
        self.var_stack.clear()
        self.symbolTable.set_var_type('Int')

    def p_type_float(self, p):
        """type : FLOAT"""
        print("type : FLOAT")
        self.var_stack.clear()
        self.symbolTable.set_var_type('Float')

    def p_type_boolean(self, p):
        """type : BOOLEAN"""
        print("type : BOOLEAN")
        self.var_stack.clear()
        self.symbolTable.set_var_type('Boolean')

    def p_iddec_lvalue(self, p):
        """iddec : lvalue"""
        print("iddec : lvalue")

    def p_iddec_lvalue_assign(self, p):
        """iddec : lvalue ASSIGN exp"""
        print("iddec : lvalue ASSIGN exp")
        print(p[1])
        print(p[3])
        place = self.new_temp()
        exp_place = ''
        # A right-hand side that produced no value object (None) is looked up
        # through the name on top of var_stack instead.
        if p[3] is None:
            exp_place = self.find_place()
        # Bind the assigned variable (next name on var_stack) to the new temp.
        self.symbolTable.add_place(self.var_stack.pop(), place)
        self.codeGenerator.assign(p, place, exp_place=exp_place)

    def p_idlist_iddec(self, p):
        """idlist : iddec"""
        print("idlist : iddec")

    def p_idlist_idlist(self, p):
        """idlist : idlist COMMA iddec"""
        print("idlist : idlist COMMA iddec")

    def p_vardec_idlist(self, p):
        """vardec : idlist COLON type SEMICOLON"""
        print("vardec : idlist COLON type SEMICOLON")

    def p_funcdec_type_block(self, p):
        """funcdec : FUNCTION ID LRB paramdecs RRB COLON type block"""
        print("funcdec : FUNCTION ID LRB paramdecs RRB COLON type block")
        # p[2] is the function name.
        self.symbolTable.new_scope_end(p[2])

    def p_funcdec_block(self, p):
        """funcdec : FUNCTION ID LRB paramdecs RRB block"""
        print("funcdec : FUNCTION ID LRB paramdecs RRB block")
        self.symbolTable.new_scope_end(p[2])

    def p_paramdecs_paramdecslist(self, p):
        """paramdecs : paramdecslist"""
        print("paramdecs : paramdecslist")

    def p_paramdecs_lambda(self, p):
        """paramdecs : """
        print("paramdecs : ")

    def p_paramdecslist_paramdec(self, p):
        """paramdecslist : paramdec"""
        print("paramdecslist : paramdec")
        self.symbolTable.new_scope_begin()

    def p_paramdecslist_paramdecslist(self, p):
        """paramdecslist : paramdecslist COMMA paramdec"""
        print("paramdecslist : paramdecslist COMMA paramdec")
        # NOTE(review): new_scope_begin() runs once per parameter (here and in
        # p_paramdecslist_paramdec) -- confirm SymbolTable tolerates that.
        self.symbolTable.new_scope_begin()

    def p_paramdec_id(self, p):
        """paramdec : ID COLON type"""
        print("paramdec : ID COLON type")
        self.symbolTable.add_param_var(p[1])

    # TODO
    def p_paramdec_id_array(self, p):
        """paramdec : ID LSB RSB COLON type"""
        print("paramdec : ID LSB RSB COLON type")

    def p_block_stmtlist(self, p):
        """block : LCB stmtlist RCB"""
        print("block : LCB stmtlist RCB")

    def p_stmtlist_stmt(self, p):
        """stmtlist : stmt"""
        print("stmtlist : stmt")

    def p_stmtlist_stmtlist(self, p):
        """stmtlist : stmtlist stmt"""
        print("stmtlist : stmtlist")

    def p_lvalue_id(self, p):
        """lvalue : ID"""
        print("lvalue : ID")
        print(p[1])
        self.var_stack.append(p[1])
        # First use of a name declares it.
        if not self.symbolTable.already_defined(p[1]):
            self.symbolTable.add_variable(p[1])
        # Remember this variable's place; p_lvalue_id_array reads it as the
        # index when the identifier is used as an array subscript.
        self.array_length = self.find_place(p[1])

    def p_lvalue_id_array(self, p):
        """lvalue : ID array"""
        print("lvalue : ID array")
        index = self.array_length
        # A string index is a place name: resolve it through
        # NonTerminal.nonTerminals_list before building the element name.
        if type(index) == str:
            index = NonTerminal.nonTerminals_list[index]
            name = p[1] + '[' + str(index) + ']'
            print("!!!!!!!!!!!!!")
        else:
            print("@@@@@@@@@@")
            name = p[1] + '[' + str(index) + ']'
        print(name)
        self.var_stack.append(name)
        # The array counts as declared once its element 0 exists.
        if not self.symbolTable.already_defined(p[1] + '[' + str(0) + ']'):
            self.symbolTable.add_array(p[1], index)

    def p_array(self, p):
        """array : LSB exp RSB"""
        print("array : LSB exp RSB")

    def p_case_where(self, p):
        """case : WHERE exp COLON stmtlist"""
        print("case : WHERE exp COLON stmtlist")

    def p_cases_case(self, p):
        """cases : case"""
        print("cases : case")

    def p_cases_cases(self, p):
        """cases : cases case"""
        print("cases : cases case")

    def p_stmt_return(self, p):
        """stmt : RETURN exp SEMICOLON"""
        print("stmt : RETURN exp SEMICOLON")

    def p_stmt_exp(self, p):
        """stmt : exp SEMICOLON"""
        print("stmt : exp SEMICOLON")
        self.symbolTable.stack.clear()

    def p_stmt_block(self, p):
        """stmt : block"""
        print("stmt : block")

    def p_stmt_vardec(self, p):
        """stmt : vardec"""
        print("stmt : vardec")

    def p_stmt_while(self, p):
        """stmt : WHILE LRB exp RRB stmt"""
        print("stmt : WHILE LRB exp RRB stmt")

    def p_stmt_on(self, p):
        """stmt : ON LRB exp RRB LCB cases RCB SEMICOLON"""
        print("stmt : ON LRB exp RRB LCB cases RCB SEMICOLON")

    def p_stmt_for_exp(self, p):
        """stmt : FOR LRB exp SEMICOLON exp SEMICOLON exp RRB stmt"""
        print("stmt : FOR LRB exp SEMICOLON exp SEMICOLON exp RRB stmt")

    def p_stmt_for_id(self, p):
        """stmt : FOR LRB ID IN ID RRB stmt"""
        print("stmt : FOR LRB ID IN ID RRB stmt")

    def p_stmt_print(self, p):
        """stmt : PRINT LRB ID RRB SEMICOLON"""
        print("stmt : PRINT LRB ID RRB SEMICOLON")
        # p[3] is the identifier to print; emit code for its place.
        self.codeGenerator.print(self.find_place(name=p[3]))

    def p_stmt_IF(self, p):
        """stmt : IF LRB exp RRB stmt elseiflist elsestmt"""
        print("stmt : IF LRB exp RRB stmt elseiflist elsestmt")

    def p_elsestmt(self, p):
        """elsestmt : ELSE stmt"""
        print("elsestmt : ELSE stmt")

    def p_elsestmt_Lambda(self, p):
        """elsestmt : %prec IF"""
        print("elsestmt : ")

    def p_elseiflist(self, p):
        """elseiflist : elseiflist ELSEIF LRB exp RRB stmt"""
        print("""elseiflist : elseiflist ELSEIF LRB exp RRB stmt""")

    def p_elseiflist_Lambda(self, p):
        """elseiflist : """
        print("elseiflist : ")

    def p_exp_lvalue_assign(self, p):
        """exp : lvalue ASSIGN exp"""
        print("exp : lvalue ASSIGN exp")
        # Same action as p_iddec_lvalue_assign, without the debug prints.
        place = self.new_temp()
        exp_place = ''
        if p[3] is None:
            exp_place = self.find_place()
        self.symbolTable.add_place(self.var_stack.pop(), place)
        self.codeGenerator.assign(p, place, exp_place=exp_place)

    def p_exp_lvalue(self, p):
        """exp : lvalue"""
        print("exp : lvalue")

    def p_exp_id_explist(self, p):
        """exp : ID LRB explist RRB"""
        print("exp : ID LRB explist RRB")

    # TODO care, may cause problem in future
    def p_exp_parenthesis_exp(self, p):
        """exp : LRB exp RRB"""
        print("exp : LRB exp RRB")
        print(p[2])
        p[0] = NonTerminal()
        # Forward the inner expression's place: from var_stack when the inner
        # rule produced no value object, otherwise from the object itself.
        if p[2] is None:
            p[0].place = self.find_place()
        else:
            p[0].place = p[2].get_value()

    def p_exp_id(self, p):
        """exp : ID LRB RRB"""
        print("exp : ID LRB RRB")

    def p_exp_SUB_exp(self, p):
        """exp : SUB exp"""
        print("exp : SUB exp")
        place = self.new_temp()
        exp_place = ''
        if p[2] is None:
            exp_place = self.find_place()
        self.symbolTable.add_place(self.var_stack.pop(), place)
        # NOTE(review): unary minus is emitted through not_assign() -- confirm
        # that this is the intended code-generator entry point.
        self.codeGenerator.not_assign(p, place, exp_place=exp_place)
        # The result temp becomes the operand for the enclosing expression.
        self.var_stack.append(place)

    def p_exp_not_exp(self, p):
        """exp : NOT exp"""
        print("exp : NOT exp")

    def p_exp_or(self, p):
        """exp : exp OR exp"""
        print("exp : OR")

    def p_exp_and(self, p):
        """exp : exp AND exp"""
        print("exp : exp AND exp")

    def p_exp_sum(self, p):
        "exp : exp SUM exp"
        print("exp : exp SUM exp")
        # TODO
        place1 = ''
        place3 = ''
        # Operands without a value object are resolved from var_stack.
        if p[1] is None:
            place1 = self.find_place()
        if p[3] is None:
            place3 = self.find_place()
        self.codeGenerator.generate_arithmetic_code(
            p, self.new_temp(), place1=place1, place3=place3)
        # NOTE(review): the other arithmetic rules read p[0].place here; this
        # one calls p[0].get_value() -- confirm the two are equivalent.
        self.array_length = p[0].get_value()

    def p_exp_sub(self, p):
        "exp : exp SUB exp"
        print("exp : exp SUB exp")
        place1 = ''
        place3 = ''
        if p[1] is None:
            place1 = self.find_place()
        if p[3] is None:
            place3 = self.find_place()
        # When both operands came off var_stack they are passed swapped --
        # presumably because the stack yields the right operand first and the
        # operation is not commutative.
        if p[1] is None and p[3] is None:
            self.codeGenerator.generate_arithmetic_code(
                p, self.new_temp(), place1=place3, place3=place1)
        else:
            self.codeGenerator.generate_arithmetic_code(
                p, self.new_temp(), place1=place1, place3=place3)
        self.array_length = p[0].place

    def p_exp_mul(self, p):
        "exp : exp MUL exp"
        print("exp : exp MUL exp")
        place1 = ''
        place3 = ''
        if p[1] is None:
            place1 = self.find_place()
        if p[3] is None:
            place3 = self.find_place()
        self.codeGenerator.generate_arithmetic_code(
            p, self.new_temp(), place1=place1, place3=place3)
        self.array_length = p[0].place

    def p_exp_div(self, p):
        "exp : exp DIV exp"
        print("exp : exp DIV exp")
        place1 = ''
        place3 = ''
        if p[1] is None:
            place1 = self.find_place()
        if p[3] is None:
            place3 = self.find_place()
        # Operand swap as in p_exp_sub.
        if p[1] is None and p[3] is None:
            self.codeGenerator.generate_arithmetic_code(
                p, self.new_temp(), place1=place3, place3=place1)
        else:
            self.codeGenerator.generate_arithmetic_code(
                p, self.new_temp(), place1=place1, place3=place3)
        self.array_length = p[0].place

    def p_exp_mod(self, p):
        """exp : exp MOD exp"""
        print("exp : exp MOD exp")
        place1 = ''
        place3 = ''
        if p[1] is None:
            place1 = self.find_place()
        if p[3] is None:
            place3 = self.find_place()
        # Operand swap as in p_exp_sub.
        if p[1] is None and p[3] is None:
            self.codeGenerator.generate_arithmetic_code(
                p, self.new_temp(), place1=place3, place3=place1)
        else:
            self.codeGenerator.generate_arithmetic_code(
                p, self.new_temp(), place1=place1, place3=place3)
        self.array_length = p[0].place

    def p_exp_gt_exp(self, p):
        """exp : exp GT exp"""
        print("exp : exp GT exp")
        print("*****************")
        print(p[1])
        print(p[2])
        print(p[3])
        print("*****************")
        place1 = ''
        place3 = ''
        # A left operand with an empty value is taken from the operand saved
        # by the previous comparison (latest_bool), which is then consumed.
        if p[1] is not None and p[1].value == '':
            place1 = self.latest_bool
            self.latest_bool = None
        if p[1] is None:
            place1 = self.find_place()
        if p[3] is None:
            place3 = self.find_place()
        # Operand swap as in p_exp_sub.
        if p[1] is None and p[3] is None:
            self.codeGenerator.boolean_expression(
                p, self.new_bool_temp(), place1=place3, place3=place1)
        else:
            self.codeGenerator.boolean_expression(
                p, self.new_bool_temp(), place1=place1, place3=place3)
        # Save the right operand for a following comparison.
        if p[3] is None:
            self.latest_bool = place3
        else:
            self.latest_bool = p[3].value

    def p_exp_lt_exp(self, p):
        """exp : exp LT exp"""
        print("exp : exp LT exp")
        print("*****************")
        print(p[1])
        print(p[2])
        print(p[3])
        print("*****************")

    def p_exp_ne_exp(self, p):
        """exp : exp NE exp"""
        print("exp : exp NE exp")

    def p_exp_eq_exp(self, p):
        """exp : exp EQ exp"""
        print("exp : exp EQ exp")

    def p_exp_ge_exp(self, p):
        """exp : exp GE exp"""
        print("exp : exp GE exp")

    def p_exp_le_exp(self, p):
        """exp : exp LE exp"""
        print("exp : exp LE exp")

    def p_exp_int(self, p):
        """exp : INTEGERNUMBER"""
        print("exp : INTEGERNUMBER")
        # An integer literal may be an array size/index: remember it.
        self.array_length = p[1]
        p[0] = NonTerminal()
        p[0].value = str(p[1])

    def p_exp_float(self, p):
        """exp : FLOATNUMBER"""
        print("exp : FLOATNUMBER")
        p[0] = NonTerminal()
        # int() drops the fractional part of the literal before it is stored.
        p[0].value = str(int(p[1]))

    def p_exp_true(self, p):
        """exp : TRUE"""
        print("exp : TRUE")
        p[0] = NonTerminal()
        p[0].value = '1'

    def p_exp_false(self, p):
        """exp : FALSE"""
        print("exp : FALSE")
        p[0] = NonTerminal()
        p[0].value = '0'

    def p_explist_exp(self, p):
        """explist : exp"""
        print("explist : exp")

    def p_explist_explist(self, p):
        """explist : explist COMMA exp"""
        print("explist : explist COMMA exp")

    # Operator precedence table read by yacc, lowest precedence first.  The
    # IF / ELSEIF / ELSE entries pair with ``%prec IF`` in p_elsestmt_Lambda.
    precedence = (('right', 'ASSIGN'),
                  ('left', 'OR'),
                  ('left', 'AND'),
                  ('left', 'LT', 'GT', 'NE', 'EQ', 'LE', 'GE'),
                  ('left', 'SUM', 'SUB'),
                  ('left', 'MUL', 'DIV', 'MOD'),
                  ('left', 'NOT'),
                  ('left', 'IF'),
                  ('left', 'ELSEIF', 'ELSE'))

    def new_temp(self):
        """Return a fresh temporary name ``T<n>`` and advance the counter."""
        temp = "T" + str(self.tempCount)
        self.tempCount += 1
        return temp

    def new_bool_temp(self):
        """Return a fresh boolean temporary name ``B<n>`` and advance the counter."""
        temp = "B" + str(self.tempCountBool)
        self.tempCountBool += 1
        return temp

    def find_place(self, name=''):
        """Return the symbol-table place of ``name``.

        With the default empty ``name`` the most recent entry is popped from
        ``var_stack`` and looked up instead, so the call consumes one entry
        (and raises IndexError if the stack is empty).
        """
        tmp = name
        if name == '':
            tmp = self.var_stack.pop()
        return self.symbolTable.find_place(tmp)

    def p_error(self, p):
        # Syntax-error hook called by yacc: print the offending token and abort.
        # print(p.value)
        print(p)
        raise Exception('ParsingError: invalid grammar at ', p)

    def build(self, **kwargs):
        """build the parser"""
        self.parser = yacc.yacc(module=self, **kwargs)
        return self.parser
from PIF import ProgramInternalForm
from symbolTable import SymbolTable
from languageSpecs import *
import re

# Module-level scanner state shared by the functions below.
st = SymbolTable()
pif = ProgramInternalForm()
filename = 'input2'        # program to scan
pif_filename = 'PIF.out'   # not referenced in the visible part of this file
st_filename = 'ST.out'     # not referenced in the visible part of this file


def tokenize():
    """Split every line of ``filename`` on the language separators.

    Returns a list with one entry per input line; each entry is the list
    produced by ``re.split`` for that (stripped) line.  The separator
    alternation is wrapped in a capturing group, so the separators themselves
    are kept in the result, and empty strings can appear between adjacent
    separators.
    """
    result = []
    with open(filename) as file:
        # ``separators`` is not defined here -- presumably supplied by the
        # star import from languageSpecs.
        re_separator = '('
        for separator in separators:
            re_separator += re.escape(separator) + '|'
        # Drop the trailing '|' and close the group.
        re_separator = re_separator[:-1] + ')'
        for line in file:
            line = line.strip()
            new_line = re.split(re_separator, line)
            result.append(new_line)
    return result


def algorithm():
    # Walks every token of every tokenized line.  NOTE: the visible source
    # ends right after the condition below; the branch body is outside this
    # excerpt.
    lines_array = tokenize()
    for line in lines_array:
        for token in line:
            if token != '':
def _pushdown(self):
    ''' Top-down predictive parsing of the program's syntax, performing semantic analysis and intermediate-code generation at the same time. '''
    # Stack records come in three kinds (``record.type``):
    #   'SYMBOL' - a grammar symbol to match/expand (``record.node``)
    #   'SYN'    - holder for a non-terminal's synthesized attributes
    #   'ACTION' - a semantic action ``act<N>`` to execute (``record.act``)
    tok = self.lexer.token()
    recentAttr = {}
    globalAttr = {'offset': 0, 'top': SymbolTable()}
    # Build the node for the start symbol and push it onto the stack.
    '''构建开始文法的结点并将其压入栈中'''
    self.root = MySyntaxer.syntaxNode(self.begin)
    curRecord = MySyntaxer.stackRecord('SYMBOL')
    curRecord.node = self.root
    endRecord = MySyntaxer.stackRecord('SYMBOL')
    endRecord.node = MySyntaxer.syntaxNode('EOF')
    # Bottom to top: EOF marker, SYN record for the start symbol, start symbol.
    symStack = [endRecord, MySyntaxer.stackRecord('SYN'), curRecord]
    curRecord = symStack[-1]
    syntaxError = False  # once a syntax error has occurred, semantic actions are no longer executed
    # Run until the EOF symbol record is on top of the stack.
    while curRecord.type != 'SYMBOL' or curRecord.node.sym != 'EOF':
        if self.debug:
            MySyntaxer._debugPrint(symStack, curRecord)
        if curRecord.type == 'SYMBOL':
            # The top record is a grammar symbol.
            '''若为文法结点'''
            X = curRecord.node
            if X.sym == tok.type:
                # Match the terminal on top of the stack.
                '''匹配栈顶终结符号'''
                recentAttr = {'value': tok.value, 'type': tok.type}  # save the terminal's synthesized attributes
                X.val = tok.value
                tok = self.lexer.token()
                symStack.pop()
            elif X.sym == 'BLANK':
                # Match the empty string on top of the stack.
                '''匹配栈顶空串'''
                symStack.pop()
            elif X.sym in self.tokens:
                # The token read does not match the terminal on top of the stack:
                # pop that terminal and try to continue parsing.
                self._error(tok, symStack)
                print(f'pop {X.sym}, continue analyze.')
                syntaxError = True
                symStack.pop()
            else:
                # Non-terminal: look up the production number in the parse table.
                num = self.M[X.sym].get(tok.type)
                if num is None:
                    # No production can expand the non-terminal on top of the stack.
                    # Error recovery:
                    # 1) if tok.type is in the Follow set of X.sym, pop X.sym
                    # 2) otherwise, panic mode: skip the current input token
                    self._error(tok, symStack)
                    syntaxError = True
                    if tok.type in self.Follow[X.sym]:
                        print(f'pop {X.sym}, continue analyze.')
                        symStack.pop()
                    else:
                        print(f'ignore {tok.type}, continue analyze.')
                        tok = self.lexer.token()
                else:
                    symStack.pop()
                    seq = self.APSet[X.sym][num]
                    # Placeholder list; slots of grammar symbols are replaced by
                    # their tree nodes, slots of actions keep the index.
                    _tmp = list(range(len(seq)))
                    actionRecord = None  # used to find the first action fragment
                    for i in range(len(seq)):
                        if not seq[i].startswith('act'):
                            _tmp[i] = MySyntaxer.syntaxNode(seq[i])
                    for i in range(len(seq)):
                        # Build the parse tree (children in production order).
                        '''建立语法分析树'''
                        if not seq[i].startswith('act'):
                            X.children.append(_tmp[i])
                        # Push the production onto the stack in reverse order.
                        '''将产生式倒序入栈'''
                        _X = seq[len(seq)-i-1]
                        if _X.startswith('act'):
                            newRecord = MySyntaxer.stackRecord('ACTION')
                            newRecord.act = _X
                            # Pushing right-to-left, so the last assignment
                            # is the production's first action.
                            actionRecord = newRecord
                            symStack.append(newRecord)
                        else:
                            if not _X in self.tokens:
                                # Create a synthesized-attribute record only for non-terminals.
                                '''仅为非终结符号创建综合记录'''
                                newRecord = MySyntaxer.stackRecord('SYN')
                                symStack.append(newRecord)
                            newRecord = MySyntaxer.stackRecord('SYMBOL')
                            newRecord.node = _tmp[len(seq)-i-1]
                            symStack.append(newRecord)
                    # Hand the expanded symbol's inherited attributes to the
                    # production's first action.
                    if actionRecord:
                        actionRecord.inh = curRecord.inh.copy()
        elif curRecord.type == 'SYN':
            # Expose the finished non-terminal's synthesized attributes.
            recentAttr = curRecord.syn.copy()
            symStack.pop()
        else:
            # ACTION record: the action id is the number after the 'act' prefix.
            actID = int(curRecord.act[3:])
            top = len(symStack)-1
            if not syntaxError:
                try:
                    SDTUtil.execAction(actID, recentAttr, globalAttr, symStack, top)
                except Exception as e:
                    print(f"Error at Line {tok.lineno}:\n\t{e}")
                    raise e
                    # NOTE(review): unreachable -- the raise above always leaves the loop.
                    break
            symStack.pop()
        curRecord = symStack[-1]
    print()
    print("-"*50)
    #print("symbol table:")
    print(globalAttr['top'])
# Echo the raw source.  The original opened this handle with a bare open()
# and never closed it; a context manager releases it once the loop is done.
with open(filename, 'r') as fopen:
    print("Code: ")
    for line in fopen:
        print(line)

print("\nTOKENIZED: ")
with open(filename, 'r') as fopen:
    for line in fopen:
        print(list(scanner.tokenize(line, separators)))

print("\nCODES: ")
for item in everything:
    print(item + " " + str(codification[item]))

symbolTable = SymbolTable()
pif = ProgramInternalForm()
with open(filename, 'r') as fopen:
    flag = 0
    count = 0  # 1-based number of the line being scanned
    for line in fopen:
        count += 1
        # Strip only the line terminator.  The original sliced line[0:-1],
        # which also cut a real character off a final line that has no
        # trailing newline.
        for token in scanner.tokenize(line.rstrip('\n'), separators):
            if token in everything:
                # Reserved words / operators / separators go to the PIF with
                # position -1; a bare space is not recorded.
                if token != ' ':
                    pif.add(codification[token], -1)
            elif scanner.isIdentifier(token):
                #print("ID: "+token)
                symbolTable.add([token, codification['id']])
            # NOTE: the visible excerpt ends here; any further branches of
            # this chain are outside the reviewed source.
def main():
    """Scan ``input.txt`` and print the parser's derivation for its token codes.

    Loads the grammar from ``my_mini_grammar.txt``, tokenizes ``input.txt``
    line by line, records every token in the program internal form (PIF) and
    every identifier/constant in the symbol table, then prints the grammar
    productions, the PIF and ``parser.derivationStrings`` of the code
    sequence.

    Raises:
        Exception: for a token that is not a reserved word, operator,
            separator, identifier or constant (lexical error), or for a
            token that has no entry in the codification table.
    """
    # g = Grammar.readFromFile('grammar.txt')
    g = Grammar.readFromFile('my_mini_grammar.txt')
    parser = Parser(g)
    st = SymbolTable()
    pif = ProgramInternalForm()
    filename = 'input.txt'

    # def detectToken()
    # define token -> keyword, operator, separator (addToPif(code_of_token, -1)
    # -> identifier (pos = pos(token, symTable), addToPif(token, st))
    # -> constant (pos + addToPif(code_of_constant, pos))

    def isSeparator(char):
        return char in myLanguageSpecs.separators

    def isOperator(char):
        return char in myLanguageSpecs.operators

    def isReserved(word):
        return word in myLanguageSpecs.reservedWords

    def isIdentifier(word):
        # A letter followed by at most eight letters or digits.
        return re.match(r'^[a-zA-Z]([a-zA-Z]|[0-9]){0,8}$', word) is not None

    def isConstant(token):
        # A quoted lowercase letter or digit, or an optionally signed integer.
        # NOTE(review): only the integer alternative is anchored with ``$``.
        return re.match(r'((\'[a-z]\'|\'[0-9]\')|(\+|-){0,1}[0-9]*\d$)', token) is not None

    def getCodeOfToken(token):
        # Only a missing key means "unknown token"; the original bare
        # ``except:`` also swallowed unrelated errors such as
        # KeyboardInterrupt.
        try:
            return myLanguageSpecs.codification[token]
        except KeyError as err:
            raise Exception("The token is not in codification table") from err

    def tokenize():
        # Returns one list of pieces per input line.  The alternation sits in
        # a capturing group so re.split keeps the separators in the result.
        with open(filename) as file:
            # Compile once instead of once per line.
            re_separator = re.compile(
                '(' + '|'.join(re.escape(separator)
                               for separator in myLanguageSpecs.separators) + ')')
            return [re_separator.split(line.strip()) for line in file]

    def algo():
        for line in tokenize():
            for token in line:
                # Skip empty pieces and bare spaces.  Compare by value: the
                # original used ``is`` / ``is not``, which tests object
                # identity and only worked because CPython interns '' and ' '.
                if token in ('', ' '):
                    continue
                if isReserved(token) or isOperator(token) or isSeparator(token):
                    pif.add(getCodeOfToken(token), -1)
                elif isIdentifier(token):
                    pos = st.addIdentifier(token)
                    pif.add(getCodeOfToken('identifier'), pos)
                elif isConstant(token):
                    pos = st.addConstant(token)
                    pif.add(getCodeOfToken('constant'), pos)
                else:
                    raise Exception("Lexical error at line", ''.join(line))

    algo()

    # Reverse lookup code -> token (not used further in this function).
    revereCodification = {
        code: key for key, code in myLanguageSpecs.codification.items()
    }

    # The parser input is the sequence of token codes as strings; the
    # symbol-table position stored with each PIF entry is not needed here.
    inputStack = [str(code) for code, _ in pif.pif]

    print("Productions: ", g.P)
    print(pif)
    # print(parser.parse(inputStack))
    # print(parser.parse('abbc'))
    # print(parser.derivationStrings('abbc'))
    print(parser.derivationStrings(inputStack))
import sys
import ply.yacc as yacc
import lexer
from symbolTable import SymbolTable

# Global symbol table.  'println' and 'readInt' are registered as functions
# inside a scope that is opened and closed immediately.
ST = SymbolTable()
ST.newScope()
ST.addFunc('println')
ST.addFunc('readInt')
ST.endScope()

# Counter behind newLabel().
x = 1


def newLabel():
    """Return a fresh label name of the form ``label<N>``.

    The module-level counter ``x`` is incremented before it is used, so with
    the initial value 1 the first label returned is ``label2``.
    """
    global x
    x = x + 1
    return "label" + str(x)


def is_number(var):
    """Return True when ``int(var)`` succeeds, False when it raises any Exception."""
    try:
        int(var)
        return True
    except Exception:
        return False


def check(var, op):
    # NOTE: only the beginning of this function is visible in the reviewed
    # excerpt; what follows the statements below is outside the view.
    # For a numeric operand of any operator other than "=", a temporary is
    # requested from the symbol table and the triple "=,<temp>,<var>" is built.
    if is_number(var) and op != "=":
        t = ST.getTemp()
        l = ['=', t, var]
        myList = ','.join(map(str, l))
global ST from symbolTable import SymbolTable ST = SymbolTable()
file_out = file_name.split('.asm') file_out = file_out[0] + '.hack' """ After reading the file, we read its content twice by calling the parser. In the first run, we check for any LOOP tags inside the assembly code, add their line_of_code and their name to the Symbols Table, and add 1 to the ROM counter, so that the next LOOP tag, if there are more, can be added to the Symbols Table. The loop runs until there are no instructions left to check inside the file. """ countROM = 0 countRAM = 16 final_instruction = '' firstRun = Parser(file_name) symbolTable = SymbolTable() while firstRun.hasMoreInstructions(): firstRun.nextLine() type = firstRun.instructionType() if type == "L_Instruction": symbol = firstRun.getSymbol() if not symbolTable.contains(symbol): symbolTable.addEntry(symbol, countROM) else: countROM += 1 """ Then, we run the parser a second time, where we now start translating each instruction of the code. We first check the instruction type, and then we start to 'extract' its content and translate it piece by piece, creating a new line uniting the pieces (because they compose one instruction after all) with a '\\n' (new line)