def codegen(self, builder, symbolTable, module):
    """Emit the IR function for this declaration and register its symbols.

    The function is created in ``module`` under a namespace-qualified name,
    added to ``symbolTable``, and its body statements are generated inside a
    freshly pushed local scope that also holds one symbol per parameter.

    Args:
        builder: Builder of the enclosing scope. It is not used here; the
            body is emitted through a new builder positioned on the
            function's own entry block.
        symbolTable: Symbol table receiving the function symbol and the
            parameter symbols.
        module: IR module in which the function is created.

    Returns:
        The created ``ir.Function``, or ``None`` when the function or one of
        its parameters is a duplicate declaration, or when any body
        statement returns ``None``. Diagnostics are appended to
        ``self.errList`` as ``(line, message)`` tuples. A missing terminator
        is reported in ``self.errList`` but the function is still returned.
    """
    # generate function unique identifier (qualified by namespace)
    ns = symbolTable.getNameSpace()
    unique_name = self.name if ns == "main" else "{}.{}".format(ns, self.name)

    params_ir = [symbolTable.get_ir_type(param.type) for param in self.params]
    fType = ir.FunctionType(symbolTable.get_ir_type(self.retType), params_ir)
    func = ir.Function(module, fType, name=unique_name)
    for arg, param in zip(func.args, self.params):
        arg.name = param.name

    # add func to symboltable
    symbol = Symbol(self.name, func, self.retType, id_type='function',
                    unique_name=unique_name)
    if not symbolTable.add(symbol):
        # list.append takes exactly one argument: report a (line, msg) tuple
        self.errList.append((self.line, "duplicate declaration"))
        return None

    # add function's local scope and populate with params
    symbolTable.pushLocal(unique_name)
    for arg, param in zip(func.args, self.params):
        if not symbolTable.add(Symbol(param.name, arg, param.type)):
            self.errList.append(
                (self.line, "duplicate parameter declaration"))
            # do not leave the half-built local scope on the table
            symbolTable.popLocal()
            return None

    # make new function builder and add entry
    entryBB = func.append_basic_block(name="funcEntry")
    func_builder = ir.IRBuilder(entryBB)
    has_errors = False
    for statement in self.body:
        res = statement.codegen(func_builder, symbolTable, module)
        if res is None:
            has_errors = True
            self.errList += statement.errList

    # remove local scope and handle errors
    symbolTable.popLocal()
    if has_errors:
        # every failing statement's errors were already collected in the loop
        return None
    if not func_builder.block.is_terminated:
        self.errList.append(
            (self.line, "expected return statement before end of function"))
    return func
def codegen(self, builder, symbolTable, module=None):
    """Create the initial value for this declaration and register its symbol.

    A scalar is represented by a constant of the declared IR type (``0``, or
    ``""`` for strings). When ``self.array_size`` is set, stack storage is
    allocated through ``builder`` and filled with an array of that constant.
    String declarations without an explicit size are turned into arrays of
    size 1, which updates ``self.array_size`` in place.

    Args:
        builder: IR builder used for the ``alloca``/``bitcast``/``store``
            instructions of array declarations.
        symbolTable: Symbol table providing the IR type and receiving the
            new symbol (globally when ``self.is_global`` is set).
        module: Unused; accepted for signature parity with other nodes.

    Returns:
        The constant (scalar) or the allocated pointer (array), or ``None``
        when the name is already declared; in that case a
        ``(line, message)`` tuple is appended to ``self.errList``.
    """
    ir_type = symbolTable.get_ir_type(self.type)
    if self.type == tkn.STRING_TYPE:
        val = ""
        if self.array_size is None:
            # all strings are arrays: an unsized string becomes a 1-element one
            self.array_size = LiteralExpr(1, tkn.INT_TYPE, self.line)
    else:
        val = 0

    id_type = "variable"
    initial_val = ir.Constant(ir_type, val)
    if self.array_size is not None:
        id_type = "array"
        array_size = int(self.array_size.value)
        arr_val = ir.Constant.literal_array([initial_val] * array_size)
        initial_val = builder.alloca(ir_type, array_size, name="arrayPtr")
        # view the element pointer as a pointer-to-array so the whole
        # initializer can be written with a single store
        initial_tmp_ptr = builder.bitcast(
            initial_val, ir.PointerType(ir.ArrayType(ir_type, array_size)))
        builder.store(arr_val, initial_tmp_ptr)

    symbol = Symbol(self.name, initial_val, self.type, id_type)
    if symbolTable.add(symbol, is_global=self.is_global):
        return initial_val

    err_msg = "duplicate declaration of variable '{}'".format(self.name)
    self.errList.append((self.line, err_msg))
    return None
def finish_Unary(self, node: UnaryNode):
    """Finish a Unary node, allocating a temporary when its type was updated.

    ``node.update_type()`` decides the path: a falsy result just propagates
    the node upward; a truthy result first registers a temporary symbol of
    ``node.type`` at the current level/block and passes its
    (name, level, block) to ``node.upward``.
    """
    if not node.update_type():
        node.upward()
        return

    tmp_name, tmp_level, tmp_block = self.sym_table.add_temp_symbol(
        self.cur_level, self.cur_block, Symbol(node.type))
    node.upward(tmp_name, tmp_level, tmp_block)
def finish_Rel(self, node: RelNode):
    """Finish a Rel node.

    Rel -> Expr          : single child, propagate upward unchanged.
    Rel -> Expr < Expr   : register a 'bool' temporary at the current
                           level/block and pass it to ``node.upward``.
    """
    if len(node.children) == 1:
        node.upward()
        return

    tmp_name, tmp_level, tmp_block = self.sym_table.add_temp_symbol(
        self.cur_level, self.cur_block, Symbol('bool'))
    node.upward(tmp_name, tmp_level, tmp_block)
def finish_Equality(self, node: EqualityNode):
    """Finish an Equality node.

    Equality -> Rel                  : single child, propagate upward.
    Equality -> Equality ==/!= Rel   : register a 'bool' temporary at the
                                       current level/block and pass it to
                                       ``node.upward``.
    """
    if len(node.children) == 1:
        node.upward()
        return

    tmp_name, tmp_level, tmp_block = self.sym_table.add_temp_symbol(
        self.cur_level, self.cur_block, Symbol('bool'))
    node.upward(tmp_name, tmp_level, tmp_block)
def finish_Join(self, node: JoinNode):
    """Finish a Join node.

    Join -> Equality          : single child, propagate upward.
    Join -> Join & Equality   : register a 'bool' temporary at the current
                                level/block and pass it to ``node.upward``.
    """
    if len(node.children) == 1:
        node.upward()
        return

    tmp_name, tmp_level, tmp_block = self.sym_table.add_temp_symbol(
        self.cur_level, self.cur_block, Symbol('bool'))
    node.upward(tmp_name, tmp_level, tmp_block)
def finish_Bool(self, node: BoolNode):
    """Finish a Bool node.

    Bool -> Join           : single child, propagate upward.
    Bool -> Bool || Join   : register a 'bool' temporary at the current
                             level/block and pass it to ``node.upward``.
    """
    if len(node.children) == 1:
        node.upward()
        return

    tmp_name, tmp_level, tmp_block = self.sym_table.add_temp_symbol(
        self.cur_level, self.cur_block, Symbol('bool'))
    node.upward(tmp_name, tmp_level, tmp_block)
def meet_Decl(self, node: DeclNode):
    """Register the identifier declared by ``Decl -> Type i``.

    The identifier name is read from the second child and the type name from
    the first child's first child; the resulting symbol is added at the
    current level and block.

    Raises:
        DuplicateDeclareIdentifierException: if the symbol table rejects
            the name.
    """
    # TODO: handle the array case
    name = node.children[1].val
    type_name = node.children[0].children[0].val
    added = self.sym_table.add_symbol(
        self.cur_level, self.cur_block, name, Symbol(type_name))
    if added:
        return
    raise DuplicateDeclareIdentifierException(
        name, self.cur_level, self.cur_block, node.row)
def _decode_struct(self, struct):
    """Flatten a struct description into a list of member symbols.

    Recurses into members whose ``data_type`` is itself a list, so structs
    nested within structs are expanded. Each expanded member is a new
    ``Symbol`` named ``"<parent>.<member>"`` whose address is the parent's
    address plus the member's address.

    struct: a list of Symbol objects, or a single Symbol object
    return: a list of all members in struct (empty for any other input)
    """

    def qualify(parent, child):
        # Build the child symbol as seen from its parent.
        qualified_name = "{0}.{1}".format(parent.name, child.name)
        qualified_address = parent.address + child.address
        return Symbol(qualified_name, qualified_address, child.size,
                      child.data_type)

    flattened = []
    if isinstance(struct, list):
        for entry in struct:
            if not isinstance(entry.data_type, list):
                # plain member: keep as-is
                flattened.append(entry)
                continue
            for nested in entry.data_type:
                for leaf in self._decode_struct(nested):
                    flattened.append(qualify(entry, leaf))
    elif isinstance(struct, Symbol):
        for leaf in self._decode_struct(struct.data_type):
            flattened.append(qualify(struct, leaf))
        if not isinstance(struct.data_type, list):
            # a symbol with a non-list data type stands for itself
            flattened.append(struct)
    return flattened
def _get_struct_or_union_members(self, offset):
    """Collect the members of the struct or union whose DIE is at ``offset``.

    Iteration over ``self._dwarf_info`` starts at the entry keyed by
    ``offset`` and stops at the first following entry that is not a
    ``DW_TAG_member`` exactly one depth level below it.

    offset: offset number in dwarf info
    return: a list of Symbol objects describing the members. A member's
            data type is either a string, or a list of Symbol objects when
            the member is itself a struct (the list then holds that
            struct's members, to be decoded later). The list is also
            returned when the entries run out before a non-member entry is
            seen.
    """
    struct_members = []
    for key, val in dropwhile(lambda x: x[0] != offset,
                              self._dwarf_info.items()):
        # we start iterating from offset
        if val["offset"] == offset:
            # we skip it, we are interested in the struct members
            base_die_depth = val["depth"]
            continue

        if not (val["tag"] == "DW_TAG_member"
                and val["depth"] == base_die_depth + 1):
            # first entry past the member run: the struct is complete
            return struct_members

        member_name = self._get_description(key, "DW_AT_name")
        member_location_offset = self._get_description(
            key, "DW_AT_data_member_location")
        if not member_location_offset:
            # for union, since they do not have "DW_AT_data_member_location"
            member_location_offset = 0
        else:
            member_location_offset = int(member_location_offset)

        base_member_offset, base_member_type, base_member_size = \
            self._get_type(key)
        if base_member_type == "struct":
            # struct inside struct: embed its members as the data type
            base_member_type = self._get_struct_or_union_members(
                base_member_offset)

        struct_members.append(
            Symbol(member_name, member_location_offset, base_member_size,
                   base_member_type))

    # the struct was the last entry: previously this fell through and
    # returned None instead of the collected members
    return struct_members
def codegen(self, builder, symbolTable, module=None):
    """Generate code for an assignment of ``self.expr`` to ``self.dest``.

    Both sides are generated first. When the source is an indexed variable
    the element is loaded from the array; when the destination is an
    indexed variable the value is stored into the addressed element.
    Finally the destination symbol is updated in ``symbolTable`` with the
    assigned value.

    Args:
        builder: IR builder receiving the ``gep``/``load``/``store``
            instructions.
        symbolTable: Symbol table in which the destination is updated.
        module: Passed through to the sub-expression code generators.

    Returns:
        The assigned value, or ``None`` on failure. On failure the errors of
        the failing sub-expression (or a ``(line, message)`` tuple for a
        failed symbol update) are appended to ``self.errList``.
    """
    # check that dest is defined
    dest_val = self.dest.codegen(builder, symbolTable, module)
    dest_name = self.dest.name
    if dest_val is None:
        self.errList += self.dest.errList
        return None

    # evaluate expr
    expr_val = self.expr.codegen(builder, symbolTable, module)
    if expr_val is None:
        self.errList += self.expr.errList
        return None

    # handle case that expr is an element of an array
    if isinstance(self.expr, VariableExpr) and self.expr.array_index is not None:
        index_val = self.expr.array_index.codegen(builder, symbolTable, module)
        if index_val is None:
            # a failed index must not reach gep as None
            # assumes index nodes expose errList like other nodes - TODO confirm
            self.errList += self.expr.array_index.errList
            return None
        expr_ptr = builder.gep(expr_val, [index_val], name="indexPtr")
        expr_val = builder.load(expr_ptr, name="arrayElement")

    # handle case that dest is an element of an array
    if isinstance(self.dest, VariableExpr) and self.dest.array_index is not None:
        # dest_val does not get overwritten here bc the array pointer
        # hasn't changed
        index_val = self.dest.array_index.codegen(builder, symbolTable, module)
        if index_val is None:
            self.errList += self.dest.array_index.errList
            return None
        dest_ptr = builder.gep(dest_val, [index_val], name="destIndexPtr")
        builder.store(expr_val, dest_ptr)

    # NOTE(review): for an indexed destination this still records the element
    # value under the destination's name - confirm symbolTable.update expects that
    symbol = Symbol(dest_name, expr_val, self.dest.data_type)
    if not symbolTable.update(symbol):
        self.errList.append(
            (self.line, "could not update symbol {}".format(dest_name)))
        return None
    return expr_val
def load_builtins(self):
    """Declare the builtin get/put functions (bodies empty for now).

    Each builtin is created as a function in ``self.module`` and registered
    in ``self.symbolTable`` as a 'function' symbol whose data type is the
    function's IR type.
    """
    void = ir.VoidType()
    bool_t = ir.IntType(1)
    int_t = ir.IntType(32)
    float_t = ir.FloatType()

    def str_t():
        return ir.PointerType(ir.IntType(8))

    # (name, return type, parameter types)
    signatures = [
        ("putbool", void, [bool_t]),
        ("putstring", void, [str_t()]),
        ("putinteger", void, [int_t]),
        ("putfloat", void, [float_t]),
        ("getbool", bool_t, []),
        ("getinteger", int_t, []),
        ("getfloat", float_t, []),
        ("main.StringEquals", bool_t, [str_t(), str_t()]),
        ("getstring", str_t(), []),
    ]
    for fn_name, ret_type, param_types in signatures:
        fn_type = ir.FunctionType(ret_type, param_types)
        fn = ir.Function(self.module, fn_type, name=fn_name)
        self.symbolTable.add(Symbol(fn_name, fn, fn_type, id_type='function'))
def parse_symbol_table(self):
    """Build the symbol table from the ELF symbol-table sections.

    Only symbols with a positive size whose type is described as "OBJECT"
    are added, each as ``Symbol(name, st_value, st_size)``.

    :return: the populated ``self.symbol_table``
    """
    if self.symbol_table is None:
        self.symbol_table = SymbolTable()

    # collect the sections up front, before walking any symbols
    sections = [
        sec for sec in self._elf.iter_sections()
        if isinstance(sec, SymbolTableSection)
    ]
    for sec in sections:
        for sym in sec.iter_symbols():
            if int(sym["st_size"]) <= 0:
                continue
            if describe_symbol_type(sym["st_info"]["type"]) != "OBJECT":
                continue
            self.symbol_table.add_symbol(
                Symbol(sym.name, sym["st_value"], sym["st_size"]))
    return self.symbol_table
def generate_local_assign(assign: LocalAssignStmt):
    """Generate code for a local assignment statement.

    First every name on the left side is declared via ``generate_name``.
    Then, pairing names with right-side expressions by position, each
    expression is generated, the pending jump list of a top-level AND
    (false list) or OR (true list) is back-patched to the current pc, and a
    LOADK instruction is added for the pair.
    """
    targets = assign.left.name_list
    values = assign.right.expr_list

    # left -- add local variable
    for target in targets:
        generate_name(target)

    # right -- calc value and assign to left variable
    for position, target in enumerate(targets):
        slot = FuncStat.instance().symbol_stack.lookup(Symbol(target.value))
        expr = values[position]
        reg_right = generate_expr(expr)

        if ExprEnum.BINOP == expr.kind:
            # short-circuit operators leave jumps to patch to "here"
            if BinOpEnum.AND == expr.value.operator:
                _back_path(expr.false_list, FuncStat.instance().pc())
            elif BinOpEnum.OR == expr.value.operator:
                _back_path(expr.true_list, FuncStat.instance().pc())

        add_instruction(Instruction(OpCode.LOADK, slot.get, reg_right))
def generate_symbol_table(self):
    """Build the table of valid symbols by matching ELF symbols to DWARF info.

    A DWARF entry matches a symbol when one of its attribute descriptions is
    a string contained in the symbol's name (names are not matched exactly,
    since there may be several) and one is an integer equal to the symbol's
    address. For the first matching entry the base type is resolved:
    struct members are decoded and added individually as
    ``"<symbol>.<member>"``; "void" and "ptr" types are left out; any other
    type is added as a single symbol.

    :return: the populated ``self.valid_symbol_table``
    :raises ValueError: if the symbol table or DWARF info is not populated
    """
    if self.valid_symbol_table is None:
        self.valid_symbol_table = SymbolTable()
    if self._symbol_table is None or self._dwarf_info is None:
        raise ValueError(
            "Either Symbol Table or DWARF Info not populated")

    excluded_types = ["void", "ptr"]
    for entry in self._symbol_table:
        for abbrev in self._dwarf_info:
            descs = [attr["desc"] for attr in self._dwarf_info[abbrev]["attr"]]
            # substring match on the name, exact match on the address
            name_seen = any(
                isinstance(desc, str) and desc in entry.name for desc in descs)
            addr_seen = any(
                isinstance(desc, int) and desc == entry.address
                for desc in descs)
            if not (name_seen and addr_seen):
                continue

            # matched: resolve the symbol's base type
            base_offset, base_type, base_size = self._get_type(abbrev)
            if base_type == "struct":
                raw_members = self._get_struct_or_union_members(base_offset)
                for member in self._decode_struct(raw_members):
                    if member.data_type in excluded_types:
                        continue
                    # qualify the member with its owning symbol
                    qualified_name = "{0}.{1}".format(entry.name, member.name)
                    absolute_address = entry.address + member.address
                    decoded_type = self._decode_data_type(
                        member.data_type, member.size)
                    self.valid_symbol_table.add_symbol(
                        Symbol(qualified_name, hex(absolute_address),
                               str(member.size), decoded_type))
            elif base_type not in excluded_types:
                # base types or pointers
                entry.data_type = base_type
                decoded_type = self._decode_data_type(base_type, base_size)
                self.valid_symbol_table.add_symbol(
                    Symbol(entry.name, hex(entry.address), str(base_size),
                           decoded_type))
            # first matching entry settles this symbol
            break
    return self.valid_symbol_table
def generate_name(name: TermName):
    """Insert a symbol for ``name.value`` into the current symbol stack.

    NOTE: a guard on ``name.kind`` (``Var.NAME``) is disabled in the
    original, so every name is inserted unconditionally.
    """
    stack = FuncStat.instance().symbol_stack
    stack.insert(Symbol(name.value))