def get_frame_base(filename, pc, rebased_addr):
    """
    Call to get frame base

    The ``.eh_frame`` call-frame tables of the file are decoded and the row
    with the smallest ``pc`` that is still >= ``pc - rebased_addr`` is
    selected; the offset of that row's CFA rule is returned.

    :param filename: name of the executable file
    :param pc: The address of the beginning of the function
    :param rebased_addr: Should be project.loader.memory.min_addr
    :return: the frame base for the function; 0 when no table row
        qualifies; None (after printing a message) when the file has no
        DWARF info
    """
    target_loc = pc - rebased_addr
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return None

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # None means "no candidate row seen yet"; this replaces an arbitrary
        # huge integer that was used as an upper bound on addresses.
        nearest_pc = None
        offset = 0
        for cfi_entry in dwarfinfo.EH_CFI_entries():
            if not isinstance(cfi_entry, FDE):
                continue
            for row in cfi_entry.get_decoded().table:
                row_pc = row['pc']
                if row_pc < target_loc:
                    continue
                if nearest_pc is None or row_pc < nearest_pc:
                    offset = row['cfa'].offset
                    nearest_pc = row_pc
        return offset
def process_file(filename):
    """Print the location information of every DIE attribute in `filename`.

    For each compile unit a header line is printed; for each attribute that
    the LocationParser reports as carrying a location, the name of the owning
    DIE, the DIE tag, the attribute name and the decoded location are printed.

    :param filename: path of the ELF file to inspect
    :return: None (a message is printed when the file has no DWARF info)
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup.
            print(' Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            cu_version = CU['version']

            # A CU provides a simple API to iterate over all the DIEs in it.
            for DIE in CU.iter_DIEs():
                # Not every DIE that has a location also has a name; look the
                # name up defensively instead of indexing, which raised
                # KeyError and aborted the whole dump.
                name_attr = DIE.attributes.get('DW_AT_name')
                var_name = name_attr.value if name_attr is not None else '<unnamed>'

                # Go over all attributes of the DIE. Each attribute is an
                # AttributeValue object which we can examine.
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(attr, cu_version):
                        continue

                    print(' Varname:%s' % (var_name))
                    print(' DIE %s. attr %s.' % (DIE.tag, attr.name))
                    loc = loc_parser.parse_from_attribute(attr, cu_version)
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo, indent=' '))
def process_dwarf_info(in_file, out_file):
    '''
    Main function processing the dwarf informations from debug sections

    Types and frames are processed first, then every compile unit is decoded
    into the protobuf module ``M`` and into ``GLOBAL_VARIABLES``. Global
    variables with a positive size are added to ``M``, which is finally
    serialized into ``out_file``.

    :param in_file: path of the ELF file to read
    :param out_file: path the serialized module is written to
    :return: False when ``in_file`` has no DWARF info, otherwise None
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Report the offending path; the `file` builtin was formatted
            # here before, which never named the input.
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, CU['unit_length']))
            top_DIE = CU.get_top_DIE()
            c_unit = CUnit(top_DIE, CU['unit_length'], CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # Only the values are needed; .values() works on Python 2 and 3.
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # SerializeToString() produces binary data, so the output file must
        # be opened in binary mode.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
def process_dwarf_info(in_file, out_file):
    '''
    Main function processing the dwarf informations from debug sections

    Reads ``in_file`` as an ELF file, runs type and frame processing, decodes
    each compile unit into the protobuf module and ``GLOBAL_VARIABLES``, adds
    every global variable whose size is positive to the module and writes the
    serialized module to ``out_file``.

    :param in_file: path of the ELF file to read
    :param out_file: path the serialized module is written to
    :return: False when ``in_file`` has no DWARF info, otherwise None
    '''
    DEBUG('Processing file: {0}'.format(in_file))

    with open(in_file, 'rb') as f:
        f_elf = ELFFile(f)
        if not f_elf.has_dwarf_info():
            # Name the input path in the message (it used to format the
            # unrelated `file` builtin).
            DEBUG("{0} has no debug informations!".format(in_file))
            return False

        M = CFG_pb2.Module()
        M.name = "GlobalVariable"

        set_global_machine_arch(f_elf.get_machine_arch())
        dwarf_info = f_elf.get_dwarf_info()
        process_types(dwarf_info, TYPES_MAP)
        process_frames(dwarf_info, EH_FRAMES)
        section_offset = dwarf_info.debug_info_sec.global_offset

        # Iterate through all the compile units
        for CU in dwarf_info.iter_CUs():
            unit_length = CU['unit_length']
            DEBUG('Found a compile unit at offset {0}, length {1}'.format(
                CU.cu_offset, unit_length))
            c_unit = CUnit(CU.get_top_DIE(), unit_length, CU.cu_offset,
                           section_offset)
            c_unit.decode_control_unit(M, GLOBAL_VARIABLES)

        # The keys are not used; iterate the values in a way that is valid
        # on both Python 2 and Python 3.
        for value in GLOBAL_VARIABLES.values():
            if value["size"] > 0:
                gvar = M.global_vars.add()
                gvar.name = value["name"]
                gvar.ea = value["addr"]
                gvar.size = value["size"]
            else:
                DEBUG("Look for {}".format(pprint.pformat(value)))

        # The serialized protobuf is binary data: write it in binary mode.
        with open(out_file, "wb") as outf:
            outf.write(M.SerializeToString())

    DEBUG("Global Vars\n")
    DEBUG('Number of Global Vars: {0}'.format(len(GLOBAL_VARIABLES)))
    DEBUG("{}".format(pprint.pformat(GLOBAL_VARIABLES)))
    DEBUG("End Global Vars\n")
def __init__(self, fname):
    """Load the ELF file `fname` and, when present, its DWARF info.

    Sets ``self.elf``; when the file has DWARF info also sets ``self.dwarf``,
    registers the machine architecture globally and runs
    ``self.__tame_dwarf()``. ``self.get_expr_evaluator`` is always set to a
    factory returning ``ExprLiveEval(self)``.

    :param fname: path of the ELF file to load
    """
    # ELF is a binary format, so the file is opened in binary mode on every
    # platform; text mode on Windows translates line endings and corrupts
    # the data handed to ELFFile.
    if platform.system() == "Windows":
        # NOTE(review): this handle stays open and is never closed here;
        # presumably ELFFile keeps reading from it - confirm.
        elf_data = open(fname, "rb")
    else:
        with open(fname, "rb") as f:
            elf_data = StringIO(f.read())

    self.elf = ELFFile(elf_data)

    if self.elf.has_dwarf_info():
        self.dwarf = self.elf.get_dwarf_info()
        set_global_machine_arch(self.elf.get_machine_arch())
        self.__tame_dwarf()

    self.get_expr_evaluator = lambda: ExprLiveEval(self)
def get_func_bounds(filename, function_name):
    """Return the address bounds of `function_name` recorded in DWARF.

    The first ``DW_TAG_subprogram`` DIE whose ``DW_AT_name`` equals
    `function_name` is used.

    :param filename: path of the ELF file to inspect
    :param function_name: name to compare against ``DW_AT_name``
    :return: ``(low_addr, high_addr)``; None when the file has no DWARF info
        (a message is printed) or when no matching subprogram exists
    """
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return None

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        for CU in dwarfinfo.iter_CUs():
            # A CU provides a simple API to iterate over all the DIEs in it.
            for DIE in CU.iter_DIEs():
                if DIE.tag != "DW_TAG_subprogram":
                    continue

                attrs = DIE.attributes
                name_attr = attrs.get("DW_AT_name")
                fname = name_attr.value if name_attr is not None else ""
                if fname != function_name:
                    continue

                low_attr = attrs.get("DW_AT_low_pc")
                low_addr = low_attr.value if low_attr is not None else 0

                high_attr = attrs.get("DW_AT_high_pc")
                if high_attr is None:
                    # No upper bound recorded: collapse the range onto low_pc.
                    high_addr = low_addr
                elif high_attr.form == 'DW_FORM_addr':
                    # Address-class high_pc is already an absolute address.
                    high_addr = high_attr.value
                else:
                    # Any other form holds an offset from low_pc. Deciding
                    # this by comparing the two values misclassified offsets
                    # that happen to be >= low_pc.
                    high_addr = low_addr + high_attr.value

                return (low_addr, high_addr)
    return None
def process_file(filename):
    """Print every location list referenced by a DIE attribute of `filename`.

    A header line is printed per compile unit. For each attribute accepted by
    ``attribute_has_location_list`` the list found at the attribute's offset
    is printed together with the DIE tag and the attribute name.

    :param filename: path of the ELF file to inspect
    :return: None (a message is printed when the file has no DWARF info)
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # DWARFInfo is the entry point for all DWARF processing.
        info = elf.get_dwarf_info()

        # LocationLists object built from the .debug_loc section.
        loclists = info.location_lists()

        # Needed so that register names in DWARF expressions decode properly.
        set_global_machine_arch(elf.get_machine_arch())

        for unit in info.iter_CUs():
            # Header fields of a compile unit are read by item lookup.
            unit_banner = ' Found a compile unit at offset %s, length %s' % (
                unit.cu_offset, unit['unit_length'])
            print(unit_banner)

            for die in unit.iter_DIEs():
                for attribute in itervalues(die.attributes):
                    if not attribute_has_location_list(attribute):
                        continue
                    # The attribute value is an offset into .debug_loc.
                    entries = loclists.get_location_list_at_offset(
                        attribute.value)
                    rendered = show_loclist(entries, info, indent=' ')
                    print(' DIE %s. attr %s.\n%s' % (
                        die.tag, attribute.name, rendered))
def process_file(filename):
    """Collect every DIE of `filename`, keyed by its offset.

    :param filename: path of the ELF file to read
    :return: an OrderedDict mapping ``DIE.offset`` to the DIE, in the order
        the compile units and their DIEs are iterated
    :raises ValueError: when the file has no DWARF info
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            raise ValueError(filename + ' has no DWARF info')

        # DWARFInfo is the entry point for all DWARF processing.
        info = elf.get_dwarf_info()

        # Needed so that register names in DWARF expressions decode properly.
        set_global_machine_arch(elf.get_machine_arch())

        # Walk the compile units in order and, inside each, all of its DIEs.
        return OrderedDict(
            (die.offset, die)
            for unit in info.iter_CUs()
            for die in unit.iter_DIEs()
        )
def display_debug_dump(self, dump_what):
    """ Dump a DWARF section

        dump_what:
            One of 'info', 'decodedline', 'frames' or 'frames-interp'; any
            other value emits a "not yet supported" line.

        Nothing is dumped when no DWARF info is available after
        _init_dwarfinfo().
    """
    self._init_dwarfinfo()
    if self._dwarfinfo is None:
        return

    set_global_machine_arch(self.elffile.get_machine_arch())

    # Names are resolved with getattr only for the selected entry, so no
    # dumper attribute is touched unless it is the one being run.
    dumper_names = {
        'info': '_dump_debug_info',
        'decodedline': '_dump_debug_line_programs',
        'frames': '_dump_debug_frames',
        'frames-interp': '_dump_debug_frames_interp',
    }
    dumper_name = dumper_names.get(dump_what)
    if dumper_name is None:
        self._emitline('debug dump not yet supported for "%s"' % dump_what)
        return
    getattr(self, dumper_name)()
def test_describe_CFI_instructions(self):
    """describe_CFI_instructions renders the instructions of a single CIE."""
    # The data here represents a single CIE
    cie_bytes = b''.join([
        b'\x16\x00\x00\x00',  # length
        b'\xff\xff\xff\xff',  # CIE_id
        b'\x03\x00\x04\x7c',  # version, augmentation, caf, daf
        b'\x08',  # return address
        b'\x0c\x07\x02',
        b'\x10\x02\x07\x03\x01\x02\x00\x00\x06\x06',
    ])
    stream = BytesIO(cie_bytes)

    structs = DWARFStructs(little_endian=True,
                           dwarf_format=32,
                           address_size=4)

    entries = CallFrameInfo(stream, len(cie_bytes), structs).get_entries()

    set_global_machine_arch('x86')
    expected_description = (
        ' DW_CFA_def_cfa: r7 (edi) ofs 2\n'
        ' DW_CFA_expression: r2 (edx) (DW_OP_addr: 201; DW_OP_deref; DW_OP_deref)\n'
    )
    actual_description = describe_CFI_instructions(entries[0])
    self.assertEqual(actual_description, expected_description)
def test_describe_CFI_instructions(self):
    """Check the textual description of the instructions in one raw CIE."""
    # The data here represents a single CIE
    header_fields = (
        b'\x16\x00\x00\x00'  # length
        b'\xff\xff\xff\xff'  # CIE_id
        b'\x03\x00\x04\x7c'  # version, augmentation, caf, daf
        b'\x08'  # return address
    )
    instruction_bytes = (
        b'\x0c\x07\x02'
        b'\x10\x02\x07\x03\x01\x02\x00\x00\x06\x06'
    )
    raw = header_fields + instruction_bytes

    structs = DWARFStructs(
        little_endian=True, dwarf_format=32, address_size=4)
    call_frame_info = CallFrameInfo(BytesIO(raw), len(raw), structs)
    first_entry = call_frame_info.get_entries()[0]

    set_global_machine_arch('x86')
    expected_lines = [
        ' DW_CFA_def_cfa: r7 (edi) ofs 2\n',
        ' DW_CFA_expression: r2 (edx) (DW_OP_addr: 201; DW_OP_deref; DW_OP_deref)\n',
    ]
    self.assertEqual(
        describe_CFI_instructions(first_entry), ''.join(expected_lines))
def main(path_to_sample):
    """Process every compile unit of `path_to_sample` and dump 'funcs.json'.

    The module globals ``dwarf_info``, ``location_lists`` and
    ``call_frame_information_entries`` are (re)assigned from the file. After
    all compile units went through ``process_compile_unit``, the entries of
    the module-level ``functions`` are written to 'funcs.json' as a dict
    keyed by each entry's 'address'.

    :param path_to_sample: path of the ELF file to analyse
    :raises AssertionError: when the file has no DWARF info
    :raises KeyError: when an entry of ``functions`` lacks
        'dw_op_call_frame_cfa' or 'address'
    """
    global dwarf_info
    global location_lists
    global call_frame_information_entries

    with open(path_to_sample, 'rb') as f:
        pyelftools_elf_file = ELFFile(f)

        assert pyelftools_elf_file.has_dwarf_info(), 'file has no DWARF info'

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarf_info = pyelftools_elf_file.get_dwarf_info()
        call_frame_information_entries = dwarf_info.CFI_entries()
        location_lists = dwarf_info.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(pyelftools_elf_file.get_machine_arch())

        # DWARFInfo allows to iterate over the compile units contained in
        # the .debug_info section.
        for compile_unit in dwarf_info.iter_CUs():
            process_compile_unit(dwarf_info, pyelftools_elf_file, compile_unit)

    # Remove dw_op_call_frame_cfa, as it is not serializable. An explicit
    # loop is required: map() is lazy on Python 3 and would never run pop().
    for function in functions:
        function.pop('dw_op_call_frame_cfa')

    # convert into a nice dict
    funcs = {function['address']: function for function in functions}

    # json produces text, so the file is opened in text mode. It is only
    # opened once the data is ready, so a failure above does not truncate
    # an existing funcs.json.
    with open('funcs.json', 'w') as out:
        json.dump(funcs, out)
def get_var_offset(filename, function_name, var_name):
    """Return the DW_OP_fbreg operand recorded for a variable of a function.

    Each ``DW_TAG_subprogram`` DIE named `function_name` is searched for a
    ``DW_TAG_variable`` / ``DW_TAG_formal_parameter`` child named `var_name`.

    :param filename: path of the ELF file to inspect
    :param function_name: value compared against the subprogram DW_AT_name
    :param var_name: value compared against the child DW_AT_name
    :return: the first argument of the last ``DW_OP_fbreg`` operation in the
        child's location expression, or 0 when it has none; None when the
        file has no DWARF info or nothing matches
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)
        if not elf.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # DWARFInfo is the entry point for all DWARF processing.
        info = elf.get_dwarf_info()

        # LocationLists object built from the .debug_loc section.
        loclists = info.location_lists()

        # Needed so that register names in DWARF expressions decode properly.
        set_global_machine_arch(elf.get_machine_arch())

        # Turns DIE attributes into objects describing the location.
        location_parser = LocationParser(loclists)

        wanted_child_tags = ("DW_TAG_variable", "DW_TAG_formal_parameter")

        for unit in info.iter_CUs():
            for die in unit.iter_DIEs():
                # Only functions are of interest.
                if die.tag != "DW_TAG_subprogram":
                    continue

                subprogram_name = ""
                for attribute in itervalues(die.attributes):
                    if attribute.name == "DW_AT_name":
                        subprogram_name = attribute.value
                if subprogram_name != function_name:
                    continue

                for child in die.iter_children():
                    if child.tag not in wanted_child_tags:
                        continue

                    name_matches = False
                    fb_offset = 0
                    for attribute in itervalues(child.attributes):
                        if (attribute.name == "DW_AT_name"
                                and attribute.value == var_name):
                            name_matches = True

                        # Only DW_AT_location can carry the frame offset.
                        if attribute.name != "DW_AT_location":
                            continue
                        parsed_location = location_parser.parse_from_attribute(
                            attribute, unit['version'])
                        if not isinstance(parsed_location, LocationExpr):
                            continue
                        expr_parser = DWARFExprParser(info.structs)
                        for operation in expr_parser.parse_expr(
                                parsed_location.loc_expr):
                            if operation.op_name == 'DW_OP_fbreg':
                                fb_offset = operation.args[0]

                    if name_matches:
                        return fb_offset
def __init__(self, elf_file_path, inputfile, resultdir):
    """Extract DWARF data from an ELF file, then trace it with pin.

    First every compile unit of `elf_file_path` is walked: subprogram DIEs go
    to ``self.process_subprogram`` and variables carrying ``DW_AT_external``
    go to ``self.process_global_var``. Then the pin command is run on the
    binary and its output is handed to ``loadtrace`` / ``extractfromtrace``
    together with the collected globals and functions.

    When the file has no DWARF info a message is printed and construction
    stops before pin is run.

    :param elf_file_path: path of the ELF binary (a str)
    :param inputfile: path of a file whose content is fed to the traced
        program's stdin, or None to run without input
    :param resultdir: directory the ``<binary name>.out`` result goes to
    """
    self.elf_file_path = elf_file_path
    self.result_file_path = self.elf_file_path + ".type"
    self.inputfile = inputfile
    self.resultdir = resultdir

    # To save the basic information.
    self.base_type_map = {}
    self.addr2type_map = {}
    self.CU_TYPE = None

    self.functions = []
    self.global_var = []

    # To support extract the dwarf
    self.loc_parser = None
    self.CU = None
    self.dwarfinfo = None

    with open(self.elf_file_path, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        self.dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = self.dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        self.loc_parser = LocationParser(location_lists)

        for CU in self.dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item-lookup.
            print(' Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            self.CU = CU
            self.CU_TYPE = get_compile_unit_types(self.CU)

            # A CU provides a simple API to iterate over all the DIEs in it.
            for die in CU.iter_DIEs():
                if die.tag == 'DW_TAG_subprogram':
                    print("")
                    self.process_subprogram(die)
                elif (die.tag == 'DW_TAG_variable'
                      and 'DW_AT_external' in die.attributes):
                    print("")
                    self.process_global_var(die)

    pincmd = [
        '../pin/pin', '-t', '../TaintAnalysisWithPin/obj-ia32/taint.so',
        '--', elf_file_path
    ]
    print(pincmd)

    # Split on a str separator: a bytes separator raises TypeError for a
    # str path on Python 3.
    binary_name = elf_file_path.split('/')[-1]
    result = self.resultdir + '/' + binary_name + ".out"
    print(result)

    try:
        if inputfile is not None:
            print("fead input")
            # Read the input before spawning pin, so a missing input file
            # does not leave a child process behind.
            with open(self.inputfile, 'rb') as stdin_file:
                stdin_lines = stdin_file.readlines()
            for line in stdin_lines:
                print("Give Std Input:%s" % line)

            process = subprocess.Popen(pincmd,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            # communicate() feeds stdin and drains stdout concurrently and
            # closes the pipes itself; writing to stdin by hand first can
            # dead-lock once the child fills its stdout pipe.
            trace = process.communicate(b''.join(stdin_lines))[0]
        else:
            print("running without input")
            trace = subprocess.check_output(pincmd)

        tracelist = loadtrace(trace)
        extractfromtrace(tracelist, self.global_var, self.functions, result)
    except subprocess.CalledProcessError as e:
        # Fill the placeholder in with the error; it used to be printed
        # literally.
        print("run pin error(%s)" % e)
def __init__(self, config, elffile, debug_elffile=None):
    """Build the analysis state for one binary.

    The BAP output is loaded from ``config.BAP_FILE_PATH`` when that path is
    set and exists; otherwise ``bap`` is run on the binary and its output is
    parsed as JSON. From it the instruction map, program, functions, nodes,
    edges, factors and constraints are created. In TRAIN mode the statistics
    and debug information are set up as well.

    :param config: configuration object; the attributes read here are
        BINARY_NAME, BINARY_PATH, BAP_FILE_PATH, BYTEWEIGHT_SIGS_PATH, MODE
        and TRAIN
    :param elffile: stream passed to ``ELFFile`` for the binary
    :param debug_elffile: passed through to ``DebugInfo`` in TRAIN mode
    """
    self.config = config
    self.name = self.config.BINARY_NAME
    self.path = self.config.BINARY_PATH

    self.elffile = ELFFile(elffile)
    header = self.elffile.header
    self.entry_point = header['e_entry'] if 'e_entry' in header else None
    self.binary_type = header['e_type'] if 'e_type' in header else None

    self.init_pc = None
    self.fini_pc = None

    self.insns = dict()
    self.flags = dict()
    self.giv_regs = dict()
    self.giv_offsets = dict()
    self.int_consts = dict()
    self.string_consts = dict()
    self.code_offsets = dict()
    self.switch_tables = dict()
    self.virtual_elms = dict()
    self.temp_offsets = dict()
    self.node_types = dict()
    self.othervar_nodes = dict()
    self.size_nodes = dict()
    self.op_nodes = dict()
    self.unknown_node = None
    self.direct_offsets = dict()
    self.predicted = set()
    self.types = dict()

    set_global_machine_arch(self.elffile.get_machine_arch())
    utils.set_global_machine_arch(self.elffile.get_machine_arch(), self)

    self.sections = Sections(binary=self)

    bap_file_path = self.config.BAP_FILE_PATH
    if bap_file_path == '' or not os.path.exists(bap_file_path):
        # NOTE(review): the paths are interpolated unquoted into a shell
        # command line; a path with spaces or shell metacharacters breaks
        # (or alters) the command.
        bap_cmd = 'bap {} --pass=loc --symbolizer=objdump'.format(self.path)
        sigs_path = self.config.BYTEWEIGHT_SIGS_PATH
        if sigs_path != '':
            bap_cmd += ' --byteweight-sigs={}'.format(sigs_path)
        elif self.sections.has_sec(SYMTAB):
            bap_cmd += ' --rooter=internal'
        bap_json = json.loads(subprocess.getoutput(bap_cmd))
    else:
        # Close the file deterministically instead of leaking the handle.
        with open(bap_file_path) as bap_file:
            bap_json = json.load(bap_file)

    self.insn_map = InsnMap(**bap_json)
    self.bap = Prog(**bap_json,
                    binary=self,
                    has_symtab=self.sections.has_sec(SYMTAB))

    self.functions = Functions(bap=self.bap.subs, binary=self)
    self.functions.initialize()
    self.low_pc = self.functions.low_pc
    self.high_pc = self.functions.high_pc

    self.sections.init_dynsym_offsets()

    self.nodes = Nodes(binary=self)
    self.edges = Edges(binary=self)
    self.factors = Factors(binary=self)
    self.constraints = Constraints(binary=self)

    self.string_table = None
    self.symbol_table = None
    self.debug_loc = None

    if self.config.MODE == self.config.TRAIN:
        self.stats = Stats(self)
        self.debug_info = DebugInfo(binary=self, debug_elffile=debug_elffile)
        self.debug_info.binary_train_info()

    self.nodes.initialize()
    self.edges.initialize()
def main():
    """
    Main program.

    For each requested target ("rom", "ram", or both for "all") the kernel
    ELF file is read, its symbols are matched to source paths, the resulting
    tree is printed unless ``args.quiet`` is set, and a JSON report with the
    keys "symbols" and "total_size" is written to ``args.json`` or to
    ``<args.output>/<target>.json``.

    Raises AssertionError when the kernel file does not exist or has no
    DWARF information, and ValueError for an unknown ``args.target``.
    """
    parse_args()

    # Init colorama
    init()

    assert os.path.exists(args.kernel), "{0} does not exist.".format(
        args.kernel)

    targets_by_option = {
        "ram": ["ram"],
        "rom": ["rom"],
        "all": ["rom", "ram"],
    }
    targets = targets_by_option.get(args.target)
    if targets is None:
        # Without this the loop below failed with an unbound `targets`.
        raise ValueError("Unknown target: {0}".format(args.target))

    for t in targets:
        # Everything that reads from `elf` stays inside the `with`, so the
        # kernel file is closed after each target instead of leaking one
        # handle per iteration.
        with open(args.kernel, "rb") as kernel_file:
            elf = ELFFile(kernel_file)

            assert elf.has_dwarf_info(), "ELF file has no DWARF information"

            set_global_machine_arch(elf.get_machine_arch())

            addr_ranges = get_section_ranges(elf)

            symbols = get_symbols(elf, addr_ranges)

            for sym in symbols["unassigned"].values():
                print("WARN: Symbol '{0}' is not in RAM or ROM".format(
                    sym["name"]))

            if args.json:
                jsonout = args.json
            else:
                jsonout = os.path.join(args.output, f"{t}.json")

            symbol_dict = symbols[t]
            symsize = addr_ranges[f"{t}_total_size"]
            ranges = addr_ranges[t]

            if symbol_dict is not None:
                processed = {
                    "mapped_symbols": set(),
                    "mapped_addr": set(),
                    "unmapped_symbols": set(symbol_dict.keys()),
                }

                do_simple_name_matching(elf, symbol_dict, processed)
                mark_address_aliases(symbol_dict, processed)
                do_address_range_matching(elf, symbol_dict, processed)
                mark_address_aliases(symbol_dict, processed)
                common_path_prefix = find_common_path_prefix(symbol_dict)
                set_root_path_for_unmapped_symbols(symbol_dict, ranges,
                                                   processed)

                if args.verbose:
                    for sym in processed["unmapped_symbols"]:
                        print("INFO: Unmapped symbol: {0}".format(sym))

                root = generate_any_tree(symbol_dict, symsize,
                                         common_path_prefix)
                if not args.quiet:
                    print_any_tree(root, symsize, args.depth)

                exporter = DictExporter()
                data = dict()
                data["symbols"] = exporter.export(root)
                data["total_size"] = symsize
                with open(jsonout, "w") as fp:
                    json.dump(data, fp, indent=4)
import collections
import utils
from elftools.elf.elffile import ELFFile
from elftools.dwarf.descriptions import describe_attr_value, set_global_machine_arch
from elftools.common.py3compat import itervalues

# Read the DWARF information of the library located by utils.get_lib_loc().
with open(utils.get_lib_loc(),'rb') as f:
    elffile = ELFFile(f)
    if elffile.has_dwarf_info():
        dwarfinfo = elffile.get_dwarf_info()
    else:
        dwarfinfo = None
    machine_arch = set_global_machine_arch(elffile.get_machine_arch())

    # dwarfinfo is None when the file carries no DWARF sections at all;
    # check that first so the intended ValueError is raised instead of an
    # AttributeError on None.
    if dwarfinfo is None or not dwarfinfo.has_debug_info:
        raise ValueError("No debug info")

    # Offset of the .debug_info section in the stream
    section_offset = dwarfinfo.debug_info_sec.global_offset

    data = []
    for cu in dwarfinfo.iter_CUs():
        pointer_size = cu['address_size']
        # The nesting depth of each DIE within the tree of DIEs must be
        # displayed. To implement this, a counter is incremented each time
def setUp(self):
    """Create the expression dumper under test and select the x64 arch."""
    # The dumper is built from the 32-bit structs held on the test case.
    dumper = ExprDumper(self.structs32)
    self.visitor = dumper

    # Register names in descriptions depend on the globally selected arch.
    machine_arch = 'x64'
    set_global_machine_arch(machine_arch)
def setUp(self):
    """Select x64 as the global machine architecture before each test."""
    machine_arch = 'x64'
    set_global_machine_arch(machine_arch)
def compare_CFI_register_rule(a, b): if not strict and (a.type == 'UNDEFINED' or b.type == 'UNDEFINED'): return True return a.type == b.type and a.arg == b.arg elf_files = [ELFFile(open(filename, 'rb')) for filename in filenames] machine_arch = None for elf_file in elf_files: arch = elf_file.get_machine_arch() if machine_arch is None: machine_arch = arch elif machine_arch != arch: raise "Cannot compare ELF files with different machine architectures" set_global_machine_arch(machine_arch) # TODO: optimize dwarf_tables data type dwarf_tables = [] pcs = set() for elf_file in elf_files: if not elf_file.has_dwarf_info(): raise "ELF file is missing DWARF info" dwarf_info = elf_file.get_dwarf_info() dwarf_table = {} cfi_entries = None if dwarf_info.has_EH_CFI(): cfi_entries = dwarf_info.EH_CFI_entries() # TODO: .debug_frame if cfi_entries is None:
def get_dwarf_cu_and_die_paths(location):
    """
    Yield tuple of (path type, path) extracted from DWARFs in the ELF file at
    ``location``. Each path is yielded at most once.

    Path type is one of:

    - "primary": full path of the top DIE of a compile unit
    - "secondary-lp1": first file entry of the CU line program, when the line
      program has include directories (prefixed with its directory when the
      entry's directory index is > 0)
    - "secondary-lp2": first file entry of the CU line program, when the line
      program has no include directory
    - "secondary-decl1": include directory + file name referenced by a DIE's
      DW_AT_decl_file
    - "secondary-decl2": DW_AT_comp_dir + DW_AT_name of such a DIE

    Nothing is yielded when ``location`` does not exist, is not an ELF, is a
    stripped ELF or has no DWARF info.
    """
    if not os.path.exists(location):
        return

    T = contenttype.get_type(location)
    if (not T.is_elf) or T.is_stripped_elf:
        return

    with open(location, 'rb') as inp:
        elffile = ELFFile(inp)
        if not elffile.has_dwarf_info():
            return

        dwarfinfo = elffile.get_dwarf_info()

        # warning this is a global meaning that the library may not be thread safe
        set_global_machine_arch(elffile.get_machine_arch())

        seen = set()

        def is_new_line_program_path(candidate):
            # New when neither the path itself nor a longer path ending with
            # "/<candidate>" has been yielded already.
            return candidate not in seen and not any(
                x.endswith(f'/{candidate}') for x in seen)

        for cu in dwarfinfo.iter_CUs():
            # The first Debug Information Entry in a CU has the paths.
            top_die = cu.get_top_DIE()
            path = top_die.get_full_path()
            if path not in seen:
                yield 'primary', path
                seen.add(path)

            lineprogram = dwarfinfo.line_program_for_CU(cu)
            try:
                first_entry = lineprogram['file_entry'][0]
                cu_filename = bytes2str(first_entry.name)
                include_dirs = lineprogram['include_directory']
                if len(include_dirs) > 0:
                    path_type = 'secondary-lp1'
                    # add directory if possible
                    dir_index = first_entry.dir_index
                    if dir_index > 0:
                        pdir = include_dirs[dir_index - 1]
                        cu_filename = f'{bytes2str(pdir)}/{cu_filename}'
                else:
                    path_type = 'secondary-lp2'
            except IndexError:
                # No usable first file entry / directory: skip the line
                # program path of this CU.
                pass
            else:
                if is_new_line_program_path(cu_filename):
                    yield path_type, cu_filename
                    seen.add(cu_filename)

            # also yield other dies
            for die in cu.iter_DIEs():
                if not die:
                    continue

                decl_file_attrib = die.attributes.get("DW_AT_decl_file")
                if not decl_file_attrib or not decl_file_attrib.value:
                    continue

                die_lineprogram = die.dwarfinfo.line_program_for_CU(die.cu)
                file_entry = die_lineprogram.header.file_entry[
                    decl_file_attrib.value - 1]
                fname = bytes2str(file_entry.name)
                try:
                    file_dir = bytes2str(
                        die_lineprogram['include_directory'][
                            file_entry.dir_index - 1])
                except Exception:
                    # Best effort: a DIE whose directory cannot be resolved
                    # is skipped. Catching Exception (not a bare except)
                    # lets KeyboardInterrupt and SystemExit propagate.
                    continue

                path = f'{file_dir}/{fname}'
                if path not in seen:
                    yield 'secondary-decl1', path
                    seen.add(path)

                comp_dir_attr = die.attributes.get('DW_AT_comp_dir', None)
                comp_dir = bytes2str(
                    comp_dir_attr.value) if comp_dir_attr else ''

                fname_attr = die.attributes.get('DW_AT_name', None)
                fname = bytes2str(fname_attr.value) if fname_attr else ''

                if comp_dir:
                    path = f'{comp_dir}/{fname}'
                    if path not in seen:
                        yield 'secondary-decl2', path
                        seen.add(path)