def _process_subprogram_tag(die, section_offset, M, global_var_data): if die.tag != 'DW_TAG_subprogram': return F = M.funcs.add() F.ea = 0 F.name = get_name(die) F.is_entrypoint = 0 has_frame = False frame_regname = "" if 'DW_AT_frame_base' in die.attributes: frame_attr = die.attributes['DW_AT_frame_base'] has_frame = True loc_expr = "{}".format( describe_DWARF_expr(frame_attr.value, die.cu.structs)).split(' ') if loc_expr[0][1:][:-1] == "DW_OP_call_frame_cfa": lowpc_attr = die.attributes['DW_AT_low_pc'] #DEBUG("loc_expr {0} {1:x}".format(loc_expr, lowpc_attr.value)) frame = EH_FRAMES[ lowpc_attr.value] if lowpc_attr.value in EH_FRAMES else None if frame: DEBUG("{0:x}, {1}".format(frame['initial_location'], frame)) for instr in frame.instructions: name = instruction_name(instr.opcode) if name == 'DW_CFA_def_cfa_register': frame_regname = describe_reg_name( instr.args[0], None, False) for child in die.iter_children(): if child.tag != 'DW_TAG_variable': continue stackvar = F.stack_vars.add() stackvar.name = get_name(child) stackvar.sp_offset = 0 stackvar.has_frame = has_frame stackvar.reg_name = frame_regname (type, size, offset) = get_types(child) stackvar.size = size if size > 0 else 0 if 'DW_AT_location' in child.attributes: attr = child.attributes['DW_AT_location'] if attr.form not in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'): loc_expr = "{}".format( describe_DWARF_expr(attr.value, child.cu.structs)).split(' ') if loc_expr[0][1:][:-1] == 'DW_OP_fbreg': offset = int(loc_expr[1][:-1]) stackvar.sp_offset = offset
def get_die_mapped_address(die, parser, dwarfinfo):
    """Get the bounding addresses from a DIE variable or subprogram.

    Args:
        die: the DIE to inspect; only ``DW_TAG_variable`` and
            ``DW_TAG_subprogram`` entries produce addresses.
        parser: location parser providing ``attribute_has_location`` and
            ``parse_from_attribute``.
        dwarfinfo: DWARF context whose ``structs`` are used to decode
            location expressions.

    Returns:
        A ``(low, high)`` tuple.  For a variable whose location expression
        matches ``DT_LOCATION``, ``high`` is ``low + 1``.  For a subprogram,
        ``low`` is ``DW_AT_low_pc`` and ``high`` is derived from
        ``DW_AT_high_pc``; ``high`` stays ``None`` when ``DW_AT_high_pc`` is
        absent or has an unexpected form class.  Either element is ``None``
        when it cannot be determined.
    """
    low = None
    high = None
    attributes = die.attributes

    if die.tag == "DW_TAG_variable":
        if "DW_AT_location" in attributes:
            loc_attr = attributes["DW_AT_location"]
            version = die.cu["version"]
            if parser.attribute_has_location(loc_attr, version):
                loc = parser.parse_from_attribute(loc_attr, version)
                if isinstance(loc, LocationExpr):
                    addr = describe_DWARF_expr(loc.loc_expr, dwarfinfo.structs)
                    matcher = DT_LOCATION.match(addr)
                    if matcher:
                        low = int(matcher.group(1), 16)
                        high = low + 1
    elif die.tag == "DW_TAG_subprogram":
        if "DW_AT_low_pc" in attributes:
            low = attributes["DW_AT_low_pc"].value
            # DW_AT_high_pc is optional; previously its absence raised
            # KeyError.  The upper bound is simply left unknown instead.
            if "DW_AT_high_pc" in attributes:
                high_pc = attributes["DW_AT_high_pc"]
                high_pc_class = describe_form_class(high_pc.form)
                if high_pc_class == "address":
                    # Absolute end address.
                    high = high_pc.value
                elif high_pc_class == "constant":
                    # Offset relative to the low address.
                    high = low + high_pc.value

    return low, high
def process_file(filename):
    """Print the location information of every DIE attribute in an ELF file.

    For each compile unit, every DIE attribute that carries location
    information is printed together with the DIE's name, its tag and the
    decoded location (a single expression or a location list).

    Args:
        filename: path of the ELF file to read.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print(' file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header elements are accessed via
            # item lookup.
            print(' Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object which we can
                # examine.
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(
                            attr, CU['version']):
                        continue

                    # A DIE with a location attribute is not guaranteed to
                    # have a name; indexing DW_AT_name directly raised
                    # KeyError for such entries.
                    if 'DW_AT_name' in DIE.attributes:
                        var_name = DIE.attributes['DW_AT_name'].value
                    else:
                        var_name = '<unnamed>'
                    print(' Varname:%s' % (var_name))
                    print(' DIE %s. attr %s.' % (DIE.tag, attr.name))

                    loc = loc_parser.parse_from_attribute(
                        attr, CU['version'])
                    # We either get a list (the attribute references the
                    # .debug_loc section) or a LocationExpr object (the
                    # attribute itself contains the location).
                    if isinstance(loc, LocationExpr):
                        print(' %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo, indent=' '))
def _process_subprogram_tag(die, section_offset, M, global_var_data): if die.tag != 'DW_TAG_subprogram': return F = M.funcs.add() F.ea = 0 F.name = get_name(die) F.is_entrypoint = 0 has_frame = False frame_regname = "" if 'DW_AT_frame_base' in die.attributes: frame_attr = die.attributes['DW_AT_frame_base'] has_frame = True loc_expr = "{}".format(describe_DWARF_expr(frame_attr.value, die.cu.structs)).split(' ') if loc_expr[0][1:][:-1] == "DW_OP_call_frame_cfa": lowpc_attr = die.attributes['DW_AT_low_pc'] #DEBUG("loc_expr {0} {1:x}".format(loc_expr, lowpc_attr.value)) frame = EH_FRAMES[lowpc_attr.value] if lowpc_attr.value in EH_FRAMES else None if frame: DEBUG("{0:x}, {1}".format(frame['initial_location'], frame)) for instr in frame.instructions: name = instruction_name(instr.opcode) if name == 'DW_CFA_def_cfa_register': frame_regname = describe_reg_name(instr.args[0], None, False) for child in die.iter_children(): if child.tag != 'DW_TAG_variable': continue stackvar = F.stack_vars.add() stackvar.name = get_name(child) stackvar.sp_offset = 0 stackvar.has_frame = has_frame stackvar.reg_name = frame_regname (type, size, offset) = get_types(child) stackvar.size = size if size > 0 else 0 if 'DW_AT_location' in child.attributes: attr = child.attributes['DW_AT_location'] if attr.form not in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'): loc_expr = "{}".format(describe_DWARF_expr(attr.value, child.cu.structs)).split(' ') if loc_expr[0][1:][:-1] == 'DW_OP_fbreg': offset = int(loc_expr[1][:-1]) stackvar.sp_offset = offset
def show_loclist(loclist, dwarfinfo, indent):
    """ Format a location list as text, one entry per line.

        Every line is prefixed with ``indent``. LocationEntry items are
        followed by their decoded DWARF expression inside ``<<...>>``; any
        other item is rendered with ``str``.
    """
    def render(entry):
        if not isinstance(entry, LocationEntry):
            return str(entry)
        decoded = describe_DWARF_expr(entry.loc_expr, dwarfinfo.structs)
        return '%s <<%s>>' % (entry, decoded)

    rendered = [render(entry) for entry in loclist]
    return '\n'.join(indent + line for line in rendered)
def extract_string_variables(elf):
    """
    Collect the string (char) variables described by the DWARF
    information of an ELF file.

    Every DIE of every compilation unit is visited. A variable is kept
    when it has both a type and a location, is accepted by
    is_die_var_const_char(), and its location expression yields a
    non-zero address. Each result is a dict with the keys 'name',
    'addr' and 'die'.
    """
    dwarf_info = elf.get_dwarf_info()
    loc_parser = LocationParser(dwarf_info.location_lists())

    found = []

    for compile_unit in dwarf_info.iter_CUs():
        for die in compile_unit.iter_DIEs():
            if die.tag != 'DW_TAG_variable':
                continue

            # Only variables with type and location information that are
            # of type "char" are of interest.
            if ('DW_AT_type' not in die.attributes
                    or 'DW_AT_location' not in die.attributes
                    or not is_die_var_const_char(compile_unit, die)):
                continue

            # The location attribute holds the address in memory.
            loc_attr = die.attributes['DW_AT_location']
            if not loc_parser.attribute_has_location(
                    loc_attr, die.cu['version']):
                continue

            loc = loc_parser.parse_from_attribute(
                loc_attr, die.cu['version'])
            if not isinstance(loc, LocationExpr):
                continue

            try:
                described = describe_DWARF_expr(
                    loc.loc_expr, dwarf_info.structs)
                matcher = DT_LOCATION_REGEX.match(described)
                if matcher:
                    address = int(matcher.group(1), 16)
                    if address > 0:
                        found.append({
                            'name': die.attributes['DW_AT_name'].value,
                            'addr': address,
                            'die': die
                        })
            except KeyError:
                # Best effort: entries raising KeyError are skipped.
                pass

    return found
def _load_variable(self, die):
    """Fill in name, type, size and, when available, address from ``die``.

    The address is only set when the DIE has an inline location expression
    whose textual description starts with ``DW_OP_addr``.
    """
    self._name = get_name(die)
    self._dw_type = DWARFCache._get_type_die(die)
    self._size = self._dw_type.size()
    self._type = self._dw_type.type()

    if 'DW_AT_location' not in die.attributes:
        return

    location = die.attributes['DW_AT_location']
    # These forms are skipped; only inline expressions are decoded.
    if location.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'):
        return

    described = str(describe_DWARF_expr(location.value, die.cu.structs))
    pieces = described.split(':')
    # Expected text shape: "(DW_OP_addr: <hex address>)".
    if pieces[0][1:] != 'DW_OP_addr':
        return

    # Drop the leading space and the closing parenthesis.
    self._address = int(pieces[1][1:-1], 16)
def _process_variable_tag(die, section_offset, M, global_var_data):
    """Register a ``DW_TAG_variable`` DIE located at a fixed address.

    When the DIE's inline location expression is described as
    ``DW_OP_addr`` and the address is not yet a key of ``global_var_data``,
    a new entry holding the name, address, type and size is stored under
    that address.  ``section_offset`` and ``M`` are not used here.
    """
    if die.tag != 'DW_TAG_variable':
        return

    name = get_name(die)

    if 'DW_AT_location' not in die.attributes:
        return

    location = die.attributes['DW_AT_location']
    # These forms are skipped; only inline expressions are decoded.
    if location.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'):
        return

    pieces = str(describe_DWARF_expr(location.value, die.cu.structs)).split(':')
    # Expected text shape: "(DW_OP_addr: <hex address>)".
    if pieces[0][1:] != 'DW_OP_addr':
        return

    memory_ref = int(pieces[1][1:-1], 16)
    if memory_ref in global_var_data:
        # The first variable seen at an address is kept.
        return

    global_var_data[memory_ref] = _create_variable_entry(name, die.offset)
    entry = global_var_data[memory_ref]
    entry['is_global'] = True
    entry['addr'] = memory_ref
    var_type, var_size, _ = get_types(die)
    entry['type'] = var_type
    entry['size'] = var_size
    DEBUG("{}".format(pprint.pformat(global_var_data[memory_ref])))  # DEBUG_ENABLE
def _process_variable_tag(die, section_offset, M, global_var_data):
    """Add a fixed-address ``DW_TAG_variable`` DIE to ``global_var_data``.

    Only DIEs whose inline location expression is described as
    ``DW_OP_addr`` are recorded, keyed by that address; an address already
    present is left untouched.  ``section_offset`` and ``M`` are unused.
    """
    if die.tag != 'DW_TAG_variable':
        return

    name = get_name(die)

    has_location = 'DW_AT_location' in die.attributes
    attr = die.attributes['DW_AT_location'] if has_location else None
    loclist_forms = ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset')
    # No location, or a form that is not decoded here.
    if attr is None or attr.form in loclist_forms:
        return

    description = "{}".format(describe_DWARF_expr(attr.value, die.cu.structs))
    loc_expr = description.split(':')
    opcode = loc_expr[0][1:]  # strip the opening parenthesis
    if opcode == 'DW_OP_addr':
        # Strip the closing parenthesis, then the leading space.
        memory_ref = int(loc_expr[1][:-1][1:], 16)
        if memory_ref not in global_var_data:
            global_var_data[memory_ref] = _create_variable_entry(name, die.offset)
            global_var_data[memory_ref]['is_global'] = True
            global_var_data[memory_ref]['addr'] = memory_ref
            types_info = get_types(die)
            (kind, size, _unused_offset) = types_info
            global_var_data[memory_ref]['type'] = kind
            global_var_data[memory_ref]['size'] = size
            DEBUG("{}".format(pprint.pformat(global_var_data[memory_ref])))  # DEBUG_ENABLE
# please ignore it!
#
from __future__ import print_function

import sys
import pprint
from elftools.elf.structs import ELFStructs
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import *
from elftools.elf.relocation import *
from elftools.dwarf.locationlists import LocationLists
from elftools.dwarf.descriptions import describe_DWARF_expr

# The stream is handed to ELFFile and used by everything derived from it,
# so all the work happens inside the ``with`` block; the file is closed on
# exit (it was previously left open).
with open('test/testfiles/exe_simple64.elf', 'rb') as stream:
    efile = ELFFile(stream)
    print('elfclass', efile.elfclass)
    print('===> %s sections!' % efile.num_sections())
    print(efile.header)

    dinfo = efile.get_dwarf_info()

    # Dump every location list together with its decoded expressions.
    llists = LocationLists(dinfo.debug_loc_sec.stream, dinfo.structs)
    for loclist in llists.iter_location_lists():
        print('----> loclist!')
        for li in loclist:
            print(li)
            print(describe_DWARF_expr(li.loc_expr, dinfo.structs))
def die_info_rec(dwarfinfo, output_file, die, in_struct, indent_level='', print_pointers=False):
    """ A recursive function for showing information about a DIE and its
        children.

        Struct, union, typedef, member and base-type DIEs produce text on
        ``output_file``; any other DIE produces nothing itself, but its
        children are still visited.

        dwarfinfo:
            DWARF context; its ``structs`` are used to decode member
            location expressions.
        output_file:
            Object whose ``write`` method receives the text.
        die:
            The DIE to describe.
        in_struct:
            True when the parent opened a struct/union body. Struct and
            union DIEs assert that it is False.
        indent_level:
            String prefixed to every line written for this DIE.
        print_pointers:
            When False, a pointer-typed member is skipped entirely (the
            function returns without visiting its children).
    """
    children_in_struct = False
    # Never set to True in this function; still consulted for the closing
    # brace below.
    children_in_enum = False
    child_indent = indent_level + ' '

    tag = die.tag

    if tag == 'DW_TAG_structure_type':
        assert (not in_struct)
        # A struct without children writes nothing, so no brace is opened.
        if die._children:
            output_file.write(
                (indent_level + 'struct(%d) %s\n' + indent_level + '{\n') %
                (die_get_byte_size(die), die_get_name_repr(die)))
            children_in_struct = True
    elif tag == 'DW_TAG_union_type':
        assert (not in_struct)
        output_file.write(
            (indent_level + 'union(%d) %s\n' + indent_level + '{\n') %
            (die_get_byte_size(die), die_get_name_repr(die)))
        children_in_struct = True
    elif tag == 'DW_TAG_typedef':
        type_name = die_get_name_repr(die)
        basetype = die_get_type_info(die)
        if basetype:
            # No need to re-typedef a structure with the same name (as if
            # struct X were typedefed as X).
            if basetype.name != type_name or basetype.array_indices_repr:
                output_file.write('%stypedef %s%s %s\n' % (
                    indent_level, basetype.name,
                    basetype.array_indices_repr, type_name))
    elif tag == 'DW_TAG_member':
        member_name = die_get_name_repr(die)
        member_type = die_get_type_info(die)
        # This should always be the case
        assert (member_type)

        if member_type.is_ptr and not print_pointers:
            return
        # The pointer marker (' *') and comment prefix ('// ') are disabled:
        # both are empty for pointer and non-pointer members alike.
        ptr_str = ''
        comment_str = ''

        if 'DW_AT_bit_offset' in die.attributes_dict:
            bit_offset = die.attributes_dict['DW_AT_bit_offset']['value']
            # TODO
            # (Very rarely) some structures have a HUGE offset for some
            # reason. Why? Can GCC be trusted? Maybe it's in some other
            # representation? Are DW_FORM_data1 and DW_FORM_data8 different?
            # dwarfdump -a also shows the same number.
            bit_offset_str = '.' + str(bit_offset)
        else:
            bit_offset_str = ''

        if 'DW_AT_data_member_location' in die.attributes_dict:
            # I must use GenericExprVisitor instead of this.
            # This handles only a specific case, which GDB appears to
            # solely rely upon when generating structure member offsets.
            # Any other expression makes re.match return None and the
            # .group call raise AttributeError.
            byte_offset = re.match(
                r'\(DW_OP_plus_uconst: (\d+)\)',
                describe_DWARF_expr(
                    die.attributes_dict['DW_AT_data_member_location']['value'],
                    dwarfinfo.structs)).group(1)
            offset_str = str(byte_offset + bit_offset_str + ' ')
        else:
            offset_str = ''

        output_file.write('%s%s%s%s%s %s%s%s%s\n' % (
            indent_level, comment_str, offset_str, member_type.name,
            ptr_str, member_name, member_type.array_indices_repr,
            die_get_type_size_str(die, is_member=True), ''))
    elif tag == 'DW_TAG_base_type':
        type_name = die_get_name_repr(die)
        # NOTE(review): the result is unused; the call is retained in case
        # the helper validates the DIE - confirm and remove.
        type_size = die_get_byte_size(die)
        # Names containing spaces are quoted.
        if ' ' in type_name:
            type_name = '"' + type_name + '"'
        output_file.write('%stypedef %s(%s) %s\n' % (
            indent_level, die_get_modtype(die),
            die_get_type_size_str(die), type_name))

    for child in die.iter_children():
        die_info_rec(dwarfinfo, output_file, child, children_in_struct,
                     child_indent, print_pointers)

    if children_in_struct or children_in_enum:
        output_file.write(indent_level + '}\n')