Пример #1
0
def process_file(filename):
    """Print the location information of every DIE attribute in an ELF file.

    Opens ``filename``, walks every compile unit and every DIE in its DWARF
    data, and for each attribute that carries location information prints
    the owning DIE's name, its tag, the attribute name, and the decoded
    location (a single expression or a location list).

    Args:
        filename: Path of the ELF file to inspect.

    Returns:
        None. Returns early, after printing a notice, when the file has no
        DWARF info.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header fields are read via
            # item-lookup.
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            version = CU['version']

            for DIE in CU.iter_DIEs():
                # Each attribute is an AttributeValue object (from
                # elftools.dwarf.die).
                for attr in itervalues(DIE.attributes):
                    if not loc_parser.attribute_has_location(attr, version):
                        continue

                    # Not every DIE that has a location also has a name;
                    # indexing DW_AT_name unconditionally would raise
                    # KeyError and abort the whole dump.
                    if 'DW_AT_name' in DIE.attributes:
                        var_name = DIE.attributes['DW_AT_name'].value
                    else:
                        var_name = None
                    print(' Varname:%s' % (var_name))
                    print('   DIE %s. attr %s.' % (DIE.tag, attr.name))

                    loc = loc_parser.parse_from_attribute(attr, version)
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print('      %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo,
                                           indent='      '))
Пример #2
0
def test_dwarfinfo(di):
    """Smoke-test the DIE table model against every DIE of a DWARFInfo.

    For each non-null DIE the table model is pointed at it, every attribute
    row is rendered in every column, and the attribute details are requested.
    Assertions check tag/attribute/form name prefixes, that every
    DW_FORM_exprloc attribute is recognised as a location, and that
    DW_FORM_section_offset attributes produce details.

    Args:
        di: DWARFInfo object to walk; cache attributes are set on it and on
            its compile units.
    """
    # Some global cache setup in line with the app proper
    di._ranges = None
    di._CUs = list(di.iter_CUs())
    di._locparser = None

    m = False  # the table model, created lazily from the first real DIE
    dummy_index = QModelIndex()
    for CU in di._CUs:
        top_die = CU.get_top_DIE()
        if 'DW_AT_name' in top_die.attributes:
            raw_name = top_die.attributes['DW_AT_name'].value
            label = "%s" % strip_path(
                raw_name.decode('utf-8', errors='ignore'))
        else:
            label = "(no name)"
        print(label)

        CU._lineprogram = None
        CU._exprparser = None
        for die in CU.iter_DIEs():
            if die.is_null():
                continue
            assert die.tag.startswith('DW_TAG_')

            if m:
                m.display_DIE(die)
            else:
                # With prefix, with low level data, decimal
                m = DIETableModel(die, True, True, False, True)

            row_count = m.rowCount(dummy_index)
            column_count = m.columnCount(dummy_index)
            attr_keys = list(die.attributes.keys())
            # Assuming rows correspond to attributes;
            # if we introduce non-attribute metadata into the DIE table,
            # this will break
            for row in range(m.meta_count, row_count):
                key = attr_keys[row - m.meta_count]
                attr = die.attributes[key]
                form = attr.form

                # Check the elftools' results first: key and form must be
                # interpreted properly
                assert str(key).startswith('DW_AT_')
                assert str(form).startswith('DW_FORM_')

                # Check if attributes with locations are all found.
                # The converse is not true; on DWARF2, location expressions
                # can have form DW_FORM_block1
                if form == 'DW_FORM_exprloc':
                    assert LocationParser.attribute_has_location(
                        attr, CU['version'])

                # Now check the spell out logic
                for column in range(column_count):
                    cell = m.index(row, column, dummy_index)
                    m.data(cell, Qt.DisplayRole)
                details = m.get_attribute_details(
                    m.index(row, 0, dummy_index))
                if form == 'DW_FORM_section_offset':
                    assert details is not None
Пример #3
0
 def get_attribute_details(self, index):
     """Build a detail table for the attribute displayed at ``index``.

     Rows before ``self.meta_count`` are not attribute rows and yield None.
     For attribute rows a GenericTableModel is returned for DW_AT_ranges
     (range list), for attributes carrying a location (expression commands
     or location list), and for DW_AT_stmt_list (line program states).
     Any other attribute yields None.
     """
     attr_row = index.row() - self.meta_count
     if attr_row < 0:
         return None

     key = self.keys[attr_row]
     attr = self.attributes[key]

     if key == "DW_AT_ranges":
         di = self.die.dwarfinfo
         if not di._ranges:
             di._ranges = di.range_lists()
         if not di._ranges:  # Absent in the DWARF file
             return None
         ranges = di._ranges.get_range_list_at_offset(attr.value)
         # TODO: handle base addresses. Never seen those so far...
         cu_base = get_cu_base(self.die)
         range_rows = ((hex(cu_base + r.begin_offset),
                        hex(cu_base + r.end_offset)) for r in ranges)
         return GenericTableModel(("Start offset", "End offset"), range_rows)

     if LocationParser.attribute_has_location(attr, self.die.cu['version']):
         # Expression is a list of ints
         ll = self.parse_location(attr)
         if isinstance(ll, LocationExpr):
             command_rows = ((cmd,) for cmd in self.dump_expr(ll.loc_expr))
             return GenericTableModel(("Command",), command_rows)

         cu_base = get_cu_base(self.die)

         def span(entry):
             # Absolute start/end of one location list entry, as hex text.
             return (hex(cu_base + entry.begin_offset),
                     hex(cu_base + entry.end_offset))

         def spelled(entry):
             return '; '.join(self.dump_expr(entry.loc_expr))

         if self.lowlevel:
             headers = ("Start offset", "End offset", "Expr bytes", "Expression")
             values = (span(entry)
                       + (' '.join("%02x" % b for b in entry.loc_expr),
                          spelled(entry))
                       for entry in ll)
         else:
             headers = ("Start offset", "End offset", "Expression")
             values = (span(entry) + (spelled(entry),) for entry in ll)
         return GenericTableModel(headers, values)

     if key == 'DW_AT_stmt_list':
         cu = self.die.cu
         if cu._lineprogram is None:
             cu._lineprogram = self.die.dwarfinfo.line_program_for_CU(cu)
         lpe = cu._lineprogram.get_entries()
         files = cu._lineprogram.header.file_entry

         def mark(flag):
             return 'Y' if flag else ''

         def format_state(state):
             if state.file > 0:
                 file_name = files[state.file-1].name.decode('utf-8', errors='ignore')
             else:
                 file_name = '(N/A)'
             return (hex(state.address),
                     file_name,
                     state.line,
                     mark(state.is_stmt),
                     mark(state.basic_block),
                     mark(state.end_sequence),
                     mark(state.prologue_end),
                     mark(state.epilogue_begin))

         # Entries without a state are skipped.
         states = [format_state(e.state) for e in lpe if e.state]
         # TODO: low level flavor with extra details
         # TODO: commands vs states
         columns = ('Address', 'File', 'Line', 'Stmt', 'Basic block', 'End seq', 'End prologue', 'Begin epilogue')
         return GenericTableModel(columns, states)

     return None
 def _test_file(self, filename):
     """Parse every location-bearing attribute of a unit-test ELF file.

     The file is looked up under test/testfiles_for_unittests. Nothing is
     asserted explicitly; the check is that parsing does not raise.
     """
     filepath = os.path.join('test', 'testfiles_for_unittests', filename)
     with open(filepath, 'rb') as stream:
         dwarfinfo = ELFFile(stream).get_dwarf_info()
         locparser = LocationParser(dwarfinfo.location_lists())
         for CU in dwarfinfo.iter_CUs():
             ver = CU['version']
             for DIE in CU.iter_DIEs():
                 for attr in DIE.attributes.values():
                     if not LocationParser.attribute_has_location(attr, ver):
                         continue
                     # This will crash on unpatched library on DIE at 0x9f
                     locparser.parse_from_attribute(attr, ver)
Пример #5
0
def extract_string_variables(elf):
    """
    Collect the string (char) variables found in the DWARF data of an ELF file.

    Every DW_TAG_variable DIE in every compilation unit is considered. A
    variable is kept when it has both a type and a location attribute, passes
    ``is_die_var_const_char``, its location is a single expression whose
    description matches ``DT_LOCATION_REGEX``, and the matched address is
    greater than zero.

    Returns a list of dicts with the keys 'name', 'addr' and 'die'.
    """
    dwarf_info = elf.get_dwarf_info()
    loc_parser = LocationParser(dwarf_info.location_lists())

    found = []

    for compile_unit in dwarf_info.iter_CUs():
        for die in compile_unit.iter_DIEs():
            if die.tag != 'DW_TAG_variable':
                continue

            # Only care about variables with location information
            # and of type "char"
            attrs = die.attributes
            if 'DW_AT_type' not in attrs or 'DW_AT_location' not in attrs:
                continue
            if not is_die_var_const_char(compile_unit, die):
                continue

            # Extract location information, which is its address in memory.
            loc_attr = attrs['DW_AT_location']
            version = die.cu['version']
            if not loc_parser.attribute_has_location(loc_attr, version):
                continue
            loc = loc_parser.parse_from_attribute(loc_attr, version)
            if not isinstance(loc, LocationExpr):
                continue

            try:
                description = describe_DWARF_expr(
                    loc.loc_expr, dwarf_info.structs)
                matcher = DT_LOCATION_REGEX.match(description)
                if matcher:
                    addr = int(matcher.group(1), 16)
                    if addr > 0:
                        found.append({
                            'name': die.attributes['DW_AT_name'].value,
                            'addr': addr,
                            'die': die,
                        })
            except KeyError:
                # Best effort: a variable that triggers KeyError here
                # (e.g. one without DW_AT_name) is skipped.
                pass

    return found
Пример #6
0
 def _test_file(self, filename):
     """Assert that every DW_FORM_exprloc attribute is seen as a location.

     The file is looked up under test/testfiles_for_unittests and every
     attribute of every DIE in every compile unit is checked.
     """
     filepath = os.path.join('test', 'testfiles_for_unittests', filename)
     with open(filepath, 'rb') as stream:
         dwarfinfo = ELFFile(stream).get_dwarf_info()
         for CU in dwarfinfo.iter_CUs():
             ver = CU['version']
             for DIE in CU.iter_DIEs():
                 for key, attr in DIE.attributes.items():
                     if attr.form != 'DW_FORM_exprloc':
                         continue
                     recognized = LocationParser.attribute_has_location(
                         attr, ver)
                     self.assertTrue(
                         recognized,
                         "Attribute %s not recognized as a location" % key)
Пример #7
0
 def format_value(self, attr):
     """Return the display form of a DIE attribute's value.

     The checks run in a fixed order: address form, flag_present form,
     reference forms, location-bearing attributes, then a handful of
     attribute names with dedicated formatting, and finally generic
     handling by value type (bytes, list, anything else).
     """
     key, val, form = attr.name, attr.value, attr.form

     def described(table):
         # "<number> <description>" when the table knows the value,
         # otherwise the raw value unchanged.
         return "%d %s" % (val, table[val]) if val in table else val

     if form == 'DW_FORM_addr' and isinstance(val, int):
         return hex(val)
     if form == 'DW_FORM_flag_present':
         return ''
     if form in ('DW_FORM_ref0', 'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref_addr'):
         # There are several other reference forms in the spec
         return "Ref: 0x%x" % val
     if LocationParser.attribute_has_location(attr, self.die.cu['version']):
         ll = self.parse_location(attr)
         if isinstance(ll, LocationExpr):
             return '; '.join(self.dump_expr(ll.loc_expr))
         return "Loc list: 0x%x" % attr.value

     if key == 'DW_AT_language':
         return described(_DESCR_DW_LANG)
     if key == 'DW_AT_encoding':
         return described(_DESCR_DW_ATE)
     if key == 'DW_AT_accessibility':
         return described(_DESCR_DW_ACCESS)
     if key == 'DW_AT_inline':
         return described(_DESCR_DW_INL)

     if key == 'DW_AT_decl_file':
         cu = self.die.cu
         if cu._lineprogram is None:
             cu._lineprogram = self.die.dwarfinfo.line_program_for_CU(cu)
         if val > 0:
             file_name = cu._lineprogram.header.file_entry[val-1].name.decode('utf-8', errors='ignore')
             return "%d: %s" % (val, file_name)
         return "0: (N/A)"
     if key == 'DW_AT_stmt_list':
         return 'LNP at 0x%x' % val

     if isinstance(val, bytes):
         if form in ('DW_FORM_strp', 'DW_FORM_string'):
             return val.decode('utf-8', errors='ignore')
         if val == b'':  # What's a good value for a blank blob?
             return '[]'
         return ' '.join("%02x" % b for b in val)  # Something like "01 ff 33 55"

     if isinstance(val, list):  # block1 comes across as this
         if not val:
             return '[]'
         if isinstance(val[0], int):  # Assuming it's a byte array disguised as int array
             return ' '.join("%02x" % b for b in val)
         return str(val)  # List of something else

     if self.hex and isinstance(val, int):
         return hex(val)
     return str(val)
Пример #8
0
class ElfDwarf:
    """Extract function and global-variable layout from an ELF file's DWARF.

    Constructing an instance does all the work: the ELF file is opened, its
    DWARF info is walked to fill ``functions`` and ``global_var``, then the
    binary is run under the Pin taint tool and the resulting trace is handed
    to ``extractfromtrace`` together with the collected layout.

    Args:
        elf_file_path: Path of the ELF file (required).
        inputfile: Path of a file whose content is fed to the traced
            program's stdin, or None to run it without input.
        resultdir: Directory in which the "<elf name>.out" result is placed.

    Attributes:
        base_type_map: Map from id to type (initialised empty here).
        addr2type_map: Map from address to type (initialised empty here).
        functions: One dict per DW_TAG_subprogram with key "name" and, when
            the subprogram has children, "stack_variables".
        global_var: One dict per external variable whose size, type name and
            location could all be determined.
    """

    def __init__(self, elf_file_path, inputfile, resultdir):
        self.elf_file_path = elf_file_path
        self.result_file_path = self.elf_file_path + ".type"
        self.inputfile = inputfile
        self.resultdir = resultdir
        # To save the basic information.
        self.base_type_map = {}
        self.addr2type_map = {}
        self.CU_TYPE = None
        self.functions = []
        self.global_var = []

        # To support extract the dwarf
        self.loc_parser = None
        self.CU = None
        self.dwarfinfo = None

        print('Processing file:', self.elf_file_path)
        with open(self.elf_file_path, 'rb') as f:
            elffile = ELFFile(f)

            if not elffile.has_dwarf_info():
                print('  file has no DWARF info')
                return

            self._collect_debug_info(elffile)
            # The trace still runs while the ELF stream is open, exactly as
            # it did before the constructor was split into helpers.
            self._run_pin_trace()

    def _collect_debug_info(self, elffile):
        """Walk all compile units and record subprograms and external variables."""
        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        self.dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = self.dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        self.loc_parser = LocationParser(location_lists)

        for CU in self.dwarfinfo.iter_CUs():
            # CU is a CompileUnit object; header fields are read via
            # item-lookup.
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            # The per-DIE handlers read the current unit from self.
            self.CU = CU
            self.CU_TYPE = get_compile_unit_types(self.CU)

            for die in CU.iter_DIEs():
                if die.tag == 'DW_TAG_subprogram':
                    print("")
                    self.process_subprogram(die)
                elif (die.tag == 'DW_TAG_variable'
                      and 'DW_AT_external' in die.attributes):
                    print("")
                    self.process_global_var(die)

    def _run_pin_trace(self):
        """Run the binary under Pin and pass its trace to extractfromtrace."""
        pincmd = [
            '../pin/pin', '-t',
            '../TaintAnalysisWithPin/obj-ia32/taint.so', '--',
            self.elf_file_path
        ]
        print(pincmd)
        # The path is text (it is concatenated with ".type" above), so it
        # must be split on a text separator; a bytes separator raises
        # TypeError on Python 3.
        result = (self.resultdir + '/' +
                  self.elf_file_path.rsplit('/', 1)[-1] + ".out")
        print(result)
        try:
            if self.inputfile is not None:
                print("fead input")
                # Read the input before starting the child so that a missing
                # input file does not leave a stray process behind.
                with open(self.inputfile, 'rb') as stdin_file:
                    stdin_lines = stdin_file.readlines()
                for line in stdin_lines:
                    print("Give Std Input:%s" % line)
                process = subprocess.Popen(pincmd,
                                           stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE)
                # communicate() feeds stdin and drains stdout together;
                # writing all of stdin first can deadlock once the child
                # fills the stdout pipe.
                trace = process.communicate(b''.join(stdin_lines))[0]
            else:
                print("running without input")
                trace = subprocess.check_output(pincmd)
            tracelist = loadtrace(trace)
            extractfromtrace(tracelist, self.global_var, self.functions,
                             result)
        except subprocess.CalledProcessError as e:
            print("run pin error(%s)" % e)

    @staticmethod
    def _die_name(die):
        """Return the DW_AT_name value of ``die``, or None when it has none."""
        if 'DW_AT_name' in die.attributes:
            return die.attributes['DW_AT_name'].value
        return None

    def _location_expr_parts(self, DIE):
        """Yield the decoded expression parts of DIE's inline locations.

        Attributes whose location is a location list rather than a single
        LocationExpr are skipped.
        """
        version = self.CU['version']
        for attr in itervalues(DIE.attributes):
            if not self.loc_parser.attribute_has_location(attr, version):
                continue
            loc = self.loc_parser.parse_from_attribute(attr, version)
            # We either get a list (in case the attribute is a reference to
            # the .debug_loc section) or a LocationExpr object (in case the
            # attribute itself contains location information).
            if isinstance(loc, LocationExpr):
                dwarf_expr_dumper = extract_DWARF_expr(
                    loc.loc_expr, self.dwarfinfo.structs)
                for item in dwarf_expr_dumper._str_parts:
                    yield item

    def show_loclist(self, loclist, dwarfinfo, indent):
        """ Display a location list nicely, decoding the DWARF expressions
            contained within.

            Returns the entries joined by newlines, each prefixed with
            ``indent``; an empty list gives an empty string.
        """
        d = []
        for loc_entity in loclist:
            if isinstance(loc_entity, LocationEntry):
                d.append('%s <<%s>>' %
                         (loc_entity,
                          describe_DWARF_expr(loc_entity.loc_expr,
                                              dwarfinfo.structs)))
            else:
                d.append(str(loc_entity))
        return '\n'.join(indent + s for s in d)

    def process_global_var(self, DIE):
        """Record a global variable DIE in ``self.global_var``.

        The variable is recorded only when its size and type name are known
        and one of its inline location expressions yields a parseable
        "<op>:<hex offset>" part; the first such part wins.
        """
        entry = {"name": self._die_name(DIE)}

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return
        entry["size"] = variable_size
        entry["type_name"] = variable_type_name

        for item in self._location_expr_parts(DIE):
            sep = item.find(':')
            if sep == -1:
                # Parts without an operand cannot be parsed; skip them the
                # same way process_subprogram_variable does.
                continue
            baseregister = item[:sep]
            # int() ignores surrounding whitespace, so a space after the
            # colon needs no special handling.
            offset = int(item[sep + 1:], 16)
            print("%s:%s:%d:%s" %
                  (entry["name"], baseregister, offset, entry["type_name"]))
            entry["offset"] = offset
            entry["breg"] = baseregister
            self.global_var.append(entry)
            return

    def process_subprogram(self, subprogram_die):
        """Record a subprogram DIE and its variables in ``self.functions``.

        Subprograms are recorded whether or not they carry a
        DW_AT_frame_base attribute. "stack_variables" is only present for
        subprograms that have children.
        """
        function = {}
        # Appended first: process_subprogram_variable works on functions[-1].
        self.functions.append(function)
        if 'DW_AT_name' in subprogram_die.attributes:
            function["name"] = subprogram_die.attributes['DW_AT_name'].value
        else:
            print("Does not find function name")
            function["name"] = None
        print("function name")
        print(function["name"])

        if subprogram_die.has_children:
            function["stack_variables"] = []

            # Handle all variables and parameters that are children of the
            # current DIE (the current function)
            for child in subprogram_die.iter_children():
                if child.tag in ('DW_TAG_variable',
                                 'DW_TAG_formal_parameter'):
                    self.process_subprogram_variable(child)

    def process_subprogram_variable(self, DIE):
        """Record a variable or parameter of the most recent function.

        Does nothing when the current function has no "stack_variables"
        list. The variable is recorded only when its size and type name are
        known and at least one inline location part could be parsed; when
        several parts parse, the last one determines "offset" and "breg".
        """
        function = self.functions[-1]
        stack_variables = function.get("stack_variables")
        if stack_variables is None:
            return

        variable = {"name": self._die_name(DIE)}

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return
        variable["size"] = variable_size
        variable["type_name"] = variable_type_name

        for item in self._location_expr_parts(DIE):
            print(item)
            sep = item.find(':')
            if sep == -1:
                continue
            baseregister = item[:sep]
            # DW_OP_addr operands are parsed as hexadecimal, every other
            # operand as decimal.
            base = 16 if baseregister == "DW_OP_addr" else 10
            offset = int(item[sep + 1:], base)
            print("%s:%s:%s:%d:%s" %
                  (function["name"], variable["name"], baseregister,
                   offset, variable["type_name"]))
            variable["offset"] = offset
            variable["breg"] = baseregister

        if "breg" in variable:
            stack_variables.append(variable)