def _test_file(self, filename):
    """Parse every location-bearing attribute found in a test ELF file.

    Opens ``test/testfiles_for_unittests/<filename>``, walks every attribute
    of every DIE of every compile unit, and hands each attribute accepted by
    ``LocationParser.attribute_has_location`` to ``parse_from_attribute``.
    The parsed results are discarded; the point of the walk is that parsing
    completes without raising.

    :param filename: name of a file inside ``test/testfiles_for_unittests``.
    """
    path = os.path.join('test', 'testfiles_for_unittests', filename)
    with open(path, 'rb') as stream:
        dwarf = ELFFile(stream).get_dwarf_info()
        parser = LocationParser(dwarf.location_lists())
        for cu in dwarf.iter_CUs():
            version = cu['version']
            for die in cu.iter_DIEs():
                for name in die.attributes:
                    attribute = die.attributes[name]
                    if not LocationParser.attribute_has_location(
                            attribute, version):
                        continue
                    # This will crash on unpatched library on DIE at 0x9f
                    parser.parse_from_attribute(attribute, version)
Пример #2
0
def process_file(filename):
    """Print the location information of every DIE attribute in an ELF file.

    For each compile unit the offset and length are printed; then, for each
    attribute that ``LocationParser.attribute_has_location`` accepts, the
    owning DIE's name, tag and attribute name are printed followed by the
    decoded location (a single expression or a location list).

    Nothing is returned. If the file carries no DWARF information a notice
    is printed and the function returns early.

    NOTE(review): ``show_loclist`` is called as a module-level helper and is
    expected to be defined elsewhere in this file.

    :param filename: path of the ELF file to inspect.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()

        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            # DWARFInfo allows to iterate over the compile units contained in
            # the .debug_info section. CU is a CompileUnit object, with some
            # computed attributes (such as its offset in the section) and
            # a header which conforms to the DWARF standard. The access to
            # header elements is, as usual, via item-lookup.
            print('  Found a compile unit at offset %s, length %s' %
                  (CU.cu_offset, CU['unit_length']))
            cu_version = CU['version']

            # A CU provides a simple API to iterate over all the DIEs in it.
            for DIE in CU.iter_DIEs():
                # Not every DIE that owns a location has a name (the lookup
                # used to raise KeyError for those), so resolve the name
                # defensively; unnamed DIEs are reported as None.
                name_attr = DIE.attributes.get('DW_AT_name')
                var_name = name_attr.value if name_attr is not None else None

                # Go over all attributes of the DIE. Each attribute is an
                # AttributeValue object (from elftools.dwarf.die), which we
                # can examine.
                for attr in itervalues(DIE.attributes):
                    # Skip attributes that carry no location information.
                    if not loc_parser.attribute_has_location(attr, cu_version):
                        continue

                    print(' Varname:%s' % (var_name,))
                    print('   DIE %s. attr %s.' % (DIE.tag, attr.name))
                    loc = loc_parser.parse_from_attribute(attr, cu_version)
                    # We either get a list (in case the attribute is a
                    # reference to the .debug_loc section) or a LocationExpr
                    # object (in case the attribute itself contains location
                    # information).
                    if isinstance(loc, LocationExpr):
                        print('      %s' % (describe_DWARF_expr(
                            loc.loc_expr, dwarfinfo.structs)))
                    elif isinstance(loc, list):
                        print(show_loclist(loc, dwarfinfo,
                                           indent='      '))
Пример #3
0
def extract_string_variables(elf):
    """
    Collect the string variables described by the DWARF data of an ELF file.

    Every ``DW_TAG_variable`` DIE of every compile unit is considered when it
    has both ``DW_AT_type`` and ``DW_AT_location`` attributes and
    ``is_die_var_const_char`` accepts it. The location expression is rendered
    with ``describe_DWARF_expr`` and matched against ``DT_LOCATION_REGEX``;
    the first group of the match is read as a hexadecimal address.

    Returns a list of dicts with the keys ``'name'``, ``'addr'`` and
    ``'die'``, one per variable whose address is greater than zero.
    A ``KeyError`` raised while describing or recording a variable causes
    that variable to be skipped silently.
    """
    dwarf_info = elf.get_dwarf_info()
    loc_parser = LocationParser(dwarf_info.location_lists())

    found = []

    for cu in dwarf_info.iter_CUs():
        for die in cu.iter_DIEs():
            # Only variables that carry both a type and a location qualify.
            if die.tag != 'DW_TAG_variable':
                continue
            attrs = die.attributes
            if 'DW_AT_type' not in attrs or 'DW_AT_location' not in attrs:
                continue
            if not is_die_var_const_char(cu, die):
                continue

            # The location attribute holds the variable's address in memory.
            location_attr = attrs['DW_AT_location']
            version = die.cu['version']
            if not loc_parser.attribute_has_location(location_attr, version):
                continue
            loc = loc_parser.parse_from_attribute(location_attr, version)
            if not isinstance(loc, LocationExpr):
                continue

            try:
                described = describe_DWARF_expr(loc.loc_expr,
                                                dwarf_info.structs)
                match = DT_LOCATION_REGEX.match(described)
                if not match:
                    continue
                address = int(match.group(1), 16)
                if address > 0:
                    found.append({
                        'name': die.attributes['DW_AT_name'].value,
                        'addr': address,
                        'die': die,
                    })
            except KeyError:
                continue

    return found
Пример #4
0
def get_var_offset(filename, function_name, var_name):
    """Return the ``DW_OP_fbreg`` operand of a variable inside a function.

    The DWARF data of *filename* is searched for ``DW_TAG_subprogram`` DIEs
    whose ``DW_AT_name`` equals *function_name*. Their direct children tagged
    ``DW_TAG_variable`` or ``DW_TAG_formal_parameter`` are examined, and for
    the first child named *var_name* the first argument of the last
    ``DW_OP_fbreg`` operation in its location expression is returned.

    Returns 0 when the matching child has no such operation, and ``None``
    when nothing matches or the file has no DWARF information (a notice is
    printed in the latter case).

    :param filename: path of the ELF file.
    :param function_name: value compared against the subprogram's DW_AT_name.
    :param var_name: value compared against the child's DW_AT_name.
    """
    with open(filename, 'rb') as stream:
        elf = ELFFile(stream)

        if not elf.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # DWARFInfo is the entry point for all DWARF processing; the location
        # lists come from the .debug_loc section.
        dwarf = elf.get_dwarf_info()
        loc_lists = dwarf.location_lists()

        # Needed by the descriptions module to decode register names that
        # appear in DWARF expressions.
        set_global_machine_arch(elf.get_machine_arch())

        # Turns DIE attributes into objects describing the actual location.
        parser = LocationParser(loc_lists)

        candidate_tags = ("DW_TAG_variable", "DW_TAG_formal_parameter")

        for cu in dwarf.iter_CUs():
            for die in cu.iter_DIEs():
                # Only functions are of interest.
                if die.tag != "DW_TAG_subprogram":
                    continue

                current_name = ""
                for attribute in itervalues(die.attributes):
                    if attribute.name == "DW_AT_name":
                        current_name = attribute.value
                if current_name != function_name:
                    continue

                for child in die.iter_children():
                    if child.tag not in candidate_tags:
                        continue

                    name_matches = False
                    fb_offset = 0
                    for attribute in itervalues(child.attributes):
                        if (attribute.name == "DW_AT_name"
                                and attribute.value == var_name):
                            name_matches = True
                        # The location is parsed for every candidate child,
                        # whether or not its name matches.
                        if attribute.name == "DW_AT_location":
                            loc = parser.parse_from_attribute(
                                attribute, cu['version'])
                            if isinstance(loc, LocationExpr):
                                expr_parser = DWARFExprParser(dwarf.structs)
                                for op in expr_parser.parse_expr(loc.loc_expr):
                                    if op.op_name == 'DW_OP_fbreg':
                                        fb_offset = op.args[0]

                    if name_matches:
                        return fb_offset
Пример #5
0
class ElfDwarf:
    """
    Collect function and global-variable layout from the DWARF data of an
    ELF file, then run the binary under Pin and post-process the trace.

    All of the work happens in the constructor.

    :elf_file_path(must): the path of elf file
    :inputfile: path of a file whose content is fed to the traced program's
        standard input, or None to run the program without input
    :resultdir: directory used to build the "<elf file name>.out" result path
    :functions: list of dicts, one per DW_TAG_subprogram DIE ("name" and,
        when the DIE has children, "stack_variables")
    :global_var: list of dicts, one per external variable with a usable
        location ("name", "size", "type_name", "offset", "breg")
    :addr2type_map: map address to type (initialised empty; not filled by
        the code in this class)
    :base_type_map: map id to type (initialised empty; not filled by the
        code in this class)
    """
    def __init__(self, elf_file_path, inputfile, resultdir):
        self.elf_file_path = elf_file_path
        self.result_file_path = self.elf_file_path + ".type"
        self.inputfile = inputfile
        self.resultdir = resultdir
        # To save the basic information.
        self.base_type_map = {}
        self.addr2type_map = {}
        self.CU_TYPE = None
        self.functions = []
        self.global_var = []

        # To support extract the dwarf
        self.loc_parser = None
        self.CU = None
        self.dwarfinfo = None

        print('Processing file:', self.elf_file_path)
        with open(self.elf_file_path, 'rb') as f:
            elffile = ELFFile(f)

            if not elffile.has_dwarf_info():
                print('  file has no DWARF info')
                return

            # get_dwarf_info returns a DWARFInfo context object, which is the
            # starting point for all DWARF-based processing in pyelftools.
            self.dwarfinfo = elffile.get_dwarf_info()

            # The location lists are extracted by DWARFInfo from the .debug_loc
            # section, and returned here as a LocationLists object.
            location_lists = self.dwarfinfo.location_lists()

            # This is required for the descriptions module to correctly decode
            # register names contained in DWARF expressions.
            set_global_machine_arch(elffile.get_machine_arch())

            # Create a LocationParser object that parses the DIE attributes and
            # creates objects representing the actual location information.
            self.loc_parser = LocationParser(location_lists)

            for CU in self.dwarfinfo.iter_CUs():
                # DWARFInfo allows to iterate over the compile units contained in
                # the .debug_info section. CU is a CompileUnit object, with some
                # computed attributes (such as its offset in the section) and
                # a header which conforms to the DWARF standard. The access to
                # header elements is, as usual, via item-lookup.
                print('  Found a compile unit at offset %s, length %s' %
                      (CU.cu_offset, CU['unit_length']))
                # The process_* methods read the current CU and its type
                # table from the instance.
                self.CU = CU
                self.CU_TYPE = get_compile_unit_types(self.CU)

                # A CU provides a simple API to iterate over all the DIEs in it.
                for die in CU.iter_DIEs():
                    if die.tag == 'DW_TAG_subprogram':
                        print("")
                        self.process_subprogram(die)
                    elif (die.tag == 'DW_TAG_variable'
                          and 'DW_AT_external' in die.attributes):
                        print("")
                        self.process_global_var(die)

            pincmd = [
                '../pin/pin', '-t',
                '../TaintAnalysisWithPin/obj-ia32/taint.so', '--',
                elf_file_path
            ]
            print(pincmd)
            # The path is a text string (it is concatenated with ".type"
            # above), so it must be split on a text separator; splitting on
            # b'/' raises TypeError on Python 3.
            result = self.resultdir + '/' + elf_file_path.split(
                '/')[-1] + ".out"
            print(result)
            try:
                trace = self._run_pin(pincmd)
                tracelist = loadtrace(trace)
                extractfromtrace(tracelist, self.global_var, self.functions,
                                 result)
            except subprocess.CalledProcessError as e:
                print("run pin error(%s)" % e)

    def _run_pin(self, pincmd):
        """ Run *pincmd* and return what it wrote to standard output.

            When self.inputfile is set, the content of that file is fed to
            the process through standard input. Raises
            subprocess.CalledProcessError only on the no-input path (from
            subprocess.check_output).
        """
        if self.inputfile is None:
            print("running without input")
            return subprocess.check_output(pincmd)

        print("fead input")
        # Read the input before starting the process so that a missing input
        # file does not leave a child process behind.
        with open(self.inputfile, 'rb') as input_stream:
            input_lines = input_stream.readlines()
        for line in input_lines:
            print("Give Std Input:%s" % line)
        process = subprocess.Popen(pincmd,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE)
        # communicate() writes stdin and drains stdout concurrently and
        # closes the pipes itself; writing everything to stdin first can
        # block forever once the pipe buffers fill up.
        return process.communicate(b''.join(input_lines))[0]

    def show_loclist(self, loclist, dwarfinfo, indent):
        """ Display a location list nicely, decoding the DWARF expressions
            contained within.

            Returns the entries joined with newlines, each prefixed by
            *indent*.
        """
        d = []
        for loc_entity in loclist:
            if isinstance(loc_entity, LocationEntry):
                d.append('%s <<%s>>' %
                         (loc_entity,
                          describe_DWARF_expr(loc_entity.loc_expr,
                                              dwarfinfo.structs)))
            else:
                d.append(str(loc_entity))
        return '\n'.join(indent + s for s in d)

    def process_global_var(self, DIE):
        """ Record a global variable DIE in self.global_var.

            The variable is recorded only when its size and type name can be
            resolved and one of its location expressions yields a
            "<register>: <hex offset>" part; otherwise nothing is added.
        """
        if 'DW_AT_name' in DIE.attributes:
            name = DIE.attributes['DW_AT_name'].value
        else:
            name = None

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return

        # Build the entry locally and publish it only once it is complete,
        # so a failure half-way never leaves a partial entry in the list.
        entry = {
            "name": name,
            "size": variable_size,
            "type_name": variable_type_name,
        }

        cu_version = self.CU['version']
        for attr in itervalues(DIE.attributes):
            # Check if this attribute contains location information
            if not self.loc_parser.attribute_has_location(attr, cu_version):
                continue
            loc = self.loc_parser.parse_from_attribute(attr, cu_version)
            # We either get a list (in case the attribute is a
            # reference to the .debug_loc section) or a LocationExpr
            # object (in case the attribute itself contains location
            # information). Only the latter is used here.
            if not isinstance(loc, LocationExpr):
                continue
            dwarf_expr_dumper = extract_DWARF_expr(loc.loc_expr,
                                                   self.dwarfinfo.structs)
            for item in dwarf_expr_dumper._str_parts:
                separator = item.find(':')
                # Parts without an operand cannot be split into register
                # and offset (same guard as process_subprogram_variable).
                if separator == -1:
                    continue
                baseregister = item[0:separator]
                offset = int(item[separator + 2:], 16)
                print("%s:%s:%d:%s" %
                      (entry["name"], baseregister, offset,
                       entry["type_name"]))
                entry["offset"] = offset
                entry["breg"] = baseregister
                # The first usable part decides the location.
                self.global_var.append(entry)
                return

    def process_subprogram(self, subprogram_die):
        """ Record a function DIE in self.functions together with its
            variables and formal parameters (direct children only).
        """
        function = {}
        # Published first: process_subprogram_variable works on
        # self.functions[-1].
        self.functions.append(function)
        if 'DW_AT_name' in subprogram_die.attributes:
            function["name"] = subprogram_die.attributes['DW_AT_name'].value
        else:
            print("Does not find function name")
            function["name"] = None
        print("function name")
        print(function["name"])

        if subprogram_die.has_children:
            function["stack_variables"] = []

            for child in subprogram_die.iter_children():
                if child.tag in ('DW_TAG_variable',
                                 'DW_TAG_formal_parameter'):
                    self.process_subprogram_variable(child)

    def process_subprogram_variable(self, DIE):
        """ Record a variable or parameter DIE in the "stack_variables" list
            of the most recently recorded function.

            The variable is recorded only when its size and type name can be
            resolved and a location expression yields a
            "<register>:<offset>" part. Offsets of DW_OP_addr parts are read
            as hexadecimal, all others as decimal. When several parts
            qualify, the last one wins.
        """
        function = self.functions[-1]
        stack_variables = function.get("stack_variables")
        if stack_variables is None:
            return

        if 'DW_AT_name' in DIE.attributes:
            name = DIE.attributes['DW_AT_name'].value
        else:
            name = None

        variable_size, variable_type_name = get_variable_size_and_name(
            DIE, self.CU, self.CU_TYPE)
        if variable_size is None or variable_type_name is None:
            return

        variable = {
            "name": name,
            "size": variable_size,
            "type_name": variable_type_name,
        }

        cu_version = self.CU['version']
        for attr in itervalues(DIE.attributes):
            # Check if this attribute contains location information
            if not self.loc_parser.attribute_has_location(attr, cu_version):
                continue
            loc = self.loc_parser.parse_from_attribute(attr, cu_version)
            # We either get a list (in case the attribute is a
            # reference to the .debug_loc section) or a LocationExpr
            # object (in case the attribute itself contains location
            # information). Only the latter is used here.
            if not isinstance(loc, LocationExpr):
                continue
            dwarf_expr_dumper = extract_DWARF_expr(loc.loc_expr,
                                                   self.dwarfinfo.structs)
            for item in dwarf_expr_dumper._str_parts:
                print(item)
                separator = item.find(':')
                if separator == -1:
                    continue
                baseregister = item[0:separator]
                # Absolute addresses are written in hex, register-relative
                # offsets in decimal.
                radix = 16 if baseregister == "DW_OP_addr" else 10
                offset = int(item[separator + 1:], radix)
                print("%s:%s:%s:%d:%s" %
                      (function["name"], variable["name"], baseregister,
                       offset, variable["type_name"]))
                variable["offset"] = offset
                variable["breg"] = baseregister

        # Variables without a usable location are not recorded.
        if "breg" in variable:
            stack_variables.append(variable)