Пример #1
0
def test_disasm_against_objdump(objdump_path, binary_path):
    """Compare HexagonDisassembler output with objdump, word by word.

    Every executable segment (``PF_X`` flag set) of the ELF file at
    ``binary_path`` is walked one 32-bit little-endian word at a time.  Each
    word is disassembled with ``HexagonDisassembler(objdump_compatible=True)``
    and with ``ObjdumpWrapper(objdump_path)``; when the two stripped texts
    differ both are printed.  At the end the elapsed time and the percentage
    of matching words are printed.

    :param objdump_path: passed unchanged to ``ObjdumpWrapper``.
    :param binary_path: path of the ELF file to open and disassemble.
    """
    # TODO: code repetition from test_disasm_standalone, encapsulate inner functionality.

    start_time = time.time()
    total_inst = 0
    match_inst = 0

    print('Processing file: {}'.format(binary_path))

    # Keep the file open for as long as the ELF parser reads from it, and
    # make sure it is closed on every exit path (including the early return).
    with open(binary_path, 'rb') as binary_file:
        elf_file = ELFFile(binary_file)

        if elf_file.num_segments() == 0:
            print('There are no program headers in this file.')
            return

        objdump = ObjdumpWrapper(objdump_path)
        disasm = HexagonDisassembler(objdump_compatible=True)

        for segment in elf_file.iter_segments():
            if not segment['p_flags'] & P_FLAGS.PF_X:
                continue

            print("Offset: {:x}".format(segment['p_offset']))
            print("VirtAddr: {:x}".format(segment['p_vaddr']))
            print("FileSiz: {:x}".format(segment['p_filesz']))

            segment_data = segment.data()
            segment_len = len(segment_data)
            data_pos = 0

            # NOTE(review): the loop bound uses INST_SIZE while the step and
            # the unpack width are the literal 4 -- assumes INST_SIZE == 4.
            while data_pos + INST_SIZE <= segment_len:

                addr = segment['p_vaddr'] + data_pos

                inst_as_int = struct.unpack(
                    '<I', segment_data[data_pos:data_pos + 4])[0]

                disasm_output = disasm.disasm_one_inst(inst_as_int,
                                                       addr).text.strip()

                # objdump receives up to four words (16 bytes) starting at
                # the current one, clamped to the end of the segment.  The
                # clamp must use the buffer *length*: comparing the int with
                # the buffer itself raises TypeError on Python 3.
                window_end = min(data_pos + 4 * 4, segment_len)
                objdump_output = objdump.disasm_packet_raw(
                    segment_data[data_pos:window_end], addr).strip()

                if objdump_output != disasm_output:
                    print("[{:08x}] {:s}".format(addr, objdump_output))
                    print("[{:08x}] {:s}".format(addr, disasm_output))
                    print()
                else:
                    match_inst += 1

                data_pos += 4
                total_inst += 1

    elapsed_time = time.time() - start_time

    print("Elapsed time: {0:.2f}".format(elapsed_time))
    if total_inst:
        print('Match: {0:.2f}%'.format(match_inst / total_inst * 100))
    else:
        # No executable segment held a full instruction: nothing to divide by.
        print('Match: n/a (no instructions were disassembled)')
Пример #2
0
def test_disasm_against_objdump(objdump_path, binary_path):
    """Check the Hexagon disassembler against objdump on an ELF binary.

    For each executable segment (``PF_X`` flag set) of the file at
    ``binary_path`` the data is read as consecutive 32-bit little-endian
    words.  Each word is disassembled by
    ``HexagonDisassembler(objdump_compatible=True)`` and by
    ``ObjdumpWrapper(objdump_path)``; mismatching pairs are printed, and a
    summary (elapsed time, match percentage) is printed at the end.

    :param objdump_path: passed unchanged to ``ObjdumpWrapper``.
    :param binary_path: path of the ELF file to open and disassemble.
    """
    # TODO: code repetition from test_disasm_standalone, encapsulate inner functionality.

    start_time = time.time()
    total_inst = 0
    match_inst = 0

    print('Processing file: {}'.format(binary_path))

    # The context manager closes the file on every exit path; the ELF parser
    # reads from it lazily, so all segment access happens inside the block.
    with open(binary_path, 'rb') as binary_file:
        elf_file = ELFFile(binary_file)

        if elf_file.num_segments() == 0:
            print('There are no program headers in this file.')
            return

        objdump = ObjdumpWrapper(objdump_path)
        disasm = HexagonDisassembler(objdump_compatible=True)

        for segment in elf_file.iter_segments():
            if not segment['p_flags'] & P_FLAGS.PF_X:
                continue

            print("Offset: {:x}".format(segment['p_offset']))
            print("VirtAddr: {:x}".format(segment['p_vaddr']))
            print("FileSiz: {:x}".format(segment['p_filesz']))

            segment_data = segment.data()
            data_len = len(segment_data)
            data_pos = 0

            # NOTE(review): loop bound uses INST_SIZE, step/unpack use the
            # literal 4 -- assumes INST_SIZE == 4.
            while data_pos + INST_SIZE <= data_len:

                addr = segment['p_vaddr'] + data_pos

                inst_as_int = struct.unpack('<I', segment_data[data_pos: data_pos + 4])[0]

                disasm_output = disasm.disasm_one_inst(inst_as_int, addr).text.strip()

                # Hand objdump up to four words (16 bytes), clamped to the
                # segment end.  The original clamped against the buffer
                # object instead of its length, a TypeError on Python 3.
                packet_end = min(data_pos + 4 * 4, data_len)
                objdump_output = objdump.disasm_packet_raw(
                    segment_data[data_pos: packet_end],
                    addr).strip()

                if objdump_output != disasm_output:
                    print("[{:08x}] {:s}".format(addr, objdump_output))
                    print("[{:08x}] {:s}".format(addr, disasm_output))
                    print()
                else:
                    match_inst += 1

                data_pos += 4
                total_inst += 1

    elapsed_time = time.time() - start_time

    print("Elapsed time: {0:.2f}".format(elapsed_time))
    if total_inst:
        print('Match: {0:.2f}%'.format(match_inst / total_inst * 100))
    else:
        # Nothing was disassembled, so there is no ratio to report.
        print('Match: n/a (no instructions were disassembled)')
Пример #3
0
def test_disasm_standalone(binary_path, timeout=None):
    """Disassemble every executable segment of an ELF file under cProfile.

    Each executable segment (``PF_X`` flag set) of the file at
    ``binary_path`` is read as consecutive 32-bit little-endian words; every
    word is disassembled with ``HexagonDisassembler`` and printed together
    with its virtual address.  Afterwards the 20 most expensive profile
    entries (sorted by cumulative time), the instruction count and the
    elapsed time are printed.

    :param binary_path: path of the ELF file to open and disassemble.
    :param timeout: optional limit in seconds; once exceeded, disassembly
        stops and the summary is printed.  ``None`` (or 0) means no limit.
    """
    profile = cProfile.Profile()
    profile.enable()

    start_time = time.time()
    total_inst = 0
    timed_out = False

    # try/finally guarantees the profiler is switched off even on the early
    # return below or when disassembly raises.
    try:
        print('Processing file: {}'.format(binary_path))

        # The ELF parser reads lazily, so the file must stay open while the
        # segments are consumed; the context manager closes it afterwards.
        with open(binary_path, 'rb') as binary_file:
            elf_file = ELFFile(binary_file)

            if elf_file.num_segments() == 0:
                print('There are no program headers in this file.')
                return

            disasm = HexagonDisassembler()

            for segment in elf_file.iter_segments():
                if not segment['p_flags'] & P_FLAGS.PF_X:
                    continue

                print("Offset: {:x}".format(segment['p_offset']))
                print("VirtAddr: {:x}".format(segment['p_vaddr']))
                print("FileSiz: {:x}".format(segment['p_filesz']))

                segment_data = segment.data()
                data_pos = 0

                # NOTE(review): loop bound uses INST_SIZE, step/unpack use
                # the literal 4 -- assumes INST_SIZE == 4.
                while data_pos + INST_SIZE <= len(segment_data):

                    addr = segment['p_vaddr'] + data_pos

                    inst_as_int = struct.unpack(
                        '<I', segment_data[data_pos:data_pos + 4])[0]

                    dis = disasm.disasm_one_inst(inst_as_int, addr)
                    print("[{:08x}] {:s}".format(addr, dis.text))

                    data_pos += 4
                    total_inst += 1

                    if timeout and (time.time() - start_time) > timeout:
                        timed_out = True
                        break

                # A plain ``break`` only leaves the inner loop; stop walking
                # the remaining segments as well once the budget is spent.
                if timed_out:
                    break
    finally:
        profile.disable()

    prof_stats = pstats.Stats(profile)
    prof_stats.strip_dirs().sort_stats('cumulative').print_stats(20)

    print("Total instructions: " + str(total_inst))
    elapsed_time = time.time() - start_time
    print("Elapsed time: " + str(elapsed_time))
Пример #4
0
def test_disasm_standalone(binary_path, timeout=None):
    """Profile a full disassembly pass over an ELF file's executable code.

    The file at ``binary_path`` is parsed with ``ELFFile``; for each segment
    with the ``PF_X`` flag the data is split into 32-bit little-endian words,
    each of which is disassembled with ``HexagonDisassembler`` and printed
    with its virtual address.  When done, the top 20 profile entries by
    cumulative time, the number of instructions and the elapsed time are
    printed.

    :param binary_path: path of the ELF file to open and disassemble.
    :param timeout: optional limit in seconds after which disassembly stops
        and the summary is printed.  ``None`` (or 0) disables the limit.
    """
    profile = cProfile.Profile()
    profile.enable()

    start_time = time.time()
    total_inst = 0
    timed_out = False

    # Always disable the profiler again, including on the early return and
    # on exceptions raised while disassembling.
    try:
        print('Processing file: {}'.format(binary_path))

        # Segment data is read lazily from the stream, so keep the file open
        # for the whole walk and let the context manager close it.
        with open(binary_path, 'rb') as binary_file:
            elf_file = ELFFile(binary_file)

            if elf_file.num_segments() == 0:
                print('There are no program headers in this file.')
                return

            disasm = HexagonDisassembler()

            for segment in elf_file.iter_segments():
                if not segment['p_flags'] & P_FLAGS.PF_X:
                    continue

                print("Offset: {:x}".format(segment['p_offset']))
                print("VirtAddr: {:x}".format(segment['p_vaddr']))
                print("FileSiz: {:x}".format(segment['p_filesz']))

                segment_data = segment.data()
                data_pos = 0

                # NOTE(review): loop bound uses INST_SIZE, step/unpack use
                # the literal 4 -- assumes INST_SIZE == 4.
                while data_pos + INST_SIZE <= len(segment_data):

                    addr = segment['p_vaddr'] + data_pos

                    inst_as_int = struct.unpack('<I', segment_data[data_pos: data_pos + 4])[0]

                    dis = disasm.disasm_one_inst(inst_as_int, addr)
                    print("[{:08x}] {:s}".format(addr, dis.text))

                    data_pos += 4
                    total_inst += 1

                    if timeout and (time.time() - start_time) > timeout:
                        timed_out = True
                        break

                # Propagate the timeout out of the segment loop too; the
                # inner ``break`` alone would move on to the next segment.
                if timed_out:
                    break
    finally:
        profile.disable()

    prof_stats = pstats.Stats(profile)
    prof_stats.strip_dirs().sort_stats('cumulative').print_stats(20)

    print("Total instructions: " + str(total_inst))
    elapsed_time = time.time() - start_time
    print("Elapsed time: " + str(elapsed_time))
Пример #5
0
    def __init__(self):
        """Set up the processor module state.

        Runs the ``idaapi.processor_t`` initializer, records whether the
        input file name ends in ``.o``, builds the instruction table via
        ``init_instructions()``, and creates the disassembler, the (initially
        empty) disassembly cache and a ``cProfile`` profiler that is also
        published as ``hexagondisasm.profiler``.
        """
        idaapi.processor_t.__init__(self)

        # TODO: logging not working.
        # self.work_folder = ""
        # self.log_fn = self.work_folder + 'work.log'
        # logging.basicConfig(filename=self.log_fn, level=logging.DEBUG, filemode='w')
        # self.logger = open(self.log_fn, 'w')

        # True when the name returned by GetInputFile() ends in ".o"
        # (presumably an unlinked object file -- confirm against callers).
        self.relocatable_file = re.search(r'\.o$', GetInputFile()) is not None

        self.init_instructions()
        self.prev_addr_analyzed = -1
        self.current_hex_packet = None
        self.hd = HexagonDisassembler()
        # TODO: this should be instantiated on demand, because I think the init is called every time IDA starts
        self.disasm_cache = {}
        # TODO: use an OrderedDict to remove old entries

        self.profiler = cProfile.Profile()
        hexagondisasm.profiler = self.profiler
Пример #6
0
def _addresses_if_few_enough(search_file_data, search_data, search_mask,
                             search_file_metadata, max_results):
    """
    repeats the search with a limit of max_results + 1 and converts the hits

    :return: the hits converted by file_offset_to_elf, or an empty list if
             the search returned more than max_results hits
    """
    # the fourth argument is presumably the maximum number of hits to collect;
    # asking for one more than allowed lets us detect "too many"
    res = find_function_in_target(search_file_data, search_data, search_mask,
                                  max_results + 1)
    if len(res) <= max_results:
        return file_offset_to_elf(search_file_metadata, res)
    return []


def bin_func_find(base_file_name, target_file_name, function_name_adr,
                  max_results):
    """
    searches the target ELF file for a function taken from the base file

    The search pattern is generated from the base file with varying lengths
    until exactly one match remains in the target file; if no unique match
    can be found, the candidates are returned as long as there are at most
    max_results of them.

    :param base_file_name:      name of the base input file (ELF or object file)
    :param target_file_name:    name of the target ELF file
    :param function_name_adr:   address OR name of the function of interest
    :param max_results:         maximum number of results to return, if the search could not determine less matches an empty array is returned
    :return:                    -1 if one of the inputs is not an ELF file,
                                -2 if the function was not found in the base file,
                                otherwise the result of file_offset_to_elf for
                                the match(es), or an empty list
    """
    # both files are closed on every exit path by the context managers
    with open(base_file_name, 'rb') as reference_file, \
            open(target_file_name, 'rb') as search_file:
        # check input files
        if not is_elf(reference_file) or not is_elf(search_file):
            print("error: at least one of the input files is not an ELF file")
            return -1

        ref_offset = get_offset_in_reference_file(base_file_name,
                                                  function_name_adr)

        if ref_offset == 0:
            print("error: could not find function in base file")
            return -2

        # determine target file length, read it and parse its metadata;
        # the length is rounded down to a whole number of 32-bit words
        search_file.seek(0, os.SEEK_END)
        search_file_length = search_file.tell()
        search_file_length -= search_file_length % 4
        search_file.seek(0)
        # "//" keeps the word count an integer on Python 2 and Python 3 alike
        search_file_data = struct.unpack("<%dI" % (search_file_length // 4),
                                         search_file.read(search_file_length))
        search_file.seek(0)
        search_file_metadata = parse_metadata(search_file)

        # determine base file length
        reference_file.seek(0, os.SEEK_END)
        max_size = reference_file.tell() - ref_offset

        min_size = 2
        cur_size = 2

        print("searching for function %s" % function_name_adr)

        disasm = HexagonDisassembler(objdump_compatible=True)

        # loop to search with different search string lengths in order to try to find a unique match
        while True:
            print("current search string length: %d min length: %s max length: %s" % (
                cur_size, min_size, max_size))

            # generate search data and perform the search; a limit of 2 is
            # enough to tell "unique" from "ambiguous"
            search_data, search_mask = generate_search_mask(
                reference_file, ref_offset, cur_size, disasm)
            res = find_function_in_target(search_file_data, search_data,
                                          search_mask, 2)
            num_res = len(res)

            if num_res == 1:  # unique match: we are done!
                phys_adr = file_offset_to_elf(search_file_metadata, res)
                print("found function %s of base file at 0x%08X of target" % (
                    function_name_adr, phys_adr[0]))
                return phys_adr

            if num_res > 1:  # multiple matches
                # stop if we reached the maximum size..we cannot get better
                if cur_size >= max_size:
                    print("could not identify function, multiple matches for maximal size")
                    return _addresses_if_few_enough(
                        search_file_data, search_data, search_mask,
                        search_file_metadata, max_results)
                # compute search data length for next iteration: grow
                min_size = cur_size + 1
                cur_size = min(cur_size * 2, max_size)
            else:  # no match
                # stop if we reached minimal size, we cannot find a unique match
                if cur_size <= min_size:
                    print("could not identify function, no matches for minimal size")
                    # regenerate last result with multiple matches (cur_size - 1)
                    search_data, search_mask = generate_search_mask(
                        reference_file, ref_offset, cur_size - 1, disasm)
                    return _addresses_if_few_enough(
                        search_file_data, search_data, search_mask,
                        search_file_metadata, max_results)
                # compute search data length for next iteration: bisect
                max_size = cur_size - 1
                cur_size = (max_size - min_size) // 2 + min_size
Пример #7
0
def generate_function(org_func_name, org_func, symtab, base_elf, metadata,
                      func_symtab):
    """
    generates a helper function to call the original version of an overwritten firmware function

    The first instructions of the original function are disassembled and
    emitted as inline assembly, followed by a jump to the rest of the
    original function. PC relative immediates are replaced by symbols named
    sym_0x<address>, which are recorded in func_symtab.

    :param org_func_name:   name of the new function
    :param org_func:        original function name, the one we replaced
    :param symtab:          symbol table
    :param base_elf:        base firmware ELF file
    :param metadata:        base firmware metadata
    :param func_symtab:     table of new symbols needed by generated functions (updated in place)
    :return:                tuple (function declaration, function definition, func_symtab)
    """
    addr_str, ret_type, param_str = resolve_symbol_all(org_func, symtab)
    if addr_str == '"unknown"':
        print("error: trying to generate fw_org function for a function with unknown location")
        # same effect as exit(1), without relying on the site-provided builtin
        raise SystemExit(1)
    address = int(addr_str, 0)

    # read first 5 instruction of destination function
    # we have to read 5 instructions in case the first packet contains only 1 instruction,
    # then the we have to relocate 2 packages (with up to 5 instructions in total)
    elf_pos, _ = get_offset_in_elf(metadata, address)
    base_elf.seek(elf_pos)
    data = struct.unpack("<IIIII", base_elf.read(20))

    disasm = HexagonDisassembler(objdump_compatible=True)
    disasm0 = HexagonDisassembler(objdump_compatible=True)
    next_prefix = ""

    # generate function start text
    function_decl = "%s %s(%s)" % (ret_type, org_func_name, param_str)
    function_def = "%s {\n\tasm(\n" % function_decl
    function_decl += ";"

    # iterate over the five instructions fetched
    reloc_size = 0
    for pos, inst_word in enumerate(data):
        reloc_size += 1
        # disassemble instruction with correct position
        hi = disasm.disasm_one_inst(inst_word, address + pos * 4)
        # disassemble instruction again with position 0 to check for PC relative immediates
        hi0 = disasm0.disasm_one_inst(inst_word, pos * 4)

        # nothing is emitted for this word; only remember that the next
        # emitted instruction has to open the packet
        if hi.immext is not None:
            if hi.start_packet:
                next_prefix = "{ "
            continue

        disasm_output = hi.text.strip()
        disasm_output_hi0 = hi0.text.strip()
        # if we have a relative immediate
        if disasm_output != disasm_output_hi0:
            # loop over all immediates
            for imm, imm0 in zip(hi.imm_ops, hi0.imm_ops):
                # check for difference -> PC relative immediate
                if imm0 != imm:
                    # generate a dummy symbol at the destination address
                    # and write our target relative to this, by this we
                    # force the compiler to generate a relocation for
                    # the immediate for us
                    dest_address = address + imm0.value
                    symbol = "sym_0x%X" % dest_address
                    func_symtab[symbol] = dest_address
                    disasm_output = disasm_output.replace(
                        ("0x%X" % imm.value).lower(), symbol)

        function_def += '\t\t"%s%s\\n\\t" \n' % (next_prefix, disasm_output)
        next_prefix = ""

        # stop after the packet is complete, but never after the very first
        # instruction (see the comment on reading 5 instructions above);
        # logical "and" instead of bitwise "&" so any truthy flag works
        if pos != 0 and hi.end_packet:
            break

    # generate a jump instruction to the start of remaining original function
    org_func_start = address + (reloc_size << 2)
    symbol = "sym_0x%X" % org_func_start
    func_symtab[symbol] = org_func_start
    function_def += '\t\t"{ jump %s }"\n' % symbol
    function_def += "\t);\n}"
    return function_decl, function_def, func_symtab