Exemplo n.º 1
0
    def adjust_instruction_layout(self):
        """
        Re-synchronise the code with the layout produced by instrumenting.

        Every non-indirect branch and every ``prefetch*`` instruction has its
        operand re-adjusted. If the overflow flag is set afterwards, the
        overflowed instructions are handled and the whole pass runs again.
        """
        disassembled = self.get_instructions()
        if not self.pe_manager.is_possible_relocation():
            print("Not Support PE without relocation, yet.")
            exit()
        self._save_instruction_log()
        self.log = LoggerFactory().get_new_logger("AdjustDirectBranches.log")
        for _, inst in disassembled:
            if not self.disassembler.is_indirect_branch(inst):
                if (self.disassembler.is_branch(inst)
                        or self.disassembler.is_relative_branch(inst)):
                    self.adjust_direct_branches(inst)
            if inst.mnemonic.startswith('prefetch'):
                self.adjust_registers_instruction_operand(inst)
        self.log.fin()

        # nothing overflowed: the layout is final
        if not self._is_instrument_overflow_occurred():
            return
        if self.handle_overflowed_instrument():
            # widened instructions moved the code again, so redo the pass
            self.adjust_instruction_layout()
        else:
            print("ERROR WHILE HANDLE OVERFLOW")
            exit()
Exemplo n.º 2
0
 def _save_instruction_log(self):
     """Write every current instruction to ``final_instructions.log``."""
     self.log = LoggerFactory().get_new_logger("final_instructions.log")
     line_format = "[0x{:04x}]\t{}\t{}\n"
     for offset, instruction in self.get_instructions():
         # logged address = offset + image base + 0x1000
         logged_address = offset + self.pe_manager.get_image_base() + 0x1000
         self.log.log(line_format.format(
             logged_address, instruction.mnemonic, instruction.op_str))
Exemplo n.º 3
0
    def __init__(self, _code_manager):
        """
        Store the code manager and create a 32-bit x86 capstone engine.

        Args:
            _code_manager: object kept as ``self.code_manager``.
        """
        self.code_manager = _code_manager
        # both instruction containers start empty
        self.instructions_dict, self.instructions_list = {}, []
        # all three "need handled" flags start out set
        self._code_need_handled = \
            self._instructions_list_need_handled = \
            self._instruction_dict_need_handled = True
        self.Logger = LoggerFactory()

        # create the disassembler, then switch on skipdata and detail
        self.disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
        for option in ("skipdata", "detail"):
            setattr(self.disassembler, option, True)
Exemplo n.º 4
0
    def merge_adjust_pos_with_prev(self, src_adjust_dict, dst_adjust_dict):
        """
        Merge a previous adjust map with a later adjust map.

        Both maps are walked in ascending key order. Each source address is
        shifted by the sizes of the destination entries consumed so far, and
        each consumed destination entry is copied into the result.

        Args:
            src_adjust_dict(dict): previous adjust map
                (instrument address -> instrument size).
            dst_adjust_dict(dict): later adjust map, same layout. Its first
                entry is read unconditionally, so it must not be empty.

        Returns:
            :obj:`dict` : merged map (adjusted address -> instrument size).

        Raises:
            IndexError: if ``dst_adjust_dict`` is empty.

        Note:
            Destination entries are only consumed from inside the loop over
            the source entries, so destination entries that no shifted source
            address passes are not added to the result.
        """
        self.log = LoggerFactory().get_new_logger("AdjustingMerge.log")
        adjusted_dict = {}
        sorted_src_adjust_dict = sorted(src_adjust_dict.items(),
                                        key=operator.itemgetter(0))
        sorted_dst_adjust_dict = sorted(dst_adjust_dict.items(),
                                        key=operator.itemgetter(0))
        # cursor into the destination list and the running sum of the
        # destination sizes consumed so far
        dst_index = 0
        instrumented_size_by_dst = 0
        # load the first destination entry (raises IndexError when empty)
        dst_instrumented_address, dst_instrumented_size = \
            sorted_dst_adjust_dict[dst_index]
        instrument_size_of_dst_address = dst_instrumented_size
        # running sum of the source sizes already emitted
        src_total_instrument_size = 0

        self.log.log("[instrument address]\t[instrument size]\t"
                     "[adjusted address]\t[total instrumented size]\n")
        # iterate the source entries in ascending address order
        for src_instrumented_address, src_instrumented_size \
                in sorted_src_adjust_dict:
            adjust_instrument_address = src_instrumented_address
            # shift the source address by the destination sizes consumed
            # so far
            adjust_instrument_address += instrumented_size_by_dst

            # the shifted source address passed the current destination
            # entry: emit that entry and load the next one, repeating until
            # the source address no longer passes or the list is exhausted
            while adjust_instrument_address > dst_instrumented_address \
                    and len(sorted_dst_adjust_dict) > dst_index:
                # remember the entry being emitted
                current_adjust_address = dst_instrumented_address
                current_instrument_size = instrument_size_of_dst_address

                # advance to the next destination entry
                dst_index += 1
                if len(sorted_dst_adjust_dict) > dst_index:
                    # load the next destination entry
                    dst_instrumented_address, dst_instrumented_size = \
                        sorted_dst_adjust_dict[dst_index]
                    # the emitted address is already in the result; only
                    # logged, the entry is overwritten below
                    if current_adjust_address in adjusted_dict:
                        self.log.log("[OVERLAPPING]\t")
                    # NOTE(review): the source address is shifted by the size
                    # of the NEXT destination entry while the running total
                    # grows by the size of the emitted one - confirm intended
                    adjust_instrument_address += dst_instrumented_size
                    instrumented_size_by_dst += current_instrument_size
                    instrument_size_of_dst_address = dst_instrumented_size
                # no further destination entry: this was the last one; the
                # loop condition fails afterwards because dst_index now
                # equals the list length
                else:
                    current_adjust_address = dst_instrumented_address
                    current_instrument_size = instrument_size_of_dst_address
                    instrumented_size_by_dst += current_instrument_size
                    print("LAST INSTRUMENT")
                # store the emitted destination entry in the result
                adjusted_dict[current_adjust_address] = current_instrument_size

                # log the emitted destination entry
                # NOTE(review): the "[adjusted address]" column is filled
                # with the size here, not an address - confirm intended
                self.log.log(
                    "{:>20}\t{:>17}\t{:>18}\t{:>25}\t[OVERFLOW]\n".format(
                        hex(current_adjust_address),
                        hex(current_instrument_size),
                        hex(current_instrument_size),
                        hex(instrumented_size_by_dst +
                            src_total_instrument_size),
                    ))

            # log the source entry, then store it under its shifted address
            self.log.log("{:>20}\t{:>17}\t{:>18}\t{:>25}\n".format(
                hex(src_instrumented_address - src_total_instrument_size),
                hex(src_instrumented_size), hex(adjust_instrument_address),
                hex(src_total_instrument_size + instrumented_size_by_dst)))
            src_total_instrument_size += src_instrumented_size
            adjusted_dict[adjust_instrument_address] = src_instrumented_size
        return adjusted_dict
Exemplo n.º 5
0
    def handle_overflowed_instrument(self):
        """
        Re-assemble every overflowed branch with a wider operand.

        Each entry of ``self.overflowed_instrument_dict`` is re-assembled
        with keystone from its mnemonic and adjusted operand value, patched
        over the old instruction, and the size growth is recorded so that
        the layout can be adjusted again.

        Note:
            The formula for determining the operand value of a branch
            instruction in x86:
            [Destination.Address - Instruction.Address - Instruction.size]

            The operand overflowed while the direct branch operands were
            adjusted, i.e. the old operand is too small for the adjusted
            value, so the instruction is re-assembled and is expected to
            come out 5 or 6 bytes long.

            Keystone bases the operand on the address at which the
            instruction ends, like this,
            ks.asm('jmp 140') = [233, 135, 0, 0, 0]

            but the value passed in is based on the start address of the
            instruction, so for a positive branch the encoded operand is
            corrected by the size growth (3 for a 5-byte result, 4 for a
            6-byte result). A negative branch is left unchanged.

            The correction is applied to the whole 4-byte little-endian
            operand, so a carry out of the lowest byte is propagated.

        Returns:
            bool : always True when the method returns; an assembler error
            terminates the process instead.
        """
        import struct

        self.log = \
            LoggerFactory().get_new_logger("HandleOverflowInstrument.log")
        total_instrument_size = 0
        handled_overflowed_pos_dict = {}
        sorted_instrument = sorted(self.overflowed_instrument_dict.items(),
                                   key=operator.itemgetter(0))
        for index, (instruction_address, (instruction, operand_info)) \
                in enumerate(sorted_instrument):
            operand_value, adjusted_operand_value, instrumented_size_till = \
                operand_info
            # instructions widened earlier in this pass moved this one
            instruction_address += total_instrument_size
            print("[{}] overflowed instrument instruction : [0x{:x}] {:s}  "
                  "{} ========[{}]==========> {}".format(
                      index, instruction_address, instruction.mnemonic,
                      operand_value, instrumented_size_till,
                      adjusted_operand_value))

            self.log.log(
                "[0x{:x}] {:s} {}\t {} ========[{}]==========> {}\n".format(
                    instruction.address, instruction.mnemonic,
                    instruction.op_str, operand_value, instrumented_size_till,
                    adjusted_operand_value))

            # adding 2 is to change the base of operand value to the
            # start address of the instruction.
            code = "{:s} {}".format(instruction.mnemonic,
                                    adjusted_operand_value + 2)

            try:
                # the assembly text is passed as-is; the former
                # hexlify/decode('hex') round trip was a no-op on Python 2
                # and raises on Python 3
                encoding, _count = self.ks.asm(code)
                for inst in self.cs.disasm(bytearray(encoding),
                                           instruction_address):
                    self.log.log("\t" + code + "\t" + inst.mnemonic + " " +
                                 inst.op_str + "\n")
                instrumented_size = len(encoding)
                # where the 4-byte operand starts and how much to add for a
                # positive branch
                if instrumented_size == 5:
                    operand_offset, correction = 1, 3
                elif instrumented_size == 6:
                    operand_offset, correction = 2, 4
                else:
                    operand_offset, correction = None, 0
                    print("ERROR")

                if operand_offset is not None and adjusted_operand_value > 0:
                    operand_end = operand_offset + 4
                    packed = bytes(bytearray(
                        encoding[operand_offset:operand_end]))
                    rel32 = struct.unpack("<I", packed)[0]
                    rel32 = (rel32 + correction) & 0xFFFFFFFF
                    encoding[operand_offset:operand_end] = \
                        list(bytearray(struct.pack("<I", rel32)))

                # patch
                self.code_manager.set_instruction_at_offset(
                    instruction_address,
                    instruction_address + instruction.size, encoding)
                # save increased opcode, operand size for adjust again
                increased_size = instrumented_size - instruction.size
                handled_overflowed_pos_dict[instruction_address] \
                    = increased_size
                total_instrument_size += increased_size
                self.log.log("\t\t{} : {:d}\n".format(encoding,
                                                      increased_size))
            except KsError as ex:
                print("ERROR : {}".format(ex))
                exit()

        self.save_instrument_history(self.instrument_pos_dict,
                                     handled_overflowed_pos_dict)
        self.current_instrument_pos_dict = handled_overflowed_pos_dict
        self.overflowed_instrument_dict.clear()
        self._instrument_overflow_handled()
        self.log.fin()
        return True
Exemplo n.º 6
0
class PEInstrument(object):
    def __init__(self, pe_manager):
        """
        Build an instrumenter around the text section of a PE file.

        Args:
            pe_manager(PEManager) : The manager of the file to instrument.
                Anything else prints an error and terminates the process.
        """
        if not isinstance(pe_manager, PEManager):
            print("YOU MUST set up PE Manager")
            exit()
        self.pe_manager = pe_manager
        self.pe_manager.set_instrument(self)
        self.entryPointVA = self.pe_manager.get_entry_point_rva()
        # 32-bit x86 assembler and disassembler engines
        self.ks = Ks(KS_ARCH_X86, KS_MODE_32)
        self.cs = Cs(CS_ARCH_X86, CS_MODE_32)

        text_section = self.pe_manager.get_text_section()
        self.code_manager = CodeManager(
            self.pe_manager.get_section_raw_data(text_section),
            text_section.VirtualAddress)
        self.disassembler = Disassembler(self.code_manager)

        # history of instrumented positions, kept for relocation
        self.instrument_pos_dict = {}
        self.current_instrument_pos_dict = {}
        self.overflowed_instrument_dict = {}
        self.log = None
        # flag used while handling operand overflow
        self.overflowed = False

        # callback registry: one empty list per (stage, instruction kind),
        # e.g. ``pre_indirect_branch_functions``
        for stage in ("pre", "after", "replace"):
            for kind in ("indirect_branch", "relative_branch", "return"):
                setattr(self, "{}_{}_functions".format(stage, kind), [])

    @classmethod
    def from_filename(cls, filename):
        """
        Alternate constructor: wrap ``filename`` in a PEManager first.

        Args:
            filename: value handed to ``PEManager``.

        Returns:
            a new instance of ``cls``.
        """
        return cls(PEManager(filename))

    def get_pe_manager(self):
        """Return the PE manager stored on this instance."""
        return self.pe_manager

    def _is_instrument_overflow_occurred(self):
        """Return the current value of the ``overflowed`` flag."""
        return self.overflowed

    def _instrument_overflow_handled(self):
        """Clear the ``overflowed`` flag."""
        self.overflowed = False

    def _instrument_overflow_occurred(self):
        """Set the ``overflowed`` flag."""
        self.overflowed = True

    def writefile(self, filename):
        """
        Write the file out by delegating to the PE manager's ``writefile``.

        Args:
            filename(str)
                the name of the file to write.
        """
        self.pe_manager.writefile(filename)

    def get_instrumented_pos(self):
        """Return ``instrument_pos_dict`` itself (not a copy)."""
        return self.instrument_pos_dict

    def get_instructions(self):
        """
        get disassembled instructions. Instructions excluding data that
        exist in the text section.

        Returns:
            :obj:`list`: list containing :
                :obj:`tuple`
                    - int : instruction address.
                    - :obj:`instruction` : instruction.
        """
        relocation_dict = self.pe_manager.get_relocation()
        sorted_relocation = sorted(relocation_dict.items(),
                                   key=operator.itemgetter(0))
        instructions = self.disassembler.get_disassemble_dict()
        relocation_list = []
        if len(self.instrument_pos_dict) > 0:
            for index, (address, el) in enumerate(sorted_relocation):
                increased_size = self.get_instrumented_vector_size(address -
                                                                   0x1000)
                relocation_list.append(address - 0x1000 + increased_size)
        else:
            for index, (address, el) in enumerate(sorted_relocation):
                relocation_list.append(address - 0x1000)

        for address in relocation_list:
            for size in range(4):
                relocation_address_range = address + size
                if relocation_address_range in instructions:
                    del instructions[relocation_address_range]
        sorted_instructions = sorted(instructions.items(),
                                     key=operator.itemgetter(0))
        return sorted_instructions

    def register_pre_indirect_branch(self, fn):
        self.pre_indirect_branch_functions.append(fn)

    def register_pre_relative_branch(self, fn):
        self.pre_relative_branch_functions.append(fn)

    def register_pre_return(self, fn):
        self.pre_return_functions.append(fn)

    def is_pre_indirect_branch_instrument_exist(self):
        return len(self.pre_indirect_branch_functions) > 0

    def is_pre_relative_branch_instrument_exist(self):
        return len(self.pre_relative_branch_functions) > 0

    def is_pre_return_instrument_exist(self):
        return len(self.pre_return_functions) > 0

    def register_after_indirect_branch(self, fn):
        self.after_indirect_branch_functions.append(fn)

    def register_after_relative_branch(self, fn):
        self.after_relative_branch_functions.append(fn)

    def register_after_return(self, fn):
        self.after_return_functions.append(fn)

    def is_after_indirect_branch_instrument_exist(self):
        return len(self.after_indirect_branch_functions) > 0

    def is_after_relative_branch_instrument_exist(self):
        return len(self.after_relative_branch_functions) > 0

    def is_after_return_instrument_exist(self):
        return len(self.after_return_functions) > 0

    def do_instrument(self):
        """
        Run the registered callbacks over every control-flow instruction.

        Indirect branches, relative calls and returns are visited in address
        order; for each one the matching "pre" callbacks are applied, then
        the matching "after" callbacks. The running total of inserted bytes
        is handed to every ``instrument`` call. Any exception prints an error
        and terminates the process. The layout is adjusted at the end.
        """
        self.log = LoggerFactory().get_new_logger("Instrument.log")
        log_format = '[0x{:x}]\t[0x{:x}]\t{:s}\t{:s}\n'

        def run_stage(inst, functions, shifted, write_log, after):
            # apply one callback list to ``inst``; returns the new total
            if write_log:
                self.log.log(log_format.format(
                    inst.address + shifted, inst.address,
                    inst.mnemonic, inst.op_str))
            for fn in functions:
                if after:
                    shifted += self.instrument(
                        fn, inst, shifted, position=_INSTRUMENT_POS_AFTER_)
                else:
                    shifted += self.instrument(fn, inst, shifted)
            return shifted

        inserted_total = 0
        for _, inst in self.get_instructions():
            try:
                if self.disassembler.is_indirect_branch(inst):
                    if self.is_pre_indirect_branch_instrument_exist():
                        inserted_total = run_stage(
                            inst, self.pre_indirect_branch_functions,
                            inserted_total, True, False)
                    if self.is_after_indirect_branch_instrument_exist():
                        inserted_total = run_stage(
                            inst, self.after_indirect_branch_functions,
                            inserted_total, True, True)
                elif self.disassembler.is_relative_branch(inst) \
                        and self.disassembler.is_call(inst):
                    # the "pre" stage of relative calls is not logged
                    if self.is_pre_relative_branch_instrument_exist():
                        inserted_total = run_stage(
                            inst, self.pre_relative_branch_functions,
                            inserted_total, False, False)
                    if self.is_after_relative_branch_instrument_exist():
                        inserted_total = run_stage(
                            inst, self.after_relative_branch_functions,
                            inserted_total, True, True)
                elif self.disassembler.is_return(inst):
                    # the "pre" stage of returns is not logged
                    if self.is_pre_return_instrument_exist():
                        inserted_total = run_stage(
                            inst, self.pre_return_functions,
                            inserted_total, False, False)
                    if self.is_after_return_instrument_exist():
                        inserted_total = run_stage(
                            inst, self.after_return_functions,
                            inserted_total, True, True)
            except Exception as e:
                print("ERROR WHILE INSTRUMENT")
                print(e)
                exit()
        self.adjust_instruction_layout()

    def instrument(self,
                   fn,
                   instruction,
                   total_count=0,
                   position=_INSTRUMENT_POS_PREV_):
        """
        Ask the user function for code and insert it around ``instruction``.

        ``fn(instruction)`` must return ``(code, count)``. Nothing happens
        unless ``count`` is positive. Depending on ``position`` the code is
        inserted at the instruction, right after it, or in place of it, and
        the insertion offset and size are recorded in both position dicts.

        Args:
            fn(function)
                User function to return instruction to be instrumented
            instruction(instruction)
                Instruction to be passed to the user function.
            total_count(int)
                bytes already inserted before this instruction; added to
                the instruction address to get the insertion offset.
            position(int)
                one of the ``_INSTRUMENT_POS_*`` constants; any other value
                inserts nothing.
        Returns:
            int : size growth caused by this call (for a replacement, the
            new code length minus the instruction size); 0 if nothing was
            inserted.
        """
        payload, count = fn(instruction)
        if not count > 0:
            return 0

        if position == _INSTRUMENT_POS_PREV_:
            grown = len(payload)
            offset = instruction.address + total_count
            self.code_manager.instrument(offset, payload)
        elif position == _INSTRUMENT_POS_AFTER_:
            grown = len(payload)
            offset = instruction.address + total_count + instruction.size
            self.code_manager.instrument(offset, payload)
        elif position == _INSTRUMENT_POS_REPLACE_:
            grown = len(payload) - instruction.size
            offset = instruction.address + total_count
            self.code_manager.instrument_with_replace(
                offset, instruction.size, payload)
        else:
            # unknown position: nothing inserted, nothing recorded
            return 0

        self.current_instrument_pos_dict[offset] = grown
        self.instrument_pos_dict[offset] = grown
        return grown

    def get_instrumented_size(self, instruction):
        """
        Calculate the instrumented size from the current address to the branch
        target. use this when instrumented thing is applied to disassembled one.
        but if not applied instrumented thing, then use method the
        get_instrumented_size_with_vector.

        Args:
            instruction(instruction):
                branch instruction that has relatively operand value
        Returns:
            int : size of instrumented until instruction's address.
        """
        instruction_address = instruction.address
        instruction_destiny = instruction.operands[0].imm
        instrument_size_till_destiny = 0
        if instruction_address <= instruction_destiny:
            sorted_instrument = sorted(
                self.current_instrument_pos_dict.items(),
                key=operator.itemgetter(0))
            for instrument_address, instrument_size in sorted_instrument:
                if instrument_address > instruction_destiny:
                    break
                if (instruction_address < instrument_address <=
                        instruction_destiny):
                    instrumented_size = instrument_size
                    instrument_size_till_destiny += instrumented_size
                    instruction_destiny += instrumented_size
        else:
            sorted_instrument = sorted(
                self.current_instrument_pos_dict.items(),
                key=operator.itemgetter(0),
                reverse=True)
            for instrument_address, instrument_size in sorted_instrument:
                # instruction_destiny can be instrumented instruction.
                # cause subtract instrument_size from instruction_destiny
                if (instruction_destiny - instrument_size <= instrument_address
                        < instruction_address):
                    if instrument_address \
                            < instruction_destiny - instrument_size:
                        break
                    instrumented_size = instrument_size
                    instrument_size_till_destiny += instrumented_size
                    instruction_destiny -= instrumented_size
        return instrument_size_till_destiny

    def get_instrumented_vector_size(self, rva, instrument_pos_dict=None):
        """
        Calculate the instrumented size until virtual address that argumented.
        if not applied instrumented thing, to disssembled one, use this.

        Args:
            rva(int): virtual address for calculate on.
            instrument_pos_dict(dict) : dict contains instruments position.
        Returns:
            int :
                instrumented size until argument virtual address with
                increasing of instrumented size.
        """
        if instrument_pos_dict is None:
            instrument_pos_dict = self.instrument_pos_dict
        sorted_instrument = sorted(instrument_pos_dict.items(),
                                   key=operator.itemgetter(0))
        instrumented_size = 0
        for address, size in sorted_instrument:
            if address <= rva:
                instrumented_size += size
                rva += size
            else:
                break
        return instrumented_size

    def get_instrumented_total_size(self):
        """
        Total size of instrument.

        Returns:
            int : Total size of instrumentation.
        """
        sorted_instrument = sorted(self.instrument_pos_dict.items(),
                                   key=operator.itemgetter(0))
        instrumented_size = 0
        for address, size in sorted_instrument:
            instrumented_size += size
        return instrumented_size

    def get_code(self):
        """
        Return the code being worked on, as reported by the code manager.

        Returns:
            :obj:`bytearray` : text section's data.
        """
        return self.code_manager.get_code()

    def adjust_instruction_layout(self):
        """
        Re-synchronise the code with the layout produced by instrumenting.

        Every non-indirect branch and every ``prefetch*`` instruction has its
        operand re-adjusted. If the overflow flag is set afterwards, the
        overflowed instructions are handled and the whole pass runs again.
        """
        disassembled = self.get_instructions()
        if not self.pe_manager.is_possible_relocation():
            print("Not Support PE without relocation, yet.")
            exit()
        self._save_instruction_log()
        self.log = LoggerFactory().get_new_logger("AdjustDirectBranches.log")
        for _, inst in disassembled:
            if not self.disassembler.is_indirect_branch(inst):
                if (self.disassembler.is_branch(inst)
                        or self.disassembler.is_relative_branch(inst)):
                    self.adjust_direct_branches(inst)
            if inst.mnemonic.startswith('prefetch'):
                self.adjust_registers_instruction_operand(inst)
        self.log.fin()

        # nothing overflowed: the layout is final
        if not self._is_instrument_overflow_occurred():
            return
        if self.handle_overflowed_instrument():
            # widened instructions moved the code again, so redo the pass
            self.adjust_instruction_layout()
        else:
            print("ERROR WHILE HANDLE OVERFLOW")
            exit()

    def adjust_direct_branches(self, instruction):
        """
        adjust instruction's operand value. Because the instructions calculate
        the address to branch relatively from the current position, it is
        necessary to apply the offset value changed by the instrument.

        Args:
            instruction(instruction):
                branch instruction that has relatively operand value
        """
        operand_value = 0
        adjusted_operand_value = 0
        instrumented_size_till = self.get_instrumented_size(instruction)
        # adjust operand value
        if instrumented_size_till > 0:
            operand_size = instruction.operands[0].size
            instruction_size = instruction.size
            if instruction_size == 2:
                operand_start = instruction.address + 1
                operand_end = instruction.address + 2
            elif instruction_size == 3:
                operand_start = instruction.address + 2
                operand_end = instruction.address + 3
            elif instruction_size == 6:
                operand_start = instruction.address + 2
                operand_end = instruction.address + 6
            elif instruction_size == 5:
                operand_start = instruction.address + 1
                operand_end = instruction.address + instruction_size
            else:
                instruction_length = \
                    self.disassembler.get_opcode_length(instruction)
                self.log.log("[CACULATED][{:d}]\t".format(instruction_length))
                operand_start = instruction.address + instruction_length
                operand_end = instruction.address + instruction_size

            self.log.log("[{:d}]\t".format(instruction_size))
            try:
                operand_value = \
                    self.code_manager.get_data_from_offset_with_format(
                        operand_start,
                        operand_end
                    )
            except Exception as e:
                print("[except]============================================")
                print(e)
                print("[0x{}]\t{} {}\t\tINSIZE:{}\tOPSIZE:{}\t"
                      "OP START:{}\tOP END:{}\tVALUE:{:x}".format(
                          instruction.address, instruction.mnemonic,
                          instruction.op_str, instruction_size, operand_size,
                          operand_start, operand_end,
                          instruction.operands[0].imm))
                exit()
            if operand_value > 0:
                adjusted_operand_value = operand_value + instrumented_size_till
            else:
                adjusted_operand_value = operand_value - instrumented_size_till
            try:
                self.code_manager.set_data_at_offset_with_format(
                    operand_start, operand_end, adjusted_operand_value)

                set_value = \
                    self.code_manager.get_data_from_offset_with_format(
                        operand_start,
                        operand_end
                    )

                if adjusted_operand_value != set_value:
                    print("ERROR WHILE ADJUST DIRECT BRANCH")
                    exit()
                self.log.log("[0x{:04x}]\t{:s}\t{:s}\t{:x}\t{:x}\n".format(
                    instruction.address, instruction.mnemonic,
                    instruction.op_str, operand_value, adjusted_operand_value))
            except Exception as e:
                print(e)
                self._instrument_overflow_occurred()
                self.overflowed_instrument_dict[instruction.address] = \
                    (instruction,
                     (operand_value, adjusted_operand_value,
                      instrumented_size_till)
                     )
                self.log.log(
                    "\t[OVERFLOWED] [0x{:04x}]\t{:s}\t{:s}\t{:x}\t{:x}\n".
                    format(instruction.address, instruction.mnemonic,
                           instruction.op_str, operand_value,
                           adjusted_operand_value))
        return adjusted_operand_value

    def adjust_registers_instruction_operand(self, instruction):
        """
        Adjust the operand of an instruction whose operand follows a fixed
        3-byte opcode (used for ``prefetch*`` instructions).

        The operand is read from the code buffer, shifted by the size
        reported by ``get_instrumented_size`` and, when the result is below
        the image base, increased by the image base. The new value is written
        back and read again for verification. When writing or verifying
        raises, the instruction is stored in ``overflowed_instrument_dict``
        and ``_instrument_overflow_occurred`` is called.

        Args:
            instruction(instruction) : instruction to be adjusted.

        Returns:
            int : adjusted operand value.
        """
        operand_value = 0
        instrumented_size_till = self.get_instrumented_size(instruction)
        # fixed instruction opcode size for prefetch
        opcode_size = 3
        instruction_size = instruction.size
        operand_start = instruction.address + opcode_size
        operand_end = instruction.address + instruction_size
        self.log.log("[{:d}]\t".format(instruction_size))
        try:
            operand_value = \
                self.code_manager.get_data_from_offset_with_format(
                    operand_start,
                    operand_end
                )
        except Exception as e:
            print("[except]============================================")
            print(e)
            # str.format() needs "{}" fields; printf-style "%08x" specifiers
            # would be printed verbatim. op_str is used instead of
            # operands[0].imm so the handler cannot fail on a missing operand.
            print("[0x{:08x}]\t{} {}\t\tINS:{:d}\tOPS:{:d}\tOP START:{:x}\t"
                  "OP END:{:x}".format(instruction.address,
                                       instruction.mnemonic,
                                       instruction.op_str,
                                       instruction_size,
                                       operand_end - operand_start,
                                       operand_start, operand_end))
            exit()

        image_base = self.pe_manager.get_image_base()
        # move the value away from zero by the instrumented size
        if operand_value > 0:
            adjusted_operand_value = operand_value + instrumented_size_till
        else:
            adjusted_operand_value = operand_value - instrumented_size_till
        # a value below the image base gets the image base added
        if adjusted_operand_value < image_base:
            adjusted_operand_value += image_base
        try:
            self.code_manager.set_data_at_offset_with_format(
                operand_start, operand_end, adjusted_operand_value)

            # read the operand back to confirm the write took effect
            set_value = \
                self.code_manager.get_data_from_offset_with_format(
                    operand_start,
                    operand_end
                )

            if adjusted_operand_value != set_value:
                print("ERROR WHILE ADJUST DIRECT BRANCH")
                exit()
            self.log.log("[0x{:04x}]\t{:s}\t{:s}\t{:x}\t{:x}\n".format(
                instruction.address, instruction.mnemonic, instruction.op_str,
                operand_value, adjusted_operand_value))
        except Exception as e:
            # the value did not fit (or could not be written): remember the
            # instruction so the overflow handler can widen it later
            print(e)
            self._instrument_overflow_occurred()
            self.overflowed_instrument_dict[instruction.address] = \
                (instruction,
                 (operand_value, adjusted_operand_value,
                  instrumented_size_till)
                 )
            self.log.log(
                "\t[OVERFLOWED] [0x{:04x}]\t{:s}\t{:s}\t{:x}\t{:x}\n".format(
                    instruction.address, instruction.mnemonic,
                    instruction.op_str, operand_value, adjusted_operand_value))
        return adjusted_operand_value

    def handle_overflowed_instrument(self):
        """
        Extend the operand size of every instruction whose adjusted operand
        value exceeded the range of its operand while instrumenting.

        Each instruction recorded in ``overflowed_instrument_dict`` is
        re-assembled with keystone, patched into the code buffer, and the
        number of bytes it grew by is recorded so the layout can be adjusted
        again.

        Note:
            The formula for determining the operand value of a branch
            instruction in x86:
            [Destination.Address - Instruction.Address - Instruction.size]

            In this case the operand value overflowed while the direct branch
            operands were adjusted, which means one byte of operand is too
            small for the adjusted value, so the operand is expanded to four
            bytes.

            The instruction size grows to 5 or 6 bytes. Following the formula
            above, keystone adjusts the operand value when it assembles.

            Keystone bases the value on the address at which the instruction
            ends, like this,
            ks.asm('jmp 140') = [233, 135, 0, 0, 0]

            but since the value passed here is based on the start address of
            the instruction, the operand is corrected in the case of a
            positive branch.

            In the case of a negative branch the base address is the starting
            address of the instruction, so it is not changed.

        Returns:
            bool : always True; an assembler error (``KsError``) is printed
            and terminates the process instead of returning.
        """
        self.log = \
            LoggerFactory().get_new_logger("HandleOverflowInstrument.log")
        total_instrument_size = 0
        handled_overflowed_pos_dict = {}
        # process overflowed instructions in ascending address order
        sorted_instrument = sorted(self.overflowed_instrument_dict.items(),
                                   key=operator.itemgetter(0))
        for index, \
            (instruction_address,
             (instruction,
              (operand_value, adjusted_operand_value, instrumented_size_till)
              )
             ) in enumerate(sorted_instrument):
            # earlier patches in this loop already grew the code, so shift
            # this address by the bytes added so far
            instruction_address += total_instrument_size
            print("[{}] overflowed instrument instruction : [0x{:x}] {:s}  "
                  "{} ========[{}]==========> {}".format(
                      index, instruction_address, instruction.mnemonic,
                      operand_value, instrumented_size_till,
                      adjusted_operand_value))

            self.log.log(
                "[0x{:x}] {:s} {}\t {} ========[{}]==========> {}\n".format(
                    instruction.address, instruction.mnemonic,
                    instruction.op_str, operand_value, instrumented_size_till,
                    adjusted_operand_value))

            # adding 2 is to change the base of operand value to the
            # start address of the instruction.
            code = "{:s} {}".format(instruction.mnemonic,
                                    adjusted_operand_value + 2)

            try:
                # The assembly text is handed to keystone as-is. The former
                # hexlify(...).decode('hex') round trip returned the same
                # text on Python 2 and raises on Python 3.
                encoding, count = self.ks.asm(code)
                for inst in self.cs.disasm(bytearray(encoding),
                                           instruction_address):
                    self.log.log("\t" + code + "\t" + inst.mnemonic + " " +
                                 inst.op_str + "\n")
                instrumented_size = len(encoding)
                if instrumented_size == 5:
                    # 5-byte form: the byte at index 1 is bumped by 3 for a
                    # positive branch (see Note in the docstring)
                    if adjusted_operand_value > 0:
                        encoding[1] += 3
                    else:
                        encoding[1] += 0
                elif instrumented_size == 6:
                    # 6-byte form: the byte at index 2 is bumped by 4 for a
                    # positive branch (presumably a two-byte opcode)
                    if adjusted_operand_value > 0:
                        encoding[2] += 4
                    else:
                        encoding[2] += 0
                else:
                    # NOTE(review): an unexpected encoding size is only
                    # reported; the patch below is still applied.
                    print("ERROR")

                # patch
                self.code_manager.set_instruction_at_offset(
                    instruction_address,
                    instruction_address + instruction.size, encoding)
                # save increased opcode, operand size for adjust again
                increased_size = instrumented_size - instruction.size
                handled_overflowed_pos_dict[instruction_address] \
                    = increased_size
                total_instrument_size += increased_size
                self.log.log("\t\t{} : {:d}\n".format(encoding,
                                                      increased_size))
            except KsError as ex:
                print("ERROR : {}".format(ex))
                exit()

        # fold the new growth positions into the instrument history and
        # reset the overflow bookkeeping for the next adjustment pass
        self.save_instrument_history(self.instrument_pos_dict,
                                     handled_overflowed_pos_dict)
        self.current_instrument_pos_dict = handled_overflowed_pos_dict
        self.overflowed_instrument_dict.clear()
        self._instrument_overflow_handled()
        self.log.fin()
        return True

    def merge_adjust_pos_with_prev(self, src_adjust_dict, dst_adjust_dict):
        """
        Merge a previous adjust map with a later adjust map.

        Both maps are walked in ascending key order and their items are
        unpacked as (address, size) pairs. Each source address is shifted by
        ``instrumented_size_by_dst`` (the running total of destination sizes
        consumed so far); destination entries whose address lies below the
        shifted source address are copied into the result first.

        Note:
            ``dst_adjust_dict`` must not be empty: its first entry is read
            unconditionally, so an empty map raises ``IndexError``.

            NOTE(review): destination entries that are never exceeded by a
            shifted source address are not copied into the result - confirm
            this is intended.

        Args:
            src_adjust_dict(dict): previous adjust map.
            dst_adjust_dict(dict): later adjust map.

        Returns:
            :obj:`dict` : adjusted instrumented map.
        """
        self.log = LoggerFactory().get_new_logger("AdjustingMerge.log")
        adjusted_dict = {}
        sorted_src_adjust_dict = sorted(src_adjust_dict.items(),
                                        key=operator.itemgetter(0))
        sorted_dst_adjust_dict = sorted(dst_adjust_dict.items(),
                                        key=operator.itemgetter(0))
        # cursor into the destination list and the total destination size
        # consumed so far
        dst_index = 0
        instrumented_size_by_dst = 0
        # load the first destination entry (IndexError if the map is empty)
        dst_instrumented_address, dst_instrumented_size = \
            sorted_dst_adjust_dict[dst_index]
        instrument_size_of_dst_address = dst_instrumented_size
        src_total_instrument_size = 0

        self.log.log("[instrument address]\t[instrument size]\t"
                     "[adjusted address]\t[total instrumented size]\n")
        # iterate over the source entries in ascending address order
        for src_instrumented_address, src_instrumented_size \
                in sorted_src_adjust_dict:
            adjust_instrument_address = src_instrumented_address
            # shift the source address by the destination sizes consumed so
            # far.
            adjust_instrument_address += instrumented_size_by_dst

            # true while the shifted source address lies past the current
            # destination address and destination entries remain: append the
            # current destination entry and load the next one.
            while adjust_instrument_address > dst_instrumented_address \
                    and len(sorted_dst_adjust_dict) > dst_index:
                # remember the current destination entry for appending.
                current_adjust_address = dst_instrumented_address
                current_instrument_size = instrument_size_of_dst_address

                # advance to the next destination entry.
                dst_index += 1
                if len(sorted_dst_adjust_dict) > dst_index:
                    # load the next destination entry.
                    dst_instrumented_address, dst_instrumented_size = \
                        sorted_dst_adjust_dict[dst_index]
                    # an address that is already in the result indicates a
                    # bug; it is only logged, the entry is still overwritten.
                    if current_adjust_address in adjusted_dict:
                        self.log.log("[OVERLAPPING]\t")
                    # update the running state.
                    # NOTE(review): the source address is shifted by the size
                    # of the entry just loaded, while the running total grows
                    # by the size of the entry being appended - confirm this
                    # asymmetry is intended.
                    adjust_instrument_address += dst_instrumented_size
                    instrumented_size_by_dst += current_instrument_size
                    instrument_size_of_dst_address = dst_instrumented_size
                # this was the last destination entry
                else:
                    # nothing was reloaded, so these keep the values assigned
                    # at the top of the loop body
                    current_adjust_address = dst_instrumented_address
                    current_instrument_size = instrument_size_of_dst_address
                    instrumented_size_by_dst += current_instrument_size
                    print("LAST INSTRUMENT")
                # append the current destination entry to the result.
                adjusted_dict[current_adjust_address] = current_instrument_size

                # logging
                # NOTE(review): the size is written twice, so the
                # "[adjusted address]" column holds the size here.
                self.log.log(
                    "{:>20}\t{:>17}\t{:>18}\t{:>25}\t[OVERFLOW]\n".format(
                        hex(current_adjust_address),
                        hex(current_instrument_size),
                        hex(current_instrument_size),
                        hex(instrumented_size_by_dst +
                            src_total_instrument_size),
                    ))

            # logging
            self.log.log("{:>20}\t{:>17}\t{:>18}\t{:>25}\n".format(
                hex(src_instrumented_address - src_total_instrument_size),
                hex(src_instrumented_size), hex(adjust_instrument_address),
                hex(src_total_instrument_size + instrumented_size_by_dst)))
            src_total_instrument_size += src_instrumented_size
            # record the source entry under its shifted address
            adjusted_dict[adjust_instrument_address] = src_instrumented_size
        return adjusted_dict

    def save_instrument_history(self, instrumented_pos_dict,
                                handled_overflowed_pos_dict):
        """
        Merge the two position maps with ``merge_adjust_pos_with_prev`` and
        store the result as ``self.instrument_pos_dict``.

        Args:
            instrumented_pos_dict(dict): passed as the previous adjust map.
            handled_overflowed_pos_dict(dict): passed as the later adjust map.
        """
        self.instrument_pos_dict = self.merge_adjust_pos_with_prev(
            instrumented_pos_dict, handled_overflowed_pos_dict)

    def append_code(self, code):
        """
        Assemble ``code`` surrounded by nop padding and hand it to the code
        manager's ``instrument_at_last``.

        Args:
            code(str) : assembly code to be placed at the end of the code
                section.

        Returns:
            int : base rva + offset returned by ``instrument_at_last`` + 3,
            i.e. past the three leading nops (assuming each assembles to a
            single byte).
        """
        nop_count = 3
        padding = ";nop;" * nop_count
        encoding, _ = self.ks.asm(padding + code + padding)
        inserted_at = self.code_manager.instrument_at_last(encoding)
        return self.code_manager.get_base_rva() + inserted_at + nop_count

    def falloc(self, size):
        """
        Create a ``DataSegment.Chunk`` of the requested size for this
        instance's PE manager.

        Args:
            size(int): size of the space to allocate.

        Returns:
            :obj:`DataSegment.Chunk`: chunk that represents the allocation.
        """
        return DataSegment.Chunk(self.pe_manager, size)

    def _save_instruction_log(self):
        """
        Write one line per instruction (address, mnemonic, operands) to a new
        "final_instructions.log" logger, which becomes ``self.log``.
        """
        self.log = LoggerFactory().get_new_logger("final_instructions.log")
        line_format = "[0x{:04x}]\t{}\t{}\n"
        for offset, insn in self.get_instructions():
            # image base plus 0x1000 (presumably the code section's RVA -
            # TODO confirm) turns the offset into the logged address
            logged_address = \
                offset + self.pe_manager.get_image_base() + 0x1000
            self.log.log(line_format.format(
                logged_address, insn.mnemonic, insn.op_str))
Exemplo n.º 7
0
 def do_instrument(self):
     """
     Walk every instruction and run the registered instrument callbacks on
     indirect branches, relative calls and returns, then call
     ``adjust_instruction_layout``.

     The values returned by ``self.instrument`` are summed into a running
     total that is passed to every following ``self.instrument`` call. Any
     exception raised while instrumenting is printed and terminates the
     process.
     """
     self.log = LoggerFactory().get_new_logger("Instrument.log")

     def log_position(inst, shifted_by):
         # shifted address, original address, mnemonic and operands
         self.log.log('[0x{:x}]\t[0x{:x}]\t{:s}\t{:s}\n'.format(
             inst.address + shifted_by, inst.address,
             inst.mnemonic, inst.op_str))

     def run_before(callbacks, inst, shifted_by):
         # instrument at the default position; returns the updated total
         for callback in callbacks:
             shifted_by += self.instrument(callback, inst, shifted_by)
         return shifted_by

     def run_after(callbacks, inst, shifted_by):
         # instrument after the instruction; returns the updated total
         for callback in callbacks:
             shifted_by += self.instrument(
                 callback, inst, shifted_by,
                 position=_INSTRUMENT_POS_AFTER_)
         return shifted_by

     total = 0
     for _address, inst in self.get_instructions():
         try:
             if self.disassembler.is_indirect_branch(inst):
                 if self.is_pre_indirect_branch_instrument_exist():
                     log_position(inst, total)
                     total = run_before(
                         self.pre_indirect_branch_functions, inst, total)
                 if self.is_after_indirect_branch_instrument_exist():
                     log_position(inst, total)
                     total = run_after(
                         self.after_indirect_branch_functions, inst, total)
             elif (self.disassembler.is_relative_branch(inst)
                   and self.disassembler.is_call(inst)):
                 # pre-call instrumentation is not logged
                 if self.is_pre_relative_branch_instrument_exist():
                     total = run_before(
                         self.pre_relative_branch_functions, inst, total)
                 if self.is_after_relative_branch_instrument_exist():
                     log_position(inst, total)
                     total = run_after(
                         self.after_relative_branch_functions, inst, total)
             elif self.disassembler.is_return(inst):
                 # pre-return instrumentation is not logged
                 if self.is_pre_return_instrument_exist():
                     total = run_before(
                         self.pre_return_functions, inst, total)
                 if self.is_after_return_instrument_exist():
                     log_position(inst, total)
                     total = run_after(
                         self.after_return_functions, inst, total)
         except Exception as e:
             print("ERROR WHILE INSTRUMENT")
             print(e)
             exit()
     self.adjust_instruction_layout()
Exemplo n.º 8
0
 def __init__(self, code, rva):
     """
     Keep the code buffer and its rva, attach a new "Instrument.log" logger
     and mark the code as needing to be handled.
     """
     self.code, self.rva = code, rva
     logger_factory = LoggerFactory()
     self.log = logger_factory.get_new_logger("Instrument.log")
     self._code_need_handled = True
Exemplo n.º 9
0
class CodeManager(object):
    """
    Editable code buffer located at a relative virtual address (rva).

    ``code`` is sliced and slice-assigned in place (including insertions that
    change its length), so it is expected to be a mutable byte sequence such
    as ``bytearray``. Every modifying method sets the "need code handle" flag,
    which is cleared with ``code_handled``.
    """

    # struct format characters keyed by operand size in bytes. Without a
    # prefix, struct uses the host's native byte order.
    _FORMAT_BY_SIZE = {8: 'q', 4: 'i', 2: 'h', 1: 'b'}

    def __init__(self, code, rva):
        """
        Args:
            code: code buffer to manage.
            rva(int): rva of the first byte of ``code``.
        """
        self.code = code
        self.rva = rva
        self.log = LoggerFactory().get_new_logger("Instrument.log")
        self._code_need_handled = True

    def __del__(self):
        # __init__ may have failed before the logger was attached, in which
        # case there is nothing to finish.
        log = getattr(self, "log", None)
        if log is not None:
            log.fin()

    def get_base_rva(self):
        """Return the rva the code buffer starts at."""
        return self.rva

    def get_dword_from_offset(self, offset, offset_end):
        """Alias of ``get_data_from_offset_with_format``."""
        return self.get_data_from_offset_with_format(offset, offset_end)

    def get_data_from_offset_with_format(self, offset, offset_end):
        """
        Unpack ``code[offset:offset_end]`` as one signed integer.

        The span must be 1, 2, 4 or 8 bytes long; any other size yields a
        ``None`` format and makes ``struct.unpack`` raise.
        """
        size = offset_end - offset
        return struct.unpack(self.get_format_from_size(size),
                             self.code[offset:offset_end])[0]

    def get_data_at_offset(self, offset, offset_end):
        """Return the raw slice ``code[offset:offset_end]``."""
        return self.code[offset:offset_end]

    def instrument(self, offset, instrument_instruction):
        """Insert ``instrument_instruction`` at ``offset`` without
        overwriting anything."""
        self.log.log(
            '[0] [0x{:05x}]\t{}\n'.format(offset, instrument_instruction))
        self.code[offset:offset] = instrument_instruction
        self.need_code_handle()

    def instrument_with_replace(self, offset, origin_instruction_size,
                                instrument_instruction):
        """
        Replace the ``origin_instruction_size`` bytes starting at ``offset``
        with ``instrument_instruction``.
        """
        self.log.log(
            '[0] [0x{:05x}]\t{}\n'.format(offset, instrument_instruction))
        # The slice end is an offset, not a size: slicing up to the bare
        # size would replace the wrong span (or merely insert).
        offset_end = offset + origin_instruction_size
        self.code[offset:offset_end] = instrument_instruction
        self.need_code_handle()

    def instrument_at_last(self, instrument_instruction):
        """
        Insert ``instrument_instruction`` at offset ``len(code) - 1`` and
        return that offset.

        NOTE(review): this inserts in front of the final byte, so the
        existing last byte stays last - confirm this is intended rather than
        appending at ``len(code)``.
        """
        offset = len(self.code) - 1
        self.log.log("[LAST]")
        self.instrument(offset, instrument_instruction)
        return offset

    def set_instruction_at_offset(self, offset, offset_end, instruction):
        """Replace ``code[offset:offset_end]`` with ``instruction`` (the
        replacement may have a different length)."""
        self.log.log(
            '[1] [0x{:05x}]\t{} \t{} \n'.format(offset,
                                                self.code[offset:offset_end],
                                                instruction))
        self.code[offset:offset_end] = instruction
        self.need_code_handle()

    def set_data_at_offset_with_format(self, offset, offset_end, data):
        """
        Pack ``data`` as a signed integer into ``code[offset:offset_end]``.

        Raises:
            struct.error: if ``data`` does not fit into the span; callers
                rely on this to detect operand overflow.
        """
        size = offset_end - offset
        fmt = self.get_format_from_size(size)
        # the previous value is unpacked only for the log line
        unpack_data = struct.unpack(fmt, self.code[offset:offset_end])
        self.log.log('[2] [0x{:05x}]\t{} \t{} \n'.format(offset,
                                                         unpack_data,
                                                         data))
        self.code[offset:offset_end] = struct.pack(fmt, data)
        self.need_code_handle()

    def get_code(self):
        """Return the managed code buffer itself (not a copy)."""
        return self.code

    def is_need_code_handle(self):
        """Return True when the code changed since ``code_handled``."""
        return self._code_need_handled

    def code_handled(self):
        """Clear the "need code handle" flag."""
        self._code_need_handled = False

    def need_code_handle(self):
        """Set the "need code handle" flag."""
        self._code_need_handled = True

    @staticmethod
    def get_format_from_size(size):
        """
        Return the signed struct format character for ``size`` bytes
        (1, 2, 4 or 8), or None for any other size.
        """
        return CodeManager._FORMAT_BY_SIZE.get(size)

    @staticmethod
    def get_format_from_size_little_endian(size):
        """
        Return the little-endian signed struct format for ``size`` bytes
        (1, 2, 4 or 8). Any other size prints "ERROR" and exits.
        """
        fmt = CodeManager._FORMAT_BY_SIZE.get(size)
        if fmt is None:
            print("ERROR")
            exit()
        return '<' + fmt

    def get_data_from_rva(self, rva, length):
        """Return ``length`` raw bytes starting at ``rva``, translated to a
        buffer offset by subtracting the base rva."""
        zero_relative_rva = rva - self.rva
        data = self.get_data_at_offset(zero_relative_rva,
                                       zero_relative_rva + length)
        return data