Exemplo n.º 1
0
 def __init__(self, registers, instruction_pointer, stack_pointer, stack_registers=None):
     """
     Populate a fresh context with its register store, helper objects and history containers.

     :param registers: Object stored as ``self.registers``.
     :param instruction_pointer: Stored as ``self._ip``
         (presumably the name of the instruction pointer register -- confirm against callers).
     :param stack_pointer: Stored as ``self._sp``
         (presumably the name of the stack pointer register -- confirm against callers).
     :param stack_registers: Optional value stored as ``self.stack_registers``;
         any falsy value is replaced with a new empty list.
     """
     self.registers = registers

     # Helper objects are created fresh for every context.
     self.jcccontext = JccContext()
     self.memory = Memory()

     # History containers.
     self.func_calls = dict()  # Function calls seen so far.
     self.executed_instructions = list()  # Instructions that have been executed.
     self.memory_copies = collections.defaultdict(list)  # Memory moves.

     # Word size in bits and in bytes.
     word_bits = utils.get_bits()
     self.bitness = word_bits
     self.byteness = word_bits // 8

     self.stack_registers = stack_registers if stack_registers else []

     # VariableMap receives this context before _sp/_ip are assigned, matching the original order.
     self.variables = VariableMap(self)

     self._sp = stack_pointer
     self._ip = instruction_pointer
Exemplo n.º 2
0
    def __init__(self, emulator, registers, instruction_pointer, stack_pointer):
        """
        Populate a fresh context with its emulator, register store, helper objects and history.

        :param emulator: Object stored as ``self.emulator``.
        :param registers: Object stored as ``self.registers``.
        :param instruction_pointer: Stored as ``self._ip``
            (presumably the name of the instruction pointer register -- confirm against callers).
        :param stack_pointer: Stored as ``self._sp``
            (presumably the name of the stack pointer register -- confirm against callers).
        """
        self.emulator = emulator
        self.registers = registers

        # Helper objects are created fresh for every context.
        self.jcccontext = JccContext()
        self.memory = Memory()

        # History containers.
        self.func_calls = dict()  # Function calls seen so far.
        self.executed_instructions = list()  # Instructions that have been executed.
        self.memory_copies = collections.defaultdict(list)  # Memory moves.

        # Word size in bits and in bytes.
        word_bits = utils.get_bits()
        self.bitness = word_bits
        self.byteness = word_bits // 8

        # Both maps receive this context before _sp/_ip are assigned, matching the original order.
        self.variables = VariableMap(self)
        self.objects = ObjectMap(self)
        self.actions = list()  # List of action objects (namedtuples)

        # Function start address of a function we are currently hooking.
        self.hooking_call = None

        self._sp = stack_pointer
        self._ip = instruction_pointer
        # Remember the value of self.sp at construction time.
        self._sp_start = self.sp
Exemplo n.º 3
0
class ProcessorContext(object):
    """
    Stores the context of the processor during execution.

    :param registers: Instance of an initialized RegisterMap object used to store register values
        for the given architecture.
    :param str instruction_pointer: Name of the register used to point to the current instruction
        being currently executed or to-be executed.
    :param str stack_pointer: Name of the register used to hold the stack pointer.
    :param [str] stack_registers: List of register names used for handling the stack.
    """

    # Must be set by inherited classes.
    ARCH_NAME = None  # Name of architecture as reported by disassembler.
    OPCODES = {}  # Map of opcode mnemonics to functions that emulate them.

    def __init__(self,
                 registers,
                 instruction_pointer,
                 stack_pointer,
                 stack_registers=None):
        self.registers = registers
        self.jcccontext = JccContext()
        self.memory = Memory()
        self.func_calls = {}  # Keeps track of function calls.
        self.executed_instructions = [
        ]  # Keeps track of the instructions that have been executed.
        self.memory_copies = collections.defaultdict(
            list)  # Keeps track of memory moves.
        self.bitness = utils.get_bits()
        # Floor division keeps byteness an int: it is passed to mem_read() as a
        # read size in get_function_args(), and true division yields a float on Python 3.
        self.byteness = self.bitness // 8
        self.stack_registers = stack_registers or []
        self.stack_variables = {
        }  # maps memory addresses -> (frame_id, stack_offset)
        self.stack = []
        self._sp = stack_pointer
        self._ip = instruction_pointer

    @classmethod
    def from_arch(cls, arch_name=None):
        """
        Factory method for initializing a ProcessorContext based on detected architecture.

        :param arch_name: Name of architecture to initialize (according to the disassembler)
                          Architecture is automatically detected if not provided.

        :returns: Instance of the first direct subclass whose ARCH_NAME matches.

        :raises NotImplementedError: If architecture is not supported.
        """
        # Pull from disassembler if not provided.
        if not arch_name:
            info = idaapi.get_inf_structure()
            arch_name = info.procName

        # Only direct subclasses are searched.
        for subclass in cls.__subclasses__():
            if subclass.ARCH_NAME == arch_name:
                return subclass(
                )  # Subclasses shouldn't have any initialization parameters.
        raise NotImplementedError(
            'Architecture not supported: {}'.format(arch_name))

    @property
    def ip(self):
        """Alias for retrieving instruction pointer."""
        return self.registers[self._ip]

    @ip.setter
    def ip(self, value):
        """Alias for setting instruction pointer."""
        self.registers[self._ip] = value

    @property
    def sp(self):
        """Alias for retrieving stack pointer."""
        return self.registers[self._sp]

    @sp.setter
    def sp(self, value):
        """Alias for setting stack pointer."""
        self.registers[self._sp] = value

    def execute(self, ip=None):
        """
        "Execute" the instruction at IP and store results in the context.
        The RIP/EIP register will be set to the value supplied in IP so that it is
        correct.

        Exceptions raised by an opcode handler are logged and swallowed; the
        instruction is still recorded as executed.

        :param ip: instruction address to execute (defaults to currently set ip)
        """
        if not ip:
            ip = self.ip

        # Set instruction pointer to where we are currently executing.
        self.ip = ip

        # Determine if a rep* instruction and add termination condition.
        term_condition = None
        if idc.get_wide_byte(ip) in (0xf2, 0xf3):
            insn = idc.GetDisasm(
                ip)  # IDA pro never has operands for rep opcodes.
            if insn.startswith('rep '):
                term_condition = lambda: self.registers.ecx == 0
            elif insn.startswith(('repe ', 'repz ')):
                term_condition = lambda: self.registers.ecx == 0 or self.registers.zf == 0
            elif insn.startswith(('repne ', 'repnz ')):
                term_condition = lambda: self.registers.ecx == 0 or self.registers.zf == 1

        # Emulate instruction.
        mnem = idc.print_insn_mnem(ip)
        operands = self.operands
        instruction = self.OPCODES.get(mnem)
        if instruction:
            try:
                if term_condition:
                    # As a safety measure, don't allow rep instructions to surpass
                    # our max memory read limit.
                    if self.registers.ecx > self.memory.MAX_MEM_READ:
                        logger.warning(
                            '0x{:08X} :: Emulation attempted to read {} instruction {} times. '
                            'Ignoring instruction.'.format(
                                ip, mnem, self.registers.ecx))
                    else:
                        logger.debug(
                            'Emulating {} instruction {} times.'.format(
                                mnem, self.registers.ecx))
                        # Repeat the handler, counting ecx down, until the
                        # prefix-specific termination condition holds.
                        while not term_condition():
                            instruction(self, ip, mnem, operands)
                            self.registers.ecx -= 1
                else:
                    instruction(self, ip, mnem, operands)
            except Exception:
                # Deliberate best effort: a failing handler must not abort emulation.
                logger.exception('Failed to execute address 0x{:X}: {}'.format(
                    ip, idc.GetDisasm(ip)))
        else:
            logger.debug('{} instruction not implemented.'.format(mnem))

        # Record executed instruction.
        self.executed_instructions.append(ip)

        # After execution, set instruction pointer to next instruction assuming
        # standard code flow and if no jump was made.
        if self.ip == ip:
            self.ip = idc.next_head(ip)

    def get_call_history(self, func_name):
        """
        Returns the call history for a specific function name.

        :param func_name: Name of the function to look up in the recorded calls.

        :returns: List of tuples containing: (ea of call, list of function arguments)
        """
        return [(ea, args)
                for ea, (_func_name, args) in self.func_calls.items()
                if _func_name == func_name]

    def prep_for_branch(self, bb_start_ea):
        """
        Modify this current context in preparation for a specific path.

        The jcccontext is replaced with a fresh JccContext afterwards, whether or
        not any modification was made.

        :param bb_start_ea: Start address of the basic block about to be followed.
        """
        if self.jcccontext.is_alt_branch(bb_start_ea):
            logger.debug(
                "Modifying context for branch at 0x{:X}".format(bb_start_ea))
            dst_opnd = self.jcccontext.alt_branch_data_dst
            # TODO: There is probably a more elegant way of doing this. Jcccontext should not store the full operand objects.
            # Grab the operands relative to this current context and set the value.
            dst_opnd = self.get_operands(ip=dst_opnd.ip)[dst_opnd.idx]
            dst_opnd.value = self.jcccontext.alt_branch_data

        self.jcccontext = JccContext()

    def get_operands(self, ip=None):
        """
        Gets the Operand objects of all operands in the current instruction and returns them in a list.

        :param int ip: location of instruction pointer to pull operands from (defaults to current rip in context)

        :return: list of Operand objects
        """
        if ip is None:
            ip = self.ip

        operands = []
        cmd = idaapi.insn_t()
        # NOTE(review): the value returned by decode_insn is used only as an upper
        # bound on the operand index; the loop relies on Operand() raising once the
        # index runs past the real operands -- confirm against the IDA API.
        inslen = idaapi.decode_insn(cmd, ip)
        for i in range(inslen):
            try:
                operand = Operand(self, ip, i)
                # IDA will sometimes create hidden or "fake" operands.
                # These are there to represent things like an implicit EAX register.
                # To help avoid confusion to the opcode developer, these fake operands will not be included.
                if not operand.is_fake:
                    operands.append(operand)
            except (IndexError, RuntimeError):
                # IDA will identify more operands than there actually are causing an issue.
                # Just break out of the loop if this happens.
                # IDA 7 throws RuntimeError instead of IndexError
                break

        return operands

    @property
    def operands(self):
        """Operand objects for the instruction at the current instruction pointer."""
        return self.get_operands()

    def reg_read(self, reg):
        """
        Read a register value

        >>> cpu_context = ProcessorContext.from_arch()
        >>> cpu_context.reg_read("EIP")

        :param str reg: register name to be read

        :return int: value contained in specified register as int
        """
        return self.registers[reg]

    def reg_write(self, reg, val):
        """
        Write a register value

        :param str reg: register name to be written

        :param int val: value to be written to register as an int of width of the register (will be truncated as necessary)
        """
        self.registers[reg] = val

    def mem_alloc(self, size):
        """
        Allocates heap region with size number of bytes.

        :param size: Number of bytes to allocate.
        :return: starting address of allocated memory.
        """
        return self.memory.alloc(size)

    def mem_realloc(self, address, size):
        """
        Reallocates heap region with size number of bytes.

        :param address: base address to reallocate.
        :param size: Number of bytes to allocate.
        :return: address of the reallocated memory block.
        """
        new_address = self.memory.realloc(address, size)
        # Record a memory copy if pointer has changed.
        if new_address != address:
            self.memory_copies[self.ip].append((address, new_address, size))
        return new_address

    def mem_copy(self, src, dst, size):
        """
        Copy data from src address to dst address
        (Use this over mem_read/mem_write in order to allow the context to keep track of memory pointer history.)

        :param src: Source address
        :param dst: Destination address
        :param size: Number of bytes to copy over.
        :return: None
        """
        # The copy is recorded under the current instruction pointer before the data is moved.
        self.memory_copies[self.ip].append((src, dst, size))
        self.mem_write(dst, self.mem_read(src, size))

    def get_pointer_history(self, ea):
        """
        Retrieves the history of a specific pointer.

        :param ea: Pointer to start with.
        :return: list of tuples containing (address of the memory copy, source pointer)
            - sorted by earliest to latest incarnation of the pointer. (not including itself)
        """
        history = []
        # Walk the recorded copies from the highest instruction address down,
        # following each destination back to its source.
        for ip, copies in sorted(self.memory_copies.items(), reverse=True):
            for src, dst, _size in sorted(copies, reverse=True):
                if dst == ea:
                    history.append((ip, src))
                    ea = src
        history.reverse()
        return history

    def get_original_location(self, addr):
        """
        Retrieves the original location for a given address by looking through its pointer history.

        :param addr: address of interest

        :return: a tuple containing:
            - instruction pointer where the original location was first copied
                or None if given address is already loaded or the original location could not be found.
            - either a loaded address, a tuple containing (frame_id, stack_offset) for a stack variable,
                or None if the original location could not be found.
        """
        # Pull either the first seen loaded address or last seen stack variable.
        if idc.is_loaded(addr):
            return None, addr
        ip = None
        if addr in self.stack_variables:
            stack_var = self.stack_variables[addr]
        else:
            stack_var = None
        # Iterate from the latest incarnation of the pointer back to the earliest.
        for ip, ea in reversed(self.get_pointer_history(addr)):
            if idc.is_loaded(ea):
                return ip, ea
            if ea in self.stack_variables:
                stack_var = self.stack_variables[ea]
        return ip, stack_var

    # TODO: We should be recording local and global variables and their values.
    #   This will most likely require us making a "Variable" object similar
    #   to what we do with Operand.
    def get_variable_name(self, ea_or_stack_tuple):
        """
        Returns the name of the variable for the given ea or stack tuple.

        :param ea_or_stack_tuple: ea address or tuple containing: (frame_id, stack_offset)
        :return: string of name or None
        """
        if isinstance(ea_or_stack_tuple, tuple):
            frame_id, stack_offset = ea_or_stack_tuple
            member_id = idc.get_member_id(frame_id, stack_offset)
            return ida_struct.get_member_fullname(member_id)

        ea = ea_or_stack_tuple
        name = idc.get_name(ea)
        if name:
            return name
        # No name at this address; fall back to wherever the pointer originally came from.
        _, original_location = self.get_original_location(ea)
        if original_location:
            return self.get_variable_name(original_location)
        return None

    def mem_read(self, address, size):
        """
        Read memory at the specified address of size size

        :param int address: address to read memory from
        :param int size: size of data to be read
        :return bytes: read data as bytes
        """
        return self.memory.read(address, size)

    def mem_write(self, address, data):
        """
        Write content contained in data to specified address

        :param int address: address to write data at
        :param bytes data: data to be written as bytes
        """
        self.memory.write(address, data)

    def mem_find(self, value, start=0, end=None):
        """Delegates to ``self.memory.find`` with the same arguments and returns its result."""
        return self.memory.find(value, start=start, end=end)

    def mem_find_in_segment(self, value, seg_name_or_ea):
        """Delegates to ``self.memory.find_in_segment`` and returns its result."""
        return self.memory.find_in_segment(value, seg_name_or_ea)

    def mem_find_in_heap(self, value):
        """Delegates to ``self.memory.find_in_heap`` and returns its result."""
        return self.memory.find_in_heap(value)

    def read_data(self, addr, size=None, data_type=None):
        """
        Reads memory at the specified address, of the specified size and convert
        the resulting data into the specified type.

        :param int addr: address to read data from
        :param int size: size of data to read
        :param data_type: type of data to be extracted
            (default to byte string if size provided or C string if not)

        :return: the raw bytes for the string types, otherwise the result of
            ``utils.struct_unpack`` on the bytes read.

        :raises ValueError: If data_type is not one of the supported types.
        """
        if not data_type:
            data_type = STRING if size is None else BYTE_STRING
        if size is None:
            size = 0

        if data_type == STRING:
            null_offset = self.memory.find(b'\0', start=addr)
            # It should always eventually find a null since unmapped pages
            # are all null. If we get -1 we have a bug.
            assert null_offset != -1, "Unable to find a null character!"
            return self.memory.read(addr, null_offset - addr)

        elif data_type == WIDE_STRING:
            # Step by 2 bytes to find 2 nulls on an even alignment.
            # (This helps prevent the need to take endianness into account.)
            null_offset = addr
            while self.memory.read(null_offset, 2) != b'\0\0':
                null_offset += 2

            return self.memory.read(addr, null_offset - addr)

        elif data_type == BYTE_STRING:
            return self.memory.read(addr, size)

        elif data_type == BYTE:
            return utils.struct_unpack(self.mem_read(addr, 1))

        elif data_type == WORD:
            return utils.struct_unpack(self.mem_read(addr, 2))

        elif data_type == DWORD:
            return utils.struct_unpack(self.mem_read(addr, 4))

        elif data_type == QWORD:
            return utils.struct_unpack(self.mem_read(addr, 8))

        raise ValueError('Invalid data_type: {!r}'.format(data_type))

    def get_function_args(self, func_ea=None):
        """
        Returns the function argument values for this context based on the
        given function.

        >>> cpu_context = ProcessorContext.from_arch()
        >>> args = cpu_context.get_function_args(0x180011772)

        :param int func_ea: Ea of the function to pull a signature from.

        :returns: list of function arguments

        :raises NotImplementedError: If an argument uses an unsupported location type
            (ALOC_NONE, ALOC_DIST or ALOC_CUSTOM).
        """
        # If func_ea is not given, assume we are using the first operand from a call instruction.
        if not func_ea:
            operand = self.operands[0]
            # function pointer can be a memory reference or immediate.
            func_ea = operand.addr or operand.value

        # First get a func_type_data_t structure for the function
        funcdata = utils.get_function_data(func_ea)

        # Now use the data contained in funcdata to obtain the values for the arguments.
        args = []
        for i in range(funcdata.size()):
            loc_type = funcdata[i].argloc.atype()
            # Where was this parameter passed?
            if loc_type == 0:  # ALOC_NONE, not sure what this means...
                raise NotImplementedError(
                    "Argument {} location of type ALOC_NONE".format(i))
            elif loc_type == 1:  # ALOC_STACK
                # read the argument from the stack using the calculated stack offset from the disassembler
                cur_esp = self.sp + funcdata[i].argloc.stkoff()
                arg = self.mem_read(cur_esp, self.byteness)
                args.append(utils.struct_unpack(arg))
            elif loc_type == 2:  # ALOC_DIST, arguments described by multiple locations
                # TODO: Uses the scattered_aloc_t class, which is a qvector or argpart_t objects
                # funcdata[i].argloc.scattered()
                raise NotImplementedError(
                    "Argument {} location of type ALOC_DIST".format(i))
            elif loc_type == 3:  # ALOC_REG1, single register
                arg = self.reg_read(
                    utils.REG_MAP.get(funcdata[i].argloc.reg1()))
                # Mask the register value down to the declared width of the argument type.
                width = funcdata[i].type.get_size()
                args.append(arg & utils.get_mask(width))
            elif loc_type == 4:  # ALOC_REG2, register pair (eg: edx:eax [reg2:reg1])
                # TODO: CURRENTLY UNTESTED
                logger.info(
                    "Argument {} of untested type ALOC_REG2.  Verify results and report issues"
                    .format(i))
                reg1_val = self.reg_read(
                    utils.REG_MAP.get(funcdata[i].argloc.reg1()))
                reg2_val = self.reg_read(
                    utils.REG_MAP.get(funcdata[i].argloc.reg2()))
                # TODO: Probably need to determine how to check the width of these register values in order to shift
                #       the data accordingly.  Will likely need examples for testing/verification of functionality.
                args.append(reg2_val << 32 | reg1_val)
            elif loc_type == 5:  # ALOC_RREL, register relative (displacement from address pointed by register
                # TODO: CURRENTLY UNTESTED
                logger.info(
                    "Argument {} of untested type ALOC_RREL.  Verify results and report issues."
                    .format(i))
                # Obtain the register-relative argument location
                rrel = funcdata[i].argloc.get_rrel()
                # Extract the pointer value in the register
                ptr_val = self.reg_read(utils.REG_MAP.get(rrel.reg))
                # Get the offset
                offset = rrel.off
                args.append(ptr_val + offset)
            elif loc_type == 6:  # ALOC_STATIC, global address
                # TODO: CURRENTLY UNTESTED
                logger.info(
                    "Argument {} of untested type ALOC_STATIC.  Verify results and report issues."
                    .format(i))
                args.append(funcdata[i].argloc.get_ea())
            elif loc_type >= 7:  # ALOC_CUSTOM, custom argloc
                # TODO: Will need to figure out the functionality and usage for the custloc_desc_t structure
                # funcdata[i].argloc.get_custom()
                raise NotImplementedError(
                    "Argument {} location of type ALOC_CUSTOM".format(i))

        return args
Exemplo n.º 4
0
def test_memory():
    """Exercise the memory controller: reads, writes, str output, searching and heap allocation."""
    from kordesii.utils.function_tracing.memory import Memory

    memory = Memory()

    # Basic read of memory that has not been written to.
    assert memory.read(0x00121000, 10) == b'\x00' * 10

    # Reading across pages.
    hello_ea = 0x00121FFB
    memory.write(hello_ea, b'helloworld')
    assert memory.read(hello_ea, 10) == b'helloworld'
    assert memory.read(hello_ea + 10, 10) == b'\x00' * 10
    assert memory.read(hello_ea + 5, 10) == b'world' + b'\x00' * 5

    # Reading segment data.
    assert memory.read(0x0040C000, 11) == b'Idmmn!Vnsme'
    assert memory.read(0x00401150, 3) == b'\x55\x8B\xEC'

    # String representation of the memory object.
    expected_layout = dedent('''\
        Base Address             Address Range            Size
        0x00121000               0x00121000 - 0x00123000  8192
        0x00401000               0x00401000 - 0x0040F000  57344
    ''')
    assert str(memory) == expected_layout

    # Searching for the written string with different start/end bounds.
    bounded_searches = [
        ({'start': 0x0011050}, 0x00121FFB),
        ({}, 0x00121FFB),
        ({'start': 0x00121FFC}, -1),
        ({'end': 0x10}, -1),
        ({'start': 0x0011050, 'end': 0x00121FFB}, -1),
        ({'start': 0x0011050, 'end': 0x00122000}, -1),
        ({'start': 0x0011050, 'end': 0x00122100}, 0x00121FFB),
    ]
    for bounds, expected_ea in bounded_searches:
        assert memory.find(b'helloworld', **bounds) == expected_ea

    # Searching for segment data.
    assert memory.find(b'`QFBWF') == 0x0040C120
    assert memory.find(b'Idmmn!Vnsme') == 0x0040C000
    for segment_name, expected_ea in (('.data', 0x0040C000), ('.text', -1)):
        assert memory.find_in_segment(b'Idmmn!Vnsme', segment_name) == expected_ea
    assert memory.find(b'\x5F\x5E\xC3', start=0x004035BD) == 0x004035E0

    # Heap allocations.
    first_alloc_ea = memory.alloc(10)
    assert first_alloc_ea == memory.HEAP_BASE
    second_alloc_ea = memory.alloc(20)
    assert second_alloc_ea == memory.HEAP_BASE + 10 + memory.HEAP_SLACK

    # Data written to the heap can be read back and found.
    memory.write(second_alloc_ea, b'im in the heap!')
    assert memory.read(second_alloc_ea, 15) == b'im in the heap!'
    assert memory.find_in_heap(b'the heap!') == second_alloc_ea + 6
    memory.write(second_alloc_ea, b'helloworld')
    assert memory.find_in_heap(b'helloworld') == second_alloc_ea

    # Reallocation: first without relocation, then with relocation.
    assert memory.realloc(first_alloc_ea, 40) == first_alloc_ea
    relocated_ea = memory.realloc(first_alloc_ea, memory.PAGE_SIZE * 5)
    assert relocated_ea == second_alloc_ea + 20 + memory.HEAP_SLACK
Exemplo n.º 5
0
class ProcessorContext(object):
    """
    Stores the context of the processor during execution.

    :param emulator: Instance of Emulator to use during emulation.
    :param registers: Instance of an initialized RegisterMap object used to store register values
        for the given architecture.
    :param str instruction_pointer: Name of the register used to point to the current instruction
        being currently executed or to-be executed.
    :param str stack_pointer: Name of the register used to hold the stack pointer.
    """

    # Must be set by inherited classes.
    ARCH_NAME = None  # Name of architecture as reported by disassembler.
    OPCODES = {}  # Map of opcode mnemonics to functions that emulate them.

    # Cache for keeping track of instructions and their operand indexes.
    _operand_indices = {}

    def __init__(self, emulator, registers, instruction_pointer, stack_pointer):
        """
        Populate a fresh context with its emulator, register store, helper objects and history.

        :param emulator: Instance of Emulator to use during emulation.
        :param registers: Initialized RegisterMap object holding the register values.
        :param str instruction_pointer: Name of the register holding the instruction pointer.
        :param str stack_pointer: Name of the register holding the stack pointer.
        """
        self.emulator = emulator
        self.registers = registers

        # Helper objects are created fresh for every context.
        self.jcccontext = JccContext()
        self.memory = Memory()

        # History containers.
        self.func_calls = dict()  # Function calls seen so far.
        self.executed_instructions = list()  # Instructions that have been executed.
        self.memory_copies = collections.defaultdict(list)  # Memory moves.

        # Word size in bits and in bytes.
        word_bits = utils.get_bits()
        self.bitness = word_bits
        self.byteness = word_bits // 8

        # Both maps receive this context before _sp/_ip are assigned, matching the original order.
        self.variables = VariableMap(self)
        self.objects = ObjectMap(self)
        self.actions = list()  # List of action objects (namedtuples)

        # Function start address of a function we are currently hooking.
        self.hooking_call = None

        self._sp = stack_pointer
        self._ip = instruction_pointer
        # The sp property reads self.registers[self._sp], so this must come after _sp is set.
        self._sp_start = self.sp

    @classmethod
    def from_arch(cls, emulator, arch_name=None):
        """
        Factory method for initializing a ProcessorContext based on detected architecture.

        :param emulator: Instance of Emulator to use during emulation.
        :param arch_name: Name of architecture to initializes (according to the disassembler)
                          Architecture is automatically detected if not provided.

        :raises NotImplementedError: If architecture is not supported.
        """
        # Pull from disassembler if not provided.
        if not arch_name:
            info = idaapi.get_inf_structure()
            arch_name = info.procName

        for subclass in cls.__subclasses__():
            if subclass.ARCH_NAME == arch_name:
                return subclass(emulator)
        raise NotImplementedError(
            "Architecture not supported: {}".format(arch_name))

    def __deepcopy__(self, memo):
        """Implementing our own deepcopy to improve speed."""
        # Create class, but avoid calling __init__()
        # so we don't trigger the unnecessary initialization of Memory and JccContext
        klass = self.__class__
        copy = klass.__new__(klass)
        memo[id(self)] = copy

        copy.emulator = self.emulator  # This is a reference, don't create a new instance.
        copy.hooking_call = self.hooking_call
        copy.registers = deepcopy(self.registers, memo)
        copy.jcccontext = deepcopy(self.jcccontext, memo)
        copy.memory = deepcopy(self.memory, memo)
        copy.variables = deepcopy(self.variables, memo)
        copy.objects = deepcopy(self.objects, memo)
        copy.actions = list(self.actions)
        copy.func_calls = dict(self.func_calls)
        copy.executed_instructions = list(self.executed_instructions)
        copy.memory_copies = self.memory_copies.copy()
        copy.bitness = self.bitness
        copy.byteness = self.byteness
        copy._sp = self._sp
        copy._ip = self._ip
        copy._sp_start = self._sp_start

        return copy

    @property
    def ip(self) -> int:
        """Alias for retrieving instruction pointer."""
        return self.registers[self._ip]

    @ip.setter
    def ip(self, value):
        """Alias for setting instruction pointer."""
        self.registers[self._ip] = value

    @property
    def sp(self) -> int:
        """Alias for retrieving stack pointer."""
        return self.registers[self._sp]

    @sp.setter
    def sp(self, value):
        """Alias for setting stack pointer."""
        self.registers[self._sp] = value

    @property
    def sp_diff(self) -> int:
        """
        The difference between the current stack pointer and the
        stack pointer at the beginning of the function.

        This helps with debugging since this number should match the number
        shown in the IDA disassembly.
        """
        return self._sp_start - self.sp

    @property
    def prev_instruction(self):
        """That last instruction that was executed or None if no instructions have been executed."""
        if self.executed_instructions:
            return self.executed_instructions[-1]
        else:
            return None

    def execute_instruction_hooks(self, start, mnem, pre=True):
        """
        Executes instructions hooks for the given start
        """
        hooks = (self.emulator.get_instruction_hooks(start, pre=pre) +
                 self.emulator.get_instruction_hooks(mnem, pre=pre))
        if hooks:
            operands = self.operands
            for hook in hooks:
                try:
                    hook(self, start, mnem, operands)
                except RuntimeError:
                    raise  # Allow RuntimeError exceptions to be thrown.
                except Exception as e:
                    logger.debug(
                        "Failed to execute instruction hook with error: %s", e)

    def execute(self, start=None, end=None, max_instructions=10000):
        """
        "Execute" the instruction at IP and store results in the context.
        The RIP/EIP register will be set to the value supplied in IP so that it is
        correct.

        :param start: instruction address to start execution (defaults to currently set ip)
        :param end: instruction to stop execution (not including)
            (defaults to only run start)
        :param max_instructions: Maximum number of instructions to execute before
            raising an RuntimeError

        :raises RuntimeError: If maximum number of instructions get hit.
        """
        if not start:
            start = self.ip

        # Set instruction pointer to where we are currently executing.
        self.ip = start

        # Extra processing if we are at the start of a function.
        func_obj = ida_funcs.get_func(self.ip)
        if func_obj.start_ea == self.ip:
            # Reset the sp_start
            self._sp_start = self.sp

            # Add the passed in arguments to the variables map.
            # (This also helps to standardize the argument names to "a*" instead of "arg_*")
            for arg in self.passed_in_args:
                addr = arg.addr
                # TODO: Support variables from registers?
                if addr is not None:
                    if arg.is_stack:
                        try:
                            frame = ida_frame.get_frame(func_obj)
                            if not frame:
                                logger.warning(
                                    f"Failed to get frame for function argument: {repr(arg)}"
                                )
                                continue

                            # Getting member stack offset from name is more reliable then calculating
                            # it from the address.
                            member = ida_struct.get_member_by_name(
                                frame, arg.name)
                            if not member:
                                logger.warning(
                                    f"Failed to get member for function argument: {repr(arg)}"
                                )
                                continue

                            self.variables.add(addr,
                                               frame_id=frame.id,
                                               stack_offset=member.soff)
                        except ValueError:
                            logger.warning(
                                f"Failed to get stack information for function argument: {repr(arg)}"
                            )
                    else:
                        self.variables.add(addr)

        # If end is provided, recursively run execute() until ip is end.
        if end is not None:
            count = max_instructions
            prev_ecx = self.registers.ecx
            prev_ecx_count = count
            while self.ip != end:
                if ida_ua.print_insn_mnem(self.ip) == 'retn':
                    return
                self.execute()
                # TODO: Re-enable this feature after rigorous testing.
                # # Dynamically allow more instructions to be executed if we detect we are in a loop
                # # and it is making progress.
                # # Ie. this will allow most while statements not to ding our max instruction quota.
                # if self.registers.ecx and (self.registers.ecx < prev_ecx or prev_ecx == 0):
                #     if prev_ecx:
                #         count += min(prev_ecx_count - count, 1000)
                #     prev_ecx = self.registers.ecx
                #     prev_ecx_count = count
                count -= 1
                if not count:
                    raise RuntimeError('Hit maximum number of instructions.')
            return

        # Determine if a rep* instruction and add termination condition.
        term_condition = None
        if idc.get_wide_byte(start) in (0xF2, 0xF3):
            insn = idc.GetDisasm(
                start)  # IDA pro never has operands for rep opcodes.
            if insn.startswith("rep "):
                term_condition = lambda: self.registers.ecx == 0
                term_condition.unconditional = True
            elif insn.startswith(("repe ", "repz ")):
                term_condition = lambda: self.registers.ecx == 0 or self.registers.zf == 0
                term_condition.unconditional = False
            elif insn.startswith(("repne ", "repnz ")):
                term_condition = lambda: self.registers.ecx == 0 or self.registers.zf == 1
                term_condition.unconditional = False

        # Emulate instruction.
        mnem = ida_ua.print_insn_mnem(start)

        # Log a header line for debug messages of this instruction.
        # This is simpler and faster then trying to include the information at each log line
        logger.debug("[0x%X %03X] :: %s", start, self.sp_diff, mnem)

        # Run any pre-hooks first.
        self.execute_instruction_hooks(start, mnem, pre=True)

        instruction = self.OPCODES.get(mnem)
        if instruction:
            operands = self.operands

            try:
                if term_condition:
                    if self.emulator.disabled_rep:
                        logger.debug("Ignoring rep instructions: DISABLED.")

                    # As a safety measure, don't allow rep instructions to surpass
                    # our max memory read limit.
                    # Only do this check if the terminating condition is unconditional, otherwise
                    # this number usually big because it expects zf to be toggled.
                    elif term_condition.unconditional and self.registers.ecx > self.memory.MAX_MEM_READ:
                        logger.warning(
                            "Emulation attempted to read %s instruction %d times. "
                            "Ignoring instruction.", mnem, self.registers.ecx)
                    else:
                        logger.debug("Emulating %s instruction %d times.",
                                     mnem, self.registers.ecx)
                        count = 0
                        while not term_condition():
                            instruction(self, start, mnem, operands)
                            self.registers.ecx -= 1
                            # Stop if we are iterating too much.
                            count += 1
                            if count > self.memory.MAX_MEM_READ:
                                logger.warning(
                                    "Looped too many times, exiting prematurely."
                                )
                                break
                else:
                    instruction(self, start, mnem, operands)
            except Exception:
                logger.exception("Failed to execute address 0x%X: %s", start,
                                 idc.GetDisasm(start))
        else:
            logger.debug("%s instruction not implemented.", mnem)

        # Record executed instruction.
        self.executed_instructions.append(start)

        # Run any post-hooks
        self.execute_instruction_hooks(start, mnem, pre=False)

        # Add a blank space to help visually separate logs for each instruction.
        logger.debug(" ")

        # After execution, set instruction pointer to next instruction assuming
        # standard code flow and if no jump was made.
        if self.ip == start:
            self.ip = idc.next_head(start)

    def get_call_history(self, func_name_or_ea) -> List[Tuple[int, List]]:
        """
        Collects every recorded call that was made to a given function.

        :param func_name_or_ea: Name of the function, or an address which is resolved
            to a function name through utils.get_function_name().

        :returns: List of tuples containing: (ea of call, list of function arguments)
        """
        # Entries in func_calls are matched by name, so normalize the input to a name first.
        if isinstance(func_name_or_ea, str):
            target_name = func_name_or_ea
        else:
            target_name = utils.get_function_name(func_name_or_ea)

        history = []
        for call_ea, (called_name, call_args) in self.func_calls.items():
            if called_name == target_name:
                history.append((call_ea, call_args))
        return history

    def prep_for_branch(self, bb_start_ea):
        """
        Adjusts this context in preparation for following a specific path.

        :param bb_start_ea: Start address of the basic block about to be followed.
        """
        jcc = self.jcccontext
        if jcc.is_alt_branch(bb_start_ea):
            logger.debug("Modifying context for branch at 0x%08X", bb_start_ea)
            # Re-resolve the recorded destination operand relative to the current context
            # and set it to a valid value that makes this branch true.
            recorded_opnd = jcc.alt_branch_data_dst
            live_opnd = self.get_operands(ip=recorded_opnd.ip)[recorded_opnd.idx]
            live_opnd.value = jcc.alt_branch_data

        # The jcc state is replaced with a fresh one whether or not the context was modified.
        self.jcccontext = JccContext()

    def get_operands(self, ip=None) -> List[Operand]:
        """
        Builds the list of Operand objects for the instruction at the given address.

        :param int ip: address of the instruction to pull operands from
            (defaults to the current instruction pointer of this context)

        :return: list of Operand objects
        """
        if ip is None:
            ip = self.ip

        # Creating an insn_t() and decoding it is somewhat expensive and this function is
        # called a LOT, so the (index, type) pairs are cached per instruction address.
        try:
            cached = self._operand_indices[ip]
        except KeyError:
            insn = ida_ua.insn_t()
            # NOTE: We can't trust the instruction length returned by decode_insn.
            ida_ua.decode_insn(insn, ip)

            cached = []
            for position, op in enumerate(insn.ops):
                if op.type == ida_ua.o_void:
                    break  # no more operands

                # IDA will sometimes create hidden or "fake" operands
                # (e.g. to represent an implicit EAX register).
                # Those are left out to avoid confusing the opcode developer.
                # TODO: Checking shown() may not work as expected.
                #   If things explode, go back to checking operand.is_hidden
                if not op.shown():
                    continue
                cached.append((position, op.type))

            self._operand_indices[ip] = cached

        return [Operand(self, ip, position, _type=op_type) for position, op_type in cached]

    @property
    def operands(self):
        """
        The operands of the current instruction, as returned by get_operands()
        when called without an explicit address.
        """
        current_operands = self.get_operands()
        return current_operands

    def reg_read(self, reg):
        """
        Fetches the value currently stored in a register.

        >>> cpu_context = ProcessorContext()
        >>> cpu_context.reg_read("EIP")

        :param str reg: name of the register to read (the lookup uses the lowercased name)

        :return int: value contained in the specified register
        """
        register_name = reg.lower()
        return self.registers[register_name]

    def reg_write(self, reg, val):
        """
        Stores a value into a register.

        :param str reg: name of the register to write (the lookup uses the lowercased name)

        :param int val: value to be written to register as an int of width of the register (will be truncated as necessary)
        """
        register_name = reg.lower()
        self.registers[register_name] = val

    def mem_alloc(self, size):
        """
        Requests a heap region of the given size from the memory controller.

        :param size: Number of bytes to allocate.
        :return: starting address of allocated memory.
        """
        allocated_ea = self.memory.alloc(size)
        return allocated_ea

    def mem_realloc(self, address, size):
        """
        Resizes a heap region to the given number of bytes.

        When the memory controller hands back a different address, the move is logged
        in memory_copies under the current instruction pointer.

        :param address: base address to reallocate.
        :param size: Number of bytes to allocate.
        :return: address of the reallocated memory block.
        """
        relocated = self.memory.realloc(address, size)
        if relocated == address:
            # Block stayed in place; there is no pointer move to record.
            return relocated

        self.memory_copies[self.ip].append((address, relocated, size))
        return relocated

    def mem_copy(self, src, dst, size):
        """
        Copies size bytes from the src address to the dst address.
        (Prefer this over mem_read/mem_write so the context can keep track of memory pointer history.)

        :param src: Source address
        :param dst: Destination address
        :param size: Number of bytes to copy over.
        """
        # Log the copy under the current instruction pointer before moving the bytes.
        copy_record = (src, dst, size)
        self.memory_copies[self.ip].append(copy_record)

        payload = self.mem_read(src, size)
        self.mem_write(dst, payload)

    def get_pointer_history(self, ea):
        """
        Walks the recorded memory copies backwards to find where a pointer came from.

        NOTE: Copies are visited in descending order of the address of the instruction
        that recorded them; each time a copy's destination matches the pointer being
        followed, the search continues from that copy's source.

        :param ea: Pointer to start with.
        :return: list of tuples containing (address of the memory copy, source pointer)
            - sorted by earliest to latest incarnation of the pointer. (not including itself)
        """
        trail = []
        current = ea
        for copy_ip in sorted(self.memory_copies, reverse=True):
            for src, dst, _size in sorted(self.memory_copies[copy_ip], reverse=True):
                if dst != current:
                    continue
                trail.append((copy_ip, src))
                current = src
        # Entries were gathered latest-first; flip them so the earliest comes first.
        return trail[::-1]

    def get_original_location(self, addr):
        """
        Retrieves the original location for a given address by looking through its pointer history.

        :param addr: address of interest

        :return: a tuple containing:
            - instruction pointer where the original location was first copied
                or None if given address is already loaded or the original location could not be found.
            - either a loaded address, a tuple containing (frame_id, stack_offset) for a stack variable,
                or None if the original location could not be found.
        """
        # TODO: Consider refactoring.

        # An address that is already loaded is its own original location.
        if idc.is_loaded(addr):
            return None, addr

        # Otherwise pull either the first seen loaded address or the last seen stack variable
        # while walking the pointer history from latest to earliest.
        found_ip = None
        variable = self.variables.get(addr, None)
        history = self.get_pointer_history(addr)
        for found_ip, source_ea in reversed(history):
            if idc.is_loaded(source_ea):
                return found_ip, source_ea
            # Keep the previously found variable if this address has none.
            variable = self.variables.get(source_ea, variable)

        if variable and variable.is_stack:
            location = (variable.frame_id, variable.stack_offset)
        else:
            location = None
        return found_ip, location

    def mem_read(self, address, size):
        """
        Reads size bytes of memory starting at the given address.

        :param int address: address to read memory from
        :param int size: size of data to be read
        :return bytes: read data as bytes
        """
        contents = self.memory.read(address, size)
        return contents

    def mem_write(self, address, data):
        """
        Writes the given bytes into memory starting at the given address.

        :param int address: address to write data at
        :param bytes data: data to be written as bytes
        """
        memory = self.memory
        memory.write(address, data)

    def mem_find(self, value, start=0, end=None):
        """
        Searches memory for a value by delegating to the memory controller's find().

        :param value: value to search for
        :param start: forwarded to the memory controller as the "start" keyword
        :param end: forwarded to the memory controller as the "end" keyword
        :return: whatever the memory controller's find() returns
        """
        found = self.memory.find(value, start=start, end=end)
        return found

    def mem_find_in_segment(self, value, seg_name_or_ea):
        """
        Searches a single segment for a value by delegating to the memory controller's
        find_in_segment().

        :param value: value to search for
        :param seg_name_or_ea: segment to search, passed through to the memory controller unchanged
        :return: whatever the memory controller's find_in_segment() returns
        """
        found = self.memory.find_in_segment(value, seg_name_or_ea)
        return found

    def mem_find_in_heap(self, value):
        """
        Searches the heap for a value by delegating to the memory controller's find_in_heap().

        :param value: value to search for
        :return: whatever the memory controller's find_in_heap() returns
        """
        found = self.memory.find_in_heap(value)
        return found

    # TODO: Move this into Memory class and automatically decode values.
    def read_data(self, addr, size=None, data_type=None):
        """
        Reads memory at the given address and converts it based on the requested data type.

        :param int addr: address to read data from
        :param int size: size of data to read (only consulted for BYTE_STRING)
        :param data_type: type of data to be extracted
            (defaults to BYTE_STRING if size is provided or STRING if not)

        :return: the raw bytes for STRING, WIDE_STRING and BYTE_STRING (the string types are
            returned without their null terminator), or the result of utils.struct_unpack()
            for BYTE, WORD, DWORD and QWORD.

        :raises ValueError: if data_type is not one of the supported types.
        """
        if not data_type:
            data_type = BYTE_STRING if size is not None else STRING
        if size is None:
            size = 0

        if data_type == STRING:
            terminator = self.memory.find(b"\0", start=addr)
            # It should always eventually find a null since unmapped pages
            # are all null. If we get -1 we have a bug.
            assert terminator != -1, "Unable to find a null character!"
            return self.memory.read(addr, terminator - addr)

        if data_type == WIDE_STRING:
            # Step by 2 bytes to find 2 nulls on an even alignment.
            # (This helps prevent the need to take endianness into account.)
            cursor = addr
            while self.memory.read(cursor, 2) != b"\0\0":
                cursor += 2
            return self.memory.read(addr, cursor - addr)

        if data_type == BYTE_STRING:
            return self.memory.read(addr, size)

        # The integer types only differ in how many bytes get unpacked.
        for int_type, width in ((BYTE, 1), (WORD, 2), (DWORD, 4), (QWORD, 8)):
            if data_type == int_type:
                return utils.struct_unpack(self.mem_read(addr, width))

        raise ValueError("Invalid data_type: {!r}".format(data_type))

    def write_data(self, addr, value, data_type=None):
        """
        Writes memory at the specified address after converting the value
        into data based on the specified data type.

        The caller's value is never modified: null terminators are added to a new
        object, so a bytearray passed in as a STRING/WIDE_STRING is left untouched.

        :param int addr: address to write data to
        :param value: integer or byte string to write
        :param data_type: type of data to convert value from.
            (defaults to BYTE_STRING, STRING, or DWORD based on input data)

        :raises ValueError: if data_type is not provided and cannot be inferred from value,
            or if data_type is not one of the supported types.
        """
        if not data_type:
            if isinstance(value, str):
                data_type = STRING
            elif isinstance(value, bytes):
                data_type = BYTE_STRING
            elif isinstance(value, int):
                data_type = DWORD
            else:
                raise ValueError(f"Invalid data type: {type(value)}")

        if data_type == BYTE_STRING:
            data = value

        elif data_type == STRING:
            data = value.encode("utf8") if isinstance(value, str) else value
            # Concatenate instead of "+=": in-place addition would extend a
            # caller-supplied bytearray and leak the terminator back to the caller.
            data = data + b"\0"

        elif data_type == WIDE_STRING:
            data = value.encode("utf-16-le") if isinstance(value, str) else value
            data = data + b"\0\0"

        elif data_type == BYTE:
            data = bytes([value])

        elif data_type == WORD:
            data = utils.struct_pack(value, width=2)

        elif data_type == DWORD:
            data = utils.struct_pack(value, width=4)

        elif data_type == QWORD:
            data = utils.struct_pack(value, width=8)

        else:
            raise ValueError(f"Invalid data_type: {repr(data_type)}")

        self.mem_write(addr, data)

    def get_function_signature(self,
                               func_ea=None,
                               force=False,
                               num_args=None) -> Optional[FunctionSignature]:
        """
        Returns the function signature of the given func_ea with argument values pulled
        from this context.

        :param int func_ea: address of the function to pull signature from.
            The first operand is used if not provided. (helpful for a "call" instruction)
        :param bool force: Whether to force a function signature using cdecl calling
            convention and no arguments if we fail to generate the signature.
            (Useful when trying to declare a function that was dynamically created in a register)
        :param int num_args: Force a specific number of arguments in the signature.
            If not provided, number of arguments is determined by the disassembler.
            Extra arguments not defined by the disassembler are assumed to be 'int' type.
            Avoid using num_args and adjust the returned FunctionSignature manually
            if more customization is needed.
            (NOTE: The function signature will be forced on failure if this is set.)

        :return: FunctionSignature object or None if not applicable

        :raises RuntimeError: If a function signature could not be created from given ea.
        :raises ValueError: If num_args is negative
        """
        # Reject invalid input before doing any work, so a bad num_args can never
        # reach the idc.SetType() call below and alter the function's type.
        if num_args is not None and num_args < 0:
            raise ValueError("num_args is negative")

        # If func_ea is not given, assume we are using the first operand from a call instruction.
        if not func_ea:
            # Fetch the operands once; every access to the property builds a new list.
            operands = self.operands
            if not operands:
                return None
            operand = operands[0]
            # function pointer can be a memory reference or immediate.
            func_ea = operand.addr or operand.value
        else:
            operand = None

        # Requesting a specific number of arguments implies forcing the signature.
        force = force or num_args is not None

        try:
            func_sig = FunctionSignature(self, func_ea, operand=operand)
        except RuntimeError as e:
            if not force:
                raise
            # If we fail to get a function signature but force is set, set the type to
            # cdecl with no arguments.
            logger.warning(
                "Failed to create function signature at 0x{:0X} with error: {}\n"
                "Forcing signature with assumed cdecl calling convention.".
                format(func_ea, e))
            idc.SetType(func_ea, "int __cdecl no_name();")
            func_sig = FunctionSignature(self, func_ea)

        if num_args is not None:
            # Trim or pad (with 'int') the argument types to the requested count.
            arg_types = func_sig.arg_types
            if len(arg_types) > num_args:
                func_sig.arg_types = arg_types[:num_args]
            elif len(arg_types) < num_args:
                func_sig.arg_types = arg_types + ("int", ) * (num_args -
                                                              len(arg_types))

        return func_sig

    def get_function_arg_objects(self,
                                 func_ea=None,
                                 num_args=None) -> List[FunctionArg]:
        """
        Produces the FunctionArg objects for this context from the signature of the
        given function.

        >>> cpu_context = ProcessorContext()
        >>> args = cpu_context.get_function_arg_objects(0x180011772)

        :param int func_ea: Ea of the function to pull a signature from.
        :param int num_args: Force a specific number of arguments.
            If not provided, number of arguments is determined by the disassembler.
            Extra arguments not defined by the disassembler are assumed to be 'int' type.
            Use get_function_signature() and adjust the FunctionSignature manually
            if more customization is needed.
            (NOTE: The function signature will be forced on failure if this is set.)

        :returns: list of FunctionArg objects (empty if no signature is available)
        """
        signature = self.get_function_signature(func_ea, num_args=num_args)
        return signature.args if signature else []

    # TODO: Replace this function with get_function_arg_objects()
    def get_function_args(self, func_ea=None, num_args=None) -> List[int]:
        """
        Returns the value of each FunctionArg produced by get_function_arg_objects()
        for the given function.

        :param int func_ea: Ea of the function to pull a signature from.
        :param int num_args: Force a specific number of arguments.

        :returns: list of argument values
        """
        values = []
        for arg in self.get_function_arg_objects(func_ea=func_ea, num_args=num_args):
            values.append(arg.value)
        return values

    @property
    def function_args(self) -> List[FunctionArg]:
        """
        The function arguments as currently set, for the function referenced by the
        first operand of the current instruction.
        """
        arg_objects = self.get_function_arg_objects()
        return arg_objects

    @property
    def passed_in_args(self) -> List[FunctionArg]:
        """
        The function arguments for the current function.

        :returns: list of FunctionArg objects for the function containing the current
            instruction pointer, or an empty list if the instruction pointer is not
            inside a function known to the disassembler.
        """
        func = ida_funcs.get_func(self.ip)
        # get_func() gives back nothing when ip is outside of any function; there are
        # no passed in arguments in that case, so don't fail on the attribute access.
        if not func:
            return []
        return self.get_function_arg_objects(func.start_ea)

    @property
    def files(self) -> List[File]:
        """
        The File objects currently tracked in this context's object map.
        """
        tracked_files = []
        for obj in self.objects:
            if isinstance(obj, File):
                tracked_files.append(obj)
        return tracked_files

    def open_file(self, path: str, mode: str = None) -> File:
        """
        Registers a file as opened so it can be tracked by this context.

        If a file with the same path is already tracked, that object is reused:
        its mode is updated (when one is given) and its closed/deleted flags are cleared.

        :param path: Path of the file. When empty, a name is generated from the
            current instruction pointer.
        :param mode: Mode the file was opened with.
        :returns: the tracked File object
        """
        path = path or f"0x{self.ip:08x}.bin"

        # Reuse the existing entry when this path was opened before.
        existing = next((file for file in self.files if file.path == path), None)
        if existing is not None:
            if mode:
                existing.mode = mode  # update mode
            # Undo closed and delete indicators.
            existing.closed = False
            existing.deleted = False
            return existing

        # Otherwise create a new file and store it in the object map.
        file = File(path=path, mode=mode)
        self.objects.add(file)
        logger.debug("Opened file: %s", file.path)
        return file

    def get_file(self, handle_or_path: Union[int, str], default=None) -> File:
        """
        Looks up a tracked file by handle or by path.

        :param handle_or_path: Integer handle into the object map, or the path of the file.
        :param default: Value to return if no matching file exists.
        :returns: the matching File, or default if there is none.

        :raises TypeError: if handle points to an object that is not a File.
        """
        if not isinstance(handle_or_path, int):
            # Path lookup: first tracked file with a matching path wins.
            path = handle_or_path
            return next((file for file in self.files if file.path == path), default)

        handle = handle_or_path
        try:
            obj = self.objects[handle]
        except KeyError:
            return default

        if isinstance(obj, File):
            return obj
        raise TypeError(
            f"Expected handle {hex(handle)} to point to a File, got {type(obj)}"
        )

    @property
    def regkeys(self) -> List[RegKey]:
        """
        The RegKey objects currently tracked in this context's object map.
        """
        tracked_keys = []
        for obj in self.objects:
            if isinstance(obj, RegKey):
                tracked_keys.append(obj)
        return tracked_keys
Exemplo n.º 6
0
def test_memory():
    """
    Tests the memory controller: reading, writing, string representation,
    searching, heap allocation and reallocation.

    The assertions share a single Memory instance and build on each other's writes
    and allocations, so their order matters.
    NOTE(review): the hard-coded addresses and expected bytes presumably match a specific
    sample loaded in the disassembler - confirm against the test fixtures.
    """
    from kordesii.utils.function_tracing.memory import Memory

    m = Memory()

    # basic test: nothing has been written here yet, so the read is expected to be all nulls.
    assert m.read(0x00121000, 10) == b"\x00" * 10

    # test reading across pages
    # (the 10 byte write starting at 0x00121FFB runs past 0x00122000)
    m.write(0x00121FFB, b"helloworld")
    assert m.read(0x00121FFB, 10) == b"helloworld"
    assert m.read(0x00121FFB + 10, 10) == b"\x00" * 10
    # A read that starts inside the written data is padded out with nulls.
    assert m.read(0x00121FFB + 5, 10) == b"world" + b"\x00" * 5

    # test reading segment data
    assert m.read(0x0040C000, 11) == b"Idmmn!Vnsme"
    assert m.read(0x00401150, 3) == b"\x55\x8B\xEC"

    # test str print: one row per mapped region touched by the reads/writes above.
    assert str(m) == dedent(
        """\
        Base Address             Address Range            Size
        0x00121000               0x00121000 - 0x00123000  8192
        0x00401000               0x00401000 - 0x0040F000  57344
    """
    )

    # test searching: find() gives the address of the match or -1 when there is none.
    assert m.find(b"helloworld", start=0x0011050) == 0x00121FFB
    assert m.find(b"helloworld") == 0x00121FFB
    assert m.find(b"helloworld", start=0x00121FFC) == -1
    assert m.find(b"helloworld", end=0x10) == -1
    # The whole match has to fit before "end" in order to be reported.
    assert m.find(b"helloworld", start=0x0011050, end=0x00121FFB) == -1
    assert m.find(b"helloworld", start=0x0011050, end=0x00122000) == -1
    assert m.find(b"helloworld", start=0x0011050, end=0x00122100) == 0x00121FFB
    assert m.find(b"`QFBWF") == 0x0040C120
    assert m.find(b"Idmmn!Vnsme") == 0x0040C000
    assert m.find_in_segment(b"Idmmn!Vnsme", ".data") == 0x0040C000
    assert m.find_in_segment(b"Idmmn!Vnsme", ".text") == -1
    assert m.find(b"\x5F\x5E\xC3", start=0x004035BD) == 0x004035E0

    # test bugfix when searching single length characters
    assert m.find(b"h", start=0x0011050) == 0x00121FFB
    assert m.find(b"h", start=0x0011050, end=0x00121FFB) == -1
    assert m.find(b"h", start=0x0011050, end=0x00121FFB + 1) == 0x00121FFB
    assert m.find(b"o", start=0x0011050) == 0x00121FFB + 4

    # tests allocations: the first block starts at HEAP_BASE and the next one
    # follows the previous block's size plus HEAP_SLACK.
    first_alloc_ea = m.alloc(10)
    assert first_alloc_ea == m.HEAP_BASE
    second_alloc_ea = m.alloc(20)
    assert second_alloc_ea == m.HEAP_BASE + 10 + m.HEAP_SLACK
    m.write(second_alloc_ea, b"im in the heap!")
    assert m.read(second_alloc_ea, 15) == b"im in the heap!"
    assert m.find_in_heap(b"the heap!") == second_alloc_ea + 6
    m.write(second_alloc_ea, b"helloworld")
    assert m.find_in_heap(b"helloworld") == second_alloc_ea

    # tests reallocations: small growth keeps the address, large growth moves the block.
    assert m.realloc(first_alloc_ea, 40) == first_alloc_ea  # no relocation
    assert m.realloc(first_alloc_ea, m.PAGE_SIZE * 5) == second_alloc_ea + 20 + m.HEAP_SLACK  # relocation
    assert m.realloc(second_alloc_ea, 40) == second_alloc_ea  # no relocation
    second_alloc_realloced_ea = m.realloc(second_alloc_ea, m.PAGE_SIZE * 6)
    assert second_alloc_realloced_ea != second_alloc_ea
    assert m.read(second_alloc_realloced_ea, 10) == b"helloworld"  # data should be copied over.
Exemplo n.º 7
0
class ProcessorContext(object):
    """
    Stores the context of the processor during execution.

    :param registers: Instance of an initialized RegisterMap object used to store register values
        for the given architecture.
    :param str instruction_pointer: Name of the register used to point to the current instruction
        being currently executed or to-be executed.
    :param [str] stack_registers: List of register names used for handling the stack.
    """

    # Must be set by inherited classes.
    ARCH_NAME = None  # Name of architecture as reported by disassembler.
    OPCODES = {}  # Map of opcode mnemonics to functions that emulate them.

    def __init__(self,
                 registers,
                 instruction_pointer,
                 stack_pointer,
                 stack_registers=None):
        """
        Sets up an empty processor context around the given register map.

        :param registers: Instance of an initialized RegisterMap object used to store register values
            for the given architecture.
        :param str instruction_pointer: Name of the register holding the instruction pointer.
        :param str stack_pointer: Name of the register holding the stack pointer.
        :param [str] stack_registers: List of register names used for handling the stack.
            (defaults to an empty list)
        """
        self.registers = registers
        self.jcccontext = JccContext()
        self.memory = Memory()
        self.func_calls = {}  # Keeps track of function calls.
        self.executed_instructions = []  # Keeps track of the instructions that have been executed.
        self.memory_copies = collections.defaultdict(list)  # Keeps track of memory moves.
        self.bitness = utils.get_bits()
        # Floor division so the byte width stays an int under Python 3
        # (true division would produce e.g. 4.0 instead of 4).
        self.byteness = self.bitness // 8
        self.stack_registers = stack_registers or []
        self.variables = VariableMap(self)
        # Register names backing the .sp and .ip aliases.
        self._sp = stack_pointer
        self._ip = instruction_pointer

    @classmethod
    def from_arch(cls, arch_name=None):
        """
        Factory method for initializing a ProcessorContext based on detected architecture.

        :param arch_name: Name of architecture to initializes (according to the disassembler)
                          Architecture is automatically detected if not provided.

        :raises NotImplementedError: If architecture is not supported.
        """
        # Pull from disassembler if not provided.
        if not arch_name:
            info = idaapi.get_inf_structure()
            arch_name = info.procName

        for subclass in cls.__subclasses__():
            if subclass.ARCH_NAME == arch_name:
                return subclass(
                )  # Subclasses shouldn't have any initialization parameters.
        raise NotImplementedError(
            'Architecture not supported: {}'.format(arch_name))

    def __deepcopy__(self, memo):
        """Implementing our own deepcopy to improve speed."""
        # Create class, but avoid calling __init__()
        # so we don't trigger the unnecessary initialization of Memory and JccContext
        klass = self.__class__
        copy = klass.__new__(klass)
        memo[id(self)] = copy

        copy.registers = deepcopy(self.registers, memo)
        copy.jcccontext = deepcopy(self.jcccontext, memo)
        copy.memory = deepcopy(self.memory, memo)
        copy.variables = deepcopy(self.variables, memo)
        copy.func_calls = dict(self.func_calls)
        copy.executed_instructions = list(self.executed_instructions)
        copy.memory_copies = self.memory_copies.copy()
        copy.bitness = self.bitness
        copy.byteness = self.byteness
        copy.stack_registers = self.stack_registers
        copy._sp = self._sp
        copy._ip = self._ip

        return copy

    @property
    def ip(self):
        """Alias for retrieving instruction pointer."""
        return self.registers[self._ip]

    @ip.setter
    def ip(self, value):
        """Alias for setting instruction pointer."""
        self.registers[self._ip] = value

    @property
    def sp(self):
        """Alias for retrieving stack pointer."""
        return self.registers[self._sp]

    @sp.setter
    def sp(self, value):
        """Alias for setting stack pointer."""
        self.registers[self._sp] = value

    @property
    def prev_instruction(self):
        """That last instruction that was executed or None if no instructions have been executed."""
        if self.executed_instructions:
            return self.executed_instructions[-1]
        else:
            return None

    def execute(self, ip=None):
        """
        "Execute" the instruction at IP and store results in the context.
        The RIP/EIP register will be set to the value supplied in IP so that it is
        correct.

        Instructions carrying a rep prefix are repeated until ecx reaches 0. For the
        repe/repz and repne/repnz forms the zero flag is additionally tested after
        every iteration (never before the first one), which mirrors how the processor
        evaluates the prefix.

        Emulation is best-effort: mnemonics missing from OPCODES are skipped and
        exceptions raised by an opcode function are logged and swallowed.

        :param ip: instruction address to execute (defaults to currently set ip)
        """
        if not ip:
            ip = self.ip

        # Set instruction pointer to where we are currently executing.
        self.ip = ip

        # Determine if a rep* instruction and which zero flag value (if any)
        # terminates the repetition.
        is_rep = False
        zf_exit = None
        if idc.get_wide_byte(ip) in (0xf2, 0xf3):
            insn = idc.GetDisasm(
                ip)  # IDA pro never has operands for rep opcodes.
            if insn.startswith('rep '):
                is_rep = True
            elif insn.startswith(('repe ', 'repz ')):
                is_rep, zf_exit = True, 0
            elif insn.startswith(('repne ', 'repnz ')):
                is_rep, zf_exit = True, 1

        # Emulate instruction.
        mnem = idc.print_insn_mnem(ip)
        operands = self.operands
        instruction = self.OPCODES.get(mnem)
        if instruction:
            try:
                if is_rep:
                    # As a safety measure, don't allow rep instructions to surpass
                    # our max memory read limit.
                    if self.registers.ecx > self.memory.MAX_MEM_READ:
                        logger.warning(
                            '0x{:08X} :: Emulation attempted to read {} instruction {} times. '
                            'Ignoring instruction.'.format(
                                ip, mnem, self.registers.ecx))
                    else:
                        logger.debug(
                            'Emulating {} instruction {} times.'.format(
                                mnem, self.registers.ecx))
                        while self.registers.ecx != 0:
                            instruction(self, ip, mnem, operands)
                            self.registers.ecx -= 1
                            # The zero flag left over from an earlier instruction must not
                            # prevent the first iteration, so it is only checked here.
                            if zf_exit is not None and self.registers.zf == zf_exit:
                                break
                else:
                    instruction(self, ip, mnem, operands)
            except Exception:
                logger.exception('Failed to execute address 0x{:X}: {}'.format(
                    ip, idc.GetDisasm(ip)))
        else:
            logger.debug('{} instruction not implemented.'.format(mnem))

        # Record executed instruction.
        self.executed_instructions.append(ip)

        # After execution, set instruction pointer to next instruction assuming
        # standard code flow and if no jump was made.
        if self.ip == ip:
            self.ip = idc.next_head(ip)

    def get_call_history(self, func_name):
        """
        Returns the call history for a specific function name.

        :returns: List of tulples containing: (ea of call, list of function arguments)
        """
        return [(ea, args)
                for ea, (_func_name, args) in self.func_calls.items()
                if _func_name == func_name]

    def prep_for_branch(self, bb_start_ea):
        """
        Modify this current context in preparation for a specific path,
        then replace the jcc context with a fresh one.

        :param bb_start_ea: Start address of the basic block about to be followed.
        """
        jcc = self.jcccontext
        if jcc.is_alt_branch(bb_start_ea):
            logger.debug(
                "Modifying context for branch at 0x{:X}".format(bb_start_ea))
            # Re-resolve the recorded destination operand relative to this context
            # and give it a valid value that makes this branch true.
            recorded = jcc.alt_branch_data_dst
            target = self.get_operands(ip=recorded.ip)[recorded.idx]
            target.value = jcc.alt_branch_data

        # Whatever path was taken, jcc tracking starts over.
        self.jcccontext = JccContext()

    def get_operands(self, ip=None):
        """
        Builds the list of Operand objects for the instruction at the given address.

        :param int ip: location of instruction pointer to pull operands from (defaults to current rip in context)

        :return: list of Operand objects (hidden operands excluded)
        """
        if ip is None:
            ip = self.ip

        visible = []
        insn = ida_ua.insn_t()
        # NOTE: We can't trust the instruction length returned by decode_ins.
        ida_ua.decode_insn(insn, ip)
        for index, _ in enumerate(insn.ops):
            candidate = Operand(self, ip, index)
            # IDA will sometimes create hidden or "fake" operands.
            # These are there to represent things like an implicit EAX register.
            # To help avoid confusion to the opcode developer, these fake operands will not be included.
            if not candidate.is_hidden:
                visible.append(candidate)

            # A void operand marks the end of the operand list.
            if candidate.is_void:
                break

        return visible

    @property
    def operands(self):
        return self.get_operands()

    def reg_read(self, reg):
        """
        Read a register value

        >>> cpu_context = ProcessorContext()
        >>> cpu_context.reg_read("EIP")

        :param str reg: register name to be read

        :return int: value contained in specified register as int
        """
        return self.registers[reg]

    def reg_write(self, reg, val):
        """
        Write a register value

        :param str reg: register name to be written

        :param int val: value to be written to register as an int of width of the register (will be truncated as necessary)
        """
        self.registers[reg] = val

    def mem_alloc(self, size):
        """
        Allocates heap region with size number of bytes.

        :param size: Number of bytes to allocate.
        :return: starting address of allocated memory.
        """
        return self.memory.alloc(size)

    def mem_realloc(self, address, size):
        """
        Reallocates heap region with size number of bytes.

        :param address: base address to reallocate.
        :param size: Number of bytes to allocate.
        :return: address of the reallocated memory block.
        """
        new_address = self.memory.realloc(address, size)
        # Record a memory copy if pointer has changed.
        if new_address != address:
            self.memory_copies[self.ip].append((address, new_address, size))
        return new_address

    def mem_copy(self, src, dst, size):
        """
        Copy data from src address to dst address
        (Use this over mem_read/mem_write in order to allow the context to keep track of memory pointer history.)

        :param src: Source address
        :param dst: Destination address
        :param size: Number of bytes to copy over.
        :return:
        """
        self.memory_copies[self.ip].append((src, dst, size))
        self.mem_write(dst, self.mem_read(src, size))

    def get_pointer_history(self, ea):
        """
        Retrieves the history of a specific pointer.
        :param ea: Pointer to start with.
        :return: list of tuples containing (address of the memory copy, source pointer)
            - sorted by earliest to latest incarnation of the pointer. (not including itself)
        """
        history = []
        for ip, copies in sorted(self.memory_copies.items(), reverse=True):
            for src, dst, size in sorted(copies, reverse=True):
                if dst == ea:
                    history.append((ip, src))
                    ea = src
        history.reverse()
        return history

    def get_original_location(self, addr):
        """
        Traces an address back through its pointer history to find where the data originated.

        :param addr: address of interest

        :return: a tuple containing:
            - instruction pointer where the original location was first copied
                or None if given address is already loaded or the original location could not be found.
            - either a loaded address, a tuple containing (frame_id, stack_offset) for a stack variable,
                or None if the original location could not be found.
        """
        # TODO: Consider refactoring.

        # A loaded address is already its own origin.
        if idc.is_loaded(addr):
            return None, addr

        # Otherwise pull either the first seen loaded address or last seen stack variable.
        copy_ip = None
        candidate = self.variables.get(addr, None)
        for copy_ip, source_ea in reversed(self.get_pointer_history(addr)):
            if idc.is_loaded(source_ea):
                return copy_ip, source_ea
            candidate = self.variables.get(source_ea, candidate)

        location = None
        if candidate and candidate.is_stack:
            location = (candidate.frame_id, candidate.stack_offset)
        return copy_ip, location

    # TODO: We should be recording local and global variables and their values.
    #   This will most likely require us making a "Variable" object similar
    #   to what we do with Operand.
    def get_variable_name(self, ea_or_stack_tuple):
        """
        Resolves the variable name for an address or a stack variable. (deprecated)

        :param ea_or_stack_tuple: ea address or tuple containing: (frame_id, stack_offset)
        :return: string of name or None
        """
        warnings.warn(
            'get_variable_name() is deprecated. Please use .variables attribute instead.',
            DeprecationWarning)

        if not isinstance(ea_or_stack_tuple, tuple):
            ea = ea_or_stack_tuple
            direct_name = idc.get_name(ea)
            if direct_name:
                return direct_name
            # No name at this address, retry with wherever the data came from.
            _, origin = self.get_original_location(ea)
            if origin:
                return self.get_variable_name(origin)
            return None

        # Stack variable: look the name up through its frame member.
        frame_id, stack_offset = ea_or_stack_tuple
        member_id = idc.get_member_id(frame_id, stack_offset)
        return ida_struct.get_member_fullname(member_id)

    def mem_read(self, address, size):
        """
        Read memory at the specified address of size size

        :param int address: address to read memory from
        :param int size: size of data to be read
        :return bytes: read data as bytes
        """
        return self.memory.read(address, size)

    def mem_write(self, address, data):
        """
        Write content contained in data to specified address

        :param int address: address to write data at
        :param bytes data: data to be written as bytes
        """
        self.memory.write(address, data)

    def mem_find(self, value, start=0, end=None):
        return self.memory.find(value, start=start, end=end)

    def mem_find_in_segment(self, value, seg_name_or_ea):
        return self.memory.find_in_segment(value, seg_name_or_ea)

    def mem_find_in_heap(self, value):
        return self.memory.find_in_heap(value)

    def read_data(self, addr, size=None, data_type=None):
        """
        Reads memory at the specified address and converts it according to data_type.

        :param int addr: address to read data from
        :param int size: size of data to read (only used for BYTE_STRING)
        :param data_type: type of data to be extracted
            (defaults to BYTE_STRING if size is provided or STRING if not)

        :return: bytes for the string types, otherwise the result of utils.struct_unpack()

        :raises ValueError: If data_type is not one of the handled types.
        """
        if not data_type:
            data_type = BYTE_STRING if size is not None else STRING
        if size is None:
            size = 0

        if data_type == STRING:
            terminator = self.memory.find(b'\0', start=addr)
            # It should always eventually find a null since unmapped pages
            # are all null. If we get -1 we have a bug.
            assert terminator != -1, "Unable to find a null character!"
            return self.memory.read(addr, terminator - addr)

        if data_type == WIDE_STRING:
            # Step by 2 bytes to find 2 nulls on an even alignment.
            # (This helps prevent the need to take endianness into account.)
            cursor = addr
            while self.memory.read(cursor, 2) != b'\0\0':
                cursor += 2
            return self.memory.read(addr, cursor - addr)

        if data_type == BYTE_STRING:
            return self.memory.read(addr, size)

        # Fixed width integer types only differ in how many bytes get unpacked.
        for int_type, width in ((BYTE, 1), (WORD, 2), (DWORD, 4), (QWORD, 8)):
            if data_type == int_type:
                return utils.struct_unpack(self.mem_read(addr, width))

        raise ValueError('Invalid data_type: {!r}'.format(data_type))

    def get_function_signature(self, func_ea=None):
        """
        Builds a FunctionSignature for a function, with argument values pulled from this context.

        :param func_ea: address of the function to pull signature from.
            The first operand is used if not provided. (helpful for a "call" instruction)
        :return: FunctionSignature object
        """
        if func_ea:
            target_ea = func_ea
        else:
            # Assume we are sitting on a call instruction and use its first operand.
            call_target = self.operands[0]
            # function pointer can be a memory reference or immediate.
            target_ea = call_target.addr or call_target.value

        return FunctionSignature(self, target_ea)

    def get_function_args(self, func_ea=None, num_args=None):
        """
        Returns the function argument values for this context based on the
        given function.

        >>> cpu_context = ProcessorContext()
        >>> args = cpu_context.get_function_args(0x180011772)

        :param int func_ea: Ea of the function to pull a signature from.
        :param int num_args: Force a specific number of arguments.
            If not provided, number of arguments is determined by the disassembler.
            Extra arguments not defined by the disassembler are assumed to be 'int' type.
            Use get_function_signature() and adjust the FunctionSignature manually
            if more customization is needed.

        :returns: list of function arguments
        """
        func_sig = self.get_function_signature(func_ea)

        if num_args is not None:
            if num_args < 0:
                raise ValueError('num_args is negative')
            arg_types = func_sig.arg_types
            if len(arg_types) > num_args:
                func_sig.arg_types = arg_types[:num_args]
            elif len(arg_types) < num_args:
                func_sig.arg_types = arg_types + ('int', ) * (num_args -
                                                              len(arg_types))

        return [arg.value for arg in func_sig.args]
class ProcessorContext(object):
    """
    Stores the context of the processor during execution.

    :param emulator: Instance of Emulator to use during emulation.
    :param registers: Instance of an initialized RegisterMap object used to store register values
        for the given architecture.
    :param str instruction_pointer: Name of the register used to point to the current instruction
        being currently executed or to-be executed.
    :param str stack_pointer: Name of the register used to hold the stack pointer.
    """

    # Must be set by inherited classes.
    OPCODES = {}  # Map of opcode mnemonics to functions that emulate them.

    # Class used to generate instructions.
    _instruction_class = Instruction

    # Cache for keeping track of instructions and their operand indexes.
    _operand_indices = {}

    def __init__(self, emulator, registers, instruction_pointer, stack_pointer,
                 return_register):
        """
        Sets up an empty processor context bound to an emulator.

        :param emulator: Instance of Emulator to use during emulation.
        :param registers: Instance of an initialized RegisterMap object used to store register values
            for the given architecture.
        :param str instruction_pointer: Name of the register holding the instruction pointer.
        :param str stack_pointer: Name of the register holding the stack pointer.
        :param str return_register: Name of the register backing the .ret alias.
        """
        self.emulator = emulator
        self.registers = registers

        # Register names backing the .sp, .ip and .ret aliases.
        self._sp = stack_pointer
        self._ip = instruction_pointer
        self._ret = return_register

        self.jcccontext = JccContext()
        self.memory = Memory()

        # Bookkeeping that accumulates during emulation.
        self.func_calls = {}  # Keeps track of function calls.
        self.executed_instructions = []  # Keeps track of the instructions that have been executed.
        self.memory_copies = collections.defaultdict(list)  # Keeps track of memory moves.

        bits = utils.get_bits()
        self.bitness = bits
        self.byteness = bits // 8

        self.variables = VariableMap(self)
        self.objects = ObjectMap(self)
        self.actions = ActionList()

        # Function start address of a function we are currently hooking.
        self.hooking_call = None

        # Stack pointer value at creation time (used by sp_diff).
        self._sp_start = self.sp

    def __deepcopy__(self, memo):
        """
        Implementing our own deepcopy to improve speed.

        Registers, jcc context, memory, variables, objects and actions are deep copied,
        while the emulator is shared by reference. The bookkeeping containers are rebuilt
        one level deep so that the copy and the original can record new entries
        independently of each other.

        :param dict memo: Memo dictionary supplied by the copy module.
        :return: Independent copy of this context.
        """
        # Create class, but avoid calling __init__()
        # so we don't trigger the unnecessary initialization of Memory and JccContext
        klass = self.__class__
        clone = klass.__new__(klass)
        memo[id(self)] = clone

        clone.emulator = self.emulator  # This is a reference, don't create a new instance.
        clone.hooking_call = self.hooking_call
        clone.registers = deepcopy(self.registers, memo)
        clone.jcccontext = deepcopy(self.jcccontext, memo)
        clone.memory = deepcopy(self.memory, memo)
        clone.variables = deepcopy(self.variables, memo)
        clone.objects = deepcopy(self.objects, memo)
        clone.actions = deepcopy(self.actions, memo)
        clone.func_calls = dict(self.func_calls)
        clone.executed_instructions = list(self.executed_instructions)
        # A plain .copy() would share the per-ip lists with the original, so a memory
        # copy recorded in one context would show up in the other. Copy each list.
        clone.memory_copies = collections.defaultdict(
            list,
            ((ip, list(copies)) for ip, copies in self.memory_copies.items()))
        clone.bitness = self.bitness
        clone.byteness = self.byteness
        clone._sp = self._sp
        clone._ip = self._ip
        clone._ret = self._ret
        clone._sp_start = self._sp_start

        return clone

    @property
    def ip(self) -> int:
        """Value of the register designated as the instruction pointer."""
        pointer_name = self._ip
        return self.registers[pointer_name]

    @ip.setter
    def ip(self, value):
        """Stores value in the register designated as the instruction pointer."""
        pointer_name = self._ip
        self.registers[pointer_name] = value

    @property
    def sp(self) -> int:
        """Value of the register designated as the stack pointer."""
        pointer_name = self._sp
        return self.registers[pointer_name]

    @sp.setter
    def sp(self, value):
        """Stores value in the register designated as the stack pointer."""
        pointer_name = self._sp
        self.registers[pointer_name] = value

    @property
    def sp_diff(self) -> int:
        """
        How far the stack pointer has moved since this context was created,
        computed as the starting stack pointer minus the current one.

        This helps with debugging since this number should match the number
        shown in the IDA disassembly.
        """
        starting_sp = self._sp_start
        return starting_sp - self.sp

    # TODO: A subroutine in ARM can technically pass in larger values, in which
    #   case the value spans multiple registers r0-r3
    @property
    def ret(self) -> int:
        """Value of the register designated to hold the return value."""
        return_reg = self._ret
        return self.registers[return_reg]

    @ret.setter
    def ret(self, value):
        """Stores value in the register designated to hold the return value (and logs it)."""
        return_reg = self._ret
        logger.debug("Setting 0x%X into %s", value, return_reg)
        self.registers[return_reg] = value

    @property
    def prev_instruction(self):
        """Most recent entry of executed_instructions, or None if nothing has been executed yet."""
        history = self.executed_instructions
        return history[-1] if history else None

    def execute(self, start=None, end=None, max_instructions=10000):
        """
        "Execute" one instruction, or a run of instructions, and store results in the context.
        The instruction pointer register is set to start before anything is executed.

        When end is given, instructions are executed one after another until the
        instruction pointer equals end or a terminal instruction is reached
        (the terminal instruction itself is not executed).

        :param start: instruction address to start execution (defaults to currently set ip)
        :param end: instruction to stop execution (not including)
            (defaults to only run start)
        :param max_instructions: Maximum number of instructions to execute before
            raising an RuntimeError

        :raises RuntimeError: If maximum number of instructions get hit.
        """
        if not start:
            start = self.ip

        # Set instruction pointer to where we are currently executing.
        self.ip = start

        # Without an end address only the single instruction at start is run.
        if end is None:
            self.instruction.execute()
            return

        remaining = max_instructions
        while self.ip != end:
            current = self.instruction
            if current.is_terminal:
                return  # TODO: Should we be executing the terminal instruction?
            current.execute()
            remaining -= 1
            if remaining == 0:
                raise RuntimeError('Hit maximum number of instructions.')

    def get_call_history(self, func_name_or_ea) -> List[Tuple[int, List]]:
        """
        Collects the recorded calls made to a specific function.

        :param func_name_or_ea: Name of the function, or an address that is
            resolved to a name through utils.get_function_name().

        :returns: List of tuples containing: (ea of call, list of function arguments)
        """
        if isinstance(func_name_or_ea, str):
            wanted_name = func_name_or_ea
        else:
            wanted_name = utils.get_function_name(func_name_or_ea)

        history = []
        for call_ea, (called_name, call_args) in self.func_calls.items():
            if called_name == wanted_name:
                history.append((call_ea, call_args))
        return history

    def prep_for_branch(self, bb_start_ea):
        """
        Modify this current context in preparation for a specific path,
        then replace the jcc context with a fresh one.

        :param bb_start_ea: Start address of the basic block about to be followed.
        """
        jcc = self.jcccontext
        if jcc.is_alt_branch(bb_start_ea):
            logger.debug("Modifying context for branch at 0x%08X", bb_start_ea)
            # Re-resolve the recorded destination operand relative to this context
            # and give it a valid value that makes this branch true.
            recorded = jcc.alt_branch_data_dst
            target = self.get_operands(ip=recorded.ip)[recorded.idx]
            target.value = jcc.alt_branch_data

        # Whatever path was taken, jcc tracking starts over.
        self.jcccontext = JccContext()

    def get_instruction(self, ip=None) -> Instruction:
        """
        Creates the Instruction object (via _instruction_class) for an address.

        :param ip: location of instruction pointer to pull Instruction from (default to current ip in context)
        :return: Instruction object
        """
        address = self.ip if ip is None else ip
        factory = self._instruction_class
        return factory(self, address)

    @property
    def instruction(self) -> Instruction:
        """Instruction object for the current instruction pointer."""
        current = self.get_instruction()
        return current

    def get_operands(self, ip=None) -> List[Operand]:
        """
        Returns the operands of the instruction at the given address,
        as exposed by that Instruction's operands attribute.

        :param int ip: location of instruction pointer to pull operands from (defaults to current rip in context)

        :return: list of Operand objects
        """
        insn = self.get_instruction(ip=ip)
        return insn.operands

    @property
    def operands(self) -> List[Operand]:
        """Operand objects of the instruction at the current instruction pointer."""
        current = self.get_operands()
        return current

    def reg_read(self, reg):
        """
        Looks up the value of a register by name (the name is lowercased first).

        >>> cpu_context = ProcessorContext()
        >>> cpu_context.reg_read("EIP")

        :param str reg: register name to be read

        :return int: value contained in specified register as int
        """
        register_map = self.registers
        return register_map[reg.lower()]

    def reg_write(self, reg, val):
        """
        Stores a value in a register by name (the name is lowercased first).

        :param str reg: register name to be written

        :param int val: value to be written to register as an int of width of the register (will be truncated as necessary)
        """
        register_map = self.registers
        register_map[reg.lower()] = val

    def mem_alloc(self, size):
        """
        Requests a heap region of size bytes from the context's memory.

        :param size: Number of bytes to allocate.
        :return: starting address of allocated memory.
        """
        base_address = self.memory.alloc(size)
        return base_address

    def mem_realloc(self, address, size):
        """
        Resizes a heap region to size bytes, recording a memory copy when the block moves.

        :param address: base address to reallocate.
        :param size: Number of bytes to allocate.
        :return: address of the reallocated memory block.
        """
        relocated = self.memory.realloc(address, size)
        moved = relocated != address
        if moved:
            # The pointer changed, so keep the old -> new link for pointer history.
            self.memory_copies[self.ip].append((address, relocated, size))
        return relocated

    def mem_copy(self, src, dst, size):
        """
        Copies size bytes from src to dst and records the copy under the current ip.
        (Use this over mem_read/mem_write in order to allow the context to keep track of memory pointer history.)

        :param src: Source address
        :param dst: Destination address
        :param size: Number of bytes to copy over.
        :return:
        """
        # Record first, then move the bytes.
        self.memory_copies[self.ip].append((src, dst, size))
        payload = self.mem_read(src, size)
        self.mem_write(dst, payload)

    def get_pointer_history(self, ea):
        """
        Walks the recorded memory copies backwards to find where a pointer came from.

        :param ea: Pointer to start with.
        :return: list of tuples containing (address of the memory copy, source pointer)
            - sorted by earliest to latest incarnation of the pointer. (not including itself)
        """
        trail = []
        newest_first = sorted(self.memory_copies.items(), reverse=True)
        for copy_ip, copies in newest_first:
            for src, dst, _size in sorted(copies, reverse=True):
                if dst != ea:
                    continue
                trail.append((copy_ip, src))
                # Keep following the chain from the source of this copy.
                ea = src
        return trail[::-1]

    def get_original_location(self, addr):
        """
        Traces an address back through its pointer history to find where the data originated.

        :param addr: address of interest

        :return: a tuple containing:
            - instruction pointer where the original location was first copied
                or None if given address is already loaded or the original location could not be found.
            - either a loaded address, a tuple containing (frame_id, stack_offset) for a stack variable,
                or None if the original location could not be found.
        """
        # TODO: Consider refactoring.

        # A loaded address is already its own origin.
        if idc.is_loaded(addr):
            return None, addr

        # Otherwise pull either the first seen loaded address or last seen stack variable.
        copy_ip = None
        candidate = self.variables.get(addr, None)
        for copy_ip, source_ea in reversed(self.get_pointer_history(addr)):
            if idc.is_loaded(source_ea):
                return copy_ip, source_ea
            candidate = self.variables.get(source_ea, candidate)

        location = None
        if candidate and candidate.is_stack:
            location = (candidate.frame_id, candidate.stack_offset)
        return copy_ip, location

    def mem_read(self, address, size):
        """
        Reads size bytes from the context's memory starting at address.

        :param int address: address to read memory from
        :param int size: size of data to be read
        :return bytes: read data as bytes
        """
        data = self.memory.read(address, size)
        return data

    def mem_write(self, address, data):
        """
        Writes data into the context's memory starting at address.

        :param int address: address to write data at
        :param bytes data: data to be written as bytes
        """
        target = self.memory
        target.write(address, data)

    def mem_find(self, value, start=0, end=None):
        """
        Searches the context's memory for value. Thin wrapper around memory.find().

        :param value: data to search for
        :param start: passed through as the start keyword (defaults to 0)
        :param end: passed through as the end keyword (defaults to None)
        :return: whatever memory.find() returns
        """
        searcher = self.memory.find
        return searcher(value, start=start, end=end)

    def mem_find_in_segment(self, value, seg_name_or_ea):
        """
        Searches a single segment for value. Thin wrapper around memory.find_in_segment().

        :param value: data to search for
        :param seg_name_or_ea: segment selector handed to memory.find_in_segment()
        :return: whatever memory.find_in_segment() returns
        """
        searcher = self.memory.find_in_segment
        return searcher(value, seg_name_or_ea)

    def mem_find_in_heap(self, value):
        """
        Searches the heap for value. Thin wrapper around memory.find_in_heap().

        :param value: data to search for
        :return: whatever memory.find_in_heap() returns
        """
        searcher = self.memory.find_in_heap
        return searcher(value)

    # TODO: Move this into Memory class and automatically decode values.
    def read_data(self, addr, size=None, data_type=None):
        """
        Reads memory at the specified address and converts it according to data_type.

        :param int addr: address to read data from
        :param int size: size of data to read (only used for BYTE_STRING)
        :param data_type: type of data to be extracted
            (defaults to BYTE_STRING if size is provided or STRING if not)

        :return: bytes for the string types, otherwise the result of utils.struct_unpack()

        :raises ValueError: If data_type is not one of the handled types.
        """
        if not data_type:
            data_type = BYTE_STRING if size is not None else STRING
        if size is None:
            size = 0

        if data_type == STRING:
            terminator = self.memory.find(b"\0", start=addr)
            # It should always eventually find a null since unmapped pages
            # are all null. If we get -1 we have a bug.
            assert terminator != -1, "Unable to find a null character!"
            return self.memory.read(addr, terminator - addr)

        if data_type == WIDE_STRING:
            # Step by 2 bytes to find 2 nulls on an even alignment.
            # (This helps prevent the need to take endianness into account.)
            cursor = addr
            while self.memory.read(cursor, 2) != b"\0\0":
                cursor += 2
            return self.memory.read(addr, cursor - addr)

        if data_type == BYTE_STRING:
            return self.memory.read(addr, size)

        # Fixed width integer types only differ in how many bytes get unpacked.
        for int_type, width in ((BYTE, 1), (WORD, 2), (DWORD, 4), (QWORD, 8)):
            if data_type == int_type:
                return utils.struct_unpack(self.mem_read(addr, width))

        raise ValueError("Invalid data_type: {!r}".format(data_type))

    def write_data(self, addr, value, data_type=None):
        """
        Writes memory at the specified address after converting the value
        into data based on the specified data type.

        The caller's ``value`` object is never modified: terminators for the
        string types are appended to a new object rather than in place.

        :param int addr: address to write data to
        :param value: integer or byte string to write
        :param data_type: type of data to convert value from.
            (defaults to STRING for str, BYTE_STRING for bytes, or DWORD for int)

        :raises ValueError: If data_type is not provided and cannot be inferred
            from the type of value, or if data_type is not a recognized type.
        """
        if not data_type:
            if isinstance(value, str):
                data_type = STRING
            elif isinstance(value, bytes):
                data_type = BYTE_STRING
            elif isinstance(value, int):
                data_type = DWORD
            else:
                raise ValueError(f"Invalid data type: {type(value)}")

        if data_type == BYTE_STRING:
            data = value

        elif data_type == STRING:
            data = value.encode("utf8") if isinstance(value, str) else value
            # Build a new object instead of using "+=": an in-place append would
            # silently grow a mutable buffer (e.g. bytearray) owned by the caller.
            data = data + b"\0"

        elif data_type == WIDE_STRING:
            data = value.encode("utf-16-le") if isinstance(value, str) else value
            # Same as above: never append the terminator to the caller's object.
            data = data + b"\0\0"

        elif data_type == BYTE:
            data = bytes([value])

        elif data_type == WORD:
            data = utils.struct_pack(value, width=2)

        elif data_type == DWORD:
            data = utils.struct_pack(value, width=4)

        elif data_type == QWORD:
            data = utils.struct_pack(value, width=8)

        else:
            raise ValueError(f"Invalid data_type: {repr(data_type)}")

        self.mem_write(addr, data)

    def get_function_signature(self,
                               func_ea=None,
                               force=False,
                               num_args=None) -> Optional[FunctionSignature]:
        """
        Returns the function signature of the given func_ea with argument values pulled
        from this context.

        :param int func_ea: address of the function to pull signature from.
            The first operand is used if not provided. (helpful for a "call" instruction)
        :param bool force: Whether to force a function signature using cdecl calling
            convention and no arguments if we fail to generate the signature.
            (Useful when trying to declare a function that was dynamically created in a register)
        :param int num_args: Force a specific number of arguments in the signature.
            If not provided, number of arguments is determined by the disassembler.
            Extra arguments not defined by the disassembler are assumed to be 'int' type.
            Avoid using num_args and adjust the returned FunctionSignature manually
            if more customization is needed.
            (NOTE: The function signature will be forced on failure if this is set.)

        :return: FunctionSignature object or None if not applicable
            (no func_ea given and the current instruction has no operands)

        :raises RuntimeError: If a function signature could not be created from given ea.
        :raises ValueError: If num_args is negative
        """
        # If func_ea is not given, assume we are using the first operand from a call instruction.
        if not func_ea:
            if not self.operands:
                return None
            operand = self.operands[0]
            # function pointer can be a memory reference or immediate.
            func_ea = operand.addr or operand.value
        else:
            operand = None

        # Reject a bad num_args up front. Setting num_args implies forcing, and
        # forcing may rewrite the function type in the database; invalid input
        # must not get far enough to cause that side effect.
        if num_args is not None and num_args < 0:
            raise ValueError("num_args is negative")

        force = force or num_args is not None

        try:
            func_sig = FunctionSignature(self, func_ea, operand=operand)
        except RuntimeError as e:
            if not force:
                raise
            # We failed to get a function signature but force is set, so set the
            # type to cdecl with no arguments and try again.
            logger.warning(
                "Failed to create function signature at 0x{:0X} with error: {}\n"
                "Forcing signature with assumed cdecl calling convention.".
                format(func_ea, e))
            idc.SetType(func_ea, "int __cdecl no_name();")
            func_sig = FunctionSignature(self, func_ea)

        if num_args is not None:
            # Trim or pad (with 'int') the argument types to the requested count.
            arg_types = func_sig.arg_types
            if len(arg_types) > num_args:
                func_sig.arg_types = arg_types[:num_args]
            elif len(arg_types) < num_args:
                func_sig.arg_types = arg_types + ("int", ) * (num_args -
                                                              len(arg_types))

        return func_sig

    def get_function_arg_objects(self,
                                 func_ea=None,
                                 num_args=None) -> List[FunctionArg]:
        """
        Produces the FunctionArg objects for this context, using the signature
        of the given function.

        >>> cpu_context = ProcessorContext()
        >>> args = cpu_context.get_function_arg_objects(0x180011772)

        :param int func_ea: Ea of the function to pull a signature from.
        :param int num_args: Force a specific number of arguments.
            When omitted, the disassembler decides the number of arguments.
            Arguments beyond those known to the disassembler are assumed to be 'int'.
            For anything more elaborate, call get_function_signature() and adjust
            the FunctionSignature by hand.
            (NOTE: The function signature will be forced on failure if this is set.)

        :returns: list of FunctionArg objects (empty when no signature is available)
        """
        signature = self.get_function_signature(func_ea, num_args=num_args)
        return signature.args if signature else []

    # TODO: Replace this function with get_function_arg_objects()
    def get_function_args(self, func_ea=None, num_args=None) -> List[int]:
        """
        Collects the ``value`` of every FunctionArg that
        get_function_arg_objects() yields for the given function.

        :param int func_ea: forwarded to get_function_arg_objects()
        :param int num_args: forwarded to get_function_arg_objects()
        :returns: list holding each argument's value, in argument order
        """
        arg_objects = self.get_function_arg_objects(func_ea=func_ea,
                                                    num_args=num_args)
        values = []
        for arg_object in arg_objects:
            values.append(arg_object.value)
        return values

    @property
    def function_args(self) -> List[FunctionArg]:
        """
        FunctionArg objects for the function referenced by the first operand,
        i.e. the result of get_function_arg_objects() called with no arguments.
        """
        arg_objects = self.get_function_arg_objects()
        return arg_objects

    @property
    def passed_in_args(self) -> List[FunctionArg]:
        """
        The function arguments for the current function, i.e. the function
        containing the current instruction pointer.

        :returns: list of FunctionArg objects, or an empty list when the
            instruction pointer is not located inside a defined function.
        """
        func = ida_funcs.get_func(self.ip)
        # get_func() yields None when ip does not belong to any function;
        # there is no signature to pull arguments from in that case.
        if func is None:
            return []
        return self.get_function_arg_objects(func.start_ea)

    @property
    def files(self) -> List[File]:
        """
        Every File object that self.objects currently holds, as a list.
        """
        return [*self.objects.query(File)]

    def get_file(self, handle_or_path: Union[int, str], default=None) -> File:
        """
        Gets a file by handle or path. Returns default if not existent.

        Deprecated: use objects.query() instead.

        :param handle_or_path: an int is matched against the ``handle`` of the
            File objects, anything else is matched against their ``path``.
        :param default: value returned when no File matches.
        :returns: the first matching File, or default.

        NOTE(review): earlier documentation mentions a TypeError when the handle
        points to an object that is not a File. Nothing in this method raises it
        directly, so it would have to come from objects.query() -- confirm.
        """
        # stacklevel=2 attributes the warning to the caller of get_file(), which
        # is the code that actually needs to be migrated.
        warnings.warn(
            "get_file() is deprecated. Please use objects.query() instead.",
            DeprecationWarning,
            stacklevel=2)
        if isinstance(handle_or_path, int):
            condition = dict(handle=handle_or_path)
        else:
            condition = dict(path=handle_or_path)

        # Only the first match is of interest.
        for obj in self.objects.query(File, **condition):
            return obj
        return default

    @property
    def regkeys(self) -> List[RegKey]:
        """
        Every RegKey object that self.objects currently holds, as a list.
        """
        matches = self.objects.query(RegKey)
        return list(matches)

    @property
    def services(self) -> List[Service]:
        """
        Every Service object that self.objects currently holds, as a list.
        """
        matches = self.objects.query(Service)
        return list(matches)