Exemplo n.º 1
0
    def get_original_location(self, addr):
        """
        Resolve where the data at ``addr`` originally came from using its pointer history.

        The pointer history of ``addr`` is walked in reverse order. The first entry whose
        address is loaded in the IDB wins; otherwise the variable recorded for the
        last visited history entry that has one (or for ``addr`` itself) is used.

        :param addr: address of interest

        :return: a ``(ip, location)`` tuple where
            - ``ip`` is the instruction pointer taken from the history entry that produced the
              result, or None if ``addr`` is already loaded or there is no history.
            - ``location`` is a loaded address, a ``(frame_id, stack_offset)`` tuple for a
              stack variable, or None if the original location could not be found.
        """
        # A loaded address is its own original location.
        if idc.is_loaded(addr):
            return None, addr

        origin_ip = None
        candidate = self.variables.get(addr, None)
        for origin_ip, source in reversed(self.get_pointer_history(addr)):
            if idc.is_loaded(source):
                return origin_ip, source
            # Keep the previously found variable when this entry has none.
            candidate = self.variables.get(source, candidate)

        location = None
        if candidate and candidate.is_stack:
            location = (candidate.frame_id, candidate.stack_offset)
        return origin_ip, location
Exemplo n.º 2
0
def is_func_ptr(offset: int) -> bool:
    """
    Tell whether ``offset`` refers to a function.

    :param offset: address to test
    :return: True if ``offset`` is the start of a function or function data can be
        retrieved for it, False otherwise.
    :raises FunctionTracingError: if retrieving the function data fails with anything other
        than a RuntimeError.
    """
    # Cheapest check first: is the offset exactly the start of a known function?
    containing_func = ida_funcs.get_func(offset)
    if containing_func and containing_func.start_ea == offset:
        return True

    # The disassembler occasionally has a type set on an address that should not have one.
    # A loaded address for which guess_type() yields nothing is therefore treated as
    # "not a function pointer" before the more expensive get_function_data() call.
    # This shortcut produced false negatives in the past, which is why the
    # start-of-function check above comes first.
    # TODO: Determine if we could have further false negatives.
    try:
        loaded_without_type = idc.is_loaded(offset) and not idc.guess_type(offset)
    except TypeError:
        return False
    if loaded_without_type:
        return False

    try:
        get_function_data(offset)
    except RuntimeError:
        return False
    except Exception as e:
        # Any other failure is reported with a friendlier error message.
        raise FunctionTracingError(
            "Failed to retrieve function data from {!r}: {}".format(offset, e))
    else:
        return True
Exemplo n.º 3
0
def find_scatter_table():
    """
    Locate ``__scatterload`` routines by byte signature and read their scatter table bounds.

    For every address returned by ``create_func_by_prefix`` the value of operand 1 is taken
    as the address of two consecutive dwords. Each dword is an offset relative to that
    address; the sums (truncated to 32 bits) give the start and end of the scatter table.
    Tables whose start is not loaded are skipped. Every accepted start address is passed
    to ``set_entry_name`` with the name ``__scatter_table``.

    :return: dict mapping table start address to table end address.
    """
    scatter_load_bytes = {
        "__scatterload": [
            "0A A0 90 E8 00 0C 82 44",
            "2C 00 8F E2 00 0C 90 E8 00 A0 8A E0 00 B0 8B E0",  # For 5G
        ],
    }

    tables = {}
    for name, prefixes in scatter_load_bytes.items():
        for prefix in prefixes:
            for addr in create_func_by_prefix(name, prefix, force=True):
                if addr == idc.BADADDR:
                    continue

                offset_addr = idc.get_operand_value(addr, 1)
                if offset_addr == -1:
                    # The operand could not be read; flip the "T" segment register
                    # (presumably the ARM/Thumb mode flag -- TODO confirm) and retry.
                    old_flag = idc.get_sreg(addr, "T")
                    idc.split_sreg_range(addr, "T", not old_flag, idc.SR_user)
                    offset_addr = idc.get_operand_value(addr, 1)
                    if offset_addr == -1:
                        # Still unresolved: do not feed -1 into the address arithmetic.
                        continue

                offset = ida_bytes.get_dword(offset_addr)
                offset2 = ida_bytes.get_dword(offset_addr + 4)
                start = (offset + offset_addr) & 0xFFFFFFFF
                end = (offset2 + offset_addr) & 0xFFFFFFFF
                if not idc.is_loaded(start):
                    continue

                tables[start] = end
                print("__scatter_table: 0x%x -> 0x%x" % (start, end))
                # Called for its naming side effect only; the result is not needed.
                set_entry_name(start, "__scatter_table")

    return tables
Exemplo n.º 4
0
    def value(self):
        """
        Retrieve the value of the operand as it is currently in the cpu_context.
        NOTE: The value cannot be cached because it may change with the cpu context.

        :return: depending on the operand kind:
            - None for a hidden operand
            - the immediate value for an immediate operand
            - the register contents for a register operand (including fs/gs when the
              operand is typed as memory but its text mentions one of them)
            - the address itself when a memory reference points at a function
            - ``b""`` when a memory reference has zero width
            - otherwise the unpacked contents of the referenced memory
        :raises FunctionTracingError: if the operand matches none of the handled kinds.
        """
        if self.is_hidden:
            return None

        cpu_context = self._cpu_context

        if self.is_immediate:
            immediate = idc.get_operand_value(self.ip, self.idx)
            # An immediate that is a loaded address is recorded as a variable/reference.
            if idc.is_loaded(immediate):
                cpu_context.variables.add(immediate, reference=self.ip)
            return immediate

        if self.is_register:
            reg_value = cpu_context.registers[self.text]
            # Record a reference when the register holds a known variable address.
            if reg_value in cpu_context.variables:
                cpu_context.variables[reg_value].add_reference(self.ip)
            return reg_value

        # TODO: Determine if this is still necessary.
        # FS and GS (at least) show up as memory operands; treat them as the registers they are.
        if self.type == idc.o_mem:
            for segment_reg in ("fs", "gs"):
                if segment_reg in self.text:
                    return getattr(cpu_context.registers, segment_reg)

        if not self.is_memory_reference:
            raise FunctionTracingError("Invalid operand type: {}".format(self.type), ip=self.ip)

        target = self.addr

        # Record a reference when the address belongs to a known variable.
        if target in cpu_context.variables:
            cpu_context.variables[target].add_reference(self.ip)

        # A function may appear as a memory reference in a non-call instruction
        # (e.g.  "mov  esi, ds:LoadLibraryA"); return its address instead of dereferencing.
        # NOTE: utils.is_func_ptr must be used here to avoid a recursive loop.
        if utils.is_func_ptr(target):
            return target

        if not self.width:
            logger.debug("Width is zero for {}, returning empty string.".format(self.text))
            return b""

        # Otherwise dereference the address.
        raw = cpu_context.mem_read(target, self.width)
        return utils.struct_unpack(raw)
Exemplo n.º 5
0
def run_scatterload(debug=False):
    """
    Apply the scatter-load tables to the database until no new region is produced.

    Each table is read as a sequence of 16-byte entries made of four dwords:
    source, destination, size and the address of the handler routine. The handler
    address is looked up in the result of ``find_scatter_funcs`` and the matching
    operation (copy, decompress, decompress2 or zero-init) is performed on a newly
    added segment.

    :param debug: when true, only print the table entries without modifying anything
        beyond the dword/offset markup of the table itself.
    """
    # A newly created region may contain another scatter-load procedure, so keep
    # going until a full pass makes no change.
    is_changed = True
    while is_changed:
        is_changed = False
        tables = find_scatter_table()
        scatter_funcs = find_scatter_funcs()

        for table_start, table_end in tables.items():
            print("Processing table: 0x%x to 0x%x" % (table_start, table_end))
            for entry in range(table_start, table_end, 16):
                ida_bytes.create_dword(entry, 16)
                ida_offset.op_offset(entry, 0, idc.REF_OFF32)
                src = ida_bytes.get_dword(entry)
                dst = ida_bytes.get_dword(entry + 4)
                size = ida_bytes.get_dword(entry + 8)
                how = ida_bytes.get_dword(entry + 12)

                if how not in scatter_funcs:
                    print("%x: no addr 0x%x in scatter_funcs" % (entry, how))
                    continue

                handler = scatter_funcs[how]
                print("%s: 0x%x -> 0x%x (0x%x bytes)" %
                      (handler, src, dst, size))

                # Every handler except zero-init needs a loaded, non-empty source.
                if handler != "__scatterload_zeroinit" and (
                        not idc.is_loaded(src) or size == 0):
                    print("0x%x is not loaded." % (src))
                    continue

                if debug:
                    # Only the information printed above is wanted.
                    continue

                if handler == "__scatterload_zeroinit":
                    # Zero-filled regions cannot reveal new tables, so no re-run is requested.
                    if add_segment(dst, size, "DATA"):
                        memclr(dst, size)
                elif handler == "__scatterload_copy":
                    if add_segment(dst, size, "CODE"):
                        memcpy(src, dst, size)
                        is_changed = True
                elif handler == "__scatterload_decompress":
                    if add_segment(dst, size, "DATA"):
                        decomp(src, dst, size)
                        is_changed = True
                elif handler == "__scatterload_decompress2":
                    # Some old firmware images have this variant.
                    if add_segment(dst, size, "DATA"):
                        decomp2(src, dst, size)
                        is_changed = True

                ida_auto.auto_wait()
Exemplo n.º 6
0
def read_bytes_at(ea, count):
    """
    Read up to ``count`` bytes starting at ``ea`` without crossing the segment end.

    :param ea: address to start reading from
    :param count: number of bytes requested
    :return: ``b""`` if ``ea`` is not loaded, otherwise the result of ``idc.get_bytes``
        for the requested length clamped to the end of the segment containing ``ea``.
    """
    # Nothing to read when the first byte has no value (see the get_wide_byte doc).
    if not idc.is_loaded(ea):
        return b""

    available = idc.get_segm_end(ea) - ea
    return idc.get_bytes(ea, min(count, available))
Exemplo n.º 7
0
def is_mapped(ea, size=1, value=True):
    """Check whether an address, or an address range, is mapped.

    Arguments:
        ea: The linear address to check.
    Options:
        size: The number of bytes at ea to check. Default is 1.
        value: When True (the default), an address counts as mapped only if it has a
            value (``idc.is_loaded``). When False, belonging to a segment
            (``idaapi.getseg``) is enough, which also accepts e.g. bss contents that
            exist but have no static value.
    Returns:
        A truthy result if the range is considered mapped, a falsy one otherwise.
    Raises:
        ValueError: If size is smaller than 1.
    Notes:
        This is a hack: only the first and the last byte of the range are checked.
    """
    if size < 1:
        raise ValueError('Invalid argument: size={}'.format(size))

    probe = idc.is_loaded if value else idaapi.getseg
    # HACK: Only the two ends of the range are probed, not the bytes in between.
    return probe(ea) and (size == 1 or probe(ea + size - 1))
Exemplo n.º 8
0
    def _obtain_bytes(start, end):
        """
        Obtain bytes efficiently, sets non-loaded bytes to \x00

        :param int start: starting address
        :param int end: ending address

        :return bytearray: bytearray containing bytes within range
        """
        # Reconstruct the segment, account for bytes which are not loaded.
        bytes_range = range(start, end)  # a range from start -> end
        return bytearray(
            ida_bytes.get_wide_byte(i) if idc.is_loaded(i) else 0
            for i in bytes_range)
Exemplo n.º 9
0
def _obtain_bytes(start, end):
    """
    Obtain bytes efficiently, sets non-loaded bytes to \x00

    :param int start: starting address
    :param int end: ending address

    :return bytes: bytes contained within range
    """
    # Reconstruct the segment, account for bytes which are not loaded.
    # Can't use xrange() here because we can get a "Python int too large to conver to C long" error
    bytes_range = range(start, end)  # a range from start -> end
    return bytes(
        bytearray(
            ida_bytes.get_wide_byte(i) if idc.is_loaded(i) else 0
            for i in bytes_range))
Exemplo n.º 10
0
def get_string(ea, length=idc.BADADDR):
    """
    Read a NUL-terminated byte string starting at ``ea``.

    Reading stops at the first zero byte, at the first address that is not loaded,
    or once ``length`` bytes have been consumed, whichever comes first.

    :param ea: address of the first byte
    :param length: maximum number of bytes to read
    :return bytes: the bytes read, without the terminator
    """
    limit = ea + length
    collected = bytearray()
    cursor = ea

    # Stop as soon as the range is exhausted or the current byte has no value.
    while cursor < limit and idc.is_loaded(cursor):
        current = ida_bytes.get_byte(cursor)
        if current == 0:  # NUL terminator
            break
        collected.append(current)
        cursor += 1

    return bytes(collected)
Exemplo n.º 11
0
def handle_string_mov(ea, state):
    """Updates the stack based on a movs instruction.  Used by create_stack
        If a rep/repne prefix is used, takes the count from ecx.  If the count cannot be determined, will ignore
        the instruction.  Also assumes that esi points to memory within the executable, and edi points to the
        stack. On any errors (undecodable instruction, unknown registers, source not loaded, short read),
        this will ignore the instruction and leave the state untouched.

        :param ea: instruction location
        :param state: the current TraceState

        :return: None - updates stack or regs
    """
    opcode = idaapi.get_bytes(ea, 1)
    rep_inst = opcode in [b"\xf2", b"\xf3"]
    count = state.get_reg_value("ecx") if rep_inst else 1
    if not count or count < 0:
        return

    cmd = idaapi.insn_t()
    # decode_insn returns the instruction length, 0 when decoding failed; in that
    # case cmd holds nothing usable.
    if not idaapi.decode_insn(cmd, ea):
        return
    dtype = cmd.ops[0].dtype
    # dtype 0/1/2 select element sizes 1/2/4; anything else is treated as 4 bytes.
    word_size = [1, 2, 4][dtype] if dtype < 3 else 4
    count *= word_size

    src = state.get_reg_value("esi")
    dst = state.get_reg_value("edi")
    if src is None or dst is None:
        return
    # In IDA 7, get_bytes doesn't return None on failure, instead it will return
    # a string of \xff the size of count. Since all \xff's could be valid data,
    # the validity of src is checked instead.
    if not idc.is_loaded(src):
        return
    bytes_ = idaapi.get_bytes(src, count)
    # The None/-1 check is kept in case a future version reports failure that way again.
    if bytes_ in (None, -1) or len(bytes_) < count:
        # A short read would otherwise fail midway and leave the stack half updated.
        return
    for i in range(count):
        state.stack[dst + i] = ((bytes_[i]), ea)

    if rep_inst:
        state.set_reg_value("ecx", 0, ea)
    state.set_reg_value("esi", src + count, ea)
    state.set_reg_value("edi", dst + count, ea)
Exemplo n.º 12
0
    def _get_segment_bytes(self, start, end):
        """
        Obtain segment bytes, setting non-loaded bytes to NULL

        :param int start: segment start EA

        :param int end: segment end EA

        :return string: bytes contained in segment
        """
        # Reconstruct the segment, account for bytes which are not loaded.
        # Can't use xrange() here because we can get a "Python int too large to conver to C long" error
        seg_range = iter(itertools.count(start).next,
                         end)  # a range from start -> end
        return str(
            bytearray(
                idc.get_wide_byte(i) if idc.is_loaded(i) else 0
                for i in seg_range))
Exemplo n.º 13
0
    def factory(cls, string_location, string_reference, size=None, offset=None, key=None,
                encoded_data=None, code_page=None, dest=None):
        """
        Build an EncodedString or an EncodedStackString depending on where the string lives.

        A loaded ``string_location`` yields an EncodedString. Anything else is assumed to be
        a pointer into the emulated stack (function_tracing's CPU emulator) and yields an
        EncodedStackString.

        :param string_location:
            Data segment pointer for static strings or stack pointer for stack strings.
        :param string_reference:
            The location the string is referenced from.
            This is required to pull the stack frame when string_location is a stack pointer.
        :param size: The size of the string. Required to use self.get_bytes.
        :param offset: Used when there is an offset based accessing scheme.
        :param key: Used when there is a key that can vary by string.
        :param encoded_data: encoded/encrypted data, if not provided data will be retrieved from IDA.
        :param code_page: known encoding page used to decode data to unicode (after data is decrypted)
            (code page is dynamically determined if not provided)
        :param dest: Location of decrypted data (if different from string_location)
        """
        # Keyword arguments both string flavours accept unchanged.
        common = dict(size=size, offset=offset, key=key, code_page=code_page, dest=dest)

        if idc.is_loaded(string_location):
            return EncodedString(
                string_location, string_reference, encoded_data=encoded_data, **common)

        # Not loaded: treat string_location as an emulated stack pointer.
        frame_id = idc.get_func_attr(string_reference, idc.FUNCATTR_FRAME)
        frame_size = idc.get_func_attr(string_reference, idc.FUNCATTR_FRSIZE)
        # FIXME: This method isn't always super accurate because... IDA
        stack_offset = (
            string_location
            + function_tracing.RSP_OFFSET
            + frame_size
            - function_tracing.STACK_BASE
        )
        if stack_offset < 0:
            logger.warning(
                'Ignoring negative stack offset {:#x} pulled from 0x{:X}'.format(
                    stack_offset, string_location))
            stack_offset = None

        return EncodedStackString(
            encoded_data, frame_id=frame_id, stack_offset=stack_offset,
            string_reference=string_reference, **common)
Exemplo n.º 14
0
 def _is_func_ptr(self, offset):
     """
     Tell whether ``offset`` is a function pointer.

     :param offset: address to test
     :return: True if function data can be retrieved for ``offset``, False otherwise.
     :raises FunctionTracingError: if retrieving the function data fails with anything
         other than a RuntimeError.
     """
     # The disassembler occasionally has a type set on an address that should not have
     # one. A loaded address for which guess_type() yields nothing is therefore rejected
     # up front, which also avoids the slower get_function_data() call.
     # TODO: Determine if we could have false negatives.
     loaded_without_type = idc.is_loaded(offset) and not idc.guess_type(offset)
     if loaded_without_type:
         return False

     try:
         utils.get_function_data(offset)
     except RuntimeError:
         return False
     except Exception as e:
         # Any other failure is reported with a friendlier error message.
         message = 'Failed to retrieve function data from {!r}: {}'.format(offset, e)
         raise FunctionTracingError(message, ip=self.ip)
     return True
Exemplo n.º 15
0
    def parse_stack_strings(self, func):
        """
        Trace ``func`` and collect, prune and report the stack strings it uses.

        Every head of the function is emulated through a tracer context. A stack variable
        that appears as the last operand of an instruction is handed to
        ``self.process_string``; for ``push`` instructions this is delayed until the next
        ``call`` (or the end of the function). Afterwards ``self.encoded_strings`` is pruned
        of entries shorter than 3 bytes, duplicates and substrings, and the remaining
        strings are published and commented at their reference address.

        :param func: function object providing ``start_ea`` and ``heads()``.
        """
        logger.debug("Processing function: 0x{:X}".format(func.start_ea))
        tracer = function_tracing.get_tracer(func.start_ea)

        waiting_for_call = []

        # Most recent context that could actually be obtained. Tracked separately from the
        # per-instruction context so that strings still waiting for a call are not dropped
        # when the final head of the function yields no context.
        last_context = None
        for ea in func.heads():
            context = tracer.context_at(ea)
            if not context:
                continue
            last_context = context
            context.execute()  # also include instruction we are looking at.

            # If we encounter a call, process pushed in variables.
            if idc.print_insn_mnem(ea) == "call":
                for ip, var in waiting_for_call:
                    self.process_string(context, var, ip)
                waiting_for_call = []
                continue

            # Look for instructions where a stack variable is being used for something other than
            # a move.
            # We can do this by only considering variables that are the last operand.
            operands = context.get_operands(ea)
            if not operands:
                continue
            addr = operands[-1].addr or operands[-1].value
            if not addr:
                continue
            var = context.variables.get(addr)
            if var and var.is_stack:
                # Ignore string if it comes from memory with no concatenations.
                if var.history and idc.is_loaded(var.history[0].addr):
                    continue

                # If instruction is a push, it is possible that the string will be populated
                # after this instruction. Therefore, wait for the function call before processing.
                if idc.print_insn_mnem(ea) == "push":
                    waiting_for_call.append((ea, var))
                else:
                    self.process_string(context, var, ea)

        # Process any strings still waiting for a call.
        if last_context:
            for ip, var in waiting_for_call:
                self.process_string(last_context, var, ip)

        # Remove any substrings or strings that are too small.
        # Iteration happens over copies (sorted() / slice) because entries are removed
        # from self.encoded_strings along the way.
        for addr, encoded_string in sorted(self.encoded_strings):
            if len(encoded_string.encoded_data) < 3:
                self.encoded_strings.remove((addr, encoded_string))
                continue
            for _addr, _encoded_string in self.encoded_strings[:]:
                # Remove dups
                if (_addr == addr and _encoded_string is not encoded_string
                        and _encoded_string.encoded_data
                        == encoded_string.encoded_data):
                    self.encoded_strings.remove((addr, encoded_string))
                    break
                # Remove substrings: the data is already contained in a string that starts
                # at a lower address.
                elif _addr < addr:
                    index = addr - _addr
                    substring = _encoded_string.encoded_data[
                        index:index + len(encoded_string.encoded_data)]
                    if substring == encoded_string.encoded_data:
                        self.encoded_strings.remove((addr, encoded_string))
                        break

        # Report found strings
        for _, encoded_string in sorted(self.encoded_strings):
            # Don't want to rename because the buffers could be reused for multiple strings.
            encoded_string.publish(rename=False, patch=False)
            # TODO: EncodedString should allow commenting without renaming.
            idc.set_cmt(
                encoded_string.string_reference,
                'Stack String: "{}"'.format(encoded_string.display_name), 0)
Exemplo n.º 16
0
def is_mapped_data(ea, size=1):
    """
    Check that ``ea`` is loaded and that the ``size``-byte range stays in one segment.

    :param ea: first address of the range
    :param size: number of bytes in the range (default 1)
    :return: falsy if ``ea`` is not loaded; otherwise whether the first and the last
        address of the range report the same segment start.
    """
    loaded = idc.is_loaded(ea)
    if not loaded:
        return loaded

    last_ea = ea + size - 1
    return idc.get_segm_start(ea) == idc.get_segm_start(last_ea)