def get_original_location(self, addr):
    """
    Resolve where the data at an address originally came from, using its pointer history.

    :param addr: address of interest

    :return: a 2-tuple ``(ip, location)``:
        - ``ip``: instruction pointer taken from the pointer-history entry that was
          matched (or the last entry visited), or None if ``addr`` is itself loaded
          or the history is empty.
        - ``location``: a loaded address, a ``(frame_id, stack_offset)`` tuple for a
          stack variable, or None if no original location could be determined.
    """
    # TODO: Consider refactoring.

    # A loaded address is already its own original location.
    if idc.is_loaded(addr):
        return None, addr

    origin_ip = None
    candidate = self.variables.get(addr, None)

    # Walk the pointer history in reverse. The first loaded address encountered wins;
    # otherwise remember the most recent variable seen along the way.
    for origin_ip, previous_addr in reversed(self.get_pointer_history(addr)):
        if idc.is_loaded(previous_addr):
            return origin_ip, previous_addr
        candidate = self.variables.get(previous_addr, candidate)

    # Only stack variables can be reported when no loaded address was found.
    location = None
    if candidate and candidate.is_stack:
        location = (candidate.frame_id, candidate.stack_offset)
    return origin_ip, location
def is_func_ptr(offset: int) -> bool:
    """
    Returns true if the given offset is a function pointer.

    :param offset: address to test
    :return: True if the offset is the start of a function or function data can be
        retrieved for it, False otherwise.
    :raises FunctionTracingError: if retrieving function data fails with anything
        other than a RuntimeError. The original exception is chained as the cause.
    """
    # As a first check, simply see if the offset is the start of a function.
    func = ida_funcs.get_func(offset)
    if func and func.start_ea == offset:
        return True

    # Sometimes we will get a really strange issue where the IDA disassembler has set a type for an
    # address that should not have been set during our course of emulation.
    # Therefore, before attempting to use get_function_data() to test if it's a function pointer,
    # first see if guess_type() will return None while is_loaded() is true.
    # If it does, we know that it shouldn't be a function pointer.
    # (plus it saves on time)
    # TODO: Determine if we could have false negatives.
    #   - this caused a false negative, so the start-of-function check above was added.
    try:
        if idc.is_loaded(offset) and not idc.guess_type(offset):
            return False
    except TypeError:
        return False

    # Keep the try body down to the single call that is expected to raise.
    try:
        get_function_data(offset)
    except RuntimeError:
        return False
    except Exception as e:
        # If we get any other type of exception raise a more friendly error message,
        # keeping the original exception attached as the cause.
        raise FunctionTracingError(
            "Failed to retrieve function data from {!r}: {}".format(offset, e)) from e
    return True
def find_scatter_table():
    """
    Locate scatter-load tables by matching known ``__scatterload`` byte prefixes.

    For every function created from a matching prefix, the value of operand 1 of its
    first instruction is taken as the address of two consecutive dwords. Each dword is
    added to that address (truncated to 32 bits) to give the table start and end.
    Tables whose start address is not loaded are skipped.

    :return dict: mapping of table start address -> table end address
    """
    signatures = {
        "__scatterload": [
            "0A A0 90 E8 00 0C 82 44",
            "2C 00 8F E2 00 0C 90 E8 00 A0 8A E0 00 B0 8B E0",  # For 5G
        ],
    }

    found = {}
    for routine_name, patterns in signatures.items():
        for pattern in patterns:
            for func_ea in create_func_by_prefix(routine_name, pattern, force=True):
                if func_ea == idc.BADADDR:
                    continue

                literal_ea = idc.get_operand_value(func_ea, 1)
                if literal_ea == -1:
                    # Operand could not be read: flip the "T" segment register for this
                    # address and try again (presumably toggling ARM/Thumb decoding).
                    current_t = idc.get_sreg(func_ea, "T")
                    idc.split_sreg_range(func_ea, "T", not current_t, idc.SR_user)
                    literal_ea = idc.get_operand_value(func_ea, 1)

                # Both table bounds are stored relative to the literal's own address.
                rel_start = ida_bytes.get_dword(literal_ea)
                rel_end = ida_bytes.get_dword(literal_ea + 4)
                table_start = (rel_start + literal_ea) & 0xFFFFFFFF
                table_end = (rel_end + literal_ea) & 0xFFFFFFFF

                if idc.is_loaded(table_start):
                    found[table_start] = table_end
                    print("__scatter_table: 0x%x -> 0x%x" % (table_start, table_end))
                    set_entry_name(table_start, "__scatter_table")

    return found
def value(self):
    """
    Retrieve the operand's value as it currently stands in the cpu_context.

    NOTE: The result is not cached because it may change along with the cpu context.

    :return: None for a hidden operand; the immediate value, register value, or
        function pointer address as an integer; the unpacked memory contents for a
        memory reference; or ``b""`` if the memory reference has a zero width.
    :raises FunctionTracingError: if the operand type is not handled.
    """
    if self.is_hidden:
        return None

    if self.is_immediate:
        immediate = idc.get_operand_value(self.ip, self.idx)
        # Create variable/reference if global.
        if idc.is_loaded(immediate):
            self._cpu_context.variables.add(immediate, reference=self.ip)
        return immediate

    if self.is_register:
        reg_value = self._cpu_context.registers[self.text]
        # Record reference if register is a variable address.
        if reg_value in self._cpu_context.variables:
            self._cpu_context.variables[reg_value].add_reference(self.ip)
        return reg_value

    # TODO: Determine if this is still necessary.
    # FS, GS (at least) registers are identified as memory addresses. We need to
    # identify them as registers and handle them as such.
    if self.type == idc.o_mem:
        for segment_reg in ("fs", "gs"):
            if segment_reg in self.text:
                return getattr(self._cpu_context.registers, segment_reg)

    # If a memory reference, return what is read from memory.
    if self.is_memory_reference:
        target = self.addr

        # Record reference if address is a variable address.
        if target in self._cpu_context.variables:
            self._cpu_context.variables[target].add_reference(self.ip)

        # If a function pointer, we want to return the address.
        # This is because a function may be seen as a memory reference, but we don't
        # want to dereference it in case it is in a non-call instruction.
        # (e.g. "mov esi, ds:LoadLibraryA")
        # NOTE: Must use internal function to avoid recursive loop.
        if utils.is_func_ptr(target):
            return target

        # Nothing to read when the operand has no width.
        if not self.width:
            logger.debug("Width is zero for {}, returning empty string.".format(self.text))
            return b""

        # Otherwise, dereference the address.
        raw = self._cpu_context.mem_read(target, self.width)
        return utils.struct_unpack(raw)

    raise FunctionTracingError("Invalid operand type: {}".format(self.type), ip=self.ip)
def run_scatterload(debug=False):
    """
    Process every scatter-load table repeatedly until a pass makes no changes.

    Each table is walked in 16-byte entries made of four dwords: source address,
    destination address, size, and the address of the handler ("how"). The handler
    address is looked up in the result of find_scatter_funcs() to get a handler name,
    which selects between copy, decompress, decompress2 and zero-init.

    :param bool debug: when true, only print the table entries; no segments are
        created and no data is written.
    """
    # Newly identified region may have additional scatter load procedure. Thus,
    # we continuously proceed until no changes left.
    is_changed = True
    while is_changed:
        is_changed = False
        tables = find_scatter_table()
        scatter_funcs = find_scatter_funcs()

        for start, end in tables.items():
            print("Processing table: 0x%x to 0x%x" % (start, end))
            while start < end:
                # Mark the entry as data and turn its first dword into an offset.
                ida_bytes.create_dword(start, 16)
                ida_offset.op_offset(start, 0, idc.REF_OFF32)
                src = ida_bytes.get_dword(start)
                dst = ida_bytes.get_dword(start + 4)
                size = ida_bytes.get_dword(start + 8)
                how = ida_bytes.get_dword(start + 12)

                if how not in scatter_funcs:
                    print("%x: no addr 0x%x in scatter_funcs" % (start, how))
                    start += 16
                    continue

                func_name = scatter_funcs[how]
                # Advance before any of the `continue` statements below so the
                # walk always makes progress.
                start += 16
                print("%s: 0x%x -> 0x%x (0x%x bytes)" % (func_name, src, dst, size))

                # Every handler except zero-init needs a loaded, non-empty source.
                if func_name != "__scatterload_zeroinit":
                    if not idc.is_loaded(src) or size == 0:
                        print("0x%x is not loaded." % (src))
                        continue

                if debug:
                    # only show information above
                    continue

                if func_name == "__scatterload_copy":
                    if add_segment(dst, size, "CODE"):
                        memcpy(src, dst, size)
                        is_changed = True

                elif func_name == "__scatterload_decompress":
                    if add_segment(dst, size, "DATA"):
                        decomp(src, dst, size)
                        is_changed = True

                # some old firmware images have this.
                elif func_name == "__scatterload_decompress2":
                    if add_segment(dst, size, "DATA"):
                        decomp2(src, dst, size)
                        is_changed = True

                elif func_name == "__scatterload_zeroinit":
                    # No need to further proceed for zero init.
                    if add_segment(dst, size, "DATA"):
                        memclr(dst, size)

        # NOTE(review): placed at the end of each pass so auto-analysis finishes
        # before the tables are searched again -- confirm the intended nesting.
        ida_auto.auto_wait()
def read_bytes_at(ea, count):
    """
    Read up to ``count`` bytes starting at ``ea``, without reading past the segment end.

    :param ea: address to start reading from
    :param count: maximum number of bytes to read
    :return: ``b""`` if ``ea`` is not loaded, otherwise the result of
        ``idc.get_bytes`` for the clamped length.
    """
    # check if byte has a value, see get_wide_byte doc
    if not idc.is_loaded(ea):
        return b""

    # Clamp the request to what remains of the segment containing ea.
    remaining = idc.get_segm_end(ea) - ea
    return idc.get_bytes(ea, min(count, remaining))
def is_mapped(ea, size=1, value=True):
    """Check if the given address is mapped.

    Specify a size greater than 1 to check if an address range is mapped.

    Arguments:
        ea: The linear address to check.

    Options:
        size: The number of bytes at ea to check. Default is 1.
        value: Only consider an address mapped if it has a value. For example, the
            contents of a bss section exist but don't have a static value. If value
            is False, consider such addresses as mapped. Default is True.

    Returns:
        bool: True if the first and last byte of the range are mapped.

    Raises:
        ValueError: If size is less than 1.

    Notes:
        This function is currently a hack: It only checks the first and last byte.
    """
    if size < 1:
        raise ValueError('Invalid argument: size={}'.format(size))

    last_ea = ea + size - 1
    # HACK: We only check the first and last byte, not all the bytes in between.
    if value:
        return bool(idc.is_loaded(ea) and (size == 1 or idc.is_loaded(last_ea)))
    # getseg() yields a segment object or None; coerce so callers always get a bool.
    return bool(idaapi.getseg(ea) and (size == 1 or idaapi.getseg(last_ea)))
def _obtain_bytes(start, end):
    """
    Collect the bytes in ``[start, end)``, substituting zero for non-loaded bytes.

    :param int start: starting address
    :param int end: ending address (exclusive)

    :return bytearray: bytearray containing bytes within range
    """
    collected = bytearray()
    for ea in range(start, end):
        # Addresses without a loaded value are reported as zero.
        if idc.is_loaded(ea):
            collected.append(ida_bytes.get_wide_byte(ea))
        else:
            collected.append(0)
    return collected
def _obtain_bytes(start, end):
    """
    Collect the bytes in ``[start, end)``, substituting zero for non-loaded bytes.

    :param int start: starting address
    :param int end: ending address (exclusive)

    :return bytes: bytes contained within range
    """

    def _byte_at(ea):
        # Addresses without a loaded value are reported as zero.
        return ida_bytes.get_wide_byte(ea) if idc.is_loaded(ea) else 0

    return bytes(bytearray(map(_byte_at, range(start, end))))
def get_string(ea, length=idc.BADADDR):
    """
    Read a NUL-terminated byte string starting at ``ea``.

    Reading stops at the first zero byte, at the first address that is not loaded,
    or once ``length`` bytes have been consumed, whichever comes first.

    :param ea: address of the first byte
    :param length: maximum number of bytes to read
    :return bytes: the bytes read, without the terminator
    """
    limit = ea + length
    collected = bytearray()
    cursor = ea
    # Stop as soon as we run off the requested range or hit a non-loaded address.
    while cursor < limit and idc.is_loaded(cursor):
        current = ida_bytes.get_byte(cursor)
        if current == 0:  # NULL terminate
            break
        collected.append(current)
        cursor += 1
    return bytes(collected)
def handle_string_mov(ea, state):
    """Updates the stack based on a movs instruction. Used by create_stack

    If a rep/repne prefix is used, takes the count from ecx. If the count cannot be
    determined, will ignore the instruction. Also assumes that esi points to memory
    within the executable, and edi points to the stack. On any errors (including an
    instruction that fails to decode or a short read), this will ignore the
    instruction.

    :param ea: instruction location
    :param state: the current TraceState

    :return: None - updates stack or regs
    """
    opcode = idaapi.get_bytes(ea, 1)
    rep_inst = opcode in (b"\xf2", b"\xf3")
    count = state.get_reg_value("ecx") if rep_inst else 1
    if not count or count < 0:
        return

    cmd = idaapi.insn_t()
    # decode_insn() reports a length of 0 when decoding fails; the operand data
    # would be meaningless in that case, so ignore the instruction.
    if not idaapi.decode_insn(cmd, ea):
        return
    dtype = cmd.ops[0].dtype
    # dtype values 0, 1, 2 are treated as 1, 2, 4 bytes; anything else as 4 bytes.
    word_size = [1, 2, 4][dtype] if dtype < 3 else 4
    count *= word_size

    src = state.get_reg_value("esi")
    dst = state.get_reg_value("edi")
    if src is None or dst is None:
        return

    # In IDA 7, get_bytes doesn't return None on failure, instead it will return
    # a string of \xff the size of count. My theory is that the function changed
    # to return -1 for each byte within the c code and something is casting it to
    # a string before returning.
    # Since all \xff's could be valid we need to check if src is valid instead.
    if not idc.is_loaded(src):
        return
    bytes_ = idaapi.get_bytes(src, count)
    if bytes_ in (None, -1):  # Keep this around in-case they fix it in a future version.
        return
    # A short read would leave the stack and registers out of step; treat as an error.
    if len(bytes_) != count:
        return

    for i, byte in enumerate(bytes_):
        state.stack[dst + i] = (byte, ea)

    if rep_inst:
        state.set_reg_value("ecx", 0, ea)
    state.set_reg_value("esi", src + count, ea)
    state.set_reg_value("edi", dst + count, ea)
def _get_segment_bytes(self, start, end):
    """
    Obtain segment bytes, setting non-loaded bytes to NULL

    :param int start: segment start EA
    :param int end: segment end EA (exclusive)

    :return bytes: bytes contained in segment (empty if ``start >= end``)
    """
    # Reconstruct the segment, account for bytes which are not loaded.
    # An explicit counter is used instead of xrange() (which can fail with
    # "Python int too large to convert to C long") and instead of
    # iter(count(start).next, end), which only exists on Python 2 and never
    # terminates when start > end.
    segment = bytearray()
    ea = start
    while ea < end:
        segment.append(idc.get_wide_byte(ea) if idc.is_loaded(ea) else 0)
        ea += 1
    # bytes() rather than str(): on Python 3, str(bytearray) yields the repr text.
    return bytes(segment)
def factory(cls, string_location, string_reference, size=None, offset=None, key=None,
            encoded_data=None, code_page=None, dest=None):
    """
    Build an EncodedString or an EncodedStackString depending on where the string lives.

    If ``string_location`` is a loaded address an EncodedString is returned; otherwise
    the location is assumed to be a stack pointer and an EncodedStackString is returned.

    :param string_location: Data segment pointer for static strings or stack pointer
        for stack strings.
    :param string_reference: The location the string is referenced from.
        This is required to pull the stack frame when string_location is a stack pointer.
    :param size: The size of the string. Required to use self.get_bytes.
    :param offset: Used when there is an offset based accessing scheme.
    :param key: Used when there is a key that can vary by string.
    :param encoded_data: encoded/encrypted data, if not provided data will be
        retrieved from IDA.
    :param code_page: known encoding page used to decode data to unicode (after data
        is decrypted) (code page is dynamically determined if not provided)
    :param dest: Location of decrypted data (if different from string_location)
    """
    # Keyword arguments that both string types take unchanged.
    shared = dict(size=size, offset=offset, key=key, code_page=code_page, dest=dest)

    if idc.is_loaded(string_location):
        return EncodedString(
            string_location, string_reference, encoded_data=encoded_data, **shared)

    # otherwise assume string_location is a pointer within the stack
    # (using function_tracing's CPU emulator) and create an EncodedStackString object.
    frame_id = idc.get_func_attr(string_reference, idc.FUNCATTR_FRAME)
    frame_size = idc.get_func_attr(string_reference, idc.FUNCATTR_FRSIZE)

    # FIXME: This method isn't always super accurate because... IDA
    stack_offset = (
        string_location
        + function_tracing.RSP_OFFSET
        + frame_size
        - function_tracing.STACK_BASE
    )

    if stack_offset < 0:
        logger.warning(
            'Ignoring negative stack offset {:#x} pulled from 0x{:X}'.format(
                stack_offset, string_location))
        stack_offset = None

    return EncodedStackString(
        encoded_data,
        frame_id=frame_id,
        stack_offset=stack_offset,
        string_reference=string_reference,
        **shared)
def _is_func_ptr(self, offset):
    """
    Returns true if the given offset is a function pointer.

    :param offset: address to test
    :return bool: True if function data can be retrieved for the offset.
    :raises FunctionTracingError: if retrieving function data fails with anything
        other than a RuntimeError.
    """
    # Sometimes we will get a really strange issue where the IDA disassembler has set a type for an
    # address that should not have been set during our course of emulation.
    # Therefore, before attempting to use get_function_data() to test if it's a function pointer,
    # first see if guess_type() will return None while is_loaded() is true.
    # If it does, we know that it shouldn't be a function pointer.
    # (plus it saves on time)
    # TODO: Determine if we could have false negatives.
    loaded_without_type = idc.is_loaded(offset) and not idc.guess_type(offset)
    if loaded_without_type:
        return False

    try:
        utils.get_function_data(offset)
    except RuntimeError:
        return False
    except Exception as e:
        # If we get any other type of exception raise a more friendly error message.
        message = 'Failed to retrieve function data from {!r}: {}'.format(offset, e)
        raise FunctionTracingError(message, ip=self.ip)
    else:
        return True
def parse_stack_strings(self, func):
    """
    Trace a function, collect stack strings, prune the collected list, and report it.

    Every instruction head of ``func`` is executed in a traced context. Stack
    variables used as the last operand are handed to ``self.process_string``, and
    variables seen on a ``push`` are deferred until the next ``call``.
    Entries in ``self.encoded_strings`` that are shorter than 3 bytes, duplicated, or
    contained in an entry at a lower address are then removed. Each remaining entry
    is published and a "Stack String" comment is set at its reference.

    :param func: function object providing ``start_ea`` and ``heads()``
    """
    logger.debug("Processing function: 0x{:X}".format(func.start_ea))
    tracer = function_tracing.get_tracer(func.start_ea)

    # (instruction address, variable) pairs pushed but not yet followed by a call.
    waiting_for_call = []
    context = None
    for ea in func.heads():
        context = tracer.context_at(ea)
        if not context:
            continue
        context.execute()  # also include instruction we are looking at.

        # If we encounter a call, process pushed in variables.
        if idc.print_insn_mnem(ea) == "call":
            for ip, var in waiting_for_call:
                self.process_string(context, var, ip)
            waiting_for_call = []
            continue

        # Look for instructions where a stack variable is being used for something other than
        # a move.
        # We can do this by only considering variables that are the last operand.
        operands = context.get_operands(ea)
        if not operands:
            continue
        addr = operands[-1].addr or operands[-1].value
        if not addr:
            continue
        var = context.variables.get(addr)
        if var and var.is_stack:
            # Ignore string if it comes from memory with no concatenations.
            if var.history and idc.is_loaded(var.history[0].addr):
                continue

            # If instruction is a push, it is possible that the string will be populated
            # after this instruction. Therefore, wait for the function call before processing.
            if idc.print_insn_mnem(ea) == "push":
                waiting_for_call.append((ea, var))
            else:
                self.process_string(context, var, ea)

    # Process any strings still waiting for a call.
    # These are processed with the last context obtained in the loop above.
    if context:
        for ip, var in waiting_for_call:
            self.process_string(context, var, ip)

    # Remove any substrings or strings that are too small.
    # sorted() iterates over a copy, so removing from self.encoded_strings here is safe.
    for addr, encoded_string in sorted(self.encoded_strings):
        if len(encoded_string.encoded_data) < 3:
            self.encoded_strings.remove((addr, encoded_string))
            continue
        # Compare against a snapshot of the list, since entries are removed below.
        for _addr, _encoded_string in self.encoded_strings[:]:
            # Remove dups
            if (_addr == addr and _encoded_string is not encoded_string and _encoded_string.encoded_data == encoded_string.encoded_data):
                self.encoded_strings.remove((addr, encoded_string))
                break
            # Remove substrings
            elif _addr < addr:
                # Slice the lower-address data at the address difference and compare.
                index = addr - _addr
                substring = _encoded_string.encoded_data[ index:index + len(encoded_string.encoded_data)]
                if substring == encoded_string.encoded_data:
                    self.encoded_strings.remove((addr, encoded_string))
                    break

    # Report found strings
    for _, encoded_string in sorted(self.encoded_strings):
        # Don't want to rename because the buffers could be reused for multiple strings.
        encoded_string.publish(rename=False, patch=False)
        # TODO: EncodedString should allow commenting without renaming.
        idc.set_cmt( encoded_string.string_reference, 'Stack String: "{}"'.format(encoded_string.display_name), 0)
def is_mapped_data(ea, size=1):
    """
    Check that ``ea`` is loaded and that the range stays within one segment.

    Only the first and last address of the range are examined.

    :param ea: first address of the range
    :param size: number of bytes in the range. Default is 1.
    :return: the falsy result of ``idc.is_loaded(ea)`` when the address is not loaded,
        otherwise whether ``ea`` and ``ea + size - 1`` share a segment start.
    """
    loaded = idc.is_loaded(ea)
    if not loaded:
        return loaded

    first_segment = idc.get_segm_start(ea)
    last_segment = idc.get_segm_start(ea + size - 1)
    return first_segment == last_segment