def find_parse_ip(li, ea, parsecode):
    # TODO check memory for SEGA SATURN string
    # segaSaturn = li.read(16)
    # warning(segaSaturn+' '+str(li.tell()))
    ida_bytes.create_strlit(ea, 16, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x10, 16, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x20, 10, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x2A, 6, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x30, 8, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x38, 8, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x40, 10, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x4A, 6, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x50, 16, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(ea + 0x60, 0x70, ida_nalt.STRTYPE_C)
    ida_bytes.create_byte(ea + 0xD0, 16)
    ida_bytes.create_dword(ea + 0xE0, 4)
    ida_bytes.create_dword(ea + 0xE4, 4)
    ida_bytes.create_dword(ea + 0xE8, 4)
    ida_bytes.create_dword(ea + 0xEC, 4)
    ida_bytes.create_dword(ea + 0xF0, 4)
    ida_funcs.add_func(ida_bytes.get_dword(ea + 0xF0), ida_idaapi.BADADDR)
    ida_bytes.create_dword(ea + 0xF4, 4)
    ida_bytes.create_dword(ea + 0xF8, 4)
    ida_bytes.create_dword(ea + 0xFC, 4)
    if parsecode:
        ida_funcs.add_func(ea + 0x100, ida_idaapi.BADADDR)
    return 1
def rename(beg, ptr, make_funcs=True):
    go_fun = Utils.load_function_comments()
    base = beg
    pos = beg + 8  # skip header
    size = ptr.ptr(pos)
    pos += ptr.size
    end = pos + (size * ptr.size * 2)
    while pos < end:
        offset = ptr.ptr(pos + ptr.size)
        ptr.maker(pos)  # in order to get xrefs
        ptr.maker(pos + ptr.size)
        pos += ptr.size * 2
        ptr.maker(base + offset)
        func_addr = ptr.ptr(base + offset)
        if make_funcs == True:
            ida_bytes.del_items(func_addr, 1, ida_bytes.DELIT_SIMPLE)
            ida_funcs.add_func(func_addr)
        name_offset = idc.get_wide_dword(base + offset + ptr.size)
        name = idc.get_strlit_contents(base + name_offset)
        comment = name
        if go_fun:
            tcomment = Utils.get_function_comment(name, go_fun)
            if tcomment:
                comment = tcomment
            Utils.add_function_comment(func_addr, comment)
        name = Utils.relaxName(name)
        print(name)
        Utils.rename(func_addr, name)
def find_do_go(base_ea):
    str_ea = idc.get_name_ea_simple("aCebilefciladrm")
    if str_ea != ida_idaapi.BADADDR:
        for xref in idautils.XrefsTo(str_ea):
            # IDA messes up this function, so I find it this way:
            func = idaapi.get_func(xref.frm)
            dg_ea = 0
            if func is not None:
                dg_ea = ida_search.find_binary(xref.frm, func.start_ea, prologues[0], 16, ida_search.SEARCH_UP)
                if dg_ea == ida_idaapi.BADADDR:
                    dg_ea = ida_search.find_binary(xref.frm, func.start_ea, "FF ?? ?? D1", 16, ida_search.SEARCH_UP)
            else:
                dg_ea = ida_search.find_binary(xref.frm, base_ea, "FF ?? ?? D1", 16, ida_search.SEARCH_UP)
            ida_funcs.add_func(dg_ea)
            print("\t[+] _do_go = 0x%x" % (dg_ea))
            idc.set_name(dg_ea, "_do_go", idc.SN_CHECK)
            return dg_ea
    print("\t[-] _do_go = not found")
    return ida_idaapi.BADADDR
def get_func_name_and_call_offset(call_addr):
    success, function_name, function_ea, function_end_ea = extract_function_info_from_nearest_name(call_addr)
    if success:
        # don't know why function_name sometimes is None. TODO check this
        if function_name is None:
            function_name = "unknown name"
        call_offset_in_function = "+0x{:X}".format(call_addr - function_ea)
    else:
        # failed to get info from nearest name
        function_info = idaapi.get_func(call_addr)
        if not function_info:
            ida_funcs.add_func(call_addr, idaapi.BADADDR)
            function_info = idaapi.get_func(call_addr)
        if function_info:
            function_ea = function_info.start_ea
            function_name = idc.get_func_name(function_ea)
            call_offset_int = call_addr - function_ea
            call_offset_in_function = "+0x{:X}".format(call_offset_int)
    return (function_name, call_offset_in_function, function_ea, function_end_ea)
def __get_xref_sigs(self, addr: int, is_func: bool) -> Iterator[str]:
    """
    Create a signature from a function address
    XRef sigs are preferred, however if none are available the func itself will be used
    :param addr: Function address
    :param is_func: Indicates that this address is a func, and can be signatured directly
    :return: A series of Dalamud compatible signatures
    """
    xref_addrs = [xref.frm for xref in idautils.XrefsTo(addr)]

    if is_func and not ida_funcs.get_func(addr):
        Log.warn(
            f'Address at {addr:X} is identified as a func, but is not in IDA, attempting to make a subroutine')
        ida_funcs.add_func(addr)

    # This should prune xrefs in places like .pdata by only keeping xrefs in a function
    xref_addrs = list(filter(ida_funcs.get_func_name, xref_addrs))

    # Grab the first N xrefs
    xref_addrs = xref_addrs[:self.XREFS_TO_SEARCH]

    if is_func:
        # Try to sig the func itself as well
        xref_addrs.insert(0, addr)

    for xref_addr in xref_addrs:
        yield from SigGen(xref_addr)
def makePointedFunctions(self):
    """Modify the code and tell IDA that our code fptrs should point to the beginning of functions."""
    # We want the list in descending function order
    fptrs_couples = list(self._ref_ptrs.items())
    fptrs_couples.sort(key=lambda x: x[0], reverse=True)
    # Now we can iterate it
    for func_ea, code_type in fptrs_couples:
        self._analyzer.setCodeType(func_ea, func_ea + 1, code_type)
        ida_funcs.add_func(func_ea)
def create_interrupt_handlers(li):
    """
    Make code at interrupt handler callbacks

    :param li: Loader input
    """
    li.seek(8)
    for _ in range(8, 256, 4):
        dword = struct.unpack('>I', li.read(4))[0]
        ida_funcs.add_func(dword)
def do_rename(line):
    symbol_address, symbol_type, symbol_name = line.strip().split(' ')
    if symbol_type in ('t', 'T'):
        ida_funcs.add_func(int(symbol_address, 16))
    attempts = 0
    while attempts < 10:
        if attempts > 0:
            symbol_name = ('%s_%d' % (symbol_name[:-2], attempts))
        if ida_name.set_name(int(symbol_address, 16), symbol_name):
            break
        else:
            attempts += 1
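# A minimal usage sketch (not from the original source): feed do_rename() lines from an
# nm-style symbol dump. The file name 'symbols.nm' and the "<hex-address> <type> <name>"
# line format are assumptions inferred from how do_rename() splits its input.
def rename_from_symbol_file(path='symbols.nm'):
    with open(path, 'r') as f:
        for line in f:
            if line.strip():
                do_rename(line)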
def _convert_address_to_function(func):
    """Convert an address that IDA has classified incorrectly into a proper function."""
    # If everything goes wrong, we'll try to restore this function.
    orig = idc.first_func_chunk(func)
    # If the address is not code, let's undefine whatever it is.
    if not ida_bytes.is_code(ida_bytes.get_full_flags(func)):
        if not is_mapped(func):
            # Well, that's awkward.
            return False
        item = ida_bytes.get_item_head(func)
        itemend = ida_bytes.get_item_end(func)
        if item != idc.BADADDR:
            _log(1, 'Undefining item {:#x} - {:#x}', item, itemend)
            ida_bytes.del_items(item, ida_bytes.DELIT_EXPAND)
            idc.create_insn(func)
            # Give IDA a chance to analyze the new code or else we won't be able to create a
            # function.
            #ida_auto.auto_wait()
            autoanalyze()
            idc.plan_and_wait(item, itemend)
    else:
        # Just try removing the chunk from its current function. IDA can add it to another function
        # automatically, so make sure it's removed from all functions by doing it in loop until it
        # fails.
        for i in range(1024):
            if not idc.remove_fchunk(func, func):
                break
    # Now try making a function.
    if ida_funcs.add_func(func) != 0:
        return True
    # This is a stubborn chunk. Try recording the list of chunks, deleting the original function,
    # creating the new function, then re-creating the original function.
    if orig != idc.BADADDR:
        chunks = list(idautils.Chunks(orig))
        if ida_funcs.del_func(orig) != 0:
            # Ok, now let's create the new function, and recreate the original.
            if ida_funcs.add_func(func) != 0:
                if ida_funcs.add_func(orig) != 0:
                    # Ok, so we created the functions! Now, if any of the original chunks are not
                    # contained in a function, we'll abort and undo.
                    if all(idaapi.get_func(start) for start, end in chunks):
                        return True
            # Try to undo the damage.
            for start, _ in chunks:
                ida_funcs.del_func(start)
    # Everything we've tried so far has failed. If there was originally a function, try to restore
    # it.
    if orig != idc.BADADDR:
        _log(0, 'Trying to restore original function {:#x}', orig)
        ida_funcs.add_func(orig)
    return False
def locate_functions(segm):
    ss = find_probable_string_start(segm)
    print("[+] Strings = %s" % ss)
    for i in ["7F 23 03 D5", "BD A9", "BF A9"]:
        ea = segm.start_ea
        while ea != BADADDR:
            ea = ida_search.find_binary(ea, segm.end_ea, i, 16, ida_search.SEARCH_DOWN)
            if ea != BADADDR and ea <= ss:
                ea -= 2
                if (ea % 4) == 0 and idaapi.get_full_flags(ea) < 0x200:
                    # print("[+] Defining a function at 0x%x" % (ea))
                    ida_funcs.add_func(ea)
                ea += 4
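# A minimal usage sketch (not from the original source): run locate_functions() over every
# segment in the database. idautils.Segments() yields segment start addresses, and
# ida_segment.getseg() turns each into the segment_t object the function expects.
import idautils, ida_segment

def locate_functions_in_all_segments():
    for seg_start in idautils.Segments():
        locate_functions(ida_segment.getseg(seg_start))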
def btn_imp_ghidra_funcs(self, code=0):
    """Import the list of functions exported from Ghidra"""
    ghidra_filepath = os.path.join(os.getcwd(), 'ghidra_func_addrs.csv')
    ghidra_path = ida_kernwin.ask_str(ghidra_filepath, 0, 'Path of the Ghidra function export file to import')
    func_addrs = list(idautils.Functions())
    make_func_addrs = []
    if ghidra_path and ghidra_path != '':
        if os.path.exists(ghidra_path):
            with open(ghidra_path, 'rb') as f:
                next(f)  # skip the CSV header
                reader = csv.reader(f)
                for row in reader:
                    addr = int(row[0].strip('\"'), 16)
                    if ida_funcs.add_func(addr) == True:
                        make_func_addrs.append(addr)
                    else:
                        if addr not in func_addrs:
                            FELogger.info("Failed to create function at %s" % hexstr(addr))
            FELogger.info("Ghidra function export file %s has been imported" % ghidra_path)
        else:
            FELogger.erro("Ghidra function export file not found: %s" % ghidra_path)
    else:
        FELogger.warn("Please enter the path of the Ghidra function export file")
    FELogger.info("Successfully created %d new functions" % len(make_func_addrs))
def find_img4decodeinit(base_ea):
    cur_ea = base_ea
    while True:
        ea_list = ida_search.find_imm(cur_ea, ida_search.SEARCH_DOWN, 0x494D)
        if ea_list[0] == ida_idaapi.BADADDR:
            ea_list = ida_search.find_imm(cur_ea, ida_search.SEARCH_DOWN, 0x494D0000)
        if ea_list[0] != ida_idaapi.BADADDR:
            ea = ea_list[0]
            func = ida_funcs.get_func(ea)
            func_ea = 0
            if not func:
                func_ea = ida_search.find_binary(ea, base_ea, "?? ?? BD A9", 16, ida_search.SEARCH_UP)
                if func_ea != ida_idaapi.BADADDR:
                    ida_funcs.add_func(func_ea)
                else:
                    print("\t[-] _Img4DecodeInit = not found")
                    return ida_idaapi.BADADDR
            else:
                func_ea = func.start_ea
            ea_func_list = list(idautils.XrefsTo(func_ea))
            if not ea_func_list:
                cur_ea = ea + 4
                continue
            if ea_func_list[0].frm != ida_idaapi.BADADDR:
                try:
                    i4d_ea = ida_funcs.get_func(ea_func_list[0].frm).start_ea
                    print("\t[+] _Img4DecodeInit = 0x%x" % (i4d_ea))
                    idc.set_name(i4d_ea, "_Img4DecodeInit", idc.SN_CHECK)
                    return i4d_ea
                except:
                    break
            cur_ea = ea + 4
    print("\t[-] _Img4DecodeInit = not found")
    return ida_idaapi.BADADDR
def is_valid_lua_function_array_entry(ea):
    str_ea = ida_bytes.get_64bit(ea)
    func_name = ida_bytes.get_strlit_contents(str_ea, -1, 0)
    if func_name is None or len(func_name) == 0:
        return False
    func_ea = ida_bytes.get_64bit(ea + 8)
    # If this points to somewhere other than the .text segment, it cant be valid
    if get_segment_name(func_ea) != '.text':
        return False
    f2 = find_func_containing(func_ea)
    # If no function is found, create one
    if f2 is None:
        ida_funcs.add_func(func_ea)
        print('Created function for Script_%s at 0x%08x' % (func_name, func_ea))
        f2 = func_ea
    elif f2 != func_ea:
        return False
    return find_func_containing(func_ea) == func_ea
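# A minimal usage sketch (not from the original source): walk a candidate array of
# {name_ptr, func_ptr} pairs (16 bytes per 64-bit entry) until an entry fails validation.
# The starting address parameter array_ea is a hypothetical placeholder.
def walk_lua_function_array(array_ea):
    ea = array_ea
    count = 0
    while is_valid_lua_function_array_entry(ea):
        count += 1
        ea += 16  # two 64-bit pointers per entry
    return count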
def rename16(beg, ptr, make_funcs=True):
    base = beg
    first_entry = ptr.ptr(base + ptr.size * 6 + 8) + base
    cnt = ptr.ptr(base + 8)
    funcname_start = base + 8 + ptr.size * 7
    for i in range(cnt):
        struct_ptr = ptr.ptr(first_entry + i * ptr.size * 2 + 8) + first_entry
        # print(f"{struct_ptr:x}")
        func_addr = ptr.ptr(first_entry + i * ptr.size * 2)
        str_val = ida_bytes.get_dword(struct_ptr + 8) + funcname_start
        name = ida_bytes.get_strlit_contents(str_val, -1, -1)
        print(f"{func_addr:x} {name}")
        if make_funcs == True:
            ida_bytes.del_items(func_addr, 1, ida_bytes.DELIT_SIMPLE)
            ida_funcs.add_func(func_addr)
        # print(type(name))
        name = Utils.relaxName(name.decode())
        Utils.rename(func_addr, name)
def OnRefresh(self):
    self.Clear()
    addr_id = {}
    for (tid, chain) in self.result.items():
        # Each node data will contain a tuple of the form: (Boolean->Is_thread, Int->Value, String->Label)
        # For threads the is_thread will be true and the value will hold the thread id
        # For exception handlers, is_thread=False and Value=Handler address

        # Add the thread node
        id_parent = self.AddNode((True, tid, "Thread %X" % tid))

        # Add each handler
        for handler in chain:
            # Check if a function is created at the handler's address
            f = ida_funcs.get_func(handler)
            if not f:
                # create function
                ida_funcs.add_func(handler)
            # Node label is function name or address
            s = ida_funcs.get_func_name(handler)
            if not s:
                s = "%x" % handler
            # cache name
            self.names[handler] = s
            # Get the node id given the handler address
            # We use an addr -> id dictionary so that similar addresses get similar node id
            if handler not in addr_id:
                id = self.AddNode((False, handler, s))
                addr_id[handler] = id  # add this ID
            else:
                id = addr_id[handler]
            # Link handlers to each other
            self.AddEdge(id_parent, id)
            id_parent = id
    return True
def rename(beg, ptr, make_funcs=True):
    base = beg
    pos = beg + 8  # skip header
    size = ptr.ptr(pos)
    pos += ptr.size
    end = pos + (size * ptr.size * 2)
    print("%x" % end)
    while pos < end:
        offset = ptr.ptr(pos + ptr.size)
        ptr.maker(pos)  # in order to get xrefs
        ptr.maker(pos + ptr.size)
        pos += ptr.size * 2
        ptr.maker(base + offset)
        func_addr = ptr.ptr(base + offset)
        if make_funcs == True:
            ida_bytes.del_items(func_addr, 1, ida_bytes.DELIT_SIMPLE)
            ida_funcs.add_func(func_addr)
        name_offset = idc.get_wide_dword(base + offset + ptr.size)
        name = idc.get_strlit_contents(base + name_offset)
        name = Utils.relaxName(name)
        Utils.rename(func_addr, name)
def import_functions(functions, sections):
    """
    Create functions from bnida analysis data

    :param functions: Array of function addrs
    :param sections: Dict containing section info
    """
    for addr in functions:
        addr = adjust_addr(sections, int(addr))
        if addr is None:
            continue

        if ida_funcs.get_func(addr):
            continue

        if not ida_funcs.add_func(addr):
            print('Failed to create function at offset:{:08x}'.format(addr))
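# A minimal driver sketch (not from the original source): load a bnida-style JSON export and
# hand its pieces to import_functions(). The file name and the 'functions'/'sections' keys are
# assumptions inferred from the parameters the function expects.
import json

def import_from_bnida_json(path='bnida_analysis.json'):
    with open(path, 'r') as f:
        data = json.load(f)
    import_functions(data.get('functions', []), data.get('sections', {}))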
def aggressiveFunctionScan(analyzer, scs):
    """Aggressively scan the code segment and try to define functions.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments
    """
    for sc in scs:
        analyzer.logger.debug("Aggressively scanning code segment: 0x%x - 0x%x", sc.start_ea, sc.end_ea)
        search_func = False
        just_started = True
        line = sark.Line(sc.start_ea)
        while line.start_ea < sc.end_ea:
            # we don't care about non-code lines
            if not line.is_code:
                line = line.next
                continue
            # check for code lines
            if line.is_code:
                try:
                    sark.Function(line.start_ea)
                    search_func = False
                    just_started = True
                    line = line.next
                    continue
                except sark.exceptions.SarkNoFunction:
                    if just_started:
                        just_started = False
                    else:
                        search_func = True
            # If we are searching for a function, simply continue
            if search_func or analyzer.switch_identifier.isSwitchCase(line.start_ea):
                line = line.next
                continue
            # This is code, make it a function
            if not ida_funcs.add_func(line.start_ea):
                line = line.next
            else:
                analyzer.logger.debug("Declared a function at: 0x%x", line.start_ea)
def parse_one_func(self, f):
    fn = idaapi.get_func(f)
    if not (fn):
        if not (ida_funcs.add_func(f)):
            return 0
        fn = idaapi.get_func(f)
    f_start, f_end = fn.start_ea, fn.end_ea
    if self.isLibcFunc(f_start):
        return 0
    if f_start in self.call_graph:
        return 1
    else:
        self.call_graph[f_start] = []
    eas = list(idautils.Heads(f_start, f_end))
    if self.pl == 'ia32':
        for ea in eas:
            mnem = idc.print_insn_mnem(ea)
            if mnem == 'call':
                nextEA = self.get_func_start(idc.get_operand_value(ea, 0))
                if not (nextEA):
                    continue
                if self.parse_one_func(nextEA):
                    self.call_graph[f_start].append(nextEA)
    elif self.pl == 'mips':
        for ea in eas:
            mnem = idc.print_insn_mnem(ea)
            '''
            Two cases:
            1. jalr + $t9 <jalr always followed by t9>
            2. jr + $t9 <only jrs followed by t9 are func call>
            '''
            if mnem in ['jalr', 'jr']:
                reg = idc.print_operand(ea, 1)
                if not (reg == '$t9'):
                    continue
                line = idc.GetDisasm(ea)
                parts = line.split(';')
                if len(parts) == 2:
                    nextEA = self.get_func_start(int(parts[-1].split('_')[-1], 16))
                    if not (nextEA):
                        continue
                    if self.parse_one_func(nextEA):
                        self.call_graph[f_start].append(nextEA)
    return 1
def define_missed_functions():
    def match(F):
        return ida_bytes.is_code(F) and not ida_bytes.is_flow(F)

    for n in range(ida_segment.get_segm_qty()):
        seg = ida_segment.getnseg(n)
        if seg.type != ida_segment.SEG_CODE:
            continue
        print("[*] Browsing segment from %#x to %#x" % (seg.start_ea, seg.end_ea))
        ea = seg.start_ea
        while ea < seg.end_ea:
            ea = ida_bytes.next_that(ea, seg.end_ea, match)
            if ea == ida_idaapi.BADADDR:
                break
            if ida_funcs.get_func(ea):
                continue
            s = "[*] Trying to define function at %#x... " % ea
            if not ida_funcs.add_func(ea):
                print(s + " Failed!")
            else:
                print(s + " Success!")
def dataScan(analyzer, scs):
    """Scan the code segments for orphan data blobs that represent analysis errors.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments
    """
    # First Scan - unreffed data chunks inside functions ==> should be converted to code
    for sc in scs:
        first_line = None
        end_line = None
        for line in sc.lines:
            # After the first, the rest of the lines should have 0 crefs
            if first_line is not None and ((not line.is_data) or len(list(line.drefs_to)) > 0 or len(list(line.crefs_to)) > 0):
                end_line = line
            # we only care about data lines with a single cref from the previous line
            elif first_line is None and ((not line.is_data) or len(list(line.drefs_to)) > 0 or len(list(line.crefs_to)) != 1 or sark.Line(list(line.crefs_to)[0]).next != line):
                end_line = line
            # don't mark switch entries
            elif analyzer.switch_identifier.isSwitchEntry(line.start_ea):
                end_line = line
            # Finally, check if it could be a function of some type
            elif first_line is None:
                first_line = line
                continue
            # Found an adjacent suitable line
            else:
                continue
            # Now check if we found something (end_line is always != None at this point)
            if first_line is not None and end_line is not None:
                chunk_start = first_line.start_ea
                chunk_end = end_line.start_ea
                # check that we can deduce anything on this current code type
                if not analyzer.supportedCodeType(analyzer.codeType(chunk_start)):
                    continue
                # check that the chunk before us is not the end of a function
                if analyzer.func_classifier.predictFunctionEnd(chunk_start):
                    # shouldn't really happen, do nothing in this case
                    pass
                # data chunk in the middle of a function, and not at it's end - convert it to code
                else:
                    analyzer.logger.debug("In-Function data chunk at: 0x%x - 0x%x (%d)", chunk_start, chunk_end, chunk_end - chunk_start)
                    ida_bytes.del_items(chunk_start, 0, chunk_end - chunk_start)
                    idc.create_insn(chunk_start)
            # reset the vars
            first_line = None
            end_line = None
    # Second scan - unreffed data chunks outside of functions ==> new functions, possibly of different code type
    size_limit = analyzer.func_classifier.functionStartSize()
    analyzer.logger.debug("Size limit for data scan is: %d", size_limit)
    conversion_candidates = []
    # recon pass
    for sc in scs:
        first_line = None
        end_line = None
        for line in sc.lines:
            # we only care about data lines without xrefs
            if (not line.is_data) or len(list(line.crefs_to)) > 0 or len(list(line.drefs_to)) > 0:
                end_line = line
            # check if it's big enough for the classifier
            elif line.size < size_limit:
                end_line = line
            # check if it looks like a string
            elif analyzer.str_identifier.isLocalAsciiString(line.start_ea, check_refs=False):
                analyzer.str_identifier.defineAsciiString(line.start_ea)
                end_line = line
            # make sure it isn't a switch entry
            elif analyzer.switch_identifier.isSwitchEntry(line.start_ea):
                end_line = line
            # Finally, check if it could be a function of some type
            elif first_line is None:
                first_line = line
                continue
            # Found an adjacent suitable line
            else:
                continue
            # Now check if we found something (end_line is always != None at this point)
            if first_line is not None and end_line is not None:
                chunk_start = first_line.start_ea
                chunk_end = end_line.start_ea
                guess_code_type = analyzer.func_classifier.predictFunctionStartType(chunk_start)
                original_code_type = analyzer.codeType(chunk_start)
                analyzer.logger.debug("Found a data chunk at: 0x%x - 0x%x (%d), (Type %d, Local type %d)", chunk_start, chunk_end, chunk_end - chunk_start, guess_code_type, original_code_type)
                # Check if this is the beginning of a function
                if analyzer.func_classifier.predictFunctionStart(chunk_start, guess_code_type):
                    conversion_candidates.append((chunk_start, chunk_end, guess_code_type, original_code_type))
            # reset the vars
            first_line = None
            end_line = None
    # conversion pass
    for chunk_start, chunk_end, guess_code_type, original_code_type in conversion_candidates:
        analyzer.logger.info("Found an isolated data chunk at: 0x%x - 0x%x (%d), (Type %d, Local type %d)", chunk_start, chunk_end, chunk_end - chunk_start, guess_code_type, original_code_type)
        ida_bytes.del_items(chunk_start, 0, chunk_end - chunk_start)
        if original_code_type != guess_code_type:
            analyzer.setCodeType(chunk_start, chunk_end, guess_code_type)
        idc.plan_and_wait(chunk_start, chunk_end)
        ida_funcs.add_func(chunk_start)
def implement(self):
    if not ida_funcs.add_func(self._start, self._end):
        ida_funcs.add_func(self._start)
def resolveFunctionChunks(analyzer, scs):
    """Resolve all of the (external) function chunks that we can manage.

    Args:
        analyzer (instance): analyzer instance to be used
        scs (list): list of (sark) code segments
    """
    seen_candidates = defaultdict(int)
    for sc in scs:
        for function in sc.functions:
            outer_blocks = []
            for block in idaapi.FlowChart(function.func_t):
                if block.end_ea < function.start_ea or function.end_ea <= block.start_ea:
                    try:
                        block_function = sark.Function(block.start_ea)
                    except sark.exceptions.SarkNoFunction:
                        block_function = None
                    # Only interested in chunks which are not already functions
                    if block_function is None or block_function.start_ea != block.start_ea:
                        outer_blocks.append(block)
                    # Function chunks which are switch cases, should be fixed
                    elif block_function is not None and analyzer.switch_identifier.isSwitchCase(block.start_ea):
                        analyzer.logger.debug("Deleted switch case function: 0x%x", block.start_ea)
                        idc.del_func(block.start_ea)
                        outer_blocks.append(block)
            # check if there is something to scan
            if len(outer_blocks) == 0:
                continue
            # start by resetting the function
            idc.del_func(function.start_ea)
            ida_funcs.add_func(function.start_ea)
    # Now try to check for chunks
    for sc in scs:
        for function in sc.functions:
            outer_blocks = []
            for block in idaapi.FlowChart(function.func_t):
                if block.end_ea < function.start_ea or function.end_ea <= block.start_ea:
                    try:
                        block_function = sark.Function(block.start_ea)
                    except sark.exceptions.SarkNoFunction:
                        block_function = None
                    # Only interested in chunks which are not already functions
                    if block_function is None or block_function.start_ea != block.start_ea:
                        outer_blocks.append(block)
                    # Function chunks which are switch cases, should be fixed
                    elif block_function is not None and analyzer.switch_identifier.isSwitchCase(block.start_ea):
                        analyzer.logger.debug("Deleted switch case function: 0x%x", block.start_ea)
                        idc.del_func(block.start_ea)
                        outer_blocks.append(block)
            # check if there is something to scan
            if len(outer_blocks) == 0:
                continue
            # scan the block for connectivity groups
            connectivity_mapping = {}
            connectivity_id = 0
            id_mappings = {}
            for block in outer_blocks:
                if block.start_ea not in connectivity_mapping:
                    connectivity_mapping[block.start_ea] = connectivity_id
                    id_mappings[connectivity_id] = connectivity_id
                    connectivity_id += 1
                cur_id = connectivity_mapping[block.start_ea]
                for succs in block.succs():
                    # if unmarked, add him to our group
                    if succs.start_ea not in connectivity_mapping:
                        connectivity_mapping[succs.start_ea] = cur_id
                    # if marked, set our group ID to match his group ID (effectively using the minimal ID)
                    else:
                        id_mappings[cur_id] = id_mappings[connectivity_mapping[succs.start_ea]]
            # Now pick the minimal candidate of each connectivity group
            group_candidate_mapping = {}
            for block in outer_blocks:
                cur_id = id_mappings[connectivity_mapping[block.start_ea]]
                if cur_id not in group_candidate_mapping:
                    group_candidate_mapping[cur_id] = block.start_ea
                else:
                    group_candidate_mapping[cur_id] = min(block.start_ea, group_candidate_mapping[cur_id])
            # Now fix mis-analysed switch cases
            original_start = function.start_ea
            original_end = function.end_ea
            tentative_func_end = original_end
            for cur_id, candidate in group_candidate_mapping.items():
                seen_candidates[candidate] += 1
                # Handle the switch cases
                if analyzer.switch_identifier.isSwitchCase(candidate):
                    tentative_func_end = max(tentative_func_end, candidate)
            # check if we had a switch case outside of our function
            if tentative_func_end > original_end:
                # scan the range and delete each function in it
                for offset in range(tentative_func_end - original_end):
                    try:
                        func = sark.Function(original_end + offset)
                        if func.end_ea != original_end:
                            idc.del_func(func.start_ea)
                            analyzer.logger.debug("Deleted function at: 0x%x", func.end_ea)
                    except sark.exceptions.SarkNoFunction:
                        pass
                # now re-define the original function
                analyzer.logger.debug("Re-defined the (switch) function at: 0x%x", original_start)
                idc.del_func(original_start)
                ida_funcs.add_func(original_start)
                # can move on to the next function
                continue
            # Each candidate should be a function on it's own (unless it is already contained in another function)
            for cur_id, candidate in group_candidate_mapping.items():
                idc.del_func(original_start)
                external_func = None
                contained_chunk = False
                # Check what happens when the candidate is adjacent to the end of the function
                if candidate == original_end:
                    idc.del_func(candidate)
                    contained_chunk = True
                else:
                    # candidate might be inside a different function
                    try:
                        func = sark.Function(candidate)
                        # If our chunk is the legit ending of a given function, don't ruin it
                        contained_chunk = func.start_ea <= candidate and candidate < func.end_ea
                        if func.start_ea != original_start and not contained_chunk:
                            external_func = func.start_ea
                            idc.del_func(func.start_ea)
                    except sark.exceptions.SarkNoFunction:
                        pass
                # Should the chunk be a standalone function?
                if not contained_chunk:
                    ida_funcs.add_func(candidate)
                # Restore the original function
                ida_funcs.add_func(original_start)
                # If needed, restore the external (container) function
                if external_func is not None:
                    ida_funcs.add_func(external_func)
                analyzer.logger.debug("Re-defined the function at: 0x%x, candidate at: 0x%x", original_start, candidate)
def thumbsUp(analyzer, sc, aggressive=False, align=False):
    """Use various metrics in order to locate / fix code type transitions.

    Args:
        analyzer (instance): analyzer instance to be used
        sc (segment): (sark) code segment to work on
        aggressive (bool, optional): True iff should use aggressive heuristics (False by default)
        align (bool, optional): True iff should use align-based heuristics (False by default)

    Notes
    -----
        1. Convert (cancel) a code region that is contained inside the same function, and contains unexplored bytes (not a Chunk, and contains no functions)
        2. Convert (cancel) a code region that is misaligned and contains no functions
        3. Aggressive - Convert (cancel) a code region if the classifier doesn't agree on it's start
        4. Aggressive - Convert (cancel) a code region if it contains illegal code lines / unknowns, and it contains no functions
        5. Aggressive - Convert (cancel) a pointed code region that could be misinterpreted, and that contains no functions (+ delete the fptr)
        6. Aggressive - Convert (cancel) a code region that begins on a function start, that could be misinterpreted, and that contains no functions
        7. Resize a code region that needs a little alignment
        8. In all of the heuristics, if the code region before us was OK and we merged with him, there is no need to check it again.
    """
    regions_fixed = 1
    line = sark.Line(sc.start_ea)
    regions = CodeRegions()
    first_round = True
    is_fptr_pointed = False
    code_aligned = False
    region_start = 0
    metric = None
    # Only continue if we changed something during the current round
    while regions_fixed > 0:
        regions_fixed = 0
        starting_new_region = True  # edge case for the first line in the section
        dummy_mode = False
        prev_code_type = None
        region_converted = False
        region_code_type = None
        if not first_round:
            interesting_regions = regions.changedRegions()
            analyzer.logger.debug("%d interesting regions", len(interesting_regions))
            # edge case, if we have nothing to do
            if len(interesting_regions) == 0:
                break
            line = sark.Line(interesting_regions[0].start)
            region_offset = -1
        # iterate the current region
        while line.start_ea < sc.end_ea:
            if not starting_new_region:
                # check if we found a transitions
                new_code_type = analyzer.codeType(line.start_ea)
                # no change, just keep on
                if region_code_type == new_code_type:
                    if not dummy_mode:
                        metric.record(line)
                    line = line.next
                    continue
                # we found a transition
                region_end = line.start_ea
                region_converted = False
                if first_round:
                    region = CodeRegion(region_start, region_end, region_code_type)
                    regions.insert(region)
                # in dummy mode, don't do a thing
                if dummy_mode:
                    metrics = []
                    analyzer.logger.debug("Dummy region of code type %d in range 0x%x - 0x%x", region_code_type, region_start, region_end)
                # actually do something
                else:
                    # get the metrics
                    metric.stop(region_end)
                    # suffix / align metrics
                    align_metric = metric.alignMetric()
                    metrics = [metric] + ([align_metric] if align_metric is not None else [])
                    first_metric_region_fixed = True
                    aligned_region_fixed = True
                # Examine both metrics
                for code_metric in metrics:
                    contains_functions = code_metric.containsFunctions()
                    unknown_count, unknown_ratio = code_metric.unknowns()
                    illegal_count, illegal_ratio = code_metric.illegals()
                    has_unknown_or_illegal = unknown_count > 0 or illegal_count > 0
                    containing_function = code_metric.containingFunction()
                    start_function = code_metric.startFunction()
                    metric_region_start, metric_region_end = code_metric.borders()
                    metric_region_size = metric_region_end - metric_region_start
                    # special case for the last metric
                    if code_metric == align_metric:
                        aligned_region = True
                        metric_name = "Aligned"
                    else:
                        aligned_region = False
                        metric_name = "Regular"
                    # debug prints
                    analyzer.logger.debug("%s Metric: Code type %d used in range 0x%x - 0x%x (Pointed: %s, Contains functions: %s)", metric_name, region_code_type, metric_region_start, metric_region_end, str(is_fptr_pointed), contains_functions)
                    if unknown_count > 0:
                        analyzer.logger.debug("Unknowns %d / %d Overall size = %f%%", unknown_count, metric_region_size, unknown_ratio * 100)
                    if illegal_count > 0:
                        analyzer.logger.debug("Illegals %d / %d Overall size = %f%%", illegal_count, metric_region_size, illegal_ratio * 100)
                    # Check if we can flip this region
                    # 1. The entire code region is contained inside the same function, and contains unexplored bytes (not a Chunk, and contains no functions)
                    if containing_function is not None and containing_function.start_ea < metric_region_start and metric_region_end <= containing_function.end_ea and\
                            has_unknown_or_illegal and not contains_functions:
                        analyzer.logger.info("Code region is contained inside a single function - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 2. Misaligned region
                    elif not aligned_region and not code_aligned and not contains_functions:
                        analyzer.logger.info("Misaligned code region without any functions - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 3. Aggressive - Classifier doesn't agree about this region's start
                    elif aggressive and\
                            not aligned_region and\
                            analyzer.func_classifier.predictFunctionStartType(metric_region_start) != region_code_type and\
                            analyzer.func_classifier.predictFunctionStartType(metric_region_end) == new_code_type and\
                            not contains_functions:
                        analyzer.logger.info("Classifier doesn't agree about the code region's start, and it has no functions - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 4. Aggressive - Unknowns and no functions
                    elif aggressive and\
                            has_unknown_or_illegal and not contains_functions:
                        analyzer.logger.info("Code region contains unexplored bytes, and it has no functions - fixing it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 5. Aggressive - pointed region that could be misinterpreted + no functions
                    elif aggressive and\
                            not aligned_region and\
                            is_fptr_pointed and\
                            prev_code_type is not None and\
                            ((not analyzer.func_classifier.predictFunctionEnd(metric_region_start, prev_code_type)) or\
                                ((metric_region_size <= analyzer.addressSize()) and not analyzer.func_classifier.predictFunctionEnd(metric_region_end, region_code_type)) or\
                                ((metric_region_size <= analyzer.addressSize()) and not analyzer.func_classifier.predictFunctionStart(metric_region_end, new_code_type)) or\
                                analyzer.func_classifier.predictFunctionStart(metric_region_start, new_code_type)) and\
                            not contains_functions:
                        analyzer.logger.info("Code region is fptr pointed, classifier says it's not a function end, and it has no functions - cancel it")
                        # delete the fptr
                        analyzer.fptr_identifier.deleteFptr(metric_region_start, region_code_type)
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 6. Aggressive - region on function start, that could be misinterpreted + no functions
                    elif aggressive and\
                            not aligned_region and\
                            start_function is not None and metric_region_start == start_function.start_ea and\
                            analyzer.func_classifier.predictFunctionStart(metric_region_start, new_code_type) and\
                            not contains_functions:
                        analyzer.logger.info("Code region is a function start, classifier prefers a different code type, and it has no functions - cancel it")
                        convertRegion(analyzer, metric_region_start, metric_region_end)
                        regions.convert(region, new_code_type)
                        region_converted = True
                        regions_fixed += 1
                    # 7. Needs a little alignment
                    elif not aligned_region and not code_aligned:
                        analyzer.logger.debug("Code region is not aligned, align it down (resize)")
                        resized_start = analyzer.alignTransitionAddress(metric_region_start, region_code_type)
                        resizeRegion(analyzer, metric_region_start, metric_region_end, resized_start, metric_region_end)
                        regions.resizeStart(region, resized_start)
                        regions_fixed += 1
                    # Nothing for now
                    else:
                        if aligned_region:
                            aligned_region_fixed = False
                        else:
                            first_metric_region_fixed = False
                    # Aligned region should start with a function
                    if aligned_region and aligned_region_fixed:
                        ida_funcs.add_func(metric_region_start)
                    # Break the loop and start the new region
                    if first_metric_region_fixed:
                        break
                # if our region was converted, there is no need to scan the current region (partial data)
                dummy_mode = region_converted and first_round
                # new region - check if finished the list
                if not first_round:
                    region_offset += 1
                    if region_offset >= len(interesting_regions):
                        break
                    # check if we need to skip the next one too
                    if region_converted and region.next == interesting_regions[region_offset] and interesting_regions[region_offset].code_type == new_code_type:
                        region_offset += 1
                        if region_offset >= len(interesting_regions):
                            break
                    region = interesting_regions[region_offset]
                    line = sark.Line(region.start)
                    region_start = line.start_ea
                    if region.prev is not None:
                        prev_code_type = region.prev.code_type
                    else:
                        prev_code_type = None
                # the simple case
                else:
                    # the code type could have changed, so we re-sample it
                    if region_code_type is not None:
                        prev_code_type = analyzer.codeType(region_start)
            region_start = line.start_ea
            # get the current code type (even in dummy mode)
            region_code_type = analyzer.codeType(line.start_ea)
            if not dummy_mode:
                code_aligned = analyzer.isCodeTransitionAligned(region_start)
            starting_new_region = False
            # measure the metrics
            metric = CodeMetric(analyzer, region_start, measure_align=align)
            metric.start(line)
            # check if started because of one of our function pointers
            is_fptr_pointed = analyzer.fptr_identifier.isPointedFunction(region_start)
            # advance to the next line
            if first_round:
                line = line.next
        # log the result
        analyzer.logger.info("Fixed %d code regions in this iteration", regions_fixed)
        first_round = False
def load_file(fd, neflags, format):
    global prologues
    global br_flag
    size = 0
    base_addr = 0
    ea = 0
    nfunc = 0

    idaapi.set_processor_type("arm", ida_idp.SETPROC_LOADER_NON_FATAL)
    idaapi.get_inf_structure().lflags |= idaapi.LFLG_64BIT

    if (neflags & idaapi.NEF_RELOAD) != 0:
        return 1

    fd.seek(0, idaapi.SEEK_END)
    size = fd.tell()

    segm = idaapi.segment_t()
    segm.bitness = 2  # 64-bit
    segm.start_ea = 0
    segm.end_ea = size

    if br_flag == False:
        idaapi.add_segm_ex(segm, "iBoot", "CODE", idaapi.ADDSEG_OR_DIE)
    else:
        idaapi.add_segm_ex(segm, "SecureROM", "CODE", idaapi.ADDSEG_OR_DIE)

    fd.seek(0)
    fd.file2base(0, 0, size, False)

    idaapi.add_entry(0, 0, "start", 1)
    ida_funcs.add_func(ea)

    print("[+] Marked as code")

    # heuristic
    while True:
        mnemonic = idc.print_insn_mnem(ea)
        if "LDR" in mnemonic:
            base_str = idc.print_operand(ea, 1)
            base_addr = int(base_str.split("=")[1], 16)
            break
        ea += 4

    print("[+] Rebasing to address 0x%x" % (base_addr))
    idaapi.rebase_program(base_addr, idc.MSF_NOFIX)

    segment_start = base_addr
    segment_end = idc.get_segm_attr(segment_start, idc.SEGATTR_END)
    ea = segment_start

    print("[+] Searching and defining functions")
    for prologue in prologues:
        while ea != ida_idaapi.BADADDR:
            ea = ida_search.find_binary(ea, segment_end, prologue, 16, ida_search.SEARCH_DOWN)
            if ea != ida_idaapi.BADADDR:
                if len(prologue) < 8:
                    ea = ea - 2
                if (ea % 4) == 0 and ida_bytes.get_full_flags(ea) < 0x200:
                    # print("[+] Defining a function at 0x%x" % (ea))
                    ida_funcs.add_func(ea)
                    nfunc = nfunc + 1
                ea = ea + 4

    idc.plan_and_wait(segment_start, segment_end)

    print("[+] Identified %d new functions" % (nfunc))

    print("[+] Looking for interesting functions")
    find_interesting(segment_start, segment_end)

    return 1
def __call__(self):
    ida_funcs.add_func(self.start_ea, self.end_ea)
def CallStackWalk(nn):
    class Result:
        """
        Class holding the result of one call stack item
        Each call stack item instance has the following attributes:
            caller = ea of caller
            displ  = display string
            sp     = stack pointer
        """
        def __init__(self, caller, sp):
            self.caller = caller
            self.sp = sp
            f = ida_funcs.get_func(caller)
            self.displ = "%08x: " % caller
            if f:
                self.displ += ida_funcs.get_func_name(caller)
                t = caller - f.start_ea
                if t > 0:
                    self.displ += "+" + hex(t)
            else:
                self.displ += hex(caller)
            self.displ += " [" + hex(sp) + "]"

        def __str__(self):
            return self.displ

    # get stack pointer
    sp = idautils.cpu.Esp
    seg = ida_segment.getseg(sp)
    if not seg:
        return (False, "Could not locate stack segment!")

    stack_seg = Seg(seg)
    word_size = 2 ** (seg.bitness + 1)
    callers = []
    sp = idautils.cpu.Esp - word_size
    while sp < stack_seg.end_ea:
        sp += word_size
        ptr = next(idautils.GetDataList(sp, 1, word_size))
        seg = ida_segment.getseg(ptr)
        # only accept executable segments
        if (not seg) or ((seg.perm & ida_segment.SEGPERM_EXEC) == 0):
            continue
        # try to find caller
        caller = IsPrevInsnCall(ptr)
        # we have no recognized caller, skip!
        if caller is None:
            continue
        # do we have a debug name that is near?
        if nn:
            ret = nn.find(caller)
            if ret:
                ea = ret[0]
                # function exists?
                f = ida_funcs.get_func(ea)
                if not f:
                    # create function
                    ida_funcs.add_func(ea)
        # get the flags
        f = ida_bytes.get_flags(caller)
        # no code there?
        if not ida_bytes.is_code(f):
            ida_ua.create_insn(caller)
        callers.append(Result(caller, sp))
    #
    return (True, callers)
procs = {
    0x18: "Create",
    0x1C: "Destroy",
    0x20: "Icon",
    0x24: "Paint",
    0x28: "Size",
    0x2C: "Input",
    0x30: "Focus",
    0x34: "Scroll",
    0x38: "Data",
    0x3C: "Help"
}

if res[0] == 1:
    regfunc = res[1]
    regcall = ida_xref.get_first_cref_to(regfunc)
    while regcall != ida_idaapi.BADADDR:
        insn = ida_ua.insn_t()
        ida_ua.decode_insn(insn, regcall)
        for i in range(20):
            if insn.get_canon_mnem() == 'mov' and insn.Op1.type == 4:
                if insn.Op1.addr in procs:
                    ida_offset.op_plain_offset(insn.ea, 1, code_ea)
                    target = code_ea + insn.Op2.value
                    ida_funcs.add_func(target, ida_idaapi.BADADDR)
                    ida_name.set_name(target, procs[insn.Op1.addr] + "_" + hex(regcall)[2:])
            ida_ua.decode_prev_insn(insn, insn.ea)
        regcall = ida_xref.get_next_cref_to(regfunc, regcall)
def create_reset_vectors():
    """
    Disassemble and name reset vectors (except RST0, which will be set as entry)
    """
    ida_funcs.add_func(0x0008)
    idaapi.set_name(0x0008, 'RST1', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
    ida_funcs.add_func(0x0010)
    idaapi.set_name(0x0010, 'RST2', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
    ida_funcs.add_func(0x0018)
    idaapi.set_name(0x0018, 'RST3', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
    ida_funcs.add_func(0x0020)
    idaapi.set_name(0x0020, 'RST4', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
    ida_funcs.add_func(0x0028)
    idaapi.set_name(0x0028, 'RST5', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
    ida_funcs.add_func(0x0030)
    idaapi.set_name(0x0030, 'RST6', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
    ida_funcs.add_func(0x0038)
    idaapi.set_name(0x0038, 'RST7', idaapi.SN_NOWARN | idaapi.SN_NOLIST | idaapi.SN_NOCHECK)
def find_bios_funcs():
    ida_bytes.create_strlit(0x06000200, 16, ida_nalt.STRTYPE_C)
    ida_bytes.create_byte(0x06000210, 36)
    make_vector(0x06000234, "")
    make_vector(0x06000238, "")
    make_vector(0x0600023C, "")
    ida_bytes.create_strlit(0x06000240, 4, ida_nalt.STRTYPE_C)
    ida_bytes.create_strlit(0x06000244, 4, ida_nalt.STRTYPE_C)
    ida_bytes.create_dword(0x06000248, 4)
    ida_bytes.create_dword(0x0600024C, 4)
    make_vector(0x06000250, "")
    ida_bytes.create_dword(0x06000264, 4)
    make_vector(0x06000268, "")
    make_vector(0x0600026C, "bios_run_cd_player")
    make_vector(0x06000270, "")
    make_vector(0x06000274, "bios_is_mpeg_card_present")
    ida_bytes.create_dword(0x06000278, 4)
    ida_bytes.create_dword(0x0600027C, 4)
    make_vector(0x06000280, "")
    make_vector(0x06000284, "")
    make_vector(0x06000288, "")
    make_vector(0x0600028C, "")
    ida_bytes.create_dword(0x06000290, 4)
    ida_bytes.create_dword(0x06000294, 4)
    make_vector(0x06000298, "bios_get_mpeg_rom")
    make_vector(0x0600029C, "")
    ida_bytes.create_dword(0x060002A0, 4)
    ida_bytes.create_dword(0x060002A4, 4)
    ida_bytes.create_dword(0x060002A8, 4)
    ida_bytes.create_dword(0x060002AC, 4)
    make_vector(0x060002B0, "")
    ida_bytes.create_dword(0x060002B4, 4)
    ida_bytes.create_dword(0x060002B8, 4)
    ida_bytes.create_dword(0x060002BC, 4)
    ida_bytes.create_dword(0x060002C0, 4)
    # for (i = 0x060002C4; i < 0x06000324; i+=4)
    for i in range(0x060002C4, 0x06000324, 4):
        make_vector(i, "")
    idc.set_name(0x06000300, "bios_set_scu_interrupt")
    idc.set_name(0x06000304, "bios_get_scu_interrupt")
    idc.set_name(0x06000310, "bios_set_sh2_interrupt")
    idc.set_name(0x06000314, "bios_get_sh2_interrupt")
    idc.set_name(0x06000320, "bios_set_clock_speed")
    ida_bytes.create_dword(0x06000324, 4)
    idc.set_name(0x06000324, "bios_get_clock_speed")
    # for (i = 0x06000328; i < 0x06000348; i+=4)
    for i in range(0x06000328, 0x06000348, 4):
        make_vector(i, "")
    idc.set_name(0x06000340, "bios_set_scu_interrupt_mask")
    idc.set_name(0x06000344, "bios_change_scu_interrupt_mask")
    ida_bytes.create_dword(0x06000348, 4)
    idc.set_name(0x06000348, "bios_get_scu_interrupt_mask")
    make_vector(0x0600034C, "")
    ida_bytes.create_dword(0x06000350, 4)
    ida_bytes.create_dword(0x06000354, 4)
    ida_bytes.create_dword(0x06000358, 4)
    ida_bytes.create_dword(0x0600035C, 4)
    for i in range(0x06000360, 0x06000380, 4):
        make_vector(i, "")
    ida_bytes.create_byte(0x06000380, 16)
    ida_bytes.create_word(0x06000390, 16)
    ida_bytes.create_dword(0x060003A0, 32)
    ida_bytes.create_strlit(0x060003C0, 0x40, ida_nalt.STRTYPE_C)
    ida_funcs.add_func(0x06000600, ida_idaapi.BADADDR)
    ida_funcs.add_func(0x06000646, ida_idaapi.BADADDR)
    ida_bytes.create_strlit(0x0600065C, 0x4, ida_nalt.STRTYPE_C)
    ida_funcs.add_func(0x06000678, ida_idaapi.BADADDR)
    ida_funcs.add_func(0x0600067C, ida_idaapi.BADADDR)
    ida_funcs.add_func(0x06000690, ida_idaapi.BADADDR)
    ida_bytes.create_dword(0x06000A80, 0x80)
    return 1