def argv(self, func):
    '''
    Attempts to identify what types of arguments are passed to a given
    function. Currently unused.

    For each call cross-reference to func, the instructions from the call
    (plus any delay slot) back to ten instructions before it are examined.
    When an instruction modifies an argument register and references a
    string literal longer than four characters, the matching entry of the
    argument list is set to the str type.

    @func - The function whose call sites are examined.

    Yields the argument list once per call site. The same list object is
    yielded every time, so entries accumulate across call sites. Nothing
    is yielded when the architecture is unknown.
    '''
    args = [None for x in self.arch.argv]

    if not self.arch.unknown:
        start_ea = ida_shims.start_ea(func)
        for xref in idautils.XrefsTo(start_ea):
            if idaapi.is_call_insn(xref.frm):
                # Keep the call site in its own name; the string lookup
                # below iterates other xrefs and must not clobber it.
                call_ea = xref.frm

                # NOTE(review): the result is unused, but older IDA APIs
                # appear to work on the globally decoded instruction, so
                # the call is kept -- confirm before removing.
                ida_shims.decode_insn(call_ea)

                ea = call_ea + (self.arch.delay_slot * self.arch.insn_size)
                end_ea = call_ea - (self.arch.insn_size * 10)

                while ea >= end_ea:
                    # Stop at a basic block end or at another call
                    if idaapi.is_basic_block_end(ea) or \
                            (ea != call_ea and idaapi.is_call_insn(ea)):
                        break

                    insn = ida_shims.decode_insn(ea)
                    features = ida_shims.get_canon_feature(insn)
                    ops = ida_shims.get_operands(insn)

                    for n in range(0, len(self.CHANGE_OPND)):
                        if ops[n].type in [
                                idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase
                        ]:
                            try:
                                regname = self.arch.registers[ops[n].reg]
                                index = self.arch.argv.index(regname)
                            except ValueError:
                                # Not an argument register
                                continue

                            if features & self.CHANGE_OPND[n]:
                                for str_xref in idautils.XrefsFrom(ea):
                                    # TODO: Where is this xref type defined?
                                    if str_xref.type == 1:
                                        string = \
                                            ida_shims.get_strlit_contents(
                                                str_xref.to)
                                        if string and len(string) > 4:
                                            args[index] = str
                                        break

                    ea -= self.arch.insn_size

                yield args
def _get_api(sea):
    """Collect the API names called from the function at `sea`.

    Every call instruction of the function is inspected. The first far
    cross-reference of the call is taken as its target. If the target's
    function flags mark it as library code or a thunk, its name is
    recorded; otherwise the call is counted as a non-API call.

    :param sea: address of the function to inspect.
    :return: tuple ``(calls, api)`` - the number of calls that did not
        resolve to an API, and the list of API names. ``(0, [])`` is
        returned when the function itself is library code or a thunk.
    """
    calls = 0
    api = []
    flags = idc.GetFunctionFlags(sea)
    # ignore library functions
    if flags & idc.FUNC_LIB or flags & idc.FUNC_THUNK:
        return calls, api

    for instr in list(idautils.FuncItems(sea)):
        if not idaapi.is_call_insn(instr):
            continue

        target = None
        for xref in idautils.XrefsFrom(instr, idaapi.XREF_FAR):
            if xref.to is not None:
                target = xref.to
                break

        if target is None:
            # Unresolved call: counted exactly once per instruction.
            calls += 1
            continue

        api_flags = idc.GetFunctionFlags(target)
        # `x & FLAG is True` compares an int with True and never matches;
        # test the masked bits for truthiness instead.
        if api_flags & idaapi.FUNC_LIB or api_flags & idaapi.FUNC_THUNK:
            tmp_api_name = idc.NameEx(0, target)
            if tmp_api_name:
                api.append(tmp_api_name)
        else:
            calls += 1
    return calls, api
def get_apis(func_addr):
    """List the names of the functions called from `func_addr`.

    The first far cross-reference of each call instruction is taken as
    the call target, and the name of the function at that target is
    appended to the result.

    :param func_addr: address of the function to inspect.
    :return: ``None`` when the function is library code or a thunk,
        otherwise a tuple ``(calls, apis)`` where ``calls`` is the number
        of call instructions without a resolvable target and ``apis`` is
        the list of callee names.
    """
    calls = 0
    apis = []
    flags = GetFunctionFlags(func_addr)
    # ignore library functions
    if flags & FUNC_LIB or flags & FUNC_THUNK:
        logging.debug("get_apis: Library code or thunk")
        return None

    for instr in list(FuncItems(func_addr)):
        if not idaapi.is_call_insn(instr):
            continue

        # In theory an API address should only have one xref. The xrefs
        # approach was used because the author could not find how to get
        # the API name by address.
        tmp_api_address = None
        for xref in XrefsFrom(instr, idaapi.XREF_FAR):
            if xref.to is not None:
                tmp_api_address = xref.to
                break

        # move on to the next instruction: no target could be found
        if tmp_api_address is None:
            calls += 1
            continue

        tmp_api_name = GetFunctionName(tmp_api_address)
        # Former unconditional debug print, routed through logging.
        logging.debug("get_apis: call target %s", tmp_api_name)
        apis.append(tmp_api_name)
    return (calls, apis)
def _find_leafs(self):
    '''
    Finds all leaf functions (functions containing no call instructions)
    and appends a Function entry for each one to self.functions, which is
    then sorted by xref count, largest first.
    '''
    # Loop through every function
    for func_ea in idautils.Functions():
        func = idaapi.get_func(func_ea)
        if not func:
            continue

        start_ea = ida_shims.start_ea(func)
        end_ea = ida_shims.end_ea(func)
        leaf_function = True
        ea = start_ea

        # Loop through all instructions in this function looking
        # for call instructions; if found, then this is not a leaf.
        # The function end address is exclusive, so stop before it;
        # otherwise a call at the start of the next function would
        # wrongly disqualify this one.
        while ea < end_ea:
            # NOTE(review): decode result unused; kept in case older IDA
            # APIs depend on the globally decoded instruction -- confirm.
            ida_shims.decode_insn(ea)
            if idaapi.is_call_insn(ea):
                leaf_function = False
                break

            ea = ida_shims.next_head(ea)

        if leaf_function:
            self.functions.append(
                Function(start=start_ea,
                         end=end_ea,
                         leaf=True,
                         loop=self.has_loop(func),
                         argc=self.argp.argc(func)))

    # Sort leafs by xref count, largest first
    self.functions.sort(key=lambda f: f.xrefs, reverse=True)
def is_call(self):
    """
    Report whether the instruction at this object's address is a call.

    :return bool: True if this instruction is a call, False otherwise.
    """
    address = self.ea
    return idaapi.is_call_insn(address)
def _add_child_subs(self, root, ea, tid): if ea == 0 or root.text(4) == "y": return for x in idautils.FuncItems(ea): if idaapi.is_call_insn(x): fname, target_addr, is_api, callee_id = self._logged_call( x, tid) if self._valid_call(x, target_addr) and fname: current_root = QTreeWidgetItem(root, [ fname, hex(int(x)), "0", hex(int(target_addr)), "n", hex(int(is_api)), hex(int(tid)), hex(int(callee_id)) ]) current_root.setFlags(current_root.flags() & ~QtCore.Qt.ItemIsEditable) try: self._tags[get_api_tag(fname)].append( [root, current_root]) except KeyError: self._tags[get_api_tag(fname)] = [[root, current_root]] self._tags["All"].append(current_root) self._add_child_subs(current_root, target_addr, tid) root.setText(4, "y")
def _find_leafs(self):
    '''
    Finds all leaf functions (functions containing no call instructions)
    and appends a Function entry for each one to self.functions, which is
    then sorted by xref count, largest first.
    '''
    # Loop through every function
    for func_ea in idautils.Functions():
        func = idaapi.get_func(func_ea)
        if not func:
            continue

        leaf_function = True
        ea = func.startEA

        # Loop through all instructions in this function looking
        # for call instructions; if found, then this is not a leaf.
        # func.endEA is exclusive, so stop before it; otherwise the first
        # instruction of whatever follows the function is inspected too.
        while ea < func.endEA:
            idaapi.decode_insn(ea)
            if idaapi.is_call_insn(ea):
                leaf_function = False
                break

            ea += self.arch.insn_size

        if leaf_function:
            self.functions.append(
                Function(
                    start=func.startEA,
                    end=func.endEA,
                    leaf=True,
                    loop=self.has_loop(func),
                    argc=self.argp.argc(func),
                )
            )

    # Sort leafs by xref count, largest first
    self.functions.sort(key=lambda f: f.xrefs, reverse=True)
def enum_calls_using_var_arg(func_addr):
    """Find call sites of `func_addr` whose nearest preceding push is a register.

    For each call cross-reference to `func_addr`, the instructions before
    the call are walked backwards (at most `walk_limit` steps) until a
    ``push`` is found. The call site is reported when the first operand
    of that push is a register.

    :param func_addr: address of the called function.
    :return: ``None`` if `func_addr` is falsy or BADADDR, otherwise a list
        of call-site addresses.
    """
    if not func_addr or func_addr == BADADDR:
        return None

    walk_limit = 5
    varg_calls = []
    xrefs = {
        xref.frm
        for xref in XrefsTo(func_addr, 0) if idaapi.is_call_insn(xref.frm)
    }

    for xref in xrefs:
        walk = 0
        prev_insn = DecodePreviousInstruction(xref)

        # starting at the call instruction, walk the instructions in
        # reverse order until we find a push or hit the limit.
        # DecodePreviousInstruction returns None when nothing can be
        # decoded, so guard against it instead of crashing.
        while (prev_insn is not None
               and stricmp(prev_insn.get_canon_mnem(), "push")
               and walk < walk_limit):
            prev_insn = DecodePreviousInstruction(prev_insn.ea)
            walk = walk + 1

        if prev_insn is None:
            # Ran out of decodable instructions before finding a push.
            continue

        if walk >= walk_limit:
            print("[!] Reached walk limit for xref at %x." % xref)
            continue

        if prev_insn.Op1.type == o_reg:
            varg_calls.append(xref)

    return varg_calls
def trace_data(ea, min_ea, op_type, op_val):
    '''
    Trace backwards from ea towards min_ea, following the operand given by
    (op_type, op_val) through LDR/MOV/STR instructions.

    Each instruction whose destination operand matches the tracked operand
    is passed to mark_instruction(), and tracking continues with that
    instruction's source operand. e.g: LDR R0, R3

    The trace stops at min_ea, at BADADDR, or at a call instruction while
    register 0 is being tracked (the call would overwrite R0).

    Nothing is returned; the result is the set of marked instructions.
    '''
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # BL/BLX that will modify the R0
            return None

        mnem = idc.GetMnem(ea)
        if mnem in ('LDR', 'MOV'):
            src_op, dest_op = 1, 0
        elif mnem == 'STR':
            src_op, dest_op = 0, 1
        else:
            continue

        if idc.GetOpType(ea, dest_op) == op_type and \
                idc.GetOperandValue(ea, dest_op) == op_val:
            mark_instruction(ea)
            # Keep following the value through its source operand
            op_type = idc.GetOpType(ea, src_op)
            op_val = idc.GetOperandValue(ea, src_op)
def graph_down(ea, path=None):
    """
    Recursively collect all function calls.

    Starting at the function containing `ea`, follows every far code
    cross-reference made by a call instruction and recurses into callees
    not yet visited. Calls into thunks and library functions are ignored.

    Copied with minor modifications from
    http://hooked-on-mnemonics.blogspot.com/2012/07/renaming-subroutine-blocks-and.html

    :param ea: address of the function to start from.
    :param path: set of addresses already visited; a fresh set is created
        when omitted, so separate top-level calls do not share state.
    :return: the set of visited function addresses (including `ea`).
    """
    if path is None:
        path = set()
    path.add(ea)

    #
    # extract all the call instructions from the current function
    #

    call_instructions = []
    instruction_info = idaapi.insn_t()
    for address in idautils.FuncItems(ea):

        # decode the instruction
        if not idaapi.decode_insn(instruction_info, address):
            continue

        # check if this instruction is a call
        if not idaapi.is_call_insn(instruction_info):
            continue

        # save this address as a call instruction
        call_instructions.append(address)

    #
    # iterate through all the call instructions in the target function
    # (ea) and inspect their destinations
    #

    for x in call_instructions:

        # TODO
        for r in idautils.XrefsFrom(x, idaapi.XREF_FAR):
            if not r.iscode:
                continue

            # get the function pointed at by this call
            func = idaapi.get_func(r.to)
            if not func:
                continue

            # ignore calls to imports / library calls / thunks
            if (func.flags & (idaapi.FUNC_THUNK | idaapi.FUNC_LIB)) != 0:
                continue

            #
            # if we have not traversed to the destination function that
            # this call references, recurse down to it to continue our
            # traversal
            #

            if r.to not in path:
                graph_down(r.to, path)

    return path
def highlight(self, color=COLOR):
    """Toggle the highlight colour of every call instruction.

    Call instructions currently coloured with self.COLOR are reset to the
    default colour, and those with the default colour are set to
    self.COLOR. Items with any other colour are left untouched.

    The `color` argument is accepted but not used by this implementation.
    """
    for head in idautils.Heads():
        if not idaapi.isCode(idaapi.getFlags(head)):
            continue
        if not idaapi.is_call_insn(head):
            continue

        existing = idaapi.get_item_color(head)
        if existing == self.COLOR:
            idaapi.set_item_color(head, idc.DEFCOLOR)
        elif existing == idc.DEFCOLOR:
            idaapi.set_item_color(head, self.COLOR)
def argv(self, func):
    '''
    Attempts to identify what types of arguments are passed to a given
    function. Currently unused.

    For each call cross-reference to func, the instructions from the call
    (plus any delay slot) back to ten instructions before it are examined.
    When an instruction modifies an argument register and references a
    string longer than four characters, the matching entry of the argument
    list is set to the str type.

    @func - The function whose call sites are examined.

    Yields the argument list once per call site. The same list object is
    yielded every time, so entries accumulate across call sites.
    '''
    args = [None for x in self.arch.argv]

    for xref in idautils.XrefsTo(func.startEA):
        if idaapi.is_call_insn(xref.frm):
            # Keep the call site in its own name; the string lookup below
            # iterates other xrefs and must not clobber it.
            call_ea = xref.frm
            idaapi.decode_insn(call_ea)

            ea = call_ea + (self.arch.delay_slot * self.arch.insn_size)
            end_ea = (call_ea - (self.arch.insn_size * 10))

            while ea >= end_ea:
                # Stop searching if we've reached a conditional block or
                # another call
                if idaapi.is_basic_block_end(ea) or (
                        ea != call_ea and idaapi.is_call_insn(ea)):
                    break

                idaapi.decode_insn(ea)
                features = idaapi.cmd.get_canon_feature()

                for n in range(0, len(self.CHANGE_OPND)):
                    if idaapi.cmd.Operands[n].type in [
                            idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase
                    ]:
                        try:
                            regname = self.arch.registers[
                                idaapi.cmd.Operands[n].reg]
                            index = self.arch.argv.index(regname)
                        except ValueError:
                            # Not an argument register
                            continue

                        if features & self.CHANGE_OPND[n]:
                            for str_xref in idautils.XrefsFrom(ea):
                                # TODO: Where is this xref type defined?
                                if str_xref.type == 1:
                                    string = idc.GetString(str_xref.to)
                                    if string and len(string) > 4:
                                        args[index] = str
                                    break

                ea -= self.arch.insn_size

            yield args
def highlight(self):
    """Toggle the highlight colour of every call instruction.

    Call instructions currently coloured with self.COLOR are reset to the
    default colour, and those with the default colour are set to
    self.COLOR. Items with any other colour are left untouched.
    """
    for head in idautils.Heads():
        head_flags = ida_shims.get_full_flags(head)
        if not ida_shims.is_code(head_flags):
            continue
        if not idaapi.is_call_insn(head):
            continue

        existing = idaapi.get_item_color(head)
        if existing == self.COLOR:
            idaapi.set_item_color(head, idc.DEFCOLOR)
        elif existing == idc.DEFCOLOR:
            idaapi.set_item_color(head, self.COLOR)
def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, insn):
    """
    detect a `call $+5` instruction: a call whose first operand value is
    the address five bytes past the call itself.

    args:
        f: unused here
        bb: unused here
        insn: instruction to inspect (must expose `.ea`)
    """
    if idaapi.is_call_insn(insn):
        fall_through = insn.ea + 5
        if fall_through == idc.get_operand_value(insn.ea, 0):
            yield Characteristic("call $+5"), insn.ea
def argv(self, func):
    '''
    Attempts to identify what types of arguments are passed to a given
    function. Currently unused.

    For each call cross-reference to func, the instructions from the call
    (plus any delay slot) back to ten instructions before it are examined.
    When an instruction modifies an argument register and references a
    string longer than four characters, the matching entry of the argument
    list is set to the str type.

    @func - The function whose call sites are examined.

    Yields the argument list once per call site. The same list object is
    yielded every time, so entries accumulate across call sites. Nothing
    is yielded when the architecture is unknown.
    '''
    args = [None for x in self.arch.argv]

    if not self.arch.unknown:
        for xref in idautils.XrefsTo(func.startEA):
            if idaapi.is_call_insn(xref.frm):
                # Keep the call site in its own name; the string lookup
                # below iterates other xrefs and must not clobber it.
                call_ea = xref.frm
                idaapi.decode_insn(call_ea)

                ea = call_ea + (self.arch.delay_slot * self.arch.insn_size)
                end_ea = (call_ea - (self.arch.insn_size * 10))

                while ea >= end_ea:
                    # Stop searching if we've reached a conditional block
                    # or another call
                    if idaapi.is_basic_block_end(ea) or \
                            (ea != call_ea and idaapi.is_call_insn(ea)):
                        break

                    idaapi.decode_insn(ea)
                    features = idaapi.cmd.get_canon_feature()

                    for n in range(0, len(self.CHANGE_OPND)):
                        if idaapi.cmd.Operands[n].type in [
                                idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase
                        ]:
                            try:
                                regname = self.arch.registers[
                                    idaapi.cmd.Operands[n].reg]
                                index = self.arch.argv.index(regname)
                            except ValueError:
                                # Not an argument register
                                continue

                            if features & self.CHANGE_OPND[n]:
                                for str_xref in idautils.XrefsFrom(ea):
                                    # TODO: Where is this xref type defined?
                                    if str_xref.type == 1:
                                        string = idc.GetString(str_xref.to)
                                        if string and len(string) > 4:
                                            args[index] = str
                                        break

                    ea -= self.arch.insn_size

                yield args
def trace(self, ea):
    '''
    Given an EA where an argument register is set, attempt to trace what
    function call that argument is passed to.

    @ea - The address of an instruction that modifies a function argument
          register.

    Returns a tuple of (function EA, argv index, argument register name)
    on success.
    Returns (None, None, None) on failure or when the architecture is
    unknown.
    '''
    if self.arch.unknown:
        return (None, None, None)

    insn = ida_shims.decode_insn(ea)
    features = ida_shims.get_canon_feature(insn)
    ops = ida_shims.get_operands(insn)

    # Snapshot the operand fields up front: the forward scan below decodes
    # other instructions, which may change what `insn`/`ops` refer to.
    operands = [(ops[n].type, ops[n].reg)
                for n in range(0, len(self.CHANGE_OPND))]

    for n, (op_type, op_reg) in enumerate(operands):
        if op_type in [idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase]:
            try:
                regname = self.arch.registers[op_reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register
                continue

            if features & self.CHANGE_OPND[n]:
                # Scan with a separate cursor so that every operand starts
                # from the original instruction address.
                scan_ea = ea - (self.arch.delay_slot * self.arch.insn_size)

                while True:
                    # NOTE(review): result unused; kept because older IDA
                    # APIs appear to inspect the globally decoded
                    # instruction -- confirm before removing.
                    ida_shims.decode_insn(scan_ea)

                    if idaapi.is_call_insn(scan_ea):
                        for xref in idautils.XrefsFrom(scan_ea):
                            if xref.type in [idaapi.fl_CF, idaapi.fl_CN]:
                                return (xref.to, index, regname)
                        # If we couldn't figure out where the function
                        # call was going to, just quit
                        break

                    try:
                        is_block_end = idaapi.is_basic_block_end(scan_ea)
                    except TypeError:
                        is_block_end = idaapi.is_basic_block_end(
                            scan_ea, True)

                    if is_block_end:
                        break

                    # TODO: Use idc.NextHead(ea) instead...
                    scan_ea += self.arch.insn_size

    return (None, None, None)
def extract_function_calls_from(f, bb, insn):
    """yield a "calls from" characteristic for each code reference of a call

    most relevant at the function scope, however, it is most efficient to
    extract at the instruction scope

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    for target in idautils.CodeRefsFrom(insn.ea, False):
        yield Characteristic("calls from"), target
def extract_function_indirect_call_characteristic_features(f, bb, insn):
    """yield an "indirect call" characteristic for calls through a register
    or register-based memory operand (e.g., call eax or
    call dword ptr [edx+4])

    does not include calls like => call ds:dword_ABD4974

    most relevant at the function or basic block scope; however, it is
    most efficient to extract at the instruction scope

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)
    """
    if not idaapi.is_call_insn(insn):
        return

    first_operand_type = idc.get_operand_type(insn.ea, 0)
    if first_operand_type in (idc.o_reg, idc.o_phrase, idc.o_displ):
        yield Characteristic("indirect call"), insn.ea
def trace(self, ea):
    '''
    Given an EA where an argument register is set, attempt to trace what
    function call that argument is passed to.

    @ea - The address of an instruction that modifies a function argument
          register.

    Returns a tuple of (function EA, argv index, argument register name)
    on success.
    Returns (None, None, None) on failure or when the architecture is
    unknown.
    '''
    if self.arch.unknown:
        return (None, None, None)

    idaapi.decode_insn(ea)
    features = idaapi.cmd.get_canon_feature()

    # Snapshot the operand fields up front: idaapi.cmd is global state and
    # is overwritten by every decode_insn() call in the scan below.
    operands = [(idaapi.cmd.Operands[n].type, idaapi.cmd.Operands[n].reg)
                for n in range(0, len(self.CHANGE_OPND))]

    for n, (op_type, op_reg) in enumerate(operands):
        if op_type in [idaapi.o_reg, idaapi.o_displ, idaapi.o_phrase]:
            try:
                regname = self.arch.registers[op_reg]
                index = self.arch.argv.index(regname)
            except ValueError:
                # Not an argument register
                continue

            if features & self.CHANGE_OPND[n]:
                # Scan with a separate cursor so that every operand starts
                # from the original instruction address.
                scan_ea = ea - (self.arch.delay_slot * self.arch.insn_size)

                while True:
                    idaapi.decode_insn(scan_ea)

                    if idaapi.is_call_insn(scan_ea):
                        for xref in idautils.XrefsFrom(scan_ea):
                            if xref.type in [idaapi.fl_CF, idaapi.fl_CN]:
                                return (xref.to, index, regname)
                        # If we couldn't figure out where the function
                        # call was going to, just quit
                        break

                    try:
                        is_block_end = idaapi.is_basic_block_end(scan_ea)
                    except TypeError:
                        is_block_end = idaapi.is_basic_block_end(
                            scan_ea, True)

                    if is_block_end:
                        break

                    # TODO: Use idc.NextHead(ea) instead...
                    scan_ea += self.arch.insn_size

    return (None, None, None)
def IsPrevInsnCall(ea):
    """
    Given a return address, try to find a CALL instruction that ends
    right before it.

    Each entry of CallPattern supplies an offset (index 0) relative to
    `ea` and the expected opcode bytes (index 1). The first candidate
    whose bytes match and which IDA recognises as a call is returned;
    False is returned when no pattern matches.
    """
    global CallPattern

    for pattern in CallPattern:
        # assume caller's ea
        candidate = ea + pattern[0]
        expected = pattern[1]

        # get the bytes
        observed = list(GetDataList(candidate, len(expected), 1))

        # do we have a match? is it a call instruction?
        if observed == expected and idaapi.is_call_insn(candidate):
            return candidate

    return False
def check_previous_inst_is_call(return_addr, is_64bit):
    """Check whether a call instruction ends right before `return_addr`.

    Candidate call lengths of 2, 3, 5, 6 and 7 bytes (plus 9 when
    `is_64bit` is true) are tried in that order. A candidate is accepted
    when IDA sees a call at that address, an instruction can be created
    there, and its mnemonic is "call".

    Returns (True, call address) for the first accepted candidate,
    otherwise (False, None).
    """
    candidate_lengths = [2, 3, 5, 6, 7] + ([9] if is_64bit else [])

    for length in candidate_lengths:
        candidate = return_addr - length
        try:
            if not idaapi.is_call_insn(candidate):
                continue
            if not idc.create_insn(candidate):
                continue
            if print_insn_mnem(candidate) == "call":
                return (True, candidate)
        except ValueError:
            continue

    return (False, None)
def enum_calls_in_function(fva):
    '''
    yield the call instructions in the given function.

    Args:
      fva (int): the starting address of a function

    Returns:
      sequence[tuple[int, str]]: the address of a call instruction,
        and the disassembly line at that address
    '''
    for ea in enum_function_addrs(fva):
        if idaapi.is_call_insn(ea):
            disasm = ida_lines.generate_disassembly(ea, 16, True, False)[1][0]
            # replace consecutive whitespace with a single space; the
            # pattern is a raw string so `\s` is not treated as a
            # (deprecated) string escape
            disasm = re.sub(r"\s\s+", " ", disasm)
            yield ea, disasm
def IsPrevInsnCall(ea):
    """
    Given a return address, try to find a CALL instruction that ends
    right before it.

    Each entry of CallPattern is an (offset, opcode bytes) pair; the
    offset is relative to `ea`. The first candidate whose bytes match and
    which IDA recognises as a call is returned. None is returned when
    `ea` is BADADDR or below 10, or when no pattern matches.
    """
    global CallPattern

    address_unusable = ea == idaapi.BADADDR or ea < 10
    if address_unusable:
        return None

    for delta, opcodes in CallPattern:
        # assume caller's ea
        candidate = ea + delta

        # get the bytes
        observed = list(GetDataList(candidate, len(opcodes), 1))

        # do we have a match? is it a call instruction?
        if observed == opcodes and idaapi.is_call_insn(candidate):
            return candidate

    return None
def get_apis(func_addr):
    """Collect the API names called from the function at `func_addr`.

    Every call instruction between the function's start and end
    attributes is inspected. The first far cross-reference of the call is
    taken as its target. If the target's function flags mark it as library
    code or a thunk, its name is recorded; otherwise the call is counted
    as a non-API call.

    :param func_addr: address of the function to inspect.
    :return: tuple ``(calls, apis)``. When the function itself is library
        code or a thunk, the tuple is ``(0, "Library code or thunk")``.
    """
    calls = 0
    apis = []
    flags = GetFunctionFlags(func_addr)
    # ignore library functions
    if flags & FUNC_LIB or flags & FUNC_THUNK:
        return (calls, "Library code or thunk")

    start = idc.GetFunctionAttr(func_addr, FUNCATTR_START)
    end = idc.GetFunctionAttr(func_addr, FUNCATTR_END)
    cur_addr = start
    while cur_addr <= end:
        if idaapi.is_call_insn(cur_addr):
            # In theory an API address should only have one xref. The
            # xrefs approach was used because the author could not find
            # how to get the API name by address.
            tmp_api_address = None
            for xref in XrefsFrom(cur_addr, idaapi.XREF_FAR):
                if xref.to is not None:
                    tmp_api_address = xref.to
                    break

            if tmp_api_address is None:
                # api address could not be found: count the call once
                calls += 1
            else:
                api_flags = GetFunctionFlags(tmp_api_address)
                # check for lib code (api). `x & FLAG == True` compares
                # the masked int with True and never matches for
                # FUNC_LIB; test the bits for truthiness instead.
                if (api_flags & idaapi.FUNC_LIB
                        or api_flags & idaapi.FUNC_THUNK):
                    tmp_api_name = NameEx(0, tmp_api_address)
                    if tmp_api_name:
                        apis.append(tmp_api_name)
                else:
                    calls += 1

        # Advance exactly once per instruction
        cur_addr = idc.NextHead(cur_addr, end)
    return (calls, apis)
def check_for_api_call(ctx, insn):
    """yield "<module>.<name>" for each imported API called by `insn`

    A call target is looked up in the imports table directly; when it is
    not an import but lies in a thunk function, the data references from
    the target are looked up instead.
    """
    if idaapi.is_call_insn(insn):
        for target in idautils.CodeRefsFrom(insn.ea, False):
            import_info = get_imports(ctx).get(target, ())
            if import_info:
                yield "%s.%s" % (import_info[0], import_info[1])
                continue

            callee = idaapi.get_func(target)
            # check if call to thunk
            # TODO: first instruction might not always be the thunk
            if not callee or not (callee.flags & idaapi.FUNC_THUNK):
                continue

            for thunk_target in idautils.DataRefsFrom(target):
                # TODO: always data ref for thunk??
                import_info = get_imports(ctx).get(thunk_target, ())
                if import_info:
                    yield "%s.%s" % (import_info[0], import_info[1])
def extract_insn_bytes_features(f, bb, insn):
    """
    yield a Bytes feature for each byte sequence referenced by `insn`

    call instructions are skipped, as are references whose bytes cannot
    be read or are all zero

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    if not idaapi.is_call_insn(insn):
        for data_ref in idautils.DataRefsFrom(insn.ea):
            blob = capa.features.extractors.ida.helpers.read_bytes_at(
                data_ref, MAX_BYTES_FEATURE_SIZE)
            if not blob:
                continue
            if capa.features.extractors.helpers.all_zeros(blob):
                continue
            yield Bytes(blob), insn.ea
def extract_insn_bytes_features(f, bb, insn):
    """yield a Bytes feature for the byte sequence referenced by `insn`

    call instructions are skipped; nothing is yielded when the helper
    reports the instruction's own address as the reference, or when the
    bytes cannot be read or are all zero

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push    offset iid_004118d4_IShellLinkA ; riid
    """
    if not idaapi.is_call_insn(insn):
        data_ref = capa.features.extractors.ida.helpers.find_data_reference_from_insn(insn)
        if data_ref != insn.ea:
            blob = capa.features.extractors.ida.helpers.read_bytes_at(
                data_ref, MAX_BYTES_FEATURE_SIZE)
            if blob:
                if not capa.features.extractors.helpers.all_zeros(blob):
                    yield Bytes(blob), insn.ea
def get_func_code_refs_from(func_ea, iaddrs):
    """Returns a set with the code references from this function.

    For each address in `iaddrs`, the last far cross-reference that is
    either a data reference or a code reference leaving the function at
    `func_ea` is kept. The reference is added to the result when one was
    found, or when the instruction is a call or an indirect jump (in
    which case the value added may be BADADDR).
    """
    collected = set()

    for insn_ea in iaddrs:
        target = idaapi.BADADDR

        for xref in idautils.XrefsFrom(insn_ea, idaapi.XREF_FAR):
            if xref.iscode:
                owner = idaapi.get_func(xref.to)
                if owner and owner.startEA == func_ea:
                    # Code reference that stays inside this function
                    continue
            target = xref.to

        has_target = target != idaapi.BADADDR
        if (has_target or idaapi.is_call_insn(insn_ea)
                or idaapi.is_indirect_jump_insn(insn_ea)):
            collected.add(target)

    return collected
def graph_down(ea, path=None):
    """
    Recursively collect all function calls.

    Starting at the function containing `ea`, follows every far code
    cross-reference made by a call instruction and recurses into callees
    not yet visited. Calls into thunks and library functions are ignored.

    Copied with minor modifications from
    http://hooked-on-mnemonics.blogspot.com/2012/07/renaming-subroutine-blocks-and.html

    :param ea: address of the function to start from.
    :param path: set of addresses already visited; a fresh set is created
        when omitted, so separate top-level calls do not share state.
    :return: the set of visited function addresses (including `ea`).
    """
    if path is None:
        path = set()
    path.add(ea)

    #
    # iterate through all the instructions in the target function (ea) and
    # inspect all the call instructions
    #

    call_sites = [x for x in idautils.FuncItems(ea) if idaapi.is_call_insn(x)]
    for x in call_sites:

        # TODO
        for r in idautils.XrefsFrom(x, idaapi.XREF_FAR):
            if not r.iscode:
                continue

            # get the function pointed at by this call
            func = idaapi.get_func(r.to)
            if not func:
                continue

            # ignore calls to imports / library calls / thunks
            if (func.flags & (idaapi.FUNC_THUNK | idaapi.FUNC_LIB)) != 0:
                continue

            #
            # if we have not traversed to the destination function that
            # this call references, recurse down to it to continue our
            # traversal
            #

            if r.to not in path:
                graph_down(r.to, path)

    return path
def analyzeFunction(self, funcea):
    """Tag the function at `funcea` according to the library APIs it calls.

    For every call instruction whose first far xref targets a function
    flagged both FUNC_LIB and FUNC_STATICDEF, the target name is passed
    to self.processFunction() and the returned flags are merged. If any
    flags were collected and self.rename is set, the function is renamed
    with the flags as a prefix. A row describing the function is appended
    to `data`.

    NOTE(review): `data` and `definitions` are not defined in this block;
    presumably module-level names -- confirm.
    """
    # https://reverseengineering.stackexchange.com/questions/9352/finding-all-api-calls-in-a-function
    # Copy + Paste from Stack Overflow - Lika Boss
    n_flags = set()
    dism_addr = list(idautils.FuncItems(funcea))
    for instr in dism_addr:
        # "" is the sentinel for "no call target found"
        tmp_api_address = ""
        if idaapi.is_call_insn(instr):
            for xref in idautils.XrefsFrom(instr, idaapi.XREF_FAR):
                if xref.to == None:
                    continue
                # only the first usable far xref is considered
                tmp_api_address = xref.to
                break
            # get next instr since api address could not be found
            if tmp_api_address == "":
                continue
            api_flags = idc.GetFunctionFlags(tmp_api_address)
            # check for lib code (api)
            if (api_flags & idaapi.FUNC_LIB and api_flags & idaapi.FUNC_STATICDEF):
                tmp_api_name = idc.NameEx(0, tmp_api_address)
                if tmp_api_name:
                    t_flags = self.processFunction( funcea, tmp_api_name)
                    # accumulate the flags over all calls in the function
                    n_flags = ( t_flags| n_flags )
    # Rename function if flags populated
    # Skip if this isn't the first run
    # NOTE(review): the order of a joined set is not guaranteed, so the
    # prefix may differ between runs -- confirm this is acceptable.
    sflags = "".join(set(n_flags))
    if len(n_flags) > 0 and self.rename:
        fn = idc.GetFunctionName(funcea)
        # do not prefix a name that already starts with the flags
        if not fn.startswith(sflags):
            print "Renaming - ", fn, " with - ", sflags
            idc.MakeName(funcea, str(sflags + "_" + fn ))
    # Row: address, (possibly new) name, flags, then one '*' or '' column
    # per entry of definitions.PEAPIs
    tbl = [ funcea, idc.GetFunctionName(funcea), sflags ]
    for f in definitions.PEAPIs.keys():
        if definitions.PEAPIs[f]['flag'] in sflags:
            tbl.append('*')
        else:
            tbl.append('')
    data.append( tbl )
def get_apis(self, func_ea):
    """List the names of the functions called from `func_ea`.

    The first far cross-reference of each call instruction is taken as
    the call target, and the name of the function at that target is
    appended to the result. Calls without a far cross-reference are
    skipped.

    :param func_ea: address of the function to inspect.
    :return: list of callee function names, in instruction order.
    """
    apis = []
    for instr in list(FuncItems(func_ea)):
        if not idaapi.is_call_insn(instr):
            continue

        for xref in XrefsFrom(instr, idaapi.XREF_FAR):
            if xref.to is None:
                continue
            # only the first usable far xref is considered
            apis.append(GetFunctionName(xref.to))
            break

    return apis
def __init__(self, functionName):
    """Build a control-flow graph of one function, one node per instruction.

    :param functionName: either the function's name (str) or an address
        inside the function.

    Every code head between the function start and end becomes a node.
    Each node is connected to its code-reference targets that lie inside
    the function, and to the next head when that head is reached by
    ordinary flow or the current instruction is a call.
    """
    import idautils
    import idc
    import idaapi
    super(FunctionGraph, self).__init__()

    if isinstance(functionName, str):
        start_addr = idc.LocByName(functionName)
    else:
        start_addr = idaapi.get_func(functionName).startEA
        print('using 0x%x as function start' % (start_addr))

    end_addr = idc.FindFuncEnd(start_addr)
    self.start_addr = start_addr
    self.end_addr = end_addr
    self.name = functionName

    for h in idautils.Heads(start_addr, end_addr):
        if h == idc.BADADDR:
            continue
        if not idc.isCode(idc.GetFlags(h)):
            continue

        self.add_node(h)

        # The function end address is exclusive, so a reference to it
        # points outside the function and is not an edge of this graph.
        refs = {
            x for x in idautils.CodeRefsFrom(h, 1)
            if start_addr <= x < end_addr
        }

        nh = idc.NextHead(h, end_addr)
        if nh != idc.BADADDR and \
                (idaapi.isFlow(idaapi.get_flags_ex(nh, 0)) or
                 idaapi.is_call_insn(h)):
            refs.add(nh)

        for r in refs:
            self.connect(h, r)
def find_rec(ea, func, maxdepth, all=True, depth=0, path=None, processed=None):
    """Apply `func` to every item of a function and of the functions it calls.

    :param ea: address of the function to process.
    :param func: callback invoked as ``func(addr, path)`` for every item
        address of each visited function.
    :param maxdepth: maximum call depth to descend to; nothing happens
        once ``depth`` exceeds it.
    :param all: when True, callees are revisited even if already
        processed; otherwise each callee is visited once.
    :param depth: current recursion depth (internal).
    :param path: list of call-site addresses leading to the current
        function; a fresh list is created when omitted.
    :param processed: list of function addresses already visited; a
        fresh list is created when omitted.
    """
    # Fresh containers per top-level call: mutable defaults would leak
    # state from one traversal into the next.
    if path is None:
        path = []
    if processed is None:
        processed = []

    if depth > maxdepth:
        return
    processed.append(ea)

    # Call func for each address in the function. The addresses are
    # materialised first, as in the original code.
    for addr in list(FuncItems(ea)):
        func(addr, path)

    # For each call instruction in the function descend into that call
    for addr in [x for x in FuncItems(ea) if idaapi.is_call_insn(x)]:
        xrefs = list(CodeRefsFrom(addr, 0))
        # If the call references a function known by IDA
        if len(xrefs) > 0:
            xref = xrefs[0]
            # If the function has not already been processed
            if all == True or xref not in processed:
                # Find further calls in the function below
                path.append(addr)
                find_rec(xref, func, maxdepth, all, depth + 1, path,
                         processed)
                path.pop()
def trace_data(ea, min_ea, op_type, op_val):
    '''
    Trace backwards from ea towards min_ea, following the operand given by
    (op_type, op_val) through LDR/MOV/STR instructions.

    Each instruction whose destination operand matches the tracked operand
    is passed to mark_instruction(), and tracking continues with that
    instruction's source operand. e.g: LDR R0, R3

    The trace stops at min_ea, at BADADDR, or at a call instruction while
    register 0 is being tracked (the call would overwrite R0).

    Nothing is returned; the result is the set of marked instructions.
    '''
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # BL/BLX that will modify the R0
            return None

        mnem = idc.GetMnem(ea)
        if mnem in ('LDR', 'MOV'):
            src_op, dest_op = 1, 0
        elif mnem == 'STR':
            src_op, dest_op = 0, 1
        else:
            continue

        if idc.GetOpType(ea, dest_op) == op_type and \
                idc.GetOperandValue(ea, dest_op) == op_val:
            mark_instruction(ea)
            # Keep following the value through its source operand
            op_type = idc.GetOpType(ea, src_op)
            op_val = idc.GetOperandValue(ea, src_op)
def trace_param(ea, min_ea, op_type, op_val):
    '''
    trace_param: ea, min_ea, op_type, op_val

    Taking ea as start, this function does basic backtrace of
    an operand (defined by op_type and op_val) until it finds
    a data reference which we consider the "source". It stops
    when ea < min_ea (usually the function start).

    It does not support arithmetic or complex modifications of
    the source. This will be improved on future versions.

    Returns the referenced address when a memory source is found, and
    None when the trace has to be abandoned or reaches min_ea.
    '''
    global displ_re, msgsend, var_re

    ea_call = ea
    while ea != idc.BADADDR and ea != min_ea:
        ea = idc.PrevHead(ea, min_ea)

        if op_type == idaapi.o_reg and op_val == 0 and idaapi.is_call_insn(ea):
            # We have a BL/BLX that will modify the R0
            # we're tracking
            #
            return None

        # Only LDR/MOV (destination is operand 0) and STR (destination is
        # operand 1) are followed; anything else is skipped.
        if idc.GetMnem(ea) in ['LDR', 'MOV']:
            src_op = 1
            dest_op = 0
        elif idc.GetMnem(ea) == 'STR':
            src_op = 0
            dest_op = 1
        else:
            continue

        if idc.GetOpType(ea, dest_op) == op_type and idc.GetOperandValue(ea, dest_op) == op_val:
            # Found, see where it comes from
            if idc.GetOpType(ea, src_op) == idc.o_mem:
                # Got the final reference
                refs = list(idautils.DataRefsFrom(ea))
                if not refs:
                    # No data xref: read the dword stored at the operand
                    local_ref = idc.GetOperandValue(ea, src_op)
                    far_ref = idc.Dword(local_ref)
                else:
                    # Follow the chain of first data references to its end
                    while len(refs) > 0:
                        far_ref = refs[0]
                        refs = list(idautils.DataRefsFrom(refs[0]))
                return far_ref
            elif idc.GetOpType(ea, src_op) == idc.o_displ:
                if ', [SP' in idc.GetDisasm(ea):
                    if 'arg_' in idc.GetDisasm(ea):
                        # We don't track function arguments
                        return None

                    # We're tracking a stack variable
                    try:
                        var_name = var_re.search(idc.GetDisasm(ea)).group('varname')
                    except:
                        print '%08x: Unable to recognize variable' % ea
                        return None

                    # Walk back to the STR that wrote this variable
                    while ea != idc.BADADDR and ea > min_ea:
                        if idc.GetMnem(ea) == 'STR' and var_name in idc.GetDisasm(ea):
                            # New reg to track
                            op_val = idc.GetOperandValue(ea, dest_op)
                            break
                        ea = idc.PrevHead(ea, min_ea)
                else:
                    # New reg to track
                    if '[LR]' in idc.GetDisasm(ea):
                        # Optimizations use LR as general reg
                        op_val = 14
                    else:
                        try:
                            op_val = int(displ_re.search(idc.GetDisasm(ea)).group('regnum'))
                        except:
                            print '%08x: Unable to recognize register' % ea
                            return None
            elif idc.GetOpType(ea, src_op) == idc.o_reg:
                # Direct reg-reg assignment
                op_val = idc.GetOperandValue(ea, src_op)
            else:
                # We don't track o_phrase or other complex source operands :(
                return None
    return None
def block(self, block):
    '''
    Builds the signature data for a single basic block.

    @block - The basic block to process (startEA/endEA are read).

    Returns a tuple:

    (formal block signature hash, fuzzy block signature hash,
     [immediate, values], [called, function, names])
    '''
    formal = []
    fuzzy = []
    functions = []
    immediates = []

    ea = block.startEA
    while ea < block.endEA:
        # Populates idaapi.cmd, which the operand handling below reads
        idaapi.decode_insn(ea)

        # Get a list of all data/code references from the current instruction
        drefs = [x for x in idautils.DataRefsFrom(ea)]
        crefs = [x for x in idautils.CodeRefsFrom(ea, False)]

        # Add all instruction mnemonics to the formal block hash
        formal.append(idc.GetMnem(ea))

        # If this is a call instruction, be sure to note the name of the function
        # being called. This is used to apply call-based signatures to functions.
        #
        # For fuzzy signatures, we can't use the actual name or EA of the function,
        # but rather just want to note that a function call was made.
        #
        # Formal signatures already have the call instruction mnemonic, which is more
        # specific than just saying that a call was made.
        if idaapi.is_call_insn(ea):
            for cref in crefs:
                func_name = idc.Name(cref)
                if func_name:
                    functions.append(func_name)
                    fuzzy.append("funcref")
        # If there are data references from the instruction, check to see if any of them
        # are strings. These are looked up in the pre-generated strings dictionary.
        #
        # String values are easily identifiable, and are used as part of both the fuzzy
        # and the formal signatures.
        #
        # It is more difficult to determine if non-string values are constants or not;
        # for both fuzzy and formal signatures, just use "data" to indicate that some data
        # was referenced.
        elif drefs:
            for dref in drefs:
                if self.strings.has_key(dref):
                    formal.append(self.strings[dref].value)
                    fuzzy.append(self.strings[dref].value)
                else:
                    formal.append("dataref")
                    fuzzy.append("dataref")
        # If there are no data or code references from the instruction, use every operand as
        # part of the formal signature.
        #
        # Fuzzy signatures are only concerned with interesting immediate values, that is, values
        # that are greater than 65,535, are not memory addresses, and are not displayed as
        # negative values.
        elif not drefs and not crefs:
            for n in range(0, len(idaapi.cmd.Operands)):
                opnd_text = idc.GetOpnd(ea, n)
                formal.append(opnd_text)
                if idaapi.cmd.Operands[n].type == idaapi.o_imm and not opnd_text.startswith('-'):
                    # NOTE(review): the comparison is >= 0xFFFF, so 65,535
                    # itself is included -- confirm against the comment above.
                    if idaapi.cmd.Operands[n].value >= 0xFFFF:
                        # Flags of 0 are taken to mean the value is not an address
                        if idaapi.getFlags(idaapi.cmd.Operands[n].value) == 0:
                            fuzzy.append(str(idaapi.cmd.Operands[n].value))
                            immediates.append(idaapi.cmd.Operands[n].value)

        ea = idc.NextHead(ea)

    return (self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)), immediates, functions)
def block(self, block):
    '''
    Builds the signature data for a single basic block.

    @block - The basic block to process (startEA/endEA are read).

    Returns a tuple:

    (formal block signature hash, fuzzy block signature hash,
     [immediate, values], [called, function, names])
    '''
    formal = []
    fuzzy = []
    functions = []
    immediates = []

    ea = block.startEA
    while ea < block.endEA:
        # Populates idaapi.cmd, which the operand handling below reads
        idaapi.decode_insn(ea)

        # Get a list of all data/code references from the current instruction
        drefs = [x for x in idautils.DataRefsFrom(ea)]
        crefs = [x for x in idautils.CodeRefsFrom(ea, False)]

        # Add all instruction mnemonics to the formal block hash
        formal.append(idc.GetMnem(ea))

        # If this is a call instruction, be sure to note the name of the function
        # being called. This is used to apply call-based signatures to functions.
        #
        # For fuzzy signatures, we can't use the actual name or EA of the function,
        # but rather just want to note that a function call was made.
        #
        # Formal signatures already have the call instruction mnemonic, which is more
        # specific than just saying that a call was made.
        if idaapi.is_call_insn(ea):
            for cref in crefs:
                func_name = idc.Name(cref)
                if func_name:
                    functions.append(func_name)
                    fuzzy.append("funcref")
        # If there are data references from the instruction, check to see if any of them
        # are strings. These are looked up in the pre-generated strings dictionary.
        #
        # String values are easily identifiable, and are used as part of both the fuzzy
        # and the formal signatures.
        #
        # It is more difficult to determine if non-string values are constants or not;
        # for both fuzzy and formal signatures, just use "data" to indicate that some data
        # was referenced.
        elif drefs:
            for dref in drefs:
                if self.strings.has_key(dref):
                    formal.append(self.strings[dref].value)
                    fuzzy.append(self.strings[dref].value)
                else:
                    formal.append("dataref")
                    fuzzy.append("dataref")
        # If there are no data or code references from the instruction, use every operand as
        # part of the formal signature.
        #
        # Fuzzy signatures are only concerned with interesting immediate values, that is, values
        # that are greater than 65,535, are not memory addresses, and are not displayed as
        # negative values.
        elif not drefs and not crefs:
            for n in range(0, len(idaapi.cmd.Operands)):
                opnd_text = idc.GetOpnd(ea, n)
                formal.append(opnd_text)
                if idaapi.cmd.Operands[n].type == idaapi.o_imm and not opnd_text.startswith('-'):
                    # NOTE(review): the comparison is >= 0xFFFF, so 65,535
                    # itself is included -- confirm against the comment above.
                    if idaapi.cmd.Operands[n].value >= 0xFFFF:
                        # Flags of 0 are taken to mean the value is not an address
                        if idaapi.getFlags(idaapi.cmd.Operands[n].value) == 0:
                            fuzzy.append(str(idaapi.cmd.Operands[n].value))
                            immediates.append(idaapi.cmd.Operands[n].value)

        ea = idc.NextHead(ea)

    return (self.sighash(''.join(formal)), self.sighash(''.join(fuzzy)), immediates, functions)
def is_call(self):
    """Return whether the instruction at this object's address is a call."""
    address = self._ea
    return idaapi.is_call_insn(address)
def is_call(self):
    """Return whether the wrapped instruction is a call."""
    instruction = self._insn
    return idaapi.is_call_insn(instruction)
def _profile_function(self):
    """Record every use of the highlighted identifier in the current function.

    The function containing the cursor is scanned instruction by
    instruction. An instruction matches when the highlighted identifier
    occurs in its mnemonic, in the name of a call target, in one of its
    operands, or in one of its comments. Each match is stored in
    self.xrefs keyed by address, together with its offset, mnemonic,
    read/write type, direction relative to the cursor, and disassembly
    text.
    """
    current_ea = ScreenEA()
    current_function = idc.GetFunctionName(current_ea)
    current_function_ea = idc.LocByName(current_function)

    if current_function:
        self.function = current_function

    ea = start_ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_START)
    end_ea = idc.GetFunctionAttr(current_function_ea, idc.FUNCATTR_END)

    self.highlighted = idaapi.get_highlighted_identifier()

    # Nothing is scanned when no identifier is highlighted
    while ea < end_ea and ea != idc.BADADDR and self.highlighted:
        i = 0
        match = False
        optype = self.READ
        comment = None

        # Populates idaapi.cmd (itype and size are read below)
        idaapi.decode_insn(ea)

        mnem = idc.GetMnem(ea)

        if self.highlighted in mnem:
            match = True
        elif idaapi.is_call_insn(ea):
            for xref in idautils.XrefsFrom(ea):
                # NOTE(review): 21 is presumably the ordinary-flow xref
                # type -- confirm against the IDA SDK constants.
                if xref.type != 21:
                    name = idc.Name(xref.to)
                    if name and self.highlighted in name:
                        match = True
                        break
        else:
            # Walk the operands until an empty operand string is returned
            while True:
                opnd = idc.GetOpnd(ea, i)
                if opnd:
                    if self.highlighted in opnd:
                        match = True
                        # A matching operand that the instruction modifies
                        # is recorded as a write
                        if (idaapi.insn_t_get_canon_feature(idaapi.cmd.itype) & self.OPND_WRITE_FLAGS[i]):
                            optype = self.WRITE
                    i += 1
                else:
                    break

        if not match:
            # Fall back to the two comment kinds of the instruction
            comment = idc.GetCommentEx(ea, 0)
            if comment and self.highlighted in comment:
                match = True
            else:
                comment = idc.GetCommentEx(ea, 1)
                if comment and self.highlighted in comment:
                    match = True
                else:
                    comment = None

        if match:
            if ea > current_ea:
                direction = self.DOWN
            elif ea < current_ea:
                direction = self.UP
            else:
                direction = self.THIS

            self.xrefs[ea] = {
                'offset' : idc.GetFuncOffset(ea),
                'mnem' : mnem,
                'type' : optype,
                'direction' : direction,
                'text' : idc.GetDisasm(ea),
            }

        # NOTE(review): if decoding fails and cmd.size is 0 this loop
        # would not advance -- confirm whether that can happen here.
        ea += idaapi.cmd.size