def process_mov_mnem(self, mnem, cur_inst_addr, xref_addr):
    """Handle a ``mov <reg>, <mem>`` that loads a pointer to an encrypted string.

    The memory operand is treated as the address of a pointer; the qword
    stored there is taken as the encrypted string address and handed to
    ``self.simulate_decryption``.

    Args:
        mnem: Mnemonic of the instruction (not used by this method).
        cur_inst_addr: Address of the ``mov`` instruction to inspect.
        xref_addr: Forwarded unchanged to ``self.simulate_decryption``.

    Returns:
        bool: True when the operands were register/memory and decryption
        was attempted, False otherwise.
    """
    # Only `mov <register>, <memory>` is of interest.
    if get_operand_type(cur_inst_addr, 0) != o_reg:
        return False
    if get_operand_type(cur_inst_addr, 1) != o_mem:
        return False

    print("{0:08x} {1}".format(
        cur_inst_addr,
        idc.generate_disasm_line(cur_inst_addr, GENDSM_FORCE_CODE)))

    # Operand 2 of the mov is a pointer to the actual encrypted string
    # address, so one extra dereference is needed.
    ptr_of_enc_str_addr = get_operand_value(cur_inst_addr, 1)
    enc_str_addr = get_qword(ptr_of_enc_str_addr)
    print("[+] SUCCESS: Found encrypted string address in {}".format(
        hex(enc_str_addr)))

    self.simulate_decryption(enc_str_addr, xref_addr)
    return True
def isinthunk(winname, thunk):
    """Tell whether `thunk` is a small function that jumps to `winname`.

    Args:
        winname: Name the jump target text must equal.
        thunk: ``(ea, name)`` pair; only ``ea`` is used here.

    Returns:
        bool: True if the inspected instruction starts with opcode byte
        0xEB or 0xE9 and its jump target text equals ``winname``.
    """
    ea, _name = thunk
    funcstart = idc.get_func_attr(ea, idc.FUNCATTR_START)
    funcend = idc.get_func_attr(ea, idc.FUNCATTR_END)
    # Thunks are tiny; the largest seen so far was 13 bytes, 20 leaves slack.
    if funcend - funcstart > 20:
        return False

    # The instruction examined is the head that follows the function start.
    addr = idc.next_head(funcstart, funcend)
    if addr == idc.BADADDR:
        return False

    if idc.get_wide_byte(addr) not in (0xEB, 0xE9):
        return False

    dis = idc.generate_disasm_line(addr, 0)
    if not dis:
        # No disassembly text available: nothing to compare against.
        return False

    funcname = dis[dis.find("jmp") + 3:].strip()
    short_at = funcname.find("short")
    if short_at != -1:
        funcname = funcname[short_at + 5:].strip()

    # When the target function gets typed, IDA appends a "; ..." comment.
    # Drop it together with the padding that precedes the semicolon,
    # otherwise the name can never compare equal to `winname`.
    funcname = funcname.split(";", 1)[0].strip()
    return funcname == winname
def process_lea_mnem(self, mnem, cur_inst_addr, xref_addr):
    """Handle a ``lea <reg>, <mem>`` whose operand is an encrypted string.

    The value of the memory operand is used directly as the encrypted
    string address and handed to ``self.simulate_decryption``.

    Args:
        mnem: Mnemonic of the instruction (not used by this method).
        cur_inst_addr: Address of the ``lea`` instruction to inspect.
        xref_addr: Forwarded unchanged to ``self.simulate_decryption``.

    Returns:
        bool: True when the operands were register/memory and decryption
        was attempted, False otherwise.
    """
    # Only `lea <register>, <memory>` is of interest.
    if get_operand_type(cur_inst_addr, 0) != o_reg:
        return False
    if get_operand_type(cur_inst_addr, 1) != o_mem:
        return False

    print("{0:08x} {1}".format(
        cur_inst_addr,
        idc.generate_disasm_line(cur_inst_addr, GENDSM_FORCE_CODE)))

    # Operand 2 of the lea is the address of the encrypted string itself.
    enc_str_addr = idc.get_operand_value(cur_inst_addr, 1)
    print("[+] SUCCESS: Found encrypted string address in {}".format(
        hex(enc_str_addr)))

    self.simulate_decryption(enc_str_addr, xref_addr)
    return True
def find_lua_function_array(start, end):
    """Return operand 1 of the first ``lea`` found in ``[start, end)``.

    Args:
        start: First address to scan.
        end: Address at which scanning stops.

    Returns:
        The value of the second operand of the first instruction whose
        disassembly text starts with ``'lea '``.

    Raises:
        RuntimeError: If no such instruction exists in the range.
    """
    for insn_ea in idautils.Heads(start, end):
        text = idc.generate_disasm_line(insn_ea, 2)
        if not text.startswith('lea '):
            continue
        return idc.get_operand_value(insn_ea, 1)

    raise RuntimeError('Unable to find Lua function array near 0x%08x' % start)
def __patch_indirect_call_instructions():
    """Rewrite the first two bytes of every ``call`` with a displacement operand.

    Walks every function chunk of every segment.  For each ``call`` whose
    first operand has type ``idc.o_displ`` the disassembly is printed, the
    16-bit value 0x15ff is patched at the instruction address, and the
    disassembly is printed again so the effect is visible in the log.
    """
    report = '{}: 0x{:08x}: {}'

    for segment_ea in idautils.Segments():
        for function_ea in idautils.Functions(segment_ea):
            owner = idc.get_func_name(function_ea)
            print('[+] Checking function "{}"'.format(owner))

            for chunk_start, chunk_end in idautils.Chunks(function_ea):
                for insn_ea in idautils.Heads(chunk_start, chunk_end):
                    if idc.print_insn_mnem(insn_ea) != 'call':
                        continue
                    if idc.get_operand_type(insn_ea, 0) != idc.o_displ:
                        continue

                    # Before / after lines bracket the patch.
                    print(report.format(
                        owner, insn_ea, idc.generate_disasm_line(insn_ea, 0)))
                    ida_bytes.patch_word(insn_ea, 0x15ff)
                    print(report.format(
                        owner, insn_ea, idc.generate_disasm_line(insn_ea, 0)))
def function_xrefs(name):
    """Return the names of the functions containing code references to `name`.

    The address of `name` and its disassembly line are printed first.

    Args:
        name: Symbol name to resolve with ``idc.get_name_ea_simple``.

    Returns:
        list: One ``idc.get_func_name`` result per code reference, in the
        order ``idautils.CodeRefsTo`` yields them (duplicates are kept).
    """
    import idc
    import idautils

    wf_addr = idc.get_name_ea_simple(name)
    # A formatted string prints the same text under Python 2 and Python 3.
    print("%s %s" % (hex(wf_addr), idc.generate_disasm_line(wf_addr, 0)))

    return [
        idc.get_func_name(addr)
        for addr in idautils.CodeRefsTo(wf_addr, 0)
    ]
def __get_instruction_bytes_wildcarded(pattern, addr, instr_type, op1_type, op2_type):
    """Keep the opcode bytes of an instruction and wildcard the rest.

    TODO: To be replaced by ida_idp.ph_calcrel()

    Args:
      pattern: accepted for interface compatibility; its incoming value is
        not used, the result is built from scratch.
      addr: the address of the current instruction to be wildcarded
      instr_type: type of the current instruction
      op1_type: type of the first operand
      op2_type: type of the second operand

    Returns:
      String: hex-encoded opcode byte(s) read at addr, a space, one ``??``
      per remaining byte of the item (not separated), and a trailing space.
    """
    call_types = frozenset(
        [idaapi.NN_call, idaapi.NN_callfi, idaapi.NN_callni])
    jump_types = frozenset(
        [idaapi.NN_jmp, idaapi.NN_jmpfi, idaapi.NN_jmpni])

    first_byte = binascii.hexlify(idc.get_bytes(addr, 1)).decode('utf-8')
    data_refs = list(idautils.DataRefsFrom(addr))

    logging.debug('[VTGREP] Wildcarding: %s',
                  idc.generate_disasm_line(addr, 0))

    is_branch = instr_type in jump_types or instr_type in call_types
    has_immediate = idaapi.o_imm in (op1_type, op2_type)

    # The opcode is two bytes long for: the known 0f/f2/f3 lead bytes, an
    # 'ff' call or jump, and an 'ff' instruction with an immediate operand
    # that carries a data reference.
    two_byte_opcode = first_byte in ('0f', 'f2', 'f3') or (
        first_byte == 'ff'
        and (is_branch or (bool(data_refs) and has_immediate)))

    if two_byte_opcode:
        opcode_len = 2
        opcode_hex = binascii.hexlify(idc.get_bytes(addr, 2)).decode('utf-8')
    else:
        opcode_len = 1
        opcode_hex = first_byte

    wildcard_count = idc.get_item_size(addr) - opcode_len
    return opcode_hex + ' ' + '??' * wildcard_count + ' '
def get_call_args_arm(ea, count_max=10):
    """
    Get the arguments of a function call (currently only 4 arguments,
    R0-R3, are supported).

    Walks backwards from the call at `ea` over ``count_max + 1``
    instructions and records, for each of R0-R3, the first instruction
    (closest to the call) whose disassembly text contains one of the
    known "load register" patterns.

    Returns a dict mapping argument index (0-3) to the recovered value, or
    None when the instruction at `ea` is not one of the accepted call
    mnemonics.
    """
    args = {}

    mnem = ida_ua.ua_mnem(ea)
    if mnem not in ("BL", "SVC", "BLNE", "BLHI", "BLEQ"):
        print(
            "Error: not a BL or SVC or BLNE or BLHI or BLEQ instruction at 0x%x"
            % ea)
        return None

    # Per register: text patterns whose operand 1 is the argument value.
    # NOTE(review): the R2 rows use ADDEQ/ADDNE where the other registers
    # use ADREQ/ADRNE -- kept as-is, confirm whether that is intentional.
    direct_patterns = (
        ("MOV R0,", "MOVEQ R0,", "ADR R0,", "ADREQ R0,", "ADRNE R0,"),
        ("MOV R1,", "MOVEQ R1,", "ADR R1,", "ADREQ R1,", "ADRNE R1,"),
        ("MOV R2,", "MOVEQ R2,", "ADR R2,", "ADDEQ R2,", "ADDNE R2,"),
        ("MOV R3,", "MOVEQ R3,", "ADR R3,", "ADREQ R3,", "ADRNE R3,"),
    )
    # Per register: text pattern whose operand 1 is an address that must be
    # dereferenced to obtain the argument value.
    load_patterns = ("LDR R0,", "LDR R1,", "LDR R2,", "LDR R3,")

    cursor = ida_bytes.prev_head(ea, 0)
    for _ in range(count_max + 1):
        text = idc.generate_disasm_line(cursor, 0)

        for index, (direct, load) in enumerate(
                zip(direct_patterns, load_patterns)):
            # Assume the instruction closest to the call is the one that
            # assigns the argument; later (earlier in code) matches for the
            # same register, e.g. another "MOV reg", are ignored.
            if any(candidate in text for candidate in direct):
                if index not in args:
                    args[index] = idc.get_operand_value(cursor, 1)
                    print("Found argument %d: 0x%x" % (index, args[index]))
            elif load in text:
                if index not in args:
                    pointer = idc.get_operand_value(cursor, 1)
                    args[index] = ida_bytes.get_wide_dword(pointer)
                    print("Found argument %d: 0x%x" % (index, args[index]))

        cursor = ida_bytes.prev_head(cursor, 0)

    return args
def trace_next(self, blk, node, reg):
    """
    Next round of backtracking.

    For every address returned by ``self.get_all_ref`` for the start of
    `blk`, look up the basic block containing it; when one exists, log the
    jump, create a tree node chained to `node`, and continue the
    depth-first search for `reg` from that block.
    """
    for source_ea in self.get_all_ref(blk.start_ea):
        source_block = self.get_blk(source_ea)
        if not source_block:
            continue

        FELogger.info("基本块跳转\t" + hexstr(source_ea) + "\t"
                      + idc.generate_disasm_line(source_ea, 0))
        child = self.create_tree_node(source_ea, prev=node)
        self.dfs(child, reg, source_block)
def get_down_bound(self):
    """Determine ``self.DOWN_BOUND`` for the current platform.

    On "ia32" the address of the symbol ``fcntl`` is used; on any other
    platform every function is scanned for an instruction containing
    ``MIPS_CHECK_INST[0]`` that is followed by one containing
    ``MIPS_CHECK_INST[1]``.

    Returns None in all cases; the result is stored on ``self``.
    """
    func_addr = None
    if self.pl == "ia32":
        func_addr = idc.get_name_ea_simple("fcntl")
        if func_addr == ida_idaapi.BADADDR:
            # Symbol not present: leave DOWN_BOUND untouched.
            return None
    else:
        for func_addr in idautils.Functions():
            function = ida_funcs.get_func(func_addr)
            start_ea, end_ea = function.start_ea, function.end_ea
            for addr in idautils.Heads(start_ea, end_ea):
                inst = idc.generate_disasm_line(addr, 0)
                if MIPS_CHECK_INST[0] not in inst:
                    continue
                addrr = idc.next_head(addr)
                instt = idc.generate_disasm_line(addrr, 1)
                if MIPS_CHECK_INST[1] not in instt:
                    continue
    # NOTE(review): the nesting of this assignment could not be recovered
    # with certainty from the source as received.  Placed here it runs for
    # both branches, and in the scan branch nothing stops the loops on a
    # match, so func_addr is simply the last function visited -- confirm
    # whether a `break` on match (or an assignment inside the inner loop)
    # was intended.
    self.DOWN_BOUND = func_addr
def hand_block_inside(line, start, reg_target):
    """List the instructions between `start` and `line` that mention a register.

    Walks backwards from `line` down to `start` (inclusive) and collects
    every instruction whose first or second operand text equals
    `reg_target`.

    Args:
        line: Address at which the backward walk begins.
        start: Lowest address that is still inspected.
        reg_target: Operand text to look for (compared for equality).

    Returns:
        str: One ``'\\t0x<addr>: <disasm>\\n'`` entry per matching
        instruction, in walk order (highest address first); empty string
        when nothing matches.
    """
    hits = []
    current = line
    # BADADDR compares greater than any real address, so without the
    # explicit check the walk would never stop once prev_head() runs out
    # of items.
    while current != idc.BADADDR and current >= start:
        operands = (idc.print_operand(current, 1),
                    idc.print_operand(current, 0))
        if reg_target in operands:
            hits.append('\t0x%x: %s\n' %
                        (current, idc.generate_disasm_line(current, 1)))
        current = prev_head(current)
    return "".join(hits)
def __get_opcodes(addr, strict):
    """Get the bytes of the instruction at addr, wildcarding where needed.

    Args:
      addr: address of the current instruction
      strict: when True, immediate operands are treated like memory/far
        operands and always trigger wildcarding; when False only far and
        memory operands do.

    Returns:
      String: hex-encoded representation of the bytes obtained at addr
      (possibly containing wildcards), or the integer 0 when the
      instruction cannot be decoded.
    """
    offsets_types = {idaapi.o_far, idaapi.o_mem}
    if strict:
        offsets_types.add(idaapi.o_imm)

    insn = idautils.DecodeInstruction(addr)
    if insn is None:
        return 0

    first_type = insn.Op1.type
    second_type = insn.Op2.type

    logging.debug('[VTGREP] Instruction: %s  [%d, %d, %d]',
                  idc.generate_disasm_line(addr, 0), insn.itype,
                  first_type, second_type)

    length = idc.get_item_size(addr)
    data_refs = list(idautils.DataRefsFrom(addr))

    # An operand refers to memory when it is an immediate that carries a
    # data reference, or when its type is one of the offset types.
    has_immediate = idaapi.o_imm in (first_type, second_type)
    refers_to_memory = (
        (bool(data_refs) and has_immediate)
        or first_type in offsets_types
        or second_type in offsets_types)

    # CALLs and JMPs (excluding near jumps) are wildcarded as well.
    is_call_or_non_near_jmp = (
        insn.itype == idaapi.NN_call
        or (insn.itype == idaapi.NN_jmp and first_type != idaapi.o_near))

    if refers_to_memory or is_call_or_non_near_jmp:
        return VTGrepSearch.__get_instruction_bytes_wildcarded(
            '', addr, insn.itype, first_type, second_type)

    # In any other case the raw bytes are used verbatim.
    raw = binascii.hexlify(idc.get_bytes(addr, length))
    return raw.decode('utf-8')
def findRspRbpDifference(curr_ea):
    """Accumulate a stack offset from `curr_ea` until RBP is assigned.

    Scans at most 256 instructions starting at `curr_ea`.  Every ``push``
    adds 8, every ``sub RSP, <imm>`` adds the immediate, and the first
    ``mov``/``lea`` whose destination is RBP ends the scan (a ``lea``
    additionally adjusts the total by its displacement).

    Returns the accumulated difference as an integer.
    """
    difference = 0
    for i in range(0, 256):
        mnem = idc.print_insn_mnem(curr_ea)
        debug(mnem)
        # NOTE(review): `insn` is not defined in this function; presumably
        # a module-level instruction object -- confirm.
        idaapi.decode_insn(insn, curr_ea)
        if mnem == 'push':
            # Each push is counted as 8 bytes.
            push_offset = 8
            difference += push_offset
        elif mnem == 'sub':
            if idc.get_operand_value(
                    curr_ea, 0
            ) == OperandValueRegister.RSP and idc.get_operand_type(
                    curr_ea, 1) == OperandType.IMMEDIATE_VALUE:
                rsp_substraction = idc.get_operand_value(curr_ea, 1)
                difference += rsp_substraction
        elif mnem == 'mov' or mnem == 'lea':
            #debug('type: ', idc.get_operand_type(curr_ea, 0), ' val: ', idc.get_operand_value(curr_ea, 0))
            debug(idc.generate_disasm_line(curr_ea, 0))
            if idc.get_operand_value(curr_ea, 0) == OperandValueRegister.RBP:
                debug(
                    mnem, ' type: ', idc.get_operand_type(curr_ea, 1),
                    ' val: ',
                    'bp: 0x{:X}'.format(idc.get_operand_value(curr_ea, 1)))
                #case 1: mov
                if mnem == 'mov':
                    if idc.get_operand_type(
                            curr_ea, 1
                    ) == OperandType.GENERAL_REG and idc.get_operand_value(
                            curr_ea, 1) == OperandValueRegister.RSP:
                        # `mov rbp, rsp`: the total is left unchanged
                        # (displacement is assigned but not used).
                        displacement = 0
                #case 2: lea
                if mnem == 'lea':
                    if idc.get_operand_type(curr_ea,
                                            1) == OperandType.MEMORY_REG:
                        if idc.get_operand_value(curr_ea,
                                                 1) > 0xF000000000000000:
                            # Very large values are treated as negative
                            # 64-bit displacements: take the two's
                            # complement magnitude and add it.
                            displacement = 0x10000000000000000 - idc.get_operand_value(
                                curr_ea, 1)
                            difference += displacement
                        else:
                            displacement = idc.get_operand_value(
                                curr_ea, 1)
                            difference -= displacement
                # NOTE(review): nesting of this break was reconstructed --
                # it is assumed to end the scan once RBP has been written.
                break
        # Advance by the size of the instruction decoded above.
        curr_ea += insn.size
    return difference
def lookup_str_in_func(func):
    """Print every instruction of `func` whose disassembly contains `input_str`.

    `input_str` is read from the enclosing scope.  Each hit is printed as
    ``<hex address>\\t<disassembly>\\t<function name>``.

    Args:
        func: Address inside the function to search.

    Returns:
        None.
    """
    func_name = idc.get_func_name(func)

    found = []
    for ea in idautils.FuncItems(func):
        dism = idc.generate_disasm_line(ea, 0)
        if input_str in dism:
            # Python 2 appends 'L' to hex() of a long; strip only that
            # suffix.  Slicing off the last character would drop a real
            # hex digit whenever the suffix is absent.
            address_text = hex(ea).rstrip("L")
            found.append("%s\t%s\t%s" % (address_text, dism, func_name))

    for one in found:
        print(one)
def cmmt_func(start):
    """Comment every ``push off_...`` in the function containing `start`.

    For each ``push`` whose operand is a memory reference named
    ``off_*`` the instruction is printed and ``get_cmmt`` is called with
    the instruction address and the operand value.

    Args:
        start: Any address inside the function to process.

    Returns:
        None.
    """
    ea = idc.get_func_attr(start, FUNCATTR_START)
    ea_end = idc.get_func_attr(start, FUNCATTR_END)
    if ea == idc.BADADDR or ea_end == idc.BADADDR:
        # `start` does not belong to a function: nothing to walk.
        return

    # The loop bound is the function end computed above; next_head()
    # returns BADADDR once that bound is reached.
    while ea != idc.BADADDR and ea < ea_end:
        mnem = idc.print_insn_mnem(ea)
        ins = idc.generate_disasm_line(ea, 0)
        offset = idc.get_operand_value(ea, 0)
        tp = idc.get_operand_type(ea, 0)
        op_text = idc.print_operand(ea, 0)
        if tp == o_mem and op_text.startswith('off_') and mnem == 'push':
            print('0x%x %s 0x%x ' % (ea, ins, offset))
            get_cmmt(ea, offset)
        ea = idc.next_head(ea, ea_end)
def wildcard_instruction(addr):
    """Replace the relocatable bytes of an instruction with wildcards.

    The mask returned by ``ida_idp.ph_calcrel`` decides, byte by byte,
    whether the original byte is kept (mask byte is zero) or replaced by
    ``??`` (mask byte is non-zero).

    Args:
      addr: the address of the current instruction to be wildcarded

    Returns:
      String: a leading space followed by one ``'<hex byte> '`` or
      ``'?? '`` entry per mask byte.
    """
    mask = ida_idp.ph_calcrel(addr)

    # IDA >= 7.5 returns a sequence whose first element is the mask;
    # older versions return the mask bytes directly.
    raw_mask = mask[0] if idaapi.IDA_SDK_VERSION >= 750 else mask
    mask_hex = binascii.hexlify(raw_mask).decode('utf-8')

    logging.debug('[VTGREP] Wildcarding: %s',
                  idc.generate_disasm_line(addr, 0))

    pieces = [' ']
    for offset in range(len(mask_hex) // 2):
        mask_byte = mask_hex[2 * offset:2 * offset + 2]
        if mask_byte != '00':
            pieces.append('?? ')
        else:
            original = idc.get_bytes(addr + offset, 1)
            pieces.append(binascii.hexlify(original).decode('utf-8') + ' ')

    pattern = ''.join(pieces)
    logging.debug('[VTGREP] Wildcarded: %s', pattern)
    return pattern
def search_for_vtables(linuxtable):
    """Scan ``.rdata`` for vtables located through their ``??_R4`` reference.

    Windows is better off finding the COL and deducing the vtable position
    from there, because vtables can be referenced before they are created
    in position of the binary.

    For every defined 4-byte item whose value is the address of a symbol
    named ``??_R4...`` and whose following dword is a non-zero
    ``dd offset`` item, the class name is derived from the demangled
    symbol and ``parse_from_key`` is called once per distinct name;
    scanning resumes at the address it returns.

    Args:
        linuxtable: Passed through to ``parse_from_key``.

    Returns:
        None.
    """
    seg = ida_segment.get_segm_by_name(".rdata")
    ea = seg.start_ea
    end = seg.end_ea

    found = set()
    while ea < end and ea != idc.BADADDR:
        class_name = None

        if (ida_bytes.get_item_size(ea) == 4
                and not ida_bytes.is_unknown(ida_bytes.get_full_flags(ea))):
            target = ida_bytes.get_wide_dword(ea)
            label = idc.get_name(target, ida_name.GN_VISIBLE)
            if label and label.startswith("??_R4"):
                demangled = idc.demangle_name(
                    label, idc.get_inf_attr(idc.INF_SHORT_DN))
                if demangled and demangled not in found:
                    slot = ea + 4
                    if (ida_bytes.get_item_size(slot) == 4
                            and ida_bytes.get_wide_dword(slot) != 0):
                        text = idc.generate_disasm_line(slot, 0)
                        if text and text.strip().startswith("dd offset"):
                            # Keep what precedes "::`RTTI" and drop the
                            # first six characters of the demangled text.
                            candidate = demangled.split("::`RTTI")[0][6:]
                            if candidate not in found:
                                class_name = candidate

        if class_name is None:
            ea = ida_bytes.next_head(ea, end)
        else:
            found.add(class_name)
            ea = parse_from_key(linuxtable, class_name, ea + 4)
def find_framescript_register(game_init):
    """Locate FrameScript::Register through the "FrameXML_Debug" string.

    Scans the function starting at `game_init` for the first instruction
    whose disassembly mentions ``"FrameXML_Debug"``; the target of the
    first ``call`` after it is taken as FrameScript::Register, renamed to
    its mangled name if necessary, printed, and returned.

    Args:
        game_init: Start address of the function to scan.

    Returns:
        int: Address of FrameScript::Register.

    Raises:
        RuntimeError: With stage ``1`` in the message when the marker
        string was never seen, or ``2`` when it was seen but no ``call``
        followed it.
    """
    mangled_name = '__Z28FrameScript_RegisterFunctionPKcPFiP9lua_StateE'
    game_init_end = idc.find_func_end(game_init)
    magic_string_found = False

    for head in idautils.Heads(game_init, game_init_end):
        disasm = idc.generate_disasm_line(head, 2)

        if not magic_string_found:
            # Still looking for the marker; the instruction that carries it
            # is never itself considered as the call.
            magic_string_found = '"FrameXML_Debug"' in disasm
            continue

        if disasm.startswith('call '):
            result = int(idc.get_operand_value(head, 0))
            if idc.get_func_name(result) != mangled_name:
                idc.set_name(result, mangled_name, SN_CHECK)
            print('FrameScript::Register: 0x%08x' % result)
            return result

    # Compute the stage first: `fmt % 2 if cond else 1` would otherwise be
    # parsed as `(fmt % 2) if cond else 1` and lose the message.
    stage = 2 if magic_string_found else 1
    raise RuntimeError('Unable to find FrameScript::Register (%d)' % stage)
def get_disasm_line(va):
    """Return the disassembly text at `va`, generated with GENDSM_FORCE_CODE."""
    flags = idc.GENDSM_FORCE_CODE
    return idc.generate_disasm_line(va, flags)
def DoDescentParser3(self):
    '''
        @brief Walk the function leveraging a recursive descent parser

        @detail Starting at the entry point, it walks a function, creates a basic block,
                and associates those blocks with a Function object.

                Addresses are taken from self.deferred_targets until it is empty.
                Each address starts a new BasicBlock that is walked instruction by
                instruction until an undecodable instruction, a conditional jump,
                a function terminator or a jmp is reached.

                much thanks to the author of "Practical Binary Analysis" for the
                break down of the algorithm in Chapter 8.

        @return function object
    '''
    #
    #   Mnemonic prefixes that end a basic block with two successors
    #   (jump target and fall-through).
    #
    jcc_terminators = (
        'jnz', 'jz', 'jo', 'jno', 'js', 'jns', 'je', 'jne', 'jb', 'jnae',
        'jc', 'jnb', 'jae', 'jnc', 'jbe', 'jna', 'ja', 'jnbe', 'jl',
        'jnge', 'jge', 'jnl', 'jle', 'jng', 'jg', 'jnle', 'jp', 'jpe',
        'jnp', 'jpo', 'jcxz', 'jecxz'
    )

    expected_func = Function()

    while self.deferred_targets:
        curr_insn_ea = self.deferred_targets.pop()

        if curr_insn_ea in self.instructions_walked:
            #
            #   skip instructions that were already walked
            #
            continue

        bblock = BasicBlock(curr_insn_ea)
        expected_func.AddBlock(bblock)

        if expected_func.start_ea == idc.BADADDR:
            #
            #   Set the function's expected start address
            #
            expected_func.start_ea = curr_insn_ea

        if bblock.incorrect_function_ea == idc.BADADDR:
            #
            #   Check if the BasicBlock is part of another function
            #
            if bblock.CheckPartOfAnotherFunction(curr_insn_ea,
                                                 expected_func.start_ea):
                expected_func.rogue_basic_blocks.append(bblock)

        while curr_insn_ea != idc.BADADDR:
            #
            #   Walks the basic block
            #
            self.instructions_walked.append(curr_insn_ea)
            bblock.AddInsnAddress(curr_insn_ea)

            #
            #   Verify current instruction information
            #
            curr_insn = ida_ua.insn_t()
            decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea)
            if decode_result < 1:
                #
                #   break if instruction invalid
                #
                bblock.end_ea = curr_insn_ea
                break

            #
            #   Get instruction disassembly
            #
            curr_insn_dism = idc.generate_disasm_line(curr_insn_ea, 1)

            if curr_insn_dism.startswith(jcc_terminators):
                #
                #   JCC conditionals
                #
                # NOTE(review): this branch resolves the target through
                # self, the jmp branch below through maze_deobf_utils --
                # kept as found, confirm both exist.
                jmp_target_ea = self.GetInstuctionTargetAddress(curr_insn)
                if jmp_target_ea not in self.deferred_targets:
                    self.deferred_targets.append(jmp_target_ea)

                #
                #   Add fall through address, BB ends at JCC conditional
                #
                self.deferred_targets.append(curr_insn_ea + curr_insn.size)
                bblock.end_ea = curr_insn_ea
                break

            if maze_deobf_utils.CheckInstructionIsFunctionTerminator(
                    curr_insn_ea):
                #
                #   Return instruction
                #
                bblock.end_ea = curr_insn_ea
                bblock.is_epilogue = True
                expected_func.end_ea = idc.next_head(curr_insn_ea)
                expected_func.AddExitPoint(curr_insn_ea)
                break

            if curr_insn_dism.startswith("jmp"):
                jmp_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(
                    curr_insn)
                if jmp_target_ea not in self.deferred_targets:
                    self.deferred_targets.append(jmp_target_ea)
                bblock.end_ea = curr_insn_ea
                break

            curr_insn_ea = curr_insn_ea + curr_insn.size

    return expected_func
def DoDescentParser2(self):
    '''
        @brief Walk the function leveraging a recursive descent parser

        @detail Walks a function based on an Approach. This is unused in the byte-search implementation.

                Addresses are popped from self.deferred_targets until it is
                empty; each one is walked instruction by instruction until
                an undecodable instruction, a conditional jump, a function
                terminator or a jmp ends the basic block.

                The obfuscated-call ("push") and obfuscated-absolute-jump
                ("jz"/"jnz") handling is guarded by do_things, which is
                False and never reassigned here, so those branches do not
                run.

        @return list with the address following each function terminator
                that was reached
    '''
    #
    #   jmps = [eval("idaapi."+name) for name in dir(idaapi) if "NN_j" in name]
    #
    jcc_terminators = [
        'jnz', 'jz', 'jo', 'jno', 'js', 'jns', 'je', 'jne', 'jb', 'jnae',
        'jc', 'jnb', 'jae', 'jnc', 'jbe', 'jna', 'ja', 'jnbe', 'jl',
        'jnge', 'jge', 'jnl', 'jle', 'jng', 'jg', 'jnle', 'jp', 'jpe',
        'jnp', 'jpo', 'jcxz', 'jecxz'
    ]

    print "Starting recursive decent:, starting at: %08x" % (
        self.deferred_targets[0])

    func_end_ea = []
    # Switch for the obfuscation-specific checks below; always off here.
    do_things = False

    while len(self.deferred_targets) > 0:
        curr_insn_ea = self.deferred_targets.pop()
        bblock = BasicBlock(curr_insn_ea)

        if curr_insn_ea in self.instructions_walked:
            #
            #   skip instructions that were already walked
            #
            continue

        print "Next BB: %08x" % curr_insn_ea

        while curr_insn_ea != idc.BADADDR:
            print "Current ip: %08x" % (curr_insn_ea)
            self.instructions_walked.append(curr_insn_ea)
            bblock.AddInsnAddress(curr_insn_ea)

            #
            #   Verify current instruction information
            #
            curr_insn = ida_ua.insn_t()
            decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea)
            if decode_result < 1:
                #
                #   break if instruction invalid
                #
                bblock.end_ea = curr_insn_ea
                break

            #
            #   Get instruction disassembly
            #
            curr_insn_dism = idc.generate_disasm_line(curr_insn_ea, 1)

            #
            #   Check Instruction matches Obfuscated Call
            #
            if curr_insn_dism.startswith("push") and do_things:
                push_insn = curr_insn
                if maze_deobf_utils.CheckValidTargettingInstr(
                        push_insn, "push"):
                    #
                    #   Check for different CALL instruction types
                    #

                    #
                    #   Type One
                    #
                    ret_addr_ea = self.deobf_approach.CheckCallTypeOne(
                        curr_insn_ea)
                    if ret_addr_ea != idc.BADADDR:
                        self.deferred_targets.append(ret_addr_ea)
                        bblock.end_ea = curr_insn_ea
                        break

                    #
                    #   Type Two
                    #
                    ret_addr_ea = self.deobf_approach.CheckCallTypeTwo(
                        curr_insn_ea)
                    if ret_addr_ea != idc.BADADDR:
                        self.deferred_targets.append(ret_addr_ea)
                        bblock.end_ea = curr_insn_ea
                        break

                    #
                    #   Type Three
                    #
                    ret_addr_ea = self.deobf_approach.CheckCallTypeThree(
                        curr_insn_ea)
                    if ret_addr_ea != idc.BADADDR:
                        self.deferred_targets.append(ret_addr_ea)
                        bblock.end_ea = curr_insn_ea
                        break

                #
                #   Check instruction matches an obfuscated Windows API Call
                #
                if maze_deobf_utils.CheckValidInstrImmediate(
                        push_insn, "push"):
                    ret_addr_ea = self.deobf_approach.CheckObfuscatedWindowsAPICall(
                        curr_insn_ea)
                    if ret_addr_ea != idc.BADADDR:
                        print "Obfuscated Windows API Call: %08x, Target: %08x" % (
                            curr_insn_ea, ret_addr_ea)
                        self.deferred_targets.append(ret_addr_ea)
                        bblock.end_ea = curr_insn_ea
                        break

            #
            #   Check Instruction matches Obfuscated Absolute Jumps
            #
            if curr_insn_dism.startswith("jz") and do_things:
                jz_insn = curr_insn
                if maze_deobf_utils.CheckValidTargettingInstr(
                        jz_insn, "jz"):
                    jz_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(
                        jz_insn)

                    next_insn_ea = curr_insn_ea + jz_insn.size
                    next_insn = ida_ua.insn_t()
                    ida_ua.decode_insn(next_insn, next_insn_ea)
                    if maze_deobf_utils.CheckValidTargettingInstr(
                            next_insn, "jnz"):
                        #
                        #   Instruction is an absolute jump
                        #     get absolute jump target address.
                        #
                        print "JZ/JNZ block: %08x" % curr_insn_ea
                        abs_jmp_target_ea = self.deobf_approach.GetObfuscJMPTarget(
                            next_insn_ea)
                        if abs_jmp_target_ea != idc.BADADDR:
                            print "JNZ %08x Adding Target: %08x" % (
                                next_insn_ea, abs_jmp_target_ea)
                            print "JZ %08x Adding Target: %08x" % (
                                curr_insn_ea, jz_target_ea)
                            self.deferred_targets.append(jz_target_ea)
                            self.deferred_targets.append(abs_jmp_target_ea)
                            bblock.end_ea = curr_insn_ea
                            break

            if curr_insn_dism.startswith(tuple(jcc_terminators)):
                #
                #   JCC conditionals
                #
                jcc_insn = curr_insn
                jmp_target_ea = self.GetInstuctionTargetAddress(jcc_insn)
                if jmp_target_ea not in self.deferred_targets:
                    self.deferred_targets.append(jmp_target_ea)

                #
                #   Add fall through address, BB ends at JCC conditional
                #
                self.deferred_targets.append(curr_insn_ea + jcc_insn.size)
                bblock.end_ea = curr_insn_ea
                break

            if maze_deobf_utils.CheckInstructionIsFunctionTerminator(
                    curr_insn_ea):
                #
                #   Return instruction
                #
                bblock.end_ea = curr_insn_ea
                bblock.is_epilogue = True
                func_end_ea.append(idc.next_head(curr_insn_ea))
                break

            if curr_insn_dism.startswith("jmp"):
                jmp_insn = curr_insn
                jmp_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(
                    jmp_insn)
                if jmp_target_ea not in self.deferred_targets:
                    self.deferred_targets.append(jmp_target_ea)
                bblock.end_ea = curr_insn_ea
                break

            # No terminator hit: fall through to the next instruction.
            curr_insn_ea = curr_insn_ea + curr_insn.size

    return func_end_ea
def __str__(self):
    """Return the disassembly at this address (comment stripped) and the address."""
    disasm = generate_disasm_line(self.__ea, 0)
    # Everything from the first ';' onwards is dropped.
    code_text = disasm.split(";")[0]
    return "%s @ 0x%X" % (code_text, self.__ea)
def DoDescentParser(self):
    '''
        @brief Walk the function leveraging a recursive descent parser

        @detail Addresses are taken from self.deferred_targets until it is empty.
                Each address starts a basic block that is walked instruction by
                instruction until an undecodable instruction, a conditional jump,
                a function terminator or a jmp is reached.

                While walking, two things are collected:
                    - the address following every function terminator reached
                    - the start address of every other function that a popped
                      target already belongs to

                This method only collects that information; it does not itself
                undefine, mark as code, or redefine anything.
                NOTE(review): presumably the caller uses the two lists to
                rebuild the functions -- confirm.

                much thanks to the author of "Practical Binary Analysis" for the
                break down of the algorithm in Chapter 8.

        @return [list of addresses following terminators,
                 list of start addresses of other functions encountered]
    '''
    #
    #   Mnemonic prefixes that end a basic block with two successors
    #   (jump target and fall-through).
    #
    jcc_terminators = (
        'jnz', 'jz', 'jo', 'jno', 'js', 'jns', 'je', 'jne', 'jb', 'jnae',
        'jc', 'jnb', 'jae', 'jnc', 'jbe', 'jna', 'ja', 'jnbe', 'jl',
        'jnge', 'jge', 'jnl', 'jle', 'jng', 'jg', 'jnle', 'jp', 'jpe',
        'jnp', 'jpo', 'jcxz', 'jecxz'
    )

    func_end_ea = []
    ea_part_of_another_func = []
    curr_func = None

    while self.deferred_targets:
        curr_insn_ea = self.deferred_targets.pop()

        if not curr_func:
            #
            #   The first target that resolves to a function defines the
            #   function being walked.
            #
            curr_func = ida_funcs.get_func(curr_insn_ea)
        else:
            target_func = ida_funcs.get_func(curr_insn_ea)
            if target_func and target_func.start_ea != curr_func.start_ea:
                if target_func.start_ea not in ea_part_of_another_func:
                    ea_part_of_another_func.append(target_func.start_ea)

        bblock = BasicBlock(curr_insn_ea)

        if curr_insn_ea in self.instructions_walked:
            #
            #   skip instructions that were already walked
            #
            continue

        while curr_insn_ea != idc.BADADDR:
            self.instructions_walked.append(curr_insn_ea)
            bblock.AddInsnAddress(curr_insn_ea)

            #
            #   Verify current instruction information
            #
            curr_insn = ida_ua.insn_t()
            decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea)
            if decode_result < 1:
                #
                #   break if instruction invalid
                #
                bblock.end_ea = curr_insn_ea
                break

            #
            #   Get instruction disassembly
            #
            curr_insn_dism = idc.generate_disasm_line(curr_insn_ea, 1)

            if curr_insn_dism.startswith(jcc_terminators):
                #
                #   JCC conditionals
                #
                # NOTE(review): this branch resolves the target through
                # self, the jmp branch below through maze_deobf_utils --
                # kept as found, confirm both exist.
                jmp_target_ea = self.GetInstuctionTargetAddress(curr_insn)
                if jmp_target_ea not in self.deferred_targets:
                    self.deferred_targets.append(jmp_target_ea)

                #
                #   Add fall through address, BB ends at JCC conditional
                #
                self.deferred_targets.append(curr_insn_ea + curr_insn.size)
                bblock.end_ea = curr_insn_ea
                break

            if maze_deobf_utils.CheckInstructionIsFunctionTerminator(
                    curr_insn_ea):
                #
                #   Return instruction
                #
                bblock.end_ea = curr_insn_ea
                bblock.is_epilogue = True
                func_end_ea.append(idc.next_head(curr_insn_ea))
                break

            if curr_insn_dism.startswith("jmp"):
                jmp_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(
                    curr_insn)
                if jmp_target_ea not in self.deferred_targets:
                    self.deferred_targets.append(jmp_target_ea)
                bblock.end_ea = curr_insn_ea
                break

            curr_insn_ea = curr_insn_ea + curr_insn.size

    return [func_end_ea, ea_part_of_another_func]
def check_fmt_function(name, addr):
    """
    Check if the format string argument is not valid.

    Walks backwards from the call at `addr` to the instruction that sets
    the format argument of the printf-like function `name`, resolves the
    source of that argument, and reports the call when the format is not
    known to live in a non-writable segment.

    Returns None when no format-setting instruction is found before the
    start of the function (or a ret/jmp/b), or when the format lies in a
    segment without write permission.  Otherwise prints a warning and
    returns ``["0x<addr>", name, <operand text>]``.
    """
    function_head = idc.get_func_attr(addr, idc.FUNCATTR_START)
    stoppers = ("ret", "retn", "jmp", "b")

    while True:
        addr = idc.prev_head(addr)
        if addr == idc.BADADDR:
            # Ran out of items before finding anything.
            return
        op = idc.print_insn_mnem(addr).lower()
        dst = idc.print_operand(addr, 0)

        if op in stoppers or addr < function_head:
            return

        c = idc.get_cmt(addr, 0)
        if c and c.lower() == "format":
            break
        # The suffix tests go from most to least specific: the first one
        # that matches `name` decides which destination is the format slot.
        elif name.endswith(("snprintf_chk", )):
            if op in ("mov", "lea") and dst.endswith(
                    ("r8", "r8d", "[esp+10h]")):
                break
        elif name.endswith(("sprintf_chk", )):
            if op in ("mov", "lea") and (dst.endswith(
                    ("rcx", "[esp+0Ch]", "R3")) or
                                         dst.endswith("ecx") and BITS == 64):
                break
        elif name.endswith(("snprintf", "fnprintf")):
            if op in ("mov", "lea") and (dst.endswith(
                    ("rdx", "[esp+8]", "R2")) or
                                         dst.endswith("edx") and BITS == 64):
                break
        elif name.endswith(
                ("sprintf", "fprintf", "dprintf", "printf_chk")):
            if op in ("mov", "lea") and (dst.endswith(
                    ("rsi", "[esp+4]", "R1")) or
                                         dst.endswith("esi") and BITS == 64):
                break
        elif name.endswith("printf"):
            if op in ("mov", "lea") and (dst.endswith(
                    ("rdi", "[esp]", "R0")) or
                                         dst.endswith("edi") and BITS == 64):
                break

    # format arg found, check its type and value
    # get last operand
    op_index = idc.generate_disasm_line(addr, 0).count(",")
    op_type = idc.get_operand_type(addr, op_index)
    opnd = idc.print_operand(addr, op_index)

    if op_type == idc.o_reg:
        # format is in register, try to track back and get the source
        _addr = addr
        while True:
            _addr = idc.prev_head(_addr)
            if _addr == idc.BADADDR:
                break
            _op = idc.print_insn_mnem(_addr).lower()
            if _op in stoppers or _addr < function_head:
                break
            elif _op in ("mov", "lea", "ldr") and idc.print_operand(
                    _addr, 0) == opnd:
                op_type = idc.get_operand_type(_addr, 1)
                opnd = idc.print_operand(_addr, 1)
                addr = _addr
                # The source is operand 1 of the instruction just found;
                # the index computed for the original instruction no
                # longer applies.
                op_index = 1
                break

    if op_type == idc.o_imm or op_type == idc.o_mem:
        # format is a memory address, check if it's in writable segment
        op_addr = idc.get_operand_value(addr, op_index)
        seg = idaapi.getseg(op_addr)
        if seg:
            if not seg.perm & idaapi.SEGPERM_WRITE:
                # format is in read-only segment
                return

    print("0x%X: Possible Vulnerability: %s, format = %s" %
          (addr, name, opnd))
    return ["0x%X" % addr, name, opnd]
def goto_prev_token(self):
    """Move to the previous instruction whose disassembly contains the highlighted token.

    Does nothing when no identifier is highlighted.  The search itself is
    delegated to ``self._iterate_instrs`` with ``idc.PrevHead`` as the
    step function.
    """
    token = idaapi.get_highlighted_identifier()
    if not token:
        # Covers both the empty string and a missing (None) highlight.
        return

    def mentions_token(va):
        # An address without disassembly text cannot contain the token.
        return token in (idc.generate_disasm_line(va, 0) or "")

    self._iterate_instrs(idc.PrevHead, mentions_token)
def getDisasmLine(self, addr):
    """Return the disassembly text at `addr` (generated with flags 0)."""
    disasm_text = idc.generate_disasm_line(addr, 0)
    return disasm_text
tl.itp = idaapi.ITP_SEMI cfunc.set_user_cmt(tl, decrypted_string) cfunc.save_user_cmts() #string decrypting func NAME = "String_Decrypt1" - 0040C8F5 string_decrypt_addr = idc.get_name_ea_simple("String_Decrypt1") print("Func String_Decrypt1 address: 0x%x" % (string_decrypt_addr)) Xref_decrypt_funcs = [] for addr in idautils.CodeRefsTo(string_decrypt_addr, 0): Xref_decrypt_funcs.append(addr) print("XREF String_Decrypt1 func COUNT: %d" % (len(Xref_decrypt_funcs))) for addr in Xref_decrypt_funcs: prev_instr = idc.prev_head(addr) print("XREF Func String_Decrypt1 prev instruction: 0x%x %s" % (prev_instr,idc.generate_disasm_line(prev_instr, 0))) m = idc.print_insn_mnem(prev_instr) #searching instruction in prev-inst addr and finding index argument if m == 'mov': op = idc.get_operand_type(prev_instr, 1) if op == o_imm: index = idc.get_operand_value(prev_instr, 1) print("Index value: 0x%x" % (index)) decryptor(index,addr) if m == 'pop': prev_instr2 = idc.prev_head(prev_instr) prev_instr3 = idc.prev_head(prev_instr2) op = idc.get_operand_type(prev_instr3, 0) if op == o_imm: index = idc.get_operand_value(prev_instr3, 0)
cfunc.save_user_cmts() #string decrypting func NAME = "String_Decrypt2" - 0040C929 string_decrypt_addr = idc.get_name_ea_simple("String_Decrypt2") print("Func String_Decrypt2 address: 0x%x" % (string_decrypt_addr)) Xref_decrypt_funcs = [] for addr in idautils.CodeRefsTo(string_decrypt_addr, 0): Xref_decrypt_funcs.append(addr) print("XREF String_Decrypt2 func COUNT: %d" % (len(Xref_decrypt_funcs))) for addr in Xref_decrypt_funcs: prev_instr = idc.prev_head(addr) print("XREF Func String_Decrypt2 prev instruction: 0x%x %s" % (prev_instr, idc.generate_disasm_line(prev_instr, 0))) m = idc.print_insn_mnem(prev_instr) #searching instruction in prev-inst addr and finding index argument if m == 'mov': op = idc.get_operand_type(prev_instr, 1) if op == o_imm: index = idc.get_operand_value(prev_instr, 1) print("Index value: 0x%x" % (index)) decryptor(index, addr) if m == 'pop': prev_instr2 = idc.prev_head(prev_instr) prev_instr3 = idc.prev_head(prev_instr2) op = idc.get_operand_type(prev_instr3, 0) if op == o_imm: index = idc.get_operand_value(prev_instr3, 0)
def disassemble(ea):
    """Get the disassembly text associated with an `ea`."""
    force_code = idc.GENDSM_FORCE_CODE
    return idc.generate_disasm_line(ea, force_code)
def get_next_reg(self, addr, reg):
    """
    Find the next register that is the source of the assignment.

    Inspects the single instruction at `addr` and decides how the
    backtracking of `reg` continues past it.

    Returns the register name to keep tracing (possibly unchanged), or
    None when tracing stops here (assignment point found, value derived
    from SP, or an operation too complex to follow).
    """
    reg_t = reg
    addr_t = addr

    mnem = get_mnem(addr_t)
    line = idc.generate_disasm_line(addr_t, 0)
    if mnem.startswith('BLX') and addr_t != self.trace_addr:
        # A call other than the one the trace started from.
        FELogger.info("途径函数\t"+hexstr(addr)+"\t"+line)
        func_name = idc.print_operand(addr_t, 0)
        func_addr = name_to_addr(func_name)
        if func_addr is not None:
            # NOTE(review): the pairing of the `else` below was
            # reconstructed from flattened source -- confirm it belongs to
            # `if reg_t == 'R0'`.
            if reg_t == 'R0':
                # R0 written by a returning callee counts as assigned here.
                does_return = ida_funcs.get_func(func_addr).does_return()
                if does_return == True:
                    FELogger.info("找到赋值点\t"+hexstr(addr)+"\t"+line)
                    return None
            else:
                # Known source functions map a destination register to the
                # register its data comes from.
                if func_name in SOURCE_FUNC and reg_t == SOURCE_FUNC[func_name]['dest']:
                    reg_t = SOURCE_FUNC[func_name]['src']
                    if reg_t == 'None':
                        FELogger.info("找到赋值点\t"+hexstr(addr)+"\t"+line)
                        return None
                    else:
                        FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)

    inst_list_t = INST_LIST
    # Match the register name only when it is not followed by another
    # digit, so that e.g. R1 does not match inside R10.
    reg_re = re.compile(reg_t + '\\D|' + reg_t + '\\Z')
    if reg_re.search(line):
        if mnem in reduce(lambda x, y: x + y, [value for value in inst_list_t.values()]):
            op1 = idc.print_operand(addr_t, 0).split("!")[0]
            if mnem in inst_list_t['load_multi']:
                # found:       LDM R1, {R0-R3}
                regs = self.parse_operands(mnem, addr_t)
                if reg_t not in regs:
                    FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                else:
                    FELogger.info("找到赋值点\t"+hexstr(addr)+"\t"+line)
                    return None
            else:
                if op1 != reg_t or mnem in inst_list_t['other']:
                    # The traced register is not the destination: keep going.
                    FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                elif mnem in inst_list_t['arithmetic']:
                    # stop:          ADD R0, SP; ADD R0, SP, #10
                    # backtrack R0:  ADD R0, R1; ADD R0, #10
                    # backtrack R1:  ADD R0, R1, #10; ADD R0, R1, R2
                    op2_tmp = idc.print_operand(addr_t, 1)
                    if idc.get_operand_type(addr_t, 2) == ida_ua.o_void:
                        # Two-operand form.
                        if idc.get_operand_type(addr_t, 1) == ida_ua.o_reg:
                            if op2_tmp == 'SP':
                                FELogger.info("取消回溯SP\t"+hexstr(addr)+"\t"+line)
                                return None
                            else:
                                FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                        else:
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                    elif idc.get_operand_type(addr_t, 3) == ida_ua.o_void:
                        # Three-operand form.
                        op3_tmp = idc.print_operand(addr_t, 2)
                        if op2_tmp == 'SP' or op3_tmp == 'SP':
                            FELogger.info("取消回溯SP\t"+hexstr(addr)+"\t"+line)
                            return None
                        elif reg_t == op2_tmp or reg_t == op3_tmp:
                            FELogger.info("复杂运算\t"+hexstr(addr)+"\t"+line)
                            return None
                        else:
                            reg_t = op2_tmp
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                    else:
                        # Four-operand form.
                        op3_tmp = idc.print_operand(addr_t, 2)
                        op4_tmp = idc.print_operand(addr_t, 3)
                        if op2_tmp == 'SP' or op3_tmp == 'SP' or op4_tmp == 'SP':
                            FELogger.info("取消回溯SP\t"+hexstr(addr)+"\t"+line)
                            return None
                        elif reg_t == op2_tmp or reg_t == op3_tmp or reg_t == op4_tmp:
                            FELogger.info("复杂运算\t"+hexstr(addr)+"\t"+line)
                            return None
                        else:
                            reg_t = op2_tmp
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                elif mnem in inst_list_t['move']:
                    # stop:          MOV R0, SP; MOV R0, SP, #10
                    # found:         MOV R0, #10
                    # backtrack R1:  MOV R0, R1
                    # backtrack D8:  VMOV R0, R1, D16
                    if mnem.startswith('VMOV'):
                        op3_tmp = idc.print_operand(addr_t, 2)
                        reg_t = op3_tmp
                        FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                    else:
                        op2_tmp = idc.print_operand(addr_t, 1)
                        if op2_tmp == 'SP':
                            FELogger.info("取消回溯SP\t"+hexstr(addr)+"\t"+line)
                            return None
                        elif idc.get_operand_type(addr_t, 1) == ida_ua.o_reg:
                            reg_t = op2_tmp
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                        elif mnem in ['MOVT', 'MOVTGT', 'MOVTLE']:
                            # These mnemonics do not end the trace; the
                            # same register keeps being followed.
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                        else:
                            FELogger.info("找到赋值点\t"+hexstr(addr)+"\t"+line)
                            return None
                elif mnem in inst_list_t['load']:
                    # found:         LDR R0, =xxxxxxx
                    # stop:          LDR R0, [SP, #10]
                    # backtrack R1:  LDR R0, [R1, #10]
                    # backtrack R0:  LDR R0, [R0, R1, #10]
                    if idc.get_operand_type(addr_t, 1) == ida_ua.o_mem:
                        FELogger.info("找到赋值点\t"+hexstr(addr)+"\t"+line)
                        return None
                    else:
                        regs_tmp = self.parse_operands(mnem, addr_t)
                        if 'SP' in regs_tmp:
                            FELogger.info("取消回溯SP\t"+hexstr(addr)+"\t"+line)
                            return None
                        elif reg_t in regs_tmp:
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                        else:
                            reg_t = regs_tmp[0]
                            FELogger.info("回溯"+reg_t+"\t"+hexstr(addr)+"\t"+line)
                else:
                    # Destination matches but the instruction class is not
                    # handled above.
                    FELogger.info("未知指令\t"+hexstr(addr)+"\t"+line)
        else:
            # Mnemonic is not listed in any INST_LIST category.
            FELogger.info("未知指令\t"+hexstr(addr)+"\t"+line)
    else:
        # The instruction does not mention the traced register.
        pass

    return reg_t