def main():
    """Find and annotate string pointers set up by a lea/mov/mov sequence.

    Every code head of every segment yielded by ``enum_segments()`` is
    checked for this three-instruction pattern::

        lea     rax, unk_6BDF88
        mov     [rsp+0], rax
        mov     qword ptr [rsp+8], 40h

    Operand 1 of the ``lea`` is taken as the data address and operand 1 of
    the second ``mov`` as the length. A candidate is kept only when the data
    address is in a segment named ``.rdata`` or ``UPX1``, the length is in
    the range 3..0x100, and the bytes contain no NUL and decode as ASCII.
    Each hit is printed; the first byte at the data address is redefined as
    a byte item, the text is attached as a comment and the address is
    renamed ``s_<hex address>``.
    """
    string_segments = (".rdata", "UPX1")

    for seg_begin, seg_end, _seg_name in enum_segments():
        for lea_ea in idautils.Heads(seg_begin, seg_end):
            if not is_code(lea_ea):
                continue

            # The pattern is exactly: lea, then mov, then mov.
            if ida_ua.ua_mnem(lea_ea) != "lea":
                continue
            ptr_store_ea = ida_bytes.next_head(lea_ea, idc.BADADDR)
            if ida_ua.ua_mnem(ptr_store_ea) != "mov":
                continue
            len_store_ea = ida_bytes.next_head(ptr_store_ea, idc.BADADDR)
            if ida_ua.ua_mnem(len_store_ea) != "mov":
                continue

            data_ea = idc.get_operand_value(lea_ea, 1)
            if idc.get_segm_name(data_ea) not in string_segments:
                continue

            length = idc.get_operand_value(len_store_ea, 1)
            if not 2 < length <= 0x100:
                continue

            raw = ida_bytes.get_bytes(data_ea, length)
            if not raw or b"\x00" in raw:
                continue

            try:
                text = raw.decode("ascii")
            except UnicodeDecodeError:
                continue

            print("string pointer: 0x%x -> 0x%x: %s" % (lea_ea, data_ea, text))
            ida_bytes.del_items(data_ea, 1)
            ida_bytes.create_data(data_ea, idc.FF_BYTE, 1, idc.BADADDR)
            ida_bytes.set_cmt(data_ea, text, True)
            ida_name.set_name(data_ea, "s_%x" % (data_ea))
def find_mnem(target_ea, target_mnem, backward=False, threshold=0x100):
    """Follow code cross-references from ``target_ea`` to find a mnemonic.

    Starting at ``target_ea``, the walk follows the first not-yet-visited
    code xref from each address (or to it, when ``backward`` is true) until
    an instruction whose mnemonic equals ``target_mnem`` is found.

    :param target_ea: address to start from; must not be ``None``.
    :param target_mnem: mnemonic string compared against ``ida_ua.ua_mnem``.
    :param backward: follow xrefs *to* each address instead of *from* it.
    :param threshold: maximum distance in bytes from ``target_ea`` that the
        first xref of each step may lie at. Previously this parameter was
        ignored and ``0x100`` was always used.
    :returns: address of the matching instruction, or ``None`` when the walk
        hits an undecodable address, leaves the window, or runs out of
        unvisited xrefs.
    """
    assert target_ea is not None

    if backward:
        first_cref = ida_xref.get_first_cref_to
        next_cref = ida_xref.get_next_cref_to
    else:
        first_cref = ida_xref.get_first_cref_from
        next_cref = ida_xref.get_next_cref_from

    ea = target_ea
    visited = set()
    while True:
        mnem = ida_ua.ua_mnem(ea)
        if not ida_ua.can_decode(ea) or not mnem:
            return None
        if mnem == target_mnem:
            return ea
        visited.add(ea)

        next_ea = first_cref(ea)
        # Only the first xref is checked against the window, as before.
        if backward:
            if next_ea < target_ea - threshold:
                return None
        elif next_ea > target_ea + threshold:
            return None

        # Skip xrefs already walked so that loops in the flow terminate.
        while next_ea != idc.BADADDR and next_ea in visited:
            next_ea = next_cref(ea, next_ea)

        if next_ea == idc.BADADDR:
            # No unvisited xref left; nothing more to follow.
            return None
        ea = next_ea
def _btn_trace_color_clicked(self):
    """Colour traced jump/call sources and add the code xrefs they lack.

    For every ``target_pc`` in ``self.infoparser.flows``, each recorded
    source address that does not already appear among the xrefs to
    ``target_pc`` is coloured with ``col2`` and gets a code xref to the
    target: ``fl_CN`` when the source mnemonic is ``call``, ``fl_JN``
    otherwise. The flow type stored in the trace is not used.

    Uses ``.items()`` (valid on Python 2 and 3) instead of ``.iteritems()``
    and ``ida_xref.add_cref`` instead of the legacy ``idc.AddCodeXref``.
    """
    col = 0xccffcc
    col2 = 0xbbeebb

    # Basic-block colouring is deliberately disabled in the original code.
    # NOTE(review): the extent of this disabled block was reconstructed from
    # a whitespace-mangled source - confirm it covers only this loop.
    if False:
        for ea, basic_block in self.infoparser.basic_blocks.items():
            while ea != idaapi.BADADDR:
                idc.set_color(ea, idc.CIC_ITEM, col)
                ea = idc.next_head(ea, basic_block['end'])

    for target_pc, flow in self.infoparser.flows.items():
        # A set gives O(1) membership tests for the loop below.
        known_sources = {xref.frm for xref in idautils.XrefsTo(target_pc)}

        for jump_from_pc, _recorded_type in flow.items():
            if jump_from_pc in known_sources:
                continue

            if ida_ua.ua_mnem(jump_from_pc) == 'call':
                flowtype = idaapi.fl_CN
            else:
                flowtype = idaapi.fl_JN

            idc.set_color(jump_from_pc, idc.CIC_ITEM, col2)
            ida_xref.add_cref(jump_from_pc, target_pc, flowtype)
def get_call_args_arm(ea, count_max=10):
    """
    Get the arguments of a function call (currently only 4 arguments,
    R0-R3, are supported).

    Walks backwards from the call at ``ea`` over at most ``count_max + 1``
    heads and text-matches each disassembly line against per-register
    templates. For MOV/MOVEQ/ADR/ADREQ/ADRNE-style lines the value of
    operand 1 is recorded; for LDR lines operand 1 is treated as an address
    and the dword stored there is recorded. Only the first match for each
    register (the one closest to the call) is kept.

    :param ea: address of a BL / SVC / BLNE / BLHI / BLEQ instruction.
    :param count_max: bound on how many preceding heads are inspected.
    :returns: dict mapping argument index (0-3) to the value found, or
        ``None`` (after printing an error) when ``ea`` is not one of the
        supported call instructions.
    """
    if ida_ua.ua_mnem(ea) not in ("BL", "SVC", "BLNE", "BLHI", "BLEQ"):
        print(
            "Error: not a BL or SVC or BLNE or BLHI or BLEQ instruction at 0x%x"
            % ea)
        return None

    # One row per argument register: templates whose operand 1 is the value.
    # NOTE(review): the R2 row uses ADDEQ/ADDNE where the other rows use
    # ADREQ/ADRNE; kept as in the original, but it looks like a typo.
    direct_templates = (
        ("MOV R0,", "MOVEQ R0,", "ADR R0,", "ADREQ R0,", "ADRNE R0,"),
        ("MOV R1,", "MOVEQ R1,", "ADR R1,", "ADREQ R1,", "ADRNE R1,"),
        ("MOV R2,", "MOVEQ R2,", "ADR R2,", "ADDEQ R2,", "ADDNE R2,"),
        ("MOV R3,", "MOVEQ R3,", "ADR R3,", "ADREQ R3,", "ADRNE R3,"),
    )
    # Templates whose operand 1 is an address to read the value from.
    load_templates = ("LDR R0,", "LDR R1,", "LDR R2,", "LDR R3,")

    args = {}
    ea = ida_bytes.prev_head(ea, 0)
    count = 0
    while count <= count_max:
        if ea == idc.BADADDR or len(args) == len(load_templates):
            # Ran out of heads, or every supported argument is resolved.
            break

        # Collapse runs of whitespace so the single-space templates match
        # however the listing pads the mnemonic column.
        disasm_line = " ".join(idc.generate_disasm_line(ea, 0).split())

        for i, (direct, load) in enumerate(
                zip(direct_templates, load_templates)):
            # Assume the instruction closest to the call is the one that
            # assigns the argument; ignore other cases (such as meeting
            # another MOV to the same register further up).
            if i in args:
                continue
            if any(template in disasm_line for template in direct):
                args[i] = idc.get_operand_value(ea, 1)
            elif load in disasm_line:
                addr = idc.get_operand_value(ea, 1)
                args[i] = ida_bytes.get_wide_dword(addr)
            else:
                continue
            print("Found argument %d: 0x%x" % (i, args[i]))

        ea = ida_bytes.prev_head(ea, 0)
        count += 1
    return args
def run(self, start_ea, end_ea=idc.BADADDR, end_cnt=100):
    """Step through instructions from ``start_ea``, feeding ``run_helper``.

    Calls ``self.init()``, records the function containing ``start_ea`` in
    ``self.func_start`` and then walks instruction by instruction. The walk
    stops when it revisits an address, leaves the starting function, reaches
    ``end_ea``, exceeds ``end_cnt`` steps (only when ``end_ea`` is
    ``BADADDR``), meets an undecodable address, meets a mnemonic starting
    with ``POP``, or ``run_helper`` returns a falsy value.

    Mnemonics starting with ``B`` are followed to the value of operand 0;
    everything else continues at the first code xref from the instruction.

    :param start_ea: address to start from; must belong to a function.
    :param end_ea: address at which to stop, or ``idc.BADADDR`` for none.
    :param end_cnt: step limit applied when ``end_ea`` is ``idc.BADADDR``.
    """
    self.init()

    home_func = idc.get_func_attr(start_ea, idc.FUNCATTR_START)
    if home_func == idc.BADADDR:
        # not in code segment
        return
    self.func_start = home_func

    limit_by_count = end_ea == idc.BADADDR
    cursor = start_ea
    steps = 0
    while cursor not in self.visited:
        self.visited.add(cursor)

        # break if the cursor is out of the original function
        # TODO: Add inter-procedural
        if idc.get_func_attr(cursor, idc.FUNCATTR_START) != home_func:
            break
        if cursor == end_ea:
            break
        if limit_by_count and steps >= end_cnt:
            break

        # there may exist data section
        mnem = ida_ua.ua_mnem(cursor)
        if not (ida_ua.can_decode(cursor) and mnem):
            break

        if mnem.startswith("B"):
            cursor = idc.get_operand_value(cursor, 0)
        elif mnem.startswith("POP"):
            break
        elif self.run_helper(cursor):
            cursor = ida_xref.get_first_cref_from(cursor)
        else:
            # print("%x: something wrong: %s" % (ea, idc.GetDisasm(ea)))
            break

        steps += 1
def decompiled_code(address: int, _visited=None) -> Optional[ida_hexrays.cfuncptr_t]:
    """
    Generates IDA object representing the decompiled code for the given address.

    If decompilation fails at a ``call`` instruction, the called function is
    decompiled first and the original address is then retried once.

    :param address: Start address of the function.
    :param _visited: Internal set of addresses already attempted; used to
        stop the retry logic from recursing forever. Callers should leave it
        at its default.
    :returns: cfuncptr_t object or None on failure.
    """
    if _visited is None:
        _visited = set()

    attempted_before = address in _visited
    _visited.add(address)

    # This requires Hexrays decompiler, load it and make sure it's available before continuing.
    if not ida_hexrays.init_hexrays_plugin():
        if not idc.load_and_run_plugin("hexrays", 0):
            idc.load_and_run_plugin("hexx64", 0)
    if not ida_hexrays.init_hexrays_plugin():
        logger.debug("Unable to load Hexrays decompiler.")
        return None

    fail_obj = ida_hexrays.hexrays_failure_t()
    code = ida_hexrays.decompile(address, fail_obj)
    if code and not fail_obj.code:
        return code

    if not fail_obj:
        logger.warning(f"Unable to decompile function at {hex(address)}")
        return None

    # Cannot possibly recover from call analysis failure.
    if fail_obj.code == -12:
        logger.warning(f"Unable to decompile function at {hex(address)}: call analysis failed")
        return None

    # We may be able to still recover from this by first trying to decompile
    # the called function that caused the failure.
    # If we've attempted this before, ensure we don't try a third time
    # and cause an infinite loop.
    if not attempted_before:
        failed_address = fail_obj.errea
        if ida_ua.ua_mnem(failed_address) == "call":
            call_address = idc.get_operand_value(failed_address, 0)
            # Fixed: the callee address and the ``_visited`` keyword were
            # not passed correctly, so both recursive calls raised TypeError.
            if decompiled_code(call_address, _visited=_visited) is not None:
                return decompiled_code(address, _visited=_visited)

    # TODO: Observed this message pops up with fail_obj.code == 0... unsure if that is actually an error.
    logger.debug(f"Unable to decompile function at {hex(address)}: {fail_obj.code}")
    return None
def find_reg_value_helper(self, ea, reg_name, end_ea, end_cnt, offset=None):
    """Inspect the instruction at ``ea`` while resolving ``reg_name``.

    Dispatches on the mnemonic prefix (MOV/LDR, STR, ADD/SUB/MUL). When the
    instruction does not involve ``reg_name``, or its mnemonic is not
    handled, the search is handed to ``self.proceed_backward`` with
    ``end_cnt - 1``.

    :param ea: address of the instruction to inspect.
    :param reg_name: register name, compared against ``get_reg(op)``.
    :param end_ea: lower address bound; ignored when ``idc.BADADDR``.
    :param end_cnt: remaining step budget; the search stops at 0.
    :param offset: displacement being tracked for stack/memory operands.
        # NOTE(review): exact meaning depends on ``proceed_backward`` - confirm.
    :returns: the entry cached in ``self.values`` if there is one; otherwise
        a set of values, whatever ``self.proceed_backward`` returns, or
        ``None`` when the search cannot continue.
    """
    # Reuse a result already recorded for this (address, register) pair.
    if (ea, reg_name) in self.values:
        return self.values[(ea, reg_name)]

    # Stop once we are below the requested lower bound.
    if end_ea != idc.BADADDR and ea < end_ea:
        return

    # Step budget exhausted.
    if end_cnt == 0:
        return

    # not in code segment
    func_addr = idc.get_func_attr(ea, idc.FUNCATTR_START)
    if func_addr == idc.BADADDR:
        return

    # out of current function
    if not self.inter and func_addr != self.func_start:
        return

    # there may exist data section
    mnem = ida_ua.ua_mnem(ea)
    if not ida_ua.can_decode(ea) or not mnem:
        return

    # we need to check at most 4 operands
    insn = ida_ua.insn_t()
    # NOTE(review): inslen is never read in this function.
    inslen = ida_ua.decode_insn(insn, ea)
    op1 = insn.ops[0]
    op2 = insn.ops[1]
    op3 = insn.ops[2]
    op4 = insn.ops[3]

    if any(mnem.startswith(word) for word in ["MOV", "LDR"]):
        assert op2 is not None
        # first argument should be reg_name
        if get_reg(op1) != reg_name:
            return self.proceed_backward(ea, reg_name, end_ea, end_cnt - 1,
                                         offset)

        # follow new register
        if op2.type == ida_ua.o_reg:
            # A copy of SP restarts offset tracking at 0.
            if get_reg(op2) == "SP":
                offset = 0
            return self.proceed_backward(ea, get_reg(op2), end_ea,
                                         end_cnt - 1, offset)

        # in the stack. need to check when the value is stored
        # o_displ = [Base Reg + Index Reg + Displacement]
        elif op2.type == ida_ua.o_displ:
            values = self.proceed_backward(ea, get_reg(op2), end_ea,
                                           end_cnt - 1, op2.addr)
            # Drop falsy entries (None and 0).
            # NOTE(review): filter() raises TypeError if values is None -
            # confirm proceed_backward always returns an iterable here.
            values = set(filter(lambda x: x, values))
            if mnem.startswith("LDR"):
                # LDR dereferences: read the dword at each candidate address.
                return set(map(lambda x: ida_bytes.get_dword(x), values))
            else:
                return values

        # reference the memory and get the value written in the memory
        elif op2.type == ida_ua.o_mem:
            # TODO: implement memory access
            # we assume that this memory is not initialized.
            if mnem.startswith("LDR"):
                return set([ida_bytes.get_dword(op2.addr)])
            else:
                return set([op2.addr])

        # immediate value, we get the value right away
        elif op2.type == ida_ua.o_imm:
            return set([op2.value])

        elif op2.type == ida_ua.o_phrase:
            assert mnem.startswith("LDR")
            # Resolve both registers, add them, then dereference the sums.
            phrase_val = self.proceed_backward(ea, get_reg(op3), end_ea,
                                               end_cnt - 1, offset)
            if not phrase_val:
                return
            op2_val = self.proceed_backward(ea, get_reg(op2), end_ea,
                                            end_cnt - 1, offset)
            if not op2_val:
                return
            operator = lambda x1, x2: x1 + x2
            values = merge_op_vals(op2_val, phrase_val, operator)
            return set(
                map(lambda x: ida_bytes.get_dword(x + op2.phrase), values))

        # Any other operand type is not handled.
        return

    # only checks stored stacks
    elif any(mnem.startswith(word) for word in ["STR"]):
        assert op2 is not None
        # The memory operand is op3 when present, otherwise op2.
        if op3 and op3.type != ida_ua.o_void:
            target_op = op3
        else:
            target_op = op2

        # arguments should include reg_name
        if get_reg(target_op) != reg_name:
            return self.proceed_backward(ea, reg_name, end_ea, end_cnt - 1,
                                         offset)

        # in the stack. need to check when the value is stored
        # o_displ = [Base Reg + Index Reg + Displacement]
        # NOTE(review): target_memory is assigned below but never read in
        # this function; self.stack is always the container written to.
        if target_op.type == ida_ua.o_displ:
            target_memory = self.stack
            # we assume that memory is not initialized.

        # reference the memory and get the value written in the memory
        elif target_op.type == ida_ua.o_mem:
            assert get_reg(target_op) != "SP"
            target_memory = self.memory
        else:
            return

        if target_op == op2:
            # Two-operand store: the stored register is op1.
            if target_op.addr == offset:
                self.stack[target_op.addr] = self.proceed_backward(
                    ea, get_reg(op1), end_ea, end_cnt - 1)
                return self.stack[target_op.addr]
        else:
            # Three-operand store: op1 is matched at addr, op2 at addr + 4.
            if target_op.addr == offset:
                self.stack[target_op.addr] = self.proceed_backward(
                    ea, get_reg(op1), end_ea, end_cnt - 1)
                return self.stack[target_op.addr]
            elif target_op.addr + 4 == offset:
                self.stack[target_op.addr + 4] = self.proceed_backward(
                    ea, get_reg(op2), end_ea, end_cnt - 1)
                return self.stack[target_op.addr + 4]

        # The store does not hit the tracked offset; keep searching.
        return self.proceed_backward(ea, reg_name, end_ea, end_cnt - 1,
                                     offset)

    elif any(mnem.startswith(word) for word in ["ADD", "SUB", "MUL"]):
        assert op2 is not None
        if mnem.startswith("ADD"):
            operator = lambda x1, x2: x1 + x2
        elif mnem.startswith("SUB"):
            operator = lambda x1, x2: x1 - x2
        elif mnem.startswith("MUL"):
            operator = lambda x1, x2: x1 * x2

        # TODO: Handle stack variable
        # Check how to follow below.
        # STR R5, [SP #8]
        # STR R4, [SP #4]
        # ADD R3, SP, #4
        # ADD R2, R3, #4
        if get_reg(op1) != reg_name:
            return self.proceed_backward(ea, reg_name, end_ea, end_cnt - 1,
                                         offset)

        # Two arguments
        if not op3 or op3.type == ida_ua.o_void:
            if op2.type == ida_ua.o_reg:
                op1_val = self.proceed_backward(ea, reg_name, end_ea,
                                                end_cnt - 1, offset)
                if not op1_val:
                    return
                op2_val = self.proceed_backward(ea, get_reg(op2), end_ea,
                                                end_cnt - 1)
                if not op2_val:
                    return
                return merge_op_vals(op1_val, op2_val, operator)

            elif op2.type == ida_ua.o_imm:
                op1_val = self.proceed_backward(ea, reg_name, end_ea,
                                                end_cnt - 1, offset)
                # NOTE(review): unlike the register case above, op1_val is
                # not checked before being iterated - confirm this is safe.
                return set(map(lambda x: operator(x, op2.value), op1_val))

            else:
                return

        if op2.type != ida_ua.o_reg:
            # This should not be reached.
            print(hex(ea), idc.GetDisasm(ea), reg_name, op2.type)
            assert False

        # More than three arguments
        # follow new register
        # ADD R0, R1, R2
        if op3.type == ida_ua.o_reg:
            op2_val = self.proceed_backward(ea, get_reg(op2), end_ea,
                                            end_cnt - 1, offset)
            # if we cannot fetch the value, stop the analysis
            if not op2_val:
                return
            op3_val = self.proceed_backward(ea, get_reg(op3), end_ea,
                                            end_cnt - 1, offset)
            # if we cannot fetch the value, stop the analysis
            if not op3_val:
                return

            # MLA R0, R1, R2, R3
            # The same operator is applied to the fourth operand as well.
            if op4 and op4.type == ida_ua.o_reg:
                op4_val = self.proceed_backward(ea, get_reg(op4), end_ea,
                                                end_cnt - 1, offset)
                if not op4_val:
                    return
                return merge_op_vals(
                    merge_op_vals(op2_val, op3_val, operator), op4_val,
                    operator)

            return merge_op_vals(op2_val, op3_val, operator)

        # immediate value, we get the value right away
        # ADD R0, R1, #123
        elif op3.type == ida_ua.o_imm:
            # The immediate is folded into the offset passed on while
            # following op2, rather than applied to a resolved value.
            return self.proceed_backward(ea, get_reg(op2), end_ea,
                                         end_cnt - 1,
                                         operator(0, op3.value))

        # ADD R0, R1, R2,LSL#2
        # o_idaspec0~5
        elif op3.type == ida_ua.o_idpspec0:
            # processor specific type 'LSL'
            op3_val = self.proceed_backward(ea, get_reg(op3), end_ea,
                                            end_cnt - 1, offset)
            # if we cannot fetch the value, stop the analysis
            if not op3_val:
                return
            op3_val = set(map(lambda x: x << op3.value, op3_val))
            op2_val = self.proceed_backward(ea, get_reg(op2), end_ea,
                                            end_cnt - 1, offset)
            return merge_op_vals(op2_val, op3_val, operator)

        else:
            return

    else:
        # Unhandled mnemonic: keep looking for reg_name further back.
        return self.proceed_backward(ea, reg_name, end_ea, end_cnt - 1,
                                     offset)
def run_helper(self, ea):
    """Apply the effect of the instruction at ``ea`` to the tracked state.

    Handles mnemonics starting with MOV, LDR, ADR, STR, ADD, SUB or MUL by
    updating ``self.regs`` and ``self.memory`` with the value returned by
    ``self.fetch_value`` (masked to 32 bits). PUSH/POP and all other
    mnemonics are skipped.

    :param ea: address of the instruction to process.
    :returns: ``True`` when the instruction was processed or skipped;
        ``None`` when it cannot be decoded, its value cannot be fetched, a
        required register is not tracked yet, or an LDR reads from an
        address outside every segment.
    """
    # there may exist data section
    mnem = ida_ua.ua_mnem(ea)
    if not ida_ua.can_decode(ea) or not mnem:
        return

    # we need to check at most 4 operands
    insn = ida_ua.insn_t()
    ida_ua.decode_insn(insn, ea)
    op1 = insn.ops[0]
    op2 = insn.ops[1]
    op3 = insn.ops[2]
    op4 = insn.ops[3]

    if mnem.startswith(("PUSH", "POP")):
        # TODO: implement this properly
        return True

    if not mnem.startswith(
            ("MOV", "LDR", "ADR", "STR", "ADD", "SUB", "MUL")):
        # Skip unknown instructions
        return True

    assert op2 is not None
    if mnem.startswith("ADD"):
        operator = lambda x1, x2: x1 + x2
    elif mnem.startswith("SUB"):
        operator = lambda x1, x2: x1 - x2
    elif mnem.startswith("MUL"):
        operator = lambda x1, x2: x1 * x2
    else:
        operator = None

    value = self.fetch_value(op1, op2, op3, op4, operator)
    if value is None:
        return
    value = value & 0xFFFFFFFF

    if mnem.startswith("MOV"):
        self.regs[get_reg(op1)] = value

    elif mnem.startswith("LDR"):
        assert op2.type in [ida_ua.o_displ, ida_ua.o_mem, ida_ua.o_phrase]
        if value in self.memory:
            # Prefer a value stored earlier by a tracked STR.
            value = self.memory[value]
        else:
            # Fixed: getseg() returns None for an address outside every
            # segment, so the old comparison with BADADDR never matched.
            if ida_segment.getseg(value) is None:
                return
            value = ida_bytes.get_dword(value)
        self.regs[get_reg(op1)] = value

    elif mnem.startswith("ADR"):
        assert op2.type == ida_ua.o_imm
        self.regs[get_reg(op1)] = value

    elif mnem.startswith("STR"):
        assert op2.type in [ida_ua.o_displ, ida_ua.o_mem, ida_ua.o_phrase]
        if get_reg(op1) not in self.regs:
            return
        # Here ``value`` is the fetched target address of the store.
        self.memory[value] = self.regs[get_reg(op1)]

    else:
        # ADD / SUB / MUL
        if op2.type == ida_ua.o_imm:
            # Two-operand immediate form: combine with the current value.
            if get_reg(op1) not in self.regs:
                return
            # NOTE(review): this result is not masked to 32 bits, unlike
            # the fetched value above - confirm whether that is intended.
            value = operator(self.regs[get_reg(op1)], value)
        self.regs[get_reg(op1)] = value

    return True