def _emulate_arm64(start, end, on_BL=None, on_RET=None):
    """Partially emulate the Arm64 instructions yielded by idau.Instructions(start, end).

    This is a deliberately minimal emulator that does just enough to find OSMetaClass
    information. Only the following instructions are modeled: ADRP/ADR, ADD (register plus
    immediate), NOP, MOV (immediate or register source), RET, STP/LDP with a displacement
    operand, STR/LDR without writeback, and BL to a near target. Anything else is logged as
    unrecognized and wipes the entire register state.

    Arguments:
        start: The address at which emulation starts.
        end: The end address handed to idau.Instructions().

    Options:
        on_BL: If supplied, called as on_BL(target_address, regs) for every modeled BL, before
            registers X0 through X18 are cleared.
        on_RET: If supplied, called as on_RET(regs) when a RET is reached. Emulation stops at
            the first RET whether or not a callback was supplied.
    """
    state = _Regs()

    def read_memory(addr, dtyp):
        # A falsy address (zero, or presumably an unknown register value) cannot be read.
        if not addr:
            return None
        # Only 8-byte and 4-byte loads are modeled.
        width = {idaapi.dt_qword: 8, idaapi.dt_dword: 4}.get(dtyp)
        if width is None:
            return None
        return idau.read_word(addr, width)

    def clear_temporaries():
        # Forget X0 through X18, which are not assumed to survive a function call.
        for index in range(19):
            state.clear('X{}'.format(index))

    for insn in idau.Instructions(start, end):
        _log(11, 'Processing instruction {:#x}', insn.ea)
        mnem = insn.get_canon_mnem()

        if mnem in ('ADRP', 'ADR'):
            state[insn.Op1.reg] = insn.Op2.value
            continue

        if mnem == 'ADD' and insn.Op2.type == idc.o_reg and insn.Op3.type == idc.o_imm:
            state[insn.Op1.reg] = state[insn.Op2.reg] + insn.Op3.value
            continue

        if mnem == 'NOP':
            continue

        if mnem == 'MOV' and insn.Op2.type == idc.o_imm:
            state[insn.Op1.reg] = insn.Op2.value
            continue

        if mnem == 'MOV' and insn.Op2.type == idc.o_reg:
            state[insn.Op1.reg] = state[insn.Op2.reg]
            continue

        if mnem == 'RET':
            if on_RET:
                on_RET(state)
            # Nothing past the first RET is emulated.
            break

        if mnem in ('STP', 'LDP') and insn.Op3.type == idc.o_displ:
            if insn.auxpref & _MEMOP_WBINDEX:
                # Writeback form: apply the displacement to the register of the memory operand.
                base = insn.Op3.reg
                state[base] = state[base] + insn.Op3.addr
            if mnem == 'LDP':
                # The loaded values are not tracked, so both destinations become unknown.
                state.clear(insn.Op1.reg)
                state.clear(insn.Op2.reg)
            continue

        if mnem in ('STR', 'LDR') and not (insn.auxpref & _MEMOP_WBINDEX):
            # Stores are ignored; only loads change the register state.
            if mnem == 'LDR':
                if insn.Op2.type == idc.o_displ:
                    address = state[insn.Op2.reg] + insn.Op2.addr
                    state[insn.Op1.reg] = read_memory(address, insn.Op1.dtyp)
                else:
                    state.clear(insn.Op1.reg)
            continue

        if mnem == 'BL' and insn.Op1.type == idc.o_near:
            if on_BL:
                on_BL(insn.Op1.addr, state)
            clear_temporaries()
            continue

        # Everything else (including STR/LDR with writeback) is not modeled, so no register
        # value can be trusted afterwards.
        _log(10, 'Unrecognized instruction at address {:#x}', insn.ea)
        state.clearall()
def _process_stub_template_1(stub):
    """Match a three-instruction stub that loads a pointer and branches to it.

    The expected pattern is:

        ADRP    X<reg>, #<offset>@PAGE
        LDR     X<reg>, [X<reg>, #<offset>@PAGEOFF]
        BR      X<reg>

    Arguments:
        stub: The address of the first instruction of the candidate stub.

    Returns:
        The word read from the address formed by the ADRP immediate plus the LDR displacement,
        provided the pattern matches and that word is truthy and mapped; otherwise None.
    """
    adrp, ldr, br = idau.Instructions(stub, count=3)

    # Check each instruction's opcode and operand kinds in turn.
    if not (adrp.itype == idaapi.ARM_adrp
            and adrp.Op1.type == idaapi.o_reg
            and adrp.Op2.type == idaapi.o_imm):
        return None
    # NOTE(review): auxpref == 0 presumably excludes writeback and other LDR variants -- confirm.
    if not (ldr.itype == idaapi.ARM_ldr
            and ldr.Op1.type == idaapi.o_reg
            and ldr.Op2.type == idaapi.o_displ
            and ldr.auxpref == 0):
        return None
    if not (br.itype == idaapi.ARM_br and br.Op1.type == idaapi.o_reg):
        return None

    # The same register must be used throughout the stub.
    if not (adrp.Op1.reg == ldr.Op1.reg == ldr.Op2.reg == br.Op1.reg):
        return None

    slot = adrp.Op2.value + ldr.Op2.addr
    target = idau.read_word(slot)
    if not target or not idau.is_mapped(target):
        return None
    return target
def _pointer_accesses_process_block(start, end, fix, entry_regs, accesses):
    """Process a basic block for _pointer_accesses_data_flow.

    Arm64 only.

    Walks the instructions yielded by idau.Instructions(start, end) while tracking which
    registers hold a known delta from the start of the target memory region and which hold a
    known constant. Every memory operand whose base register has a known delta is recorded in
    accesses.

    Arguments:
        start: The start address of the basic block.
        end: The end address of the basic block.
        fix: A mapping from instruction address to a {register: delta} mapping. When the
            instruction at that address is reached, those registers are forced to the given
            deltas before the instruction is examined.
        entry_regs: A {register: delta} mapping giving the register state on entry to the block.
        accesses: A mapping that is updated in place. For each recorded access, the tuple
            (instruction address, base register delta) is added to accesses[(offset, size)].
            Missing keys are indexed directly, so this must create its entries on demand
            (e.g. a collections.defaultdict(set)).

    Returns:
        A {register: delta} dictionary of the registers that hold a pointer delta at the end of
        the block. Registers that only hold a known constant are not included.
    """
    # NOTE: Some object accesses (to large offsets) are encoded in the following style:
    #   MOV             W8, #0x9210
    #   STR             X0, [X19,X8]
    # We try to catch these by keeping track of local constants within a block.
    RegValue = collections.namedtuple('RegValue', ['type', 'value'])
    DELTA = 0  # Pointer delta from start of target memory region.
    CONST = 1  # Constant value.

    def get_reg(reg, kind):
        # Return the tracked value of reg, but only if it is of the requested kind.
        rv = regs.get(reg)
        if rv is None or rv.type != kind:
            return None
        return rv.value

    # Initialize our registers from the state on entry to the block.
    regs = {reg: RegValue(DELTA, delta) for reg, delta in entry_regs.items()}

    # For each instruction in the basic block, see if any new register gets assigned.
    for insn in idau.Instructions(start, end):
        # First, if this instruction has a fixed state (i.e., a set mapping of registers to
        # deltas), set that state. This overwrites any previous values, so care must be taken by
        # the caller to ensure that this initialization is correct.
        fixed_regs_and_deltas = fix.get(insn.ea)
        if fixed_regs_and_deltas:
            for reg, delta in fixed_regs_and_deltas.items():
                _log(6, '\t\t{:x} fix {}={}', insn.ea, reg, delta)
                regs[reg] = RegValue(DELTA, delta)

        # If this is an access instruction, record the access. Writeback instructions are
        # skipped here; see the comment about auxpref below.
        if not (insn.auxpref & _ARM64_WRITEBACK):
            for op in insn.Operands:
                # We only consider o_displ and o_phrase.
                if op.type == idaapi.o_void:
                    break
                elif op.type not in (idaapi.o_displ, idaapi.o_phrase):
                    continue
                # Get the delta for the base register.
                delta = get_reg(op.reg, DELTA)
                if delta is None:
                    continue
                # Get the instruction access size.
                size = _INSN_OP_DTYP_SZ.get(op.dtyp)
                if size is None:
                    continue
                # Get the offset from the base register (which is additional to the base
                # register's delta).
                op_offset = None
                if op.type == idaapi.o_displ:
                    op_offset = op.addr
                else:  # op.type == idaapi.o_phrase
                    # The offset register is only usable if it holds a known constant.
                    op_offset_reg = op.specflag1 & 0xff
                    op_offset = get_reg(op_offset_reg, CONST)
                if op_offset is None:
                    continue
                # Record this access, wrapping the offset to 64 bits.
                offset = (delta + op_offset) & 0xffffffffffffffff
                _log(5, '\t\t{:x} access({}) {}, {}', insn.ea, op.reg, offset, size)
                accesses[(offset, size)].add((insn.ea, delta))

        # Update the set of registers pointing to the struct, and the set of known constant
        # registers.
        if (insn.itype == idaapi.ARM_mov
                and insn.Op1.type == idaapi.o_reg
                and insn.Op2.type == idaapi.o_reg
                and insn.Op3.type == idaapi.o_void
                and insn.Op1.dtyp == idaapi.dt_qword
                and insn.Op2.dtyp == idaapi.dt_qword
                and insn.Op2.reg in regs):
            # MOV Xdst, Xsrc
            _log(6, '\t\t{:x} add {}={}', insn.ea, insn.Op1.reg, regs[insn.Op2.reg].value)
            regs[insn.Op1.reg] = regs[insn.Op2.reg]
        elif (insn.itype == idaapi.ARM_mov
                and insn.Op1.type == idaapi.o_reg
                and insn.Op2.type == idaapi.o_imm
                and insn.Op3.type == idaapi.o_void
                and insn.Op1.dtyp in (idaapi.dt_dword, idaapi.dt_qword)):
            # MOV Xdst, #imm
            _log(7, '\t\t{:x} const {}={}', insn.ea, insn.Op1.reg, insn.Op2.value)
            regs[insn.Op1.reg] = RegValue(CONST, insn.Op2.value)
        elif (insn.itype == idaapi.ARM_add
                and insn.Op1.type == idaapi.o_reg
                and insn.Op2.type == idaapi.o_reg
                and insn.Op3.type == idaapi.o_imm
                and insn.Op4.type == idaapi.o_void
                and insn.Op1.dtyp == idaapi.dt_qword
                and insn.Op2.dtyp == idaapi.dt_qword
                and insn.Op2.reg in regs):
            # ADD Xdst, Xsrc, #amt
            # The result keeps the kind (delta or constant) of the source register.
            op2 = regs[insn.Op2.reg]
            _log(6, '\t\t{:x} add {}={}+{}', insn.ea, insn.Op1.reg, op2.value, insn.Op3.value)
            regs[insn.Op1.reg] = RegValue(op2.type, op2.value + insn.Op3.value)
        elif insn.itype == idaapi.ARM_bl or insn.itype == idaapi.ARM_blr:
            # A function call (direct or indirect). Any correct compiler should generate code
            # that does not use the temporary registers after a call, but just to be safe, clear
            # all the temporary registers.
            _log(6, '\t\t{:x} clear temps', insn.ea)
            # range() rather than xrange() so this also runs on Python 3.
            for r in range(0, 19):
                regs.pop(getattr(idautils.procregs, 'X{}'.format(r)).reg, None)
        else:
            # This is an unrecognized instruction. Clear all the registers it modifies.
            feature = insn.get_canon_feature()
            # On Arm64, LDR-type instructions store their writeback behavior in the
            # instruction's auxpref flags. As best I can tell, insn.get_canon_feature()'s
            # CF_CHG* flags indicate whether the operand will change, which is different than
            # the register changing for operands like o_displ that use a register to refer to a
            # memory location. Thus, we actually need to special case auxpref and clear those
            # registers. Fortunately, writeback behavior is only observed in o_displ operands,
            # of which there should only ever be one, so it doesn't matter that auxpref is
            # stored on the instruction and not the operand.
            for op in insn.Operands:
                if op.type == idaapi.o_void:
                    break
                if ((feature & _INSN_OP_CHG[op.n] and op.type == idaapi.o_reg)
                        or (insn.auxpref & _ARM64_WRITEBACK and op.type == idaapi.o_displ)):
                    _log(6, '\t\t{:x} clear {}', insn.ea, op.reg)
                    regs.pop(op.reg, None)

    # Only pointer deltas are propagated out of the block; constants stay local to it.
    return {reg: rv.value for reg, rv in regs.items() if rv.type == DELTA}