def stp(ir, instr, arg1, arg2, arg3):
    """Store the pair @arg1, @arg2 at the memory location described by @arg3.

    @arg1 is written at the address returned by get_mem_access(@arg3), and
    @arg2 right after it (address + @arg1 size in bytes). When
    get_mem_access also returns an update expression, it is appended to the
    generated assignments.
    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    base, writeback = get_mem_access(arg3)
    # The second value is stored immediately after the first one
    second_addr = base + m2_expr.ExprInt_from(base, arg1.size / 8)
    assignments = [
        m2_expr.ExprAff(m2_expr.ExprMem(base, arg1.size), arg1),
        m2_expr.ExprAff(m2_expr.ExprMem(second_addr, arg2.size), arg2),
    ]
    if writeback:
        assignments.append(writeback)
    return assignments, []
def ldp(ir, instr, arg1, arg2, arg3):
    """Load the pair @arg1, @arg2 from the memory location described by @arg3.

    @arg1 is read from the address returned by get_mem_access(@arg3), and
    @arg2 from right after it (address + @arg1 size in bytes). When
    get_mem_access also returns an update expression, it is appended to the
    generated assignments.
    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    base, writeback = get_mem_access(arg3)
    # Byte offset of the second value, expressed on the address size
    second_offset = m2_expr.ExprInt(arg1.size / 8, base.size)
    assignments = [
        m2_expr.ExprAff(arg1, m2_expr.ExprMem(base, arg1.size)),
        m2_expr.ExprAff(arg2,
                        m2_expr.ExprMem(base + second_offset, arg2.size)),
    ]
    if writeback:
        assignments.append(writeback)
    return assignments, []
def test_Variables_Identifier(self): import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import Variables_Identifier # Build a complex expression cst = m2_expr.ExprInt16(0x100) eax = m2_expr.ExprId("EAX") ebx = m2_expr.ExprId("EBX") ax = eax[0:16] expr = eax + ebx expr = m2_expr.ExprCompose([(ax, 0, 16), (expr[16:32], 16, 32)]) expr2 = m2_expr.ExprMem((eax + ebx) ^ (eax), size=16) expr2 = expr2 | ax | expr2 | cst exprf = expr - expr + m2_expr.ExprCompose([(expr2, 0, 16), (cst, 16, 32)]) # Identify variables vi = Variables_Identifier(exprf) # Use __str__ print vi # Test the result new_expr = vi.equation ## Force replace in the variable dependency order for var_id, var_value in reversed(vi.vars.items()): new_expr = new_expr.replace_expr({var_id: var_value}) self.assertEqual(exprf, new_expr)
def l_str(ir, instr, arg1, arg2):
    """Store @arg1 (full size) at the memory location described by @arg2.

    The optional update expression returned by get_mem_access is appended
    to the generated assignments.
    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    target, writeback = get_mem_access(arg2)
    assignments = [
        m2_expr.ExprAff(m2_expr.ExprMem(target, arg1.size), arg1),
    ]
    if writeback:
        assignments.append(writeback)
    return assignments, []
def strh(ir, instr, arg1, arg2):
    """Store the low 16 bits of @arg1 at the memory location described by
    @arg2.

    The optional update expression returned by get_mem_access is appended
    to the generated assignments.
    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    target, writeback = get_mem_access(arg2)
    halfword = arg1[:16]
    assignments = [m2_expr.ExprAff(m2_expr.ExprMem(target, 16), halfword)]
    if writeback:
        assignments.append(writeback)
    return assignments, []
def lhu(ir, instr, a, b):
    """A 16-bit value is loaded (zero extended to 32 bits) into a register
    @a from the specified address @b."""
    # Re-read the source operand as a 16-bit memory access
    source = m2_expr.ExprMem(b.arg, 16)
    return [m2_expr.ExprAff(a, source.zeroExtend(32))], []
def eval_ir_expr(self, exprs):
    """Evaluate the assignments @exprs on the current symbols.

    @exprs: iterable of ExprAff
    Return the list of (destination, evaluated source) pairs. Memory
    destinations are rebuilt with their evaluated and simplified address.
    Raise TypeError on a non-ExprAff element and ValueError when a
    destination is neither an ExprMem nor an ExprId.
    """
    cache = dict(self.symbols.items())
    results = {}
    for assign in exprs:
        if not isinstance(assign, m2_expr.ExprAff):
            raise TypeError('not affect', str(assign))

        value = self.eval_expr(assign.src, cache)
        target = assign.dst
        if isinstance(target, m2_expr.ExprMem):
            # Resolve the pointer before using the memory as a key
            address = self.expr_simp(self.eval_expr(target.arg, cache))
            results[m2_expr.ExprMem(address, target.size)] = value
        elif isinstance(target, m2_expr.ExprId):
            results[target] = value
        else:
            raise ValueError("affected zarb", str(target))
    return results.items()
def substract_mems(self, a, b):
    """Return the parts of memory @a which are not covered by memory @b.

    @a, @b: ExprMem; @a is expected to be a key of self.symbols
    Return None when the distance between both pointers does not evaluate
    to an ExprInt; else return a list of (ExprMem, value) tuples, where each
    value is the matching slice of self.symbols[@a].
    """
    # Distance (in bytes) from the start of @a to the start of @b
    ex = b.arg - a.arg
    ex = self.expr_simp(self.eval_expr(ex, {}))
    if not isinstance(ex, m2_expr.ExprInt):
        return None
    ptr_diff = int(int32(ex.arg))
    out = []
    if ptr_diff < 0:
        # @b starts before @a:
        #    [a     ]
        #[b      ]XXX
        # Number of bits of @a (from its start) covered by @b
        sub_size = b.size + ptr_diff * 8
        if sub_size >= a.size:
            # @a is fully covered: nothing remains
            pass
        else:
            ex = m2_expr.ExprOp('+', a.arg,
                                m2_expr.ExprInt_from(a.arg, sub_size / 8))
            ex = self.expr_simp(self.eval_expr(ex, {}))

            rest_ptr = ex
            rest_size = a.size - sub_size

            val = self.symbols[a][sub_size:a.size]
            out = [(m2_expr.ExprMem(rest_ptr, rest_size), val)]
    else:
        # @b starts at or after the start of @a:
        #[a         ]
        # XXXX[b   ]YY
        #
        #[a     ]
        # XXXX[b     ]
        out = []
        # part X: bytes of @a located before @b
        if ptr_diff > 0:
            val = self.symbols[a][0:ptr_diff * 8]
            out.append((m2_expr.ExprMem(a.arg, ptr_diff * 8), val))
        # part Y: bytes of @a located after the end of @b
        if ptr_diff * 8 + b.size < a.size:
            ex = m2_expr.ExprOp('+', b.arg,
                                m2_expr.ExprInt_from(b.arg, b.size / 8))
            ex = self.expr_simp(self.eval_expr(ex, {}))

            # NOTE(review): rest_ptr and rest_size are not used below
            rest_ptr = ex
            rest_size = a.size - (ptr_diff * 8 + b.size)
            val = self.symbols[a][ptr_diff * 8 + b.size:a.size]
            out.append((m2_expr.ExprMem(ex, val.size), val))
    return out
def ldr_size(ir, instr, arg1, arg2, size):
    """Load @size bits from the memory location described by @arg2 into
    @arg1, zero extended to the size of @arg1.

    The optional update expression returned by get_mem_access is appended
    to the generated assignments.
    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    source, writeback = get_mem_access(arg2)
    loaded = m2_expr.ExprMem(source, size).zeroExtend(arg1.size)
    assignments = [m2_expr.ExprAff(arg1, loaded)]
    if writeback:
        assignments.append(writeback)
    return assignments, []
def emit_mov(ir, instr, a, b):
    """Build the IR of mov, followed by the increment of the 64-bit counter
    stored at ADDR_COUNTER.

    Return the (instruction IR, extra IR) pair obtained from sem.mov, with
    the counter increment appended to the instruction IR.
    """
    # Generate the intermediate representation of mov
    mov_ir, extra_ir = sem.mov(ir, instr, a, b)

    # Append the intermediate representation incrementing the counter
    counter = expr.ExprMem(expr.ExprInt64(ADDR_COUNTER), 64)
    mov_ir.append(expr.ExprAff(counter, counter + expr.ExprInt64(1)))
    return mov_ir, extra_ir
def ldrsw(ir, instr, arg1, arg2):
    """Load 32 bits from the memory location described by @arg2 into @arg1,
    sign extended to the size of @arg1.

    The optional update expression returned by get_mem_access is appended
    to the generated assignments.
    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    source, writeback = get_mem_access(arg2)
    loaded = m2_expr.ExprMem(source, 32).signExtend(arg1.size)
    assignments = [m2_expr.ExprAff(arg1, loaded)]
    if writeback:
        assignments.append(writeback)
    return assignments, []
def memory(cls, size=32, depth=1):
    """Return an ExprMem whose address is generated through cls._gen
    @size: (optional) Operation size
    @depth: (optional) Expression depth
    """
    # The address size is drawn in [1, cls.memory_max_address_size]
    pointer_size = random.randint(1, cls.memory_max_address_size)
    pointer = cls._gen(size=pointer_size, depth=depth - 1)
    return m2_expr.ExprMem(pointer, size=size)
def gen_c_assignments(self, assignblk):
    """
    Return the C lines used to generate the C code of the @assignblk
    @assignblk: an AssignBlock instance

    The result is the tuple (c_prefetch, c_var, c_main, c_mem, c_updt) of
    lists of C source lines: memory prefetches, local variable
    declarations, assignments to local variables, memory writes and final
    updates of the destinations from their local variables.
    """
    prefetchers = self.get_mem_prefetch(assignblk)
    next_index = {8: 0, 16: 0, 32: 0, 64: 0, 128: 0}
    local_vars = {}

    # Prefetched memory reads
    c_prefetch = []
    for mem, prefetcher in sorted(prefetchers.iteritems()):
        c_src = self.id_to_c(mem)
        c_dst = self.id_to_c(prefetcher)
        c_prefetch.append('%s = %s;' % (c_dst, c_src))

    c_var = ["uint%d_t %s;" % (prefetcher.size, prefetcher)
             for prefetcher in prefetchers.itervalues()]

    c_main = []
    c_mem = []
    for dst, src in sorted(assignblk.iteritems()):
        src = src.replace_expr(prefetchers)
        if dst is self.ir_arch.IRDst:
            continue
        if isinstance(dst, m2_expr.ExprId):
            local = self.add_local_var(local_vars, next_index, dst)
            if dst in self.ir_arch.arch.regs.regs_flt_expr:
                # Dont mask float affectation
                line = '%s = (%s);' % (self.id_to_c(local),
                                       self.id_to_c(src))
            else:
                line = '%s = (%s)&0x%X;' % (self.id_to_c(local),
                                            self.id_to_c(src),
                                            SIZE_TO_MASK[src.size])
            c_main.append(line)
        elif isinstance(dst, m2_expr.ExprMem):
            pointer = dst.arg.replace_expr(prefetchers)
            c_write = self.id_to_c(m2_expr.ExprMem(pointer, dst.size))
            c_write = c_write.replace('MEM_LOOKUP', 'MEM_WRITE')
            # Drop the last character and add the value as extra argument
            c_mem.append('%s, %s);' % (c_write[:-1], self.id_to_c(src)))
        else:
            raise ValueError("Unknown dst")

    c_updt = []
    for dst, local in local_vars.iteritems():
        if dst is not self.ir_arch.IRDst:
            c_updt.append('%s = %s;' % (self.id_to_c(dst),
                                        self.id_to_c(local)))
            c_var.append("uint%d_t %s;" % (local.size, local))

    return c_prefetch, c_var, c_main, c_mem, c_updt
def emit_syscall(ir, instr):
    """Build the IR of syscall: raise EXCEPT_PRIV_INSN and save the label of
    the next instruction in the 64-bit memory at ADDR_SYSCALL_NEXTIP.

    Return the (assignments, extra IR) pair; the extra IR is always empty.
    """
    # Set EXCEPT_PRIV_INSN
    raise_exception = expr.ExprAff(regs.exception_flags,
                                   expr.ExprInt32(sem.EXCEPT_PRIV_INSN))

    # Save the pointer to the instruction following the syscall
    next_ip_slot = expr.ExprMem(expr.ExprInt64(ADDR_SYSCALL_NEXTIP), 64)
    next_ip = expr.ExprId(ir.get_next_label(instr), instr.mode)
    save_next_ip = expr.ExprAff(next_ip_slot, next_ip)

    return [raise_exception, save_next_ip], []
def apply_expr_on_state_visit_cache(self, expr, state, cache, level=0):
    """
    Depth-first evaluation of @expr:
    1. evaluate the node's sons
    2. simplify

    @expr: expression to evaluate
    @state: mapping used to resolve identifiers (queried through .get)
    @cache: dictionary of already evaluated expressions, updated in place
    @level: recursion depth (only incremented on recursive calls)
    Return the evaluated and simplified expression.
    Raise TypeError on an unknown expression type.
    """
    expr = self.expr_simp(expr)
    if expr in cache:
        ret = cache[expr]
    elif expr.is_int():
        # Integers evaluate to themselves; they are not added to the cache
        return expr
    elif expr.is_id():
        if isinstance(expr.name, asmblock.AsmLabel) and expr.name.offset is not None:
            # A label with a known offset is replaced by this offset
            ret = m2_expr.ExprInt(expr.name.offset, expr.size)
        else:
            # Identifiers missing from @state are kept as is
            ret = state.get(expr, expr)
    elif expr.is_mem():
        # Evaluate the pointer first, then read the memory state
        ptr = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
        ret = m2_expr.ExprMem(ptr, expr.size)
        ret = self.get_mem_state(ret)
        assert expr.size == ret.size
    elif expr.is_cond():
        cond = self.apply_expr_on_state_visit_cache(expr.cond, state, cache, level+1)
        src1 = self.apply_expr_on_state_visit_cache(expr.src1, state, cache, level+1)
        src2 = self.apply_expr_on_state_visit_cache(expr.src2, state, cache, level+1)
        ret = m2_expr.ExprCond(cond, src1, src2)
    elif expr.is_slice():
        arg = self.apply_expr_on_state_visit_cache(expr.arg, state, cache, level+1)
        ret = m2_expr.ExprSlice(arg, expr.start, expr.stop)
    elif expr.is_op():
        args = []
        for oarg in expr.args:
            arg = self.apply_expr_on_state_visit_cache(oarg, state, cache, level+1)
            # Evaluation must not change the size of an operand
            assert oarg.size == arg.size
            args.append(arg)
        ret = m2_expr.ExprOp(expr.op, *args)
    elif expr.is_compose():
        args = []
        for arg in expr.args:
            args.append(self.apply_expr_on_state_visit_cache(arg, state, cache, level+1))
        ret = m2_expr.ExprCompose(*args)
    else:
        raise TypeError("Unknown expr type")
    ret = self.expr_simp(ret)
    assert expr.size == ret.size
    # Memoize the result under the simplified input expression
    cache[expr] = ret
    return ret
def get_arg_n(self, arg_number):
    """Return the Expression corresponding to the argument number
    @arg_number

    The first six arguments are the 64-bit registers RDI, RSI, RDX, RCX, R8
    and R9; the following ones are 64-bit memory accesses based on RSP.
    """
    # TODO use abicls
    size = 64
    abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]

    if arg_number >= len(abi_order):
        # Argument read from memory, relative to RSP
        sp = m2_expr.ExprId("RSP", 64)
        destack = (arg_number - len(abi_order) + 1)
        offset = m2_expr.ExprInt(destack * size / 8, size)
        return m2_expr.ExprMem(sp + offset, size)

    return m2_expr.ExprId(abi_order[arg_number], size)
def elements(self):
    """Return a set holding the expression selected through self.cbReg.

    When the selected value is a key of self.stk_args, the element is the
    simplified memory expression built from the initial stack pointer, the
    value of idc.GetSpd at the selected line and the argument offset; stack
    unaliasing is then forced. Else, the value is looked up in the register
    names of the architecture.
    Raise ValueError when the selected value is empty.
    """
    value = self.cbReg.value
    if value in self.stk_args:
        line = self.ira.blocks[self.label].irs[self.line_nb].instr
        arg_num = self.stk_args[value]
        sp_size = ir_arch.sp.size
        stk_high = m2_expr.ExprInt(idc.GetSpd(line.offset), sp_size)
        stk_off = m2_expr.ExprInt(self.ira.sp.size/8 * arg_num, sp_size)
        address = mn.regs.regs_init[ir_arch.sp] + stk_high + stk_off
        element = expr_simp(m2_expr.ExprMem(address, self.ira.sp.size))
        # Force stack unaliasing
        self.stk_unalias_force = True
    elif not value:
        raise ValueError("Unknown element '%s'!" % value)
    else:
        # NOTE(review): an unknown register name gives None here
        element = self.ira.arch.regs.all_regs_ids_byname.get(value, None)
    return set([element])
def gen_assignments(self, assignblk, prefetchers):
    """Generate the C lines of @assignblk and store them on it.

    @assignblk: an AssignBlock instance; its C_var, C_main, C_mem and
    C_updt attributes are set to the generated lists of C source lines
    @prefetchers: mapping from expressions to the variables replacing them
    Raise ValueError when a destination is neither an ExprId nor an ExprMem.
    """
    next_index = {8: 0, 16: 0, 32: 0, 64: 0}
    local_vars = {}

    out_var = ["uint%d_t %s;" % (prefetcher.size, prefetcher)
               for prefetcher in prefetchers.itervalues()]

    out_main = []
    out_mem = []
    for dst, src in sorted(assignblk.iteritems()):
        src = src.replace_expr(prefetchers)
        if dst is self.ir_arch.IRDst:
            continue
        if isinstance(dst, m2_expr.ExprId):
            local = self.add_local_var(local_vars, next_index, dst)
            if dst in self.ir_arch.arch.regs.regs_flt_expr:
                # Dont mask float affectation
                line = '%s = (%s);' % (self.id_to_c(local),
                                       self.id_to_c(src))
            else:
                line = '%s = (%s)&0x%X;' % (self.id_to_c(local),
                                            self.id_to_c(src),
                                            SIZE_TO_MASK[src.size])
            out_main.append(line)
        elif isinstance(dst, m2_expr.ExprMem):
            pointer = dst.arg.replace_expr(prefetchers)
            c_write = self.id_to_c(m2_expr.ExprMem(pointer, dst.size))
            c_write = c_write.replace('MEM_LOOKUP', 'MEM_WRITE')
            # Drop the last character and add the value as extra argument
            out_mem.append('%s, %s);' % (c_write[:-1], self.id_to_c(src)))
        else:
            raise ValueError("Unknown dst")

    out_updt = []
    for dst, local in local_vars.iteritems():
        if dst is not self.ir_arch.IRDst:
            out_updt.append('%s = %s;' % (self.id_to_c(dst),
                                          self.id_to_c(local)))
            out_var.append("uint%d_t %s;" % (local.size, local))

    assignblk.C_var = out_var
    assignblk.C_main = out_main
    assignblk.C_mem = out_mem
    assignblk.C_updt = out_updt
def eval_ir_expr(self, assignblk):
    """
    Evaluate AssignBlock on the current state
    @assignblk: AssignBlock instance

    Return an iterator on the (destination, evaluated source) pairs. Memory
    destinations are rebuilt with their evaluated pointer.
    Raise ValueError when a destination is neither an ExprMem nor an ExprId.
    """
    cache = {}
    results = {}
    for target, source in assignblk.iteritems():
        value = self.eval_expr(source, cache)
        if isinstance(target, m2_expr.ExprMem):
            # Resolve the pointer before using the memory as a key
            pointer = self.eval_expr(target.arg, cache)
            results[m2_expr.ExprMem(pointer, target.size)] = value
        elif isinstance(target, m2_expr.ExprId):
            results[target] = value
        else:
            raise ValueError("affected zarb", str(target))
    return results.iteritems()
def substract_mems(self, arg1, arg2):
    """
    Return the remaining memory areas of @arg1 - @arg2
    @arg1, @arg2: ExprMem

    The result is a list of (ExprMem, value) tuples, where each value is
    the simplified matching slice of self.symbols[@arg1].
    """
    # Byte distance from the start of @arg1 to the start of @arg2
    delta = self.expr_simp(arg2.arg - arg1.arg)
    delta = int(int32(delta.arg))

    # Byte intervals, relative to the start of @arg1
    whole = interval([(0, arg1.size/8-1)])
    covered = interval([(delta, delta + arg2.size/8-1)])

    remaining = []
    for first, last in whole - covered:
        address = self.expr_simp(
            arg1.arg + m2_expr.ExprInt(first, arg1.arg.size))
        content = self.expr_simp(self.symbols[arg1][first*8:(last+1)*8])
        area = m2_expr.ExprMem(address, (last - first + 1)*8)
        assert area.size == content.size
        remaining.append((area, content))
    return remaining
def get_mem_state(self, expr):
    """
    Evaluate the @expr memory in the current state
    @expr: the memory key (ExprMem)

    Return the expression of size @expr.size matching this memory:
    - if no known memory starts at the pointer, the value is rebuilt from
      the overlapping known memories, completed with memory accesses for
      the missing parts; without any overlap, @expr itself is returned
      (read through self.func_read if set and the pointer is an integer);
    - if the known memory is smaller, following memories are concatenated;
    - else, the known value is truncated to the requested size.
    """
    ptr, size = expr.arg, expr.size
    ret = self.find_mem_by_addr(ptr)
    if not ret:
        overlaps = self.get_mem_overlapping(expr)
        if not overlaps:
            if self.func_read and ptr.is_int():
                expr = self.func_read(expr)
            return expr

        out = []
        off_base = 0
        for off, mem in overlaps:
            if off >= 0:
                new_size = min(size - off * 8, mem.size)
                tmp = self.expr_simp(self.symbols[mem][0:new_size])
                out.append((tmp, off_base, off_base + new_size))
                off_base += new_size
            else:
                new_size = min(size - off * 8, mem.size)
                tmp = self.expr_simp(self.symbols[mem][-off * 8:new_size])
                new_off_base = off_base + new_size + off * 8
                out.append((tmp, off_base, new_off_base))
                off_base = new_off_base

        missing_slice = self.rest_slice(out, 0, size)
        for slice_start, slice_stop in missing_slice:
            # Slice offsets are relative to the requested pointer: always
            # start from @ptr instead of the previous slice address,
            # otherwise offsets accumulate with several missing slices
            slice_ptr = self.expr_simp(
                ptr + m2_expr.ExprInt(slice_start / 8, ptr.size))
            mem = m2_expr.ExprMem(slice_ptr, slice_stop - slice_start)
            if self.func_read and slice_ptr.is_int():
                mem = self.func_read(mem)
            out.append((mem, slice_start, slice_stop))

        # Concatenate the parts in increasing bit offset order
        out.sort(key=lambda x: x[1])
        args = [part for (part, _, _) in out]
        ret = self.expr_simp(m2_expr.ExprCompose(*args)[:size])
        return ret

    # bigger lookup
    if size > ret.size:
        rest = size
        out = []
        while rest:
            mem = self.find_mem_by_addr(ptr)
            if mem is None:
                # Unknown memory: consume a single byte
                mem = m2_expr.ExprMem(ptr, 8)
                if self.func_read and ptr.is_int():
                    value = self.func_read(mem)
                else:
                    value = mem
            elif rest >= mem.size:
                value = self.symbols[mem]
            else:
                value = self.symbols[mem][:rest]
            out.append(value)
            rest -= value.size
            ptr = self.expr_simp(
                ptr + m2_expr.ExprInt(mem.size / 8, ptr.size))
        ret = self.expr_simp(m2_expr.ExprCompose(*out))
        return ret

    # part lookup
    ret = self.expr_simp(self.symbols[ret][:size])
    return ret
def lb(ir, instr, a, b):
    """A byte is loaded (sign extended to 32 bits) into a register @a from
    the specified address @b."""
    # Re-read the source operand as a 8-bit memory access
    source = m2_expr.ExprMem(b.arg, 8)
    return [m2_expr.ExprAff(a, source.signExtend(32))], []
def eval_ExprMem(self, e, eval_cache=None):
    """Evaluate the memory expression @e in the current state.

    @e: ExprMem to evaluate
    @eval_cache: (optional) evaluation cache forwarded to eval_expr
    Return the known value when available (possibly rebuilt from several
    known memories); else return the memory access itself, read through
    self.func_read if it is set and the pointer is an ExprInt.
    """
    if eval_cache is None:
        eval_cache = {}
    # Evaluate the pointer, and rebuild the memory if it changed
    a_val = self.expr_simp(self.eval_expr(e.arg, eval_cache))
    if a_val != e.arg:
        a = self.expr_simp(m2_expr.ExprMem(a_val, size=e.size))
    else:
        a = e
    if a in self.symbols:
        return self.symbols[a]
    tmp = None
    # test if mem lookup is known
    if a_val in self.symbols.symbols_mem:
        tmp = self.symbols.symbols_mem[a_val][0]
    if tmp is None:
        # No known memory at this exact pointer
        v = self.find_mem_by_addr(a_val)
        if not v:
            # Rebuild the value from the overlapping known memories
            out = []
            ov = self.get_mem_overlapping(a, eval_cache)
            off_base = 0
            ov.sort()
            for off, x in ov:
                if off >= 0:
                    m = min(a.size - off * 8, x.size)
                    ee = m2_expr.ExprSlice(self.symbols[x], 0, m)
                    ee = self.expr_simp(ee)
                    out.append((ee, off_base, off_base + m))
                    off_base += m
                else:
                    m = min(a.size - off * 8, x.size)
                    ee = m2_expr.ExprSlice(self.symbols[x], -off * 8, m)
                    ff = self.expr_simp(ee)
                    new_off_base = off_base + m + off * 8
                    out.append((ff, off_base, new_off_base))
                    off_base = new_off_base
            if out:
                # Complete the missing parts with raw memory accesses
                missing_slice = self.rest_slice(out, 0, a.size)
                for sa, sb in missing_slice:
                    ptr = self.expr_simp(
                        a_val + m2_expr.ExprInt_from(a_val, sa / 8))
                    mm = m2_expr.ExprMem(ptr, size=sb - sa)
                    mm.is_term = True
                    mm.is_simp = True
                    out.append((mm, sa, sb))
                # Order the parts by increasing start bit
                out.sort(key=lambda x: x[1])
                ee = m2_expr.ExprSlice(m2_expr.ExprCompose(out), 0, a.size)
                ee = self.expr_simp(ee)
                return ee
        if self.func_read and isinstance(a.arg, m2_expr.ExprInt):
            return self.func_read(a)
        else:
            # XXX hack test
            a.is_term = True
            return a
    # bigger lookup: concatenate consecutive memories until the requested
    # size is reached
    if a.size > tmp.size:
        rest = a.size
        ptr = a_val
        out = []
        ptr_index = 0
        while rest:
            v = self.find_mem_by_addr(ptr)
            if v is None:
                # Unknown memory: consume a single byte
                val = m2_expr.ExprMem(ptr, 8)
                v = val
                diff_size = 8
            elif rest >= v.size:
                val = self.symbols[v]
                diff_size = v.size
            else:
                diff_size = rest
                val = self.symbols[v][0:diff_size]
            val = (val, ptr_index, ptr_index + diff_size)
            out.append(val)
            ptr_index += diff_size
            rest -= diff_size
            ptr = self.expr_simp(
                self.eval_expr(
                    m2_expr.ExprOp('+', ptr,
                                   m2_expr.ExprInt_from(ptr, v.size / 8)),
                    eval_cache))
        e = self.expr_simp(m2_expr.ExprCompose(out))
        return e
    # part lookup: the known memory is at least as big as the request
    tmp = self.expr_simp(m2_expr.ExprSlice(self.symbols[tmp], 0, a.size))
    return tmp
def possible_values(expr):
    """Return possible values for expression @expr, associated with their
    condition constraint as a ConstrainedValues instance
    @expr: Expr instance

    Raise RuntimeError on an unsupported expression type.
    """
    consvals = ConstrainedValues()

    # Terminal expression
    if (isinstance(expr, m2_expr.ExprInt) or isinstance(expr, m2_expr.ExprId)):
        consvals.add(ConstrainedValue(frozenset(), expr))
    # Unary expression
    elif isinstance(expr, m2_expr.ExprSlice):
        consvals.update(ConstrainedValue(consval.constraints,
                                         consval.value[expr.start:expr.stop])
                        for consval in possible_values(expr.arg))
    elif isinstance(expr, m2_expr.ExprMem):
        consvals.update(ConstrainedValue(consval.constraints,
                                         m2_expr.ExprMem(consval.value,
                                                         expr.size))
                        for consval in possible_values(expr.arg))
    elif isinstance(expr, m2_expr.ExprAff):
        # Only the source of an assignment is considered
        consvals.update(possible_values(expr.src))
    # Special case: constraint insertion
    elif isinstance(expr, m2_expr.ExprCond):
        # NOTE(review): to_ret is not used below
        to_ret = set()
        src1cond = CondConstraintNotZero(expr.cond)
        src2cond = CondConstraintZero(expr.cond)
        # Values of src1 hold when the condition is not zero
        consvals.update(ConstrainedValue(consval.constraints.union([src1cond]),
                                         consval.value)
                        for consval in possible_values(expr.src1))
        # Values of src2 hold when the condition is zero
        consvals.update(ConstrainedValue(consval.constraints.union([src2cond]),
                                         consval.value)
                        for consval in possible_values(expr.src2))
    # N-ary expression
    elif isinstance(expr, m2_expr.ExprOp):
        # For details, see ExprCompose
        consvals_args = [possible_values(arg) for arg in expr.args]
        for consvals_possibility in itertools.product(*consvals_args):
            args_value = [consval.value for consval in consvals_possibility]
            args_constraint = itertools.chain(*[consval.constraints
                                                for consval in consvals_possibility])
            consvals.add(ConstrainedValue(frozenset(args_constraint),
                                          m2_expr.ExprOp(expr.op, *args_value)))
    elif isinstance(expr, m2_expr.ExprCompose):
        # Generate each possibility for sub-argument, associated with the
        # start and stop bit
        consvals_args = [map(lambda x: x, possible_values(arg))
                         for arg in expr.args]
        for consvals_possibility in itertools.product(*consvals_args):
            # Merge constraint of each sub-element
            args_constraint = itertools.chain(*[consval.constraints
                                                for consval in consvals_possibility])
            # Gen the corresponding constraints / ExprCompose
            args = [consval.value for consval in consvals_possibility]
            consvals.add(
                ConstrainedValue(frozenset(args_constraint),
                                 m2_expr.ExprCompose(*args)))
    else:
        raise RuntimeError("Unsupported type for expr: %s" % type(expr))

    return consvals
def sb(ir, instr, a, b):
    "The least significant byte of @a is stored at the specified address @b."
    # Re-read the destination operand as a 8-bit memory access
    target = m2_expr.ExprMem(b.arg, 8)
    return [m2_expr.ExprAff(target, a[:8])], []
def test_Variables_Identifier(self): import miasm2.expression.expression as m2_expr from miasm2.expression.expression_helper import Variables_Identifier # Build a complex expression cst = m2_expr.ExprInt16(0x100) eax = m2_expr.ExprId("EAX") ebx = m2_expr.ExprId("EBX") ax = eax[0:16] expr = eax + ebx expr = m2_expr.ExprCompose(ax, expr[16:32]) expr2 = m2_expr.ExprMem((eax + ebx) ^ (eax), size=16) expr2 = expr2 | ax | expr2 | cst exprf = expr - expr + m2_expr.ExprCompose(expr2, cst) # Identify variables vi = Variables_Identifier(exprf) # Use __str__ print vi # Test the result new_expr = vi.equation ## Force replace in the variable dependency order for var_id, var_value in reversed(vi.vars.items()): new_expr = new_expr.replace_expr({var_id: var_value}) self.assertEqual(exprf, new_expr) # Test prefix vi = Variables_Identifier(exprf, var_prefix="prefix_v") ## Use __str__ print vi ## Test the result new_expr = vi.equation ### Force replace in the variable dependency order for var_id, var_value in reversed(vi.vars.items()): new_expr = new_expr.replace_expr({var_id: var_value}) self.assertEqual(exprf, new_expr) # Test an identify on an expression already containing identifier vi = Variables_Identifier(exprf) vi2 = Variables_Identifier(vi.equation) ## Test the result new_expr = vi2.equation ### Force replace in the variable dependency order for var_id, var_value in reversed(vi2.vars.items()): new_expr = new_expr.replace_expr({var_id: var_value}) self.assertEqual(vi.equation, new_expr) ## Corner case: each sub var depends on itself mem1 = m2_expr.ExprMem(ebx, size=32) mem2 = m2_expr.ExprMem(mem1, size=32) cst2 = m2_expr.ExprInt32(-1) expr_mini = ((eax ^ mem2 ^ cst2) & (mem2 ^ (eax + mem2)))[31:32] ## Build vi = Variables_Identifier(expr_mini) vi2 = Variables_Identifier(vi.equation) ## Test the result new_expr = vi2.equation ### Force replace in the variable dependency order for var_id, var_value in reversed(vi2.vars.items()): new_expr = new_expr.replace_expr({var_id: 
var_value}) self.assertEqual(vi.equation, new_expr)
def callback(self, jitter):
    """Callback updating the symbolic state and the read / written memories.

    It is installed as jitter.exec_cb by prepare_symbexec.
    For each expression modified by the last symbolic run, record symbolic
    memory writes and reads, replace non-pointer memory bytes by their
    initial values, then symbolically execute the block at the current
    address.
    @jitter: jitter instance (its pc and cpu attributes are read)
    Always return True.
    """
    # Check previous state
    # A memory is "symbolic" when its pointer is not an integer
    is_symbolic = lambda expr: (isinstance(expr, m2_expr.ExprMem) and
                                not isinstance(expr.arg, m2_expr.ExprInt))

    # When it is possible, consider only elements modified in the last run
    # -> speed up to avoid browsing the whole memory
    to_consider = self.symb.modified_exprs

    for symbol in to_consider:
        # Do not consider PC
        if symbol == self.ira.pc:
            continue

        # Write to @NN[... argX ...]
        if is_symbolic(symbol):
            self.memories_write.add(symbol)

        # Read from ... @NN[... argX ...] ...
        symb_value = self.symb.eval_expr(symbol)
        to_replace = {}
        for expr in m2_expr.ExprAff(symbol,
                                    symb_value).get_r(mem_read=True):
            if is_symbolic(expr):
                if isinstance(expr, m2_expr.ExprMem):
                    # Consider each byte individually
                    # Case: @32[X] with only @8[X+1] to replace
                    addr_expr = expr.arg
                    new_expr = []
                    consider = False
                    for offset in xrange(expr.size / 8):
                        sub_expr = m2_expr.ExprMem(
                            self.symb.expr_simp(
                                addr_expr + m2_expr.ExprInt(
                                    offset, size=addr_expr.size)),
                            8)
                        if not self.is_pointer(sub_expr):
                            # Not a PTR, we have to replace with the real
                            # value
                            original_sub_expr = sub_expr.replace_expr(
                                self.init_values)
                            new_expr.append(
                                self.symb.eval_expr(original_sub_expr))
                            consider = True
                        else:
                            new_expr.append(sub_expr)

                    # Rebuild the corresponding expression
                    if consider:
                        assert len(new_expr) == expr.size / 8
                        to_replace[expr] = m2_expr.ExprCompose(*new_expr)

                if expr not in self.memories_write:
                    # Do not consider memory already written during the run
                    self.memories_read.add(expr)

        # Replace with real value for non-pointer symbols
        if to_replace:
            symb_value = self.symb.expr_simp(
                symb_value.replace_expr(to_replace))
            if isinstance(symbol, m2_expr.ExprMem):
                # Replace only in ptr (case to_replace: @[arg] = 8, expr:
                # @[arg] = @[arg])
                symbol = m2_expr.ExprMem(
                    self.symb.expr_simp(
                        symbol.arg.replace_expr(to_replace)),
                    symbol.size)
            self.symb.apply_change(symbol, symb_value)

        # Check computed values against real ones
        # TODO idem memory
        if (isinstance(symbol, m2_expr.ExprId) and
                isinstance(symb_value, m2_expr.ExprInt)):
            if hasattr(jitter.cpu, symbol.name):
                value = m2_expr.ExprInt(getattr(jitter.cpu, symbol.name),
                                        symbol.size)
                assert value == self.symb.symbols[symbol]

    cur_addr = jitter.pc
    self.logger.debug("Current address: %s", hex(cur_addr))
    if cur_addr == 0x1337BEEF or cur_addr == self.return_addr:
        # End reached
        if self.logger.isEnabledFor(logging.DEBUG):
            # Dump the read ("In") and written ("Out") memories with
            # their C form
            print "In:"
            for x in self.memories_read:
                print "\t%s (%s)" % (
                    x,
                    self.c_handler.expr_to_c(x),
                )
            print "Out:"
            for x in self.memories_write:
                print "\t%s (%s)" % (
                    x,
                    self.c_handler.expr_to_c(x),
                )
        return True

    # Update state
    ## Reset cache structures
    self.mdis.job_done.clear()
    self.symb_ir.blocs.clear()

    ## Update current state
    asm_block = self.mdis.dis_bloc(cur_addr)
    # NOTE(review): irblocks is not used below
    irblocks = self.symb_ir.add_bloc(asm_block)

    self.symb.emul_ir_blocks(cur_addr)

    return True
def prepare_symbexec(self, jitter, return_addr):
    """Prepare the symbolic execution engine mirroring @jitter.

    Install self.callback on @jitter, build the disassembler and the
    symbolic engine, load registers and memory from @jitter, replace
    dereferenceable arguments by symbols and reset the output structures.
    @jitter: jitter instance to follow
    @return_addr: address at which the tracked function returns
    """
    # Activate callback on each instr
    jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
    jitter.exec_cb = self.callback

    # Disassembler
    self.mdis = self.machine.dis_engine(bin_stream_vm(jitter.vm),
                                        lines_wd=1)

    # Symbexec engine
    ## Prepare the symbexec engine
    self.symb_ir = self.machine.ir()
    self.symb = EmulatedSymbExecWithModif(jitter.cpu, jitter.vm,
                                          self.symb_ir, {})
    self.symb.enable_emulated_simplifications()

    ## Update registers value
    self.symb.reset_regs()
    self.symb.update_engine_from_cpu()

    ## Load the memory as ExprMem
    self.symb.func_read = None
    self.symb.func_write = None
    for base_addr, mem_segment in jitter.vm.get_all_memory().iteritems():
        # Split into 8 bytes chunk for get_mem_overlapping
        for start in xrange(0, mem_segment["size"], 8):
            # The last chunk of a segment may be smaller than 8 bytes
            expr_mem = m2_expr.ExprMem(m2_expr.ExprInt(base_addr + start,
                                                       size=64),
                                       size=8 * min(8, mem_segment["size"] - start))
            # Its initialisation, self.symb.apply_change is not necessary
            self.symb.symbols[expr_mem] = self.symb._func_read(expr_mem)

    ## Save the initial state
    self.symbols_init = self.symb.symbols.copy()

    ## Save the returning address
    self.return_addr = return_addr

    # Inject argument
    # TODO
    # TODO: use abicls
    abi_order = ["RDI", "RSI", "RDX", "RCX", "R8", "R9"]
    self.init_values = {}
    struct_expr_types = {}
    self.args_symbols = []
    for i, param_name in enumerate(self.prototype.args_order):
        cur_arg_abi = getattr(self.ira.arch.regs, abi_order[i])
        cur_arg = m2_expr.ExprId("arg%d_%s" % (i, param_name),
                                 size=cur_arg_abi.size)
        arg_type = self.prototype.args[param_name]
        if objc_is_dereferenceable(arg_type):
            # Convert the argument to symbol to track access based on it
            self.init_values[cur_arg] = self.symb.symbols[cur_arg_abi]
            self.symb.apply_change(cur_arg_abi, cur_arg)
        # NOTE(review): the two statements below are assumed to run for
        # every argument, not only dereferenceable ones -- confirm
        struct_expr_types[cur_arg.name] = arg_type
        self.args_symbols.append(cur_arg)

    # Init Expr <-> C conversion
    # Strict access is deliberately not enforced (example: memcpy(struct))
    self.c_handler = CHandler(self.types, struct_expr_types,
                              enforce_strict_access=False)

    # Init output structures
    self.memories_read = set()
    self.memories_write = set()
def get_mem_state(self, expr):
    """
    Evaluate the @expr memory in the current state
    @expr: the memory key (ExprMem)

    Return the expression of size @expr.size matching this memory:
    - if no known memory starts at the pointer, the value is rebuilt from
      the overlapping known memories, completed with memory accesses for
      the missing parts; without any overlap, @expr itself is returned
      (read through self.func_read if set and the pointer is an ExprInt);
    - if the known memory is smaller, following memories are concatenated;
    - else, the known value is truncated to the requested size.
    """
    ptr, size = expr.arg, expr.size
    ret = self.find_mem_by_addr(ptr)
    if not ret:
        out = []
        overlaps = self.get_mem_overlapping(expr)
        off_base = 0
        for off, mem in overlaps:
            if off >= 0:
                new_size = min(size - off * 8, mem.size)
                tmp = self.expr_simp(self.symbols[mem][0:new_size])
                out.append((tmp, off_base, off_base + new_size))
                off_base += new_size
            else:
                new_size = min(size - off * 8, mem.size)
                tmp = self.expr_simp(self.symbols[mem][-off * 8:new_size])
                new_off_base = off_base + new_size + off * 8
                out.append((tmp, off_base, new_off_base))
                off_base = new_off_base
        if out:
            missing_slice = self.rest_slice(out, 0, size)
            for slice_start, slice_stop in missing_slice:
                # Slice offsets are relative to the requested pointer:
                # always start from @ptr instead of the previous slice
                # address, otherwise offsets accumulate with several
                # missing slices
                slice_ptr = self.expr_simp(
                    ptr + m2_expr.ExprInt(slice_start / 8, ptr.size))
                mem = m2_expr.ExprMem(slice_ptr, slice_stop - slice_start)
                out.append((mem, slice_start, slice_stop))
            # Concatenate the parts in increasing bit offset order
            out.sort(key=lambda x: x[1])
            args = [part for (part, _, _) in out]
            tmp = m2_expr.ExprSlice(m2_expr.ExprCompose(*args), 0, size)
            tmp = self.expr_simp(tmp)
            return tmp

        if self.func_read and isinstance(ptr, m2_expr.ExprInt):
            return self.func_read(expr)
        else:
            return expr

    # bigger lookup
    if size > ret.size:
        rest = size
        out = []
        ptr_index = 0
        while rest:
            mem = self.find_mem_by_addr(ptr)
            if mem is None:
                # Unknown memory: consume a single byte
                value = m2_expr.ExprMem(ptr, 8)
                mem = value
                diff_size = 8
            elif rest >= mem.size:
                value = self.symbols[mem]
                diff_size = mem.size
            else:
                diff_size = rest
                value = self.symbols[mem][0:diff_size]
            out.append((value, ptr_index, ptr_index + diff_size))
            ptr_index += diff_size
            rest -= diff_size
            ptr = self.expr_simp(
                ptr + m2_expr.ExprInt(mem.size / 8, ptr.size))
        out.sort(key=lambda x: x[1])
        args = [part for (part, _, _) in out]
        ret = self.expr_simp(m2_expr.ExprCompose(*args))
        return ret

    # part lookup
    ret = self.expr_simp(self.symbols[ret][:size])
    return ret
def sh(ir, instr, a, b):
    "The least significant 16 bits of @a are stored at the specified address @b."
    # Re-read the destination operand as a 16-bit memory access
    target = m2_expr.ExprMem(b.arg, 16)
    return [m2_expr.ExprAff(target, a[:16])], []