def _get_merging_var_candidates(self, symb_engine, assignblk, dst):
    """Collect the (location, constant) pairs modified in the symbolic state.

    Every pair yielded by ``symb_engine.modified(regs.regs_init)`` is kept
    when its value is an integer expression with a size greater than one
    and its key is exactly an ``ExprId`` or an ``ExprMem``. Identifiers
    named "RIP", "EIP" or like the IR destination are dropped. Keys that
    are not identifiers get the initial stack pointer substituted and are
    then simplified before being stored.

    :param symb_engine: object providing ``modified()``; queried with
        ``regs.regs_init``
    :param assignblk: assignment block; its ``instr.offset`` is handed to
        ``idc.get_spd`` to obtain the stack delta
    :param dst: expression whose ``size`` determines the constant added to
        the substituted stack pointer (``size // 8``)
    :return: set of ``(key, val)`` tuples
    """
    spd = self.conn.modules.idc.get_spd(assignblk.instr.offset)
    stk_high = ExprInt(spd, self.ir_arch.sp.size)
    init_sp = self.mn.regs.regs_init[self.ir_arch.sp]
    # Substitution applied to non-identifier keys ("get relative depth").
    sp_rewrite = {
        init_sp: -stk_high + init_sp + ExprInt(dst.size // 8, dst.size),
    }

    candidates = set()
    for location, value in symb_engine.modified(regs.regs_init):
        # Only integer values wider than a single bit are of interest.
        if not value.is_int() or value.size <= 1:
            continue
        # Exact type match on purpose: subclasses are not accepted.
        if type(location) not in (ExprId, ExprMem):
            continue
        if location.is_id():
            if location.name in ("RIP", "EIP", self.ircfg.IRDst.name):
                continue
        else:
            location = expr_simp(location.replace_expr(sp_rewrite))
        candidates.add((location, value))
    return candidates
def _recognize(self, max_loop_num):
    """Walk the IR CFG from ``LocKey(0)`` and dispatch every assignment.

    Blocks are taken from a LIFO work list, each one at most once, and the
    symbolic engine is restored to the state saved when the block was
    queued. Each ``dst = src`` assignment is then handled as follows:

    * ``src`` is an integer found in ``self.func_addresses``: when the
      assignment has no recorded uses in ``self.analyses.defuse_edges``,
      ``(int(src), frozenset(), loc_key)`` is added to
      ``self.possible_merge_funcs``;
    * ``src`` is a ``call_func_stack`` operation: forwarded to
      ``self._process_call``;
    * ``src`` simplifies to an integer and ``dst`` is not a bad
      expression, or the block is ``LocKey(0)``, ``dst == src`` and
      ``dst`` passes the ``self._merging_var_candidates`` filter:
      forwarded to ``self._process_assignment``. A truthy result resets
      ``self._merging_var_candidates`` to ``None`` and bumps the counter.

    :param max_loop_num: limit for the counter above; a falsy value means
        no limit. The walk returns as soon as the counter is strictly
        greater than this limit.
    :return: None
    """
    engine = SymbolicExecutionEngine(self.ir_arch, regs.regs_init)
    worklist = [(LocKey(0), engine.get_state())]
    visited = set()
    max_loop_num = max_loop_num if max_loop_num else float('inf')
    loops_found = 0

    while worklist:
        loc_key, saved_state = worklist.pop()
        if loc_key in visited or loc_key not in self.ircfg.blocks:
            continue
        visited.add(loc_key)
        block = self.ircfg.blocks[loc_key]
        engine.set_state(saved_state)

        for index, assignblk in enumerate(block.assignblks):
            for dst, src in assignblk.items():
                # Strict comparison: stop only after the limit is exceeded.
                if loops_found > max_loop_num:
                    return

                if src.is_int() and int(src) in self.func_addresses:
                    node = AssignblkNode(block.loc_key, index, dst)
                    uses = self.analyses.defuse_edges
                    if node not in uses or not uses[node]:
                        # No uses: possible virtual table initialization.
                        self.possible_merge_funcs.add(
                            (int(src), frozenset(), loc_key))
                    continue

                if src.is_op("call_func_stack"):
                    self._process_call(src, dst, engine, assignblk, loc_key)
                    continue

                is_candidate = (
                    (expr_simp(src).is_int() and not is_bad_expr(dst))
                    or (block.loc_key == LocKey(0)
                        and dst == src
                        and (not self._merging_var_candidates
                             or dst in self._merging_var_candidates))
                )
                if is_candidate and self._process_assignment(block, index, dst):
                    self._merging_var_candidates = None
                    loops_found += 1

            # Advance the symbolic state past this assignment block.
            engine.eval_updt_assignblk(assignblk)

        # Every successor is queued with its own snapshot of the state.
        worklist.extend(
            (succ, engine.get_state())
            for succ in self.ircfg.successors(loc_key)
        )
def _normalize_ircfg(self, conn):
    """Rewrite the blocks of ``self.ircfg`` into a stack-normalized form.

    Blocks are visited breadth-first starting at ``LocKey(0)``. For each
    visited block that is present in ``self.ircfg.blocks``:

    1. If the block destination is a ``CC_EQ`` condition which, after
       symbolic execution of the block, becomes a plain identifier that the
       block leaves unchanged, an explicit ``ID = 0`` assignment is
       prepended to the ``src2`` target block, provided that target has at
       most one predecessor.
    2. Every assignment block is rebuilt: the stack pointer is replaced by
       the initial stack pointer plus the stack delta returned by
       ``conn.modules.idc.get_spd``, registers recorded as copies of the
       stack pointer (``bp``) are substituted as well, qualifying
       ``ExprCompose`` sources are replaced by their resized non-slice
       argument, and ``dst == src`` assignments are dropped.

    The rebuilt block is stored back into ``self.ircfg.blocks``.

    :param conn: object exposing ``modules.idc.get_spd(offset)``
        (presumably a bridge to a running IDA instance - TODO confirm)
    :raises RuntimeError: "Ambiguous base pointer" when, outside of the
        ``LocKey(0)`` block, the stack pointer is copied with a value that
        is not among the ones already recorded in ``bp``
    """
    # unalias stack miasm.re/blog/2017/02/03/data_flow_analysis_depgraph.html , but involve base pointer too
    # TODO remove manual *BP propagation in normalize_ircfg and use standard Miasm propagation when it is fixed
    # remove composes from bigger to smaller, they are not important for us
    bp = {}  # dst -> normalized stack expression, shared across all blocks
    prev_offset = None  # last non-zero instruction offset seen, reused as fallback
    for irb_loc_key in self.ircfg.walk_breadth_first_forward(LocKey(0)):
        irs = []
        if irb_loc_key not in self.ircfg.blocks:
            continue
        irb = self.ircfg.blocks[irb_loc_key]
        if irb.dst.is_cond() and irb.dst.cond.is_op() and irb.dst.cond.op == 'CC_EQ':
            # TODO propagate cmp ..., arb_int too
            # propagate known zeroes to process test eax, eax; jnz ...; lea edi, [eax+4]
            symb_exec = SymbolicExecutionEngine(self.ir_arch)
            dst = symb_exec.eval_updt_irblock(irb)
            # The condition must be an identifier whose value at the end of
            # the block still equals the identifier itself.
            if dst.is_cond() and dst.cond.is_id() and not is_bad_expr(dst.cond) and \
                    symb_exec.eval_expr(dst.cond) == dst.cond:
                # add explicit mov ID, 0 to given irb
                target_loc = dst.src2
                if target_loc.is_int():
                    target_loc = self.asmcfg.loc_db.get_offset_location(int(target_loc))
                elif target_loc.is_loc():
                    target_loc = target_loc.loc_key
                else:
                    # NOTE(review): this `continue` leaves the per-block loop,
                    # so the current block is not rewritten below - confirm
                    # that skipping the whole block is intended.
                    continue
                if len(self.ircfg.predecessors(target_loc)) > 1:
                    # NOTE(review): same as above - the current block is
                    # skipped entirely, not just the zero propagation.
                    continue
                target_irb = self.ircfg.blocks[target_loc]
                asign_blk = AssignBlock([ExprAssign(dst.cond, ExprInt(0, dst.cond.size))])
                assignblks = tuple([asign_blk, *target_irb.assignblks])
                new_irb = IRBlock(target_loc, assignblks)
                self.ircfg.blocks[target_loc] = new_irb
        fix_dct = {}
        for assignblk in irb:
            # Fall back to the previous offset when this assignment block
            # has no instruction or a zero/None offset.
            offset = prev_offset
            if assignblk.instr and assignblk.instr.offset:
                offset = assignblk.instr.offset
                prev_offset = offset
            # NOTE(review): `offset` can still be None here if no offset has
            # been seen yet - verify get_spd tolerates that.
            spd = conn.modules.idc.get_spd(offset)
            if spd is not None:
                stk_high = ExprInt(spd, self.ir_arch.sp.size)
                fix_dct = {self.ir_arch.sp: self.mn.regs.regs_init[self.ir_arch.sp] + stk_high}
                fix_dct.update(bp)
            else:
                # fix_dct keeps its previous content in this case
                logger.warning("Couldn't acquire stack depth at 0x%x" % (offset or 0x0BADF00D))
            new_assignblk = {}
            for dst, src in assignblk.items():
                if src.is_compose():
                    slc_arg = None
                    arg = None
                    # Keeps the last slice and the last non-slice argument.
                    for tmp_arg in src.args:
                        if not tmp_arg.is_slice():
                            arg = tmp_arg
                        else:
                            # we're interested only in bigger to smaller
                            slc_arg = tmp_arg
                    if slc_arg and arg and len(arg.get_r()) == 1:
                        # The lambda reads `src.size` while `visit` runs,
                        # i.e. before `src` is rebound by the assignment.
                        top_to_bottom_visitor = ExprVisitorCallbackTopToBottom(
                            lambda x: self._resize_top_expr(x, src.size))
                        src = top_to_bottom_visitor.visit(arg)
                if dst == src:
                    # special compiler anomalies such as lea esp, [esp+0]
                    continue
                if src == self.ir_arch.sp:
                    # dst becomes a copy of the stack pointer: record it so
                    # later uses of dst are substituted too.
                    src = expr_simp(src.replace_expr(fix_dct))
                    if bp and src not in bp.values() and irb_loc_key != LocKey(0):
                        raise RuntimeError("Ambiguous base pointer")
                    bp.update({dst: src})
                    fix_dct.update(bp)
                else:
                    src = expr_simp(src.replace_expr(fix_dct))
                    # The stack pointer and recorded base pointers are left
                    # untouched when they are the destination.
                    if dst != self.ir_arch.sp and dst not in bp.keys():
                        dst = dst.replace_expr(fix_dct)
                dst, src = expr_simp(dst), expr_simp(src)
                new_assignblk[dst] = src
            irs.append(AssignBlock(new_assignblk, instr=assignblk.instr))
        self.ircfg.blocks[irb.loc_key] = IRBlock(irb.loc_key, irs)