def _gen_path_constraints(self, translator, expr, expected):
    """Build the z3 condition under which @expr takes the value @expected.

    Each candidate yielded by possible_values(@expr) contributes one
    alternative: the conjunction of its own constraints and, when its value
    differs from @expected, the translation of ExprAff(value, @expected).
    Generated labels get a special treatment: a label is only matched by the
    very same label, and a non-label is never matched with a label.

    @translator: object whose from_expr() turns an expression into z3
    @expr: expression to explore
    @expected: value @expr has to take
    Return the z3 disjunction of the kept alternatives, or the translation
    of self.unsat_expr when no candidate is kept
    """
    want_label = expr_is_label(expected)
    alternatives = []
    for candidate in possible_values(expr):
        value = candidate.value
        if want_label:
            # An expected label is only satisfied by the identical label
            if value != expected:
                continue
        elif expr_is_label(value):
            # An expected non-label must not be unified with a label
            continue

        clause = z3.And(*[translator.from_expr(cond.to_constraint())
                          for cond in candidate.constraints])
        if expected != value:
            equality = m2_expr.ExprAff(value, expected)
            clause = z3.And(clause, translator.from_expr(equality))
        alternatives.append(clause)

    if not alternatives:
        # Ex: expr: lblgen1, expected: 0x1234
        # -> Avoid the inconsistent solution lblgen1 = 0x1234
        return translator.from_expr(self.unsat_expr)
    return z3.Or(*alternatives)
def edge_attr(self, src, dst):
    """Return the drawing attributes of the edge @src -> @dst.

    An empty dict is returned when either end is not a known block.
    Otherwise the dict holds a single "color" entry: "limegreen" when @dst
    is the label of the first branch of a conditional destination, "red"
    when it is the label of the second branch, "blue" in any other case.
    """
    blocks = self._blocks
    if not (src in blocks and dst in blocks):
        return {}

    irdst = blocks[src].dst
    color = "blue"
    if isinstance(irdst, m2_expr.ExprCond):
        # First matching branch wins: src1 is looked at before src2
        branches = ((irdst.src1, "limegreen"), (irdst.src2, "red"))
        for branch, branch_color in branches:
            if expr_is_label(branch) and branch.name == dst:
                color = branch_color
                break
    return {"color": color}
def post_add_bloc(self, block, ir_blocks):
    """Split call-like IR blocks after @block has been added.

    An IR block is rewritten when it assigns both PC and RA and the RA value
    is an integer or a label. For such a block, a new IR block holding
    self.call_effects() followed by a jump to the block following @block is
    generated, and the original IR block is redirected to it.

    @block: native block the @ir_blocks come from
    @ir_blocks: IR blocks to inspect
    Return the new list of IR blocks
    Raise ValueError if the RA value is not the offset of @block's
    second-to-last line plus 8
    """
    IntermediateRepresentation.post_add_bloc(self, block, ir_blocks)

    result = []
    for irblock in ir_blocks:
        # Keep the last value assigned to PC and RA in this IR block
        pc_val = lr_val = None
        for assignblk in irblock.irs:
            pc_val = assignblk.get(self.arch.regs.PC, pc_val)
            lr_val = assignblk.get(self.arch.regs.RA, lr_val)

        looks_like_call = (pc_val is not None and
                           lr_val is not None and
                           expr_is_int_or_label(lr_val))
        if not looks_like_call:
            result.append(irblock)
            continue

        if expr_is_label(lr_val):
            lr_val = ExprInt(lr_val.name.offset, 32)

        # NOTE(review): lines[-2] together with "+ 8" looks like a branch
        # followed by one more instruction -- confirm against the arch
        instr = block.lines[-2]
        if lr_val.arg != instr.offset + 8:
            raise ValueError("Wrong arg")

        # CALL
        next_lbl = block.get_next()
        call_lbl = self.gen_label()
        call_irs = self.call_effects(pc_val, instr)
        jump_back = ExprAff(self.IRDst, ExprId(next_lbl, size=self.pc.size))
        call_irs.append(AssignBlock([jump_back], instr))
        result.append(IRBlock(call_lbl, call_irs))
        result.append(irblock.set_dst(ExprId(call_lbl, size=self.pc.size)))
    return result
def traverse_expr_dst(self, expr, dst2index):
    """
    Generate the index of the destination label for the @expr
    @expr: destination expression (condition, integer, label or other)
    @dst2index: dictionary to link label to its index

    Return a pair of strings (index, value):
    - ExprCond: both strings are "((cond)?(a):(b))" built from the
      recursive results of the two branches
    - ExprInt / label with an offset: the registered index and hex(offset)
    - label without offset: the registered index and "0"
    - anything else: "-1" and self.id_to_c(@expr); @dst2index[@expr] is
      set to -1
    """
    if isinstance(expr, m2_expr.ExprCond):
        cond = self.id_to_c(expr.cond)
        src1, src1b = self.traverse_expr_dst(expr.src1, dst2index)
        src2, src2b = self.traverse_expr_dst(expr.src2, dst2index)
        return ("((%s)?(%s):(%s))" % (cond, src1, src2),
                "((%s)?(%s):(%s))" % (cond, src1b, src2b))

    if isinstance(expr, m2_expr.ExprInt):
        offset = int(expr)
        self.add_label_index(dst2index, offset)
        return ("%s" % dst2index[offset], hex(offset))

    if expr_is_label(expr):
        label = expr.name
        # Identity test: an offset of 0 is a valid offset, only None means
        # "no offset"
        if label.offset is not None:
            offset = label.offset
            self.add_label_index(dst2index, offset)
            return ("%s" % dst2index[offset], hex(offset))
        # Label without offset: indexed by the label itself
        self.add_label_index(dst2index, label)
        return ("%s" % dst2index[label], "0")

    # Destination which is neither a condition, an integer nor a label
    dst2index[expr] = -1
    return ("-1", self.id_to_c(expr))
def cb_x86_callpop(cur_bloc, symbol_pool, *args, **kwargs):
    """
    Disassembly callback turning a CALL to the next instruction into a PUSH

    1000: call 1005
    1005: pop

    Will give:

    1000: push 1005
    1005: pop

    @cur_bloc: block being disassembled, updated in place on match
    @symbol_pool: forwarded to cur_bloc.add_cst()
    """
    # Pattern matching: nothing to do on an empty block
    if not cur_bloc.lines:
        return

    ## A CALL is always the last line of a basic block
    call = cur_bloc.lines[-1]
    if call.name != 'CALL':
        return

    ## The destination must be a label located right after the CALL
    target = call.args[0]
    targets_next = (expr_is_label(target) and
                    target.name.offset == call.offset + call.l)
    if not targets_next:
        return

    # Update instruction instance
    call.name = 'PUSH'

    # Update next blocks to process in the disassembly engine
    cur_bloc.bto.clear()
    cur_bloc.add_cst(target.name.offset, AsmConstraint.c_next, symbol_pool)
def _gen_graph(self):
    """
    Build the digraph of IR blocks and store it in self._graph

    Every block label becomes a node. For each destination returned by
    dst_trackback(), an integer is first turned into the label located at
    that offset (created if needed); an edge is then added for every
    destination which is a label.
    """
    self._graph = DiGraphIR(self.blocks)
    for lbl, irblock in self.blocks.iteritems():
        self._graph.add_node(lbl)
        for target in self.dst_trackback(irblock):
            if target.is_int():
                target = m2_expr.ExprId(
                    self.symbol_pool.getby_offset_create(int(target)))
            if not expr_is_label(target):
                # Destinations which are not labels yield no edge
                continue
            self._graph.add_edge(lbl, target.name)
def _gen_graph(self):
    """
    Build the digraph of IR blocks and store it in self._graph

    Nodes are the block labels; edges go from a block to each of its
    tracked-back destinations which is a label. ExprInt destinations are
    converted to the label at that offset (created if needed) beforehand.
    """
    self._graph = DiGraphIR(self.blocks)
    for lbl, irblock in self.blocks.iteritems():
        self._graph.add_node(lbl)
        for target in self.dst_trackback(irblock):
            if isinstance(target, m2_expr.ExprInt):
                target_lbl = self.symbol_pool.getby_offset_create(int(target))
                target = m2_expr.ExprId(target_lbl)
            if expr_is_label(target):
                self._graph.add_edge(lbl, target.name)
def _extract_dst(self, todo, done): """ Naive extraction of @todo destinations WARNING: @todo and @done are modified """ out = set() while todo: dst = todo.pop() if expr_is_label(dst): done.add(dst) elif isinstance(dst, (m2_expr.ExprMem, m2_expr.ExprInt)): done.add(dst) elif isinstance(dst, m2_expr.ExprCond): todo.add(dst.src1) todo.add(dst.src2) elif isinstance(dst, m2_expr.ExprId): out.add(dst) else: done.add(dst) return out
def OnRefresh(self):
    """Rebuild the displayed graph from self.ir_arch.blocks.

    One node is added per IR block, then one edge per tracked-back
    destination which is a label of a known block. Always return True.
    """
    self.Clear()

    # IR block -> identifier returned by AddNode
    node_of = {}
    for irblock in self.ir_arch.blocks.values():
        node_of[irblock] = self.AddNode(color_irblock(irblock, self.ir_arch))

    for irblock in self.ir_arch.blocks.values():
        if not irblock:
            continue
        for dst in self.ir_arch.dst_trackback(irblock):
            if not expr_is_label(dst):
                continue
            label = dst.name
            if label not in self.ir_arch.blocks:
                # Destination outside of the known blocks: no edge
                continue
            target_block = self.ir_arch.blocks[label]
            self.AddEdge(node_of[irblock], node_of[target_block])
    return True
def OnRefresh(self):
    """Rebuild the displayed graph from self.ir_arch.blocks.

    Prints 'refresh', clears the view, adds one node per IR block, then one
    edge per tracked-back destination which is a label of a known block.
    Always return True.
    """
    # Parenthesized form prints the same text with the Python 2 print
    # statement
    print('refresh')
    self.Clear()

    # IR block -> identifier returned by AddNode
    addr_id = {}
    for irblock in self.ir_arch.blocks.values():
        addr_id[irblock] = self.AddNode(color_irblock(irblock))

    for irblock in self.ir_arch.blocks.values():
        if not irblock:
            continue
        # Use the instance's IR architecture, like the rest of the method,
        # rather than a module-level `ir_arch` which may not exist
        all_dst = self.ir_arch.dst_trackback(irblock)
        for dst in all_dst:
            if not expr_is_label(dst):
                continue
            dst = dst.name
            if dst not in self.ir_arch.blocks:
                continue
            dst_block = self.ir_arch.blocks[dst]
            self.AddEdge(addr_id[irblock], addr_id[dst_block])
    return True
def remove_jmp_blocks(self):
    """
    Remove irblock with only IRDst set, by linking its parent destination to
    the block destination.

    A block is a candidate when it has a single assignment block holding
    only IRDst, a single successor in self.graph and a label as
    destination. Parents' destinations and the graph edges are updated in
    place; candidates left without any edge are removed from the graph.

    Return True if at least one parent has been relinked, False otherwise
    """

    def relink(parent_lbl, jmp_lbl, target_lbl):
        """Route the edge @parent_lbl -> @jmp_lbl straight to @target_lbl"""
        self.graph.discard_edge(parent_lbl, jmp_lbl)
        self.graph.discard_edge(jmp_lbl, target_lbl)
        self.graph.add_uniq_edge(parent_lbl, target_lbl)

    # Find candidates
    jmp_blocks = set()
    for block in self.blocks.itervalues():
        if len(block.irs) != 1:
            continue
        assignblk = block.irs[0]
        if len(assignblk) > 1:
            continue
        assert set(assignblk.keys()) == set([self.IRDst])
        if len(self.graph.successors(block.label)) != 1:
            continue
        if not expr_is_label(assignblk[self.IRDst]):
            continue
        jmp_blocks.add(block)

    # Remove them, relink graph
    modified = False
    for block in jmp_blocks:
        dst_label = block.dst.name
        # Copy: edges of the graph are edited inside the loop
        parents = list(self.graph.predecessors(block.label))
        for lbl in parents:
            parent = self.blocks.get(lbl, None)
            if parent is None:
                continue
            dst = parent.dst
            if dst.is_id(block.label):
                dst = m2_expr.ExprId(dst_label, dst.size)
                relink(lbl, block.label, dst_label)
                modified = True
            elif dst.is_cond():
                if dst.src1.is_id(block.label):
                    dst = m2_expr.ExprCond(
                        dst.cond,
                        m2_expr.ExprId(dst_label, dst.size),
                        dst.src2)
                    relink(lbl, block.label, dst_label)
                    modified = True
                if dst.src2.is_id(block.label):
                    dst = m2_expr.ExprCond(
                        dst.cond,
                        dst.src1,
                        m2_expr.ExprId(dst_label, dst.size))
                    relink(lbl, block.label, dst_label)
                    modified = True
                if dst.src1 == dst.src2:
                    # Both branches now agree: collapse the condition.
                    # Read the branch from the *updated* destination; the
                    # branch captured before the rewrite may still be the
                    # jump block being removed.
                    dst = dst.src1
            else:
                continue
            parent.dst = dst

    # Remove unlinked useless nodes
    for block in jmp_blocks:
        if (len(self.graph.predecessors(block.label)) == 0 and
                len(self.graph.successors(block.label)) == 0):
            self.graph.del_node(block.label)
    return modified
def callback(self, _):
    """Called before each instruction

    Check the synchronization with the concrete execution, run the handler
    or instrumentation registered for the current address, then emulate the
    IR of the current address symbolically. Always return True.
    """
    # Assert synchronization with concrete execution
    self._check_state()

    pc = self.jitter.pc

    # A handler registered on this address replaces the default processing
    if pc in self.handler:
        self.handler[pc](self)
        return True

    if pc in self.instrumentation:
        self.instrumentation[pc](self)

    # Handle current address
    self.handle(ExprInt(pc, self.ir_arch.IRDst.size))

    # Avoid memory issue in ExpressionSimplifier
    if len(self.symb.expr_simp.simplified_exprs) > 100000:
        self.symb.expr_simp.simplified_exprs.clear()

    # Get IR blocks, from the cache when this address has been seen
    if pc not in self.addr_to_cacheblocks:
        ## Reset cache structures
        self.mdis.job_done.clear()
        self.ir_arch.blocks.clear()

        ## Update current state
        self.ir_arch.add_bloc(self.mdis.dis_bloc(pc))
        self.addr_to_cacheblocks[pc] = dict(self.ir_arch.blocks)
    else:
        self.ir_arch.blocks.clear()
        self.ir_arch.blocks.update(self.addr_to_cacheblocks[pc])

    # Emulate the current instruction
    self.symb.reset_modified()

    # A single IR block: the symbolic execution cannot jump on a lbl_gen
    if len(self.ir_arch.blocks) == 1:
        self.symb.emul_ir_blocks(pc)
        return True

    # Emulation could get stuck in generated IR blocks, and the concrete
    # execution callback is not precise enough to obtain the full IR blocks
    # path
    # -> Use a fully concrete execution to get back the path

    # Update the concrete execution
    self._update_state_from_concrete_symb(self.symb_concrete)
    while True:
        concrete_dst = self.symb_concrete.emul_ir_block(pc)
        self.symb.emul_ir_block(pc)

        if (not expr_is_label(concrete_dst) or
                concrete_dst.name.offset is not None):
            # Not a lbl_gen, exit
            break

        # Call handle with lbl_gen state
        self.handle(concrete_dst)
        pc = concrete_dst

    # At this stage, symbolic engine is one instruction after the concrete
    # engine
    return True
def callback(self, _):
    """Called before each instruction

    Steps: check the synchronization with the concrete execution, run the
    handler or instrumentation attached to the current address, fetch (or
    build and cache) the IR blocks of that address, then run them
    symbolically. Always return True.
    """
    # Assert synchronization with concrete execution
    self._check_state()

    addr = self.jitter.pc

    # Call callbacks associated to the current address; a handler
    # short-circuits everything else
    if addr in self.handler:
        self.handler[addr](self)
        return True
    if addr in self.instrumentation:
        self.instrumentation[addr](self)

    # Handle current address
    self.handle(ExprInt(addr, self.ir_arch.IRDst.size))

    # Avoid memory issue in ExpressionSimplifier
    if len(self.symb.expr_simp.simplified_exprs) > 100000:
        self.symb.expr_simp.simplified_exprs.clear()

    # Get IR blocks
    if addr not in self.addr_to_cacheblocks:
        ## Reset cache structures
        self.ir_arch.blocks.clear()

        ## Update current state
        native_block = self.mdis.dis_block(addr)
        self.ir_arch.add_block(native_block)
        self.addr_to_cacheblocks[addr] = dict(self.ir_arch.blocks)
    else:
        self.ir_arch.blocks.clear()
        self.ir_arch.blocks.update(self.addr_to_cacheblocks[addr])

    # Emulate the current instruction
    self.symb.reset_modified()

    # Is the symbolic execution going (potentially) to jump on a lbl_gen?
    if len(self.ir_arch.blocks) == 1:
        # No: a single IR block, run it directly
        self.symb.run_at(addr)
        return True

    # Emulation could get stuck in generated IR blocks, and the concrete
    # execution callback is not precise enough to obtain the full IR blocks
    # path
    # -> Use a fully concrete execution to get back the path

    # Update the concrete execution
    self._update_state_from_concrete_symb(self.symb_concrete)
    while True:
        next_concrete = self.symb_concrete.run_block_at(addr)
        self.symb.run_block_at(addr)

        is_lbl_gen = (expr_is_label(next_concrete) and
                      next_concrete.name.offset is None)
        if not is_lbl_gen:
            # Not a lbl_gen, exit
            break

        # Call handle with lbl_gen state
        self.handle(next_concrete)
        addr = next_concrete

    # At this stage, symbolic engine is one instruction after the concrete
    # engine
    return True
def remove_jmp_blocks(self):
    """
    Remove irblock with only IRDst set, by linking its parent destination to
    the block destination.

    Candidates are blocks made of a single assignment block which only sets
    IRDst, with exactly one successor in self.graph and a label as
    destination. Each parent whose destination references a candidate is
    updated to target the candidate's destination, and the graph edges are
    moved accordingly. Candidates left with no edge are deleted from the
    graph.

    Return True if at least one parent has been relinked, False otherwise
    """
    # Find candidates
    jmp_blocks = set()
    for block in self.blocks.itervalues():
        if len(block.irs) != 1:
            continue
        assignblk = block.irs[0]
        if len(assignblk) > 1:
            continue
        assert set(assignblk.keys()) == set([self.IRDst])
        if len(self.graph.successors(block.label)) != 1:
            continue
        if not expr_is_label(assignblk[self.IRDst]):
            continue
        jmp_blocks.add(block)

    # Remove them, relink graph
    modified = False
    for block in jmp_blocks:
        dst_label = block.dst.name
        # Iterate over a copy: the graph edges are edited below
        parents = list(self.graph.predecessors(block.label))
        for lbl in parents:
            parent = self.blocks.get(lbl, None)
            if parent is None:
                continue
            dst = parent.dst
            if dst.is_id(block.label):
                dst = m2_expr.ExprId(dst_label, dst.size)

                self.graph.discard_edge(lbl, block.label)
                self.graph.discard_edge(block.label, dst_label)
                self.graph.add_uniq_edge(lbl, dst_label)
                modified = True
            elif dst.is_cond():
                src1, src2 = dst.src1, dst.src2
                if src1.is_id(block.label):
                    dst = m2_expr.ExprCond(dst.cond,
                                           m2_expr.ExprId(dst_label, dst.size),
                                           dst.src2)
                    self.graph.discard_edge(lbl, block.label)
                    self.graph.discard_edge(block.label, dst_label)
                    self.graph.add_uniq_edge(lbl, dst_label)
                    modified = True
                if src2.is_id(block.label):
                    dst = m2_expr.ExprCond(dst.cond,
                                           dst.src1,
                                           m2_expr.ExprId(dst_label, dst.size))
                    self.graph.discard_edge(lbl, block.label)
                    self.graph.discard_edge(block.label, dst_label)
                    self.graph.add_uniq_edge(lbl, dst_label)
                    modified = True
                if dst.src1 == dst.src2:
                    # Collapse a condition whose branches are now equal.
                    # `src1` was captured before the rewrite and may still
                    # designate the removed jump block, so the branch is
                    # taken from the updated destination instead.
                    dst = dst.src1
            else:
                continue
            parent.dst = dst

    # Remove unlinked useless nodes
    for block in jmp_blocks:
        if (len(self.graph.predecessors(block.label)) == 0 and
                len(self.graph.successors(block.label)) == 0):
            self.graph.del_node(block.label)
    return modified