class DSEEngine(object):
    """Dynamic Symbolic Execution Engine

    This class is meant to be overridden for each specific purpose.

    It drives a symbolic execution engine (self.symb) alongside a concrete
    jitter (self.jitter): the jitter is configured to call back before each
    instruction, and the callback replays that instruction symbolically.
    """
    SYMB_ENGINE = ESETrackModif

    def __init__(self, machine):
        """Build an engine for @machine; the jitter is attached later
        @machine: object providing .ir() and .dis_engine(...)"""
        self.machine = machine
        self.handler = {}  # addr -> callback(DSEEngine instance)
        self.instrumentation = {}  # addr -> callback(DSEEngine instance)
        self.addr_to_cacheblocks = {}  # addr -> {label -> IRBlock}
        self.ir_arch = self.machine.ir()  # corresponding IR

        # Defined after attachment
        self.jitter = None  # Jitload (concrete execution)
        self.symb = None  # SymbolicExecutionEngine
        self.symb_concrete = None  # Concrete SymbExec for path disambiguation
        self.mdis = None  # DisasmEngine

    def prepare(self):
        """Prepare the environment for attachment with a jitter"""
        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare symbexec engines
        self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm,
                                     self.ir_arch, {})
        self.symb.enable_emulated_simplifications()
        self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm,
                                              self.ir_arch, {})

        ## Update registers value
        concrete_pc = getattr(self.jitter.cpu, self.ir_arch.pc.name)
        self.symb.symbols[self.ir_arch.IRDst] = ExprInt(
            concrete_pc, self.ir_arch.IRDst.size
        )

        # Avoid memory write
        self.symb.func_write = None

        # Activate callback on each instr
        self.jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        self.jitter.exec_cb = self.callback

    def attach(self, emulator):
        """Attach the DSE to @emulator
        @emulator: jitload (or API equivalent) instance"""
        self.jitter = emulator
        self.prepare()

    def handle(self, cur_addr):
        """Handle destination
        @cur_addr: Expr of the next address in concrete execution
        WARNING: cur_addr may be a lbl_gen

        In this method, self.symb is in the "just before branching" state.
        The default implementation does nothing.
        """
        pass

    def add_handler(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS NOT updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.handler[addr] = callback

    def add_lib_handler(self, libimp, namespace):
        """Add search for handler based on a @libimp libimp instance

        Known functions will be looked by {name}_symb in the @namespace.
        Addresses without a matching stub get a handler raising RuntimeError.
        @libimp: object exposing a fad2cname mapping (addr -> function name)
        @namespace: mapping of stub name -> func(dse instance)
        """

        # lambda cannot contain statement
        def default_func(dse):
            fname = "%s_symb" % libimp.fad2cname[dse.jitter.pc]
            raise RuntimeError("Symbolic stub '%s' not found" % fname)

        for addr, fname in libimp.fad2cname.iteritems():
            stub_name = "%s_symb" % fname
            func = namespace.get(stub_name, None)
            if func is None:
                func = default_func
            self.add_handler(addr, func)

    def add_instrumentation(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.instrumentation[addr] = callback

    def _check_state(self):
        """Check the current state against the concrete one

        Raise a DriftException (carrying a list of DriftInfo) if a modified
        symbol with a concrete value differs from the jitter's value.
        """
        errors = []  # List of DriftInfo

        for symbol in self.symb.modified_expr:
            # Do not consider PC
            if symbol in (self.ir_arch.pc, self.ir_arch.IRDst):
                continue

            # Consider only concrete values
            symb_value = self.eval_expr(symbol)
            if not symb_value.is_int():
                continue
            symb_value = int(symb_value)

            # Check computed values against real ones
            if symbol.is_id():
                if hasattr(self.jitter.cpu, symbol.name):
                    value = getattr(self.jitter.cpu, symbol.name)
                    if value != symb_value:
                        errors.append(DriftInfo(symbol, symb_value, value))
            elif symbol.is_mem() and symbol.arg.is_int():
                # Floor division: the byte count must stay an integer
                value_chr = self.jitter.vm.get_mem(int(symbol.arg),
                                                   symbol.size // 8)
                # Bytes are reversed before the hex conversion
                exp_value = int(value_chr[::-1].encode("hex"), 16)
                if exp_value != symb_value:
                    errors.append(DriftInfo(symbol, symb_value, exp_value))

        # Check for drift, and act accordingly
        if errors:
            raise DriftException(errors)

    def callback(self, _):
        """Called before each instruction
        Always returns True."""
        # Assert synchronization with concrete execution
        self._check_state()

        # Call callbacks associated to the current address
        cur_addr = self.jitter.pc

        if cur_addr in self.handler:
            # Handlers replace the symbolic emulation of this address
            self.handler[cur_addr](self)
            return True

        if cur_addr in self.instrumentation:
            self.instrumentation[cur_addr](self)

        # Handle current address
        self.handle(ExprInt(cur_addr, self.ir_arch.IRDst.size))

        # Avoid memory issue in ExpressionSimplifier
        if len(self.symb.expr_simp.simplified_exprs) > 100000:
            self.symb.expr_simp.simplified_exprs.clear()

        # Get IR blocks
        if cur_addr in self.addr_to_cacheblocks:
            self.ir_arch.blocks.clear()
            self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr])
        else:
            ## Reset cache structures
            self.mdis.job_done.clear()
            self.ir_arch.blocks.clear()

            ## Update current state
            asm_block = self.mdis.dis_bloc(cur_addr)
            self.ir_arch.add_bloc(asm_block)
            self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks)

        # Emulate the current instruction
        self.symb.reset_modified()

        # Is the symbolic execution going (potentially) to jump on a lbl_gen?
        if len(self.ir_arch.blocks) == 1:
            self.symb.emul_ir_blocks(cur_addr)
        else:
            # Emulation could get stuck in generated IR blocks
            # But concrete execution callback is not precise enough to obtain
            # the full IR blocks path
            # -> Use a fully concrete execution to get back path

            # Update the concrete execution
            self._update_state_from_concrete_symb(self.symb_concrete)
            while True:
                next_addr_concrete = self.symb_concrete.emul_ir_block(cur_addr)
                self.symb.emul_ir_block(cur_addr)

                if not (expr_is_label(next_addr_concrete) and
                        next_addr_concrete.name.offset is None):
                    # Not a lbl_gen, exit
                    break

                # Call handle with lbl_gen state
                self.handle(next_addr_concrete)
                cur_addr = next_addr_concrete

        # At this stage, symbolic engine is one instruction after the concrete
        # engine

        return True

    def take_snapshot(self):
        """Return a snapshot of the current state (including jitter state)

        The snapshot is a dict with keys "mem", "regs" and "symb".
        """
        snapshot = {
            "mem": self.jitter.vm.get_all_memory(),
            "regs": self.jitter.cpu.get_gpreg(),
            "symb": self.symb.symbols.copy(),
        }
        return snapshot

    def restore_snapshot(self, snapshot, memory=True):
        """Restore a @snapshot taken with .take_snapshot
        @snapshot: .take_snapshot output
        @memory: (optional) if set, also restore the memory
        """
        # Restore memory
        if memory:
            self.jitter.vm.reset_memory_page_pool()
            self.jitter.vm.reset_code_bloc_pool()
            for addr, metadata in snapshot["mem"].iteritems():
                self.jitter.vm.add_memory_page(addr,
                                               metadata["access"],
                                               metadata["data"])

        # Restore registers
        self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name]
        self.jitter.cpu.set_gpreg(snapshot["regs"])

        # Reset intern elements
        self.jitter.vm.set_exception(0)
        self.jitter.cpu.set_exception(0)
        self.jitter.bs._atomic_mode = False

        # Reset symb exec
        # Copy the items first: entries are deleted while looping
        for key, _ in list(self.symb.symbols.items()):
            del self.symb.symbols[key]
        for expr, value in snapshot["symb"].items():
            self.symb.symbols[expr] = value

    def update_state(self, assignblk):
        """From this point, assume @assignblk in the symbolic execution
        @assignblk: AssignBlock/{dst -> src}
        """
        for dst, src in assignblk.iteritems():
            self.symb.apply_change(dst, src)

    def _update_state_from_concrete_symb(self, symbexec, cpu=True, mem=False):
        """Update the state of @symbexec with values from the jitter
        @symbexec: symbolic execution engine to update
        @cpu: (optional) if set, set each register of the current attrib
              which is also an attribute of the jitter CPU to its concrete
              value
        @mem: (optional) if set, remove every memory symbol of @symbexec
        """
        if mem:
            # Values will be retrieved from the concrete execution if they are
            # not present
            for symbol in symbexec.symbols.symbols_mem.copy():
                del symbexec.symbols[symbol]
        if cpu:
            regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib]
            for reg in regs:
                if hasattr(self.jitter.cpu, reg.name):
                    value = ExprInt(getattr(self.jitter.cpu, reg.name),
                                    size=reg.size)
                    symbexec.symbols[reg] = value

    def update_state_from_concrete(self, cpu=True, mem=False):
        """Update the symbolic state with concrete values from the concrete
        engine

        @cpu: (optional) if set, update registers' value
        @mem: (optional) if set, update memory value

        WARNING: all current states will be lost.
        This function is usually called when states are no more synchronized
        (at the beginning, returning from an unstubbed syscall, ...)
        """
        self._update_state_from_concrete_symb(self.symb, cpu, mem)

    def eval_expr(self, expr):
        """Return the evaluation of @expr:
        @expr: Expr instance"""
        return self.symb.eval_expr(expr)

    @staticmethod
    def memory_to_expr(addr):
        """Translate an address to its corresponding symbolic ID (8bits)
        @addr: int"""
        return ExprId("MEM_0x%x" % int(addr), 8)

    def symbolize_memory(self, memory_range):
        """Register a range of memory addresses to symbolize
        @memory_range: object supporting the `in` operator
                       (intervals, list, ...)
        """
        self.symb.dse_memory_range = memory_range
        self.symb.dse_memory_to_expr = self.memory_to_expr
class DSEEngine(object):
    """Dynamic Symbolic Execution Engine

    This class is meant to be overridden for each specific purpose.

    It drives a symbolic execution engine (self.symb) alongside a concrete
    jitter (self.jitter): the jitter is configured to call back before each
    instruction, and the callback replays that instruction symbolically.
    """
    SYMB_ENGINE = ESETrackModif

    def __init__(self, machine):
        """Build an engine for @machine; the jitter is attached later
        @machine: object providing .ir() and .dis_engine(...)"""
        self.machine = machine
        self.handler = {}  # addr -> callback(DSEEngine instance)
        self.instrumentation = {}  # addr -> callback(DSEEngine instance)
        self.addr_to_cacheblocks = {}  # addr -> {label -> IRBlock}
        self.ir_arch = self.machine.ir()  # corresponding IR

        # Defined after attachment
        self.jitter = None  # Jitload (concrete execution)
        self.symb = None  # SymbolicExecutionEngine
        self.symb_concrete = None  # Concrete SymbExec for path disambiguation
        self.mdis = None  # DisasmEngine

    def prepare(self):
        """Prepare the environment for attachment with a jitter"""
        # Disassembler
        self.mdis = self.machine.dis_engine(bin_stream_vm(self.jitter.vm),
                                            lines_wd=1)

        # Symbexec engine
        ## Prepare symbexec engines
        self.symb = self.SYMB_ENGINE(self.jitter.cpu, self.jitter.vm,
                                     self.ir_arch, {})
        self.symb.enable_emulated_simplifications()
        self.symb_concrete = EmulatedSymbExec(self.jitter.cpu, self.jitter.vm,
                                              self.ir_arch, {})

        ## Update registers value
        concrete_pc = getattr(self.jitter.cpu, self.ir_arch.pc.name)
        self.symb.symbols[self.ir_arch.IRDst] = ExprInt(
            concrete_pc, self.ir_arch.IRDst.size
        )

        # Avoid memory write
        self.symb.func_write = None

        # Activate callback on each instr
        self.jitter.jit.set_options(max_exec_per_call=1, jit_maxline=1)
        self.jitter.exec_cb = self.callback

        # Clean jit cache to avoid multi-line basic blocks already jitted
        self.jitter.jit.lbl2jitbloc.clear()

    def attach(self, emulator):
        """Attach the DSE to @emulator
        @emulator: jitload (or API equivalent) instance"""
        self.jitter = emulator
        self.prepare()

    def handle(self, cur_addr):
        """Handle destination
        @cur_addr: Expr of the next address in concrete execution
        WARNING: cur_addr may be a lbl_gen

        In this method, self.symb is in the "just before branching" state.
        The default implementation does nothing.
        """
        pass

    def add_handler(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS NOT updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.handler[addr] = callback

    def add_lib_handler(self, libimp, namespace):
        """Add search for handler based on a @libimp libimp instance

        Known functions will be looked by {name}_symb in the @namespace.
        Addresses without a matching stub get a handler raising RuntimeError.
        @libimp: object exposing a fad2cname mapping (addr -> function name)
        @namespace: mapping of stub name -> func(dse instance)
        """

        # lambda cannot contain statement
        def default_func(dse):
            fname = "%s_symb" % libimp.fad2cname[dse.jitter.pc]
            raise RuntimeError("Symbolic stub '%s' not found" % fname)

        for addr, fname in libimp.fad2cname.iteritems():
            stub_name = "%s_symb" % fname
            func = namespace.get(stub_name, None)
            if func is None:
                func = default_func
            self.add_handler(addr, func)

    def add_instrumentation(self, addr, callback):
        """Add a @callback for address @addr before any state update.
        The state IS updated after returning from the callback
        @addr: int
        @callback: func(dse instance)"""
        self.instrumentation[addr] = callback

    def _check_state(self):
        """Check the current state against the concrete one

        Raise a DriftException (carrying a list of DriftInfo) if a modified
        symbol with a concrete value differs from the jitter's value.
        """
        errors = []  # List of DriftInfo

        for symbol in self.symb.modified_expr:
            # Do not consider PC
            if symbol in (self.ir_arch.pc, self.ir_arch.IRDst):
                continue

            # Consider only concrete values
            symb_value = self.eval_expr(symbol)
            if not symb_value.is_int():
                continue
            symb_value = int(symb_value)

            # Check computed values against real ones
            if symbol.is_id():
                if hasattr(self.jitter.cpu, symbol.name):
                    value = getattr(self.jitter.cpu, symbol.name)
                    if value != symb_value:
                        errors.append(DriftInfo(symbol, symb_value, value))
            elif symbol.is_mem() and symbol.arg.is_int():
                # Floor division: the byte count must stay an integer
                value_chr = self.jitter.vm.get_mem(int(symbol.arg),
                                                   symbol.size // 8)
                # Bytes are reversed before the hex conversion
                exp_value = int(value_chr[::-1].encode("hex"), 16)
                if exp_value != symb_value:
                    errors.append(DriftInfo(symbol, symb_value, exp_value))

        # Check for drift, and act accordingly
        if errors:
            raise DriftException(errors)

    def callback(self, _):
        """Called before each instruction
        Always returns True."""
        # Assert synchronization with concrete execution
        self._check_state()

        # Call callbacks associated to the current address
        cur_addr = self.jitter.pc

        if cur_addr in self.handler:
            # Handlers replace the symbolic emulation of this address
            self.handler[cur_addr](self)
            return True

        if cur_addr in self.instrumentation:
            self.instrumentation[cur_addr](self)

        # Handle current address
        self.handle(ExprInt(cur_addr, self.ir_arch.IRDst.size))

        # Avoid memory issue in ExpressionSimplifier
        if len(self.symb.expr_simp.simplified_exprs) > 100000:
            self.symb.expr_simp.simplified_exprs.clear()

        # Get IR blocks
        if cur_addr in self.addr_to_cacheblocks:
            self.ir_arch.blocks.clear()
            self.ir_arch.blocks.update(self.addr_to_cacheblocks[cur_addr])
        else:
            ## Reset cache structures
            self.ir_arch.blocks.clear()

            ## Update current state
            asm_block = self.mdis.dis_block(cur_addr)
            self.ir_arch.add_block(asm_block)
            self.addr_to_cacheblocks[cur_addr] = dict(self.ir_arch.blocks)

        # Emulate the current instruction
        self.symb.reset_modified()

        # Is the symbolic execution going (potentially) to jump on a lbl_gen?
        if len(self.ir_arch.blocks) == 1:
            self.symb.emul_ir_blocks(cur_addr)
        else:
            # Emulation could get stuck in generated IR blocks
            # But concrete execution callback is not precise enough to obtain
            # the full IR blocks path
            # -> Use a fully concrete execution to get back path

            # Update the concrete execution
            self._update_state_from_concrete_symb(self.symb_concrete)
            while True:
                next_addr_concrete = self.symb_concrete.emul_ir_block(cur_addr)
                self.symb.emul_ir_block(cur_addr)

                if not (expr_is_label(next_addr_concrete) and
                        next_addr_concrete.name.offset is None):
                    # Not a lbl_gen, exit
                    break

                # Call handle with lbl_gen state
                self.handle(next_addr_concrete)
                cur_addr = next_addr_concrete

        # At this stage, symbolic engine is one instruction after the concrete
        # engine

        return True

    def _get_gpregs(self):
        """Return a dict of regs: value from the jitter
        This version uses the regs associated to the attrib
        (!= cpu.get_gpreg())
        """
        out = {}
        regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib]
        for reg in regs:
            # Skip registers which are not attributes of the jitter CPU
            if hasattr(self.jitter.cpu, reg.name):
                out[reg.name] = getattr(self.jitter.cpu, reg.name)
        return out

    def take_snapshot(self):
        """Return a snapshot of the current state (including jitter state)

        The snapshot is a dict with keys "mem", "regs" and "symb".
        """
        snapshot = {
            "mem": self.jitter.vm.get_all_memory(),
            "regs": self._get_gpregs(),
            "symb": self.symb.symbols.copy(),
        }
        return snapshot

    def restore_snapshot(self, snapshot, memory=True):
        """Restore a @snapshot taken with .take_snapshot
        @snapshot: .take_snapshot output
        @memory: (optional) if set, also restore the memory
        """
        # Restore memory
        if memory:
            self.jitter.vm.reset_memory_page_pool()
            self.jitter.vm.reset_code_bloc_pool()
            for addr, metadata in snapshot["mem"].iteritems():
                self.jitter.vm.add_memory_page(addr,
                                               metadata["access"],
                                               metadata["data"])

        # Restore registers
        self.jitter.pc = snapshot["regs"][self.ir_arch.pc.name]
        for reg, value in snapshot["regs"].iteritems():
            setattr(self.jitter.cpu, reg, value)

        # Reset intern elements
        self.jitter.vm.set_exception(0)
        self.jitter.cpu.set_exception(0)
        self.jitter.bs._atomic_mode = False

        # Reset symb exec
        # Copy the items first: entries are deleted while looping
        for key, _ in list(self.symb.symbols.items()):
            del self.symb.symbols[key]
        for expr, value in snapshot["symb"].items():
            self.symb.symbols[expr] = value

    def update_state(self, assignblk):
        """From this point, assume @assignblk in the symbolic execution
        @assignblk: AssignBlock/{dst -> src}
        """
        for dst, src in assignblk.iteritems():
            self.symb.apply_change(dst, src)

    def _update_state_from_concrete_symb(self, symbexec, cpu=True, mem=False):
        """Update the state of @symbexec with values from the jitter
        @symbexec: symbolic execution engine to update
        @cpu: (optional) if set, set each register of the current attrib
              which is also an attribute of the jitter CPU to its concrete
              value
        @mem: (optional) if set, remove every memory symbol of @symbexec
        """
        if mem:
            # Values will be retrieved from the concrete execution if they are
            # not present
            for symbol in symbexec.symbols.symbols_mem.copy():
                del symbexec.symbols[symbol]
        if cpu:
            regs = self.ir_arch.arch.regs.attrib_to_regs[self.ir_arch.attrib]
            for reg in regs:
                if hasattr(self.jitter.cpu, reg.name):
                    value = ExprInt(getattr(self.jitter.cpu, reg.name),
                                    size=reg.size)
                    symbexec.symbols[reg] = value

    def update_state_from_concrete(self, cpu=True, mem=False):
        """Update the symbolic state with concrete values from the concrete
        engine

        @cpu: (optional) if set, update registers' value
        @mem: (optional) if set, update memory value

        WARNING: all current states will be lost.
        This function is usually called when states are no more synchronized
        (at the beginning, returning from an unstubbed syscall, ...)
        """
        self._update_state_from_concrete_symb(self.symb, cpu, mem)

    def eval_expr(self, expr):
        """Return the evaluation of @expr:
        @expr: Expr instance"""
        return self.symb.eval_expr(expr)

    @staticmethod
    def memory_to_expr(addr):
        """Translate an address to its corresponding symbolic ID (8bits)
        @addr: int"""
        return ExprId("MEM_0x%x" % int(addr), 8)

    def symbolize_memory(self, memory_range):
        """Register a range of memory addresses to symbolize
        @memory_range: object supporting the `in` operator
                       (intervals, list, ...)
        """
        self.symb.dse_memory_range = memory_range
        self.symb.dse_memory_to_expr = self.memory_to_expr