def parse(self, data, vm, addr):
    """Wrap raw bytes in a binary stream and reset the container metadata.

    Args:
        data: raw content, handed to `bin_stream_str` and, when a VM is
            given, to `vm.add_memory_page`.
        vm: object exposing `add_memory_page`, or None to skip the mapping.
        addr: value used both as the stream `shift` and as the address of
            the page added to `vm`.
    """
    self._bin_stream = bin_stream_str(data, shift=addr)

    # The memory mapping is optional: it only happens when a VM is supplied.
    if vm is not None:
        vm.add_memory_page(addr, PAGE_READ, data)

    # No executable object and a zero entry point are recorded here.
    self._executable, self._entry_point = None, 0
def load_asm(asm):
    """Disassemble machine code into a block and symbolically execute it.

    The block found at offset 0 of `asm` is disassembled, added to the
    module-level `machine`'s IR analysis and run once.

    Args:
        asm (string): machine code

    Returns:
        The SymbolicExecutionEngine instance after `run_block_at(0)`.
    """
    stream = bin_stream_str(asm)
    disassembler = machine.dis_engine(stream)
    block = disassembler.dis_block(0)

    ir_analysis = machine.ira(disassembler.symbol_pool)
    ir_analysis.add_block(block)

    # Registers start from the architecture's initial symbolic values.
    initial_symbols = ir_analysis.arch.regs.regs_init
    engine = SymbolicExecutionEngine(ir_analysis, initial_symbols)
    engine.run_block_at(0)
    return engine
def dis(cls, bs_o, mode_o=None, offset=0):
    """Disassemble one instruction from `bs_o` at `offset`.

    The stream is kept in atomic mode while candidates are decoded; atomic
    mode is always left before returning or raising.

    Args:
        bs_o: a `bin_stream` instance, or any other object, which is then
            wrapped with `bin_stream_str`.
        mode_o: mode forwarded to `cls.pre_dis` (default None).
        offset: offset forwarded to `cls.pre_dis` (default 0).

    Returns:
        The instruction built by `cls.instruction` for the single decoded
        candidate, or, when two candidates decode, the one whose class is
        flagged `alias`.

    Raises:
        Disasm_Exception: `cls.guess_mnemo` returned no candidate, or no
            candidate could be decoded.
        NotImplementedError: several candidates decoded and none of them
            is flagged `alias`.
        AssertionError: more than two candidates decoded.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    out = []
    out_c = []
    alias = False

    bs_o.enter_atomic_mode()
    try:
        pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
            bs_o, mode_o, offset)
        candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)
        if not candidates:
            raise Disasm_Exception('cannot disasm (guess) at %X' % offset)

        if hasattr(bs, 'getlen'):
            bs_l = bs.getlen()
        else:
            bs_l = len(bs)

        for c in candidates:
            log.debug("%s %s %s", "*" * 40, mode, c.mode)
            log.debug(c.fields)
            c = cls.all_mn_inst[c][0]
            c.reset_class()
            c.mode = mode

            if not c.add_pre_dis_info(pre_dis_info):
                continue

            # Pass 1: extract the raw bits of every field of the candidate.
            todo = {}
            getok = True
            fname_values = dict(pre_dis_info)
            offset_b = offset * 8
            total_l = 0
            for i, f in enumerate(c.fields_order):
                if f.flen is not None:
                    l = f.flen(mode, fname_values)
                else:
                    l = f.l
                if l is None:
                    f.is_present = False
                    todo[i] = None
                    continue
                total_l += l
                f.l = l
                f.is_present = True
                log.debug("FIELD %s %s %s %s",
                          f.__class__, f.fname, offset_b, l)
                if bs_l * 8 - offset_b < l:
                    # Not enough bits left in the stream for this field.
                    getok = False
                    break
                bv = cls.getbits(bs, mode, offset_b, l)
                offset_b += l
                # Only the first value seen for a field name is recorded.
                if f.fname not in fname_values:
                    fname_values[f.fname] = bv
                todo[i] = bv

            if not getok:
                continue

            # Integer division keeps the length usable as a byte count.
            c.l = prefix_len + total_l // 8

            # Pass 2: decode the present fields in the candidate's order.
            # The flag starts True for each candidate so that a candidate
            # with nothing to decode neither fails on an unbound name nor
            # inherits the verdict of the previous candidate.
            decode_ok = True
            for i in c.to_decode:
                f = c.fields_order[i]
                if f.is_present and not f.decode(todo[i]):
                    log.debug("cannot decode %r", f)
                    decode_ok = False
                    break
            if not decode_ok:
                continue

            for a in c.args:
                a.expr = expr_simp(a.expr)

            c.b = cls.getbytes(bs, offset_o, c.l)
            c.offset = offset_o
            c = c.post_dis()
            if c is None:
                continue

            c_args = [a.expr for a in c.args]
            instr = cls.instruction(c.name, mode, c_args,
                                    additional_info=c.additional_info())
            instr.l = prefix_len + total_l // 8
            instr.b = cls.getbytes(bs, offset_o, instr.l)
            instr.offset = offset_o
            instr.get_info(c)
            if c.alias:
                alias = True
            out.append(instr)
            out_c.append(c)
    finally:
        # Whatever happened above, the stream must not stay in atomic mode.
        bs_o.leave_atomic_mode()

    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')
        assert (len(out) == 2)
        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError('not fully functional')
    return out[0]
# Update instruction instance last_instr.name = 'PUSH' # Update next blocks to process in the disassembly engine cur_bloc.bto.clear() cur_bloc.add_cst(dst.name.offset, asm_constraint.c_next, symbol_pool) # Prepare a tiny shellcode shellcode = ''.join([ "\xe8\x00\x00\x00\x00", # CALL $ "X", # POP EAX "\xc3", # RET ]) bin_stream = bin_stream_str(shellcode) mdis = dis_x86_32(bin_stream) print "Without callback:\n" blocks = mdis.dis_multibloc(0) print "\n".join(str(block) for block in blocks) # Enable callback cb_x86_funcs.append(cb_x86_callpop) ## Other method: ## mdis.dis_bloc_callback = cb_x86_callpop # Clean disassembly cache mdis.job_done.clear() print "=" * 40
def dis(cls, bs_o, mode_o=None, offset=0):
    """Disassemble one instruction from `bs_o` at `offset`.

    Args:
        bs_o: a `bin_stream` instance, or any other object, which is then
            wrapped with `bin_stream_str`.
        mode_o: mode forwarded to `cls.pre_dis` (default None).
        offset: offset forwarded to `cls.pre_dis` (default 0).

    Returns:
        The instruction built by `cls.instruction` for the single decoded
        candidate, or, when two candidates decode, the one whose class is
        flagged `alias`.

    Raises:
        Disasm_Exception: no candidate could be decoded.
        NotImplementedError: several candidates decoded and none of them
            is flagged `alias`.
        AssertionError: more than two candidates decoded.
    """
    if not isinstance(bs_o, bin_stream):
        bs_o = bin_stream_str(bs_o)

    offset_o = offset
    pre_dis_info, bs, mode, offset, prefix_len = cls.pre_dis(
        bs_o, mode_o, offset)
    candidates = cls.guess_mnemo(bs, mode, pre_dis_info, offset)

    out = []
    out_c = []
    if hasattr(bs, 'getlen'):
        bs_l = bs.getlen()
    else:
        bs_l = len(bs)
    alias = False

    for c in candidates:
        log.debug("%s %s %s", "*" * 40, mode, c.mode)
        log.debug(c.fields)
        c = cls.all_mn_inst[c][0]
        c.reset_class()
        c.mode = mode

        if not c.add_pre_dis_info(pre_dis_info):
            continue

        # Pass 1: extract the raw bits of every field of the candidate.
        todo = {}
        getok = True
        fname_values = dict(pre_dis_info)
        offset_b = offset * 8
        total_l = 0
        for i, f in enumerate(c.fields_order):
            if f.flen is not None:
                l = f.flen(mode, fname_values)
            else:
                l = f.l
            if l is None:
                f.is_present = False
                todo[i] = None
                continue
            total_l += l
            f.l = l
            f.is_present = True
            log.debug("FIELD %s %s %s %s",
                      f.__class__, f.fname, offset_b, l)
            if bs_l * 8 - offset_b < l:
                # Not enough bits left in the stream for this field.
                getok = False
                break
            bv = cls.getbits(bs, mode, offset_b, l)
            offset_b += l
            # Only the first value seen for a field name is recorded.
            if f.fname not in fname_values:
                fname_values[f.fname] = bv
            todo[i] = bv

        if not getok:
            continue

        # Integer division keeps the length usable as a byte count.
        c.l = prefix_len + total_l // 8

        # Pass 2: decode the present fields in the candidate's order.
        # The flag starts True for each candidate so that a candidate with
        # nothing to decode neither fails on an unbound name nor inherits
        # the verdict of the previous candidate.
        decode_ok = True
        for i in c.to_decode:
            f = c.fields_order[i]
            if f.is_present and not f.decode(todo[i]):
                log.debug("cannot decode %r", f)
                decode_ok = False
                break
        if not decode_ok:
            continue

        for a in c.args:
            a.expr = expr_simp(a.expr)

        c.b = cls.getbytes(bs, offset_o, c.l)
        c.offset = offset_o
        c = c.post_dis()
        if c is None:
            continue

        c_args = [a.expr for a in c.args]
        instr = cls.instruction(c.name, mode, c_args,
                                additional_info=c.additional_info())
        instr.l = prefix_len + total_l // 8
        instr.b = cls.getbytes(bs, offset_o, instr.l)
        instr.offset = offset_o
        instr.get_info(c)
        if c.alias:
            alias = True
        out.append(instr)
        out_c.append(c)

    if not out:
        raise Disasm_Exception('cannot disasm at %X' % offset_o)
    if len(out) != 1:
        if not alias:
            log.warning('dis multiple args ret default')
        assert(len(out) == 2)
        for i, o in enumerate(out_c):
            if o.alias:
                return out[i]
        raise NotImplementedError('not fully functional')
    return out[0]
states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif asmblock.expr_is_label(addr): addr = addr.name states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif addr == ret_addr: print 'Return address reached' continue else: raise ValueError("Unsupported destination") if __name__ == '__main__': translator_smt2 = Translator.to_language("smt2") data = open(args[0]).read() bs = bin_stream_str(data) mdis = dis_engine(bs) addr = int(options.address, 16) symbols_init = dict(machine.mn.regs.regs_init) # config parser for 32 bit reg_and_id = dict(machine.mn.regs.all_regs_ids_byname) def my_ast_int2expr(name): return ExprInt(name, 32) # Modifify parser to avoid label creation in PUSH argc def my_ast_id2expr(string_parsed):
print 'Return address reached' continue elif addr.is_int(): addr = int(addr.arg) states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) elif addr.is_loc(): states_todo.add((addr, symbexec.symbols.copy(), tuple(conds))) else: raise ValueError("Unsupported destination") if __name__ == '__main__': translator_smt2 = Translator.to_language("smt2") data = open(args[0]).read() bs = bin_stream_str(data) mdis = dis_engine(bs) addr = int(options.address, 16) ir_arch = machine.ir(mdis.loc_db) ircfg = ir_arch.new_ircfg() symbexec = SymbolicExecutionEngine(ir_arch) asmcfg, loc_db = parse_asm.parse_txt(machine.mn, 32, ''' init: PUSH argv PUSH argc PUSH ret_addr
# Minimalist symbolic execution example
from miasm2.core.bin_stream import bin_stream_str
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.analysis.machine import Machine

START_ADDR = 0
machine = Machine("x86_32")

# Assemble a MOV, then disassemble it again
## The round trip ensures that attributes 'offset' and 'l' are set
line = machine.mn.fromstring("MOV EAX, EBX", 32)
asm = machine.mn.asm(line)[0]

# Get the block back from the assembled bytes
bin_stream = bin_stream_str(asm)
mdis = machine.dis_engine(bin_stream)
asm_block = mdis.dis_bloc(START_ADDR)

# Translate ASM -> IR
ira = machine.ira(mdis.symbol_pool)
ira.add_bloc(asm_block)

# Instantiate a symbolic execution engine with default register values
## EAX = EAX_init, ...
symbols_init = ira.arch.regs.regs_init
symb = SymbolicExecutionEngine(ira, symbols_init)

# Emulate one IR basic block
## Several basic blocks can be emulated through .emul_ir_blocks
cur_addr = symb.emul_ir_block(START_ADDR)
elif b.startswith('\x7fELF'): try: e = elf_init.ELF(b) bs = bin_stream_elf(e.virt) default_addr = e.Ehdr.entry except: log.error('Cannot read ELF!') if bs is None or options.shiftoffset is not None: if options.shiftoffset is None: options.shiftoffset = "0" shift = int(options.shiftoffset, 16) log.warning('fallback to string input (offset=%s)' % hex(shift)) bs = bin_stream_str(b, shift=shift) log.info('ok') mdis = dis_engine(bs) # configure disasm engine mdis.dontdis_retcall = options.dontdis_retcall mdis.blocs_wd = options.bw mdis.dont_dis_nulstart_bloc = not options.dis_nulstart_bloc todo = [] addrs = [int(a, 16) for a in args[1:]] if len(addrs) == 0 and default_addr is not None: addrs.append(default_addr) for ad in addrs:
from miasm2.core.bin_stream import bin_stream_str
from miasm2.analysis.machine import Machine
from miasm2.jitter.csts import PAGE_READ, PAGE_WRITE

# Binary path and offset of the target function
offset = 0x660
fname = "out"

# Get Miasm's binary stream
# The file is machine code: read it in binary mode so no newline translation
# is applied, and close the handle as soon as the content is in memory.
with open(fname, "rb") as bin_fd:
    bin_file = bin_fd.read()
bin_stream = bin_stream_str(bin_file)

# Disassembling target func at offset
machine = Machine('x86_64')

# Link the disasm engine to the bin_stream
mdis = machine.dis_engine(bin_stream)

# Disassemble basic blocks
blocs = mdis.dis_multibloc(offset)
# Debug aid: uncomment to dump the disassembled blocks
#for b in blocs:
#    print b

# Initializing the Jit engine with a stack
jitter = machine.jitter(jit_type='python')
jitter.init_stack()

# Add the shellcode in an arbitrary memory location
run_addr = 0x400000
jitter.vm.add_memory_page(run_addr, PAGE_READ | PAGE_WRITE, bin_file)
return # Update instruction instance last_instr.name = 'PUSH' # Update next blocks to process in the disassembly engine cur_bloc.bto.clear() cur_bloc.add_cst(loc_key, AsmConstraint.c_next) # Prepare a tiny shellcode shellcode = ''.join(["\xe8\x00\x00\x00\x00", # CALL $ "X", # POP EAX "\xc3", # RET ]) bin_stream = bin_stream_str(shellcode) mdis = dis_x86_32(bin_stream) print "Without callback:\n" asmcfg = mdis.dis_multiblock(0) print "\n".join(str(block) for block in asmcfg.blocks) # Enable callback cb_x86_funcs.append(cb_x86_callpop) ## Other method: ## mdis.dis_block_callback = cb_x86_callpop print "=" * 40 print "With callback:\n" asmcfg_after = mdis.dis_multiblock(0) print "\n".join(str(block) for block in asmcfg_after.blocks)