def symbolicExecution(self, ira, ircfg, address, state):
    """Symbolically follow blocks from *address* until the flow cannot be resolved.

    A fresh ``SymbolicExecutionEngine`` is built from *ira* and *state*, the
    block at *address* is executed, and the returned destination is followed
    block by block.

    The walk stops when the destination is an ``ExprCond`` or an ``ExprMem``.
    It also stops when *ircfg* holds no block for the destination: in that
    case ``run_block_at`` would return the same destination again and the
    loop would never terminate.

    Args:
        ira: lifter / IR architecture handed to the engine.
        ircfg: IR control-flow graph containing the blocks to execute.
        address: location of the first block to execute.
        state: initial symbolic state handed to the engine.

    Returns:
        A ``(destination, symbols)`` tuple: the last destination expression
        and the engine's symbol table after the walk.
    """
    engine = SymbolicExecutionEngine(ira, state)
    target = engine.run_block_at(ircfg, address)
    while not isinstance(target, (ExprCond, ExprMem)):
        # No block to execute for this destination: stop instead of
        # re-running the same lookup forever.
        if ircfg.get_block(target) is None:
            break
        target = engine.run_block_at(ircfg, target, step=False)
    return target, engine.symbols
def main(file_path: Path, start_addr: int, oracle_path: Path) -> None:
    """Simplify the expressions computed by one basic block and print them.

    The basic block at *start_addr* in the binary *file_path* is
    disassembled, lifted to Miasm IR and symbolically executed.  Every
    modified value that is not a plain integer, identifier or location is
    printed before and after being passed to a ``Simplifier`` built from
    *oracle_path*.

    Args:
        file_path: path of the binary to analyse.
        start_addr: address of the basic block to process.
        oracle_path: path handed to ``Simplifier``.
    """
    # symbol table
    loc_db = LocationDB()

    # open the binary for analysis; the handle is released as soon as the
    # container has been built instead of leaking until interpreter exit
    with open(file_path, 'rb') as binary:
        container = Container.from_stream(binary, loc_db)

    # cpu abstraction
    machine = Machine(container.arch)

    # init disassemble engine
    mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)

    # initialize intermediate representation
    lifter = machine.lifter_model_call(mdis.loc_db)

    # disassemble the basic block at address
    asm_block = mdis.dis_block(start_addr)

    # lift to Miasm IR
    ira_cfg = lifter.new_ircfg()
    lifter.add_asmblock_to_ircfg(asm_block, ira_cfg)

    # init symbolic execution engine
    sb = SymbolicExecutionEngine(lifter)

    # symbolically execute basic block
    sb.run_block_at(ira_cfg, start_addr)

    # initialize simplifier
    simplifier = Simplifier(oracle_path)

    for _, value in sb.modified():
        # trivial values need no simplification
        if value.is_int() or value.is_id() or value.is_loc():
            continue

        print(f"before: {value}")
        simplified = simplifier.simplify(value)
        print(f"simplified: {simplified}")
        print("\n\n")
def get_assignblock_for_state(ircfg, ir_arch, symbols_init, state_register, state):
    """Collect the IR blocks selected when *state_register* is compared to *state*.

    Every node of *ircfg* is inspected.  Its destination expression is taken
    from ``irblock.dst`` when the block has a single assignment block, and
    from a symbolic execution of the block otherwise.  A node matches when
    the simplified destination is an ``ExprCond`` whose condition is an
    ``==`` operation comparing *state_register* (or its initial symbol) with
    *state*.

    For a matching node the block designated by ``dst.src1`` is collected.
    When the node's own condition uses the ``'CC_S>'`` operator, the block
    reached through ``dst.src2`` is followed first and its ``dst.src1`` is
    used instead.

    Args:
        ircfg: IR control-flow graph to scan.
        ir_arch: lifter handed to ``SymbolicExecutionEngine``.
        symbols_init: initial symbolic state (register -> initial symbol).
        state_register: register holding the dispatcher state.
        state: state value to look for.

    Returns:
        List of the referenced IR blocks, in ``ircfg.nodes()`` order.  An
        entry is whatever ``ircfg.get_block`` returns for the resolved
        address.

    Note:
        Prints a message and calls ``sys.exit()`` when a node has no IR block.
    """
    referenced_blocks = []

    for cfgnode in ircfg.nodes():
        irblock = ircfg.get_block(cfgnode)
        if not irblock:
            print('[!] Could not get IRBLOCK!')
            sys.exit()

        if len(irblock.assignblks) == 1:
            next_addr = irblock.dst
        else:
            engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, cfgnode))
        if next_addr is None:
            continue
        next_addr = expr_simp(next_addr)

        is_equality_branch = (
            isinstance(next_addr, ExprCond)
            and isinstance(next_addr.cond, ExprOp)
            and next_addr.cond.op == '=='
        )
        if not is_equality_branch:
            continue

        # Descend through the first operand until an identifier is the first
        # argument, or until the operand is no longer an operation.
        args = next_addr.cond
        while not isinstance(args.args[0], ExprId):
            args = args.args[0]
            if not isinstance(args, ExprOp):
                break

        compares_state = (
            hasattr(args, 'args')
            and args.args[0] in (state_register, symbols_init[state_register])
            and args.args[1] == state
        )
        if not compares_state:
            continue

        # The operator must equal 'CC_S>' exactly.  The previous
        # ``op in ('CC_S>')`` was a substring test against a plain string
        # (the parentheses do not make a tuple), so e.g. 'CC_S' matched too.
        if hasattr(irblock.dst.cond, 'op') and irblock.dst.cond.op == 'CC_S>':
            dst = get_address(ircfg.loc_db, irblock.dst.src2.loc_key)
            follow_block = ircfg.get_block(dst)
            dst = get_address(ircfg.loc_db, follow_block.dst.src1.loc_key)
        else:
            dst = get_address(ircfg.loc_db, irblock.dst.src1.loc_key)
        referenced_blocks.append(ircfg.get_block(dst))

    return referenced_blocks
# set opaque predicate counter opaque_counter = 0 # dictionary of byte patches patches = {} # walk over all basic blocks for basic_block in asm_cfg.blocks: # get address of first basic block instruction address = basic_block.lines[0].offset # init symbolic execution engine sb = SymbolicExecutionEngine(ira) # symbolically execute basic block e = sb.run_block_at(ira_cfg, address) # skip if no conditional jump if not e.is_cond(): continue # cond ? src1 : src2 # check if opaque predicate -- jump if branch_cannot_be_taken(e, e.src1): print(f"opaque predicate at {hex(address)} (jump is never taken)") opaque_counter += 1 # get the jump instruction jump_instruction = basic_block.lines[-1]
def _find_imm_mov(asmcfg, subject_reg):
    """Return the immediate of the first ``MOV subject_reg, <imm>`` in *asmcfg*.

    Blocks are visited in ``asmcfg.nodes()`` order; ``None`` is returned when
    no such instruction is found.
    """
    for loc_key in asmcfg.nodes():
        asmblock = asmcfg.loc_key_to_block(loc_key)
        for line in asmblock.lines:
            if line.name == 'MOV' and line.args[0] == subject_reg and \
                    isinstance(line.args[1], ExprInt):
                return line.args[1]
    return None


def _add_cmov_patches(patches, ircfg, ir_arch, symbols_init, state_register,
                      irblock, cndp1_state, cndp0_state):
    """Add the CNDP1/CNDP0 patch pair for a single-instruction CMOV block.

    Both patches use the offset of the block's only instruction as source.
    The destinations are the first blocks that
    ``get_assignblock_for_state`` returns for *cndp1_state* and
    *cndp0_state*; an ``IndexError`` is raised when either lookup is empty.
    """
    blocks1 = get_assignblock_for_state(ircfg, ir_arch, symbols_init,
                                        state_register, cndp1_state)
    blocks0 = get_assignblock_for_state(ircfg, ir_arch, symbols_init,
                                        state_register, cndp0_state)
    dst1 = get_address(ircfg.loc_db, blocks1[0].loc_key)
    src = irblock.assignblks[0].instr.offset
    patches.add((src, dst1, CNDP1))
    dst0 = get_address(ircfg.loc_db, blocks0[0].loc_key)
    patches.add((src, dst0, CNDP0))


def resolve_offsets(state_register, asmcfg, ircfg, ir_arch):
    """Compute the jump patches that tie each state update to its handler block.

    Every node of *ircfg* is visited; nodes discovered during the walk are
    appended to the work list and visited as well.  Depending on the shape of
    the block, patches of kind ``CNDP0``/``CNDP1`` (single ``CMOVNZ``/``CMOVZ``
    blocks), ``STDP`` (other single-instruction blocks) or ``CNDP2``/``CNDP3``
    (conditional blocks leading to a ``CMP`` on the state register) are
    recorded.  Blocks that set the state register to a computed or constant
    value are delegated to ``process_blocks_for_patches``.

    Args:
        state_register: register holding the dispatcher state.
        asmcfg: assembly control-flow graph of the function.
        ircfg: IR control-flow graph of the function.
        ir_arch: lifter handed to ``SymbolicExecutionEngine``.

    Returns:
        List of ``(source offset, destination address, patch kind)`` tuples,
        in no particular order (they are collected in a set).

    Note:
        Prints a message and calls ``sys.exit()`` when a node has no IR
        block.  ``expr_simp`` gets the ``ignore_call_results`` pass enabled as
        a side effect.
    """
    patches = set()
    # Work list: it grows while being iterated, so appended nodes are visited.
    nodes_to_walk = list(ircfg.nodes())

    symbols_init = dict(zip(all_regs_ids, all_regs_ids_init))

    expr_simp.enable_passes({ExprOp: [ignore_call_results]})

    for node in nodes_to_walk:
        irblock = ircfg.get_block(node)
        if not irblock:
            print('[-] Could not get IRBLOCK!')
            sys.exit()

        if len(irblock.assignblks) == 1:
            instr = irblock.assignblks[0].instr

            if instr.name == "CMOVNZ" and instr.args[0] == state_register:
                temp_reg1 = instr.args[0]
                temp_reg2 = instr.args[1]
                state1 = None
                state2 = None
                previous_block = ircfg.get_block(ircfg.predecessors(node)[0])
                for line in previous_block.assignblks:
                    if line.instr.name == 'MOV' and \
                            line.instr.args[0] in (temp_reg1, temp_reg2) and \
                            isinstance(line.instr.args[1], ExprInt):
                        if line.instr.args[0] == state_register:
                            state1 = line.instr.args[1]
                        else:
                            state2 = line.instr.args[1]
                    if state1 and state2:
                        break

                # compiler shenanigans. state missing is not initialised in
                # current bblk. search function for it
                if not state1:
                    state1 = scan_function_for_state(asmcfg, state_register,
                                                     temp_reg1)
                elif not state2:
                    state2 = scan_function_for_state(asmcfg, state_register,
                                                     temp_reg2)

                # For CMOVNZ, state2 feeds the CNDP1 patch and state1 the
                # CNDP0 patch (the reverse of the CMOVZ case below).
                _add_cmov_patches(patches, ircfg, ir_arch, symbols_init,
                                  state_register, irblock, state2, state1)

            elif instr.name == "CMOVZ":
                state1 = None
                state2 = None
                temp_reg1 = instr.args[0]
                temp_reg2 = instr.args[1]
                if temp_reg1 == state_register:
                    previous_block = ircfg.get_block(
                        ircfg.predecessors(node)[0])
                    for line in previous_block.assignblks:
                        if line.instr.name == 'MOV' and \
                                line.instr.args[0] in (temp_reg1, temp_reg2):
                            if line.instr.args[0] == state_register:
                                state1 = line.instr.args[1]
                            else:
                                state2 = line.instr.args[1]

                    if not (state1 and state2):
                        # One state is not set in the predecessor block:
                        # look for an immediate write to its register
                        # anywhere in the function.
                        found_state = state1 if state1 else state2
                        subject_reg = temp_reg1 if not state1 else temp_reg2
                        missing_state = _find_imm_mov(asmcfg, subject_reg)
                        if not missing_state:
                            print(
                                "[-] Something went wrong. could not find mising state!"
                            )
                            continue
                        state1 = state1 if state1 == found_state else missing_state
                        state2 = missing_state if state1 == found_state else state2

                    _add_cmov_patches(patches, ircfg, ir_arch, symbols_init,
                                      state_register, irblock, state1, state2)

            else:
                next_block = ircfg.get_block(ircfg.successors(node)[0])
                # Reset per node: without this, a block whose successor never
                # writes the state register either raised UnboundLocalError
                # or silently reused the state found for an earlier node.
                state1 = None
                for line in next_block.assignblks:
                    if line.instr.name == 'MOV' and \
                            line.instr.args[0] == state_register:
                        state1 = line.instr.args[1]
                        break

                if state1:
                    blocks1 = get_assignblock_for_state(
                        ircfg, ir_arch, symbols_init, state_register, state1)

                    # Source is the offset of the last JMP in the successor
                    # (None when there is none).
                    src = None
                    for assignblk in next_block.assignblks:
                        if assignblk.instr.name == 'JMP':
                            src = assignblk.instr.offset

                    dst_block = ircfg.get_block(blocks1[0].loc_key)
                    if isinstance(dst_block.dst, ExprCond) and \
                            len(dst_block.assignblks):
                        # Exact comparison: ``op in ('CC_S>')`` was a
                        # substring test against a plain string.
                        if hasattr(dst_block.dst.cond, 'op') and \
                                dst_block.dst.cond.op == 'CC_S>':
                            dst = get_address(ircfg.loc_db,
                                              dst_block.dst.src2.loc_key)
                            follow_block = ircfg.get_block(dst)
                            dst = get_address(ircfg.loc_db,
                                              follow_block.dst.src1.loc_key)
                        else:
                            dst = get_address(ircfg.loc_db,
                                              dst_block.dst.src1.loc_key)
                    else:
                        dst = get_address(ircfg.loc_db, blocks1[0].loc_key)
                    patches.add((src, dst, STDP))

        else:
            symbolic_engine = SymbolicExecutionEngine(ir_arch, symbols_init)
            next_addr = symbolic_engine.run_block_at(
                ircfg, get_address(ircfg.loc_db, node))
            next_addr = expr_simp(next_addr)
            updated_state = symbolic_engine.symbols[state_register]
            if isinstance(updated_state, ExprOp):
                updated_state = expr_simp(updated_state)
            state_changed = updated_state != symbols_init[state_register]

            if state_changed and isinstance(updated_state, ExprOp):
                # The state is a computed expression: look for the
                # SBB / AND / ADD instruction triple and derive both states.
                assignblks = irblock.assignblks
                count = len(assignblks)
                if count > 3:
                    neg_inst = False
                    for i, assignblk in enumerate(assignblks):
                        name = assignblk.instr.name
                        if name == 'NEG':
                            neg_inst = True
                        # Bounds check keeps i + 1 / i + 2 inside the block.
                        if name == 'SBB' and i + 2 < count and \
                                assignblks[i + 1].instr.name == 'AND' and \
                                assignblks[i + 2].instr.name == 'ADD':
                            expr = symbolic_engine.symbols[
                                state_register].copy()
                            if neg_inst:
                                state1 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(0, 32)}))
                                state2 = expr_simp(
                                    expr.replace_expr(
                                        {EAX_init: ExprInt(1, 32)}))
                            # ``i >= 2`` stops i - 1 / i - 2 from wrapping
                            # around to the end of the block.
                            elif i >= 2 and \
                                    assignblks[i - 1].instr.name == 'CMP' and \
                                    assignblks[i - 2].instr.name == 'ADD' and \
                                    isinstance(assignblks[i - 2].instr.args[1],
                                               ExprInt):
                                reg = assignblks[i - 2].instr.args[0]
                                imm = assignblks[i - 2].instr.args[1]
                                tracked = symbolic_engine.symbols[reg].args[0]
                                state1 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: imm
                                    }).replace_expr({tracked: imm}))
                                state2 = expr_simp(
                                    expr.replace_expr({
                                        EAX_init: ExprInt(-1, 32)
                                    }).replace_expr({tracked: imm}))
                            else:
                                # Unrecognised setup sequence: no states can
                                # be derived, so do not patch with values
                                # left over from another node.
                                break

                            blocks1 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state1)
                            blocks2 = get_assignblock_for_state(
                                ircfg, ir_arch, symbols_init, state_register,
                                state2)
                            process_blocks_for_patches(node, blocks1, ircfg,
                                                       patches, nodes_to_walk,
                                                       state1, True)
                            process_blocks_for_patches(node, blocks2, ircfg,
                                                       patches, nodes_to_walk,
                                                       state2, False)
                            break

            elif state_changed and isinstance(updated_state, ExprInt) and \
                    updated_state._get_int() > 0xff:
                # The state is a constant above 0xff: patch towards every
                # block that compares the state register with it.
                referenced_blocks = get_assignblock_for_state(
                    ircfg, ir_arch, symbols_init, state_register,
                    updated_state)
                process_blocks_for_patches(node, referenced_blocks, ircfg,
                                           patches, nodes_to_walk)

            elif isinstance(next_addr, ExprCond):
                if not hasattr(next_addr.cond, 'args'):
                    if isinstance(next_addr.src1, ExprLoc):
                        dest1 = next_addr.src1.loc_key
                    else:
                        dest1 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src1._get_int())
                    if isinstance(next_addr.src2, ExprLoc):
                        dest2 = next_addr.src2.loc_key
                    else:
                        dest2 = get_loc_key_at(ircfg.loc_db,
                                               next_addr.src2._get_int())

                    if dest1 not in nodes_to_walk:
                        nodes_to_walk.append(dest1)
                    if dest2 not in nodes_to_walk:
                        nodes_to_walk.append(dest2)

                    dst2block = ircfg.get_block(dest2)
                    if dst2block.assignblks[0].instr.name == 'CMP' and \
                            dst2block.assignblks[0].instr.args[0] == state_register and \
                            len(ircfg.get_block(node).assignblks) > 1:
                        # Walk up first predecessors until one starts with
                        # CMP.
                        # NOTE(review): the second half of the stop condition
                        # tests dst2block (already known to hold here), not
                        # the predecessor -- confirm whether the predecessor's
                        # operand was meant.
                        ref_block = node
                        while True:
                            pred_block = ircfg.get_block(
                                ircfg.predecessors(ref_block)[0])
                            if pred_block.assignblks[0].instr.name == 'CMP' and \
                                    dst2block.assignblks[0].instr.args[0] == state_register:
                                break
                            ref_block = ircfg.predecessors(ref_block)[0]

                        asmblock = asmcfg.loc_key_to_block(node)
                        for line in asmblock.lines:
                            if line.name == 'JZ':
                                patches.add(
                                    (line.offset,
                                     get_address(asmcfg.loc_db, ref_block),
                                     CNDP2))
                                true_block = ircfg.get_block(
                                    ircfg.get_block(node).dst.src2.loc_key)
                                # Continue the same engine into the src2
                                # block to learn the state it sets.
                                symbolic_engine.run_block_at(
                                    ircfg, true_block.loc_key)
                                final_state = symbolic_engine.symbols[
                                    state_register]
                                if isinstance(final_state, ExprInt):
                                    referenced_block = get_assignblock_for_state(
                                        ircfg, ir_arch, symbols_init,
                                        state_register, final_state)[0]
                                    patches.add(
                                        (line.offset,
                                         get_address(ircfg.loc_db,
                                                     referenced_block.loc_key),
                                         CNDP3))
                                break

            elif isinstance(next_addr, ExprInt):
                dest = get_loc_key_at(ircfg.loc_db, next_addr._get_int())
                if dest not in nodes_to_walk:
                    nodes_to_walk.append(dest)

    return list(patches)
#!/usr/bin/python3
# Disassemble the function at start_addr in hello_world.exe, lift it to Miasm
# IR, symbolically execute its first block and print the value that
# run_block_at returns.
from miasm.analysis.binary import Container
from miasm.analysis.machine import Machine
from miasm.core.locationdb import LocationDB
from miasm.ir.symbexec import SymbolicExecutionEngine

start_addr = 0x402300
loc_db = LocationDB()

# Build the container inside a context manager so the file handle is closed
# instead of staying open for the rest of the script.
with open("hello_world.exe", 'rb') as target_file:
    container = Container.from_stream(target_file, loc_db)

machine = Machine(container.arch)
mdis = machine.dis_engine(container.bin_stream, loc_db=loc_db)
ira = machine.ira(mdis.loc_db)

# Disassemble every block reachable from start_addr and lift the result to IR.
asm_cfg = mdis.dis_multiblock(start_addr)
ira_cfg = ira.new_ircfg_from_asmcfg(asm_cfg)

symbex = SymbolicExecutionEngine(ira)
symbex_state = symbex.run_block_at(ira_cfg, start_addr)
print(symbex_state)