def miasm_dis(r2_op, r2_address, r2_buffer, r2_length):
    """Disassemble one instruction with miasm and fill the radare2 asm op.

    r2_op is cast to a ``RAsmOp_r2m2*`` and receives the result; r2_buffer
    is cast to ``char*`` and r2_length items are unpacked from it;
    r2_address is assigned to the instruction offset.

    Nothing is returned. When miasm_machine() yields None, the function
    returns without touching the structure.
    """

    # Cast radare2 variables and extract the raw opcode
    asm_op = ffi.cast("RAsmOp_r2m2*", r2_op)
    raw_opcode = ffi.unpack(ffi.cast("char*", r2_buffer), r2_length)

    # Without a miasm2 machine, nothing can be disassembled
    machine = miasm_machine()
    if machine is None:
        return

    loc_db = LocationDB()
    try:
        attrib = machine.dis_engine().attrib
        instr = machine.mn().dis(raw_opcode, attrib)
        instr.offset = r2_address

        if instr.dstflow():
            # Size of each ExprInt argument, None for every other argument
            int_sizes = [
                arg.size if isinstance(arg, ExprInt) else None
                for arg in instr.args
            ]

            # Adjust arguments values using the instruction offset
            instr.dstflow2label(loc_db)

            # Arguments that were ExprInt and became ExprLoc are converted
            # back to ExprInt, using the offset stored in loc_db
            for idx, arg in enumerate(instr.args):
                size = int_sizes[idx]
                if size is None or not isinstance(arg, ExprLoc):
                    continue
                target = loc_db.get_location_offset(arg.loc_key)
                instr.args[idx] = ExprInt(target, size)

        text = str(instr)
        length = instr.l
    except Exception:
        text = "/!\ Can't disassemble using miasm /!\\"
        # Fallback length, flagged as suspicious by the original author
        length = 2

    # Hexadecimal dump of the bytes covered by the instruction
    hex_dump = raw_opcode[:length].encode("hex")

    # Keep both strings within the buffer sizes
    if len(text) > 257:
        text = "/!\ Disassembled instruction is too long /!\\"
    if len(hex_dump) > 257:
        hex_dump = hex_dump[:255]

    # Fill the RAsmOp structure
    asm_op.size = length
    set_rbuf(asm_op.buf_asm, text)
    set_rbuf(asm_op.buf_hex, hex_dump)
def exec_instruction(mn_str, init_values, results, index=0, offset=0): """Symbolically execute an instruction and check the expected results.""" # Assemble and disassemble the instruction instr = mn_mep.fromstring(mn_str, "b") instr.mode = "b" mn_bin = mn_mep.asm(instr)[index] try: instr = mn_mep.dis(mn_bin, "b") except Disasm_Exception: assert (False) # miasm don't know what to do # Specify the instruction offset and compute the destination label instr.offset = offset loc_db = LocationDB() if instr.dstflow(): instr.dstflow2label(loc_db) # Get the IR im = ir_mepb(loc_db) iir, eiir = im.get_ir(instr) # Filter out IRDst iir = [ ir for ir in iir if not (isinstance(ir, ExprAff) and isinstance(ir.dst, ExprId) and ir.dst.name == "IRDst") ] # Prepare symbolic execution sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init) # Assign int values before symbolic evaluation for expr_id, expr_value in init_values: sb.symbols[expr_id] = expr_value # Execute the IR ab = AssignBlock(iir) sb.eval_updt_assignblk(ab) # Check if expected expr_id were modified matched_results = 0 for expr_id, expr_value in results: result = sb.eval_expr(expr_id) if isinstance(result, ExprLoc): addr = loc_db.get_location_offset(result.loc_key) if expr_value.arg == addr: matched_results += 1 continue elif result == expr_value: matched_results += 1 continue # Ensure that all expected results were verified if len(results) is not matched_results: print "Expected:", results print "Modified:", [r for r in sb.modified(mems=False)] assert (False)
def check_instruction(mn_str, mn_hex, multi=None, offset=0): """Try to disassemble and assemble this instruction""" # Rename objdump registers names mn_str = re.sub("\$([0-9]+)", lambda m: "R"+m.group(1), mn_str) mn_str = mn_str.replace("$", "") # Disassemble mn = dis(mn_hex) mn.offset = offset if mn.dstflow(): # Remember ExprInt arguments sizes args_size = list() for i in range(len(mn.args)): if isinstance(mn.args[i], ExprInt): args_size.append(mn.args[i].size) else: args_size.append(None) # Adjust arguments values using the instruction offset loc_db = LocationDB() mn.dstflow2label(loc_db) # Convert ExprLoc to ExprInt for i in range(len(mn.args)): if args_size[i] is None: continue if isinstance(mn.args[i], ExprLoc): addr = loc_db.get_location_offset(mn.args[i].loc_key) mn.args[i] = ExprInt(addr, args_size[i]) print "dis: %s -> %s" % (mn_hex.rjust(20), str(mn).rjust(20)) assert(str(mn) == mn_str) # disassemble assertion # Assemble and return all possible candidates instr = mn_mep.fromstring(mn_str, "b") instr.offset = offset instr.mode = "b" if instr.offset: instr.fixDstOffset() asm_list = [i.encode("hex") for i in mn_mep.asm(instr)] # Check instructions variants if multi: print "Instructions count:", len(asm_list) assert(len(asm_list) == multi) # Ensure that variants correspond to the same disassembled instruction for mn_hex in asm_list: mn = dis(mn_hex) print "dis: %s -> %s" % (mn_hex.rjust(20), str(mn).rjust(20)) # Check the assembly result print "asm: %s -> %s" % (mn_str.rjust(20), ", ".join(asm_list).rjust(20)) assert(mn_hex in asm_list) # assemble assertion
def exec_instruction(mn_str, init_values, results, index=0, offset=0): """Symbolically execute an instruction and check the expected results.""" # Assemble and disassemble the instruction instr = mn_mep.fromstring(mn_str, "b") instr.mode = "b" mn_bin = mn_mep.asm(instr)[index] try: instr = mn_mep.dis(mn_bin, "b") except Disasm_Exception: assert(False) # miasm don't know what to do # Specify the instruction offset and compute the destination label instr.offset = offset loc_db = LocationDB() if instr.dstflow(): instr.dstflow2label(loc_db) # Get the IR im = ir_mepb(loc_db) iir, eiir = im.get_ir(instr) # Filter out IRDst iir = [ir for ir in iir if not (isinstance(ir, ExprAssign) and isinstance(ir.dst, ExprId) and ir.dst.name == "IRDst")] # Prepare symbolic execution sb = SymbolicExecutionEngine(ir_a_mepb(loc_db), regs_init) # Assign int values before symbolic evaluation for expr_id, expr_value in init_values: sb.symbols[expr_id] = expr_value # Execute the IR ab = AssignBlock(iir) sb.eval_updt_assignblk(ab) # Check if expected expr_id were modified matched_results = 0 for expr_id, expr_value in results: result = sb.eval_expr(expr_id) if isinstance(result, ExprLoc): addr = loc_db.get_location_offset(result.loc_key) if expr_value.arg == addr: matched_results += 1 continue elif result == expr_value: matched_results += 1 continue # Ensure that all expected results were verified if len(results) is not matched_results: print "Expected:", results print "Modified:", [r for r in sb.modified(mems=False)] assert(False)
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments with their location expressions resolved.

    symbols is the location database used for the lookups; a fresh
    LocationDB() is used when it is None.

    For each location found in an argument:
    - a location named '$' is replaced by self.get_asm_offset(exprloc);
    - a location named '_' is replaced by
      self.get_asm_next_offset(exprloc);
    - a location with a known offset is replaced by an ExprInt holding
      that offset, with the size of the location expression.

    Each argument is then simplified with expr_simp and the list of
    resulting expressions is returned.

    Raises ValueError when a location has no known offset.
    """
    if symbols is None:
        symbols = LocationDB()
    args_out = []
    for expr in self.args:
        # try to resolve symbols using symbols (0 for default value)
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)
            # special symbols
            if '$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if '_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue
            # A single offset lookup is enough: the former second lookup
            # could only return None again, so its success branch was
            # unreachable.
            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names
            )

        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
def check_instruction(mn_str, mn_hex, multi=None, offset=0): """Try to disassemble and assemble this instruction""" # Rename objdump registers names mn_str = re.sub("\$([0-9]+)", lambda m: "R" + m.group(1), mn_str) mn_str = mn_str.replace("$", "") # Disassemble mn = dis(mn_hex) mn.offset = offset if mn.dstflow(): # Remember ExprInt arguments sizes args_size = list() for i in range(len(mn.args)): if isinstance(mn.args[i], ExprInt): args_size.append(mn.args[i].size) else: args_size.append(None) # Adjust arguments values using the instruction offset loc_db = LocationDB() mn.dstflow2label(loc_db) # Convert ExprLoc to ExprInt for i in range(len(mn.args)): if args_size[i] is None: continue if isinstance(mn.args[i], ExprLoc): addr = loc_db.get_location_offset(mn.args[i].loc_key) mn.args[i] = ExprInt(addr, args_size[i]) print "dis: %s -> %s" % (mn_hex.rjust(20), str(mn).rjust(20)) assert (str(mn) == mn_str) # disassemble assertion # Assemble and return all possible candidates instr = mn_mep.fromstring(mn_str, "b") instr.offset = offset instr.mode = "b" if instr.offset: instr.fixDstOffset() asm_list = [i.encode("hex") for i in mn_mep.asm(instr)] # Check instructions variants if multi: print "Instructions count:", len(asm_list) assert (len(asm_list) == multi) # Ensure that variants correspond to the same disassembled instruction for mn_hex in asm_list: mn = dis(mn_hex) print "dis: %s -> %s" % (mn_hex.rjust(20), str(mn).rjust(20)) # Check the assembly result print "asm: %s -> %s" % (mn_str.rjust(20), ", ".join(asm_list).rjust(20)) assert (mn_hex in asm_list) # assemble assertion
def resolve_args_with_symbols(self, symbols=None):
    """Return the arguments with their location expressions resolved.

    symbols is the location database used for the lookups; a fresh
    LocationDB() is used when it is None.

    For each location found in an argument:
    - a location named '$' is replaced by self.get_asm_offset(exprloc);
    - a location named '_' is replaced by
      self.get_asm_next_offset(exprloc);
    - a location with a known offset is replaced by an ExprInt holding
      that offset, with the size of the location expression.

    Each argument is then simplified with expr_simp and the list of
    resulting expressions is returned.

    Raises ValueError when a location has no known offset.
    """
    if symbols is None:
        symbols = LocationDB()
    args_out = []
    for expr in self.args:
        # try to resolve symbols using symbols (0 for default value)
        fixed_expr = {}
        for exprloc in m2_expr.get_expr_locs(expr):
            loc_key = exprloc.loc_key
            names = symbols.get_location_names(loc_key)
            # special symbols
            if '$' in names:
                fixed_expr[exprloc] = self.get_asm_offset(exprloc)
                continue
            if '_' in names:
                fixed_expr[exprloc] = self.get_asm_next_offset(exprloc)
                continue
            # A single offset lookup is enough: the former second lookup
            # could only return None again, so its success branch was
            # unreachable.
            offset = symbols.get_location_offset(loc_key)
            if offset is not None:
                fixed_expr[exprloc] = m2_expr.ExprInt(offset, exprloc.size)
                continue
            if not names:
                raise ValueError('Unresolved symbol: %r' % exprloc)
            raise ValueError(
                'The offset of loc_key "%s" cannot be determined' % names
            )

        expr = expr.replace_expr(fixed_expr)
        expr = expr_simp(expr)
        args_out.append(expr)
    return args_out
def miasm_anal(r2_op, r2_address, r2_buffer, r2_length):
    """Define an instruction behavior using miasm.

    r2_op is the radare2 structure handed to fill_ranalop() once the
    analysis is done; r2_address is both the cache key and the instruction
    offset; r2_buffer is cast to ``char*`` and r2_length items are unpacked
    from it.

    Nothing is returned. Results are stored in LRU_CACHE, keyed by
    r2_address, and reused on later calls. When the bytes cannot be
    disassembled, the function returns without filling r2_op.
    """

    # Return the cached result if any
    global LRU_CACHE
    result = LRU_CACHE.get(r2_address, None)
    if result is not None:
        result.fill_ranalop(r2_op)
        return

    # Cheap garbage collection (disabled on purpose by the "False and")
    if False and len(LRU_CACHE.keys()) >= 10:
        to_delete = [addr for addr in LRU_CACHE.keys() if addr < r2_address]
        for key in to_delete[:5]:
            del LRU_CACHE[key]

    # Cast radare2 variables
    opcode = ffi.cast("char*", r2_buffer)

    # Prepare the opcode
    opcode = ffi.unpack(opcode, r2_length)

    # Disassemble the opcode
    loc_db = LocationDB()
    try:
        machine = miasm_machine()
        mode = machine.dis_engine().attrib
        instr = machine.mn().dis(opcode, mode)
        instr.offset = r2_address
        if instr.dstflow():
            # Adjust arguments values using the instruction offset
            instr.dstflow2label(loc_db)
        dis_len = instr.l
    except Exception:
        # Can't do anything with an invalid instruction.
        # "except Exception" instead of a bare except, so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return

    result = CachedRAnalOp()
    result.mnemonic = instr.name
    result.size = dis_len
    result.type = R_ANAL_OP_TYPE_UNK
    result.eob = 0  # End Of Block

    # Convert miasm expressions to ESIL
    get_esil(result, instr, loc_db)

    #
    # Architecture agnostic analysis
    #

    # Instructions that *DO NOT* stop a basic bloc
    # (identity test kept: only an actual False takes this path)
    if instr.breakflow() is False:
        result.fill_ranalop(r2_op)
        LRU_CACHE[r2_address] = result
        return

    result.eob = 1  # End Of Block

    # Assume that an instruction starting with 'RET' is a return
    # Note: add it to miasm2 as getpc() ?
    if instr.name.upper().startswith("RET"):
        result.type = R_ANAL_OP_TYPE_RET

    # Instructions that explicitly provide the destination
    if instr and instr.dstflow():
        expr = instr.getdstflow(None)[0]

        if instr.is_subcall():
            r2_anal_subcall(result, expr, loc_db)
            result.fill_ranalop(r2_op)
            LRU_CACHE[r2_address] = result
            return

        if result.type == R_ANAL_OP_TYPE_UNK and instr.splitflow():
            r2_anal_splitflow(result, r2_address, instr, expr, loc_db)
            result.fill_ranalop(r2_op)
            LRU_CACHE[r2_address] = result
            return

        # Classify the jump according to the kind of destination expression
        if isinstance(expr, ExprInt):
            result.type = R_ANAL_OP_TYPE_JMP
            result.jump = int(expr.arg) & 0xFFFFFFFFFFFFFFFF
        elif isinstance(expr, ExprId):
            result.type = R_ANAL_OP_TYPE_UJMP
        elif isinstance(expr, ExprLoc):
            addr = loc_db.get_location_offset(expr.loc_key)
            result.type = R_ANAL_OP_TYPE_JMP
            result.jump = addr & 0xFFFFFFFFFFFFFFFF
        elif isinstance(expr, ExprMem):
            result.type = R_ANAL_OP_TYPE_MJMP
        else:
            msg = "miasm_anal(): don't know what to do with: %s" % instr
            print >> sys.stderr, msg

    result.fill_ranalop(r2_op)
    LRU_CACHE[r2_address] = result
from miasm2.core.locationdb import LocationDB # Basic tests (LocationDB description) loc_db = LocationDB() loc_key1 = loc_db.add_location() loc_key2 = loc_db.add_location(offset=0x1234) loc_key3 = loc_db.add_location(name="first_name") loc_db.add_location_name(loc_key3, "second_name") loc_db.set_location_offset(loc_key3, 0x5678) loc_db.remove_location_name(loc_key3, "second_name") assert loc_db.get_location_offset(loc_key1) is None assert loc_db.get_location_offset(loc_key2) == 0x1234 assert loc_db.pretty_str(loc_key1) == str(loc_key1) assert loc_db.pretty_str(loc_key2) == "loc_1234" assert loc_db.pretty_str(loc_key3) == "first_name" loc_db.consistency_check() # Offset manipulation loc_key4 = loc_db.add_location() assert loc_db.get_location_offset(loc_key4) is None loc_db.set_location_offset(loc_key4, 0x1122) assert loc_db.get_location_offset(loc_key4) == 0x1122 loc_db.unset_location_offset(loc_key4) assert loc_db.get_location_offset(loc_key4) is None try: loc_db.set_location_offset(loc_key4, 0x1234) has_raised = False except KeyError:
from miasm2.core.locationdb import LocationDB # Basic tests (LocationDB description) loc_db = LocationDB() loc_key1 = loc_db.add_location() loc_key2 = loc_db.add_location(offset=0x1234) loc_key3 = loc_db.add_location(name="first_name") loc_db.add_location_name(loc_key3, "second_name") loc_db.set_location_offset(loc_key3, 0x5678) loc_db.remove_location_name(loc_key3, "second_name") assert loc_db.get_location_offset(loc_key1) is None assert loc_db.get_location_offset(loc_key2) == 0x1234 assert loc_db.pretty_str(loc_key1) == str(loc_key1) assert loc_db.pretty_str(loc_key2) == "loc_1234" assert loc_db.pretty_str(loc_key3) == "first_name" loc_db.consistency_check() # Offset manipulation loc_key4 = loc_db.add_location() assert loc_db.get_location_offset(loc_key4) is None loc_db.set_location_offset(loc_key4, 0x1122) assert loc_db.get_location_offset(loc_key4) == 0x1122 loc_db.unset_location_offset(loc_key4) assert loc_db.get_location_offset(loc_key4) is None try: loc_db.set_location_offset(loc_key4, 0x1234) has_raised = False except KeyError: has_raised = True
# Print and graph firsts blocks before patching it for block in asmcfg.blocks: print block open("graph.dot", "w").write(asmcfg.dot()) # Apply patches patches = asmblock.asm_resolve_final(machine.mn, asmcfg, loc_db, dst_interval) if args.encrypt: # Encrypt code loc_start = loc_db.get_or_create_name_location(args.encrypt[0]) loc_stop = loc_db.get_or_create_name_location(args.encrypt[1]) ad_start = loc_db.get_location_offset(loc_start) ad_stop = loc_db.get_location_offset(loc_stop) new_patches = dict(patches) for ad, val in patches.items(): if ad_start <= ad < ad_stop: new_patches[ad] = "".join([chr(ord(x) ^ 0x42) for x in val]) patches = new_patches print patches if isinstance(virt, StrPatchwork): for offset, raw in patches.items(): virt[offset] = raw else: for offset, raw in patches.items(): virt.set(offset, raw)