def test_max_bytes():
    """Check how ``max_bytes`` bounds the lifted block for bytes and cffi buffers."""
    raw = bytes.fromhex('909090909090c3')
    x86 = archinfo.ArchX86()
    total = len(raw)

    # With a Python bytes object the limit is optional and is capped at the data length.
    for limit, expected in ((None, total), (total - 1, total - 1), (total + 1, total)):
        nose.tools.assert_equal(lift(raw, 0x1000, x86, max_bytes=limit).size, expected)

    # With a cffi buffer, lifting without max_bytes must raise PyVEXError.
    cbuf = ffi.from_buffer(raw)
    nose.tools.assert_raises(PyVEXError, lift, cbuf, 0x1000, x86)
    for limit in (total, total - 1):
        nose.tools.assert_equal(lift(cbuf, 0x1000, x86, max_bytes=limit).size, limit)
def test_max_bytes(self):
    """Check how ``max_bytes`` bounds the lifted block for bytes and cffi buffers."""
    payload = bytes.fromhex("909090909090c3")
    x86 = archinfo.ArchX86()
    full = len(payload)
    short = full - 1

    # Python bytes: the limit is optional and never exceeds the available data.
    assert lift(payload, 0x1000, x86, max_bytes=None).size == full
    assert lift(payload, 0x1000, x86, max_bytes=short).size == short
    assert lift(payload, 0x1000, x86, max_bytes=full + 1).size == full

    # cffi buffer: lifting without an explicit max_bytes must raise.
    raw_buffer = ffi.from_buffer(payload)
    with self.assertRaises(PyVEXError):
        lift(raw_buffer, 0x1000, x86)
    assert lift(raw_buffer, 0x1000, x86, max_bytes=full).size == full
    assert lift(raw_buffer, 0x1000, x86, max_bytes=short).size == short
def test_ppc_rel24_relocation():
    """
    Test R_PPC_REL24 relocations on a PowerPC object file.

    :return:
    """
    loader, relocs, backend = setup()

    # The relocated branch-and-link instruction must read back as 4b ff ff 05.
    nose.tools.assert_equal(loader.memory.load(0x414838, 4), b'K\xff\xff\x05')

    # The symbol referenced by that bl instruction must sit at the expected offset.
    source_symbol = loader.find_symbol("goodG2B1Source")
    nose.tools.assert_equal(source_symbol.relative_addr, 83772)

    # The _Znwj REL24 relocation must exist and, once lifted, jump to the
    # address of the symbol that resolves it.
    matched = False
    for reloc in relocs:
        if reloc.symbol.name != "_Znwj" or reloc.__class__ != backend.R_PPC_REL24:
            continue
        matched = True
        insn = struct.pack('>I', reloc.value)
        block = pyvex.lift(insn, reloc.rebased_addr, reloc.arch)
        nose.tools.assert_equal(block.constant_jump_targets,
                                {reloc.symbol.resolvedby.rebased_addr})
        break

    nose.tools.assert_equal(matched, True)
def test_s390x_lochi():
    """Lift ``lochi %r1,0xabcd,8`` on s390x and check the IR text and jump kind."""
    arch = archinfo.ArchS390X()
    irsb = pyvex.lift(b"\xec\x18\xab\xcd\x00\x42", 0x400400, arch)  # lochi %r1,0xabcd,8
    irsb_str = str(irsb)

    assert "s390_calculate_cond(0x0000000000000008" in irsb_str
    assert "PUT(r1_32) = 0xffffabcd" in irsb_str
    # Compare for equality: ``in`` against a str is a substring test and would
    # also accept values such as "", "Ijk" or "Boring".
    assert irsb.jumpkind == "Ijk_Boring"
def test_max_bytes():
    """Check how ``max_bytes`` bounds the lifted block for bytes and cffi buffers."""
    base = 0x1000
    blob = bytes.fromhex('909090909090c3')
    arch = archinfo.ArchX86()
    check = nose.tools.assert_equal

    # Plain bytes: no limit, a limit below the length, a limit above the length.
    check(lift(blob, base, arch, max_bytes=None).size, len(blob))
    check(lift(blob, base, arch, max_bytes=len(blob) - 1).size, len(blob) - 1)
    check(lift(blob, base, arch, max_bytes=len(blob) + 1).size, len(blob))

    # cffi buffer: a missing max_bytes must be rejected with PyVEXError.
    view = ffi.from_buffer(blob)
    nose.tools.assert_raises(PyVEXError, lift, view, base, arch)
    check(lift(view, base, arch, max_bytes=len(blob)).size, len(blob))
    check(lift(view, base, arch, max_bytes=len(blob) - 1).size, len(blob) - 1)
def test_s390x_vl():
    """Lift ``vl %v4, 0xa8(%r9)`` on s390x and check the IR text and jump kind."""
    s390x = archinfo.ArchS390X()
    block = pyvex.lift(b"\xe7\x40\x90\xa8\x00\x06", 0x11C6C9E, s390x)  # vl %v4, 0xa8(%r9)
    rendered = str(block)

    expected_fragments = (
        "GET:I64(r9)",
        "Add64(0x00000000000000a8",
        "LDbe:V128",
        "PUT(v4) =",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    assert block.jumpkind == "Ijk_Boring"
def test_s390x_lochi():
    """Lift ``lochi %r1,0xabcd,8`` on s390x and check the IR text and jump kind."""
    s390x = archinfo.ArchS390X()
    insn = b'\xec\x18\xab\xcd\x00\x42'  # lochi %r1,0xabcd,8
    block = pyvex.lift(insn, 0x400400, s390x)
    rendered = str(block)

    for fragment in ('s390_calculate_cond(0x0000000000000008',
                     'PUT(r1_32) = 0xffffabcd'):
        nose.tools.assert_in(fragment, rendered)
    nose.tools.assert_equal(block.jumpkind, 'Ijk_Boring')
def test_s390x_vl():
    """Lift ``vl %v4, 0xa8(%r9)`` on s390x and check the IR text and jump kind."""
    s390x = archinfo.ArchS390X()
    insn = b'\xe7\x40\x90\xa8\x00\x06'  # vl %v4, 0xa8(%r9)
    block = pyvex.lift(insn, 0x11c6c9e, s390x)
    rendered = str(block)

    for fragment in ('GET:I64(r9)',
                     'Add64(0x00000000000000a8',
                     'LDbe:V128',
                     'PUT(v4) ='):
        nose.tools.assert_in(fragment, rendered)
    nose.tools.assert_equal(block.jumpkind, 'Ijk_Boring')
def test_s390x_exrl():
    """Lift an ``exrl`` followed by its target on s390x and check the IR text."""
    s390x = archinfo.ArchS390X()
    code = (
        b'\xc6\x10\x00\x00\x00\x04'  # exrl %r1,0x400408
        b'\x07\xfe'  # br %r14
        b'\xd7\x00\x20\x00\x30\x00'  # xc 0(0,%r2),0(%r3)
        b'\x7d\xa7'  # padding
    )
    block = pyvex.lift(code, 0x400400, s390x)
    rendered = str(block)

    for fragment in ('0xd700200030007da7',
                     's390x_dirtyhelper_EX',
                     '{ PUT(ia) = 0x400400; Ijk_Boring }',
                     '------ IMark(0x400406, 2, 0) ------'):
        nose.tools.assert_in(fragment, rendered)
    nose.tools.assert_equal(block.jumpkind, 'Ijk_Ret')
def test_s390x_exrl():
    """Lift an ``exrl`` followed by its target on s390x and check the IR text."""
    s390x = archinfo.ArchS390X()
    code = (
        b"\xc6\x10\x00\x00\x00\x04"  # exrl %r1,0x400408
        b"\x07\xfe"  # br %r14
        b"\xd7\x00\x20\x00\x30\x00"  # xc 0(0,%r2),0(%r3)
        b"\x7d\xa7"  # padding
    )
    block = pyvex.lift(code, 0x400400, s390x)
    rendered = str(block)

    expected_fragments = (
        # check last_execute_target, only top 6 bytes are relevant
        "0xd700200030000000",
        "s390x_dirtyhelper_EX",
        "{ PUT(ia) = 0x400400; Ijk_Boring }",
        "------ IMark(0x400406, 2, 0) ------",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    assert block.jumpkind == "Ijk_Ret"
def test_thumb_object():
    """
    Test for an object file ripped out of an ARM firmware HAL, which uses
    some nasty relocations.

    :return:
    """
    obj_path = os.path.join(test_location, "armel", "i2c_api.o")
    loader = cle.Loader(obj_path, rebase_granularity=0x1000)

    for reloc in loader.main_object.relocs:
        is_jump24 = reloc.__class__ == cle.backends.elf.relocation.arm.R_ARM_THM_JUMP24
        if not is_jump24 or reloc.symbol.name != 'HAL_I2C_ER_IRQHandler':
            continue
        # Lift at the odd address with bytes_offset=1, as the original test does.
        insn = struct.pack('<I', reloc.value)
        block = pyvex.lift(insn, reloc.rebased_addr + 1, loader.main_object.arch, bytes_offset=1)
        assert_equal(block.default_exit_target, reloc.resolvedby.rebased_addr)
        break
    else:
        assert False, "Could not find JUMP24 relocation for HAL_I2C_ER_IRQHandler"
def getRegInOut(ctx, address, inslen=15, arch=None, oneins=True, ignorereg=None):
    """
    Collect the register-file offsets read and written by the code at ``address``.

    :param ctx:       Object providing ``getMemVal(address, length)``, used to fetch the bytes to lift.
    :param address:   Address of the code to lift.
    :param inslen:    Number of bytes to fetch from ``ctx`` (default: 15).
    :param arch:      archinfo architecture; defaults to ``archinfo.ArchAMD64()``.
    :param oneins:    If true, restrict the analysis to the first instruction: when a second
                      IMark is seen, the function re-runs itself with ``inslen`` trimmed to
                      that instruction's length.
    :param ignorereg: Container of offsets to leave out of both result lists
                      (default: nothing is ignored).
    :return:          ``[reads, writes]``: two lists of offsets, one entry per byte of every
                      Get expression (reads) and Put statement (writes). Duplicates are kept.
    """
    # ``None`` replaces the former shared mutable ``[]`` default; the container is only read.
    ignored = () if ignorereg is None else ignorereg

    code = ctx.getMemVal(address, inslen)
    if arch is None:
        arch = archinfo.ArchAMD64()
    irsb = pyvex.lift(code, address, arch)

    reg_in, reg_out = [], []
    # statements[0] is skipped — presumably the IMark of the first instruction.
    for stmt in irsb.statements[1:]:
        if isinstance(stmt, pyvex.IRStmt.IMark):
            if oneins:
                # bad inslen, grab again, otherwise we miss things
                # unless we really have the full block
                return getRegInOut(ctx, address, stmt.addr - address, arch, oneins, ignorereg)
        elif isinstance(stmt, pyvex.IRStmt.Put):
            width = stmt.data.result_size(irsb.tyenv) // 8
            for reg in range(stmt.offset, stmt.offset + width):
                if reg not in ignored:
                    reg_out.append(reg)

        for expr in stmt.expressions:
            # NOTE(review): Get is looked up on pyvex.IRStmt exactly as in the original;
            # confirm that module exposes the Get expression class.
            if isinstance(expr, pyvex.IRStmt.Get):
                width = expr.result_size(irsb.tyenv) // 8
                for reg in range(expr.offset, expr.offset + width):
                    if reg not in ignored:
                        reg_in.append(reg)

    # if there is an option for changing RIP here, we need to report RIP as an output
    # if not isinstance(irsb.next, pyvex.expr.Const):
    #     out[1].append("rip")
    return [reg_in, reg_out]
def lift(self, addr=None, clemory=None, insn_bytes=None, size=None, arch=None, **kwargs):
    """
    Lift the code at ``addr`` into an IRSB via ``pyvex.lift``.

    :param addr:        Address of the block; required.
    :param clemory:     Memory object whose ``load(addr, size)`` supplies the bytes when
                        ``insn_bytes`` is not given.
    :param insn_bytes:  Bytes to lift; when given, ``size`` is replaced by their length.
    :param size:        Number of bytes to load from ``clemory``; passed on as ``max_bytes``.
    :param arch:        Architecture; defaults to ``archinfo.arch_from_id('bf')``.
    :param kwargs:      Accepted and ignored.
    :return:            The lifted IRSB.
    :raises ValueError: If ``addr`` is missing, or neither ``insn_bytes`` nor ``clemory`` is given.
    """
    if addr is None:
        raise ValueError("addr must be specified.")

    if insn_bytes is not None:
        size = len(insn_bytes)
    elif clemory is not None:
        insn_bytes = clemory.load(addr, size)
    else:
        raise ValueError(
            "clemory must be specified if insn_bytes is None.")

    if arch is None:
        arch = archinfo.arch_from_id('bf')

    return pyvex.lift(insn_bytes, addr, arch, max_bytes=size)
def lift(self,
         state=None,
         clemory=None,
         insn_bytes=None,
         arch=None,
         addr=None,
         size=None,
         num_inst=None,
         traceflags=0,
         thumb=False,
         opt_level=None,
         strict_block_end=None,
         skip_stmts=False,
         collect_data_refs=False):
    """
    Lift an IRSB.

    There are many possible valid sets of parameters. You at the very least must pass some
    source of data, some source of an architecture, and some source of an address.

    Sources of data in order of priority: insn_bytes, clemory, state

    Sources of an address, in order of priority: addr, state

    Sources of an architecture, in order of priority: arch, clemory, state

    :param state:             A state to use as a data source.
    :param clemory:           A cle.memory.Clemory object to use as a data source.
    :param addr:              The address at which to start the block.
    :param thumb:             Whether the block should be lifted in ARM's THUMB mode.
    :param opt_level:         The VEX optimization level to use. The final IR optimization level is
                              determined by (ordered by priority):
                              - Argument opt_level
                              - opt_level is set to 1 if OPTIMIZE_IR exists in state options
                              - self._default_opt_level
    :param insn_bytes:        A string of bytes to use as a data source.
    :param size:              The maximum size of the block, in bytes.
    :param num_inst:          The maximum number of instructions.
    :param traceflags:        traceflags to be passed to VEX. (default: 0)
    :param strict_block_end:  Whether to force blocks to end at all conditional branches
                              (default: false)
    :param skip_stmts:        Forwarded to pyvex.lift; anything other than ``True`` is treated
                              as ``False``. Enabling it disables the block cache for this call.
    :param collect_data_refs: Forwarded to pyvex.lift. Enabling it disables the block cache
                              for this call.
    :return:                  The lifted IRSB, possibly taken from the block cache.
    :raises ValueError:           If no data, architecture or address source is provided.
    :raises SimEngineError:       If no bytes are available at the block address.
    :raises SimTranslationError:  If pyvex raises PyVEXError while lifting.
    """
    # phase 0: sanity check
    if not state and not clemory and not insn_bytes:
        raise ValueError("Must provide state or clemory or insn_bytes!")
    if not state and not clemory and not arch:
        raise ValueError("Must provide state or clemory or arch!")
    if addr is None and not state:
        raise ValueError("Must provide state or addr!")
    if arch is None:
        arch = clemory._arch if clemory else state.arch
    if arch.name.startswith("MIPS") and self._single_step:
        # Note: this permanently turns single-stepping off on the engine, not just for this call.
        l.error("Cannot specify single-stepping on MIPS.")
        self._single_step = False

    # phase 1: parameter defaults
    if addr is None:
        addr = state.solver.eval(state._ip)
    if size is not None:
        size = min(size, VEX_IRSB_MAX_SIZE)
    if size is None:
        size = VEX_IRSB_MAX_SIZE
    if num_inst is not None:
        num_inst = min(num_inst, VEX_IRSB_MAX_INST)
    if num_inst is None and self._single_step:
        num_inst = 1
    if opt_level is None:
        if state and o.OPTIMIZE_IR in state.options:
            opt_level = 1
        else:
            opt_level = self._default_opt_level
    if strict_block_end is None:
        strict_block_end = self.default_strict_block_end
    if self._support_selfmodifying_code:
        if opt_level > 0:
            # Warn through once(...), then force optimizations off and drop the state option too.
            if once('vex-engine-smc-opt-warning'):
                l.warning("Self-modifying code is not always correctly optimized by PyVEX. "
                          "To guarantee correctness, VEX optimizations have been disabled.")
            opt_level = 0
            if state and o.OPTIMIZE_IR in state.options:
                state.options.remove(o.OPTIMIZE_IR)
    if skip_stmts is not True:
        skip_stmts = False

    use_cache = self._use_cache
    if skip_stmts or collect_data_refs:
        # Do not cache the blocks if skip_stmts or collect_data_refs are enabled
        use_cache = False

    # phase 2: thumb normalization
    # After this phase addr is even for ARM and thumb is 0 or 1; pyvex is later called
    # with addr + thumb and bytes_offset=thumb.
    thumb = int(thumb)
    if isinstance(arch, ArchARM):
        if addr % 2 == 1:
            thumb = 1
        if thumb:
            addr &= ~1
    elif thumb:
        l.error("thumb=True passed on non-arm architecture!")
        thumb = 0

    # phase 3: check cache
    cache_key = None
    if use_cache:
        cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level, strict_block_end)
        if cache_key in self._block_cache:
            self._block_cache_hits += 1
            irsb = self._block_cache[cache_key]
            stop_point = self._first_stoppoint(irsb)
            if stop_point is None:
                return irsb
            else:
                # The cached block contains a stop point: shrink the block to end there.
                size = stop_point - addr
                # check the cache again
                cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level, strict_block_end)
                if cache_key in self._block_cache:
                    self._block_cache_hits += 1
                    return self._block_cache[cache_key]
                else:
                    self._block_cache_misses += 1
        else:
            # a special case: `size` is used as the maximum allowed size
            tmp_cache_key = (addr, insn_bytes, VEX_IRSB_MAX_SIZE, num_inst, thumb, opt_level, strict_block_end)
            try:
                irsb = self._block_cache[tmp_cache_key]
                # A block lifted with the maximum size is reusable if it fits in `size`.
                if irsb.size <= size:
                    self._block_cache_hits += 1
                    return self._block_cache[tmp_cache_key]
            except KeyError:
                self._block_cache_misses += 1

    # phase 4: get bytes
    if insn_bytes is not None:
        buff, size = insn_bytes, len(insn_bytes)
    else:
        buff, size = self._load_bytes(addr, size, state, clemory)

    if not buff or size == 0:
        raise SimEngineError("No bytes in memory for block starting at %#x." % addr)

    # phase 5: call into pyvex
    # l.debug("Creating pyvex.IRSB of arch %s at %#x", arch.name, addr)
    try:
        # At most two passes: if the first lift contains a stop point, lift once more
        # with the size cut down to that point.
        for subphase in range(2):
            irsb = pyvex.lift(buff, addr + thumb, arch,
                              max_bytes=size,
                              max_inst=num_inst,
                              bytes_offset=thumb,
                              traceflags=traceflags,
                              opt_level=opt_level,
                              strict_block_end=strict_block_end,
                              skip_stmts=skip_stmts,
                              collect_data_refs=collect_data_refs,
                              )

            if subphase == 0 and irsb.statements is not None:
                # check for possible stop points
                stop_point = self._first_stoppoint(irsb)
                if stop_point is not None:
                    size = stop_point - addr
                    continue

            if use_cache:
                self._block_cache[cache_key] = irsb
            return irsb

    # phase x: error handling
    except pyvex.PyVEXError as e:
        l.debug("VEX translation error at %#x", addr)
        if isinstance(buff, bytes):
            l.debug('Using bytes: %r', buff)
        else:
            l.debug("Using bytes: %r", pyvex.ffi.buffer(buff, size))
        raise SimTranslationError("Unable to translate bytecode") from e
import pyvex
import archinfo

# x86 machine code of the sample function, given as a hex string.
code = '558bec8b45083b450c7e078b4508eb05eb038b450c5dc3'
# bytes(...) is the same as str(...) on Python 2 and yields real bytes on Python 3.
code = bytes(bytearray.fromhex(code))
offset = 0x401450
arch = archinfo.ArchX86()

# Lift and print one IRSB at a time, following each block's constant successor
# through the remaining bytes.
while code:
    irsb = pyvex.lift(code, offset, arch)
    print(type(irsb))
    irsb.pp()

    # A block that ends in an indirect transfer (such as the final ``ret``) has no
    # constant successor; stop there instead of failing on ``irsb.next.con``.
    if not isinstance(irsb.next, pyvex.expr.Const):
        break
    next_address = irsb.next.con.value
    # Only forward targets can be reached by slicing the remaining bytes; a target
    # at or before the current offset would never advance.
    if next_address <= offset:
        break

    code = code[(next_address - offset):]
    offset = next_address
G.add_edge(reg_node_id, tmpvar_node_id) #print(stmt.data) print("-" * 20 + "\n") #pos = nx.circular_layout(G) #nx.draw_circular(G, node_size=200, with_labels=False) #nx.draw_networkx_labels(G, pos, labels, font_size=9) print(nx.nx_agraph.to_agraph(G)) #plt.savefig("graph.png", format="PNG") return G r2 = r2pipe.open("./linear") r2.cmd('aaaa') r2.cmd('s sym._baz') fn_info = r2.cmdj('afij')[0] fn_machinecode = r2.cmd('pcs ' + str(fn_info['size'])).strip() fn_machinecode = binascii.unhexlify( fn_machinecode.replace('\\', '').replace('x', '')[1:-1]) hexdump(fn_machinecode) base_addr = fn_info['offset'] irsb = pyvex.lift(fn_machinecode, base_addr, arch) G = ir_build_graph(irsb)
def lift(blob, base, arch):
    """
    Lift ``blob`` at address ``base`` for ``arch`` via ``pyvex.lift``.

    Data-reference collection is switched on and ``opt_level`` is set to
    ``OPT_LEVEL_NO_OPTIMIZATION``.
    """
    lift_options = {
        "collect_data_refs": True,
        "opt_level": OPT_LEVEL_NO_OPTIMIZATION,
    }
    return pyvex.lift(blob, base, arch, **lift_options)
def convert(self, instr, code=None):
    """
    Lift one instruction to VEX and translate its statements into an ESIL string.

    :param instr: Mapping describing the instruction. When ``code`` is not given,
                  ``instr["bytes"]`` (hex) is decoded and lifted, and ``instr["esil"]``
                  is printed.
    :param code:  Raw instruction bytes; overrides ``instr["bytes"]`` when given.
    :return:      The collected expressions joined with ``","``, or ``None`` when
                  every byte of ``code`` is zero.

    Side effects: prints to stdout, pretty-prints the lifted block, and resets
    ``self.irsb``, ``self.exprs``, ``self.stacklen``, ``self.temp_to_stack``,
    ``self.temp_to_exprs`` and ``self.skip_next``.
    """
    if code == None:
        print(instr["bytes"])
        code = unhexlify(instr["bytes"])
        print(instr["esil"])

    # All-zero bytes are treated as an assembly failure.
    if all([x == 0 for x in code]):
        print("[!] failed to assemble instruction")
        return

    self.irsb = lift(code, self.vex_addr, self.arch_class)
    self.irsb.pp()

    # Per-conversion state.
    self.exprs = []
    self.stacklen = 0
    self.temp_to_stack = {}
    self.temp_to_exprs = {}
    self.skip_next = False

    for ind, statement in enumerate(self.irsb.statements):
        # A look-ahead below may already have consumed this statement.
        if self.skip_next:
            self.skip_next = False
            continue

        #print(type(statement))
        #print(dir(statement))
        #print(dir(statement.data))
        stmt_type = type(statement)

        next_stmt = None
        if len(self.irsb.statements) > ind + 1:
            next_stmt = self.irsb.statements[ind + 1]

        if stmt_type == WrTmp:
            #print(dir(statement.data))
            # look ahead to see if the stmt is a reg get
            # and the next stmt is a conv
            if self.do_lookahead:
                if type(statement.data) in (Get, GetI):
                    src, size = self.offset_to_reg(statement.data, True)
                    conv_op = "%dto" % (size * 8)
                    # NOTE(review): type(next_stmt) is compared against Unop while
                    # next_stmt.data is checked against self.ops — confirm this branch
                    # is reachable as intended.
                    if type(next_stmt) == Unop and type(
                            next_stmt.data
                    ) in self.ops and conv_op in next_stmt.data.op:
                        # Drop a 4-character prefix (presumably "Iop_") plus "<N>to";
                        # what remains is the destination width in bits.
                        to_size = next_stmt.data.op[4 + len(conv_op):]
                        if to_size.isdigit():
                            new_size = int(to_size) // 8
                            new_offset = statement.data.offset
                            if (new_offset, new_size
                                ) in self.arch_class.register_size_names:
                                # Fold the get and the conversion into the named
                                # sub-register and skip the conversion statement.
                                new_exprs = [
                                    self.arch_class.register_size_names[(
                                        new_offset, new_size)]
                                ]
                                self.temp_to_exprs[
                                    next_stmt.tmp] = new_exprs
                                self.skip_next = True
                                continue
                elif type(next_stmt) in (Put, PutI):
                    dst, size = self.offset_to_reg(next_stmt)
                    conv_op = "to%d" % (size * 8)
                    if type(statement.data
                            ) in self.ops and conv_op in statement.data.op:
                        # Take at most two characters between the 4-character prefix
                        # and "to<N>" as the source width.
                        to_size = statement.data.op[4:statement.data.op.
                                                    index(conv_op)][:2]
                        # A leading "8" is taken to mean an 8-bit source width.
                        if to_size[0] == "8":
                            to_size = "8"
                        if to_size.isdigit():
                            new_size = int(to_size) // 8
                            new_offset = next_stmt.offset
                            if (new_offset, new_size
                                ) in self.arch_class.register_size_names:
                                # Fold the conversion and the put into an assignment
                                # to the named sub-register and skip the put.
                                new_dst = self.arch_class.register_size_names[
                                    (new_offset, new_size)]
                                self.exprs += self.temp_to_exprs[
                                    statement.data.args[0].tmp] + [
                                        new_dst, "="
                                    ]
                                self.skip_next = True
                                continue

            # Default WrTmp handling: remember the expressions that compute this temp.
            new_exprs = self.data_to_esil(statement.data)
            self.temp_to_exprs[statement.tmp] = new_exprs

        elif stmt_type in (Put, PutI):
            dst, size = self.offset_to_reg(statement)
            if "cc_" not in dst:  # skip flags for now
                self.exprs += self.data_to_esil(statement.data, dst=dst)

        elif stmt_type in (Store, StoreG):
            # Emit value, then address, then the sized store operator.
            size = int(statement.data.result_size(self.irsb.tyenv) / 8)
            self.exprs += self.data_to_esil(statement.data)
            self.exprs += self.temp_to_exprs[statement.addr.tmp]
            self.exprs += ["=[%d]" % size]

        elif stmt_type == Exit:
            # Exit statements are not translated.
            pass

    #print(self.exprs)
    esilex = ",".join(self.exprs)
    #esilchecker = ESILCheck(self.arch, bits=self.bits)
    #esilchecker.check(code=code, check_flags=False)
    #esilchecker.check(code=code, esil=esilex, check_flags=False)
    #print(esilex)
    return esilex
def getVEXIROfFunction(self, funcName):
    """
    Lift the machine code of the function named ``funcName`` to VEX IR.

    The bytes come from ``getBinaryChunkOfFunction`` and the start address from the
    ``'offset'`` entry of ``getFunctionInfo``; the block is lifted for ``self.arch``.
    """
    info = self.getFunctionInfo(funcName)
    machine_code = self.getBinaryChunkOfFunction(funcName)
    start = info['offset']
    return pyvex.lift(machine_code, start, self.arch)
def lift_vex(self,
             addr=None,
             state=None,
             clemory=None,
             insn_bytes=None,
             offset=None,
             arch=None,
             size=None,
             num_inst=None,
             traceflags=0,
             thumb=False,
             extra_stop_points=None,
             opt_level=None,
             strict_block_end=None,
             skip_stmts=False,
             collect_data_refs=False,
             cross_insn_opt=None,
             load_from_ro_regions=False):
    """
    Lift an IRSB.

    There are many possible valid sets of parameters. You at the very least must pass some
    source of data, some source of an architecture, and some source of an address.

    Sources of data in order of priority: insn_bytes, clemory, state

    Sources of an address, in order of priority: addr, state

    Sources of an architecture, in order of priority: arch, clemory, state

    :param state:                A state to use as a data source.
    :param clemory:              A cle.memory.Clemory object to use as a data source.
    :param addr:                 The address at which to start the block.
    :param thumb:                Whether the block should be lifted in ARM's THUMB mode.
    :param opt_level:            The VEX optimization level to use. The final IR optimization level
                                 is determined by (ordered by priority):
                                 - Argument opt_level
                                 - opt_level is set to 1 if OPTIMIZE_IR exists in state options
                                 - self._default_opt_level
    :param insn_bytes:           A string of bytes to use as a data source.
    :param offset:               If using insn_bytes, the number of bytes in it to skip over.
    :param size:                 The maximum size of the block, in bytes.
    :param num_inst:             The maximum number of instructions.
    :param traceflags:           traceflags to be passed to VEX. (default: 0)
    :param strict_block_end:     Whether to force blocks to end at all conditional branches
                                 (default: false)
    :param extra_stop_points:    Forwarded to ``self._first_stoppoint`` when looking for a
                                 point at which the block must end.
    :param skip_stmts:           Forwarded to pyvex.lift; anything other than ``True`` is treated
                                 as ``False``. Enabling it disables the block cache for this call.
    :param collect_data_refs:    Forwarded to pyvex.lift. Enabling it disables the block cache
                                 for this call.
    :param cross_insn_opt:       Forwarded to pyvex.lift. When ``None``, it is ``False`` if
                                 NO_CROSS_INSN_OPT is in the state options and ``True`` otherwise.
    :param load_from_ro_regions: Forwarded to pyvex.lift.
    :return:                     The lifted IRSB, possibly taken from the block cache.
    :raises ValueError:           If no data, architecture or address source is provided.
    :raises SimEngineError:       If no bytes are available at the block address.
    :raises SimTranslationError:  If pyvex raises PyVEXError while lifting.
    """
    # phase 0: sanity check
    if not state and not clemory and not insn_bytes:
        raise ValueError("Must provide state or clemory or insn_bytes!")
    if not state and not clemory and not arch:
        raise ValueError("Must provide state or clemory or arch!")
    if addr is None and not state:
        raise ValueError("Must provide state or addr!")
    if arch is None:
        arch = clemory._arch if clemory else state.arch
    if arch.name.startswith("MIPS") and self._single_step:
        # Note: this permanently turns single-stepping off on the engine, not just for this call.
        l.error("Cannot specify single-stepping on MIPS.")
        self._single_step = False

    # phase 1: parameter defaults
    if addr is None:
        addr = state.solver.eval(state._ip)
    if size is not None:
        size = min(size, VEX_IRSB_MAX_SIZE)
    if size is None:
        size = VEX_IRSB_MAX_SIZE
    if num_inst is not None:
        num_inst = min(num_inst, VEX_IRSB_MAX_INST)
    if num_inst is None and self._single_step:
        num_inst = 1
    if opt_level is None:
        if state and o.OPTIMIZE_IR in state.options:
            opt_level = 1
        else:
            opt_level = self._default_opt_level
    if cross_insn_opt is None:
        if state and o.NO_CROSS_INSN_OPT in state.options:
            cross_insn_opt = False
        else:
            cross_insn_opt = True
    if strict_block_end is None:
        strict_block_end = self.default_strict_block_end
    if self._support_selfmodifying_code:
        if opt_level > 0:
            # Warn through once(...), then force optimizations off and drop the state option too.
            if once('vex-engine-smc-opt-warning'):
                l.warning(
                    "Self-modifying code is not always correctly optimized by PyVEX. "
                    "To guarantee correctness, VEX optimizations have been disabled."
                )
            opt_level = 0
            if state and o.OPTIMIZE_IR in state.options:
                state.options.remove(o.OPTIMIZE_IR)
    if skip_stmts is not True:
        skip_stmts = False
    if offset is None:
        offset = 0

    use_cache = self._use_cache
    if skip_stmts or collect_data_refs:
        # Do not cache the blocks if skip_stmts or collect_data_refs are enabled
        use_cache = False

    # phase 2: thumb normalization
    # After this phase addr is even for ARM and thumb is 0 or 1; pyvex is later called
    # with addr + thumb and bytes_offset=offset + thumb.
    thumb = int(thumb)
    if isinstance(arch, ArchARM):
        if addr % 2 == 1:
            thumb = 1
        if thumb:
            addr &= ~1
    elif thumb:
        l.error("thumb=True passed on non-arm architecture!")
        thumb = 0

    # phase 3: check cache
    # NOTE(review): the cache key does not include offset, extra_stop_points or
    # load_from_ro_regions — confirm that is intended.
    cache_key = None
    if use_cache:
        cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level, strict_block_end, cross_insn_opt)
        if cache_key in self._block_cache:
            self._block_cache_hits += 1
            l.debug("Cache hit IRSB of %s at %#x", arch, addr)
            irsb = self._block_cache[cache_key]
            stop_point = self._first_stoppoint(irsb, extra_stop_points)
            if stop_point is None:
                return irsb
            else:
                # The cached block contains a stop point: shrink the block to end there.
                size = stop_point - addr
                # check the cache again
                cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level, strict_block_end, cross_insn_opt)
                if cache_key in self._block_cache:
                    self._block_cache_hits += 1
                    return self._block_cache[cache_key]
                else:
                    self._block_cache_misses += 1
        else:
            # a special case: `size` is used as the maximum allowed size
            tmp_cache_key = (addr, insn_bytes, VEX_IRSB_MAX_SIZE, num_inst, thumb, opt_level, strict_block_end,
                             cross_insn_opt)
            try:
                irsb = self._block_cache[tmp_cache_key]
                # A block lifted with the maximum size is reusable if it fits in `size`.
                if irsb.size <= size:
                    self._block_cache_hits += 1
                    return self._block_cache[tmp_cache_key]
            except KeyError:
                self._block_cache_misses += 1

    # vex_lift breakpoints only triggered when the cache isn't used
    # A BP_BEFORE handler may override the buffer, address and size used for lifting.
    buff = NO_OVERRIDE
    if state:
        state._inspect('vex_lift', BP_BEFORE, vex_lift_addr=addr, vex_lift_size=size, vex_lift_buff=NO_OVERRIDE)
        buff = state._inspect_getattr("vex_lift_buff", NO_OVERRIDE)
        addr = state._inspect_getattr("vex_lift_addr", addr)
        size = state._inspect_getattr("vex_lift_size", size)

    # phase 4: get bytes
    if buff is NO_OVERRIDE:
        if insn_bytes is not None:
            buff, size = insn_bytes, len(insn_bytes)
            # offset stays unchanged
        else:
            buff, size, offset = self._load_bytes(addr, size, state, clemory)

    # A claripy BV buffer is checked by length; any other buffer by truthiness.
    if isinstance(buff, claripy.ast.BV):  # pylint:disable=isinstance-second-argument-not-valid-type
        if len(buff) == 0:
            raise SimEngineError(
                "No bytes in memory for block starting at %#x." % addr)
    elif not buff:
        raise SimEngineError(
            "No bytes in memory for block starting at %#x." % addr)

    # phase 5: call into pyvex
    l.debug("Creating IRSB of %s at %#x", arch, addr)
    try:
        # At most two passes: if the first lift contains a stop point, lift once more
        # with the size cut down to that point.
        for subphase in range(2):
            irsb = pyvex.lift(buff, addr + thumb, arch,
                              max_bytes=size,
                              max_inst=num_inst,
                              bytes_offset=offset + thumb,
                              traceflags=traceflags,
                              opt_level=opt_level,
                              strict_block_end=strict_block_end,
                              skip_stmts=skip_stmts,
                              collect_data_refs=collect_data_refs,
                              load_from_ro_regions=load_from_ro_regions,
                              cross_insn_opt=cross_insn_opt)

            if subphase == 0 and irsb.statements is not None:
                # check for possible stop points
                stop_point = self._first_stoppoint(irsb, extra_stop_points)
                if stop_point is not None:
                    size = stop_point - addr
                    continue

            if use_cache:
                self._block_cache[cache_key] = irsb
            if state:
                state._inspect('vex_lift', BP_AFTER, vex_lift_addr=addr, vex_lift_size=size)
            return irsb

    # phase x: error handling
    except pyvex.PyVEXError as e:
        l.debug("VEX translation error at %#x", addr)
        if isinstance(buff, bytes):
            l.debug('Using bytes: %r', buff)
        else:
            l.debug("Using bytes: %r", pyvex.ffi.buffer(buff, size))
        raise SimTranslationError("Unable to translate bytecode") from e
def lift(self,
         state=None,
         clemory=None,
         insn_bytes=None,
         arch=None,
         addr=None,
         size=None,
         num_inst=None,
         traceflags=0,
         thumb=False,
         opt_level=None,
         strict_block_end=None,
         skip_stmts=False,
         collect_data_refs=False):
    """
    Lift an IRSB.

    There are many possible valid sets of parameters. You at the very least must pass some
    source of data, some source of an architecture, and some source of an address.

    Sources of data in order of priority: insn_bytes, clemory, state

    Sources of an address, in order of priority: addr, state

    Sources of an architecture, in order of priority: arch, clemory, state

    :param state:             A state to use as a data source.
    :param clemory:           A cle.memory.Clemory object to use as a data source.
    :param addr:              The address at which to start the block.
    :param thumb:             Whether the block should be lifted in ARM's THUMB mode.
    :param opt_level:         The VEX optimization level to use. The final IR optimization level is
                              determined by (ordered by priority):
                              - Argument opt_level
                              - opt_level is set to 1 if OPTIMIZE_IR exists in state options
                              - self._default_opt_level
    :param insn_bytes:        A string of bytes to use as a data source.
    :param size:              The maximum size of the block, in bytes.
    :param num_inst:          The maximum number of instructions.
    :param traceflags:        traceflags to be passed to VEX. (default: 0)
    :param strict_block_end:  Whether to force blocks to end at all conditional branches
                              (default: false)
    :param skip_stmts:        Forwarded to pyvex.lift; anything other than ``True`` is treated
                              as ``False``. Enabling it disables the block cache for this call.
    :param collect_data_refs: Forwarded to pyvex.lift. Enabling it disables the block cache
                              for this call.
    :return:                  The lifted IRSB, possibly taken from the block cache.
    :raises ValueError:           If no data, architecture or address source is provided.
    :raises SimEngineError:       If no bytes are available at the block address.
    :raises SimTranslationError:  If pyvex raises PyVEXError while lifting.

    Note: this block uses Python 2 syntax (``xrange``, three-expression ``raise``).
    """
    # phase 0: sanity check
    if not state and not clemory and not insn_bytes:
        raise ValueError("Must provide state or clemory or insn_bytes!")
    if not state and not clemory and not arch:
        raise ValueError("Must provide state or clemory or arch!")
    if addr is None and not state:
        raise ValueError("Must provide state or addr!")
    if arch is None:
        arch = clemory._arch if clemory else state.arch
    if arch.name.startswith("MIPS") and self._single_step:
        # Note: this permanently turns single-stepping off on the engine, not just for this call.
        l.error("Cannot specify single-stepping on MIPS.")
        self._single_step = False

    # phase 1: parameter defaults
    if addr is None:
        addr = state.se.eval(state._ip)
    if size is not None:
        size = min(size, VEX_IRSB_MAX_SIZE)
    if size is None:
        size = VEX_IRSB_MAX_SIZE
    if num_inst is not None:
        num_inst = min(num_inst, VEX_IRSB_MAX_INST)
    if num_inst is None and self._single_step:
        num_inst = 1
    if opt_level is None:
        if state and o.OPTIMIZE_IR in state.options:
            opt_level = 1
        else:
            opt_level = self._default_opt_level
    if strict_block_end is None:
        strict_block_end = self.default_strict_block_end
    if self._support_selfmodifying_code:
        if opt_level > 0:
            # Warn, then force optimizations off and drop the state option too.
            l.warning(
                "Self-modifying code is not always correctly optimized by PyVEX. "
                "To guarantee correctness, VEX optimizations have been disabled."
            )
            opt_level = 0
            if state and o.OPTIMIZE_IR in state.options:
                state.options.remove(o.OPTIMIZE_IR)
    if skip_stmts is not True:
        skip_stmts = False

    use_cache = self._use_cache
    if skip_stmts or collect_data_refs:
        # Do not cache the blocks if skip_stmts or collect_data_refs are enabled
        use_cache = False

    # phase 2: thumb normalization
    # After this phase addr is even for ARM and thumb is 0 or 1; pyvex is later called
    # with addr + thumb and bytes_offset=thumb.
    thumb = int(thumb)
    if isinstance(arch, ArchARM):
        if addr % 2 == 1:
            thumb = 1
        if thumb:
            addr &= ~1
    elif thumb:
        l.error("thumb=True passed on non-arm architecture!")
        thumb = 0

    # phase 3: check cache
    cache_key = None
    if use_cache:
        cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level, strict_block_end)
        if cache_key in self._block_cache:
            self._block_cache_hits += 1
            irsb = self._block_cache[cache_key]
            stop_point = self._first_stoppoint(irsb)
            if stop_point is None:
                return irsb
            else:
                # The cached block contains a stop point: shrink the block to end there.
                size = stop_point - addr
                # check the cache again
                cache_key = (addr, insn_bytes, size, num_inst, thumb, opt_level, strict_block_end)
                if cache_key in self._block_cache:
                    self._block_cache_hits += 1
                    return self._block_cache[cache_key]
                else:
                    self._block_cache_misses += 1
        else:
            # a special case: `size` is used as the maximum allowed size
            tmp_cache_key = (addr, insn_bytes, VEX_IRSB_MAX_SIZE, num_inst, thumb, opt_level, strict_block_end)
            try:
                irsb = self._block_cache[tmp_cache_key]
                # A block lifted with the maximum size is reusable if it fits in `size`.
                if irsb.size <= size:
                    self._block_cache_hits += 1
                    return self._block_cache[tmp_cache_key]
            except KeyError:
                self._block_cache_misses += 1

    # phase 4: get bytes
    if insn_bytes is not None:
        buff, size = insn_bytes, len(insn_bytes)
    else:
        buff, size = self._load_bytes(addr, size, state, clemory)

    if not buff or size == 0:
        raise SimEngineError(
            "No bytes in memory for block starting at %#x." % addr)

    # phase 5: call into pyvex
    # l.debug("Creating pyvex.IRSB of arch %s at %#x", arch.name, addr)
    try:
        # At most two passes: if the first lift contains a stop point, lift once more
        # with the size cut down to that point.
        for subphase in xrange(2):
            irsb = pyvex.lift(
                buff,
                addr + thumb,
                arch,
                max_bytes=size,
                max_inst=num_inst,
                bytes_offset=thumb,
                traceflags=traceflags,
                opt_level=opt_level,
                strict_block_end=strict_block_end,
                skip_stmts=skip_stmts,
                collect_data_refs=collect_data_refs,
            )

            if subphase == 0 and irsb.statements is not None:
                # check for possible stop points
                stop_point = self._first_stoppoint(irsb)
                if stop_point is not None:
                    size = stop_point - addr
                    continue

            if use_cache:
                self._block_cache[cache_key] = irsb
            return irsb

    # phase x: error handling
    except pyvex.PyVEXError:
        l.debug("VEX translation error at %#x", addr)
        if isinstance(buff, str):
            l.debug('Using bytes: %r', buff)
        else:
            l.debug("Using bytes: %r", pyvex.ffi.buffer(buff, size))
        # Re-raise as SimTranslationError while keeping the original traceback
        # (Python 2 three-expression raise).
        e_type, value, traceback = sys.exc_info()
        raise SimTranslationError, ("Translation error", e_type, value), traceback
def __init__(self, assembly, opcodes, addr):
    """
    Store the instruction text, decode its hex-encoded opcodes and lift them.

    :param assembly: Stored unchanged as ``self.assembly``.
    :param opcodes:  Hex string; stored decoded (``binascii.unhexlify``) as ``self.opcodes``.
    :param addr:     Address used for lifting; stored as ``self.addr``.

    The lifted block is kept in ``self.ir``; the architecture is read from ``self.arch``.
    """
    self.assembly = assembly
    decoded = binascii.unhexlify(opcodes)
    self.opcodes = decoded
    self.addr = addr
    self.ir = pyvex.lift(decoded, addr, self.arch)