def get_interface(self, emu, ptr_size, name):
    """
    Build a COM interface object and hook the methods that are supported

    Arguments:
        emu: Emulator used to map memory, register callbacks and write
             the resulting addresses
        ptr_size: Size of each method slot in the interface
        name: Name of the interface, looked up in comdefs.IFACE_TYPES

    Returns:
        The comdefs.ComInterface object with its address attribute set
    """
    method_types = (ctypes.c_uint32, ctypes.c_ulong, ctypes.c_ulonglong)

    iface_type = comdefs.IFACE_TYPES.get(name)
    if not iface_type:
        raise Win32EmuError('Invalid COM interface: %s' % (name))

    ci = comdefs.ComInterface(iface_type, name, ptr_size)
    com_ptr = emu.mem_map(emu.sizeof(ci.iface), tag='emu.COM.%s' % (name))
    ci.address = com_ptr

    def hook_slot(owner, member, slot_offset):
        # Write a callback address into the slot when ComApi provides a
        # handler named <owner>_<member>; otherwise leave the slot alone
        method_name = '%s_%s' % (owner, member)
        if not hasattr(com_api.ComApi, method_name):
            return
        method = getattr(com_api.ComApi, method_name)
        addr = emu.add_callback(com_api.ComApi.name, method.__apihook__[0])
        emu.mem_write(com_ptr + slot_offset,
                      addr.to_bytes(emu.get_ptr_size(), 'little'))

    slot_offset = 0
    for field_name, field_obj in ci.iface.__dict__['__fields__']:
        if issubclass(field_obj, ctypes.Structure):
            # The field is an inherited interface (e.g., IUnknown)
            if field_name not in comdefs.IFACE_TYPES:
                raise Win32EmuError('COM interface %s inherits unsupported interface %s' % (name, field_name))
            for subfield_name, subfield_type in field_obj._fields_:
                if not issubclass(subfield_type, method_types):
                    continue
                # Inherited interface field is a method; hook if supported
                hook_slot(field_name, subfield_name, slot_offset)
                slot_offset += ptr_size
        elif issubclass(field_obj, method_types):
            # The field is a method of this interface; hook if supported
            hook_slot(name, field_name, slot_offset)
            slot_offset += ptr_size
        else:
            raise Win32EmuError('Invalid field type encountered for %s.%s' % (name, field_name))

    return ci
def run_shellcode(self, sc_addr, offset=0):
    """
    Begin emulating position independent code (i.e. shellcode) that was
    previously recorded in self.pic_buffers

    Arguments:
        sc_addr: Base address of the shellcode buffer to run
        offset: Added to sc_addr to form the address emulation starts at

    Raises:
        Win32EmuError: If sc_addr matches no entry in self.pic_buffers
    """
    # Test for a matching entry explicitly; checking the truthiness of the
    # matched address would reject a buffer whose address is 0
    if not any(buf_addr == sc_addr for _, buf_addr, _ in self.pic_buffers):
        raise Win32EmuError("Invalid shellcode address")

    stack_commit = 0x4000
    self.stack_base, _stack_addr = self.alloc_stack(stack_commit)
    self.set_func_args(self.stack_base, self.return_hook, 0x7000)

    run = Run()
    run.type = "shellcode"
    run.start_addr = sc_addr + offset
    run.instr_cnt = 0
    # Map four 1024 byte buffers that are handed to the run as its arguments
    run.args = [
        self.mem_map(1024, tag="emu.shellcode_arg_%d" % (i), base=0x41420000 + i)
        for i in range(4)
    ]
    # NOTE(review): 1024 matches the argument buffer size above - confirm
    # that ECX is meant to carry that size
    self.reg_write(_arch.X86_REG_ECX, 1024)
    self.add_run(run)

    # Create an empty process object for the shellcode if none is
    # supplied
    container = self.init_container_process()
    if container:
        self.processes.append(container)
        self.curr_process = container
    else:
        p = objman.Process(self)
        self.processes.append(p)
        self.curr_process = p

    # Associate the memory holding the shellcode with the current process
    mm = self.get_address_map(sc_addr)
    if mm:
        mm.set_process(self.curr_process)

    t = objman.Thread(self, stack_base=self.stack_base, stack_commit=stack_commit)
    self.om.objects.update({t.address: t})
    self.curr_process.threads.append(t)
    self.curr_thread = t

    peb = self.alloc_peb(self.curr_process)

    # Set the TEB
    self.init_teb(t, peb)

    self.start()
def load_shellcode(self, path, arch, data=None):
    """
    Map position independent code (i.e. shellcode) into emulator memory

    Arguments:
        path: File the shellcode is read from when no data is supplied
        arch: Architecture of the shellcode (_arch.ARCH_X86 or
              _arch.ARCH_AMD64)
        data: Shellcode bytes; used instead of reading path when truthy

    Returns:
        Address the shellcode was mapped at
    """
    self.arch = arch

    if data:
        sc = data
    else:
        with open(path, 'rb') as scpath:
            sc = scpath.read()
    sc_hash = hashlib.sha256(sc).hexdigest()

    if self.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif self.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        raise Win32EmuError('Unsupported architecture: %s' % self.arch)

    self.emu_eng.init_engine(_arch.ARCH_X86, self.arch)
    if not self.disasm_eng:
        self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    # Map the shellcode into memory and remember where it went
    sc_size = len(sc)
    sc_tag = 'emu.shellcode.%s' % (sc_hash)
    sc_addr = self.mem_map(sc_size, tag=sc_tag)
    self.mem_write(sc_addr, sc)
    self.pic_buffers.append((path, sc_addr, sc_size))

    if arch == _arch.ARCH_AMD64:
        sc_arch = 'x64'
    elif arch == _arch.ARCH_X86:
        sc_arch = 'x86'
    else:
        sc_arch = 'unknown'

    if not self.profiler:
        return sc_addr

    metadata = {
        'path': path,
        'sha256': sc_hash,
        'size': sc_size,
        'arch': sc_arch,
        'mem_tag': sc_tag,
        'emu_version': self.get_emu_version(),
        'os_run': self.get_osver_string(),
    }
    self.input = metadata
    self.profiler.add_input_metadata(self.input)

    # Strings the initial buffer so that we can detect decoded strings later on
    if self.do_strings:
        self.profiler.strings['ansi'] = self.get_ansi_strings(sc)
        self.profiler.strings['unicode'] = self.get_unicode_strings(sc)

    return sc_addr
def get_com_interface(self, name):
    """
    Look up a COM interface by name and map memory for it

    Arguments:
        name: Name of the COM interface to retrieve

    Returns:
        The interface object with its address attribute set to the newly
        mapped memory

    Raises:
        Win32EmuError: If the COM manager returns nothing for the name
    """
    ptr_size = self.get_ptr_size()
    com_iface = self.com.get_interface(name, ptr_size)
    if com_iface:
        iface_size = self.sizeof(com_iface.iface)
        com_iface.address = self.mem_map(iface_size, tag='emu.COM.%s' % (name))
        return com_iface
    raise Win32EmuError('Invalid COM interface: %s' % (name))
def run_module(self, module, all_entrypoints=False):
    """
    Queue the runs for a previously loaded module and start emulation

    Arguments:
        module: Module to emulate
        all_entrypoints: When true, also queue runs for the module's
                         exports (up to MAX_EXPORTS_TO_EMULATE)

    Raises:
        Win32EmuError: If module is falsy
    """
    if not module:
        self.stop()
        raise Win32EmuError('Module not found')

    # TLS callbacks are queued first since they run before the module's
    # entry point
    for idx, callback in enumerate(module.get_tls_callbacks()):
        image_base = module.get_base()
        if not (image_base < callback < image_base + module.get_image_size()):
            continue
        tls_run = Run()
        tls_run.start_addr = callback
        tls_run.type = 'tls_callback_%d' % (idx)
        tls_run.args = [image_base, DLL_PROCESS_ATTACH, 0]
        self.add_run(tls_run)

    # Queue up the module's main entry point
    entry_run = Run()
    entry_run.start_addr = module.base + module.ep
    if module.is_exe():
        entry_run.type = 'module_entry'
        entry_run.args = [
            self.mem_map(8, tag='emu.module_arg_%d' % (n)) for n in range(4)
        ]
        # The main executable goes to the front of the user module list
        self.user_modules = [module] + self.user_modules
    else:
        entry_run.args = [module.base, DLL_PROCESS_ATTACH, 0]
        entry_run.type = 'dll_entry.DLL_PROCESS_ATTACH'
        container = self.init_container_process()
        if container:
            self.processes.append(container)
            self.curr_process = container
    self.add_run(entry_run)

    if all_entrypoints:
        # Only emulate a subset of all the exported functions
        # There are some modules (such as the windows kernel) with
        # thousands of exports
        exports = list(module.get_exports()[:MAX_EXPORTS_TO_EMULATE])
        if exports:
            # Dummy args shared by every export run
            dummy_args = [
                self.mem_map(8, tag='emu.export_arg_%d' % (n), base=0x41420000)
                for n in range(4)
            ]  # noqa
            for exp in exports:
                if exp.name in ('DllMain', 'ServiceMain'):
                    continue
                export_run = Run()
                export_run.type = 'export.%s' % (exp.name if exp.name else 'no_name')
                export_run.start_addr = exp.address
                export_run.args = dummy_args
                # Store these runs and only queue them before the unload
                # routine this is because some exports may not be ready to
                # be called yet
                self.add_run(export_run)

    # Create an empty process object for the module if none is
    # supplied
    if len(self.processes) == 0:
        proc = objman.Process(self, path=module.get_emu_path(), base=module.base,
                              pe=module, cmdline=self.command_line)
        self.curr_process = proc
        self.om.objects.update({proc.address: proc})
        addr_map = self.get_address_map(module.base)
        if addr_map:
            addr_map.process = self.curr_process

    thread = objman.Thread(self, stack_base=self.stack_base,
                           stack_commit=module.stack_commit)
    self.om.objects.update({thread.address: thread})
    self.curr_process.threads.append(thread)
    self.curr_thread = thread

    peb = self.alloc_peb(self.curr_process)

    # Set the TEB
    self.init_teb(thread, peb)

    # Begin emulation
    self.start()
def load_module(self, path=None, data=None):
    """
    Load a module into the emulator space from the specified path

    Arguments:
        path: Path of the module on disk; read when no data is supplied
        data: Raw bytes of the module; used instead of reading path
              when truthy

    Returns:
        The PE object returned by self.load_pe

    Raises:
        Win32EmuError: If the PE architecture is neither x86 nor amd64
    """
    pe = self.load_pe(path=path, data=data, imp_id=w32common.IMPORT_HOOK_ADDR)

    if pe.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif pe.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        # Format the architecture into the message instead of passing it
        # as a second, never interpolated, exception argument
        raise Win32EmuError('Unsupported architecture: %s' % (pe.arch,))

    if not self.arch:
        self.arch = pe.arch
        self.set_ptr_size(self.arch)

    self.emu_eng.init_engine(_arch.ARCH_X86, pe.arch)

    if not self.disasm_eng:
        self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    if not data:
        # '.exe' is appended to the base name, so stripping the extension
        # again leaves the original base name as the module name
        file_name = os.path.basename(path) + '.exe'
        mod_name = os.path.splitext(file_name)[0]
    else:
        # No path to derive a name from; name the module after its hash
        mod_hash = hashlib.sha256()
        mod_hash.update(data)
        mod_hash = mod_hash.hexdigest()
        mod_name = mod_hash
        file_name = '%s.exe' % (mod_name)

    self.api = WindowsApi(self)

    cd = self.get_cd()
    if not cd.endswith('\\'):
        cd += '\\'
    emu_path = cd + file_name

    if not data:
        with open(path, 'rb') as f:
            data = f.read()
    self.fileman.add_existing_file(emu_path, data)

    # Strings the initial buffer so that we can detect decoded strings later on
    if self.profiler and self.do_strings:
        self.profiler.strings['ansi'] = self.get_ansi_strings(data)
        self.profiler.strings['unicode'] = self.get_unicode_strings(data)

    # Set the emulated path
    emu_path = ''
    self.cd = self.get_cd()
    if self.cd:
        if not self.cd.endswith('\\'):
            self.cd += '\\'
        emu_path = self.cd + os.path.basename(file_name)

    pe.set_emu_path(emu_path)
    self.map_pe(pe, mod_name=mod_name, emu_path=emu_path)
    self.mem_write(pe.base, pe.mapped_image)

    self.setup()

    if not self.stack_base:
        self.stack_base, stack_addr = self.alloc_stack(0x12000)
    self.set_func_args(self.stack_base, self.return_hook)

    # Init imported data
    for addr, imp in pe.imports.items():
        mn, fn = imp
        mod, eh = self.api.get_data_export_handler(mn, fn)
        if eh:
            data_ptr = self.handle_import_data(mn, fn)
            sym = "%s.%s" % (mn, fn)
            self.global_data.update({addr: [sym, data_ptr]})
            # Store the data pointer at the import's address
            self.mem_write(
                addr,
                data_ptr.to_bytes(self.get_ptr_size(), 'little'))
    return pe
def load_shellcode(self, path, arch, data=None):
    """
    Map position independent code (i.e. shellcode) into emulator memory

    Arguments:
        path: File the shellcode is read from when no data is supplied
        arch: Architecture of the shellcode; the strings "x86", "x64" and
              "amd64" are translated to the matching _arch constants
        data: Shellcode bytes; used instead of reading path when truthy

    Returns:
        Address the shellcode was mapped at
    """
    self._init_name(path, data)

    # Accept the textual architecture names as well as the constants
    if arch == "x86":
        arch = _arch.ARCH_X86
    elif arch in ("x64", "amd64"):
        arch = _arch.ARCH_AMD64
    self.arch = arch

    if data:
        sc = data
    else:
        with open(path, "rb") as scpath:
            sc = scpath.read()
    sc_hash = hashlib.sha256(sc).hexdigest()

    if self.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif self.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        raise Win32EmuError("Unsupported architecture: %s" % self.arch)

    self.emu_eng.init_engine(_arch.ARCH_X86, self.arch)
    if not self.disasm_eng:
        self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    # Map the shellcode into memory and remember where it went
    sc_size = len(sc)
    sc_tag = "emu.shellcode.%s" % (sc_hash)
    sc_addr = self.mem_map(sc_size, tag=sc_tag)
    self.mem_write(sc_addr, sc)
    self.pic_buffers.append((path, sc_addr, sc_size))

    if arch == _arch.ARCH_AMD64:
        sc_arch = "x64"
    elif arch == _arch.ARCH_X86:
        sc_arch = "x86"
    else:
        sc_arch = "unknown"

    if self.profiler:
        metadata = {
            "path": path,
            "sha256": sc_hash,
            "size": sc_size,
            "arch": sc_arch,
            "mem_tag": sc_tag,
            "emu_version": self.get_emu_version(),
            "os_run": self.get_osver_string(),
        }
        self.input = metadata
        self.profiler.add_input_metadata(self.input)

        # Strings the initial buffer so that we can detect decoded strings later on
        if self.do_strings:
            ansi_hits = self.get_ansi_strings(sc)
            self.profiler.strings["ansi"] = [hit[1] for hit in ansi_hits]
            unicode_hits = self.get_unicode_strings(sc)
            self.profiler.strings["unicode"] = [hit[1] for hit in unicode_hits]

    self.setup()
    return sc_addr
def prepare_module_for_emulation(self, module, all_entrypoints, entrypoints):
    """
    Queue the runs (TLS callbacks, entry point, exports) for a module

    Arguments:
        module: Module to prepare
        all_entrypoints: When true, queue the entry point and every
                         export considered
        entrypoints: Names selecting which runs to queue when
                     all_entrypoints is false; "DLL_PROCESS_ATTACH"
                     selects the module entry point

    Raises:
        Win32EmuError: If module is falsy
    """
    if not module:
        self.stop()
        raise Win32EmuError("Module not found")

    # TLS callbacks are queued first since they run before the module's
    # entry point
    for idx, callback in enumerate(module.get_tls_callbacks()):
        image_base = module.get_base()
        if not (image_base < callback < image_base + module.get_image_size()):
            continue
        tls_run = Run()
        tls_run.start_addr = callback
        tls_run.type = "tls_callback_%d" % (idx)
        tls_run.args = [image_base, DLL_PROCESS_ATTACH, 0]
        self.add_run(tls_run)

    entry_run = Run()
    entry_run.start_addr = module.base + module.ep
    if module.is_exe():
        entry_run.type = "module_entry"
        entry_run.args = [
            self.mem_map(8, tag="emu.module_arg_%d" % (n)) for n in range(4)
        ]
        # The main executable goes to the front of the user module list
        self.user_modules = [module] + self.user_modules
    else:
        entry_run.args = [module.base, DLL_PROCESS_ATTACH, 0]
        entry_run.type = "dll_entry.DLL_PROCESS_ATTACH"
        container = self.init_container_process()
        if container:
            self.processes.append(container)
            self.curr_process = container

    # The entry point run is only queued when all entry points are
    # selected or DLL_PROCESS_ATTACH is in the entrypoints
    if all_entrypoints or "DLL_PROCESS_ATTACH" in entrypoints:
        self.add_run(entry_run)

    if not (all_entrypoints or entrypoints):
        return

    # Only emulate a subset of all the exported functions
    # There are some modules (such as the windows kernel) with
    # thousands of exports
    exports = list(module.get_exports()[:MAX_EXPORTS_TO_EMULATE])
    if not exports:
        return

    # Dummy args shared by every export run other than ServiceMain
    dummy_args = [
        self.mem_map(8, tag="emu.export_arg_%d" % (n), base=0x41420000)
        for n in range(4)
    ]  # noqa

    for exp in exports:
        if exp.name == "DllMain":
            continue
        if not (all_entrypoints or exp.name in entrypoints):
            continue

        export_run = Run()
        export_run.type = "export.%s" % (exp.name if exp.name else "no_name")
        export_run.start_addr = exp.address

        if exp.name == "ServiceMain":
            # ServiceMain accepts a (argc, argv) pair like main().
            #
            # We cannot tell from here whether the export expects ANSI or
            # wide strings, so wide is assumed and the default service
            # name "IPRIP" is used.
            #
            # hack: if the export is actually ANSI, string routines will
            # see the service name as "I", which isn't perfect but might
            # still be good enough.
            #
            # layout:
            #   argc: 1
            #   argv:
            #     0x00:    (argv[0]) pointer to +0x10 -+
            #     0x04/08: (argv[1]) 0x0               |
            #     0x10:    "IPRIP" <-------------------+
            svc_name = "IPRIP\x00".encode("utf-16le")
            argv = self.mem_map(
                len(svc_name) + 0x10,
                tag="emu.export_ServiceMain_argv",
                base=0x41420000,
            )
            self.write_ptr(argv, argv + 0x10)
            self.mem_write(argv + 0x10, svc_name)
            export_run.args = [1, argv]
        else:
            export_run.args = dummy_args

        # Store these runs and only queue them before the unload
        # routine this is because some exports may not be ready to
        # be called yet
        self.add_run(export_run)
def load_module(self, path=None, data=None, first_time_setup=True):
    """
    Load a module into the emulator space from the specified path

    Arguments:
        path: Path of the module on disk; read when no data is supplied
        data: Raw bytes of the module; used instead of reading path
              when truthy
        first_time_setup: When true, initialize the emulation engine and
                          the disassembler; also forwarded to self.setup

    Returns:
        The PE object returned by self.load_pe

    Raises:
        Win32EmuError: If the PE architecture is neither x86 nor amd64
    """
    # NOTE(review): self.file_name and self.mod_name are read below and
    # are presumably set by _init_name - confirm
    self._init_name(path, data)

    pe = self.load_pe(path=path, data=data, imp_id=w32common.IMPORT_HOOK_ADDR)

    if pe.arch == _arch.ARCH_X86:
        disasm_mode = cs.CS_MODE_32
    elif pe.arch == _arch.ARCH_AMD64:
        disasm_mode = cs.CS_MODE_64
    else:
        # Format the architecture into the message instead of passing it
        # as a second, never interpolated, exception argument
        raise Win32EmuError("Unsupported architecture: %s" % (pe.arch,))

    if not self.arch:
        self.arch = pe.arch
        self.set_ptr_size(self.arch)

    # No need to initialize the engine and Capstone again
    if first_time_setup:
        self.emu_eng.init_engine(_arch.ARCH_X86, pe.arch)

        if not self.disasm_eng:
            self.disasm_eng = cs.Cs(cs.CS_ARCH_X86, disasm_mode)

    self.api = WindowsApi(self)

    cd = self.get_cd()
    if not cd.endswith("\\"):
        cd += "\\"
    emu_path = cd + self.file_name

    if not data:
        with open(path, "rb") as f:
            data = f.read()
    self.fileman.add_existing_file(emu_path, data)

    # Strings the initial buffer so that we can detect decoded strings later on
    if self.profiler and self.do_strings:
        self.profiler.strings["ansi"] = [a[1] for a in self.get_ansi_strings(data)]
        self.profiler.strings["unicode"] = [
            u[1] for u in self.get_unicode_strings(data)
        ]

    # Set the emulated path
    emu_path = ""
    self.cd = self.get_cd()
    if self.cd:
        if not self.cd.endswith('\\'):
            self.cd += '\\'
        emu_path = self.cd + os.path.basename(self.file_name)

    pe.set_emu_path(emu_path)

    # There's a bit of a problem here: if we cannot reserve memory
    # at the PE's desired base address, and the relocation table
    # is not present, we can't rebase it. So this has to be a bit
    # of a hack for binaries without a relocation table.
    # This logic is really only for child processes, since we're pretty
    # much guaranteed memory at the base address of the main module.
    # 1. If the memory at the child's desired load address is already
    #    being used, remap it somewhere else. The already-used memory
    #    is expected to belong to a module, since desired load
    #    addresses don't really vary across PEs
    # 2. Fix up any modules that speakeasy has open for the parent
    #    to reflect where it was remapped
    # 3. Try and grab memory at the child's desired base address;
    #    if that still isn't possible, we're out of luck
    #
    # But if the relocation table is present, we can rebase it,
    # so we do that instead of the above hack.
    imgbase = pe.OPTIONAL_HEADER.ImageBase
    ranges = self.get_valid_ranges(pe.image_size, addr=imgbase)
    base, size = ranges

    if base != imgbase:
        if pe.has_reloc_table():
            pe.rebase(base)
        else:
            parent_map = self.get_address_map(imgbase)

            # Already being used by the parent, so let's remap the parent
            # Do get_valid_ranges on the parent map size so we get a
            # suitable region for it
            new_parent_mem, unused = self.get_valid_ranges(parent_map.size)
            new_parent_mem = self.mem_remap(imgbase, new_parent_mem)

            # Failed
            if new_parent_mem == -1:
                # XXX what to do here
                pass

            # Update parent module pointer
            for pe_, ranges_, emu_path_ in self.modules:
                base_, size_ = ranges_
                if base_ == imgbase:
                    # Safe to mutate the list here because the loop
                    # stops immediately afterwards
                    self.modules.remove((pe_, ranges_, emu_path_))
                    self.modules.append((pe_, (new_parent_mem, size_), emu_path_))
                    break

            # Alright, let's try to grab that memory for the child again
            ranges = self.get_valid_ranges(pe.image_size, addr=imgbase)
            base, size = ranges
            if base != imgbase:
                # Out of luck
                # XXX what to do here
                pass

    self.mem_map(pe.image_size, base=base,
                 tag='emu.module.%s' % (self.mod_name))

    self.modules.append((pe, ranges, emu_path))
    self.mem_write(pe.base, pe.mapped_image)

    self.setup(first_time_setup=first_time_setup)

    if not self.stack_base:
        self.stack_base, stack_addr = self.alloc_stack(0x12000)
    self.set_func_args(self.stack_base, self.return_hook)

    # Init imported data
    for addr, imp in pe.imports.items():
        mn, fn = imp
        mod, eh = self.api.get_data_export_handler(mn, fn)
        if eh:
            data_ptr = self.handle_import_data(mn, fn)
            sym = "%s.%s" % (mn, fn)
            self.global_data.update({addr: [sym, data_ptr]})
            # Store the data pointer at the import's address
            self.mem_write(addr, data_ptr.to_bytes(self.get_ptr_size(), "little"))
    return pe