def perform_manual_getproc_loadlib_64_for_dbg(target, dll_name):
    """Load ``dll_name`` into ``target`` with a hand-written x64 stub.

    The stub resolves ``LoadLibraryA`` in ``KERNEL32.DLL`` through the
    ``:FUNC_GETPROCADDRESS64`` routine appended from ``nativeutils`` and then
    calls it on the NUL-terminated ``dll_name``.

    A 0x1000-byte buffer is allocated in ``target`` and filled, back to back,
    with: the UTF-16-LE module name, the API name, the DLL path, and finally a
    table of three qwords pointing to those three strings. The address of that
    table is handed to the stub as its parameter.

    :param target: object exposing ``virtual_alloc``, ``write_memory``,
        ``write_qword`` and ``execute``
    :param dll_name: name/path of the DLL to load (without trailing NUL)
    :return: whatever ``target.execute`` returns
    """
    kernel32_name = "KERNEL32.DLL\x00".encode("utf-16-le")
    loadlibrary_name = "LoadLibraryA\x00"
    library_path = dll_name + "\x00"

    shellcode = x64.MultipleInstr()
    for instr in (
        # RCX holds the parameter table on entry: keep it in R15
        x64.Mov("R15", "RCX"),
        x64.Mov("RCX", x64.mem("[R15 + 0]")),
        x64.Mov("RDX", x64.mem("[R15 + 8]")),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        x64.Mov("RCX", x64.mem("[R15 + 0x10]")),
        x64.Push("RCX"),
        x64.Push("RCX"),
        x64.Push("RCX"),
        x64.Call("RAX"),  # LoadLibrary
        x64.Pop("RCX"),
        x64.Pop("RCX"),
        x64.Pop("RCX"),
        x64.Ret(),
    ):
        shellcode += instr
    shellcode += nativeutils.GetProcAddress64

    # Remote layout: [module name][api name][dll path][3 * qword pointers]
    module_addr = target.virtual_alloc(0x1000)
    api_addr = module_addr + len(kernel32_name)
    path_addr = api_addr + len(loadlibrary_name)
    table_addr = path_addr + len(library_path)

    for where, blob in (
        (module_addr, kernel32_name),
        (api_addr, loadlibrary_name),
        (path_addr, library_path),
    ):
        target.write_memory(where, blob)
    for slot, pointer in enumerate((module_addr, api_addr, path_addr)):
        target.write_qword(table_addr + 8 * slot, pointer)

    return target.execute(shellcode.get_code(), table_addr)
def register_io_in(self):
    """Register the ``DU_IN_IOCTL`` handler: read a value from an I/O port.

    The generated handler expects a 0x10-byte input buffer made of two
    qwords: a size indicator, then the port. A size of 1 issues ``in al, dx``,
    a size of 2 ``in ax, dx`` and any other value ``in eax, dx``. RAX is then
    stored in the IRP output buffer and the handler returns 0.
    If the input length is not 0x10 the handler returns 0xC000000D.
    """
    size_field = x64.mem('[RCX]')
    port_field = x64.mem('[RCX + 8]')

    handler = x64.MultipleInstr()
    for instr in (
        x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x10),  # size indicator / port
        x64.Jnz(":FAIL"),
        x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER),
        x64.Mov('RDX', port_field),
        # RCX is overwritten last: both fields are read through it
        x64.Mov('RCX', size_field),
        x64.Cmp('RCX', 0x1),
        x64.Jnz(":OUT_2_OR_4"),
        x64.In('AL', 'DX'),
        x64.Jmp(':SUCCESS'),
        x64.Label(":OUT_2_OR_4"),
        x64.Cmp('RCX', 0x2),
        x64.Jnz(":OUT_4"),
        x64.In('AX', 'DX'),
        x64.Jmp(':SUCCESS'),
        x64.Label(":OUT_4"),
        x64.In('EAX', 'DX'),
        x64.Label(":SUCCESS"),
        x64.Mov('RDX', self.IRP_OUTPUT_BUFFER),
        x64.Mov(x64.mem('[RDX]'), 'RAX'),
        x64.Xor('RAX', 'RAX'),
        x64.Ret(),
        x64.Label(":FAIL"),
        x64.Mov('RAX', 0x0C000000D),
        x64.Ret(),
    ):
        handler += instr

    self.upgrade_driver_add_new_ioctl_handler(DU_IN_IOCTL, handler.get_code())
def register_io_out(self):
    """Register the ``DU_OUT_IOCTL`` handler: write a value to an I/O port.

    The generated handler expects a 0x18-byte input buffer made of three
    qwords: a size indicator, the port and the value. A size of 1 issues
    ``out dx, al``, a size of 2 ``out dx, ax`` and any other value
    ``out dx, eax``. The handler then returns 0.
    If the input length is not 0x18 the handler returns 0xC000000D.
    """
    size_field = x64.mem('[RCX]')
    port_field = x64.mem('[RCX + 8]')
    value_field = x64.mem('[RCX + 0x10]')

    handler = x64.MultipleInstr()
    for instr in (
        x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x18),  # size indicator / port / value
        x64.Jnz(":FAIL"),
        x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER),
        x64.Mov('RDX', port_field),
        x64.Mov('RAX', value_field),
        # RCX is overwritten last: all the fields are read through it
        x64.Mov('RCX', size_field),
        x64.Cmp('RCX', 0x1),
        x64.Jnz(":OUT_2_OR_4"),
        x64.Out('DX', 'AL'),
        x64.Jmp(':SUCCESS'),
        x64.Label(":OUT_2_OR_4"),
        x64.Cmp('RCX', 0x2),
        x64.Jnz(":OUT_4"),
        x64.Out('DX', 'AX'),
        x64.Jmp(':SUCCESS'),
        x64.Label(":OUT_4"),
        x64.Out('DX', 'EAX'),
        x64.Label(":SUCCESS"),
        x64.Xor('RAX', 'RAX'),
        x64.Ret(),
        x64.Label(":FAIL"),
        x64.Mov('RAX', 0x0C000000D),
        x64.Ret(),
    ):
        handler += instr

    self.upgrade_driver_add_new_ioctl_handler(DU_OUT_IOCTL, handler.get_code())
def register_alloc_memory(self):
    """Register the ``DU_MEMALLOC_IOCTL`` handler: allocate kernel memory.

    The generated handler expects a 0x18-byte input buffer made of three
    qwords (type, size, tag) and forwards them, in that order, as RCX / RDX /
    R8 to ``nt!ExAllocatePoolWithTag``. The value returned in RAX is stored in
    the IRP output buffer and the handler returns 0.
    If the input length is not 0x18 the handler returns 0xC000000D.

    :raises ValueError: if ``nt!ExAllocatePoolWithTag`` cannot be resolved
    """
    pool_alloc_addr = self.kdbg.get_symbol_offset("nt!ExAllocatePoolWithTag")
    if pool_alloc_addr is None:
        raise ValueError("Could not resolve <ExAllocatePoolWithTag>")

    type_field = x64.mem('[RCX]')
    size_field = x64.mem('[RCX + 0x8]')
    tag_field = x64.mem('[RCX + 0x10]')

    handler = x64.MultipleInstr()
    for instr in (
        x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x18),
        x64.Jnz(':FAIL'),
        x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER),
        x64.Mov('R8', tag_field),
        x64.Mov('RDX', size_field),
        # RCX is overwritten last: all the fields are read through it
        x64.Mov('RCX', type_field),
        x64.Mov('RAX', pool_alloc_addr),
        x64.Call('RAX'),
        x64.Mov('RBX', self.IRP_OUTPUT_BUFFER),
        x64.Mov(x64.mem('[RBX]'), 'RAX'),
        x64.Xor('RAX', 'RAX'),
        x64.Ret(),
        x64.Label(":FAIL"),
        x64.Mov('RAX', 0x0C000000D),
        x64.Ret(),
    ):
        handler += instr

    self.upgrade_driver_add_new_ioctl_handler(DU_MEMALLOC_IOCTL, handler.get_code())
def get_peb_addr(self):
    """Return the PEB address of the process by running a small stub in it.

    A 0x1000-byte region is allocated; its first pointer-sized slot is zeroed
    and receives the result, and the stub is written right after it. The stub
    reads ``fs:[0x30]`` (32 bits) or ``gs:[0x60]`` (64 bits), stores the value
    in the result slot and returns. It is started with ``create_thread`` and
    the slot is read back after a 10 ms sleep.

    NOTE(review): the thread is not waited on, only slept for; the result is
    0 if the stub has not run yet. The allocated region is not released here.

    :return: the value read back from the result slot
    """
    dest = self.virtual_alloc(0x1000)

    # (assembler module, accumulator, PEB location, pointer size, unpack format)
    if self.bitness == 32:
        asm, accumulator, peb_location, ptr_size, ptr_format = x86, 'EAX', 'fs:[0x30]', 4, "<I"
    else:
        asm, accumulator, peb_location, ptr_size, ptr_format = x64, 'RAX', 'gs:[0x60]', 8, "<Q"

    stub = asm.MultipleInstr()
    stub += asm.Mov(accumulator, asm.mem(peb_location))
    stub += asm.Mov(asm.create_displacement(disp=dest), accumulator)
    stub += asm.Ret()
    stub_bytes = stub.get_code()

    stub_addr = dest + ptr_size
    self.write_memory(dest, "\x00" * ptr_size)
    self.write_memory(stub_addr, stub_bytes)
    self.create_thread(stub_addr, 0)
    time.sleep(0.01)
    return struct.unpack(ptr_format, self.read_memory(dest, ptr_size))[0]
def sc_64_AllocRWX(address, rwx_qword):
    """Build x64 shellcode that allocates a RWX page at ``address`` and jumps to it.

    The generated code:

    - pushes the ``KERNEL32.DLL`` (UTF-16-LE) and ``VirtualAlloc`` strings on
      the stack and resolves the API through ``:FUNC_GETPROCADDRESS64``
    - calls it with ``(address, 0x1000, MEM_COMMIT | MEM_RESERVE,
      PAGE_EXECUTE_READWRITE)``
    - writes ``rwx_qword`` at ``address`` and calls ``address``

    :param address: address passed to ``VirtualAlloc`` and then called
    :param rwx_qword: qword written at ``address`` before calling it
    :return: the assembled code, as returned by ``MultipleInstr.get_code()``
    """
    dll = "KERNEL32.DLL\x00".encode("utf-16-le")
    api = "VirtualAlloc\x00"
    AllocRWX64_sc = x64.MultipleInstr()
    instructions = [
        shellcraft.amd64.pushstr(dll),
        x64.Mov("RCX", "RSP"),
        shellcraft.amd64.pushstr(api),
        x64.Mov("RDX", "RSP"),
        x64.Call(":FUNC_GETPROCADDRESS64"),
        x64.Mov("R10", "RAX"),
        x64.Mov("RCX", address),
        x64.Mov("RDX", 0x1000),
        x64.Mov("R8", MEM_COMMIT | MEM_RESERVE),
        x64.Mov("R9", PAGE_EXECUTE_READWRITE),
        x64.Sub("RSP", 0x30),
        x64.And("RSP", -32),
        x64.Call("R10"),
        x64.Mov('RAX', rwx_qword),
        x64.Mov("RCX", address),
        x64.Mov(x64.mem('[RCX]'), 'RAX'),
        x64.Call("RCX"),
        windows.native_exec.nativeutils.GetProcAddress64,
    ]
    # Explicit loop instead of map(): map() is lazy on Python 3, so using it
    # only for its side effects would append nothing and yield empty shellcode.
    for instruction in instructions:
        AllocRWX64_sc += instruction
    return AllocRWX64_sc.get_code()
def get_current_process_syswow_peb_addr():
    """Return the value of ``gs:[0x60]`` read by 64-bit code in the current process.

    An 8-byte slot is reserved with the current process allocator and zeroed;
    a two-instruction x64 stub stores ``gs:[0x60]`` into it and is run through
    ``execute_64bits_code_from_syswow``; the slot is then read back.

    :return: the qword read back from the slot
    """
    process = windows.current_process
    result_slot = process.allocator.reserve_size(8)

    stub = x64.MultipleInstr()
    for instr in (
        x64.Mov('RAX', x64.mem('gs:[0x60]')),
        x64.Mov(x64.create_displacement(disp=result_slot), 'RAX'),
    ):
        stub += instr

    process.write_memory(result_slot, "\x00" * 8)
    execute_64bits_code_from_syswow(stub.get_code())
    raw_qword = process.read_memory(result_slot, 8)
    return struct.unpack("<Q", raw_qword)[0]
def register_kernel_call(self):
    """Register the ``DU_KCALL_IOCTL`` handler: call an arbitrary address.

    The generated handler expects the input buffer to be a sequence of
    qwords: the address to call followed by its arguments.

    - An empty input buffer makes the handler return 0xC000000D.
    - While the remaining length is above 5 qwords, the last remaining qword
      is pushed on the stack (R15 tracks the number of bytes to release).
    - The up-to-four remaining arguments are loaded in RCX, RDX, R8 and R9.
    - Four extra qwords are pushed before the call (reserved space required
      by the calling convention).
    - The value returned in RAX is stored in the IRP output buffer, the stack
      is released and the handler returns 0.
    """
    next_stack_arg = x64.create_displacement(base='R10', index='RAX')

    # (length matching that argument count, label, register, buffer slot),
    # ordered from 4 arguments down to 1 so that each entry falls through
    # into the loads of the lower ones.
    register_args = (
        ((8 * 5), ":SETUP_4_ARGS", 'R9', '[R10 + 0x20]'),
        ((8 * 4), ":SETUP_3_ARGS", 'R8', '[R10 + 0x18]'),
        ((8 * 3), ":SETUP_2_ARGS", 'RDX', '[R10 + 0x10]'),
        ((8 * 2), ":SETUP_1_ARGS", 'RCX', '[R10 + 8]'),
    )

    steps = [
        x64.Mov('RAX', self.IO_STACK_INPUT_BUFFER_LEN),
        x64.Cmp('RAX', 0),
        x64.Jz(":FAIL"),  # Need at least the function to call
        x64.Mov('R15', 4 * 8),  # Size to pop on the stack at the end (4 * push RDI)
        x64.Mov('R10', self.IO_STACK_INPUT_BUFFER),
        # Push every argument that does not fit in a register, last one first
        x64.Label(':PUSH_NEXT_ARG'),
        x64.Cmp('RAX', (8 * 5)),
        x64.Jbe(":SETUP_REG_ARGS"),
        x64.Sub('RAX', 8),
        x64.Mov('RBX', next_stack_arg),
        x64.Push('RBX'),
        x64.Add('R15', 8),  # One more qword to release at the end
        x64.Jmp(':PUSH_NEXT_ARG'),
        x64.Label(":SETUP_REG_ARGS"),
    ]
    # Dispatch on the remaining length to the matching register setup
    for length, label, _, _ in register_args:
        steps.append(x64.Cmp('RAX', length))
        steps.append(x64.Jz(label))
    steps.append(x64.Jmp(":SETUP_0_ARGS"))
    # Fall-through chain of register loads
    for _, label, register, slot in register_args:
        steps.append(x64.Label(label))
        steps.append(x64.Mov(register, x64.mem(slot)))
    steps.append(x64.Label(":SETUP_0_ARGS"))
    steps.append(x64.Mov('RAX', x64.mem('[R10]')))
    # Reserve space (calling convention)
    steps.extend(x64.Push('RDI') for _ in range(4))
    steps.extend([
        x64.Call('RAX'),
        x64.Mov('RDX', self.IRP_OUTPUT_BUFFER),
        x64.Mov(x64.mem('[RDX]'), 'RAX'),
        x64.Xor('RAX', 'RAX'),
        x64.Add('RSP', 'R15'),
        x64.Ret(),
        x64.Label(":FAIL"),
        x64.Mov('RAX', 0x0C000000D),
        x64.Ret(),
    ])

    handler = x64.MultipleInstr()
    for step in steps:
        handler += step

    self.upgrade_driver_add_new_ioctl_handler(DU_KCALL_IOCTL, handler.get_code())
import windows.test
import windows.native_exec.simple_x64 as x64
import windows.native_exec.nativeutils
from windows.generated_def.winstructs import *

# x64 routine appended after the stub below; the stub reaches it through the
# :FUNC_GETPROCADDRESS64 label (presumably defined inside this routine).
GetProcAddress64 = windows.native_exec.nativeutils.GetProcAddress64

# Module name as UTF-16-LE and API name as a byte string, both NUL-terminated
dll = "KERNEL32.DLL\x00".encode("utf-16-le")
api = "LoadLibraryA\x00"
# NOTE(review): no trailing NUL here, unlike `api` -- confirm how this value
# is written to the target by the rest of the script.
dll_to_load = "SUCE"

# Stub: resolve an API with GetProcAddress64, then call it with one argument.
# On entry RCX points to three qwords: [module name, API name, argument].
RemoteManualLoadLibray = x64.MultipleInstr()
c = RemoteManualLoadLibray
c += x64.Mov("R15", "RCX")  # keep the parameter table across the calls
c += x64.Mov("RCX", x64.mem("[R15 + 0]"))
c += x64.Mov("RDX", x64.mem("[R15 + 8]"))
c += x64.Call(":FUNC_GETPROCADDRESS64")
c += x64.Mov("RCX", x64.mem("[R15 + 0x10]"))
# Three pushes before the call, undone by the three pops after it
c += x64.Push("RCX")
c += x64.Push("RCX")
c += x64.Push("RCX")
c += x64.Call("RAX")  # RAX: value returned by the GetProcAddress64 routine
c += x64.Pop("RCX")
c += x64.Pop("RCX")
c += x64.Pop("RCX")
c += x64.Ret()
RemoteManualLoadLibray += GetProcAddress64

# Test process, created with the CREATE_SUSPENDED flag
calc = windows.test.pop_calc_64(dwCreationFlags=CREATE_SUSPENDED)
# Save the registers used by the routine
GetProcAddress64 += x64.Push("RBX")
GetProcAddress64 += x64.Push("RCX")
GetProcAddress64 += x64.Push("RDX")
GetProcAddress64 += x64.Push("RSI")
GetProcAddress64 += x64.Push("RDI")
GetProcAddress64 += x64.Push("R8")
GetProcAddress64 += x64.Push("R9")
GetProcAddress64 += x64.Push("R10")
GetProcAddress64 += x64.Push("R11")
GetProcAddress64 += x64.Push("R12")
GetProcAddress64 += x64.Push("R13")
# Params : RCX -> libname
# Params : RDX -> API Name
# Both are copied to R11 / R12 because RCX and RDX are overwritten below
GetProcAddress64 += x64.Mov("R11", "RCX")
GetProcAddress64 += x64.Mov("R12", "RDX")
GetProcAddress64 += x64.Mov("RAX", x64.mem("GS:[0x60]"))  # PEB
GetProcAddress64 += x64.Mov("RAX", x64.mem("[RAX + 24] "))  # RAX = ldr (+ 6 for 64 cause of 2 ptr)
GetProcAddress64 += x64.Mov("RAX", x64.mem("[RAX + 32]"))  # RAX on the first elt of the list (first module)
GetProcAddress64 += x64.Mov("RDX", "RAX")
# RDX holds the current list entry; presumably this label is the head of the
# module-walk loop (the jump back to it is outside this view).
GetProcAddress64 += x64.Label(":a_dest")
GetProcAddress64 += x64.Mov("RAX", "RDX")
GetProcAddress64 += x64.Mov("RBX", x64.mem("[RAX + 32]"))  # RBX : base of current module
# A null base jumps to the not-found path
GetProcAddress64 += x64.Cmp("RBX", 0)
GetProcAddress64 += x64.Jz(":DLL_NOT_FOUND")
GetProcAddress64 += x64.Mov("RCX", x64.mem("[RAX + 80]"))  # RCX = NAME (UNICODE_STRING.Buffer)
GetProcAddress64 += x64.Call(":FUNC_STRLENW64")
# RDI = module name buffer, RCX = value returned by FUNC_STRLENW64,
# RSI = requested libname (presumably the setup of a string comparison)
GetProcAddress64 += x64.Mov("RDI", "RCX")
GetProcAddress64 += x64.Mov("RCX", "RAX")
GetProcAddress64 += x64.Mov("RSI", "R11")
# Debug breakpoint, disabled:
#GetProcAddress64 += x64.Int3()
class CurrentProcess(Process):
    """The current process"""
    # Cache for the native function built by get_peb_builtin()
    get_peb = None

    # Native stub returning fs:[0x30] (32 bits)
    get_peb_32_code = x86.MultipleInstr()
    get_peb_32_code += x86.Mov('EAX', x86.mem('fs:[0x30]'))
    get_peb_32_code += x86.Ret()
    get_peb_32_code = get_peb_32_code.get_code()

    # Native stub returning gs:[0x60] (64 bits)
    get_peb_64_code = x64.MultipleInstr()
    get_peb_64_code += x64.Mov('RAX', x64.mem('gs:[0x60]'))
    get_peb_64_code += x64.Ret()
    get_peb_64_code = get_peb_64_code.get_code()

    allocator = native_exec.native_function.allocator

    def get_peb_builtin(self):
        """Return the native function that retrieves the PEB address.

        The function is built on first use from the stub matching
        ``self.bitness`` and stored in ``self.get_peb``.
        """
        cached = self.get_peb
        if cached is not None:
            return cached
        stub = self.get_peb_32_code if self.bitness == 32 else self.get_peb_64_code
        builtin = native_exec.create_function(stub, [PVOID])
        self.get_peb = builtin
        return builtin

    def _get_handle(self):
        """Return the handle given by ``GetCurrentProcess``"""
        return winproxy.GetCurrentProcess()

    def __del__(self):
        """Do nothing on deletion"""

    @property
    def pid(self):
        """Process ID

        :type: int
        """
        return os.getpid()

    # Is there a better way ?
    @utils.fixedpropety
    def ppid(self):
        """Parent Process ID

        :type: int
        """
        own_pid = self.pid
        matches = [proc for proc in windows.system.processes if proc.pid == own_pid]
        return matches[0].ppid

    @utils.fixedpropety
    def peb(self):
        """The Process Environment Block of the current process

        :type: :class:`PEB`
        """
        peb_address = self.get_peb_builtin()()
        return PEB.from_address(peb_address)

    @utils.fixedpropety
    def bitness(self):
        """The bitness of the process

        :returns: int -- 32 or 64"""
        import platform
        # platform.architecture()[0] looks like '32bit' / '64bit'
        return int(platform.architecture()[0][:2])

    def virtual_alloc(self, size):
        """Allocate ``size`` bytes of memory in the current process

        :returns: int
        """
        return winproxy.VirtualAlloc(dwSize=size)

    def write_memory(self, addr, data):
        """Write ``data`` at ``addr``; always return ``True``"""
        length = len(data)
        target = (c_char * length).from_address(addr)
        target[:length] = data
        return True

    def read_memory(self, addr, size):
        """Read ``size`` bytes from ``addr``"""
        dbgprint('Read CurrentProcess Memory', 'READMEM')
        return (c_char * size).from_address(addr)[:]

    def create_thread(self, lpStartAddress, lpParameter, dwCreationFlags=0):
        """Create a new thread in the current process

        .. note::
            CreateThread https://msdn.microsoft.com/en-us/library/windows/desktop/ms682453%28v=vs.85%29.aspx
        """
        thread_handle = winproxy.CreateThread(
            lpStartAddress=lpStartAddress,
            lpParameter=lpParameter,
            dwCreationFlags=dwCreationFlags,
        )
        return WinThread._from_handle(thread_handle)

    def exit(self, code=0):
        """Exit the process with exit code ``code``"""
        return winproxy.ExitProcess(code)
# Remote Execution print("Execution some native code in our calc (write 0x424242 at allocated address + return 0x1337)") if calc.bitness == 32: # Let's generate some native code code = x86.MultipleInstr() code += x86.Mov(x86.deref(addr), 0x42424242) code += x86.Mov("EAX", 0x1337) code += x86.Ret() else: code = x64.MultipleInstr() code += x64.Mov('RAX', addr) code += x64.Mov(x64.mem("[RAX]"), 0x42424242) code += x64.Mov("RAX", 0x1337) code += x64.Ret() print("Executing native code !") t = calc.execute(code.get_code()) t.wait() print("Return code = {0}".format(hex(t.exit_code))) print("Reading allocated memory : <{0}>".format(repr(calc.read_memory(addr, 20)))) print("Executing python code !") # Make 'windows' importable in remote python calc.execute_python("import sys; sys.path.append(r'{0}')".format(sys.path[-1])) calc.execute_python("import windows") # Let's write in the calc 'current_process' memory :)
import windows.native_exec.simple_x64 as x64
import windows.native_exec.nativeutils
from windows.generated_def.winstructs import *

# x64 routine appended after the stub below; the stub reaches it through the
# :FUNC_GETPROCADDRESS64 label (presumably defined inside this routine).
GetProcAddress64 = windows.native_exec.nativeutils.GetProcAddress64

# Module name as UTF-16-LE and API name as a byte string, both NUL-terminated
dll = "KERNEL32.DLL\x00".encode("utf-16-le")
api = "LoadLibraryA\x00"
# NOTE(review): no trailing NUL here, unlike `api` -- confirm how this value
# is written to the target by the rest of the script.
dll_to_load = "SUCE"

# Stub: resolve an API with GetProcAddress64, then call it with one argument.
# On entry RCX points to three qwords: [module name, API name, argument].
RemoteManualLoadLibray = x64.MultipleInstr()
c = RemoteManualLoadLibray
c += x64.Mov("R15", "RCX")  # keep the parameter table across the calls
c += x64.Mov("RCX", x64.mem("[R15 + 0]"))
c += x64.Mov("RDX", x64.mem("[R15 + 8]"))
c += x64.Call(":FUNC_GETPROCADDRESS64")
c += x64.Mov("RCX", x64.mem("[R15 + 0x10]"))
# Three pushes before the call, undone by the three pops after it
c += x64.Push("RCX")
c += x64.Push("RCX")
c += x64.Push("RCX")
c += x64.Call("RAX")  # RAX: value returned by the GetProcAddress64 routine
c += x64.Pop("RCX")
c += x64.Pop("RCX")
c += x64.Pop("RCX")
c += x64.Ret()
RemoteManualLoadLibray += GetProcAddress64
# Remote Execution print("Execution some native code in our notepad (write 0x424242 at allocated address + return 0x1337)") if notepad.bitness == 32: # Let's generate some native code code = x86.MultipleInstr() code += x86.Mov(x86.deref(addr), 0x42424242) code += x86.Mov("EAX", 0x1337) code += x86.Ret() else: code = x64.MultipleInstr() code += x64.Mov('RAX', addr) code += x64.Mov(x64.mem("[RAX]"), 0x42424242) code += x64.Mov("RAX", 0x1337) code += x64.Ret() print("Executing native code !") t = notepad.execute(code.get_code()) t.wait() print("Return code = {0}".format(hex(t.exit_code))) print("Reading allocated memory : <{0}>".format(repr(notepad.read_memory(addr, 20)))) print("Executing python code !") # Make 'windows' importable in remote python notepad.execute_python("import sys; sys.path.append(r'{0}')".format(sys.path[-1])) notepad.execute_python("import windows") # Let's write in the notepad 'current_process' memory :)
class CurrentProcess(Process):
    """The current process"""
    # Cache for the native function built by get_peb_builtin()
    get_peb = None

    # Native stub returning fs:[0x30] (32 bits)
    get_peb_32_code = x86.MultipleInstr()
    get_peb_32_code += x86.Mov('EAX', x86.mem('fs:[0x30]'))
    get_peb_32_code += x86.Ret()
    get_peb_32_code = get_peb_32_code.get_code()

    # Native stub returning gs:[0x60] (64 bits)
    get_peb_64_code = x64.MultipleInstr()
    get_peb_64_code += x64.Mov('RAX', x64.mem('gs:[0x60]'))
    get_peb_64_code += x64.Ret()
    get_peb_64_code = get_peb_64_code.get_code()

    allocator = native_exec.native_function.allocator

    # Use RtlGetCurrentPeb ?
    def get_peb_builtin(self):
        """Return the native function that retrieves the PEB address.

        The function is built on first use from the stub matching
        ``self.bitness`` and stored in ``self.get_peb``.
        """
        cached = self.get_peb
        if cached is not None:
            return cached
        stub = self.get_peb_32_code if self.bitness == 32 else self.get_peb_64_code
        builtin = native_exec.create_function(stub, [PVOID])
        self.get_peb = builtin
        return builtin

    def _get_handle(self):
        """Return the handle given by ``GetCurrentProcess``"""
        return winproxy.GetCurrentProcess()

    def __del__(self):
        """Do nothing on deletion"""

    @property
    def pid(self):
        """Process ID

        :type: :class:`int`
        """
        return os.getpid()

    # Is there a better way ?
    @utils.fixedpropety
    def ppid(self):
        """Parent Process ID

        :type: :class:`int`
        """
        own_pid = self.pid
        matches = [proc for proc in windows.system.processes if proc.pid == own_pid]
        return matches[0].ppid

    @utils.fixedpropety
    def peb(self):
        """The Process Environment Block of the current process

        :type: :class:`PEB`
        """
        peb_address = self.get_peb_builtin()()
        return PEB.from_address(peb_address)

    @utils.fixedpropety
    def bitness(self):
        """The bitness of the process

        :type: :class:`int` -- 32 or 64
        """
        import platform
        # platform.architecture()[0] looks like '32bit' / '64bit'
        return int(platform.architecture()[0][:2])

    def virtual_alloc(self, size, prot=PAGE_EXECUTE_READWRITE):
        """Allocate ``size`` bytes of memory in the process, with protection ``prot``

        :return: The address of the allocated memory
        :rtype: :class:`int`
        """
        return winproxy.VirtualAlloc(dwSize=size, flProtect=prot)

    def virtual_free(self, addr):
        """Free memory in the process allocated by :func:`virtual_alloc`"""
        return winproxy.VirtualFree(addr)

    def write_memory(self, addr, data):
        """Write ``data`` at ``addr``; always return ``True``"""
        length = len(data)
        target = (c_char * length).from_address(addr)
        target[:length] = data
        return True

    def read_memory(self, addr, size):
        """Read ``size`` bytes from ``addr``

        :return: The data read
        :rtype: :class:`str`
        """
        dbgprint('Read CurrentProcess Memory', 'READMEM')
        return (c_char * size).from_address(addr)[:]

    def create_thread(self, lpStartAddress, lpParameter, dwCreationFlags=0):
        """Create a new thread in the current process

        :rtype: :class:`WinThread` or :class:`DeadThread`
        """
        thread_handle = winproxy.CreateThread(
            lpStartAddress=lpStartAddress,
            lpParameter=lpParameter,
            dwCreationFlags=dwCreationFlags,
        )
        return WinThread._from_handle(thread_handle)

    def execute(self, code, parameter=0):
        """Execute native code ``code`` in the current thread, passing it ``parameter``.

        :rtype: :class:`int` the return value of the native code"""
        native_function = windows.native_exec.create_function(code, [PVOID, PVOID])
        return native_function(parameter)

    def exit(self, code=0):
        """Exit the process with exit code ``code``"""
        return winproxy.ExitProcess(code)

    def wait(self, timeout=INFINITE):
        """Raise :class:`ValueError` to prevent deadlock :D"""
        raise ValueError("wait() on current thread")

    @utils.fixedpropety
    def peb_syswow(self):
        """The 64bits PEB of a SysWow64 process

        :type: :class:`PEB`
        :raises ValueError: if the process is not a SysWow64 process
        """
        if self.is_wow_64:
            return windows.syswow64.get_current_process_syswow_peb()
        raise ValueError("Not a syswow process")
GetProcAddress64 += x64.Push("RBX") GetProcAddress64 += x64.Push("RCX") GetProcAddress64 += x64.Push("RDX") GetProcAddress64 += x64.Push("RSI") GetProcAddress64 += x64.Push("RDI") GetProcAddress64 += x64.Push("R8") GetProcAddress64 += x64.Push("R9") GetProcAddress64 += x64.Push("R10") GetProcAddress64 += x64.Push("R11") GetProcAddress64 += x64.Push("R12") GetProcAddress64 += x64.Push("R13") # Params : RCX -> libname # Params : RDX -> API Name GetProcAddress64 += x64.Mov("R11", "RCX") GetProcAddress64 += x64.Mov("R12", "RDX") GetProcAddress64 += x64.Mov("RAX", x64.mem("GS:[0x60]")) #PEB ! GetProcAddress64 += x64.Mov( "RAX", x64.mem("[RAX + 24] ")) # ; RAX = ldr (+ 6 for 64 cause of 2 ptr) GetProcAddress64 += x64.Mov( "RAX", x64.mem("[RAX + 32]")) # ; RAX on the first elt of the list (first module) GetProcAddress64 += x64.Mov("RDX", "RAX") GetProcAddress64 += x64.Label(":a_dest") GetProcAddress64 += x64.Mov("RAX", "RDX") GetProcAddress64 += x64.Mov( "RBX", x64.mem("[RAX + 32]")) # RBX : first base ! (base of current module) #GetProcAddress64 += x64.Mov("RBX ", x64.mem("[RAX + 32]")) # RBX : first base ! (base of current module) GetProcAddress64 += x64.Cmp("RBX", 0) GetProcAddress64 += x64.Jz(":DLL_NOT_FOUND") GetProcAddress64 += x64.Mov(
class DriverUpgrader64(DriverUpgrader):
    """Upgrader for windbg_driver_x64.sys (sha-1 6F5B29FFFB021BF80CA91D6D67CFC019D63F7175)

    Explanation: the function `upgrade_driver` will add some code to the DeviceIoControl handler.
    The added code will be written in some empty space after the driver code.
    The new code searches the IoControlCode in an array of IOCODE/Handler
    and calls the corresponding handler with the following parameters:

        RSI -> IO_STACK_LOCATION that contains:
            : The input buffer size
            : The input buffer
            : The output buffer size
        RDI -> IRP that contains:
            : The output buffer

    The handler must:
        - Verify the size of input / output buffer
        - Do whatever it wants
        - Write the returned values in the output buffer
        - Return 0 if everything went well
        - Return an error code otherwise

    Adding a new handler is simple, we just need to add the IOCODE/HANDLER to the handler array.
    It can be done by calling:

        self.upgrade_driver_add_new_ioctl_handler(IOCODE, HANDLER_CODE)
    """
    PTR_SIZE = 8
    # Offset of the code in the iohandle that we will hijack
    hijack_offset = 0x50e8
    # Offset of the normal code path in the iohandle for the standard IO_CODE
    normal_io_offset = 0x50f1
    # Offset of the function we will rewrite in the driver
    init_driver_offset = 0x523a
    # Offset to the `fail` function end
    fail_offset = 0x50f7
    # Offset to `success` function end
    normal_end_offset = 0x51d8
    # Address of the pointer to the IOCODE/HANDLER array
    HANDLE_ARRAY_ADDR = 0x5300
    # The first IOCODE/HANDLER array is stored right after that pointer
    FIRST_ARRAY_ADDR = HANDLE_ARRAY_ADDR + 8
    # Memory access often used in new handler: based on the parameters expected
    IO_STACK_INPUT_BUFFER_LEN = x64.mem('[RSI + 0x10]')
    IO_STACK_INPUT_BUFFER = x64.mem('[RSI + 0x20]')
    IRP_OUTPUT_BUFFER = x64.mem('[RDI + 0x70]')
    # IOCTL code sent to the original (non-hijacked) code path
    NORMAL_IO_CODE = 0x22C007

    def _upgrade_driver_inject_base_upgrade(self):
        """Write the IOCTL dispatcher in the driver and hijack the io handler.

        The dispatcher is written at ``init_driver_offset``. It sends
        ``NORMAL_IO_CODE`` back to the original code path and otherwise walks
        the IOCODE/HANDLER array (entries of 0x10 bytes, ended by a null
        IOCODE) to call the handler matching the IOCODE in R14.
        An empty array is then set up at ``FIRST_ARRAY_ADDR`` and a jump to
        the dispatcher is written at ``hijack_offset``.
        """
        kldbgdrv = self.kldbgdrv
        upgrade = x64.MultipleInstr()
        # R14 : IOCODE
        # RSI -> IO_STACK_LOCATION
        # RDI -> IRP
        # NOTE(review): `upgrade` is created a second time here; the first
        # assignment above is redundant.
        upgrade = x64.MultipleInstr()
        upgrade += x64.Cmp('R14', self.NORMAL_IO_CODE)
        # Jump to the original code path: the displacement depends on the
        # number of bytes already emitted, so this line must stay right here
        upgrade += x64.Jz(self.normal_io_offset - (self.init_driver_offset + len(upgrade.get_code())))
        # RAX = pointer to the current IOCODE/HANDLER array
        upgrade += x64.Mov('Rax', x64.create_displacement(disp=kldbgdrv + self.HANDLE_ARRAY_ADDR))
        upgrade += x64.Label(":LOOP")
        # RCX = IOCODE of the current entry
        upgrade += x64.Mov('RCX', x64.create_displacement('RAX'))
        upgrade += x64.Cmp('R14', 'RCX')
        upgrade += x64.Jnz(':END')
        # Match: call the handler stored 8 bytes after the IOCODE
        upgrade += x64.Mov('RAX', x64.create_displacement('RAX', disp=8))
        upgrade += x64.Call('RAX')
        # The value returned by the handler is handed over in RBX
        upgrade += x64.Mov('RBX', 'RAX')
        upgrade += x64.JmpAt(kldbgdrv + self.normal_end_offset)
        upgrade += x64.Label(":END")
        # A null IOCODE ends the array: unknown IOCODE -> fail path
        upgrade += x64.Cmp('RCX', 0)
        upgrade += x64.Jnz(':NEXT')
        upgrade += x64.JmpAt(kldbgdrv + self.fail_offset)
        upgrade += x64.Label(":NEXT")
        # Next entry (IOCODE + HANDLER = 0x10 bytes)
        upgrade += x64.Add('RAX', 0x10)
        upgrade += x64.Jmp(':LOOP')
        self.kdbg.write_pfv_memory(kldbgdrv + self.init_driver_offset, str(upgrade.get_code()))
        # Write first array dest
        self.write_pfv_ptr(kldbgdrv + self.HANDLE_ARRAY_ADDR, kldbgdrv + self.FIRST_ARRAY_ADDR)
        # The first array starts empty: a single null IOCODE/HANDLER entry
        self.write_pfv_ptr(kldbgdrv + self.FIRST_ARRAY_ADDR, 0)
        self.write_pfv_ptr(kldbgdrv + self.FIRST_ARRAY_ADDR + 8, 0)
        # Jump hijack
        jump_init_function = x64.Jmp(self.init_driver_offset - (self.hijack_offset))
        self.kdbg.write_pfv_memory(kldbgdrv + self.hijack_offset, str(jump_init_function.get_code()))
        self.ioctl_array = kldbgdrv + self.FIRST_ARRAY_ADDR
        self.ioctl_array_ptr = kldbgdrv + self.HANDLE_ARRAY_ADDR
        # The next handlers will be written right after the dispatcher
        self.next_code_addr = kldbgdrv + self.init_driver_offset + len(upgrade.get_code())
        self.is_upgraded = True

    def is_driver_already_upgraded(self):
        """Check if the driver has already been upgraded by checking if the jump hijack is in place"""
        jump_hijack = x64.Jmp(self.init_driver_offset - (self.hijack_offset)).get_code()
        mem = self.kdbg.read_virtual_memory(self.kldbgdrv + self.hijack_offset, len(jump_hijack))
        return mem == str(jump_hijack)

    def full_driver_upgrade(self):
        """Upgrade the driver, bootstrap it and add new features

        We don't want to write all the handlers in the driver init code.
        We bootstrap by only adding the `mem_alloc` feature and use it to:
            - Alloc a new page for the IOCODE/HANDLER array
            - Alloc a new page for the handlers code
        We move the IOCODE/HANDLER array
        Finally we add the other features
        """
        self._upgrade_driver_inject_base_upgrade()
        self.register_alloc_memory()
        new_ioctl_array_page = self.kdbg.alloc_memory(0x1000)
        # First registered IOCTL (the memory allocation handler registered above)
        alloc_ioctl, alloc_code_addr = self.registered_ioctl[0]
        # Write first array dest
        self.write_pfv_ptr(self.ioctl_array_ptr, new_ioctl_array_page)
        # New array: the alloc entry followed by the null terminating entry
        self.write_pfv_ptr(new_ioctl_array_page, alloc_ioctl)
        self.write_pfv_ptr(new_ioctl_array_page + 0x8, alloc_code_addr)
        self.write_pfv_ptr(new_ioctl_array_page + 0x10, 0)
        self.write_pfv_ptr(new_ioctl_array_page + 0x18, 0)
        self.ioctl_array = new_ioctl_array_page
        # The code of the next handlers goes in its own page
        new_code_page = self.kdbg.alloc_memory(0x1000)
        self.next_code_addr = new_code_page
        # Register other IOCTL
        self.register_kernel_call()
        self.register_io_in()
        self.register_io_out()

    def register_alloc_memory(self):
        """Register the DU_MEMALLOC_IOCTL handler: call nt!ExAllocatePoolWithTag

        The handler expects a 0x18-byte input buffer of three qwords
        (type, size, tag), stores the value returned by the call in the
        output buffer and returns 0. Any other input length returns 0xC000000D.

        :raises ValueError: if nt!ExAllocatePoolWithTag cannot be resolved
        """
        ExAllocatePoolWithTag = self.kdbg.get_symbol_offset("nt!ExAllocatePoolWithTag")
        if ExAllocatePoolWithTag is None:
            raise ValueError("Could not resolve <ExAllocatePoolWithTag>")
        # Fields of the input buffer (RCX points to it in the handler)
        INPUT_BUFFER_ALLOC_TYPE = x64.mem('[RCX]')
        INPUT_BUFFER_ALLOC_SIZE = x64.mem('[RCX + 0x8]')
        INPUT_BUFFER_ALLOC_TAG = x64.mem('[RCX + 0x10]')
        Alloc_IOCTL = x64.MultipleInstr()
        Alloc_IOCTL += x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x18)
        Alloc_IOCTL += x64.Jnz(':FAIL')
        Alloc_IOCTL += x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER)
        Alloc_IOCTL += x64.Mov('R8', INPUT_BUFFER_ALLOC_TAG)
        Alloc_IOCTL += x64.Mov('RDX', INPUT_BUFFER_ALLOC_SIZE)
        # RCX is loaded last: the other fields are read through it
        Alloc_IOCTL += x64.Mov('RCX', INPUT_BUFFER_ALLOC_TYPE)
        Alloc_IOCTL += x64.Mov('RAX', ExAllocatePoolWithTag)
        Alloc_IOCTL += x64.Call('RAX')
        # Store the returned value in the output buffer and return 0
        Alloc_IOCTL += x64.Mov('RBX', self.IRP_OUTPUT_BUFFER)
        Alloc_IOCTL += x64.Mov(x64.mem('[RBX]'), 'RAX')
        Alloc_IOCTL += x64.Xor('RAX', 'RAX')
        Alloc_IOCTL += x64.Ret()
        Alloc_IOCTL += x64.Label(":FAIL")
        Alloc_IOCTL += x64.Mov('RAX', 0x0C000000D)
        Alloc_IOCTL += x64.Ret()
        self.upgrade_driver_add_new_ioctl_handler(DU_MEMALLOC_IOCTL, Alloc_IOCTL.get_code())

    def register_kernel_call(self):
        """Register the DU_KCALL_IOCTL handler: call an arbitrary address

        The handler stores the value returned by the call in the output
        buffer and returns 0. An empty input buffer returns 0xC000000D.
        """
        # expect in buffer: the address to call followed by its arguments (qwords)
        CCall_IOCTL = x64.MultipleInstr()
        CCall_IOCTL += x64.Mov('RAX', self.IO_STACK_INPUT_BUFFER_LEN)
        CCall_IOCTL += x64.Cmp('RAX', 0)
        CCall_IOCTL += x64.Jz(":FAIL")  # Need at least the function to call
        CCall_IOCTL += x64.Mov('R15', 4 * 8)  # Size to pop on the stack at the end (4 * push RDI)
        CCall_IOCTL += x64.Mov('R10', self.IO_STACK_INPUT_BUFFER)
        # While more than the function + 4 arguments remain:
        # push the last remaining qword of the buffer on the stack
        CCall_IOCTL += x64.Label(':PUSH_NEXT_ARG')
        CCall_IOCTL += x64.Cmp('RAX', (8 * 5))
        CCall_IOCTL += x64.Jbe(":SETUP_REG_ARGS")
        CCall_IOCTL += x64.Sub('RAX', 8)
        INPUT_BUFFER_NEXT_ARG = x64.create_displacement(base='R10', index='RAX')
        CCall_IOCTL += x64.Mov('RBX', INPUT_BUFFER_NEXT_ARG)
        CCall_IOCTL += x64.Push('RBX')
        CCall_IOCTL += x64.Add('R15', 8)  # Add to the size to pop on the stack at the end
        CCall_IOCTL += x64.Jmp(':PUSH_NEXT_ARG')
        CCall_IOCTL += x64.Label(":SETUP_REG_ARGS")
        # Could be done in a loop
        # But do I really want to generate x86 in a loop..
        # Dispatch on the remaining length (function + number of register arguments)
        CCall_IOCTL += x64.Cmp('RAX', (8 * 5))
        CCall_IOCTL += x64.Jz(":SETUP_4_ARGS")
        CCall_IOCTL += x64.Cmp('RAX', (8 * 4))
        CCall_IOCTL += x64.Jz(":SETUP_3_ARGS")
        CCall_IOCTL += x64.Cmp('RAX', (8 * 3))
        CCall_IOCTL += x64.Jz(":SETUP_2_ARGS")
        CCall_IOCTL += x64.Cmp('RAX', (8 * 2))
        CCall_IOCTL += x64.Jz(":SETUP_1_ARGS")
        CCall_IOCTL += x64.Jmp(":SETUP_0_ARGS")
        # Each label falls through into the loads of the lower arguments
        CCall_IOCTL += x64.Label(":SETUP_4_ARGS")
        CCall_IOCTL += x64.Mov('R9', x64.mem('[R10 + 0x20]'))
        CCall_IOCTL += x64.Label(":SETUP_3_ARGS")
        CCall_IOCTL += x64.Mov('R8', x64.mem('[R10 + 0x18]'))
        CCall_IOCTL += x64.Label(":SETUP_2_ARGS")
        CCall_IOCTL += x64.Mov('RDX', x64.mem('[R10 + 0x10]'))
        CCall_IOCTL += x64.Label(":SETUP_1_ARGS")
        CCall_IOCTL += x64.Mov('RCX', x64.mem('[R10 + 8]'))
        CCall_IOCTL += x64.Label(":SETUP_0_ARGS")
        # RAX = address to call (first qword of the buffer)
        CCall_IOCTL += x64.Mov('RAX', x64.mem('[R10]'))
        # Fix Reserve space (calling convention)
        CCall_IOCTL += x64.Push('RDI')
        CCall_IOCTL += x64.Push('RDI')
        CCall_IOCTL += x64.Push('RDI')
        CCall_IOCTL += x64.Push('RDI')
        CCall_IOCTL += x64.Call('RAX')
        # Store the returned value in the output buffer, release the stack and return 0
        CCall_IOCTL += x64.Mov('RDX', self.IRP_OUTPUT_BUFFER)
        CCall_IOCTL += x64.Mov(x64.mem('[RDX]'), 'RAX')
        CCall_IOCTL += x64.Xor('RAX', 'RAX')
        CCall_IOCTL += x64.Add('RSP', 'R15')
        CCall_IOCTL += x64.Ret()
        CCall_IOCTL += x64.Label(":FAIL")
        CCall_IOCTL += x64.Mov('RAX', 0x0C000000D)
        CCall_IOCTL += x64.Ret()
        self.upgrade_driver_add_new_ioctl_handler(DU_KCALL_IOCTL, CCall_IOCTL.get_code())

    def register_io_out(self):
        """Register the DU_OUT_IOCTL handler: write a value to an I/O port

        The handler expects a 0x18-byte input buffer of three qwords
        (size indicator, port, value) and returns 0.
        Any other input length returns 0xC000000D.
        """
        out_ioctl = x64.MultipleInstr()
        # Fields of the input buffer (RCX points to it in the handler)
        INPUT_BUFFER_SIZE = x64.mem('[RCX]')
        INPUT_BUFFER_PORT = x64.mem('[RCX + 8]')
        INPUT_BUFFER_VALUE = x64.mem('[RCX + 0x10]')
        out_ioctl += x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x18)  # size indicator / port / value
        out_ioctl += x64.Jnz(":FAIL")
        out_ioctl += x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER)
        out_ioctl += x64.Mov('RDX', INPUT_BUFFER_PORT)
        out_ioctl += x64.Mov('RAX', INPUT_BUFFER_VALUE)
        # RCX is loaded last: the other fields are read through it
        out_ioctl += x64.Mov('RCX', INPUT_BUFFER_SIZE)
        # size == 1 -> byte, size == 2 -> word, anything else -> dword
        out_ioctl += x64.Cmp('RCX', 0x1)
        out_ioctl += x64.Jnz(":OUT_2_OR_4")
        out_ioctl += x64.Out('DX', 'AL')
        out_ioctl += x64.Jmp(':SUCCESS')
        out_ioctl += x64.Label(":OUT_2_OR_4")
        out_ioctl += x64.Cmp('RCX', 0x2)
        out_ioctl += x64.Jnz(":OUT_4")
        out_ioctl += x64.Out('DX', 'AX')
        out_ioctl += x64.Jmp(':SUCCESS')
        out_ioctl += x64.Label(":OUT_4")
        out_ioctl += x64.Out('DX', 'EAX')
        out_ioctl += x64.Label(":SUCCESS")
        out_ioctl += x64.Xor('RAX', 'RAX')
        out_ioctl += x64.Ret()
        out_ioctl += x64.Label(":FAIL")
        out_ioctl += x64.Mov('RAX', 0x0C000000D)
        out_ioctl += x64.Ret()
        self.upgrade_driver_add_new_ioctl_handler(DU_OUT_IOCTL, out_ioctl.get_code())

    def register_io_in(self):
        """Register the DU_IN_IOCTL handler: read a value from an I/O port

        The handler expects a 0x10-byte input buffer of two qwords
        (size indicator, port), stores RAX in the output buffer and returns 0.
        Any other input length returns 0xC000000D.
        """
        in_ioctl = x64.MultipleInstr()
        # Fields of the input buffer (RCX points to it in the handler)
        INPUT_BUFFER_SIZE = x64.mem('[RCX]')
        INPUT_BUFFER_PORT = x64.mem('[RCX + 8]')
        in_ioctl += x64.Cmp(self.IO_STACK_INPUT_BUFFER_LEN, 0x10)  # size indicator / port
        in_ioctl += x64.Jnz(":FAIL")
        in_ioctl += x64.Mov('RCX', self.IO_STACK_INPUT_BUFFER)
        in_ioctl += x64.Mov('RDX', INPUT_BUFFER_PORT)
        # RCX is loaded last: the other field is read through it
        in_ioctl += x64.Mov('RCX', INPUT_BUFFER_SIZE)
        # size == 1 -> byte, size == 2 -> word, anything else -> dword
        # (the labels are named OUT_* like in the `out` handler)
        in_ioctl += x64.Cmp('RCX', 0x1)
        in_ioctl += x64.Jnz(":OUT_2_OR_4")
        in_ioctl += x64.In('AL', 'DX')
        in_ioctl += x64.Jmp(':SUCCESS')
        in_ioctl += x64.Label(":OUT_2_OR_4")
        in_ioctl += x64.Cmp('RCX', 0x2)
        in_ioctl += x64.Jnz(":OUT_4")
        in_ioctl += x64.In('AX', 'DX')
        in_ioctl += x64.Jmp(':SUCCESS')
        in_ioctl += x64.Label(":OUT_4")
        in_ioctl += x64.In('EAX', 'DX')
        in_ioctl += x64.Label(":SUCCESS")
        # Store RAX in the output buffer and return 0
        in_ioctl += x64.Mov('RDX', self.IRP_OUTPUT_BUFFER)
        in_ioctl += x64.Mov(x64.mem('[RDX]'), 'RAX')
        in_ioctl += x64.Xor('RAX', 'RAX')
        in_ioctl += x64.Ret()
        in_ioctl += x64.Label(":FAIL")
        in_ioctl += x64.Mov('RAX', 0x0C000000D)
        in_ioctl += x64.Ret()
        self.upgrade_driver_add_new_ioctl_handler(DU_IN_IOCTL, in_ioctl.get_code())