def findVmStubCrossReferences(vmStub, rule):
    """Find unconditional branches to the VM stub in the main module's sections.

    x64dbg has not provided an interface to its cross-reference functionality
    to this script, so every section is scanned with a signature rule and the
    instruction located ``signatureSize`` bytes after each hit is decoded.

    Args:
        vmStub: Address the branch operand must be equal to.
        rule: Object whose ``match(data=...)`` returns matches exposing a
            ``strings`` sequence; element ``[0]`` of each entry is used as the
            offset of the hit inside the scanned buffer (presumably a compiled
            YARA rule -- confirm against the caller).

    Returns:
        A list of dicts with the keys ``"start"`` (address of the signature
        hit), ``"jump"`` (address of the branch) and ``"section"``.
    """
    references = []
    signatureSize = 30
    for section in GetMainModuleSectionList():
        x64dbg._plugin_logputs("Scanning section: " + section.name)
        scanBuffer = Read(section.addr, section.size)
        for match in rule.match(data=scanBuffer):
            for referenceMatch in match.strings:
                instructionLocation = referenceMatch[0] + signatureSize
                window = scanBuffer[instructionLocation:instructionLocation + 10]
                decoded = distorm3.Decompose(instructionLocation + section.addr, window)
                if not decoded:
                    # The hit is so close to the end of the section that no
                    # bytes are left to decode; there is no branch to inspect.
                    continue
                candidate = decoded[0]
                if (candidate.flowControl == "FC_UNC_BRANCH"
                        and candidate.operands
                        and candidate.operands[0].value == vmStub):
                    references.append({
                        "start": referenceMatch[0] + section.addr,
                        "jump": instructionLocation + section.addr,
                        "section": section,
                    })
    return references
def get_distorm_info(inst_addr):
    """
    @brief Prints the whole distorm3 info of the given instruction
    @param inst_addr Address of instruction

    The instruction bytes are fetched with ItemSize/GetManyBytes and always
    decoded in 64-bit mode.  Nothing is returned; all output goes to stdout.
    """
    size = ItemSize(inst_addr)
    inst_bytes = GetManyBytes(inst_addr, size)
    decoded = distorm3.Decompose(inst_addr, inst_bytes, distorm3.Decode64Bits, 0)
    if not decoded:
        # Nothing could be decoded; report it instead of raising IndexError.
        print('no instruction decoded at %s' % (inst_addr,))
        return
    i = decoded[0]

    def _emit(label, value):
        # Same layout as ``print label, value``: the parts joined by one space.
        print('%s %s' % (label, value))

    print(i)
    _emit('InstBytes ', i.instructionBytes)
    _emit('Opcode ', i.opcode)
    for o in i.operands:
        _emit('operand ', o)
        _emit('operand type', o.type)
    for f in i.flags:
        _emit('flag ', f)
    _emit('raw_flags ', i.rawFlags)
    _emit('inst_class ', i.instructionClass)
    _emit('flow_control ', i.flowControl)
    _emit('address ', i.address)
    _emit('size ', i.size)
    _emit('dt ', i.dt)
    _emit('valid ', i.valid)
    _emit('segment ', i.segment)
    _emit('unused_Prefixes ', i.unusedPrefixesMask)
    _emit('mnemonic ', i.mnemonic)
def get_registry_callbacks_legacy(nt_mod):
    """
    Enumerate registry change callbacks.

    The global callback vector is located by disassembling the first 200
    bytes of the exported CmRegisterCallback function and taking the source
    operand of a ``MOV EBX, <value>`` instruction (the second such
    instruction when two are present, otherwise the only one).  This method
    is only for XP systems; if it fails on XP the callbacks can still be
    found using PoolScanGenericCallback.  On Vista and Windows 7 these
    callbacks are registered using the CmRegisterCallbackEx function.

    Yields ``(symbol, callback.Function, None)`` for each populated slot of
    the 100-entry vector.  Yields nothing when distorm3 is unavailable, the
    export cannot be resolved, or no matching MOV is found.
    """
    if not has_distorm3:
        return

    symbol = "CmRegisterCallback"

    # RVA of the export, looked up through the module object
    rva = nt_mod.getprocaddress(symbol)
    if rva == None:
        return

    # Absolute VA of the function and the bytes to disassemble
    entry_va = rva + nt_mod.DllBase
    prologue = nt_mod.obj_vm.zread(entry_va, 200)

    vector = None
    seen_one = False
    for insn in distorm3.Decompose(entry_va, prologue, distorm3.Decode32Bits):
        is_mov_ebx = (insn.valid and insn.mnemonic == "MOV"
                      and len(insn.operands) == 2
                      and insn.operands[0].name == 'EBX')
        if not is_mov_ebx:
            continue
        vector = insn.operands[1].value
        # Stop on the second matching MOV; remember that the first was seen.
        if seen_one:
            break
        seen_one = True

    # The global variable could not be located
    if vector == None:
        return

    # The vector is read as an array of 100 _EX_FAST_REF objects
    slots = obj.Object(
        "Array",
        count=100,
        offset=vector,
        vm=nt_mod.obj_vm,
        targetType="_EX_FAST_REF",
    )
    for slot in slots:
        block = slot.dereference_as("_EX_CALLBACK_ROUTINE_BLOCK")
        if block:
            yield symbol, block.Function, None
def _Decompose(self, instructions=10, size=None):
    """A generator for the decoded instructions of this object.

    Memory is read in windows of ``overlap`` bytes starting at
    ``self.obj_offset``.  Decoding of a window stops 40 bytes before its end
    and resumes from that instruction with a fresh read, so an instruction is
    never decoded from a truncated buffer.

    Args:
        instructions: Stop once more than this many instructions have been
            counted.  Ignored when ``size`` is given.
        size: Stop at the first instruction that starts more than this many
            bytes after ``self.obj_offset``.
    """
    overlap = 0x100
    offset = self.obj_offset
    count = 0

    while 1:
        data = self.obj_vm.read(offset, overlap)

        # Nothing could be read (e.g. an unmapped page).  Without this guard
        # the loop below never runs and the outer loop would spin forever.
        if not data:
            return

        op = obj.NoneObject()
        for op in distorm3.Decompose(offset, data, self.distorm_mode):
            # Too close to the end of the window: re-read starting here.
            if op.address - offset > len(data) - 40:
                break

            if not op.valid:
                continue

            # Exit if we read as much as was required.
            if size is not None and op.address - self.obj_offset > size:
                return

            yield op

            if size is None and count > instructions:
                return

            count += 1

        offset = op.address
def check(self, addr):
    """Report whether the instruction ending right before *addr* is a CALL.

    The 16 bytes preceding *addr* are read and decoded from every possible
    start (1 to 16 bytes back); the last instruction of each attempt is
    examined.  ``self.instr`` receives the CALL instruction and
    ``self.caller`` its target when the target can be determined.

    Returns True when a CALL is found, False when *addr* is None, when a
    valid non-flow-control instruction is found first, or when no attempt
    yields a CALL.
    """
    self.caller = None
    self.instr = None
    if addr == None:
        return False

    # We only need the last 16 bytes, since this is the largest size the
    # (prior) x86 instruction can be
    window = self.addrspace.zread(addr - 16, 16)

    for back in range(1, 17):
        decoded = distorm3.Decompose(addr - back, window[-back:], self.mode)
        last = decoded[-1]
        if not last.valid:
            continue

        kind = last.flowControl
        if kind == 'FC_NONE':
            return False
        if kind != 'FC_CALL':
            continue

        self.instr = last
        # if the CALL instruction has a determinate target, set caller to it
        operand_kinds = [o.type for o in last.operands]
        if operand_kinds == ["Immediate"]:
            self.caller = last.operands[0].value
        elif operand_kinds == ["AbsoluteMemoryAddress"]:
            # NOTE(review): reads ``disp`` from the instruction object rather
            # than from its operand -- confirm this attribute exists.
            self.caller = self.start + last.disp
        return True

    return False
def hasValidHandler(self, idt_stub_addr, model, distorm_mode):
    """Check whether an IDT stub refers to one of the known kernel handlers.

    At most the first five instructions decoded from the 30 bytes at
    *idt_stub_addr* are examined.  For the "64bit" model a RIP-relative LEA
    into a register is resolved; for the "32bit" model the second operand of
    a MOV or the target of a JMP is resolved.  The resolved address is mapped
    to a kernel symbol name and compared against the known-name lists.

    Returns True as soon as a known handler (or exit) name is matched,
    otherwise False.
    """
    known_idt_hndlrs64 = ['_hndl_allintrs', '_hndl_alltraps', '_hndl_unix_scall', '_hndl_mach_scall', '_hndl_mdep_scall', '_hndl_sysenter', '_hndl_syscall', '_hndl_diag_scall', '_hndl_double_fault', '_hndl_machine_check']
    known_idt_hndlrs32 = ['_lo_allintrs', '_lo_alltraps', '_lo_diag_scall', '_lo_mach_scall', '_lo_mc64', '_lo_mdep_scall', '_lo_syscall', '_lo_sysenter', '_lo_unix_scall', '_lo_df64']
    exits = ['trap_check_kernel_exit']

    stub = self.addr_space.read(idt_stub_addr, 30)

    for seen, op in enumerate(distorm3.Decompose(idt_stub_addr, stub, distorm_mode)):
        if seen > 4:
            break

        if model == "64bit":
            # for x64, LEA RAX, [RIP+0x1602]
            if (op.mnemonic == "LEA" and 'FLAG_RIP_RELATIVE' in op.flags
                    and op.operands[0].type == 'Register'):
                target = op.address + op.operands[1].disp + op.size
                name = self.addr_space.profile.get_symbol_by_address('kernel', target)
                if name != '' and name in known_idt_hndlrs64:
                    return True
        elif model == "32bit":
            if op.mnemonic == "MOV":
                # for MOV DWORD [ESP+0x4], 0x2a17b0
                target = op.operands[1].value
                name = self.addr_space.profile.get_symbol_by_address('kernel', target)
                if name != '' and name in known_idt_hndlrs32:
                    return True
            elif op.mnemonic == "JMP":
                # for last exit JMP
                target = op.operands[0].value
                name = self.addr_space.profile.get_symbol_by_address('kernel', target)
                if name != '' and name in exits:
                    return True

    return False
def _get_table_info_distorm(self):
    """
    Find the size of the system call table by disassembling a function
    that references it in its first instruction.

    The instruction looked for has the form 'cmp reg,NR_syscalls'.  Only the
    first 6 bytes of ``sysenter_do_call`` (32-bit) or
    ``system_call_fastpath`` (otherwise) are decoded.

    Returns the second CMP operand masked to 32 bits, or 0 when distorm is
    unavailable, the symbol is not found, or no CMP is decoded.
    """
    if not has_distorm:
        return 0

    model = self.addr_space.profile.metadata.get('memory_model', '32bit')
    if model == '32bit':
        mode, func = distorm3.Decode32Bits, "sysenter_do_call"
    else:
        mode, func = distorm3.Decode64Bits, "system_call_fastpath"

    func_addr = self.addr_space.profile.get_symbol(func)
    if not func_addr:
        return 0

    head = self.addr_space.read(func_addr, 6)
    for insn in distorm3.Decompose(func_addr, head, mode):
        if insn.valid and insn.mnemonic == 'CMP':
            return (insn.operands[1].value) & 0xffffffff

    return 0
def process_file(filename):
    """Disassemble the ``.text`` section of an ELF file and feed it to the emulator.

    Every decoded instruction is dispatched to the ``Emulator`` method named
    after its mnemonic; mnemonics without a matching method are reported on
    stdout.

    Returns the list of decoded instructions.
    """
    print('Processing file: ', filename)
    # The file has to stay open until all section data has been read.
    with open(filename, 'rb') as handle:
        elf = ELFFile(handle)

        # The code to disassemble is taken from the .text section
        text = elf.get_section_by_name(".text")
        load_address = text.header['sh_addr']
        disassembly = distorm3.Decompose(load_address, text.data())

        # Get the symbol table as table of addresses mapped to names
        symbol_table_section = elf.get_section_by_name(".symtab")
        symbol_table = {}
        # TODO: Fill in the symbol table...

        # Create an LLVM emulator
        emulator = Emulator("module", symbol_table)
        for insn in disassembly:
            if not hasattr(emulator, insn.mnemonic):
                print(insn.mnemonic + " not implemented yet. Please implement it!")
                continue
            getattr(emulator, insn.mnemonic)(insn)

    return disassembly
def emulateAndFind(startStub, jumpAddress):
    """Run the stub under the debugger and recover what it pushed.

    EIP is pointed at *startStub* and execution runs until the breakpoint at
    *jumpAddress* is hit.  Two little-endian dwords are then read from the
    stack: the first is treated as an address in the original code, the
    second as the bytecode value.  From ``original - 5`` the code is scanned
    backwards one byte at a time until an unconditional branch decodes; the
    size is the distance from that branch to ``original`` plus the number of
    consecutive 0x90 bytes found at ``original``.

    EIP is restored and ESP is advanced by 8 before returning.

    Returns a dict with the keys ``"bytecode"``, ``"original"`` (address of
    the branch that was found) and ``"size"``.
    """
    savedEip = Register.EIP
    SetEIP(startStub)
    SetBreakpoint(jumpAddress)
    debug.Run()
    DeleteBreakpoint(jumpAddress)

    original, = struct.unpack("<L", Read(Register.ESP, 4))
    bytecode, = struct.unpack("<L", Read(Register.ESP + 4, 4))

    # Seek upwards for jump to VM Stub
    jmpLocation = original - 5
    while distorm3.Decompose(jmpLocation, Read(jmpLocation, 10))[0].flowControl != "FC_UNC_BRANCH":
        jmpLocation -= 1

    # Calculate available size...
    size = original - jmpLocation
    while ReadByte(original) == 0x90:
        size += 1
        original += 1

    Register.EIP = savedEip
    Register.ESP += 8
    return {"bytecode": bytecode, "original": jmpLocation, "size": size}
def debug(self):
    """Rebuild the machine code with a ``jmp $`` after every instruction and run it.

    ``self.machine_code`` is decoded one instruction at a time.  Short jumps
    (opcode ``eb``) are taken to skip over an embedded 16-byte m128 constant:
    the constant is re-emitted behind NOP padding and its old and new
    addresses are recorded.  Every other instruction is copied followed by
    ``eb fe`` and its text is appended to ``self.instructions``.  Finally all
    recorded old addresses are replaced by the new ones in the hex buffer and
    the result is passed to ``self._run``.

    The buffer is handled as a hex *string* throughout, so lengths are
    divided by two to obtain byte counts; the code relies on Python 2 string
    codecs (``encode('hex')`` / ``decode('hex')``) and integer division.

    NOTE(review): ``0xfed0000`` looks like the base address the original
    machine code was assembled for -- confirm.
    NOTE(review): ``instr.operands[1]`` assumes every non-jump instruction
    has at least two operands -- confirm.
    """
    # disasm the machine code, to obtain each instruction
    # so we can place a while(1) between them
    buf = ''
    offset = 0
    # old m128 address -> new m128 address
    addr = {}
    while offset != len(self.machine_code):
        instr = distorm3.Decompose(None, self.machine_code[offset:])[0]
        hexdump = instr.instructionBytes.encode('hex')

        # increase offset (two hex digits per byte)
        offset += len(hexdump) / 2

        # short jmp, we have to skip this.. (16-byte aligned m128)
        if hexdump[:2] == 'eb':
            # calculate the jmp-length
            jmp_len = int(hexdump[2:], 16)

            # extract the m128: the last 16 bytes of the region jumped over
            m128 = self.machine_code[offset + jmp_len - 16:offset + jmp_len]

            # align to 16 bytes and write the m128 (including jmp over it)
            # 32 bytes = 30 bytes align + 2 bytes short jmp
            buf += '90'*(30 - (len(buf)/2 % 16)) + \
                'eb10' + m128.encode('hex')

            # write this addr in our dictionary
            addr[0xfed0000 + offset + jmp_len - 16] = self.code + len(buf) / 2 - 16

            # keep a dictionary with address -> m128
            self.m128s[0xfed0000+offset+jmp_len-16] = \
                struct.unpack('4L', m128)

            offset += jmp_len

        # normal and sse instructions are followed by a while(1) loop
        else:
            buf += hexdump + 'ebfe'

            # if referenced, display m128 as well
            m128 = ''
            if instr.operands[1].type == distorm3.OPERAND_ABSOLUTE_ADDRESS:
                m128 = '\nm128 ' + ' '.join(map(lambda x: '0x%08x' % x, \
                    self.m128s[instr.operands[1].disp]))

            self.instructions.append(str(instr).lower() + m128)

    # replace all old addresses with new addresses, using a sortof bad way
    # (plain substring replacement on the hex text)
    for key, value in addr.items():
        buf = buf.replace(struct.pack('L', key).encode('hex'), \
            struct.pack('L', value).encode('hex'))

    # exit the thread after a last while(1) loop,
    # to get the final xmm registers
    buf += 'ebfec3'
    return self._run(buf.decode('hex'), True)
def getDecryptSubroutine(vmStub):
    """Return the target of the CALL located 0x44 bytes into the VM stub.

    Ten bytes at ``vmStub + 0x44`` are decoded; when the first instruction is
    a call, the value of its first operand is returned, otherwise None.
    """
    callSite = vmStub + 0x44
    firstInstruction = distorm3.Decompose(callSite, Read(callSite, 10))[0]
    if firstInstruction.flowControl != "FC_CALL":
        return None
    return firstInstruction.operands[0].value
def calculate(self):
    """Detect a shadowed TrustedBSD ``mac_policy_list``.

    ``_mac_proc_check_get_task`` (one of the functions expanding MAC_CHECK)
    is disassembled and the policy-list address it references is compared
    with the address of the ``_mac_policy_list`` symbol.

    Yields:
        64-bit model: ``(original list address, shadow list address,
        address of the referencing instruction)``.
        32-bit model: ``(original entries address, shadow entries address,
        address of the referencing instruction)``.
        At most one tuple is produced; a message is also printed.
    """
    common.set_plugin_members(self)

    model = self.addr_space.profile.metadata.get('memory_model', 0)
    if model == '32bit':
        distorm_mode = distorm3.Decode32Bits
    else:
        distorm_mode = distorm3.Decode64Bits

    # some functions with MAC_CHECK/mac_policy_list references: mac_proc_check_get_task_name, mac_proc_check_get_task, mac_proc_check_fork, mac_cred_check_label_update, mac_cred_check_visible, mac_proc_check_debug, mac_proc_check_run_cs_invalid, mac_proc_check_sched, mac_proc_check_signal, mac_proc_check_wait, mac_proc_check_setlcid, mac_proc_check_getlcid, mac_lctx_check_label_update, mac_proc_check_suspend_resume, mac_port_check_service, mac_port_label_compute, mac_file_check_create, mac_file_check_dup, mac_file_check_fcntl, mac_file_check_ioctl, mac_file_check_inherit, mac_file_check_receive, mac_file_check_get_offset, mac_file_check_change_offset, mac_file_check_get, mac_file_check_set, mac_file_check_lock, mac_file_check_mmap
    original_mpl_addr = self.addr_space.profile.get_symbol("_mac_policy_list")
    original_mpl = obj.Object("mac_policy_list", offset = original_mpl_addr, vm = self.addr_space)

    # to get the disassembly of MAC_CHECK, disassemble mac_proc_check_get_task since targeted by REX [http://reverse.put.as/2014/03/18/teaching-rex-another-trustedbsd-trick-to-hide-from-volatility/]
    func_addr = self.addr_space.profile.get_symbol('_mac_proc_check_get_task')
    content = self.addr_space.read(func_addr, 1024)

    lea_targets = ('RDI', 'RAX', 'R13', 'RSP', 'RBX', 'R12', 'R14', 'R15')

    op_prev = None
    for op in distorm3.Decompose(func_addr, content, distorm_mode):
        # Stop at an invalid instruction or at the NOP padding after a RET.
        # op_prev is None on the first instruction, so it must be checked
        # before its mnemonic is read.
        if not op.valid or (op.mnemonic == 'NOP' and op_prev is not None
                            and op_prev.mnemonic == "RET"):
            break

        if model == "64bit":
            if (op.mnemonic == 'LEA' and op.operands[0].type == 'Register'
                    and op.operands[0].name in lea_targets):
                # RIP-relative operand turned into an absolute address
                curr_mpl_addr = op.address + op.operands[1].disp + op.size
                curr_mpl = obj.Object("mac_policy_list", offset = curr_mpl_addr, vm = self.addr_space)
                # check if mac_policy_list address and mac_policy_list.entries address have changed
                if curr_mpl_addr != original_mpl_addr or original_mpl.entries.v() != curr_mpl.entries.v():
                    shadow_addr = curr_mpl_addr
                    yield (original_mpl_addr, shadow_addr, op.address)
                    print("mac_policy_address is shadowed! Original Address: {0:#10x}, Shadow Address: {1:#10x}, Modification at: {2:#10x}".format(original_mpl_addr, shadow_addr, op.address))
                    break
        elif model == "32bit":
            if (op.mnemonic == 'MOV' and op.operands[0].type == 'Register'
                    and op.operands[0].name in ['EAX']
                    and op.operands[1].type == 'AbsoluteMemoryAddress'):
                curr_mpl_entries_addr = op.operands[1].disp
                # check if mac_policy_list.entries address has changed
                if curr_mpl_entries_addr != original_mpl.entries.v():
                    shadow_addr = curr_mpl_entries_addr
                    yield (original_mpl.entries.v(), shadow_addr, op.address)
                    print("mac_policy_address is shadowed! Original Entries Address: {0:#10x}, Shadow Entries Address: {1:#10x}, Modification at: {2:#10x}".format(original_mpl.entries.v(), shadow_addr, op.address))
                    break

        op_prev = op
def decode(ea=None):
    """Decode the instruction at *ea* into a symbolic expression.

    The instruction bytes are read through IDA, decoded with distorm3, and
    every operand is tagged with an ``op_size`` attribute taken from the
    matching IDA operand (distorm does not report the logical operand size).

    Args:
        ea: Address of the instruction; defaults to the current screen
            address.

    Returns:
        None when IDA cannot decode an instruction at *ea*; a ``Call``
        wrapper for ``call`` instructions; otherwise the symbolic function
        named after the lower-cased mnemonic applied to the operands.

    Raises:
        BaseException: for an operand type this function does not know.
        Any exception raised while building the ``Call`` wrapper is
        re-raised after a diagnostic line is printed.
    """
    if ea is None:
        ea = idc.ScreenEA()

    ida_inst = idautils.DecodeInstruction(ea)
    if ida_inst is None:
        return None

    raw = ''.join(chr(idc.Byte(ea + i)) for i in range(ida_inst.size))
    ist = distorm3.Decompose(ea, raw)[0]

    # distorm doesn't decode the operand logical size ie.. byte ptr, so use
    # IDA for that.  The IDA decode from above is for the same address, so
    # it is reused instead of decoding again for every operand.
    for idx, operand in enumerate(ist.operands):
        setattr(operand, 'op_size', op_size(ida_inst[idx]))

    def _get_operand_sym(op):
        if op.type == 'Immediate':
            return symath.symbolic(op.value)
        elif op.type == 'AbsoluteMemoryAddress':
            return DEREF(op.op_size, op.disp)
        elif op.type == 'Register':
            return symath.symbols(distorm3.Registers[op.index].upper())
        elif op.type == 'AbsoluteMemory':
            rv = 0
            if op.index != None:
                rv += symath.symbols(
                    distorm3.Registers[op.index].upper()) * op.scale
            if op.base != None:
                rv += symath.symbols(distorm3.Registers[op.base].upper())
            if op.disp != None:
                rv += symath.symbolic(op.disp)
            return DEREF(op.op_size, rv)
        else:
            raise BaseException("Unknown operand type %s (%s)" % (op.type, op))

    args = [_get_operand_sym(operand) for operand in ist.operands]
    mnemonic = ist.mnemonic.lower()

    if mnemonic == 'call':
        spdiff = idc.GetSpDiff(ist.address + ist.size)
        if spdiff is None:
            spdiff = 0
        try:
            return Call(args[0], spdiff, ist.address)
        except Exception:
            print('failed to wrap call @%x' % (ist.address))
            # bare raise keeps the original traceback
            raise

    return symath.symbolic(mnemonic)(*args)
def findBranch(ta, size):
    """Find the first register-indirect jump or return in a code region.

    *size* bytes starting at ``ta.address`` are decoded.  On the first
    unconditional branch whose first operand is a register, or the first
    return, ``ta.jumpAddress`` is set to that instruction's address.

    Returns True when such an instruction was found, otherwise False.
    """
    code = read_buffer(ta.address, size)
    for insn in distorm3.Decompose(ta.address, code):
        registerJump = (insn.flowControl == "FC_UNC_BRANCH"
                        and insn.operands[0].type == "Register")
        if registerJump or insn.flowControl == "FC_RET":
            ta.jumpAddress = insn.address
            return True
    return False
def isPrologInlined(self, model, distorm_mode, func_addr):
    """Check whether a function prolog has been modified.

    Only the first two instructions decoded from the 24 bytes at *func_addr*
    are considered.  Accepted prologs are:

    * "32bit": ``PUSH EBP`` followed by ``MOV EBP, ESP``
    * "64bit": ``PUSH RBP`` followed by ``MOV RBP, RSP``, or ``PUSH RBP``
      followed by a PUSH of RSP, RBX, R12, R13, R14 or R15 (registers
      preserved across calls,
      http://people.freebsd.org/~lstewart/references/amd64.pdf)

    Returns True when the first two instructions match none of the accepted
    forms for *model*; False otherwise, including when fewer than two
    instructions are decoded or *model* is neither value.
    """
    def pushes_register(insn, names):
        return (insn.mnemonic == "PUSH" and len(insn.operands) == 1
                and insn.operands[0].type == "Register"
                and insn.operands[0].name in names)

    def moves_register(insn, dst, src):
        return (insn.mnemonic == "MOV" and len(insn.operands) == 2
                and insn.operands[0].type == "Register"
                and insn.operands[1].type == "Register"
                and insn.operands[0].name == dst
                and insn.operands[1].name == src)

    code = self.addr_space.read(func_addr, 24)

    first = None
    for position, op in enumerate(distorm3.Decompose(func_addr, code, distorm_mode), 1):
        if position != 2:
            first = op
            continue

        # Second instruction reached: decide and stop.
        if model == "32bit":
            return not (moves_register(op, "EBP", "ESP")
                        and pushes_register(first, ("EBP",)))
        if model == "64bit":
            frame_setup = (moves_register(op, "RBP", "RSP")
                           and pushes_register(first, ("RBP",)))
            if frame_setup:
                return False
            callee_saves = (pushes_register(first, ("RBP",))
                            and pushes_register(op, ("RSP", "RBX", "R12", "R13", "R14", "R15")))
            return not callee_saves
        return False

    return False
def getJumpDecoder(handlerMappings):
    """Return the address of the jump decoder called from the i7 handler.

    The handler registered under key ``7`` is expected to call the decoder
    9 bytes after its start; ten bytes at that location are decoded and the
    target of the call is returned.

    Args:
        handlerMappings: Mapping from handler number to handler address.

    Returns:
        The value of the call's first operand, or None (after logging) when
        handler 7 is missing or the instruction at that location is not a
        call.
    """
    # ``in`` replaces dict.has_key(), which no longer exists on Python 3.
    if 7 not in handlerMappings:
        x64dbg._plugin_logputs("Cannot get jump decoder, i7 is missing...")
        return None

    # This is where the decoder is called...
    callSite = handlerMappings[7] + 0x9

    instrBuffer = Read(callSite, 10)
    decryptCall = distorm3.Decompose(callSite, instrBuffer)[0]
    if decryptCall.flowControl == "FC_CALL":
        return decryptCall.operands[0].value

    x64dbg._plugin_logputs("Failed to find jump decoder, could not find call to jump decoder in i7 handler")
    return None
def Decompose(self, instructions=10, size=None):
    """A generator for instructions of this object.

    How much to decompose can be specified either by the total number of
    instructions or by the total size to decompose.  Memory is read in
    windows of 0x1000 bytes; decoding of a window stops 40 bytes before its
    end and resumes from that instruction with a fresh read.

    Args:
      instructions: Stop after reaching this many instructions. The
        parameter is ignored when size is specified.

      size: Stop after decoding this much data. If specified we ignore
        the instructions parameter.
    """
    window = 0x1000
    cursor = self.obj_offset
    emitted = 0

    while True:
        chunk = self.obj_vm.read(cursor, window)

        # This could happen if we hit an unmapped page - we just abort.
        if not chunk:
            return

        op = obj.NoneObject()
        for op in distorm3.Decompose(cursor, chunk, self.distorm_mode):
            # Too close to the end of the chunk: re-read starting here.
            if op.address - cursor > len(chunk) - 40:
                break

            if not op.valid:
                continue

            # Exit if we read as much as was required.
            if size is not None:
                if op.address - self.obj_offset > size:
                    return

            yield op

            if size is None:
                if emitted > instructions:
                    return

            emitted += 1

        cursor = op.address
def find_rr_writes_distorm3(address, data):
    """Collect RIP-relative memory writes performed by MOV-family instructions.

    *data* is decoded as 64-bit code starting at *address*; decoding stops at
    the first instruction whose mnemonic starts with ``RET``.

    Returns:
        A list of ``(target_address, size_in_bytes)`` tuples, one for every
        instruction whose mnemonic starts with ``MOV`` and whose first
        operand is an ``AbsoluteMemory`` operand indexed by RIP.
    """
    writes = []
    for insn in distorm3.Decompose(address, data, type=distorm3.Decode64Bits):
        if insn.mnemonic[:3] == 'RET':
            break
        if insn.mnemonic[:3] != 'MOV':
            continue

        # potential write
        opnd = insn.operands[0]
        if opnd.type != 'AbsoluteMemory' or opnd.index is None:
            continue
        # Absolute mov, with target that is register-based
        if distorm3.Registers[opnd.index] != 'RIP':
            continue
        # RIP-relative write, this is what we are looking for.
        # distorm3 opnd.size is measured in bits; floor division keeps the
        # byte count an integer under both Python 2 and Python 3.
        writes.append((insn.address + insn.size + opnd.disp, opnd.size // 8))
    return writes
def parse(self, start_va):
    """Decode one basic block starting at *start_va* and record its instructions.

    Up to ``self.MAX_DECODE_SIZE`` bytes of ``self.execute_section_data`` are
    decoded as 32-bit code, stopping at the first flow-control instruction.
    Each instruction's address is rebased by the start offset and stored in
    ``self.inst_map``; the block size and its last instruction are then
    handed to ``self.handleConrolFlow``.

    When nothing can be decoded, or an IndexError is raised while the block
    is processed, the start address is removed from the map and a message is
    printed.
    """
    start_rva = start_va
    window = self.execute_section_data[start_rva:start_rva + self.MAX_DECODE_SIZE]
    # Round-trip through hex to obtain a plain byte string, as before, but
    # with unhexlify instead of the Python 2-only 'hex' codec.
    raw = binascii.unhexlify(binascii.hexlify(window))
    basic_block = distorm3.Decompose(0x0, raw, distorm3.Decode32Bits,
                                     distorm3.DF_STOP_ON_FLOW_CONTROL)
    try:
        if basic_block:
            basic_block_size = 0
            for inst in basic_block:
                basic_block_size += inst.size
                # decoded relative to 0, so rebase onto the start offset
                inst.address += start_rva
                self.inst_map[inst.address] = inst
            self.handleConrolFlow(basic_block_size, basic_block[-1])
        else:
            self.removeInstructionFromMap(start_rva)
            print("Cannot Parse Addr [0x{:x}]".format(start_rva))
    except IndexError as exc:
        self.removeInstructionFromMap(start_rva)
        # report the actual error rather than the exception class
        print("IndexError: {}".format(exc))
def __init__(self, offset, code, type=distorm3.Decode32Bits, feature=0):
    """
    @param offset Address of the instruction
    @param code Opcode bytes of the instruction
    @param type 32 or 64 bit code; the passed value is ignored because the
        decode mode is always derived from SV.dissassm_type
    @param feature Possible settings for distorm3, not used at the moment
    """
    if SV.dissassm_type == 64:
        type = distorm3.Decode64Bits
    else:
        type = distorm3.Decode32Bits

    decoded = distorm3.Decompose(offset, code, type, feature)
    # Only a buffer that decodes to exactly one instruction is accepted.
    # Instruction is set to None otherwise so that the attribute always
    # exists and the operand count below cannot raise AttributeError.
    self.Instruction = decoded[0] if len(decoded) == 1 else None
    self.valid = bool(self.Instruction is not None and self.Instruction.valid)

    self.opcode_len = len(code)
    self.opcode_bytes = [ord(x) for x in code]
    self.addr = offset

    if self.Instruction is not None:
        self._len = len(self.Instruction.operands) + 1
    else:
        self._len = 1
def __init__(self, offset, code, type=distorm3.Decode32Bits, feature=0):
    """
    @param offset Address of the instruction
    @param code Opcode bytes of the instruction
    @param type Disassemble 32 or 64 bit code; the passed value is ignored
        because the decode mode is always derived from SV.dissassm_type
    @param feature Possible settings for distorm3, not used at the moment
    """
    if SV.dissassm_type == 64:
        type = distorm3.Decode64Bits
    else:
        type = distorm3.Decode32Bits

    decoded = distorm3.Decompose(offset, code, type, feature)
    # Only a buffer that decodes to exactly one instruction is accepted.
    # Instruction is set to None otherwise so that the attribute always
    # exists and the operand count below cannot raise AttributeError.
    self.Instruction = decoded[0] if len(decoded) == 1 else None
    self.valid = bool(self.Instruction is not None and self.Instruction.valid)

    self.opcode_len = len(code)
    self.opcode_bytes = [ord(x) for x in code]
    self.addr = offset

    if self.Instruction is not None:
        self._len = len(self.Instruction.operands) + 1
    else:
        self._len = 1
def get_port_uv(s_off, offset, code, id):
    """Compute the IACA port-usage fingerprint of the basic block at *offset*.

    ``code[offset:]`` is decoded as 64-bit code up to the first flow-control
    instruction.  All instructions except the last one are written, wrapped
    in the IACA start/end markers, to ``/tmp/smother.kernel<id>`` and the
    ``iaca`` binary from ``install_dir`` is run on that file.

    Args:
        s_off: Address passed to the decoder for the first instruction.
        offset: Start offset of the block inside *code*.
        code: Buffer holding the machine code.
        id: Suffix that makes the temporary kernel file name unique.

    Returns:
        ``(fingerprint, count)``: an 8-element numpy array filled from the
        "Cycles" line of the IACA output, and the number of instructions
        written.  ``(zeros, 0)`` is returned when there is at most one
        decoded instruction or when IACA exits with an error.
    """
    insns = distorm3.Decompose(s_off, code[offset:],
                               distorm3.Decode64Bits,
                               distorm3.DF_STOP_ON_FLOW_CONTROL)
    # Nothing left after dropping the final instruction.  An empty decode
    # result is included so the instruction count can never become -1.
    if len(insns) <= 1:
        return numpy.zeros(8), 0

    # create kernel for iaca
    kernel_path = "/tmp/smother.kernel" + str(id)
    with open(kernel_path, "w") as f:
        f.write(iaca_start)
        for insn in insns[:-1]:
            f.write(insn.instructionBytes)
        f.write(iaca_end)

    # run iaca
    try:
        iaca_stats = subprocess.check_output(
            [os.path.join(install_dir, "iaca"), "-reduceout", kernel_path],
            stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        print("exitcode %s output: %s" % (e.returncode, e.output))
        return numpy.zeros(8), 0

    # parse iaca output: the fingerprint values sit at fixed columns of the
    # whitespace-split "Cycles" line
    columns = (3, 6, 8, 11, 14, 16, 18, 20)
    port_fingerprint = numpy.zeros(8)
    for line in iaca_stats.split("\n"):
        fields = line.split()
        if len(fields) > 1 and fields[1] == "Cycles":
            for slot, column in enumerate(columns):
                port_fingerprint[slot] = float(fields[column])
            break

    return port_fingerprint, len(insns) - 1
def call_scan(self, addr_space, base_address, data):
    """Disassemble a block of data and yield possible calls to
    imported functions. We're looking for instructions such as these:

    x86:
    CALL DWORD [0x1000400]
    JMP  DWORD [0x1000400]

    x64:
    CALL QWORD [RIP+0x989d]

    On x86, the 0x1000400 address is an entry in the IAT or call table.
    It stores a DWORD which is the location of the API function being
    called.

    On x64, the 0x989d is a relative offset from the current instruction
    (RIP).

    Only table locations that fall between base_address and the end of the
    data buffer are reported, as (instruction address, table location,
    call destination) tuples.

    @param addr_space: an AS to scan with
    @param base_address: memory base address
    @param data: buffer of data found at base_address
    """
    limit = base_address + len(data)

    model = addr_space.profile.metadata.get('memory_model', '32bit')
    is_32bit = model == '32bit'
    mode = distorm3.Decode32Bits if is_32bit else distorm3.Decode64Bits

    for insn in distorm3.Decompose(base_address, data, mode):
        if not insn.valid:
            continue

        slot = None
        if self._call_or_unc_jmp(insn):
            target = insn.operands[0]
            if is_32bit:
                if target.type == 'AbsoluteMemoryAddress':
                    slot = (target.disp) & 0xffffffff
            elif 'FLAG_RIP_RELATIVE' in insn.flags and target.type == 'AbsoluteMemory':
                slot = insn.address + insn.size + target.disp

        if not slot or slot < base_address or slot > limit:
            continue

        # This is the address being called
        destination = obj.Object("address", offset=slot, vm=addr_space)
        if destination == None:
            continue

        yield insn.address, slot, int(destination)
def copy_data_block(self, full_addr):
    """This function emulates nt!KdCopyDataBlock on a live machine
    by finding the encoded KDBG structure and using the required
    entropy values to decode it.

    The 300 bytes at full_addr are first screened: they must contain one of
    the candidate KDBG sizes divided by 8 and (before version 6.4) one of
    the plain sizes, both packed as 32-bit values, plus a BSWAP instruction
    before the first RET.  The instructions before that RET are then
    searched, in order, for the references to KdpDataBlockEncoded,
    KdDebuggerDataBlock, KiWaitNever and KiWaitAlways.

    Returns the _KDDEBUGGER_DATA64 object (decoded through a buffer address
    space when needed) when its OwnerTag is 'KDBG', otherwise an
    obj.NoneObject describing what was missing.
    """

    sizes = self.unique_sizes()
    alignment = 8
    addr_space = self.obj_vm
    bits = distorm3.Decode64Bits

    # nt!KdCopyDataBlock is about 100 bytes, we don't want to read
    # too little and truncate the function, but too much will reach
    # into other function's space
    code = addr_space.read(full_addr, 300)

    # potentially we crossed a boundary into swapped or unallocated space
    if code == None:
        return obj.NoneObject("Crossed a code boundary")

    # look for any candidate size, expressed in 8-byte units
    found_size = False
    for size in sizes:
        val = struct.pack("I", size / alignment)
        if code.find(val) != -1:
            found_size = True
            break

    if not found_size:
        return obj.NoneObject("Cannot find KDBG size signature")

    version = (addr_space.profile.metadata.get('major', 0), addr_space.profile.metadata.get('minor', 0))
    if version < (6, 4):
        # we don't perform this check for Windows 10.x
        found_str = False
        for size in sizes:
            val = struct.pack("I", size)
            if code.find(val) != -1:
                found_str = True
                break

        if not found_str:
            return obj.NoneObject("Cannot find KDBG size signature")

    ops = list(distorm3.Decompose(full_addr, code, bits))

    # nt!KdDebuggerDataBlock
    kdbg_block = None
    # nt!KiWaitNever
    wait_never = None
    # nt!KiWaitAlways
    wait_always = None
    # nt!KdpDataBlockEncoded
    block_encoded = None

    # collect instructions up to the first RET
    # (BSWAP instructions themselves are not collected)
    before_ret = []
    # we need a bswap instruction to be valid
    found_bswap = False

    for op in ops:
        if op.mnemonic == "BSWAP":
            found_bswap = True
        elif op.mnemonic == "RET":
            break
        else:
            before_ret.append(op)

    if not found_bswap:
        return obj.NoneObject("No bswap instruction found")

    # Each branch only fires while none of the later values has been found,
    # which enforces the order in which the references are expected.
    for op in before_ret:
        # cmp cs:KdpDataBlockEncoded, 0
        if (not (block_encoded or kdbg_block or wait_never or wait_always) and
                    op.mnemonic == "CMP" and
                    op.operands[0].type == "AbsoluteMemory" and
                    op.operands[1].type == "Immediate" and
                    op.operands[1].value == 0):
            # an x64 RIP turned absolute
            offset = op.address + op.size + op.operands[0].disp
            block_encoded = obj.Object("unsigned char",
                                        offset=offset,
                                        vm=addr_space)
        # lea rdx, KdDebuggerDataBlock
        elif (not (kdbg_block or wait_never or wait_always) and
                    op.mnemonic == "LEA" and
                    op.operands[0].type == "Register" and
                    op.operands[0].size == 64 and
                    op.operands[1].type == "AbsoluteMemory" and
                    op.operands[1].dispSize == 32):
            kdbg_block = op.address + op.size + op.operands[1].disp
        # mov r10, cs:KiWaitNever
        elif (not (wait_never or wait_always) and
                    op.mnemonic == "MOV" and
                    op.operands[0].type == "Register" and
                    op.operands[0].size == 64 and
                    op.operands[1].type == "AbsoluteMemory" and
                    op.operands[1].dispSize == 32):
            offset = op.address + op.size + op.operands[1].disp
            wait_never = obj.Object("unsigned long long",
                                        offset=offset,
                                        vm=addr_space)
        # mov r11, cs:KiWaitAlways (Win 8 x64)
        # xor rdx, cs:KiWaitAlways (Win 8.1 x64)
        elif (not wait_always and
                    op.mnemonic in ["MOV", "XOR"] and
                    op.operands[0].type == "Register" and
                    op.operands[0].size == 64 and
                    op.operands[1].type == "AbsoluteMemory" and
                    op.operands[1].dispSize == 32):
            offset = op.address + op.size + op.operands[1].disp
            wait_always = obj.Object("unsigned long long",
                                        offset=offset,
                                        vm=addr_space)
            # last value of interest, nothing more to look for
            break

    # check if we've found all the required offsets
    if (block_encoded != None
                and kdbg_block != None
                and wait_never != None
                and wait_always != None):

        # some acquisition tools decode the KDBG block but leave
        # nt!KdpDataBlockEncoded set, so we handle it here.
        tag_offset = addr_space.profile.get_obj_offset(
            "_DBGKD_DEBUG_DATA_HEADER64", "OwnerTag")
        signature = addr_space.read(kdbg_block + tag_offset, 4)

        if block_encoded == 1 and signature != "KDBG":
            # still encoded: decode into a buffer and overlay the structure
            vals = block_encoded, kdbg_block, wait_never, wait_always
            data = self.decode_kdbg(vals)
            buff = addrspace.BufferAddressSpace(
                config=addr_space.get_config(),
                base_offset=kdbg_block,
                data=data)
            kdbg = obj.Object("_KDDEBUGGER_DATA64",
                                offset=kdbg_block,
                                vm=buff,
                                native_vm=addr_space)
        else:
            kdbg = obj.Object("_KDDEBUGGER_DATA64",
                                offset=kdbg_block,
                                vm=addr_space)

        kdbg.newattr('KdCopyDataBlock', full_addr)
        kdbg.newattr('block_encoded',
                        block_encoded == 1 and signature != "KDBG")
        kdbg.newattr('wait_never', wait_never)
        kdbg.newattr('wait_always', wait_always)

        # 0x4742444b is the bytes 'KDBG' read as a little-endian 32-bit value
        if kdbg.Header.OwnerTag == 0x4742444b:
            return kdbg

    return obj.NoneObject("Cannot find decoding entropy values")
def copy_data_block(self, full_addr):
    """This function emulates nt!KdCopyDataBlock on a live machine
    by finding the encoded KDBG structure and using the required
    entropy values to decode it.

    The 300 bytes at full_addr must contain both the KDBG size taken from
    the KDBGHeader magic and that size divided by 8, packed as 32-bit
    values.  The decoded instructions are then searched for the references
    to KdpDataBlockEncoded, KdDebuggerDataBlock, KiWaitNever and
    KiWaitAlways; the search stops at the KiWaitAlways match, or at a RET
    that is reached once a KiWaitAlways value is already set.

    Returns the _KDDEBUGGER_DATA64 object (decoded through a buffer address
    space when needed), or an obj.NoneObject describing what was missing.
    """

    # this unpacks the kdbgsize from the signature
    # (the last two bytes of the header, little-endian)
    header = obj.VolMagic(self.obj_vm).KDBGHeader.v()
    kdbg_size = struct.unpack("<H", header[-2:])[0]

    size_str = struct.pack("I", kdbg_size)
    alignment = 8
    addr_space = self.obj_vm
    bits = distorm3.Decode64Bits

    # nt!KdCopyDataBlock is about 100 bytes, we don't want to read
    # too little and truncate the function, but too much will reach
    # into other function's space
    code = addr_space.read(full_addr, 300)

    # potentially we crossed a boundary into swapped or unallocated space
    if code == None:
        return obj.NoneObject("Crossed a code boundary")

    # both the size in 8-byte units and the plain size must be present
    if (code.find(struct.pack("I", kdbg_size / alignment)) == -1 or
                code.find(size_str) == -1):
        return obj.NoneObject("Cannot find KDBG size signature")

    ops = list(distorm3.Decompose(full_addr, code, bits))

    # nt!KdDebuggerDataBlock
    kdbg_block = None
    # nt!KiWaitNever
    wait_never = None
    # nt!KiWaitAlways
    wait_always = None
    # nt!KdpDataBlockEncoded
    block_encoded = None

    # Branches are tried in order for every instruction; each value is only
    # taken while it is still unset (or falsy).
    for op in ops:
        # cmp cs:KdpDataBlockEncoded, 0
        if (not block_encoded and op.mnemonic == "CMP" and
                    op.operands[0].type == "AbsoluteMemory" and
                    op.operands[1].type == "Immediate" and
                    op.operands[1].value == 0):
            # an x64 RIP turned absolute
            offset = op.address + op.size + op.operands[0].disp
            block_encoded = obj.Object("unsigned char",
                                        offset=offset,
                                        vm=addr_space)
        # lea rdx, KdDebuggerDataBlock
        elif (not kdbg_block and op.mnemonic == "LEA" and
                    op.operands[0].type == "Register" and
                    op.operands[0].size == 64 and
                    op.operands[1].type == "AbsoluteMemory" and
                    op.operands[1].dispSize == 32):
            kdbg_block = op.address + op.size + op.operands[1].disp
        # mov r10, cs:KiWaitNever
        elif (not wait_never and op.mnemonic == "MOV" and
                    op.operands[0].type == "Register" and
                    op.operands[0].size == 64 and
                    op.operands[1].type == "AbsoluteMemory" and
                    op.operands[1].dispSize == 32):
            offset = op.address + op.size + op.operands[1].disp
            wait_never = obj.Object("unsigned long long",
                                        offset=offset,
                                        vm=addr_space)
        # mov r11, cs:KiWaitAlways (Win 8 x64)
        # xor rdx, cs:KiWaitAlways (Win 8.1 x64)
        elif (not wait_always and
                    op.mnemonic in ["MOV", "XOR"] and
                    op.operands[0].type == "Register" and
                    op.operands[0].size == 64 and
                    op.operands[1].type == "AbsoluteMemory" and
                    op.operands[1].dispSize == 32):
            offset = op.address + op.size + op.operands[1].disp
            wait_always = obj.Object("unsigned long long",
                                        offset=offset,
                                        vm=addr_space)
            # last value of interest, nothing more to look for
            break
        elif op.mnemonic == "RET":
            break

    # check if we've found all the required offsets
    if (block_encoded != None
                and kdbg_block != None
                and wait_never != None
                and wait_always != None):

        # some acquisition tools decode the KDBG block but leave
        # nt!KdpDataBlockEncoded set, so we handle it here.
        tag_offset = addr_space.profile.get_obj_offset(
            "_DBGKD_DEBUG_DATA_HEADER64", "OwnerTag")
        signature = addr_space.read(kdbg_block + tag_offset, 4)

        if block_encoded == 1 and signature != "KDBG":
            # still encoded: decode into a buffer and overlay the structure
            vals = block_encoded, kdbg_block, wait_never, wait_always
            data = self.decode_kdbg(vals)
            buff = addrspace.BufferAddressSpace(
                config=addr_space.get_config(),
                base_offset=kdbg_block,
                data=data)
            kdbg = obj.Object("_KDDEBUGGER_DATA64",
                                offset=kdbg_block,
                                vm=buff,
                                native_vm=addr_space)
        else:
            kdbg = obj.Object("_KDDEBUGGER_DATA64",
                                offset=kdbg_block,
                                vm=addr_space)

        kdbg.newattr('KdCopyDataBlock', full_addr)
        kdbg.newattr('block_encoded',
                        block_encoded == 1 and signature != "KDBG")
        kdbg.newattr('wait_never', wait_never)
        kdbg.newattr('wait_always', wait_always)
        return kdbg
    else:
        return obj.NoneObject("Cannot find decoding entropy values")
def isInlined(self, model, distorm_mode, func_addr, kernel_syms, kmods):
    """Check a function prologue for a redirection that leaves known modules.

    Adapted from the malware/apihooks.py check_inline function. At most
    three instructions at func_addr are examined, looking for direct and
    indirect CALLs and JMPs, CALL/JMP through a register loaded with
    MOV reg, imm, and PUSH imm / RET pairs.

    @param model: '32bit' reads stored pointers as 32-bit values; any
    other value reads them as 64-bit values.
    @param distorm_mode: decode mode handed to distorm3.Decompose.
    @param func_addr: virtual address of the function to check.
    @param kernel_syms: passed through to self.outside_module.
    @param kmods: passed through to self.outside_module.

    @returns: a tuple (inlined, d). d is the last destination address
    seen (None if there was none) and inlined is True when d lies
    outside the known modules and was not filtered as a false positive.
    """
    # Mnemonics which, when they directly follow the suspected redirect,
    # indicate that struct data was decoded rather than real code.
    false_positive_followers = ["DB 0xff", "ADD", "XCHG", "OUTS"]

    data = self.addr_space.read(func_addr, 24)
    if not data:
        # Unreadable prologue: there is nothing to decode.
        return (False, None)

    # Decode once; the same list serves the scan and the filter below.
    ops = list(distorm3.Decompose(func_addr, data, distorm_mode))

    def read_pointer(disp):
        # Fetch the branch target stored at an absolute memory operand.
        # Pointers are always read unsigned so that high addresses do
        # not turn into negative numbers.
        if model == '32bit':
            return obj.Object("unsigned int",
                              offset=disp & 0xFFFFFFFF,
                              vm=self.addr_space)
        return obj.Object("unsigned long long",
                          offset=disp,
                          vm=self.addr_space)

    # Number of instructions disassembled so far
    n = 0
    # Destination address of hooks
    d = None
    # Save the last PUSH before a RET
    push_val = None
    # Save the general purpose registers
    regs = {}

    for op in ops:
        # Quit the loop when we have three instructions or when
        # a decomposition error is encountered, whichever is first.
        if not op.valid or n == 3:
            break

        if op.flowControl == 'FC_CALL':
            push_val = None
            target = op.operands[0]
            if (op.mnemonic == "CALL" and
                    target.type == 'AbsoluteMemoryAddress'):
                # CALL [ADDR]
                d = read_pointer(target.disp)
                if self.outside_module(d, kernel_syms, kmods):
                    break
            elif target.type == 'Immediate':
                # CALL ADDR
                d = target.value
                if self.outside_module(d, kernel_syms, kmods):
                    break
            elif target.type == 'Register':
                # CALL REG
                d = regs.get(target.name)
                if d and self.outside_module(d, kernel_syms, kmods):
                    break
        elif op.flowControl == 'FC_UNC_BRANCH' and op.mnemonic == "JMP":
            push_val = None
            target = op.operands[0]
            if op.size > 2:
                if target.type == 'AbsoluteMemoryAddress':
                    # JMP [ADDR]
                    d = read_pointer(target.disp)
                    if self.outside_module(d, kernel_syms, kmods):
                        break
                elif target.type == 'Immediate':
                    # JMP ADDR
                    d = target.value
                    if self.outside_module(d, kernel_syms, kmods):
                        break
            elif op.size == 2 and target.type == 'Register':
                # JMP REG
                d = regs.get(target.name)
                if d and self.outside_module(d, kernel_syms, kmods):
                    break
        elif op.flowControl == 'FC_NONE':
            # Check for PUSH followed by a RET
            if (op.mnemonic == "PUSH" and
                    op.operands[0].type == 'Immediate' and
                    op.size == 5):
                push_val = op.operands[0].value
            # Check for moving immediate values into a register
            if (op.mnemonic == "MOV" and
                    op.operands[0].type == 'Register' and
                    op.operands[1].type == 'Immediate'):
                push_val = None
                regs[op.operands[0].name] = op.operands[1].value
        elif op.flowControl == 'FC_RET':
            if push_val:
                d = push_val
                if self.outside_module(d, kernel_syms, kmods):
                    break
            # This causes us to stop disassembling when
            # reaching the end of a function
            break

        n += 1

    inlined = False
    if d and self.outside_module(d, kernel_syms, kmods) == True:
        # filtering out false positives due to structs, you can tweak
        # this as needed. When nothing was decoded after the redirect
        # there is nothing to filter on, so the finding stands.
        follower = ops[n + 1] if n + 1 < len(ops) else None
        if (follower is None or
                str(follower.mnemonic) not in false_positive_followers):
            inlined = True

    return (inlined, d)
def isCallReferenceModified(self, model, distorm_mode, func_addr,
                            kernel_syms, kmods):
    """Check whether a CALL inside a function targets an unknown module.

    Adapted from the malware/apihooks.py check_inline function. Up to
    750 bytes at func_addr are decoded; scanning stops at the first
    invalid instruction, at the first NOP (treated as the end of the
    function) or at the first CALL whose target self.outside_module
    reports as being outside the kernel/kext range.

    @param model: '32bit' reads stored pointers as 32-bit values; any
    other value reads them as 64-bit values.
    @param distorm_mode: decode mode handed to distorm3.Decompose.
    @param func_addr: virtual address of the function to check.
    @param kernel_syms: passed through to self.outside_module.
    @param kmods: passed through to self.outside_module.

    @returns: a tuple (modified, d). d is the last CALL destination
    seen (None if there was none or it could not be resolved) and
    modified is True when d lies outside the known modules and was not
    filtered as a false positive.
    """
    # Mnemonics which, when they directly follow the suspected CALL,
    # indicate that struct data was decoded rather than real code.
    false_positive_followers = ["DB 0xff", "ADD", "XCHG", "OUTS"]

    data = self.addr_space.read(func_addr, 750)
    if not data:
        # Unreadable function body: there is nothing to decode.
        return (False, None)

    # Decode once; the same list serves the scan and the filter below.
    ops = list(distorm3.Decompose(func_addr, data, distorm_mode))

    def read_pointer(disp):
        # Fetch the call target stored at an absolute memory operand.
        if model == '32bit':
            return obj.Object("unsigned int",
                              offset=disp & 0xFFFFFFFF,
                              vm=self.addr_space)
        return obj.Object("unsigned long long",
                          offset=disp,
                          vm=self.addr_space)

    # Number of instructions disassembled so far
    n = 0
    # Destination address of hooks
    d = None

    for op in ops:
        # Quit when a decomposition error is encountered
        # or when reach function end
        if not op.valid or op.mnemonic == "NOP":
            break

        if op.flowControl == 'FC_CALL':
            target = op.operands[0]
            if (op.mnemonic == "CALL" and
                    target.type == 'AbsoluteMemoryAddress'):
                # CALL [ADDR]
                d = read_pointer(target.disp)
                if self.outside_module(d, kernel_syms, kmods):
                    break
            elif target.type == 'Immediate':
                # CALL ADDR
                d = target.value
                if self.outside_module(d, kernel_syms, kmods):
                    break
            elif target.type == 'Register':
                # CALL REG: register contents are not tracked by this
                # check, so the destination is unknown.
                d = None

        n += 1

    modified = False
    if d and self.outside_module(d, kernel_syms, kmods) == True:
        # filtering out false positives due to structs, you can tweak
        # this as needed. When nothing was decoded after the CALL there
        # is nothing to filter on, so the finding stands.
        follower = ops[n + 1] if n + 1 < len(ops) else None
        if (follower is None or
                str(follower.mnemonic) not in false_positive_followers):
            modified = True

    return (modified, d)
def check_syscall(addr_space, module, module_group):
    """
    Enumerate syscall hooks in ntdll.dll. A syscall hook is one
    that modifies the function prologue of an NT API function
    (i.e. ntdll!NtCreateFile) or swaps the location of the sysenter
    with a malicious address.

    Only exports whose prologue decodes as
    MOV EAX, imm / MOV EDX, imm / <instruction with an operand>
    are examined; everything else is skipped.

    @param addr_space: a process AS for the process containing the
    ntdll.dll module.

    @param module: the _LDR_DATA_TABLE_ENTRY for ntdll.dll

    @param module_group: a ModuleGroup instance for the process.

    @returns: a generator of Hook objects, one per export whose syscall
    stub address is neither KiFastSystemCall nor KiIntSystemCall.
    """
    # Resolve the real location of KiFastSystemCall for comparison
    KiFastSystemCall = module.getprocaddress("KiFastSystemCall")
    KiIntSystemCall = module.getprocaddress("KiIntSystemCall")

    if not KiFastSystemCall or not KiIntSystemCall:
        return

    # Add the RVA to make it absolute
    KiFastSystemCall += module.DllBase
    KiIntSystemCall += module.DllBase

    def loads_immediate(inst, register):
        # True for "MOV <register>, <immediate>"
        return (inst.mnemonic == "MOV" and
                inst.operands[0].type == 'Register' and
                inst.operands[0].name == register and
                inst.operands[1].type == 'Immediate')

    # Check each exported function if its an NT syscall
    for _, f, n in module.exports():
        # Ignore forwarded exports
        if not f:
            continue

        function_address = module.DllBase + f

        # Skip functions whose code is not resident (paged)
        if not addr_space.is_valid_address(function_address):
            continue

        # Read enough of the function prologue for three instructions
        data = addr_space.zread(function_address, 24)

        # Keep the leading run of valid instructions, three at most
        instructions = []
        for op in distorm3.Decompose(function_address, data,
                                     distorm3.Decode32Bits):
            if not op.valid or len(instructions) == 3:
                break
            instructions.append(op)

        # The prologue did not decompose into three valid instructions
        if len(instructions) < 3:
            continue

        i0, i1, i2 = instructions

        # The two MOVs need two operands each, and the third instruction
        # needs an operand to tell the two stub layouts apart.
        if (len(i0.operands) != 2 or len(i1.operands) != 2 or
                not i2.operands):
            continue

        # Now check the instruction and operand types
        if not (loads_immediate(i0, 'EAX') and loads_immediate(i1, 'EDX')):
            continue

        if i2.operands[0].type == "Register":
            # KiFastSystemCall is already in the register
            syscall_address = i1.operands[1].value
        else:
            # Pointer to where KiFastSystemCall is stored
            syscall_address = obj.Object('address',
                                         offset=i1.operands[1].value,
                                         vm=addr_space)

        if syscall_address in [KiFastSystemCall, KiIntSystemCall]:
            continue

        hook_module = module_group.find_module(syscall_address)
        hook = Hook(
            hook_type=HOOKTYPE_NT_SYSCALL,
            hook_mode=HOOK_MODE_USER,
            function_name=n or '',
            function_address=function_address,
            hook_address=syscall_address,
            hook_module=hook_module,
            victim_module=module,
        )
        # Add the bytes that will later be disassembled in the
        # output to show exactly how the hook works. The first
        # hop is the ntdll!Nt* API and the next hop is the rootkit.
        hook.add_hop_chunk(function_address, data)
        hook.add_hop_chunk(syscall_address,
                           addr_space.zread(syscall_address, 24))
        yield hook
def check_inline(va, addr_space, mem_start, mem_end):
    """
    Look for an inline API hook in the first instructions of a function.
    Direct and indirect calls, direct and indirect jumps, CALL/JMP
    through a register loaded with MOV reg, imm, and PUSH/RET
    combinations are recognised. The prologue is decoded as 32-bit code.

    @param va: the virtual address of the function to check

    @param addr_space: process or kernel AS where the function resides

    @param mem_start: base address of the module containing the
    function being checked.

    @param mem_end: end address of the module containing the func
    being checked.

    @returns: a tuple of (hooked, data, hook_address), or None when
    the 24 bytes read at va are all zero.
    """
    data = addr_space.zread(va, 24)

    # An all-zero buffer means the prologue could not be read.
    if data == "\x00" * len(data):
        return None

    def outside_module(x):
        # A missing destination is never considered outside the module.
        return x != None and (x < mem_start or x > mem_end)

    def read_dword(disp):
        # Fetch the 32-bit branch target stored at an absolute address.
        return obj.Object("unsigned int",
                          offset=disp & 0xFFFFFFFF,
                          vm=addr_space)

    # Destination address of hooks
    d = None
    # Save the last PUSH before a RET
    push_val = None
    # Save the general purpose registers
    regs = {}

    decoded = distorm3.Decompose(va, data, distorm3.Decode32Bits)

    # n is the number of instructions disassembled so far
    for n, op in enumerate(decoded):
        # Quit the loop when we have three instructions or when
        # a decomposition error is encountered, whichever is first.
        if not op.valid or n == 3:
            break

        flow = op.flowControl

        if flow == 'FC_CALL':
            push_val = None
            first = op.operands[0]
            if (op.mnemonic == "CALL" and
                    first.type == 'AbsoluteMemoryAddress'):
                # CALL [ADDR]
                d = read_dword(first.disp)
                if outside_module(d):
                    break
            elif first.type == 'Immediate':
                # CALL ADDR
                d = first.value & 0xFFFFFFFF
                if outside_module(d):
                    break
            elif first.type == 'Register':
                # CALL REG
                d = regs.get(first.name)
                if d and outside_module(d):
                    break
        elif flow == 'FC_UNC_BRANCH' and op.mnemonic == "JMP":
            push_val = None
            first = op.operands[0]
            if op.size > 2:
                if first.type == 'AbsoluteMemoryAddress':
                    # JMP [ADDR]
                    d = read_dword(first.disp)
                    if outside_module(d):
                        break
                elif first.type == 'Immediate':
                    # JMP ADDR
                    d = first.value & 0xFFFFFFFF
                    if outside_module(d):
                        break
            elif op.size == 2 and first.type == 'Register':
                # JMP REG
                d = regs.get(first.name)
                if d and outside_module(d):
                    break
        elif flow == 'FC_NONE':
            # Remember a 5-byte PUSH imm in case a RET follows
            if (op.mnemonic == "PUSH" and
                    op.operands[0].type == 'Immediate' and
                    op.size == 5):
                push_val = op.operands[0].value & 0xFFFFFFFF
            # Track immediate values moved into a register
            if (op.mnemonic == "MOV" and
                    op.operands[0].type == 'Register' and
                    op.operands[1].type == 'Immediate'):
                push_val = None
                regs[op.operands[0].name] = op.operands[1].value
        elif flow == 'FC_RET':
            if push_val:
                d = push_val
                if outside_module(d):
                    break
            # This causes us to stop disassembling when
            # reaching the end of a function
            break

    # Check EIP after the function prologue
    return bool(outside_module(d)), data, d
def findcookie(self, kernel_space):
    """Locate nt!ObHeaderCookie and cache its value in self._cookie.

    Returns True on success and False on failure. True is also returned
    without any work when the profile is older than Windows 10 (the
    algorithm does not apply) or when a cookie is already cached.

    This method must be called before performing any tasks that require
    object header validation including handles, psxview (due to pspcid)
    and the object scanning plugins (psscan, etc).

    NOTE: this cannot be implemented as a volatility "magic" class,
    because it must be persistent across various classes and sources.
    We don't want to recalculate the cookie value multiple times.
    """
    meta = kernel_space.profile.metadata
    vers = (meta.get("major", 0), meta.get("minor", 0))

    # this algorithm only applies to Windows 10 or greater
    if vers < (6, 4):
        return True

    # prevent subsequent attempts from recalculating the existing value
    if self._cookie:
        return True

    if not has_distorm:
        debug.warning("distorm3 module is not installed")
        return False

    kdbg = tasks.get_kdbg(kernel_space)
    if not kdbg:
        debug.warning("Cannot find KDBG")
        return False

    # The first module reported by KDBG is taken to be the NT module.
    nt_mod = next(iter(kdbg.modules()), None)
    if nt_mod == None:
        debug.warning("Cannot find NT module")
        return False

    func_addr = nt_mod.getprocaddress("ObGetObjectType")
    if func_addr == None:
        debug.warning("Cannot find nt!ObGetObjectType")
        return False

    # produce an absolute address by adding the DLL base to the RVA
    func_addr += nt_mod.DllBase
    if not nt_mod.obj_vm.is_valid_address(func_addr):
        debug.warning(
            "nt!ObGetObjectType at {0} is invalid".format(func_addr))
        return False

    # in theory...but so far we haven't tested 32-bits
    model = meta.get("memory_model")
    is_32bit = model == "32bit"
    if is_32bit:
        mode = distorm3.Decode32Bits
        # movzx ecx, byte ptr ds:_ObHeaderCookie
        wanted_flag = 'FLAG_DST_WR'
        wanted_mem_type = 'AbsoluteMemoryAddress'
    else:
        mode = distorm3.Decode64Bits
        # movzx ecx, byte ptr cs:ObHeaderCookie
        wanted_flag = 'FLAG_RIP_RELATIVE'
        wanted_mem_type = 'AbsoluteMemory'

    data = nt_mod.obj_vm.read(func_addr, 100)
    ops = distorm3.Decompose(func_addr, data, mode,
                             distorm3.DF_STOP_ON_RET)

    # search backwards from the RET and find the MOVZX
    cookie_addr = None
    for op in reversed(ops):
        if (op.size != 7 or wanted_flag not in op.flags or
                len(op.operands) != 2):
            continue
        dst, src = op.operands
        if (dst.type != 'Register' or src.type != wanted_mem_type or
                src.size != 8):
            continue
        if is_32bit:
            cookie_addr = src.disp & 0xFFFFFFFF
        else:
            # RIP-relative displacement turned into an absolute address
            cookie_addr = op.address + op.size + src.disp
        break

    if not cookie_addr:
        debug.warning("Cannot find nt!ObHeaderCookie")
        return False

    if not nt_mod.obj_vm.is_valid_address(cookie_addr):
        debug.warning(
            "nt!ObHeaderCookie at {0} is not valid".format(cookie_addr))
        return False

    cookie = obj.Object("unsigned int", offset=cookie_addr,
                        vm=nt_mod.obj_vm)
    self._cookie = int(cookie)
    return True