def distormDisassemble(filename, code, bits, countStatisticsResult=False):
    """Disassemble ``code`` and dump each instruction to ``<filename>.opcode``.

    Every decoded instruction is echoed to stdout (offset, hex bytes, text)
    and written to the output file, one instruction per line.

    :param filename: base name; output goes to ``filename + ".opcode"``.
    :param code: raw machine code handed to ``distorm3.DecodeGenerator``.
    :param bits: ``'16bit'`` or ``'32bit'``; any other value selects 64-bit.
    :param countStatisticsResult: accepted for signature compatibility;
        this variant does not use it.
    :return: an empty list (this variant never collects opcodes).
    """
    distormFilename = filename + ".opcode"
    distormOpcodeList = []

    if bits == '16bit':
        mode = distorm3.Decode16Bits
    elif bits == '32bit':
        mode = distorm3.Decode32Bits
    else:
        mode = distorm3.Decode64Bits

    # The context manager closes (and flushes) the listing even when
    # decoding raises part-way through; the original leaked the handle.
    with open(distormFilename, 'w') as distormFile:
        iterable = distorm3.DecodeGenerator(0, code, mode)
        for (offset, size, instruction, hexdump) in iterable:
            print("%.8x: %-32s %s" % (offset, hexdump, instruction))
            distormFile.write(instruction + "\n")

    print(filename + " is disassembled.")
    return distormOpcodeList
def Distorm3Decoder(self, address, data):
    """Return a distorm3 ``DecodeGenerator`` over ``data`` in 32-bit mode.

    @package : Distorm3

    ``address`` is passed through as the offset of the first decoded byte.
    distorm3 is imported on each call rather than at module level.
    """
    import distorm3

    decode_mode = distorm3.Decode32Bits
    decoder = distorm3.DecodeGenerator(address, data, decode_mode)
    return decoder
def get_info(fileName):
    """Count instruction mnemonics in the executable sections of a PE file.

    Each section whose ``IMAGE_SCN_MEM_EXECUTE`` flag is set is decoded in
    32-bit mode; the first whitespace-separated token of every instruction
    is tallied. The tallies are merged into the module-level
    ``insert_test_doc`` mapping, which is printed and returned.

    :param fileName: path handed to ``pefile.PE``.
    :return: ``insert_test_doc`` on success; ``None`` (after printing an
        error marker) when anything in the pipeline raises.
    """
    try:
        pe = pefile.PE(fileName)
        op_list_count = {}
        for section in pe.sections:
            # section_flags is defined elsewhere; each item carries the
            # flag attribute name at index 0.
            flags = [flag[0] for flag in sorted(section_flags)
                     if getattr(section, flag[0])]
            if 'IMAGE_SCN_MEM_EXECUTE' not in flags:
                continue
            iterable = distorm3.DecodeGenerator(0, section.get_data(),
                                                distorm3.Decode32Bits)
            for (offset, size, instruction, hexdump) in iterable:
                op_code = instruction.split()[0]
                # Presumably strips the b'...' wrapper left by str() when
                # distorm3 hands back bytes -- TODO confirm.
                op_code = str(op_code).lstrip('b')
                op_code = str(op_code).replace("'", "")
                op_list_count[op_code] = op_list_count.get(op_code, 0) + 1
        pe.parse_data_directories(
            pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'])
        insert_test_doc.update(op_list_count)
        print("[+] insert_test_doc =====> ", insert_test_doc)
        return insert_test_doc
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way the bare ``except:`` did.
        print("[+] GET_MAL_INFO ERROR")
        return None
def disassembly_only(self):
    """Return MD5 summaries and a 16-bit disassembly listing of ``self.BootCode``.

    Decoding stops at the first instruction whose text is exactly ``RET``;
    the bytes after it are appended as a hex dump produced by
    ``self.Hexdump``. Two digests are reported: one over the code up to and
    including that ``RET`` (the whole buffer when no ``RET`` is seen) and
    one over the full buffer.

    :return: list of three strings -- partial digest line, full digest
        line, and the listing text.
    """
    boot_code = self.BootCode

    full_digest = hashlib.md5()
    full_digest.update(boot_code)
    partial_digest = hashlib.md5()

    hashed_prefix = boot_code
    listing = ""
    decoder = distorm3.DecodeGenerator(0, boot_code, distorm3.Decode16Bits)
    for (offset, size, instruction, hexdump) in decoder:
        listing += "0x%.8x: %-32s %s\n" % (offset, hexdump, instruction)
        if instruction != "RET":
            continue

        end = offset + size
        hashed_prefix = boot_code[0:end]
        dump_rows = [
            "{0:#010x}: {1:<48} {2}".format(o, h, ''.join(c))
            for o, h, c in self.Hexdump(boot_code[end:], end)
        ]
        listing += "\n" + "\n".join(dump_rows)
        break

    partial_digest.update(hashed_prefix)
    return [
        "Bootcode md5 (up to RET): {0}".format(partial_digest.hexdigest()),
        "Full Bootcode md5: {0}\n".format(full_digest.hexdigest()),
        listing,
    ]
def dis(address, length=128, space=None, mode=None):
    """Disassemble code at a given address.

    Disassembles code starting at address for a number of bytes
    given by the length parameter (default: 128) and prints one line per
    instruction (offset, hex bytes, instruction text).

    Note: This feature requires distorm, available at
        http://www.ragestorm.net/distorm/

    The mode is '32bit' or '64bit'. If not supplied, the disasm
    mode is taken from the profile. Any value other than '32bit'
    selects 64-bit decoding.

    Returns None; when distorm3 has not been imported an error line is
    printed and nothing else happens.
    """
    # ``in`` and print() work on both Python 2 and 3; dict.has_key and the
    # print statement do not exist on Python 3.
    if "distorm3" not in sys.modules:
        print("ERROR: Disassembly unavailable, distorm not found")
        return

    if not space:
        # NOTE(review): ``self`` is not a parameter here; it must come from
        # an enclosing scope -- confirm against the surrounding code.
        space = self.eproc.get_process_address_space()

    if not mode:
        mode = space.profile.metadata.get('memory_model', '32bit')

    if mode == '32bit':
        distorm_mode = distorm3.Decode32Bits
    else:
        distorm_mode = distorm3.Decode64Bits

    data = space.read(address, length)
    iterable = distorm3.DecodeGenerator(address, data, distorm_mode)
    for (offset, _size, instruction, hexdump) in iterable:
        print("{0:<#8x} {1:<32} {2}".format(offset, hexdump, instruction))
def get_info(filepath='calc.exe'):
    """Count instruction mnemonics in the executable sections of a PE file.

    Each section whose ``IMAGE_SCN_MEM_EXECUTE`` flag is set is decoded in
    32-bit mode and the first whitespace-separated token of every
    instruction is tallied. The resulting dict and its type are printed.

    :param filepath: path handed to ``pefile.PE``. Defaults to
        ``'calc.exe'``, the value that used to be hard-coded, so existing
        zero-argument callers behave as before.
    :return: dict mapping mnemonic text to occurrence count.
    """
    pe = pefile.PE(filepath)
    op_list_count = {}
    for section in pe.sections:
        # section_flags is defined elsewhere; each item carries the flag
        # attribute name at index 0.
        flags = [flag[0] for flag in sorted(section_flags)
                 if getattr(section, flag[0])]
        if 'IMAGE_SCN_MEM_EXECUTE' not in flags:
            continue
        iterable = distorm3.DecodeGenerator(0, section.get_data(),
                                            distorm3.Decode32Bits)
        for (offset, size, instruction, hexdump) in iterable:
            op_code = instruction.split()[0]
            # Presumably strips the b'...' wrapper left by str() when
            # distorm3 hands back bytes -- TODO confirm.
            op_code = str(op_code).lstrip('b')
            op_code = str(op_code).replace("\'", "")
            op_list_count[op_code] = op_list_count.get(op_code, 0) + 1
    print(op_list_count)
    print(type(op_list_count))
    return op_list_count
def diassemble(self, filename, bits='32bit'):
    """Read an executable file and return a distorm3 instruction iterator.

    :param filename: Executable file path
    :type filename: str
    :param bits: decoding width; ``'16bit'`` or ``'32bit'``, any other
        value selects 64-bit (default: ``'32bit'``)
    :type bits: str
    :return: ``distorm3.DecodeGenerator`` over the whole file, offset 0
    :rtype: iterator
    """
    # The file is consumed as raw bytes; nothing is parsed before decoding.
    with open(filename, 'rb') as binary_file:
        raw_bytes = binary_file.read()

    # 64-bit is the fallback for any unrecognised ``bits`` value.
    decode_mode = distorm3.Decode64Bits
    if bits == '16bit':
        decode_mode = distorm3.Decode16Bits
    elif bits == '32bit':
        decode_mode = distorm3.Decode32Bits

    return distorm3.DecodeGenerator(0, raw_bytes, decode_mode)
def calculate(self):
    """Advance past the instruction at EIP without descending into calls.

    Reads 0x20 bytes at EIP from the current process address space and
    decodes the first instruction in 32-bit mode. If its text contains
    ``"CALL "``, gdb is told to run ``until`` the address immediately after
    it; otherwise a single step is issued through ``self.core.functions.si``.
    Afterwards ``self.core.functions.uce.calculate()`` is invoked to refresh
    the EPROCESS state.
    """
    eip = self.core.functions.gr("eip")
    space = self.core.current_EPROCESS.get_process_address_space()
    factor = 0x20  # number of bytes read at EIP

    data = space.read(eip, factor)
    iterable = distorm3.DecodeGenerator(eip, data, distorm3.Decode32Bits)
    # next() works on Python 2.6+ and 3; the .next() method is Python 2 only.
    _, size, instruction, _ = next(iterable)

    if "CALL " in instruction:
        neip = eip + size
        gdb.execute("until *0x%x" % neip)
    else:
        self.core.functions.si()

    # we need to update EPROCESS
    self.core.functions.uce.calculate()
def dis(self, address=0, length=128, code=None, mode=None):
    """Yield ``(offset, hexdump, instruction)`` for each instruction in ``code``.

    ``code`` is decoded with ``address`` as the offset of its first byte.

    Note: This feature requires distorm, available at
        http://www.ragestorm.net/distorm/

    NOTE(review): ``mode`` is currently overwritten with
    ``distorm3.Decode32Bits`` before decoding, so the argument has no
    effect; the selection logic around that assignment is commented out in
    the original. ``length`` is not used either. Confirm whether honouring
    '16bit'/'32bit'/'64bit' is intended before changing this.

    This is a generator: when distorm3 has not been imported the error line
    is printed on first iteration and nothing is yielded.
    """
    # ``in`` and print() work on both Python 2 and 3; dict.has_key and the
    # print statement do not exist on Python 3.
    if "distorm3" not in sys.modules:
        print("ERROR: Disassembly unavailable, distorm not found")
        return

    data = code
    mode = distorm3.Decode32Bits  # unconditional, see NOTE(review) above
    distorm_mode = mode

    iterable = distorm3.DecodeGenerator(address, data, distorm_mode)
    for (offset, _size, instruction, hexdump) in iterable:
        yield offset, hexdump, instruction
def _get_opcodes(self, data):
    """Return one formatted listing line per instruction decoded from ``data``.

    Decoding starts at ``self._offset`` and uses ``self._options`` as the
    distorm3 decode type. Each line holds the offset, the hex dump and the
    lower-cased instruction text.
    """
    decoded = distorm3.DecodeGenerator(self._offset, data, self._options)
    return [
        '%.8x: %-40s %s' % (address, hex_bytes, text.lower())
        for (address, _length, text, hex_bytes) in decoded
    ]
def disasm_from_memory(memory_dump_path, pid, base_address, memory_len):
    """Return the 32-bit disassembly of a memory region as a single string.

    The bytes come from ``get_memory_from_proc`` and are decoded with
    ``base_address`` as the offset of the first byte.

    NOTE(review): the formatted instructions are concatenated with no
    separator between them (no newline) -- confirm this is what callers
    expect.
    """
    region = get_memory_from_proc(memory_dump_path, pid, base_address,
                                  memory_len)
    decoder = distorm3.DecodeGenerator(base_address, region,
                                       distorm3.Decode32Bits)
    return "".join(
        "{0:<#8x} {1:<32} {2}".format(address, hex_bytes, text)
        for (address, _size, text, hex_bytes) in decoder
    )
def Disasm(self, address, data):
    '''
    Disassemble the binary data stream in 32-bit mode and print one line
    per instruction (offset, hex bytes, instruction text).

    @Param address - index address (int type)
    @Param data - binary data
    '''
    ins = distorm3.DecodeGenerator(address, data, distorm3.Decode32Bits)
    for (offset, _size, instruction, hexdump) in ins:
        # print() with a single argument behaves the same on Python 2 and 3;
        # the print statement is a syntax error on Python 3.
        print("{0:<#8x} {1:<32} {2}".format(offset, hexdump, instruction))
    return
def _section_label(raw_name):
    """Return a PE section name as text with its trailing NUL padding removed."""
    if isinstance(raw_name, bytes):
        return raw_name.rstrip(b'\x00').decode('utf-8', 'replace')
    return str(raw_name).rstrip('\x00')


def get_info(filepath='calc.exe'):
    """Collect section entropies, mnemonic counts and API names from a PE file.

    For every section the entropy is recorded under the section's name.
    Sections whose ``IMAGE_SCN_MEM_EXECUTE`` flag is set are additionally
    decoded in 32-bit mode and the first token of each instruction is
    tallied. Imported and exported symbol names are gathered into one list.

    :param filepath: path handed to ``pefile.PE``. Defaults to
        ``'calc.exe'``, the value that used to be hard-coded.
    :return: tuple ``(section_name, op_list_count, api_list)`` -- a dict of
        section name to entropy, a dict of mnemonic to count, and a list of
        import/export names.
    """
    pe = pefile.PE(filepath)
    op_list_count = {}
    section_name = {}
    api_list = []

    for section in pe.sections:
        # section_flags is defined elsewhere; each item carries the flag
        # attribute name at index 0.
        flags = [flag[0] for flag in sorted(section_flags)
                 if getattr(section, flag[0])]
        if 'IMAGE_SCN_MEM_EXECUTE' in flags:
            iterable = distorm3.DecodeGenerator(0, section.get_data(),
                                                distorm3.Decode32Bits)
            for (offset, size, instruction, hexdump) in iterable:
                op_code = instruction.split()[0]
                op_code = str(op_code).lstrip('b')
                op_list_count[op_code] = op_list_count.get(op_code, 0) + 1

        # The previous regex character class [b'|\\x00] deleted every
        # 'b', 'x' and '0' character as well as the padding, which is why
        # ".text" kept coming out as ".tet" and needed a special case.
        # Stripping the NUL padding directly keeps all names intact.
        section_name[_section_label(section.Name)] = section.get_entropy()

    pe.parse_data_directories(
        pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'])

    # pefile only sets these attributes when the directory exists, so a
    # missing one is expected and skipped; other errors now surface instead
    # of being silently swallowed by a bare ``except``.
    try:
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            for imp in entry.imports:
                api_list.append(imp.name)
    except AttributeError:
        pass

    try:
        for exp in pe.DIRECTORY_ENTRY_EXPORT.symbols:
            api_list.append(exp.name)
    except AttributeError:
        pass

    return section_name, op_list_count, api_list
def calculate(self, address=None, length=128, space=None, mode=None):
    """Disassemble code at a given address and return it as a list.

    Disassembles code starting at address for a number of bytes
    given by the length parameter (default: 128). When address is None
    the current value of the "eip" register is used.

    Note: This feature requires distorm, available at
        http://www.ragestorm.net/distorm/

    The mode is '32bit' or '64bit'. If not supplied, the disasm
    mode is taken from the profile. Any value other than '32bit'
    selects 64-bit decoding.

    For instructions containing "CALL ", the operand is resolved through
    ``self.functions.dec_op1`` and, when the resulting address is a key of
    ``self.core.symbols_by_offset``, the instruction text is replaced with
    ``CALL <symbol>``. Failures during that resolution are not fatal: the
    raw instruction is printed and kept unchanged.

    Returns a list of ``(offset, instruction)`` tuples, or None when
    distorm3 has not been imported.
    """
    if address is None:
        address = self.core.functions.gr("eip")
    if not space:
        space = self.core.current_EPROCESS.get_process_address_space()

    # ``in`` and print() work on both Python 2 and 3; dict.has_key and the
    # print statement do not exist on Python 3.
    if "distorm3" not in sys.modules:
        print("ERROR: Disassembly unavailable, distorm not found")
        return

    if not space:
        # Fallback kept from the original for the case where the lookup
        # above returned nothing.
        space = self.eproc.get_process_address_space()

    if not mode:
        mode = space.profile.metadata.get('memory_model', '32bit')

    if mode == '32bit':
        distorm_mode = distorm3.Decode32Bits
    else:
        distorm_mode = distorm3.Decode64Bits

    data = space.read(address, length)
    iterable = distorm3.DecodeGenerator(address, data, distorm_mode)
    lines = []
    for (offset, _size, instruction, hexdump) in iterable:
        if "CALL " in instruction:
            try:
                # Skips the first five characters, i.e. assumes the text
                # starts with "CALL ".
                op1 = instruction[5:]
                if op1.startswith("DWORD "):
                    op1 = op1[6:]
                # NOTE(review): this uses self.functions while the rest of
                # the method goes through self.core.functions -- confirm.
                dst = self.functions.dec_op1(op1)
                if int(dst) in self.core.symbols_by_offset:
                    target = self.core.symbols_by_offset[int(dst)]
                    instruction = "CALL %s" % target
            except Exception:
                print(instruction)
        lines.append((offset, instruction))
    return lines
def _get_instructions(self, boot_code):
    """Return ``boot_code`` flattened to one string for comparison.

    When ``self._config.HEX`` is set, the result is the character column of
    ``self.Hexdump`` for the whole buffer. Otherwise the buffer is decoded
    in 16-bit mode and the instruction texts are concatenated without any
    separator; at the first instruction that is exactly ``RET`` the
    character column of the remaining bytes is appended and decoding stops.
    """
    if self._config.HEX:
        hex_chars = ["{2}".format(o, h, ''.join(c))
                     for o, h, c in self.Hexdump(boot_code, 0)]
        return "".join(hex_chars)

    pieces = []
    decoder = distorm3.DecodeGenerator(0, boot_code, distorm3.Decode16Bits)
    for (offset, size, instruction, hexdump) in decoder:
        pieces.append("{0}".format(instruction))
        if instruction != "RET":
            continue
        remainder = boot_code[offset + size:]
        pieces.extend("{2}".format(o, h, ''.join(c))
                      for o, h, c in self.Hexdump(remainder, 0))
        break
    return "".join(pieces)
def get_disasm_text(self, boot_code, start):
    """Return a 16-bit disassembly listing of ``boot_code``.

    Offsets in the listing are shifted by ``start``. Decoding stops at the
    first instruction that is exactly ``RET``; the bytes after it are
    appended as a hex dump from ``self.Hexdump``.

    Side effect: ``self.code_data`` is set to the code up to and including
    that ``RET``, or to the whole buffer when no ``RET`` is decoded.
    """
    self.code_data = boot_code
    chunks = []
    decoder = distorm3.DecodeGenerator(0, boot_code, distorm3.Decode16Bits)
    for (offset, size, instruction, hexdump) in decoder:
        chunks.append("{0:010x}: {1:<32} {2}\n".format(offset + start,
                                                       hexdump, instruction))
        if instruction != "RET":
            continue

        end = offset + size
        self.code_data = boot_code[0:end]
        dump_rows = [
            "{0:010x}: {1:<48} {2}".format(o, h, ''.join(c))
            for o, h, c in self.Hexdump(boot_code[end:], offset + start + size)
        ]
        chunks.append("\n" + "\n".join(dump_rows))
        break
    return "".join(chunks)
def main():
    """Decode the file named on the command line and print every instruction.

    Uses ``args.file`` as input, ``args.offset`` as the offset of the first
    byte and ``args.dt`` as the distorm3 decode type, all taken from
    ``parse_args()``.
    """
    args = parse_args()
    base_offset = args.offset

    with open(args.file, "rb") as source:
        machine_code = source.read()

    # DecodeGenerator yields (offset, size, instruction, hexdump) tuples.
    decoder = distorm3.DecodeGenerator(base_offset, machine_code, args.dt)
    for (address, _size, text, hex_bytes) in decoder:
        print("%.8x: %-32s %s" % (address, hex_bytes, text))
def Disassemble(self, data, start, pe, bits='32bit', stoponret=False):
    """Yield ``(address, instruction, byte_length)`` for code in ``data``.

    :param data: raw bytes to decode.
    :param start: offset assigned to the first byte of ``data``.
    :param pe: not used by this method.
    :param bits: ``'32bit'`` selects 32-bit decoding, anything else 64-bit.
    :param stoponret: when true, stop before yielding the first instruction
        whose text starts with ``RET``.

    Yields nothing when ``has_distorm3`` is false. ``byte_length`` is half
    the length of the hex dump string, as an integer.
    """
    # Plain ``return`` ends a generator; ``raise StopIteration`` inside one
    # becomes RuntimeError under PEP 479 (Python 3.7+).
    if not has_distorm3:
        return

    if bits == "32bit":
        mode = distorm3.Decode32Bits
    else:
        mode = distorm3.Decode64Bits

    for address, _, code, hex_data in distorm3.DecodeGenerator(start, data, mode):
        # The original tested an undefined name ``i`` here, which raised
        # NameError whenever stoponret was true.
        if stoponret and code.startswith("RET"):
            return
        # Floor division keeps the length an int on Python 3 as well.
        yield address, code, len(hex_data) // 2
def Disassemble(self, data, start, bits='32bit', stoponret=False):
    """Yield the text of each instruction decoded from ``data``.

    :param data: raw bytes to decode.
    :param start: offset assigned to the first byte of ``data``.
    :param bits: ``'32bit'`` selects 32-bit decoding, anything else 64-bit.
    :param stoponret: when true, stop before yielding the first instruction
        whose text starts with ``RET``.

    Yields nothing when ``has_distorm3`` is false.
    """
    # Plain ``return`` ends a generator; ``raise StopIteration`` inside one
    # becomes RuntimeError under PEP 479 (Python 3.7+).
    if not has_distorm3:
        return

    if bits == '32bit':
        mode = distorm3.Decode32Bits
    else:
        mode = distorm3.Decode64Bits

    for _, _, i, _ in distorm3.DecodeGenerator(start, data, mode):
        if stoponret and i.startswith("RET"):
            return
        yield i
def get_codes(self):
    ''' parse binary to extract access codes

    Decodes ``self.binary`` in 32-bit mode, then walks 16 checks starting
    at offset 0x7bf: the first instruction of each check is kept and the
    two instructions after it are skipped. ``TEST EAX, EAX`` contributes
    the code "1"; ``CMP EAX, <imm>`` contributes the immediate as a decimal
    string. Results are appended to ``self.codes`` and reported through the
    progress logger.

    Raises OhShit when ``self.binary`` is empty, and KeyError when the walk
    lands on an offset that is not an instruction boundary.
    '''
    p = log.progress('getting access codes...')

    if not self.binary:
        p.failure("missing binary")
        raise OhShit

    p.status('disassembling')
    disasm = {}
    iterable = distorm3.DecodeGenerator(0x0, self.binary,
                                        distorm3.Decode32Bits)
    for (offset, size, instruction, hexdump) in iterable:
        disasm[offset] = (size, instruction)

    # this is where the check sequence starts
    start = 0x000007bf
    instructions = []

    p.status('parsing instructions')
    # get interesting instructions
    for _ in range(16):
        size, instr = disasm[start]
        instructions.append(instr)
        start += size
        # hop 2 instructions
        for _hop in range(2):
            start += disasm[start][0]

    # parse interesting instructions
    for instr in instructions:
        # just checking it's != 0
        if instr == 'TEST EAX, EAX':
            self.codes.append("1")
        # else we get the check value
        elif instr.startswith('CMP EAX, '):
            value = instr.split(" ")[-1]
            # The operand text originates from the analysed binary, so it
            # is parsed as an integer literal (base auto-detected from the
            # 0x prefix) rather than passed to eval().
            self.codes.append(str(int(value, 0)))

    p.success(' '.join(self.codes))
def remove_anti_debug(binary):
    """Patch every ``CMP EAX, 0x3000`` in the entry-point section of ``binary``.

    The section containing the entry point is decoded in 32-bit mode; each
    instruction whose text contains ``CMP EAX, 0x3000`` is overwritten with
    the five patch bytes and reported on stdout. One additional hard-coded
    address that the decoder does not report is patched as well.
    """
    patch = [0x83, 0xf8, 0xff, 0x90, 0x90]  # cmp eax, 0xFFFFFFFF
    ep = binary.header.entrypoint
    text_section = binary.section_from_virtual_address(ep)

    # bytes(bytearray(...)) yields a byte string on both Python 2 and 3;
    # "".join(map(chr, ...)) produces text on Python 3, which is not a
    # byte buffer for the decoder.
    code = bytes(bytearray(text_section.content))

    iterable = distorm3.DecodeGenerator(text_section.virtual_address, code,
                                        distorm3.Decode32Bits)
    for (offset, size, instruction, hexdump) in iterable:
        if "CMP EAX, 0x3000" in instruction:
            # Patch 3d 00 30 00 00
            binary.patch_address(offset, patch)
            print("[PATCH] %.8x: %-32s %s" % (offset, hexdump, instruction))

    # Distorm didn't get this one
    binary.patch_address(0x804936B, patch)
def disassemble(self, address_space, entry_point):
    """
    :param address_space: process's address space object
    :param entry_point: Start address
    :return: A string of the disassembled code

    Reads 64 bytes at ``entry_point`` and returns a tab-indented hex dump
    followed by a disassembly of those bytes. distorm3 is used when
    ``has_distorm`` is set (64-bit mode if the profile's memory model is
    '64bit', 32-bit otherwise); ``malfind.Disassemble`` is the fallback.
    When nothing could be read, a short "couldn't read" marker is returned.
    """
    entry_point = int(entry_point)
    content = address_space.read(entry_point, 64)

    # Nothing readable at that address, might be paged
    if not content:
        return "\t** Couldn't read memory\n"

    dump_rows = [
        "{0:#010x} {1:<48} {2}".format(entry_point + o, h, ''.join(c))
        for o, h, c in utils.Hexdump(content)
    ]
    pieces = ["\t", "{0}\n\n".format("\n\t".join(dump_rows)), "\t"]

    # Rather disassemble with distorm3 than malfind
    if has_distorm:
        memory_model = address_space.profile.metadata.get('memory_model')
        if memory_model == '64bit':
            decode_mode = distorm3.Decode64Bits
        else:
            decode_mode = distorm3.Decode32Bits
        asm_rows = [
            "{0:<#010x} {1:<16} {2}".format(o, h, i)
            for o, _size, i, h in
            distorm3.DecodeGenerator(entry_point, content, decode_mode)
        ]
    else:
        asm_rows = [
            "{0:#010x} {1:<16} {2}".format(o, h, i)
            for o, i, h in malfind.Disassemble(content, entry_point)
        ]

    pieces.append("\n\t".join(asm_rows))
    pieces.append("\n")
    return "".join(pieces)
def Disassemble(data, start, bits='32bit'):
    """Disassemble code with distorm3.

    @param data: python byte str to decode
    @param start: address where `data` is found in memory
    @param bits: '32bit' for 32-bit decoding; any other value decodes
        as 64-bit

    @returns: generator of (offset, instruction, hex bytes) tuples
    """
    decode_mode = (distorm3.Decode32Bits if bits == '32bit'
                   else distorm3.Decode64Bits)
    decoder = distorm3.DecodeGenerator(start, data, decode_mode)
    for address, _size, text, hex_bytes in decoder:
        yield address, text, hex_bytes
def extract_opcodes(section, file_path, instr):
    '''
    Extract opcodes from the file section
    sample: https://github.com/gdabah/distorm/blob/master/python/distorm3/sample.py

    The whole file is decoded in 32-bit mode with
    ``section.PointerToRawData`` as the offset of the first byte. For each
    instruction that passes the size check and is neither ``INT 3`` nor
    ``NOP``, ``INC`` and ``SUB`` are rewritten to ``ADD`` and the first
    space-separated token is appended to ``instr``.

    :param section: object exposing ``PointerToRawData`` and ``SizeOfRawData``
    :param file_path: path of the file to read
    :param instr: list that receives the mnemonics (mutated in place)
    '''
    # Read through a context manager so the handle is closed promptly; the
    # original left the file object open until garbage collection.
    with open(file_path, 'rb') as binary_file:
        code = binary_file.read()

    iterable = distorm3.DecodeGenerator(section.PointerToRawData, code,
                                        distorm3.Decode32Bits)
    for (offset, size, instruction, hexdump) in iterable:
        # NOTE(review): the bound below subtracts PointerToRawData from
        # SizeOfRawData and keeps instructions *beyond* it -- kept as-is,
        # confirm the intended range.
        if section.SizeOfRawData - section.PointerToRawData < offset + size:
            if instruction != 'INT 3' and instruction != 'NOP':
                instruction = (instruction.replace('INC', 'ADD')).replace(
                    'SUB', 'ADD')
                instr.append(instruction.split(' ')[0])
def pe_read_x_bytes_from_ep(file_path, bytes_to_read=20):
    """Hash the generalized disassembly of the bytes at a PE's entry point.

    Reads ``bytes_to_read`` bytes of the raw file starting at
    ``AddressOfEntryPoint + READ_OFFSET``, decodes them as 32- or 64-bit
    code according to the optional header magic, passes every instruction
    through ``generalize`` and returns the SHA-256 hex digest of the
    results joined with ``'|'``.

    :param file_path: path of the PE file.
    :param bytes_to_read: number of bytes to decode (converted with int()).
    :return: hex digest string, or ``'failed'`` when the file cannot be
        parsed, a required header attribute is missing, or the optional
        header magic is neither 0x10b nor 0x20b.
    """
    IMAGE_NT_OPTIONAL_HDR32_MAGIC = 0x10b
    IMAGE_NT_OPTIONAL_HDR64_MAGIC = 0x20b

    try:
        pe = pefile.PE(file_path)
        ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint
        logging.info('Got EP: {}'.format(ep))

        with open(file_path, 'rb') as pefile_raw:
            data = pefile_raw.read()[ep + READ_OFFSET:ep + READ_OFFSET + int(bytes_to_read)]

        # Check PE arch.
        magic = pe.OPTIONAL_HEADER.Magic
        if magic == IMAGE_NT_OPTIONAL_HDR32_MAGIC:
            logging.info('[*] File is 32 bit')
            architechture = distorm3.Decode32Bits
        elif magic == IMAGE_NT_OPTIONAL_HDR64_MAGIC:
            logging.info('[*] File is 64 bit')
            architechture = distorm3.Decode64Bits
        else:
            # Previously this fell through with the decode mode unbound and
            # escaped as an unhandled UnboundLocalError.
            logging.error('error reading file %s: unsupported optional '
                          'header magic %#x' % (file_path, magic))
            return 'failed'

        disasm_data = list()
        iterable = distorm3.DecodeGenerator(ep, data, architechture)
        for (offset, size, instruction, hexdump) in iterable:
            # Decode only when bytes were returned, so a str result is
            # processed instead of being reported as a failure.
            if isinstance(instruction, bytes):
                instruction = instruction.decode('utf-8')
            disasm_data.append(generalize(instruction))

        return hashlib.sha256('|'.join(disasm_data).encode()).hexdigest()

    except PEFormatError as e:
        logging.error('error reading file %s: %s' % (file_path, e))
        return 'failed'
    except AttributeError as e:
        logging.error('error reading file %s: %s' % (file_path, e))
        return 'failed'
def get_disasm_text(self, boot_code, start):
    """Return a 16-bit disassembly listing of ``boot_code``.

    Offsets in the listing are shifted by ``start``. Decoding stops at the
    first instruction that is exactly ``RET``; the bytes after it are
    appended as a hex dump from ``self.Hexdump``.

    Side effect: ``self.code_data`` is set to the code up to and including
    that ``RET``, or to the whole buffer when no ``RET`` is decoded.
    """
    self.code_data = boot_code
    chunks = []
    decoder = distorm3.DecodeGenerator(0, boot_code, distorm3.Decode16Bits)
    for (offset, size, instruction, hexdump) in decoder:
        chunks.append(f"{offset + start:010x}: {hexdump:<32} {instruction}\n")
        if instruction != "RET":
            continue

        end = offset + size
        self.code_data = boot_code[0:end]
        dump_rows = [
            f"{o:010x}: {h:<48} {''.join(c)}"
            for o, h, c in self.Hexdump(boot_code[end:], offset + start + size)
        ]
        chunks.append("\n" + "\n".join(dump_rows))
        break
    return "".join(chunks)
def Disassemble(self, instructions=10):
    """Generate some instructions.

    Yields ``(offset, hexdump, instruction)`` tuples decoded with
    ``self.distorm_mode``, starting at ``self.obj_offset``, and stops after
    ``instructions`` tuples have been produced. Data is fetched from
    ``self.obj_vm`` in 0x100-byte reads; a new read is issued whenever the
    current position is within 40 bytes of the end of the last buffer.
    """
    chunk_size = 0x100
    buffered = ''
    cursor = self.obj_offset
    produced = 0

    while True:
        # Refill once fewer than 40 bytes of the buffer remain past cursor.
        if cursor - self.obj_offset > len(buffered) - 40:
            buffered = self.obj_vm.read(cursor, chunk_size)

        decoder = distorm3.DecodeGenerator(cursor, buffered,
                                           self.distorm_mode)
        # The loop rebinds cursor to each instruction's offset, which the
        # next pass of the outer loop then continues from.
        for (cursor, _size, instruction, hexdump) in decoder:
            yield cursor, hexdump, instruction
            produced += 1
            if produced >= instructions:
                return
def SROP_findinstruction_at_va(baseaddr, code_start, code_size):
    """Scan every byte offset of a code buffer for matching instructions.

    For each offset in ``range(code_size)`` not already reached by an
    earlier decode, the buffer is decoded in 32-bit mode from that offset.
    Each instruction becomes an ``InstructionNode`` linked to the node
    decoded before it (``parents`` / ``child``). A decode run stops as soon
    as it reaches an offset that already has a node. Nodes for which
    ``g_srop_discernfunc(instruction, hexdump)`` is true are collected and
    finally handed to ``SROP_show_result``.

    :param baseaddr: virtual address corresponding to offset 0.
    :param code_start: the code buffer (sliced per offset).
    :param code_size: number of offsets to scan.
    """
    global g_srop_linenumber
    instruction_node_map = {}
    rop_node_map = {}

    # range() exists on both Python 2 and 3; xrange is Python 2 only.
    for offset in range(code_size):
        if offset in instruction_node_map:
            continue
        try:
            iterable = distorm3.DecodeGenerator(baseaddr + offset,
                                                code_start[offset:],
                                                distorm3.Decode32Bits)
            parent_node = None
            node_inserted = 0
            for (va, size, instruction, hexdump) in iterable:
                hexdump = hexdump.upper()
                cur_offset = va - baseaddr
                node = instruction_node_map.get(cur_offset)
                if not node:
                    node = InstructionNode(va, cur_offset, hexdump,
                                           instruction)
                else:
                    node_inserted = 1

                if parent_node:
                    node.parents.append(parent_node)
                    parent_node.child = node
                parent_node = node

                # Joined an already-decoded chain: nothing new past here.
                if node_inserted:
                    break

                if g_srop_discernfunc(instruction, hexdump):
                    # the instruction we need is contained in this node
                    rop_node_map[cur_offset] = node
                instruction_node_map[cur_offset] = node
        except Exception:
            # Decoding from arbitrary offsets is best-effort, so failures
            # at one offset are skipped; unlike a bare ``except`` this does
            # not swallow KeyboardInterrupt or SystemExit.
            pass

    SROP_show_result(rop_node_map)
def distormDisassemble(filename, code, bits, countStatisticsResult=False):
    """Disassemble ``code`` and write its opcodes to ``<filename>.opcode``.

    The opcode is the first space-separated token of each instruction,
    lower-cased. Opcodes are written to the output file separated (and
    terminated) by commas and also returned as a list.

    :param filename: base name; output goes to ``filename + ".opcode"``.
    :param code: raw machine code handed to ``distorm3.DecodeGenerator``.
    :param bits: ``'16bit'`` or ``'32bit'``; any other value selects 64-bit.
    :param countStatisticsResult: when true, per-opcode counts are gathered
        and passed to ``countStatisticResult(counts, filename)``.
    :return: list of opcodes in decoding order.
    """
    distormOpcodeDict = dict()  # key = opcode, value = count
    distormFilename = filename + ".opcode"
    distormOpcodeList = []

    if bits == '16bit':
        mode = distorm3.Decode16Bits
    elif bits == '32bit':
        mode = distorm3.Decode32Bits
    else:
        mode = distorm3.Decode64Bits

    # The context manager closes (and flushes) the output even when
    # decoding raises part-way through; the original leaked the handle.
    with open(distormFilename, 'w') as distormFile:
        iterable = distorm3.DecodeGenerator(0, code, mode)
        for (offset, size, instruction, hexdump) in iterable:
            opcode = instruction.split(" ")[0].lower()
            distormOpcodeList.append(opcode)
            distormFile.write(opcode + ",")
            if countStatisticsResult:
                distormOpcodeDict[opcode] = distormOpcodeDict.get(opcode, 0) + 1

    if countStatisticsResult:
        countStatisticResult(distormOpcodeDict, filename)

    print(filename + " is disassembled.")
    return distormOpcodeList