def __process_backward(self, hexbyte, end_offset):
    """Walk backward from a RET opcode and record every usable gadget.

    For each suffix of ``hexbyte`` (1 byte, 2 bytes, ... up to the whole
    buffer) the suffix is joined with the RET opcode and disassembled.
    A candidate is kept when it decodes to at most
    ``self.__backward_depth + 1`` instructions, its last instruction is
    exactly the RET opcode, and none of its tokens is rejected (direct
    ``CALL 0x`` / ``JMP 0x`` targets, or anything listed in ``BAD_INSTS``).
    Accepted candidates are stored through ``self.__insert_asmcode``.

    Args:
        hexbyte: raw bytes that immediately precede the RET opcode.
        end_offset: address used for the shortest (1-byte) suffix; every
            longer suffix starts one address lower.
    """
    # Imported here so the method does not rely on the file's import block.
    import binascii

    # bytes/bytearray have no ``.encode('hex')``; hexlify works for both and
    # on both major Python versions.  Normalising to ``bytes`` also keeps
    # ``code + RET`` an immutable byte string for the disassembler.
    hexbyte = bytes(hexbyte)
    self.__LOG("Backward process: " + binascii.hexlify(hexbyte).decode("ascii")
               + ", offset: " + hex(end_offset))

    RET = RET_CODE["ret"]
    ret_hex = binascii.hexlify(RET).decode("ascii")
    bad_insts = set(BAD_INSTS)  # loop-invariant, build once
    l = len(hexbyte)

    for i in range(l):
        # Last (i + 1) bytes before the RET, followed by the RET itself.
        code = hexbyte[l - i - 1:l] + RET
        disassembly = list(
            distorm.DecodeGenerator(end_offset - i, code, self.__decode_option))

        # Nothing decoded, or more instructions than the allowed depth.
        if not disassembly or len(disassembly) > self.__backward_depth + 1:
            continue
        # The sequence must end in a clean RET, otherwise the bytes were
        # swallowed by a longer instruction.
        if disassembly[-1][-1].lower() != ret_hex:
            continue

        # Tokenise every instruction except the trailing RET; commas become
        # spaces and each instruction is terminated by a ";" token.
        asmcode = []
        for (offset, size, instruction, hexdump) in disassembly[:-1]:
            asmcode += ("".join(instruction).replace(",", " ")).split() + [";"]

        # skip bad instructions
        s = " ".join(asmcode)
        if "CALL 0x" in s or "JMP 0x" in s:
            continue
        self.__LOG(asmcode)
        if bad_insts.intersection(asmcode):
            continue

        value = (s.lower() + ";", end_offset - i)
        self.__LOG("i = %d, value = %x, code: %s" % (i, value[1], asmcode))
        self.__insert_asmcode(asmcode, value)
def generate(self, filename, backward_depth=3):
    """Scan a binary for RET opcodes and collect the gadgets before them.

    The file is read fully into memory, its metadata (md5 hash, base name,
    base and data addresses) is stored in ``self.__gadget_info``, and the
    contents are disassembled in 1 MB blocks.  Whenever an instruction's
    bytes contain the RET opcode, the bytes preceding it are handed to
    ``self.__process_backward``.

    Args:
        filename: path of the binary to analyse.
        backward_depth: value passed to ``self.set_backward_depth``; it also
            sizes the look-behind window (8 bytes per unit of depth).

    Returns:
        Always ``True``.
    """
    # Close the handle deterministically instead of leaking it.
    with open(filename, 'rb') as binary:
        code = binary.read()
    self.set_backward_depth(backward_depth)

    # get binary info: md5sum, name, base_addr, data_addr
    self.__gadget_info["hash"] = self.__md5sum(filename)
    self.__gadget_info["name"] = os.path.basename(filename)
    (base_addr, data_addr) = self.__get_elf_address(filename)
    self.__gadget_info["base_addr"] = base_addr
    self.__gadget_info["data_addr"] = data_addr

    block_size = 1024 * 1024  # process 1 MB a time
    block_count = len(code) // block_size + 1
    ret_opcode = RET_CODE["ret"]

    print("Generating gadgets for " + filename + " with backward depth=" + str(backward_depth))
    print("It may take few minutes depends on the depth and file size...")

    for count in range(block_count):
        print("Processing code block %d/%d" % (count + 1, block_count))
        block_start = count * block_size
        disassembly = distorm.DecodeGenerator(
            block_start,
            code[block_start:block_start + block_size],
            self.__decode_option)

        # Raw bytes decoded so far in this block.  A bytearray is extended
        # in place, whereas ``bytes +=`` would recopy the buffer for every
        # instruction.
        bincode = bytearray()
        for (offset, size, instruction, hexdump) in disassembly:
            hexstr = hexdump.replace(" ", "")
            if len(hexstr) % 2 != 0:  # invalid hexdump?, cut the last char
                hexstr = hexstr[:-1]
            opcode = bytearray.fromhex(hexstr)
            bincode += opcode

            i = opcode.find(ret_opcode)  # find RET in opcode
            if i == -1:
                continue

            self.__LOG("Found RET at 0x%x" % (offset + i))
            # Bytes from the RET to the end of this instruction; always >= 1,
            # so the negative slice bound below never degenerates to -0.
            tail = len(opcode) - i
            # get back (__backward_depth * 8) bytes, enough?
            window = bincode[-(tail + self.__backward_depth * 8):-tail]
            # Hand over an immutable copy of the window.
            self.__process_backward(bytes(window), base_addr + offset + i - 1)

    print("Generated " + str(self.__asmgadget.get_size()) + " gadgets")
    return True
def __disass(self, filename, offset = 0, option = distorm.Decode32Bits):
    """Return a disassembly generator over the whole contents of a file.

    Args:
        filename: path of the binary to read.
        offset: address passed to the disassembler as the code offset.
        option: distorm decode mode; defaults to 32-bit decoding.

    Returns:
        The object returned by ``distorm.DecodeGenerator`` for the file's bytes.
    """
    # The contents are read completely before decoding starts, so the handle
    # can be closed right away instead of being leaked.
    with open(filename, 'rb') as binary:
        code = binary.read()
    return distorm.DecodeGenerator(offset, code, option)