def test_encode_single_char(self):
    """A lone character has nothing to back-reference: one literal triple."""
    tokens = lz77.encode("a", 5, 3)
    self.assertEqual([0, 0, 'a'], tokens)
def test_encode_two_chars(self):
    """"aa" encodes as a literal 'a' followed by a length-1 match token."""
    tokens = lz77.encode("aa", 6, 3)
    self.assertEqual([0, 0, 'a', 6, 1, None], tokens)
def experiment(n, fileName, W, L, willPrint):
    """Benchmark lz77 round-trip compression of one file.

    Encodes and decodes ``originals/<fileName>`` ``n`` times, writes the
    last decompressed output to ``decompressed/``, and reports average
    timings plus the compression ratio. The compressed size is read from
    ``binaries/<stem>.bin``, which ``lz77.encode`` is expected to create.

    Args:
        n: number of timed encode/decode round trips (must be >= 1).
        fileName: input file name inside ``originals/`` (expects one '.').
        W: sliding-window size; clamped to the maximum representable by
            the global ``dBits`` distance encoding.
        L: lookahead-buffer size; clamped likewise via global ``lBits``.
        willPrint: True -> print the report; False -> return it as a
            ``(encodeData, decodeData, compData)`` tuple of strings.

    Raises:
        ValueError: if ``n`` < 1 (the averages would divide by zero).
    """
    global dBits
    global lBits
    if n < 1:
        raise ValueError("n must be at least 1")
    # Largest values representable in the bit widths used by the encoder.
    maxW = 2 ** dBits - 1
    maxL = 2 ** lBits - 1
    # BUG FIX: the L check previously used ``L > 2**lBits`` — off by one
    # versus the W check, the warning message, and the clamp actually
    # applied. Both limits are now ``2**bits - 1``.
    if W > maxW or L > maxL:
        W = maxW
        L = maxL
        print("The window and buffer sizes were above the maximum for the "
              "no. of bytes used to encode distance and length so have "
              "been adjusted to: " + str(maxW) + " and " + str(maxL)
              + " respectively.")

    with open("originals/" + fileName, "rb") as file:
        content = file.read()

    compTimes = []
    decompTimes = []
    decodeContent = bitarray()
    for _ in range(n):
        start = time.time()
        lz77.encode(fileName, content, W, L)
        compTimes.append(time.time() - start)
        start = time.time()
        decodeContent = lz77.decode(fileName)
        decompTimes.append(time.time() - start)

    stem, ext = fileName.split(".")[0], fileName.split(".")[1]
    # Persist the last decompressed result for manual inspection.
    with open("decompressed/" + stem + "Decomp" + "." + ext, "wb") as file:
        decodeContent.tofile(file)

    sizeComp = os.path.getsize("./binaries/" + stem + ".bin")
    sizeUncomp = os.path.getsize("./originals/" + fileName)
    timeCompress = sum(compTimes) / len(compTimes)    # average seconds
    timeDecompress = sum(decompTimes) / len(decompTimes)

    if not willPrint:
        # Return the report strings instead of printing them.
        encodeData = ("Across " + str(n) + " tests it took "
                      + str(timeCompress) + " seconds to compress "
                      + fileName + " from " + str(sizeUncomp) + " to "
                      + str(sizeComp) + " bytes\n")
        decodeData = ("Across " + str(n) + " tests it took "
                      + str(timeDecompress) + " seconds to decompress "
                      + stem + ".bin\n")
        compData = ("The compression ratio is: "
                    + str(sizeUncomp / sizeComp) + "\n")
        return (encodeData, decodeData, compData)
    # Printed output preserved byte-for-byte (including the missing
    # separator between the window and buffer figures).
    print("Window size: " + str(W) + "Buffer size: " + str(L) + "\n")
    print("Across " + str(n) + " tests it took " + str(timeCompress)
          + " seconds to compress " + fileName + " from "
          + str(sizeUncomp) + " to " + str(sizeComp) + " bytes\n")
    print("Across " + str(n) + " tests it took " + str(timeDecompress)
          + " seconds to decompress " + stem + ".bin\n")
    print("The compression ratio is: " + str(sizeUncomp / sizeComp) + "\n")
def test_encode_abra(self):
    """Repeated substrings in "abrakabrabra" become back-reference triples."""
    window = 6
    lookahead = 4
    tokens = lz77.encode("abrakabrabra", window, lookahead)
    self.assertEqual(
        [0, 0, 'a', 0, 0, 'b', 0, 0, 'r',
         4, 1, 'k', 2, 4, None, 4, 3, None],
        tokens,
    )
def generateMobi(name, text):
    """Write ``text`` to a minimal MOBI e-book file named ``<name>.mobi``.

    Assembles a PalmDB container holding a PalmDoc/MOBI header record, the
    text split into 4096-byte records, and four trailing "magic" records
    (double-NUL, FLIS, FCIS, CRLF).

    Args:
        name: book name as ``bytes`` (also the output filename stem).
        text: book content as ``bytes``.

    Relies on helpers defined elsewhere in this file: ``sizeofExthHeader``,
    ``sizeofHeader``, ``sizeofGlobHeader``, ``nameOffset``,
    ``mobiheaderlen`` and ``encode`` — their exact semantics are not
    visible here.
    """
    # EXTH metadata records copied from a calibre-generated sample
    # (title, language, author, timestamps, cover/thumbnail offsets, ...).
    exth = [{'data': b'Test', 'type': 503},
            {'data': b'en', 'type': 524},
            {'data': b'My Author', 'type': 100},
            {'data': b'calibre (3.16.0) [https://calibre-ebook.com]', 'type': 108},
            {'data': b'443467fb-212b-4817-8519-e9009343355d', 'type': 113},
            {'data': b'calibre:443467fb-212b-4817-8519-e9009343355d', 'type': 112},
            {'data': b'EBOK', 'type': 501},
            {'data': b'2018-05-30T21:40:16.296448+00:00', 'type': 106},
            {'data': b'\x00\x00\x00\xc9', 'type': 204},
            {'data': b'\x00\x00\x00\x01', 'type': 205},
            {'data': b'\x00\x00\x00\x02', 'type': 206},
            {'data': b'\x00\x00\x82\x1b', 'type': 207},
            {'data': b'\x00\x00\x00\x19', 'type': 116},
            {'data': b'\x00\x00\x00\x00', 'type': 131}]
    exthsize, exthpad = sizeofExthHeader(exth)
    nmagicrecords = 4  # '\0\0', flis, fcis, crlf
    with open(name + b".mobi", "wb") as f:
        # Full book name, double-NUL terminated, then padded.
        # NOTE(review): a pad of (len(name) + 2) % 4 bytes looks inverted —
        # 4-byte alignment would normally be (4 - (len(name) + 2) % 4) % 4;
        # confirm against a MOBI reader.
        padded_name = name + b"\0\0" + ((len(name) + 2) % 4 * b"\0")
        record_size = 4096
        text_length = len(text)
        #glob_header
        modtext = text_length % record_size
        # Total PDB records: full text records (+1 if a partial final
        # record is needed) + 1 meta record + the magic records.
        recordlen = (text_length // record_size) + (0 if (modtext == 0) else 1) + 1 + nmagicrecords  #plus one for palm meta record plus n magic records
        # --- PalmDB (PDB) header fields ---
        attributes = 0
        version = 0
        created = int(time.time())
        modified = created
        backup = 0
        modnum = 0
        appInfoId = 0
        sortInfoId = 0
        atype = b"BOOK"    # database type
        creator = b"MOBI"  # creator id
        uniqueIDseed = recordlen
        nextRecordListID = 0
        shortname = name[:31]
        shortname = shortname + b"\0" * (32 - len(shortname))  # 32 bytes, NUL-padded
        f.write(struct.pack('>32shhIIIIII4s4sIIH', shortname, attributes,
                            version, created, modified, backup, modnum,
                            appInfoId, sortInfoId, atype, creator,
                            uniqueIDseed, nextRecordListID, recordlen))
        hsize = sizeofHeader(padded_name, recordlen, exthsize)
        # Record-list entry 0: offset of the MOBI meta record.
        f.write(struct.pack('>II', sizeofGlobHeader(recordlen), 0))  # meta record
        print(hsize)
        # Record-list entries for each text record; the encoded snippets
        # are kept so their bytes can be written after the headers.
        textsize = 0
        textsnips = []
        for r in range(recordlen - 1 - nmagicrecords):
            print("wrote record", hsize + (record_size * r), r + 1)
            textsnips.append(encode(text[r * 4096: (r+1) * 4096]))
            f.write(struct.pack('>II', hsize + textsize, r + 1))
            textsize += len(textsnips[-1])
        # Record-list entries for the four trailing magic records.
        offset = hsize + textsize
        f.write(struct.pack('>II', offset, recordlen - 4))  # double null
        f.write(struct.pack('>II', offset + 2, recordlen - 3))  # FLIS
        f.write(struct.pack('>II', offset + 36 + 2, recordlen - 2))  # FCIS
        f.write(struct.pack('>II', offset + 36 + 44 + 2, recordlen - 1))  # CRLF
        # palm (PalmDoc header)
        # NOTE(review): original comment said "no compression", but in
        # PalmDoc headers 1 = none and 2 = PalmDoc compression — confirm
        # which of the two `encode` actually produces.
        compression = 2  # no compression
        unused = 0
        encryption_type = 0  # none
        unknown = 0  # usu zero
        f.write(struct.pack('>HHIHHHH', compression, unused, len(text),
                            recordlen - nmagicrecords, record_size,
                            encryption_type, unknown))
        # mobi (MOBI header proper)
        mobitype = 2  # book
        encoding = 65001  # utf-8
        genver = 6
        nameoffset = nameOffset(exthsize)
        print("recordlen", recordlen)
        f.write(struct.pack('> 4sIII II 40s III IIIII IIII I 36s IIII 8s HHIIIII 8sI IIII I 20s I',
            b"MOBI", mobiheaderlen(), mobitype, encoding,
            recordlen,  # a uid
            genver,
            (struct.pack(">I", 0xFFFFFFFF) * 10),
            recordlen - nmagicrecords + 1,  # first non book (flis)
            nameoffset, len(name),
            9,  # english
            9,  # english in
            9,  # english out
            genver,
            0xFFFFFFFF,  # first image index
            0xFFFFFFFF,
            0,  # huff count
            0,  # off
            0,  # length
            0x40,  # exth (presumably the "EXTH present" flag — confirm)
            # NOTE(review): 36 copies of a 4-byte value = 144 bytes packed
            # into a 36s field — struct silently truncates to 36 bytes;
            # possibly `* 9` was intended. Confirm against the format spec.
            ((struct.pack(">I", 0xFFFFFFFF) * 36)),
            0xFFFFFFFF,  # drm off
            0,  # drm count
            0,  # drm size
            0,  # drm flags
            b"\0" * 8,
            1,  # first text record
            recordlen - nmagicrecords - 1,  # last content
            1,  # unknown
            recordlen - nmagicrecords + 2,  # fcis
            1,  # "-Unknown",
            recordlen - nmagicrecords + 1,  # "FLIS record",
            1,  # "-Unknown"
            b"\0" * 8,  # "-Unknown 0x0000000000000000"
            0xFFFFFFFF,  # "-Unknown 0xFFFFFFFF"
            0,  # First Compilation data section count Use 0x00000000
            0xFFFFFFFF,  # Number of Compilation data sections Use 0xFFFFFFFF.
            0xFFFFFFFF,  # Unknown Use 0xFFFFFFFF.
            0,  # Extra Record Data Flags
            0xFFFFFFFF,  # INDX Record Offset if not 0xFFFFFFFF
            ((struct.pack(">I", 0xFFFFFFFF) * 5)),
            0
            ))
        # EXTH header: magic, total size, record count, then each record
        # as (type, length incl. 8-byte record header, payload).
        f.write(struct.pack("> 4sII", b"EXTH",
            exthsize,
            len(exth)
            ))
        for data in exth:
            f.write(struct.pack("> II", data["type"], len(data["data"]) + struct.calcsize(">II")))
            f.write(data["data"])
        f.write(exthpad * b"\0")  # pad EXTH out to its aligned size
        f.write(padded_name)
        # Text records, in the order their record-list entries were written.
        for snip in textsnips:
            print("wrote", (snip), "at", f.tell())
            #print("wrote", text[r*record_size:(r+1)*record_size])
            f.write(snip)
            #f.write(b" " * (record_size - modtext))
        # Trailing magic records: double null, FLIS, FCIS, CRLF marker.
        f.write(b"\0\0")
        f.write(struct.pack("> 4sIHH IIHH III", b"FLIS", 8, 65, 0, 0, 0xFFFFFFFF, 1, 3, 3, 1, 0xFFFFFFFF))
        f.write(struct.pack("> 4sIII IIII IHHI", b"FCIS", 20, 16, 1, 0, text_length, 0, 32, 8, 1, 1, 0))
        f.write(b"\xe9\x8e\x0d\x0a")
def test_encode_decode(self):
    """Round trip: decoding an encoded string restores the original text."""
    original = "itty bitty bit bin"
    restored = lz77.decode(lz77.encode(original, 10, 5), 10)
    self.assertEqual(original, restored)