def test_encode_single_char(self):
        """Encoding the one-character input "a" must give the flat list [0, 0, 'a']."""
        # The two size arguments are passed positionally, as 5 and 3.
        encoded = lz77.encode("a", 5, 3)
        self.assertEqual([0, 0, 'a'], encoded)
    def test_encode_two_chars(self):
        """Encoding "aa" must give two groups of three values in one flat list."""
        wanted = [
            0, 0, 'a',
            6, 1, None,
        ]
        # The two size arguments are passed positionally, as 6 and 3.
        self.assertEqual(wanted, lz77.encode("aa", 6, 3))
示例#3
0
def experiment(n, fileName, W, L, willPrint):
    """Time ``n`` compress/decompress round trips of one file and report them.

    The file is read from ``originals/``, then ``lz77.encode`` and
    ``lz77.decode`` are each called ``n`` times and timed. The result of the
    last decode is written to ``decompressed/<stem>Decomp.<ext>``, where
    ``<stem>`` and ``<ext>`` are the first two "."-separated parts of
    ``fileName``. The compressed size is read from ``./binaries/<stem>.bin``
    (presumably written by ``lz77.encode`` -- confirm against that module).

    Args:
        n: Number of timed round trips; must be at least 1.
        fileName: Name of the input file inside ``originals/``. It must
            contain a "." because the output names are derived by splitting
            on it.
        W: Requested window size.
        L: Requested buffer size.
        willPrint: When true the report is printed and nothing is returned;
            otherwise the three report strings are returned.

    Returns:
        ``None`` when ``willPrint`` is true, otherwise the tuple
        ``(encodeData, decodeData, compData)`` of report strings.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # An average over zero runs is undefined; fail before touching files
        # instead of dying later with a ZeroDivisionError.
        raise ValueError("n must be at least 1, got " + str(n))

    # Largest values that fit in the distance and length fields. dBits and
    # lBits are module-level settings that are only read here.
    maxW = 2**dBits - 1
    maxL = 2**lBits - 1
    if W > maxW or L > maxL:
        # As before, both sizes are reset together when either one is too big.
        W = maxW
        L = maxL
        print("The window and buffer sizes were above the maximum for the no. of bytes used to encode distance and length so have been adjusted to: " + str(maxW) + " and " + str(maxL) + " respectively.")

    with open("originals/" + fileName, "rb") as file:
        content = file.read()

    compTimes = []
    decompTimes = []
    decodeContent = None
    for _ in range(n):
        # perf_counter is monotonic and high resolution, unlike time.time().
        start = time.perf_counter()
        lz77.encode(fileName, content, W, L)
        compTimes.append(time.perf_counter() - start)
        start = time.perf_counter()
        decodeContent = lz77.decode(fileName)
        decompTimes.append(time.perf_counter() - start)

    stem = fileName.split(".")[0]
    extension = fileName.split(".")[1]
    # Write the decompressed content of the last run to file.
    with open("decompressed/" + stem + "Decomp" + "." + extension, "wb") as file:
        decodeContent.tofile(file)

    sizeComp = os.path.getsize("./binaries/" + stem + ".bin")
    sizeUncomp = os.path.getsize("./originals/" + fileName)
    timeCompress = sum(compTimes) / len(compTimes)
    timeDecompress = sum(decompTimes) / len(decompTimes)

    encodeData = "Across " + str(n) + " tests it took " + str(timeCompress) + " seconds to compress " + fileName + " from " + str(sizeUncomp) + " to " + str(sizeComp) + " bytes\n"
    decodeData = "Across " + str(n) + " tests it took " + str(timeDecompress) + " seconds to decompress " + stem + ".bin\n"
    compData = "The compression ratio is: " + str(sizeUncomp / sizeComp) + "\n"

    if not willPrint:
        return (encodeData, decodeData, compData)

    print("Window size: " + str(W) + ", Buffer size: " + str(L) + "\n")
    print(encodeData)
    print(decodeData)
    print(compData)
    def test_encode_abra(self):
        """Encoding "abrakabrabra" with sizes 6 and 4 must match the listed groups."""
        groups = (
            (0, 0, 'a'),
            (0, 0, 'b'),
            (0, 0, 'r'),
            (4, 1, 'k'),
            (2, 4, None),
            (4, 3, None),
        )
        # The encoder's output is compared as one flat list, so flatten the groups.
        expected = [value for group in groups for value in group]

        search_buf, lookahead_buf = 6, 4
        actual = lz77.encode("abrakabrabra", search_buf, lookahead_buf)

        self.assertEqual(expected, actual)
示例#5
0
def generateMobi(name, text):
    """Write ``<name>.mobi`` holding ``text`` split into encoded records.

    The file is written in this order: the 32-byte-name database header, one
    8-byte record-table entry per record, the Palm header, the MOBI header,
    the EXTH header with a fixed set of metadata entries, the padded name,
    the encoded text records, and finally the four trailing "magic" records
    (two NUL bytes, FLIS, FCIS and a 4-byte end marker).

    Args:
        name: Base file name as ``bytes``; it is concatenated with
            ``b".mobi"`` and also stored inside the file.
        text: The book content. It is sliced into ``record_size`` pieces and
            each piece is passed to ``encode()`` (defined elsewhere), whose
            result is written as-is -- presumably ``bytes``; confirm.

    Returns:
        None. The only effects are the written file and debug output on stdout.
    """
    exth = [{'data': b'Test', 'type': 503},
            {'data': b'en', 'type': 524},
            {'data': b'My Author', 'type': 100},
            {'data': b'calibre (3.16.0) [https://calibre-ebook.com]',
             'type': 108},
            {'data': b'443467fb-212b-4817-8519-e9009343355d',
             'type': 113},
            {'data': b'calibre:443467fb-212b-4817-8519-e9009343355d',
             'type': 112},
            {'data': b'EBOK', 'type': 501},
            {'data': b'2018-05-30T21:40:16.296448+00:00',
             'type': 106},
            {'data': b'\x00\x00\x00\xc9', 'type': 204},
            {'data': b'\x00\x00\x00\x01', 'type': 205},
            {'data': b'\x00\x00\x00\x02', 'type': 206},
            {'data': b'\x00\x00\x82\x1b', 'type': 207},
            {'data': b'\x00\x00\x00\x19', 'type': 116},
            {'data': b'\x00\x00\x00\x00', 'type': 131}]
    exthsize, exthpad = sizeofExthHeader(exth)

    nmagicrecords = 4 # '\0\0', flis, fcis, crlf
    with open(name + b".mobi", "wb") as f:
        # Name, two NULs, then as many NULs as needed to reach a multiple of
        # four bytes. (The previous formula added `(len + 2) % 4` bytes, which
        # only lands on a boundary when no padding is needed at all.)
        padded_name = name + b"\0\0" + ((-(len(name) + 2)) % 4) * b"\0"
        record_size = 4096
        text_length = len(text)
        #glob_header
        # Number of record_size pieces needed to hold the text (ceiling division).
        ntextrecords = (text_length + record_size - 1) // record_size
        # Total record count: text records, one palm meta record, n magic records.
        recordlen = ntextrecords + 1 + nmagicrecords
        attributes = 0
        version = 0
        created = int(time.time())
        modified = created
        backup = 0
        modnum = 0
        appInfoId = 0
        sortInfoId = 0
        atype = b"BOOK"
        creator = b"MOBI"
        uniqueIDseed = recordlen
        nextRecordListID = 0

        # The name field is 32 bytes: at most 31 name bytes, NUL-filled.
        shortname = name[:31]
        shortname = shortname + b"\0" * (32 - len(shortname))
        f.write(struct.pack('>32shhIIIIII4s4sIIH',
                            shortname,
                            attributes,
                            version,
                            created,
                            modified,
                            backup,
                            modnum,
                            appInfoId,
                            sortInfoId,
                            atype,
                            creator,
                            uniqueIDseed,
                            nextRecordListID,
                            recordlen
        ))
        hsize = sizeofHeader(padded_name, recordlen, exthsize)
        f.write(struct.pack('>II', sizeofGlobHeader(recordlen), 0)) # meta record
        print(hsize)

        # Record table entries for the text records. Each entry is
        # (file offset, id); offsets advance by the *encoded* size, so every
        # piece has to be encoded before its successor's offset is known.
        textsize = 0
        textsnips = []
        for r in range(ntextrecords):
            print("wrote record", hsize + textsize, r + 1)
            textsnips.append(encode(text[r * record_size: (r + 1) * record_size]))
            f.write(struct.pack('>II', hsize + textsize, r + 1))
            textsize += len(textsnips[-1])

        # Entries for the trailing records. 2, 36 and 44 are the byte sizes of
        # the double-NUL, FLIS and FCIS records written at the end of the file.
        offset = hsize + textsize
        f.write(struct.pack('>II', offset, recordlen - 4)) # double null
        f.write(struct.pack('>II', offset + 2, recordlen - 3)) # FLIS
        f.write(struct.pack('>II', offset + 36 + 2, recordlen - 2)) # FCIS
        f.write(struct.pack('>II', offset + 36 + 44 + 2, recordlen - 1)) # CRLF

        # palm
        # NOTE(review): this was commented "no compression", but every text
        # record is passed through encode() first, and the PalmDOC header
        # documents 2 as PalmDOC compression (1 means none) -- confirm that
        # encode() produces that format.
        compression = 2
        unused = 0
        encryption_type = 0 # none
        unknown = 0 #usu zero
        f.write(struct.pack('>HHIHHHH',
                            compression,
                            unused,
                            len(text),
                            recordlen - nmagicrecords, # text records + the meta record
                            record_size,
                            encryption_type,
                            unknown))

        # mobi
        mobitype = 2 # book
        encoding = 65001 #utf-8
        genver = 6
        nameoffset = nameOffset(exthsize)
        print("recordlen", recordlen)
        # Whitespace inside a struct format string is ignored; it only groups
        # the fields here. The '40s', '36s' and '20s' fields are filled with
        # 0xFF bytes (struct truncates the longer byte strings to fit).
        f.write(struct.pack('> 4sIII II 40s III IIIII IIII I 36s IIII 8s HHIIIII 8sI IIII I 20s I',
                            b"MOBI",
                            mobiheaderlen(),
                            mobitype,
                            encoding,

                            recordlen, # a uid
                            genver,

                            (struct.pack(">I", 0xFFFFFFFF) * 10),

                            recordlen - nmagicrecords + 1, #first non book (flis)
                            nameoffset,
                            len(name),

                            9, #english
                            9, #english in
                            9, #english out
                            genver,
                            0xFFFFFFFF, # first image index

                            0xFFFFFFFF,
                            0,#huff count
                            0,#off
                            0,#length

                            0x40, #exth

                            ((struct.pack(">I", 0xFFFFFFFF) * 36)),

                            0xFFFFFFFF,#drm off
                            0,#drm count
                            0,#drm size
                            0, #drm flags

                            b"\0" * 8,

                            1,#first text record
                            recordlen - nmagicrecords - 1,#last content
                            1,#unknown
                            recordlen - nmagicrecords + 2,#fcis
                            1, #"-Unknown",
                            recordlen - nmagicrecords + 1,#"FLIS record",
                            1, #"-Unknown"

                            b"\0" * 8, #"-Unknown 0x0000000000000000"
                            0xFFFFFFFF, #"-Unknown 0xFFFFFFFF"

                            0, #First Compilation data section count    Use 0x00000000
                            0xFFFFFFFF, #Number of Compilation data sections    Use 0xFFFFFFFF.
                            0xFFFFFFFF, #Unknown    Use 0xFFFFFFFF.
                            0, #Extra Record Data Flags
                            0xFFFFFFFF,#INDX Record Offset if not 0xFFFFFFFF
                            ((struct.pack(">I", 0xFFFFFFFF) * 5)),
                            0

        ))

        # EXTH Header
        f.write(struct.pack("> 4sII",
                b"EXTH",
                exthsize,
                len(exth)
                ))

        # Each EXTH entry is (type, total length including this 8-byte prefix, data).
        for data in exth:
            f.write(struct.pack("> II", data["type"], len(data["data"]) + struct.calcsize(">II")))
            f.write(data["data"])
        f.write(exthpad * b"\0")
        f.write(padded_name)
        for snip in textsnips:
            print("wrote", (snip), "at", f.tell())
            f.write(snip)

        # Trailing magic records, in the order announced in the record table.
        f.write(b"\0\0")
        f.write(struct.pack("> 4sIHH IIHH III", b"FLIS", 8, 65, 0,
                            0, 0xFFFFFFFF, 1, 3,
                            3, 1, 0xFFFFFFFF))
        f.write(struct.pack("> 4sIII IIII IHHI", b"FCIS", 20,16,1,
                            0, text_length, 0, 32,
                            8, 1, 1, 0))
        f.write(b"\xe9\x8e\x0d\x0a")
    def test_encode_decode(self):
        """Round trip: decoding the encoder's output must reproduce the input."""
        original = "itty bitty bit bin"
        # encode receives sizes 10 and 5; decode receives the same 10.
        restored = lz77.decode(lz77.encode(original, 10, 5), 10)
        self.assertEqual(original, restored)