def decompress(compf, f):
    """
    Decompress a stream written by the bwt-mtf-huff compressor.

    compf: a file-like object the compressed data is read from
    f: a file-like object the decompressed content is written to

    The stream starts with a pickled move-to-front alphabet.  It is followed
    by blocks, each made of an 8-byte big-endian header (two unsigned ints:
    the number of Huffman bits and the value I handed to bwt.decode), the
    data read by load_freqs, and the Huffman bits packed into bytes.
    Progress information is printed for every block.
    """
    # NOTE(review): unpickling can execute arbitrary code, so this should
    # only be fed archives that come from a trusted source.
    alphabet = cPickle.load(compf)
    print("alphabet:\n%r" % alphabet)

    # An empty read marks the end of the stream and stops the iteration.
    for count, header in enumerate(iter(lambda: compf.read(8), '')):
        print("block %r:" % count)
        nbits, I = struct.unpack('>2I', header)
        freqs = load_freqs(compf)

        # Read the bit count rounded up to whole bytes, then drop the
        # surplus bits again so exactly nbits remain.
        nbyte = (nbits + 7) // 8
        bits = to01(compf.read(nbyte))[:nbits]

        # Only the tree root is needed for decoding; the code table is not.
        _, root = huff.generate_coding(freqs)

        # Undo the three stages in the reverse order of compression.
        symbols = huff.decode(bits, root)
        bwt_input = mtf.decode(alphabet, symbols)
        pieces = bwt.decode(bwt_input, I, reverse = True)

        content = ''.join(pieces)
        f.write(content)
        print("nbits = %r, I = %r, length = %r" % (nbits, I, len(content)))
def test_same(self):
    """Round-trip a sequence that consists of one repeated symbol."""
    from collections import Counter

    symbols = [0] * 16
    # With a single distinct symbol the frequency table has one entry.
    table = Counter(symbols)
    coding, root = huff.generate_coding(table)

    bits = "".join(huff.encode(symbols, coding))
    restored = huff.decode(bits, root)

    self.assertEqual(list(symbols), restored)
def test_mix(self):
    """Check the generated code table and the round trip for mixed symbols."""
    from collections import Counter

    # Five 1s, five 2s, four 3s, three 4s and three 5s, in random order.
    symbols = [1] * 5 + [2] * 5 + [3] * 4 + [4] * 3 + [5] * 3
    random.shuffle(symbols)

    table = Counter(symbols)
    # The frequencies do not depend on the shuffle, so the expected code
    # table is fixed.
    expect = {
        1: "10",
        2: "01",
        3: "11",
        4: "001",
        5: "000",
    }
    coding, root = huff.generate_coding(table)
    self.assertDictEqual(expect, coding)

    bits = "".join(huff.encode(symbols, coding))
    restored = huff.decode(bits, root)
    self.assertEqual(symbols, restored)
def compress(f, compf, block=None):
    """
    Compress a file by bwt-mtf-huff processes.

    f: a seekable file-like object, content for compressing
    compf: a file-like object receiving the compression result
    block: optional index of the only block to compress.  With the default
        None every block is compressed.  When an index is given (0
        included), the frequency table and the coding of that block are
        printed in addition to the usual progress output.

    The output starts with the pickled move-to-front alphabet.  Every
    compressed block then adds an 8-byte big-endian header (number of
    Huffman bits and the value I returned by bwt.encode), the data written
    by dump_freqs, and the Huffman bits packed into bytes.
    """
    # create alphabet for move-to-front
    content = f.read()
    alphabet = [''] + list(set(content))
    # dump alphabet
    cPickle.dump(alphabet, compf, 2)
    # The whole content was only needed for the alphabet; drop the reference
    # before the file is read again block by block.
    del content
    f.seek(0)

    # "block is not None" rather than plain truthiness, so that asking for
    # block 0 also enables the extra diagnostics.
    verbose = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            break
        if block is None or block == count:
            print("block %r:" % count)
            bwt_encode, I = bwt.encode(data)
            mtf_encode = mtf.encode(alphabet, bwt_encode)

            # create Huffman tree
            freqs = Counter(mtf_encode)
            if verbose:
                print(freqs)
            coding, root = huff.generate_coding(freqs)
            if verbose:
                print(coding)

            # encoding
            huff_encode = ''.join(huff.encode(mtf_encode, coding))
            nbits = len(huff_encode)
            huff_bytes = tobytes(huff_encode)

            # The header carries the exact bit count for this block.
            compf.write(struct.pack(">2I", nbits, I))
            dump_freqs(compf, freqs)
            compf.write(huff_bytes)
            print("nbits = %r, I = %r, length = %r" % (nbits, I, len(data)))
        # Counted for every block read, so a requested index refers to the
        # block's position in the input.
        count += 1