示例#1
0
def decompress(compf, f):
    """
    Decompress a file.

    compf: a file-like object holding the compressed data.  It starts with
           a pickled alphabet (handed to ``mtf.decode``) followed by zero
           or more blocks.
    f: a file-like object; the decoded content of every block is written
       to it in order.

    Each block consists of an 8-byte big-endian header holding two
    unsigned ints (``nbits`` and ``I``), a frequency table read by
    ``load_freqs`` and ``ceil(nbits / 8)`` bytes of Huffman-coded payload.
    Progress information is printed to stdout for every block.

    Raises ValueError when a block's payload is shorter than the length
    announced in its header (truncated input).
    """
    # SECURITY: unpickling can execute arbitrary code.  Only call this on
    # archives that come from a trusted source.
    alphabet = cPickle.load(compf)
    print("alphabet:\n%r" % alphabet)
    count = 0
    while True:
        header = compf.read(8)
        if not header:
            # Nothing left to read: clean end of the stream.
            break
        print("block %r:" % count)
        # nbits: number of meaningful bits in the payload.
        # I: value forwarded to bwt.decode (presumably the BWT index).
        nbits, I = struct.unpack('>2I', header)
        freqs = load_freqs(compf)
        # Whole bytes needed to hold nbits; floor division keeps the result
        # an integer regardless of the division semantics in effect.
        nbyte = (nbits + 7) // 8

        payload = compf.read(nbyte)
        if len(payload) < nbyte:
            raise ValueError(
                "truncated block %r: expected %r payload bytes, got %r"
                % (count, nbyte, len(payload)))
        # The last byte may carry padding bits; keep only the real ones.
        huff_encode = to01(payload)[:nbits]
        # Rebuild the Huffman tree from the stored frequencies; only the
        # tree root is needed for decoding.
        _, root = huff.generate_coding(freqs)
        huff_decode = huff.decode(huff_encode, root)
        mtf_decode = mtf.decode(alphabet, huff_decode)
        bwt_decode = bwt.decode(mtf_decode, I, reverse = True)
        content = ''.join(bwt_decode)
        f.write(content)
        print("nbits = %r, I = %r, length = %r" % (nbits, I, len(content)))
        count = count + 1
示例#2
0
    def test_same(self):
        """Round-trip a sequence made of one repeated symbol (sixteen zeros)."""
        from collections import Counter

        symbols = [0] * 16
        # With a single distinct symbol the frequency table has one entry.
        table, tree = huff.generate_coding(Counter(symbols))
        bits = "".join(huff.encode(symbols, table))
        restored = huff.decode(bits, tree)
        self.assertEqual(list(symbols), restored)
示例#3
0
    def test_mix(self):
        """Check the code table and the round trip for a shuffled sequence.

        The input holds five 1s, five 2s, four 3s, three 4s and three 5s in
        random order; the generated coding must match the expected table and
        decoding the encoded bits must give back the shuffled input.
        """
        from collections import Counter

        symbols = [1] * 5 + [2] * 5 + [3] * 4 + [4] * 3 + [5] * 3
        random.shuffle(symbols)

        wanted = {1: "10", 2: "01", 3: "11", 4: "001", 5: "000"}
        table, tree = huff.generate_coding(Counter(symbols))
        self.assertDictEqual(wanted, table)

        bits = "".join(huff.encode(symbols, table))
        restored = huff.decode(bits, tree)
        self.assertEqual(symbols, restored)
示例#4
0
def compress(f, compf, block = None):
    """
    Compress a file by bwt-mtf-huff processes.

    f: a file-like object, content for compressing.  It is read twice
       (once to collect the alphabet, once block by block), so it must
       support ``seek(0)``.
    compf: a file-like object receiving the compression result: the
           pickled alphabet followed by one record per compressed block.
    block: optional block index.  When None (default) every block is
           compressed; otherwise only the block with that index is
           compressed and written, and its frequency table and code table
           are printed as well.

    Each record is an 8-byte big-endian header (``nbits``, ``I``), the
    frequency table written by ``dump_freqs`` and the Huffman-coded bytes.
    Progress information is printed to stdout.
    """
    # Create the alphabet for move-to-front: '' plus every distinct symbol
    # in the input.  The whole content is not kept around afterwards.
    alphabet = [''] + list(set(f.read()))
    # dump alphabet
    cPickle.dump(alphabet, compf, 2)
    f.seek(0)

    # Debug dumps are wanted whenever a specific block was requested.
    # Compare against None explicitly so that block 0 counts as a request.
    verbose = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            break
        if block is None or block == count:
            print("block %r:" % count)
            bwt_encode, I = bwt.encode(data)
            mtf_encode = mtf.encode(alphabet, bwt_encode)
            # create Huffman tree
            freqs = Counter(mtf_encode)
            if verbose:
                print(freqs)
            coding, root = huff.generate_coding(freqs)
            if verbose:
                print(coding)
            # encoding
            huff_encode = ''.join(huff.encode(mtf_encode, coding))
            # nbits is stored so the reader can drop the padding bits that
            # tobytes adds to fill the last byte.
            nbits = len(huff_encode)
            huff_bytes = tobytes(huff_encode)

            compf.write(struct.pack(">2I", nbits, I))
            dump_freqs(compf, freqs)
            compf.write(huff_bytes)

            print("nbits = %r, I = %r, length = %r" % (nbits, I, len(data)))
        count = count + 1