def test_both(self):
    """Round-trip check: mtf.decode(mtf.encode(seq)) must equal the input.

    Runs ten rounds, each over a fresh sequence obtained from
    ``generator(16 * 1024, alphabet)`` where the alphabet is the uppercase
    ASCII letters followed by the decimal digits.
    """
    # The alphabet does not change between rounds, so build it once.
    alphabet = string.ascii_uppercase + string.digits
    for _ in xrange(10):
        original = generator(16 * 1024, alphabet)
        encoded = mtf.encode(alphabet, original)
        decoded = mtf.decode(alphabet, encoded)
        # decode is compared against a list built from the input sequence.
        self.assertEqual(decoded, list(original))
def compress(f, compf, block=None):
    """
    Compress a file with the BWT -> move-to-front -> Huffman pipeline.

    f: a seekable file-like object holding the content to compress. It is
       read fully once to collect the alphabet, rewound, and then read
       again in chunks of BLOCK_SIZE.
    compf: a writable file-like object receiving the compressed result.
    block: optional block number (0-based). When given, only that block is
           encoded and written, and its frequency table and Huffman coding
           are printed as diagnostics. When None, every block is encoded.

    Output layout written to compf:
      1. the move-to-front alphabet, pickled with protocol 2;
      2. for every encoded block: two big-endian unsigned 32-bit integers
         (number of Huffman bits, the index returned by bwt.encode), then
         the frequency table via dump_freqs, then the packed Huffman bytes.
    """
    # Build the move-to-front alphabet from every distinct symbol in the
    # input, with '' prepended.
    # NOTE(review): '' is presumably the end-of-block marker that
    # bwt.encode adds to its output -- confirm against the bwt module.
    alphabet = [''] + list(set(f.read()))
    # Dump the alphabet first so it can be read back before any block.
    cPickle.dump(alphabet, compf, 2)
    f.seek(0)

    # Diagnostics are shown whenever a specific block was requested.
    # `if block:` used to be the test here, which wrongly skipped the
    # diagnostics for block 0.
    verbose = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            break
        if block is None or block == count:
            print("block %r:" % count)
            bwt_encode, bwt_index = bwt.encode(data)
            mtf_encode = mtf.encode(alphabet, bwt_encode)

            # Create the Huffman tree from the symbol frequencies.
            freqs = Counter(mtf_encode)
            if verbose:
                print(freqs)
            # Only the coding table is needed here; the tree root is unused.
            coding, _ = huff.generate_coding(freqs)
            if verbose:
                print(coding)

            # Encoding: the joined string's length is recorded as nbits.
            huff_encode = ''.join(huff.encode(mtf_encode, coding))
            nbits = len(huff_encode)
            huff_bytes = tobytes(huff_encode)

            # Block header, frequency table, then the payload.
            compf.write(struct.pack(">2I", nbits, bwt_index))
            dump_freqs(compf, freqs)
            compf.write(huff_bytes)
            print("nbits = %r, I = %r, length = %r"
                  % (nbits, bwt_index, len(data)))
        # Count every block read, selected or not, so `block` matches
        # the block's position in the input.
        count += 1
def test_encode(self):
    """mtf.encode of 'caraab' over alphabet 'abcr' gives the known indices."""
    alphabet = 'abcr'
    message = 'caraab'
    # Each expected value is the symbol's index in the alphabet at that
    # step, after earlier symbols have been moved to the front.
    expected = [2, 1, 3, 1, 0, 3]
    encoded = mtf.encode(alphabet, message)
    self.assertEqual(encoded, expected)