def main():
    """Check huff.makeHuff and huff.encode against a fixed symbol table.

    Builds a tree from six (probability, symbol) pairs, compares the leaf
    positions and the resulting code table with hard-coded expectations,
    prints the code table, and reports each check through testPass.
    """
    symb = [
        (.07, 'a'), (.09, 'b'), (.12, 'c'),
        (.22, 'd'), (.23, 'e'), (.27, 'f'),
    ]
    tree = huff.makeHuff(symb)

    # Evaluated left to right and short-circuited: the first misplaced
    # leaf decides the outcome and deeper nodes are not touched.
    misplaced = (
        tree.left.left != 'd'
        or tree.left.right != 'e'
        or tree.right.left != 'f'
        or tree.right.right.left != 'c'
        or tree.right.right.right.left != 'a'
        or tree.right.right.right.right != 'b'
    )
    testPass("makeHuff", not misplaced)

    codes = huff.encode(tree)
    expected = {
        'a': '1110',
        'b': '1111',
        'c': '110',
        'd': '00',
        'e': '01',
        'f': '10',
    }
    if codes != expected:
        encode_ok = False
    else:
        encode_ok = True

    # One row per symbol, in ascending order of probability.
    print("symb | prob | code")
    for prob, sym in sorted(symb):
        print("%s  | %s |  %s" % (sym, prob, codes[sym]))
    testPass("encode", encode_ok)
def test_same(self):
    """Round-trip a sequence that consists of one repeated symbol."""
    from collections import Counter

    seq = [0] * 16
    coding, root = huff.generate_coding(Counter(seq))

    # Encode to a single bit string, then decode it back through the tree.
    bits = "".join(huff.encode(seq, coding))
    decoded = huff.decode(bits, root)

    self.assertEqual(list(seq), decoded)
def test_mix(self):
    """Check the code table and the round trip for a shuffled mixed sequence."""
    from collections import Counter

    # Five 1s, five 2s, four 3s, three 4s and three 5s, in random order.
    seq = [1] * 5 + [2] * 5 + [3] * 4 + [4] * 3 + [5] * 3
    random.shuffle(seq)

    expect = {
        1: "10",
        2: "01",
        3: "11",
        4: "001",
        5: "000",
    }
    coding, root = huff.generate_coding(Counter(seq))
    self.assertDictEqual(expect, coding)

    # Encode to a single bit string, then decode it back through the tree.
    bits = "".join(huff.encode(seq, coding))
    decoded = huff.decode(bits, root)
    self.assertEqual(seq, decoded)
def compress(f, compf, block=None):
    """
    Compress a file by bwt-mtf-huff processes.

    The pickled move-to-front alphabet is written first. Then, for each
    processed block of up to BLOCK_SIZE units read from ``f``, the output
    receives a big-endian header of two unsigned 32-bit integers (the
    length of the Huffman bit string and the index ``I`` returned by
    ``bwt.encode``), the symbol frequencies (written by ``dump_freqs``)
    and the Huffman bit string packed by ``tobytes``.

    f: a seekable file-like object, content for compressing (it is read
       once in full to build the alphabet, rewound, and read again in
       blocks)
    compf: a writable file-like object, compression result
    block: index of the only block to process, in which case its symbol
           frequencies and code table are printed as well; None (the
           default) processes every block
    """
    # Create the alphabet for move-to-front from every distinct symbol in
    # the input, preceded by an empty-string entry.
    content = f.read()
    alphabet = [''] + list(set(content))
    # Dump the alphabet ahead of the per-block data.
    cPickle.dump(alphabet, compf, 2)
    content = None  # drop the full copy before the block-wise pass
    f.seek(0)

    # Compare against None rather than relying on truthiness, so that
    # selecting block 0 produces the debug dumps like any other block.
    debug = block is not None

    count = 0
    while True:
        data = f.read(BLOCK_SIZE)
        if not data:
            break
        if block is None or block == count:
            print("block %r:" % count)
            bwt_encode, I = bwt.encode(data)
            mtf_encode = mtf.encode(alphabet, bwt_encode)

            # Create the Huffman tree from the move-to-front output.
            freqs = Counter(mtf_encode)
            if debug:
                print(freqs)
            coding, root = huff.generate_coding(freqs)
            if debug:
                print(coding)

            # Encode, then pack the bit string for writing.
            huff_encode = ''.join(huff.encode(mtf_encode, coding))
            nbits = len(huff_encode)
            huff_bytes = tobytes(huff_encode)

            compf.write(struct.pack(">2I", nbits, I))
            dump_freqs(compf, freqs)
            compf.write(huff_bytes)
            print("nbits = %r, I = %r, length = %r" % (nbits, I, len(data)))
        # Blocks are numbered whether or not they were processed.
        count += 1
def main():
    """Check huff.makeHuff and huff.encode against a fixed symbol table.

    Builds a tree from six (probability, symbol) pairs, compares the leaf
    positions and the resulting code table with hard-coded expectations,
    prints the code table, and reports each check through testPass.
    """
    symb = [
        (.07, 'a'), (.09, 'b'), (.12, 'c'),
        (.22, 'd'), (.23, 'e'), (.27, 'f'),
    ]
    tree = huff.makeHuff(symb)

    # Evaluated left to right and short-circuited: the first misplaced
    # leaf decides the outcome and deeper nodes are not touched.
    misplaced = (
        tree.left.left != 'd'
        or tree.left.right != 'e'
        or tree.right.left != 'f'
        or tree.right.right.left != 'c'
        or tree.right.right.right.left != 'a'
        or tree.right.right.right.right != 'b'
    )
    testPass("makeHuff", not misplaced)

    codes = huff.encode(tree)
    expected = {
        'a': '1110',
        'b': '1111',
        'c': '110',
        'd': '00',
        'e': '01',
        'f': '10',
    }
    if codes != expected:
        encode_ok = False
    else:
        encode_ok = True

    # One row per symbol, in ascending order of probability.
    print("symb | prob | code")
    for prob, sym in sorted(symb):
        print("%s  | %s |  %s" % (sym, prob, codes[sym]))
    testPass("encode", encode_ok)