def decompress(compressed, uncompressed):
    """Decode a Huffman-compressed stream into its original symbols.

    First, read a Huffman tree from the 'compressed' stream using
    read_tree. Then use that tree to decode the rest of the stream and
    write each resulting symbol to the 'uncompressed' stream as 8 bits.

    Decoding stops when huffman.decode returns None or when an EOFError
    is raised while decoding or writing.

    Args:
        compressed: A file stream from which compressed input is read.
        uncompressed: A writable file stream to which the uncompressed
            output is written.
    """
    reader = BitReader(compressed)
    writer = BitWriter(uncompressed)
    tree = read_tree(reader)

    try:
        while True:
            symbol = huffman.decode(tree, reader)
            # Identity test: None is the end marker, and 0 is a valid symbol.
            if symbol is None:
                break
            writer.writebits(symbol, 8)
    except EOFError:
        # Running out of input is treated the same as the end marker.
        pass

    # NOTE(review): one extra byte with value 29 is always appended after
    # the decoded data. Kept so existing output stays byte-identical --
    # confirm whether this trailer is intended or should be removed.
    writer.writebits(29, 8)
def extract(self, output_file):
    """
    Restore the compressed data.

    Reads, in order, from self.file: the Huffman trie (via
    self._read_trie), an 8-bit count of symbols to decode, and then the
    encoded bits. Every decoded symbol is written to output_file as 8 bits.
    """
    reader = BitReader(self.file)
    trie_root = self._read_trie(reader)

    # how many symbols the stream says it contains
    symbol_count = int(reader.readbits(8))

    writer = BitWriter(output_file)

    for _ in xrange(symbol_count):
        # Walk down from the root: a set bit goes right, a clear bit goes
        # left, until a leaf is reached.
        current = trie_root
        while not current.is_leaf:
            current = current.right if reader.readbit() else current.left

        # the leaf holds the decoded character
        writer.writebits(current.char, 8)
def compress(self, output_file):
    """
    Compress data from input file and write compressed data to the
    output file.

    The symbol table starts with every single character 0..radix-1
    mapped to its own code. The input is then consumed greedily: the
    longest prefix already in the table is emitted as one codeword of
    self.codeword_width bits, and that prefix extended by the next input
    character is added to the table while codes below
    self.codeword_limit remain. The value self.radix is written as the
    final codeword.
    """
    # seed the ternary search trie with the single-character strings
    table = TernarySt()
    for value in xrange(self.radix):
        table[chr(value)] = value

    # self.radix itself is written as the closing codeword below, so
    # newly assigned codes start just past it
    next_code = self.radix + 1

    # whole input is held in memory (not optimal, but easy to code)
    pending = self.file.read()
    writer = BitWriter(output_file)

    while pending:
        prefix = table.longest_prefix(pending)
        consumed = len(prefix)

        # emit the code of the matched prefix
        writer.writebits(table[prefix], self.codeword_width)

        if consumed < len(pending) and next_code < self.codeword_limit:
            # register the matched prefix plus one look-ahead character
            table[pending[:consumed + 1]] = next_code
            next_code += 1

        pending = pending[consumed:]

    writer.writebits(self.radix, self.codeword_width)
def compress(self, output_file):
    """
    Compress data from input file and write compressed data to the
    output file.

    The output consists of, in order: the Huffman trie (written by
    self._write_trie), the number of input symbols as an 8-bit field,
    and the concatenated code bits of every input symbol.

    The input is read exactly once, from the stream's current position,
    and the symbol values are kept in memory for the encoding pass.
    Encoding therefore always covers the same symbols that were counted,
    and self.file does not need to be seekable.
    """
    # read input once, counting chars and remembering them for encoding
    freq = defaultdict(int)
    symbols = []
    while True:
        # read one char from a file
        char = self.file.read(1)
        if not char:
            # EOF
            break
        value = ord(char)
        symbols.append(value)
        freq[value] += 1
    length = len(symbols)

    # build Huffman trie
    root = self._build_trie(freq)

    # build symbol table for chars and their binary representation
    st = {}
    self._build_code(st, root, '')

    bw = BitWriter(output_file)

    # write the Huffman trie binary representation to the file
    self._write_trie(root, bw)

    # write number of bytes in original uncompressed message
    # NOTE(review): the field is only 8 bits wide, so a length above 255
    # presumably cannot be represented -- confirm BitWriter's behaviour.
    # Widening it changes the file format and must be done together with
    # the reader of this format.
    bw.writebits(length, 8)

    # use Huffman code to encode input; each code is a string of '0'/'1'
    for value in symbols:
        for c in st[value]:
            bw.writebit(c != '0')