def store_data(filename_base, code_page, num_chunks, chunks):
    """Write the encoded chunks and the code page to the output folder.

    Two files are produced, both named from ``settings.OUT_FOLDER``,
    ``filename_base`` and ``settings.HUFFMAN_DATAFILE_EXTENSION``:

    * ``<base>_encoded<ext>``: ``num_chunks`` packed with struct format
      ``'I'`` (native unsigned int), followed by one byte per chunk.
    * ``<base>_encoding<ext>``: ``code_page`` serialized as JSON.

    Args:
        filename_base: Base name used to build both output paths.
        code_page: JSON-serializable object written to the encoding file.
        num_chunks: Integer stored as the header of the encoded file.
        chunks: Iterable of binary-digit strings (e.g. ``"01100001"``).

    Raises:
        ValueError: If a chunk is not a base-2 string, or its value does not
            fit in a single byte (``bytearray`` only accepts 0..255).
    """
    # Convert first so malformed chunks fail before any file is touched.
    chunk_values = [int(chunk, 2) for chunk in chunks]

    encoded_path = make_path("%s%s_encoded%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    with open(encoded_path, "wb") as encoded_file:
        encoded_file.write(struct.pack('I', num_chunks))
        encoded_file.write(bytearray(chunk_values))

    encoding_path = make_path("%s%s_encoding%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    # Use a context manager so the JSON is flushed and the handle closed
    # before anyone reads the file back or measures its size.
    with open(encoding_path, 'w') as encoding_file:
        json.dump(code_page, encoding_file)
def load_data(filename_base):
    """Read back the code page and the encoded bit string for a data file.

    Reads ``<base>_encoding<ext>`` (JSON) and ``<base>_encoded<ext>``
    (binary) from ``settings.OUT_FOLDER``, where ``<ext>`` is
    ``settings.HUFFMAN_DATAFILE_EXTENSION``.

    Args:
        filename_base: Base name used to build both input paths.

    Returns:
        A ``(code_page, num_chars, encoded_string)`` tuple where
        ``code_page`` is the parsed JSON object, ``num_chars`` is the
        unsigned int unpacked from the first 4 bytes of the encoded file,
        and ``encoded_string`` is every remaining byte rendered as 8 binary
        digits and concatenated.

    Raises:
        struct.error: If the encoded file holds fewer than 4 header bytes.
    """
    encoding_path = make_path("%s%s_encoding%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    # Context manager closes the handle instead of leaving it to the GC.
    with open(encoding_path) as encoding_file:
        code_page = json.load(encoding_file)

    encoded_path = make_path("%s%s_encoded%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    # Everything after the 4-byte header is payload.
    payload_size = os.path.getsize(encoded_path) - 4
    with open(encoded_path, "rb") as encoded_file:
        (num_chars,) = struct.unpack('I', encoded_file.read(4))
        # bytearray yields integers when iterated, so no per-byte unpacking
        # is needed.
        payload = bytearray(encoded_file.read(payload_size))

    encoded_string = ''.join(format(value, '08b') for value in payload)
    return code_page, num_chars, encoded_string
def huffman(filename_base, chunk_size):
    """Run an encode / store / load / decode round trip and report sizes.

    Steps, in order:

    1. Read ``<DATA_FOLDER><base><ext>`` as text (replaced by a fixed sample
       string when ``settings.DEBUG`` is set).
    2. Build the code page via ``count_occurences``,
       ``construct_huffman_tree`` and ``construct_code_page``.
    3. ``encode`` the text, persist it with ``store_data``, read it back
       with ``load_data`` and ``decode`` it.
    4. Write the decoded text (UTF-8) to ``<OUT_FOLDER><base>_decoded<ext>``.
    5. Print the sizes of the encoded, encoding and original files and the
       resulting compression rate.

    Args:
        filename_base: Base name of the data file, without folder/extension.
        chunk_size: Passed through to ``count_occurences`` and ``encode``.
    """
    in_file_path = make_path("%s%s%s" % (
        settings.DATA_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))

    # Join once instead of growing a string with += on every line.
    # NOTE(review): lines produced by iterating a file already end with
    # their newline, so appending u'\n' doubles every line break in the
    # round-tripped text. Kept as-is to preserve output; confirm intent.
    with open(in_file_path) as f:
        text = "".join(
            unicode(line, errors='replace') + u'\n' for line in f)
    if settings.DEBUG:
        text = u"TO BE OR\n NOT TO BE"

    frequencies = count_occurences(text, chunk_size)
    tree = construct_huffman_tree(frequencies)
    code_page = construct_code_page(tree)
    if not settings.SURPRESS_OUTPUT:
        print(code_page)

    num_chunks, chunks = encode(code_page, text, chunk_size)
    store_data(filename_base, code_page, num_chunks, chunks)

    # Decode from what was actually written to disk, not from memory.
    code_page, num_chars, encoded_string = load_data(filename_base)
    decoded_string = decode(code_page, num_chars, encoded_string)
    if not settings.SURPRESS_OUTPUT:
        print(">> decoded string: %s" % decoded_string)

    decoded_path = make_path("%s%s_decoded%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    with open(decoded_path, 'w') as f:
        f.write(decoded_string.encode('utf8'))

    # "Decoded size" is the size of the original input file.
    decoded_file_size = os.path.getsize(in_file_path)
    encoded_path = make_path("%s%s_encoded%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    encoded_file_size = os.path.getsize(encoded_path)
    encoding_path = make_path("%s%s_encoding%s" % (
        settings.OUT_FOLDER, filename_base,
        settings.HUFFMAN_DATAFILE_EXTENSION))
    encoding_file_size = os.path.getsize(encoding_path)

    print("Encoded size: %s" % encoded_file_size)
    print("Encodingsize: %s" % encoding_file_size)
    print("Decoded size: %s" % decoded_file_size)
    if decoded_file_size == 0:
        # An empty input file (reachable in DEBUG mode) would otherwise
        # raise ZeroDivisionError here.
        print("Compression rate: n/a (input file is empty)")
    else:
        rate = (float(encoded_file_size + encoding_file_size)
                / float(decoded_file_size) * 100.0)
        print("Compression rate: %s %%" % rate)