def store_data(filename_base, code_page, num_chunks, chunks):
    """Write the encoded payload and its code page to the output folder.

    Two files are produced, both named from ``settings.OUT_FOLDER``,
    ``filename_base`` and ``settings.HUFFMAN_DATAFILE_EXTENSION``:

    * ``<base>_encoded<ext>``: binary; ``num_chunks`` packed with the
      native ``'I'`` struct format, followed by one byte per entry of
      ``chunks``.
    * ``<base>_encoding<ext>``: ``code_page`` serialised as JSON.

    Args:
        filename_base: Stem used to build both output file names.
        code_page: JSON-serialisable object describing the code.
        num_chunks: Non-negative integer stored in the binary header.
        chunks: Iterable of strings of binary digits; each must parse to
            an integer in ``range(256)`` because it is stored as one byte.

    Raises:
        ValueError: If a chunk is not a base-2 literal or does not fit in
            a single byte.
    """
    # Convert before touching the filesystem so bad input leaves no
    # half-written file behind.
    payload = bytearray(int(chunk, 2) for chunk in chunks)

    encoded_path = make_path("%s%s_encoded%s" % (settings.OUT_FOLDER, filename_base, settings.HUFFMAN_DATAFILE_EXTENSION))
    with open(encoded_path, "wb") as encoded_file:
        encoded_file.write(struct.pack('I', num_chunks))
        encoded_file.write(payload)

    encoding_path = make_path("%s%s_encoding%s" % (settings.OUT_FOLDER, filename_base, settings.HUFFMAN_DATAFILE_EXTENSION))
    # Close the JSON file explicitly so its contents are flushed before
    # anything tries to read it back.
    with open(encoding_path, 'w') as encoding_file:
        json.dump(code_page, encoding_file)
def load_data(filename_base):
    """Read back the files written by ``store_data``.

    Args:
        filename_base: Stem used to build the ``_encoding`` and
            ``_encoded`` file names under ``settings.OUT_FOLDER``.

    Returns:
        A ``(code_page, num_chars, encoded_string)`` tuple where
        ``code_page`` is the object decoded from the JSON file,
        ``num_chars`` is the unsigned integer stored in the binary
        header, and ``encoded_string`` is the remaining payload rendered
        as a string of ``'0'``/``'1'`` characters, eight per byte.

    Raises:
        struct.error: If the encoded file is shorter than its header.
    """
    encoding_path = make_path("%s%s_encoding%s" % (settings.OUT_FOLDER, filename_base, settings.HUFFMAN_DATAFILE_EXTENSION))
    with open(encoding_path) as encoding_file:
        code_page = json.load(encoding_file)

    encoded_path = make_path("%s%s_encoded%s" % (settings.OUT_FOLDER, filename_base, settings.HUFFMAN_DATAFILE_EXTENSION))
    header_size = struct.calcsize('I')
    with open(encoded_path, "rb") as encoded_file:
        (num_chars,) = struct.unpack('I', encoded_file.read(header_size))
        # Everything after the header is payload; a bytearray iterates as
        # integers, so no separate size lookup or unpack is needed.
        payload = bytearray(encoded_file.read())

    # Zero-pad every byte to eight digits so leading zero bits survive.
    encoded_string = ''.join(format(byte, '08b') for byte in payload)

    return code_page, num_chars, encoded_string
def huffman(filename_base, chunk_size):
    """Run an encode / store / load / decode round trip and report sizes.

    The input file ``<DATA_FOLDER><filename_base><ext>`` is read as text,
    a code page is built from it, and the text is encoded, written with
    ``store_data``, read back with ``load_data`` and decoded. The decoded
    text is written to ``<OUT_FOLDER><filename_base>_decoded<ext>`` as
    UTF-8, and the sizes of the input, the encoded payload and the stored
    code page are printed together with their ratio.

    When ``settings.DEBUG`` is set, a fixed sample string is encoded
    instead of the file contents (the file is still read and measured).
    Unless ``settings.SURPRESS_OUTPUT`` is set, the code page and the
    decoded string are printed as well.

    Args:
        filename_base: Stem of the input file and of all output files.
        chunk_size: Passed through to ``count_occurences`` and ``encode``.
    """
    extension = settings.HUFFMAN_DATAFILE_EXTENSION

    in_file_path = make_path("%s%s%s" % (settings.DATA_FOLDER, filename_base, extension))
    with open(in_file_path) as f:
        # Lines yielded by file iteration already carry their newline, so
        # they are joined as-is; appending another '\n' would double every
        # line break in the round-tripped output. Bytes that cannot be
        # decoded are replaced rather than raising.
        source_text = u''.join(unicode(line, errors='replace') for line in f)

    if settings.DEBUG:
        source_text = u"TO BE OR\n NOT TO BE"

    frequencies = count_occurences(source_text, chunk_size)
    tree = construct_huffman_tree(frequencies)
    code_page = construct_code_page(tree)
    # Single-argument print(...) emits the same text as the Python 2
    # print statement it replaces.
    if not settings.SURPRESS_OUTPUT:
        print(code_page)

    num_chunks, chunks = encode(code_page, source_text, chunk_size)

    # Go through the files on disk so the stored format is exercised too.
    store_data(filename_base, code_page, num_chunks, chunks)
    code_page, num_chars, encoded_string = load_data(filename_base)
    decoded_string = decode(code_page, num_chars, encoded_string)

    if not settings.SURPRESS_OUTPUT:
        print(">> decoded string: %s" % decoded_string)

    out_file_path = make_path("%s%s_decoded%s" % (settings.OUT_FOLDER, filename_base, extension))
    with open(out_file_path, 'w') as f:
        f.write(decoded_string.encode('utf8'))

    encoded_file_path = make_path("%s%s_encoded%s" % (settings.OUT_FOLDER, filename_base, extension))
    encoding_file_path = make_path("%s%s_encoding%s" % (settings.OUT_FOLDER, filename_base, extension))

    # "Decoded size" is the size of the original, uncompressed input file.
    decoded_file_size = os.path.getsize(in_file_path)
    encoded_file_size = os.path.getsize(encoded_file_path)
    encoding_file_size = os.path.getsize(encoding_file_path)

    print("Encoded size: %s" % encoded_file_size)
    print("Encodingsize: %s" % encoding_file_size)
    print("Decoded size: %s" % decoded_file_size)

    if decoded_file_size:
        rate = float(encoded_file_size + encoding_file_size) / float(decoded_file_size) * 100.0
        print("Compression rate: %s %%" % rate)
    else:
        # An empty input file has no meaningful ratio; avoid dividing by 0.
        print("Compression rate: n/a (input file is empty)")