def main(inputfile, outputfile):
    """Compress ``inputfile`` into ``outputfile`` and print a success message.

    The input file is opened for binary reading and the output file is
    wrapped in an ``arithmeticcoding.BitOutputStream``; both are released
    when the compression call returns or raises.

    Args:
        inputfile: Path of the file to read.
        outputfile: Path of the file to create or overwrite.
    """
    with open(inputfile, "rb") as source:
        bit_sink = arithmeticcoding.BitOutputStream(open(outputfile, "wb"))
        # closing() calls bit_sink.close() on the way out, before the
        # input file is closed.
        with contextlib.closing(bit_sink) as bit_out:
            compress(source, bit_out)
    print("Adaptive Compress Success !!!")
def compress(quantized, output_file):
    """Pickle ``quantized`` and write it to ``output_file`` with adaptive arithmetic coding.

    The object is serialized with ``pickle.dumps``.  Every byte of that
    serialization is encoded against a 257-symbol frequency table that starts
    flat and is incremented after each byte.  Symbol 256 is written last as
    the end-of-file marker.

    Args:
        quantized: Any picklable object.
        output_file: Path of the file to create or overwrite.
    """
    data = pickle.dumps(quantized)
    with open(output_file, "wb") as file:
        bitout = arithmeticcoding.BitOutputStream(file)
        try:
            initfreqs = arithmeticcoding.FlatFrequencyTable(257)
            freqs = arithmeticcoding.SimpleFrequencyTable(initfreqs)
            enc = arithmeticcoding.ArithmeticEncoder(32, bitout)
            # Iterating over bytes yields one integer symbol (0-255) per byte.
            for symbol in data:
                enc.write(freqs, symbol)
                freqs.increment(symbol)
            enc.write(freqs, 256)  # EOF
            enc.finish()  # Flush remaining code bits
        finally:
            # Close the bit stream itself, not only the underlying file.
            # NOTE(review): in the reference arithmeticcoding module close()
            # pads and writes a partially filled last byte; confirm for the
            # module version used here.
            bitout.close()
def entropy_coding(frame_index, lat, path_bin, latent, sigma, mu):
    """Arithmetic-code one latent tensor to a ``.bin`` file and return its size in bits.

    The output file name is ``path_bin + 'f<frame_index zero-padded to 3>_<lat>.bin'``.
    Each element ``latent[0, h, w, ch]`` is shifted by ``bias`` and encoded
    with a frequency table built from the matching ``mu`` (also shifted by
    ``bias``) and ``sigma`` entries.

    Args:
        frame_index: Frame number used in the file name.
        lat: Latent name used in the file name; ``'mv'`` selects a bias of 50,
            anything else a bias of 100.
        path_bin: Prefix prepended verbatim to the file name (no separator is
            inserted).
        latent: Array indexed as ``[0, h, w, ch]``; axes 1-3 are iterated.
        sigma: Array indexed like ``latent``.
        mu: Array indexed like ``latent``.

    Returns:
        Size of the written file in bits (``os.path.getsize(...) * 8``).
    """
    bias = 50 if lat == 'mv' else 100
    # np.int was removed in NumPy 1.24; the builtin int is the exact equivalent.
    num_symbols = int(bias * 2 + 1)

    bin_name = 'f' + str(frame_index).zfill(3) + '_' + lat + '.bin'
    bin_path = path_bin + bin_name

    bitout = arithmeticcoding.BitOutputStream(open(bin_path, "wb"))
    try:
        enc = arithmeticcoding.ArithmeticEncoder(32, bitout)
        for h in range(latent.shape[1]):
            for w in range(latent.shape[2]):
                for ch in range(latent.shape[3]):
                    mu_val = mu[0, h, w, ch] + bias
                    sigma_val = sigma[0, h, w, ch]
                    symbol = latent[0, h, w, ch] + bias
                    freq = arithmeticcoding.logFrequencyTable_exp(
                        mu_val, sigma_val, num_symbols)
                    enc.write(freq, symbol)
        enc.finish()
    finally:
        # Release the stream even when encoding fails part-way.
        bitout.close()

    # The size is read only after the stream has been closed.
    return os.path.getsize(bin_path) * 8
def start(self, dictionary_size=256):
    """Open the output stream and create the frequency table and encoder.

    Args:
        dictionary_size: Stored on the instance; the frequency table is
            created with ``dictionary_size + 1`` symbols.
    """
    self.dictionary_size = dictionary_size

    raw_out = open(self.outputfile, "wb")
    self.bitout = arithmeticcoding.BitOutputStream(raw_out)

    # Disabled alternative kept for reference:
    # self.freqsTable = arithmeticcoding.SimpleFrequencyTable([float(i % 8 + 1) for i in range(self.dictionary_size + 1)])
    symbol_count = self.dictionary_size + 1
    self.freqsTable = arithmeticcoding.FlatFrequencyTable(symbol_count)

    self.encoder = arithmeticcoding.ArithmeticEncoder(32, self.bitout)
def main(args):
    """Command-line entry point: compress ``args[0]`` into ``args[1]``.

    Args:
        args: Sequence of exactly two items, input path then output path.

    Raises:
        SystemExit: With a usage message when ``args`` does not hold exactly
            two items.
    """
    # Validate the command line before touching any file.
    if len(args) != 2:
        sys.exit("Usage: python adaptive_arithmetic_compress.py InputFile OutputFile")
    src_path, dst_path = args

    # Input is opened first; the bit stream is closed before the input file.
    with open(src_path, "rb") as src:
        bit_sink = arithmeticcoding.BitOutputStream(open(dst_path, "wb"))
        with contextlib.closing(bit_sink) as sink:
            compress(src, sink)
def compress(model):
    """Arithmetic-code ``./result/test.qs`` into ``./result/data.bin`` using ``model``.

    Lines are read one at a time until an empty line or end of file, and at
    most 10001 lines are processed.  Every line read is echoed to
    ``./result/old.txt``.  The first character of a line is coded with
    ``generate_freqs(pro=1, first_step=True)``; each later character is coded
    with frequencies derived from ``predict`` applied to the previous
    character and the running hidden state.  After the last line, symbol
    ``len(characters)`` is written with the most recent frequency table.

    Two ratios are printed at the end: correct arg-max predictions and the
    accumulated ``-log2`` cost, each divided by ``time_num`` (which grows by
    8.0 per character, presumably the bits of the raw text; TODO confirm).

    Args:
        model: Passed through unchanged to ``predict``.
    """
    bit_out = arithmeticcoding.BitOutputStream(open('./result/data.bin', "wb"))
    try:
        enc = arithmeticcoding.ArithmeticEncoder(bit_out)
        num_line = 0
        sum_all = 0
        time_num = 0
        acc_num = 0
        end_freq = generate_freqs(pro=1, first_step=True)

        with open('./result/old.txt', 'w') as z, open('./result/test.qs') as f:
            while True:
                text = f.readline().replace('\n', '')
                # The echo happens before the emptiness check, so the
                # terminating empty read still adds a newline to old.txt.
                z.write(text)
                z.write('\n')
                if not text:
                    break

                encode_text = [char2int[char] for char in text]
                num_line += 1
                hidden = None  # the hidden state is reset for every line

                for char_index in range(len(encode_text)):
                    if char_index == 0:
                        freq = generate_freqs(pro=1, first_step=True)
                        sum_all += -np.log2(1 / 35.)
                        time_num += 8.0
                    else:
                        target_char = np.array(encode_text[char_index])
                        context_char = np.array(encode_text[char_index - 1])
                        out, hidden = predict(model, context_char, hidden)
                        out = out[0]  # (35, )
                        sum_all += -np.log2(out[target_char])
                        time_num += 8.0
                        freq = generate_freqs(pro=out, first_step=False)
                        # np.int was removed in NumPy 1.24; builtin int is
                        # the same dtype request.
                        if np.argmax(out) == target_char.astype(int):
                            acc_num += 1
                    enc.write(freq, encode_text[char_index])
                    end_freq = freq

                if num_line % 100 == 0:
                    print(num_line)
                if num_line > 10000:
                    break

        enc.write(end_freq, len(characters))
        enc.finish()
    finally:
        # NOTE(review): in the reference arithmeticcoding module close() also
        # writes a partially filled last byte; confirm for this module.
        bit_out.close()
    print(acc_num / time_num, sum_all / time_num)
def comparess(file1, model, indices_char):
    """Compress text file ``file1`` into ``file1 + '.comp'`` using model guesses.

    Stream layout, all coded with one adaptive frequency table of
    ``AE_SIZE`` symbols:

    * For the first ``maxlen`` characters: symbol 0, then the character code.
    * Afterwards ``predict`` is asked for the next character from the
      preceding ``maxlen`` characters.  A correct guess only bumps a run
      counter (capped at 255).  Otherwise the run count and then the actual
      character code are written, and the counter resets.
    * At the end a pending non-zero run count is written, then ``MAGIC_EOF``.

    Original author's note: this is very slow (about 0.02 s per predicted
    character, roughly 16 minutes for a 50k character file) and would
    benefit from running mostly on the GPU.

    Args:
        file1: Path of the text file to compress.
        model: Passed through to ``predict``.
        indices_char: Passed through to ``predict``.

    Returns:
        None.
    """
    # Read the whole input up front and release the handle immediately.
    with open(file1, 'r') as src:
        f1 = src.read()

    bitout = arithmeticcoding.BitOutputStream(open(file1 + '.comp', "wb"))
    try:
        initfreqs = arithmeticcoding.FlatFrequencyTable(AE_SIZE)
        freqs = arithmeticcoding.SimpleFrequencyTable(initfreqs)
        enc = arithmeticcoding.ArithmeticEncoder(bitout)
        guesses_right = 0

        for i, char in enumerate(f1):
            current = ord(char)
            if i < maxlen:
                enc.write(freqs, 0)  # Always 'guessing' zero correctly before maxlen
                freqs.increment(0)
                enc.write(freqs, current)
                freqs.increment(current)
            else:
                guess = predict(f1[(i - maxlen):i], model, indices_char)
                if char == guess and guesses_right < 255:
                    guesses_right += 1
                    print("Guessed", char, "correctly")
                else:
                    enc.write(freqs, guesses_right)
                    print("Wrong guess. Outputing", guesses_right, "correct guesses")
                    freqs.increment(guesses_right)
                    print(i, "Outputing char", current)
                    enc.write(freqs, current)
                    freqs.increment(current)
                    guesses_right = 0
            if i % 100 == 0:
                print("i:", i)

        if guesses_right > 0:
            enc.write(freqs, guesses_right)
        enc.write(freqs, MAGIC_EOF)
        print("out eof sanity check")
        enc.finish()
    finally:
        # Release the output stream even if prediction or encoding fails.
        bitout.close()
    return None
def final_function(args):
    """Compress the file named by ``args[0]`` into ``args[1]``.

    Args:
        args: Sequence of exactly two items, input path then output path.

    Raises:
        SystemExit: With a usage message when ``args`` does not hold exactly
            two items.
    """
    # Reject anything other than exactly two arguments.
    if len(args) != 2:
        sys.exit("Usage: python ppm_compress.py InputFile OutputFile")
    src_path = args[0]
    dst_path = args[1]

    # The bit stream is closed first, then the input file.
    with open(src_path, "rb") as src:
        stream = arithmeticcoding.BitOutputStream(open(dst_path, "wb"))
        with contextlib.closing(stream) as sink:
            compress(src, sink)
def main(args):
    """Command-line entry point: compress ``args[0]`` into ``args[1]``.

    Args:
        args: Sequence of exactly two items, input path then output path.

    Raises:
        SystemExit: With a usage message when ``args`` does not hold exactly
            two items.
    """
    if len(args) != 2:
        sys.exit("Usage: python ppm-compress.py InputFile OutputFile")
    src_path, dst_path = args[0], args[1]

    with open(src_path, "rb") as src:
        sink = arithmeticcoding.BitOutputStream(open(dst_path, "wb"))
        try:
            compress(src, sink)
        finally:
            # The bit stream is closed whether or not compression succeeded.
            sink.close()
def main(inputfile, outputfile):
    """Compress ``inputfile`` into ``outputfile`` with a two-pass static model.

    Pass one computes the symbol frequencies of the input and adds one count
    for symbol 256 (end of file).  Pass two writes the frequency table
    followed by the arithmetic-coded data.  The paths are taken directly as
    parameters; no command-line parsing is done here.

    Args:
        inputfile: Path of the file to read (it is read twice).
        outputfile: Path of the file to create or overwrite.
    """
    # First pass: gather statistics.
    table = get_frequencies(inputfile)
    table.increment(256)  # EOF symbol gets a frequency of 1

    # Second pass: emit the table, then the coded payload.
    with open(inputfile, "rb") as source:
        bit_sink = arithmeticcoding.BitOutputStream(open(outputfile, "wb"))
        with contextlib.closing(bit_sink) as sink:
            write_frequencies(sink, table)
            compress(table, source, sink)
    print("Compress Success !!!")
def __init__(self, path, ispaired, kmerLen, verbose, sequenceTable):
    """Initialise counters, lookup tables, frequency tables and output streams.

    Args:
        path: Output path prefix; ``.bifurL`` and ``.bifurR`` files are
            opened for binary writing under it.
        ispaired: Stored as ``self.paired``.
        kmerLen: Stored as both ``self.kmerLen`` and ``self.indexLen``.
        verbose: Stored as ``self.verbose``.
        sequenceTable: Stored as ``self.sequenceTable``.
    """
    # --- plain configuration ---------------------------------------------
    self.mutiDict = {}  # kmers for buckets
    self.sequenceTable = sequenceTable
    self.kmerLen = self.indexLen = kmerLen
    self.paired = ispaired
    self.seqLen = 0
    self.bucketDict = {}
    self.encodeBucketPath = {}

    # --- node statistics ---------------------------------------------------
    self.newNodeNum = self.simpleNodeNum = 0
    self.tipNodeNum = self.bifurNodeNum = 0
    self.deleteBifurRatio = 0.2

    self.outPutPath = path

    # --- nucleotide lookup tables -------------------------------------------
    self.dna2num = {"A": 0, "C": 1, "G": 2, "T": 3}
    # Inverse of dna2num: {0: "A", 1: "C", 2: "G", 3: "T"}.
    self.num2dna = {code: base for base, code in self.dna2num.items()}
    self.dna2bit = {"A": '0b00', "C": '0b01', "G": '0b10', "T": '0b11'}

    # --- bit buffers and adaptive frequency tables --------------------------
    self.firstSeq = BitStream()
    self.numFlag = BitStream()
    self.freq3 = arithmeticcoding.SimpleFrequencyTable(
        arithmeticcoding.FlatFrequencyTable(3))
    self.freq4 = arithmeticcoding.SimpleFrequencyTable(
        arithmeticcoding.FlatFrequencyTable(4))
    self.freqs = arithmeticcoding.SimpleFrequencyTable(
        arithmeticcoding.FlatFrequencyTable(4))

    # --- output streams (left is opened before right) -----------------------
    left_raw = open(self.outPutPath + ".bifurL", "wb")
    self.bitoutL = arithmeticcoding.BitOutputStream(left_raw)
    right_raw = open(self.outPutPath + ".bifurR", "wb")
    self.bitoutR = arithmeticcoding.BitOutputStream(right_raw)
    self.encodeSeqPathL = self.openFileLeft()
    self.encodeSeqPathR = self.openFileRight()

    self.verbose = verbose
    self.removeOutputFile()
def main(binfile):
    """Round-trip a fixed sample through ``compress``/``decompress`` via ``binfile``.

    A list of ten ones is compressed into ``binfile`` and decompressed again.
    A hard-coded cost estimate (in bytes) and both the input and the decoded
    output are then printed.

    Args:
        binfile: Path of the scratch file to write and read back.
    """
    sample = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

    # Compression: the bit stream is closed when the block exits.
    writer = arithmeticcoding.BitOutputStream(open(binfile, "wb"))
    with contextlib.closing(writer) as bitout:
        compress(sample, bitout)

    # Decompression from the file just written.
    with open(binfile, "rb") as bitsfile:
        decoded = decompress(arithmeticcoding.BitInputStream(bitsfile))

    # Sum of -log(p)/log(2) terms: weights 1 and 9 at p = 0.1, weight 1 at p = 0.3.
    ln2 = np.log(2.0)
    cost_p01 = -np.log(0.1)
    cost_p03 = -np.log(0.3)
    bpp = 1.0 * cost_p01 / ln2 + 9.0 * cost_p01 / ln2 + 1.0 * cost_p03 / ln2

    print(bpp / 8.0, "bytes")
    print(sample, decoded)
def main(args):
    """Command-line entry point for two-pass static arithmetic compression.

    Pass one computes the symbol frequencies of ``args[0]`` and adds one
    count for symbol 256 (end of file).  Pass two writes the frequency table
    and then the coded data to ``args[1]``.

    Args:
        args: Sequence of exactly two items, input path then output path.

    Raises:
        SystemExit: With a usage message when ``args`` does not hold exactly
            two items.
    """
    if len(args) != 2:
        sys.exit("Usage: python arithmetic-compress.py InputFile OutputFile")
    src_path, dst_path = args[0], args[1]

    # First pass over the input: symbol statistics.
    table = get_frequencies(src_path)
    table.increment(256)  # EOF symbol gets a frequency of 1

    # Second pass: header (frequency table) followed by the coded payload.
    with open(src_path, "rb") as src:
        sink = arithmeticcoding.BitOutputStream(open(dst_path, "wb"))
        try:
            write_frequencies(sink, table)
            compress(table, src, sink)
        finally:
            sink.close()
def encode(self, model_type, input_path, compressed_file_path, quality_level):
    """Encode the image at ``input_path`` into ``compressed_file_path``.

    File layout as written by this method:

    1. One ``uint8`` holding ``(quality_level << 1) + model_type``.
    2. Two ``uint16`` values: the original width and height.
    3. One arithmetic-coded stream appended after the header.  It holds all
       ``z_hat`` symbols (channel, row, column order), then all ``y_hat``
       symbols (row, column, channel order), then symbol 512 (presumably
       the end marker; confirm against the decoder).

    The image is reflect-padded on the top/left to multiples of 16 before it
    is fed to the network.  Symbols are the rounded latent values shifted by
    255.  A value outside ``[0, 511]`` is reported with a print but is still
    handed to the encoder.

    Args:
        model_type: Added into the low bit of the header byte.
        input_path: Path of the image; it is reshaped to three channels.
        compressed_file_path: Output path; created or overwritten.
        quality_level: Shifted left by one into the header byte.

    Returns:
        ``compressed_file_path``.
    """
    # with TOP N dimensions
    img = Image.open(input_path)
    w, h = img.size

    # ---- header: written and closed before the bit stream is appended ----
    with open(compressed_file_path, mode='wb') as fileobj:
        buf = quality_level << 1
        buf = buf + model_type
        arr = np.array([0], dtype=np.uint8)
        arr[0] = buf
        arr.tofile(fileobj)
        arr = np.array([w, h], dtype=np.uint16)
        arr.tofile(fileobj)

    # Explicit float division keeps the round-up correct even where "/" is
    # integer division.
    new_w = int(math.ceil(w / 16.0) * 16)
    new_h = int(math.ceil(h / 16.0) * 16)
    pad_w = new_w - w
    pad_h = new_h - h

    input_x = np.asarray(img)
    input_x = np.pad(input_x, ((pad_h, 0), (pad_w, 0), (0, 0)), mode='reflect')
    input_x = input_x.reshape(1, new_h, new_w, 3)
    input_x = input_x.transpose([0, 3, 1, 2])

    h_s_out, y_hat, z_hat, sigma_z = self.sess.run(
        [self.h_s_out, self.y_hat, self.z_hat, self.sigma_z],
        feed_dict={self.input_x: input_x})  # NCHW

    bitout = arithmeticcoding.BitOutputStream(
        open(compressed_file_path, "ab+"))
    try:
        enc = arithmeticcoding.ArithmeticEncoder(bitout)

        ############### encode z ####################################
        printProgressBar(0, z_hat.shape[1], prefix='Encoding z_hat:',
                         suffix='Complete', length=50)
        for ch_idx in range(z_hat.shape[1]):
            printProgressBar(ch_idx + 1, z_hat.shape[1],
                             prefix='Encoding z_hat:', suffix='Complete',
                             length=50)
            # One table per channel: fixed mean 255, per-channel sigma.
            mu_val = 255
            sigma_val = sigma_z[ch_idx]
            freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)
            for h_idx in range(z_hat.shape[2]):
                for w_idx in range(z_hat.shape[3]):
                    symbol = np.rint(z_hat[0, ch_idx, h_idx, w_idx] + 255)
                    if symbol < 0 or symbol > 511:
                        print("symbol range error: " + str(symbol))
                    enc.write(freq, symbol)

        ############### encode y ####################################
        # z and y share one encoder and one stream; y continues where z ended.
        padded_y1_hat = np.pad(y_hat[:, :self.M1, :, :],
                               ((0, 0), (0, 0), (3, 0), (2, 1)),
                               'constant',
                               constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
        c_prime = h_s_out[:, :self.M1, :, :]
        sigma2 = h_s_out[:, self.M1:, :, :]
        padded_c_prime = np.pad(c_prime,
                                ((0, 0), (0, 0), (3, 0), (2, 1)),
                                'constant',
                                constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))

        printProgressBar(0, y_hat.shape[2], prefix='Encoding y_hat:',
                         suffix='Complete', length=50)
        for h_idx in range(y_hat.shape[2]):
            printProgressBar(h_idx + 1, y_hat.shape[2],
                             prefix='Encoding y_hat:', suffix='Complete',
                             length=50)
            for w_idx in range(y_hat.shape[3]):
                c_prime_i = self.extractor_prime(padded_c_prime, h_idx, w_idx)
                c_doubleprime_i = self.extractor_doubleprime(
                    padded_y1_hat, h_idx, w_idx)
                concatenated_c_i = np.concatenate(
                    [c_doubleprime_i, c_prime_i], axis=1)

                pred_mean, pred_sigma = self.sess.run(
                    [self.pred_mean, self.pred_sigma],
                    feed_dict={self.concatenated_c_i: concatenated_c_i})

                # Predicted means are extended with self.M2 zero-mean
                # channels; their sigmas come from sigma2 at this position.
                zero_means = np.zeros([
                    pred_mean.shape[0], self.M2, pred_mean.shape[2],
                    pred_mean.shape[3]
                ])
                concat_pred_mean = np.concatenate([pred_mean, zero_means],
                                                  axis=1)
                concat_pred_sigma = np.concatenate([
                    pred_sigma,
                    sigma2[:, :, h_idx:h_idx + 1, w_idx:w_idx + 1]
                ], axis=1)

                for ch_idx in range(self.M):
                    mu_val = concat_pred_mean[0, ch_idx, 0, 0] + 255
                    sigma_val = concat_pred_sigma[0, ch_idx, 0, 0]
                    freq = arithmeticcoding.ModelFrequencyTable(
                        mu_val, sigma_val)
                    symbol = np.rint(y_hat[0, ch_idx, h_idx, w_idx] + 255)
                    if symbol < 0 or symbol > 511:
                        print("symbol range error: " + str(symbol))
                    enc.write(freq, symbol)

        # Trailing symbol 512, coded with the last frequency table used.
        enc.write(freq, 512)
        enc.finish()
    finally:
        # Release the stream even if the session or the encoder raises.
        bitout.close()

    return compressed_file_path
def encode(self, model_type, input_path, compressed_file_path, quality_level):
    """Encode the image at ``input_path`` into ``compressed_file_path``.

    File layout as written by this method:

    1. One ``uint8`` holding ``(quality_level << 1) + model_type``.
    2. Two ``uint16`` values: the original width and height.
    3. One arithmetic-coded stream appended after the header.  It holds all
       ``z_hat`` symbols (channel, row, column order), then all ``y_hat``
       symbols (row, column, channel order), then symbol 512 (presumably
       the end marker; confirm against the decoder).

    The image is edge-padded on the bottom/right to even dimensions.  The
    analysis stages ``gah1``..``gah3`` are run one by one, and each
    intermediate result is zero-padded by ``shape % 2`` on axes 1 and 2
    before the next stage.  ``y_hat`` is symmetric-padded to multiples of 4
    before ``z_hat`` and ``c_prime`` are computed.  Symbols are the latent
    values shifted by 255 and truncated with ``int``.  A value outside
    ``[0, 511]`` is reported with a print but is still handed to the encoder.

    Args:
        model_type: Added into the low bit of the header byte.
        input_path: Path of the image; it is reshaped to three channels.
        compressed_file_path: Output path; created or overwritten.
        quality_level: Shifted left by one into the header byte.

    Returns:
        ``compressed_file_path``.
    """

    def pad_axes_1_2_to_even(tensor):
        # Append one zero row/column on axes 1 and 2 when their size is odd.
        return np.pad(tensor,
                      ((0, 0), (0, tensor.shape[1] % 2),
                       (0, tensor.shape[2] % 2), (0, 0)),
                      mode='constant')

    # with TOP N dimensions
    img = Image.open(input_path)
    w, h = img.size

    # ---- header: written and closed before the bit stream is appended ----
    with open(compressed_file_path, mode='wb') as fileobj:
        buf = quality_level << 1
        buf = buf + model_type
        arr = np.array([0], dtype=np.uint8)
        arr[0] = buf
        arr.tofile(fileobj)
        arr = np.array([w, h], dtype=np.uint16)
        arr.tofile(fileobj)

    new_w = int(math.ceil(float(w) / 2.0) * 2)
    new_h = int(math.ceil(float(h) / 2.0) * 2)
    pad_w = new_w - w
    pad_h = new_h - h

    img_array = np.asarray(img)
    img_array = np.pad(img_array, ((0, pad_h), (0, pad_w), (0, 0)),
                       mode='edge')
    input_x = img_array.reshape(1, new_h, new_w, 3)
    input_x = input_x.transpose([0, 3, 1, 2])

    # ---- analysis transform, stage by stage ----
    gah1, sigma_z = self.sess.run([self.gah1, self.sigma_z],
                                  feed_dict={self.input_x: input_x})
    gah1 = pad_axes_1_2_to_even(gah1)
    gah2 = self.sess.run(self.gah2, feed_dict={self.gah1: gah1})
    gah2 = pad_axes_1_2_to_even(gah2)
    gah3 = self.sess.run(self.gah3, feed_dict={self.gah2: gah2})
    gah3 = pad_axes_1_2_to_even(gah3)
    y_hat = self.sess.run(self.y_hat, feed_dict={self.gah3: gah3})

    y_w = y_hat.shape[3]
    y_h = y_hat.shape[2]
    new_y_w = int(math.ceil(float(y_w) / 4.0) * 4)
    new_y_h = int(math.ceil(float(y_h) / 4.0) * 4)
    pad_y_w = new_y_w - y_w
    pad_y_h = new_y_h - y_h
    pad_y_hat = np.pad(y_hat,
                       ((0, 0), (0, 0), (0, pad_y_h), (0, pad_y_w)),
                       mode='symmetric')
    z_hat, c_prime = self.sess.run([self.z_hat, self.c_prime],
                                   feed_dict={self.y_hat: pad_y_hat})

    ############### encode zhat ####################################
    printProgressBar(0, z_hat.shape[1], prefix='Encoding z_hat:',
                     suffix='Complete', length=50)
    bitout = arithmeticcoding.BitOutputStream(
        open(compressed_file_path, "ab+"))
    try:
        enc = arithmeticcoding.ArithmeticEncoder(bitout)
        for ch_idx in range(z_hat.shape[1]):
            printProgressBar(ch_idx + 1, z_hat.shape[1],
                             prefix='Encoding z_hat:', suffix='Complete',
                             length=50)
            # One table per channel: fixed mean 255, per-channel sigma.
            mu_val = 255
            sigma_val = sigma_z[ch_idx]
            freq = arithmeticcoding.ModelFrequencyTable(mu_val, sigma_val)
            for h_idx in range(z_hat.shape[2]):
                for w_idx in range(z_hat.shape[3]):
                    # np.int was removed in NumPy 1.24; builtin int
                    # truncates identically.
                    symbol = int(z_hat[0, ch_idx, h_idx, w_idx] + 255)
                    if symbol < 0 or symbol > 511:
                        print("symbol range error: " + str(symbol))
                    enc.write(freq, symbol)

        ############### encode yhat ####################################
        padded_y_hat = np.pad(y_hat,
                              ((0, 0), (0, 0), (3, 0), (2, 1)),
                              'constant',
                              constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))
        padded_c_prime = np.pad(c_prime,
                                ((0, 0), (0, 0), (3, 0), (2, 1)),
                                'constant',
                                constant_values=((0, 0), (0, 0), (0, 0), (0, 0)))

        printProgressBar(0, y_hat.shape[2], prefix='Encoding y_hat:',
                         suffix='Complete', length=50)
        for h_idx in range(y_hat.shape[2]):
            printProgressBar(h_idx + 1, y_hat.shape[2],
                             prefix='Encoding y_hat:', suffix='Complete',
                             length=50)
            for w_idx in range(y_hat.shape[3]):
                c_prime_i = self.extractor_prime(padded_c_prime, h_idx, w_idx)
                c_doubleprime_i = self.extractor_doubleprime(
                    padded_y_hat, h_idx, w_idx)
                concatenated_c_i = np.concatenate(
                    [c_doubleprime_i, c_prime_i], axis=1)

                pred_mean, pred_sigma = self.sess.run(
                    [self.pred_mean, self.pred_sigma],
                    feed_dict={self.concatenated_c_i: concatenated_c_i})

                for ch_idx in range(self.M):
                    mu_val = pred_mean[0, ch_idx, 0, 0] + 255
                    sigma_val = pred_sigma[0, ch_idx, 0, 0]
                    freq = arithmeticcoding.ModelFrequencyTable(
                        mu_val, sigma_val)
                    symbol = int(y_hat[0, ch_idx, h_idx, w_idx] + 255)
                    if symbol < 0 or symbol > 511:
                        print("symbol range error: " + str(symbol))
                    enc.write(freq, symbol)

        # Trailing symbol 512, coded with the last frequency table used.
        enc.write(freq, 512)
        enc.finish()
    finally:
        # Release the stream even if the session or the encoder raises.
        bitout.close()

    return compressed_file_path