def main() -> None:
    """Interactive round trip through the arithmetic coder.

    Reads a string from standard input, encodes it with ``arith.encode``,
    prints the resulting code both as a fraction and as a decimal, then
    decodes it again with ``arith.decode`` and prints the recovered text.
    """
    message = input("Enter the string: ")
    freq_dict, code = arith.encode(message)

    # The code is shown before decoding so it is visible even if decode fails.
    print(
        "\nEncoded message:",
        f"(common) {code.numerator} / {code.denominator}",
        f"(decimal) {float(code):.30e}\n",
        sep="\n",
    )

    decoded_string = arith.decode(code, freq_dict)
    print(f"Decoded message is '{decoded_string}'")
def camzip(method, filename, b=0.1, num=0, scale=(100000, 1), pr=0, pc=0):
    """Compress ``filename`` with the coder selected by ``method``.

    The file is read as bytes and a byte-frequency table is built from it.
    The encoded bit sequence is packed with ``bits2bytes`` and written to
    ``filename + '.cz' + method[0]``; the frequency table is always written
    to ``filename + '.czp'`` with ``dump``.

    Parameters
    ----------
    method : str
        One of 'huffman', 'shannon_fano', 'arithmetic', 'carithmeticac',
        'iadhuffman', 'fcondarithmetic', 'gadconarithmetic',
        'jadconarithmetic'.
    filename : str
        Path of the file to compress.
    b, num, scale
        Forwarded to ``arithmeticac.encode`` ('carithmeticac'); ``b`` is also
        forwarded to ``adconarithmetic.encode`` and
        ``adconarithmetic2.encode``.
    pr
        Forwarded to ``adhuffman`` ('iadhuffman').
    pc
        Forwarded to ``condarithmetic.encode`` ('fcondarithmetic').

    Raises
    ------
    NameError
        If ``method`` is not one of the names listed above.
    """
    with open(filename, 'rb') as source:
        x = source.read()

    # groupby only merges adjacent equal items, hence the sort.
    frequencies = {symbol: len(list(run)) for symbol, run in groupby(sorted(x))}
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        tree = huffman(p)
        code = xtree2code(tree)
        bits = vl_encode(x, code)
    elif method == 'shannon_fano':
        code = shannon_fano(p)
        tree = code2xtree(code)  # result unused; call kept as in the original
        bits = vl_encode(x, code)
    elif method == 'arithmetic':
        bits = arithmetic.encode(x, p)
    elif method == 'carithmeticac':
        bits = arithmeticac.encode(x, b, num, scale)
    elif method == 'iadhuffman':
        bits = adhuffman(x, pr)
    elif method == 'fcondarithmetic':
        bits = condarithmetic.encode(x, pc)
    elif method == 'gadconarithmetic':
        bits = adconarithmetic.encode(x, b)
    elif method == 'jadconarithmetic':
        bits = adconarithmetic2.encode(x, b)
    else:
        raise NameError('Compression method %s unknown' % method)

    payload = bytes(bits2bytes(bits))

    # The first letter of the method name becomes the last extension letter.
    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(payload)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)
def camzip(method, filename):
    """Compress ``filename`` with the coder selected by ``method``.

    Supported methods are 'huffman', 'shannon_fano', 'arithmetic', 'dapt'
    and 'context'. The packed output goes to
    ``filename + '.cz' + method[0]``. For the three methods that are handed
    the probability table ('huffman', 'shannon_fano', 'arithmetic') the
    byte-frequency table is also written to ``filename + '.czp'``.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    with open(filename, 'rb') as source:
        x = source.read()

    # groupby only merges adjacent equal items, hence the sort.
    frequencies = {symbol: len(list(run)) for symbol, run in groupby(sorted(x))}
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        tree = huffman(p)
        code = xtree2code(tree)
        bits = vl_encode(x, code)
    elif method == 'shannon_fano':
        code = shannon_fano(p)
        tree = code2xtree(code)  # result unused; call kept as in the original
        bits = vl_encode(x, code)
    elif method == 'arithmetic':
        bits = arithmetic.encode(x, p)
    elif method == 'dapt':
        bits = adaptive_arithmetic.encode(x)
    elif method == 'context':
        bits = contextual_arithmetic.encode(x)
    else:
        raise NameError('Compression method %s unknown' % method)

    payload = bytes(bits2bytes(bits))
    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(payload)

    # 'dapt' and 'context' are not given the table, so no side file for them.
    if method in ('huffman', 'shannon_fano', 'arithmetic'):
        with open(filename + '.czp', 'w') as fp:
            dump(frequencies, fp)
def __init__(self, name_real, name_email, name_comment, passphrase):
    """Build a key-generation profile and append it to ``profiles.txt``.

    The profile is stored on ``self.batch``, rendered with ``xmlify``,
    passed through ``codify.encode(batch, 58)`` and appended as one line to
    ``profiles.txt`` in the current working directory.

    Parameters
    ----------
    name_real, name_email, name_comment, passphrase
        Identity fields and passphrase; each is stored as its ``str()``
        form.
    """
    # str() rather than '%s' % value: the % operator treats a tuple
    # right-hand side as an argument list and would unpack it or raise.
    self.batch = {
        'name_real': str(name_real),
        'name_email': str(name_email),
        'name_comment': str(name_comment),
        'expire_date': '0',
        'key_type': 'RSA',
        'key_length': 2048,
        'key_usage': 'encrypt,sign,auth',
        'subkey_type': 'RSA',
        'subkey_length': 2048,
        'subkey_usage': 'encrypt,sign,auth',
        'passphrase': str(passphrase),
    }

    batch = xmlify(self.batch, wrap="profile", indent=" ")
    # NOTE(review): 58 is presumably a base-58 selector; confirm against codify.
    encrypt_info = codify.encode(batch, 58)

    # The handle is still bound to self.profiles, as before, for any caller
    # that inspects it. The with-block closes it, so no explicit close().
    with open("profiles.txt", "a") as self.profiles:
        print(encrypt_info, file=self.profiles)
def camzip(method, filename):
    """Compress ``filename`` with the coder selected by ``method``.

    Supported methods are 'huffman', 'shannon_fano', 'arithmetic' and
    'arithmetic_ftr'. The probability and frequency tables come from
    ``vl_codes.probability_dict``. The packed output goes to
    ``filename + '.cz' + method[0]`` and the frequency table is always
    written to ``filename + '.czp'``.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    with open(filename, 'rb') as source:
        x = source.read()

    p, frequencies = vl_codes.probability_dict(x)

    if method == 'huffman':
        tree = vl_codes.huffman(p)
        code = trees.xtree2code(tree)
        bits = vl_codes.vl_encode(x, code)
    elif method == 'shannon_fano':
        code = vl_codes.shannon_fano(p)
        tree = trees.code2xtree(code)  # result unused; call kept as in the original
        bits = vl_codes.vl_encode(x, code)
    elif method == 'arithmetic':
        bits = arithmetic.encode(x, p)
    elif method == 'arithmetic_ftr':
        bits = arithmetic_ftr.encode(x, p)
    else:
        raise NameError('Compression method %s unknown' % method)

    payload = bytes(vl_codes.bits2bytes(bits))
    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(payload)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)
def zip(self, method, filename, n=1, v=False):
    """Compress a file using the specified method.

    The file ``self.content_dir + filename`` is read as text. The packed
    output goes to ``<path>.cz<first letter of method>`` and the frequency
    table to ``<path>.czp``.

    Parameters
    ----------
    method : str
        'huffman', 'shannon_fano' or 'arithmetic'.
    filename : str
        File name relative to ``self.content_dir``.
    n : int
        Stored on ``self.n`` and passed to ``vl_encode``; also divides the
        printed entropy.
    v : bool
        When true, print size, ratio and entropy statistics.

    Returns
    -------
    float
        ``8 * Nout / Nin``, now returned whether or not ``v`` is set.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    self.n = n
    filename = self.content_dir + filename
    with open(filename, 'r') as fin:
        text = fin.read()

    # Unconditional symbol statistics.
    frequencies = self._build_freq(text)
    tot = sum(frequencies.values())
    p = {a: frequencies[a] / tot for a in frequencies}

    # Conditional model plus the statistics used for the initial symbols.
    cp, f_initial = self._build_conditional_pdf(frequencies)
    tot_initial = sum(f_initial.values())
    p_initial = {a: f_initial[a] / tot_initial for a in f_initial}

    if method in ('huffman', 'shannon_fano'):
        c = {}
        xt = {}
        init_key = '$'  # unused character
        if len(cp) == 1:
            c[init_key], xt[init_key] = self._build_structures(method, p)
        else:
            c[init_key], xt[init_key] = self._build_structures(
                method, p_initial)
        # NOTE(review): this loop is assumed to run after both branches
        # above; confirm against the original layout.
        for key in cp:
            c[key], xt[key] = self._build_structures(method, cp[key])
        y = vl_encode(text, c, self.n)
    elif method == 'arithmetic':
        y = arithmetic.encode(text, p)
    else:
        raise NameError('Compression method %s unknown' % method)

    y = bits2bytes(y)
    Nout = len(y)  # + sys.getsizeof(str(frequencies))
    y = bytes(y)

    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(y)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)

    # Computed outside the verbose branch: the return value needs it, and it
    # was previously only defined when v was true.
    # NOTE(review): sys.getsizeof is the in-memory size of the str object,
    # not the file's byte length; confirm this is the intended denominator.
    Nin = sys.getsizeof(text)

    if v:
        entropy = -sum(p[a] * log2(p[a]) for a in p)
        print('Nin: ' + str(Nin))
        print('Nout: ' + str(Nout))
        print('Compression Ratio: ' + str(Nout / Nin))
        print('Compression Ratio (bits per byte): ' + str(8 * Nout / Nin))
        print('Entropy: ' + str(entropy / n))

    return 8 * Nout / Nin
def camzip(method, message_filename, context_chars=1,
           cond_prob_dict_filename=' ', cum_prob_dict_filename=' '):
    """Compress ``message_filename`` with the coder selected by ``method``.

    For 'contextual arithmetic' the two model files are loaded as JSON from
    ``cond_prob_models/``, the message is read from ``text_files/`` and the
    result is written to ``encoded_messages/<name>_zipped.cz``.

    For 'huffman', 'shannon_fano' and 'arithmetic' the file at
    ``message_filename`` is read as bytes, the packed output goes to
    ``message_filename + '.cz' + method[0]`` and the byte-frequency table to
    ``message_filename + '.czp'``.

    Parameters
    ----------
    method : str
        Coder name, as listed above.
    message_filename : str
        Path (or, for the contextual coder, bare name) of the input file.
    context_chars : int
        Forwarded to ``contextual_arithmetic.encode``.
    cond_prob_dict_filename, cum_prob_dict_filename : str
        Model file names inside ``cond_prob_models/``; contextual coder only.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    if method == 'contextual arithmetic':
        with open('cond_prob_models/' + cond_prob_dict_filename, 'r') as cond_prob_file:
            context_dict = json.load(cond_prob_file)

        with open('cond_prob_models/' + cum_prob_dict_filename, 'r') as cum_prob_file:
            cumulative_dict = json.load(cum_prob_file)

        with open('text_files/' + message_filename, 'r', encoding='utf-8-sig') as file:
            original_message = file.read()

        with open('encoded_messages/' + message_filename + '_zipped.cz', 'w') as zipped_file:
            encoded_bits = contextual_arithmetic.encode(
                original_message, context_dict, cumulative_dict, context_chars)
            zipped_message = bytes(bits2bytes(encoded_bits))
            # NOTE(review): this writes the textual repr of the bytes object
            # (b'...') to a text-mode file, not the raw bytes. Confirm the
            # decoder expects that format.
            zipped_file.write(str(zipped_message))
        return

    with open(message_filename, 'rb') as source:
        x = source.read()

    # groupby only merges adjacent equal items, hence the sort.
    frequencies = {symbol: len(list(run)) for symbol, run in groupby(sorted(x))}
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        tree = huffman(p)
        code = xtree2code(tree)
        bits = vl_encode(x, code)
    elif method == 'shannon_fano':
        code = shannon_fano(p)
        tree = code2xtree(code)  # result unused; call kept as in the original
        bits = vl_encode(x, code)
    elif method == 'arithmetic':
        bits = arithmetic.encode(x, p)
    else:
        raise NameError('Compression method %s unknown' % method)

    payload = bytes(bits2bytes(bits))
    with open(message_filename + '.cz' + method[0], 'wb') as fout:
        fout.write(payload)

    with open(message_filename + '.czp', 'w') as fp:
        dump(frequencies, fp)
def camzip(method, filename):
    """Compress ``filename`` and print compression statistics.

    The file is read in text mode and a character-frequency table is built.
    Supported methods are 'huffman', 'shannon_fano', 'arithmetic',
    'arithmetic_adaptive' and 'context_adaptive'. The packed output is
    written to ``filename + '.cz' + suffix``, where the suffix is 'ar', 'ad'
    or 'ca' for the three arithmetic variants and the first letter of
    ``method`` otherwise. The frequency table is written to
    ``filename + '.czp'``.

    Prints the Huffman construction time (Huffman only), the compression
    rate in bits per input character, the entropy of the empirical
    distribution and the input length.

    Raises
    ------
    NameError
        If ``method`` is not one of the supported names.
    """
    with open(filename, 'r') as fin:
        x = fin.read()

    # groupby only merges adjacent equal items, hence the sort.
    frequencies = {key: len(list(group)) for key, group in groupby(sorted(x))}
    n = sum(frequencies.values())
    p = {a: frequencies[a] / n for a in frequencies}

    if method in ('huffman', 'shannon_fano'):
        if method == 'huffman':
            # perf_counter is the clock intended for measuring short intervals.
            start = time.perf_counter()
            xt = huffman(p)
            c = xtree2code(xt)
            timer = time.perf_counter() - start
            print(f'Huffman compression time:{timer}')
        else:
            c = shannon_fano(p)
            xt = code2xtree(c)
        y = vl_encode(x, c)
    elif method == 'arithmetic':
        y = arithmetic.encode(x, p)
    elif method == 'arithmetic_adaptive':
        y = arithmetic_ftr_adaptive.encode(x)
    elif method == 'context_adaptive':
        y = arithmetic_ftr.encode(x)
    else:
        raise NameError('Compression method %s unknown' % method)

    # Pack once and reuse; this used to call bits2bytes twice on the same bits.
    zipped = bits2bytes(y)
    y = bytes(zipped)

    # The arithmetic variants share a first letter, so they get
    # two-letter suffixes.
    suffixes = {
        'arithmetic': 'ar',
        'arithmetic_adaptive': 'ad',
        'context_adaptive': 'ca',
    }
    outfile = filename + '.cz' + suffixes.get(method, method[0])

    with open(outfile, 'wb') as fout:
        fout.write(y)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)

    # Compression rate (output bits per input character) and entropy.
    n = len(x)
    C = 8 * len(zipped) / n
    entropy = -sum(p[a] * log2(p[a]) for a in p)
    print(f'Compression Rate:{C}')
    print(f'Entropy :{entropy}')
    print(f'File size (bytes): {n}')
# The arithmetic task takes its block size from the fourth argument.
if task == TASK_ARITH:
    blockSize = int(arguments[3])

# Validate the request before doing any work.
if task not in TASKS:
    sys.stderr.write(
        f"Invalid usage! The given task: {task} does not exist!\n")
    sys.stderr.write("For help, use: encode.py -h")
    sys.exit(errno.EINVAL)

if not os.path.exists(fileName):
    sys.stderr.write(f"Could not find input file: {fileName}")
    sys.exit(errno.ENOENT)

# Dispatch on the requested task.
if task == TASK_DISPLAY:
    utils.display(stats.createStatistic(fileName))

if task == TASK_SF:
    utils.display(shannon_fano.encode(fileName))

if task == TASK_SF_STAT:
    codes = shannon_fano.encode(fileName)
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))

if task == TASK_HUFF:
    utils.display(huffman.encode(fileName))

if task == TASK_HUFF_STAT:
    codes = huffman.encode(fileName)
    # Display the codes already computed, as the Shannon-Fano branch does,
    # instead of encoding the file a second time.
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))

if task == TASK_ARITH:
    code = arithmetic.encode(fileName, blockSize)
    utils.displayArithmeticCode(code)
import arithmetic as arith
from vl_codes import bytes2bits, bits2bytes
from os import stat
from itertools import groupby

# Round-trip hamlet.txt (repeated ten times) through the arithmetic coder,
# once with probability_on_the_go=False and once with True, printing the
# compression ratio and bits per input byte for each run.

filename = 'hamlet.txt'
Nin = stat(filename).st_size  # input size in bytes, used for the ratios

with open(filename, 'r') as f:
    hamlet = f.read()

# Count characters. Probabilities are normalised by the number of characters
# actually read, so they sum to one even when that differs from the byte
# size (multi-byte characters, newline translation).
n_symbols = len(hamlet)
frequencies = {key: len(list(group)) for key, group in groupby(sorted(hamlet))}
p = {a: frequencies[a] / n_symbols for a in frequencies}

# Repeat the text to get a longer test message.
hamlet = hamlet * 10
Nin = Nin * 10
n_symbols = n_symbols * 10

for on_the_go in (False, True):
    arith_encoded = arith.encode(hamlet, p, probability_on_the_go=on_the_go)
    # The decoder is given the message length in symbols.
    arith_decoded = arith.decode(arith_encoded, p, n_symbols,
                                 probability_on_the_go=on_the_go)
    hamlet_zipped = bits2bytes(arith_encoded)
    Nout = len(hamlet_zipped)
    print(Nout/Nin)
    print(8 * Nout/Nin)
from trees import *
from vl_codes import bytes2bits, bits2bytes
import arithmetic as arith
from itertools import groupby

# Encode and decode hamlet.txt with the arithmetic coder, using the text's
# own character frequencies as the model.

# The with-block closes the file even if reading fails.
with open('hamlet.txt', 'r') as f:
    hamlet = f.read()

# groupby only merges adjacent equal items, hence the sort.
frequencies = {key: len(list(group)) for key, group in groupby(sorted(hamlet))}
Nin = sum(frequencies.values())
p = {a: frequencies[a] / Nin for a in frequencies}

print(f'File length: {Nin}')

arith_encoded = arith.encode(hamlet, p)
arith_decoded = arith.decode(arith_encoded, p, Nin)

#==============================================================================
# c = huffman(p)
#==============================================================================
#print(xtree2newick(code2xtree(c)))
#==============================================================================
# hamlet_sf = vl_encode(hamlet,c);
# print(f'Length of binary sequence: {len(hamlet_sf)}')
#==============================================================================
#==============================================================================
# x = bits2bytes([0,1])