Exemplo n.º 1
0
def main() -> None:
    """Prompt for a string, arithmetic-encode it and show the round trip.

    The code returned by ``arith.encode`` is printed as
    ``numerator / denominator`` and as a 30-digit scientific float, then it
    is decoded again with the frequency table the encoder returned.
    """
    message = input("Enter the string: ")
    freq_dict, code = arith.encode(message)

    numerator, denominator = code.numerator, code.denominator
    approximation = float(code)

    print("\nEncoded message:")
    print(f"(common) {numerator} / {denominator}")
    print(f"(decimal) {approximation:.30e}\n")

    decoded_string = arith.decode(code, freq_dict)
    print(f"Decoded message is '{decoded_string}'")
Exemplo n.º 2
0
def camzip(method, filename, b=0.1, num=0, scale=(100000, 1), pr=0, pc=0):
    """Compress ``filename`` with ``method`` and write the result beside it.

    The input is read as bytes. The compressed bytes are written to
    ``filename + '.cz' + method[0]`` and the byte-frequency table is written
    to ``filename + '.czp'`` with ``dump``.

    Args:
        method: 'huffman', 'shannon_fano', 'arithmetic', 'carithmeticac',
            'iadhuffman', 'fcondarithmetic', 'gadconarithmetic' or
            'jadconarithmetic'.
        filename: Path of the file to compress.
        b: Forwarded to the 'carithmeticac', 'gadconarithmetic' and
            'jadconarithmetic' encoders.
        num: Forwarded to the 'carithmeticac' encoder.
        scale: Forwarded to the 'carithmeticac' encoder.
        pr: Forwarded to the 'iadhuffman' encoder.
        pc: Forwarded to the 'fcondarithmetic' encoder.

    Raises:
        NameError: If ``method`` is none of the names listed above.
    """
    with open(filename, 'rb') as source:
        x = source.read()

    # Sorting first makes groupby yield exactly one run per distinct byte.
    frequencies = {
        symbol: sum(1 for _ in run) for symbol, run in groupby(sorted(x))
    }
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        y = vl_encode(x, xtree2code(huffman(p)))
    elif method == 'shannon_fano':
        c = shannon_fano(p)
        code2xtree(c)  # result is not used; call kept from the original flow
        y = vl_encode(x, c)
    elif method == 'arithmetic':
        y = arithmetic.encode(x, p)
    elif method == 'carithmeticac':
        y = arithmeticac.encode(x, b, num, scale)
    elif method == 'iadhuffman':
        y = adhuffman(x, pr)
    elif method == 'fcondarithmetic':
        y = condarithmetic.encode(x, pc)
    elif method == 'gadconarithmetic':
        y = adconarithmetic.encode(x, b)
    elif method == 'jadconarithmetic':
        y = adconarithmetic2.encode(x, b)
    else:
        raise NameError('Compression method %s unknown' % method)

    packed = bytes(bits2bytes(y))

    # The first letter of the method name becomes the last letter of the
    # output extension.
    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(packed)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)
Exemplo n.º 3
0
def camzip(method, filename):
    """Compress ``filename`` with ``method`` and write the result beside it.

    The input is read as bytes and the compressed bytes go to
    ``filename + '.cz' + method[0]``. For 'huffman', 'shannon_fano' and
    'arithmetic' the byte-frequency table is also written to
    ``filename + '.czp'`` with ``dump``; 'dapt' and 'context' write no table.

    Args:
        method: 'huffman', 'shannon_fano', 'arithmetic', 'dapt' or 'context'.
        filename: Path of the file to compress.

    Raises:
        NameError: If ``method`` is none of the names listed above.
    """
    with open(filename, 'rb') as source:
        x = source.read()

    # One groupby run per distinct byte value, because the input is sorted.
    frequencies = {
        symbol: sum(1 for _ in run) for symbol, run in groupby(sorted(x))
    }
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        y = vl_encode(x, xtree2code(huffman(p)))
    elif method == 'shannon_fano':
        c = shannon_fano(p)
        code2xtree(c)  # result is not used; call kept from the original flow
        y = vl_encode(x, c)
    elif method == 'arithmetic':
        y = arithmetic.encode(x, p)
    elif method == 'dapt':
        y = adaptive_arithmetic.encode(x)
    elif method == 'context':
        y = contextual_arithmetic.encode(x)
    else:
        raise NameError('Compression method %s unknown' % method)

    packed = bytes(bits2bytes(y))

    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(packed)

    # Only the methods that are handed the static table ``p`` above get a
    # frequency file written.
    if method in ('huffman', 'shannon_fano', 'arithmetic'):
        with open(filename + '.czp', 'w') as fp:
            dump(frequencies, fp)
Exemplo n.º 4
0
    def __init__(self, name_real, name_email,
                 name_comment, passphrase):
        """Build a key profile and append its encoded form to profiles.txt.

        The profile is stored on ``self.batch``, converted with ``xmlify``
        (wrapped in a ``profile`` element, four-space indent), passed through
        ``codify.encode(batch, 58)`` and appended as one line to
        ``profiles.txt`` in the current working directory.

        Args:
            name_real: Stored as a string under ``'name_real'``.
            name_email: Stored as a string under ``'name_email'``.
            name_comment: Stored as a string under ``'name_comment'``.
            passphrase: Stored as a string under ``'passphrase'``.
        """
        # str() instead of '%s' % value: the %-operator treats a tuple
        # argument as a list of format arguments and fails or mis-formats.
        self.batch = {'name_real': str(name_real),
                      'name_email': str(name_email),
                      'name_comment': str(name_comment),
                      'expire_date': '0',
                      'key_type': 'RSA',
                      'key_length': 2048,
                      'key_usage': 'encrypt,sign,auth',
                      'subkey_type': 'RSA',
                      'subkey_length': 2048,
                      'subkey_usage': 'encrypt,sign,auth',
                      'passphrase': str(passphrase)}

        batch = xmlify(self.batch, wrap="profile",
                       indent="    ")
        # NOTE(review): the passphrase is part of what gets written to disk;
        # confirm that codify.encode provides the protection intended here.
        encrypt_info = codify.encode(batch, 58)
        # The with-statement closes the file on exit, so no explicit close()
        # is needed. self.profiles stays bound to the (closed) handle, as
        # before.
        with open("profiles.txt", "a") as self.profiles:
            print(encrypt_info, file=self.profiles)
Exemplo n.º 5
0
def camzip(method, filename):
    """Compress ``filename`` with ``method`` and write the result beside it.

    The input is read as bytes. The compressed bytes are written to
    ``filename + '.cz' + method[0]`` and the frequency table returned by
    ``vl_codes.probability_dict`` is written to ``filename + '.czp'`` with
    ``dump``.

    Args:
        method: 'huffman', 'shannon_fano', 'arithmetic' or 'arithmetic_ftr'.
        filename: Path of the file to compress.

    Raises:
        NameError: If ``method`` is none of the names listed above.
    """
    with open(filename, 'rb') as source:
        x = source.read()

    p, frequencies = vl_codes.probability_dict(x)

    if method == 'huffman':
        y = vl_codes.vl_encode(x, trees.xtree2code(vl_codes.huffman(p)))
    elif method == 'shannon_fano':
        c = vl_codes.shannon_fano(p)
        trees.code2xtree(c)  # result is not used; call kept as in the original
        y = vl_codes.vl_encode(x, c)
    elif method == 'arithmetic':
        y = arithmetic.encode(x, p)
    elif method == 'arithmetic_ftr':
        y = arithmetic_ftr.encode(x, p)
    else:
        raise NameError('Compression method %s unknown' % method)

    packed = bytes(vl_codes.bits2bytes(y))

    # NOTE(review): 'arithmetic' and 'arithmetic_ftr' share the first letter,
    # so both write to the same '.cza' file - confirm this is intended.
    with open(filename + '.cz' + method[0], 'wb') as fout:
        fout.write(packed)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)
Exemplo n.º 6
0
    def zip(self, method, filename, n=1, v=False):
        """Compress a file using the specified method.

        The file is read in text mode from ``self.content_dir + filename``.
        The compressed bytes are written to that path plus
        ``'.cz' + method[0]`` and the frequency table to that path plus
        ``'.czp'``.

        Args:
            method: 'huffman', 'shannon_fano' or 'arithmetic'.
            filename: Name appended to ``self.content_dir``.
            n: Stored on ``self.n``, passed to ``vl_encode`` and used as the
                divisor of the printed entropy.
            v: When true, print size statistics and return the ratio.

        Returns:
            ``8 * Nout / Nin`` when ``v`` is true, otherwise ``None``.

        Raises:
            NameError: If ``method`` is not one of the supported names.
        """
        self.n = n
        filename = self.content_dir + filename
        with open(filename, 'r') as source:
            text = source.read()

        # Overall symbol distribution.
        frequencies = self._build_freq(text)
        total = sum(frequencies.values())
        p = {sym: frequencies[sym] / total for sym in frequencies}
        cp, f_initial = self._build_conditional_pdf(frequencies)

        # Distribution used for the initial conditions.
        total_initial = sum(f_initial.values())
        p_initial = {sym: f_initial[sym] / total_initial for sym in f_initial}

        if method in ('huffman', 'shannon_fano'):
            c, xt = {}, {}
            init_key = '$'  # unused character
            single_context = len(cp.keys()) == 1
            # With a single context the overall distribution seeds the
            # initial entry; otherwise the initial-condition one does.
            c[init_key], xt[init_key] = self._build_structures(
                method, p if single_context else p_initial)
            if not single_context:
                for key in cp.keys():
                    c[key], xt[key] = self._build_structures(method, cp[key])

            y = vl_encode(text, c, self.n)
        elif method == 'arithmetic':
            y = arithmetic.encode(text, p)
        else:
            raise NameError('Compression method %s unknown' % method)

        packed = bits2bytes(y)
        Nout = len(packed)  # the frequency table size is not counted
        payload = bytes(packed)

        with open(filename + '.cz' + method[0], 'wb') as fout:
            fout.write(payload)

        with open(filename + '.czp', 'w') as fp:
            dump(frequencies, fp)

        if not v:
            return None

        # NOTE(review): sys.getsizeof reports the size of the str object,
        # not the character or byte count of the file - confirm intent.
        Nin = sys.getsizeof(text)
        ratio_bits = 8 * Nout / Nin
        print('Nin: ' + str(Nin))
        print('Nout: ' + str(Nout))
        print('Compression Ratio: ' + str(Nout / Nin))
        print('Compression Ratio (bits per byte): ' + str(ratio_bits))

        entropy = -sum([p[a] * log2(p[a]) for a in p])
        print('Entropy: ' + str(entropy / n))
        return ratio_bits
Exemplo n.º 7
0
def camzip(method,
           message_filename,
           context_chars=1,
           cond_prob_dict_filename=' ',
           cum_prob_dict_filename=' '):
    """Compress a message file with the chosen method.

    For 'contextual arithmetic' the two probability models are loaded as
    JSON from ``cond_prob_models/``, the message is read from
    ``text_files/`` and the result is written to
    ``encoded_messages/<message_filename>_zipped.cz``.

    For 'huffman', 'shannon_fano' and 'arithmetic' the file at
    ``message_filename`` is read as bytes, the compressed bytes go to
    ``message_filename + '.cz' + method[0]`` and the byte-frequency table to
    ``message_filename + '.czp'``.

    Args:
        method: 'huffman', 'shannon_fano', 'arithmetic' or
            'contextual arithmetic'.
        message_filename: File to compress (a path, or a name inside
            ``text_files/`` for the contextual method).
        context_chars: Forwarded to ``contextual_arithmetic.encode``.
        cond_prob_dict_filename: JSON file name inside ``cond_prob_models/``.
        cum_prob_dict_filename: JSON file name inside ``cond_prob_models/``.

    Raises:
        NameError: If ``method`` is not one of the supported names.
    """
    if method == 'contextual arithmetic':
        model_dir = 'cond_prob_models/'
        with open(model_dir + cond_prob_dict_filename, 'r') as cond_prob_file:
            context_dict = json.load(cond_prob_file)
        with open(model_dir + cum_prob_dict_filename, 'r') as cum_prob_file:
            cumulative_dict = json.load(cum_prob_file)
        # utf-8-sig strips a leading byte-order mark if the file has one.
        with open('text_files/' + message_filename, 'r',
                  encoding='utf-8-sig') as file:
            original_message = file.read()

        target = 'encoded_messages/' + message_filename + '_zipped.cz'
        with open(target, 'w') as zipped_file:
            encoded_bits = contextual_arithmetic.encode(
                original_message, context_dict, cumulative_dict,
                context_chars)
            zipped_message = bytes(bits2bytes(encoded_bits))
            # NOTE(review): this stores the textual repr of the bytes object
            # (b'...') in a text-mode file rather than the raw bytes -
            # confirm the decoder expects that before changing it.
            zipped_file.write(str(zipped_message))
        return

    with open(message_filename, 'rb') as source:
        x = source.read()

    # Sorting first makes groupby yield exactly one run per distinct byte.
    frequencies = {
        symbol: sum(1 for _ in run) for symbol, run in groupby(sorted(x))
    }
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        y = vl_encode(x, xtree2code(huffman(p)))
    elif method == 'shannon_fano':
        c = shannon_fano(p)
        code2xtree(c)  # result is not used; call kept from the original flow
        y = vl_encode(x, c)
    elif method == 'arithmetic':
        y = arithmetic.encode(x, p)
    else:
        raise NameError('Compression method %s unknown' % method)

    packed = bytes(bits2bytes(y))

    with open(message_filename + '.cz' + method[0], 'wb') as fout:
        fout.write(packed)

    with open(message_filename + '.czp', 'w') as fp:
        dump(frequencies, fp)
Exemplo n.º 8
0
def camzip(method, filename):
    """Compress ``filename`` with ``method`` and print summary statistics.

    The file is read in text mode, so symbols are characters. Compressed
    bytes are written to ``filename + '.cz' + suffix`` where the suffix is
    'ar' for 'arithmetic', 'ad' for 'arithmetic_adaptive', 'ca' for
    'context_adaptive' and the first letter of the method name otherwise.
    The character-frequency table is written to ``filename + '.czp'``.

    Args:
        method: 'huffman', 'shannon_fano', 'arithmetic',
            'arithmetic_adaptive' or 'context_adaptive'.
        filename: Path of the file to compress.

    Raises:
        NameError: If ``method`` is none of the names listed above.
    """
    with open(filename, 'r') as fin:
        x = fin.read()

    # Sorting first makes groupby yield one run per distinct character.
    frequencies = {
        symbol: sum(1 for _ in run) for symbol, run in groupby(sorted(x))
    }
    total = sum(frequencies.values())
    p = {symbol: count / total for symbol, count in frequencies.items()}

    if method == 'huffman':
        # The timer covers only building the code, not encoding the text.
        start = time.time()  # start count
        c = xtree2code(huffman(p))
        timer = time.time() - start
        print(f'Huffman compression time:{timer}')
        y = vl_encode(x, c)
    elif method == 'shannon_fano':
        c = shannon_fano(p)
        code2xtree(c)  # result is not used; call kept from the original flow
        y = vl_encode(x, c)
    elif method == 'arithmetic':
        y = arithmetic.encode(x, p)
    elif method == 'arithmetic_adaptive':
        y = arithmetic_ftr_adaptive.encode(x)
    elif method == 'context_adaptive':
        y = arithmetic_ftr.encode(x)
    else:
        raise NameError('Compression method %s unknown' % method)

    # Pack once and reuse the result for both the output file and the
    # size statistic (previously bits2bytes was run twice on the same bits).
    zipped = bits2bytes(y)
    payload = bytes(zipped)

    # The three arithmetic variants get two-letter suffixes; the remaining
    # methods use their first letter.
    suffixes = {
        'arithmetic': 'ar',
        'arithmetic_adaptive': 'ad',
        'context_adaptive': 'ca',
    }
    outfile = filename + '.cz' + suffixes.get(method, method[0])

    with open(outfile, 'wb') as fout:
        fout.write(payload)

    with open(filename + '.czp', 'w') as fp:
        dump(frequencies, fp)

    # Entropy and compression rate. n counts characters, because the file
    # was read in text mode.
    n = len(x)
    # Guard against an empty input so the report does not divide by zero.
    C = 8 * len(zipped) / n if n else float('nan')
    entropy = -sum(p[a] * log2(p[a]) for a in p)
    print(f'Compression Rate:{C}')
    print(f'Entropy :{entropy}')
    print(f'File size (bytes): {n}')
Exemplo n.º 9
0
# Only the arithmetic task takes a block-size argument.
if task == TASK_ARITH:
    blockSize = int(arguments[3])

# Reject unknown tasks and missing input before doing any work.
if task not in TASKS:
    sys.stderr.write(
        f"Invalid usage! The given task: {task} does not exist!\n")
    sys.stderr.write("For help, use: encode.py -h")
    sys.exit(errno.EINVAL)

if not os.path.exists(fileName):
    sys.stderr.write(f"Could not find input file: {fileName}")
    sys.exit(errno.ENOENT)

# Exactly one task runs per invocation, hence a single if/elif chain.
if task == TASK_DISPLAY:
    utils.display(stats.createStatistic(fileName))
elif task == TASK_SF:
    utils.display(shannon_fano.encode(fileName))
elif task == TASK_SF_STAT:
    codes = shannon_fano.encode(fileName)
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))
elif task == TASK_HUFF:
    utils.display(huffman.encode(fileName))
elif task == TASK_HUFF_STAT:
    # Encode once and reuse the result for both the listing and the
    # optimality report, matching the Shannon-Fano branch above.
    codes = huffman.encode(fileName)
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))
elif task == TASK_ARITH:
    code = arithmetic.encode(fileName, blockSize)
    utils.displayArithmeticCode(code)
Exemplo n.º 10
0
import arithmetic as arith
from vl_codes import bytes2bits, bits2bytes
from os import stat
from itertools import groupby

filename = 'hamlet.txt'
# Size on disk in bytes; used as the denominator of the printed ratios.
Nin = stat(filename).st_size
with open(filename, 'r') as f:
    hamlet = f.read()

# Normalise by the number of characters actually read. In text mode this
# can differ from the byte size (newline translation, multi-byte
# characters), and the probabilities would then not sum to one.
n_symbols = len(hamlet)
frequencies = {key: sum(1 for _ in group)
               for key, group in groupby(sorted(hamlet))}
p = {a: frequencies[a] / n_symbols for a in frequencies}

# Work on ten concatenated copies of the text; scale both counts to match.
hamlet = hamlet * 10
Nin = Nin * 10
n_symbols = n_symbols * 10

# Same encode/decode/report cycle, first without and then with the
# probability_on_the_go option.
for probability_on_the_go in (False, True):
    arith_encoded = arith.encode(
        hamlet, p, probability_on_the_go=probability_on_the_go)
    # NOTE(review): the third argument is assumed to be the number of
    # symbols to decode - confirm against arith.decode.
    arith_decoded = arith.decode(
        arith_encoded, p, n_symbols,
        probability_on_the_go=probability_on_the_go)
    hamlet_zipped = bits2bytes(arith_encoded)
    Nout = len(hamlet_zipped)
    print(Nout/Nin)
    print(8 * Nout/Nin)
Exemplo n.º 11
0
from trees import *
from vl_codes import bytes2bits, bits2bytes
import arithmetic as arith

# Read the whole text; the with-statement closes the file even if read()
# raises.
with open('hamlet.txt', 'r') as f:
    hamlet = f.read()

from itertools import groupby
# Character counts: sorting first makes groupby yield one run per distinct
# character.
frequencies = {symbol: sum(1 for _ in run)
               for symbol, run in groupby(sorted(hamlet))}
Nin = sum(frequencies.values())
p = {symbol: count / Nin for symbol, count in frequencies.items()}
print(f'File length: {Nin}')

# Encode with the static distribution, then decode Nin symbols back.
arith_encoded = arith.encode(hamlet, p)
arith_decoded = arith.decode(arith_encoded, p, Nin)

#==============================================================================
# c = huffman(p)
#==============================================================================

#print(xtree2newick(code2xtree(c)))

#==============================================================================
# hamlet_sf = vl_encode(hamlet,c);
# print(f'Length of binary sequence: {len(hamlet_sf)}')
#==============================================================================

#==============================================================================
# x = bits2bytes([0,1])