def camunzip(filename):
    """Decompress a camzip-compressed file and write the result to a '.cuz' file.

    The compression method is chosen from the trailing characters of
    ``filename``:

    * ``'h'``  -> ``'huffman'``
    * ``'s'``  -> ``'shannon_fano'``
    * ``'ar'`` -> ``'arithmetic'``
    * ``'ad'`` -> ``'arithmetic_adaptive'``
    * ``'ca'`` -> ``'context_adaptive'``

    The symbol-frequency table is always loaded from the companion file
    whose name is ``filename`` with the method suffix replaced by ``'p'``.
    The decoded symbols are written, in text mode, to ``filename`` with the
    method suffix and the three characters before it replaced by ``'.cuz'``
    (so the original, uncompressed file is not overwritten).

    Args:
        filename: Path of the compressed file.

    Raises:
        NameError: If ``filename`` ends with none of the suffixes above.
    """
    # Tried in order, so the one-letter suffixes win over the two-letter
    # ones, exactly as in the if/elif chain this table replaces.
    suffix_methods = (
        ('h', 'huffman'),
        ('s', 'shannon_fano'),
        ('ar', 'arithmetic'),
        ('ad', 'arithmetic_adaptive'),
        ('ca', 'context_adaptive'),
    )
    for suffix, method in suffix_methods:
        if filename.endswith(suffix):
            break
    else:
        raise NameError('Unknown compression method')

    with open(filename, 'rb') as fin:
        y = fin.read()
    y = bytes2bits(y)

    # The frequency file shares the stem of the compressed file.
    pfile = filename[:-len(suffix)] + 'p'
    with open(pfile, 'r') as fp:
        frequencies = load(fp)

    # Normalise the counts into a probability for each symbol.
    n = sum(frequencies[a] for a in frequencies)
    p = {a: frequencies[a] / n for a in frequencies}

    if method == 'huffman':
        x = vl_decode(y, huffman(p))
    elif method == 'shannon_fano':
        # shannon_fano() yields a code table; the decoder needs the tree form.
        x = vl_decode(y, code2xtree(shannon_fano(p)))
    elif method == 'arithmetic':
        x = arithmetic.decode(y, p, n)
    elif method == 'arithmetic_adaptive':
        x = arithmetic_ftr_adaptive.decode(y)
    else:  # 'context_adaptive' is the only method left
        x = arithmetic_ftr.decode(y)

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    outfile = filename[:-(len(suffix) + 3)] + '.cuz'
    with open(outfile, 'w') as fout:
        fout.writelines(x)
Exemplo n.º 2
0
    def unzip(self, filename):
        """Decode a camzip file located under ``self.content_dir``.

        The last character of the path picks the method: ``'h'`` (huffman),
        ``'s'`` (shannon_fano) or ``'a'`` (arithmetic); anything else raises
        ``NameError``. Frequencies are read from the sibling file whose name
        ends in ``'p'`` instead of the method letter. The decoded text is
        written to the path with its last four characters replaced by
        ``'.cuz'`` and then passed to ``self._convert_to_LF``.
        """
        filename = self.content_dir + filename

        known_methods = {
            'h': 'huffman',
            's': 'shannon_fano',
            'a': 'arithmetic',
        }
        method_letter = filename[-1]
        if method_letter not in known_methods:
            raise NameError('Unknown compression method')
        method = known_methods[method_letter]

        with open(filename, 'rb') as compressed:
            raw = compressed.read()
        y = bytes2bits(raw)

        # Frequency table -> unconditional symbol probabilities.
        with open(filename[:-1] + 'p', 'r') as freq_file:
            frequencies = load(freq_file)
        tot = sum(list(frequencies.values()))
        p = {a: frequencies[a] / tot for a in frequencies}

        # Conditional model plus the distribution used for the first symbol.
        cp, f_initial = self._build_conditional_pdf(frequencies)
        tot_initial = sum(list(f_initial.values()))
        p_initial = {a: f_initial[a] / tot_initial for a in f_initial}

        if method == 'arithmetic':
            x = arithmetic.decode(y, p, tot)

        elif method in ('huffman', 'shannon_fano'):
            codes = {}
            trees = {}
            start_key = '$'  # unused character
            single_context = len(cp.keys()) == 1
            if single_context:
                codes[start_key], trees[start_key] = self._build_structures(
                    method, p)
            else:
                codes[start_key], trees[start_key] = self._build_structures(
                    method, p_initial)
                # One code/tree pair per conditioning context.
                for key in cp.keys():
                    codes[key], trees[key] = self._build_structures(
                        method, cp[key])

            x = vl_decode(y, trees)

        else:
            raise NameError('This will never happen (famous last words)')

        # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
        outfile = filename[:-4] + '.cuz'
        with open(outfile, 'w') as fout:
            fout.write(x)

        self._convert_to_LF(outfile)
Exemplo n.º 3
0
def main() -> None:
    """Read a string from stdin, encode it with ``arith`` and decode it again.

    The encoded value is printed both as ``numerator / denominator`` and as
    a float in scientific notation, followed by the decoded string.
    """
    message = input("Enter the string: ")

    frequencies, encoded = arith.encode(message)
    print("\nEncoded message:")
    print(f"(common) {encoded.numerator} / {encoded.denominator}")
    print(f"(decimal) {float(encoded):.30e}\n")

    recovered = arith.decode(encoded, frequencies)
    print(f"Decoded message is '{recovered}'")
Exemplo n.º 4
0
 def fetch_info(self, username):
     """Search the encoded profile store for ``username``.

     Reads ``profiles.txt``, decodes its contents with
     ``codify.decode(blob, 58)``, parses the result with
     ``etree.fromstring`` and scans every element of the parsed tree for
     one whose text equals ``username``.

     On the first match ``self.is_logged_in`` is set and ``element.root``
     is returned; when nothing matches the method returns ``None``.
     """
     # The context manager closes the handle even if read() raises.
     with open("profiles.txt", "r") as profiles:
         blob = profiles.read()
     blob_decoded = codify.decode(blob, 58)
     xmlfile = etree.fromstring(blob_decoded)
     # fromstring() already returns a parsed element, so walk it with
     # iter(); iterparse() expects a file or filename and yields
     # (event, element) pairs rather than elements.
     # NOTE(review): assumes `etree` is ElementTree-compatible (stdlib or
     # lxml) -- confirm against the file's imports.
     for element in xmlfile.iter():
         if element.text == username:
             # NOTE(review): YES is not defined in this block; presumably
             # a module-level constant -- confirm.
             self.is_logged_in = YES
             # NOTE(review): ElementTree/lxml elements do not document a
             # `.root` attribute; confirm what should be returned here.
             return element.root
     return None
Exemplo n.º 5
0
def camunzip(filename, length_of_LSTM_context=30):
    """Decode a camzip file, write the bytes to a '.cuz' file and return them.

    The final character of ``filename`` selects the method: ``'h'``
    (huffman), ``'s'`` (shannon_fano), ``'a'`` (arithmetic) or ``'c'``
    (conditional-arithmetic); any other character raises ``NameError``.
    Frequencies come from the file named like ``filename`` but ending in
    ``'p'``; their keys are converted to ``int``.
    ``length_of_LSTM_context`` is only forwarded to ``con_ari.decode``.
    """
    methods_by_suffix = {
        'h': 'huffman',
        's': 'shannon_fano',
        'a': 'arithmetic',
        'c': "conditional-arithmetic",
    }
    last_char = filename[-1]
    if last_char not in methods_by_suffix:
        raise NameError('Unknown compression method')
    method = methods_by_suffix[last_char]

    with open(filename, 'rb') as compressed:
        raw = compressed.read()
    y = bytes2bits(raw)

    with open(filename[:-1] + 'p', 'r') as freq_file:
        frequencies = load(freq_file)
    n = sum(frequencies[a] for a in frequencies)
    p = {int(a): frequencies[a] / n for a in frequencies}

    if method == "conditional-arithmetic":
        x = con_ari.decode(y, p, n, length_of_LSTM_context)
    elif method == 'arithmetic':
        x = arithmetic.decode(y, p, n)
    elif method in ('huffman', 'shannon_fano'):
        if method == 'shannon_fano':
            c = shannon_fano(p)
            xt = code2xtree(c)
        else:
            xt = huffman(p)
            c = xtree2code(xt)
        x = vl_decode(y, xt)
    else:
        raise NameError('This will never happen (famous last words)')

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    target = filename[:-4] + '.cuz'
    with open(target, 'wb') as fout:
        fout.write(bytes(x))

    return x
Exemplo n.º 6
0
def camunzip(filename,
             cond_prob_dict_filename=' ',
             cum_prob_dict_filename=' ',
             orig_message_filename=' '):
    """Decompress ``filename`` and write the decoded bytes to a '.cuz' file.

    When ``filename`` does not end in ``'z'`` its last character selects
    the method (``'h'`` huffman, ``'s'`` shannon_fano, ``'a'`` arithmetic)
    and the frequencies are loaded from the file named like ``filename``
    but ending in ``'p'``.

    When ``filename`` ends in ``'z'`` the contextual arithmetic decoder is
    used instead. It reads ``encoded_messages/<filename>_zipped.cz``, the
    two JSON models under ``cond_prob_models/`` and the original message
    under ``text_files/`` (only its length is passed to the decoder).

    In both cases the output path is ``filename`` with its last four
    characters replaced by ``'.cuz'``.

    Args:
        filename: Compressed file path, or the base name for the
            contextual branch.
        cond_prob_dict_filename: JSON file under ``cond_prob_models/``
            holding the context dictionary (contextual branch only).
        cum_prob_dict_filename: JSON file under ``cond_prob_models/``
            holding the cumulative dictionary (contextual branch only).
        orig_message_filename: File under ``text_files/`` containing the
            original message (contextual branch only).

    Raises:
        NameError: If the suffix matches no known compression method.
    """
    if not filename.endswith('z'):
        if filename.endswith('h'):
            method = 'huffman'
        elif filename.endswith('s'):
            method = 'shannon_fano'
        elif filename.endswith('a'):
            method = 'arithmetic'
        else:
            raise NameError('Unknown compression method')

        with open(filename, 'rb') as fin:
            y = fin.read()
        y = bytes2bits(y)

        pfile = filename[:-1] + 'p'
        with open(pfile, 'r') as fp:
            frequencies = load(fp)
        n = sum(frequencies[a] for a in frequencies)
        p = {int(a): frequencies[a] / n for a in frequencies}

        if method == 'huffman':
            x = vl_decode(y, huffman(p))
        elif method == 'shannon_fano':
            # shannon_fano() yields a code table; the decoder needs a tree.
            x = vl_decode(y, code2xtree(shannon_fano(p)))
        else:  # 'arithmetic' is the only method left
            x = arithmetic.decode(y, p, n)

    else:
        # NOTE(review): the compressed data is read in text mode here but
        # in binary mode in the other branch, and is then handed to
        # bytes2bits -- confirm that 'r' is intended.
        with open('encoded_messages/' + filename + '_zipped.cz',
                  'r') as zipped_file:
            zipped = zipped_file.read()
        with open('cond_prob_models/' + cond_prob_dict_filename,
                  'r') as cond_prob_file:
            context_dict = json.load(cond_prob_file)
        with open('cond_prob_models/' + cum_prob_dict_filename,
                  'r') as cum_prob_file:
            cumulative_dict = json.load(cum_prob_file)
        with open('text_files/' + orig_message_filename,
                  'r',
                  encoding='utf-8-sig') as file:
            original_message = file.read()
        x = contextual_arithmetic.decode(bytes2bits(zipped), context_dict,
                                         cumulative_dict,
                                         len(original_message))

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    # Computed after both branches: previously only the first branch set
    # it, so the contextual branch failed with UnboundLocalError below.
    outfile = filename[:-4] + '.cuz'

    with open(outfile, 'wb') as fout:
        fout.write(bytes(x))
Exemplo n.º 7
0
def camunzip(filename, b=0.1, num=0, scale=(100000, 1), pr=0, pc=0):
    """Decode a camzip file and write the resulting bytes to a '.cuz' file.

    The last character of ``filename`` selects one of eight decoders;
    an unrecognised character raises ``NameError``. Frequencies are read
    from the file named like ``filename`` but ending in ``'p'``, with their
    keys converted to ``int``.

    The extra parameters are only forwarded to the decoders:
    ``b`` to ``arithmeticac``, ``adconarithmetic`` and
    ``adconarithmetic2``; ``num`` and ``scale`` to ``arithmeticac``;
    ``pr`` to ``adhuffmandec``; ``pc`` to ``condarithmetic``.
    """
    methods_by_suffix = {
        'h': 'huffman',
        's': 'shannon_fano',
        'a': 'arithmetic',
        'c': 'carithmeticac',
        'i': 'iadhuffman',
        'f': 'fcondarithmetic',
        'g': 'gadconarithmetic',
        'j': 'jadconarithmetic',
    }
    last_char = filename[-1]
    if last_char not in methods_by_suffix:
        raise NameError('Unknown compression method')
    method = methods_by_suffix[last_char]

    with open(filename, 'rb') as compressed:
        raw = compressed.read()
    y = bytes2bits(raw)

    with open(filename[:-1] + 'p', 'r') as freq_file:
        frequencies = load(freq_file)
    n = sum(frequencies[a] for a in frequencies)
    p = {int(a): frequencies[a] / n for a in frequencies}

    if method in ('huffman', 'shannon_fano'):
        if method == 'shannon_fano':
            c = shannon_fano(p)
            xt = code2xtree(c)
        else:
            xt = huffman(p)
            c = xtree2code(xt)
        x = vl_decode(y, xt)
    elif method == 'arithmetic':
        x = arithmetic.decode(y, p, n)
    elif method == 'carithmeticac':
        x = arithmeticac.decode(y, b, n, num, scale)
    elif method == 'iadhuffman':
        x = adhuffmandec(y, pr)
    elif method == 'fcondarithmetic':
        x = condarithmetic.decode(y, pc, n)
    elif method == 'gadconarithmetic':
        x = adconarithmetic.decode(y, b, n)
    elif method == 'jadconarithmetic':
        x = adconarithmetic2.decode(y, b, n)
    else:
        raise NameError('This will never happen (famous last words)')

    # '.cuz' for Cam UnZipped (don't want to overwrite the original file...)
    target = filename[:-4] + '.cuz'
    with open(target, 'wb') as fout:
        fout.write(bytes(x))
Exemplo n.º 8
0
import arithmetic as arith
from vl_codes import bytes2bits, bits2bytes
from os import stat
from itertools import groupby

filename = 'hamlet.txt'
# The context manager closes the file even if read() raises.
with open(filename, 'r') as f:
    hamlet = f.read()

# Nin is the number of symbols, not the on-disk byte size: it normalises
# the frequencies and tells the decoder how many symbols to produce, so it
# must match the text actually read. The two differ whenever text-mode
# decoding or newline translation changes the length.
Nin = len(hamlet)
frequencies = {key: len(list(group)) for key, group in groupby(sorted(hamlet))}
p = {a: frequencies[a] / Nin for a in frequencies}

# Repeat the text ten times; the symbol probabilities are unchanged.
hamlet = hamlet * 10
Nin = Nin * 10

# Same round trip with the probability_on_the_go flag off, then on.
for on_the_go in (False, True):
    arith_encoded = arith.encode(hamlet, p, probability_on_the_go=on_the_go)
    arith_decoded = arith.decode(arith_encoded, p, Nin,
                                 probability_on_the_go=on_the_go)
    hamlet_zipped = bits2bytes(arith_encoded)
    Nout = len(hamlet_zipped)
    print(Nout/Nin)        # compressed bytes per input symbol
    print(8 * Nout/Nin)    # compressed bits per input symbol
Exemplo n.º 9
0
from vl_codes import bytes2bits, bits2bytes
import arithmetic as arith

# Read the whole text into memory.
with open('hamlet.txt', 'r') as f:
    hamlet = f.read()

from itertools import groupby
# Count each distinct character by grouping the sorted text.
frequencies = {
    key: len(list(group)) for key, group in groupby(sorted(hamlet))
}
Nin = sum(frequencies[a] for a in frequencies)
# Relative frequency of every character.
p = {a: frequencies[a] / Nin for a in frequencies}
print(f'File length: {Nin}')

# Arithmetic-code the text, then decode it again.
arith_encoded = arith.encode(hamlet, p)
arith_decoded = arith.decode(arith_encoded, p, Nin)

#==============================================================================
# c = huffman(p)
#==============================================================================

#print(xtree2newick(code2xtree(c)))

#==============================================================================
# hamlet_sf = vl_encode(hamlet,c);
# print(f'Length of binary sequence: {len(hamlet_sf)}')
#==============================================================================

#==============================================================================
# x = bits2bytes([0,1])
# print([format(a, '08b') for a in x])