示例#1
0
def decompress(txt):
    """Decompress a BSC-framed, Huffman-coded byte string.

    Layout read here: ``BSC_HEADER_FLAG``, a table of (frequency, symbol)
    entries, ``BSC_HEADER_FLAG`` again, then the packed bit payload.  Each
    table entry starts with one marker byte:

    * ``0x01`` -- the frequency follows as a native ``H`` (2 bytes read);
    * ``0x02`` -- the frequency follows as a native ``I`` (4 bytes read);
    * anything else -- the frequency is the marker byte minus 2.

    One symbol byte follows every frequency.

    Args:
        txt: bytes-like object holding the compressed data.

    Returns:
        The result of ``reverse(encode(huff), out, blen)`` where ``huff``
        maps symbol -> frequency, ``out`` is the payload as a bitarray and
        ``blen`` is the sum of all frequencies.

    Raises:
        InvalidFileFormatException: if the data does not start with
            ``BSC_HEADER_FLAG`` or ends before the table terminator.
    """
    flag_len = len(BSC_HEADER_FLAG)
    huff = dict()
    blen = 0

    with BytesIO(memoryview(txt)) as buff:
        if buff.read(flag_len) != BSC_HEADER_FLAG:
            raise InvalidFileFormatException()

        while True:
            chk = buff.read(flag_len)
            if chk == BSC_HEADER_FLAG:
                break
            if len(chk) < flag_len:
                # Fewer bytes left than the terminator needs: the table can
                # never be closed, so stop instead of seeking back too far.
                raise InvalidFileFormatException()
            buff.seek(-flag_len, 1)

            # Compare by value; identity checks on ints are not reliable.
            fqc = unpack('B', buff.read(1))[0]
            if fqc == 0x01:
                fq = unpack('H', buff.read(2))[0]
            elif fqc == 0x02:
                fq = unpack('I', buff.read(4))[0]
            else:
                # Subtract 2 from fqc because first two values are reserved
                fq = fqc - 2

            sym = unpack('B', buff.read(1))[0]
            blen += fq
            huff[sym] = fq

        # The payload starts right after the terminator that the parser
        # actually consumed (a byte search could hit flag bytes that merely
        # happen to occur inside the table).
        payload_start = buff.tell()

    out = bitarray()
    # Cut off unnecessary parts
    out.frombytes(txt[payload_start:])
    return reverse(encode(huff), out, blen)
示例#2
0
def compress(txt):
    """Huffman-compress ``txt`` into the BSC container format.

    Output layout: ``BSC_HEADER_FLAG``, one (frequency, symbol) entry per
    distinct symbol, ``BSC_HEADER_FLAG`` again, then the encoded bits packed
    into bytes.  Frequencies are stored as compactly as possible:

    * below 254 -- a single byte holding ``fq + 2`` (values 1 and 2 are
      reserved as width markers);
    * below 2**16 -- marker byte ``1`` followed by a native ``H``;
    * below 2**32 -- marker byte ``2`` followed by a native ``I``.

    Args:
        txt: sequence of symbols; each symbol is packed with ``'B'`` and so
            must be an int in ``0..255`` (e.g. a ``bytes`` object).

    Returns:
        bytearray containing the compressed data.

    Raises:
        FrequencyOverflowException: if a symbol occurs 2**32 times or more.
    """
    con = bytearray()
    feq = Counter(txt)
    out = apply(encode(feq), txt)

    con += BSC_HEADER_FLAG

    for sym, fq in feq.items():
        if fq < 2**8 - 2:
            # Use next Byte to pack small int, first 0x01 and 0x02 are reserved values.
            con += pack('B', fq + 2)
        elif fq < 2**16:
            # Use H
            con += pack('B', 1)
            con += pack('H', fq)
        elif fq < 2**32:
            # Use I (the full unsigned 32-bit range fits)
            con += pack('B', 2)
            con += pack('I', fq)
        else:
            # sym and fq are ints, so format them instead of concatenating.
            raise FrequencyOverflowException(
                "Symbol {} occurred {} times, we cannot handle that!".format(sym, fq))
        con += pack('B', sym)

    con += BSC_HEADER_FLAG
    con += bitarray(out).tobytes()
    return con
示例#3
0
def encode(text):
    """Run LZ encoding followed by Huffman encoding on ``text``, timing each stage.

    Relies on module-level names: ``file`` (suffix of both output file
    names), ``windowlen``, ``lz`` and ``huff``.  The LZ output is written to
    ``'Lziv_' + file``, read back with ``lz.prep_text`` and the Huffman
    output is written to ``'huff_' + file``.

    Args:
        text: input passed to ``lz.encode``.

    Returns:
        ``([lz_time, tree_time, huffman_time], tree)`` where the three
        timings cover LZ encode + write, ``huff.make_tree`` and
        Huffman encode + write respectively.
    """
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack perf_counter.
    now = getattr(time, 'perf_counter', None) or time.clock

    with open('Lziv_' + file, 'w') as Lziv:
        t1 = now()
        lztext = lz.encode(text, windowlen)
        Lziv.write(lztext)
    # The file is closed before t2 is taken, as in the original timing.
    t2 = now()

    lztext = lz.prep_text('Lziv_' + file)
    t3 = now()
    tree = huff.make_tree(lztext)
    t4 = now()

    # The context manager really closes the file (the old bare ``h.close``
    # only referenced the method without calling it).
    with open('huff_' + file, 'w') as h:
        t5 = now()
        htext = huff.encode(tree, lztext)
        h.write(htext)
        t6 = now()

    return [t2 - t1, t4 - t3, t6 - t5], tree
示例#4
0
 def test_decoding_real_world(self):
     """Real-world text must survive an encode/decode round trip."""
     tree = build_tree(get_real_world_input())
     original = get_real_world_text()
     round_tripped = decode(encode(original, tree), tree)
     self.assertEqual(original, round_tripped)
示例#5
0
文件: wrapper.py 项目: J-j-4/DEFLATE
def encode(text):
    """Run LZ encoding followed by Huffman encoding on ``text``, timing each stage.

    Relies on module-level names: ``file`` (suffix of both output file
    names), ``windowlen``, ``lz`` and ``huff``.  The LZ output is written to
    ``'Lziv_' + file``, read back with ``lz.prep_text`` and the Huffman
    output is written to ``'huff_' + file``.

    Args:
        text: input passed to ``lz.encode``.

    Returns:
        ``([lz_time, tree_time, huffman_time], tree)`` where the three
        timings cover LZ encode + write, ``huff.make_tree`` and
        Huffman encode + write respectively.
    """
    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack perf_counter.
    now = getattr(time, 'perf_counter', None) or time.clock

    with open('Lziv_' + file, 'w') as Lziv:
        t1 = now()
        lztext = lz.encode(text, windowlen)
        Lziv.write(lztext)
    # The file is closed before t2 is taken, as in the original timing.
    t2 = now()

    lztext = lz.prep_text('Lziv_' + file)
    t3 = now()
    tree = huff.make_tree(lztext)
    t4 = now()

    # The context manager really closes the file (the old bare ``h.close``
    # only referenced the method without calling it).
    with open('huff_' + file, 'w') as h:
        t5 = now()
        htext = huff.encode(tree, lztext)
        h.write(htext)
        t6 = now()

    return [t2 - t1, t4 - t3, t6 - t5], tree
示例#6
0
def file2DNA(fileName, fileId, signalStatus):
    """Encode ``fileName`` byte by byte into DNA strings stored in ``fileName + '.dnac'``.

    Every byte is Huffman-encoded into a trit string, converted with
    ``ExtraModules.encodeSTR`` (chained on the previous base) and handed to
    the chunk manager.  Afterwards a trailer is appended: ``S3`` (zero
    padding) followed by ``S2`` (the total trit length as a 20-digit base-3
    number).

    Args:
        fileName: path of the input file; the output path is derived from it.
        fileId: passed through to ``ChunkManager1.ChunkManager``.
        signalStatus: object whose ``emit`` receives the progress percentage
            as a string every 1000 reads, and ``'100'`` at the end.

    Side effects:
        Reads the global ``fileLength`` and updates the global
        ``percentageCompleted``.
    """
    global percentageCompleted
    global fileLength

    totalLen = 0
    # The input handle is closed as soon as reading finishes, even on error.
    with io.open(fileName, "rb") as myFile:
        # NOTE(review): outFile is handed to the chunk manager and never closed
        # here; presumably chunkManager.close() takes care of it -- confirm.
        outFile = io.open(fileName + '.dnac', "w")
        chunkManager = ChunkManager1.ChunkManager(outFile, fileId)
        prevBase = 'A'
        countOfBytes = 0
        while True:
            byte = myFile.read(1)
            countOfBytes = countOfBytes + 1
            # Guard against an empty file (fileLength == 0).
            if fileLength:
                percentageCompleted = (countOfBytes * 1.00 / fileLength) * 100
            else:
                percentageCompleted = 100.0
            if countOfBytes % 1000 == 0:
                signalStatus.emit(str(int(percentageCompleted)))
            if (not byte):
                break
            tritString = str(huffman.encode(byte))
            totalLen = totalLen + len(tritString)
            dnaString = ExtraModules.encodeSTR(tritString, prevBase)
            prevBase = dnaString[-1]

            chunkManager.addString(dnaString)

    S2 = ExtraModules.intToBase3(totalLen, 20)
    currLen = 20 + totalLen
    # Pads up to the next multiple of 25; yields 25 (not 0) when currLen is
    # already a multiple of 25.
    lenOfS3 = 25 - (currLen % 25)
    S3 = '0' * lenOfS3
    dnaString1 = ExtraModules.encodeSTR(S3 + S2, prevBase)
    chunkManager.addString(dnaString1)
    chunkManager.close()
    signalStatus.emit('100')
示例#7
0
 def test_encoding(self):
     """Encoding the sample string with the Wikipedia tree gives the known bits."""
     expected_bits = '01000100100000010001110001000010011010000110100111001001110010001000010011010110100111000'
     tree = build_tree(get_wikipedia_input())
     actual_bits = encode('adeaddadcededabadbabeabeadedabacabed', tree).to01()
     self.assertEqual(actual_bits, expected_bits)
示例#8
0
def test_encode_returns_correct_tree():
    """encode() must return the expected two-leaf tree for 'abbb'."""
    leaf_a = Node(0.25, 'a', None, None)
    leaf_b = Node(0.75, 'b', None, None)
    expected = Node(1, None, leaf_a, leaf_b)
    actual = encode(TEST_FILE, "abbb")
    assert isinstance(actual, Node)
    assert expected == actual
示例#9
0
 def test_decoding(self):
     """Each sample must decode back to itself using the Wikipedia tree."""
     tree = build_tree(get_wikipedia_input())
     for sample in ['a', 'abc', 'adeaddadcededabadbabeabeadedabacabed']:
         round_tripped = decode(encode(sample, tree), tree)
         self.assertEqual(sample, round_tripped)
示例#10
0
    def sendFrame(self):
        """Capture a frame, Huffman-encode it and write it length-prefixed to the client."""
        payload = QByteArray()
        stream = QDataStream(payload, QIODevice.WriteOnly)
        stream.setVersion(QDataStream.Qt_5_0)

        encoded = huffman.encode(self.cam.capture())

        # 32-bit length first, then the raw encoded bytes.
        stream.writeInt32(len(encoded))
        stream.writeRawData(encoded)

        self.client.write(payload)
示例#11
0
def test_decode_reverses_encode_long():
    """A multi-line paragraph must come back as the prefix of the decoded text."""
    passage = """In computer science and information theory, Huffman coding is
    an entropy encoding algorithm used for lossless data compression. The term
    refers to the use of a variable-length code table for encoding a source
    symbol (such as a character in a file) where the variable-length code table
    has been derived in a particular way based on the estimated probability of
    occurrence for each possible value of the source symbol. It was developed
    by David A. Huffman while he was a Ph.D. student at MIT, and published in
    the 1952 paper "A Method for the Construction of Minimum-Redundancy Codes.
    """
    decoded = decode(TEST_FILE, encode(TEST_FILE, passage))
    assert decoded.startswith(passage)
示例#12
0
def encode(filename_in, filename_out):
    """Huffman-encode ``filename_in`` and write the result to ``filename_out``.

    Output layout: 8-byte big-endian header length, 1 byte with the number
    of unused bits in the last payload byte, the pickled frequency table,
    then the encoded payload bytes.
    """
    with open(filename_in, 'rb') as src:
        # First pass: symbol frequencies -> tree -> code map.
        freq = huffman.freq_str(read_each(src))
        code_map = huffman.build_map_code(huffman.build_tree(freq))

        # Second pass: encode the content from the start of the file.
        src.seek(0)
        bits = huffman.encode(read_each(src), code_map)
        unused_bits = bits.buffer_info()[3]  # unused bits of last byte
        header = pickle.dumps(freq, pickle.HIGHEST_PROTOCOL)
        header_size = len(header)

        with open(filename_out, 'wb') as dst:
            parts = (
                header_size.to_bytes(8, 'big'),
                unused_bits.to_bytes(1, 'big'),
                header,
                bits.tobytes(),
            )
            dst.write(b''.join(parts))
示例#13
0
 def test_encode_text(self):
     """Every line of the fixture file must round-trip through encode/decode."""
     print('test_encode_text')
     huffman.DEBUG = True
     print('huffman.DEBUG=' + str(huffman.DEBUG))
     with open(self.filepath, 'r+') as handle:
         # readline() returns '' at end of file, which ends the iteration.
         for line in iter(handle.readline, ''):
             encoded = huffman.encode(line)
             restored = huffman.decode(encoded[0], encoded[1])
             self.assertEqual(line, restored)
示例#14
0
def compress(image, img_mode, output):
    """Compress ``image`` channel by channel and write the result to ``output``.

    The image is converted from ``img_mode`` to ``'YCbCr'``; channels 1 and
    2 are averaged over ``CS`` x ``CS`` sub-matrices before each channel
    goes through ``compress_channel``.  The packed container (image shape,
    channel lengths, channel data) is Huffman-encoded and written in binary.
    """
    dims = image.shape

    ycbcr = convert(image, img_mode, 'YCbCr')

    # Chroma subsampling: channel 0 is kept as is, 1 and 2 are block-averaged.
    planes = (
        ycbcr[:, :, 0],
        utils.submatrices(ycbcr[:, :, 1], CS, CS).mean((2, 3)),
        utils.submatrices(ycbcr[:, :, 2], CS, CS).mean((2, 3)),
    )

    # Channel compression
    compressed = [compress_channel(plane) for plane in planes]
    blobs = [blob for blob, _ in compressed]
    lengths = [length for _, length in compressed]
    for length in lengths:
        click.echo(length)

    # Header: rows, cols, channel count.
    container = bytearray(struct.pack('iii', dims[0], dims[1], 3))

    # Body: the three lengths followed by the three byte strings.
    body_format = 'iii%us%us%us' % tuple(lengths)
    container.extend(struct.pack(body_format, *(lengths + blobs)))

    click.echo(len(container))
    encoded = huffman.encode(container)
    click.echo(len(encoded))

    with open(output, 'wb') as sink:
        sink.write(encoded)
示例#15
0
# ======================= SOURCE ENCODING ========================
# =========================== Huffman ============================

# Use t.tic() and t.toc() to measure the execution time as shown below

t = Time()

t.tic()
# TODO Determine the number of occurrences of the source or use a fixed huffman_freq
# NOTE: "TODO" is a placeholder string; it is passed to huffman.Tree as is.
huffman_freq = "TODO"
huffman_tree = huffman.Tree(huffman_freq)
print(F"Generating the Huffman Tree took {t.toc_str()}")

t.tic()
# TODO print-out the codebook and validate the codebook (include your findings in the report)
# NOTE(review): this line reads `image`, while the LZW input below reads `img`;
# only one of the two names is likely defined -- confirm against the rest of the file.
encoded_message = huffman.encode(huffman_tree.codebook, image.get_pixel_seq())
print(len(encoded_message))
print("Enc: {}".format(t.toc()))

t.tic()
decoded_message = huffman.decode(huffman_tree, encoded_message)
print("Dec: {}".format(t.toc()))

# Work on a copy of the pixel sequence for the LZW stage.
input_lzw = img.get_pixel_seq().copy()

# ======================= SOURCE ENCODING ========================
# ====================== Lempel-Ziv-Welch ========================

t.tic()
# lzw.encode returns the encoded message together with its dictionary.
encoded_msg, dictonary = lzw.encode(input_lzw)
print("Enc: {}".format(t.toc()))
示例#16
0
    sequences.extend(
        ((b, g, r, a), c) for b, g, r, a, c in
         struct.iter_unpack('=BBBBI', chunks[:4*n_ints]))
    sequences_int.extend(struct.iter_unpack('=II', chunks[:4*n_ints]))
    if save_imgs:
        os.makedirs('training_images', exist_ok=True)
        image_manip.export_bmp_py(pixels, "training_images/img_gen_huffman_{:03d}.bmp".format(i).encode('ascii'))

# Split the (color, length) pairs into two independent symbol streams.
colors = [c for c, _ in sequences_int]
# This makes all colors appear at least once, such
# that they are in the Huffman tree
colors.extend(0xff000000 | (r << 20) | (g << 12) | (b << 4) for r, g, b in itertools.product(range(0x10), range(0x10), range(0x10)))
lengths = [l for _, l in sequences_int]
# colors
print('generating color encoder/decoder')
# Only the second element (codes, tree) of huffman.encode's result is used.
_, (codes, tree) = huffman.encode(colors)
# Each entry of `codes` is unpacked as (symbol, (code string, code int, bit count)).
max_len_code_color = max(n_bits for sym, (code_str, code_int, n_bits) in codes)
print('max color code length:', max(l for s, (s_c, i_c, l) in codes))
generate_huffman_encoder(codes, 'opengl/huffman_encode_colors.cpp')
generate_huffman_decoder(tree, 'opengl/huffman_decode_colors.cpp')
generate_huffman_decoder(tree, '../src/huffman_decode_colors.sv', lang='sv')
avg_color = huffman.huffman_avg_len(colors)

# lengths
print('generating length encoder/decoder')
_, (codes, tree) = huffman.encode(lengths)
max_len_code_length = max(n_bits for sym, (code_str, code_int, n_bits) in codes)
# The longest color code plus the longest length code must stay below 32 bits.
assert max_len_code_length + max_len_code_color < 32
print('max length code length:', max(l for s, (s_c, i_c, l) in codes))
generate_huffman_encoder(codes, 'opengl/huffman_encode_lengths.cpp')
generate_huffman_decoder(tree, 'opengl/huffman_decode_lengths.cpp')
示例#17
0
文件: main.py 项目: Lcbx/pyjpeg
]

# we make a list out of the flattened array via zigzag traversal
fileContent = []
for idx, channel in enumerate(copy):
    rows, cols = channel.shape
    # Walk the channel in 8x8 blocks.
    for row in range(0, rows, 8):
        for col in range(0, cols, 8):
            bloc = channel[row:row + 8, col:col + 8]
            flattenedBloc = bloc.flatten()
            # Reorder the flattened block with the Zigzag index array.
            fileContent.extend(flattenedBloc[Zigzag].tolist())

# huffman encoding
import huffman
# Serialise every value as text followed by a comma, e.g. "12,0,-3,".
filechars = "".join(map(lambda x: str(x) + ",", fileContent))
huffRes = huffman.encode(filechars)

# run length encoding
import RLE
rleRes = RLE.encode(huffRes)

# sizes
hsize = len(huffRes.tobytes())
rsize = int(len(rleRes) * 1.5 / 8)
# NOTE: Python 2 print statement; this script does not run under Python 3.
print "original size = ", reduce(
    lambda x, y: x * y, image.shape
), " bytes, huffman size = ", hsize, "bytes, compressed size = ", rsize, " bytes"

# decoding
decoded = RLE.decode(rleRes)
# NOTE(review): eval() executes whatever text the decoder returns; acceptable only
# because the data was produced above in this same run -- never use on external input.
decoded = eval("[" + huffman.decode(decoded)[:len(filechars)] + "]")
示例#18
0
 def test_encode(self):
     """Smoke test: encode() must finish without raising."""
     source, target = "test_file.txt", "test_file.huff"
     encode(source, target)
     assert True
示例#19
0
 def test_encode(self):
     """Smoke test: encode() must finish without raising."""
     source, target = "story.txt", "story.huff"
     encode(source, target)
     assert True
示例#20
0
print("-------The tortoise and the hare.----------")

# construct the frequency dictionary
# The context manager closes the handle once the text has been read.
with open("theTortoiseAndTheHare.txt", "r") as file:
    textData = file.read()
textFreqs = getFreqDict(textData)

# construct the Huffman tree and extract binary codes
textTree = getHuffmanTree(textFreqs)
textCodes = getCode(textTree)
print("\nHuffman code for text data:")
for (key, value) in textCodes.items():
    print(key, '\t', value)

# let's encode the tale
textBinary = encode(textData, textCodes)
print("\nEncoded text data:")
print("%s -------> %s" % (textData, textBinary))
print("Average length (bits per character): ", len(textBinary) / len(textData))
# TODO: to compare average length to entropy, must implement getEntropy()
print("PART A - Entropy:", getEntropy(textFreqs))
print("PART B - The ceiling of entropy and average code length are equal")
# TODO: to decode messages, must implement decode()
messageEncoded = '0110000101010010111100011001111110100101100101001011110'
messageDecoded = decode(messageEncoded, textTree)
print('\nPART C, D - Decoded message:', "".join(messageDecoded))

print("\n\n-----Web session lengths.------")

# construct the frequency dictionary
sessionLengths = np.load("sessionLengths.npy")
示例#21
0
#COLOR PALETTE CAPTURES ALL POSSIBLE PIXEL VALUES IN DICT
# Maps str(pixel value) -> number of occurrences in the image.
color_palette = {}
total_pix = len(pixel_values) #num of total pixels of image
for i in range(total_pix):
	pixvalstr= str(pixel_values[i]) 
	# NOTE(review): on Python 2 .keys() builds a new list on every iteration, which
	# makes the membership test below linear; testing against the dict itself avoids that.
	palette = color_palette.keys()
	# if new value, add to dictionary
	if pixvalstr not in palette:
		color_palette[pixvalstr] = 1
	else:
		color_palette[pixvalstr] += 1	
#add huffman marker to color_palette
color_palette['end'] = 1

#Huffman tree creation
huff = encode(color_palette) #array of tuples
huff_dict = {}  #convert array to huff_dictionary
# Element 0 of each tuple becomes the key, element 1 the value.
for p in huff:
    huff_dict[p[0]] = p[1]

#Generate binary string of image
# Concatenates the huff_dict entry of every pixel, in pixel order.
img_bin_str = ''
for i in pixel_values:
    j = str(i) #convert RGB value into string for key dict usage
    img_bin_str = img_bin_str + huff_dict[j]

#Generate ENCODETABLEHEADER
#255,255,255,freq1,255,255,255,freq2,BASE128
encodertableheader = str(len(huff_dict) - 1) #start with number of keys, minus the 'end' marker
keys = huff_dict.keys()
for i in range(len(keys)): #iterating through keys, ignoring end
示例#22
0
import huffman


# Encode input.txt into test.huff, then decode test.huff into output.txt
# (argument roles inferred from the file names -- confirm against huffman's API).
huffman.encode("input.txt", "test.huff")
huffman.decode("test.huff", "output.txt")
示例#23
0
def test_encode():
    """Encoding 'abbb' must leave exactly the bits 01110000 in TEST_FILE."""
    encode(TEST_FILE, "abbb")
    with open(TEST_FILE, 'rb') as encoded_file:
        written = BitArray(encoded_file)
        assert written.bin == '01110000'
示例#24
0
def EncodeSingleChannel(data,codingParams):
    """Encode a single-channel block of signed-fraction data.

    Windows ``data``, takes its MDCT, allocates bits per scale-factor band
    from the computed SMRs and quantizes the MDCT lines into scale factors
    and mantissas.  The mantissas of each band are additionally
    Huffman-coded with every map in ``codingParams.encodingMaps`` so that
    the cheapest representation can be returned.

    Args:
        data: block of time samples for one channel.
        codingParams: coding state (the original describes it as the
            parameters in a PACFile object).  Attributes read here: ``a``,
            ``b``, ``nScaleBits``, ``nMantSizeBits``, ``state``,
            ``sfBandsLong`` / ``sfBandsTrans`` / ``sfBandsShort``,
            ``targetBitsPerSample``, ``nHuffTableBits``, ``nHuffLengthBits``,
            ``reservoir``, ``sampleRate``, ``encodingMaps``, ``buildTable``
            and ``freqTable``.

    Returns:
        Tuple ``(scaleFactor, bitAlloc, mantissa, overallScale, huffTable,
        optimalBits)``.  ``huffTable`` is 0 when the plain mantissa array is
        returned; otherwise it is ``h + 1`` for the selected Huffman map and
        ``mantissa`` is the per-band list of Huffman codes.  ``scaleFactor``
        is created with ``np.empty``, so entries of bands that received no
        bits are uninitialized.

    Side effects:
        Updates ``codingParams.reservoir`` and, when
        ``codingParams.buildTable`` is set, ``codingParams.freqTable``.
    """
    # prepare various constants
    N = codingParams.a + codingParams.b
    # Floor division: halfN is used as a slice bound and as an array length,
    # so it has to stay an integer under Python 3 as well.
    halfN = N // 2
    nScaleBits = codingParams.nScaleBits
    maxMantBits = (1<<codingParams.nMantSizeBits)  # 1 isn't an allowed bit allocation so n size bits counts up to 2^n
    if maxMantBits>16: maxMantBits = 16  # to make sure we don't ever overflow mantissa holders

    # pick the scale factor band layout that matches the block state
    if codingParams.state == 0:
        sfBands = codingParams.sfBandsLong
    elif codingParams.state == 1 or codingParams.state == 3:
        sfBands = codingParams.sfBandsTrans
    else:
        sfBands = codingParams.sfBandsShort

    # NOTE(review): vMantissa is not defined in this function; it has to be
    # provided at module level (presumably np.vectorize(Mantissa)) -- confirm.

    # compute target mantissa bit budget for this block of halfN MDCT mantissas
    bitBudget = codingParams.targetBitsPerSample * halfN  # this is overall target bit rate
    bitBudget -= 34         # Block type + nBytes bits
    bitBudget -=  nScaleBits*(sfBands.nBands +1)  # less scale factor bits (including overall scale factor)
    bitBudget -= codingParams.nHuffTableBits # less huff table type bits
    bitBudget -= codingParams.nMantSizeBits*sfBands.nBands  # less mantissa bit allocation bits
    if codingParams.state == 2:
        # state 2 may borrow from the bit reservoir, capped at 1.125x the budget
        bitsFromRes = np.min([codingParams.reservoir, 1.125*bitBudget])
        codingParams.reservoir -= bitsFromRes
    else:
        bitsFromRes = 0

    # window data for side chain FFT and also window and compute MDCT
    timeSamples = data

    if codingParams.state == 0 or codingParams.state == 2:
        mdctTimeSamples = SineWindow(data)
        mdctLines = MDCT(mdctTimeSamples, halfN, halfN)[:halfN]
    else:
        mdctTimeSamples = TransitionSineWindow(data,codingParams.a,codingParams.b)
        mdctLines = MDCT(mdctTimeSamples, codingParams.a, codingParams.b)[:halfN]

    # compute overall scale factor for this block and boost mdctLines using it
    maxLine = np.max( np.abs(mdctLines) )
    overallScale = ScaleFactor(maxLine,nScaleBits)  #leading zeroes don't depend on nMantBits
    mdctLines *= (1<<overallScale)
    # compute SMRs in side chain FFT
    SMRs = CalcSMRs(timeSamples, mdctLines, overallScale, codingParams.sampleRate, sfBands)
    # perform bit allocation using SMR results
    bitAlloc = BitAlloc(bitBudget+bitsFromRes, maxMantBits, sfBands.nBands, sfBands.nLines, SMRs)

    # given the bit allocations, quantize the mdct lines in each band
    scaleFactor = np.empty(sfBands.nBands,dtype=np.int32)
    nMant=halfN
    for iBand in range(sfBands.nBands):
        if not bitAlloc[iBand]: nMant-= sfBands.nLines[iBand]  # account for mantissas not being transmitted
    mantissa=np.empty(nMant,dtype=np.int32)

    # one list of per-band codes and one running bit count per Huffman map
    nHuffMaps = len(codingParams.encodingMaps)
    mHuff = [[] for _ in range(nHuffMaps)]
    huffBits = [0] * nHuffMaps

    iMant=0
    for iBand in range(sfBands.nBands):
        nLines= sfBands.nLines[iBand]
        if nLines and bitAlloc[iBand]:      # Only encode mantissas if lines exist in current band
            lowLine = sfBands.lowerLine[iBand]
            highLine = sfBands.upperLine[iBand] + 1  # extra value is because slices don't include last value
            scaleLine = np.max(np.abs( mdctLines[lowLine:highLine] ) )
            scaleFactor[iBand] = ScaleFactor(scaleLine, nScaleBits, bitAlloc[iBand])
            # store FP coded mantissa
            m = vMantissa(mdctLines[lowLine:highLine],scaleFactor[iBand], nScaleBits, bitAlloc[iBand])
            mantissa[iMant:iMant+nLines] = m

            for h in range(nHuffMaps):
                # store Huffman coded mantissa
                huffCode = huff.encode(m, codingParams.encodingMaps[h])
                mHuff[h].append(huffCode)
                # huffCode[0] is added to the running bit count of this map
                huffBits[h] += codingParams.nHuffLengthBits + huffCode[0]
            # increment starting index
            iMant += nLines
        else:
            # keep the per-band lists aligned with the band index
            for h in range(nHuffMaps):
                mHuff[h].append([])

    # If building freq table, add mantissas to freq table
    if codingParams.buildTable:
        codingParams.freqTable = huff.buildFrequencyTable(codingParams.freqTable, mantissa)

    # Initialize optimal bits as non-huffman
    optimalBits = np.sum(np.multiply(bitAlloc,sfBands.nLines))
    huffTable = 0

    # check for optimal bit allocation
    for h in range(nHuffMaps):
        if huffBits[h] < optimalBits:
            huffTable = h + 1
            optimalBits = huffBits[h]
            mantissa = mHuff[h]

    # calculate rollover bits for bit reservoir
    codingParams.reservoir += np.max([bitBudget + bitsFromRes - optimalBits, 0])

    # huffTable == 0 means the normal fp mantissas are returned
    return (scaleFactor, bitAlloc, mantissa, overallScale, huffTable, optimalBits)
示例#25
0
    for z in range(4):
        if current >= 9:
            if current != 0: compressed += str(current)
            current = 0
        if y_test[i][z] != y[i][z]:
            flag = 1
    if flag == 1:
        if current != 0: compressed += str(current) + ref(y[i])
        current = 0
        wrong += 1
    else:
        current += 1
    total += 1
# Share of correctly predicted items, in percent.
print("Accuracy: ", (1 - wrong / total) * 100)
print("Wrong: ", wrong)
print("Total", total)
# print(compressed)

# Huffman-encode the prediction-based stream; `val` is the encoded value,
# `key` the second element returned by hf.encode.
val, key = hf.encode(compressed)
# print(val)

print("Length of DNA: ", len(DNA_backup))
print("Lenght of LSTM Compress: ", len(compressed))
print("After Huffman: ", len(str(val)))
# NOTE(review): hf.encode(DNA_backup) is evaluated again further down; the
# result could be computed once and reused.
print("Just Huffman: ", len(str(hf.encode(DNA_backup)[0])))

# Percentage saved relative to the original DNA length.
print("Final Compression: ",
      ((len(DNA_backup) - len(str(val))) * 100 / len(DNA_backup)))
print("Just Huffman Compression: ",
      (len(DNA_backup) - len(str(hf.encode(DNA_backup)[0]))) * 100 /
      len(DNA_backup))
示例#26
0
    def evaluate(self, data1, s, d):
        """Time one compression scheme on ``size`` random texts and log the results.

        For ``i`` in ``1..size`` a text is produced with
        ``RandomText(data1).makeRandomText(i)``, appended to
        ``self.textBrowser``, processed by the algorithm named by ``s`` and
        timed with ``time.perf_counter_ns()``.  Per-run timings are appended
        to ``self.textBrowser_3`` and stored in the global ``vals_d`` under
        the key ``s + str(run counter)``; algorithm output is appended to
        ``self.textBrowser_2``.  ``self.write_to_excel()`` is called at the
        end regardless of ``s``.

        Args:
            data1: passed to ``RandomText``.
            s: algorithm name: ``'bwt'``, ``'huffman'`` or ``'repair'``; any
                other value only triggers ``write_to_excel``.
            d: unused.

        Note:
            Reads the globals ``size`` and ``vals_d`` and increments one of
            ``count_bwt`` / ``count_h`` / ``count_r``.  With ``size < 1`` the
            summary lines after each loop reference names that were never
            bound and raise ``NameError``.
        """
        global size
        global vals_d
        global count_bwt
        global count_h
        global count_r

        def append_text(browser, text):
            # Qt text browsers are extended by re-setting the whole content.
            browser.setText(browser.toPlainText() + text)

        print("size: ", size)
        if s == 'bwt':
            count_bwt += 1
            total = 0
            vals_bwt = []
            for i in range(1, size + 1):
                rnd_txt = RandomText(data1)
                data = rnd_txt.makeRandomText(i)
                append_text(self.textBrowser,
                            "  " + str(i) + ": " + data + " \n \n")
                t1_start = time.perf_counter_ns()
                bwt = BWT(data)
                transform_bwt = bwt.transform()
                transform_rle = bwt.rle_encode(transform_bwt)
                decode_rle = bwt.rle_decode(transform_rle)
                t1_stop = time.perf_counter_ns()
                elapsed = t1_stop - t1_start
                append_text(self.textBrowser_3, str(elapsed) + "\n")
                total = total + elapsed
                vals_bwt.append(elapsed)
                append_text(self.textBrowser_2,
                            str(i) + "BWT: " + transform_bwt + " RLE: " +
                            transform_rle + " RLE-DECODE: " + decode_rle +
                            " \n")
            # Store under the per-run key like the other branches do, so
            # repeated BWT runs do not overwrite each other.
            key = s + str(count_bwt)
            vals_d[key] = vals_bwt
            append_text(self.textBrowser_3,
                        'Sum for bwt: ' + str(total) + "\n")
            append_text(self.textBrowser_2,
                        "; length of the compressed text: " +
                        str(len(transform_rle)) + ";  initial length: " +
                        str(len(data)) + "\n")

        elif s == 'huffman':
            count_h += 1
            total = 0
            vals_h = []
            for i in range(1, size + 1):
                rnd_txt = RandomText(data1)
                data = rnd_txt.makeRandomText(i)
                append_text(self.textBrowser,
                            " " + str(i) + ": " + data + " \n \n")
                t1_start = time.perf_counter_ns()
                huffman = Huffman(data)
                frequencyTable = huffman.computeFrequencies(data)
                codeTable = huffman.huffman_code(frequencyTable)
                huffmanCode = huffman.encode(codeTable)
                encoded = "".join(huffmanCode[ch] for ch in data)
                decoded_str = huffman.huffman_decode(encoded, huffmanCode)
                t1_stop = time.perf_counter_ns()
                elapsed = t1_stop - t1_start
                vals_h.append(elapsed)
                append_text(self.textBrowser_3, str(elapsed) + "\n")
                append_text(self.textBrowser_2,
                            "encoded: " + encoded + "Decoded-string: " +
                            decoded_str + "\n")
                total = total + elapsed
            key = s + str(count_h)
            vals_d[key] = vals_h
            append_text(self.textBrowser_3,
                        'Sum for Huffman: ' + str(total) + "\n")
            # Append to the current content; reusing the text captured inside
            # the loop would drop the output of the last run.
            append_text(self.textBrowser_2,
                        "length of Huffmancode: " + str(len(huffmanCode)) +
                        ";  initial length: " + str(len(data)) + "\n")

        elif s == 'repair':
            total = 0
            vals_r = []
            count_r += 1
            for i in range(1, size + 1):
                rnd_txt = RandomText(data1)
                data = rnd_txt.makeRandomText(i)
                append_text(self.textBrowser,
                            " " + str(i) + ": " + data + " \n \n")
                t1_start = time.perf_counter_ns()
                repair = RePair(data)
                ch = 'A'
                rules = {}
                rules, s1 = repair.repair(data, ch, rules)
                # Decompression is not performed; an empty string is logged.
                decomp_string = ""
                t1_stop = time.perf_counter_ns()
                elapsed = t1_stop - t1_start
                vals_r.append(elapsed)
                append_text(self.textBrowser_2,
                            "Rules: " + str(rules) + "; s: " + s1 +
                            "Decomp string: " + decomp_string)
                append_text(self.textBrowser_3, str(elapsed) + "\n")
                total = total + elapsed
            key = s + str(count_r)
            vals_d[key] = vals_r
            append_text(self.textBrowser_3,
                        'Sum for RePair: ' + str(total) + "\n")
            # Report the length of the compressed string s1, not of the
            # algorithm name s.
            append_text(self.textBrowser_2,
                        ' initial length: ' + str(len(data)) +
                        'len(RePair): ' + str(len(s1)))
        self.write_to_excel()
示例#27
0
def test_decode_reverses_encode_simple():
    """A short ASCII string must come back as the prefix of the decoded text."""
    original = 'abbb'
    decoded = decode(TEST_FILE, encode(TEST_FILE, original))
    assert decoded.startswith(original)
示例#28
0
from pprint import pprint

import huffman
from view import viz_tree

# Sample input: a bytes object, so symbols are integer byte values.
data = b"huffman"

tree = huffman.build_tree(data)
map_code = huffman.build_map_code(tree)

# encode
bin_data = huffman.encode(data, map_code)

print("Map code")
# Keys are byte values (shown via chr); values expose to01() for a bit string.
for k, v in map_code.items():
    print("{}: {}".format(chr(k), v.to01()))
print("Encoded data")
print(bin_data.to01())
viz_tree(tree)

# decode
print("After decode")
# The third argument is element 3 of buffer_info() -- presumably the number of
# padding bits in the last byte; confirm against the bitarray version in use.
print(huffman.decode(bin_data.tobytes(), map_code, bin_data.buffer_info()[3]))

# calculate performance
# NOTE(review): p is encoded bits / original bits, i.e. the remaining size ratio,
# although the message below labels it "Reduce".
p = len(bin_data) / (len(data) * 8)
print(f"Reduce {p * 100}%")
示例#29
0
def test_decode_reverses_encode_special():
    """Punctuation and spaces must come back as the prefix of the decoded text."""
    original = '! %'
    decoded = decode(TEST_FILE, encode(TEST_FILE, original))
    assert decoded.startswith(original)
示例#30
0
# strip special characters from both inputs before building the code
string = h.remove_spl_ch(string)
message = h.remove_spl_ch(message)

# to create the huffman map
prob_of_characters, enc_dict = h.encode_dict(input=string)

print("\nencoded dictionary  : ", end="\n\n")

for key, value in enc_dict:
    print(key, " : ", value)

print("\n\n")
print("probability of characters : ", end="\n\n")
for key, value in prob_of_characters:
    print(key, " : ", value)
print("\n\n")

# to encode the message(custom) using the huffman map
enc_msg = h.encode(msg=message, dictionary=enc_dict)
print("encoded message : ", enc_msg, end="\n\n")

# to encode the original string using huffman map
# (this must use `string`: size_saved below compares `string` with enc_string)
enc_string = h.encode(msg=string, dictionary=enc_dict)

# to decode the encoded message using huffman map
dec_msg = h.decode(enc_msg=enc_msg, dictionary=enc_dict)
print("decoded message : ", dec_msg, end="\n\n")

# to get information about the space saved
h.size_saved(dictionary=enc_dict, msg=string, enc_msg=enc_string)
示例#31
0
def test_decode_reverses_encode_unicode():
    """Non-ASCII characters must come back as the prefix of the decoded text."""
    original = 'Kærlighed og Øl!'
    decoded = decode(TEST_FILE, encode(TEST_FILE, original))
    assert decoded.startswith(original)
示例#32
0
def main():
    """Load the Iliad text file as unsigned bytes and Huffman-encode it."""
    source_path = "../data/raw/iliad.txt"
    huffman.encode(np.fromfile(source_path, np.uint8))
示例#33
0
                ccount += 1
            text += line
            words += [w.strip(' \n.,”“') for w in line.split()]

    print('= Stats =')
    print('Number of characters', ccount)
    print('Number of words', len(words))

    min_entropy = compute_entropy(frequency_map.values())
    print('Minimum entropy', min_entropy)

    huffman_entropy = compute_huffman_entropy(frequency_map)
    print('Huffman entropy', huffman_entropy)

    tree = build_tree(frequency_map)
    encoded_text = encode(text, tree)
    print('Length of raw text: {} bytes'.format(len(text)))
    print('Length of encoded text: {} bytes'.format(len(encoded_text)/8))
    print('Compression rate: {}'.format(len(text)*8/len(encoded_text)))

    print('= Word-based =')
    text_length = 0

    frequency_map = {}
    for w in words:
        text_length += len(w)
        if w not in frequency_map:
            frequency_map[w] = 0
        frequency_map[w] += 1
    avg_word_size = text_length / len(words)
示例#34
0
# The arithmetic coder needs a block size as an additional argument.
if task == TASK_ARITH:
    blockSize = int(arguments[3])

# Reject unknown tasks and missing input files before doing any work.
if task not in TASKS:
    sys.stderr.write(
        f"Invalid usage! The given task: {task} does not exist!\n")
    sys.stderr.write("For help, use: encode.py -h")
    sys.exit(errno.EINVAL)

if not os.path.exists(fileName):
    sys.stderr.write(f"Could not find input file: {fileName}")
    sys.exit(errno.ENOENT)

# Dispatch on the requested task.
if task == TASK_DISPLAY:
    utils.display(stats.createStatistic(fileName))
if task == TASK_SF:
    utils.display(shannon_fano.encode(fileName))
if task == TASK_SF_STAT:
    codes = shannon_fano.encode(fileName)
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))
if task == TASK_HUFF:
    utils.display(huffman.encode(fileName))
if task == TASK_HUFF_STAT:
    codes = huffman.encode(fileName)
    # Display the codes that were just computed instead of encoding the
    # file a second time (same shape as the TASK_SF_STAT branch).
    utils.display(codes)
    utils.displayOptimality(stats.getOptimality(codes))
if task == TASK_ARITH:
    code = arithmetic.encode(fileName, blockSize)
    utils.displayArithmeticCode(code)
示例#35
0
    path = os.path.join(output_path, dirname)
    if not os.path.exists(path):
        os.mkdir(path)

# Save results
# Running sum of the bitrate reported by huffman.encode for every frame.
total_added_bitrate = 0
for i in range(len(resids)):
    # Progress report every 10 frames; i > 0 also avoids dividing by zero.
    if i > 0 and i % 10 == 0:
        print('Running frame', i)
        print('Average added bitrate so far:', total_added_bitrate / i)

    # Encode
    # data[i:i + 1] keeps the leading dimension (a one-frame slice).
    encoded = model.encoder(data[i:i + 1])

    # Grab size
    # The bitrate is measured on the encoder output before it is binarized below.
    _, _, added_bitrate = huffman.encode(encoded, fps=int(args['fps']))
    total_added_bitrate += added_bitrate

    # Binarize
    # sign() yields -1/0/+1; zeros are mapped to +1 so only two values remain.
    encoded = encoded.sign()
    encoded[encoded == 0] = 1

    # Decode
    decoded = model.decoder(encoded)
    decoded = decoded.data.numpy()

    # Three images per frame: compressed + decoded output, compressed only,
    # and compressed + resids[i].
    save_img(i, compressed[i] + decoded[0], 'result')
    save_img(i, compressed[i], 'compressed')
    save_img(i, compressed[i] + resids[i], 'reference')
    #save_img(i, resids[i], 'input', resid=True)
    #save_img(i, decoded[0], 'output', resid=True)
示例#36
0
文件: test.py 项目: sathish-a/Huffman
 def test_encode(self):
     """Smoke test: encode() with empty file names must finish without raising."""
     source, target = "", ""
     encode(source, target)
     assert True