def execute(arguments):
    """Validate and enrich ``arguments``, then run the selected codec.

    The compress flag chooses between compression and decompression. The
    LZW option is checked before the Elias option; when neither is set
    nothing is run. The code option is forwarded to the Elias codec only.
    """
    _validate_arguments(arguments)
    _enrich_arguments(arguments)

    # Read every setting up front, in the same order as before.
    compressing = arguments[_compress_command]
    source = arguments[_read_file_parameter]
    target = arguments[_write_file_parameter]
    use_lzw = arguments[_lzw_option]
    use_elias = arguments[_elias_option]
    code_type = arguments[_code_option]

    if use_lzw:
        action = lzw.compress if compressing else lzw.decompress
        action(source, target)
    elif use_elias:
        action = elias.compress if compressing else elias.decompress
        action(source, target, code_type=code_type)
def lzwEncode(stream, parameters):
    '''
        Method to encode streams using the LZW algorithm

        @param stream: A PDF stream
        @param parameters: Mapping of filter parameter names (/Predictor, /Columns, /Colors, /BitsPerComponent) to objects exposing getRawValue(), or None / {} when there are none
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1
    '''
    if parameters is None or parameters == {}:
        output = stream
    else:
        if '/Predictor' in parameters:
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = 1
        # Columns = number of samples per row
        if '/Columns' in parameters:
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = 1
        # Colors = number of components per sample
        if '/Colors' in parameters:
            colors = parameters['/Colors'].getRawValue()
            if colors < 1:
                colors = 1
        else:
            colors = 1
        # BitsPerComponent: number of bits per color component
        if '/BitsPerComponent' in parameters:
            bits = parameters['/BitsPerComponent'].getRawValue()
            if bits not in [1, 2, 4, 8, 16]:
                bits = 8
        else:
            bits = 8
        # NOTE: /EarlyChange used to be read here but was never passed on to
        # the compressor, so the unused lookup has been dropped.
        if predictor is not None and predictor != 1:
            ret = pre_prediction(stream, predictor, columns, colors, bits)
            if ret[0] == -1:
                return ret
            output = ret[1]
        else:
            output = stream
    try:
        # join() instead of repeated += keeps the concatenation linear
        return (0, ''.join(lzw.compress(output)))
    except Exception:
        # This is the compression path: report it as such (the message used
        # to say "decompressing" when parameters were supplied).
        return (-1, 'Error compressing string')
def lzwEncode(stream, parameters):
    '''
        Method to encode streams using the LZW algorithm

        @param stream: A PDF stream
        @param parameters: Mapping of filter parameter names (/Predictor, /Columns, /Colors, /BitsPerComponent) to objects exposing getRawValue(), or None / {} when there are none
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1
    '''
    if parameters is None or parameters == {}:
        output = stream
    else:
        if '/Predictor' in parameters:
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = 1
        # Columns = number of samples per row
        if '/Columns' in parameters:
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = 1
        # Colors = number of components per sample
        if '/Colors' in parameters:
            colors = parameters['/Colors'].getRawValue()
            if colors < 1:
                colors = 1
        else:
            colors = 1
        # BitsPerComponent: number of bits per color component
        if '/BitsPerComponent' in parameters:
            bits = parameters['/BitsPerComponent'].getRawValue()
            if bits not in [1, 2, 4, 8, 16]:
                bits = 8
        else:
            bits = 8
        # NOTE: /EarlyChange used to be read here but was never passed on to
        # the compressor, so the unused lookup has been dropped.
        if predictor is not None and predictor != 1:
            ret = pre_prediction(stream, predictor, columns, colors, bits)
            if ret[0] == -1:
                return ret
            output = ret[1]
        else:
            output = stream
    try:
        # join() instead of repeated += keeps the concatenation linear
        return (0, ''.join(lzw.compress(output)))
    except Exception:
        # This is the compression path: report it as such (the message used
        # to say "decompressing" when parameters were supplied).
        return (-1, 'Error compressing string')
def test_typical_cases(self):
    """Check compress() against known code sequences for sample strings."""
    expectations = (
        ("TOBEORNOTTOBEORTOBEORNOT",
         [84, 79, 66, 69, 79, 82, 78, 79, 84,
          256, 258, 260, 265, 259, 261, 263]),
        ("^WED^WE^WEE^WEB^WET",
         [94, 87, 69, 68, 256, 69, 260, 261, 257, 66, 260, 84]),
        ("thisisthe",
         [116, 104, 105, 115, 258, 256, 101]),
    )
    for text, expected_codes in expectations:
        self.assertSequenceEqual(compress(text), expected_codes)
def comprimir_archivos(origen, destino):
    """Compress every entry listed in ``origen`` into ``destino``.

    Each entry is read with ``lzw.readbytes``, passed through
    ``lzw.compress`` and written with ``lzw.writebytes`` to ``destino``
    under the same name plus a ``.compressed`` suffix.
    """
    for nombre in os.listdir(origen):
        ruta_origen = os.path.abspath("/".join((origen, nombre)))
        comprimido = lzw.compress(lzw.readbytes(ruta_origen))
        ruta_destino = os.path.abspath(
            "/".join((destino, nombre)) + ".compressed")
        lzw.writebytes(ruta_destino, comprimido)
def verify_compressed_file(self, testfile=GIANT_FILE):
    """Round-trip ``testfile`` through LZW and compare it byte by byte.

    The file is compressed into a temporary file, read back through
    ``lzw.decompress`` and compared against a fresh read of the original.
    ``zip_longest`` is used so that a length mismatch shows up as a
    comparison against ``None`` instead of being silently truncated.

    :param testfile: path of the file to round-trip.
    """
    with tempfile.TemporaryFile("w+b") as compressedfile:
        for bs in lzw.compress(lzw.readbytes(testfile)):
            compressedfile.write(bs)

        # Rewind so the decompressor reads what was just written.
        compressedfile.flush()
        compressedfile.seek(0)

        checkstream = lzw.readbytes(testfile)
        uncompressed = lzw.decompress(lzw.filebytes(compressedfile))

        pairs = six.moves.zip_longest(checkstream, uncompressed)
        # Positions are 1-based, matching the counter this loop used before.
        for position, (oldbyte, newbyte) in enumerate(pairs, 1):
            if oldbyte != newbyte:
                msg = "Corrupted byte at {0}, original {1} != {2}".format(
                    position, oldbyte, newbyte)
                # assertEqual: the assertEquals alias is deprecated and was
                # removed from unittest in Python 3.12.
                self.assertEqual(oldbyte, newbyte, msg)
def extend_with_ratio_metrics(**record):
    """Compute character-class ratios and compressibility for text fields.

    For each of the ``diff``, ``neg_diff`` and ``editcomment`` entries of
    ``record`` the result contains ``<key>_ul_ratio`` (upper / lower),
    ``<key>_u_ratio``, ``<key>_d_ratio``, ``<key>_non_alnum_ratio`` and
    ``<key>_compressibility`` (length / LZW-compressed length). A ratio
    whose denominator is zero is reported as ``None``.

    :param record: keyword arguments holding at least the three text fields.
    :return: dict mapping the metric names above to their values.
    """
    # Function-scope import so this block does not depend on a file-level one.
    import string

    ret = {}
    for key in ['diff', 'neg_diff', 'editcomment']:
        value = record[key]
        total_len = len(value)
        upper_len = sum(c in uppercase for c in value)
        # Fixed: this used to count uppercase characters a second time, which
        # made the upper/lower ratio always 1 (or None).
        # NOTE(review): assumes `uppercase` is the ASCII uppercase alphabet;
        # if it is a wider set, use its lowercase counterpart here instead.
        lower_len = sum(c in string.ascii_lowercase for c in value)
        digits_len = sum(c in digits for c in value)
        alnum_len = sum(c in alphanum for c in value)
        # Fixed: the guard compared the text itself with 0, which is never
        # true; the intent is to skip compression for empty text.
        if total_len == 0:
            compressed_len = 0
        else:
            compressed_len = len(list(lzw.compress(value.encode('utf8'))))
        ret.update({
            key + '_ul_ratio':
                None if lower_len == 0 else upper_len / lower_len,
            key + '_u_ratio':
                None if total_len == 0 else upper_len / total_len,
            key + '_d_ratio':
                None if total_len == 0 else digits_len / total_len,
            key + '_non_alnum_ratio':
                None if total_len == 0
                else (total_len - alnum_len) / total_len,
            key + '_compressibility':
                None if compressed_len == 0 else total_len / compressed_len,
        })
    return ret
def calcular_distancia(buscados, comprimidos):
    """Walk every pairing of a file in ``buscados`` with one in ``comprimidos``.

    Each file in ``buscados`` is read and compressed; each file in
    ``comprimidos`` is read and decompressed. The results are only bound to
    local names here and nothing is returned.
    """
    for nombre in os.listdir(buscados):
        ruta = os.path.abspath("/".join((buscados, nombre)))
        bytes_archivo = lzw.readbytes(ruta)  # X
        archivo_comprimido = lzw.compress(bytes_archivo)  # C(X)
        for nombre_comprimido in os.listdir(comprimidos):
            ruta_comprimido = os.path.abspath(
                "/".join((comprimidos, nombre_comprimido)))
            bytes_comprimido = lzw.readbytes(ruta_comprimido)  # C(Y)
            # presumably Y, recovered from C(Y) -- verify against caller
            bytes_descomprimido = lzw.decompress(bytes_comprimido)
def verify_compressed_file(self, testfile=GIANT_FILE):
    """Round-trip ``testfile`` through LZW and compare it byte by byte.

    The file is compressed into a temporary file, read back through
    ``lzw.decompress`` and compared against a fresh read of the original.
    ``zip_longest`` is used so that a length mismatch shows up as a
    comparison against ``None`` instead of being silently truncated.

    :param testfile: path of the file to round-trip.
    """
    with tempfile.TemporaryFile("w+b") as compressedfile:
        for bs in lzw.compress(lzw.readbytes(testfile)):
            compressedfile.write(bs)

        # Rewind so the decompressor reads what was just written.
        compressedfile.flush()
        compressedfile.seek(0)

        checkstream = lzw.readbytes(testfile)
        uncompressed = lzw.decompress(lzw.filebytes(compressedfile))

        pairs = six.moves.zip_longest(checkstream, uncompressed)
        # Positions are 1-based, matching the counter this loop used before.
        for position, (oldbyte, newbyte) in enumerate(pairs, 1):
            if oldbyte != newbyte:
                msg = "Corrupted byte at {0}, original {1} != {2}".format(
                    position, oldbyte, newbyte)
                # assertEqual: the assertEquals alias is deprecated and was
                # removed from unittest in Python 3.12.
                self.assertEqual(oldbyte, newbyte, msg)
def post_build(self, pkt, pay):
    """Append the payload to the packet, compressing it first if enabled.

    When the ``auto_compression`` setting is on, the payload is compressed
    with the first matching entry among deflate, gzip, compress, br and
    zstd (checked in that order). When brotli or zstandard is selected but
    unavailable, a message is logged and the payload is left untouched.
    """
    if conf.contribs["http"]["auto_compression"]:
        encodings = self._get_encodings()
        if "deflate" in encodings:
            import zlib
            pay = zlib.compress(pay)
        elif "gzip" in encodings:
            pay = gzip_compress(pay)
        elif "compress" in encodings:
            import lzw
            pay = lzw.compress(pay)
        elif "br" in encodings:
            if not _is_brotli_available:
                log_loading.info(
                    "Can't import brotli. brotli compression will "
                    "be ignored !"
                )
            else:
                pay = brotli.compress(pay)
        elif "zstd" in encodings:
            if not _is_zstd_available:
                log_loading.info(
                    "Can't import zstandard. zstd compression will "
                    "be ignored !"
                )
            else:
                pay = zstandard.ZstdCompressor().compress(pay)
    return pkt + pay
def test_compressdecompress(self):
    """Round-trip both fixture texts through lzw.compress / lzw.decompress."""
    for original in (self.english, self.gibberish):
        # Materialise the compressed output before feeding it back in.
        packed = list(lzw.compress(original))
        restored = b"".join(lzw.decompress(packed))
        self.assertEqual(original, restored)
def test_typical_cases(self):
    """Check compress() against known code sequences for sample strings."""
    expectations = (
        ("TOBEORNOTTOBEORTOBEORNOT",
         [84, 79, 66, 69, 79, 82, 78, 79, 84,
          256, 258, 260, 265, 259, 261, 263]),
        ("^WED^WE^WEE^WEB^WET",
         [94, 87, 69, 68, 256, 69, 260, 261, 257, 66, 260, 84]),
        ("thisisthe",
         [116, 104, 105, 115, 258, 256, 101]),
    )
    for text, expected_codes in expectations:
        self.assertSequenceEqual(compress(text), expected_codes)
def compressed_cp_lines(cps):
    """Serialise hex code points, LZW-compress them and format as lines.

    Each entry of ``cps`` is parsed as a hexadecimal integer and appended
    with ``lzw.add_cp``. Size statistics are printed, and the compressed
    values are formatted 12 per line by ``lzw.compressed_bytes_to_lines``.
    """
    raw = []
    for code_point in cps:
        lzw.add_cp(raw, int(code_point, 16))
    packed = lzw.compress(raw)

    # A single parenthesised argument prints identically with the print
    # statement and the print function.
    print('rewrote {} * 32 = {} bits as {} * 8 = {} bits'.format(
        len(cps), len(cps)*32, len(raw), len(raw)*8))
    print('compressed to {} * 16 = {} bits'.format(
        len(packed), len(packed) * 16))
    return lzw.compressed_bytes_to_lines(packed, 12)
def compressed_prop_lines(cp_prop_pairs):
    """LZW-compress the serialised property bytes and format them as lines.

    The pairs are serialised by ``uncompressed_prop_bytes``, compressed with
    ``lzw.compress`` and formatted 12 values per line by
    ``lzw.compressed_bytes_to_lines``.
    """
    packed = lzw.compress(uncompressed_prop_bytes(cp_prop_pairs))
    return lzw.compressed_bytes_to_lines(packed, 12)
def compressed_case_mapping_lines(mappings):
    """Serialise case mappings, LZW-compress them and format as lines.

    For every mapping the hexadecimal code point ``t[0]`` is appended with
    ``lzw.add_cp``, followed by the two shorts ``t[1][0]`` and ``t[1][1]``.
    Size statistics are printed, and the compressed values are formatted
    12 per line by ``lzw.compressed_bytes_to_lines``.
    """
    raw = []
    for mapping in mappings:
        lzw.add_cp(raw, int(mapping[0], 16))
        lzw.add_short(raw, mapping[1][0])
        lzw.add_short(raw, mapping[1][1])
    packed = lzw.compress(raw)

    # A single parenthesised argument prints identically with the print
    # statement and the print function.
    print('rewrote {} * 64 = {} bits as {} * 8 = {} bits'.format(
        len(mappings), len(mappings)*64, len(raw), len(raw)*8))
    print('compressed to {} * 16 = {} bits'.format(
        len(packed), len(packed) * 16))
    return lzw.compressed_bytes_to_lines(packed, 12)
def encode(self, output_path):
    """Write the image held by this object to ``output_path`` as a GIF file.

    Reads ``self.img`` (only its shape), ``self.color_table``,
    ``self.color_table_bits``, ``self.color_table_size`` and
    ``self.color_table_indices``. The file is written sequentially: header,
    logical screen descriptor, global color table, graphic control
    extension, image descriptor, LZW-compressed index data split into
    sub-blocks, and the trailer.

    :param output_path: path of the file to create (opened in binary mode).
    """
    with open(output_path, 'wb') as w:
        # "GIF89a" in Hex
        w.write(bytes([0x47, 0x49, 0x46, 0x38, 0x39, 0x61]))
        # width and height in unsigned 2 byte (16 bit) little-endian
        width_bytes = (self.img.shape[1]).to_bytes(2, byteorder='little')
        height_bytes = (self.img.shape[0]).to_bytes(2, byteorder='little')
        w.write(width_bytes)
        w.write(height_bytes)
        # GCT follows for 256 colors with resolution 3 x 8 bits/primary;
        # the lowest 3 bits represent the bit depth minus 1, the highest
        # true bit means that the GCT is present
        w.write(bytes([0xf0 + self.color_table_bits - 1]))
        # Background color #0
        w.write(bytes([0x00]))
        # Default pixel aspect ratio
        w.write(bytes([0x00]))
        # Global color table (GCT)
        assert self.color_table_size == self.color_table.shape[0]
        for c in range(self.color_table_size):
            r,g,b = self.color_table[c]
            w.write(bytes([r, g, b]))
        # Graphic Control Extension (comment fields precede this in most files)
        # NOTE(review): the third byte (block size) is 0x03 here; the GIF89a
        # specification gives this extension a fixed block size of 4 --
        # confirm that decoders accept this shorter form.
        w.write(bytes([0x21, 0xf9, 0x03, 0x00, 0x00, 0x00, 0x00]))
        # Image Descriptor
        w.write(bytes([0x2c]))
        w.write(bytes([0x00, 0x00, 0x00, 0x00]))  # NW corner position of image in logical screen
        w.write(width_bytes)
        w.write(height_bytes)
        w.write(bytes([0x00]))  # no local color table
        lzw_min = max(2, self.color_table_bits)
        max_code_size = 10
        # start of image - LZW minimum code size
        w.write(lzw_min.to_bytes(1, byteorder='little'))
        # The indices are passed to the compressor as a string with one
        # character per index.
        color_table_indices = ''.join([chr(x) for x in self.color_table_indices.flatten()])
        compressed_indices = lzw.compress(color_table_indices, lzw_min, max_code_size)
        for i, byte in enumerate(compressed_indices):
            if i % 255 == 0:
                # Write length of coded stream in bytes (subblock can maximum be 255 long)
                w.write((min(255, len(compressed_indices)-i)).to_bytes(1, byteorder='little'))
            w.write(byte.to_bytes(1, byteorder='little'))
        w.write(bytes([0x00, 0x3b]))  # end of image data, end of GIF file
def post_build(self, pkt, pay):
    """Append the payload to the packet, compressing it first if enabled.

    When the ``auto_compression`` setting is on, the payload is compressed
    with the first matching entry among deflate, gzip and compress (checked
    in that order); otherwise it is appended unchanged.
    """
    if conf.contribs["http"]["auto_compression"]:
        encodings = self._get_encodings()
        if "deflate" in encodings:
            import zlib
            pay = zlib.compress(pay)
        elif "gzip" in encodings:
            pay = gzip_compress(pay)
        elif "compress" in encodings:
            import lzw
            pay = lzw.compress(pay)
    return pkt + pay
def compressed_case_mapping_to_lines(mappings):
    """Serialise case-mapping targets, LZW-compress them and format as lines.

    For every mapping the two shorts ``t[0][0]`` and ``t[0][1]`` are
    appended, followed by the ``case_conditions`` entry for ``t[1]`` (0 when
    that lookup fails for any reason). Size statistics are printed, and the
    compressed values are formatted 12 per line by
    ``lzw.compressed_bytes_to_lines``.
    """
    raw = []
    for mapping in mappings:
        lzw.add_short(raw, mapping[0][0])
        lzw.add_short(raw, mapping[0][1])
        try:
            # TODO: Totally wrong!  Just here for size eval.
            condition = case_conditions[mapping[1]]
        except:
            condition = 0
        lzw.add_short(raw, condition)
    packed = lzw.compress(raw)

    # A single parenthesised argument prints identically with the print
    # statement and the print function.
    print('rewrote {} * 48 = {} bits as {} * 8 = {} bits'.format(
        len(mappings), len(mappings)*48, len(raw), len(raw)*8))
    print('compressed to {} * 16 = {} bits'.format(
        len(packed), len(packed) * 16))
    return lzw.compressed_bytes_to_lines(packed, 12)
def pic_to_file(self, fh):
    """Write ``self.pic`` to ``fh`` as a header, dimensions and packed data.

    The rows of the picture are flattened, run through ``rle.encode`` and
    ``lzw.compress``, and written after a ``0x3058`` header that carries the
    combined length of the dimension record and the packed data.
    """
    rows = self.pic
    dims = pack("HHB", len(rows[0]), len(rows), 11)

    flat = [value for row in rows for value in row]
    payload = bytearray(lzw.compress(rle.encode(flat)))

    fh.write(pack('HH', 0x3058, len(dims) + len(payload)))
    fh.write(dims)
    fh.write(payload)
def lzw_algm(filename):
    """Compress ``filename`` into ``filename + '.lzw'`` and print timings.

    Prints the time taken by the ``lzw.readbytes`` call and by the
    ``lzw.writebytes`` call, then always prints an empty line, even when an
    error propagates.
    """
    _started = time.time()
    try:
        target = filename + '.lzw'

        tick = timeit.default_timer()
        source_bytes = lzw.readbytes(filename)
        elapsed = timeit.default_timer() - tick
        print("reading time", elapsed)

        packed = lzw.compress(source_bytes)

        tick = timeit.default_timer()
        lzw.writebytes(target, packed)
        elapsed = timeit.default_timer() - tick
        print("writing time", elapsed)
    finally:
        print("")
def extract_features(sample, phon_dict, pc_words, prons, freqs, total_freqs,
                     function):
    """Build the feature dictionary for one sample.

    The sample is read as syllables and as words via ``utils.read_sample``.
    The features cover word length, repetitiveness, vocabulary proportions,
    LZW compressibility, size counts, plus whatever
    ``syntactic_features.get_features`` and ``rhyme_features.get_features``
    return. Keys are inserted in a fixed order.

    :return: dict mapping feature names to values.
    """
    sylls = utils.read_sample(sample)
    words = utils.read_sample(sample, words=True)
    flat_words = [w for line in words for w in line]
    nwords = sum(len(line) for line in words)
    joined_text = ' '.join(flat_words)

    features = {}

    # average word length, in characters and in syllables
    features['word-length'] = np.mean([len(w) for w in flat_words])
    features['word-length-syllables'] = np.mean(
        [len(s) for line in sylls for s in utils.group_syllables(line)])

    # alliteration score
    features['alliteration'] = alliteration_score(words, phon_dict)

    # repetitiveness
    features['stressed-vowel-repetitiveness'] = assonance_entropy(
        words, phon_dict)
    features['word-onset-repetitiveness'] = onset_entropy(words, phon_dict)
    features['word-repetitiveness'] = vocab_entropy(words)
    features['syllable-repetitiveness'] = vocab_entropy(sylls)

    # proportion of pc words and pronouns
    features['pc-words'] = \
        sum(1 for w in flat_words if w in pc_words) / nwords
    features['pronouns'] = \
        sum(1 for w in flat_words if w in prons) / nwords

    # lexical diversity wrt general corpus
    features['unigram-ppl'] = unigram_ppl(words, freqs, total_freqs)
    features['lzw'] = len(joined_text) / len(lzw.compress(joined_text))

    # size features
    features['nwords'] = nwords
    features['nchars'] = sum(len(w) for w in flat_words)
    features['nlines'] = len(words)

    # syntactic features (the sample is read again, as before)
    sentences = [' '.join(s) for s in utils.read_sample(sample, words=True)]
    features.update(syntactic_features.get_features(sentences).items())

    # flow features
    features.update(rhyme_features.get_features(words, phon_dict).items())
    features['assonance'] = assonance(words, phon_dict, function)
    features['repeated-words'] = repeated_words(words)

    return features
ce_bytes = [] for ce in collation_elements: x = '0' + ''.join(ce[0]) if ce[0] != ('', ): x += '0' * (4 - len(ce[0])) * 2 lzw.add_int(ce_bytes, int(x, 16)) x = '0' + ''.join(ce[1]) if ce[1] != ('', ): x += '0' * (2 - len(ce[1])) * 2 lzw.add_short(ce_bytes, int(x, 16)) x = '0' + ''.join(ce[2]) if ce[2] != ('', ): x += '0' * (2 - len(ce[2])) * 2 lzw.add_short(ce_bytes, int(x, 16)) compressed_ces = lzw.compress(ce_bytes) def values_to_lines(values, value_type, values_per_chunk): retval = '' chunk_form = '''\ #ifdef _MSC_VER {{ std::array<{0}, {1}> values {{{{ #endif {2} #ifdef _MSC_VER }}}}; it = std::copy(values.begin(), values.end(), it); }} #endif '''
def compress():
    """Compress the fixed ``wap.txt`` sample into ``wap_compressed.txt``."""
    source_path = r'D:\workspace.python\data-compressor\files\wap.txt'
    target_path = \
        r'D:\workspace.python\data-compressor\files\wap_compressed.txt'
    lzw.compress(source_path, target_path)
def lzwEncode(stream, parameters):
    '''
        Method to encode streams using the LZW algorithm, applying a PNG predictor first when one is requested

        @param stream: A PDF stream
        @param parameters: Mapping of filter parameter names (/Predictor, /Columns) to objects exposing getRawValue(), or None / {} when there are none
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1
    '''
    if parameters is None or parameters == {}:
        output = stream
    else:
        if '/Predictor' in parameters:
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = None
        # A missing /Columns used to stay None and crash the row split below;
        # fall back to 1, the default the PDF specification gives it.
        if '/Columns' in parameters:
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = 1
        # /Colors and /BitsPerComponent were read but never used here, so
        # those lookups have been dropped.
        if predictor is not None and predictor != 1:
            # Only the PNG predictors None (10), Sub (11) and Up (12) are
            # accepted. This is now rejected up front, so an unsupported
            # predictor is reported even when the stream has no full row.
            filterByte = predictor - 10
            if filterByte not in (0, 1, 2) or columns is None or columns < 1:
                return (-1, 'Unsupported parameters')
            rows = []
            # Floor division: trailing bytes that do not fill a row are dropped
            for row in range(len(stream) // columns):
                rowdata = [ord(x) for x in stream[row * columns:(row + 1) * columns]]
                if filterByte == 1:
                    # Sub filter: each byte minus its left neighbour, modulo
                    # 256; the first byte of the row is kept as is.
                    rowdata = rowdata[:1] + [(cur - prev) % 256 for prev, cur in zip(rowdata, rowdata[1:])]
                # NOTE(review): filter 2 (Up) is tagged but the row bytes are
                # written unchanged, exactly as before -- confirm intent.
                rows.append(chr(filterByte) + ''.join([chr(x) for x in rowdata]))
            output = ''.join(rows)
        else:
            output = stream
    try:
        # join() instead of repeated += keeps the concatenation linear
        return (0, ''.join(lzw.compress(output)))
    except Exception:
        # This is the compression path: report it as such (the message used
        # to say "decompressing" when parameters were supplied).
        return (-1, 'Error compressing string')
def encode(self, data):
    """Return ``data`` LZW-compressed and joined into a single string.

    Asserts that this filter's ``EarlyChange`` and ``Predictor`` parameters
    are both 1 before compressing.
    """
    for required in ('EarlyChange', 'Predictor'):
        assert self.getParams()[required] == 1
    chunks = lzw.compress(data)
    return ''.join(chunks)
def compressToOutputFile(input_file, output_file_name, option):
    """Write ``input_file`` to ``output_file_name`` using the chosen method.

    ``option`` selects what is done:
      1 -- plain copy of the input (text mode),
      2 -- byte-wise encoding built by ``shannon_fano_encoder``, written
           through ``byteWriter`` with a table header,
      3 -- ``lzw.compress``,
      4 -- ``arcode.ArithmeticCode``.
    Any other value only prints the file name and option. File sizes are
    printed before and after each method.

    Uses the module-level globals ``tupleList`` and ``bitStream``.
    NOTE(review): this is Python 2 code (print statements, dict.iteritems,
    writing chr() to a binary file).
    """
    print ("fileName: " + output_file_name)
    print ("option: " + str(option))
    global tupleList
    global bitStream
    if option == 1:
        file = open(output_file_name, 'w')
        # NOTE(review): the input handle opened here is never closed explicitly
        file.write(open(input_file,'r').read())
        file.close()
        print "Size of input file in bytes: "
        print_file_size(input_file)
        print "Size of output file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)
    if option == 2:
        # read the whole input file into a byte array
        fileSize = os.path.getsize(str(os.path.abspath((input_file))))
        fi = open(input_file, 'rb')
        # byteArr = map(ord, fi.read(fileSize))
        byteArr = bytearray(fi.read(fileSize))
        fi.close()
        fileSize = len(byteArr)
        print "Size of input file in bytes: ", fileSize
        # calculate the total number of each byte value in the file
        freqList = [0] * 256
        for b in byteArr:
            freqList[b] += 1
        # create a list of (frequency, byteValue, encodingBitStr) tuples
        tupleList = []
        for b in range(256):
            if freqList[b] > 0:
                tupleList.append((freqList[b], b, ''))
        # sort the list according to the frequencies descending
        tupleList = sorted(tupleList, key=lambda tup: tup[0], reverse = True)
        # presumably fills in the encodingBitStr of every entry of the global
        # tupleList in place -- verify against shannon_fano_encoder
        shannon_fano_encoder(0, len(tupleList) - 1)
        # print 'The list of (frequency, byteValue, encodingBitStr) tuples:'
        # print tupleList
        # print
        # create a dictionary of byteValue : encodingBitStr pairs
        dic = dict([(tup[1], tup[2]) for tup in tupleList])
        del tupleList # unneeded anymore
        # print 'The dictionary of byteValue : encodingBitStr pairs:'
        # print dic
        # write a list of (byteValue,3-bit(len(encodingBitStr)-1),encodingBitStr)
        # tuples as the compressed file header
        bitStream = ''
        fo = open(output_file_name, 'wb')
        fo.write(chr(len(dic) - 1)) # first write the number of encoding tuples
        for (byteValue, encodingBitStr) in dic.iteritems():
            # convert the byteValue into 8-bit and send to be written into file
            bitStr = bin(byteValue)
            bitStr = bitStr[2:] # remove 0b
            bitStr = '0' * (8 - len(bitStr)) + bitStr # add 0's if needed for 8 bits
            byteWriter(bitStr, fo)
            # convert len(encodingBitStr) to 3-bit and send to be written into file
            # NOTE(review): a code longer than 8 bits does not fit in this
            # 3-bit field (the padding count goes negative and the field
            # grows) -- confirm such codes cannot occur.
            bitStr = bin(len(encodingBitStr) - 1) # 0b0 to 0b111
            bitStr = bitStr[2:] # remove 0b
            bitStr = '0' * (3 - len(bitStr)) + bitStr # add 0's if needed for 3 bits
            byteWriter(bitStr, fo)
            # send encodingBitStr to be written into file
            byteWriter(encodingBitStr, fo)
        # write 32-bit (input file size)-1 value
        bitStr = bin(fileSize - 1)
        bitStr = bitStr[2:] # remove 0b
        bitStr = '0' * (32 - len(bitStr)) + bitStr # add 0's if needed for 32 bits
        byteWriter(bitStr, fo)
        # write the encoded data
        for b in byteArr:
            byteWriter(dic[b], fo)
        byteWriter('0' * 8, fo) # to write the last remaining bits (if any)
        fo.close()
        print "Size of compressed putput file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)
    if option == 3:
        print "Size of input file in bytes: "
        print_file_size(input_file)
        mybytes = lzw.readbytes(input_file)
        lessbytes = lzw.compress(mybytes)
        lzw.writebytes(output_file_name, lessbytes)
        print "Size of compressed putput file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)
    if option == 4:
        print "Size of input file in bytes: "
        print_file_size(input_file)
        ar = arcode.ArithmeticCode(False)
        ar.encode_file(input_file, output_file_name)
        print "Size of compressed putput file ("+ output_file_name +") in bytes: "
        print_file_size(output_file_name)
def test_compress_decompress_2():
    """Round-trip "rererere" through compress() and decompress()."""
    original = "rererere"
    packed, _unused, dictionary = compress(original)
    assert decompress(packed, dictionary) == original
def lzwEncode(stream, parameters):
    '''
        Method to encode streams using the LZW algorithm, applying a PNG predictor first when one is requested

        @param stream: A PDF stream
        @param parameters: Mapping of filter parameter names (/Predictor, /Columns) to objects exposing getRawValue(), or None / {} when there are none
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1
    '''
    if parameters is None or parameters == {}:
        output = stream
    else:
        if '/Predictor' in parameters:
            predictor = parameters['/Predictor'].getRawValue()
        else:
            predictor = None
        # A missing /Columns used to stay None and crash the row split below;
        # fall back to 1, the default the PDF specification gives it.
        if '/Columns' in parameters:
            columns = parameters['/Columns'].getRawValue()
        else:
            columns = 1
        # /Colors and /BitsPerComponent were read but never used here, so
        # those lookups have been dropped.
        if predictor is not None and predictor != 1:
            # Only the PNG predictors None (10), Sub (11) and Up (12) are
            # accepted. This is now rejected up front, so an unsupported
            # predictor is reported even when the stream has no full row.
            filterByte = predictor - 10
            if filterByte not in (0, 1, 2) or columns is None or columns < 1:
                return (-1, 'Unsupported parameters')
            rows = []
            # Floor division: trailing bytes that do not fill a row are dropped
            for row in range(len(stream) // columns):
                rowdata = [ord(x) for x in stream[row * columns:(row + 1) * columns]]
                if filterByte == 1:
                    # Sub filter: each byte minus its left neighbour, modulo
                    # 256; the first byte of the row is kept as is.
                    rowdata = rowdata[:1] + [(cur - prev) % 256 for prev, cur in zip(rowdata, rowdata[1:])]
                # NOTE(review): filter 2 (Up) is tagged but the row bytes are
                # written unchanged, exactly as before -- confirm intent.
                rows.append(chr(filterByte) + ''.join([chr(x) for x in rowdata]))
            output = ''.join(rows)
        else:
            output = stream
    try:
        # join() instead of repeated += keeps the concatenation linear
        return (0, ''.join(lzw.compress(output)))
    except Exception:
        # This is the compression path: report it as such (the message used
        # to say "decompressing" when parameters were supplied).
        return (-1, 'Error compressing string')
def write_tiff(filename, data):
    """
    Write `data` to `filename` as an LZW-compressed TIFF with float samples.

    expects data to be a 3-dimensional numpy array (height, width, channels)
    of type numpy.float32; the channel count must be 4 (asserted below).

    The image is split into strips of ROWSPERSTRIP rows. Each strip is
    byte-shuffled, differenced along the row axis, and compressed with
    lzw.compress. The file layout is: header, offset of the directory, the
    strips, the directory entries, padding, then any values too large to
    fit inline in a directory entry.

    NOTE(review): relies on Python 2 semantics (integer `/`, writing str to
    a binary file) -- confirm before running on Python 3.
    """
    assert len(data.shape) == 3
    height, width, nrchannels = data.shape
    assert nrchannels == 4
    ROWSPERSTRIP = 32
    FIRSTSTRIP = 8  # file offset at which the first strip starts
    BITSPERSAMPLE = 32
    # These two lists are referenced by `directory` below and filled in by
    # the strip loop, so the directory sees the final values.
    stripoffsets = []
    stripbytecounts = []
    # tag name -> (value, value type)
    directory = {
        "width": (width, VT_SHORT),
        "height": (height, VT_SHORT),
        "bitspersample": (nrchannels * [BITSPERSAMPLE], VT_SHORT),
        "compression": (COMPRESSION_LZW, VT_SHORT),
        "photometric": (PHOTOMETRIC_RGB, VT_SHORT),
        "stripoffsets": (stripoffsets, VT_LONG),
        "orientation": (1, VT_SHORT),
        "samplesperpixel": (nrchannels, VT_SHORT),
        "rowsperstrip": (ROWSPERSTRIP, VT_SHORT),
        "stripbytecounts": (stripbytecounts, VT_LONG),
        "planarconfig": (1, VT_SHORT),
        "xposition": ((0, 1), VT_RATIONAL),
        "yposition": ((0, 1), VT_RATIONAL),
        "datetime": ("some time long ago", VT_ASCII),
        "predictor": (PREDICTOR_FLOAT, VT_SHORT),
        "extrasamples": (EXTRASAMPLES_ALPHA, VT_SHORT),
        "sampleformat": (nrchannels * [SAMPLEFORMAT_FLOAT], VT_SHORT),
        "xml": ("dontcare", VT_BYTE)
    }
    nrstrips = int(math.ceil(float(height) / ROWSPERSTRIP))
    stripstart = FIRSTSTRIP
    stripdata = []
    for stripnr in range(nrstrips):
        # the last strip may hold fewer than ROWSPERSTRIP rows
        nrrows = min(height - ROWSPERSTRIP * stripnr, ROWSPERSTRIP)
        bytespersample = BITSPERSAMPLE / 8
        stripstring = data[stripnr * ROWSPERSTRIP:][:ROWSPERSTRIP].tostring()
        stripbytes = numpy.fromstring(stripstring, dtype=numpy.uint8)
        # reverse the thing we do in reading
        cumsummedstrip = (stripbytes.reshape(
            (nrrows, width * nrchannels, bytespersample))[:, :, ::-1].
            transpose(0, 2, 1))
        reshapedcumsummedstrip = cumsummedstrip.reshape(
            (nrrows, width * bytespersample, nrchannels))
        # now the second step is slightly more complex than in the read-case
        diffstrip = numpy.diff(reshapedcumsummedstrip, axis=1)
        # because the diffstrip only contains diffs, not the starting value
        # so we have to re-attach the starting column
        predictedstrip = numpy.concatenate((reshapedcumsummedstrip[:, 0:1, :], diffstrip), axis=1).tostring()
        compressedstrip = lzw.compress(predictedstrip)
        stripoffsets.append(stripstart)
        stripbytecounts.append(len(compressedstrip))
        stripstart += len(compressedstrip)
        stripdata.append(compressedstrip)
    log.debug("Found strips of sizes: %s", repr(stripbytecounts))
    with open(filename, "w+b") as f:
        f.write(TIFF_HEADER)
        # the directory is written directly after the last strip
        directorystart = stripstart
        write_uint32(f, directorystart)
        # pad.... Not sure if we need the padding at all or can just have the
        # first strip start at position 8....
        while f.tell() < FIRSTSTRIP:
            f.write('\x00')
        for stripstring in stripdata:
            f.write(stripstring)
        assert f.tell() == directorystart
        write_uint16(f, len(directory))
        # values that do not fit in an entry are collected in `extradata`
        # and written after the directory, starting at this offset
        extradatastart = (directorystart + 2 + DIRECTORY_ENTRY_LENGTH * len(directory) + len(END_OF_DIRECTORY_PADDING))
        extradata = ""
        assert len(directory) == len(FIELD)
        # entries are written in the order given by FIELD
        for info in FIELD:
            tagname, tag = info[:2]
            assert tagname in directory
            value = directory[tagname][0]
            vt_type = directory[tagname][1]
            write_uint16(f, tag)
            write_uint16(f, vt_type)
            if isinstance(value, list):
                values = value
            else:
                values = [value]
            if vt_type == VT_BYTE:
                towrite = value
            elif vt_type == VT_ASCII:
                towrite = "\x00".join(values) + "\x00"
            elif vt_type in [VT_SHORT, VT_LONG]:
                packformat = "<" + len(values) * VALUETYPE[vt_type][0]
                towrite = struct.pack(packformat, *values)
            else:
                assert vt_type == VT_RATIONAL
                packformat = "<" + len(values) * VALUETYPE[vt_type][0]
                # flatten the (numerator, denominator) tuples
                topack = sum(values, ())
                towrite = struct.pack(packformat, *topack)
            # number of values = byte length / VALUETYPE[vt_type][1]
            length = len(towrite) / VALUETYPE[vt_type][1]
            write_uint32(f, length)
            if len(towrite) > 4:
                # too large for the 4-byte slot: store a pointer instead
                pointer = extradatastart + len(extradata)
                write_uint32(f, pointer)
                extradata += towrite
            else:
                # stored inline, zero-padded to 4 bytes
                f.write((towrite + 4 * '\x00')[:4])
        f.write(END_OF_DIRECTORY_PADDING)
        assert f.tell() == extradatastart
        if extradata:
            f.write(extradata)
import lzw

# Round-trip check: compress ElQuijote.txt, save the result, decompress it
# again and print whether the original bytes were recovered.
mybytes = lzw.readbytes("ElQuijote.txt")

# Materialise the compressed chunks once. They are needed twice below (for
# the output file and for decompression); assuming lzw.compress yields its
# output lazily, the first join would otherwise exhaust it and
# decompression would see no data.
lessbytes = list(lzw.compress(mybytes))

# The context manager closes the file even if the write fails.
with open("Compressed.txt", 'wb') as outFile:
    outFile.write(b"".join(lessbytes))

newbytes = b"".join(lzw.decompress(lessbytes))
oldbytes = b"".join(lzw.readbytes("ElQuijote.txt"))

print(oldbytes == newbytes)
def test_compress_decompress_5():
    """Round-trip "abcd*dccacbdda*aaddcba*" through compress() and decompress()."""
    original = "abcd*dccacbdda*aaddcba*"
    packed, _unused, dictionary = compress(original)
    assert decompress(packed, dictionary) == original
def approximate_KC_string(x):
    """Return the bit count that calculate_bits reports for compress(x)."""
    return calculate_bits(compress(x))
import lzw

# Compress the sample text, show the result, then decompress it to a file.
print("Compressing text...\n")
compressed = lzw.compress('darth_plagueis.txt')

print("Compressed text: ")
print(compressed, end="\n\n")

print("Decompressing text...")
lzw.decompress(compressed, 'darth_plagueis_out.txt')
def determinRepetition(self, text):
    """Return the ratio of the encoded text length to its compressed length.

    The numerator is the length of ``text`` after UTF-8 encoding; the
    denominator is the length of the joined output of ``lzw.compress``.

    NOTE(review): ``str.encode`` is applied to an already encoded value and
    both sides are joined with ``""``; this looks like it depends on
    Python 2 ``str`` semantics -- confirm before running on Python 3.
    """
    compressed = lzw.compress(text)
    sbytes = str.encode(text.encode('utf8'))
    ratio = float(len("".join(sbytes))) / float(len("".join(compressed)))
    return ratio
import lzw

# Step 1: compress 3_1.spc into 3_1.spc.compressed.
infile = lzw.readbytes("3_1.spc")
compressed = lzw.compress(infile)
lzw.writebytes("3_1.spc.compressed", compressed)
# presumably releases the underlying file once the data has been written --
# verify against lzw.readbytes
infile.close()

# Step 2: decompress that output into 3_1.spc.decompressed.
infile = lzw.readbytes("3_1.spc.compressed")
uncompressed = lzw.decompress(infile)
lzw.writebytes("3_1.spc.decompressed", uncompressed)
infile.close()
nfkc_quick_check = 'quick_check::yes' if cp in quick_check_maps['NFKC']: nfkc_quick_check = quick_check_maps['NFKC'][cp] lzw.add_cp(prop_bytes_, cp) lzw.add_short(prop_bytes_, canonical_decomp[0]) lzw.add_short(prop_bytes_, canonical_decomp[1]) lzw.add_short(prop_bytes_, compatible_decomp[0]) lzw.add_short(prop_bytes_, compatible_decomp[1]) lzw.add_byte(prop_bytes_, int(ccc)) lzw.add_byte(prop_bytes_, \ quick_checks_to_byte(nfd_quick_check, nfkd_quick_check)) lzw.add_byte(prop_bytes_, \ quick_checks_to_byte(nfc_quick_check, nfkc_quick_check)) value_per_line = 12 compressed_bytes = lzw.compress(prop_bytes_) props_lines, num_shorts = lzw.compressed_bytes_to_lines( compressed_bytes, value_per_line) #print 'rewrote {} * 144 = {} bits as {} * 8 = {} bits'.format(len(all_cps), len(all_cps)*144, len(prop_bytes_), len(prop_bytes_)*8) #print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16) cpp_file = open('normalization_data_cp_props.cpp', 'w') cpp_file.write( cp_props_file_form.format(canon_all_cps_string, len(canon_all_cps), compat_all_cps_string, len(compat_all_cps), props_lines, num_shorts, len(all_cps))) def cps_string(cps): cps = map(lambda x: hex(x)[2:], cps)
def test_compress_decompress_4():
    """Round-trip "pourquoi pas" through compress() and decompress()."""
    original = "pourquoi pas"
    packed, _unused, dictionary = compress(original)
    assert decompress(packed, dictionary) == original
def test_compress_decompress_1():
    """Round-trip "ab*cde*fgh*" through compress() and decompress()."""
    original = "ab*cde*fgh*"
    packed, _unused, dictionary = compress(original)
    assert decompress(packed, dictionary) == original
def test_compress_decompress_3():
    """Round-trip "coucou" through compress() and decompress()."""
    original = "coucou"
    packed, _unused, dictionary = compress(original)
    assert decompress(packed, dictionary) == original
def encode(self, data):
    """Return ``data`` LZW-compressed and joined into a single string.

    Asserts that this filter's ``EarlyChange`` and ``Predictor`` parameters
    are both 1 before compressing.
    """
    for required in ('EarlyChange', 'Predictor'):
        assert self.getParams()[required] == 1
    chunks = lzw.compress(data)
    return ''.join(chunks)
def approximate_KC_concat(x, y):
    """Return the bit count that calculate_bits reports for compress(x + y)."""
    return calculate_bits(compress(x + y))
def encode(self, data):
    """Return ``data`` compressed with ``lzw.compress``.

    Asserts that this filter's ``EarlyChange`` and ``Predictor`` parameters
    are both 1 before compressing.

    The ``self`` parameter was missing from the signature although the body
    reads ``self.getParams()``, so every call failed before reaching the
    compressor (TypeError when invoked as a method, NameError otherwise).

    :param data: the data to compress.
    :return: whatever ``lzw.compress`` returns for ``data`` (not joined).
    """
    assert self.getParams()["EarlyChange"] == 1
    assert self.getParams()["Predictor"] == 1
    return lzw.compress(data)