import lzma


def compression_distance(x, y, l_x=None, l_y=None):
    """Normalized compression distance between strings x and y.

    l_x and l_y let callers pass pre-computed compressed lengths of x and y.
    """
    if x == y:
        return 0
    x_b = x.encode('utf-8')
    y_b = y.encode('utf-8')
    if l_x is None:
        l_x = len(lzma.compress(x_b))
    if l_y is None:
        l_y = len(lzma.compress(y_b))
    l_xy = len(lzma.compress(x_b + y_b))
    l_yx = len(lzma.compress(y_b + x_b))
    dist = (min(l_xy, l_yx) - min(l_x, l_y)) / max(l_x, l_y)
    return dist
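# Usage sketch (the strings below are illustrative, not from the original
# code): the distance is near 0 for similar inputs and grows toward 1 for
# unrelated ones.
a = 'the quick brown fox jumps over the lazy dog ' * 20
b = 'the quick brown fox leaps over the lazy dog ' * 20
c = 'nothing in common with the sentences above, entirely different text ' * 20
print(compression_distance(a, b))  # small: the inputs share most of their content
print(compression_distance(a, c))  # larger: little shared content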
def xz_compress(data):
    '''compress `data` using backports.lzma, or if that's not available
    then the command-line `xz --compress` tool
    '''
    if xz is not None:
        try:
            data = xz.compress(data)
        except Exception:
            logger.error('compress of %s bytes failed', len(data))
            raise
    else:
        ## launch xz child
        xz_child = subprocess.Popen(
            ['xz', '--compress'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        ## use communicate to pass the data incrementally to the child
        ## while reading the output, to avoid blocking
        data, errors = xz_child.communicate(data)
        assert not errors, errors
    return data
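# Minimal usage sketch, assuming `xz` is bound to backports.lzma (or None when
# it is not installed) and `logger`/`subprocess` are set up elsewhere in the
# module:
payload = b'some archival payload ' * 1000
blob = xz_compress(payload)
assert blob.startswith(b'\xfd7zXZ\x00')  # xz container magic bytes
assert len(blob) < len(payload)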
def open_db(self):
    self.terms_ldb = leveldb.LevelDB(self.terms_fl)
    self.docs_ldb = leveldb.LevelDB(self.docs_fl)

    self.doc_buffer_size = 0
    self.term_buffer_size = 0

    #self.doc_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype="S%d" % self.max_doc_size)
    self.doc_flush_buffer = [None] * self.max_doc_flush_buffer_size
    self.term_flush_buffer = np.empty(self.max_term_flush_buffer_size, dtype="S%d" % self.max_term_size)
    self.doc_id_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype=np.int64)
    self.term_id_flush_buffer = np.empty(self.max_term_flush_buffer_size, dtype=np.int64)

    if self.compression == COMPRESSION.NONE:
        self.compress = lambda string: string
        self.decompress = lambda string: string
    elif self.compression == COMPRESSION.ZLIB:
        import zlib
        self.compress = lambda string: zlib.compress(string, self.compression_level)
        self.decompress = lambda string: zlib.decompress(string)
    elif self.compression == COMPRESSION.LZMA:
        import backports.lzma as lzma
        self.compress = lambda string: lzma.compress(bytearray(string), format=lzma.FORMAT_RAW)
        self.decompress = lambda data: lzma.decompress(data, format=lzma.FORMAT_RAW)
    elif self.compression == COMPRESSION.LZ4R:
        import lz4
        self.compress = lambda string: lz4.compress(string)
        self.decompress = lambda string: lz4.decompress(string)
    elif self.compression == COMPRESSION.LZ4H:
        import lz4
        self.compress = lambda string: lz4.compressHC(string)
        self.decompress = lambda string: lz4.decompress(string)
    else:
        raise Exception("Wrong compression type %r" % self.compression)
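# Whichever codec is selected, self.compress/self.decompress are used as an
# opaque round-trip pair. A sketch of that contract, assuming COMPRESSION.ZLIB
# with compression_level=6 (values chosen for illustration only):
import zlib

compress = lambda string: zlib.compress(string, 6)
decompress = lambda string: zlib.decompress(string)
assert decompress(compress(b'payload')) == b'payload'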
def close(self):
    """Compress and write remaining data, and also write the index."""
    compressed = lzma.compress(self._queue)
    self._outfile.write(compressed)

    size = ((len(self._index) - 1) * self.in_block_size) + len(self._queue)

    self._header.magic = b'ZEEX'
    self._header.block_size = self.in_block_size
    self._header.data_length = size
    self._header.cdata_length = len(compressed) + self._last_out_pos

    if USEBUFFER:
        self._outfile.write(buffer(ctypes.c_uint32(len(self._index)))[:])
    else:
        self._outfile.write(ctypes.c_uint32(len(self._index)))

    #sys.stderr.write("Index: \n")
    for idx, i in enumerate(self._index):
        #sys.stderr.write("\t %d: %d\n" % (idx, i))
        if USEBUFFER:
            self._outfile.write(buffer(ctypes.c_uint64(i))[:])
        else:
            self._outfile.write(ctypes.c_uint64(i))

    self._outfile.seek(0)
    if USEBUFFER:
        self._outfile.write(buffer(self._header)[:])
    else:
        self._outfile.write(self._header)

    self._outfile.close()
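# File layout implied by this close() method (descriptive sketch only):
#
#   [ header: magic 'ZEEX', block_size, data_length, cdata_length ]
#   [ lzma-compressed block 0 ][ block 1 ] ... [ final partial block ]
#   [ uint32: number of index entries ][ uint64 cumulative offsets ]
#
# The seek(0) before writing the header assumes a header-sized placeholder was
# written when the file was opened; that setup code is not shown here.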
def sync(package, source):
    # update local package with source
    # add package -> mirror url in
    deps = source.find_deps(package)
    all_pkgs = [package] + list(deps)
    index_file = source.export_index(all_pkgs)

    # save file into local
    # if you use deb http://[hostname]/ubuntu main/, then use this
    index_folder = mirror_root + '/main'
    # else, if you use deb http://[hostname]/ubuntu bionic main, then use this
    # index_folder = mirror_root + '/' + source.get_index_path()
    os.system('mkdir -p ' + index_folder)

    index_full_path = index_folder + '/' + 'Packages'
    with open(index_full_path, 'w') as f:
        f.write(index_file)

    index_compress_full_path = index_folder + '/' + 'Packages.xz'
    # lzma.compress expects and returns bytes, so encode the index text and
    # write the compressed payload in binary mode
    with open(index_compress_full_path, 'wb') as f:
        f.write(lzma.compress(index_file.encode('utf-8')))

    release_full_path = index_folder + '/' + 'Release'
    with open(release_full_path, 'w') as f:
        f.write(source.export_release(index_full_path, index_compress_full_path))

    download_map = source.export_download_map(all_pkgs)
    for key in download_map:
        bin_full_path = mirror_root + '/' + key
        bin_folder = mirror_root + '/' + '/'.join(key.split('/')[:-1])
        print(bin_folder)
        os.system('mkdir -p ' + bin_folder)

        # skip files that are already present with the expected size
        size = 0
        try:
            size = os.stat(bin_full_path).st_size
        except OSError:
            pass
        if size == download_map[key]['size']:
            continue

        if http_proxy == "":
            command = 'wget ' + download_map[key]['url'] + ' -O ' + bin_full_path
        else:
            command = ('wget -e use_proxy=yes -e http_proxy=' + http_proxy +
                       ' ' + download_map[key]['url'] + ' -O ' + bin_full_path)
        print(command)
        os.system(command)
def write(self, data):
    """Feed data to the compressor.

    The data buffer will not be compressed until either it reaches the
    block size or the "close" function is called on the writer object.

    @param data Data to be fed to the writer

    WARNING: These methods are made to resemble File class methods only
    for convenience. They may not behave intricately like one.
    """
    self._queue += data
    while len(self._queue) >= self.in_block_size:
        data = self._queue[0:self.in_block_size]
        self._queue = self._queue[self.in_block_size:]
        compressed = lzma.compress(data)
        self._outfile.write(compressed)
        cur_out_pos = self._last_out_pos + len(compressed)
        self._index.append(cur_out_pos)
        self._last_out_pos = cur_out_pos
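# Minimal standalone sketch of the block-index idea used by write()/close()
# above (illustrative only; the real writer also emits the ZEEX header and a
# packed index at the end of the file):
import lzma

def compress_in_blocks(payload, block_size=64 * 1024):
    index, out, last = [], [], 0
    for start in range(0, len(payload), block_size):
        blob = lzma.compress(payload[start:start + block_size])
        out.append(blob)
        last += len(blob)
        index.append(last)  # cumulative offset of each compressed block
    return b''.join(out), index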
import pickle
import lzma


def dumps(obj):
    """Serialize Python object via pickle into a compressed string."""
    return lzma.compress(pickle.dumps(obj))
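# Usage sketch; the inverse `loads` shown here is an assumption for the
# round-trip demonstration, not part of the original module:
def loads(blob):
    return pickle.loads(lzma.decompress(blob))

blob = dumps({'answer': 42, 'items': list(range(10))})
assert loads(blob) == {'answer': 42, 'items': list(range(10))}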
def Z(contents):
    return len(lzma.compress(contents, format=lzma.FORMAT_RAW, filters=lzma_filters))
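# `lzma_filters` is referenced but not defined in this snippet. FORMAT_RAW
# requires an explicit filter chain, so a plausible definition (an assumption,
# not the original author's) would be:
import lzma

lzma_filters = [{'id': lzma.FILTER_LZMA2, 'preset': 9}]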
## could check something about the FCs
#from dossier.fc.feature_collection import FeatureCollectionChunk as FCChunk
#fcc = FCChunk(path)
#for fc in fcc:
#    print fc['feature_name']

## assume the incoming data is some long-term archival stuff in XZ
## compressed format, and you want to see how slow XZ is:
xz_data = open(args.path).read()

start = time.time()
data = xz.decompress(xz_data)
decompression_time = time.time() - start

start = time.time()
xz_data2 = xz.compress(data)
assert xz_data2 == xz_data
compression_time = time.time() - start

def report(rec):
    rec['MB'] = rec['bytes'] / 2**20
    rec['ratio'] = rec['bytes'] / len(data)
    ctime = rec.get('compression_time')
    rec['compression_rate'] = ctime and rec['MB'] / ctime or float('inf')
    dtime = rec.get('decompression_time')
    rec['decompression_rate'] = dtime and rec['MB'] / dtime or 0
    print('%(name)s:\t%(MB)d MB of FC data, %(ratio).3f compression, '
          '%(compression_time).3f seconds --> %(compression_rate).3f MB/sec compression, '
          '%(decompression_time).3f seconds --> %(decompression_rate).3f MB/sec decompression'
          % rec)
    sys.stdout.flush()

raw_rec = dict(name='raw', bytes=len(data),
               compression_time=0, decompression_time=0)
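# Sketch of how the measurements above would plausibly feed report() (the
# xz record below is an assumption, modeled on raw_rec):
report(raw_rec)
xz_rec = dict(name='xz', bytes=len(xz_data),
              compression_time=compression_time,
              decompression_time=decompression_time)
report(xz_rec)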
if sdist_formats == 'xztar':
    tarxz_path = os.path.join(release_dir, 'deluge-%s.tar.xz' % version)
else:
    # Compress release archive with xz
    tar_path = os.path.join(release_dir, 'deluge-%s.tar' % version)
    tarxz_path = tar_path + '.xz'
    print('Compressing tar (%s) with xz' % tar_path)
    try:
        from backports import lzma
    except ImportError:
        print('backports.lzma not installed, falling back to xz shell command')
        call(['xz', '-e9zkf', tar_path])
    else:
        with open(tar_path, 'rb') as tar_file, open(tarxz_path, 'wb') as xz_file:
            xz_file.write(
                lzma.compress(bytes(tar_file.read()), preset=9 | lzma.PRESET_EXTREME)
            )

# Calculate shasum and add to sha256sums.txt
with open(tarxz_path, 'rb') as _file:
    sha256sum = '%s %s' % (
        sha256(_file.read()).hexdigest(),
        os.path.basename(tarxz_path),
    )
with open(os.path.join(release_dir, 'sha256sums.txt'), 'w') as _file:
    _file.write(sha256sum + '\n')
print('Complete: %s' % release_dir)
def compress(data, *args, **kwargs):
    return lzma.compress(data, *args, **kwargs)
import lzma


def compress_image(image):
    """Return the size in bytes of the LZMA-compressed image buffer."""
    compressed = lzma.compress(bytearray(image))
    # len() gives the compressed payload size; __sizeof__() would also count
    # Python object overhead.
    return len(compressed)
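# Usage sketch with synthetic image bytes (illustrative only): highly
# repetitive pixel data compresses far below its raw size.
fake_image = bytes(1024 * 1024)       # 1 MiB of zero bytes
print(compress_image(fake_image))     # a few hundred bytes at most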
def put(self, items):
    zitems = lzma.compress(cPickle.dumps(items))
    self.bigcall('put', '/q/%s' % self.queue, data=zitems)
def dbput(self, table, doc):
    # doc may be a list of docs
    zdoc = lzma.compress(cPickle.dumps(doc))
    self.bigcall('put', '/db/%s' % table, zdoc)
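# The read path is not shown above; a symmetric decode step (an assumption,
# not part of the original client) would simply reverse the pipeline:
def _decode(blob):
    return cPickle.loads(lzma.decompress(blob))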