Example #1
import lzma

def compression_distance(x, y, l_x=None, l_y=None):
    """Normalized compression distance between strings x and y.

    l_x and l_y allow precomputed compressed lengths to be passed in.
    """
    if x == y:
        return 0
    x_b = x.encode('utf-8')
    y_b = y.encode('utf-8')
    if l_x is None:
        l_x = len(lzma.compress(x_b))
        l_y = len(lzma.compress(y_b))
    l_xy = len(lzma.compress(x_b + y_b))
    l_yx = len(lzma.compress(y_b + x_b))
    dist = (min(l_xy, l_yx) - min(l_x, l_y)) / max(l_x, l_y)
    return dist
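A brief usage sketch (the inputs are illustrative, not from the source): identical strings short-circuit to 0, while unrelated strings compress poorly together and yield a larger distance. For very short inputs the fixed LZMA container overhead dominates, so the values are only meaningful on reasonably sized texts.

a = 'the quick brown fox jumps over the lazy dog ' * 50
b = 'lorem ipsum dolor sit amet consectetur elit ' * 50
print(compression_distance(a, a))  # 0: identical inputs short-circuit
print(compression_distance(a, b))  # noticeably larger for unrelated texts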
Example #2
def xz_compress(data):
    '''compress `data` using backports.lzma, or if that's not
    available then the command-line `xz --compress` tool

    '''
    if xz is not None:
        try:
            data = xz.compress(data)
        except Exception:
            logger.error('compress of %s bytes failed', len(data))
            raise

    else:
        ## launch xz child
        xz_child = subprocess.Popen(
            ['xz', '--compress'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        ## use communicate to pass the data incrementally to the child
        ## while reading the output, to avoid blocking
        data, errors = xz_child.communicate(data)

        assert not errors, errors

    return data
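A round-trip sketch for the function above (assumptions: the module-level xz name is bound to backports.lzma, or to None to exercise the subprocess path, and stdlib lzma is imported for the check; both paths emit a standard .xz container that lzma.decompress auto-detects):

payload = b'some archival payload ' * 1000
blob = xz_compress(payload)
assert lzma.decompress(blob) == payload  # holds for either code path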
Example #3
    def open_db(self):
        self.terms_ldb = leveldb.LevelDB(self.terms_fl)
        self.docs_ldb = leveldb.LevelDB(self.docs_fl)

        self.doc_buffer_size = 0
        self.term_buffer_size = 0

        #self.doc_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype="S%d" % self.max_doc_size)
        self.doc_flush_buffer = [None] * self.max_doc_flush_buffer_size
        self.term_flush_buffer = np.empty(self.max_term_flush_buffer_size, dtype="S%d" % self.max_term_size)
        self.doc_id_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype=np.int64)
        self.term_id_flush_buffer = np.empty(self.max_term_flush_buffer_size, dtype=np.int64)

        if self.compression == COMPRESSION.NONE:
            self.compress = lambda string: string
            self.decompress = lambda string: string
        elif self.compression == COMPRESSION.ZLIB:
            import zlib
            self.compress = lambda string: zlib.compress(string, self.compression_level)
            self.decompress = lambda string: zlib.decompress(string)
        elif self.compression == COMPRESSION.LZMA:
            import backports.lzma as lzma
            # FORMAT_RAW has no container header, so both sides need the same explicit
            # filter chain; the preset reuses self.compression_level (both are 0-9 scales)
            lzma_filters = [{'id': lzma.FILTER_LZMA2, 'preset': self.compression_level}]
            self.compress = lambda string: lzma.compress(bytearray(string), format=lzma.FORMAT_RAW, filters=lzma_filters)
            self.decompress = lambda data: lzma.decompress(data, format=lzma.FORMAT_RAW, filters=lzma_filters)
        elif self.compression == COMPRESSION.LZ4R:
            import lz4
            self.compress = lambda string: lz4.compress(string)
            self.decompress = lambda string: lz4.decompress(string)
        elif self.compression == COMPRESSION.LZ4H:
            import lz4
            self.compress = lambda string: lz4.compressHC(string)
            self.decompress = lambda string: lz4.decompress(string)
        else:
            raise Exception("Wrong compression type %r" % self.compression)
Example #4
    def close(self):
        """Compress and write remaining data, and also write the index."""
        compressed = lzma.compress(self._queue)
        self._outfile.write(compressed)

        size = ((len(self._index) - 1) * self.in_block_size) + len(self._queue)
        self._header.magic = b'ZEEX'
        self._header.block_size = self.in_block_size
        self._header.data_length = size
        self._header.cdata_length = len(compressed) + self._last_out_pos

        if USEBUFFER:
            self._outfile.write(buffer(ctypes.c_uint32(len(self._index)))[:])
        else:
            self._outfile.write(ctypes.c_uint32(len(self._index)))
        #sys.stderr.write("Index: \n")
        for idx, i in enumerate(self._index):
            #sys.stderr.write("\t %d: %d\n"% (idx,i))
            if USEBUFFER:
                self._outfile.write(buffer(ctypes.c_uint64(i))[:])
            else:
                self._outfile.write(ctypes.c_uint64(i))
        self._outfile.seek(0)
        if USEBUFFER:
            self._outfile.write(buffer(self._header)[:])
        else:
            self._outfile.write(self._header)

        self._outfile.close()
Example #5
def sync(package, source):
    # update the local package from source
    # and record the package -> mirror url mapping
    deps = source.find_deps(package)
    all_pkgs = [package] + list(deps)
    index_file = source.export_index(all_pkgs)

    # Save the index file into the local mirror.
    # If your sources.list entry is `deb http://[hostname]/ubuntu main/`, use this:
    index_folder = mirror_root + '/main'
    # If it is `deb http://[hostname]/ubuntu bionic main`, use this instead:
    # index_folder = mirror_root + '/' + source.get_index_path()

    os.system('mkdir -p ' + index_folder)

    index_full_path = index_folder + '/' + 'Packages'
    with open(index_full_path, 'w') as f:
        f.write(index_file)

    index_compress_full_path = index_folder + '/' + 'Packages.xz'

    with open(index_compress_full_path, 'wb') as f:  # binary mode: the .xz payload is bytes
        data = lzma.compress(index_file.encode('utf-8'))
        f.write(data)

    release_full_path = index_folder + '/' + 'Release'
    with open(release_full_path, 'w') as f:
        f.write(
            source.export_release(index_full_path, index_compress_full_path))

    download_map = source.export_download_map(all_pkgs)
    for key in download_map:
        bin_full_path = mirror_root + '/' + key
        bin_folder = mirror_root + '/' + '/'.join(key.split('/')[:-1])
        print(bin_folder)
        os.system('mkdir -p ' + bin_folder)
        size = 0
        try:
            size = os.stat(bin_full_path).st_size
        except OSError:  # file not downloaded yet
            pass
        if size == download_map[key]['size']:
            continue
        if http_proxy == "":
            command = 'wget ' + download_map[key][
                'url'] + ' -O ' + bin_full_path
        else:
            command = 'wget -e use_proxy=yes -e http_proxy=' + http_proxy + ' ' + download_map[
                key]['url'] + ' -O ' + bin_full_path
        print(command)
        os.system(command)
Example #6
    def write(self, data):
        """Feed data to the compressor.

        The buffer is not compressed until it reaches the block size or
        close() is called on the writer object.

        @param data
        Data to be fed to the writer

        WARNING: These methods are made to resemble file object methods
        only for convenience; they may not behave exactly like them.
        """
        self._queue += data
        while (len(self._queue) >= self.in_block_size):
            data = self._queue[0:self.in_block_size]
            self._queue = self._queue[self.in_block_size:]
            compressed = lzma.compress(data)
            self._outfile.write(compressed)
            cur_out_pos = self._last_out_pos + len(compressed)
            self._index.append(cur_out_pos)
            self._last_out_pos = cur_out_pos
Example #7
import lzma
import pickle

def dumps(obj):
    """Serialize a Python object via pickle into a compressed byte string."""
    return lzma.compress(pickle.dumps(obj))
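The source shows only the serializer; the inverse is symmetric. A minimal sketch of a matching loads (the name is assumed here, not from the source):

def loads(blob):
    """Inverse of dumps(): decompress, then unpickle."""
    return pickle.loads(lzma.decompress(blob))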
Example #8
def Z(contents):
    # Approximate the Kolmogorov complexity of `contents` by its raw-LZMA-compressed length.
    return len(lzma.compress(contents, format=lzma.FORMAT_RAW, filters=lzma_filters))
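Z relies on a module-level lzma_filters chain that the snippet does not show, and FORMAT_RAW refuses to run without one. A plausible definition (an assumption, not taken from the source):

import lzma

# assumed filter chain; FORMAT_RAW requires one to be spelled out explicitly
lzma_filters = [{'id': lzma.FILTER_LZMA2, 'preset': 9 | lzma.PRESET_EXTREME}]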
Example #9
## could check something about the FCs
#from dossier.fc.feature_collection import FeatureCollectionChunk as FCChunk
#fcc = FCChunk(path)
#for fc in fcc:
#    print fc['feature_name']


## assume the incoming data is some long-term archival stuff in XZ
## compressed format, and you want to see how slow XZ is:
xz_data = open(args.path, 'rb').read()  # binary mode: the file holds raw .xz bytes
start = time.time()
data = xz.decompress(xz_data)
decompression_time = time.time() - start

start = time.time()
xz_data2 = xz.compress(data)
assert xz_data2 == xz_data
compression_time = time.time() - start

def report(rec):
    rec['MB'] = rec['bytes'] / 2.0 ** 20  # float division so the MB/sec rates stay accurate on Python 2
    rec['ratio'] = float(rec['bytes']) / len(data)
    ctime = rec.get('compression_time')
    rec['compression_rate'] = ctime and rec['MB'] / ctime or float('inf')
    dtime = rec.get('decompression_time')
    rec['decompression_rate'] = dtime and rec['MB'] / dtime or 0
    print('%(name)s:\t%(MB)d MB of FC data, %(ratio).3f compression, %(compression_time).3f seconds --> %(compression_rate).3f MB/sec compression, '
          '%(decompression_time).3f seconds --> %(decompression_rate).3f MB/sec decompression' % rec)
    sys.stdout.flush()

raw_rec = dict(name='raw', bytes=len(data), compression_time=0, decompression_time=0)
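The snippet cuts off after building raw_rec; a plausible continuation (assumed, mirroring the shape of raw_rec) feeds the xz timings measured above through report():

report(raw_rec)
report(dict(name='xz', bytes=len(xz_data),
            compression_time=compression_time,
            decompression_time=decompression_time))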
Example #10

if sdist_formats == 'xztar':
    tarxz_path = os.path.join(release_dir, 'deluge-%s.tar.xz' % version)
else:
    # Compress release archive with xz
    tar_path = os.path.join(release_dir, 'deluge-%s.tar' % version)
    tarxz_path = tar_path + '.xz'
    print('Compressing tar (%s) with xz' % tar_path)
    try:
        from backports import lzma
    except ImportError:
        print('backports.lzma not installed, falling back to xz shell command')
        call(['xz', '-e9zkf', tar_path])
    else:
        with open(tar_path, 'rb') as tar_file, open(tarxz_path, 'wb') as xz_file:
            xz_file.write(
                lzma.compress(bytes(tar_file.read()), preset=9 | lzma.PRESET_EXTREME)
            )

# Calculate shasum and add to sha256sums.txt
with open(tarxz_path, 'rb') as _file:
    sha256sum = '%s %s' % (
        sha256(_file.read()).hexdigest(),
        os.path.basename(tarxz_path),
    )
with open(os.path.join(release_dir, 'sha256sums.txt'), 'w') as _file:
    _file.write(sha256sum + '\n')

print('Complete: %s' % release_dir)
Example #11
def compress(data, *args, **kwargs):
    """Thin wrapper forwarding directly to lzma.compress."""
    return lzma.compress(data, *args, **kwargs)
Example #12
def compress_image(image):
    """Return the LZMA-compressed size of an image buffer, in bytes."""
    compressed = lzma.compress(bytearray(image))
    # len() is the payload size; __sizeof__() would add Python object overhead
    return len(compressed)
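A hypothetical call (the buffer below is synthetic; any bytes-like object such as a raw pixel array works):

raw = bytes(bytearray(range(256))) * 64  # 16 KiB of regular 'pixel' data
print(compress_image(raw))  # compressed size in bytes, far below 16384 for so regular a pattern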
Example #13
    def put(self, items):
        # Pickle and LZMA-compress the items before posting them to the queue endpoint.
        zitems = lzma.compress(cPickle.dumps(items))
        self.bigcall('put', '/q/%s' % self.queue, data=zitems)
Example #14
    def dbput(self, table, doc):  # doc may be a list of docs
        # Pickle and LZMA-compress the document(s) before posting to the table endpoint.
        zdoc = lzma.compress(cPickle.dumps(doc))
        self.bigcall('put', '/db/%s' % table, zdoc)