Example #1
def extract_tdps(tmp_dir,project_name,station_name,num_cores,tqdm):
    '''Runs _gather_tdps for each station in the project's stations_list.
    After the update it gathers [value], [nomvalue] and [sigma] and outputs a MultiIndex DataFrame.
    Extraction of residuals was moved to extract_residuals.

    Extracted data is saved in the project directory as name_of_project.npz with
    [solutions] and [residuals] datasets inside.
    If the file doesn't exist, the script is run and the file is saved accordingly.
    This rolls back to the version where solutions and residuals were collected simultaneously.

    Creates a "gather" folder and puts station-named files in it.
    Covers all stations and all years.
    '''

    station_files = _np.asarray(sorted(_glob.glob(tmp_dir + '/gd2e/' + project_name + '/' + station_name + '/*/*.zstd')))
    tmp_data = _np.asarray(_gather_tdps(station_files, num_cores,tqdm))

    # Stacking list of tmp tdps and residuals into one np array
    stacked_solutions = _pd.concat(tmp_data[:,0])
    stacked_residuals = _pd.concat(tmp_data[:,1])
    # For residuals trans column should be converted to category again
    stacked_residuals['trans'] = stacked_residuals['trans'].astype('category')
    # print(station_name, 'extraction finished')

    _blosc.set_nthreads(24) #using 24 threads for efficient compression of extracted data
    # default blosc.MAX_BUFFERSIZE = 2147483631 (too small for nz dataset with 54 stations)
    solutions_file = tmp_dir + '/gd2e/' + project_name + '/' +  station_name + '/solutions.zstd'
    residuals_file = tmp_dir + '/gd2e/' + project_name + '/' +  station_name + '/residuals.zstd'

    print('Compressing and saving extracted gathers')
    _dump_write(data=stacked_solutions,filename=solutions_file,cname='zstd')
    _dump_write(data=stacked_residuals,filename=residuals_file,cname='zstd')
Example #2
def load_kernel(filepath, n_threads=None):
    """
    Loads a kernel that was saved using save_kernel().

    Parameters
    ----------
    filepath: str
        The filepath of the saved kernel

    n_threads: int
        The number of threads to use for decompression. By default, all available cores are used.

    Returns
    -------
    ImputationKernel
    """
    n_threads = blosc.detect_number_of_cores() if n_threads is None else n_threads
    blosc.set_nthreads(n_threads)
    with open(filepath, "rb") as f:
        kernel = dill.loads(blosc.decompress(dill.load(f)))

    if kernel.original_data_class == "pd_DataFrame":
        kernel.working_data = pd_read_parquet(kernel.working_data)
        for col in kernel.working_data.columns:
            kernel.working_data[col] = kernel.working_data[col].astype(
                kernel.working_dtypes[col]
            )

    return kernel
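
A hypothetical sketch of the matching save side, inverting load_kernel() above. This is not miceforest's actual save_kernel(); it only assumes what the loader implies: the kernel is dill-pickled and blosc-compressed, and a pandas working_data is stored as a parquet buffer.

import blosc
import dill
from io import BytesIO

def save_kernel_sketch(kernel, filepath, n_threads=None, clevel=9):
    # Mirror load_kernel(): use all cores unless told otherwise.
    n_threads = blosc.detect_number_of_cores() if n_threads is None else n_threads
    blosc.set_nthreads(n_threads)

    if kernel.original_data_class == "pd_DataFrame":
        # Keep the dtypes so load_kernel() can restore them, then swap the
        # DataFrame for a parquet-serialized buffer.
        kernel.working_dtypes = kernel.working_data.dtypes
        buffer = BytesIO()
        kernel.working_data.to_parquet(buffer)
        buffer.seek(0)
        kernel.working_data = buffer

    with open(filepath, "wb") as f:
        # Exact inverse of dill.loads(blosc.decompress(dill.load(f))) in load_kernel().
        dill.dump(blosc.compress(dill.dumps(kernel), clevel=clevel), f)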
Example #3
def test_profiling_disables_threadpools(tmpdir):
    """
    Memory profiling disables thread pools, then restores them when done.
    """
    cwd = os.getcwd()
    os.chdir(tmpdir)

    import numexpr
    import blosc

    numexpr.set_num_threads(3)
    blosc.set_nthreads(3)
    with threadpoolctl.threadpool_limits(3, "blas"):
        with run_with_profile():
            assert numexpr.set_num_threads(2) == 1
            assert blosc.set_nthreads(2) == 1

            for d in threadpoolctl.threadpool_info():
                assert d["num_threads"] == 1, d

        # Resets when done:
        assert numexpr.set_num_threads(2) == 3
        assert blosc.set_nthreads(2) == 3

        for d in threadpoolctl.threadpool_info():
            if d["user_api"] == "blas":
                assert d["num_threads"] == 3, d
Example #4
def compress(data: bytes,
             compress_type=COMPRESS_FASTEST,
             nthreads=blosc.ncores) -> bytes:
    assert type(data) is bytes
    blosc.set_nthreads(nthreads)

    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    return blosc.compress(data, cname=compressor, clevel=level)
Example #5
def decompressStack(imageShape,
                    imageDtype,
                    blosc_threads=1,
                    pool_threads=maxThreads):
    blosc.set_nthreads(blosc_threads)
    tPool = ThreadPool(pool_threads)

    num_slices = imageShape[0]
    # Allocate the output with the requested dtype so decompressed slices land in the right format
    imageStack = np.full(imageShape, fill_value=0, dtype=imageDtype)
Example #6
    def compress(self, data, **kwargs):
        '''Useful compression kwargs:
        nthreads
        compression_block_size
        blosc_block_size
        shuffle
        typesize
        cname
        clevel
        '''
        # Blosc code probably assumes contiguous buffer
        assert data.contiguous

        nthreads = kwargs.pop('nthreads', 1)
        compression_block_size = kwargs.pop('compression_block_size', 1 << 22)
        blosc_block_size = kwargs.pop('blosc_block_size', 512 * 1024)
        # dtype size in bytes, e.g. 8 for int64
        typesize = kwargs.pop('typesize', 'auto')
        # compression level; the lowest level is usually enough for zstd
        clevel = kwargs.pop('clevel', 1)
        # compressor name; zstd is a good performance/compression tradeoff
        cname = kwargs.pop('cname', 'zstd')

        shuffle = kwargs.pop('shuffle', 'shuffle')
        if shuffle == 'shuffle':
            shuffle = blosc.SHUFFLE
        elif shuffle == 'bitshuffle':
            shuffle = blosc.BITSHUFFLE
        elif shuffle is None:
            shuffle = blosc.NOSHUFFLE
        else:
            raise ValueError(shuffle)

        blosc.set_nthreads(nthreads)
        blosc.set_blocksize(blosc_block_size)

        if typesize == 'auto':
            this_typesize = data.itemsize
        else:
            this_typesize = typesize
        #assert this_typesize != 1

        nelem = compression_block_size // data.itemsize
        for i in range(0, len(data), nelem):
            compressed = blosc.compress(data[i:i + nelem],
                                        typesize=this_typesize,
                                        clevel=clevel,
                                        shuffle=shuffle,
                                        cname=cname,
                                        **kwargs)
            header = struct.pack('!I', len(compressed))
            # TODO: this probably triggers a data copy, feels inefficient. Probably have to add output array arg to blosc to fix
            yield header + compressed
Example #7
def doDecompression(packedDataList, shape, n_threads):
    blosc.set_nthreads(n_threads)
    dataList = [None] * len(packedDataList)
    for J in np.arange(len(packedDataList)):
        #        dataStack[J,:,:] = np.reshape(
        #            np.frombuffer( blosc.decompress( packedDataList[J] ), dtype='uint8' ),
        #            shape[1:] )
        # Something here Numpy-side is very slow, so let's not include that in our
        # benchmark.
        dataList[J] = blosc.decompress(packedDataList[J])
    return dataList
Example #8
 def __init__(self, data_path, phase, transform=None, option=None):
     """
     :param data_path:  string, path to processed data
     :param transform: function,   apply transform on data
     """
     self.data_path = data_path
     self.phase = phase
     self.transform = transform
     ind = ['train', 'val', 'test', 'debug'].index(phase)
     max_num_for_loading = option['max_num_for_loading', (
         -1, -1, -1, -1
     ), "the max number of pairs to be loaded, set -1 if there is no constraint,[max_train, max_val, max_test, max_debug]"]
     self.max_num_for_loading = max_num_for_loading[ind]
     self.has_label = False
     self.get_file_list()
     self.seg_option = option['seg']
     self.img_after_resize = option[('img_after_resize', [
         -1, -1, -1
     ], "resample the image into desired size")]
     self.img_after_resize = None if any(
         [sz == -1
          for sz in self.img_after_resize]) else self.img_after_resize
     self.patch_size = self.seg_option['patch_size']
     self.interested_label_list = self.seg_option['interested_label_list', [
         -1
     ], "the label to be evaluated, the label not in list will be turned into 0 (background)"]
     self.interested_label_list = None if any([
         label == -1 for label in self.interested_label_list
     ]) else self.interested_label_list
     self.transform_name_seq = self.seg_option['transform']['transform_seq']
     self.option_p = self.seg_option[('partition', {},
                                      "settings for the partition")]
     self.use_whole_img_as_input = self.seg_option[(
         'use_whole_img_as_input', False, "use whole image as the input")]
     self.load_into_memory = True
     self.img_list = []
     self.img_sz_list = []
     self.original_spacing_list = []
     self.original_sz_list = []
     self.spacing_list = []
     self.label_org_index_list = []
     self.label_converted_index_list = []
     self.label_density_list = []
     if self.load_into_memory:
         self.init_img_pool()
         print('img pool initialized complete')
         if self.phase == 'train':
             self.init_corr_transform_pool()
             print('transforms initialized complete')
         else:
             self.init_corr_partition_pool()
             print("partition pool initialized complete")
     blosc.set_nthreads(1)
Example #9
def _dump_write(filename, data, num_cores=24, cname='zstd'):
    '''Serializes the input (may be a list of dataframes or else) and uses blosc to compress it and write to a file specified'''
    _blosc.set_nthreads(num_cores)  # use num_cores threads (24 by default) for efficient compression of extracted data
    context = _pa.default_serialization_context()
    serialized_data = context.serialize(data).to_buffer()
    compressed = _blosc.compress(serialized_data,
                                 typesize=8,
                                 clevel=9,
                                 cname=cname)
    with open(filename, 'wb') as f:
        f.write(compressed)
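
A hypothetical read counterpart to _dump_write (the project's own reader is not shown here); it simply inverts the steps above: read the file, blosc-decompress, and let pyarrow rebuild the object.

def _dump_read(filename, num_cores=24):
    '''Reads a file written by _dump_write: decompresses the blosc payload and deserializes it with pyarrow'''
    _blosc.set_nthreads(num_cores)
    with open(filename, 'rb') as f:
        compressed = f.read()
    context = _pa.default_serialization_context()
    return context.deserialize(_blosc.decompress(compressed))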
Example #10
def compress(data: bytes, compress_type=COMPRESS_FASTEST, nthreads=blosc.ncores) -> bytes:
    """
    compress(data[, compress_type=COMPRESS_FASTEST, nthreads=blosc.ncores])

    High-speed compression with multi-threading, implemented on top of blosc.compress.

    Raises ValueError if the size of the buffer is larger than 2147483631 bytes.
    """
    assert type(data) is bytes
    blosc.set_nthreads(nthreads)

    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    return blosc.compress(data, cname=compressor, clevel=level)
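
For completeness, a matching decompress helper could look like the sketch below (hypothetical; the surrounding library may already expose one). blosc stores the codec and compression level in the frame header, so decompression needs no extra arguments.

def decompress_sketch(data: bytes, nthreads=blosc.ncores) -> bytes:
    assert type(data) is bytes
    blosc.set_nthreads(nthreads)
    return blosc.decompress(data)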
Example #11
def compress_ndarray(vectors: numpy.ndarray,
                     compress_type=COMPRESS_FASTEST,
                     nthreads=blosc.ncores) -> bytes:
    assert type(vectors) is numpy.ndarray
    blosc.set_nthreads(nthreads)

    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    buffer = blosc.compress_ptr(vectors.__array_interface__['data'][0],
                                vectors.size,
                                typesize=max(1, min(255,
                                                    vectors.dtype.itemsize)),
                                clevel=level,
                                cname=compressor,
                                shuffle=blosc.BITSHUFFLE)
    return pickle.dumps([buffer, vectors.dtype, vectors.shape])
Example #12
def set_blosc_nthreads() -> int:
    """set the blosc library to two less than the core count on the system.

    If less than 2 cores are ncores-2, we set the value to two.

    Returns
    -------
    int
        ncores blosc will use on the system
    """
    nCores = blosc.detect_number_of_cores()
    if nCores <= 2:
        nUsed = 1
    elif nCores <= 4:
        nUsed = nCores - 1
    else:
        nUsed = nCores - 2
    blosc.set_nthreads(nUsed)
    return nUsed
Example #13
def doCompression(dataStack,
                  compressor='zstd',
                  blocksize=2**20,
                  n_threads=16,
                  shuffle=blosc.BITSHUFFLE,
                  clevel=5):

    blosc.set_blocksize(blocksize)
    blosc.set_nthreads(n_threads)
    typeSize = dataStack.dtype.itemsize
    packedDataList = [None] * dataStack.shape[0]
    for J in np.arange(dataStack.shape[0]):
        packedDataList[J] = blosc.compress(dataStack[J, :, :],
                                           typesize=typeSize,
                                           clevel=clevel,
                                           shuffle=shuffle,
                                           cname=compressor)

    return packedDataList
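
A short round-trip sketch combining doCompression with the doDecompression function from Example #7; the array shape and thread counts here are illustrative only.

import numpy as np

stack = np.random.randint(0, 256, size=(8, 512, 512), dtype='uint8')
packed = doCompression(stack, compressor='zstd', n_threads=4)
raw = doDecompression(packed, stack.shape, n_threads=4)
# Each decompressed buffer is one frame; rebuild the stack and verify the round trip.
restored = np.stack([np.frombuffer(b, dtype=stack.dtype).reshape(stack.shape[1:])
                     for b in raw])
assert np.array_equal(stack, restored)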
Example #14
def compressStack(imageStack, blosc_threads=1, pool_threads=maxThreads):
    """
    Does frame compression using a ThreadPool to distribute the load.
    """
    blosc.set_nthreads(blosc_threads)
    tPool = ThreadPool(pool_threads)

    num_slices = imageStack.shape[0]
    # Build the parameters list for the threaded processes, each entry carrying its slice index
    tArgs = [None] * num_slices
    itemSize = imageStack.dtype.itemsize
    bytesList = [None] * num_slices
    for J in np.arange(num_slices):
        tArgs[J] = (imageStack[J,:,:].__array_interface__['data'][0], \
                    N*N, itemSize, bytesList, J)

    # All operations are done 'in-place'
    tPool.map(compressSlice, tArgs)
    tPool.close()
    tPool.join()
Example #16
def compress_ndarray(vectors, compress_type=COMPRESS_FASTEST, nthreads=blosc.ncores) -> bytes:
    """
    compress_ndarray(vectors[, compress_type=COMPRESS_FASTEST, nthreads=blosc.ncores])

    High-speed compression of a numpy.ndarray with multi-threading, implemented on top of blosc.compress.

    Raises ValueError if the size of the array is larger than 2147483631 bytes.
    Example: a float32 array has itemsize=4, so a (1200000, 512) 2D array with size=614400000
    has a total of 4*614400000 == 2457600000 bytes, which exceeds the limit.

    You must split such an array into smaller pieces.
    """
    assert type(vectors) is numpy.ndarray
    blosc.set_nthreads(nthreads)

    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    buffer = blosc.compress_ptr(vectors.__array_interface__['data'][0], vectors.size,
                                typesize=max(1, min(255, vectors.dtype.itemsize)),
                                clevel=level, cname=compressor, shuffle=blosc.BITSHUFFLE)
    return pickle.dumps([buffer, vectors.dtype, vectors.shape])
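
The inverse operation, sketched below for reference (hypothetical; the host library may provide its own decompress routine). It unpacks the pickled [buffer, dtype, shape] triple produced above and rebuilds the array.

def decompress_ndarray_sketch(data: bytes, nthreads=blosc.ncores) -> numpy.ndarray:
    blosc.set_nthreads(nthreads)
    buffer, dtype, shape = pickle.loads(data)
    # blosc undoes the bit-shuffle filter automatically using the frame header.
    return numpy.frombuffer(blosc.decompress(buffer), dtype=dtype).reshape(shape)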
Example #17
def SetupEnv():
    os.environ["CXX"] = "g++"
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    blosc.set_nthreads(4)
    gc.enable()
Example #18
bloscThreads = np.hstack([1, powProduct[::-1]])
# Let's try instead just pool threads...
#poolThreads = np.arange( 1, maxThreads+1 )
#bloscThreads = np.ones_like( poolThreads )

solo_times = np.zeros_like(poolThreads, dtype='float64')
solo_unlocked_times = np.zeros_like(poolThreads, dtype='float64')
locked_times = np.zeros_like(poolThreads, dtype='float64')
unlocked_times = np.zeros_like(poolThreads, dtype='float64')

for J in np.arange(nRuns):
    print("Run  %d of %d" % (J + 1, nRuns))
    blosc.set_releasegil(False)
    for I in np.arange(len(poolThreads)):
        t1 = time.time()
        blosc.set_nthreads(bloscThreads[I])
        blosc.compress_ptr( stack.__array_interface__['data'][0], stack.size, stack.dtype.itemsize, \
                       clevel=CLEVEL, shuffle=SHUFFLE, cname=COMPRESSOR )
        solo_times[I] += time.time() - t1

    blosc.set_releasegil(True)
    for I in np.arange(len(poolThreads)):
        t2 = time.time()
        blosc.set_nthreads(bloscThreads[I])
        blosc.compress_ptr( stack.__array_interface__['data'][0], stack.size, stack.dtype.itemsize, \
                       clevel=CLEVEL, shuffle=SHUFFLE, cname=COMPRESSOR )
        solo_unlocked_times[I] += time.time() - t2

    blosc.set_releasegil(True)
    for I in np.arange(len(poolThreads)):
        t3 = time.time()
Example #19
def __MRCExport( input_image, header, MRCfilename, endchar = '<' ):
    """
    MRCExport private interface with a dictionary rather than a mess of function 
    arguments.
    """
    with open( MRCfilename, 'wb', buffering=BUFFERSIZE ) as f:
    
        writeMRCHeader( f, header, endchar )
        f.seek(1024)
        
        if ('compressor' in header) \
                and (header['compressor'] in REVERSE_COMPRESSOR_ENUM) \
                and (REVERSE_COMPRESSOR_ENUM[header['compressor']]) > 0:
            # compressed MRCZ
            print( "Compressing %s with compressor %s%d" %
                    (MRCfilename, header['compressor'], header['clevel'] ) )
            
            
            
            if header['dtype'] != 'uint4' and input_image.dtype != header['dtype']:
                # This correctly works for text to dtype comparison
                input_image = input_image.astype(header['dtype']) 
                
            if input_image.ndim == 3:
                chunkSize = input_image[0,:,:].size
            else:
                chunkSize = input_image.size
                input_image = np.reshape( input_image, [1,input_image.shape[0],input_image.shape[1] ])
                
            blosc.set_nthreads( header['n_threads'] )
            blosc.set_blocksize( 65536 )
            
            header['packedBytes'] = 0
            typeSize = input_image.dtype.itemsize
            
            print( input_image.shape )
            for J in np.arange( input_image.shape[0] ):
                # print( "Slice %d: Compressing address at: %d of %d:" % (J, int(J*typeSize*blockSize), input_image.nbytes) )
                
                # Looks like I have problem for typesize > 1?
                if int(J*typeSize*chunkSize) >= input_image.nbytes:
                    raise MemoryError( "MRCExport: Tried to reference past end of ndarray %d > %d" % (int(J*typeSize*chunkSize), input_image.nbytes ) )
                    

                compressedData = blosc.compress( input_image[J,:,:].tobytes(),
                            typeSize, 
                            clevel=header['clevel'], 
                            shuffle=blosc.BITSHUFFLE,
                            cname=header['compressor'] )
                f.write( compressedData )
                    
                header['packedBytes'] += len(compressedData)
                # print( "packedBytes = %d" % header['packedBytes'] )
                
            # print( "Finished writing out compressedData" )
            # Rewind and write out the total compressed size
            f.seek(144)
            np.int64( header['packedBytes'] ).astype( endchar + "i8" ).tofile(f)

            
        else: # vanilla MRC
            if header['dtype'] != 'uint4' and input_image.dtype != header['dtype']:
                input_image = input_image.astype( header['dtype'] )
            
            input_image.tofile(f)
            
            
    return 
Example #20
from __future__ import print_function
from effects import SnpEff

# native Python imports
import os.path
import time
import sys
import sqlite3
import itertools as it

import toml  # toml.py

# third-party imports
import cyvcf2 as vcf
import blosc
blosc.set_nthreads(1)
blosc.set_blocksize(8192)

import zlib
import cPickle

def opack_blob(obj, _none=buffer(zlib.compress(cPickle.dumps(None, cPickle.HIGHEST_PROTOCOL)))):
    if obj is None: return _none
    return buffer(zlib.compress(cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL), 1))

def pack_blob(obj):
    if obj is None: return ''
    return buffer(blosc.compress(obj.tostring(), obj.dtype.itemsize, clevel=5, shuffle=True))
    #return buffer(blosc.pack_array(obj))

def is_number(op, field):
Example #22
File: utils.py  Project: Yufeng98/Clair
def setup_environment():
    environ["CXX"] = "g++"
    environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    blosc.set_nthreads(4)
    gc.enable()
Example #23
import logging

import blosc
import numpy as np

from pmc_turbo.camera.pycamera import dtypes

logger = logging.getLogger(__name__)

# We need to ensure blosc uses just 1 thread so that it is always compatible with multiprocessing. This is true as of
#  blosc 1.4.4, but may improve in the future.
original_nthreads = blosc.set_nthreads(1)
logger.debug("Set blosc to use 1 thread, originally was using %d" %
             original_nthreads)


def load_blosc_file(filename):
    logger.debug("Reading blosc file from %s" % filename)
    with open(filename, 'rb') as fh:
        data = blosc.decompress(fh.read())
    return data


def load_blosc_image(filename):
    data = load_blosc_file(filename)
    image = np.frombuffer(data[:-dtypes.chunk_num_bytes], dtype='uint16')
    image.shape = dtypes.image_dimensions
    chunk_data = np.frombuffer(data[-dtypes.chunk_num_bytes:],
                               dtype=dtypes.chunk_dtype)
    return image, chunk_data
Example #24
import blosc

from . import chunks
from . import hangar_service_pb2
from . import hangar_service_pb2_grpc
from .. import config
from ..context import Environments
from ..context import TxnRegister
from ..hdf5_store import FileHandles
from ..records import commiting
from ..records import hashs
from ..records import heads
from ..records import parsing
from ..records import queries
from ..records import summarize

blosc.set_nthreads(blosc.detect_number_of_cores() - 2)


class HangarClient(object):
    '''Client which connects and handles data transfer to the hangar server.

    Parameters
    ----------
    envs : Environments
        environment handles to manage all required calls to the local
        repository state.
    address : str
        IP:PORT where the hangar server can be reached.
    '''
    def __init__(self, envs: Environments, address: str):
        self.env = envs
Example #25
        def check():
            assert numexpr.set_num_threads(2) == 1
            assert blosc.set_nthreads(2) == 1

            for d in threadpoolctl.threadpool_info():
                assert d["num_threads"] == 1, d
Example #26
def __MRCExport(input_image, header, MRCfilename, slices, 
                endchar='<', offset=0, idxnewfile=True):
    '''
    MRCExport private interface with a dictionary rather than a mess of function 
    arguments.
    '''

    if idxnewfile: # If forcing a new file we truncate it even if it already exists:
        fmode = 'wb'
    else: # Otherwise we'll just update its header and append images as required:
        fmode = 'rb+'

    with open(MRCfilename, fmode, buffering=BUFFERSIZE) as f:
        extendedBytes = writeMRCHeader(f, header, slices, endchar=endchar)
        f.seek(DEFAULT_HEADER_LEN + extendedBytes + offset)

        dtype = header['dtype']
        if ('compressor' in header) \
                and (header['compressor'] in REVERSE_COMPRESSOR_ENUM) \
                and (REVERSE_COMPRESSOR_ENUM[header['compressor']]) > 0:
            # compressed MRCZ
            logger.debug('Compressing %s with compressor %s%d' %
                    (MRCfilename, header['compressor'], header['clevel']))
            
            applyCast = False
            if slices > 0:
                chunkSize = input_image[0].size
                typeSize = input_image[0].dtype.itemsize
                if dtype != 'uint4' and input_image[0].dtype != dtype: 
                    applyCast = True
            else:
                chunkSize = input_image[0,:,:].size
                typeSize = input_image.dtype.itemsize
                if dtype != 'uint4' and input_image.dtype != dtype: 
                    applyCast = True
                
            blosc.set_nthreads(header['n_threads'])
            # for small image dimensions we need to scale blocksize appropriately
            # so we use the available cores
            block_size = np.minimum(BLOSC_BLOCK, chunkSize//header['n_threads'])
            blosc.set_blocksize(block_size)
            
            header['packedBytes'] = 0
            
            clevel = header['clevel']
            cname = header['compressor']

            # For 3D frames in lists, we need to further sub-divide each frame 
            # into slices so that each channel is compressed separately by 
            # blosc.
            if slices > 1:
                deep_image = input_image # grab a reference
                input_image = []
                for frame in deep_image:
                    for I in range(slices):
                        input_image.append(frame[I,:,:])

            for J, frame in enumerate(input_image):
                if applyCast:
                    frame = frame.astype(dtype)

                if frame.flags['C_CONTIGUOUS'] and frame.flags['ALIGNED']:
                    # Use pointer
                    compressedData = blosc.compress_ptr(frame.__array_interface__['data'][0], 
                                    frame.size,
                                    typeSize, 
                                    clevel=header['clevel'], 
                                    shuffle=blosc.BITSHUFFLE,
                                    cname=header['compressor'])
                else: 
                    # Use tobytes, which is slower in benchmarking
                    compressedData = blosc.compress(frame.tobytes(),
                                    typeSize, 
                                    clevel=clevel, 
                                    shuffle=blosc.BITSHUFFLE,
                                    cname=cname)
        

                f.write(compressedData)
                header['packedBytes'] += len(compressedData)

            # Rewind and write out the total compressed size
            f.seek(144)
            np.int64(header['packedBytes']).astype(endchar + 'i8').tofile(f)
            
        else: # vanilla MRC
            if slices > 0: 
                if dtype != 'uint4' and dtype != input_image[0].dtype:
                    for z_slice in input_image:
                        z_slice.astype(dtype).tofile(f)
                else:
                    for z_slice in input_image:
                        z_slice.tofile(f)
            else:
                if dtype != 'uint4' and dtype != input_image.dtype:
                    input_image = input_image.astype(dtype)
                input_image.tofile(f)
            
            
    return 
Example #27
import pickle
import warnings

import blosc
import numpy

from dpsutil.dataframe.convert import cvt_dec2hex, cvt_hex2dec, cvt_hex2str, cvt_str2hex

COMPRESS_FASTEST = 0
COMPRESS_BEST = 1

blosc.set_nthreads(min(8, max(4, blosc.detect_number_of_cores() // 2)))


def compress(data: bytes, compress_type=COMPRESS_FASTEST) -> bytes:
    """
    compress(data[, compress_type=COMPRESS_FASTEST])
    Optionals:
        - compress_type: [COMPRESS_FASTEST, COMPRESS_BEST]
          COMPRESS_BEST results in better compression at the cost of more CPU usage.

    High-speed compression with multi-threading, implemented on top of blosc.compress.
    Raises ValueError if the size of the buffer is larger than 2147483631 bytes.
    """
    assert type(data) is bytes

    compressor = "lz4" if compress_type == COMPRESS_FASTEST else "zstd"
    level = 1 if compress_type == COMPRESS_FASTEST else 5
    return blosc.compress(data, cname=compressor, clevel=level)
Example #28
            streaks += 1
            above = not above
    return streaks


FILENAMES = ('HiSPARC.h5', )
PATH = '/home/francesc/datasets/tests/'
BLOCK_SIZES = (0, MINIMUM_SIZE, KB16, KB32, KB64, KB128, KB256, KB512, MB, MB2)
C_LEVELS = range(1, 10)
COLS = [
    'Filename', 'DataSet', 'Table', 'DType', 'Chunk_Number', 'Chunk_Size',
    'Mean', 'Median', 'Sd', 'Skew', 'Kurt', 'Min', 'Max', 'Q1', 'Q3',
    'N_Streaks', 'Block_Size', 'Codec', 'Filter', 'CL', 'CRate', 'CSpeed',
    'DSpeed'
]
blosc.set_nthreads(4)

if not os.path.isfile('blosc_test_data.csv'):
    pd.DataFrame(columns=COLS).to_csv('blosc_test_data.csv',
                                      sep='\t',
                                      index=False)

for filename in FILENAMES:
    for path, d_type, table, buffer in file_reader(PATH + filename):
        n_chunks = calculate_nchunks(buffer.dtype.itemsize, buffer.size)
        print("Starting tests with %-s %-s t%-s" % (filename, path, table))
        if buffer.dtype.kind in ('S', 'U'):
            is_string = True
            filters = (blosc.NOSHUFFLE, )
        else:
            is_string = False
Example #29
import multiprocessing as mp
import os
import shutil
import tempfile
from nose.tools import timed

#__test__ = False

import blosc
import numpy as np

print blosc.set_nthreads(1)
from pmc_turbo.camera.image_processing import blosc_file
from pmc_turbo.camera.pycamera import dtypes
print blosc.set_nthreads(1)


class TestBloscFiles(object):
    def setup(self):
        self.temp_dir = tempfile.mkdtemp()

    def teardown(self):
        shutil.rmtree(self.temp_dir)

    def test_blosc_file_round_trip(self):
        filename = os.path.join(self.temp_dir,'blah.blosc')
        data = np.random.random_integers(0,255,2**20).astype('uint8').tostring()
        blosc_file.write_image_blosc(filename=filename, data=data)
        data2 = blosc_file.load_blosc_file(filename)
        assert data == data2
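
The write_image_blosc() used by this test is not shown above; a minimal sketch consistent with load_blosc_file() from Example #23 would compress the whole payload as a single blosc frame and write it out (the compression parameters below are assumptions, not the project's actual choices).

def write_image_blosc(filename, data):
    # Inverse of load_blosc_file(): one blosc frame per file.
    with open(filename, 'wb') as fh:
        fh.write(blosc.compress(data, typesize=1, cname='lz4', shuffle=blosc.SHUFFLE))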
Example #30
def __MRCExport(input_image,
                header,
                MRCfilename,
                endchar='<',
                offset=0,
                idxnewfile=True):
    '''
    MRCExport private interface with a dictionary rather than a mess of function 
    arguments.
    '''

    if idxnewfile:
        # If forcing a new file we truncate it even if it already exists:
        fmode = 'wb'

    else:
        # Otherwise we'll just update its header and append images as required:
        fmode = 'rb+'

    with open(MRCfilename, fmode, buffering=BUFFERSIZE) as f:
        extendedBytes = writeMRCHeader(f, header, endchar)
        f.seek(DEFAULT_HEADER_LEN + extendedBytes + offset)

        if ('compressor' in header) \
                and (header['compressor'] in REVERSE_COMPRESSOR_ENUM) \
                and (REVERSE_COMPRESSOR_ENUM[header['compressor']]) > 0:
            # compressed MRCZ
            logger.info('Compressing %s with compressor %s%d' %
                        (MRCfilename, header['compressor'], header['clevel']))

            if header['dtype'] != 'uint4' and input_image.dtype != header[
                    'dtype']:
                # This correctly works for text to dtype comparison
                input_image = input_image.astype(header['dtype'])

            if input_image.ndim == 3:
                chunkSize = input_image[0, :, :].size
            else:
                chunkSize = input_image.size
                input_image = np.reshape(
                    input_image,
                    [1, input_image.shape[0], input_image.shape[1]])

            blosc.set_nthreads(header['n_threads'])
            blosc.set_blocksize(BLOSC_BLOCK)

            header['packedBytes'] = 0
            typeSize = input_image.dtype.itemsize

            for J in np.arange(input_image.shape[0]):
                # print( 'Slice %d: Compressing address at: %d of %d:' % (J, int(J*typeSize*blockSize), input_image.nbytes) )

                # Looks like I have problem for typesize > 1?
                if int(J * typeSize * chunkSize) >= input_image.nbytes:
                    raise MemoryError(
                        'MRCExport: Tried to reference past end of ndarray %d > %d'
                        % (int(J * typeSize * chunkSize), input_image.nbytes))

                compressedData = blosc.compress(input_image[J, :, :].tobytes(),
                                                typeSize,
                                                clevel=header['clevel'],
                                                shuffle=blosc.BITSHUFFLE,
                                                cname=header['compressor'])
                f.write(compressedData)

                header['packedBytes'] += len(compressedData)

            # Rewind and write out the total compressed size
            f.seek(144)
            np.int64(header['packedBytes']).astype(endchar + 'i8').tofile(f)

        else:  # vanilla MRC
            if header['dtype'] != 'uint4' and input_image.dtype != header[
                    'dtype']:
                input_image = input_image.astype(header['dtype'])
            input_image.tofile(f)

    return
Example #31
File: numpy.py  Project: dask/distributed
from __future__ import print_function, division, absolute_import

import sys

import numpy as np

try:
    import blosc
    n = blosc.set_nthreads(2)
except ImportError:
    blosc = False

from .utils import frame_split_size
from .serialize import register_serialization
from . import pickle

from ..utils import log_errors, ensure_bytes


def itemsize(dt):
    """ Itemsize of dtype

    Try to return the itemsize of the base element, return 8 as a fallback
    """
    result = dt.base.itemsize
    if result > 255:
        result = 8
    return result


def serialize_numpy_ndarray(x):
Example #32
File: cli.py  Project: gyenney/Tools
def process_nthread_arg(args):
    """ Extract and set nthreads. """
    if args.nthreads != blosc.ncores:
        blosc.set_nthreads(args.nthreads)
    log.verbose("using %d thread%s" % (args.nthreads, "s" if args.nthreads > 1 else ""))
Example #33
def decompressStack( imageShape, imageDtype, blosc_threads = 1, pool_threads=maxThreads ):
    blosc.set_nthreads( blosc_threads )
    tPool = ThreadPool( pool_threads )
    
    num_slices = imageShape[0]
    # Allocate the output with the requested dtype so decompressed slices land in the right format
    imageStack = np.empty( imageShape, dtype=imageDtype )
Example #34
File: cli.py  Project: ASPP/bloscpack
def process_nthread_arg(args):
    """ Extract and set nthreads. """
    if args.nthreads != blosc.ncores:
        blosc.set_nthreads(args.nthreads)
    log.verbose('using %d thread%s' %
                (args.nthreads, 's' if args.nthreads > 1 else ''))
Example #35
def set_nthreads(nthreads):
    blosc.set_nthreads(nthreads)
Example #36
        return result
    else:
        result = np.frombuffer(bytes, dtype)
        if copy:
            result = result.copy()
        return result


compress_text = identity
decompress_text = identity
compress_bytes = lambda bytes, itemsize: bytes
decompress_bytes = identity

with ignoring(ImportError):
    import blosc
    blosc.set_nthreads(1)

    compress_bytes = blosc.compress
    decompress_bytes = blosc.decompress

    compress_text = partial(blosc.compress, typesize=1)
    decompress_text = blosc.decompress

with ignoring(ImportError):
    from snappy import compress as compress_text
    from snappy import decompress as decompress_text


def compress(bytes, dtype):
    if dtype == 'O':
        return compress_text(bytes)
Example #38
from __future__ import print_function, division, absolute_import

import sys

import numpy as np

try:
    import blosc
    n = blosc.set_nthreads(2)
except ImportError:
    blosc = False

from .compression import byte_sample
from .utils import frame_split_size
from .serialize import register_serialization
from . import pickle

from ..utils import log_errors, ensure_bytes


def itemsize(dt):
    """ Itemsize of dtype

    Try to return the itemsize of the base element, return 8 as a fallback
    """
    result = dt.base.itemsize
    if result > 255:
        result = 8
    return result


"""
import sys
import numpy as np
import os
import time

import io
import zlib
import blosc
import snappy
import lz4
nthreads = 8
blosc_comp = 9
blosc.set_nthreads(nthreads)
import tables
import sqlalchemy as sa


sig_size = 1e6
loop = 50

#~ arr = np.random.rand(sig_size).astype('f4')
arr = np.zeros(sig_size)
#~ arr = np.empty(sig_size)
buf = np.getbuffer(arr)

print 'Array :',arr.shape, arr.dtype, ' buffer size',len(buf), len(buf)/1024.**3, 'Go'