def test_filter(self):
    shape = (32 * 1024 + 783,)
    chunks = (4 * 1024 + 23,)
    dtype = np.int64
    data = np.arange(shape[0])
    fname = "tmp_test_filters.h5"
    f = h5py.File(fname, "w")  # explicit mode; a bare h5py.File(fname) is deprecated
    h5.create_dataset(
        f,
        b"range",
        shape,
        dtype,
        chunks,
        filter_pipeline=(32008, 32000),  # bitshuffle, then LZF
        filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
        filter_opts=None,
    )
    f["range"][:] = data
    f.close()

    f = h5py.File(fname, "r")
    d = f["range"][:]
    self.assertTrue(np.all(d == data))
    f.close()
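# A minimal, hedged sketch (not part of the original test) of checking that
# the two filters referenced above by numeric ID -- 32008 (bitshuffle) and
# 32000 (LZF) -- are actually registered with the HDF5 library before the
# round-trip is attempted.
from h5py import h5z

for filter_id, label in [(32008, "bitshuffle"), (32000, "lzf")]:
    print("%s available: %s" % (label, h5z.filter_avail(filter_id)))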
def test_with_block_size(self):
    shape = (128 * 1024 + 783, )
    chunks = (4 * 1024 + 23, )
    dtype = np.int64
    data = np.arange(shape[0])
    fname = "tmp_test_filters.h5"
    f = h5py.File(fname, "w")
    h5.create_dataset(
        f,
        b"range",
        shape,
        dtype,
        chunks,
        filter_pipeline=(32008, 32000),
        filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
        filter_opts=((680, ), ()),
    )
    f["range"][:] = data
    f.close()

    # os.system('h5dump -H -p tmp_test_filters.h5')
    f = h5py.File(fname, 'r')
    d = f['range'][:]
    self.assertTrue(np.all(d == data))
    f.close()
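# Hedged note on the filter_opts above: the first tuple configures bitshuffle
# and the second configures LZF. For bitshuffle, the leading integer is the
# block size in elements; 0 lets the filter choose a default. A sketch of the
# two spellings:
opts_explicit = ((680, ), ())   # bitshuffle block of 680 elements, LZF defaults
opts_default = ((0, ), ())      # bitshuffle picks its own block size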
def test_with_lz4_compression(self):
    shape = (128 * 1024 + 783, )
    chunks = (4 * 1024 + 23, )
    dtype = np.int64
    data = np.arange(shape[0])
    fname = "tmp_test_filters.h5"
    f = h5py.File(fname, "w")
    h5.create_dataset(
        f,
        b"range",
        shape,
        dtype,
        chunks,
        filter_pipeline=(32008, ),
        filter_flags=(h5z.FLAG_MANDATORY, ),
        filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
    )
    f["range"][:] = data
    f.close()

    # os.system('h5dump -H -p tmp_test_filters.h5')
    f = h5py.File(fname, "r")
    d = f["range"][:]
    self.assertTrue(np.all(d == data))
    f.close()
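# For comparison, a hedged sketch of creating the same bitshuffle+LZ4 dataset
# through h5py's high-level API, using the constants exported by bitshuffle.h5
# rather than the raw filter ID 32008; the filename is hypothetical.
import h5py
import numpy as np
import bitshuffle.h5

data = np.arange(128 * 1024 + 783, dtype=np.int64)
with h5py.File("tmp_test_filters_highlevel.h5", "w") as f:
    dset = f.create_dataset("range",
                            shape=data.shape,
                            dtype=np.int64,
                            chunks=(4 * 1024 + 23, ),
                            compression=bitshuffle.h5.H5FILTER,
                            compression_opts=(0, bitshuffle.h5.H5_COMPRESS_LZ4))
    dset[:] = data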
def __write_to_hdf5_light(self, filename_out, *args, **kwargs):
    """ Write data to HDF5 file in one go.

    Args:
        filename_out (str): Name of output file
    """
    block_size = 0

    with h5py.File(filename_out, 'w') as h5:
        h5.attrs[b'CLASS'] = b'FILTERBANK'
        h5.attrs[b'VERSION'] = b'1.0'

        if HAS_BITSHUFFLE:
            bs_compression = bitshuffle.h5.H5FILTER
            bs_compression_opts = (block_size, bitshuffle.h5.H5_COMPRESS_LZ4)
        else:
            bs_compression = None
            bs_compression_opts = None
            logger.warning("Warning: bitshuffle not found. No compression applied.")

        dset = h5.create_dataset('data',
                                 data=self.data,
                                 # compression='lzf',
                                 compression=bs_compression,
                                 compression_opts=bs_compression_opts)

        dset_mask = h5.create_dataset('mask',
                                      shape=self.file_shape,
                                      # compression='lzf',
                                      compression=bs_compression,
                                      compression_opts=bs_compression_opts,
                                      dtype='uint8')

        dset.dims[0].label = b"frequency"
        dset.dims[1].label = b"feed_id"
        dset.dims[2].label = b"time"

        dset_mask.dims[0].label = b"frequency"
        dset_mask.dims[1].label = b"feed_id"
        dset_mask.dims[2].label = b"time"

        # Copy over header information as attributes
        for key, value in self.header.items():
            dset.attrs[key] = value
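# A small hedged sketch of reading back a file written by the method above,
# checking the class attributes and dimension labels; "example.h5" is a
# hypothetical filename.
import h5py

with h5py.File("example.h5", "r") as h5:
    print(h5.attrs["CLASS"], h5.attrs["VERSION"])
    dset = h5["data"]
    print([dim.label for dim in dset.dims])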
def create_compressed(hgroup, name, data, **kwargs):
    """ Add a compressed dataset to a given group.

    Use bitshuffle compression and LZ4 to compress a dataset.

    hgroup: h5py group in which to add dataset
    name:   name of dataset
    data:   data to write
    chunks: chunk size
    """
    # Check explicitly for bitshuffle, as it is not part of h5py
    compression = ''
    if 'compression' in kwargs:
        compression = kwargs['compression']

    if compression == 'bitshuffle' and USE_BITSHUFFLE:
        if 'chunks' not in kwargs:
            kwargs['chunks'] = guess_chunk(data.shape)
        chunks = kwargs['chunks']
        # Create the dataset through the low-level API so the bitshuffle
        # filter (HDF5 filter ID 32008) can be placed in the pipeline.
        h5.create_dataset(
            hgroup, name, data.shape, data.dtype, chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
    else:
        hgroup.create_dataset(name, data.shape, data.dtype, **kwargs)

    hgroup[name][:] = data
    return hgroup[name]
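# Hypothetical usage of create_compressed above: write one bitshuffled and one
# uncompressed dataset into the same file; names and sizes are illustrative.
import h5py
import numpy as np

with h5py.File("compressed_example.h5", "w") as f:
    data = np.random.randint(0, 255, size=(1024, 1024)).astype(np.uint8)
    create_compressed(f, "bs_data", data, compression="bitshuffle")
    create_compressed(f, "plain_data", data)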
def create_compressed(hgroup, name, data, **kwargs):
    """ Add a compressed dataset to a given group.

    Use bitshuffle compression and LZ4 to compress a dataset.

    hgroup: h5py group in which to add dataset
    name:   name of dataset
    data:   data to write
    chunks: chunk size
    """
    # Parse keyword arguments that we need to check
    compression = ''
    if 'compression' in kwargs:
        compression = kwargs['compression']
    if compression is None:
        compression = ''

    if 'chunks' not in kwargs:
        kwargs['chunks'] = guess_chunk(data.shape)
    chunks = kwargs['chunks']

    if compression.startswith('quinoa') and USE_BITSHUFFLE:
        # Compression string may encode options, e.g. 'quinoa_4_1' -> q=4, dither on
        q = 4
        do_dither = True
        try:
            cparts = compression.split('_')
            q = int(cparts[1])
            do_dither = bool(cparts[2])  # note: any non-empty string is truthy
        except (IndexError, ValueError):
            pass
        if data.ndim == 2:
            print("QUINOA: scaling %s " % name)
            qdata = quinoa.quinoa_scale(data, q=q, subtractive_dither=do_dither)
            data = qdata["data"]
            for key in qdata:
                if key != 'data':
                    print("QUINOA: %s: %s" % (key, qdata[key]))
        print(data.dtype)
        h5.create_dataset(
            hgroup, name, data.shape, data.dtype, chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
    elif compression == 'couscous' and USE_BITSHUFFLE:
        qdata = quinoa.couscous_scale(data)
        data = qdata["data"]
        h5.create_dataset(
            hgroup, name, data.shape, data.dtype, chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
    elif compression == 'bitshuffle' and USE_BITSHUFFLE:
        h5.create_dataset(
            hgroup, name, data.shape, data.dtype, chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
    else:
        hgroup.create_dataset(name, data.shape, data.dtype, **kwargs)

    hgroup[name][:] = data
    return hgroup[name]
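# Hypothetical usage of the extended create_compressed: a compression string
# such as 'quinoa_4_1' requests quinoa scaling with q=4 and dithering enabled
# before the bitshuffle+LZ4 pipeline; data and filename are illustrative.
import h5py
import numpy as np

with h5py.File("quinoa_example.h5", "w") as f:
    data = np.random.normal(size=(256, 256)).astype(np.float32)
    create_compressed(f, "scaled", data, compression="quinoa_4_1")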
OUT_FILE = "bitshuffle/tests/data/regression_%s.h5" % bitshuffle.__version__ DTYPES = ['a1', 'a2', 'a3', 'a4', 'a6', 'a8', 'a10'] f = h5py.File(OUT_FILE, 'w') g_comp = f.create_group("compressed") g_orig = f.create_group("origional") for dtype in DTYPES: for rep in ['a', 'b', 'c']: dset_name = "%s_%s" % (dtype, rep) dtype = np.dtype(dtype) n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE) shape = (n_elem, ) chunks = shape data = random.randint(0, 255, n_elem * dtype.itemsize) data = data.astype(np.uint8).view(dtype) g_orig.create_dataset(dset_name, data=data) h5.create_dataset(g_comp, dset_name, shape, dtype, chunks=chunks, filter_pipeline=FILTER_PIPELINE, filter_opts=FILTER_OPTS) g_comp[dset_name][:] = data f.close()
def cmd_tool(args=None): """ Command line tool for converting guppi raw into HDF5 versions of guppi raw """ from argparse import ArgumentParser if not HAS_BITSHUFFLE: print("Error: the bitshuffle library is required to run this script.") exit() parser = ArgumentParser( description="Command line utility for creating HDF5 Raw files.") parser.add_argument('filename', type=str, help='Name of filename to read') args = parser.parse_args() fileroot = args.filename.split('.0000.raw')[0] filelist = glob.glob(fileroot + '*.raw') filelist = sorted(filelist) # Read first file r = GuppiRaw(filelist[0]) header, data = r.read_next_data_block() dshape = data.shape #r.read_next_data_block_shape() print(dshape) n_blocks_total = 0 for filename in filelist: print(filename) r = GuppiRaw(filename) n_blocks_total += r.n_blocks print(n_blocks_total) full_dshape = np.concatenate(((n_blocks_total, ), dshape)) # Create h5py file h5 = h5py.File(fileroot + '.h5', 'w') h5.attrs['CLASS'] = 'GUPPIRAW' block_size = 0 # This is chunk block size dset = h5.create_dataset( 'data', shape=full_dshape, #compression=bitshuffle.h5.H5FILTER, #compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), dtype=data.dtype) h5_idx = 0 for filename in filelist: print("\nReading %s header..." % filename) r = GuppiRaw(filename) h5 = h5py.File(filename + '.h5', 'w') header, data = r.read_next_data_block() for ii in range(0, r.n_blocks): t0 = time.time() print("Reading block %i of %i" % (h5_idx + 1, full_dshape[0])) header, data = r.read_next_data_block() t1 = time.time() t2 = time.time() print("Writing block %i of %i" % (h5_idx + 1, full_dshape[0])) dset[h5_idx, :] = data t3 = time.time() print("Read: %2.2fs, Write %2.2fs" % ((t1 - t0), (t3 - t2))) h5_idx += 1 # Copy over header information as attributes for key, value in header.items(): dset.attrs[key] = value h5.close() t1 = time.time() print("Conversion time: %2.2fs" % (t1 - t0))
def cmd_tool(args=None): """ Command line utility for creating HDF5 blimpy files. """ from argparse import ArgumentParser parser = ArgumentParser(description="Command line utility for creating HDF5 Filterbank files.") parser.add_argument('dirname', type=str, help='Name of directory to read') args = parser.parse_args() if not HAS_BITSHUFFLE: print("Error: the bitshuffle library is required to run this script.") exit() filelist = glob.glob(os.path.join(args.dirname, '*.fil')) for filename in filelist: if not os.path.exists(filename + '.h5'): t0 = time.time() print("\nReading %s header..." % filename) fb = Filterbank(filename, load_data=False) data_shape = (fb.n_ints_in_file, fb.header['nifs'], fb.header['nchans']) data_dtype = fb.data.dtype print(data_dtype) print("Creating new dataset, %s" % str(data_shape)) block_size = 0 h5 = h5py.File(filename + '.h5', 'w') h5.attrs['CLASS'] = 'FILTERBANK' dset = h5.create_dataset('data', shape=data_shape, compression=bitshuffle.h5.H5FILTER, compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), dtype=data_dtype) dset_mask = h5.create_dataset('mask', shape=data_shape, compression=bitshuffle.h5.H5FILTER, compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), dtype='uint8') dset.dims[0].label = "frequency" dset.dims[1].label = "feed_id" dset.dims[2].label = "time" dset_mask.dims[0].label = "frequency" dset_mask.dims[1].label = "feed_id" dset_mask.dims[2].label = "time" # Copy over header information as attributes for key, value in fb.header.items(): dset.attrs[key] = value filesize = os.path.getsize(filename) if filesize >= MAX_SIZE: n_int_per_read = int(filesize / MAX_SIZE / 2) print("Filling in with data over %i reads..." % n_int_per_read) for ii in range(0, n_int_per_read): print("Reading %i of %i" % (ii + 1, n_int_per_read)) #print ii*n_int_per_read, (ii+1)*n_int_per_read fb = Filterbank(filename, t_start=ii*n_int_per_read, t_stop=(ii+1) * n_int_per_read) dset[ii*n_int_per_read:(ii+1)*n_int_per_read] = fb.data[:] else: fb = Filterbank(filename) print(dset.shape, " -> ", fb.data.shape) dset[:] = fb.data[:] h5.close() t1 = time.time() print("Conversion time: %2.2fs" % (t1- t0))
BLOCK_SIZE = 64     # Smallish such that datasets have many blocks but are small.

FILTER_PIPELINE = [h5.H5FILTER, ]
FILTER_OPTS = [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)]

OUT_FILE = "bitshuffle/tests/data/regression_%s.h5" % bitshuffle.__version__

DTYPES = ['a1', 'a2', 'a3', 'a4', 'a6', 'a8', 'a10']

f = h5py.File(OUT_FILE, 'w')
g_comp = f.create_group("compressed")
g_orig = f.create_group("origional")

for dtype in DTYPES:
    for rep in ['a', 'b', 'c']:
        dset_name = "%s_%s" % (dtype, rep)
        dtype = np.dtype(dtype)
        n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE)
        shape = (n_elem, )
        chunks = shape
        data = random.randint(0, 255, n_elem * dtype.itemsize)
        data = data.astype(np.uint8).view(dtype)

        g_orig.create_dataset(dset_name, data=data)

        h5.create_dataset(g_comp, dset_name, shape, dtype,
                          chunks=chunks,
                          filter_pipeline=FILTER_PIPELINE,
                          filter_opts=FILTER_OPTS)
        g_comp[dset_name][:] = data

f.close()
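# A hedged verification sketch for the regression file written above: read
# every compressed dataset back through the bitshuffle plugin and compare it
# against its uncompressed counterpart.
import h5py
import numpy as np
import bitshuffle.h5   # noqa: F401 -- importing registers the HDF5 filter

with h5py.File(OUT_FILE, "r") as f:
    for name in f["origional"]:
        orig = f["origional"][name][:]
        comp = f["compressed"][name][:]
        assert np.all(orig == comp), name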
dset_name = "%s_%s" % (dtype, rep) dtype = np.dtype(dtype) n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE) shape = (n_elem, ) chunks = shape data = random.randint(0, 255, n_elem * dtype.itemsize) data = data.astype(np.uint8).view(dtype) g_orig.create_dataset(dset_name, data=data) # Create LZ4 compressed data h5.create_dataset( g_comp_lz4, bytes(dset_name, "utf-8"), shape, dtype, chunks=chunks, filter_pipeline=FILTER_PIPELINE, filter_flags=(h5z.FLAG_MANDATORY, ), filter_opts=FILTER_OPTS[0], ) g_comp_lz4[dset_name][:] = data # Create ZSTD compressed data h5.create_dataset( g_comp_zstd, bytes(dset_name, "utf-8"), shape, dtype, chunks=chunks, filter_pipeline=FILTER_PIPELINE, filter_flags=(h5z.FLAG_MANDATORY, ),
def __write_to_hdf5_heavy(self, filename_out, *args, **kwargs):
    """ Write data to HDF5 file.

    Args:
        filename_out (str): Name of output file
    """
    block_size = 0

    # Note that a chunk is not a blob!!
    chunk_dim = self.__get_chunk_dimensions()
    blob_dim = self.__get_blob_dimensions(chunk_dim)
    n_blobs = self.container.calc_n_blobs(blob_dim)

    with h5py.File(filename_out, 'w') as h5:
        h5.attrs[b'CLASS'] = b'FILTERBANK'
        h5.attrs[b'VERSION'] = b'1.0'

        if HAS_BITSHUFFLE:
            bs_compression = bitshuffle.h5.H5FILTER
            bs_compression_opts = (block_size, bitshuffle.h5.H5_COMPRESS_LZ4)
        else:
            bs_compression = None
            bs_compression_opts = None
            logger.warning("Warning: bitshuffle not found. No compression applied.")

        dset = h5.create_dataset('data',
                                 shape=self.selection_shape,
                                 chunks=chunk_dim,
                                 compression=bs_compression,
                                 compression_opts=bs_compression_opts,
                                 dtype=self.data.dtype)

        dset_mask = h5.create_dataset('mask',
                                      shape=self.selection_shape,
                                      chunks=chunk_dim,
                                      compression=bs_compression,
                                      compression_opts=bs_compression_opts,
                                      dtype='uint8')

        dset.dims[0].label = b"frequency"
        dset.dims[1].label = b"feed_id"
        dset.dims[2].label = b"time"

        dset_mask.dims[0].label = b"frequency"
        dset_mask.dims[1].label = b"feed_id"
        dset_mask.dims[2].label = b"time"

        # Copy over header information as attributes
        for key, value in self.header.items():
            dset.attrs[key] = value

        if blob_dim[self.freq_axis] < self.selection_shape[self.freq_axis]:
            logger.info('Using %i n_blobs to write the data.' % n_blobs)
            for ii in range(0, n_blobs):
                logger.info('Reading %i of %i' % (ii + 1, n_blobs))
                bob = self.container.read_blob(blob_dim, n_blob=ii)

                # Using channels instead of frequency.
                c_start = self.container.chan_start_idx + ii * blob_dim[self.freq_axis]
                t_start = self.container.t_start + (c_start / self.selection_shape[self.freq_axis]) * blob_dim[self.time_axis]
                t_stop = t_start + blob_dim[self.time_axis]

                # Reverse array if frequency axis is flipped
                # if self.header['foff'] < 0:
                #     c_stop = self.selection_shape[self.freq_axis] - (c_start) % self.selection_shape[self.freq_axis]
                #     c_start = c_stop - blob_dim[self.freq_axis]
                # else:
                c_start = (c_start) % self.selection_shape[self.freq_axis]
                c_stop = c_start + blob_dim[self.freq_axis]

                # Bug fix: logger.debug takes a format string; the original
                # passed four positional values, which breaks when debug
                # logging is enabled.
                logger.debug("%s %s %s %s", t_start, t_stop, c_start, c_stop)

                dset[t_start:t_stop, 0, c_start:c_stop] = bob[:]
        else:
            logger.info('Using %i n_blobs to write the data.' % n_blobs)
            for ii in range(0, n_blobs):
                logger.info('Reading %i of %i' % (ii + 1, n_blobs))
                bob = self.container.read_blob(blob_dim, n_blob=ii)
                t_start = self.container.t_start + ii * blob_dim[self.time_axis]

                # This prevents issues when the last blob is smaller than the others in time
                if (ii + 1) * blob_dim[self.time_axis] > self.n_ints_in_file:
                    t_stop = self.n_ints_in_file
                else:
                    t_stop = (ii + 1) * blob_dim[self.time_axis]

                dset[t_start:t_stop] = bob[:]
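# Illustrative sketch (shapes are assumed, not taken from the class above) of
# the chunk/blob relationship the method relies on: a blob is a read-sized
# unit spanning whole chunks, so each blob dimension should be an integer
# multiple of the matching chunk dimension.
chunk_dim = (2, 1, 512)        # hypothetical chunk shape (time, feed_id, frequency)
blob_dim = (16, 1, 512 * 64)   # hypothetical blob: 8 chunks in time, 64 in frequency
chunks_per_blob = (blob_dim[0] // chunk_dim[0]) * (blob_dim[2] // chunk_dim[2])
print(chunks_per_blob)         # 8 * 64 = 512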