Example 1
    def test_filter(self):
        shape = (32 * 1024 + 783,)
        chunks = (4 * 1024 + 23,)
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname, "w")
        h5.create_dataset(
            f,
            b"range",
            shape,
            dtype,
            chunks,
            filter_pipeline=(32008, 32000),
            filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
            filter_opts=None,
        )
        f["range"][:] = data

        f.close()

        f = h5py.File(fname, "r")
        d = f["range"][:]
        self.assertTrue(np.all(d == data))
        f.close()
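The magic numbers in filter_pipeline are registered HDF5 filter IDs: 32008 is bitshuffle and 32000 is LZF. For comparison, a minimal sketch of the same dataset written through h5py's high-level API; it assumes the bitshuffle package is installed (importing bitshuffle.h5 registers the filter, and H5FILTER is the same ID 32008):

import numpy as np
import h5py
import bitshuffle.h5  # importing this module registers the filter with HDF5

data = np.arange(32 * 1024 + 783)
with h5py.File("tmp_bitshuffle_demo.h5", "w") as f:
    f.create_dataset(
        "range",
        data=data,
        chunks=(4 * 1024 + 23,),
        compression=bitshuffle.h5.H5FILTER,  # == 32008
        compression_opts=(0, bitshuffle.h5.H5_COMPRESS_LZ4),  # 0 = automatic block size
    )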
Example 2
    def test_with_block_size(self):
        shape = (128 * 1024 + 783, )
        chunks = (4 * 1024 + 23, )
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname, "w")
        h5.create_dataset(
            f,
            b"range",
            shape,
            dtype,
            chunks,
            filter_pipeline=(32008, 32000),
            filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
            filter_opts=((680, ), ()),
        )
        f["range"][:] = data

        f.close()
        #os.system('h5dump -H -p tmp_test_filters.h5')

        f = h5py.File(fname, 'r')
        d = f['range'][:]
        self.assertTrue(np.all(d == data))
        f.close()
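The filter_opts here set an explicit bitshuffle block size of 680 elements and pass no options to LZF. To confirm what actually landed in the dataset's filter pipeline, the creation property list can be inspected through h5py's low-level API; a small sketch:

import h5py

with h5py.File("tmp_test_filters.h5", "r") as f:
    plist = f["range"].id.get_create_plist()
    for i in range(plist.get_nfilters()):
        # Each entry is (filter_id, flags, opts, name); expect 32008 then 32000.
        print(plist.get_filter(i))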
Example 3
    def test_filter(self):
        shape = (32 * 1024 + 783, )
        chunks = (4 * 1024 + 23, )
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname, "w")
        h5.create_dataset(
            f,
            b"range",
            shape,
            dtype,
            chunks,
            filter_pipeline=(32008, 32000),
            filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
            filter_opts=None,
        )
        f["range"][:] = data

        f.close()

        f = h5py.File(fname, "r")
        d = f["range"][:]
        self.assertTrue(np.all(d == data))
        f.close()
Example 4
    def test_with_lz4_compression(self):
        shape = (128 * 1024 + 783, )
        chunks = (4 * 1024 + 23, )
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname, "w")
        h5.create_dataset(
            f,
            b"range",
            shape,
            dtype,
            chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
        f["range"][:] = data

        f.close()
        # os.system('h5dump -H -p tmp_test_filters.h5')

        f = h5py.File(fname, "r")
        d = f["range"][:]
        self.assertTrue(np.all(d == data))
        f.close()
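Reading such a file back requires the bitshuffle filter to be registered in the reading process as well; a minimal sketch:

import h5py
import bitshuffle.h5  # noqa: F401 -- the import registers the HDF5 filter

with h5py.File("tmp_test_filters.h5", "r") as f:
    d = f["range"][:]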
Example 5
    def test_with_compression(self):
        shape = (128 * 1024 + 783,)
        chunks = (4 * 1024 + 23,)
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname, "w")
        h5.create_dataset(
            f,
            b"range",
            shape,
            dtype,
            chunks,
            filter_pipeline=(32008,),
            filter_flags=(h5z.FLAG_MANDATORY,),
            filter_opts=((0, h5.H5_COMPRESS_LZ4),),
        )
        f["range"][:] = data

        f.close()
        # os.system('h5dump -H -p tmp_test_filters.h5')

        f = h5py.File(fname, "r")
        d = f["range"][:]
        self.assertTrue(np.all(d == data))
        f.close()
Example 6
    def __write_to_hdf5_light(self, filename_out, *args, **kwargs):
        """ Write data to HDF5 file in one go.

        Args:
            filename_out (str): Name of output file
        """

        block_size = 0

        with h5py.File(filename_out, 'w') as h5:

            h5.attrs[b'CLASS'] = b'FILTERBANK'
            h5.attrs[b'VERSION'] = b'1.0'

            if HAS_BITSHUFFLE:
                bs_compression = bitshuffle.h5.H5FILTER
                bs_compression_opts = (block_size,
                                       bitshuffle.h5.H5_COMPRESS_LZ4)
            else:
                bs_compression = None
                bs_compression_opts = None
                logger.warning(
                    "Warning: bitshuffle not found. No compression applied.")

            dset = h5.create_dataset(
                'data',
                data=self.data,
                compression=bs_compression,
                compression_opts=bs_compression_opts)

            dset_mask = h5.create_dataset(
                'mask',
                shape=self.file_shape,
                compression=bs_compression,
                compression_opts=bs_compression_opts,
                dtype='uint8')

            dset.dims[0].label = b"frequency"
            dset.dims[1].label = b"feed_id"
            dset.dims[2].label = b"time"

            dset_mask.dims[0].label = b"frequency"
            dset_mask.dims[1].label = b"feed_id"
            dset_mask.dims[2].label = b"time"

            # Copy over header information as attributes
            for key, value in self.header.items():
                dset.attrs[key] = value
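The same fall-back pattern works outside a class; a standalone sketch assuming only h5py plus an optional bitshuffle install (file name and array are illustrative):

import numpy as np
import h5py

try:
    import bitshuffle.h5
    compression = bitshuffle.h5.H5FILTER
    compression_opts = (0, bitshuffle.h5.H5_COMPRESS_LZ4)
except ImportError:
    compression = None  # h5py treats None as "no compression"
    compression_opts = None

with h5py.File("demo_light.h5", "w") as h5:
    h5.create_dataset("data",
                      data=np.zeros((16, 1, 512), dtype="float32"),
                      compression=compression,
                      compression_opts=compression_opts)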
Example 7
    def __write_to_hdf5_light(self, filename_out, *args, **kwargs):
        """ Write data to HDF5 file in one go.

        Args:
            filename_out (str): Name of output file
        """

        block_size = 0

        with h5py.File(filename_out, 'w') as h5:

            h5.attrs[b'CLASS']   = b'FILTERBANK'
            h5.attrs[b'VERSION'] = b'1.0'

            if HAS_BITSHUFFLE:
                bs_compression = bitshuffle.h5.H5FILTER
                bs_compression_opts = (block_size, bitshuffle.h5.H5_COMPRESS_LZ4)
            else:
                bs_compression = None
                bs_compression_opts = None
                logger.warning("Warning: bitshuffle not found. No compression applied.")


            dset = h5.create_dataset('data',
                        data=self.data,
                        compression=bs_compression,
                        compression_opts=bs_compression_opts)

            dset_mask = h5.create_dataset('mask',
                        shape=self.file_shape,
                        compression=bs_compression,
                        compression_opts=bs_compression_opts,
                        dtype='uint8')

            dset.dims[0].label = b"frequency"
            dset.dims[1].label = b"feed_id"
            dset.dims[2].label = b"time"

            dset_mask.dims[0].label = b"frequency"
            dset_mask.dims[1].label = b"feed_id"
            dset_mask.dims[2].label = b"time"

            # Copy over header information as attributes
            for key, value in self.header.items():
                dset.attrs[key] = value
Example 8
def create_compressed(hgroup, name, data, **kwargs):
    """
    Add a compressed dataset to a given group.

    Use bitshuffle compression and LZ4 to compress a dataset.

    hgroup: h5py group in which to add dataset
    name:   name of dataset
    data:   data to write
    chunks: chunk size
    """

    # Check explicitly for bitshuffle, as it is not part of h5py
    compression = ''
    if 'compression' in kwargs:
        compression = kwargs['compression']

    #print name, shape, dtype, chunks
    if compression == 'bitshuffle' and USE_BITSHUFFLE:

        if 'chunks' not in kwargs:
            kwargs['chunks'] = guess_chunk(data.shape)
        # Look up chunks unconditionally so it is defined even when the
        # caller supplied it.
        chunks = kwargs['chunks']

        #print "Creating bitshuffled dataset %s" % hgroup
        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
    else:
        #print "Creating dataset %s" % hgroup
        hgroup.create_dataset(name, data.shape, data.dtype, **kwargs)

    hgroup[name][:] = data

    return hgroup[name]
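A hypothetical call site for create_compressed (file, group, and dataset names are illustrative):

import numpy as np
import h5py

with h5py.File("demo_compressed.h5", "w") as f:
    grp = f.create_group("spectra")
    arr = np.arange(256 * 2048, dtype=np.int32).reshape(256, 2048)
    dset = create_compressed(grp, "power", arr, compression="bitshuffle")
    print(dset.shape, dset.dtype)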
Example 9
    def test_with_block_size(self):
        shape = (128 * 1024 + 783,)
        chunks = (4 * 1024 + 23,)
        dtype = np.int64
        data = np.arange(shape[0])
        fname = "tmp_test_filters.h5"
        f = h5py.File(fname, "w")
        h5.create_dataset(f, b"range", shape, dtype, chunks,
                filter_pipeline=(32008, 32000),
                filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY),
                filter_opts=((680,), ()),
                )
        f["range"][:] = data

        f.close()
        #os.system('h5dump -H -p tmp_test_filters.h5')

        f = h5py.File(fname, 'r')
        d = f['range'][:]
        self.assertTrue(np.all(d == data))
        f.close()
Example 10
def create_compressed(hgroup, name, data, **kwargs):
    """
    Add a compressed dataset to a given group.

    Use bitshuffle compression and LZ4 to compress a dataset.

    hgroup: h5py group in which to add dataset
    name:   name of dataset
    data:   data to write
    chunks: chunk size
    """
    
    # Check explicitly for bitshuffle, as it is not part of h5py
    compression = ''
    if 'compression' in kwargs:
        compression = kwargs['compression']
    
    #print name, shape, dtype, chunks
    if compression == 'bitshuffle' and USE_BITSHUFFLE:
        
        if 'chunks' not in kwargs:
            kwargs['chunks'] = guess_chunk(data.shape)
        # Look up chunks unconditionally so it is defined even when the
        # caller supplied it.
        chunks = kwargs['chunks']
            
        #print "Creating bitshuffled dataset %s" % hgroup
        h5.create_dataset(hgroup, name, data.shape, data.dtype, chunks,
                          filter_pipeline=(32008,),
                          filter_flags=(h5z.FLAG_MANDATORY,),
                          filter_opts=((0, h5.H5_COMPRESS_LZ4),),
                          )
    else:
        #print "Creating dataset %s" % hgroup
        hgroup.create_dataset(name, data.shape, data.dtype, **kwargs)

    hgroup[name][:] = data

    return hgroup[name]
Example 11
def create_compressed(hgroup, name, data, **kwargs):
    """
    Add a compressed dataset to a given group.

    Use bitshuffle compression and LZ4 to compress a dataset.

    hgroup: h5py group in which to add dataset
    name:   name of dataset
    data:   data to write
    chunks: chunk size
    """

    # Parse keyword arguments that we need to check
    compression = ''
    if 'compression' in kwargs:
        compression = kwargs['compression']
        if compression is None:
            compression = ''

    if 'chunks' not in kwargs:
        kwargs['chunks'] = guess_chunk(data.shape)
    # Look up chunks unconditionally so it is defined even when the
    # caller supplied it.
    chunks = kwargs['chunks']

    #print name, shape, dtype, chunks

    if compression.startswith('quinoa') and USE_BITSHUFFLE:
        q = 4
        do_dither = True

        try:
            cparts = compression.split('_')
            q = int(cparts[1])
            # bool() on a non-empty string is always True, so test the text
            do_dither = cparts[2].lower() not in ('0', 'false')
        except (IndexError, ValueError):
            pass

        if data.ndim == 2:
            print("QUINOA: scaling %s " % name)
            qdata = quinoa.quinoa_scale(data,
                                        q=q,
                                        subtractive_dither=do_dither)
            data = qdata["data"]
            #data = quinoa.quinoa_unscale(qdata)
            #dtype = "int32"
            for key in qdata:
                if key != 'data':
                    print("QUINOA: %s: %s" % (key, qdata[key]))

        #print "Creating bitshuffled dataset %s" % hgroup
        print(data.dtype)
        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )

    elif compression == 'couscous' and USE_BITSHUFFLE:
        qdata = quinoa.couscous_scale(data)
        data = qdata["data"]
        #for key in qdata:
        #    if key != 'data':
        #        print "COUSCOUS: %s: %s" % (key, qdata[key])

        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )

    elif compression == 'bitshuffle' and USE_BITSHUFFLE:
        #print "Creating bitshuffled dataset %s" % hgroup
        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008, ),
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=((0, h5.H5_COMPRESS_LZ4), ),
        )
    else:
        #print "Creating dataset %s" % hgroup
        hgroup.create_dataset(name, data.shape, data.dtype, **kwargs)

    hgroup[name][:] = data

    return hgroup[name]
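The compression string doubles as a small parameter syntax, e.g. "quinoa_6_0" for q=6 with dithering off (semantics assumed from the parsing above); a sketch of how the pieces decode:

for spec in ("quinoa", "quinoa_6", "quinoa_6_0"):
    cparts = spec.split("_")
    q = int(cparts[1]) if len(cparts) > 1 else 4
    do_dither = cparts[2].lower() not in ("0", "false") if len(cparts) > 2 else True
    print(spec, "->", q, do_dither)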
Example 12
OUT_FILE = "bitshuffle/tests/data/regression_%s.h5" % bitshuffle.__version__

DTYPES = ['a1', 'a2', 'a3', 'a4', 'a6', 'a8', 'a10']

f = h5py.File(OUT_FILE, 'w')
g_comp = f.create_group("compressed")
g_orig = f.create_group("origional")

for dtype in DTYPES:
    for rep in ['a', 'b', 'c']:
        dset_name = "%s_%s" % (dtype, rep)
        # Keep the dtype string intact for naming; use dt for the numpy
        # dtype (reassigning dtype here would corrupt names on later reps).
        dt = np.dtype(dtype)
        n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE)
        shape = (n_elem, )
        chunks = shape
        data = random.randint(0, 255, n_elem * dt.itemsize)
        data = data.astype(np.uint8).view(dt)

        g_orig.create_dataset(dset_name, data=data)

        h5.create_dataset(g_comp,
                          dset_name,
                          shape,
                          dt,
                          chunks=chunks,
                          filter_pipeline=FILTER_PIPELINE,
                          filter_opts=FILTER_OPTS)
        g_comp[dset_name][:] = data

f.close()
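A sketch of a round-trip check on the file written above (same OUT_FILE and group names; the reader needs bitshuffle registered too):

import numpy as np
import h5py
import bitshuffle.h5  # noqa: F401 -- registers the filter for reading

with h5py.File(OUT_FILE, "r") as f:
    for name, dset in f["compressed"].items():
        assert np.array_equal(dset[:], f["origional"][name][:])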
Example 13
def cmd_tool(args=None):
    """ Command line tool for converting guppi raw into HDF5 versions of guppi raw """
    from argparse import ArgumentParser

    if not HAS_BITSHUFFLE:
        print("Error: the bitshuffle library is required to run this script.")
        exit()

    parser = ArgumentParser(
        description="Command line utility for creating HDF5 Raw files.")
    parser.add_argument('filename', type=str, help='Name of filename to read')
    args = parser.parse_args()

    fileroot = args.filename.split('.0000.raw')[0]

    filelist = glob.glob(fileroot + '*.raw')
    filelist = sorted(filelist)

    # Read first file
    r = GuppiRaw(filelist[0])
    header, data = r.read_next_data_block()
    dshape = data.shape  #r.read_next_data_block_shape()
    print(dshape)

    n_blocks_total = 0
    for filename in filelist:
        print(filename)
        r = GuppiRaw(filename)
        n_blocks_total += r.n_blocks
    print(n_blocks_total)

    full_dshape = np.concatenate(((n_blocks_total, ), dshape))

    # Create h5py file
    h5 = h5py.File(fileroot + '.h5', 'w')
    h5.attrs['CLASS'] = 'GUPPIRAW'
    block_size = 0  # This is chunk block size
    dset = h5.create_dataset(
        'data',
        shape=full_dshape,
        #compression=bitshuffle.h5.H5FILTER,
        #compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
        dtype=data.dtype)

    h5_idx = 0
    for filename in filelist:
        print("\nReading %s header..." % filename)
        r = GuppiRaw(filename)

        for ii in range(0, r.n_blocks):
            t0 = time.time()
            print("Reading block %i of %i" % (h5_idx + 1, full_dshape[0]))
            header, data = r.read_next_data_block()
            t1 = time.time()

            t2 = time.time()
            print("Writing block %i of %i" % (h5_idx + 1, full_dshape[0]))
            dset[h5_idx, :] = data
            t3 = time.time()
            print("Read: %2.2fs, Write %2.2fs" % ((t1 - t0), (t3 - t2)))

            h5_idx += 1

            # Copy over header information as attributes
            for key, value in header.items():
                dset.attrs[key] = value

    # Close the single output file once every input file has been copied
    h5.close()

    t1 = time.time()
    print("Conversion time: %2.2fs" % (t1 - t0))
Example 14
def cmd_tool(args=None):
    """ Command line utility for creating HDF5 blimpy files. """
    from argparse import ArgumentParser
    parser = ArgumentParser(description="Command line utility for creating HDF5 Filterbank files.")
    parser.add_argument('dirname', type=str, help='Name of directory to read')
    args = parser.parse_args()
    
    if not HAS_BITSHUFFLE:
        print("Error: the bitshuffle library is required to run this script.")
        exit()

    filelist = glob.glob(os.path.join(args.dirname, '*.fil'))

    for filename in filelist:
        if not os.path.exists(filename + '.h5'):
            t0 = time.time()
            print("\nReading %s header..." % filename)
            fb = Filterbank(filename, load_data=False)

            data_shape = (fb.n_ints_in_file, fb.header['nifs'], fb.header['nchans'])
            data_dtype = fb.data.dtype
            print(data_dtype)


            print("Creating new dataset, %s" % str(data_shape))
            block_size = 0
            h5 = h5py.File(filename + '.h5', 'w')

            h5.attrs['CLASS'] = 'FILTERBANK'

            dset = h5.create_dataset('data',
                              shape=data_shape,
                              compression=bitshuffle.h5.H5FILTER,
                              compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                              dtype=data_dtype)

            dset_mask = h5.create_dataset('mask',
                                     shape=data_shape,
                                     compression=bitshuffle.h5.H5FILTER,
                                     compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                                     dtype='uint8')

            dset.dims[0].label = "frequency"
            dset.dims[1].label = "feed_id"
            dset.dims[2].label = "time"

            dset_mask.dims[0].label = "frequency"
            dset_mask.dims[1].label = "feed_id"
            dset_mask.dims[2].label = "time"

            # Copy over header information as attributes
            for key, value in fb.header.items():
                dset.attrs[key] = value

            filesize = os.path.getsize(filename)

            if filesize >= MAX_SIZE:
                # Copy the data in several passes of n_int_per_read time
                # integrations each, so no single read exceeds MAX_SIZE.
                n_reads = max(1, int(filesize / MAX_SIZE / 2))
                n_int_per_read = (fb.n_ints_in_file + n_reads - 1) // n_reads
                print("Filling in with data over %i reads..." % n_reads)
                for ii in range(0, n_reads):
                    print("Reading %i of %i" % (ii + 1, n_reads))
                    t_start = ii * n_int_per_read
                    t_stop = min((ii + 1) * n_int_per_read, fb.n_ints_in_file)
                    fb = Filterbank(filename, t_start=t_start, t_stop=t_stop)
                    dset[t_start:t_stop] = fb.data[:]
            else:
                fb = Filterbank(filename)
                print(dset.shape, " -> ", fb.data.shape)
                dset[:] = fb.data[:]

            h5.close()

            t1 = time.time()
            print("Conversion time: %2.2fs" % (t1- t0))
Example 15
BLOCK_SIZE = 64   # Smallish such that datasets have many blocks but are small.
FILTER_PIPELINE = [h5.H5FILTER,]
FILTER_OPTS = [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)]

OUT_FILE = "bitshuffle/tests/data/regression_%s.h5" % bitshuffle.__version__

DTYPES = ['a1', 'a2', 'a3', 'a4', 'a6', 'a8', 'a10']


f = h5py.File(OUT_FILE, 'w')
g_comp = f.create_group("compressed")
g_orig = f.create_group("origional")

for dtype in DTYPES:
    for rep in ['a', 'b', 'c']:
        dset_name = "%s_%s" % (dtype, rep)
        # Keep the dtype string intact for naming; use dt for the numpy
        # dtype (reassigning dtype here would corrupt names on later reps).
        dt = np.dtype(dtype)
        n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE)
        shape = (n_elem,)
        chunks = shape
        data = random.randint(0, 255, n_elem * dt.itemsize)
        data = data.astype(np.uint8).view(dt)

        g_orig.create_dataset(dset_name, data=data)

        h5.create_dataset(g_comp, dset_name, shape, dt, chunks=chunks,
                filter_pipeline=FILTER_PIPELINE, filter_opts=FILTER_OPTS)
        g_comp[dset_name][:] = data

f.close()
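For reference, the constants placed in FILTER_PIPELINE and FILTER_OPTS come straight from bitshuffle's h5 module; a sketch printing them:

import bitshuffle.h5 as bsh5

print(bsh5.H5FILTER)         # 32008, the registered bitshuffle filter ID
print(bsh5.H5_COMPRESS_LZ4)  # option flag selecting LZ4 inside bitshuffle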
Example 16
        dset_name = "%s_%s" % (dtype, rep)
        dtype = np.dtype(dtype)
        n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE)
        shape = (n_elem, )
        chunks = shape
        data = random.randint(0, 255, n_elem * dtype.itemsize)
        data = data.astype(np.uint8).view(dtype)

        g_orig.create_dataset(dset_name, data=data)

        # Create LZ4 compressed data
        h5.create_dataset(
            g_comp_lz4,
            bytes(dset_name, "utf-8"),
            shape,
            dtype,
            chunks=chunks,
            filter_pipeline=FILTER_PIPELINE,
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=FILTER_OPTS[0],
        )
        g_comp_lz4[dset_name][:] = data

        # Create ZSTD compressed data
        h5.create_dataset(
            g_comp_zstd,
            bytes(dset_name, "utf-8"),
            shape,
            dtype,
            chunks=chunks,
            filter_pipeline=FILTER_PIPELINE,
            filter_flags=(h5z.FLAG_MANDATORY, ),
            filter_opts=FILTER_OPTS[1],  # assumed: the ZSTD entry mirrors the LZ4 one
        )
        g_comp_zstd[dset_name][:] = data
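ZSTD support is only compiled into bitshuffle when it is built against the zstd library, so a feature check keeps scripts like this portable; a sketch (attribute name as exposed by recent bitshuffle releases):

import bitshuffle.h5 as bsh5

HAVE_ZSTD = hasattr(bsh5, "H5_COMPRESS_ZSTD")
print("ZSTD available:", HAVE_ZSTD)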
Example 17
def cmd_tool(args=None):
    """ Command line utility for creating HDF5 blimpy files. """
    from argparse import ArgumentParser
    parser = ArgumentParser(
        description="Command line utility for creating HDF5 Filterbank files.")
    parser.add_argument('dirname', type=str, help='Name of directory to read')
    args = parser.parse_args()

    if not HAS_BITSHUFFLE:
        print("Error: the bitshuffle library is required to run this script.")
        exit()

    filelist = glob.glob(os.path.join(args.dirname, '*.fil'))

    for filename in filelist:
        if not os.path.exists(filename + '.h5'):
            t0 = time.time()
            print("\nReading %s header..." % filename)
            fb = Filterbank(filename, load_data=False)

            data_shape = (fb.n_ints_in_file, fb.header['nifs'],
                          fb.header['nchans'])
            data_dtype = fb.data.dtype
            print(data_dtype)

            print("Creating new dataset, %s" % str(data_shape))
            block_size = 0
            h5 = h5py.File(filename + '.h5', 'w')

            h5.attrs['CLASS'] = 'FILTERBANK'

            dset = h5.create_dataset(
                'data',
                shape=data_shape,
                compression=bitshuffle.h5.H5FILTER,
                compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                dtype=data_dtype)

            dset_mask = h5.create_dataset(
                'mask',
                shape=data_shape,
                compression=bitshuffle.h5.H5FILTER,
                compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4),
                dtype='uint8')

            dset.dims[0].label = "frequency"
            dset.dims[1].label = "feed_id"
            dset.dims[2].label = "time"

            dset_mask.dims[0].label = "frequency"
            dset_mask.dims[1].label = "feed_id"
            dset_mask.dims[2].label = "time"

            # Copy over header information as attributes
            for key, value in fb.header.items():
                dset.attrs[key] = value

            filesize = os.path.getsize(filename)

            if filesize >= MAX_SIZE:
                # Copy the data in several passes of n_int_per_read time
                # integrations each, so no single read exceeds MAX_SIZE.
                n_reads = max(1, int(filesize / MAX_SIZE / 2))
                n_int_per_read = (fb.n_ints_in_file + n_reads - 1) // n_reads
                print("Filling in with data over %i reads..." % n_reads)
                for ii in range(0, n_reads):
                    print("Reading %i of %i" % (ii + 1, n_reads))
                    t_start = ii * n_int_per_read
                    t_stop = min((ii + 1) * n_int_per_read, fb.n_ints_in_file)
                    fb = Filterbank(filename,
                                    t_start=t_start,
                                    t_stop=t_stop)
                    dset[t_start:t_stop] = fb.data[:]
            else:
                fb = Filterbank(filename)
                print(dset.shape, " -> ", fb.data.shape)
                dset[:] = fb.data[:]

            h5.close()

            t1 = time.time()
            print("Conversion time: %2.2fs" % (t1 - t0))
Example 18
    def __write_to_hdf5_heavy(self, filename_out, *args, **kwargs):
        """ Write data to HDF5 file.

        Args:
            filename_out (str): Name of output file
        """

        block_size = 0

        #Note that a chunk is not a blob!!
        chunk_dim = self.__get_chunk_dimensions()
        blob_dim = self.__get_blob_dimensions(chunk_dim)
        n_blobs = self.container.calc_n_blobs(blob_dim)

        with h5py.File(filename_out, 'w') as h5:

            h5.attrs[b'CLASS'] = b'FILTERBANK'
            h5.attrs[b'VERSION'] = b'1.0'

            if HAS_BITSHUFFLE:
                bs_compression = bitshuffle.h5.H5FILTER
                bs_compression_opts = (block_size, bitshuffle.h5.H5_COMPRESS_LZ4)
            else:
                bs_compression = None
                bs_compression_opts = None
                logger.warning("Warning: bitshuffle not found. No compression applied.")

            dset = h5.create_dataset('data',
                            shape=self.selection_shape,
                            chunks=chunk_dim,
                            compression=bs_compression,
                            compression_opts=bs_compression_opts,
                            dtype=self.data.dtype)

            dset_mask = h5.create_dataset('mask',
                            shape=self.selection_shape,
                            chunks=chunk_dim,
                            compression=bs_compression,
                            compression_opts=bs_compression_opts,
                            dtype='uint8')

            dset.dims[0].label = b"frequency"
            dset.dims[1].label = b"feed_id"
            dset.dims[2].label = b"time"

            dset_mask.dims[0].label = b"frequency"
            dset_mask.dims[1].label = b"feed_id"
            dset_mask.dims[2].label = b"time"

            # Copy over header information as attributes
            for key, value in self.header.items():
                dset.attrs[key] = value

            if blob_dim[self.freq_axis] < self.selection_shape[self.freq_axis]:

                logger.info('Using %i n_blobs to write the data.' % n_blobs)
                for ii in range(0, n_blobs):
                    logger.info('Reading %i of %i' % (ii + 1, n_blobs))

                    bob = self.container.read_blob(blob_dim, n_blob=ii)

                    #-----
                    # Using channels instead of frequency.
                    c_start = self.container.chan_start_idx + ii * blob_dim[self.freq_axis]
                    # Integer division: these values are used as array indices.
                    t_start = self.container.t_start + (c_start // self.selection_shape[self.freq_axis]) * blob_dim[self.time_axis]
                    t_stop = t_start + blob_dim[self.time_axis]

                    # Reverse array if frequency axis is flipped
#                     if self.header['foff'] < 0:
#                         c_stop = self.selection_shape[self.freq_axis] - (c_start)%self.selection_shape[self.freq_axis]
#                         c_start = c_stop - blob_dim[self.freq_axis]
#                     else:
                    c_start = c_start % self.selection_shape[self.freq_axis]
                    c_stop = c_start + blob_dim[self.freq_axis]
                    #-----

                    logger.debug("%s %s %s %s", t_start, t_stop, c_start, c_stop)

                    dset[t_start:t_stop, 0, c_start:c_stop] = bob[:]

            else:

                logger.info('Using %i n_blobs to write the data.' % n_blobs)
                for ii in range(0, n_blobs):
                    logger.info('Reading %i of %i' % (ii + 1, n_blobs))

                    bob = self.container.read_blob(blob_dim, n_blob=ii)
                    t_start = self.container.t_start + ii * blob_dim[self.time_axis]

                    # This prevents issues when the last blob is smaller than the others in time
                    if (ii + 1) * blob_dim[self.time_axis] > self.n_ints_in_file:
                        t_stop = self.n_ints_in_file
                    else:
                        t_stop = (ii + 1) * blob_dim[self.time_axis]

                    dset[t_start:t_stop] = bob[:]
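On the "a chunk is not a blob" comment: the chunk is the HDF5 storage and compression unit, while a blob is the larger unit this writer reads per pass; an illustrative sketch with assumed shapes:

chunk_dim = (16, 1, 512)      # HDF5 chunk: compressed and stored as one unit
blob_dim = (16 * 64, 1, 512)  # read blob: many whole chunks per I/O pass
assert all(b % c == 0 for b, c in zip(blob_dim, chunk_dim))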
Example 19
def create_compressed(hgroup, name, data, **kwargs):
    """
    Add a compressed dataset to a given group.

    Use bitshuffle compression and LZ4 to compress a dataset.

    hgroup: h5py group in which to add dataset
    name:   name of dataset
    data:   data to write
    chunks: chunk size
    """

    # Parse keyword arguments that we need to check
    compression = ""
    if "compression" in kwargs:
        compression = kwargs["compression"]
        if compression is None:
            compression = ""

    if "chunks" not in kwargs:
        kwargs["chunks"] = guess_chunk(data.shape)
        chunks = kwargs["chunks"]

    # print name, shape, dtype, chunks

    if compression.startswith("quinoa") and USE_BITSHUFFLE:
        q = 4
        do_dither = True

        try:
            cparts = compression.split("_")
            q = int(cparts[1])
            # bool() on a non-empty string is always True, so test the text
            do_dither = cparts[2].lower() not in ("0", "false")
        except (IndexError, ValueError):
            pass

        if data.ndim == 2:
            print("QUINOA: scaling %s " % name)
            qdata = quinoa.quinoa_scale(data, q=q, subtractive_dither=do_dither)
            data = qdata["data"]
            # data = quinoa.quinoa_unscale(qdata)
            # dtype = "int32"
            for key in qdata:
                if key != "data":
                    print("QUINOA: %s: %s" % (key, qdata[key]))

        # print "Creating bitshuffled dataset %s" % hgroup
        print(data.dtype)
        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008,),
            filter_flags=(h5z.FLAG_MANDATORY,),
            filter_opts=((0, h5.H5_COMPRESS_LZ4),),
        )

    elif compression == "couscous" and USE_BITSHUFFLE:
        qdata = quinoa.couscous_scale(data)
        data = qdata["data"]
        # for key in qdata:
        #    if key != 'data':
        #        print "COUSCOUS: %s: %s" % (key, qdata[key])

        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008,),
            filter_flags=(h5z.FLAG_MANDATORY,),
            filter_opts=((0, h5.H5_COMPRESS_LZ4),),
        )

    elif compression == "bitshuffle" and USE_BITSHUFFLE:
        # print "Creating bitshuffled dataset %s" % hgroup
        h5.create_dataset(
            hgroup,
            name,
            data.shape,
            data.dtype,
            chunks,
            filter_pipeline=(32008,),
            filter_flags=(h5z.FLAG_MANDATORY,),
            filter_opts=((0, h5.H5_COMPRESS_LZ4),),
        )
    else:
        # print "Creating dataset %s" % hgroup
        hgroup.create_dataset(name, data.shape, data.dtype, **kwargs)

    hgroup[name][:] = data

    return hgroup[name]