Example No. 1
def create_file(number):
    ## CREATE FILE TO SAVE RWFS AND EVENTS
    h5file = tb.open_file(out_dir + "/" + f"run_{run}_{number}_selected.h5",
                          mode="w",
                          title="selected wfs")
    selrwfs_group = h5file.create_group("/", "RD")
    event_info_group = h5file.create_group("/", "Run")

    PMTRWFs_Array = h5file.create_earray(selrwfs_group,
                                         "pmtrwf",
                                         tb.Int16Atom(),
                                         shape=(0, 3, 32000))
    SIPMRWFs_Array = h5file.create_earray(selrwfs_group,
                                          "sipmrwf",
                                          tb.Int16Atom(),
                                          shape=(0, 256, 800))

    class Event_Info(tb.IsDescription):
        event = tb.Int32Col()
        time = tb.UInt64Col()

    Event_Info_table = h5file.create_table(event_info_group, "events",
                                           Event_Info, "selected events")
    #EI = Event_Info_table.row

    return h5file, PMTRWFs_Array, SIPMRWFs_Array, Event_Info_table
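A sketch of how the returned handles might be filled per event; the waveform arrays pmt_wf (3, 32000) and sipm_wf (256, 800) are hypothetical, and the EArrays grow along their zero-length first axis:

h5file, pmt_arr, sipm_arr, evt_table = create_file(0)

pmt_arr.append(pmt_wf.reshape(1, 3, 32000))    # one event's PMT waveforms
sipm_arr.append(sipm_wf.reshape(1, 256, 800))  # one event's SiPM waveforms

row = evt_table.row
row['event'] = 42          # event number (Int32Col)
row['time'] = 1234567890   # timestamp (UInt64Col)
row.append()
evt_table.flush()
h5file.close()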
Example No. 2
    def create_VLInt16Array(self, name, array, group):
        """Stores a homogeneous variable-length int16 array in a group."""
        vlarray = self.h5file.create_vlarray(group,
                                             name,
                                             tables.Int16Atom(),
                                             "ragged array of int16",
                                             chunkshape=512)
        # Store each row of the ragged input array.
        for row in array:
            vlarray.append(row)
Example No. 3
def save_hdf5(qa_idxs, filename):
    '''save the processed data into a hdf5 file'''
    print("writing hdf5..")
    f = tables.open_file(filename, 'w')
    filters = tables.Filters(complib='blosc', complevel=5)
    earrays = f.create_earray(f.root,
                              'sentences',
                              tables.Int16Atom(),
                              shape=(0, ),
                              filters=filters)
    indices = f.create_table("/", 'indices', Index,
                             "a table of indices and lengths")
    count = 0
    pos = 0
    for qa in qa_idxs:
        q = qa[0]
        a = qa[1]
        earrays.append(np.array(q))
        earrays.append(np.array(a))
        ind = indices.row
        ind['pos'] = pos
        ind['q_len'] = len(q)
        ind['a_len'] = len(a)
        ind.append()
        pos += len(q) + len(a)
        count += 1
        if count % 1000000 == 0:
            print(count)
            sys.stdout.flush()
            indices.flush()
        elif count % 100000 == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
    f.close()
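The Index description used above is not shown; judging from the fields the loop fills (pos, q_len, a_len), it is presumably an IsDescription along these lines (a sketch with assumed column widths):

import tables

class Index(tables.IsDescription):
    pos = tables.Int64Col()    # start offset of the Q/A pair in 'sentences'
    q_len = tables.Int16Col()  # question length
    a_len = tables.Int16Col()  # answer length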
Example No. 4
def add_recording_in_kwd(kwd,
                         recording_id=0,
                         downsample_factor=None,
                         nchannels=None,
                         nsamples=None,
                         data=None):
    if isinstance(kwd, string_types):
        kwd = open_file(kwd, 'a')
        to_close = True
    else:
        to_close = False

    if data is not None:
        nsamples, nchannels = data.shape

    recording = kwd.create_group('/recordings', str(recording_id))
    recording._f_setattr('downsample_factor', downsample_factor)

    dataset = kwd.create_earray(recording,
                                'data',
                                tb.Int16Atom(), (0, nchannels),
                                expectedrows=nsamples)

    # Add raw data.
    if data is not None:
        assert data.shape[1] == nchannels
        data_int16 = convert_dtype(data, np.int16)
        dataset.append(data_int16)

    kwd.create_group(recording, 'filter')
    # TODO: filter
    if to_close:
        kwd.close()

    return kwd
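A minimal usage sketch, assuming the .kwd file already contains a /recordings group and that raw is an int16 array of shape (nsamples, nchannels); the filename is hypothetical:

import numpy as np

raw = np.zeros((40000, 32), dtype=np.int16)  # hypothetical 32-channel recording
add_recording_in_kwd('experiment.raw.kwd', recording_id=0, data=raw)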
Example No. 5
def buildRelationalMatrixToH5File(seqList,
                                  listFeature,
                                  filename="..//outputfile//" + version +
                                  "//encodingFile.h5"):
    fileh = tables.open_file(filename, mode="w")
    # Get the file root
    root = fileh.root
    sizeOfSeqList = len(seqList)

    atom = tables.Int16Atom()

    featureRowMatrix = fileh.create_earray(root, 'featureRowMatrix', atom,
                                           (0, sizeOfSeqList),
                                           "featureRowMatrix")
    seqRowMatrix = fileh.create_earray(root, 'seqRowMatrix', atom,
                                       (sizeOfSeqList, 0), "seqRowMatrix")

    for curFeatureIndex in range(len(listFeature)):
        print(curFeatureIndex)
        curFeature = listFeature[curFeatureIndex]
        arr = np.zeros((sizeOfSeqList, ), np.uint16)

        # count occurrences of the feature in each sequence
        for indexOfSeq in range(sizeOfSeqList):
            arr[indexOfSeq] = seqList[indexOfSeq].count(curFeature)

        # save the array to the HDF5 file
        featureRowMatrix.append([arr])
        seqRowMatrix.append(arr.reshape(sizeOfSeqList, 1))
    fileh.flush()
    fileh.close()
Example No. 6
def segmentCollection(arrayFile, arrayOut):

    DBi = tables.open_file(arrayFile, mode='r')
    array = DBi.root.resampled
    DFi = pandas.read_csv(tsvFile, sep='\t')

    IMG_SHAPE = array[0].shape
    nVols = len(array)

    DBo = tables.open_file(arrayOut, mode='w')
    filters = tables.Filters(
        complevel=1, complib='blosc:snappy'
    )  # 7.7sec / 1.2 GB   (14 sec 1015MB if precision is reduced)           140s 3.7GB
    images = DBo.create_carray(DBo.root,
                               'resampled',
                               atom=tables.Int16Atom(shape=IMG_SHAPE),
                               shape=(nVols, ),
                               filters=filters)

    print(nVols)
    for index in trange(nVols):
        vol = array[index]
        seg = segmentVol(vol)
        print('seg:', seg.min(), seg.mean(), seg.max())
        #images.append([seg])
        images[index] = seg

    DBo.close()
    DBi.close()
Example No. 7
    def add_raw_ephys_data(self, max_load=1e9):
        """

        :param rec_h5_obj: h5 file object from the record.
        :param max_load: number of integers to load at a given time for RAM limitations (default uses 8 GB).
        :return:
        """
        filesize = os.path.getsize(self.bin_fn)
        expct_rows = filesize // self.nchannels // 2
        data = {
        }  # dictionary to hold all of the data array objects (neural and metadata streams).
        for k, v in self.chan_idxes.items():
            if self.run_group.__contains__(k):  # TODO: add overwritability!!!
                for k in self.chan_idxes.keys():
                    try:
                        data[k] = self.run_group._f_get_child(k)
                        logging.info(
                            'Raw data for stream {0} exists for run'.format(k))
                    except tables.NoSuchNodeError:
                        logging.error('No data exists for {0}'.format(k))
                        raise Exception('No data exists for {0}'.format(k))
                logging.info('Data already exists, using existing data.')
                return data

            data[k] = self.rec_h5_obj.create_earray(self.run_group,
                                                    name=k,
                                                    atom=tables.Int16Atom(),
                                                    shape=(0, len(v)),
                                                    title='raw %s edata' % k,
                                                    expectedrows=expct_rows)
            data[k]._v_attrs['bin_filename'] = self.bin_fn
            data[k]._v_attrs['acquisition_system'] = self.prms[
                'acquisition_system']
            data[k]._v_attrs['sampling_rate_Hz'] = self.prms['sample_rate']

        f = open(self.bin_fn, 'rb')
        ld_q = int(max_load) // int(
            self.nchannels
        )  # automatically floors this value. a ceil wouldn't be bad
        ld_iter = ld_q * self.nchannels  # calculate number of values to read in each iteration
        ld_count = 0
        logging.info('\t\tAdding raw run recording data to kwd...')
        while ld_count < filesize:
            arr = np.fromfile(f, np.int16, ld_iter)
            ld_count += ld_iter
            larr = arr.size // self.nchannels
            arr.shape = (larr, self.nchannels)
            for k, v in data.items():
                idx = self.chan_idxes[k]
                v.append(arr[:, idx])
                v.flush()
            pc = float(ld_count) / float(filesize) * 100.
            if pc > 100.:
                pc = 100.
            logging.info('\t\t\t... %0.1f %% complete' % pc)
        f.close()
        self.run_group._v_attrs['bin_filename'] = str(self.bin_fn)
        self.rec_h5_obj.flush()
        return data
Example No. 8
def add_recording_in_kwd(kwd,
                         recording_id=0,
                         downsample_factor=None,
                         nchannels=None,
                         nsamples=None,
                         data=None,
                         name=None,
                         sample_rate=None,
                         start_time=None,
                         start_sample=None,
                         bit_depth=None,
                         band_high=None,
                         band_low=None,
                         filter_name=''):
    if isinstance(kwd, string_types):
        kwd = open_file(kwd, 'a')
        to_close = True
    else:
        to_close = False

    if data is not None:
        nsamples, nchannels = data.shape

    try:
        recording = kwd.create_group('/recordings', str(recording_id))
    except tb.NodeError:
        if to_close:
            kwd.close()
        return kwd
    recording._f_setattr('downsample_factor', downsample_factor)

    dataset = kwd.create_earray(recording,
                                'data',
                                tb.Int16Atom(), (0, nchannels),
                                expectedrows=nsamples)

    # Add raw data.
    if data is not None:
        assert data.shape[1] == nchannels
        data_int16 = convert_dtype(data, np.int16)
        dataset.append(data_int16)

    # Add filter info.
    fil = kwd.create_group(recording, 'filter')
    fil._f_setattr('name', filter_name)

    # Copy recording info from kwik to kwd.
    recording._f_setattr('name', name)
    recording._f_setattr('start_time', start_time)
    recording._f_setattr('start_sample', start_sample)
    recording._f_setattr('sample_rate', sample_rate)
    recording._f_setattr('bit_depth', bit_depth)
    recording._f_setattr('band_high', band_high)
    recording._f_setattr('band_low', band_low)

    if to_close:
        kwd.close()

    return kwd
Example No. 9
    def set_output_store(self, h5out, nmax, sp):

        # RD group
        RD = h5out.create_group(h5out.root, "RD")
        # MC group
        MC = h5out.root.MC
        # create a table to store Energy plane FEE
        self.fee_table = h5out.create_table(MC, "FEE", FEE,
                                            "EP-FEE parameters",
                                            tbl.filters("NOCOMPR"))
        # create vectors
        self.pmtrwf = h5out.create_earray(RD,
                                          "pmtrwf",
                                          atom=tb.Int16Atom(),
                                          shape=(0, sp.NPMT, sp.PMTWL),
                                          expectedrows=nmax,
                                          filters=tbl.filters(
                                              self.compression))

        self.pmtblr = h5out.create_earray(RD,
                                          "pmtblr",
                                          atom=tb.Int16Atom(),
                                          shape=(0, sp.NPMT, sp.PMTWL),
                                          expectedrows=nmax,
                                          filters=tbl.filters(
                                              self.compression))

        self.sipmrwf = h5out.create_earray(RD,
                                           "sipmrwf",
                                           atom=tb.Int16Atom(),
                                           shape=(0, sp.NSIPM, sp.SIPMWL),
                                           expectedrows=nmax,
                                           filters=tbl.filters(
                                               self.compression))

        # run group
        RUN = h5out.create_group(h5out.root, "Run")
        self.runInfot = h5out.create_table(RUN, "RunInfo", RunInfo, "Run info",
                                           tbl.filters("NOCOMPR"))
        self.evtsInfot = h5out.create_table(RUN, "events", EventInfo,
                                            "Events info",
                                            tbl.filters("NOCOMPR"))
Example No. 10
    def create_training_feature_array(self, image_filenames, seg_filenames, array_name, indices_list):

        nb_features_per_subject = 1000000
        nb_subjects = len(image_filenames)
        nb_src_modalities = len(image_filenames[0])
        print(image_filenames[0])
        tmpimg = nib.load(image_filenames[0])
        tmpseg = nib.load(seg_filenames[0])
        image_dtype = tmpimg.get_data_dtype()
        seg_dtype = tmpseg.get_data_dtype()

        feature_array = self.data_storage.create_earray(self.data_storage.root, array_name,
                                                 tables.Atom.from_dtype(image_dtype),
                                                 shape=(0,) + self.feature_shape + (1,),
                                                 expectedrows=np.prod(nb_features_per_subject)*nb_subjects)
        seg_array = self.data_storage.create_earray(self.data_storage.root, array_name+'_seg',
                                                 tables.Atom.from_dtype(seg_dtype),
                                                 shape=(0,) + self.feature_shape + (1,),
                                                 expectedrows=np.prod(nb_features_per_subject)*nb_subjects)

        index_array = self.data_storage.create_earray(self.data_storage.root, array_name+'_index',
                                                 tables.Int16Atom(), shape=(0, 3),
                                                 expectedrows=np.prod(nb_features_per_subject) * nb_subjects)
        if indices_list is None:
            print("No indices_list found")
            indices_list = list()
            for input_file, seg_file in zip(image_filenames, seg_filenames):
                (features, indices) = self.extract_training_patches(input_file, seg_file, intensity_threshold=0,
                                                           step_size=[1,1,1], indices=None)
                feature_array.append(features)
                index_array.append(indices)
                indices_list.append(indices)
                print(input_file + " features extract size ")
                print(features.shape)

        else:
            print("YES indices_list found")

            for input_file, seg_file, curr_indices in zip(image_filenames, seg_filenames, indices_list):
                print("curr indices shape is ")
                print(curr_indices.shape)
                (features, indices) = self.extract_training_patches(input_file, seg_file, intensity_threshold=0,
                                                           step_size=[1,1,1], indices=curr_indices)

                print("indices shape is ")
                print(indices.shape)
                feature_array.append(features)
                index_array.append(curr_indices)
                print(input_file + " features extract size ")
                print(features.shape)



        return feature_array, index_array, indices_list
Example No. 11
    def _add_datasets(self, group, j, track_times):
        # Create a table
        table = self.h5file.create_table(group,
                                         f'table{j}',
                                         Record,
                                         title=self.title,
                                         filters=None,
                                         track_times=track_times)
        # Get the record object associated with the new table
        d = table.row
        # Fill the table
        for i in range(self.nrows):
            d['var1'] = '%04d' % (self.nrows - i)
            d['var2'] = i
            d['var3'] = i * 2
            d.append()  # This injects the Record values
        # Flush the buffer for this table
        table.flush()

        # Create a couple of arrays in each group
        var1List = [x['var1'] for x in table.iterrows()]
        var3List = [x['var3'] for x in table.iterrows()]

        self.h5file.create_array(group,
                                 f'array{j}',
                                 var1List,
                                 f"col {j}",
                                 track_times=track_times)

        # Create CArrays as well
        self.h5file.create_carray(group,
                                  name=f'carray{j}',
                                  obj=var3List,
                                  title="col {}".format(j + 2),
                                  track_times=track_times)

        # Create EArrays as well
        ea = self.h5file.create_earray(group,
                                       f'earray{j}',
                                       tb.StringAtom(itemsize=4), (0, ),
                                       "col {}".format(j + 4),
                                       track_times=track_times)
        # And fill them with some values
        ea.append(var1List)

        # Finally VLArrays too
        vla = self.h5file.create_vlarray(group,
                                         f'vlarray{j}',
                                         tb.Int16Atom(),
                                         "col {}".format(j + 6),
                                         track_times=track_times)
        # And fill them with some values
        vla.append(var3List)
Example No. 12
def fetch_data():
    data_path = "/data/lisatmp3/Lessac_Blizzard2013_segmented/backup"
    partial_path = os.path.join("/Tmp/", os.getenv("USER"))
    hdf5_path = os.path.join(partial_path, "full_blizzard.h5")
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, 'data_*.npy'):
                data_matches.append(os.path.join(root, filename))
        # sort in numeric order of the trailing index in data_*.npy
        data_matches = sorted(
            data_matches,
            key=lambda x: int(x.split("/")[-1].split("_")[-1].split(".")[0]))
        # setup tables
        sz = 32000
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.open_file(hdf5_path, mode='w')
        data = hdf5_file.create_earray(
            hdf5_file.root,
            'data',
            tables.Int16Atom(),
            shape=(0, sz),
            filters=compression_filter,
        )
        for na, f in enumerate(data_matches):
            print("Reading file %s" % (f))
            with open(f, 'rb') as fp:
                # Array of arrays, ragged
                d = np.load(fp, allow_pickle=True)
                for n, di in enumerate(d):
                    print("Processing line %i of %i" % (n, len(d)))
                    # Some of these are stereo??? wtf
                    if len(di.shape) < 2:
                        e = [r for r in range(0, len(di), sz)]
                        e.append(None)
                        starts = e[:-1]
                        stops = e[1:]
                        endpoints = zip(starts, stops)
                        for i, j in endpoints:
                            di_new = di[i:j]
                            # zero pad
                            if len(di_new) < sz:
                                di_large = np.zeros((sz, ), dtype='int16')
                                di_large[:len(di_new)] = di_new
                                di_new = di_large
                            data.append(di_new[None])
        hdf5_file.close()
    hdf5_file = tables.open_file(hdf5_path, mode='r')
    data = hdf5_file.root.data
    X = data
    return X
Example No. 13
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(np.int64): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(np.bool_): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
Example No. 14
    def __init__(self, filename, **kwargs):
        MasterBlock.__init__(self)
        self.filename = filename
        for arg, default in [
            ("node", "table"),
            ("expected_rows", 10**8),
            ("atom", tables.Int16Atom()),
            ("label", "stream"),
            ("metadata", {}),
        ]:
            setattr(self, arg, kwargs.pop(arg, default))
        assert not kwargs, "Invalid kwarg(s) in Hdf_saver: " + str(kwargs)
        if not isinstance(self.atom, tables.Atom):
            self.atom = tables.Atom.from_dtype(np.dtype(self.atom))
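A hypothetical instantiation (the class name Hdf_saver comes from the assert message; plain dtypes are coerced to a tables.Atom by the final two lines):

saver = Hdf_saver("run01.h5",
                  node="waveforms",
                  expected_rows=10**6,
                  atom=np.int16,   # converted to tables.Int16Atom()
                  label="stream")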
Example No. 15
def rwf_writer(h5out: tb.file.File,
               *,
               group_name: str,
               table_name: str,
               compression: str = 'ZLIB4',
               n_sensors: int,
               waveform_length: int) -> Callable:
    """
    Defines group and table where raw waveforms
    will be written.

    h5out           : pytables file
                      Output file where the waveforms are saved
    group_name      : str
                      Name of the group in h5out.root
                      Known options: RD, BLR
                      Setting to None will save directly in root
    table_name      : str
                      Name of the table
                      Known options: pmtrwf, pmtcwf, sipmrwf
    compression     : str
                      file compression
    n_sensors       : int
                      number of sensors in the table (shape[0])
    waveform_length : int
                      Number of samples per sensor
    """
    if group_name is None:
        rwf_group = h5out.root
    elif group_name in h5out.root:
        rwf_group = getattr(h5out.root, group_name)
    else:
        rwf_group = h5out.create_group(h5out.root, group_name)

    rwf_table = h5out.create_earray(rwf_group,
                                    table_name,
                                    atom=tb.Int16Atom(),
                                    shape=(0, n_sensors, waveform_length),
                                    filters=tbl.filters(compression))

    def write_rwf(waveform: np.ndarray) -> None:
        """
        Writes raw waveform arrays to file.
        waveform : np.ndarray
                   shape = (n_sensors, waveform_length) array
                   of sensor charge.
        """
        rwf_table.append(waveform.reshape(1, n_sensors, waveform_length))

    return write_rwf
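A usage sketch under the docstring's own options (RD group, pmtrwf table); the file name and sensor geometry are hypothetical:

import numpy as np
import tables as tb

h5out = tb.open_file("waveforms.h5", "w")
write_pmt = rwf_writer(h5out,
                       group_name="RD",
                       table_name="pmtrwf",
                       n_sensors=12,
                       waveform_length=32000)
write_pmt(np.zeros((12, 32000), dtype=np.int16))  # append one event
h5out.close()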
Example No. 16
    def _create_table(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(np.int64): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(np.bool_): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError(
                "Could not create table %s because of unknown dtype '%s'" %
                (name, example.dtype))

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_earray(h5.root,
                                                 name,
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)
        elif type(example) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)
            self.tables[name] = h5.create_vlarray(h5.root,
                                                  name,
                                                  h5type,
                                                  filters=filters)
        self.types[name] = type(example)
Example No. 17
def main(options):
    gdb = genome.db.GenomeDB(assembly=options.assembly)

    chrom_dict = gdb.get_chromosome_dict()

    track = gdb.create_track(options.track_name[0])
    
    if options.dtype == "float32":
        atom = tables.Float32Atom()
    elif options.dtype == "int8":
        atom = tables.Int8Atom()
    elif options.dtype == "uint8":
        atom = tables.UInt8Atom()
    elif options.dtype == "int16":
        atom = tables.Int16Atom()
    else:
        raise NotImplementedError("datatype %s not implemented" % options.dtype)

    for path in options.filename:
        filename = path.split("/")[-1]

        if options.format in ("xb", "xbf"):
            # all of the chromosomes are in a single file...
            chrom_names = [chrom.name for chrom in gdb.get_chromosomes()]
        else:
            chrom_names = [extract_chrom_name(filename)]
            
        for chrom_name in chrom_names:
            if chrom_name not in chrom_dict:
                raise ValueError("unknown chromosome '%s'" % chrom_name)

            chrom = chrom_dict[chrom_name]
            sys.stderr.write(chrom_name + "\n")

            # create a chunked array with one dimension the length
            # of the chromosome
            shape = [chrom.length]
            carray = track.h5f.create_carray(track.h5f.root, chrom_name,
                                             atom, shape, filters=ZLIB_FILTER)

            # populate the array with data read from a file
            carray[:] = trackreader.read_file(path, chrom,
                                              dtype=options.dtype,
                                              format=options.format,
                                              pos_idx=options.pos_idx,
                                              val_idx=options.val_idx,
                                              strand=options.strand)

    track.close()
Example No. 18
def _make_lfp(raw_files_prefix: str,
              channels,
              lfp_filename,
              acquistion_frequency,
              create_file=False,
              target_freqency=1000,
              dtype=np.int16,
              expectedrows=0):
    """
    Creates a decimated copy of the acquired (or processed) binary file. Only saves specific channels indicated by the
    user. Target frequency is 1kHz, but this can be adjusted as required.

    Output is an HDF5 file with one int16 EArray per channel under /lfp.

    :param raw_files_prefix: Path to the binary files (separated by channels).
    :param channels: list of channels to save LFP copies.
    :param lfp_filename: filename for LFP file to save.
    :param acquistion_frequency: Sampling frequency of the original binary file.
    :param create_file: create lfp file?
    :param target_freqency: Desired sampling frequency of the LFP copy (default is 1 kHz).
    :return:
    """
    logging.info('Making LFP for {}. Loading data...'.format(raw_files_prefix))
    downsample_factor = acquistion_frequency // target_freqency
    lfp_freq = acquistion_frequency / downsample_factor

    if os.path.exists(lfp_filename) and create_file:
        raise ValueError('LFP file already exists.')
    elif create_file:
        with tb.open_file(lfp_filename, 'w') as f:
            n = f.create_group('/', 'lfp')
            n._f_setattr('Frequency_hz', lfp_freq)
            for ch in channels:
                f.create_earray('/lfp/',
                                'ch_{0:04n}'.format(ch),
                                tb.Int16Atom(),
                                shape=(0, ),
                                expectedrows=expectedrows // downsample_factor,
                                filters=LFP_FILTER)
    logging.info("writing LFP to {}".format(lfp_filename))
    with tb.open_file(lfp_filename, 'r+') as f:
        for ch in tqdm(channels, unit='chan', desc='LFP save'):
            fn = _gen_channel_fn(raw_files_prefix, ch)
            a = np.fromfile(fn, dtype=dtype)
            b = decimate(a, downsample_factor, zero_phase=True)
            ch_array = f.get_node('/lfp/ch_{0:04n}'.format(ch))
            ch_array.append(b)
    logging.info('Complete.')
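A hypothetical call (channel list and frequencies are illustrative; LFP_FILTER and _gen_channel_fn are module-level helpers the function assumes):

_make_lfp('run01/raw_ch',
          channels=[0, 1, 2, 3],
          lfp_filename='run01_lfp.h5',
          acquistion_frequency=30000,  # parameter names as spelled in the signature
          create_file=True,
          target_freqency=1000)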
Example No. 19
def buildTrainingSet(DF, segImages):

	sparseImages = SparseImageSource('/data/datasets/lung/resampled_order1/segmentedNonzero.h5')

	outArray = '/ssd/camsB.h5'
	outTsv = outArray.replace('.h5', '.tsv')

	camImageDF = pandas.DataFrame()



	DBo = tables.open_file(outArray, mode='w')
	filters = tables.Filters(complevel=6, complib='blosc:snappy')      # 7.7sec / 1.2 GB   (14 sec 1015MB if precision is reduced)           140s 3.7GB
	#filters = None
	cams = DBo.create_earray(DBo.root, 'cams', atom=tables.Int16Atom(shape=CAM_SHAPE), shape=(0,), expectedrows=len(DF), filters=filters)




	for index, row in tqdm(DF.iterrows(), total=len(DF)):
		print(row)
		cancer = row['cancer']

		# slow
		#image, imgNum = getImage(segImages, row)
		#camImage = makeCamImgFromImage(image, cubeSize)

		# faster
		#image = sparseImages.getImageFromSparse(row)
		#camImage = makeCamImgFromImage(image, cubeSize)

		# should be fastest
		cubes, positions = sparseImages.getCubesAndPositions(row, posType='pos')
		camImage = makeCamImageFromCubes(cubes, positions)


		print('CAM IMAGE SHAPE %s    mean %s   max %s     ==========' % (camImage.shape, camImage.mean(), camImage.max()))

		if camImage.mean() == 0: print('THIS IMAGE IS BAD ========================')


		cam = forceImageIntoShape(camImage, CAM_SHAPE)

		cams.append([cam])
		camImageDF = pandas.concat([camImageDF, row.to_frame().T])

		camImageDF.to_csv(outTsv, sep='\t')
Example No. 20
def _make_tables(hdf5_file, n_sensors, compression="ZLIB4"):
    compr = tbl_filters(compression)
    trigger_group = hdf5_file.create_group(hdf5_file.root, 'Trigger')
    make_table = partial(hdf5_file.create_table, trigger_group, filters=compr)

    trg_type = make_table('trigger', table_formats.TriggerType, "Trigger Type")

    array_name = "events"
    trg_channels = hdf5_file.create_earray(trigger_group,
                                           array_name,
                                           atom=tb.Int16Atom(),
                                           shape=(0, n_sensors),
                                           filters=compr)

    trg_tables = trg_type, trg_channels

    return trg_tables
Example No. 21
    def _create_table(self, name, example, parent=None):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        """
        h5 = self.h5
        filters = tables.Filters(complevel=self.compression_level,
                                 complib='zlib',
                                 shuffle=True)
        if parent is None:
            parent = h5.root

        if type(example) == str:
            h5type = tables.VLStringAtom()
            h5.create_vlarray(parent, name, h5type, filters=filters)
            return
        if type(example) == dict:
            self.h5.create_group(parent, name)
            return
        #If we get here then we're dealing with numpy arrays
        example = np.asarray(example)

        #MODIFICATION: appended name everywhere and introduced string
        type_map = {
            np.dtype(np.float64).name: tables.Float64Atom(),
            np.dtype(np.float32).name: tables.Float32Atom(),
            np.dtype(np.int64).name: tables.Int64Atom(),
            np.dtype(np.int8).name: tables.Int8Atom(),
            np.dtype(np.uint8).name: tables.UInt8Atom(),
            np.dtype(np.int16).name: tables.Int16Atom(),
            np.dtype(np.uint16).name: tables.UInt16Atom(),
            np.dtype(np.int32).name: tables.Int32Atom(),
            np.dtype(np.uint32).name: tables.UInt32Atom(),
            np.dtype(np.bool_).name: tables.BoolAtom(),
            # Maximal string length of 128 per string - change if needed
            'string32': tables.StringAtom(128)
        }

        try:
            h5type = type_map[example.dtype.name]
            h5dim = (0, ) + example.shape
            h5.create_earray(parent, name, h5type, h5dim, filters=filters)
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)
Example No. 22
def rwf_writer(file,
               *,
               group_name      : 'options: RD, BLR',
               table_name      : 'options: pmtrwf, pmtcwf, sipmrwf',
               compression     = 'ZLIB4',
               n_sensors       : 'number of pmts or sipms',
               waveform_length : 'length of pmt or sipm waveform_length'):
    try:                       rwf_group = getattr          (file.root, group_name)
    except tb.NoSuchNodeError: rwf_group = file.create_group(file.root, group_name)

    rwf_table = file.create_earray(rwf_group,
                                   table_name,
                                   atom    = tb.Int16Atom(),
                                   shape   = (0, n_sensors, waveform_length),
                                   filters = tbl.filters(compression))
    def write_rwf(waveform : 'np.array: RWF, CWF, SiPM'):
        rwf_table.append(waveform.reshape(1, n_sensors, waveform_length))
    return write_rwf
Example No. 23
def edf2hdf5(fn):
    ef = _edflib.Edfreader(fn)
    # nf = tables.createFile(fn+'.h5')
    nsigs = ef.signals_in_file
    print("nsigs:", nsigs)

    nsamples = [ef.samples_in_file(ii) for ii in range(nsigs)]
    nsample0 = nsamples[0]
    nsamples = np.array(nsamples)

    if any(nsamples != nsample0):
        raise Exception(
            "Assumption error: all signals should have the same sample count (equal rate)"
        )

    print("nsample0", nsample0)

    bigarr = np.empty(nsample0, dtype='int32')
    big16arr = np.empty(nsample0, dtype='int16')

    ii = 5
    _edflib.read_int_samples(ef.handle, ii, nsample0, bigarr)  # read every sample of signal ii

    big16arr[:] = bigarr

    compfilter = tables.Filters(complevel=6, complib='zlib')

    h5 = tables.open_file('tstint16.h5',
                          mode="w",
                          title="test int16 file",
                          filters=compfilter)

    atom16 = tables.Int16Atom()
    shape = big16arr.shape

    dataset = h5.create_carray(h5.root,
                               'int16 array',
                               atom16,
                               shape,
                               filters=compfilter)
    dataset[:] = big16arr
    h5.flush()
    h5.close()
Example No. 24
    def _setup_output(self):
        outputfile = self.LPU_id + '_out'
        if self.record_neuron:
            self.outputfile_I = tables.open_file(outputfile + 'I.h5', 'w')
            self.outputfile_I.create_earray(
                "/", "array",
                tables.Float64Atom() if self.dtype == np.double else
                tables.Float32Atom(), (0, self.num_neurons))

            self.outputfile_V = tables.open_file(outputfile + 'V.h5', 'w')
            self.outputfile_V.create_earray(
                "/", "array",
                tables.Float64Atom() if self.dtype == np.double else
                tables.Float32Atom(), (0, self.num_neurons))

        if self.record_microvilli:
            self.outputfile_X0 = tables.open_file(outputfile + 'X0.h5', 'w')
            self.outputfile_X0.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))

            self.outputfile_X1 = tables.open_file(outputfile + 'X1.h5', 'w')
            self.outputfile_X1.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))

            self.outputfile_X2 = tables.open_file(outputfile + 'X2.h5', 'w')
            self.outputfile_X2.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))

            self.outputfile_X3 = tables.open_file(outputfile + 'X3.h5', 'w')
            self.outputfile_X3.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))

            self.outputfile_X4 = tables.open_file(outputfile + 'X4.h5', 'w')
            self.outputfile_X4.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))

            self.outputfile_X5 = tables.open_file(outputfile + 'X5.h5', 'w')
            self.outputfile_X5.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))

            self.outputfile_X6 = tables.open_file(outputfile + 'X6.h5', 'w')
            self.outputfile_X6.create_earray("/", "array", tables.Int16Atom(),
                                             (0, self.num_neurons))
Example No. 25
def initfile(h5name, ncsf, q_down, include_times=True):
    """
    initializes a h5 file to store converted data
    """
    adbitvolts = ncsf.header['ADBitVolts']
    timestep = ncsf.timestep

    chname = ncsf.header['AcqEntName']

    h5f = tables.open_file(h5name, 'w')

    h5f.create_group('/', 'data')
    h5f.create_earray('/data', 'rawdata', tables.Int16Atom(), [0])
    h5f.root.data.rawdata.set_attr('ADBitVolts', adbitvolts)
    h5f.root.data.rawdata.set_attr('timestep', timestep)
    h5f.root.data.rawdata.set_attr('Q', q_down)
    h5f.root.data.rawdata.set_attr('AcqEntName', chname)

    if include_times:
        h5f.create_earray('/', 'time', tables.UInt64Atom(), [0])

    return h5f
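A sketch of how the returned handle might be used, assuming ncsf is an open Neuralynx .ncs reader (as the header fields suggest) and hypothetical arrays samples_int16 and timestamps_uint64:

h5f = initfile('ch01.h5', ncsf, q_down=4)
h5f.root.data.rawdata.append(samples_int16)  # decimated int16 samples
h5f.root.time.append(timestamps_uint64)      # matching uint64 timestamps
h5f.close()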
Example No. 26
    def SaveToHDF(self, Filename, loc="/"):
        """A generic function for saving ForceFields / Topologies / Conformations to H5 files.  Certain types of data cannot be stored as simple arrays, so these are the exceptions (if statements) in this function."""

        # check h5 file doesn't already exist
        CheckIfFileExists(Filename)

        F = tables.open_file(Filename, 'a')

        for key, data in self.items():
            #print(key, data)
            try:  # Check whether the list is homogeneous and can be stored as a square array; if not, we need a VLArray.
                TEMP = np.array(data)
                if TEMP.dtype == np.dtype("object"):
                    raise ValueError  # Necessary for NumPy 1.6.0 and greater, which allow inhomogeneous lists to be converted to object arrays.
            except ValueError:
                F.create_vlarray(loc, key, tables.Int16Atom())
                for x in data:
                    F.get_node(loc, key).append(x)
                continue
            SaveEntryAsCArray(np.array(data), key, F0=F, loc=loc)
        F.flush()
        F.close()
Example No. 27
            if (listSubStr[curIndex]
                    in listSubStr[indexArr]) or (listSubStr[indexArr]
                                                 in listSubStr[curIndex]):
                listRet.append(curIndex)

    # contains only a single index (itself) => no other array matches it
    if (len(listRet) == 1):
        return None
    else:
        return listRet


#-------------------------BUILD MATRIX AND SAVE TO HDF5 FILES--------------------------
sizeOfSeqList = len(seqList)

atom = tables.Int16Atom()
# Use ``atom`` as the atom type for the enlargeable arrays.
featureRowMatrix = fileh.create_earray(root, 'featureRowMatrix', atom,
                                       (0, sizeOfSeqList), "featureRowMatrix")
seqRowMatrix = fileh.create_earray(root, 'seqRowMatrix', atom,
                                   (sizeOfSeqList, 0), "seqRowMatrix")
count = 0

for curFeatureIndex in range(len(listSubStr)):
    count += 1
    print(count)
    curFeature = listSubStr[curFeatureIndex]
    arr = np.zeros((sizeOfSeqList, ), np.uint16)

    # count occurrences of the feature in each sequence
    for indexOfSeq in range(sizeOfSeqList):
Example No. 28
    def _create_table_list(self, name, example):
        """
        Create a new table within the HDF file, where the tables shape and its
        datatype are determined by *example*.
        The modified version for creating table with appendList
        """
        type_map = {
            np.dtype(np.float64): tables.Float64Atom(),
            np.dtype(np.float32): tables.Float32Atom(),
            np.dtype(np.int64): tables.Int64Atom(),
            np.dtype(np.int8): tables.Int8Atom(),
            np.dtype(np.uint8): tables.UInt8Atom(),
            np.dtype(np.int16): tables.Int16Atom(),
            np.dtype(np.uint16): tables.UInt16Atom(),
            np.dtype(np.int32): tables.Int32Atom(),
            np.dtype(np.uint32): tables.UInt32Atom(),
            np.dtype(np.bool_): tables.BoolAtom(),
        }

        try:
            if type(example) == np.ndarray:
                h5type = type_map[example.dtype]
            elif type(example) == list and type(example[0]) == str:
                h5type = tables.VLStringAtom()
        except KeyError:
            raise TypeError("Don't know how to handle dtype '%s'" %
                            example.dtype)

        if type(example) == np.ndarray:
            h5dim = (0, ) + example.shape[1:]

            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)

            nodes = h5.list_nodes(h5.root)

            nmpt = name.replace('.', '/\n')
            nmpt = nmpt.split('\n')

            path = '/'
            for kay in range(len(nmpt) - 1):
                #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path,nmpt[kay][:-1])
                try:
                    h5.is_visible_node(path + nmpt[kay][:-1])
                except tables.NoSuchNodeError:
                    h5.create_group(path, nmpt[kay][:-1])
                path += nmpt[kay]

            self.tables[name] = h5.create_earray(path,
                                                 nmpt[-1],
                                                 h5type,
                                                 h5dim,
                                                 filters=filters)

        elif type(example) == list and type(example[0]) == str:
            h5 = self.h5
            filters = tables.Filters(complevel=self.compression_level,
                                     complib='zlib',
                                     shuffle=True)

            nodes = h5.list_nodes(h5.root)

            nmpt = name.replace('.', '/\n')
            nmpt = nmpt.split('\n')

            path = '/'
            for kay in range(len(nmpt) - 1):
                #if not path+nmpt[kay][:-1] in str(nodes): h5.create_group(path,nmpt[kay][:-1])
                try:
                    h5.is_visible_node(path + nmpt[kay][:-1])
                except tables.NoSuchNodeError:
                    h5.create_group(path, nmpt[kay][:-1])
                path += nmpt[kay]

            self.tables[name] = h5.create_vlarray(path,
                                                  nmpt[-1],
                                                  h5type,
                                                  filters=filters)

        self.types[name] = type(example)
Example No. 29
    data = model.get_data(dataset)
    dataptr = data.root.test_img if test_set else data.root.train_img
    batch_size = 64

    # test model image by image
    batch_round = 0
    nseq = network_params['sequence_length']
    nsoundstream = network_params['audio_gen']['nsoundstream']
    n_v1_write = model.n_v1_write
    v1_gaussian = network_params['v1_gaussian']
    section_len = int(network_params['audio_gen']['section_len_msec'] / 1000. * network_params['fs'])
    nmodulation = network_params['audio_gen']['nmodulation']
    soundstream_len = int(nmodulation * section_len)
    soundscape_len = int(soundstream_len * network_params['audio_gen']['soundscape_len_by_stream_len']) * nseq
    float_dtype = tables.Float32Atom()
    ss_dtype = tables.Int16Atom()
    img_dtype = tables.UInt8Atom()

    set_text = '_test' if test_set else '_train'
    hdf5_file = tables.open_file('data/gendata_' + config_id + set_text + '.hdf5', mode='w')
    cs_storage = hdf5_file.create_earray(hdf5_file.root, 'cs', img_dtype, shape=[0, nseq, model.img_h, model.img_w])
    if test_set:
        ss_storage = hdf5_file.create_earray(hdf5_file.root, 'soundscapes', ss_dtype, shape=[0, soundscape_len, 2])
    img_storage = hdf5_file.create_earray(hdf5_file.root, 'gen_img', img_dtype, shape=[0, model.img_h, model.img_w])
    inp_img_storage = hdf5_file.create_earray(hdf5_file.root, 'inp_img', img_dtype, shape=[0, model.img_h, model.img_w])
    df_storage = hdf5_file.create_earray(hdf5_file.root, 'df', float_dtype, shape=[0, nseq, nsoundstream, soundstream_len])
    da_storage = hdf5_file.create_earray(hdf5_file.root, 'da', float_dtype, shape=[0, nseq, nsoundstream, soundstream_len])
    dazim_storage = hdf5_file.create_earray(hdf5_file.root, 'dazim', float_dtype, shape=[0, nseq, nsoundstream, soundstream_len])
    gx_storage = hdf5_file.create_earray(hdf5_file.root, 'gx', float_dtype, shape=[0, nseq, n_v1_write])
    gy_storage = hdf5_file.create_earray(hdf5_file.root, 'gy', float_dtype, shape=[0, nseq, n_v1_write])
    delta_storage = hdf5_file.create_earray(hdf5_file.root, 'delta', float_dtype, shape=[0, nseq, n_v1_write])
Example No. 30
def ANASTASIA(argv=sys.argv):
    """
    ANASTASIA driver
    """
    CFP = configure(argv)

    if CFP["INFO"]:
        print(__doc__)

    # Increase thresholds by 1% for safety
    PMT_NOISE_CUT_RAW = CFP["PMT_NOISE_CUT_RAW"] * 1.01
    PMT_NOISE_CUT_BLR = CFP["PMT_NOISE_CUT_BLR"] * 1.01
    SIPM_ZS_METHOD = CFP["SIPM_ZS_METHOD"]
    SIPM_NOISE_CUT = CFP["SIPM_NOISE_CUT"]
    COMPRESSION = CFP["COMPRESSION"]

    with tb.open_file(CFP["FILE_IN"], "r+",
                      filters=tbl.filters(CFP["COMPRESSION"])) as h5in:
        pmtblr = h5in.root.RD.pmtblr
        pmtcwf = h5in.root.RD.pmtcwf
        sipmrwf = h5in.root.RD.sipmrwf
        pmtdf = DB.DataPMT()
        sipmdf = DB.DataSiPM()

        NEVT, NPMT, PMTWL = pmtcwf.shape
        NEVT, NSIPM, SIPMWL = sipmrwf.shape

        print_configuration({"# PMT": NPMT, "PMT WL": PMTWL,
                             "# SiPM": NSIPM, "SIPM WL": SIPMWL,
                             "# events in DST": NEVT})

        # Create instance of the noise sampler and compute noise thresholds
        sipms_noise_sampler_ = SiPMsNoiseSampler(SIPMWL)

        if SIPM_ZS_METHOD == "FRACTION":
            sipms_thresholds_ = sipms_noise_sampler_.ComputeThresholds(
                                SIPM_NOISE_CUT, sipmdf['adc_to_pes'])
        else:
            sipms_thresholds_ = np.ones(NSIPM) * SIPM_NOISE_CUT

        if "/ZS" not in h5in:
            h5in.create_group(h5in.root, "ZS")
        if "/ZS/PMT" in h5in:
            h5in.remove_node("/ZS", "PMT")
        if "/ZS/BLR" in h5in:
            h5in.remove_node("/ZS", "BLR")
        if "/ZS/SiPM" in h5in:
            h5in.remove_node("/ZS", "SiPM")

        # Notice the Int16, not Float32! Float32 is bad for compression.
        pmt_zs_ = h5in.create_earray(h5in.root.ZS, "PMT",
                                     atom=tb.Int16Atom(),
                                     shape=(0, NPMT, PMTWL),
                                     expectedrows=NEVT,
                                     filters=tbl.filters(COMPRESSION))

        blr_zs_ = h5in.create_earray(h5in.root.ZS, "BLR",
                                     atom=tb.Int16Atom(),
                                     shape=(0, NPMT, PMTWL),
                                     expectedrows=NEVT,
                                     filters=tbl.filters(COMPRESSION))

        sipm_zs_ = h5in.create_earray(h5in.root.ZS, "SiPM",
                                      atom=tb.Int16Atom(),
                                      shape=(0, NSIPM, SIPMWL),
                                      expectedrows=NEVT,
                                      filters=tbl.filters(COMPRESSION))

        adc_to_pes = abs(1.0/pmtdf["adc_to_pes"].reshape(NPMT, 1))
        t0 = time()
        for i in define_event_loop(CFP, NEVT):
            sumpmt = np.sum(pmtcwf[i] * adc_to_pes, axis=0)
            selection = np.tile(sumpmt > PMT_NOISE_CUT_RAW, (NPMT, 1))
            pmtzs = np.where(selection, pmtcwf[i], 0)

            blr = wfm.subtract_baseline(FE.CEILING - pmtblr[i])
            sumpmt = np.sum(blr * adc_to_pes, axis=0)
            selection = np.tile(sumpmt > PMT_NOISE_CUT_BLR, (NPMT, 1))
            blrzs = np.where(selection, blr, 0)

            pmt_zs_.append(pmtzs[np.newaxis])
            blr_zs_.append(blrzs[np.newaxis])

            sipmzs = sipmrwf[i]
            if "/MC" not in h5in:
                sipmzs = wfm.subtract_baseline(sipmzs, 200)
            sipmzs = wfm.noise_suppression(sipmzs, sipms_thresholds_)
            sipm_zs_.append(sipmzs[np.newaxis])

        t1 = time()
        dt = t1-t0

        print("ANASTASIA has run over {} events in {} seconds".format(i+1, dt))
    print("Leaving ANASTASIA. Safe travels!")