示例#1
0
def append_to_hdf5_dataset(mels, stfts, filename):
    """Append mel and STFT data to the HDF5 file belonging to ``filename``.

    The file ``data/<filename>/data`` is opened in append mode, the nodes
    ``/mels`` and ``/stfts`` are looked up with ``get_node`` and the matching
    input is appended to each of them.

    Args:
        mels: Data appended to the ``/mels`` node.
        stfts: Data appended to the ``/stfts`` node.
        filename: Name of the sub-directory of ``data/`` that holds the file.
    """
    filepath = "data/%s/%s" % (filename, "data")

    # The context manager closes the file even when a lookup or an append
    # raises, so a failed write no longer leaks the open handle.
    with tables.open_file(filepath, mode='a') as tables_file:
        for short_name, inp in (('mels', mels), ('stfts', stfts)):
            tables_file.get_node("/%s" % short_name).append(inp)
示例#2
0
def create_hdf5_file(max_freq_length, filename):
    """Create ``data/<filename>/data`` with empty ``mels`` and ``stfts`` arrays.

    Both arrays are Float16 EArrays that are extendable along their first
    axis (initial length 0).

    Args:
        max_freq_length: Size of the second axis of both arrays.
        filename: Name of the sub-directory of ``data/`` that holds the file.
    """
    filepath = "data/%s/%s" % (filename, "data")

    # (node name, shape) pairs; the last axis is scaled by ``audio.r``.
    layout = (
        ('mels', (0, max_freq_length, 80 * audio.r)),
        ('stfts', (0, max_freq_length, 1025 * audio.r)),
    )

    # The context manager closes the file even if creating a node fails.
    with tables.open_file(filepath, mode='w') as tables_file:
        for short_name, size in layout:
            print("Creating earray at /root/%s" % short_name)
            tables_file.create_earray(tables_file.root, short_name,
                                      tables.Float16Atom(), size)
        # Printed while the file is still open so its summary is shown.
        print("Tables file created: %s" % tables_file)
示例#3
0
def genotype_minimac2hdf5(data_path, id, save_path, study_name):
    """Convert a tab-separated genotype dump into a compressed HDF5 chunk.

    The headerless file at ``data_path`` is read as float16, transposed and
    written to ``<save_path>/genotype/<id>_<study_name>.h5`` as a
    zlib-compressed CArray named ``genotype``. The input file is deleted once
    the chunk has been written.

    Args:
        data_path: Path of the tab-separated input file; removed on success.
        id: Chunk identifier used as the prefix of the output file name.
        save_path: Directory that contains the ``genotype`` sub-directory.
        study_name: Study name used for the file name and the HDF5 title.
    """
    df = pd.read_csv(data_path,
                     header=None,
                     index_col=None,
                     sep='\t',
                     dtype=np.float16)
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` gives
    # the same ndarray on both old and current pandas releases.
    data = df.values.T
    print(data.shape)

    chunk_path = os.path.join(save_path, 'genotype',
                              str(id) + '_' + study_name + '.h5')
    print('Saving chunk...{}'.format(chunk_path))

    # The context manager closes the file even if the write fails.
    with tables.open_file(chunk_path, 'w', title=study_name) as h5_gen_file:
        genotype = h5_gen_file.create_carray(
            h5_gen_file.root,
            'genotype',
            tables.Float16Atom(),
            data.shape,
            title='Genotype',
            filters=tables.Filters(complevel=9, complib='zlib'))
        genotype[:] = data

    # Only reached when the chunk was written, so a failed conversion never
    # deletes its input.
    os.remove(data_path)
示例#4
0
    def generate_hdf5_file(self, config):
        """Build the HDF5 dataset at ``self.out_name`` and return its path.

        If the file already exists and ``config['overwrite_data']`` is falsy
        nothing is written. Otherwise the file is (re)created with one group
        per split (``train``, ``val``, ``test``), each holding extendable
        ``images``, ``labels`` and ``filenames`` arrays.

        Args:
            config: Mapping with the keys ``overwrite_data`` and
                ``resize`` (itself holding ``height`` and ``width``).

        Returns:
            ``self.out_name``.
        """
        # Reuse an existing file unless the caller asked for a rebuild.
        if isfile(self.out_name) and not config['overwrite_data']:
            return self.out_name

        # The context manager closes the file even when reading, resizing or
        # appending one of the images fails part-way through.
        with tables.open_file(self.out_name, mode="w",
                              title=self.out_name) as h5file:
            img_dtype = tables.Float16Atom()
            print("Generating HDF5 file")

            new_size = (config['resize']['height'], config['resize']['width'])
            # Extendable along axis 0, with a trailing singleton axis.
            img_shape = (0, ) + new_size + (1, )

            splits = (('train', self.train),
                      ('val', self.validation),
                      ('test', self.test))
            for prefix, df in splits:
                group = h5file.create_group("/", prefix)
                img_storage = h5file.create_earray(group,
                                                   'images',
                                                   img_dtype,
                                                   shape=img_shape)
                label_storage = h5file.create_earray(group,
                                                     'labels',
                                                     img_dtype,
                                                     shape=img_shape)
                filename_storage = h5file.create_earray(
                    group, 'filenames', tables.StringAtom(256), shape=(0, ))

                # The frame index is used as the image path.
                for img_path, meta_data in df.iterrows():
                    print(img_path)
                    filename_storage.append([img_path])

                    # Load the image and its label (same path plus
                    # ``self.label_ext``).
                    img_arr = dicom.read_file(img_path,
                                              force=True).pixel_array
                    label_arr = np.array(
                        Image.open(img_path + self.label_ext))

                    img_arr_resized = resize(img_arr, new_size)
                    label_arr_resized = resize(label_arr, new_size)

                    img_arr_pp = normalize(img_arr_resized).astype(np.float16)
                    label_arr_pp = (label_arr_resized / 255.).astype(
                        np.float16)

                    # Add the trailing singleton axis expected by img_shape.
                    img_arr_pp = np.expand_dims(img_arr_pp, 2)
                    label_arr_pp = np.expand_dims(label_arr_pp, 2)

                    # Indexing with None prepends the row axis for append().
                    img_storage.append(img_arr_pp[None])
                    label_storage.append(label_arr_pp[None])

        return self.out_name
def create_data_file(out_file, channels, samples, image_shape):
    """Create an HDF5 file with extendable ``data``, ``truth`` and ``affine`` arrays.

    All three arrays are blosc-compressed EArrays that grow along their first
    axis and are sized for ``samples`` expected rows. The file is returned
    still open; the caller is responsible for closing it.

    Args:
        out_file: Path of the file to create (opened with mode ``'w'``).
        channels: Size of the second axis of the ``data`` array.
        samples: Expected number of rows, passed as ``expectedrows``.
        image_shape: Trailing dimensions shared by ``data`` and ``truth``.

    Returns:
        Tuple ``(hdf5_file, data_storage, label_storage, affine_storage)``.
    """
    hdf5_file = tables.open_file(out_file, mode='w')
    compression = tables.Filters(complevel=5, complib='blosc')
    spatial_dims = tuple(image_shape)

    def _new_earray(node_name, atom, shape):
        # Every array lives under the root with identical storage settings.
        return hdf5_file.create_earray(hdf5_file.root,
                                       node_name,
                                       atom,
                                       shape=shape,
                                       filters=compression,
                                       expectedrows=samples)

    data_storage = _new_earray('data', tables.Float16Atom(),
                               (0, channels) + spatial_dims)
    label_storage = _new_earray('truth', tables.UInt8Atom(),
                                (0, 1) + spatial_dims)
    affine_storage = _new_earray('affine', tables.Float16Atom(), (0, 4, 4))
    return hdf5_file, data_storage, label_storage, affine_storage
示例#6
0
    def _write_genotype(self, trityper_data):
        """Write the dosages of ``trityper_data`` as chunked HDF5 genotype files.

        Variants are processed in blocks of ``self.chunk_size``. For every
        block the dosages are collected into a matrix (one row per variant),
        the rows listed in ``self.bad_variant_indices`` are dropped, and the
        result is stored as a zlib-compressed Float16 CArray named
        ``genotype`` in
        ``<self.genotype_directory_path>/<chunk_index>_<self.study_name>.h5``.

        The body only uses constructs that are valid on both Python 2 and
        Python 3.

        Args:
            trityper_data: Object providing ``number_of_variants``,
                ``individuals_data`` and ``get_dosages(variant_index)``.
        """
        total_variants = trityper_data.number_of_variants
        # Ceiling division. The former ``n // chunk_size + 1`` scheduled an
        # extra chunk containing zero variants whenever the variant count was
        # an exact multiple of the chunk size.
        number_of_chunks = -(-total_variants // self.chunk_size)
        number_of_individuals = len(trityper_data.individuals_data)

        for chunk_index in range(number_of_chunks):
            start = chunk_index * self.chunk_size
            end = min(start + self.chunk_size, total_variants)
            dosage_matrix = np.empty((end - start, number_of_individuals))

            print("Loading {}-{} variants to write to chunk {} out of {} total chunks".format(
                start, end, chunk_index, number_of_chunks))

            # Get the dosages for every variant within the chunk.
            for row_index, variant_index in enumerate(range(start, end)):
                dosage_matrix[row_index] = trityper_data.get_dosages(
                    variant_index)

            # Drop every variant that did not have two alleles; the indices
            # are shifted so they are relative to this chunk.
            bad_variant_indices_chunk = [
                bad_variant_index - start
                for bad_variant_index in self.bad_variant_indices
                if start <= bad_variant_index < end
            ]
            print(bad_variant_indices_chunk)
            dosage_matrix = np.delete(dosage_matrix,
                                      bad_variant_indices_chunk, 0)

            chunk_path = os.path.join(
                self.genotype_directory_path,
                str(chunk_index) + '_' + str(self.study_name) + '.h5')

            # The context manager closes the file even if the write fails.
            with tables.open_file(chunk_path, 'w',
                                  title=self.study_name) as h5_gen_file:
                genotype = h5_gen_file.create_carray(
                    h5_gen_file.root,
                    'genotype',
                    tables.Float16Atom(),
                    dosage_matrix.shape,
                    title='Genotype',
                    filters=tables.Filters(complevel=9, complib='zlib'))
                genotype[:] = dosage_matrix

        sys.stderr.write(
            "Discarded {} variants that did not have two alleles\n".format(
                len(self.bad_variant_indices)))
示例#7
0
    def save_hdf5_chunk(self, data, out, name):
        """Write ``data`` as the next genotype chunk and advance the counter.

        The chunk is stored as a Float16 CArray named ``genotype`` in
        ``<out>/genotype/<self.hdf5_iter>_<name>.h5`` using
        ``self.pytable_filter``. ``self.hdf5_iter`` is incremented only after
        the file has been written.

        Args:
            data: Array to store; its ``shape`` defines the CArray shape.
            out: Directory that contains the ``genotype`` sub-directory.
            name: Study name used for the file name and the HDF5 title.
        """
        chunk_path = os.path.join(out, 'genotype',
                                  str(self.hdf5_iter) + '_' + name + '.h5')
        print('Saving chunk...{}'.format(chunk_path))

        # The context manager closes the file even if the write fails.
        with tables.open_file(chunk_path, 'w', title=name) as h5_gen_file:
            atom = tables.Float16Atom()  # TODO (low) check data format
            genotype = h5_gen_file.create_carray(h5_gen_file.root,
                                                 'genotype',
                                                 atom,
                                                 data.shape,
                                                 title='Genotype',
                                                 filters=self.pytable_filter)
            genotype[:] = data

        # Drop the local references to the chunk before forcing a collection.
        genotype = None
        data = None
        gc.collect()
        self.hdf5_iter += 1
示例#8
0
    def makeSoltab(self,
                   solset=None,
                   soltype=None,
                   soltab=None,
                   axesNames=[],
                   axesVals=[],
                   chunkShape=None,
                   vals=None,
                   weights=None,
                   parmdbType=None):
        """
        Create a solution-table into a specified solution-set

        All inputs are validated before anything is written, so a call with
        inconsistent axes/values does not leave a partially built group in
        the file.

        Keyword arguments:
        solset -- a solution-set name (String) or a Group instance
        soltype -- solution-type (e.g. amplitude, phase)
        soltab -- the solution-table name (String) if not specified is generated from the solution-type
        axesNames -- list with the axes names (not modified)
        axesVals -- list with the axes values (not modified)
        chunkShape -- list with the chunk shape (currently unused)
        vals -- array whose shape matches the lengths of axesVals
        weights -- array with the same shape as vals; 0->FLAGGED, 1->MAX_WEIGHT
        parmdbType -- original parmdb solution type

        Returns the newly created group.

        Raises Exception when soltype or solset is missing or the
        solution-set does not exist, and AssertionError when the axes and the
        vals/weights shapes do not agree.
        """

        if soltype is None:
            raise Exception(
                "Solution-type not specified while adding a solution-table.")

        # checks on the solset
        if solset is None:
            raise Exception(
                "Solution-set not specified while adding a solution-table.")
        if isinstance(solset, str):
            solset = self.getSolset(solset)
        solsetName = solset._v_name

        if solsetName not in self.getSolsets().keys():
            raise Exception("Solution-set " + solsetName + " doesn't exist.")

        # checks on the soltab
        soltabName = soltab
        if isinstance(soltabName, str) and not re.match(r'^[A-Za-z0-9_-]+$',
                                                        soltabName):
            logging.warning(
                'Solution-table ' + soltabName +
                ' contains unsuported characters. Use [A-Za-z0-9_-]. Switching to default.'
            )
            soltabName = None

        if soltabName in self.getSoltabs(solset).keys():
            logging.warning('Solution-table ' + soltabName +
                            ' already present. Switching to default.')
            soltabName = None

        if soltabName is None:
            soltabName = self._fisrtAvailSoltabName(solset, soltype)

        logging.info('Creating a new solution-table: ' + soltabName + '.')

        # check that the axes are consistent and in the proper order *before*
        # creating any node in the file
        assert len(axesNames) == len(axesVals)
        dim = [len(axisVals) for axisVals in axesVals]
        assert dim == list(vals.shape)
        assert dim == list(weights.shape)

        # input is OK, create the table
        soltab = self.H.create_group("/" + solsetName,
                                     soltabName,
                                     title=soltype)
        soltab._v_attrs['parmdb_type'] = parmdbType

        soltabPath = '/' + solsetName + '/' + soltabName

        # create axes
        for axisName, axisVals in zip(axesNames, axesVals):
            axis = self.H.create_array(soltabPath, axisName, obj=axisVals)
            axis.attrs['h5parm_version'] = _version.__h5parmVersion__

        # create the val/weight arrays
        # array do not have compression but are much faster
        val = self.H.create_array(soltabPath,
                                  'val',
                                  obj=vals.astype(np.float64),
                                  atom=tables.Float64Atom())
        weight = self.H.create_array(soltabPath,
                                     'weight',
                                     obj=weights.astype(np.float16),
                                     atom=tables.Float16Atom())

        axesAttr = ','.join(axesNames)
        val.attrs['VERSION_H5PARM'] = _version.__h5parmVersion__
        val.attrs['AXES'] = axesAttr
        weight.attrs['VERSION_H5PARM'] = _version.__h5parmVersion__
        weight.attrs['AXES'] = axesAttr

        return soltab
示例#9
0
文件: h5parm.py 项目: tammojan/losoto
    def makeSoltab(self, soltype=None, soltabName=None,
            axesNames = [], axesVals = [], chunkShape=None, vals=None,
            weights=None, parmdbType=''):
        """
        Add a new Soltab to this solset and return it.

        Parameters
        ----------
        soltype : str
            Kind of solution stored (e.g. amplitude, phase); mandatory.
        soltabName : str, optional
            Name for the solution-table. When missing, invalid or already in
            use, a name is derived from the solution-type instead.
        axesNames : list
            Names of the axes, in the order of the array dimensions.
        axesVals : list
            Values of each axis (one sequence per axis name).
        chunkShape : list, optional
            Chunk shape; not used by this implementation.
        vals : numpy array
            Values, with one dimension per axis and matching lengths.
        weights : numpy array
            Weights with the same shape as vals.
            0->FLAGGED, 1->MAX_WEIGHT
        parmdbType : str
            Solution type in the originating parmdb.

        Returns
        -------
        soltab obj
            The Soltab wrapping the newly created group.
        """

        if soltype is None:
            raise Exception("Solution-type not specified while adding a solution-table.")

        # Fall back to an automatic name when the requested one is unusable.
        nameIsString = type(soltabName) is str
        if nameIsString and re.match(r'^[A-Za-z0-9_-]+$', soltabName) is None:
            logging.warning('Solution-table '+soltabName+' contains unsuported characters. Use [A-Za-z0-9_-]. Switching to default.')
            soltabName = None

        if soltabName in self.getSoltabNames():
            logging.warning('Solution-table '+soltabName+' already present. Switching to default.')
            soltabName = None

        if soltabName is None:
            soltabName = self._fisrtAvailSoltabName(soltype)

        logging.info('Creating a new solution-table: '+soltabName+'.')

        # Validate everything before anything is written to the file.
        assert len(axesNames) == len(axesVals)
        dim = [len(axesVals[i]) for i, _ in enumerate(axesNames)]
        assert dim == list(vals.shape)
        assert dim == list(weights.shape)

        # Inputs are consistent: create the group, then one array per axis.
        h5file = self.obj._v_file
        soltab = h5file.create_group("/"+self.name, soltabName, title=soltype)
        soltab._v_attrs['parmdb_type'] = parmdbType

        soltabPath = '/'+self.name+'/'+soltabName
        for i, axisName in enumerate(axesNames):
            h5file.create_array(soltabPath, axisName, obj=axesVals[i])

        # Plain arrays are used for val/weight: no compression, but faster.
        val = h5file.create_array(soltabPath, 'val',
                                  obj=vals.astype(np.float64),
                                  atom=tables.Float64Atom())
        weight = h5file.create_array(soltabPath, 'weight',
                                     obj=weights.astype(np.float16),
                                     atom=tables.Float16Atom())

        axesAttr = ','.join(list(axesNames))
        val.attrs['AXES'] = axesAttr
        weight.attrs['AXES'] = axesAttr

        return Soltab(soltab)
示例#10
0
        # Store ``chunk`` under the key 'individuals' in
        # <args.g>/individuals/<args.study_name>.h5, zlib-compressed at level 9.
        # NOTE(review): ``chunk`` is presumably a pandas DataFrame (it exposes
        # ``to_hdf``) -- its definition is outside this fragment, confirm.
        chunk.to_hdf(os.path.join(args.g, 'individuals',
                                  args.study_name + '.h5'),
                     key='individuals',
                     format='table',
                     min_itemsize=25,
                     complib='zlib',
                     complevel=9)

        # Rewrite every file in <args.g>/genotype in place, keeping only the
        # columns selected by ``keep_index``.
        for g_file in os.listdir(os.path.join(args.g, 'genotype')):
            print(g_file)

            # Load the whole 'genotype' dataset into memory before the file is
            # overwritten below.
            # NOTE(review): the h5py.File handle opened here is never closed
            # explicitly before the same path is reopened with mode 'w' --
            # confirm this is safe on the target platform.
            data = h5py.File(os.path.join(args.g, 'genotype', g_file),
                             'r')['genotype'][...]
            data = data[:, keep_index]

            # Mode 'w' truncates the existing file.
            h5_gen_file = tables.open_file(os.path.join(
                args.g, 'genotype', g_file),
                                           'w',
                                           title=args.study_name)

            # Write the filtered matrix back as a zlib-compressed Float16
            # CArray with the same node name.
            atom = tables.Float16Atom()
            genotype = h5_gen_file.create_carray(h5_gen_file.root,
                                                 'genotype',
                                                 atom, (data.shape),
                                                 title='Genotype',
                                                 filters=tables.Filters(
                                                     complevel=9,
                                                     complib='zlib'))
            genotype[:] = data
            h5_gen_file.close()
示例#11
0
def open_h5(f, hid_size):
    """Create an HDF5 file holding one extendable Float16 array named ``data``.

    Args:
        f: Path of the file to create (opened with mode ``'w'``).
        hid_size: Size of the second axis of the array; the first axis
            starts at length 0 and is the extendable one.

    Returns:
        Tuple ``(file, array)``. The file is returned open; closing it is
        left to the caller.
    """
    h5_handle = tables.open_file(f, mode='w')
    data_array = h5_handle.create_earray(h5_handle.root, 'data',
                                         tables.Float16Atom(), (0, hid_size))
    return h5_handle, data_array
示例#12
0
def soltab_swap_freq_time(soltab):
    """Swap the frequency and time axes to make the frequency the fastest varying axis

    Nothing is changed when the soltab lacks a 'freq' or a 'time' axis, or
    when 'freq' already comes after 'time'. Otherwise the ``val`` and
    ``weight`` arrays are replaced by transposed copies; the user attributes
    of both arrays are carried over and their ``AXES`` attribute is updated
    to the new order.

    Parameters
    ----------
    soltab : Soltab
        Soltab object which will be changed

    Raises
    ------
    RuntimeError
        If the values and the weights do not have the same shape.
    """
    vals = soltab.getValues(retAxesVals=False)
    weights = soltab.getValues(weight=True, retAxesVals=False)

    if vals.shape != weights.shape:
        raise RuntimeError("Shape of weights differs from shape of values")

    axesnames = soltab.getAxesNames()

    if 'freq' not in axesnames or 'time' not in axesnames:
        print("Nothing to be done, no freq + time axes in " + soltab.name)
        return

    freqindex = axesnames.index('freq')
    timeindex = axesnames.index('time')

    if freqindex > timeindex:
        print("Nothing to be done, freq already varies fastest in " +
              soltab.name)
        return

    # Swap the time and frequency axis in the axes names and numbers
    axesnums = list(range(len(axesnames)))
    axesnums[freqindex], axesnums[timeindex] = (axesnums[timeindex],
                                                axesnums[freqindex])
    axesnames[freqindex], axesnames[timeindex] = (axesnames[timeindex],
                                                  axesnames[freqindex])
    new_axes = ",".join(axesnames)

    # Transpose the values
    vals = vals.transpose(axesnums)
    weights = weights.transpose(axesnums)

    # Need to remove the arrays from the file because changing shape is not
    # supported by pytables. Remember the attributes of BOTH arrays first:
    # previously only those of ``val`` were restored, so the recreated
    # ``weight`` array lost its attributes (including AXES).
    val_attrs = soltab.obj.val._v_attrs
    val_attrsdict = {name: val_attrs[name] for name in val_attrs._f_list()}
    weight_attrs = soltab.obj.weight._v_attrs
    weight_attrsdict = {
        name: weight_attrs[name]
        for name in weight_attrs._f_list()
    }
    # The axes order in the metadata follows the transposed data.
    val_attrsdict["AXES"] = new_axes
    weight_attrsdict["AXES"] = new_axes

    soltab.obj.val._f_remove()
    soltab.obj.weight._f_remove()

    # Create the new val and weight arrays
    soltab.obj._v_file.create_array(soltab.obj._v_pathname,
                                    'val',
                                    obj=vals,
                                    atom=tables.Float64Atom())
    soltab.obj._v_file.create_array(soltab.obj._v_pathname,
                                    'weight',
                                    obj=weights,
                                    atom=tables.Float16Atom())

    # Restore the original attributes (with the updated AXES)
    for attrname, attrvalue in val_attrsdict.items():
        soltab.obj.val._f_setattr(attrname, attrvalue)
    for attrname, attrvalue in weight_attrsdict.items():
        soltab.obj.weight._f_setattr(attrname, attrvalue)

    soltab.addHistory(
        "Swap frequency and time axes to make frequency vary fastest")

    soltab.obj._f_flush()

    print("Successfully swapped frequency and time axes in " + soltab.name)


# Script entry point: fetch one sample to learn the array sizes, then create
# the HDF5 file with empty, extendable datasets.
if __name__ == '__main__':
    # Only the field, the time axis and the flattened trace are kept from the
    # six values returned by retrieve_data.
    E, t, _, _, _, frogtrace_flat = retrieve_data(plot_frog_bool=False,
                                                  print_size=True)

    # data for input
    # The field is split into its real and imaginary parts.
    E_real = np.real(E)
    E_imag = np.imag(E)

    # create file
    # Each dataset is a Float16 EArray extendable along axis 0; the second
    # axis is sized from the sample retrieved above.
    hdf5_file = tables.open_file('frogtrainingdata.hdf5', mode='w')
    frog_image_f = hdf5_file.create_earray(hdf5_file.root,
                                           'frog',
                                           tables.Float16Atom(),
                                           shape=(0, len(frogtrace_flat)))
    E_real_f = hdf5_file.create_earray(hdf5_file.root,
                                       'E_real',
                                       tables.Float16Atom(),
                                       shape=(0, len(E_real)))
    E_imag_f = hdf5_file.create_earray(hdf5_file.root,
                                       'E_imag',
                                       tables.Float16Atom(),
                                       shape=(0, len(E_imag)))
    # Nothing is appended here: the file is closed with empty datasets.
    hdf5_file.close()

    # create file
    hdf5_file = tables.open_file('frogtestdata.hdf5', mode='w')
    frog_image_f = hdf5_file.create_earray(hdf5_file.root,
                                           'frog',
def getIntensityProfile(masked_image_file,
                        skeletons_file,
                        intensities_file,
                        width_resampling=15,
                        length_resampling=131,
                        min_num_skel=100,
                        smooth_win=11,
                        pol_degree=3,
                        width_percentage=0.5,
                        save_maps=False):
    """Compute straightened-worm intensity profiles and store them in HDF5.

    ``intensities_file`` is (re)created and filled with:

    * the table ``/trajectories_data_valid`` returned by ``setIntMapIndexes``;
    * the array ``/straighten_worm_intensity_median`` with one row per table
      row, holding the median intensity taken across the worm width;
    * when ``save_maps`` is true, ``/straighten_worm_intensity`` with the
      full straightened map of every row.

    The ``has_finished`` attribute of the median array is 0 while the
    computation runs and is set to 1 at the end.

    Parameters
    ----------
    masked_image_file : path of the HDF5 file whose ``/mask`` node is read
        frame by frame.
    skeletons_file : path of the HDF5 file providing ``/skeleton`` and
        ``/width_midbody``.
    intensities_file : path of the output HDF5 file (overwritten).
    width_resampling, length_resampling : sizes of the straightened map;
        even values are incremented by one.
    min_num_skel : passed through ``min_num_skel_defaults`` and then to
        ``setIntMapIndexes``; must end up greater than 0.
    smooth_win, pol_degree : forwarded to ``smoothSkeletons``;
        ``smooth_win`` must be greater than ``pol_degree``.
    width_percentage : value strictly between 0 and 1, used by
        ``getWidthWinLimits`` (only when ``save_maps`` is true).
    save_maps : also store the full straightened maps.
    """

    min_num_skel = min_num_skel_defaults(skeletons_file,
                                         min_num_skel=min_num_skel)

    assert smooth_win > pol_degree
    assert min_num_skel > 0
    assert 0 < width_percentage < 1

    # we want to use a symmetrical distance centered in the skeleton, so both
    # resampling sizes are forced to be odd
    if length_resampling % 2 == 0:
        length_resampling += 1
    if width_resampling % 2 == 0:
        width_resampling += 1

    # get the limits to be averaged from the intensity map; when the maps are
    # not saved the whole width is used
    if save_maps:
        width_win_ind = getWidthWinLimits(width_resampling, width_percentage)
    else:
        width_win_ind = (0, width_resampling)

    # filters for the tables structures
    table_filters = tables.Filters(complevel=5,
                                   complib='zlib',
                                   shuffle=True,
                                   fletcher32=True)

    # Get a reduced version of the trajectories_data table with only the valid skeletons.
    # The rows of this new table are going to be saved into skeletons_file
    trajectories_data_valid = setIntMapIndexes(skeletons_file, min_num_skel)

    # let's save this new table into the intensities file (mode 'w' discards
    # any previous content)
    with tables.File(intensities_file, 'w') as fid:
        fid.create_table('/',
                         'trajectories_data_valid',
                         obj=trajectories_data_valid.to_records(index=False),
                         filters=table_filters)

    tot_rows = len(trajectories_data_valid)
    if tot_rows == 0:
        with tables.File(intensities_file, "r+") as int_file_id:
            # nothing to do here let's save empty data and go out
            worm_int_avg_tab = int_file_id.create_array(
                "/", "straighten_worm_intensity_median", obj=np.zeros(0))
            worm_int_avg_tab._v_attrs['has_finished'] = 1
        return

    with tables.File(masked_image_file, 'r')  as mask_fid, \
            tables.File(skeletons_file, 'r') as ske_file_id, \
            tables.File(intensities_file, "r+") as int_file_id:

        # pointer to the compressed videos
        mask_dataset = mask_fid.get_node("/mask")

        # pointer to skeletons
        skel_tab = ske_file_id.get_node('/skeleton')
        skel_width_tab = ske_file_id.get_node('/width_midbody')

        # NOTE(review): ``filters`` is not used below; the arrays are created
        # with ``table_filters`` instead.
        filters = tables.Filters(complevel=5, complib='zlib', shuffle=True)

        # we are using Float16 to save space, I am assuming the intensities
        # are within the uint8 range
        worm_int_avg_tab = int_file_id.create_carray(
            "/",
            "straighten_worm_intensity_median",
            tables.Float16Atom(dflt=np.nan), (tot_rows, length_resampling),
            chunkshape=(1, length_resampling),
            filters=table_filters)

        # marked as unfinished until every frame has been processed
        worm_int_avg_tab._v_attrs['has_finished'] = 0
        worm_int_avg_tab.attrs['width_win_ind'] = width_win_ind

        if save_maps:
            worm_int_tab = int_file_id.create_carray(
                "/",
                "straighten_worm_intensity",
                tables.Float16Atom(dflt=np.nan),
                (tot_rows, length_resampling, width_resampling),
                chunkshape=(1, length_resampling, width_resampling),
                filters=table_filters)

        # rows are processed frame by frame so each image is read only once
        grouped_frames = trajectories_data_valid.groupby('frame_number')
        # variables used to report progress
        base_name = skeletons_file.rpartition('.')[0].rpartition(
            os.sep)[-1].rpartition('_')[0]
        progressTime = TimeCounter('Obtaining intensity maps.',
                                   len(grouped_frames))

        for frame, frame_data in grouped_frames:
            img = mask_dataset[frame, :, :]
            for ii, row_data in frame_data.iterrows():
                skeleton_id = int(row_data['skeleton_id'])
                # NOTE(review): ``worm_index`` is not used in this loop.
                worm_index = int(row_data['worm_index_joined'])
                int_map_id = int(row_data['int_map_id'])

                # read ROI and skeleton, and put them in the same coordinates
                # map
                worm_img, roi_corner = getWormROI(img, row_data['coord_x'],
                                                  row_data['coord_y'],
                                                  row_data['roi_size'])
                skeleton = skel_tab[skeleton_id, :, :] - roi_corner

                half_width = skel_width_tab[skeleton_id] / 2
                assert not np.isnan(skeleton[0, 0])

                skel_smooth = smoothSkeletons(
                    skeleton,
                    length_resampling=length_resampling,
                    smooth_win=smooth_win,
                    pol_degree=pol_degree)
                straighten_worm, grid_x, grid_y = getStraightenWormInt(
                    worm_img,
                    skel_smooth,
                    half_width=half_width,
                    width_resampling=width_resampling)

                # if you use the mean it is better not to use float16
                int_avg = np.median(
                    straighten_worm[width_win_ind[0]:width_win_ind[1], :],
                    axis=0)

                # ``int_map_id`` is the row of the output arrays for this entry
                worm_int_avg_tab[int_map_id] = int_avg

                # only save the full map if it is specified by the user
                if save_maps:
                    worm_int_tab[int_map_id] = straighten_worm.T

            # report progress every 500 frames
            if frame % 500 == 0:
                progress_str = progressTime.get_str(frame)
                print_flush(base_name + ' ' + progress_str)

        worm_int_avg_tab._v_attrs['has_finished'] = 1