Example #1
    def _create_fit_datasets(self):
        """
        Creates the HDF5 fit dataset. pycroscopy requires that the h5 group, guess dataset,
        corresponding spectroscopic and position datasets be created and populated at this point.
        This function will create the HDF5 dataset for the fit and link it to the same ancillary datasets as the guess.
        The fit dataset will NOT be populated here but will instead be populated using the __setData function.
        """

        if self._h5_guess is None or self.h5_results_grp is None:
            warn('Need to guess before fitting!')
            return
        """
        Once the guess is complete, the last_pixel attribute will be set to complete for the group.
        Once the fit is initiated, during the creation of the status dataset, this last_pixel
        attribute will be used and it will make the fit look like it was already complete, which is not the case.
        This is a problem of doing two processes within the same group.
        Until all legacy code is removed, we will simply reset the last_pixel attribute.
        """
        self.h5_results_grp.attrs['last_pixel'] = 0

        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        # Create the fit dataset as an empty dataset of the same size and dtype
        # as the guess.
        # Also automatically links in the ancillary datasets.
        self._h5_fit = USIDataset(
            create_empty_dataset(self._h5_guess, dtype=sho32, dset_name='Fit'))

        self._h5_fit.file.flush()

        if self.verbose and self.mpi_rank == 0:
            print('Finished creating Fit dataset')
Example #2
def write_results(h5_group, dataset=None, attributes=None, process_name=None):

    found_valid_dataset = False
    if dataset is not None:
        if isinstance(dataset, Dataset):
            found_valid_dataset = True
    found_valid_attributes = False

    if attributes is not None:
        if isinstance(attributes, dict):
            if len(attributes) > 0:
                found_valid_attributes = True
    if not (found_valid_dataset or found_valid_attributes):
        raise ValueError(
            'results should contain at least a sidpy Dataset or a non-empty dictionary of attributes'
        )
    log_name = 'Log_'
    if process_name is not None:
        log_name = log_name + process_name
    log_group = create_indexed_group(h5_group, log_name)

    if found_valid_dataset:
        write_nsid_dataset(dataset, log_group)
    if found_valid_attributes:
        write_simple_attrs(log_group, flatten_dict(attributes))

    return log_group
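
A minimal usage sketch for the function above; the file name, data, and attribute values are illustrative and not from the source:

import h5py
import numpy as np
import sidpy

# Hypothetical file and dataset purely for demonstration
h5_f = h5py.File('write_results_demo.h5', 'w')
demo_dset = sidpy.Dataset.from_array(np.random.rand(4, 4))
log_grp = write_results(h5_f, dataset=demo_dset,
                        attributes={'algorithm': 'demo', 'iterations': 3},
                        process_name='Demo')
print(log_grp.name)  # e.g. '/Log_Demo_000'
h5_f.close()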
Example #3
    def create_hdf5_file(self,
                         append_path='',
                         grp_name='Measurement',
                         overwrite=False):
        if not append_path:
            h5_path = os.path.join(self.directory,
                                   self.basename.replace('.txt', '.h5'))
            if os.path.exists(h5_path):
                if not overwrite:
                    raise FileExistsError(
                        'This file already exists. Set attribute overwrite to True'
                    )
                else:
                    print('Overwriting file', h5_path)
                    #os.remove(h5_path)

            self.h5_f = h5py.File(h5_path, mode='w')

        else:
            if not os.path.exists(append_path):
                raise Exception('File does not exist. Check pathname.')
            self.h5_f = h5py.File(append_path, mode='r+')

        self.h5_meas_grp = create_indexed_group(self.h5_f, grp_name)

        write_simple_attrs(self.h5_meas_grp, self.params_dictionary)

        return
Example #4
def make_simple_nsid_dataset(*args, **kwargs):
    """
    h5 dataset which is fully pyNSID compatible
    """
    # Use a persistent temporary directory (tempfile.mkdtemp) so the HDF5 file
    # survives beyond this scope; os.path.join keeps it inside that directory
    tmp_dir = tempfile.mkdtemp()
    file_path = os.path.join(tmp_dir, 'nsid_simple.h5')
    h5_file = h5py.File(file_path, 'a')
    h5_group = h5_file.create_group('MyGroup')

    dsetnames = kwargs.get("dsetnames", ['data'])
    dsetshapes = kwargs.get("dsetshapes")
    if dsetshapes is None:
        dsetshapes = [(2, 3) for i in range(len(dsetnames))]
    for i, d in enumerate(dsetnames):
        data = np.random.normal(size=dsetshapes[i])
        h5_dataset = h5_group.create_dataset(d, data=data)

        attrs_to_write = {
            'quantity': 'quantity',
            'units': 'units',
            'pyNSID_version': 'version',
            'main_data_name': 'title',
            'data_type': 'UNKNOWN',
            'modality': 'modality',
            'source': 'test'
        }
        if len(args) > 0:
            for k, v in args[0].items():
                if k in attrs_to_write:
                    attrs_to_write[k] = v

        write_simple_attrs(h5_dataset, attrs_to_write)

        dims = {
            0:
            h5_group.create_dataset("a{}".format(i),
                                    data=np.arange(data.shape[0])),
            1:
            h5_group.create_dataset("b{}".format(i),
                                    data=np.arange(data.shape[1]))
        }
        for dim, this_dim_dset in dims.items():
            name = this_dim_dset.name.split('/')[-1]
            attrs_to_write = {
                'name': name,
                'units': 'units',
                'quantity': 'quantity',
                'dimension_type': 'dimension_type.name',
                'nsid_version': 'test'
            }

            write_simple_attrs(this_dim_dset, attrs_to_write)

            this_dim_dset.make_scale(name)
            h5_dataset.dims[dim].label = name
            h5_dataset.dims[dim].attach_scale(this_dim_dset)
    return h5_file
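
For reference, a hedged usage sketch of the helper above; the attribute overrides and dataset names are illustrative:

h5_file = make_simple_nsid_dataset({'quantity': 'Current', 'units': 'nA'},
                                   dsetnames=['data_0', 'data_1'])
print(list(h5_file['MyGroup'].keys()))       # e.g. ['a0', 'a1', 'b0', 'b1', 'data_0', 'data_1']
print(dict(h5_file['MyGroup/data_0'].attrs))
h5_file.close()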
Example #5
    def _translate_force_map(self, h5_meas_grp):
        """
        Reads the scan image + force map from the proprietary file and writes it to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # First lets write the image into the measurement group that has already been created:
        image_parms = self.meta_data['Ciao image list']
        quantity = image_parms.pop('Image Data_2')
        image_mat = self._read_image_layer(image_parms)
        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        write_main_dataset(
            h5_chan_grp,
            np.reshape(image_mat, (-1, 1)),
            'Raw_Data',
            # Quantity and Units need to be fixed by someone who understands these files better
            quantity,
            'a. u.',
            [
                Dimension('X', 'nm', image_parms['Samps/line']),
                Dimension('Y', 'nm', image_parms['Number of lines'])
            ],
            Dimension('single', 'a. u.', 1),
            dtype=np.float32,
            compression='gzip')
        # Think about standardizing attributes for rows and columns
        write_simple_attrs(h5_chan_grp, image_parms)

        # Now work on the force map:
        force_map_parms = self.meta_data['Ciao force image list']
        quantity = force_map_parms.pop('Image Data_4')
        force_map_vec = self._read_data_vector(force_map_parms)
        tr_rt = [
            int(item) for item in force_map_parms['Samps/line'].split(' ')
        ]
        force_map_2d = force_map_vec.reshape(image_mat.size, np.sum(tr_rt))
        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        write_main_dataset(
            h5_chan_grp,
            force_map_2d,
            'Raw_Data',
            # Quantity and Units need to be fixed by someone who understands these files better
            quantity,
            'a. u.',
            [
                Dimension('X', 'nm', image_parms['Samps/line']),
                Dimension('Y', 'nm', image_parms['Number of lines'])
            ],
            Dimension('Z', 'nm', int(np.sum(tr_rt))),
            dtype=np.float32,
            compression='gzip')
        # Think about standardizing attributes
        write_simple_attrs(h5_chan_grp, force_map_parms)
Example #6
def create_results_group(h5_main, tool_name, h5_parent_group=None):
    """
    Creates an auto-indexed h5py.Group object named 'DatasetName-ToolName_00x'

    Parameters
    ----------
    h5_main : h5py.Dataset object
        Reference to the dataset based on which the process / analysis is being performed
    tool_name : string / unicode
        Name of the Process / Analysis applied to h5_main
    h5_parent_group : h5py.Group, optional. Default = None
        Parent group under which the results group will be created. Use this
        option to write results into a new HDF5 file. By default, results will
        be written into the same group containing `h5_main`

    Returns
    -------
    h5_group : :class:`h5py.Group`
        Results group which can now house the results datasets

    """
    if not isinstance(h5_main, h5py.Dataset):
        raise TypeError('h5_main should be a h5py.Dataset object')
    if h5_parent_group is not None:
        if not isinstance(h5_parent_group, (h5py.File, h5py.Group)):
            raise TypeError("'h5_parent_group' should either be a h5py.File "
                            "or h5py.Group object")
    else:
        h5_parent_group = h5_main.parent

    tool_name = validate_single_string_arg(tool_name, 'tool_name')

    if '-' in tool_name:
        warn('tool_name should not contain the "-" character. Reformatted name from:{} to '
             '{}'.format(tool_name, tool_name.replace('-', '_')))
    tool_name = tool_name.replace('-', '_')

    group_name = h5_main.name.split('/')[-1] + '-' + tool_name + '_'
    group_name = assign_group_index(h5_parent_group, group_name)

    h5_group = h5_parent_group.create_group(group_name)

    write_book_keeping_attrs(h5_group)

    # Also add some basic attributes like source and tool name. This will allow relaxation of nomenclature restrictions:
    # these are NOT being used right now but will be in subsequent versions of pyUSID
    write_simple_attrs(h5_group, {'tool': tool_name, 'num_source_dsets': 1})
    # in this case, there is only one source
    if h5_parent_group.file == h5_main.file:
        for dset_ind, dset in enumerate([h5_main]):
            h5_group.attrs['source_' + '{:03d}'.format(dset_ind)] = dset.ref

    return h5_group
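
A short, self-contained illustration of the resulting naming convention; the file and dataset here are hypothetical stand-ins, since the function only requires a plain h5py.Dataset:

import h5py
import numpy as np

# Bare dataset standing in for a USID main dataset
with h5py.File('results_group_demo.h5', 'w') as h5_f:
    h5_dset = h5_f.create_dataset('Raw_Data', data=np.random.rand(8, 16))
    h5_grp = create_results_group(h5_dset, 'Cluster')
    print(h5_grp.name)  # e.g. '/Raw_Data-Cluster_000'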
Example #7
    def _translate_gwy(self, file_path, meas_grp):
        """

        Parameters
        ----------
        file_path
        meas_grp

        For more information on the .gwy file format visit the link below -
        http://gwyddion.net/documentation/user-guide-en/gwyfile-format.html
        """

        # Need to build a set of channels to test against and a function-level variable to write to
        channels = {}

        # Read the data in from the specified file
        gwy_data = gwyfile.load(file_path)
        for obj in gwy_data:
            gwy_key = obj.split('/')
            try:
                # if the second index of the gwy_key can be cast into an int then
                # it needs to be processed either as an image or a graph

                int(gwy_key[1])

                if gwy_key[2] == 'graph':
                    # graph processing
                    self.global_parms['data_type'] = 'GwyddionGWY_' + 'Graph'
                    channels = self._translate_graph(meas_grp, gwy_data, obj,
                                                     channels)
                elif obj.endswith('data'):
                    self.global_parms['data_type'] = 'GwyddionGWY_' + 'Image'
                    channels = self._translate_image_stack(
                        meas_grp, gwy_data, obj, channels)
                else:
                    continue
            except ValueError:
                # if the second index of the gwy_key cannot be cast into an int
                # then it needs to be processed either as spectra, a volume, or xyz

                if gwy_key[1] == 'sps':
                    self.global_parms['data_type'] = 'GwyddionGWY_' + 'Spectra'
                    channels = self._translate_spectra(meas_grp, gwy_data, obj,
                                                       channels)
                elif gwy_key[1] == 'brick':
                    self.global_parms['data_type'] = 'GwyddionGWY_' + 'Volume'
                    channels = self._translate_volume(meas_grp, gwy_data, obj,
                                                      channels)
                elif gwy_key[1] == 'xyz':
                    self.global_parms['data_type'] = 'GwyddionGWY_' + 'XYZ'
                    channels = self._translate_xyz(meas_grp, gwy_data, obj,
                                                   channels)
        write_simple_attrs(meas_grp.parent, self.global_parms)
Example #8
    def _write_source_dset_provenance(self):
        """
        Writes path of HDF5 file and path of h5_main to the results group
        if results are being written to a new HDF5 file
        """
        if self.h5_main.file == self.h5_results_grp.file:
            return
        write_simple_attrs(
            self.h5_results_grp, {
                'source_file_path': self.h5_main.file.filename,
                'source_dataset_path': self.h5_main.name
            })
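
To read the provenance back later, the same attributes can be fetched with sidpy's get_attributes. A self-contained sketch, with a hypothetical group and attribute values standing in for a real results group:

import h5py
from sidpy.hdf.hdf_utils import get_attributes, write_simple_attrs

# Hypothetical stand-in for a results group living in a new HDF5 file
with h5py.File('provenance_demo.h5', 'w') as h5_f:
    h5_grp = h5_f.create_group('Raw_Data-Process_000')
    write_simple_attrs(h5_grp, {'source_file_path': '/path/to/source.h5',
                                'source_dataset_path': '/Measurement_000/Channel_000/Raw_Data'})
    print(get_attributes(h5_grp, ['source_file_path', 'source_dataset_path']))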
Example #9
    def _write_results_chunk(self):
        """
        Writes the provided SVD results to file
        """
        comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

        h5_svd_group = create_results_group(self.h5_main, self.process_name,
                                            h5_parent_group=self._h5_target_group)
        self.h5_results_grp = h5_svd_group
        self._write_source_dset_provenance()
        

        write_simple_attrs(h5_svd_group, self.parms_dict)
        write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

        h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U', 'Abundance', 'a.u.', None, comp_dim,
                                  h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                  dtype=np.float32, chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))
        # print(get_attr(self.h5_main, 'quantity')[0])
        h5_v = write_main_dataset(h5_svd_group, self.__v, 'V', get_attr(self.h5_main, 'quantity')[0],
                                  'a.u.', comp_dim, None, h5_spec_inds=self.h5_main.h5_spec_inds,
                                  h5_spec_vals=self.h5_main.h5_spec_vals,
                                  chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

        # No point making this 1D dataset a main dataset
        h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

        '''
        Check h5_main for plot group references.
        Copy them into V if they exist
        '''
        for key in self.h5_main.attrs.keys():
            if '_Plot_Group' not in key:
                continue

            ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key], return_method='corners')
            ref_inds = ref_inds.reshape([-1, 2, 2])
            ref_inds[:, 1, 0] = h5_v.shape[0] - 1

            svd_ref = create_region_reference(h5_v, ref_inds)

            h5_v.attrs[key] = svd_ref

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                           data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]
Example #10
    def _write_dset_attributes(h5_dset, attrs, print_log=False):
        """
        Writes attributes to a h5py dataset

        Parameters
        ----------
        h5_dset : h5py.Dataset object
            h5py dataset to which the attributes will be written to.
            This function handles region references as well
        attrs : dict
            Dictionary containing the attributes as key-value pairs
        print_log : bool, optional. Default=False
            Whether or not to print debugging statements
        """
        if not isinstance(attrs, dict):
            HDFwriter.__safe_abort(h5_dset.file)
            raise TypeError(
                'attrs should be a dictionary but is instead of type '
                '{}'.format(type(attrs)))
        if not isinstance(h5_dset, h5py.Dataset):
            raise TypeError(
                'h5_dset should be a h5py Dataset object but is instead of type '
                '{}. UNABLE to safely abort'.format(type(h5_dset)))

        # First, set aside the complicated attribute(s)
        attr_dict = attrs.copy()
        labels_dict = attr_dict.pop('labels', None)

        # Next, write the simple ones using a centralized function
        write_simple_attrs(h5_dset,
                           attr_dict,
                           obj_type='dataset',
                           verbose=print_log)

        if labels_dict is None:
            if print_log:
                print('Finished writing all attributes of dataset')
            return

        if isinstance(labels_dict, (tuple, list)):
            # What if the labels dictionary is just a list of names? make a dictionary using the names
            # This is the most that can be done.
            labels_dict = attempt_reg_ref_build(h5_dset,
                                                labels_dict,
                                                verbose=print_log)

        if len(labels_dict) == 0:
            if print_log:
                warn('No region references to write')
            return
        # Now, handle the region references attribute:
        write_region_references(h5_dset, labels_dict, verbose=print_log)
Example #11
    def _translate_image_stack(self, h5_meas_grp):
        """
        Reads the scan images from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp,
                                                         Dimension(
                                                             'single', 'a. u.',
                                                             1),
                                                         is_spectral=True)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                break

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [
            Dimension('X', 'nm', layer_info['Samps/line']),
            Dimension('Y', 'nm', layer_info['Number of lines'])
        ],
                                                       is_spectral=False)

        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_2')
                data = self._read_image_layer(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(
                    h5_chan_grp,
                    np.reshape(data, (-1, 1)),
                    'Raw_Data',
                    # Quantity and Units need to be fixed by someone who understands these files better
                    quantity,
                    'a. u.',
                    None,
                    None,
                    dtype=np.float32,
                    compression='gzip',
                    h5_pos_inds=h5_pos_inds,
                    h5_pos_vals=h5_pos_vals,
                    h5_spec_inds=h5_spec_inds,
                    h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes for rows and columns
                write_simple_attrs(h5_chan_grp, layer_info)
Example #12
    def _translate_force_curve(self, h5_meas_grp):
        """
        Reads the force curves from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp,
                                                       Dimension(
                                                           'single', 'a. u.',
                                                           1),
                                                       is_spectral=False)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                break
        tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')]

        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
            h5_meas_grp,
            Dimension('Z', 'nm', int(np.sum(tr_rt))),
            is_spectral=True)

        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_4')
                data = self._read_data_vector(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(
                    h5_chan_grp,
                    np.expand_dims(data, axis=0),
                    'Raw_Data',
                    # Quantity and Units need to be fixed by someone who understands these files better
                    quantity,
                    'a. u.',
                    None,
                    None,
                    dtype=np.float32,
                    compression='gzip',
                    h5_pos_inds=h5_pos_inds,
                    h5_pos_vals=h5_pos_vals,
                    h5_spec_inds=h5_spec_inds,
                    h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes
                write_simple_attrs(h5_chan_grp, layer_info)
Example #13
def write_book_keeping_attrs(h5_obj):
    """
    Writes basic book-keeping and posterity related attributes to groups created in pyNSID such as machine id,
    pyNSID version, timestamp.

    Parameters
    ----------
    h5_obj : :class:`h5py.Dataset`, :class:`h5py.Group`, or :class:`h5py.File`
        Object to which basic book-keeping attributes need to be written

    """
    hut.write_book_keeping_attrs(h5_obj)
    hut.write_simple_attrs(h5_obj, {'pyNSID_version': py_nsid_version})
Example #14
def write_pynsid_book_keeping_attrs(h5_object):
    """
    Writes book-keeping information to the HDF5 object

    Parameters
    ----------
    h5_object : h5py.Dataset, h5py.Group, or h5py.File
        Object to which the book-keeping attributes will be written
    """
    write_book_keeping_attrs(h5_object)
    write_simple_attrs(h5_object, {'pyNSID_version': pynsid_version})
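
A minimal sketch of calling this helper on a freshly created group; the file and group names are illustrative:

import h5py

with h5py.File('bookkeeping_demo.h5', 'w') as h5_f:
    h5_grp = h5_f.create_group('Measurement_000')
    write_pynsid_book_keeping_attrs(h5_grp)
    # Typically yields attributes such as machine_id, timestamp, platform, and pyNSID_version
    print(dict(h5_grp.attrs))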
Example #15
    def _create_guess_datasets(self):
        """
        Creates the h5 group, guess dataset, corresponding spectroscopic datasets and also
        links the guess dataset to the spectroscopic datasets.
        """
        self.h5_results_grp = create_results_group(
            self.h5_main,
            self.process_name,
            h5_parent_group=self._h5_target_group)
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        # If writing to a new HDF5 file:
        # Add back the data_type attribute - still being used in the visualizer
        if self.h5_results_grp.file != self.h5_main.file:
            write_simple_attrs(
                self.h5_results_grp.file,
                {'data_type': get_attr(self.h5_main.file, 'data_type')})

        ret_vals = write_reduced_anc_dsets(self.h5_results_grp,
                                           self.h5_main.h5_spec_inds,
                                           self.h5_main.h5_spec_vals,
                                           self._fit_dim_name,
                                           verbose=self.verbose)

        h5_sho_inds, h5_sho_vals = ret_vals

        self._h5_guess = write_main_dataset(
            self.h5_results_grp, (self.h5_main.shape[0], self.num_udvs_steps),
            'Guess',
            'SHO',
            'compound',
            None,
            None,
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            h5_spec_inds=h5_sho_inds,
            h5_spec_vals=h5_sho_vals,
            chunks=(1, self.num_udvs_steps),
            dtype=sho32,
            main_dset_attrs=self.parms_dict,
            verbose=self.verbose)

        # Does not make sense to propagate region refs - nobody uses them
        # copy_region_refs(self.h5_main, self._h5_guess)

        self._h5_guess.file.flush()

        if self.verbose and self.mpi_rank == 0:
            print('Finished creating Guess dataset')
Example #16
    def _create_root_image(self, image_path):
        """
        Create the Groups and Datasets for a single root image

        Parameters
        ----------
        image_path : str
            Path to the image file

        Returns
        -------
        None
        """
        image, image_parms = read_dm3(image_path)
        if image.ndim == 3:
            image = np.sum(image, axis=0)
        '''
        Create the Measurement and Channel Groups to hold the
        image Datasets
        '''
        meas_grp = create_indexed_group(self.h5_f, 'Measurement')

        chan_grp = create_indexed_group(meas_grp, 'Channel')
        '''
        Set the Measurement Group attributes
        '''
        usize, vsize = image.shape
        image_parms['image_size_u'] = usize
        image_parms['image_size_v'] = vsize
        image_parms['translator'] = 'OneView'
        image_parms['num_pixels'] = image.size
        write_simple_attrs(meas_grp, image_parms)
        '''
        Build Spectroscopic and Position dimensions
        '''
        spec_desc = Dimension('Image', 'a.u.', [1])
        pos_desc = [
            Dimension('X', 'pixel', np.arange(image.shape[0])),
            Dimension('Y', 'pixel', np.arange(image.shape[1]))
        ]

        h5_image = write_main_dataset(chan_grp, np.reshape(image, (-1, 1)),
                                      'Raw_Data', 'Intensity', 'a.u.',
                                      pos_desc, spec_desc)

        self.root_image_list.append(h5_image)
Example #17
    def translate(self, file_path, *args, **kwargs):
        # Two kinds of files:
        # 1. Simple GSF files -> use metadata, data = gsf_read(file_path)
        # 2. Native .gwy files -> use the gwyfile package
        # I have a notebook that shows how such data can be read.
        # Create the .h5 file from the input file
        if not isinstance(file_path, (str, unicode)):
            raise TypeError('file_path should be a string!')
        if not (file_path.endswith('.gsf') or file_path.endswith('.gwy')):
            # TODO: Gwyddion is weird, it doesn't append the file extension sometimes.
            # In theory, you could identify the kind of file by looking at the header (line 38 in gsf_read()).
            # Ideally the header check should be used instead of the extension check
            raise ValueError('file_path must have a .gsf or .gwy extension!')

        file_path = path.abspath(file_path)
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        self.h5_file = h5py.File(h5_path, 'w')
        """
        Setup the global parameters
        ---------------------------
        translator: Gwyddion
        data_type: depends on file type
                    GwyddionGSF_<gsf_meta['title']>
                    or
                    GwyddionGWY_<gwy_meta['title']>
        """
        self.global_parms = dict()
        self.global_parms['translator'] = 'Gwyddion'

        # Create the measurement group
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')

        if file_path.endswith('.gsf'):
            self._translate_gsf(file_path, meas_grp)

        if file_path.endswith('gwy'):
            self._translate_gwy(file_path, meas_grp)

        write_simple_attrs(self.h5_file, self.global_parms)

        return h5_path
Example #18
def get_dim_dict(dims: Tuple[int]) -> Dict[int, h5py.Dataset]:
    h5_f = h5py.File('test2.h5', 'a')
    h5_group = h5_f.create_group('MyGroup2')
    dim_dict = {}
    names = ['X', 'Y', 'Z', 'F']
    for i, d in enumerate(dims):
        dim_dict[i] = h5_group.create_dataset(names[i], data=np.arange(d))
    for dim, this_dim_dset in dim_dict.items():
        name = this_dim_dset.name.split('/')[-1]
        attrs_to_write = {
            'name': name,
            'units': 'units',
            'quantity': 'quantity',
            'dimension_type': 'dimension_type.name',
            'nsid_version': 'test'
        }
        write_simple_attrs(this_dim_dset, attrs_to_write)
    return dim_dict
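
A quick usage sketch of the helper above; it creates (or appends to) test2.h5 in the working directory:

dim_dict = get_dim_dict((5, 3))
for index, h5_dim in dim_dict.items():
    print(index, h5_dim.name, dict(h5_dim.attrs))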
Example #19
def make_nsid_dataset_no_dim_attached():
    """
    h5 dataset which is fully pyNSID compatible, except that the dimension scales are not attached
    """
    # Use a persistent temporary directory (tempfile.mkdtemp) so the HDF5 file
    # survives beyond this scope; os.path.join keeps it inside that directory
    tmp_dir = tempfile.mkdtemp()
    file_path = os.path.join(tmp_dir, 'nsid_simple.h5')
    h5_file = h5py.File(file_path, 'a')
    h5_group = h5_file.create_group('MyGroup')
    data = np.random.normal(size=(2, 3))
    h5_dataset = h5_group.create_dataset('data', data=data)
    attrs_to_write = {
        'quantity': 'quantity',
        'units': 'units',
        'nsid_version': 'version',
        'main_data_name': 'title',
        'data_type': 'UNKNOWN',
        'modality': 'modality',
        'source': 'test'
    }

    write_simple_attrs(h5_dataset, attrs_to_write)

    dims = {
        0: h5_group.create_dataset('a', data=np.arange(data.shape[0])),
        1: h5_group.create_dataset('b', data=np.arange(data.shape[1]))
    }
    for dim, this_dim_dset in dims.items():
        name = this_dim_dset.name.split('/')[-1]
        attrs_to_write = {
            'name': name,
            'units': 'units',
            'quantity': 'quantity',
            'dimension_type': 'dimension_type.name',
            'nsid_version': 'test'
        }

        write_simple_attrs(this_dim_dset, attrs_to_write)

        this_dim_dset.make_scale(name)
        h5_dataset.dims[dim].label = name
        # h5_dataset.dims[dim].attach_scale(this_dim_dset)
    return h5_file
Example #20
    def _setupH5(self, image_parms):
        """
        Setup the HDF5 file in which to store the data
        Due to the structure of the ndata format, we can only create the Measurement and Channel groups here

        Parameters
        ----------
        image_parms : dict
            Dictionary of parameters

        Returns
        -------
        h5_channels : list of h5py.Group
            Channel groups into which the image data will later be written
        """
        root_parms = dict()
        root_parms['data_type'] = 'PtychographyData'

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_f, root_parms)

        h5_channels = list()
        for meas_parms in image_parms:
            # Create new measurement group for each set of parameters
            meas_grp = create_indexed_group(self.h5_f, 'Measurement')
            # Write the parameters as attributes of the group
            write_simple_attrs(meas_grp, meas_parms)

            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_channels.append(chan_grp)

        self.h5_f.flush()

        return h5_channels
Example #21
def write_simple_attrs(h5_obj, attrs, obj_type='', verbose=False):
    """
    Writes attributes to a h5py object

    Parameters
    ----------
    h5_obj : :class:`h5py.File`, :class:`h5py.Group`, or h5py.Dataset object
        h5py object to which the attributes will be written to
    attrs : dict
        Dictionary containing the attributes as key-value pairs
    obj_type : str or unicode, optional. Default = ''
        Type of the h5py object. Examples include 'group', 'file', 'dataset'
    verbose : bool, optional. Default=False
        Whether or not to print debugging statements

    """
    warn('pyUSID.io.hdf_utils.write_simple_attrs has been moved to '
         'sidpy.hdf.hdf_utils.write_simple_attrs. This copy in pyUSID will '
         'be removed in a future release. Please update your import statements')
    return hut.write_simple_attrs(h5_obj,
                                  attrs,
                                  obj_type=obj_type,
                                  verbose=verbose)
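
As the deprecation warning above suggests, new code can call the sidpy implementation directly. A minimal sketch with illustrative names:

import h5py
from sidpy.hdf.hdf_utils import write_simple_attrs

with h5py.File('attrs_demo.h5', 'w') as h5_f:
    h5_grp = h5_f.create_group('Measurement_000')
    write_simple_attrs(h5_grp, {'translator': 'Demo', 'num_rows': 128})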
Example #22
    def _create_results_datasets(self):
        """
        Creates HDF5 datasets and data groups to hold the results
        """
        # create all h5 datasets here:
        num_pos = self.h5_main.shape[0]

        if self.verbose and self.mpi_rank == 0:
            print('Now creating the datasets')

        self.h5_results_grp = create_results_group(
            self.h5_main,
            self.process_name,
            h5_parent_group=self._h5_target_group)

        write_simple_attrs(self.h5_results_grp, {
            'algorithm_author': 'Kody J. Law',
            'last_pixel': 0
        })
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        if self.verbose and self.mpi_rank == 0:
            print('created group: {} with attributes:'.format(
                self.h5_results_grp.name))
            print(get_attributes(self.h5_results_grp))

        # One of those rare instances when the result is exactly the same as the source
        self.h5_i_corrected = create_empty_dataset(
            self.h5_main,
            np.float32,
            'Corrected_Current',
            h5_group=self.h5_results_grp)

        if self.verbose and self.mpi_rank == 0:
            print('Created I Corrected')
            # print_tree(self.h5_results_grp)

        # For some reason, we cannot specify chunks or compression!
        # The resistance dataset requires the creation of a new spectroscopic dimension
        self.h5_resistance = write_main_dataset(
            self.h5_results_grp,
            (num_pos, self.num_x_steps),
            'Resistance',
            'Resistance',
            'GOhms',
            None,
            Dimension('Bias', 'V', self.num_x_steps),
            dtype=np.float32,  # chunks=(1, self.num_x_steps), #compression='gzip',
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals)

        if self.verbose and self.mpi_rank == 0:
            print('Created Resistance')
            # print_tree(self.h5_results_grp)

        assert isinstance(self.h5_resistance,
                          USIDataset)  # only here for PyCharm
        self.h5_new_spec_vals = self.h5_resistance.h5_spec_vals

        # The variance is identical to the resistance dataset
        self.h5_variance = create_empty_dataset(self.h5_resistance, np.float32,
                                                'R_variance')

        if self.verbose and self.mpi_rank == 0:
            print('Created Variance')
            # print_tree(self.h5_results_grp)

        # The capacitance dataset requires new spectroscopic dimensions as well
        self.h5_cap = write_main_dataset(
            self.h5_results_grp,
            (num_pos, 1),
            'Capacitance',
            'Capacitance',
            'pF',
            None,
            Dimension('Direction', '', [1]),
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            dtype=cap_dtype,  #compression='gzip',
            aux_spec_prefix='Cap_Spec_')

        if self.verbose and self.mpi_rank == 0:
            print('Created Capacitance')
            # print_tree(self.h5_results_grp)
            print('Done creating all results datasets!')

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()
Example #23
    def translate(self, parm_path):
        """
        The main function that translates the provided file into a .h5 file
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file.
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)
        excit_wfm = excit_wfm[1::2]
        self._parse_file_path(parm_path)

        num_dat_files = len(self.file_list)

        f = open(self.file_list[0], 'rb')
        spectrogram_size, count_vals = self._parse_spectrogram_size(f)
        print("Excitation waveform shape: ", excit_wfm.shape)
        print("spectrogram size:", spectrogram_size)
        num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
        print('Number of pixels: ', num_pixels)
        print('Count Values: ', count_vals)
        # if (num_pixels + 1) != count_vals:
        #    print("Data size does not match number of pixels expected. Cannot continue")

        # Find how many channels we have to make
        num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

        # Now start creating datasets and populating:
        # Start with getting an h5 file
        h5_file = h5py.File(self.h5_path, mode='a')

        # First create a measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        # Set up some parameters that will be written as attributes to this Measurement group
        global_parms = dict()
        global_parms['data_type'] = 'trKPFM'
        global_parms['translator'] = 'trKPFM'
        write_simple_attrs(h5_meas_group, global_parms)
        write_simple_attrs(h5_meas_group, parm_dict)

        # Now start building the position and spectroscopic dimension containers
        # There's only one spectroscopic dimension and two position dimensions

        # The excit_wfm only has the DC values without any information on cycles, time, etc.
        # What we really need is to add the time component. For every DC step there are some time steps.

        num_time_steps = (
            spectrogram_size - 5
        ) // excit_wfm.size // 2  # Need to divide by 2 because it considers on and off field

        # There should be three spectroscopic axes
        # In order of fastest to slowest varying, we have
        # time, voltage, field

        time_vec = np.linspace(0, parm_dict['IO_time'], num_time_steps)
        print('Num time steps: {}'.format(num_time_steps))
        print('DC Vec size: {}'.format(excit_wfm.shape))
        print('Spectrogram size: {}'.format(spectrogram_size))

        field_vec = np.array([0, 1])

        spec_dims = [
            Dimension('Time', 's', time_vec),
            Dimension('Field', 'Binary', field_vec),
            Dimension('Bias', 'V', excit_wfm)
        ]

        pos_dims = [
            Dimension('Cols', 'm', int(parm_dict['grid_num_cols'])),
            Dimension('Rows', 'm', int(parm_dict['grid_num_rows']))
        ]

        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            chan_grp = create_indexed_group(h5_meas_group, 'Channel')

            if chan_index == 0:
                write_simple_attrs(chan_grp, {'Harmonic': 1})
            else:
                write_simple_attrs(chan_grp, {'Harmonic': 2})

            h5_raw = write_main_dataset(
                chan_grp,  # parent HDF5 group
                (num_pixels, spectrogram_size - 5),
                # shape of Main dataset
                'Raw_Data',  # Name of main dataset
                'Deflection',  # Physical quantity contained in Main dataset
                'V',  # Units for the physical quantity
                pos_dims,  # Position dimensions
                spec_dims,  # Spectroscopic dimensions
                dtype=np.complex64,  # data type / precision
                compression='gzip',
                chunks=(1, spectrogram_size - 5),
                main_dset_attrs={'quantity': 'Complex'})

            # h5_refs = hdf.write(chan_grp, print_log=False)
            # h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
            # link_h5_objects_as_attrs(h5_raw, get_h5_obj_refs(aux_ds_names, h5_refs))
            self.raw_datasets.append(h5_raw)
            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, parm_path, spectrogram_size)

        h5_file.file.close()

        # hdf.close()
        return self.h5_path
Example #24
    def _setup_h5(self, data_gen_parms):
        """
        Sets up the HDF5 file structure before doing the actual generation

        Parameters
        ----------
        data_gen_parms : dict
            Dictionary containing the parameters to write to the Measurement Group as attributes

        Returns
        -------

        """
        '''
        Build the group structure down to the channel group
        '''
        # Set up the basic group structure
        root_parms = dict()
        root_parms['translator'] = 'FAKEBEPS'
        root_parms['data_type'] = data_gen_parms['data_type']

        # Write the file
        self.h5_f = h5py.File(self.h5_path, 'w')
        write_simple_attrs(self.h5_f, root_parms)

        meas_grp = create_indexed_group(self.h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        write_simple_attrs(meas_grp, data_gen_parms)

        # Create the Position and Spectroscopic datasets for the Raw Data
        h5_pos_dims, h5_spec_dims = self._build_ancillary_datasets()

        h5_raw_data = write_main_dataset(chan_grp,
                                         (self.n_pixels, self.n_spec_bins),
                                         'Raw_Data',
                                         'Deflection',
                                         'Volts',
                                         h5_pos_dims,
                                         h5_spec_dims,
                                         slow_to_fast=True,
                                         dtype=np.complex64,
                                         verbose=True)
        '''
        Build the SHO Group
        '''
        sho_grp = create_results_group(h5_raw_data, 'SHO_Fit')

        # Build the Spectroscopic datasets for the SHO Guess and Fit
        h5_sho_spec_inds, h5_sho_spec_vals = write_reduced_anc_dsets(
            sho_grp,
            h5_raw_data.h5_spec_inds,
            h5_raw_data.h5_spec_vals,
            'Frequency',
            is_spec=True)

        h5_sho_fit = write_main_dataset(
            sho_grp, (self.n_pixels, int(self.n_spec_bins // self.n_bins)),
            'Fit',
            'SHO Parameters',
            'a.u.',
            None,
            None,
            h5_pos_inds=h5_raw_data.h5_pos_inds,
            h5_pos_vals=h5_raw_data.h5_pos_vals,
            h5_spec_inds=h5_sho_spec_inds,
            h5_spec_vals=h5_sho_spec_vals,
            slow_to_fast=True,
            dtype=sho32)

        h5_sho_guess = copy_dataset(h5_sho_fit, sho_grp, alias='Guess')
        '''
        Build the loop group
        '''

        loop_grp = create_results_group(h5_sho_fit, 'Loop_Fit')

        # Build the Spectroscopic datasets for the loops

        h5_loop_spec_inds, h5_loop_spec_vals = write_reduced_anc_dsets(
            loop_grp,
            h5_sho_fit.h5_spec_inds,
            h5_sho_fit.h5_spec_vals,
            'DC_Offset',
            is_spec=True)

        h5_loop_fit = write_main_dataset(loop_grp,
                                         (self.n_pixels, self.n_loops),
                                         'Fit',
                                         'Loop Fitting Parameters',
                                         'a.u.',
                                         None,
                                         None,
                                         h5_pos_inds=h5_raw_data.h5_pos_inds,
                                         h5_pos_vals=h5_raw_data.h5_pos_vals,
                                         h5_spec_inds=h5_loop_spec_inds,
                                         h5_spec_vals=h5_loop_spec_vals,
                                         slow_to_fast=True,
                                         dtype=loop_fit32)

        h5_loop_guess = copy_dataset(h5_loop_fit, loop_grp, alias='Guess')
        copy_all_region_refs(h5_loop_guess, h5_loop_fit)

        self.h5_raw = h5_raw_data
        self.h5_sho_guess = h5_sho_guess
        self.h5_sho_fit = h5_sho_fit
        self.h5_loop_guess = h5_loop_guess
        self.h5_loop_fit = h5_loop_fit
        self.h5_spec_vals = h5_raw_data.h5_spec_vals
        self.h5_spec_inds = h5_raw_data.h5_spec_inds
        self.h5_sho_spec_inds = h5_sho_fit.h5_spec_inds
        self.h5_sho_spec_vals = h5_sho_fit.h5_spec_vals
        self.h5_loop_spec_inds = h5_loop_fit.h5_spec_inds
        self.h5_loop_spec_vals = h5_loop_fit.h5_spec_vals
        self.h5_file = h5_raw_data.file

        return
Example #25
    def translate(self,
                  file_path,
                  show_plots=True,
                  save_plots=True,
                  do_histogram=False):
        """
        Basic method that translates .dat data file(s) to a single .h5 file

        Inputs:
            file_path -- Absolute file path for one of the data files.
            It is assumed that this file is of the OLD data format.

        Outputs:
            h5_path -- Absolute path of the generated .h5 file
        """
        file_path = path.abspath(file_path)
        (folder_path, basename) = path.split(file_path)
        (basename, path_dict) = self._parse_file_path(file_path)

        h5_path = path.join(folder_path, basename + '.h5')
        if path.exists(h5_path):
            remove(h5_path)
        self.h5_file = h5py.File(h5_path, 'w')

        isBEPS = True
        parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms'])

        # Here we assume that there is no in-field data.
        # If in-field data is captured then the translator would have to be modified.
        ignored_plt_grps = ['in-field']

        # Technically, we could do away with this if statement, as isBEPS is always true for this translation
        if isBEPS:
            parm_dict['data_type'] = 'BEPSData'

            std_expt = parm_dict[
                'VS_mode'] != 'load user defined VS Wave from file'

            if not std_expt:
                warn(
                    'This translator does not handle user defined voltage spectroscopy'
                )
                return

            spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode'])

            # Check file sizes:
        if 'read_real' in path_dict.keys():
            real_size = path.getsize(path_dict['read_real'])
            imag_size = path.getsize(path_dict['read_imag'])
        else:
            real_size = path.getsize(path_dict['write_real'])
            imag_size = path.getsize(path_dict['write_imag'])

        if real_size != imag_size:
            raise ValueError(
                "Real and imaginary file sizes DON'T match!. Ending")

        num_rows = int(parm_dict['grid_num_rows'])
        num_cols = int(parm_dict['grid_num_cols'])
        num_pix = num_rows * num_cols
        tot_bins = real_size / (
            num_pix * 4)  # Finding bins by simple division of entire datasize

        # Check for case where only a single pixel is missing.
        check_bins = real_size / ((num_pix - 1) * 4)

        if tot_bins % 1 and check_bins % 1:
            warn('Aborting! Some parameter appears to have changed in-between')
            return
        elif not tot_bins % 1:
            #             Everything's ok
            pass
        elif not check_bins % 1:
            tot_bins = check_bins
            warn(
                'Warning:  A pixel seems to be missing from the data.  File will be padded with zeros.'
            )

        tot_bins = int(tot_bins)
        (bin_inds, bin_freqs, bin_FFT, ex_wfm,
         dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms'])
        """
        Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less 
        than the actual number), we need to re-calculate it based on the available data. This is done below.
        """

        band_width = parm_dict['BE_band_width_[Hz]'] * (
            0.5 - parm_dict['BE_band_edge_trim'])
        st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width
        en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width
        bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32)

        # Forcing standardized datatypes:
        bin_inds = np.int32(bin_inds)
        bin_freqs = np.float32(bin_freqs)
        bin_FFT = np.complex64(bin_FFT)
        ex_wfm = np.float32(ex_wfm)

        self.FFT_BE_wave = bin_FFT

        (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict)

        # Remove the unused plot group columns before proceeding:
        (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs,
                                                     UDVS_units,
                                                     ignored_plt_grps)

        spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE)

        # Will assume that all excitation waveforms have same number of bins
        # Here, the denominator is 2 because there are only out-of-field measurements. For IF + OF, it should be 1
        num_actual_udvs_steps = UDVS_mat.shape[0] / 2
        bins_per_step = tot_bins / num_actual_udvs_steps

        # Some more checks
        if bins_per_step % 1:
            warn('Non integer number of bins per step!')
            return
        else:
            bins_per_step = int(bins_per_step)

        num_actual_udvs_steps = int(num_actual_udvs_steps)

        stind = 0
        for step_index in range(UDVS_mat.shape[0]):
            if UDVS_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
                continue  # skip
            spec_inds[0, stind:stind + bins_per_step] = np.arange(
                bins_per_step, dtype=INDICES_DTYPE)  # Bin step
            spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(
                bins_per_step, dtype=INDICES_DTYPE)  # UDVS step
            stind += bins_per_step
        del stind, step_index

        # Some very basic information that can help the processing / analysis crew
        parm_dict['num_bins'] = tot_bins
        parm_dict['num_pix'] = num_pix
        parm_dict['num_udvs_steps'] = num_actual_udvs_steps

        global_parms = dict()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        global_parms['experiment_date'] = parm_dict['File_date_and_time']

        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict[
            'data_type']  # self.__class__.__name__
        global_parms['translator'] = 'ODF'
        write_simple_attrs(self.h5_file, global_parms)

        # Create Measurement and Channel groups
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        chan_grp = create_indexed_group(meas_grp, 'Channel')
        chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1']

        # Create Auxilliary Datasets
        h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm)

        udvs_slices = dict()
        for col_ind, col_name in enumerate(UDVS_labs):
            udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
        h5_UDVS = chan_grp.create_dataset('UDVS',
                                          data=UDVS_mat,
                                          dtype=np.float32)
        write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units})

        h5_bin_steps = chan_grp.create_dataset('Bin_Steps',
                                               data=np.arange(bins_per_step,
                                                              dtype=np.uint32),
                                               dtype=np.uint32)

        # Need to add the Bin Waveform type - infer from UDVS
        exec_bin_vec = self.signal_type * np.ones(len(bin_inds),
                                                  dtype=np.int32)
        h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type',
                                             data=exec_bin_vec,
                                             dtype=np.int32)

        h5_bin_inds = chan_grp.create_dataset('Bin_Indices',
                                              data=bin_inds,
                                              dtype=np.uint32)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies',
                                              data=bin_freqs,
                                              dtype=np.float32)
        h5_bin_FFT = chan_grp.create_dataset('Bin_FFT',
                                             data=bin_FFT,
                                             dtype=np.complex64)
        # Noise floor should be of shape: (udvs_steps x 3 x positions)
        h5_noise_floor = chan_grp.create_dataset(
            'Noise_Floor',
            shape=(num_pix, num_actual_udvs_steps),
            dtype=nf32,
            chunks=(1, num_actual_udvs_steps))
        """ 
        ONLY ALLOCATING SPACE FOR MAIN DATA HERE!
        Chunk by each UDVS step - this makes it easy / quick to:
            1. read data for a single UDVS step from all pixels
            2. read an entire / multiple pixels at a time
        The only problem is that a typical UDVS step containing 50 steps occupies only 400 bytes.
        This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes
        meaning that the metadata would be very substantial.
        This assumption is fine since we almost do not handle any user defined cases
        """
        """
        New Method for chunking the Main_Data dataset.  Chunking is now done in N-by-N squares of UDVS steps by pixels.
        N is determined dynamically based on the dimensions of the dataset.  Currently it is set such that individual
        chunks are less than 10kB in size.
        
        Chris Smith -- [email protected]
        """
        pos_dims = [
            Dimension('X', 'nm', num_cols),
            Dimension('Y', 'nm', num_rows)
        ]

        # Create Spectroscopic Values and Spectroscopic Values Labels datasets
        spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(
            UDVS_mat, spec_inds, bin_freqs, exec_bin_vec, parm_dict, UDVS_labs,
            UDVS_units)

        spec_dims = list()
        for row_ind, row_name in enumerate(spec_vals_labs):
            spec_dims.append(
                Dimension(row_name, spec_vals_units[row_ind],
                          spec_vals[row_ind]))

        pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps,
                                       bins_per_step,
                                       np.dtype('complex64').itemsize)
        chunking = np.floor(np.sqrt(pixel_chunking))
        chunking = max(1, chunking)
        chunking = min(num_actual_udvs_steps, num_pix, chunking)
        self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins),
                                          'Raw_Data',
                                          'Piezoresponse',
                                          'V',
                                          pos_dims,
                                          spec_dims,
                                          dtype=np.complex64,
                                          chunks=(chunking,
                                                  chunking * bins_per_step),
                                          compression='gzip')

        self.mean_resp = np.zeros(shape=(self.h5_main.shape[1]),
                                  dtype=np.complex64)
        self.max_resp = np.zeros(shape=(self.h5_main.shape[0]),
                                 dtype=np.float32)
        self.min_resp = np.zeros(shape=(self.h5_main.shape[0]),
                                 dtype=np.float32)

        # Now read the raw data files:
        self._read_data(path_dict['read_real'], path_dict['read_imag'],
                        parm_dict)
        self.h5_file.flush()

        generatePlotGroups(self.h5_main,
                           self.mean_resp,
                           folder_path,
                           basename,
                           self.max_resp,
                           self.min_resp,
                           max_mem_mb=self.max_ram,
                           spec_label=spec_label,
                           show_plots=show_plots,
                           save_plots=save_plots,
                           do_histogram=do_histogram)

        self.h5_file.close()

        return h5_path
Exemplo n.º 26
0
    def translate(self, file_path, *args, **kwargs):
        """
        Translates a given Bruker / Veeco / Nanoscope AFM derived file to HDF5. Currently handles scans, force curves,
        and force-distance maps

        Note that this translator was written with a single example file for each modality and may be buggy.

        Parameters
        ----------
        file_path : str / unicode
            path to data file

        Returns
        -------
        h5_path : str / unicode
            path to translated HDF5 file
        """
        self.file_path = path.abspath(file_path)
        self.meta_data, other_parms = self._extract_metadata()

        # These files are weirdly named with extensions such as .001
        h5_path = file_path.replace('.', '_') + '.h5'

        if path.exists(h5_path):
            remove(h5_path)

        h5_file = h5py.File(h5_path, 'w')

        type_suffixes = ['Image', 'Force_Curve', 'Force_Map']
        # 0 - stack of scan images
        # 1 - single force curve
        # 2 - force map
        force_count = 0
        image_count = 0
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                force_count += 1
            elif 'Ciao image list' in class_name:
                image_count += 1
        data_type = 0
        if force_count > 0:
            if image_count > 0:
                data_type = 2
            else:
                data_type = 1
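        # In short: no force curves -> 0 (image stack); force curves without
        # images -> 1 (single force curve); both present -> 2 (force map).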

        global_parms = dict()
        global_parms['data_type'] = 'Bruker_AFM_' + type_suffixes[data_type]
        global_parms['translator'] = 'Bruker_AFM'
        write_simple_attrs(h5_file, global_parms)

        # There are too many parameters; create a dedicated group just to hold them.
        h5_parms_grp = h5_file.create_group('Parameters')
        # We currently have a dictionary of dictionaries. This needs to be flattened
        flat_dict = dict()
        for class_name, sub_dict in other_parms.items():
            for key, val in sub_dict.items():
                flat_dict[class_name + '_' + key] = val
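        # e.g. a nested entry {'Ciao scan list': {'Samps/line': 256}} becomes
        # {'Ciao scan list_Samps/line': 256} (the key and value are illustrative)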
        write_simple_attrs(h5_parms_grp, flat_dict)

        # Create measurement group
        h5_meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Call the data specific translation function
        trans_funcs = [
            self._translate_image_stack, self._translate_force_curve,
            self._translate_force_map
        ]
        trans_funcs[data_type](h5_meas_grp)

        # wrap up and return path
        h5_file.close()
        return h5_path
Exemplo n.º 27
0
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file

        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                          'BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))
        be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])

        self.points_per_pixel = len(be_wave)
        self.points_per_line = len(be_wave_train)

        # Load parameters from the .txt file - 'BE_center_frequency_[Hz]', 'IO_rate_[Hz]'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
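        # The factor of 4 presumably reflects 4-byte (float32) samples in the raw
        # file; a fractional result therefore indicates a truncated final row.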
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct
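        # Illustrative example (made-up numbers): samp_rate = 1e6 Hz and
        # points_per_pixel = 100000 give pixel_duration = 0.1 s; a nominal
        # frequency of 310.7 Hz gives num_periods = 31.07, floored to 31, so
        # ex_freq_correct = 1 / (0.1 / 31) = 310 Hz, the closest frequency that
        # fits an integer number of periods into one pixel.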

        # Some very basic information that can help the processing crew
        parm_dict['points_per_line'] = self.points_per_line
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'


        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_file = h5py.File(h5_path, 'w')
        global_parms = dict()

        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement and Channel groups and write the appropriate parameters to them
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # Now that the file has been created, go over each raw data file:
        """ 
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)


        for f_index in data_paths.keys():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main)

        h5_file.close()
        print('G-Tune translation complete!')

        return h5_path
Exemplo n.º 28
0
    def translate(self, h5_path, force_patch=False, **kwargs):
        """
        Add the needed references and attributes to the h5 file that are not created by the
        LabView data acquisition program.

        Parameters
        ----------
        h5_path : str
            path to the h5 file
        force_patch : bool, optional
            Should the check to see if the file has already been patched be ignored.
            Default False.

        Returns
        -------
        h5_path : str
            path to the patched file

        """
        # TODO: Need a way to choose which channels to apply the patcher to;
        # it fails for multi-channel files where not all channels can serve as main datasets
        # Open the file and check if a patch is needed
        h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
        if h5_file.attrs.get('translator') is not None and not force_patch:
            print('File is already Pycroscopy ready.')
            h5_file.close()
            return h5_path
        '''
        Get the list of all Raw_Data Datasets
        Loop over the list and update the needed attributes
        '''
        raw_list = find_dataset(h5_file, 'Raw_Data')
        for h5_raw in raw_list:
            if 'quantity' not in h5_raw.attrs:
                h5_raw.attrs['quantity'] = 'quantity'
            if 'units' not in h5_raw.attrs:
                h5_raw.attrs['units'] = 'a.u.'

            # Grab the channel and measurement group of the data to check some needed attributes
            h5_chan = h5_raw.parent
            try:
                c_type = get_attr(h5_chan, 'channel_type')

            except KeyError:
                warn_str = "'channel_type' was not found as an attribute of {}.\n".format(
                    h5_chan.name)
                warn_str += "If this is BEPS or BELine data from the LabView aquisition software, " + \
                            "please run the following piece of code.  Afterwards, run this function again.\n" + \
                            "CODE: " \
                            "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
                warn(warn_str)
                h5_file.close()
                return h5_path

            except:
                raise

            if c_type != 'BE':
                continue

            h5_meas = h5_chan.parent
            h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

            # Get the object handles for the Indices and Values datasets
            h5_pos_inds = h5_chan['Position_Indices']
            h5_pos_vals = h5_chan['Position_Values']
            h5_spec_inds = h5_chan['Spectroscopic_Indices']
            h5_spec_vals = h5_chan['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
            if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
                h5_spec_inds[:, :] = ds_spec_inds[:, :]
                h5_file.flush()

            # Get the labels and units for the Spectroscopic datasets
            h5_spec_labels = h5_spec_inds.attrs['labels']
            inds_and_vals = [
                h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals
            ]
            for dset in inds_and_vals:
                spec_labels = dset.attrs['labels']
                try:
                    spec_units = dset.attrs['units']

                    if len(spec_units) != len(spec_labels):
                        raise KeyError

                except KeyError:
                    dset.attrs['units'] = ['' for _ in spec_labels]
                except:
                    raise
            """"
            In early versions, too many spectroscopic dimension labels and 
            units were listed compared to the number of rows. Remove here:
            """
            remove_non_exist_spec_dim_labs(h5_spec_inds,
                                           h5_spec_vals,
                                           h5_meas,
                                           verbose=False)
            """
            Add back some standard metadata to be consistent with older
            BE data
            """
            missing_metadata = dict()
            if 'File_file_name' not in h5_meas.attrs.keys():
                missing_metadata['File_file_name'] = os.path.split(
                    h5_raw.file.filename)[-1].replace('.h5', '')
            if 'File_date_and_time' not in h5_meas.attrs.keys():
                try:
                    date_str = get_attr(h5_raw.file, 'date_string')
                    time_str = get_attr(h5_raw.file, 'time_string')
                    full_str = date_str.strip() + ' ' + time_str.strip()
                    """
                    convert:
                        date_string : 2018-12-05
                        time_string : 3:41:45 PM
                    to: 
                        File_date_and_time: 19-Jun-2009 18:44:56
                    """
                    try:
                        dt_obj = datetime.datetime.strptime(
                            full_str, "%Y-%m-%d %I:%M:%S %p")
                        missing_metadata[
                            'File_date_and_time'] = dt_obj.strftime(
                                '%d-%b-%Y %H:%M:%S')
                    except ValueError:
                        pass
                except KeyError:
                    pass
            # Now write to measurement group:
            if len(missing_metadata) > 0:
                write_simple_attrs(h5_meas, missing_metadata)

            # Link the references to the Indices and Values datasets to the Raw_Data
            print(h5_raw.shape, h5_pos_vals.shape, h5_spec_vals.shape)
            print(h5_spec_inds.shape, h5_pos_inds.shape)

            link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds,
                         h5_spec_vals)

            # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
            h5_freqs = h5_chan['Bin_Frequencies']
            aux_dset_names = ['Bin_Frequencies']
            aux_dset_refs = [h5_freqs.ref]
            check_and_link_ancillary(h5_raw,
                                     aux_dset_names,
                                     anc_refs=aux_dset_refs)
            '''
            Get all SHO_Fit groups for the Raw_Data and loop over them
            Get the Guess and Spectroscopic Datasets for each SHO_Fit group
            '''
            sho_list = find_results_groups(h5_raw, 'SHO_Fit')
            for h5_sho in sho_list:
                h5_sho_guess = h5_sho['Guess']
                h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
                h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

                # Make sure we have correct spectroscopic indices for the given values
                ds_sho_spec_inds = create_spec_inds_from_vals(
                    h5_sho_spec_inds[()])
                if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                    h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

                # Get the labels and units for the Spectroscopic datasets
                h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
                h5_sho_spec_units = get_attr(h5_sho_spec_vals, 'units')
                if h5_sho_spec_inds.shape[-1] != h5_sho_guess.shape[-1]:
                    print(
                        'Warning! Found incorrect spectral dimension for dataset {}. Attempting a fix.'
                        .format(h5_sho_guess))
                    try:
                        h5_sho_spec_inds = h5_sho_guess.parent.create_dataset(
                            "h5_sho_spec_inds_fixed",
                            shape=(1, 1),
                            dtype='uint32')
                        h5_sho_spec_inds.attrs['labels'] = 'labels'
                        h5_sho_spec_inds.attrs['units'] = 'units'
                    except RuntimeError:
                        print(
                            "It seems that the file has already been patched."
                            " Will use previously computed ancilliary datasets"
                        )
                        h5_sho_spec_inds = h5_sho_guess.parent[
                            'h5_sho_spec_inds_fixed']
                    try:
                        h5_sho_spec_vals = h5_sho_guess.parent.create_dataset(
                            "h5_sho_spec_vals_fixed",
                            shape=(1, 1),
                            dtype='uint32')
                        h5_sho_spec_vals[:] = 0
                        h5_sho_spec_vals.attrs['labels'] = 'labels'
                        h5_sho_spec_vals.attrs['units'] = 'units'
                    except RuntimeError:
                        print(
                            "It seems that the file has already been patched."
                            " Will use previously computed ancilliary datasets"
                        )

                        h5_sho_spec_vals = h5_sho_guess.parent[
                            'h5_sho_spec_vals_fixed']

                link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals,
                             h5_sho_spec_inds, h5_sho_spec_vals)
                sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

                for dset in sho_inds_and_vals:
                    spec_labels = get_attr(dset, 'labels')
                    try:
                        spec_units = get_attr(dset, 'units')

                        if len(spec_units) != len(spec_labels):
                            raise KeyError

                    except KeyError:
                        spec_units = [''.encode('utf-8') for _ in spec_labels]
                        dset.attrs['units'] = spec_units

                    except:
                        raise

            h5_file.flush()

        h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')

        h5_file.close()

        return h5_path
Exemplo n.º 29
0
    def translate(self, parm_path):
        """      
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)
        folder_path, base_name = path.split(parm_path)
        waste, base_name = path.split(folder_path)

        # Until a better method is provided....
        with h5py.File(path.join(folder_path, 'line_1.mat'),
                       'r') as h5_mat_line_1:
            num_ai_chans = h5_mat_line_1['data'].shape[1]

        h5_path = path.join(folder_path, base_name + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        with h5py.File(h5_path, mode='w') as h5_f:

            h5_meas_grp = create_indexed_group(h5_f, 'Measurement')
            global_parms = dict()
            global_parms.update({'data_type': 'gIV', 'translator': 'gIV'})
            write_simple_attrs(h5_meas_grp, global_parms)

            # Only prepare the instructions for the dimensions here
            spec_dims = Dimension('Bias', 'V', excit_wfm)
            pos_dims = Dimension(
                'Y', 'm',
                np.linspace(0, parm_dict['grid_scan_height_[m]'],
                            parm_dict['grid_num_rows']))

            self.raw_datasets = list()

            for chan_index in range(num_ai_chans):

                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_simple_attrs(h5_chan_grp, parm_dict)
                """
                Minimize file size to the extent possible.
                DAQs are rated at 16 bit so float16 should be most appropriate.
                For some reason, compression is effective only on time series data
                """
                h5_raw = write_main_dataset(
                    h5_chan_grp, (parm_dict['grid_num_rows'], excit_wfm.size),
                    'Raw_Data',
                    'Current',
                    '1E-{} A'.format(parm_dict['IO_amplifier_gain']),
                    pos_dims,
                    spec_dims,
                    dtype=np.float16,
                    chunks=(1, excit_wfm.size),
                    compression='gzip')

                self.raw_datasets.append(h5_raw)

            # Now that the N channels have been made, populate them with the actual data....
            self._read_data(parm_dict, folder_path)

        return h5_path
Exemplo n.º 30
0
    def _create_results_datasets(self):
        """
        Creates all the datasets necessary for holding all parameters + data.
        """

        self.h5_results_grp = create_results_group(
            self.h5_main,
            self.process_name,
            h5_parent_group=self._h5_target_group)

        self.parms_dict.update({
            'last_pixel': 0,
            'algorithm': 'pycroscopy_SignalFilter'
        })

        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        assert isinstance(self.h5_results_grp, h5py.Group)

        if isinstance(self.composite_filter, np.ndarray):
            h5_comp_filt = self.h5_results_grp.create_dataset(
                'Composite_Filter', data=np.float32(self.composite_filter))

            if self.verbose and self.mpi_rank == 0:
                print(
                    'Rank {} - Finished creating the Composite_Filter dataset'.
                    format(self.mpi_rank))

        # First create the position datasets if the new indices are smaller...
        if self.num_effective_pix != self.h5_main.shape[0]:
            # TODO: Do this part correctly. See past solution:
            """
            # need to make new position datasets by taking every n'th index / value:
                new_pos_vals = np.atleast_2d(h5_pos_vals[slice(0, None, self.num_effective_pix), :])
                pos_descriptor = []
                for name, units, leng in zip(h5_pos_inds.attrs['labels'], h5_pos_inds.attrs['units'],
                                             [int(np.unique(h5_pos_inds[:, dim_ind]).size / self.num_effective_pix)
                                              for dim_ind in range(h5_pos_inds.shape[1])]):
                    pos_descriptor.append(Dimension(name, units, np.arange(leng)))
                ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_descriptor, is_spectral=False, verbose=self.verbose)
                h5_pos_vals.data = np.atleast_2d(new_pos_vals)  # The data generated above varies linearly. Override.

            """
            h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets(
                self.h5_results_grp,
                Dimension('pixel', 'a.u.', self.num_effective_pix),
                is_spectral=False,
                verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Created the new position ancillary dataset'.
                      format(self.mpi_rank))

        else:
            h5_pos_inds_new = self.h5_main.h5_pos_inds
            h5_pos_vals_new = self.h5_main.h5_pos_vals

            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Reusing position datasets from the source '
                      'dataset'.format(self.mpi_rank))

        if self.noise_threshold is not None:
            self.h5_noise_floors = write_main_dataset(
                self.h5_results_grp, (self.num_effective_pix, 1),
                'Noise_Floors',
                'Noise',
                'a.u.',
                None,
                Dimension('arb', '', [1]),
                dtype=np.float32,
                aux_spec_prefix='Noise_Spec_',
                h5_pos_inds=h5_pos_inds_new,
                h5_pos_vals=h5_pos_vals_new,
                verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Noise_Floors dataset'.
                      format(self.mpi_rank))

        if self.write_filtered:
            # Filtered data is identical to Main_Data in every way - just a duplicate
            self.h5_filtered = create_empty_dataset(
                self.h5_main,
                self.h5_main.dtype,
                'Filtered_Data',
                h5_group=self.h5_results_grp)
            if self.verbose and self.mpi_rank == 0:
                print(
                    'Rank {} - Finished creating the Filtered dataset'.format(
                        self.mpi_rank))

        self.hot_inds = None

        if self.write_condensed:
            self.hot_inds = np.where(self.composite_filter > 0)[0]
            self.hot_inds = np.uint(self.hot_inds[int(0.5 *
                                                      len(self.hot_inds)):]
                                    )  # only need to keep half the data
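            # Halving hot_inds is presumably safe because the filter operates on
            # real-valued signals, whose spectra are conjugate-symmetric, so the
            # discarded half of the passband is redundant.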
            condensed_spec = Dimension('hot_frequencies', '',
                                       int(len(self.hot_inds)))
            self.h5_condensed = write_main_dataset(
                self.h5_results_grp,
                (self.num_effective_pix, len(self.hot_inds)),
                'Condensed_Data',
                'Complex',
                'a. u.',
                None,
                condensed_spec,
                h5_pos_inds=h5_pos_inds_new,
                h5_pos_vals=h5_pos_vals_new,
                dtype=np.complex128,
                verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print(
                    'Rank {} - Finished creating the Condensed dataset'.format(
                        self.mpi_rank))

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()