Example #1
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the decomposition results
        """

        h5_decomp_group = create_results_group(self.h5_main, self.process_name)
        write_simple_attrs(h5_decomp_group, self.parms_dict)
        write_simple_attrs(h5_decomp_group, {'n_components': self.__components.shape[0],
                                             'n_samples': self.h5_main.shape[0], 'last_pixel': self.h5_main.shape[0]})

        decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

        # equivalent to V - compound / complex
        h5_components = write_main_dataset(h5_decomp_group, self.__components, 'Components',
                                           get_attr(self.h5_main, 'quantity')[0], 'a.u.', decomp_desc,
                                           None,
                                           h5_spec_inds=self.h5_main.h5_spec_inds,
                                           h5_spec_vals=self.h5_main.h5_spec_vals)

        # equivalent of U - real
        h5_projections = write_main_dataset(h5_decomp_group, np.float32(self.__projection), 'Projection', 'abundance',
                                            'a.u.', None, decomp_desc, dtype=np.float32,
                                            h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals)

        # return the h5 group object
        self.h5_results_grp = h5_decomp_group
        return self.h5_results_grp
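Every example on this page revolves around the same pyUSID call pattern. Below is a minimal, self-contained sketch of it, assuming the import paths of the pyUSID version these snippets target (pyUSID.io.hdf_utils / pyUSID.io.write_utils):

import h5py
import numpy as np
from pyUSID.io.hdf_utils import write_main_dataset  # assumed import path
from pyUSID.io.write_utils import Dimension  # assumed import path

with h5py.File('demo.h5', mode='w') as h5_f:
    # Two position dimensions (a 3 x 5 grid) and one spectroscopic dimension (7 steps)
    pos_dims = [Dimension('X', 'nm', np.arange(3)),
                Dimension('Y', 'nm', np.arange(5))]
    spec_dims = Dimension('Bias', 'V', np.linspace(-1, 1, 7))
    # Main data must be 2D: (positions, spectroscopic points)
    h5_main = write_main_dataset(h5_f, np.random.rand(3 * 5, 7), 'Raw_Data',
                                 'Current', 'nA', pos_dims, spec_dims)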
Example #2
    def _translate_image_stack(self, meas_grp, gwy_data, obj, channels):
        """
        Use this function to write data corresponding to a stack of scan images (most common)
        Returns
        -------
        """        
        current_channel = ''

        # Split the gwy object path into its components, e.g. '/0/data' -> ['', '0', 'data']
        gwy_key = obj.split('/')
        # Test whether a new channel needs to be created
        # The 'filename' structure in the gwy file should not have a channel created hence the try/except block
        try:
            if int(gwy_key[1]) not in channels.keys():
                current_channel = create_indexed_group(meas_grp, "Channel")
                channels[int(gwy_key[1])] = current_channel
            else:
                current_channel = channels[int(gwy_key[1])]
        except ValueError:
            if obj.endswith('filename'):
                pass
            else:
                raise ValueError('There was an unexpected directory in the gwy file')

        # The data structure of the gwy file will be used to create the main dataset in the h5 file
        if obj.endswith('data'):
            x_range = gwy_data[obj].get('xreal', 1.0)
            x_vals = np.linspace(0, x_range, gwy_data[obj]['xres'])
            # print('obj {}\nx_vals {}'.format(obj, x_vals))

            y_range = gwy_data[obj].get('yreal', 1.0)
            y_vals = np.linspace(0, y_range, gwy_data[obj]['yres'])
            
            pos_desc = [Dimension('X',
                        gwy_data[obj]['si_unit_xy'].get('unitstr'),
                        x_vals),
                        Dimension('Y', 
                        gwy_data[obj]['si_unit_xy'].get('unitstr'),
                        y_vals)]
            # print(pos_desc)

            spec_dim = gwy_data['/{}/data/title'.format(gwy_key[1])]
            spec_desc = Dimension(spec_dim,
                        gwy_data[obj]['si_unit_z'].get('unitstr', 'arb. units'),
                        [0])

            two_dim_image = gwy_data[obj]['data']
            write_main_dataset(current_channel,
                                np.atleast_2d(np.reshape(two_dim_image,
                                len(pos_desc[0].values) * len(pos_desc[1].values))).transpose(),
                                'Raw_Data',
                                spec_dim,
                                gwy_data[obj]['si_unit_z'].get('unitstr'),
                                pos_desc, spec_desc)
            # print('main dataset has been written')
            # image data processing
        elif obj.endswith('meta'):
            # TODO: Parse the gwy metadata container; writing an empty dict is currently a no-op
            meta = {}
            write_simple_attrs(current_channel, meta, verbose=False)

        return channels
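The try/except above is simply a cache keyed on the channel index parsed from the gwy object path. A hypothetical helper expressing the same pattern (import path assumed):

from pyUSID.io.hdf_utils import create_indexed_group  # assumed import path

def get_or_create_channel(meas_grp, channels, chan_id):
    # Return the cached HDF5 group for this channel index, creating and
    # memoizing a new indexed group on first encounter
    if chan_id not in channels:
        channels[chan_id] = create_indexed_group(meas_grp, 'Channel')
    return channels[chan_id]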
Example #3
    def _translate_gsf(self, file_path, meas_grp):
        """

        Parameters
        ----------
        file_path
        meas_grp

        For more information on the .gsf file format visit the link below -
        http://gwyddion.net/documentation/user-guide-en/gsf.html
        """
        # Read the data in from the specified file
        gsf_meta, gsf_values = gsf_read(file_path)

        # Copy select global parameters (data_type, comments, experiment_date)
        # from gsf_meta into the file-level global_parms:
        self.global_parms['data_type'] = 'Gwyddion_GSF'
        self.global_parms['comments'] = gsf_meta.get('comment', '')
        self.global_parms['experiment_date'] = gsf_meta.get('date', '')

        # overwrite some parameters at the file level:
        write_simple_attrs(meas_grp.parent, self.global_parms)

        # Build the reference values for the ancillary position datasets:
        # TODO: Remove entries from gsf_meta once they have been used meaningfully.
        # For example, XReal need not be saved again once the X axis has been built from it
        x_offset = gsf_meta.get('XOffset', 0)
        x_range = gsf_meta.get('XReal', 1.0)
        # TODO: Use Numpy wherever possible instead of pure python
        x_vals = np.linspace(0, x_range, gsf_meta.get('XRes')) + x_offset

        y_offset = gsf_meta.get('YOffset', 0)
        y_range = gsf_meta.get('YReal', 1.0)
        y_vals = np.linspace(0, y_range, gsf_meta.get('YRes')) + y_offset

        # Just define the ancillary position and spectral dimensions. Do not create datasets yet
        pos_desc = [Dimension('X', gsf_meta.get('XYUnits', 'arb. units'), x_vals),
                    Dimension('Y', gsf_meta.get('XYUnits', 'arb. units'), y_vals)]

        spec_desc = Dimension('Intensity', gsf_meta.get('ZUnits', 'arb. units'), [1])

        """
        You only need to prepare the dimensions for positions and spectroscopic. You do not need to write the 
        ancillary datasets at this point. write_main_dataset will take care of that. You only need to use 
        write_ind_val_datasets() for the cases where you may need to reuse the datasets. See the tutorial online.
        """

        # Create the channel-level group
        chan_grp = create_indexed_group(meas_grp, 'Channel')
        write_simple_attrs(chan_grp, gsf_meta)

        # Create the main dataset (write_main_dataset also creates the ancillary datasets)
        two_dim_image = gsf_values
        write_main_dataset(chan_grp,
                           np.atleast_2d(np.reshape(two_dim_image,
                           len(pos_desc[0].values) * len(pos_desc[1].values))).transpose(),
                           'Raw_Data', gsf_meta.get('Title', 'Unknown'), gsf_meta.get('ZUnits', 'arb. units'),
                           pos_desc, spec_desc)
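Note that the axes above start the linspace at zero and add the offset afterwards. A standalone check of that arithmetic, with hypothetical stand-ins for the XOffset / XReal / XRes entries of gsf_meta:

import numpy as np

x_offset, x_range, x_res = 2.0e-6, 10.0e-6, 256  # hypothetical metadata values
x_vals = np.linspace(0, x_range, x_res) + x_offset
assert x_vals[0] == x_offset
assert np.isclose(x_vals[-1], x_offset + x_range)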
Example #4
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        -------
        h5_group : HDF5 Group reference
            Reference to the group that contains the clustering results
        """
        print('Writing clustering results to file.')
        num_clusters = self.__mean_resp.shape[0]

        h5_cluster_group = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(h5_cluster_group, self.parms_dict)
        h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

        h5_labels = write_main_dataset(h5_cluster_group, np.uint32(self.__labels.reshape([-1, 1])), 'Labels',
                                       'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1),
                                       h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                       aux_spec_prefix='Cluster_', dtype=np.uint32)

        if self.num_comps != self.h5_main.shape[1]:
            '''
            Setup the Spectroscopic Indices and Values for the Mean Response if we didn't use all components
            Note that a sliced spectroscopic matrix may not be contiguous. Let's just lose the spectroscopic data
            for now until a better method is figured out
            '''
            """
            if isinstance(self.data_slice[1], np.ndarray):
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1].tolist()]

            else:
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1]]

            ds_centroid_values.data[0, :] = centroid_vals_mat
            """
            if isinstance(self.data_slice[1], np.ndarray):
                vals_slice = self.data_slice[1].tolist()
            else:
                vals_slice = self.data_slice[1]
            vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze()
            new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals)
            h5_inds, h5_vals = write_ind_val_dsets(h5_cluster_group, new_spec, is_spectral=True)

        else:
            h5_inds = self.h5_main.h5_spec_inds
            h5_vals = self.h5_main.h5_spec_vals

        # For now, link centroids with default spectroscopic indices and values.
        h5_centroids = write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response',
                                          get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0],
                                          Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None,
                                          h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                                          h5_spec_vals=h5_vals)

        return h5_cluster_group
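When the source's ancillary datasets cannot be reused as-is (here: a sliced spectroscopic axis), write_ind_val_dsets builds fresh indices and values from a Dimension. A minimal sketch of that call, under the same assumed pyUSID import paths as above:

import h5py
import numpy as np
from pyUSID.io.hdf_utils import write_ind_val_dsets  # assumed import path
from pyUSID.io.write_utils import Dimension  # assumed import path

with h5py.File('demo_anc.h5', mode='w') as h5_f:
    # Hypothetical reduced axis: every other index of a 64-point spectral axis
    new_spec = Dimension('Original_Spectral_Index', 'a.u.', np.arange(0, 64, 2))
    h5_inds, h5_vals = write_ind_val_dsets(h5_f, new_spec, is_spectral=True)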
Example #5
    def _translate_force_map(self, h5_meas_grp):
        """
        Reads the scan image + force map from the proprietary file and writes it to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # First lets write the image into the measurement group that has already been created:
        image_parms = self.meta_data['Ciao image list']
        quantity = image_parms.pop('Image Data_2')
        image_mat = self._read_image_layer(image_parms)
        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        write_main_dataset(
            h5_chan_grp,
            np.reshape(image_mat, (-1, 1)),
            'Raw_Data',
            # Quantity and Units need to be fixed by someone who understands these files better
            quantity,
            'a. u.',
            [
                Dimension('X', 'nm', image_parms['Samps/line']),
                Dimension('Y', 'nm', image_parms['Number of lines'])
            ],
            Dimension('single', 'a. u.', 1),
            dtype=np.float32,
            compression='gzip')
        # Think about standardizing attributes for rows and columns
        write_simple_attrs(h5_chan_grp, image_parms)

        # Now work on the force map:
        force_map_parms = self.meta_data['Ciao force image list']
        quantity = force_map_parms.pop('Image Data_4')
        force_map_vec = self._read_data_vector(force_map_parms)
        tr_rt = [
            int(item) for item in force_map_parms['Samps/line'].split(' ')
        ]
        force_map_2d = force_map_vec.reshape(image_mat.size, np.sum(tr_rt))
        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        write_main_dataset(
            h5_chan_grp,
            force_map_2d,
            'Raw_Data',
            # Quantity and Units need to be fixed by someone who understands these files better
            quantity,
            'a. u.',
            [
                Dimension('X', 'nm', image_parms['Samps/line']),
                Dimension('Y', 'nm', image_parms['Number of lines'])
            ],
            Dimension('Z', 'nm', int(np.sum(tr_rt))),
            dtype=np.float32,
            compression='gzip')
        # Think about standardizing attributes
        write_simple_attrs(h5_chan_grp, force_map_parms)
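The 'Samps/line' entry holds the trace and retrace point counts as a space-separated string, and the flat force-map vector is regrouped so that each row holds one pixel's full ramp. The reshape logic in isolation, with hypothetical sizes:

import numpy as np

samps_per_line = '512 512'  # hypothetical 'Samps/line' value: trace + retrace
tr_rt = [int(item) for item in samps_per_line.split(' ')]
num_pixels = 4
force_map_vec = np.arange(num_pixels * np.sum(tr_rt), dtype=np.float32)
force_map_2d = force_map_vec.reshape(num_pixels, np.sum(tr_rt))  # (pixels, ramp points)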
Example #6
    def _translate_force_curve(self, h5_meas_grp):
        """
        Reads the force curves from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp,
                                                       Dimension(
                                                           'single', 'a. u.',
                                                           1),
                                                       is_spectral=False)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                break
        tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')]

        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
            h5_meas_grp,
            Dimension('Z', 'nm', int(np.sum(tr_rt))),
            is_spectral=True)

        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_4')
                data = self._read_data_vector(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(
                    h5_chan_grp,
                    np.expand_dims(data, axis=0),
                    'Raw_Data',
                    # Quantity and Units need to be fixed by someone who understands these files better
                    quantity,
                    'a. u.',
                    None,
                    None,
                    dtype=np.float32,
                    compression='gzip',
                    h5_pos_inds=h5_pos_inds,
                    h5_pos_vals=h5_pos_vals,
                    h5_spec_inds=h5_spec_inds,
                    h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes
                write_simple_attrs(h5_chan_grp, layer_info)
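Sharing one set of ancillary datasets across channels, as above, just means creating them once with write_ind_val_dsets and then passing the same references to every write_main_dataset call. A condensed sketch under the same import-path assumptions:

import h5py
import numpy as np
from pyUSID.io.hdf_utils import (create_indexed_group, write_ind_val_dsets,
                                 write_main_dataset)  # assumed import paths
from pyUSID.io.write_utils import Dimension  # assumed import path

with h5py.File('shared.h5', mode='w') as h5_f:
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(
        h5_f, Dimension('single', 'a. u.', 1), is_spectral=False)
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
        h5_f, Dimension('Z', 'nm', 128), is_spectral=True)
    for _ in range(3):  # three hypothetical channels reusing the same ancillaries
        chan_grp = create_indexed_group(h5_f, 'Channel')
        write_main_dataset(chan_grp, np.random.rand(1, 128), 'Raw_Data',
                           'Deflection', 'a. u.', None, None,
                           h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                           h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)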
Example #7
    def _write_results_chunk(self):
        """
        Writes the provided SVD results to file

        Parameters
        ----------
        """
        comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

        h5_svd_group = create_results_group(self.h5_main, self.process_name,
                                            h5_parent_group=self._h5_target_group)
        self.h5_results_grp = h5_svd_group
        self._write_source_dset_provenance()
        

        write_simple_attrs(h5_svd_group, self.parms_dict)
        write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

        h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U', 'Abundance', 'a.u.', None, comp_dim,
                                  h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                  dtype=np.float32, chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))
        # print(get_attr(self.h5_main, 'quantity')[0])
        h5_v = write_main_dataset(h5_svd_group, self.__v, 'V', get_attr(self.h5_main, 'quantity')[0],
                                  'a.u.', comp_dim, None, h5_spec_inds=self.h5_main.h5_spec_inds,
                                  h5_spec_vals=self.h5_main.h5_spec_vals,
                                  chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

        # No point making this 1D dataset a main dataset
        h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

        '''
        Check h5_main for plot group references.
        Copy them into V if they exist
        '''
        for key in self.h5_main.attrs.keys():
            if '_Plot_Group' not in key:
                continue

            ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key], return_method='corners')
            ref_inds = ref_inds.reshape([-1, 2, 2])
            ref_inds[:, 1, 0] = h5_v.shape[0] - 1

            svd_ref = create_region_reference(h5_v, ref_inds)

            h5_v.attrs[key] = svd_ref

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                           data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]
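calc_chunks picks an HDF5 chunk shape from the matrix shape and the size of one element in bytes, which is why the call above passes np.float32(0).itemsize for the float32 U matrix. In isolation (import path assumed):

import numpy as np
from pyUSID.io.hdf_utils import calc_chunks  # assumed import path

# Chunk shape for a hypothetical 10000 x 256 float32 abundance matrix
chunks = calc_chunks((10000, 256), np.float32(0).itemsize)
print(chunks)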
Example #8
    def _translate_image_stack(self, h5_meas_grp):
        """
        Reads the scan images from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp,
                                                         Dimension(
                                                             'single', 'a. u.',
                                                             1),
                                                         is_spectral=True)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                break

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [
            Dimension('X', 'nm', layer_info['Samps/line']),
            Dimension('Y', 'nm', layer_info['Number of lines'])
        ],
                                                       is_spectral=False)

        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_2')
                data = self._read_image_layer(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(
                    h5_chan_grp,
                    np.reshape(data, (-1, 1)),
                    'Raw_Data',
                    # Quantity and Units need to be fixed by someone who understands these files better
                    quantity,
                    'a. u.',
                    None,
                    None,
                    dtype=np.float32,
                    compression='gzip',
                    h5_pos_inds=h5_pos_inds,
                    h5_pos_vals=h5_pos_vals,
                    h5_spec_inds=h5_spec_inds,
                    h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes for rows and columns
                write_simple_attrs(h5_chan_grp, layer_info)
Example #9
    def _write_results_chunk(self):
        """
        Writes the provided SVD results to file

        Parameters
        ----------
        """
        comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

        h5_svd_group = create_results_group(self.h5_main, self.process_name)
        self.h5_results_grp = h5_svd_group

        write_simple_attrs(h5_svd_group, self.parms_dict)
        write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

        h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U', 'Abundance', 'a.u.', None, comp_dim,
                                  h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                  dtype=np.float32, chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))
        # print(get_attr(self.h5_main, 'quantity')[0])
        h5_v = write_main_dataset(h5_svd_group, self.__v, 'V', get_attr(self.h5_main, 'quantity')[0],
                                  'a.u.', comp_dim, None, h5_spec_inds=self.h5_main.h5_spec_inds,
                                  h5_spec_vals=self.h5_main.h5_spec_vals,
                                  chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

        # No point making this 1D dataset a main dataset
        h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

        '''
        Check h5_main for plot group references.
        Copy them into V if they exist
        '''
        for key in self.h5_main.attrs.keys():
            if '_Plot_Group' not in key:
                continue

            ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key], return_method='corners')
            ref_inds = ref_inds.reshape([-1, 2, 2])
            ref_inds[:, 1, 0] = h5_v.shape[0] - 1

            svd_ref = create_region_reference(h5_v, ref_inds)

            h5_v.attrs[key] = svd_ref

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                           data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]
Example #10
    def test_prod_sizes_mismatch(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        main_data = np.random.rand(15, 14)
        main_data_name = 'Test_Main'
        quantity = 'Current'
        dset_units = 'nA'

        pos_sizes = [5, 15]  # too many steps in the Y direction
        pos_names = ['X', 'Y']
        pos_units = ['nm', 'um']
        pos_dims = []
        for length, name, units in zip(pos_sizes, pos_names, pos_units):
            pos_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))

        spec_sizes = [7, 2]
        spec_names = ['Bias', 'Cycle']
        spec_units = ['V', '']
        spec_dims = []
        for length, name, units in zip(spec_sizes, spec_names, spec_units):
            spec_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))

        with h5py.File(file_path, mode='w') as h5_f:  # file was deleted above, so create it fresh
            with self.assertRaises(ValueError):
                _ = hdf_utils.write_main_dataset(h5_f, main_data,
                                                 main_data_name, quantity,
                                                 dset_units, pos_dims,
                                                 spec_dims)
        os.remove(file_path)
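A happy-path counterpart to this test: position sizes whose product matches the 15 rows and spectroscopic sizes whose product matches the 14 columns. Hypothetical sketch using the same modules the test imports:

import h5py
import numpy as np
from pyUSID.io import hdf_utils, write_utils  # assumed import path

with h5py.File('ok.h5', mode='w') as h5_f:
    pos_dims = [write_utils.Dimension('X', 'nm', np.arange(5)),
                write_utils.Dimension('Y', 'um', np.arange(3))]  # 5 * 3 = 15 rows
    spec_dims = [write_utils.Dimension('Bias', 'V', np.arange(7)),
                 write_utils.Dimension('Cycle', '', np.arange(2))]  # 7 * 2 = 14 cols
    _ = hdf_utils.write_main_dataset(h5_f, np.random.rand(15, 14), 'Test_Main',
                                     'Current', 'nA', pos_dims, spec_dims)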
Example #11
    def write_spectrograms(self):
        if bool(self.spectrogram_desc):
            for spectrogram_f, descriptors in self.spectrogram_desc.items():
                channel_i = create_indexed_group(self.h5_meas_grp, 'Channel_')
                spec_vals_i = self.spectrogram_spec_vals[spectrogram_f]
                spectrogram_spec_dims = Dimension('Wavelength', descriptors[8],
                                                  spec_vals_i)
                h5_raw = write_main_dataset(
                    channel_i,  # parent HDF5 group
                    (self.x_len * self.y_len,
                     len(spec_vals_i)),  # shape of Main dataset
                    'Raw_Data',  # Name of main dataset
                    'Spectrogram',  # Physical quantity contained in Main dataset
                    descriptors[3],  # Units for the physical quantity
                    self.pos_dims,  # Position dimensions
                    spectrogram_spec_dims,  # Spectroscopic dimensions
                    dtype=np.float32,  # data type / precision
                    main_dset_attrs={
                        'Caption': descriptors[0],
                        'Bytes_Per_Pixel': descriptors[1],
                        'Scale': descriptors[2],
                        'Physical_Units': descriptors[3],
                        'Offset': descriptors[4],
                        'Datatype': descriptors[5],
                        'Bytes_Per_Reading': descriptors[6],
                        'Wavelength_File': descriptors[7],
                        'Wavelength_Units': descriptors[8]
                    })
                h5_raw.h5_pos_vals[:, :] = self.pos_val
                h5_raw[:, :] = self.spectrograms[spectrogram_f].reshape(
                    h5_raw.shape)
Example #12
    def _create_guess_datasets(self):
        """
        Creates the h5 group, guess dataset, corresponding spectroscopic datasets and also
        links the guess dataset to the spectroscopic datasets.
        """
        h5_group = create_results_group(self.h5_main, 'SHO_Fit')
        write_simple_attrs(h5_group, {'SHO_guess_method': "pycroscopy BESHO"})

        h5_sho_inds, h5_sho_vals = write_reduced_spec_dsets(
            h5_group, self.h5_main.h5_spec_inds, self.h5_main.h5_spec_vals,
            self._fit_dim_name)

        self.h5_guess = write_main_dataset(
            h5_group, (self.h5_main.shape[0], self.num_udvs_steps),
            'Guess',
            'SHO',
            'compound',
            None,
            None,
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            h5_spec_inds=h5_sho_inds,
            h5_spec_vals=h5_sho_vals,
            chunks=(1, self.num_udvs_steps),
            dtype=sho32,
            main_dset_attrs=self._parms_dict,
            verbose=self._verbose)

        write_simple_attrs(self.h5_guess, {
            'SHO_guess_method': "pycroscopy BESHO",
            'last_pixel': 0
        })

        copy_region_refs(self.h5_main, self.h5_guess)
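sho32 is a compound NumPy dtype, so every Guess element stores all SHO parameters in one record. A hypothetical dtype in the same spirit (the actual field names of sho32 are defined in pycroscopy):

import numpy as np

# Hypothetical stand-in for the sho32 compound dtype
sho_like = np.dtype([('Amplitude', np.float32), ('Frequency', np.float32),
                     ('Quality_Factor', np.float32), ('Phase', np.float32)])
guesses = np.zeros((10, 4), dtype=sho_like)  # (positions, udvs steps)
guesses[0, 0]['Frequency'] = 3.2e5  # assign one field of one record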
Example #13
def reshape_from_lines_to_pixels(h5_main, pts_per_cycle, scan_step_x_m=None):
    """
    Breaks up the provided raw G-mode dataset into lines and pixels (from just lines)

    Parameters
    ----------
    h5_main : h5py.Dataset object
        Reference to the main dataset that contains the raw data that is only broken up by lines
    pts_per_cycle : unsigned int
        Number of points in a single pixel
    scan_step_x_m : float
        Step in meters for pixels

    Returns
    -------
    h5_resh : h5py.Dataset object
        Reference to the main dataset that contains the reshaped data
    """
    if not check_if_main(h5_main):
        raise TypeError('h5_main is not a Main dataset')
    h5_main = USIDataset(h5_main)
    if pts_per_cycle % 1 != 0 or pts_per_cycle < 1:
        raise TypeError('pts_per_cycle should be a positive integer')
    if scan_step_x_m is not None:
        if not isinstance(scan_step_x_m, Number):
            raise TypeError('scan_step_x_m should be a real number')
    else:
        scan_step_x_m = 1

    if h5_main.shape[1] % pts_per_cycle != 0:
        raise ValueError('Could not reshape the provided dataset to pixels. Check points per pixel')

    num_cols = int(h5_main.shape[1] / pts_per_cycle)

    # TODO: DO NOT assume simple 1 spectral dimension!
    single_ao = np.squeeze(h5_main.h5_spec_vals[:, :pts_per_cycle])

    spec_dims = Dimension(get_attr(h5_main.h5_spec_vals, 'labels')[0],
                          get_attr(h5_main.h5_spec_vals, 'units')[0], single_ao)

    # TODO: DO NOT assume simple 1D in positions!
    pos_dims = [Dimension('X', 'm', np.linspace(0, scan_step_x_m, num_cols)),
                Dimension('Y', 'm', np.linspace(0, h5_main.h5_pos_vals[1, 0], h5_main.shape[0]))]

    h5_group = create_results_group(h5_main, 'Reshape')
    # TODO: Create empty datasets and then write for very large datasets
    h5_resh = write_main_dataset(h5_group, (num_cols * h5_main.shape[0], pts_per_cycle), 'Reshaped_Data',
                                 get_attr(h5_main, 'quantity')[0], get_attr(h5_main, 'units')[0], pos_dims, spec_dims,
                                 chunks=(10, pts_per_cycle), dtype=h5_main.dtype, compression=h5_main.compression)

    # TODO: DON'T write in one shot assuming small datasets fit in memory!
    print('Starting to reshape G-mode line data. Please be patient')
    h5_resh[()] = np.reshape(h5_main[()], (-1, pts_per_cycle))

    print('Finished reshaping G-mode line data to rows and columns')

    return USIDataset(h5_resh)
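The heart of the reshape is the final np.reshape(..., (-1, pts_per_cycle)): contiguous line data folds into pixels row by row. The same operation on a toy array:

import numpy as np

# Hypothetical G-mode data: 4 lines of 6 points each, 3 points per pixel
line_data = np.arange(4 * 6).reshape(4, 6)
pts_per_cycle = 3
pixel_data = np.reshape(line_data, (-1, pts_per_cycle))  # -> (8 pixels, 3 points)
assert pixel_data.shape == (8, 3)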
Example #14
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the decomposition results
        """

        h5_decomp_group = create_results_group(self.h5_main, self.process_name,
                                               h5_parent_group=self._h5_target_group)
        self._write_source_dset_provenance()
        write_simple_attrs(h5_decomp_group, self.parms_dict)
        write_simple_attrs(h5_decomp_group, {'n_components': self.__components.shape[0],
                                             'n_samples': self.h5_main.shape[0]})

        decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

        # equivalent to V - compound / complex
        h5_components = write_main_dataset(h5_decomp_group, self.__components, 'Components',
                                           get_attr(self.h5_main, 'quantity')[0], 'a.u.', decomp_desc,
                                           None,
                                           h5_spec_inds=self.h5_main.h5_spec_inds,
                                           h5_spec_vals=self.h5_main.h5_spec_vals)

        # equivalent of U - real
        h5_projections = write_main_dataset(h5_decomp_group, np.float32(self.__projection), 'Projection', 'abundance',
                                            'a.u.', None, decomp_desc, dtype=np.float32,
                                            h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals)

        # return the h5 group object
        self.h5_results_grp = h5_decomp_group

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_decomp_group.create_dataset(self._status_dset_name,
                                                              data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_decomp_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return self.h5_results_grp
Example #15
    def _translate_spectra(self, meas_grp, gwy_data, obj, channels):
        """
        Use this to translate simple 1D data like force curves
        Returns
        -------

        """
        current_channel = ''

        gwy_key = obj.split('/')

        try:
            if int(gwy_key[2]) not in channels.keys():
                current_channel = create_indexed_group(meas_grp, "Channel")
                channels[int(gwy_key[2])] = current_channel
            else:
                current_channel = channels[int(gwy_key[2])]
        except ValueError:
            if obj.endswith('filename'):          
                pass
            else:
                raise ValueError('There was an unexpected directory in the spectra file')

        # 'obj' is the key of the graph entry; fetch the actual object once
        gwy_obj = gwy_data[obj]
        title = gwy_obj['title']
        unitstr = gwy_obj['unitstr']
        coords = gwy_obj['coords']
        res = gwy_obj['data']['res']
        real = gwy_obj['data']['real']
        offset = gwy_obj['data']['off']
        x_units = gwy_obj['data']['si_unit_x']['unitstr']
        y_units = gwy_obj['data']['si_unit_y']['unitstr']
        data = gwy_obj['data']['data']
        indices = gwy_obj['selected']
        # The abscissa runs from the offset to offset + physical extent
        x_vals = np.linspace(offset, offset + real, res)
        pos_desc = [Dimension('X', x_units, x_vals)]
        spec_desc = [Dimension(title, y_units, [0])]
        write_main_dataset(current_channel, np.atleast_2d(data).transpose(),
                           'Raw_Data', title, y_units, pos_desc, spec_desc)
        return channels
Example #16
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the decomposition results
        """

        h5_decomp_group = create_results_group(self.h5_main, self.process_name)
        write_simple_attrs(h5_decomp_group, self.parms_dict)
        write_simple_attrs(h5_decomp_group, {'n_components': self.__components.shape[0],
                                             'n_samples': self.h5_main.shape[0]})

        decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

        # equivalent to V - compound / complex
        h5_components = write_main_dataset(h5_decomp_group, self.__components, 'Components',
                                           get_attr(self.h5_main, 'quantity')[0], 'a.u.', decomp_desc,
                                           None,
                                           h5_spec_inds=self.h5_main.h5_spec_inds,
                                           h5_spec_vals=self.h5_main.h5_spec_vals)

        # equivalent of U - real
        h5_projections = write_main_dataset(h5_decomp_group, np.float32(self.__projection), 'Projection', 'abundance',
                                            'a.u.', None, decomp_desc, dtype=np.float32,
                                            h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals)

        # return the h5 group object
        self.h5_results_grp = h5_decomp_group

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_decomp_group.create_dataset(self._status_dset_name,
                                                              data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_decomp_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return self.h5_results_grp
Example #17
    def setUp(self):
        self.h5_f = h5py.File(test_h5_file_path, mode='w')  # explicit write mode; modern h5py no longer defaults to create-or-append
        h5_raw_grp = self.h5_f.create_group('Raw_Measurement')

        num_rows = 3
        num_cols = 5
        num_cycles = 2
        num_cycle_pts = 7

        # Create Main dataset and ancillaries
        source_dset_name = 'source_main'

        pos_dims = [Dimension('X', 'nm', num_rows),
                    Dimension('Y', 'nm', num_cols)]

        spec_dims = [Dimension('Bias', 'V', num_cycle_pts),
                     Dimension('Cycle', 'a.u.', num_cycles)]

        source_main_data = np.random.rand(num_rows * num_cols, num_cycle_pts * num_cycles)

        h5_source_main = write_main_dataset(h5_raw_grp, source_main_data, source_dset_name,
                                            'Current', 'A',
                                            pos_dims, spec_dims)

        # Create Guess dataset and ancillaries
        h5_guess_grp = h5_raw_grp.create_group(source_dset_name+'-Fitter_000')

        guess_data = np.random.rand(num_rows * num_cols, num_cycles)

        guess_spec_dims = spec_dims[1]

        self.h5_guess = write_main_dataset(h5_guess_grp, guess_data, 'Guess',
                                           'Guess', 'a.u.',
                                           pos_dims, guess_spec_dims)

        self.fitter = Fitter(h5_source_main, variables=['Bias'])
        self.h5_main = h5_source_main

        self.h5_f.flush()
Example #18
    def _translate_force_map(self, h5_meas_grp):
        """
        Reads the scan image + force map from the proprietary file and writes it to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # First lets write the image into the measurement group that has already been created:
        image_parms = self.meta_data['Ciao image list']
        quantity = image_parms.pop('Image Data_2')
        image_mat = self._read_image_layer(image_parms)
        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        write_main_dataset(h5_chan_grp, np.reshape(image_mat, (-1, 1)), 'Raw_Data',
                           # Quantity and Units need to be fixed by someone who understands these files better
                           quantity, 'a. u.',
                           [Dimension('X', 'nm', image_parms['Samps/line']),
                            Dimension('Y', 'nm', image_parms['Number of lines'])],
                           Dimension('single', 'a. u.', 1), dtype=np.float32, compression='gzip')
        # Think about standardizing attributes for rows and columns
        write_simple_attrs(h5_chan_grp, image_parms)

        # Now work on the force map:
        force_map_parms = self.meta_data['Ciao force image list']
        quantity = force_map_parms.pop('Image Data_4')
        force_map_vec = self._read_data_vector(force_map_parms)
        tr_rt = [int(item) for item in force_map_parms['Samps/line'].split(' ')]
        force_map_2d = force_map_vec.reshape(image_mat.size, np.sum(tr_rt))
        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        write_main_dataset(h5_chan_grp, force_map_2d, 'Raw_Data',
                           # Quantity and Units need to be fixed by someone who understands these files better
                           quantity, 'a. u.',
                           [Dimension('X', 'nm', image_parms['Samps/line']),
                            Dimension('Y', 'nm', image_parms['Number of lines'])],
                           Dimension('Z', 'nm', int(np.sum(tr_rt))), dtype=np.float32, compression='gzip')
        # Think about standardizing attributes
        write_simple_attrs(h5_chan_grp, force_map_parms)
Example #19
    def _translate_image_stack(self, h5_meas_grp):
        """
        Reads the scan images from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=True)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                break

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [Dimension('X', 'nm', layer_info['Samps/line']),
                                                                     Dimension('Y', 'nm',
                                                                               layer_info['Number of lines'])],
                                                       is_spectral=False)

        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_2')
                data = self._read_image_layer(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(h5_chan_grp, np.reshape(data, (-1, 1)), 'Raw_Data',
                                   # Quantity and Units need to be fixed by someone who understands these files better
                                   quantity, 'a. u.',
                                   None, None, dtype=np.float32, compression='gzip',
                                   h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                   h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes for rows and columns
                write_simple_attrs(h5_chan_grp, layer_info)
Example #20
    def _translate_force_curve(self, h5_meas_grp):
        """
        Reads the force curves from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=False)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                break
        tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')]

        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('Z', 'nm', int(np.sum(tr_rt))),
                                                         is_spectral=True)

        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_4')
                data = self._read_data_vector(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(h5_chan_grp, np.expand_dims(data, axis=0), 'Raw_Data',
                                   # Quantity and Units need to be fixed by someone who understands these files better
                                   quantity, 'a. u.',
                                   None, None, dtype=np.float32, compression='gzip',
                                   h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                   h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes
                write_simple_attrs(h5_chan_grp, layer_info)
Example #21
    def _create_guess_datasets(self):
        """
        Creates the h5 group, guess dataset, corresponding spectroscopic datasets and also
        links the guess dataset to the spectroscopic datasets.
        """
        self.h5_results_grp = create_results_group(
            self.h5_main,
            self.process_name,
            h5_parent_group=self._h5_target_group)
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        # If writing to a new HDF5 file:
        # Add back the data_type attribute - still being used in the visualizer
        if self.h5_results_grp.file != self.h5_main.file:
            write_simple_attrs(
                self.h5_results_grp.file,
                {'data_type': get_attr(self.h5_main.file, 'data_type')})

        ret_vals = write_reduced_anc_dsets(self.h5_results_grp,
                                           self.h5_main.h5_spec_inds,
                                           self.h5_main.h5_spec_vals,
                                           self._fit_dim_name,
                                           verbose=self.verbose)

        h5_sho_inds, h5_sho_vals = ret_vals

        self._h5_guess = write_main_dataset(
            self.h5_results_grp, (self.h5_main.shape[0], self.num_udvs_steps),
            'Guess',
            'SHO',
            'compound',
            None,
            None,
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            h5_spec_inds=h5_sho_inds,
            h5_spec_vals=h5_sho_vals,
            chunks=(1, self.num_udvs_steps),
            dtype=sho32,
            main_dset_attrs=self.parms_dict,
            verbose=self.verbose)

        # Does not make sense to propagate region refs - nobody uses them
        # copy_region_refs(self.h5_main, self._h5_guess)

        self._h5_guess.file.flush()

        if self.verbose and self.mpi_rank == 0:
            print('Finished creating Guess dataset')
Example #22
    def _create_root_image(self, image_path):
        """
        Create the Groups and Datasets for a single root image

        Parameters
        ----------
        image_path : str
            Path to the image file

        Returns
        -------
        None
        """
        image, image_parms = read_dm3(image_path)
        if image.ndim == 3:
            image = np.sum(image, axis=0)

        '''
        Create the Measurement and Channel Groups to hold the
        image Datasets
        '''
        meas_grp = create_indexed_group(self.h5_f, 'Measurement')

        chan_grp = create_indexed_group(meas_grp, 'Channel')

        '''
        Set the Measurement Group attributes
        '''
        usize, vsize = image.shape
        image_parms['image_size_u'] = usize
        image_parms['image_size_v'] = vsize
        image_parms['translator'] = 'OneView'
        image_parms['num_pixels'] = image.size
        write_simple_attrs(meas_grp, image_parms)

        '''
        Build Spectroscopic and Position dimensions
        '''
        spec_desc = Dimension('Image', 'a.u.', [1])
        pos_desc = [Dimension('X', 'pixel', np.arange(image.shape[0])),
                    Dimension('Y', 'pixel', np.arange(image.shape[1]))]

        h5_image = write_main_dataset(chan_grp, np.reshape(image, (-1, 1)), 'Raw_Data',
                                      'Intensity', 'a.u.',
                                      pos_desc, spec_desc)

        self.root_image_list.append(h5_image)
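The np.reshape(image, (-1, 1)) call flattens the image so that each pixel becomes one position with a single 'spectral' point, matching the USID convention of strictly 2D main datasets. In isolation:

import numpy as np

image = np.arange(12).reshape(3, 4)  # hypothetical 3 x 4 image
main_data = np.reshape(image, (-1, 1))  # -> (12 positions, 1 spectral point)
assert main_data.shape == (12, 1)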
Example #23
    def _create_projection_datasets(self):
        """
        Setup the Loop_Fit Group and the loop projection datasets

        """
        # First grab the spectroscopic indices and values and position indices
        self._sho_spec_inds = self.h5_main.h5_spec_inds
        self._sho_spec_vals = self.h5_main.h5_spec_vals
        self._sho_pos_inds = self.h5_main.h5_pos_inds

        fit_dim_ind = self.h5_main.spec_dim_labels.index(self._fit_dim_name)

        self._fit_spec_index = fit_dim_ind
        self._fit_offset_index = 1 + fit_dim_ind

        # Calculate the number of loops per position
        cycle_start_inds = np.argwhere(self._sho_spec_inds[fit_dim_ind, :] == 0).flatten()
        tot_cycles = cycle_start_inds.size

        # Make the results group
        self._h5_group = create_results_group(self.h5_main, 'Loop_Fit')
        write_simple_attrs(self._h5_group, {'projection_method': 'pycroscopy BE loop model'})

        # Write datasets
        self.h5_projected_loops = create_empty_dataset(self.h5_main, np.float32, 'Projected_Loops',
                                                       h5_group=self._h5_group)

        h5_loop_met_spec_inds, h5_loop_met_spec_vals = write_reduced_spec_dsets(self._h5_group, self._sho_spec_inds,
                                                                                self._sho_spec_vals, self._fit_dim_name,
                                                                                basename='Loop_Metrics')

        self.h5_loop_metrics = write_main_dataset(self._h5_group, (self.h5_main.shape[0], tot_cycles), 'Loop_Metrics',
                                                  'Metrics', 'compound', None, None, dtype=loop_metrics32,
                                                  h5_pos_inds=self.h5_main.h5_pos_inds,
                                                  h5_pos_vals=self.h5_main.h5_pos_vals,
                                                  h5_spec_inds=h5_loop_met_spec_inds,
                                                  h5_spec_vals=h5_loop_met_spec_vals)

        # Copy region reference:
        copy_region_refs(self.h5_main, self.h5_projected_loops)
        copy_region_refs(self.h5_main, self.h5_loop_metrics)

        self.h5_main.file.flush()
        self._met_spec_inds = self.h5_loop_metrics.h5_spec_inds

        return
Example #24
    def write_ps_spectra(self):
        if bool(self.pspectrum_desc):
            for spec_f, descriptors in self.pspectrum_desc.items():

                # create new measurement group for ea spectrum
                self.h5_meas_grp = create_indexed_group(
                    self.h5_f, 'Measurement_')
                x_name = self.spectra_x_y_dim_name[spec_f][0].split(' ')[0]
                x_unit = self.spectra_x_y_dim_name[spec_f][0].split(' ')[1]
                y_name = self.spectra_x_y_dim_name[spec_f][1].split(' ')[0]
                y_unit = self.spectra_x_y_dim_name[spec_f][1].split(' ')[1]
                spec_i_spec_dims = Dimension(x_name, x_unit,
                                             self.spectra_spec_vals[spec_f])
                spec_i_pos_dims = [
                    Dimension(
                        'X', self.params_dictionary['XPhysUnit'].replace(
                            '\xb5', 'u'), np.array([0])),
                    Dimension(
                        'Y', self.params_dictionary['YPhysUnit'].replace(
                            '\xb5', 'u'), np.array([0]))
                ]
                # write data to a channel in the measurement group
                spec_i_ch = create_indexed_group(self.h5_meas_grp,
                                                 'PowerSpectrum_')
                h5_raw = write_main_dataset(
                    spec_i_ch,  # parent HDF5 group
                    (1, len(self.spectra_spec_vals[spec_f])),
                    # shape of Main dataset
                    'Raw_Spectrum',
                    # Name of main dataset
                    y_name,
                    # Physical quantity contained in Main dataset
                    y_unit,  # Units for the physical quantity
                    # Position dimensions
                    pos_dims=spec_i_pos_dims,
                    spec_dims=spec_i_spec_dims,
                    # Spectroscopic dimensions
                    dtype=np.float32,  # data type / precision
                    main_dset_attrs={
                        'XLoc': 0,
                        'YLoc': 0
                    })
                h5_raw[:, :] = self.spectra[spec_f].reshape(h5_raw.shape)
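Note that write_main_dataset receives a shape tuple here rather than an array: it allocates an empty dataset that is filled afterwards by slicing. A minimal sketch of that pre-allocate-then-fill pattern, under the same pyUSID import-path assumptions as earlier:

import h5py
import numpy as np
from pyUSID.io.hdf_utils import write_main_dataset  # assumed import path
from pyUSID.io.write_utils import Dimension  # assumed import path

with h5py.File('prealloc.h5', mode='w') as h5_f:
    pos_dims = [Dimension('X', 'um', np.array([0.0])),
                Dimension('Y', 'um', np.array([0.0]))]  # a single position
    spec_dims = Dimension('Wavelength', 'nm', np.linspace(400, 800, 128))
    # A shape tuple allocates the dataset without writing any data yet
    h5_raw = write_main_dataset(h5_f, (1, 128), 'Raw_Spectrum', 'Amplitude', 'V',
                                pos_dims, spec_dims, dtype=np.float32)
    h5_raw[:, :] = np.random.rand(1, 128)  # fill in afterwards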
Example #25
    def _create_guess_datasets(self):
        """
        Creates the h5 group, guess dataset, corresponding spectroscopic datasets and also
        links the guess dataset to the spectroscopic datasets.
        """
        h5_group = create_results_group(self.h5_main, 'SHO_Fit')
        write_simple_attrs(h5_group, {'SHO_guess_method': "pycroscopy BESHO"})

        h5_sho_inds, h5_sho_vals = write_reduced_spec_dsets(h5_group, self.h5_main.h5_spec_inds,
                                                            self.h5_main.h5_spec_vals, self._fit_dim_name)

        self.h5_guess = write_main_dataset(h5_group, (self.h5_main.shape[0], self.num_udvs_steps), 'Guess', 'SHO',
                                           'compound', None, None, h5_pos_inds=self.h5_main.h5_pos_inds,
                                           h5_pos_vals=self.h5_main.h5_pos_vals, h5_spec_inds=h5_sho_inds,
                                           h5_spec_vals=h5_sho_vals, chunks=(1, self.num_udvs_steps), dtype=sho32,
                                           main_dset_attrs=self._parms_dict, verbose=self._verbose)

        write_simple_attrs(self.h5_guess, {'SHO_guess_method': "pycroscopy BESHO", 'last_pixel': 0})

        copy_region_refs(self.h5_main, self.h5_guess)
Example #26
    def _create_guess_datasets(self):
        """
        Creates the h5 group, guess dataset, corresponding spectroscopic datasets and also
        links the guess dataset to the spectroscopic datasets.
        """
        self.h5_results_grp = create_results_group(self.h5_main, self.process_name)
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        h5_sho_inds, h5_sho_vals = write_reduced_anc_dsets(self.h5_results_grp, self.h5_main.h5_spec_inds,
                                                            self.h5_main.h5_spec_vals, self._fit_dim_name)

        self._h5_guess = write_main_dataset(self.h5_results_grp, (self.h5_main.shape[0], self.num_udvs_steps), 'Guess', 'SHO',
                                           'compound', None, None, h5_pos_inds=self.h5_main.h5_pos_inds,
                                           h5_pos_vals=self.h5_main.h5_pos_vals, h5_spec_inds=h5_sho_inds,
                                           h5_spec_vals=h5_sho_vals, chunks=(1, self.num_udvs_steps), dtype=sho32,
                                           main_dset_attrs=self.parms_dict, verbose=self.verbose)
        
        copy_region_refs(self.h5_main, self._h5_guess)
        
        self._h5_guess.file.flush()
        
        if self.verbose and self.mpi_rank == 0:
            print('Finished creating Guess dataset')
Example #27
    def translate(self, parm_path):
        """
        The main function that translates the provided file into a .h5 file
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file.
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)
        excit_wfm = excit_wfm[1::2]
        self._parse_file_path(parm_path)

        num_dat_files = len(self.file_list)

        with open(self.file_list[0], 'rb') as f:
            spectrogram_size, count_vals = self._parse_spectrogram_size(f)
        print("Excitation waveform shape: ", excit_wfm.shape)
        print("spectrogram size:", spectrogram_size)
        num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
        print('Number of pixels: ', num_pixels)
        print('Count Values: ', count_vals)
        # if (num_pixels + 1) != count_vals:
        #    print("Data size does not match number of pixels expected. Cannot continue")

        # Find how many channels we have to make
        num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

        # Now start creating datasets and populating:
        # Start with getting an h5 file
        h5_file = h5py.File(self.h5_path, mode='a')  # 'a' preserves the old h5py default of create-or-append

        # First create a measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        # Set up some parameters that will be written as attributes to this Measurement group
        global_parms = dict()
        global_parms['data_type'] = 'trKPFM'
        global_parms['translator'] = 'trKPFM'
        write_simple_attrs(h5_meas_group, global_parms)
        write_simple_attrs(h5_meas_group, parm_dict)

        # Now start building the position and spectroscopic dimension containers
        # There's only one spectroscopic dimension and two position dimensions

        # The excit_wfm only has the DC values without any information on cycles, time, etc.
        # What we really need is to add the time component. For every DC step there are some time steps.

        num_time_steps = (
            spectrogram_size - 5
        ) // excit_wfm.size // 2  # Need to divide by 2 because it considers on and off field

        # There should be three spectroscopic axes
        # In order of fastest to slowest varying, we have
        # time, voltage, field

        time_vec = np.linspace(0, parm_dict['IO_time'], num_time_steps)
        print('Num time steps: {}'.format(num_time_steps))
        print('DC Vec size: {}'.format(excit_wfm.shape))
        print('Spectrogram size: {}'.format(spectrogram_size))

        field_vec = np.array([0, 1])

        spec_dims = [
            Dimension('Time', 's', time_vec),
            Dimension('Field', 'Binary', field_vec),
            Dimension('Bias', 'V', excit_wfm)
        ]

        pos_dims = [
            Dimension('Cols', 'm', int(parm_dict['grid_num_cols'])),
            Dimension('Rows', 'm', int(parm_dict['grid_num_rows']))
        ]

        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            chan_grp = create_indexed_group(h5_meas_group, 'Channel')

            if chan_index == 0:
                write_simple_attrs(chan_grp, {'Harmonic': 1})
            else:
                write_simple_attrs(chan_grp, {'Harmonic': 2})

            h5_raw = write_main_dataset(
                chan_grp,  # parent HDF5 group
                (num_pixels, spectrogram_size - 5),
                # shape of Main dataset
                'Raw_Data',  # Name of main dataset
                'Deflection',  # Physical quantity contained in Main dataset
                'V',  # Units for the physical quantity
                pos_dims,  # Position dimensions
                spec_dims,  # Spectroscopic dimensions
                dtype=np.complex64,  # data type / precision
                compression='gzip',
                chunks=(1, spectrogram_size - 5),
                main_dset_attrs={'quantity': 'Complex'})

            # h5_refs = hdf.write(chan_grp, print_log=False)
            # h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
            # link_h5_objects_as_attrs(h5_raw, get_h5_obj_refs(aux_ds_names, h5_refs))
            # Appended twice, presumably one entry per (real, imaginary) .dat file pair
            self.raw_datasets.append(h5_raw)
            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, parm_path, spectrogram_size)

        h5_file.close()

        # hdf.close()
        return self.h5_path
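A quick sanity check on the spectroscopic layout assumed above (time varies fastest, then field, then bias); the sizes here are made up, but the product of the three dimension sizes must equal the per-pixel spectrogram payload:

import numpy as np

num_time_steps, num_bias_steps = 64, 128       # hypothetical sizes
payload = num_time_steps * 2 * num_bias_steps  # 2 = on/off field

time_vec = np.linspace(0, 1e-3, num_time_steps)
field_vec = np.array([0, 1])
bias_vec = np.linspace(-5, 5, num_bias_steps)

assert payload == time_vec.size * field_vec.size * bias_vec.size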
Example #29
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the clustering results
        """
        print('Writing clustering results to file.')
        num_clusters = self.__mean_resp.shape[0]

        h5_cluster_group = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(h5_cluster_group, self.parms_dict)

        h5_labels = write_main_dataset(h5_cluster_group, np.uint32(self.__labels.reshape([-1, 1])), 'Labels',
                                       'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1),
                                       h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                       aux_spec_prefix='Cluster_', dtype=np.uint32)

        if self.num_comps != self.h5_main.shape[1]:
            '''
            Set up the Spectroscopic Indices and Values for the Mean Response since
            not all components were used. Note that a sliced spectroscopic matrix may
            not be contiguous, so the original spectroscopic data is dropped for now
            until a better method is figured out
            '''
            if isinstance(self.data_slice[1], np.ndarray):
                vals_slice = self.data_slice[1].tolist()
            else:
                vals_slice = self.data_slice[1]
            vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze()
            new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals)
            h5_inds, h5_vals = write_ind_val_dsets(h5_cluster_group, new_spec, is_spectral=True)

        else:
            h5_inds = self.h5_main.h5_spec_inds
            h5_vals = self.h5_main.h5_spec_vals

        # For now, link centroids with default spectroscopic indices and values.
        h5_centroids = write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response',
                                          get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0],
                                          Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None,
                                          h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                                          h5_spec_vals=h5_vals)

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_cluster_group.create_dataset(self._status_dset_name,
                                                               data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return h5_cluster_group
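For reference, the relationship between the Labels and Mean_Response datasets written above can be reproduced with a toy NumPy sketch: each row of the mean response is the average over all positions carrying that cluster label.

import numpy as np

data = np.random.rand(100, 50)                  # 100 positions x 50 spectral points
labels = np.repeat(np.arange(3), [40, 30, 30])  # toy cluster assignment

mean_resp = np.stack([data[labels == k].mean(axis=0) for k in range(3)])
print(mean_resp.shape)  # (3, 50): one mean spectrum per cluster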
Example #30
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in MB.
        Default - 1024 MB

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset

    """
    comp_slice, num_comps = get_component_slice(
        components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensure at least one core is used while leaving two cores free for other work
    max_cores = max(1, cpu_count() - 2)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)
    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError(
            'SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)
    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize +
                   h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    # `cores` was normalized above and can no longer be None here,
    # so only this branch of the original check was ever taken
    free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} MB each.'.format(mem_per_pix * batch_size /
                                                 1024.0**2))
    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print(
        'Completed reconstruction of data from SVD results.  Writing to file.')
    '''
    Create the Group and dataset to hold the rebuild data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp,
                                    rebuild,
                                    'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None,
                                    None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(
            comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
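The core of rebuild_svd is an ordinary truncated-SVD reconstruction; stripped of the HDF5 handling and batching, it reduces to the following toy sketch (components=10 corresponds to k = 10 here):

import numpy as np

data = np.random.rand(200, 50)
U, S, V = np.linalg.svd(data, full_matrices=False)

k = 10
rebuilt = U[:, :k] @ np.diag(S[:k]) @ V[:k, :]
print(np.allclose(rebuilt, data))  # False unless all 50 components are kept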
Example #31
    def test_existing_both_aux(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        main_data = np.random.rand(15, 14)
        main_data_name = 'Test_Main'
        quantity = 'Current'
        dset_units = 'nA'

        pos_sizes = [5, 3]
        pos_names = ['X', 'Y']
        pos_units = ['nm', 'um']
        pos_dims = []
        for length, name, units in zip(pos_sizes, pos_names, pos_units):
            pos_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))
        pos_data = np.vstack((np.tile(np.arange(5),
                                      3), np.repeat(np.arange(3), 5))).T

        spec_sizes = [7, 2]
        spec_names = ['Bias', 'Cycle']
        spec_units = ['V', '']
        spec_dims = []
        for length, name, units in zip(spec_sizes, spec_names, spec_units):
            spec_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))
        spec_data = np.vstack((np.tile(np.arange(7),
                                       2), np.repeat(np.arange(2), 7)))

        with h5py.File(file_path, 'a') as h5_f:  # explicit mode for h5py >= 3
            h5_spec_inds, h5_spec_vals = hdf_utils.write_ind_val_dsets(
                h5_f, spec_dims, is_spectral=True)
            h5_pos_inds, h5_pos_vals = hdf_utils.write_ind_val_dsets(
                h5_f, pos_dims, is_spectral=False)

            usid_main = hdf_utils.write_main_dataset(h5_f,
                                                     main_data,
                                                     main_data_name,
                                                     quantity,
                                                     dset_units,
                                                     None,
                                                     None,
                                                     h5_spec_inds=h5_spec_inds,
                                                     h5_spec_vals=h5_spec_vals,
                                                     h5_pos_vals=h5_pos_vals,
                                                     h5_pos_inds=h5_pos_inds,
                                                     main_dset_attrs=None)

            data_utils.validate_aux_dset_pair(self,
                                              h5_f,
                                              h5_pos_inds,
                                              h5_pos_vals,
                                              pos_names,
                                              pos_units,
                                              pos_data,
                                              h5_main=usid_main,
                                              is_spectral=False)

            data_utils.validate_aux_dset_pair(self,
                                              h5_f,
                                              h5_spec_inds,
                                              h5_spec_vals,
                                              spec_names,
                                              spec_units,
                                              spec_data,
                                              h5_main=usid_main,
                                              is_spectral=True)
        os.remove(file_path)
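The pos_data and spec_data matrices in this test follow the standard tile/repeat recipe: the fastest dimension is tiled, the slowest repeated. A standalone illustration:

import numpy as np

fast, slow = 5, 3
pos = np.vstack((np.tile(np.arange(fast), slow),
                 np.repeat(np.arange(slow), fast))).T
print(pos[:7])  # [[0 0] [1 0] [2 0] [3 0] [4 0] [0 1] [1 1]]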
Example #32
    def translate(self, data_filepath, out_filename, verbose=False, debug=False):
        '''
        The main function that translates the provided file into a .h5 file

        Parameters
        ----------------
        data_filepath : String / unicode
            Absolute path of the data file
        out_filename : String / unicode
            Name for the newly generated hdf5 file. The new file will be
            saved in the same folder as the input file with
            file name "out_filename".
            NOTE: the .h5 extension is automatically added to "out_filename"
        verbose : Boolean (Optional. Default is False)
            Whether or not to print extra status statements
        debug : Boolean (Optional. Default is False)
            Whether or not to print log statements

        Returns
        ----------------
        h5_path : String / unicode
            Absolute path of the generated .h5 file
        '''

        self.debug = debug
    
        # Open the datafile
        try:
            data_filepath = os.path.abspath(data_filepath)
            ARh5_file = h5py.File(data_filepath, 'r')
        except:
            print('Unable to open the file', data_filepath)
            raise

        # Get info from the origin file like Notes and Segments
        self.notes = ARh5_file.attrs['Note']
        self.segments = ARh5_file['ForceMap']['Segments'] #shape: (X, Y, 4)
        self.segments_name = list(ARh5_file['ForceMap'].attrs['Segments'])
        self.map_size['X'] = ARh5_file['ForceMap']['Segments'].shape[0]
        self.map_size['Y'] = ARh5_file['ForceMap']['Segments'].shape[1]
        self.channels_name = list(ARh5_file['ForceMap'].attrs['Channels'])
        try:
            # np.float was removed from NumPy; use the builtin float
            self.points_per_sec = float(self.note_value('ARDoIVPointsPerSec'))
        except NameError:
            self.points_per_sec = float(self.note_value('NumPtsPerSec'))
        if self.debug:
            print('Map size [X, Y]: ', self.map_size)
            print('Channels names: ', self.channels_name)

        # Only the extension 'Ext' segment can change size
        # so we get the shortest one and we trim all the others
        extension_idx = self.segments_name.index('Ext')
        short_ext = np.amin(np.array(self.segments[:, :, extension_idx]))
        longest_ext = np.amax(np.array(self.segments[:, :, extension_idx]))
        difference = longest_ext - short_ext  # this is a difference between integers
        tot_length = (np.amax(self.segments) - difference) + 1
        # +1, otherwise np.arange(tot_length) would be one position short
        points_trimmed = np.array(self.segments[:, :, extension_idx]) - short_ext
        if self.debug:
            print('Data were trimmed by {} points in the extension segment'.format(difference))

        # Open the output hdf5 file
        folder_path = os.path.dirname(data_filepath)
        h5_path = os.path.join(folder_path, out_filename + '.h5')
        h5_file = h5py.File(h5_path, 'w')

        # Create the measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        # Create all channels and main datasets
        # at this point the main dataset are just function of time
        x_dim = np.linspace(0, float(self.note_value('FastScanSize')),
                            self.map_size['X'])
        y_dim = np.linspace(0, float(self.note_value('FastScanSize')),
                            self.map_size['Y'])
        z_dim = np.arange(tot_length) / float(self.points_per_sec)
        pos_dims = [Dimension('Cols', 'm', x_dim),
                    Dimension('Rows', 'm', y_dim)]
        spec_dims = [Dimension('Time', 's', z_dim)]

        # This is quite time consuming, but on a magnetic drive the disk is the
        # bottleneck, so parallelizing these loops would not help
        for index, channel in enumerate(self.channels_name):
            cur_chan = create_indexed_group(h5_meas_group, 'Channel')
            main_dset = np.empty((self.map_size['X'], self.map_size['Y'], tot_length))
            for column in np.arange(self.map_size['X']):
                for row in np.arange(self.map_size['Y']):
                    AR_pos_string = str(column) + ':' + str(row)
                    seg_start = self.segments[column, row, extension_idx] - short_ext
                    main_dset[column, row, :] = ARh5_file['ForceMap'][AR_pos_string][index, seg_start:]

            # Reshape with Fortran order to have the correct position indices
            main_dset = np.reshape(main_dset, (-1, tot_length), order='F')
            if index == 0:
                first_main_dset = cur_chan
                quant_unit = self.get_def_unit(channel)
                h5_raw = write_main_dataset(cur_chan, # parent HDF5 group
                                                           main_dset, # 2D array of raw data
                                                           'Raw_'+channel, # Name of main dset
                                                           channel, # Physical quantity
                                                           self.get_def_unit(channel), # Unit
                                                           pos_dims, # position dimensions
                                                           spec_dims, #spectroscopy dimensions
                                                           )
            else:
                h5_raw = write_main_dataset(cur_chan, # parent HDF5 group
                                                           main_dset, # 2D array of raw data
                                                           'Raw_'+channel, # Name of main dset
                                                           channel, # Physical quantity
                                                           self.get_def_unit(channel), # Unit
                                                           pos_dims, # position dimensions
                                                           spec_dims, #spectroscopy dimensions
                                                           # Link ancillary dsets to the first
                                                           h5_pos_inds=first_main_dset['Position_Indices'],
                                                           h5_pos_vals=first_main_dset['Position_Values'],
                                                           h5_spec_inds=first_main_dset['Spectroscopic_Indices'],
                                                           h5_spec_vals=first_main_dset['Spectroscopic_Values'],
                                                           )

        # Make Channels with IMAGES.
        # Position indices/values are the same of all other channels
        # Spectroscopic indices/values are just one single dimension
        img_spec_dims = [Dimension('arb', 'a.u.', [1])]
        for index, image in enumerate(ARh5_file['Image'].keys()):
            main_dset = np.reshape(np.array(ARh5_file['Image'][image]), (-1,1), order='F')
            cur_chan = create_indexed_group(h5_meas_group, 'Channel')
            if index == 0:
                first_image_dset = cur_chan
                h5_raw = write_main_dataset(cur_chan,  # parent HDF5 group
                                                           main_dset, # 2D array of image (shape: P*Q x 1)
                                                           'Img_'+image, # Name of main dset
                                                           image, # Physical quantity
                                                           self.get_def_unit(image), # Unit
                                                           pos_dims, # position dimensions
                                                           img_spec_dims, #spectroscopy dimensions
                                                           # Link ancillary dsets to the first
                                                           h5_pos_inds=first_main_dset['Position_Indices'],
                                                           h5_pos_vals=first_main_dset['Position_Values'],
                                                           )
            else:
                h5_raw = write_main_dataset(cur_chan, # parent HDF5 group
                                                           main_dset, # 2D array of image (shape: P*Q x 1)
                                                           'Img_'+image, # Name of main dset
                                                           image, # Physical quantity
                                                           self.get_def_unit(image), # Unit
                                                           pos_dims, # position dimensions
                                                           img_spec_dims, #spectroscopy dimensions
                                                           # Link ancillary dsets to the first
                                                           h5_pos_inds=first_main_dset['Position_Indices'],
                                                           h5_pos_vals=first_main_dset['Position_Values'],
                                                           h5_spec_inds=first_image_dset['Spectroscopic_Indices'],
                                                           h5_spec_vals=first_image_dset['Spectroscopic_Values'],
                                                           )

        # Create the new segments that will be stored as attribute
        new_segments = {}
        for seg, name in enumerate(self.segments_name):
            new_segments.update({name:self.segments[0,0,seg] - short_ext})
        write_simple_attrs(h5_meas_group, {'Segments': new_segments,
                                           'Points_trimmed': points_trimmed,
                                           'Notes': self.notes})
        write_simple_attrs(h5_file,
                           {'translator': 'ARhdf5',
                            'instrument': 'Asylum Research ' + self.note_value('MicroscopeModel'),
                            'AR software version': self.note_value('Version')})

        if self.debug:
            print(print_tree(h5_file))
            print('\n')
            for key, val in get_attributes(h5_meas_group).items():
                if key != 'Notes':
                    print('{} : {}'.format(key, val))
                else:
                    print('{} : {}'.format(key, 'notes string too long to be written here.'))

        # Clean up
        ARh5_file.close()        
        h5_file.close()
        self.translated = True
        return h5_path
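The trimming logic above aligns force curves whose extension segments differ in length by discarding the head of each trace. A minimal sketch of the same idea on made-up traces:

import numpy as np

ext_len = np.array([100, 103, 101, 105])       # hypothetical per-pixel 'Ext' lengths
short_ext = ext_len.min()

traces = [np.arange(n + 20) for n in ext_len]  # unequal raw trace lengths
trimmed = [t[n - short_ext:] for t, n in zip(traces, ext_len)]
print({t.size for t in trimmed})               # one common length remains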
Example #33
    def translate(self, parm_path):
        """
        Basic method that translates .mat data files to a single .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        self.parm_path = path.abspath(parm_path)
        folder_path, file_name = path.split(parm_path)
        _, base_name = path.split(folder_path)
        h5_path = path.join(folder_path, base_name + '.h5')

        # Read parameters
        parm_dict = readGmodeParms(parm_path)

        # Add the w^2 specific parameters to this list
        parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
        freq_sweep_parms = parm_data['freqSweepParms']
        parm_dict['freq_sweep_delay'] = float(
            freq_sweep_parms['delay'].item())
        gen_sig = parm_data['genSig']
        parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
        freq_array = np.float32(parm_data['freqArray'])

        # prepare and write spectroscopic values
        samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
        num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] *
                       samp_rate)

        w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate,
                          np.float32(samp_rate / num_bins))

        # There is most likely a more elegant solution to this but I don't have the time... Maybe np.meshgrid
        spec_val_mat = np.zeros((len(freq_array) * num_bins, 2),
                                dtype=VALUES_DTYPE)
        spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
        spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)

        spec_ind_mat = np.zeros((2, len(freq_array) * num_bins),
                                dtype=np.int32)
        spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
        spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)

        num_rows = parm_dict['grid_num_rows']
        num_cols = parm_dict['grid_num_cols']
        parm_dict['data_type'] = 'GmodeW2'

        num_pix = num_rows * num_cols

        global_parms = dict()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict[
            'data_type']  # self.__class__.__name__
        global_parms['translator'] = 'W2'

        # Now start creating datasets and populating:
        if path.exists(h5_path):
            remove(h5_path)

        h5_f = h5py.File(h5_path, 'w')
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')
        write_simple_attrs(chan_grp, parm_dict)

        pos_dims = [
            Dimension('X', 'nm', num_rows),
            Dimension('Y', 'nm', num_cols)
        ]
        spec_dims = [
            Dimension('Response Bin', 'a.u.', num_bins),
            Dimension('Excitation Frequency ', 'Hz', len(freq_array))
        ]

        # Minimize file size to the extent possible.
        # DAQs are rated at 16 bit, so float32 is more than sufficient.
        # For some reason, compression is more effective on time series data

        h5_main = write_main_dataset(chan_grp, (num_pix, num_bins),
                                     'Raw_Data',
                                     'Deflection',
                                     'V',
                                     pos_dims,
                                     spec_dims,
                                     chunks=(1, num_bins),
                                     dtype=np.float32)

        # data= must be passed by keyword; the second positional argument is the shape
        h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies',
                                              data=freq_array)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

        # Now doing link_h5_objects_as_attrs:
        link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(
                    folder_path,
                    'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                print('Working on row {} col {}'.format(row_ind, col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    pix_mat = pix_data['AI_mat']
                    # Take the inverse FFT on 2nd dimension
                    pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1),
                                          axis=1)
                    # Verified with Matlab - no conjugate required here.
                    pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                    h5_main[pos_ind, :] = np.float32(pix_vec)
                    h5_f.flush()  # flush from memory!
                else:
                    print('File not found for: row {} col {}'.format(
                        row_ind, col_ind))
                pos_ind += 1
                if (100.0 * pos_ind / num_pix) % 10 == 0:
                    print('completed translating {} %'.format(
                        int(100 * pos_ind / num_pix)))

        h5_f.close()

        return h5_path
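The tile/repeat construction of spec_val_mat is indeed equivalent to the np.meshgrid approach hinted at in the comment above; a quick check:

import numpy as np

w_vec = np.arange(4)                  # stand-in for the response bins
freq_array = np.array([10., 20., 30.])

manual = np.vstack((np.tile(w_vec, freq_array.size),
                    np.repeat(freq_array, w_vec.size)))

ww, ff = np.meshgrid(w_vec, freq_array)
assert np.allclose(manual, np.vstack((ww.ravel(), ff.ravel())))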
Example #34
def reshape_from_lines_to_pixels(h5_main, pts_per_cycle, scan_step_x_m=None):
    """
    Breaks up the provided raw G-mode dataset into lines and pixels (from just lines)

    Parameters
    ----------
    h5_main : h5py.Dataset object
        Reference to the main dataset that contains the raw data that is only broken up by lines
    pts_per_cycle : unsigned int
        Number of points in a single pixel
    scan_step_x_m : float
        Step in meters for pixels

    Returns
    -------
    h5_resh : h5py.Dataset object
        Reference to the main dataset that contains the reshaped data
    """
    if not check_if_main(h5_main):
        raise TypeError('h5_main is not a Main dataset')
    h5_main = USIDataset(h5_main)
    if pts_per_cycle % 1 != 0 or pts_per_cycle < 1:
        raise TypeError('pts_per_cycle should be a positive integer')
    if scan_step_x_m is not None:
        if not isinstance(scan_step_x_m, Number):
            raise TypeError('scan_step_x_m should be a real number')
    else:
        scan_step_x_m = 1

    if h5_main.shape[1] % pts_per_cycle != 0:
        raise ValueError('Error in reshaping the provided dataset to pixels. '
                         'Check points per pixel')

    num_cols = int(h5_main.shape[1] / pts_per_cycle)

    # TODO: DO NOT assume simple 1 spectral dimension!
    single_ao = np.squeeze(h5_main.h5_spec_vals[:, :pts_per_cycle])

    spec_dims = Dimension(
        get_attr(h5_main.h5_spec_vals, 'labels')[0],
        get_attr(h5_main.h5_spec_vals, 'units')[0], single_ao)

    # TODO: DO NOT assume simple 1D in positions!
    pos_dims = [
        Dimension('X', 'm', np.linspace(0, scan_step_x_m, num_cols)),
        Dimension('Y', 'm',
                  np.linspace(0, h5_main.h5_pos_vals[1, 0], h5_main.shape[0]))
    ]

    h5_group = create_results_group(h5_main, 'Reshape')
    # TODO: Create empty datasets and then write for very large datasets
    h5_resh = write_main_dataset(h5_group,
                                 (num_cols * h5_main.shape[0], pts_per_cycle),
                                 'Reshaped_Data',
                                 get_attr(h5_main, 'quantity')[0],
                                 get_attr(h5_main, 'units')[0],
                                 pos_dims,
                                 spec_dims,
                                 chunks=(10, pts_per_cycle),
                                 dtype=h5_main.dtype,
                                 compression=h5_main.compression)

    # TODO: DON'T write in one shot assuming small datasets fit in memory!
    print('Starting to reshape G-mode line data. Please be patient')
    h5_resh[()] = np.reshape(h5_main[()], (-1, pts_per_cycle))

    print('Finished reshaping G-mode line data to rows and columns')

    return USIDataset(h5_resh)
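The reshape itself is a plain row-major split of each line into pixels; on a toy array:

import numpy as np

lines = np.arange(4 * 12).reshape(4, 12)  # 4 lines of 12 points each
pts_per_cycle = 3
pixels = lines.reshape(-1, pts_per_cycle)
print(pixels.shape)  # (16, 3): exactly how h5_resh is filled above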
Example #35
    def translate(self, file_path, verbose=False, parm_encoding='utf-8'):
        """
        Translates the provided file to .h5

        Parameters
        ----------
        file_path : String / unicode
            Absolute path of the .ibw file
        verbose : Boolean (Optional)
            Whether or not to show  print statements for debugging
        parm_encoding : str, optional
            Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the .h5 file
        """
        file_path = path.abspath(file_path)
        # Prepare the .h5 file:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        h5_file = h5py.File(h5_path, 'w')

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.ndim > 2 and images.shape[2] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for layer 0 null set errors in older AR software

        if images.ndim == 3:  # Image stack
            if verbose:
                print('Found image stack of size {}'.format(images.shape))
            type_suffix = 'Image'

            num_rows = parm_dict['ScanLines']
            num_cols = parm_dict['ScanPoints']

            images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
            images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

            pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

            spec_desc = Dimension('arb', 'a.u.', [1])

        else:  # single force curve
            if verbose:
                print('Found force curve of size {}'.format(images.shape))

            type_suffix = 'ForceCurve'
            images = np.atleast_3d(images)  # now [Z, chan, 1]
            images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

            # The data generated above varies linearly. Override.
            # For now, we'll shove the Z sensor data into the spectroscopic values.

            # Find the channel that corresponds to either Z sensor or Raw:
            try:
                chan_ind = chan_labels.index('ZSnsr')
                spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind]))
            except ValueError:
                try:
                    chan_ind = chan_labels.index('Raw')
                    spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind]))
                except ValueError:
                    # We don't expect to come here. If we do, spectroscopic values remains as is
                    spec_data = np.arange(images.shape[2])

            pos_desc = Dimension('X', 'm', [1])
            spec_desc = Dimension('Z', 'm', spec_data)

        # Create measurement group
        meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

        h5_file.close()
        return h5_path
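Reading the .ibw file relies on the igor package imported as bw above; a minimal standalone sketch (the file name is hypothetical):

import numpy as np
from igor import binarywave as bw

ibw_obj = bw.load('scan.ibw')   # hypothetical input file
ibw_wave = ibw_obj.get('wave')
images = ibw_wave.get('wData')  # 3D for image stacks, lower-rank for force curves
print(np.asarray(images).shape)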
Example #36
    def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y, image_parms):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        scan_size_x : int
            Number of images in the x dimension
        scan_size_y : int
            Number of images in the y dimension
        image_parms : dict
            Dictionary of parameters

        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize
        num_files = scan_size_x * scan_size_y

        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'PtychographyData'

        main_parms = {'num_images': num_files,
                      'image_size_u': usize,
                      'image_size_v': vsize,
                      'num_pixels': num_pixels,
                      'translator': 'Ptychography',
                      'scan_size_x': scan_size_x,
                      'scan_size_y': scan_size_y}
        main_parms.update(image_parms)

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_f, root_parms)
        meas_grp = create_indexed_group(self.h5_f, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                     Dimension('V', 'pixel', np.arange(vsize))]
        pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                    Dimension('Y', 'pixel', np.arange(scan_size_y))]

        ds_chunking = calc_chunks([num_files, num_pixels],
                                  data_type(0).itemsize,
                                  unit_chunks=(1, num_pixels))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_files, num_pixels), 'Raw_Data',
                                     'Intensity', 'a.u.',
                                     pos_desc, spec_desc,
                                     chunks=ds_chunking, dtype=data_type)

        h5_ronch = chan_grp.create_dataset('Mean_Ronchigram', shape=[num_pixels], dtype=np.float32)
        h5_mean_spec = chan_grp.create_dataset('Spectroscopic_Mean', shape=[num_files], dtype=np.float32)

        self.h5_f.flush()

        return h5_main, h5_mean_spec, h5_ronch
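The unit_chunks=(1, num_pixels) hint asks for one full image per chunk, so reading back a single Ronchigram touches exactly one chunk. A minimal h5py sketch of that layout (file name hypothetical):

import numpy as np
import h5py

with h5py.File('chunk_demo.h5', 'w') as h5_f:
    dset = h5_f.create_dataset('Raw_Data', shape=(256, 64 * 64),
                               chunks=(1, 64 * 64), dtype=np.float32)
    print(dset.chunks)  # (1, 4096): one image per chunk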
Example #37
    def translate(self, raw_data_path):
        """
        The main function that translates the provided file into a .h5 file

        Parameters
        ------------
        raw_data_path : string / unicode
            Absolute file path of the data .mat file.

        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        raw_data_path = path.abspath(raw_data_path)
        folder_path, file_name = path.split(raw_data_path)

        h5_path = path.join(folder_path, file_name[:-4] + '.h5')
        if path.exists(h5_path):
            remove(h5_path)
        h5_f = h5py.File(h5_path, 'w')

        self.h5_read = True
        try:
            h5_raw = h5py.File(raw_data_path, 'r')
        except OSError:
            # h5py raises OSError for .mat files that are not HDF5-based (pre-v7.3)
            self.h5_read = False
            h5_raw = loadmat(raw_data_path)

        excite_cell = h5_raw['dc_amp_cell3']
        test = excite_cell[0][0]
        if self.h5_read:
            excitation_vec = h5_raw[test]
        else:
            excitation_vec = np.float32(np.squeeze(test))

        current_cell = h5_raw['current_cell3']

        num_rows = current_cell.shape[0]
        num_cols = current_cell.shape[1]
        num_iv_pts = excitation_vec.size

        current_data = np.zeros(shape=(num_rows * num_cols, num_iv_pts), dtype=np.float32)
        for row_ind in range(num_rows):
            for col_ind in range(num_cols):
                pix_ind = row_ind * num_cols + col_ind
                if self.h5_read:
                    curr_val = np.squeeze(h5_raw[current_cell[row_ind][col_ind]][()])  # .value was removed in h5py 3
                else:
                    curr_val = np.float32(np.squeeze(current_cell[row_ind][col_ind]))
                current_data[pix_ind, :] = 1E+9 * curr_val

        parm_dict = self._read_parms(h5_raw)
        parm_dict.update({'translator': 'FORC_IV'})

        pos_desc = [Dimension('Y', 'm', np.arange(num_rows)), Dimension('X', 'm', np.arange(num_cols))]
        spec_desc = [Dimension('DC Bias', 'V', excitation_vec)]

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        write_simple_attrs(chan_grp, parm_dict)

        h5_main = write_main_dataset(chan_grp, current_data, 'Raw_Data',
                                     'Current', '1E-9 A',
                                     pos_desc, spec_desc)

        h5_f.close()
        return h5_path
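The pix_ind = row_ind * num_cols + col_ind bookkeeping is just C-order (row-major) raveling of the grid; a quick check:

import numpy as np

num_rows, num_cols = 3, 4
grid = np.arange(num_rows * num_cols).reshape(num_rows, num_cols)

for row_ind in range(num_rows):
    for col_ind in range(num_cols):
        assert grid[row_ind, col_ind] == row_ind * num_cols + col_ind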
Example #38
    def _read_data(self, file_list, h5_channels):
        """
        Iterates over the images in `file_list`, reading each image, downsampling it if
        requested, and writing the flattened image to file. Also builds the Mean_Ronchigram
        and Mean_Spectrogram datasets at the same time.

        Parameters
        ----------
        file_list : list of str
            List of all files that will be read
        h5_channels : list of h5py.Group
            Channel groups, one per file, into which the flattened
            images and their means will be written

        Returns
        -------
        None
        """
        h5_main_list = list()
        '''
        For each file, read the data, create the necessary datasets, add them to
        the channel, and write it all to file
        '''
        for ifile, (this_file, this_channel) in enumerate(zip(file_list, h5_channels)):
            _, ext = os.path.splitext(this_file)
            if ext in ['.ndata1', '.ndata']:
                '''
                Extract the data file from the zip archive and read it into an array
                '''
                this_zip = zipfile.ZipFile(this_file, 'r')
                tmp_path = this_zip.extract('data.npy')
                this_data = np.load(tmp_path)
                os.remove(tmp_path)
            elif ext == '.npy':
                # Read data directly from npy file
                this_data = np.load(this_file)
            else:
                # Defensive: skip any unsupported file type
                continue

            '''
            Find the shape of the data, then calculate the final dimensions based on the crop and
            downsampling parameters
            '''
            while this_data.ndim < 4:
                this_data = np.expand_dims(this_data, 0)

            this_data = self.crop_ronc(this_data)
            scan_size_x, scan_size_y, usize, vsize = this_data.shape

            usize = int(round(1.0 * usize / self.bin_factor[-2]))
            vsize = int(round(1.0 * vsize / self.bin_factor[-1]))

            num_images = scan_size_x * scan_size_y
            num_pixels = usize * vsize

            '''
            Write these attributes to the Measurement group
            '''
            new_attrs = {'image_size_u': usize,
                         'image_size_v': vsize,
                         'scan_size_x': scan_size_x,
                         'scan_size_y': scan_size_y}

            write_simple_attrs(this_channel.parent, new_attrs)


            # Get the Position and Spectroscopic Datasets
            spec_desc = [Dimension('U', 'pixel', np.arange(usize)), Dimension('V', 'pixel', np.arange(vsize))]
            pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                        Dimension('Y', 'pixel', np.arange(scan_size_y))]

            ds_chunking = calc_chunks([num_images, num_pixels],
                                      np.float32(0).itemsize,
                                      unit_chunks=(1, num_pixels))

            # Allocate space for Main_Data and Pixel averaged DataX
            h5_main = write_main_dataset(this_channel, (num_images, num_pixels), 'Raw_Data',
                                         'Intensity', 'a.u.',
                                         pos_desc, spec_desc,
                                         chunks=ds_chunking, dtype=np.float32)

            h5_ronch = this_channel.create_dataset('Mean_Ronchigram',
                                                   data=np.zeros(num_pixels, dtype=np.float32))

            h5_mean_spec = this_channel.create_dataset('Mean_Spectrogram',
                                                       data=np.zeros(num_images, dtype=np.float32))

            this_data = self.binning_func(this_data, self.bin_factor, self.bin_func).reshape(h5_main.shape)

            h5_main[:, :] = this_data

            h5_mean_spec[:] = np.mean(this_data, axis=1)

            h5_ronch[:] = np.mean(this_data, axis=0)

            self.h5_f.flush()

            h5_main_list.append(h5_main)

        self.h5_f.flush()
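binning_func is applied with a bin_factor before the flattened write; one common stand-in for such a function is block averaging by reshape + mean (assuming the factors divide the image sizes evenly):

import numpy as np

stack = np.random.rand(8, 64, 64)  # toy stack of 64 x 64 images
fy, fx = 2, 2                      # assumed bin factors
binned = stack.reshape(8, 64 // fy, fy, 64 // fx, fx).mean(axis=(2, 4))
print(binned.shape)                # (8, 32, 32)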
Example #39
    def _setupH5(self, usize, vsize, data_type, num_images, main_parms):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        num_images : int
            Number of images in the movie
        main_parms : dict
            Dictionary of parameters that will be written to the
            Measurement group as attributes
        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize

        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'PtychographyData'

        main_parms['num_images'] = num_images
        main_parms['image_size_u'] = usize
        main_parms['image_size_v'] = vsize
        main_parms['num_pixels'] = num_pixels
        main_parms['translator'] = 'Movie'

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_file, root_parms)
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_dim = Dimension('Time', 's', np.arange(num_images))
        pos_dims = [Dimension('X', 'a.u.', np.arange(usize)), Dimension('Y', 'a.u.', np.arange(vsize))]

        ds_chunking = calc_chunks([num_pixels, num_images],
                                  data_type(0).itemsize,
                                  unit_chunks=(num_pixels, 1))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_pixels, num_images), 'Raw_Data',
                                     'Intensity', 'a.u.',
                                     pos_dims, spec_dim,
                                     chunks=ds_chunking, dtype=data_type)
        h5_ronch = meas_grp.create_dataset('Mean_Ronchigram',
                                           data=np.zeros(num_pixels, dtype=np.float32),
                                           dtype=np.float32)
        h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                               data=np.zeros(num_images, dtype=np.float32),
                                               dtype=np.float32)

        self.h5_file.flush()

        return h5_main, h5_mean_spec, h5_ronch
Example #41
    def translate(self, data_channels=None, verbose=False):
        """
        Translate the data into a Pycroscopy compatible HDF5 file.

        Parameters
        ----------
        data_channels : (optional) list of str
            Names of channels that will be read and stored in the file.
            If not given, all channels in the file will be used.
        verbose : (optional) Boolean
            Whether or not to print statements

        Returns
        -------
        h5_path : str
            Filepath to the output HDF5 file.

        """
        if self.parm_dict is None or self.data_dict is None:
            self._read_data(self.data_path)

        if data_channels is None:
            print('No channels specified. All channels in file will be used.')
            data_channels = self.parm_dict['channel_parms'].keys()

        if verbose:
            print('Using the following channels')
            for channel in data_channels:
                print(channel)

        if os.path.exists(self.h5_path):
            os.remove(self.h5_path)

        h5_file = h5py.File(self.h5_path, 'w')

        # Create measurement group and assign attributes
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(
            meas_grp, self.parm_dict['meas_parms']
        )

        # Create datasets for positional and spectroscopic indices and values
        spec_dim = self.data_dict['Spectroscopic Dimensions']
        pos_dims = self.data_dict['Position Dimensions']
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_dims,
                                                       is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_dim,
                                                         is_spectral=True)

        # Create the datasets for all the channels
        num_points = h5_pos_inds.shape[0]
        for data_channel in data_channels:
            raw_data = self.data_dict[data_channel].reshape([num_points, -1])

            chan_grp = create_indexed_group(meas_grp, 'Channel')
            data_label = data_channel
            data_unit = self.parm_dict['channel_parms'][data_channel]['Unit']
            write_simple_attrs(
                chan_grp, self.parm_dict['channel_parms'][data_channel]
            )
            write_main_dataset(chan_grp, raw_data, 'Raw_Data',
                               data_label, data_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds,
                               h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds,
                               h5_spec_vals=h5_spec_vals)
            h5_file.flush()

        h5_file.close()
        print('Nanonis translation complete.')

        return self.h5_path
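Each channel above is flattened into a (positions x spectral) matrix before being handed to write_main_dataset. A toy sketch of that reshape, assuming one channel acquired as a (rows, cols, spectral) grid:

import numpy as np

rows, cols, n_spec = 4, 5, 7
grid = np.random.rand(rows, cols, n_spec)   # illustrative channel data
num_points = rows * cols                    # matches h5_pos_inds.shape[0]

raw_data = grid.reshape([num_points, -1])   # same call as in translate()
print(raw_data.shape)                       # (20, 7)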
Example #42
    def translate(self, parm_path):
        """
        The main function that translates the provided file into a .h5 file

        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file.

        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)

        self._parse_file_path(parm_path)

        num_dat_files = len(self.file_list)

        with open(self.file_list[0], 'rb') as f:
            spectrogram_size, count_vals = self._parse_spectrogram_size(f)
        print("Excitation waveform shape: ", excit_wfm.shape)
        print("spectrogram size:", spectrogram_size)
        num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
        print('Number of pixels: ', num_pixels)
        print('Count Values: ', count_vals)
        if (num_pixels + 1) != count_vals:
            raise ValueError("Data size does not match the expected number of pixels. Cannot continue")

        #Find how many channels we have to make
        num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

        # Now start creating datasets and populating:
        #Start with getting an h5 file
        h5_file = h5py.File(self.h5_path, 'a')  # h5py >= 3 requires an explicit mode; 'a' was the old default

        #First create a measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        #Set up some parameters that will be written as attributes to this Measurement group
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'trKPFM'
        global_parms['translator'] = 'trKPFM'
        write_simple_attrs(h5_meas_group, global_parms)
        write_simple_attrs(h5_meas_group, parm_dict)

        #Now start building the position and spectroscopic dimension containers
        #There's only one spectroscopic dimension and two position dimensions

        #The excit_wfm only has the DC values without any information on cycles, time, etc.
        #What we really need is to add the time component. For every DC step there are some time steps.

        num_time_steps = (spectrogram_size - 5) // excit_wfm.size

        #Let's repeat the excitation so that we get the full vector of same size as the spectrogram
        #TODO: Check if this is the norm for this type of dataset

        full_spect_val = np.copy(excit_wfm).repeat(num_time_steps)

        spec_dims = Dimension('Bias', 'V', full_spect_val)
        pos_dims = [Dimension('Cols', 'nm', parm_dict['grid_num_cols']),
                    Dimension('Rows', 'um', parm_dict['grid_num_rows'])]


        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            chan_grp = create_indexed_group(h5_meas_group,'Channel')

            if chan_index == 0:
                write_simple_attrs(chan_grp,{'Harmonic': 1})
            else:
                write_simple_attrs(chan_grp,{'Harmonic': 2})

            h5_raw = write_main_dataset(chan_grp,  # parent HDF5 group
                                        (num_pixels, spectrogram_size - 5),
                                        # shape of Main dataset
                                        'Raw_Data',  # Name of main dataset
                                        'Deflection',  # Physical quantity contained in Main dataset
                                        'V',  # Units for the physical quantity
                                        pos_dims,  # Position dimensions
                                        spec_dims,  # Spectroscopic dimensions
                                        dtype=np.complex64,  # data type / precision
                                        compression='gzip',
                                        chunks=(1, spectrogram_size - 5),
                                        main_dset_attrs={'quantity': 'Complex'})

            # Appended twice, presumably because each channel has a real and
            # an imaginary .dat file that populate the same complex dataset
            self.raw_datasets.append(h5_raw)
            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, parm_path, spectrogram_size)

        h5_file.close()

        return self.h5_path
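The full spectroscopic vector above is built with repeat, which holds each DC bias value for num_time_steps consecutive samples; tile would instead cycle through the whole waveform. A short sketch of the difference:

import numpy as np

excit_wfm = np.array([0.0, 1.0, 2.0])   # illustrative DC bias steps
num_time_steps = 3                      # time steps per bias step

print(np.copy(excit_wfm).repeat(num_time_steps))   # [0. 0. 0. 1. 1. 1. 2. 2. 2.]
print(np.tile(excit_wfm, num_time_steps))          # [0. 1. 2. 0. 1. 2. 0. 1. 2.]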
Example #43
# copy subset to new h5 file
# (imports assumed from pyUSID; exact module paths may vary by version)
import time

import h5py
import numpy as np
import pyUSID as usid
from pyUSID.io.hdf_utils import write_ind_val_dsets, write_main_dataset
from pyUSID.io.write_utils import Dimension

f = h5py.File('subsetFile{}.h5'.format(time.time()), 'a')
subsetGroup = f.create_group("subsetBoi")
h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
    subsetGroup,
    Dimension("Bias", "V", int(h5_resh.h5_spec_inds.size)),
    is_spectral=True)
h5_spec_vals[()] = h5_resh.h5_spec_vals[()]
h5_pos_inds, h5_pos_vals = write_ind_val_dsets(subsetGroup,
                                               Dimension("Position", "m",
                                                         numPixels),
                                               is_spectral=False)
# h5_pos_vals[()] = h5_resh.h5_pos_vals[()][pixelInds, :]
h5_subset = write_main_dataset(subsetGroup, (numPixels, h5_resh.shape[1]),
                               "Measured Current",
                               "Current",
                               "nA",
                               None,
                               None,
                               dtype=np.float64,
                               h5_pos_inds=h5_pos_inds,
                               h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds,
                               h5_spec_vals=h5_spec_vals)
print("check if main returns: {}".format(
    usid.hdf_utils.check_if_main(h5_subset)))
f.close()
Example #44
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file
        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = self._parse_file_path(file_path)
        
        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename+'.h5')
        
        if path.exists(h5_path):
            remove(h5_path)
        
        # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
        matread = loadmat(parm_paths['parm_mat'], variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))

        # Need to take the complex conjugate if reading from a .mat file
        # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave'])))
        
        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])
        self.points_per_pixel = len(be_wave)
        
        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])
        
        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])
        
        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # method 2 for calculating the exact excitation frequency:
        """
        fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave)))
        w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel,
                            self.points_per_pixel)
        hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3))
        ex_freq_correct = w_vec[hot_bins[-1]]
        """

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'
            
        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))
        
        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size/self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_f = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        first_dat = True
        for key in data_paths.keys():
            # Now that the file has been created, go over each raw data file:
            # 1. write all ancillary data. Link data. 2. Write main data sequentially

            """ We only allocate the space for the main data here.
            This does NOT change with each file. The data written to it does.
            The auxiliary datasets will not change with each raw data file since
            only one excitation waveform is used"""
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if first_dat:
                if len(data_paths) > 1:
                    # All positions and spectra are shared between channels
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)
                elif len(data_paths) == 1:
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(chan_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(chan_grp, spec_desc, is_spectral=True)

                first_dat = False

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_paths[key], h5_main)
            
        h5_f.close()
        print('G-Line translation complete!')

        return h5_path
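The row count above falls straight out of the file size: every sample is a 4-byte float32 and one scan line holds points_per_pixel * num_cols samples. A worked sketch of the arithmetic with illustrative numbers:

points_per_pixel, num_cols = 1024, 256
bytes_per_value = 4                       # float32 on disk

file_size = bytes_per_value * points_per_pixel * num_cols * 100   # pretend 100 full rows
num_rows = 1.0 * file_size / (bytes_per_value * points_per_pixel * num_cols)
assert num_rows % 1 == 0                  # a fractional result means a truncated row
print(int(num_rows))                      # 100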
Example #45
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file

        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths,
         data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=[
                              'AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1',
                              'BE_wave_train', 'BE_wave', 'total_cols',
                              'total_rows'
                          ])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))
        be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])

        self.points_per_pixel = len(be_wave)
        self.points_per_line = len(be_wave_train)

        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel *
                                           num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['points_per_line'] = self.points_per_line
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'

        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(
                self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_file = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()

        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement and Channel groups and write the appropriate parameters to them
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # Now that the file has been created, go over each raw data file:
        """ 
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V',
                              np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp,
                                                     pos_desc,
                                                     is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp,
                                                         spec_desc,
                                                         is_spectral=True)

        for f_index in data_paths.keys():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(
                chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                'Raw_Data',
                'Deflection',
                'V',
                None,
                None,
                h5_pos_inds=h5_pos_ind,
                h5_pos_vals=h5_pos_val,
                h5_spec_inds=h5_spec_inds,
                h5_spec_vals=h5_spec_vals,
                chunks=(1, self.points_per_pixel),
                dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_paths[f_index],
                                                    h5_main)

        h5_file.close()
        print('G-Tune translation complete!')

        return h5_path
Example #46
    def translate(self, parm_path):
        """      
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)
        folder_path, base_name = path.split(parm_path)
        waste, base_name = path.split(folder_path)

        # Until a better method is provided....
        with h5py.File(path.join(folder_path, 'line_1.mat'), 'r') as h5_mat_line_1:
            num_ai_chans = h5_mat_line_1['data'].shape[1]
        
        h5_path = path.join(folder_path, base_name+'.h5')
        if path.exists(h5_path):
            remove(h5_path)

        with h5py.File(h5_path, 'w') as h5_f:  # mode is explicit; any old file was removed above

            h5_meas_grp = create_indexed_group(h5_f, 'Measurement')
            global_parms = generate_dummy_main_parms()
            global_parms.update({'data_type': 'gIV', 'translator': 'gIV'})
            write_simple_attrs(h5_meas_grp, global_parms)

            # Only prepare the instructions for the dimensions here
            spec_dims = Dimension('Bias', 'V', excit_wfm)
            pos_dims = Dimension('Y', 'm', np.linspace(0, parm_dict['grid_scan_height_[m]'],
                                                       parm_dict['grid_num_rows']))

            self.raw_datasets = list()

            for chan_index in range(num_ai_chans):

                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_simple_attrs(h5_chan_grp, parm_dict)
                """
                Minimize file size to the extent possible.
                DAQs are rated at 16 bit so float16 should be most appropriate.
                For some reason, compression is effective only on time series data
                """
                h5_raw = write_main_dataset(h5_chan_grp, (parm_dict['grid_num_rows'], excit_wfm.size), 'Raw_Data',
                                            'Current',
                                            '1E-{} A'.format(parm_dict['IO_amplifier_gain']), pos_dims, spec_dims,
                                            dtype=np.float16, chunks=(1, excit_wfm.size), compression='gzip')

                self.raw_datasets.append(h5_raw)

            # Now that the N channels have been made, populate them with the actual data....
            self._read_data(parm_dict, folder_path)

        return h5_path
Example #47
    def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        scan_size_x : int
            Number of images in the x dimension
        scan_size_y : int
            Number of images in the y dimension

        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize
        num_files = scan_size_x * scan_size_y

        root_parms = dict()
        root_parms['data_type'] = 'ImageStackData'

        main_parms = {
            'num_images': num_files,
            'image_size_u': usize,
            'image_size_v': vsize,
            'num_pixels': num_pixels,
            'translator': 'ImageStack',
            'scan_size_x': scan_size_x,
            'scan_size_y': scan_size_y
        }

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_file, root_parms)
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_desc = [
            Dimension('U', 'pixel', np.arange(usize)),
            Dimension('V', 'pixel', np.arange(vsize))
        ]
        pos_desc = [
            Dimension('X', 'pixel', np.arange(scan_size_x)),
            Dimension('Y', 'pixel', np.arange(scan_size_y))
        ]

        ds_chunking = calc_chunks([num_files, num_pixels],
                                  data_type(0).itemsize,
                                  unit_chunks=(1, num_pixels))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_files, num_pixels),
                                     'Raw_Data',
                                     'Intensity',
                                     'a.u.',
                                     pos_desc,
                                     spec_desc,
                                     chunks=ds_chunking,
                                     dtype=data_type)
        h5_ronch = meas_grp.create_dataset('Stack_Mean',
                                           data=np.zeros(num_pixels,
                                                         dtype=np.float32),
                                           dtype=np.float32)
        h5_mean_spec = meas_grp.create_dataset('Image_Means',
                                               data=np.zeros(num_files,
                                                             dtype=np.float32),
                                               dtype=np.float32)

        self.h5_file.flush()

        return h5_main, h5_mean_spec, h5_ronch
Example #48
    def _create_results_datasets(self):
        """
        Creates all the datasets necessary for holding all parameters + data.
        """

        self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

        self.parms_dict.update({'last_pixel': 0, 'algorithm': 'pycroscopy_SignalFilter'})

        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        assert isinstance(self.h5_results_grp, h5py.Group)

        if isinstance(self.composite_filter, np.ndarray):
            h5_comp_filt = self.h5_results_grp.create_dataset('Composite_Filter',
                                                              data=np.float32(self.composite_filter))

            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Composite_Filter dataset'.format(self.mpi_rank))

        # First create the position datasets if the new indices are smaller...
        if self.num_effective_pix != self.h5_main.shape[0]:
            # TODO: Do this part correctly. See past solution:
            """
            # need to make new position datasets by taking every n'th index / value:
                new_pos_vals = np.atleast_2d(h5_pos_vals[slice(0, None, self.num_effective_pix), :])
                pos_descriptor = []
                for name, units, leng in zip(h5_pos_inds.attrs['labels'], h5_pos_inds.attrs['units'],
                                             [int(np.unique(h5_pos_inds[:, dim_ind]).size / self.num_effective_pix)
                                              for dim_ind in range(h5_pos_inds.shape[1])]):
                    pos_descriptor.append(Dimension(name, units, np.arange(leng)))
                ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_descriptor, is_spectral=False, verbose=self.verbose)
                h5_pos_vals.data = np.atleast_2d(new_pos_vals)  # The data generated above varies linearly. Override.

            """
            h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets(self.h5_results_grp,
                                                                   Dimension('pixel', 'a.u.', self.num_effective_pix),
                                                                   is_spectral=False, verbose=self.verbose and self.mpi_rank==0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Created the new position ancillary dataset'.format(self.mpi_rank))

        else:
            h5_pos_inds_new = self.h5_main.h5_pos_inds
            h5_pos_vals_new = self.h5_main.h5_pos_vals

            if self.verbose and self.mpi_rank == 0:
                print("Rank {} - Reusing the source dataset's position datasets".format(self.mpi_rank))

        if self.noise_threshold is not None:
            self.h5_noise_floors = write_main_dataset(self.h5_results_grp, (self.num_effective_pix, 1), 'Noise_Floors',
                                                      'Noise', 'a.u.', None, Dimension('arb', '', [1]),
                                                      dtype=np.float32, aux_spec_prefix='Noise_Spec_',
                                                      h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
                                                      verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Noise_Floors dataset'.format(self.mpi_rank))

        if self.write_filtered:
            # Filtered data is identical to Main_Data in every way - just a duplicate
            self.h5_filtered = create_empty_dataset(self.h5_main, self.h5_main.dtype, 'Filtered_Data',
                                                    h5_group=self.h5_results_grp)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Filtered dataset'.format(self.mpi_rank))

        self.hot_inds = None

        if self.write_condensed:
            self.hot_inds = np.where(self.composite_filter > 0)[0]
            self.hot_inds = np.uint(self.hot_inds[int(0.5 * len(self.hot_inds)):])  # only need to keep half the data
            condensed_spec = Dimension('hot_frequencies', '', len(self.hot_inds))  # hot_inds was already halved above
            self.h5_condensed = write_main_dataset(self.h5_results_grp, (self.num_effective_pix, len(self.hot_inds)),
                                                   'Condensed_Data', 'Complex', 'a. u.', None, condensed_spec,
                                                   h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
                                                   dtype=np.complex128,  # np.complex (removed in NumPy 1.24) was an alias of complex128
                                                   verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Condensed dataset'.format(self.mpi_rank))

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()
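Keeping only one half of the hot indices above relies on the conjugate symmetry of the spectrum of a real signal: the mirrored half of a frequency-domain filter carries no extra information. A small sketch, assuming an un-shifted FFT layout:

import numpy as np

n = 16
freq_filter = np.zeros(n)
freq_filter[3:6] = 1.0           # pass band at positive frequencies
freq_filter[n - 5:n - 2] = 1.0   # mirrored pass band at negative frequencies

hot_inds = np.where(freq_filter > 0)[0]
hot_inds = hot_inds[int(0.5 * len(hot_inds)):]   # keep only one half
print(hot_inds)                                  # [11 12 13]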
Example #49
    def test_empty(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        main_data = (15, 14)
        main_data_name = 'Test_Main'
        quantity = 'Current'
        dset_units = 'nA'

        pos_sizes = [5, 3]
        pos_names = ['X', 'Y']
        pos_units = ['nm', 'um']

        pos_dims = []
        for length, name, units in zip(pos_sizes, pos_names, pos_units):
            pos_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))
        pos_data = np.vstack((np.tile(np.arange(5), 3),
                              np.repeat(np.arange(3), 5))).T

        spec_sizes = [7, 2]
        spec_names = ['Bias', 'Cycle']
        spec_units = ['V', '']
        spec_dims = []
        for length, name, units in zip(spec_sizes, spec_names, spec_units):
            spec_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))
        spec_data = np.vstack((np.tile(np.arange(7), 2),
                               np.repeat(np.arange(2), 7)))

        with h5py.File(file_path, 'w') as h5_f:
            usid_main = hdf_utils.write_main_dataset(h5_f,
                                                     main_data,
                                                     main_data_name,
                                                     quantity,
                                                     dset_units,
                                                     pos_dims,
                                                     spec_dims,
                                                     dtype=np.float16,
                                                     main_dset_attrs=None)
            self.assertIsInstance(usid_main, USIDataset)
            self.assertEqual(usid_main.name.split('/')[-1], main_data_name)
            self.assertEqual(usid_main.parent, h5_f)
            self.assertEqual(main_data, usid_main.shape)

            data_utils.validate_aux_dset_pair(self,
                                              h5_f,
                                              usid_main.h5_pos_inds,
                                              usid_main.h5_pos_vals,
                                              pos_names,
                                              pos_units,
                                              pos_data,
                                              h5_main=usid_main,
                                              is_spectral=False)

            data_utils.validate_aux_dset_pair(self,
                                              h5_f,
                                              usid_main.h5_spec_inds,
                                              usid_main.h5_spec_vals,
                                              spec_names,
                                              spec_units,
                                              spec_data,
                                              h5_main=usid_main,
                                              is_spectral=True)
        os.remove(file_path)
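The expected position table in the test interleaves a fast X index with a slow Y index via tile/repeat, matching the fast-to-slow ordering the test expects from the ancillary datasets. A compact sketch of that construction:

import numpy as np

nx, ny = 5, 3
pos_data = np.vstack((np.tile(np.arange(nx), ny),
                      np.repeat(np.arange(ny), nx))).T
print(pos_data[:6])   # rows: [0 0] [1 0] [2 0] [3 0] [4 0] [0 1] -> X cycles fastest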
Example #50
    def translate(self,
                  data_filepath,
                  out_filename,
                  verbose=False,
                  debug=False):
        '''
        The main function that translates the provided file into a .h5 file

        Parameters
        ----------------
        data_filepath : String / unicode
            Absolute path of the data file
        out_filename : String / unicode
            Name for the new generated hdf5 file. The new file will be
            saved in the same folder of the input file with
            file name "out_filename".
            NOTE: the .h5 extension is automatically added to "out_filename"
        verbose : Boolean (Optional. default is False)
            Whether or not to print status messages
        debug : Boolean (Optional. default is False)
            Whether or not to print log statements

        Returns
        ----------------
        h5_path : String / unicode
            Absolute path of the generated .h5 file
        '''

        self.debug = debug

        # Open the datafile
        try:
            data_filepath = os.path.abspath(data_filepath)
            ARh5_file = h5py.File(data_filepath, 'r')
        except Exception:
            print('Unable to open the file', data_filepath)
            raise

        # Get info from the origin file like Notes and Segments
        self.notes = ARh5_file.attrs['Note']
        self.segments = ARh5_file['ForceMap']['Segments']  #shape: (X, Y, 4)
        self.segments_name = list(ARh5_file['ForceMap'].attrs['Segments'])
        self.map_size['X'] = ARh5_file['ForceMap']['Segments'].shape[0]
        self.map_size['Y'] = ARh5_file['ForceMap']['Segments'].shape[1]
        self.channels_name = list(ARh5_file['ForceMap'].attrs['Channels'])
        try:
            self.points_per_sec = float(
                self.note_value('ARDoIVPointsPerSec'))
        except NameError:
            self.points_per_sec = float(self.note_value('NumPtsPerSec'))
        if self.debug:
            print('Map size [X, Y]: ', self.map_size)
            print('Channels names: ', self.channels_name)

        # Only the extension 'Ext' segment can change size
        # so we get the shortest one and we trim all the others
        extension_idx = self.segments_name.index('Ext')
        short_ext = np.amin(np.array(self.segments[:, :, extension_idx]))
        longest_ext = np.amax(np.array(self.segments[:, :, extension_idx]))
        difference = longest_ext - short_ext  # this is a difference between integers
        tot_length = (np.amax(self.segments) - difference) + 1
        # +1, otherwise np.arange(tot_length) would be one position short
        points_trimmed = np.array(self.segments[:, :,
                                                extension_idx]) - short_ext
        if self.debug:
            print('Data were trimmed by up to {} points in the extension segment'.
                  format(difference))

        # Open the output hdf5 file
        folder_path = os.path.dirname(data_filepath)
        h5_path = os.path.join(folder_path, out_filename + '.h5')
        h5_file = h5py.File(h5_path, 'w')

        # Create the measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        # Create all channels and main datasets
        # at this point the main dataset are just function of time
        x_dim = np.linspace(0, float(self.note_value('FastScanSize')),
                            self.map_size['X'])
        y_dim = np.linspace(0, float(self.note_value('FastScanSize')),
                            self.map_size['Y'])
        z_dim = np.arange(tot_length) / float(self.points_per_sec)
        pos_dims = [
            Dimension('Cols', 'm', x_dim),
            Dimension('Rows', 'm', y_dim)
        ]
        spec_dims = [Dimension('Time', 's', z_dim)]

        # This is quite time consuming, but on a magnetic drive the loop is disk-bound,
        # so parallelizing it would not help
        for index, channel in enumerate(self.channels_name):
            cur_chan = create_indexed_group(h5_meas_group, 'Channel')
            main_dset = np.empty(
                (self.map_size['X'], self.map_size['Y'], tot_length))
            for column in np.arange(self.map_size['X']):
                for row in np.arange(self.map_size['Y']):
                    AR_pos_string = str(column) + ':' + str(row)
                    seg_start = self.segments[column, row,
                                              extension_idx] - short_ext
                    main_dset[column,
                              row, :] = ARh5_file['ForceMap'][AR_pos_string][
                                  index, seg_start:]

            # Reshape with Fortran order to have the correct position indices
            main_dset = np.reshape(main_dset, (-1, tot_length), order='F')
            if index == 0:
                first_main_dset = cur_chan
                quant_unit = self.get_def_unit(channel)
                h5_raw = write_main_dataset(
                    cur_chan,  # parent HDF5 group
                    main_dset,  # 2D array of raw data
                    'Raw_' + channel,  # Name of main dset
                    channel,  # Physical quantity
                    self.get_def_unit(channel),  # Unit
                    pos_dims,  # position dimensions
                    spec_dims,  #spectroscopy dimensions
                )
            else:
                h5_raw = write_main_dataset(
                    cur_chan,  # parent HDF5 group
                    main_dset,  # 2D array of raw data
                    'Raw_' + channel,  # Name of main dset
                    channel,  # Physical quantity
                    self.get_def_unit(channel),  # Unit
                    pos_dims,  # position dimensions
                    spec_dims,  #spectroscopy dimensions
                    # Link ancillary dsets to the first
                    h5_pos_inds=first_main_dset['Position_Indices'],
                    h5_pos_vals=first_main_dset['Position_Values'],
                    h5_spec_inds=first_main_dset['Spectroscopic_Indices'],
                    h5_spec_vals=first_main_dset['Spectroscopic_Values'],
                )

        # Make Channels with IMAGES.
        # Position indices/values are the same as for all other channels
        # Spectroscopic indices/values are just a single dimension
        img_spec_dims = [Dimension('arb', 'a.u.', [1])]
        for index, image in enumerate(ARh5_file['Image'].keys()):
            main_dset = np.reshape(np.array(ARh5_file['Image'][image]),
                                   (-1, 1),
                                   order='F')
            cur_chan = create_indexed_group(h5_meas_group, 'Channel')
            if index == 0:
                first_image_dset = cur_chan
                h5_raw = write_main_dataset(
                    cur_chan,  # parent HDF5 group
                    main_dset,  # 2D array of image (shape: P*Q x 1)
                    'Img_' + image,  # Name of main dset
                    image,  # Physical quantity
                    self.get_def_unit(image),  # Unit
                    pos_dims,  # position dimensions
                    img_spec_dims,  #spectroscopy dimensions
                    # Link ancillary dsets to the first
                    h5_pos_inds=first_main_dset['Position_Indices'],
                    h5_pos_vals=first_main_dset['Position_Values'],
                )
            else:
                h5_raw = write_main_dataset(
                    cur_chan,  # parent HDF5 group
                    main_dset,  # 2D array of image (shape: P*Q x 1)
                    'Img_' + image,  # Name of main dset
                    image,  # Physical quantity
                    self.get_def_unit(image),  # Unit
                    pos_dims,  # position dimensions
                    img_spec_dims,  #spectroscopy dimensions
                    # Link ancillary dsets to the first
                    h5_pos_inds=first_main_dset['Position_Indices'],
                    h5_pos_vals=first_main_dset['Position_Values'],
                    h5_spec_inds=first_image_dset['Spectroscopic_Indices'],
                    h5_spec_vals=first_image_dset['Spectroscopic_Values'],
                )

        # Create the new segments that will be stored as attribute
        new_segments = {}
        for seg, name in enumerate(self.segments_name):
            new_segments.update({name: self.segments[0, 0, seg] - short_ext})
        write_simple_attrs(
            h5_meas_group, {
                'Segments': new_segments,
                'Points_trimmed': points_trimmed,
                'Notes': self.notes
            })
        write_simple_attrs(
            h5_file, {
                'translator':
                'ARhdf5',
                'instrument':
                'Asylum Research ' + self.note_value('MicroscopeModel'),
                'AR software version':
                self.note_value('Version')
            })

        if self.debug:
            print(print_tree(h5_file))
            print('\n')
            for key, val in get_attributes(h5_meas_group).items():
                if key != 'Notes':
                    print('{} : {}'.format(key, val))
                else:
                    print('{} : {}'.format(
                        key, 'notes string too long to be written here.'))

        # Clean up
        ARh5_file.close()
        h5_file.close()
        self.translated = True
        return h5_path
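The trimming above aligns force curves of unequal length by cutting every curve down to the shortest 'Ext' segment. A toy sketch of the bookkeeping on a fake per-pixel extension-length array:

import numpy as np

ext_seg = np.array([[100, 103],
                    [101, 100]])              # illustrative extension lengths per pixel
short_ext = np.amin(ext_seg)                  # 100: every curve is cut to this
difference = np.amax(ext_seg) - short_ext     # 3: worst-case points removed
points_trimmed = ext_seg - short_ext          # per-pixel start offset into each curve
print(short_ext, difference)
print(points_trimmed)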
Example #51
    def _create_results_datasets(self):
        """
        Creates hdf5 datasets and datagroups to hold the results
        """
        # create all h5 datasets here:
        num_pos = self.h5_main.shape[0]

        if self.verbose and self.mpi_rank == 0:
            print('Now creating the datasets')

        self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(self.h5_results_grp, {'algorithm_author': 'Kody J. Law', 'last_pixel': 0})
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        if self.verbose and self.mpi_rank == 0:
            print('created group: {} with attributes:'.format(self.h5_results_grp.name))
            print(get_attributes(self.h5_results_grp))

        # One of those rare instances when the result is exactly the same as the source
        self.h5_i_corrected = create_empty_dataset(self.h5_main, np.float32, 'Corrected_Current', h5_group=self.h5_results_grp)

        if self.verbose and self.mpi_rank == 0:
            print('Created I Corrected')
            # print_tree(self.h5_results_grp)

        # For some reason, we cannot specify chunks or compression!
        # The resistance dataset requires the creation of a new spectroscopic dimension
        self.h5_resistance = write_main_dataset(self.h5_results_grp, (num_pos, self.num_x_steps), 'Resistance', 'Resistance',
                                                'GOhms', None, Dimension('Bias', 'V', self.num_x_steps),
                                                dtype=np.float32, # chunks=(1, self.num_x_steps), #compression='gzip',
                                                h5_pos_inds=self.h5_main.h5_pos_inds,
                                                h5_pos_vals=self.h5_main.h5_pos_vals)

        if self.verbose and self.mpi_rank == 0:
            print('Created Resistance')
            # print_tree(self.h5_results_grp)

        assert isinstance(self.h5_resistance, USIDataset)  # only here for PyCharm
        self.h5_new_spec_vals = self.h5_resistance.h5_spec_vals

        # The variance is identical to the resistance dataset
        self.h5_variance = create_empty_dataset(self.h5_resistance, np.float32, 'R_variance')

        if self.verbose and self.mpi_rank == 0:
            print('Created Variance')
            # print_tree(self.h5_results_grp)

        # The capacitance dataset requires new spectroscopic dimensions as well
        self.h5_cap = write_main_dataset(self.h5_results_grp, (num_pos, 1), 'Capacitance', 'Capacitance', 'pF', None,
                                         Dimension('Direction', '', [1]),  h5_pos_inds=self.h5_main.h5_pos_inds,
                                         h5_pos_vals=self.h5_main.h5_pos_vals, dtype=cap_dtype, #compression='gzip',
                                         aux_spec_prefix='Cap_Spec_')

        if self.verbose and self.mpi_rank == 0:
            print('Created Capacitance')
            # print_tree(self.h5_results_grp)
            print('Done creating all results datasets!')

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()
Example #52
    def translate(self, file_path, show_plots=True, save_plots=True, do_histogram=False):
        """
        Basic method that translates .dat data file(s) to a single .h5 file
        
        Inputs:
            file_path -- Absolute file path for one of the data files. 
            It is assumed that this file is of the OLD data format. 
            
        Outputs:
            h5_path -- Absolute path of the generated .h5 file
        """
        file_path = path.abspath(file_path)
        (folder_path, basename) = path.split(file_path)
        (basename, path_dict) = self._parse_file_path(file_path)

        h5_path = path.join(folder_path, basename + '.h5')
        if path.exists(h5_path):
            remove(h5_path)
        self.h5_file = h5py.File(h5_path, 'w')

        isBEPS = True
        parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms'])

        ignored_plt_grps = ['in-field']  # Here we assume that there is no in-field.
        # If in-field data is captured then the translator would have to be modified.

        # Technically, we could do away with this if statement, as isBEPS is always true for this translation
        if isBEPS:
            parm_dict['data_type'] = 'BEPSData'

            std_expt = parm_dict['VS_mode'] != 'load user defined VS Wave from file'

            if not std_expt:
                warn('This translator does not handle user defined voltage spectroscopy')
                return

            spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode'])

        # Check file sizes:
        if 'read_real' in path_dict.keys():
            real_size = path.getsize(path_dict['read_real'])
            imag_size = path.getsize(path_dict['read_imag'])
        else:
            real_size = path.getsize(path_dict['write_real'])
            imag_size = path.getsize(path_dict['write_imag'])

        if real_size != imag_size:
            raise ValueError("Real and imaginary file sizes DON'T match!. Ending")

        num_rows = int(parm_dict['grid_num_rows'])
        num_cols = int(parm_dict['grid_num_cols'])
        num_pix = num_rows * num_cols
        tot_bins = real_size / (num_pix * 4)  # Finding bins by simple division of entire datasize

        # Check for case where only a single pixel is missing.
        check_bins = real_size / ((num_pix - 1) * 4)

        if tot_bins % 1 and check_bins % 1:
            warn('Aborting! Some parameter appears to have changed in-between')
            return
        elif not tot_bins % 1:
            # Everything's ok
            pass
        elif not check_bins % 1:
            tot_bins = check_bins
            warn('Warning:  A pixel seems to be missing from the data.  File will be padded with zeros.')

        tot_bins = int(tot_bins)
        (bin_inds, bin_freqs, bin_FFT, ex_wfm, dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms'])
        """
        Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less 
        than the actual number), we need to re-calculate it based on the available data. This is done below.
        """

        band_width = parm_dict['BE_band_width_[Hz]'] * (0.5 - parm_dict['BE_band_edge_trim'])
        st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width
        en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width
        bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32)

        # Forcing standardized datatypes:
        bin_inds = np.int32(bin_inds)
        bin_freqs = np.float32(bin_freqs)
        bin_FFT = np.complex64(bin_FFT)
        ex_wfm = np.float32(ex_wfm)

        self.FFT_BE_wave = bin_FFT

        (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict)

        # Remove the unused plot group columns before proceeding:
        (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs, UDVS_units, ignored_plt_grps)

        spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE)

        # Will assume that all excitation waveforms have same number of bins
        # Here, the denominator is 2 because there are only out-of-field measurements. For IF + OF, it should be 1
        num_actual_udvs_steps = UDVS_mat.shape[0] / 2
        bins_per_step = tot_bins / num_actual_udvs_steps

        # Some more checks
        if bins_per_step % 1:
            warn('Non integer number of bins per step!')
            return
        else:
            bins_per_step = int(bins_per_step)

        num_actual_udvs_steps = int(num_actual_udvs_steps)

        stind = 0
        for step_index in range(UDVS_mat.shape[0]):
            if UDVS_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
                continue  # skip
            spec_inds[0, stind:stind + bins_per_step] = np.arange(bins_per_step, dtype=INDICES_DTYPE)  # Bin step
            spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(bins_per_step,
                                                                             dtype=INDICES_DTYPE)  # UDVS step
            stind += bins_per_step
        del stind, step_index

        # Some very basic information that can help the processing / analysis crew
        parm_dict['num_bins'] = tot_bins
        parm_dict['num_pix'] = num_pix
        parm_dict['num_udvs_steps'] = num_actual_udvs_steps

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        global_parms['experiment_date'] = parm_dict['File_date_and_time']

        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
        global_parms['translator'] = 'ODF'
        write_simple_attrs(self.h5_file, global_parms)

        # Create Measurement and Channel groups
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        chan_grp = create_indexed_group(meas_grp, 'Channel')
        chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1']

        # Create Auxiliary Datasets
        h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm)

        udvs_slices = dict()
        for col_ind, col_name in enumerate(UDVS_labs):
            udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
        h5_UDVS = chan_grp.create_dataset('UDVS',
                                          data=UDVS_mat,
                                          dtype=np.float32)
        write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units})

        h5_bin_steps = chan_grp.create_dataset('Bin_Steps',
                                               data=np.arange(bins_per_step, dtype=np.uint32),
                                               dtype=np.uint32)

        # Need to add the Bin Waveform type - infer from UDVS
        exec_bin_vec = self.signal_type * np.ones(len(bin_inds), dtype=np.int32)
        h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type',
                                             data=exec_bin_vec,
                                             dtype=np.int32)

        h5_bin_inds = chan_grp.create_dataset('Bin_Indices',
                                              data=bin_inds,
                                              dtype=np.uint32)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies',
                                              data=bin_freqs,
                                              dtype=np.float32)
        h5_bin_FFT = chan_grp.create_dataset('Bin_FFT',
                                             data=bin_FFT,
                                             dtype=np.complex64)
        # Noise floor should be of shape: (udvs_steps x 3 x positions)
        h5_noise_floor = chan_grp.create_dataset('Noise_Floor',
                                                 shape=(num_pix, num_actual_udvs_steps),
                                                 dtype=nf32,
                                                 chunks=(1, num_actual_udvs_steps))

        """ 
        ONLY ALLOCATING SPACE FOR MAIN DATA HERE!
        Chunk by each UDVS step - this makes it easy / quick to:
            1. read data for a single UDVS step from all pixels
            2. read an entire / multiple pixels at a time
        The only problem is that a typical UDVS step containing 50 bins occupies only 400 bytes.
        This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes
        meaning that the metadata would be very substantial.
        This assumption is fine since we almost do not handle any user defined cases
        """

        """
        New Method for chunking the Main_Data dataset.  Chunking is now done in N-by-N squares of UDVS steps by pixels.
        N is determined dynamically based on the dimensions of the dataset.  Currently it is set such that individual
        chunks are less than 10kB in size.
        
        Chris Smith -- [email protected]
        """
        pos_dims = [Dimension('X', 'nm', num_cols), Dimension('Y', 'nm', num_rows)]

        # Create Spectroscopic Values and Spectroscopic Values Labels datasets
        spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(UDVS_mat, spec_inds,
                                                                                                bin_freqs,
                                                                                                exec_bin_vec,
                                                                                                parm_dict, UDVS_labs,
                                                                                                UDVS_units)

        spec_dims = list()
        for row_ind, row_name in enumerate(spec_vals_labs):
            spec_dims.append(Dimension(row_name,
                                       spec_vals_units[row_ind],
                                       spec_vals[row_ind]))

        pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps,
                                       bins_per_step, np.dtype('complex64').itemsize)
        chunking = np.floor(np.sqrt(pixel_chunking))
        chunking = max(1, chunking)
        chunking = min(num_actual_udvs_steps, num_pix, chunking)
        self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins), 'Raw_Data',
                                          'Piezoresponse', 'V',
                                          pos_dims, spec_dims,
                                          dtype=np.complex64,
                                          chunks=(chunking, chunking * bins_per_step),
                                          compression='gzip')

        self.mean_resp = np.zeros(shape=(self.h5_main.shape[1]), dtype=np.complex64)
        self.max_resp = np.zeros(shape=(self.h5_main.shape[0]), dtype=np.float32)
        self.min_resp = np.zeros(shape=(self.h5_main.shape[0]), dtype=np.float32)

        # Now read the raw data files:
        self._read_data(path_dict['read_real'], path_dict['read_imag'], parm_dict)
        self.h5_file.flush()

        generatePlotGroups(self.h5_main, self.mean_resp, folder_path, basename, self.max_resp,
                           self.min_resp, max_mem_mb=self.max_ram, spec_label=spec_label, show_plots=show_plots,
                           save_plots=save_plots, do_histogram=do_histogram)

        self.h5_file.close()

        return h5_path
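The N-by-N chunking described in the docstring above targets chunks under roughly 10 kB of complex64 data. A hedged sketch of the same sizing logic without the maxReadPixels helper; the shapes and the byte budget are illustrative:

import numpy as np

num_pix, num_udvs_steps, bins_per_step = 4096, 64, 50
itemsize = np.dtype('complex64').itemsize        # 8 bytes

cells = 10240 // (bins_per_step * itemsize)      # (pixel, UDVS step) cells under 10 kiB
n = int(np.floor(np.sqrt(cells)))                # N-by-N square of cells
n = min(num_udvs_steps, num_pix, max(1, n))
print((n, n * bins_per_step))                    # chunk shape, e.g. (5, 250)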
Example #53
    def translate(self, parm_path):
        """
        Basic method that translates .mat data files to a single .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        self.parm_path = path.abspath(parm_path)
        (folder_path, file_name) = path.split(parm_path)
        (_, base_name) = path.split(folder_path)  # the folder name doubles as the dataset base name
        h5_path = path.join(folder_path, base_name + '.h5')

        # Read parameters
        parm_dict = readGmodeParms(parm_path)

        # Add the w^2 specific parameters to this list
        parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
        #freq_sweep_parms = parm_data['freqSweepParms']
        #parm_dict['freq_sweep_delay'] = np.float(freq_sweep_parms['delay'].item())
        gen_sig = parm_data['genSig']
        #parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
        freq_array = np.float32(parm_data['freqArray'])  # needed below to build the spectroscopic axes

        # prepare and write spectroscopic values
        samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
        num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate)

        w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate, np.float32(samp_rate / num_bins))

        # There is most likely a more elegant solution using np.meshgrid - see the sketch after this example
        spec_val_mat = np.zeros((len(freq_array) * num_bins, 2), dtype=VALUES_DTYPE)
        spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
        spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)

        spec_ind_mat = np.zeros((2, len(freq_array) * num_bins), dtype=np.int32)
        spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
        spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)

        num_rows = parm_dict['grid_num_rows']
        num_cols = parm_dict['grid_num_cols']
        parm_dict['data_type'] = 'GVS'

        num_pix = num_rows * num_cols

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
        global_parms['translator'] = 'GVS'

        # Now start creating datasets and populating:
        if path.exists(h5_path):
            remove(h5_path)

        h5_f = h5py.File(h5_path, 'w')
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')
        write_simple_attrs(chan_grp, parm_dict)


        pos_dims = [Dimension('X', 'nm', num_cols),
                    Dimension('Y', 'nm', num_rows)]
        spec_dims = [Dimension('Response Bin', 'a.u.', num_bins),
                     Dimension('Excitation Frequency', 'Hz', len(freq_array))]

        # Minimize file size to the extent possible.
        # DAQs are rated at 16 bit, so a reduced float type would suffice; float32 is used below.
        # For some reason, compression is more effective on time series data

        h5_main = write_main_dataset(chan_grp, (num_pix, num_bins), 'Raw_Data',
                                     'Deflection', 'V',
                                     pos_dims, spec_dims,
                                     chunks=(1, num_bins), dtype=np.float32)

        h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

        # Now doing link_h5_objects_as_attrs:
        link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(folder_path, 'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                print('Working on row {} col {}'.format(row_ind, col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    pix_mat = pix_data['AI_mat']
                    # Take the inverse FFT on the 2nd dimension
                    pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1)
                    # Verified with Matlab - no conjugate required here.
                    pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                    # Keep the real part explicitly; the original float32 cast discarded
                    # the imaginary part anyway (with a ComplexWarning)
                    h5_main[pos_ind, :] = np.float32(np.real(pix_vec))
                    h5_f.flush()  # flush from memory!
                else:
                    print('File not found for: row {} col {}'.format(row_ind, col_ind))
                pos_ind += 1
                if (100.0 * pos_ind / num_pix) % 10 == 0:
                    print('completed translating {} %'.format(int(100 * pos_ind / num_pix)))

        h5_f.close()

        return h5_path
Example #54
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file

        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                          'BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))
        be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])

        self.points_per_pixel = len(be_wave)
        self.points_per_line = len(be_wave_train)

        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency (a worked example follows this snippet):
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['points_per_line'] = self.points_per_line
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'


        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_file = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()

        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement and Channel groups and write the appropriate parameters to them
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # Now that the file has been created, go over each raw data file:
        """ 
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)


        for f_index in data_paths.keys():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main)

        h5_file.close()
        print('G-Tune translation complete!')

        return h5_path
Example #55
    def translate(self,
                  file_path,
                  show_plots=True,
                  save_plots=True,
                  do_histogram=False):
        """
        Basic method that translates .dat data file(s) to a single .h5 file
        
        Inputs:
            file_path -- Absolute file path for one of the data files.
            It is assumed that this file is of the OLD data format.
            show_plots, save_plots, do_histogram -- Flags passed through to generatePlotGroups

        Outputs:
            h5_path -- Absolute path of the translated .h5 file
        """
        file_path = path.abspath(file_path)
        (folder_path, basename) = path.split(file_path)
        (basename, path_dict) = self._parse_file_path(file_path)

        h5_path = path.join(folder_path, basename + '.h5')
        if path.exists(h5_path):
            remove(h5_path)
        self.h5_file = h5py.File(h5_path, 'w')

        isBEPS = True
        parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms'])

        ignored_plt_grps = ['in-field']  # Here we assume that there is no in-field data.
        # If in-field data is captured then the translator would have to be modified.

        # Technically, we could do away with this if statement, as isBEPS is always true for this translation
        if isBEPS:
            parm_dict['data_type'] = 'BEPSData'

            std_expt = parm_dict['VS_mode'] != 'load user defined VS Wave from file'

            if not std_expt:
                warn('This translator does not handle user defined voltage spectroscopy')
                return

            spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode'])

        # Check file sizes:
        if 'read_real' in path_dict.keys():
            real_size = path.getsize(path_dict['read_real'])
            imag_size = path.getsize(path_dict['read_imag'])
        else:
            real_size = path.getsize(path_dict['write_real'])
            imag_size = path.getsize(path_dict['write_imag'])

        if real_size != imag_size:
            raise ValueError("Real and imaginary file sizes don't match! Aborting.")

        num_rows = int(parm_dict['grid_num_rows'])
        num_cols = int(parm_dict['grid_num_cols'])
        num_pix = num_rows * num_cols
        tot_bins = real_size / (
            num_pix * 4)  # Finding bins by simple division of entire datasize

        # Check for the case where only a single pixel is missing (a standalone sketch of this check follows this example).
        check_bins = real_size / ((num_pix - 1) * 4)

        if tot_bins % 1 and check_bins % 1:
            warn('Aborting! Some parameter appears to have changed in-between')
            return
        elif not tot_bins % 1:
            # Everything's ok
            pass
        elif not check_bins % 1:
            tot_bins = check_bins
            warn('Warning: A pixel seems to be missing from the data. File will be padded with zeros.')

        tot_bins = int(tot_bins)
        (bin_inds, bin_freqs, bin_FFT, ex_wfm,
         dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms'])
        """
        Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less 
        than the actual number), we need to re-calculate it based on the available data. This is done below.
        """

        band_width = parm_dict['BE_band_width_[Hz]'] * (0.5 - parm_dict['BE_band_edge_trim'])
        st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width
        en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width
        bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32)

        # Forcing standardized datatypes:
        bin_inds = np.int32(bin_inds)
        bin_freqs = np.float32(bin_freqs)
        bin_FFT = np.complex64(bin_FFT)
        ex_wfm = np.float32(ex_wfm)

        self.FFT_BE_wave = bin_FFT

        (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict)

        # Remove the unused plot group columns before proceeding:
        (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs,
                                                     UDVS_units,
                                                     ignored_plt_grps)

        spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE)

        # Will assume that all excitation waveforms have the same number of bins
        # Here, the denominator is 2 because only out-of-field measurements are retained. For IF + OF, it should be 1
        num_actual_udvs_steps = UDVS_mat.shape[0] / 2
        bins_per_step = tot_bins / num_actual_udvs_steps

        # Some more checks
        if bins_per_step % 1:
            warn('Non integer number of bins per step!')
            return
        else:
            bins_per_step = int(bins_per_step)

        num_actual_udvs_steps = int(num_actual_udvs_steps)

        stind = 0
        for step_index in range(UDVS_mat.shape[0]):
            if UDVS_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
                continue  # skip
            spec_inds[0, stind:stind + bins_per_step] = np.arange(
                bins_per_step, dtype=INDICES_DTYPE)  # Bin step
            spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(
                bins_per_step, dtype=INDICES_DTYPE)  # UDVS step
            stind += bins_per_step
        del stind, step_index

        # Some very basic information that can help the processing / analysis crew
        parm_dict['num_bins'] = tot_bins
        parm_dict['num_pix'] = num_pix
        parm_dict['num_udvs_steps'] = num_actual_udvs_steps

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        global_parms['experiment_date'] = parm_dict['File_date_and_time']

        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
        global_parms['translator'] = 'ODF'
        write_simple_attrs(self.h5_file, global_parms)

        # Create Measurement and Channel groups
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        chan_grp = create_indexed_group(meas_grp, 'Channel')
        chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1']

        # Create Auxiliary Datasets
        h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm)

        udvs_slices = dict()
        for col_ind, col_name in enumerate(UDVS_labs):
            udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
        h5_UDVS = chan_grp.create_dataset('UDVS',
                                          data=UDVS_mat,
                                          dtype=np.float32)
        write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units})

        h5_bin_steps = chan_grp.create_dataset('Bin_Steps',
                                               data=np.arange(bins_per_step,
                                                              dtype=np.uint32),
                                               dtype=np.uint32)

        # Need to add the Bin Waveform type - infer from UDVS
        exec_bin_vec = self.signal_type * np.ones(len(bin_inds),
                                                  dtype=np.int32)
        h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type',
                                             data=exec_bin_vec,
                                             dtype=np.int32)

        h5_bin_inds = chan_grp.create_dataset('Bin_Indices',
                                              data=bin_inds,
                                              dtype=np.uint32)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies',
                                              data=bin_freqs,
                                              dtype=np.float32)
        h5_bin_FFT = chan_grp.create_dataset('Bin_FFT',
                                             data=bin_FFT,
                                             dtype=np.complex64)
        # Noise floor should be of shape: (udvs_steps x 3 x positions)
        h5_noise_floor = chan_grp.create_dataset(
            'Noise_Floor',
            shape=(num_pix, num_actual_udvs_steps),
            dtype=nf32,
            chunks=(1, num_actual_udvs_steps))
        """ 
        ONLY ALLOCATING SPACE FOR MAIN DATA HERE!
        Chunk by each UDVS step - this makes it easy / quick to:
            1. read data for a single UDVS step from all pixels
            2. read one or more entire pixels at a time
        The only problem is that a typical UDVS step containing 50 bins occupies only 400 bytes.
        This is smaller than the recommended chunk size of 10,000 - 999,999 bytes,
        meaning that the chunking metadata overhead would be substantial.
        This trade-off is acceptable since user-defined cases are almost never handled.
        """
        """
        New method for chunking the Main_Data dataset.  Chunking is now done in N-by-N squares of UDVS steps by pixels.
        N is determined dynamically based on the dimensions of the dataset.  Currently it is set such that individual
        chunks are less than 10 kB in size.
        
        Chris Smith -- [email protected]
        """
        pos_dims = [
            Dimension('X', 'nm', num_cols),
            Dimension('Y', 'nm', num_rows)
        ]

        # Create Spectroscopic Values and Spectroscopic Values Labels datasets
        spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(
            UDVS_mat, spec_inds, bin_freqs, exec_bin_vec, parm_dict, UDVS_labs,
            UDVS_units)

        spec_dims = list()
        for row_ind, row_name in enumerate(spec_vals_labs):
            spec_dims.append(
                Dimension(row_name, spec_vals_units[row_ind],
                          spec_vals[row_ind]))

        pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps,
                                       bins_per_step,
                                       np.dtype('complex64').itemsize)
        chunking = np.floor(np.sqrt(pixel_chunking))
        chunking = max(1, chunking)
        chunking = min(num_actual_udvs_steps, num_pix, chunking)
        self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins),
                                          'Raw_Data',
                                          'Piezoresponse',
                                          'V',
                                          pos_dims,
                                          spec_dims,
                                          dtype=np.complex64,
                                          chunks=(chunking,
                                                  chunking * bins_per_step),
                                          compression='gzip')

        self.mean_resp = np.zeros(shape=(self.h5_main.shape[1]),
                                  dtype=np.complex64)
        self.max_resp = np.zeros(shape=(self.h5_main.shape[0]),
                                 dtype=np.float32)
        self.min_resp = np.zeros(shape=(self.h5_main.shape[0]),
                                 dtype=np.float32)

        # Now read the raw data files:
        self._read_data(path_dict['read_real'], path_dict['read_imag'],
                        parm_dict)
        self.h5_file.flush()

        generatePlotGroups(self.h5_main,
                           self.mean_resp,
                           folder_path,
                           basename,
                           self.max_resp,
                           self.min_resp,
                           max_mem_mb=self.max_ram,
                           spec_label=spec_label,
                           show_plots=show_plots,
                           save_plots=save_plots,
                           do_histogram=do_histogram)

        self.h5_file.close()

        return h5_path
Example #56
    def translate(self, file_path, verbose=False, append_path='', 
                  grp_name='Measurement', parm_encoding='utf-8'):
        """
        Translates the provided file to .h5

        Parameters
        ----------
        file_path : String / unicode
            Absolute path of the .ibw file
        verbose : Boolean (Optional)
            Whether or not to show print statements for debugging
        append_path : string (Optional)
            h5_file to add these data to, must be a path to the h5_file on disk
        grp_name : string (Optional)
            Change from default "Measurement" name to something specific
        parm_encoding : str, optional
            Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the .h5 file
        """
        file_path = path.abspath(file_path)
        # Prepare the .h5 file:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]  # strip the '.ibw' extension
        
        if not append_path:
            h5_path = path.join(folder_path, base_name + '.h5')
            if path.exists(h5_path):
                remove(h5_path)
            h5_file = h5py.File(h5_path, 'w')
        else:
            h5_path = append_path
            if not path.exists(append_path):
                raise FileNotFoundError('File does not exist. Check pathname.')
            h5_file = h5py.File(h5_path, 'r+')
        

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.shape[-1] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for layer 0 null set errors in older AR software

        if images.ndim == 3:  # Image stack
            if verbose:
                print('Found image stack of size {}'.format(images.shape))
            type_suffix = 'Image'

            num_rows = parm_dict['ScanLines']
            num_cols = parm_dict['ScanPoints']

            images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
            images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points, 1] (see the reshape sketch after this example)

            pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

            spec_desc = Dimension('arb', 'a.u.', [1])

        else:  # single force curve
            if verbose:
                print('Found force curve of size {}'.format(images.shape))

            type_suffix = 'ForceCurve'
            images = np.atleast_3d(images)  # now [Z, chan, 1]
            images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

            # The data generated above varies linearly. Override.
            # For now, we'll shove the Z sensor data into the spectroscopic values.

            # Find the channel that corresponds to either Z sensor or Raw:
            try:
                chan_ind = chan_labels.index('ZSnsr')
                spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
            except ValueError:
                try:
                    chan_ind = chan_labels.index('Raw')
                    spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
                except ValueError:
                    # We don't expect to come here. If we do, spectroscopic values remains as is
                    spec_data = np.arange(images.shape[2])

            pos_desc = Dimension('X', 'm', [1])
            spec_desc = Dimension('Z', 'm', spec_data)

        # Create measurement group
        meas_grp = create_indexed_group(h5_file, grp_name)

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            if verbose:
                print('channel', chan_name)
                print('unit', chan_unit)
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

        h5_file.close()
        return h5_path