Пример #1
0
    def _translate_image_stack(self, h5_meas_grp):
        """
        Reads the scan images from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp,
                                                         Dimension(
                                                             'single', 'a. u.',
                                                             1),
                                                         is_spectral=True)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                break

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [
            Dimension('X', 'nm', layer_info['Samps/line']),
            Dimension('Y', 'nm', layer_info['Number of lines'])
        ],
                                                       is_spectral=False)

        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_2')
                data = self._read_image_layer(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(
                    h5_chan_grp,
                    np.reshape(data, (-1, 1)),
                    'Raw_Data',
                    # Quantity and Units needs to be fixed by someone who understands these files better
                    quantity,
                    'a. u.',
                    None,
                    None,
                    dtype=np.float32,
                    compression='gzip',
                    h5_pos_inds=h5_pos_inds,
                    h5_pos_vals=h5_pos_vals,
                    h5_spec_inds=h5_spec_inds,
                    h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes for rows and columns
                write_simple_attrs(h5_chan_grp, layer_info)
Пример #2
0
    def _translate_force_curve(self, h5_meas_grp):
        """
        Reads the force curves from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp,
                                                       Dimension(
                                                           'single', 'a. u.',
                                                           1),
                                                       is_spectral=False)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                break
        tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')]

        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
            h5_meas_grp,
            Dimension('Z', 'nm', int(np.sum(tr_rt))),
            is_spectral=True)

        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_4')
                data = self._read_data_vector(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(
                    h5_chan_grp,
                    np.expand_dims(data, axis=0),
                    'Raw_Data',
                    # Quantity and Units needs to be fixed by someone who understands these files better
                    quantity,
                    'a. u.',
                    None,
                    None,
                    dtype=np.float32,
                    compression='gzip',
                    h5_pos_inds=h5_pos_inds,
                    h5_pos_vals=h5_pos_vals,
                    h5_spec_inds=h5_spec_inds,
                    h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes
                write_simple_attrs(h5_chan_grp, layer_info)
Пример #3
0
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the clustering results
        """
        print('Writing clustering results to file.')
        num_clusters = self.__mean_resp.shape[0]

        h5_cluster_group = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(h5_cluster_group, self.parms_dict)
        h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

        h5_labels = write_main_dataset(h5_cluster_group, np.uint32(self.__labels.reshape([-1, 1])), 'Labels',
                                       'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1),
                                       h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                       aux_spec_prefix='Cluster_', dtype=np.uint32)

        if self.num_comps != self.h5_main.shape[1]:
            '''
            Setup the Spectroscopic Indices and Values for the Mean Response if we didn't use all components
            Note that a sliced spectroscopic matrix may not be contiguous. Let's just lose the spectroscopic data
            for now until a better method is figured out
            '''
            """
            if isinstance(self.data_slice[1], np.ndarray):
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1].tolist()]

            else:
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1]]

            ds_centroid_values.data[0, :] = centroid_vals_mat
            """
            if isinstance(self.data_slice[1], np.ndarray):
                vals_slice = self.data_slice[1].tolist()
            else:
                vals_slice = self.data_slice[1]
            vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze()
            new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals)
            h5_inds, h5_vals = write_ind_val_dsets(h5_cluster_group, new_spec, is_spectral=True)

        else:
            h5_inds = self.h5_main.h5_spec_inds
            h5_vals = self.h5_main.h5_spec_vals

        # For now, link centroids with default spectroscopic indices and values.
        h5_centroids = write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response',
                                          get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0],
                                          Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None,
                                          h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                                          h5_spec_vals=h5_vals)

        return h5_cluster_group
Пример #4
0
    def _translate_image_stack(self, h5_meas_grp):
        """
        Reads the scan images from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=True)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                break

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [Dimension('X', 'nm', layer_info['Samps/line']),
                                                                     Dimension('Y', 'nm',
                                                                               layer_info['Number of lines'])],
                                                       is_spectral=False)

        for class_name in self.meta_data.keys():
            if 'Ciao image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_2')
                data = self._read_image_layer(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(h5_chan_grp, np.reshape(data, (-1, 1)), 'Raw_Data',
                                   # Quantity and Units needs to be fixed by someone who understands these files better
                                   quantity, 'a. u.',
                                   None, None, dtype=np.float32, compression='gzip',
                                   h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                   h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes for rows and columns
                write_simple_attrs(h5_chan_grp, layer_info)
Пример #5
0
    def _translate_force_curve(self, h5_meas_grp):
        """
        Reads the force curves from the proprietary file and writes them to HDF5 datasets

        Parameters
        ----------
        h5_meas_grp : h5py.Group object
            Reference to the measurement group
        """
        # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=False)

        # Find out the size of the force curves from the metadata:
        layer_info = None
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                break
        tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')]

        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('Z', 'nm', int(np.sum(tr_rt))),
                                                         is_spectral=True)

        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                layer_info = self.meta_data[class_name]
                quantity = layer_info.pop('Image Data_4')
                data = self._read_data_vector(layer_info)
                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_main_dataset(h5_chan_grp, np.expand_dims(data, axis=0), 'Raw_Data',
                                   # Quantity and Units needs to be fixed by someone who understands these files better
                                   quantity, 'a. u.',
                                   None, None, dtype=np.float32, compression='gzip',
                                   h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                   h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
                # Think about standardizing attributes
                write_simple_attrs(h5_chan_grp, layer_info)
Пример #6
0
    def _create_results_datasets(self):
        """
        Creates all the datasets necessary for holding all parameters + data.
        """

        self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

        self.parms_dict.update({'last_pixel': 0, 'algorithm': 'pycroscopy_SignalFilter'})

        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        assert isinstance(self.h5_results_grp, h5py.Group)

        if isinstance(self.composite_filter, np.ndarray):
            h5_comp_filt = self.h5_results_grp.create_dataset('Composite_Filter',
                                                              data=np.float32(self.composite_filter))

            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Composite_Filter dataset'.format(self.mpi_rank))

        # First create the position datsets if the new indices are smaller...
        if self.num_effective_pix != self.h5_main.shape[0]:
            # TODO: Do this part correctly. See past solution:
            """
            # need to make new position datasets by taking every n'th index / value:
                new_pos_vals = np.atleast_2d(h5_pos_vals[slice(0, None, self.num_effective_pix), :])
                pos_descriptor = []
                for name, units, leng in zip(h5_pos_inds.attrs['labels'], h5_pos_inds.attrs['units'],
                                             [int(np.unique(h5_pos_inds[:, dim_ind]).size / self.num_effective_pix)
                                              for dim_ind in range(h5_pos_inds.shape[1])]):
                    pos_descriptor.append(Dimension(name, units, np.arange(leng)))
                ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_descriptor, is_spectral=False, verbose=self.verbose)
                h5_pos_vals.data = np.atleast_2d(new_pos_vals)  # The data generated above varies linearly. Override.

            """
            h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets(self.h5_results_grp,
                                                                   Dimension('pixel', 'a.u.', self.num_effective_pix),
                                                                   is_spectral=False, verbose=self.verbose and self.mpi_rank==0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Created the new position ancillary dataset'.format(self.mpi_rank))

        else:
            h5_pos_inds_new = self.h5_main.h5_pos_inds
            h5_pos_vals_new = self.h5_main.h5_pos_vals

            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Reusing source datasets position datasets'.format(self.mpi_rank))

        if self.noise_threshold is not None:
            self.h5_noise_floors = write_main_dataset(self.h5_results_grp, (self.num_effective_pix, 1), 'Noise_Floors',
                                                      'Noise', 'a.u.', None, Dimension('arb', '', [1]),
                                                      dtype=np.float32, aux_spec_prefix='Noise_Spec_',
                                                      h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
                                                      verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Noise_Floors dataset'.format(self.mpi_rank))

        if self.write_filtered:
            # Filtered data is identical to Main_Data in every way - just a duplicate
            self.h5_filtered = create_empty_dataset(self.h5_main, self.h5_main.dtype, 'Filtered_Data',
                                                    h5_group=self.h5_results_grp)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Filtered dataset'.format(self.mpi_rank))

        self.hot_inds = None

        if self.write_condensed:
            self.hot_inds = np.where(self.composite_filter > 0)[0]
            self.hot_inds = np.uint(self.hot_inds[int(0.5 * len(self.hot_inds)):])  # only need to keep half the data
            condensed_spec = Dimension('hot_frequencies', '', int(0.5 * len(self.hot_inds)))
            self.h5_condensed = write_main_dataset(self.h5_results_grp, (self.num_effective_pix, len(self.hot_inds)),
                                                   'Condensed_Data', 'Complex', 'a. u.', None, condensed_spec,
                                                   h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
                                                   dtype=np.complex, verbose=self.verbose and self.mpi_rank == 0)
            if self.verbose and self.mpi_rank == 0:
                print('Rank {} - Finished creating the Condensed dataset'.format(self.mpi_rank))

        if self.mpi_size > 1:
            self.mpi_comm.Barrier()
        self.h5_main.file.flush()
Пример #7
0
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file
        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = self._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=[
                              'BE_wave', 'FFT_BE_wave', 'total_cols',
                              'total_rows'
                          ])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))

        # Need to take the complex conjugate if reading from a .mat file
        # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave'])))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])
        self.points_per_pixel = len(be_wave)

        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel *
                                           num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # method 2 for calculating the exact excitation frequency:
        """
        fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave)))
        w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel,
                            self.points_per_pixel)
        hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3))
        ex_freq_correct = w_vec[hot_bins[-1]]
        """

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'

        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(
                self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_f = h5py.File(h5_path, 'w')
        global_parms = dict()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V',
                              np.tile(VALUES_DTYPE(be_wave), num_cols))

        first_dat = True
        for key in data_paths.keys():
            # Now that the file has been created, go over each raw data file:
            # 1. write all ancillary data. Link data. 2. Write main data sequentially
            """ We only allocate the space for the main data here.
            This does NOT change with each file. The data written to it does.
            The auxiliary datasets will not change with each raw data file since
            only one excitation waveform is used"""
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if first_dat:
                if len(data_paths) > 1:
                    # All positions and spectra are shared between channels
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(
                        meas_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
                        meas_grp, spec_desc, is_spectral=True)
                elif len(data_paths) == 1:
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(
                        chan_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
                        chan_grp, spec_desc, is_spectral=True)

                first_dat = False
            else:
                pass

            h5_main = write_main_dataset(
                chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                'Raw_Data',
                'Deflection',
                'V',
                None,
                None,
                h5_pos_inds=h5_pos_inds,
                h5_pos_vals=h5_pos_vals,
                h5_spec_inds=h5_spec_inds,
                h5_spec_vals=h5_spec_vals,
                chunks=(1, self.points_per_pixel),
                dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_paths[key], h5_main)

        h5_f.close()
        print('G-Line translation complete!')

        return h5_path
Пример #8
0
    def translate(self, file_path, verbose=False, parm_encoding='utf-8', ftype='FF',
                  subfolder='Measurement_000', h5_path='', channel_label_name=True):
        """
        Translates the provided file to .h5
        Adapted heavily from pycroscopy IBW file, modified to work with Ginger format

        :param file_path: Absolute path of the .ibw file
        :type file_path: String / unicode
        
        :param verbose: Whether or not to show  print statements for debugging
        :type verbose: boolean, optional
        
        :param parm_encoding: Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'
        :type parm_encoding: str, optional
            
        :param ftype: Delineates Ginger Lab imaging file type to be imported (not case-sensitive)
            'FF' : FF-trEFM
            'SKPM' : FM-SKPM
            'ringdown' : Ringdown
            'trEFM' : normal trEFM
        :type ftype: str, optional
        
        :param subfolder: Specifies folder under root (/) to save data in. Default is standard pycroscopy format
        :type subfolder: str, optional
        
        :param h5_path: Existing H5 file to append to
        :type h5_path: str, optional
        
        :param channel_label_name: If True, uses the Channel as the subfolder name (e.g. Height, Phase, Amplitude, Charging)
        :type channel_label_name: bool, optional
        
        :returns: Absolute path of the .h5 file
        :rtype: String / unicode
            
        """

        # Prepare the .h5 file:
        if not any(h5_path):
            folder_path, base_name = path.split(file_path)
            base_name = base_name[:-4]
            h5_path = path.join(folder_path, base_name + '.h5')
            # hard-coded exception, rarely occurs but can be useful
            if path.exists(h5_path):
                h5_path = path.join(folder_path, base_name + '_00.h5')

        h5_file = h5py.File(h5_path, 'w')

        # If subfolder improperly formatted
        if subfolder == '':
            subfolder = '/'

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)
        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.shape[2] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for weird null set errors in older AR software

        # Check if a Ginger Lab format ibw (has 'UserIn' in channel labels)
        _is_gl_type = any(['UserIn0' in str(s) for s in chan_labels])
        if _is_gl_type == True:
            chan_labels = self._get_image_type(chan_labels, ftype)

        if verbose:
            print('Processing image type', ftype, 'with channels', chan_labels)

        type_suffix = 'Image'

        num_rows = ibw_wave['wave_header']['nDim'][1]  # lines
        num_cols = ibw_wave['wave_header']['nDim'][0]  # points
        num_imgs = ibw_wave['wave_header']['nDim'][2]  # layers
        unit_scale = self._get_unit_factor(''.join([str(s)[-2] for s in ibw_wave['wave_header']['dimUnits'][0][0:2]]))
        data_scale = self._get_unit_factor(str(ibw_wave['wave_header']['dataUnits'][0])[-2])

        parm_dict['FastScanSize'] = unit_scale * num_cols * ibw_wave['wave_header']['sfA'][0]
        parm_dict['SlowScanSize'] = unit_scale * num_rows * ibw_wave['wave_header']['sfA'][1]

        images = images.transpose(2, 0, 1)  # now ordered as [chan, Y, X] image
        images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

        pos_desc = [Dimension(name='X', units='m', values=np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                    Dimension(name='Y', units='m', values=np.linspace(0, parm_dict['SlowScanSize'], num_rows))]
        spec_desc = [Dimension(name='arb', units='a.u.', values=[1])]

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_file['/'], pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_file['/'], spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            chan_grp = create_indexed_group(h5_file['/'], chan_name)
            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               pos_desc, spec_desc,
                               dtype=np.float32)

        if verbose:
            print('Finished writing all channels')

        h5_file.close()
        return h5_path
Пример #9
0
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file

        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths,
         data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=[
                              'AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1',
                              'BE_wave_train', 'BE_wave', 'total_cols',
                              'total_rows'
                          ])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))
        be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])

        self.points_per_pixel = len(be_wave)
        self.points_per_line = len(be_wave_train)

        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel *
                                           num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['points_per_line'] = self.points_per_line
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'

        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(
                self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_file = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()

        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement and Channel groups and write the appropriate parameters to them
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # Now that the file has been created, go over each raw data file:
        """ 
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V',
                              np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp,
                                                     pos_desc,
                                                     is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp,
                                                         spec_desc,
                                                         is_spectral=True)

        for f_index in data_paths.keys():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(
                chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                'Raw_Data',
                'Deflection',
                'V',
                None,
                None,
                h5_pos_inds=h5_pos_ind,
                h5_pos_vals=h5_pos_val,
                h5_spec_inds=h5_spec_inds,
                h5_spec_vals=h5_spec_vals,
                chunks=(1, self.points_per_pixel),
                dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_paths[f_index],
                                                    h5_main)

        h5_file.close()
        print('G-Tune translation complete!')

        return h5_path
Пример #10
0
    def translate(self, data_channels=None, verbose=False):
        """
        Translate the data into a Pycroscopy compatible HDF5 file.

        Parameters
        ----------
        data_channels : (optional) list of str
            Names of channels that will be read and stored in the file.
            If not given, all channels in the file will be used.
        verbose : (optional) Boolean
            Whether or not to print statements

        Returns
        -------
        h5_path : str
            Filepath to the output HDF5 file.

        """
        if self.parm_dict is None or self.data_dict is None:
            self._read_data(self.data_path)

        if data_channels is None:
            print('No channels specified. All channels in file will be used.')
            data_channels = self.parm_dict['channel_parms'].keys()

        if verbose:
            print('Using the following channels')
            for channel in data_channels:
                print(channel)

        if os.path.exists(self.h5_path):
            os.remove(self.h5_path)

        h5_file = h5py.File(self.h5_path, 'w')

        # Create measurement group and assign attributes
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(
            meas_grp, self.parm_dict['meas_parms']
        )

        # Create datasets for positional and spectroscopic indices and values
        spec_dim = self.data_dict['Spectroscopic Dimensions']
        pos_dims = self.data_dict['Position Dimensions']
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_dims,
                                                       is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_dim,
                                                         is_spectral=True)

        # Create the datasets for all the channels
        num_points = h5_pos_inds.shape[0]
        for data_channel in data_channels:
            raw_data = self.data_dict[data_channel].reshape([num_points, -1])

            chan_grp = create_indexed_group(meas_grp, 'Channel')
            data_label = data_channel
            data_unit = self.parm_dict['channel_parms'][data_channel]['Unit']
            write_simple_attrs(
                chan_grp, self.parm_dict['channel_parms'][data_channel]
            )
            write_main_dataset(chan_grp, raw_data, 'Raw_Data',
                               data_label, data_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds,
                               h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds,
                               h5_spec_vals=h5_spec_vals)
            h5_file.flush()

        h5_file.close()
        print('Nanonis translation complete.')

        return self.h5_path
Пример #11
0
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the clustering results
        """
        print('Writing clustering results to file.')
        num_clusters = self.__mean_resp.shape[0]

        h5_cluster_group = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(h5_cluster_group, self.parms_dict)

        h5_labels = write_main_dataset(h5_cluster_group, np.uint32(self.__labels.reshape([-1, 1])), 'Labels',
                                       'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1),
                                       h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                       aux_spec_prefix='Cluster_', dtype=np.uint32)

        if self.num_comps != self.h5_main.shape[1]:
            '''
            Setup the Spectroscopic Indices and Values for the Mean Response if we didn't use all components
            Note that a sliced spectroscopic matrix may not be contiguous. Let's just lose the spectroscopic data
            for now until a better method is figured out
            '''
            """
            if isinstance(self.data_slice[1], np.ndarray):
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1].tolist()]

            else:
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1]]

            ds_centroid_values.data[0, :] = centroid_vals_mat
            """
            if isinstance(self.data_slice[1], np.ndarray):
                vals_slice = self.data_slice[1].tolist()
            else:
                vals_slice = self.data_slice[1]
            vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze()
            new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals)
            h5_inds, h5_vals = write_ind_val_dsets(h5_cluster_group, new_spec, is_spectral=True)

        else:
            h5_inds = self.h5_main.h5_spec_inds
            h5_vals = self.h5_main.h5_spec_vals

        # For now, link centroids with default spectroscopic indices and values.
        h5_centroids = write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response',
                                          get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0],
                                          Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None,
                                          h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                                          h5_spec_vals=h5_vals)

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_cluster_group.create_dataset(self._status_dset_name,
                                                               data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return h5_cluster_group
Пример #12
0
    def translate(self, data_channels=None, verbose=False):
        """
        Translates the data in the Nanonis file into a Pycroscopy compatible HDF5 file.

        Parameters
        ----------
        data_channels : (optional) list of str
            Names of channels that will be read and stored in the file.
            If not given, all channels in the file will be used.
        verbose : (optional) Boolean
            Whether or not to print statements

        Returns
        -------
        h5_path : str
            Filepath to the output HDF5 file.

        """
        if self.parm_dict is None or self.data_dict is None:
            self._read_data(self.data_path)

        if data_channels is None:
            print('No channels specified.  All channels in file will be used.')
            data_channels = self.parm_dict['channels']

        if verbose:
            print('Using the following channels')
            for channel in data_channels:
                print(channel)

        if os.path.exists(self.h5_path):
            os.remove(self.h5_path)

        h5_file = h5py.File(self.h5_path, 'w')

        meas_grp = create_indexed_group(h5_file, 'Measurement')

        dc_offset = self.data_dict['sweep_signal']

        spec_label, spec_units = self.parm_dict['sweep_signal'].split()
        spec_units = spec_units.strip('()')
        spec_dim = Dimension(spec_label, spec_units, dc_offset)
        pos_dims = self.data_dict['Position Dimensions']

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp,
                                                       pos_dims,
                                                       is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp,
                                                         spec_dim,
                                                         is_spectral=True)

        num_points = h5_pos_inds.shape[0]

        for data_channel in data_channels:
            raw_data = self.data_dict[data_channel].reshape(
                [num_points, -1]) * 1E9  # Convert to nA

            chan_grp = create_indexed_group(meas_grp, 'Channel')

            data_label, data_unit = data_channel.rsplit(maxsplit=1)
            data_unit = data_unit.strip('()')

            write_main_dataset(chan_grp,
                               raw_data,
                               'Raw_Data',
                               data_label,
                               data_unit,
                               None,
                               None,
                               h5_pos_inds=h5_pos_inds,
                               h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds,
                               h5_spec_vals=h5_spec_vals)

            h5_file.flush()

        h5_file.close()
        print('Nanonis translation complete.')

        return self.h5_path
Пример #13
0
    def translate(self, file_path, verbose=False, append_path='', 
                  grp_name='Measurement', parm_encoding='utf-8'):
        """
        Translates the provided file to .h5

        Parameters
        ----------
        file_path : String / unicode
            Absolute path of the .ibw file
        verbose : Boolean (Optional)
            Whether or not to show  print statements for debugging
        append_path : string (Optional)
            h5_file to add these data to, must be a path to the h5_file on disk
        grp_name : string (Optional)
            Change from default "Measurement" name to something specific
        parm_encoding : str, optional
            Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the .h5 file
        """
        file_path = path.abspath(file_path)
        # Prepare the .h5 file:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        
        if not append_path:
            h5_path = path.join(folder_path, base_name + '.h5')
            if path.exists(h5_path):
                remove(h5_path)
            h5_file = h5py.File(h5_path, 'w')
        else:
            h5_path = append_path
            if not path.exists(append_path):
                raise Exception('File does not exist. Check pathname.')
            h5_file = h5py.File(h5_path, 'r+')
        

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.shape[-1] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for layer 0 null set errors in older AR software

        if images.ndim == 3:  # Image stack
            if verbose:
                print('Found image stack of size {}'.format(images.shape))
            type_suffix = 'Image'

            num_rows = parm_dict['ScanLines']
            num_cols = parm_dict['ScanPoints']

            images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
            images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

            pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

            spec_desc = Dimension('arb', 'a.u.', [1])

        else:  # single force curve
            if verbose:
                print('Found force curve of size {}'.format(images.shape))

            type_suffix = 'ForceCurve'
            images = np.atleast_3d(images)  # now [Z, chan, 1]
            images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

            # The data generated above varies linearly. Override.
            # For now, we'll shove the Z sensor data into the spectroscopic values.

            # Find the channel that corresponds to either Z sensor or Raw:
            try:
                chan_ind = chan_labels.index('ZSnsr')
                spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
            except ValueError:
                try:
                    chan_ind = chan_labels.index('Raw')
                    spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
                except ValueError:
                    # We don't expect to come here. If we do, spectroscopic values remains as is
                    spec_data = np.arange(images.shape[2])

            pos_desc = Dimension('X', 'm', [1])
            spec_desc = Dimension('Z', 'm', spec_data)

        # Create measurement group
        meas_grp = create_indexed_group(h5_file, grp_name)

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            if verbose:
                print('channel', chan_name)
                print('unit', chan_unit)
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

        h5_file.close()
        return h5_path
Пример #14
0
with h5py.File(h5_path, mode='r+') as h5_f:
    h5_grp = h5_f['Measurement_000/Channel_000']
    f = usid.hdf_utils.get_attr(h5_grp, 'excitation_frequency_[Hz]')
    V0 = usid.hdf_utils.get_attr(h5_grp, 'excitation_amplitude_[V]')

    #original dataset
    h5_resh = usid.USIDataset(h5_grp[
        'Raw_Data-FFT_Filtering_000/Filtered_Data-Reshape_000/Reshaped_Data'])
    pixelInds = np.random.randint(0, h5_resh[()].shape[0], numPixels)
    print("PixelInds is {} with shape {}".format(pixelInds, pixelInds.shape))

    #copy subset to new h5 file
    f = h5py.File('subsetFile{}.h5'.format(time.time()), 'a')
    subsetGroup = f.create_group("subsetBoi")
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
        subsetGroup,
        Dimension("Bias", "V", int(h5_resh.h5_spec_inds.size)),
        is_spectral=True)
    h5_spec_vals[()] = h5_resh.h5_spec_vals[()]
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(subsetGroup,
                                                   Dimension(
                                                       "Position", "m",
                                                       numPixels),
                                                   is_spectral=False)
    #h5_pos_vals[()] = h5_resh.h5_pos_vals[()][pixelInds, :]
    h5_subset = write_main_dataset(subsetGroup, (numPixels, h5_resh.shape[1]),
                                   "Measured Current",
                                   "Current",
                                   "nA",
                                   None,
                                   None,
                                   dtype=np.float64,
Пример #15
0
    def _create_results_datasets(self):
        """
		Creates all the datasets necessary for holding all parameters + data.
		"""

        self.h5_results_grp = create_results_group(self.h5_main,
                                                   self.process_name)

        self.params_dict.update({
            'last_pixel':
            0,
            'algorithm':
            'pycroscopy_AdaptiveBayesianInference'
        })

        # Write in our full_V and num_pixels as attributes to this new group
        write_simple_attrs(self.h5_results_grp, self.params_dict)

        assert isinstance(self.h5_results_grp, h5py.Group)

        # If we ended up parsing down the data, create new spectral datasets (i.e. smaller full_V's)
        # By convention, we convert the full_V back to a sine wave.
        if self.parse_mod != 1:
            h5_spec_inds_new, h5_spec_vals_new = write_ind_val_dsets(
                self.h5_results_grp,
                Dimension("Bias", "V", self.full_V.size),
                is_spectral=True)
            h5_spec_vals_new[()] = get_unshifted_response(
                self.full_V, self.shift_index)
        else:
            h5_spec_inds_new = self.h5_main.h5_spec_inds
            h5_spec_vals_new = self.h5_main.h5_spec_vals

        # Also make some new spectroscopic datasets for R and R_sig
        h5_spec_inds_R, h5_spec_vals_R = write_ind_val_dsets(
            self.h5_results_grp,
            Dimension("Bias", "V", 2 * self.M),
            is_spectral=True,
            base_name="Spectroscopic_R")
        h5_spec_vals_R[()] = np.concatenate((self.x, self.x)).T

        # Initialize our datasets
        # Note by convention, the spectroscopic values are stored as a sine wave
        # so i_recon and i_corrected are shifted at the end of bayesian_utils.process_pixel
        # accordingly.
        self.h5_R = write_main_dataset(self.h5_results_grp,
                                       (self.h5_main.shape[0], 2 * self.M),
                                       "Resistance",
                                       "Resistance",
                                       "GOhms",
                                       None,
                                       None,
                                       dtype=np.float64,
                                       h5_pos_inds=self.h5_main.h5_pos_inds,
                                       h5_pos_vals=self.h5_main.h5_pos_vals,
                                       h5_spec_inds=h5_spec_inds_R,
                                       h5_spec_vals=h5_spec_vals_R)

        assert isinstance(self.h5_R, usid.USIDataset)  # Quick sanity check
        self.h5_R_sig = create_empty_dataset(self.h5_R, np.float64, "R_sig")

        self.h5_capacitance = write_main_dataset(
            self.h5_results_grp, (self.h5_main.shape[0], 2),
            "Capacitance",
            "Capacitance",
            "pF",
            None,
            Dimension("Direction", "", 2),
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            dtype=np.float64,
            aux_spec_prefix="Cap_Spec_")

        # Not sure what units this should be so tentatively storing it as amps
        self.h5_i_recon = write_main_dataset(
            self.h5_results_grp, (self.h5_main.shape[0], self.full_V.size),
            "Reconstructed_Current",
            "Current",
            "nA",
            None,
            None,
            dtype=np.float64,
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            h5_spec_inds=h5_spec_inds_new,
            h5_spec_vals=h5_spec_vals_new)
        self.h5_i_corrected = create_empty_dataset(self.h5_i_recon, np.float64,
                                                   "Corrected_Current")
        '''
		# Initialize our datasets
		# Note, each pixel of the datasets will hold the forward and reverse sweeps concatenated together.
		# R and R_sig are plotted against [x, -x], and i_recon and i_corrected are plotted against full_V.
		self.h5_R = h5_results_grp.create_dataset("R", shape=(self.h5_main.shape[0], 2*self.M), dtype=np.float)
		self.h5_R_sig = h5_results_grp.create_dataset("R_sig", shape=(self.h5_main.shape[0], 2*self.M), dtype=np.float)
		self.h5_capacitance = h5_results_grp.create_dataset("capacitance", shape=(self.h5_main.shape[0], 2), dtype=np.float)
		self.h5_i_recon = h5_results_grp.create_dataset("i_recon", shape=(self.h5_main.shape[0], self.full_V.size), dtype=np.float)
		self.h5_i_corrected = h5_results_grp.create_dataset("i_corrected", shape=(self.h5_main.shape[0], self.full_V.size), dtype=np.float)
		'''

        if self.verbose: print("results datasets set up")
        self.h5_main.file.flush()
Пример #16
0
    def translate(self, file_path, verbose=False, parm_encoding='utf-8'):
        """
        Translates the provided file to .h5

        Parameters
        ----------
        file_path : String / unicode
            Absolute path of the .ibw file
        verbose : Boolean (Optional)
            Whether or not to show  print statements for debugging
        parm_encoding : str, optional
            Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the .h5 file
        """
        file_path = path.abspath(file_path)
        # Prepare the .h5 file:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        h5_file = h5py.File(h5_path, 'w')

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.shape[2] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for layer 0 null set errors in older AR software

        if images.ndim == 3:  # Image stack
            if verbose:
                print('Found image stack of size {}'.format(images.shape))
            type_suffix = 'Image'

            num_rows = parm_dict['ScanLines']
            num_cols = parm_dict['ScanPoints']

            images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
            images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

            pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

            spec_desc = Dimension('arb', 'a.u.', [1])

        else:  # single force curve
            if verbose:
                print('Found force curve of size {}'.format(images.shape))

            type_suffix = 'ForceCurve'
            images = np.atleast_3d(images)  # now [Z, chan, 1]
            images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

            # The data generated above varies linearly. Override.
            # For now, we'll shove the Z sensor data into the spectroscopic values.

            # Find the channel that corresponds to either Z sensor or Raw:
            try:
                chan_ind = chan_labels.index('ZSnsr')
                spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind]))
            except ValueError:
                try:
                    chan_ind = chan_labels.index('Raw')
                    spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind]))
                except ValueError:
                    # We don't expect to come here. If we do, spectroscopic values remains as is
                    spec_data = np.arange(images.shape[2])

            pos_desc = Dimension('X', 'm', [1])
            spec_desc = Dimension('Z', 'm', spec_data)

        # Create measurement group
        meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

        h5_file.close()
        return h5_path
Пример #17
0
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file
        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = self._parse_file_path(file_path)
        
        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename+'.h5')
        
        if path.exists(h5_path):
            remove(h5_path)
        
        # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
        matread = loadmat(parm_paths['parm_mat'], variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))

        # Need to take the complex conjugate if reading from a .mat file
        # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave'])))
        
        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])
        self.points_per_pixel = len(be_wave)
        
        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])
        
        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])
        
        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # method 2 for calculating the exact excitation frequency:
        """
        fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave)))
        w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel,
                            self.points_per_pixel)
        hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3))
        ex_freq_correct = w_vec[hot_bins[-1]]
        """

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'
            
        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))
        
        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size/self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_f = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        first_dat = True
        for key in data_paths.keys():
            # Now that the file has been created, go over each raw data file:
            # 1. write all ancillary data. Link data. 2. Write main data sequentially

            """ We only allocate the space for the main data here.
            This does NOT change with each file. The data written to it does.
            The auxiliary datasets will not change with each raw data file since
            only one excitation waveform is used"""
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if first_dat:
                if len(data_paths) > 1:
                    # All positions and spectra are shared between channels
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)
                elif len(data_paths) == 1:
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(chan_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(chan_grp, spec_desc, is_spectral=True)

                first_dat = False
            else:
                pass

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_paths[key], h5_main)
            
        h5_f.close()
        print('G-Line translation complete!')

        return h5_path
Пример #18
0
    def test_existing_both_aux(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        main_data = np.random.rand(15, 14)
        main_data_name = 'Test_Main'
        quantity = 'Current'
        dset_units = 'nA'

        pos_sizes = [5, 3]
        pos_names = ['X', 'Y']
        pos_units = ['nm', 'um']
        pos_dims = []
        for length, name, units in zip(pos_sizes, pos_names, pos_units):
            pos_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))
        pos_data = np.vstack((np.tile(np.arange(5),
                                      3), np.repeat(np.arange(3), 5))).T

        spec_sizes = [7, 2]
        spec_names = ['Bias', 'Cycle']
        spec_units = ['V', '']
        spec_dims = []
        for length, name, units in zip(spec_sizes, spec_names, spec_units):
            spec_dims.append(
                write_utils.Dimension(name, units, np.arange(length)))
        spec_data = np.vstack((np.tile(np.arange(7),
                                       2), np.repeat(np.arange(2), 7)))

        with h5py.File(file_path) as h5_f:
            h5_spec_inds, h5_spec_vals = hdf_utils.write_ind_val_dsets(
                h5_f, spec_dims, is_spectral=True)
            h5_pos_inds, h5_pos_vals = hdf_utils.write_ind_val_dsets(
                h5_f, pos_dims, is_spectral=False)

            usid_main = hdf_utils.write_main_dataset(h5_f,
                                                     main_data,
                                                     main_data_name,
                                                     quantity,
                                                     dset_units,
                                                     None,
                                                     None,
                                                     h5_spec_inds=h5_spec_inds,
                                                     h5_spec_vals=h5_spec_vals,
                                                     h5_pos_vals=h5_pos_vals,
                                                     h5_pos_inds=h5_pos_inds,
                                                     main_dset_attrs=None)

            data_utils.validate_aux_dset_pair(self,
                                              h5_f,
                                              h5_pos_inds,
                                              h5_pos_vals,
                                              pos_names,
                                              pos_units,
                                              pos_data,
                                              h5_main=usid_main,
                                              is_spectral=False)

            data_utils.validate_aux_dset_pair(self,
                                              h5_f,
                                              h5_spec_inds,
                                              h5_spec_vals,
                                              spec_names,
                                              spec_units,
                                              spec_data,
                                              h5_main=usid_main,
                                              is_spectral=True)
        os.remove(file_path)
Пример #19
0
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file

        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                          'BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))
        be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])

        self.points_per_pixel = len(be_wave)
        self.points_per_line = len(be_wave_train)

        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['points_per_line'] = self.points_per_line
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'


        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_file = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()

        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement and Channel groups and write the appropriate parameters to them
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # Now that the file has been created, go over each raw data file:
        """ 
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)


        for f_index in data_paths.keys():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main)

        h5_file.close()
        print('G-Tune translation complete!')

        return h5_path