def make_pos_vals_inds_dims(self):
    x_range = float(self.params_dictionary['XScanRange'])
    y_range = float(self.params_dictionary['YScanRange'])
    x_center = float(self.params_dictionary['xCenter'])
    y_center = float(self.params_dictionary['yCenter'])

    x_start = x_center - (x_range / 2)
    x_end = x_center + (x_range / 2)
    y_start = y_center - (y_range / 2)
    y_end = y_center + (y_range / 2)

    dx = x_range / self.x_len
    dy = y_range / self.y_len

    # assumes y scan direction: down; scan angle: 0 deg
    y_linspace = -np.arange(y_start, y_end, step=dy)
    x_linspace = np.arange(x_start, x_end, step=dx)

    pos_ind, pos_val = build_ind_val_matrices(unit_values=(x_linspace, y_linspace),
                                              is_spectral=False)

    # Dimension uses ascii encoding, which cannot encode the micron symbol,
    # so we replace it, if present, with the letter u.
    pos_dims = [Dimension('X', self.params_dictionary['XPhysUnit'].replace('\xb5', 'u'), self.x_len),
                Dimension('Y', self.params_dictionary['YPhysUnit'].replace('\xb5', 'u'), self.y_len)]

    self.pos_ind, self.pos_val, self.pos_dims = pos_ind, pos_val, pos_dims

def save_cpd(h5_main, cpd_mat, cpd_sm):
    '''
    Writes the CPD matrix and its smoothed copy to a new indexed 'CPD' group
    next to the source dataset.

    :param h5_main: Source dataset whose attributes (num_rows, num_cols,
        FastScanSize, SlowScanSize, total_time) define the dimensions
    :type h5_main: USIDataset
    :param cpd_mat: CPD data of shape (num_rows * num_cols, num_time_points)
    :type cpd_mat: numpy.ndarray
    :param cpd_sm: Smoothed CPD data of the same shape as cpd_mat
    :type cpd_sm: numpy.ndarray
    :returns: The newly written CPD main dataset
    :rtype: USIDataset
    '''
    parm_dict = usid.hdf_utils.get_attributes(h5_main)

    # Get relevant parameters
    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']

    h5_gp = h5_main.parent
    h5_meas_group = usid.hdf_utils.create_indexed_group(h5_gp, 'CPD')

    # Create dimensions
    pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

    # ds_pos_ind, ds_pos_val = build_ind_val_matrices(pos_desc, is_spectral=False)
    spec_desc = [Dimension('Time', 's', np.linspace(0, parm_dict['total_time'], cpd_mat.shape[1]))]
    # ds_spec_inds, ds_spec_vals = build_ind_val_matrices(spec_desc, is_spectral=True)

    # Writes main dataset
    h5_cpd = usid.hdf_utils.write_main_dataset(h5_meas_group,
                                               cpd_mat,
                                               'cpd',  # Name of main dataset
                                               'Contact Potential',  # Physical quantity contained in Main dataset
                                               'V',  # Units for the physical quantity
                                               pos_desc,  # Position dimensions
                                               spec_desc,  # Spectroscopic dimensions
                                               dtype=np.float32)  # data type / precision

    # add smoothed dataset
    h5_meas_group.create_dataset('cpd_sm', data=cpd_sm, dtype=np.float32)
    usid.hdf_utils.copy_attributes(h5_main, h5_gp)

    return h5_cpd

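# Hedged usage sketch (added, not part of the original source): shows how save_cpd()
# above might be invoked. The file name 'ff_data.h5', the dataset name 'FF_Avg' and
# the 128-point trace length are illustrative assumptions; the source dataset only
# needs to carry the 'num_rows', 'num_cols', 'FastScanSize', 'SlowScanSize' and
# 'total_time' attributes that save_cpd() reads.
def _example_save_cpd(h5_path='ff_data.h5'):
    import h5py
    import numpy as np
    import pyUSID as usid

    with h5py.File(h5_path, 'r+') as h5_f:
        h5_main = usid.hdf_utils.find_dataset(h5_f, 'FF_Avg')[0]  # assumed dataset name
        n_pix = h5_main.shape[0]
        cpd_mat = np.zeros((n_pix, 128), dtype=np.float32)  # placeholder CPD traces
        cpd_sm = cpd_mat.copy()                             # placeholder smoothed traces
        return save_cpd(h5_main, cpd_mat, cpd_sm)
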
def _translate_image_stack(self, h5_meas_grp):
    """
    Reads the scan images from the proprietary file and writes them to HDF5 datasets

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """
    # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1),
                                                     is_spectral=True)

    # Find out the size of the images from the metadata:
    layer_info = None
    for class_name in self.meta_data.keys():
        if 'Ciao image list' in class_name:
            layer_info = self.meta_data[class_name]
            break

    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp,
                                                   [Dimension('X', 'nm', layer_info['Samps/line']),
                                                    Dimension('Y', 'nm', layer_info['Number of lines'])],
                                                   is_spectral=False)

    for class_name in self.meta_data.keys():
        if 'Ciao image list' in class_name:
            layer_info = self.meta_data[class_name]
            quantity = layer_info.pop('Image Data_2')
            data = self._read_image_layer(layer_info)
            h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
            write_main_dataset(h5_chan_grp, np.reshape(data, (-1, 1)), 'Raw_Data',
                               # Quantity and Units need to be fixed by someone who understands these files better
                               quantity, 'a. u.',
                               None, None,
                               dtype=np.float32, compression='gzip',
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
            # Think about standardizing attributes for rows and columns
            write_simple_attrs(h5_chan_grp, layer_info)

def _translate_force_curve(self, h5_meas_grp):
    """
    Reads the force curves from the proprietary file and writes them to HDF5 datasets

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """
    # since multiple channels will share the same position and spectroscopic dimensions, why not share them?
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1),
                                                   is_spectral=False)

    # Find out the size of the force curves from the metadata:
    layer_info = None
    for class_name in self.meta_data.keys():
        if 'Ciao force image list' in class_name:
            layer_info = self.meta_data[class_name]
            break
    tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')]

    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp,
                                                     Dimension('Z', 'nm', int(np.sum(tr_rt))),
                                                     is_spectral=True)

    for class_name in self.meta_data.keys():
        if 'Ciao force image list' in class_name:
            layer_info = self.meta_data[class_name]
            quantity = layer_info.pop('Image Data_4')
            data = self._read_data_vector(layer_info)
            h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
            write_main_dataset(h5_chan_grp, np.expand_dims(data, axis=0), 'Raw_Data',
                               # Quantity and Units needs to be fixed by someone who understands these files better
                               quantity, 'a. u.',
                               None, None,
                               dtype=np.float32, compression='gzip',
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
            # Think about standardizing attributes
            write_simple_attrs(h5_chan_grp, layer_info)

def write_spectrograms(self):
    if bool(self.spectrogram_desc):
        for spectrogram_f, descriptors in self.spectrogram_desc.items():
            channel_i = create_indexed_group(self.h5_meas_grp, 'Channel_')
            spec_vals_i = self.spectrogram_spec_vals[spectrogram_f]
            spectrogram_spec_dims = Dimension('Wavelength', descriptors[8], spec_vals_i)
            h5_raw = write_main_dataset(channel_i,  # parent HDF5 group
                                        (self.x_len * self.y_len, len(spec_vals_i)),  # shape of Main dataset
                                        'Raw_Data',  # Name of main dataset
                                        'Spectrogram',  # Physical quantity contained in Main dataset
                                        descriptors[3],  # Units for the physical quantity
                                        self.pos_dims,  # Position dimensions
                                        spectrogram_spec_dims,  # Spectroscopic dimensions
                                        dtype=np.float32,  # data type / precision
                                        main_dset_attrs={'Caption': descriptors[0],
                                                         'Bytes_Per_Pixel': descriptors[1],
                                                         'Scale': descriptors[2],
                                                         'Physical_Units': descriptors[3],
                                                         'Offset': descriptors[4],
                                                         'Datatype': descriptors[5],
                                                         'Bytes_Per_Reading': descriptors[6],
                                                         'Wavelength_File': descriptors[7],
                                                         'Wavelength_Units': descriptors[8]})
            h5_raw.h5_pos_vals[:, :] = self.pos_val
            h5_raw[:, :] = self.spectrograms[spectrogram_f].reshape(h5_raw.shape)

def write_ps_spectra(self):
    if bool(self.pspectrum_desc):
        for spec_f, descriptors in self.pspectrum_desc.items():
            # create new measurement group for each spectrum
            self.h5_meas_grp = create_indexed_group(self.h5_f, 'Measurement_')
            x_name = self.spectra_x_y_dim_name[spec_f][0].split(' ')[0]
            x_unit = self.spectra_x_y_dim_name[spec_f][0].split(' ')[1]
            y_name = self.spectra_x_y_dim_name[spec_f][1].split(' ')[0]
            y_unit = self.spectra_x_y_dim_name[spec_f][1].split(' ')[1]
            spec_i_spec_dims = Dimension(x_name, x_unit, self.spectra_spec_vals[spec_f])
            spec_i_pos_dims = [Dimension('X',
                                         self.params_dictionary['XPhysUnit'].replace('\xb5', 'u'),
                                         np.array([0])),
                               Dimension('Y',
                                         self.params_dictionary['YPhysUnit'].replace('\xb5', 'u'),
                                         np.array([0]))]
            # write data to a channel in the measurement group
            spec_i_ch = create_indexed_group(self.h5_meas_grp, 'PowerSpectrum_')
            h5_raw = write_main_dataset(spec_i_ch,  # parent HDF5 group
                                        (1, len(self.spectra_spec_vals[spec_f])),  # shape of Main dataset
                                        'Raw_Spectrum',  # Name of main dataset
                                        y_name,  # Physical quantity contained in Main dataset
                                        y_unit,  # Units for the physical quantity
                                        pos_dims=spec_i_pos_dims,  # Position dimensions
                                        spec_dims=spec_i_spec_dims,  # Spectroscopic dimensions
                                        dtype=np.float32,  # data type / precision
                                        main_dset_attrs={'XLoc': 0, 'YLoc': 0})
            h5_raw[:, :] = self.spectra[spec_f].reshape(h5_raw.shape)

def setUp(self):
    # 'w' mode so that each test starts from a freshly truncated file
    self.h5_f = h5py.File(test_h5_file_path, 'w')
    h5_raw_grp = self.h5_f.create_group('Raw_Measurement')

    num_rows = 3
    num_cols = 5
    num_cycles = 2
    num_cycle_pts = 7

    # Create Main dataset and ancillaries
    source_dset_name = 'source_main'
    pos_dims = [Dimension('X', 'nm', num_rows),
                Dimension('Y', 'nm', num_cols)]
    spec_dims = [Dimension('Bias', 'V', num_cycle_pts),
                 Dimension('Cycle', 'a.u.', num_cycles)]
    source_main_data = np.random.rand(num_rows * num_cols, num_cycle_pts * num_cycles)
    h5_source_main = write_main_dataset(h5_raw_grp, source_main_data, source_dset_name,
                                        'Current', 'A', pos_dims, spec_dims)

    # Create Guess dataset and ancillaries
    h5_guess_grp = h5_raw_grp.create_group(source_dset_name + '-Fitter_000')
    guess_data = np.random.rand(num_rows * num_cols, num_cycles)
    guess_spec_dims = spec_dims[1]
    self.h5_guess = write_main_dataset(h5_guess_grp, guess_data, 'Guess', 'Guess', 'a.u.',
                                       pos_dims, guess_spec_dims)

    self.fitter = Fitter(h5_source_main, variables=['Bias'])
    self.h5_main = h5_source_main
    self.h5_f.flush()

def _translate_force_map(self, h5_meas_grp):
    """
    Reads the scan image + force map from the proprietary file and writes it to HDF5 datasets

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """
    # First lets write the image into the measurement group that has already been created:
    image_parms = self.meta_data['Ciao image list']
    quantity = image_parms.pop('Image Data_2')
    image_mat = self._read_image_layer(image_parms)
    h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
    write_main_dataset(h5_chan_grp, np.reshape(image_mat, (-1, 1)), 'Raw_Data',
                       # Quantity and Units needs to be fixed by someone who understands these files better
                       quantity, 'a. u.',
                       [Dimension('X', 'nm', image_parms['Samps/line']),
                        Dimension('Y', 'nm', image_parms['Number of lines'])],
                       Dimension('single', 'a. u.', 1),
                       dtype=np.float32, compression='gzip')
    # Think about standardizing attributes for rows and columns
    write_simple_attrs(h5_chan_grp, image_parms)

    # Now work on the force map:
    force_map_parms = self.meta_data['Ciao force image list']
    quantity = force_map_parms.pop('Image Data_4')
    force_map_vec = self._read_data_vector(force_map_parms)
    tr_rt = [int(item) for item in force_map_parms['Samps/line'].split(' ')]
    force_map_2d = force_map_vec.reshape(image_mat.size, np.sum(tr_rt))
    h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
    write_main_dataset(h5_chan_grp, force_map_2d, 'Raw_Data',
                       # Quantity and Units needs to be fixed by someone who understands these files better
                       quantity, 'a. u.',
                       [Dimension('X', 'nm', image_parms['Samps/line']),
                        Dimension('Y', 'nm', image_parms['Number of lines'])],
                       Dimension('Z', 'nm', int(np.sum(tr_rt))),
                       dtype=np.float32, compression='gzip')
    # Think about standardizing attributes
    write_simple_attrs(h5_chan_grp, force_map_parms)

def load_pixel_averaged_from_raw(h5_file, verbose=True, loadverbose=True):
    """
    Creates a new group FF_Avg where the FF_Raw data are averaged together.

    This is more useful as pixel-wise averages are more relevant in FF-processing.
    This Dataset is (n_pixels*n_rows, n_pnts_per_avg).

    :param h5_file: H5 File to be examined. File typically set as
        h5_file = hdf.file, where hdf = px.ioHDF5(h5_path) and h5_path
        is the path on disk
    :type h5_file: h5py File
    :param verbose: Display outputs of each function or not
    :type verbose: bool, optional
    :param loadverbose: Whether to print any simple "loading Line X" statements for feedback
    :type loadverbose: bool, optional
    :returns: The new averaged Dataset
    :rtype: Dataset
    """
    hdf = h5py.File(h5_file, 'r+')
    h5_main = usid.hdf_utils.find_dataset(hdf.file, 'FF_Raw')[0]

    try:
        ff_avg_group = h5_main.parent.create_group('FF_Avg')
    except:
        ff_avg_group = usid.hdf_utils.create_indexed_group(h5_main.parent, 'FF_Avg')

    parm_dict = get_attributes(h5_main.parent)

    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']
    pnts_per_avg = parm_dict['pnts_per_avg']
    pnts_per_line = parm_dict['pnts_per_line']
    pnts_per_pixel = parm_dict['pnts_per_pixel']
    parm_dict['pnts_per_pixel'] = 1  # only 1 average per pixel now
    parm_dict['pnts_per_line'] = num_cols  # equivalent now with averaged data
    n_pix = int(pnts_per_line / pnts_per_pixel)
    dt = 1 / parm_dict['sampling_rate']

    # Set up the position vectors for the data
    pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

    spec_desc = [Dimension('Time', 's', np.linspace(0, parm_dict['total_time'], pnts_per_avg))]

    for p in parm_dict:
        ff_avg_group.attrs[p] = parm_dict[p]
    ff_avg_group.attrs['pnts_per_line'] = num_cols  # to change number of pnts in a line
    ff_avg_group.attrs['pnts_per_pixel'] = 1  # to change number of pnts in a pixel

    h5_avg = usid.hdf_utils.write_main_dataset(ff_avg_group,  # parent HDF5 group
                                               (num_rows * num_cols, pnts_per_avg),  # shape of Main dataset
                                               'FF_Avg',  # Name of main dataset
                                               'Deflection',  # Physical quantity contained in Main dataset
                                               'V',  # Units for the physical quantity
                                               pos_desc,  # Position dimensions
                                               spec_desc,  # Spectroscopic dimensions
                                               dtype=np.float32,  # data type / precision
                                               compression='gzip',
                                               main_dset_attrs=parm_dict)

    # Uses get_line to extract a line, averages it, and writes it to the Dataset FF_Avg.
    # We could operate on the dataset array directly; get_line is used for future-proofing
    # in case we want to add additional operations (such as creating an Image class).
    for i in range(num_rows):

        if loadverbose:
            print('#### Row:', i, '####')

        _ll = get_utils.get_line(h5_main, pnts=pnts_per_line, line_num=i, array_form=False, avg=False)
        _ll = _ll.pixel_wise_avg()

        h5_avg[i * num_cols:(i + 1) * num_cols, :] = _ll[:, :]

    if verbose == True:
        usid.hdf_utils.print_tree(hdf.file, rel_paths=True)
        h5_avg = usid.hdf_utils.find_dataset(hdf.file, 'FF_Avg')[0]
        print('H5_avg of size:', h5_avg.shape)

    hdf.flush()

    return h5_avg

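# Hedged usage sketch (added, not part of the original source): a typical call to
# load_pixel_averaged_from_raw() above. The path is an illustrative placeholder; the
# file is expected to already contain an 'FF_Raw' main dataset written by load_FF().
def _example_average_raw(h5_path='ff_data.h5'):
    h5_avg = load_pixel_averaged_from_raw(h5_path, verbose=False, loadverbose=True)
    print('Averaged dataset shape:', h5_avg.shape)
    return h5_avg
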
def _parse_sxm_parms(header_dict, signal_dict):
    """
    Parse sxm files.

    Parameters
    ----------
    header_dict : dict
    signal_dict : dict

    Returns
    -------
    parm_dict : dict
    data_dict : dict
    """
    parm_dict = dict()
    data_dict = dict()

    # Create dictionary with measurement parameters
    meas_parms = {key: value for key, value in header_dict.items() if value is not None}
    info_dict = meas_parms.pop('data_info')
    parm_dict['meas_parms'] = meas_parms

    # Create dictionary with channel parameters
    channel_parms = dict()
    channel_names = info_dict['Name']
    single_channel_parms = {name: dict() for name in channel_names}
    for field_name, field_value in info_dict.items():
        for channel_name, value in zip(channel_names, field_value):
            single_channel_parms[channel_name][field_name] = value
    for value in single_channel_parms.values():
        if value['Direction'] == 'both':
            value['Direction'] = ['forward', 'backward']
        else:
            # wrap the single direction in a list so the loop below can iterate over it
            value['Direction'] = [value['Direction']]
    scan_dir = meas_parms['scan_dir']
    for name, parms in single_channel_parms.items():
        for direction in parms['Direction']:
            key = ' '.join((name, direction))
            channel_parms[key] = dict(parms)
            channel_parms[key]['Direction'] = direction
            data = signal_dict[name][direction]
            if scan_dir == 'up':
                data = np.flip(data, axis=0)
            if direction == 'backward':
                data = np.flip(data, axis=1)
            data_dict[key] = data
    parm_dict['channel_parms'] = channel_parms

    # Position dimensions
    num_cols, num_rows = header_dict['scan_pixels']
    width, height = header_dict['scan_range']
    pos_names = ['X', 'Y']
    pos_units = ['nm', 'nm']
    pos_vals = np.vstack([np.linspace(0, width, num_cols),
                          np.linspace(0, height, num_rows)])
    pos_vals *= 1e9
    pos_dims = [Dimension(name, unit, values) for name, unit, values
                in zip(pos_names, pos_units, pos_vals)]
    data_dict['Position Dimensions'] = pos_dims

    # Spectroscopic dimensions
    spec_dims = Dimension('arb.', 'a. u.', np.arange(1, dtype=np.float32))
    data_dict['Spectroscopic Dimensions'] = spec_dims

    return parm_dict, data_dict

def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

    (folder_path, unused) = path.split(file_path)
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1',
                                      'BE_wave_train', 'BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))
    be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])
    self.points_per_pixel = len(be_wave)
    self.points_per_line = len(be_wave_train)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    else:
        self.num_rows = int(self.num_rows)

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # method 1 for calculating the correct excitation frequency:
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['points_per_line'] = self.points_per_line
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = int(file_size / self.num_rows)

    # First finish writing all global parameters, create the file too:
    h5_file = h5py.File(h5_path, 'w')
    global_parms = dict()

    global_parms['data_type'] = 'G_mode_line'
    global_parms['translator'] = 'G_mode_line'
    write_simple_attrs(h5_file, global_parms)

    # Next create the Measurement and Channel groups and write the appropriate parameters to them
    meas_grp = create_indexed_group(h5_file, 'Measurement')
    write_simple_attrs(meas_grp, parm_dict)

    # Now that the file has been created, go over each raw data file:
    """
    We only allocate the space for the main data here.
    This does NOT change with each file. The data written to it does.
    The auxiliary datasets will not change with each raw data file since
    only one excitation waveform is used.
    """
    pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
    spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

    h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

    for f_index in data_paths.keys():
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                                     'Raw_Data', 'Deflection', 'V',
                                     None, None,
                                     h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                     h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                     chunks=(1, self.points_per_pixel), dtype=np.float16)

        # Now transfer scan data in the dat file to the h5 file:
        super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main)

    h5_file.close()
    print('G-Tune translation complete!')

    return h5_path

def load_FF(data_files, parm_dict, h5_path, verbose=False, loadverbose=True,
            average=True, mirror=True):
    """
    Generates the HDF5 file given path to data_files and parameters dictionary

    Creates a Datagroup FFtrEFM_Group with a single dataset in chunks

    :param data_files: List of the \*.ibw files to be individually scanned.
        This is generated by load_folder above
    :type data_files: list
    :param parm_dict: Scan parameters to be saved as attributes. This is
        generated by load_folder above, or you can pass this explicitly.
    :type parm_dict: dict
    :param h5_path: Path of the HDF5 file to write to
    :type h5_path: string
    :param verbose: Display outputs of each function or not
    :type verbose: bool, optional
    :param loadverbose: Whether to print any simple "loading Line X" statements for feedback
    :type loadverbose: bool, optional
    :param average: Whether to average each pixel before saving to H5.
        This saves both time and space
    :type average: bool, optional
    :param mirror: Mirrors the data when saving. This parameter is to match the
        FFtrEFM data with the associated topography, as FFtrEFM is acquired during
        a retrace while topo is saved during a forward trace
    :type mirror: bool, optional
    :returns: The main dataset written to the H5 file (FF_Raw or FF_Avg)
    :rtype: USIDataset
    """
    # Prepare data for writing to HDF
    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']
    pnts_per_avg = parm_dict['pnts_per_avg']
    name = 'FF_Raw'

    if average:
        parm_dict['pnts_per_pixel'] = 1
        parm_dict['pnts_per_line'] = num_cols
        name = 'FF_Avg'

    pnts_per_pixel = parm_dict['pnts_per_pixel']
    pnts_per_line = parm_dict['pnts_per_line']

    dt = 1 / parm_dict['sampling_rate']
    def_vec = np.arange(0, parm_dict['total_time'], dt)
    if def_vec.shape[0] != parm_dict['pnts_per_avg']:
        def_vec = def_vec[:-1]
        # warnings.warn('Time-per-point calculation error')

    # To do: Fix the labels/attributes on the relevant data sets
    try:
        hdf = h5py.File(h5_path, 'r+')
    except:
        print('Creating HDF5 file...')
        hdf = h5py.File(h5_path, 'w')

    try:
        ff_group = hdf.create_group('FF_Group')
    except:
        print('Group already exists, creating new one')
        ff_group = usid.hdf_utils.create_indexed_group(hdf['/'], 'FF_Group')

    # Set up the position vectors for the data
    pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols * pnts_per_pixel)),
                Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

    spec_desc = [Dimension('Time', 's', np.linspace(0, parm_dict['total_time'], pnts_per_avg))]

    for p in parm_dict:
        ff_group.attrs[p] = parm_dict[p]
    ff_group.attrs['pnts_per_line'] = num_cols

    h5_ff = usid.hdf_utils.write_main_dataset(ff_group,  # parent HDF5 group
                                              (num_rows * num_cols * pnts_per_pixel, pnts_per_avg),  # shape of Main dataset
                                              name,  # Name of main dataset
                                              'Deflection',  # Physical quantity contained in Main dataset
                                              'V',  # Units for the physical quantity
                                              pos_desc,  # Position dimensions
                                              spec_desc,  # Spectroscopic dimensions
                                              dtype=np.float32,  # data type / precision
                                              compression='gzip',
                                              main_dset_attrs=parm_dict)

    pnts_per_line = parm_dict['pnts_per_line']

    # Cycles through the remaining files. This takes a while (~few minutes)
    for k, num in zip(data_files, np.arange(0, len(data_files))):

        if loadverbose:
            fname = k.replace('/', '\\')
            print('####', fname.split('\\')[-1], '####')
            fname = str(num).rjust(4, '0')

        line_file = load.signal(k)

        if average:
            _ll = line.Line(line_file, parm_dict, n_pixels=num_cols, pycroscopy=False)
            _ll = _ll.pixel_wise_avg().T
        else:
            _ll = line_file.transpose()

        f = hdf.file[h5_ff.name]

        if mirror:
            f[pnts_per_line * num:pnts_per_line * (num + 1), :] = np.flipud(_ll[:, :])
        else:
            f[pnts_per_line * num:pnts_per_line * (num + 1), :] = _ll[:, :]

    if verbose == True:
        usid.hdf_utils.print_tree(hdf.file, rel_paths=True)

    hdf.flush()

    return h5_ff

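# Hedged usage sketch (added, not part of the original source): load_FF() is normally
# fed by the folder-scanning helper referred to as load_folder in the docstring above.
# The folder path is an illustrative placeholder, and the return signature of
# load_folder is assumed here.
def _example_load_ff(folder='/path/to/ibw_folder'):
    data_files, parm_dict, h5_path = load_folder(folder)  # assumed helper and signature
    return load_FF(data_files, parm_dict, h5_path, average=True, mirror=True)
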
def _create_results_datasets(self): """ Creates hdf5 datasets and datagroups to hold the resutls """ # create all h5 datasets here: num_pos = self.h5_main.shape[0] if self.verbose and self.mpi_rank == 0: print('Now creating the datasets') self.h5_results_grp = create_results_group(self.h5_main, self.process_name, h5_parent_group=self._h5_target_group) write_simple_attrs(self.h5_results_grp, {'algorithm_author': 'Kody J. Law', 'last_pixel': 0}) write_simple_attrs(self.h5_results_grp, self.parms_dict) if self.verbose and self.mpi_rank == 0: print('created group: {} with attributes:'.format(self.h5_results_grp.name)) print(get_attributes(self.h5_results_grp)) # One of those rare instances when the result is exactly the same as the source self.h5_i_corrected = create_empty_dataset(self.h5_main, np.float32, 'Corrected_Current', h5_group=self.h5_results_grp) if self.verbose and self.mpi_rank == 0: print('Created I Corrected') # print_tree(self.h5_results_grp) # For some reason, we cannot specify chunks or compression! # The resistance dataset requires the creation of a new spectroscopic dimension self.h5_resistance = write_main_dataset(self.h5_results_grp, (num_pos, self.num_x_steps), 'Resistance', 'Resistance', 'GOhms', None, Dimension('Bias', 'V', self.num_x_steps), dtype=np.float32, # chunks=(1, self.num_x_steps), #compression='gzip', h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals) if self.verbose and self.mpi_rank == 0: print('Created Resistance') # print_tree(self.h5_results_grp) assert isinstance(self.h5_resistance, USIDataset) # only here for PyCharm self.h5_new_spec_vals = self.h5_resistance.h5_spec_vals # The variance is identical to the resistance dataset self.h5_variance = create_empty_dataset(self.h5_resistance, np.float32, 'R_variance') if self.verbose and self.mpi_rank == 0: print('Created Variance') # print_tree(self.h5_results_grp) # The capacitance dataset requires new spectroscopic dimensions as well self.h5_cap = write_main_dataset(self.h5_results_grp, (num_pos, 1), 'Capacitance', 'Capacitance', 'pF', None, Dimension('Direction', '', [1]), h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals, dtype=cap_dtype, #compression='gzip', aux_spec_prefix='Cap_Spec_') if self.verbose and self.mpi_rank == 0: print('Created Capacitance') # print_tree(self.h5_results_grp) print('Done creating all results datasets!') if self.mpi_size > 1: self.mpi_comm.Barrier() self.h5_main.file.flush()
def _create_results_datasets(self):
    '''
    Creates the datasets and Groups necessary to store the results.
    '''
    print('Creating CPD results datasets')

    # Get relevant parameters
    num_rows = self.parm_dict['num_rows']
    num_cols = self.parm_dict['num_cols']
    pnts_per_avg = self.parm_dict['pnts_per_avg']

    ds_shape = [num_rows * num_cols, pnts_per_avg]
    cpd_ds_shape = [num_rows * num_cols, self.cpd_dict['num_CPD']]

    self.h5_results_grp = usid.hdf_utils.create_results_group(self.h5_main, self.process_name)
    self.h5_cpd_grp = usid.hdf_utils.create_results_group(self.h5_main, self.process_name + '_CPD')

    usid.hdf_utils.copy_attributes(self.h5_main.parent, self.h5_results_grp)
    usid.hdf_utils.copy_attributes(self.h5_main.parent, self.h5_cpd_grp)

    # Create dimensions
    pos_desc = [Dimension('X', 'm', np.linspace(0, self.parm_dict['FastScanSize'], num_cols)),
                Dimension('Y', 'm', np.linspace(0, self.parm_dict['SlowScanSize'], num_rows))]

    # ds_pos_ind, ds_pos_val = build_ind_val_matrices(pos_desc, is_spectral=False)
    spec_desc = [Dimension('Time', 's',
                           np.linspace(0, self.parm_dict['total_time'], pnts_per_avg))]
    cpd_spec_desc = [Dimension('Time', 's',
                               np.linspace(0, self.parm_dict['total_time'], self.cpd_dict['num_CPD']))]
    # ds_spec_inds, ds_spec_vals = build_ind_val_matrices(spec_desc, is_spectral=True)

    # Writes main dataset
    self.h5_force = usid.hdf_utils.write_main_dataset(self.h5_results_grp,
                                                      ds_shape,
                                                      'force',  # Name of main dataset
                                                      'Force',  # Physical quantity contained in Main dataset
                                                      'N',  # Units for the physical quantity
                                                      pos_desc,  # Position dimensions
                                                      spec_desc,  # Spectroscopic dimensions
                                                      dtype=np.float32,  # data type / precision
                                                      main_dset_attrs=self.parm_dict)

    self.h5_cpd = usid.hdf_utils.write_main_dataset(self.h5_cpd_grp,
                                                    cpd_ds_shape,
                                                    'CPD',  # Name of main dataset
                                                    'Potential',  # Physical quantity contained in Main dataset
                                                    'V',  # Units for the physical quantity
                                                    None,  # Position dimensions
                                                    cpd_spec_desc,  # Spectroscopic dimensions
                                                    h5_pos_inds=self.h5_main.h5_pos_inds,  # Copy Pos Dimensions
                                                    h5_pos_vals=self.h5_main.h5_pos_vals,
                                                    dtype=np.float32,  # data type / precision
                                                    main_dset_attrs=self.parm_dict)

    self.h5_cap = usid.hdf_utils.write_main_dataset(self.h5_cpd_grp,
                                                    cpd_ds_shape,
                                                    'capacitance',  # Name of main dataset
                                                    'Capacitance',  # Physical quantity contained in Main dataset
                                                    'F',  # Units for the physical quantity
                                                    None,  # Position dimensions
                                                    None,  # Spectroscopic dimensions
                                                    h5_pos_inds=self.h5_main.h5_pos_inds,  # Copy Pos Dimensions
                                                    h5_pos_vals=self.h5_main.h5_pos_vals,
                                                    h5_spec_inds=self.h5_cpd.h5_spec_inds,  # Copy Spectroscopy Dimensions
                                                    h5_spec_vals=self.h5_cpd.h5_spec_vals,
                                                    dtype=np.float32,  # data type / precision
                                                    main_dset_attrs=self.parm_dict)

    self.h5_cpd.file.flush()

    return

def translate(self, parm_path):
    """
    Basic method that translates .mat data files to a single .h5 file

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    self.parm_path = path.abspath(parm_path)
    (folder_path, file_name) = path.split(parm_path)
    (file_name, base_name) = path.split(folder_path)
    h5_path = path.join(folder_path, base_name + '.h5')

    # Read parameters
    parm_dict = readGmodeParms(parm_path)

    # Add the w^2 specific parameters to this list
    parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
    freq_sweep_parms = parm_data['freqSweepParms']
    parm_dict['freq_sweep_delay'] = float(freq_sweep_parms['delay'].item())
    gen_sig = parm_data['genSig']
    parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
    freq_array = np.float32(parm_data['freqArray'])

    # prepare and write spectroscopic values
    samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
    num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate)

    w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate, np.float32(samp_rate / num_bins))

    # There is most likely a more elegant solution to this but I don't have the time... Maybe np.meshgrid
    spec_val_mat = np.zeros((len(freq_array) * num_bins, 2), dtype=VALUES_DTYPE)
    spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
    spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)

    spec_ind_mat = np.zeros((2, len(freq_array) * num_bins), dtype=np.int32)
    spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
    spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)

    num_rows = parm_dict['grid_num_rows']
    num_cols = parm_dict['grid_num_cols']
    parm_dict['data_type'] = 'GmodeW2'

    num_pix = num_rows * num_cols

    global_parms = dict()
    global_parms['grid_size_x'] = parm_dict['grid_num_cols']
    global_parms['grid_size_y'] = parm_dict['grid_num_rows']
    # assuming that the experiment was completed:
    global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
    global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
    global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
    global_parms['translator'] = 'W2'

    # Now start creating datasets and populating:
    if path.exists(h5_path):
        remove(h5_path)

    h5_f = h5py.File(h5_path, 'w')
    write_simple_attrs(h5_f, global_parms)

    meas_grp = create_indexed_group(h5_f, 'Measurement')
    chan_grp = create_indexed_group(meas_grp, 'Channel')
    write_simple_attrs(chan_grp, parm_dict)

    pos_dims = [Dimension('X', 'nm', num_rows),
                Dimension('Y', 'nm', num_cols)]
    spec_dims = [Dimension('Response Bin', 'a.u.', num_bins),
                 Dimension('Excitation Frequency ', 'Hz', len(freq_array))]

    # Minimize file size to the extent possible.
    # DAQs are rated at 16 bit so float16 should be most appropriate.
    # For some reason, compression is more effective on time series data
    h5_main = write_main_dataset(chan_grp, (num_pix, num_bins), 'Raw_Data',
                                 'Deflection', 'V',
                                 pos_dims, spec_dims,
                                 chunks=(1, num_bins), dtype=np.float32)

    h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array)
    h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

    # Now doing link_h5_objects_as_attrs:
    link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

    # Now read the raw data files:
    pos_ind = 0
    for row_ind in range(1, num_rows + 1):
        for col_ind in range(1, num_cols + 1):
            file_path = path.join(folder_path,
                                  'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
            print('Working on row {} col {}'.format(row_ind, col_ind))
            if path.exists(file_path):
                # Load data file
                pix_data = loadmat(file_path, squeeze_me=True)
                pix_mat = pix_data['AI_mat']
                # Take the inverse FFT on the 2nd dimension
                pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1)
                # Verified with Matlab - no conjugate required here.
                pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                h5_main[pos_ind, :] = np.float32(pix_vec)
                h5_f.flush()  # flush from memory!
            else:
                print('File not found for: row {} col {}'.format(row_ind, col_ind))
            pos_ind += 1
            if (100.0 * pos_ind / num_pix) % 10 == 0:
                print('completed translating {} %'.format(int(100 * pos_ind / num_pix)))

    h5_f.close()

    return h5_path

def _create_results_datasets(self): """ Creates all the datasets necessary for holding all parameters + data. """ self.h5_results_grp = create_results_group( self.h5_main, self.process_name, h5_parent_group=self._h5_target_group) self.parms_dict.update({ 'last_pixel': 0, 'algorithm': 'pycroscopy_SignalFilter' }) write_simple_attrs(self.h5_results_grp, self.parms_dict) assert isinstance(self.h5_results_grp, h5py.Group) if isinstance(self.composite_filter, np.ndarray): h5_comp_filt = self.h5_results_grp.create_dataset( 'Composite_Filter', data=np.float32(self.composite_filter)) if self.verbose and self.mpi_rank == 0: print( 'Rank {} - Finished creating the Composite_Filter dataset'. format(self.mpi_rank)) # First create the position datsets if the new indices are smaller... if self.num_effective_pix != self.h5_main.shape[0]: # TODO: Do this part correctly. See past solution: """ # need to make new position datasets by taking every n'th index / value: new_pos_vals = np.atleast_2d(h5_pos_vals[slice(0, None, self.num_effective_pix), :]) pos_descriptor = [] for name, units, leng in zip(h5_pos_inds.attrs['labels'], h5_pos_inds.attrs['units'], [int(np.unique(h5_pos_inds[:, dim_ind]).size / self.num_effective_pix) for dim_ind in range(h5_pos_inds.shape[1])]): pos_descriptor.append(Dimension(name, units, np.arange(leng))) ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_descriptor, is_spectral=False, verbose=self.verbose) h5_pos_vals.data = np.atleast_2d(new_pos_vals) # The data generated above varies linearly. Override. """ h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets( self.h5_results_grp, Dimension('pixel', 'a.u.', self.num_effective_pix), is_spectral=False, verbose=self.verbose and self.mpi_rank == 0) if self.verbose and self.mpi_rank == 0: print('Rank {} - Created the new position ancillary dataset'. format(self.mpi_rank)) else: h5_pos_inds_new = self.h5_main.h5_pos_inds h5_pos_vals_new = self.h5_main.h5_pos_vals if self.verbose and self.mpi_rank == 0: print('Rank {} - Reusing source datasets position datasets'. format(self.mpi_rank)) if self.noise_threshold is not None: self.h5_noise_floors = write_main_dataset( self.h5_results_grp, (self.num_effective_pix, 1), 'Noise_Floors', 'Noise', 'a.u.', None, Dimension('arb', '', [1]), dtype=np.float32, aux_spec_prefix='Noise_Spec_', h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new, verbose=self.verbose and self.mpi_rank == 0) if self.verbose and self.mpi_rank == 0: print('Rank {} - Finished creating the Noise_Floors dataset'. format(self.mpi_rank)) if self.write_filtered: # Filtered data is identical to Main_Data in every way - just a duplicate self.h5_filtered = create_empty_dataset( self.h5_main, self.h5_main.dtype, 'Filtered_Data', h5_group=self.h5_results_grp) if self.verbose and self.mpi_rank == 0: print( 'Rank {} - Finished creating the Filtered dataset'.format( self.mpi_rank)) self.hot_inds = None if self.write_condensed: self.hot_inds = np.where(self.composite_filter > 0)[0] self.hot_inds = np.uint(self.hot_inds[int(0.5 * len(self.hot_inds)):] ) # only need to keep half the data condensed_spec = Dimension('hot_frequencies', '', int(0.5 * len(self.hot_inds))) self.h5_condensed = write_main_dataset( self.h5_results_grp, (self.num_effective_pix, len(self.hot_inds)), 'Condensed_Data', 'Complex', 'a. 
u.', None, condensed_spec, h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new, dtype=np.complex, verbose=self.verbose and self.mpi_rank == 0) if self.verbose and self.mpi_rank == 0: print( 'Rank {} - Finished creating the Condensed dataset'.format( self.mpi_rank)) if self.mpi_size > 1: self.mpi_comm.Barrier() self.h5_main.file.flush()
def _write_results_chunk(self):
    """
    Writes the provided SVD results to file
    """
    comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

    h5_svd_group = create_results_group(self.h5_main, self.process_name,
                                        h5_parent_group=self._h5_target_group)
    self.h5_results_grp = h5_svd_group
    self._write_source_dset_provenance()

    write_simple_attrs(h5_svd_group, self.parms_dict)
    write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

    h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U', 'Abundance', 'a.u.',
                              None, comp_dim,
                              h5_pos_inds=self.h5_main.h5_pos_inds,
                              h5_pos_vals=self.h5_main.h5_pos_vals,
                              dtype=np.float32,
                              chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))
    # print(get_attr(self.h5_main, 'quantity')[0])
    h5_v = write_main_dataset(h5_svd_group, self.__v, 'V', get_attr(self.h5_main, 'quantity')[0],
                              'a.u.', comp_dim, None,
                              h5_spec_inds=self.h5_main.h5_spec_inds,
                              h5_spec_vals=self.h5_main.h5_spec_vals,
                              chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

    # No point making this 1D dataset a main dataset
    h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

    '''
    Check h5_main for plot group references.
    Copy them into V if they exist
    '''
    for key in self.h5_main.attrs.keys():
        if '_Plot_Group' not in key:
            continue
        ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key],
                                              return_method='corners')
        ref_inds = ref_inds.reshape([-1, 2, 2])
        ref_inds[:, 1, 0] = h5_v.shape[0] - 1

        svd_ref = create_region_reference(h5_v, ref_inds)

        h5_v.attrs[key] = svd_ref

    # Marking completion:
    self._status_dset_name = 'completed_positions'
    self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                       data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
    # keeping legacy option:
    h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]

def save_Yout(h5_main, Yout, yout):
    '''
    Writes the results to the HDF5 file

    :param h5_main: Source dataset whose attributes define the position and spectroscopic axes
    :type h5_main: h5py dataset of USIDataset
    :param Yout: Frequency-domain response, written to a new 'GKPFM_Frequency' group
    :type Yout: numpy.ndarray
    :param yout: Time-domain response, written to a new 'GKPFM_Time' group
    :type yout: numpy.ndarray
    '''
    parm_dict = usid.hdf_utils.get_attributes(h5_main)

    # Get relevant parameters
    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']
    pnts_per_avg = parm_dict['pnts_per_avg']

    h5_gp = h5_main.parent
    h5_meas_group = usid.hdf_utils.create_indexed_group(h5_gp, 'GKPFM_Frequency')

    # Create dimensions
    pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

    # ds_pos_ind, ds_pos_val = build_ind_val_matrices(pos_desc, is_spectral=False)
    spec_desc = [Dimension('Frequency', 'Hz',
                           np.linspace(0, parm_dict['sampling_rate'], pnts_per_avg))]
    # ds_spec_inds, ds_spec_vals = build_ind_val_matrices(spec_desc, is_spectral=True)

    # Writes main dataset
    h5_y = usid.hdf_utils.write_main_dataset(h5_meas_group,
                                             Yout,
                                             'Y',  # Name of main dataset
                                             'Deflection',  # Physical quantity contained in Main dataset
                                             'V',  # Units for the physical quantity
                                             pos_desc,  # Position dimensions
                                             spec_desc,  # Spectroscopic dimensions
                                             dtype=np.cdouble,  # data type / precision
                                             main_dset_attrs=parm_dict)

    usid.hdf_utils.copy_attributes(h5_y, h5_gp)

    h5_meas_group = usid.hdf_utils.create_indexed_group(h5_gp, 'GKPFM_Time')
    spec_desc = [Dimension('Time', 's',
                           np.linspace(0, parm_dict['total_time'], pnts_per_avg))]

    h5_y = usid.hdf_utils.write_main_dataset(h5_meas_group,
                                             yout,
                                             'y_time',  # Name of main dataset
                                             'Deflection',  # Physical quantity contained in Main dataset
                                             'V',  # Units for the physical quantity
                                             pos_desc,  # Position dimensions
                                             spec_desc,  # Spectroscopic dimensions
                                             dtype=np.float32,  # data type / precision
                                             main_dset_attrs=parm_dict)

    usid.hdf_utils.copy_attributes(h5_y, h5_gp)

    h5_y.file.flush()

    return

def write_images(self):
    if bool(self.img_desc):
        for img_f, descriptors in self.img_desc.items():
            # check for an existing spectrogram or image and link position/spec inds/vals;
            # at most two channels' worth of data need to be checked (Fwd and Bwd)
            try:
                str_main = str(get_all_main(self.h5_f['Measurement_000/Channel_000']))
                i_beg = str_main.find('located at: \n\t') + 14
                i_end = str_main.find('\nData contains') - 1
                data_loc = str_main[i_beg:i_end]
                channel_data = USIDataset(self.h5_f[data_loc])
                h5_pos_inds = channel_data.h5_pos_inds
                h5_pos_vals = channel_data.h5_pos_vals
                pos_dims = None
                write_pos_vals = False
                if channel_data.spec_dim_sizes[0] == 1:
                    h5_spec_inds = channel_data.h5_spec_inds
                    h5_spec_vals = channel_data.h5_spec_vals
                    spec_dims = None
                # if channel 000 is a spectrogram, check the next dataset
                elif channel_data.spec_dim_sizes[0] != 1:
                    str_main = str(get_all_main(self.h5_f['Measurement_000/Channel_001']))
                    i_beg = str_main.find('located at: \n\t') + 14
                    i_end = str_main.find('\nData contains') - 1
                    data_loc = str_main[i_beg:i_end]
                    channel_data = USIDataset(self.h5_f[data_loc])
                    # channel data is an image, & we link their spec inds/vals
                    if channel_data.spec_dim_sizes[0] == 1:
                        h5_spec_inds = channel_data.h5_spec_inds
                        h5_spec_vals = channel_data.h5_spec_vals
                        spec_dims = None
                    else:  # if a forward/bwd spectrogram exists
                        h5_spec_inds = None
                        h5_spec_vals = None
                        spec_dims = Dimension('arb', 'a.u', 1)
            # in case the channel does not exist, we make new spec/pos inds/vals
            except KeyError:
                # pos dims
                h5_pos_inds = None
                h5_pos_vals = None
                pos_dims = self.pos_dims
                write_pos_vals = True
                # spec dims
                h5_spec_inds = None
                h5_spec_vals = None
                spec_dims = Dimension('arb', 'a.u', 1)

            channel_i = create_indexed_group(self.h5_meas_grp, 'Channel_')
            h5_raw = write_main_dataset(channel_i,  # parent HDF5 group
                                        (self.x_len * self.y_len, 1),  # shape of Main dataset
                                        'Raw_' + descriptors[0].replace('-', '_'),  # Name of main dataset
                                        descriptors[0],  # Physical quantity contained in Main dataset
                                        descriptors[2],  # Units for the physical quantity
                                        h5_pos_inds=h5_pos_inds,
                                        h5_pos_vals=h5_pos_vals,
                                        pos_dims=pos_dims,  # Position dimensions
                                        h5_spec_inds=h5_spec_inds,
                                        h5_spec_vals=h5_spec_vals,
                                        spec_dims=spec_dims,  # Spectroscopic dimensions
                                        dtype=np.float32,  # data type / precision
                                        main_dset_attrs={'Caption': descriptors[0],
                                                         'Scale': descriptors[1],
                                                         'Physical_Units': descriptors[2],
                                                         'Offset': descriptors[3]})

            h5_raw[:, :] = self.imgs[img_f].reshape(h5_raw.shape)

            if write_pos_vals:
                h5_raw.h5_pos_vals[:, :] = self.pos_val

def _parse_3ds_parms(header_dict, signal_dict):
    """
    Parse 3ds files.

    Parameters
    ----------
    header_dict : dict
    signal_dict : dict

    Returns
    -------
    parm_dict : dict
    data_dict : dict
    """
    parm_dict = dict()
    data_dict = dict()

    # Create dictionary with measurement parameters
    meas_parms = {key: value for key, value in header_dict.items() if value is not None}
    channels = meas_parms.pop('channels')
    for key, parm_grid in zip(meas_parms.pop('fixed_parameters') + meas_parms.pop('experimental_parameters'),
                              signal_dict['params'].T):
        # Collapse the parm_grid along one axis if it's constant along said axis
        if parm_grid.ndim > 1:
            dim_slice = list()
            # Find dimensions that are constant
            for idim in range(parm_grid.ndim):
                tmp_grid = np.moveaxis(parm_grid.copy(), idim, 0)
                if np.all(np.equal(tmp_grid[0], tmp_grid[1])):
                    dim_slice.append(0)
                else:
                    dim_slice.append(slice(None))
            # print(key, dim_slice)
            # print(parm_grid[tuple(dim_slice)])
            parm_grid = parm_grid[tuple(dim_slice)]
        meas_parms[key] = parm_grid
    parm_dict['meas_parms'] = meas_parms

    # Create dictionary with channel parameters and
    # save channel data before renaming keys
    data_channel_parms = dict()
    for chan_name in channels:
        splitted_chan_name = chan_name.split(maxsplit=2)
        if len(splitted_chan_name) == 2:
            direction = 'forward'
        elif len(splitted_chan_name) == 3:
            direction = 'backward'
            splitted_chan_name.pop(1)
        name, unit = splitted_chan_name
        key = ' '.join((name, direction))
        data_channel_parms[key] = {'Name': name,
                                   'Direction': direction,
                                   'Unit': unit.strip('()')}
        data_dict[key] = signal_dict.pop(chan_name)
    parm_dict['channel_parms'] = data_channel_parms

    # Add remaining signal_dict elements to data_dict
    data_dict.update(signal_dict)

    # Position dimensions
    nx, ny = header_dict['dim_px']
    if 'X (m)' in parm_dict:
        row_vals = parm_dict.pop('X (m)')
    else:
        row_vals = np.arange(nx, dtype=np.float32)

    if 'Y (m)' in parm_dict:
        col_vals = parm_dict.pop('Y (m)')
    else:
        col_vals = np.arange(ny, dtype=np.float32)
    pos_vals = np.hstack([row_vals.reshape(-1, 1), col_vals.reshape(-1, 1)])
    pos_names = ['X', 'Y']
    pos_dims = [Dimension(label, 'nm', values) for label, values in zip(pos_names, pos_vals.T)]
    data_dict['Position Dimensions'] = pos_dims

    # Spectroscopic dimensions
    sweep_signal = header_dict['sweep_signal']
    spec_label, spec_unit = sweep_signal.split(maxsplit=1)
    spec_unit = spec_unit.strip('()')
    # parm_dict['sweep_signal'] = (sweep_name, sweep_unit)
    dc_offset = data_dict['sweep_signal']
    spec_dim = Dimension(spec_label, spec_unit, dc_offset)
    data_dict['Spectroscopic Dimensions'] = spec_dim

    return parm_dict, data_dict

def translate(self, raw_data_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ------------
    raw_data_path : string / unicode
        Absolute file path of the data .mat file.

    Returns
    ----------
    h5_main : USIDataset
        Main dataset written to the translated h5 file
    """
    raw_data_path = path.abspath(raw_data_path)
    folder_path, file_name = path.split(raw_data_path)
    h5_path = path.join(folder_path, file_name[:-4] + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    h5_f = h5py.File(h5_path, 'w')

    self.h5_read = True
    try:
        h5_raw = h5py.File(raw_data_path, 'r')
    except:
        self.h5_read = False
        h5_raw = loadmat(raw_data_path)

    try:
        excite_cell = h5_raw['dc_amp_cell3']
    except KeyError:
        excite_cell = [h5_raw['VS_amp_vec']]

    test = excite_cell[0][0]
    if self.h5_read:
        excitation_vec = h5_raw[test]
    else:
        excitation_vec = np.float32(np.squeeze(test))

    try:
        current_cell = h5_raw['current_cell3']
    except KeyError:
        current_cell = h5_raw['IV_dat']

    num_rows = current_cell.shape[0]
    num_cols = current_cell.shape[1]
    num_iv_pts = excitation_vec.size

    num_cycles = 0
    if len(current_cell.shape) == 4:
        num_cycles = current_cell.shape[-1]
        current_data = np.zeros(shape=(num_rows * num_cols, num_iv_pts * num_cycles), dtype=np.float32)
    else:
        current_data = np.zeros(shape=(num_rows * num_cols, num_iv_pts), dtype=np.float32)

    for row_ind in range(num_rows):
        for col_ind in range(num_cols):
            pix_ind = row_ind * num_cols + col_ind
            if self.h5_read:
                curr_val = np.squeeze(h5_raw[current_cell[row_ind][col_ind]][()])
            else:
                curr_val = np.float32(np.squeeze(current_cell[row_ind][col_ind]))
            curr_val = curr_val.reshape(current_data[0, :].shape)
            current_data[pix_ind, :] = 1E+9 * curr_val

    parm_dict = self._read_parms(h5_raw)
    parm_dict.update({'translator': 'FORC_IV'})

    pos_desc = [Dimension('Y', 'm', np.arange(num_rows)),
                Dimension('X', 'm', np.arange(num_cols))]

    if num_cycles > 0:
        spec_desc = [Dimension('DC Bias', 'V', excitation_vec),
                     Dimension('Cycles', 'number', np.arange(num_cycles))]
    else:
        spec_desc = [Dimension('DC Bias', 'V', excitation_vec)]

    meas_grp = create_indexed_group(h5_f, 'Measurement')
    chan_grp = create_indexed_group(meas_grp, 'Channel')
    write_simple_attrs(chan_grp, parm_dict)

    h5_main = write_main_dataset(chan_grp, current_data, 'Raw_Data',
                                 'Current', '1E-9 A',
                                 pos_desc, spec_desc)

    return h5_main

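# Hedged usage sketch (added, not part of the original source): translate() above is a
# method of a FORC-IV translator class; the class name and the .mat path below are
# placeholders for illustration only.
def _example_translate_forc_iv(mat_path='/path/to/forc_iv_data.mat'):
    translator = ForcIVTranslator()           # hypothetical translator class exposing translate()
    h5_main = translator.translate(mat_path)  # returns the written 'Raw_Data' main dataset
    print(h5_main.shape)
    return h5_main
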
def _create_results_datasets(self):
    '''
    Creates the datasets and Groups necessary to store the results.

    The following datasets are created:

    h5_if : 'Inst_Freq' h5 Dataset
        Contains the Instantaneous Frequencies
    tfp : 'tfp' h5 Dataset
        Contains the time-to-first-peak data as a 1D matrix
    shift : 'shift' h5 Dataset
        Contains the frequency shift data as a 1D matrix
    '''
    print('Creating results datasets')

    # Get relevant parameters
    num_rows = self.parm_dict['num_rows']
    num_cols = self.parm_dict['num_cols']
    pnts_per_avg = self.parm_dict['pnts_per_avg']

    ds_shape = [num_rows * num_cols, pnts_per_avg]

    self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

    copy_attributes(self.h5_main.parent, self.h5_results_grp)

    # Create dimensions
    pos_desc = [Dimension('X', 'm', np.linspace(0, self.parm_dict['FastScanSize'], num_cols)),
                Dimension('Y', 'm', np.linspace(0, self.parm_dict['SlowScanSize'], num_rows))]

    # ds_pos_ind, ds_pos_val = build_ind_val_matrices(pos_desc, is_spectral=False)
    spec_desc = [Dimension('Time', 's',
                           np.linspace(0, self.parm_dict['total_time'], pnts_per_avg))]
    # ds_spec_inds, ds_spec_vals = build_ind_val_matrices(spec_desc, is_spectral=True)

    # Writes main dataset
    self.h5_if = write_main_dataset(self.h5_results_grp,
                                    ds_shape,
                                    'Inst_Freq',  # Name of main dataset
                                    'Frequency',  # Physical quantity contained in Main dataset
                                    'Hz',  # Units for the physical quantity
                                    pos_desc,  # Position dimensions
                                    spec_desc,  # Spectroscopic dimensions
                                    dtype=np.float32,  # data type / precision
                                    main_dset_attrs=self.parm_dict)

    self.h5_amp = write_main_dataset(self.h5_results_grp,
                                     ds_shape,
                                     'Amplitude',  # Name of main dataset
                                     'Amplitude',  # Physical quantity contained in Main dataset
                                     'nm',  # Units for the physical quantity
                                     None,  # Position dimensions
                                     None,  # Spectroscopic dimensions
                                     h5_pos_inds=self.h5_main.h5_pos_inds,  # Copy Pos Dimensions
                                     h5_pos_vals=self.h5_main.h5_pos_vals,
                                     h5_spec_inds=self.h5_main.h5_spec_inds,  # Copy Spectroscopy Dimensions
                                     h5_spec_vals=self.h5_main.h5_spec_vals,
                                     dtype=np.float32,  # data type / precision
                                     main_dset_attrs=self.parm_dict)

    self.h5_phase = write_main_dataset(self.h5_results_grp,
                                       ds_shape,
                                       'Phase',  # Name of main dataset
                                       'Phase',  # Physical quantity contained in Main dataset
                                       'degrees',  # Units for the physical quantity
                                       None,  # Position dimensions
                                       None,  # Spectroscopic dimensions
                                       h5_pos_inds=self.h5_main.h5_pos_inds,  # Copy Pos Dimensions
                                       h5_pos_vals=self.h5_main.h5_pos_vals,
                                       h5_spec_inds=self.h5_main.h5_spec_inds,  # Copy Spectroscopy Dimensions
                                       h5_spec_vals=self.h5_main.h5_spec_vals,
                                       dtype=np.float32,  # data type / precision
                                       main_dset_attrs=self.parm_dict)

    self.h5_pwrdis = write_main_dataset(self.h5_results_grp,
                                        ds_shape,
                                        'PowerDissipation',  # Name of main dataset
                                        'Power',  # Physical quantity contained in Main dataset
                                        'W',  # Units for the physical quantity
                                        None,  # Position dimensions
                                        None,  # Spectroscopic dimensions
                                        h5_pos_inds=self.h5_main.h5_pos_inds,  # Copy Pos Dimensions
                                        h5_pos_vals=self.h5_main.h5_pos_vals,
                                        h5_spec_inds=self.h5_main.h5_spec_inds,  # Copy Spectroscopy Dimensions
                                        h5_spec_vals=self.h5_main.h5_spec_vals,
                                        dtype=np.float32,  # data type / precision
                                        main_dset_attrs=self.parm_dict)

    _arr = np.zeros([num_rows * num_cols, 1])
    self.h5_tfp = self.h5_results_grp.create_dataset('tfp', data=_arr, dtype=np.float32)
    self.h5_shift = self.h5_results_grp.create_dataset('shift', data=_arr, dtype=np.float32)

    self.h5_if.file.flush()

    return

def translate(self, parm_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    parm_path = path.abspath(parm_path)
    parm_dict, excit_wfm = self._read_parms(parm_path)
    folder_path, base_name = path.split(parm_path)
    waste, base_name = path.split(folder_path)

    # Until a better method is provided....
    with h5py.File(path.join(folder_path, 'line_1.mat'), 'r') as h5_mat_line_1:
        num_ai_chans = h5_mat_line_1['data'].shape[1]

    h5_path = path.join(folder_path, base_name + '.h5')
    if path.exists(h5_path):
        remove(h5_path)

    with h5py.File(h5_path, 'w') as h5_f:
        h5_meas_grp = create_indexed_group(h5_f, 'Measurement')
        global_parms = dict()
        global_parms.update({'data_type': 'gIV', 'translator': 'gIV'})
        write_simple_attrs(h5_meas_grp, global_parms)

        # Only prepare the instructions for the dimensions here
        spec_dims = Dimension('Bias', 'V', excit_wfm)
        pos_dims = Dimension('Y', 'm', np.linspace(0, parm_dict['grid_scan_height_[m]'],
                                                   parm_dict['grid_num_rows']))

        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
            write_simple_attrs(h5_chan_grp, parm_dict)

            """
            Minimize file size to the extent possible.
            DAQs are rated at 16 bit so float16 should be most appropriate.
            For some reason, compression is effective only on time series data
            """
            h5_raw = write_main_dataset(h5_chan_grp,
                                        (parm_dict['grid_num_rows'], excit_wfm.size),
                                        'Raw_Data', 'Current',
                                        '1E-{} A'.format(parm_dict['IO_amplifier_gain']),
                                        pos_dims, spec_dims,
                                        dtype=np.float16, chunks=(1, excit_wfm.size),
                                        compression='gzip')

            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, folder_path)

    return h5_path

def _build_ancillary_datasets(self):
    """
    Builds the position and spectroscopic Dimension descriptors for the simulated dataset.

    Parameters
    ----------
    None

    Returns
    -------
    pos_dims : list of Dimension
        Position dimensions
    spec_dims_corrected : list of Dimension
        Spectroscopic dimensions, with the DC offset dimension replaced
        by the actual bias vector
    """
    # create spectrogram at each pixel from the coefficients
    spec_step = np.arange(0, 1, 1 / self.n_steps)
    V_vec = 10 * np.arcsin(np.sin(self.n_fields * np.pi * spec_step)) * 2 / np.pi

    # build DC vector for typical BEPS
    Vdc_mat = np.vstack((V_vec, np.full(np.shape(V_vec), np.nan)))  # Add out-of-field values
    IF_vec = Vdc_mat.T.flatten()  # Base DC vector
    IF_vec = np.tile(IF_vec, self.n_cycles)  # Now with Cycles
    IF_vec = np.dot(1 + np.arange(self.forc_cycles)[:, None], IF_vec[None, :])  # Do a single FORC
    IF_vec = np.tile(IF_vec.flatten(), self.forc_repeats)  # Repeat the FORC

    IF_inds = np.logical_not(np.isnan(IF_vec))

    Vdc_vec = np.where(IF_inds, IF_vec, 0)

    # build AC vector
    Vac_vec = np.ones(np.shape(Vdc_vec))

    # Build the Spectroscopic Values matrix
    spec_dims = [self.n_fields, self.n_steps, self.n_cycles,
                 self.forc_cycles, self.forc_repeats, self.n_bins]
    spec_labs = ['Field', 'DC_Offset', 'Cycle', 'FORC', 'FORC_repeat', 'Frequency']
    spec_units = ['', 'V', '', '', '', 'Hz']
    spec_start = [0, 0, 0, 0, 0, self.start_freq]
    spec_steps = [1, 1, 1, 1, 1, (self.end_freq - self.start_freq) / self.n_bins]

    # Remove dimensions with single values
    real_dims = np.argwhere(np.array(spec_dims) != 1).squeeze()
    spec_dims = [spec_dims[idim] for idim in real_dims]
    spec_labs = [spec_labs[idim] for idim in real_dims]
    spec_units = [spec_units[idim] for idim in real_dims]
    spec_start = [spec_start[idim] for idim in real_dims]
    spec_steps = [spec_steps[idim] for idim in real_dims]

    # Correct the DC Offset dimension
    spec_dims_corrected = list()
    for dim_size, dim_name, dim_units, step_size, init_val in zip(
            spec_dims, spec_labs, spec_units, spec_steps, spec_start):
        if dim_name == 'DC_Offset':
            value = Vdc_vec[::2]
        else:
            value = np.arange(dim_size) * step_size + init_val
        spec_dims_corrected.append(Dimension(dim_name, dim_units, value))

    pos_dims = list()
    for dim_size, dim_name, dim_units, step_size, init_val in zip(
            [self.N_y, self.N_x], ['Y', 'X'], ['um', 'um'],
            [10 / self.N_y, 10 / self.N_x], [-5, -5]):
        pos_dims.append(Dimension(dim_name, dim_units, np.arange(dim_size) * step_size + init_val))

    return pos_dims, spec_dims_corrected

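# Hedged usage sketch (added, not part of the original source): the Dimension lists
# returned above can be turned into the ancillary index/value datasets with pyUSID's
# write_ind_val_dsets. This is written as a hypothetical companion method of the same
# simulator class; the measurement group handle is a placeholder.
def _example_write_ancillaries(self, h5_meas_grp):
    from pyUSID.io.hdf_utils import write_ind_val_dsets

    pos_dims, spec_dims = self._build_ancillary_datasets()
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, pos_dims, is_spectral=False)
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, spec_dims, is_spectral=True)
    return h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals
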
def translate(self, data_filepath, out_filename, verbose=False, debug=False): ''' The main function that translates the provided file into a .h5 file Parameters ---------------- data_filepath : String / unicode Absolute path of the data file out_filename : String / unicode Name for the new generated hdf5 file. The new file will be saved in the same folder of the input file with file name "out_filename". NOTE: the .h5 extension is automatically added to "out_filename" debug : Boolean (Optional. default is false) Whether or not to print log statements Returns ---------------- h5_path : String / unicode Absolute path of the generated .h5 file ''' self.debug = debug # Open the datafile try: data_filepath = os.path.abspath(data_filepath) ARh5_file = h5py.File(data_filepath, 'r') except: print('Unable to open the file', data_filepath) raise # Get info from the origin file like Notes and Segments self.notes = ARh5_file.attrs['Note'] self.segments = ARh5_file['ForceMap']['Segments'] #shape: (X, Y, 4) self.segments_name = list(ARh5_file['ForceMap'].attrs['Segments']) self.map_size['X'] = ARh5_file['ForceMap']['Segments'].shape[0] self.map_size['Y'] = ARh5_file['ForceMap']['Segments'].shape[1] self.channels_name = list(ARh5_file['ForceMap'].attrs['Channels']) try: self.points_per_sec = np.float( self.note_value('ARDoIVPointsPerSec')) except NameError: self.points_per_sec = np.float(self.note_value('NumPtsPerSec')) if self.debug: print('Map size [X, Y]: ', self.map_size) print('Channels names: ', self.channels_name) # Only the extension 'Ext' segment can change size # so we get the shortest one and we trim all the others extension_idx = self.segments_name.index('Ext') short_ext = np.amin(np.array(self.segments[:, :, extension_idx])) longest_ext = np.amax(np.array(self.segments[:, :, extension_idx])) difference = longest_ext - short_ext # this is a difference between integers tot_length = (np.amax(self.segments) - difference) + 1 # +1 otherwise array(tot_length) will be of 1 position shorter points_trimmed = np.array(self.segments[:, :, extension_idx]) - short_ext if self.debug: print('Data were trimmed in the extension segment of {} points'. 
# Open the output hdf5 file folder_path = os.path.dirname(data_filepath) h5_path = os.path.join(folder_path, out_filename + '.h5') h5_file = h5py.File(h5_path, 'w') # Create the measurement group h5_meas_group = create_indexed_group(h5_file, 'Measurement') # Create all channels and main datasets # at this point the main datasets are just functions of time x_dim = np.linspace(0, float(self.note_value('FastScanSize')), self.map_size['X']) y_dim = np.linspace(0, float(self.note_value('SlowScanSize')), self.map_size['Y']) z_dim = np.arange(tot_length) / float(self.points_per_sec) pos_dims = [ Dimension('Cols', 'm', x_dim), Dimension('Rows', 'm', y_dim) ] spec_dims = [Dimension('Time', 's', z_dim)] # This is quite time consuming, but on a magnetic drive the bottleneck is the disk, # so parallelizing these loops would not help for index, channel in enumerate(self.channels_name): cur_chan = create_indexed_group(h5_meas_group, 'Channel') main_dset = np.empty( (self.map_size['X'], self.map_size['Y'], tot_length)) for column in np.arange(self.map_size['X']): for row in np.arange(self.map_size['Y']): AR_pos_string = str(column) + ':' + str(row) seg_start = self.segments[column, row, extension_idx] - short_ext main_dset[column, row, :] = ARh5_file['ForceMap'][AR_pos_string][ index, seg_start:] # Reshape with Fortran order to have the correct position indices main_dset = np.reshape(main_dset, (-1, tot_length), order='F') if index == 0: first_main_dset = cur_chan quant_unit = self.get_def_unit(channel) h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of raw data 'Raw_' + channel, # Name of main dset channel, # Physical quantity self.get_def_unit(channel), # Unit pos_dims, # position dimensions spec_dims, # spectroscopic dimensions ) else: h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of raw data 'Raw_' + channel, # Name of main dset channel, # Physical quantity self.get_def_unit(channel), # Unit pos_dims, # position dimensions spec_dims, # spectroscopic dimensions # Link ancillary dsets to those of the first channel h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], h5_spec_inds=first_main_dset['Spectroscopic_Indices'], h5_spec_vals=first_main_dset['Spectroscopic_Values'], ) # Make Channels with IMAGES.
# Position indices/values are the same as for all the other channels # Spectroscopic indices/values are a single, unit-length dimension img_spec_dims = [Dimension('arb', 'a.u.', [1])] for index, image in enumerate(ARh5_file['Image'].keys()): main_dset = np.reshape(np.array(ARh5_file['Image'][image]), (-1, 1), order='F') cur_chan = create_indexed_group(h5_meas_group, 'Channel') if index == 0: first_image_dset = cur_chan h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of image (shape: P*Q x 1) 'Img_' + image, # Name of main dset image, # Physical quantity self.get_def_unit(image), # Unit pos_dims, # position dimensions img_spec_dims, # spectroscopic dimensions # Link position dsets to those of the first channel h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], ) else: h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of image (shape: P*Q x 1) 'Img_' + image, # Name of main dset image, # Physical quantity self.get_def_unit(image), # Unit pos_dims, # position dimensions img_spec_dims, # spectroscopic dimensions # Link ancillary dsets to those of the first channel / first image h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], h5_spec_inds=first_image_dset['Spectroscopic_Indices'], h5_spec_vals=first_image_dset['Spectroscopic_Values'], ) # Create the new segments that will be stored as attribute new_segments = {} for seg, name in enumerate(self.segments_name): new_segments.update({name: self.segments[0, 0, seg] - short_ext}) write_simple_attrs( h5_meas_group, { 'Segments': new_segments, 'Points_trimmed': points_trimmed, 'Notes': self.notes }) write_simple_attrs( h5_file, { 'translator': 'ARhdf5', 'instrument': 'Asylum Research ' + self.note_value('MicroscopeModel'), 'AR software version': self.note_value('Version') }) if self.debug: print(print_tree(h5_file)) print('\n') for key, val in get_attributes(h5_meas_group).items(): if key != 'Notes': print('{} : {}'.format(key, val)) else: print('{} : {}'.format( key, 'notes string too long to be written here.')) # Clean up ARh5_file.close() h5_file.close() self.translated = True return h5_path
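# Minimal toy sketch (not part of the translator; all sizes and the base segment length
# of 200 points are made up) of the trimming logic used in translate() above: every
# pixel's curve is shifted so the variable-length 'Ext' segment starts at the same
# index, and all curves end up with a common total length.
import numpy as np

rng = np.random.default_rng(0)
ext_lengths = np.array([[100, 103], [101, 100]])   # per-pixel length of the 'Ext' segment
curves = {'{}:{}'.format(c, r): rng.standard_normal(200 + ext_lengths[c, r])
          for c in range(2) for r in range(2)}     # fake force curves, one per pixel

short_ext = ext_lengths.min()
tot_length = 200 + short_ext                       # common length once the extra extension points are dropped
trimmed = np.empty((2, 2, tot_length))
for col in range(2):
    for row in range(2):
        seg_start = ext_lengths[col, row] - short_ext   # extra extension points to skip
        trimmed[col, row, :] = curves['{}:{}'.format(col, row)][seg_start:]

flat = trimmed.reshape(-1, tot_length, order='F')  # one trimmed curve per row, as in translate()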
def write_results(self, verbose=False, name='inst_freq_masked'): ''' Writes a new main dataset along with the mask and grain-distance ancillary arrays :param verbose: If True, print log statements while the datasets are written :type verbose: bool :param name: Name for the new main dataset :type name: str :returns: Reference to the new main dataset :rtype: USIDataset ''' h5_dist_clust_group = px.hdf_utils.create_indexed_group( self.h5_main.parent, 'dist-cluster') # Create dimensions pos_desc = [Dimension('Grain Distance', 'm', self.data_dist)] ds_pos_ind, ds_pos_val = build_ind_val_dsets(pos_desc, is_spectral=False, verbose=verbose) spec_desc = [ Dimension( 'Time', 's', np.linspace(0, self.pxl_time, self.parms_dict['pnts_per_avg'])) ] ds_spec_inds, ds_spec_vals = build_ind_val_dsets(spec_desc, is_spectral=True, verbose=verbose) # Writes main dataset h5_clust = px.hdf_utils.write_main_dataset( h5_dist_clust_group, self.data_scatter[:, 1:], name, # Name of main dataset 'Frequency', # Physical quantity contained in Main dataset 'Hz', # Units for the physical quantity pos_desc, # Position dimensions spec_desc, # Spectroscopic dimensions dtype=np.float32, # data type / precision main_dset_attrs=self.parms_dict) # Add the mask, grain minimum distances, mean distances, coordinates and averages grp = px.io.VirtualGroup(h5_dist_clust_group.name) mask = px.io.VirtualDataset('mask', self.mask, parent=self.h5_main.parent) dist_min = px.io.VirtualDataset('dist_min', self.data_dist, parent=self.h5_main.parent) dist_mean = px.io.VirtualDataset('dist_mean', self.data_avg_dist, parent=self.h5_main.parent) data_pos = px.io.VirtualDataset('coordinates', self.mask_off_1D_pos, parent=self.h5_main.parent) data_avg = px.io.VirtualDataset('data_avg', self.data_avg_1D_vals, parent=self.h5_main.parent) grp.add_children([mask, dist_min, dist_mean, data_pos, data_avg]) # Find folder, write to it hdf = px.io.HDFwriter(self.h5_main.file) h5_refs = hdf.write(grp, print_log=verbose) return h5_clust
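# The VirtualGroup / VirtualDataset / HDFwriter calls above come from an older pycroscopy
# I/O API. A rough, hypothetical equivalent for the ancillary arrays using plain h5py
# (file name, group name and array contents below are illustrative assumptions) might be:
import numpy as np
import h5py

with h5py.File('distances_sketch.h5', 'a') as h5_f:        # stand-in file for illustration
    grp = h5_f.require_group('dist-cluster_000')           # hypothetical group name
    for dset_name, arr in [('mask', np.zeros((8, 8), dtype=bool)),
                           ('dist_min', np.random.rand(64)),
                           ('dist_mean', np.random.rand(64))]:
        if dset_name not in grp:
            grp.create_dataset(dset_name, data=arr)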
def _write_results_chunk(self): """ Writes the labels and mean response to the h5 file Returns --------- h5_group : HDF5 Group reference Reference to the group that contains the clustering results """ print('Writing clustering results to file.') num_clusters = self.__mean_resp.shape[0] self.h5_results_grp = create_results_group( self.h5_main, self.process_name, h5_parent_group=self._h5_target_group) self._write_source_dset_provenance() write_simple_attrs(self.h5_results_grp, self.parms_dict) h5_labels = write_main_dataset(self.h5_results_grp, np.uint32(self.__labels.reshape([-1, 1])), 'Labels', 'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1), h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals, aux_spec_prefix='Cluster_', dtype=np.uint32) if self.num_comps != self.h5_main.shape[1]: ''' Setup the Spectroscopic Indices and Values for the Mean Response if we didn't use all components Note that a sliced spectroscopic matrix may not be contiguous. Let's just lose the spectroscopic data for now until a better method is figured out ''' """ if isinstance(self.data_slice[1], np.ndarray): centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1].tolist()] else: centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1]] ds_centroid_values.data[0, :] = centroid_vals_mat """ if isinstance(self.data_slice[1], np.ndarray): vals_slice = self.data_slice[1].tolist() else: vals_slice = self.data_slice[1] vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze() new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals) h5_inds, h5_vals = write_ind_val_dsets(self.h5_results_grp, new_spec, is_spectral=True) else: h5_inds = self.h5_main.h5_spec_inds h5_vals = self.h5_main.h5_spec_vals # For now, link centroids with default spectroscopic indices and values. h5_centroids = write_main_dataset(self.h5_results_grp, self.__mean_resp, 'Mean_Response', get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0], Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None, h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_', h5_spec_vals=h5_vals) # Marking completion: self._status_dset_name = 'completed_positions' self._h5_status_dset = self.h5_results_grp.create_dataset( self._status_dset_name, data=np.ones(self.h5_main.shape[0], dtype=np.uint8)) # keeping legacy option: self.h5_results_grp.attrs['last_pixel'] = self.h5_main.shape[0] return self.h5_results_grp
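# Stand-alone sketch (independent of the class above; sizes are illustrative and every
# cluster is assumed to have at least one member) of how a per-cluster mean response
# like self.__mean_resp relates to the label vector written as 'Labels'.
import numpy as np

rng = np.random.default_rng(0)
data = rng.random((100, 16))                   # (positions, spectral points)
labels = rng.integers(0, 4, size=100)          # cluster ID for every position
num_clusters = labels.max() + 1                # assumes labels run 0 .. num_clusters - 1
mean_resp = np.vstack([data[labels == k].mean(axis=0) for k in range(num_clusters)])
print(mean_resp.shape)                         # (num_clusters, spectral points) -> (4, 16)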
def reshape_from_lines_to_pixels(h5_main, pts_per_cycle, scan_step_x_m=None): """ Breaks up the provided raw G-mode dataset into lines and pixels (from just lines) Parameters ---------- h5_main : h5py.Dataset object Reference to the main dataset that contains the raw data that is only broken up by lines pts_per_cycle : unsigned int Number of points in a single pixel scan_step_x_m : float, optional Step in meters for pixels Returns ------- h5_resh : h5py.Dataset object Reference to the main dataset that contains the reshaped data """ if not check_if_main(h5_main): raise TypeError('h5_main is not a Main dataset') h5_main = USIDataset(h5_main) if pts_per_cycle % 1 != 0 or pts_per_cycle < 1: raise TypeError('pts_per_cycle should be a positive integer') if scan_step_x_m is not None: if not isinstance(scan_step_x_m, Number): raise TypeError('scan_step_x_m should be a real number') else: scan_step_x_m = 1 if h5_main.shape[1] % pts_per_cycle != 0: raise ValueError('Cannot reshape the provided dataset to pixels - the number of spectroscopic points ({}) is not divisible by pts_per_cycle ({})'.format(h5_main.shape[1], pts_per_cycle)) num_cols = int(h5_main.shape[1] / pts_per_cycle) # TODO: DO NOT assume simple 1 spectral dimension! single_ao = np.squeeze(h5_main.h5_spec_vals[:, :pts_per_cycle]) spec_dims = Dimension( get_attr(h5_main.h5_spec_vals, 'labels')[0], get_attr(h5_main.h5_spec_vals, 'units')[0], single_ao) # TODO: DO NOT assume simple 1D in positions! pos_dims = [ Dimension('X', 'm', np.linspace(0, scan_step_x_m, num_cols)), Dimension('Y', 'm', np.linspace(0, h5_main.h5_pos_vals[1, 0], h5_main.shape[0])) ] h5_group = create_results_group(h5_main, 'Reshape') # TODO: Create empty datasets and then write for very large datasets h5_resh = write_main_dataset(h5_group, (num_cols * h5_main.shape[0], pts_per_cycle), 'Reshaped_Data', get_attr(h5_main, 'quantity')[0], get_attr(h5_main, 'units')[0], pos_dims, spec_dims, chunks=(10, pts_per_cycle), dtype=h5_main.dtype, compression=h5_main.compression) # TODO: DON'T write in one shot assuming small datasets fit in memory! print('Starting to reshape G-mode line data. Please be patient') h5_resh[()] = np.reshape(h5_main[()], (-1, pts_per_cycle)) print('Finished reshaping G-mode line data to rows and columns') return USIDataset(h5_resh)
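# Toy example (not part of the function above; sizes are made up) showing the reshape it
# performs: each line of h5_main holds num_cols pixels back to back, so a plain row-major
# reshape splits every line into its pixels while preserving their order.
import numpy as np

num_rows, num_cols, pts_per_cycle = 2, 3, 4
lines = np.arange(num_rows * num_cols * pts_per_cycle).reshape(num_rows, num_cols * pts_per_cycle)
pixels = lines.reshape(-1, pts_per_cycle)          # shape: (num_rows * num_cols, pts_per_cycle)
assert np.array_equal(pixels[1], lines[0, pts_per_cycle:2 * pts_per_cycle])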
def translate(self, file_path): """ The main function that translates the provided file into a .h5 file Parameters ---------- file_path : String / unicode Absolute path of any file in the directory Returns ------- h5_path : String / unicode Absolute path of the h5 file """ file_path = path.abspath(file_path) # Figure out the basename of the data: (basename, parm_paths, data_paths) = self._parse_file_path(file_path) (folder_path, unused) = path.split(file_path) h5_path = path.join(folder_path, basename + '.h5') if path.exists(h5_path): remove(h5_path) # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows' matread = loadmat(parm_paths['parm_mat'], variable_names=[ 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows' ]) be_wave = np.float32(np.squeeze(matread['BE_wave'])) # Need to take the complex conjugate if reading from a .mat file # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave']))) num_cols = int(matread['total_cols'][0][0]) expected_rows = int(matread['total_rows'][0][0]) self.points_per_pixel = len(be_wave) # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate' is_beps, parm_dict = parmsToDict(parm_paths['parm_txt']) # Get file byte size: # For now, assume that bigtime_00 always exists and is the main file file_size = path.getsize(data_paths[0]) # Calculate actual number of lines since the first few lines may not be saved self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols) if self.num_rows % 1: warn('Error - File has incomplete rows') return None else: self.num_rows = int(self.num_rows) samp_rate = parm_dict['IO_rate_[Hz]'] ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]'] # method 1 for calculating the correct excitation frequency: pixel_duration = 1.0 * self.points_per_pixel / samp_rate num_periods = pixel_duration * ex_freq_nominal ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods)) # method 2 for calculating the exact excitation frequency: """ fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave))) w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel, self.points_per_pixel) hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3)) ex_freq_correct = w_vec[hot_bins[-1]] """ # correcting the excitation frequency - will be VERY useful during analysis and filtering parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct # Some very basic information that can help the processing crew parm_dict['num_bins'] = self.points_per_pixel parm_dict['grid_num_rows'] = self.num_rows parm_dict['data_type'] = 'G_mode_line' if self.num_rows != expected_rows: print('Note: {} of {} lines found in data file'.format( self.num_rows, expected_rows)) # Calculate number of points to read per line: self.__bytes_per_row__ = int(file_size / self.num_rows) # First finish writing all global parameters, create the file too: h5_f = h5py.File(h5_path, 'w') global_parms = dict() global_parms['data_type'] = 'G_mode_line' global_parms['translator'] = 'G_mode_line' write_simple_attrs(h5_f, global_parms) meas_grp = create_indexed_group(h5_f, 'Measurement') write_simple_attrs(meas_grp, parm_dict) pos_desc = Dimension('Y', 'm', np.arange(self.num_rows)) spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols)) first_dat = True for key in data_paths.keys(): # Now that the file has been created, go over each raw data file: # 1. write all ancillary data. Link data. 2. Write main data sequentially """ We only allocate the space for the main data here. 
This does NOT change with each file. The data written to it does. The auxiliary datasets will not change with each raw data file since only one excitation waveform is used""" chan_grp = create_indexed_group(meas_grp, 'Channel') if first_dat: if len(data_paths) > 1: # All positions and spectra are shared between channels h5_pos_inds, h5_pos_vals = write_ind_val_dsets( meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets( meas_grp, spec_desc, is_spectral=True) elif len(data_paths) == 1: h5_pos_inds, h5_pos_vals = write_ind_val_dsets( chan_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets( chan_grp, spec_desc, is_spectral=True) first_dat = False else: pass h5_main = write_main_dataset( chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data', 'Deflection', 'V', None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, chunks=(1, self.points_per_pixel), dtype=np.float16) # Now transfer scan data in the dat file to the h5 file: self._read_data(data_paths[key], h5_main) h5_f.close() print('G-Line translation complete!') return h5_path
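# Stand-alone illustration of the excitation-frequency correction used above ("method 1"):
# the nominal BE centre frequency is snapped to the closest value that fits an integer
# number of periods into one pixel. The sample numbers below are assumptions for the example.
points_per_pixel = 2 ** 14          # e.g. 16384 points acquired per pixel
samp_rate = 4.0e6                   # IO rate in Hz
ex_freq_nominal = 510.0e3           # nominal BE centre frequency in Hz

pixel_duration = points_per_pixel / samp_rate                 # seconds spent on one pixel
num_periods = pixel_duration * ex_freq_nominal                # excitation periods per pixel
ex_freq_correct = 1.0 / (pixel_duration / int(num_periods))   # frequency with whole periods only
print(ex_freq_correct)  # slightly below 510 kHz so an integer number of cycles fits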