def _translate_force_map(self, h5_meas_grp): """ Reads the scan image + force map from the proprietary file and writes it to HDF5 datasets Parameters ---------- h5_meas_grp : h5py.Group object Reference to the measurement group """ # First lets write the image into the measurement group that has already been created: image_parms = self.meta_data['Ciao image list'] quantity = image_parms.pop('Image Data_2') image_mat = self._read_image_layer(image_parms) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset( h5_chan_grp, np.reshape(image_mat, (-1, 1)), 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', [ Dimension('X', 'nm', image_parms['Samps/line']), Dimension('Y', 'nm', image_parms['Number of lines']) ], Dimension('single', 'a. u.', 1), dtype=np.float32, compression='gzip') # Think about standardizing attributes for rows and columns write_simple_attrs(h5_chan_grp, image_parms) # Now work on the force map: force_map_parms = self.meta_data['Ciao force image list'] quantity = force_map_parms.pop('Image Data_4') force_map_vec = self._read_data_vector(force_map_parms) tr_rt = [ int(item) for item in force_map_parms['Samps/line'].split(' ') ] force_map_2d = force_map_vec.reshape(image_mat.size, np.sum(tr_rt)) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset( h5_chan_grp, force_map_2d, 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', [ Dimension('X', 'nm', image_parms['Samps/line']), Dimension('Y', 'nm', image_parms['Number of lines']) ], Dimension('Z', 'nm', int(np.sum(tr_rt))), dtype=np.float32, compression='gzip') # Think about standardizing attributes write_simple_attrs(h5_chan_grp, force_map_parms)
def create_hdf5_file(self, append_path='', grp_name='Measurement', overwrite=False): if not append_path: h5_path = os.path.join(self.directory, self.basename.replace('.txt', '.h5')) if os.path.exists(h5_path): if not overwrite: raise FileExistsError( 'This file already exists). Set attribute overwrite to True' ) else: print('Overwriting file', h5_path) #os.remove(h5_path) self.h5_f = h5py.File(h5_path, mode='w') else: if not os.path.exists(append_path): raise Exception('File does not exist. Check pathname.') self.h5_f = h5py.File(append_path, mode='r+') self.h5_meas_grp = create_indexed_group(self.h5_f, grp_name) write_simple_attrs(self.h5_meas_grp, self.params_dictionary) return
def write_spectrograms(self): if bool(self.spectrogram_desc): for spectrogram_f, descriptors in self.spectrogram_desc.items(): channel_i = create_indexed_group(self.h5_meas_grp, 'Channel_') spec_vals_i = self.spectrogram_spec_vals[spectrogram_f] spectrogram_spec_dims = Dimension('Wavelength', descriptors[8], spec_vals_i) h5_raw = write_main_dataset( channel_i, # parent HDF5 group (self.x_len * self.y_len, len(spec_vals_i)), # shape of Main dataset 'Raw_Data', # Name of main dataset 'Spectrogram', # Physical quantity contained in Main dataset descriptors[3], # Units for the physical quantity self.pos_dims, # Position dimensions spectrogram_spec_dims, # Spectroscopic dimensions dtype=np.float32, # data type / precision main_dset_attrs={ 'Caption': descriptors[0], 'Bytes_Per_Pixel': descriptors[1], 'Scale': descriptors[2], 'Physical_Units': descriptors[3], 'Offset': descriptors[4], 'Datatype': descriptors[5], 'Bytes_Per_Reading': descriptors[6], 'Wavelength_File': descriptors[7], 'Wavelength_Units': descriptors[8] }) h5_raw.h5_pos_vals[:, :] = self.pos_val h5_raw[:, :] = self.spectrograms[spectrogram_f].reshape( h5_raw.shape)
def _create_root_image(self, image_path): """ Create the Groups and Datasets for a single root image Parameters ---------- image_path : str Path to the image file Returns ------- None """ image, image_parms = read_dm3(image_path) if image.ndim == 3: image = np.sum(image, axis=0) ''' Create the Measurement and Channel Groups to hold the image Datasets ''' meas_grp = create_indexed_group(self.h5_f, 'Measurement') chan_grp = create_indexed_group(meas_grp, 'Channel') ''' Set the Measurement Group attributes ''' usize, vsize = image.shape image_parms['image_size_u'] = usize image_parms['image_size_v'] = vsize image_parms['translator'] = 'OneView' image_parms['num_pixels'] = image.size write_simple_attrs(meas_grp, image_parms) ''' Build Spectroscopic and Position dimensions ''' spec_desc = Dimension('Image', 'a.u.', [1]) pos_desc = [Dimension('X', 'pixel', np.arange(image.shape[0])), Dimension('Y', 'pixel', np.arange(image.shape[1]))] h5_image = write_main_dataset(chan_grp, np.reshape(image, (-1, 1)), 'Raw_Data', 'Intensity', 'a.u.', pos_desc, spec_desc) self.root_image_list.append(h5_image)
def _translate_gsf(self, file_path, meas_grp): """ Parameters ---------- file_path meas_grp For more information on the .gsf file format visit the link below - http://gwyddion.net/documentation/user-guide-en/gsf.html """ # Read the data in from the specified file gsf_meta, gsf_values = gsf_read(file_path) # Write parameters where available specifically for sample_name # data_type, comments and experiment_date to file-level parms # Using pop, move some global parameters from gsf_meta to global_parms: self.global_parms['data_type'] = 'Gwyddion_GSF' self.global_parms['comments'] = gsf_meta.get('comment', '') self.global_parms['experiment_date'] = gsf_meta.get('date', '') # overwrite some parameters at the file level: write_simple_attrs(meas_grp.parent, self.global_parms) # Build the reference values for the ancillary position datasets: # TODO: Remove information from parameters once it is used meaningfully where it needs to be. # Here, it is no longer necessary to save XReal anymore so we will pop (remove) it from gsf_meta x_offset = gsf_meta.get('XOffset', 0) x_range = gsf_meta.get('XReal', 1.0) # TODO: Use Numpy wherever possible instead of pure python x_vals = np.linspace(0, x_range, gsf_meta.get('XRes')) + x_offset y_offset = gsf_meta.get('YOffset', 0) y_range = gsf_meta.get('YReal', 1.0) y_vals = np.linspace(0, y_range, gsf_meta.get('YRes')) + y_offset # Just define the ancillary position and spectral dimensions. Do not create datasets yet pos_desc = [Dimension('X', gsf_meta.get('XYUnits', 'arb. units'), x_vals), Dimension('Y', gsf_meta.get('XYUnits', 'arb. units'), y_vals)] spec_desc = Dimension('Intensity', gsf_meta.get('ZUnits', 'arb. units'), [1]) """ You only need to prepare the dimensions for positions and spectroscopic. You do not need to write the ancillary datasets at this point. write_main_dataset will take care of that. You only need to use write_ind_val_datasets() for the cases where you may need to reuse the datasets. See the tutorial online. """ # Create the channel-level group chan_grp = create_indexed_group(meas_grp, 'Channel') write_simple_attrs(chan_grp, gsf_meta) # Create the main dataset (and the two_dim_image = gsf_values write_main_dataset(chan_grp, np.atleast_2d(np.reshape(two_dim_image, len(pos_desc[0].values) * len(pos_desc[1].values))).transpose(), 'Raw_Data', gsf_meta.get('Title', 'Unknown'), gsf_meta.get('ZUnits', 'arb. units'), pos_desc, spec_desc)
def _translate_image_stack(self, meas_grp, gwy_data, obj, channels): """ Use this function to write data corresponding to a stack of scan images (most common) Returns ------- """ current_channel = '' # Iterate through each object in the gwy dataset gwy_key = obj.split('/') # Test whether a new channel needs to be created # The 'filename' structure in the gwy file should not have a channel created hence the try/except block try: if int(gwy_key[1]) not in channels.keys(): current_channel = create_indexed_group(meas_grp, "Channel") channels[int(gwy_key[1])] = current_channel else: current_channel = channels[int(gwy_key[1])] except ValueError: if obj.endswith('filename'): pass # The data structure of the gwy file will be used to create the main dataset in the h5 file if obj.endswith('data'): x_range = gwy_data[obj].get('xreal', 1.0) x_vals = np.linspace(0, x_range, gwy_data[obj]['xres']) # print('obj {}\nx_vals {}'.format(obj, x_vals)) y_range = gwy_data[obj].get('yreal', 1.0) y_vals = np.linspace(0, y_range, gwy_data[obj]['yres']) pos_desc = [Dimension('X', gwy_data[obj]['si_unit_xy'].get('unitstr'), x_vals), Dimension('Y', gwy_data[obj]['si_unit_xy'].get('unitstr'), y_vals)] # print(pos_desc) spec_dim = gwy_data['/{}/data/title'.format(gwy_key[1])] spec_desc = Dimension(spec_dim, gwy_data[obj]['si_unit_z'].get('unitstr', 'arb. units'), [0]) two_dim_image = gwy_data[obj]['data'] write_main_dataset(current_channel, np.atleast_2d(np.reshape(two_dim_image, len(pos_desc[0].values) * len(pos_desc[1].values))).transpose(), 'Raw_Data', spec_dim, gwy_data[obj]['si_unit_z'].get('unitstr'), pos_desc, spec_desc) # print('main dataset has been written') # image data processing elif obj.endswith('meta'): meta = {} write_simple_attrs(current_channel, meta, verbose=False) return channels
def write_ps_spectra(self): if bool(self.pspectrum_desc): for spec_f, descriptors in self.pspectrum_desc.items(): # create new measurement group for ea spectrum self.h5_meas_grp = create_indexed_group( self.h5_f, 'Measurement_') x_name = self.spectra_x_y_dim_name[spec_f][0].split(' ')[0] x_unit = self.spectra_x_y_dim_name[spec_f][0].split(' ')[1] y_name = self.spectra_x_y_dim_name[spec_f][1].split(' ')[0] y_unit = self.spectra_x_y_dim_name[spec_f][1].split(' ')[1] spec_i_spec_dims = Dimension(x_name, x_unit, self.spectra_spec_vals[spec_f]) spec_i_pos_dims = [ Dimension( 'X', self.params_dictionary['XPhysUnit'].replace( '\xb5', 'u'), np.array([0])), Dimension( 'Y', self.params_dictionary['YPhysUnit'].replace( '\xb5', 'u'), np.array([0])) ] # write data to a channel in the measurement group spec_i_ch = create_indexed_group(self.h5_meas_grp, 'PowerSpectrum_') h5_raw = write_main_dataset( spec_i_ch, # parent HDF5 group (1, len(self.spectra_spec_vals[spec_f])), # shape of Main dataset 'Raw_Spectrum', # Name of main dataset y_name, # Physical quantity contained in Main dataset y_unit, # Units for the physical quantity # Position dimensions pos_dims=spec_i_pos_dims, spec_dims=spec_i_spec_dims, # Spectroscopic dimensions dtype=np.float32, # data type / precision main_dset_attrs={ 'XLoc': 0, 'YLoc': 0 }) h5_raw[:, :] = self.spectra[spec_f].reshape(h5_raw.shape)
def _setupH5(self, image_parms): """ Setup the HDF5 file in which to store the data Due to the structure of the ndata format, we can only create the Measurement and Channel groups here Parameters ---------- image_parms : dict Dictionary of parameters Returns ------- h5_main : h5py.Dataset HDF5 Dataset that the images will be written into h5_mean_spec : h5py.Dataset HDF5 Dataset that the mean over all positions will be written into h5_ronch : h5py.Dataset HDF5 Dateset that the mean over all Spectroscopic steps will be written into """ root_parms = generate_dummy_main_parms() root_parms['data_type'] = 'PtychographyData' # Create the hdf5 data Group write_simple_attrs(self.h5_f, root_parms) h5_channels = list() for meas_parms in image_parms: # Create new measurement group for each set of parameters meas_grp = create_indexed_group(self.h5_f, 'Measurement') # Write the parameters as attributes of the group write_simple_attrs(meas_grp, meas_parms) chan_grp = create_indexed_group(meas_grp, 'Channel') h5_channels.append(chan_grp) self.h5_f.flush() return h5_channels
def _setupH5(self, image_parms): """ Setup the HDF5 file in which to store the data Due to the structure of the ndata format, we can only create the Measurement and Channel groups here Parameters ---------- image_parms : dict Dictionary of parameters Returns ------- h5_main : h5py.Dataset HDF5 Dataset that the images will be written into h5_mean_spec : h5py.Dataset HDF5 Dataset that the mean over all positions will be written into h5_ronch : h5py.Dataset HDF5 Dateset that the mean over all Spectroscopic steps will be written into """ root_parms = dict() root_parms['data_type'] = 'PtychographyData' # Create the hdf5 data Group write_simple_attrs(self.h5_f, root_parms) h5_channels = list() for meas_parms in image_parms: # Create new measurement group for each set of parameters meas_grp = create_indexed_group(self.h5_f, 'Measurement') # Write the parameters as attributes of the group write_simple_attrs(meas_grp, meas_parms) chan_grp = create_indexed_group(meas_grp, 'Channel') h5_channels.append(chan_grp) self.h5_f.flush() return h5_channels
def _translate_image_stack(self, h5_meas_grp): """ Reads the scan images from the proprietary file and writes them to HDF5 datasets Parameters ---------- h5_meas_grp : h5py.Group object Reference to the measurement group """ # since multiple channels will share the same position and spectroscopic dimensions, why not share them? h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension( 'single', 'a. u.', 1), is_spectral=True) # Find out the size of the force curves from the metadata: layer_info = None for class_name in self.meta_data.keys(): if 'Ciao image list' in class_name: layer_info = self.meta_data[class_name] break h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [ Dimension('X', 'nm', layer_info['Samps/line']), Dimension('Y', 'nm', layer_info['Number of lines']) ], is_spectral=False) for class_name in self.meta_data.keys(): if 'Ciao image list' in class_name: layer_info = self.meta_data[class_name] quantity = layer_info.pop('Image Data_2') data = self._read_image_layer(layer_info) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset( h5_chan_grp, np.reshape(data, (-1, 1)), 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', None, None, dtype=np.float32, compression='gzip', h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals) # Think about standardizing attributes for rows and columns write_simple_attrs(h5_chan_grp, layer_info)
def _translate_force_curve(self, h5_meas_grp): """ Reads the force curves from the proprietary file and writes them to HDF5 datasets Parameters ---------- h5_meas_grp : h5py.Group object Reference to the measurement group """ # since multiple channels will share the same position and spectroscopic dimensions, why not share them? h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, Dimension( 'single', 'a. u.', 1), is_spectral=False) # Find out the size of the force curves from the metadata: layer_info = None for class_name in self.meta_data.keys(): if 'Ciao force image list' in class_name: layer_info = self.meta_data[class_name] break tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')] h5_spec_inds, h5_spec_vals = write_ind_val_dsets( h5_meas_grp, Dimension('Z', 'nm', int(np.sum(tr_rt))), is_spectral=True) for class_name in self.meta_data.keys(): if 'Ciao force image list' in class_name: layer_info = self.meta_data[class_name] quantity = layer_info.pop('Image Data_4') data = self._read_data_vector(layer_info) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset( h5_chan_grp, np.expand_dims(data, axis=0), 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', None, None, dtype=np.float32, compression='gzip', h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals) # Think about standardizing attributes write_simple_attrs(h5_chan_grp, layer_info)
def _translate_force_map(self, h5_meas_grp): """ Reads the scan image + force map from the proprietary file and writes it to HDF5 datasets Parameters ---------- h5_meas_grp : h5py.Group object Reference to the measurement group """ # First lets write the image into the measurement group that has already been created: image_parms = self.meta_data['Ciao image list'] quantity = image_parms.pop('Image Data_2') image_mat = self._read_image_layer(image_parms) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset(h5_chan_grp, np.reshape(image_mat, (-1, 1)), 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', [Dimension('X', 'nm', image_parms['Samps/line']), Dimension('Y', 'nm', image_parms['Number of lines'])], Dimension('single', 'a. u.', 1), dtype=np.float32, compression='gzip') # Think about standardizing attributes for rows and columns write_simple_attrs(h5_chan_grp, image_parms) # Now work on the force map: force_map_parms = self.meta_data['Ciao force image list'] quantity = force_map_parms.pop('Image Data_4') force_map_vec = self._read_data_vector(force_map_parms) tr_rt = [int(item) for item in force_map_parms['Samps/line'].split(' ')] force_map_2d = force_map_vec.reshape(image_mat.size, np.sum(tr_rt)) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset(h5_chan_grp, force_map_2d, 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', [Dimension('X', 'nm', image_parms['Samps/line']), Dimension('Y', 'nm', image_parms['Number of lines'])], Dimension('Z', 'nm', int(np.sum(tr_rt))), dtype=np.float32, compression='gzip') # Think about standardizing attributes write_simple_attrs(h5_chan_grp, force_map_parms)
def translate(self, file_path, *args, **kwargs): # Two kinds of files: # 1. Simple GSF files -> use metadata, data = gsf_read(file_path) # 2. Native .gwy files -> use the gwyfile package # I have a notebook that shows how such data can be read. # Create the .h5 file from the input file if not isinstance(file_path, (str, unicode)): raise TypeError('file_path should be a string!') if not (file_path.endswith('.gsf') or file_path.endswith('.gwy')): # TODO: Gwyddion is weird, it doesn't append the file extension some times. # In theory, you could identify the kind of file by looking at the header (line 38 in gsf_read()). # Ideally the header check should be used instead of the extension check raise ValueError('file_path must have a .gsf or .gwy extension!') file_path = path.abspath(file_path) folder_path, base_name = path.split(file_path) base_name = base_name[:-4] h5_path = path.join(folder_path, base_name + '.h5') if path.exists(h5_path): remove(h5_path) self.h5_file = h5py.File(h5_path, 'w') """ Setup the global parameters --------------------------- translator: Gywddion data_type: depends on file type GwyddionGSF_<gsf_meta['title']> or GwyddionGWY_<gwy_meta['title']> """ self.global_parms = generate_dummy_main_parms() self.global_parms['translator'] = 'Gwyddion' # Create the measurement group meas_grp = create_indexed_group(self.h5_file, 'Measurement') if file_path.endswith('.gsf'): self._translate_gsf(file_path, meas_grp) if file_path.endswith('gwy'): self._translate_gwy(file_path, meas_grp) write_simple_attrs(self.h5_file, self.global_parms) return h5_path
def _translate_spectra(self, meas_grp, gwy_data, obj, channels): """ Use this to translate simple 1D data like force curves Returns ------- """ current_channel = '' gwy_key = obj.split('/') try: if int(gwy_key[2]) not in channels.keys(): current_channel = create_indexed_group(meas_grp, "Channel") channels[int(gwy_key[2])] = current_channel else: current_channel = channels[int(gwy_key[2])] except ValueError: if obj.endswith('filename'): pass else: raise ValueError('There was an unexpected directory in the spectra file') title = obj['title'] unitstr = obj['unitstr'] coords = obj['coords'] res = obj['data']['res'] real = obj['data']['real'] offset = obj['data']['off'] x_units = obj['data']['si_unit_x']['unitstr'] y_units = obj['data']['si_unit_y']['unitstr'] data = obj['data']['data'] indices = obj['selected'] x_vals = np.linspace(offset, real, res) pos_desc = [Dimension('X', x_units, x_vals)] spec_desc = [Dimension(title, y_units, 0)] write_main_dataset(current_channel, data, 'Raw_Data', title, gwy_data[obj]['si_unit_y'], pos_desc, spec_desc) return channels
def _translate_image_stack(self, h5_meas_grp): """ Reads the scan images from the proprietary file and writes them to HDF5 datasets Parameters ---------- h5_meas_grp : h5py.Group object Reference to the measurement group """ # since multiple channels will share the same position and spectroscopic dimensions, why not share them? h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=True) # Find out the size of the force curves from the metadata: layer_info = None for class_name in self.meta_data.keys(): if 'Ciao image list' in class_name: layer_info = self.meta_data[class_name] break h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [Dimension('X', 'nm', layer_info['Samps/line']), Dimension('Y', 'nm', layer_info['Number of lines'])], is_spectral=False) for class_name in self.meta_data.keys(): if 'Ciao image list' in class_name: layer_info = self.meta_data[class_name] quantity = layer_info.pop('Image Data_2') data = self._read_image_layer(layer_info) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset(h5_chan_grp, np.reshape(data, (-1, 1)), 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', None, None, dtype=np.float32, compression='gzip', h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals) # Think about standardizing attributes for rows and columns write_simple_attrs(h5_chan_grp, layer_info)
def _translate_force_curve(self, h5_meas_grp): """ Reads the force curves from the proprietary file and writes them to HDF5 datasets Parameters ---------- h5_meas_grp : h5py.Group object Reference to the measurement group """ # since multiple channels will share the same position and spectroscopic dimensions, why not share them? h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=False) # Find out the size of the force curves from the metadata: layer_info = None for class_name in self.meta_data.keys(): if 'Ciao force image list' in class_name: layer_info = self.meta_data[class_name] break tr_rt = [int(item) for item in layer_info['Samps/line'].split(' ')] h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, Dimension('Z', 'nm', int(np.sum(tr_rt))), is_spectral=True) for class_name in self.meta_data.keys(): if 'Ciao force image list' in class_name: layer_info = self.meta_data[class_name] quantity = layer_info.pop('Image Data_4') data = self._read_data_vector(layer_info) h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_main_dataset(h5_chan_grp, np.expand_dims(data, axis=0), 'Raw_Data', # Quantity and Units needs to be fixed by someone who understands these files better quantity, 'a. u.', None, None, dtype=np.float32, compression='gzip', h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals) # Think about standardizing attributes write_simple_attrs(h5_chan_grp, layer_info)
def write_images(self): if bool(self.img_desc): for img_f, descriptors in self.img_desc.items(): #check for existing spectrogram or image and link position/spec inds/vals #at most two channels worth of need to be checked (Fwd and Bwd) try: str_main = str( get_all_main(self.h5_f['Measurement_000/Channel_000'])) i_beg = str_main.find('located at: \n\t') + 14 i_end = str_main.find('\nData contains') - 1 data_loc = str_main[i_beg:i_end] channel_data = USIDataset(self.h5_f[data_loc]) h5_pos_inds = channel_data.h5_pos_inds h5_pos_vals = channel_data.h5_pos_vals pos_dims = None write_pos_vals = False if channel_data.spec_dim_sizes[0] == 1: h5_spec_inds = channel_data.h5_spec_inds h5_spec_vals = channel_data.h5_spec_vals spec_dims = None #if channel 000 is spectrogram, check next dataset elif channel_data.spec_dim_sizes[0] != 1: str_main = str( get_all_main( self.h5_f['Measurement_000/Channel_001'])) i_beg = str_main.find('located at: \n\t') + 14 i_end = str_main.find('\nData contains') - 1 data_loc = str_main[i_beg:i_end] channel_data = USIDataset(self.h5_f[data_loc]) #channel data is an image, & we link their spec inds/vals if channel_data.spec_dim_sizes[0] == 1: h5_spec_inds = channel_data.h5_spec_inds h5_spec_vals = channel_data.h5_spec_vals spec_dims = None else: # If a forward/bwd spectrogram exist h5_spec_inds = None h5_spec_vals = None spec_dims = Dimension('arb', 'a.u', 1) #in case where channel does not exist, we make new spec/pos inds/vals except KeyError: #pos dims h5_pos_inds = None h5_pos_vals = None pos_dims = self.pos_dims write_pos_vals = True #spec dims h5_spec_inds = None h5_spec_vals = None spec_dims = Dimension('arb', 'a.u', 1) channel_i = create_indexed_group(self.h5_meas_grp, 'Channel_') h5_raw = write_main_dataset( channel_i, #parent HDF5 group (self.x_len * self.y_len, 1), # shape of Main dataset 'Raw_' + descriptors[0].replace('-', '_'), # Name of main dataset descriptors[0], # Physical quantity contained in Main dataset descriptors[2], # Units for the physical quantity h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, # Position dimensions pos_dims=pos_dims, # Spectroscopic dimensions h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, spec_dims=spec_dims, dtype=np.float32, # data type / precision main_dset_attrs={ 'Caption': descriptors[0], 'Scale': descriptors[1], 'Physical_Units': descriptors[2], 'Offset': descriptors[3] }) h5_raw[:, :] = self.imgs[img_f].reshape(h5_raw.shape) if write_pos_vals: h5_raw.h5_pos_vals[:, :] = self.pos_val
def translate(self, parm_path): """ The main function that translates the provided file into a .h5 file Parameters ------------ parm_path : string / unicode Absolute file path of the parameters .mat file. Returns ---------- h5_path : string / unicode Absolute path of the translated h5 file """ parm_path = path.abspath(parm_path) parm_dict, excit_wfm = self._read_parms(parm_path) excit_wfm = excit_wfm[1::2] self._parse_file_path(parm_path) num_dat_files = len(self.file_list) f = open(self.file_list[0], 'rb') spectrogram_size, count_vals = self._parse_spectrogram_size(f) print("Excitation waveform shape: ", excit_wfm.shape) print("spectrogram size:", spectrogram_size) num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols'] print('Number of pixels: ', num_pixels) print('Count Values: ', count_vals) # if (num_pixels + 1) != count_vals: # print("Data size does not match number of pixels expected. Cannot continue") # Find how many channels we have to make num_ai_chans = num_dat_files // 2 # Division by 2 due to real/imaginary # Now start creating datasets and populating: # Start with getting an h5 file h5_file = h5py.File(self.h5_path) # First create a measurement group h5_meas_group = create_indexed_group(h5_file, 'Measurement') # Set up some parameters that will be written as attributes to this Measurement group global_parms = dict() global_parms['data_type'] = 'trKPFM' global_parms['translator'] = 'trKPFM' write_simple_attrs(h5_meas_group, global_parms) write_simple_attrs(h5_meas_group, parm_dict) # Now start building the position and spectroscopic dimension containers # There's only one spectroscpoic dimension and two position dimensions # The excit_wfm only has the DC values without any information on cycles, time, etc. # What we really need is to add the time component. For every DC step there are some time steps. 
num_time_steps = ( spectrogram_size - 5 ) // excit_wfm.size // 2 # Need to divide by 2 because it considers on and off field # There should be three spectroscopic axes # In order of fastest to slowest varying, we have # time, voltage, field time_vec = np.linspace(0, parm_dict['IO_time'], num_time_steps) print('Num time steps: {}'.format(num_time_steps)) print('DC Vec size: {}'.format(excit_wfm.shape)) print('Spectrogram size: {}'.format(spectrogram_size)) field_vec = np.array([0, 1]) spec_dims = [ Dimension('Time', 's', time_vec), Dimension('Field', 'Binary', field_vec), Dimension('Bias', 'V', excit_wfm) ] pos_dims = [ Dimension('Cols', 'm', int(parm_dict['grid_num_cols'])), Dimension('Rows', 'm', int(parm_dict['grid_num_rows'])) ] self.raw_datasets = list() for chan_index in range(num_ai_chans): chan_grp = create_indexed_group(h5_meas_group, 'Channel') if chan_index == 0: write_simple_attrs(chan_grp, {'Harmonic': 1}) else: write_simple_attrs(chan_grp, {'Harmonic': 2}) h5_raw = write_main_dataset( chan_grp, # parent HDF5 group (num_pixels, spectrogram_size - 5), # shape of Main dataset 'Raw_Data', # Name of main dataset 'Deflection', # Physical quantity contained in Main dataset 'V', # Units for the physical quantity pos_dims, # Position dimensions spec_dims, # Spectroscopic dimensions dtype=np.complex64, # data type / precision compression='gzip', chunks=(1, spectrogram_size - 5), main_dset_attrs={'quantity': 'Complex'}) # h5_refs = hdf.write(chan_grp, print_log=False) # h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0] # link_h5_objects_as_attrs(h5_raw, get_h5_obj_refs(aux_ds_names, h5_refs)) self.raw_datasets.append(h5_raw) self.raw_datasets.append(h5_raw) # Now that the N channels have been made, populate them with the actual data.... self._read_data(parm_dict, parm_path, spectrogram_size) h5_file.file.close() # hdf.close() return self.h5_path
def translate(self, parm_path): """ The main function that translates the provided file into a .h5 file Parameters ------------ parm_path : string / unicode Absolute file path of the parameters .mat file. Returns ---------- h5_path : string / unicode Absolute path of the translated h5 file """ parm_path = path.abspath(parm_path) parm_dict, excit_wfm = self._read_parms(parm_path) self._parse_file_path(parm_path) num_dat_files = len(self.file_list) f = open(self.file_list[0], 'rb') spectrogram_size, count_vals = self._parse_spectrogram_size(f) print("Excitation waveform shape: ", excit_wfm.shape) print("spectrogram size:", spectrogram_size) num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols'] print('Number of pixels: ', num_pixels) print('Count Values: ', count_vals) if (num_pixels + 1) != count_vals: print("Data size does not match number of pixels expected. Cannot continue") #Find how many channels we have to make num_ai_chans = num_dat_files // 2 # Division by 2 due to real/imaginary # Now start creating datasets and populating: #Start with getting an h5 file h5_file = h5py.File(self.h5_path) #First create a measurement group h5_meas_group = create_indexed_group(h5_file, 'Measurement') #Set up some parameters that will be written as attributes to this Measurement group global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'trKPFM' global_parms['translator'] = 'trKPFM' write_simple_attrs(h5_meas_group, global_parms) write_simple_attrs(h5_meas_group, parm_dict) #Now start building the position and spectroscopic dimension containers #There's only one spectroscpoic dimension and two position dimensions #The excit_wfm only has the DC values without any information on cycles, time, etc. #What we really need is to add the time component. For every DC step there are some time steps. num_time_steps = (spectrogram_size-5) //excit_wfm.size #Let's repeat the excitation so that we get the full vector of same size as the spectrogram #TODO: Check if this is the norm for this type of dataset full_spect_val = np.copy(excit_wfm).repeat(num_time_steps) spec_dims = Dimension('Bias', 'V', full_spect_val) pos_dims = [Dimension('Cols', 'nm', parm_dict['grid_num_cols']), Dimension('Rows', 'um', parm_dict['grid_num_rows'])] self.raw_datasets = list() for chan_index in range(num_ai_chans): chan_grp = create_indexed_group(h5_meas_group,'Channel') if chan_index == 0: write_simple_attrs(chan_grp,{'Harmonic': 1}) else: write_simple_attrs(chan_grp,{'Harmonic': 2}) h5_raw = write_main_dataset(chan_grp, # parent HDF5 group (num_pixels, spectrogram_size - 5), # shape of Main dataset 'Raw_Data', # Name of main dataset 'Deflection', # Physical quantity contained in Main dataset 'V', # Units for the physical quantity pos_dims, # Position dimensions spec_dims, # Spectroscopic dimensions dtype=np.complex64, # data type / precision compression='gzip', chunks=(1, spectrogram_size - 5), main_dset_attrs={'quantity': 'Complex'}) #h5_refs = hdf.write(chan_grp, print_log=False) #h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0] #link_h5_objects_as_attrs(h5_raw, get_h5_obj_refs(aux_ds_names, h5_refs)) self.raw_datasets.append(h5_raw) self.raw_datasets.append(h5_raw) # Now that the N channels have been made, populate them with the actual data.... self._read_data(parm_dict, parm_path, spectrogram_size) h5_file.file.close() #hdf.close() return self.h5_path
def translate(self, file_path, verbose=False, append_path='', grp_name='Measurement', parm_encoding='utf-8'): """ Translates the provided file to .h5 Parameters ---------- file_path : String / unicode Absolute path of the .ibw file verbose : Boolean (Optional) Whether or not to show print statements for debugging append_path : string (Optional) h5_file to add these data to, must be a path to the h5_file on disk grp_name : string (Optional) Change from default "Measurement" name to something specific parm_encoding : str, optional Codec to be used to decode the bytestrings into Python strings if needed. Default 'utf-8' Returns ------- h5_path : String / unicode Absolute path of the .h5 file """ file_path = path.abspath(file_path) # Prepare the .h5 file: folder_path, base_name = path.split(file_path) base_name = base_name[:-4] if not append_path: h5_path = path.join(folder_path, base_name + '.h5') if path.exists(h5_path): remove(h5_path) h5_file = h5py.File(h5_path, 'w') else: h5_path = append_path if not path.exists(append_path): raise Exception('File does not exist. Check pathname.') h5_file = h5py.File(h5_path, 'r+') # Load the ibw file first ibw_obj = bw.load(file_path) ibw_wave = ibw_obj.get('wave') parm_dict = self._read_parms(ibw_wave, parm_encoding) chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding) if verbose: print('Channels and units found:') print(chan_labels) print(chan_units) # Get the data to figure out if this is an image or a force curve images = ibw_wave.get('wData') if images.shape[-1] != len(chan_labels): chan_labels = chan_labels[1:] # for layer 0 null set errors in older AR software if images.ndim == 3: # Image stack if verbose: print('Found image stack of size {}'.format(images.shape)) type_suffix = 'Image' num_rows = parm_dict['ScanLines'] num_cols = parm_dict['ScanPoints'] images = images.transpose(2, 1, 0) # now ordered as [chan, Y, X] image images = np.reshape(images, (images.shape[0], -1, 1)) # 3D [chan, Y*X points,1] pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)), Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))] spec_desc = Dimension('arb', 'a.u.', [1]) else: # single force curve if verbose: print('Found force curve of size {}'.format(images.shape)) type_suffix = 'ForceCurve' images = np.atleast_3d(images) # now [Z, chan, 1] images = images.transpose((1, 2, 0)) # [chan ,1, Z] force curve # The data generated above varies linearly. Override. # For now, we'll shove the Z sensor data into the spectroscopic values. # Find the channel that corresponds to either Z sensor or Raw: try: chan_ind = chan_labels.index('ZSnsr') spec_data = VALUES_DTYPE(images[chan_ind]).squeeze() except ValueError: try: chan_ind = chan_labels.index('Raw') spec_data = VALUES_DTYPE(images[chan_ind]).squeeze() except ValueError: # We don't expect to come here. 
If we do, spectroscopic values remains as is spec_data = np.arange(images.shape[2]) pos_desc = Dimension('X', 'm', [1]) spec_desc = Dimension('Z', 'm', spec_data) # Create measurement group meas_grp = create_indexed_group(h5_file, grp_name) # Write file and measurement level parameters global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'IgorIBW_' + type_suffix global_parms['translator'] = 'IgorIBW' write_simple_attrs(h5_file, global_parms) write_simple_attrs(meas_grp, parm_dict) # Create Position and spectroscopic datasets h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True) # Prepare the list of raw_data datasets for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units): if verbose: print('channel', chan_name) print('unit', chan_unit) chan_grp = create_indexed_group(meas_grp, 'Channel') write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data', chan_name, chan_unit, None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, dtype=np.float32) if verbose: print('Finished preparing raw datasets') h5_file.close() return h5_path
def translate(self, file_path, *args, **kwargs): """ Translates a given Bruker / Veeco / Nanoscope AFM derived file to HDF5. Currently handles scans, force curves, and force-distance maps Note that this translator was written with a single example file for each modality and may be buggy. Parameters ---------- file_path : str / unicode path to data file Returns ------- h5_path : str / unicode path to translated HDF5 file """ self.file_path = path.abspath(file_path) self.meta_data, other_parms = self._extract_metadata() # These files are weirdly named with extensions such as .001 h5_path = file_path.replace('.', '_') + '.h5' if path.exists(h5_path): remove(h5_path) h5_file = h5py.File(h5_path, 'w') type_suffixes = ['Image', 'Force_Curve', 'Force_Map'] # 0 - stack of scan images # 1 - single force curve # 2 - force map force_count = 0 image_count = 0 for class_name in self.meta_data.keys(): if 'Ciao force image list' in class_name: force_count += 1 elif 'Ciao image list' in class_name: image_count += 1 data_type = 0 if force_count > 0: if image_count > 0: data_type = 2 else: data_type = 1 global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'Bruker_AFM_' + type_suffixes[data_type] global_parms['translator'] = 'Bruker_AFM' write_simple_attrs(h5_file, global_parms) # too many parameters. Making a dummy group just for the parameters. h5_parms_grp = h5_file.create_group('Parameters') # We currently have a dictionary of dictionaries. This needs to be flattened flat_dict = dict() for class_name, sub_dict in other_parms.items(): for key, val in sub_dict.items(): flat_dict[class_name + '_' + key] = val write_simple_attrs(h5_parms_grp, flat_dict) # Create measurement group h5_meas_grp = create_indexed_group(h5_file, 'Measurement') # Call the data specific translation function trans_funcs = [ self._translate_image_stack, self._translate_force_curve, self._translate_force_map ] trans_funcs[data_type](h5_meas_grp) # wrap up and return path h5_file.close() return h5_path
def _setup_h5(self, data_gen_parms): """ Setups up the hdf5 file structure before doing the actual generation Parameters ---------- data_gen_parms : dict Dictionary containing the parameters to write to the Measurement Group as attributes Returns ------- """ ''' Build the group structure down to the channel group ''' # Set up the basic group structure root_parms = dict() root_parms['translator'] = 'FAKEBEPS' root_parms['data_type'] = data_gen_parms['data_type'] # Write the file self.h5_f = h5py.File(self.h5_path, 'w') write_simple_attrs(self.h5_f, root_parms) meas_grp = create_indexed_group(self.h5_f, 'Measurement') chan_grp = create_indexed_group(meas_grp, 'Channel') write_simple_attrs(meas_grp, data_gen_parms) # Create the Position and Spectroscopic datasets for the Raw Data h5_pos_dims, h5_spec_dims = self._build_ancillary_datasets() h5_raw_data = write_main_dataset(chan_grp, (self.n_pixels, self.n_spec_bins), 'Raw_Data', 'Deflection', 'Volts', h5_pos_dims, h5_spec_dims, slow_to_fast=True, dtype=np.complex64, verbose=True) ''' Build the SHO Group ''' sho_grp = create_results_group(h5_raw_data, 'SHO_Fit') # Build the Spectroscopic datasets for the SHO Guess and Fit h5_sho_spec_inds, h5_sho_spec_vals = write_reduced_anc_dsets( sho_grp, h5_raw_data.h5_spec_inds, h5_raw_data.h5_spec_vals, 'Frequency', is_spec=True) h5_sho_fit = write_main_dataset( sho_grp, (self.n_pixels, int(self.n_spec_bins // self.n_bins)), 'Fit', 'SHO Parameters', 'a.u.', None, None, h5_pos_inds=h5_raw_data.h5_pos_inds, h5_pos_vals=h5_raw_data.h5_pos_vals, h5_spec_inds=h5_sho_spec_inds, h5_spec_vals=h5_sho_spec_vals, slow_to_fast=True, dtype=sho32) h5_sho_guess = copy_dataset(h5_sho_fit, sho_grp, alias='Guess') ''' Build the loop group ''' loop_grp = create_results_group(h5_sho_fit, 'Loop_Fit') # Build the Spectroscopic datasets for the loops h5_loop_spec_inds, h5_loop_spec_vals = write_reduced_anc_dsets( loop_grp, h5_sho_fit.h5_spec_inds, h5_sho_fit.h5_spec_vals, 'DC_Offset', is_spec=True) h5_loop_fit = write_main_dataset(loop_grp, (self.n_pixels, self.n_loops), 'Fit', 'Loop Fitting Parameters', 'a.u.', None, None, h5_pos_inds=h5_raw_data.h5_pos_inds, h5_pos_vals=h5_raw_data.h5_pos_vals, h5_spec_inds=h5_loop_spec_inds, h5_spec_vals=h5_loop_spec_vals, slow_to_fast=True, dtype=loop_fit32) h5_loop_guess = copy_dataset(h5_loop_fit, loop_grp, alias='Guess') copy_all_region_refs(h5_loop_guess, h5_loop_fit) self.h5_raw = h5_raw_data self.h5_sho_guess = h5_sho_guess self.h5_sho_fit = h5_sho_fit self.h5_loop_guess = h5_loop_guess self.h5_loop_fit = h5_loop_fit self.h5_spec_vals = h5_raw_data.h5_spec_vals self.h5_spec_inds = h5_raw_data.h5_spec_inds self.h5_sho_spec_inds = h5_sho_fit.h5_spec_inds self.h5_sho_spec_vals = h5_sho_fit.h5_spec_vals self.h5_loop_spec_inds = h5_loop_fit.h5_spec_inds self.h5_loop_spec_vals = h5_loop_fit.h5_spec_vals self.h5_file = h5_raw_data.file return
def translate(self, file_path, verbose=False, parm_encoding='utf-8', ftype='FF', subfolder='Measurement_000', h5_path='', channel_label_name=True): """ Translates the provided file to .h5 Adapted heavily from pycroscopy IBW file, modified to work with Ginger format :param file_path: Absolute path of the .ibw file :type file_path: String / unicode :param verbose: Whether or not to show print statements for debugging :type verbose: boolean, optional :param parm_encoding: Codec to be used to decode the bytestrings into Python strings if needed. Default 'utf-8' :type parm_encoding: str, optional :param ftype: Delineates Ginger Lab imaging file type to be imported (not case-sensitive) 'FF' : FF-trEFM 'SKPM' : FM-SKPM 'ringdown' : Ringdown 'trEFM' : normal trEFM :type ftype: str, optional :param subfolder: Specifies folder under root (/) to save data in. Default is standard pycroscopy format :type subfolder: str, optional :param h5_path: Existing H5 file to append to :type h5_path: str, optional :param channel_label_name: If True, uses the Channel as the subfolder name (e.g. Height, Phase, Amplitude, Charging) :type channel_label_name: bool, optional :returns: Absolute path of the .h5 file :rtype: String / unicode """ # Prepare the .h5 file: if not any(h5_path): folder_path, base_name = path.split(file_path) base_name = base_name[:-4] h5_path = path.join(folder_path, base_name + '.h5') # hard-coded exception, rarely occurs but can be useful if path.exists(h5_path): h5_path = path.join(folder_path, base_name + '_00.h5') h5_file = h5py.File(h5_path, 'w') # If subfolder improperly formatted if subfolder == '': subfolder = '/' # Load the ibw file first ibw_obj = bw.load(file_path) ibw_wave = ibw_obj.get('wave') parm_dict = self._read_parms(ibw_wave, parm_encoding) chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding) if verbose: print('Channels and units found:') print(chan_labels) print(chan_units) # Get the data to figure out if this is an image or a force curve images = ibw_wave.get('wData') if images.shape[2] != len(chan_labels): chan_labels = chan_labels[1:] # for weird null set errors in older AR software # Check if a Ginger Lab format ibw (has 'UserIn' in channel labels) _is_gl_type = any(['UserIn0' in str(s) for s in chan_labels]) if _is_gl_type == True: chan_labels = self._get_image_type(chan_labels, ftype) if verbose: print('Processing image type', ftype, 'with channels', chan_labels) type_suffix = 'Image' num_rows = ibw_wave['wave_header']['nDim'][1] # lines num_cols = ibw_wave['wave_header']['nDim'][0] # points num_imgs = ibw_wave['wave_header']['nDim'][2] # layers unit_scale = self._get_unit_factor(''.join([str(s)[-2] for s in ibw_wave['wave_header']['dimUnits'][0][0:2]])) data_scale = self._get_unit_factor(str(ibw_wave['wave_header']['dataUnits'][0])[-2]) parm_dict['FastScanSize'] = unit_scale * num_cols * ibw_wave['wave_header']['sfA'][0] parm_dict['SlowScanSize'] = unit_scale * num_rows * ibw_wave['wave_header']['sfA'][1] images = images.transpose(2, 0, 1) # now ordered as [chan, Y, X] image images = np.reshape(images, (images.shape[0], -1, 1)) # 3D [chan, Y*X points,1] pos_desc = [Dimension(name='X', units='m', values=np.linspace(0, parm_dict['FastScanSize'], num_cols)), Dimension(name='Y', units='m', values=np.linspace(0, parm_dict['SlowScanSize'], num_rows))] spec_desc = [Dimension(name='arb', units='a.u.', values=[1])] # Create Position and spectroscopic datasets h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_file['/'], pos_desc, is_spectral=False) 
h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_file['/'], spec_desc, is_spectral=True) # Prepare the list of raw_data datasets for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units): chan_grp = create_indexed_group(h5_file['/'], chan_name) write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data', chan_name, chan_unit, pos_desc, spec_desc, dtype=np.float32) if verbose: print('Finished writing all channels') h5_file.close() return h5_path
def _setupH5(self, usize, vsize, data_type, num_images, main_parms): """ Setup the HDF5 file in which to store the data including creating the Position and Spectroscopic datasets Parameters ---------- usize : int Number of pixel columns in the images vsize : int Number of pixel rows in the images data_type : type Data type to save image as num_images : int Number of images in the movie main_parms : dict Returns ------- h5_main : h5py.Dataset HDF5 Dataset that the images will be written into h5_mean_spec : h5py.Dataset HDF5 Dataset that the mean over all positions will be written into h5_ronch : h5py.Dataset HDF5 Dateset that the mean over all Spectroscopic steps will be written into """ num_pixels = usize * vsize root_parms = generate_dummy_main_parms() root_parms['data_type'] = 'PtychographyData' main_parms['num_images'] = num_images main_parms['image_size_u'] = usize main_parms['image_size_v'] = vsize main_parms['num_pixels'] = num_pixels main_parms['translator'] = 'Movie' # Create the hdf5 data Group write_simple_attrs(self.h5_file, root_parms) meas_grp = create_indexed_group(self.h5_file, 'Measurement') write_simple_attrs(meas_grp, main_parms) chan_grp = create_indexed_group(meas_grp, 'Channel') # Build the Position and Spectroscopic Datasets spec_dim = Dimension('Time', 's', np.arange(num_images)) pos_dims = [ Dimension('X', 'a.u.', np.arange(usize)), Dimension('Y', 'a.u.', np.arange(vsize)) ] ds_chunking = calc_chunks([num_pixels, num_images], data_type(0).itemsize, unit_chunks=(num_pixels, 1)) # Allocate space for Main_Data and Pixel averaged Data h5_main = write_main_dataset(chan_grp, (num_pixels, num_images), 'Raw_Data', 'Intensity', 'a.u.', pos_dims, spec_dim, chunks=ds_chunking, dtype=data_type) h5_ronch = meas_grp.create_dataset('Mean_Ronchigram', data=np.zeros(num_pixels, dtype=np.float32), dtype=np.float32) h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean', data=np.zeros(num_images, dtype=np.float32), dtype=np.float32) self.h5_file.flush() return h5_main, h5_mean_spec, h5_ronch
def translate(self, file_path, show_plots=True, save_plots=True, do_histogram=False): """ Basic method that translates .dat data file(s) to a single .h5 file Inputs: file_path -- Absolute file path for one of the data files. It is assumed that this file is of the OLD data format. Outputs: Nothing """ file_path = path.abspath(file_path) (folder_path, basename) = path.split(file_path) (basename, path_dict) = self._parse_file_path(file_path) h5_path = path.join(folder_path, basename + '.h5') if path.exists(h5_path): remove(h5_path) self.h5_file = h5py.File(h5_path, 'w') isBEPS = True parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms']) ignored_plt_grps = ['in-field'] # Here we assume that there is no in-field. # If in-field data is captured then the translator would have to be modified. # Technically, we could do away with this if statement, as isBEPS is always true for this translation if isBEPS: parm_dict['data_type'] = 'BEPSData' std_expt = parm_dict['VS_mode'] != 'load user defined VS Wave from file' if not std_expt: warn('This translator does not handle user defined voltage spectroscopy') return spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode']) # Check file sizes: if 'read_real' in path_dict.keys(): real_size = path.getsize(path_dict['read_real']) imag_size = path.getsize(path_dict['read_imag']) else: real_size = path.getsize(path_dict['write_real']) imag_size = path.getsize(path_dict['write_imag']) if real_size != imag_size: raise ValueError("Real and imaginary file sizes DON'T match!. Ending") num_rows = int(parm_dict['grid_num_rows']) num_cols = int(parm_dict['grid_num_cols']) num_pix = num_rows * num_cols tot_bins = real_size / (num_pix * 4) # Finding bins by simple division of entire datasize # Check for case where only a single pixel is missing. check_bins = real_size / ((num_pix - 1) * 4) if tot_bins % 1 and check_bins % 1: warn('Aborting! Some parameter appears to have changed in-between') return elif not tot_bins % 1: # Everything's ok pass elif not check_bins % 1: tot_bins = check_bins warn('Warning: A pixel seems to be missing from the data. File will be padded with zeros.') tot_bins = int(tot_bins) (bin_inds, bin_freqs, bin_FFT, ex_wfm, dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms']) """ Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less than the actual number), we need to re-calculate it based on the available data. This is done below. """ band_width = parm_dict['BE_band_width_[Hz]'] * (0.5 - parm_dict['BE_band_edge_trim']) st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32) # Forcing standardized datatypes: bin_inds = np.int32(bin_inds) bin_freqs = np.float32(bin_freqs) bin_FFT = np.complex64(bin_FFT) ex_wfm = np.float32(ex_wfm) self.FFT_BE_wave = bin_FFT (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict) # Remove the unused plot group columns before proceeding: (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs, UDVS_units, ignored_plt_grps) spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE) # Will assume that all excitation waveforms have same number of bins # Here, the denominator is 2 because only out of field measruements. 
For IF + OF, should be 1 num_actual_udvs_steps = UDVS_mat.shape[0] / 2 bins_per_step = tot_bins / num_actual_udvs_steps # Some more checks if bins_per_step % 1: warn('Non integer number of bins per step!') return else: bins_per_step = int(bins_per_step) num_actual_udvs_steps = int(num_actual_udvs_steps) stind = 0 for step_index in range(UDVS_mat.shape[0]): if UDVS_mat[step_index, 2] < 1E-3: # invalid AC amplitude continue # skip spec_inds[0, stind:stind + bins_per_step] = np.arange(bins_per_step, dtype=INDICES_DTYPE) # Bin step spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(bins_per_step, dtype=INDICES_DTYPE) # UDVS step stind += bins_per_step del stind, step_index # Some very basic information that can help the processing / analysis crew parm_dict['num_bins'] = tot_bins parm_dict['num_pix'] = num_pix parm_dict['num_udvs_steps'] = num_actual_udvs_steps global_parms = generate_dummy_main_parms() global_parms['grid_size_x'] = parm_dict['grid_num_cols'] global_parms['grid_size_y'] = parm_dict['grid_num_rows'] global_parms['experiment_date'] = parm_dict['File_date_and_time'] # assuming that the experiment was completed: global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1 global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1 global_parms['data_type'] = parm_dict['data_type'] # self.__class__.__name__ global_parms['translator'] = 'ODF' write_simple_attrs(self.h5_file, global_parms) # Create Measurement and Channel groups meas_grp = create_indexed_group(self.h5_file, 'Measurement') write_simple_attrs(meas_grp, parm_dict) chan_grp = create_indexed_group(meas_grp, 'Channel') chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1'] # Create Auxilliary Datasets h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm) udvs_slices = dict() for col_ind, col_name in enumerate(UDVS_labs): udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1)) h5_UDVS = chan_grp.create_dataset('UDVS', data=UDVS_mat, dtype=np.float32) write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units}) h5_bin_steps = chan_grp.create_dataset('Bin_Steps', data=np.arange(bins_per_step, dtype=np.uint32), dtype=np.uint32) # Need to add the Bin Waveform type - infer from UDVS exec_bin_vec = self.signal_type * np.ones(len(bin_inds), dtype=np.int32) h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type', data=exec_bin_vec, dtype=np.int32) h5_bin_inds = chan_grp.create_dataset('Bin_Indices', data=bin_inds, dtype=np.uint32) h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=bin_freqs, dtype=np.float32) h5_bin_FFT = chan_grp.create_dataset('Bin_FFT', data=bin_FFT, dtype=np.complex64) # Noise floor should be of shape: (udvs_steps x 3 x positions) h5_noise_floor = chan_grp.create_dataset('Noise_Floor', shape=(num_pix, num_actual_udvs_steps), dtype=nf32, chunks=(1, num_actual_udvs_steps)) """ ONLY ALLOCATING SPACE FOR MAIN DATA HERE! Chunk by each UDVS step - this makes it easy / quick to: 1. read data for a single UDVS step from all pixels 2. read an entire / multiple pixels at a time The only problem is that a typical UDVS step containing 50 steps occupies only 400 bytes. This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes meaning that the metadata would be very substantial. This assumption is fine since we almost do not handle any user defined cases """ """ New Method for chunking the Main_Data dataset. Chunking is now done in N-by-N squares of UDVS steps by pixels. 
N is determined dinamically based on the dimensions of the dataset. Currently it is set such that individual chunks are less than 10kB in size. Chris Smith -- [email protected] """ pos_dims = [Dimension('X', 'nm', num_cols), Dimension('Y', 'nm', num_rows)] # Create Spectroscopic Values and Spectroscopic Values Labels datasets spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(UDVS_mat, spec_inds, bin_freqs, exec_bin_vec, parm_dict, UDVS_labs, UDVS_units) spec_dims = list() for row_ind, row_name in enumerate(spec_vals_labs): spec_dims.append(Dimension(row_name, spec_vals_units[row_ind], spec_vals[row_ind])) pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps, bins_per_step, np.dtype('complex64').itemsize) chunking = np.floor(np.sqrt(pixel_chunking)) chunking = max(1, chunking) chunking = min(num_actual_udvs_steps, num_pix, chunking) self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins), 'Raw_Data', 'Piezoresponse', 'V', pos_dims, spec_dims, dtype=np.complex64, chunks=(chunking, chunking * bins_per_step), compression='gzip') self.mean_resp = np.zeros(shape=(self.ds_main.shape[1]), dtype=np.complex64) self.max_resp = np.zeros(shape=(self.ds_main.shape[0]), dtype=np.float32) self.min_resp = np.zeros(shape=(self.ds_main.shape[0]), dtype=np.float32) # Now read the raw data files: self._read_data(path_dict['read_real'], path_dict['read_imag'], parm_dict) self.h5_file.flush() generatePlotGroups(self.ds_main, self.mean_resp, folder_path, basename, self.max_resp, self.min_resp, max_mem_mb=self.max_ram, spec_label=spec_label, show_plots=show_plots, save_plots=save_plots, do_histogram=do_histogram) self.h5_file.close() return h5_path
def translate(self, file_path, *args, **kwargs): """ Translates a given Bruker / Veeco / Nanoscope AFM derived file to HDF5. Currently handles scans, force curves, and force-distance maps Note that this translator was written with a single example file for each modality and may be buggy. Parameters ---------- file_path : str / unicode path to data file Returns ------- h5_path : str / unicode path to translated HDF5 file """ self.file_path = path.abspath(file_path) self.meta_data, other_parms = self._extract_metadata() # These files are weirdly named with extensions such as .001 h5_path = file_path.replace('.', '_') + '.h5' if path.exists(h5_path): remove(h5_path) h5_file = h5py.File(h5_path, 'w') type_suffixes = ['Image', 'Force_Curve', 'Force_Map'] # 0 - stack of scan images # 1 - single force curve # 2 - force map force_count = 0 image_count = 0 for class_name in self.meta_data.keys(): if 'Ciao force image list' in class_name: force_count += 1 elif 'Ciao image list' in class_name: image_count += 1 data_type = 0 if force_count > 0: if image_count > 0: data_type = 2 else: data_type = 1 global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'Bruker_AFM_' + type_suffixes[data_type] global_parms['translator'] = 'Bruker_AFM' write_simple_attrs(h5_file, global_parms) # too many parameters. Making a dummy group just for the parameters. h5_parms_grp = h5_file.create_group('Parameters') # We currently have a dictionary of dictionaries. This needs to be flattened flat_dict = dict() for class_name, sub_dict in other_parms.items(): for key, val in sub_dict.items(): flat_dict[class_name + '_' + key] = val write_simple_attrs(h5_parms_grp, flat_dict) # Create measurement group h5_meas_grp = create_indexed_group(h5_file, 'Measurement') # Call the data specific translation function trans_funcs = [self._translate_image_stack, self._translate_force_curve, self._translate_force_map] trans_funcs[data_type](h5_meas_grp) # wrap up and return path h5_file.close() return h5_path
def translate(self, file_path): """ The main function that translates the provided file into a .h5 file Parameters ---------- file_path : String / unicode Absolute path of any file in the directory Returns ------- h5_path : String / unicode Absolute path of the h5 file """ file_path = path.abspath(file_path) # Figure out the basename of the data: (basename, parm_paths, data_paths) = self._parse_file_path(file_path) (folder_path, unused) = path.split(file_path) h5_path = path.join(folder_path, basename+'.h5') if path.exists(h5_path): remove(h5_path) # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows' matread = loadmat(parm_paths['parm_mat'], variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows']) be_wave = np.float32(np.squeeze(matread['BE_wave'])) # Need to take the complex conjugate if reading from a .mat file # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave']))) num_cols = int(matread['total_cols'][0][0]) expected_rows = int(matread['total_rows'][0][0]) self.points_per_pixel = len(be_wave) # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate' is_beps, parm_dict = parmsToDict(parm_paths['parm_txt']) # Get file byte size: # For now, assume that bigtime_00 always exists and is the main file file_size = path.getsize(data_paths[0]) # Calculate actual number of lines since the first few lines may not be saved self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols) if self.num_rows % 1: warn('Error - File has incomplete rows') return None else: self.num_rows = int(self.num_rows) samp_rate = parm_dict['IO_rate_[Hz]'] ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]'] # method 1 for calculating the correct excitation frequency: pixel_duration = 1.0 * self.points_per_pixel / samp_rate num_periods = pixel_duration * ex_freq_nominal ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods)) # method 2 for calculating the exact excitation frequency: """ fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave))) w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel, self.points_per_pixel) hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3)) ex_freq_correct = w_vec[hot_bins[-1]] """ # correcting the excitation frequency - will be VERY useful during analysis and filtering parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct # Some very basic information that can help the processing crew parm_dict['num_bins'] = self.points_per_pixel parm_dict['grid_num_rows'] = self.num_rows parm_dict['data_type'] = 'G_mode_line' if self.num_rows != expected_rows: print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows)) # Calculate number of points to read per line: self.__bytes_per_row__ = int(file_size/self.num_rows) # First finish writing all global parameters, create the file too: h5_f = h5py.File(h5_path, 'w') global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'G_mode_line' global_parms['translator'] = 'G_mode_line' write_simple_attrs(h5_f, global_parms) meas_grp = create_indexed_group(h5_f, 'Measurement') write_simple_attrs(meas_grp, parm_dict) pos_desc = Dimension('Y', 'm', np.arange(self.num_rows)) spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols)) first_dat = True for key in data_paths.keys(): # Now that the file has been created, go over each raw data file: # 1. write all ancillary data. Link data. 2. 
Write main data sequentially """ We only allocate the space for the main data here. This does NOT change with each file. The data written to it does. The auxiliary datasets will not change with each raw data file since only one excitation waveform is used""" chan_grp = create_indexed_group(meas_grp, 'Channel') if first_dat: if len(data_paths) > 1: # All positions and spectra are shared between channels h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True) elif len(data_paths) == 1: h5_pos_inds, h5_pos_vals = write_ind_val_dsets(chan_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(chan_grp, spec_desc, is_spectral=True) first_dat = False else: pass h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data', 'Deflection', 'V', None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, chunks=(1, self.points_per_pixel), dtype=np.float16) # Now transfer scan data in the dat file to the h5 file: self._read_data(data_paths[key], h5_main) h5_f.close() print('G-Line translation complete!') return h5_path
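# --- Illustrative sketch (not part of the translator above) ---
# The row count above is inferred from the file size: G-line files store
# float32 readings, so each complete row occupies 4 * points_per_pixel *
# num_cols bytes. A hypothetical standalone version of that check:
import warnings

def rows_from_file_size(file_size_bytes, points_per_pixel, num_cols, bytes_per_point=4):
    num_rows = file_size_bytes / (bytes_per_point * points_per_pixel * num_cols)
    if num_rows % 1:
        warnings.warn('File has incomplete rows')
        return None
    return int(num_rows)

# 128 columns, 16384 points per pixel, 50 complete rows
print(rows_from_file_size(4 * 16384 * 128 * 50, 16384, 128))  # 50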
def translate(self, data_channels=None, verbose=False): """ Translates the data in the Nanonis file into a Pycroscopy compatible HDF5 file. Parameters ---------- data_channels : (optional) list of str Names of channels that will be read and stored in the file. If not given, all channels in the file will be used. verbose : (optional) Boolean Whether or not to print statements Returns ------- h5_path : str Filepath to the output HDF5 file. """ if self.parm_dict is None or self.data_dict is None: self._read_data(self.data_path) if data_channels is None: print('No channels specified. All channels in file will be used.') data_channels = self.parm_dict['channels'] if verbose: print('Using the following channels') for channel in data_channels: print(channel) if os.path.exists(self.h5_path): os.remove(self.h5_path) h5_file = h5py.File(self.h5_path, 'w') meas_grp = create_indexed_group(h5_file, 'Measurement') dc_offset = self.data_dict['sweep_signal'] spec_label, spec_units = self.parm_dict['sweep_signal'].split() spec_units = spec_units.strip('()') spec_dim = Dimension(spec_label, spec_units, dc_offset) pos_dims = self.data_dict['Position Dimensions'] h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_dims, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_dim, is_spectral=True) num_points = h5_pos_inds.shape[0] for data_channel in data_channels: raw_data = self.data_dict[data_channel].reshape( [num_points, -1]) * 1E9 # Convert to nA chan_grp = create_indexed_group(meas_grp, 'Channel') data_label, data_unit = data_channel.rsplit(maxsplit=1) data_unit = data_unit.strip('()') write_main_dataset(chan_grp, raw_data, 'Raw_Data', data_label, data_unit, None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals) h5_file.flush() h5_file.close() print('Nanonis translation complete.') return self.h5_path
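# --- Illustrative sketch (not part of the translator above) ---
# Nanonis channel names carry their unit in parentheses (e.g. 'Current (A)'),
# so the label/unit split above takes the last whitespace-separated token and
# strips the parentheses. A hypothetical standalone version:
def split_label_and_unit(channel_name):
    label, unit = channel_name.rsplit(maxsplit=1)
    return label, unit.strip('()')

print(split_label_and_unit('Current (A)'))    # ('Current', 'A')
print(split_label_and_unit('Bias calc (V)'))  # ('Bias calc', 'V')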
def translate(self, parm_path): """ The main function that translates the provided file into a .h5 file Parameters ------------ parm_path : string / unicode Absolute file path of the parameters .mat file. Returns ---------- h5_path : string / unicode Absolute path of the translated h5 file """ parm_path = path.abspath(parm_path) parm_dict, excit_wfm = self._read_parms(parm_path) folder_path, base_name = path.split(parm_path) waste, base_name = path.split(folder_path) # Until a better method is provided.... with h5py.File(path.join(folder_path, 'line_1.mat'), 'r') as h5_mat_line_1: num_ai_chans = h5_mat_line_1['data'].shape[1] h5_path = path.join(folder_path, base_name+'.h5') if path.exists(h5_path): remove(h5_path) with h5py.File(h5_path) as h5_f: h5_meas_grp = create_indexed_group(h5_f, 'Measurement') global_parms = generate_dummy_main_parms() global_parms.update({'data_type': 'gIV', 'translator': 'gIV'}) write_simple_attrs(h5_meas_grp, global_parms) # Only prepare the instructions for the dimensions here spec_dims = Dimension('Bias', 'V', excit_wfm) pos_dims = Dimension('Y', 'm', np.linspace(0, parm_dict['grid_scan_height_[m]'], parm_dict['grid_num_rows'])) self.raw_datasets = list() for chan_index in range(num_ai_chans): h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel') write_simple_attrs(h5_chan_grp, parm_dict) """ Minimize file size to the extent possible. DAQs are rated at 16 bit so float16 should be most appropriate. For some reason, compression is effective only on time series data """ h5_raw = write_main_dataset(h5_chan_grp, (parm_dict['grid_num_rows'], excit_wfm.size), 'Raw_Data', 'Current', '1E-{} A'.format(parm_dict['IO_amplifier_gain']), pos_dims, spec_dims, dtype=np.float16, chunks=(1, excit_wfm.size), compression='gzip') self.raw_datasets.append(h5_raw) # Now that the N channels have been made, populate them with the actual data.... self._read_data(parm_dict, folder_path) return h5_path
def translate(self, file_path): """ The main function that translates the provided file into a .h5 file Parameters ---------- file_path : String / unicode Absolute path of any file in the directory Returns ------- h5_path : String / unicode Absolute path of the h5 file """ file_path = path.abspath(file_path) # Figure out the basename of the data: (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path) (folder_path, unused) = path.split(file_path) h5_path = path.join(folder_path, basename + '.h5') if path.exists(h5_path): remove(h5_path) # Load parameters from .mat file matread = loadmat(parm_paths['parm_mat'], variable_names=[ 'AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train', 'BE_wave', 'total_cols', 'total_rows' ]) be_wave = np.float32(np.squeeze(matread['BE_wave'])) be_wave_train = np.float32(np.squeeze(matread['BE_wave_train'])) num_cols = int(matread['total_cols'][0][0]) expected_rows = int(matread['total_rows'][0][0]) self.points_per_pixel = len(be_wave) self.points_per_line = len(be_wave_train) # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate' is_beps, parm_dict = parmsToDict(parm_paths['parm_txt']) # Get file byte size: # For now, assume that bigtime_00 always exists and is the main file file_size = path.getsize(data_paths[0]) # Calculate actual number of lines since the first few lines may not be saved self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols) if self.num_rows % 1: warn('Error - File has incomplete rows') return None else: self.num_rows = int(self.num_rows) samp_rate = parm_dict['IO_rate_[Hz]'] ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]'] # method 1 for calculating the correct excitation frequency: pixel_duration = 1.0 * self.points_per_pixel / samp_rate num_periods = pixel_duration * ex_freq_nominal ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods)) # correcting the excitation frequency - will be VERY useful during analysis and filtering parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct # Some very basic information that can help the processing crew parm_dict['points_per_line'] = self.points_per_line parm_dict['num_bins'] = self.points_per_pixel parm_dict['grid_num_rows'] = self.num_rows parm_dict['data_type'] = 'G_mode_line' if self.num_rows != expected_rows: print('Note: {} of {} lines found in data file'.format( self.num_rows, expected_rows)) # Calculate number of points to read per line: self.__bytes_per_row__ = int(file_size / self.num_rows) # First finish writing all global parameters, create the file too: h5_file = h5py.File(h5_path, 'w') global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'G_mode_line' global_parms['translator'] = 'G_mode_line' write_simple_attrs(h5_file, global_parms) # Next create the Measurement and Channel groups and write the appropriate parameters to them meas_grp = create_indexed_group(h5_file, 'Measurement') write_simple_attrs(meas_grp, parm_dict) # Now that the file has been created, go over each raw data file: """ We only allocate the space for the main data here. This does NOT change with each file. The data written to it does. 
The auxiliary datasets will not change with each raw data file since only one excitation waveform is used """ pos_desc = Dimension('Y', 'm', np.arange(self.num_rows)) spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols)) h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True) for f_index in data_paths.keys(): chan_grp = create_indexed_group(meas_grp, 'Channel') h5_main = write_main_dataset( chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data', 'Deflection', 'V', None, None, h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, chunks=(1, self.points_per_pixel), dtype=np.float16) # Now transfer scan data in the dat file to the h5 file: super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main) h5_file.close() print('G-Tune translation complete!') return h5_path
def translate(self, file_path): """ The main function that translates the provided file into a .h5 file Parameters ---------- file_path : String / unicode Absolute path of any file in the directory Returns ------- h5_path : String / unicode Absolute path of the h5 file """ file_path = path.abspath(file_path) # Figure out the basename of the data: (basename, parm_paths, data_paths) = self._parse_file_path(file_path) (folder_path, unused) = path.split(file_path) h5_path = path.join(folder_path, basename + '.h5') if path.exists(h5_path): remove(h5_path) # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows' matread = loadmat(parm_paths['parm_mat'], variable_names=[ 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows' ]) be_wave = np.float32(np.squeeze(matread['BE_wave'])) # Need to take the complex conjugate if reading from a .mat file # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave']))) num_cols = int(matread['total_cols'][0][0]) expected_rows = int(matread['total_rows'][0][0]) self.points_per_pixel = len(be_wave) # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate' is_beps, parm_dict = parmsToDict(parm_paths['parm_txt']) # Get file byte size: # For now, assume that bigtime_00 always exists and is the main file file_size = path.getsize(data_paths[0]) # Calculate actual number of lines since the first few lines may not be saved self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols) if self.num_rows % 1: warn('Error - File has incomplete rows') return None else: self.num_rows = int(self.num_rows) samp_rate = parm_dict['IO_rate_[Hz]'] ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]'] # method 1 for calculating the correct excitation frequency: pixel_duration = 1.0 * self.points_per_pixel / samp_rate num_periods = pixel_duration * ex_freq_nominal ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods)) # method 2 for calculating the exact excitation frequency: """ fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave))) w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel, self.points_per_pixel) hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3)) ex_freq_correct = w_vec[hot_bins[-1]] """ # correcting the excitation frequency - will be VERY useful during analysis and filtering parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct # Some very basic information that can help the processing crew parm_dict['num_bins'] = self.points_per_pixel parm_dict['grid_num_rows'] = self.num_rows parm_dict['data_type'] = 'G_mode_line' if self.num_rows != expected_rows: print('Note: {} of {} lines found in data file'.format( self.num_rows, expected_rows)) # Calculate number of points to read per line: self.__bytes_per_row__ = int(file_size / self.num_rows) # First finish writing all global parameters, create the file too: h5_f = h5py.File(h5_path, 'w') global_parms = dict() global_parms['data_type'] = 'G_mode_line' global_parms['translator'] = 'G_mode_line' write_simple_attrs(h5_f, global_parms) meas_grp = create_indexed_group(h5_f, 'Measurement') write_simple_attrs(meas_grp, parm_dict) pos_desc = Dimension('Y', 'm', np.arange(self.num_rows)) spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols)) first_dat = True for key in data_paths.keys(): # Now that the file has been created, go over each raw data file: # 1. write all ancillary data. Link data. 2. Write main data sequentially """ We only allocate the space for the main data here. 
This does NOT change with each file. The data written to it does. The auxiliary datasets will not change with each raw data file since only one excitation waveform is used""" chan_grp = create_indexed_group(meas_grp, 'Channel') if first_dat: if len(data_paths) > 1: # All positions and spectra are shared between channels h5_pos_inds, h5_pos_vals = write_ind_val_dsets( meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets( meas_grp, spec_desc, is_spectral=True) elif len(data_paths) == 1: h5_pos_inds, h5_pos_vals = write_ind_val_dsets( chan_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets( chan_grp, spec_desc, is_spectral=True) first_dat = False else: pass h5_main = write_main_dataset( chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data', 'Deflection', 'V', None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, chunks=(1, self.points_per_pixel), dtype=np.float16) # Now transfer scan data in the dat file to the h5 file: self._read_data(data_paths[key], h5_main) h5_f.close() print('G-Line translation complete!') return h5_path
def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y): """ Setup the HDF5 file in which to store the data including creating the Position and Spectroscopic datasets Parameters ---------- usize : int Number of pixel columns in the images vsize : int Number of pixel rows in the images data_type : type Data type to save image as scan_size_x : int Number of images in the x dimension scan_size_y : int Number of images in the y dimension Returns ------- h5_main : h5py.Dataset HDF5 Dataset that the images will be written into h5_mean_spec : h5py.Dataset HDF5 Dataset that the mean over all positions will be written into h5_ronch : h5py.Dataset HDF5 Dateset that the mean over all Spectroscopic steps will be written into """ num_pixels = usize * vsize num_files = scan_size_x * scan_size_y root_parms = dict() root_parms['data_type'] = 'ImageStackData' main_parms = { 'num_images': num_files, 'image_size_u': usize, 'image_size_v': vsize, 'num_pixels': num_pixels, 'translator': 'ImageStack', 'scan_size_x': scan_size_x, 'scan_size_y': scan_size_y } # Create the hdf5 data Group write_simple_attrs(self.h5_file, root_parms) meas_grp = create_indexed_group(self.h5_file, 'Measurement') write_simple_attrs(meas_grp, main_parms) chan_grp = create_indexed_group(meas_grp, 'Channel') # Build the Position and Spectroscopic Datasets spec_desc = [ Dimension('U', 'pixel', np.arange(usize)), Dimension('V', 'pixel', np.arange(vsize)) ] pos_desc = [ Dimension('X', 'pixel', np.arange(scan_size_x)), Dimension('Y', 'pixel', np.arange(scan_size_y)) ] ds_chunking = calc_chunks([num_files, num_pixels], data_type(0).itemsize, unit_chunks=(1, num_pixels)) # Allocate space for Main_Data and Pixel averaged Data h5_main = write_main_dataset(chan_grp, (num_files, num_pixels), 'Raw_Data', 'Intensity', 'a.u.', pos_desc, spec_desc, chunks=ds_chunking, dtype=data_type) h5_ronch = meas_grp.create_dataset('Stack_Mean', data=np.zeros(num_pixels, dtype=np.float32), dtype=np.float32) h5_mean_spec = meas_grp.create_dataset('Image_Means', data=np.zeros(num_files, dtype=np.float32), dtype=np.float32) self.h5_file.flush() return h5_main, h5_mean_spec, h5_ronch
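# --- Illustrative sketch (not part of the method above) ---
# The main dataset above stores one image per position (row) and one pixel per
# spectroscopic step (column). A hypothetical sketch of that flattening:
import numpy as np

def flatten_image_stack(images):
    # images: (num_files, vsize, usize) -> (num_files, vsize * usize)
    return images.reshape(images.shape[0], -1)

stack = np.random.rand(6, 4, 5)            # 6 images of 4 x 5 pixels
print(flatten_image_stack(stack).shape)    # (6, 20)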
def translate(self, data_filepath, out_filename, verbose=False, debug=False): ''' The main function that translates the provided file into a .h5 file Parameters ---------------- data_filepath : String / unicode Absolute path of the data file out_filename : String / unicode Name for the new generated hdf5 file. The new file will be saved in the same folder of the input file with file name "out_filename". NOTE: the .h5 extension is automatically added to "out_filename" debug : Boolean (Optional. default is false) Whether or not to print log statements Returns ---------------- h5_path : String / unicode Absolute path of the generated .h5 file ''' self.debug = debug # Open the datafile try: data_filepath = os.path.abspath(data_filepath) ARh5_file = h5py.File(data_filepath, 'r') except: print('Unable to open the file', data_filepath) raise # Get info from the origin file like Notes and Segments self.notes = ARh5_file.attrs['Note'] self.segments = ARh5_file['ForceMap']['Segments'] #shape: (X, Y, 4) self.segments_name = list(ARh5_file['ForceMap'].attrs['Segments']) self.map_size['X'] = ARh5_file['ForceMap']['Segments'].shape[0] self.map_size['Y'] = ARh5_file['ForceMap']['Segments'].shape[1] self.channels_name = list(ARh5_file['ForceMap'].attrs['Channels']) try: self.points_per_sec = np.float( self.note_value('ARDoIVPointsPerSec')) except NameError: self.points_per_sec = np.float(self.note_value('NumPtsPerSec')) if self.debug: print('Map size [X, Y]: ', self.map_size) print('Channels names: ', self.channels_name) # Only the extension 'Ext' segment can change size # so we get the shortest one and we trim all the others extension_idx = self.segments_name.index('Ext') short_ext = np.amin(np.array(self.segments[:, :, extension_idx])) longest_ext = np.amax(np.array(self.segments[:, :, extension_idx])) difference = longest_ext - short_ext # this is a difference between integers tot_length = (np.amax(self.segments) - difference) + 1 # +1 otherwise array(tot_length) will be of 1 position shorter points_trimmed = np.array(self.segments[:, :, extension_idx]) - short_ext if self.debug: print('Data were trimmed in the extension segment of {} points'. 
format(difference)) # Open the output hdf5 file folder_path = os.path.dirname(data_filepath) h5_path = os.path.join(folder_path, out_filename + '.h5') h5_file = h5py.File(h5_path, 'w') # Create the measurement group h5_meas_group = create_indexed_group(h5_file, 'Measurement') # Create all channels and main datasets # at this point the main dataset are just function of time x_dim = np.linspace(0, np.float(self.note_value('FastScanSize')), self.map_size['X']) y_dim = np.linspace(0, np.float(self.note_value('FastScanSize')), self.map_size['Y']) z_dim = np.arange(tot_length) / np.float(self.points_per_sec) pos_dims = [ Dimension('Cols', 'm', x_dim), Dimension('Rows', 'm', y_dim) ] spec_dims = [Dimension('Time', 's', z_dim)] # This is quite time consuming, but on magnetic drive is limited from the disk, and therefore is not useful # to parallelize these loops for index, channel in enumerate(self.channels_name): cur_chan = create_indexed_group(h5_meas_group, 'Channel') main_dset = np.empty( (self.map_size['X'], self.map_size['Y'], tot_length)) for column in np.arange(self.map_size['X']): for row in np.arange(self.map_size['Y']): AR_pos_string = str(column) + ':' + str(row) seg_start = self.segments[column, row, extension_idx] - short_ext main_dset[column, row, :] = ARh5_file['ForceMap'][AR_pos_string][ index, seg_start:] # Reshape with Fortran order to have the correct position indices main_dset = np.reshape(main_dset, (-1, tot_length), order='F') if index == 0: first_main_dset = cur_chan quant_unit = self.get_def_unit(channel) h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of raw data 'Raw_' + channel, # Name of main dset channel, # Physical quantity self.get_def_unit(channel), # Unit pos_dims, # position dimensions spec_dims, #spectroscopy dimensions ) else: h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of raw data 'Raw_' + channel, # Name of main dset channel, # Physical quantity self.get_def_unit(channel), # Unit pos_dims, # position dimensions spec_dims, #spectroscopy dimensions # Link Ancilliary dset to the first h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], h5_spec_inds=first_main_dset['Spectroscopic_Indices'], h5_spec_vals=first_main_dset['Spectroscopic_Values'], ) # Make Channels with IMAGES. 
# Position indices/values are the same as for all the other channels # Spectroscopic indices/values are just a single dummy dimension img_spec_dims = [Dimension('arb', 'a.u.', [1])] for index, image in enumerate(ARh5_file['Image'].keys()): main_dset = np.reshape(np.array(ARh5_file['Image'][image]), (-1, 1), order='F') cur_chan = create_indexed_group(h5_meas_group, 'Channel') if index == 0: first_image_dset = cur_chan h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of image (shape: P*Q x 1) 'Img_' + image, # Name of main dset image, # Physical quantity self.get_def_unit(image), # Unit pos_dims, # position dimensions img_spec_dims, # spectroscopy dimensions # Link ancillary dsets to the first channel h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], ) else: h5_raw = write_main_dataset( cur_chan, # parent HDF5 group main_dset, # 2D array of image (shape: P*Q x 1) 'Img_' + image, # Name of main dset image, # Physical quantity self.get_def_unit(image), # Unit pos_dims, # position dimensions img_spec_dims, # spectroscopy dimensions # Link ancillary dsets to the first channel h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], h5_spec_inds=first_image_dset['Spectroscopic_Indices'], h5_spec_vals=first_image_dset['Spectroscopic_Values'], ) # Create the new segments that will be stored as attributes new_segments = {} for seg, name in enumerate(self.segments_name): new_segments.update({name: self.segments[0, 0, seg] - short_ext}) write_simple_attrs( h5_meas_group, { 'Segments': new_segments, 'Points_trimmed': points_trimmed, 'Notes': self.notes }) write_simple_attrs( h5_file, { 'translator': 'ARhdf5', 'instrument': 'Asylum Research ' + self.note_value('MicroscopeModel'), 'AR software version': self.note_value('Version') }) if self.debug: print(print_tree(h5_file)) print('\n') for key, val in get_attributes(h5_meas_group).items(): if key != 'Notes': print('{} : {}'.format(key, val)) else: print('{} : {}'.format( key, 'notes string too long to be written here.')) # Clean up ARh5_file.close() h5_file.close() self.translated = True return h5_path
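# --- Illustrative sketch (not part of the translator above) ---
# Each pixel's force curve starts at its own extension offset, so the curves
# are aligned by dropping the first (offset - shortest offset) points of every
# curve. A hypothetical standalone version of that trimming arithmetic:
import numpy as np

def trim_to_shortest_extension(segments, extension_idx):
    ext = np.asarray(segments[:, :, extension_idx])
    short_ext = ext.min()
    difference = ext.max() - short_ext                  # worst-case misalignment
    tot_length = (np.amax(segments) - difference) + 1   # +1: indices are inclusive
    points_trimmed = ext - short_ext                    # points dropped per pixel
    return short_ext, tot_length, points_trimmed

seg = np.zeros((2, 2, 4), dtype=int)
seg[..., 0] = [[10, 12], [11, 10]]                      # per-pixel extension offsets
seg[..., 3] = 100                                       # last index of each curve
print(trim_to_shortest_extension(seg, 0))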
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024): """ Rebuild the Image from the SVD results on the windows. Optionally, only use components less than n_comp. Parameters ---------- h5_main : hdf5 Dataset dataset which SVD was performed on components : {int, iterable of int, slice} optional Defines which components to keep Default - None, all components kept Input Types integer : Components less than the input will be kept length 2 iterable of integers : Integers define start and stop of component slice to retain other iterable of integers or slice : Selection of component indices to retain cores : int, optional How many cores should be used to rebuild Default - None, all but 2 cores will be used, min 1 max_RAM_mb : int, optional Maximum amount of memory to use when rebuilding, in Mb. Default - 1024Mb Returns ------- rebuilt_data : HDF5 Dataset the rebuilt dataset """ comp_slice, num_comps = get_component_slice( components, total_components=h5_main.shape[1]) if isinstance(comp_slice, np.ndarray): comp_slice = list(comp_slice) dset_name = h5_main.name.split('/')[-1] # Ensuring that at least one core is available for use / 2 cores are available for other use max_cores = max(1, cpu_count() - 2) # print('max_cores',max_cores) if cores is not None: cores = min(round(abs(cores)), max_cores) else: cores = max_cores max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory()) if cores != 1: max_memory = int(max_memory / 2) ''' Get the handles for the SVD results ''' try: h5_svd_group = find_results_groups(h5_main, 'SVD')[-1] h5_S = h5_svd_group['S'] h5_U = h5_svd_group['U'] h5_V = h5_svd_group['V'] except KeyError: raise KeyError( 'SVD Results for {dset} were not found.'.format(dset=dset_name)) except: raise func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V) ''' Calculate the size of a single batch that will fit in the available memory ''' n_comps = h5_S[comp_slice].size mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps fixed_mem = h5_main.size * h5_main.dtype.itemsize if cores is None: free_mem = max_memory - fixed_mem else: free_mem = max_memory * 2 - fixed_mem batch_size = int(round(float(free_mem) / mem_per_pix)) batch_slices = gen_batches(h5_U.shape[0], batch_size) print('Reconstructing in batches of {} positions.'.format(batch_size)) print('Batches should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0**2)) ''' Loop over all batches. ''' ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :])) rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1])) for ibatch, batch in enumerate(batch_slices): rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V) rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype) print( 'Completed reconstruction of data from SVD results. 
Writing to file.') ''' Create the Group and dataset to hold the rebuild data ''' rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data') h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data', get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'), None, None, h5_pos_inds=h5_main.h5_pos_inds, h5_pos_vals=h5_main.h5_pos_vals, h5_spec_inds=h5_main.h5_spec_inds, h5_spec_vals=h5_main.h5_spec_vals, chunks=h5_main.chunks, compression=h5_main.compression) if isinstance(comp_slice, slice): rebuilt_grp.attrs['components_used'] = '{}-{}'.format( comp_slice.start, comp_slice.stop) else: rebuilt_grp.attrs['components_used'] = components copy_attributes(h5_main, h5_rebuilt, skip_refs=False) h5_main.file.flush() print('Done writing reconstructed data to file.') return h5_rebuilt
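# --- Illustrative sketch (not part of rebuild_svd above) ---
# A minimal, in-memory version of the batched reconstruction: keep the first k
# components and accumulate U[:, :k] @ diag(S[:k]) @ V[:k, :] in row batches.
# The names and sizes below are made up for the example.
import numpy as np
from sklearn.utils import gen_batches

rng = np.random.default_rng(0)
U = rng.standard_normal((1000, 25))
S = rng.random(25)
V = rng.standard_normal((25, 300))
k = 10                                        # components to keep

ds_V = np.diag(S[:k]) @ V[:k, :]              # fold S into V once, up front
rebuild = np.zeros((U.shape[0], V.shape[1]))
for batch in gen_batches(U.shape[0], 256):    # 256 positions at a time
    rebuild[batch, :] += U[batch, :k] @ ds_V

print(rebuild.shape)                          # (1000, 300)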
def translate(self, file_path): """ The main function that translates the provided file into a .h5 file Parameters ---------- file_path : String / unicode Absolute path of any file in the directory Returns ------- h5_path : String / unicode Absolute path of the h5 file """ file_path = path.abspath(file_path) # Figure out the basename of the data: (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path) (folder_path, unused) = path.split(file_path) h5_path = path.join(folder_path, basename + '.h5') if path.exists(h5_path): remove(h5_path) # Load parameters from .mat file matread = loadmat(parm_paths['parm_mat'], variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train', 'BE_wave', 'total_cols', 'total_rows']) be_wave = np.float32(np.squeeze(matread['BE_wave'])) be_wave_train = np.float32(np.squeeze(matread['BE_wave_train'])) num_cols = int(matread['total_cols'][0][0]) expected_rows = int(matread['total_rows'][0][0]) self.points_per_pixel = len(be_wave) self.points_per_line = len(be_wave_train) # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate' is_beps, parm_dict = parmsToDict(parm_paths['parm_txt']) # Get file byte size: # For now, assume that bigtime_00 always exists and is the main file file_size = path.getsize(data_paths[0]) # Calculate actual number of lines since the first few lines may not be saved self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols) if self.num_rows % 1: warn('Error - File has incomplete rows') return None else: self.num_rows = int(self.num_rows) samp_rate = parm_dict['IO_rate_[Hz]'] ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]'] # method 1 for calculating the correct excitation frequency: pixel_duration = 1.0 * self.points_per_pixel / samp_rate num_periods = pixel_duration * ex_freq_nominal ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods)) # correcting the excitation frequency - will be VERY useful during analysis and filtering parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct # Some very basic information that can help the processing crew parm_dict['points_per_line'] = self.points_per_line parm_dict['num_bins'] = self.points_per_pixel parm_dict['grid_num_rows'] = self.num_rows parm_dict['data_type'] = 'G_mode_line' if self.num_rows != expected_rows: print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows)) # Calculate number of points to read per line: self.__bytes_per_row__ = int(file_size / self.num_rows) # First finish writing all global parameters, create the file too: h5_file = h5py.File(h5_path, 'w') global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'G_mode_line' global_parms['translator'] = 'G_mode_line' write_simple_attrs(h5_file, global_parms) # Next create the Measurement and Channel groups and write the appropriate parameters to them meas_grp = create_indexed_group(h5_file, 'Measurement') write_simple_attrs(meas_grp, parm_dict) # Now that the file has been created, go over each raw data file: """ We only allocate the space for the main data here. This does NOT change with each file. The data written to it does. 
The auxiliary datasets will not change with each raw data file since only one excitation waveform is used """ pos_desc = Dimension('Y', 'm', np.arange(self.num_rows)) spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols)) h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True) for f_index in data_paths.keys(): chan_grp = create_indexed_group(meas_grp, 'Channel') h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data', 'Deflection', 'V', None, None, h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, chunks=(1, self.points_per_pixel), dtype=np.float16) # Now transfer scan data in the dat file to the h5 file: super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main) h5_file.close() print('G-Tune translation complete!') return h5_path
def translate(self, parm_path): """ Basic method that translates .mat data files to a single .h5 file Parameters ------------ parm_path : string / unicode Absolute file path of the parameters .mat file. Returns ---------- h5_path : string / unicode Absolute path of the translated h5 file """ self.parm_path = path.abspath(parm_path) (folder_path, file_name) = path.split(parm_path) (file_name, base_name) = path.split(folder_path) h5_path = path.join(folder_path, base_name + '.h5') # Read parameters parm_dict = readGmodeParms(parm_path) # Add the w^2 specific parameters to this list parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True) #freq_sweep_parms = parm_data['freqSweepParms'] #parm_dict['freq_sweep_delay'] = np.float(freq_sweep_parms['delay'].item()) gen_sig = parm_data['genSig'] #parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item()) # freq_array is needed below to build the spectroscopic axes freq_array = np.float32(parm_data['freqArray']) # prepare and write spectroscopic values samp_rate = parm_dict['IO_down_samp_rate_[Hz]'] num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate) w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate, np.float32(samp_rate / num_bins)) # There is most likely a more elegant solution to this but I don't have the time... Maybe np.meshgrid spec_val_mat = np.zeros((len(freq_array) * num_bins, 2), dtype=VALUES_DTYPE) spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array)) spec_val_mat[:, 1] = np.repeat(freq_array, num_bins) spec_ind_mat = np.zeros((2, len(freq_array) * num_bins), dtype=np.int32) spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array)) spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins) num_rows = parm_dict['grid_num_rows'] num_cols = parm_dict['grid_num_cols'] parm_dict['data_type'] = 'GVS' num_pix = num_rows * num_cols global_parms = generate_dummy_main_parms() global_parms['grid_size_x'] = parm_dict['grid_num_cols'] global_parms['grid_size_y'] = parm_dict['grid_num_rows'] # assuming that the experiment was completed: global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1 global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1 global_parms['data_type'] = parm_dict['data_type'] # self.__class__.__name__ global_parms['translator'] = 'GVS' # Now start creating datasets and populating: if path.exists(h5_path): remove(h5_path) h5_f = h5py.File(h5_path, 'w') write_simple_attrs(h5_f, global_parms) meas_grp = create_indexed_group(h5_f, 'Measurement') chan_grp = create_indexed_group(meas_grp, 'Channel') write_simple_attrs(chan_grp, parm_dict) pos_dims = [Dimension('X', 'nm', num_rows), Dimension('Y', 'nm', num_cols)] spec_dims = [Dimension('Response Bin', 'a.u.', num_bins), Dimension('Excitation Frequency ', 'Hz', len(freq_array))] # Minimize file size to the extent possible. # DAQs are rated at 16 bit so float16 should be most appropriate. 
# For some reason, compression is more effective on time series data h5_main = write_main_dataset(chan_grp, (num_pix, num_bins), 'Raw_Data', 'Deflection', 'V', pos_dims, spec_dims, chunks=(1, num_bins), dtype=np.float32) h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array) h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec) # Now doing link_h5_objects_as_attrs: link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq]) # Now read the raw data files: pos_ind = 0 for row_ind in range(1, num_rows + 1): for col_ind in range(1, num_cols + 1): file_path = path.join(folder_path, 'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat') print('Working on row {} col {}'.format(row_ind, col_ind)) if path.exists(file_path): # Load data file pix_data = loadmat(file_path, squeeze_me=True) pix_mat = pix_data['AI_mat'] # Take the inverse FFT on 2nd dimension pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1) # Verified with Matlab - no conjugate required here. pix_vec = pix_mat.transpose().reshape(pix_mat.size) h5_main[pos_ind, :] = np.float32(pix_vec) h5_f.flush() # flush from memory! else: print('File not found for: row {} col {}'.format(row_ind, col_ind)) pos_ind += 1 if (100.0 * pos_ind / num_pix) % 10 == 0: print('completed translating {} %'.format(int(100 * pos_ind / num_pix))) h5_f.close() return h5_path
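# --- Illustrative sketch (not part of the translator above) ---
# Each per-pixel .mat file holds frequency-domain spectra; the loop above
# shifts and inverse-FFTs them along the bin axis, then flattens the matrix
# into a single vector that fills that pixel's row of Raw_Data. A minimal sketch:
import numpy as np

ai_mat = np.random.rand(3, 8)                          # 3 frequencies x 8 bins
pix_mat = np.fft.ifft(np.fft.ifftshift(ai_mat, axes=1), axis=1)
pix_vec = pix_mat.transpose().reshape(pix_mat.size)    # one long vector per pixel
print(pix_vec.shape)                                   # (24,)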
def _setupH5(self, usize, vsize, data_type, num_images, main_parms): """ Setup the HDF5 file in which to store the data including creating the Position and Spectroscopic datasets Parameters ---------- usize : int Number of pixel columns in the images vsize : int Number of pixel rows in the images data_type : type Data type to save image as num_images : int Number of images in the movie main_parms : dict Returns ------- h5_main : h5py.Dataset HDF5 Dataset that the images will be written into h5_mean_spec : h5py.Dataset HDF5 Dataset that the mean over all positions will be written into h5_ronch : h5py.Dataset HDF5 Dateset that the mean over all Spectroscopic steps will be written into """ num_pixels = usize * vsize root_parms = generate_dummy_main_parms() root_parms['data_type'] = 'PtychographyData' main_parms['num_images'] = num_images main_parms['image_size_u'] = usize main_parms['image_size_v'] = vsize main_parms['num_pixels'] = num_pixels main_parms['translator'] = 'Movie' # Create the hdf5 data Group write_simple_attrs(self.h5_file, root_parms) meas_grp = create_indexed_group(self.h5_file, 'Measurement') write_simple_attrs(meas_grp, main_parms) chan_grp = create_indexed_group(meas_grp, 'Channel') # Build the Position and Spectroscopic Datasets spec_dim = Dimension('Time', 's', np.arange(num_images)) pos_dims = [Dimension('X', 'a.u.', np.arange(usize)), Dimension('Y', 'a.u.', np.arange(vsize))] ds_chunking = calc_chunks([num_pixels, num_images], data_type(0).itemsize, unit_chunks=(num_pixels, 1)) # Allocate space for Main_Data and Pixel averaged Data h5_main = write_main_dataset(chan_grp, (num_pixels, num_images), 'Raw_Data', 'Intensity', 'a.u.', pos_dims, spec_dim, chunks=ds_chunking, dtype=data_type) h5_ronch = meas_grp.create_dataset('Mean_Ronchigram', data=np.zeros(num_pixels, dtype=np.float32), dtype=np.float32) h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean', data=np.zeros(num_images, dtype=np.float32), dtype=np.float32) self.h5_file.flush() return h5_main, h5_mean_spec, h5_ronch
def translate(self, raw_data_path): """ The main function that translates the provided file into a .h5 file Parameters ------------ raw_data_path : string / unicode Absolute file path of the data .mat file. Returns ---------- h5_path : string / unicode Absolute path of the translated h5 file """ raw_data_path = path.abspath(raw_data_path) folder_path, file_name = path.split(raw_data_path) h5_path = path.join(folder_path, file_name[:-4] + '.h5') if path.exists(h5_path): remove(h5_path) h5_f = h5py.File(h5_path, 'w') self.h5_read = True try: h5_raw = h5py.File(raw_data_path, 'r') except ImportError: self.h5_read = False h5_raw = loadmat(raw_data_path) excite_cell = h5_raw['dc_amp_cell3'] test = excite_cell[0][0] if self.h5_read: excitation_vec = h5_raw[test] else: excitation_vec = np.float32(np.squeeze(test)) current_cell = h5_raw['current_cell3'] num_rows = current_cell.shape[0] num_cols = current_cell.shape[1] num_iv_pts = excitation_vec.size current_data = np.zeros(shape=(num_rows * num_cols, num_iv_pts), dtype=np.float32) for row_ind in range(num_rows): for col_ind in range(num_cols): pix_ind = row_ind * num_cols + col_ind if self.h5_read: curr_val = np.squeeze(h5_raw[current_cell[row_ind][col_ind]].value) else: curr_val = np.float32(np.squeeze(current_cell[row_ind][col_ind])) current_data[pix_ind, :] = 1E+9 * curr_val parm_dict = self._read_parms(h5_raw) parm_dict.update({'translator': 'FORC_IV'}) pos_desc = [Dimension('Y', 'm', np.arange(num_rows)), Dimension('X', 'm', np.arange(num_cols))] spec_desc = [Dimension('DC Bias', 'V', excitation_vec)] meas_grp = create_indexed_group(h5_f, 'Measurement') chan_grp = create_indexed_group(meas_grp, 'Channel') write_simple_attrs(chan_grp, parm_dict) h5_main = write_main_dataset(chan_grp, current_data, 'Raw_Data', 'Current', '1E-9 A', pos_desc, spec_desc) h5_f.close() return h5_path
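# --- Illustrative sketch (not part of the translator above) ---
# The nested loop above flattens a rows x cols grid of I-V curves into a 2D
# array (one pixel per row) and converts the current from A to nA. A
# hypothetical standalone version:
import numpy as np

def stack_iv_curves(curve_grid, num_iv_pts):
    num_rows, num_cols = len(curve_grid), len(curve_grid[0])
    out = np.zeros((num_rows * num_cols, num_iv_pts), dtype=np.float32)
    for r in range(num_rows):
        for c in range(num_cols):
            out[r * num_cols + c, :] = 1E+9 * np.squeeze(curve_grid[r][c])
    return out

grid = [[np.full(5, 2e-9), np.full(5, 3e-9)]]   # 1 x 2 grid of 5-point curves
print(stack_iv_curves(grid, 5))                 # rows of 2.0 and 3.0 (nA)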
def translate(self, file_path, verbose=False, parm_encoding='utf-8'): """ Translates the provided file to .h5 Parameters ---------- file_path : String / unicode Absolute path of the .ibw file verbose : Boolean (Optional) Whether or not to show print statements for debugging parm_encoding : str, optional Codec to be used to decode the bytestrings into Python strings if needed. Default 'utf-8' Returns ------- h5_path : String / unicode Absolute path of the .h5 file """ file_path = path.abspath(file_path) # Prepare the .h5 file: folder_path, base_name = path.split(file_path) base_name = base_name[:-4] h5_path = path.join(folder_path, base_name + '.h5') if path.exists(h5_path): remove(h5_path) h5_file = h5py.File(h5_path, 'w') # Load the ibw file first ibw_obj = bw.load(file_path) ibw_wave = ibw_obj.get('wave') parm_dict = self._read_parms(ibw_wave, parm_encoding) chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding) if verbose: print('Channels and units found:') print(chan_labels) print(chan_units) # Get the data to figure out if this is an image or a force curve images = ibw_wave.get('wData') if images.shape[2] != len(chan_labels): chan_labels = chan_labels[1:] # for layer 0 null set errors in older AR software if images.ndim == 3: # Image stack if verbose: print('Found image stack of size {}'.format(images.shape)) type_suffix = 'Image' num_rows = parm_dict['ScanLines'] num_cols = parm_dict['ScanPoints'] images = images.transpose(2, 1, 0) # now ordered as [chan, Y, X] image images = np.reshape(images, (images.shape[0], -1, 1)) # 3D [chan, Y*X points,1] pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)), Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))] spec_desc = Dimension('arb', 'a.u.', [1]) else: # single force curve if verbose: print('Found force curve of size {}'.format(images.shape)) type_suffix = 'ForceCurve' images = np.atleast_3d(images) # now [Z, chan, 1] images = images.transpose((1, 2, 0)) # [chan ,1, Z] force curve # The data generated above varies linearly. Override. # For now, we'll shove the Z sensor data into the spectroscopic values. # Find the channel that corresponds to either Z sensor or Raw: try: chan_ind = chan_labels.index('ZSnsr') spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind])) except ValueError: try: chan_ind = chan_labels.index('Raw') spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind])) except ValueError: # We don't expect to come here. 
If we do, spectroscopic values remains as is spec_data = np.arange(images.shape[2]) pos_desc = Dimension('X', 'm', [1]) spec_desc = Dimension('Z', 'm', spec_data) # Create measurement group meas_grp = create_indexed_group(h5_file, 'Measurement') # Write file and measurement level parameters global_parms = generate_dummy_main_parms() global_parms['data_type'] = 'IgorIBW_' + type_suffix global_parms['translator'] = 'IgorIBW' write_simple_attrs(h5_file, global_parms) write_simple_attrs(meas_grp, parm_dict) # Create Position and spectroscopic datasets h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True) # Prepare the list of raw_data datasets for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units): chan_grp = create_indexed_group(meas_grp, 'Channel') write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data', chan_name, chan_unit, None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals, dtype=np.float32) if verbose: print('Finished preparing raw datasets') h5_file.close() return h5_path
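# --- Illustrative sketch (not part of the translator above) ---
# For image stacks, the Igor wave's wData arrives as (X, Y, channel); the code
# above reorders it to (channel, Y, X) and flattens each channel into a single
# column of pixels. A minimal sketch of that reshaping:
import numpy as np

def ibw_images_to_channels(wdata):
    chans = wdata.transpose(2, 1, 0)             # (channel, Y, X)
    return chans.reshape(chans.shape[0], -1, 1)  # (channel, Y*X, 1)

fake = np.random.rand(8, 6, 3)                   # 8 x 6 image, 3 channels
print(ibw_images_to_channels(fake).shape)        # (3, 48, 1)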
def translate(self, raw_data_path): """ The main function that translates the provided file into a .h5 file Parameters ------------ raw_data_path : string / unicode Absolute file path of the data .mat file. Returns ---------- h5_path : string / unicode Absolute path of the translated h5 file """ raw_data_path = path.abspath(raw_data_path) folder_path, file_name = path.split(raw_data_path) h5_path = path.join(folder_path, file_name[:-4] + '.h5') if path.exists(h5_path): remove(h5_path) h5_f = h5py.File(h5_path, 'w') self.h5_read = True try: h5_raw = h5py.File(raw_data_path, 'r') except ImportError: self.h5_read = False h5_raw = loadmat(raw_data_path) excite_cell = h5_raw['dc_amp_cell3'] test = excite_cell[0][0] if self.h5_read: excitation_vec = h5_raw[test] else: excitation_vec = np.float32(np.squeeze(test)) current_cell = h5_raw['current_cell3'] num_rows = current_cell.shape[0] num_cols = current_cell.shape[1] num_iv_pts = excitation_vec.size current_data = np.zeros(shape=(num_rows * num_cols, num_iv_pts), dtype=np.float32) for row_ind in range(num_rows): for col_ind in range(num_cols): pix_ind = row_ind * num_cols + col_ind if self.h5_read: curr_val = np.squeeze( h5_raw[current_cell[row_ind][col_ind]].value) else: curr_val = np.float32( np.squeeze(current_cell[row_ind][col_ind])) current_data[pix_ind, :] = 1E+9 * curr_val parm_dict = self._read_parms(h5_raw) parm_dict.update({'translator': 'FORC_IV'}) pos_desc = [ Dimension('Y', 'm', np.arange(num_rows)), Dimension('X', 'm', np.arange(num_cols)) ] spec_desc = [Dimension('DC Bias', 'V', excitation_vec)] meas_grp = create_indexed_group(h5_f, 'Measurement') chan_grp = create_indexed_group(meas_grp, 'Channel') write_simple_attrs(chan_grp, parm_dict) h5_main = write_main_dataset(chan_grp, current_data, 'Raw_Data', 'Current', '1E-9 A', pos_desc, spec_desc) return
def translate(self, data_filepath, out_filename, verbose=False, debug=False): ''' The main function that translates the provided file into a .h5 file Parameters ---------------- data_filepath : String / unicode Absolute path of the data file out_filename : String / unicode Name for the new generated hdf5 file. The new file will be saved in the same folder of the input file with file name "out_filename". NOTE: the .h5 extension is automatically added to "out_filename" debug : Boolean (Optional. default is false) Whether or not to print log statements Returns ---------------- h5_path : String / unicode Absolute path of the generated .h5 file ''' self.debug = debug # Open the datafile try: data_filepath = os.path.abspath(data_filepath) ARh5_file = h5py.File(data_filepath, 'r') except: print('Unable to open the file', data_filepath) raise # Get info from the origin file like Notes and Segments self.notes = ARh5_file.attrs['Note'] self.segments = ARh5_file['ForceMap']['Segments'] #shape: (X, Y, 4) self.segments_name = list(ARh5_file['ForceMap'].attrs['Segments']) self.map_size['X'] = ARh5_file['ForceMap']['Segments'].shape[0] self.map_size['Y'] = ARh5_file['ForceMap']['Segments'].shape[1] self.channels_name = list(ARh5_file['ForceMap'].attrs['Channels']) try: self.points_per_sec = np.float(self.note_value('ARDoIVPointsPerSec')) except NameError: self.points_per_sec = np.float(self.note_value('NumPtsPerSec')) if self.debug: print('Map size [X, Y]: ', self.map_size) print('Channels names: ', self.channels_name) # Only the extension 'Ext' segment can change size # so we get the shortest one and we trim all the others extension_idx = self.segments_name.index('Ext') short_ext = np.amin(np.array(self.segments[:, :, extension_idx])) longest_ext = np.amax(np.array(self.segments[:, :, extension_idx])) difference = longest_ext - short_ext # this is a difference between integers tot_length = (np.amax(self.segments) - difference) + 1 # +1 otherwise array(tot_length) will be of 1 position shorter points_trimmed = np.array(self.segments[:, :, extension_idx]) - short_ext if self.debug: print('Data were trimmed in the extension segment of {} points'.format(difference)) # Open the output hdf5 file folder_path = os.path.dirname(data_filepath) h5_path = os.path.join(folder_path, out_filename + '.h5') h5_file = h5py.File(h5_path, 'w') # Create the measurement group h5_meas_group = create_indexed_group(h5_file, 'Measurement') # Create all channels and main datasets # at this point the main dataset are just function of time x_dim = np.linspace(0, np.float(self.note_value('FastScanSize')), self.map_size['X']) y_dim = np.linspace(0, np.float(self.note_value('FastScanSize')), self.map_size['Y']) z_dim = np.arange(tot_length) / np.float(self.points_per_sec) pos_dims = [Dimension('Cols', 'm', x_dim), Dimension('Rows', 'm', y_dim)] spec_dims = [Dimension('Time', 's', z_dim)] # This is quite time consuming, but on magnetic drive is limited from the disk, and therefore is not useful # to parallelize these loops for index, channel in enumerate(self.channels_name): cur_chan = create_indexed_group(h5_meas_group, 'Channel') main_dset = np.empty((self.map_size['X'], self.map_size['Y'], tot_length)) for column in np.arange(self.map_size['X']): for row in np.arange(self.map_size['Y']): AR_pos_string = str(column) + ':' + str(row) seg_start = self.segments[column, row, extension_idx] - short_ext main_dset[column, row, :] = ARh5_file['ForceMap'][AR_pos_string][index, seg_start:] # Reshape with Fortran order to have the correct 
position indices main_dset = np.reshape(main_dset, (-1, tot_length), order='F') if index == 0: first_main_dset = cur_chan quant_unit = self.get_def_unit(channel) h5_raw = write_main_dataset(cur_chan, # parent HDF5 group main_dset, # 2D array of raw data 'Raw_'+channel, # Name of main dset channel, # Physical quantity self.get_def_unit(channel), # Unit pos_dims, # position dimensions spec_dims, #spectroscopy dimensions ) else: h5_raw = write_main_dataset(cur_chan, # parent HDF5 group main_dset, # 2D array of raw data 'Raw_'+channel, # Name of main dset channel, # Physical quantity self.get_def_unit(channel), # Unit pos_dims, # position dimensions spec_dims, #spectroscopy dimensions # Link Ancilliary dset to the first h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], h5_spec_inds=first_main_dset['Spectroscopic_Indices'], h5_spec_vals=first_main_dset['Spectroscopic_Values'], ) # Make Channels with IMAGES. # Position indices/values are the same of all other channels # Spectroscopic indices/valus are they are just one single dimension img_spec_dims = [Dimension('arb', 'a.u.', [1])] for index, image in enumerate(ARh5_file['Image'].keys()): main_dset = np.reshape(np.array(ARh5_file['Image'][image]), (-1,1), order='F') cur_chan = create_indexed_group(h5_meas_group, 'Channel') if index == 0: first_image_dset = cur_chan h5_raw = write_main_dataset(cur_chan, # parent HDF5 group main_dset, # 2D array of image (shape: P*Q x 1) 'Img_'+image, # Name of main dset image, # Physical quantity self.get_def_unit(image), # Unit pos_dims, # position dimensions img_spec_dims, #spectroscopy dimensions # Link Ancilliary dset to the first h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], ) else: h5_raw = write_main_dataset(cur_chan, # parent HDF5 group main_dset, # 2D array of image (shape: P*Q x 1) 'Img_'+image, # Name of main dset image, # Physical quantity self.get_def_unit(image), # Unit pos_dims, # position dimensions img_spec_dims, #spectroscopy dimensions # Link Ancilliary dset to the first h5_pos_inds=first_main_dset['Position_Indices'], h5_pos_vals=first_main_dset['Position_Values'], h5_spec_inds=first_image_dset['Spectroscopic_Indices'], h5_spec_vals=first_image_dset['Spectroscopic_Values'], ) # Create the new segments that will be stored as attribute new_segments = {} for seg, name in enumerate(self.segments_name): new_segments.update({name:self.segments[0,0,seg] - short_ext}) write_simple_attrs(h5_meas_group, {'Segments':new_segments, 'Points_trimmed':points_trimmed, 'Notes':self.notes}) write_simple_attrs(h5_file, {'translator':'ARhdf5', 'instrument':'Asylum Research '+self.note_value('MicroscopeModel'), 'AR sftware version':self.note_value('Version')}) if self.debug: print(print_tree(h5_file)) print('\n') for key, val in get_attributes(h5_meas_group).items(): if key != 'Notes': print('{} : {}'.format(key, val)) else: print('{} : {}'.format(key, 'notes string too long to be written here.')) # Clean up ARh5_file.close() h5_file.close() self.translated = True return h5_path
def translate(self, data_channels=None, verbose=False): """ Translate the data into a Pycroscopy compatible HDF5 file. Parameters ---------- data_channels : (optional) list of str Names of channels that will be read and stored in the file. If not given, all channels in the file will be used. verbose : (optional) Boolean Whether or not to print statements Returns ------- h5_path : str Filepath to the output HDF5 file. """ if self.parm_dict is None or self.data_dict is None: self._read_data(self.data_path) if data_channels is None: print('No channels specified. All channels in file will be used.') data_channels = self.parm_dict['channel_parms'].keys() if verbose: print('Using the following channels') for channel in data_channels: print(channel) if os.path.exists(self.h5_path): os.remove(self.h5_path) h5_file = h5py.File(self.h5_path, 'w') # Create measurement group and assign attributes meas_grp = create_indexed_group(h5_file, 'Measurement') write_simple_attrs( meas_grp, self.parm_dict['meas_parms'] ) # Create datasets for positional and spectroscopic indices and values spec_dim = self.data_dict['Spectroscopic Dimensions'] pos_dims = self.data_dict['Position Dimensions'] h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_dims, is_spectral=False) h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_dim, is_spectral=True) # Create the datasets for all the channels num_points = h5_pos_inds.shape[0] for data_channel in data_channels: raw_data = self.data_dict[data_channel].reshape([num_points, -1]) chan_grp = create_indexed_group(meas_grp, 'Channel') data_label = data_channel data_unit = self.parm_dict['channel_parms'][data_channel]['Unit'] write_simple_attrs( chan_grp, self.parm_dict['channel_parms'][data_channel] ) write_main_dataset(chan_grp, raw_data, 'Raw_Data', data_label, data_unit, None, None, h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals, h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals) h5_file.flush() h5_file.close() print('Nanonis translation complete.') return self.h5_path
def translate(self, parm_path):
    """
    Basic method that translates .mat data files to a single .h5 file

    Parameters
    ----------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    -------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    self.parm_path = path.abspath(parm_path)
    (folder_path, file_name) = path.split(parm_path)
    (file_name, base_name) = path.split(folder_path)
    h5_path = path.join(folder_path, base_name + '.h5')

    # Read parameters
    parm_dict = readGmodeParms(parm_path)

    # Add the w^2 specific parameters to this list
    parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
    freq_sweep_parms = parm_data['freqSweepParms']
    parm_dict['freq_sweep_delay'] = float(freq_sweep_parms['delay'].item())
    gen_sig = parm_data['genSig']
    parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
    freq_array = np.float32(parm_data['freqArray'])

    # Prepare and write spectroscopic values
    samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
    num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate)
    w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate,
                      np.float32(samp_rate / num_bins))

    # There is most likely a more elegant solution to this but I don't have the time... Maybe np.meshgrid
    spec_val_mat = np.zeros((len(freq_array) * num_bins, 2), dtype=VALUES_DTYPE)
    spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
    spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)

    spec_ind_mat = np.zeros((2, len(freq_array) * num_bins), dtype=np.int32)
    spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
    spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)

    num_rows = parm_dict['grid_num_rows']
    num_cols = parm_dict['grid_num_cols']
    parm_dict['data_type'] = 'GmodeW2'
    num_pix = num_rows * num_cols

    global_parms = dict()
    global_parms['grid_size_x'] = parm_dict['grid_num_cols']
    global_parms['grid_size_y'] = parm_dict['grid_num_rows']
    # assuming that the experiment was completed:
    global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
    global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
    global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
    global_parms['translator'] = 'W2'

    # Now start creating datasets and populating:
    if path.exists(h5_path):
        remove(h5_path)

    h5_f = h5py.File(h5_path, 'w')
    write_simple_attrs(h5_f, global_parms)

    meas_grp = create_indexed_group(h5_f, 'Measurement')
    chan_grp = create_indexed_group(meas_grp, 'Channel')
    write_simple_attrs(chan_grp, parm_dict)

    pos_dims = [Dimension('X', 'nm', num_rows),
                Dimension('Y', 'nm', num_cols)]
    spec_dims = [Dimension('Response Bin', 'a.u.', num_bins),
                 Dimension('Excitation Frequency', 'Hz', len(freq_array))]

    # Minimize file size to the extent possible.
    # DAQs are rated at 16 bit so float16 should be most appropriate.
    # For some reason, compression is more effective on time series data
    h5_main = write_main_dataset(chan_grp, (num_pix, num_bins), 'Raw_Data',
                                 'Deflection', 'V', pos_dims, spec_dims,
                                 chunks=(1, num_bins), dtype=np.float32)

    h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array)
    h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

    # Now doing link_h5_objects_as_attrs:
    link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

    # Now read the raw data files:
    pos_ind = 0
    for row_ind in range(1, num_rows + 1):
        for col_ind in range(1, num_cols + 1):
            file_path = path.join(folder_path,
                                  'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
            print('Working on row {} col {}'.format(row_ind, col_ind))
            if path.exists(file_path):
                # Load data file
                pix_data = loadmat(file_path, squeeze_me=True)
                pix_mat = pix_data['AI_mat']
                # Take the inverse FFT on the 2nd dimension
                pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1)
                # Verified with Matlab - no conjugate required here.
                pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                h5_main[pos_ind, :] = np.float32(pix_vec)
                h5_f.flush()  # flush from memory!
            else:
                print('File not found for: row {} col {}'.format(row_ind, col_ind))
            pos_ind += 1
            if (100.0 * pos_ind / num_pix) % 10 == 0:
                print('completed translating {} %'.format(int(100 * pos_ind / num_pix)))

    h5_f.close()

    return h5_path
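# A small NumPy-only sketch (not part of the translator above) showing how the
# tile/repeat construction of spec_val_mat could be replaced with np.meshgrid, as
# the original "Maybe np.meshgrid" comment suggests. All values here are toy data
# and the helper name is illustrative only.
def _demo_spec_vals_with_meshgrid():
    import numpy as np

    freq_array = np.array([10.0, 20.0, 30.0], dtype=np.float32)  # toy frequencies
    w_vec = np.linspace(-0.5, 0.5, 4, dtype=np.float32)          # toy response bins

    # tile/repeat construction, mirroring the translator
    spec_val_mat = np.zeros((len(freq_array) * len(w_vec), 2), dtype=np.float32)
    spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
    spec_val_mat[:, 1] = np.repeat(freq_array, len(w_vec))

    # equivalent meshgrid construction
    ww, ff = np.meshgrid(w_vec, freq_array)             # each has shape (3, 4)
    alt = np.stack([ww.ravel(), ff.ravel()], axis=1)    # shape (12, 2)

    assert np.allclose(spec_val_mat, alt)
    return alt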
def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y, image_parms):
    """
    Set up the HDF5 file in which to store the data, including creating
    the Position and Spectroscopic datasets

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type
        Data type to save image as
    scan_size_x : int
        Number of images in the x dimension
    scan_size_y : int
        Number of images in the y dimension
    image_parms : dict
        Dictionary of parameters

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        HDF5 Dataset that the mean over all Spectroscopic steps (one value per image) will be written into
    h5_ronch : h5py.Dataset
        HDF5 Dataset that the mean over all positions (the mean image) will be written into
    """
    num_pixels = usize * vsize
    num_files = scan_size_x * scan_size_y

    root_parms = generate_dummy_main_parms()
    root_parms['data_type'] = 'PtychographyData'

    main_parms = {'num_images': num_files,
                  'image_size_u': usize,
                  'image_size_v': vsize,
                  'num_pixels': num_pixels,
                  'translator': 'Ptychography',
                  'scan_size_x': scan_size_x,
                  'scan_size_y': scan_size_y}
    main_parms.update(image_parms)

    # Create the hdf5 data Group
    write_simple_attrs(self.h5_f, root_parms)
    meas_grp = create_indexed_group(self.h5_f, 'Measurement')
    write_simple_attrs(meas_grp, main_parms)
    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Build the Position and Spectroscopic Datasets
    spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                 Dimension('V', 'pixel', np.arange(vsize))]
    pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                Dimension('Y', 'pixel', np.arange(scan_size_y))]

    ds_chunking = calc_chunks([num_files, num_pixels],
                              data_type(0).itemsize,
                              unit_chunks=(1, num_pixels))

    # Allocate space for Main_Data and Pixel averaged Data
    h5_main = write_main_dataset(chan_grp, (num_files, num_pixels), 'Raw_Data',
                                 'Intensity', 'a.u.',
                                 pos_desc, spec_desc,
                                 chunks=ds_chunking, dtype=data_type)
    h5_ronch = chan_grp.create_dataset('Mean_Ronchigram',
                                       shape=[num_pixels],
                                       dtype=np.float32)
    h5_mean_spec = chan_grp.create_dataset('Spectroscopic_Mean',
                                           shape=[num_files],
                                           dtype=np.float32)

    self.h5_f.flush()

    return h5_main, h5_mean_spec, h5_ronch
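# A small NumPy-only sketch (not part of the class above) of how a stack of images
# maps onto the (num_files, num_pixels) Raw_Data layout that _setupH5 allocates,
# plus one plausible reading, inferred from their shapes, of what the
# Mean_Ronchigram and Spectroscopic_Mean datasets hold. All sizes and the random
# data are toy values; the helper name is illustrative only.
def _demo_image_stack_layout():
    import numpy as np

    usize, vsize = 4, 3                 # toy image size (pixel columns x rows)
    scan_size_x, scan_size_y = 2, 2     # toy scan grid
    num_files = scan_size_x * scan_size_y
    num_pixels = usize * vsize

    image_stack = np.random.rand(num_files, vsize, usize).astype(np.float32)

    # One image per row: position index = image number,
    # spectroscopic index = flattened (U, V) pixel within that image.
    raw_data = image_stack.reshape(num_files, num_pixels)

    mean_ronchigram = raw_data.mean(axis=0)     # mean image, shape (num_pixels,)
    spectroscopic_mean = raw_data.mean(axis=1)  # mean per image, shape (num_files,)
    return raw_data, mean_ronchigram, spectroscopic_mean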
def translate(self, file_path, show_plots=True, save_plots=True, do_histogram=False): """ Basic method that translates .dat data file(s) to a single .h5 file Inputs: file_path -- Absolute file path for one of the data files. It is assumed that this file is of the OLD data format. Outputs: Nothing """ file_path = path.abspath(file_path) (folder_path, basename) = path.split(file_path) (basename, path_dict) = self._parse_file_path(file_path) h5_path = path.join(folder_path, basename + '.h5') if path.exists(h5_path): remove(h5_path) self.h5_file = h5py.File(h5_path, 'w') isBEPS = True parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms']) ignored_plt_grps = ['in-field' ] # Here we assume that there is no in-field. # If in-field data is captured then the translator would have to be modified. # Technically, we could do away with this if statement, as isBEPS is always true for this translation if isBEPS: parm_dict['data_type'] = 'BEPSData' std_expt = parm_dict[ 'VS_mode'] != 'load user defined VS Wave from file' if not std_expt: warn( 'This translator does not handle user defined voltage spectroscopy' ) return spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode']) # Check file sizes: if 'read_real' in path_dict.keys(): real_size = path.getsize(path_dict['read_real']) imag_size = path.getsize(path_dict['read_imag']) else: real_size = path.getsize(path_dict['write_real']) imag_size = path.getsize(path_dict['write_imag']) if real_size != imag_size: raise ValueError( "Real and imaginary file sizes DON'T match!. Ending") num_rows = int(parm_dict['grid_num_rows']) num_cols = int(parm_dict['grid_num_cols']) num_pix = num_rows * num_cols tot_bins = real_size / ( num_pix * 4) # Finding bins by simple division of entire datasize # Check for case where only a single pixel is missing. check_bins = real_size / ((num_pix - 1) * 4) if tot_bins % 1 and check_bins % 1: warn('Aborting! Some parameter appears to have changed in-between') return elif not tot_bins % 1: # Everything's ok pass elif not check_bins % 1: tot_bins = check_bins warn( 'Warning: A pixel seems to be missing from the data. File will be padded with zeros.' ) tot_bins = int(tot_bins) (bin_inds, bin_freqs, bin_FFT, ex_wfm, dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms']) """ Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less than the actual number), we need to re-calculate it based on the available data. This is done below. """ band_width = parm_dict['BE_band_width_[Hz]'] * ( 0.5 - parm_dict['BE_band_edge_trim']) st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32) # Forcing standardized datatypes: bin_inds = np.int32(bin_inds) bin_freqs = np.float32(bin_freqs) bin_FFT = np.complex64(bin_FFT) ex_wfm = np.float32(ex_wfm) self.FFT_BE_wave = bin_FFT (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict) # Remove the unused plot group columns before proceeding: (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs, UDVS_units, ignored_plt_grps) spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE) # Will assume that all excitation waveforms have same number of bins # Here, the denominator is 2 because only out of field measruements. 
For IF + OF, should be 1 num_actual_udvs_steps = UDVS_mat.shape[0] / 2 bins_per_step = tot_bins / num_actual_udvs_steps # Some more checks if bins_per_step % 1: warn('Non integer number of bins per step!') return else: bins_per_step = int(bins_per_step) num_actual_udvs_steps = int(num_actual_udvs_steps) stind = 0 for step_index in range(UDVS_mat.shape[0]): if UDVS_mat[step_index, 2] < 1E-3: # invalid AC amplitude continue # skip spec_inds[0, stind:stind + bins_per_step] = np.arange( bins_per_step, dtype=INDICES_DTYPE) # Bin step spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones( bins_per_step, dtype=INDICES_DTYPE) # UDVS step stind += bins_per_step del stind, step_index # Some very basic information that can help the processing / analysis crew parm_dict['num_bins'] = tot_bins parm_dict['num_pix'] = num_pix parm_dict['num_udvs_steps'] = num_actual_udvs_steps global_parms = generate_dummy_main_parms() global_parms['grid_size_x'] = parm_dict['grid_num_cols'] global_parms['grid_size_y'] = parm_dict['grid_num_rows'] global_parms['experiment_date'] = parm_dict['File_date_and_time'] # assuming that the experiment was completed: global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1 global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1 global_parms['data_type'] = parm_dict[ 'data_type'] # self.__class__.__name__ global_parms['translator'] = 'ODF' write_simple_attrs(self.h5_file, global_parms) # Create Measurement and Channel groups meas_grp = create_indexed_group(self.h5_file, 'Measurement') write_simple_attrs(meas_grp, parm_dict) chan_grp = create_indexed_group(meas_grp, 'Channel') chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1'] # Create Auxilliary Datasets h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm) udvs_slices = dict() for col_ind, col_name in enumerate(UDVS_labs): udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1)) h5_UDVS = chan_grp.create_dataset('UDVS', data=UDVS_mat, dtype=np.float32) write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units}) h5_bin_steps = chan_grp.create_dataset('Bin_Steps', data=np.arange(bins_per_step, dtype=np.uint32), dtype=np.uint32) # Need to add the Bin Waveform type - infer from UDVS exec_bin_vec = self.signal_type * np.ones(len(bin_inds), dtype=np.int32) h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type', data=exec_bin_vec, dtype=np.int32) h5_bin_inds = chan_grp.create_dataset('Bin_Indices', data=bin_inds, dtype=np.uint32) h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=bin_freqs, dtype=np.float32) h5_bin_FFT = chan_grp.create_dataset('Bin_FFT', data=bin_FFT, dtype=np.complex64) # Noise floor should be of shape: (udvs_steps x 3 x positions) h5_noise_floor = chan_grp.create_dataset( 'Noise_Floor', shape=(num_pix, num_actual_udvs_steps), dtype=nf32, chunks=(1, num_actual_udvs_steps)) """ ONLY ALLOCATING SPACE FOR MAIN DATA HERE! Chunk by each UDVS step - this makes it easy / quick to: 1. read data for a single UDVS step from all pixels 2. read an entire / multiple pixels at a time The only problem is that a typical UDVS step containing 50 steps occupies only 400 bytes. This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes meaning that the metadata would be very substantial. This assumption is fine since we almost do not handle any user defined cases """ """ New Method for chunking the Main_Data dataset. Chunking is now done in N-by-N squares of UDVS steps by pixels. 
N is determined dynamically based on the dimensions of the dataset. Currently it is set such that individual chunks are less than 10 kB in size. Chris Smith -- [email protected] """ pos_dims = [ Dimension('X', 'nm', num_cols), Dimension('Y', 'nm', num_rows) ] # Create Spectroscopic Values and Spectroscopic Values Labels datasets spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals( UDVS_mat, spec_inds, bin_freqs, exec_bin_vec, parm_dict, UDVS_labs, UDVS_units) spec_dims = list() for row_ind, row_name in enumerate(spec_vals_labs): spec_dims.append( Dimension(row_name, spec_vals_units[row_ind], spec_vals[row_ind])) pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps, bins_per_step, np.dtype('complex64').itemsize) chunking = np.floor(np.sqrt(pixel_chunking)) chunking = max(1, chunking) chunking = int(min(num_actual_udvs_steps, num_pix, chunking)) self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins), 'Raw_Data', 'Piezoresponse', 'V', pos_dims, spec_dims, dtype=np.complex64, chunks=(chunking, chunking * bins_per_step), compression='gzip') self.mean_resp = np.zeros(shape=(self.h5_main.shape[1]), dtype=np.complex64) self.max_resp = np.zeros(shape=(self.h5_main.shape[0]), dtype=np.float32) self.min_resp = np.zeros(shape=(self.h5_main.shape[0]), dtype=np.float32) # Now read the raw data files: self._read_data(path_dict['read_real'], path_dict['read_imag'], parm_dict) self.h5_file.flush() generatePlotGroups(self.h5_main, self.mean_resp, folder_path, basename, self.max_resp, self.min_resp, max_mem_mb=self.max_ram, spec_label=spec_label, show_plots=show_plots, save_plots=save_plots, do_histogram=do_histogram) self.h5_file.close() return h5_path
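# A back-of-the-envelope sketch (not part of the translator above) of the chunk
# sizing used for Raw_Data: roughly square chunks of (pixels x UDVS steps) that
# stay under ~10 kB. maxReadPixels is the library helper; this only mimics the
# arithmetic with toy numbers for illustration, and the helper name is made up.
def _demo_chunk_sizing():
    import numpy as np

    num_pix = 2500                       # toy grid: 50 x 50 positions
    num_actual_udvs_steps = 64
    bins_per_step = 50
    bytes_per_value = np.dtype('complex64').itemsize   # 8 bytes
    target_bytes = 10240

    # How many (pixel, UDVS step) cells fit in the target, then take a square root
    pixel_chunking = target_bytes // (bins_per_step * bytes_per_value)
    chunking = int(np.floor(np.sqrt(pixel_chunking)))
    chunking = max(1, min(num_actual_udvs_steps, num_pix, chunking))

    chunk_shape = (chunking, chunking * bins_per_step)
    chunk_bytes = chunk_shape[0] * chunk_shape[1] * bytes_per_value
    return chunk_shape, chunk_bytes      # (5, 250), 10000 bytes <= 10240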
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows.
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice}, optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in MB.
        Default - 1024 MB

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset
    """
    comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    # print('max_cores', max_cores)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)

    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} MB each.'.format(mem_per_pix * batch_size / 1024.0 ** 2))

    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print('Completed reconstruction of data from SVD results. Writing to file.')

    '''
    Create the Group and dataset to hold the rebuilt data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None, None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
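# A NumPy-only check (not part of rebuild_svd above) of the reconstruction formula
# it uses: rebuilt = U[:, comps] @ diag(S[comps]) @ V[comps, :], accumulated over
# row batches. Sizes, batch size and random data are toy values; plain slices
# stand in for the gen_batches helper, and the function name is illustrative only.
def _demo_svd_rebuild():
    import numpy as np

    rng = np.random.default_rng(0)
    data = rng.standard_normal((100, 40)).astype(np.float32)

    u, s, v = np.linalg.svd(data, full_matrices=False)
    k = 10                                      # components to keep
    ds_v = np.dot(np.diag(s[:k]), v[:k, :])     # diag(S) @ V, as in rebuild_svd

    rebuilt = np.zeros_like(data)
    for start in range(0, u.shape[0], 25):      # batched accumulation over rows
        batch = slice(start, start + 25)
        rebuilt[batch, :] += np.dot(u[batch, :k], ds_v)

    # Sanity check: keeping all components reproduces the data to within float error
    full = np.dot(u, np.dot(np.diag(s), v))
    assert np.allclose(full, data, atol=1e-4)
    return rebuilt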