def _axes_list_to_dimensions(axes_list, data_shape, is_spec):
    """
    Build pyUSID ``Dimension`` objects from a sequence of axis descriptors.

    Parameters
    ----------
    axes_list : sequence
        Axis objects exposing ``name``, ``units``, ``scale`` and ``offset``.
    data_shape : sequence of int
        Shape of the data. The axis at index ``i`` of ``axes_list`` takes its
        length from ``data_shape[len(data_shape) - 1 - i]``, i.e. the shape
        is read back to front.
    is_spec : bool
        Selects the prefix of the fallback dimension name: ``'Spec'`` when
        truthy, ``'Pos'`` otherwise.

    Returns
    -------
    list of usid.Dimension or usid.Dimension
        The dimensions in the reverse order of ``axes_list``. When
        ``axes_list`` is empty, a single placeholder ``Dimension`` (not a
        list) is returned.
    """
    prefix = 'Spec' if is_spec else 'Pos'
    last_index = len(data_shape) - 1
    dimensions = []

    for position, axis in enumerate(axes_list):
        # Fall back to a generated name / arbitrary units whenever the axis
        # carries no usable (non-blank string) value.
        raw_name = axis.name
        cleaned_name = raw_name.strip() if isinstance(raw_name, str) else ''
        if not cleaned_name:
            cleaned_name = prefix + '_Dim_' + str(position)

        raw_units = axis.units
        cleaned_units = raw_units.strip() if isinstance(raw_units, str) else ''
        if not cleaned_units:
            cleaned_units = 'a. u.'

        # Use the REAL dimension size taken from the data rather than what
        # is presented in the axes manager.
        n_points = data_shape[last_index - position]
        values = np.arange(n_points) * axis.scale + axis.offset
        dimensions.append(usid.Dimension(cleaned_name, cleaned_units, values))

    if not dimensions:
        return usid.Dimension('Arb', 'a. u.', 1)
    dimensions.reverse()
    return dimensions
def gen_2pos_2spec(s2f_aux=True, mode=None):
    """
    Generate random test data with two position and two spectroscopic
    dimensions.

    Parameters
    ----------
    s2f_aux : bool, optional
        If True (default), both dimension lists are reversed before being
        returned, giving ``[Y, X]`` and ``[Bias, Frequency]``, which matches
        the axis order of ``ndata``. If False they are returned as
        ``[X, Y]`` and ``[Frequency, Bias]``.
    mode : {None, 'complex', 'compound'}, optional
        Kind of data to generate: ``None`` for real floating point values,
        ``'complex'`` for complex values, ``'compound'`` for a structured
        array with fields ``'amp'`` (float16) and ``'phas'`` (float32).

    Returns
    -------
    pos_dims : list of usid.Dimension
    spec_dims : list of usid.Dimension
    ndata : numpy.ndarray
        Random data of shape ``(7, 2, 5, 3)``.
    data_2d : numpy.ndarray
        ``ndata`` reshaped to ``(14, 15)``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """
    # Validate up front: an unknown mode used to fall through every branch
    # and fail later with an obscure UnboundLocalError on ``ndata``.
    if mode not in (None, 'complex', 'compound'):
        raise ValueError(
            "mode must be one of None, 'complex' or 'compound'; "
            "got {!r}".format(mode))

    pos_dims = [
        usid.Dimension('X', 'nm', [-250, 750]),
        usid.Dimension('Y', 'um', np.linspace(0, 60, num=7)),
    ]
    spec_dims = [
        usid.Dimension('Frequency', 'kHz', [300, 350, 400]),
        usid.Dimension('Bias', 'V', np.linspace(-4, 4, num=5)),
    ]
    if s2f_aux:
        pos_dims = pos_dims[::-1]
        spec_dims = spec_dims[::-1]

    ndim_shape = (7, 2, 5, 3)
    if mode is None:
        # Typical floating point dataset
        ndata = np.random.rand(*ndim_shape)
    elif mode == 'complex':
        ndata = np.random.rand(*ndim_shape) \
            + 1j * np.random.rand(*ndim_shape)
    else:
        # mode == 'compound': structured array with two named fields
        struc_dtype = np.dtype({
            'names': ['amp', 'phas'],
            'formats': [np.float16, np.float32],
        })
        ndata = np.zeros(shape=ndim_shape, dtype=struc_dtype)
        ndata['amp'] = np.random.random(size=ndim_shape)
        ndata['phas'] = np.random.random(size=ndim_shape)

    # Collapse the first two axes into rows and the last two into columns
    data_2d = ndata.reshape(np.prod(ndata.shape[:2]),
                            np.prod(ndata.shape[2:]))
    return pos_dims, spec_dims, ndata, data_2d
def _save_converted_data(self, data2d, ave_spectrum, total_3d_map):
    '''
    Write a converted data block as a USID Main dataset and store the
    averaged data next to it.

    Inputs:
    --------
        data2d : numpy.ndarray
            2D array passed as ``main_data`` of the 'Data_full' dataset
            (observations along the vertical axis, spectroscopic dimension
            along the horizontal axis)
        ave_spectrum : numpy.ndarray
            previously calculated average spectrum, stored as
            'Averaged_data/Ave_spectrum'
        total_3d_map : numpy.ndarray
            previously calculated total ion map, stored as
            'Averaged_data/Total_3d_map'
    '''
    # At least one depth point is always written
    depth_points = max(self.z_points, 1)

    y_values = np.linspace(0, self.y_points, self.y_points) * self.xy_resolution
    x_values = np.linspace(0, self.x_points, self.x_points) * self.xy_resolution
    z_values = np.linspace(0, -depth_points, depth_points) * self.z_resolution
    position_dimensions = [
        usid.Dimension('y', 'um', y_values),
        usid.Dimension('x', 'um', x_values),
        usid.Dimension('z', 'um', z_values),
    ]
    spectroscopic_dimension = usid.Dimension('m/z', 'Th', self.spectra_mass)

    usid.hdf_utils.write_main_dataset(
        self.conv_h5f[self.h5_grp_name],
        main_data=data2d,
        main_data_name='Data_full',
        quantity='mass-to-charge ratio',
        units='Th',
        pos_dims=position_dimensions,
        spec_dims=spectroscopic_dimension,
    )

    averaged_group = self.conv_h5f[self.h5_grp_name].create_group('Averaged_data')
    for dataset_name, values in (('Ave_spectrum', ave_spectrum),
                                 ('Total_3d_map', total_3d_map)):
        averaged_group.create_dataset(dataset_name, data=values)

    self.conv_h5f.flush()
def test_non_linear_dimension(self):
    """
    Check loading of a USID file that contains a non-linear dimension.

    The 'Bias' spectroscopic dimension is sampled from a sine and is
    therefore not linearly spaced. Loading with
    ``ignore_non_linear_dims=False`` must raise ``ValueError``; loading with
    default arguments must emit a ``UserWarning`` and still return a signal
    that passes ``compare_signal_from_usid`` with ``invalid_axes=True``.
    """
    # Local import so this test does not rely on ``os`` being imported at
    # module level.
    import os

    pos_dims = [
        usid.Dimension('Y', 'um', np.linspace(0, 60, num=5)),
        usid.Dimension('X', 'nm', [-250, 750]),
    ]
    spec_dims = [
        usid.Dimension('Bias', 'V',
                       np.sin(np.linspace(0, 2 * np.pi, num=7))),
        usid.Dimension('Frequency', 'kHz', [300, 350, 400]),
    ]

    ndata = np.random.rand(5, 2, 7, 3)
    phy_quant = 'Current'
    phy_unit = 'nA'
    data_2d = ndata.reshape(np.prod(ndata.shape[:2]),
                            np.prod(ndata.shape[2:]))

    tran = usid.ArrayTranslator()
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Join with a path separator: plain string concatenation placed the
        # file NEXT TO the temporary directory, so it was never cleaned up.
        file_path = os.path.join(tmp_dir,
                                 'usid_n_pos_n_spec_non_lin_dim.h5')
        _ = tran.translate(file_path, 'Blah', data_2d, phy_quant, phy_unit,
                           pos_dims, spec_dims, slow_to_fast=True)

        with pytest.raises(ValueError):
            _ = hs.load(file_path, ignore_non_linear_dims=False)

        with pytest.warns(UserWarning):
            new_sig = hs.load(file_path)

        # Must run while the temporary directory (and the file) still exists
        compare_signal_from_usid(file_path, ndata, new_sig,
                                 axes_to_spec=['Frequency', 'Bias'],
                                 invalid_axes=True)
def _create_results_grp_dsets(h5_main, process_name, parms_dict,
                              h5_parent_group=None):
    """
    Create a results group for ``h5_main`` and an empty 'Results' dataset
    inside it.

    Parameters
    ----------
    h5_main
        Main dataset the results belong to. Its ``shape[0]``,
        ``h5_pos_inds`` and ``h5_pos_vals`` are reused for the new dataset.
    process_name : str
        Name handed to ``create_results_group``.
    parms_dict : dict
        Written to the results group via ``write_simple_attrs``.
    h5_parent_group : optional
        Forwarded to ``create_results_group``.

    Returns
    -------
    tuple
        ``(results group, results dataset)``
    """
    hdf = usid.hdf_utils

    # 1. Results group
    results_group = hdf.create_results_group(
        h5_main, process_name, h5_parent_group=h5_parent_group)

    # 2. Parameters become attributes of the group
    hdf.write_simple_attrs(results_group, parms_dict)

    # 3. Create an empty results dataset that will hold all the results:
    #    one column per row of h5_main, sharing the position datasets of
    #    h5_main and using a placeholder spectroscopic dimension.
    placeholder_spec = usid.Dimension('Empty', 'a. u.', 1)
    empty_shape = (h5_main.shape[0], 1)
    results_dset = hdf.write_main_dataset(
        results_group, empty_shape, 'Results', 'quantity', 'units',
        None, placeholder_spec,
        dtype=np.float32,
        h5_pos_inds=h5_main.h5_pos_inds,
        h5_pos_vals=h5_main.h5_pos_vals)

    return results_group, results_dset
def gen_2dim(all_pos=False, s2f_aux=True):
    """
    Generate random two-dimensional test data and matching dimensions.

    Parameters
    ----------
    all_pos : bool, optional
        If True, both real dimensions ('X', 'Y') are position dimensions and
        the data is flattened to a single column. Otherwise (default) both
        real dimensions ('Frequency', 'Bias') are spectroscopic and the data
        is flattened to a single row.
    s2f_aux : bool, optional
        If True (default), both dimension lists are reversed before being
        returned.

    Returns
    -------
    pos_dims : list of usid.Dimension
    spec_dims : list of usid.Dimension
    ndata : numpy.ndarray
        Random data of shape ``(3, 2)``.
    data_2d : numpy.ndarray
        ``ndata`` reshaped to ``(6, 1)`` or ``(1, 6)``.
    """
    ndata = np.random.rand(3, 2)

    # Single-valued stand-in for the side that has no real dimension
    placeholder = [usid.Dimension('arb', 'a.u.', 1)]

    if all_pos:
        pos_dims = [usid.Dimension('X', 'nm', [-250, 750]),
                    usid.Dimension('Y', 'um', [-2, 0, 2])]
        spec_dims = placeholder
        flat_shape = (-1, 1)
    else:
        pos_dims = placeholder
        spec_dims = [usid.Dimension('Frequency', 'kHz', [0, 100]),
                     usid.Dimension('Bias', 'V', [-5, 0, 5])]
        flat_shape = (1, -1)

    data_2d = ndata.reshape(*flat_shape)

    if s2f_aux:
        pos_dims, spec_dims = pos_dims[::-1], spec_dims[::-1]

    return pos_dims, spec_dims, ndata, data_2d
def translate(self, input_file_path):
    """
    Extracts the data and metadata out of proprietary formatted files and
    writes it into a USID formatted HDF5 file

    Parameters
    ----------
    input_file_path : str
        Path to the input data file containing all the information

    Returns
    -------
    h5_path : str
        Path to the USID HDF5 output file. It is placed next to the input
        file and named after it, with the extension replaced by
        ``_Class.h5``.
    """
    # ----------------------------------------------------------------------
    # 1. Extracting data and metadata out of the proprietary file
    # ----------------------------------------------------------------------
    # 1.2 Read the contents of the file into memory
    with open(input_file_path, 'r') as file_handle:
        string_lines = file_handle.readlines()

    # 1.3 Extract all experiment and instrument related parameters.
    # Header lines 3..16 are read as "# key = value" pairs.
    parm_dict = dict()
    for line in string_lines[3:17]:
        line = line.replace('# ', '')
        line = line.replace('\n', '')
        temp = line.split('=')
        test = temp[1].strip()
        try:
            test = float(test)
            # Store whole numbers as int rather than float
            if test % 1 == 0:
                test = int(test)
        except ValueError:
            # Not a number: keep the raw string
            pass
        parm_dict[temp[0].strip()] = test

    # 1.4 Extract or generate parameters that define the three dimensions
    num_rows = int(parm_dict['y-pixels'])
    num_cols = int(parm_dict['x-pixels'])
    num_pos = num_rows * num_cols
    spectra_length = int(parm_dict['z-points'])

    # We will assume that data was collected from -3 nm to +7 nm on the
    # Y-axis or along the rows
    y_qty = 'Y'
    y_units = 'nm'
    y_vec = np.linspace(-3, 7, num_rows, endpoint=True)

    # We will assume that data was collected from -5 nm to +5 nm on the
    # X-axis or along the columns
    x_qty = 'X'
    x_units = 'nm'
    x_vec = np.linspace(-5, 5, num_cols, endpoint=True)

    # The bias was sampled from -1 to +1 V in the experiment.
    # Here is how we generate the Bias axis:
    bias_qty = 'Bias'
    bias_units = 'V'
    bias_vec = np.linspace(-1, 1, spectra_length)

    # 1.5 Extract the data: one tab-separated spectrum per line, starting
    # after the header lines
    num_headers = 403
    raw_data_2d = np.zeros(shape=(num_pos, spectra_length),
                           dtype=np.float32)
    # Iterate over every measurement position:
    for pos_index in range(num_pos):
        this_line = string_lines[num_headers + pos_index]
        # omitting the new line
        string_spectrum = this_line.split('\t')[:-1]
        raw_data_2d[pos_index] = np.array(string_spectrum,
                                          dtype=np.float32)

    # 2.1 Prepare the output file path.
    # Derived from the ``input_file_path`` argument; the previous code used
    # the undefined name ``data_file_path`` here.
    folder_path, file_name = os.path.split(input_file_path)
    base_name = os.path.splitext(file_name)[0]
    h5_path = os.path.join(folder_path, base_name + '_Class' + '.h5')

    # ----------------------------------------------------------------------
    # 2.B Writing to h5USID file using pyUSID
    # ----------------------------------------------------------------------
    # 2.B.2 Expressing the Position and Spectroscopic Dimensions using
    # pyUSID.Dimension objects
    pos_dims = [
        usid.Dimension(x_qty, x_units, x_vec),
        usid.Dimension(y_qty, y_units, y_vec),
    ]
    spec_dims = usid.Dimension(bias_qty, bias_units, bias_vec)

    # 2.B.3 Reshape the Main data from its original N-dimensional form to
    # the USID 2D form. We skip this step since it is unnecessary in this
    # case.

    # 2.B.4 Call the translate() function of the base NumpyTranslator class
    # NOTE(review): main_data_name, main_qty and main_units are not defined
    # in this method; they must be available at module scope - confirm.
    _ = super(ExampleTranslator, self).translate(
        h5_path, main_data_name, raw_data_2d, main_qty, main_units,
        pos_dims, spec_dims, parm_dict=parm_dict)

    return h5_path
# # 2.A.2 Preparing `Dimension` objects # =================================== # Before the ``NumpyTranslator`` can be used, we need to formally define the dimensions that define the # three-dimensional measurement in the data file. In this example, we have two `Position` dimensions - ``X`` and ``Y`` # and one `Spectroscopic` dimension - ``Bias`` against which data for each spectra were collected. # # In pyUSID, we formally define dimensions using simple # `pyUSID.Dimension <../intermediate/plot_write_utils.html#dimension>`_ objects. These ``Dimension`` objects are simply # descriptors of dimensions and take the name of the quantity, physical units, and the values over which the dimension # was varied. Both, the `Position` and `Spectroscopic` dimensions need to be defined using ``Dimension`` objects and the # ``Dimension`` objects should be arranged from fastest varying to slowest varying dimensions. # # The `Spectroscopic` dimensions are trivial since we only have one dimension - ``Bias``. spec_dims = usid.Dimension(bias_qty, bias_units, bias_vec) #################################################################################### # Given that the spectra were acquired column-by-column and then row-by-row, we would need to arrange the `Position` # dimensions as ``X`` followed by ``Y``. pos_dims = [ usid.Dimension(x_qty, x_units, x_vec), usid.Dimension(y_qty, y_units, y_vec) ] #################################################################################### # 2.A.3 Reshape the Main data (if necessary) # ========================================== # Recall that ``Main`` datasets in USID are two dimensional in shape where all position dimensions (``X``, and ``Y`` in # this case) are collapsed along the first axis and the spectroscopic dimensions (``Bias`` in this case) are
def _convert_multiprocessing(self, shift_corr=False):
    '''
    Convert the raw SIMS data into a USID Main dataset using a pool of
    worker processes, then store the averaged spectrum and the total ion
    map in an 'Averaged_data' group.

    Inputs:
    --------
        shift_corr : bool
            Currently not used by this method.
    '''
    t0 = time.time()
    print("SIMS data conversion...")

    # At least one depth point is always written. This is computed BEFORE
    # the first use below; printing it earlier raised UnboundLocalError.
    z_points = max(self.z_points, 1)
    print('dset size = (', self.x_points, self.y_points, z_points,
          self.spectra_len, ')')

    position_dimensions = [
        usid.Dimension('y', 'um',
                       np.linspace(0, self.y_points, self.y_points) * self.xy_resolution),
        usid.Dimension('x', 'um',
                       np.linspace(0, self.x_points, self.x_points) * self.xy_resolution),
        usid.Dimension('z', 'um',
                       np.linspace(0, -z_points, z_points) * self.z_resolution),
    ]
    spectroscopic_dimension = usid.Dimension('m/z', 'Th', self.spectra_mass)

    # NOTE(review): no ``main_data`` (array or shape) is passed here, unlike
    # other write_main_dataset calls - confirm against the pyUSID signature.
    dset = usid.hdf_utils.write_main_dataset(
        self.conv_h5f[self.h5_grp_name],
        main_data_name='Data_full',
        quantity='mass-to-charge ratio',
        units='Th',
        pos_dims=position_dimensions,
        spec_dims=spectroscopic_dimension
    )
    print('USID Main Dataset created successfully, beginning conversion.')

    pos_indices = self.conv_h5f[self.h5_grp_name]['Position_Indices']
    counts = self.raw_h5f['Raw_data']['Raw_data'].shape[0]

    def chunk_params():
        # One work item per chunk: (start index, conversion parameters)
        for start in range(0, counts, self.chunk_size):
            yield (start, (self.raw_h5_path, self.chunk_size, self.xy_bins,
                           self.z_bins, self.spectra_tofs,
                           self.tof_resolution))

    # Exact number of work items the generator above yields; the previous
    # ``int(counts / chunk_size) + 1`` over-counted by one whenever counts
    # was an exact multiple of the chunk size.
    n_chunks = len(range(0, counts, self.chunk_size))

    ave_spectrum = np.zeros(self.spectra_len)
    ave_3d_map = np.zeros((z_points, self.x_points, self.y_points))

    chunk_no = 0
    # NOTE(review): ``cc`` is never incremented, so every spectrum is
    # accumulated into row 0 of the dataset and of Position_Indices.
    # Behaviour is left unchanged - confirm the intended row index.
    cc = 0

    lock = Lock()
    pool = Pool(processes=self.cores, initializer=init_mp_lock,
                initargs=(lock,))
    try:
        # Results arrive in completion order, not in submission order
        for out_block in pool.imap_unordered(convert_chunk, chunk_params()):
            for spectrum in out_block:
                # The first three entries are the coordinates, the rest is
                # the spectrum itself
                coord = tuple(spectrum[:3])
                x, y, z = coord
                dset[cc] += spectrum[3:]
                pos_indices[cc] = np.array(coord)
                ave_3d_map[z, x, y] += np.sum(spectrum[3:])
                ave_spectrum += spectrum[3:]
            chunk_no += 1
            print('%d chunks of %d are processed' % (chunk_no, n_chunks))
            self.conv_h5f.flush()
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when the conversion fails
        pool.terminate()
        raise
    finally:
        pool.join()

    print('Conversion finished, saving averaged data...')
    grp = self.conv_h5f[self.h5_grp_name].create_group('Averaged_data')
    grp.create_dataset('Ave_spectrum', data=ave_spectrum)
    # Save the map accumulated above; ``total_3d_map`` was an undefined name
    grp.create_dataset('Total_3d_map', data=ave_3d_map)
    self.conv_h5f.flush()

    print("Conversion complete. %d sec" % (time.time() - t0))
def translate(self, input_folder_path, h5_folder_path):
    """
    This function extracts data and metadata from Omicron Scanning
    Tunnelling Microscope (STM) Flat Files and translates this into the
    Pycroscopy compatible pyUSID format.

    Every entry returned by ``FlatFile(...).getData()`` for every file in
    the input folder is written to its own HDF5 file.

    Parameters
    ----------
    input_folder_path : str
        Path to the input data folder containing all the files and their
        information
    h5_folder_path : str
        Path to h5 data folder to store the USID formatted files. The
        working directory is the input folder while the files are written,
        so a relative path is interpreted relative to the input folder.

    Returns
    -------
    h5_path_array : list of str
        Paths of the USID h5 files that were written
    """
    # ----------------------------------------------------------------------
    # 1. Extracting data and metadata out of the proprietary file
    # ----------------------------------------------------------------------
    # 1.2 Read the contents of the file into memory

    # Resolve the folder BEFORE changing directory: listing a relative path
    # after chdir-ing into it would look in the wrong place.
    input_folder_path = os.path.abspath(input_folder_path)
    prevdir = os.getcwd()
    # Files are opened by bare name below, hence the change of directory
    os.chdir(input_folder_path)

    h5_path_array = []
    try:
        for file_flat in os.listdir(input_folder_path):
            d = FlatFile(file_flat).getData()

            # 1.3 Extract all experiment and instrument related parameters
            for index, _ in enumerate(d):
                raw_data = d[index].data
                metadata = d[index].info

                # 1.4 Prepare the output file path: drop the last seven
                # characters of the file name and append the direction
                _, file_name = os.path.split(file_flat)
                file_name = file_name[:-7] + "_" + metadata["direction"]
                h5_path = os.path.join(h5_folder_path, file_name + ".h5")

                # 1.5 Reshape raw_data into USID 2D shape
                # (position x spectral)
                raw_data_2D = np.reshape(
                    raw_data, (raw_data.shape[0] * raw_data.shape[1], 1))

                # 1.6 Extract or generate parameters that define the
                # position and spectral dimensions. Both axes are centred
                # on zero.
                xaxis = metadata["xreal"] / 2
                yaxis = metadata["yreal"] / 2
                num_rows = int(metadata["yres"])
                num_cols = int(metadata["xres"])

                y_qty = "Y"
                y_units = "nm"
                y_vec = np.linspace(-yaxis, yaxis, num_rows, endpoint=True)

                x_qty = "X"
                x_units = "nm"
                x_vec = np.linspace(-xaxis, xaxis, num_cols, endpoint=True)

                main_data_name = "STM"
                main_qty = "Z-height"
                main_units = "nm"

                # ----------------------------------------------------------
                # 2. Writing to h5USID file using pyUSID
                # ----------------------------------------------------------
                # 2.2 Expressing the Position and Spectroscopic Dimensions
                # using pyUSID.Dimension objects
                pos_dims = [
                    usid.Dimension(x_qty, x_units, x_vec),
                    usid.Dimension(y_qty, y_units, y_vec),
                ]
                spec_dims = usid.Dimension(name="arb.", units="",
                                           values=int(1))

                # 2.3 Call the translate() function of the base
                # NumpyTranslator class
                _ = super(FlatFileTranslator, self).translate(
                    h5_path,
                    main_data_name,
                    raw_data_2D,
                    main_qty,
                    main_units,
                    pos_dims,
                    spec_dims,
                    parm_dict=metadata,
                )
                h5_path_array.append(h5_path)
    finally:
        # Changing back to original directory, even when a file fails
        os.chdir(prevdir)

    return h5_path_array