def _write_results_chunk(self):
    """
    Write the SVD results held on this object (U, S and V) to the HDF5 file.

    A results group is created for ``self.h5_main`` inside
    ``self._h5_target_group`` and the process parameters plus the SVD method
    are stored on it as attributes. U and V are written as main datasets that
    reuse the position / spectroscopic ancillary datasets of ``self.h5_main``;
    S is written as a plain 1D dataset. Region references of the source
    dataset whose attribute name contains ``_Plot_Group`` are re-created on V.
    Finally a ``completed_positions`` dataset filled with ones and the legacy
    ``last_pixel`` attribute are written.
    """
    component_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

    results_grp = create_results_group(self.h5_main, self.process_name,
                                       h5_parent_group=self._h5_target_group)
    self.h5_results_grp = results_grp
    self._write_source_dset_provenance()

    for attrs in (self.parms_dict, {'svd_method': 'sklearn-randomized'}):
        write_simple_attrs(results_grp, attrs)

    # U shares the position ancillary datasets of the source dataset
    write_main_dataset(results_grp, np.float32(self.__u), 'U', 'Abundance', 'a.u.',
                       None, component_dim,
                       h5_pos_inds=self.h5_main.h5_pos_inds,
                       h5_pos_vals=self.h5_main.h5_pos_vals,
                       dtype=np.float32,
                       chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))

    # V shares the spectroscopic ancillary datasets of the source dataset
    h5_v = write_main_dataset(results_grp, self.__v, 'V',
                              get_attr(self.h5_main, 'quantity')[0], 'a.u.',
                              component_dim, None,
                              h5_spec_inds=self.h5_main.h5_spec_inds,
                              h5_spec_vals=self.h5_main.h5_spec_vals,
                              chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

    # No point making this 1D dataset a main dataset
    results_grp.create_dataset('S', data=np.float32(self.__s))

    # Copy plot-group region references from the source dataset onto V
    for attr_name in self.h5_main.attrs.keys():
        if '_Plot_Group' in attr_name:
            corners = get_indices_for_region_ref(self.h5_main,
                                                 self.h5_main.attrs[attr_name],
                                                 return_method='corners')
            corners = corners.reshape([-1, 2, 2])
            # Stretch the first axis of every region to the last row of V
            corners[:, 1, 0] = h5_v.shape[0] - 1
            h5_v.attrs[attr_name] = create_region_reference(h5_v, corners)

    # Marking completion:
    self._status_dset_name = 'completed_positions'
    all_done = np.ones(self.h5_main.shape[0], dtype=np.uint8)
    self._h5_status_dset = results_grp.create_dataset(self._status_dset_name, data=all_done)
    # keeping legacy option:
    results_grp.attrs['last_pixel'] = self.h5_main.shape[0]
def _write_results_chunk(self):
    """
    Write the SVD results held on this object (U, S and V) to the HDF5 file.

    A results group is created for ``self.h5_main`` and the process
    parameters plus the SVD method are stored on it as attributes. U and V
    are written as main datasets that reuse the position / spectroscopic
    ancillary datasets of ``self.h5_main``; S is written as a plain 1D
    dataset. Region references of the source dataset whose attribute name
    contains ``_Plot_Group`` are re-created on V. Finally a
    ``completed_positions`` dataset filled with ones and the legacy
    ``last_pixel`` attribute are written.
    """
    component_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

    results_grp = create_results_group(self.h5_main, self.process_name)
    self.h5_results_grp = results_grp

    for attrs in (self.parms_dict, {'svd_method': 'sklearn-randomized'}):
        write_simple_attrs(results_grp, attrs)

    # U shares the position ancillary datasets of the source dataset
    write_main_dataset(results_grp, np.float32(self.__u), 'U', 'Abundance', 'a.u.',
                       None, component_dim,
                       h5_pos_inds=self.h5_main.h5_pos_inds,
                       h5_pos_vals=self.h5_main.h5_pos_vals,
                       dtype=np.float32,
                       chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))

    # V shares the spectroscopic ancillary datasets of the source dataset
    h5_v = write_main_dataset(results_grp, self.__v, 'V',
                              get_attr(self.h5_main, 'quantity')[0], 'a.u.',
                              component_dim, None,
                              h5_spec_inds=self.h5_main.h5_spec_inds,
                              h5_spec_vals=self.h5_main.h5_spec_vals,
                              chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

    # No point making this 1D dataset a main dataset
    results_grp.create_dataset('S', data=np.float32(self.__s))

    # Copy plot-group region references from the source dataset onto V
    for attr_name in self.h5_main.attrs.keys():
        if '_Plot_Group' in attr_name:
            corners = get_indices_for_region_ref(self.h5_main,
                                                 self.h5_main.attrs[attr_name],
                                                 return_method='corners')
            corners = corners.reshape([-1, 2, 2])
            # Stretch the first axis of every region to the last row of V
            corners[:, 1, 0] = h5_v.shape[0] - 1
            h5_v.attrs[attr_name] = create_region_reference(h5_v, corners)

    # Marking completion:
    self._status_dset_name = 'completed_positions'
    all_done = np.ones(self.h5_main.shape[0], dtype=np.uint8)
    self._h5_status_dset = results_grp.create_dataset(self._status_dset_name, data=all_done)
    # keeping legacy option:
    results_grp.attrs['last_pixel'] = self.h5_main.shape[0]
def test_shape_mismatch(self):
    """calc_chunks raises ValueError when unit_chunks has three entries for a 2D shape."""
    shape = (16384, 16384 * 4)
    with self.assertRaises(ValueError):
        write_utils.calc_chunks(shape, 4, unit_chunks=(1, 5, 9))
def test_unit_not_iterable(self):
    """calc_chunks raises TypeError when unit_chunks is a bare integer."""
    shape = (16384, 16384 * 4)
    with self.assertRaises(TypeError):
        write_utils.calc_chunks(shape, 4, unit_chunks=4)
def test_unit_chunk_max_mem(self):
    """Unit chunks of (3, 7) with max_chunk_mem=50000 give chunks of (57, 224)."""
    shape = (16384, 16384 * 4)
    chunks = write_utils.calc_chunks(shape, 4, unit_chunks=(3, 7), max_chunk_mem=50000)
    self.assertTrue(np.allclose(chunks, (57, 224)))
def test_unit_chunk(self):
    """Unit chunks of (3, 7) with the default memory limit give chunks of (27, 98)."""
    shape = (16384, 16384 * 4)
    chunks = write_utils.calc_chunks(shape, 4, unit_chunks=(3, 7))
    self.assertTrue(np.allclose(chunks, (27, 98)))
def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y):
    """
    Setup the HDF5 file in which to store the data including creating
    the Position and Spectroscopic datasets

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type or numpy.dtype
        Data type to save image as. Its item size is looked up through
        ``numpy.dtype``, so scalar types and dtype instances both work.
    scan_size_x : int
        Number of images in the x dimension
    scan_size_y : int
        Number of images in the y dimension

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        HDF5 Dataset that the mean over all positions will be written into
    h5_ronch : h5py.Dataset
        HDF5 Dataset that the mean over all Spectroscopic steps will be written into
    """
    num_pixels = usize * vsize
    num_files = scan_size_x * scan_size_y

    root_parms = {'data_type': 'ImageStackData'}

    main_parms = {'num_images': num_files,
                  'image_size_u': usize,
                  'image_size_v': vsize,
                  'num_pixels': num_pixels,
                  'translator': 'ImageStack',
                  'scan_size_x': scan_size_x,
                  'scan_size_y': scan_size_y}

    # Create the hdf5 data Group
    write_simple_attrs(self.h5_file, root_parms)
    meas_grp = create_indexed_group(self.h5_file, 'Measurement')
    write_simple_attrs(meas_grp, main_parms)
    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Build the Position and Spectroscopic Datasets
    spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                 Dimension('V', 'pixel', np.arange(vsize))]
    pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                Dimension('Y', 'pixel', np.arange(scan_size_y))]

    # np.dtype(...) also accepts dtype instances, which are not callable
    # and therefore broke the former `data_type(0).itemsize`.
    # One whole image is the smallest chunking unit.
    ds_chunking = calc_chunks([num_files, num_pixels],
                              np.dtype(data_type).itemsize,
                              unit_chunks=(1, num_pixels))

    # Allocate space for Main_Data and Pixel averaged Data
    h5_main = write_main_dataset(chan_grp, (num_files, num_pixels), 'Raw_Data',
                                 'Intensity', 'a.u.',
                                 pos_desc, spec_desc,
                                 chunks=ds_chunking, dtype=data_type)

    h5_ronch = meas_grp.create_dataset('Stack_Mean',
                                       data=np.zeros(num_pixels, dtype=np.float32),
                                       dtype=np.float32)
    h5_mean_spec = meas_grp.create_dataset('Image_Means',
                                           data=np.zeros(num_files, dtype=np.float32),
                                           dtype=np.float32)

    self.h5_file.flush()

    return h5_main, h5_mean_spec, h5_ronch
def _setupH5(self, usize, vsize, data_type, num_images, main_parms):
    """
    Setup the HDF5 file in which to store the data including creating
    the Position and Spectroscopic datasets

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type or numpy.dtype
        Data type to save image as. Its item size is looked up through
        ``numpy.dtype``, so scalar types and dtype instances both work.
    num_images : int
        Number of images in the movie
    main_parms : dict
        Parameters written to the Measurement group as attributes. The
        dictionary is updated in place with the image count, image size,
        pixel count and translator name before it is written.

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        HDF5 Dataset that the mean over all positions will be written into
    h5_ronch : h5py.Dataset
        HDF5 Dataset that the mean over all Spectroscopic steps will be written into
    """
    num_pixels = usize * vsize

    root_parms = generate_dummy_main_parms()
    root_parms['data_type'] = 'PtychographyData'

    # Deliberately in place: the caller's dictionary receives these entries
    main_parms['num_images'] = num_images
    main_parms['image_size_u'] = usize
    main_parms['image_size_v'] = vsize
    main_parms['num_pixels'] = num_pixels
    main_parms['translator'] = 'Movie'

    # Create the hdf5 data Group
    write_simple_attrs(self.h5_file, root_parms)
    meas_grp = create_indexed_group(self.h5_file, 'Measurement')
    write_simple_attrs(meas_grp, main_parms)
    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Build the Position and Spectroscopic Datasets
    spec_dim = Dimension('Time', 's', np.arange(num_images))
    pos_dims = [Dimension('X', 'a.u.', np.arange(usize)),
                Dimension('Y', 'a.u.', np.arange(vsize))]

    # np.dtype(...) also accepts dtype instances, which are not callable
    # and therefore broke the former `data_type(0).itemsize`.
    # One whole frame is the smallest chunking unit.
    ds_chunking = calc_chunks([num_pixels, num_images],
                              np.dtype(data_type).itemsize,
                              unit_chunks=(num_pixels, 1))

    # Allocate space for Main_Data and Pixel averaged Data
    h5_main = write_main_dataset(chan_grp, (num_pixels, num_images), 'Raw_Data',
                                 'Intensity', 'a.u.',
                                 pos_dims, spec_dim,
                                 chunks=ds_chunking, dtype=data_type)

    h5_ronch = meas_grp.create_dataset('Mean_Ronchigram',
                                       data=np.zeros(num_pixels, dtype=np.float32),
                                       dtype=np.float32)
    h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                           data=np.zeros(num_images, dtype=np.float32),
                                           dtype=np.float32)

    self.h5_file.flush()

    return h5_main, h5_mean_spec, h5_ronch
def test_invalid_types(self):
    """calc_chunks raises TypeError for a string shape and for a float item size."""
    # Dimensions given as a string
    with self.assertRaises(TypeError):
        write_utils.calc_chunks("Fdfd", 14)

    # Item size given as a float
    with self.assertRaises(TypeError):
        write_utils.calc_chunks((16384, 16384 * 4), 2.124)
def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y, image_parms):
    """
    Setup the HDF5 file in which to store the data including creating
    the Position and Spectroscopic datasets

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type or numpy.dtype
        Data type to save image as. Its item size is looked up through
        ``numpy.dtype``, so scalar types and dtype instances both work.
    scan_size_x : int
        Number of images in the x dimension
    scan_size_y : int
        Number of images in the y dimension
    image_parms : dict
        Dictionary of parameters. Merged over the computed Measurement
        attributes, so its entries win on key clashes.

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        HDF5 Dataset that the mean over all positions will be written into
    h5_ronch : h5py.Dataset
        HDF5 Dataset that the mean over all Spectroscopic steps will be written into
    """
    num_pixels = usize * vsize
    num_files = scan_size_x * scan_size_y

    root_parms = generate_dummy_main_parms()
    root_parms['data_type'] = 'PtychographyData'

    main_parms = {'num_images': num_files,
                  'image_size_u': usize,
                  'image_size_v': vsize,
                  'num_pixels': num_pixels,
                  'translator': 'Ptychography',
                  'scan_size_x': scan_size_x,
                  'scan_size_y': scan_size_y}
    main_parms.update(image_parms)

    # Create the hdf5 data Group
    write_simple_attrs(self.h5_f, root_parms)
    meas_grp = create_indexed_group(self.h5_f, 'Measurement')
    write_simple_attrs(meas_grp, main_parms)
    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Build the Position and Spectroscopic Datasets
    spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                 Dimension('V', 'pixel', np.arange(vsize))]
    pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                Dimension('Y', 'pixel', np.arange(scan_size_y))]

    # np.dtype(...) also accepts dtype instances, which are not callable
    # and therefore broke the former `data_type(0).itemsize`.
    # One whole image is the smallest chunking unit.
    ds_chunking = calc_chunks([num_files, num_pixels],
                              np.dtype(data_type).itemsize,
                              unit_chunks=(1, num_pixels))

    # Allocate space for Main_Data and Pixel averaged Data
    h5_main = write_main_dataset(chan_grp, (num_files, num_pixels), 'Raw_Data',
                                 'Intensity', 'a.u.',
                                 pos_desc, spec_desc,
                                 chunks=ds_chunking, dtype=data_type)

    h5_ronch = chan_grp.create_dataset('Mean_Ronchigram', shape=[num_pixels], dtype=np.float32)
    h5_mean_spec = chan_grp.create_dataset('Spectroscopic_Mean', shape=[num_files], dtype=np.float32)

    self.h5_f.flush()

    return h5_main, h5_mean_spec, h5_ronch
def _read_data(self, file_list, h5_channels):
    """
    Iterates over the files in `file_list`, reading each one, cropping and
    downsampling it, and writes the flattened images into the matching
    channel. Also builds the Mean_Ronchigram and the Mean_Spectrogram
    datasets of each channel at the same time.

    Parameters
    ----------
    file_list : list of str
        Paths of the files to read. Supported extensions are ``.ndata1`` and
        ``.ndata`` (zip archives that contain ``data.npy``) and ``.npy``.
    h5_channels : iterable of HDF5 groups
        Channel groups, paired one-to-one with `file_list`. The datasets of
        each file are created inside its channel; the image and scan sizes
        are written as attributes of the channel's parent.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a file has an unsupported extension.
    """
    for this_file, this_channel in zip(file_list, h5_channels):
        _, ext = os.path.splitext(this_file)
        if ext in ['.ndata1', '.ndata']:
            # Extract the data file from the zip archive and read it into an
            # array. The archive is closed and the extracted copy is removed
            # even when loading fails.
            with zipfile.ZipFile(this_file, 'r') as this_zip:
                tmp_path = this_zip.extract('data.npy')
            try:
                this_data = np.load(tmp_path)
            finally:
                os.remove(tmp_path)
        elif ext == '.npy':
            # Read data directly from npy file
            this_data = np.load(this_file)
        else:
            # Previously this fell through and silently reused the data of
            # the preceding file (or failed with an UnboundLocalError).
            raise ValueError('Unsupported file extension "{}" for file: {}'.format(ext, this_file))

        # Pad with leading singleton axes until the data is 4D, then find
        # the final dimensions based on the crop and downsampling parameters
        while this_data.ndim < 4:
            this_data = np.expand_dims(this_data, 0)

        this_data = self.crop_ronc(this_data)
        scan_size_x, scan_size_y, usize, vsize = this_data.shape

        usize = int(round(1.0 * usize / self.bin_factor[-2]))
        vsize = int(round(1.0 * vsize / self.bin_factor[-1]))

        num_images = scan_size_x * scan_size_y
        num_pixels = usize * vsize

        # Write these attributes to the parent of the channel group
        new_attrs = {'image_size_u': usize,
                     'image_size_v': vsize,
                     'scan_size_x': scan_size_x,
                     'scan_size_y': scan_size_y}
        write_simple_attrs(this_channel.parent, new_attrs)

        # Get the Position and Spectroscopic Datasets
        spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                     Dimension('V', 'pixel', np.arange(vsize))]
        pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                    Dimension('Y', 'pixel', np.arange(scan_size_y))]

        ds_chunking = calc_chunks([num_images, num_pixels],
                                  np.float32(0).itemsize,
                                  unit_chunks=(1, num_pixels))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(this_channel, (num_images, num_pixels), 'Raw_Data',
                                     'Intensity', 'a.u.',
                                     pos_desc, spec_desc,
                                     chunks=ds_chunking, dtype=np.float32)

        h5_ronch = this_channel.create_dataset('Mean_Ronchigram',
                                               data=np.zeros(num_pixels, dtype=np.float32))
        h5_mean_spec = this_channel.create_dataset('Mean_Spectrogram',
                                                   data=np.zeros(num_images, dtype=np.float32))

        # Bin the images and flatten them to (num_images, num_pixels)
        this_data = self.binning_func(this_data, self.bin_factor, self.bin_func).reshape(h5_main.shape)

        h5_main[:, :] = this_data
        h5_mean_spec[:] = np.mean(this_data, axis=1)
        h5_ronch[:] = np.mean(this_data, axis=0)

        self.h5_f.flush()

    self.h5_f.flush()
def _setup_h5(self, data_gen_parms):
    """
    Create the HDF5 file layout that the generated data will be written into.

    Three pieces are written one after another with the same ``HDFwriter``:
    the root / Measurement / Channel tree holding ``Raw_Data`` and its
    ancillary datasets, the ``Raw_Data-SHO_Fit_`` group holding the SHO Fit
    and Guess, and the ``Fit-Loop_Fit_`` group holding the loop Fit and
    Guess. Every Raw / Fit / Guess dataset is linked to its ancillary
    datasets, and handles to the written objects are stored on ``self``.

    Parameters
    ----------
    data_gen_parms : dict
        Dictionary containing the parameters to write to the Measurement
        Group as attributes. Its ``'data_type'`` entry is also copied into
        the root attributes.

    Returns
    -------
    None
    """

    def _first_ref(name, refs):
        # First written object called `name` among the references `refs`
        return get_h5_obj_refs([name], refs)[0]

    # ---- Build the group structure down to the channel group ----
    root_grp = VirtualGroup('')
    root_attrs = generate_dummy_main_parms()
    root_attrs['translator'] = 'FAKEBEPS'
    root_attrs['data_type'] = data_gen_parms['data_type']
    root_grp.attrs = root_attrs

    meas_grp = VirtualGroup('Measurement_')
    chan_grp = VirtualGroup('Channel_')

    meas_grp.attrs.update(data_gen_parms)

    # Position and Spectroscopic datasets for the Raw Data
    ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals = self._build_ancillary_datasets()

    raw_chunking = calc_chunks([self.n_pixels, self.n_spec_bins],
                               np.complex64(0).itemsize,
                               unit_chunks=[1, self.n_bins])

    ds_raw_data = VirtualDataset('Raw_Data', data=None,
                                 maxshape=[self.n_pixels, self.n_spec_bins],
                                 dtype=np.complex64,
                                 compression='gzip',
                                 chunking=raw_chunking,
                                 parent=meas_grp)

    chan_grp.add_children([ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals, ds_raw_data])
    meas_grp.add_children([chan_grp])
    root_grp.add_children([meas_grp])

    writer = HDFwriter(self.h5_path)
    writer.delete()
    raw_refs = writer.write(root_grp)

    # Drop the local names of the virtual datasets once they are written
    del ds_raw_data, ds_spec_inds, ds_spec_vals, ds_pos_inds, ds_pos_vals

    # Handles to Raw_Data, its channel group and its ancillary datasets
    h5_raw = _first_ref('Raw_Data', raw_refs)
    h5_chan_grp = h5_raw.parent
    h5_pos_inds = _first_ref('Position_Indices', raw_refs)
    h5_pos_vals = _first_ref('Position_Values', raw_refs)
    h5_spec_inds = _first_ref('Spectroscopic_Indices', raw_refs)
    h5_spec_vals = _first_ref('Spectroscopic_Values', raw_refs)

    # Link the Position and Spectroscopic datasets as attributes of Raw_Data
    link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

    # ---- Build the SHO Group ----
    sho_grp = VirtualGroup('Raw_Data-SHO_Fit_', parent=h5_chan_grp.name)

    # Spectroscopic datasets for the SHO Guess and Fit: everything except
    # the Frequency dimension, one step per position where Frequency is 0
    freq_inds = h5_spec_inds[h5_spec_inds.attrs['Frequency']].squeeze()
    sho_spec_starts = np.where(freq_inds == 0)[0]
    sho_spec_labs = get_attr(h5_spec_inds, 'labels')
    ds_sho_spec_inds, ds_sho_spec_vals = build_reduced_spec_dsets(h5_spec_inds,
                                                                  h5_spec_vals,
                                                                  keep_dim=sho_spec_labs != 'Frequency',
                                                                  step_starts=sho_spec_starts)

    sho_chunking = calc_chunks([self.n_pixels, self.n_sho_bins],
                               sho32.itemsize,
                               unit_chunks=[1, 1])
    ds_sho_fit = VirtualDataset('Fit', data=None,
                                maxshape=[self.n_pixels, self.n_sho_bins],
                                dtype=sho32,
                                compression='gzip',
                                chunking=sho_chunking,
                                parent=sho_grp)
    ds_sho_guess = VirtualDataset('Guess', data=None,
                                  maxshape=[self.n_pixels, self.n_sho_bins],
                                  dtype=sho32,
                                  compression='gzip',
                                  chunking=sho_chunking,
                                  parent=sho_grp)

    sho_grp.add_children([ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals])

    # Write the SHO group and datasets to the file
    sho_refs = writer.write(sho_grp)
    del ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals

    # Handles to the SHO fit, guess and their Spectroscopic datasets
    h5_sho_fit = _first_ref('Fit', sho_refs)
    h5_sho_guess = _first_ref('Guess', sho_refs)
    h5_sho_spec_inds = _first_ref('Spectroscopic_Indices', sho_refs)
    h5_sho_spec_vals = _first_ref('Spectroscopic_Values', sho_refs)

    # Link the Position and Spectroscopic datasets as attributes of the SHO Fit and Guess
    for h5_sho_dset in (h5_sho_fit, h5_sho_guess):
        link_as_main(h5_sho_dset, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)

    # ---- Build the loop group ----
    loop_grp = VirtualGroup('Fit-Loop_Fit_', parent=h5_sho_fit.parent.name)

    # Spectroscopic datasets for the loops: everything except DC_Offset,
    # one step per position where DC_Offset is 0
    dc_inds = h5_sho_spec_inds[h5_sho_spec_inds.attrs['DC_Offset']].squeeze()
    loop_spec_starts = np.where(dc_inds == 0)[0]
    loop_spec_labs = get_attr(h5_sho_spec_inds, 'labels')
    ds_loop_spec_inds, ds_loop_spec_vals = build_reduced_spec_dsets(h5_sho_spec_inds,
                                                                    h5_sho_spec_vals,
                                                                    keep_dim=loop_spec_labs != 'DC_Offset',
                                                                    step_starts=loop_spec_starts)

    # Create the loop fit and guess virtual datasets
    loop_chunking = calc_chunks([self.n_pixels, self.n_loops],
                                loop_fit32.itemsize,
                                unit_chunks=[1, 1])
    ds_loop_fit = VirtualDataset('Fit', data=None,
                                 maxshape=[self.n_pixels, self.n_loops],
                                 dtype=loop_fit32,
                                 compression='gzip',
                                 chunking=loop_chunking,
                                 parent=loop_grp)
    ds_loop_guess = VirtualDataset('Guess', data=None,
                                   maxshape=[self.n_pixels, self.n_loops],
                                   dtype=loop_fit32,
                                   compression='gzip',
                                   chunking=loop_chunking,
                                   parent=loop_grp)

    # Add the datasets to the loop group then write it to the file
    loop_grp.add_children([ds_loop_fit, ds_loop_guess, ds_loop_spec_inds, ds_loop_spec_vals])
    loop_refs = writer.write(loop_grp)
    del ds_loop_spec_vals, ds_loop_spec_inds, ds_loop_guess, ds_loop_fit

    # Handles to the loop datasets
    h5_loop_fit = _first_ref('Fit', loop_refs)
    h5_loop_guess = _first_ref('Guess', loop_refs)
    h5_loop_spec_inds = _first_ref('Spectroscopic_Indices', loop_refs)
    h5_loop_spec_vals = _first_ref('Spectroscopic_Values', loop_refs)

    # Link the Position and Spectroscopic datasets to the Loop Guess and Fit
    for h5_loop_dset in (h5_loop_fit, h5_loop_guess):
        link_as_main(h5_loop_dset, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds, h5_loop_spec_vals)

    # Keep the handles needed by the generation step
    self.h5_raw = USIDataset(h5_raw)
    self.h5_sho_guess = USIDataset(h5_sho_guess)
    self.h5_sho_fit = USIDataset(h5_sho_fit)
    self.h5_loop_guess = USIDataset(h5_loop_guess)
    self.h5_loop_fit = USIDataset(h5_loop_fit)
    self.h5_spec_vals = h5_spec_vals
    self.h5_spec_inds = h5_spec_inds
    self.h5_sho_spec_inds = h5_sho_spec_inds
    self.h5_sho_spec_vals = h5_sho_spec_vals
    self.h5_loop_spec_inds = h5_loop_spec_inds
    self.h5_loop_spec_vals = h5_loop_spec_vals
    self.h5_file = h5_raw.file
def __initialize_meas_group(self, num_pix, current_pixels):
    """
    Creates and initializes the primary (and auxiliary) datasets and
    datagroups to hold the raw data for the current set of experimental
    parameters.

    The Measurement / Channel tree is written with space for a single pixel
    only (``Raw_Data`` and ``Noise_Floor`` are resizable). Handles and
    accumulators needed while pixels are appended are stored on ``self``
    (``ds_main``, ``ds_noise``, ``spec_inds``, ``pos_vals_list``,
    ``ds_pixel_index``, ``mean_resp``, ``max_resp``, ``min_resp``).
    ``self.parm_dict`` is updated in place with ``num_bins``, ``num_pix``
    and ``num_udvs_steps``.

    Parameters
    ----------
    num_pix : unsigned int
        Number of pixels this datagroup is expected to hold
    current_pixels : dictionary of BEPSndfPixel objects
        Extracted data for the first pixel in this group, keyed by wave type

    Returns
    -------
    h5_refs : list of HDF5group and HDF5Dataset references
        references of the written H5 datasets

    Raises
    ------
    ValueError
        If the UDVS steps are halved and their number is odd.
    """
    tot_bins = 0
    tot_pts = 0
    # Each wavetype can have different number of bins
    for pixl in current_pixels.values():
        tot_bins += pixl.num_bins
        tot_pts += pixl.num_bins * pixl.num_steps

    # Need to halve the number of steps when only in / out field is acquired:
    if self.halve_udvs_steps:
        tot_pts = int(tot_pts / 2)

    # Populate information from the columns within the pixels such as the FFT, bin freq, indices, etc.
    bin_freqs = np.zeros(shape=tot_bins, dtype=np.float32)
    bin_inds = np.zeros(shape=tot_bins, dtype=np.uint32)
    bin_FFT = np.zeros(shape=tot_bins, dtype=np.complex64)
    exec_bin_vec = np.zeros(shape=tot_bins, dtype=np.int32)
    # wave type -> [index of its first bin, number of bins]
    pixel_bins = {}

    stind = 0
    for wave_type in self.__unique_waves__:
        pixl = current_pixels[wave_type]
        exec_bin_vec[stind:stind + pixl.num_bins] = wave_type * np.ones(pixl.num_bins)
        bin_inds[stind:stind + pixl.num_bins] = pixl.BE_bin_ind
        bin_freqs[stind:stind + pixl.num_bins] = pixl.BE_bin_w
        bin_FFT[stind:stind + pixl.num_bins] = pixl.FFT_BE_wave
        pixel_bins[wave_type] = [stind, pixl.num_bins]
        stind += pixl.num_bins

    # Make the index matrix that has the UDVS step number and bin indices
    spec_inds = np.zeros(shape=(2, tot_pts), dtype=INDICES_DTYPE)
    stind = 0
    # Need to go through the UDVS file and reconstruct chronologically
    for step_index, wave_type in enumerate(self.excit_type_vec):
        if self.halve_udvs_steps and self.udvs_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
            continue  # skip
        first_bin, num_bins = pixel_bins[wave_type]
        spec_inds[1, stind:stind + num_bins] = step_index * np.ones(num_bins)  # UDVS step
        spec_inds[0, stind:stind + num_bins] = np.arange(first_bin, first_bin + num_bins)  # Bin step
        stind += num_bins

    self.spec_inds = spec_inds  # will need this for plot group generation

    ds_ex_wfm = VirtualDataset('Excitation_Waveform',
                               np.float32(np.real(np.fft.ifft(np.fft.ifftshift(self.BE_wave)))))
    ds_bin_freq = VirtualDataset('Bin_Frequencies', bin_freqs)
    ds_bin_inds = VirtualDataset('Bin_Indices', bin_inds - 1, dtype=np.uint32)  # From Matlab to Python (base 0)
    ds_bin_fft = VirtualDataset('Bin_FFT', bin_FFT)
    ds_wfm_typ = VirtualDataset('Bin_Wfm_Type', exec_bin_vec)
    ds_bin_steps = VirtualDataset('Bin_Step', np.arange(tot_bins, dtype=np.uint32))

    # NOTE: this is the same object as self.parm_dict, not a copy
    curr_parm_dict = self.parm_dict
    # Some very basic information that can help the processing crew
    curr_parm_dict['num_bins'] = tot_bins
    curr_parm_dict['num_pix'] = num_pix
    # technically should change the date, etc.

    self.current_group = 'Measurement_'
    meas_grp = VirtualGroup(self.current_group, '/')
    meas_grp.attrs = curr_parm_dict

    chan_grp = VirtualGroup('Channel_')
    chan_grp.attrs['Channel_Input'] = curr_parm_dict['IO_Analog_Input_1']
    meas_grp.add_children([chan_grp])

    # One column slice per UDVS label
    udvs_slices = dict()
    for col_ind, col_name in enumerate(self.udvs_labs):
        udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
    ds_udvs_mat = VirtualDataset('UDVS', self.udvs_mat)
    ds_udvs_mat.attrs['labels'] = udvs_slices
    ds_udvs_mat.attrs['units'] = self.udvs_units

    actual_udvs_steps = self.num_udvs_steps
    if self.halve_udvs_steps:
        actual_udvs_steps /= 2
    if actual_udvs_steps % 1:
        raise ValueError('Actual number of UDVS steps should be an integer')
    actual_udvs_steps = int(actual_udvs_steps)

    curr_parm_dict['num_udvs_steps'] = actual_udvs_steps

    ds_udvs_inds = VirtualDataset('UDVS_Indices', self.spec_inds[1])

    # Create the Spectroscopic Values tables.
    # `spec_inds` is rebound here; self.spec_inds keeps the array built above.
    spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_labs_names = \
        createSpecVals(self.udvs_mat, spec_inds, bin_freqs, exec_bin_vec,
                       curr_parm_dict, np.array(self.udvs_labs), self.udvs_units)

    # One row slice per spectroscopic label
    spec_vals_slices = dict()
    for row_ind, row_name in enumerate(spec_vals_labs):
        spec_vals_slices[row_name] = (slice(row_ind, row_ind + 1), slice(None))

    ds_spec_vals_mat = VirtualDataset('Spectroscopic_Values', np.array(spec_vals, dtype=VALUES_DTYPE))
    ds_spec_vals_mat.attrs['labels'] = spec_vals_slices
    ds_spec_vals_mat.attrs['units'] = spec_vals_units
    ds_spec_mat = VirtualDataset('Spectroscopic_Indices', spec_inds, dtype=INDICES_DTYPE)
    ds_spec_mat.attrs['labels'] = spec_vals_slices
    ds_spec_mat.attrs['units'] = spec_vals_units
    for entry in spec_vals_labs_names:
        label = entry[0] + '_parameters'
        names = entry[1]
        ds_spec_mat.attrs[label] = names
        ds_spec_vals_mat.attrs[label] = names

    # Chunking of the Main_Data dataset (method by Chris Smith): the
    # smallest unit is one pixel by the largest number of bins of any wave
    # type; calc_chunks derives the final chunk shape from it.
    # Only the bin counts are compared: pixel_bins also stores start
    # offsets, which the former np.max over the whole list could return.
    max_bins_per_pixel = max(num_bins for _, num_bins in pixel_bins.values())

    beps_chunks = calc_chunks([num_pix, tot_pts],
                              np.complex64(0).itemsize,
                              unit_chunks=(1, max_bins_per_pixel))
    ds_main_data = VirtualDataset('Raw_Data',
                                  np.zeros(shape=(1, tot_pts), dtype=np.complex64),
                                  chunking=beps_chunks,
                                  resizable=True,
                                  compression='gzip',
                                  attrs={'quantity': 'Piezoresponse', 'units': 'V'})

    ds_noise = VirtualDataset('Noise_Floor',
                              np.zeros(shape=(1, actual_udvs_steps), dtype=nf32),
                              chunking=(1, actual_udvs_steps),
                              resizable=True,
                              compression='gzip')

    # Allocate space for the first pixel for now and write along with the complete tree...
    # Positions CANNOT be written at this time since we don't know if the parameter changed
    chan_grp.add_children([ds_main_data, ds_noise, ds_ex_wfm, ds_spec_mat, ds_wfm_typ,
                           ds_bin_steps, ds_bin_inds, ds_bin_freq, ds_bin_fft, ds_udvs_mat,
                           ds_spec_vals_mat, ds_udvs_inds])

    h5_refs = self.hdf.write(meas_grp)

    self.ds_noise = get_h5_obj_refs(['Noise_Floor'], h5_refs)[0]
    self.ds_main = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]

    self.pos_vals_list = list()

    # The dataset index is raised only after this group is closed
    self.ds_pixel_index = 0

    # Use this for plot groups:
    self.mean_resp = np.zeros(shape=tot_pts, dtype=np.complex64)

    # Used for Histograms
    self.max_resp = np.zeros(shape=num_pix, dtype=np.float32)
    self.min_resp = np.zeros(shape=num_pix, dtype=np.float32)

    return h5_refs
def _setupH5(self, usize, vsize, data_type, num_images, main_parms):
    """
    Setup the HDF5 file in which to store the data including creating
    the Position and Spectroscopic datasets

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type or numpy.dtype
        Data type to save image as. Its item size is looked up through
        ``numpy.dtype``, so scalar types and dtype instances both work.
    num_images : int
        Number of images in the movie
    main_parms : dict
        Parameters written to the Measurement group as attributes. The
        dictionary is updated in place with the image count, image size,
        pixel count and translator name before it is written.

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        HDF5 Dataset that the mean over all positions will be written into
    h5_ronch : h5py.Dataset
        HDF5 Dataset that the mean over all Spectroscopic steps will be written into
    """
    num_pixels = usize * vsize

    root_parms = generate_dummy_main_parms()
    root_parms['data_type'] = 'PtychographyData'

    # Deliberately in place: the caller's dictionary receives these entries
    main_parms['num_images'] = num_images
    main_parms['image_size_u'] = usize
    main_parms['image_size_v'] = vsize
    main_parms['num_pixels'] = num_pixels
    main_parms['translator'] = 'Movie'

    # Create the hdf5 data Group
    write_simple_attrs(self.h5_file, root_parms)
    meas_grp = create_indexed_group(self.h5_file, 'Measurement')
    write_simple_attrs(meas_grp, main_parms)
    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Build the Position and Spectroscopic Datasets
    spec_dim = Dimension('Time', 's', np.arange(num_images))
    pos_dims = [Dimension('X', 'a.u.', np.arange(usize)),
                Dimension('Y', 'a.u.', np.arange(vsize))]

    # np.dtype(...) also accepts dtype instances, which are not callable
    # and therefore broke the former `data_type(0).itemsize`.
    # One whole frame is the smallest chunking unit.
    ds_chunking = calc_chunks([num_pixels, num_images],
                              np.dtype(data_type).itemsize,
                              unit_chunks=(num_pixels, 1))

    # Allocate space for Main_Data and Pixel averaged Data
    h5_main = write_main_dataset(chan_grp, (num_pixels, num_images), 'Raw_Data',
                                 'Intensity', 'a.u.',
                                 pos_dims, spec_dim,
                                 chunks=ds_chunking, dtype=data_type)

    h5_ronch = meas_grp.create_dataset('Mean_Ronchigram',
                                       data=np.zeros(num_pixels, dtype=np.float32),
                                       dtype=np.float32)
    h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                           data=np.zeros(num_images, dtype=np.float32),
                                           dtype=np.float32)

    self.h5_file.flush()

    return h5_main, h5_mean_spec, h5_ronch
def _setup_h5(self, data_gen_parms):
    """
    Create the HDF5 file layout that the generated data will be written into.

    Three pieces are written one after another with the same ``HDFwriter``:
    the root / Measurement / Channel tree holding ``Raw_Data`` and its
    ancillary datasets, the ``Raw_Data-SHO_Fit_`` group holding the SHO Fit
    and Guess, and the ``Fit-Loop_Fit_`` group holding the loop Fit and
    Guess. Every Raw / Fit / Guess dataset is linked to its ancillary
    datasets, and handles to the written objects are stored on ``self``.

    Parameters
    ----------
    data_gen_parms : dict
        Dictionary containing the parameters to write to the Measurement
        Group as attributes. Its ``'data_type'`` entry is also copied into
        the root attributes.

    Returns
    -------
    None
    """

    def _first_ref(name, refs):
        # First written object called `name` among the references `refs`
        return get_h5_obj_refs([name], refs)[0]

    # ---- Build the group structure down to the channel group ----
    root_grp = VirtualGroup('')
    root_attrs = generate_dummy_main_parms()
    root_attrs['translator'] = 'FAKEBEPS'
    root_attrs['data_type'] = data_gen_parms['data_type']
    root_grp.attrs = root_attrs

    meas_grp = VirtualGroup('Measurement_')
    chan_grp = VirtualGroup('Channel_')

    meas_grp.attrs.update(data_gen_parms)

    # Position and Spectroscopic datasets for the Raw Data
    ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals = self._build_ancillary_datasets()

    raw_chunking = calc_chunks([self.n_pixels, self.n_spec_bins],
                               np.complex64(0).itemsize,
                               unit_chunks=[1, self.n_bins])

    ds_raw_data = VirtualDataset('Raw_Data', data=None,
                                 maxshape=[self.n_pixels, self.n_spec_bins],
                                 dtype=np.complex64,
                                 compression='gzip',
                                 chunking=raw_chunking,
                                 parent=meas_grp)

    chan_grp.add_children([ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals, ds_raw_data])
    meas_grp.add_children([chan_grp])
    root_grp.add_children([meas_grp])

    writer = HDFwriter(self.h5_path)
    writer.delete()
    raw_refs = writer.write(root_grp)

    # Drop the local names of the virtual datasets once they are written
    del ds_raw_data, ds_spec_inds, ds_spec_vals, ds_pos_inds, ds_pos_vals

    # Handles to Raw_Data, its channel group and its ancillary datasets
    h5_raw = _first_ref('Raw_Data', raw_refs)
    h5_chan_grp = h5_raw.parent
    h5_pos_inds = _first_ref('Position_Indices', raw_refs)
    h5_pos_vals = _first_ref('Position_Values', raw_refs)
    h5_spec_inds = _first_ref('Spectroscopic_Indices', raw_refs)
    h5_spec_vals = _first_ref('Spectroscopic_Values', raw_refs)

    # Link the Position and Spectroscopic datasets as attributes of Raw_Data
    link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

    # ---- Build the SHO Group ----
    sho_grp = VirtualGroup('Raw_Data-SHO_Fit_', parent=h5_chan_grp.name)

    # Spectroscopic datasets for the SHO Guess and Fit: everything except
    # the Frequency dimension, one step per position where Frequency is 0
    freq_inds = h5_spec_inds[h5_spec_inds.attrs['Frequency']].squeeze()
    sho_spec_starts = np.where(freq_inds == 0)[0]
    sho_spec_labs = get_attr(h5_spec_inds, 'labels')
    ds_sho_spec_inds, ds_sho_spec_vals = build_reduced_spec_dsets(h5_spec_inds,
                                                                  h5_spec_vals,
                                                                  keep_dim=sho_spec_labs != 'Frequency',
                                                                  step_starts=sho_spec_starts)

    sho_chunking = calc_chunks([self.n_pixels, self.n_sho_bins],
                               sho32.itemsize,
                               unit_chunks=[1, 1])
    ds_sho_fit = VirtualDataset('Fit', data=None,
                                maxshape=[self.n_pixels, self.n_sho_bins],
                                dtype=sho32,
                                compression='gzip',
                                chunking=sho_chunking,
                                parent=sho_grp)
    ds_sho_guess = VirtualDataset('Guess', data=None,
                                  maxshape=[self.n_pixels, self.n_sho_bins],
                                  dtype=sho32,
                                  compression='gzip',
                                  chunking=sho_chunking,
                                  parent=sho_grp)

    sho_grp.add_children([ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals])

    # Write the SHO group and datasets to the file
    sho_refs = writer.write(sho_grp)
    del ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals

    # Handles to the SHO fit, guess and their Spectroscopic datasets
    h5_sho_fit = _first_ref('Fit', sho_refs)
    h5_sho_guess = _first_ref('Guess', sho_refs)
    h5_sho_spec_inds = _first_ref('Spectroscopic_Indices', sho_refs)
    h5_sho_spec_vals = _first_ref('Spectroscopic_Values', sho_refs)

    # Link the Position and Spectroscopic datasets as attributes of the SHO Fit and Guess
    for h5_sho_dset in (h5_sho_fit, h5_sho_guess):
        link_as_main(h5_sho_dset, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)

    # ---- Build the loop group ----
    loop_grp = VirtualGroup('Fit-Loop_Fit_', parent=h5_sho_fit.parent.name)

    # Spectroscopic datasets for the loops: everything except DC_Offset,
    # one step per position where DC_Offset is 0
    dc_inds = h5_sho_spec_inds[h5_sho_spec_inds.attrs['DC_Offset']].squeeze()
    loop_spec_starts = np.where(dc_inds == 0)[0]
    loop_spec_labs = get_attr(h5_sho_spec_inds, 'labels')
    ds_loop_spec_inds, ds_loop_spec_vals = build_reduced_spec_dsets(h5_sho_spec_inds,
                                                                    h5_sho_spec_vals,
                                                                    keep_dim=loop_spec_labs != 'DC_Offset',
                                                                    step_starts=loop_spec_starts)

    # Create the loop fit and guess virtual datasets
    loop_chunking = calc_chunks([self.n_pixels, self.n_loops],
                                loop_fit32.itemsize,
                                unit_chunks=[1, 1])
    ds_loop_fit = VirtualDataset('Fit', data=None,
                                 maxshape=[self.n_pixels, self.n_loops],
                                 dtype=loop_fit32,
                                 compression='gzip',
                                 chunking=loop_chunking,
                                 parent=loop_grp)
    ds_loop_guess = VirtualDataset('Guess', data=None,
                                   maxshape=[self.n_pixels, self.n_loops],
                                   dtype=loop_fit32,
                                   compression='gzip',
                                   chunking=loop_chunking,
                                   parent=loop_grp)

    # Add the datasets to the loop group then write it to the file
    loop_grp.add_children([ds_loop_fit, ds_loop_guess, ds_loop_spec_inds, ds_loop_spec_vals])
    loop_refs = writer.write(loop_grp)
    del ds_loop_spec_vals, ds_loop_spec_inds, ds_loop_guess, ds_loop_fit

    # Handles to the loop datasets
    h5_loop_fit = _first_ref('Fit', loop_refs)
    h5_loop_guess = _first_ref('Guess', loop_refs)
    h5_loop_spec_inds = _first_ref('Spectroscopic_Indices', loop_refs)
    h5_loop_spec_vals = _first_ref('Spectroscopic_Values', loop_refs)

    # Link the Position and Spectroscopic datasets to the Loop Guess and Fit
    for h5_loop_dset in (h5_loop_fit, h5_loop_guess):
        link_as_main(h5_loop_dset, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds, h5_loop_spec_vals)

    # Keep the handles needed by the generation step
    self.h5_raw = USIDataset(h5_raw)
    self.h5_sho_guess = USIDataset(h5_sho_guess)
    self.h5_sho_fit = USIDataset(h5_sho_fit)
    self.h5_loop_guess = USIDataset(h5_loop_guess)
    self.h5_loop_fit = USIDataset(h5_loop_fit)
    self.h5_spec_vals = h5_spec_vals
    self.h5_spec_inds = h5_spec_inds
    self.h5_sho_spec_inds = h5_sho_spec_inds
    self.h5_sho_spec_vals = h5_sho_spec_vals
    self.h5_loop_spec_inds = h5_loop_spec_inds
    self.h5_loop_spec_vals = h5_loop_spec_vals
    self.h5_file = h5_raw.file