def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the image from the SVD results on the windows.
    Optionally, use only a subset of the components.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice}, optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in Mb.
        Default - 1024 Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset
    """
    comp_slice, num_comps = get_component_slice(components,
                                                total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensure that at least one core is used and that 2 cores are left free for other work
    max_cores = max(1, cpu_count() - 2)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]
        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']
    except KeyError:
        raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)

    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    # `cores` cannot be None at this point, so branch on the single-core case;
    # the multi-core branch undoes the halving of `max_memory` applied above
    if cores == 1:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0 ** 2))

    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print('Completed reconstruction of data from SVD results. Writing to file.')

    '''
    Create the Group and dataset to hold the rebuilt data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None, None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start,
                                                              comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()
    print('Done writing reconstructed data to file.')

    return h5_rebuilt
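# Minimal usage sketch for ``rebuild_svd``, assuming SVD has already been run
# on the main dataset so that an 'SVD' results group exists beside it. The
# file name and internal HDF5 path below are hypothetical placeholders:
def _demo_rebuild_svd(h5_path='beps_data.h5'):
    import h5py

    h5_file = h5py.File(h5_path, mode='r+')
    h5_main = USIDataset(h5_file['Measurement_000/Channel_000/Raw_Data'])
    # Reconstruct the data from only the first 24 SVD components
    h5_rebuilt = rebuild_svd(h5_main, components=24, cores=1, max_RAM_mb=512)
    print(h5_rebuilt.name)
    h5_file.close()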
def translate(self, h5_path, force_patch=False, **kwargs):
    """
    Add the needed references and attributes to the h5 file that are not
    created by the LabView data acquisition program.

    Parameters
    ----------
    h5_path : str
        path to the h5 file
    force_patch : bool, optional
        Should the check to see if the file has already been patched be ignored.
        Default False.

    Returns
    -------
    h5_file : h5py.File
        patched hdf5 file
    """
    # Open the file and check if a patch is needed
    h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
    if h5_file.attrs.get('translator') is not None and not force_patch:
        print('File is already Pycroscopy ready.')
        return h5_file

    '''
    Get the list of all Raw_Data Datasets.
    Loop over the list and update the needed attributes.
    '''
    raw_list = find_dataset(h5_file, 'Raw_Data')
    for h5_raw in raw_list:
        if 'quantity' not in h5_raw.attrs:
            h5_raw.attrs['quantity'] = 'quantity'
        if 'units' not in h5_raw.attrs:
            h5_raw.attrs['units'] = 'a.u.'

        # Grab the channel and measurement group of the data to check some needed attributes
        h5_chan = h5_raw.parent
        try:
            c_type = get_attr(h5_chan, 'channel_type')
        except KeyError:
            warn_str = "'channel_type' was not found as an attribute of {}.\n".format(h5_chan.name)
            warn_str += "If this is BEPS or BELine data from the LabView acquisition software, " + \
                        "please run the following piece of code. Afterwards, run this function again.\n" + \
                        "CODE: " \
                        "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
            warn(warn_str)
            return h5_file
        except:
            raise

        if c_type != 'BE':
            continue

        h5_meas = h5_chan.parent
        h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

        # Get the object handles for the Indices and Values datasets
        h5_pos_inds = h5_chan['Position_Indices']
        h5_pos_vals = h5_chan['Position_Values']
        h5_spec_inds = h5_chan['Spectroscopic_Indices']
        h5_spec_vals = h5_chan['Spectroscopic_Values']

        # Make sure we have correct spectroscopic indices for the given values
        ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
        if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
            h5_spec_inds[:, :] = ds_spec_inds[:, :]
            h5_file.flush()

        # Get the labels and units for the Spectroscopic datasets
        h5_spec_labels = h5_spec_inds.attrs['labels']
        inds_and_vals = [h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals]
        for dset in inds_and_vals:
            spec_labels = dset.attrs['labels']
            try:
                spec_units = dset.attrs['units']
                if len(spec_units) != len(spec_labels):
                    raise KeyError
            except KeyError:
                # Write missing units as an attribute alongside the labels
                dset.attrs['units'] = ['' for _ in spec_labels]
            except:
                raise

        # Make region references for each of the spectroscopic labels
        for ilabel, label in enumerate(h5_spec_labels):
            label_slice = (slice(ilabel, ilabel + 1), slice(None))
            if label == '':
                label = 'Step'
            h5_spec_inds.attrs[label] = h5_spec_inds.regionref[label_slice]
            h5_spec_vals.attrs[label] = h5_spec_vals.regionref[label_slice]

        # Link the references to the Indices and Values datasets to the Raw_Data
        link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

        # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
        h5_freqs = h5_chan['Bin_Frequencies']
        aux_dset_names = ['Bin_Frequencies']
        aux_dset_refs = [h5_freqs.ref]
        check_and_link_ancillary(h5_raw, aux_dset_names, anc_refs=aux_dset_refs)

        '''
        Get all SHO_Fit groups for the Raw_Data and loop over them.
        Get the Guess and Spectroscopic Datasets for each SHO_Fit group.
        '''
        sho_list = find_results_groups(h5_raw, 'SHO_Fit')
        for h5_sho in sho_list:
            h5_sho_guess = h5_sho['Guess']
            h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
            h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_sho_spec_inds = create_spec_inds_from_vals(h5_sho_spec_inds[()])
            if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

            # Get the labels and units for the Spectroscopic datasets
            h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
            link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals,
                         h5_sho_spec_inds, h5_sho_spec_vals)
            sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

            for dset in sho_inds_and_vals:
                spec_labels = get_attr(dset, 'labels')
                try:
                    spec_units = get_attr(dset, 'units')
                    if len(spec_units) != len(spec_labels):
                        raise KeyError
                except KeyError:
                    spec_units = [''.encode('utf-8') for _ in spec_labels]
                    dset.attrs['units'] = spec_units
                except:
                    raise

            # Make region references for each of the spectroscopic labels
            for ilabel, label in enumerate(h5_sho_spec_labels):
                label_slice = (slice(ilabel, ilabel + 1), slice(None))
                if label == '':
                    label = 'Step'.encode('utf-8')
                h5_sho_spec_inds.attrs[label] = h5_sho_spec_inds.regionref[label_slice]
                h5_sho_spec_vals.attrs[label] = h5_sho_spec_vals.regionref[label_slice]

        h5_file.flush()

    h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')

    return h5_file
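# Usage sketch for the patcher method above (``LabViewH5Patcher`` is a
# placeholder for whatever translator class owns ``translate``; the file name
# is hypothetical). Note that this version returns the open, patched
# h5py.File object:
#
#     patcher = LabViewH5Patcher()
#     h5_file = patcher.translate('old_beps_data.h5')
#     # ... inspect or process the patched file ...
#     h5_file.close()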
def plot_svd(h5_main, savefig=False, num_plots=16, **kwargs):
    '''
    Replots the SVD showing the scree plot, abundance maps, and eigenvectors.
    If h5_main is a Dataset, it will default to the most recent SVD group from that Dataset.
    If h5_main is the results group, then it will plot the values for that group.

    :param h5_main: dataset on which SVD was performed, or the SVD results group
    :type h5_main: USIDataset or h5py Dataset or h5py Group

    :param savefig: Saves the figures to disk with some default names
    :type savefig: bool, optional

    :param num_plots: Default number of eigenvectors and abundance plots to show
    :type num_plots: int

    :param kwargs: additional keyword arguments passed on to the plotting functions
    :type kwargs: dict, optional
    '''

    if isinstance(h5_main, h5py.Group):
        _U = find_dataset(h5_main, 'U')[-1]
        _V = find_dataset(h5_main, 'V')[-1]
        units = 'arbitrary (a.u.)'
        h5_spec_vals = np.arange(_V.shape[1])
        h5_svd_group = _U.parent
    else:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]
        units = h5_main.attrs['quantity']
        h5_spec_vals = h5_main.get_spec_values('Time')

    h5_U = h5_svd_group['U']
    h5_V = h5_svd_group['V']
    h5_S = h5_svd_group['S']

    _U = USIDataset(h5_U)
    [num_rows, num_cols] = _U.pos_dim_sizes

    # Use num_plots (rather than a hard-coded 16) so the keyword actually
    # controls how many components are shown
    abun_maps = np.reshape(h5_U[:, :num_plots], (num_rows, num_cols, -1))
    eigen_vecs = h5_V[:num_plots, :]

    scree_sum = np.zeros(h5_S.shape)
    for i in range(h5_S.shape[0]):
        # include component i in the cumulative total (h5_S[:i] would omit it)
        scree_sum[i] = np.sum(h5_S[:i + 1]) / np.sum(h5_S)

    plt.figure()
    plt.plot(scree_sum, 'bo')
    plt.title('Cumulative Variance')
    plt.xlabel('Total Components')
    plt.ylabel('Total variance ratio (a.u.)')
    if savefig:
        plt.savefig('Cumulative_variance_plot.png')

    fig_scree, axes = plot_utils.plot_scree(h5_S, title='Scree plot')
    fig_scree.tight_layout()
    if savefig:
        plt.savefig('Scree_plot.png')

    fig_abun, axes = plot_utils.plot_map_stack(abun_maps, num_comps=num_plots,
                                               title='SVD Abundance Maps',
                                               color_bar_mode='single',
                                               cmap='inferno', reverse_dims=True,
                                               fig_mult=(3.5, 3.5),
                                               facecolor='white', **kwargs)
    fig_abun.tight_layout()
    if savefig:
        plt.savefig('Abundance_maps.png')

    fig_eigvec, axes = plot_utils.plot_curves(h5_spec_vals * 1e3, eigen_vecs,
                                              use_rainbow_plots=False,
                                              x_label='Time (ms)', y_label=units,
                                              num_plots=num_plots,
                                              subtitle_prefix='Component',
                                              title='SVD Eigenvectors',
                                              evenly_spaced=False, **kwargs)
    fig_eigvec.tight_layout()
    if savefig:
        plt.savefig('Eigenvectors.png')

    return
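# Minimal usage sketch for ``plot_svd``: either the main dataset or the SVD
# results group may be passed. The file name and dataset path are hypothetical:
def _demo_plot_svd(h5_path='beps_data.h5'):
    import h5py

    h5_file = h5py.File(h5_path, mode='r')
    h5_main = USIDataset(h5_file['Measurement_000/Channel_000/Raw_Data'])
    # Plot the first 9 components and save the figures with default names
    plot_svd(h5_main, savefig=True, num_plots=9)
    h5_file.close()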
def _check_for_old_fit(self):
    """
    Returns three lists of h5py objects, sorted by how far the previous
    computation in each matching results group progressed:

    1. Completed guess only
    2. Partial Fit
    3. Completed Fit

    Returns
    -------
    completed_guess : list of h5py.Dataset
        Guess datasets that were fully computed but have no Fit
    partial_fits : list of h5py.Group
        Groups whose Fit dataset was only partially computed
    completed_fits : list of h5py.Dataset
        Fit datasets that were fully computed
    """
    # First find all groups that match the basic condition of matching tool name
    all_groups = find_results_groups(self.h5_main, self._fitter_name)
    if self._verbose:
        print('Groups that matched the nomenclature: {}'.format(all_groups))

    # Next sort these groups into three categories:
    completed_guess = []
    partial_fits = []
    completed_fits = []

    for h5_group in all_groups:

        if 'Fit' in h5_group.keys():
            # the group contains a Fit dataset
            h5_fit = h5_group['Fit']

            # check the Fit dataset against parms_dict
            if not check_for_matching_attrs(h5_fit, new_parms=self._parms_dict,
                                            verbose=self._verbose):
                if self._verbose:
                    print('{} did not match the given parameters'.format(h5_fit.name))
                continue

            # sort this dataset:
            try:
                last_pix = get_attr(h5_fit, 'last_pixel')
            except KeyError:
                last_pix = None

            # For now skip any fits that are missing 'last_pixel'
            if last_pix is None:
                continue
            elif last_pix < self.h5_main.shape[0]:
                partial_fits.append(h5_fit.parent)
            else:
                completed_fits.append(h5_fit)
        else:
            if 'Guess' in h5_group.keys():
                h5_guess = h5_group['Guess']

                # sort this dataset:
                try:
                    last_pix = get_attr(h5_guess, 'last_pixel')
                except KeyError:
                    last_pix = None

                # For now skip any guesses that are missing 'last_pixel'
                if last_pix is None:
                    continue
                elif last_pix == self.h5_main.shape[0]:
                    if self._verbose:
                        print('{} was a completed Guess'.format(h5_guess.name))
                    completed_guess.append(h5_guess)
                else:
                    if self._verbose:
                        print('{} did not have completed Guesses'.format(h5_guess.name))
            else:
                if self._verbose:
                    print('{} did not even have a Guess. Categorizing as defective Group'.format(h5_group.name))

    return completed_guess, partial_fits, completed_fits
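# Sketch of how the three lists returned above are typically consumed when
# deciding whether a computation can be resumed (``fitter`` stands in for an
# instance of the class that owns this method; all names are illustrative):
#
#     completed_guess, partial_fits, completed_fits = fitter._check_for_old_fit()
#     if completed_fits:
#         h5_fit = completed_fits[-1]       # reuse the newest finished Fit
#     elif partial_fits:
#         h5_group = partial_fits[-1]       # resume the last partial Fit group
#     elif completed_guess:
#         h5_guess = completed_guess[-1]    # start fitting from an existing Guess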
def translate(self, h5_path, force_patch=False, **kwargs):
    """
    Add the needed references and attributes to the h5 file that are not
    created by the LabView data acquisition program.

    Parameters
    ----------
    h5_path : str
        path to the h5 file
    force_patch : bool, optional
        Should the check to see if the file has already been patched be ignored.
        Default False.

    Returns
    -------
    h5_path : str
        path to the patched h5 file
    """
    # Open the file and check if a patch is needed
    h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
    if h5_file.attrs.get('translator') is not None and not force_patch:
        print('File is already Pycroscopy ready.')
        h5_file.close()
        return h5_path

    '''
    Get the list of all Raw_Data Datasets.
    Loop over the list and update the needed attributes.
    '''
    raw_list = find_dataset(h5_file, 'Raw_Data')
    for h5_raw in raw_list:
        if 'quantity' not in h5_raw.attrs:
            h5_raw.attrs['quantity'] = 'quantity'
        if 'units' not in h5_raw.attrs:
            h5_raw.attrs['units'] = 'a.u.'

        # Grab the channel and measurement group of the data to check some needed attributes
        h5_chan = h5_raw.parent
        try:
            c_type = get_attr(h5_chan, 'channel_type')
        except KeyError:
            warn_str = "'channel_type' was not found as an attribute of {}.\n".format(h5_chan.name)
            warn_str += "If this is BEPS or BELine data from the LabView acquisition software, " + \
                        "please run the following piece of code. Afterwards, run this function again.\n" + \
                        "CODE: " \
                        "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
            warn(warn_str)
            h5_file.close()
            return h5_path
        except:
            raise

        if c_type != 'BE':
            continue

        h5_meas = h5_chan.parent
        h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

        # Get the object handles for the Indices and Values datasets
        h5_pos_inds = h5_chan['Position_Indices']
        h5_pos_vals = h5_chan['Position_Values']
        h5_spec_inds = h5_chan['Spectroscopic_Indices']
        h5_spec_vals = h5_chan['Spectroscopic_Values']

        # Make sure we have correct spectroscopic indices for the given values
        ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
        if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
            h5_spec_inds[:, :] = ds_spec_inds[:, :]
            h5_file.flush()

        # Get the labels and units for the Spectroscopic datasets
        h5_spec_labels = h5_spec_inds.attrs['labels']
        inds_and_vals = [h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals]
        for dset in inds_and_vals:
            spec_labels = dset.attrs['labels']
            try:
                spec_units = dset.attrs['units']
                if len(spec_units) != len(spec_labels):
                    raise KeyError
            except KeyError:
                # Write missing units as an attribute alongside the labels
                dset.attrs['units'] = ['' for _ in spec_labels]
            except:
                raise

        '''
        In early versions, too many spectroscopic dimension labels and units
        were listed compared to the number of rows. Remove them here:
        '''
        remove_non_exist_spec_dim_labs(h5_spec_inds, h5_spec_vals, h5_meas,
                                       verbose=False)

        '''
        Add back some standard metadata to be consistent with older BE data
        '''
        missing_metadata = dict()
        if 'File_file_name' not in h5_meas.attrs.keys():
            missing_metadata['File_file_name'] = os.path.split(h5_raw.file.filename)[-1].replace('.h5', '')
        if 'File_date_and_time' not in h5_meas.attrs.keys():
            try:
                date_str = get_attr(h5_raw.file, 'date_string')
                time_str = get_attr(h5_raw.file, 'time_string')
                full_str = date_str.strip() + ' ' + time_str.strip()
                """
                convert:
                    date_string : 2018-12-05
                    time_string : 3:41:45 PM
                to:
                    File_date_and_time: 19-Jun-2009 18:44:56
                """
                try:
                    dt_obj = datetime.datetime.strptime(full_str,
                                                        "%Y-%m-%d %I:%M:%S %p")
                    missing_metadata['File_date_and_time'] = dt_obj.strftime('%d-%b-%Y %H:%M:%S')
                except ValueError:
                    pass
            except KeyError:
                pass
        # Now write to measurement group:
        if len(missing_metadata) > 0:
            write_simple_attrs(h5_meas, missing_metadata)

        # Link the references to the Indices and Values datasets to the Raw_Data
        link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

        # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
        h5_freqs = h5_chan['Bin_Frequencies']
        aux_dset_names = ['Bin_Frequencies']
        aux_dset_refs = [h5_freqs.ref]
        check_and_link_ancillary(h5_raw, aux_dset_names, anc_refs=aux_dset_refs)

        '''
        Get all SHO_Fit groups for the Raw_Data and loop over them.
        Get the Guess and Spectroscopic Datasets for each SHO_Fit group.
        '''
        sho_list = find_results_groups(h5_raw, 'SHO_Fit')
        for h5_sho in sho_list:
            h5_sho_guess = h5_sho['Guess']
            h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
            h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_sho_spec_inds = create_spec_inds_from_vals(h5_sho_spec_inds[()])
            if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

            # Get the labels and units for the Spectroscopic datasets
            h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
            link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals,
                         h5_sho_spec_inds, h5_sho_spec_vals)
            sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

            for dset in sho_inds_and_vals:
                spec_labels = get_attr(dset, 'labels')
                try:
                    spec_units = get_attr(dset, 'units')
                    if len(spec_units) != len(spec_labels):
                        raise KeyError
                except KeyError:
                    spec_units = [''.encode('utf-8') for _ in spec_labels]
                    dset.attrs['units'] = spec_units
                except:
                    raise

        h5_file.flush()

    h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')
    h5_file.close()

    return h5_path
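# The metadata patching above hinges on a single strptime/strftime round trip.
# A standalone sketch of that conversion (the function name is illustrative,
# not part of the original module):
def _demo_labview_timestamp(date_str='2018-12-05', time_str='3:41:45 PM'):
    import datetime

    full_str = date_str.strip() + ' ' + time_str.strip()
    # Parse the LabView-style strings, then re-emit in the older BE format
    dt_obj = datetime.datetime.strptime(full_str, '%Y-%m-%d %I:%M:%S %p')
    return dt_obj.strftime('%d-%b-%Y %H:%M:%S')  # -> '05-Dec-2018 15:41:45'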