def _check_for_old_guess(self):
    """
    Finds prior Guess results computed on this dataset with identical
    parameters and sorts them by completion status.

    Returns
    -------
    partial_dsets : list
        Guess datasets whose computation did not cover every position
    completed_dsets : list
        Guess datasets whose computation covered all positions

    Notes
    -----
    The original docstring claimed a single list was returned; this method
    has always returned the ``(partial, completed)`` pair.
    """
    groups = check_for_old(self.h5_main, self._fitter_name,
                           new_parms=self._parms_dict, target_dset='Guess',
                           verbose=self._verbose)
    datasets = [grp['Guess'] for grp in groups]

    # Sort into partially and fully completed results using the
    # 'last_pixel' bookkeeping attribute written during computation
    completed_dsets = []
    partial_dsets = []
    for dset in datasets:
        try:
            last_pix = get_attr(dset, 'last_pixel')
        except KeyError:
            # Legacy results without 'last_pixel' cannot be classified; skip
            continue
        if last_pix < self.h5_main.shape[0]:
            partial_dsets.append(dset)
        else:
            completed_dsets.append(dset)

    return partial_dsets, completed_dsets
def _check_for_duplicates(self):
    """
    Checks for instances where this process was already applied to the same
    dataset with the same parameters.

    Returns
    -------
    duplicate_h5_groups : list of h5py.Group
        Groups with fully completed results from a previous, identical run
    partial_h5_groups : list of h5py.Group
        Groups with partially completed results that can be resumed
    """
    if self.verbose and self.mpi_rank == 0:
        print('Checking for duplicates:')

    candidate_groups = check_for_old(self.h5_main, self.process_name,
                                     new_parms=self.parms_dict)

    # BUG FIX: the previous implementation popped entries out of the list
    # while enumerating that same list, which silently skipped the element
    # following every partial group. Classify into fresh lists instead.
    duplicate_h5_groups = []
    partial_h5_groups = []
    for curr_group in candidate_groups:
        if curr_group.attrs['last_pixel'] < self.h5_main.shape[0]:
            partial_h5_groups.append(curr_group)
        else:
            duplicate_h5_groups.append(curr_group)

    if duplicate_h5_groups and self.mpi_rank == 0:
        print('Note: ' + self.process_name + ' has already been performed with the same parameters before. '
              'These results will be returned by compute() by default. '
              'Set override to True to force fresh computation')
        print(duplicate_h5_groups)

    if partial_h5_groups and self.mpi_rank == 0:
        # Message typos fixed: "will resuming" -> "will resume",
        # "use_patial_computation()" -> "use_partial_computation()",
        # and a missing separator before the following sentence.
        print('Note: ' + self.process_name + ' has already been performed PARTIALLY with the same parameters. '
              'compute() will resume computation in the last group below. '
              'To choose a different group call use_partial_computation(). '
              'Set override to True to force fresh computation or resume from a '
              'data group besides the last in the list.')
        print(partial_h5_groups)

    return duplicate_h5_groups, partial_h5_groups
def _check_for_duplicates(self):
    """
    Checks for instances where this process was already applied to the same
    dataset with the same parameters.

    Returns
    -------
    duplicate_h5_groups : list of h5py.Group
        Groups with fully completed results from a previous, identical run
    partial_h5_groups : list of h5py.Group
        Groups with partially completed results that can be resumed
    """
    if self.verbose:
        print('Checking for duplicates:')

    # Candidate groups computed with identical parameters (any completion state)
    candidate_groups = check_for_old(self.h5_main, self.process_name,
                                     new_parms=self.parms_dict)

    duplicate_h5_groups = []
    partial_h5_groups = []

    # BUG FIX: the previous implementation popped entries out of the list
    # while enumerating that same list, which silently skipped the element
    # following every removed group. Classify into fresh lists instead.
    for curr_group in candidate_groups:
        if self._status_dset_name in curr_group.keys():
            # Case 1: modern results with a per-pixel status dataset.
            # The status dataset is authoritative, so we classify from it and
            # skip the legacy 'last_pixel' checks entirely (previously, fully
            # completed modern groups fell through to the legacy checks and
            # could be dropped if they lacked the legacy attribute).
            status_dset = curr_group[self._status_dset_name]
            if not isinstance(status_dset, h5py.Dataset):
                # We should not come here if things were implemented correctly
                print('Results group: {} contained an object named: {} that should have been a dataset'
                      '.'.format(curr_group, self._status_dset_name))
            if self.h5_main.shape[0] != status_dset.shape[0] or len(status_dset.shape) > 1 or \
                    status_dset.dtype != np.uint8:
                print('Status dataset: {} was not of the expected shape or datatype'.format(status_dset))

            # A zero in the status dataset marks an uncompleted pixel
            num_incomplete = len(np.where(status_dset[()] == 0)[0])
            if num_incomplete > 0:
                partial_h5_groups.append(curr_group)
                # Write the legacy attribute for older readers.
                # NOTE(review): preserved from the original code, which wrote
                # the full dataset size even though the group is partial -
                # verify that downstream consumers expect this.
                curr_group.attrs['last_pixel'] = self.h5_main.shape[0]
                if self.verbose:
                    # BUG FIX: the original computed the fraction of
                    # *incomplete* pixels and labeled it "% completed", and
                    # the print sat in an unreachable branch.
                    percent_complete = int(100 * (status_dset.shape[0] - num_incomplete) / status_dset.shape[0])
                    print('Group: {}: computation was {}% completed'.format(curr_group, percent_complete))
            else:
                duplicate_h5_groups.append(curr_group)
            continue

        # Case 2: legacy results tracked only via the 'last_pixel' attribute.
        # Note that legacy computations did not have the status dataset.
        if 'last_pixel' not in curr_group.attrs.keys():
            # Neither bookkeeping mechanism present - cannot classify this
            # group, so it is excluded from both returned lists.
            print('Group: {} had neither the status HDF5 dataset or the legacy attribute: "last_pixel"'
                  '.'.format(curr_group))
            continue

        if curr_group.attrs['last_pixel'] < self.h5_main.shape[0]:
            # Should we create the status dataset here, to make the group
            # future-proof?
            partial_h5_groups.append(curr_group)
        else:
            duplicate_h5_groups.append(curr_group)

    if len(duplicate_h5_groups) > 0:
        print('Note: ' + self.process_name + ' has already been performed with the same parameters before. '
              'These results will be returned by compute() by default. '
              'Set override to True to force fresh computation')
        print(duplicate_h5_groups)

    if len(partial_h5_groups) > 0:
        # Message typos fixed: "will resuming" -> "will resume", plus a
        # missing separator before the following sentence.
        print('Note: ' + self.process_name + ' has already been performed PARTIALLY with the same parameters. '
              'compute() will resume computation in the last group below. '
              'To choose a different group call use_partial_computation(). '
              'Set override to True to force fresh computation or resume from a '
              'data group besides the last in the list.')
        print(partial_h5_groups)

    return duplicate_h5_groups, partial_h5_groups