Пример #1
0
    def _check_for_old_guess(self):
        """
        Looks up existing Guess datasets that were computed on this dataset with
        identical parameters and sorts them by completion status.

        Returns
        -------
        tuple of (list, list)
            Partially computed Guess datasets, followed by fully computed ones
        """
        h5_groups = check_for_old(self.h5_main,
                                  self._fitter_name,
                                  new_parms=self._parms_dict,
                                  target_dset='Guess',
                                  verbose=self._verbose)
        guess_dsets = [group['Guess'] for group in h5_groups]

        partial = []
        finished = []

        for h5_guess in guess_dsets:
            try:
                last_pix = get_attr(h5_guess, 'last_pixel')
            except KeyError:
                # No progress marker on this dataset -> cannot classify; ignore it
                continue

            if last_pix < self.h5_main.shape[0]:
                partial.append(h5_guess)
            else:
                finished.append(h5_guess)

        return partial, finished
Пример #2
0
    def _check_for_old_guess(self):
        """
        Finds prior Guess results generated from this dataset with the same
        parameters, split by whether the computation finished.

        Returns
        -------
        tuple of (list, list)
            Datasets holding partial Guess results, then fully completed ones
        """
        old_groups = check_for_old(self.h5_main, self._fitter_name,
                                   new_parms=self._parms_dict,
                                   target_dset='Guess', verbose=self._verbose)

        num_positions = self.h5_main.shape[0]
        partial_dsets = []
        completed_dsets = []

        for grp in old_groups:
            h5_guess = grp['Guess']
            try:
                last_pix = get_attr(h5_guess, 'last_pixel')
            except KeyError:
                # Dataset lacks the bookkeeping attribute -> skip it
                continue
            # Route to the appropriate bucket based on how far it got
            target = partial_dsets if last_pix < num_positions else completed_dsets
            target.append(h5_guess)

        return partial_dsets, completed_dsets
Пример #3
0
    def _check_for_duplicates(self):
        """
        Checks for instances where the process was applied to the same dataset
        with the same parameters.

        Returns
        -------
        duplicate_h5_groups : list of h5py.Group objects
            Groups holding complete results from a previous identical computation
        partial_h5_groups : list of h5py.Group objects
            Groups holding partial results from a previous identical computation
        """
        if self.verbose and self.mpi_rank == 0:
            print('Checking for duplicates:')

        old_h5_groups = check_for_old(self.h5_main,
                                      self.process_name,
                                      new_parms=self.parms_dict)

        # Partition into complete / partial WITHOUT mutating the list while
        # iterating it — the original popped from the list inside enumerate(),
        # which skips the element following each removed one.
        duplicate_h5_groups = []
        partial_h5_groups = []
        for curr_group in old_h5_groups:
            if curr_group.attrs['last_pixel'] < self.h5_main.shape[0]:
                partial_h5_groups.append(curr_group)
            else:
                duplicate_h5_groups.append(curr_group)

        if len(duplicate_h5_groups) > 0 and self.mpi_rank == 0:
            print(
                'Note: ' + self.process_name +
                ' has already been performed with the same parameters before. '
                'These results will be returned by compute() by default. '
                'Set override to True to force fresh computation')
            print(duplicate_h5_groups)

        if partial_h5_groups and self.mpi_rank == 0:
            # Fixed message typos: "will resuming" and "use_patial_computation"
            print(
                'Note: ' + self.process_name +
                ' has already been performed PARTIALLY with the same parameters. '
                'compute() will resume computation in the last group below. '
                'To choose a different group call use_partial_computation(). '
                'Set override to True to force fresh computation or resume from a '
                'data group besides the last in the list.')
            print(partial_h5_groups)

        return duplicate_h5_groups, partial_h5_groups
Пример #4
0
    def _check_for_duplicates(self):
        """
        Checks for instances where the process was applied to the same dataset
        with the same parameters.

        Returns
        -------
        duplicate_h5_groups : list of h5py.Group objects
            Groups holding fully completed results satisfying the above conditions
        partial_h5_groups : list of h5py.Group objects
            Groups holding partially completed results satisfying the above conditions
        """
        if self.verbose:
            print('Checking for duplicates:')

        # All groups written by a previous run with identical parameters,
        # regardless of how far that run got
        old_h5_groups = check_for_old(self.h5_main, self.process_name, new_parms=self.parms_dict)

        duplicate_h5_groups = []
        partial_h5_groups = []

        # Partition into complete / partial WITHOUT mutating the list while
        # iterating it — the original popped from the list inside enumerate(),
        # which skips the element following each removed one.
        for curr_group in old_h5_groups:
            if self._status_dset_name in curr_group.keys():
                # Case 1: Modern Process results carry a per-pixel status dataset,
                # which is more rigorous than the legacy 'last_pixel' attribute.
                status_dset = curr_group[self._status_dset_name]

                if not isinstance(status_dset, h5py.Dataset):
                    # We should not come here if things were implemented correctly
                    print('Results group: {} contained an object named: {} that should have been a dataset'
                          '.'.format(curr_group, self._status_dset_name))

                if self.h5_main.shape[0] != status_dset.shape[0] or len(status_dset.shape) > 1 or \
                        status_dset.dtype != np.uint8:
                    print('Status dataset: {} was not of the expected shape or datatype'.format(status_dset))

                # A status value of 0 marks a pixel that has not been computed yet
                num_unfinished = len(np.where(status_dset[()] == 0)[0])
                if num_unfinished > 0:
                    partial_h5_groups.append(curr_group)
                    # Report the COMPLETED fraction. The original computed the
                    # unfinished fraction but labelled it "completed", and placed
                    # the report in an unreachable else-branch.
                    percent_complete = int(100 * (status_dset.shape[0] - num_unfinished) / status_dset.shape[0])
                    print('Group: {}: computation was {}% completed'.format(curr_group, percent_complete))
                    # NOTE(review): the original wrote
                    # curr_group.attrs['last_pixel'] = self.h5_main.shape[0] here
                    # "for safety", but that would make a PARTIAL group look
                    # complete to the legacy check below — dropped; confirm that
                    # resume logic reads the status dataset, not this attribute.
                else:
                    duplicate_h5_groups.append(curr_group)
                # The status dataset is authoritative; no need for the legacy test
                continue

            # Case 2: Legacy results group — only the 'last_pixel' attribute exists
            if 'last_pixel' not in curr_group.attrs.keys():
                # Should not be coming here at all
                print('Group: {} had neither the status HDF5 dataset or the legacy attribute: "last_pixel"'
                      '.'.format(curr_group))
                # No usable bookkeeping — don't consider such groups at all
                continue

            if curr_group.attrs['last_pixel'] < self.h5_main.shape[0]:
                partial_h5_groups.append(curr_group)
            else:
                duplicate_h5_groups.append(curr_group)

        if len(duplicate_h5_groups) > 0:
            print('Note: ' + self.process_name + ' has already been performed with the same parameters before. '
                                                 'These results will be returned by compute() by default. '
                                                 'Set override to True to force fresh computation')
            print(duplicate_h5_groups)

        if len(partial_h5_groups) > 0:
            # Fixed message typo: "will resuming"
            print('Note: ' + self.process_name + ' has already been performed PARTIALLY with the same parameters. '
                                                 'compute() will resume computation in the last group below. '
                                                 'To choose a different group call use_partial_computation()'
                                                 'Set override to True to force fresh computation or resume from a '
                                                 'data group besides the last in the list.')
            print(partial_h5_groups)

        return duplicate_h5_groups, partial_h5_groups