Пример #1
0
    def test_h5_no_sort_reqd(self):
        """
        Reshape the main dataset of the standard BEPS file, once without and once with dimension sorting, and
        verify that both calls yield the same labels and the same N-dimensional array.
        """
        dim_sizes = (3, 5, 2, 7)  # rows, columns, cycles, points per cycle
        expected_labels = ['X', 'Y', 'Bias', 'Cycle']

        with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
            h5_main = h5_f['/Raw_Measurement/source_main']

            # The expectations are the same regardless of whether sorting was requested
            for sort_dims in (False, True):
                n_dim, success, labels = hdf_utils.reshape_to_n_dims(
                    h5_main, get_labels=True, sort_dims=sort_dims, lazy=False)

                labels_match = [actual == expected for actual, expected in zip(labels, expected_labels)]
                self.assertTrue(np.all(labels_match))

                # The flattened data is reshaped with the sizes above and then the two position axes
                # and the two spectroscopic axes are swapped pairwise
                expected_n_dim = np.transpose(np.reshape(h5_main[()], dim_sizes), (1, 0, 3, 2))
                self.assertTrue(np.allclose(expected_n_dim, n_dim))
Пример #2
0
    def test(self, override=False):
        """
        Applies randomized SVD to the dataset. This function does NOT write results to the hdf5 file. Call compute() to
        write to the file. Handles complex, compound datasets such that the V matrix is of the same data-type as the
        input matrix.

        Parameters
        ----------
        override : bool, optional. default = False
            Set to true to recompute results if prior results are available. Else, returns existing results

        Returns
        -------
        U : :class:`numpy.ndarray`
            Abundance matrix
        S : :class:`numpy.ndarray`
            variance vector
        V : :class:`numpy.ndarray`
            eigenvector matrix

        Raises
        ------
        ValueError
            If the freshly computed U or V matrix could not be reshaped to N dimensions
        """
        # NOTE (C.Smith): a lower limit on the number of components may be needed in the future.
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                # Reuse the most recent of the previously computed result groups
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return reshape_to_n_dims(self.h5_results_grp['U'])[0], self.h5_results_grp['S'][()], \
                       reshape_to_n_dims(self.h5_results_grp['V'])[0]

        self.h5_results_grp = None

        t1 = time.time()

        self.__u, self.__s, self.__v = randomized_svd(self.data_transform_func(self.h5_main), self.num_components,
                                                      n_iter=3)
        # Bring V back to the data-type of the source dataset
        self.__v = stack_real_to_target_dtype(self.__v, self.h5_main.dtype)

        print('Took {} to compute randomized SVD'.format(format_time(time.time() - t1)))

        # U keeps the positions of the main dataset; a single row counting the columns of U stands in for the
        # spectroscopic indices
        u_mat, success = reshape_to_n_dims(self.__u, h5_pos=self.h5_main.h5_pos_inds,
                                           h5_spec=np.expand_dims(np.arange(self.__u.shape[1]), axis=0))
        if not success:
            # format() is used because the status is not a string on this path; concatenating it with '+'
            # would raise a TypeError instead of the intended ValueError
            raise ValueError('Could not reshape U to N-Dimensional dataset! Error: {}'.format(success))

        # V keeps the spectroscopic indices of the main dataset; a single column counting the columns of U stands in
        # for the position indices
        v_mat, success = reshape_to_n_dims(self.__v, h5_pos=np.expand_dims(np.arange(self.__u.shape[1]), axis=1),
                                           h5_spec=self.h5_main.h5_spec_inds)
        if not success:
            raise ValueError('Could not reshape V to N-Dimensional dataset! Error: {}'.format(success))

        return u_mat, self.__s, v_mat
Пример #3
0
    def test(self, override=False):
        """
        Applies randomized SVD to the dataset. This function does NOT write results to the hdf5 file. Call compute() to
        write to the file. Handles complex, compound datasets such that the V matrix is of the same data-type as the
        input matrix.

        Parameters
        ----------
        override : bool, optional. default = False
            Set to true to recompute results if prior results are available. Else, returns existing results

        Returns
        -------
        U : :class:`numpy.ndarray`
            Abundance matrix
        S : :class:`numpy.ndarray`
            variance vector
        V : :class:`numpy.ndarray`
            eigenvector matrix

        Raises
        ------
        ValueError
            If the freshly computed U or V matrix could not be reshaped to N dimensions
        """
        # NOTE (C.Smith): a lower limit on the number of components may be needed in the future.
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                # Reuse the most recent of the previously computed result groups
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return reshape_to_n_dims(self.h5_results_grp['U'])[0], self.h5_results_grp['S'][()], \
                       reshape_to_n_dims(self.h5_results_grp['V'])[0]

        self.h5_results_grp = None

        t1 = time.time()

        self.__u, self.__s, self.__v = randomized_svd(self.data_transform_func(self.h5_main), self.num_components,
                                                      n_iter=3)
        # Bring V back to the data-type of the source dataset
        self.__v = stack_real_to_target_dtype(self.__v, self.h5_main.dtype)

        print('Took {} to compute randomized SVD'.format(format_time(time.time() - t1)))

        # U keeps the positions of the main dataset; a single row counting the columns of U stands in for the
        # spectroscopic indices
        u_mat, success = reshape_to_n_dims(self.__u, h5_pos=self.h5_main.h5_pos_inds,
                                           h5_spec=np.expand_dims(np.arange(self.__u.shape[1]), axis=0))
        if not success:
            # format() is used because the status is not a string on this path; concatenating it with '+'
            # would raise a TypeError instead of the intended ValueError
            raise ValueError('Could not reshape U to N-Dimensional dataset! Error: {}'.format(success))

        # V keeps the spectroscopic indices of the main dataset; a single column counting the columns of U stands in
        # for the position indices
        v_mat, success = reshape_to_n_dims(self.__v, h5_pos=np.expand_dims(np.arange(self.__u.shape[1]), axis=1),
                                           h5_spec=self.h5_main.h5_spec_inds)
        if not success:
            raise ValueError('Could not reshape V to N-Dimensional dataset! Error: {}'.format(success))

        return u_mat, self.__s, v_mat
Пример #4
0
    def test(self, override=False):
        """
        Decomposes the hdf5 dataset to calculate the components and projection. This function does NOT write results to
        the hdf5 file. Call :meth:`~pycroscopy.processing.Decomposition.compute()` to write to the file. Handles
        complex, compound datasets such that the components are of the same data-type as the input matrix.

        Parameters
        ----------
        override : bool, optional. default = False
            Set to true to recompute results if prior results are available. Else, returns existing results

        Returns
        -------
        components : :class:`numpy.ndarray`
            Components
        projections : :class:`numpy.ndarray`
            Projections

        Raises
        ------
        ValueError
            If the freshly computed projections or components could not be reshaped to N dimensions
        """
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                # Reuse the most recent of the previously computed result groups
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return USIDataset(self.h5_results_grp['Components']).get_n_dim_form(), \
                       USIDataset(self.h5_results_grp['Projection']).get_n_dim_form()

        self.h5_results_grp = None

        print('Performing Decomposition on {}.'.format(self.h5_main.name))

        t0 = time.time()
        self._fit()
        self._transform()
        print('Took {} to compute {}'.format(format_time(time.time() - t0), self.method_name))

        # Bring the components back to the data-type of the source dataset
        self.__components = stack_real_to_target_dtype(self.estimator.components_, self.h5_main.dtype)

        # NOTE(review): self.__projection is not assigned in this method - presumably _transform() sets it; confirm.
        # The projection keeps the positions of the main dataset; a single row counting its columns stands in for
        # the spectroscopic indices
        projection_mat, success = reshape_to_n_dims(self.__projection, h5_pos=self.h5_main.h5_pos_inds,
                                                    h5_spec=np.expand_dims(np.arange(self.__projection.shape[1]),
                                                                           axis=0))
        if not success:
            # format() is used because the status is not a string on this path; concatenating it with '+'
            # would raise a TypeError instead of the intended ValueError
            raise ValueError('Could not reshape projections to N-Dimensional dataset! Error: {}'.format(success))

        # The components keep the spectroscopic indices of the main dataset; a single column counting their rows
        # stands in for the position indices
        components_mat, success = reshape_to_n_dims(self.__components, h5_spec=self.h5_main.h5_spec_inds,
                                                    h5_pos=np.expand_dims(np.arange(self.__components.shape[0]),
                                                                          axis=1))

        if not success:
            raise ValueError('Could not reshape components to N-Dimensional dataset! Error: {}'.format(success))

        return components_mat, projection_mat
Пример #5
0
    def test(self, override=False):
        """
        Decomposes the hdf5 dataset to calculate the components and projection. This function does NOT write results to
        the hdf5 file. Call :meth:`~pycroscopy.processing.Decomposition.compute()` to write to the file. Handles
        complex, compound datasets such that the components are of the same data-type as the input matrix.

        Parameters
        ----------
        override : bool, optional. default = False
            Set to true to recompute results if prior results are available. Else, returns existing results

        Returns
        -------
        components : :class:`numpy.ndarray`
            Components
        projections : :class:`numpy.ndarray`
            Projections

        Raises
        ------
        ValueError
            If the freshly computed projections or components could not be reshaped to N dimensions
        """
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                # Reuse the most recent of the previously computed result groups
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return USIDataset(self.h5_results_grp['Components']).get_n_dim_form(), \
                       USIDataset(self.h5_results_grp['Projection']).get_n_dim_form()

        self.h5_results_grp = None

        print('Performing Decomposition on {}.'.format(self.h5_main.name))

        t0 = time.time()
        self._fit()
        self._transform()
        print('Took {} to compute {}'.format(format_time(time.time() - t0), self.method_name))

        # Bring the components back to the data-type of the source dataset
        self.__components = stack_real_to_target_dtype(self.estimator.components_, self.h5_main.dtype)

        # NOTE(review): self.__projection is not assigned in this method - presumably _transform() sets it; confirm.
        # The projection keeps the positions of the main dataset; a single row counting its columns stands in for
        # the spectroscopic indices
        projection_mat, success = reshape_to_n_dims(self.__projection, h5_pos=self.h5_main.h5_pos_inds,
                                                    h5_spec=np.expand_dims(np.arange(self.__projection.shape[1]),
                                                                           axis=0))
        if not success:
            # format() is used because the status is not a string on this path; concatenating it with '+'
            # would raise a TypeError instead of the intended ValueError
            raise ValueError('Could not reshape projections to N-Dimensional dataset! Error: {}'.format(success))

        # The components keep the spectroscopic indices of the main dataset; a single column counting their rows
        # stands in for the position indices
        components_mat, success = reshape_to_n_dims(self.__components, h5_spec=self.h5_main.h5_spec_inds,
                                                    h5_pos=np.expand_dims(np.arange(self.__components.shape[0]),
                                                                          axis=1))

        if not success:
            raise ValueError('Could not reshape components to N-Dimensional dataset! Error: {}'.format(success))

        return components_mat, projection_mat
Пример #6
0
    def test_numpy(self):
        """
        Reshape a plain numpy array, passing numpy arrays as the position and spectroscopic indices, where both
        sets of indices are arranged slow-to-fast instead of fast-to-slow.
        """
        num_rows, num_cols, num_cycles, num_cycle_pts = 3, 5, 2, 7
        dim_sizes = (num_rows, num_cols, num_cycles, num_cycle_pts)

        # arrange as slow, fast instead of fast, slow
        row_inds = np.repeat(np.arange(num_rows), num_cols)
        col_inds = np.tile(np.arange(num_cols), num_rows)
        source_pos_data = np.vstack((row_inds, col_inds)).T

        # Each element is assigned 1000 * row + 100 * column + 10 * cycle + bias (stored as float16)
        source_main_data = np.zeros(shape=(num_rows * num_cols, num_cycle_pts * num_cycles), dtype=np.float16)
        for row_ind, col_ind, cycle_ind, bias_ind in np.ndindex(*dim_sizes):
            val = 1E+3 * row_ind + 1E+2 * col_ind + 1E+1 * cycle_ind + bias_ind
            source_main_data[row_ind * num_cols + col_ind, cycle_ind * num_cycle_pts + bias_ind] = val

        # make spectroscopic slow, fast instead of fast, slow
        cycle_inds = np.repeat(np.arange(num_cycles), num_cycle_pts)
        bias_inds = np.tile(np.arange(num_cycle_pts), num_cycles)
        source_spec_data = np.vstack((cycle_inds, bias_inds))

        n_dim, success = hdf_utils.reshape_to_n_dims(source_main_data,
                                                     h5_pos=source_pos_data,
                                                     h5_spec=source_spec_data,
                                                     get_labels=False,
                                                     lazy=False)

        expected_n_dim = np.reshape(source_main_data, dim_sizes)
        self.assertTrue(np.allclose(expected_n_dim, n_dim))
Пример #7
0
    def _reshape_sho_matrix(self, raw_2d):
        """
        Reshapes the raw 2D SHO matrix to N dimensions, moves the dimension at ``self._fit_offset_index`` to the
        front and flattens all remaining dimensions into a single one

        Parameters
        ----------
        raw_2d : 2D compound numpy array
            Raw SHO fitted data arranged as [position, data for a single FORC cycle]

        Returns
        -------
        loops_2d : 2D numpy compound array
            Data arranged as [DC offset, all other dimensions flattened]
        order_dc_offset_reverse : list
            Order in which the N dimensional data should be transposed to return it to the same format
            as the input data of this function
        nd_mat_shape_dc_first : tuple
            Shape of the N dimensional array (DC offset first) that loops_2d can be turned into.
            Use the order_dc_offset_reverse after this reshape

        Notes
        -----
        If the reshape to N dimensions fails, a warning is issued and a single ``None`` is returned instead of
        the three values above.
        """
        # step 4: reshape to N dimensions
        spec_inds = self._sho_spec_inds[self._sho_all_but_forc_inds, self._current_sho_spec_slice]
        fit_nd, success = reshape_to_n_dims(raw_2d, h5_pos=None, h5_spec=spec_inds, verbose=self._verbose)
        if not success:
            warn('Error - could not reshape provided raw data chunk...')
            return None

        non_forc_labels = np.array(self.h5_main.spec_dim_labels)[self._sho_all_but_forc_inds]
        dim_names_orig = np.hstack(('Positions', non_forc_labels))

        if self._verbose:
            print('Shape of N dimensional dataset:', fit_nd.shape)
            print('Dimensions of order:', dim_names_orig)

        # step 5: Move the voltage dimension to the first dim
        offset_ind = self._fit_offset_index
        dims_after_offset = list(range(offset_ind + 1, len(fit_nd.shape)))
        # forward permutation: the offset dimension first, everything else in its original order
        order_dc_outside_nd = [offset_ind] + list(range(offset_ind)) + dims_after_offset
        # inverse permutation: puts the leading dimension back at position offset_ind
        order_dc_offset_reverse = list(range(1, offset_ind + 1)) + [0] + dims_after_offset

        fit_nd2 = np.transpose(fit_nd, tuple(order_dc_outside_nd))
        dim_names_dc_out = dim_names_orig[order_dc_outside_nd]
        if self._verbose:
            print('originally:', fit_nd.shape, ', after moving DC offset outside:', fit_nd2.shape)
            print('new dim names:', dim_names_dc_out)

        # step 6: reshape the ND data to 2D arrays
        num_offsets = fit_nd2.shape[0]
        loops_2d = np.reshape(fit_nd2, (num_offsets, -1))
        if self._verbose:
            print('Loops ready to be projected of shape (Vdc, all other dims besides FORC):', loops_2d.shape)

        return loops_2d, order_dc_offset_reverse, fit_nd2.shape
Пример #8
0
    def _get_dc_offset(self):
        """
        Extracts the values of the fit dimension (DC offset) for the current FORC step and stores them in
        ``self.fit_dim_vec``

        Notes
        -----
        Takes no arguments. Reads ``self._sho_spec_vals``, ``self._sho_spec_inds``,
        ``self._sho_all_but_forc_inds``, ``self._current_sho_spec_slice``, ``self.h5_main.spec_dim_labels``,
        ``self._fit_dim_name`` and ``self._verbose``. Debugging statements are printed when ``self._verbose``
        is truthy.

        Returns
        -------
        None
            The result is not returned; it is written to ``self.fit_dim_vec``

        Raises
        ------
        IndexError
            If ``self._fit_dim_name`` does not match any of the labels in ``spec_labels_sorted``
        """
        # Reshape the selected rows / columns of the spectroscopic values to N dimensions, using the
        # matching selection of the spectroscopic indices.
        # NOTE(review): `success` is not checked after this call.
        spec_vals_nd, success = reshape_to_n_dims(self._sho_spec_vals[self._sho_all_but_forc_inds,
                                                                      self._current_sho_spec_slice],
                                                  h5_spec=self._sho_spec_inds[self._sho_all_but_forc_inds,
                                                                              self._current_sho_spec_slice])
        # This should result in a N+1 dimensional matrix where the first index contains the actual data
        # the other dimensions are present to easily slice the data
        spec_labels_sorted = np.hstack(('Dim', self.h5_main.spec_dim_labels))
        if self._verbose:
            print('Spectroscopic dimensions sorted by rate of change:')
            print(spec_labels_sorted)
        # slice the N dimensional dataset such that we only get the DC offset for default values of other dims
        # Position of the fit dimension within the label list that includes the leading 'Dim' entry
        fit_dim_pos = np.argwhere(spec_labels_sorted == self._fit_dim_name)[0][0]
        # Earlier way of building the slice, kept for reference:
        # fit_dim_slice = list()
        # for dim_ind in range(spec_labels_sorted.size):
        #     if dim_ind == fit_dim_pos:
        #         fit_dim_slice.append(slice(None))
        #     else:
        #         fit_dim_slice.append(slice(0, 1))

        # Build the index tuple. Entry 0 indexes the first axis of spec_vals_nd: it starts as fit_dim_pos and is
        # overwritten with idim (the position in the label list WITHOUT the leading 'Dim') once the fit dimension
        # is encountered. One further entry is appended per label that is not a FORC label: the full range for
        # the fit dimension and only the first element for every other dimension.
        fit_dim_slice = [fit_dim_pos]
        for idim, dim in enumerate(spec_labels_sorted[1:]):
            if dim == self._fit_dim_name:
                fit_dim_slice.append(slice(None))
                fit_dim_slice[0] = idim
            elif dim in ['FORC', 'FORC_repeat', 'FORC_Cycle']:
                # FORC labels contribute no entry to the index tuple
                continue
            else:
                fit_dim_slice.append(slice(0, 1))

        if self._verbose:
            print('slice to extract Vdc:')
            print(fit_dim_slice)

        # Drop the singleton axes left behind by the slice(0, 1) entries
        self.fit_dim_vec = np.squeeze(spec_vals_nd[tuple(fit_dim_slice)])

        return
Пример #9
0
    def test_sorted_and_unsorted(self):
        """
        Check the N-dimensional form of the main dataset before and after toggling the sorting of the
        USIDataset against the expected fast-to-slow and slow-to-fast arrays respectively.
        """
        with h5py.File(test_h5_file_path, mode='r') as h5_f:
            h5_main = h5_f['/Raw_Measurement/source_main']
            usi_dset = USIDataset(h5_main)
            nd_slow_to_fast, nd_fast_to_slow = self.get_expected_n_dim(h5_f)

            # Before toggling: expected to match the fast-to-slow arrangement
            actual_f2s = usi_dset.get_n_dim_form(lazy=False)
            self.assertTrue(np.allclose(nd_fast_to_slow, actual_f2s))

            # Only the shape of this result is printed; nothing is asserted on it
            nd_form, success = hdf_utils.reshape_to_n_dims(usi_dset, sort_dims=True)
            print(nd_form.shape)

            # After toggling: expected to match the slow-to-fast arrangement
            usi_dset.toggle_sorting()
            actual_s2f = usi_dset.get_n_dim_form(lazy=False)
            self.assertTrue(np.allclose(nd_slow_to_fast, actual_s2f))
Пример #10
0
    def test_h5_not_main_dset(self):
        """
        Verify that reshape_to_n_dims raises a ValueError for a dataset that is not a main dataset, and for
        datasets paired with incompatible ancillary datasets.
        """
        with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
            h5_not_main = h5_f['/Raw_Measurement/Ancillary']
            ancillaries = {'h5_pos': h5_f['/Raw_Measurement/Position_Indices'],
                           'h5_spec': h5_f['/Raw_Measurement/Spectroscopic_Indices']}

            # Not a main dataset
            with self.assertRaises(ValueError):
                hdf_utils.reshape_to_n_dims(h5_not_main)

            # Not a main dataset, and the supplied ancillary datasets are incompatible as well
            with self.assertRaises(ValueError):
                hdf_utils.reshape_to_n_dims(h5_not_main, **ancillaries)

            # A main dataset, but the supplied ancillary datasets are incompatible
            h5_results = h5_f['/Raw_Measurement/source_main-Fitter_000/results_main']
            with self.assertRaises(ValueError):
                hdf_utils.reshape_to_n_dims(h5_results, **ancillaries)
Пример #11
0
    def reshape_sho_chunk_to_nd(data_2d,
                                raw_dim_labels,
                                h5_pos_inds,
                                h5_spec_inds,
                                verbose=False):
        """
        Reshapes a 2D chunk of data to N dimensions and permutes the dimensions using the reversed sort orders of
        the position and spectroscopic indices

        Parameters
        ----------
        data_2d : 2D array
            Chunk of data to reshape. Only the first ``data_2d.shape[0]`` rows of ``h5_pos_inds`` are used for it
        raw_dim_labels : array-like
            Labels of the dimensions, indexed with the same permutation that is applied to the data
        h5_pos_inds : array-like
            Position indices
        h5_spec_inds : array-like
            Spectroscopic indices
        verbose : bool, optional. Default = False
            Whether or not to print debugging statements

        Returns
        -------
        data_nd_s2f : N dimensional array
            Reshaped and permuted data
        dim_labels_s2f : :class:`numpy.ndarray`
            Dimension labels in the permuted order

        Raises
        ------
        ValueError
            If the status returned by reshape_to_n_dims does not compare equal to True
        """
        num_positions = data_2d.shape[0]
        data_nd_auto, success = reshape_to_n_dims(data_2d, h5_pos_inds[:num_positions], h5_spec_inds)

        # The explicit comparison with True is kept on purpose: any other status is treated as a failure
        if success != True:
            raise ValueError(
                'Unable to reshape data chunk of shape {} to N dimensions'.
                format(data_2d.shape))

        if verbose:
            print('Reshaped raw data from: {} to {}'.format(
                data_2d.shape, data_nd_auto.shape))

        # By default it is fast to slow (original author's note), hence both sort orders are reversed
        pos_sort = get_sort_order(h5_pos_inds)[::-1]
        spec_sort = get_sort_order(h5_spec_inds)[::-1]
        # The spectroscopic axes come after the position axes, so their indices are offset accordingly
        spec_axes = len(pos_sort) + spec_sort
        swap_order = list(pos_sort) + list(spec_axes)

        if verbose:
            print(
                'Dimensions will be permuted as {} to arrange them from slowest to fastest'
                .format(swap_order))

        data_nd_s2f = data_nd_auto.transpose(swap_order)
        dim_labels_s2f = np.array(raw_dim_labels)[swap_order]

        if verbose:
            print(
                'After rearranging array is of shape: {}, dimensions are ordered as: {}'
                .format(data_nd_s2f.shape, dim_labels_s2f))

        return data_nd_s2f, dim_labels_s2f
Пример #12
0
    def test_sort_required(self):
        """
        Build a temporary HDF5 file whose position and spectroscopic indices are arranged slow-to-fast, link the
        ancillary datasets to the main dataset and verify that reshape_to_n_dims with sort_dims=True returns the
        expected labels and N-dimensional array. The file is removed at the end of the test.
        """
        file_path = 'reshape_to_n_dim_sort_required.h5'
        data_utils.delete_existing_file(file_path)
        # The mode must be explicit: h5py >= 3.0 opens files read-only by default, which fails for a file that
        # does not exist yet
        with h5py.File(file_path, mode='w') as h5_f:
            h5_raw_grp = h5_f.create_group('Raw_Measurement')

            num_rows = 3
            num_cols = 5
            num_cycles = 2
            num_cycle_pts = 7

            source_dset_name = 'source_main'

            # arrange as slow, fast instead of fast, slow
            source_pos_data = np.vstack(
                (np.repeat(np.arange(num_rows),
                           num_cols), np.tile(np.arange(num_cols),
                                              num_rows))).T
            pos_attrs = {'units': ['nm', 'um'], 'labels': ['X', 'Y']}

            h5_pos_inds = h5_raw_grp.create_dataset('Position_Indices',
                                                    data=source_pos_data,
                                                    dtype=np.uint16)
            data_utils.write_aux_reg_ref(h5_pos_inds,
                                         pos_attrs['labels'],
                                         is_spec=False)
            data_utils.write_string_list_as_attr(h5_pos_inds, pos_attrs)

            h5_pos_vals = h5_raw_grp.create_dataset('Position_Values',
                                                    data=source_pos_data,
                                                    dtype=np.float32)
            data_utils.write_aux_reg_ref(h5_pos_vals,
                                         pos_attrs['labels'],
                                         is_spec=False)
            data_utils.write_string_list_as_attr(h5_pos_vals, pos_attrs)

            # Each element is assigned 1000 * row + 100 * column + 10 * cycle + bias (stored as float16)
            source_main_data = np.zeros(shape=(num_rows * num_cols,
                                               num_cycle_pts * num_cycles),
                                        dtype=np.float16)
            for row_ind in range(num_rows):
                for col_ind in range(num_cols):
                    for cycle_ind in range(num_cycles):
                        for bias_ind in range(num_cycle_pts):
                            val = 1E+3 * row_ind + 1E+2 * col_ind + 1E+1 * cycle_ind + bias_ind
                            source_main_data[row_ind * num_cols + col_ind,
                                             cycle_ind * num_cycle_pts +
                                             bias_ind] = val

            h5_source_main = h5_raw_grp.create_dataset(source_dset_name,
                                                       data=source_main_data)
            data_utils.write_safe_attrs(h5_source_main, {
                'units': 'A',
                'quantity': 'Current'
            })

            # make spectroscopic slow, fast instead of fast, slow
            source_spec_data = np.vstack(
                (np.repeat(np.arange(num_cycles), num_cycle_pts),
                 np.tile(np.arange(num_cycle_pts), num_cycles)))
            source_spec_attrs = {
                'units': ['', 'V'],
                'labels': ['Cycle', 'Bias']
            }

            h5_source_spec_inds = h5_raw_grp.create_dataset(
                'Spectroscopic_Indices',
                data=source_spec_data,
                dtype=np.uint16)
            data_utils.write_aux_reg_ref(h5_source_spec_inds,
                                         source_spec_attrs['labels'],
                                         is_spec=True)
            data_utils.write_string_list_as_attr(h5_source_spec_inds,
                                                 source_spec_attrs)

            h5_source_spec_vals = h5_raw_grp.create_dataset(
                'Spectroscopic_Values',
                data=source_spec_data,
                dtype=np.float32)
            data_utils.write_aux_reg_ref(h5_source_spec_vals,
                                         source_spec_attrs['labels'],
                                         is_spec=True)
            data_utils.write_string_list_as_attr(h5_source_spec_vals,
                                                 source_spec_attrs)

            # Now need to link as main: each ancillary dataset is referenced from an attribute of the main
            # dataset that carries the ancillary dataset's own name
            for dset in [
                    h5_pos_inds, h5_pos_vals, h5_source_spec_inds,
                    h5_source_spec_vals
            ]:
                h5_source_main.attrs[dset.name.split('/')[-1]] = dset.ref

            n_dim, success, labels = hdf_utils.reshape_to_n_dims(
                h5_source_main, get_labels=True, sort_dims=True, lazy=False)
            # With sorting, the labels are expected in the reverse of the order in which they were written
            self.assertTrue(
                np.all([
                    x == y for x, y in zip(labels, ['Y', 'X', 'Bias', 'Cycle'])
                ]))
            expected_n_dim = np.reshape(
                source_main_data,
                (num_rows, num_cols, num_cycles, num_cycle_pts))
            expected_n_dim = np.transpose(expected_n_dim, [1, 0, 3, 2])
            self.assertTrue(np.allclose(expected_n_dim, n_dim))

        os.remove(file_path)
Пример #13
0
    def test(self, rearrange_clusters=True, override=False):
        """
        Clusters the hdf5 dataset and calculates mean response for each cluster. This function does NOT write results to
        the hdf5 file. Call :meth:`~pycroscopy.processing.Cluster.compute()` to write to the file.
        Handles complex, compound datasets such that the mean response vector for each cluster matrix is of the
        same data-type as the input matrix.

        Parameters
        ----------
        rearrange_clusters : bool, optional. Default = True
            Whether or not the clusters should be re-ordered by relative distances between the mean response
        override : bool, optional. default = False
            Set to true to recompute results if prior results are available. Else, returns existing results

        Returns
        -------
        labels : :class:`numpy.ndarray`
            Cluster labels as obtained from the fit, reshaped using the position indices of the main dataset
            and squeezed
        mean_response : :class:`numpy.ndarray`
            Mean response for each cluster, reshaped to N dimensions with the cluster number as the first dimension

        Raises
        ------
        ValueError
            If the freshly computed labels or mean response could not be reshaped to N dimensions
        """
        if not override:
            if isinstance(self.duplicate_h5_groups,
                          list) and len(self.duplicate_h5_groups) > 0:
                # Reuse the most recent of the previously computed result groups
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(
                    self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return np.squeeze(reshape_to_n_dims(self.h5_results_grp['Labels'])[0]), \
                       reshape_to_n_dims(self.h5_results_grp['Mean_Response'])[0]

        self.h5_results_grp = None

        t1 = time.time()

        print('Performing clustering on {}.'.format(self.h5_main.name))
        # perform fit on the real dataset
        results = self.estimator.fit(
            self.data_transform_func(self.h5_main[self.data_slice]))

        print('Took {} to compute {}'.format(format_time(time.time() - t1),
                                             self.method_name))

        t1 = time.time()
        self.__mean_resp = self._get_mean_response(results.labels_)
        print('Took {} to calculate mean response per cluster'.format(
            format_time(time.time() - t1)))

        self.__labels = results.labels_
        if rearrange_clusters:
            self.__labels, self.__mean_resp = reorder_clusters(
                results.labels_, self.__mean_resp, self.data_transform_func)

        # TODO: What if test() is called repeatedly?
        # The labels are turned into a single column so that they can be reshaped with the position indices of
        # the main dataset and a one-element spectroscopic index
        labels_mat, success = reshape_to_n_dims(
            np.expand_dims(np.squeeze(self.__labels), axis=1),
            h5_pos=self.h5_main.h5_pos_inds,
            h5_spec=np.expand_dims([0], axis=0))
        if not success:
            # format() is used because the status is not a string on this path; concatenating it with '+'
            # would raise a TypeError instead of the intended ValueError
            raise ValueError(
                'Could not reshape labels to N-Dimensional dataset! Error: {}'.format(success))

        # The mean response uses the first self.num_comps columns of the spectroscopic indices; a single column
        # counting its rows stands in for the position indices
        centroid_mat, success = reshape_to_n_dims(
            self.__mean_resp,
            h5_spec=self.h5_main.h5_spec_inds[:, :self.num_comps],
            h5_pos=np.expand_dims(np.arange(self.__mean_resp.shape[0]),
                                  axis=1))

        if not success:
            raise ValueError(
                'Could not reshape mean response to N-Dimensional dataset! Error: {}'.format(success))

        return np.squeeze(labels_mat), centroid_mat
Пример #14
0
    def _read_data_chunk(self):
        """
        Reads the next chunk of data and rearranges it per FORC cycle.

        The parent class performs the basic read. If it leaves ``self.data``
        as None, this method returns without doing anything. Otherwise
        ``self.data`` is replaced by the tuple ``(forc_mats, dc_mats)``, each
        a list with one entry per FORC cycle (a single entry when the dataset
        has no FORC dimension):

        * ``forc_mats[i]`` - 2D array for FORC ``i``. The last axis is the
          fit dimension (``self._fit_dim_name``); the first axis is every
          other remaining dimension flattened together.
        * ``dc_mats[i]`` - unit values of the fit dimension for FORC ``i``.

        The following attributes are also set: ``self._dim_labels_s2f``,
        ``self._num_forcs``, ``self._pre_flattening_shape`` and
        ``self._pre_flattening_dim_name_order``.
        """

        # The Process class should take care of all the basic reading
        super(BELoopProjector, self)._read_data_chunk()

        if self.data is None:
            # Nothing we can do at this point
            return

        if self.verbose and self.mpi_rank == 0:
            print('BELoopProjector got raw data of shape {} from super'
                  '.'.format(self.data.shape))

        # Now self.data contains data for N pixels.
        # The challenge is that this may contain M FORC cycles.
        # Each FORC cycle needs its own V DC vector.
        # So, we can't blindly use the inherited unit_compute.
        # Our variables now are Position, Vdc, FORC, all others.
        #
        # We want M lists of [VDC x all other variables].
        #
        # The challenge is that VDC and FORC are inner dimensions -
        # neither the fastest nor the slowest (guaranteed).

        # NOTE(review): unlike the print above, the prints below are not
        # gated on self.verbose / self.mpi_rank - confirm this is intended.

        spec_dim_order_s2f = get_sort_order(self.h5_main.h5_spec_inds)[::-1]

        # The position axis stays first; the spectroscopic axes follow it in
        # the reversed sort order computed above.
        order_to_s2f = [0] + list(1 + spec_dim_order_s2f)
        print('Order for reshaping to S2F: {}'.format(order_to_s2f))

        self._dim_labels_s2f = ['Positions'] + list(
            np.array(self.h5_main.spec_dim_labels)[spec_dim_order_s2f])

        print(self._dim_labels_s2f, order_to_s2f)

        # Starts out as a 0 / 1 flag and is replaced by the actual number of
        # FORC cycles when a FORC dimension is present.
        self._num_forcs = int(
            any(targ in self.h5_main.spec_dim_labels
                for targ in ['FORC', 'FORC_Cycle']))
        if self._num_forcs:
            forc_pos = self.h5_main.spec_dim_labels.index(self._forc_dim_name)
            self._num_forcs = self.h5_main.spec_dim_sizes[forc_pos]
        print('Num FORCS: {}'.format(self._num_forcs))

        # Rows of the spectroscopic matrices that are not FORC-related
        all_but_forc_rows = [
            ind for ind, dim_name in enumerate(self.h5_main.spec_dim_labels)
            if dim_name not in ['FORC', 'FORC_Cycle', 'FORC_repeat']
        ]
        print('All but FORC rows: {}'.format(all_but_forc_rows))

        dc_mats = []
        forc_mats = []

        # A dataset without a FORC dimension is handled as a single cycle
        num_reps = 1 if self._num_forcs == 0 else self._num_forcs
        for forc_ind in range(num_reps):
            print('')
            print('Working on FORC #{}'.format(forc_ind))

            if self._num_forcs:
                # Spectroscopic columns that belong to this FORC cycle
                this_forc_spec_inds = \
                    np.where(self.h5_main.h5_spec_inds[forc_pos] == forc_ind)[0]
                this_forc_dc_vec = get_unit_values(
                    self.h5_main.h5_spec_inds[all_but_forc_rows]
                    [:, this_forc_spec_inds],
                    self.h5_main.h5_spec_vals[all_but_forc_rows]
                    [:, this_forc_spec_inds],
                    all_dim_names=list(
                        np.array(
                            self.h5_main.spec_dim_labels)[all_but_forc_rows]),
                    dim_names=self._fit_dim_name)
            else:
                # Boolean mask selecting every spectroscopic column.
                # The builtin bool is used because the np.bool alias was
                # removed in NumPy 1.24.
                this_forc_spec_inds = np.ones(
                    shape=self.h5_main.h5_spec_inds.shape[1], dtype=bool)
                this_forc_dc_vec = get_unit_values(
                    self.h5_main.h5_spec_inds,
                    self.h5_main.h5_spec_vals,
                    dim_names=self._fit_dim_name)
            this_forc_dc_vec = this_forc_dc_vec[self._fit_dim_name]
            dc_mats.append(this_forc_dc_vec)

            # NOTE(review): this slice is taken from self.h5_main over all
            # rows rather than from the chunk read by the parent class -
            # confirm this is intended.
            this_forc_2d = self.h5_main[:, this_forc_spec_inds]
            print('2D slice shape for this FORC: {}'.format(
                this_forc_2d.shape))

            this_forc_nd, success = reshape_to_n_dims(
                this_forc_2d,
                h5_pos=None,
                # THis line will need to change
                h5_spec=self.h5_main.h5_spec_inds[:, this_forc_spec_inds])
            print(this_forc_nd.shape)

            # NOTE(review): squeeze() drops every length-1 axis, not only the
            # FORC axis; the label bookkeeping below presumably assumes FORC
            # is the only singleton - verify for datasets with other
            # singleton dimensions.
            this_forc_nd_s2f = this_forc_nd.transpose(
                order_to_s2f).squeeze()  # squeeze out FORC
            dim_names_s2f = self._dim_labels_s2f.copy()
            if self._num_forcs > 0:
                dim_names_s2f.remove(
                    self._forc_dim_name
                )  # because it was never there in the first place.
            print('Reordered to S2F: {}, {}'.format(this_forc_nd_s2f.shape,
                                                    dim_names_s2f))

            # Move the fit dimension to the last axis, keeping the relative
            # order of all the other axes
            rest_dc_order = list(range(len(dim_names_s2f)))
            _dc_ind = dim_names_s2f.index(self._fit_dim_name)
            rest_dc_order.remove(_dc_ind)
            rest_dc_order = rest_dc_order + [_dc_ind]
            print('Transpose for reordering to rest, DC: {}'.format(
                rest_dc_order))

            rest_dc_nd = this_forc_nd_s2f.transpose(rest_dc_order)
            rest_dc_names = list(np.array(dim_names_s2f)[rest_dc_order])

            # Remembered so that the flattening below can be undone later
            self._pre_flattening_shape = list(rest_dc_nd.shape)
            self._pre_flattening_dim_name_order = list(rest_dc_names)

            print('After reodering: {}, {}'.format(rest_dc_nd.shape,
                                                   rest_dc_names))

            # Flatten to [all other dimensions, fit dimension]
            dc_rest_2d = rest_dc_nd.reshape(np.prod(rest_dc_nd.shape[:-1]),
                                            np.prod(rest_dc_nd.shape[-1]))
            print('Shape after flattening to 2D: {}'.format(dc_rest_2d.shape))
            forc_mats.append(dc_rest_2d)

        # Assigned once, after every FORC cycle has been processed
        self.data = forc_mats, dc_mats
Пример #15
0
    def test(self, rearrange_clusters=True, override=False):
        """
        Clusters the hdf5 dataset and calculates mean response for each cluster. This function does NOT write results to
        the hdf5 file. Call :meth:`~pycroscopy.processing.Cluster.compute()` to  write to the file.
        Handles complex, compound datasets such that the
        mean response vector for each cluster matrix is of the same data-type as the input matrix.

        Parameters
        ----------
        rearrange_clusters : bool, optional. Default = True
            Whether or not the clusters should be re-ordered by relative distances between the mean response
        override : bool, optional. default = False
            Set to true to recompute results if prior results are available. Else, returns existing results

        Returns
        -------
        labels : :class:`numpy.ndarray`
            Cluster labels reshaped to N dimensions using the position indices of the main dataset, with
            singleton dimensions squeezed out
        mean_response : :class:`numpy.ndarray`
            Mean response for each cluster reshaped to N dimensions, with one entry per cluster along the
            position axis

        Raises
        ------
        ValueError
            If the labels or the mean response could not be reshaped to N dimensions
        """
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                # Reuse the most recent of the previously computed results
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return np.squeeze(reshape_to_n_dims(self.h5_results_grp['Labels'])[0]), \
                       reshape_to_n_dims(self.h5_results_grp['Mean_Response'])[0]

        self.h5_results_grp = None

        t1 = time.time()

        print('Performing clustering on {}.'.format(self.h5_main.name))
        # perform fit on the real dataset
        results = self.estimator.fit(self.data_transform_func(self.h5_main[self.data_slice]))

        print('Took {} to compute {}'.format(format_time(time.time() - t1), self.method_name))

        t1 = time.time()
        self.__mean_resp = self._get_mean_response(results.labels_)
        print('Took {} to calculate mean response per cluster'.format(format_time(time.time() - t1)))

        self.__labels = results.labels_
        if rearrange_clusters:
            self.__labels, self.__mean_resp = reorder_clusters(results.labels_, self.__mean_resp,
                                                               self.data_transform_func)

        # TODO: What if test() is called repeatedly?
        # The labels are treated as a dataset with a single spectroscopic point
        labels_mat, success = reshape_to_n_dims(np.expand_dims(np.squeeze(self.__labels), axis=1),
                                                h5_pos=self.h5_main.h5_pos_inds, h5_spec=np.expand_dims([0], axis=0))
        if not success:
            # success is not a string here (it is falsy), so it must be converted before being appended to the
            # message; concatenating it directly raised a TypeError instead of this ValueError
            raise ValueError('Could not reshape labels to N-Dimensional dataset! Error:' + str(success))

        # Each cluster is treated as one "position"
        centroid_mat, success = reshape_to_n_dims(self.__mean_resp,
                                                  h5_spec=self.h5_main.h5_spec_inds[:, :self.num_comps],
                                                  h5_pos=np.expand_dims(np.arange(self.__mean_resp.shape[0]), axis=1))

        if not success:
            raise ValueError('Could not reshape mean response to N-Dimensional dataset! Error:' + str(success))

        return np.squeeze(labels_mat), centroid_mat
Пример #16
0
    def _get_dc_offsets(h5_spec_inds,
                        h5_spec_vals,
                        fit_dim_name,
                        forc_dim_name,
                        verbose=False):
        """
        Gets the values of the fit dimension (DC offset) for each FORC cycle

        Parameters
        ----------
        h5_spec_inds : array-like
            Spectroscopic indices, passed on to get_unit_values and reshape_to_n_dims
        h5_spec_vals : array-like
            Spectroscopic values, passed on to get_unit_values and reshape_to_n_dims
        fit_dim_name : str
            Name of the spectroscopic dimension whose values are required
        forc_dim_name : str
            Name of the FORC spectroscopic dimension
        verbose : bool, optional. Default = False
            Whether or not to print debugging statements

        Returns
        -------
        dc_val_mat : :class:`numpy.ndarray`
            2D array arranged as [FORC, fit dimension]. Contains a single row when ``forc_dim_name`` is not among
            the spectroscopic dimensions

        Raises
        ------
        ValueError
            If the spectroscopic values could not be fully reshaped to N dimensions
        """
        # FORC is the decider whether or not DC_Offset changes.
        # FORC_Repeats etc. should not matter
        spec_unit_vals = get_unit_values(h5_spec_inds,
                                         h5_spec_vals,
                                         verbose=False)
        if forc_dim_name not in spec_unit_vals.keys():
            if verbose:
                print(
                    'This is not a FORC dataset. Just taking unit values for DC Offset'
                )
            return np.expand_dims(spec_unit_vals[fit_dim_name], axis=0)

        # Reshape the Spec values matrix into an N dimensional array
        if verbose:
            print(
                'This is a FORC dataset. Reshaping Spectroscopic Values to N dimensions'
            )
        # Each row of the spectroscopic values is treated as one "position"
        row_inds = np.expand_dims(np.arange(h5_spec_vals.shape[0]), axis=1)
        spec_vals_nd, success, spec_nd_labels = reshape_to_n_dims(
            h5_spec_vals, row_inds, h5_spec_inds, get_labels=True)

        # Deliberately an equality test against True rather than a truthiness test, so that any value other
        # than True is rejected.
        # NOTE(review): presumably reshape_to_n_dims can report a partial reshape with a truthy non-True value -
        # confirm against its documentation
        if success != True:
            raise ValueError(
                'Unable to reshape Spectroscopic values to get DC offsets for each FORC'
            )

        # We will be using "in" quite a bit. So convert to list
        spec_nd_labels = list(spec_nd_labels)

        if verbose:
            print('Reshaped Spectroscopic Values to: {}'.format(
                spec_vals_nd.shape))
            print(
                'Spectroscopic dimension names: {}'.format(spec_nd_labels))

        forc_axis = spec_nd_labels.index(forc_dim_name)
        fit_axis = spec_nd_labels.index(fit_dim_name)

        # Note the indices of all other dimensions (sorted so the order is deterministic)
        all_other_dims = sorted(
            set(range(len(spec_nd_labels))) - {forc_axis, fit_axis})
        # Set up a new order where FORC is at 0 and DC is at 1 and all
        # other dimensions (useless) follow
        new_order = [forc_axis, fit_axis] + all_other_dims
        if verbose:
            print('Will transpose this N-dim matrix as: {}'.format(
                new_order))

        # Apply this new order to the matrix and the labels
        spec_vals_nd = spec_vals_nd.transpose(new_order)
        spec_nd_labels = np.array(spec_nd_labels)[new_order]
        if verbose:
            print('After transpose shape and names:\n\t{}\n\t{}'.format(
                spec_vals_nd.shape, spec_nd_labels))

        # Keep the FORC and DC axes whole and take the first element along every other axis.
        # The index must be a tuple: NumPy no longer accepts a list of slices as a multi-dimensional index.
        # Integer indices drop the unwanted axes directly, so a FORC or DC axis of length 1 is never
        # squeezed away and the result is always 2D, like the non-FORC case above.
        keep_inds = (slice(None), slice(None)) + (0,) * len(all_other_dims)
        dc_val_mat = spec_vals_nd[keep_inds]
        # Unnecessary but let's keep track of dimension names anyway
        spec_nd_labels = spec_nd_labels[:2]
        if verbose:
            print(
                'After removing all other dimensions. Shape is: {} and dimensions are: {}'
                .format(dc_val_mat.shape, spec_nd_labels))

        return dc_val_mat