def test_h5_no_sort_reqd(self):
    """
    Reshape the standard BEPS main dataset to N dimensions with and without sorting.

    The same labels and the same N-dimensional array are expected for both
    values of ``sort_dims``.
    """
    num_rows = 3
    num_cols = 5
    num_cycles = 2
    num_cycle_pts = 7
    expected_labels = ['X', 'Y', 'Bias', 'Cycle']

    with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
        h5_main = h5_f['/Raw_Measurement/source_main']

        # Build the reference once: unfold the 2D matrix and then swap the
        # axes within the position pair and within the spectroscopic pair.
        expected_n_dim = np.reshape(
            h5_main[()], (num_rows, num_cols, num_cycles, num_cycle_pts))
        expected_n_dim = np.transpose(expected_n_dim, (1, 0, 3, 2))

        for sort_dims in (False, True):
            n_dim, success, labels = hdf_utils.reshape_to_n_dims(
                h5_main, get_labels=True, sort_dims=sort_dims, lazy=False)
            # Compare complete lists. A zip() based element-wise check stops
            # at the shorter sequence and would pass for missing labels.
            self.assertEqual(list(labels), expected_labels)
            self.assertTrue(np.allclose(expected_n_dim, n_dim))
def test(self, override=False):
    """
    Applies randomised SVD to the dataset. This function does NOT write results to the hdf5 file. Call compute() to
    write to the file. Handles complex, compound datasets such that the V matrix is of the same data-type as the
    input matrix.

    Parameters
    ----------
    override : bool, optional. default = False
        Set to true to recompute results if prior results are available. Else, returns existing results

    Returns
    -------
    U : :class:`numpy.ndarray`
        Abundance matrix
    S : :class:`numpy.ndarray`
        variance vector
    V : :class:`numpy.ndarray`
        eigenvector matrix

    Raises
    ------
    ValueError
        If U or V could not be reshaped to N dimensions
    """
    if not override:
        if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
            # Reuse the most recent of the previously computed result groups
            self.h5_results_grp = self.duplicate_h5_groups[-1]
            print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
            print('set the "override" flag to True to recompute results')
            return reshape_to_n_dims(self.h5_results_grp['U'])[0], self.h5_results_grp['S'][()], \
                reshape_to_n_dims(self.h5_results_grp['V'])[0]

    self.h5_results_grp = None

    t1 = time.time()

    self.__u, self.__s, self.__v = randomized_svd(self.data_transform_func(self.h5_main), self.num_components,
                                                  n_iter=3)
    # Bring V back to the data type of the input dataset
    self.__v = stack_real_to_target_dtype(self.__v, self.h5_main.dtype)

    print('Took {} to compute randomized SVD'.format(format_time(time.time() - t1)))

    # U keeps the position dimensions of the main dataset; the component index is its only spectroscopic dimension
    u_mat, success = reshape_to_n_dims(self.__u, h5_pos=self.h5_main.h5_pos_inds,
                                       h5_spec=np.expand_dims(np.arange(self.__u.shape[1]), axis=0))
    if not success:
        # success is falsy here (e.g. False), so it must be formatted rather than concatenated onto a str
        raise ValueError('Could not reshape U to N-Dimensional dataset! Error: {}'.format(success))

    # V keeps the spectroscopic dimensions of the main dataset; the component index is its only position dimension
    v_mat, success = reshape_to_n_dims(self.__v, h5_pos=np.expand_dims(np.arange(self.__u.shape[1]), axis=1),
                                       h5_spec=self.h5_main.h5_spec_inds)
    if not success:
        raise ValueError('Could not reshape V to N-Dimensional dataset! Error: {}'.format(success))

    return u_mat, self.__s, v_mat
def test(self, override=False):
    """
    Decomposes the hdf5 dataset to calculate the components and projection. This function does NOT write results to
    the hdf5 file. Call :meth:`~pycroscopy.processing.Decomposition.compute()` to write to the file. Handles
    complex, compound datasets such that the components are of the same data-type as the input matrix.

    Parameters
    ----------
    override : bool, optional. default = False
        Set to true to recompute results if prior results are available. Else, returns existing results

    Returns
    -------
    components : :class:`numpy.ndarray`
        Components
    projections : :class:`numpy.ndarray`
        Projections

    Raises
    ------
    ValueError
        If the projections or the components could not be reshaped to N dimensions
    """
    if not override:
        if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
            # Reuse the most recent of the previously computed result groups
            self.h5_results_grp = self.duplicate_h5_groups[-1]
            print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
            print('set the "override" flag to True to recompute results')
            return USIDataset(self.h5_results_grp['Components']).get_n_dim_form(), \
                USIDataset(self.h5_results_grp['Projection']).get_n_dim_form()

    self.h5_results_grp = None

    print('Performing Decomposition on {}.'.format(self.h5_main.name))

    t0 = time.time()
    self._fit()
    # NOTE(review): self.__projection is not assigned in this method - presumably _transform() sets it; confirm
    self._transform()
    print('Took {} to compute {}'.format(format_time(time.time() - t0), self.method_name))

    # Bring the components back to the data type of the input dataset
    self.__components = stack_real_to_target_dtype(self.estimator.components_, self.h5_main.dtype)

    # Projections keep the position dimensions; the component index is their only spectroscopic dimension
    projection_mat, success = reshape_to_n_dims(self.__projection, h5_pos=self.h5_main.h5_pos_inds,
                                                h5_spec=np.expand_dims(np.arange(self.__projection.shape[1]),
                                                                       axis=0))
    if not success:
        # success is falsy here (e.g. False), so it must be formatted rather than concatenated onto a str
        raise ValueError('Could not reshape projections to N-Dimensional dataset! Error: {}'.format(success))

    # Components keep the spectroscopic dimensions; the component index is their only position dimension
    components_mat, success = reshape_to_n_dims(self.__components, h5_spec=self.h5_main.h5_spec_inds,
                                                h5_pos=np.expand_dims(np.arange(self.__components.shape[0]),
                                                                      axis=1))
    if not success:
        raise ValueError('Could not reshape components to N-Dimensional dataset! Error: {}'.format(success))

    return components_mat, projection_mat
def test_numpy(self):
    """
    Reshape plain numpy arrays (no HDF5 involved) whose position and
    spectroscopic index tables are both arranged slow to fast.
    """
    n_rows, n_cols, n_cycles, n_cycle_pts = 3, 5, 2, 7

    # arrange as slow, fast instead of fast, slow
    slow_pos = np.repeat(np.arange(n_rows), n_cols)
    fast_pos = np.tile(np.arange(n_cols), n_rows)
    source_pos_data = np.vstack((slow_pos, fast_pos)).T

    # Every element encodes its own (row, col, cycle, bias) index
    source_main_data = np.zeros(shape=(n_rows * n_cols, n_cycle_pts * n_cycles),
                                dtype=np.float16)
    for r_ind, c_ind, cyc_ind, pt_ind in np.ndindex(n_rows, n_cols, n_cycles, n_cycle_pts):
        source_main_data[r_ind * n_cols + c_ind, cyc_ind * n_cycle_pts + pt_ind] = \
            1E+3 * r_ind + 1E+2 * c_ind + 1E+1 * cyc_ind + pt_ind

    # make spectroscopic slow, fast instead of fast, slow
    slow_spec = np.repeat(np.arange(n_cycles), n_cycle_pts)
    fast_spec = np.tile(np.arange(n_cycle_pts), n_cycles)
    source_spec_data = np.vstack((slow_spec, fast_spec))

    n_dim, success = hdf_utils.reshape_to_n_dims(source_main_data,
                                                 h5_pos=source_pos_data,
                                                 h5_spec=source_spec_data,
                                                 get_labels=False,
                                                 lazy=False)

    expected_n_dim = np.reshape(source_main_data,
                                (n_rows, n_cols, n_cycles, n_cycle_pts))
    self.assertTrue(np.allclose(expected_n_dim, n_dim))
def _reshape_sho_matrix(self, raw_2d):
    """
    Reshapes the raw 2D SHO matrix (as read from the file) to a 2D array whose first axis is the DC offset
    and whose second axis is all remaining dimensions flattened together

    Parameters
    ----------
    raw_2d : 2D compound numpy array
        Raw SHO fitted data arranged as [position, data for a single FORC cycle]

    Returns
    -------
    loops_2d : 2D numpy compound array
        SHO fitted data arranged as [dc voltage steps, all other dimensions]
    order_dc_offset_reverse : list
        Order in which the N dimensional data should be transposed to return it to the same format
        as the input data of this function
    nd_mat_shape_dc_first : tuple
        Shape of the N dimensional array that the loops_2d can be turned into.
        Use the order_dc_offset_reverse after this reshape

    Notes
    -----
    Returns None (after emitting a warning) if the chunk could not be reshaped to N dimensions
    """
    # step 4: reshape to N dimensions
    spec_inds_chunk = self._sho_spec_inds[self._sho_all_but_forc_inds, self._current_sho_spec_slice]
    fit_nd, success = reshape_to_n_dims(raw_2d, h5_pos=None, h5_spec=spec_inds_chunk, verbose=self._verbose)
    if not success:
        warn('Error - could not reshape provided raw data chunk...')
        return None

    spec_names = np.array(self.h5_main.spec_dim_labels)[self._sho_all_but_forc_inds]
    dim_names_orig = np.hstack(('Positions', spec_names))

    if self._verbose:
        print('Shape of N dimensional dataset:', fit_nd.shape)
        print('Dimensions of order:', dim_names_orig)

    # step 5: Move the voltage dimension to the first dim
    dc_axis = self._fit_offset_index
    after_dc = list(range(dc_axis + 1, len(fit_nd.shape)))
    order_dc_outside_nd = [dc_axis] + list(range(dc_axis)) + after_dc
    # Inverse permutation: undoes the transpose above
    order_dc_offset_reverse = list(range(1, dc_axis + 1)) + [0] + after_dc

    fit_nd2 = np.transpose(fit_nd, tuple(order_dc_outside_nd))
    dim_names_dc_out = dim_names_orig[order_dc_outside_nd]

    if self._verbose:
        print('originally:', fit_nd.shape, ', after moving DC offset outside:', fit_nd2.shape)
        print('new dim names:', dim_names_dc_out)

    # step 6: reshape the ND data to 2D arrays
    num_dc_steps = fit_nd2.shape[0]
    loops_2d = np.reshape(fit_nd2, (num_dc_steps, -1))

    if self._verbose:
        print('Loops ready to be projected of shape (Vdc, all other dims besides FORC):', loops_2d.shape)

    return loops_2d, order_dc_offset_reverse, fit_nd2.shape
def _get_dc_offset(self):
    """
    Extracts the values of the fit dimension for the current spectroscopic slice and
    stores them, squeezed, in ``self.fit_dim_vec``.

    Returns
    -------
    None
        The result is stored on the instance rather than returned
    """
    # apply this knowledge to reshape the spectroscopic values
    # remember to reshape such that the dimensions are arranged in reverse order (slow to fast)
    rows = self._sho_all_but_forc_inds
    cols = self._current_sho_spec_slice
    spec_vals_nd, _ = reshape_to_n_dims(self._sho_spec_vals[rows, cols],
                                        h5_spec=self._sho_spec_inds[rows, cols])
    # This should result in a N+1 dimensional matrix where the first index contains the actual data
    # the other dimensions are present to easily slice the data
    spec_labels_sorted = np.hstack(('Dim', self.h5_main.spec_dim_labels))
    if self._verbose:
        print('Spectroscopic dimensions sorted by rate of change:')
        print(spec_labels_sorted)

    # slice the N dimensional dataset such that we only get the DC offset for default values of other dims
    fit_dim_pos = np.argwhere(spec_labels_sorted == self._fit_dim_name)[0][0]

    # First entry selects along the leading axis; one further entry is added per non-FORC dimension
    fit_dim_slice = [fit_dim_pos]
    for label_ind, label in enumerate(spec_labels_sorted[1:]):
        if label == self._fit_dim_name:
            # keep every value along the fit dimension
            fit_dim_slice.append(slice(None))
            fit_dim_slice[0] = label_ind
            continue
        if label in ['FORC', 'FORC_repeat', 'FORC_Cycle']:
            # FORC-like dimensions get no entry in the slice
            continue
        # first element only for all other dimensions
        fit_dim_slice.append(slice(0, 1))

    if self._verbose:
        print('slice to extract Vdc:')
        print(fit_dim_slice)

    self.fit_dim_vec = np.squeeze(spec_vals_nd[tuple(fit_dim_slice)])

    return
def test_sorted_and_unsorted(self):
    """
    Check the N-dimensional form of a USIDataset before and after toggling its sorting.
    """
    with h5py.File(test_h5_file_path, mode='r') as h5_file:
        dset = USIDataset(h5_file['/Raw_Measurement/source_main'])
        expected_s2f, expected_f2s = self.get_expected_n_dim(h5_file)

        # Before toggling: expected to match the fast-to-slow reference
        self.assertTrue(np.allclose(expected_f2s,
                                    dset.get_n_dim_form(lazy=False)))

        reshaped, status = hdf_utils.reshape_to_n_dims(dset, sort_dims=True)
        print(reshaped.shape)

        # After toggling: expected to match the slow-to-fast reference
        dset.toggle_sorting()
        self.assertTrue(np.allclose(expected_s2f,
                                    dset.get_n_dim_form(lazy=False)))
def test_h5_not_main_dset(self):
    """
    reshape_to_n_dims must raise ValueError for a non-main dataset and for
    ancillary datasets that do not match the dataset being reshaped.
    """
    with h5py.File(data_utils.std_beps_path, mode='r') as h5_file:
        not_main = h5_file['/Raw_Measurement/Ancillary']
        pos_inds = h5_file['/Raw_Measurement/Position_Indices']
        spec_inds = h5_file['/Raw_Measurement/Spectroscopic_Indices']

        # Not main
        with self.assertRaises(ValueError):
            hdf_utils.reshape_to_n_dims(not_main)

        # Not main and not helping that we are supplying incompatible ancillary datasets
        with self.assertRaises(ValueError):
            hdf_utils.reshape_to_n_dims(not_main, h5_pos=pos_inds,
                                        h5_spec=spec_inds)

        # main but we are supplying incompatible ancillary datasets
        results_main = h5_file['/Raw_Measurement/source_main-Fitter_000/results_main']
        with self.assertRaises(ValueError):
            hdf_utils.reshape_to_n_dims(results_main, h5_pos=pos_inds,
                                        h5_spec=spec_inds)
def reshape_sho_chunk_to_nd(data_2d, raw_dim_labels, h5_pos_inds,
                            h5_spec_inds, verbose=False):
    """
    Reshapes a 2D chunk of data to N dimensions and orders the dimensions from slowest to fastest

    Parameters
    ----------
    data_2d : 2D array
        Chunk of data to reshape
    raw_dim_labels : array-like
        Names of the dimensions, in the order produced by reshape_to_n_dims
    h5_pos_inds : 2D array-like
        Position indices. Only the first ``data_2d.shape[0]`` rows are used for the reshape
    h5_spec_inds : 2D array-like
        Spectroscopic indices
    verbose : bool, optional. Default = False
        Whether or not to print debugging statements

    Returns
    -------
    data_nd_s2f : N dimensional array
        Data with dimensions arranged from slowest to fastest
    dim_labels_s2f : numpy.ndarray
        Dimension names in the same order as the axes of data_nd_s2f

    Raises
    ------
    ValueError
        If the reshape did not report complete success
    """
    pos_inds_chunk = h5_pos_inds[:data_2d.shape[0]]
    data_nd_auto, success = reshape_to_n_dims(data_2d, pos_inds_chunk, h5_spec_inds)

    # Anything other than an outright True is treated as failure
    if success != True:
        message = 'Unable to reshape data chunk of shape {} to N dimensions'
        raise ValueError(message.format(data_2d.shape))

    if verbose:
        print('Reshaped raw data from: {} to {}'.format(data_2d.shape,
                                                        data_nd_auto.shape))

    # By default it is fast to slow!
    pos_sort = get_sort_order(h5_pos_inds)[::-1]
    spec_sort = get_sort_order(h5_spec_inds)[::-1]
    # Spectroscopic axes come after the position axes, hence the offset
    num_pos_dims = len(pos_sort)
    swap_order = list(pos_sort)
    swap_order = swap_order + list(num_pos_dims + spec_sort)

    if verbose:
        print('Dimensions will be permuted as {} to arrange them from slowest to fastest'
              .format(swap_order))

    data_nd_s2f = data_nd_auto.transpose(swap_order)
    dim_labels_s2f = np.array(raw_dim_labels)[swap_order]

    if verbose:
        print('After rearranging array is of shape: {}, dimensions are ordered as: {}'
              .format(data_nd_s2f.shape, dim_labels_s2f))

    return data_nd_s2f, dim_labels_s2f
def test_sort_required(self):
    """
    Build a main dataset whose ancillary dimensions are stored slow to fast and
    check that reshape_to_n_dims with ``sort_dims=True`` returns the labels and
    data reordered.
    """
    file_path = 'reshape_to_n_dim_sort_required.h5'
    data_utils.delete_existing_file(file_path)

    # An explicit write mode is required: without it, h5py 3 and later open
    # the file read-only and cannot create this fixture.
    with h5py.File(file_path, mode='w') as h5_f:
        h5_raw_grp = h5_f.create_group('Raw_Measurement')

        num_rows = 3
        num_cols = 5
        num_cycles = 2
        num_cycle_pts = 7

        source_dset_name = 'source_main'

        # arrange as slow, fast instead of fast, slow
        source_pos_data = np.vstack(
            (np.repeat(np.arange(num_rows), num_cols),
             np.tile(np.arange(num_cols), num_rows))).T
        pos_attrs = {'units': ['nm', 'um'], 'labels': ['X', 'Y']}

        h5_pos_inds = h5_raw_grp.create_dataset('Position_Indices',
                                                data=source_pos_data,
                                                dtype=np.uint16)
        data_utils.write_aux_reg_ref(h5_pos_inds, pos_attrs['labels'],
                                     is_spec=False)
        data_utils.write_string_list_as_attr(h5_pos_inds, pos_attrs)

        h5_pos_vals = h5_raw_grp.create_dataset('Position_Values',
                                                data=source_pos_data,
                                                dtype=np.float32)
        data_utils.write_aux_reg_ref(h5_pos_vals, pos_attrs['labels'],
                                     is_spec=False)
        data_utils.write_string_list_as_attr(h5_pos_vals, pos_attrs)

        # Every element encodes its own (row, col, cycle, bias) index
        source_main_data = np.zeros(shape=(num_rows * num_cols,
                                           num_cycle_pts * num_cycles),
                                    dtype=np.float16)
        for row_ind in range(num_rows):
            for col_ind in range(num_cols):
                for cycle_ind in range(num_cycles):
                    for bias_ind in range(num_cycle_pts):
                        val = 1E+3 * row_ind + 1E+2 * col_ind + 1E+1 * cycle_ind + bias_ind
                        source_main_data[row_ind * num_cols + col_ind,
                                         cycle_ind * num_cycle_pts + bias_ind] = val

        h5_source_main = h5_raw_grp.create_dataset(source_dset_name,
                                                   data=source_main_data)
        data_utils.write_safe_attrs(h5_source_main,
                                    {'units': 'A', 'quantity': 'Current'})

        # make spectroscopic slow, fast instead of fast, slow
        source_spec_data = np.vstack(
            (np.repeat(np.arange(num_cycles), num_cycle_pts),
             np.tile(np.arange(num_cycle_pts), num_cycles)))
        source_spec_attrs = {'units': ['', 'V'], 'labels': ['Cycle', 'Bias']}

        h5_source_spec_inds = h5_raw_grp.create_dataset('Spectroscopic_Indices',
                                                        data=source_spec_data,
                                                        dtype=np.uint16)
        data_utils.write_aux_reg_ref(h5_source_spec_inds,
                                     source_spec_attrs['labels'],
                                     is_spec=True)
        data_utils.write_string_list_as_attr(h5_source_spec_inds,
                                             source_spec_attrs)

        h5_source_spec_vals = h5_raw_grp.create_dataset('Spectroscopic_Values',
                                                        data=source_spec_data,
                                                        dtype=np.float32)
        data_utils.write_aux_reg_ref(h5_source_spec_vals,
                                     source_spec_attrs['labels'],
                                     is_spec=True)
        data_utils.write_string_list_as_attr(h5_source_spec_vals,
                                             source_spec_attrs)

        # Now need to link as main!
        for dset in [h5_pos_inds, h5_pos_vals,
                     h5_source_spec_inds, h5_source_spec_vals]:
            h5_source_main.attrs[dset.name.split('/')[-1]] = dset.ref

        n_dim, success, labels = hdf_utils.reshape_to_n_dims(
            h5_source_main, get_labels=True, sort_dims=True, lazy=False)
        # Compare complete lists. A zip() based element-wise check stops at
        # the shorter sequence and would pass for missing labels.
        self.assertEqual(list(labels), ['Y', 'X', 'Bias', 'Cycle'])

        expected_n_dim = np.reshape(
            source_main_data,
            (num_rows, num_cols, num_cycles, num_cycle_pts))
        expected_n_dim = np.transpose(expected_n_dim, [1, 0, 3, 2])
        self.assertTrue(np.allclose(expected_n_dim, n_dim))

    os.remove(file_path)
def test(self, rearrange_clusters=True, override=False):
    """
    Clusters the hdf5 dataset and calculates mean response for each cluster. This function does NOT write results to
    the hdf5 file. Call :meth:`~pycroscopy.processing.Cluster.compute()` to write to the file. Handles complex,
    compound datasets such that the mean response vector for each cluster matrix is of the same data-type as the
    input matrix.

    Parameters
    ----------
    rearrange_clusters : bool, optional. Default = True
        Whether or not the clusters should be re-ordered by relative distances between the mean response
    override : bool, optional. default = False
        Set to true to recompute results if prior results are available. Else, returns existing results

    Returns
    -------
    labels : :class:`numpy.ndarray`
        Cluster labels as obtained from the fit, reshaped to the position dimensions and squeezed
    mean_response : :class:`numpy.ndarray`
        Mean response for each cluster, reshaped to N dimensions

    Raises
    ------
    ValueError
        If the labels or the mean response could not be reshaped to N dimensions
    """
    if not override:
        if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
            # Reuse the most recent of the previously computed result groups
            self.h5_results_grp = self.duplicate_h5_groups[-1]
            print('Returning previously computed results from: {}'.format(
                self.h5_results_grp.name))
            print('set the "override" flag to True to recompute results')
            return np.squeeze(reshape_to_n_dims(self.h5_results_grp['Labels'])[0]), \
                reshape_to_n_dims(self.h5_results_grp['Mean_Response'])[0]

    self.h5_results_grp = None

    t1 = time.time()

    print('Performing clustering on {}.'.format(self.h5_main.name))
    # perform fit on the real dataset
    results = self.estimator.fit(
        self.data_transform_func(self.h5_main[self.data_slice]))

    print('Took {} to compute {}'.format(format_time(time.time() - t1),
                                         self.method_name))

    t1 = time.time()
    self.__mean_resp = self._get_mean_response(results.labels_)
    print('Took {} to calculate mean response per cluster'.format(
        format_time(time.time() - t1)))

    self.__labels = results.labels_
    if rearrange_clusters:
        self.__labels, self.__mean_resp = reorder_clusters(
            results.labels_, self.__mean_resp, self.data_transform_func)

    # TODO: What if test() is called repeatedly?

    # Labels keep the position dimensions; a single dummy spectroscopic point is supplied
    labels_mat, success = reshape_to_n_dims(
        np.expand_dims(np.squeeze(self.__labels), axis=1),
        h5_pos=self.h5_main.h5_pos_inds,
        h5_spec=np.expand_dims([0], axis=0))
    if not success:
        # success is falsy here (e.g. False), so it must be formatted rather than concatenated onto a str
        raise ValueError(
            'Could not reshape labels to N-Dimensional dataset! Error: {}'.format(success))

    # Mean responses use the cluster index as the only position dimension
    centroid_mat, success = reshape_to_n_dims(
        self.__mean_resp,
        h5_spec=self.h5_main.h5_spec_inds[:, :self.num_comps],
        h5_pos=np.expand_dims(np.arange(self.__mean_resp.shape[0]), axis=1))
    if not success:
        raise ValueError(
            'Could not reshape mean response to N-Dimensional dataset! Error: {}'.format(success))

    return np.squeeze(labels_mat), centroid_mat
def _read_data_chunk(self):
    """
    Returns the next chunk of data for the guess or the fit

    The chunk read by the parent class is split into one matrix per FORC.
    On return, ``self.data`` is a tuple ``(forc_mats, dc_mats)`` where
    ``forc_mats`` is a list of 2D arrays arranged as
    [all other dimensions, fit dimension] and ``dc_mats`` is the matching list
    of fit-dimension values for each FORC. ``self.data`` is left as None if
    the parent class did not provide any data.
    """
    # The Process class should take care of all the basic reading
    super(BELoopProjector, self)._read_data_chunk()

    if self.data is None:
        # Nothing we can do at this point
        return

    if self.verbose and self.mpi_rank == 0:
        print('BELoopProjector got raw data of shape {} from super'
              '.'.format(self.data.shape))

    # Now self.data contains data for N pixels.
    # The challenge is that this may contain M FORC cycles
    # Each FORC cycle needs its own V DC vector
    # So, we can't blindly use the inherited unit_compute.
    # Our variables now are Position, Vdc, FORC, all others
    # We want M lists of [VDC x all other variables]
    # The challenge is that VDC and FORC are inner dimensions -
    # neither the fastest nor the slowest (guaranteed)

    spec_dim_order_s2f = get_sort_order(self.h5_main.h5_spec_inds)[::-1]

    # Positions are kept as a single leading axis, so spectroscopic axes are offset by one
    order_to_s2f = [0] + list(1 + spec_dim_order_s2f)
    print('Order for reshaping to S2F: {}'.format(order_to_s2f))

    self._dim_labels_s2f = list(['Positions']) + list(
        np.array(self.h5_main.spec_dim_labels)[spec_dim_order_s2f])
    print(self._dim_labels_s2f, order_to_s2f)

    # Starts out as a 0 / 1 flag and is replaced by the size of the FORC dimension when one is present
    self._num_forcs = int(
        any(targ in self.h5_main.spec_dim_labels
            for targ in ['FORC', 'FORC_Cycle']))
    if self._num_forcs:
        forc_pos = self.h5_main.spec_dim_labels.index(self._forc_dim_name)
        self._num_forcs = self.h5_main.spec_dim_sizes[forc_pos]
    print('Num FORCS: {}'.format(self._num_forcs))

    all_but_forc_rows = [
        ind for ind, dim_name in enumerate(self.h5_main.spec_dim_labels)
        if dim_name not in ['FORC', 'FORC_Cycle', 'FORC_repeat']
    ]
    print('All but FORC rows: {}'.format(all_but_forc_rows))

    dc_mats = []
    forc_mats = []

    num_reps = 1 if self._num_forcs == 0 else self._num_forcs
    for forc_ind in range(num_reps):
        print('')
        print('Working on FORC #{}'.format(forc_ind))
        if self._num_forcs:
            this_forc_spec_inds = \
                np.where(self.h5_main.h5_spec_inds[forc_pos] == forc_ind)[0]
        else:
            # Select every spectroscopic column. The builtin bool is used
            # because the np.bool alias no longer exists in current NumPy.
            this_forc_spec_inds = np.ones(
                shape=self.h5_main.h5_spec_inds.shape[1], dtype=bool)

        if self._num_forcs:
            this_forc_dc_vec = get_unit_values(
                self.h5_main.h5_spec_inds[all_but_forc_rows][:, this_forc_spec_inds],
                self.h5_main.h5_spec_vals[all_but_forc_rows][:, this_forc_spec_inds],
                all_dim_names=list(
                    np.array(self.h5_main.spec_dim_labels)[all_but_forc_rows]),
                dim_names=self._fit_dim_name)
        else:
            this_forc_dc_vec = get_unit_values(
                self.h5_main.h5_spec_inds,
                self.h5_main.h5_spec_vals,
                dim_names=self._fit_dim_name)
        this_forc_dc_vec = this_forc_dc_vec[self._fit_dim_name]
        dc_mats.append(this_forc_dc_vec)

        this_forc_2d = self.h5_main[:, this_forc_spec_inds]
        print('2D slice shape for this FORC: {}'.format(
            this_forc_2d.shape))

        this_forc_nd, success = reshape_to_n_dims(
            this_forc_2d,
            h5_pos=None,  # This line will need to change
            h5_spec=self.h5_main.h5_spec_inds[:, this_forc_spec_inds])
        print(this_forc_nd.shape)

        this_forc_nd_s2f = this_forc_nd.transpose(
            order_to_s2f).squeeze()  # squeeze out FORC
        dim_names_s2f = self._dim_labels_s2f.copy()
        if self._num_forcs > 0:
            # because it was never there in the first place.
            dim_names_s2f.remove(self._forc_dim_name)
        print('Reordered to S2F: {}, {}'.format(this_forc_nd_s2f.shape,
                                                dim_names_s2f))

        # Move the fit dimension to the last axis and keep the rest in order
        rest_dc_order = list(range(len(dim_names_s2f)))
        _dc_ind = dim_names_s2f.index(self._fit_dim_name)
        rest_dc_order.remove(_dc_ind)
        rest_dc_order = rest_dc_order + [_dc_ind]
        print('Transpose for reordering to rest, DC: {}'.format(
            rest_dc_order))

        rest_dc_nd = this_forc_nd_s2f.transpose(rest_dc_order)
        rest_dc_names = list(np.array(dim_names_s2f)[rest_dc_order])

        self._pre_flattening_shape = list(rest_dc_nd.shape)
        self._pre_flattening_dim_name_order = list(rest_dc_names)

        print('After reodering: {}, {}'.format(rest_dc_nd.shape,
                                               rest_dc_names))

        # int() keeps the row count an integer even when there are no
        # leading dimensions, where np.prod of an empty shape is the float 1.0
        dc_rest_2d = rest_dc_nd.reshape(
            int(np.prod(rest_dc_nd.shape[:-1])), rest_dc_nd.shape[-1])
        print('Shape after flattening to 2D: {}'.format(dc_rest_2d.shape))
        forc_mats.append(dc_rest_2d)

    self.data = forc_mats, dc_mats
def test(self, rearrange_clusters=True, override=False):
    """
    Clusters the hdf5 dataset and calculates mean response for each cluster. This function does NOT write results to
    the hdf5 file. Call :meth:`~pycroscopy.processing.Cluster.compute()` to write to the file. Handles complex,
    compound datasets such that the mean response vector for each cluster matrix is of the same data-type as the
    input matrix.

    Parameters
    ----------
    rearrange_clusters : bool, optional. Default = True
        Whether or not the clusters should be re-ordered by relative distances between the mean response
    override : bool, optional. default = False
        Set to true to recompute results if prior results are available. Else, returns existing results

    Returns
    -------
    labels : :class:`numpy.ndarray`
        Cluster labels as obtained from the fit, reshaped to the position dimensions and squeezed
    mean_response : :class:`numpy.ndarray`
        Mean response for each cluster, reshaped to N dimensions

    Raises
    ------
    ValueError
        If the labels or the mean response could not be reshaped to N dimensions
    """
    if not override:
        if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
            # Reuse the most recent of the previously computed result groups
            self.h5_results_grp = self.duplicate_h5_groups[-1]
            print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
            print('set the "override" flag to True to recompute results')
            return np.squeeze(reshape_to_n_dims(self.h5_results_grp['Labels'])[0]), \
                reshape_to_n_dims(self.h5_results_grp['Mean_Response'])[0]

    self.h5_results_grp = None

    t1 = time.time()

    print('Performing clustering on {}.'.format(self.h5_main.name))
    # perform fit on the real dataset
    results = self.estimator.fit(self.data_transform_func(self.h5_main[self.data_slice]))

    print('Took {} to compute {}'.format(format_time(time.time() - t1), self.method_name))

    t1 = time.time()
    self.__mean_resp = self._get_mean_response(results.labels_)
    print('Took {} to calculate mean response per cluster'.format(format_time(time.time() - t1)))

    self.__labels = results.labels_
    if rearrange_clusters:
        self.__labels, self.__mean_resp = reorder_clusters(results.labels_, self.__mean_resp,
                                                           self.data_transform_func)

    # TODO: What if test() is called repeatedly?

    # Labels keep the position dimensions; a single dummy spectroscopic point is supplied
    labels_mat, success = reshape_to_n_dims(np.expand_dims(np.squeeze(self.__labels), axis=1),
                                            h5_pos=self.h5_main.h5_pos_inds,
                                            h5_spec=np.expand_dims([0], axis=0))
    if not success:
        # success is falsy here (e.g. False), so it must be formatted rather than concatenated onto a str
        raise ValueError('Could not reshape labels to N-Dimensional dataset! Error: {}'.format(success))

    # Mean responses use the cluster index as the only position dimension
    centroid_mat, success = reshape_to_n_dims(self.__mean_resp,
                                              h5_spec=self.h5_main.h5_spec_inds[:, :self.num_comps],
                                              h5_pos=np.expand_dims(np.arange(self.__mean_resp.shape[0]),
                                                                    axis=1))
    if not success:
        raise ValueError('Could not reshape mean response to N-Dimensional dataset! Error: {}'.format(success))

    return np.squeeze(labels_mat), centroid_mat
def _get_dc_offsets(h5_spec_inds, h5_spec_vals, fit_dim_name, forc_dim_name,
                    verbose=False):
    """
    Gets the values of the fit dimension (DC offset), separately for each FORC when present

    Parameters
    ----------
    h5_spec_inds : 2D array-like
        Spectroscopic indices
    h5_spec_vals : 2D array-like
        Spectroscopic values
    fit_dim_name : str
        Name of the dimension whose values are extracted
    forc_dim_name : str
        Name of the FORC dimension
    verbose : bool, optional. Default = False
        Whether or not to print debugging statements

    Returns
    -------
    dc_val_mat : numpy.ndarray
        For a non-FORC dataset, the unit values of the fit dimension with a leading axis of size 1.
        For a FORC dataset, the squeezed values left after reordering the axes as [FORC, fit dimension, ...]
        and keeping only the first element of every other axis.

    Raises
    ------
    ValueError
        If the spectroscopic values could not be reshaped to N dimensions
    """
    # FORC is the decider whether or not DC_Offset changes.
    # FORC_Repeats etc. should not matter
    spec_unit_vals = get_unit_values(h5_spec_inds, h5_spec_vals,
                                     verbose=False)
    if forc_dim_name not in spec_unit_vals.keys():
        if verbose:
            print('This is not a FORC dataset. Just taking unit values for DC Offset')
        return np.expand_dims(spec_unit_vals[fit_dim_name], axis=0)

    # Reshape the Spec values matrix into an N dimensional array
    if verbose:
        print('This is a FORC dataset. Reshaping Spectroscopic Values to N dimensions')
    # The rows of h5_spec_vals are enumerated to serve as the position dimension
    ret_vals = reshape_to_n_dims(h5_spec_vals,
                                 np.expand_dims(np.arange(h5_spec_vals.shape[0]), axis=1),
                                 h5_spec_inds, get_labels=True)
    spec_vals_nd, success, spec_nd_labels = ret_vals
    # Anything other than an outright True is treated as failure
    if success != True:
        raise ValueError('Unable to reshape Spectroscopic values to get DC offsets for each FORC')

    # We will be using "in" quite a bit. So convert to list
    spec_nd_labels = list(spec_nd_labels)
    if verbose:
        print('Reshaped Spectroscopic Values to: {}'.format(spec_vals_nd.shape))
        print('Spectroscopic dimension names: {}'.format(spec_nd_labels))

    forc_axis = spec_nd_labels.index(forc_dim_name)
    fit_axis = spec_nd_labels.index(fit_dim_name)

    # Note the indices of all other dimensions.
    # sorted() makes the transpose order deterministic
    all_other_dims = sorted(set(range(len(spec_nd_labels))) - {fit_axis, forc_axis})

    # Set up a new order where FORC is at 0 and DC is at 1 and all
    # other dimensions (useless) follow
    new_order = [forc_axis, fit_axis] + all_other_dims
    if verbose:
        print('Will transpose this N-dim matrix as: {}'.format(new_order))

    # Apply this new order to the matrix and the labels
    spec_vals_nd = spec_vals_nd.transpose(new_order)
    spec_nd_labels = np.array(spec_nd_labels)[new_order]
    if verbose:
        print('After transpose shape and names:\n\t{}\n\t{}'.format(spec_vals_nd.shape, spec_nd_labels))

    # Now remove all other dimensions using a tuple of slices.
    # NumPy only accepts a tuple (not a list) for multi-axis indexing.
    # NOTE(review): the position axis is also cut to its first element, which presumably assumes
    # that the fit dimension is the first row of h5_spec_vals - confirm
    keep_slices = (slice(None), slice(None)) + tuple(slice(0, 1) for _ in all_other_dims)
    # Don't forget to remove singular dimensions using squeeze
    dc_val_mat = spec_vals_nd[keep_slices].squeeze()
    # Unnecessary but let's keep track of dimension names anyway
    spec_nd_labels = spec_nd_labels[:2]
    if verbose:
        print('After removing all other dimensions. Shape is: {} and dimensions are: {}'
              .format(dc_val_mat.shape, spec_nd_labels))

    return dc_val_mat