def transform(self, data, outliers=None):
    """
    Applies the learned Gaussian mixture model and returns a labelled array.

    Args:
        data (NumPy Array): Material properties array of shape (height, width, n_properties)
        outliers (NumPy Array): Binary array indicating outliers of shape (height, width).
            Boolean, integer and float masks are accepted; every nonzero entry is
            treated as an outlier.

    Returns:
        labels (NumPy Array): The segmented array of shape (height, width). Each pixel
            receives a label corresponding to its segment. None is returned (after a
            logged warning) when the model has not been fitted yet.
    """
    if self.gmm is None:
        logger.warning("Attempting to transform prior to fitting. You must call .fit() first.")
        return None

    # Unpacking three values keeps the original requirement that data is 3-D.
    h, w, _ = data.shape

    features = self.get_pca_components(data)
    # All labels move up one so that 0 stays free for outliers.
    labels = np.reshape(self.gmm.predict(features), (h, w)) + 1

    if outliers is not None:
        # Outliers map to label 0. A mask is used instead of the in-place
        # ``labels *= 1 - outliers`` because NumPy refuses to write a float
        # product back into an integer label array, so a float mask used to raise.
        labels = np.where(np.asarray(outliers, dtype=bool), 0, labels)

    labels = slu.relabel(labels)
    return labels
def transform(self, data, outliers=None, pers_thresh=DEF_THRESH):
    """
    Applies threshold to the watershed graph and returns a labelled array.

    NOTE: data is accepted so that this segmenter can be called the same way
    as the GMM segmenter.

    Args:
        data (NumPy Array): Material property array of shape (height, width).
        outliers (NumPy Array): Binary array indicating outliers of shape (height, width).
            Boolean, integer and float masks are accepted; every nonzero entry is
            treated as an outlier.
        pers_thresh (float): merging threshold

    Returns:
        (NumPy Array): The segmented array of shape (height, width). Each pixel
            receives a label corresponding to its segment. None is returned (after a
            logged warning) when the segmenter has not been fitted yet.
    """
    if self.pws is None:
        logger.warning(
            "Attempting to transform prior to fitting. You must call .fit() first."
        )
        return None

    # NOTE(review): the normalized / smoothed data is not consumed below; the
    # labels come from self.pws alone. The calls are kept so that existing
    # behaviour is unchanged - confirm whether they can be dropped.
    if self.normalize:
        data = SegmenterWatershed.normalize_data(data)
    if self.smooth:
        data = SegmenterWatershed.smooth_data(data)

    labels = self.pws.apply_threshold(pers_thresh)

    if outliers is not None:
        # Outliers map to label 0 which are borders between grains. A mask is
        # used instead of the in-place ``labels *= 1 - outliers``: the in-place
        # form raises a casting error for integer labels with a float mask and
        # also writes into the array handed back by apply_threshold.
        labels = np.where(np.asarray(outliers, dtype=bool), 0, labels)

    labels = slu.relabel(labels)
    return labels
def get_grains(labels):
    """
    Segments class labels into grain labels.

    The class map is handed to measure.label with connectivity=2 and label 0
    as background, and the result is passed through slu.relabel before it is
    returned.
    """
    grains = measure.label(labels, connectivity=2, background=0)
    return slu.relabel(grains)
def m2py_pipeline(dataframe, heightless, outlier_threshold, n_components,
                  padding, embedding_dim, thresh, nonlinear, normalize, zscale,
                  data_type, data_subtype, input_cmap):
    """
    Wrapper function for m2py tools. Runs frequency removal, outlier
    extraction, GMM segmentation and then either persistence watershed or
    connected-components clustering, plotting intermediate results on the way.

    Args:
        dataframe - np.array(). 3D array of SPM data
        heightless - bool. if 'True', the "Zscale" and "Height" channels are
            removed before GMM segmentation
        outlier_threshold - threshold handed to pre.extract_outliers for every
            property listed for data_type in config.data_info
        n_components - int. number specifying the number of Gaussian phase
            distributions to be identified in dataframe
        padding - int. number specifying the degree of closeness of neighbors
            to be included in GMM labelling
        embedding_dim - int. number specifying the number of principle
            components to use in PCA before GMM labelling (capped at the
            number of channels of the data)
        thresh - persistence threshold. If not None, persistence watershed is
            run on the "Adhesion" channel of each GMM component; if None,
            connected-components clustering is used instead
        nonlinear - bool. if 'True', nonlinear properties are generated prior
            to analysis. Includes x^2, x^3, abs(x), and 1/x
        normalize - forwarded to SegmenterGMM
        zscale - forwarded to SegmenterGMM
        data_type - key into config.data_info; also forwarded to the
            pre / post helpers
        data_subtype - forwarded to post.show_grain_area_distribution
        input_cmap - colormap forwarded to the plotting helpers

    Returns:
        outliers - np.array(). 2D array of outliers
        seg1_labels - np.array(). 2D array of GMM labels
        seg2_labels - np.array(). 2D array of clustering labels

    Raises:
        ValueError - if heightless is neither True nor False
    """
    # Reject a bad flag up front. Previously the function ran the whole
    # preprocessing, printed a message and then crashed on the unbound
    # seg1_labels / seg2_labels at the return statement.
    if heightless not in (True, False):
        raise ValueError(
            f"Heightless flag incorrect: expected True or False, got {heightless!r}"
        )

    fill_zeros_flag = True
    remove_outliers_flag = True

    # Apply frequency removal
    data = pre.apply_frequency_removal(dataframe, data_type)
    data_properties = config.data_info[data_type]['properties']

    # Extract outliers: union of the per-property outlier maps
    h, w, c = data.shape
    outliers = np.zeros((h, w), dtype=bool)
    for prop in data_properties:
        # The caller's threshold is honoured here (2.5 used to be hard-coded).
        temp_outliers = pre.extract_outliers(data, data_type, prop,
                                             outlier_threshold,
                                             int(h / 2), int(w / 2))
        pre.show_outliers(data, data_type, prop, temp_outliers)
        outliers = np.logical_or(outliers, temp_outliers)

    no_outliers_data = pre.smooth_outliers_from_data(data, outliers)

    plt.imshow(outliers)
    plt.show()

    # Show a-priori distributions
    pre.show_property_distributions(data, data_type, outliers)

    # NOTE(review): min() fails for embedding_dim=None, so the
    # "embedding_dim is not None" guard further down can never be False when
    # it is reached - confirm the intended behaviour without PCA.
    num_pca_components = min(embedding_dim, c)

    # Run GMM segmentation
    seg1 = seg_gmm.SegmenterGMM(n_components=n_components,
                                embedding_dim=num_pca_components,
                                padding=padding,
                                nonlinear=nonlinear,
                                normalize=normalize,
                                zscale=zscale)

    if heightless:
        # Remove height property
        zscale_id = data_properties.index("Zscale")
        height_id = data_properties.index("Height")
        seg_data = np.delete(no_outliers_data, [zscale_id, height_id], axis=2)
    else:
        seg_data = no_outliers_data

    seg1_labels = seg1.fit_transform(seg_data, outliers)
    if fill_zeros_flag:
        seg1_labels = slu.fill_out_zeros(seg1_labels, outliers)
    elif remove_outliers_flag:
        seg1_labels = np.ma.masked_where(outliers == 1, seg1_labels)

    # The plots below are issued in the same order as before for each mode.
    post.show_classification(seg1_labels, seg_data, data_type)
    if not heightless:
        post.show_distributions_together(seg1_labels, seg_data, data_type,
                                         input_cmap=input_cmap)
        post.show_classification(seg1_labels, seg_data, data_type)
        post.show_classification_correlation(seg1_labels, seg_data, data_type)
        post.show_distributions_together(seg1_labels, seg_data, data_type,
                                         input_cmap=input_cmap)

    # Overlay distributions on pixels
    probs = seg1.get_probabilities(seg_data)
    post.show_overlaid_distribution(probs, seg_data, data_type)
    if not heightless:
        post.show_grain_area_distribution(seg1_labels, data_type, data_subtype)

    if embedding_dim is not None:
        seg_h, seg_w, _ = seg_data.shape
        pca_components = seg1.get_pca_components(seg_data)
        pca_components = pca_components.reshape(seg_h, seg_w, num_pca_components)
        post.show_classification_correlation(seg1_labels, pca_components,
                                             data_type, title_flag=False)
        if not heightless:
            post.show_classification_distributions(seg1_labels, pca_components,
                                                   outliers, data_type,
                                                   title_flag=False)

    if thresh is not None:
        # Persistence Watershed Segmentation clustering
        seg2_labels = _watershed_by_component(data, no_outliers_data, outliers,
                                              seg1_labels, data_properties,
                                              data_type, thresh)
    else:
        # Connected-components clustering
        post_labels = seg1.get_grains(seg1_labels)
        seg2_labels = slu.get_significant_labels(
            post_labels, bg_contrast_flag=not heightless)
        post.show_classification(seg2_labels, seg_data, data_type,
                                 input_cmap=input_cmap)
        if not heightless:
            post.show_grain_area_distribution(seg2_labels, data_type,
                                              data_subtype)

    return outliers, seg1_labels, seg2_labels


def _watershed_by_component(data, no_outliers_data, outliers, seg1_labels,
                            data_properties, data_type, thresh):
    """Runs persistence watershed on the "Adhesion" channel of every GMM
    component, merges the per-component grains into one label map, plots the
    intermediate and combined results, and returns the relabelled map."""
    comp_labels = list(np.unique(seg1_labels))
    if 0 in comp_labels:  # Avoid outlier components / class
        comp_labels.remove(0)

    # NOTE: "Adhesion" is the channel used for the watershed; the original
    # author considered trying "Stiffness" instead.
    watershed_id = data_properties.index("Adhesion")
    adhesion = no_outliers_data[:, :, watershed_id]
    seg2 = seg_water.SegmenterWatershed()

    summed_labels = np.zeros_like(data[:, :, 0], dtype=np.int64)
    for comp in comp_labels:
        in_component = seg1_labels == comp
        # Zero the channel outside the current component before segmenting it.
        watershed_data = adhesion * in_component
        temp_labels = seg2.fit_transform(watershed_data, outliers,
                                         pers_thresh=thresh)
        # NOTE: no need to fill out zeros in this case

        # Instance (grains) segmentation of properties
        print(f"Watershed Segmentation of GMM component {comp}")
        post.show_classification(temp_labels, no_outliers_data, data_type)

        # Shift this component's grain labels past the ones already used, to
        # distinguish labels from different components. Only positive labels
        # are shifted: offsetting the zero (outlier / border) pixels as well
        # would give them the previous component's largest label.
        offset = np.max(summed_labels)
        keep = in_component & (temp_labels > 0)
        # Components do not overlap, so assignment equals accumulation here.
        summed_labels[keep] = temp_labels[keep] + offset

    seg2_labels = slu.relabel(summed_labels)

    # Instance (grains) segmentation of properties
    print("Watershed Segmentation of combined GMM components")
    post.show_classification(seg2_labels, no_outliers_data, data_type)
    return seg2_labels
# Label maps loaded from disk, keyed by their position in the file lists.
seg1_dict = {}
seg2_dict = {}

# Sort the file names by the fifth character from the end of the name:
# '1' marks a Seg1 file and '2' a Seg2 file.
for fl in files:
    stage_char = fl[-5]
    if stage_char == '1':  # Seg1 files
        seg1_fl_list.append(fl)
    elif stage_char == '2':  # Seg2 files
        seg2_fl_list.append(fl)
    elif fl.endswith('v'):  # Morphology descriptor .csv files
        continue
    else:
        print(fl[:-4], ' is messed up')

# Load every Seg1 map first, then every Seg2 map.
for idx, name in enumerate(seg1_fl_list):
    seg1_dict[idx] = np.load(map_file_path + name)

for idx, name in enumerate(seg2_fl_list):
    seg2_dict[idx] = np.load(map_file_path + name)

# Pair the i-th Seg1 map with the i-th Seg2 map and write the domain metrics
# next to the inputs; the output name reuses the Seg1 file name without its
# last eight characters.
for i, seg1_name in enumerate(seg1_fl_list):
    domain_labels = slu.relabel(seg2_dict[i])
    phase_labels = slu.relabel(seg1_dict[i])

    domain_props = slu.all_domain_properties(phase_labels, domain_labels)

    stem = seg1_name[:-8]
    outfile = map_file_path + stem + 'domain_metrics.csv'
    domain_props.to_csv(outfile)
    print('finished file # ', i)