示例#1
0
    def transform(self, data, outliers=None):
        """
        Applies the learned Gaussian mixture model and
        returns a labelled array.

        Args:
            data (NumPy Array): Material properties array of shape (height, width, n_properties)
            outliers (NumPy Array): Binary array indicating outliers of shape (height, width).
                Any dtype is accepted (bool, int or float); non-zero entries mark outliers.

        Returns:
            labels (NumPy Array): The segmented array of shape (height, width). Each pixel receives
                a label corresponding to its segment. Outlier pixels are set to 0 before the
                final relabelling. Returns None (after logging a warning) if no model has
                been fitted yet.
        """
        if self.gmm is None:
            logger.warning("Attempting to transform prior to fitting. You must call .fit() first.")
            return None

        h, w, _ = data.shape
        data = self.get_pca_components(data)

        labels = self.gmm.predict(data)
        labels = np.reshape(labels, (h, w))
        labels += 1  # all labels move up one so that 0 is free for outliers
        if outliers is not None:
            # Mask rather than `labels *= 1 - outliers`: the in-place multiply
            # fails with a casting error when `outliers` is a float array,
            # because the float product cannot be written back into the
            # integer label array.
            outlier_mask = np.asarray(outliers, dtype=bool)
            labels = np.where(outlier_mask, 0, labels)  # outliers map to label 0

        labels = slu.relabel(labels)
        return labels
示例#2
0
    def transform(self,
                  data,
                  outliers=None,
                  pers_thresh=DEF_THRESH
                  ):  # NOTE need data as input to use as GMM Segmenter
        """
        Applies threshold to the watershed graph and returns a labelled array.

        Args:
            data (NumPy Array): Material property array of shape (height, width).
            outliers (NumPy Array): Binary array indicating outliers of shape (height, width).
                Any dtype is accepted (bool, int or float); non-zero entries mark outliers.
            pers_thresh (float): merging threshold

        Returns:
            (NumPy Array): The segmented array of shape (height, width). Each pixel receives
                a label corresponding to its segment. Outlier pixels are set to 0 before the
                final relabelling. Returns None (after logging a warning) if the segmenter
                has not been fitted yet.
        """
        if self.pws is None:
            logger.warning(
                "Attempting to transform prior to fitting. You must call .fit() first."
            )
            return None

        # NOTE(review): the normalised / smoothed data is not used below; the
        # labels come only from the fitted watershed graph. The calls are kept
        # so that the method behaves as before -- confirm whether they can go.
        if self.normalize:
            data = SegmenterWatershed.normalize_data(data)

        if self.smooth:
            data = SegmenterWatershed.smooth_data(data)

        labels = self.pws.apply_threshold(pers_thresh)
        if outliers is not None:
            # Build a new array instead of `labels *= 1 - outliers`:
            #  * the in-place multiply fails with a casting error when
            #    `outliers` is a float array and `labels` is integer-typed;
            #  * it also writes into the array returned by apply_threshold.
            outlier_mask = np.asarray(outliers, dtype=bool)
            # outliers map to label 0 which are borders between grains
            labels = np.where(outlier_mask, 0, labels)

        labels = slu.relabel(labels)
        return labels
示例#3
0
 def get_grains(labels):
     """
     Segments class labels into grain labels.

     Runs ``measure.label`` on the class map with ``connectivity=2`` and
     ``background=0``, then passes the result through ``slu.relabel``.

     Args:
         labels (NumPy Array): Class label array.

     Returns:
         (NumPy Array): Grain label array as returned by ``slu.relabel``.
     """
     connected = measure.label(labels, connectivity=2, background=0)
     return slu.relabel(connected)
示例#4
0
def _watershed_by_component(data, no_outliers_data, seg1_labels, outliers,
                            data_properties, data_type, thresh):
    """
    Runs persistence watershed segmentation separately on every GMM component
    and merges the per-component results into a single label array.

    Args:
    data - np.array(). 3D array; only used for the shape of the output
    no_outliers_data - np.array(). 3D array whose "Adhesion" channel is segmented
    seg1_labels - np.array(). 2D array of GMM labels; label 0 is skipped
    outliers - np.array(). 2D array of outliers, forwarded to the segmenter
    data_properties - list. channel names; must contain "Adhesion"
    data_type - forwarded to post.show_classification
    thresh - merging threshold, forwarded as pers_thresh

    Returns:
    seg2_labels - np.array(). 2D array of combined, relabelled watershed labels
    """
    comp_labels = list(np.unique(seg1_labels))
    if 0 in comp_labels:  # Avoid outlier components / class
        comp_labels.remove(0)

    watershed_id = data_properties.index("Adhesion")
    seg2 = seg_water.SegmenterWatershed()

    summed_labels = np.zeros_like(data[:, :, 0], dtype=np.int64)
    for label in comp_labels:
        component_mask = seg1_labels == label

        # Zero the channel outside the current component before segmenting it
        watershed_data = no_outliers_data[:, :, watershed_id] * component_mask
        temp_labels = seg2.fit_transform(watershed_data,
                                         outliers,
                                         pers_thresh=thresh)

        # NOTE: no need to fill out zeros in this case

        # Instance (grains) segmentation of properties
        print(f"Watershed Segmentation of GMM component {label}")
        post.show_classification(temp_labels, no_outliers_data, data_type)

        # Offset by the largest label used so far to distinguish labels from
        # different components, then keep only this component's pixels.
        temp_labels += np.max(summed_labels)
        temp_labels *= component_mask
        summed_labels += temp_labels

    seg2_labels = slu.relabel(summed_labels)

    # Instance (grains) segmentation of properties
    print("Watershed Segmentation of combined GMM components")
    post.show_classification(seg2_labels, no_outliers_data, data_type)
    return seg2_labels


def m2py_pipeline(dataframe, heightless, outlier_threshold, n_components,
                  padding, embedding_dim, thresh, nonlinear, normalize, zscale,
                  data_type, data_subtype, input_cmap):
    """
    Wrapper function for m2py tools. Allows to include or exclude m2py tools in the order shown in the code.

    Args:
    dataframe - np.array(). 3D array of SPM data
    heightless - bool. if 'True', the "Zscale" and "Height" channels are removed before GMM labelling,
                 according to channel labels in config.py
    outlier_threshold - currently unused: outlier extraction below is called with a fixed value of 2.5
    n_components - int. number specifying the number of Gaussian phase distributions to be identified in dataframe
    padding - int. number specifying the degree of closeness of neighbors to be included in GMM labelling
    embedding_dim - int. number specifying the number of principle components to use in PCA before GMM labelling
                    (capped at the number of channels in the data)
    thresh - merging threshold for persistence watershed clustering. If None, connected-components
             clustering is used instead
    nonlinear - bool. if 'True', nonlinear properties are generated prior to analysis. Includes x^2, x^3, abs(x),
                and 1/x
    normalize - forwarded to SegmenterGMM
    zscale - forwarded to SegmenterGMM
    data_type - key into config.data_info; also forwarded to the pre/post helpers
    data_subtype - forwarded to post.show_grain_area_distribution (only used when heightless is False)
    input_cmap - colormap forwarded to the plotting helpers

    Returns:
    outliers - np.array(). 2D array of outliers
    seg1_labels - np.array(). 2D array of GMM labels
    seg2_labels - np.array(). 2D array of clustering labels

    Raises:
    ValueError - if heightless is neither True nor False
    """
    # Reject a bad flag up front. Previously this case only printed a message
    # after all the work was done and then crashed with UnboundLocalError at
    # the return statement.
    if heightless not in (True, False):
        raise ValueError(
            f"Heightless flag incorrect: expected True or False, got {heightless!r}")

    fill_zeros_flag = True
    remove_outliers_flag = True

    # Apply frequency removal
    data = pre.apply_frequency_removal(dataframe, data_type)

    data_properties = config.data_info[data_type]['properties']

    # Extract outliers: union of the per-property outlier maps
    # NOTE(review): the threshold is hard-coded to 2.5; `outlier_threshold`
    # is not forwarded -- confirm whether that is intended.
    h, w, c = data.shape
    outliers = np.zeros((h, w))
    for prop in data_properties:
        temp_outliers = pre.extract_outliers(data, data_type, prop, 2.5,
                                             int(h / 2), int(w / 2))
        pre.show_outliers(data, data_type, prop, temp_outliers)

        outliers = np.logical_or(outliers, temp_outliers)

    no_outliers_data = pre.smooth_outliers_from_data(data, outliers)

    plt.imshow(outliers)
    plt.show()

    # Show a-priori distributions
    pre.show_property_distributions(data, data_type, outliers)

    # NOTE(review): min() raises TypeError for embedding_dim=None, so the
    # `embedding_dim is not None` checks below cannot currently be False.
    num_pca_components = min(embedding_dim, c)

    # Run GMM segmentation
    seg1 = seg_gmm.SegmenterGMM(n_components=n_components,
                                embedding_dim=num_pca_components,
                                padding=padding,
                                nonlinear=nonlinear,
                                normalize=normalize,
                                zscale=zscale)

    if heightless:
        # Remove height properties
        zscale_id = data_properties.index("Zscale")
        height_id = data_properties.index("Height")

        no_height_data = np.delete(no_outliers_data, [zscale_id, height_id],
                                   axis=2)

        seg1_labels = seg1.fit_transform(no_height_data, outliers)

        if fill_zeros_flag:
            seg1_labels = slu.fill_out_zeros(seg1_labels, outliers)

        elif remove_outliers_flag:
            seg1_labels = np.ma.masked_where(outliers == 1, seg1_labels)

        post.show_classification(seg1_labels, no_height_data, data_type)

        # Overlay distributions on pixels
        probs = seg1.get_probabilities(no_height_data)
        post.show_overlaid_distribution(probs, no_height_data, data_type)

        if embedding_dim is not None:
            h, w, _ = no_height_data.shape

            pca_components = seg1.get_pca_components(no_height_data)
            pca_components = pca_components.reshape(h, w, num_pca_components)
            post.show_classification_correlation(seg1_labels,
                                                 pca_components,
                                                 data_type,
                                                 title_flag=False)

        if thresh is not None:
            # Persistence Watershed Segmentation clustering. It works on the
            # full-channel data because the "Adhesion" index refers to the
            # complete property list.
            seg2_labels = _watershed_by_component(data, no_outliers_data,
                                                  seg1_labels, outliers,
                                                  data_properties, data_type,
                                                  thresh)
        else:
            # Connected-components clustering
            post_labels = seg1.get_grains(seg1_labels)
            seg2_labels = slu.get_significant_labels(post_labels,
                                                     bg_contrast_flag=False)

        post.show_classification(seg2_labels,
                                 no_height_data,
                                 data_type,
                                 input_cmap=input_cmap)

    else:
        seg1_labels = seg1.fit_transform(no_outliers_data, outliers)

        if fill_zeros_flag:
            seg1_labels = slu.fill_out_zeros(seg1_labels, outliers)

        elif remove_outliers_flag:
            seg1_labels = np.ma.masked_where(outliers == 1, seg1_labels)

        post.show_classification(seg1_labels, no_outliers_data, data_type)
        post.show_distributions_together(seg1_labels,
                                         no_outliers_data,
                                         data_type,
                                         input_cmap=input_cmap)

        # NOTE(review): the two plots above are produced a second time here;
        # kept so that the sequence of figures is unchanged.
        post.show_classification(seg1_labels, no_outliers_data, data_type)
        post.show_classification_correlation(seg1_labels, no_outliers_data,
                                             data_type)
        post.show_distributions_together(seg1_labels,
                                         no_outliers_data,
                                         data_type,
                                         input_cmap=input_cmap)

        # Overlay distributions on pixels
        probs = seg1.get_probabilities(no_outliers_data)
        post.show_overlaid_distribution(probs, no_outliers_data, data_type)
        post.show_grain_area_distribution(seg1_labels, data_type, data_subtype)

        if embedding_dim is not None:
            h, w, _ = no_outliers_data.shape

            pca_components = seg1.get_pca_components(no_outliers_data)
            pca_components = pca_components.reshape(h, w, num_pca_components)
            post.show_classification_correlation(seg1_labels,
                                                 pca_components,
                                                 data_type,
                                                 title_flag=False)

            post.show_classification_distributions(seg1_labels,
                                                   pca_components,
                                                   outliers,
                                                   data_type,
                                                   title_flag=False)

        if thresh is not None:
            # Persistence Watershed Segmentation clustering
            seg2_labels = _watershed_by_component(data, no_outliers_data,
                                                  seg1_labels, outliers,
                                                  data_properties, data_type,
                                                  thresh)
        else:
            # Connected-components clustering
            post_labels = seg1.get_grains(seg1_labels)
            seg2_labels = slu.get_significant_labels(post_labels,
                                                     bg_contrast_flag=True)

        post.show_classification(seg2_labels,
                                 no_outliers_data,
                                 data_type,
                                 input_cmap=input_cmap)
        post.show_grain_area_distribution(seg2_labels, data_type, data_subtype)

    return outliers, seg1_labels, seg2_labels
示例#5
0
seg1_dict = {}
seg2_dict = {}

# Sort the file names by the character that sits right before the last four
# characters (presumably a 4-character extension such as ".npy" -- confirm).
for fl in files:
    tag = fl[-5]
    if tag == '1':  # Seg1 files
        seg1_fl_list.append(fl)
        continue
    if tag == '2':  # Seg2 files
        seg2_fl_list.append(fl)
        continue
    if fl[-1] != 'v':  # names ending in 'v' (morphology descriptor .csv files) are skipped silently
        print(fl[:-4], ' is messed up')

# Load every map, keyed by its position in the corresponding file list
for idx, name in enumerate(seg1_fl_list):
    seg1_dict[idx] = np.load(map_file_path + name)

for idx, name in enumerate(seg2_fl_list):
    seg2_dict[idx] = np.load(map_file_path + name)

# Compute the domain metrics for each seg1 / seg2 pair and write them to a CSV
# next to the maps; the output name reuses the seg1 file name minus its last
# eight characters.
for i, seg1_name in enumerate(seg1_fl_list):
    domain_labels = slu.relabel(seg2_dict[i])
    phase_labels = slu.relabel(seg1_dict[i])

    domain_props = slu.all_domain_properties(phase_labels, domain_labels)
    domain_props.to_csv(map_file_path + seg1_name[:-8] + 'domain_metrics.csv')

    print('finished file # ', i)