Example #1
def test_grid_to_graph():
    # Checking that the function works with graphs containing no edges
    size = 2
    roi_size = 1
    # Generating two convex parts with one vertex
    # Thus, edges will be empty in _to_graph
    mask = np.zeros((size, size), dtype=np.bool)
    mask[0:roi_size, 0:roi_size] = True
    mask[-roi_size:, -roi_size:] = True
    mask = mask.reshape(size ** 2)
    A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray)
    assert_true(connected_components(A)[0] == 2)

    # Checking that the function works whatever the type of mask is
    mask = np.ones((size, size), dtype=np.int16)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask)
    assert_true(connected_components(A)[0] == 1)

    # Checking dtype of the graph
    mask = np.ones((size, size))
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.bool)
    assert_true(A.dtype == np.bool)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=np.int)
    assert_true(A.dtype == np.int)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask,
                      dtype=np.float64)
    assert_true(A.dtype == np.float64)
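
A minimal standalone sketch (not part of the test above) of what grid_to_graph returns for a small grid; by default the result is a sparse COO adjacency matrix with one row and column per pixel:

from sklearn.feature_extraction.image import grid_to_graph

A = grid_to_graph(n_x=2, n_y=2)  # 4 pixels -> a 4 x 4 adjacency matrix
print(A.shape)                   # (4, 4)
print(A.toarray())               # ones on the diagonal and between 4-connected neighbors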
Example #2
def test_connect_regions_with_grid():
    lena = sp.misc.lena()
    mask = lena > 50
    graph = grid_to_graph(*lena.shape, mask=mask)
    assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])

    mask = lena > 150
    graph = grid_to_graph(*lena.shape, mask=mask, dtype=None)
    assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])
Example #3
def cluster_spatial_data(X, n_parcels, xyz=None, shape=None, mask=None,
                         method='ward', verbose=False):
    """Cluster the data using Ward's algorithm

    Parameters
    ==========
    X: array of shape (n_voxels, n_subjects)
       the functional data, across subjects
    n_parcels: int, the desired number of parcels
    xyz: array of shape (n_voxels, 3), optional
         positions of the voxels in grid coordinates
    shape: tuple, the domain shape (assuming a grid structure), optional
           alternative specification of positions
    mask: arbitrary array of arbitrary dimension, optional
          alternative specification of positions
    method: string, one of ['ward', 'spectral', 'kmeans'], optional
            clustering method

    Returns
    =======
    label: array of shape (n_voxels), the resulting cluster assignment

    Note
    ====
    One of xyz, shape or mask needs to be provided
    """
    from sklearn.cluster import spectral_clustering, k_means
    if mask is not None:
        connectivity = grid_to_graph(*shape, mask=mask)
    elif shape is not None:
        connectivity = grid_to_graph(*shape)
    elif xyz is not None:
        from sklearn.neighbors import kneighbors_graph
        n_neighbors = 2 * xyz.shape[1]
        connectivity = kneighbors_graph(xyz, n_neighbors=n_neighbors)
    else:
        raise ValueError('One of mask, shape or xyz has to be provided')

    if n_parcels == 1:
        return np.zeros(X.shape[0])
    if method == 'ward':
        connectivity = connectivity.tocsr()
        ward = Ward(n_clusters=n_parcels, connectivity=connectivity).fit(X)
        label = ward.labels_
    elif method == 'spectral':
        i, j = connectivity.nonzero()
        sigma = np.sum((X[i] - X[j]) ** 2, 1).mean()
        connectivity.data = np.exp(- np.sum((X[i] - X[j]) ** 2, 1) /
                                      (2 * sigma))
        label = spectral_clustering(connectivity, n_clusters=n_parcels)
    elif method == 'kmeans':
        _, label, _ = k_means(X, n_parcels)
    else:
        raise ValueError('Unknown method for parcellation')
    return label
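
A minimal usage sketch (synthetic data, not from the original code; assumes numpy as np and the old-style scikit-learn Ward class imported by this module), exercising the `shape` branch:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(10 * 10, 5)  # 100 voxels, 5 subjects
labels = cluster_spatial_data(X, n_parcels=4, shape=(10, 10))
print(np.unique(labels))   # the 4 parcel labels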
Example #4
def test_connect_regions_with_grid():
    try:
        face = sp.face(gray=True)
    except AttributeError:
        # Newer versions of scipy have face in misc
        from scipy import misc
        face = misc.face(gray=True)
    mask = face > 50
    graph = grid_to_graph(*face.shape, mask=mask)
    assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])

    mask = face > 150
    graph = grid_to_graph(*face.shape, mask=mask, dtype=None)
    assert_equal(ndimage.label(mask)[1], connected_components(graph)[0])
Example #5
 def ward_cluster_land_mask(self, threshold=50):
     """
     Try to separate land from water using scikit-learn Ward clustering. The
     simple land_to_zeros method above does not distinguish shadow pixels on
     land from water pixels. The Ward clustering connectivity constraint
     should take care of that.
     """
     from sklearn.cluster import Ward
     from sklearn.feature_extraction.image import grid_to_graph
     import time
     # Get the last band. I'm assuming the last band will be the longest
     # wavelength.
     band = self.band_array[-1]
     # zero out pixels that are above the threshold
     band[np.where(band > threshold)] = 0
     
     X = np.reshape(band, (-1,1))
     connectivity = grid_to_graph(*band.shape)
     
     st = time.time()
     n_clusters = 2
     ward = Ward(n_clusters=n_clusters, connectivity=connectivity).fit(X)
     label = np.reshape(ward.labels_, band.shape)
     print "Elaspsed time: ", time.time() - st
     return label
Example #6
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
        assert_equal(len(warning_list), 1)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
Example #7
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    # test caching
    clustering = Ward(n_clusters=10, connectivity=connectivity,
                      memory=mkdtemp())
    clustering.fit(X)
    labels = clustering.labels_
    assert_true(np.size(np.unique(labels)) == 10)
    # Turn caching off now
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    # Check that we obtain the same solution with early-stopping of the
    # tree building
    clustering.compute_full_tree = False
    clustering.fit(X)
    np.testing.assert_array_equal(clustering.labels_, labels)
    clustering.connectivity = None
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
    # Check that we raise a TypeError on dense matrices
    clustering = Ward(n_clusters=10,
                      connectivity=connectivity.todense())
    assert_raises(TypeError, clustering.fit, X)
    clustering = Ward(n_clusters=10,
                      connectivity=sparse.lil_matrix(
                          connectivity.todense()[:10, :10]))
    assert_raises(ValueError, clustering.fit, X)
Example #8
def test_affinity_passed_to_fix_connectivity():
    # Test that the affinity parameter is actually passed to the pairwise
    # function

    size = 2
    rng = np.random.RandomState(0)
    X = rng.randn(size, size)
    mask = np.array([True, False, False, True])

    connectivity = grid_to_graph(n_x=size, n_y=size,
                                 mask=mask, return_as=np.ndarray)

    class FakeAffinity:
        def __init__(self):
            self.counter = 0

        def increment(self, *args, **kwargs):
            self.counter += 1
            return self.counter

    fa = FakeAffinity()

    linkage_tree(X, connectivity=connectivity, affinity=fa.increment)

    assert fa.counter == 3
Example #9
def test_agglomerative_clustering_with_distance_threshold(linkage):
    # Check that we obtain the correct number of clusters with
    # agglomerative clustering with distance_threshold.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    connectivity = grid_to_graph(*mask.shape)
    # test when distance threshold is set to 10
    distance_threshold = 10
    for conn in [None, connectivity]:
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=distance_threshold,
            connectivity=conn, linkage=linkage)
        clustering.fit(X)
        clusters_produced = clustering.labels_
        num_clusters_produced = len(np.unique(clustering.labels_))
        # test if the clusters produced match the point in the linkage tree
        # where the distance exceeds the threshold
        tree_builder = _TREE_BUILDERS[linkage]
        children, n_components, n_leaves, parent, distances = \
            tree_builder(X, connectivity=conn, n_clusters=None,
                         return_distance=True)
        num_clusters_at_threshold = np.count_nonzero(
            distances >= distance_threshold) + 1
        # test number of clusters produced
        assert num_clusters_at_threshold == num_clusters_produced
        # test clusters produced
        clusters_at_threshold = _hc_cut(n_clusters=num_clusters_produced,
                                        children=children,
                                        n_leaves=n_leaves)
        assert np.array_equiv(clusters_produced,
                              clusters_at_threshold)
Example #10
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
Example #11
    def _fit_method(self, data):
        """Helper function which applies clustering method on the masked data
        """
        mask_img_ = self.masker_.mask_img_

        if self.algorithm == 'minibatchkmeans':
            if self.verbose:
                print("[MiniBatchKMeans] Learning")
            labels = self._cache(_minibatch_kmeans_fit_method,
                                 func_memory_level=1)(
                data.T, self.n_parcels, self.init, self.random_state,
                self.verbose)
            self.kmeans_labels_ = labels

        elif self.algorithm == 'featureagglomeration':
            if self.verbose:
                print("[Feature Agglomeration] Learning")
            mask_ = mask_img_.get_data().astype(np.bool)
            shape = mask_.shape
            if self.connectivity is None:
                self.connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                                        n_z=shape[2], mask=mask_)
            labels = self._cache(_feature_agglomeration_fit_method,
                                 func_memory_level=1)(
                data, self.n_parcels, self.connectivity, self.linkage)

            self.ward_labels_ = labels
Example #12
def spatio_temporal_cluster_test_connectivity():
    """Test cluster-level permutations with and without connectivity."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return

    rng = np.random.RandomState(0)
    noise1_2d = rng.randn(condition1_2d.shape[0], condition1_2d.shape[1], 10)
    data1_2d = np.transpose(np.dstack((condition1_2d, noise1_2d)), [0, 2, 1])

    noise2_2d = rng.randn(condition2_2d.shape[0], condition2_2d.shape[1], 10)
    data2_2d = np.transpose(np.dstack((condition2_2d, noise2_2d)), [0, 2, 1])

    conn = grid_to_graph(data1_2d.shape[-1], 1)

    threshold = dict(start=4.0, step=2)
    T_obs, clusters, p_values_conn, hist = \
        spatio_temporal_cluster_test([data1_2d, data2_2d], connectivity=conn,
                                     n_permutations=50, tail=1, seed=1,
                                     threshold=threshold)

    T_obs, clusters, p_values_no_conn, hist = \
        spatio_temporal_cluster_test([data1_2d, data2_2d],
                                     n_permutations=50, tail=1, seed=1,
                                     threshold=threshold)

    assert_equal(np.sum(p_values_conn < 0.05), np.sum(p_values_no_conn < 0.05))
Example #13
def segment_image(im_file, n_segments=5, alg='ac'):
    img = imread(im_file)
    img = img[:,:,0]
    X = np.reshape(img, (-1, 1))

    if alg == 'ac':
        # Define the structure A of the data. Pixels connected to their neighbors.
        connectivity = grid_to_graph(*img.shape)

        # Compute clustering
        print("Compute structured hierarchical clustering...")
        st = time.time()
        n_clusters = n_segments  # number of regions
        ward = AgglomerativeClustering(n_clusters=n_clusters,
                linkage='complete', connectivity=connectivity).fit(X)
        label = np.reshape(ward.labels_, img.shape)
    elif alg == 'dbscan':
        print("Compute DBScan clustering...")
        st = time.time()
        dbs = DBSCAN(eps=1).fit(X)
        label = np.reshape(dbs.labels_, img.shape)

    print("Elapsed time: ", time.time() - st)
    print("Number of pixels: ", label.size)
    print("Number of clusters: ", np.unique(label).size)

    return label
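
A minimal usage sketch (hypothetical file name; assumes the same module-level imports as the function above, e.g. imread, time and the sklearn cluster classes):

labels = segment_image('photo.png', n_segments=4, alg='ac')
print(labels.shape)  # same 2D shape as the first channel of the input image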
Example #14
def wardHierarchical(img):
    # Downsample first, so the connectivity graph, the feature matrix and
    # the label reshaping below all refer to the same image
    face = sp.misc.imresize(img, 0.10) / 255.
    connectivity = grid_to_graph(*face.shape)
    print("Compute structured hierarchical clustering...")
    st = time.time()
    n_clusters = 15  # number of regions
    ward = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward',
                                   connectivity=connectivity)

    X = np.reshape(face, (-1, 1))
    ward.fit(X)
    label = np.reshape(ward.labels_, face.shape)
    print("Elapsed time: ", time.time() - st)
    print("Number of pixels: ", label.size)
    print("Number of clusters: ", np.unique(label).size)


    plt.figure(figsize=(5, 5))
    plt.imshow(face, cmap=plt.cm.gray)
    for l in range(n_clusters):
        plt.contour(label == l, levels=[0.5],
                    colors=[plt.cm.nipy_spectral(l / float(n_clusters)), ])
    plt.xticks(())
    plt.yticks(())
    plt.show()
Example #15
def unsupervisedLearningTest03():
	# Connectivity-constrained clustering

	import numpy as np
	import scipy as sp
	import matplotlib.pyplot as plt
	import time

	from sklearn.feature_extraction.image import grid_to_graph
	from sklearn.cluster import AgglomerativeClustering
	from sklearn import cluster, datasets
	lena = sp.misc.lena()

	#Downsample the image by a factor of 4
	lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2]

	X = np.reshape(lena, (-1, 1))

	# Define the structure A of the data. Pixels connected to their neighbors.
	# Turn the image into a graph and reason about its connectivity
	connectivity = grid_to_graph(*lena.shape)


	print "Compute structured hierarchical clustering..."
	st = time.time()

	n_clusters = 15 # number of regions

	ward = AgglomerativeClustering(n_clusters = n_clusters, linkage = 'ward', connectivity = connectivity).fit(X)
	label = np.reshape(ward.labels_, lena.shape)
	print "Elapsed time: " + str(time.time() - st)
	print "Number of pixels: " + str(label.size)
	print "Number of clusters: " + str(np.unique(label).size)

	#Feature agglomeration
	digits = datasets.load_digits()
	images = digits.images
	X = np.reshape(images, (len(images), -1))

	connectivity = grid_to_graph(*images[0].shape)

	agglo = cluster.FeatureAgglomeration(connectivity = connectivity, n_clusters = 32)

	agglo.fit(X)
	X_reduced = agglo.transform(X)
	X_approx = agglo.inverse_transform(X_reduced)
	images_approx = np.reshape(X_approx, images.shape)
Example #16
def agglomerativeClusteringFeatures(image):
    connectivity = grid_to_graph(*image[:,:,2].shape)
    X = np.reshape(image[:,:,2], (-1,1))
    ward = AgglomerativeClustering(n_clusters=150,
        linkage = 'ward', connectivity = connectivity).fit(X)
    labels = np.reshape(ward.labels_, image[:,:,2].shape)
    averageIntensity = color.label2rgb(labels, image[:,:,2], kind = 'avg')
    #areas = getAreas(labels) 
    return averageIntensity
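
A minimal usage sketch (illustrative input, not from the original code; assumes the module also imports color from skimage, as the function requires). Downsampling keeps the agglomeration fast:

from skimage import data

small_rgb = data.astronaut()[::4, ::4]  # (128, 128, 3) image
avg = agglomerativeClusteringFeatures(small_rgb)
print(avg.shape)  # per-pixel average intensity over the 150 clusters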
Example #17
def test_connectivity_fixing_non_lil():
    # Check non regression of a bug if a non item assignable connectivity is
    # provided with more than one component.
    # create dummy data
    x = np.array([[0, 0], [1, 1]])
    # create a mask with several components to force connectivity fixing
    m = np.array([[True, False], [False, True]])
    c = grid_to_graph(n_x=2, n_y=2, mask=m)
    w = AgglomerativeClustering(connectivity=c, linkage='ward')
    assert_warns(UserWarning, w.fit, x)
Example #18
def test_height_linkage_tree():
    # Check that the height of the results of linkage tree is sorted.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    for linkage_func in _TREE_BUILDERS.values():
        children, n_nodes, n_leaves, parent = linkage_func(X.T, connectivity)
        n_nodes = 2 * X.shape[1] - 1
        assert len(children) + n_leaves == n_nodes
Example #19
def test_height_ward_tree():
    """
    Check that the height of ward tree is sorted.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    children, n_nodes, n_leaves, parent = ward_tree(X.T, connectivity)
    n_nodes = 2 * X.shape[1] - 1
    assert_true(len(children) + n_leaves == n_nodes)
Example #20
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
Example #21
def test_structured_ward_tree():
    """
    Check that we obtain the correct solution for structured ward tree.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    children, n_components, n_leaves = ward_tree(X.T, connectivity)
    n_nodes = 2 * X.shape[1] - 1
    assert_true(len(children) + n_leaves == n_nodes)
Example #22
 def fit(self, X, y):
     y = y.ravel()
     n_samples, n_lags, n_lats, n_lons = X.shape
     self.scaler.fit(X[:, -1].reshape(n_samples, -1))
     X = X.reshape(n_lags * n_samples, -1)
     connectivity = grid_to_graph(n_lats, n_lons)
     self.agglo.connectivity = connectivity
     X = self.scaler.transform(X)
     X = self.agglo.fit_transform(X)
     X = X.reshape(n_samples, -1)
     self.clf.fit(X, y)
Example #23
def test_connectivity_fixing_non_lil():
    """
    Check non regression of a bug if a non item assignable connectivity is
    provided with more than one component.
    """
    # create dummy data
    x = np.array([[0, 0], [1, 1]])
    # create a mask with several components to force connectivity fixing
    m = np.array([[True, False], [False, True]])
    c = grid_to_graph(n_x=2, n_y=2, mask=m)
    w = Ward(connectivity=c)
    w.fit(x)
Example #24
 def fit(self, kshape=None):
     if kshape is not None:
         connectivity = grid_to_graph(*kshape)
         self.fit_parameters.update({"connectivity": connectivity})
     ward = AgglomerativeClustering(**self.fit_parameters)
     ward.fit(self.input_data)
     self.mapper_data = ward.labels_
     self.output_data = np.array([])
     self.output_space_size = ward.n_clusters
     self.model_attributes = {"n_clusters": ward.n_clusters,
                              "n_components": ward.n_components}
     self._log_model_results()
     return self
Example #25
def test_spatio_temporal_cluster_connectivity():
    """Test spatio-temporal cluster permutations."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return
    condition1_1d, condition2_1d, condition1_2d, condition2_2d = \
        _get_conditions()

    rng = np.random.RandomState(0)
    noise1_2d = rng.randn(condition1_2d.shape[0], condition1_2d.shape[1], 10)
    data1_2d = np.transpose(np.dstack((condition1_2d, noise1_2d)), [0, 2, 1])

    noise2_2d = rng.randn(condition2_2d.shape[0], condition2_2d.shape[1], 10)
    data2_2d = np.transpose(np.dstack((condition2_2d, noise2_2d)), [0, 2, 1])

    conn = grid_to_graph(data1_2d.shape[-1], 1)

    threshold = dict(start=4.0, step=2)
    T_obs, clusters, p_values_conn, hist = \
        spatio_temporal_cluster_test([data1_2d, data2_2d], connectivity=conn,
                                     n_permutations=50, tail=1, seed=1,
                                     threshold=threshold, buffer_size=None)

    buffer_size = data1_2d.size // 10
    T_obs, clusters, p_values_no_conn, hist = \
        spatio_temporal_cluster_test([data1_2d, data2_2d],
                                     n_permutations=50, tail=1, seed=1,
                                     threshold=threshold, n_jobs=2,
                                     buffer_size=buffer_size)

    assert_equal(np.sum(p_values_conn < 0.05), np.sum(p_values_no_conn < 0.05))

    # make sure results are the same without buffer_size
    T_obs, clusters, p_values2, hist2 = \
        spatio_temporal_cluster_test([data1_2d, data2_2d],
                                     n_permutations=50, tail=1, seed=1,
                                     threshold=threshold, n_jobs=2,
                                     buffer_size=None)
    assert_array_equal(p_values_no_conn, p_values2)
    assert_raises(ValueError, spatio_temporal_cluster_test,
                  [data1_2d, data2_2d], tail=1, threshold=-2.)
    assert_raises(ValueError, spatio_temporal_cluster_test,
                  [data1_2d, data2_2d], tail=-1, threshold=2.)
    assert_raises(ValueError, spatio_temporal_cluster_test,
                  [data1_2d, data2_2d], tail=0, threshold=-1)
Example #26
def ward_clustering(config, img_flat):
    X = np.reshape(img_flat, (-1, 1))
    connectivity = grid_to_graph(*img_flat.shape)
    ward = AgglomerativeClustering(
                n_clusters=config['ward_clusters'],
                linkage='ward',
                compute_full_tree=False,
                connectivity=connectivity).fit(X)
    ulab = np.unique(ward.labels_)
    out = []
    for u in ulab:
        inds = np.where(ward.labels_ == u)[0]
        hsh = hash(tuple(inds - inds[0]))
        out.append(hsh)
    return tuple(out)
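
A minimal usage sketch (hypothetical config dict; despite its name, img_flat is assumed to be 2D here, since its full shape is passed to grid_to_graph):

import numpy as np

config = {'ward_clusters': 4}
img = np.random.RandomState(0).rand(16, 16)
signature = ward_clustering(config, img)
print(len(signature))  # one positional hash per cluster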
Example #27
def test_structured_ward_tree():
    """
    Check that we obtain the correct solution for structured ward tree.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    # Avoiding a mask with only 'True' entries
    mask[4:7, 4:7] = 0
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    children, n_components, n_leaves, parent = ward_tree(X.T, connectivity)
    n_nodes = 2 * X.shape[1] - 1
    assert_true(len(children) + n_leaves == n_nodes)
    # Check that ward_tree raises a ValueError with a connectivity matrix
    # of the wrong shape
    assert_raises(ValueError, ward_tree, X.T, np.ones((4, 4)))
Example #28
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert_true(np.size(np.unique(ward.labels_)) == 5)

    Xred = ward.transform(X)
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    assert_true(np.unique(Xfull[0]).size == 5)
Example #29
def test_ward_fit_transform():
    """Test parcellation building and associated signal extraction.

    """
    # Generate toy data
    # define data structure
    shape = (5, 5, 5)
    mask = np.ones(shape, dtype=bool)
    connectivity = image.grid_to_graph(n_x=5, n_y=5, n_z=5, mask=mask)
    # data generation
    data1 = np.ones(shape)
    data1[1:3, 1:3, 1:3] = 2.
    data2 = np.ones(shape)
    data2[3:, 3:, 3:] = 4.
    data = np.ones((4, np.prod(shape)))  # 4 ravelized images
    data[0] = np.ravel(data1)
    data[1] = np.ravel(data2)

    # One image used for train, transform all
    parcelled_data, labels = _ward_fit_transform(data, [0], connectivity, 2, 0)
    # check parcelled_data
    assert_equal(parcelled_data.shape, (4, 2))
    assert_array_equal(
        np.sort(np.unique(parcelled_data[0])),  # order is hard to predict
        [1, 2])
    assert_array_equal(parcelled_data[2], [1, 1])
    assert_array_equal(parcelled_data[3], [1, 1])
    # check labels
    assert_equal(len(labels.shape), 1)
    assert_array_equal(np.unique(labels), [0, 1])

    # Two images used for train, transform all, add offset to labels
    parcelled_data, labels = _ward_fit_transform(data, [0, 1],
                                                 connectivity, 3, 10)
    # check parcelled_data
    assert_equal(parcelled_data.shape, (4, 3))
    assert_array_equal(
        np.sort(np.unique(parcelled_data[0])),  # order is hard to predict
        [1, 2])
    assert_array_equal(
        np.sort(np.unique(parcelled_data[1])),  # order is hard to predict
        [1, 4])
    assert_array_equal(parcelled_data[2], [1, 1, 1])
    assert_array_equal(parcelled_data[3], [1, 1, 1])
    # check labels
    assert_equal(len(labels.shape), 1)
    assert_array_equal(np.unique(labels), [10, 11, 12])
Example #30
def test_structured_linkage_tree():
    # Check that we obtain the correct solution for structured linkage trees.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    # Avoiding a mask with only 'True' entries
    mask[4:7, 4:7] = 0
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    for tree_builder in _TREE_BUILDERS.values():
        children, n_components, n_leaves, parent = tree_builder(X.T, connectivity)
        n_nodes = 2 * X.shape[1] - 1
        assert_true(len(children) + n_leaves == n_nodes)
        # Check that ward_tree raises a ValueError with a connectivity matrix
        # of the wrong shape
        assert_raises(ValueError, tree_builder, X.T, np.ones((4, 4)))
        # Check that fitting with no samples raises an error
        assert_raises(ValueError, tree_builder, X.T[:0], connectivity)
Example #31
def clustering(interactive: Interactive, api: API):
    window = api.application.document_windows[0]
    target_data_item = window.target_data_item
    ctx = iface.get_context()
    ds = iface.dataset_from_data_item(ctx, target_data_item)
    fy, fx = tuple(ds.shape.sig)
    y, x = tuple(ds.shape.nav)
    # roi = np.random.choice([True, False], tuple(ds.shape.nav), p=[0.01, 0.99])
    # We only sample 5 % of the frame for the std deviation map
    # since the UDF still needs optimization
    std_roi = np.random.choice([True, False],
                               tuple(ds.shape.nav),
                               p=[0.05, 0.95])
    roi = np.ones((y, x), dtype=bool)
    # roi = np.zeros((y, x), dtype=bool)
    # roi[:, :50] = True
    stddev_res = run_stddev(ctx=ctx, dataset=ds, roi=std_roi * roi)
    ref_frame = stddev_res['std']
    # sum_res = ctx.run_udf(udf=SumUDF(), dataset=ds)
    # ref_frame = sum_res['intensity'].data
    update_data(target_data_item, ref_frame)

    peaks = peak_local_max(ref_frame, min_distance=3, num_peaks=500)
    masks = sparse.COO(shape=(len(peaks), fy, fx),
                       coords=(range(len(peaks)), peaks[..., 0], peaks[...,
                                                                       1]),
                       data=1)
    feature_udf = ApplyMasksUDF(mask_factories=lambda: masks,
                                mask_dtype=np.uint8,
                                mask_count=len(peaks),
                                use_sparse=True)
    feature_res = ctx.run_udf(udf=feature_udf, dataset=ds, roi=roi)
    f = feature_res['intensity'].raw_data.astype(np.float32)
    f = np.log(f - np.min(f) + 1)
    feature_vector = f / np.abs(f).mean(axis=0)
    # too slow
    # nion_peaks = peaks / tuple(ds.shape.sig)
    # with api.library.data_ref_for_data_item(target_data_item):
    #     for p in nion_peaks:
    #         target_data_item.add_ellipse_region(*p, 0.01, 0.01)
    connectivity = scipy.sparse.csc_matrix(
        grid_to_graph(
            # Transposed!
            n_x=y,
            n_y=x,
        ))

    roi_connectivity = connectivity[roi.flatten()][:, roi.flatten()]
    threshold = interactive.get_float("Cluster distance threshold: ", 10)
    clusterer = AgglomerativeClustering(
        affinity='euclidean',
        distance_threshold=threshold,
        n_clusters=None,
        linkage='ward',
        connectivity=roi_connectivity,
    )
    clusterer.fit(feature_vector)
    labels = np.zeros((y, x), dtype=np.int32)
    labels[roi] = clusterer.labels_ + 1
    new_data = api.library.create_data_item_from_data(labels)
    window.display_data_item(new_data)
Example #32
def _build_parcellations(all_subjects_data,
                         mask,
                         n_parcellations=100,
                         n_parcels=1000,
                         n_bootstrap_samples=None,
                         random_state=None,
                         memory=Memory(cachedir=None),
                         n_jobs=1,
                         verbose=False):
    """Build the parcellations for the RPBI framework.

    Parameters
    ----------
    all_subjects_data : array_like, shape=(n_samples, n_voxels)
      Masked subject images as an array.

    mask : ndarray of booleans
      Mask that has been applied on the initial images to obtain
      `all_subjects_data`.

    n_parcellations : int,
      The number of parcellations to be built and used to extract
      signal averages from the data.

    n_parcels : int,
      Number of parcels for the parcellations.

    n_bootstrap_samples : int,
      Number of subjects to be used to build the parcellations. The subjects
      are randomly drawn with replacement.
      If set to None, n_samples subjects are drawn, which correspond to
      a bootstrap draw.

    random_state : int,
      Random numbers seed for reproducible results.

    memory : instance of joblib.Memory or string
      Used to cache the masking process.
      By default, no caching is done. If a string is given, it is the
      path to the caching directory.

    n_jobs : int,
      Number of parallel workers.
      If 0 is provided, all CPUs are used.
      A negative number indicates that all the CPUs except (|n_jobs| - 1) ones
      will be used.

    verbose : boolean,
      Activate verbose mode (default is False).

    Returns
    -------
    parcelled_data : np.ndarray, shape=(n_parcels_tot, n_subjs)
      Data for all subjects after mean signal extraction with all the
      parcellations that have been created.

    ward_labels : np.ndarray, shape=(n_vox * n_wards, )
      Voxel-to-parcel map for all the parcellations. Useful to perform
      inverse transforms.

    TODO
    ----
    - Deal with NaNs in the original data (WardAgglomeration cannot fit
      when NaNs are present in the data). Median imputation?

    """
    # initialize the seed of the random generator
    rng = check_random_state(random_state)

    # check n_jobs (number of CPUs)
    n_jobs = check_n_jobs(n_jobs)

    n_samples = all_subjects_data.shape[0]
    if n_bootstrap_samples is None:
        n_bootstrap_samples = n_samples

    # Compute connectivity
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0],
                                       n_y=shape[1],
                                       n_z=shape[2],
                                       mask=mask)

    # Build parcellations
    draw = rng.randint(n_samples, size=n_bootstrap_samples * n_parcellations)
    draw = draw.reshape((n_parcellations, -1))
    ret = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(cache(_ward_fit_transform, memory, verbose=verbose))(
            all_subjects_data, draw[i], connectivity, n_parcels, i * n_parcels)
        for i in range(n_parcellations))
    # reduce results
    parcelled_data_parts, ward_labels = zip(*ret)
    parcelled_data = np.hstack(parcelled_data_parts)
    ward_labels = np.ravel(ward_labels)

    return parcelled_data, ward_labels
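
A minimal usage sketch (synthetic data, not from the original code; assumes numpy as np and the module's own helpers such as _ward_fit_transform):

import numpy as np

mask = np.ones((5, 5, 5), dtype=bool)
data = np.random.RandomState(0).randn(8, mask.sum())  # 8 subjects, 125 voxels
parcelled, labels = _build_parcellations(
    data, mask, n_parcellations=3, n_parcels=10, random_state=0)
print(parcelled.shape)  # (8, 30): n_parcellations * n_parcels columns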
Example #33
def regionprops_3D(im):
    r"""
    Calculates various metrics for each labeled region in a 3D image.

    The ``regionprops`` method in **skimage** is very thorough for 2D images,
    but is a bit limited when it comes to 3D images, so this function aims
    to fill this gap.

    Parameters
    ----------
    im : array_like
        An image containing at least one labeled region.  If a boolean image
        is received, then the ``True`` voxels are treated as a single region
        labeled ``1``.  Regions labeled 0 are ignored in all cases.

    Returns
    -------
    An augmented version of the list returned by skimage's ``regionprops``.
    Information, such as ``volume``, can be found for region A using the
    following syntax: ``result[A-1].volume``.

    Notes
    -----
    This function may seem slow compared to the skimage version, but that is
    because skimage defers calculation of certain properties until they are
    accessed, while this one evaluates everything (including the deferred
    properties from skimage's ``regionprops``).

    Regions can be identified using a watershed algorithm, though it can be
    tricky to obtain the desired results.  *PoreSpy* includes the SNOW
    algorithm, which may be helpful.

    """
    print('_' * 60)
    print('Calculating regionprops')

    results = regionprops(im, coordinates='xy')
    for i in tqdm(range(len(results))):
        mask = results[i].image
        mask_padded = sp.pad(mask, pad_width=1, mode='constant')
        temp = spim.distance_transform_edt(mask_padded)
        dt = extract_subsection(temp, shape=mask.shape)
        # ---------------------------------------------------------------------
        # Slice indices
        results[i].slice = results[i]._slice
        # ---------------------------------------------------------------------
        # Volume of regions in voxels
        results[i].volume = results[i].area
        # ---------------------------------------------------------------------
        # Volume of bounding box, in voxels
        results[i].bbox_volume = sp.prod(mask.shape)
        # ---------------------------------------------------------------------
        # Create an image of the border
        results[i].border = dt == 1
        # ---------------------------------------------------------------------
        # Create an image of the maximal inscribed sphere
        r = dt.max()
        inv_dt = spim.distance_transform_edt(dt < r)
        results[i].inscribed_sphere = inv_dt < r
        # ---------------------------------------------------------------------
        # Find surface area using marching cubes and analyze the mesh
        tmp = sp.pad(sp.atleast_3d(mask), pad_width=1, mode='constant')
        tmp = spim.convolve(tmp, weights=ball(1)) / 5
        verts, faces, norms, vals = marching_cubes_lewiner(volume=tmp, level=0)
        results[i].surface_mesh_vertices = verts
        results[i].surface_mesh_simplices = faces
        area = mesh_surface_area(verts, faces)
        results[i].surface_area = area
        # ---------------------------------------------------------------------
        # Find sphericity
        vol = results[i].volume
        r = (3 / 4 / sp.pi * vol)**(1 / 3)
        a_equiv = 4 * sp.pi * (r)**2
        a_region = results[i].surface_area
        results[i].sphericity = a_equiv / a_region
        # ---------------------------------------------------------------------
        # Find skeleton of region
        results[i].skeleton = skeletonize_3d(mask)
        # ---------------------------------------------------------------------
        # Volume of convex image, equal to area in 2D, so just translating
        results[i].convex_volume = results[i].convex_area
        # ---------------------------------------------------------------------
        # Convert region grid to a graph
        am = grid_to_graph(*mask.shape, mask=mask)
        results[i].graph = am

    return results
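
A minimal usage sketch (synthetic blob, not from the original docs; assumes numpy as np plus the scipy/skimage helpers imported by this module). The labels are cast to int because recent skimage versions reject boolean label images:

import numpy as np

im = np.zeros((20, 20, 20), dtype=bool)
im[5:15, 5:15, 5:15] = True  # one cubic region
props = regionprops_3D(im.astype(int))
print(props[0].volume)       # 1000 voxels
print(props[0].graph.shape)  # (1000, 1000) adjacency from grid_to_graph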
Example #34
def generate_clustering_per_region(region):
    """
    Generate clustering from a series of 2D slices pertaining to a region
    (e.g. cervical). The global ``levels`` list determines how many slices
    are displayed.
    :param region: name of the region to process
    :return:
    """
    use_mask = True

    # Load data
    logger.info("Load data...")
    nii = nib.load(params.file_prefix_all + region + ext)
    data = nii.get_fdata()

    # Crop around spinal cord, and only keep half of it.
    # The way the atlas was built, the right and left sides are perfectly symmetrical (mathematical average). Hence,
    # we can discard one half, without losing information.
    # TODO: parametrize this, and find center automatically
    # TODO: find cropping values per region
    if region == 'cervical' or region == 'lumbar':
        xmin, xmax = (45, 110)
        ymin, ymax = (75, 114)
    elif region == 'thoracic':
        xmin, xmax = (53, 100)
        ymin, ymax = (75, 105)
    else:
        xmin, xmax = (55, 94)
        ymin, ymax = (75, 95)
    data_crop = data[xmin:xmax, ymin:ymax, :]
    del data

    # If we have a mask of the white matter, we load it and crop it according to the data_crop shape.
    if use_mask:
        # Load data
        nii_mask = nib.load(params.file_mask_prefix + region + ext)
        mask = nii_mask.get_data()
        # Crop, binarize
        mask_crop = mask[xmin:xmax, ymin:ymax, :] > 0.5
    else:
        mask_crop = np.ones(data_crop.shape[0:3]) > 0.5
    # Reshape
    ind_mask = np.where(mask_crop)
    mask1d = np.squeeze(mask_crop.reshape(-1, 1))

    # Standardize data
    logger.info("Standardize data...")
    # original_shape = data_crop.shape[0:3]
    data2d = data_crop.reshape(-1, data_crop.shape[3])
    scaler = StandardScaler()
    data2d_norm = scaler.fit_transform(data2d)
    del data2d

    # Build connectivity matrix
    logger.info("Build connectivity matrix...")
    connectivity = grid_to_graph(n_x=data_crop.shape[0],
                                 n_y=data_crop.shape[1],
                                 n_z=data_crop.shape[2],
                                 mask=mask_crop)

    del data_crop

    # Process Paxinos atlas for display
    nii_paxinos = nib.load(params.file_paxinos + '_' + region + ext)
    paxinos3d = np.mean(nii_paxinos.get_data(), axis=2)
    # Crop data
    paxinos3d = paxinos3d[xmin:xmax, ymin:ymax, :]
    # clip between 0 and 1.
    # note: we don't want to normalize, otherwise the background (which should be 0) will have a non-zero value.
    paxinos3d = np.clip(paxinos3d, 0, 1)
    # TODO: crop Paxinos

    # Perform clustering
    logger.info("Run clustering...")
    num_clusters = [8, 10]  # [5, 6, 7, 8, 9, 10, 11]

    for n_cluster in num_clusters:
        logger.info("Number of clusters: {}".format(n_cluster))
        clustering = AgglomerativeClustering(linkage="ward", n_clusters=n_cluster, connectivity=connectivity)
        clustering.fit(data2d_norm[mask1d, :])
        logger.info("Reshape labels...")
        labels = np.zeros_like(mask_crop, dtype=np.int)
        labels[ind_mask] = clustering.labels_ + 1  # we add the +1 because sklearn's first label has value "0", and we are now going to use "0" as the background (i.e. not a label)
        del clustering

        # Display clustering results
        logger.info("Generate figures...")
        fig = plt.figure(figsize=(20, 20))
        fig.subplots_adjust(hspace=0.1, wspace=0.1)
        for i in range(len(levels)):
            ax = fig.add_subplot(4, 4, i+1)
            ax.imshow(labels[:, :, i], cmap='Spectral')
            plt.title("iz = {}".format(i), pad=18)
            plt.tight_layout()
        fig.savefig('clustering_results_ncluster{}_{}.png'.format(n_cluster, region))
        fig.clear()

        # Create 4D array: last dimension corresponds to the cluster number. Cluster value is converted to 1.
        a = list(labels.shape)
        a.append(n_cluster)
        labels4d = np.zeros(a)
        for i_label in range(n_cluster):
            ind_label = np.argwhere(labels == i_label + 1)
            for i in ind_label:
                labels4d[i[0], i[1], i[2], i_label] = 1

        # Average across Z. Each cluster is coded between 0 and 1.
        labels3d = np.mean(labels4d, axis=2)

        # Display result of averaging
        logger.info("Generate figures...")
        fig = plt.figure(figsize=(7, 7))
        fig.suptitle('Averaged clusters (N={}) | Region: {}'.format(n_cluster, region), fontsize=20)

        # Display Paxinos
        # TODO: generalize BASE_COLORS for more than 8 labels
        ax = fig.add_subplot(1, 2, 1)
        ax.set_facecolor((1, 1, 1))
        for i_label in range(paxinos3d.shape[2]):
            labels_rgb = np.zeros([paxinos3d.shape[0], paxinos3d.shape[1], 4])
            for ix in range(paxinos3d.shape[0]):
                for iy in range(paxinos3d.shape[1]):
                    ind_color = list(params.colors.keys())[i_label]
                    labels_rgb[ix, iy] = colors.to_rgba(params.colors[ind_color], paxinos3d[ix, iy, i_label])
            ax.imshow(labels_rgb)
        plt.axis('off')
        plt.title("Paxinos atlas", pad=18)
        plt.tight_layout()

        # Find label color corresponding best to the Paxinos atlas
        list_color = get_best_matching_color_with_paxinos(im=labels3d, imref=paxinos3d)

        # Display clustering
        ax = fig.add_subplot(1, 2, 2)
        for i_label in range(n_cluster):
            labels_rgb = np.zeros([labels3d.shape[0], labels3d.shape[1], 4])
            for ix in range(labels3d.shape[0]):
                for iy in range(labels3d.shape[1]):
                    labels_rgb[ix, iy] = colors.to_rgba(params.colors[list_color[i_label]], labels3d[ix, iy, i_label])
            ax.imshow(labels_rgb)
        plt.axis('off')
        plt.title("Cluster map", pad=18)
        plt.tight_layout()
        fig.subplots_adjust(hspace=0, wspace=0.1)
        fig.savefig('clustering_results_avgz_{}_ncluster{}.png'.format(region, n_cluster))

    del data2d_norm

    logger.info("Done!")
Example #35
    kshape = (30,20)
    n_iter = 100
    learning_rate = 0.01
    n_colors = 100
    
    spcolors = np.random.rand(n_colors,3)
    mapper = SOMMapper(kshape=kshape, n_iter=n_iter, learning_rate=learning_rate)
    kohonen = mapper.fit_transform(spcolors)
    U_Matrix = build_U_matrix(kohonen, kshape, topology="rect")

    
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax1.imshow(np.split(kohonen, kshape[0], axis=0))
    ax1.set_title("Kohonen Map")
    
    ## Clustering
    n_clusters = 5  # number of regions
    connectivity = grid_to_graph(kshape[0],kshape[1])
    ward = AgglomerativeClustering(n_clusters=n_clusters,
            linkage='ward', connectivity=connectivity).fit(kohonen)
            
    label = np.reshape(ward.labels_, kshape)
    for l in range(n_clusters):
        ax1.contour(label == l, levels=[0.5],
                    colors=[plt.cm.nipy_spectral(l / float(n_clusters)), ])
    
    ax2 = fig.add_subplot(212)
    ax2.imshow(np.split(U_Matrix, kshape[0], axis=0))
    ax2.set_title("U_Matrix")
    plt.show()
Example #36
 def _cluster(self, linkage='complete', k=6):
     C = grid_to_graph(self.x, self.y)
     X = np.array(self.weights).reshape((self.x * self.y, self.weights[0][0].shape[0]))
     clusterer = AgglomerativeClustering(n_clusters=k, connectivity=C, affinity=self.dtw_fn, linkage=linkage)
     return clusterer.fit_predict(X)
Example #37
# #############################################################################
# Generate data
orig_coins = coins()

# Resize it to 20% of the original size to speed up the processing
# Applying a Gaussian filter for smoothing prior to down-scaling
# reduces aliasing artifacts.
smoothened_coins = gaussian_filter(orig_coins, sigma=2)
rescaled_coins = rescale(smoothened_coins, 0.2, mode="reflect",
                         **rescale_params)

X = np.reshape(rescaled_coins, (-1, 1))

# #############################################################################
# Define the structure A of the data. Pixels connected to their neighbors.
connectivity = grid_to_graph(*rescaled_coins.shape)

# #############################################################################
# Compute clustering
print("Compute structured hierarchical clustering...")
st = time.time()
n_clusters = 27  # number of regions
ward = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward',
                               connectivity=connectivity)
ward.fit(X)
label = np.reshape(ward.labels_, rescaled_coins.shape)
print("Elapsed time: ", time.time() - st)
print("Number of pixels: ", label.size)
print("Number of clusters: ", np.unique(label).size)

# #############################################################################
Example #38
# is quite simple, as our data are aligned on a rectangular grid.
#
# Let's pretend that our data were smaller -- a 3 × 3 grid. Thinking about
# each voxel as being connected to the other voxels it touches, we would
# need a 9 × 9 adjacency matrix. The first row of this matrix contains the
# voxels in the flattened data that the first voxel touches. Since it touches
# the second element in the first row and the first element in the second row
# (and is also a neighbor to itself), this would be::
#
#     [1, 1, 0, 1, 0, 0, 0, 0, 0]
#
# :mod:`sklearn.feature_extraction` provides a convenient function for this:

from sklearn.feature_extraction.image import grid_to_graph  # noqa: E402

mini_adjacency = grid_to_graph(3, 3).toarray()
assert mini_adjacency.shape == (9, 9)
print(mini_adjacency[0])

# %%
# In general the adjacency between voxels can be more complex, such as
# those between sensors in 3D space, or time-varying activation at brain
# vertices on a cortical surface. MNE provides several convenience functions
# for computing adjacency matrices, for example:
#
# * :func:`mne.channels.find_ch_adjacency`
# * :func:`mne.stats.combine_adjacency`
#
# See the :ref:`Statistics API <api_reference_statistics>` for a full list.
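
# %%
# As a quick sketch (not part of the original tutorial): for spatio-temporal
# data, :func:`mne.stats.combine_adjacency` can combine the grid adjacency
# above with additional regular dimensions such as time. Integer arguments
# are treated as fully regular (lattice) dimensions:

from mne.stats import combine_adjacency  # noqa: E402

spatiotemporal = combine_adjacency(5, grid_to_graph(3, 3))
assert spatiotemporal.shape == (45, 45)  # 5 time points x 9 voxels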
#
# MNE also ships with numerous built-in channel adjacency matrices from the
Example #39
def connectivity(shape):
    from sklearn.feature_extraction import image
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2])
    return connectivity
Example #40
# which voxels are adjacent to each other. In our case this
# is quite simple, as our data are aligned on a rectangular grid.
#
# Let's pretend that our data were smaller -- a 3x3 grid. Thinking about
# each voxel as being connected to the other voxels it touches, we would
# need a 9x9 connectivity matrix. The first row should contain the elements
# in the ``.ravel()``'ed data that it touches. Since it touches the
# second element in the first row and the first element in the second row
# (and is also a neighbor to itself), this would be::
#
#     [1, 1, 0, 1, 0, 0, 0, 0, 0]
#
# :mod:`sklearn.feature_extraction` provides a convenient function for this:

from sklearn.feature_extraction.image import grid_to_graph  # noqa: E402
mini_connectivity = grid_to_graph(3, 3).toarray()
assert mini_connectivity.shape == (9, 9)
print(mini_connectivity[0])
del mini_connectivity

###############################################################################
# In general the connectivity between voxels can be more complex, such as
# those between sensors in 3D space, or time-varying activation at brain
# vertices on a cortical surface. MNE provides several convenience functions
# for computing connectivity/neighbor/adjacency matrices, see the
# :ref:`Statistics API <api_reference_statistics>`.
#
# Standard clustering
# ~~~~~~~~~~~~~~~~~~~
# Here, since our data are on a grid, we can use ``connectivity=None`` to
# trigger optimized grid-based code, and run the clustering algorithm.
Example #41
def test_agglomerative_clustering():
    # Check that we obtain the correct number of clusters with
    # agglomerative clustering.
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    connectivity = grid_to_graph(*mask.shape)
    for linkage in ("ward", "complete", "average"):
        clustering = AgglomerativeClustering(n_clusters=10,
                                             connectivity=connectivity,
                                             linkage=linkage)
        clustering.fit(X)
        # test caching
        try:
            tempdir = mkdtemp()
            clustering = AgglomerativeClustering(
                n_clusters=10, connectivity=connectivity,
                memory=tempdir,
                linkage=linkage)
            clustering.fit(X)
            labels = clustering.labels_
            assert_true(np.size(np.unique(labels)) == 10)
        finally:
            shutil.rmtree(tempdir)
        # Turn caching off now
        clustering = AgglomerativeClustering(
            n_clusters=10, connectivity=connectivity, linkage=linkage)
        # Check that we obtain the same solution with early-stopping of the
        # tree building
        clustering.compute_full_tree = False
        clustering.fit(X)
        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                         labels), 1)
        clustering.connectivity = None
        clustering.fit(X)
        assert_true(np.size(np.unique(clustering.labels_)) == 10)
        # Check that we raise a TypeError on dense matrices
        clustering = AgglomerativeClustering(
            n_clusters=10,
            connectivity=sparse.lil_matrix(
                connectivity.toarray()[:10, :10]),
            linkage=linkage)
        assert_raises(ValueError, clustering.fit, X)

    # Test that using ward with another metric than euclidean raises an
    # exception
    clustering = AgglomerativeClustering(
        n_clusters=10,
        connectivity=connectivity.toarray(),
        affinity="manhattan",
        linkage="ward")
    assert_raises(ValueError, clustering.fit, X)

    # Test using another metric than euclidean works with linkage complete
    for affinity in PAIRED_DISTANCES.keys():
        # Compare our (structured) implementation to scipy
        clustering = AgglomerativeClustering(
            n_clusters=10,
            connectivity=np.ones((n_samples, n_samples)),
            affinity=affinity,
            linkage="complete")
        clustering.fit(X)
        clustering2 = AgglomerativeClustering(
            n_clusters=10,
            connectivity=None,
            affinity=affinity,
            linkage="complete")
        clustering2.fit(X)
        assert_almost_equal(normalized_mutual_info_score(clustering2.labels_,
                                                         clustering.labels_),
                            1)

    # Test that using a distance matrix (affinity = 'precomputed') has same
    # results (with connectivity constraints)
    clustering = AgglomerativeClustering(n_clusters=10,
                                         connectivity=connectivity,
                                         linkage="complete")
    clustering.fit(X)
    X_dist = pairwise_distances(X)
    clustering2 = AgglomerativeClustering(n_clusters=10,
                                          connectivity=connectivity,
                                          affinity='precomputed',
                                          linkage="complete")
    clustering2.fit(X_dist)
    assert_array_equal(clustering.labels_, clustering2.labels_)
Example #42
    mkdir(write_dir)

###############################################################################

# Global parameters
n_clusters = 5000

test_set = ['left button press (auditory cue)']
do_soft_threshold = False
nifti_masker = NiftiMasker(mask=mask_image,
                           smoothing_fwhm=False,
                           standardize=False,
                           memory='nilearn_cache')
shape = mask.shape
connectivity = grid_to_graph(n_x=shape[0],
                             n_y=shape[1],
                             n_z=shape[2],
                             mask=mask)

# cross-validation scheme
subject_label = np.repeat(np.arange(n_subjects), len(ref))
cv = ShuffleSplit(n_subjects,
                  n_iter=20,
                  train_size=.9,
                  test_size=.1,
                  random_state=2)


def do_parcel_connectivity(mask, n_clusters, ward):
    # Estimate parcel connectivity
    import scipy.sparse as sps
    n_voxels = mask.sum()
Example #43
from sklearn.cluster import AgglomerativeClustering

# generate data
lena = sp.misc.lena()
# downsample the image by a factor of 4
lena = lena[::2, ::2] + \
        lena[1::2, ::2] + \
        lena[::2, 1::2] + \
        lena[1::2, 1::2]

X = np.reshape(lena, (-1, 1))

# define the structure A of the data.
# Pixels connected to their neighbors.

connectivity = grid_to_graph(*lena.shape)

# compute clustering
print('Compute structured hierarchical clustering ...')
st = time.time()
n_clusters = 15  # number of regions
ward = AgglomerativeClustering(n_clusters=n_clusters,
                               linkage='ward',
                               connectivity=connectivity).fit(X)

label = np.reshape(ward.labels_, lena.shape)

print('Elapsed time: ', time.time() - st)
print('Number of pixels: ', label.size)
print('Number of clusters: ', np.unique(label).size)
Example #44
    def _raw_fit(self, data):
        """ Fits the parcellation method on this reduced data.

        Data are coming from a base decomposition estimator which computes
        the mask and reduces the dimensionality of images using
        randomized_svd.

        Parameters
        ----------
        data: ndarray
            Shape (n_samples, n_features)

        Returns
        -------
        labels: numpy.ndarray
            Labels to each cluster in the brain.

        connectivity: numpy.ndarray
            voxel-to-voxel connectivity matrix computed from a mask.
            Note that, this attribute is returned only for selected methods
            such as 'ward', 'complete', 'average'.
        """
        valid_methods = self.VALID_METHODS
        if self.method is None:
            raise ValueError("Parcellation method is specified as None. "
                             "Please select one of the methods in "
                             "{0}".format(valid_methods))
        if self.method is not None and self.method not in valid_methods:
            raise ValueError("The selected method '{0}' is not implemented. "
                             "Valid methods are in {1}".format(
                                 self.method, valid_methods))

        # We delay importing Ward/AgglomerativeClustering and import the
        # plotting module before it.

        # sklearn.cluster imports scipy's hierarchy module, and hierarchy
        # imports matplotlib. We therefore force matplotlib to be imported
        # first, through our plotting module, to avoid a backend display
        # error with matplotlib on Travis.
        try:
            from nilearn import plotting
        except Exception:
            pass

        components = MultiPCA._raw_fit(self, data)

        mask_img_ = self.masker_.mask_img_
        if self.verbose:
            print("[{0}] computing {1}".format(self.__class__.__name__,
                                               self.method))

        if self.method == 'kmeans':
            from sklearn.cluster import MiniBatchKMeans
            kmeans = MiniBatchKMeans(n_clusters=self.n_parcels,
                                     init='k-means++',
                                     random_state=self.random_state,
                                     verbose=max(0, self.verbose - 1))
            labels = self._cache(_estimator_fit,
                                 func_memory_level=1)(components.T, kmeans)

        elif self.method == 'rena':
            rena = ReNA(mask_img_,
                        n_clusters=self.n_parcels,
                        scaling=self.scaling,
                        n_iter=self.n_iter,
                        memory=self.memory,
                        memory_level=self.memory_level,
                        verbose=max(0, self.verbose - 1))
            method = 'rena'
            labels = \
                self._cache(_estimator_fit, func_memory_level=1)(components.T,
                                                                 rena, method)

        else:
            mask_ = _safe_get_data(mask_img_).astype(bool)
            shape = mask_.shape
            connectivity = image.grid_to_graph(n_x=shape[0],
                                               n_y=shape[1],
                                               n_z=shape[2],
                                               mask=mask_)

            from sklearn.cluster import AgglomerativeClustering

            agglomerative = AgglomerativeClustering(n_clusters=self.n_parcels,
                                                    connectivity=connectivity,
                                                    linkage=self.method,
                                                    memory=self.memory)

            labels = self._cache(_estimator_fit,
                                 func_memory_level=1)(components.T,
                                                      agglomerative)

            self.connectivity_ = connectivity
        # Avoid 0 label
        labels = labels + 1
        unique_labels = np.unique(labels)

        # Check that appropriate number of labels were created
        if len(unique_labels) != self.n_parcels:
            n_parcels_warning = ('The number of generated labels does not '
                                 'match the requested number of parcels.')
            warnings.warn(message=n_parcels_warning,
                          category=UserWarning,
                          stacklevel=3)
        self.labels_img_ = self.masker_.inverse_transform(labels)

        return self
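# A usage sketch for the estimator this method belongs to (nilearn's
# Parcellations, which calls _raw_fit through fit); the parameter values and
# the `func_imgs` input below are illustrative assumptions:
from nilearn.regions import Parcellations

ward_parcellation = Parcellations(method='ward', n_parcels=100)
ward_parcellation.fit(func_imgs)            # list of 4D functional images
labels_img = ward_parcellation.labels_img_  # parcel labels; 0 is background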
Example No. 45
0
   brain[labels == i] = 0

# Spatial smoothing to encourage smooth parcels
dim = np.shape(brain)
tc = tc.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc.shape[-1]
for t in np.arange(n_tpts):
    tc[:, :, :, t] = gaussian_filter(tc[:, :, :, t], sigma=1)
tc = tc.reshape((-1, n_tpts))
tc = tc[brain.ravel() == 1, :]

# Functional parcellation with Ward clustering
print("Performing Ward Clustering")
mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1],
                  n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=n_parcels, connectivity=A.tolil(),
                         memory=mem)
ward.fit(tc.T)
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain == 1] = ward.labels_ + 1  # labels start from 0, which is used for background

# Remove single voxels not connected to their parcel (see the runnable
# sketch below)
#for i in np.unique(template)[1:]:
#    labels, n_labels = label(template == i, structure=np.ones((3, 3, 3)))
#    if n_labels > 1:
#        for j in np.arange(n_labels):
#            if np.sum(labels == j + 1) < 10:
#                template[labels == j + 1] = 0
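# A runnable version of the commented-out cleanup above (a sketch, assuming
# a minimum parcel size of 10 voxels):
from scipy.ndimage import label

for i in np.unique(template)[1:]:
    comp_labels, n_labels = label(template == i, structure=np.ones((3, 3, 3)))
    if n_labels > 1:
        for j in np.arange(n_labels):
            # zero out connected components that are too small
            if np.sum(comp_labels == j + 1) < 10:
                template[comp_labels == j + 1] = 0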

# Saving the template
Example No. 46
0
def test_permutation_connectivity_equiv():
    """Test cluster level permutations with and without connectivity."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return
    rng = np.random.RandomState(0)
    # subjects, time points, spatial points
    n_time = 2
    n_space = 4
    X = rng.randn(6, n_time, n_space)
    # add some significant points
    X[:, :, 0:2] += 10  # span two time points and two spatial points
    X[:, 1, 3] += 20  # span one time point
    max_steps = [1, 1, 1, 2, 1]
    # This will run the full algorithm in two ways, then the spatio-temporal
    # algorithm in two ways. All of these should give the same results.
    conns = [None,
             grid_to_graph(n_time, n_space),
             grid_to_graph(1, n_space),
             grid_to_graph(1, n_space),
             None]
    stat_map = None
    thresholds = [2, 2, 2, 2, dict(start=0.01, step=1.0)]
    sig_counts = [2, 2, 2, 2, 5]
    stat_fun = partial(ttest_1samp_no_p, sigma=1e-3)

    cs = None
    ps = None
    for thresh, count, max_step, conn in zip(thresholds, sig_counts,
                                             max_steps, conns):
        t, clusters, p, H0 = \
            permutation_cluster_1samp_test(
                X, threshold=thresh, connectivity=conn, n_jobs=2,
                max_step=max_step, stat_fun=stat_fun)
        # make sure our output datatype is correct
        assert_true(isinstance(clusters[0], np.ndarray))
        assert_true(clusters[0].dtype == bool)
        assert_array_equal(clusters[0].shape, X.shape[1:])

        # make sure all comparisons were done; for TFCE, no perm
        # should come up empty
        inds = np.where(p < 0.05)[0]
        assert_equal(len(inds), count)
        if isinstance(thresh, dict):
            assert_equal(len(clusters), n_time * n_space)
            assert_true(np.all(H0 != 0))
            continue
        this_cs = [clusters[ii] for ii in inds]
        this_ps = p[inds]
        this_stat_map = np.zeros((n_time, n_space), dtype=bool)
        for ci, c in enumerate(this_cs):
            if isinstance(c, tuple):
                this_c = np.zeros((n_time, n_space), bool)
                for x, y in zip(c[0], c[1]):
                    this_stat_map[x, y] = True
                    this_c[x, y] = True
                this_cs[ci] = this_c
                c = this_c
            this_stat_map[c] = True
        if cs is None:
            ps = this_ps
            cs = this_cs
        if stat_map is None:
            stat_map = this_stat_map
        assert_array_equal(ps, this_ps)
        assert_true(len(cs) == len(this_cs))
        for c1, c2 in zip(cs, this_cs):
            assert_array_equal(c1, c2)
        assert_array_equal(stat_map, this_stat_map)
Example No. 47
0
def Clustering(image, amount_noise):
    # Transform the image into the (n_pixels, 3) array the clustering
    # estimators expect
    flatImg = np.reshape(image, [-1, 3])
    # Using Meanshift algorithm
    # Estimating bandwidth for meanshift algorithm
    bandwidth = estimate_bandwidth(flatImg, quantile=0.1, n_samples=100)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

    print(". Using MeanShift Algorithm with", amount_noise, "noise.")
    ms.fit(flatImg)
    labels = ms.labels_
    # Finding and displaying the number of clusters
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    print(". Number of estimated clusters using MeanShift: %d" % n_clusters_)
    # Displaying segmented image using MeanShift
    ms_segmentedImg = np.reshape(labels, original_image_shape[:2])
    ms_segmentedImg = label2rgb(ms_segmentedImg) * 255

    cv2.imshow("MeanShift segments", ms_segmentedImg.astype(np.uint8))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # cv2.imwrite("MeanShiftSegmentedImage.png", ms_segmentedImg)
    print(". Done!")
    print(". Calculating scores")
    CalculatingMetrics(a_image, ms_segmentedImg)

    # Agglomerative clustering algorithm
    x, y, z = original_image.shape
    connectivity = grid_to_graph(n_x=x, n_y=y)
    print(". Using Agglomerative Clustering Algorithm with", amount_noise,
          "noise.")
    ac = AgglomerativeClustering(n_clusters=n_clusters_,
                                 linkage="ward",
                                 connectivity=connectivity)
    ac.fit(flatImg)
    labels = ac.labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    print(" Number of estimated clusters using Agglomerative clustering: %d" %
          n_clusters_)

    # Displaying segmented image using Agglomerative clustering
    ac_segmentedImg = np.reshape(labels, original_image_shape[:2])
    ac_segmentedImg = label2rgb(ac_segmentedImg) * 255
    cv2.imshow("Agglomerative clustering segmented image", ac_segmentedImg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # cv2.imwrite("AgglomerativeSegmentedImage.png", ac_segmentedImg)
    print(". Done!")
    print(". Calculating scores")
    CalculatingMetrics(a_image, ac_segmentedImg)

    # KMeans algorithm
    print(". Using KMeans Clustering Algorithm with", amount_noise, "noise.")
    km = KMeans()
    km.fit(flatImg)
    labels = km.labels_
    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)
    print(" Number of estimated clusters using KMeans: %d" % n_clusters_)

    # Displaying segmented image using KMeans
    km_segmentedImg = np.reshape(labels, original_image_shape[:2])
    km_segmentedImg = label2rgb(km_segmentedImg) * 255
    cv2.imshow("KMeans segmented image", km_segmentedImg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # cv2.imwrite("KMeansSegmentedImage.png", km_segmentedImg)
    print(". Done!")
    print(". Calculating scores")
    CalculatingMetrics(a_image, km_segmentedImg)
Example No. 48
0
def superpixel_tSVD(signals,
                    Niter=3,
                    nclusters=5,
                    alpha=0.1,
                    grid_shape=None,
                    min_ncomps=1,
                    max_ncomps=100,
                    do_cleanup_label_maps=False,
                    return_components=True):
    approx = []
    sh = signals.shape
    connectivity_ward = None
    if grid_shape is not None:
        connectivity_ward = grid_to_graph(*grid_shape)

    labels = None # just to put this name into outer context
    comps = {}

    if connectivity_ward is None:
        clusterer = clustering_dispatcher_['minibatchkmeans'](nclusters)
        clusterer.batch_size = min(clusterer.batch_size, len(signals))
        if clusterer.init_size is None:
            clusterer.init_size = 3 * nclusters
        clusterer.init_size = max(3 * nclusters, clusterer.init_size)
    else:
        clusterer = skclust.AgglomerativeClustering(nclusters, connectivity=connectivity_ward)

    for k in range(Niter):
        # could also "improve" signals for labeling by smoothing or projection to low-rank spaces
        if nclusters > 1:
            label_signals = signals if k == 0 else np.mean(approx, 0)
            labels = clusterer.fit_predict(label_signals)
            if do_cleanup_label_maps:
                labels = cleanup_cluster_map(labels.reshape((len(labels), 1)),
                                             min_neighbors=2, niter=10).ravel()
        else:
            # a single cluster: every signal gets the same label
            labels = np.ones(sh[0], dtype=int)
        #alpha = k/Niter
        update_signals = (1 - alpha) * signals + alpha * np.mean(approx, 0) if k > 0 else signals
        update = np.zeros_like(update_signals)
        comps = {}
        for ll in np.unique(labels):
            group = labels == ll
            u, s, vh = simple_tSVD(signals[group])
            comps[ll] = (u, s, vh)
            app = u @ np.diag(s) @ vh
            update[group] = app
        approx.append(update)

    if return_components:
        Ulist, Slist, Vhlist = [], [], []
        for ll in comps:
            u, s, vh = comps[ll]
            Slist.append(s)
            ui = np.zeros((sh[0], len(s)))
            ui[labels == ll] = u
            Ulist.append(ui)
            Vhlist.append(vh)

        U = np.hstack(Ulist)
        S = np.concatenate(Slist)
        Vh = np.vstack(Vhlist)
        return U, S, Vh
    else:
        kstart = 1 if Niter > 1 else 0
        approx = np.mean(approx[kstart:], 0)
        return approx
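# simple_tSVD is not defined in this snippet; a hypothetical stand-in that
# matches how it is used above (truncated SVD of a group of signals, with the
# rank capped between min_ncomps and max_ncomps):
def simple_tSVD(group_signals, min_ncomps=1, max_ncomps=100):
    u, s, vh = np.linalg.svd(group_signals, full_matrices=False)
    # keep every available component within the allowed range; a real
    # implementation would likely choose the rank from the spectrum
    rank = int(np.clip(min(group_signals.shape), min_ncomps, max_ncomps))
    return u[:, :rank], s[:rank], vh[:rank]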
Example No. 49
0
def BMA_consensus_cluster_parallel(cfg, remote_path, remote_BOLD_fn, remote_mask_fn, Y, nifti_masker, \
                        num_vox, K_clus, K_clusters, \
                        parc, alpha, prop, nbItRFIR, onsets, durations,\
                        output_sub_parc, rescale=True, averg_bold=False):
    '''
    Perform all steps for one clustering case (K_clus given, parcellation
    number l given).
    remote_path: path on the cluster where results will be stored
    '''
    import os
    import sys
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_BB/Parcellations/")
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/")
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/Scripts_divers_utiles/Scripts_utiles/")
    sys.path.append('/home/pc174679/local/installations/consensus-cluster-0.6')
    
    from Random_parcellations import random_parcellations, subsample_data_on_time
    from Divers_parcellations_test import *
    
    from RFIR_evaluation_parcellations import JDE_estim, RFIR_estim, clustering_from_RFIR
    
    from Random_parcellations import hrf_roi_to_vox
    from pyhrf.tools._io import remote_copy, remote_mkdir
    from nisl import io
    
    #nifti_masker.mask=remote_mask_fn
    
    # Creation of the necessary paths --> do not do here
    parc_name = 'Subsampled_data_with_' + str(K_clus) + 'clusters' 
    parc_name_clus = parc_name + 'rnd_number_' + str(parc+1)
    
    remote_sub = os.sep.join((remote_path, parc_name))   
    #if not os.path.exists(remote_sub):
        #os.path.exists(remote_sub)
        #print 'remote_sub:', remote_sub
        #os.makedirs(remote_sub)
    remote_sub_parc = os.sep.join((remote_sub,parc_name_clus))   
    #if not os.path.exists(remote_sub_parc):
        #os.makedirs(remote_sub_parc)
    
    output_RFIR_parc = os.sep.join((output_sub_parc,'RFIR_estim'))
    
    ###################################
    ## 1st STEP: SUBSAMPLING
    print('--- Subsample data ---')
    Ysub = subsample_data_on_time(Y, remote_mask_fn, K_clus, alpha, prop, \
                    nifti_masker, rescale=rescale)
    print('Ysub:', Ysub)
    print('remote_sub_parc:', remote_sub_parc)
    Ysub_name = 'Y_sub_'+ str(K_clus) + 'clusters_' + 'rnd_number_' + str(parc+1) +'.nii'
    Ysub_fn = os.sep.join((remote_sub_parc, Ysub_name))
    Ysub_masked = nifti_masker.inverse_transform(Ysub).get_data()
    write_volume(Ysub_masked, Ysub_fn)                        
    
    
    
    ###################################
    ## 2nd STEP: RFIR
    print('--- Perform RFIR estimation ---')

    
    remote_RFIR_parc_clus = os.sep.join((remote_sub_parc, 'RFIR_estim'))
    #if not os.path.exists(remote_RFIR_parc):os.makedirs(remote_RFIR_parc)
    #remote_RFIR_parc_clus = os.sep.join((remote_RFIR_parc, parc_name_clus))
    #if not os.path.exists(remote_RFIR_parc_clus):os.makedirs(remote_RFIR_parc_clus)
    
    print('  * output path for RFIR ', remote_RFIR_parc_clus)
    print('  * RFIR for subsampling nb ', str(parc+1), ' with ', K_clus, ' clusters')
    RFIR_estim(nbItRFIR, onsets, durations, Ysub_fn, remote_mask_fn, \
                remote_RFIR_parc_clus, avg_bold=averg_bold)
                  
    hrf_fn = os.sep.join((remote_RFIR_parc_clus, 'rfir_ehrf.nii'))
    #remote_copy([hrf_fn], remote_host, 
                #remote_user, remote_path)[0]
    
    ###################################
    ## 3rd STEP: CLUSTERING FROM RFIR RESULTS
    name_hrf = 'rfir_ehrf.nii'
    
    from pyhrf.tools._io import write_volume, read_volume
    import nisl.io as ionisl
    from sklearn.feature_extraction import image
    from sklearn.cluster import WardAgglomeration
    from scipy.spatial.distance import cdist, pdist
    
    hrf_fn = os.sep.join((remote_RFIR_parc_clus,name_hrf))
    hrf = read_volume(hrf_fn)[0]
    hrf_t_fn = add_suffix(hrf_fn, 'transpose')
    # taking only the 1st condition to parcellate
    write_volume(hrf[:, :, :, :, 0], hrf_t_fn)
    
    nifti_masker = ionisl.NiftiMasker(remote_mask_fn)
    Nm = nifti_masker.fit(hrf_t_fn)
    
    #features: coeff of the HRF
    HRF = Nm.fit_transform(hrf_t_fn)
    
    mask, meta_data = read_volume(remote_mask_fn)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)

    # features used for clustering: the HRF coefficients, one feature per voxel
    ward = WardAgglomeration(n_clusters=K_clus, connectivity=connectivity,
                             memory='nisl_cache')
    ward.fit(HRF)
    labels_tot = ward.labels_ + 1
        
        
    #Kelbow, Perc_WSS, all_parc_from_RFIR_fns, all_parc_RFIR = \
    #clustering_from_RFIR(K_clusters, remote_RFIR_parc_clus, remote_mask_fn, name_hrf, plots=False)
    #labels_tot = all_parc_RFIR[str(Kelbow)]
    
    #to retrieve clustering with as many clusters as determined in K_clusters
    #labels_tot = all_parc_RFIR[str(K_clus)]
    #Parcellation retrieved: for K=Kelbow
    #clusters_RFIR_fn = all_parc_from_RFIR[str(Kelbow)]
    #clustering_rfir_fn = os.path.join(remote_RFIR_parc_clus, 'output_clustering_elbow.nii')
    #write_volume(read_volume(clusters_RFIR_fn)[0], clustering_rfir_fn, meta_bold)

    #labels_tot = nifti_masker.fit_transform([clusters_RFIR_fn])[0]
    #labels_tot = read_volume(clusters_RFIR_fn)[0]
    
    #labels_name='labels_' + str(int(K_clus)) + '_' + str(parc+1) + '.pck'
    #name_f = os.sep.join((remote_sub_parc, labels_name))
    #pickle_labels=open(name_f, 'w')
    #cPickle.dump(labels_tot,f)
    #pickle_labels.close()
    
    #remote_copy(pickle_labels, remote_user, 
            #remote_host, output_sub_parc)
    
    #################################
    ## Prepare consensus clustering
    print('Prepare consensus clustering')
    clustcount, totalcount = upd_similarity_matrix(labels_tot)
    print('results:', clustcount)
    
    return clustcount.astype(bool)
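# upd_similarity_matrix is not shown in this snippet; a hypothetical stand-in
# consistent with its use above (a voxel-by-voxel co-clustering indicator and
# a matching count matrix):
def upd_similarity_matrix(labels):
    labels = np.asarray(labels).ravel()
    # 1 where two voxels share a cluster label, 0 otherwise
    clustcount = (labels[:, None] == labels[None, :]).astype(int)
    totalcount = np.ones_like(clustcount)
    return clustcount, totalcount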
Example No. 50
0
def test_cluster_permutation_with_connectivity():
    """Test cluster level permutations with connectivity matrix."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return
    condition1_1d, condition2_1d, condition1_2d, condition2_2d = \
        _get_conditions()

    n_pts = condition1_1d.shape[1]
    # we don't care about p-values in any of these, so do fewer permutations
    args = dict(seed=None, max_step=1, exclude=None,
                step_down_p=0, t_power=1, threshold=1.67,
                check_disjoint=False, n_permutations=50)

    did_warn = False
    for X1d, X2d, func, spatio_temporal_func in \
            [(condition1_1d, condition1_2d,
              permutation_cluster_1samp_test,
              spatio_temporal_cluster_1samp_test),
             ([condition1_1d, condition2_1d],
              [condition1_2d, condition2_2d],
              permutation_cluster_test,
              spatio_temporal_cluster_test)]:
        out = func(X1d, **args)
        connectivity = grid_to_graph(1, n_pts)
        out_connectivity = func(X1d, connectivity=connectivity, **args)
        assert_array_equal(out[0], out_connectivity[0])
        for a, b in zip(out_connectivity[1], out[1]):
            assert_array_equal(out[0][a], out[0][b])
            assert_true(np.all(a[b]))

        # test spatio-temporal w/o time connectivity (repeat spatial pattern)
        connectivity_2 = sparse.coo_matrix(
            linalg.block_diag(connectivity.asfptype().todense(),
                              connectivity.asfptype().todense()))

        if isinstance(X1d, list):
            X1d_2 = [np.concatenate((x, x), axis=1) for x in X1d]
        else:
            X1d_2 = np.concatenate((X1d, X1d), axis=1)

        out_connectivity_2 = func(X1d_2, connectivity=connectivity_2, **args)
        # make sure we were operating on the same values
        split = len(out[0])
        assert_array_equal(out[0], out_connectivity_2[0][:split])
        assert_array_equal(out[0], out_connectivity_2[0][split:])

        # make sure we really got 2x the number of original clusters
        n_clust_orig = len(out[1])
        assert_true(len(out_connectivity_2[1]) == 2 * n_clust_orig)

        # Make sure that we got the old ones back
        data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
        data_2 = set([np.sum(out_connectivity_2[0][a]) for a in
                     out_connectivity_2[1][:]])
        assert_true(len(data_1.intersection(data_2)) == len(data_1))

        # now use the other algorithm
        if isinstance(X1d, list):
            X1d_3 = [np.reshape(x, (-1, 2, n_space)) for x in X1d_2]
        else:
            X1d_3 = np.reshape(X1d_2, (-1, 2, n_space))

        out_connectivity_3 = spatio_temporal_func(X1d_3, n_permutations=50,
                                                  connectivity=connectivity,
                                                  max_step=0, threshold=1.67,
                                                  check_disjoint=True)
        # make sure we were operating on the same values
        split = len(out[0])
        assert_array_equal(out[0], out_connectivity_3[0][0])
        assert_array_equal(out[0], out_connectivity_3[0][1])

        # make sure we really got 2x the number of original clusters
        assert_true(len(out_connectivity_3[1]) == 2 * n_clust_orig)

        # Make sure that we got the old ones back
        data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
        data_2 = set([np.sum(out_connectivity_3[0][a[0], a[1]]) for a in
                     out_connectivity_3[1]])
        assert_true(len(data_1.intersection(data_2)) == len(data_1))

        # test new versus old method
        out_connectivity_4 = spatio_temporal_func(X1d_3, n_permutations=50,
                                                  connectivity=connectivity,
                                                  max_step=2, threshold=1.67)
        out_connectivity_5 = spatio_temporal_func(X1d_3, n_permutations=50,
                                                  connectivity=connectivity,
                                                  max_step=1, threshold=1.67)

        # clusters could be in a different order
        sums_4 = [np.sum(out_connectivity_4[0][a])
                  for a in out_connectivity_4[1]]
        sums_5 = [np.sum(out_connectivity_4[0][a])
                  for a in out_connectivity_5[1]]
        sums_4 = np.sort(sums_4)
        sums_5 = np.sort(sums_5)
        assert_array_almost_equal(sums_4, sums_5)

        if not _force_serial:
            assert_raises(ValueError, spatio_temporal_func, X1d_3,
                          n_permutations=1, connectivity=connectivity,
                          max_step=1, threshold=1.67, n_jobs=-1000)

        # not enough TFCE params
        assert_raises(KeyError, spatio_temporal_func, X1d_3,
                      connectivity=connectivity, threshold=dict(me='hello'))

        # too extreme a start threshold
        with warnings.catch_warnings(record=True) as w:
            spatio_temporal_func(X1d_3, connectivity=connectivity,
                                 threshold=dict(start=10, step=1))
        if not did_warn:
            assert_true(len(w) == 1)
            did_warn = True

        # too extreme a start threshold
        assert_raises(ValueError, spatio_temporal_func, X1d_3,
                      connectivity=connectivity, tail=-1,
                      threshold=dict(start=1, step=-1))
        assert_raises(ValueError, spatio_temporal_func, X1d_3,
                      connectivity=connectivity, tail=-1,
                      threshold=dict(start=-1, step=1))

        # wrong type for threshold
        assert_raises(TypeError, spatio_temporal_func, X1d_3,
                      connectivity=connectivity, threshold=[])

        # wrong value for tail
        assert_raises(ValueError, spatio_temporal_func, X1d_3,
                      connectivity=connectivity, tail=2)

        # make sure it actually found a significant point
        out_connectivity_6 = spatio_temporal_func(X1d_3, n_permutations=50,
                                                  connectivity=connectivity,
                                                  max_step=1,
                                                  threshold=dict(start=1,
                                                                 step=1))
        assert_true(np.min(out_connectivity_6[2]) < 0.05)
Example No. 51
0
def generate_spatial_data(shape=(40, 40),
                          n_subjects=1,
                          n_parcels=1,
                          mask=None,
                          mu=None,
                          sigma1=None,
                          sigma2=None,
                          model='ward',
                          seed=1,
                          smooth=0,
                          jitter=0.,
                          verbose=0):
    """ Generate a dataset

    Parameters
    ==========
    shape: tuple, optional
           dimensions of the spatial model (assuming a grid)
    n_subjects: int, optional, the number of subjects considered
    n_parcels: int, optional, the number of generated parcels
    mask: array of shape (shape), domain-defining binary mask
    mu: array of shape (n_parcels), the mean of the simulated parcels
    sigma1: array of shape (n_parcels),
            the first-level variance of the simulated parcels
    sigma2: array  of shape (n_parcels),
            the second-level variance of the simulated parcels
    model: string, one of ['ward', 'kmeans'],
           model used to generate the parcellation
    seed: int, optional, random generator seed
    smooth: float, optional,
            posterior smoothing of the data
    jitter: float, optional,
            spatial jitter on the positions
    verbose: boolean, optional, verbosity mode

    Returns
    =======
    xyz: array of shape (n_voxels, 3) the coordinates of the spatial data
    label: array of shape (n_voxels) indexes defining the spatial model
    X: array of shape(n_voxels, 1), signal attached to the voxels
    """
    from scipy.ndimage import gaussian_filter
    # Create the spatial model
    if mask is None:
        mask = np.ones(np.prod(shape))
        xyz = np.indices(shape).reshape(len(shape), np.prod(shape)).T
    else:
        xyz = np.vstack(np.where(mask)).T

    if model == 'kmeans':
        spatial_model = KMeans(n_clusters=n_parcels).fit(xyz)
        label = spatial_model.labels_
    elif model == 'ward':
        connectivity = grid_to_graph(*shape, mask=mask).tocsr()
        label = Ward(n_clusters=n_parcels, connectivity=connectivity).fit(
            np.random.randn(mask.sum(), 100)).labels_
        from sklearn import neighbors
        spatial_model = neighbors.KNeighborsClassifier(3)
        spatial_model.fit(xyz, label)
    else:
        raise ValueError('%s Unknown simulation model' % model)

    if jitter > 0:
        labels = [
            spatial_model.predict(xyz +
                                  jitter * np.random.rand(1, xyz.shape[1]))
            for subj in range(n_subjects)
        ]

    X = np.zeros((xyz.shape[0], n_subjects))
    # Generate the functional data
    if mu is None:
        mu = np.zeros(n_parcels)
    if sigma1 is None:
        sigma1 = np.ones(n_parcels)
    if sigma2 is None:
        sigma2 = np.ones(n_parcels)
    beta_ = np.random.randn(n_subjects)

    for k in range(n_parcels):
        if jitter > 0:
            mask = [label_ == k for label_ in labels]
        else:
            mask = [label == k for subj in range(n_subjects)]
        x, subj = generate_data_jitter(mu[k],
                                       sigma1[k],
                                       sigma2[k],
                                       mask,
                                       seed=seed,
                                       beta=beta_ * sigma2[k])

        for n_subj in range(n_subjects):
            X[mask[n_subj], n_subj] = x[subj == n_subj]

    if smooth > 0:  # smooth the data
        for subj in range(n_subjects):
            X[:, subj] = gaussian_filter(np.reshape(X[:, subj], shape),
                                         sigma=smooth).ravel()

    if verbose:
        fig = plt.figure(figsize=(10, 1.5))
        plt.subplot(1, n_subjects + 1, 1)
        plt.imshow(np.reshape(label, shape),
                   interpolation='nearest',
                   cmap=plt.cm.spectral)
        plt.title('Template')
        plt.axis('off')
        for ns in range(n_subjects):
            plt.subplot(1, n_subjects + 1, 2 + ns)
            plt.imshow(np.reshape(X[:, ns], shape), interpolation='nearest')
            plt.title('subject %d' % ns)
            plt.axis('off')
        plt.subplots_adjust(left=.01,
                            bottom=.01,
                            right=.99,
                            top=.99,
                            wspace=.05,
                            hspace=.01)
        fig.set_figheight(1.5)

    return xyz, label, X
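# A usage sketch for the generator above (illustrative parameters; assumes
# the generate_data_jitter helper it relies on is available):
xyz, label, X = generate_spatial_data(shape=(40, 40), n_subjects=3,
                                      n_parcels=5, model='ward', seed=1)
print(X.shape)  # (n_voxels, n_subjects), here (1600, 3)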
Example No. 52
0
# #############################################################################
# Generate data
try:  # SciPy >= 0.16 has face in misc
    from scipy.misc import face
    face = face(gray=True)
except ImportError:
    face = sp.face(gray=True)

# Resize it to 10% of the original size to speed up the processing
face = sp.misc.imresize(face, 0.10) / 255.

X = np.reshape(face, (-1, 1))

# #############################################################################
# Define the structure A of the data. Pixels connected to their neighbors.
connectivity = grid_to_graph(*face.shape)

# #############################################################################
# Compute clustering
print("Compute structured hierarchical clustering...")
st = time.time()
n_clusters = 15  # number of regions
ward = AgglomerativeClustering(n_clusters=n_clusters,
                               linkage='ward',
                               connectivity=connectivity)
ward.fit(X)
label = np.reshape(ward.labels_, face.shape)
print("Elapsed time: ", time.time() - st)
print("Number of pixels: ", label.size)
print("Number of clusters: ", np.unique(label).size)
Example No. 53
0
def test_cluster_permutation_with_adjacency(numba_conditional):
    """Test cluster level permutations with adjacency matrix."""
    from sklearn.feature_extraction.image import grid_to_graph
    condition1_1d, condition2_1d, condition1_2d, condition2_2d = \
        _get_conditions()

    n_pts = condition1_1d.shape[1]
    # we don't care about p-values in any of these, so do fewer permutations
    args = dict(seed=None, max_step=1, exclude=None, out_type='mask',
                step_down_p=0, t_power=1, threshold=1.67,
                check_disjoint=False, n_permutations=50)

    did_warn = False
    for X1d, X2d, func, spatio_temporal_func in \
            [(condition1_1d, condition1_2d,
              permutation_cluster_1samp_test,
              spatio_temporal_cluster_1samp_test),
             ([condition1_1d, condition2_1d],
              [condition1_2d, condition2_2d],
              permutation_cluster_test,
              spatio_temporal_cluster_test)]:
        out = func(X1d, **args)
        adjacency = grid_to_graph(1, n_pts)
        out_adjacency = func(X1d, adjacency=adjacency, **args)
        assert_array_equal(out[0], out_adjacency[0])
        for a, b in zip(out_adjacency[1], out[1]):
            assert_array_equal(out[0][a], out[0][b])
            assert np.all(a[b])

        # test spatio-temporal w/o time adjacency (repeat spatial pattern)
        adjacency_2 = sparse.coo_matrix(
            linalg.block_diag(adjacency.asfptype().todense(),
                              adjacency.asfptype().todense()))
        # nesting here is time then space:
        adjacency_2a = combine_adjacency(np.eye(2), adjacency)
        assert_array_equal(adjacency_2.toarray().astype(bool),
                           adjacency_2a.toarray().astype(bool))

        if isinstance(X1d, list):
            X1d_2 = [np.concatenate((x, x), axis=1) for x in X1d]
        else:
            X1d_2 = np.concatenate((X1d, X1d), axis=1)

        out_adjacency_2 = func(X1d_2, adjacency=adjacency_2, **args)
        # make sure we were operating on the same values
        split = len(out[0])
        assert_array_equal(out[0], out_adjacency_2[0][:split])
        assert_array_equal(out[0], out_adjacency_2[0][split:])

        # make sure we really got 2x the number of original clusters
        n_clust_orig = len(out[1])
        assert len(out_adjacency_2[1]) == 2 * n_clust_orig

        # Make sure that we got the old ones back
        data_1 = {np.sum(out[0][b[:n_pts]]) for b in out[1]}
        data_2 = {np.sum(out_adjacency_2[0][a]) for a in
                  out_adjacency_2[1][:]}
        assert len(data_1.intersection(data_2)) == len(data_1)

        # now use the other algorithm
        if isinstance(X1d, list):
            X1d_3 = [np.reshape(x, (-1, 2, n_space)) for x in X1d_2]
        else:
            X1d_3 = np.reshape(X1d_2, (-1, 2, n_space))

        out_adjacency_3 = spatio_temporal_func(
            X1d_3, n_permutations=50, adjacency=adjacency,
            max_step=0, threshold=1.67, check_disjoint=True)
        # make sure we were operating on the same values
        split = len(out[0])
        assert_array_equal(out[0], out_adjacency_3[0][0])
        assert_array_equal(out[0], out_adjacency_3[0][1])

        # make sure we really got 2x the number of original clusters
        assert len(out_adjacency_3[1]) == 2 * n_clust_orig

        # Make sure that we got the old ones back
        data_1 = {np.sum(out[0][b[:n_pts]]) for b in out[1]}
        data_2 = {np.sum(out_adjacency_3[0][a[0], a[1]]) for a in
                  out_adjacency_3[1]}
        assert len(data_1.intersection(data_2)) == len(data_1)

        # test new versus old method
        out_adjacency_4 = spatio_temporal_func(
            X1d_3, n_permutations=50, adjacency=adjacency,
            max_step=2, threshold=1.67)
        out_adjacency_5 = spatio_temporal_func(
            X1d_3, n_permutations=50, adjacency=adjacency,
            max_step=1, threshold=1.67)

        # clusters could be in a different order
        sums_4 = [np.sum(out_adjacency_4[0][a])
                  for a in out_adjacency_4[1]]
        sums_5 = [np.sum(out_adjacency_4[0][a])
                  for a in out_adjacency_5[1]]
        sums_4 = np.sort(sums_4)
        sums_5 = np.sort(sums_5)
        assert_array_almost_equal(sums_4, sums_5)

        if not _force_serial:
            pytest.raises(ValueError, spatio_temporal_func, X1d_3,
                          n_permutations=1, adjacency=adjacency,
                          max_step=1, threshold=1.67, n_jobs=-1000)

        # not enough TFCE params
        with pytest.raises(KeyError, match='threshold, if dict, must have'):
            spatio_temporal_func(
                X1d_3, adjacency=adjacency, threshold=dict(me='hello'))

        # too extreme a start threshold
        with pytest.warns(None) as w:
            spatio_temporal_func(X1d_3, adjacency=adjacency,
                                 threshold=dict(start=10, step=1))
        if not did_warn:
            assert len(w) == 1
            did_warn = True

        with pytest.raises(ValueError, match='threshold.*<= 0 for tail == -1'):
            spatio_temporal_func(
                X1d_3, adjacency=adjacency, tail=-1,
                threshold=dict(start=1, step=-1))
        with pytest.warns(RuntimeWarning, match='threshold.* is more extreme'):
            spatio_temporal_func(
                X1d_3, adjacency=adjacency, tail=1,
                threshold=dict(start=100, step=1))
        bad_con = adjacency.todense()
        with pytest.raises(ValueError, match='must be a SciPy sparse matrix'):
            spatio_temporal_func(
                X1d_3, n_permutations=50, adjacency=bad_con,
                max_step=1, threshold=1.67)
        bad_con = adjacency.tocsr()[:-1, :-1].tocoo()
        with pytest.raises(ValueError, match='adjacency.*the correct size'):
            spatio_temporal_func(
                X1d_3, n_permutations=50, adjacency=bad_con,
                max_step=1, threshold=1.67)
        with pytest.raises(TypeError, match='must be a'):
            spatio_temporal_func(
                X1d_3, adjacency=adjacency, threshold=[])
        with pytest.raises(ValueError, match='Invalid value for the \'tail\''):
            with pytest.warns(None):  # sometimes ignoring tail
                spatio_temporal_func(
                    X1d_3, adjacency=adjacency, tail=2)

        # make sure it actually found a significant point
        out_adjacency_6 = spatio_temporal_func(
            X1d_3, n_permutations=50, adjacency=adjacency, max_step=1,
            threshold=dict(start=1, step=1))
        assert np.min(out_adjacency_6[2]) < 0.05

        with pytest.raises(ValueError, match='not compatible'):
            with pytest.warns(RuntimeWarning, match='No clusters'):
                spatio_temporal_func(
                    X1d_3, n_permutations=50, adjacency=adjacency,
                    threshold=1e-3, stat_fun=lambda *x: f_oneway(*x)[:-1],
                    buffer_size=None)
Example No. 54
0
def test_cluster_permutation_t_test_with_connectivity():
    """Test cluster level permutations T-test with connectivity matrix."""
    try:
        try:
            from sklearn.feature_extraction.image import grid_to_graph
        except ImportError:
            from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        return

    out = permutation_cluster_1samp_test(condition1_1d, n_permutations=500)
    connectivity = grid_to_graph(1, condition1_1d.shape[1])
    out_connectivity = permutation_cluster_1samp_test(
        condition1_1d, n_permutations=500, connectivity=connectivity)
    assert_array_equal(out[0], out_connectivity[0])
    for a, b in zip(out_connectivity[1], out[1]):
        assert_true(np.sum(out[0][a]) == np.sum(out[0][b]))
        assert_true(np.all(a[b]))

    # test spatio-temporal with no time connectivity (repeat spatial pattern)
    connectivity_2 = sparse.coo_matrix(
        linalg.block_diag(connectivity.asfptype().todense(),
                          connectivity.asfptype().todense()))
    condition1_2 = np.concatenate((condition1_1d, condition1_1d), axis=1)

    out_connectivity_2 = permutation_cluster_1samp_test(
        condition1_2, n_permutations=500, connectivity=connectivity_2)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_2[0][:split])
    assert_array_equal(out[0], out_connectivity_2[0][split:])

    # make sure we really got 2x the number of original clusters
    n_clust_orig = len(out[1])
    assert_true(len(out_connectivity_2[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    n_pts = condition1_1d.shape[1]
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([
        np.sum(out_connectivity_2[0][a[:n_pts]])
        for a in out_connectivity_2[1][:]
    ])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))

    # now use the other algorithm
    condition1_3 = np.reshape(condition1_2, (40, 2, 350))
    out_connectivity_3 = mnestats.spatio_temporal_cluster_1samp_test(
        condition1_3,
        n_permutations=500,
        connectivity=connectivity,
        max_step=0,
        threshold=1.67,
        check_disjoint=True)
    # make sure we were operating on the same values
    split = len(out[0])
    assert_array_equal(out[0], out_connectivity_3[0][0])
    assert_array_equal(out[0], out_connectivity_3[0][1])

    # make sure we really got 2x the number of original clusters
    assert_true(len(out_connectivity_3[1]) == 2 * n_clust_orig)

    # Make sure that we got the old ones back
    data_1 = set([np.sum(out[0][b[:n_pts]]) for b in out[1]])
    data_2 = set([
        np.sum(out_connectivity_3[0][a[0], a[1]])
        for a in out_connectivity_3[1]
    ])
    assert_true(len(data_1.intersection(data_2)) == len(data_1))
Example No. 55
0
X -= X.mean(axis=0)
X /= X.std(axis=0)

y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

###############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(len(y), 2)  # cross-validation generator for model selection
ridge = BayesianRidge()
mem = Memory(cachedir='.', verbose=1)

# Ward agglomeration followed by BayesianRidge
A = grid_to_graph(n_x=size, n_y=size)
ward = WardAgglomeration(n_clusters=10,
                         connectivity=A,
                         memory=mem,
                         n_components=1)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1, cv=cv)
clf.fit(X, y)  # set the best parameters
coef_ = clf.best_estimator_.steps[-1][1].coef_
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression)  # caching function
anova = feature_selection.SelectPercentile(f_regression)
Example No. 56
0
def boo(subject_idx=0, cut_coords=None, n_components=20, n_clusters=2000, memory='nilearn_cache'):

    mem = Memory(cachedir='nilearn_cache')

    # ## Load the data ###################################################

    print("Fetch the data files from Internet")
    haxby_dataset = datasets.fetch_haxby(n_subjects=subject_idx + 1)

    print("Second, load the labels")
    haxby_labels = np.genfromtxt(haxby_dataset.session_target[0],
                                 skip_header=1, usecols=[0],
                                 dtype=str)

    # ## Find voxels of interest ##############################################

    print("Load the data.")
    anat_filename = haxby_dataset.anat[subject_idx]
    anat_img = nibabel.load(anat_filename)
    fmri_filename = haxby_dataset.func[subject_idx]
    fmri_raw_img = nibabel.load(fmri_filename)

    print("Build a mask based on the activations.")
    epi_masker = NiftiMasker(mask_strategy='epi', detrend=True, standardize=True)
    epi_masker = mem.cache(epi_masker.fit)(fmri_raw_img)
    plot_roi(epi_masker.mask_img_,
             bg_img=anat_img,
             title='EPI mask',
             cut_coords=cut_coords)

    print("Normalize the (transformed) data")  # zscore per pixel, over examples.
    fmri_masked_vectors = epi_masker.transform(fmri_raw_img)
    fmri_normed_vectors = mem.cache(stats.mstats.zscore)(fmri_masked_vectors, axis=0)
    fmri_normed_img = epi_masker.inverse_transform(fmri_normed_vectors)

    print("Smooth the (spatial) data.")
    fmri_smooth_img = mem.cache(image.smooth_img)(fmri_normed_img, fwhm=7)

    print("Mask the MRI data.")
    masked_fmri_vectors = mem.cache(epi_masker.transform)(fmri_smooth_img)

    # ## Compute mean values based on condition matrix ##########################################
    condition_names = list(np.unique(haxby_labels))
    n_conditions = len(condition_names)
    n_good_voxels = masked_fmri_vectors.shape[1]

    mean_vectors = np.empty((n_conditions, n_good_voxels))
    for ci, condition in enumerate(condition_names):
        condition_vectors = masked_fmri_vectors[haxby_labels == condition, :]
        mean_vectors[ci, :] = condition_vectors.mean(axis=0)

    # ## Use similarity across conditions as the 4th dimension ##########################################
    n_conds = len(condition_names)
    n_compares = n_conds * (n_conds - 1) // 2  # integer count of pairs

    p_vectors = np.zeros((n_compares, masked_fmri_vectors.shape[1]))
    comparison_text = []
    comparison_img = []
    idx = 0
    for i, cond in enumerate(condition_names):
        for j, cond2 in enumerate(condition_names[i+1:]):
            print("Computing ttest for %s vs. %s." % (cond, cond2))
            _, p_vector = stats.ttest_ind(
                masked_fmri_vectors[haxby_labels == cond, :],
                masked_fmri_vectors[haxby_labels == cond2, :],
                axis=0)

            p_vector /= p_vector.max()  # normalize
            p_vector = -np.log10(p_vector)
            p_vector[np.isnan(p_vector)] = 0.
            p_vector[p_vector > 10.] = 10.

            p_img = epi_masker.inverse_transform(p_vector)
            comparison_img.append(p_img)
            comparison_text.append('%s vs. %s' % (cond, cond2))
            p_vectors[idx, :] = p_vector
            idx += 1

    #n_comparisons = n_conditions * (n_conditions-1) / 2
    #similarity_vectors = np.empty((n_good_voxels, n_comparisons))
    #for vi in np.arange(n_good_voxels):
    #    similarity_vectors[vi, :] = pdist(mean_vectors[:, vi])



    # Compute a connectivity matrix (for constraining the clustering)
    mask_data = epi_masker.mask_img_.get_data().astype(bool)
    connectivity = sk_image.grid_to_graph(n_x=mask_data.shape[0], n_y=mask_data.shape[1],
                                          n_z=mask_data.shape[2], mask=mask_data)

    # Cluster (#2)

    start = time.time()
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity, memory=memory)
    ward.fit(p_vectors)

    print("Ward agglomeration %d clusters: %.2fs" % (
        n_clusters, time.time() - start))

    # Compute an image with one ROI per label, and save to disk
    labels = ward.labels_ + 1    # Avoid 0 label - 0 means mask.
    labels_img = epi_masker.inverse_transform(labels)
    labels_img.to_filename('parcellation.nii')

    # Plot image with len(labels) ROIs, and store
    #   the cut coordinates to reuse for all plots
    #   and the figure for plotting all to a common axis
    first_plot = plot_roi(labels_img, title="Ward parcellation", bg_img=anat_img)
    plt.show()
Example No. 57
0
    def execute(self, eopatch):
        """
        :param eopatch: Input EOPatch
        :type eopatch: EOPatch
        :return: Transformed EOPatch
        :rtype: EOPatch
        """
        data = self.construct_data(eopatch)

        # Reshape the data: AgglomerativeClustering expects a 2-D array of
        # shape (n_samples, n_features)
        org_shape = data.shape
        data = np.reshape(data, (-1, org_shape[-1]))
        org_length = len(data)

        graph_args = {"n_x": org_shape[0], "n_y": org_shape[1]}
        locations = None

        # All connections to masked pixels are removed
        if self.mask_name is not None:
            mask = eopatch.mask_timeless[self.mask_name].squeeze()
            graph_args["mask"] = mask
            locations = [
                i for i, elem in enumerate(np.ravel(mask)) if elem == 0
            ]
            data = np.delete(data, locations, axis=0)

        # If connectivity is not set, use pixel-to-pixel connections
        if self.connectivity is None:
            self.connectivity = grid_to_graph(**graph_args)

        model = AgglomerativeClustering(
            distance_threshold=self.distance_threshold,
            affinity=self.affinity,
            linkage=self.linkage,
            connectivity=self.connectivity,
            n_clusters=self.n_clusters,
            compute_full_tree=self.compute_full_tree,
        )

        model.fit(data)
        trimmed_labels = model.labels_
        if self.remove_small > 0:
            # Count how many pixels each cluster covers
            labels = np.zeros(model.n_clusters_)
            for i in trimmed_labels:
                labels[i] += 1

            # Set to -1 all pixels belonging to clusters that are too small
            for i, no_lab in enumerate(labels):
                if no_lab < self.remove_small:
                    trimmed_labels[trimmed_labels == i] = -1

        # Transform the data back to the original shape, setting all masked regions to -1
        if self.mask_name is not None:
            new_data = [-1] * org_length
            for i, val in zip(np.delete(range(org_length), locations),
                              trimmed_labels):
                new_data[i] = val
            trimmed_labels = new_data

        trimmed_labels = np.reshape(trimmed_labels, org_shape[:-1])

        eopatch[FeatureType.DATA_TIMELESS,
                self.new_feature_name] = trimmed_labels[..., np.newaxis]

        return eopatch
Example No. 58
0
signal_mean = 100
signal_sd = 100
noise_sd = 0.01
gaussian_sd = 5
sigma = 1e-3  # sigma for the "hat" method
threshold = -stats.distributions.t.ppf(0.05, n_subjects - 1)
threshold_tfce = dict(start=0, step=0.2)
n_permutations = 1024  # number of clustering permutations (1024 for exact)

###############################################################################
# Construct simulated data
# ------------------------
#
# Make the connectivity matrix just next-neighbor spatially
n_src = width * width
connectivity = grid_to_graph(width, width)

#    For each "subject", make a smoothed noisy signal with a centered peak
rng = np.random.RandomState(42)
X = noise_sd * rng.randn(n_subjects, width, width)
#    Add a signal at the dead center
X[:, width // 2, width // 2] = signal_mean + rng.randn(n_subjects) * signal_sd
#    Spatially smooth with a 2D Gaussian kernel
size = width // 2 - 1
gaussian = np.exp(-(np.arange(-size, size + 1)**2 / float(gaussian_sd**2)))
for si in range(X.shape[0]):
    for ri in range(X.shape[1]):
        X[si, ri, :] = np.convolve(X[si, ri, :], gaussian, 'same')
    for ci in range(X.shape[2]):
        X[si, :, ci] = np.convolve(X[si, :, ci], gaussian, 'same')
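# The simulated data is typically fed into a cluster permutation test; a
# sketch, assuming mne.stats is available, that the era-appropriate
# `connectivity` keyword is supported, and that X is flattened to
# (n_subjects, n_src):
from mne.stats import permutation_cluster_1samp_test

T_obs, clusters, p_values, H0 = permutation_cluster_1samp_test(
    X.reshape(n_subjects, n_src), threshold=threshold,
    connectivity=connectivity, n_permutations=n_permutations)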
Example No. 59
0
X /= X.std(axis=0)

y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

# #############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(2)  # cross-validation generator for model selection
ridge = BayesianRidge()
cachedir = tempfile.mkdtemp()
mem = Memory(cachedir=cachedir, verbose=1)

# Ward agglomeration followed by BayesianRidge
connectivity = grid_to_graph(n_x=size, n_y=size)
ward = FeatureAgglomeration(n_clusters=10,
                            connectivity=connectivity,
                            memory=mem)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1, cv=cv)
clf.fit(X, y)  # set the best parameters
coef_ = clf.best_estimator_.steps[-1][1].coef_
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression)  # caching function
anova = feature_selection.SelectPercentile(f_regression)
clf = Pipeline([('anova', anova), ('ridge', ridge)])
Example No. 60
0
from sklearn import datasets
import numpy as np
from sklearn.feature_extraction.image import grid_to_graph

digits = datasets.load_digits()
images = digits.images
X = np.reshape(images, (len(images), -1))
print(images[0].shape)
connectivity = grid_to_graph(*images[0].shape)
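# One way this connectivity is typically used (a sketch): feature
# agglomeration on the 8x8 digit images, merging neighbouring pixels into
# 32 super-pixel features.
from sklearn.cluster import FeatureAgglomeration

agglo = FeatureAgglomeration(n_clusters=32, connectivity=connectivity)
X_reduced = agglo.fit_transform(X)  # (n_samples, 64) -> (n_samples, 32)
print(X_reduced.shape)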