def cluster_ward(roi, reference, i):
    """Compute Ward linkages for the cached distance matrices of bootstrap ``i``.

    Loads the raw and the scaled distance pickles of bootstrap iteration
    ``i``, runs ``ward`` on the transpose of each, and dumps both linkage
    results into the same results directory.

    Args:
        roi: Not used by the current body; kept so existing callers keep
            working (only the removed resampling code referenced it).
        reference: Not used by the current body; kept for the same reason.
        i: Bootstrap iteration number, interpolated into every file name
            with ``%d``.

    Returns:
        None. The results are written to disk.

    An ``IOError`` raised anywhere in the load / cluster / dump sequence
    (for example a missing input pickle) does not propagate: the iteration
    is skipped and a ``RuntimeWarning`` describing the failure is issued.
    """
    import warnings

    try:
        distances = load(
            '/projects/delavega/clustering/results/bootstrap/hierarchical/whole_brain_PCA_dist_min100_b%d.pkl'
            % i)
        scaled_distances = load(
            '/projects/delavega/clustering/results/bootstrap/hierarchical/whole_brain_PCA_dist_min100_scaled_b%d.pkl'
            % i)

        Z = ward(distances.T)
        Z_scaled = ward(scaled_distances.T)

        dump(
            Z,
            '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_b%d.pkl'
            % i)
        dump(
            Z_scaled,
            '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_scaled_b%d.pkl'
            % i)
    except IOError as err:
        # Still best-effort (one bad bootstrap must not abort the run), but
        # no longer silent: a skipped iteration leaves a trace.
        warnings.warn(
            'cluster_ward: skipping bootstrap %d: %s' % (i, err),
            RuntimeWarning)
# Example #2
# 0
def cluster_ward(distances, scaled_distances):
    """Compute and persist Ward linkages for the full data set.

    Both linkages are computed first; only then are they written out, the
    unscaled one before the scaled one.

    Args:
        distances: Input handed to ``ward`` for the unscaled linkage.
        scaled_distances: Input handed to ``ward`` for the scaled linkage.

    Returns:
        None. The two linkage results are dumped to fixed paths.
    """
    linkages = [ward(matrix) for matrix in (distances, scaled_distances)]
    targets = (
        '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_full.pkl',
        '/projects/delavega/clustering/results/bootstrap/hierarchical/Z_ward_wholebrain_scaled_full.pkl',
    )
    for linkage, target in zip(linkages, targets):
        dump(linkage, target)
def clustering(images, metric="euclidean", t=1.15, n_components=100):
    """Cluster images with Ward linkage on their PCA-reduced pixel vectors.

    Args:
        images: Array whose first axis indexes the images. All remaining
            axes are flattened into a single feature vector per image.
        metric: Distance metric name passed to ``pdist``.
        t: Threshold passed to ``fcluster`` (its default criterion is used).
        n_components: Number of PCA components kept before distances are
            computed. Defaults to 100, the value that used to be hard-coded.

    Returns:
        The flat cluster assignment returned by ``fcluster``.
    """
    # reshape(n, -1) equals the old shape[1] * shape[2] product for 3-D
    # input and additionally accepts any other number of trailing axes.
    flat = images.reshape(images.shape[0], -1)
    reduced = PCA(n_components=n_components).fit_transform(flat)
    # pdist yields a condensed distance matrix; ward turns it into a linkage.
    linkage = ward(pdist(reduced, metric=metric))
    return fcluster(linkage, t=t)
# Example #4
# 0
def hierarchicalClustering(g,k, labels, max_affinity=None):
    '''
    Performs hierarchical clustering using the connections in graph g. Edge weights
    are assumed to be affinity, thus higher weights means the nodes are more similar.
    Computes a distance matrix from the graph affinities, and clusters using
    the 'fastcluster' library implementation of ward's linkage.
    @param g: The graph as from generateConnectivityGraph
    @param k: Number of clusters
    @param labels: The ground truth labels used for measuring cluster accuracy
    @param max_affinity: The maximum similarity score that is possible on the graph.
    If None, then the max edge weight of the graph is used.
    @return: A tuple (clusts, score) where clusts is the ordered list of cluster
    indexes (0-based) and score is the v-measure between clusts and labels, or -1
    when the v-measure could not be computed.
    '''
    M = generateAffinityMatrix(g, max_affinity=max_affinity)
    if max_affinity is None:
        max_affinity = M.max()

    # Invert affinities into distances: the higher the affinity, the smaller
    # the resulting distance.
    D = max_affinity - M

    # NOTE(review): D is passed to ward() as-is. fastcluster documents a 2-D
    # input as a set of observation vectors, not as a precomputed distance
    # matrix -- confirm this is the intended interpretation.
    Z = fc.ward(D) #linkage structure Z
    clusts = spc.fcluster(Z, k, criterion="maxclust")
    try:
        score = sklearn.metrics.v_measure_score(labels, clusts)
    except (NameError, AttributeError):
        # sklearn (or its metrics submodule) was never imported.
        print("Warning: sklearn module not loaded. V_measure_score not computed.")
        score = -1
    except Exception as err:
        # Scoring stays best-effort, but the real reason is reported instead
        # of being blamed on a missing sklearn.
        print("Warning: V_measure_score not computed: %s" % (err,))
        score = -1

    clusts = clusts - 1 #convert from 1-based to 0-based indexes
    return (clusts,score)
# Example #5
# 0
def cluster_ward(dataset, distances, roi, regions):
    """Cut one Ward linkage at several cluster counts and return label images.

    Args:
        dataset: Object whose ``masker.get_header()`` supplies the image
            header used for every output image.
        distances: Input handed to ``ward`` to build the linkage.
        roi: Object whose ``masker.unmask()`` maps the flat label vector
            back into a volume.
        regions: Iterable of cluster counts; each is passed to ``fcluster``
            with the ``'maxclust'`` criterion.

    Returns:
        A list with one ``nifti1.Nifti1Image`` per entry of ``regions``, in
        the same order.
    """
    # Call form of print: same output on Python 2, valid syntax on Python 3.
    print("Clustering: ")

    Z = ward(distances)

    results = []
    for n_reg in regions:
        labels = fcluster(Z, n_reg, 'maxclust')

        # A header is fetched per image because the display range written
        # into it depends on this cut's labels.
        header = dataset.masker.get_header()
        header['cal_max'] = labels.max()
        header['cal_min'] = labels.min()
        voxel_labels = roi.masker.unmask(labels)
        results.append(nifti1.Nifti1Image(voxel_labels, None, header))

    return results
# Example #6
# 0
def hierarchicalClusteringDendrogram(g, max_affinity=None,show_dendrogram=False):
    '''
    Builds the Ward's linkage structure for the connections in graph g. The
    affinities are turned into distances by subtracting them from max_affinity,
    and the result is handed to the 'fastcluster' ward implementation.
    @param g: The graph as from generateConnectivityGraph
    @param max_affinity: The maximum similarity score that is possible on the graph.
    If None, then the max edge weight of the graph is used.
    @param show_dendrogram: If True, the dendrogram of the linkage is drawn in a
    new pylab figure before returning.
    @return: Z, the linkage structure
    '''
    affinity = generateAffinityMatrix(g, max_affinity=max_affinity)
    ceiling = affinity.max() if max_affinity is None else max_affinity

    linkage = fc.ward(ceiling - affinity)
    if not show_dendrogram:
        return linkage

    # pylab is only needed (and only imported) when a plot is requested.
    import pylab
    figure = pylab.figure()
    spc.dendrogram(linkage)
    figure.show()
    return linkage
def compute_clustering_fast(distance):
    """Cut a fastcluster Ward linkage of ``distance`` into at most two clusters.

    Args:
        distance: Input handed to ``fastcluster.ward``.

    Returns:
        The flat cluster assignment from ``scipy.cluster.hierarchy.fcluster``
        using the ``"maxclust"`` criterion with a limit of 2.
    """
    # The former time.clock() calls were dropped: their results were never
    # used, and time.clock no longer exists on Python 3.8+.
    linkage = fastcluster.ward(distance)
    return scipy.cluster.hierarchy.fcluster(linkage, 2, criterion="maxclust")
import joblib
from fastcluster import ward
from neurosynth.analysis.cluster import Clusterable
from neurosynth.base.dataset import Dataset
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import scale

# Load the dataset and wrap it with the ROI mask.
dataset = Dataset.load('/projects/delavega/dbs/db_v6_topics-100.pkl')
roi = Clusterable(
    dataset, '/home/delavega/projects/classification/masks/l_70_mask.nii.gz')

# Load the previously saved reference object.
saved_pca = '/projects/delavega/clustering/dv_v6_reference_pca.pkl'
reference = joblib.load(saved_pca)

# Correlation distance between ROI rows and reference rows, then scaled
# along axis 1; the scaled matrix is saved before clustering.
distances = scale(
    pairwise_distances(roi.data, reference.data, metric='correlation'),
    axis=1)
joblib.dump(
    distances,
    '/home/delavega/projects/clustering/results/hierarchical/v6_distances_l_70_scaled.pkl'
)

# Ward linkage on the scaled distances, saved next to them.
Z = ward(distances)
joblib.dump(
    Z,
    '/home/delavega/projects/clustering/results/hierarchical/v6_ward_l70_scaled.pkl'
)
# Example #9
# 0
import fastcluster as fc
import pandas as pd

# Read the table to cluster and compute its Ward linkage.
df = pd.read_csv("~/downloads/to_cluster.csv")
out = fc.ward(df)