def test_ward_agglomeration():
    """ Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
    assert_equal(len(warning_list), 1)

    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
def ward_adni_rs_fmri(func_files, n_clusters=200):
    masker = NiftiMasker(mask_strategy='epi', mask_args=dict(opening=1))
    masker.fit(func_files)
    func_masked = masker.transform(func_files)
    #func_masked = masker.transform_niimgs(func_files, n_jobs=4)
    func_masked = np.vstack(func_masked)

    ###########################################################################
    # Ward
    ###########################################################################
    mask = masker.mask_img_.get_data().astype(np.bool)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)

    # Computing the ward for the first time, this is long...
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                             memory='nilearn_cache')
    ward.fit(func_masked)
    ward_labels_unique = np.unique(ward.labels_)
    ward_labels = ward.labels_

    ward_filename = '_'.join(['ward', str(n_clusters)])
    img_ward = masker.inverse_transform(ward.labels_)
    img_ward.to_filename(os.path.join(CACHE_DIR, ward_filename))
def test_ward_agglomeration():
    """ Check that we obtain the correct solution in a simplistic case
    """
    rng = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)

    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
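# The test variants above exercise the deprecation path of WardAgglomeration.
# For reference, a minimal sketch of the replacement API (assuming
# scikit-learn >= 0.17, where WardAgglomeration was removed in favour of
# FeatureAgglomeration, whose default linkage is 'ward'):
import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.feature_extraction.image import grid_to_graph

X = np.random.RandomState(0).randn(50, 100)   # 50 samples, 100 features
connectivity = grid_to_graph(10, 10)          # features laid out on a 10x10 grid
agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
X_red = agglo.fit_transform(X)                # shape (50, 5): one mean per parcel
X_back = agglo.inverse_transform(X_red)       # shape (50, 100): parcel means broadcast back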
def prepare_data(imgs, connectivity, mask, n_clusters=5000, n_components=100):
    # data preparation
    Z = nifti_masker.fit_transform(imgs)
    pca = RandomizedPCA(n_components=n_components)
    Z_ = pca.fit_transform(Z.T).T
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                             memory='nilearn_cache').fit(Z_)
    W = ward.transform(Z)
    del Z

    # data cube is a more convenient representation
    cube = np.array([W[subject_label == subject]
                     for subject in np.arange(n_subjects)])

    # parcel connectivity
    parcel_connectivity = do_parcel_connectivity(mask, n_clusters, ward)
    return cube, ward, parcel_connectivity
def test_ward_agglomeration():
    """ Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert_true(np.size(np.unique(ward.labels_)) == 5)

    Xred = ward.transform(X)
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    assert_true(np.unique(Xfull[0]).size == 5)
    assert_array_almost_equal(ward.transform(Xfull), Xred)
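# Why the assertions above hold: transform averages the features of each of
# the 5 parcels, and inverse_transform broadcasts each parcel mean back to
# every feature of that parcel, so each reconstructed row holds exactly
# n_clusters distinct values. A sketch of that semantics (not the library
# source; `labels` plays the role of ward.labels_):
import numpy as np

def inverse_transform_sketch(X_red, labels):
    # X_red: (n_samples, n_clusters); labels: (n_features,) parcel index of
    # each feature, assumed to take values 0..n_clusters-1
    return X_red[:, labels]   # (n_samples, n_features)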
# Mask data
epi_masked = epi_img[mask]

### Ward ######################################################################

# Compute connectivity matrix
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=500, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(epi_masked.T)
print "Ward agglomeration 500 clusters: %.2fs" % (time.time() - start)

# Compute the ward with more clusters, should be faster
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(epi_masked.T)
print "Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start)

### Prepare output ############################################################

### Show result ###############################################################
from matplotlib import pyplot as pl
### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                         memory='nilearn_cache', compute_full_tree=True)
ward.fit(fmri_masked)
print "Ward agglomeration %d clusters: %.2fs" % (n_clusters,
                                                 time.time() - start)

labels = ward.labels_ + 1
labels = nifti_masker.inverse_transform(labels).get_data()
# 0 is the background, putting it to -1
labels = labels - 1

# Display the labels
plot_labels(labels, 8)
pl.savefig('ward.eps')
pl.savefig('ward.pdf')
""" Test various n_clusters """ for N_CLUSTERS in N_CLUSTERS_SET: ############################################################################## # Ward ############################################################################## mask = masker.mask_img_.get_data().astype(np.bool) shape = mask.shape connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask) # Computing the ward for the first time, this is long... ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity, memory='nilearn_cache') ward.fit(pet_data_masked) ward_labels_unique = np.unique(ward.labels_) ward_labels = ward.labels_ ############################################################################## # Generate cluster matrix ############################################################################## x = np.zeros((len(data), N_CLUSTERS)) for idx in np.arange(len(data)): for val in ward_labels_unique : ind = (ward_labels == val) x[idx, val] = np.mean(pet_data_masked[idx, ind])
masker = NiftiMasker(mask_strategy='epi', mask_args=dict(opening=8))
masker.fit(pet_files)
pet_masked = masker.transform_niimgs(pet_files, n_jobs=2)
#pet_masked = np.vstack(pet_masked)

mask = masker.mask_img_.get_data().astype(np.bool)
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(pet_masked[0])
print "Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start)

labels = ward.labels_ + 1
labels_img = masker.inverse_transform(labels)

first_plot = plot_roi(labels_img, pet_img[0], title="Ward parcellation",
                      display_mode='xz')

# labels_img is a Nifti1Image object, it can be saved to file with the
# following code:
labels_img.to_filename('parcellation.nii')
"""
##################################################################
# Perform parcellation on smoothed PCA-ed timecourses for each ROI
mem = Memory(cachedir=".", verbose=1)
n_clust = np.zeros(n_rois)  # Different #clusters for different ROI
template = np.zeros((dim[0], dim[1], dim[2]))
print("Performing Ward Clustering")
for i in np.arange(n_rois):
    # Determine the number of clusters to divide each ROI into
    roi_mask = brain == rois[i]
    n_clust[i] = np.round(np.sum(roi_mask) * n_parcels / n_vox)
    if n_clust[i] <= 1:
        template[roi_mask] = np.shape(np.unique(template))[0]
    else:
        # Define connectivity based on brain mask
        A = grid_to_graph(n_x=dim[0], n_y=dim[1], n_z=dim[2], mask=roi_mask)
        # Create ward object; n_clusters must be an int, and n_clust holds
        # floats since it was allocated with np.zeros
        ward = WardAgglomeration(n_clusters=int(n_clust[i]),
                                 connectivity=A.tolil(), memory=mem)
        ward.fit(tc_group[roi_mask.ravel(), :].T)
        template[roi_mask] = ward.labels_ + np.shape(np.unique(template))[0]

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
for sub in subList:
    print str("Subject" + sub)
    # Load preprocessed voxel timecourses
    tc = io.loadmat(os.path.join(BASE_DIR, sub, "restfMRI/tc_vox.mat"))
    tc = tc["tc"]
    # Generate subject-specific tissue mask
    gm_file = os.path.join(BASE_DIR, sub, "anat", "gmMask.nii")
if __name__ == '__main__':
    memory = Memory('/havoc/cache', mmap_mode='r+')
    le = LabelEncoder()
    lb = LabelBinarizer()
    loader = NiftiMasker(mask='/tmp/mask.nii.gz', memory=memory,
                         memory_level=1)
    reporter = Reporter(report_dir='/tmp/reporter')
    cv = ShuffleSplit(len(target), n_iter=5)
    Cs = [1e-3, 1e-2, 1e-1, 1., 10, 1e2, 1e3]
    scaler = StandardScaler()

    n_x, n_y, n_z = mask.shape
    connectivity = grid_to_graph(n_x, n_y, n_z, mask=mask_array)
    ward = WardAgglomeration(n_clusters=2000, connectivity=connectivity,
                             memory=memory)
    svc = LinearSVC(penalty='l1', dual=False)
    # rand_svc = RandomizedWardClassifier(mask_array, n_iter=16,
    #                                     memory=memory, n_jobs=-1)
    pipe = Pipeline([('scaler', scaler), ('clf', svc)])
    grid = GridSearchCV(pipe, param_grid={'clf__C': Cs}, cv=cv, n_jobs=1)
    grid.best_estimator_ = grid.estimator
    ovr = OneVsRestClassifier(grid, n_jobs=1)

    # decoder = Decoder(ovr, loader, lb, reporter)
    # decoder.fit(niimgs, target).score(niimgs, target)
    # pipeline = Pipeline([('scaler', scaler), ('clf', clf)])
    decoder = Decoder(ovr, loader, lb, reporter)
# Perform parcellation on smoothed PCA-ed timecourses for each ROI
mem = Memory(cachedir='.', verbose=1)
n_clust = np.zeros(n_rois)  # Different #clusters for different ROI
template = np.zeros((dim[0], dim[1], dim[2]))
print("Performing Ward Clustering")
for i in np.arange(n_rois):
    # Determine the number of clusters to divide each ROI into
    roi_mask = brain == rois[i]
    n_clust[i] = np.round(np.sum(roi_mask) * n_parcels / n_vox)
    if n_clust[i] <= 1:
        template[roi_mask] = np.shape(np.unique(template))[0]
    else:
        # Define connectivity based on brain mask
        A = grid_to_graph(n_x=dim[0], n_y=dim[1], n_z=dim[2], mask=roi_mask)
        # Create ward object; n_clusters must be an int
        ward = WardAgglomeration(n_clusters=int(n_clust[i]),
                                 connectivity=A.tolil(), memory=mem)
        ward.fit(tc_group[roi_mask.ravel(), :].T)
        template[roi_mask] = ward.labels_ + np.shape(np.unique(template))[0]
# Run relabel_disconnected_parcel.py

# Saving the template
io.savemat(os.path.join(BASE_DIR, "group/ica_roi_parcel150.mat"),
           {"template": template})
nii = nib.Nifti1Image(template, brain_img.affine)
nib.save(nii, os.path.join(BASE_DIR, "group/ica_roi_parcel150.nii"))

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
for sub in subList:
# Spatial smoothing to encourage smooth parcels
dim = np.shape(brain)
tc = tc.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc.shape[-1]
for t in np.arange(n_tpts):
    tc[:, :, :, t] = gaussian_filter(tc[:, :, :, t], sigma=1)
tc = tc.reshape((-1, n_tpts))
tc = tc[brain.ravel() == 1, :]

# Functional parcellation with Ward clustering
print("Performing Ward Clustering")
mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1],
                  n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=n_parcels, connectivity=A.tolil(),
                         memory=mem)
ward.fit(tc.T)
template = np.zeros((dim[0], dim[1], dim[2]))
# labels start from 0, which is used for background
template[brain == 1] = ward.labels_ + 1

# Remove single voxels not connected to parcel
#for i in np.unique(template)[1:]:
#    labels, n_labels = label(template == i, structure=np.ones((3, 3, 3)))
#    if n_labels > 1:
#        for j in np.arange(n_labels):
#            if np.sum(labels == j + 1) < 10:
#                template[labels == j + 1] = 0

# Saving the template
nii = nib.Nifti1Image(template, brain_img.affine)
nib.save(nii, PARCEL_PATH)
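# A hedged sketch making the commented-out cleanup above runnable: drop
# connected components of a parcel smaller than 10 voxels (relies on
# scipy.ndimage.label; `min_size` is an assumed knob, not from the source):
import numpy as np
from scipy.ndimage import label as cc_label

def remove_small_components(template, min_size=10):
    for i in np.unique(template)[1:]:          # skip 0, the background
        comps, n_comps = cc_label(template == i, structure=np.ones((3, 3, 3)))
        if n_comps > 1:
            for j in np.arange(n_comps):
                if np.sum(comps == j + 1) < min_size:
                    template[comps == j + 1] = 0   # send tiny islands to background
    return template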
elif folding == 'leaveoneout':
    n_folds[0] = y.shape[0]
    cv = LeaveOneOut(n=y.shape[0])
else:
    print("unknown crossvalidation method!")

#-- classifier
clf = svm.SVC(kernel='linear', probability=True, C=svm_C)

#-- normalizer
scaler = Scaler()

#-- Clustering
n_clusters = 100
cluster = WardAgglomeration(n_clusters=n_clusters, connectivity=None,
                            compute_full_tree='auto')

#-- feature selection
fs_n = round(n_features * fs_n) / n_features
if fs_n == 100:
    fs = SelectKBest(f_classif, k=n_features)
else:
    fs = SelectPercentile(f_classif, percentile=fs_n)

#-- results initialization
if compute_predict:
    # NaN-filled arrays; the original used `** np.nan`, which happens to give
    # the same result, but multiplying by np.nan is the idiomatic form
    predict = np.zeros([n_splits, n_samples, n_dims, n_dims_tg]) * np.nan
    predictg = np.zeros([n_splits, n_samplesg, n_dimsg, n_dimsg_tg,
                         n_folds]) * np.nan
else:
def BMA_consensus_cluster_parallel(cfg, remote_path, remote_BOLD_fn,
                                   remote_mask_fn, Y, nifti_masker,
                                   num_vox, K_clus, K_clusters,
                                   parc, alpha, prop, nbItRFIR, onsets,
                                   durations, output_sub_parc,
                                   rescale=True, averg_bold=False):
    '''
    Performs all steps for one clustering case (Kclus given, number l of
    the parcellation given)
    remote_path: path on the cluster, where results will be stored
    '''
    import os
    import sys
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_BB/Parcellations/")
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/")
    sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/Scripts_divers_utiles/Scripts_utiles/")
    sys.path.append('/home/pc174679/local/installations/consensus-cluster-0.6')

    from Random_parcellations import random_parcellations, subsample_data_on_time
    from Divers_parcellations_test import *
    from RFIR_evaluation_parcellations import JDE_estim, RFIR_estim, clustering_from_RFIR
    from Random_parcellations import hrf_roi_to_vox
    from pyhrf.tools._io import remote_copy, remote_mkdir
    from nisl import io

    #nifti_masker.mask = remote_mask_fn

    # Creation of the necessary paths --> do not do here
    parc_name = 'Subsampled_data_with_' + str(K_clus) + 'clusters'
    parc_name_clus = parc_name + 'rnd_number_' + str(parc + 1)

    remote_sub = os.sep.join((remote_path, parc_name))
    #if not os.path.exists(remote_sub):
        #os.path.exists(remote_sub)
        #print 'remote_sub:', remote_sub
        #os.makedirs(remote_sub)
    remote_sub_parc = os.sep.join((remote_sub, parc_name_clus))
    #if not os.path.exists(remote_sub_parc):
        #os.makedirs(remote_sub_parc)

    output_RFIR_parc = os.sep.join((output_sub_parc, 'RFIR_estim'))

    ###################################
    ## 1st STEP: SUBSAMPLING
    print '--- Subsample data ---'
    Ysub = subsample_data_on_time(Y, remote_mask_fn, K_clus, alpha, prop,
                                  nifti_masker, rescale=rescale)
    print 'Ysub:', Ysub
    print 'remote_sub_prc:', remote_sub_parc
    Ysub_name = ('Y_sub_' + str(K_clus) + 'clusters_' + 'rnd_number_'
                 + str(parc + 1) + '.nii')
    Ysub_fn = os.sep.join((remote_sub_parc, Ysub_name))
    Ysub_masked = nifti_masker.inverse_transform(Ysub).get_data()
    write_volume(Ysub_masked, Ysub_fn)

    ###################################
    ## 2nd STEP: RFIR
    print '--- Performs RFIR estimation ---'
    remote_RFIR_parc_clus = os.sep.join((remote_sub_parc, 'RFIR_estim'))
    #if not os.path.exists(remote_RFIR_parc): os.makedirs(remote_RFIR_parc)
    #remote_RFIR_parc_clus = os.sep.join((remote_RFIR_parc, parc_name_clus))
    #if not os.path.exists(remote_RFIR_parc_clus): os.makedirs(remote_RFIR_parc_clus)
    print ' * output path for RFIR ', remote_RFIR_parc_clus
    print ' * RFIR for subsampling nb ', str(parc + 1), ' with ', K_clus, ' clusters'
    RFIR_estim(nbItRFIR, onsets, durations, Ysub_fn, remote_mask_fn,
               remote_RFIR_parc, avg_bold=averg_bold)

    hrf_fn = os.sep.join((remote_RFIR_parc_clus, 'rfir_ehrf.nii'))
    #remote_copy([hrf_fn], remote_host, remote_user, remote_path)[0]

    ###################################
    ## 3rd STEP: CLUSTERING FROM RFIR RESULTS
    name_hrf = 'rfir_ehrf.nii'

    from pyhrf.tools._io import read_volume, write_volume
    import nisl.io as ionisl
    from sklearn.feature_extraction import image
    from sklearn.cluster import WardAgglomeration
    from scipy.spatial.distance import cdist, pdist

    hrf_fn = os.sep.join((remote_RFIR_parc_clus, name_hrf))
    hrf = read_volume(hrf_fn)[0]
    hrf_t_fn = add_suffix(hrf_fn, 'transpose')
    # taking only 1st condition to parcellate
    write_volume(hrf[:, :, :, :, 0], hrf_t_fn)
    nifti_masker = ionisl.NiftiMasker(remote_mask_fn)
    Nm = nifti_masker.fit(hrf_t_fn)
    # features: coeff of the HRF
    HRF = Nm.fit_transform(hrf_t_fn)

    mask, meta_data = read_volume(remote_mask_fn)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)

    # features used for clustering
    features = HRF.transpose()

    ward = WardAgglomeration(n_clusters=K_clus, connectivity=connectivity,
                             memory='nisl_cache')
    ward.fit(HRF)
    labels_tot = ward.labels_ + 1

    #Kelbow, Perc_WSS, all_parc_from_RFIR_fns, all_parc_RFIR = \
    #    clustering_from_RFIR(K_clusters, remote_RFIR_parc_clus,
    #                         remote_mask_fn, name_hrf, plots=False)
    #to retrieve clustering with as many clusters as determined in K_clusters
    #labels_tot = all_parc_RFIR[str(Kelbow)]
    #Parcellation retrieved: for K=Kelbow
    #labels_tot = all_parc_RFIR[str(K_clus)]
    #clusters_RFIR_fn = all_parc_from_RFIR[str(Kelbow)]
    #clustering_rfir_fn = os.path.join(remote_RFIR_parc_clus,
    #                                  'output_clustering_elbow.nii')
    #write_volume(read_volume(clusters_RFIR_fn)[0], clustering_rfir_fn,
    #             meta_bold)
    #labels_tot = nifti_masker.fit_transform([clusters_RFIR_fn])[0]
    #labels_tot = read_volume(clusters_RFIR_fn)[0]

    #labels_name = 'labels_' + str(int(K_clus)) + '_' + str(parc+1) + '.pck'
    #name_f = os.sep.join((remote_sub_parc, labels_name))
    #pickle_labels = open(name_f, 'w')
    #cPickle.dump(labels_tot, f)
    #pickle_labels.close()
    #remote_copy(pickle_labels, remote_user, remote_host, output_sub_parc)

    #################################
    ## Prepare consensus clustering
    print 'Prepare consensus clustering'
    clustcount, totalcount = upd_similarity_matrix(labels_tot)
    print 'results:', clustcount

    return clustcount.astype(np.bool)
pl.figure(figsize=(3.8, 4.5))
pl.axes([0, 0, 1, 1])
pl.imshow(colors[np.rot90(cut)], interpolation='nearest')
pl.axis('off')

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

for n_clusters in 100, 1000:
    # Compute Ward clustering
    from sklearn.cluster import WardAgglomeration
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                             memory='nilearn_cache', compute_full_tree=True)
    ward.fit(X)
    labels = ward.labels_ + 1
    labels = masking.unmask(labels, adhd_mask)
    # 0 is the background, putting it to -1
    labels = labels - 1

    # Display the labels
    plot_labels(labels, 8)
    pl.savefig(join('clustering', 'ward_%i.eps' % n_clusters))
    pl.savefig(join('clustering', 'ward_%i.pdf' % n_clusters))

# Compute Kmeans clustering
from sklearn.cluster import MiniBatchKMeans
if types[ai] == bool:
    hemi = (hemi <= 0.).astype(float) - 0.5
hemi = factors[ai] * hemi
X = np.append(X.T, [hemi], axis=0).T
X = np.append(X[:, 3:].T, fmri_masked, axis=0).T
print(X.shape)

# Compute a connectivity matrix (for constraining the clustering)
connectivity = sk_image.grid_to_graph(n_x=mask.shape[0], n_y=mask.shape[1],
                                      n_z=mask.shape[2], mask=mask)

# Cluster (#2)
start = time.time()
ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                         memory=memory)
ward.fit(X.T)
print("Ward agglomeration %d clusters: %.2fs"
      % (n_clusters, time.time() - start))

# Compute an image with one ROI per label, and save to disk
labels = ward.labels_ + 1  # Avoid 0 label - 0 means mask.
labels_img = nifti_masker.inverse_transform(labels)
labels_img.to_filename('parcellation.nii')

# Plot image with len(labels) ROIs, and store
# the cut coordinates to reuse for all plots
# and the figure for plotting all to a common axis
if 1 in plots:
def feature_extractor(imgfile, maskfile, featurefile, maskerfile, wardfile,
                      nclusters=[1000, ], selectfile=None, targetfile=None,
                      metafile=None, cachefile=None):
    resultdict = {"imgfile": imgfile, "maskfile": maskfile}

    # load data
    print "--loading data"
    nifti_masker = input_data.NiftiMasker(mask=maskfile, memory=cachefile,
                                          memory_level=1, standardize=False)
    fmri_masked = nifti_masker.fit_transform(imgfile)

    print "--getting mask"
    mask = nifti_masker.mask_img_.get_data().astype(np.bool)

    # saveit
    joblib.dump(nifti_masker, maskerfile)
    resultdict["mask"] = mask
    resultdict["Xmask"] = fmri_masked
    resultdict["maskerfile"] = maskerfile

    # get connectivity
    print "--getting connectivity"
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)

    # saveit
    resultdict["connectivity"] = connectivity
    print "--save main file"
    np.savez(featurefile + "_main.npz", **resultdict)

    # run ward
    y = np.load(targetfile)["ymap"]
    meta = np.load(metafile)
    train = meta["train"]
    test = meta["test"]
    ncv = meta['ycv']

    # for each cv set
    for cvx in range(ncv):
        trainidx = train[cvx]
        testidx = test[cvx]
        resultdict = {}
        wardfiles = []
        selectfiles = []
        print "--Running ward %d" % (cvx, )
        for ix, nc in enumerate(nclusters):
            ward = WardAgglomeration(n_clusters=nc, connectivity=connectivity,
                                     memory=cachefile)
            ward.fit(fmri_masked[trainidx])
            fmri_reduced_train = ward.transform(fmri_masked[trainidx])
            fmri_reduced_test = ward.transform(fmri_masked[testidx])

            # saveit
            subwardfile = wardfile + "_D%d_cv%d.pkl" % (nc, cvx, )
            joblib.dump(ward, subwardfile)
            resultdict["Xward_%d_train" % (nc, )] = fmri_reduced_train
            resultdict["Xward_%d_test" % (nc, )] = fmri_reduced_test
            wardfiles.append(subwardfile)

            # additional feature selection
            selector = SelectPercentile(f_classif, percentile=30)
            selector.fit(fmri_reduced_train, y[trainidx])
            fmri_select_train = selector.transform(fmri_reduced_train)
            fmri_select_test = selector.transform(fmri_reduced_test)

            # saveit
            subselectfile = selectfile + "_D%d_cv%d.pkl" % (nc, cvx, )
            joblib.dump(selector, subselectfile)
            resultdict["Xselect_%d_train" % (nc, )] = fmri_select_train
            resultdict["Xselect_%d_test" % (nc, )] = fmri_select_test
            selectfiles.append(subselectfile)

        resultdict["wardfiles"] = wardfiles
        resultdict["selectfiles"] = selectfiles

        # save results
        print "--save cv result"
        np.savez(featurefile + "_cv%d.npz" % (cvx, ), **resultdict)
### Mask ######################################################################
fmri_data = dataset.func[0]

# Compute a brain mask
from nisl import masking
mask = masking.compute_mask(fmri_data)

# Mask data: go from a 4D dataset to a 2D dataset with only the voxels
# in the mask
fmri_masked = fmri_data[mask]

### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=5000, connectivity=connectivity)
                         # memory='nisl_cache')
ward.fit(fmri_masked.T)
print "Ward agglomeration 5000 clusters: %.2fs" % (time.time() - start)
def boo(subject_idx=0, cut_coords=None, n_components=20, n_clusters=2000,
        memory='nilearn_cache'):
    mem = Memory(cachedir='nilearn_cache')

    # ## Load the data ########################################################
    print("Fetch the data files from Internet")
    haxby_dataset = datasets.fetch_haxby(n_subjects=subject_idx + 1)
    print("Second, load the labels")
    haxby_labels = np.genfromtxt(haxby_dataset.session_target[0],
                                 skip_header=1, usecols=[0],
                                 dtype=basestring)

    # ## Find voxels of interest ##############################################
    print("Load the data.")
    anat_filename = haxby_dataset.anat[subject_idx]
    anat_img = nibabel.load(anat_filename)
    fmri_filename = haxby_dataset.func[subject_idx]
    fmri_raw_img = nibabel.load(fmri_filename)

    print("Build a mask based on the activations.")
    epi_masker = NiftiMasker(mask_strategy='epi', detrend=True,
                             standardize=True)
    epi_masker = mem.cache(epi_masker.fit)(fmri_raw_img)
    plot_roi(epi_masker.mask_img_, bg_img=anat_img, title='EPI mask',
             cut_coords=cut_coords)

    print("Normalize the (transformed) data")  # zscore per pixel, over examples.
    fmri_masked_vectors = epi_masker.transform(fmri_raw_img)
    fmri_normed_vectors = mem.cache(stats.mstats.zscore)(fmri_masked_vectors,
                                                         axis=0)
    fmri_normed_img = epi_masker.inverse_transform(fmri_normed_vectors)

    print("Smooth the (spatial) data.")
    fmri_smooth_img = mem.cache(image.smooth_img)(fmri_normed_img, fwhm=7)

    print("Mask the MRI data.")
    masked_fmri_vectors = mem.cache(epi_masker.transform)(fmri_smooth_img)

    # ## Compute mean values based on condition matrix ########################
    condition_names = list(np.unique(haxby_labels))
    n_conditions = len(condition_names)
    n_good_voxels = masked_fmri_vectors.shape[1]

    mean_vectors = np.empty((n_conditions, n_good_voxels))
    for ci, condition in enumerate(condition_names):
        condition_vectors = masked_fmri_vectors[haxby_labels == condition, :]
        mean_vectors[ci, :] = condition_vectors.mean(axis=0)

    # ## Use similarity across conditions as the 4th dimension ################
    n_conds = len(condition_names)
    n_compares = n_conds * (n_conds - 1) / 2

    p_vectors = np.zeros((n_compares, masked_fmri_vectors.shape[1]))
    comparison_text = []
    comparison_img = []
    idx = 0
    for i, cond in enumerate(condition_names):
        for j, cond2 in enumerate(condition_names[i + 1:]):
            print("Computing ttest for %s vs. %s." % (cond, cond2))
            _, p_vector = stats.ttest_ind(
                masked_fmri_vectors[haxby_labels == cond, :],
                masked_fmri_vectors[haxby_labels == cond2, :],
                axis=0)

            p_vector /= p_vector.max()  # normalize
            p_vector = -np.log10(p_vector)
            p_vector[np.isnan(p_vector)] = 0.
            p_vector[p_vector > 10.] = 10.

            p_img = epi_masker.inverse_transform(p_vector)
            comparison_img.append(p_img)
            comparison_text.append('%s vs. %s' % (cond, cond2))
            p_vectors[idx, :] = p_vector
            idx += 1

    #n_comparisons = n_conditions * (n_conditions - 1) / 2
    #similarity_vectors = np.empty((n_good_voxels, n_comparisons))
    #for vi in np.arange(n_good_voxels):
    #    similarity_vectors[vi, :] = pdist(mean_vectors[:, vi])

    # Compute a connectivity matrix (for constraining the clustering)
    mask_data = epi_masker.mask_img_.get_data().astype(np.bool)
    connectivity = sk_image.grid_to_graph(n_x=mask_data.shape[0],
                                          n_y=mask_data.shape[1],
                                          n_z=mask_data.shape[2],
                                          mask=mask_data)

    # Cluster (#2)
    start = time.time()
    ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
                             memory=memory)
    ward.fit(p_vectors)
    print("Ward agglomeration %d clusters: %.2fs" % (
        n_clusters, time.time() - start))

    # Compute an image with one ROI per label, and save to disk
    labels = ward.labels_ + 1  # Avoid 0 label - 0 means mask.
    labels_img = epi_masker.inverse_transform(labels)
    labels_img.to_filename('parcellation.nii')

    # Plot image with len(labels) ROIs, and store
    # the cut coordinates to reuse for all plots
    # and the figure for plotting all to a common axis
    first_plot = plot_roi(labels_img, title="Ward parcellation",
                          bg_img=anat_img)
    plt.show()
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs multi-label classification
    with the given classifier using n-fold c.v. Constructs a OvR classifier
    for multilabel prediction.

    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (n_samples x n_labels) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes' |
        'decision_tree' | 'logistic_regression'.
        Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to do Ward clustering or not. Uses n_clusters = 10,000.
        Change global N_CLUSTERS for different value. Defaults to True.
    n_folds : int
        the number of folds of cv

    Returns
    -------
    score_per_class, score_per_label : tuple
        The results are stored as a tuple of two dicts, with the keywords
        specifying the metrics.
    """
    clf = None
    ward = None

    lb = preprocessing.LabelBinarizer()
    y_new = lb.fit_transform(y)

    # specify connectivity for clustering
    mask = nb.load('data/MNI152_T1_2mm_brain.nii.gz').get_data().astype('bool')
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)
    ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity)

    # choose and assign appropriate classifier
    classifier_dict = {
        'naive_bayes': OneVsRestClassifier(MultinomialNB()),
        'logistic_regression': OneVsRestClassifier(LogisticRegression(penalty='l2')),
        'decision_tree': tree.DecisionTreeClassifier()
    }
    clf = classifier_dict[classifier]

    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    score_per_class = []
    score_per_label = []
    for train, test in kf:
        x_train = np.ascontiguousarray(x[train])
        y_train = np.ascontiguousarray(y_new[train])
        x_test = np.ascontiguousarray(x[test])
        y_test = np.ascontiguousarray(y_new[test])
        if clustering:
            # fit the parcellation on the training fold only, then apply
            # the same reduction to both folds
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)
        model = clf.fit(x_train, y_train)
        predicted = model.predict(x_test)
        predict_prob = model.predict_proba(x_test)
        if isinstance(predict_prob, list):
            predict_prob = np.array(predict_prob)
        cls_scores = utils.score_results(y_test, predicted, predict_prob)
        label_scores = utils.label_scores(y_test, predicted, predict_prob)
        score_per_class.append(cls_scores)
        score_per_label.append(label_scores)
    return (score_per_class, score_per_label)
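# The fold logic above (fit the parcellation on the training fold, apply it
# to both folds) can also be expressed as a scikit-learn Pipeline so that
# cross-validation utilities handle the refitting automatically. A sketch
# assuming a modern scikit-learn, where FeatureAgglomeration replaces the
# deprecated WardAgglomeration; N_CLUSTERS and connectivity are the objects
# built in the function above:
from sklearn.pipeline import Pipeline
from sklearn.cluster import FeatureAgglomeration
from sklearn.naive_bayes import MultinomialNB
from sklearn.multiclass import OneVsRestClassifier

pipe = Pipeline([
    ('ward', FeatureAgglomeration(n_clusters=N_CLUSTERS,
                                  connectivity=connectivity)),
    ('clf', OneVsRestClassifier(MultinomialNB())),
])
# pipe.fit(x_train, y_train); pipe.predict(x_test)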
### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(fmri_masked)
print("Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start))

# Compute the ward with more clusters, should be faster as we are using
# the caching mechanism
start = time.time()
ward = WardAgglomeration(n_clusters=2000, connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(fmri_masked)
print("Ward agglomeration 2000 clusters: %.2fs" % (time.time() - start))

### Show result ###############################################################
fmri_masked = nifti_masker.fit_transform(dataset.func[0])
mask = nifti_masker.mask_img_.get_data().astype(np.bool)

### Ward ######################################################################

# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(fmri_masked)
print "Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start)

# Compute the ward with more clusters, should be faster as we are using
# the caching mechanism
start = time.time()
ward = WardAgglomeration(n_clusters=2000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(fmri_masked)
print "Ward agglomeration 2000 clusters: %.2fs" % (time.time() - start)

### Show result ###############################################################

# Unmask data
# Avoid 0 label
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs single-class classification
    with the given classifier using n-fold c.v. Constructs a OvO classifier
    for every pair of terms.

    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (1 x n_samples) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes' |
        'svm' | 'logistic_regression' | 'ensemble'.
        Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to do Ward clustering or not. Uses n_clusters = 10,000.
        Change global N_CLUSTERS for different value. Defaults to True.
    n_folds : int
        the number of folds of cv

    Returns
    -------
    accuracy : `numpy.ndarray`
        The results are stored as a list of confusion matrices for each
        fold and saved as a numpy array of arrays, for further analysis.
    """
    clf = None
    ward = None

    le = preprocessing.LabelEncoder()
    le.fit(y)
    y_new = le.transform(y)

    # choose and assign appropriate classifier
    classifier_dict = {
        'naive_bayes': MultinomialNB(),
        'logistic_regression': LogisticRegression(penalty='l2'),
        'svm': GridSearchCV(LinearSVC(), [{'C': [1, 10, 100, 1000]}])
    }

    if classifier == 'ensemble':
        clf_nb = classifier_dict['naive_bayes']
        clf_svm = classifier_dict['svm']
        clf_lr = classifier_dict['logistic_regression']
    else:
        clf = classifier_dict[classifier]

    # perform ward clustering if specified
    if clustering:
        mask = np.load('data/2mm_brain_mask.npy')
        shape = mask.shape
        connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                           n_z=shape[2], mask=mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS,
                                 connectivity=connectivity)

    # actual cross validation
    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    accuracy = []
    for train, test in kf:
        x_train = x[train]
        y_train = y_new[train]
        x_test = x[test]
        y_test = y_new[test]
        if clustering:
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)
        if classifier != 'ensemble':
            predicted = clf.fit(x_train, y_train).predict(x_test)
        else:
            # majority vote over the three base classifiers
            predicted_nb = clf_nb.fit(x_train, y_train).predict(x_test)
            predicted_lr = clf_lr.fit(x_train, y_train).predict(x_test)
            predicted_svm = clf_svm.fit(x_train, y_train).predict(x_test)
            predicted = predicted_nb + predicted_lr + predicted_svm
            predicted = np.array(predicted >= 2, dtype=int)
        conf_mat = confusion_matrix(y_test, predicted, labels=[0, 1])
        accuracy.append(conf_mat)
    return accuracy
    # loading the mask, and binarise
    mask = seed.astype(np.bool)
    # shape = mask.shape
    # print 'compute adjacency matrix...'

    # compute the adjacency matrix over the target mask
    from sklearn.neighbors import kneighbors_graph
    connectivity = kneighbors_graph(connect_use2, 7)
    print 'connectivity for ward:', connectivity

    print 'ward clustering...'
    # perform a hierarchical clustering considering spatial neighborhood
    ward = WardAgglomeration(n_clusters=nb_cluster, connectivity=connectivity)
    ward.fit(np.transpose(connect))
    labelsf = ward.labels_  # the labels are the final labels of each voxels

# OBSOLETE : DON'T USE
elif option_cluster == 2:
    # perform the k-means clustering : the labels for each voxel seed are
    # in table : labelsf
    print 'kmeans...'
    # k_means = KMeans(init='k-means++', n_clusters=nb_cluster, n_init=10)
    # k_means.fit(connect)
    # labelsf = k_means.labels_

# USE IT INSTEAD
else:
    tc_group = np.hstack((tc_group,
                          preprocessing.standardize(pca.transform(tc.T))))
    print("Concatenating subject" + sub + "'s timecourses")
#io.savemat(os.path.join(BASE_DIR, "group/tc_rest_pca_vox.mat"),
#           {"tc_group": tc_group})

# Perform parcellation on PCA-ed timecourses
brain_img = as_volume_img("/volatile/bernardng/templates/spm8/rgrey.nii")
brain = brain_img.get_data()
dim = np.shape(brain)
brain = brain > 0.2  # Generate brain mask
brain = mask_utils.largest_cc(brain)
mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1],
                  n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=500, connectivity=A, memory=mem)
tc_group = tc_group.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc_group.shape[-1]
for t in np.arange(n_tpts):
    tc_group[:, :, :, t] = gaussian_filter(tc_group[:, :, :, t], sigma=5)
tc_group = tc_group.reshape((-1, n_tpts))
tc_group = tc_group[brain.ravel() == 1, :]
print("Performing Ward Clustering")
ward.fit(tc_group.T)
template = np.zeros((dim[0], dim[1], dim[2]))
# Previously processed data did not include +1
template[brain == 1] = ward.labels_ + 1

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

###############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(len(y), 2)  # cross-validation generator for model selection
ridge = BayesianRidge()
cachedir = tempfile.mkdtemp()
mem = Memory(cachedir=cachedir, verbose=1)

# Ward agglomeration followed by BayesianRidge
A = grid_to_graph(n_x=size, n_y=size)
ward = WardAgglomeration(n_clusters=10, connectivity=A, memory=mem,
                         n_components=1)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1, cv=cv)
clf.fit(X, y)  # set the best parameters
coef_ = clf.best_estimator_.steps[-1][1].coef_
coef_ = clf.best_estimator_.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression)  # caching function
anova = feature_selection.SelectPercentile(f_regression)
clf = Pipeline([('anova', anova), ('ridge', ridge)])
# Select the optimal percentage of features with grid search
clf = GridSearchCV(clf, {'anova__percentile': [5, 10, 20]}, cv=cv)
clf.fit(X, y)  # set the best parameters
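# The parcel count selected by the first grid search above can be read back
# through the standard GridSearchCV attribute, right after the first
# clf.fit(X, y) and before clf is rebound to the anova pipeline (a usage
# sketch):
best_n_clusters = clf.best_params_['ward__n_clusters']
print("Selected ward__n_clusters: %d" % best_n_clusters)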
def _ward_fit_transform(all_subjects_data, fit_samples_indices, connectivity,
                        n_parcels, offset_labels):
    """Ward clustering algorithm on a subsample and apply to the whole dataset.

    Computes a brain parcellation using Ward's clustering algorithm on some
    images, then averages the signal within parcels in order to reduce the
    dimension of the images of the whole dataset.
    This function is used with Randomized Parcellation Based Inference, so
    we need to save the labels to further perform the inverse transformation
    operation. The function therefore needs an offset to be applied on the
    labels so that they are unique across parcellations.

    Parameters
    ----------
    all_subjects_data : array_like, shape=(n_samples, n_voxels)
      Masked subject images as an array.

    fit_samples_indices : array-like,
      Indices of the samples used to compute the parcellation.

    connectivity : scipy.sparse.coo_matrix,
      Graph representing the spatial structure of the images (i.e.
      connections between voxels).

    n_parcels : int,
      Number of parcels for the parcellations.

    offset_labels : int,
      Offset for labels numbering.
      The purpose is to have different labels in all the parcellations that
      can be built by multiple calls to the current function.

    Returns
    -------
    parcelled_data : numpy.ndarray, shape=(n_samples, n_parcels)
      Average signal within each parcel for each subject.

    labels : np.ndarray, shape=(n_voxels,)
      Labels giving the correspondence between voxels and parcels.

    """
    # XXX: Delayed import is a mega hack which is unfortunately
    # required. In scipy versions < 0.11, this import ends up
    # importing matplotlib.pyplot. This sets the matplotlib backend
    # which causes our matplotlib backend setting code in
    # nilearn/plotting/__init__.py to have no effect. In environments
    # without X, e.g. travis-ci, that means the tests will fail with
    # the usual "TclError: no display name and no $DISPLAY environment
    # variable". Note this is dependent on the order of import,
    # whichever comes first has the only shot at setting the
    # matplotlib backend.
    from sklearn.cluster import WardAgglomeration

    # fit part
    data_fit = all_subjects_data[fit_samples_indices]
    ward = WardAgglomeration(n_clusters=n_parcels, connectivity=connectivity)
    ward.fit(data_fit)

    # transform part
    labels = ward.labels_ + offset_labels  # unique labels across parcellations
    parcelled_data = ward.transform(all_subjects_data)

    return parcelled_data, labels
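# Hypothetical usage sketch of the helper above (the names `masked_data` and
# `connectivity` are assumptions, not from the source): two parcellations
# whose label ranges do not collide, as Randomized Parcellation Based
# Inference requires.
import numpy as np

parcelled_1, labels_1 = _ward_fit_transform(
    masked_data, fit_samples_indices=np.arange(10),
    connectivity=connectivity, n_parcels=1000, offset_labels=0)
parcelled_2, labels_2 = _ward_fit_transform(
    masked_data, fit_samples_indices=np.arange(10, 20),
    connectivity=connectivity, n_parcels=1000, offset_labels=1000)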
rois = ["V1"] masks = [cortex.get_roi_mask("MLfs", "20121210ML_auto1", roi=roi)[roi] > 0 for roi in rois] roimask = reduce(lambda x, y: (x + y), masks) wardmask = cort_mask - roimask # Load training, test fMRI data trndata_roi = np.nan_to_num(data.get_train(masked=roimask)[:numtime]) trndata_ward = np.nan_to_num(data.get_train(masked=wardmask)[:numtime]) connectivity = image.grid_to_graph(n_x=wardmask.shape[0], n_y=wardmask.shape[1], n_z=wardmask.shape[2], mask=wardmask) ward = WardAgglomeration(n_clusters=numclusters, connectivity=connectivity, memory='nilearn_cache') ward.fit(trndata_ward) labels = ward.labels_ trndata_collapsed = np.array([trndata_ward[:, labels == i].mean(1) for i in range(numclusters)]) trndata = np.hstack((trndata_roi, trndata_collapsed.T)) valdata = data.get_val(masked=roimask) from ridge import _RidgeGridCV ridge = _RidgeGridCV(alpha_min=1., alpha_max=1000., n_grid_points=5, n_grid_refinements=2, cv=2) ridge_coefs = ridge.fit(sdeltrnstim, trndata).coef_.T Uridge, sridge, VridgeT = np.linalg.svd(ridge_coefs, full_matrices=False)