newgroupdata = pd.DataFrame(
    [[rerun, group_dim, bootstrap, group_label_acc, gsm_repref_corr,
      mean_rep_ism_gsm_corr, mean_ref_ism_gsm_corr,
      stdev_rep_ism_gsm_corr, stdev_ref_ism_gsm_corr]],
    columns=['rerun', 'group_dim', 'bootstrap', 'group_label_acc',
             'gsm_repref_corr', 'mean_rep_ism_gsm_corr',
             'mean_ref_ism_gsm_corr', 'stdev_rep_ism_gsm_corr',
             'stdev_ref_ism_gsm_corr']
)
group_level_reproducibility = pd.concat(
    [group_level_reproducibility, newgroupdata]
)

for subdir in subdirs:
    # Extract the subject ID from the directory name
    matrices = subdir.split('_')[3]
    subject = matrices[8:]

    # Load the replication and reference individual stability matrices
    # (ISMs) along with their compression labels
    ism = np.load(ismdir + subdir + '/individual_stability_matrix.npy')
    replabels = np.load(ismdir + subdir + '/Y1_labels.npy')
    refism = np.load(refismdir + subdir + '/individual_stability_matrix.npy')
    reflabels = np.load(refismdir + subdir + '/Y1_labels.npy')

    # Expand both ISMs from supervoxel to voxel resolution so they can
    # be compared element-wise
    exp_ism = utils.expand_ism(ism, replabels)
    exp_refism = utils.expand_ism(refism, reflabels)

    print('calculating ism reproducibility for subject ' + subject)

    # Reproducibility is the Pearson correlation between the flattened
    # replication and reference ISMs
    ism_repref_corr = np.corrcoef(exp_ism.ravel(), exp_refism.ravel())[0, 1]

    sub_rep_ism_gsm_corr = rep_ism_gsm_corr[int(subject)]
    sub_ref_ism_gsm_corr = ref_ism_gsm_corr[int(subject)]

    newdata = pd.DataFrame(
        [[rerun, group_dim, bootstrap, subject, ism_repref_corr,
          sub_rep_ism_gsm_corr, sub_ref_ism_gsm_corr]],
        columns=['rerun', 'group_dim', 'bootstrap', 'subject',
                 'ism_repref_corr', 'sub_rep_ism_gsm_corr',
                 'sub_ref_ism_gsm_corr']
    )
    all_individual_reproducibility = pd.concat(
        [all_individual_reproducibility, newdata]
    )

all_individual_reproducibility.to_csv('all_individual_reproducibility.csv')
group_level_reproducibility.to_csv('group_level_reproducibility.csv')

# %%
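# Illustrative sketch, not part of the pipeline: the reproducibility score
# computed above is just the Pearson correlation between two flattened
# stability matrices. The helper below (hypothetical name) demonstrates the
# measure on toy data, assuming only numpy.
def _demo_matrix_reproducibility():
    import numpy as np
    rng = np.random.RandomState(0)
    ism_rep = rng.randint(0, 101, size=(10, 10)).astype(float)  # toy replication ISM
    ism_ref = ism_rep + rng.normal(0.0, 5.0, size=(10, 10))     # noisy "reference" ISM
    # Correlate the flattened matrices, as done for ism_repref_corr above;
    # similar matrices yield a value close to 1.0
    return np.corrcoef(ism_rep.ravel(), ism_ref.ravel())[0, 1]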
def join_group_stability(
    subject_stability_list, group_stability_list, n_bootstraps, n_clusters,
    roi_mask_file, group_dim_reduce, compression_labels_list,
    cluster_method='ward', random_state_tuple=None
):
    """
    Merges the group stability maps for all bootstraps and compares the
    result to all individual stability maps.

    Parameters
    ----------
    subject_stability_list : list of strings
        A length `N` list of file paths to numpy matrices of shape
        (`V`, `V`), `N` subjects, `V` voxels
    group_stability_list : list of strings
        A list of file paths to the group stability matrices, one per
        bootstrap, each of shape (`V`, `V`)
    n_bootstraps : integer
        Number of bootstraps to join and average.
    n_clusters : integer
        Number of clusters extracted from the adjacency matrix.
    roi_mask_file : string
        Region of interest that is being parcellated. Large volumes
        should use compression_dim to reduce computational load.
    group_dim_reduce : boolean
        Whether or not dimension reduction will be performed at the
        group level.
    compression_labels_list : array_like
        List of the arrays that contain the dimension-reduced label
        files from each individual dim reduce.
    cluster_method : string, optional
        What type of clustering will be applied.
    random_state_tuple : tuple, optional
        Random state used to make the clustering reproducible.

    Returns
    -------
    G : array_like
        Group stability matrix of shape (`V`, `V`)
    clusters_G : array_like
        Length `V` array of cluster assignments for each voxel
    ism_gsm_corr : array_like
        Length `N` array of correlations between each subject's ISM and
        the group stability matrix
    gsm_file : string
        Path to the saved group stability matrix (.npz)
    clusters_G_file : string
        Path to the saved cluster assignments (.npy)
    ism_gsm_corr_file : string
        Path to the saved ISM/GSM correlations (.npy)
    """
    import os
    import numpy as np
    import nibabel as nb
    import PyBASC.utils as utils
    import scipy.sparse

    random_state = utils.get_random_state(random_state_tuple)

    group_stability_set = np.asarray([
        scipy.sparse.load_npz(G_file).toarray()
        for G_file in group_stability_list
    ])

    # Average the per-bootstrap group stability matrices and rescale
    # to percentages (0-100)
    G = group_stability_set.sum(axis=0)
    G *= 100
    G //= n_bootstraps
    G = G.astype("uint8")

    if group_dim_reduce:
        # Expand the group stability matrix back to voxel resolution
        compression_labels = np.asarray([np.load(compression_labels_list[0])])
        G = scipy.sparse.csr_matrix(G, dtype=np.int8)
        G = utils.expand_ism(G, compression_labels.T)
        G = G.toarray()

    roi_mask_data = nb.load(roi_mask_file).get_data().astype('bool')

    clusters_G = utils.cluster_timeseries(
        G, roi_mask_data, n_clusters,
        similarity_metric='correlation',
        affinity_threshold=0.0,
        cluster_method=cluster_method,
        random_state=random_state
    )
    clusters_G = clusters_G.astype("uint16")

    # TODO @AKI APPLY THIS METHOD TO THE INDIVIDUAL LEVEL CLUSTER
    # TODO @AKI INSERT SECTION HERE TO RETURN ALL OUTPUTS
    #           OF JGSM TO VOXEL RESOLUTION.
    # Cluster labels normally start from 0,
    # so start from 1 to provide contrast when viewing between 0 voxels
    clusters_G += 1

    indiv_stability_set = [
        scipy.sparse.load_npz(ism_file)
        for ism_file in subject_stability_list
    ]

    ism_gsm_corr = np.zeros(len(subject_stability_list))
    if compression_labels_list[0] is None:
        for i in range(len(subject_stability_list)):
            ism = indiv_stability_set[i].toarray()
            ism_gsm_corr[i] = utils.compare_stability_matrices(ism, G)
    else:
        compression_labels_set = np.asarray([
            np.load(compression_labels_file)
            for compression_labels_file in compression_labels_list
        ])
        for i in range(len(subject_stability_list)):
            # Expand each subject's ISM to voxel resolution before
            # comparing it to the group stability matrix
            compression_labels = compression_labels_set[i]
            ism = utils.expand_ism(
                indiv_stability_set[i], compression_labels
            ).toarray()
            ism_gsm_corr[i] = utils.compare_stability_matrices(ism, G)

    gsm_file = os.path.join(os.getcwd(), 'group_stability_matrix.npz')
    G = scipy.sparse.csr_matrix(G, dtype=np.int8)
    scipy.sparse.save_npz(gsm_file, G)

    clusters_G_file = os.path.join(os.getcwd(), 'clusters_G.npy')
    np.save(clusters_G_file, clusters_G)

    ism_gsm_corr_file = os.path.join(os.getcwd(), 'ism_gsm_corr.npy')
    np.save(ism_gsm_corr_file, ism_gsm_corr)

    return (
        G, clusters_G, ism_gsm_corr,
        gsm_file, clusters_G_file, ism_gsm_corr_file
    )
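# Illustrative sketch, not the PyBASC API: join_group_stability averages the
# per-bootstrap group stability matrices into percentages (0-100), as done at
# the top of the function above. The hypothetical helper below reproduces
# that arithmetic for a list of dense co-assignment count matrices.
def _demo_average_group_stability(bootstrap_matrices, n_bootstraps):
    import numpy as np
    # Sum co-assignment counts across bootstraps, then convert to a
    # truncated percentage, mirroring G *= 100; G //= n_bootstraps
    G = np.asarray(bootstrap_matrices).sum(axis=0)
    G = (G * 100) // n_bootstraps
    return G.astype("uint8")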
def individual_group_clustered_maps(
    subject_stability_list, clusters_G, roi_mask_file,
    group_dim_reduce, compression_labels_file
):
    # TODO @AKI update doc
    """
    Calculate the individual stability maps of each subject based on
    the group stability clustering solution.

    Parameters
    ----------
    subject_stability_list : string
        File path to a scipy.sparse matrix of shape (`V`, `V`),
        `V` voxels, for a single subject
    clusters_G : array_like
        Length `V` array of cluster assignments for each voxel
    roi_mask_file : string
        Region of interest that is being parcellated. Large volumes
        should use compression_dim to reduce computational load.
    group_dim_reduce : boolean
        Whether or not dimension reduction will be performed at the
        group level.
    compression_labels_file : string
        File path to the array of dimension-reduced labels from an
        individual dim reduce.

    Returns
    -------
    ind_group_cluster_stability_file : string
        Path to a numpy file of the mean within-cluster stability for
        each group-level cluster.
    individualized_group_clusters_file : string
        Path to a nifti file of the individualized group clustering.
    ind_group_cluster_labels_file : string
        Path to a numpy file of the individualized cluster labels.
    """
    import os
    import numpy as np
    import PyBASC.utils as utils
    import PyBASC.basc as basc
    import scipy.sparse

    supervox_ism = scipy.sparse.load_npz(subject_stability_list)

    if compression_labels_file:
        compression_labels = np.load(compression_labels_file)
    else:
        compression_labels = None

    if group_dim_reduce:
        # Expand the subject's ISM from supervoxel to voxel resolution
        indiv_stability_set = utils.expand_ism(
            supervox_ism, compression_labels
        ).toarray()
    else:
        indiv_stability_set = supervox_ism.toarray()

    cluster_ids = np.unique(clusters_G)

    # Average the subject's stability within each group-level cluster
    cluster_voxel_scores, k_mask = \
        utils.cluster_matrix_average(indiv_stability_set, clusters_G)

    ind_group_cluster_stability = np.array([
        cluster_voxel_scores[(i - 1), clusters_G == i].mean()
        for i in cluster_ids
    ])

    cluster_voxel_scores = cluster_voxel_scores.astype("uint8")
    k_mask = k_mask.astype(bool)

    ind_group_cluster_stability_file = os.path.join(
        os.getcwd(), 'ind_group_cluster_stability.npy'
    )
    np.save(ind_group_cluster_stability_file, ind_group_cluster_stability)

    # Reassign each voxel to the cluster with which it is most stable;
    # +1 keeps the labels 1-based, matching clusters_G
    individualized_group_cluster_npy = np.argmax(
        cluster_voxel_scores, axis=0
    ) + 1

    ind_group_cluster_labels_file = os.path.join(
        os.getcwd(), 'ind_group_cluster_labels.npy'
    )
    np.save(ind_group_cluster_labels_file, individualized_group_cluster_npy)

    individualized_group_clusters_file, _ = basc.ndarray_to_vol(
        individualized_group_cluster_npy,
        roi_mask_file,
        roi_mask_file,
        os.path.join(os.getcwd(), 'individualized_group_cluster.nii.gz')
    )

    return (ind_group_cluster_stability_file,
            individualized_group_clusters_file,
            ind_group_cluster_labels_file)
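# Illustrative sketch, assuming cluster_voxel_scores has shape
# (n_clusters, n_voxels) as used above: the "individualized" parcellation
# simply reassigns each voxel to the cluster whose mean stability with that
# voxel is highest. Hypothetical helper, not the PyBASC API.
def _demo_individualize_labels(cluster_voxel_scores):
    import numpy as np
    # argmax over the cluster axis; +1 keeps labels 1-based, matching the
    # +1 offset applied to clusters_G in join_group_stability
    return np.argmax(cluster_voxel_scores, axis=0) + 1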
def nifti_individual_stability(
    subject_file, roi_mask_file, n_bootstraps, n_clusters,
    compression_dim, similarity_metric, blocklength=1,
    cbb_block_size=None, affinity_threshold=0.0, cluster_method='ward',
    compressor=None, cross_cluster=False, cxc_compressor=None,
    cxc_roi_mask_file=None, random_state_tuple=None
):
    # TODO @AKI update docs
    """
    Calculate the individual stability matrix for a single subject by
    using the Circular Block Bootstrap method for time-series data.

    Parameters
    ----------
    subject_file : string
        Nifti file of a subject
    roi_mask_file : string
        Region of interest that is being parcellated. Large volumes
        should use compression_dim to reduce computational load.
    n_bootstraps : integer
        Number of bootstraps
    n_clusters : integer
        Number of clusters
    compression_dim : integer or list
        The number of supervoxels to be created after the compression.
    similarity_metric : string
        Options: 'correlation'
    blocklength : float, optional
        A scalar value of the window size to be used for the block
        bootstrap
    cbb_block_size : integer, optional
        Size of the time-series block when performing circular block
        bootstrap
    affinity_threshold : float, optional
        Minimum threshold for similarity matrix based on correlation
        to create an edge
    cluster_method : string, optional
        What type of clustering will be applied.
    compressor : optional
        Compressor object from group dim reduce.
    cross_cluster : boolean
        Whether or not the region of interest will be clustered
        according to the similarity of connectivity within the region,
        or similarity of connectivity to a secondary region.
    cxc_compressor : optional
        Compressor object from group dim reduce for cxc.
    cxc_roi_mask_file : string
        The primary region will be clustered based on similarity of
        voxel-wise connectivity to this region.
    random_state_tuple : tuple, optional
        Random state used to make the bootstrapping reproducible.

    Returns
    -------
    ism : array_like
        Individual stability matrix of shape (`V`, `V`), `V` voxels
    """
    import os
    import numpy as np
    import nibabel as nb
    import PyBASC.utils as utils
    from sklearn.preprocessing import normalize
    import scipy.sparse

    print('Calculating individual stability matrix of:', subject_file)

    random_state = utils.get_random_state(random_state_tuple)

    # A list provides separate compression dimensions for the primary
    # and cross-cluster regions
    if isinstance(compression_dim, list):
        cxc_compression_dim = compression_dim[1]
        compression_dim = compression_dim[0]
    else:
        cxc_compression_dim = compression_dim

    subject_data = nb.load(subject_file).get_data().astype('float32')

    roi_mask_image = nb.load(roi_mask_file)
    roi_mask_data = roi_mask_image.get_data().astype('bool')

    subject_rois = subject_data[roi_mask_data]
    subject_rois = normalize(subject_rois, norm='l2')

    if compression_dim == 0:
        # Use uncompressed data
        compressed = subject_rois.T
        compression_labels_file = None
    else:
        if not compressor:
            # Perform individual data compression
            compression = utils.data_compression(
                subject_rois.T, roi_mask_image,
                roi_mask_data, compression_dim
            )
            compression_labels = compression['labels'][:, np.newaxis]
            compressed = compression['compressed']
        else:
            # Use group-based data compression
            compression_labels = compressor.labels_
            compressed = compressor.transform(subject_rois.T)
            roi_mask_data = None

        compression_labels_file = os.path.join(
            os.getcwd(), 'compression_labels.npy'
        )
        np.save(compression_labels_file, compression_labels)

    if cross_cluster:
        cxc_roi_mask_img = nb.load(cxc_roi_mask_file)
        cxc_roi_mask_data = cxc_roi_mask_img.get_data().astype('bool')

        subject_cxc_rois = subject_data[cxc_roi_mask_data]
        subject_cxc_rois = normalize(subject_cxc_rois, norm='l2')

        if cxc_compression_dim == 0:
            # Use uncompressed data
            cxc_compressed = subject_cxc_rois.T
        else:
            if not cxc_compressor:
                # Perform individual data compression
                cxc_compression = utils.data_compression(
                    subject_cxc_rois.T, cxc_roi_mask_img,
                    cxc_roi_mask_data, cxc_compression_dim
                )
                cxc_compressed = cxc_compression['compressed']
            else:
                # Use group-based data compression
                cxc_compressor.fit(subject_cxc_rois.T)
                cxc_compressed = cxc_compressor.transform(subject_cxc_rois.T)
    else:
        cxc_compressed = None

    # Compute individual stability matrix
    #
    # compressed =
    #     if compression dimensionality == 0
    #         use original voxel data
    #     else
    #         if group dimensionality reduce
    #             project subject data into group lower dimensions
    #         else
    #             project subject data into individual lower dimensions
    ism = utils.individual_stability_matrix(
        compressed, roi_mask_data,
        n_bootstraps=n_bootstraps,
        n_clusters=n_clusters,
        similarity_metric=similarity_metric,
        Y2=cxc_compressed,
        cross_cluster=cross_cluster,
        cbb_block_size=cbb_block_size,
        blocklength=blocklength,
        affinity_threshold=affinity_threshold,
        cluster_method=cluster_method,
        random_state=random_state
    )

    ism = scipy.sparse.csr_matrix(ism, dtype=np.int8)
    ism_file = os.path.join(os.getcwd(), 'individual_stability_matrix.npz')

    # Get back to the original dimensionality based on individual or
    # group-based dimensionality reduction. When the data were never
    # compressed (compression_dim == 0) no labels exist, so the ISM is
    # already at voxel resolution and is saved as-is.
    if not compressor and compression_labels_file is not None:
        voxel_ism = utils.expand_ism(ism, compression_labels)
        voxel_ism = voxel_ism.astype("uint8")
        scipy.sparse.save_npz(ism_file, voxel_ism)
    else:
        scipy.sparse.save_npz(ism_file, ism)

    return ism_file, compression_labels_file
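# Example usage: a minimal sketch with hypothetical placeholder file names
# ('sub-01_bold.nii.gz', 'roi_mask.nii.gz') and illustrative parameter
# values; the function writes its outputs to the current working directory.
def _demo_nifti_individual_stability():
    return nifti_individual_stability(
        subject_file='sub-01_bold.nii.gz',   # placeholder subject nifti
        roi_mask_file='roi_mask.nii.gz',     # placeholder ROI mask
        n_bootstraps=100,
        n_clusters=5,
        compression_dim=800,
        similarity_metric='correlation',
    )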