Example #1
            # Append this bootstrap's group-level metrics as a new row
            newgroupdata = pd.DataFrame(
                [[rerun, group_dim, bootstrap, group_label_acc,
                  gsm_repref_corr, mean_rep_ism_gsm_corr, mean_ref_ism_gsm_corr,
                  stdev_rep_ism_gsm_corr, stdev_ref_ism_gsm_corr]],
                columns=['rerun', 'group_dim', 'bootstrap', 'group_label_acc',
                         'gsm_repref_corr', 'mean_rep_ism_gsm_corr',
                         'mean_ref_ism_gsm_corr', 'stdev_rep_ism_gsm_corr',
                         'stdev_ref_ism_gsm_corr'])
            group_level_reproducibility = pd.concat(
                [group_level_reproducibility, newgroupdata])
            for subdir in subdirs:
                # Parse the subject ID out of the subdirectory name
                matrices = subdir.split('_')[3]
                subject = matrices[8:]

                ism = np.load(ismdir + subdir + '/individual_stability_matrix.npy')
                replabels = np.load(ismdir + subdir + '/Y1_labels.npy')
                
                refism = np.load(refismdir + subdir + '/individual_stability_matrix.npy')
                reflabels = np.load(refismdir + subdir + '/Y1_labels.npy')
                
                # Expand both stability matrices back to voxel resolution
                exp_ism = utils.expand_ism(ism, replabels)
                exp_refism = utils.expand_ism(refism, reflabels)
                
                print('calculating ism reproducibility for subject ' + subject)
                ism_repref_corr = np.corrcoef(exp_ism.ravel(), exp_refism.ravel())[0, 1]
                
                sub_rep_ism_gsm_corr = rep_ism_gsm_corr[int(subject)]
                sub_ref_ism_gsm_corr = ref_ism_gsm_corr[int(subject)]
                
                newdata = pd.DataFrame(
                    [[rerun, group_dim, bootstrap, subject, ism_repref_corr,
                      sub_rep_ism_gsm_corr, sub_ref_ism_gsm_corr]],
                    columns=['rerun', 'group_dim', 'bootstrap', 'subject',
                             'ism_repref_corr', 'sub_rep_ism_gsm_corr',
                             'sub_ref_ism_gsm_corr'])
                all_individual_reproducibility = pd.concat(
                    [all_individual_reproducibility, newdata])
    all_individual_reproducibility.to_csv('all_individual_reproducibility.csv')
    group_level_reproducibility.to_csv('group_level_reproducibility.csv')
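# A note on the accumulation pattern above: calling pd.concat inside the loop
# copies the growing DataFrame on every iteration. A common alternative is to
# collect the per-iteration frames in a list and concatenate once after the
# loop. A minimal, self-contained sketch of that pattern (column names and
# values here are illustrative stand-ins):
import pandas as pd

# Collect one single-row DataFrame per iteration...
frames = []
for bootstrap in range(3):  # stand-in for the real bootstrap loop
    row = pd.DataFrame([[bootstrap, 0.9]],
                       columns=['bootstrap', 'ism_repref_corr'])
    frames.append(row)

# ...then concatenate once, instead of once per iteration
all_individual_reproducibility = pd.concat(frames, ignore_index=True)
print(all_individual_reproducibility)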

Example #2
def join_group_stability(
    subject_stability_list, group_stability_list, n_bootstraps, n_clusters,
    roi_mask_file, group_dim_reduce, compression_labels_list,
    cluster_method='ward', random_state_tuple=None
):
    """
    Merges the group stability maps for all and compares to all individual
    stability maps

    Parameters
    ----------
    subject_stability_list : list of strings
        A length `N` list of file paths to numpy matrices of shape (`V`, `V`),
        `N` subjects, `V` voxels
        
    group_stability_list : list of strings
        A list of file paths to group stability matrices of shape (`V`, `V`),
        one per bootstrap, `V` voxels

    n_bootstraps : integer
        Number of bootstraps to join and average.

    n_clusters : integer
        Number of clusters extracted from the adjacency matrix
        
    roi_mask_file : string
        Region of interest that is being parcellated. Large volumes should use
        compression_dim to reduce computational load.
   
    group_dim_reduce : boolean
        Whether or not dimension reduction will be performed at the group 
        level.
    
    compression_labels_list : list of strings
        List of file paths to the arrays of dimension-reduced labels produced
        by each individual dimension reduction.

    cluster_method : string, optional
        Clustering algorithm to apply (default: 'ward').

    Returns
    -------
    G : array_like
        The group stability matrix, averaged across bootstraps
    clusters_G : array_like
        Cluster assignments derived from the group stability matrix
    ism_gsm_corr : array_like
        Per-subject correlation between each individual stability matrix and
        the group stability matrix
    gsm_file : string
        File path to the saved group stability matrix
    clusters_G_file : string
        File path to the saved cluster assignments
    ism_gsm_corr_file : string
        File path to the saved ISM/GSM correlations

    """

    import os
    import numpy as np
    import nibabel as nb
    import PyBASC.utils as utils
    import scipy.sparse

    random_state = utils.get_random_state(random_state_tuple)

    group_stability_set = np.asarray([
        scipy.sparse.load_npz(G_file).toarray()
        for G_file in group_stability_list
    ])

    
    # Sum the per-bootstrap co-cluster counts and rescale so each entry is
    # the percentage of bootstraps in which two units clustered together
    G = group_stability_set.sum(axis=0)
    G *= 100
    G //= n_bootstraps
    G = G.astype("uint8")

    if group_dim_reduce:
        # Expand the group matrix from supervoxel space back to voxel space
        compression_labels = np.asarray([np.load(compression_labels_list[0])])
        G = scipy.sparse.csr_matrix(G, dtype=np.int8)
        G = utils.expand_ism(G, compression_labels.T)
        G = G.toarray()
    
    
    roi_mask_data = nb.load(roi_mask_file).get_data().astype('bool')

    clusters_G = utils.cluster_timeseries(
        G, roi_mask_data, n_clusters,
        similarity_metric='correlation', affinity_threshold=0.0,
        cluster_method=cluster_method, random_state=random_state
    )
    clusters_G = clusters_G.astype("uint16")

    # TODO @AKI APPLY THIS METHOD TO THE INDIVIDUAL LEVEL CLUSTER
    # TODO @AKI INSERT SECTION HERE TO RETURN ALL OUTPUTS
    #           OF JGSM TO VOXEL RESOLUTION.

    # Cluster labels normally start from 0,
    # so start from 1 to provide contrast when viewing between 0 voxels
    clusters_G += 1

    indiv_stability_set = [
        scipy.sparse.load_npz(ism_file) for ism_file in subject_stability_list
    ]

    # Without individual dimension reduction, the ISMs are already at voxel
    # resolution and can be compared to G directly
    if compression_labels_list[0] is None:
        ism_gsm_corr = np.zeros(len(subject_stability_list))

        for i in range(len(subject_stability_list)):
            ism = indiv_stability_set[i].toarray()
            ism_gsm_corr[i] = utils.compare_stability_matrices(ism, G)

    else:

        compression_labels_set = np.asarray([
            np.load(compression_labels_file)
            for compression_labels_file in compression_labels_list
        ])

        ism_gsm_corr = np.zeros(len(subject_stability_list))

        for i in range(len(subject_stability_list)):
            compression_labels = compression_labels_set[i]
            ism = utils.expand_ism(
                indiv_stability_set[i], compression_labels
            ).toarray()
            ism_gsm_corr[i] = utils.compare_stability_matrices(ism, G)

    gsm_file = os.path.join(os.getcwd(), 'group_stability_matrix.npz')
    G = scipy.sparse.csr_matrix(G, dtype=np.int8)

    scipy.sparse.save_npz(gsm_file, G)

    clusters_G_file = os.path.join(os.getcwd(), 'clusters_G.npy')
    np.save(clusters_G_file, clusters_G)

    ism_gsm_corr_file = os.path.join(os.getcwd(), 'ism_gsm_corr.npy')
    np.save(ism_gsm_corr_file, ism_gsm_corr)

    return (
        G,
        clusters_G,
        ism_gsm_corr,
        gsm_file,
        clusters_G_file,
        ism_gsm_corr_file
    )
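# For reference, a minimal call sketch for join_group_stability, assuming the
# per-subject `.npz` stability matrices and per-bootstrap group matrices have
# already been produced by the earlier pipeline steps. All paths below are
# hypothetical stand-ins, and the import path is assumed.
from PyBASC.basc import join_group_stability

subject_isms = ['sub-01/individual_stability_matrix.npz',
                'sub-02/individual_stability_matrix.npz']
bootstrap_gsms = ['boot-%02d/group_stability_matrix.npz' % b
                  for b in range(10)]

(G, clusters_G, ism_gsm_corr,
 gsm_file, clusters_G_file, ism_gsm_corr_file) = join_group_stability(
    subject_stability_list=subject_isms,
    group_stability_list=bootstrap_gsms,
    n_bootstraps=10,
    n_clusters=5,
    roi_mask_file='roi_mask.nii.gz',   # hypothetical mask path
    group_dim_reduce=False,
    compression_labels_list=[None],    # no individual dim reduce
)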
Example #3
def individual_group_clustered_maps(
        subject_stability_list, clusters_G, roi_mask_file,
        group_dim_reduce, compression_labels_file):
    # TODO @AKI update doc
    """
    Calculate the individual stability maps of each subject based on the group
    stability clustering solution.

    Parameters
    ----------
    subject_stability_list : string
        File path to a single subject's sparse stability matrix of shape
        (`V`, `V`), `V` voxels (despite the plural name, one file is loaded)
    clusters_G : array_like
        Length `V` array of cluster assignments for each voxel
    roi_mask_file : string
        Region of interest that is being parcellated. Large volumes should use
        compression_dim to reduce computational load. 
    group_dim_reduce : boolean
        Whether or not dimension reduction will be performed at the group 
        level.
    compression_labels_file : string
        File path to the array of dimension-reduced labels from an
        individual dimension reduction.

    Returns
    -------
    ind_group_cluster_stability_file : string
        File path to the array of mean within-cluster stability scores,
        one per group-level cluster
    individualized_group_clusters_file : string
        File path to a nifti volume of this subject's individualized
        group clustering
    ind_group_cluster_labels_file : string
        File path to the array of individualized cluster labels

    """

    import os
    import numpy as np
    import PyBASC.utils as utils
    import PyBASC.basc as basc
    import scipy.sparse

    supervox_ism = scipy.sparse.load_npz(subject_stability_list)

    if compression_labels_file:
        compression_labels = np.load(compression_labels_file)
    else:
        compression_labels = None

    if group_dim_reduce:
        indiv_stability_set = utils.expand_ism(
            supervox_ism, compression_labels
        ).toarray()
    else:
        indiv_stability_set = supervox_ism.toarray()

    cluster_ids = np.unique(clusters_G)
    cluster_voxel_scores, k_mask = \
        utils.cluster_matrix_average(indiv_stability_set, clusters_G)

    ind_group_cluster_stability = np.array([
        cluster_voxel_scores[(i-1), clusters_G == i].mean()
        for i in cluster_ids
    ])

    cluster_voxel_scores = cluster_voxel_scores.astype("uint8")

    k_mask = k_mask.astype(bool)

    ind_group_cluster_stability_file = os.path.join(
        os.getcwd(), 'ind_group_cluster_stability.npy'
    )
    np.save(ind_group_cluster_stability_file, ind_group_cluster_stability)

    individualized_group_cluster_npy = np.argmax(
        cluster_voxel_scores, axis=0
    ) + 1

    ind_group_cluster_labels_file = os.path.join(
        os.getcwd(), 'ind_group_cluster_labels.npy'
    )
    np.save(ind_group_cluster_labels_file, individualized_group_cluster_npy)

    individualized_group_clusters_file, _ = basc.ndarray_to_vol(
        individualized_group_cluster_npy,
        roi_mask_file,
        roi_mask_file,
        os.path.join(os.getcwd(), 'individualized_group_cluster.nii.gz')
    )

    return (ind_group_cluster_stability_file,
            individualized_group_clusters_file,
            ind_group_cluster_labels_file)
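# A minimal call sketch for individual_group_clustered_maps. Paths are
# hypothetical stand-ins; note that despite its name, subject_stability_list
# is consumed as a single file path. The import path is assumed.
import numpy as np
from PyBASC.basc import individual_group_clustered_maps

clusters_G = np.load('clusters_G.npy')  # produced by join_group_stability

(stability_file,
 clusters_nifti_file,
 labels_file) = individual_group_clustered_maps(
    subject_stability_list='sub-01/individual_stability_matrix.npz',
    clusters_G=clusters_G,
    roi_mask_file='roi_mask.nii.gz',
    group_dim_reduce=False,
    compression_labels_file=None,
)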
Example #4
def nifti_individual_stability(
    subject_file, roi_mask_file,
    n_bootstraps, n_clusters, compression_dim, similarity_metric,
    blocklength=1, cbb_block_size=None, affinity_threshold=0.0, cluster_method='ward',
    compressor=None, cross_cluster=False, cxc_compressor=None,
    cxc_roi_mask_file=None, random_state_tuple=None
):
    # TODO @AKI update docs
    """
    Calculate the individual stability matrix for a single subject, using the
    circular block bootstrap method for time-series data.

    Parameters
    ----------
    subject_file : string
        Nifti file of a subject
        
    roi_mask_file : string
        Region of interest that is being parcellated. Large volumes should use
        compression_dim to reduce computational load.
        
    n_bootstraps : integer
        Number of bootstraps
        
    n_clusters : integer
        Number of clusters
                
    compression_dim : integer or list of two integers
        The number of supervoxels to create in the compression. When a list
        is given, the second entry is used for the cross-cluster region.

    similarity_metric : string
        Similarity metric to use; options: 'correlation'
    
    blocklength : float, optional
        A scalar value of the window size to be used for the block bootstrap
    
    cbb_block_size : integer, optional
        Size of the time-series block when performing circular block bootstrap
        
    affinity_threshold : float, optional
        Minimum threshold for similarity matrix based on correlation to create an edge
       
    cluster_method : string, optional
        Clustering algorithm to apply (default: 'ward')

    compressor : object, optional
        Fitted compressor object from the group dimension reduction; must
        provide `labels_` and `transform`.
    
    cross_cluster : boolean
        Whether or not the region of interest will be clustered according to 
        the similarity of connectivity within the region, or similarity of 
        connectivity to a secondary region.
    
    cxc_compressor : object, optional
        Compressor object from the group dimension reduction, used for the
        cross-cluster region; must provide `fit` and `transform`.
    
    cxc_roi_mask_file : string
        The primary region will be clustered based on similarity of
        voxel-wise connectivity to this region.
    
    random_state_tuple : tuple, optional
        Tuple representation of a random state, passed to
        utils.get_random_state to seed the random number generator.

    Returns
    -------
    ism_file : string
        File path to the saved individual stability matrix (`.npz`) of shape
        (`V`, `V`), `V` voxels
    compression_labels_file : string or None
        File path to the saved compression labels, or None when no
        compression was applied
    """
    
    import os
    import numpy as np
    import nibabel as nb
    import PyBASC.utils as utils
    from sklearn.preprocessing import normalize
    import scipy.sparse

    print('Calculating individual stability matrix of:', subject_file)

    random_state = utils.get_random_state(random_state_tuple)


    # compression_dim may carry a second entry for the cross-cluster region
    if isinstance(compression_dim, list):
        cxc_compression_dim = compression_dim[1]
        compression_dim = compression_dim[0]
    else:
        cxc_compression_dim = compression_dim

    subject_data = nb.load(subject_file).get_data().astype('float32')
    roi_mask_image = nb.load(roi_mask_file)
    roi_mask_data = roi_mask_image.get_data().astype('bool')

    subject_rois = subject_data[roi_mask_data]
    subject_rois = normalize(subject_rois, norm='l2')

    if compression_dim == 0:

        # Use uncompressed data; no labels are needed since the matrix is
        # already at voxel resolution
        compressed = subject_rois.T
        compression_labels = None
        compression_labels_file = None

    else:

        if not compressor:

            # Perform individual data compression
            compression = utils.data_compression(
                subject_rois.T,
                roi_mask_image,
                roi_mask_data,
                compression_dim
            )

            compression_labels = compression['labels'][:, np.newaxis]
            compressed = compression['compressed']

        else:

            # Use group-based data compression
            compression_labels = compressor.labels_
            compressed = compressor.transform(subject_rois.T)
            roi_mask_data = None

        compression_labels_file = os.path.join(
            os.getcwd(), 'compression_labels.npy'
        )
        np.save(compression_labels_file, compression_labels)

    if cross_cluster:

        cxc_roi_mask_img = nb.load(cxc_roi_mask_file)
        cxc_roi_mask_data = cxc_roi_mask_img.get_data().astype('bool')

        subject_cxc_rois = subject_data[cxc_roi_mask_data]
        subject_cxc_rois = normalize(subject_cxc_rois, norm='l2')

        if cxc_compression_dim == 0:

            # Use uncompressed data
            cxc_compressed = subject_cxc_rois.T

        else:

            if not cxc_compressor:

                # Perform individual data compression
                cxc_compression = utils.data_compression(
                    subject_cxc_rois.T,
                    cxc_roi_mask_img,
                    cxc_roi_mask_data,
                    cxc_compression_dim
                )

                cxc_compressed = cxc_compression['compressed']

            else:

                # Use group-based data compression
                cxc_compressor.fit(subject_cxc_rois.T)
                cxc_compressed = cxc_compressor.transform(subject_cxc_rois.T)

    else:
        cxc_compressed = None

    # Compute individual stability matrix
    #
    # compressed =
    #   if compression dimensionality == 0
    #       use original voxel data
    #   else
    #       if group dimensionality reduce
    #           project subject data into group lower dimensions
    #       else
    #           project subject data into individual lower dimensions
    #
    ism = utils.individual_stability_matrix(
        compressed, roi_mask_data,
        n_bootstraps=n_bootstraps,
        n_clusters=n_clusters,
        similarity_metric=similarity_metric,
        Y2=cxc_compressed,
        cross_cluster=cross_cluster,
        cbb_block_size=cbb_block_size,
        blocklength=blocklength,
        affinity_threshold=affinity_threshold,
        cluster_method=cluster_method,
        random_state=random_state
    )

    ism = scipy.sparse.csr_matrix(ism, dtype=np.int8)
    ism_file = os.path.join(os.getcwd(), 'individual_stability_matrix.npz')

    # get back to original dimensionality based on individual or group-based
    # dimensionality reduction
    if not compressor and compression_labels is not None:
        voxel_ism = utils.expand_ism(ism, compression_labels)
        voxel_ism = voxel_ism.astype("uint8")
        scipy.sparse.save_npz(ism_file, voxel_ism)
    else:
        scipy.sparse.save_npz(ism_file, ism)

    return ism_file, compression_labels_file
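# A minimal call sketch for nifti_individual_stability, assuming a
# preprocessed 4D bold image and an ROI mask. Paths and parameter values
# below are hypothetical stand-ins, and the import path is assumed.
from PyBASC.basc import nifti_individual_stability

ism_file, compression_labels_file = nifti_individual_stability(
    subject_file='sub-01_bold.nii.gz',
    roi_mask_file='roi_mask.nii.gz',
    n_bootstraps=100,
    n_clusters=5,
    compression_dim=800,               # number of supervoxels
    similarity_metric='correlation',
)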