from sklearn import svm, cross_validation, metrics
from pyriemann.classification import TSclassifier, MDM

# imports needed by the code below
import numpy as np
import pandas as pd
import glob
import scipy.linalg as la
import connectivity_utils as utils

# set directories
data_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/kernels/'
timecourse_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC_Unsmooth_TimeCourse/'

# indices of lower triangle of a 90 x 90 matrix
lotril_ind = np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))
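# as a quick illustration (not part of the original script): for a 3 x 3 matrix
# the strict lower triangle is entries (1,0), (2,0) and (2,1), which sit at flat
# positions 3, 6 and 7 of the row-major flattened matrix
assert list(np.ravel_multi_index(np.tril_indices(3, k=-1), (3, 3))) == [3, 6, 7]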

# for sparse inverse covariance matrices generated from raw timecourses
timecourse_connectivity_data = np.genfromtxt(timecourse_dir + 'sparse_inverse_covariance_data.csv', delimiter=',')
timecourse_files = pd.read_csv(timecourse_dir + 'sparse_inverse_covariance_files.csv').T.index.values
timecourse_IDs = map(lambda x: int(x.split('/')[-1].split('_')[1][0:-4]), timecourse_files) 
labels = np.array([utils.load_labels(data_dir), ])[0]
connectivity_files = glob.glob(data_dir + '*.txt')
connectivity_IDs = map(lambda x: int(x.split('/')[-1][0:3]), connectivity_files)
connectivity_IDs.sort()
connectivity_in_timecourse = np.array([ID in timecourse_IDs for ID in connectivity_IDs])
timecourse_in_connectivity = np.array([ID in connectivity_IDs for ID in timecourse_IDs])
labels = labels[connectivity_in_timecourse]
#labels = np.expand_dims(labels, axis=1)
timecourse_connectivity_data = timecourse_connectivity_data[timecourse_in_connectivity, :]

# take matrix logs of timecourse_connectivity_data
timecourse_log_data = np.squeeze(np.array(map(
    lambda x: np.reshape(la.logm(np.reshape(x, (90, 90))), (1, 8100)),
    timecourse_connectivity_data)))
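# why the matrix log: logm maps symmetric positive definite matrices into the
# log-Euclidean tangent space, where plain linear methods can be applied to the
# flattened vectors; quick sanity check on a toy SPD matrix (illustration only):
toy_spd = np.array([[2.0, 0.5], [0.5, 1.0]])
assert np.allclose(la.expm(la.logm(toy_spd)), toy_spd)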

# pull out lower triangle
timecourse_log_data_lotril = timecourse_log_data[:, lotril_ind]
timecourse_connectivity_data_lotril = timecourse_connectivity_data[:, lotril_ind]

# Example no. 2
# read in connectivity data and labels - for original connectivity matrices
#connectivity_data = utils.load_connectivity_data(data_dir)
#labels = np.array([utils.load_labels(data_dir), ])

# for sparse inverse covariance matrices generated from raw timecourses
connectivity_data = np.genfromtxt(timecourse_dir +
                                  'sparse_inverse_covariance_data.csv',
                                  delimiter=',')

timecourse_files = pd.read_csv(
    timecourse_dir + 'sparse_inverse_covariance_files.csv').T.index.values
timecourse_IDs = map(lambda x: int(x.split('/')[-1].split('_')[1][0:-4]),
                     timecourse_files)
edge_data = connectivity_data
labels = np.array([
    utils.load_labels(data_dir),
])[0]
connectivity_files = glob.glob(data_dir + '*.txt')
connectivity_IDs = map(lambda x: int(x.split('/')[-1][0:3]),
                       connectivity_files)
connectivity_IDs.sort()
connectivity_in_timecourse = np.array(
    [ID in timecourse_IDs for ID in connectivity_IDs])
timecourse_in_connectivity = np.array(
    [ID in connectivity_IDs for ID in timecourse_IDs])
labels = labels[connectivity_in_timecourse]
labels = np.expand_dims(labels, axis=1)
edge_data = edge_data[timecourse_in_connectivity, :]

# the Matusita affinity between two covariance matrices
def Matusita_kernel(cov_1, cov_2):

    # Matusita affinity (Bhattacharyya coefficient) between two zero-mean
    # Gaussians with covariances cov_1 and cov_2:
    # 2^(p/2) * det(cov_1)^(1/4) * det(cov_2)^(1/4) / det(cov_1 + cov_2)^(1/2)
    p = np.shape(cov_1)[0]

    det_1 = la.det(cov_1)
    det_2 = la.det(cov_2)
    det_sum = la.det(cov_1 + cov_2)
    return ((2 ** (p / 2.0)) * (det_1 ** 0.25) * (det_2 ** 0.25)) / (det_sum ** 0.5)
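# the affinity equals 1 when cov_1 == cov_2 and decays towards 0 as the two
# matrices diverge, so it can be used directly as a similarity (kernel) value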
    
# timecourse data connectivity matrices
timecourse_connectivity_data = np.genfromtxt(timecourse_dir + 'sparse_inverse_covariance_data.csv', delimiter=',')    
connectivity_data = np.genfromtxt(timecourse_dir + 'sparse_inverse_covariance_data.csv', delimiter=',')
timecourse_files = pd.read_csv(timecourse_dir + 'sparse_inverse_covariance_files.csv').T.index.values
timecourse_IDs = map(lambda x: int(x.split('/')[-1].split('_')[1][0:-4]), timecourse_files) 
labels = np.array([utils.load_labels(data_dir), ])[0]
connectivity_files = glob.glob(data_dir + '*.txt')
connectivity_IDs = map(lambda x: int(x.split('/')[-1][0:3]), connectivity_files)
connectivity_IDs.sort()
connectivity_in_timecourse = np.array([ID in timecourse_IDs for ID in connectivity_IDs])
timecourse_in_connectivity = np.array([ID in connectivity_IDs for ID in timecourse_IDs])
labels = labels[connectivity_in_timecourse]
#labels = np.expand_dims(labels, axis=1)
timecourse_connectivity_data = timecourse_connectivity_data[timecourse_in_connectivity, :]
timecourse_connectivity_matrices = np.reshape(timecourse_connectivity_data, (100, 90, 90))
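# a minimal usage sketch (an assumption, not part of the original script): fill
# a Gram matrix of pairwise Matusita affinities between subjects, suitable for
# e.g. svm.SVC(kernel='precomputed')
n_subjects = timecourse_connectivity_matrices.shape[0]
K = np.zeros((n_subjects, n_subjects))
for i in range(n_subjects):
    for j in range(i, n_subjects):
        K[i, j] = K[j, i] = Matusita_kernel(timecourse_connectivity_matrices[i],
                                            timecourse_connectivity_matrices[j])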

# original connectivity matrices
#connectivity_data, connectivity_files = utils.load_connectivity_data('/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/')
#connectivity_data = connectivity_data[connectivity_in_timecourse, :]
#connectivity_matrices = np.reshape(connectivity_data, (100, 90, 90))
# put the sparse inverse covariance data and its file names in a DataFrame
dataset_1_cov = pd.DataFrame(data=dataset_1_cov_data)
dataset_1_cov['file'] = dataset_1_cov_files.index

# convert format of file name so they can be matched
dataset_1_cov['file'] = dataset_1_cov['file'].apply(
    lambda x: x.split('/')[-1].split('_')[-1].zfill(7))
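# the zero-padding is presumably so that names like '12.txt' become '012.txt'
# and line up with the zero-padded names in the metadata list below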

# import and process full dataset 1 files list and metadata to get labels
# import original dataset 1 data, files
dataset_1_data, dataset_1_files = utils.load_connectivity_data(
    '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/')

# import dataset 1 labels
dataset_1_labels = utils.load_labels(
    '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/')

# put labels alongside files in a DF
dataset_1_metadata = pd.DataFrame(columns=['file', 'label'])
dataset_1_metadata['file'] = dataset_1_files
dataset_1_metadata['label'] = dataset_1_labels

# convert format of file name so they can be matched
dataset_1_metadata['file'] = dataset_1_metadata['file'].apply(
    lambda x: x.split('/')[-1].split('_')[-1])

# join the DFs to match labels with sparse inverse cov data
dataset_1_cov = dataset_1_cov.merge(dataset_1_metadata, how='inner', on='file')
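# the inner join keeps only rows whose 'file' value appears in both DataFrames,
# so subjects without a label or without covariance data are dropped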

# extract the data and labels
dataset_1_cov_data = dataset_1_cov.iloc[:, 0:8100].values

# convert the covariance file names to subject numbers
covariance_subject_ids = map(lambda string: int(string.split('_')[-1][0:-4]),
                             covariance_files[0])

# find indices of common elements in BOTH lists
common_correlation_indices = [
    i for i, item in enumerate(correlation_subject_ids)
    if item in set(covariance_subject_ids)
]
common_covariance_indices = [
    i for i, item in enumerate(covariance_subject_ids)
    if item in set(correlation_subject_ids)
]
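# worked example (illustration only): with correlation_subject_ids = [1, 2, 3]
# and covariance_subject_ids = [2, 3, 4], the common subjects are 2 and 3, so
# common_correlation_indices == [1, 2] and common_covariance_indices == [0, 1]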

# read in the labels
labels = np.array([
    utils.load_labels(correlation_dir + 'matrix_unsmooth/'),
])

# take only labels for subjects common between covariance and correlation sets.
# initially labels are for the same subjects as correlation data so use common_correlation_indices
labels = labels[0, common_correlation_indices]
print len(labels)

# read in the sparse inverse covariance data and cut it down to only include common subjects
sparse_inverse_cov_data = np.genfromtxt(
    sparse_inverse_cov_dir + 'OAS_data.csv',
    delimiter=',')[common_covariance_indices, :]

# map lower triangles of connectivities to an array
sparse_cov_edge_data = np.apply_along_axis(
    lambda x: x[np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))], 1,
    sparse_inverse_cov_data)

# Example no. 6
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel
import numpy as np
import connectivity_utils as utils

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# number of subjects
n = 333

# read in connectivity data and labels
connectivity_data_1 = utils.load_connectivity_data(data_dir_1)
labels_1 = np.array([
    utils.load_labels(data_dir_1),
])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2)
labels_2 = np.array([
    utils.load_labels(data_dir_2),
])

connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
labels = np.hstack((labels_1, labels_2))

# set negative connectivities to 0
edge_data = np.apply_along_axis(
    lambda x: [0 if element < 0 else element for element in x], 1,
    connectivity_data)
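# note: this is equivalent to the vectorised form
# edge_data = np.maximum(connectivity_data, 0)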

# normalise the edge data (row-wise, with the sklearn normalize imported above)
edge_data = normalize(edge_data)

import connectivity_utils as utils

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# include negatively weighted edges or not
include_negative_weights = True

# standardise data with a z-transform
standardise_data = False

# read in connectivity data and labels
connectivity_data_1 = utils.load_connectivity_data(data_dir_1, standardise_data)
labels_1 = np.array([utils.load_labels(data_dir_1), ])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2, standardise_data)
labels_2 = np.array([utils.load_labels(data_dir_2), ])


connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
labels = np.hstack((labels_1, labels_2))

# save connectivity data
np.savetxt(kernel_dir + 'connectivity_data.csv', connectivity_data, delimiter=',')

# map lower triangles of connectivities to an array
edge_data = np.apply_along_axis(lambda x: x[np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))], 1, connectivity_data)
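# note: equivalent to direct fancy indexing,
# edge_data = connectivity_data[:, np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))]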

if not include_negative_weights:

    # set negative connectivities to 0
    edge_data = np.apply_along_axis(lambda x: [0 if element < 0 else element for element in x], 1, edge_data)

# convert the list of correlation files to subject numbers
correlation_subject_ids = map(lambda string: int(string.split('/')[-1][0:3]), correlation_files)

# read in and process the list of files for which we have sparse covariance data
with open(sparse_inverse_cov_dir + 'sparse_inverse_covariance_files.csv', 'rb') as f:
    reader = csv.reader(f)
    covariance_files = list(reader)
    
# convert to list of subject numbers
covariance_subject_ids = map(lambda string: int(string.split('_')[-1][0:-4]), covariance_files[0])

# find indices of common elements in BOTH lists
common_correlation_indices = [i for i, item in enumerate(correlation_subject_ids) if item in set(covariance_subject_ids)]
common_covariance_indices = [i for i, item in enumerate(covariance_subject_ids) if item in set(correlation_subject_ids)]

# read in the labels 
labels = np.array([utils.load_labels(correlation_dir + 'matrix_unsmooth/' ), ])

# take only labels for subjects common between covariance and correlation sets.
# initially labels are for the same subjects as correlation data so use common_correlation_indices
labels = labels[0, common_correlation_indices]
print len(labels)

# read in the sparse inverse covariance data and cut it down to only include common subjects
sparse_inverse_cov_data = np.genfromtxt(sparse_inverse_cov_dir + 'OAS_data.csv', delimiter=',')[common_covariance_indices,:]

# map lower triangles of connectivities to an array
sparse_cov_edge_data = np.apply_along_axis(lambda x: x[np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))], 1, sparse_inverse_cov_data)

# optional - remove negative correlations
#sparse_cov_edge_data = np.apply_along_axis(lambda x: [0 if element < 0 else element for element in x], 1, sparse_cov_edge_data)

# Example no. 9
from scipy.linalg import logm
import numpy as np
import connectivity_utils as utils

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# include negatively weighted edges or not
include_negative_weights = True

# standardise data with a z-transform
standardise_data = True

# read in connectivity data and labels
connectivity_data_1, connectivity_files = utils.load_connectivity_data(data_dir_1, standardise_data)
labels_1 = np.array([utils.load_labels(data_dir_1), ])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2, standardise_data)
labels_2 = np.array([utils.load_labels(data_dir_2), ])

#connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
connectivity_data = connectivity_data_1
labels = np.hstack((labels_1, labels_2))

edge_data = connectivity_data

if not include_negative_weights:

    # set negative connectivities to 0
    edge_data = np.apply_along_axis(lambda x: [0 if element < 0 else element for element in x], 1, edge_data)