return  k/c
       
# import what we need
import numpy as np
import connectivity_utils as utils
import scipy.linalg as la
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import normalize

# set directories
data_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/kernels/'

# read in connectivity data and labels
# the labels are wrapped in a one-element list so the array gets a leading
# axis of length 1 and can be transposed into a column further down
# (presumably load_labels returns a flat sequence - TODO confirm)
connectivity_data = utils.load_connectivity_data(data_dir)
labels = np.array([utils.load_labels(data_dir), ])

# set negative connectivities to 0
# a single vectorised clamp keeps the dtype of the input; the previous
# row-by-row list comprehension took the output dtype from the first row only
edge_data = np.maximum(connectivity_data, 0)
#edge_data = edge_data[:6, :]

# calculate the kernel using pdist
# pdist evaluates rw_overlap_kernel on every pair of rows and squareform
# expands the condensed result into a square symmetric matrix
K = squareform(pdist(edge_data, rw_overlap_kernel))
# squareform leaves the diagonal at 0; set it to 1 for however many subjects
# were loaded instead of assuming exactly 140
np.fill_diagonal(K, 1)
# put the labels in front of the kernel as its first column
K = np.hstack((np.transpose(labels), K))

np.savetxt(kernel_dir + 'K_rw_overlap.csv', K, delimiter=',')

#C1 = edge_data[0, :]
#C2 = edge_data[1, :]
# directory holding the original dataset 1 connectivity matrices
dataset_1_matrix_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'


def _cov_file_key(path):
    """Return the last '_'-separated piece of the base name, zero-padded to 7."""
    return path.rsplit('/', 1)[-1].rsplit('_', 1)[-1].zfill(7)


def _metadata_file_key(path):
    """Return the last '_'-separated piece of the base name."""
    return path.rsplit('/', 1)[-1].rsplit('_', 1)[-1]


# read the list of sparse inverse covariance files for dataset 1; the table
# is transposed because its index is used as the file list below
sparse_inverse_covariance_csv = dataset_1_dir + 'sparse_inverse_covariance_files.csv'
dataset_1_cov_files = pd.read_csv(sparse_inverse_covariance_csv).T

# wrap the covariance data in a DataFrame and attach the file list to it
dataset_1_cov = pd.DataFrame(dataset_1_cov_data)
dataset_1_cov['file'] = dataset_1_cov_files.index

# reduce each file entry to a key that can be matched against the metadata
dataset_1_cov['file'] = dataset_1_cov['file'].apply(_cov_file_key)

# load the original dataset 1 data together with its file list, and the
# labels, so that the labels can be looked up per file
dataset_1_data, dataset_1_files = utils.load_connectivity_data(
    dataset_1_matrix_dir)
dataset_1_labels = utils.load_labels(dataset_1_matrix_dir)

# build a two-column table: one row per file with its label
dataset_1_metadata = pd.DataFrame(columns=['file', 'label'])
dataset_1_metadata['file'] = dataset_1_files
dataset_1_metadata['label'] = dataset_1_labels

# reduce the metadata file entries to the same kind of key
dataset_1_metadata['file'] = dataset_1_metadata['file'].apply(
    _metadata_file_key)

# join the DFs to match labels with sparse inverse cov data
Exemplo n.º 3
0
import numpy as np
import connectivity_utils as utils
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# number of subjects
n = 333

# read in connectivity data and labels
# each label set is wrapped in a one-element list so the array gets a leading
# axis of length 1 (presumably load_labels returns a flat sequence - TODO
# confirm)
connectivity_data_1 = utils.load_connectivity_data(data_dir_1)
labels_1 = np.array([
    utils.load_labels(data_dir_1),
])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2)
labels_2 = np.array([
    utils.load_labels(data_dir_2),
])

# pool the two datasets: data rows are stacked vertically, labels are joined
# horizontally
connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
labels = np.hstack((labels_1, labels_2))

# set negative connectivities to 0
# a single vectorised clamp keeps the dtype of the input; the previous
# row-by-row list comprehension took the output dtype from the first row only
edge_data = np.maximum(connectivity_data, 0)
import numpy as np
import connectivity_utils as utils

# set directories
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# include negatively weighted edges or not
include_negative_weights = True

# standardise data with a z-transform
standardise_data = False

# read in connectivity data and labels
# each label set is wrapped in a one-element list so the array gets a leading
# axis of length 1 (presumably load_labels returns a flat sequence - TODO
# confirm)
connectivity_data_1 = utils.load_connectivity_data(data_dir_1, standardise_data)
labels_1 = np.array([utils.load_labels(data_dir_1), ])
connectivity_data_2 = utils.load_connectivity_data(data_dir_2, standardise_data)
labels_2 = np.array([utils.load_labels(data_dir_2), ])

# pool the two datasets: data rows are stacked vertically, labels are joined
# horizontally
connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
labels = np.hstack((labels_1, labels_2))

# save connectivity data
np.savetxt(kernel_dir + 'connectivity_data.csv', connectivity_data, delimiter=',')

# map lower triangles of connectivities to an array
# flat positions of the strictly-lower-triangle entries of a 90 x 90 matrix,
# computed once rather than once per row
# (assumes every row is a flattened 90 x 90 matrix - TODO confirm)
lower_triangle_indices = np.ravel_multi_index(np.tril_indices(90, k=-1), (90, 90))
edge_data = connectivity_data[:, lower_triangle_indices]

if not include_negative_weights :
    return k / c


# import what we need
import numpy as np
import connectivity_utils as utils
import scipy.linalg as la
from scipy.spatial.distance import pdist, squareform
from sklearn.preprocessing import normalize

# set directories
data_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/kernels/'

# read in connectivity data and labels
# the labels are wrapped in a one-element list so the array gets a leading
# axis of length 1 and can be transposed into a column further down
# (presumably load_labels returns a flat sequence - TODO confirm)
connectivity_data = utils.load_connectivity_data(data_dir)
labels = np.array([
    utils.load_labels(data_dir),
])

# set negative connectivities to 0
# a single vectorised clamp keeps the dtype of the input; the previous
# row-by-row list comprehension took the output dtype from the first row only
edge_data = np.maximum(connectivity_data, 0)
#edge_data = edge_data[:6, :]

# calculate the kernel using pdist
# pdist evaluates rw_overlap_kernel on every pair of rows and squareform
# expands the condensed result into a square symmetric matrix
K = squareform(pdist(edge_data, rw_overlap_kernel))
# squareform leaves the diagonal at 0; set it to 1 for however many subjects
# were loaded instead of assuming exactly 140
np.fill_diagonal(K, 1)
# put the labels in front of the kernel as its first column
K = np.hstack((np.transpose(labels), K))
Exemplo n.º 6
0
from sklearn.cross_decomposition import CCA, PLSCanonical
from sklearn import svm, cross_validation, metrics, decomposition

# where the two connectivity datasets live and where kernels are stored
data_dir_1 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/'
data_dir_2 = '/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC2/matrix_unsmooth/'
kernel_dir = '/home/jonyoung/IoP_data/Data/connectivity_data/kernels/'

# switch: keep negatively weighted edges?
include_negative_weights = True

# switch: z-transform the data? (handed to the loader below)
standardise_data = False

# dataset 1: connectivity data, then labels wrapped in a one-element list so
# the label array gets a leading axis of length 1
connectivity_data_1 = utils.load_connectivity_data(
    data_dir_1, standardise_data)
labels_1 = np.array([utils.load_labels(data_dir_1)])

# dataset 2: same treatment
connectivity_data_2 = utils.load_connectivity_data(
    data_dir_2, standardise_data)
labels_2 = np.array([utils.load_labels(data_dir_2)])

# join the labels of both datasets and drop the length-1 axes
labels = np.hstack((labels_1, labels_2)).squeeze()

# cross-validation fold count and total subject count
n_folds = 10
n_subjects = len(labels)

# stack the data rows of both datasets
connectivity_data = np.vstack((connectivity_data_1, connectivity_data_2))
# sparse inverse covariance data stored alongside the timecourse file list
# NOTE(review): this used to be stored in `connectivity_data`, which is
# overwritten further down before it is ever read, while
# `timecourse_connectivity_data` was filtered below without being assigned in
# this part of the script - confirm this is the intended source
timecourse_connectivity_data = np.genfromtxt(timecourse_dir + 'sparse_inverse_covariance_data.csv', delimiter=',')
timecourse_files = pd.read_csv(timecourse_dir + 'sparse_inverse_covariance_files.csv').T.index.values
# ID per timecourse file: second '_'-separated field of the base name with its
# last four characters dropped (presumably the extension - TODO confirm);
# built as a real list so it can be iterated more than once
timecourse_IDs = [int(x.split('/')[-1].split('_')[1][0:-4]) for x in timecourse_files]
labels = np.array([utils.load_labels(data_dir), ])[0]
connectivity_files = glob.glob(data_dir + '*.txt')
# ID per connectivity file: first three characters of the base name, sorted
# NOTE(review): labels are masked by position below, so they are assumed to be
# in this sorted ID order - confirm against load_labels
connectivity_IDs = sorted(int(x.split('/')[-1][0:3]) for x in connectivity_files)
# sets give constant-time membership tests for the two masks
timecourse_ID_set = set(timecourse_IDs)
connectivity_ID_set = set(connectivity_IDs)
# boolean masks marking the subjects present in both sources; dtype is given
# explicitly so an empty ID list still yields a boolean mask
connectivity_in_timecourse = np.array([ID in timecourse_ID_set for ID in connectivity_IDs], dtype=bool)
timecourse_in_connectivity = np.array([ID in connectivity_ID_set for ID in timecourse_IDs], dtype=bool)
labels = labels[connectivity_in_timecourse]
#labels = np.expand_dims(labels, axis=1)
timecourse_connectivity_data = timecourse_connectivity_data[timecourse_in_connectivity, :]
# one 90 x 90 matrix per remaining subject; -1 lets numpy derive the subject
# count instead of assuming exactly 100
timecourse_connectivity_matrices = np.reshape(timecourse_connectivity_data, (-1, 90, 90))

# original connectivity matrices
connectivity_data, connectivity_files = utils.load_connectivity_data('/home/jonyoung/IoP_data/Data/connectivity_data/KCL_SC1/matrix_unsmooth/')
connectivity_data = connectivity_data[connectivity_in_timecourse, :]
connectivity_matrices = np.reshape(connectivity_data, (-1, 90, 90))

n_subjects = len(labels)


# show the top-left 5 x 5 corner of the first subject's matrix from each source
# (parenthesised form prints the same on Python 2 and Python 3)
print(connectivity_matrices[0, :5, :5])
print(timecourse_connectivity_matrices[0, :5, :5])

# create kernel matrices
#sq_IG = np.zeros((n_subjects, n_subjects))
#sq_IG_timecourse = np.zeros((n_subjects, n_subjects))
#for i in range(n_subjects) :
#    print i
#    for j in range(i) :