""" R2 """ """ PCA """ """ R1 """ data_t1, y_t1, subs_t1 = load_cort(measure = 'r1', cort='midgray') dataset = 'stanford_ms_run1' cortical_mat_ms1_t1, age_ms1, subs, _ = load_dataset(dataset, cortical_parc='midgray', measure_type='r1_les') dataset = 'stanford_ms_run2' cortical_mat_ms2_t1, age_ms2, subs, _ = load_dataset(dataset, cortical_parc='midgray', measure_type='r1_les') NUM_REPS = 1000 X = data_t1.T youngsters = np.where(y_t1<=AGE) y_old = np.delete(y_t1, youngsters) data_old = np.delete(X, youngsters, axis=0) data_old = np.delete(data_old, np.where(areas_notdeg2==1), axis=1) #df_pca, X_pca, pca_evr, df_corr, df_n, df_na, df_nabv, df_rank = pca_full_report(X=data_old, features_=np.delete(areas, np.where(areas_notdeg2==1)), save_plot=False, fig_dpi=50) adults = np.where(y_t1>AGE) y_young = np.delete(y_t1, adults)
from src.data.preprocess import run_pca
from src.data.make_dataset import load_dataset, load_subjects
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pickle


def transform_data(data, channels):
    """Take the first `channels` channels of each data record and flatten them into a single feature vector."""
    return [np.ravel(d[0][:channels]) for d in data['data']]


dataset = load_dataset('data/processed/hcp', 'data/hcp-train.csv')
testdataset = load_dataset('data/processed/hcp', 'data/hcp-eval-dist.csv')

channels = 10
dd = transform_data(dataset, channels)
td = transform_data(testdataset, channels)

model = svm.SVC(gamma=0.01, C=10.)
model.fit(dd, dataset['age'])

with open('models/svm-age.dat', 'wb') as out:
    pickle.dump(model, out)

predicted = model.predict(td)

print("id,age")
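# The script prints the CSV header but this excerpt ends before writing any
# rows. A hedged sketch of the likely output loop; it assumes the dataset dict
# also carries an 'id' field alongside 'data' and 'age' (not shown in this excerpt).
for sub_id, age in zip(testdataset['id'], predicted):
    print('{},{}'.format(sub_id, age))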
areas = load_cort_areas()
from scipy.stats import pearsonr

# """
# CT
# """
# dataset = 'huji'
# cortical_parc = 'volume'  # 'volume'
# data_matrix_imputed, age_list, subs, area_names = load_dataset(dataset, cortical_parc=cortical_parc, measure_type='t1')

"""
T1
"""
dataset = 'huji'
cortical_parc = 'midgray'  # 'volume'
data_t1, y_t1, subs_t1, areas = load_dataset(dataset, cortical_parc=cortical_parc, measure_type='r1')

# Drop excluded subjects (indices into the subject axis).
subjects_to_delete = [1, 3, 4, 5, 17, 24]
data_t1 = np.delete(data_t1, subjects_to_delete, axis=1)
y_t1 = np.delete(y_t1, subjects_to_delete)
# subs_t1 = np.delete(subs_t1, subjects_to_delete)
# sex = [0, 1, 1, 1, 1, 1,
#        1, 0, 1, 1, 1, 1,
#        0, 1, 1, 0, 1, 1,
#        1, 1, 1, 1, 0, 0,
#        0, 0, 1, 0, 1, 1,
#        0, 1, 1, 0]  # 1 = male; 23 males overall, 10 in the young group

dataset = 'huji'
cortical_parc = 'volume'
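# pearsonr is imported above but unused in this excerpt. A minimal sketch of
# its presumable role here: correlating each area's R1 with age in the T1/R1
# data loaded above. Assumes data_t1 is shaped (areas, subjects), consistent
# with the axis=1 subject deletion.
for name, area_vals in zip(areas, data_t1):
    r, p = pearsonr(area_vals, y_t1)
    print('{}: r = {:.2f}, p = {:.3g}'.format(name, r, p))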
import os

import matplotlib.pyplot as plt
import nibabel as nib


def get_axial(file):
    """Load a volume and return its middle slice for plotting."""
    try:
        # get_data() is deprecated in newer nibabel; get_fdata() is the replacement
        file_data = nib.load(file).get_data()
    except FileNotFoundError:
        print('File {} does not exist'.format(file))
        return None
    x, y, z = file_data.shape
    file_axial = file_data[:, z // 2, :]
    return file_axial


# dataset = 'stanford_2'
# data_stan, age_stan, subs_stan, _ = load_dataset(dataset)
# dataset = 'kalanit'
# data_kalanit, age_kalanit, subs_kalanit, _ = load_dataset(dataset)

dataset = 'gotlib'
data_matrix_gotlib, age_gotlib, subs_gotlib, _ = load_dataset(dataset)

figures = []
for sub in subs_gotlib:
    sub_fs = sub + '_' + dataset
    aparc_file = '/ems/elsc-labs/mezer-a/Mezer-Lab/analysis/freesurfer_subjects/' + sub_fs + '/mri/aparc.a2009s+aseg.mgz'
    orig_file = '/ems/elsc-labs/mezer-a/Mezer-Lab/analysis/freesurfer_subjects/' + sub_fs + '/mri/orig.mgz'
    if os.path.exists(aparc_file) and os.path.exists(orig_file):
        axial_slice = get_axial(aparc_file)
        axial_slice2 = get_axial(orig_file)
        # make figure: anatomical slice with the parcellation overlaid
        plt.imshow(axial_slice2, cmap='gray')
        plt.imshow(axial_slice, cmap='gray', alpha=0.2)
        ax = plt.gca()
        ax.set_rasterized(True)
        plt.title('Subject {}'.format(sub))
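# `figures` is initialized above but never filled in this excerpt. A hedged
# sketch of one plausible completion: collect each overlay inside the loop
# (e.g. figures.append(plt.gcf()) right after plt.title) and write them all
# to a single PDF. The output filename is hypothetical.
from matplotlib.backends.backend_pdf import PdfPages

with PdfPages('gotlib_parcellation_overlays.pdf') as pdf:
    for fig in figures:
        pdf.savefig(fig)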
areas_notdeg2[areas_notdeg2_idx] = 1

"""
PCA
"""

"""
R1
"""
data_t1, y_t1, subs_t1 = load_cort(measure='r1', cort='midgray')

dataset = 'reading'
data_matrix_reading, age_reading, subs_reading, _ = load_dataset(dataset, cortical_parc='midgray', measure_type='r1')

no_preterm = ['s016', 's088', 's114', 's027', 's041', 's121', 's132',
              's143', 's057', 's061', 's079', 's070', 's067', 's080',
              's148', 's112', 's117', 's160', 's113']
# ringing
# 's007','s034','s037','s045','s046','s064','s078','s082',
# 's083','s083_2','s092_dti30','s097_2','s101','s109','s111','s116',
# 's120','s123','s126','s126_2','s127','s129','s133','s137',
# 's141','s142','s144','s145','s146','s147','s153','s154',
# 's157','s168','s169','s171','s175',
# 's006','s038','AOK07_run1','s008_2','s040','s055',
# 's062','s081','s086','s096','s102_2','s125','s134','s138'

no_preterm_idx = [index for index, elem in enumerate(subs_reading) if elem in no_preterm]
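# A minimal sketch of how no_preterm_idx is presumably applied: restricting
# the reading dataset to the retained subjects. Assumes data_matrix_reading
# is a numpy array shaped (areas, subjects), as in the other scripts here.
data_reading_no_preterm = data_matrix_reading[:, no_preterm_idx]
age_reading_no_preterm = np.asarray(age_reading)[no_preterm_idx]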
def prepare_experiment_data(download_new: bool = False, test_seed: int = 123, valid_seed: int = 456) -> ExperimentData:
    """Load, clean, filter and split the ratings data into train/validation/test sets."""
    raw_data = load_dataset(download_new)
    # Impute missing values, then dummy-encode categorical columns.
    raw_data = get_dummy_values(fill_missing_values(raw_data))
    # Keep only users with at least 3 ratings.
    data_min_n_ratings = get_users_with_min_n_ratings(raw_data, 3)
    experiment_data = train_validation_test_split(data_min_n_ratings, test_seed=test_seed, valid_seed=valid_seed)
    return experiment_data
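# Example call, assuming the helpers referenced above (load_dataset,
# fill_missing_values, get_dummy_values, get_users_with_min_n_ratings,
# train_validation_test_split) are imported elsewhere in this module and
# behave as their names suggest. The seeds shown are just the defaults.
experiment_data = prepare_experiment_data(download_new=False, test_seed=123, valid_seed=456)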