def run(args):
    '''
    collect ROI time series of all participants and pickle them

    args.input_data: directory walked recursively for
        `*MSMAll_hp2000_clean.dtseries.nii` files
    args.output_data: directory the pickle is written to
        (created if it does not exist)
    args.roi: number of ROIs
    args.net: number of subnetworks

    participant ID = last 6 characters of the path
    preceding '/MNINonLinear'
    only participants with exactly 4 files (runs) are processed

    saves a dict {ID: array (time x roi x number of runs)} to
    <args.output_data>/data_MOVIE_runs_roi_<roi>_net_<net>_ts.pkl
    '''
    k_runs = 4  # a participant is kept only if all 4 runs are present

    # load parcellation file
    parcel, nw_info = _get_parcel(args.roi, args.net)

    # use glob to get all files with `ext`
    ext = '*MSMAll_hp2000_clean.dtseries.nii'
    files = [
        path
        for root, _, _ in os.walk(args.input_data)
        for path in glob(os.path.join(root, ext))
    ]

    # group files by participant in a single pass
    # ID <=> individual
    # (grouping on the extracted ID avoids matching an ID that merely
    #  appears somewhere else in another participant's path)
    files_by_id = {}
    for path in files:
        ID = path.split('/MNINonLinear')[0][-6:]
        files_by_id.setdefault(ID, []).append(path)
    participants = np.sort(list(files_by_id))
    _info('Number of participants = %d' % len(participants))

    data = {}
    for ii, ID in enumerate(participants):
        ID_files = np.sort(files_by_id[str(ID)])

        # skip individuals that do not have all runs
        if len(ID_files) != k_runs:
            _info('%s not processed' % ID)
            continue

        _info('%s: %d/%d' % (ID, (ii + 1), len(participants)))
        ID_ts = [
            _get_roi_ts(path, parcel, nw_info, args)
            for path in ID_files
        ]

        # ID_ts have different temporal length
        # pad zeros (time x roi x number of runs)
        k_time = max(run_ts.shape[0] for run_ts in ID_ts)
        save_ts = np.zeros((k_time, args.roi, k_runs))
        for run_idx, run_ts in enumerate(ID_ts):
            save_ts[:run_ts.shape[0], :, run_idx] = run_ts
        data[ID] = save_ts

    SAVE_DIR = args.output_data
    # exist_ok avoids the check-then-create race
    os.makedirs(SAVE_DIR, exist_ok=True)
    save_path = (SAVE_DIR +
                 '/data_MOVIE_runs_roi_%d_net_%d_ts.pkl' % (args.roi, args.net))
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def _bhv_class_df(args):
    '''
    build the per-timepoint table used for behavioral
    classification / regression

    attributes read here:
    args.input_data: directory holding the pickled time series
    args.roi: number of ROIs
    args.net: number of subnetworks (7 or 17)
    args.subnet: subnetwork; 'wb' if all subnetworks
        (a 'minus_' prefix is stripped from args.subnet in place)
    args.invert_flag: all-but-one subnetwork (default False)
    args.mode: defaults to 'class' when missing
    args is also handed to _group_bhv_df, which presumably consumes
    args.bhv / args.k_class / args.cutoff -- confirm there

    each row is one timepoint of one clip of one subject:
    'Subject', 'timepoint', 'feat_<i>' per selected ROI, and
    'c' (clip label), merged with the columns of bhv_df

    return: (df, bhv_df)
    '''
    # fill in optional arguments
    given = vars(args)
    if 'invert_flag' not in given:
        args.invert_flag = False
    if 'mode' not in given:
        args.mode = 'class'

    # NOTE: pickle.load executes arbitrary code; only use trusted files
    pkl_name = 'roi_%d_net_%d_ts.pkl' %(args.roi, args.net)
    load_path = args.input_data + '/data_MOVIE_runs_' + pkl_name
    with open(load_path, 'rb') as f:
        data = pickle.load(f)

    # subjects that survive the behavioral grouping
    subject_list = np.sort(list(data.keys()))
    bhv_df = _group_bhv_df(args, subject_list)
    cutoff_list = bhv_df['Subject'].values.astype(str)

    # where are the clips within the run?
    timing_file = pd.read_csv('data/videoclip_tr_lookup.csv')

    # ROI mask: everything for 'wb', otherwise one subnetwork
    if args.subnet == 'wb':
        idx = np.ones(args.roi).astype(bool)
    else:
        if 'minus' in args.subnet:
            # remove 'minus_' prefix
            args.subnet = args.subnet.split('minus_')[1]
        _, nw_info = _get_parcel(args.roi, args.net)
        # ***roi ts sorted in preprocessing
        nw_info = np.sort(nw_info)
        idx = (nw_info == args.subnet)

    # all-but-one subnetwork
    if args.subnet and args.invert_flag:
        idx = ~idx

    # main
    clip_y = _get_clip_labels()
    rows = []
    for run in range(K_RUNS):
        print('loading run %d/%d' %(run+1, K_RUNS))
        run_name = 'MOVIE%d' %(run+1) #MOVIEx_7T_yz

        # timing rows of this run
        in_run = timing_file['run'].str.contains(run_name)
        timing_df = timing_file[in_run].reset_index(drop=True)

        for subject in data:
            if subject not in cutoff_list:
                continue

            # subject data of this run (time x selected roi)
            roi_ts = data[subject][:, idx, run]
            feat_names = ['feat_%d' %(feat)
                          for feat in range(roi_ts.shape[1])]

            for _, clip in timing_df.iterrows():
                start = int(np.floor(clip['start_tr']))
                stop = int(np.ceil(clip['stop_tr']))
                # label of the clip
                c = clip_y[clip['clip_name']]

                for t in range(stop - start):
                    row = {'Subject': subject, 'timepoint': t}
                    row.update(zip(feat_names, roi_ts[t + start, :]))
                    row['c'] = c
                    rows.append(row)

    df = pd.DataFrame(rows)
    df['Subject'] = df['Subject'].astype(int)
    # merges on all subject rows!
    df = df.merge(bhv_df, on='Subject', how='inner')

    return df, bhv_df
def _clip_class_rest_df(args, run):
    '''
    data for clip + rest visualization
    each run is saved individually

    run: 0, 1, 2, 3 (one of the 4 runs)
    args.input_data: directory holding the pickled time series
    args.roi: number of ROIs
    args.net: number of subnetworks (7 or 17)
    args.subnet: subnetwork; 'wb' if all subnetworks
        (a 'minus_' prefix is stripped from args.subnet in place)
    args.invert_flag: all-but-one subnetwork (default False)
    args.r_roi: number of random ROIs to pick (default 0 = off)
    args.r_seed: random seed for picking ROIs

    save each timepoint as feature vector
    label 'y':
        clip jj of the run (in timing-file order) -> jj
        rest before the first clip -> k
        rest after clip jj -> k + jj + 1
        where k = number of clips in the run
    labels are laid out on the time axis of the first subject in
    the pickle; all subjects are assumed to be at most that long

    return:
    pandas df with columns 'Subject', 'timepoint', 'y', 'feat_<i>'

    raises:
    ValueError if the pickle holds no subjects or the timing file
    has no clips for the requested run
    '''
    # optional arguments
    d = vars(args)
    if 'invert_flag' not in d:
        args.invert_flag = False
    if 'r_roi' not in d:
        args.r_roi = 0
        args.r_seed = 0

    # NOTE: pickle.load executes arbitrary code; only use trusted files
    load_path = (args.input_data + '/data_MOVIE_runs_' +
        'roi_%d_net_%d_ts.pkl' %(args.roi, args.net))
    with open(load_path, 'rb') as f:
        data = pickle.load(f)
    if not data:
        raise ValueError('no subjects found in %s' % load_path)

    # where are the clips within the run?
    timing_file = pd.read_csv('data/videoclip_tr_lookup.csv')

    # pick either all ROIs or subnetworks
    if args.subnet!='wb':
        if 'minus' in args.subnet:
            # remove 'minus_' prefix
            args.subnet = args.subnet.split('minus_')[1]

        _, nw_info = _get_parcel(args.roi, args.net)
        # ***roi ts sorted in preprocessing
        nw_info = np.sort(nw_info)
        idx = (nw_info == args.subnet)
    else:
        idx = np.ones(args.roi).astype(bool)

    # all-but-one subnetwork
    if args.subnet and args.invert_flag:
        idx = ~idx

    # if random selection,
    # overwrite everything above
    if args.r_roi > 0:
        random.seed(args.r_seed)
        idx = np.zeros(args.roi).astype(bool)
        # random sample without replacement
        samp = random.sample(range(args.roi), k=args.r_roi)
        idx[samp] = True

    # main
    print('loading run %d' %(run+1))
    run_name = 'MOVIE%d' %(run+1) #MOVIEx_7T_yz
    timing_df = timing_file[timing_file['run'].str.contains(run_name)]
    timing_df = timing_df.reset_index(drop=True)
    if timing_df.empty:
        # without any clip the rest labels below are undefined
        raise ValueError(
            'no clips found for %s in data/videoclip_tr_lookup.csv'
            % run_name)

    # get unique id for each segment including rest segments
    length = next(iter(data.values())).shape[0]
    k_class = len(timing_df)
    y_vec = np.ones(length)*k_class
    rest_tag = k_class  # label of the next rest segment
    prev_stop = 0       # end of the previous clip
    for jj, clip in timing_df.iterrows():
        start = int(np.floor(clip['start_tr']))
        stop = int(np.ceil(clip['stop_tr']))
        # rest between the previous clip (or run onset) and this clip
        y_vec[prev_stop:start] = rest_tag
        rest_tag += 1
        y_vec[start:stop] = jj
        prev_stop = stop
    # rest after the last clip
    y_vec[prev_stop:] = rest_tag

    table = []
    for subject in data:
        roi_ts = data[subject][:, idx, run]
        feat_names = ['feat_%d' %(feat)
                      for feat in range(roi_ts.shape[1])]
        for t in range(roi_ts.shape[0]):
            t_data = {'Subject': subject, 'timepoint': t, 'y': y_vec[t]}
            t_data.update(zip(feat_names, roi_ts[t, :]))
            table.append(t_data)

    df = pd.DataFrame(table)
    df['Subject'] = df['Subject'].astype(int)

    return df
def _clip_class_df(args):
    '''
    build the per-timepoint table for clip classification

    args.input_data: directory holding the pickled time series
    args.roi: number of ROIs
    args.net: number of subnetworks (7 or 17)
    args.subnet: subnetwork; 'wb' if all subnetworks
        (a 'minus_' prefix is stripped from args.subnet in place)
    args.invert_flag: all-but-one subnetwork (default False)
    args.r_roi: number of random ROIs to pick (default 0 = off)
    args.r_seed: random seed for picking ROIs

    each row is one timepoint of one clip of one subject:
    'Subject', 'timepoint' (relative to clip onset),
    'feat_<i>' per selected ROI, and 'y' (clip label)

    return:
    pandas df
    '''
    # fill in optional arguments
    given = vars(args)
    if 'invert_flag' not in given:
        args.invert_flag = False
    if 'r_roi' not in given:
        args.r_roi = 0
        args.r_seed = 0

    # NOTE: pickle.load executes arbitrary code; only use trusted files
    pkl_name = 'roi_%d_net_%d_ts.pkl' % (args.roi, args.net)
    load_path = args.input_data + '/data_MOVIE_runs_' + pkl_name
    with open(load_path, 'rb') as f:
        data = pickle.load(f)

    # where are the clips within the run?
    timing_file = pd.read_csv('data/videoclip_tr_lookup.csv')

    # ROI mask: everything for 'wb', otherwise one subnetwork
    if args.subnet == 'wb':
        idx = np.ones(args.roi).astype(bool)
    else:
        if 'minus' in args.subnet:
            # remove 'minus_' prefix
            args.subnet = args.subnet.split('minus_')[1]
        _, nw_info = _get_parcel(args.roi, args.net)
        # ***roi ts sorted in preprocessing
        nw_info = np.sort(nw_info)
        idx = (nw_info == args.subnet)

    # all-but-one subnetwork
    if args.subnet and args.invert_flag:
        idx = ~idx

    # a random ROI selection replaces any mask chosen above
    if args.r_roi > 0:
        random.seed(args.r_seed)
        # random sample without replacement
        samp = random.sample(range(args.roi), k=args.r_roi)
        idx = np.zeros(args.roi).astype(bool)
        idx[samp] = True

    # main
    clip_y = _get_clip_labels()
    rows = []
    for run in range(K_RUNS):
        print('loading run %d/%d' % (run + 1, K_RUNS))
        run_name = 'MOVIE%d' % (run + 1) #MOVIEx_7T_yz

        # timing rows of this run
        in_run = timing_file['run'].str.contains(run_name)
        timing_df = timing_file[in_run].reset_index(drop=True)

        for subject in data:
            # subject data of this run (time x selected roi)
            roi_ts = data[subject][:, idx, run]
            feat_names = ['feat_%d' % (feat)
                          for feat in range(roi_ts.shape[1])]

            for _, clip in timing_df.iterrows():
                start = int(np.floor(clip['start_tr']))
                stop = int(np.ceil(clip['stop_tr']))
                # label of the clip
                y = clip_y[clip['clip_name']]

                for t in range(stop - start):
                    row = {'Subject': subject, 'timepoint': t}
                    row.update(zip(feat_names, roi_ts[t + start, :]))
                    row['y'] = y
                    rows.append(row)

    df = pd.DataFrame(rows)
    df['Subject'] = df['Subject'].astype(int)

    return df