def norm_and_mean(norm, bilateral, classifier, sensitivities):
    """Normalize a list of sensitivities to their L2 norm if norm=True,
    else just stack them according to the classifier they were built with.
    The resulting stack of sensitivities is averaged with the
    mean_group_sample() function."""
    if norm:
        from sklearn.preprocessing import normalize
        import copy
        # default for normalization is the L2 norm
        sensitivities_to_normalize = copy.deepcopy(sensitivities)
        for i in range(len(sensitivities)):
            sensitivities_to_normalize[i].samples = normalize(
                sensitivities_to_normalize[i].samples,
                axis=1) * np.sqrt(sensitivities[i].shape[1])
            print(sensitivities[i].shape)
        sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
        print('I normalized the data.')
    else:
        sensitivities_stacked = mv.vstack(sensitivities)

    sgds = ['sgd', 'l-sgd']

    if bilateral:
        if classifier in sgds:
            # Note: all SGD-based classifiers want an explicit 'targets'
            # sample attribute, so it is still present in the sensitivities.
            # The estimates are assigned to the correct labels (double-checked
            # May 19): ulabels are assigned via np.unique, which returns a
            # sorted array, and the sensitivity calculation keeps this
            # lexicographic order, see
            # https://github.com/PyMVPA/PyMVPA/pull/607/files#diff-bbf744fd29d7f3e4abdf7a1586a5aa95
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        else:
            # ...whereas with GNB, the results are in the 'bilat_ROIs' sample attribute
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        if classifier in sgds:
            # Note: all SGD-based classifiers want an explicit 'targets'
            # sample attribute, so it is still present in the sensitivities.
            sensitivities_stacked.sa['all_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        else:
            # ...whereas with GNB, the results are in the 'all_ROIs' sample attribute
            sensitivities_stacked.sa['all_ROIs_str'] = map(
                lambda p: '_'.join(p), sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    # return the averaged sensitivities
    return mean_sens
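# The normalization step in norm_and_mean() is easy to check in isolation.
# A minimal sketch (plain NumPy/scikit-learn, invented dimensions, not from
# the original script): normalize() scales each row (fold) to unit L2 norm,
# and the multiplication by sqrt(n_features) restores an average feature
# magnitude of ~1, so sensitivities stay comparable across ROI pairs.
import numpy as np
from sklearn.preprocessing import normalize

samples = np.random.randn(15, 300)  # e.g., 15 folds x 300 voxels (toy)
normed = normalize(samples, axis=1) * np.sqrt(samples.shape[1])

# each row now has an L2 norm of sqrt(n_features) ...
assert np.allclose(np.linalg.norm(normed, axis=1), np.sqrt(samples.shape[1]))
# ... which is equivalent to a root-mean-square of 1 per row
assert np.allclose(np.sqrt((normed ** 2).mean(axis=1)), 1.0)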
def preprocess_datasets(dataset_list, ref_space, warp_files, mask, **kwargs):
    detrending = kwargs.get('detrending', True)
    use_zscore = kwargs.get('use_zscore', True)
    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)
    rois = kwargs.get('rois', None)

    if isinstance(dataset_list, list):
        datasets = [preprocessing(ds_p, ref_space, warp_files, mask,
                                  detrending=detrending, use_zscore=use_zscore,
                                  use_events=use_events, anno_dir=anno_dir,
                                  use_glm_estimates=use_glm_estimates,
                                  targets=targets, event_offset=event_offset,
                                  event_dur=event_dur, rois=rois,
                                  save_disc_space=save_disc_space)
                    for ds_p in dataset_list]
        if use_glm_estimates:
            for ds in datasets:
                del ds.sa["regressors"]
        ds = mvpa.vstack(datasets, a='drop_nonunique', fa='drop_nonunique')
    else:
        ds = preprocessing(dataset_list, ref_space, warp_files, mask,
                           detrending=detrending, use_zscore=use_zscore,
                           use_events=use_events, anno_dir=anno_dir,
                           use_glm_estimates=use_glm_estimates,
                           targets=targets, event_offset=event_offset,
                           event_dur=event_dur, rois=rois,
                           save_disc_space=save_disc_space)
    return ds
def create_dataset(sub_name, main_dir, task_list, hemi):
    data_set = []
    for task_name in task_list:
        for run_num in range(1, 6):
            gifti_fname = os.path.join(
                main_dir, 'analysis', sub_name, 'func',
                sub_name + '_task-' + task_name + '_run-' + str(run_num) +
                '_rw-glm.' + hemi + '.coefs.gii')
            gifti_niml = os.path.join(
                main_dir, 'analysis', sub_name, 'func',
                sub_name + '_task-' + task_name + '_run-' + str(run_num) +
                '_rw-glm.' + hemi + '.coefs.niml.dset')
            #ds = mv.gifti_dataset(gifti_fname)
            ds = mv.niml.read(gifti_niml)

            # order as in sub-rid000001_task-beh_run-5_rw-glm.lh.X.xmat.1D
            ds.sa['beh_tax'] = [
                "bird_eating", "bird_fighting", "bird_running", "bird_swimming",
                "insect_eating", "insect_fighting", "insect_running", "insect_swimming",
                "primate_eating", "primate_fighting", "primate_running", "primate_swimming",
                "reptile_eating", "reptile_fighting", "reptile_running", "reptile_swimming",
                "ungulate_eating", "ungulate_fighting", "ungulate_running", "ungulate_swimming"
            ]
            ds.sa['beh'] = np.tile(['eating', 'fighting', 'running', 'swimming'], 5)
            ds.sa['tax'] = np.repeat(['bird', 'insect', 'primate', 'reptile', 'ungulate'], 4)
            ds.fa['node_indices'] = range(0, ds.shape[1])  # 0 ~ 400000
            data_set.append(ds)

    # http://www.pymvpa.org/tutorial_mappers.html
    within_ds = mv.vstack(data_set)
    return within_ds
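# The 20-row condition layout assigned above is taxonomy-major; the
# np.repeat/np.tile pair reproduces exactly the hardcoded beh_tax order.
# A quick, self-contained check (plain NumPy, not from the original script):
import numpy as np

taxonomy = np.repeat(['bird', 'insect', 'primate', 'reptile', 'ungulate'], 4)
behavior = np.tile(['eating', 'fighting', 'running', 'swimming'], 5)

# row i of the GLM coefficients is condition "<taxonomy>_<behavior>"
beh_tax = ['_'.join(pair) for pair in zip(taxonomy, behavior)]
assert beh_tax[0] == 'bird_eating' and beh_tax[-1] == 'ungulate_swimming'
assert len(beh_tax) == 20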
def movie_dataset(subj, task, label, **kwargs):
    # Example:
    # ds = movie_dataset(
    #     2,
    #     'avmovie',
    #     'bold3Tp2',
    #     mask='src/tnt/sub-02/bold3Tp2/brain_mask.nii.gz')
    cur_max_time = 0
    segments = []
    # pull any caller-provided sample attributes out of kwargs, so add_sa is
    # always defined and is not passed twice to preprocessed_fmri_dataset()
    add_sa = kwargs.pop('add_sa', {})
    for seg in range(1, 9):
        print 'Seg', seg
        mc = np.recfromtxt(
            'sub-%.2i/in_%s/sub-%.2i_task-%s_run-%i_bold_mcparams.txt'
            % (subj, label, subj, task, seg),
            names=('mc_xtrans', 'mc_ytrans', 'mc_ztrans',
                   'mc_xrot', 'mc_yrot', 'mc_zrot'))
        for i in mc.dtype.fields:
            add_sa[i] = mc[i]
        ds = preprocessed_fmri_dataset(
            'sub-%.2i/in_%s/sub-%.2i_task-%s_run-%i_bold.nii.gz'
            % (subj, label, subj, task, seg),
            add_sa=add_sa, **kwargs)
        ds.sa['movie_segment'] = [seg] * len(ds)
        TR = np.diff(ds.sa.time_coords).mean()
        # truncate the segment time series to remove the overlap
        if seg > 1:
            ds = ds[4:]
        if seg < 8:
            ds = ds[:-4]
        ds.sa['movie_time'] = np.arange(len(ds)) * TR + cur_max_time
        cur_max_time = ds.sa.movie_time[-1] + TR
        segments.append(ds)
    return vstack(segments)
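# Toy illustration of the overlap handling above (invented segment lengths,
# not from the original script): neighboring movie segments share 4 volumes,
# so all but the first segment drop their first 4 samples and all but the
# last drop their final 4 before concatenation.
import numpy as np

segment_lengths = [451, 441, 438, 488, 462, 439, 542, 338]  # hypothetical
total = 0
for seg, n in enumerate(segment_lengths, start=1):
    idx = np.arange(n)
    if seg > 1:
        idx = idx[4:]
    if seg < len(segment_lengths):
        idx = idx[:-4]
    total += len(idx)

# every boundary between two segments removes 4 + 4 = 8 volumes in total
assert total == sum(segment_lengths) - (len(segment_lengths) - 1) * 8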
def sources_merged_ds(path_list, subjects_list, conf_list, task, **kwargs):
    ds_list = []
    for path, subjects, conf in zip(path_list, subjects_list, conf_list):
        ds, _, conf_n = load_subjectwise_ds(path, subjects, conf, task, **kwargs)
        ds_list.append(ds)

    ds_new = vstack(ds_list)
    ds_new.a.update(ds_list[0].a)

    print 'Merging from different sources ended...'
    print 'The number of merged subjects is ' + str(len(np.unique(ds_new.sa.name)))

    return ds_new, ['group'], conf_n
def dotheglm(sensitivities, eventdir):
    """dotheglm() does the GLM. It stacks the sensitivity datasets with
    vstack, computes the mean sensitivity per ROI pair with the
    mean_group_sample() function, and transposes the result with a
    TransposeMapper(). It then reads in the event files, averages the
    durations (there are tiny differences between subjects), and feeds
    all of that into a GLM.
    """
    sensitivities_stacked = mv.vstack(sensitivities)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t',
               comments='', header='onset\tduration\ttrial_type\tstim_file',
               fmt=fmt)

    # get events into dictionaries
    events_dicts = []
    for i in range(0, len(events)):
        dic = {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2]
        }
        events_dicts.append(dic)

    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5', hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
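# For reference, fit_event_hrf_model() takes the events as a list of plain
# dicts with at least 'onset' and 'duration' plus whatever is named in
# condition_attr -- exactly what the loop above builds. With a pandas
# DataFrame, the same structure drops out of to_dict(); a sketch with
# invented events (not from the original data):
import pandas as pd

events = pd.DataFrame({
    'onset':     [0.0, 24.0, 48.0],   # hypothetical onsets, in seconds
    'duration':  [12.0, 12.0, 12.0],
    'condition': ['face', 'house', 'face'],
})

# equivalent to the explicit row-by-row loop above
events_dicts = events[['onset', 'duration', 'condition']].to_dict('records')
assert events_dicts[0] == {'onset': 0.0, 'duration': 12.0, 'condition': 'face'}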
def subjects_merged_ds(path, subjects, conf_file, task, extra_sa=None, **kwargs):
    """
    extra_sa: dict or None; sample attributes added to the final dataset.
    Each value should have the same length as the subjects list.
    """
    conf = read_configuration(path, conf_file, task)

    for arg in kwargs:
        conf[arg] = kwargs[arg]

    data_path = conf['data_path']

    i = 0
    print 'Merging subjects from ' + data_path

    for subj in subjects:
        ds = load_dataset(data_path, subj, task, **conf)
        ds = preprocess_dataset(ds, task, **conf)

        # add extra sample attributes (guard against the None default)
        if extra_sa is not None:
            for k, v in extra_sa.iteritems():
                if len(v) == len(subjects):
                    ds.sa[k] = [v[i] for _ in range(ds.samples.shape[0])]

        if i == 0:
            ds_merged = ds.copy()
        else:
            ds_merged = vstack((ds_merged, ds))
            ds_merged.a.update(ds.a)

        i = i + 1
        del ds

    return ds_merged, ['group'], conf
for hemi in ['rh']:
    # Load surface and create searchlight query engine
    surf = mv.surf.read(join(suma_dir, '{0}.pial.gii'.format(hemi)))
    qe = mv.SurfaceQueryEngine(surf, 20.0, distance_metric='dijkstra')
    print("Finished creating surface-based searchlight query engine")

    # Load in surface datasets
    dss = []
    for participant in participants:
        print(participant)
        print(tr[included[0]])
        ra = load_data(join(data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            participant, tr[included[0]], included[0], hemi)))
        rb = load_data(join(data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            participant, tr[included[1]], included[1], hemi)))
        rc = load_data(join(data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            participant, tr[included[2]], included[2], hemi)))
        ds = mv.vstack((ra, rb, rc))
        print(ds.samples.shape)
        dss.append(ds)

    n_samples = ds.samples.shape[0]

    # Exclude medial wall
    print(np.where(np.sum(ds.samples == 0, axis=0) == n_samples))
    medial_wall = np.where(np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist()
    print(len(medial_wall))
    cortical_vertices = np.where(np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist()
    assert len(medial_wall) == n_medial[hemi]
    assert len(medial_wall) + len(cortical_vertices) == n_vertices

    # Estimate searchlight hyperalignment transformation on movie data
    sl_hyper = mv.SearchlightHyperalignment(queryengine=qe, nproc=n_proc,
                                            nblocks=n_proc * 8, featsel=1.0,
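# The medial-wall test above, in isolation: a surface vertex counts as
# medial wall iff its time series is all zeros across every sample.
# A self-contained toy check (random data, not from the original script):
import numpy as np

samples = np.random.randn(100, 8)   # 100 time points x 8 vertices (toy)
samples[:, [2, 5]] = 0.0            # pretend vertices 2 and 5 are medial wall

n_samples = samples.shape[0]
medial_wall = np.where(np.sum(samples == 0, axis=0) == n_samples)[0].tolist()
cortical_vertices = np.where(np.sum(samples == 0, axis=0) < n_samples)[0].tolist()

assert medial_wall == [2, 5]
assert len(medial_wall) + len(cortical_vertices) == samples.shape[1]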
def get_merged_ds(path, subjects, conf_file, source='task', dim=3, **kwargs):
    # TODO: put source and target into the conf!
    if source == 'task':
        target = 'rest'
    elif source == 'rest':
        target = 'task'
    elif source == 'saccade':
        target = 'face'
    elif source == 'face':
        target = 'saccade'

    ds_merged_list = []
    conf_src = read_configuration(path, conf_file, source)
    conf_tar = read_configuration(path, conf_file, target)

    ##############################################
    ## conf_src['label_included'] = 'all'       ##
    ## conf_src['label_dropped'] = 'none'       ##
    ## conf_src['mean_samples'] = 'False'       ##
    ##############################################

    for arg in kwargs:
        conf_src[arg] = kwargs[arg]
        conf_tar[arg] = kwargs[arg]

    data_path = conf_src['data_path']

    for subj in subjects:
        print '--------'
        try:
            ds_src = load_dataset(data_path, subj, source, **conf_src)
            ds_tar = load_dataset(data_path, subj, target, **conf_tar)
        except Exception, err:
            print err
            continue

        ds_src = detrend_dataset(ds_src, source, **conf_src)
        ds_tar = detrend_dataset(ds_tar, target, **conf_tar)

        if dim == 4:
            duration = np.min([e['duration'] for e in ds_src.a.events])
            ds_tar = build_events_ds(ds_tar, duration, overlap=duration - 1)
            ds_src = load_spatiotemporal_dataset(ds_src, duration=duration)

        print ds_src.samples.shape
        print ds_tar.samples.shape

        ds_src.sa['task'] = [source for s in range(ds_src.samples.shape[0])]
        ds_tar.sa['task'] = [target for s in range(ds_tar.samples.shape[0])]

        ds_merged = vstack((ds_src, ds_tar))
        ds_merged.a.update(ds_src.a)

        print ds_merged.sa.task
        ds_merged_list.append(ds_merged)
    '''
def makeaplot(events, sensitivities, hrf_estimates, roi_pair, fn=True):
    """
    Produce a time series plot for the ROI class comparison specified
    in roi_pair, e.g. roi_pair = ['left FFA', 'left PPA'].
    """
    import matplotlib.pyplot as plt

    # take the mean and transpose the sensitivities
    sensitivities_stacked = mv.vstack(sensitivities)

    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get the conditions
    block_design = sorted(np.unique(events['trial_type']))
    reorder = [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11]
    block_design = [block_design[i] for i in reorder]
    # start and end indices to chunk the time series into runs
    run_startidx = np.array([0, 157, 313, 469])
    run_endidx = np.array([156, 312, 468, 624])

    runs = np.unique(mean_sens_transposed.sa.chunks)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.bilat_ROIs[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]

    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = ['#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
                  '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1']
        plt.suptitle('Timecourse of sensitivities, {} versus {}, run {}'.format(
            roi_pair[0], roi_pair[1], run + 1), fontsize='large')
        plt.xlim([0, max(mean_sens_transposed.sa.time_coords)])
        plt.ylim([-5, 7])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)

        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            onsets = events[events['trial_type'] == stimulus]['onset'].values
            durations = events[events['trial_type'] == stimulus]['duration'].values
            stimulation_end = np.sum([onsets, durations], axis=0)
            r_height = 1
            color = colors[0]
            y = 6

            # get the beta corresponding to the stimulus to later use in the label
            beta = roi_betas_ds.samples[
                hrf_estimates.sa.condition == stimulus.replace(" ", ""), 0]

            for i in range(len(onsets)):
                r_width = durations[i]
                x = stimulation_end[i]
                rectangle = plt.Rectangle((x, y), r_width, r_height,
                                          fc=color, alpha=0.5,
                                          label='_' * i + stimulus.replace(" ", "")
                                          + '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
            del colors[0]

        times = roi_sens_ds.sa.time_coords[run_startidx[run]:run_endidx[run]]
        ax.plot(times, roi_sens_ds.samples[run_startidx[run]:run_endidx[run]],
                '-', color='black', lw=1.0)
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[
            run_startidx[run]:run_endidx[run], roi_pair_idx]
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir + 'timecourse_localizer_glm_sens_{}_vs_{}_run-{}.svg'.format(
                roi_pair[0], roi_pair[1], run + 1))
behavior = np.tile(['eating', 'fighting', 'running', 'swimming'], 5)
conditions = [' '.join((beh, tax)) for beh, tax in zip(behavior, taxonomy)]

# load all of the data into the dataset
targets = range(1, 21)
ds = None
for x in range(len(files)):
    chunks = [x + 1] * 20
    d = mv.gifti_dataset(files[x], chunks=chunks, targets=targets)
    d.sa['conditions'] = conditions
    d.sa['taxonomy'] = taxonomy
    d.sa['behavior'] = behavior
    if ds is None:
        ds = d
    else:
        ds = mv.vstack((ds, d))
ds.fa['node_indices'] = range(ds.shape[1])

# z-score all of our samples
mv.zscore(ds, chunks_attr='chunks', dtype='float32')

# load in the surface and get the searchlight query engine
radius = 10
surface = mv.surf.read(join(data_path, '{0}.pial.gii'.format(hemi)))
# this is an arbitrary radius and distance metric!
query = mv.SurfaceQueryEngine(surface, radius, distance_metric='dijkstra')

# based on the PyMVPA tutorial
clf = mv.LinearNuSVMC(space=predict)
cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr=train_on),
                        errorfx=lambda p, t: np.mean(p == t),
                        enable_ca=['stats'])
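# A self-contained toy run of the classifier/cross-validation setup above,
# a minimal sketch with random data and invented attribute names (assumes
# PyMVPA is installed); it shows the expected shape of the result: one
# accuracy value per left-out chunk.
import numpy as np
import mvpa2.suite as mv

toy = mv.Dataset(np.random.randn(40, 30),
                 sa={'conditions': np.tile(['a', 'b'], 20),
                     'chunks': np.repeat(np.arange(4), 10)})
clf = mv.LinearNuSVMC(space='conditions')
cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='chunks'),
                        errorfx=lambda p, t: np.mean(p == t),
                        enable_ca=['stats'])
res = cv(toy)                 # 4 accuracy samples, one per fold
print(res.samples.ravel())    # ~0.5 for random data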
def buildadataset(zscore, rois, event_path=None):
    """buildadataset() will build and save participant-specific hdf5 datasets
    with all ROIs from preprocessed objectcategories data, stack them into a
    group dataset and save it, and transpose the group dataset and save that
    as well. The parameter 'zscore' determines whether and what kind of
    z-scoring should be performed."""
    print('I am building a dataset with the following option: {}.'.format(zscore))

    # get the participants and rois
    participants = sorted([path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
            '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])

        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from
        # no-stimulation periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                    '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                    'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                    'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print("ROI {0} does not exist for participant {1}; appending all zeros".format(
                    roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into a single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that the number of voxels in the ROI mask matches the localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]

            # Flatten the mask into a list
            roi_flat = list(roi_mask.ravel())
            # Assign the ROI mask to the localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat

            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                                for left_roi_fn in left_roi_fns], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                                for right_roi_fn in right_roi_fns], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that the number of voxels in the ROI mask matches the localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]

                # Flatten the mask into a list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign the ROI mask to the localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat
                # Check the existing feature attribute for all ROIs for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                all_rois_mask[(left_roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir
                                      + '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                                   for roi_fn in roi_fns], axis=0)
                np.place(all_rois_mask, (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0) & (all_rois_mask != 'overlap')] = roi

        # Flatten the mask into a list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign the ROI mask to the localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                    participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))

        # join all datasets
        localizer_dss.append(localizer_ds)

    # save the full dataset
    mv.h5save(results_dir +
              'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
              localizer_dss)
    print('saved the collection of all subjects datasets.')

    # stack everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset; time points are now features
    ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
    mv.h5save(results_dir +
              'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
              ds)
    print('Transposed the group-dataset and saved it.')
    return ds
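# The mask-combination logic above, reduced to its NumPy core (toy masks,
# not from the original data): several binary ROI volumes are summed and
# then clipped back to a 0/1 mask, so overlapping maps cannot produce
# voxel counts greater than 1.
import numpy as np

mask_a = np.array([[0, 1, 1, 0, 0]])
mask_b = np.array([[0, 0, 1, 1, 0]])

roi_mask = np.sum([mask_a, mask_b], axis=0)   # voxel 2 now has the value 2
roi_mask = np.where(roi_mask > 0, 1, 0)       # clip back to binary

assert roi_mask.tolist() == [[0, 1, 1, 1, 0]]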
def makeaplot(events, sensitivities, hrf_estimates, roi_pair, fn=None,
              include_all_regressors=False):
    """
    Produce a time series plot for the ROI class comparison specified in
    roi_pair, e.g. roi_pair = ['left FFA', 'left PPA'].
    If include_all_regressors = True, the function will create a potentially
    overloaded legend with all of the regressors, regardless of whether they
    occurred in the run. (Plotting then takes longer, but this is a useful
    option if all regressors are of relevance and can be tweaked in Inkscape.)
    If the figure should be saved, specify an existing path in the parameter fn.

    # TODOs for the future: runs=None, overlap=False, grouping (there should
    # be a way to no longer rely on hardcoded stimuli and colors within the
    # function, e.g. with OrderedDicts)
    """
    import matplotlib.pyplot as plt

    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    # default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)

    # take the mean, because we don't want 15 folds of sensitivities,
    # but their average
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # the TR was not preserved/carried through in .a,
    # so we guesstimate it based on the values of time_coords
    runs = np.unique(mean_sens_transposed.sa.chunks)
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    mean_sens_transposed.sa.time_coords = np.arange(len(mean_sens_transposed)) * TRdirty

    # run lengths and onsets in seconds
    runlengths = [np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
                  for run in runs]
    runonsets = [sum(runlengths[:run]) for run in runs]
    # just append any large number to accommodate the fact that the last run
    # also needs an offset at some point
    runonsets.append(99999)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.targets[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]

    from collections import OrderedDict
    block_design_betas = OrderedDict(
        sorted(zip(roi_betas_ds.sa.condition, roi_betas_ds.samples[:, 0]),
               key=lambda x: x[1]))
    block_design = list(block_design_betas)

    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = ['#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
                  '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1']
        plt.suptitle('Timecourse of sensitivities, {} versus {}, run {}'.format(
            roi_pair[0], roi_pair[1], run + 1), fontsize='large')

        # the TR is 2 seconds here
        run_onset = int(runonsets[run] // 2)
        run_offset = int(runonsets[run + 1] // 2)

        # for each run, adjust the x-axis
        plt.xlim([min(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)]),
                  max(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)])])
        plt.ylim([-2.7, 4.5])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)

        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            print(stimulus)
            condition_event_mask = events['condition'] == stimulus
            onsets = events[condition_event_mask]['onset'].values
            onsets_run = [time for time in onsets
                          if np.logical_and(time > run_onset * 2, time < run_offset * 2)]
            durations = events[condition_event_mask]['duration'].values
            durations_run = [dur for idx, dur in enumerate(durations)
                             if np.logical_and(onsets[idx] > run_onset * 2,
                                               onsets[idx] < run_offset * 2)]
            # prepare for plotting
            r_height = 0.3
            y = 4
            if stimulus.startswith('run'):
                continue
            if stimulus.startswith('location'):
                # gradually lower the band over occurrences of location stimuli
                y -= r_height
                color = 'darkgreen'
            elif 'face' in stimulus:
                if stimulus == 'many_faces':
                    color = 'tomato'
                else:
                    color = 'firebrick'
            elif stimulus == 'exterior':
                color = 'cornflowerblue'
                y -= 2 * r_height
            elif stimulus.startswith('time'):
                color = 'darkslategrey'
                y -= 3 * r_height
            elif stimulus == 'night':
                color = 'slategray'
                y -= 4 * r_height
            elif stimulus == 'scene-change':
                color = 'black'
                y -= 5 * r_height

            # get the beta corresponding to the stimulus to later use in the label
            beta = roi_betas_ds.samples[hrf_estimates.sa.condition == stimulus, 0]

            if include_all_regressors and onsets_run == []:
                # if there are no onsets for a particular regressor, but we want
                # to print all regressors, plot a zero-size rectangle so the
                # legend entry still appears
                rectangle = plt.Rectangle((0, 0), 0, 0, fc=color, alpha=0.5,
                                          label='_' * 0 + stimulus.replace(" ", "")
                                          + '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)

            for i, x in enumerate(onsets_run):
                # We need the i to trick the labeling. Matplotlib would attempt
                # to label every single occurrence of a stimulus, but it
                # disregards labels that start with '_'. Attaching '_' * i to
                # the label means all but the first onset get a '_' prefix and
                # are ignored.
                r_width = durations_run[i]
                rectangle = plt.Rectangle((x, y), r_width, r_height,
                                          fc=color, alpha=0.5,
                                          label='_' * i + stimulus.replace(" ", "")
                                          + '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
                # plt.axis('scaled')
                # del colors[0]

        times = roi_sens_ds.sa.time_coords[run_onset:run_offset]
        ax.plot(times, roi_sens_ds.samples[run_onset:run_offset],
                '-', color='black', lw=1.0)
        # plot the GLM model results
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[
            run_onset:int(run_offset), roi_pair_idx]
        # ax2 = ax.twinx()
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir + 'timecourse_avmovie_glm_sens_{}_vs_{}_run-{}.svg'.format(
                roi_pair[0], roi_pair[1], run + 1))
ds.sa.pop('stats')
ds.sa['behavior'] = np.tile(['eating', 'fighting', 'running', 'swimming'], 5)
ds.sa['taxonomy'] = np.repeat(['bird', 'insect', 'primate', 'reptile', 'ungulate'], 4)
ds.sa['conditions'] = [' '.join((tax, beh))
                       for tax, beh in zip(ds.sa.taxonomy, ds.sa.behavior)]

for lab, cond in zip(ds.sa.labels, ds.sa.conditions):
    assert ' '.join(lab.split('#')[0].split('_')) == cond

ds.sa['runs'] = [run] * 20
ds.sa['subjects'] = [participant] * 20
ds.fa['node_indices'] = range(n_vertices)
dss.append(ds)

ds = mv.vstack(dss)

# Exclude medial wall
medial_wall = np.where(np.sum(ds.samples == 0, axis=0) == n_conditions * 5)[0].tolist()
cortical_vertices = np.where(np.sum(ds.samples == 0, axis=0) < n_conditions * 5)[0].tolist()
assert len(medial_wall) == n_medial[hemi]
assert len(medial_wall) + len(cortical_vertices) == n_vertices

#np.save(join(mvpa_dir, 'cortical_vertices_{0}.npy'.format(hemi)), cortical_vertices)
#cortical_vertices = np.load(join(mvpa_dir, 'cortical_vertices_{0}.npy'.format(hemi))).tolist()

# Z-score features across samples
#mv.zscore(ds, chunks_attr='runs')
ds.samples = ((ds.samples - np.mean(ds.samples, axis=1)[:, None]) /
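# The statement above is cut off mid-expression; presumably it completes
# with a division by the row-wise standard deviation, i.e. per-sample
# z-scoring across features. A self-contained sketch of that operation
# (random data, not from the original script):
import numpy as np

samples = np.random.randn(20, 1000) * 3 + 5
samples = ((samples - samples.mean(axis=1)[:, None])
           / samples.std(axis=1)[:, None])

assert np.allclose(samples.mean(axis=1), 0)
assert np.allclose(samples.std(axis=1), 1)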
# Load masked movie data IN GROUP TEMPLATE SPACE and assign ROIs as feature attributes

# Set order of polynomial for detrending
polyord = 3

movie_dss = []
for participant in participants:
    # Load movie data with brain mask for a participant
    movie_fns = sorted(glob(base_dir + participant + data_dir
                            + '*_task-avmovie_run-*highpass_tmpl.nii.gz'))
    mask_fn = base_dir + participant + anat_dir + 'brain_mask_tmpl.nii.gz'
    assert len(movie_fns) == 8

    # Include chunk (i.e., run) labels
    movie_ds = mv.vstack([
        mv.fmri_dataset(movie_fn, mask=mask_fn, chunks=run)
        for run, movie_fn in enumerate(movie_fns)
    ])

    # Assign participant labels as a feature attribute
    movie_ds.fa['participant'] = [participant] * movie_ds.shape[1]
    print("Loaded movie data for participant {0}".format(participant))

    # Perform linear detrending per chunk
    mv.poly_detrend(movie_ds, polyord=polyord, chunks_attr='chunks')

    # Perform low-pass filtering per chunk
    movie_ds.samples = clean(movie_ds.samples, sessions=movie_ds.sa.chunks,
                             low_pass=.1, high_pass=None, t_r=2.0,
    # problems from pre-processing above
    tsds.sa.time_coords = fmri_dataset(bold_filename).sa.time_coords
    # post-process the time series dataset -- possibly modeling
    run_mkds_args = {k: v[run_id] for k, v in mkds_args.items()}
    ds = args.mkds(tsds, **run_mkds_args)
    for attr in ('target', 'chunk'):
        attr_val = getattr(args, '{}_attr'.format(attr))
        if attr_val not in ds.sa.keys():
            raise RuntimeError(
                '{} "{}" not found in dataset attributes: {}"'.format(
                    attr, attr_val, ds.sa.keys()))
    ds_list.append(ds)

# merge ds across runs
dataset = vstack(ds_list, a=0)

#
# analysis setup
# TODO: possibly refactor into a plugin to allow for other types
#

# collect raw predictions, so we can compute a McNemar test easily
# without any reconstruction of binomial results
results = []
# use a confusion matrix to collect all results in multiple sets,
# one for each data fold
confusion = ConfusionMatrix(labels=list(dataset.sa[args.target_attr].unique))
partitioner = NFoldPartitioner(attr=args.chunk_attr)
#mask_fname = os.path.join('/home', 'mboos', 'SpeechEncoding',
#                          'temporal_lobe_mask_brain_subj' + str(subj) + 'bold.nii.gz')

#get openFMRI dataset handle
datapath = os.path.join('/home', 'data', 'psyinf', 'forrest_gump', 'anondata')
dhandle = mvpa.OpenFMRIDataset(datapath)
model = 1
task = 1
T3 = False

#boldlist = sorted(glob.glob(os.path.join(datapath, 'task002*')))
flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

for subj in xrange(1, 20):
    mask_fname = os.path.join('/home', 'mboos', 'SpeechEncoding',
                              'temporal_lobe_mask_brain_subj%02dbold.nii.gz' % subj)
    #load and save all datasets
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
                                              chunks=run_id - 1,
                                              mask=mask_fname, flavor=flavor)
        run_datasets.append(run_ds)
    s1ds = mvpa.vstack(run_datasets)

    mvpa.poly_detrend(s1ds, polyord=1, chunks_attr='chunks')
    mvpa.zscore(s1ds)
    s1ds.save(os.path.join('/home', 'mboos', 'SpeechEncoding', 'PreProcessed',
                           'FG_subj' + str(subj) + 'pp.gzipped.hdf5'),
              compression=9)
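# The per-subject preprocessing above (stack runs, detrend per chunk,
# z-score) on a toy dataset -- a minimal sketch with random samples and an
# invented chunk structure, assuming PyMVPA is available:
import numpy as np
import mvpa2.suite as mvpa

toy = mvpa.Dataset(np.random.randn(60, 50),
                   sa={'chunks': np.repeat(np.arange(3), 20)})
mvpa.poly_detrend(toy, polyord=1, chunks_attr='chunks')  # per-run linear detrend
mvpa.zscore(toy)                                         # voxelwise z-scoring per chunk
assert np.allclose(toy.samples.mean(axis=0), 0, atol=1e-6)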
    ds.sa['sessions'] = [ses] * ds.shape[0]
    ds.fa['node_indices'] = range(ds.shape[1])
    ds.fa['center_ids'] = range(ds.shape[1])
    ds.sa['targets'] = range(ds.shape[0])
    # ds.sa.pop('labels')
    if hyperalign:
        ds = mappers[i][participant].forward(ds)
        print("Hyperaligned participant {0}".format(participant))
    if zscore_features:
        mv.zscore(ds, chunks_attr=None)
    ds.fa['node_indices'] = range(ds.shape[1])
    ds.fa['center_ids'] = range(ds.shape[1])

ds_all = mv.vstack((ds1, ds2, ds3, ds4), fa='update')

rsa.PDist(**kwargs)  # note: this bare call constructs a measure and discards it
#variant_ids = mv.remove_invariant_features(ds_both).fa.center_ids.tolist()

# Set up cross-validated RSA
cv_rsa_ = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                             mv.HalfPartitioner(attr='sessions'),
                             errorfx=None)

# cv_rsa above would return all kinds of .sa which are important
# but must be the same across searchlights. So we first apply it
# to the entire ds to capture them.
cv_rsa_out = cv_rsa_(ds_all)
target_sa = cv_rsa_out.sa.copy(deep=True)

# And now create a postproc which would verify and strip them off
            '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
                participant, tr[run], run, hemi)))
        mv.zscore(ds, chunks_attr=None)
        if hyperalign:
            ds = mappers[participant].forward(ds)
            mv.zscore(ds, chunks_attr=None)
        ds.fa['node_indices'] = range(ds.shape[1])

        # n_samples = ds.samples.shape[0]
        #
        # # Exclude medial wall
        # print(np.where(np.sum(ds.samples == 0, axis=0) == n_samples))
        n_samples = ds.samples.shape[0]
        medial_wall = np.where(np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist()
        print(len(medial_wall))
        cortical_vertices = np.where(np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist()
        assert len(medial_wall) == n_medial[hemi]
        assert len(medial_wall) + len(cortical_vertices) == n_vertices

        sl_result = sl(ds)
        print(ds.samples.shape, sl_result.samples.shape)
        list_of_RDMs.append(sl_result)

final = mv.vstack(list_of_RDMs)
print(final.shape)
mv.h5save(
    '/idata/DBIC/cara/search_hyper_mappers_life_mask_nofsel_{0}_{1}_leftout_{1}_{2}.hdf5'
    .format(participant, hemi, left_out, sys.argv[1]), final)
# apply feature selection to all data (localizer and memory)
fs_mapds_list = [ds[:, mask]
                 for ds, mask in zip(map_ds_dict.values(), fsel_masks)]
fs_memds_list = [ds[:, mask]
                 for ds, mask in zip(mem_ds_dict.values(), fsel_masks)]

hyper = mvpa2.Hyperalignment()
hypmaps = hyper(fs_mapds_list)  # returns a list of mappers

# apply the hyperalignment maps to feature-selected test data
memds_hyper_list = [ha.forward(ds) for ha, ds in zip(hypmaps, fs_memds_list)]

# stack all datasets and zscore, because all are now in the common space
ds_hyper = mvpa2.vstack(memds_hyper_list)
mvpa2.zscore(ds_hyper, chunks_attr='subj')

#######################################
##  use sklearn to perform decoding  ##
#######################################

# get the number of groups for leave-one-subj-out cross-validation
n_groups = len(pd.np.unique(ds_hyper.sa.subj))

cv_df_list = []    # holds cross-validation accuracies for each condition
null_df_list = []  # holds permutation distribution accuracies for each condition
pval_df_list = []  # holds mean accuracy and p-values for each condition

# choose the "target" sample attributes
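# A minimal sketch of the hyperalignment workflow above with random data
# (invented shapes, assuming PyMVPA is available): train mappers on one set
# of datasets, then forward-map held-out data from each subject into the
# common space before stacking.
import numpy as np
import mvpa2.suite as mvpa2

train_list = [mvpa2.Dataset(np.random.randn(50, 20)) for _ in range(3)]
test_list = [mvpa2.Dataset(np.random.randn(10, 20)) for _ in range(3)]
for ds in train_list + test_list:
    mvpa2.zscore(ds, chunks_attr=None)

hyper = mvpa2.Hyperalignment()
hypmaps = hyper(train_list)                       # one mapper per dataset
common = [m.forward(ds) for m, ds in zip(hypmaps, test_list)]
print(common[0].shape)                            # (10, 20), now in common space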
def dotheglm(sensitivities, eventdir, annot_dir):
    """dotheglm() does the GLM. It stacks the sensitivity datasets with
    vstack, computes the mean sensitivity per ROI pair with the
    mean_group_sample() function, and transposes the result with a
    TransposeMapper(). It then reads the event files into an appropriate
    data structure and computes one GLM per run.
    """
    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    # default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurrences of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8

    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # get all settings with more than one occurrence
    setting = [set for set in location_annotation.setting.unique()
               if (location_annotation.setting[location_annotation.setting == set]
                   .value_counts()[0] > 1)]

    # get onsets and durations
    onset = []
    duration = []
    condition = []
    for set in setting:
        for i in range(location_annotation.setting[
                location_annotation['setting'] == set].value_counts()[0]):
            onset.append(location_annotation[
                location_annotation['setting'] == set]['onset'].values[i])
            duration.append(location_annotation[
                location_annotation['setting'] == set]['duration'].values[i])
        condition.append([set] * (i + 1))
    # flatten the conditions
    condition = [y for x in condition for y in x]
    assert len(condition) == len(onset) == len(duration)

    # concatenate the strings
    condition_str = [set.replace(' ', '_') for set in condition]
    condition_str = ['location_' + set for set in condition_str]

    # put it in a dataframe
    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition_str
    })
    # sort according to onsets, to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # this is a dataframe encoding the flow of time
    time_forward = pd.DataFrame(
        [{'condition': 'time+',
          'onset': location_annotation['onset'][i],
          'duration': 1.0}
         for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['+', '++']])
    time_back = pd.DataFrame(
        [{'condition': 'time-',
          'onset': location_annotation['onset'][i],
          'duration': 1.0}
         for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['-', '--']])
    # sort according to onsets, to be paranoid
    time_forward_sorted = time_forward.sort_values(by='onset')
    time_back_sorted = time_back.sort_values(by='onset')

    scene_change = pd.DataFrame(
        [{'condition': 'scene-change',
          'onset': location_annotation['onset'][i],
          'duration': 1.0}
         for i in range(len(location_annotation) - 1)])
    scene_change_sorted = scene_change.sort_values(by='onset')

    # this is a dataframe encoding exterior shots
    exterior = pd.DataFrame(
        [{'condition': 'exterior',
          'onset': location_annotation['onset'][i],
          'duration': location_annotation['duration'][i]}
         for i in range(len(location_annotation) - 1)
         if (location_annotation['int_or_ext'][i] == 'ext')])
    # sort according to onsets, to be paranoid
    exterior_sorted = exterior.sort_values(by='onset')

    # this is a dataframe encoding nighttime
    night = pd.DataFrame(
        [{'condition': 'night',
          'onset': location_annotation['onset'][i],
          'duration': location_annotation['duration'][i]}
         for i in range(len(location_annotation) - 1)
         if (location_annotation['time_of_day'][i] == 'night')])
    # sort according to onsets, to be paranoid
    night_sorted = night.sort_values(by='onset')

    assert np.all(locations_sorted.onset[1:].values >= locations_sorted.onset[:-1].values)
    assert np.all(time_back_sorted.onset[1:].values >= time_back_sorted.onset[:-1].values)
    assert np.all(time_forward_sorted.onset[1:].values >= time_forward_sorted.onset[:-1].values)
    assert np.all(exterior_sorted.onset[1:].values >= exterior_sorted.onset[:-1].values)
    assert np.all(night_sorted.onset[1:].values >= night_sorted.onset[:-1].values)
    assert np.all(scene_change_sorted.onset[1:].values >= scene_change_sorted.onset[:-1].values)

    # check whether chunks are increasing, as a sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # the TR was not preserved/carried through in .a,
    # so we guesstimate it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(len(mean_sens_transposed)) * TRdirty

    # get runs, and run lengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    assert runs == range(len(runs))
    runlengths = [np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
                  for run in runs]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # initialize the list of dicts that is later passed to the GLM
    events_dicts = []
    # This is relevant for later stacking all dataframes together while
    # paranoidly making sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    for run in runs:
        # get face data
        eventfile = sorted(event_files)[run]
        events = pd.read_csv(eventfile, sep='\t')

        for index, row in events.iterrows():
            # disregard no_face events, put everything else into the event structure
            if row['condition'] != 'no_face':
                dic = {
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                }
                events_dicts.append(dic)

    # concatenate all event dataframes
    run_reg = pd.DataFrame(
        [{'onset': runonsets[i],
          'duration': abs(runonsets[i] - runonsets[i + 1]),
          'condition': 'run-' + str(i + 1)}
         for i in range(7)])

    # get all of these dataframes into a list and stack them
    dfs = [locations_sorted[cols], scene_change_sorted[cols],
           time_back_sorted[cols], time_forward_sorted[cols],
           exterior_sorted[cols], night_sorted[cols], run_reg[cols]]
    allevents = pd.concat(dfs)

    # save all non-face-related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv', sep='\t', index=False)

    # append non-face events to the event structure for the GLM
    for index, row in allevents.iterrows():
        dic = {
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        }
        events_dicts.append(dic)

    # save this event dict structure as a tsv file
    import csv
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        fieldnames = ['onset', 'duration', 'condition']
        writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # save this event file also as a json file... can there ever be enough different files...
    import json
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the GLM -- we've earned it
    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)

    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5', hrf_estimates)
    print('calculated the GLM, saving results.')
    return hrf_estimates
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.
    """
    # initialize a list to load all datasets into
    data_dss = []

    # get the list of participants from the root dir
    participants = sorted([path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # count the number of participant substitutions necessary
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z-scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                    '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                    'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                    'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print("ROI {0} does not exist for participant {1}; appending all zeros".format(
                    roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into a single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ], axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that the number of voxels in the ROI mask matches the dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]

            # Flatten the mask into a list
            roi_flat = list(roi_mask.ravel())
            # Assign the ROI mask to the data feature attributes
            data_ds.fa[roi] = roi_flat

            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                                for left_roi_fn in left_roi_fns], axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                                for right_roi_fn in right_roi_fns], axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that the number of voxels in the ROI mask matches the dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]

                # Flatten the mask into a list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign the ROI mask to the data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check the existing feature attribute for all ROIs for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')
                all_rois_mask[(left_roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir
                                      + '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                                   for roi_fn in roi_fns], axis=0)
                np.place(all_rois_mask, (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0) & (all_rois_mask != 'overlap')] = roi

        # Flatten the mask into a list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign the roi mask to the dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save the full dataset
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')

    # stack everything together
    ds_wide = mv.hstack(data_dss)

    # transpose the dataset; time points are now features
    ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
samples_size = 12  # length of segments in sec

if align == 'nonlinear':
    maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa',
                            'dico7Tad2grpbold7Tad_nl',
                            'brain_mask_intersection.nii.gz')
elif align == 'linear':
    maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa',
                            'dico7Tad2grpbold7Tad7Tad',
                            'brain_mask_intersection.nii.gz')

ds = mvpa.fmri_dataset(maskfile, mask=maskfile)
dsfile = '_z' + str(zsc) + '_' + str(samples_size) + '_' + align

# Load the datasets of two subjects and reorganize for univariate analysis
evds1 = mvpa.h5load(os.path.join('dataset', subj1 + dsfile + '.hdf5'))
evds1 = evds1.mapper.reverse(evds1)
evds2 = mvpa.h5load(os.path.join('dataset', subj2 + dsfile + '.hdf5'))
evds2 = evds1.mapper.reverse(evds2)
evds = mvpa.vstack([evds1, evds2])
del evds1, evds2

# Prepare the inter-subject correlation measure
class Corr(mvpa.Measure):
    is_trained = True

    def __init__(self, subj1, subj2, **kwargs):
        mvpa.Measure.__init__(self, **kwargs)
        self._subj1 = subj1
        self._subj2 = subj2

    def _call(self, evds):
        res = 1 - sd.pdist(
            np.hstack((evds[evds.sa.subj == self._subj1].samples,
                       evds[evds.sa.subj == self._subj2].samples)).T,
            'correlation')
        return mvpa.Dataset(np.array(res)[np.newaxis])

# Call the inter-subject correlation measure
cor = Corr(subj1, subj2)
    ds = mvpa.fmri_dataset(os.path.join(datapath, run, boldfile), mask=maskfile)
    mc = mvpa.McFlirtParams(os.path.join(run, 'bold_dico_moco.txt'))
    for param in mc:
        ds.sa['mc_' + param] = mc[param]

    # trim the overlapping volumes at the segment boundaries
    if i == 0:
        ds = ds[:-4]
    elif i < 7:
        ds = ds[4:-4]
    else:
        ds = ds[4:]

    ds.sa['chunks'] = np.ones(ds.nsamples) * i
    print ds.shape
    Ds.append(ds)

ds = mvpa.vstack(Ds)
ds.samples = ds.samples.astype('float32')

# Detrending and motion-parameter removal
mvpa.poly_detrend(ds, opt_regs=['mc_' + param for param in mc],
                  chunks_attr='chunks')

# Voxelwise z-score
if zsc:
    mvpa.zscore(ds)

# band-pass filter
nf = 0.5 / TR
ws = [(1 / lf) / nf, (1 / hf) / nf]
b, a = signal.butter(5, ws, btype='band')
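# The snippet stops right after designing the band-pass filter; presumably
# the filter is then applied along the time axis. A self-contained sketch of
# that step with scipy (filtfilt for zero-phase filtering; the cutoff periods
# lf and hf and the TR below are invented, not from the original script):
import numpy as np
from scipy import signal

TR, lf, hf = 2.0, 100.0, 10.0          # hypothetical TR and cutoff periods in seconds
nf = 0.5 / TR                          # Nyquist frequency
ws = [(1 / lf) / nf, (1 / hf) / nf]    # normalized band edges
b, a = signal.butter(5, ws, btype='band')

samples = np.random.randn(400, 50)     # time x voxels (toy)
filtered = signal.filtfilt(b, a, samples, axis=0)
assert filtered.shape == samples.shape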