def dotheglm(sensitivities, eventdir):
    """Fit an event-related GLM on the group-averaged sensitivities.

    The sensitivity datasets are stacked with ``mv.vstack``, averaged per ROI
    pair with ``mv.mean_group_sample`` and transposed with a
    ``TransposeMapper``. The group event table returned by
    ``get_group_events(eventdir)`` (described by the original author as the
    subject-averaged onsets/durations) is written to ``group_events.tsv`` and
    turned into the list of event dicts that ``mv.fit_event_hrf_model``
    consumes.

    Relies on the module-level names ``bilateral`` and ``results_dir``.

    :param sensitivities: sequence of sensitivity datasets accepted by
        ``mv.vstack``. Depending on ``bilateral`` their ``sa.bilat_ROIs`` or
        ``sa.all_ROIs`` entries are joined with ``'_'``, so each entry must be
        an iterable of strings.
    :param eventdir: location of the event files; passed on unchanged to
        ``get_group_events``.
    :return: whatever ``mv.fit_event_hrf_model`` returns for
        ``return_model=True``. The same object is saved to
        ``sens_glm_objectcategories_results.hdf5``.
    """
    sensitivities_stacked = mv.vstack(sensitivities)

    if bilateral:
        roi_attr = 'bilat_ROIs_str'
        roi_pairs = sensitivities_stacked.sa.bilat_ROIs
    else:
        roi_attr = 'all_ROIs_str'
        roi_pairs = sensitivities_stacked.sa.all_ROIs
    # Build a real list: under Python 3 ``map`` returns a lazy iterator,
    # which is not a valid per-sample attribute value.
    sensitivities_stacked.sa[roi_attr] = ['_'.join(pair) for pair in roi_pairs]
    mean_sens = mv.mean_group_sample([roi_attr])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)

    # save the event file; results_dir is concatenated without a separator
    # here, so it has to end with a path separator
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t',
               comments='',
               header='onset\tduration\ttrial_type\tstim_file', fmt=fmt)

    # the first three fields of every event are onset, duration, condition
    events_dicts = [
        {'onset': event[0], 'duration': event[1], 'condition': event[2]}
        for event in events
    ]

    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5',
              hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
def project_betas(ds, analysis, eventdir, results_dir, annot_dir=None):
    """Fit a GLM on the full dataset and map the betas back into the brain.

    Currently unused, but may become relevant later on; kept in utils.py.

    Known open issue (from the original notes): for the first analysis type
    overlapping voxels are excluded for classification purposes, so the GLM
    has to be run on data that still contains the overlaps. That is why this
    is a separate function and not part of the reversed analysis.

    Steps performed here:

    1. transpose ``ds`` and, for ``analysis == 'avmovie'``, let
       ``get_avmovietimes`` supply chunks, runs and run onsets,
    2. build the event dicts with ``get_events`` and fit the GLM with
       ``mv.fit_event_hrf_model``; the estimates are saved to
       ``betas_from_2nd_approach.hdf5`` inside ``results_dir``,
    3. transpose the estimates back and, per participant and regressor, hand
       one beta vector to ``buildremapper`` (``ds_type='full'`` is the only
       variant supported so far).

    The returned maps can be inspected quickly with, e.g.::

        mri_args = {
            'background': 'sourcedata/tnt/sub-01/bold3Tp2/in_grpbold3Tp2/head.nii.gz',
            'background_mask': 'sub-01/ses-movie/anat/brain_mask_tmpl.nii.gz'}
        mv.plot_lightbox(overlay=result_maps['sub-01']['scene'],
                         vlim=(1.5, None), **mri_args)

    TODO: maybe save the result maps (``map2nifti(...).to_filename(...)``).
    Open question: how to decide which regressors have the highest betas for
    a given ROI -- averaging per ROI and sorting would be one option.

    :param ds: dataset to fit; must have ``get_mapped`` and, after the GLM,
        a ``participant`` sample attribute.
    :param analysis: analysis name; ``'avmovie'`` triggers the extraction of
        run timing information.
    :param eventdir: passed on to ``get_events``.
    :param results_dir: directory for the saved estimates; also passed on to
        ``get_events``.
    :param annot_dir: passed on to ``get_events``.
    :return: ``OrderedDict`` mapping participant -> ``OrderedDict`` mapping
        regressor -> result of ``buildremapper``.
    """
    from collections import OrderedDict

    transposed = ds.get_mapped(mv.TransposeMapper())
    assert transposed.shape[0] < transposed.shape[1]

    # get the appropriate event file; runs, chunks and time coordinates are
    # only extracted from the transposed dataset for the avmovie analysis
    chunks = runs = runonsets = False
    if analysis == 'avmovie':
        transposed, chunks, runs, runonsets = get_avmovietimes(transposed)

    events_dicts = get_events(analysis=analysis,
                              eventdir=eventdir,
                              results_dir=results_dir,
                              chunks=chunks,
                              runs=runs,
                              runonsets=runonsets,
                              annot_dir=annot_dir,
                              multimatch=False)

    # step 1: do the glm on the data
    hrf_estimates = mv.fit_event_hrf_model(
        transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)

    # keep a copy of the estimates on disk
    mv.h5save(results_dir + '/' + 'betas_from_2nd_approach.hdf5',
              hrf_estimates)
    print('calculated the glm, saving results')

    # step 2: transpose the estimates back, because time points are wanted
    # as features, and extract the betas
    hrf_estimates_transposed = hrf_estimates.get_mapped(mv.TransposeMapper())
    n_samples, n_features = hrf_estimates_transposed.samples.shape[:2]
    assert n_samples > n_features

    participants = hrf_estimates_transposed.sa.participant
    subs = np.unique(participants)
    print('going on to project resulting betas back into brain...')
    regs = hrf_estimates_transposed.fa.condition
    assert len(subs) > 0

    result_maps = OrderedDict()
    for sub in subs:
        print('...for subject {}...'.format(sub))
        per_regressor = OrderedDict()
        result_maps[sub] = per_regressor

        # restrict the estimates to the current participant
        is_sub = participants == sub
        sub_estimates = hrf_estimates_transposed[is_sub]
        data = mv.Dataset(hrf_estimates_transposed.samples[is_sub],
                          fa=sub_estimates.fa,
                          sa=sub_estimates.sa)

        # one image per regressor for each subject
        for idx, reg in enumerate(regs):
            beta_vector = data.samples.T[idx]
            per_regressor[reg] = buildremapper(sub,
                                               beta_vector,
                                               ds_type='full')

    return result_maps
def _repeated_location_events(location_annotation):
    """Return onset-sorted events for every setting that occurs more than once."""
    settings = location_annotation['setting']
    onset, duration, condition = [], [], []
    for name in settings.unique():
        rows = location_annotation[settings == name]
        # settings that occur only once are skipped; a missing (NaN) setting
        # matches no row and is skipped here as well
        if len(rows) < 2:
            continue
        onset.extend(rows['onset'].values)
        duration.extend(rows['duration'].values)
        condition.extend(['location_' + name.replace(' ', '_')] * len(rows))
    locations = pd.DataFrame(
        {'onset': onset, 'duration': duration, 'condition': condition},
        columns=['onset', 'duration', 'condition'])
    # sort according to onsets to be paranoid
    return locations.sort_values(by='onset')


def _annotation_regressor(annotation, condition, column=None, values=(),
                          duration=None):
    """Return onset-sorted events for annotation rows whose ``column`` is in ``values``."""
    rows = []
    # the last annotation row is never turned into an event (kept from the
    # original implementation)
    for i in range(len(annotation) - 1):
        if column is not None and annotation[column][i] not in values:
            continue
        rows.append({
            'onset': annotation['onset'][i],
            'duration': (annotation['duration'][i]
                         if duration is None else duration),
            'condition': condition,
        })
    # naming the columns keeps an empty table sortable and concatenable
    table = pd.DataFrame(rows, columns=['onset', 'duration', 'condition'])
    return table.sort_values(by='onset')


def dotheglm(sensitivities, eventdir, annot_dir):
    """Fit one event-related GLM on the group-averaged, normalized sensitivities.

    The sensitivity datasets are deep-copied, row-normalized with
    ``sklearn.preprocessing.normalize`` (L2 norm by default), stacked with
    ``mv.vstack``, averaged per ROI pair with ``mv.mean_group_sample`` and
    transposed with a ``TransposeMapper``. The events consist of

    * the face events read from the 8 per-run event files in ``eventdir``
      (rows with condition ``'no_face'`` are dropped, onsets are shifted by
      the onset of their run),
    * regressors derived from the location annotation: repeated settings,
      scene changes, time jumps forward/backward, exterior shots and night
      scenes,
    * one boxcar regressor per run except the last one.

    A single GLM over the concatenated runs is fitted with
    ``mv.fit_event_hrf_model``.

    Relies on the module-level names ``bilateral`` and ``results_dir``.
    Writes ``non_face_regs.tsv``, ``full_event_file.tsv``, ``allevents.json``
    and ``sens_glm_avmovie_results.hdf5`` into ``results_dir``.

    :param sensitivities: sequence of sensitivity datasets; ``sa.targets`` of
        the stacked dataset must hold iterables of strings.
    :param eventdir: directory containing exactly 8 tab-separated event files
        with ``onset``, ``duration`` and ``condition`` columns.
    :param annot_dir: path of the tab-separated location annotation file (it
        is read directly with ``pd.read_csv``) with the columns ``setting``,
        ``onset``, ``duration``, ``flow_of_time``, ``int_or_ext`` and
        ``time_of_day``.
    :return: whatever ``mv.fit_event_hrf_model`` returns for
        ``return_model=True``.
    """
    import copy
    import csv
    import json

    from sklearn.preprocessing import normalize

    # This is relevant to later stack all dataframes together
    # and paranoidly make sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    # normalize a deep copy so the caller's datasets stay untouched;
    # the default for normalization is the L2 norm
    normalized = copy.deepcopy(sensitivities)
    for sens in normalized:
        sens.samples = normalize(sens.samples, axis=1)
    sensitivities_stacked = mv.vstack(normalized)

    roi_attr = 'bilat_ROIs_str' if bilateral else 'all_ROIs_str'
    # a real list instead of map(): under Python 3 map() is a lazy iterator
    sensitivities_stacked.sa[roi_attr] = [
        '_'.join(pair) for pair in sensitivities_stacked.sa.targets
    ]
    mean_sens = mv.mean_group_sample([roi_attr])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurrences of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8

    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')
    annotation_tables = [
        _repeated_location_events(location_annotation),
        _annotation_regressor(location_annotation, 'scene-change',
                              duration=1.0),
        _annotation_regressor(location_annotation, 'time-',
                              column='flow_of_time', values=('-', '--'),
                              duration=1.0),
        _annotation_regressor(location_annotation, 'time+',
                              column='flow_of_time', values=('+', '++'),
                              duration=1.0),
        _annotation_regressor(location_annotation, 'exterior',
                              column='int_or_ext', values=('ext',)),
        _annotation_regressor(location_annotation, 'night',
                              column='time_of_day', values=('night',)),
    ]
    for table in annotation_tables:
        table_onsets = table['onset'].values
        assert np.all(table_onsets[1:] >= table_onsets[:-1])

    # check whether chunks are increasing as well as sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guestimate it based on the values of time_coords
    # NOTE(review): ``tc`` is bound before ``time_coords`` is overwritten and
    # used again afterwards; whether it still holds the old values then
    # depends on how the attribute collection handles assignment. Statement
    # order is deliberately left exactly as it was.
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    # list(...) is required: on Python 3 a list never equals a range object
    assert runs == list(range(len(runs)))
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # the list of dicts that gets later passed to the glm
    events_dicts = []

    # face events: disregard 'no_face', shift everything else by the run onset
    for run in runs:
        events = pd.read_csv(event_files[run], sep='\t')
        for _, row in events.iterrows():
            if row['condition'] != 'no_face':
                events_dicts.append({
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition'],
                })

    # one regressor per run, except for the last one
    run_reg = pd.DataFrame(
        [{
            'onset': runonsets[i],
            'duration': abs(runonsets[i] - runonsets[i + 1]),
            'condition': 'run-' + str(i + 1),
        } for i in range(len(runs) - 1)],
        columns=cols)

    # get all of these dataframes into one, with identical column selection
    allevents = pd.concat(
        [table[cols] for table in annotation_tables] + [run_reg[cols]])

    # save all non-face related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv',
                     sep='\t',
                     index=False)

    # append non-face events to event structure for glm
    for _, row in allevents.iterrows():
        events_dicts.append({
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition'],
        })

    # save this event dicts structure as a tsv file
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        writer = csv.DictWriter(tsvfile, fieldnames=cols, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)

    # save this event file also as json file
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm - we've earned it
    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)
    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5',
              hrf_estimates)
    print('calculated the glm, saving results.')
    return hrf_estimates
def preprocessing(ds_p, ref_space, warp_files, mask_p, **kwargs):
    """Warp one functional run into a reference space and load it as a dataset.

    The run at ``ds_p`` is warped with ``warp_image`` using the warp file
    whose name contains the participant number taken from ``ds_p``. The
    warped image is loaded with ``mvpa.fmri_dataset``, annotated with
    participant, movie type and chunk (from ``find_participant_info``) and
    optionally detrended, z-scored and reduced to events.

    :param ds_p: path of the functional image as a string; the two
        characters starting four positions after the first ``"sub"`` are
        used as the participant number.
    :param ref_space: reference image; converted with ``str``.
    :param warp_files: iterable of warp file paths (strings).
    :param mask_p: path of a mask image, or ``None`` to load without a mask.
    :param kwargs: optional settings:

        * ``detrending`` (default ``None``): any non-``None`` value enables
          detrending with ``PolyDetrendMapper(polyord=1)``.
        * ``use_zscore`` (default ``True``): z-score the dataset.
        * ``use_events`` (default ``False``): build events with
          ``create_event_dict`` and reduce the dataset to them.
        * ``anno_dir``, ``targets``, ``event_dur`` (default ``None``):
          passed to ``create_event_dict``; ``event_dur`` is also the boxcar
          duration.
        * ``use_glm_estimates`` (default ``False``): use
          ``mvpa.fit_event_hrf_model`` instead of boxcar extraction.
        * ``event_offset`` (default ``None``): boxcar event offset.
        * ``save_disc_space`` (default ``True``): warp into one shared
          temporary file instead of a file per run.
        * ``rois`` (default ``None``): passed as ``add_fa`` to
          ``mvpa.fmri_dataset``.
    :return: the preprocessed dataset.
    :raises IndexError: if no warp file matches the participant number.
    :raises IOError: if the warped output exists but is not a regular file.
    """
    # Only stringify a mask that was actually given: str(None) is the
    # non-None string 'None', which made the unmasked branch unreachable.
    mask_p = None if mask_p is None else str(mask_p)
    ref_space = str(ref_space)

    detrending = kwargs.get('detrending')
    use_zscore = kwargs.get('use_zscore', True)
    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir')
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets')
    event_offset = kwargs.get('event_offset')
    event_dur = kwargs.get('event_dur')
    save_disc_space = kwargs.get('save_disc_space', True)
    rois = kwargs.get('rois')

    # participant number: two characters, four positions after "sub"
    sub_idx = ds_p.find("sub")
    vp_num_str = ds_p[sub_idx + 4:sub_idx + 6]
    warp_file = [
        candidate for candidate in warp_files if vp_num_str in candidate
    ][0]
    part_info = find_participant_info(ds_p)

    tmp_dir = Path.cwd().parents[0].joinpath("data", "tmp")
    if save_disc_space:
        # a single file that is overwritten for every run
        temp_file = str(tmp_dir.joinpath("tmp_warped_data_file.nii.gz"))
    else:
        temp_file_add = "sub-{}_{}-movie_run-{}_warped_file.nii.gz".format(
            part_info[0], part_info[1], int(part_info[2]))
        # TODO: this location was flagged "change" by the original author
        temp_file = str(tmp_dir.joinpath("runs_for_testing", temp_file_add))

    warped_ds = warp_image(ds_p, ref_space, warp_file, temp_file,
                           save_disc_space=save_disc_space)

    # block until the warped image shows up on disk (polls without timeout)
    while not os.path.exists(warped_ds):
        time.sleep(5)
    if not os.path.isfile(warped_ds):
        # previously this fell through and failed later on an unbound ``ds``
        raise IOError(
            "warped output is not a regular file: {}".format(warped_ds))

    load_kwargs = {'samples': warped_ds}
    if mask_p is not None:
        load_kwargs['mask'] = get_adjusted_mask(mask_p, ref_space)
    if rois is not None:
        load_kwargs['add_fa'] = rois
    ds = mvpa.fmri_dataset(**load_kwargs)

    ds.sa['participant'] = [int(part_info[0])]
    ds.sa["movie_type"] = [part_info[1]]
    ds.sa['chunks'] = [int(part_info[2])]

    if detrending is not None:
        ds = ds.get_mapped(mvpa.PolyDetrendMapper(polyord=1))

    if use_zscore:
        mvpa.zscore(ds)

    if use_events:
        events = create_event_dict(anno_dir, ds_p, targets, event_dur)
        if use_glm_estimates:
            ds = mvpa.fit_event_hrf_model(ds, events,
                                          time_attr='time_coords',
                                          condition_attr='targets')
        else:
            ds = mvpa.extract_boxcar_event_samples(
                ds,
                events=events,
                time_attr='time_coords',
                match='closest',
                event_offset=event_offset,
                event_duration=event_dur,
                eprefix='event',
                event_mapper=None)
            # NOTE(review): assumed to belong to the boxcar branch only --
            # confirm against the original indentation
            ds = fix_info_after_events(ds)

    return ds