示例#1
0
def dotheglm(sensitivities, eventdir):
    """Fit an event-related GLM on the group-averaged sensitivities.

    Steps:

    1. ``vstack`` all sensitivity datasets into one dataset.
    2. Join every ROI pair into a single string label and average the
       sensitivities per label with ``mean_group_sample``.
    3. Transpose the averaged dataset with a ``TransposeMapper``.
    4. Obtain the group events from ``get_group_events(eventdir)``, write
       them to ``group_events.tsv`` and convert them into the list of dicts
       that ``fit_event_hrf_model`` expects.
    5. Fit the GLM and store the result as HDF5.

    The function relies on module-level names that are defined outside this
    block: ``bilateral`` (selects which ROI attribute is used),
    ``results_dir`` (output location; it is concatenated directly with the
    file names, so it presumably ends with a path separator -- verify
    against the caller) and ``get_group_events``.

    :param sensitivities: sequence of sensitivity datasets to be stacked
    :param eventdir: location of the event files, handed to
        ``get_group_events``
    :return: the output of ``mv.fit_event_hrf_model`` (called with
        ``return_model=True``)
    """
    sensitivities_stacked = mv.vstack(sensitivities)

    # Both modes do the same thing; they only differ in the sample attribute
    # that holds the ROI pairs and in the name of the derived label attribute.
    if bilateral:
        group_attr = 'bilat_ROIs_str'
        roi_pairs = sensitivities_stacked.sa.bilat_ROIs
    else:
        group_attr = 'all_ROIs_str'
        roi_pairs = sensitivities_stacked.sa.all_ROIs

    # Build a real list: on Python 3 ``map`` returns a lazy iterator, which
    # is not a valid per-sample attribute value.
    sensitivities_stacked.sa[group_attr] = ['_'.join(pair)
                                            for pair in roi_pairs]
    mean_sens = mv.mean_group_sample([group_attr])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event_file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t',
               comments='',
               header='onset\tduration\ttrial_type\tstim_file', fmt=fmt)

    # Convert the events into dictionaries. Index-based access is kept on
    # purpose because the concrete container type returned by
    # get_group_events() is not visible here.
    events_dicts = [
        {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2],
        }
        for i in range(len(events))
    ]

    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5', hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
示例#2
0
def project_betas(ds,
                  analysis,
                  eventdir,
                  results_dir,
                  annot_dir=None,
                  ):
    """Fit a GLM on ``ds`` and project the betas back into the brain.

    Currently unused, but may become relevant later on. It is kept separate
    from the reversed analysis because the first analysis type excludes
    overlaps (for classification purposes), whereas the GLM here has to be
    computed on data that still contains them.

    The dataset is transposed, the matching events are collected via
    ``get_events`` (for ``analysis == 'avmovie'`` the run information is
    first extracted with ``get_avmovietimes``), the GLM is fitted and saved
    as ``betas_from_2nd_approach.hdf5`` in ``results_dir``. Afterwards every
    regressor's beta vector is handed to ``buildremapper`` per subject.

    The resulting maps can be inspected quickly with ``mv.plot_lightbox``
    (``overlay=result_maps[<subject>][<regressor>]``).
    TODO: maybe save the result maps to disk as well.

    :param ds: dataset to fit the GLM on
    :param analysis: analysis type; ``'avmovie'`` triggers run extraction
    :param eventdir: location of the event files, forwarded to ``get_events``
    :param results_dir: directory the GLM results are written to
    :param annot_dir: optional annotation location, forwarded to
        ``get_events``
    :return: nested ``OrderedDict`` ``{subject: {regressor: map}}`` where
        each map is whatever ``buildremapper`` returns (presumably a NIfTI
        image -- confirm against ``buildremapper``)
    """
    from collections import OrderedDict

    transposed = ds.get_mapped(mv.TransposeMapper())
    assert transposed.shape[0] < transposed.shape[1]

    # Placeholders; they are only filled with real values for 'avmovie'.
    chunks = runs = runonsets = False
    if analysis == 'avmovie':
        transposed, chunks, runs, runonsets = get_avmovietimes(transposed)

    events_dicts = get_events(analysis=analysis,
                              eventdir=eventdir,
                              results_dir=results_dir,
                              chunks=chunks,
                              runs=runs,
                              runonsets=runonsets,
                              annot_dir=annot_dir,
                              multimatch=False)

    # step 1: fit the GLM on the data and store the estimates
    glm_options = dict(time_attr='time_coords',
                       condition_attr='condition',
                       design_kwargs=dict(drift_model='blank'),
                       glmfit_kwargs=dict(model='ols'),
                       return_model=True)
    hrf_estimates = mv.fit_event_hrf_model(transposed, events_dicts,
                                           **glm_options)
    mv.h5save(results_dir + '/' + 'betas_from_2nd_approach.hdf5', hrf_estimates)
    print('calculated the glm, saving results')

    # step 2: transpose the estimates back before pulling out the betas
    betas = hrf_estimates.get_mapped(mv.TransposeMapper())
    assert betas.samples.shape[0] > betas.samples.shape[1]

    subs = np.unique(betas.sa.participant)
    print('going on to project resulting betas back into brain...')
    regs = betas.fa.condition
    assert len(subs) > 0

    result_maps = OrderedDict()
    for sub in subs:
        print('...for subject {}...'.format(sub))
        maps_of_subject = OrderedDict()
        result_maps[sub] = maps_of_subject

        # restrict the estimates to the rows of the current participant
        of_subject = betas.sa.participant == sub
        subset = betas[of_subject]
        data = mv.Dataset(betas.samples[of_subject],
                          fa=subset.fa,
                          sa=subset.sa)

        # one beta vector, and hence one projected map, per regressor;
        # currently this is only possible for the full dataset
        for idx, reg in enumerate(regs):
            maps_of_subject[reg] = buildremapper(sub,
                                                 data.samples.T[idx],
                                                 ds_type='full',
                                                 )

    return result_maps
示例#3
0
def dotheglm(sensitivities, eventdir, annot_dir):
    """Fit a GLM on normalized, group-averaged sensitivities (movie data).

    Steps:

    1. Deep-copy the sensitivity datasets and L2-normalize every sample.
    2. ``vstack`` them, average per ROI pair with ``mean_group_sample`` and
       transpose the result with a ``TransposeMapper``.
    3. Build regressors from the location annotation: recurring settings
       (``location_<setting>``), ``time+`` / ``time-``, ``scene-change``,
       ``exterior`` and ``night``.
    4. Read the eight per-run event files, shift their onsets by the run
       onsets and keep every event whose condition is not ``no_face``.
    5. Add one regressor per run except the last one.
    6. Write the events to ``non_face_regs.tsv``, ``full_event_file.tsv``
       and ``allevents.json``, fit a single GLM over all runs and save it to
       ``sens_glm_avmovie_results.hdf5``.

    The function relies on the module-level names ``bilateral`` and
    ``results_dir``, which are defined outside this block.

    :param sensitivities: sequence of sensitivity datasets
    :param eventdir: directory that contains exactly eight event files
    :param annot_dir: path of the tab-separated location annotation; despite
        the name it is passed straight to ``pd.read_csv``
    :return: the output of ``mv.fit_event_hrf_model`` (called with
        ``return_model=True``)
    """
    import copy
    import csv
    import json

    from sklearn.preprocessing import normalize

    # Column layout shared by every event data frame built below.
    cols = ['onset', 'duration', 'condition']

    # ------------------------------------------------------------------
    # sensitivities: normalize, stack, average per ROI pair, transpose
    # ------------------------------------------------------------------
    # Work on a deep copy so the datasets of the caller stay untouched.
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for sens in sensitivities_to_normalize:
        # the default norm of sklearn's normalize() is L2
        sens.samples = normalize(sens.samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    # Both modes read the ROI pairs from ``sa.targets``; only the name of the
    # derived label attribute differs.
    group_attr = 'bilat_ROIs_str' if bilateral else 'all_ROIs_str'
    # Build a real list: on Python 3 ``map`` returns a lazy iterator, which
    # is not a valid per-sample attribute value.
    sensitivities_stacked.sa[group_attr] = [
        '_'.join(pair) for pair in sensitivities_stacked.sa.targets
    ]
    mean_sens = mv.mean_group_sample([group_attr])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # ------------------------------------------------------------------
    # inputs: face event files (one per run) and the location annotation
    # ------------------------------------------------------------------
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # ------------------------------------------------------------------
    # location regressors: every setting that occurs more than once
    # ------------------------------------------------------------------
    onset, duration, condition = [], [], []
    for name in location_annotation.setting.unique():
        rows = location_annotation[location_annotation['setting'] == name]
        # Counting the matching rows avoids positional indexing into
        # value_counts(); it also skips missing settings, which match no row.
        if len(rows) <= 1:
            continue
        onset.extend(rows['onset'].values)
        duration.extend(rows['duration'].values)
        condition.extend(
            ['location_' + name.replace(' ', '_')] * len(rows))

    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition
    })
    # sort according to onsets to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # ------------------------------------------------------------------
    # regressors derived from single annotation columns
    # ------------------------------------------------------------------
    # NOTE(review): as in the original code, the last annotation row is
    # never turned into an event -- confirm that this is intended.
    candidate_rows = range(len(location_annotation) - 1)

    def annotation_events(label, column=None, accepted=(), fixed_duration=None):
        """Return the onset-sorted events of one annotation-based regressor.

        A row is used when ``column`` is None or when its value in ``column``
        is one of ``accepted``. The annotated duration is used unless
        ``fixed_duration`` is given.
        """
        rows = [{
            'onset': location_annotation['onset'][i],
            'duration': (location_annotation['duration'][i]
                         if fixed_duration is None else fixed_duration),
            'condition': label
        } for i in candidate_rows
            if column is None or location_annotation[column][i] in accepted]
        # Passing the columns explicitly keeps the frame sortable even when
        # no row matched.
        return pd.DataFrame(rows, columns=cols).sort_values(by='onset')

    # flow of time (forward / backward jumps), modelled as 1s events
    time_forward_sorted = annotation_events(
        'time+', 'flow_of_time', ('+', '++'), fixed_duration=1.0)
    time_back_sorted = annotation_events(
        'time-', 'flow_of_time', ('-', '--'), fixed_duration=1.0)
    # every annotated row marks a scene change, modelled as a 1s event
    scene_change_sorted = annotation_events(
        'scene-change', fixed_duration=1.0)
    # exterior shots and night time keep their annotated duration
    exterior_sorted = annotation_events('exterior', 'int_or_ext', ('ext',))
    night_sorted = annotation_events('night', 'time_of_day', ('night',))

    # paranoia: all onsets have to be non-decreasing after sorting
    for frame in (locations_sorted, time_back_sorted, time_forward_sorted,
                  exterior_sorted, night_sorted, scene_change_sorted):
        assert np.all(frame.onset[1:].values >= frame.onset[:-1].values)

    # ------------------------------------------------------------------
    # timing information of the dataset
    # ------------------------------------------------------------------
    # check whether chunks are increasing as well as sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guestimate it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    # list(...) is required: on Python 3 a list never equals a range object
    assert runs == list(range(len(runs)))
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # ------------------------------------------------------------------
    # event structure for the glm
    # ------------------------------------------------------------------
    events_dicts = []
    for run in runs:
        # face events of this run; event_files is already sorted
        events = pd.read_csv(event_files[run], sep='\t')
        for _, row in events.iterrows():
            # disregard no faces, put everything else into event structure
            if row['condition'] != 'no_face':
                events_dicts.append({
                    # event files are run-relative, the glm needs
                    # onsets relative to the start of the first run
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                })

    # One regressor per run, except for the last run.
    # NOTE(review): presumably the last run is left out on purpose (it has
    # no following run onset to derive a duration from) -- confirm.
    run_reg = pd.DataFrame([{
        'onset': runonsets[i],
        'duration': abs(runonsets[i] - runonsets[i + 1]),
        'condition': 'run-' + str(i + 1)
    } for i in range(len(runs) - 1)])

    # Stack all non-face regressors; every frame is reduced to the same
    # columns in the same order first.
    dfs = [
        locations_sorted[cols], scene_change_sorted[cols],
        time_back_sorted[cols], time_forward_sorted[cols],
        exterior_sorted[cols], night_sorted[cols], run_reg[cols]
    ]
    allevents = pd.concat(dfs)

    # save all non-face related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv',
                     sep='\t',
                     index=False)

    # append non-face events to event structure for glm
    for _, row in allevents.iterrows():
        events_dicts.append({
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        })

    # save the complete event structure as a tsv file ...
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        writer = csv.DictWriter(tsvfile, fieldnames=cols, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # ... and as a json file
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm - we've earned it
    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)

    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5',
              hrf_estimates)
    print('calculated the glm, saving results.')

    return hrf_estimates
示例#4
0
def preprocessing(ds_p, ref_space, warp_files, mask_p, **kwargs):
    """Warp one functional run, load it as a dataset and preprocess it.

    The run is warped with ``warp_image``, loaded with
    ``mvpa.fmri_dataset`` (optionally masked and with ROI feature
    attributes), annotated with participant / movie type / chunk, and then
    optionally detrended, z-scored and turned into event-related samples.

    :param ds_p: path of the run as a string; the participant number is
        sliced out relative to the first occurrence of ``"sub"`` (assumes a
        ``sub-XX`` style path -- TODO confirm)
    :param ref_space: reference space, converted to ``str``
    :param warp_files: candidate warp files; the first one whose name
        contains the participant number is used (``IndexError`` if none)
    :param mask_p: path of the mask, or ``None`` to load the data unmasked
    :param kwargs: optional settings

        - ``detrending`` (default ``None``): any value other than ``None``
          enables first-order polynomial detrending
        - ``use_zscore`` (default ``True``): z-score the dataset
        - ``use_events`` (default ``False``): build events with
          ``create_event_dict`` and model them
        - ``anno_dir``, ``targets``, ``event_dur``: forwarded to
          ``create_event_dict``
        - ``use_glm_estimates`` (default ``False``): fit an HRF model
          instead of extracting boxcar event samples
        - ``event_offset``, ``event_dur``: forwarded to
          ``extract_boxcar_event_samples``
        - ``save_disc_space`` (default ``True``): reuse one temporary file
          name for the warped data instead of a run-specific one
        - ``rois`` (default ``None``): passed as ``add_fa`` to
          ``fmri_dataset``
    :return: the preprocessed dataset
    :raises IOError: if the warped output exists but is not a regular file
    """
    # Convert only real paths: str(None) would yield the string 'None' and
    # make the "no mask" case below unreachable.
    if mask_p is not None:
        mask_p = str(mask_p)
    ref_space = str(ref_space)
    detrending = kwargs.get('detrending', None)
    use_zscore = kwargs.get('use_zscore', True)

    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)

    rois = kwargs.get('rois', None)

    # two characters that start four positions after the first "sub"
    sub_pos = ds_p.find("sub")
    vp_num_str = ds_p[(sub_pos + 4):(sub_pos + 6)]
    warp_file = [candidate for candidate in warp_files
                 if vp_num_str in candidate][0]
    part_info = find_participant_info(ds_p)

    # the warped data goes to <parent of cwd>/data/tmp/...
    tmp_dir = Path.cwd().parents[0].joinpath("data", "tmp")
    if save_disc_space:
        # a single file name that is reused for every run
        temp_file = str(tmp_dir.joinpath("tmp_warped_data_file.nii.gz"))
    else:
        # one file per run
        temp_file_add = "sub-{}_{}-movie_run-{}_warped_file.nii.gz".format(
            part_info[0], part_info[1], int(part_info[2]))
        temp_file = str(tmp_dir.joinpath("runs_for_testing", temp_file_add))

    warped_ds = warp_image(ds_p, ref_space, warp_file, temp_file,
                           save_disc_space=save_disc_space)

    # Poll until the warped output shows up. NOTE: this waits forever if
    # the file never appears.
    while not os.path.exists(warped_ds):
        time.sleep(5)

    # Fail explicitly; otherwise ``ds`` would simply be undefined below.
    if not os.path.isfile(warped_ds):
        raise IOError(
            "Warped dataset {!r} exists but is not a regular file".format(
                warped_ds))

    # only pass the optional arguments that were actually requested
    load_kwargs = {}
    if mask_p is not None:
        load_kwargs['mask'] = get_adjusted_mask(mask_p, ref_space)
    if rois is not None:
        load_kwargs['add_fa'] = rois
    ds = mvpa.fmri_dataset(samples=warped_ds, **load_kwargs)

    # single-element lists, as in the original assignment
    ds.sa['participant'] = [int(part_info[0])]
    ds.sa["movie_type"] = [part_info[1]]
    ds.sa['chunks'] = [int(part_info[2])]
    if detrending is not None:
        detrender = mvpa.PolyDetrendMapper(polyord=1)
        ds = ds.get_mapped(detrender)
    if use_zscore:
        # return value is not used; the call is relied on to modify ``ds``
        mvpa.zscore(ds)
    if use_events:
        events = create_event_dict(anno_dir, ds_p, targets, event_dur)
        if use_glm_estimates:
            ds = mvpa.fit_event_hrf_model(ds, events, time_attr='time_coords',
                                          condition_attr='targets')
        else:
            ds = mvpa.extract_boxcar_event_samples(ds, events=events,
                                                   time_attr='time_coords',
                                                   match='closest',
                                                   event_offset=event_offset,
                                                   event_duration=event_dur,
                                                   eprefix='event',
                                                   event_mapper=None)
            ds = fix_info_after_events(ds)
    return ds