Example #1
def norm_and_mean(norm,
                  bilateral,
                  classifier,
                  sensitivities):
    """This function normalizes a list of sensitivities to their
    L2 norm if norm = True, else just stacks them according to the
    classifier they were build with. Resulting stack of sensitivities
    is averaged with the mean_group_sample() function."""
    if norm:
        from sklearn.preprocessing import normalize
        import copy
        # default for normalization is the L2 norm
        sensitivities_to_normalize = copy.deepcopy(sensitivities)
        for i in range(len(sensitivities)):
            sensitivities_to_normalize[i].samples = normalize(
                sensitivities_to_normalize[i].samples,
                axis=1) * np.sqrt(sensitivities[i].shape[1])
            print(sensitivities[i].shape)

        sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
        print('I normalized the data.')

    else:
        sensitivities_stacked = mv.vstack(sensitivities)

    sgds = ['sgd', 'l-sgd']

    if bilateral:
        if classifier in sgds:
            # Note: all SGD-based classifiers want an explicit 'target'
            # sample attribute; therefore, it is still present in the
            # sensitivities.
            # Note to self: we were wondering whether the estimates are
            # assigned to the correct labels. Double-checked (May 19):
            # ulabels are assigned with the help of np.unique, which returns
            # a sorted array, and given
            # https://github.com/PyMVPA/PyMVPA/pull/607/files#diff-bbf744fd29d7f3e4abdf7a1586a5aa95,
            # the sensitivity calculation keeps using this lexicographic order.
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                             sensitivities_stacked.sa.targets)
        else:
            # ...whereas in GNB, the results are in 'bilat_ROIs' sample attribute
            sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                             sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)

    else:
        if classifier in sgds:
            # Note: all SGD-based classifiers want an explicit 'target'
            # sample attribute; therefore, it is still present in the
            # sensitivities.
            sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                           sensitivities_stacked.sa.targets)
        else:
            # ...whereas in GNB, the results are in 'all_ROIs' sample attribute
            sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                           sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    # return the averaged sensitivities
    return mean_sens
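
For reference, a minimal standalone sketch (toy data, not part of the original pipeline) of what the normalization step in norm_and_mean does: sklearn's normalize() scales each row to unit L2 norm, and multiplying by sqrt(n_features) restores the overall scale.

import numpy as np
from sklearn.preprocessing import normalize

X = np.random.randn(4, 100)            # e.g. 4 folds of sensitivities, 100 features
Xn = normalize(X, axis=1) * np.sqrt(X.shape[1])
print(np.linalg.norm(Xn, axis=1))      # every row now has norm sqrt(100) == 10.0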
Example #2
def preprocess_datasets(dataset_list, ref_space, warp_files, mask, **kwargs):
    detrending = kwargs.get('detrending', True)
    use_zscore = kwargs.get('use_zscore', True)

    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)

    rois = kwargs.get('rois', None)

    if isinstance(dataset_list, list):
        datasets = [preprocessing(ds_p, ref_space, warp_files, mask, detrending=detrending,
                                  use_zscore=use_zscore, use_events=use_events, anno_dir=anno_dir,
                                  use_glm_estimates=use_glm_estimates, targets=targets,
                                  event_offset=event_offset, event_dur=event_dur, rois=rois,
                                  save_disc_space=save_disc_space)
                    for ds_p in dataset_list]

        if use_glm_estimates:
            for ds in datasets:
                del ds.sa["regressors"]

        ds = mvpa.vstack(datasets, a='drop_nonunique', fa='drop_nonunique')
    else:
        ds = preprocessing(dataset_list, ref_space, warp_files, mask, detrending=detrending,
                           use_zscore=use_zscore, use_events=use_events, anno_dir=anno_dir,
                           use_glm_estimates=use_glm_estimates, targets=targets,
                           event_offset=event_offset, event_dur=event_dur, rois=rois,
                           save_disc_space=save_disc_space)
    return ds
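
A minimal sketch of how the a='drop_nonunique' policy used above behaves, assuming PyMVPA's documented vstack() semantics (toy datasets, hypothetical attribute name):

import numpy as np
from mvpa2.suite import Dataset, vstack

ds_a = Dataset(np.zeros((2, 3)))
ds_b = Dataset(np.ones((2, 3)))
ds_a.a['subj'] = 'sub-01'              # dataset attribute that conflicts...
ds_b.a['subj'] = 'sub-02'              # ...across the two datasets
merged = vstack([ds_a, ds_b], a='drop_nonunique')
print(merged.shape, 'subj' in merged.a)  # (4, 3) False -- conflicting .a entries are dropped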
Example #3
def create_dataset(sub_name, main_dir, task_list, hemi):
    data_set = []
    for task_name in task_list:
        for run_num in range(1, 6):
            ds = []
            gifti_fname = os.path.join(
                main_dir, 'analysis', sub_name, 'func',
                sub_name + '_task-' + task_name + '_run-' + str(run_num) +
                '_rw-glm.' + hemi + '.coefs.gii')
            gifti_niml = os.path.join(
                main_dir, 'analysis', sub_name, 'func',
                sub_name + '_task-' + task_name + '_run-' + str(run_num) +
                '_rw-glm.' + hemi + '.coefs.niml.dset')
            #ds = mv.gifti_dataset(gifti_fname)
            ds = mv.niml.read(gifti_niml)
            # order in sub-rid000001_task-beh_run-5_rw-glm.lh.X.xmat.1D
            ds.sa['beh_tax'] = [
                "bird_eating", "bird_fighting", "bird_running",
                "bird_swimming", "insect_eating", "insect_fighting",
                "insect_running", "insect_swimming", "primate_eating",
                "primate_fighting", "primate_running", "primate_swimming",
                "reptile_eating", "reptile_fighting", "reptile_running",
                "reptile_swimming", "ungulate_eating", "ungulate_fighting",
                "ungulate_running", "ungulate_swimming"
            ]
            ds.sa['beh'] = np.tile(
                ['eating', 'fighting', 'running', 'swimming'], 5)
            ds.sa['tax'] = np.repeat(
                ['bird', 'insect', 'primate', 'reptile', 'ungulate'], 4)
            ds.fa['node_indices'] = range(0, ds.shape[1])  # 0 ~ 400000
            data_set.append(ds)

    # http://www.pymvpa.org/tutorial_mappers.html
    within_ds = mv.vstack(data_set)
    return within_ds
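
The sample attributes above are built with np.tile and np.repeat so that they line up with the fixed beh_tax ordering; a quick check of that interleaving:

import numpy as np

beh = np.tile(['eating', 'fighting', 'running', 'swimming'], 5)
tax = np.repeat(['bird', 'insect', 'primate', 'reptile', 'ungulate'], 4)
print(['{}_{}'.format(t, b) for t, b in zip(tax, beh)][:4])
# ['bird_eating', 'bird_fighting', 'bird_running', 'bird_swimming']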
Example #4
def movie_dataset(subj, task, label, **kwargs):
    # ds = movie_dataset(
    #       2,
    #       'avmovie',
    #       'bold3Tp2',
    #       mask='src/tnt/sub-02/bold3Tp2/brain_mask.nii.gz')
    cur_max_time = 0
    segments = []
    # pop add_sa from kwargs so it is defined locally and not passed twice below
    add_sa = kwargs.pop('add_sa', {})
    for seg in range(1, 9):
        print 'Seg', seg
        mc = np.recfromtxt(
            'sub-%.2i/in_%s/sub-%.2i_task-%s_run-%i_bold_mcparams.txt'
            % (subj, label, subj, task, seg),
            names=('mc_xtrans', 'mc_ytrans', 'mc_ztrans', 'mc_xrot',
                   'mc_yrot', 'mc_zrot'))
        for i in mc.dtype.fields:
            add_sa[i] = mc[i]
        ds = preprocessed_fmri_dataset(
            'sub-%.2i/in_%s/sub-%.2i_task-%s_run-%i_bold.nii.gz'
            % (subj, label, subj, task, seg),
            add_sa=add_sa,
            **kwargs)
        ds.sa['movie_segment'] = [seg] * len(ds)
        TR = np.diff(ds.sa.time_coords).mean()
        ## truncate segment time series to remove overlap
        if seg > 1:
            ds = ds[4:]
        if seg < 8:
            ds = ds[:-4]
        ds.sa['movie_time'] = np.arange(len(ds)) * TR + cur_max_time
        cur_max_time = ds.sa.movie_time[-1] + TR
        segments.append(ds)
    return vstack(segments)
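
The 4-volume trimming above removes the overlap between adjoining movie segments; a toy sketch (plain numpy, hypothetical segment lengths) showing that movie_time stays continuous across segments:

import numpy as np

TR, cur_max_time, times = 2.0, 0.0, []
for seg, n_vol in enumerate([20, 20, 20], start=1):
    vols = np.arange(n_vol)
    if seg > 1:
        vols = vols[4:]                     # drop leading overlap
    if seg < 3:
        vols = vols[:-4]                    # drop trailing overlap
    t = np.arange(len(vols)) * TR + cur_max_time
    cur_max_time = t[-1] + TR
    times.append(t)
print(np.unique(np.diff(np.concatenate(times))))  # [ 2.] -- no gaps, no overlaps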
Example #6
def sources_merged_ds(path_list, subjects_list, conf_list, task, **kwargs):

    ds_list = []
    for path, subjects, conf in zip(path_list, subjects_list, conf_list):
        ds, _, conf_n = load_subjectwise_ds(path, subjects, conf, task, **kwargs)
        ds_list.append(ds)

    ds_new = vstack(ds_list)
    ds_new.a.update(ds_list[0].a)
    print 'Merging from different sources ended...'
    print 'The number of merged subjects is ' + str(len(np.unique(ds_new.sa.name)))

    return ds_new, ['group'], conf_n
Example #8
def dotheglm(sensitivities, eventdir):
    """dotheglm does the glm. It will squish the sensitivity
    dataset by vstacking them, calculating the mean sensitivity per ROI pair
    with the mean_group_sample() function, transpose it with a
    TransposeMapper(). It will get the event files and read them in, average the
    durations because there are tiny differences between subjects, and then it
    will put all of that into a glm.
    """
    sensitivities_stacked = mv.vstack(sensitivities)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                         sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                       sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event_file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t', comments='',
               header='onset\tduration\ttrial_type\tstim_file', fmt=fmt)
    # get events into dictionary
    events_dicts = []
    for i in range(0, len(events)):
        dic = {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2]
        }
        events_dicts.append(dic)

    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5', hrf_estimates)
    print('calculated glm, saving results.')
    return hrf_estimates
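
For reference, mv.fit_event_hrf_model consumes a list of plain event dictionaries; a minimal hand-made illustration of the structure assembled above (onsets, durations, and condition names are made up):

events_dicts = [
    {'onset': 0.0,  'duration': 15.0, 'condition': 'face'},
    {'onset': 30.0, 'duration': 15.0, 'condition': 'house'},
    {'onset': 60.0, 'duration': 15.0, 'condition': 'scene'},
]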
Example #9
def subjects_merged_ds(path, subjects, conf_file, task, extra_sa=None, **kwargs):
    """
    extra_sa: dict or None, sample attributes added to the final dataset, they should be
    the same length as the subjects.
    
    """
    
    conf = read_configuration(path, conf_file, task)
   
    for arg in kwargs:
        conf[arg] = kwargs[arg]
    
    data_path = conf['data_path']
    
    i = 0

    print 'Merging subjects from '+data_path
    
    for subj in subjects:
        
        ds = load_dataset(data_path, subj, task, **conf)
        ds = preprocess_dataset(ds, task, **conf)
        
        # add extra sample attributes
        if extra_sa is not None:
            for k, v in extra_sa.iteritems():
                if len(v) == len(subjects):
                    ds.sa[k] = [v[i] for _ in range(ds.samples.shape[0])]
        
        if i == 0:
            ds_merged = ds.copy()
        else:
            ds_merged = vstack((ds_merged, ds))
            ds_merged.a.update(ds.a)

        i += 1
        del ds

    return ds_merged, ['group'], conf
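
A hypothetical call (paths and subject IDs made up) showing how extra_sa propagates one value per subject into the merged dataset:

ds_merged, group, conf = subjects_merged_ds(
    '/data/study', ['subj01', 'subj02'], 'analysis.conf', 'task',
    extra_sa={'age': [31, 27]})   # every sample of subj01 gets age 31, of subj02 age 27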
Example #10
    for hemi in ['rh']:
        # Load surface and create searchlight query engine
        surf = mv.surf.read(join(suma_dir, '{0}.pial.gii'.format(hemi)))
        qe = mv.SurfaceQueryEngine(surf, 20.0, distance_metric='dijkstra')
        print("Finished creating surface-based searchlight query engine")

        # Load in surface data sets
        dss = []
        for participant in participants:
            print(participant)
            # load the three included runs and stack them into one dataset
            runs = [load_data(join(data_dir,
                                   '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
                                       participant, tr[run], run, hemi)))
                    for run in included[:3]]

            ds = mv.vstack(runs)
            print(ds.samples.shape)
            dss.append(ds)

        n_samples = ds.samples.shape[0]
        # Exclude medial wall
        print(np.where(np.sum(ds.samples == 0, axis=0) == n_samples))
        medial_wall = np.where(np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist()
        print(len(medial_wall))
        cortical_vertices = np.where(np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist()
        assert len(medial_wall) == n_medial[hemi]
        assert len(medial_wall) + len(cortical_vertices) == n_vertices

        # Estimate searchlight hyperalignment transformation on movie data
        sl_hyper = mv.SearchlightHyperalignment(queryengine=qe, nproc=n_proc,
                                                nblocks=n_proc*8, featsel=1.0,
Example #11
def get_merged_ds(path, subjects, conf_file, source='task', dim=3, **kwargs):
    
    
    # TODO: put source and target in the config file!
    pairs = {'task': 'rest', 'rest': 'task',
             'saccade': 'face', 'face': 'saccade'}
    target = pairs[source]
    
    ds_merged_list = []
    conf_src = read_configuration(path, conf_file, source)
    conf_tar = read_configuration(path, conf_file, target)
    
    ##############################################    
    ##############################################
    ##    conf_src['label_included'] = 'all'    ##   
    ##    conf_src['label_dropped'] = 'none'    ##
    ##    conf_src['mean_samples'] = 'False'    ##
    ##############################################
    ##############################################
    
    for arg in kwargs:
        conf_src[arg] = kwargs[arg]
        conf_tar[arg] = kwargs[arg]
    
    data_path = conf_src['data_path']
    
    for subj in subjects:
        print '--------'
        try:
            ds_src = load_dataset(data_path, subj, source, **conf_src)
            ds_tar = load_dataset(data_path, subj, target, **conf_tar)
        except Exception, err:
            print err
            continue
        
        ds_src = detrend_dataset(ds_src, source, **conf_src)
        ds_tar = detrend_dataset(ds_tar, target, **conf_tar) 

        if dim == 4:    
            duration = np.min([e['duration'] for e in ds_src.a.events])      
            ds_tar = build_events_ds(ds_tar, duration, overlap=duration-1)
            ds_src = load_spatiotemporal_dataset(ds_src, duration=duration)
        
        print ds_src.samples.shape
        print ds_tar.samples.shape 
        
        ds_src.sa['task'] = [source for s in range(ds_src.samples.shape[0])]
        ds_tar.sa['task'] = [target for s in range(ds_tar.samples.shape[0])]
        
        ds_merged = vstack((ds_src, ds_tar))
        ds_merged.a.update(ds_src.a)
        
        print ds_merged.sa.task
        
        ds_merged_list.append(ds_merged)
        '''
Example #12
def makeaplot(events,
              sensitivities,
              hrf_estimates,
              roi_pair,
              fn=True):
    """
    This produces a time series plot for the roi class comparison specified in
    roi_pair such as roi_pair = ['left FFA', 'left PPA']
    """
    import matplotlib.pyplot as plt

    # take the mean and transpose the sensitivities
    sensitivities_stacked = mv.vstack(sensitivities)

    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(lambda p: '_'.join(p),
                                                         sensitivities_stacked.sa.bilat_ROIs)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(lambda p: '_'.join(p),
                                                       sensitivities_stacked.sa.all_ROIs)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # some parameters
    # get the conditions
    block_design = sorted(np.unique(events['trial_type']))
    reorder = [0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11]
    block_design = [block_design[i] for i in reorder]
    # end indices to chunk timeseries into runs
    run_startidx = np.array([0, 157, 313, 469])
    run_endidx = np.array([156, 312, 468, 624])

    runs = np.unique(mean_sens_transposed.sa.chunks)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.bilat_ROIs[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]

    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = ['#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
                  '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1']
        plt.suptitle('Timecourse of sensitivities, {} versus {}, run {}'.format(roi_pair[0],
                                                                                roi_pair[1],
                                                                                run + 1),
                     fontsize='large')
        plt.xlim([0, max(mean_sens_transposed.sa.time_coords)])
        plt.ylim([-5, 7])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)
        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            onsets = events[events['trial_type'] == stimulus]['onset'].values
            durations = events[events['trial_type'] == stimulus]['duration'].values
            r_height = 1
            color = colors[0]
            y = 6

            # get the beta corresponding to the stimulus to later use in label
            beta = roi_betas_ds.samples[hrf_estimates.sa.condition == stimulus.replace(" ", ""), 0]

            for i in range(len(onsets)):
                r_width = durations[i]
                # the color band should span the stimulation period, so start at the onset
                x = onsets[i]
                rectangle = plt.Rectangle((x, y),
                                          r_width,
                                          r_height,
                                          fc=color,
                                          alpha=0.5,
                                          label='_'*i + stimulus.replace(" ", "") + '(' + str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
            del colors[0]

        times = roi_sens_ds.sa.time_coords[run_startidx[run]:run_endidx[run]]

        ax.plot(times, roi_sens_ds.samples[run_startidx[run]:run_endidx[run]], '-', color='black', lw=1.0)
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[run_startidx[run]:run_endidx[run], roi_pair_idx]
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir + 'timecourse_localizer_glm_sens_{}_vs_{}_run-{}.svg'.format(roi_pair[0], roi_pair[1], run + 1))
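
The label='_' * i idiom used above exploits matplotlib's convention that legend labels starting with an underscore are skipped, so each stimulus is listed only once; a standalone demonstration:

import matplotlib
matplotlib.use('Agg')                      # render off-screen
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for i, x in enumerate([1, 2, 3]):
    ax.bar(x, 1, label='_' * i + 'stimulus')   # '_stimulus', '__stimulus' are ignored
ax.legend()
print([t.get_text() for t in ax.get_legend().get_texts()])  # ['stimulus']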
Example #13
behavior = np.tile(['eating', 'fighting', 'running', 'swimming'], 5)
conditions = [' '.join((beh, tax)) for beh, tax in zip(behavior, taxonomy)]

# load in all of the data into the dataframe
targets = range(1, 21)
ds = None
for x in range(len(files)):
    chunks = [x + 1] * 20
    d = mv.gifti_dataset(files[x], chunks=chunks, targets=targets)
    d.sa['conditions'] = conditions
    d.sa['taxonomy'] = taxonomy
    d.sa['behavior'] = behavior
    if ds is None:
        ds = d
    else:
        ds = mv.vstack((ds, d))
ds.fa['node_indices'] = range(ds.shape[1])
# zscore all of our samples
mv.zscore(ds, chunks_attr='chunks', dtype='float32')
# load in surface and create searchlight query engine
radius = 10
surface = mv.surf.read(join(data_path, '{0}.pial.gii'.format(hemi)))
# this is an arbitrary radius and distance metric!
query = mv.SurfaceQueryEngine(surface, radius, distance_metric='dijkstra')
# based off PyMVPA tutorial
clf = mv.LinearNuSVMC(space=predict)

cv = mv.CrossValidation(clf,
                        mv.NFoldPartitioner(attr=train_on),
                        errorfx=lambda p, t: np.mean(p == t),
                        enable_ca=['stats'])
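
The snippet stops before the searchlight is actually run; a hedged sketch of the likely next step, assuming the query engine and cross-validation objects from above (nproc is illustrative):

# hypothetical continuation: run the cross-validated searchlight over the surface
sl = mv.Searchlight(cv, queryengine=query, nproc=4)
sl_result = sl(ds)
print(sl_result.shape)   # one accuracy estimate per fold and per surface node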
Example #15
def buildadataset(zscore, rois, event_path=None):
    """buildataset() will build and save participant-specific hdf5 datasets
    with all rois from preprocessed objectcategories data, stack them for a
    group dataset and save them, and transpose the group dataset and save it.
    The parameter 'zscore' determines whether and what kind of z-scoring
    should be performed."""
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
                                    '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                                        participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])

        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from no-stimulation
        # periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            # zscore stuff
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]
                                  ]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                      '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                           'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                            'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)
            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(base_dir + participant + anat_dir +
                         '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi
        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign ROI mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                  '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                      participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))
        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
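
The manual transpose at the end of buildadataset() (swapping samples with features and sa with fa) mirrors what mv.TransposeMapper() does elsewhere in these examples; a minimal equivalence check on a toy dataset:

import numpy as np
import mvpa2.suite as mv

ds = mv.Dataset(np.arange(6).reshape(2, 3))
manual = mv.Dataset(ds.samples.T, sa=ds.fa.copy(), fa=ds.sa.copy())
mapped = ds.get_mapped(mv.TransposeMapper())
assert np.array_equal(manual.samples, mapped.samples)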
Example #16
def makeaplot(events,
              sensitivities,
              hrf_estimates,
              roi_pair,
              fn=None,
              include_all_regressors=False):
    """
    This produces a time series plot for the roi class comparison specified in
    roi_pair such as roi_pair = ['left FFA', 'left PPA'].
    If include_all_regressors = True, the function will create a potentially overloaded
    legend with all of the regressors, regardless of they occurred in the run. (Plotting
    then takes longer, but is a useful option if all regressors are of relevance and can
    be twitched in inkscape).
    If the figure should be saved, spcify an existing path in the parameter fn.

    # TODO's for the future: runs=None, overlap=False, grouping (should be a way to not rely
    # on hardcoded stimuli and colors within function anymore, with Ordered Dicts):

    """
    import matplotlib.pyplot as plt

    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    #default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)

    # get the mean, because we don't want to have 15 folds of sensitivities, but their average
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'
                                          ])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'
                                          ])(sensitivities_stacked)

    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # The TR was not preserved/carried through in .a,
    # so we will guesstimate it based on the values of time_coords.
    runs = np.unique(mean_sens_transposed.sa.chunks)
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty
    # compute the run lengths in seconds from the original time coordinates
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    # append a large number so that the last run also gets an endpoint
    runonsets.append(99999)

    for j in range(len(hrf_estimates.fa.bilat_ROIs_str)):
        comparison = hrf_estimates.fa.targets[j][0]
        if (roi_pair[0] in comparison) and (roi_pair[1] in comparison):
            roi_pair_idx = j
    roi_betas_ds = hrf_estimates[:, roi_pair_idx]
    roi_sens_ds = mean_sens_transposed[:, roi_pair_idx]
    from collections import OrderedDict
    block_design_betas = OrderedDict(
        sorted(zip(roi_betas_ds.sa.condition, roi_betas_ds.samples[:, 0]),
               key=lambda x: x[1]))
    block_design = list(block_design_betas)
    for run in runs:
        fig, ax = plt.subplots(1, 1, figsize=[18, 10])
        colors = [
            '#7b241c', '#e74c3c', '#154360', '#3498db', '#145a32', '#27ae60',
            '#9a7d0a', '#f4d03f', '#5b2c6f', '#a569bd', '#616a6b', '#ccd1d1'
        ]
        plt.suptitle(
            'Timecourse of sensitivities, {} versus {}, run {}'.format(
                roi_pair[0], roi_pair[1], run + 1),
            fontsize='large')
        # the TR is 2 seconds here... sorry, we are in a rush
        run_onset = int(runonsets[run] // 2)
        run_offset = int(runonsets[run + 1] // 2)
        # for each run, adjust the x-axis
        plt.xlim([
            min(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)]
                ),
            max(mean_sens_transposed.sa.time_coords[run_onset:int(run_offset)])
        ])
        plt.ylim([-2.7, 4.5])
        plt.xlabel('Time in sec')
        plt.legend(loc=1)
        plt.grid(True)

        # for each stimulus, plot a color band on top of the plot
        for stimulus in block_design:
            # color = colors[0]
            print(stimulus)
            condition_event_mask = events['condition'] == stimulus
            onsets = events[condition_event_mask]['onset'].values
            onsets_run = [
                time for time in onsets
                if np.logical_and(time > run_onset * 2, time < run_offset * 2)
            ]
            durations = events[condition_event_mask]['duration'].values
            durations_run = [
                dur for idx, dur in enumerate(durations)
                if np.logical_and(onsets[idx] > run_onset *
                                  2, onsets[idx] < run_offset * 2)
            ]
            # prepare for plotting
            r_height = 0.3
            y = 4
            if stimulus.startswith('run'):
                continue
            if stimulus.startswith('location'):
                # gradually decrease the alpha level over occurrences of location stims
                y -= r_height
                color = 'darkgreen'
            elif 'face' in stimulus:
                if stimulus == 'many_faces':
                    color = 'tomato'
                else:
                    color = 'firebrick'
            elif stimulus == 'exterior':
                color = 'cornflowerblue'
                y -= 2 * r_height
            elif stimulus.startswith('time'):
                color = 'darkslategrey'
                y -= 3 * r_height
            elif stimulus == 'night':
                color = 'slategray'
                y -= 4 * r_height
            elif stimulus == 'scene-change':
                color = 'black'
                y -= 5 * r_height

            # get the beta corresponding to the stimulus to later use in label
            beta = roi_betas_ds.samples[hrf_estimates.sa.condition == stimulus,
                                        0]

            if include_all_regressors and not onsets_run:
                # if there are no onsets for a particular regressor, but we
                # want all regressors in the legend, add an invisible
                # zero-size patch whose label still shows up
                rectangle = plt.Rectangle(
                    (0, 0),
                    0,
                    0,
                    fc=color,
                    alpha=0.5,
                    label='_' * 0 + stimulus.replace(" ", "") + '(' +
                    str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)

            for i, x in enumerate(onsets_run):
                # We need the i to trick the labeling: matplotlib would
                # otherwise label every single occurrence of a stimulus, but
                # it disregards labels starting with '_'. Prefixing '_' * i
                # therefore hides all but the first onset of each stimulus.
                r_width = durations_run[i]
                rectangle = plt.Rectangle(
                    (x, y),
                    r_width,
                    r_height,
                    fc=color,
                    alpha=0.5,
                    label='_' * i + stimulus.replace(" ", "") + '(' +
                    str('%.2f' % beta) + ')')
                plt.gca().add_patch(rectangle)
                plt.legend(loc=1)
                # plt.axis('scaled')
                # del colors[0]

        times = roi_sens_ds.sa.time_coords[run_onset:run_offset]

        ax.plot(times,
                roi_sens_ds.samples[run_onset:run_offset],
                '-',
                color='black',
                lw=1.0)
        # plot glm model results
        glm_model = hrf_estimates.a.model.results_[0.0].predicted[
            run_onset:int(run_offset), roi_pair_idx]
        # ax2 = ax.twinx()
        ax.plot(times, glm_model, '-', color='#7b241c', lw=1.0)
        model_fit = hrf_estimates.a.model.results_[0.0].R2[roi_pair_idx]
        plt.title('R squared: %.2f' % model_fit)
        if fn:
            plt.savefig(results_dir +
                        'timecourse_avmovie_glm_sens_{}_vs_{}_run-{}.svg'.
                        format(roi_pair[0], roi_pair[1], run + 1))
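
The TR "guesstimate" in makeaplot relies on the largest unique spacing between consecutive time_coords surviving the run boundaries; a toy check of that logic (plain numpy, made-up coordinates):

import numpy as np

# two runs whose time_coords each restart at 0, sampled every 2 s
tc = np.concatenate([np.arange(0, 10, 2.0), np.arange(0, 6, 2.0)])
TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
print(TRdirty)   # 2.0 -- the negative jump at the run boundary sorts to the front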
Example #17
    ds.sa.pop('stats')
    ds.sa['behavior'] = np.tile(['eating', 'fighting', 'running', 'swimming'],
                                5)
    ds.sa['taxonomy'] = np.repeat(
        ['bird', 'insect', 'primate', 'reptile', 'ungulate'], 4)
    ds.sa['conditions'] = [
        ' '.join((tax, beh))
        for tax, beh in zip(ds.sa.taxonomy, ds.sa.behavior)
    ]
    for lab, cond in zip(ds.sa.labels, ds.sa.conditions):
        assert ' '.join(lab.split('#')[0].split('_')) == cond
    ds.sa['runs'] = [run] * 20
    ds.sa['subjects'] = [participant] * 20
    ds.fa['node_indices'] = range(n_vertices)
    dss.append(ds)
ds = mv.vstack(dss)

# Exclude medial wall
medial_wall = np.where(np.sum(ds.samples == 0, axis=0) == n_conditions *
                       5)[0].tolist()
cortical_vertices = np.where(
    np.sum(ds.samples == 0, axis=0) < n_conditions * 5)[0].tolist()
assert len(medial_wall) == n_medial[hemi]
assert len(medial_wall) + len(cortical_vertices) == n_vertices

#np.save(join(mvpa_dir, 'cortical_vertices_{0}.npy'.format(hemi)), cortical_vertices)
#cortical_vertices = = np.load(join(mvpa_dir, 'cortical_vertices_{0}.npy').tolist()

# Z-score features across samples
#mv.zscore(ds, chunks_attr='runs')
ds.samples = ((ds.samples - np.mean(ds.samples, axis=1)[:, None]) /
Example #18
# Load masked movie data IN GROUP TEMPLATE SPACE and assign ROIs as feature attributes
# Set order of polynomial for detrending
polyord = 3

movie_dss = []
for participant in participants:
    # Load movie data with brain mask for a participant
    movie_fns = sorted(
        glob(base_dir + participant + data_dir +
             '*_task-avmovie_run-*highpass_tmpl.nii.gz'))
    mask_fn = base_dir + participant + anat_dir + 'brain_mask_tmpl.nii.gz'
    assert len(movie_fns) == 8

    # Include chunk (i.e., run) labels
    movie_ds = mv.vstack([
        mv.fmri_dataset(movie_fn, mask=mask_fn, chunks=run)
        for run, movie_fn in enumerate(movie_fns)
    ])

    # Assign participant labels as feature attribute
    movie_ds.fa['participant'] = [participant] * movie_ds.shape[1]
    print("Loaded movie data for participant {0}".format(participant))

    # Perform linear detrending per chunk
    mv.poly_detrend(movie_ds, polyord=polyord, chunks_attr='chunks')

    # Perform low-pass filtering per chunk
    movie_ds.samples = clean(movie_ds.samples,
                             sessions=movie_ds.sa.chunks,
                             low_pass=.1,
                             high_pass=None,
                             t_r=2.0,
Example #19
    # problems from pre-processing above
    tsds.sa.time_coords = fmri_dataset(bold_filename).sa.time_coords

    # post-process time series dataset -- possibly modeling
    run_mkds_args = {k: v[run_id] for k, v in mkds_args.items()}
    ds = args.mkds(tsds, **run_mkds_args)
    for attr in ('target', 'chunk'):
        attr_val = getattr(args, '{}_attr'.format(attr))
        if attr_val not in ds.sa.keys():
            raise RuntimeError(
                '{} "{}" not found in dataset attributes: {}"'.format(
                    attr, attr_val, ds.sa.keys()))
    ds_list.append(ds)

#merge ds across runs
dataset = vstack(ds_list, a=0)

#
# analysis setup
# TODO: possibly refactor into a plugin to allow for other types
#

# collect raw predictions, so we can compute a McNemar test easily
# without any reconstruction of binomial results
results = []
# use a confusion matrix to collect all results in multiple sets,
# one for each data fold
confusion = ConfusionMatrix(labels=list(dataset.sa[args.target_attr].unique))

partitioner = NFoldPartitioner(attr=args.chunk_attr)
Example #20
                            'dico7Tad2grpbold7Tad_nl',
                            'brain_mask_intersection.nii.gz')
elif align == 'linear':
    maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa',
                            'dico7Tad2grpbold7Tad7Tad',
                            'brain_mask_intersection.nii.gz')

ds = mvpa.fmri_dataset(maskfile, mask=maskfile)
dsfile = '_z' + str(zsc) + '_' + str(samples_size) + '_' + align

#Load dataset of two subjects and reorganise for univariate analysis
evds1 = mvpa.h5load(os.path.join('dataset', subj1 + dsfile + '.hdf5'))
evds1 = evds1.mapper.reverse(evds1)
evds2 = mvpa.h5load(os.path.join('dataset', subj2 + dsfile + '.hdf5'))
evds2 = evds2.mapper.reverse(evds2)
evds = mvpa.vstack([evds1, evds2])
del evds1, evds2


# Prepare inter-subject correlation measure
class Corr(mvpa.Measure):
    is_trained = True

    def __init__(self, subj1, subj2, **kwargs):
        mvpa.Measure.__init__(self, **kwargs)
        self._subj1 = subj1
        self._subj2 = subj2

    def _call(self, evds):
        res = 1 - sd.pdist(
            np.hstack(
Example #21
#mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj' + str(subj) + 'bold.nii.gz')

# get openFMRI dataset handle
datapath = os.path.join('/home', 'data', 'psyinf', 'forrest_gump', 'anondata')
dhandle = mvpa.OpenFMRIDataset(datapath)
model = 1
task = 1

T3 = False
#boldlist = sorted(glob.glob(os.path.join(datapath,'task002*')))
flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

for subj in xrange(1,20):
    mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj%02dbold.nii.gz' % subj)

    #load and save all datasets
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj,task,run_id,chunks=run_id-1,mask=mask_fname,flavor=flavor)
        run_datasets.append(run_ds)
    s1ds = mvpa.vstack(run_datasets)
    mvpa.poly_detrend(s1ds,polyord=1,chunks_attr='chunks')
    mvpa.zscore(s1ds)
    s1ds.save(os.path.join('/home','mboos','SpeechEncoding','PreProcessed','FG_subj' + str(subj) + 'pp.gzipped.hdf5'),compression=9)

Example #22
        ds.sa['sessions'] = [ses] * ds.shape[0]

        ds.fa['node_indices'] = range(ds.shape[1])
        ds.fa['center_ids'] = range(ds.shape[1])
        ds.sa['targets'] = range(ds.shape[0])
        # ds.sa.pop('labels')

        if hyperalign:
            ds = mappers[i][participant].forward(ds)
            print("Hyperaligned participant {0}".format(participant))
            if zscore_features:
                mv.zscore(ds, chunks_attr=None)
            ds.fa['node_indices'] = range(ds.shape[1])
            ds.fa['center_ids'] = range(ds.shape[1])

    ds_all = mv.vstack((ds1, ds2, ds3, ds4), fa='update')
    rsa.PDist(**kwargs)
    #variant_ids = mv.remove_invariant_features(ds_both).fa.center_ids.tolist()

    # Set up cross-validated RSA
    cv_rsa_ = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                                 mv.HalfPartitioner(attr='sessions'),
                                 errorfx=None)

    # cv_rsa above would return all kinds of .sa which are important
    # but must be the same across searchlights. so we first apply it
    # to the entire ds to capture them
    cv_rsa_out = cv_rsa_(ds_all)
    target_sa = cv_rsa_out.sa.copy(deep=True)

    # And now create a postproc which would verify and strip them off
Example #23
                    '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                    format(participant, tr[run], run, hemi)))
            mv.zscore(ds, chunks_attr=None)

            if hyperalign:
                ds = mappers[participant].forward(ds)
                mv.zscore(ds, chunks_attr=None)
            ds.fa['node_indices'] = range(ds.shape[1])

            # n_samples = ds.samples.shape[0]
            #
            # # Exclude medial wall
            # print(np.where(np.sum(ds.samples == 0, axis=0) == n_samples))
            n_samples = ds.samples.shape[0]
            medial_wall = np.where(
                np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist()
            print(len(medial_wall))
            cortical_vertices = np.where(
                np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist()
            assert len(medial_wall) == n_medial[hemi]
            assert len(medial_wall) + len(cortical_vertices) == n_vertices

            sl_result = sl(ds)
            print(ds.samples.shape, sl_result.samples.shape)
            list_of_RDMs.append(sl_result)
        final = mv.vstack(list_of_RDMs)
        print(final.shape)
        mv.h5save(
            '/idata/DBIC/cara/search_hyper_mappers_life_mask_nofsel_{0}_{1}_leftout_{1}_{2}.hdf5'
            .format(participant, hemi, left_out, sys.argv[1]), final)
Example #24
# apply feature selection to all data (localizer and memory)
fs_mapds_list = [
    ds[:, mask] for ds, mask in zip(map_ds_dict.values(), fsel_masks)
]
fs_memds_list = [
    ds[:, mask] for ds, mask in zip(mem_ds_dict.values(), fsel_masks)
]

hyper = mvpa2.Hyperalignment()
hypmaps = hyper(fs_mapds_list)  # returns list

# apply the hyperalignment maps to feature-selected test data
memds_hyper_list = [ha.forward(ds) for ha, ds in zip(hypmaps, fs_memds_list)]

# stack all datasets and zscore, because now all in common space
ds_hyper = mvpa2.vstack(memds_hyper_list)
mvpa2.zscore(ds_hyper, chunks_attr='subj')

#######################################
##  use sklearn to perform decoding  ##
#######################################

# get the num of groups for leave-one-subj-out cross-validation
n_groups = len(pd.np.unique(ds_hyper.sa.subj))

cv_df_list = []  # holds cross-validation accuracies for each condition
null_df_list = [
]  # holds permutation distribution accuracies for each condition
pval_df_list = []  # holds mean accuracy and p-values for each condition

# choose the "target" sample attributes
Example #25
def dotheglm(sensitivities, eventdir, annot_dir):
    """dotheglm does the glm. It will squish the sensitivity
    dataset by vstacking them, calculating the mean sensitivity per ROI pair
    with the mean_group_sample() function, transpose it with a
    TransposeMapper(). It will get the event files and read them into an apprpriate.
    data structure. It will compute one glm per run.
    """
    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    #default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    if bilateral:
        sensitivities_stacked.sa['bilat_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'
                                          ])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = map(
            lambda p: '_'.join(p), sensitivities_stacked.sa.targets)
        mean_sens = mv.mean_group_sample(['all_ROIs_str'
                                          ])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurrences of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8
    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # get all settings with more than one occurrence
    settings = [
        s for s in location_annotation.setting.unique()
        if (location_annotation.setting[location_annotation.setting ==
                                        s].value_counts()[0] > 1)
    ]

    # get onsets and durations
    onset = []
    duration = []
    condition = []
    for s in settings:
        for i in range(location_annotation.setting[
                location_annotation['setting'] == s].value_counts()[0]):
            onset.append(location_annotation[location_annotation['setting'] ==
                                             s]['onset'].values[i])
            duration.append(location_annotation[location_annotation['setting']
                                                == s]['duration'].values[i])
        condition.append([s] * (i + 1))
    # flatten conditions
    condition = [y for x in condition for y in x]
    assert len(condition) == len(onset) == len(duration)

    # concatenate the strings
    condition_str = [c.replace(' ', '_') for c in condition]
    condition_str = ['location_' + c for c in condition_str]

    # put it in a dataframe
    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition_str
    })

    # sort according to onsets to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # this is a dataframe encoding flow of time
    time_forward = pd.DataFrame(
        [{
            'condition': 'time+',
            'onset': location_annotation['onset'][i],
            'duration': 1.0
        } for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['+', '++']])

    time_back = pd.DataFrame(
        [{
            'condition': 'time-',
            'onset': location_annotation['onset'][i],
            'duration': 1.0
        } for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['-', '--']])

    # sort according to onsets to be paranoid
    time_forward_sorted = time_forward.sort_values(by='onset')
    time_back_sorted = time_back.sort_values(by='onset')

    scene_change = pd.DataFrame([{
        'condition': 'scene-change',
        'onset': location_annotation['onset'][i],
        'duration': 1.0
    } for i in range(len(location_annotation) - 1)])

    scene_change_sorted = scene_change.sort_values(by='onset')

    # this is a dataframe encoding exterior
    exterior = pd.DataFrame([{
        'condition': 'exterior',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
                             if (location_annotation['int_or_ext'][i] == 'ext')
                             ])

    # sort according to onsets to be paranoid
    exterior_sorted = exterior.sort_values(by='onset')

    # this is a dataframe encoding nighttime
    night = pd.DataFrame([{
        'condition': 'night',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
                          if (location_annotation['time_of_day'][i] == 'night')
                          ])

    # sort according to onsets to be paranoid
    night_sorted = night.sort_values(by='onset')

    assert np.all(
        locations_sorted.onset[1:].values >= locations_sorted.onset[:-1].values
    )
    assert np.all(
        time_back_sorted.onset[1:].values >= time_back_sorted.onset[:-1].values
    )
    assert np.all(time_forward_sorted.onset[1:].values >=
                  time_forward_sorted.onset[:-1].values)
    assert np.all(
        exterior_sorted.onset[1:].values >= exterior_sorted.onset[:-1].values)
    assert np.all(
        night_sorted.onset[1:].values >= night_sorted.onset[:-1].values)
    assert np.all(scene_change_sorted.onset[1:].values >=
                  scene_change_sorted.onset[:-1].values)

    # check whether chunks are increasing as well as sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guesstimate it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001
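    # e.g. with time coordinates in steps of 2.0 s the largest unique diff
    # is 2.0, and |round(2.0, 2) - 2.0| < 0.0001 confirms that the estimate
    # looks like a plausible TR rather than floating point jitter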

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)  # UC = unique chunks
    assert runs == list(range(len(runs)))
    # note: tc still holds the original per-run time coordinates
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    # cumulative start time of each run within the concatenated movie
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # initialize the list of dicts that gets later passed to the glm
    events_dicts = []
    # this column list is used later to stack all dataframes together
    # and paranoidly make sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    for run in runs:
        # get face data for this run (event_files is already sorted)
        events = pd.read_csv(event_files[run], sep='\t')

        for index, row in events.iterrows():

            # disregard no faces, put everything else into event structure
            if row['condition'] != 'no_face':
                dic = {
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                }
                events_dicts.append(dic)

    # build one boxcar regressor per run; the last run has no
    # following onset, hence range(7) for the 8 runs
    run_reg = pd.DataFrame([{
        'onset': runonsets[i],
        'duration': abs(runonsets[i] - runonsets[i + 1]),
        'condition': 'run-' + str(i + 1)
    } for i in range(7)])

    # get all of these wonderful dataframes into a list and squish them
    dfs = [
        locations_sorted[cols], scene_change_sorted[cols],
        time_back_sorted[cols], time_forward_sorted[cols],
        exterior_sorted[cols], night_sorted[cols], run_reg[cols]
    ]
    allevents = pd.concat(dfs)

    # save all non-face related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv',
                     sep='\t',
                     index=False)

    # append non-face events to the event structure for the glm
    for index, row in allevents.iterrows():
        dic = {
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        }
        events_dicts.append(dic)

    # save this event dicts structure as a tsv file
    import csv
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        fieldnames = ['onset', 'duration', 'condition']
        writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # save this event file also as json file... can there ever be enough different files...
    import json
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm - we've earned it
    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)

    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5',
              hrf_estimates)
    print('calculated the glm, saving results.')

    return hrf_estimates
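A minimal, self-contained sketch of the event-to-GLM flow used above. The synthetic dataset and the 'faces' condition are made up for illustration; the mv.fit_event_hrf_model call itself mirrors the one in the example:

import numpy as np
import mvpa2.suite as mv

# synthetic dataset: 100 time points x 2 features, sampled at TR = 2.0 s
ds = mv.Dataset(np.random.randn(100, 2))
ds.sa['time_coords'] = np.arange(100) * 2.0

# events use the same onset/duration/condition dict layout as events_dicts
events = [{'onset': 10.0, 'duration': 4.0, 'condition': 'faces'},
          {'onset': 60.0, 'duration': 4.0, 'condition': 'faces'}]

hrf_estimates = mv.fit_event_hrf_model(
    ds,
    events,
    time_attr='time_coords',
    condition_attr='condition',
    design_kwargs=dict(drift_model='blank'),
    glmfit_kwargs=dict(model='ols'))
print(hrf_estimates.shape)  # one row of beta estimates per condition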
Example #26
0
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # gather this participant's functional data files
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat
            # Get lateralized masks as well (0 = outside, 1 = left, 2 = right)
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1

                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
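The ROI-combination step above boils down to summing binary masks and re-binarizing. A small standalone sketch with made-up masks:

import numpy as np

# two hypothetical single-hemisphere masks over 8 voxels
left = np.array([[0, 1, 1, 0, 0, 0, 0, 0]])
right = np.array([[0, 0, 0, 0, 1, 1, 0, 0]])

roi_mask = np.sum([left, right], axis=0)
# set any voxels that might exceed 1 (overlapping masks) back to 1
roi_mask = np.where(roi_mask > 0, 1, 0)
print(roi_mask)  # [[0 1 1 0 1 1 0 0]]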
Example #27
0
# align, datapath, zsc, subj1, subj2 are defined earlier in the original script
import os
import numpy as np
import scipy.spatial.distance as sd
import mvpa2.suite as mvpa

samples_size = 12	#Length of segments in sec

if align == 'nonlinear':
	maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa',
				'dico7Tad2grpbold7Tad_nl', 'brain_mask_intersection.nii.gz')
elif align == 'linear':
	maskfile = os.path.join(datapath, 'templates', 'grpbold7Tad', 'qa',
				'dico7Tad2grpbold7Tad7Tad', 'brain_mask_intersection.nii.gz')

ds = mvpa.fmri_dataset(maskfile, mask=maskfile)
dsfile = '_z'+str(zsc)+'_'+str(samples_size)+'_'+align

#Load dataset of two subjects and reorganise for univariate analysis
evds1 = mvpa.h5load(os.path.join('dataset',subj1+dsfile+'.hdf5'))
evds1 = evds1.mapper.reverse(evds1)
evds2 = mvpa.h5load(os.path.join('dataset',subj2+dsfile+'.hdf5'))
evds2 = evds2.mapper.reverse(evds2)
evds = mvpa.vstack([evds1,evds2])
del evds1, evds2

# Prepare inter-subject correlation measure
class Corr(mvpa.Measure):
	is_trained = True
	def __init__(self,subj1,subj2, **kwargs):
		mvpa.Measure.__init__(self, **kwargs)
		self._subj1 = subj1
		self._subj2 = subj2
	def _call(self, evds):
		# 'correlation' distance is 1 - Pearson r, so this yields the
		# inter-subject correlation of each feature's time course
		samples = np.hstack((evds[evds.sa.subj == self._subj1].samples,
				     evds[evds.sa.subj == self._subj2].samples))
		res = 1 - sd.pdist(samples.T, 'correlation')
		return mvpa.Dataset(np.array(res)[np.newaxis])

# Call inter-subject correlation measure
cor = Corr(subj1,subj2)
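The Corr measure relies on SciPy's 'correlation' distance, which equals 1 - Pearson r. A toy check with made-up numbers:

import numpy as np
import scipy.spatial.distance as sd

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.1, 1.9, 3.2, 3.8, 5.1])

# pdist returns the 'correlation' distance; 1 - distance recovers Pearson r
r = 1 - sd.pdist(np.vstack([x, y]), 'correlation')
print(r)  # ~[0.995]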
Example #28
0
# the loop header and the runs/Ds/path variables are elided in the original
# script; an assumed reconstruction so the fragment runs:
import os
import numpy as np
import mvpa2.suite as mvpa
from scipy import signal

Ds = []
for i, run in enumerate(runs):
    ds = mvpa.fmri_dataset(os.path.join(datapath, run, boldfile),
                           mask=maskfile)
    mc = mvpa.McFlirtParams(os.path.join(run, 'bold_dico_moco.txt'))
    for param in mc:
        ds.sa['mc_' + param] = mc[param]
    if i == 0:
        ds = ds[:-4]
    elif i < 7:
        ds = ds[4:-4]
    else:
        ds = ds[4:]
    ds.sa['chunks'] = np.ones(ds.nsamples) * i
    print(ds.shape)
    Ds.append(ds)

ds = mvpa.vstack(Ds)
ds.samples = ds.samples.astype('float32')

#Detrending and MC removal
mvpa.poly_detrend(ds,
                  opt_regs=['mc_' + param for param in mc],
                  chunks_attr='chunks')

#Voxelwise Zscore
if zsc:
    mvpa.zscore(ds)

#bandpass filter
nf = 0.5 / TR
ws = [(1 / lf) / nf, (1 / hf) / nf]
b, a = signal.butter(5, ws, btype='band')
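To actually apply the band-pass designed above, SciPy's zero-phase filtfilt is the usual next step. A sketch with placeholder TR/cutoff values:

import numpy as np
from scipy import signal

TR, lf, hf = 2.0, 100.0, 6.25   # hypothetical TR and cutoff periods in s
nf = 0.5 / TR                   # Nyquist frequency
ws = [(1 / lf) / nf, (1 / hf) / nf]
b, a = signal.butter(5, ws, btype='band')

data = np.random.randn(200)             # one synthetic voxel time course
filtered = signal.filtfilt(b, a, data)  # zero-phase band-pass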
Example #29
0
# as in Example #28, the loop header and runs/Ds/path variables are elided
# in the original script; an assumed reconstruction:
Ds = []
for i, run in enumerate(runs):
	print(run)
	ds = mvpa.fmri_dataset(os.path.join(datapath, run, boldfile), mask=maskfile)
	mc = mvpa.McFlirtParams(os.path.join(run, 'bold_dico_moco.txt'))
	for param in mc:
		ds.sa['mc_' + param] = mc[param]
	if i==0:
		ds = ds[:-4]
	elif i<7:
		ds = ds[4:-4]
	else:
		ds = ds[4:]
	ds.sa['chunks'] = np.ones(ds.nsamples)*i
	print(ds.shape)
	Ds.append(ds)
	
ds = mvpa.vstack(Ds)
ds.samples = ds.samples.astype('float32')

#Detrending and MC removal
mvpa.poly_detrend(ds,
		  opt_regs=['mc_' + param for param in mc],
		  chunks_attr='chunks')

#Voxelwise Zscore
if zsc:
	mvpa.zscore(ds)

#bandpass filter
nf = 0.5/TR
ws = [(1/lf)/nf, (1/hf)/nf]