Example No. 1
def bilateralize(ds):
    """combine lateralized ROIs in a dataset."""
    ds_ROIs = ds.copy('deep')
    ds_ROIs.sa['bilat_ROIs'] = [label.split(' ')[-1] for label in ds_ROIs.sa.all_ROIs]
    mv.h5save(results_dir + 'ds_ROIs.hdf5', ds_ROIs)
    print('Combined lateralized ROIs for the provided dataset and saved the dataset.')
    return ds_ROIs
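A minimal usage sketch for bilateralize(), assuming mv is the usual mvpa2.suite import alias, results_dir is the module-level output path the function writes to, and the input file name is hypothetical:

import mvpa2.suite as mv

# load a dataset whose sa.all_ROIs holds labels such as 'left FFA' / 'right FFA'
ds = mv.h5load(results_dir + 'ds_with_rois.hdf5')  # hypothetical input file
ds_bilat = bilateralize(ds)
# 'left FFA' and 'right FFA' now share the label 'FFA' in sa.bilat_ROIs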
Example No. 2
def mk_movie_dataset(gd, subj, mask, task=1, flavor='', filter=None,
        writeto=None, add_fa=None):
    cur_max_time = 0
    segments = []
    for seg in range(1,9):
        print('Seg', seg)
        ds = fmri_dataset(
                gd.get_run_fmri(subj, task, seg, flavor=flavor),
                mask=mask, add_fa=add_fa)
        if task == 1:
            # sanitize TR
            ds.sa.time_coords = np.arange(len(ds)) * 2.0
        mc = gd.get_run_motion_estimates(subj, task, seg)
        for i, par in enumerate(('mc_xtrans', 'mc_ytrans', 'mc_ztrans',
                                 'mc_xrot', 'mc_yrot', 'mc_zrot')):
            ds.sa[par] = mc.T[i]
        ds.sa['movie_segment'] = [seg] * len(ds)
        TR = np.diff(ds.sa.time_coords).mean()
        # note: 'filter' shadows the builtin, but the name is kept for API compatibility
        if filter is not None:
            print('filter')
            ds = filter(ds)
        # truncate segment time series to remove overlap
        if seg > 1:
            ds = ds[4:]
        if seg < 8:
            ds = ds[:-4]
        ds.sa['movie_time'] = np.arange(len(ds)) * TR + cur_max_time
        cur_max_time = ds.sa.movie_time[-1] + TR
        if writeto is None:
            segments.append(ds)
        else:
            ds.samples = ds.samples.astype('float32')
            h5save(writeto % (subj, task, seg), ds, compression=9)
    return segments
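The filter argument accepts any callable that takes a dataset and returns a (possibly modified) dataset. A hedged sketch of a compatible low-pass filter; the cutoff and TR values are illustrative, and gd, subj, and mask come from the surrounding script:

from scipy import signal

def lowpass_filter(ds, cutoff=0.1, tr=2.0):
    # 5th-order Butterworth low-pass applied along the time axis (sketch)
    b, a = signal.butter(5, cutoff / (0.5 / tr))
    ds.samples = signal.filtfilt(b, a, ds.samples, axis=0)
    return ds

segments = mk_movie_dataset(gd, subj, mask, task=1, filter=lowpass_filter)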
Example No. 3
def dotheglm(sensitivities, eventdir):
    """dotheglm does the glm. It will squish the sensitivity
    dataset by vstacking them, calculating the mean sensitivity per ROI pair
    with the mean_group_sample() function, transpose it with a
    TransposeMapper(). It will get the event files and read them in, average the
    durations because there are tiny differences between subjects, and then it
    will put all of that into a glm.
    """
    sensitivities_stacked = mv.vstack(sensitivities)
    if bilateral:
        # list comprehension instead of map() so this also works under Python 3
        sensitivities_stacked.sa['bilat_ROIs_str'] = ['_'.join(p) for p in
                                                      sensitivities_stacked.sa.bilat_ROIs]
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = ['_'.join(p) for p in
                                                    sensitivities_stacked.sa.all_ROIs]
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # average onsets into one event file
    events = get_group_events(eventdir)
    # save the event_file
    fmt = "%10.3f\t%10.3f\t%16s\t%60s"
    np.savetxt(results_dir + 'group_events.tsv', events, delimiter='\t', comments='',
               header='onset\tduration\ttrial_type\tstim_file', fmt=fmt)
    # get events into dictionary
    events_dicts = []
    for i in range(0, len(events)):
        dic = {
            'onset': events[i][0],
            'duration': events[i][1],
            'condition': events[i][2]
        }
        events_dicts.append(dic)

    hrf_estimates = mv.fit_event_hrf_model(mean_sens_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)
    mv.h5save(results_dir + 'sens_glm_objectcategories_results.hdf5', hrf_estimates)
    print('calculated the glm and saved the results.')
    return hrf_estimates
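For reference, mv.fit_event_hrf_model() only needs the onset, the duration, and the attribute named by condition_attr for each event, so the dictionaries built above have this shape (values invented for illustration):

events_dicts = [
    {'onset': 0.0, 'duration': 4.0, 'condition': 'face'},
    {'onset': 12.0, 'duration': 4.0, 'condition': 'house'},
]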
Example No. 4
def load_create_save_ds(ds_save_p, dataset_list, ref_space, warp_files, mask, **kwargs):
    detrending = kwargs.get('detrending', True)
    use_zscore = kwargs.get('use_zscore', True)

    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)

    rois = kwargs.get('rois', None)

    if ds_save_p.exists():
        ds = mvpa.h5load(str(ds_save_p))
    else:
        ds = preprocess_datasets(dataset_list, ref_space, warp_files, mask, detrending=detrending,
                                 use_zscore=use_zscore, use_events=use_events, anno_dir=anno_dir,
                                 use_glm_estimates=use_glm_estimates, targets=targets,
                                 event_offset=event_offset, event_dur=event_dur, rois=rois,
                                 save_disc_space=save_disc_space)
        mvpa.h5save(str(ds_save_p), ds) # , compression=9
    return ds
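Since ds_save_p is queried with .exists(), it is expected to be a pathlib.Path. A hedged call; every argument other than the path is a placeholder from the surrounding pipeline:

from pathlib import Path

ds_save_p = Path('derivatives') / 'group_ds.hdf5'  # hypothetical cache location
ds = load_create_save_ds(ds_save_p, dataset_list, ref_space, warp_files, mask,
                         detrending=True, use_zscore=True)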
Example No. 5
def dotheclassification(ds, bilateral, store_sens=True):
    """ Dotheclassification does the classification. It builds a
    linear gaussian naive bayes classifier, performs a leave-one-out
    crossvalidation and stores the sensitivities from the SGD classifier of each
    fold in a combined dataset for further use in a glm.
    If sens == False, the sensitivities are not stored, and only a
    classification is performed"""
    import matplotlib.pyplot as plt
    # set up the dataset: If I understand the sourcecode correctly, the
    # MulticlassClassifier wants to have unique labels in a sample attribute
    # called 'targets' and is quite stubborn about this name - I could not
    # convince it to look for targets somewhere else, so I'm catering to its demands
    if bilateral:
        ds.sa['targets'] = ds.sa.bilat_ROIs
    else:
        ds.sa['targets'] = ds.sa.all_ROIs

    # necessary I believe regardless of the SKLLearnerAdapter
    from sklearn.linear_model import SGDClassifier

    # get a stochastic gradient descent into pymvpa by using the SKLLearnerAdapter.
    # Get it to perform 1 vs 1 decisions (instead of one vs all) with the MulticlassClassifier
    clf = mv.MulticlassClassifier(
        mv.SKLLearnerAdapter(
            SGDClassifier(loss='hinge', penalty='l2',
                          class_weight='balanced')))

    # prepare for callback of sensitivity extraction within CrossValidation
    sensitivities = []
    if store_sens:

        # the callback is named distinctly so it does not shadow the store_sens argument
        def store_sens_callback(data, node, result):
            sens = node.measure.get_sensitivity_analyzer(
                force_train=False)(data)
            # we also need to manually append the time attributes to the sens ds
            sens.fa['time_coords'] = data.fa['time_coords']
            sens.fa['chunks'] = data.fa['chunks']
            sensitivities.append(sens)

        # do a crossvalidation classification
        cv = mv.CrossValidation(clf,
                                mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'],
                                callback=store_sens_callback)
    else:
        cv = mv.CrossValidation(clf,
                                mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'])
    results = cv(ds)
    # save classification results

    with open(results_dir + 'avmovie_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))
    # printing of the confusion matrix
    if bilateral:
        desired_order = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # plotting the confusion matrix with pymvpa's built-in plot function
    # currently fails:
    #    cv.ca.stats.plot(labels=labels,
    #                     numbers=True,
    #                     cmap='gist_heat_r')
    #    plt.savefig(results_dir + 'confusion_matrix.png')
    #    if niceplot:
    #        ACC = cv.ca.stats.stats['mean(ACC)']
    #        plot_confusion(cv,
    #                       labels,
    #                       fn=results_dir + 'confusion_matrix_avmovie.svg',
    #                       figsize=(9, 9),
    #                       vmax=100,
    #                       cmap='Blues',
    #                       ACC='%.2f' % ACC)
    mv.h5save(results_dir + 'SGD_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    if store_sens:
        mv.h5save(results_dir + 'sensitivities_nfold.hdf5', sensitivities)
        print('Saved the sensitivities.')
    # results now has the overall accuracy. results.samples gives the
    # accuracy per participant.
    # sensitivities contains a dataset for each participant with the
    # sensitivities as samples and class-pairings as attributes
    return sensitivities, cv
Example No. 6
def dotheclassification(ds, bilateral, store_sens=True):
    """ Dotheclassification does the classification. It builds a
    linear gaussian naive bayes classifier, performs a leave-one-out
    crossvalidation and stores the sensitivities from the GNB classifier of each
    fold in a combined dataset for further use in a glm.
    If store_sens == False, the sensitivities are not stored, and only a
    classification is performed"""
    import matplotlib.pyplot as plt
    # set up classifier
    prior = 'ratio'
    if bilateral:
        targets = 'bilat_ROIs'
    else:
        targets = 'all_ROIs'
    gnb = mv.GNB(common_variance=True, prior=prior, space=targets)

    # prepare for callback of sensitivity extraction within CrossValidation
    sensitivities = []
    if store_sens:

        # the callback is named distinctly so it does not shadow the store_sens argument
        def store_sens_callback(data, node, result):
            sens = node.measure.get_sensitivity_analyzer(
                force_train=False)(data)
            # we also need to manually append the time attributes to the sens ds
            sens.fa['time_coords'] = data.fa['time_coords']
            sens.fa['chunks'] = data.fa['chunks']
            sensitivities.append(sens)

        # do a crossvalidation classification
        cv = mv.CrossValidation(gnb,
                                mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'],
                                callback=store_sens_callback)
    else:
        cv = mv.CrossValidation(gnb,
                                mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'])
    results = cv(ds)
    # save classification results

    with open(results_dir + 'avmovie_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))
    # printing of the confusion matrix
    if bilateral:
        desired_order = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]
    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # plotting the confusion matrix with pymvpa's built-in plot function currently fails
    # cv.ca.stats.plot(labels=labels,
    #                 numbers=True,
    #                 cmap='gist_heat_r')
    # plt.savefig(results_dir + 'confusion_matrix.png')
    if niceplot:
        ACC = cv.ca.stats.stats['mean(ACC)']
        plot_confusion(cv,
                       labels,
                       fn=results_dir + 'confusion_matrix_avmovie.svg',
                       figsize=(9, 9),
                       vmax=100,
                       cmap='Blues',
                       ACC='%.2f' % ACC)
    mv.h5save(results_dir + 'gnb_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')
    if store_sens:
        mv.h5save(results_dir + 'sensitivities_nfold.hdf5', sensitivities)
        print('Saved the sensitivities.')
    # results now has the overall accuracy. results.samples gives the
    # accuracy per participant.
    # sensitivities contains a dataset for each participant with the
    # sensitivities as samples and class-pairings as attributes
    return sensitivities, cv
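As the closing comments note, cv carries the aggregated fold statistics; a hedged post-hoc summary (cv.ca.stats is the ConfusionMatrix the function itself already queries for mean(ACC)):

sensitivities, cv = dotheclassification(ds, bilateral=True)
print(cv.ca.stats.stats['mean(ACC)'])  # mean accuracy across folds
print(cv.ca.stats.matrix)              # raw confusion matrix counts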
Example No. 7
                              axis=0)
            np.place(all_rois_mask,
                     (roi_mask > 0) & (all_rois_mask != 'brain'), 'overlap')
            all_rois_mask[(roi_mask > 0) & (all_rois_mask != 'overlap')] = roi

    # Flatten mask into list
    all_rois_flat = list(all_rois_mask.ravel())

    # Assign ROI mask to movie data feature attributes
    movie_ds.fa['all_ROIs'] = all_rois_flat

    movie_dss.append(movie_ds)

    if save_per_subject:
        mv.h5save(
            base_dir + participant + data_dir +
            '{0}_avmovie_detrend{1}_lowpass_ROIs_tmpl_bold.hdf5'.format(
                participant, polyord), movie_ds)
        print("Finished participant {0}, saved the data".format(participant))

mv.h5save(
    results_dir +
    'allsub_avmovie_detrend{0}_lowpass_ROIs_tmpl_bold.hdf5'.format(polyord),
    movie_dss)
print('Saved the group dataset in {}.'.format(results_dir))

# Horizontally stack all data sets
ds_wide = mv.hstack(movie_dss)

# Transpose brain so voxels are now samples
ds = mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
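The manual transpose in the last line swaps samples and features by hand; PyMVPA's TransposeMapper, used elsewhere on this page, should express the same operation more compactly:

# equivalent to mv.Dataset(ds_wide.samples.T, sa=ds_wide.fa.copy(), fa=ds_wide.sa.copy())
ds = ds_wide.get_mapped(mv.TransposeMapper())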
Example No. 8
# band-pass filter
nf = 0.5/TR                      # Nyquist frequency in Hz
ws = [(1/lf)/nf, (1/hf)/nf]      # band edges normalized to Nyquist
b, a = signal.butter(5, ws, btype='band')
S = [signal.filtfilt(b, a, x) for x in ds.samples.T]
ds.samples = np.array(S).T
ds.samples = ds.samples.astype('float32')

# create an event-related dataset
onsets = np.arange(0, ds.nsamples - samples_size/TR, samples_size/TR)
events = []
for on in onsets:
    Ev = dict()
    Ev['onset'] = on
    Ev['duration'] = samples_size / TR
    Ev['target'] = on * TR
    Ev['subj'] = subj
    events.append(Ev)

evds = mvpa.eventrelated_dataset(ds, events=events)
evds.fa['1stidx'] = evds.fa.event_offsetidx == 0

# save the pymvpa dataset as hdf5 in the dataset directory
try:
    os.mkdir(os.path.join(path, 'dataset'))
except OSError:
    print('dataset directory already exists')

dsfile = subj + '_z' + str(zsc) + '_' + str(samples_size) + '_' + align
mvpa.h5save(os.path.join(path, 'dataset', dsfile + '.hdf5'), evds, compression='gzip')
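To sanity-check the band edges before filtering, the normalized frequencies can be evaluated for concrete values. lf and hf are taken here as cutoff periods in seconds; the numbers are purely illustrative:

from scipy import signal

TR, lf, hf = 2.0, 100.0, 10.0    # assumed: TR and cutoff periods in seconds
nf = 0.5 / TR                    # Nyquist frequency = 0.25 Hz
ws = [(1/lf)/nf, (1/hf)/nf]      # -> [0.04, 0.4], both inside (0, 1) as required
b, a = signal.butter(5, ws, btype='band')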
Example No. 9
        n_samples = ds.samples.shape[0]
        # Exclude medial wall
        print(np.where(np.sum(ds.samples == 0, axis=0) == n_samples))
        medial_wall = np.where(np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist()
        print(len(medial_wall))
        cortical_vertices = np.where(np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist()
        assert len(medial_wall) == n_medial[hemi]
        assert len(medial_wall) + len(cortical_vertices) == n_vertices

        # Estimate searchlight hyperalignment transformation on movie data
        sl_hyper = mv.SearchlightHyperalignment(queryengine=qe, nproc=n_proc,
                                                nblocks=n_proc*8, featsel=1.0,
                                                mask_node_ids=cortical_vertices,
                                                tmp_prefix='/fastscratch/cara/tmpsl')

        print("Estimated transformation!")

        mv.debug.active += ['HPAL', 'SLC']
        mappers = sl_hyper(dss)
        print("Finished creating hyperalignment mappers!")

        # Organize and save fitted hyperalignment mappers
        assert len(participants) == len(mappers)
        mappers = {participant: mapper for participant, mapper
                   in zip(participants, mappers)}
        print("Reorganized hyperalignment mappers")

        mv.h5save(join(mvpa_dir, 'search_hyper_mappers_life_mask_nofsel_{0}_leftout_{1}_reverse.hdf5'.format(hemi, left_out)), mappers)
        print("Successfully saved hyperalignment mappers for left out run {0}".format(left_out))
Example No. 10
    else:
        clf = args.clf(args.target_attr)

    tm = TransferMeasure(clf, splitter)
    res = tm(partitions)
    # make a record of the tuned hyper parameter for comprehensive
    # reporting
    if args.tune_hyperparam:
        res.a['tuned_hyperparam'] = tuned_par
    results.append(res)
    # feed predictions into the confusion tracker as a new set
    confusion.add(res.sa[args.target_attr].value, res.samples[:, 0])

# one result dataset
results = vstack(results, a='all')
# report analysis params for the afterlife
results.a['confusion'] = confusion
results.a['mask'] = args.mask
results.a['fwhm'] = args.fwhm
results.a['dog_bandwidth'] = args.dog_bandwidth
results.a['filter_type'] = args.filter_type
for k, v in mkds_args.items():
    results.a['mkds_{}'.format(k)] = v

# brag about it
print(results)
print(confusion)

# save to disk
h5save(opj(args.output_dir, '_'.join(args.result_labels) + '.hdf5'), results)
Example No. 11
    # the cross-validated RSA, using the lean error function (lean_errorfx) defined elsewhere
    cv_rsa = mv.CrossValidation(mv.CDist(pairwise_metric='correlation'),
                                 mv.HalfPartitioner(attr='sessions'),
                                 errorfx=None, postproc=lean_errorfx)

    sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
                        nproc=1, results_backend='native')
    #sl = mv.Searchlight(cv_rsa, queryengine=qe, enable_ca=['roi_sizes'],
    #                    nproc=1, results_backend='native', roi_ids=cortical_vertices)
    #tmp_prefix='/local/tmp/sam_sl_p{0}_{1}_'.format(participant_id, hemi)
    mv.debug.active += ['SLC']
    sl_result = sl(ds)
    assert len(sl_result.sa) == 0  # we didn't pass any
    sl_result.sa = target_sa

    print('>>>', np.mean(sl.ca.roi_sizes), np.std(sl.ca.roi_sizes))

    sl_means = np.mean(np.dstack((sl_result.samples[:n_conditions**2, :],
                                  sl_result.samples[n_conditions**2:, :])),
                       axis=2)
    sl_final = mv.Dataset(
         sl_means,
         sa={'conditions': sl_result.sa.conditions[:sl_means.shape[0], :].tolist(),
             'participants': [int(participant[-2:])] * sl_means.shape[0]},
         fa=sl_result.fa, a=sl_result.a)
    #assert sl_result.shape[0] == n_conditions**2
    print(sl_final)
    mv.h5save('/idata/DBIC/cara/life/search_RDMs_sq_zscore_HA_{0}_{1}.hdf5'.format(participant, hemi), sl_final)
        #mv.niml.write(join(mvpa_dir, 'search_RDMs_sq_p{0}_{1}_TEST.niml.dset'.format(
        #                                       participant_id, hemi)), sl_result)
Example No. 12
                    '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                    format(participant, tr[run], run, hemi)))
            mv.zscore(ds, chunks_attr=None)

            if hyperalign:
                ds = mappers[participant].forward(ds)
                mv.zscore(ds, chunks_attr=None)
            ds.fa['node_indices'] = range(ds.shape[1])

            n_samples = ds.samples.shape[0]
            medial_wall = np.where(
                np.sum(ds.samples == 0, axis=0) == n_samples)[0].tolist()
            print(len(medial_wall))
            cortical_vertices = np.where(
                np.sum(ds.samples == 0, axis=0) < n_samples)[0].tolist()
            assert len(medial_wall) == n_medial[hemi]
            assert len(medial_wall) + len(cortical_vertices) == n_vertices

            sl_result = sl(ds)
            print(ds.samples.shape, sl_result.samples.shape)
            list_of_RDMs.append(sl_result)
        final = mv.vstack(list_of_RDMs)
        print(final.shape)
        mv.h5save(
            '/idata/DBIC/cara/search_hyper_mappers_life_mask_nofsel_{0}_{1}_leftout_{2}_{3}.hdf5'
            .format(participant, hemi, left_out, sys.argv[1]), final)
Example No. 13
def dotheclassification(ds, bilateral):
    """This functions performs the classification in a one-vs-all fashion with a
    stochastic gradient descent.
    Future TODO: Selection of alpha may be better performed via
    GridSearchCV. To quote sklearns documentation: 'Finding a reasonable
    regularization term is best done using GridSearchCV, usually in the range
    10.0**-np.arange(1,7).'"""

    # set up the dataset: If I understand the sourcecode correctly, the
    # SGDClassifier wants to have unique labels in a sample attribute
    # called 'targets' and is quite stubborn about this name - I could not
    # convince it to look for targets somewhere else, so I'm catering to its demands
    if bilateral:
        ds.sa['targets'] = ds.sa.bilat_ROIs
    else:
        ds.sa['targets'] = ds.sa.all_ROIs

    clf = mv.SKLLearnerAdapter(
        SGDClassifier(loss='hinge', penalty='l2', class_weight='balanced'))

    cv = mv.CrossValidation(clf,
                            mv.NFoldPartitioner(attr='participant'),
                            errorfx=mv.mean_match_accuracy,
                            enable_ca=['stats'])

    results = cv(ds)

    # save classification results
    with open(results_dir + 'SGD_clf.txt', 'a') as f:
        f.write(cv.ca.stats.as_string(description=True))

    if bilateral:
        desired_order = ['brain', 'VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
    else:
        desired_order = [
            'brain', 'VIS', 'left LOC', 'right LOC', 'left OFA', 'right OFA',
            'left FFA', 'right FFA', 'left EBA', 'right EBA', 'left PPA',
            'right PPA'
        ]

    labels = get_known_labels(desired_order, cv.ca.stats.labels)

    # plot the confusion matrix with PyMVPA's built-in function
    cv.ca.stats.plot(labels=labels, numbers=True, cmap='gist_heat_r')
    plt.savefig(results_dir + 'confusion_matrix.png')

    # plot the confusion matrix with matplotlib
    if niceplot:
        ACC = cv.ca.stats.stats['mean(ACC)']
        plot_confusion(cv,
                       labels,
                       fn=results_dir + 'confusion_matrix_SGD.svg',
                       figsize=(9, 9),
                       vmax=100,
                       cmap='Blues',
                       ACC='%.2f' % ACC)

    mv.h5save(results_dir + 'SGD_cv_classification_results.hdf5', results)
    print('Saved the crossvalidation results.')

    return cv
Example No. 14
def buildadataset(zscore, rois, event_path=None):
    """buildataset() will build and save participant-specific hdf5 datasets
    with all rois from preprocessed objectcategories data, stack them for a
    group dataset and save them, and transpose the group dataset and save it.
    The parameter 'zscore' determines whether and what kind of z-scoring
    should be performed."""
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
                                    '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                                        participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        assert len(localizer_fns) == 4
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])

        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from no-stimulation
        # periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            # zscore stuff
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # unicode dtype ('U10') so the label comparisons also work under Python 3
        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]]).astype('U10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                      '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                           'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                            'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)
            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(base_dir + participant + anat_dir +
                         '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi
        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign ROI mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                  '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                      participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))
        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print("saved the collection of all subjects' datasets.")
    # squish everything together
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
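A hedged invocation of buildadataset(): zscore accepts 'custom', 'z-score', or any other value for no scaling, and the ROI list mirrors the labels used inside the function (base_dir and the other paths are module-level settings the function assumes):

rois = ['VIS', 'LOC', 'OFA', 'FFA', 'EBA', 'PPA']
ds = buildadataset('z-score', rois, event_path=None)  # event_path is only needed for 'custom'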
Example No. 15
    ds.sa.pop('intents')
    ds.sa['subjects'] = [participant] * ds.shape[0]
    ds.fa['node_indices'] = range(n_vertices)
    # z-score features across samples
    mv.zscore(ds, chunks_attr=None)

    return ds


t = []
for hemi in hemispheres:
    mappers = mv.h5load(
        os.path.join(
            mvpa_dir,
            'search_hyper_mappers_life_mask_nofsel_{0}.hdf5'.format(hemi)))

    print('\nLoading fMRI GIFTI data...')
    l = []
    for participant in participants:
        p = []
        for run in range(1, 5):
            p.append(mappers[participant].forward(
                load_data(
                    os.path.join(
                        sam_data_dir,
                        '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                        format(participant, tr[run], run, hemi)))))
        l.append(p)
    t.append(l)
mv.h5save("hyperaligned.hdf5", t)
Example No. 16
def dotheglm(sensitivities, eventdir, annot_dir):
    """dotheglm does the glm. It will squish the sensitivity
    dataset by vstacking them, calculating the mean sensitivity per ROI pair
    with the mean_group_sample() function, transpose it with a
    TransposeMapper(). It will get the event files and read them into an apprpriate.
    data structure. It will compute one glm per run.
    """
    # normalize the sensitivities
    from sklearn.preprocessing import normalize
    import copy
    #default for normalization is the L2 norm
    sensitivities_to_normalize = copy.deepcopy(sensitivities)
    for i in range(len(sensitivities)):
        sensitivities_to_normalize[i].samples = normalize(
            sensitivities_to_normalize[i].samples, axis=1)

    sensitivities_stacked = mv.vstack(sensitivities_to_normalize)
    if bilateral:
        # list comprehension instead of map() so this also works under Python 3
        sensitivities_stacked.sa['bilat_ROIs_str'] = ['_'.join(p) for p in
                                                      sensitivities_stacked.sa.targets]
        mean_sens = mv.mean_group_sample(['bilat_ROIs_str'])(sensitivities_stacked)
    else:
        sensitivities_stacked.sa['all_ROIs_str'] = ['_'.join(p) for p in
                                                    sensitivities_stacked.sa.targets]
        mean_sens = mv.mean_group_sample(['all_ROIs_str'])(sensitivities_stacked)
    mean_sens_transposed = mean_sens.get_mapped(mv.TransposeMapper())

    # get a list of the event files with occurances of faces
    event_files = sorted(glob(eventdir + '/*'))
    assert len(event_files) == 8
    # get additional events from the location annotation
    location_annotation = pd.read_csv(annot_dir, sep='\t')

    # get all settings with more than one occurrence
    # ('set' is renamed to 's' to avoid shadowing the builtin)
    settings = [
        s for s in location_annotation.setting.unique()
        if (location_annotation.setting[location_annotation.setting ==
                                        s].value_counts()[0] > 1)
    ]

    # get onsets and durations
    onset = []
    duration = []
    condition = []
    for s in settings:
        for i in range(location_annotation.setting[
                location_annotation['setting'] == s].value_counts()[0]):
            onset.append(location_annotation[location_annotation['setting'] ==
                                             s]['onset'].values[i])
            duration.append(location_annotation[location_annotation['setting']
                                                == s]['duration'].values[i])
        condition.append([s] * (i + 1))
    # flatten conditions
    condition = [y for x in condition for y in x]
    assert len(condition) == len(onset) == len(duration)

    # concatenate the strings
    condition_str = [s.replace(' ', '_') for s in condition]
    condition_str = ['location_' + s for s in condition_str]

    # put it in a dataframe
    locations = pd.DataFrame({
        'onset': onset,
        'duration': duration,
        'condition': condition_str
    })

    # sort according to onsets to be paranoid
    locations_sorted = locations.sort_values(by='onset')

    # this is a dataframe encoding flow of time
    time_forward = pd.DataFrame(
        [{
            'condition': 'time+',
            'onset': location_annotation['onset'][i],
            'duration': 1.0
        } for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['+', '++']])

    time_back = pd.DataFrame(
        [{
            'condition': 'time-',
            'onset': location_annotation['onset'][i],
            'duration': 1.0
        } for i in range(len(location_annotation) - 1)
         if location_annotation['flow_of_time'][i] in ['-', '--']])

    # sort according to onsets to be paranoid
    time_forward_sorted = time_forward.sort_values(by='onset')
    time_back_sorted = time_back.sort_values(by='onset')

    scene_change = pd.DataFrame([{
        'condition': 'scene-change',
        'onset': location_annotation['onset'][i],
        'duration': 1.0
    } for i in range(len(location_annotation) - 1)])

    scene_change_sorted = scene_change.sort_values(by='onset')

    # this is a dataframe encoding exterior
    exterior = pd.DataFrame([{
        'condition': 'exterior',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
                             if (location_annotation['int_or_ext'][i] == 'ext')
                             ])

    # sort according to onsets to be paranoid
    exterior_sorted = exterior.sort_values(by='onset')

    # this is a dataframe encoding nighttime
    night = pd.DataFrame([{
        'condition': 'night',
        'onset': location_annotation['onset'][i],
        'duration': location_annotation['duration'][i]
    } for i in range(len(location_annotation) - 1)
                          if (location_annotation['time_of_day'][i] == 'night')
                          ])

    # sort according to onsets to be paranoid
    night_sorted = night.sort_values(by='onset')

    assert np.all(
        locations_sorted.onset[1:].values >= locations_sorted.onset[:-1].values
    )
    assert np.all(
        time_back_sorted.onset[1:].values >= time_back_sorted.onset[:-1].values
    )
    assert np.all(time_forward_sorted.onset[1:].values >=
                  time_forward_sorted.onset[:-1].values)
    assert np.all(
        exterior_sorted.onset[1:].values >= exterior_sorted.onset[:-1].values)
    assert np.all(
        night_sorted.onset[1:].values >= night_sorted.onset[:-1].values)
    assert np.all(scene_change_sorted.onset[1:].values >=
                  scene_change_sorted.onset[:-1].values)

    # check whether chunks are increasing as well as sanity check
    chunks = mean_sens_transposed.sa.chunks
    assert np.all(chunks[1:] >= chunks[:-1])

    # TR was not preserved/carried through in .a
    # so we will guesstimate it based on the values of time_coords
    tc = mean_sens_transposed.sa.time_coords
    TRdirty = sorted(np.unique(tc[1:] - tc[:-1]))[-1]
    assert np.abs(np.round(TRdirty, decimals=2) - TRdirty) < 0.0001

    # make time coordinates real seconds
    mean_sens_transposed.sa.time_coords = np.arange(
        len(mean_sens_transposed)) * TRdirty

    # get runs, and runlengths in seconds
    runs = sorted(mean_sens_transposed.UC)
    assert runs == list(range(len(runs)))
    runlengths = [
        np.max(tc[mean_sens_transposed.sa.chunks == run]) + TRdirty
        for run in runs
    ]
    runonsets = [sum(runlengths[:run]) for run in runs]
    assert len(runs) == 8

    # initialize the list of dicts that gets later passed to the glm
    events_dicts = []
    # This is relevant to later stack all dataframes together
    # and paranoidly make sure that they have the same columns
    cols = ['onset', 'duration', 'condition']

    for run in runs:
        # get face data
        eventfile = sorted(event_files)[run]
        events = pd.read_csv(eventfile, sep='\t')

        for index, row in events.iterrows():

            # disregard no faces, put everything else into event structure
            if row['condition'] != 'no_face':
                dic = {
                    'onset': row['onset'] + runonsets[run],
                    'duration': row['duration'],
                    'condition': row['condition']
                }
                events_dicts.append(dic)

    # concatenate all event dataframes
    run_reg = pd.DataFrame([{
        'onset': runonsets[i],
        'duration': abs(runonsets[i] - runonsets[i + 1]),
        'condition': 'run-' + str(i + 1)
    } for i in range(7)])

    # get all of these wonderful dataframes into a list and squish them
    dfs = [
        locations_sorted[cols], scene_change_sorted[cols],
        time_back_sorted[cols], time_forward_sorted[cols],
        exterior_sorted[cols], night_sorted[cols], run_reg[cols]
    ]
    allevents = pd.concat(dfs)

    # save all non-face related events in an event file, just for the sake of it
    allevents.to_csv(results_dir + '/' + 'non_face_regs.tsv',
                     sep='\t',
                     index=False)

    # append non-faceevents to event structure for glm
    for index, row in allevents.iterrows():
        dic = {
            'onset': row['onset'],
            'duration': row['duration'],
            'condition': row['condition']
        }
        events_dicts.append(dic)

    # save this event dict structure as a tsv file
    import csv
    with open(results_dir + '/' + 'full_event_file.tsv', 'w') as tsvfile:
        fieldnames = ['onset', 'duration', 'condition']
        writer = csv.DictWriter(tsvfile, fieldnames=fieldnames, delimiter='\t')
        writer.writeheader()
        writer.writerows(events_dicts)
    # save this event file also as json file... can there ever be enough different files...
    import json
    with open(results_dir + '/' + 'allevents.json', 'w') as f:
        json.dump(events_dicts, f)

    # do the glm - we've earned it
    hrf_estimates = mv.fit_event_hrf_model(
        mean_sens_transposed,
        events_dicts,
        time_attr='time_coords',
        condition_attr='condition',
        design_kwargs=dict(drift_model='blank'),
        glmfit_kwargs=dict(model='ols'),
        return_model=True)

    mv.h5save(results_dir + '/' + 'sens_glm_avmovie_results.hdf5',
              hrf_estimates)
    print('calculated the glm and saved the results.')

    return hrf_estimates
Example No. 17
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build an hdf5 dataset.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # count the number of participant substitutions necessary
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        # unicode dtype ('U10') so the label comparisons also work under Python 3
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('U10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1

                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print("saved the collection of all subjects' datasets.")
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
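The two files saved above differ in orientation: the group list keeps time points as samples, while the transposed dataset has voxels as samples and the ROI labels in sa. Reloading the transposed variant for a classification analysis could look like this (sketch):

ds = mv.h5load(outdir + 'localizer_groupdataset_transposed.hdf5')
print(ds.sa.all_ROIs)  # per-voxel ROI labels travelled into the sample attributes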
Example No. 18
def dotheclassification(ds_movie,
                        ds_loc,
                        classifier,
                        bilateral):
    """ Dotheclassification does the classification.
    Input: the dataset on which to perform a leave-one-out crossvalidation with a classifier
    of choice.
    Specify: the classifier to be used (gnb (linear gnb), l-sgd (linear sgd), sgd)
             whether the sensitivities should be computed and stored for later use
             whether the dataset has ROIs combined across hemisphere (bilateral)
    """

    dfs = []
    for idx, ds in enumerate([ds_movie, ds_loc]):
        if bilateral:
            ds.sa['targets'] = ds.sa.bilat_ROIs
        else:
            ds.sa['targets'] = ds.sa.all_ROIs

        if classifier == 'gnb':
            # set up classifier
            prior = 'ratio'
            clf = mv.GNB(common_variance=True,
                         prior=prior)

        elif classifier == 'sgd':
            # necessary I believe regardless of the SKLLearnerAdapter
            from sklearn.linear_model import SGDClassifier
            clf = mv.SKLLearnerAdapter(SGDClassifier(loss='hinge',
                                                     penalty='l2',
                                                     class_weight='balanced'))
        elif classifier == 'l-sgd':
            # necessary I believe regardless of the SKLLearnerAdapter
            from sklearn.linear_model import SGDClassifier
            # get a stochastic gradient descent into pymvpa by using the SKLLearnerAdapter.
            # Get it to perform 1 vs 1 decisions (instead of one vs all) with the MulticlassClassifier
            clf = mv.MulticlassClassifier(mv.SKLLearnerAdapter(SGDClassifier(loss='hinge',
                                                                             penalty='l2',
                                                                             class_weight='balanced'
                                                                             )))

        # prepare for callback of sensitivity extraction within CrossValidation
        classifications = []

        def store_class(data, node, result):
            class_ds = mv.Dataset(samples=data.sa.voxel_indices)
            class_ds.sa['targets'] = data.sa.targets
            class_ds.sa['partitions'] = data.sa.partitions
            class_ds.sa['predictions'] = clf.predict(data)
            class_ds.sa['participant'] = data.sa.participant
            classifications.append(class_ds)

        # do a crossvalidation classification and store the classification results
        cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='participant'),
                                errorfx=mv.mean_match_accuracy,
                                enable_ca=['stats'],
                                callback=store_class)
        results = cv(ds)
        # save classification results as a Dataset
        ds_type = ['movie', 'loc']
        mv.h5save(results_dir + 'cv_classification_results_{}.hdf5'.format(ds_type[idx]), classifications)
        print('Saved the classification results obtained during crossvalidation.')

        # get the classification list into a pandas dataframe

        for i, classification in enumerate(classifications):
            df = pd.DataFrame(data={'voxel_indices': list(classification.samples),
                                    'targets': list(classification.sa.targets),
                                    'predictions': list(classification.sa.predictions),
                                    'partitions': list(classification.sa.partitions),
                                    'participants': list(classification.sa.participant),
                                    'ds_type': [ds_type[idx]] * len(classification.sa.predictions)
                                    }
                              )
            dfs.append(df)

    # two helper functions for later use in a lambda function
    def hits(row):
        if row['predictions'] == row['targets']:
            return 1
        else:
            return 0

    def parts(row):
        if row['partitions'] == 1:
            return "train"
        elif row['partitions'] == 2:
            return "test"

    # get all folds into one dataframe, disregard the index
    all_classifications = pd.concat(dfs, ignore_index=True)
    # compute hits as correspondence between target and prediction
    all_classifications['hits'] = all_classifications.apply(hits, axis=1)
    # assign string labels to testing and training partitions (instead of 1, 2)
    all_classifications['parts'] = all_classifications.apply(parts, axis=1)
    # transform voxel coordinates from arrays (unhashable) into tuples
    all_classifications['voxel_indices'] = all_classifications['voxel_indices'].apply(tuple)

    # subset the dataset to contain only the testing data
    all_testing = all_classifications[all_classifications.parts == "test"]
    # check that every participant is in the data
    assert len(all_testing.participants.unique()) == 15
    # to check for correspondence between the sum of the two experiments confusion matrices,
    # do sth like this: len(all_testing[(all_testing['predictions'] == 'PPA') & (all_testing['targets'] == 'VIS')])

    # this counts hits per fold across experiments (2 if both experiments classified correctly,
    # 1 if 1 experiment classified correctly, 0 is none did). Also, append the targets per voxel.
    # we use 'min' here because aggregate needs any function, but targets are the same between
    # the experiments
    compare_exp = all_testing.groupby(['voxel_indices', 'participants']).agg(
        {'hits': 'sum', 'targets': 'min'}).reset_index().sort_values(['voxel_indices', 'participants'])
    all_testing_movie = all_testing[all_testing.ds_type == 'movie'].sort_values(
        ['voxel_indices', 'participants']).reset_index()
    all_testing_loc = all_testing[all_testing.ds_type == 'loc'].sort_values(
        ['voxel_indices', 'participants']).reset_index()
    # append movie and loc predictions to the dataframe
    compare_exp['pred_movie'] = all_testing_movie.predictions
    compare_exp['pred_loc'] = all_testing_loc.predictions

    # get the ROIS from the classification
    ROIS = np.unique(ds_movie.sa.targets)

    # there can't be values greater than two or lower than zero
    assert compare_exp.hits.max() <= 2
    assert compare_exp.hits.min() >= 0
    return compare_exp, all_testing, ROIS
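compare_exp scores every voxel from 0 to 2 across the two experiments, so a per-ROI consistency summary is a short pandas expression (a sketch; the call arguments are placeholders):

compare_exp, all_testing, ROIS = dotheclassification(ds_movie, ds_loc,
                                                     classifier='gnb', bilateral=True)
# fraction of voxels per ROI classified correctly in BOTH experiments
consistency = compare_exp.groupby('targets')['hits'].apply(lambda h: (h == 2).mean())
print(consistency)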
Example No. 19
def project_betas(ds,
                  analysis,
                  eventdir,
                  results_dir,
                  annot_dir=None,
                  ):
    """
    Currently unused, but it can become relevant later on. Will keep it in utils.py.
    Project beta values from the 2nd analysis approach into the brain.
    Current problem: For the first analysis type, overlaps are excluded (for
    classification purposes), so we need to do the glm on data with overlaps.
    That's why it's a separate function and not integrated into the reversed analysis.
    :return: nifti images... many nifti images in a dictionary



    ds_transposed = ds.get_mapped(mv.TransposeMapper())
    assert ds_transposed.shape[0] < ds_transposed.shape[1]

    # get the appropriate event file; extract chunks, runs, and run onsets from
    # the transposed dataset (only needed for the avmovie analysis)
    chunks, runs, runonsets = False, False, False

    if analysis == 'avmovie':
        ds_transposed, chunks, runs, runonsets = get_avmovietimes(ds_transposed)

    events_dicts = get_events(analysis=analysis,
                              eventdir=eventdir,
                              results_dir=results_dir,
                              chunks=chunks,
                              runs=runs,
                              runonsets=runonsets,
                              annot_dir=annot_dir,
                              multimatch=False)

    # step 1: run the GLM on the data. drift_model='blank' includes no drift
    # regressors in the design, and model='ols' fits by ordinary least squares.
    hrf_estimates = mv.fit_event_hrf_model(ds_transposed,
                                           events_dicts,
                                           time_attr='time_coords',
                                           condition_attr='condition',
                                           design_kwargs=dict(drift_model='blank'),
                                           glmfit_kwargs=dict(model='ols'),
                                           return_model=True)

    # let's save these
    mv.h5save(results_dir + '/' + 'betas_from_2nd_approach.hdf5', hrf_estimates)
    print('calculated the glm and saved the results')
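    # (the saved dataset can later be re-loaded with mv.h5load, should the betas
    # be needed without re-fitting the model)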

    # step 2: map the results back into transposed form (time points as features)
    # and extract the betas
    hrf_estimates_transposed = hrf_estimates.get_mapped(mv.TransposeMapper())
    assert hrf_estimates_transposed.samples.shape[0] > hrf_estimates_transposed.samples.shape[1]

    subs = np.unique(hrf_estimates_transposed.sa.participant)
    print('going on to project resulting betas back into brain...')

    regs = hrf_estimates_transposed.fa.condition
    assert len(subs) > 0
    from collections import OrderedDict
    result_maps = OrderedDict()
    for sub in subs:
        print('...for subject {}...'.format(sub))
        result_maps[sub] = OrderedDict()
        # subset to this participant's data
        data = mv.Dataset(hrf_estimates_transposed.samples[hrf_estimates_transposed.sa.participant == sub],
                          fa=hrf_estimates_transposed[hrf_estimates_transposed.sa.participant == sub].fa,
                          sa=hrf_estimates_transposed[hrf_estimates_transposed.sa.participant == sub].sa)
        # loop over regressors
        for idx, reg in enumerate(regs):
            result_map = buildremapper(sub,
                                       data.samples.T[idx], # we select one beta vector per regressor
                                       ds_type='full', # currently we can only do this for the full ds.
                                       )
            # populate a nested dict with the resulting nifti images:
            # one nifti image per regressor for each subject
            result_maps[sub][reg] = result_map

        # Those result maps can be quick-and-dirty-plotted with
        # mri_args = {'background' : 'sourcedata/tnt/sub-01/bold3Tp2/in_grpbold3Tp2/head.nii.gz',
        # 'background_mask': 'sub-01/ses-movie/anat/brain_mask_tmpl.nii.gz'}
        # fig = mv.plot_lightbox(overlay=result_maps['sub-01']['scene'], vlim=(1.5, None), **mri_args)
        # TODO: maybe save the result map? Done with map2nifti(ds, da).to_filename('blabla{}'.format(reg))
        # how do we know which regressors have highest betas for given ROI? averaging?
        #from collections import OrderedDict
        #betas = [np.mean(hrf_estimates.samples[i][hrf_estimates.fa.bilat_ROIs == 'PPA']) for i, reg in enumerate(regs)]
        # to get it sorted: OrderedDict(sorted(zip(regs, betas), key=lambda x:x[1]))

    return result_maps
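
# a hypothetical invocation, for orientation only (all argument values are
# placeholders, not paths from the original analysis):
#   result_maps = project_betas(ds, analysis='avmovie', eventdir='events/',
#                               results_dir='results/', annot_dir='annotations/')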
Example no. 20
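# a minimal sketch of what this snippet assumes is defined earlier in the script
# (the values below are illustrative guesses, not from the original):
#   import os
#   import numpy as np
#   from scipy import signal
#   import mvpa2.suite as mvpa    # alias inferred from the mvpa.* calls below
#   TR = 2.0                      # repetition time in seconds
#   nf = 0.5 / TR                 # Nyquist frequency in Hz
#   lf, hf = 100.0, 6.25          # low/high period cutoffs in seconds
# ds, subj, path, zsc, samples_size and align are likewise expected to exist.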
# band edges normalized to the Nyquist frequency, as scipy.signal.butter expects
ws = [(1. / lf) / nf, (1. / hf) / nf]
# 5th-order Butterworth bandpass, run forward and backward (zero phase shift)
# over every voxel time series
b, a = signal.butter(5, ws, btype='band')
S = [signal.filtfilt(b, a, x) for x in ds.samples.T]
ds.samples = np.array(S).T
ds.samples = ds.samples.astype('float32')

#Create Event-related Dataset
onsets = np.arange(0, ds.nsamples - samples_size / TR, samples_size / TR)
events = []
for on in onsets:
    Ev = dict()
    Ev['onset'] = on
    Ev['duration'] = samples_size / TR
    Ev['target'] = on * TR
    Ev['subj'] = subj
    events.append(Ev)

evds = mvpa.eventrelated_dataset(ds, events=events)
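# note: with the arange over nsamples above, onsets and durations are expressed
# in samples (volumes), which eventrelated_dataset treats as sample indices.
# the boxcar mapping yields fa.event_offsetidx, each volume's position within
# its event window, so '1stidx' below flags the first volume of every event.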
evds.fa['1stidx'] = evds.fa.event_offsetidx == 0

#Save pymvpa-dataset as hdf5 in dataset directory
try:
    os.mkdir(os.path.join(path, 'dataset'))
except OSError:
    print 'dataset directory already exists'

dsfile = subj + '_z' + str(zsc) + '_' + str(samples_size) + '_' + align
mvpa.h5save(os.path.join(path, 'dataset', dsfile + '.hdf5'),
            evds,
            compression='gzip')