Exemplo n.º 1
0
def get_ws_data(test_p, fold_shifted, included, hemi):
    """Load one participant's own runs as training and test responses.

    Every run in ``included`` plus the ``fold_shifted`` run is read for
    ``test_p`` from ``sam_data_dir``. Each run is trimmed along the first
    axis (``4:-4``, or ``4:-5`` for run 4), restricted to the columns where
    ``cortical_vertices[hemi] == 1`` and z-scored with ``chunks_attr=None``.

    Relies on the module-level names ``mv``, ``os``, ``sam_data_dir``,
    ``tr_fmri`` and ``cortical_vertices``.

    Parameters
    ----------
    test_p
        Participant identifier; formatted into the filename.
    fold_shifted
        Run used as the test run.
    included
        Iterable of runs used for training.
    hemi
        Hemisphere label; formatted into the filename and used as key into
        ``cortical_vertices``.

    Returns
    -------
    tuple
        ``(train_resp, test_resp)``: a list with one array per training run
        and the array of the test run.
    """
    # NOTE(review): the message mentions HA although no mapper is applied in
    # this function; the text is kept verbatim.
    banner = '\nLoading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'
    print(banner.format(test_p))

    def _load_run(run):
        # Read, trim, mask and z-score a single run of ``test_p``.
        fname = '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            test_p, tr_fmri[run], run, hemi)
        samples = mv.gifti_dataset(os.path.join(sam_data_dir, fname)).samples
        stop = -5 if run == 4 else -4  # run 4 loses one extra trailing row
        data = samples[4:stop, :]
        data = data[:, cortical_vertices[hemi] == 1]
        mv.zscore(data, chunks_attr=None)
        return data

    train_resp = []
    for run in included:
        run_data = _load_run(run)
        print('train', run, run_data.shape)
        train_resp.append(run_data)

    test_resp = _load_run(fold_shifted)
    print('test', fold_shifted, test_resp.shape)

    return train_resp, test_resp
Exemplo n.º 2
0
def get_ha_testsubj_data(test_p, mappers, fold_shifted, included, hemi):
    """Build training data projected into the test participant's space.

    For every run in ``included`` each participant other than ``test_p`` is
    loaded, z-scored, pushed through its own mapper (``forward``), z-scored,
    pulled back through the test participant's mapper (``reverse``),
    restricted to the columns where ``cortical_vertices[hemi] == 1`` and
    z-scored again. The per-participant results are averaged and the average
    is z-scored. The ``fold_shifted`` run of ``test_p`` is loaded, masked and
    z-scored without any mapper.

    Relies on the module-level names ``participants``, ``mv``, ``np``,
    ``os``, ``sam_data_dir``, ``tr_fmri`` and ``cortical_vertices``.

    Parameters
    ----------
    test_p
        Held-out participant identifier.
    mappers
        Mapping from participant identifier to an object offering
        ``forward`` and ``reverse``.
    fold_shifted
        Run used as the test run.
    included
        Iterable of runs used for training.
    hemi
        Hemisphere label; formatted into the filename and used as key into
        ``cortical_vertices``.

    Returns
    -------
    tuple
        ``(train_resp, test_resp)``: list of averaged arrays (one per
        training run) and the test participant's array.
    """

    def _raw_samples(subject, run):
        # Samples of one participant/run, untrimmed.
        fname = '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            subject, tr_fmri[run], run, hemi)
        return mv.gifti_dataset(os.path.join(sam_data_dir, fname)).samples

    def _stop(run):
        # Run 4 is trimmed by one extra trailing row.
        return -5 if run == 4 else -4

    others = [subj for subj in participants if subj != test_p]
    print('\nLoading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'.format(test_p))

    train_resp = []
    for run in included:
        projected = []
        for participant in others:
            data = _raw_samples(participant, run)[4:_stop(run), :]
            mv.zscore(data, chunks_attr=None)
            # own space -> common space -> test participant's space
            data = mappers[participant].forward(data)
            mv.zscore(data, chunks_attr=None)
            data = mappers[test_p].reverse(data)
            data = data[:, cortical_vertices[hemi] == 1]
            mv.zscore(data, chunks_attr=None)
            projected.append(data)

        run_mean = np.mean(projected, axis=0)
        mv.zscore(run_mean, chunks_attr=None)
        print('train', run, run_mean.shape)
        train_resp.append(run_mean)

    test_resp = _raw_samples(test_p, fold_shifted)[
        4:_stop(fold_shifted), cortical_vertices[hemi] == 1]
    mv.zscore(test_resp, chunks_attr=None)

    print('test', fold_shifted, test_resp.shape)

    return train_resp, test_resp
Exemplo n.º 3
0
def get_voxel_coords(ds,
                     append=True,
                     zscore=True):
    """Return a deep copy of ``ds`` carrying voxel-coordinate features.

    The coordinate features are built from ``ds.sa.voxel_indices``: the
    indices themselves, their squares, their cubes, the three pairwise
    products of columns 0, 1 and 2, and the product of all three columns.

    If ``append`` is true these features are stacked to the right of the
    existing samples; otherwise they replace the samples entirely, which
    gives a coordinates-only dataset (useful as a sanity check of a
    classification). Feature attributes of the copy are cleared in both
    cases.

    Parameters
    ----------
    ds
        Dataset with a ``voxel_indices`` sample attribute.
    append : bool
        Append to (True) or replace (False) the existing samples.
    zscore : bool
        If true, z-score the coordinate features with
        ``chunks_attr='participant'`` before they are used.

    Returns
    -------
    The modified deep copy of ``ds``.
    """
    result = ds.copy('deep')

    vox = ds.sa.voxel_indices
    col0, col1, col2 = vox[:, 0], vox[:, 1], vox[:, 2]
    cross_terms = np.column_stack((col0 * col1,
                                   col0 * col2,
                                   col1 * col2,
                                   col0 * col1 * col2))
    coord_ds = mv.Dataset(np.hstack((vox, vox ** 2, vox ** 3, cross_terms)),
                          sa=result.sa)
    if zscore:
        mv.zscore(coord_ds, chunks_attr='participant')

    # The feature count changes below, so the old feature attributes go.
    result.fa.clear()
    result.samples = (np.hstack((result.samples, coord_ds.samples))
                      if append else coord_ds.samples)
    return result
Exemplo n.º 4
0
def load_data(filename):
    """Load a GIFTI file as a dataset, annotate it and z-score it.

    The ``intents`` sample attribute is removed, every sample is tagged with
    the module-level ``participant`` under ``sa['subjects']``, and
    ``fa['node_indices']`` is set to ``range(n_vertices)`` (``n_vertices`` is
    also read from module level).

    Parameters
    ----------
    filename
        Path handed to ``mv.gifti_dataset``.

    Returns
    -------
    The annotated dataset, z-scored with ``chunks_attr=None``.
    """
    dataset = mv.gifti_dataset(filename)
    dataset.sa.pop('intents')

    n_samples = dataset.shape[0]
    dataset.sa['subjects'] = [participant] * n_samples
    dataset.fa['node_indices'] = range(n_vertices)

    # Each feature is z-scored over all samples at once (no chunking).
    mv.zscore(dataset, chunks_attr=None)
    return dataset
Exemplo n.º 5
0
def preprocess_and_tmp_save_fmri(data_path,
                                 task,
                                 subj,
                                 model,
                                 tmp_path,
                                 group_mask=None):
    '''
    Generator yielding the preprocessed fMRI samples of one subject, run by
    run.

    Each run is loaded in subject space, written to a temporary NIfTI file,
    warped onto the group template with FSL ApplyWarp (nearest-neighbour
    interpolation), reloaded through the group mask, linearly detrended and
    z-scored. Both temporary files of a run are deleted before its samples
    are yielded.

    IN:
        data_path   -   string, path pointing to the Forrest Gump directory
        task        -   which part of the Forrest Gump dataset to load
        subj        -   subject to pre-process (formatted as sub{0:03d})
        model       -   not used by this function
        tmp_path    -   string, directory for the temporary files
        group_mask  -   mask applied after warping; defaults to the subject's
                        templates/bold7Tp1/in_grpbold7Tp1/brain_mask.nii.gz
    OUT:
        float32 samples of each run, one array per iteration'''
    from nipype.interfaces import fsl

    dhandle = mvpa.OpenFMRIDataset(data_path)
    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

    # Paths that do not depend on the run.
    subj_tmpl_dir = os.path.join(data_path, 'sub{0:03d}'.format(subj),
                                 'templates', 'bold7Tp1')
    subj_in_grp_dir = os.path.join(subj_tmpl_dir, 'in_grpbold7Tp1')
    mask_fname = os.path.join(subj_tmpl_dir, 'brain_mask.nii.gz')
    if group_mask is None:
        group_mask = os.path.join(subj_in_grp_dir, 'brain_mask.nii.gz')
    reference = os.path.join(data_path, 'templates', 'grpbold7Tp1',
                             'brain.nii.gz')
    warp_field = os.path.join(subj_in_grp_dir, 'subj2tmpl_warp.nii.gz')

    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        chunk = run_id - 1
        native_ds = dhandle.get_bold_run_dataset(subj,
                                                 task,
                                                 run_id,
                                                 chunks=chunk,
                                                 mask=mask_fname,
                                                 flavor=flavor)
        filename = 'brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        native_file = os.path.join(tmp_path, filename)
        warped_file = os.path.join(tmp_path, 'group_' + filename)

        # Write the subject-space run to disk so FSL can warp it.
        save(unmask(native_ds.samples.astype('float32'), mask_fname),
             native_file)

        warp = fsl.ApplyWarp()
        warp.inputs.in_file = native_file
        warp.inputs.out_file = warped_file
        warp.inputs.ref_file = reference
        warp.inputs.field_file = warp_field
        warp.inputs.interp = 'nn'
        warp.run()
        os.remove(native_file)

        group_ds = mvpa.fmri_dataset(warped_file,
                                     mask=group_mask,
                                     chunks=chunk)
        mvpa.poly_detrend(group_ds, polyord=1)
        mvpa.zscore(group_ds)
        os.remove(warped_file)
        yield group_ds.samples.astype('float32')
def tmp_save_fmri(datapath, task, subj, model):
    """Preprocess every run of one subject and dump it to the scratch folder.

    Each run is loaded in subject space, written to a temporary NIfTI file,
    warped to the group template by the ``applywarp`` command line tool
    (nearest-neighbour interpolation), reloaded through the group brain
    mask, linearly detrended, z-scored and stored as float32 with
    ``joblib.dump``. The temporary NIfTI files are removed afterwards.

    Scratch directory, group mask, template and warp-field locations are
    hard-coded absolute paths. ``model`` is not used.

    Parameters
    ----------
    datapath
        Root of the OpenFMRI-style dataset.
    task
        Task whose runs are processed.
    subj
        Subject to process.
    model
        Unused.

    Returns
    -------
    Number of columns of the last processed run's samples.
    """
    dhandle = mvpa.OpenFMRIDataset(datapath)

    flavor = 'dico_bold7Tp1_to_subjbold7Tp1'
    group_brain_mask = '/home/mboos/SpeechEncoding/brainmask_group_template.nii.gz'
    scratch_path = '/home/data/scratch/mboos/prepro/tmp/'
    mask_fname = os.path.join(datapath, 'sub{0:03d}'.format(subj),
                              'templates', 'bold7Tp1', 'brain_mask.nii.gz')
    # NOTE(review): the command is assembled by string formatting and run
    # through the shell; the template and warp paths ignore ``datapath``.
    warp_cmd = 'applywarp -i {0} -o {1} -r /home/data/psyinf/forrest_gump/anondata/templates/grpbold7Tp1/brain.nii.gz -w /home/data/psyinf/forrest_gump/anondata/sub{2:03}/templates/bold7Tp1/in_grpbold7Tp1/subj2tmpl_warp.nii.gz --interp=nn'

    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        chunk = run_id - 1
        run_ds = dhandle.get_bold_run_dataset(subj, task, run_id,
                                              chunks=chunk,
                                              mask=mask_fname,
                                              flavor=flavor)
        filename = 'whole_brain_subj_{}_run_{}.nii.gz'.format(subj, run_id)
        native_file = scratch_path + filename
        warped_file = scratch_path + 'group_' + filename

        save(unmask(run_ds.samples.astype('float32'), mask_fname), native_file)
        os.system(warp_cmd.format(native_file, warped_file, subj))
        os.remove(native_file)

        run_ds = mvpa.fmri_dataset(warped_file, mask=group_brain_mask,
                                   chunks=chunk)
        mvpa.poly_detrend(run_ds, polyord=1)
        mvpa.zscore(run_ds)
        joblib.dump(run_ds.samples.astype('float32'),
                    '/home/data/scratch/mboos/prepro/tmp/whole_brain_subj_{}_run_{}.pkl'.format(subj, run_id))
        os.remove(warped_file)
    return run_ds.samples.shape[1]
Exemplo n.º 7
0
def fx(dataset, behav_file, motion_file, polynomial_order, run_number):
    """Fit an HRF model to one run and return the z-scored estimates.

    Events are read from the CSV file ``behav_file`` (columns
    ``run_volume``, ``genre``, ``run`` and ``stim``); the array loaded from
    ``motion_file`` is added to the design as regressors named
    ``tx, ty, tz, rx, ry, rz``.

    Parameters
    ----------
    dataset
        Time-series dataset handed to ``eventrelated_dataset``.
    behav_file
        Path of the comma-separated events file.
    motion_file
        Path of the text file with the additional regressors.
    polynomial_order
        Order of the polynomial drift model.
    run_number
        Value stored for every estimate under ``sa['run']``.

    Returns
    -------
    The dataset of HRF estimates, z-scored in place.
    """
    print("events      ->  %s" % behav_file)
    print("nuisance    ->  %s" % motion_file)

    behav_records = np.recfromcsv(behav_file, delimiter=',')
    events = []
    for record in behav_records:
        events.append({
            # onset is the run volume multiplied by 2.0
            'onset': float(record['run_volume']) * 2.0,
            'duration': 6.0,
            'targets': record['genre'],
            'chunks': int(record['run']),
            'stim': record['stim'],
        })

    motion_regressors = np.loadtxt(motion_file)

    design_options = dict(drift_model='polynomial',
                          drift_order=polynomial_order,
                          hrf_model='canonical with derivative',
                          add_regs=motion_regressors,
                          add_reg_names=['tx', 'ty', 'tz', 'rx', 'ry', 'rz'])
    hrf_estimates = eventrelated_dataset(dataset,
                                         events,
                                         model='hrf',
                                         time_attr='time_coords',
                                         condition_attr=('targets', 'chunks'),
                                         design_kwargs=design_options,
                                         glmfit_kwargs=dict(model='ar1'))

    hrf_estimates.sa['run'] = [run_number] * len(hrf_estimates)

    # Voxel-wise z-scoring.
    # XXX `hrf_estimates` has no chunks, so this is not performed run-wise!
    zscore(hrf_estimates)
    return hrf_estimates
Exemplo n.º 8
0
def get_ha_common_data(test_p, mappers, fold_shifted, included, hemi):
    """Build training and test data in the hyperalignment common space.

    Every run is loaded, trimmed along the first axis (``4:-4``, or ``4:-5``
    for run 4), z-scored, projected with the mapper of the participant the
    data belongs to (``forward``), restricted to the columns given by the
    module-level ``selected_node`` and z-scored again. Training data is the
    z-scored average over all participants except ``test_p``, one array per
    run in ``included``; test data is the ``fold_shifted`` run of ``test_p``.

    Relies on the module-level names ``participants``, ``mv``, ``np``,
    ``os``, ``sam_data_dir``, ``tr_fmri`` and ``selected_node``.

    Parameters
    ----------
    test_p
        Held-out participant identifier.
    mappers
        Mapping from participant identifier to an object offering
        ``forward``; it must contain an entry for ``test_p`` too.
    fold_shifted
        Run used as the test run.
    included
        Iterable of runs used for training.
    hemi
        Hemisphere label; formatted into the filename.

    Returns
    -------
    tuple
        ``(train_resp, test_resp)``: list of averaged arrays (one per
        training run) and the test participant's projected array.
    """

    def _load_projected(subject, run):
        # Load one run of ``subject`` and bring it into the common space.
        fname = '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(
            subject, tr_fmri[run], run, hemi)
        stop = -5 if run == 4 else -4  # run 4 loses one extra trailing row
        data = mv.gifti_dataset(
            os.path.join(sam_data_dir, fname)).samples[4:stop, :]
        mv.zscore(data, chunks_attr=None)
        data = mappers[subject].forward(data)
        # NOTE: columns are selected with the global ``selected_node``; the
        # earlier revision kept a commented-out alternative
        # (``cortical_vertices[hemi] == 1``) marked to be restored later.
        data = data[:, selected_node]
        mv.zscore(data, chunks_attr=None)
        return data

    train_p = [x for x in participants if x != test_p]
    print("\n4. hyperalignment common data")
    print(
        'Loading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'
        .format(test_p))

    train_resp = []
    for run in included:
        projected = [_load_projected(participant, run)
                     for participant in train_p]
        avg = np.mean(projected, axis=0)
        mv.zscore(avg, chunks_attr=None)
        print('train', run, avg.shape)
        train_resp.append(avg)

    # Fix: the test run is projected with the test participant's own mapper.
    # Previously ``mappers[participant]`` was used, i.e. whatever value the
    # training loop variable happened to hold last (or a NameError when the
    # loops never ran).
    test_resp = _load_projected(test_p, fold_shifted)

    print('test', fold_shifted, test_resp.shape)

    return train_resp, test_resp
Exemplo n.º 9
0
                        resp = mappers[participant].forward(
                            load_data(
                                os.path.join(
                                    sam_data_dir,
                                    '{0}_task-life_acq-346vol_run-02.{1}.tproject.gii'
                                    .format(participant,
                                            hemi))).samples[4:-12, :])
                    else:
                        resp = mappers[participant].forward(
                            load_data(
                                os.path.join(
                                    sam_data_dir,
                                    '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'
                                    .format(participant, tr[run], run,
                                            hemi))).samples[4:-7, :])
                    mv.zscore(resp, chunks_attr=None)
                    avg.append(resp)

                avg = np.mean(avg, axis=0)
                print(run, avg.shape)

                train_resp.append(avg)

            train_resp = np.concatenate(train_resp, axis=0)

            test_resp = mappers[test_p].forward(
                load_data(
                    os.path.join(
                        sam_data_dir,
                        '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                        format(test_p, tr[3], 3, hemi))).samples[4:-7, :])
Exemplo n.º 10
0
def createdataset(analysis, datadir, rootdir, anatdir, eventdir, zscore, rois):
    """
    Build per-participant fMRI datasets with ROI feature attributes and save
    them, together with a transposed group dataset, as HDF5 files.

    For every directory matching ``rootdir + 'sub-*'`` all files matching
    ``rootdir + participant + datadir`` are loaded through the participant's
    ``brain_mask_tmpl.nii.gz`` and stacked; the chunk label of a file is its
    index in the sorted file list. The data is optionally z-scored and the
    following feature attributes are attached:

    * ``participant`` - the participant ID, repeated for every feature
    * ``<roi>`` - combined (left + right) mask of each entry in ``rois``
    * ``lat_<roi>`` - 0 outside, 1 for left, 2 for right (not for 'VIS');
      right is written after left and therefore wins where both are set
    * ``all_ROIs`` - one label per feature: 'brain', 'left <roi>',
      'right <roi>', the ROI name itself for 'VIS', or 'overlap'

    All paths are built by plain string concatenation, so ``rootdir``,
    ``datadir`` and ``anatdir`` must already contain the needed separators.
    ``outdir`` is not a parameter; it is read from the enclosing scope.

    Parameters
    ----------
    analysis : str
        'localizer' requires exactly 4 data files per participant and
        'avmovie' exactly 8 (both enforced with ``assert``). Also used in the
        output file names.
    datadir : str
        Glob pattern appended to ``rootdir + participant``.
    rootdir : str
        Prefix under which the ``sub-*`` directories are searched.
    anatdir : str
        Path fragment appended to ``rootdir + participant`` to locate the
        brain mask and the ROI masks.
    eventdir
        Passed to ``get_group_events``; only used for baseline z-scoring.
    zscore : str
        'baseline-zscore' (effective only when ``analysis == 'localizer'``)
        z-scores per chunk with the parameters from ``extract_baseline``;
        'zscore' z-scores per chunk; any other value skips z-scoring.
    rois : iterable of str
        ROI names; 'VIS' is handled separately from lateralized ROIs.

    Returns
    -------
    Dataset
        The horizontally stacked group dataset, transposed so that the
        former feature attributes become sample attributes and vice versa.
    """
    # initialize a list to load all datasets into:
    data_dss = []

    # get list of participants from root dir (last path component of each
    # match of rootdir + 'sub-*')
    participants = sorted(
        [path.split('/')[-1] for path in glob(rootdir + 'sub-*')])
    assert len(participants) != 0
    print('The following participants were found: {}'.format(participants))

    for participant in participants:
        # collect this participant's data files; datadir acts as glob pattern
        data_fns = sorted(glob(rootdir + participant + datadir))
        print(rootdir + participant + datadir)
        mask_fn = rootdir + participant + anatdir + 'brain_mask_tmpl.nii.gz'
        # sanity check on the expected number of data files per analysis
        if analysis == 'localizer':
            assert len(data_fns) == 4
        if analysis == 'avmovie':
            assert len(data_fns) == 8
        # stack all files sample-wise; chunk = index in the sorted file list
        data_ds = mv.vstack([
            mv.fmri_dataset(data_fn, mask=mask_fn, chunks=run)
            for run, data_fn in enumerate(data_fns)
        ])
        data_ds.fa['participant'] = [participant] * data_ds.shape[1]
        print('loaded data for participant {}.'.format(participant))

        # z scoring
        if analysis == 'localizer' and zscore == 'baseline-zscore':
            # z-score with means/stds taken from extract_baseline
            events = get_group_events(eventdir)
            means, stds = extract_baseline(events, data_ds)
            mv.zscore(data_ds, params=(means, stds), chunks_attr='chunks')
            print('finished baseline zscoring for participant {}.'.format(
                participant))
        elif zscore == 'zscore':
            mv.zscore(data_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # roi masks
        # Every feature starts out labelled 'brain'.
        # NOTE(review): dtype 'S10' stores at most 10 bytes per label, so
        # longer labels such as 'left ' + roi may be truncated - confirm.
        all_rois_mask = np.array([['brain'] * data_ds.shape[1]]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                      '{0}_*_mask_tmpl.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                           'l{0}*mask_tmpl.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(rootdir + participant + anatdir + \
                                            'r{0}*mask_tmpl.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns
            # Build the combined ROI mask from however many files were found
            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, data_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)

            # Ensure that number of voxels in ROI mask matches dataset dimension
            assert roi_mask.shape[1] == data_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to data feature attributes
            data_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                # 0 = outside the ROI, 1 = left, 2 = right
                lat_roi_mask = np.zeros((1, data_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1

                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, data_ds.shape[1]))

                # The right hemisphere is written after the left one, so
                # features present in both masks end up labelled 2.
                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, data_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches dataset dimension
                assert lat_roi_mask.shape[1] == data_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to data feature attributes
                data_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps:
                # a feature of this ROI that already carries a label other
                # than 'brain' becomes 'overlap'
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                # Label the remaining (non-overlapping) features of this ROI
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                # The VIS masks are globbed and summed again here, this time
                # with a leading '/' in the pattern.
                # NOTE(review): this branch appears to assume at least one
                # VIS mask file exists - confirm behaviour for an empty glob.
                roi_fns = sorted(
                    glob(rootdir + participant + anatdir +
                         '/{0}_*_mask_tmpl.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi

        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())

        # Assign roi mask to dataset feature attributes
        data_ds.fa['all_ROIs'] = all_rois_flat

        # join all datasets
        data_dss.append(data_ds)

    # save full dataset (list of per-participant datasets); `outdir` comes
    # from the enclosing scope
    mv.h5save(outdir + '{}_groupdataset.hdf5'.format(analysis), data_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together
    ds_wide = mv.hstack(data_dss)
    # transpose the dataset, time points are now features
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(outdir + '{}_groupdataset_transposed.hdf5'.format(analysis), ds)
    print('Transposed the group-dataset and saved it.')
    return ds
Exemplo n.º 11
0
# Pairwise classification / distance analysis for one subject and one ROI.
# `sub`, `roi`, `cwd1`, `clf_wrapper` and `dist_wrapper` are defined elsewhere.
print(sub)

behav_file = 'all_attr.txt'

print(roi)
# full functional timeseries (beta series)
bold_fname = os.path.join(cwd1, sub, 'betas_sub' + sub + '.nii.gz')
# mask of the requested ROI
mask_fname = os.path.join(cwd1, sub, 'native_masks', roi)
# attribute file coding stimulus number and run number
attr_fname = os.path.join(cwd1, sub, behav_file)

# load the attributes, then the masked dataset carrying them
attr = mvpa2.SampleAttributes(attr_fname)
ds = mvpa2.fmri_dataset(bold_fname,
                        targets=attr.targets,
                        chunks=attr.chunks,
                        mask=mask_fname)
# z-score per chunk, then drop non-finite and invariant features
mvpa2.zscore(ds, chunks_attr='chunks')
ds = mvpa2.remove_nonfinite_features(ds)
ds = mvpa2.remove_invariant_features(ds)

# the first 16 unique targets are the stimuli
stimuli = [ds.uniquetargets[i] for i in range(16)]
# create all possible pairs for confusion matrix
pair_list = list(itertools.combinations(range(len(stimuli)), 2))
pair_list2 = [[stimuli[first], stimuli[second]] for first, second in pair_list]

test_accs, val_accs, nfs_per_chunk, val_chunks = clf_wrapper(ds, pair_list2)
corrs1, pvals1, dist_list1, nf_list_dists1 = dist_wrapper(
    ds, pair_list2, test_accs, val_accs, val_chunks)
Exemplo n.º 12
0
# 0. parameters ____________________________________________________________
# Subject name and hemisphere come from the command line.
# (local alternative: main_dir = '/Users/h/Documents/projects_local/cluster_projects')
main_dir = '/dartfs-hpc/scratch/psyc164/groupXHD'
sub_name, hemisphere = sys.argv[1], sys.argv[2]
task_list = ['beh', 'tax']
radii = 10.0

# 1. create pymvpa dataset  ____________________________________________________________
# 'beh' serves as chunks and 'tax' as targets; z-scoring is done per chunk.
ds_q2 = generate_dataset.create_dataset(sub_name, main_dir, task_list,
                                        hemisphere)
ds_q2.sa['chunks'] = ds_q2.sa['beh']
ds_q2.sa['targets'] = ds_q2.sa['tax']
del ds_q2.sa['stats']
mv.zscore(ds_q2, chunks_attr='chunks')

# Expected number of medial-wall vertices per hemisphere.
n_medial = {'lh': 3486, 'rh': 3491}
# Per vertex: in how many samples is the value exactly 0?
# presumably 200 is the number of samples - TODO confirm
zero_counts = np.sum(ds_q2.samples == 0, axis=0)
medial_wall = np.where(zero_counts == 200)[0].tolist()
cortical_vertices = np.where(zero_counts < 200)[0].tolist()
assert len(medial_wall) == n_medial[hemisphere]
n_vertices = ds_q2.fa.node_indices.shape[0]
assert len(medial_wall) + len(cortical_vertices) == n_vertices

# 2. cross validation __________________________________________________________________
# linear C-SVM, partitioned N-fold over the 'chunks' attribute
clf = mv.LinearCSVMC(space='targets')
cv = mv.CrossValidation(clf, mv.NFoldPartitioner(attr='chunks'))
cv_within = cv(ds_q2)
cv_within
Exemplo n.º 13
0
def normalize_dataset(ds, **kwargs):
    """Re-chunk, average and normalize a dataset, then attach its events.

    Parameters
    ----------
    ds : Dataset
        Dataset to process. Chunks and samples are modified in place; when
        samples are averaged a new, mapped dataset is returned instead.
    kwargs : dict
        mean_samples - True or 'True' : average samples grouped by
            ``event_num`` (default False).
        img_dim - int-like : when equal to 4 an additional z-scoring with
            ``chunks_attr='file'`` precedes the plain z-scoring
            (default None, i.e. skipped).
        normalization - 'feature', 'sample' or 'both' (default 'feature').
        chunk_number - None, 'adaptive' or an int-like value : number of
            chunks to redistribute the samples of every target over.
            'adaptive' uses the largest gcd of the smallest per-target
            sample count with 2..9, falling back to 4 when that is 1.
            None (default) leaves the chunks untouched.

    Returns
    -------
    Dataset
        The processed dataset with ``ds.a.events`` set from ``find_events``.

    Raises
    ------
    ValueError
        If ``chunk_number`` is neither None, 'adaptive' nor int-like.
    """
    import collections
    try:
        from math import gcd  # fractions.gcd was removed in Python 3.9
    except ImportError:  # Python 2
        from fractions import gcd

    mean = kwargs.get('mean_samples', False)
    # Default to None so the feature-wise branch no longer fails on an
    # unbound name when img_dim is not supplied.
    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)
    normalization = str(kwargs.get('normalization', 'feature'))
    chunk_number = kwargs.get('chunk_number')

    if chunk_number is not None:
        # Smallest number of samples any target has.
        n_targets = min(collections.Counter(ds.targets).values())

        if chunk_number == 'adaptive':
            n_chunks = max(gcd(n_targets, i) for i in range(2, 10))
            if n_chunks == 1:
                n_chunks = 4
        else:
            try:
                n_chunks = int(chunk_number)
            except (TypeError, ValueError):
                raise ValueError(
                    "chunk_number must be None, 'adaptive' or an integer, "
                    "got {0!r}".format(chunk_number))

        # The same chunk layout is repeated for every unique target and
        # written back in target-sorted order; this assumes every target
        # occurs exactly n_targets times.
        per_target = np.linspace(0,
                                 n_chunks,
                                 n_targets,
                                 endpoint=False,
                                 dtype=int)  # np.int alias no longer exists
        order = np.argsort(ds.targets)
        ds.chunks[order] = np.tile(per_target, len(ds.uniquetargets))

    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization in ('feature', 'both'):
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)

    if normalization in ('sample', 'both'):
        # Normalizing image-wise: centre and scale every sample (row).
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds.samples, axis=1)[:, None]
        ds.samples /= np.std(ds.samples, axis=1)[:, None]

        # Rows with zero standard deviation produce NaN; zero them out.
        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(chunks=ds.sa.chunks, targets=ds.sa.targets)

    return ds
Exemplo n.º 14
0
	ds.sa['chunks'] = np.ones(ds.nsamples)*i
	print ds.shape
	Ds.append(ds)
	
# Stack the datasets collected in Ds into one and store samples as float32.
ds = mvpa.vstack(Ds)
ds.samples = ds.samples.astype('float32')

#Detrending and MC removal: polynomial detrending per chunk, with the sample
#attributes 'mc_<param>' (one per entry of `mc`) as additional regressors
mvpa.poly_detrend(ds,
		  opt_regs=['mc_'+param  for param in mc],
		  chunks_attr='chunks'
		  )
		  
#Voxelwise Zscore (optional, controlled by `zsc`)
if zsc:
	mvpa.zscore(ds)

#bandpass filter: order-5 Butterworth, applied forward and backward
#(filtfilt) to every column of ds.samples
# nf is used to normalize the cutoffs - presumably the Nyquist frequency
# with TR in seconds; TODO confirm
nf = 0.5/TR
# NOTE(review): the cutoffs are computed as 1/lf and 1/hf, which suggests lf
# and hf are periods rather than frequencies - confirm. If they are ints,
# Python 2 performs integer division here.
ws = [(1/lf)/nf, (1/hf)/nf]
b, a = signal.butter(5, ws, btype='band')
S = [signal.filtfilt(b, a, x) for x in ds.samples.T]
ds.samples = np.array(S).T
ds.samples = ds.samples.astype('float32')

#Create Event-related Dataset
onsets = np.arange(0,ds.nsamples - samples_size/TR, samples_size/TR)
events = []
for on in onsets:
	Ev = dict()
	Ev['onset'] = on
Exemplo n.º 15
0
def preprocessing(ds_p, ref_space, warp_files, mask_p, **kwargs):
    """Warp one run to a reference space and load it as a dataset.

    The run is warped with ``warp_image``, loaded with ``mvpa.fmri_dataset``
    (optionally masked and with ROI feature attributes), annotated with
    participant, movie type and chunk, and then optionally detrended,
    z-scored and turned into an event-related dataset.

    Parameters
    ----------
    ds_p : str
        Path of the run to preprocess. The two characters starting four
        positions after the first occurrence of "sub" are taken as the
        participant number.
    ref_space
        Reference image; converted with ``str``.
    warp_files : iterable of str
        Candidate warp files; the first one containing the participant
        number is used (``IndexError`` if none matches).
    mask_p
        Mask path, or None to load the warped data without a mask.
    kwargs : dict
        detrending - any value other than None enables linear detrending
            (default None).
        use_zscore - z-score the dataset (default True).
        use_events - build an event-related dataset (default False).
        anno_dir, targets, event_dur - passed to ``create_event_dict``.
        use_glm_estimates - fit an HRF model instead of extracting boxcar
            samples (default False).
        event_offset, event_dur - passed to the boxcar extraction.
        save_disc_space - if True (default) the warped file goes to one
            fixed temporary file name; otherwise to a per-run file under
            ``runs_for_testing``.
        rois - passed as ``add_fa`` to ``mvpa.fmri_dataset`` (default None).

    Returns
    -------
    The preprocessed dataset.

    Raises
    ------
    IOError
        If the warped path exists but is not a regular file.
    """
    # Fix: keep None as None. Converting unconditionally turned it into the
    # string 'None', so the unmasked branch below could never be reached.
    if mask_p is not None:
        mask_p = str(mask_p)
    ref_space = str(ref_space)
    detrending = kwargs.get('detrending', None)
    use_zscore = kwargs.get('use_zscore', True)

    use_events = kwargs.get('use_events', False)
    anno_dir = kwargs.get('anno_dir', None)
    use_glm_estimates = kwargs.get('use_glm_estimates', False)
    targets = kwargs.get('targets', None)
    event_offset = kwargs.get('event_offset', None)
    event_dur = kwargs.get('event_dur', None)
    save_disc_space = kwargs.get('save_disc_space', True)

    rois = kwargs.get('rois', None)

    # Pick the warp file that belongs to this participant.
    sub_pos = ds_p.find("sub")
    vp_num_str = ds_p[(sub_pos + 4):(sub_pos + 6)]
    warp_file = [candidate for candidate in warp_files
                 if vp_num_str in candidate][0]
    part_info = find_participant_info(ds_p)

    # Temporary files live in <parent of cwd>/data/tmp.
    tmp_dir = Path.cwd().parents[0].joinpath("data", "tmp")
    if save_disc_space:
        temp_file = str(tmp_dir.joinpath("tmp_warped_data_file.nii.gz"))
    else:
        temp_file_add = "sub-{}_{}-movie_run-{}_warped_file.nii.gz".format(
            part_info[0], part_info[1], int(part_info[2]))
        temp_file = str(tmp_dir.joinpath("runs_for_testing", temp_file_add))

    warped_ds = warp_image(ds_p, ref_space, warp_file, temp_file,
                           save_disc_space=save_disc_space)

    # Poll every 5 seconds until the warped file shows up (no timeout).
    while not os.path.exists(warped_ds):
        time.sleep(5)

    # Fix: fail explicitly instead of hitting an unbound `ds` further down.
    if not os.path.isfile(warped_ds):
        raise IOError(
            "warped dataset {} exists but is not a regular file".format(
                warped_ds))

    # Only pass the optional arguments that were actually requested.
    load_kwargs = {}
    if mask_p is not None:
        load_kwargs['mask'] = get_adjusted_mask(mask_p, ref_space)
    if rois is not None:
        load_kwargs['add_fa'] = rois
    ds = mvpa.fmri_dataset(samples=warped_ds, **load_kwargs)

    ds.sa['participant'] = [int(part_info[0])]
    ds.sa["movie_type"] = [part_info[1]]
    ds.sa['chunks'] = [int(part_info[2])]
    # NOTE(review): any non-None value, including False, enables detrending.
    if detrending is not None:
        detrender = mvpa.PolyDetrendMapper(polyord=1)
        ds = ds.get_mapped(detrender)
    if use_zscore:
        mvpa.zscore(ds)
    if use_events:
        events = create_event_dict(anno_dir, ds_p, targets, event_dur)
        if use_glm_estimates:
            ds = mvpa.fit_event_hrf_model(ds, events, time_attr='time_coords',
                                          condition_attr='targets')
        else:
            ds = mvpa.extract_boxcar_event_samples(ds, events=events, time_attr='time_coords',
                                                   match='closest', event_offset=event_offset,
                                                   event_duration=event_dur, eprefix='event',
                                                   event_mapper=None)
            ds = fix_info_after_events(ds)
    return ds
        print "masked data has", dataset.shape[1], "voxels in each of", dataset.shape[0], "volumes"
        print "... which means that", round(
            100 - 100 * dataset.shape[1] / N.product(dataset.a.voxel_dim)
        ), "% of the voxels were masked out"
        print "of", dataset.shape[1], "remaining features ..."
        print "summary of conditions/volumes\n", datetime.datetime.now()
        print dataset.summary_targets()

        # DETREND
        print "detrending (remove slow drifts in signal, and jumps between runs) ...", datetime.datetime.now()  # can be very memory intensive!
        M.poly_detrend(dataset, polyord=1, chunks_attr="chunks")  # linear detrend
        print "... done", datetime.datetime.now()

        # ZSCORE
        print "zscore normalising (give all voxels similar variance) ...", datetime.datetime.now()
        M.zscore(dataset, chunks_attr="chunks", param_est=("targets", ["base"]))  # zscoring, on basis of rest periods
        print "... done", datetime.datetime.now()
        # P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

        pickleFile = gzip.open(preprocessedCache, "wb", 5)
        pickle.dump(dataset, pickleFile)

        # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
    print "averaging over trials ...", datetime.datetime.now()
    dataset = dataset.get_mapped(M.mean_group_sample(attrs=["chunks", "targets"]))
    print "... only", dataset.shape[0], "cases left now"
    dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

    # print '\n\n\n'
    # print dataset.targets
    # print len(dataset.targets)
Exemplo n.º 17
0
#mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj' + str(subj) + 'bold.nii.gz')

# Script: load every BOLD run of each subject from an openFMRI-layout dataset,
# stack the runs, linearly detrend per run, z-score, and save one HDF5 file
# per subject.

# Get the openFMRI dataset handle.
# NOTE(review): `datapath` is only assigned further down in this excerpt, so
# unless it is defined earlier in the file (not visible here) this line raises
# NameError -- confirm and move the `datapath` assignment above this point.
dhandle = mvpa.OpenFMRIDataset(datapath)
model = 1
task = 1

T3 = False
# Get the openFMRI dataset handle.
# NOTE(review): this repeats the handle/model/task setup above verbatim;
# presumably one of the two copies can be dropped -- confirm.
dhandle = mvpa.OpenFMRIDataset(datapath)
model = 1
task = 1

datapath = os.path.join('/home','data','psyinf','forrest_gump','anondata')
#boldlist = sorted(glob.glob(os.path.join(datapath,'task002*')))
flavor = 'dico_bold7Tp1_to_subjbold7Tp1'

for subj in xrange(1,20):
    # Per-subject mask file; the subject number is zero-padded to two digits.
    mask_fname = os.path.join('/home','mboos','SpeechEncoding','temporal_lobe_mask_brain_subj%02dbold.nii.gz' % subj)

    # Load all runs of this subject; each run's chunk label is run_id - 1.
    run_datasets = []
    for run_id in dhandle.get_task_bold_run_ids(task)[subj]:
        run_ds = dhandle.get_bold_run_dataset(subj,task,run_id,chunks=run_id-1,mask=mask_fname,flavor=flavor)
        run_datasets.append(run_ds)
    s1ds = mvpa.vstack(run_datasets)
    # Linear detrend within each chunk (run), then z-score with zscore's
    # default arguments.
    mvpa.poly_detrend(s1ds,polyord=1,chunks_attr='chunks')
    mvpa.zscore(s1ds)
    # NOTE(review): the output name uses str(subj) (no zero padding) while the
    # mask name above uses %02d -- confirm this difference is intended.
    s1ds.save(os.path.join('/home','mboos','SpeechEncoding','PreProcessed','FG_subj' + str(subj) + 'pp.gzipped.hdf5'),compression=9)

Exemplo n.º 18
0
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: detrend, filter labels, optionally average
    samples, normalize, and attach the event list.

    Linear detrending is done per 'file' (only when the dataset holds more
    than one file) and then per 'chunks'. Feature-wise z-scoring is done per
    'file' when ``img_dim == 4`` and then with zscore's default arguments.

    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed
    type_ : string
        The experiment to be processed (not used by this function; kept so
        the call signature stays unchanged)
    kwargs : dict
        mean_samples - boolean or 'True'/'False' : if samples should be
            averaged by 'event_num' (default False)
        label_included - comma-separated string or list : labels to keep;
            'all' (default) keeps every label
        label_dropped - string : label to be dropped (rest, fixation);
            'None' (default) drops nothing
        img_dim - int : when 4, an extra z-scoring per 'file' is applied
            (default None, i.e. no extra z-scoring)
        normalization - string : 'feature' (default), 'sample' or 'both'

    Returns
    -------
    Dataset
        the processed dataset


    """
    # Every option gets a default so that omitting a keyword no longer
    # raises UnboundLocalError further down.
    mean = kwargs.get('mean_samples', False)
    normalization = str(kwargs.get('normalization', 'feature'))
    label_dropped = kwargs.get('label_dropped', 'None')

    label_included = kwargs.get('label_included', 'all')
    if hasattr(label_included, 'split'):
        # Comma-separated string, as read from a configuration file.
        label_included = label_included.split(',')
    else:
        # Already a sequence of labels, as the docstring advertises.
        label_included = list(label_included)

    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)

    logger.info('Dataset preprocessing: Detrending...')
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord=1, chunks_attr='file')
    poly_detrend(ds, polyord=1, chunks_attr='chunks')

    if label_dropped is not None and label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]
    if label_included != ['all']:
        keep = np.array([l in label_included for l in ds.sa.targets],
                        dtype='bool')
        ds = ds[keep]

    # Accepts both the boolean True and the string 'True'.
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization in ('feature', 'both'):
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)

    if normalization in ('sample', 'both'):
        # Normalizing image-wise: centre and scale every sample (row).
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]

        # Rows with zero variance become NaN after the division; zero them.
        ds.samples[np.isnan(ds.samples)] = 0

    ds.a.events = find_events(chunks=ds.sa.chunks,
                              targets=ds.sa.targets)

    return ds
Exemplo n.º 19
0
        Pstim = get_stim_for_test_fold(run)
        if run == 4:
            Presp = mv.gifti_dataset(
                os.path.join(
                    sam_data_dir,
                    '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                    format(p, tr[run], run, h))).samples[4:-14, :]
        else:
            Presp = mv.gifti_dataset(
                os.path.join(
                    sam_data_dir,
                    '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                    format(p, tr[run], run, h))).samples[4:-7, :]

        mv.zscore(Presp, chunks_attr=None)
        forward_resp = mappers[p].forward(Presp)
        mv.zscore(forward_resp, chunks_attr=None)

        print("Loaded stim and resp data. Doing prediction...")
        pred = np.dot(Pstim, wt)
        print(Pstim.shape, wt.shape, pred.shape)

        mv.zscore(pred, chunks_attr=None)
        forward_pred = mappers[p].forward(pred)
        mv.zscore(forward_pred, chunks_attr=None)
        print(forward_pred.shape, Presp.shape)

        # Find prediction correlations
        nnpred = np.nan_to_num(forward_pred)
        corrs = np.nan_to_num(
Exemplo n.º 20
0
                            '{0}_task-life_acq-{1}vol_run-0{2}.lh.tproject.gii'
                            .format(participant, tr[run], run))))
            else:
                rh = load_data(
                    os.path.join(
                        sam_data_dir,
                        '{0}_task-life_acq-{1}vol_run-0{2}.rh.tproject.gii'.
                        format(participant, tr[run], run)))
                lh = load_data(
                    os.path.join(
                        sam_data_dir,
                        '{0}_task-life_acq-{1}vol_run-0{2}.lh.tproject.gii'.
                        format(participant, tr[run], run)))
            rh = rh.samples
            lh = lh.samples
            mv.zscore(rh, chunks_attr=None)
            mv.zscore(lh, chunks_attr=None)

            ds = np.concatenate((rh, lh), axis=1)

            fc += 2
            print('file {0}/{1} loaded'.format(fc, len(participants) * 2))

            fmri.append(ds)

        print(len(fmri))

        print('Computing pairwise correlations...')
        n_nodes = fmri[0].shape[1]
        n_sub = len(fmri)
        print(n_nodes)
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# load all subjects
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into pymvpa datasets, zscore, then add voxel indices, append to list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    #ds.sa['chunks'] = np.repeat(i,cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)
    
    
print('Number of data sets in dss: ')
print(len(dss))
print('Size of data sets: ')
print(dss[0].shape)
    
# create SL hyperalignment instance
hyper = SearchlightHyperalignment(
    queryengine=qe,
    compute_recon=False, # We don't need to project back from common space to subject space
    nproc=1, 
    nblocks=N_BLOCKS,
    dtype ='float64'
Exemplo n.º 22
0
    print("Loaded movie data for participant {0}".format(participant))

    # Perform linear detrending per chunk
    mv.poly_detrend(movie_ds, polyord=polyord, chunks_attr='chunks')

    # Perform low-pass filtering per chunk
    movie_ds.samples = clean(movie_ds.samples,
                             sessions=movie_ds.sa.chunks,
                             low_pass=.1,
                             high_pass=None,
                             t_r=2.0,
                             detrend=False,
                             standardize=False)

    # Z-score movie time series per chunk
    mv.zscore(movie_ds, chunks_attr='chunks')
    print("Finished preprocessing (detrending, z-scoring) for participant {0}".
          format(participant))

    # Load ROI masks and attach them to movie data
    all_rois_mask = np.array([['brain'] * movie_ds.shape[1]]).astype('S10')
    for roi in rois:
        # Get filenames for potential right and left ROI masks
        if roi == 'VIS':
            roi_fns = sorted(
                glob(base_dir + participant + anat_dir +
                     '{0}_*_mask_tmpl.nii.gz'.format(roi)))
        else:
            left_roi_fns = sorted(
                glob(base_dir + participant + anat_dir +
                     'l{0}_*_mask_tmpl.nii.gz'.format(roi)))
Exemplo n.º 23
0
featsel = SelectKBest(f_classif, k=K_FEATS)
clf = LogisticRegression(penalty='l2', multi_class='ovr', solver='liblinear')

#################
##  LOAD DATA  ##
#################

map_ds_dict, mem_ds_dict = load_data(MASK)

# preprocess
for d in [mem_ds_dict, map_ds_dict]:
    for ds in d.values():
        mvpa2.remove_invariant_features(ds)
        mvpa2.poly_detrend(ds, polyord=1, chunks_attr='chunks')
        mvpa2.zscore(ds, chunks_attr='chunks')

##############################################################
##  build and convert to common space using hyperalignment  ##
##############################################################

# select features based on localizer data
fsel_masks = [
    featsel.fit(ds.samples, ds.targets).get_support()
    for ds in map_ds_dict.values()
]
# apply feature selection to all data (localizer and memory)
fs_mapds_list = [
    ds[:, mask] for ds, mask in zip(map_ds_dict.values(), fsel_masks)
]
fs_memds_list = [
# load in all of the data into the dataframe
targets = range(1, 21)
ds = None
for x in range(len(files)):
    chunks = [x + 1] * 20
    d = mv.gifti_dataset(files[x], chunks=chunks, targets=targets)
    d.sa['conditions'] = conditions
    d.sa['taxonomy'] = taxonomy
    d.sa['behavior'] = behavior
    if ds is None:
        ds = d
    else:
        ds = mv.vstack((ds, d))
ds.fa['node_indices'] = range(ds.shape[1])
# zscore all of our samples
mv.zscore(ds, chunks_attr='chunks', dtype='float32')
# load in surface and get searchlight query
radius = 10
surface = mv.surf.read(join(data_path, '{0}.pial.gii'.format(hemi)))
# this is an arbitrary radius and distance metric!
query = mv.SurfaceQueryEngine(surface, radius, distance_metric='dijkstra')
# based off PyMVPA tutorial
clf = mv.LinearNuSVMC(space=predict)

cv = mv.CrossValidation(clf,
                        mv.NFoldPartitioner(attr=train_on),
                        errorfx=lambda p, t: np.mean(p == t),
                        enable_ca=['stats'])
searchlights = mv.Searchlight(cv,
                              queryengine=query,
                              postproc=mv.mean_sample(),
Exemplo n.º 25
0
        print 'of', dataset.shape[1], 'remaining features ...'
        print 'summary of conditions/volumes\n', datetime.datetime.now()
        print dataset.summary_targets()

        # DETREND
        print 'detrending (remove slow drifts in signal, and jumps between runs) ...', datetime.datetime.now(
        )  # can be very memory intensive!
        M.poly_detrend(dataset, polyord=1,
                       chunks_attr='chunks')  # linear detrend
        print '... done', datetime.datetime.now()

        # ZSCORE
        print 'zscore normalising (give all voxels similar variance) ...', datetime.datetime.now(
        )
        M.zscore(dataset,
                 chunks_attr='chunks',
                 param_est=('targets',
                            ['base']))  # zscoring, on basis of rest periods
        print '... done', datetime.datetime.now()
        #P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

        pickleFile = gzip.open(preprocessedCache, 'wb', 5)
        pickle.dump(dataset, pickleFile)

    # AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
    print 'averaging over trials ...', datetime.datetime.now()
    dataset = dataset.get_mapped(
        M.mean_group_sample(attrs=['chunks', 'targets']))
    print '... only', dataset.shape[0], 'cases left now'
    dataset.chunks = N.mod(N.arange(0, dataset.shape[0]), 5)

    # print '\n\n\n'
		print 'functional input has',dataset.a.voxel_dim,'voxels of dimesions',dataset.a.voxel_eldim,'mm'
		print '... or',N.product(dataset.a.voxel_dim),'voxels per volume'
		print 'masked data has',dataset.shape[1],'voxels in each of',dataset.shape[0],'volumes'
		print '... which means that',round(100-100*dataset.shape[1]/N.product(dataset.a.voxel_dim)),'% of the voxels were masked out'
		print 'of',dataset.shape[1],'remaining features ...'
		print 'summary of conditions/volumes\n',datetime.datetime.now()
		print dataset.summary_targets()

		# DETREND
		print 'detrending (remove slow drifts in signal, and jumps between runs) ...',datetime.datetime.now() # can be very memory intensive!
		M.poly_detrend(dataset, polyord=1, chunks_attr='chunks') # linear detrend
		print '... done',datetime.datetime.now()

		# ZSCORE
		print 'zscore normalising (give all voxels similar variance) ...',datetime.datetime.now()
		M.zscore(dataset, chunks_attr='chunks', param_est=('targets', ['base'])) # zscoring, on basis of rest periods
		print '... done',datetime.datetime.now()
		#P.savefig(os.path.join(sessionPath,'pyMVPAimportDetrendZscore.png'))

		pickleFile = gzip.open(preprocessedCache, 'wb', 5);
		pickle.dump(dataset, pickleFile);

	# AVERAGE OVER MULTIPLE VOLUMES IN A SINGLE TRIAL
	print 'averaging over trials ...',datetime.datetime.now()
	dataset = dataset.get_mapped(M.mean_group_sample(attrs=['chunks','targets']))
	print '... only',dataset.shape[0],'cases left now'
	dataset.chunks = N.mod(N.arange(0,dataset.shape[0]),5)

	# print '\n\n\n'
	# print dataset.targets
	# print len(dataset.targets)
def clf_wrapper(ds):
    """Run pairwise NuSVC classification across chunks and hand the result
    to dist_wrapper.

    1 of 2 main functions. Computes the cross-validated classifier the
    inferences are based on.

    ``ds`` is z-scored in place per chunk. For every chunk in the
    module-level ``chunk_num`` that chunk is held out as the test fold, and
    for every target pair in the module-level ``pair_list2`` a NuSVC is fit
    on the remaining chunks and scored on the held-out chunk.

    Returns the first two values (``corrs1``, ``pvals1``) returned by
    ``dist_wrapper``.

    NOTE(review): only the ``test_accs`` of the *last* loop iteration is
    passed to ``dist_wrapper``; ``test_accs_per_chunk``, ``nfs_all_chunks``
    and ``val_chunks`` are filled but never read in this function -- confirm
    this is intended.
    """
    test_accs_per_chunk = []  # one list of pairwise accuracies per test fold
    val_accs_per_chunk = []
    nfs_all_chunks = []
    val_chunks = []
    mvpa2.zscore(ds, chunks_attr='chunks')
    for chunk in chunk_num:  # leave-one-chunk-out cross-validation
        # Randomly pick a validation chunk different from the test chunk.
        val_chunk = np.random.choice(ds.uniquechunks[ds.uniquechunks != chunk])
        val_chunks.append(val_chunk)

        def optimize_clf(nf, optimize=1):
            # Returns 1 - mean accuracy when optimize == 1, otherwise the
            # list of per-pair accuracies.
            acc_list = [
            ]  # accuracies for each pair within this cross-validation fold

            def nf_select(nf):
                # Split ds into (train, validation) when optimize >= 1 or
                # (train, test) when optimize == 0. `nf` is unused because
                # the feature selection below is commented out. For any other
                # value of `optimize` this returns None implicitly.
                #fselector = mvpa2.FixedNElementTailSelector(np.round(nf), tail='upper',mode='select', sort=False)
                #sbfs = mvpa2.SensitivityBasedFeatureSelection(mvpa2.OneWayAnova(), fselector, enable_ca=['sensitivities'], auto_train=True)
                if (optimize >= 1):
                    not_test_ds = ds[ds.chunks != chunk]
                    val_ds = not_test_ds[not_test_ds.chunks == val_chunk]
                    train_ds = not_test_ds[not_test_ds.chunks != val_chunk]
                    #sbfs.train(train_ds)
                    #train_ds = sbfs(train_ds)
                    #val_ds = sbfs(val_ds)
                    return train_ds, val_ds
                elif (optimize == 0):
                    train_ds = ds[ds.chunks != chunk]
                    test_ds = ds[ds.chunks == chunk]
                    #sbfs.train(train_ds)
                    #train_ds = sbfs(train_ds)
                    #test_ds = sbfs(test_ds)
                    return train_ds, test_ds

            train_ds, not_train_ds = nf_select(nf)
            for y in range(0, len(pair_list2)):

                def mask(y, train_ds, test_ds):
                    # Keep only the samples whose target is one of the two
                    # labels of pair y. Note: the `test_ds` parameter is not
                    # used; the held-out data is read from the enclosing
                    # `not_train_ds` instead.
                    stim_mask1 = (train_ds.targets == pair_list2[y][0]) | (
                        train_ds.targets == pair_list2[y][1])
                    stim_mask2 = (not_train_ds.targets == pair_list2[y][0]) | (
                        not_train_ds.targets == pair_list2[y][1])
                    ds_temp_train = train_ds[stim_mask1]
                    ds_temp_not_train = not_train_ds[stim_mask2]
                    return ds_temp_train, ds_temp_not_train

                ds_temp_train, ds_temp_not_train = mask(
                    y, train_ds, not_train_ds)
                #clf = mvpa2.LinearNuSVMC(nu=0.5)#defines a classifier, linear SVM in this case
                clf = NuSVC(nu=0.5, max_iter=2000)
                #clf = SKLLearnerAdapter(knn)
                #clf = SKLLearnerAdapter(linear_model.SGDClassifier())
                #clf.train(ds_temp_train)
                clf.fit(ds_temp_train.samples, ds_temp_train.targets)
                #predictions = clf.predict(ds_temp_not_train)
                predictions = clf.predict(ds_temp_not_train.samples)
                labels = ds_temp_not_train.targets
                bool_vec = predictions == labels
                acc_list.append(
                    sum(bool_vec) /
                    float(len(bool_vec)))  # fraction of correct predictions for this pair
            if (optimize == 1):
                #print len(acc_list)
                #print np.mean(acc_list)
                return 1 - np.mean(acc_list)
            else:
                #print np.mean(acc_list), 'for chunk:', chunk
                return acc_list

        #f = minimize_scalar(optimize_clf, bounds=(1, 1500), method='bounded', options={'maxiter': 20, 'xatol': 1e-05})
        #nf = int(np.round(f.x))
        # The optimisation above is disabled, so all features are used.
        nf = ds.shape[1]
        #val_accs = optimize_clf(nf, optimize=2)
        #val_accs_per_chunk.append(val_accs)
        test_accs = optimize_clf(nf, optimize=0)
        test_accs_per_chunk.append(test_accs)
        nfs_all_chunks.append(nf)
    #return test_accs_per_chunk,0,nfs_all_chunks,0;
    # `test_accs` here is the value left over from the final loop iteration.
    corrs1, pvals1, dist_list1, nf_list_dists1 = dist_wrapper(
        ds, test_accs, 0, 0)
    return corrs1, pvals1
Exemplo n.º 28
0
                h, run, t))
            for p in participants:
                wt = np.load(
                    os.path.join(
                        data_dir, '{0}-leftout{1}/{2}/{3}/weights.npy'.format(
                            t, run, p, h)))
                Pstim = get_stim_for_test_fold(run)
                Presp = mv.gifti_dataset(
                    os.path.join(
                        sam_data_dir,
                        '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.
                        format(p, tr[run], run, h))).samples

                pred = np.dot(Pstim, wt)

                mv.zscore(pred, chunks_attr=None)
                forward_pred = mappers[p].forward(pred)
                mv.zscore(pred, chunks_attr=None)

                # Find prediction correlations
                nnpred = np.nan_to_num(forward_pred)
                corrs = np.nan_to_num(
                    np.array([
                        np.corrcoef(Presp[:, ii], nnpred[:, ii].ravel())[0, 1]
                        for ii in range(Presp.shape[1])
                    ]))
                np.save(
                    os.path.join(
                        data_dir,
                        '{0}-leftout{1}/{2}/{3}/forward_corrs.npy'.format(
                            t, run, p, h)), corrs)
Exemplo n.º 29
0
def normalize_dataset(ds, **kwargs):
    """
    Optionally re-chunk, average and normalize a dataset, then attach the
    event list.

    Parameters
    ----------
    ds : Dataset
        The dataset to be normalized
    kwargs : dict
        mean_samples - boolean or 'True'/'False' : if samples should be
            averaged by 'event_num' (default False)
        img_dim - int : when 4, an extra z-scoring per 'file' is applied
            (default None, i.e. no extra z-scoring)
        normalization - string : 'feature' (default), 'sample' or 'both'
        chunk_number - None, 'adaptive' or int-like : when not None the
            chunk labels are rewritten so that the samples of each target
            are spread over that many chunks. 'adaptive' picks the largest
            common divisor of the smallest per-target sample count and the
            numbers 2..9, falling back to 4 when that divisor is 1.

    Returns
    -------
    Dataset
        the processed dataset
    """
    import collections
    try:
        from math import gcd
    except ImportError:  # Python 2: math.gcd does not exist
        from fractions import gcd

    mean = kwargs.get('mean_samples', False)
    normalization = str(kwargs.get('normalization', 'feature'))
    chunk_number = kwargs.get('chunk_number')
    # Default to None so a missing 'img_dim' no longer raises
    # UnboundLocalError in the feature-wise branch.
    img_dim = kwargs.get('img_dim')
    if img_dim is not None:
        img_dim = int(img_dim)

    if chunk_number is not None:
        # Smallest number of samples any single target has.
        n_targets = int(min(collections.Counter(ds.targets).values()))

        if chunk_number == 'adaptive':
            n_chunks = max(gcd(n_targets, i) for i in range(2, 10))
            if n_chunks == 1:
                n_chunks = 4
        else:
            # Also accepts NumPy integers and numeric strings; anything else
            # fails here with a clear ValueError/TypeError.
            n_chunks = int(chunk_number)

        # Chunk labels for the samples of one target, repeated per target.
        # This relies on every target having exactly n_targets samples;
        # otherwise the assignment below fails with a shape mismatch.
        per_target = np.linspace(0, n_chunks, n_targets,
                                 endpoint=False, dtype=int)
        order = np.argsort(ds.targets)
        ds.chunks[order] = np.tile(per_target, len(ds.uniquetargets))

    # Accepts both the boolean True and the string 'True'.
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num'])
        ds = ds.get_mapped(avg_mapper)

    if normalization in ('feature', 'both'):
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)

    if normalization in ('sample', 'both'):
        # Normalizing image-wise: centre and scale every sample (row).
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]

        # Rows with zero variance become NaN after the division; zero them.
        ds.samples[np.isnan(ds.samples)] = 0

    # Find event related stuff
    ds.a.events = find_events(chunks=ds.sa.chunks,
                              targets=ds.sa.targets)

    return ds
Exemplo n.º 30
0
def buildadataset(zscore, rois, event_path=None):
    """Build per-participant localizer datasets with ROI feature attributes.

    buildadataset() loads the four 'objectcategories' runs of every
    participant found under ``base_dir``, optionally z-scores them, attaches
    one feature attribute per ROI (plus a lateralized 'lat_<roi>' attribute
    for every ROI other than 'VIS' and a combined 'all_ROIs' attribute),
    saves the results as hdf5, stacks all participants feature-wise and
    returns the transposed group dataset.

    Parameters
    ----------
    zscore : str
        'custom'  - z-score per chunk with the means/stds returned by
                    ``extract_baseline`` for the events from
                    ``get_group_events(event_path)``;
        'z-score' - plain z-scoring per chunk;
        anything else - no z-scoring.
    rois : iterable of str
        ROI names. 'VIS' is treated as a single (non-lateralized) ROI; every
        other name is looked up as 'l<roi>_*' and 'r<roi>_*' mask files.
    event_path : optional
        Passed to ``get_group_events``; only used when zscore == 'custom'.

    Returns
    -------
    Dataset
        The group dataset transposed so that time points are features.

    Relies on the module-level names ``base_dir``, ``locdir``, ``anat_dir``,
    ``results_dir`` and ``save_per_subject`` (not visible in this block).
    """
    print('I am building a dataset with the following option: {}.'.format(
        zscore))

    # get the participants and rois
    participants = sorted(
        [path.split('/')[-1] for path in glob(base_dir + 'sub-*')])
    localizer_dss = []

    for participant in participants:
        localizer_fns = sorted(glob(base_dir + participant + locdir + \
                                    '{}_task-objectcategories_run-*_space-custom-subject_desc-highpass_bold.nii.gz'.format(
                                        participant)))
        mask_fn = base_dir + participant + anat_dir + 'brain_mask.nii.gz'
        # Exactly four localizer runs are expected per participant.
        assert len(localizer_fns) == 4
        # Stack the runs; the chunk label of each run is its index in the
        # sorted file list.
        localizer_ds = mv.vstack([
            mv.fmri_dataset(localizer_fn, mask=mask_fn, chunks=run)
            for run, localizer_fn in enumerate(localizer_fns)
        ])

        localizer_ds.fa['participant'] = [participant] * localizer_ds.shape[1]
        print('loaded localizer data for participant {}.'.format(participant))

        # zscore the data with means and standard deviations from no-stimulation
        # periods
        if zscore == 'custom':
            events = get_group_events(event_path)
            means, stds = extract_baseline(events, localizer_ds)
            # zscore stuff
            mv.zscore(localizer_ds, params=(means, stds), chunks_attr='chunks')
            print('finished custom zscoring for participant {}.'.format(
                participant))
        elif zscore == 'z-score':
            mv.zscore(localizer_ds, chunks_attr='chunks')
            print('finished zscoring for participant {}.'.format(participant))
        else:
            print('I did not zscore.')

        # One label per voxel, initialised to 'brain' and overwritten below
        # with ROI names or 'overlap'.
        all_rois_mask = np.array([['brain'] * localizer_ds.shape[1]
                                  ]).astype('S10')
        for roi in rois:
            # Get filenames for potential right and left ROI masks
            if roi == 'VIS':
                roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                      '{0}_*_mask.nii.gz'.format(roi)))
            else:
                left_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                           'l{0}_*_mask.nii.gz'.format(roi)))
                right_roi_fns = sorted(glob(base_dir + participant + anat_dir + \
                                            'r{0}_*_mask.nii.gz'.format(roi)))
                roi_fns = left_roi_fns + right_roi_fns

            if len(roi_fns) == 0:
                print(
                    "ROI {0} does not exist for participant {1}; appending all zeros"
                    .format(roi, participant))
                roi_mask = np.zeros((1, localizer_ds.shape[1]))
            elif len(roi_fns) == 1:
                roi_mask = mv.fmri_dataset(roi_fns[0], mask=mask_fn).samples
            elif len(roi_fns) > 1:
                # Add ROI maps into single map
                print("Combining {0} {1} masks for participant {2}".format(
                    len(roi_fns), roi, participant))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                # Set any voxels that might exceed 1 to 1
                roi_mask = np.where(roi_mask > 0, 1, 0)
            # Ensure that number of voxels in ROI mask matches localizer data
            assert roi_mask.shape[1] == localizer_ds.shape[1]
            # Flatten mask into list
            roi_flat = list(roi_mask.ravel())
            # Assign ROI mask to localizer data feature attributes
            localizer_ds.fa[roi] = roi_flat
            # Get lateralized masks as well
            if roi != 'VIS':
                # Lateralized mask coding: 0 = outside ROI, 1 = left, 2 = right.
                # Right is written last, so a voxel in both masks ends up as 2.
                lat_roi_mask = np.zeros((1, localizer_ds.shape[1]))
                if len(left_roi_fns) == 1:
                    left_roi_mask = np.where(
                        mv.fmri_dataset(left_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) > 1:
                    left_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(left_roi_fn, mask=mask_fn).samples
                            for left_roi_fn in left_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[left_roi_mask > 0] = 1
                elif len(left_roi_fns) == 0:
                    left_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                if len(right_roi_fns) == 1:
                    right_roi_mask = np.where(
                        mv.fmri_dataset(right_roi_fns[0], mask=mask_fn).samples
                        > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) > 1:
                    right_roi_mask = np.where(
                        np.sum([
                            mv.fmri_dataset(right_roi_fn, mask=mask_fn).samples
                            for right_roi_fn in right_roi_fns
                        ],
                               axis=0) > 0, 1, 0)
                    lat_roi_mask[right_roi_mask > 0] = 2
                elif len(right_roi_fns) == 0:
                    right_roi_mask = np.zeros((1, localizer_ds.shape[1]))

                # Ensure that number of voxels in ROI mask matches localizer data
                assert lat_roi_mask.shape[1] == localizer_ds.shape[1]
                # Flatten mask into list
                lat_roi_flat = list(lat_roi_mask.ravel())
                # Assign ROI mask to localizer data feature attributes
                localizer_ds.fa['lat_' + roi] = lat_roi_flat
                # Check existing feature attribute for all ROIS for overlaps:
                # voxels of this ROI that already carry a label other than
                # 'brain' are marked 'overlap'.
                np.place(all_rois_mask,
                         ((left_roi_mask > 0) | (right_roi_mask > 0))
                         & (all_rois_mask != 'brain'), 'overlap')

                # Label the remaining (non-overlap) voxels with side and ROI.
                all_rois_mask[(left_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'left {0}'.format(roi)
                all_rois_mask[(right_roi_mask > 0) & (
                    all_rois_mask != 'overlap')] = 'right {0}'.format(roi)
            elif roi == 'VIS':
                # NOTE(review): this glob pattern starts with '/' whereas the
                # 'VIS' lookup at the top of the loop does not -- confirm both
                # resolve to the same files given how anat_dir is defined.
                roi_fns = sorted(
                    glob(base_dir + participant + anat_dir +
                         '/{0}_*_mask.nii.gz'.format(roi)))
                roi_mask = np.sum([
                    mv.fmri_dataset(roi_fn, mask=mask_fn).samples
                    for roi_fn in roi_fns
                ],
                                  axis=0)
                np.place(all_rois_mask,
                         (roi_mask > 0) & (all_rois_mask != 'brain'),
                         'overlap')
                all_rois_mask[(roi_mask > 0)
                              & (all_rois_mask != 'overlap')] = roi
        # Flatten mask into list
        all_rois_flat = list(all_rois_mask.ravel())
        # Assign ROI mask to localizer data feature attributes
        localizer_ds.fa['all_ROIs'] = all_rois_flat

        if save_per_subject:
            mv.h5save(base_dir + participant + locdir + \
                  '{}_ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5'.format(
                      participant), localizer_ds)
            print('Saved dataset for {}.'.format(participant))
        # join all datasets
        localizer_dss.append(localizer_ds)

    # save full dataset (the list of per-participant datasets)
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass.hdf5',
        localizer_dss)
    print('saved the collection of all subjects datasets.')
    # squish everything together: stack all participants along the feature axis
    ds_wide = mv.hstack(localizer_dss)

    # transpose the dataset, time points are now features; sample and feature
    # attributes swap roles accordingly
    ds = mv.Dataset(ds_wide.samples.T,
                    sa=ds_wide.fa.copy(),
                    fa=ds_wide.sa.copy())
    mv.h5save(
        results_dir +
        'ses-localizer_task-objectcategories_ROIs_space-custom-subject_desc-highpass_transposed.hdf5',
        ds)
    print('Transposed the group-dataset and saved it.')
    return ds
Exemplo n.º 31
0
        target_list.append(targets)
        sample_list.append(samples)
        chunk_list.append(subject_list_chunks)
        band_list.append(band_)

targets = np.hstack(target_list)
samples = np.vstack(sample_list)
chunks = np.hstack(chunk_list)

zsamples = sc_zscore(samples, axis=0)

ds = dataset_wizard(zsamples, targets=targets, chunks=chunks)
ds.sa['band'] = np.hstack(band_list)

zscore(ds)

n_folds = [4]
#n_feats = np.arange(10, 1220, 50)
n_feats = [10]
err_lst = []

sens_mat = []

for k in n_folds:
    for n in n_feats:
        #fsel = SensitivityBasedFeatureSelection(OneWayAnova(),
        #                                       FixedNElementTailSelector(
        #                                                               n, mode = 'select',tail = 'upper'))
        '''
        rfesvm_split = SplitClassifier(LinearCSVMC())
Exemplo n.º 32
0
        ds = ds[4:]
    ds.sa['chunks'] = np.ones(ds.nsamples) * i
    print ds.shape
    Ds.append(ds)

ds = mvpa.vstack(Ds)
ds.samples = ds.samples.astype('float32')

#Detrending and MC removal
mvpa.poly_detrend(ds,
                  opt_regs=['mc_' + param for param in mc],
                  chunks_attr='chunks')

#Voxelwise Zscore
if zsc:
    mvpa.zscore(ds)

#bandpass filter
nf = 0.5 / TR
ws = [(1 / lf) / nf, (1 / hf) / nf]
b, a = signal.butter(5, ws, btype='band')
S = [signal.filtfilt(b, a, x) for x in ds.samples.T]
ds.samples = np.array(S).T
ds.samples = ds.samples.astype('float32')

#Create Event-related Dataset
onsets = np.arange(0, ds.nsamples - samples_size / TR, samples_size / TR)
events = []
for on in onsets:
    Ev = dict()
    Ev['onset'] = on