def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
        """Helper to return trained query engine(s), either list of one or one per each dataset

        if queryengine is None then IndexQueryEngine based on radius is created
        """
        ndatasets = len(datasets)
        if queryengine:
            if isinstance(queryengine, (list, tuple)):
                queryengines = queryengine
                if len(queryengines) != ndatasets:
                    raise ValueError(
                        "%d query engines were specified although %d datasets "
                        "provided" % (len(queryengines), ndatasets))
                _shpaldebug("Training provided query engines")
                for qe, ds in zip(queryengines, datasets):
                    qe.train(ds)
            else:
                queryengine.train(datasets[ref_ds])
                queryengines = [queryengine]
        else:
            _shpaldebug('No custom query engines were provided. Setting up the '
                        'volumetric query engine on voxel_indices.')
            queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
        return queryengines
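
# Illustrative usage sketch (hypothetical `dss`: a list of datasets carrying a
# 'voxel_indices' feature attribute); with queryengine=None the helper above
# falls back to a single volumetric IndexQueryEngine trained on the reference
# dataset.
from mvpa2.algorithms.searchlight_hyperalignment import SearchlightHyperalignment

sl_hyper = SearchlightHyperalignment()
qes = sl_hyper._get_trained_queryengines(dss, queryengine=None, radius=3, ref_ds=0)
assert len(qes) == 1  # one shared engine, trained on dss[0]
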
def local_random_affine_transformations(
        ds, distort_seeds, distort_neighbor, space, scale_fac=100,
        shift_fac=10):
    """Distort a dataset in the local neighborhood of selected features.

    This function is similar to ``random_affine_transformation()``, but applies
    multiple random affine transformations, each restricted to a spatially
    constrained local neighborhood.

    Parameters
    ----------
    ds : Dataset
      The dataset to be transformed/distorted.
    distort_seeds : list(int)
      This is a sequence of feature ids (referring to the input dataset) that
      serve as anchors to determine the local neighborhoods for the
      distortions. The number of seeds also determines the number of distinct
      local distortions that are going to be applied.
    distort_neighbor : callable
      An object that, when called with a coordinate, generates a sequence of
      coordinates that comprise its neighborhood (see e.g. ``Sphere()``).
    space : str
      Name of the feature attribute of the input dataset that contains the
      relevant feature coordinates (e.g. 'voxel_indices').
    scale_fac : float
      See ``random_affine_transformation()``
    shift_fac : float
      See ``random_affine_transformation()``

    Returns
    -------
    Dataset
      A dataset derived from the input dataset with added local distortions.
    """
    # which dataset attributes to aggregate
    random_stats = ['random_rotation', 'random_scale', 'random_shift']
    kwa = {space: distort_neighbor}
    qe = IndexQueryEngine(**kwa)
    qe.train(ds)
    ds_distorted = ds.copy()
    for stat in random_stats:
        ds_distorted.a[stat + 's'] = {}
    # for each seed region
    for seed in distort_seeds:
        # select the neighborhood for this seed
        # take data from the distorted dataset to avoid
        # 'losing' previous distortions
        distort_ids = qe[seed]
        ds_d = random_affine_transformation(
                               ds_distorted[:, distort_ids],
                               scale_fac=scale_fac,
                               shift_fac=shift_fac)
        # recover the distortions stats for this seed
        for stat in random_stats:
            ds_distorted.a[stat + 's'].value[seed] = ds_d.a[stat].value
        # put the freshly distorted data back
        ds_distorted.samples[:, distort_ids] = ds_d.samples
    return ds_distorted
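
# Illustrative call (sketch; assumes a dataset `ds` with a 'voxel_indices'
# feature attribute): distort two neighborhoods of radius 1 around features
# 0 and 10.  The per-seed distortion parameters are recorded in dataset
# attributes, e.g. ds_warped.a.random_rotations.value[0].
from mvpa2.misc.neighborhood import Sphere

ds_warped = local_random_affine_transformations(
    ds, distort_seeds=[0, 10], distort_neighbor=Sphere(1),
    space='voxel_indices', scale_fac=100, shift_fac=10)
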
    def test_1d_multispace_searchlight(self):
        ds = Dataset([np.arange(6)])
        ds.fa['coord1'] = np.repeat(np.arange(3), 2)
        # add a second space to the dataset
        ds.fa['coord2'] = np.tile(np.arange(2), 3)
        measure = lambda x: "+".join([str(x) for x in x.samples[0]])
        # simply select each feature once
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(1)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])
        res = Searchlight(measure,
                          IndexQueryEngine(coord1=Sphere(1), coord2=Sphere(0)),
                          nproc=1)(ds)
        assert_array_equal(res.samples,
                           [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])
def sphere_searchlight(datameasure,
                       radius=1,
                       center_ids=None,
                       space='voxel_indices',
                       **kwargs):
    """Creates a `Searchlight` to run a scalar `Measure` on
    all possible spheres of a certain size within a dataset.

    The idea for a searchlight algorithm stems from a paper by
    :ref:`Kriegeskorte et al. (2006) <KGB06>`.

    Parameters
    ----------
    datameasure : callable
      Any object that takes a :class:`~mvpa2.datasets.base.Dataset`
      and returns some measure when called.
    radius : int
      All features within this radius around the center will be part
      of a sphere. Radius is in grid-indices, i.e. ``1`` corresponds
      to all immediate neighbors, regardless of the physical distance.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. Alternatively, this can be the name of a feature attribute
      of the input dataset, whose non-zero values determine the feature
      ids.  By default all features will be used (the value is passed as the
      ``roi_ids`` argument of `Searchlight`).
    space : str
      Name of a feature attribute of the input dataset that defines the spatial
      coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of its
      base-class :class:`~mvpa2.measures.base.Measure`.

    Notes
    -----
    If `Searchlight` is used as `SensitivityAnalyzer` one has to make
    sure that the specified scalar `Measure` returns large
    (absolute) values for high sensitivities and small (absolute) values
    for low sensitivities. In particular, when using error functions, low
    values usually imply high performance and therefore high sensitivity.
    This would in turn result in sensitivity maps that have low
    (absolute) values indicating high sensitivities and this conflicts
    with the intended behavior of a `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return Searchlight(datameasure,
                       queryengine=qe,
                       roi_ids=center_ids,
                       **kwargs)
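
# Typical usage sketch (assumes a preprocessed fMRI dataset `ds` with
# 'targets', 'chunks' and 'voxel_indices'): cross-validated GNB accuracy in
# spheres of radius 3 voxels, averaged across folds.
import numpy as np
from mvpa2.clfs.gnb import GNB
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.mappers.fx import mean_sample

cv = CrossValidation(GNB(), NFoldPartitioner(),
                     errorfx=lambda p, t: np.mean(p == t))
sl = sphere_searchlight(cv, radius=3, postproc=mean_sample())
accuracy_map = sl(ds)   # one accuracy value per sphere center
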
def sphere_gnbsearchlight(gnb,
                          generator,
                          radius=1,
                          center_ids=None,
                          space='voxel_indices',
                          *args,
                          **kwargs):
    """Creates a `GNBSearchlight` to assess :term:`cross-validation`
    classification performance of GNB on all possible spheres of a
    certain size within a dataset.

    The idea of taking advantage of the naiveness of GNB for the sake of
    quick searchlighting stems from Francisco Pereira (paper under
    review).

    Parameters
    ----------
    radius : float
      All features within this radius around the center will be part
      of a sphere.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. By default all features will be used (the value is passed
      as the ``roi_ids`` argument to `Searchlight`).
    space : str
      Name of a feature attribute of the input dataset that defines the spatial
      coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of
      :class:`~mvpa2.measures.gnbsearchlight.GNBSearchlight`.

    Notes
    -----
    If any `BaseSearchlight` is used as `SensitivityAnalyzer` one has to make
    sure that the specified scalar `Measure` returns large
    (absolute) values for high sensitivities and small (absolute) values
    for low sensitivities. In particular, when using error functions, low
    values usually imply high performance and therefore high sensitivity.
    This would in turn result in sensitivity maps that have low
    (absolute) values indicating high sensitivities and this conflicts
    with the intended behavior of a `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return GNBSearchlight(gnb,
                          generator,
                          qe,
                          roi_ids=center_ids,
                          *args,
                          **kwargs)
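
# Usage sketch (assumes the same kind of dataset `ds` as above): the GNB fast
# path evaluates the same sphere layout but reuses sufficient statistics, so it
# is typically much faster than a generic sphere_searchlight with GNB inside.
from mvpa2.clfs.gnb import GNB
from mvpa2.generators.partition import NFoldPartitioner

gnb_sl = sphere_gnbsearchlight(GNB(), NFoldPartitioner(), radius=3)
error_map = gnb_sl(ds)   # per-sphere cross-validation error
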
    def test_add_center_fa(self):
        # just a smoke test pretty much
        ds = datasets['3dsmall'].copy()

        # check that we do not mark anything as center whenever there is none
        def check_no_center(ds):
            assert (not np.any(ds.fa.center))
            return 1.0

        # or just a single center in our case
        def check_center(ds):
            assert (np.sum(ds.fa.center) == 1)
            return 1.0

        for n, check in [(HollowSphere(1, 0), check_no_center),
                         (Sphere(0), check_center), (Sphere(1), check_center)]:
            Searchlight(check,
                        IndexQueryEngine(myspace=n),
                        add_center_fa='center')(ds)
            # and no changes to original ds data, etc
            assert_array_equal(datasets['3dsmall'].fa.keys(), ds.fa.keys())
            assert_array_equal(datasets['3dsmall'].samples, ds.samples)
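
# Quick illustration (assumed 1-D coordinate space) of why HollowSphere(1, 0)
# never flags a center in the test above: it only returns neighbors strictly
# farther than its inner radius, so the seed coordinate itself is excluded,
# while Sphere always includes it.
from mvpa2.misc.neighborhood import Sphere, HollowSphere

print(Sphere(1)((0,)))           # includes (0,) itself
print(HollowSphere(1, 0)((0,)))  # excludes (0,)
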
N_BLOCKS = 128
cnx_tx = 489
toutdir = os.path.join(
    basedir, 'transformation_matrices',
    'iterative_cha_olp4cbp_mappers' + '_subs-' + str(nsubs) +
    '_radius1-10_radius2-' + str(HYPERALIGNMENT_RADIUS) + '.hdf5.gz')
print(toutdir)

# load nifti as a pymvpa dataset and then use that as ref_ds in the queryengine definition
# mask with data in brainmask so only 170k (size of connectomes) voxels are included
ref_ds = fmri_dataset(os.path.join(helperfiles,'newbrainmask.nii'), mask=os.path.join(helperfiles,'newbrainmask.nii'))
print('Size of brain mask:')
print(str(len(ref_ds.fa.voxel_indices)))

# set searchlight sphere radius
sl_radius = HYPERALIGNMENT_RADIUS

#create query engine
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# collect all subject files (connectomes in common space) and keep the first nsubs
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into pymvpa datasets, zscore, then add voxel indices
# (chunks left commented out) and append each to the list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    #ds.sa['chunks'] = np.repeat(sub, cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)
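
# Hedged continuation sketch (assumes `import mvpa2.suite as mv` as used above
# and that SearchlightHyperalignment is available there): train searchlight
# hyperalignment with the query engine built on the brain mask and store the
# resulting mappers at the path printed earlier.
sl_hyper = mv.SearchlightHyperalignment(queryengine=qe, nproc=1,
                                        nblocks=N_BLOCKS, ref_ds=0)
mappers = sl_hyper(dss)
mv.h5save(toutdir, mappers, compression=9)
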
    def test_voxel_selection(self):
        '''Compare surface- and volume-based searchlight'''
        '''
        Tests whether results are identical for a surface-based
        searchlight (just one plane; Euclidean distance) and a volume-based
        searchlight.

        Note that the current value is a float; if it were int, it would
        specify the number of voxels in each searchlight'''

        radius = 10.
        '''Define input filenames'''
        epi_fn = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
        maskfn = os.path.join(pymvpa_dataroot, 'mask.nii.gz')
        '''
        Use the EPI datafile to define a surface.
        The surface has as many nodes as there are voxels
        and is parallel to the volume 'slice'
        '''
        vg = volgeom.from_any(maskfn, mask_volume=True)

        aff = vg.affine
        nx, ny, nz = vg.shape[:3]
        '''Plane goes in x and y direction, so we take these vectors
        from the affine transformation matrix of the volume'''
        plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)
        '''
        Simulate pial and white matter as just above and below
        the central plane
        '''
        normal_vec = aff[:3, 2]
        outer = plane + normal_vec
        inner = plane + -normal_vec
        '''
        Combine volume and surface information
        '''
        vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)
        '''
        Run voxel selection with specified radius (in mm), using
        Euclidean distance measure
        '''
        surf_voxsel = surf_voxel_selection.voxel_selection(vsm,
                                                           radius,
                                                           distance_metric='e')
        '''Define the measure'''

        # run_slow=True would give an actual cross-validation with meaningful
        # accuracies. Because this is a unit-test only the number of voxels
        # in each searchlight is tested.
        run_slow = False

        if run_slow:
            meas = CrossValidation(GNB(),
                                   OddEvenPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
            postproc = mean_sample
        else:
            meas = _Voxel_Count_Measure()
            postproc = lambda x: x
        '''
        Surface analysis: define the query engine, cross validation,
        and searchlight
        '''
        surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
        surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)
        '''
        new (Sep 2012): also test 'simple' queryengine wrapper function
        '''

        surf_qe2 = disc_surface_queryengine(radius,
                                            maskfn,
                                            inner,
                                            outer,
                                            plane,
                                            volume_mask=True,
                                            distance_metric='euclidean')
        surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)
        '''
        Same for the volume analysis
        '''
        element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
        sph = Sphere(radius, element_sizes=element_sizes)
        kwa = {'voxel_indices': sph}

        vol_qe = IndexQueryEngine(**kwa)
        vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)
        '''The following steps are similar to start_easy.py'''
        attr = SampleAttributes(
            os.path.join(pymvpa_dataroot, 'attributes_literal.txt'))

        mask = surf_voxsel.get_mask()

        dataset = fmri_dataset(samples=os.path.join(pymvpa_dataroot,
                                                    'bold.nii.gz'),
                               targets=attr.targets,
                               chunks=attr.chunks,
                               mask=mask)

        if run_slow:
            # do chunkswise linear detrending on dataset

            poly_detrend(dataset, polyord=1, chunks_attr='chunks')

            # zscore dataset relative to baseline ('rest') mean
            zscore(dataset,
                   chunks_attr='chunks',
                   param_est=('targets', ['rest']))

        # select class face and house for this demo analysis
        # would work with full datasets (just a little slower)
        dataset = dataset[np.array(
            [l in ['face', 'house'] for l in dataset.sa.targets],
            dtype='bool')]
        '''Apply searchlight to datasets'''
        surf_dset = surf_sl(dataset)
        surf_dset2 = surf_sl2(dataset)
        vol_dset = vol_sl(dataset)

        surf_data = surf_dset.samples
        surf_data2 = surf_dset2.samples
        vol_data = vol_dset.samples

        assert_array_equal(surf_data, surf_data2)
        assert_array_equal(surf_data, vol_data)
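
# For context, a minimal stand-in for the helper measure referenced above
# (hypothetical re-implementation; the real _Voxel_Count_Measure is defined in
# the surrounding test module): it simply reports how many features (voxels)
# each searchlight ROI contains.
from mvpa2.measures.base import Measure

class VoxelCountMeasure(Measure):
    is_trained = True

    def _call(self, dset):
        return dset.nfeatures
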
def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError(
                'cross-validation payload requires --learner and --partitioner'
            )
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    ds = arg2ds(args.data)
    if args.ds_preproc_fx is not None:
        ds = args.ds_preproc_fx(ds)
    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))
    # determine ROIs
    rids = None  # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]

    seed_ids = None
    if args.scatter_rois is not None:
        # scatter_neighborhoods among available ids if was requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']

    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we had to sub-select by scatterring among available rids
            # so we would need to get original ids
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on entire feature-set
            roi_ids = seed_ids

    verbose(
        3, 'Attempting %i ROI analyses' %
        ((roi_ids is None) and ds.nfeatures or len(roi_ids)))

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    #                 add_center_fa
    #                 tmp_prefix
    #                 nblocks
    #                 null_dist
    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]
    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
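
# Rough direct-API equivalent of what run() assembles (the names `measure`,
# `ds` and the radius below are placeholders; the real values come from the
# parsed command-line arguments above):
from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere
from mvpa2.measures.searchlight import Searchlight

qe = IndexQueryEngine(voxel_indices=Sphere(2))  # mirrors the neighborhood spec in args.neighbors
sl = Searchlight(measure, queryengine=qe, roi_ids=None)  # all features as ROI centers by default
res = sl(ds)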