def _get_trained_queryengines(self, datasets, queryengine, radius, ref_ds):
    """Helper to return trained query engine(s): either a one-element list
    or one engine per dataset.

    If queryengine is None, an IndexQueryEngine based on ``radius`` is created.
    """
    ndatasets = len(datasets)
    if queryengine:
        if isinstance(queryengine, (list, tuple)):
            queryengines = queryengine
            if len(queryengines) != ndatasets:
                raise ValueError(
                    "%d query engines were specified although %d datasets "
                    "provided" % (len(queryengines), ndatasets))
            _shpaldebug("Training provided query engines")
            for qe, ds in zip(queryengines, datasets):
                qe.train(ds)
        else:
            queryengine.train(datasets[ref_ds])
            queryengines = [queryengine]
    else:
        _shpaldebug('No custom query engines were provided. Setting up the '
                    'volumetric query engine on voxel_indices.')
        queryengine = IndexQueryEngine(voxel_indices=Sphere(radius))
        queryengine.train(datasets[ref_ds])
        queryengines = [queryengine]
    return queryengines
def local_random_affine_transformations(
        ds, distort_seeds, distort_neighbor, space, scale_fac=100, shift_fac=10):
    """Distort a dataset in the local neighborhood of selected features.

    This function is similar to ``random_affine_transformation()``, but applies
    multiple random affine transformations to a spatially constrained local
    neighborhood.

    Parameters
    ----------
    ds : Dataset
      The dataset to be transformed/distorted.
    distort_seeds : list(int)
      This is a sequence of feature ids (corresponding to the input dataset)
      that serve as anchors to determine the local neighborhood for a
      distortion. The number of seeds also determines the number of different
      local distortions that are going to be applied.
    distort_neighbor : callable
      An object that, when called with a coordinate, generates a sequence of
      coordinates that comprise its neighborhood (see e.g. ``Sphere()``).
    space : str
      Name of the feature attribute of the input dataset that contains the
      relevant feature coordinates (e.g. 'voxel_indices').
    scale_fac : float
      See ``random_affine_transformation()``.
    shift_fac : float
      See ``random_affine_transformation()``.

    Returns
    -------
    Dataset
      A dataset derived from the input dataset with added local distortions.
    """
    # which dataset attributes to aggregate
    random_stats = ['random_rotation', 'random_scale', 'random_shift']
    kwa = {space: distort_neighbor}
    qe = IndexQueryEngine(**kwa)
    qe.train(ds)
    ds_distorted = ds.copy()
    for stat in random_stats:
        ds_distorted.a[stat + 's'] = {}
    # for each seed region
    for seed in distort_seeds:
        # select the neighborhood for this seed
        # take data from the distorted dataset to avoid
        # 'losing' previous distortions
        distort_ids = qe[seed]
        ds_d = random_affine_transformation(ds_distorted[:, distort_ids],
                                            scale_fac=scale_fac,
                                            shift_fac=shift_fac)
        # recover the distortion stats for this seed
        for stat in random_stats:
            ds_distorted.a[stat + 's'].value[seed] = ds_d.a[stat].value
        # put the freshly distorted data back
        ds_distorted.samples[:, distort_ids] = ds_d.samples
    return ds_distorted
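# Illustrative usage sketch (an assumption, not part of the original module):
# distort a tiny synthetic dataset around two seed features, using a Sphere()
# neighborhood over a 1-d 'voxel_indices' feature attribute, mirroring the
# scalar-coordinate usage shown in the tests below. Assumes the function
# defined above is in scope.
import numpy as np
from mvpa2.datasets.base import Dataset
from mvpa2.misc.neighborhood import Sphere

ds_demo = Dataset(np.random.randn(4, 6))
ds_demo.fa['voxel_indices'] = range(ds_demo.shape[1])
ds_demo_distorted = local_random_affine_transformations(
    ds_demo, distort_seeds=[1, 4], distort_neighbor=Sphere(1),
    space='voxel_indices')
# per-seed distortion parameters are collected in e.g.
# ds_demo_distorted.a['random_rotations'].value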
def test_1d_multispace_searchlight(self):
    ds = Dataset([np.arange(6)])
    ds.fa['coord1'] = np.repeat(np.arange(3), 2)
    # add a second space to the dataset
    ds.fa['coord2'] = np.tile(np.arange(2), 3)
    measure = lambda x: "+".join([str(x) for x in x.samples[0]])
    # simply select each feature once
    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(0)),
                      nproc=1)(ds)
    assert_array_equal(res.samples, [['0', '1', '2', '3', '4', '5']])

    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(0), coord2=Sphere(1)),
                      nproc=1)(ds)
    assert_array_equal(res.samples,
                       [['0+1', '0+1', '2+3', '2+3', '4+5', '4+5']])

    res = Searchlight(measure,
                      IndexQueryEngine(coord1=Sphere(1), coord2=Sphere(0)),
                      nproc=1)(ds)
    assert_array_equal(res.samples,
                       [['0+2', '1+3', '0+2+4', '1+3+5', '2+4', '3+5']])
def sphere_searchlight(datameasure, radius=1, center_ids=None,
                       space='voxel_indices', **kwargs):
    """Creates a `Searchlight` to run a scalar `Measure` on all possible
    spheres of a certain size within a dataset.

    The idea for a searchlight algorithm stems from a paper by
    :ref:`Kriegeskorte et al. (2006) <KGB06>`.

    Parameters
    ----------
    datameasure : callable
      Any object that takes a :class:`~mvpa2.datasets.base.Dataset` and
      returns some measure when called.
    radius : int
      All features within this radius around the center will be part of a
      sphere. Radius is in grid-indices, i.e. ``1`` corresponds to all
      immediate neighbors, regardless of the physical distance.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. Alternatively, this can be the name of a feature attribute of
      the input dataset, whose non-zero values determine the feature ids. By
      default all features will be used (it is passed as the ``roi_ids``
      argument of Searchlight).
    space : str
      Name of a feature attribute of the input dataset that defines the
      spatial coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of its base-class
      :class:`~mvpa2.measures.base.Measure`.

    Notes
    -----
    If `Searchlight` is used as a `SensitivityAnalyzer` one has to make sure
    that the specified scalar `Measure` returns large (absolute) values for
    high sensitivities and small (absolute) values for low sensitivities.
    Especially when using error functions, low values usually imply high
    performance and therefore high sensitivity. This would in turn result in
    sensitivity maps that have low (absolute) values indicating high
    sensitivities, which conflicts with the intended behavior of a
    `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return Searchlight(datameasure, queryengine=qe, roi_ids=center_ids,
                       **kwargs)
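# A minimal usage sketch (assumptions: names imported from mvpa2.suite and an
# input dataset `ds` carrying a 'voxel_indices' feature attribute plus
# 'targets'/'chunks' sample attributes). The cross-validation measure mirrors
# the one used in test_voxel_selection() below.
import numpy as np
from mvpa2.suite import CrossValidation, GNB, OddEvenPartitioner

cv = CrossValidation(GNB(), OddEvenPartitioner(),
                     errorfx=lambda p, t: np.mean(p == t))
sl = sphere_searchlight(cv, radius=3, space='voxel_indices')
# accuracy_maps = sl(ds)  # one accuracy map per cross-validation fold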
def sphere_gnbsearchlight(gnb, generator, radius=1, center_ids=None,
                          space='voxel_indices', *args, **kwargs):
    """Creates a `GNBSearchlight` to assess :term:`cross-validation`
    classification performance of GNB on all possible spheres of a certain
    size within a dataset.

    The idea of taking advantage of the naiveness of GNB for the sake of
    quick searchlight-ing stems from Francisco Pereira (paper under review).

    Parameters
    ----------
    radius : float
      All features within this radius around the center will be part of a
      sphere.
    center_ids : list of int
      List of feature ids (not coordinates) that shall serve as sphere
      centers. By default all features will be used (it is passed as the
      ``roi_ids`` argument of Searchlight).
    space : str
      Name of a feature attribute of the input dataset that defines the
      spatial coordinates of all features.
    **kwargs
      In addition this class supports all keyword arguments of
      :class:`~mvpa2.measures.gnbsearchlight.GNBSearchlight`.

    Notes
    -----
    If any `BaseSearchlight` is used as a `SensitivityAnalyzer` one has to
    make sure that the specified scalar `Measure` returns large (absolute)
    values for high sensitivities and small (absolute) values for low
    sensitivities. Especially when using error functions, low values usually
    imply high performance and therefore high sensitivity. This would in turn
    result in sensitivity maps that have low (absolute) values indicating
    high sensitivities, which conflicts with the intended behavior of a
    `SensitivityAnalyzer`.
    """
    # build a matching query engine from the arguments
    kwa = {space: Sphere(radius)}
    qe = IndexQueryEngine(**kwa)
    # init the searchlight with the queryengine
    return GNBSearchlight(gnb, generator, qe, roi_ids=center_ids,
                          *args, **kwargs)
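# A minimal usage sketch (assumptions: mvpa2.suite imports and an input
# dataset `ds` with 'voxel_indices', 'targets' and 'chunks'). The speed-up
# comes from exploiting the naiveness of GNB, as noted in the docstring
# above; the sphere neighborhood is still defined through
# IndexQueryEngine/Sphere.
from mvpa2.suite import GNB, NFoldPartitioner

gnb_sl = sphere_gnbsearchlight(GNB(), NFoldPartitioner(), radius=3,
                               space='voxel_indices')
# err_map = gnb_sl(ds)  # per-sphere cross-validation error map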
def test_add_center_fa(self):
    # just a smoke test pretty much
    ds = datasets['3dsmall'].copy()

    # check that we do not mark anything as center whenever there is none
    def check_no_center(ds):
        assert (not np.any(ds.fa.center))
        return 1.0

    # or just a single center in our case
    def check_center(ds):
        assert (np.sum(ds.fa.center) == 1)
        return 1.0

    for n, check in [(HollowSphere(1, 0), check_no_center),
                     (Sphere(0), check_center),
                     (Sphere(1), check_center)]:
        Searchlight(check, IndexQueryEngine(myspace=n),
                    add_center_fa='center')(ds)
    # and no changes to original ds data, etc
    assert_array_equal(datasets['3dsmall'].fa.keys(), ds.fa.keys())
    assert_array_equal(datasets['3dsmall'].samples, ds.samples)
N_BLOCKS = 128
cnx_tx = 489
toutdir = os.path.join(basedir, 'transformation_matrices',
                       'iterative_cha_olp4cbp_mappers'
                       + '_' + 'subs-' + str(nsubs)
                       + '_' + 'radius1-10_radius2-' + str(HYPERALIGNMENT_RADIUS)
                       + '.hdf5.gz')
print(toutdir)

# load nifti as a pymvpa dataset and then use that as ref_ds in the
# queryengine definition; mask with data in brainmask so only 170k (size of
# connectomes) voxels are included
ref_ds = fmri_dataset(os.path.join(helperfiles, 'newbrainmask.nii'),
                      mask=os.path.join(helperfiles, 'newbrainmask.nii'))
print('Size of brain mask:')
print(str(len(ref_ds.fa.voxel_indices)))

# set searchlight sphere radius
sl_radius = HYPERALIGNMENT_RADIUS

# create query engine
qe = IndexQueryEngine(voxel_indices=Sphere(sl_radius))
qe.train(ref_ds)

# load all subjects
nfiles = glob.glob(os.path.join(chamats, '*commonspace_subs*'))
print('Loading participant data from: ')
print(chamats)
mysubs = nfiles[0:nsubs]

# import connectomes into pymvpa datasets, zscore, then add chunks and voxel
# indices, and append to the list of datasets
dss = []
for sub in range(len(mysubs)):
    ds = mv.Dataset(np.load(mysubs[sub]))
    ds.fa['voxel_indices'] = range(ds.shape[1])
    # ds.sa['chunks'] = np.repeat(i, cnx_tx)
    mv.zscore(ds, chunks_attr=None)
    dss.append(ds)
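# Illustrative check (an assumption, not part of the original script): a
# trained IndexQueryEngine can be indexed by a feature id to obtain the ids
# of all voxels falling into the corresponding searchlight sphere, the same
# qe[seed] pattern used in local_random_affine_transformations() above. The
# center id 0 is arbitrary.
center_id = 0
sphere_ids = qe[center_id]
print('Sphere around feature %d covers %d voxels' % (center_id, len(sphere_ids)))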
def test_voxel_selection(self):
    '''Compare surface and volume based searchlight'''
    '''
    Tests to see whether results are identical for surface-based
    searchlight (just one plane; Euclidean distance) and volume-based
    searchlight.

    Note that the current value is a float; if it were int, it would
    specify the number of voxels in each searchlight'''
    radius = 10.

    '''Define input filenames'''
    epi_fn = os.path.join(pymvpa_dataroot, 'bold.nii.gz')
    maskfn = os.path.join(pymvpa_dataroot, 'mask.nii.gz')

    '''
    Use the EPI datafile to define a surface.
    The surface has as many nodes as there are voxels
    and is parallel to the volume 'slice'
    '''
    vg = volgeom.from_any(maskfn, mask_volume=True)

    aff = vg.affine
    nx, ny, nz = vg.shape[:3]

    '''Plane goes in x and y direction, so we take these vectors
    from the affine transformation matrix of the volume'''
    plane = surf.generate_plane(aff[:3, 3], aff[:3, 0], aff[:3, 1], nx, ny)

    '''
    Simulate pial and white matter as just above and below
    the central plane
    '''
    normal_vec = aff[:3, 2]
    outer = plane + normal_vec
    inner = plane + -normal_vec

    '''
    Combine volume and surface information
    '''
    vsm = volsurf.VolSurfMaximalMapping(vg, outer, inner)

    '''
    Run voxel selection with specified radius (in mm), using
    Euclidean distance measure
    '''
    surf_voxsel = surf_voxel_selection.voxel_selection(vsm, radius,
                                                       distance_metric='e')

    '''Define the measure'''
    # run_slow=True would give an actual cross-validation with meaningful
    # accuracies. Because this is a unit-test only the number of voxels
    # in each searchlight is tested.
    run_slow = False

    if run_slow:
        meas = CrossValidation(GNB(), OddEvenPartitioner(),
                               errorfx=lambda p, t: np.mean(p == t))
        postproc = mean_sample
    else:
        meas = _Voxel_Count_Measure()
        postproc = lambda x: x

    '''
    Surface analysis: define the query engine, cross validation,
    and searchlight
    '''
    surf_qe = SurfaceVerticesQueryEngine(surf_voxsel)
    surf_sl = Searchlight(meas, queryengine=surf_qe, postproc=postproc)

    '''
    new (Sep 2012): also test 'simple' queryengine wrapper function
    '''
    surf_qe2 = disc_surface_queryengine(radius, maskfn, inner, outer, plane,
                                        volume_mask=True,
                                        distance_metric='euclidean')
    surf_sl2 = Searchlight(meas, queryengine=surf_qe2, postproc=postproc)

    '''
    Same for the volume analysis
    '''
    element_sizes = tuple(map(abs, (aff[0, 0], aff[1, 1], aff[2, 2])))
    sph = Sphere(radius, element_sizes=element_sizes)
    kwa = {'voxel_indices': sph}

    vol_qe = IndexQueryEngine(**kwa)
    vol_sl = Searchlight(meas, queryengine=vol_qe, postproc=postproc)

    '''The following steps are similar to start_easy.py'''
    attr = SampleAttributes(
        os.path.join(pymvpa_dataroot, 'attributes_literal.txt'))

    mask = surf_voxsel.get_mask()

    dataset = fmri_dataset(samples=os.path.join(pymvpa_dataroot,
                                                'bold.nii.gz'),
                           targets=attr.targets, chunks=attr.chunks,
                           mask=mask)

    if run_slow:
        # do chunkswise linear detrending on dataset
        poly_detrend(dataset, polyord=1, chunks_attr='chunks')

        # zscore dataset relative to baseline ('rest') mean
        zscore(dataset, chunks_attr='chunks',
               param_est=('targets', ['rest']))

    # select class face and house for this demo analysis
    # would work with full datasets (just a little slower)
    dataset = dataset[np.array([l in ['face', 'house']
                                for l in dataset.sa.targets],
                               dtype='bool')]

    '''Apply searchlight to datasets'''
    surf_dset = surf_sl(dataset)
    surf_dset2 = surf_sl2(dataset)
    vol_dset = vol_sl(dataset)

    surf_data = surf_dset.samples
    surf_data2 = surf_dset2.samples
    vol_data = vol_dset.samples

    assert_array_equal(surf_data, surf_data2)
    assert_array_equal(surf_data, vol_data)
def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError(
                'cross-validation payload requires --learner and --partitioner')
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    ds = arg2ds(args.data)
    if not args.ds_preproc_fx is None:
        ds = args.ds_preproc_fx(ds)

    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))

    # determine ROIs
    rids = None  # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which roi features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]

    seed_ids = None
    if args.scatter_rois is not None:
        # scatter_neighborhoods among available ids if it was requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']

    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we had to sub-select by scattering among available rids
            # so we would need to get original ids
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on the entire feature-set
            roi_ids = seed_ids

    verbose(3, 'Attempting %i ROI analyses'
               % ((roi_ids is None) and ds.nfeatures or len(roi_ids)))

    from mvpa2.measures.searchlight import Searchlight

    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    # add_center_fa
    # tmp_prefix
    # nblocks
    # null_dist

    # run
    res = sl(ds)

    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the seed ID
        # selection
        res.a['mapper'] = res.a.mapper[:-1]

    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res