def run(args):
    from mvpa2.base.hdf5 import h5save
    ds = None
    if args.txt_data is not None:
        verbose(1, "Load data from TXT file '%s'" % args.txt_data)
        samples = _load_from_txt(args.txt_data)
        ds = Dataset(samples)
    elif args.npy_data is not None:
        verbose(1, "Load data from NPY file '%s'" % args.npy_data)
        samples = _load_from_npy(args.npy_data)
        ds = Dataset(samples)
    elif args.mri_data is not None:
        verbose(1, "Load data from MRI image(s) %s" % args.mri_data)
        from mvpa2.datasets.mri import fmri_dataset
        vol_attr = dict()
        if args.add_vol_attr is not None:
            # XXX add a way to use the mapper of an existing dataset to
            # add a volume attribute without having to load the entire
            # MRI data again
            vol_attr = dict(args.add_vol_attr)
            if len(args.add_vol_attr) != len(vol_attr):
                warning("--vol-attr option with duplicate attribute name: "
                        "check arguments!")
        verbose(2, "Add volumetric feature attributes: %s" % vol_attr)
        ds = fmri_dataset(args.mri_data, mask=args.mask, add_fa=vol_attr)
    if ds is None:
        if args.data is None:
            raise RuntimeError('no data source specified')
        else:
            ds = hdf2ds(args.data)[0]
    else:
        if args.data is not None:
            verbose(1, 'ignoring dataset input in favor of other data source '
                       '-- remove either one to disambiguate')
    # act on all attribute options
    ds = process_common_dsattr_opts(ds, args)
    if args.add_fsl_mcpar is not None:
        from mvpa2.misc.fsl.base import McFlirtParams
        mc_par = McFlirtParams(args.add_fsl_mcpar)
        for param in mc_par:
            verbose(2, "Add motion regressor as sample attribute '%s'"
                       % ('mc_' + param))
            ds.sa['mc_' + param] = mc_par[param]
    verbose(3, "Dataset summary %s" % (ds.summary()))
    # and store
    outfilename = args.output
    if not outfilename.endswith('.hdf5'):
        outfilename += '.hdf5'
    verbose(1, "Save dataset to '%s'" % outfilename)
    h5save(outfilename, ds, mkdir=True, compression=args.hdf5_compression)
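# A minimal usage sketch (not part of the original module), assuming run()
# is driven programmatically rather than through the command-line parser.
# The attribute names below are exactly those read by run() above;
# process_common_dsattr_opts() may expect further attributes that the real
# parser would supply, so treat this as illustrative only.
def _example_mkds_from_npy(npy_path, out_path):
    from argparse import Namespace
    args = Namespace(
        txt_data=None, npy_data=npy_path, mri_data=None, add_vol_attr=None,
        mask=None, data=None, add_fsl_mcpar=None, output=out_path,
        hdf5_compression='gzip')
    # writes out_path (with '.hdf5' appended if missing)
    run(args)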
def run(args):
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # slicing
    sliceme = {'samples': slice(None), 'features': slice(None)}
    # indices
    for opt, col, which in ((args.samples_by_index, ds.sa, 'samples'),
                            (args.features_by_index, ds.fa, 'features')):
        if opt is None:
            continue
        if len(opt) == 1 and opt[0].count(':'):
            # slice spec
            arg = opt[0].split(':')
            spec = []
            for a in arg:
                if not len(a):
                    spec.append(None)
                else:
                    spec.append(int(a))
            sliceme[which] = slice(*spec)
        else:
            # actual indices
            sliceme[which] = [int(o) for o in opt]
    # attribute evaluation
    for opt, col, which in ((args.samples_by_attr, ds.sa, 'samples'),
                            (args.features_by_attr, ds.fa, 'features')):
        if opt is None:
            continue
        sliceme[which] = _eval_attr_expr(opt, col)
    # apply selection
    ds = ds[sliceme['samples'], sliceme['features']]
    verbose(1, 'Selected %i samples with %i features' % ds.shape)
    # strip attributes
    for attrarg, col, descr in ((args.strip_sa, ds.sa, 'sample '),
                                (args.strip_fa, ds.fa, 'feature '),
                                (args.strip_da, ds.a, '')):
        if attrarg is not None:
            for attr in attrarg:
                try:
                    del col[attr]
                except KeyError:
                    warning("dataset has no %sattribute '%s' to remove"
                            % (descr, attr))
    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds
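# Hedged sketch (not part of the original module): the ':'-separated slice
# parsing above mirrors Python's own slice syntax, with empty fields
# becoming None. The helper name is hypothetical.
def _example_slice_spec(spec_str):
    # '2:20:4' -> slice(2, 20, 4); ':10' -> slice(None, 10); '5:' -> slice(5, None)
    spec = [int(a) if len(a) else None for a in spec_str.split(':')]
    return slice(*spec)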
def run(args):
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # get CV instance
    cv = get_crossvalidation_instance(
        args.learner, args.partitioner, args.errorfx,
        args.sampling_repetitions, args.learner_space,
        args.balance_training, args.permutations,
        args.avg_datafold_results, args.prob_tail)
    res = cv(ds)
    # some meaningful output
    # XXX make condition on classification analysis only?
    print cv.ca.stats
    print 'Results\n-------'
    if args.permutations > 0:
        nprob = cv.ca.null_prob.samples
    if res.shape[1] == 1:
        # simple result structure
        if args.permutations > 0:
            p = ', p-value (%s tail)' % args.prob_tail
        else:
            p = ''
        print 'Fold, Result%s' % p
        for i in xrange(len(res)):
            if args.permutations > 0:
                p = ', %f' % nprob[i, 0]
            else:
                p = ''
            print '%s, %f%s' % (res.sa.cvfolds[i], res.samples[i, 0], p)
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    if args.permutations > 0:
        if args.output.endswith('.hdf5'):
            args.output = args.output[:-5]
        ds2hdf5(cv.ca.null_prob, '%s_nullprob' % args.output,
                compression=args.hdf5_compression)
    return res
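# Hedged sketch (not part of the original module) of the output naming rule
# used above for permutation results: the '.hdf5' extension is stripped
# before '_nullprob' is appended, so 'res.hdf5' and 'res' both map to
# 'res_nullprob' (ds2hdf5() is assumed to handle the final extension).
def _example_nullprob_name(output):
    if output.endswith('.hdf5'):
        output = output[:-5]
    return '%s_nullprob' % output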
def run(args):
    if os.path.isfile(args.payload) and args.payload.endswith('.py'):
        measure = script2obj(args.payload)
    elif args.payload == 'cv':
        if args.cv_learner is None or args.cv_partitioner is None:
            raise ValueError(
                'cross-validation payload requires --learner and --partitioner')
        # get CV instance
        measure = get_crossvalidation_instance(
            args.cv_learner, args.cv_partitioner, args.cv_errorfx,
            args.cv_sampling_repetitions, args.cv_learner_space,
            args.cv_balance_training, args.cv_permutations,
            args.cv_avg_datafold_results, args.cv_prob_tail)
    else:
        raise RuntimeError("this should not happen")
    dss = hdf2ds(args.data)
    verbose(3, 'Loaded %i dataset(s)' % len(dss))
    ds = vstack(dss)
    verbose(3, 'Concatenation yielded %i samples with %i features' % ds.shape)
    # setup neighborhood
    # XXX add big switch to allow for setting up surface-based neighborhoods
    from mvpa2.misc.neighborhood import IndexQueryEngine
    qe = IndexQueryEngine(**dict(args.neighbors))
    # determine ROIs
    rids = None     # all by default
    aggregate_fx = args.aggregate_fx
    if args.roi_attr is not None:
        # first figure out which ROI features should be processed
        if len(args.roi_attr) == 1 and args.roi_attr[0] in ds.fa.keys():
            # name of an attribute -> pull non-zeroes
            rids = ds.fa[args.roi_attr[0]].value.nonzero()[0]
        else:
            # an expression?
            from .cmd_select import _eval_attr_expr
            rids = _eval_attr_expr(args.roi_attr, ds.fa).nonzero()[0]
    seed_ids = None
    if args.scatter_rois is not None:
        # scatter neighborhoods among the available ids, if requested
        from mvpa2.misc.neighborhood import scatter_neighborhoods
        attr, nb = args.scatter_rois
        coords = ds.fa[attr].value
        if rids is not None:
            # select only those which were chosen by ROI
            coords = coords[rids]
        _, seed_ids = scatter_neighborhoods(nb, coords)
        if aggregate_fx is None:
            # no custom one given -> use default "fill in" function
            aggregate_fx = _fill_in_scattered_results
            if args.enable_ca is None:
                args.enable_ca = ['roi_feature_ids']
            elif 'roi_feature_ids' not in args.enable_ca:
                args.enable_ca += ['roi_feature_ids']
    if seed_ids is None:
        roi_ids = rids
    else:
        if rids is not None:
            # we sub-selected by scattering among the available rids,
            # so seed ids need to be mapped back to original feature ids
            roi_ids = rids[seed_ids]
        else:
            # scattering happened on the entire feature set
            roi_ids = seed_ids
    verbose(3, 'Attempting %i ROI analyses'
               % ((roi_ids is None) and ds.nfeatures or len(roi_ids)))
    from mvpa2.measures.searchlight import Searchlight
    sl = Searchlight(measure,
                     queryengine=qe,
                     roi_ids=roi_ids,
                     nproc=args.nproc,
                     results_backend=args.multiproc_backend,
                     results_fx=aggregate_fx,
                     enable_ca=args.enable_ca,
                     disable_ca=args.disable_ca)
    # XXX support me too!
    # add_center_fa
    # tmp_prefix
    # nblocks
    # null_dist
    # run
    res = sl(ds)
    if (seed_ids is not None) and ('mapper' in res.a):
        # strip the last mapper link in the chain, which would be the
        # seed ID selection
        res.a['mapper'] = res.a.mapper[:-1]
    # XXX create more output
    # and store
    ds2hdf5(res, args.output, compression=args.hdf5_compression)
    return res
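# Hedged sketch (not part of the original module): what dict(args.neighbors)
# plausibly expands to for a volumetric searchlight -- an IndexQueryEngine
# keyed by a feature attribute mapped to a neighborhood. 'voxel_indices' is
# the attribute conventionally provided by fmri_dataset(), but it is an
# assumption here; any coordinate-valued feature attribute would do.
def _example_volumetric_qe(radius=3):
    from mvpa2.misc.neighborhood import IndexQueryEngine, Sphere
    return IndexQueryEngine(voxel_indices=Sphere(radius))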