def test_remove_invariant_as_a_mapper():
    """Compare a std-based feature-selection mapper to remove_invariant_features.

    Two columns (1 and 8) of a copy of the 'uni2large' test dataset are
    overwritten with a constant, then the dataset is passed through a
    ``SensitivityBasedFeatureSelection`` whose "sensitivity" is the
    per-feature standard deviation.  The mapper output must match what
    ``remove_invariant_features`` returns for the same dataset, both in
    samples and in the surviving original feature indices.
    """
    from mvpa2.featsel.helpers import RangeElementSelector
    from mvpa2.featsel.base import StaticFeatureSelection, \
        SensitivityBasedFeatureSelection
    from mvpa2.testing.datasets import datasets
    from mvpa2.datasets.miscfx import remove_invariant_features

    def per_feature_std(data):
        # standard deviation across samples, one value per feature
        return np.std(data, axis=0)

    # presumably selects features whose std is strictly greater than 0
    # -- confirm against RangeElementSelector's documentation
    selector = RangeElementSelector(lower=0, inclusive=False)
    mapper = SensitivityBasedFeatureSelection(
        per_feature_std,
        selector,
        train_analyzer=False,
        auto_train=True)

    ds = datasets['uni2large'].copy()
    n_features = ds.nfeatures
    ds.a['mapper'] = StaticFeatureSelection(np.arange(n_features))
    # remember every feature's original position so we can check which
    # ones survive the selection
    ds.fa['index'] = np.arange(n_features)
    # make features 1 and 8 invariant
    invariant_columns = [1, 8]
    ds.samples[:, invariant_columns] = 10

    selected = mapper(ds)

    # Validate that we are getting the same results as
    # remove_invariant_features
    reference = remove_invariant_features(ds)
    assert_array_equal(selected.samples, reference.samples)
    assert_array_equal(selected.fa.index, reference.fa.index)
    # with original features 1 and 8 gone, positions 1 and 8 of the output
    # hold original features 2 and 10
    assert_equal(selected.fa.index[1], 2)
    assert_equal(selected.fa.index[8], 10)
def test_invar_features_removal(self):
    """remove_invariant_features must drop constant columns only.

    The dataset has three samples and three features: two all-zero
    columns followed by one column of normally distributed random values.
    Only the random column is expected to survive.
    """
    r = np.random.normal(size=(3, 1))
    ds = dataset_wizard(samples=np.hstack((np.zeros((3, 2)), r)),
                        targets=1)

    # `failUnless` is a deprecated alias that was removed from unittest in
    # Python 3.12; assertEqual additionally reports both values on failure.
    self.assertEqual(ds.nfeatures, 3)

    dsc = remove_invariant_features(ds)

    self.assertEqual(dsc.nfeatures, 1)
    self.assertTrue((dsc.samples == r).all())
def test_invar_features_removal(self):
    """Check that only the non-constant feature survives invariant removal.

    Two all-zero columns are stacked in front of a single column of random
    normal values (three samples each); after calling
    ``remove_invariant_features`` only the random column may remain.
    """
    varying = np.random.normal(size=(3, 1))
    constant = np.zeros((3, 2))
    samples = np.hstack((constant, varying))
    ds = dataset_wizard(samples=samples, targets=1)
    self.assertTrue(ds.nfeatures == 3)

    stripped = remove_invariant_features(ds)
    self.assertTrue(stripped.nfeatures == 1)
    same_values = (stripped.samples == varying)
    self.assertTrue(same_values.all())
def _test_gnb_overflow_haxby():   # pragma: no cover
    """Heavier, data-dependent check that GNB estimates stay finite.

    Example from https://github.com/PyMVPA/PyMVPA/issues/581 -- a heavier
    version of the test preceding it in the test module.  It loads the
    tutorial data from a hard-coded local path, keeps only the 'rest' and
    'scrambled' samples, strips invariant features and cross-validates a
    log-probability GNB over half-partitions of the chunks.  The
    classifier's estimates are printed and asserted to be finite.

    NOTE(review): the leading underscore presumably keeps this out of
    automatic test collection, since it needs the tutorial data installed
    under ``/usr/share/data/pymvpa2-tutorial/`` -- confirm.
    """
    import os
    import numpy as np

    from mvpa2.datasets.sources.native import load_tutorial_data
    from mvpa2.clfs.gnb import GNB
    from mvpa2.measures.base import CrossValidation
    from mvpa2.generators.partition import HalfPartitioner
    from mvpa2.datasets.miscfx import remove_invariant_features
    # The former function-level `from mvpa2.testing.datasets import *` was
    # dropped: star imports inside a function are a SyntaxError on
    # Python 3 and none of the imported names were used here.

    datapath = '/usr/share/data/pymvpa2-tutorial/'
    vt_mask = os.path.join(datapath, 'haxby2001', 'sub001',
                           'masks', 'orig', 'vt.nii.gz')
    haxby = load_tutorial_data(datapath,
                               roi='vt',
                               add_fa={'vt_thr_glm': vt_mask})

    # Detrending and z-scoring were left disabled in the original
    # reproduction of the issue, so the data is used unnormalized.
    # Only 'rest' and 'scrambled' samples are kept ('house' and 'face'
    # had been tried as well and were commented out).
    keep = np.array([target in ['rest', 'scrambled']
                     for target in haxby.targets],
                    dtype='bool')
    haxby = haxby[keep]

    haxby = remove_invariant_features(haxby)

    clf = GNB(enable_ca='estimates', logprob=True, normalize=True)
    # Plain train/predict on the same data gives estimates that are a bit
    # too "overfit" to judge, hence the cross-validation.
    cv = CrossValidation(clf,
                         HalfPartitioner(attr='chunks'),
                         postproc=None,
                         enable_ca=['stats'])
    # The return value is not needed; the call is what populates
    # clf.ca.estimates, which is inspected below.
    cv(haxby)
    res1_est = clf.ca.estimates

    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the former print statements.
    print("Estimates:\n%s" % (res1_est,))
    print("Exp(estimates):\n%s" % (np.round(np.exp(res1_est), 3),))
    assert np.all(np.isfinite(res1_est))
fds_z = detrended_fds #print fds.a.mapper #pring fds_z.a.mapper #use a balancer to make a balanced dataset of even amounts of samples in each class #if model == 'MVPA-01': balancer = ChainNode([ NFoldPartitioner(), Balancer(attr='targets', count=1, limit='partitions', apply_selection=True) ], space='partitions') ##WHATCHA # Removing inv features #pleases the SVM but messes up dimensions. ##triplecheck fds = remove_invariant_features(fds_z) if model == 'MVPA-05' or model == 'MVPA-04': fds = (fds_z) #cross validate using NFoldPartioner - which makes cross validation folds by chunk/run #if model == 'MVPA-01': cv = CrossValidation(clf, balancer, errorfx=lambda p, t: np.mean(p == t)) if model == 'MVPA-03' or model == 'MVPA-05': cv = CrossValidation(clf, NFoldPartitioner(), errorfx=lambda p, t: np.mean(p == t)) #cv = CrossValidation(clf, NFoldPartitioner(1), errorfx=lambda p, t: np.mean(p == t)) #no balance!
def run(args):
    """Preprocess a dataset as requested by the parsed command line options.

    The steps are applied in this fixed order, each one only when its
    option is set: polynomial detrending, Butterworth IIR filtering,
    z-scoring, and removal of invariant features.  The result is stored
    with ``ds2hdf5`` under ``args.output``.

    Parameters
    ----------
    args
      Parsed options.  Attributes read here: ``chunks``, ``data``,
      ``poly_detrend``, ``detrend_chunks``, ``detrend_regrs``,
      ``detrend_coords``, ``filter_passband``, ``filter_stopband``,
      ``sampling_rate``, ``filter_passloss``, ``filter_stopattenuation``,
      ``zscore``, ``zscore_chunks``, ``zscore_params``,
      ``strip_invariant_features``, ``output`` and ``hdf5_compression``.
      ``detrend_chunks`` and ``zscore_chunks`` are overwritten with
      ``chunks`` when they are None and ``chunks`` is given.

    Returns
    -------
    The preprocessed dataset (the same one that was written to disk).

    Raises
    ------
    ValueError
      If the detrend chunks attribute is missing from the dataset, if the
      filter specification is incomplete or inconsistent, or if no
      Butterworth filter can be constructed for it.
    """
    if args.chunks is not None:
        # apply global "chunks" setting
        for cattr in ('detrend_chunks', 'zscore_chunks'):
            if getattr(args, cattr) is None:
                # only overwrite if individual option is not given
                setattr(args, cattr, args.chunks)

    ds = arg2ds(args.data)

    if args.poly_detrend is not None:
        if args.detrend_chunks is not None \
                and args.detrend_chunks not in ds.sa:
            raise ValueError(
                "--detrend-chunks attribute '%s' not found in dataset"
                % args.detrend_chunks)
        from mvpa2.mappers.detrend import poly_detrend
        verbose(1, "Detrend")
        # return value is not used -- apparently operates on `ds` in place
        poly_detrend(ds, polyord=args.poly_detrend,
                     chunks_attr=args.detrend_chunks,
                     opt_regs=args.detrend_regrs,
                     space=args.detrend_coords)

    if args.filter_passband is not None:
        from mvpa2.mappers.filters import iir_filter
        from scipy.signal import butter, buttord
        if args.sampling_rate is None or args.filter_stopband is None:
            raise ValueError("spectral filtering requires specification of "
                             "--filter-stopband and --sampling-rate")
        # determine filter type; band edges are expressed as fractions of
        # the Nyquist frequency
        nyquist = args.sampling_rate / 2.0
        if len(args.filter_passband) > 1:
            btype = 'bandpass'
            if len(args.filter_passband) != len(args.filter_stopband):
                raise ValueError(
                    "passband and stopband specifications have to "
                    "match in size")
            wp = [v / nyquist for v in args.filter_passband]
            ws = [v / nyquist for v in args.filter_stopband]
        elif args.filter_passband[0] < args.filter_stopband[0]:
            btype = 'lowpass'
            wp = args.filter_passband[0] / nyquist
            ws = args.filter_stopband[0] / nyquist
        elif args.filter_passband[0] > args.filter_stopband[0]:
            btype = 'highpass'
            wp = args.filter_passband[0] / nyquist
            ws = args.filter_stopband[0] / nyquist
        else:
            # identical passband and stopband edge
            raise ValueError("invalid specification of Butterworth filter")
        # create filter
        verbose(1, "Spectral filtering (%s)" % (btype, ))
        try:
            # `order` rather than `ord`, to avoid shadowing the builtin
            order, wn = buttord(wp, ws,
                                args.filter_passloss,
                                args.filter_stopattenuation,
                                analog=False)
            b, a = butter(order, wn, btype=btype)
        except OverflowError:
            raise ValueError(
                "cannot contruct Butterworth filter for the given "
                "specification")
        ds = iir_filter(ds, b, a)

    if args.zscore:
        from mvpa2.mappers.zscore import zscore
        verbose(1, "Z-score")
        # return value is not used -- apparently operates on `ds` in place
        zscore(ds, chunks_attr=args.zscore_chunks,
               params=args.zscore_params)

    verbose(3, "Dataset summary %s" % (ds.summary()))

    # invariants?
    # Truthiness test instead of `is not None`: a boolean flag that is
    # False (e.g. an unset argparse `store_true` option) must not trigger
    # the stripping.  A value of None is still treated as "do not strip".
    if args.strip_invariant_features:
        from mvpa2.datasets.miscfx import remove_invariant_features
        ds = remove_invariant_features(ds)

    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds
def run(args):
    """Run the preprocessing pipeline selected by the parsed options.

    Loads the dataset named by ``args.data`` and applies, in order and only
    when requested: polynomial detrending (``args.poly_detrend``),
    Butterworth IIR filtering (``args.filter_passband``), z-scoring
    (``args.zscore``) and invariant-feature removal
    (``args.strip_invariant_features``).  The resulting dataset is written
    with ``ds2hdf5`` to ``args.output`` and returned.

    A global ``args.chunks`` value is copied into ``args.detrend_chunks``
    and ``args.zscore_chunks`` wherever those are None.

    Raises ``ValueError`` when the detrend chunks attribute is not part of
    the dataset's sample attributes, when the filter options are incomplete
    or inconsistent, or when the Butterworth filter cannot be constructed.
    """
    if args.chunks is not None:
        # apply global "chunks" setting
        for cattr in ("detrend_chunks", "zscore_chunks"):
            if getattr(args, cattr) is None:
                # only overwrite if individual option is not given
                setattr(args, cattr, args.chunks)

    ds = arg2ds(args.data)

    if args.poly_detrend is not None:
        if args.detrend_chunks is not None and args.detrend_chunks not in ds.sa:
            raise ValueError("--detrend-chunks attribute '%s' not found in dataset" % args.detrend_chunks)
        from mvpa2.mappers.detrend import poly_detrend

        verbose(1, "Detrend")
        # result is not captured -- apparently modifies `ds` in place
        poly_detrend(
            ds,
            polyord=args.poly_detrend,
            chunks_attr=args.detrend_chunks,
            opt_regs=args.detrend_regrs,
            space=args.detrend_coords,
        )

    if args.filter_passband is not None:
        from mvpa2.mappers.filters import iir_filter
        from scipy.signal import butter, buttord

        if args.sampling_rate is None or args.filter_stopband is None:
            raise ValueError("spectral filtering requires specification of " "--filter-stopband and --sampling-rate")
        # determine filter type; edges are normalized by the Nyquist
        # frequency before being handed to buttord
        nyquist = args.sampling_rate / 2.0
        if len(args.filter_passband) > 1:
            btype = "bandpass"
            if len(args.filter_passband) != len(args.filter_stopband):
                raise ValueError("passband and stopband specifications have to " "match in size")
            wp = [v / nyquist for v in args.filter_passband]
            ws = [v / nyquist for v in args.filter_stopband]
        else:
            # single edge: the relative position of passband and stopband
            # decides between low-pass and high-pass
            pass_edge = args.filter_passband[0]
            stop_edge = args.filter_stopband[0]
            if pass_edge < stop_edge:
                btype = "lowpass"
            elif pass_edge > stop_edge:
                btype = "highpass"
            else:
                raise ValueError("invalid specification of Butterworth filter")
            wp = pass_edge / nyquist
            ws = stop_edge / nyquist
        # create filter
        verbose(1, "Spectral filtering (%s)" % (btype,))
        try:
            # named `order` so the builtin `ord` is not shadowed
            order, wn = buttord(wp, ws, args.filter_passloss, args.filter_stopattenuation, analog=False)
            b, a = butter(order, wn, btype=btype)
        except OverflowError:
            raise ValueError("cannot contruct Butterworth filter for the given " "specification")
        ds = iir_filter(ds, b, a)

    if args.zscore:
        from mvpa2.mappers.zscore import zscore

        verbose(1, "Z-score")
        # result is not captured -- apparently modifies `ds` in place
        zscore(ds, chunks_attr=args.zscore_chunks, params=args.zscore_params)

    verbose(3, "Dataset summary %s" % (ds.summary()))

    # invariants?
    # Test truthiness rather than `is not None`, so that a False flag
    # (e.g. the default of an argparse `store_true` option) does not strip
    # features; None keeps meaning "do not strip".
    if args.strip_invariant_features:
        from mvpa2.datasets.miscfx import remove_invariant_features

        ds = remove_invariant_features(ds)

    # and store
    ds2hdf5(ds, args.output, compression=args.hdf5_compression)
    return ds