Example #1
    def __test_matthias_question(self):
        rfe_clf = LinearCSVMC(C=1)

        rfesvm_split = SplitClassifier(rfe_clf)
        clf = FeatureSelectionClassifier(
            clf=LinearCSVMC(C=1),
            feature_selection=RFE(
                sensitivity_analyzer=rfesvm_split.get_sensitivity_analyzer(
                    combiner=first_axis_mean,
                    transformer=np.abs),
                transfer_error=ConfusionBasedError(
                    rfesvm_split,
                    confusion_state="confusion"),
                stopping_criterion=FixedErrorThresholdStopCrit(0.20),
                feature_selector=FractionTailSelector(
                    0.2, mode='discard', tail='lower'),
                update_sensitivity=True))

        no_permutations = 1000
        permutator = AttributePermutator('targets', count=no_permutations)
        cv = CrossValidation(clf,
                             NFoldPartitioner(),
                             null_dist=MCNullDist(permutator, tail='left'),
                             enable_ca=['stats'])
        error = cv(datasets['uni2small'])
        self.assertTrue(error < 0.4)
        self.assertTrue(cv.ca.null_prob < 0.05)
Example #2
    def test_james_problem_multiclass(self):
        percent = 80
        dataset = datasets['uni4large']
        #dataset = dataset[:, dataset.a.nonbogus_features]

        rfesvm_split = LinearCSVMC()
        fs = RFE(
            rfesvm_split.get_sensitivity_analyzer(
                postproc=ChainMapper([
                    #FxMapper('features', l2_normed),
                    #FxMapper('samples', np.mean),
                    #FxMapper('samples', np.abs)
                    FxMapper('features', lambda x: np.argsort(np.abs(x))),
                    #maxofabs_sample()
                    mean_sample()
                ])),
            ProxyMeasure(rfesvm_split,
                         postproc=BinaryFxNode(mean_mismatch_error,
                                               'targets')),
            Splitter('train'),
            fselector=FractionTailSelector(
                percent / 100.0,
                mode='select', tail='upper'),
            update_sensitivity=True)

        clf = FeatureSelectionClassifier(
            LinearCSVMC(),
            # on features selected via RFE
            fs)

        # update sensitivity at each step (since we're not using the
        # same CLF as sensitivity analyzer)

        class StoreResults(object):
            def __init__(self):
                self.storage = []

            def __call__(self, data, node, result):
                self.storage.append((node.measure.mapper.ca.history,
                                     node.measure.mapper.ca.errors))

        cv_storage = StoreResults()
        cv = CrossValidation(clf,
                             NFoldPartitioner(),
                             postproc=mean_sample(),
                             callback=cv_storage,
                             enable_ca=['stats'])
        #cv = SplitClassifier(clf)
        try:
            error = cv(dataset).samples.squeeze()
        except Exception as e:
            self.fail('CrossValidation cannot handle classifier with RFE '
                      'feature selection. Got exception: %s' % (e,))
Example #3
    def test_james_problem_multiclass(self):
        percent = 80
        dataset = datasets['uni4large']
        #dataset = dataset[:, dataset.a.nonbogus_features]

        rfesvm_split = LinearCSVMC()
        fs = RFE(
            rfesvm_split.get_sensitivity_analyzer(
                postproc=ChainMapper([
                    #FxMapper('features', l2_normed),
                    #FxMapper('samples', np.mean),
                    #FxMapper('samples', np.abs)
                    FxMapper('features', lambda x: np.argsort(np.abs(x))),
                    #maxofabs_sample()
                    mean_sample()
                ])),
            ProxyMeasure(rfesvm_split,
                         postproc=BinaryFxNode(mean_mismatch_error,
                                               'targets')),
            Splitter('train'),
            fselector=FractionTailSelector(
                percent / 100.0,
                mode='select', tail='upper'),
            update_sensitivity=True)

        clf = FeatureSelectionClassifier(
            LinearCSVMC(),
            # on features selected via RFE
            fs)
        # update sensitivity at each step (since we're not using the
        # same CLF as sensitivity analyzer)

        class StoreResults(object):
            def __init__(self):
                self.storage = []

            def __call__(self, data, node, result):
                self.storage.append((node.measure.mapper.ca.history,
                                     node.measure.mapper.ca.errors))

        cv_storage = StoreResults()
        cv = CrossValidation(clf, NFoldPartitioner(), postproc=mean_sample(),
                             callback=cv_storage,
                             enable_ca=['stats'])
        #cv = SplitClassifier(clf)
        try:
            error = cv(dataset).samples.squeeze()
        except Exception as e:
            self.fail('CrossValidation cannot handle classifier with RFE '
                      'feature selection. Got exception: %s' % (e,))
Example #4
    def test_james_problem(self):
        percent = 80
        dataset = datasets['uni2small']
        rfesvm_split = LinearCSVMC()
        fs = RFE(
            rfesvm_split.get_sensitivity_analyzer(),
            ProxyMeasure(rfesvm_split,
                         postproc=BinaryFxNode(mean_mismatch_error,
                                               'targets')),
            Splitter('train'),
            fselector=FractionTailSelector(
                percent / 100.0,
                mode='select', tail='upper'),
            update_sensitivity=True)

        clf = FeatureSelectionClassifier(
            LinearCSVMC(),
            # on features selected via RFE
            fs)

        # update sensitivity at each step (since we're not using the
        # same CLF as sensitivity analyzer)

        class StoreResults(object):
            def __init__(self):
                self.storage = []

            def __call__(self, data, node, result):
                self.storage.append((node.measure.mapper.ca.history,
                                     node.measure.mapper.ca.errors))

        cv_storage = StoreResults()
        cv = CrossValidation(clf,
                             NFoldPartitioner(),
                             postproc=mean_sample(),
                             callback=cv_storage,
                             enable_ca=['confusion'])  # TODO -- it is stats
        #cv = SplitClassifier(clf)
        try:
            error = cv(dataset).samples.squeeze()
        except Exception as e:
            self.fail('CrossValidation cannot handle classifier with RFE '
                      'feature selection. Got exception: %s' % (e, ))

        assert (len(cv_storage.storage) == len(dataset.sa['chunks'].unique))
        assert (len(cv_storage.storage[0]) == 2)
        assert (len(cv_storage.storage[0][0]) == dataset.nfeatures)

        self.assertTrue(error < 0.2)
Example #5
def _test_edmund_chong_20120907():  # pragma: no cover
    # commented out to avoid syntax warnings while compiling
    # from mvpa2.suite import *
    from mvpa2.testing.datasets import datasets
    repeater = Repeater(count=20)

    partitioner = ChainNode([NFoldPartitioner(cvtype=1),
                             Balancer(attr='targets',
                                      count=1, # for real data > 1
                                      limit='partitions',
                                      apply_selection=True
                                      )],
                            space='partitions')

    clf = LinearCSVMC() #choice of classifier
    permutator = AttributePermutator('targets', limit={'partitions': 1},
                                     count=1)
    null_cv = CrossValidation(
        clf,
        ChainNode([partitioner, permutator], space=partitioner.get_space()),
        errorfx=mean_mismatch_error)
    distr_est = MCNullDist(repeater, tail='left', measure=null_cv,
                           enable_ca=['dist_samples'])
    cvte = CrossValidation(clf, partitioner,
                           errorfx=mean_mismatch_error,
                           null_dist=distr_est,
                           enable_ca=['stats'])
    errors = cvte(datasets['uni2small'])
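
A hedged usage sketch (not part of the original snippet): after the measure
has been run, the Monte-Carlo p-value for the observed error is exposed as a
conditional attribute, just as Example #1 consults cv.ca.null_prob:

import numpy as np  # assumed available, as in the surrounding snippets

p = cvte.ca.null_prob          # Dataset holding the MC p-value of the error
print(float(np.asarray(p)))    # small p: error unlikely under permuted targets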
Example #6
def _test_mcasey20120222():  # pragma: no cover
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2012q1/002034.html

    # This one is conditioned on allowing # of samples to be changed
    # by the mapper provided to MappedClassifier.  See
    # https://github.com/yarikoptic/PyMVPA/tree/_tent/allow_ch_nsamples

    import numpy as np
    from mvpa2.datasets.base import dataset_wizard
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.mappers.base import ChainMapper
    from mvpa2.mappers.svd import SVDMapper
    from mvpa2.mappers.fx import mean_group_sample
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import MappedClassifier
    from mvpa2.measures.base import CrossValidation

    mapper = ChainMapper([mean_group_sample(['targets','chunks']),
                          SVDMapper()])
    clf = MappedClassifier(LinearCSVMC(), mapper)
    cvte = CrossValidation(clf, NFoldPartitioner(),
                           enable_ca=['repetition_results', 'stats'])

    ds = dataset_wizard(
        samples=np.arange(32).reshape((8, -1)),
        targets=[1, 1, 2, 2, 1, 1, 2, 2],
        chunks=[1, 1, 1, 1, 2, 2, 2, 2])

    errors = cvte(ds)
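
To see why the comment above says the mapper may change the number of
samples, here is a small standalone sketch (reusing this example's toy
dataset): mean_group_sample averages all samples that share the same
('targets', 'chunks') combination.

import numpy as np
from mvpa2.datasets.base import dataset_wizard
from mvpa2.mappers.fx import mean_group_sample

ds = dataset_wizard(samples=np.arange(32).reshape((8, -1)),
                    targets=[1, 1, 2, 2, 1, 1, 2, 2],
                    chunks=[1, 1, 1, 1, 2, 2, 2, 2])
# the 8 samples collapse onto the 4 unique (target, chunk) combinations
averaged = mean_group_sample(['targets', 'chunks'])(ds)
print(averaged.shape)  # -> (4, 4)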
Example #7
def test_sifter_superord_usecase():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.svm import LinearCSVMC  # fast one to use for tests
    from mvpa2.measures.base import CrossValidation

    from mvpa2.base.node import ChainNode
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.base import Sifter

    # Let's simulate the beast -- 6 categories total, grouped into 3
    # superordinate ones, and actually without any 'superordinate' effect,
    # since the subordinate categories are independent
    ds = normal_feature_dataset(
        nlabels=6,
        snr=100,  # pure signal! ;)
        perlabel=30,
        nfeatures=6,
        nonbogus_features=range(6),
        nchunks=5)
    ds.sa['subord'] = ds.sa.targets.copy()
    ds.sa['superord'] = ['super%d' % (int(i[1]) % 3, )
                         for i in ds.targets]  # 3 superord categories
    # let's override original targets just to be sure that we aren't relying on them
    ds.targets[:] = 0

    npart = ChainNode(
        [
            ## so we split based on superord
            NFoldPartitioner(len(ds.sa['superord'].unique), attr='subord'),
            ## so it should select only those splits where we took 1 from
            ## each of the superord categories leaving things in balance
            Sifter([('partitions', 2),
                    ('superord', {
                        'uvalues': ds.sa['superord'].unique,
                        'balanced': True
                    })]),
        ],
        space='partitions')

    # then run your usual analysis, with the classifier operating in space='superord'
    clf = LinearCSVMC(space='superord')
    cvte_regular = CrossValidation(clf,
                                   NFoldPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
    cvte_super = CrossValidation(clf,
                                 npart,
                                 errorfx=lambda p, t: np.mean(p == t))

    accs_regular = cvte_regular(ds)
    accs_super = cvte_super(ds)

    # With sifting we should get only 2^3 = 8 splits (one of the two
    # subordinate categories from each of the 3 superordinate groups)
    assert (len(accs_super) == 8)
    # I don't think that this would ever fail, so not marking it labile
    assert (np.mean(accs_regular) > .8)
    assert (np.mean(accs_super) < .6)
Example #8
def test_searchlight_cross_decoding(path, subjects, conf_file, type, **kwargs):

    conf = read_configuration(path, conf_file, type)

    for arg in kwargs:
        conf[arg] = kwargs[arg]
        if arg == 'radius':
            radius = kwargs[arg]

    debug.active += ["SLC"]

    ds_merged = get_merged_ds(path, subjects, conf_file, type, **kwargs)

    clf = LinearCSVMC(C=1, probability=1, enable_ca=['probabilities'])
    cv = CrossValidation(clf, NFoldPartitioner(attr='task'))

    maps = []

    for ds in ds_merged:

        ds.targets[ds.targets == 'point'] = 'face'
        ds.targets[ds.targets == 'saccade'] = 'place'

        sl = sphere_searchlight(cv, radius, space='voxel_indices')

        sl_map = sl(ds)

        sl_map.samples *= -1
        sl_map.samples += 1

        nif = map2nifti(sl_map, imghdr=ds.a.imghdr)

        maps.append(nif)

    datetime = get_time()
    analysis = 'cross_searchlight'
    mask = conf['mask_area']
    task = type

    new_dir = datetime + '_' + analysis + '_' + mask + '_' + task
    command = 'mkdir ' + os.path.join(path, '0_results', new_dir)
    os.system(command)

    parent_dir = os.path.join(path, '0_results', new_dir)

    for s, map in zip(subjects, maps):
        name = s
        command = 'mkdir ' + os.path.join(parent_dir, name)
        os.system(command)

        results_dir = os.path.join(parent_dir, name)
        fname = name + '_radius_' + str(radius) + '_searchlight_map.nii.gz'
        map.to_filename(os.path.join(results_dir, fname))

    return maps
Example #9
    def _call(self, ds):
        # Function overridden to let the results have
        # some dataset attributes

        res = LinearCSVMC._call(self, ds)

        if isinstance(res, Dataset):
            for k in ds.sa.keys():
                res.sa[k] = ds.sa[k]
            return res
        else:
            return Dataset(res, sa=ds.sa)
Example #10
    def test_SplitRFE(self):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.testing import ok_, assert_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20,
                                         nlabels=2,
                                         nfeatures=30,
                                         snr=1.,
                                         nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 4 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=4)

        rfeclf = MappedClassifier(
            clf,
            SplitRFE(clf,
                     partitioner,
                     fselector=FractionTailSelector(0.2,
                                                    mode='discard',
                                                    tail='lower')))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(
            len(
                set(dataset.a.nonbogus_features).intersection(
                    rfeclf.mapper.slicearg)) > 0)
        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_('slicearg=' not in r)
        assert_equal(r, r0)
Example #11
def test_sifter_superord_usecase():
    from mvpa2.misc.data_generators import normal_feature_dataset
    from mvpa2.clfs.svm import LinearCSVMC  # fast one to use for tests
    from mvpa2.measures.base import CrossValidation

    from mvpa2.base.node import ChainNode
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.generators.base import Sifter

    ds = _get_superord_dataset()

    npart = ChainNode(
        [
            ## so we split based on superord
            NFoldPartitioner(len(ds.sa['superord'].unique), attr='subord'),
            ## so it should select only those splits where we took 1 from
            ## each of the superord categories leaving things in balance
            Sifter([('partitions', 2),
                    ('superord', {
                        'uvalues': ds.sa['superord'].unique,
                        'balanced': True
                    })]),
        ],
        space='partitions')

    # then run your usual analysis, with the classifier operating in space='superord'
    clf = LinearCSVMC(space='superord')
    cvte_regular = CrossValidation(clf,
                                   NFoldPartitioner(),
                                   errorfx=lambda p, t: np.mean(p == t))
    cvte_super = CrossValidation(clf,
                                 npart,
                                 errorfx=lambda p, t: np.mean(p == t))

    accs_regular = cvte_regular(ds)
    accs_super = cvte_super(ds)

    # With sifting we should get only 2^3 = 8 splits (one of the two
    # subordinate categories from each of the 3 superordinate groups)
    assert (len(accs_super) == 8)
    # I don't think that this would ever fail, so not marking it labile
    assert (np.mean(accs_regular) > .8)
    assert (np.mean(accs_super) < .6)
Example #12
import numpy as np

from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import OddEvenPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.measures.searchlight import sphere_searchlight
from mvpa2.testing.datasets import datasets
from mvpa2.mappers.fx import mean_sample

"""For the sake of simplicity, let's use a small artificial dataset."""

# Lets just use our tiny 4D dataset from testing battery
dataset = datasets['3dlarge']

"""Now it only takes three lines for a searchlight analysis."""

# setup measure to be computed in each sphere (cross-validated
# generalization error on odd/even splits)
cv = CrossValidation(LinearCSVMC(), OddEvenPartitioner())

# setup searchlight with 2 voxels radius and measure configured above
sl = sphere_searchlight(cv, radius=2, space='myspace',
                        postproc=mean_sample())

# run searchlight on dataset
sl_map = sl(dataset)

print('Best performing sphere error:', np.min(sl_map.samples))

"""
If this analysis is done on a fMRI dataset using `NiftiDataset` the resulting
searchlight map (`sl_map`) can be mapped back into the original dataspace
and viewed as a brain overlay. :ref:`Another example <example_searchlight>`
shows a typical application of this algorithm.
"""
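
For a real fMRI dataset the back-projection mentioned above could look like
the following sketch (an assumption-laden illustration: it presumes `dataset`
was loaded with fmri_dataset, so a reverse mapper and NIfTI header are
available, as in the searchlight examples further down this page):

from mvpa2.datasets.mri import map2nifti

# reverse-map the per-sphere errors through the dataset's mapper
# back into voxel space and save them as a NIfTI overlay
sl_img = map2nifti(dataset, sl_map)
sl_img.to_filename('searchlight_error_map.nii.gz')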
Example #13
def test_rfe_sensmap():
    # http://lists.alioth.debian.org/pipermail/pkg-exppsy-pymvpa/2013q3/002538.html
    # just a smoke test for the failure reported in the thread above
    from mvpa2.clfs.svm import LinearCSVMC
    from mvpa2.clfs.meta import FeatureSelectionClassifier
    from mvpa2.measures.base import CrossValidation, RepeatedMeasure
    from mvpa2.generators.splitters import Splitter
    from mvpa2.generators.partition import NFoldPartitioner
    from mvpa2.misc.errorfx import mean_mismatch_error
    from mvpa2.mappers.fx import mean_sample
    from mvpa2.mappers.fx import maxofabs_sample
    from mvpa2.generators.base import Repeater
    from mvpa2.featsel.rfe import RFE
    from mvpa2.featsel.helpers import FractionTailSelector, BestDetector
    from mvpa2.featsel.helpers import NBackHistoryStopCrit
    from mvpa2.datasets import vstack
    from mvpa2.testing import assert_array_equal, assert_raises, SkipTest

    from mvpa2.misc.data_generators import normal_feature_dataset

    # Simulate a simple dataset -- 3 categories, with only the first
    # 3 features carrying signal
    fds = normal_feature_dataset(nlabels=3,
                                 snr=1, # 100,   # pure signal! ;)
                                 perlabel=9,
                                 nfeatures=6,
                                 nonbogus_features=range(3),
                                 nchunks=3)
    clfsvm = LinearCSVMC()

    rfesvm = RFE(clfsvm.get_sensitivity_analyzer(postproc=maxofabs_sample()),
                 CrossValidation(
                     clfsvm,
                     NFoldPartitioner(),
                     errorfx=mean_mismatch_error, postproc=mean_sample()),
                 Repeater(2),
                 fselector=FractionTailSelector(0.70, mode='select', tail='upper'),
                 stopping_criterion=NBackHistoryStopCrit(BestDetector(), 10),
                 update_sensitivity=True)

    fclfsvm = FeatureSelectionClassifier(clfsvm, rfesvm)

    sensanasvm = fclfsvm.get_sensitivity_analyzer(postproc=maxofabs_sample())


    # manually repeating/splitting so we do both RFE sensitivity and classification
    senses, errors = [], []
    for i, pset in enumerate(NFoldPartitioner().generate(fds)):
        # split partitioned dataset
        split = [d for d in Splitter('partitions').generate(pset)]
        senses.append(sensanasvm(split[0])) # and it also should train the classifier so we would ask it about error
        errors.append(mean_mismatch_error(fclfsvm.predict(split[1]), split[1].targets))

    senses = vstack(senses)
    errors = vstack(errors)

    # Let's compare against rerunning the beast simply for classification with CV
    errors_cv = CrossValidation(fclfsvm, NFoldPartitioner(), errorfx=mean_mismatch_error)(fds)
    # and they should match
    assert_array_equal(errors, errors_cv)

    # buggy!
    cv_sensana_svm = RepeatedMeasure(sensanasvm, NFoldPartitioner())
    senses_rm = cv_sensana_svm(fds)

    #print senses.samples, senses_rm.samples
    #print errors, errors_cv.samples
    assert_raises(AssertionError,
                  assert_array_almost_equal,
                  senses.samples, senses_rm.samples)
    raise SkipTest("Known failure for repeated measures: https://github.com/PyMVPA/PyMVPA/issues/117")
Example #14
def test_multiclass_pairs_svm_searchlight():
    from mvpa2.measures.searchlight import sphere_searchlight
    import mvpa2.clfs.meta
    #reload(mvpa2.clfs.meta)
    from mvpa2.clfs.meta import MulticlassClassifier

    from mvpa2.datasets import Dataset
    from mvpa2.clfs.svm import LinearCSVMC
    #import mvpa2.testing.datasets
    #reload(mvpa2.testing.datasets)
    from mvpa2.testing.datasets import datasets
    from mvpa2.generators.partition import NFoldPartitioner, OddEvenPartitioner
    from mvpa2.measures.base import CrossValidation

    from mvpa2.testing import ok_, assert_equal, assert_array_equal
    from mvpa2.sandbox.multiclass import get_pairwise_accuracies

    # Some parameters used in the test below
    nproc = 1 + int(mvpa2.externals.exists('pprocess'))
    ntargets = 4                                # number of targets
    npairs = ntargets * (ntargets - 1) // 2
    center_ids = [35, 55, 1]
    ds = datasets['3dsmall'].copy()

    # redefine C,T so we have a multiclass task
    nsamples = len(ds)
    ds.sa.targets = list(range(ntargets)) * (nsamples // ntargets)
    ds.sa.chunks = np.arange(nsamples) // ntargets
    # and add some obvious signal where it is due
    ds.samples[:, 55] += 15 * ds.sa.targets       # signal for all 4 targets
    ds.samples[:, 35] += 15 * (ds.sa.targets % 2) # signal only for the two
    # target parities, so labels conflict at feature 35 in the 4-class problem

    mclf = MulticlassClassifier(LinearCSVMC(),
                                pass_attr=['sa.chunks', 'ca.raw_predictions_ds'],
                                enable_ca=['raw_predictions_ds'])

    label_pairs = mclf._get_binary_pairs(ds)

    def place_sa_as_samples(ds):
        # add a degenerate dimension for the hstacking in the searchlight
        ds.samples = ds.sa.raw_predictions_ds[:, None]
        ds.sa.pop('raw_predictions_ds')   # no need to drag the copy
        return ds

    mcv = CrossValidation(mclf, OddEvenPartitioner(), errorfx=None,
                          postproc=place_sa_as_samples)
    sl = sphere_searchlight(mcv, nproc=nproc, radius=2, space='myspace',
                            center_ids=center_ids)
    slmap = sl(ds)


    ok_('chunks' in slmap.sa)
    ok_('cvfolds' in slmap.sa)
    ok_('targets' in slmap.sa)
    # so for each SL we got all pairwise tests
    assert_equal(slmap.shape, (nsamples, len(center_ids), npairs))
    assert_array_equal(np.unique(slmap.sa.cvfolds), [0, 1])

    # Verify that we got right labels in each 'pair'
    # all searchlights should have the same set of labels for a given
    # pair of targets
    label_pairs_ = np.apply_along_axis(
        np.unique, 0,
        ## reshape slmap so we have only simple pairs in the columns
        np.reshape(slmap, (-1, npairs))).T

    # need to prep that list of pairs obtained from MulticlassClassifier
    # and since it is 1-vs-1, they all should be just pairs of lists of
    # 1 element so should work
    assert_equal(len(label_pairs_), npairs)
    assert_array_equal(np.squeeze(np.array(label_pairs)), label_pairs_)
    assert_equal(label_pairs_.shape, (npairs, 2))   # for this particular case


    out    = get_pairwise_accuracies(slmap)
    out123 = get_pairwise_accuracies(slmap, select=[1, 2, 3])

    assert_array_equal(np.unique(out123.T), np.arange(1, 4))   # so we got at least correct targets
    # test that we extracted correct accuracies
    # First 3 in out.T should have category 0, so skip them and compare otherwise
    assert_array_equal(out.samples[3:], out123.samples)

    ok_(np.all(out.samples[:, 1] == 1.), "This was with super-strong result")
Example #15
    def test_SplitRFE(self, fmeasure):
        # just a smoke test ATM
        from mvpa2.clfs.svm import LinearCSVMC
        from mvpa2.clfs.meta import MappedClassifier
        from mvpa2.misc.data_generators import normal_feature_dataset
        #import mvpa2.featsel.rfe
        #reload(mvpa2.featsel.rfe)
        from mvpa2.featsel.rfe import RFE, SplitRFE
        from mvpa2.generators.partition import NFoldPartitioner
        from mvpa2.featsel.helpers import FractionTailSelector
        from mvpa2.measures.base import CrossValidation
        from mvpa2.base import externals
        from mvpa2.testing import ok_, assert_equal, assert_array_equal

        clf = LinearCSVMC(C=1)
        dataset = normal_feature_dataset(perlabel=20,
                                         nlabels=2,
                                         nfeatures=11,
                                         snr=1.,
                                         nonbogus_features=[1, 5])
        # flip one of the meaningful features around to see
        # if we are still getting proper selection
        dataset.samples[:, dataset.a.nonbogus_features[1]] *= -1
        # 3 partitions should be enough for testing
        partitioner = NFoldPartitioner(count=3)

        rfeclf = MappedClassifier(
            clf,
            SplitRFE(
                clf,
                partitioner,
                fselector=FractionTailSelector(0.5,
                                               mode='discard',
                                               tail='lower'),
                fmeasure=fmeasure,
                # need to update only when using clf's sens anal
                update_sensitivity=fmeasure is None))
        r0 = repr(rfeclf)

        ok_(rfeclf.mapper.nfeatures_min == 0)
        rfeclf.train(dataset)
        ok_(rfeclf.mapper.nfeatures_min > 0)
        predictions = rfeclf(dataset).samples

        # at least 1 of the nonbogus-features should be chosen
        ok_(
            len(
                set(dataset.a.nonbogus_features).intersection(
                    rfeclf.mapper.slicearg)) > 0)

        # check repr to have all needed pieces
        r = repr(rfeclf)
        s = str(rfeclf)
        ok_(('partitioner=NFoldP' in r) or
            ('partitioner=mvpa2.generators.partition.NFoldPartitioner' in r))
        ok_('lrn=' in r)
        ok_('slicearg=' not in r)
        assert_equal(r, r0)

        if externals.exists('joblib'):
            rfeclf.mapper.nproc = -1
            # compare results against the one ran in parallel
            _slicearg = rfeclf.mapper.slicearg
            _predictions = predictions
            rfeclf.train(dataset)
            predictions = rfeclf(dataset).samples
            assert_array_equal(predictions, _predictions)
            assert_array_equal(_slicearg, rfeclf.mapper.slicearg)

        # Test that we can collect stats from cas within cross-validation
        sensitivities = []
        nested_errors = []
        nested_nfeatures = []

        def store_me(data, node, result):
            sens = node.measure.get_sensitivity_analyzer(
                force_train=False)(data)
            sensitivities.append(sens)
            nested_errors.append(node.measure.mapper.ca.nested_errors)
            nested_nfeatures.append(node.measure.mapper.ca.nested_nfeatures)

        cv = CrossValidation(rfeclf,
                             NFoldPartitioner(count=1),
                             callback=store_me,
                             enable_ca=['stats'])
        _ = cv(dataset)
        # just to make sure we collected them
        assert_equal(len(sensitivities), 1)
        assert_equal(len(nested_errors), 1)
        assert_equal(len(nested_nfeatures), 1)
Example #16
#fds2.samples = stats.zscore(fds2.samples, axis = None)

#fds = fds1.copy()
fds = fds1
#fds.samples = np.dstack((fds1.samples, fds2.samples))
fds.sa.accuracy = sequences.accuracy

# qe.ids - vertices
roi_ids = np.intersect1d(labels[0].ravel(), qe.ids)

seq_train = sequences.loc[:, ["seq_type", "seq_train"]].drop_duplicates()
seq_train = dict(zip(seq_train["seq_type"], seq_train["seq_train"]))

if True:
    # compute classification accuracy
    classifiers = {'svm': LinearCSVMC()}
    mycl = 'svm'
    myresults = []

    fds_acc = fds1[sequences.accuracy >= minacc, :].copy()

    for myseq in seq_train.keys():
        print(myseq)
        # bind myseq via a default argument to avoid late binding in the loop
        errorfx = lambda p, t, myseq=myseq: (
            np.sum(np.logical_and(p == t, t == myseq), dtype=float)
            / np.sum(t == myseq, dtype=float))
        cv = CrossValidation(classifiers[mycl],
                             NFoldPartitioner(),
                             errorfx=errorfx,
                             enable_ca=['stats'])
Example #17
ds_noise = normal_feature_dataset(perlabel=100,
                                  nlabels=2,
                                  nfeatures=2,
                                  snr=0,
                                  nonbogus_features=[0, 1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
"""

To mimic behavior of hard-margin SVM whenever classes become
separable, which is easier to comprehend, we are intentionally setting
very high C value.

"""

clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
cve = CrossValidation(clf, NFoldPartitioner(), enable_ca='stats')
sana = clf.get_sensitivity_analyzer(postproc=None)

rs = []
errors, training_errors = [], []

for sig in sigs:
    ds = ds_noise.copy()
    # introduce signal into the first feature
    ds.samples[ds.T == 'L1', 0] += sig

    error = np.mean(cve(ds))
    sa = sana(ds)
    training_error = 1 - clf.ca.training_stats.stats['ACC']
Example #18
                                          np.array(ds.sa.evidence,dtype= np.str))
    '''

    ds.targets = ds.sa.memory_status

    conf['label_dropped'] = 'None'
    conf['label_included'] = 'all'
    ds = preprocess_dataset(ds, data_type, **conf)
    count_ = 1
    field_ = 'memory'
    balanc = Balancer(count=count_, apply_selection=True, limit=None)
    gen = balanc.generate(ds)
    
    cv_storage = StoreResults()

    clf = LinearCSVMC(C=1)
                
    # This is used for the sklearn crossvalidation
    y = np.zeros_like(ds.targets, dtype=np.int_)
    y[ds.targets == ds.uniquetargets[0]] = 1
    
    # We needs to modify the chunks in order to use sklearn
    ds.chunks = np.arange(len(ds.chunks))
    
    permut_ = []
    
    i = 3
    
    partitioner = SKLCrossValidation(StratifiedKFold(y, n_folds=i))
    
    cvte = CrossValidation(clf,
Example #19
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.misc.errorfx import mean_match_accuracy
from mvpa2.mappers.fx import mean_sample

clf = LinearCSVMC(space='condition')
obj = CrossValidation(
        clf,
        NFoldPartitioner(),
        errorfx=mean_match_accuracy,
        postproc=mean_sample(),
        enable_ca=['stats'])
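
A hedged usage sketch (ds below is hypothetical; any dataset with
'condition' and 'chunks' sample attributes would do):

import numpy as np

res = obj(ds)                  # one mean accuracy value (postproc=mean_sample())
print(float(np.asarray(res)))  # cross-validated accuracy across folds
print(obj.ca.stats)            # confusion matrix gathered via enable_ca=['stats']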
Example #20
(Linear voxel indices mean that each voxel is indexed by a value between
0 (inclusive) and N (exclusive), where N is the number of voxels in the
volume (N = NX * NY * NZ, where NX, NY and NZ are the number of voxels in
the three spatial dimensions). For certain analyses one may want to index
voxels by 'sub indices' (triples (i,j,k) with 0<=i<NX, 0<=j<NY,
and 0<=k<NZ) or by spatial coordinates; conversions among
linear indices, sub indices, and spatial coordinates are provided by
functions in the VolGeom (volume geometry) instance stored in
'qe.voxsel.volgeom'.)

From now on we follow the example in doc/examples/searchlight.py.

First, cross-validation is defined using an (SVM) classifier.
"""

clf = LinearCSVMC()

cv = CrossValidation(clf,
                     NFoldPartitioner(),
                     errorfx=lambda p, t: np.mean(p == t),
                     enable_ca=['stats'])
"""
Set roi_ids, that is, the node indices that serve as searchlight
centers. In this example it is set to None, meaning that all nodes are used
as searchlight centers. It is also possible to restrict the center nodes:
setting roi_ids=np.arange(400, 800) means that only nodes in the range from
400 (inclusive) to 800 (exclusive) are used as searchlight centers, and the
result would be a partial brain map.
"""

roi_ids = None
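
The linear/sub-index relation described above can be sketched with plain
NumPy (illustrative volume dimensions; row-major ordering is assumed here,
while the authoritative conversions live in the VolGeom instance at
'qe.voxsel.volgeom'):

import numpy as np

NX, NY, NZ = 4, 5, 6        # hypothetical volume dimensions
N = NX * NY * NZ            # number of voxels

lin = 73                    # a linear voxel index, 0 <= lin < N
i, j, k = np.unravel_index(lin, (NX, NY, NZ))                # sub indices
assert np.ravel_multi_index((i, j, k), (NX, NY, NZ)) == lin  # round-trip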
Example #21
mvpa2.seed(1)
ds_noise = normal_feature_dataset(perlabel=100, nlabels=2, nfeatures=2, snr=0,
                                  nonbogus_features=[0, 1])

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]

"""

To mimic behavior of hard-margin SVM whenever classes become
separable, which is easier to comprehend, we are intentionally setting
very high C value.

"""

clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
cve = CrossValidation(clf, NFoldPartitioner(), enable_ca='stats')
sana = clf.get_sensitivity_analyzer(postproc=None)

rs = []
errors, training_errors = [], []

for sig in sigs:
    ds = ds_noise.copy()
    # introduce signal into the first feature
    ds.samples[ds.T == 'L1', 0] += sig

    error = np.mean(cve(ds))
    sa = sana(ds)
    training_error = 1 - clf.ca.training_stats.stats['ACC']
Example #22
def do_searchlight(glm_dataset, radius, output_basename, with_null_prob=False):
    clf = LinearCSVMC(space='condition')
    # clf = RbfCSVMC(C=5.0)
    splt = NFoldPartitioner()
    cv = CrossValidation(clf,
                         splt,
                         errorfx=mean_match_accuracy,
                         enable_ca=['stats'],
                         postproc=mean_sample())
    distr_est = []
    if with_null_prob:
        permutator = AttributePermutator('condition',
                                         count=100,
                                         limit='chunks')
        distr_est = MCNullDist(permutator,
                               tail='left',
                               enable_ca=['dist_samples'])
        """
		repeater   = Repeater(count=100)
		permutator = AttributePermutator('condition', limit={'partitions': 1}, count=1) 
		null_cv = CrossValidation(clf, ChainNode([splt, permutator],space=splt.get_space()),
					  postproc=mean_sample())
		null_sl = sphere_searchlight(null_cv, radius=radius, space='voxel_indices',
					     enable_ca=['roi_sizes'])
		distr_est = MCNullDist(repeater,tail='left', measure=null_sl,
				       enable_ca=['dist_samples'])
		"""
        sl = sphere_searchlight(cv,
                                radius=radius,
                                space='voxel_indices',
                                null_dist=distr_est,
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    else:

        sl = sphere_searchlight(cv,
                                radius=radius,
                                space='voxel_indices',
                                enable_ca=['roi_sizes', 'roi_feature_ids'])
    # ds = glm_dataset.copy(deep=False,
    #                       sa=['condition', 'chunks'],
    #                       fa=['voxel_indices'],
    #                       a=['mapper'])
    # debug.active += ["SLC"]
    sl_map = sl(glm_dataset)
    errresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    errresults.to_filename('{}-acc.nii.gz'.format(output_basename))
    sl_map.samples *= -1
    sl_map.samples += 1
    niftiresults = map2nifti(sl_map, imghdr=glm_dataset.a.imghdr)
    niftiresults.to_filename('{}-err.nii.gz'.format(output_basename))
    #TODO: save p value map
    if with_null_prob:
        nullt_results = map2nifti(sl_map,
                                  data=sl.ca.null_t,
                                  imghdr=glm_dataset.a.imghdr)
        nullt_results.to_filename('{}-t.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map,
                                     data=sl.ca.null_prob,
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-prob.nii.gz'.format(output_basename))
        nullprob_results = map2nifti(sl_map,
                                     data=distr_est.cdf(sl_map.samples),
                                     imghdr=glm_dataset.a.imghdr)
        nullprob_results.to_filename('{}-cdf.nii.gz'.format(output_basename))
Example #23
    def _train(self, ds):
        avg_mapper = mean_group_sample([self._attribute])
        ds = ds.get_mapped(avg_mapper)
        return LinearCSVMC._train(self, ds)
Example #24
    def __init__(self, C=1, attr='trial'):
        # pass C through to the parent class (the original hard-coded C=1,
        # silently ignoring the argument)
        LinearCSVMC.__init__(self, C=C)
        self._attribute = attr
Example #25
def plot_vertex(label, vertex_data):

    fdsz = vertex_data[label][0]
    fds_effects = vertex_data[label][1]

    print("#######")
    print(label)

    # clean accuracy
    # extract data and remove outside of valid trials
    NCOMPS = 10
    pca = PCA(n_components=NCOMPS, whiten=False)
    nels = len(np.unique(fds_effects.targets))
    meanRDMs = []
    meanRDMs_pca = []

    for chunkind, chunk in enumerate(np.unique(fds_effects.chunks)):

        fds_red = fds_effects[fds_effects.chunks == chunk]
        #use new class
        fds_pca = pca.fit_transform(fds_effects.samples)
        fds_pca_red = fds_pca[fds_effects.chunks == chunk]
        pl.plot(pca.explained_variance_ratio_)
        pl.xlim((0, NCOMPS))

        dist_matrix = rsa.pdist(fds_red[fds_red.sa.accuracy == 1, :],
                                metric='correlation')
        dist_matrix_pca = rsa.pdist(fds_pca_red[fds_red.sa.accuracy == 1, :],
                                    metric='correlation')
        meanRDM, within, between = get_RDM_metric(
            dist_matrix, 'correlation',
            fds_red[fds_red.sa.accuracy == 1, :].targets)
        meanRDM_pca, within_pca, between_pca = get_RDM_metric(
            dist_matrix_pca, 'correlation',
            fds_red[fds_red.sa.accuracy == 1, :].targets)
        meanRDMs.append(meanRDM)
        meanRDMs_pca.append(meanRDM_pca)

    meanRDMs = 1 - np.tanh(np.nanmean(np.dstack(meanRDMs), axis=2))
    meanRDMs_pca = 1 - np.tanh(np.nanmean(np.dstack(meanRDMs_pca), axis=2))

    pl.figure()
    pl.subplot(1, 2, 1)
    pl.imshow(meanRDMs)
    pl.subplot(1, 2, 2)
    pl.imshow(meanRDMs_pca)
    pl.savefig(os.path.join(datapath, 'results', 'matrix-%s.png' % (label)))

    RDM = meanRDMs_pca  # select one

    colors = {1: 'b', 2: 'g', 3: 'r', 4: 'k'}
    #    markers = ['o', 'v', '+', 's']
    mymat = RDM.copy()
    # as barplot
    within_values = np.diag(RDM)
    np.fill_diagonal(mymat, 0)
    between_values = np.sum(mymat, axis=0) / (nels - 1)

    fig = pl.figure(figsize=(15, 10), dpi=300)
    ax = fig.gca()
    ind = np.unique(fds_effects.targets)
    p1 = ax.bar(ind - 0.2, within_values, color='green', width=0.4)
    p2 = ax.bar(ind + 0.2, between_values, color='red', width=0.4)
    ax.set_xticks(ind)
    ax.set_ylim((0, .7))
    fig.legend((p1[0], p2[0]), ('Within', 'Between'))
    fig.savefig(os.path.join(datapath, 'results', 'barplot%s.png' % (label)))

    pl.figure(figsize=(15, 10), dpi=300)

    for t, target in enumerate(np.unique(fds_red.targets)):
        sel = np.logical_and(fds_red.targets == target,
                             fds_red.sa.accuracy == 1)
        #            sel = np.logical_and(fds_red.chunks == 2, sel)
        std = np.std(fds_pca_red[sel, :], axis=0)
        mean = np.mean(fds_pca_red[sel, :], axis=0)
        pl.plot(mean, color=colors[target])
        pl.plot(fds_pca_red[sel, :].T, color=colors[target], linewidth=0.2)
        pl.errorbar(x=np.arange(len(mean)), y=mean, yerr=std, fmt='o')
    pl.savefig(os.path.join(datapath, 'results', 'PCA-%s.png' % (label)))

    # ADD ALL
    #    mtgs = mean_group_sample(['targets', 'chunks'])
    #    fds_mean = mtgs(fds[fds.sa.trials > 0, fd_indices])

    X = fdsz.samples

    pl.figure(figsize=(15, 10), dpi=300)
    colors = {'1': 'b', '2': 'g', '3': 'r', '4': 'k'}
    #    for chunk in np.unique(fds.chunks):
    #        pl.subplot(1, NRUNS, chunk + 1)
    #        for trial in range(1, 6):
    for target in ['1', '2', '3', '4']:
        sel = fds.targets == target  #np.logical_and(fds.chunks == chunk, fds.targets == target)
        if np.sum(sel):
            df = pd.DataFrame({
                't': np.round(fds.sa.trial_time[sel], 1),
                'x': np.mean(X[sel, :], axis=1)
            })
            xx = np.linspace(np.min(df.t), np.max(df.t), 20)
            yy = np.interp(xx, df.t, df.x)
            #                df = df.groupby('t').mean()
            #                pl.plot(df.index, df.x, color = colors[target])
            pl.plot(xx, yy, color=colors[target])
            pl.xlim((0, 25))
            pl.ylim((-1, 2))
            pl.axvline(x=1.0, linestyle='--', color='k')  # fixation
            pl.axvline(x=3.1, linestyle='--', color='k')  # execution
            pl.axvline(x=6.6, linestyle='--', color='k')  # end of execution
            pl.axvline(x=6.6 + 0.5 + 6.7, linestyle='--',
                       color='r')  # mean for next trial
            pl.axhline(y=0.0, linestyle='--', color='g')

    pl.savefig(
        os.path.join(datapath, 'results', 'timecourses-%s.png' % (label)))

    #########
    if True:
        svm = LinearCSVMC()
        #    ridge = RidgeReg()

        NPERMS = 100
        permutator = AttributePermutator('targets', count=NPERMS)
        partitioner = NFoldPartitioner()

        distr_est = MCNullDist(permutator,
                               tail='left',
                               enable_ca=['dist_samples'])

        cv_svm = CrossValidation(svm,
                                 partitioner,
                                 errorfx=lambda p, t: np.mean(p == t),
                                 enable_ca=['stats'])

        #    cv_ridge = CrossValidation(ridge, partitioner,
        #                         errorfx=lambda p, t: np.mean(p == t),
        #                         enable_ca=['stats'])

        results_svm = cv_svm(fds_effects[fds_effects.sa.accuracy == 1, :])
        #    results_ridge = cv_ridge(fds)

        cv_mc = CrossValidation(svm,
                                partitioner,
                                postproc=mean_sample(),
                                null_dist=distr_est,
                                enable_ca=['stats'])

        #        cv_mc = CrossValidation(svm, partitioner,
        #                         errorfx=mean_mismatch_error,
        #                         null_dist=distr_est,
        #                         enable_ca=['stats'])
        # run

        results_clf = cv_mc(fds_effects[fds_effects.sa.accuracy == 1, :])
        p = cv_mc.ca.null_prob
        print('Accuracy:', np.mean(results_svm))
        print('CV-errors:', np.ravel(results_clf))
        print('Corresponding p-values:', np.ravel(p))

#        pl.hist(fds.samples)

    if False:
        #compute matrix and aggregate across trials
        dist_matrix = squareform(
            rsa.pdist(fds_effects[fds_effects.sa.accuracy == 1, :],
                      metric='correlation'))
        #       dist_matrix = squareform(rsa.pdist(fds, metric='mahalanobis'))
        meanRDM, elements, score, within, between = aggregate_matrix(
            dist_matrix, fds_effects.chunks, fds_effects.targets)

        print(label, score, within, between)
        #        pl.figure(figsize=(15, 9), dpi=300)
        #        pl.plot(within, between)
        #        pl.xlim((0, 2))
        #        pl.ylim((0, 2))
        #        pl.savefig(os.path.join(datapath, 'results', 'score-%s.png'%(label)))

        pl.figure(figsize=(15, 9), dpi=300)

        for chunkind, chunk in enumerate(np.unique(fds_effects.chunks)):
            pl.subplot(1, 5, chunkind + 1)
            mtx = meanRDM[:, :, chunkind]
            pl.imshow(mtx, interpolation='nearest')
            pl.xticks(range(len(mtx)), elements, rotation=-45)
            pl.yticks(range(len(mtx)), elements)
            pl.title('Correlation distances')
            pl.clim((0, np.nanmax(mtx)))
#            if chunkind == 4:
#                pl.colorbar()

        pl.savefig(
            os.path.join(datapath, 'results',
                         'target_distances-%s.png' % (label)))

        # MDS
        embedding = MDS(n_components=2, dissimilarity='precomputed')
        X_transformed = embedding.fit_transform(dist_matrix)
        colors = ['k', 'r', 'g', 'b']
        markers = ['o', 'v', 'P', 's']
        pl.figure(figsize=(27, 9), dpi=300)

        for c, chunk in enumerate(np.unique(fds_effects.chunks)):
            pl.subplot(1, 5, c + 1)
            xmin = np.min(X_transformed[fds_effects.chunks == chunk, 0]) * 1.1
            xmax = np.max(X_transformed[fds_effects.chunks == chunk, 0]) * 1.1
            ymin = np.min(X_transformed[fds_effects.chunks == chunk, 1]) * 1.1
            ymax = np.max(X_transformed[fds_effects.chunks == chunk, 1]) * 1.1

            for t, target in enumerate(np.unique(fds_effects.targets)):
                sel = np.logical_and(fds_effects.chunks == chunk,
                                     fds_effects.targets == target)

                pl.scatter(X_transformed[sel, 0],
                           X_transformed[sel, 1],
                           marker=markers[t],
                           color=colors[t],
                           s=5,
                           alpha=0.8)
                pl.xlim((xmin, xmax))
                pl.ylim((ymin, ymax))
                pl.scatter(np.mean(X_transformed[sel, 0], axis=0),
                           np.mean(X_transformed[sel, 1], axis=0),
                           marker=markers[t],
                           color=colors[t],
                           s=50)
        pl.savefig(os.path.join(datapath, 'results', 'MDS-%s.png' % (label)))