Example #1
    def test_perm(self):
        X, y = datasets.make_classification(n_samples=20,
                                            n_features=5,
                                            n_informative=2)
        n_perms = 2
        rnd = 0
        # = With EPAC
        wf = Perms(SVC(kernel="linear"), n_perms=n_perms, permute="y",
                   random_state=rnd, reducer=None)
        r_epac = wf.top_down(X=X, y=y)
        # = With SKLEARN
        clf = SVC(kernel="linear")
        r_sklearn = list()
        for perm in Permutations(n=y.shape[0], n_perms=n_perms,
                                 random_state=rnd):
            y_p = y[perm]  # y is 1-D; index with the permutation array
            clf.fit(X, y_p)
            r_sklearn.append(clf.predict(X))
        key2cmp = 'y' + conf.SEP + conf.PREDICTION

        # = Comparison
        for iperm in range(n_perms):
            comp = np.all(np.asarray(r_epac[iperm][key2cmp]) ==
                          np.asarray(r_sklearn[iperm]))
            self.assertTrue(comp, u'Diff Perm: EPAC vs sklearn')
        # test reduce
        for iperm in range(n_perms):
            r_epac_reduce = wf.reduce().values()[iperm][key2cmp]
            comp = np.all(np.asarray(r_epac_reduce)
                          == np.asarray(r_sklearn[iperm]))
            self.assertTrue(comp, u'Diff Perm: EPAC reduce')
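Both branches of this test agree only because EPAC's Permutations and the manual sklearn loop are seeded identically (random_state=rnd). A minimal sketch of the underlying label-permutation step in plain numpy (modern Python, not EPAC's Permutations class):

import numpy as np

rng = np.random.RandomState(0)      # fixed seed: reproducible permutation order
y = np.array([0, 1, 1, 0, 1])
perm = rng.permutation(y.shape[0])  # an index array over the samples
y_p = y[perm]                       # permuted labels; y is 1-D, so no ', :' slice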
Example #2
    def test_perm(self):
        X, y = datasets.make_classification(n_samples=20, n_features=5, n_informative=2)
        n_perms = 2
        rnd = 0
        # = With EPAC
        wf = Perms(SVC(kernel="linear"), n_perms=n_perms, permute="y", random_state=rnd, reducer=None)
        r_epac = wf.top_down(X=X, y=y)
        # = With SKLEARN
        clf = SVC(kernel="linear")
        r_sklearn = list()
        for perm in Permutations(n=y.shape[0], n_perms=n_perms, random_state=rnd):
            y_p = y[perm]  # y is 1-D; index with the permutation array
            clf.fit(X, y_p)
            r_sklearn.append(clf.predict(X))
        key2cmp = "y" + conf.SEP + conf.PREDICTION

        # = Comparison
        for iperm in range(n_perms):
            comp = np.all(np.asarray(r_epac[iperm][key2cmp]) == np.asarray(r_sklearn[iperm]))
            self.assertTrue(comp, u"Diff Perm: EPAC vs sklearn")
        # test reduce
        for iperm in range(n_perms):
            r_epac_reduce = wf.reduce().values()[iperm][key2cmp]
            comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn[iperm]))
            self.assertTrue(comp, u"Diff Perm: EPAC reduce")
Example #3
def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    #print options
    #sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k),
                         SVC(kernel="linear", C=C))
                    for C in C_values
                    for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time ellapsed, tree construction:", time.time() - time_start

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    ## Run on local machine
    sfw_engine = SomaWorkflowEngine(
                        tree_root=wf,
                        num_processes=options.n_cores
                        )
    ## Run on cluster
#    sfw_engine = SomaWorkflowEngine(
#                        tree_root=wf,
#                        num_processes=options.n_cores,
#                        resource_id="jl237561@gabriel",
#                        login="******")
    # You can use soma_workflow_gui to track your progress
    wf = sfw_engine.run(X=X, y=y)
    print "Time ellapsed, fit predict:",  time.time() - time_fit_predict
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print wf.reduce()
    print "Time ellapsed, reduce:",   time.time() - time_reduce
Example #4
def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features),
                              add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    #print options
    #sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[
        Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C)) for C in C_values
        for k in k_values
    ])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time ellapsed, tree construction:", time.time() - time_start

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    ## Run on local machine
    sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                    num_processes=options.n_cores)
    ## Run on cluster
    #    sfw_engine = SomaWorkflowEngine(
    #                        tree_root=wf,
    #                        num_processes=options.n_cores,
    #                        resource_id="jl237561@gabriel",
    #                        login="******")
    wf = sfw_engine.run(X=X, y=y)
    print "Time ellapsed, fit predict:", time.time() - time_fit_predict
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print wf.reduce()
    print "Time ellapsed, reduce:", time.time() - time_reduce
Example #5
    def test_perm_cv(self):
        X, y = datasets.make_classification(n_samples=20,
                                            n_features=5,
                                            n_informative=2)
        n_perms = 3
        n_folds = 2
        rnd = 0
        # = With EPAC
        wf = Perms(CV(SVC(kernel="linear"),
                      n_folds=n_folds,
                      reducer=ClassificationReport(keep=True)),
                   n_perms=n_perms,
                   permute="y",
                   random_state=rnd,
                   reducer=None)
        r_epac = wf.run(X=X, y=y)
        # = With SKLEARN
        from sklearn.cross_validation import StratifiedKFold
        clf = SVC(kernel="linear")
        r_sklearn = [[None] * n_folds for i in xrange(n_perms)]
        perm_nb = 0
        for perm in Permutations(n=y.shape[0],
                                 n_perms=n_perms,
                                 random_state=rnd):
            y_p = y[perm]
            fold_nb = 0
            for idx_train, idx_test in StratifiedKFold(y=y_p, n_folds=n_folds):
                X_train = X[idx_train, :]
                X_test = X[idx_test, :]
                y_p_train = y_p[idx_train]  # y_p is 1-D; no second axis
                clf.fit(X_train, y_p_train)
                r_sklearn[perm_nb][fold_nb] = clf.predict(X_test)
                fold_nb += 1
            perm_nb += 1

        cmp_key = 'y' + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
        # Comparison
        for iperm in range(n_perms):
            for icv in range(n_folds):
                comp = np.all(
                    np.asarray(r_epac[iperm][icv][cmp_key]) == np.asarray(
                        r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC vs sklearn')

        # test reduce
        for iperm in range(n_perms):
            for icv in range(n_folds):
                ## iperm = 0
                ## icv = 0
                comp = np.all(
                    np.asarray(wf.reduce().values()[iperm][cmp_key][icv]) ==
                    np.asarray(r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
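A subtlety this test depends on: StratifiedKFold is built from the permuted labels y_p, so every permutation induces its own train/test splits, and EPAC must do the same for the per-fold predictions to match. A small illustration with the modern sklearn.model_selection API (the code above uses the long-deprecated sklearn.cross_validation module):

import numpy as np
from sklearn.model_selection import StratifiedKFold

y = np.array([0, 0, 0, 1, 1, 1])
y_p = y[np.random.RandomState(0).permutation(y.size)]
X_dummy = np.zeros((y.size, 1))
for (tr, te), (tr_p, te_p) in zip(StratifiedKFold(n_splits=2).split(X_dummy, y),
                                  StratifiedKFold(n_splits=2).split(X_dummy, y_p)):
    print(te, te_p)  # test indices generally differ once labels are permuted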
Example #6
    def test_perm_cv(self):
        X, y = datasets.make_classification(n_samples=20, n_features=5,
                                            n_informative=2)
        n_perms = 3
        n_folds = 2
        rnd = 0
        # = With EPAC
        wf = Perms(CV(SVC(kernel="linear"), n_folds=n_folds,
                      reducer=ClassificationReport(keep=True)),
                   n_perms=n_perms, permute="y",
                   random_state=rnd, reducer=None)
        r_epac = wf.run(X=X, y=y)
        # = With SKLEARN
        from sklearn.cross_validation import StratifiedKFold
        clf = SVC(kernel="linear")
        r_sklearn = [[None] * n_folds for i in xrange(n_perms)]
        perm_nb = 0
        for perm in Permutations(n=y.shape[0], n_perms=n_perms,
                                 random_state=rnd):
            y_p = y[perm]
            fold_nb = 0
            for idx_train, idx_test in StratifiedKFold(y=y_p,
                                                       n_folds=n_folds):
                X_train = X[idx_train, :]
                X_test = X[idx_test, :]
                y_p_train = y_p[idx_train]  # y_p is 1-D; no second axis
                clf.fit(X_train, y_p_train)
                r_sklearn[perm_nb][fold_nb] = clf.predict(X_test)
                fold_nb += 1
            perm_nb += 1

        cmp_key = 'y' + conf.SEP + conf.TEST + conf.SEP + conf.PREDICTION
        # Comparison
        for iperm in range(n_perms):
            for icv in range(n_folds):
                comp = np.all(np.asarray(r_epac[iperm][icv][cmp_key]) ==
                              np.asarray(r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC vs sklearn')

        # test reduce
        for iperm in range(n_perms):
            for icv in range(n_folds):
                ## iperm = 0
                ## icv = 0
                comp = np.all(
                    np.asarray(wf.reduce().values()[iperm][cmp_key][icv]) ==
                    np.asarray(r_sklearn[iperm][icv]))
                self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
Example #7
def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features), add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    #print options
    #sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k),
                         SVC(kernel="linear", C=C))
                    for C in C_values
                    for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time ellapsed, tree construction:", time.time() - time_start

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    wf.run(X=X, y=y)
    print "Time ellapsed, fit predict:",  time.time() - time_fit_predict
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print wf.reduce()
    print "Time ellapsed, reduce:",   time.time() - time_reduce
Example #8
    def get_workflow(self):
        ####################################################################
        ## EPAC WORKFLOW
        # -------------------------------------
        #             Perms                      Perm (Splitter)
        #         /     |       \
        #        0      1       2                Samples (Slicer)
        #        |
        #       CV                               CV (Splitter)
        #  /       |       \
        # 0        1       2                     Folds (Slicer)
        # |        |       |
        # Pipeline Pipeline Pipeline             Sequence
        # |
        # SelectKBest(k=2)                       SelectKBest (Estimator)
        # |
        # Methods
        # |                     \
        # SVM(linear,C=1)   SVM(linear,C=3)      Classifiers (Estimator)
        pipeline = Pipe(SelectKBest(k=2),
                        Methods(*[SVC(kernel="linear", C=C)
                                  for C in [1, 3]]))
        wf = Perms(CV(pipeline, n_folds=3),
                   n_perms=3,
                   permute="y",
                   random_state=1)
        return wf
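The annotated tree also makes the cost explicit: the workflow fits one classifier per (permutation, fold, method) leaf. For the values hard-coded above:

n_perms, n_folds, n_classifiers = 3, 3, 2   # from get_workflow above
print(n_perms * n_folds * n_classifiers)    # 18 leaf fit/predict runs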
Example #9
    def get_workflow(self, n_features=int(1E03)):
        random_state = 0
        C_values = [1, 10]
        k_values = 0
        k_max = "auto"
        n_folds_nested = 5
        n_folds = 10
        n_perms = 10
        if k_max != "auto":
            k_values = range_log2(np.minimum(int(k_max), n_features),
                                  add_n=True)
        else:
            k_values = range_log2(n_features, add_n=True)
        cls = Methods(*[Pipe(SelectKBest(k=k), SVC(C=C, kernel="linear"))
                        for C in C_values
                        for k in k_values])
        pipeline = CVBestSearchRefit(cls,
                                     n_folds=n_folds_nested,
                                     random_state=random_state)
        wf = Perms(CV(pipeline, n_folds=n_folds),
                   n_perms=n_perms,
                   permute="y",
                   random_state=random_state)
        return wf
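A rough accounting of how many model fits this workflow schedules, assuming the hypothetical range_log2 sketch shown earlier (11 k values for n_features=1000; the real helper may differ):

n_perms, n_folds, n_folds_nested = 10, 10, 5
n_pipelines = 2 * 11                          # |C_values| x |k_values| (assumed)
fits_per_outer_fold = n_pipelines * n_folds_nested + 1   # inner grid + refit
print(n_perms * n_folds * fits_per_outer_fold)           # 11100 fits in total

Counts like this are why the surrounding examples hand the tree to an engine such as SomaWorkflowEngine instead of running everything in-process.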
Example #10
def do_all(options):
    if options.k_max != "auto":
        k_values = range_log2(np.minimum(int(options.k_max),
                                         options.n_features),
                              add_n=True)
    else:
        k_values = range_log2(options.n_features, add_n=True)
    C_values = [1, 10]
    random_state = 0
    #print options
    #sys.exit(0)
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[
        Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C)) for C in C_values
        for k in k_values
    ])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    print "Time ellapsed, tree construction:", time.time() - time_start

    ## 3) Export Workflow to soma_workflow_gui
    ## ===============
    time_fit_predict = time.time()
    if os.path.isdir(options.soma_workflow_dir):
        shutil.rmtree(options.soma_workflow_dir)
    sfw_engine = SomaWorkflowEngine(tree_root=wf,
                                    num_processes=options.n_cores)
    sfw_engine.export_to_gui(options.soma_workflow_dir, X=X, y=y)

    print "Time ellapsed, fit predict:", time.time() - time_fit_predict

    ## 4) Load Epac tree & Reduce
    ## ==========================
    reduce_filename = os.path.join(options.soma_workflow_dir, "reduce.py")
    f = open(reduce_filename, 'w')
    reduce_str = """from epac.map_reduce.engine import SomaWorkflowEngine
wf = SomaWorkflowEngine.load_from_gui("%s")
print wf.reduce()
""" % options.soma_workflow_dir
    f.write(reduce_str)
    f.close()
    print "#First run\n"\
        "soma_workflow_gui\n"\
        "\t(1)Open %s\n"\
        "\t(2)Submit\n"\
        "\t(3)Transfer Input Files\n"\
        "\t...wait...\n"\
        "\t(4)Transfer Output Files\n"\
        "#When done run:\npython %s" % (
            os.path.join(options.soma_workflow_dir,
                         sfw_engine.open_me_by_soma_workflow_gui),
            reduce_filename)
Example #11
    def todo_perm_cv_grid_vs_sklearn(self):
        X, y = datasets.make_classification(n_samples=100,
                                            n_features=500,
                                            n_informative=5)
        n_perms = 3
        n_folds = 2
        n_folds_nested = 2
        random_state = 0
        k_values = [2, 3]
        C_values = [1, 10]
        # = With EPAC
        pipelines = Methods(*[Pipe(SelectKBest(k=k),
                                   SVC(C=C, kernel="linear"))
                              for C in C_values
                              for k in k_values])
        #print [n for n in pipelines.walk_leaves()]
        pipelines_cv = CVBestSearchRefit(pipelines,
                                         n_folds=n_folds_nested,
                                         random_state=random_state)
        wf = Perms(CV(pipelines_cv, n_folds=n_folds,
                      reducer=ClassificationReport(keep=True)),
                   n_perms=n_perms, permute="y",
                   reducer=PvalPerms(keep=True),
                   random_state=random_state)
        wf.fit_predict(X=X, y=y)
        r_epac = wf.reduce().values()[0]
        for key in r_epac:
            print("key=" + repr(key) + ", value=" + repr(r_epac[key]))

        # = With SKLEARN
        from sklearn.cross_validation import StratifiedKFold
        from epac.sklearn_plugins import Permutations
        from sklearn.pipeline import Pipeline
        from sklearn import grid_search

        clf = Pipeline([('anova', SelectKBest(k=3)),
                        ('svm', SVC(kernel="linear"))])
        parameters = {'anova__k': k_values, 'svm__C': C_values}

        r_sklearn = dict()
        r_sklearn['pred_te'] = [[None] * n_folds for i in range(n_perms)]
        r_sklearn['true_te'] = [[None] * n_folds for i in range(n_perms)]
        r_sklearn['score_tr'] = [[None] * n_folds for i in range(n_perms)]
        r_sklearn['score_te'] = [[None] * n_folds for i in range(n_perms)]
        r_sklearn['mean_score_te'] = [None] * n_perms
        r_sklearn['mean_score_tr'] = [None] * n_perms

        perm_nb = 0
        perms = Permutations(n=y.shape[0],
                             n_perms=n_perms,
                             random_state=random_state)
        for idx in perms:
            #idx = perms.__iter__().next()
            y_p = y[idx]
            cv = StratifiedKFold(y=y_p, n_folds=n_folds)
            fold_nb = 0
            for idx_train, idx_test in cv:
                #idx_train, idx_test  = cv.__iter__().next()
                X_train = X[idx_train, :]
                X_test = X[idx_test, :]
                y_p_train = y_p[idx_train]  # y_p is 1-D
                y_p_test = y_p[idx_test]
                # Nested CV
                cv_nested = StratifiedKFold(y=y_p_train,
                                            n_folds=n_folds_nested)
                gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
                gscv.fit(X_train, y_p_train)
                r_sklearn['pred_te'][perm_nb][fold_nb] = gscv.predict(X_test)
                r_sklearn['true_te'][perm_nb][fold_nb] = y_p_test
                r_sklearn['score_tr'][perm_nb][fold_nb] =\
                    gscv.score(X_train, y_p_train)
                r_sklearn['score_te'][perm_nb][fold_nb] =\
                    gscv.score(X_test, y_p_test)
                fold_nb += 1
            # Average over folds
            r_sklearn['mean_score_te'][perm_nb] = \
                np.mean(np.asarray(r_sklearn['score_te'][perm_nb]), axis=0)
            r_sklearn['mean_score_tr'][perm_nb] = \
                np.mean(np.asarray(r_sklearn['score_tr'][perm_nb]), axis=0)
            perm_nb += 1

        print(repr(r_sklearn))
        # - Comparisons
        shared_keys = set(r_epac.keys()).intersection(set(r_sklearn.keys()))
        comp = {k: np.all(np.asarray(r_epac[k]) == np.asarray(r_sklearn[k]))
                for k in shared_keys}
        print("comp=" + repr(comp))
        #return comp
        for key in comp:
            self.assertTrue(comp[key], u'Diff for attribute: "%s"' % key)
Example #12
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""

from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.feature_selection import SelectKBest
from epac.map_reduce.reducers import PvalPerms
import numpy

X, y = datasets.make_classification(n_samples=100,
                                    n_features=200,
                                    n_informative=2)
X = numpy.random.rand(*X.shape)

from epac import Perms, CV, Methods
perms_cv_svm = Perms(CV(Methods(SVM(loss="l1"), SVM(loss="l2"))), n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# Interactive debugging scratch (inspecting the reducer by hand):
self = perms_cv_svm
key = 'LinearSVC(loss=l1)'
self = PvalPerms()
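Overwriting X with numpy.random.rand makes the null hypothesis true by construction, so the permutation p-values should spread roughly uniformly instead of piling up near zero. A self-contained illustration of that effect with a plain mean-difference statistic (modern Python, no EPAC code):

import numpy as np

rng = np.random.RandomState(0)
n_perms, pvals = 99, []
for _ in range(200):                    # many independent null datasets
    x = rng.rand(40)                    # pure-noise feature: no class signal
    stat = abs(x[20:].mean() - x[:20].mean())     # group 1 vs group 0
    null = np.array([abs(x[p][20:].mean() - x[p][:20].mean())
                     for p in (rng.permutation(40) for _ in range(n_perms))])
    pvals.append((np.sum(null >= stat) + 1.0) / (n_perms + 1.0))
print(np.mean(np.asarray(pvals) < 0.05))  # at or below the nominal 0.05 level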
Example #13
# Excerpt from a longer script: the names below are defined earlier in the
# original file. The values and import paths here are assumptions that
# mirror the neighbouring examples, added so the snippet can run standalone.
import numpy as np
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest
from epac import Perms, CV, Methods, Pipe, range_log2  # paths assumed
from epac import CVBestSearchRefitParallel  # path assumed
k_max = "auto"
n_features = 1000
C_values = [1, 10]
n_folds_nested = 5
n_folds = 10
random_state = 0
n_perms = 10

if k_max != "auto":
    k_values = range_log2(np.minimum(int(k_max), n_features),
                          add_n=True)
else:
    k_values = range_log2(n_features, add_n=True)

cls = Methods(*[Pipe(SelectKBest(k=k), SVC(C=C, kernel="linear"))
                for C in C_values
                for k in k_values])
pipeline = CVBestSearchRefitParallel(cls,
                                     n_folds=n_folds_nested,
                                     random_state=random_state)
wf = Perms(CV(pipeline, n_folds=n_folds),
           n_perms=n_perms,
           permute="y",
           random_state=random_state)

# wf.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# wf.reduce()

# from epac.map_reduce.engine import LocalEngine
# local_engine = LocalEngine(tree_root=wf, num_processes=2)
# wf = local_engine.run(X=X, y=y)
# for leaf in wf.walk_leaves():
#     print leaf.load_results()
# wf.reduce()

from epac.map_reduce.engine import SomaWorkflowEngine
Example #14
    def todo_perm_cv_grid_vs_sklearn(self):
        X, y = datasets.make_classification(n_samples=100,
                                            n_features=500,
                                            n_informative=5)
        n_perms = 3
        n_folds = 2
        n_folds_nested = 2
        random_state = 0
        k_values = [2, 3]
        C_values = [1, 10]
        # = With EPAC
        pipelines = Methods(*[Pipe(SelectKBest(k=k),
                                   SVC(C=C, kernel="linear"))
                              for C in C_values
                              for k in k_values])
        #print [n for n in pipelines.walk_leaves()]
        pipelines_cv = CVBestSearchRefit(pipelines,
                                         n_folds=n_folds_nested,
                                         random_state=random_state)
        wf = Perms(CV(pipelines_cv, n_folds=n_folds,
                      reducer=ClassificationReport(keep=True)),
                   n_perms=n_perms, permute="y",
                   reducer=PvalPerms(keep=True),
                   random_state=random_state)
        wf.fit_predict(X=X, y=y)
        r_epac = wf.reduce().values()[0]
        for key in r_epac:
            print "key=" + repr(key) + ", value=" + repr(r_epac[key])

        # = With SKLEARN
        from sklearn.cross_validation import StratifiedKFold
        from epac.sklearn_plugins import Permutations
        from sklearn.pipeline import Pipeline
        from sklearn import grid_search

        clf = Pipeline([('anova', SelectKBest(k=3)),
                        ('svm', SVC(kernel="linear"))])
        parameters = {'anova__k': k_values, 'svm__C': C_values}

        r_sklearn = dict()
        r_sklearn['pred_te'] = [[None] * n_folds for i in xrange(n_perms)]
        r_sklearn['true_te'] = [[None] * n_folds for i in xrange(n_perms)]
        r_sklearn['score_tr'] = [[None] * n_folds for i in xrange(n_perms)]
        r_sklearn['score_te'] = [[None] * n_folds for i in xrange(n_perms)]
        r_sklearn['mean_score_te'] = [None] * n_perms
        r_sklearn['mean_score_tr'] = [None] * n_perms

        perm_nb = 0
        perms = Permutations(n=y.shape[0],
                             n_perms=n_perms,
                             random_state=random_state)
        for idx in perms:
            #idx = perms.__iter__().next()
            y_p = y[idx]
            cv = StratifiedKFold(y=y_p, n_folds=n_folds)
            fold_nb = 0
            for idx_train, idx_test in cv:
                #idx_train, idx_test  = cv.__iter__().next()
                X_train = X[idx_train, :]
                X_test = X[idx_test, :]
                y_p_train = y_p[idx_train]  # y_p is 1-D
                y_p_test = y_p[idx_test]
                # Nested CV
                cv_nested = StratifiedKFold(y=y_p_train,
                                            n_folds=n_folds_nested)
                gscv = grid_search.GridSearchCV(clf, parameters, cv=cv_nested)
                gscv.fit(X_train, y_p_train)
                r_sklearn['pred_te'][perm_nb][fold_nb] = gscv.predict(X_test)
                r_sklearn['true_te'][perm_nb][fold_nb] = y_p_test
                r_sklearn['score_tr'][perm_nb][fold_nb] =\
                    gscv.score(X_train, y_p_train)
                r_sklearn['score_te'][perm_nb][fold_nb] =\
                    gscv.score(X_test, y_p_test)
                fold_nb += 1
            # Average over folds
            r_sklearn['mean_score_te'][perm_nb] = \
                np.mean(np.asarray(r_sklearn['score_te'][perm_nb]), axis=0)
            r_sklearn['mean_score_tr'][perm_nb] = \
                np.mean(np.asarray(r_sklearn['score_tr'][perm_nb]), axis=0)
            perm_nb += 1

        print repr(r_sklearn)
        # - Comparisons
        shared_keys = set(r_epac.keys()).intersection(set(r_sklearn.keys()))
        comp = {k: np.all(np.asarray(r_epac[k]) == np.asarray(r_sklearn[k]))
                for k in shared_keys}
        print "comp=" + repr(comp)
        #return comp
        for key in comp:
            self.assertTrue(comp[key], u'Diff for attribute: "%s"' % key)
Example #15
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 15:21:35 2013

@author: ed203246
"""


from sklearn import datasets
from sklearn.svm import LinearSVC as SVM
from sklearn.lda import LDA
from sklearn.feature_selection import SelectKBest
from epac.map_reduce.reducers import PvalPerms
import numpy

X, y = datasets.make_classification(n_samples=100, n_features=200, n_informative=2)
X = numpy.random.rand(*X.shape)

from epac import Perms, CV, Methods

perms_cv_svm = Perms(CV(Methods(SVM(loss="l1"), SVM(loss="l2"))), n_perms=100)
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# Interactive debugging scratch (inspecting the reducer by hand):
self = perms_cv_svm
key = "LinearSVC(loss=l1)"
self = PvalPerms()
Example #16
# Excerpt: `cv`, `X`, and `y` come from earlier in the original script; a
# minimal setup mirroring the examples above would be
# X, y = datasets.make_classification(n_samples=20, n_features=5,
#                                     n_informative=2).
cv.reduce()

# Perms + Cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#      /     |       \
#     0      1       2   Samples (Slicer)
#            |
#           CV           CV (Splitter)
#       /   |   \
#      0    1    2       Folds (Slicer)
#           |
#        Methods         Methods (Splitter)
#    /           \
# SVM(linear)  SVM(rbf)  Classifiers (Estimator)

from sklearn.svm import SVC
from epac import Perms, CV, Methods
perms_cv_svm = Perms(CV(Methods(*[SVC(kernel="linear"), SVC(kernel="rbf")])))
perms_cv_svm.run(X=X, y=y)
perms_cv_svm.reduce()

# Run with soma-workflow for multi-processes
from epac import SomaWorkflowEngine
sfw_engine = SomaWorkflowEngine(
    tree_root=perms_cv_svm,
    num_processes=2,
)
perms_cv_svm = sfw_engine.run(X=X, y=y)
perms_cv_svm.reduce()