def test_mysvc_reducer(self):
        """Check that three execution modes reduce to the same result.

        The same two-leaf ``Methods`` tree (``MySVC`` with C=1.0 and C=2.0,
        reduced by ``MyReducer``) is run directly, through ``LocalEngine``
        and through ``SomaWorkflowEngine``.

        Raises:
            ValueError: if the ``repr`` of the soma-workflow result differs
                from the local or the directly-run result.
        """
        ## 1) Build dataset
        ## ===================================================================
        X, y = datasets.make_classification(n_samples=12,
                                            n_features=10,
                                            n_informative=2,
                                            random_state=1)

        ## 2) run with Methods
        ## ===================================================================
        my_svc1 = MySVC(C=1.0)
        my_svc2 = MySVC(C=2.0)

        two_svc_single = Methods(my_svc1, my_svc2)
        two_svc_local = Methods(my_svc1, my_svc2)
        two_svc_swf = Methods(my_svc1, my_svc2)

        two_svc_single.reducer = MyReducer()
        two_svc_local.reducer = MyReducer()
        two_svc_swf.reducer = MyReducer()

        # print(...) with a single argument behaves the same on Python 2
        # and Python 3, unlike the bare print statement.
        for leaf in two_svc_single.walk_leaves():
            print(leaf.get_key())
        for leaf in two_svc_local.walk_leaves():
            print(leaf.get_key())
        for leaf in two_svc_swf.walk_leaves():
            print(leaf.get_key())

        # top-down process to call transform
        two_svc_single.run(X=X, y=y)
        # bottom-up process to compute scores
        res_single = two_svc_single.reduce()

        ### You can get below results:
        ### ==================================================================
        ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

        ### 3) Run using local multi-processes
        ### ==================================================================
        from epac.map_reduce.engine import LocalEngine
        local_engine = LocalEngine(two_svc_local, num_processes=2)
        # The engine returns the tree to reduce; rebind to it.
        two_svc_local = local_engine.run(X=X, y=y)
        res_local = two_svc_local.reduce()

        ### 4) Run using soma-workflow
        ### ==================================================================
        from epac.map_reduce.engine import SomaWorkflowEngine
        sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf,
                                        num_processes=2)
        two_svc_swf = sfw_engine.run(X=X, y=y)
        res_swf = two_svc_swf.reduce()

        # Results are compared through repr() rather than ==.
        if repr(res_swf) != repr(res_local):
            raise ValueError("Cannot dump class definition")
        if repr(res_swf) != repr(res_single):
            raise ValueError("Cannot dump class definition")
    def test_mysvc_reducer(self):
        """Compare the reduced output of one ``Methods`` tree across engines.

        Three identical two-leaf trees are built from the same ``MySVC``
        instances. One is run directly, one with ``LocalEngine`` and one with
        ``SomaWorkflowEngine``. A ``ValueError`` is raised when the ``repr``
        of the soma-workflow result disagrees with either of the others.
        """
        # --- 1) Dataset -----------------------------------------------------
        X, y = datasets.make_classification(
            n_samples=12, n_features=10, n_informative=2, random_state=1)

        # --- 2) One tree per execution mode ---------------------------------
        svc_c1 = MySVC(C=1.0)
        svc_c2 = MySVC(C=2.0)

        tree_single = Methods(svc_c1, svc_c2)
        tree_local = Methods(svc_c1, svc_c2)
        tree_swf = Methods(svc_c1, svc_c2)
        all_trees = (tree_single, tree_local, tree_swf)

        for tree in all_trees:
            tree.reducer = MyReducer()

        # Show the leaf keys of every tree, tree by tree.
        for tree in all_trees:
            for leaf in tree.walk_leaves():
                print(leaf.get_key())

        # Direct run: top-down transform, then bottom-up reduce.
        tree_single.run(X=X, y=y)
        res_single = tree_single.reduce()

        # Expected shape of the result, for reference:
        # [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

        # --- 3) Local multi-process run -------------------------------------
        from epac.map_reduce.engine import LocalEngine
        tree_local = LocalEngine(tree_local, num_processes=2).run(X=X, y=y)
        res_local = tree_local.reduce()

        # --- 4) soma-workflow run -------------------------------------------
        from epac.map_reduce.engine import SomaWorkflowEngine
        swf_engine = SomaWorkflowEngine(tree_root=tree_swf, num_processes=2)
        tree_swf = swf_engine.run(X=X, y=y)
        res_swf = tree_swf.reduce()

        # The soma-workflow result must match the two others (repr compare).
        for reference in (res_local, res_single):
            if repr(res_swf) != repr(reference):
                raise ValueError("Cannot dump class definition")
Exemplo n.º 3
0
 def test_constructor_avoid_collision_level2(self):
     """Leaf keys stay unique when pipelines differ only in their 2nd step.

     Two ``Pipe(SelectKBest(k=2), SVC(kernel="linear", C=C))`` pipelines
     are put under one ``Methods`` node; the test fails if two leaves
     report the same key.
     """
     # Test that level 2 collisions are avoided
     pm = Methods(*[Pipe(SelectKBest(k=2), SVC(kernel="linear", C=C))
                    for C in [1, 10]])
     leaves_key = [leaf.get_key() for leaf in pm.walk_leaves()]
     # assertEqual reports both counts on failure, unlike assertTrue(a == b).
     self.assertEqual(len(leaves_key), len(set(leaves_key)),
                      u'Collision could not be avoided')
Exemplo n.º 4
0
 def test_constructor_avoid_collision_level2(self):
     """Verify that leaf keys are distinct for second-level variations.

     The two pipelines share the ``SelectKBest(k=2)`` step and differ only
     in the ``C`` value of the linear ``SVC`` that follows it.
     """
     pipes = []
     for c_value in [1, 10]:
         pipes.append(Pipe(SelectKBest(k=2),
                           SVC(kernel="linear", C=c_value)))
     tree = Methods(*pipes)

     keys = []
     for leaf in tree.walk_leaves():
         keys.append(leaf.get_key())

     # No duplicates means the set has as many entries as the list.
     keys_are_unique = len(keys) == len(set(keys))
     self.assertTrue(keys_are_unique, u'Collision could not be avoided')
Exemplo n.º 5
0
 def test_constructor_avoid_collision_level1(self):
     """Verify that leaf keys are distinct for first-level variations.

     Two linear ``SVC`` estimators differing only in ``C`` are placed
     directly under one ``Methods`` node.
     """
     classifiers = []
     for c_value in [1, 10]:
         classifiers.append(SVC(kernel="linear", C=c_value))
     tree = Methods(*classifiers)

     keys = []
     for leaf in tree.walk_leaves():
         keys.append(leaf.get_key())

     # No duplicates means the set has as many entries as the list.
     keys_are_unique = len(keys) == len(set(keys))
     self.assertTrue(keys_are_unique, u"Collision could not be avoided")
Exemplo n.º 6
0
# Grid search over two SVM settings, wrapped in CVBestSearchRefit
wf = CVBestSearchRefit(
    Methods(
        SVM(C=1),
        SVM(C=10),
    )
)
wf.run(X=X, y=y)
print(wf.reduce())

# Feature selection combined with SVM and LDA
# CVBestSearchRefit
#                     Methods          (Splitter)
#               /              \
#            KBest(1)         KBest(5) SelectKBest (Estimator)
#              |
#            Methods                   (Splitter)
#        /          \
#    LDA()          SVM() ...          Classifiers (Estimator)
pipelines = Methods(*[
    Pipe(SelectKBest(k=k), Methods(LDA(), SVM()))
    for k in [1, 5]
])
# Show every leaf of the tree that was just built.
print(list(pipelines.walk_leaves()))
best_cv = CVBestSearchRefit(pipelines)
best_cv.run(X=X, y=y)
best_cv.reduce()

# Wrap the model selection in an outer CV
cv = CV(best_cv)
cv.run(X=X, y=y)
cv.reduce()

# Perms + Cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#      /     |       \
#     0      1       2   Samples (Slicer)
#            |
Exemplo n.º 7
0
# CV + Grid search of a simple classifier
wf = CVBestSearchRefit(Methods(SVM(C=1), SVM(C=10)))
wf.run(X=X, y=y)
# print(...) with one argument works on both Python 2 and Python 3.
print(wf.reduce())

# Feature selection combined with SVM and LDA
# CVBestSearchRefit
#                     Methods          (Splitter)
#               /              \
#            KBest(1)         KBest(5) SelectKBest (Estimator)
#              |
#            Methods                   (Splitter)
#        /          \
#    LDA()          SVM() ...          Classifiers (Estimator)
pipelines = Methods(
    *[Pipe(SelectKBest(k=k), Methods(LDA(), SVM())) for k in [1, 5]])
# Show every leaf of the tree that was just built.
print([n for n in pipelines.walk_leaves()])
best_cv = CVBestSearchRefit(pipelines)
best_cv.run(X=X, y=y)
best_cv.reduce()

# Put it in an outer CV
cv = CV(best_cv)
cv.run(X=X, y=y)
cv.reduce()

# Perms + Cross-validation of SVM(linear) and SVM(rbf)
# -------------------------------------
#           Perms        Perm (Splitter)
#      /     |       \
#     0      1       2   Samples (Slicer)
#            |
Exemplo n.º 8
0
# print(...) with one argument works on both Python 2 and Python 3.
print(svms_auto.refited.estimator.coef_)

##############################################################################
# Put everything together
# Pipeline, "Pipe": SelectKBest + StandardScaler + SVM l1 vs l2
from epac import range_log2
from epac import CVBestSearchRefit, Pipe, Methods, CV
# Candidate k values are derived from the number of columns of X.
k_values = range_log2(X.shape[1], add_n=True)
C_values = [.1, 1, 10, 100]
# One pipeline per k; each ends in a Methods node holding every
# (C, penalty) combination.
# NOTE(review): class_weight='auto' appears to have been replaced by
# 'balanced' in newer scikit-learn releases -- confirm against the
# installed version before changing it.
anova_svms = Methods(*[
    Pipe(SelectKBest(k=k),
         preprocessing.StandardScaler(),
         Methods(*[SVM(C=C, penalty=penalty, class_weight='auto', dual=False)
                   for C in C_values for penalty in ['l1', 'l2']]))
    for k in k_values])

# Take a look
print([l for l in anova_svms.walk_leaves()])

## k and C selection based on CV
anova_svms_auto = CVBestSearchRefit(anova_svms)

#anova_svm_all = Methods(anova_svm, anova_svm_cv)
cv = CV(anova_svms_auto, n_folds=n_folds)
# Wall-clock time of the outer CV run.
time_fit_predict = time.time()
cv.run(X=X, y=y)
print(time.time() - time_fit_predict)
print(cv.reduce())

# Re-fit on all data. Warning: biased !!!
anova_svms_auto.run(X=X, y=y)

print(anova_svms_auto.best_params)