    def test_mysvc_reducer(self):
        ## 1) Build dataset
        ## ===================================================================
        X, y = datasets.make_classification(n_samples=12,
                                            n_features=10,
                                            n_informative=2,
                                            random_state=1)

        ## 2) Run with Methods
        ## ===================================================================
        my_svc1 = MySVC(C=1.0)
        my_svc2 = MySVC(C=2.0)

        two_svc_single = Methods(my_svc1, my_svc2)
        two_svc_local = Methods(my_svc1, my_svc2)
        two_svc_swf = Methods(my_svc1, my_svc2)

        two_svc_single.reducer = MyReducer()
        two_svc_local.reducer = MyReducer()
        two_svc_swf.reducer = MyReducer()

        for leaf in two_svc_single.walk_leaves():
            print(leaf.get_key())
        for leaf in two_svc_local.walk_leaves():
            print(leaf.get_key())
        for leaf in two_svc_swf.walk_leaves():
            print(leaf.get_key())

        # top-down process to call transform
        two_svc_single.run(X=X, y=y)
        # bottom-up process to compute scores
        res_single = two_svc_single.reduce()

        ### Expected results:
        ### ==================================================================
        ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

        ### 3) Run using local multiprocessing
        ### ==================================================================
        from epac.map_reduce.engine import LocalEngine
        local_engine = LocalEngine(two_svc_local, num_processes=2)
        two_svc_local = local_engine.run(**dict(X=X, y=y))
        res_local = two_svc_local.reduce()

        ### 4) Run using soma-workflow
        ### ==================================================================
        from epac.map_reduce.engine import SomaWorkflowEngine
        sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf, num_processes=2)
        two_svc_swf = sfw_engine.run(**dict(X=X, y=y))
        res_swf = two_svc_swf.reduce()
        if not repr(res_swf) == repr(res_local):
            raise ValueError("soma-workflow results differ from local-engine results")
        if not repr(res_swf) == repr(res_single):
            raise ValueError("soma-workflow results differ from single-process results")
    def test_prev_state_methods(self):
        ## 1) Build dataset
        ## ================================================
        X, y = datasets.make_classification(n_samples=5,
                                            n_features=20,
                                            n_informative=2)
        Xy = {"X": X, "y": y}
        methods = Methods(*[TOY_CLF(v_lambda=v_lambda)
                            for v_lambda in [2, 1]])
        methods.run(**Xy)

        ps_methods = WarmStartMethods(*[TOY_CLF(v_lambda=v_lambda)
                                        for v_lambda in [2, 1]])
        ps_methods.run(**Xy)
        self.assertTrue(compare_two_node(methods, ps_methods))
        self.assertTrue(comp_2wf_reduce_res(methods, ps_methods))
Example #4
    def test_twomethods(self):
        key_y_pred = 'y' + conf.SEP + conf.PREDICTION
        X, y = datasets.make_classification(n_samples=20, n_features=5,
                                            n_informative=2)
        # = With EPAC
        wf = Methods(LDA(), SVC(kernel="linear"))
        r_epac = wf.run(X=X, y=y)

        # = With SKLEARN
        lda = LDA()
        svm = SVC(kernel="linear")
        lda.fit(X, y)
        svm.fit(X, y)
        r_sklearn = [lda.predict(X), svm.predict(X)]

        # Comparison
        for i_cls in range(2):
            comp = np.all(np.asarray(r_epac[i_cls][key_y_pred]) ==
                          np.asarray(r_sklearn[i_cls]))
            self.assertTrue(comp, u'Diff Methods')

        # test reduce
        reduced = wf.reduce()
        r_epac_reduce = [list(reduced.values())[0][key_y_pred],
                         list(reduced.values())[1][key_y_pred]]
        comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
        self.assertTrue(comp, u'Diff Methods: EPAC reduce')
Example #7

if __name__ == "__main__":
    ## 1) Build dataset
    ## ================================================
    X, y = datasets.make_classification(n_samples=10,
                                        n_features=5,
                                        n_informative=2,
                                        random_state=1)
    Xy = {"X": X, "y": y}

    ## 2) Build Methods
    ## ================================================
    print("Methods ===================================")
    methods = Methods(*[TOY_CLF(v_lambda=v_lambda) for v_lambda in [2, 1]])
    print(methods.run(**Xy))

    ## 3) Build WarmStartMethods like Methods
    ## ================================================
    ##               WarmStartMethods
    ##             /                  \
    ##  TOY_CLF(v_lambda=2)    TOY_CLF(v_lambda=1)
    ##
    ##  1. WarmStartMethods looks for the arguments that differ between
    ##     leaves (their signatures); here it is v_lambda, which differs
    ##     for each leaf.
    ##  2. It then runs TOY_CLF(v_lambda=2).transform.
    ##  3. Apart from v_lambda, WarmStartMethods copies all the other
    ##     parameters from TOY_CLF(v_lambda=2) to TOY_CLF(v_lambda=1) as
    ##     initialization.
    ##  4. Finally, it calls TOY_CLF(v_lambda=1).transform.
    print("WarmStartMethods ==========================")
    ps_methods = WarmStartMethods(*[TOY_CLF(v_lambda=v_lambda)
                                    for v_lambda in [2, 1]])
    print(ps_methods.run(**Xy))
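
    ## 4) Analogy with scikit-learn's warm_start (illustration only)
    ## ================================================
    ## Assumption: this sketch mirrors, with a plain scikit-learn estimator,
    ## the parameter copying that WarmStartMethods automates across leaves:
    ## the second fit starts from the first fit's coefficients instead of
    ## from scratch.
    from sklearn.linear_model import SGDClassifier
    clf = SGDClassifier(alpha=2.0, warm_start=True)
    clf.fit(X, y)       # first fit, from scratch
    clf.alpha = 1.0     # change only the hyper-parameter under study
    clf.fit(X, y)       # refit reuses the previous coefficients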
if __name__ == "__main__":
    ## 1) Build dataset
    ## ================================================
    X, y = datasets.make_classification(n_samples=10,
                                        n_features=5,
                                        n_informative=2,
                                        random_state=1)
    Xy = {"X": X, "y": y}

    ## 2) Build Methods
    ## ================================================
    print "Methods ==================================="
    methods = Methods(*[TOY_CLF(v_lambda=v_lambda)
                        for v_lambda in [2, 1]])
    print methods.run(**Xy)

    ## 3) Build WarmStartMethods like Methods
    ## ================================================
    ##               WarmStartMethods
    ##             /                  \
    ##  TOY_CLF(v_lambda=2)    TOY_CLF(v_lambda=1)
    ##
    ##  1. WarmStartMethods will look for different argumenets as signature
    ##     For example, here is v_lambda, there are different for each leaf
    ##  2. And then run TOY_CLF(v_lambda=2).transform
    ##  3. Except v_lambda, WarmStartMethods copy all the other parameters
    ##     from TOY_CLF(v_lambda=2) to TOY_CLF(v_lambda=1) as initialization
    ##  4. Finally call TOY_CLF(v_lambda=1).transform
    print "WarmStartMethods =========================="
    ps_methods = WarmStartMethods(*[TOY_CLF(v_lambda=v_lambda)
Example #9
# Each node's transform takes a dictionary as input and produces a
# dictionary as output, which is passed to the next node. The return value
# of run() is simply the aggregation of the outputs (dicts) of the leaf
# nodes. It is possible to convert it to CSV format:

from epac import export_leaves_csv
export_leaves_csv(pipe, 'my_result_run.csv')
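# For instance, the dump can be read back for inspection (assuming a plain
# CSV layout; the file name is the one passed above):
import pandas as pd
print(pd.read_csv('my_result_run.csv'))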

## Parallelization
## ===============

# Multi-classifiers
# -----------------
#         Methods       Methods (Splitter)
#        /   \
# SVM(C=1)  SVM(C=10)   Classifiers (Estimator)
from epac import Methods
multi = Methods(SVM(C=1), SVM(C=10))
multi.run(X=X, y=y)
print(multi.reduce())

# Reduce format outputs into "ResultSet" which is a dict-like structure
# which contains the "keys" of the methods that have beeen used.

# You can also export the results of the bottom-up operation (reduce) to CSV

from epac import export_resultset_csv
export_resultset_csv(multi.reduce(), 'my_result_reduce.csv')

#                         Methods                  Methods (Splitter)
#          /                        \
# SVM(l1, C=1)  SVM(l1, C=10)  ..... SVM(l2, C=10) Classifiers (Estimator)
svms = Methods(
    *[SVM(loss=loss, C=C) for loss in ("l1", "l2") for C in [1, 10]])
svms.run(X=X, y=y)
print(svms.reduce())

# Parallelize sequential Pipeline: Anova(k best selection) + SVM.
#    Methods    Methods (Splitter)
Example #11
svms = Methods(SVM(penalty="l1", class_weight='auto', dual=False), 
               SVM(penalty="l2", class_weight='auto', dual=False))

cv = CV(svms, n_folds=n_folds)
cv.run(X=X, y=y)
res_cv_svms = cv.reduce()
print(res_cv_svms)
print(res_cv_svms["LinearSVC(penalty=l1)"]['y/test/score_recall'])
print(res_cv_svms["LinearSVC(penalty=l2)"]['y/test/score_recall'])
# !!! BIASED RESULT !!!

# Re-fit on all the data to see which model is chosen. Warning: this is
# biased, since all the data have been used. Use it for information only; no
# score can be computed from it. We look at the weight maps.
svms.run(X=X, y=y)
print(svms.children[0])
print(svms.children[0].estimator.coef_)
print(svms.children[1])
print(svms.children[1].estimator.coef_)

print "Weights given by SVMs"
d = dict(var = imaging_variables,
svm_weights_l1 = svms.children[0].estimator.coef_.ravel(),
svm_weights_l2 = svms.children[1].estimator.coef_.ravel())
print pd.DataFrame(d).to_string()

##############################################################################
# Automatic model selection: "CVBestSearchRefit"
from epac import CVBestSearchRefit, Methods, CV
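
# Hypothetical usage sketch (assumption: CVBestSearchRefit wraps a Methods
# node much as CV does above; the exact constructor arguments are not shown
# in this excerpt). It selects the best method by inner cross-validation and
# then refits it on all the data:
best_svm = CVBestSearchRefit(Methods(SVM(penalty="l1", class_weight='auto', dual=False),
                                     SVM(penalty="l2", class_weight='auto', dual=False)))
best_svm.run(X=X, y=y)
print(best_svm.reduce())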