def test_mysvc_reducer(self):
        """Check that three ways of running the same tree reduce identically.

        Three ``Methods`` trees are built from the same two ``MySVC``
        instances (``C=1.0`` and ``C=2.0``), each with its own ``MyReducer``.
        One is run directly with ``run``/``reduce``, one through
        ``LocalEngine`` and one through ``SomaWorkflowEngine`` (both with
        ``num_processes=2``).  The ``repr`` of the reduced results is then
        compared.

        Raises
        ------
        ValueError
            If the ``repr`` of the soma-workflow result differs from the
            ``repr`` of the local multi-process or the single-process result.
        """
        ## 1) Build dataset
        ## ===================================================================
        X, y = datasets.make_classification(n_samples=12,
                                            n_features=10,
                                            n_informative=2,
                                            random_state=1)

        ## 2) Run with Methods
        ## ===================================================================
        my_svc1 = MySVC(C=1.0)
        my_svc2 = MySVC(C=2.0)

        # One tree per execution mode, all built from the same two estimators.
        two_svc_single = Methods(my_svc1, my_svc2)
        two_svc_local = Methods(my_svc1, my_svc2)
        two_svc_swf = Methods(my_svc1, my_svc2)

        two_svc_single.reducer = MyReducer()
        two_svc_local.reducer = MyReducer()
        two_svc_swf.reducer = MyReducer()

        # print() with a single argument behaves the same on Python 2 and 3.
        for tree in (two_svc_single, two_svc_local, two_svc_swf):
            for leaf in tree.walk_leaves():
                print(leaf.get_key())

        # top-down process to call transform
        two_svc_single.run(X=X, y=y)
        # bottom-up process to compute scores
        res_single = two_svc_single.reduce()

        ### You can get below results:
        ### ==================================================================
        ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

        ### 3) Run using local multi-processes
        ### ==================================================================
        from epac.map_reduce.engine import LocalEngine
        local_engine = LocalEngine(two_svc_local, num_processes=2)
        # The engine returns the tree to reduce; rebind the local name to it.
        two_svc_local = local_engine.run(X=X, y=y)
        res_local = two_svc_local.reduce()

        ### 4) Run using soma-workflow
        ### ==================================================================
        from epac.map_reduce.engine import SomaWorkflowEngine
        sfw_engine = SomaWorkflowEngine(tree_root=two_svc_swf,
                                        num_processes=2)
        two_svc_swf = sfw_engine.run(X=X, y=y)
        res_swf = two_svc_swf.reduce()

        # Results are compared through their repr rather than with ==.
        if repr(res_swf) != repr(res_local):
            raise ValueError("Cannot dump class definition")
        if repr(res_swf) != repr(res_single):
            raise ValueError("Cannot dump class definition")
    def test_mysvc_reducer(self):
        """Run one two-``MySVC`` tree three ways and compare the reductions.

        The same pair of ``MySVC`` estimators is wrapped in three ``Methods``
        trees: one executed directly, one through ``LocalEngine`` and one
        through ``SomaWorkflowEngine``.  A ``ValueError`` is raised when the
        ``repr`` of the soma-workflow reduction differs from either of the
        other two.
        """
        ## 1) Dataset
        ## ===================================================================
        X, y = datasets.make_classification(
            n_samples=12, n_features=10, n_informative=2, random_state=1)

        ## 2) Trees, reducers and the single-process reference run
        ## ===================================================================
        svc_c1 = MySVC(C=1.0)
        svc_c2 = MySVC(C=2.0)

        tree_single = Methods(svc_c1, svc_c2)
        tree_local = Methods(svc_c1, svc_c2)
        tree_swf = Methods(svc_c1, svc_c2)
        all_trees = (tree_single, tree_local, tree_swf)

        # Each tree gets its own reducer instance.
        for tree in all_trees:
            tree.reducer = MyReducer()

        # Show the leaf keys of every tree, tree by tree.
        for tree in all_trees:
            for leaf in tree.walk_leaves():
                print(leaf.get_key())

        # Top-down pass (transform), then bottom-up pass (scores).
        tree_single.run(X=X, y=y)
        reduced_single = tree_single.reduce()

        ### Expected shape of the reduced result:
        ### [{'MySVC(C=1.0)': array([ 1.,  1.])}, {'MySVC(C=2.0)': array([ 1.,  1.])}]

        ### 3) Local multi-process run
        ### ==================================================================
        from epac.map_reduce.engine import LocalEngine
        tree_local = LocalEngine(tree_local, num_processes=2).run(X=X, y=y)
        reduced_local = tree_local.reduce()

        ### 4) soma-workflow run
        ### ==================================================================
        from epac.map_reduce.engine import SomaWorkflowEngine
        swf_runner = SomaWorkflowEngine(tree_root=tree_swf, num_processes=2)
        tree_swf = swf_runner.run(X=X, y=y)
        reduced_swf = tree_swf.reduce()

        # Compare against the local result first, then the single-process one.
        for reference in (reduced_local, reduced_single):
            if repr(reduced_swf) != repr(reference):
                raise ValueError("Cannot dump class definition")
Пример #3
0
def do_all(options):
    """Build, run and reduce a permutation / CV / nested grid-search workflow.

    The workflow is ``Perms(CV(CVBestSearchRefit(Methods(...))))`` where the
    ``Methods`` node holds one ``Pipe(SelectKBest(k), SVC(kernel="linear",
    C))`` per combination of ``C`` in ``[1, 10]`` and ``k`` in the values
    returned by ``range_log2``.  It is executed with ``LocalEngine``, and the
    elapsed times and the reduced results are printed.

    Parameters
    ----------
    options : object
        Attribute container.  The attributes read here are ``k_max``
        (``"auto"`` or a value accepted by ``int()``), ``n_features``,
        ``trace``, ``n_samples``, ``n_informative``, ``n_folds_nested``,
        ``n_folds``, ``n_perms`` and ``n_cores``.

    Returns
    -------
    None
    """
    # Upper bound for the number of selected features: either all features
    # or the user-provided k_max, capped at n_features.
    if options.k_max != "auto":
        k_limit = np.minimum(int(options.k_max), options.n_features)
    else:
        k_limit = options.n_features
    k_values = range_log2(k_limit, add_n=True)
    C_values = [1, 10]
    random_state = 0
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[
        Pipe(SelectKBest(k=k), SVC(kernel="linear", C=C))
        for C in C_values
        for k in k_values
    ])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    # Single-argument print() gives the same text on Python 2 and Python 3.
    print("Time ellapsed, tree construction: %s"
          % (time.time() - time_start))

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    local_engine = LocalEngine(tree_root=wf, num_processes=options.n_cores)
    wf = local_engine.run(X=X, y=y)
    print("Time ellapsed, fit predict: %s"
          % (time.time() - time_fit_predict))
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print(wf.reduce())
    print("Time ellapsed, reduce: %s" % (time.time() - time_reduce))
Пример #4
0
def do_all(options):
    """Construct, execute and reduce a nested grid-search workflow.

    A ``Methods`` node is filled with ``Pipe(SelectKBest(k=k),
    SVC(kernel="linear", C=C))`` for every ``C`` in ``[1, 10]`` and every
    ``k`` returned by ``range_log2``.  It is wrapped in
    ``CVBestSearchRefit``, then ``CV``, then ``Perms`` (permuting ``"y"``),
    and run through ``LocalEngine``.  Timings and the reduced results are
    printed.

    Parameters
    ----------
    options : object
        Attribute container.  This function reads ``k_max`` (``"auto"`` or
        something ``int()`` accepts), ``n_features``, ``trace``,
        ``n_samples``, ``n_informative``, ``n_folds_nested``, ``n_folds``,
        ``n_perms`` and ``n_cores``.

    Returns
    -------
    None
    """
    # With k_max == "auto" every feature count up to n_features is allowed;
    # otherwise k_max is capped at n_features.
    if options.k_max == "auto":
        largest_k = options.n_features
    else:
        largest_k = np.minimum(int(options.k_max), options.n_features)
    k_values = range_log2(largest_k, add_n=True)
    C_values = [1, 10]
    random_state = 0
    if options.trace:
        from epac import conf
        conf.TRACE_TOPDOWN = True

    ## 1) Build dataset
    ## ================
    X, y = datasets.make_classification(n_samples=options.n_samples,
                                        n_features=options.n_features,
                                        n_informative=options.n_informative)

    ## 2) Build Workflow
    ## =================
    time_start = time.time()
    ## CV + Grid search of a pipeline with a nested grid search
    cls = Methods(*[Pipe(SelectKBest(k=k),
                         SVC(kernel="linear", C=C))
                    for C in C_values
                    for k in k_values])
    pipeline = CVBestSearchRefit(cls,
                                 n_folds=options.n_folds_nested,
                                 random_state=random_state)
    wf = Perms(CV(pipeline, n_folds=options.n_folds),
               n_perms=options.n_perms,
               permute="y",
               random_state=random_state)
    # The message is formatted into one string so that print(...) produces
    # the same output whether print is a statement (Python 2) or a function.
    elapsed = time.time() - time_start
    print("Time ellapsed, tree construction: %s" % elapsed)

    ## 3) Run Workflow
    ## ===============
    time_fit_predict = time.time()
    local_engine = LocalEngine(tree_root=wf, num_processes=options.n_cores)
    wf = local_engine.run(X=X, y=y)
    elapsed = time.time() - time_fit_predict
    print("Time ellapsed, fit predict: %s" % elapsed)
    time_reduce = time.time()

    ## 4) Reduce Workflow
    ## ==================
    print(wf.reduce())
    elapsed = time.time() - time_reduce
    print("Time ellapsed, reduce: %s" % elapsed)
Пример #5
0
        from sklearn.svm import SVC
        svc = SVC(C=self.C)
        svc.fit(X, y)
        # "transform" should return a dictionary: ie.: a result, keys are abritrary
        return {"y/pred": svc.predict(X), "y/true": y}

# Build a Methods tree from two SVMTransform nodes (C=1.0 and C=2.0) and wrap
# it in a 2-fold stratified CV keyed on "y", with no reducer on the CV node.
# NOTE(review): X, y and SVMTransform come from earlier in the file (not
# visible in this section).
best_svc_tranform = Methods(SVMTransform(C=1.0), SVMTransform(C=2.0))
cv = CV(best_svc_tranform, cv_key="y", cv_type="stratified", n_folds=2,
        reducer=None)
cv.run(X=X, y=y)  # top-down process to call transform
cv.reduce()       # bottom-up process


#
## 4) Run using local multi-processes
## ==================================

from epac.map_reduce.engine import LocalEngine
# NOTE(review): `best_svc` is not defined in this section; presumably it is
# created earlier in the file -- confirm.
local_engine = LocalEngine(best_svc, num_processes=2)
# The engine's return value replaces the previous `best_svc` binding.
best_svc = local_engine.run(**dict(X=X, y=y))
# NOTE(review): this reduces `best_svc_tranform`, not the `best_svc` tree
# returned by the local engine just above (step 5 reduces `best_svc`) --
# confirm this is intended.
best_svc_tranform.reduce()

## 5) Run using soma-workflow
## ==========================

from epac.map_reduce.engine import SomaWorkflowEngine
# Same run as step 4, but through SomaWorkflowEngine; the tree passed in is
# the one returned by the local engine.
sfw_engine = SomaWorkflowEngine(tree_root=best_svc,
                                num_processes=2)
best_svc = sfw_engine.run(**dict(X=X, y=y))
best_svc.reduce()
Пример #6
0
#anova_svm_all = Methods(anova_svm, anova_svm_cv)
# Run `anova_svms_auto` under cross-validation, then print the elapsed time
# and the reduced results.
# NOTE(review): anova_svms_auto, n_folds, X, y, imaging_variables, time and pd
# are expected to be defined earlier in the file (not visible here).
# Every print() below takes a single argument, so the output is the same on
# Python 2 and Python 3.
cv = CV(anova_svms_auto, n_folds=n_folds)
time_fit_predict = time.time()
cv.run(X=X, y=y)
print(time.time() - time_fit_predict)
print(cv.reduce())

# Re-fit on all data. Warning: biased !!!
anova_svms_auto.run(X=X, y=y)

print(anova_svms_auto.best_params)
print("Features selected by univariate filter")
# Index the variable names with the support mask of the refitted estimator.
selected_features = imaging_variables[
    anova_svms_auto.refited.estimator.get_support()]

print("Features selected weights")
# Pair each selected variable with the coefficient of the estimator found two
# levels below the refitted node.
d = dict(
    var=selected_features,
    svm_weights_l1=(
        anova_svms_auto.refited.children[0].children[0]
        .estimator.coef_.ravel()
    ),
)
print(pd.DataFrame(d).to_string())


##############################################################################
## Use multi-process
from epac.map_reduce.engine import LocalEngine
time_fit_predict = time.time()
local_engine = LocalEngine(tree_root=cv, num_processes=4)
wf = local_engine.run(X=X, y=y)
print(time.time() - time_fit_predict)

print(wf.reduce())