def test_examples_local_engine(self):
    """Run every example workflow three ways — directly in-process, via
    LocalEngine, and via SomaWorkflowEngine — and check that all three
    produce identical trees and identical reduce results.

    NOTE(review): removed commented-out filtering code and commented
    credential placeholders (resource_id/login) that were dead text.
    """
    list_all_examples = get_wf_example_classes()
    for example in list_all_examples:
        # Three independent copies of the same workflow, one per runner.
        wf = example().get_workflow()
        local_engine_wf = example().get_workflow()
        sfw_engine_wf = example().get_workflow()
        # Reference run: execute the workflow directly in-process.
        wf.run(X=self.X, y=self.y)
        local_engine = LocalEngine(tree_root=local_engine_wf,
                                   num_processes=self.n_cores)
        local_engine_wf = local_engine.run(X=self.X, y=self.y)
        # Keep the finished soma-workflow artifacts for post-mortem debugging.
        sfw_engine = SomaWorkflowEngine(tree_root=sfw_engine_wf,
                                        num_processes=self.n_cores,
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        sfw_engine_wf = sfw_engine.run(X=self.X, y=self.y)
        self.assertTrue(compare_two_node(wf, local_engine_wf))
        self.assertTrue(compare_two_node(wf, sfw_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, local_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, sfw_engine_wf))
def test_examples_local_engine(self):
    """For each example workflow, compare a direct in-process run against
    runs through LocalEngine and SomaWorkflowEngine: the resulting trees
    and reduce results must match.

    NOTE(review): dropped commented-out dead code (disabled class-name
    filter, commented resource_id/login credential placeholders).
    """
    list_all_examples = get_wf_example_classes()
    for example in list_all_examples:
        # Build one workflow instance per execution backend.
        wf = example().get_workflow()
        local_engine_wf = example().get_workflow()
        sfw_engine_wf = example().get_workflow()
        # In-process reference execution.
        wf.run(X=self.X, y=self.y)
        local_engine = LocalEngine(tree_root=local_engine_wf,
                                   num_processes=self.n_cores)
        local_engine_wf = local_engine.run(X=self.X, y=self.y)
        # Retain soma-workflow output for inspection if the test fails.
        sfw_engine = SomaWorkflowEngine(tree_root=sfw_engine_wf,
                                        num_processes=self.n_cores,
                                        remove_finished_wf=False,
                                        remove_local_tree=False)
        sfw_engine_wf = sfw_engine.run(X=self.X, y=self.y)
        self.assertTrue(compare_two_node(wf, local_engine_wf))
        self.assertTrue(compare_two_node(wf, sfw_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, local_engine_wf))
        self.assertTrue(comp_2wf_reduce_res(wf, sfw_engine_wf))
def test_examples_local_engine(self):
    """Check that LocalEngine and SomaWorkflowEngine reproduce the
    reduce results of a direct in-process run for every example
    workflow."""
    for example_cls in get_wf_example_classes():
        workflow = example_cls().get_workflow()
        # Reference execution, directly in this process.
        workflow.run(X=self.X, y=self.y)
        local_engine = LocalEngine(tree_root=workflow,
                                   num_processes=self.n_cores)
        local_result = local_engine.run(X=self.X, y=self.y)
        soma_engine = SomaWorkflowEngine(tree_root=workflow,
                                         num_processes=self.n_cores)
        soma_result = soma_engine.run(X=self.X, y=self.y)
        self.assertTrue(comp_2wf_reduce_res(workflow, local_result))
        self.assertTrue(comp_2wf_reduce_res(workflow, soma_result))
def test_prev_state_methods(self):
    """Verify that WarmStartMethods produces the same tree and the same
    reduce results as plain Methods on a tiny classification problem."""
    # 1) Build a small synthetic dataset.
    X, y = datasets.make_classification(n_samples=5,
                                        n_features=20,
                                        n_informative=2)
    data = {"X": X, "y": y}
    lambdas = [2, 1]
    # Cold-start baseline: independent toy classifiers.
    methods = Methods(*(TOY_CLF(v_lambda=lam) for lam in lambdas))
    methods.run(**data)
    # Warm-start variant over the same lambda grid.
    ps_methods = WarmStartMethods(*(TOY_CLF(v_lambda=lam) for lam in lambdas))
    ps_methods.run(**data)
    self.assertTrue(compare_two_node(methods, ps_methods))
    self.assertTrue(comp_2wf_reduce_res(methods, ps_methods))
def test_cv_best_search_refit_parallel(self):
    """Compare CVBestSearchRefitParallel executed through
    SomaWorkflowEngine against an identical workflow executed
    in-process: trees and reduce results must match."""
    n_folds = 2
    n_folds_nested = 3
    k_values = [1, 2]
    C_values = [1, 2]
    n_cores = 2
    X, y = datasets.make_classification(n_samples=500,
                                        n_features=10000,
                                        n_informative=5)

    def build_workflow():
        # One pipeline per k: feature selection, then a grid of linear SVCs.
        pipelines = Methods(*[Pipe(SelectKBest(k=k),
                                   Methods(*[SVC(kernel="linear", C=C)
                                             for C in C_values]))
                              for k in k_values])
        best = CVBestSearchRefitParallel(pipelines, n_folds=n_folds_nested)
        return CV(best, n_folds=n_folds)

    # Parallel execution via soma-workflow; keep artifacts for debugging.
    engine = SomaWorkflowEngine(tree_root=build_workflow(),
                                num_processes=n_cores,
                                remove_finished_wf=False,
                                remove_local_tree=False)
    parallel_wf = engine.run(X=X, y=y)

    # Sequential in-process execution of an identical workflow.
    sequential_wf = build_workflow()
    sequential_wf.run(X=X, y=y)

    self.assertTrue(compare_two_node(parallel_wf, sequential_wf))
    self.assertTrue(comp_2wf_reduce_res(parallel_wf, sequential_wf))
def test_cv_best_search_refit_parallel(self):
    """Run the same CV(CVBestSearchRefitParallel(...)) workflow once via
    SomaWorkflowEngine and once in-process, then assert both executions
    agree on tree structure and reduce results."""
    outer_folds = 2
    inner_folds = 3
    ks = [1, 2]
    cs = [1, 2]
    workers = 2
    X, y = datasets.make_classification(n_samples=500,
                                        n_features=10000,
                                        n_informative=5)

    # Workflow #1: executed in parallel through soma-workflow.
    grids = Methods(*[Pipe(SelectKBest(k=k),
                           Methods(*[SVC(kernel="linear", C=c)
                                     for c in cs]))
                      for k in ks])
    refit = CVBestSearchRefitParallel(grids, n_folds=inner_folds)
    parallel_tree = CV(refit, n_folds=outer_folds)
    engine = SomaWorkflowEngine(tree_root=parallel_tree,
                                num_processes=workers,
                                remove_finished_wf=False,
                                remove_local_tree=False)
    parallel_result = engine.run(X=X, y=y)

    # Workflow #2: identical structure, executed directly in-process.
    grids2 = Methods(*[Pipe(SelectKBest(k=k),
                            Methods(*[SVC(kernel="linear", C=c)
                                      for c in cs]))
                       for k in ks])
    refit2 = CVBestSearchRefitParallel(grids2, n_folds=inner_folds)
    sequential_tree = CV(refit2, n_folds=outer_folds)
    sequential_tree.run(X=X, y=y)

    self.assertTrue(compare_two_node(parallel_result, sequential_tree))
    self.assertTrue(comp_2wf_reduce_res(parallel_result, sequential_tree))