示例#1
0
 def test_examples_local_engine(self):
     """Check each example workflow gives the same result on every engine.

     For every class returned by ``get_wf_example_classes()`` three
     workflows are built from separate instances: one is run directly,
     one through a ``LocalEngine`` and one through a
     ``SomaWorkflowEngine``.  Both engine results are then compared with
     the directly-run workflow, first with ``compare_two_node`` and then
     with ``comp_2wf_reduce_res``.
     """
     for example_cls in get_wf_example_classes():
         # One independent workflow per execution mode.
         reference, local_tree, soma_tree = [
             example_cls().get_workflow() for _ in range(3)]
         data = dict(X=self.X, y=self.y)

         # Direct run: the reference the engines are checked against.
         reference.run(**data)

         local_result = LocalEngine(
             tree_root=local_tree,
             num_processes=self.n_cores).run(**data)

         # No resource_id/login is passed to the soma-workflow engine.
         soma_result = SomaWorkflowEngine(
             tree_root=soma_tree,
             num_processes=self.n_cores,
             remove_finished_wf=False,
             remove_local_tree=False).run(**data)

         # Node-by-node comparison first, then the reduced results;
         # within each check the local engine is verified before soma.
         for check in (compare_two_node, comp_2wf_reduce_res):
             for engine_result in (local_result, soma_result):
                 self.assertTrue(check(reference, engine_result))
 def test_examples_local_engine(self):
     """Compare reduced results of each example workflow across engines.

     Every workflow obtained from ``get_wf_example_classes()`` is run
     directly, then handed (the same object) as ``tree_root`` to a
     ``LocalEngine`` and to a ``SomaWorkflowEngine``.  The result of each
     engine must satisfy ``comp_2wf_reduce_res`` against that workflow.
     """
     for example_cls in get_wf_example_classes():
         tree = example_cls().get_workflow()
         data = dict(X=self.X, y=self.y)
         tree.run(**data)

         # Both engines are given the very same, already-run tree.
         local_result = LocalEngine(
             tree_root=tree,
             num_processes=self.n_cores).run(**data)
         soma_result = SomaWorkflowEngine(
             tree_root=tree,
             num_processes=self.n_cores).run(**data)

         for engine_result in (local_result, soma_result):
             self.assertTrue(comp_2wf_reduce_res(tree, engine_result))
示例#3
0
"""

from sklearn import datasets

## 1) Building dataset
## =======================================================
# Synthetic classification problem: 500 samples x 200000 features,
# with a fixed random_state so successive runs get the same data.
X, y = datasets.make_classification(n_samples=500,
                                    n_features=200000,
                                    n_informative=2,
                                    random_state=1)

Xy = dict(X=X, y=y)
## 2) Building workflow
## =======================================================
# Progress markers use the single-argument call form of print so that the
# script parses, and prints the same text, on both Python 2 and Python 3.
print(" -> Pt2 : X and y created, building workflow")
# NOTE(review): cross_validation is only referenced by the disabled
# snippet below; the import is kept in case other code relies on it.
from sklearn import svm, cross_validation
# Disabled: plain scikit-learn 3-fold scoring of a linear SVC.
#kfold = cross_validation.KFold(n=len(X), n_folds=3)
#svc = svm.SVC(C=1, kernel='linear')
#print [svc.fit(X[train], y[train]).score(X[test], y[test]) for train, test in kfold]
from epac import CV, Methods
# 3-fold cross-validation over two SVCs (linear and rbf kernels).
cv_svm_local = CV(Methods(*[svm.SVC(kernel="linear"),
                            svm.SVC(kernel="rbf")]),
                  n_folds=3)
print(" -> Pt3 : Workflow built, defining local engine")
n_proc = 2
# Running on the local machine
from epac import LocalEngine
local_engine = LocalEngine(cv_svm_local, num_processes=n_proc)
print(" -> Pt4 : Running")
cv_svm = local_engine.run(**Xy)
print(" -> Success with %i procs!" % n_proc)
示例#4
0
    def test_memmapping(self):
        """Cross-validate two SVCs and compare with the one-process run.

        Reads ``self.memmap``, ``self.n_samples``, ``self.n_features``,
        ``self.directory``, ``self.n_proc`` and ``self.is_swf``.

        The workflow (3-fold CV over a linear and an rbf ``SVC``) is run
        either through a ``SomaWorkflowEngine`` (``self.is_swf`` true) or
        a ``LocalEngine``.  Afterwards:

        * if ``self.n_proc == 1``, the resulting tree and its reduced
          results are saved under ``self.directory`` (``/tmp`` when it is
          ``None``);
        * otherwise the previously saved tree and results are loaded and
          compared with the ones just computed.
        """
        ## 1) Building dataset
        ## ============================================================
        if self.memmap:
            # If the proc is 1, always generate the matrix
            # Otherwise, load it if it exists, or create it if it doesn't
            writing_mode = (self.n_proc == 1)
            X = create_mmat(self.n_samples,
                            self.n_features,
                            dir=self.directory,
                            writing_mode=writing_mode)
            y = create_array(self.n_samples, [0, 1],
                             dir=self.directory,
                             writing_mode=writing_mode)
        else:
            X, y = datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=2,
                                                random_state=1)
        Xy = dict(X=X, y=y)

        ## 2) Building workflow
        ## =======================================================
        from sklearn.svm import SVC
        from epac import CV, Methods
        cv_svm_local = CV(Methods(*[SVC(kernel="linear"),
                                    SVC(kernel="rbf")]),
                          n_folds=3)

        ## 3) Running the workflow
        ## =======================================================
        if self.is_swf:
            # Running on the cluster
            from epac import SomaWorkflowEngine
            mmap_mode = "r+" if self.memmap else None
            swf_engine = SomaWorkflowEngine(
                cv_svm_local,
                num_processes=self.n_proc,
                resource_id="jl237561@gabriel",
                login="******",
                # remove_finished_wf=False,
                # remove_local_tree=False,
                mmap_mode=mmap_mode,
                queue="Global_long")

            cv_svm = swf_engine.run(**Xy)

            # Printing information about the jobs
            time.sleep(2)
            print('')
            sum_memory = 0
            max_time_cost = 0
            for job_info in swf_engine.engine_info:
                print(
                    "mem_cost = {0}, vmem_cost = {1}, time_cost = {2}".format(
                        job_info.mem_cost, job_info.vmem_cost,
                        job_info.time_cost))
                sum_memory += job_info.mem_cost
                if max_time_cost < job_info.time_cost:
                    max_time_cost = job_info.time_cost
            print("sum_memory = ", sum_memory)
            print("max_time_cost = ", max_time_cost)
        else:
            # Running on the local machine
            from epac import LocalEngine
            local_engine = LocalEngine(cv_svm_local, num_processes=self.n_proc)
            cv_svm = local_engine.run(**Xy)

        cv_svm_reduce = cv_svm.reduce()
        print("\n -> Reducing results")
        print(cv_svm_reduce)

        # Creating the directory to save results, if it doesn't exist
        dirname = 'tmp_save_tree/'
        directory = '/tmp' if self.directory is None else self.directory
        dirpath = os.path.join(directory, dirname)
        for path in (directory, dirpath):
            if not os.path.isdir(path):
                os.mkdir(path)
        results_path = os.path.join(directory, "tmp_save_results")

        if self.n_proc == 1:
            ## 4.1) Saving results on the disk for one process
            ## ===================================================
            store = StoreFs(dirpath=dirpath, clear=True)
            cv_svm.save_tree(store=store)

            # pickle writes bytes, so the file must be opened in binary
            # mode (text mode raises TypeError on Python 3).
            with open(results_path, 'wb') as results_file:
                print(results_file.name)
                pickle.dump(cv_svm_reduce, results_file)

        else:
            ## 4.2) Loading the results for one process
            ## ===================================================
            try:
                store = StoreFs(dirpath=dirpath, clear=False)
                cv_svm_one_proc = store.load()

                # This is the file written by the n_proc == 1 branch
                # above on an earlier run; binary mode to match the dump.
                with open(results_path, 'rb') as results_file:
                    cv_svm_reduce_one_proc = pickle.load(results_file)
            except KeyError:
                # NOTE(review): presumably StoreFs.load() raises KeyError
                # when nothing was saved -- confirm against StoreFs.
                print("Warning: ")
                print("No previous tree detected, no possible "
                      "comparison of results")
            else:
                ## 5.2) Comparing results to the results for one process
                ## ===================================================
                # Kept outside the try body so that a KeyError raised
                # while comparing is not mistaken for "no previous tree".
                print("\nComparing %i proc with one proc" % self.n_proc)
                self.assertTrue(compare_two_node(cv_svm, cv_svm_one_proc))
                self.assertTrue(isequal(cv_svm_reduce, cv_svm_reduce_one_proc))