def _submit_graph(self, pyfiles, dependencies, nodes):
    '''Turn a set of scripts into a soma-workflow, submit it and wait.

    One ``Job`` is created per entry of ``pyfiles``; each job runs the
    script with the current interpreter (``sys.executable``) and is named
    after the script file without directory and extension.

    Parameters
    ----------
    pyfiles: sequence of string
        Paths of the scripts to run, one job per script.

    dependencies: dict
        Maps a job index ``key`` to an iterable of job indices ``val``.
        Every pair is handed to ``Workflow`` as ``(jobs[val], jobs[key])``
        (presumably "key runs after val" -- confirm against the
        soma-workflow documentation).

    nodes:
        Not used by this method.
    '''
    script_jobs = [
        Job(command=[sys.executable, script_path],
            name=os.path.splitext(os.path.basename(script_path))[0])
        for script_path in pyfiles
    ]
    # Flatten the index mapping into (jobs[val], jobs[key]) pairs.
    job_links = [
        (script_jobs[upstream], script_jobs[downstream])
        for downstream, upstreams in dependencies.items()
        for upstream in upstreams
    ]

    workflow = Workflow(script_jobs, job_links)
    logger.info('serializing workflow')
    # Written to a file called 'workflow' relative to the current directory.
    Helper.serialize('workflow', workflow)

    controller = WorkflowController()
    logger.info('submitting workflow')
    workflow_id = controller.submit_workflow(workflow)
    # Blocks until the submitted workflow has finished.
    Helper.wait_workflow(workflow_id, controller)
def export_to_gui(self, soma_workflow_dirpath, **Xy):
    '''Export the tree as a soma-workflow file to be opened in the GUI.

    The dataset, the tree and the per-job key files are written into
    ``soma_workflow_dirpath`` together with a serialized workflow
    (``SomaWorkflowEngine.open_me_by_soma_workflow_gui``). Nothing is
    submitted or executed by this method.

    Parameters
    ----------
    soma_workflow_dirpath: string
        Directory receiving all generated files; created when missing.
        A relative path is resolved against the working directory that is
        current when the method is called.

    **Xy: dict
        Named arrays saved with ``np.savez`` as the dataset.

    Raises
    ------
    NoSomaWFError
        When ``soma.workflow.client`` cannot be imported.

    Example
    -------
    see the directory of "examples/run_somaworkflow_gui.py" in epac
    '''
    try:
        from soma.workflow.client import Job, Workflow
        from soma.workflow.client import Helper, FileTransfer
    except ImportError:
        errmsg = "No soma-workflow is found. "\
            "Please verify your soma-workflow "\
            "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    if not os.path.exists(soma_workflow_dirpath):
        os.makedirs(soma_workflow_dirpath)
    # Resolve to an absolute path *before* changing directory: a relative
    # path joined again after the chdir below would point to
    # "<dir>/<dir>/..." instead of "<dir>/...".
    tmp_work_dir_path = os.path.abspath(soma_workflow_dirpath)
    cur_work_dir = os.getcwd()
    os.chdir(tmp_work_dir_path)
    try:
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        np.savez(
            os.path.join(tmp_work_dir_path,
                         SomaWorkflowEngine.dataset_relative_path),
            **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(
            self.tree_root,
            num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        jobs = [
            Job(command=[
                    u"epac_mapper",
                    u'--datasets',
                    '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                    u'--keysfile',
                    '"%s"' % (nodesfile)
                ],
                referenced_input_files=[ft_working_directory],
                referenced_output_files=[ft_working_directory],
                name="epac_job_key=%s" % (nodesfile),
                working_directory=ft_working_directory)
            for nodesfile in keysfile_list
        ]
        soma_workflow = Workflow(jobs=jobs)
        # The directory path is necessarily non-empty here (os.makedirs
        # above would have raised otherwise), so the workflow file is
        # always written.
        out_soma_workflow_file = os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.open_me_by_soma_workflow_gui)
        Helper.serialize(out_soma_workflow_file, soma_workflow)
    finally:
        # Always give the caller its working directory back, even when
        # saving or serializing failed.
        os.chdir(cur_work_dir)
def run(self, **Xy):
    '''Run soma-workflow without gui

    The dataset and the tree are saved into a temporary working
    directory, one ``epac_mapper`` job is created per key file, the
    workflow is submitted to ``self.resource_id`` (the local host name
    when unset) and awaited. The tree is then reloaded from the store.
    The temporary directory is removed and the previous working
    directory restored in every case, including failures.

    Parameters
    ----------
    **Xy: dict
        Named arrays saved with ``np.savez`` as the dataset.

    Returns
    -------
    The tree root reloaded from the store after the workflow finished
    (also assigned to ``self.tree_root``).

    Raises
    ------
    NoSomaWFError
        When ``soma.workflow.client`` cannot be imported.

    Example
    -------

    >>> from sklearn import datasets
    >>> from epac.map_reduce.engine import SomaWorkflowEngine
    >>> from epac.tests.wfexamples2test import WFExample2

    >>> ## Build dataset
    >>> ## =============
    >>> X, y = datasets.make_classification(n_samples=10,
    ...                                     n_features=20,
    ...                                     n_informative=5,
    ...                                     random_state=1)
    >>> Xy = {'X':X, 'y':y}

    >>> ## Build epac tree
    >>> ## ===============
    >>> tree_root_node = WFExample2().get_workflow()

    >>> ## Build SomaWorkflowEngine and run function for each node
    >>> ## =======================================================
    >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
    ...                                 function_name="transform",
    ...                                 num_processes=3)
    >>> tree_root_node = sfw_engine.run(**Xy)

    >>> ## Run reduce process
    >>> ## ==================
    >>> tree_root_node.reduce()
    ResultSet(
    [{'key': SelectKBest/SVC(C=1),
      'y/test/score_recall_mean/pval': [ 0.],
      'y/test/score_recall/pval': [ 0. 0.],
      'y/test/score_accuray': 0.8,
      'y/test/score_f1/pval': [ 0. 0.],
      'y/test/score_precision/pval': [ 0. 0.],
      'y/test/score_precision': [ 0.8 0.8],
      'y/test/score_recall': [ 0.8 0.8],
      'y/test/score_f1': [ 0.8 0.8],
      'y/test/score_recall_mean': 0.8,
      'y/test/score_accuray/pval': [ 0.]},
     {'key': SelectKBest/SVC(C=3),
      'y/test/score_recall_mean/pval': [ 0.],
      'y/test/score_recall/pval': [ 0. 0.],
      'y/test/score_accuray': 0.8,
      'y/test/score_f1/pval': [ 0. 0.],
      'y/test/score_precision/pval': [ 0. 0.],
      'y/test/score_precision': [ 0.8 0.8],
      'y/test/score_recall': [ 0.8 0.8],
      'y/test/score_f1': [ 0.8 0.8],
      'y/test/score_recall_mean': 0.8,
      'y/test/score_accuray/pval': [ 0.]}])

    '''
    try:
        from soma.workflow.client import Job, Workflow
        from soma.workflow.client import Helper, FileTransfer
        from soma.workflow.client import WorkflowController
    except ImportError:
        errmsg = "No soma-workflow is found. "\
            "Please verify your soma-workflow "\
            "on your computer (e.g. PYTHONPATH) \n"
        sys.stderr.write(errmsg)
        sys.stdout.write(errmsg)
        raise NoSomaWFError
    # Remember the caller's directory first so that nothing is created
    # yet if it cannot be determined.
    cur_work_dir = os.getcwd()
    tmp_work_dir_path = tempfile.mkdtemp()
    try:
        os.chdir(tmp_work_dir_path)
        ft_working_directory = FileTransfer(is_input=True,
                                            client_path=tmp_work_dir_path,
                                            name="working directory")
        ## Save the database and tree to working directory
        ## ===============================================
        np.savez(
            os.path.join(tmp_work_dir_path,
                         SomaWorkflowEngine.dataset_relative_path),
            **Xy)
        store = StoreFs(dirpath=os.path.join(
            tmp_work_dir_path,
            SomaWorkflowEngine.tree_root_relative_path))
        self.tree_root.save_tree(store=store)
        ## Subtree job allocation on disk
        ## ==============================
        node_input = NodesInput(self.tree_root.get_key())
        split_node_input = SplitNodesInput(
            self.tree_root,
            num_processes=self.num_processes)
        nodesinput_list = split_node_input.split(node_input)
        keysfile_list = self._save_job_list(tmp_work_dir_path,
                                            nodesinput_list)
        ## Build soma-workflow
        ## ===================
        jobs = [
            Job(command=[
                    u"epac_mapper",
                    u'--datasets',
                    '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                    u'--keysfile',
                    '"%s"' % (nodesfile)
                ],
                referenced_input_files=[ft_working_directory],
                referenced_output_files=[ft_working_directory],
                name="epac_job_key=%s" % (nodesfile),
                working_directory=ft_working_directory)
            for nodesfile in keysfile_list
        ]
        soma_workflow = Workflow(jobs=jobs)
        # Fall back to the local machine when no resource is configured.
        if not self.resource_id:
            self.resource_id = socket.gethostname()
        controller = WorkflowController(self.resource_id,
                                        self.login,
                                        self.pw)
        ## run soma-workflow
        ## =================
        wf_id = controller.submit_workflow(workflow=soma_workflow,
                                           name="epac workflow")
        Helper.transfer_input_files(wf_id, controller)
        Helper.wait_workflow(wf_id, controller)
        Helper.transfer_output_files(wf_id, controller)
        controller.delete_workflow(wf_id)
        ## read result tree
        ## ================
        self.tree_root = store.load()
    finally:
        # Restore the caller's directory and drop the scratch directory
        # whether or not the workflow succeeded; leave the directory
        # before deleting it.
        os.chdir(cur_work_dir)
        if os.path.isdir(tmp_work_dir_path):
            shutil.rmtree(tmp_work_dir_path)
    return self.tree_root