示例#1
0
    def _submit_graph(self, pyfiles, dependencies, nodes):
        """Turn a list of scripts into a soma-workflow, submit it and wait.

        Parameters
        ----------
        pyfiles : sequence of str
            Script paths. One ``Job`` is created per entry; it runs the
            script with the current interpreter (``sys.executable``) and is
            named after the script's base name without its extension.
        dependencies : dict
            Maps an index into ``pyfiles`` to an iterable of other indices.
            Every ``(key, value)`` pair becomes the tuple
            ``(jobs[value], jobs[key])`` handed to ``Workflow``.
            NOTE(review): presumably this means "value must finish before
            key starts" -- confirm against the soma-workflow documentation.
        nodes
            Not used by this method.
        """
        jobs = [
            Job(command=[sys.executable, script],
                name=os.path.splitext(os.path.basename(script))[0])
            for script in pyfiles
        ]
        # Pair order is (jobs[value], jobs[key]), exactly as indexed here.
        soma_deps = [
            (jobs[upstream], jobs[downstream])
            for downstream, upstream_ids in dependencies.items()
            for upstream in upstream_ids
        ]

        workflow = Workflow(jobs, soma_deps)

        logger.info('serializing workflow')
        Helper.serialize('workflow', workflow)

        controller = WorkflowController()
        logger.info('submitting workflow')
        workflow_id = controller.submit_workflow(workflow)
        # Block until the submitted workflow has finished.
        Helper.wait_workflow(workflow_id, controller)
示例#2
0
    def run(self, **Xy):
        '''Run soma-workflow without gui

        The dataset and the tree are written to a temporary working
        directory, one ``epac_mapper`` job is created per keys file
        produced by splitting the tree for ``self.num_processes``, the
        resulting workflow is submitted and waited for, and the tree is
        then reloaded from the working directory.  The process' current
        directory is switched to the temporary directory while the
        workflow runs; it is restored and the temporary directory removed
        afterwards, whether the run succeeded or raised.

        Parameters
        ----------
        **Xy : dict
            Named arrays forming the dataset (e.g. ``X`` and ``y``); they
            are stored with ``np.savez`` for the jobs to read.

        Returns
        -------
        The tree root reloaded from the store after the workflow finished;
        it is also assigned to ``self.tree_root``.

        Raises
        ------
        NoSomaWFError
            If ``soma.workflow.client`` cannot be imported.

        Example
        -------

        >>> from sklearn import datasets
        >>> from epac.map_reduce.engine import SomaWorkflowEngine
        >>> from epac.tests.wfexamples2test import WFExample2

        >>> ## Build dataset
        >>> ## =============
        >>> X, y = datasets.make_classification(n_samples=10,
        ...                                     n_features=20,
        ...                                     n_informative=5,
        ...                                     random_state=1)
        >>> Xy = {'X':X, 'y':y}

        >>> ## Build epac tree
        >>> ## ===============
        >>> tree_root_node = WFExample2().get_workflow()

        >>> ## Build SomaWorkflowEngine and run function for each node
        >>> ## =======================================================
        >>> sfw_engine = SomaWorkflowEngine(tree_root=tree_root_node,
        ...                                 function_name="trasform",
        ...                                 num_processes=3)
        >>> tree_root_node = sfw_engine.run(**Xy)

        >>> ## Run reduce process
        >>> ## ==================
        >>> tree_root_node.reduce()
        ResultSet(
        [{'key': SelectKBest/SVC(C=1), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]},
         {'key': SelectKBest/SVC(C=3), 'y/test/score_recall_mean/pval': [ 0.], 'y/test/score_recall/pval': [ 0.  0.], 'y/test/score_accuray': 0.8, 'y/test/score_f1/pval': [ 0.  0.], 'y/test/score_precision/pval': [ 0.  0.], 'y/test/score_precision': [ 0.8  0.8], 'y/test/score_recall': [ 0.8  0.8], 'y/test/score_f1': [ 0.8  0.8], 'y/test/score_recall_mean': 0.8, 'y/test/score_accuray/pval': [ 0.]}])
        '''
        # soma-workflow is an optional dependency: import it lazily so the
        # rest of the module stays usable without it.
        try:
            from soma.workflow.client import Job, Workflow
            from soma.workflow.client import Helper, FileTransfer
            from soma.workflow.client import WorkflowController
        except ImportError:
            errmsg = ("No soma-workflow is found. "
                      "Please verify your soma-workflow "
                      "on your computer (e.g. PYTHONPATH) \n")
            sys.stderr.write(errmsg)
            sys.stdout.write(errmsg)
            raise NoSomaWFError

        tmp_work_dir_path = tempfile.mkdtemp()
        cur_work_dir = os.getcwd()
        try:
            os.chdir(tmp_work_dir_path)
            # The whole working directory is declared as a single transfer
            # and is used as input, output and cwd of every job below.
            ft_working_directory = FileTransfer(is_input=True,
                                                client_path=tmp_work_dir_path,
                                                name="working directory")
            ## Save the database and tree to working directory
            ## ===============================================
            np.savez(
                os.path.join(tmp_work_dir_path,
                             SomaWorkflowEngine.dataset_relative_path), **Xy)
            store = StoreFs(dirpath=os.path.join(
                tmp_work_dir_path,
                SomaWorkflowEngine.tree_root_relative_path))
            self.tree_root.save_tree(store=store)

            ## Subtree job allocation on disk
            ## ==============================
            node_input = NodesInput(self.tree_root.get_key())
            split_node_input = SplitNodesInput(
                self.tree_root, num_processes=self.num_processes)
            nodesinput_list = split_node_input.split(node_input)
            keysfile_list = self._save_job_list(tmp_work_dir_path,
                                                nodesinput_list)

            ## Build soma-workflow
            ## ===================
            # One independent mapper job per keys file.  The literal double
            # quotes around the paths are part of the arguments as passed.
            jobs = [
                Job(command=[
                    u"epac_mapper", u'--datasets',
                    '"%s"' % (SomaWorkflowEngine.dataset_relative_path),
                    u'--keysfile',
                    '"%s"' % (nodesfile)
                ],
                    referenced_input_files=[ft_working_directory],
                    referenced_output_files=[ft_working_directory],
                    name="epac_job_key=%s" % (nodesfile),
                    working_directory=ft_working_directory)
                for nodesfile in keysfile_list
            ]
            soma_workflow = Workflow(jobs=jobs)

            # No resource configured (None or empty): use this machine's
            # host name as the resource id.
            if not self.resource_id:
                self.resource_id = socket.gethostname()
            controller = WorkflowController(self.resource_id,
                                            self.login, self.pw)

            ## run soma-workflow
            ## =================
            wf_id = controller.submit_workflow(workflow=soma_workflow,
                                               name="epac workflow")
            Helper.transfer_input_files(wf_id, controller)
            Helper.wait_workflow(wf_id, controller)
            Helper.transfer_output_files(wf_id, controller)
            controller.delete_workflow(wf_id)

            ## read result tree
            ## ================
            self.tree_root = store.load()
        finally:
            # Always leave the temporary directory and remove it, so that a
            # failure anywhere above neither changes the caller's working
            # directory nor leaks files on disk.
            os.chdir(cur_work_dir)
            if os.path.isdir(tmp_work_dir_path):
                shutil.rmtree(tmp_work_dir_path)
        return self.tree_root