def exec_workflow(
        self, run: RunObject, template: WorkflowTemplate, arguments: Dict,
        staticfs: StorageVolume, config: Optional[Dict] = None
) -> Tuple[WorkflowState, StorageVolume]:
    """Initiate the execution of a given workflow template for a set of
    argument values. This will start a new process that executes a serial
    workflow asynchronously.

    Returns the state of the workflow after the process is started (the
    state will therefore be RUNNING).

    The set of arguments is not further validated. It is assumed that the
    validation has been performed by the calling code (e.g., the run
    service manager).

    If the state of the run handle is not pending, an error is raised.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for the run that is being executed.
    template: flowserv.model.template.base.WorkflowTemplate
        Workflow template containing the parameterized specification and
        the parameter declarations.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    staticfs: flowserv.volume.base.StorageVolume
        Storage volume that contains the static files from the workflow
        template.
    config: dict, default=None
        Optional configuration settings are currently ignored. Included
        for API completeness.

    Returns
    -------
    flowserv.model.workflow.state.WorkflowState,
    flowserv.volume.base.StorageVolume
    """
    # Ensure that the run is in pending state.
    if not run.is_pending():
        raise RuntimeError("invalid run state '{}'".format(run.state()))
    try:
        # Create a workflow on the remote engine. This will also upload all
        # necessary files to the remote engine. Workflow execution may not
        # have started yet (indicated by the state property of the returned
        # handle for the remote workflow).
        workflow = self.client.create_workflow(
            run=run,
            template=template,
            arguments=arguments,
            staticfs=staticfs
        )
        workflow_id = workflow.workflow_id
        # Run the workflow. Depending on the value of the is_async flag
        # the process will either block execution while monitoring the
        # workflow state or return immediately.
        if self.is_async:
            self.tasks[run.run_id] = workflow_id
            # Start monitor thread for asynchronous monitoring.
            monitor.WorkflowMonitor(
                workflow=workflow,
                poll_interval=self.poll_interval,
                service=self.service,
                tasks=self.tasks
            ).start()
            return workflow.state, workflow.runstore
        else:
            # Run workflow synchronously. This blocks the calling thread
            # while waiting (i.e., polling the remote engine) for the
            # workflow execution to finish.
            state = monitor.monitor_workflow(
                workflow=workflow,
                poll_interval=self.poll_interval
            )
            return state, workflow.runstore
    except Exception as ex:
        # Set the workflow run into an ERROR state.
        logging.error(ex, exc_info=True)
        strace = util.stacktrace(ex)
        logging.debug('\n'.join(strace))
        return run.state().error(messages=strace), None
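# -- Usage sketch (hypothetical) -------------------------------------------
# A minimal example of how the calling code might invoke this controller.
# The `engine`, `run`, `template`, `arguments`, and `staticfs` objects are
# assumed to have been created elsewhere (e.g., by the run service manager)
# and are not defined here.
#
#   state, runstore = engine.exec_workflow(
#       run=run,
#       template=template,
#       arguments=arguments,
#       staticfs=staticfs
#   )
#   # In asynchronous mode the returned state is RUNNING and the monitor
#   # thread updates the run via the service API once the remote workflow
#   # finishes. In synchronous mode the returned state is already inactive.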
def exec_workflow(
        self, run: RunObject, template: WorkflowTemplate, arguments: Dict,
        staticfs: StorageVolume, config: Optional[Dict] = None
) -> Tuple[WorkflowState, StorageVolume]:
    """Initiate the execution of a given workflow template for a set of
    argument values. This will start a new process that executes a serial
    workflow asynchronously.

    The serial workflow engine executes workflows on the local machine and
    therefore uses the file system to store temporary run files. A handle
    for the storage volume that contains the run files is returned as the
    second value in the result tuple. The first value in the result tuple
    is the state of the workflow after the process is started. If the
    workflow is executed asynchronously the state will be RUNNING.
    Otherwise, the run state should be an inactive state.

    The set of arguments is not further validated. It is assumed that the
    validation has been performed by the calling code (e.g., the run
    service manager).

    The optional configuration object can be used to override the worker
    configuration that was provided at object instantiation. Expects a
    dictionary with an element `workers` that contains a mapping of
    container identifier to a container worker configuration object.

    If the state of the run handle is not pending, an error is raised.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for the run that is being executed.
    template: flowserv.model.template.base.WorkflowTemplate
        Workflow template containing the parameterized specification and
        the parameter declarations.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    staticfs: flowserv.volume.base.StorageVolume
        Storage volume that contains the static files from the workflow
        template.
    config: dict, default=None
        Optional object to overwrite the worker configuration settings.

    Returns
    -------
    flowserv.model.workflow.state.WorkflowState,
    flowserv.volume.base.StorageVolume
    """
    # Get the run state. Raise an error if the run is not in pending state.
    if not run.is_pending():
        raise RuntimeError("invalid run state '{}'".format(run.state()))
    state = run.state()
    # Create a configuration dictionary that merges the engine global
    # configuration with the workflow-specific one. Copy the global
    # configuration to avoid modifying it when applying the overrides.
    run_config = dict(self.config) if self.config is not None else dict()
    if config:
        run_config.update(config)
    # Get the list of workflow steps, the run arguments, and the list of
    # output files that the workflow is expected to generate.
    steps, run_args, outputs = parser.parse_template(
        template=template,
        arguments=arguments
    )
    # Create and prepare the storage volume for run files.
    runstore = self.fs.get_store_for_folder(
        key=util.join(self.runsdir, run.run_id),
        identifier=DEFAULT_STORE
    )
    try:
        # Copy template files to the run folder.
        files = staticfs.copy(src=None, store=runstore)
        # Store any given file arguments and additional input files that
        # are required by actor parameters in the run folder.
        for key, para in template.parameters.items():
            if para.is_file() and key in arguments:
                for fkey in arguments[key].copy(target=runstore):
                    files.append(fkey)
            elif para.is_actor() and key in arguments:
                input_files = arguments[key].files
                for f in input_files if input_files else []:
                    for fkey in f.copy(target=runstore):
                        files.append(fkey)
        # Create factory objects for storage volumes.
        volumes = volume_manager(
            specs=run_config.get('volumes', []),
            runstore=runstore,
            runfiles=files
        )
        # Create factory for workers. Include mapping of workflow steps to
        # the workers that are responsible for their execution.
        workers = WorkerPool(
            workers=run_config.get('workers', []),
            managers={
                doc['step']: doc['worker']
                for doc in run_config.get('workflow', [])
            }
        )
        # Start a new process to run the workflow. Make sure to catch all
        # exceptions to set the run state properly.
        state = state.start()
        if self.is_async:
            # Run steps asynchronously in a separate process.
            pool = Pool(processes=1)
            task_callback_function = partial(
                callback_function,
                lock=self.lock,
                tasks=self.tasks,
                service=self.service
            )
            with self.lock:
                self.tasks[run.run_id] = (pool, state)
            pool.apply_async(
                run_workflow,
                args=(
                    run.run_id,
                    state,
                    outputs,
                    steps,
                    run_args,
                    volumes,
                    workers
                ),
                callback=task_callback_function
            )
            return state, runstore
        else:
            # Run steps synchronously and block the controller until done.
            _, _, state_dict = run_workflow(
                run_id=run.run_id,
                state=state,
                output_files=outputs,
                steps=steps,
                arguments=run_args,
                volumes=volumes,
                workers=workers
            )
            return serialize.deserialize_state(state_dict), runstore
    except Exception as ex:
        # Set the workflow run into an ERROR state.
        logging.error(ex, exc_info=True)
        return state.error(messages=util.stacktrace(ex)), runstore
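# -- Configuration sketch (hypothetical) -------------------------------------
# Shape of a run-specific `config` override for this engine variant, based
# on the keys read above (`workers`, `volumes`, `workflow`). The worker
# identifiers and specification keys below are illustrative only:
#
#   config = {
#       'workers': [{'name': 'docker_worker', 'type': 'docker'}],
#       'workflow': [{'step': 'analyze', 'worker': 'docker_worker'}]
#   }
#   state, runstore = engine.exec_workflow(
#       run=run, template=template, arguments=arguments,
#       staticfs=staticfs, config=config
#   )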
def exec_workflow(
        self, run: RunObject, template: WorkflowTemplate, arguments: Dict,
        config: Optional[Dict] = None
) -> Tuple[WorkflowState, str]:
    """Initiate the execution of a given workflow template for a set of
    argument values. This will start a new process that executes a serial
    workflow asynchronously.

    The serial workflow engine executes workflows on the local machine and
    therefore uses the file system to store temporary run files. The path
    to the run folder is returned as the second value in the result tuple.
    The first value in the result tuple is the state of the workflow after
    the process is started. If the workflow is executed asynchronously the
    state will be RUNNING. Otherwise, the run state should be an inactive
    state.

    The set of arguments is not further validated. It is assumed that the
    validation has been performed by the calling code (e.g., the run
    service manager).

    The optional configuration object can be used to override the worker
    configuration that was provided at object instantiation. Expects a
    dictionary with an element `workers` that contains a mapping of
    container identifier to a container worker configuration object.

    If the state of the run handle is not pending, an error is raised.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for the run that is being executed.
    template: flowserv.model.template.base.WorkflowTemplate
        Workflow template containing the parameterized specification and
        the parameter declarations.
    arguments: dict
        Dictionary of argument values for parameters in the template.
    config: dict, default=None
        Optional object to overwrite the worker configuration settings.

    Returns
    -------
    flowserv.model.workflow.state.WorkflowState, string

    Raises
    ------
    flowserv.error.DuplicateRunError
    """
    # Get the run state. Ensure that the run is in pending state.
    if not run.is_pending():
        raise RuntimeError("invalid run state '{}'".format(run.state()))
    state = run.state()
    rundir = os.path.join(self.runsdir, run.run_id)
    # Get the worker configuration.
    worker_config = self.worker_config if not config else config.get('workers')
    # Get the source directory for static workflow files.
    sourcedir = self.fs.workflow_staticdir(run.workflow.workflow_id)
    # Get the list of workflow steps and the generated output files.
    steps, run_args, outputs = parser.parse_template(
        template=template,
        arguments=arguments
    )
    try:
        # Copy template files to the run folder.
        self.fs.copy_folder(key=sourcedir, dst=rundir)
        # Store any given file arguments in the run folder.
        for key, para in template.parameters.items():
            if para.is_file() and key in arguments:
                file = arguments[key]
                file.source().store(os.path.join(rundir, file.target()))
        # Create top-level folder for all expected result files.
        util.create_directories(basedir=rundir, files=outputs)
        # Start a new process to run the workflow. Make sure to catch all
        # exceptions to set the run state properly.
        state = state.start()
        if self.is_async:
            # Raise an error if the service manager is not given.
            if self.service is None:
                raise ValueError('service manager not given')
            # Run steps asynchronously in a separate process.
            pool = Pool(processes=1)
            task_callback_function = partial(
                callback_function,
                lock=self.lock,
                tasks=self.tasks,
                service=self.service
            )
            with self.lock:
                self.tasks[run.run_id] = (pool, state)
            pool.apply_async(
                run_workflow,
                args=(
                    run.run_id,
                    rundir,
                    state,
                    outputs,
                    steps,
                    run_args,
                    WorkerFactory(config=worker_config)
                ),
                callback=task_callback_function
            )
            return state, rundir
        else:
            # Run steps synchronously and block the controller until done.
            _, _, state_dict = run_workflow(
                run_id=run.run_id,
                rundir=rundir,
                state=state,
                output_files=outputs,
                steps=steps,
                arguments=run_args,
                workers=WorkerFactory(config=worker_config)
            )
            return serialize.deserialize_state(state_dict), rundir
    except Exception as ex:
        # Set the workflow run into an ERROR state.
        logging.error(ex)
        return state.error(messages=util.stacktrace(ex)), rundir
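# -- Configuration sketch (hypothetical) -------------------------------------
# This older variant only reads a `workers` element from `config`, mapping
# container identifiers to worker configurations for the WorkerFactory. The
# container identifier and configuration keys below are illustrative only:
#
#   config = {'workers': {'python:3.9': {'worker': 'docker'}}}
#   state, rundir = engine.exec_workflow(
#       run=run, template=template, arguments=arguments, config=config
#   )
#   # The second return value is the path to the local run folder rather
#   # than a storage volume handle.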
def run_handle(self, run: RunObject, group: Optional[GroupObject] = None) -> Dict:
    """Get serialization for a run handle.

    The run handle extends the run descriptor with the run arguments, the
    parameter declarations taken from the workflow group handle (since they
    may differ from the parameter list of the workflow), and additional
    information associated with the run state.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Workflow run handle.
    group: flowserv.model.base.GroupObject, default=None
        Workflow group handle. Missing for post-processing workflows.

    Returns
    -------
    dict
    """
    doc = self.run_descriptor(run)
    # Add information about the run workflow and the run group.
    doc[RUN_WORKFLOW] = run.workflow_id
    if run.group_id is not None:
        doc[RUN_GROUP] = run.group_id
    # Add run arguments.
    doc[RUN_ARGUMENTS] = run.arguments
    # Add group-specific parameters.
    if group is not None:
        parameters = group.parameters.values()
        doc[RUN_PARAMETERS] = [p.to_dict() for p in parameters]
    # Add additional information from the run state.
    if not run.is_pending():
        doc[RUN_STARTED] = run.state().started_at
        if run.is_canceled() or run.is_error():
            doc[RUN_FINISHED] = run.state().stopped_at
            doc[RUN_ERRORS] = run.state().messages
        elif run.is_success():
            doc[RUN_FINISHED] = run.state().finished_at
            output_spec = run.outputs()
            # Serialize file resources. The default serialization contains
            # the file identifier and name. If an output specification is
            # present for the file, the values from that specification are
            # added to the serialization.
            files = list()
            for f in run.files:
                obj = {FILE_ID: f.file_id, FILE_NAME: f.name}
                if f.name in output_spec:
                    fspec = output_spec[f.name]
                    obj[FILE_NAME] = fspec.key
                    if fspec.title is not None:
                        obj[FILE_TITLE] = fspec.title
                    if fspec.caption is not None:
                        obj[FILE_CAPTION] = fspec.caption
                    if fspec.widget is not None:
                        obj[FILE_WIDGET] = fspec.widget
                    if fspec.format is not None:
                        obj[FILE_FORMAT] = fspec.format
                files.append(obj)
            doc[RUN_FILES] = files
    return doc
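# -- Serialization sketch (hypothetical) -------------------------------------
# Abbreviated handle for a successful run. The actual key names are defined
# by the label constants (RUN_WORKFLOW, RUN_ARGUMENTS, RUN_FILES, ...); the
# keys and values below are illustrative only:
#
#   {
#       'id': '0000-run',
#       'workflow': '0000-workflow',
#       'arguments': {'greeting': 'Hello'},
#       'startedAt': '2021-01-01T00:00:00',
#       'finishedAt': '2021-01-01T00:05:00',
#       'files': [{'id': '0000-file', 'name': 'results/analytics.json'}]
#   }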