Пример #1
0
def test_get_worker_instance(doc, cls):
    """Verify that the factory returns a worker of the expected class.

    Parameters
    ----------
    doc: dict
        Worker specification document that is passed to the factory as its
        configuration.
    cls: type
        Class that the worker for identifier 'test' is expected to be an
        instance of.
    """
    workers = WorkerFactory(config=doc)
    first = workers.get('test')
    assert isinstance(first, cls)
    # Request the same worker a second time. The repeated lookup is expected
    # to be served from the factory's cache and must compare equal to the
    # first result.
    second = workers.get('test')
    assert second == first
Пример #2
0
def test_load_config_from_json_file():
    """Verify that a worker factory can be configured from a Json file.

    The configuration is loaded once via the ``load_json`` method and once by
    reading the file and passing its content to the constructor. Both ways
    are expected to produce a 'test' worker whose variable 'a' is 0.
    """
    # Variant 1: let the factory read the Json file.
    loaded = WorkerFactory.load_json(JSON_FILE)
    assert loaded.get('test').variables['a'] == 0
    # Variant 2: read the file first and hand the resulting object to the
    # factory constructor.
    doc = util.read_object(JSON_FILE)
    assert WorkerFactory(doc).get('test').variables['a'] == 0
Пример #3
0
def exec_workflow(
    steps: List[WorkflowStep], workers: WorkerFactory, rundir: str,
    result: RunResult
) -> RunResult:
    """Run the steps of a serial workflow one after another.

    Each step is either executed as a function step (via ``exec_func``) or
    handed to a worker that is obtained from the given factory using the
    step's ``image``. The context for every step is read from the given
    result object, and the outcome of every executed step is added to that
    same object.

    Execution stops after the first step whose outcome has a non-zero
    ``returncode``. Steps that follow the failed step are not executed.

    Parameters
    ----------
    steps: list of WorkflowStep
        Steps in the serial workflow, in execution order.
    workers: WorkerFactory
        Factory that provides the worker for each step that is not a
        function step.
    rundir: str
        Working directory that is passed to every executed step.
    result: RunResult
        Collector for the step results. Also provides the context within
        which the steps are executed.

    Returns
    -------
    RunResult
        The given result object after the outcomes of all executed steps
        have been added.
    """
    for step in steps:
        if not step.is_function_step():
            # Steps that are not function steps are delegated to the worker
            # that the factory associates with the step's image.
            engine = workers.get(step.image)
            outcome = engine.exec(
                step=step,
                arguments=result.context,
                rundir=rundir
            )
        else:
            outcome = exec_func(
                step=step,
                context=result.context,
                rundir=rundir
            )
        result.add(outcome)
        # A non-zero return code signals failure; skip all remaining steps.
        if outcome.returncode != 0:
            return result
    return result
Пример #4
0
    def run(self,
            arguments: Dict,
            workers: Optional[WorkerFactory] = None,
            rundir: Optional[str] = None) -> RunResult:
        """Run the workflow steps for a given set of input arguments.

        Delegates to ``exec_workflow`` which executes the steps of this
        workflow in sequence and collects their results in a new
        ``RunResult`` that is created from the given arguments.

        Parameters
        ----------
        arguments: dict
            User-provided arguments for the workflow run.
        workers: WorkerFactory, default=None
            Factory for the workers that execute workflow steps. A factory
            that is created without any arguments is used if no (or a falsy)
            value is given.
        rundir: str, default=None
            Working directory for all executed workflow steps. The current
            working directory is used if no (or an empty) value is given.

        Returns
        -------
        RunResult
            Result object as returned by ``exec_workflow``.
        """
        # Fall back to the current working directory.
        if not rundir:
            rundir = os.getcwd()
        # Fall back to a worker factory with default settings.
        if not workers:
            workers = WorkerFactory()
        # The run result is the collector for the outcomes of all executed
        # steps and is created from the user-provided arguments.
        return exec_workflow(
            steps=self.steps,
            workers=workers,
            rundir=rundir,
            result=RunResult(arguments=arguments)
        )
Пример #5
0
    def __init__(self,
                 steps: Optional[List[WorkflowStep]] = None,
                 parameters: Optional[List[Parameter]] = None,
                 workers: Optional[WorkerFactory] = None):
        """Set up the workflow from the optional initial components.

        Every argument may be omitted. Omitted values are replaced by an
        empty step list, a parameter index that is created from None, and a
        worker factory that is created without arguments.

        Parameters
        ----------
        steps: list of WorkflowStep, default=None
            Optional sequence of steps in the serial workflow.
        parameters: list of Parameter, default=None
            Optional list of workflow template parameters. The list is
            wrapped in a ``ParameterIndex``.
        workers: WorkerFactory, default=None
            Optional factory for the workers that execute individual steps
            in the workflow sequence.
        """
        # Start with an empty sequence of steps if none are given.
        if steps is None:
            steps = list()
        self.steps = steps
        self.parameters = ParameterIndex(parameters=parameters)
        # Use a factory with default settings if none is given.
        if workers is None:
            workers = WorkerFactory()
        self.workers = workers
Пример #6
0
def test_eval_arg_func():
    """Verify factory creation from a configuration with callable arguments.

    The specification for worker 'test' contains an argument whose value is
    a callable. After the factory has been created, that argument is expected
    to hold the value 'called' instead of the callable. The specification
    for worker 'dummy' has no arguments and is expected to remain without
    an 'args' element.
    """
    # Worker specification with one callable and one constant argument.
    subprocess_spec = {
        'image': 'test',
        'worker': 'subprocess',
        'args': {'a': callme, 'b': 1}
    }
    # Worker specification that references a class and has no arguments.
    custom_spec = {
        'image': 'dummy',
        'worker': {'className': 'a', 'moduleName': 'b'}
    }
    factory = WorkerFactory(
        config={'test': subprocess_spec, 'dummy': custom_spec},
        validate=True
    )
    expected_args = {'a': 'called', 'b': 1}
    assert factory.config['test']['args'] == expected_args
    assert factory.config['dummy'].get('args') is None
Пример #7
0
    def exec_workflow(
            self,
            run: RunObject,
            template: WorkflowTemplate,
            arguments: Dict,
            config: Optional[Dict] = None) -> Tuple[WorkflowState, str]:
        """Initiate the execution of a given workflow template for a set of
        argument values. If the engine is asynchronous, this will start a new
        process that executes the serial workflow. Otherwise, the workflow is
        executed synchronously and the call blocks until the run is done.

        The serial workflow engine executes workflows on the local machine and
        therefore uses the file system to store temporary run files. The path
        to the run folder is returned as the second value in the result tuple.
        The first value in the result tuple is the state of the workflow after
        the process is started. If the workflow is executed asynchronously the
        state will be RUNNING. Otherwise, the run state should be an inactive
        state.

        The set of arguments is not further validated. It is assumed that the
        validation has been performed by the calling code (e.g., the run
        service manager).

        The optional configuration object can be used to override the worker
        configuration that was provided at object instantiation. Expects a
        dictionary with an element `workers` that contains a mapping of container
        identifier to a container worker configuration object.

        If the state of the run handle is not pending, an error is raised.

        Errors that occur while the run folder is prepared or while the
        workflow is started are logged and reflected in the returned workflow
        state instead of being raised. Errors that occur while the template
        is parsed are not caught.

        Parameters
        ----------
        run: flowserv.model.base.RunObject
            Handle for the run that is being executed.
        template: flowserv.model.template.base.WorkflowTemplate
            Workflow template containing the parameterized specification and
            the parameter declarations.
        arguments: dict
            Dictionary of argument values for parameters in the template.
        config: dict, default=None
            Optional object to overwrite the worker configuration settings.

        Returns
        -------
        flowserv.model.workflow.state.WorkflowState, string

        Raises
        ------
        RuntimeError
            If the given run is not in pending state.
        """
        # Ensure that the run is in pending state. Note that `run.state` is a
        # method. It has to be called to include the actual state (and not
        # the representation of the bound method) in the error message.
        if not run.is_pending():
            raise RuntimeError("invalid run state '{}'".format(run.state()))
        state = run.state()
        rundir = os.path.join(self.runsdir, run.run_id)
        # Get the worker configuration. A non-empty config object takes
        # precedence over the configuration of this engine, even if it has no
        # 'workers' element (the worker configuration is None in that case).
        worker_config = self.worker_config if not config else config.get(
            'workers')
        # Get the source directory for static workflow files.
        sourcedir = self.fs.workflow_staticdir(run.workflow.workflow_id)
        # Get the list of workflow steps and the generated output files.
        steps, run_args, outputs = parser.parse_template(template=template,
                                                         arguments=arguments)
        try:
            # Copy template files to the run folder.
            self.fs.copy_folder(key=sourcedir, dst=rundir)
            # Store any given file arguments in the run folder.
            for key, para in template.parameters.items():
                if para.is_file() and key in arguments:
                    file = arguments[key]
                    file.source().store(os.path.join(rundir, file.target()))
            # Create top-level folder for all expected result files.
            util.create_directories(basedir=rundir, files=outputs)
            # Start a new process to run the workflow. Make sure to catch all
            # exceptions to set the run state properly
            state = state.start()
            if self.is_async:
                # Raise an error if the service manager is not given. The
                # error is handled below and results in an error state.
                if self.service is None:
                    raise ValueError('service manager not given')
                # Run steps asynchronously in a separate process
                pool = Pool(processes=1)
                task_callback_function = partial(callback_function,
                                                 lock=self.lock,
                                                 tasks=self.tasks,
                                                 service=self.service)
                # Register the task before the process is started so that it
                # is already known when the callback function is invoked.
                with self.lock:
                    self.tasks[run.run_id] = (pool, state)
                pool.apply_async(run_workflow,
                                 args=(run.run_id, rundir, state, outputs,
                                       steps, run_args,
                                       WorkerFactory(config=worker_config)),
                                 callback=task_callback_function)
                return state, rundir
            else:
                # Run steps synchronously and block the controller until done
                _, _, state_dict = run_workflow(
                    run_id=run.run_id,
                    rundir=rundir,
                    state=state,
                    output_files=outputs,
                    steps=steps,
                    arguments=run_args,
                    workers=WorkerFactory(config=worker_config))
                return serialize.deserialize_state(state_dict), rundir
        except Exception as ex:
            # Set the workflow run into an ERROR state
            logging.error(ex)
            return state.error(messages=util.stacktrace(ex)), rundir
Пример #8
0
def test_load_config_from_yaml_file():
    """Verify that a worker factory can be configured from a Yaml file.

    The 'test' worker that is created from the loaded configuration is
    expected to have a variable 'a' with value 0.
    """
    factory = WorkerFactory.load_yaml(YAML_FILE)
    variables = factory.get('test').variables
    assert variables['a'] == 0
Пример #9
0
def test_load_config_from_file():
    """Verify factory creation from the content of a configuration file.

    Reads the Json file and passes the resulting object directly to the
    factory constructor. The 'test' worker that is created by the factory is
    expected to have a variable 'a' with value 0.
    """
    doc = util.read_object(JSON_FILE)
    factory = WorkerFactory(doc)
    assert factory.get('test').variables['a'] == 0
Пример #10
0
def test_invalid_config(config):
    """Verify that invalid worker specifications are only rejected when the
    factory is asked to validate its configuration.

    Parameters
    ----------
    config: dict
        Worker configuration that is expected to fail validation.
    """
    # Without the validate flag the factory is created without an error.
    WorkerFactory(config=config)
    # With validation enabled the same configuration has to be rejected.
    with pytest.raises(ValidationError):
        WorkerFactory(validate=True, config=config)
Пример #11
0
def test_init_empty(validate):
    """Verify that a factory can be created from an empty configuration.

    Parameters
    ----------
    validate: bool
        Value for the validate flag that is passed to the factory.
    """
    empty_config = {}
    factory = WorkerFactory(config=empty_config, validate=validate)
    # The factory configuration is expected to contain no entries.
    assert len(factory.config) == 0