def test_step_type():
    """Check the properties and type predicates of the workflow step classes.

    Covers code steps, container steps, the default (empty) input and output
    lists, and the error that is raised for an invalid step type.
    """
    # -- Code step -----------------------------------------------------------
    code_step = CodeStep(
        identifier='test',
        func=my_add,
        arg='z',
        inputs=['a', 'b'],
        outputs=['x', 'y']
    )
    assert code_step.is_code_step()
    assert not code_step.is_container_step()
    assert code_step.identifier == 'test'
    assert code_step.arg == 'z'
    assert code_step.inputs == ['a', 'b']
    assert code_step.outputs == ['x', 'y']
    # -- Container step ------------------------------------------------------
    container_step = ContainerStep(
        identifier='test',
        image='test',
        inputs=['a', 'b'],
        outputs=['x', 'y']
    )
    assert container_step.is_container_step()
    assert not container_step.is_code_step()
    assert container_step.identifier == 'test'
    assert container_step.image == 'test'
    assert container_step.inputs == ['a', 'b']
    assert container_step.outputs == ['x', 'y']
    # -- Inputs and outputs are empty lists if not given ---------------------
    bare_code_step = CodeStep(identifier='test', func=my_add)
    assert bare_code_step.inputs == []
    assert bare_code_step.outputs == []
    bare_container_step = ContainerStep(identifier='test', image='test')
    assert bare_container_step.inputs == []
    assert bare_container_step.outputs == []
    # -- Unknown step type identifier is rejected ----------------------------
    with pytest.raises(ValueError):
        WorkflowStep(identifier='test', step_type=-1)
def test_initialize_workflow():
    """Create serial workflows from different constructor argument
    combinations and verify their steps, parameters, and workers.
    """
    # -- No arguments: empty steps and parameters -----------------------------
    empty = SerialWorkflow()
    assert len(empty.steps) == 0
    assert len(empty.parameters) == 0
    assert empty.workers is not None
    # -- Workflow steps only --------------------------------------------------
    step_one = ContainerStep(identifier='s1', image='test_1')
    step_two = ContainerStep(identifier='s2', image='test_2')
    with_steps = SerialWorkflow(steps=[step_one, step_two])
    assert len(with_steps.steps) == 2
    # The workflow is iterable over its steps.
    assert [s.image for s in with_steps] == ['test_1', 'test_2']
    assert with_steps.steps[0].image == 'test_1'
    assert with_steps.steps[1].image == 'test_2'
    assert len(with_steps.parameters) == 0
    assert with_steps.workers is not None
    # -- Template parameters only ---------------------------------------------
    with_params = SerialWorkflow(parameters=[String('a'), String('b')])
    assert len(with_params.steps) == 0
    assert len(with_params.parameters) == 2
    assert 'a' in with_params.parameters
    assert 'b' in with_params.parameters
    assert with_params.workers is not None
    # -- Duplicate parameter names are rejected -------------------------------
    with pytest.raises(err.InvalidTemplateError):
        SerialWorkflow(parameters=[String('a'), String('a')])
def test_run_successful_steps():
    """Test successful execution of a workflow step with two commands.

    Each command runs the `printenv.py` script for one environment variable.
    The test passes if the worker reports return code 0, no exception, and
    the two variable values on standard output.
    """
    # Run the commands with the current interpreter rather than a bare
    # `python` to avoid the error '/bin/sh: 1: python: not found'.
    interpreter = sys.executable
    commands = [
        '{py} printenv.py TEST_ENV_1'.format(py=interpreter),
        '{py} printenv.py TEST_ENV_2'.format(py=interpreter)
    ]
    env = {'TEST_ENV_1': 'Hello', 'TEST_ENV_2': 'World'}
    step = ContainerStep(image='test', commands=commands)
    result = SubprocessWorker().run(step=step, env=env, rundir=RUN_DIR)
    assert result.returncode == 0
    assert result.exception is None
    # Output lines may carry trailing whitespace; strip before comparing.
    assert ' '.join([s.strip() for s in result.stdout]) == 'Hello World'
def parse_template(
    template: WorkflowTemplate, arguments: Dict
) -> Tuple[List[ContainerStep], Dict, List[str]]:
    """Parse a serial workflow template to extract workflow steps, workflow
    arguments, and output files.

    Expands template parameter references in the workflow argument
    specification and returns the expanded arguments as a new dictionary. The
    workflow specification of the given template is not modified.

    Parameters
    ----------
    template: flowserv.model.template.base.WorkflowTemplate
        Template for a serial workflow.
    arguments: dict
        Dictionary of argument values for parameters in the template.

    Returns
    -------
    tuple of list of flowserv.controller.serial.workflow.step.ContainerStep,
    dict and list of string
    """
    # Get the commands from the workflow specification.
    workflow_spec = template.workflow_spec
    steps = list()
    for step in workflow_spec.get('workflow', {}).get('specification', {}).get('steps', []):
        # Workflow steps may either be parameter references or dictionaries
        # with `environment` and `commands` elements.
        script = None
        if tp.is_parameter(step):
            # A referenced parameter only contributes a step if the user
            # provided an argument value for it.
            para = template.parameters[tp.get_name(step)]
            if para.name in arguments:
                script = para.cast(arguments[para.name])
        else:
            script = ContainerStep(image=step.get('environment'))
            for cmd in step.get('commands', []):
                script.add(cmd)
        if script:
            steps.append(script)
    # Get the workflow arguments that are defined in the workflow template.
    # Expand template parameter references using the given argument set. The
    # expanded values are collected in a new dictionary so that the template
    # specification remains unchanged and can be parsed again with a
    # different set of arguments.
    run_args = {
        key: tp.expand_value(
            value=str(value),
            arguments=arguments,
            parameters=template.parameters
        )
        for key, value in workflow_spec.get('inputs', {}).get('parameters', {}).items()
    }
    # Get the list of output files from the workflow specification. At this
    # point we do not support references to template arguments or parameters.
    # NOTE(review): the fallback value is an empty dictionary although the
    # return annotation declares a list of strings - confirm with callers
    # before changing it.
    output_files = workflow_spec.get('outputs', {}).get('files', {})
    # Return tuple of workflow steps, workflow arguments and output file list.
    return steps, run_args, output_files
def cast(self, value: Union[List[Union[str, Dict]], Tuple[str, Dict]]) -> WorkflowStep:
    """Convert the given serialization into a workflow step.

    The given tuple contains the workflow step type identifier (str) and
    a type-specific serialization of the step properties.

    For values that represent a :class:`flowserv.model.workflow.step.ContainerStep`
    the dictionary elements are: `image`, `commands`, and `env`. The first
    two elements are mandatory.

    A value that already is a container step is returned unchanged.

    Parameters
    ----------
    value: list or tuple with str and dict
        Pair of workflow step type identifier and type-specific values.

    Returns
    -------
    flowserv.model.workflow.step.WorkflowStep

    Raises
    ------
    flowserv.error.InvalidArgumentError
        If a mandatory element is missing from the step serialization or if
        the step type identifier is unknown.
    """
    if isinstance(value, ContainerStep):
        return value
    step_type, step_val = value
    if step_type == CONTAINER_STEP:
        try:
            return ContainerStep(
                image=step_val['image'],
                commands=step_val['commands'],
                env=step_val.get('env')
            )
        except KeyError as ex:
            # Keep the original KeyError as the cause for easier debugging.
            raise err.InvalidArgumentError(str(ex)) from ex
    raise err.InvalidArgumentError(
        "unknown workflow step type '{}'".format(step_type)
    )
def test_get_worker_error():
    """Verify the errors raised by the worker pool for unknown worker
    identifiers, unknown worker types, and unknown step types.
    """
    container_step = ContainerStep(identifier='test', image='test')
    # -- The step is mapped to a worker that is not part of the pool ----------
    pool = WorkerPool(workers=[], managers={'test': 'test'})
    with pytest.raises(err.UnknownObjectError):
        pool.get(container_step)
    # -- Worker specification with a manipulated (unknown) type ---------------
    worker_spec = Code(identifier='test')
    worker_spec['type'] = 'unknown'
    pool = WorkerPool(workers=[worker_spec], managers={'test': 'test'})
    with pytest.raises(ValueError):
        pool.get(container_step)
    # -- Step with a manipulated (unknown) type -------------------------------
    container_step.step_type = 'unknown'
    pool = WorkerPool(workers=[])
    with pytest.raises(ValueError):
        pool.get(container_step)
def test_error_run_result():
    """Verify the state of a run result that contains a failed step."""
    # -- Failed step that carries an exception --------------------------------
    run = RunResult(arguments={})
    ok_step = ContainerStep(identifier='s1', image='test')
    run.add(ExecResult(step=ok_step, returncode=0))
    assert run.returncode == 0
    assert run.exception is None
    failed_step = ContainerStep(identifier='s2', image='test')
    run.add(
        ExecResult(
            step=failed_step,
            returncode=1,
            stderr=['e1', 'e2'],
            exception=ValueError()
        )
    )
    # The exception of the failed step is re-raised.
    with pytest.raises(ValueError):
        run.raise_for_status()
    assert run.returncode == 1
    assert run.exception is not None
    assert run.stderr == ['e1', 'e2']
    assert run.stdout == []
    # -- Failed step without an exception -------------------------------------
    run = RunResult(arguments={})
    failed_step = ContainerStep(identifier='s3', image='test')
    run.add(ExecResult(step=failed_step, returncode=1, stderr=['e1', 'e2']))
    with pytest.raises(err.FlowservError):
        run.raise_for_status()
def Step(
    identifier: str, action: Dict, inputs: Optional[List[str]] = None,
    outputs: Optional[List[str]] = None
) -> WorkflowStep:
    """Factory for workflow steps from their dictionary serialization.

    The keys in the given dictionary determine the type of the step that is
    created: `environment` together with `commands` yields a container step,
    `func` yields a code step, and `notebook` yields a notebook step. Raises
    a ValueError if the dictionary matches none of these.

    Parameters
    ----------
    identifier: string
        Unique step name (identifier).
    action: Dict
        Dictionary serialization for the workflow step.
    inputs: list of string, default=None
        List of files that are required by the workflow step as inputs.
    outputs: list of string, default=None
        List of files that are generated by the workflow step as outputs.

    Returns
    -------
    flowserv.model.workflow.step.WorkflowStep
    """
    # Arguments that are shared by all step types.
    common = dict(identifier=identifier, inputs=inputs, outputs=outputs)
    # Container step: both `environment` and `commands` are present.
    if 'environment' in action and 'commands' in action:
        return ContainerStep(
            image=action['environment'],
            commands=action['commands'],
            **common
        )
    # Code step: `func` references the function that is imported and run.
    if 'func' in action:
        return CodeStep(
            func=util.import_obj(action['func']),
            arg=action.get('arg'),
            varnames=parse_varnames(action=action),
            **common
        )
    # Notebook step: `notebook` is mandatory, all other elements optional.
    if 'notebook' in action:
        return NotebookStep(
            notebook=action['notebook'],
            output=action.get('output'),
            params=action.get('params'),
            requirements=action.get('requirements'),
            varnames=parse_varnames(action=action),
            **common
        )
    raise ValueError(f"invalid action specification '{action}'")
def exec(self, step: ContainerStep, context: Dict, store: FileSystemStorage) -> ExecResult:
    """Expand and run the commands of a container step.

    The step commands are template strings. References to variables and
    template parameters are substituted first. The expanded commands are
    then passed to the implementation-specific run method together with the
    merged environment variables and the base directory of the storage
    volume.

    Note that the container worker expects a file system storage volume.

    Parameters
    ----------
    step: flowserv.controller.serial.workflow.ContainerStep
        Step in a serial workflow.
    context: dict
        Dictionary of argument values for parameters in the template.
    store: flowserv.volume.fs.FileSystemStorage
        Storage volume that contains the workflow run files.

    Returns
    -------
    flowserv.controller.serial.workflow.result.ExecResult
    """
    # Copy of the given step whose commands are expanded, i.e., they contain
    # no references to variables or template parameters anymore.
    runnable = ContainerStep(
        identifier=step.identifier,
        image=step.image,
        env=step.env
    )
    for command in step.commands:
        # Every placeholder is first mapped to its own name. Values from the
        # context override these defaults and the fixed worker variables
        # override the context.
        substitutions = dict()
        for name in tp.placeholders(command):
            substitutions[name] = name
        substitutions.update(context)
        substitutions.update(self.variables)
        runnable.add(Template(command).substitute(substitutions).strip())
    # Environment variables of the step override those of the worker. An
    # empty environment is passed on as None.
    merged_env = dict(self.env)
    merged_env.update(step.env)
    return self.run(
        step=runnable,
        env=merged_env or None,
        rundir=store.basedir
    )
def test_run_steps_with_subprocess_error(mock_subprocess):
    """Verify the execution result for a step whose command cannot be run
    (uses the `mock_subprocess` fixture).
    """
    failing_step = ContainerStep(image='test', commands=['nothing to do'])
    worker = SubprocessWorker()
    outcome = worker.run(step=failing_step, env={}, rundir=RUN_DIR)
    assert outcome.exception is not None
    assert outcome.returncode == 1
    assert 'cannot run' in ''.join(outcome.stderr)
    assert outcome.stdout == []
def test_fixed_variables(tmpdir):
    """Verify that fixed worker variables take precedence over run arguments
    when the commands of a step are expanded.
    """
    step = ContainerStep(
        identifier='test',
        image='test',
        commands=['${python} $run $me']
    )
    arguments = {'run': 'my_model.py', 'me': 1}
    # Pairs of fixed worker variables and the expected expanded commands.
    scenarios = [
        (dict(), ['python my_model.py 1']),
        ({'run': 'static.py'}, ['python static.py 1'])
    ]
    for variables, expected in scenarios:
        engine = ContainerTestEngine(variables=variables)
        engine.exec(step=step, context=arguments, store=FileSystemStorage(tmpdir))
        assert engine.commands == expected
def test_run_successful_steps(mock_docker):
    """Run a step with two commands using the Docker worker (with the
    `mock_docker` fixture) and verify the successful execution result.
    """
    environment = {'TEST_ENV_1': 'Hello', 'TEST_ENV_2': 'World'}
    step = ContainerStep(image='test', commands=['TEST_ENV_1', 'TEST_ENV_2'])
    outcome = DockerWorker().run(step=step, env=environment, rundir=RUN_DIR)
    assert outcome.exception is None
    assert outcome.returncode == 0
    assert outcome.stderr == []
    assert outcome.stdout == ['Hello', 'World']
def test_fixed_variables():
    """Verify that fixed worker variables take precedence over run arguments
    when the commands of a step are expanded.
    """
    step = ContainerStep(image='test', commands=['${python} $run $me'])
    arguments = {'run': 'my_model.py', 'me': 1}
    # Pairs of fixed worker variables and the expected expanded commands.
    scenarios = [
        (dict(), ['python my_model.py 1']),
        ({'run': 'static.py'}, ['python static.py 1'])
    ]
    for variables, expected in scenarios:
        engine = ContainerTestEngine(variables=variables)
        engine.exec(step=step, arguments=arguments, rundir='/dev/null')
        assert engine.commands == expected
def test_successful_run_result():
    """Verify the state of a run result after two successful steps."""
    run = RunResult(arguments={'a': 1})
    # First step, followed by an update of an existing context value.
    first_step = ContainerStep(identifier='s1', image='test1')
    run.add(ExecResult(step=first_step, returncode=0, stdout=['o1']))
    run.context['a'] = 2
    # Second step, followed by the addition of a new context value.
    second_step = ContainerStep(identifier='s2', image='test2')
    run.add(ExecResult(step=second_step, returncode=0, stdout=['o2', 'o3']))
    run.context['b'] = 1
    # -- Overall run state -----------------------------------------------------
    assert run.returncode == 0
    assert run.exception is None
    run.raise_for_status()
    assert run.stderr == []
    assert run.stdout == ['o1', 'o2', 'o3']
    assert run.get('a') == 2
    assert run.get('b') == 1
    # -- Results for the individual steps --------------------------------------
    expected = [('test1', ['o1']), ('test2', ['o2', 'o3'])]
    for index, (image, stdout) in enumerate(expected):
        step_result = run.steps[index]
        assert step_result.step.image == image
        assert step_result.stdout == stdout
def test_run_steps_with_error(mock_docker):
    """Verify the execution result of the Docker worker (with the
    `mock_docker` fixture) for failing commands. Covers a command that raises
    an exception and a command that returns a non-zero exit code.
    """
    worker = DockerWorker()
    # -- Second command raises an exception ------------------------------------
    step = ContainerStep(
        identifier='test',
        image='test',
        commands=['TEST_ENV_1', 'error', 'TEST_ENV_2']
    )
    environment = {'TEST_ENV_1': ('Hello', 0), 'TEST_ENV_2': ('World', 0)}
    outcome = worker.run(step=step, env=environment, rundir=RUN_DIR)
    assert outcome.exception is not None
    assert outcome.returncode == 1
    assert outcome.stdout == ['Hello']
    assert 'there was an error' in ''.join(outcome.stderr)
    # -- Second command has exit code '1' --------------------------------------
    step = ContainerStep(
        identifier='test',
        image='test',
        commands=['TEST_ENV_1', 'TEST_ENV_2']
    )
    environment = {'TEST_ENV_1': ('', 0), 'TEST_ENV_2': ('World', 1)}
    outcome = DockerWorker().run(step=step, env=environment, rundir=RUN_DIR)
    assert outcome.exception is None
    assert outcome.returncode == 1
    assert outcome.stdout == ['World']
def test_run_steps_with_error(mock_docker):
    """Verify the execution result of the Docker worker (with the
    `mock_docker` fixture) for a step where the second of three commands
    raises an error.
    """
    environment = {'TEST_ENV_1': 'Hello', 'TEST_ENV_2': 'World'}
    step = ContainerStep(
        image='test',
        commands=['TEST_ENV_1', 'error', 'TEST_ENV_2']
    )
    outcome = DockerWorker().run(step=step, env=environment, rundir=RUN_DIR)
    assert outcome.exception is not None
    assert outcome.returncode == 1
    # Only the output of the first command is present.
    assert outcome.stdout == ['Hello']
    assert 'there was an error' in ''.join(outcome.stderr)
def test_step_type():
    """Check the type predicates of function steps and container steps and
    the error that is raised for an invalid step type.
    """
    # -- Function step ---------------------------------------------------------
    function_step = FunctionStep(func=my_add, output='z')
    assert function_step.is_function_step()
    assert not function_step.is_container_step()
    # -- Container step --------------------------------------------------------
    container_step = ContainerStep(image='test')
    assert container_step.is_container_step()
    assert not container_step.is_function_step()
    # -- Unknown step type identifier is rejected ------------------------------
    with pytest.raises(ValueError):
        WorkflowStep(step_type=-1)
def test_run_successful_steps():
    """Test successful execution of a workflow step with two commands.

    Each command runs the `printenv.py` script for one environment variable.
    The test guarantees that the SYSTEMROOT environment variable is set while
    the commands run and restores its original state afterwards, even if one
    of the assertions fails.
    """
    # Set the SYSTEMROOT environment variable if it is missing or empty.
    # NOTE(review): presumably required for subprocesses on Windows - confirm.
    systemroot = os.environ.get('SYSTEMROOT')
    if not systemroot:
        os.environ['SYSTEMROOT'] = 'SYSTEMROOT'
    try:
        # Run the commands with the current interpreter rather than a bare
        # `python` to avoid the error '/bin/sh: 1: python: not found'.
        interpreter = sys.executable
        commands = [
            '{py} printenv.py TEST_ENV_1'.format(py=interpreter),
            '{py} printenv.py TEST_ENV_2'.format(py=interpreter)
        ]
        env = {'TEST_ENV_1': 'Hello', 'TEST_ENV_2': 'World'}
        step = ContainerStep(identifier='test', image='test', commands=commands)
        result = SubprocessWorker().run(step=step, env=env, rundir=RUN_DIR)
        assert result.returncode == 0
        assert result.exception is None
        # Output lines may carry trailing whitespace; strip before comparing.
        assert ' '.join([s.strip() for s in result.stdout]) == 'Hello World'
    finally:
        # Restore the original state of the variable. A variable that was
        # not set before the test is removed again; any other value
        # (including an empty string) is written back unchanged.
        if systemroot is None:
            os.environ.pop('SYSTEMROOT', None)
        else:
            os.environ['SYSTEMROOT'] = systemroot
def test_run_successful_steps_splitenv():
    """Verify that environment variables of the workflow step override the
    variables that are defined for the worker.
    """
    # Run the commands with the current interpreter rather than a bare
    # `python` to avoid the error '/bin/sh: 1: python: not found'.
    templates = ['{py} printenv.py TEST_ENV_1', '{py} printenv.py TEST_ENV_2']
    commands = [t.format(py=sys.executable) for t in templates]
    # The worker and the step both define TEST_ENV_2.
    worker = SubprocessWorker(env={'TEST_ENV_1': 'Hello', 'TEST_ENV_2': 'You'})
    step = ContainerStep(
        image='test',
        env={'TEST_ENV_2': 'World'},
        commands=commands
    )
    outcome = worker.exec(step=step, arguments=dict(), rundir=RUN_DIR)
    assert outcome.exception is None
    assert outcome.returncode == 0
    printed = [line.strip() for line in outcome.stdout]
    assert ' '.join(printed) == 'Hello World'
def test_run_steps_with_error():
    """Verify the execution result of the subprocess worker for a step where
    the second of three commands fails.
    """
    # Run the commands with the current interpreter rather than a bare
    # `python` to avoid the error '/bin/sh: 1: python: not found'.
    templates = [
        '{py} printenv.py TEST_ENV_1',
        '{py} printenv.py TEST_ENV_ERROR',
        '{py} printenv.py TEST_ENV_2'
    ]
    commands = [t.format(py=sys.executable) for t in templates]
    environment = {
        'TEST_ENV_1': 'Hello',
        'TEST_ENV_ERROR': 'error',
        'TEST_ENV_2': 'World'
    }
    step = ContainerStep(image='test', commands=commands)
    outcome = SubprocessWorker().run(step=step, env=environment, rundir=RUN_DIR)
    # The failed command is reported via the return code, not an exception.
    assert outcome.exception is None
    assert outcome.returncode == 1
    printed = [line.strip() for line in outcome.stdout]
    assert ' '.join(printed) == 'Hello'
    assert 'there was an error' in ''.join(outcome.stderr)
def add_container_step(
    self, image: str, commands: Optional[List[str]] = None,
    env: Optional[Dict] = None
) -> SerialWorkflow:
    """Create a container step from the given arguments and add it at the
    end of the list of workflow steps.

    Parameters
    ----------
    image: string
        Execution environment identifier.
    commands: list(string), default=None
        List of command line statements.
    env: dict, default=None
        Environment variables for workflow step execution.

    Returns
    -------
    flowserv.controller.serial.workflow.base.SerialWorkflow
    """
    self.steps.append(ContainerStep(image=image, commands=commands, env=env))
    # Return the workflow itself to allow chaining of calls.
    return self
def add_container_step(
    self, identifier: str, image: str, commands: Optional[List[str]] = None,
    env: Optional[Dict] = None, inputs: Optional[List[str]] = None,
    outputs: Optional[List[str]] = None
) -> SerialWorkflow:
    """Create a container step from the given arguments and add it at the
    end of the list of workflow steps.

    Parameters
    ----------
    identifier: str
        Unique workflow step identifier.
    image: string
        Execution environment identifier.
    commands: list(string), default=None
        List of command line statements.
    env: dict, default=None
        Environment variables for workflow step execution.
    inputs: list of string, default=None
        List of files that are required by the workflow step as inputs.
    outputs: list of string, default=None
        List of files that are generated by the workflow step as outputs.

    Returns
    -------
    flowserv.controller.serial.workflow.base.SerialWorkflow
    """
    self.steps.append(
        ContainerStep(
            identifier=identifier,
            image=image,
            commands=commands,
            env=env,
            inputs=inputs,
            outputs=outputs
        )
    )
    # Return the workflow itself to allow chaining of calls.
    return self
def test_container_step():
    """Verify that commands can be appended to a container step via chained
    calls to its `add` method.
    """
    empty_step = ContainerStep(identifier='test', image='test')
    step = empty_step.add('A').add('B')
    assert step.commands == ['A', 'B']
    assert step.image == 'test'
import pytest

from flowserv.controller.worker.code import CodeWorker
from flowserv.controller.worker.docker import DockerWorker
from flowserv.controller.worker.manager import WorkerPool, Code, Docker, Notebook, Subprocess
from flowserv.controller.worker.notebook import NotebookEngine
from flowserv.controller.worker.subprocess import SubprocessWorker
from flowserv.model.workflow.step import CodeStep, ContainerStep, NotebookStep

import flowserv.error as err


# Each case pairs a workflow step with the worker class that the pool is
# expected to return for it by default.
# NOTE(review): the first two cases are identical - confirm whether a
# different step/worker combination was intended for one of them.
@pytest.mark.parametrize(
    'step,cls',
    [(ContainerStep(identifier='test', image='test'), SubprocessWorker),
     (ContainerStep(identifier='test', image='test'), SubprocessWorker),
     (CodeStep(identifier='test', func=lambda x: x), CodeWorker),
     (NotebookStep(identifier='test', notebook='helloworld.ipynb'), NotebookEngine)])
def test_get_default_worker(step, cls):
    """Test getting a default worker for a workflow step that has no manager
    explicitly assigned to it.

    The worker pool is created without any worker specifications. The
    default worker that is returned for the step has to be an instance of
    the expected worker class.
    """
    factory = WorkerPool(workers=[])
    assert isinstance(factory.get_default_worker(step), cls)


def test_get_worker_error():
    """Test error when accessing worker with unknown identifier."""
    step = ContainerStep(identifier='test', image='test')