def test_delete_group(database, tmpdir):
    """Test creating and deleting workflow groups."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with two groups for a single workflow.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        wf_id = model.create_workflow(session)
        manager = WorkflowGroupManager(session=session, fs=fs)
        group_1 = manager.create_group(
            workflow_id=wf_id,
            name='A',
            user_id=user_id,
            parameters=ParameterIndex(),
            workflow_spec=dict()
        ).group_id
        group_2 = manager.create_group(
            workflow_id=wf_id,
            name='B',
            user_id=user_id,
            parameters=ParameterIndex(),
            workflow_spec=dict()
        ).group_id
    # -- Delete group ---------------------------------------------------------
    with database.session() as session:
        # Ensure that group directories are deleted.
        manager = WorkflowGroupManager(session=session, fs=fs)
        manager.delete_group(group_1)
        # Access to group 1 raises an error while group 2 is still accessible.
        with pytest.raises(err.UnknownWorkflowGroupError):
            manager.get_group(group_1)
        assert manager.get_group(group_2) is not None
def __init__(self, workflow_id: str, group_id: str, service: APIFactory):
    """Initialize the required identifiers and the API factory.

    Reads all metadata for the given workflow during initialization and
    maintains a copy in memory.

    Parameters
    ----------
    workflow_id: string
        Unique workflow identifier.
    group_id: string
        Unique workflow group identifier.
    service: flowserv.client.api.APIFactory
        Factory to create instances of the service API.
    """
    self.workflow_id = workflow_id
    self.group_id = group_id
    self.service = service
    # Get application properties from the database.
    with self.service() as api:
        wf = api.workflows().get_workflow(self.workflow_id)
        grp = api.groups().get_group(group_id=self.group_id)
        self._name = wf.get(wflbls.WORKFLOW_NAME)
        self._description = wf.get(wflbls.WORKFLOW_DESCRIPTION)
        self._instructions = wf.get(wflbls.WORKFLOW_INSTRUCTIONS)
        self._parameters = ParameterIndex.from_dict(
            grp[glbls.GROUP_PARAMETERS]
        )
def create_group(session, workflow_id, users):
    """Create a new workflow group in the database. Expects a workflow
    identifier and a list of user identifiers. Returns the identifier of
    the created group.

    Parameters
    ----------
    session: sqlalchemy.orm.session.Session
        Database session.
    workflow_id: string
        Unique workflow identifier.
    users: list
        List of unique user identifiers.

    Returns
    -------
    string
    """
    group_id = util.get_unique_identifier()
    group = GroupObject(
        group_id=group_id,
        workflow_id=workflow_id,
        name=group_id,
        owner_id=users[0],
        parameters=ParameterIndex(),
        workflow_spec=dict()
    )
    # Add users as group members.
    for user_id in users:
        user = session.query(User).filter(User.user_id == user_id).one()
        group.members.append(user)
    session.add(group)
    return group_id
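# Example usage of the helper above (a minimal sketch; `database` and `model`
# refer to the test fixtures and helpers used elsewhere in this test suite):
#
#   with database.session() as session:
#       user_id = model.create_user(session, active=True)
#       workflow_id = model.create_workflow(session)
#       group_id = create_group(session, workflow_id, users=[user_id])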
def test_parse_code_step():
    """Test parsing a specification for a workflow with a code step."""
    doc = {
        'steps': [{
            'name': 'code_step',
            'action': {
                'func': 'flowserv.tests.worker.a_plus_b',
                'arg': 'z',
                'variables': [
                    {'arg': 'a', 'var': 'val1'},
                    {'arg': 'b', 'var': 'val2'}
                ]
            }
        }]
    }
    template = WorkflowTemplate(workflow_spec=doc, parameters=ParameterIndex())
    steps, _, _ = parser.parse_template(template=template, arguments=dict())
    assert len(steps) == 1
    step = steps[0]
    assert step.func(2, 3) == 5
    assert step.arg == 'z'
    assert step.varnames == {'a': 'val1', 'b': 'val2'}
def show_run(run):
    """Show workflow run information."""
    with service() as api:
        doc = api.runs().get_run(run_id=run)
        click.echo('ID: {}'.format(doc[labels.RUN_ID]))
        if labels.RUN_STARTED in doc:
            click.echo('Started at: {}'.format(doc[labels.RUN_STARTED][:19]))
        if labels.RUN_FINISHED in doc:
            click.echo('Finished at: {}'.format(doc[labels.RUN_FINISHED][:19]))
        click.echo('State: {}'.format(doc[labels.RUN_STATE]))
        # Get the index of parameters. The index contains the parameter name
        # and type.
        parameters = ParameterIndex.from_dict(doc[labels.RUN_PARAMETERS])
        click.echo('\nArguments:')
        for arg in doc['arguments']:
            para = parameters[arg['name']]
            if para.is_file():
                file_id, target_path = deserialize_fh(arg['value'])
                value = '{} ({})'.format(file_id, target_path)
            else:
                value = arg['value']
            click.echo('  {} = {}'.format(para.name, value))
        if labels.RUN_ERRORS in doc:
            click.echo('\nMessages:')
            for msg in doc[labels.RUN_ERRORS]:
                click.echo('  {}'.format(msg))
        elif labels.RUN_FILES in doc:
            click.echo('\nFiles:')
            for res in doc[labels.RUN_FILES]:
                click.echo('  {} ({})'.format(
                    res[flbls.FILE_ID],
                    res[flbls.FILE_NAME]
                ))
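# Illustrative output of the command above (placeholder values only, derived
# from the echo format strings in the function body):
#
#   ID: 0a1b2c3d
#   Started at: 2021-03-01T12:00:00
#   Finished at: 2021-03-01T12:00:05
#   State: SUCCESS
#
#   Arguments:
#     greeting = Hello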
def test_expand_parameter_value(value, args, result):
    """Test the expand parameter reference function."""
    parameters = ParameterIndex()
    parameters['A'] = String(name='A', label='P1', index=0)
    parameters['B'] = String(name='B', label='P2', index=1)
    parameters['C'] = String(name='C', label='P3', index=2, default='default')
    assert tp.expand_value(value, args, parameters) == result
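# Illustrative parametrization for the test above (hypothetical values; the
# actual @pytest.mark.parametrize cases are defined elsewhere, and flowserv's
# `$[[...]]` parameter reference syntax is assumed):
#
#   ('$[[A]]', {'A': 'x'}, 'x')      # direct reference with a given value
#   ('$[[C]]', dict(), 'default')    # falls back to the declared default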
def start_run(ctx, group, configfile):
    """Start a new workflow run."""
    group_id = ctx.obj.get_group(ctx.params)
    config = factory.read_config(configfile) if configfile else None
    with service() as api:
        doc = api.groups().get_group(group_id=group_id)
        config = config if config else doc[glbls.ENGINE_CONFIG]
        # Create a list of file descriptors for uploaded files that are
        # included in the submission handle.
        files = []
        for fh in doc[glbls.GROUP_UPLOADS]:
            files.append((
                fh[flbls.FILE_ID],
                fh[flbls.FILE_NAME],
                fh[flbls.FILE_DATE][:19]
            ))
        # Create a list of additional user-provided template parameters.
        parameters = ParameterIndex.from_dict(doc[glbls.GROUP_PARAMETERS])
        # Read values for all parameters.
        user_input = read(parameters.sorted(), files=files)
        args = [serialize_arg(key, val) for key, val in user_input.items()]
        # Start the run and print the returned run state information.
        doc = api.runs().start_run(
            group_id=group_id,
            arguments=args,
            config=config
        )
        run_id = doc[labels.RUN_ID]
        run_state = doc[labels.RUN_STATE]
        click.echo('started run {} is {}'.format(run_id, run_state))
def test_parse_workflow_spec_error():
    """Test error for unknown workflow step when parsing a serial workflow
    specification.
    """
    doc = {'steps': [{'name': 'S1', 'action': {'type': 'undefined'}}]}
    template = WorkflowTemplate(workflow_spec=doc, parameters=ParameterIndex())
    with pytest.raises(ValueError):
        parser.parse_template(template=template, arguments=dict())
def test_validate_arguments():
    """Test validating a given set of arguments against the parameters in a
    workflow template.
    """
    parameters = ParameterIndex.from_dict([
        String(name='A', label='P1', index=0, required=True).to_dict(),
        String(name='B', label='P2', index=1, default='X').to_dict()
    ])
    template = WorkflowTemplate(workflow_spec=dict(), parameters=parameters)
    template.validate_arguments({'A': 1, 'B': 0})
    template.validate_arguments({'A': 1})
    with pytest.raises(err.MissingArgumentError):
        template.validate_arguments({'B': 1})
def test_replace_arguments(spec):
    """Test replacing parameter references in a workflow specification with
    given argument values.
    """
    parameters = ParameterIndex()
    parameters['A'] = String(name='A', label='P1', index=0)
    parameters['B'] = String(name='B', label='P2', index=1)
    parameters['C'] = String(name='C', label='P3', index=2)
    parameters['D'] = String(name='D', label='P4', index=3, default='default')
    parameters['E'] = String(name='E', label='P5', index=4)
    parameters['F'] = String(name='F', label='P6', index=5)
    parameters['G'] = String(name='G', label='P7', index=6)
    doc = tp.replace_args(
        spec,
        arguments={'A': 'x', 'B': 'y', 'C': 'z', 'E': True, 'F': 'b', 'G': 'c'},
        parameters=parameters
    )
    assert doc == {
        "name": "myname",
        "var1": "x",
        "var2": "x",
        "var3": "e1",
        "values": [
            {
                "name": "name",
                "el1": "y",
                "el2": None,
                "el3": "b",
                "nest": {"var": "default"}
            },
            "z",
            "y",
            3,
            "E"
        ],
        "count": 2
    }
    # Error for missing parameter value.
    with pytest.raises(err.MissingArgumentError):
        tp.replace_args(spec, {'A': 'x', 'B': 'y'}, parameters)
    # Error for nested lists.
    with pytest.raises(err.InvalidTemplateError):
        spec = {'values': ['A', [2, 3]]}
        tp.replace_args(spec, {'A': 'x', 'B': 'y'}, parameters)
def test_parameter_index_serialization():
    """Test generating a parameter index from serializations."""
    p1 = String(name='0', label='P1', index=1)
    p2 = String(name='1', label='P2', index=0)
    doc = ParameterIndex.from_dict([p1.to_dict(), p2.to_dict()]).to_dict()
    parameters = ParameterIndex.from_dict(doc)
    assert len(parameters) == 2
    assert '0' in parameters
    assert '1' in parameters
    assert [p.name for p in parameters.sorted()] == ['1', '0']
    # Error case: duplicate parameter.
    with pytest.raises(err.InvalidTemplateError):
        ParameterIndex.from_dict([p1.to_dict(), p1.to_dict()])
    # Error case: unknown parameter type.
    doc = p1.to_dict()
    doc['dtype'] = 'unknown'
    with pytest.raises(err.InvalidParameterError):
        ParameterIndex.from_dict([doc])
def __init__(
    self, steps: Optional[List[WorkflowStep]] = None,
    parameters: Optional[List[Parameter]] = None,
    workers: Optional[WorkerFactory] = None
):
    """Initialize the object properties.

    All properties are optional and can be initialized via different methods
    of the workflow instance.

    Parameters
    ----------
    steps: list of flowserv.model.workflow.step.WorkflowStep, default=None
        Optional sequence of steps in the serial workflow.
    parameters: list of flowserv.model.parameter.base.Parameter, default=None
        Optional list of workflow template parameters.
    workers: flowserv.controller.worker.factory.WorkerFactory, default=None
        Factory for worker objects that are used to execute individual
        :class:`flowserv.model.workflow.step.ContainerStep` instances in the
        workflow sequence.
    """
    self.steps = steps if steps is not None else list()
    self.parameters = ParameterIndex(parameters=parameters)
    self.workers = workers if workers is not None else WorkerFactory()
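# A minimal construction sketch for the initializer above (assuming it
# belongs to the serial workflow class described in the docstring; the name
# `SerialWorkflow` and the step variables are assumptions for illustration):
#
#   workflow = SerialWorkflow(
#       steps=[step1, step2],
#       parameters=[String(name='A', label='P1', index=0)],
#       workers=WorkerFactory()
#   )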
def start_run(ctx, submission):
    """Start a new submission run."""
    s_id = submission if submission else config.SUBMISSION_ID()
    if s_id is None:
        click.echo('no submission specified')
        return
    try:
        url = ctx.obj['URLS'].get_submission(submission_id=s_id)
        headers = ctx.obj['HEADERS']
        r = requests.get(url, headers=headers)
        r.raise_for_status()
        body = r.json()
        # Create a list of file descriptors for uploaded files that are
        # included in the submission handle.
        files = []
        for fh in body['files']:
            files.append((fh['id'], fh['name'], fh['createdAt'][:19]))
        # Create a list of additional user-provided template parameters.
        parameters = ParameterIndex.from_dict(body['parameters'])
        # Read values for all parameters.
        args = read(parameters.sorted(), files=files)
        data = {'arguments': [ARG(key, val) for key, val in args.items()]}
        url = ctx.obj['URLS'].start_run(submission_id=s_id)
        r = requests.post(url, json=data, headers=headers)
        r.raise_for_status()
        body = r.json()
        if ctx.obj['RAW']:
            click.echo(json.dumps(body, indent=4))
        else:
            run_id = body['id']
            run_state = body['state']
            click.echo('run {} in state {}'.format(run_id, run_state))
    except (requests.ConnectionError, requests.HTTPError) as ex:
        click.echo('{}'.format(ex))
    except (ValueError, IOError, OSError) as ex:
        click.echo('{}'.format(ex))
def test_init_parameter_index():
    """Test initializing the parameter index from a given list of
    parameters.
    """
    assert len(ParameterIndex()) == 0
    assert len(ParameterIndex(parameters=[String('A'), String('B')])) == 2
    with pytest.raises(err.InvalidTemplateError):
        ParameterIndex(parameters=[String('A'), String('B'), String('A')])
def from_dict(cls, doc, validate=True):
    """Create an instance of the workflow template from a dictionary
    serialization. The structure of the dictionary is expected to be the
    same as generated by the to_dict() method of this class. The only
    mandatory element in the dictionary is the workflow specification.

    Parameters
    ----------
    doc: dict
        Dictionary serialization of a workflow template.
    validate: bool, optional
        Validate template parameter declarations against the parameter
        schema if this flag is True.

    Returns
    -------
    flowserv.model.template.base.WorkflowTemplate

    Raises
    ------
    flowserv.error.InvalidTemplateError
    flowserv.error.UnknownParameterError
    """
    # Ensure that the mandatory elements are present. At this point, only
    # the workflow specification is mandatory.
    if validate:
        if 'workflow' not in doc:
            raise err.InvalidTemplateError("missing element 'workflow'")
    # -- Workflow specification -------------------------------------------
    workflow_spec = doc['workflow']
    # -- Parameter declarations ---------------------------------------------
    # Add the given parameter declarations to the parameter list. Ensure
    # that all default values are set.
    parameters = ParameterIndex.from_dict(
        doc.get('parameters', list()),
        validate=validate
    )
    # Ensure that the workflow specification does not reference undefined
    # parameters if the validate flag is True.
    if validate:
        for key in tp.get_parameter_references(workflow_spec):
            if key not in parameters:
                raise err.UnknownParameterError(key)
    # -- Post-processing task -----------------------------------------------
    postproc_spec = None
    if 'postproc' in doc:
        postproc_spec = doc['postproc']
        if validate:
            util.validate_doc(
                doc=postproc_spec,
                mandatory=['workflow'],
                optional=['inputs', 'outputs']
            )
            util.validate_doc(
                doc=postproc_spec.get('inputs', {'files': ''}),
                mandatory=['files'],
                optional=['runs']
            )
    # -- Parameter group information ------------------------------------------
    parameter_groups = None
    if 'parameterGroups' in doc:
        parameter_groups = list()
        for m in doc['parameterGroups']:
            parameter_groups.append(
                ParameterGroup.from_dict(m, validate=validate)
            )
    # -- Output file specifications --------------------------------------------
    outputs = None
    if 'outputs' in doc:
        outputs = [
            WorkflowOutputFile.from_dict(f, validate=validate)
            for f in doc['outputs']
        ]
    # -- Result schema ----------------------------------------------------------
    schema = ResultSchema.from_dict(doc.get('results'), validate=validate)
    # Return the template instance.
    return cls(
        workflow_spec=workflow_spec,
        postproc_spec=postproc_spec,
        parameters=parameters,
        result_schema=schema,
        parameter_groups=parameter_groups,
        outputs=outputs
    )
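# Example: a minimal serialization accepted by `from_dict` (a sketch based on
# the test cases below; only the 'workflow' element is mandatory):
#
#   template = WorkflowTemplate.from_dict({
#       'workflow': dict(),
#       'parameters': [String(name='A', label='P1', index=0).to_dict()]
#   })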
def test_template_serialization():
    """Test creating template instances from serializations."""
    # Minimal template specification.
    doc = {'workflow': dict()}
    doc = WorkflowTemplate.from_dict(doc).to_dict()
    template = WorkflowTemplate.from_dict(doc)
    assert template.workflow_spec == dict()
    assert template.parameters == ParameterIndex()
    # Maximal template specification.
    doc = {
        'workflow': {'inputs': [tp.VARIABLE('A'), 'B', 'C']},
        'parameters': [String(name='A', label='P1', index=0).to_dict()],
        'parameterGroups': [
            {'name': '0', 'title': 'G1', 'index': 0},
            {'name': '1', 'title': 'G2', 'index': 1}
        ],
        'postproc': {
            'workflow': dict(),
            'inputs': {'files': ['D', 'E']}
        },
        'results': {
            'file': 'results/analytics.json',
            'schema': [{'name': '0', 'label': 'col0', 'dtype': PARA_STRING}]
        }
    }
    doc = WorkflowTemplate.from_dict(doc).to_dict()
    template = WorkflowTemplate.from_dict(doc)
    assert template.workflow_spec == {'inputs': [tp.VARIABLE('A'), 'B', 'C']}
    assert len(template.parameters) == 1
    assert len(template.parameter_groups) == 2
    assert template.postproc_spec['workflow'] == dict()
    # An invalid document raises no error only if validate is set to False.
    para = String(name='0', label='P1', index=0).to_dict()
    para['addOn'] = 1
    doc = {
        'workflow': {'inputs': ['A', 'B', 'C']},
        'parameters': [para],
        'parameterGroups': [
            {'name': '0', 'title': 'G1', 'index': 0, 'sortDesc': True},
            {'name': '1', 'title': 'G2', 'index': 1}
        ],
        'postproc': {'inputs': {'files': ['D', 'E']}}
    }
    WorkflowTemplate.from_dict(doc, validate=False)
    with pytest.raises(err.InvalidParameterError):
        WorkflowTemplate.from_dict(doc)
    # Error for missing workflow specification.
    with pytest.raises(err.InvalidTemplateError):
        WorkflowTemplate.from_dict(dict())
    # Error for unknown parameter.
    with pytest.raises(err.UnknownParameterError):
        doc = {
            'workflow': {'inputs': [tp.VARIABLE('0'), 'B', 'C']},
            'parameters': [String(name='A', label='P1', index=0).to_dict()]
        }
        WorkflowTemplate.from_dict(doc)
def test_create_group(database, tmpdir):
    """Test creating and retrieving new workflow groups."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with a single workflow.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
    # -- Test create group ----------------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        group = manager.create_group(
            workflow_id=workflow_id,
            name='Group 1',
            user_id=user_id,
            parameters=ParameterIndex(),
            workflow_spec=dict()
        )
        assert group.name == 'Group 1'
        assert group.owner_id == user_id
        assert group.engine_config is None
        assert len(group.members) == 1
        assert isinstance(group.parameters, dict)
        assert len(group.parameters) == 0
        assert isinstance(group.workflow_spec, dict)
        assert len(group.workflow_spec) == 0
        # Retrieve the group from the database.
        group = manager.get_group(group.group_id)
        assert group.name == 'Group 1'
        assert group.owner_id == user_id
        assert len(group.members) == 1
        assert isinstance(group.parameters, dict)
        assert len(group.parameters) == 0
        assert isinstance(group.workflow_spec, dict)
        assert len(group.workflow_spec) == 0
    # -- Test create group with duplicate members and different config --------
    engine_config = {'workers': {'test': {'worker': 'docker'}}}
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        group = manager.create_group(
            workflow_id=workflow_id,
            name='Group 2',
            user_id=user_id,
            parameters=ParameterIndex(),
            workflow_spec=dict(),
            members=[user_id, user_id, user_id],
            engine_config=engine_config
        )
        assert len(group.members) == 1
        assert group.engine_config == engine_config
        # Retrieve the group from the database.
        group = manager.get_group(group.group_id)
        assert len(group.members) == 1
    # -- Test error cases -----------------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        # - Invalid name
        with pytest.raises(err.ConstraintViolationError):
            manager.create_group(
                workflow_id=workflow_id,
                name='A' * 513,
                user_id=user_id,
                parameters=ParameterIndex(),
                workflow_spec=dict()
            )
        # - Duplicate name
        with pytest.raises(err.ConstraintViolationError):
            manager.create_group(
                workflow_id=workflow_id,
                name='Group 1',
                user_id=user_id,
                parameters=ParameterIndex(),
                workflow_spec=dict()
            )
        # - Unknown user
        with pytest.raises(err.UnknownUserError):
            manager.create_group(
                workflow_id=workflow_id,
                name='D',
                user_id=user_id,
                parameters=ParameterIndex(),
                workflow_spec=dict(),
                members=[user_id, 'not a user']
            )
        # - Missing user
        with pytest.raises(err.UnknownUserError):
            manager.create_group(
                workflow_id=workflow_id,
                name='D',
                user_id=None,
                parameters=ParameterIndex(),
                workflow_spec=dict(),
                members=[user_id, 'not a user']
            )
"""Names for files and folders that contain run result files and run metadata. """ RUNS_DIR = 'runs' RUNS_FILE = 'runs.json' """Labels for metadata objects in the run listing.""" LABEL_ID = 'id' LABEL_NAME = 'name' LABEL_FILES = 'files' """Fixed set of parameter declarations for post-processing workflows. Contains only the declaration for the runs folder. """ PARA_RUNS = 'runs' PARAMETER = File(name=PARA_RUNS, index=0, target=util.join(RUNS_DIR, RUNS_FILE)) PARAMETERS = ParameterIndex() PARAMETERS[PARAMETER.name] = PARAMETER # -- Helper functions --------------------------------------------------------- def prepare_postproc_data(input_files: List[str], ranking: List[RunResult], run_manager: RunManager, store: StorageVolume): """Create input files for post-processing steps for a given set of runs. Creates files for a post-processing run in a given base directory on a storage volume. The resulting directory contains files for each run in a given ranking. For each run a sub-folder with the run identifier as the directory name is created. Each folder contains copies of result files for the run for those files that are specified in the input files list. A file ``runs.json`` in the base directory lists the runs in the ranking together