def test_access_run_result_files_local(database, tmpdir):
    """Test accessing run result files."""
    # -- Setup ----------------------------------------------------------------
    env = Config().basedir(tmpdir).auth()
    fs = FS(env=env)
    workflow_id, group_id, run_id, user_id = success_run(database, fs, tmpdir)
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    # -- Read result files -----------------------------------------------------
    with local_service(user_id=user_id) as api:
        # Map file names to file identifiers.
        r = api.runs().get_run(run_id=run_id)
        files = dict()
        for fh in r['files']:
            files[fh['name']] = fh['id']
        # Read content of result files.
        fh = api.runs().get_result_file(
            run_id=run_id,
            file_id=files['run/results/B.json']
        )
        results = util.read_object(fh.open())
        assert results == {'B': 1}
    # -- Error when user 2 attempts to read file -------------------------------
    with database.session() as session:
        user_2 = create_user(session, active=True)
    with local_service(user_id=user_2) as api:
        with pytest.raises(err.UnauthorizedAccessError):
            api.runs().get_result_file(
                run_id=run_id,
                file_id=files['run/results/B.json']
            )
    # -- With an open access policy user 2 can read the data file --------------
    env = Config().basedir(tmpdir).open_access()
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    with local_service(user_id=user_2) as api:
        api.runs().get_result_file(
            run_id=run_id,
            file_id=files['run/results/B.json']
        )
def test_cancel_remote_workflow(tmpdir):
    """Cancel the execution of a remote workflow."""
    # -- Setup ----------------------------------------------------------------
    #
    env = Config().basedir(tmpdir)
    engine = RemoteTestController(
        client=RemoteTestClient(runcount=100),
        poll_interval=1,
        is_async=True
    )
    service = LocalAPIFactory(env=env, engine=engine)
    engine.service = service
    # -- Start a new run for the workflow template.
    with service() as api:
        workflow_id = create_workflow(api, source=TEMPLATE_DIR)
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        run_id = start_run(api, group_id)
    # -- Poll workflow state every second.
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    while run['state'] == st.STATE_PENDING:
        time.sleep(1)
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_RUNNING)
    with service(user_id=user_id) as api:
        api.runs().cancel_run(run_id=run_id, reason='test')
    # Sleep to ensure that the workflow monitor polls the state and makes an
    # attempt to update the run state. This should raise an error for the
    # monitor. The error is not propagated here or to the run.
    time.sleep(3)
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_CANCELED)
    assert run['messages'][0] == 'test'
def test_run_remote_workflow_error(is_async, tmpdir):
    """Execute the remote workflow example in both synchronous and
    asynchronous mode when execution results in an error state.
    """
    # -- Setup ----------------------------------------------------------------
    env = Config().volume(FStore(basedir=str(tmpdir))).auth()
    engine = RemoteWorkflowController(
        client=RemoteTestClient(runcount=3, error='some error'),
        poll_interval=0.1,
        is_async=is_async
    )
    service = LocalAPIFactory(env=env, engine=engine)
    # Need to set the association between the engine and the service explicitly
    # after the API is created.
    engine.service = service
    with service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
    # -- Unit test ------------------------------------------------------------
    # Start a new run.
    with service(user_id=user_id) as api:
        run_id = start_run(api, group_id)
    # Poll workflow state every second.
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    watch_dog = 30
    while run['state'] in st.ACTIVE_STATES and watch_dog:
        time.sleep(1)
        watch_dog -= 1
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_ERROR)
    assert run['messages'] == ['some error']
def test_run_remote_workflow_with_error(tmpdir):
    """Execute the remote workflow example that will end in an error state
    in asynchronous mode.
    """
    # -- Setup ----------------------------------------------------------------
    #
    env = Config().basedir(tmpdir)
    engine = RemoteTestController(
        client=RemoteTestClient(runcount=3, error='some error'),
        poll_interval=1,
        is_async=True
    )
    service = LocalAPIFactory(env=env, engine=engine)
    engine.service = service
    # Start a new run for the workflow template.
    with service() as api:
        workflow_id = create_workflow(api, source=TEMPLATE_DIR)
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        run_id = start_run(api, group_id)
    # Poll workflow state every second.
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    while run['state'] in st.ACTIVE_STATES:
        time.sleep(1)
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_ERROR)
    assert run['messages'][0] == 'some error'
def ClientAPI(
    env: Optional[Dict] = None, basedir: Optional[str] = None,
    database: Optional[str] = None, open_access: Optional[bool] = None,
    run_async: Optional[bool] = None, user_id: Optional[str] = None
) -> APIFactory:
    """Create an instance of the API factory that is responsible for
    generating API instances for a flowserv client.

    The main distinction here is whether a connection is made to a local
    instance of the service or to a remote instance. This distinction is made
    based on the value of the FLOWSERV_CLIENT environment variable that takes
    the values 'local' or 'remote'. The default is 'local'.

    Provides the option to alter the default settings of environment
    variables.

    Parameters
    ----------
    env: dict, default=None
        Dictionary with configuration parameter values.
    basedir: string, default=None
        Base directory for all workflow files. If no directory is given or
        specified in the environment a temporary directory will be created.
    database: string, default=None
        Optional database connect url.
    open_access: bool, default=None
        Use an open access policy if set to True.
    run_async: bool, default=False
        Run workflows in asynchronous mode.
    user_id: string, default=None
        Optional identifier for the authenticated API user.

    Returns
    -------
    flowserv.service.api.APIFactory
    """
    # Get the base configuration settings from the environment if not given.
    env = env if env is not None else config.env()
    if not isinstance(env, Config):
        env = Config(env)
    # Update configuration based on the given optional arguments.
    if basedir is not None:
        env.basedir(basedir)
    if database is not None:
        env.database(database)
    if open_access is not None and open_access:
        env.open_access()
    # By default, the client runs all workflows synchronously.
    if run_async is not None and run_async:
        env.run_async()
    elif env.get(config.FLOWSERV_ASYNC) is None:
        env.run_sync()
    # Create local or remote API factory depending on the FLOWSERV_CLIENT
    # value.
    client = env.get(config.FLOWSERV_CLIENT, config.LOCAL_CLIENT)
    if client == config.LOCAL_CLIENT:
        return LocalAPIFactory(env=env, user_id=user_id)
    elif client == config.REMOTE_CLIENT:
        # Not implemented yet.
        pass
    raise ValueError("invalid client type '{}'".format(client))
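# Usage sketch for ClientAPI. This example is illustrative and not taken from
# the original module: the demo function name and its arguments are
# assumptions; the API call in the body (api.runs().get_run(...)) mirrors the
# calls used in the tests above.
def demo_client_api(tmpdir, user_id, run_id):
    """Minimal sketch: build a local API factory with an open access policy
    that runs workflows synchronously, then fetch a run handle.
    """
    api_factory = ClientAPI(
        basedir=str(tmpdir),
        open_access=True,
        run_async=False,
        user_id=user_id
    )
    # The factory yields API instances via a context manager.
    with api_factory() as api:
        return api.runs().get_run(run_id=run_id)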
def test_postproc_workflow_errors(tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different
    # threads) when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    service = LocalAPIFactory(env=env)
    # Error during data preparation.
    run_erroneous_workflow(service, SPEC_FILE_ERR_1)
    # Erroneous specification.
    run_erroneous_workflow(service, SPEC_FILE_ERR_2)
def test_result_archive_local(database, tmpdir):
    """Test getting an archive of run results."""
    # -- Setup ----------------------------------------------------------------
    env = Config().basedir(tmpdir).auth()
    fs = FS(env=env)
    workflow_id, group_id, run_id, user_id = success_run(database, fs, tmpdir)
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    # -- Get result archive ----------------------------------------------------
    with local_service(user_id=user_id) as api:
        archive = api.runs().get_result_archive(run_id=run_id)
        tar = tarfile.open(fileobj=archive.open(), mode='r:gz')
        members = [t.name for t in tar.getmembers()]
        assert len(members) == 2
        assert 'A.json' in members
        assert 'run/results/B.json' in members
def test_cancel_remote_workflow(tmpdir):
    """Cancel the execution of a remote workflow."""
    # -- Setup ----------------------------------------------------------------
    env = Config().volume(FStore(basedir=str(tmpdir))).auth()
    engine = RemoteWorkflowController(
        client=RemoteTestClient(runcount=100),
        poll_interval=0.25,
        is_async=True
    )
    service = LocalAPIFactory(env=env, engine=engine)
    # Need to set the association between the engine and the service explicitly
    # after the API is created.
    engine.service = service
    with service() as api:
        workflow_id = create_workflow(api, source=BENCHMARK_DIR)
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
    # -- Unit test ------------------------------------------------------------
    # Start a new run.
    with service(user_id=user_id) as api:
        run_id = start_run(api, group_id)
    # -- Poll workflow state while the run is pending.
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    watch_dog = 30
    while run['state'] == st.STATE_PENDING and watch_dog:
        time.sleep(0.1)
        watch_dog -= 1
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_RUNNING)
    with service(user_id=user_id) as api:
        api.runs().cancel_run(run_id=run_id, reason='test')
    # Sleep to ensure that the workflow monitor polls the state and makes an
    # attempt to update the run state. This should raise an error for the
    # monitor. The error is not propagated here or to the run.
    time.sleep(1)
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_CANCELED)
    assert run['messages'][0] == 'test'
def test_run_remote_workflow(tmpdir, is_async):
    """Execute the remote workflow example in both synchronous and
    asynchronous mode.
    """
    # -- Setup ----------------------------------------------------------------
    #
    env = Config().basedir(tmpdir)
    engine = RemoteTestController(
        client=RemoteTestClient(runcount=3, data=['success']),
        poll_interval=1,
        is_async=is_async
    )
    service = LocalAPIFactory(env=env, engine=engine)
    engine.service = service
    # Start a new run for the workflow template.
    with service() as api:
        workflow_id = create_workflow(api, source=TEMPLATE_DIR)
        user_id = create_user(api)
    with service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id)
        run_id = start_run(api, group_id)
    # Poll workflow state every second.
    with service(user_id=user_id) as api:
        run = api.runs().get_run(run_id=run_id)
    watch_dog = 30
    while run['state'] in st.ACTIVE_STATES and watch_dog:
        time.sleep(1)
        watch_dog -= 1
        with service(user_id=user_id) as api:
            run = api.runs().get_run(run_id=run_id)
    serialize.validate_run_handle(run, state=st.STATE_SUCCESS)
    files = dict()
    for obj in run['files']:
        files[obj['name']] = obj['id']
    f_id = files['results/data.txt']
    with service(user_id=user_id) as api:
        fh = api.runs().get_result_file(run_id=run_id, file_id=f_id)
    data = fh.open().read().decode('utf-8')
    assert 'success' in data
def local_service(database, tmpdir):
    """Create a local API factory for test purposes."""
    env = Config().basedir(tmpdir).volume(FStore(basedir=str(tmpdir))).auth()
    return LocalAPIFactory(env=env, db=database, engine=StateEngine())
def sync_service(database, tmpdir):
    """Create a local API factory that executes workflows synchronously."""
    env = Config().basedir(tmpdir).volume(
        FStore(basedir=str(tmpdir))
    ).run_sync().auth()
    return LocalAPIFactory(env=env, db=database)
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different
    # threads) when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post-processing workflow may attempt to use
        # the backend that was initialized earlier with a different file
        # store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different input
    # file for each group.
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values.
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        attempts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        attempts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attempts += 1
            if attempts > 60:
                break
        serialize.validate_workflow_handle(wh)
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        compare = util.read_object(fh.open())
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
    assert fh.name.startswith('run')
    assert fh.mime_type == 'application/gzip'
def async_service(database, tmpdir):
    """Create a local API factory that executes workflows asynchronously."""
    env = Config().basedir(tmpdir).run_async().auth()
    return LocalAPIFactory(env=env)
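# Usage sketch for the factory helpers above. This example is illustrative
# and not part of the original suite: it assumes the helpers are registered
# as pytest fixtures (e.g. decorated with @pytest.fixture in a conftest.py)
# so that pytest injects them by name; the calls in the body mirror those
# used in the tests above.
def test_demo_service_usage(local_service):
    """Minimal sketch: obtain an API instance from the injected factory and
    create a user with it.
    """
    with local_service() as api:
        user_id = create_user(api)
    # Subsequent calls can authenticate as that user.
    with local_service(user_id=user_id) as api:
        assert api is not None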