def success_run(database: DB, fs: StorageVolume, basedir: str) -> Tuple[str, str, str, str]:
    """Create a successful run with two result files:

        - A.json
        - results/B.json

    Returns the identifier of the created workflow, group, run, and user.
    """
    # Stage the two result files in a temporary run folder.
    runfs = FileSystemStorage(basedir=os.path.join(basedir, 'tmprun'))
    runfs.store(file=io_file({'A': 1}), dst='A.json')
    runfs.store(file=io_file({'B': 1}), dst=util.join('results', 'B.json'))
    with database.session() as session:
        # Create the user, workflow and group that own the run.
        user_id = create_user(session, active=True)
        workflow_id = create_workflow(session)
        group_id = create_group(session, workflow_id, users=[user_id])
        group_manager = WorkflowGroupManager(session=session, fs=fs)
        run_manager = RunManager(session=session, fs=fs)
        run = run_manager.create_run(group=group_manager.get_group(group_id))
        run_id = run.run_id
        # Transition the run from pending via running to success.
        run_manager.update_run(
            run_id=run_id,
            state=run.state().start().success(files=['A.json', 'results/B.json']),
            runstore=runfs
        )
    return workflow_id, group_id, run_id, user_id
def test_upload_file(database, tmpdir):
    """Test uploading files."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with a single group for a single workflow.
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        user_1 = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_1 = model.create_group(session, workflow_id, users=[user_1])
    # -- Test upload file -----------------------------------------------------
    data = {'A': 1}
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        fh = manager.upload_file(group_id=group_1, file=io_file(data={'A': 1}), name='A.json')
        assert fh.name == 'A.json'
        assert fh.mime_type == 'application/json'
        # Reading the uploaded file back yields the original content.
        fh = manager.get_uploaded_file(group_id=group_1, file_id=fh.file_id)
        assert json.load(fh.open()) == data
    # -- Test error case ------------------------------------------------------
    with database.session() as session:
        # Fix: create the manager for this session instead of reusing the
        # manager that is bound to the (already closed) session from the
        # previous block. Also removed a dead re-assignment of `data`.
        manager = WorkflowGroupManager(session=session, fs=fs)
        # An empty (whitespace-only) file name violates the name constraint.
        with pytest.raises(err.ConstraintViolationError):
            manager.upload_file(group_id=group_1, file=io_file(data={'A': 1}), name=' ')
        # An unknown group identifier raises an error before name validation.
        with pytest.raises(err.UnknownWorkflowGroupError):
            manager.upload_file(group_id='UNKNOWN', file=io_file(data={'A': 1}), name=' ')
def test_list_files(database, tmpdir):
    """Test listing uploaded files."""
    # -- Setup ----------------------------------------------------------------
    #
    # Two groups for a single workflow: two files are uploaded for the first
    # group and one file for the second group.
    filename = 'data.json'
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        owner = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_1 = model.create_group(session, workflow_id, users=[owner])
        group_2 = model.create_group(session, workflow_id, users=[owner])
        manager = WorkflowGroupManager(session=session, fs=fs)
        for gid, value in [(group_1, 1), (group_1, 2), (group_2, 3)]:
            manager.upload_file(group_id=gid, file=io_file(data={'A': value}), name=filename)
    # -- Test list files for groups -------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        assert len(manager.list_uploaded_files(group_id=group_1)) == 2
        assert len(manager.list_uploaded_files(group_id=group_2)) == 1
def test_delete_group_file_local(local_service, hello_world):
    """Test deleting an uploaded file for a workflow group."""
    # -- Setup ----------------------------------------------------------------
    #
    # Upload one file for a workflow group.
    with local_service() as api:
        user_id = create_user(api)
        workflow = hello_world(api, name='W1')
        workflow_id = workflow.workflow_id
    with local_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id=workflow_id)
        file_id = upload_file(
            api=api,
            group_id=group_id,
            file=io_file(data={'group': 1, 'file': 1})
        )
    # -- Error when unknown user attempts to delete the file ------------------
    # Fix: 'UNKNOWN' was misspelled as 'UNKNNOWN'. Any identifier that does
    # not belong to an existing user triggers the error, but the corrected
    # spelling is consistent with the other tests in this module.
    with local_service(user_id='UNKNOWN') as api:
        with pytest.raises(err.UnauthorizedAccessError):
            api.uploads().delete_file(group_id, file_id)
    # -- Delete the uploaded file ---------------------------------------------
    with local_service(user_id=user_id) as api:
        api.uploads().delete_file(group_id, file_id)
    # After deletion the file cannot be accessed anymore.
    with local_service(user_id=user_id) as api:
        with pytest.raises(err.UnknownFileError):
            api.uploads().get_uploaded_file(group_id, file_id)
def test_upload_group_file_local(local_service, hello_world):
    """Test uploading files for a workflow group."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create one group with minimal metadata for the 'Hello World' workflow.
    with local_service() as api:
        user_id = create_user(api)
        workflow_id = hello_world(api, name='W1').workflow_id
    with local_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id=workflow_id)
    # -- Upload first file for the group --------------------------------------
    with local_service(user_id=user_id) as api:
        doc = api.uploads().upload_file(
            group_id=group_id,
            file=io_file(data={'group': 1, 'file': 1}),
            name='group1.json'
        )
        file_id = doc['id']
        serialize.validate_file_handle(doc)
        assert doc['name'] == 'group1.json'
    # -- Get serialized handle for the file and the group ---------------------
    # Both the group owner and an anonymous user can read the file.
    for uid in [user_id, None]:
        with local_service(user_id=uid) as api:
            content = api.uploads().get_uploaded_file(group_id, file_id).read()
            assert content == b'{"group": 1, "file": 1}'
            group_doc = api.groups().get_group(group_id=group_id)
            serialize.validate_group_handle(group_doc)
def test_cancel_run_helloworld(async_service): """Test cancelling a helloworld run.""" # -- Setup ---------------------------------------------------------------- # # Start a new run for the workflow template. with async_service() as api: workflow_id = create_workflow(api, source=BENCHMARK_DIR) user_id = create_user(api) with async_service(user_id=user_id) as api: group_id = create_group(api, workflow_id) names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text') file_id = upload_file(api, group_id, names) args = [ serialize_arg('names', serialize_fh(file_id)), serialize_arg('sleeptime', 10), serialize_arg('greeting', 'Hi') ] run_id = start_run(api, group_id, arguments=args) # Poll run after sleeping for one second. time.sleep(1) with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) assert run['state'] in st.ACTIVE_STATES # -- Cancel the active run ------------------------------------------------ with async_service(user_id=user_id) as api: run = api.runs().cancel_run( run_id=run_id, reason='done' ) assert run['state'] == st.STATE_CANCELED assert run['messages'][0] == 'done' with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) assert run['state'] == st.STATE_CANCELED assert run['messages'][0] == 'done'
def start_hello_world(api, group_id):
    """Start a new run for the Hello World template.

    Returns the run identifier and the identifier for the input file.

    Parameters
    ----------
    api: flowserv.service.api.API
        Service API manager.
    group_id: string
        Unique group identifier.

    Returns
    -------
    string, string
    """
    # Upload the names file that serves as the run input.
    upload_doc = api.uploads().upload_file(
        group_id=group_id,
        file=io_file(data=['Alice', 'Bob'], format='txt/plain'),
        name='n.txt'
    )
    file_id = upload_doc['id']
    # Start the run with the uploaded file as the 'names' argument.
    run_doc = api.runs().start_run(
        group_id=group_id,
        arguments=[{'name': 'names', 'value': serialize_fh(file_id=file_id)}]
    )
    run_id = run_doc['id']
    api.runs().backend.start(run_id)
    return run_id, file_id
def test_list_group_files_local(local_service, hello_world):
    """Test getting a listing of uploaded files for a workflow group."""
    # -- Setup ----------------------------------------------------------------
    #
    # Upload two files for a workflow group.
    with local_service() as api:
        user_id = create_user(api)
        workflow_id = hello_world(api, name='W1').workflow_id
    with local_service(user_id=user_id) as api:
        group_id = create_group(api, workflow_id=workflow_id)
        for index in range(2):
            upload_file(
                api=api,
                group_id=group_id,
                file=io_file(data={'group': 1, 'file': index})
            )
    # -- Get file listing -----------------------------------------------------
    with local_service(user_id=user_id) as api:
        listing = api.uploads().list_uploaded_files(group_id=group_id)
        serialize.validate_file_listing(listing, 2)
    # -- Error when listing files as unknown user -----------------------------
    with local_service(user_id='UNKNOWN') as api:
        with pytest.raises(err.UnauthorizedAccessError):
            api.uploads().list_uploaded_files(group_id=group_id)
def run_success(run_manager, run_id, store, values):
    """Set given run into success state with the given result data."""
    # Write the result file before updating the run state so that the
    # referenced file exists when the state transition is recorded.
    store.store(file=io_file(values), dst=RESULT_FILE_ID)
    timestamp = util.utc_now()
    success_state = st.StateSuccess(
        created_at=timestamp,
        started_at=timestamp,
        finished_at=timestamp,
        files=[RESULT_FILE_ID]
    )
    run_manager.update_run(run_id=run_id, state=success_state, runstore=store)
def test_run_serialization(database, tmpdir):
    """Test serialization of run handles and run listings."""
    view = RunSerializer()
    fs = FileSystemStorage(basedir=tmpdir)
    # Setup temporary run folder with the two result files referenced by the
    # successful run below.
    runfs = FileSystemStorage(basedir=os.path.join(tmpdir, 'tmprun'))
    runfs.store(file=io_file({'A': 1}), dst='A.json')
    runfs.store(file=io_file({'B': 2}), dst='results/B.json')
    # Create runs.
    with database.session() as session:
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
        # Create successful run.
        groups = WorkflowGroupManager(session=session, fs=fs)
        runs = RunManager(session=session, fs=fs)
        run = runs.create_run(group=groups.get_group(group_id))
        run_id = run.run_id
        state = run.state()
        # Transition pending -> running -> success in a single update.
        runs.update_run(
            run_id,
            state.start().success(files=['A.json', 'results/B.json']),
            runstore=runfs)
        run = runs.get_run(run_id)
        # The serialized handle for the successful run must validate against
        # the 'RunHandle' schema.
        doc = view.run_handle(run)
        validator('RunHandle').validate(doc)
        # Create error run.
        run = runs.create_run(group=groups.get_group(group_id))
        run_id = run.run_id
        state = run.state()
        runs.update_run(run_id=run_id, state=state)
        messages = ['There', 'were', 'many errors']
        runs.update_run(run_id=run_id, state=state.error(messages))
        run = runs.get_run(run_id)
        # The handle for the error run must validate as well.
        doc = view.run_handle(run)
        validator('RunHandle').validate(doc)
        # Validate run listing. Both runs created above are expected in the
        # listing for the group.
        doc = view.run_listing(runs=runs.list_runs(group_id))
        validator('RunListing').validate(doc)
        assert len(doc[labels.RUN_LIST]) == 2
def write_results(runstore: StorageVolume, files: List[Tuple[Union[dict, list], str, str]]):
    """Create result files for a workflow run.

    Fix: the ``files`` annotation previously declared a single 3-tuple
    (``Tuple[Union[dict, list], str, str]``) although the function iterates
    over a list of 3-tuples; the annotation now matches the documented and
    actual usage. The loop variable ``format`` was also renamed to avoid
    shadowing the builtin.

    Parameters
    ----------
    runstore: flowserv.volume.base.StorageVolume
        Storage volume for the run (result) files of a successful workflow
        run.
    files: list
        List of 3-tuples containing the file data, format, and relative path.
    """
    for data, file_format, rel_path in files:
        runstore.store(file=io_file(data=data, format=file_format), dst=rel_path)
def test_get_file(database, tmpdir):
    """Test accessing uploaded files."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with two groups for a single workflow. Upload one file
    # for each group.
    data_1 = {'A': 1}
    data_2 = {'B': 2}
    filename = 'data.json'
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        owner = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_1 = model.create_group(session, workflow_id, users=[owner])
        group_2 = model.create_group(session, workflow_id, users=[owner])
        mngr = WorkflowGroupManager(session=session, fs=fs)
        file_1 = mngr.upload_file(group_id=group_1, file=io_file(data=data_1), name=filename).file_id
        file_2 = mngr.upload_file(group_id=group_2, file=io_file(data=data_2), name=filename).file_id
        files = [(group_1, file_1, data_1), (group_2, file_2, data_2)]
    # -- Test get file --------------------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        for gid, fid, expected in files:
            fh = manager.get_uploaded_file(group_id=gid, file_id=fid)
            assert fh.name == filename
            assert fh.mime_type == 'application/json'
            assert json.load(fh.open()) == expected
    # -- Test error cases -----------------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        # File 1 belongs to group 1 and is therefore unknown for group 2.
        with pytest.raises(err.UnknownFileError):
            manager.get_uploaded_file(group_id=group_2, file_id=file_1).open()
        # Access a file with an unknown file identifier.
        with pytest.raises(err.UnknownFileError):
            manager.get_uploaded_file(group_id=group_1, file_id='UNK').open()
def test_delete_file(database, tmpdir):
    """Test deleting an uploaded file."""
    # -- Setup ----------------------------------------------------------------
    #
    # Create a database with two groups for a single workflow. Upload one file
    # for each group.
    filename = 'data.json'
    fs = FileSystemStorage(basedir=tmpdir)
    with database.session() as session:
        owner = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_1 = model.create_group(session, workflow_id, users=[owner])
        group_2 = model.create_group(session, workflow_id, users=[owner])
        manager = WorkflowGroupManager(session=session, fs=fs)
        file_1 = manager.upload_file(group_id=group_1, file=io_file(data={'A': 1}), name=filename).file_id
        file_2 = manager.upload_file(group_id=group_2, file=io_file(data={'A': 1}), name=filename).file_id
    # -- Test delete file -----------------------------------------------------
    with database.session() as session:
        manager = WorkflowGroupManager(session=session, fs=fs)
        # Ensure the file exists before deleting it.
        manager.get_uploaded_file(group_id=group_1, file_id=file_1)
        manager.delete_file(group_id=group_1, file_id=file_1)
        # File 1 can no longer be accessed while file 2 is still present.
        with pytest.raises(err.UnknownFileError):
            manager.get_uploaded_file(group_id=group_1, file_id=file_1).open()
        manager.get_uploaded_file(group_id=group_2, file_id=file_2)
    # -- Error cases ----------------------------------------------------------
    with database.session() as session:
        # Deleting a file that no longer exists raises an error.
        manager = WorkflowGroupManager(session=session, fs=fs)
        with pytest.raises(err.UnknownFileError):
            manager.delete_file(group_id=group_1, file_id=file_1)
def test_group_handle_serialization(database, tmpdir):
    """Test serialization of workflow group handles."""
    view = WorkflowGroupSerializer()
    with database.session() as session:
        fs = FileSystemStorage(basedir=tmpdir)
        manager = WorkflowGroupManager(session=session, fs=fs)
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
        # Upload a single file so the handle contains an upload listing.
        manager.upload_file(
            group_id=group_id,
            file=io_file(data={'A': 1}),
            name='a.json'
        )
        doc = view.group_handle(manager.get_group(group_id))
        validator('UserGroupHandle').validate(doc)
        # The single user created above is the only group member.
        assert len(doc[labels.GROUP_MEMBERS]) == 1
def prepare_postproc_data(input_files: List[str], ranking: List[RunResult], run_manager: RunManager, store: StorageVolume):
    """Create input files for post-processing steps for a given set of runs.

    Creates files for a post-processing run in a given base directory on a
    storage volume. The resulting directory contains files for each run in a
    given ranking. For each run a sub-folder with the run identifier as the
    directory name is created. Each folder contains copies of result files
    for the run for those files that are specified in the input files list.
    A file ``runs.json`` in the base directory lists the runs in the ranking
    together with their group name.

    Parameters
    ----------
    input_files: list(string)
        List of identifier for benchmark run output files that are copied
        into the input directory for each submission.
    ranking: list(flowserv.model.ranking.RunResult)
        List of runs in the current result ranking
    run_manager: flowserv.model.run.RunManager
        Manager for workflow runs
    store: flowserv.volume.base.StorageVolume
        Target storage volume where the created post-processing files are
        stored.
    """
    run_listing = list()
    for ranking_entry in ranking:
        run_id = ranking_entry.run_id
        # Copy each requested result file into a sub-folder named after the
        # run identifier.
        for key in input_files:
            source_file = run_manager.get_runfile(run_id=run_id, key=key)
            store.store(file=source_file, dst=util.join(run_id, key))
        run_listing.append({
            LABEL_ID: run_id,
            LABEL_NAME: ranking_entry.group_name,
            LABEL_FILES: input_files
        })
    # Write the metadata listing for all runs to the base directory.
    store.store(file=io_file(run_listing), dst=RUNS_FILE)
def test_run_helloworld_async(async_service, target): """Execute the helloworld example.""" # -- Setup ---------------------------------------------------------------- # # Start a new run for the workflow template. with async_service() as api: workflow_id = create_workflow(api, source=BENCHMARK_DIR) user_id = create_user(api) with async_service(user_id=user_id) as api: group_id = create_group(api, workflow_id) names = io_file(data=['Alice', 'Bob', 'Zoe'], format='plain/text') file_id = upload_file(api, group_id, names) args = [ serialize_arg('names', serialize_fh(file_id, target)), serialize_arg('sleeptime', 1), serialize_arg('greeting', 'Hi') ] run_id = start_run(api, group_id, arguments=args) # Poll workflow state every second. with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) watch_dog = 30 while run['state'] in st.ACTIVE_STATES and watch_dog: time.sleep(1) watch_dog -= 1 with async_service(user_id=user_id) as api: run = api.runs().get_run(run_id=run_id) assert run['state'] == st.STATE_SUCCESS files = dict() for f in run['files']: files[f['name']] = f['id'] fh = api.runs().get_result_file( run_id=run_id, file_id=files['results/greetings.txt'] ) greetings = fh.open().read().decode('utf-8').strip() assert 'Hi Alice' in greetings assert 'Hi Bob' in greetings assert 'Hi Zoe' in greetings fh = api.runs().get_result_file( run_id=run_id, file_id=files['results/analytics.json'] ) assert json.load(fh.open()) is not None
def test_s3_volume_subfolder(store, people):
    """Test creating a new storage volume for a sub-folder of the base
    directory of a S3 bucket storage volume.
    """
    substore = store.get_store_for_folder(key='data', identifier='SUBSTORE')
    assert substore.identifier == 'SUBSTORE'
    # The existing names file is readable through the sub-folder store.
    with substore.load(key='names.txt').open() as f:
        assert json.load(f) == people
    # Store a file in the sub folder and then make sure we can read it.
    substore.store(file=io_file(['a', 'b']), dst='x/y')
    with substore.load(key='x/y').open() as f:
        assert json.load(f) == ['a', 'b']
    # Erase all files in the sub-folder. The files are not deleted in the
    # original store because of how the unit tests are set up: each store
    # keeps its own full list of files.
    substore.erase()
    assert substore.walk(src=None) == set()
def test_file_listing_serialization(database, tmpdir):
    """Test serialization of file handles."""
    view = UploadFileSerializer()
    filename = 'data.json'
    with database.session() as session:
        fs = FileSystemStorage(basedir=tmpdir)
        manager = WorkflowGroupManager(session=session, fs=fs)
        user_id = model.create_user(session, active=True)
        workflow_id = model.create_workflow(session)
        group_id = model.create_group(session, workflow_id, users=[user_id])
        fh = manager.upload_file(
            group_id=group_id,
            file=io_file(data={'A': 1}),
            name=filename
        )
        # Validate the serialized handle for the single uploaded file.
        doc = view.file_handle(group_id=group_id, fh=fh)
        assert doc[labels.FILE_NAME] == filename
        validator('FileHandle').validate(doc)
        # Validate the serialized file listing for the group.
        listing = manager.list_uploaded_files(group_id=group_id)
        doc = view.file_listing(group_id=group_id, files=listing)
        validator('FileListing').validate(doc)
def test_io_buffer_size():
    """Test size method of IOBuffer objects."""
    # Serializing a non-empty list must yield a buffer with positive size.
    buf = io_file(['Alice', 'Bob'])
    assert buf.size() > 0
return tmpdir @pytest.fixture def filenames_all(): """Set of names for all files in the created base directory.""" return { 'A.json', 'examples/B.json', 'examples/C.json', 'docs/D.json', 'examples/data/data.json' } # -- Bucket stores ------------------------------------------------------------ NAMES = ['Alice', 'Bob'] FILES = [(io_file(NAMES), 'data/names.txt'), (io_file({'a': 1}), 'code/obj1.json'), (io_file({'b': 2}), 'code/obj2.json')] @pytest.fixture def bucket_keys(): """Set of file keys in the created buckets.""" return set([key for _, key in FILES]) @pytest.fixture def people(): """List of person names in the data/names.txt files.""" return NAMES