def test_store_and_copy_folder(store_id, tmpdir):
    """Test uploading and downloading folder files."""
    # -- Setup ----------------------------------------------------------------
    # Initialize the file store and create the input file structure.
    fs = create_store(store_id, os.path.join(tmpdir, 'fs'))
    files = create_files(os.path.join(tmpdir, 'data'))
    # -- Store all files. The last file in the returned list (D) is uploaded
    # under the target name 'E.json' instead of its default 'docs/D.json'.
    file_d, _ = files[-1]
    uploads = files[:-1] + [(file_d, 'E.json')]
    key = '0000'
    fs.store_files(files=uploads, dst=key)
    # Every stored file should be readable under its target key with the
    # expected content.
    expected = [
        (FILE_A, DATA1),
        (FILE_B, DATA2),
        (FILE_C, DATA3),
        (FILE_DATA, EXDATA),
        ('E.json', DATA4),
    ]
    for target, data in expected:
        assert json.load(fs.load_file(os.path.join(key, target)).open()) == data
    # The default target for file D was never written.
    with pytest.raises(err.UnknownFileError):
        fs.load_file(os.path.join(key, FILE_D)).open()
    # -- Download files -------------------------------------------------------
    download = os.path.join(tmpdir, 'download')
    fs.copy_folder(key=key, dst=download)
    for target, data in expected:
        assert util.read_object(os.path.join(download, target)) == data
    assert not os.path.exists(os.path.join(download, FILE_D))
def ENGINECONFIG(env: Dict, validate: Optional[bool] = False) -> Dict:
    """Read engine configuration information from the file that is referenced
    by the environment variable *FLOWSERV_SERIAL_ENGINECONFIG*.

    Returns an empty dictionary if the environment variable is not set. If the
    validate flag is True the read document is validated against the
    configuration document schema that is defined in ``config.json``.

    Parameters
    ----------
    env: dict
        Configuration object that provides access to configuration parameters
        in the environment.
    validate: bool, default=False
        Validate the read configuration object if True.

    Returns
    -------
    dict
    """
    filename = env.get(FLOWSERV_SERIAL_ENGINECONFIG)
    if filename:
        doc = util.read_object(filename=filename)
        if validate:
            validator.validate(doc)
        return doc
    # Environment variable not set: nothing to read.
    return dict()
def read_run_results(run: RunObject, schema: ResultSchema, rundir: str):
    """Read the run results from the result file that is specified in the
    workflow result schema. If the file is not found we currently do not
    raise an error.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for a workflow run.
    schema: flowserv.model.template.schema.ResultSchema
        Workflow result schema specification that contains the reference to
        the result file key.
    rundir: string
        Directory containing run result files.
    """
    filename = os.path.join(rundir, schema.result_file)
    if not os.path.exists(filename):
        # Missing result file is tolerated; leave the run result unset.
        return
    doc = util.read_object(filename)
    # Extract one value per schema column from the result document.
    values = dict()
    for col in schema.columns:
        value = util.jquery(doc=doc, path=col.jpath())
        if value is None:
            if col.required:
                raise err.ConstraintViolationError(
                    "missing value for '{}'".format(col.column_id)
                )
        else:
            values[col.column_id] = col.cast(value)
    run.result = values
def test_access_run_result_files_local(database, tmpdir):
    """Test accessing run result files."""
    # -- Setup ----------------------------------------------------------------
    env = Config().basedir(tmpdir).auth()
    fs = FS(env=env)
    workflow_id, group_id, run_id, user_id = success_run(database, fs, tmpdir)
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    # -- Read result files ----------------------------------------------------
    with local_service(user_id=user_id) as api:
        # Map file names to file identifier.
        handle = api.runs().get_run(run_id=run_id)
        files = {obj['name']: obj['id'] for obj in handle['files']}
        # Read content of result files.
        fh = api.runs().get_result_file(
            run_id=run_id,
            file_id=files['run/results/B.json']
        )
        assert util.read_object(fh.open()) == {'B': 1}
    # -- Error when user 2 attempts to read file ------------------------------
    with database.session() as session:
        user_2 = create_user(session, active=True)
    with local_service(user_id=user_2) as api:
        with pytest.raises(err.UnauthorizedAccessError):
            api.runs().get_result_file(
                run_id=run_id,
                file_id=files['run/results/B.json']
            )
    # -- With an open access policy user 2 can read the data file -------------
    env = Config().basedir(tmpdir).open_access()
    local_service = LocalAPIFactory(env=env, db=database, engine=StateEngine())
    with local_service(user_id=user_2) as api:
        api.runs().get_result_file(
            run_id=run_id,
            file_id=files['run/results/B.json']
        )
def read_run_results(run: RunObject, schema: ResultSchema, runstore: StorageVolume):
    """Read the run results from the result file that is specified in the
    workflow result schema. If the file is not found we currently do not
    raise an error.

    Parameters
    ----------
    run: flowserv.model.base.RunObject
        Handle for a workflow run.
    schema: flowserv.model.template.schema.ResultSchema
        Workflow result schema specification that contains the reference to
        the result file key.
    runstore: flowserv.volume.base.StorageVolume
        Storage volume containing the run (result) files for a successful
        workflow run.
    """
    # Load the result document from the storage volume.
    with runstore.load(schema.result_file).open() as f:
        doc = util.read_object(f)
    # Extract one value per schema column from the result document.
    values = dict()
    for col in schema.columns:
        value = util.jquery(doc=doc, path=col.jpath())
        if value is None:
            if col.required:
                raise err.ConstraintViolationError(
                    "missing value for '{}'".format(col.column_id)
                )
        else:
            values[col.column_id] = col.cast(value)
    run.result = values
def test_load_config_from_json_file():
    """Test loading worker factory configuration from a Json file."""
    # Load the configuration via the static helper.
    worker = WorkerFactory.load_json(JSON_FILE).get('test')
    assert worker.variables['a'] == 0
    # Passing the file content directly to the object constructor should yield
    # the same result.
    doc = util.read_object(JSON_FILE)
    worker = WorkerFactory(doc).get('test')
    assert worker.variables['a'] == 0
def multi_by_x(filename: str, x: int) -> int:
    """Read an input file containing a single integer value (in Json format)
    and multiply that value with the given x.

    Expects a Json object of the form: {"value": v}

    Parameters
    ----------
    filename: str
        Path to the Json input file.
    x: int
        Multiplier for the value read from the file.

    Returns
    -------
    int
    """
    document = util.read_object(filename=filename)
    return x * document['value']
def create_group(ctx, workflow, name, members, configfile):
    """Create a new user group."""
    workflow_id = ctx.obj.get_workflow(ctx.params)
    # Optional engine configuration is read from file if given.
    config = util.read_object(configfile) if configfile else None
    # The members option is a comma-separated list of user identifier.
    member_list = None if members is None else members.split(',')
    with service() as api:
        doc = api.groups().create_group(
            workflow_id=workflow_id,
            name=name,
            members=member_list,
            engine_config=config
        )
    group_id = doc[labels.GROUP_ID]
    click.echo('export {}={}'.format(ctx.obj.vars['group'], group_id))
def test_parse_top_tagger_template():
    """Test parsing the Top-Tagger template that contains parameter references
    as workflow steps.
    """
    doc = util.read_object(TEMPLATE_TOPTAGGER)
    template = WorkflowTemplate.from_dict(doc=doc)
    spec = {'environment': 'test', 'commands': ['python analyze']}
    args = {'tagger': ActorValue(spec=spec)}
    steps, _, _ = parser.parse_template(template=template, arguments=args)
    assert len(steps) == 2
    first_step = steps[0]
    assert first_step.image == 'test'
    assert first_step.commands == ['python analyze']
def worker_config(self) -> Union[Dict, List]:
    """Get the configuration settings for workers that are used by the
    serial workflow controller. If the configuration is not set an empty
    dictionary is returned.

    Returns
    -------
    dict of list
    """
    conf = self.get(FLOWSERV_SERIAL_WORKERS, dict())
    # A non-empty string value is interpreted as a reference to a
    # configuration file.
    if isinstance(conf, str) and conf:
        conf = util.read_object(filename=conf)
    return conf if conf else dict()
def test_load_file_and_write(store_id, tmpdir):
    """Test getting a previously uploaded file and writing the content to the
    file system.
    """
    # -- Setup ----------------------------------------------------------------
    # Initialize the file store and create the input file structure. Only
    # file A is uploaded.
    fs = create_store(store_id, os.path.join(tmpdir, 'fs'))
    files = create_files(os.path.join(tmpdir, 'data'))
    key = '0000'
    fs.store_files(files=[files[0]], dst=key)
    # -- Read file A and write it to disk; the content must round-trip.
    target = os.path.join(tmpdir, 'tmp')
    fs.load_file(os.path.join(key, FILE_A)).store(target)
    assert util.read_object(target) == DATA1
def read_config_obj(filename: Union[str, Dict]) -> Dict:
    """Read configuration object from a file. This function only attempts
    to read an object from disk if the type of the filename argument is
    string; a dictionary argument is returned unchanged.

    Parameters
    ----------
    filename: str or dict
        Path to file on disk or the configuration object itself.

    Returns
    -------
    dict
    """
    if not isinstance(filename, dict):
        return util.read_object(filename=filename)
    return filename
def test_parse_hello_world_notebook_template():
    """Extract commands and output files from the 'Hello world' template that
    included a notebook step.
    """
    doc = util.read_object(TEMPLATE_NOTEBOOK)
    template = WorkflowTemplate.from_dict(doc=doc)
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments={'greeting': 'Hey'}
    )
    assert len(steps) == 2
    nb_step = steps[0]
    assert nb_step.notebook == 'notebooks/HelloWorld.ipynb'
    assert nb_step.inputs == ['data/names.txt', 'notebooks/HelloWorld.ipynb']
    assert nb_step.outputs == ['results/greetings.txt']
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    assert args == {
        'inputfile': 'data/names.txt',
        'outputfile': 'results/greetings.txt',
        'greeting': 'Hey'
    }
def __init__(self, basedir):
    """Read the run result index file in the given base directory to
    initialize the result handles.

    Parameters
    ----------
    basedir: string
        Base directory for run results that have been made available to the
        post-processing workflow.
    """
    self.runs = list()
    # The index file lists, for each run, its identifier, name, and the
    # relative paths of its result files.
    doc = util.read_object(filename=os.path.join(basedir, base.RUNS_FILE))
    for obj in doc:
        run_id = obj[base.LABEL_ID]
        # Map each file name to its absolute path within the run folder.
        files = {
            filename: os.path.join(basedir, run_id, filename)
            for filename in obj[base.LABEL_FILES]
        }
        self.runs.append(
            Run(run_id=run_id, name=obj[base.LABEL_NAME], files=files)
        )
def test_parse_hello_world_template():
    """Extract commands and output files from the 'Hello world' template."""
    doc = util.read_object(TEMPLATE_HELLOWORLD)
    template = WorkflowTemplate.from_dict(doc=doc)
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments={'names': 'names.txt', 'sleeptime': 10}
    )
    assert len(steps) == 1
    first_step = steps[0]
    assert first_step.image == 'python:3.7'
    assert len(first_step.commands) == 2
    assert output_files == ['results/greetings.txt', 'results/analytics.json']
    assert args == {
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': 10,
        'greeting': 'Hello'
    }
def create_workflow(ctx, key, name, description, instructions, specfile,
                    manifest, template, configfile, ignore_postproc):
    """Create a new workflow for a given template."""
    # Optional engine configuration is read from file if given.
    config = util.read_object(configfile) if configfile else None
    with service() as api:
        # The create_workflow() method is only supported by the local API. If
        # an attempt is made to create a new workflow via a remote API an
        # error will be raised.
        doc = api.workflows().create_workflow(
            source=template,
            identifier=key,
            name=name,
            description=description,
            instructions=read_instructions(instructions),
            specfile=specfile,
            manifestfile=manifest,
            engine_config=config,
            ignore_postproc=ignore_postproc
        )
    workflow_id = doc[labels.WORKFLOW_ID]
    click.echo('export {}={}'.format(ctx.obj.vars['workflow'], workflow_id))
def read_config(filename: str, format: Optional[str] = None,
                validate: Optional[bool] = False) -> Dict:
    """Read worker configuration object from a given file.

    Parameters
    ----------
    filename: str
        Input file name.
    format: string, optional
        Optional file format identifier.
    validate: bool, default=True
        Validate the given worker specifications against the `workerSpec`
        schema if True.

    Returns
    -------
    dict
    """
    doc = util.read_object(filename, format=format)
    return convert_config(doc=doc, validate=validate)
def main(rundir, outputfile):
    """Create a summary of the analytics results for all runs in the ranking
    and append it to the given output file in Json format.

    NOTE(review): the previous docstring ("Write greeting for every name in a
    given input file ...") was copied from a different script and did not
    describe this function's behavior.

    Parameters
    ----------
    rundir: string
        Base directory containing the run result folders.
    outputfile: string
        Path to the output file (parent directories are created if needed).
    """
    # Read the analytics document for every run in the ranking.
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
    # Ensure that the output directory exists. exist_ok avoids the race
    # between an existence check and directory creation; the guard handles
    # output files without a directory component (dirname == '').
    outdir = os.path.dirname(outputfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    # Append the analytics results to the output file.
    with open(outputfile, "at") as f:
        json.dump(results, f)
def test_parse_hello_world_template():
    """Extract commands and output files from the 'Hello world' template."""
    doc = util.read_object(TEMPLATE_HELLOWORLD)
    template = WorkflowTemplate.from_dict(doc=doc)
    steps, args, output_files = parser.parse_template(
        template=template,
        arguments={'names': 'names.txt', 'sleeptime': 10}
    )
    assert len(steps) == 1
    first_step = steps[0]
    assert first_step.image == 'python:2.7'
    assert len(first_step.commands) == 1
    expected_cmd = '${python} "${helloworld}" --inputfile "${inputfile}" --outputfile "${outputfile}" --sleeptime ${sleeptime}'  # noqa: E501
    assert first_step.commands[0] == expected_cmd
    assert output_files == ['results/greetings.txt']
    assert args == {
        'helloworld': 'code/helloworld.py',
        'inputfile': 'names.txt',
        'outputfile': 'results/greetings.txt',
        'sleeptime': '10'
    }
def main(rundir, outputfile):
    """Create summary of analytics results for all runs.

    Reads the 'results/analytics.json' document for every run in the ranking
    and appends the list of documents to the given output file in Json format.

    Parameters
    ----------
    rundir: string
        Base directory containing the run result folders.
    outputfile: string
        Path to the output file (parent directories are created if needed).
    """
    # Read avg_count for all runs in the ranking.
    results = list()
    for run in Runs(rundir):
        filename = run.get_file(name='results/analytics.json')
        doc = util.read_object(filename=filename)
        results.append(doc)
    # Delay execution to allow for testing running post-processing
    # workflows.
    time.sleep(1)
    # Ensure that the output directory exists. exist_ok avoids the race
    # between an existence check and directory creation; the guard handles
    # output files without a directory component (dirname == '').
    outdir = os.path.dirname(outputfile)
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    # Append the analytics results to the output file.
    with open(outputfile, "at") as f:
        json.dump(results, f)
def read_config_obj(var: str, env: Dict) -> Dict:
    """Read configuration object from a given environment variable. If the
    variable is set and contains a dictionary as value that value is
    returned. Otherwise, it is assumed that the variable references a Json
    or Yaml file that contains the configuration object.

    Parameters
    ----------
    var: string
        Name of the environment variable.
    env: dict
        Dictionary representing the current environment settings.

    Returns
    -------
    dict
    """
    value = env.get(var)
    if not value:
        # Variable is unset (or empty); there is no configuration object.
        return None
    if not isinstance(value, dict):
        # Treat the value as a reference to a configuration file.
        return read_object(filename=value)
    return value
def test_read_write_object(tmpdir):
    """Test reading and writing dictionary objects to file in Json format and
    in Yaml format.
    """
    doc = {'A': 1, 'B': 2, 'C': {'D': 3}}
    json_file = os.path.join(str(tmpdir), 'file.json')
    txt_file = os.path.join(str(tmpdir), 'file.txt')
    yaml_file = os.path.join(str(tmpdir), 'file.yaml')
    # Read and write Json file
    util.write_object(filename=json_file, obj=doc)
    obj = util.read_object(filename=json_file)
    assert obj == doc
    obj = util.read_object(filename=json_file, format=util.FORMAT_JSON)
    assert obj == doc
    # Writing Yaml content to a '.json' file: reading it back as Yaml works,
    # but reading it with the (default, suffix-derived) Json parser fails.
    util.write_object(filename=json_file, obj=doc, format=util.FORMAT_YAML)
    obj = util.read_object(filename=json_file, format=util.FORMAT_YAML)
    assert obj == doc
    with pytest.raises(JSONDecodeError):
        util.read_object(filename=json_file)
    # Yaml format
    util.write_object(filename=yaml_file, obj=doc)
    obj = util.read_object(filename=yaml_file)
    assert obj == doc
    obj = util.read_object(filename=yaml_file, format=util.FORMAT_YAML)
    assert obj == doc
    util.write_object(filename=yaml_file, obj=doc, format=util.FORMAT_JSON)
    obj = util.read_object(filename=yaml_file, format=util.FORMAT_JSON)
    assert obj == doc
    # Reading from a binary buffer instead of a file path.
    doc = util.read_object(filename=yaml_file)
    buf = io.BytesIO(str(doc).encode("utf-8"))
    obj = util.read_object(filename=buf, format=util.FORMAT_YAML)
    assert doc == obj
    # The Yaml parser can read Json files
    obj = util.read_object(filename=yaml_file)
    assert obj == doc
    # File with non-standard suffix is written in Yaml format
    util.write_object(filename=txt_file, obj=doc)
    obj = util.read_object(filename=txt_file)
    assert obj == doc
    obj = util.read_object(filename=txt_file, format=util.FORMAT_YAML)
    assert obj == doc
    with pytest.raises(JSONDecodeError):
        util.read_object(filename=txt_file, format=util.FORMAT_JSON)
    # Unknown format identifier raise a ValueError for both read and write.
    with pytest.raises(ValueError):
        util.read_object(filename=txt_file, format='UNKNOWN')
    with pytest.raises(ValueError):
        util.write_object(filename=txt_file, obj=doc, format='UNKNOWN')
def test_postproc_workflow(fsconfig, tmpdir):
    """Execute the modified helloworld example."""
    # -- Setup ----------------------------------------------------------------
    #
    # It is important here that we do not use the SQLite in-memory database
    # since this fails (for unknown reason; presumably due to different threads)
    # when the post-processing run is updated.
    # --
    env = Config().basedir(tmpdir).run_async().auth()
    env.update(fsconfig)
    service = LocalAPIFactory(env=env)
    # Start a new run for the workflow template.
    with service() as api:
        # Need to set the file store in the backend to the new instance as
        # well. Otherwise, the post processing workflow may attempt to use
        # the backend which was initialized prior with a different file store.
        workflow_id = create_workflow(
            api,
            source=TEMPLATE_DIR,
            specfile=SPEC_FILE
        )
        user_id = create_user(api)
    # Create four groups and run the workflow with a slightly different input
    # file
    # NOTE(review): the polling/post-processing checks below appear to sit
    # inside this loop (the final `assert len(compare) == (i + 1)` depends on
    # the loop variable) — indentation was reconstructed; confirm against the
    # original file.
    for i in range(4):
        with service(user_id=user_id) as api:
            group_id = create_group(api, workflow_id)
            names = io_file(data=NAMES[:(i + 1)], format='plain/text')
            file_id = upload_file(api, group_id, names)
            # Set the template argument values
            arguments = [
                serialize_arg('names', serialize_fh(file_id)),
                serialize_arg('greeting', 'Hi')
            ]
            run_id = start_run(api, group_id, arguments=arguments)
        # Poll workflow state every second.
        run = poll_run(service, run_id, user_id)
        assert run['state'] == st.STATE_SUCCESS
        with service() as api:
            wh = api.workflows().get_workflow(workflow_id=workflow_id)
        # Wait (at most ~60s) for the post-processing entry to appear in the
        # workflow handle.
        attmpts = 0
        while 'postproc' not in wh:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attmpts += 1
            if attmpts > 60:
                break
        assert 'postproc' in wh
        serialize.validate_workflow_handle(wh)
        # Wait (at most ~60s) for the post-processing run to leave an active
        # state.
        attmpts = 0
        while wh['postproc']['state'] in st.ACTIVE_STATES:
            time.sleep(1)
            with service() as api:
                wh = api.workflows().get_workflow(workflow_id=workflow_id)
            attmpts += 1
            if attmpts > 60:
                break
        serialize.validate_workflow_handle(wh)
        with service() as api:
            ranking = api.workflows().get_ranking(workflow_id=workflow_id)
        serialize.validate_ranking(ranking)
        # Find the identifier of the post-processing compare result file.
        for fobj in wh['postproc']['files']:
            if fobj['name'] == 'results/compare.json':
                file_id = fobj['id']
        with service(user_id=user_id) as api:
            fh = api.workflows().get_result_file(
                workflow_id=workflow_id,
                file_id=file_id
            )
        compare = util.read_object(fh.open())
        # One compare entry per completed group run so far.
        assert len(compare) == (i + 1)
    # Access the post-processing result files.
    with service() as api:
        fh = api.workflows().get_result_archive(workflow_id=workflow_id)
        assert fh.name.startswith('run')
        assert fh.mime_type == 'application/gzip'
def load(basedir, manifestfile=None, name=None, description=None,
         instructions=None, specfile=None, existing_names=None):
    """Read the workflow manifest from file.

    By default, an attempt is made to read a file with one of the following
    names in the basedir (in the given order): flowserv.json, flowserv.yaml,
    flowserv.yml. If the manifest file parameter is given the specified file
    is being read instead.

    The parameters name, description, instructions, and specfile are used to
    override the respective properties in the manifest file.

    Raises a ValueError if no manifest file is found or if no name or workflow
    specification is present in the resulting manifest object.

    Parameters
    ----------
    basedir: string
        Path to the base directory containing the workflow files. This
        directory is used when reading the manifest file (if not given as
        argument) and the instructions file (if not given as argument).
    manifestfile: string, default=None
        Path to manifest file. If not given an attempt is made to read one
        of the default manifest file names in the base directory.
    name: string
        Unique workflow name
    description: string
        Optional short description for display in workflow listings
    instructions: string
        File containing instructions for workflow users.
    specfile: string
        Path to the workflow template specification file (absolute or
        relative to the workflow directory)
    existing_names: set, default=None
        Set of names for existing projects. Defaults to the empty set. The
        previous signature used a shared mutable default (``set()``); a None
        sentinel avoids that pitfall while remaining backward compatible.

    Returns
    -------
    flowserv.model.workflow.manifest.WorkflowManifest

    Raises
    ------
    IOError, OSError, ValueError, flowserv.error.InvalidManifestError
    """
    if existing_names is None:
        existing_names = set()
    doc = dict()
    if manifestfile is not None:
        doc = util.read_object(manifestfile)
    else:
        # Attempt to read default manifest files.
        for filename in MANIFEST_FILES:
            filename = os.path.join(basedir, filename)
            if os.path.isfile(filename):
                doc = util.read_object(filename)
                break
    # Validate the manifest file.
    try:
        util.validate_doc(doc, optional=[
            'name', 'description', 'instructions', 'files', 'specfile'
        ])
        for obj in doc.get('files', []):
            util.validate_doc(obj, mandatory=['source'], optional=['target'])
    except ValueError as ex:
        raise err.InvalidManifestError(str(ex))
    # Override metadata with given arguments
    if name is not None:
        doc['name'] = name
    if description is not None:
        doc['description'] = description
    # Raise error if no name or no workflow specification is present.
    if 'name' not in doc:
        raise err.InvalidManifestError('missing name')
    if 'specfile' not in doc and specfile is None:
        raise err.InvalidManifestError('missing workflow specification')
    # Ensure that the name is valid and unique.
    doc['name'] = unique_name(doc['name'], existing_names)
    # Read the instructions file if specified.
    if instructions is not None or 'instructions' in doc:
        filename = getfile(
            basedir=basedir,
            manifest_value=doc.get('instructions'),
            user_argument=instructions
        )
        with open(filename, 'r') as f:
            doc['instructions'] = f.read().strip()
    # Get the workflow specification file.
    filename = getfile(
        basedir=basedir,
        manifest_value=doc.get('specfile'),
        user_argument=specfile
    )
    return WorkflowManifest(
        basedir=basedir,
        name=doc['name'],
        workflow_spec=util.read_object(filename),
        description=doc.get('description'),
        instructions=doc.get('instructions'),
        files=doc.get('files')
    )
def test_load_config_from_file():
    """Test loading worker factory configuration from a file."""
    # Passing the file content directly to the object constructor should work
    # the same as using the static load method.
    doc = util.read_object(JSON_FILE)
    worker = WorkerFactory(doc).get('test')
    assert worker.variables['a'] == 0