def run_workflow():
    backend = setupbackend_fromstring('multiproc:auto')
    with steering_ctx('local:' + workdir, 'workflow.yml', {'par': 'value'},
                      'tests/testspecs/local-helloworld', backend,
                      cache='checksums') as ys:
        ys.adage_argument(default_trackers=False)
    # persist the checksum cache once the workflow has run
    backend.backends['packtivity'].cache.todisk()

def cli(workdir, identifier, tracker):
    if os.path.exists(workdir):
        shutil.rmtree(workdir)
    ctx = zmq.Context()
    socket = ctx.socket(zmq.PUB)
    socket.connect(tracker)
    with steering_ctx(
            # workdir, 'madgraph_delphes.yml',
            # loadtoplevel = 'from-github/phenochain',
            # initdata = {'nevents': 100},
            workdir=workdir,
            workflow='workflow.yml',
            loadtoplevel='from-github/testing/dynamic_glob',
            initdata={
                'sourcefile': 'https://github.com/lukasheinrich/yadage-workflows/raw/master/testing/dynamic_glob/inputs/three_files.zip'
            },
            updateinterval=5,
            loginterval=5,
            backend=setupbackend_fromstring('multiproc:auto')) as ys:
        ys.adage_argument(additional_trackers=[
            ZeroMQTracker(socket=socket, identifier=identifier)
        ])

def run_yadage_workflow_standalone(jobguid, ctx):
    log.info('getting socket..')
    zmqctx = worker.celery_zeromq.get_context()
    socket = zmqctx.socket(zmq.PUB)
    socket.connect(os.environ['ZMQ_PROXY_CONNECT'])
    log.info('running recast workflow on context: {ctx}'.format(ctx=ctx))
    taskdir = os.path.join('/data', jobguid)
    if not os.path.exists(taskdir):
        os.makedirs(taskdir)
    workdir = os.path.join(taskdir, 'yadage')
    cap_backend = setupbackend_fromstring('fromenv')
    with steering_ctx(workdir=workdir,
                      workflow=ctx['workflow'],
                      loadtoplevel=ctx['toplevel'],
                      initdata=ctx['preset_pars'],
                      updateinterval=5,
                      loginterval=5,
                      backend=cap_backend) as ys:
        ys.adage_argument(additional_trackers=[
            ZeroMQTracker(socket=socket, identifier=jobguid)
        ])
        log.info('added zmq tracker.. ready to go..')
        log.info('zmq publishing under: %s', jobguid)
    log.info('workflow done')

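# A minimal sketch of the receiving end for the ZeroMQTracker publications in
# the two snippets above, assuming a plain TCP endpoint. The address, the
# blank subscription filter, and the function name are assumptions for
# illustration; ZeroMQTracker's exact message framing is not shown in these
# snippets, so the consumer just prints raw messages.
import zmq

def watch_tracker(endpoint='tcp://*:5556'):
    ctx = zmq.Context()
    sub = ctx.socket(zmq.SUB)
    # the publishers above connect() their PUB sockets, so the consumer binds
    sub.bind(endpoint)
    sub.setsockopt_string(zmq.SUBSCRIBE, '')  # subscribe to all topics
    while True:
        print(sub.recv())  # raw tracker message bytes
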
def run_workflow():
    backend = setupbackend_fromstring('multiproc:auto')
    with steering_ctx(workdir, 'workflow.yml', {'par': 'value'},
                      'tests/testspecs/local-helloworld', backend,
                      cache='checksums') as ys:
        ys.adage_argument(default_trackers=False)
    # persist the checksum cache once the workflow has run
    backend.backends['packtivity'].cache.todisk()

def run_yadage_workflow(workflow_uuid, workflow_workspace,
                        workflow_json=None, workflow_parameters=None):
    """Run a ``yadage`` workflow."""
    log.info('getting socket..')
    workflow_workspace = '{0}/{1}'.format(SHARED_VOLUME_PATH, workflow_workspace)
    # use some shared object between tasks.
    os.environ["workflow_uuid"] = workflow_uuid
    os.environ["workflow_workspace"] = workflow_workspace
    cap_backend = setupbackend_fromstring('fromenv')
    toplevel = os.getcwd()
    workflow = None
    if workflow_json:
        # When `yadage` is launched using an already validated workflow file.
        workflow_kwargs = dict(workflow_json=workflow_json)
    elif workflow:
        # When `yadage` resolves the workflow file from a remote repository:
        # i.e. github:reanahub/reana-demo-root6-roofit/workflow.yaml
        # (unreachable in this version, since `workflow` is always None here;
        # workflow_kwargs is undefined below if workflow_json is not given)
        workflow_kwargs = dict(workflow=workflow, toplevel=toplevel)
    dataopts = {'initdir': workflow_workspace}
    publisher = None  # so the except clause below can check it safely
    try:
        publisher = REANAWorkflowStatusPublisher()
        with steering_ctx(
                dataarg=workflow_workspace,
                dataopts=dataopts,
                initdata=workflow_parameters if workflow_parameters else {},
                visualize=False,
                updateinterval=5,
                loginterval=5,
                backend=cap_backend,
                **workflow_kwargs) as ys:
            log.info('running workflow on context: {0}'.format(locals()))
            publisher.publish_workflow_status(workflow_uuid, 1)  # running
            ys.adage_argument(
                additional_trackers=[REANATracker(identifier=workflow_uuid)])
        publisher.publish_workflow_status(workflow_uuid, 2)  # finished
        log.info('Workflow {workflow_uuid} finished. Files available '
                 'at {workflow_workspace}.'.format(
                     workflow_uuid=workflow_uuid,
                     workflow_workspace=workflow_workspace))
    except Exception as e:
        log.info('workflow failed: {0}'.format(e))
        if publisher:
            publisher.publish_workflow_status(workflow_uuid, 3)  # failed
        else:
            log.error(
                'Workflow {workflow_uuid} failed but status '
                'could not be published.'.format(workflow_uuid=workflow_uuid))

def test_directjson_ctx(tmpdir, multiproc_backend):
    wflowjson = yadage.workflow_loader.workflow(
        'workflow.yml', 'tests/testspecs/local-helloworld')
    workdir = os.path.join(str(tmpdir), 'workdir')
    with steering_ctx('local:' + workdir,
                      workflow_json=wflowjson,
                      backend=multiproc_backend,
                      initdata={'par': 'parvalue'}) as ys:
        ys.adage_argument(default_trackers=False)

def test_context(tmpdir, multiproc_backend):
    workdir = os.path.join(str(tmpdir), "workdir")
    with steering_ctx(
        "local:" + workdir,
        "workflow.yml",
        {"input": [1, 2, 3]},
        "tests/testspecs/nestedmapreduce",
        multiproc_backend,
    ) as ys:
        ys.adage_argument(default_trackers=False)

def test_inparchive(tmpdir, multiproc_backend):
    workdir = os.path.join(str(tmpdir), 'workdir')
    inputarchive = 'file://{}/tests/testspecs/dynamic_glob/inputs/three_files.zip'.format(
        os.path.abspath(os.curdir))
    with steering_ctx('local:' + workdir,
                      'workflow_frominit.yml',
                      {'inputfiles': '*.txt'},
                      'tests/testspecs/dynamic_glob',
                      multiproc_backend,
                      dataopts=dict(inputarchive=inputarchive)) as ys:
        ys.adage_argument(default_trackers=False)

def run_workflow():
    with steering_ctx(
        "local:" + workdir,
        "workflow.yml",
        {"par": "value"},
        "tests/testspecs/local-helloworld",
        checksum_cached_multiproc,
        accept_metadir=True,
    ) as ys:
        ys.adage_argument(default_trackers=False)

def test_directjson_ctx(tmpdir, multiproc_backend):
    wflowjson = yadage.workflow_loader.workflow(
        "workflow.yml", "tests/testspecs/local-helloworld"
    )
    workdir = os.path.join(str(tmpdir), "workdir")
    with steering_ctx(
        "local:" + workdir,
        workflow_json=wflowjson,
        backend=multiproc_backend,
        initdata={"par": "parvalue"},
    ) as ys:
        ys.adage_argument(default_trackers=False)

def run_workflow():
    backend = setupbackend_fromstring("multiproc:auto")
    with steering_ctx(
        "local:" + workdir,
        "workflow.yml",
        {"par": "value"},
        "tests/testspecs/local-helloworld",
        backend,
        cache="checksums",
    ) as ys:
        ys.adage_argument(default_trackers=False)
    # persist the checksum cache once the workflow has run
    backend.backends["packtivity"].cache.todisk()

def test_inparchive(tmpdir, multiproc_backend):
    workdir = os.path.join(str(tmpdir), "workdir")
    inputarchive = (
        "file://{}/tests/testspecs/dynamic_glob/inputs/three_files.zip".format(
            os.path.abspath(os.curdir)
        )
    )
    with steering_ctx(
        "local:" + workdir,
        "workflow_frominit.yml",
        {"inputfiles": "*.txt"},
        "tests/testspecs/dynamic_glob",
        multiproc_backend,
        dataopts=dict(inputarchive=inputarchive),
    ) as ys:
        ys.adage_argument(default_trackers=False)

def run_yadage_workflow_engine_adapter(
    publisher,
    rjc_api_client,
    workflow_uuid=None,
    workflow_workspace=None,
    workflow_json=None,
    workflow_parameters=None,
    operational_options={},
    **kwargs,
):
    """Run a ``yadage`` workflow."""
    log.info("getting socket..")
    workflow_workspace = "{0}/{1}".format(SHARED_VOLUME_PATH, workflow_workspace)
    # use some shared object between tasks.
    os.environ["workflow_uuid"] = workflow_uuid
    os.environ["workflow_workspace"] = workflow_workspace
    os.umask(REANA_WORKFLOW_UMASK)
    cap_backend = setupbackend_fromstring("fromenv")
    publisher = REANAWorkflowStatusPublisher(instance=publisher)
    workflow_kwargs = dict(workflow_json=workflow_json)
    dataopts = {"initdir": operational_options["initdir"]}
    initdata = {}
    for initfile in operational_options["initfiles"]:
        initdata.update(**yaml.safe_load(open(initfile)))
    initdata.update(workflow_parameters)
    with steering_ctx(
        dataarg=workflow_workspace,
        dataopts=dataopts,
        initdata=initdata,
        visualize=True,
        updateinterval=5,
        loginterval=5,
        backend=cap_backend,
        accept_metadir="accept_metadir" in operational_options,
        **workflow_kwargs,
    ) as ys:
        log.info("running workflow on context: {0}".format(locals()))
        publisher.publish_workflow_status(workflow_uuid, 1)  # running
        ys.adage_argument(additional_trackers=[REANATracker(identifier=workflow_uuid)])
    publisher.publish_workflow_status(workflow_uuid, 2)  # finished

def run_yadage_workflow_engine_adapter(
    publisher,
    rjc_api_client,
    workflow_uuid=None,
    workflow_workspace=None,
    workflow_json=None,
    workflow_parameters=None,
    operational_options={},
    **kwargs,
):
    """Run a ``yadage`` workflow."""
    os.environ["workflow_uuid"] = workflow_uuid
    os.environ["workflow_workspace"] = workflow_workspace
    os.umask(REANA_WORKFLOW_UMASK)
    tracker = REANATracker(identifier=workflow_uuid, publisher=publisher)
    tracker.publish_workflow_running_status()
    cap_backend = setupbackend_fromstring("fromenv")
    workflow_kwargs = dict(workflow_json=workflow_json)
    dataopts = {"initdir": operational_options["initdir"]}
    initdata = {}
    for initfile in operational_options["initfiles"]:
        with open(initfile) as stream:
            initdata.update(**yaml.safe_load(stream))
    initdata.update(workflow_parameters)
    with steering_ctx(
        dataarg=workflow_workspace,
        dataopts=dataopts,
        initdata=initdata,
        visualize=True,
        updateinterval=WORKFLOW_TRACKING_UPDATE_INTERVAL_SECONDS,
        loginterval=LOG_INTERVAL_SECONDS,
        backend=cap_backend,
        accept_metadir="accept_metadir" in operational_options,
        **workflow_kwargs,
    ) as ys:
        log.debug(f"running workflow on context: {locals()}")
        ys.adage_argument(additional_trackers=[tracker])
    # hack to publish finished workflow status AFTER Yadage visualization is done.
    tracker.publish_workflow_final_status()

def run_yadage_workflow(
    workflow_uuid,
    workflow_workspace,
    workflow_file,
    workflow_parameters=None,
    operational_options={},
):
    """Run a ``yadage`` workflow."""
    log.info("getting socket..")
    workflow_workspace = "{0}/{1}".format(SHARED_VOLUME_PATH, workflow_workspace)
    # use some shared object between tasks.
    os.environ["workflow_uuid"] = workflow_uuid
    os.environ["workflow_workspace"] = workflow_workspace
    os.umask(REANA_WORKFLOW_UMASK)
    cap_backend = setupbackend_fromstring("fromenv")
    workflow_file_abs_path = os.path.join(workflow_workspace, workflow_file)
    publisher = REANAWorkflowStatusPublisher()
    try:
        if not os.path.exists(workflow_file_abs_path):
            message = f"Workflow file {workflow_file} does not exist"
            raise Exception(message)
        else:
            schema_name = "yadage/workflow-schema"
            schemadir = None
            specopts = {
                "toplevel": operational_options["toplevel"],
                "schema_name": schema_name,
                "schemadir": schemadir,
                "load_as_ref": False,
            }
            validopts = {
                "schema_name": schema_name,
                "schemadir": schemadir,
            }
            workflow_json = yadageschemas.load(
                spec=workflow_file,
                specopts=specopts,
                validopts=validopts,
                validate=True,
            )
            workflow_kwargs = dict(workflow_json=workflow_json)
            dataopts = {"initdir": operational_options["initdir"]}
            check_connection_to_job_controller()
            with steering_ctx(
                dataarg=workflow_workspace,
                dataopts=dataopts,
                initdata=workflow_parameters if workflow_parameters else {},
                visualize=True,
                updateinterval=5,
                loginterval=5,
                backend=cap_backend,
                **workflow_kwargs,
            ) as ys:
                log.info("running workflow on context: {0}".format(locals()))
                publisher.publish_workflow_status(workflow_uuid, 1)  # running
                ys.adage_argument(
                    additional_trackers=[REANATracker(identifier=workflow_uuid)])
            publisher.publish_workflow_status(workflow_uuid, 2)  # finished
            log.info("Workflow {workflow_uuid} finished. Files available "
                     "at {workflow_workspace}.".format(
                         workflow_uuid=workflow_uuid,
                         workflow_workspace=workflow_workspace))
    except Exception as e:
        log.error("Workflow failed: {0}".format(e), exc_info=True)
        if publisher:
            publisher.publish_workflow_status(
                workflow_uuid, 3, logs="workflow failed: {0}".format(e))
        else:
            log.error(
                "Workflow {workflow_uuid} failed but status "
                "could not be published.".format(workflow_uuid=workflow_uuid))

def test_context(tmpdir, multiproc_backend):
    workdir = os.path.join(str(tmpdir), 'workdir')
    with steering_ctx('local:' + workdir,
                      'workflow.yml',
                      {'input': [1, 2, 3]},
                      'tests/testspecs/nestedmapreduce',
                      multiproc_backend) as ys:
        ys.adage_argument(default_trackers=False)

def run_workflow():
    with steering_ctx(workdir, 'workflow.yml', {'par': 'value'},
                      'tests/testspecs/local-helloworld',
                      checksum_cached_multiproc,
                      accept_metadir=True) as ys:
        ys.adage_argument(default_trackers=False)

def run_workflow():
    with steering_ctx('local:' + workdir, 'workflow.yml', {'par': 'value'},
                      'tests/testspecs/local-helloworld',
                      checksum_cached_multiproc,
                      accept_metadir=True) as ys:
        ys.adage_argument(default_trackers=False)

def run_yadage_workflow(workflow_uuid, workflow_workspace,
                        workflow_json=None, workflow_file=None,
                        workflow_parameters=None):
    """Run a ``yadage`` workflow."""
    log.info('getting socket..')
    workflow_workspace = '{0}/{1}'.format(SHARED_VOLUME_PATH, workflow_workspace)
    # use some shared object between tasks.
    os.environ["workflow_uuid"] = workflow_uuid
    os.environ["workflow_workspace"] = workflow_workspace
    os.umask(REANA_WORKFLOW_UMASK)
    cap_backend = setupbackend_fromstring('fromenv')
    toplevel = os.getcwd()
    workflow = None
    if workflow_json:
        # When `yadage` is launched using an already validated workflow file.
        workflow_kwargs = dict(workflow_json=workflow_json)
    elif workflow:
        # When `yadage` resolves the workflow file from a remote repository:
        # i.e. github:reanahub/reana-demo-root6-roofit/workflow.yaml
        # (unreachable in this version, since `workflow` is always None here)
        workflow_kwargs = dict(workflow=workflow, toplevel=toplevel)
    elif workflow_file:
        workflow_file_abs_path = os.path.join(workflow_workspace, workflow_file)
        if os.path.exists(workflow_file_abs_path):
            schema_name = 'yadage/workflow-schema'
            schemadir = None
            specopts = {
                'toplevel': workflow_workspace,
                'schema_name': schema_name,
                'schemadir': schemadir,
                'load_as_ref': False,
            }
            validopts = {
                'schema_name': schema_name,
                'schemadir': schemadir,
            }
            workflow_json = yadageschemas.load(spec=workflow_file,
                                               specopts=specopts,
                                               validopts=validopts,
                                               validate=True)
            workflow_kwargs = dict(workflow_json=workflow_json)
    dataopts = {'initdir': workflow_workspace}
    publisher = None  # so the except clause below can check it safely
    try:
        check_connection_to_job_controller()
        publisher = REANAWorkflowStatusPublisher()
        with steering_ctx(
                dataarg=workflow_workspace,
                dataopts=dataopts,
                initdata=workflow_parameters if workflow_parameters else {},
                visualize=True,
                updateinterval=5,
                loginterval=5,
                backend=cap_backend,
                **workflow_kwargs) as ys:
            log.info('running workflow on context: {0}'.format(locals()))
            publisher.publish_workflow_status(workflow_uuid, 1)  # running
            ys.adage_argument(
                additional_trackers=[REANATracker(identifier=workflow_uuid)])
        publisher.publish_workflow_status(workflow_uuid, 2)  # finished
        log.info('Workflow {workflow_uuid} finished. Files available '
                 'at {workflow_workspace}.'.format(
                     workflow_uuid=workflow_uuid,
                     workflow_workspace=workflow_workspace))
    except Exception as e:
        log.info('workflow failed: {0}'.format(e), exc_info=True)
        if publisher:
            publisher.publish_workflow_status(
                workflow_uuid, 3, logs='workflow failed: {0}'.format(e))
        else:
            log.error(
                'Workflow {workflow_uuid} failed but status '
                'could not be published.'.format(workflow_uuid=workflow_uuid))

def test_incomplete_data_ctx(tmpdir):
    workdir = os.path.join(str(tmpdir), "workdir")
    with pytest.raises(RuntimeError):
        with steering_ctx("local:" + workdir) as ys:
            pass
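
# The tests above rely on pytest fixtures named `multiproc_backend` and
# `checksum_cached_multiproc`. A conftest.py could provide them along these
# lines (a sketch, not the repository's actual fixtures; the import path and
# the caching setup are assumptions):
import pytest
from yadage.utils import setupbackend_fromstring

@pytest.fixture
def multiproc_backend():
    # one worker per CPU, matching the 'multiproc:auto' snippets above
    return setupbackend_fromstring('multiproc:auto')

@pytest.fixture
def checksum_cached_multiproc():
    # placeholder: the real fixture presumably enables checksum-based result
    # caching on top of this backend (cf. cache='checksums' in the snippets
    # above); the exact configuration call is not shown here
    return setupbackend_fromstring('multiproc:auto')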