def test_default_work_api(self):
    """An executor built on the stock discovery document uses 'containers'.

    The API client is a MagicMock whose ``_rootDesc`` is a deep copy of
    ``get_rootDesc()``.
    """
    arvados_cwl.add_arv_hints()

    discovery_doc = copy.deepcopy(get_rootDesc())
    api_client = mock.MagicMock()
    api_client._rootDesc = discovery_doc

    executor = arvados_cwl.executor.ArvCwlExecutor(api_client)

    self.assertEqual(executor.work_api, 'containers')
def test_default_work_api(self):
    """With ``jobs.create`` removed from discovery, 'containers' is chosen.

    A deep copy of ``get_rootDesc()`` is edited so the jobs resource has no
    ``create`` method before the runner is constructed.
    """
    arvados_cwl.add_arv_hints()

    api_client = mock.MagicMock()
    api_client._rootDesc = copy.deepcopy(get_rootDesc())

    # Edit the private copy only: drop the jobs "create" method.
    job_methods = api_client._rootDesc.get('resources')['jobs']['methods']
    del job_methods['create']

    cwl_runner = arvados_cwl.ArvCwlRunner(api_client)

    self.assertEqual(cwl_runner.work_api, 'containers')
def test_run(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Run ``tests/wf/scatter2.cwl`` as an ArvadosWorkflow on the jobs API.

    The workflow's job iterator is stepped twice; afterwards the test
    checks the arguments of the most recent ``jobs().create`` call and the
    data written through the mocked collection.
    """
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.ArvCwlRunner(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # NOTE(review): `return_vaulue` looks like a misspelling of
    # `return_value`; as written these two assignments only store an
    # attribute on the mock. Left untouched here -- confirm the
    # expectations below still hold before correcting the spelling.
    runner.api.collections().get().execute.return_vaulue = {
        "portable_data_hash": "99999999999999999999999999999993+99"
    }
    runner.api.collections().list().execute.return_vaulue = {
        "items": [{
            "portable_data_hash": "99999999999999999999999999999993+99"
        }]
    }

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    # Only the loader and the Avro names are used from the schema tuple.
    document_loader, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

    make_fs_access = functools.partial(
        arvados_cwl.CollectionFsAccess,
        collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))

    # Route the loader's fetching through CollectionFetcher.
    document_loader.fetcher_constructor = functools.partial(
        arvados_cwl.CollectionFetcher,
        api_client=api,
        fs_access=make_fs_access(""))
    document_loader.fetcher = document_loader.fetcher_constructor(
        document_loader.cache, document_loader.session)
    document_loader.fetch_text = document_loader.fetcher.fetch_text
    document_loader.check_exists = document_loader.fetcher.check_exists

    tool, metadata = document_loader.resolve_ref("tests/wf/scatter2.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool,
                                          work_api="jobs",
                                          avsc_names=avsc_names,
                                          basedir="",
                                          make_fs_access=make_fs_access,
                                          loader=document_loader,
                                          makeTool=runner.arv_make_tool,
                                          metadata=metadata)
    arvtool.formatgraph = None

    it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
    # Builtin next() instead of the Python-2-only it.next() method.
    next(it).run()
    next(it).run()

    with open("tests/wf/scatter2_subwf.cwl") as f:
        subwf = StripYAMLComments(f.read())

    expected_body = {
        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
        'repository': 'arvados',
        'script_version': 'master',
        'script': 'crunchrunner',
        'script_parameters': {
            'tasks': [{
                'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'
                },
                'task.vwd': {
                    'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
                    'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
                },
                'command': [
                    u'cwltool', u'--no-container', u'--move-outputs',
                    u'--preserve-entire-environment', u'workflow.cwl#main',
                    u'cwl.input.yml'
                ],
                'task.stdout': 'cwl.output.json'
            }]
        },
        'runtime_constraints': {
            'min_scratch_mb_per_node': 2048,
            'min_cores_per_node': 1,
            'docker_image': 'arvados/jobs',
            'min_ram_mb_per_node': 1024
        },
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher(expected_body),
        filters=expected_filters,
        find_or_create=True)

    collection_write = mockcollection().open().__enter__().write
    collection_write.assert_has_calls([mock.call(subwf)])
    # NOTE(review): whitespace inside this literal is part of the expected
    # value. It is reproduced exactly as it appears in this copy of the
    # file -- confirm it if the file has been reflowed.
    collection_write.assert_has_calls([
        mock.call('''{ "fileblub": { "basename": "token.txt", "class": "File", "location": "/keep/99999999999999999999999999999999+118/token.txt" }, "sleeptime": 5 }''')
    ])
def test_resource_requirements(self, list_images_in_arv):
    """Check how resource hints map onto a jobs-API submission.

    The tool carries ResourceRequirement, RuntimeConstraints,
    APIRequirement and a ReuseRequirement with ``enableReuse`` False.
    Each job is run with ``enable_reuse=True`` and the submission is
    expected to use ``find_or_create=False``.
    """
    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    arvados_cwl.add_arv_hints()

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # NOTE(review): `return_vaulue` looks like a misspelling of
    # `return_value`; as written this only stores an attribute on the
    # mock. Left untouched here -- confirm the expectations below still
    # hold before correcting the spelling.
    runner.api.collections().get().execute.return_vaulue = {
        "portable_data_hash": "99999999999999999999999999999993+99"
    }

    # Only the Avro names are used from the schema tuple.
    _, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

    tool = {
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "ResourceRequirement",
            "coresMin": 3,
            "ramMin": 3000,
            "tmpdirMin": 4000
        }, {
            "class": "http://arvados.org/cwl#RuntimeConstraints",
            "keep_cache": 512,
            "outputDirType": "keep_output_dir"
        }, {
            "class": "http://arvados.org/cwl#APIRequirement",
        }, {
            "class": "http://arvados.org/cwl#ReuseRequirement",
            "enableReuse": False
        }],
        "baseCommand": "ls"
    }

    make_fs_access = functools.partial(
        arvados_cwl.CollectionFsAccess,
        collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool,
                                             work_api="jobs",
                                             avsc_names=avsc_names,
                                             make_fs_access=make_fs_access,
                                             loader=Loader({}))
    arvtool.formatgraph = None

    for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
        j.run(enable_reuse=True)

    # The original literal listed 'runtime_constraints' twice; the first
    # value ({}) was silently discarded, so only the effective one is kept.
    expected_body = {
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'script_parameters': {
            'tasks': [{
                'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'
                },
                'task.keepTmpOutput': True,
                'command': ['ls']
            }]
        },
        'script_version': 'master',
        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
        'repository': 'arvados',
        'script': 'crunchrunner',
        'runtime_constraints': {
            'docker_image': 'arvados/jobs',
            'min_cores_per_node': 3,
            'min_ram_mb_per_node': 3512,      # ramMin + keep_cache
            'min_scratch_mb_per_node': 5024,  # tmpdirSize + outdirSize
            'keep_cache_mb_per_task': 512
        }
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher(expected_body),
        find_or_create=False,
        filters=expected_filters)
def test_resource_requirements(self):
    """Check how resource hints map onto a jobs-API submission.

    The tool carries ResourceRequirement, RuntimeConstraints and
    APIRequirement hints; the expected ``jobs().create`` arguments are
    compared as plain values.
    """
    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    arvados_cwl.add_arv_hints()

    # Only the Avro names are used from the schema tuple.
    _, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

    tool = {
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "ResourceRequirement",
            "coresMin": 3,
            "ramMin": 3000,
            "tmpdirMin": 4000
        }, {
            "class": "http://arvados.org/cwl#RuntimeConstraints",
            "keep_cache": 512,
            "outputDirType": "keep_output_dir"
        }, {
            "class": "http://arvados.org/cwl#APIRequirement",
        }],
        "baseCommand": "ls"
    }

    make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                       api_client=runner.api)
    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool,
                                             work_api="jobs",
                                             avsc_names=avsc_names,
                                             make_fs_access=make_fs_access,
                                             loader=Loader({}))
    arvtool.formatgraph = None

    for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
        j.run()

    # The original literal listed 'runtime_constraints' twice; the first
    # value ({}) was silently discarded, so only the effective one is kept.
    expected_body = {
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'script_parameters': {
            'tasks': [{
                'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'
                },
                'task.keepTmpOutput': True,
                'command': ['ls']
            }]
        },
        'script_version': 'master',
        'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
        'repository': 'arvados',
        'script': 'crunchrunner',
        'runtime_constraints': {
            'docker_image': 'arvados/jobs',
            'min_cores_per_node': 3,
            'min_ram_mb_per_node': 3000,
            'min_scratch_mb_per_node': 5024,  # tmpdirSize + outdirSize
            'keep_cache_mb_per_task': 512
        }
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=expected_body,
        find_or_create=True,
        filters=expected_filters)
def test_run(self, mockcollection):
    """Run ``tests/wf/scatter2.cwl`` as an ArvadosWorkflow on the jobs API.

    The workflow's job iterator is stepped twice; afterwards the test
    checks what was written through the mocked collection and the
    arguments of the most recent ``jobs().create`` call.

    The body used to be wrapped in ``try: ... except: logging.exception("")``,
    which logged and discarded every failure (assertion errors included),
    so the test could never fail. Failures now propagate to the test
    runner.
    """
    arvados_cwl.add_arv_hints()

    runner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    # Only the loader and the Avro names are used from the schema tuple.
    document_loader, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

    tool, metadata = document_loader.resolve_ref("tests/wf/scatter2.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"

    make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                       api_client=runner.api)
    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool,
                                          work_api="jobs",
                                          avsc_names=avsc_names,
                                          basedir="",
                                          make_fs_access=make_fs_access,
                                          loader=document_loader,
                                          makeTool=runner.arv_make_tool,
                                          metadata=metadata)
    arvtool.formatgraph = None

    it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
    # Builtin next() instead of the Python-2-only it.next() method.
    next(it).run()
    next(it).run()

    with open("tests/wf/scatter2_subwf.cwl") as f:
        subwf = f.read()

    collection_write = mockcollection().open().__enter__().write
    collection_write.assert_has_calls([mock.call(subwf)])
    collection_write.assert_has_calls([mock.call('{sleeptime: 5}')])

    expected_body = {
        'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
        'repository': 'arvados',
        'script_version': 'master',
        'script': 'crunchrunner',
        'script_parameters': {
            'tasks': [{
                'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'
                },
                'task.vwd': {
                    'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
                    'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
                },
                'command': [
                    u'cwltool', u'--no-container', u'--move-outputs',
                    u'--preserve-entire-environment', u'workflow.cwl#main',
                    u'cwl.input.yml'
                ],
                'task.stdout': 'cwl.output.json'
            }]
        },
        'runtime_constraints': {
            'min_scratch_mb_per_node': 2048,
            'min_cores_per_node': 1,
            'docker_image': 'arvados/jobs',
            'min_ram_mb_per_node': 1024
        },
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=expected_body,
        filters=expected_filters,
        find_or_create=True)
def test_resource_requirements(self, list_images_in_arv):
    """Check how resource hints map onto a jobs-API submission.

    The tool carries ResourceRequirement, RuntimeConstraints and
    APIRequirement hints; the submission is expected to use
    ``find_or_create=True``.
    """
    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    arvados_cwl.add_arv_hints()

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # NOTE(review): `return_vaulue` looks like a misspelling of
    # `return_value`; as written this only stores an attribute on the
    # mock. Left untouched here -- confirm the expectations below still
    # hold before correcting the spelling.
    runner.api.collections().get().execute.return_vaulue = {"portable_data_hash": "99999999999999999999999999999993+99"}

    # Only the Avro names are used from the schema tuple.
    _, avsc_names, _, _ = cwltool.process.get_schema("v1.0")

    tool = {
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "ResourceRequirement",
            "coresMin": 3,
            "ramMin": 3000,
            "tmpdirMin": 4000
        }, {
            "class": "http://arvados.org/cwl#RuntimeConstraints",
            "keep_cache": 512,
            "outputDirType": "keep_output_dir"
        }, {
            "class": "http://arvados.org/cwl#APIRequirement",
        }],
        "baseCommand": "ls"
    }

    make_fs_access = functools.partial(
        arvados_cwl.CollectionFsAccess,
        collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool,
                                             work_api="jobs",
                                             avsc_names=avsc_names,
                                             make_fs_access=make_fs_access,
                                             loader=Loader({}))
    arvtool.formatgraph = None

    for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
        j.run()

    # The original literal listed 'runtime_constraints' twice; the first
    # value ({}) was silently discarded, so only the effective one is kept.
    expected_body = {
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'script_parameters': {
            'tasks': [{
                'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
                'task.keepTmpOutput': True,
                'command': ['ls']
            }]
        },
        'script_version': 'master',
        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
        'repository': 'arvados',
        'script': 'crunchrunner',
        'runtime_constraints': {
            'docker_image': 'arvados/jobs',
            'min_cores_per_node': 3,
            'min_ram_mb_per_node': 3512,      # ramMin + keep_cache
            'min_scratch_mb_per_node': 5024,  # tmpdirSize + outdirSize
            'keep_cache_mb_per_task': 512
        }
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher(expected_body),
        find_or_create=True,
        filters=expected_filters)
def test_resource_requirements(self, list_images_in_arv):
    """Check how resource hints map onto a jobs-API submission.

    The tool carries ResourceRequirement, RuntimeConstraints,
    APIRequirement and a ReuseRequirement with ``enableReuse`` False; the
    submission is expected to use ``find_or_create=False``. Loading and
    runtime contexts come from ``self.helper(runner)``.
    """
    runner = mock.MagicMock()
    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    arvados_cwl.add_arv_hints()

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # NOTE(review): `return_vaulue` looks like a misspelling of
    # `return_value`; as written this only stores an attribute on the
    # mock. Left untouched here -- confirm the expectations below still
    # hold before correcting the spelling.
    runner.api.collections().get().execute.return_vaulue = {"portable_data_hash": "99999999999999999999999999999993+99"}

    tool = {
        "inputs": [],
        "outputs": [],
        "hints": [{
            "class": "ResourceRequirement",
            "coresMin": 3,
            "ramMin": 3000,
            "tmpdirMin": 4000
        }, {
            "class": "http://arvados.org/cwl#RuntimeConstraints",
            "keep_cache": 512,
            "outputDirType": "keep_output_dir"
        }, {
            "class": "http://arvados.org/cwl#APIRequirement",
        }, {
            "class": "http://arvados.org/cwl#ReuseRequirement",
            "enableReuse": False
        }],
        "baseCommand": "ls",
        "id": "#",
        "class": "CommandLineTool"
    }

    loadingContext, runtimeContext = self.helper(runner)

    arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
    arvtool.formatgraph = None

    for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
        j.run(runtimeContext)

    # The original literal listed 'runtime_constraints' twice; the first
    # value ({}) was silently discarded, so only the effective one is kept.
    expected_body = {
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
        'script_parameters': {
            'tasks': [{
                'task.env': {'HOME': '$(task.outdir)', 'TMPDIR': '$(task.tmpdir)'},
                'task.keepTmpOutput': True,
                'command': ['ls']
            }]
        },
        'script_version': 'master',
        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
        'repository': 'arvados',
        'script': 'crunchrunner',
        'runtime_constraints': {
            'docker_image': 'arvados/jobs',
            'min_cores_per_node': 3,
            'min_ram_mb_per_node': 3512,      # ramMin + keep_cache
            'min_scratch_mb_per_node': 5024,  # tmpdirSize + outdirSize
            'keep_cache_mb_per_task': 512
        }
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher(expected_body),
        find_or_create=False,
        filters=expected_filters)
def run():
    """Run the CWL tool described by the current job's script parameters.

    Reads ``script_parameters`` from ``arvados.current_job()``, rewrites
    Keep locators into ``keep:`` references, removes the ``arv:*`` options
    from the job order, loads the tool and hands it to
    ``runner.arv_executor``.

    Any exception is logged; the current job task is then updated with
    ``success: False`` and, when the runner has a final output collection,
    that collection's portable data hash as ``output``.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        keep_mount = os.environ['TASK_KEEPMOUNT']
        tool_relpath = job_order_object.pop("cwl:tool")
        toolpath = "file://%s/%s" % (keep_mount, tool_relpath)

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix values that look like "<pdh>[/path]" with "keep:".
            return "keep:%s" % v if pdh_path.match(v) else v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Top-level string values that are Keep locators become File objects.
        for key, value in job_order_object.items():
            if not isinstance(value, basestring):
                continue
            if arvados.util.keep_locator_pattern.match(value):
                job_order_object[key] = {
                    "class": "File",
                    "location": "keep:%s" % value
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options travel inside the job order; take them out of it,
        # falling back to the defaults when they are absent.
        output_name = job_order_object.pop("arv:output_name", None)
        output_tags = job_order_object.pop("arv:output_tags", None)
        enable_reuse = job_order_object.pop("arv:enable_reuse", True)
        on_error = job_order_object.pop("arv:on_error", "continue")
        debug = job_order_object.pop("arv:debug", False)

        api_cache = arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4})
        runner = arvados_cwl.ArvCwlRunner(api_client=api_cache,
                                          output_name=output_name,
                                          output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        fetcher_constructor = functools.partial(CollectionFetcher,
                                                api_client=runner.api,
                                                fs_access=make_fs_access(""),
                                                num_retries=runner.num_retries)
        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=fetcher_constructor)

        if debug:
            logger.setLevel(logging.DEBUG)
            for logger_name in ('arvados', "cwltool"):
                logging.getLogger(logger_name).setLevel(logging.DEBUG)

        # Keyword arguments for the executor, evaluated in the same order
        # as the attribute assignments they replace.
        executor_kwargs = {
            "project_uuid": arvados.current_job()["owner_uuid"],
            "enable_reuse": enable_reuse,
            "on_error": on_error,
            "submit": False,
            "debug": debug,
            "quiet": False,
            "ignore_docker_for_reuse": False,
            "basedir": os.getcwd(),
            "name": None,
            "cwl_runner_job": {"uuid": arvados.current_job()["uuid"],
                               "state": arvados.current_job()["state"]},
            "make_fs_access": make_fs_access,
            "trash_intermediate": False,
            "intermediate_output_ttl": 0,
            "priority": arvados_cwl.DEFAULT_PRIORITY,
            "do_validate": True,
            "disable_js_validation": False,
        }

        runner.arv_executor(t, job_order_object, **executor_kwargs)
    except Exception as e:
        if not isinstance(e, WorkflowException):
            logging.exception("Unhandled exception")
        else:
            logging.info("Workflow error %s", e)

        outputCollection = None
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()

        failure_report = {
            'output': outputCollection,
            'success': False,
            'progress':1.0
        }
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body=failure_report).execute()
def test_overall_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Run ``tests/wf/echo-wf.cwl`` and check the single job submission.

    The workflow's job iterator is stepped twice; the most recent
    ``jobs().create`` call is then compared with the expected body,
    filters and ``find_or_create`` flag.
    """
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.executor.ArvCwlExecutor(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # NOTE(review): `return_vaulue` looks like a misspelling of
    # `return_value`; as written these two assignments only store an
    # attribute on the mock. Left untouched here -- confirm the
    # expectations below still hold before correcting the spelling.
    runner.api.collections().get().execute.return_vaulue = {
        "portable_data_hash": "99999999999999999999999999999993+99"
    }
    runner.api.collections().list().execute.return_vaulue = {
        "items": [{
            "portable_data_hash": "99999999999999999999999999999993+99"
        }]
    }

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    loadingContext, runtimeContext = self.helper(runner)

    tool, metadata = loadingContext.loader.resolve_ref("tests/wf/echo-wf.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    # Every collection the code under test creates is a CollectionMock
    # wrapping a fresh MagicMock.
    mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(
        mock.MagicMock(), *args, **kwargs)

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
    arvtool.formatgraph = None

    it = arvtool.job({}, mock.MagicMock(), runtimeContext)
    # Builtin next() instead of the Python-2-only it.next() method.
    next(it).run(runtimeContext)
    next(it).run(runtimeContext)

    # NOTE(review): `subwf` is read but never asserted on in this test;
    # the read is kept so the fixture file is still required to exist.
    with open("tests/wf/echo-subwf.cwl") as f:
        subwf = StripYAMLComments(f.read())

    expected_body = {
        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
        'repository': 'arvados',
        'script_version': 'master',
        'script': 'crunchrunner',
        'script_parameters': {
            'tasks': [{
                'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'
                },
                'task.vwd': {
                    'workflow.cwl': '$(task.keep)/99999999999999999999999999999996+99/workflow.cwl',
                    'cwl.input.yml': '$(task.keep)/99999999999999999999999999999996+99/cwl.input.yml'
                },
                'command': [
                    u'cwltool', u'--no-container', u'--move-outputs',
                    u'--preserve-entire-environment', u'workflow.cwl#main',
                    u'cwl.input.yml'
                ],
                'task.stdout': 'cwl.output.json'
            }]
        },
        'runtime_constraints': {
            'min_scratch_mb_per_node': 4096,
            'min_cores_per_node': 3,
            'docker_image': 'arvados/jobs',
            'min_ram_mb_per_node': 1024
        },
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher(expected_body),
        filters=expected_filters,
        find_or_create=True)
def run():
    """Run the CWL tool described by the current job's script parameters.

    Reads ``script_parameters`` from ``arvados.current_job()``, rewrites
    Keep locators into ``keep:`` references, removes the ``arv:*`` options
    from the job order, loads the tool through the runner's loading
    context and hands it to ``runner.arv_executor`` together with an
    ``ArvRuntimeContext``.

    Any exception is logged; the current job task is then updated with
    ``success: False`` and, when the runner has a final output collection,
    that collection's portable data hash as ``output``.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        keep_mount = os.environ['TASK_KEEPMOUNT']
        tool_relpath = job_order_object.pop("cwl:tool")
        toolpath = "file://%s/%s" % (keep_mount, tool_relpath)

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix values that look like "<pdh>[/path]" with "keep:".
            return "keep:%s" % v if pdh_path.match(v) else v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Top-level string values that are Keep locators become File objects.
        for key, value in job_order_object.items():
            if not isinstance(value, basestring):
                continue
            if arvados.util.keep_locator_pattern.match(value):
                job_order_object[key] = {
                    "class": "File",
                    "location": "keep:%s" % value
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options travel inside the job order; take them out of it,
        # falling back to the defaults when they are absent.
        output_name = job_order_object.pop("arv:output_name", None)
        output_tags = job_order_object.pop("arv:output_tags", None)
        enable_reuse = job_order_object.pop("arv:enable_reuse", True)
        on_error = job_order_object.pop("arv:on_error", "continue")
        debug = job_order_object.pop("arv:debug", False)

        arvargs = argparse.Namespace(work_api="jobs",
                                     output_name=output_name,
                                     output_tags=output_tags,
                                     thread_count=1)

        api_cache = arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4})
        runner = arvados_cwl.ArvCwlRunner(api_client=api_cache, arvargs=arvargs)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            for logger_name in ('arvados', "cwltool"):
                logging.getLogger(logger_name).setLevel(logging.DEBUG)

        # Start from the runner arguments, then fill in the per-run settings.
        runtime_ctx = ArvRuntimeContext(vars(arvargs))
        runtime_ctx.project_uuid = arvados.current_job()["owner_uuid"]
        runtime_ctx.enable_reuse = enable_reuse
        runtime_ctx.on_error = on_error
        runtime_ctx.submit = False
        runtime_ctx.debug = debug
        runtime_ctx.quiet = False
        runtime_ctx.ignore_docker_for_reuse = False
        runtime_ctx.basedir = os.getcwd()
        runtime_ctx.name = None
        runtime_ctx.cwl_runner_job = {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        }
        runtime_ctx.make_fs_access = make_fs_access
        runtime_ctx.trash_intermediate = False
        runtime_ctx.intermediate_output_ttl = 0
        runtime_ctx.priority = arvados_cwl.DEFAULT_PRIORITY
        runtime_ctx.do_validate = True
        runtime_ctx.disable_js_validation = False
        runtime_ctx.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, runtime_ctx, logger=logger)
    except Exception as e:
        if not isinstance(e, WorkflowException):
            logging.exception("Unhandled exception")
        else:
            logging.info("Workflow error %s", e)

        outputCollection = None
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()

        failure_report = {
            'output': outputCollection,
            'success': False,
            'progress':1.0
        }
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body=failure_report).execute()
def run():
    """Run the CWL tool described by the current job's script parameters.

    Reads ``script_parameters`` from ``arvados.current_job()``, rewrites
    Keep locators into ``keep:`` references, removes the ``arv:*`` options
    from the job order, loads the tool and hands it to
    ``runner.arv_executor``.

    Any exception is logged; the current job task is then updated with
    ``success: False`` and, when the runner has a final output collection,
    that collection's portable data hash as ``output``.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        keep_mount = os.environ['TASK_KEEPMOUNT']
        tool_relpath = job_order_object.pop("cwl:tool")
        toolpath = "file://%s/%s" % (keep_mount, tool_relpath)

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix values that look like "<pdh>[/path]" with "keep:".
            return "keep:%s" % v if pdh_path.match(v) else v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Top-level string values that are Keep locators become File objects.
        for key, value in job_order_object.items():
            if not isinstance(value, basestring):
                continue
            if arvados.util.keep_locator_pattern.match(value):
                job_order_object[key] = {
                    "class": "File",
                    "location": "keep:%s" % value
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options travel inside the job order; take them out of it,
        # falling back to the defaults when they are absent.
        output_name = job_order_object.pop("arv:output_name", None)
        output_tags = job_order_object.pop("arv:output_tags", None)
        enable_reuse = job_order_object.pop("arv:enable_reuse", True)
        on_error = job_order_object.pop("arv:on_error", "continue")

        ordered_api = arvados.api('v1', model=OrderedJsonModel())
        runner = arvados_cwl.ArvCwlRunner(api_client=ordered_api,
                                          output_name=output_name,
                                          output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        fetcher_constructor = functools.partial(CollectionFetcher,
                                                api_client=runner.api,
                                                fs_access=make_fs_access(""),
                                                num_retries=runner.num_retries)
        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=fetcher_constructor)

        # Keyword arguments for the executor, evaluated in the same order
        # as the attribute assignments they replace.
        executor_kwargs = {
            "project_uuid": arvados.current_job()["owner_uuid"],
            "enable_reuse": enable_reuse,
            "on_error": on_error,
            "submit": False,
            "debug": False,
            "quiet": False,
            "ignore_docker_for_reuse": False,
            "basedir": os.getcwd(),
            "name": None,
            "cwl_runner_job": {"uuid": arvados.current_job()["uuid"],
                               "state": arvados.current_job()["state"]},
            "make_fs_access": make_fs_access,
        }

        runner.arv_executor(t, job_order_object, **executor_kwargs)
    except Exception as e:
        if not isinstance(e, WorkflowException):
            logging.exception("Unhandled exception")
        else:
            logging.info("Workflow error %s", e)

        outputCollection = None
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()

        failure_report = {
            'output': outputCollection,
            'success': False,
            'progress':1.0
        }
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body=failure_report).execute()
def run():
    """Run the CWL tool described by the current job's script parameters.

    Reads ``script_parameters`` from ``arvados.current_job()``, turns the
    ``cwl:tool`` entry into a ``file://`` URL under ``TASK_KEEPMOUNT``,
    rewrites Keep locators into ``keep:`` references, removes the
    ``arv:*`` options from the job order, loads the tool and hands it to
    ``runner.arv_executor``.

    Any exception is logged; the current job task is then updated with
    ``success: False`` and, when the runner has a final output collection,
    that collection's portable data hash as ``output``.
    """
    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix values that look like "<pdh>[/path]" with "keep:".
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Objects without a "location" key are left alone instead of
            # raising KeyError.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        job_order_object["cwl:tool"] = "file://%s/%s" % (
            os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])

        # Top-level string values that are Keep locators become File objects.
        for k, v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)
        adjustDirObjs(
            job_order_object,
            functools.partial(
                getListing,
                arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))

        # Runner options travel inside the job order; take them out of it.
        output_name = None
        output_tags = None
        enable_reuse = True
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        runner = arvados_cwl.ArvCwlRunner(
            api_client=arvados.api('v1', model=OrderedJsonModel()),
            output_name=output_name,
            output_tags=output_tags)

        t = load_tool(job_order_object, runner.arv_make_tool)

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.submit = False
        args.debug = True
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.cwl_runner_job = {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        }

        # The executor's return value was bound to a name that was never
        # read, so it is no longer kept.
        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")

        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None

        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
def test_overall_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Run ``tests/wf/echo-wf.cwl`` and check the single job submission.

    The workflow's job iterator is stepped twice; the most recent
    ``jobs().create`` call is then compared with the expected body,
    filters and ``find_or_create`` flag.
    """
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.ArvCwlRunner(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # NOTE(review): `return_vaulue` looks like a misspelling of
    # `return_value`; as written these two assignments only store an
    # attribute on the mock. Left untouched here -- confirm the
    # expectations below still hold before correcting the spelling.
    runner.api.collections().get().execute.return_vaulue = {"portable_data_hash": "99999999999999999999999999999993+99"}
    runner.api.collections().list().execute.return_vaulue = {"items": [{"portable_data_hash": "99999999999999999999999999999993+99"}]}

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    loadingContext, runtimeContext = self.helper(runner)

    tool, metadata = loadingContext.loader.resolve_ref("tests/wf/echo-wf.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
    arvtool.formatgraph = None

    it = arvtool.job({}, mock.MagicMock(), runtimeContext)
    # Builtin next() instead of the Python-2-only it.next() method.
    next(it).run(runtimeContext)
    next(it).run(runtimeContext)

    # NOTE(review): `subwf` is read but never asserted on in this test;
    # the read is kept so the fixture file is still required to exist.
    with open("tests/wf/echo-subwf.cwl") as f:
        subwf = StripYAMLComments(f.read())

    expected_body = {
        'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
        'repository': 'arvados',
        'script_version': 'master',
        'script': 'crunchrunner',
        'script_parameters': {
            'tasks': [{
                'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'
                },
                'task.vwd': {
                    'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
                    'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
                },
                'command': [u'cwltool', u'--no-container', u'--move-outputs',
                            u'--preserve-entire-environment', u'workflow.cwl#main',
                            u'cwl.input.yml'],
                'task.stdout': 'cwl.output.json'
            }]
        },
        'runtime_constraints': {
            'min_scratch_mb_per_node': 4096,
            'min_cores_per_node': 3,
            'docker_image': 'arvados/jobs',
            'min_ram_mb_per_node': 1024
        },
        'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
    }
    expected_filters = [
        ['repository', '=', 'arvados'],
        ['script', '=', 'crunchrunner'],
        ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
        ['docker_image_locator', 'in docker', 'arvados/jobs'],
    ]

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher(expected_body),
        filters=expected_filters,
        find_or_create=True)
def test_overall_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Verify the container request submitted for the echo-wf.cwl workflow.

    The workflow is wrapped in an ``ArvadosWorkflow``, its first two jobs
    are run, and the last ``container_requests().create`` call is compared
    against the expected request body (3 vcpus, 1 GiB RAM, 3 GiB of
    /var/spool/cwl scratch, workflow and input mounted from a collection).
    """
    image_uuid = "zzzzz-4zz18-zzzzzzzzzzzzzzz"
    image_pdh = "99999999999999999999999999999993+99"
    packed_pdh = '99999999999999999999999999999996+99'

    arv_docker_clear_cache()
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.executor.ArvCwlExecutor(api)
    self.assertEqual(runner.work_api, 'containers')

    # Canned API answers for the Docker image lookup.
    list_images_in_arv.return_value = [[image_uuid]]
    runner.api.collections().get().execute.return_value = {
        "uuid": image_uuid,
        "portable_data_hash": image_pdh,
    }
    runner.api.collections().list().execute.return_value = {
        "items": [{"uuid": image_uuid, "portable_data_hash": image_pdh}],
    }

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    runner.secret_store = cwltool.secrets.SecretStore()

    loadingContext, runtimeContext = self.helper(runner)
    runner.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
    loadingContext.do_update = True
    tool, metadata = loadingContext.loader.resolve_ref("tests/wf/echo-wf.cwl")

    def fresh_collection(*args, **kwargs):
        # Every collection the code under test creates gets its own mock.
        return CollectionMock(mock.MagicMock(), *args, **kwargs)

    mockcollection.side_effect = fresh_collection

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
    arvtool.formatgraph = None
    step_jobs = arvtool.job({}, mock.MagicMock(), runtimeContext)
    for _ in range(2):
        next(step_jobs).run(runtimeContext)

    with open("tests/wf/echo-subwf.cwl") as handle:
        subwf = StripYAMLComments(handle.read())

    expected_mounts = {
        '/var/spool/cwl/cwl.input.yml': {
            'kind': 'collection',
            'portable_data_hash': packed_pdh,
            'path': 'cwl.input.yml',
        },
        '/var/spool/cwl/workflow.cwl': {
            'kind': 'collection',
            'portable_data_hash': packed_pdh,
            'path': 'workflow.cwl',
        },
        'stdout': {
            'kind': 'file',
            'path': '/var/spool/cwl/cwl.output.json',
        },
        '/tmp': {'kind': 'tmp', 'capacity': 1073741824},
        '/var/spool/cwl': {'kind': 'tmp', 'capacity': 3221225472},
    }
    expected_request = {
        'name': u'echo-subwf',
        'state': 'Committed',
        'priority': 500,
        'use_existing': True,
        'output_ttl': 0,
        'output_name': u'Output for step echo-subwf',
        'output_path': '/var/spool/cwl',
        'cwd': '/var/spool/cwl',
        'container_image': '99999999999999999999999999999993+99',
        'command': [
            u'cwltool',
            u'--no-container',
            u'--move-outputs',
            u'--preserve-entire-environment',
            u'workflow.cwl#main',
            u'cwl.input.yml',
        ],
        'environment': {'HOME': '/var/spool/cwl', 'TMPDIR': '/tmp'},
        'runtime_constraints': {'API': True, 'vcpus': 3, 'ram': 1073741824},
        'mounts': expected_mounts,
        'secret_mounts': {},
        'scheduling_parameters': {},
        'properties': {},
    }

    create_call = runner.api.container_requests().create
    create_call.assert_called_with(body=JsonDiffMatcher(expected_request))
def test_run(self, list_images_in_arv, mockcollection):
    """Check the jobs-API request and uploaded files for scatter2.cwl.

    Builds an ``ArvadosWorkflow`` for ``tests/wf/scatter2.cwl`` against the
    jobs API, runs the first two jobs it yields, then asserts:

    * the last ``jobs().create`` call has the expected crunchrunner
      parameters, runtime constraints, filters and ``find_or_create=True``;
    * the mocked collection received writes of the packed sub-workflow
      (``tests/wf/scatter2_subwf.cwl``) and of the input object.

    ``list_images_in_arv`` and ``mockcollection`` are presumably injected
    by ``mock.patch`` decorators that are outside this view.
    """
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.ArvCwlRunner(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # The attribute has to be spelled ``return_value`` exactly: a MagicMock
    # creates any attribute on demand, so a misspelled name is accepted
    # silently and the stub never takes effect.
    runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

    tool, metadata = document_loader.resolve_ref("tests/wf/scatter2.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"

    make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                       collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, work_api="jobs", avsc_names=avsc_names,
                                          basedir="", make_fs_access=make_fs_access, loader=document_loader,
                                          makeTool=runner.arv_make_tool, metadata=metadata)
    arvtool.formatgraph = None
    it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
    # The builtin next() works on both Python 2 and 3; the .next() method
    # exists only on Python 2 iterators.
    next(it).run()
    next(it).run()

    with open("tests/wf/scatter2_subwf.cwl") as f:
        subwf = f.read()

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher({
            'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
            'repository': 'arvados',
            'script_version': 'master',
            'script': 'crunchrunner',
            'script_parameters': {
                'tasks': [{'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'},
                           'task.vwd': {
                               'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
                               'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
                           },
                           'command': [u'cwltool', u'--no-container', u'--move-outputs', u'--preserve-entire-environment', u'workflow.cwl#main', u'cwl.input.yml'],
                           'task.stdout': 'cwl.output.json'}]},
            'runtime_constraints': {
                'min_scratch_mb_per_node': 2048,
                'min_cores_per_node': 1,
                'docker_image': 'arvados/jobs',
                'min_ram_mb_per_node': 1024
            },
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
        filters=[['repository', '=', 'arvados'],
                 ['script', '=', 'crunchrunner'],
                 ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                 ['docker_image_locator', 'in docker', 'arvados/jobs']],
        find_or_create=True)

    mockcollection().open().__enter__().write.assert_has_calls([mock.call(subwf)])
    mockcollection().open().__enter__().write.assert_has_calls([mock.call(
        '''{ "sleeptime": 5 }''')])
def test_run(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Verify the container request and uploaded files for scatter2.cwl.

    Wraps ``tests/wf/scatter2.cwl`` in an ``ArvadosWorkflow``, runs the
    first two jobs it yields and then checks that

    * the last ``container_requests().create`` call matches the expected
      request for the ``scatterstep`` step, and
    * the shared collection mock was written with the packed sub-workflow
      and with the serialized input object.
    """
    image_pdh = "99999999999999999999999999999993+99"
    keep_pdh = "99999999999999999999999999999999+118"
    packed_pdh = "99999999999999999999999999999996+99"

    arv_docker_clear_cache()
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.executor.ArvCwlExecutor(api)
    self.assertEqual(runner.work_api, 'containers')

    # Canned API answers for the Docker image lookup.
    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": image_pdh,
    }
    runner.api.collections().list().execute.return_value = {
        "items": [{
            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz",
            "portable_data_hash": image_pdh,
        }],
    }

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0
    runner.secret_store = cwltool.secrets.SecretStore()

    loadingContext, runtimeContext = self.helper(runner)
    runner.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)

    tool, metadata = loadingContext.loader.resolve_ref("tests/wf/scatter2.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    # One shared mock records the writes made through every collection.
    mockc = mock.MagicMock()

    def shared_collection(*args, **kwargs):
        return CollectionMock(mockc, *args, **kwargs)

    mockcollection.side_effect = shared_collection
    token_file = arvados.arvfile.ArvadosFile(mock.MagicMock(), "token.txt")
    mockcollectionreader().find.return_value = token_file

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
    arvtool.formatgraph = None
    step_jobs = arvtool.job({}, mock.MagicMock(), runtimeContext)
    for _ in range(2):
        next(step_jobs).run(runtimeContext)

    with open("tests/wf/scatter2_subwf.cwl") as handle:
        subwf = StripYAMLComments(handle.read()).rstrip()

    expected_mounts = {
        "/keep/99999999999999999999999999999999+118": {
            "kind": "collection",
            "portable_data_hash": keep_pdh,
        },
        "/tmp": {"kind": "tmp", "capacity": 1073741824},
        "/var/spool/cwl": {"kind": "tmp", "capacity": 1073741824},
        "/var/spool/cwl/cwl.input.yml": {
            "kind": "collection",
            "portable_data_hash": packed_pdh,
            "path": "cwl.input.yml",
        },
        "/var/spool/cwl/workflow.cwl": {
            "kind": "collection",
            "portable_data_hash": packed_pdh,
            "path": "workflow.cwl",
        },
        "stdout": {
            "kind": "file",
            "path": "/var/spool/cwl/cwl.output.json",
        },
    }
    expected_request = {
        "name": "scatterstep",
        "state": "Committed",
        "priority": 500,
        "use_existing": True,
        "output_ttl": 0,
        "output_name": "Output for step scatterstep",
        "output_path": "/var/spool/cwl",
        "cwd": "/var/spool/cwl",
        "container_image": "99999999999999999999999999999993+99",
        "command": [
            "cwltool",
            "--no-container",
            "--move-outputs",
            "--preserve-entire-environment",
            "workflow.cwl#main",
            "cwl.input.yml",
        ],
        "environment": {"HOME": "/var/spool/cwl", "TMPDIR": "/tmp"},
        "runtime_constraints": {"ram": 1073741824, "vcpus": 1},
        "mounts": expected_mounts,
        "secret_mounts": {},
        "scheduling_parameters": {},
        "properties": {},
    }

    create_call = runner.api.container_requests().create
    create_call.assert_called_with(body=JsonDiffMatcher(expected_request))

    expected_input = '''{ "fileblub": { "basename": "token.txt", "class": "File", "location": "/keep/99999999999999999999999999999999+118/token.txt", "size": 0 }, "sleeptime": 5 }'''
    mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)])
    mockc.open().__enter__().write.assert_has_calls([mock.call(expected_input)])
def test_run(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Check the jobs-API request and uploaded files for scatter2.cwl.

    Builds an ``ArvadosWorkflow`` for ``tests/wf/scatter2.cwl``, runs the
    first two jobs it yields, then asserts:

    * the last ``jobs().create`` call has the expected crunchrunner
      parameters, runtime constraints, filters and ``find_or_create=True``;
    * the shared collection mock received writes of the packed
      sub-workflow (``tests/wf/scatter2_subwf.cwl``) and of the serialized
      input object.

    The three mock arguments are presumably injected by ``mock.patch``
    decorators that are outside this view.
    """
    arvados_cwl.add_arv_hints()

    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()

    runner = arvados_cwl.executor.ArvCwlExecutor(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # The attribute has to be spelled ``return_value`` exactly: a MagicMock
    # creates any attribute on demand, so a misspelled name is accepted
    # silently and the stub never takes effect.
    runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
    runner.api.collections().list().execute.return_value = {
        "items": [{
            "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzzz",
            "portable_data_hash": "99999999999999999999999999999993+99"}]}

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    loadingContext, runtimeContext = self.helper(runner)

    tool, metadata = loadingContext.loader.resolve_ref("tests/wf/scatter2.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    # A single shared mock backs every CollectionMock so that all writes
    # can be inspected in one place after the jobs have run.
    mockc = mock.MagicMock()
    mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(mockc, *args, **kwargs)
    mockcollectionreader().find.return_value = arvados.arvfile.ArvadosFile(mock.MagicMock(), "token.txt")

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
    arvtool.formatgraph = None
    it = arvtool.job({}, mock.MagicMock(), runtimeContext)

    next(it).run(runtimeContext)
    next(it).run(runtimeContext)

    with open("tests/wf/scatter2_subwf.cwl") as f:
        subwf = StripYAMLComments(f.read())

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher({
            'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
            'repository': 'arvados',
            'script_version': 'master',
            'script': 'crunchrunner',
            'script_parameters': {
                'tasks': [{'task.env': {
                    'HOME': '$(task.outdir)',
                    'TMPDIR': '$(task.tmpdir)'},
                           'task.vwd': {
                               'workflow.cwl': '$(task.keep)/99999999999999999999999999999996+99/workflow.cwl',
                               'cwl.input.yml': '$(task.keep)/99999999999999999999999999999996+99/cwl.input.yml'
                           },
                           'command': [u'cwltool', u'--no-container', u'--move-outputs', u'--preserve-entire-environment', u'workflow.cwl#main', u'cwl.input.yml'],
                           'task.stdout': 'cwl.output.json'}]},
            'runtime_constraints': {
                'min_scratch_mb_per_node': 2048,
                'min_cores_per_node': 1,
                'docker_image': 'arvados/jobs',
                'min_ram_mb_per_node': 1024
            },
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
        filters=[['repository', '=', 'arvados'],
                 ['script', '=', 'crunchrunner'],
                 ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                 ['docker_image_locator', 'in docker', 'arvados/jobs']],
        find_or_create=True)

    mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)])
    mockc.open().__enter__().write.assert_has_calls([mock.call(
        bytes(b'''{ "fileblub": { "basename": "token.txt", "class": "File", "location": "/keep/99999999999999999999999999999999+118/token.txt", "size": 0 }, "sleeptime": 5 }'''))])