def test_basename_field_generation(self):
    """normalizeFilesDirs must derive basename, nameroot and nameext
    from the File's location for a range of filename shapes."""
    # filename -> (expected nameroot, expected nameext)
    cases = [
        ("foo.bar", ("foo", ".bar")),
        ("foo", ("foo", "")),
        (".foo", (".foo", "")),
        ("foo.", ("foo", ".")),
        ("foo.bar.baz", ("foo.bar", ".baz")),
    ]
    for filename, (root, ext) in cases:
        location = "/foo/" + filename
        actual = {"class": "File", "location": location}
        expected = {
            "class": "File",
            "location": location,
            "basename": filename,
            "nameroot": root,
            "nameext": ext,
        }
        normalizeFilesDirs(actual)
        self.assertEqual(actual, expected)
def test_strip_trailing(self):
    """A trailing slash on a Directory location is stripped and the
    basename is filled in from the last path component."""
    directory = {"class": "Directory", "location": "/foo/bar/"}
    normalizeFilesDirs(directory)
    expected = {
        "class": "Directory",
        "location": "/foo/bar",
        "basename": "bar",
    }
    self.assertEqual(expected, directory)
def importFiles(tool):
    """Normalize all File/Directory objects in *tool*, expand Directory
    listings via the standard filesystem, and upload each File through
    toil.importFile (broken references are skipped, not fatal)."""
    visit_class(tool, ("File", "Directory"), pathToLoc)
    normalizeFilesDirs(tool)
    expand_listing = functools.partial(
        get_listing, cwltool.stdfsaccess.StdFsAccess(""), recursive=True)
    adjustDirObjs(tool, expand_listing)
    upload = functools.partial(
        uploadFile, toil.importFile, fileindex, existing, skip_broken=True)
    adjustFileObjs(tool, upload)
def importFiles(tool):
    """Prepare *tool* for execution: resolve paths to locations, normalize
    File/Directory objects, recurse into directories, and import files
    into the Toil file store (skipping broken ones)."""
    visit_class(tool, ("File", "Directory"), pathToLoc)
    normalizeFilesDirs(tool)
    fs_access = cwltool.stdfsaccess.StdFsAccess("")
    adjustDirObjs(
        tool, functools.partial(get_listing, fs_access, recursive=True))
    adjustFileObjs(
        tool,
        functools.partial(uploadFile, toil.importFile, fileindex, existing,
                          skip_broken=True))
def import_files(tool):
    """Prepare File/Directory objects in *tool*: resolve locations, attach
    file sizes, normalize, expand directory listings, then upload each
    File via toil.importFile (broken references are skipped)."""
    visit_class(tool, ("File", "Directory"), path_to_loc)
    visit_class(tool, ("File",), functools.partial(add_sizes, fs_access))
    normalizeFilesDirs(tool)
    list_dirs = functools.partial(get_listing, fs_access, recursive=True)
    adjustDirObjs(tool, list_dirs)
    do_upload = functools.partial(
        uploadFile, toil.importFile, fileindex, existing, skip_broken=True)
    adjustFileObjs(tool, do_upload)
def test_strip_trailing(self):
    """normalizeFilesDirs drops a Directory's trailing slash and sets
    its basename."""
    subject = {
        "class": "Directory",
        "location": "/foo/bar/",
    }
    normalizeFilesDirs(subject)
    self.assertEqual(
        {"class": "Directory", "location": "/foo/bar", "basename": "bar"},
        subject)
def test_basename_field_generation(filename, expected):
    """normalizeFilesDirs fills basename/nameroot/nameext in place from
    the File's location."""
    nameroot, nameext = expected
    location = "/foo/" + filename
    file = {"class": "File", "location": location}
    normalizeFilesDirs(file)
    assert file == {
        "class": "File",
        "location": location,
        "basename": filename,
        "nameroot": nameroot,
        "nameext": nameext,
    }
def test_basename_field_generation(filename, expected):
    """Check that normalizeFilesDirs derives the name fields correctly."""
    root, ext = expected
    actual = {'class': 'File', 'location': '/foo/' + filename}
    normalizeFilesDirs(actual)
    wanted = {
        'class': 'File',
        'location': '/foo/' + filename,
        'basename': filename,
        'nameroot': root,
        'nameext': ext
    }
    assert actual == wanted
def test_basename_field_generation(filename, expected):
    """After normalization the File object must carry basename plus the
    parametrized nameroot/nameext pair."""
    nameroot, nameext = expected
    normalized = {'class': 'File', 'location': '/foo/' + filename}
    normalizeFilesDirs(normalized)
    reference = {'class': 'File', 'location': '/foo/' + filename}
    reference['basename'] = filename
    reference['nameroot'] = nameroot
    reference['nameext'] = nameext
    assert normalized == reference
def test_normalizeFilesDirs(self):
    """normalizeFilesDirs adds basename/nameroot/nameext for plain,
    file:// and http:// locations alike."""
    for location in ("file1.txt",
                     "file:///foo/file1.txt",
                     "http://example.com/file1.txt"):
        obj = {"class": "File", "location": location}
        normalizeFilesDirs(obj)
        self.assertEqual(
            obj,
            {
                "class": "File",
                "location": location,
                'basename': 'file1.txt',
                'nameext': '.txt',
                'nameroot': 'file1'
            })
def test_normalizeFilesDirs(self):
    """Each location scheme (bare path, file://, http://) must yield the
    same derived basename/nameroot/nameext fields."""
    locations = [
        "file1.txt",
        "file:///foo/file1.txt",
        "http://example.com/file1.txt",
    ]
    for loc in locations:
        subject = {"class": "File", "location": loc}
        normalizeFilesDirs(subject)
        expected = {
            "class": "File",
            "location": loc,
            'basename': 'file1.txt',
            'nameext': '.txt',
            'nameroot': 'file1'
        }
        self.assertEqual(subject, expected)
def add_details_to_outputs(outputs):
    """Expand File and Directory locations in *outputs* to file:// URIs,
    normalize the objects, and attach checksums computed through the
    local filesystem."""
    for adjust in (adjustFileObjs, adjustDirObjs):
        adjust(outputs, expand_to_file_uri)
    normalizeFilesDirs(outputs)
    checksum = partial(compute_checksums, StdFsAccess(""))
    adjustFileObjs(outputs, checksum)
def run():
    """Crunch-job entry point: execute a CWL tool inside an Arvados job task.

    Reads the job order from the current job's script_parameters, rewrites
    Keep locators into keep: File references, builds an ArvCwlRunner and
    runs the tool.  On any exception the current job task is updated as
    unsuccessful, recording the final output collection if one exists.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        # The tool itself is mounted from Keep; "cwl:tool" is consumed here.
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare Keep portable data hash, optionally followed by a path.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix PDH-style locations with the keep: scheme; leave others alone.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the location field of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Bare Keep locator strings in the job order become File objects.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options, overridable via arv:* keys in the job order;
        # each recognized key is deleted so it is not passed to the tool.
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                            api_client=runner.api,
                                                            fs_access=make_fs_access(""),
                                                            num_retries=runner.num_retries))

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        # Build the executor argument namespace (mirrors the CLI options).
        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Best effort: record whatever output was produced and flag failure.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress':1.0
                               }).execute()
def test_normalizeFilesDirs(name, file_dir, expected):
    """Each parametrized object must normalize in place to its expected
    form; *name* labels the failing case."""
    subject = file_dir
    normalizeFilesDirs(subject)
    assert subject == expected, name
def run():
    """Crunch-job entry point: execute a CWL tool inside an Arvados job task.

    Variant that drives ArvCwlRunner through an arvargs Namespace and an
    ArvRuntimeContext.  Keep locators in the job order are rewritten to
    keep: File references before execution.  On any exception the current
    job task is updated as unsuccessful, recording the final output
    collection if one exists.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        # The tool itself is mounted from Keep; "cwl:tool" is consumed here.
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare Keep portable data hash, optionally followed by a path.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix PDH-style locations with the keep: scheme; leave others alone.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the location field of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Bare Keep locator strings in the job order become File objects.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options, overridable via arv:* keys in the job order;
        # each recognized key is deleted so it is not passed to the tool.
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        # Base runner arguments; the runtime context below is seeded from these.
        arvargs = argparse.Namespace()
        arvargs.work_api = "jobs"
        arvargs.output_name = output_name
        arvargs.output_tags = output_tags
        arvargs.thread_count = 1

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()},
            keep_params={"num_retries": 4}),
                                          arvargs=arvargs)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        # Runtime context seeded from arvargs, then filled in with the
        # per-job execution settings (mirrors the CLI options).
        args = ArvRuntimeContext(vars(arvargs))
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False
        args.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, args, logger=logger)
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Best effort: record whatever output was produced and flag failure.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress':1.0
                               }).execute()
def run():
    """Crunch-job entry point: execute a CWL tool inside an Arvados job task.

    Older variant without the arv:debug flag: builds the runner from a
    plain OrderedJsonModel API client and drives arv_executor with an
    argparse.Namespace.  On any exception the current job task is updated
    as unsuccessful, recording the final output collection if one exists.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        # The tool itself is mounted from Keep; "cwl:tool" is consumed here.
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare Keep portable data hash, optionally followed by a path.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix PDH-style locations with the keep: scheme; leave others alone.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the location field of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Bare Keep locator strings in the job order become File objects.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options, overridable via arv:* keys in the job order;
        # each recognized key is deleted so it is not passed to the tool.
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                           collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                            api_client=runner.api,
                                                            fs_access=make_fs_access(""),
                                                            num_retries=runner.num_retries))

        # Build the executor argument namespace (mirrors the CLI options).
        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = False
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Best effort: record whatever output was produced and flag failure.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress':1.0
                               }).execute()
def test_normalizeFilesDirs(name, file_dir, expected):
    """After in-place normalization, *file_dir* must equal *expected*;
    the assertion message carries the case *name*."""
    obj = file_dir
    normalizeFilesDirs(obj)
    assert obj == expected, name