def modify_jsonyaml_paths(jsonyaml_file):
    """Rewrite relative paths in a JSON/YAML parameter file so they are
    relative to the file's own location.

    :param jsonyaml_file: Path to a json/yaml file.
    :return: JSON string of the resolved document, with each ``path``
        entry replaced by a ``location`` URI.
    """
    loader = schema_salad.ref_resolver.Loader(
        {"location": {"@type": "@id"}, "path": {"@type": "@id"}})
    input_dict, _ = loader.resolve_ref(jsonyaml_file, checklinks=False)
    basedir = os.path.dirname(jsonyaml_file)

    def fixpaths(d):
        """Make sure all paths have a URI scheme."""
        if not isinstance(d, dict) or "path" not in d:
            return
        path = d.pop("path")
        if ":" in path:
            # Already carries a scheme; use it verbatim.
            d["location"] = path
        else:
            # Scheme-less: anchor to the parameter file's directory.
            absolute = os.path.normpath(
                os.path.join(os.getcwd(), basedir, path))
            d["location"] = urllib.pathname2url(absolute)

    visit(input_dict, fixpaths)
    return json.dumps(input_dict)
def GetWorkflowLog(self, workflow_id):
    """Build a WES workflow-log record for an Arvados container request.

    :param workflow_id: Arvados container request UUID.
    :return: dict with the run's state, stderr text, and (keep-referenced)
        outputs, shaped for the WES GetWorkflowLog response.
    """
    api = get_api()
    request = api.container_requests().get(uuid=workflow_id).execute()
    if request["container_uuid"]:
        container = api.containers().get(
            uuid=request["container_uuid"]).execute()  # NOQA
    else:
        # No container assigned yet: synthesize a queued placeholder so
        # the state/exit_code lookups below still work.
        container = {"state": "Queued", "exit_code": None}
    # Runner log accumulated in request properties; collection stderr is
    # appended below if a log collection exists.
    stderr = request["properties"].get("arvados-cwl-runner-log", "")
    outputobj = {}
    if request["output_uuid"]:
        c = arvados.collection.CollectionReader(request["output_uuid"],
                                                api_client=api)
        with c.open("cwl.output.json") as f:
            outputobj = json.load(f)

        def keepref(d):
            # Rewrite each output location as a keep-web URL into the
            # output collection (by portable data hash).
            if isinstance(d, dict) and "location" in d:
                d["location"] = "%sc=%s/_/%s" % (
                    api._resourceDesc["keepWebServiceUrl"],
                    c.portable_data_hash(), d["location"])  # NOQA

        visit(outputobj, keepref)
    if request["log_uuid"]:
        c = arvados.collection.CollectionReader(request["log_uuid"],
                                                api_client=api)
        if "stderr.txt" in c:
            with c.open("stderr.txt") as f:
                stderr += f.read()
    r = {
        "workflow_id": request["uuid"],
        "request": {},
        # NOTE(review): statemap presumably maps Arvados container states
        # to WES states -- defined elsewhere in this module.
        "state": statemap[container["state"]],
        "workflow_log": {
            "cmd": [""],
            "startTime": "",
            "endTime": "",
            "stdout": "",
            "stderr": stderr
        },
        "task_logs": [],
        "outputs": outputobj
    }
    # exit_code is only meaningful once the container has finished.
    if container["exit_code"] is not None:
        r["workflow_log"]["exit_code"] = container["exit_code"]
    return r
def GetWorkflowLog(self, workflow_id):
    """Build a WES workflow-log record for an Arvados container request.

    :param workflow_id: Arvados container request UUID.
    :return: dict with the run's state, stderr text, and keep-referenced
        outputs, shaped for the WES GetWorkflowLog response.
    """
    api = get_api()
    request = api.container_requests().get(uuid=workflow_id).execute()
    # BUG FIX: a request that has not been scheduled yet has no container
    # UUID; previously this called containers().get(uuid=None) and failed.
    # Fall back to a queued placeholder (mirrors the sibling handler).
    if request["container_uuid"]:
        container = api.containers().get(
            uuid=request["container_uuid"]).execute()
    else:
        container = {"state": "Queued", "exit_code": None}
    outputobj = {}
    if request["output_uuid"]:
        c = arvados.collection.CollectionReader(request["output_uuid"])
        with c.open("cwl.output.json") as f:
            outputobj = json.load(f)

        def keepref(d):
            # Rewrite output locations as keep: references into the
            # output collection.
            if isinstance(d, dict) and "location" in d:
                d["location"] = "keep:%s/%s" % (c.portable_data_hash(),
                                                d["location"])

        visit(outputobj, keepref)
    stderr = ""
    if request["log_uuid"]:
        c = arvados.collection.CollectionReader(request["log_uuid"])
        if "stderr.txt" in c:
            with c.open("stderr.txt") as f:
                stderr = f.read()
    r = {
        "workflow_id": request["uuid"],
        "request": {},
        "state": statemap[container["state"]],
        "workflow_log": {
            "cmd": [""],
            "startTime": "",
            "endTime": "",
            "stdout": "",
            "stderr": stderr
        },
        "task_logs": [],
        "outputs": outputobj
    }
    # exit_code is only meaningful once the container has finished.
    if container["exit_code"] is not None:
        r["workflow_log"]["exitCode"] = container["exit_code"]
    return r
def modify_jsonyaml_paths(jsonyaml_file, path_keys=None):
    """
    Changes relative paths in a json/yaml file to be relative
    to where the JSON/YAML file is located.

    Args:
        jsonyaml_file (str): filepath or URL for JSON/YAML file
            containing workflow parameters
        path_keys (:obj:`list` of :obj:`str`): list of workflow parameter
            names to modify

    Returns:
        str: string contents of JSON/YAML file with modified paths
    """
    logger.debug(
        "Resolving paths in parameters file '{}'".format(jsonyaml_file))
    # Always resolve the reserved CWL keys; optionally also treat
    # caller-named parameter keys as "@id" references (i.e. as paths).
    resolve_keys = {"path": {"@type": "@id"}, 'location': {"@type": "@id"}}
    if path_keys is not None:
        params_json = get_json(jsonyaml_file)
        for k, v in params_json.items():
            # NOTE(review): assumes v is indexable (string or list of
            # strings) -- 'v[0]' inspects the first character/element for
            # a URI scheme, and ':' in v repeats the check on the whole
            # value. A non-indexable value would raise TypeError here;
            # confirm upstream guarantees. Values already carrying a
            # scheme are left unresolved.
            if k in path_keys and ':' not in v[0] and ':' not in v:
                resolve_keys[k] = {"@type": "@id"}
    loader = schema_salad.ref_resolver.Loader(resolve_keys)
    # checklinks=False: do not require referenced files to exist.
    input_dict, _ = loader.resolve_ref(jsonyaml_file, checklinks=False)
    basedir = os.path.dirname(jsonyaml_file)

    def fixpaths(d):
        """Make sure all paths have a URI scheme."""
        if isinstance(d, dict):
            if "path" in d:
                if ":" not in d["path"]:
                    # Scheme-less path: anchor it to the parameter file's
                    # directory and convert to a file URL.
                    local_path = os.path.normpath(
                        os.path.join(os.getcwd(), basedir, d["path"]))
                    d["location"] = urllib.pathname2url(local_path)
                else:
                    d["location"] = d["path"]
                # CWL prefers "location"; drop the legacy "path" key.
                del d["path"]

    visit(input_dict, fixpaths)
    return json.dumps(input_dict)
def GetRunLog(self, run_id):
    """Build a WES run-log record (run plus per-task logs) for an
    Arvados container request.

    :param run_id: Arvados container request UUID for the top-level run.
    :return: dict shaped for the WES GetRunLog response: request params,
        state, run_log, task_logs, and keep-web-referenced outputs.
    """
    api = get_api()
    request = api.container_requests().get(uuid=run_id).execute()
    if request["container_uuid"]:
        container = api.containers().get(
            uuid=request["container_uuid"]).execute()  # NOQA
        # Child (task) container requests spawned by the run's container,
        # and the containers that fulfilled them.
        task_reqs = arvados.util.list_all(
            api.container_requests().list,
            filters=[["requesting_container_uuid", "=", container["uuid"]]])
        tasks = arvados.util.list_all(
            api.containers().list,
            filters=[[
                "uuid", "in",
                [tr["container_uuid"] for tr in task_reqs]
            ]])
        # Index containers by UUID for log_object(); include the top-level
        # container itself so log_object(request) resolves too.
        containers_map = {c["uuid"]: c for c in tasks}
        containers_map[container["uuid"]] = container
    else:
        # No container assigned yet: priority 0 means the request was
        # cancelled, otherwise it is still queued.
        container = {
            "state": "Queued" if request["priority"] > 0 else "Cancelled",
            "exit_code": None,
            "log": None
        }
        tasks = []
        containers_map = {}
        task_reqs = []
    outputobj = {}
    if request["output_uuid"]:
        c = arvados.collection.CollectionReader(request["output_uuid"],
                                                api_client=api)
        with c.open("cwl.output.json") as f:
            # Tolerate an empty/partial output file (e.g. failed run).
            try:
                outputobj = json.load(f)
            except ValueError:
                pass

        def keepref(d):
            # Rewrite each output location as a keep-web URL into the
            # output collection (by portable data hash).
            if isinstance(d, dict) and "location" in d:
                d["location"] = "%sc=%s/_/%s" % (
                    api._resourceDesc["keepWebServiceUrl"],
                    c.portable_data_hash(), d["location"])  # NOQA

        visit(outputobj, keepref)

    def log_object(cr):
        """Shape one container request `cr` as a WES log entry."""
        if cr["container_uuid"]:
            containerlog = containers_map[cr["container_uuid"]]
        else:
            # Placeholder for a task whose container never started.
            containerlog = {
                "started_at": "",
                "finished_at": "",
                "exit_code": None,
                "log": ""
            }
        r = {
            "name": cr["name"] or "",
            "cmd": cr["command"],
            "start_time": containerlog["started_at"] or "",
            "end_time": containerlog["finished_at"] or "",
            "stdout": "",
            "stderr": "",
            "exit_code": containerlog["exit_code"] or 0
        }
        if containerlog["log"]:
            # Direct keep-web links to the raw log files.
            r["stdout_keep"] = "%sc=%s/_/%s" % (
                api._resourceDesc["keepWebServiceUrl"],
                containerlog["log"], "stdout.txt")  # NOQA
            r["stderr_keep"] = "%sc=%s/_/%s" % (
                api._resourceDesc["keepWebServiceUrl"],
                containerlog["log"], "stderr.txt")  # NOQA
        # Dynamic log endpoints served relative to the current request URL.
        r["stdout"] = "%s/x-dynamic-logs/stdout" % (connexion.request.url)
        r["stderr"] = "%s/x-dynamic-logs/stderr" % (connexion.request.url)
        return r

    r = {
        "run_id": request["uuid"],
        "request": {
            "workflow_url": "",
            # Original submitted parameters, recovered from the mount the
            # runner was given.
            "workflow_params": request["mounts"].get(
                "/var/lib/cwl/cwl.input.json", {}).get("content", {})
        },
        "state": statemap[container["state"]],
        "run_log": log_object(request),
        "task_logs": [log_object(t) for t in task_reqs],
        "outputs": outputobj
    }
    return r
def main(argv=None):
    """Command-line client for a GA4GH Workflow Execution Service.

    Parses options, dispatches query subcommands (--info, --list, --log,
    --get, --cancel), or submits a CWL workflow run and optionally waits
    for it to finish, printing its outputs.

    :param argv: argument list; defaults to ``sys.argv[1:]`` at call time.
    :return: process exit code (0 on success, 1 on workflow failure).
    """
    if argv is None:
        # BUG FIX: the old default `argv=sys.argv[1:]` was evaluated once
        # at import time; bind it at call time instead.
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description='Workflow Execution Service')
    parser.add_argument("--host", type=str,
                        default=os.environ.get("WES_API_HOST"))
    parser.add_argument("--auth", type=str,
                        default=os.environ.get("WES_API_AUTH"))
    parser.add_argument("--proto", type=str,
                        default=os.environ.get("WES_API_PROTO", "https"))
    parser.add_argument("--quiet", action="store_true", default=False)
    parser.add_argument("--outdir", type=str)
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--run", action="store_true", default=False)
    exgroup.add_argument("--get", type=str, default=None)
    exgroup.add_argument("--log", type=str, default=None)
    exgroup.add_argument("--cancel", type=str, default=None)
    exgroup.add_argument("--list", action="store_true", default=False)
    exgroup.add_argument("--info", action="store_true", default=False)
    exgroup.add_argument("--version", action="store_true", default=False)
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--wait", action="store_true", default=True,
                         dest="wait")
    exgroup.add_argument("--no-wait", action="store_false", default=True,
                         dest="wait")
    parser.add_argument("workflow_url", type=str, nargs="?", default=None)
    parser.add_argument("job_order", type=str, nargs="?", default=None)
    args = parser.parse_args(argv)

    if args.version:
        pkg = pkg_resources.require("wes_service")
        print(u"%s %s" % (sys.argv[0], pkg[0].version))
        exit(0)

    # Authenticate every request via an Authorization header.
    http_client = RequestsClient()
    split = urlparse.urlsplit("%s://%s/" % (args.proto, args.host))
    http_client.set_api_key(
        split.hostname, args.auth,
        param_name='Authorization', param_in='header')
    client = SwaggerClient.from_url(
        "%s://%s/swagger.json" % (args.proto, args.host),
        http_client=http_client, config={'use_models': False})

    # Query-only subcommands: perform the call, dump the result, return.
    if args.info:
        response = client.WorkflowExecutionService.GetServiceInfo()
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.list:
        response = client.WorkflowExecutionService.ListWorkflows()
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.log:
        response = client.WorkflowExecutionService.GetWorkflowLog(
            workflow_id=args.log)
        sys.stdout.write(response.result()["workflow_log"]["stderr"])
        return 0
    if args.get:
        response = client.WorkflowExecutionService.GetWorkflowStatus(
            workflow_id=args.get)
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.cancel:
        response = client.WorkflowExecutionService.CancelJob(
            workflow_id=args.cancel)
        json.dump(response.result(), sys.stdout, indent=4)
        return 0

    # Submission path: load the job order and absolutize its locations.
    # (renamed from `input`, which shadowed the builtin)
    with open(args.job_order) as f:
        job_input = yaml.safe_load(f)
    basedir = os.path.dirname(args.job_order)

    def fixpaths(d):
        # Resolve scheme-less locations relative to the job order file.
        if isinstance(d, dict) and "location" in d:
            if ":" not in d["location"]:
                d["location"] = urllib.pathname2url(os.path.normpath(
                    os.path.join(os.getcwd(), basedir, d["location"])))

    visit(job_input, fixpaths)

    workflow_url = args.workflow_url
    if not workflow_url.startswith("/") and ":" not in workflow_url:
        workflow_url = os.path.abspath(workflow_url)

    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    r = client.WorkflowExecutionService.RunWorkflow(body={
        "workflow_url": workflow_url,
        "workflow_params": job_input,
        "workflow_type": "CWL",
        "workflow_type_version": "v1.0"}).result()

    if args.wait:
        logging.info("Workflow id is %s", r["workflow_id"])
    else:
        # Fire-and-forget: print the id and stop.
        sys.stdout.write(r["workflow_id"] + "\n")
        exit(0)

    # Poll until the workflow leaves an active state.
    r = client.WorkflowExecutionService.GetWorkflowStatus(
        workflow_id=r["workflow_id"]).result()
    while r["state"] in ("QUEUED", "INITIALIZING", "RUNNING"):
        time.sleep(1)
        r = client.WorkflowExecutionService.GetWorkflowStatus(
            workflow_id=r["workflow_id"]).result()

    logging.info("State is %s", r["state"])

    s = client.WorkflowExecutionService.GetWorkflowLog(
        workflow_id=r["workflow_id"]).result()
    logging.info(s["workflow_log"]["stderr"])

    # Some servers return a spurious null "fields" key; drop it.
    if "fields" in s["outputs"] and s["outputs"]["fields"] is None:
        del s["outputs"]["fields"]
    json.dump(s["outputs"], sys.stdout, indent=4)
    if r["state"] == "COMPLETE":
        return 0
    else:
        return 1
def main(argv=sys.argv[1:]):
    """Command-line client for a GA4GH WES v1 endpoint.

    Parses options, dispatches query subcommands (--list, --log, --get,
    --info), or submits a CWL workflow run and optionally waits for
    completion, printing its outputs.

    :param argv: argument list (NOTE(review): this default is evaluated
        once at import time, a known Python gotcha).
    :return: process exit code (0 on success, 1 on workflow failure).
    """
    parser = argparse.ArgumentParser(description='Workflow Execution Service')
    parser.add_argument("--host", type=str,
                        default=os.environ.get("WES_API_HOST"))
    parser.add_argument("--auth", type=str,
                        default=os.environ.get("WES_API_AUTH"))
    parser.add_argument("--proto", type=str,
                        default=os.environ.get("WES_API_PROTO", "https"))
    parser.add_argument("--quiet", action="store_true", default=False)
    parser.add_argument("--outdir", type=str)
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--run", action="store_true", default=False)
    exgroup.add_argument("--get", type=str, default=None)
    exgroup.add_argument("--log", type=str, default=None)
    exgroup.add_argument("--list", action="store_true", default=False)
    exgroup.add_argument("--info", action="store_true", default=False)
    exgroup.add_argument("--version", action="store_true", default=False)
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--wait", action="store_true", default=True,
                         dest="wait")
    exgroup.add_argument("--no-wait", action="store_false", default=True,
                         dest="wait")
    parser.add_argument("workflow_url", type=str, nargs="?", default=None)
    parser.add_argument("job_order", type=str, nargs="?", default=None)
    args = parser.parse_args(argv)
    if args.version:
        pkg = pkg_resources.require("wes_service")
        print(u"%s %s" % (sys.argv[0], pkg[0].version))
        exit(0)
    # Authenticate every request via an Authorization header.
    http_client = RequestsClient()
    split = urlparse.urlsplit("%s://%s/" % (args.proto, args.host))
    http_client.set_api_key(split.hostname, args.auth,
                            param_name='Authorization', param_in='header')
    client = SwaggerClient.from_url(
        "%s://%s/ga4gh/wes/v1/swagger.json" % (args.proto, args.host),
        http_client=http_client, config={'use_models': False})
    # Query-only subcommands: perform the call, dump the result, return.
    if args.list:
        response = client.WorkflowExecutionService.ListWorkflows()
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.log:
        response = client.WorkflowExecutionService.GetWorkflowLog(
            workflow_id=args.log)
        sys.stdout.write(response.result()["workflow_log"]["stderr"])
        return 0
    if args.get:
        response = client.WorkflowExecutionService.GetWorkflowLog(
            workflow_id=args.get)
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.info:
        response = client.WorkflowExecutionService.GetServiceInfo()
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    # Submission path: resolve job order references ("path"/"location"
    # treated as @id) relative to the job order file.
    loader = schema_salad.ref_resolver.Loader({
        "location": {
            "@type": "@id"
        },
        "path": {
            "@type": "@id"
        }
    })
    input, _ = loader.resolve_ref(args.job_order)
    basedir = os.path.dirname(args.job_order)

    def fixpaths(d):
        # Normalize "path" to a "location" URI, then enforce this
        # client's restriction that all inputs must be http(s) remote.
        if isinstance(d, dict):
            if "path" in d:
                if ":" not in d["path"]:
                    local_path = os.path.normpath(
                        os.path.join(os.getcwd(), basedir, d["path"]))
                    d["location"] = urllib.pathname2url(local_path)
                else:
                    d["location"] = d["path"]
                del d["path"]
            loc = d.get("location", "")
            if d.get("class") == "Directory":
                # Directories cannot be fetched over plain http(s).
                if loc.startswith("http:") or loc.startswith("https:"):
                    logging.error(
                        "Directory inputs not supported with http references")
                    exit(33)
            # Local files can't be uploaded by this variant of the client;
            # exit code 33 signals unsupported input references.
            if not (loc.startswith("http:") or loc.startswith("https:")
                    or args.job_order.startswith("http:")
                    or args.job_order.startswith("https:")):
                logging.error(
                    "Upload local files not supported, must use http: or https: references."
                )
                exit(33)

    visit(input, fixpaths)
    workflow_url = args.workflow_url
    if not workflow_url.startswith("/") and ":" not in workflow_url:
        workflow_url = "file://" + os.path.abspath(workflow_url)
    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)
    body = {
        "workflow_params": input,
        "workflow_type": "CWL",
        "workflow_type_version": "v1.0"
    }
    if workflow_url.startswith("file://"):
        # Inline a local workflow document instead of sending its URL.
        with open(workflow_url[7:], "r") as f:
            body["workflow_descriptor"] = f.read()
    else:
        body["workflow_url"] = workflow_url
    r = client.WorkflowExecutionService.RunWorkflow(body=body).result()
    if args.wait:
        logging.info("Workflow id is %s", r["workflow_id"])
    else:
        # Fire-and-forget: print the id and stop.
        sys.stdout.write(r["workflow_id"] + "\n")
        exit(0)
    # Poll until the workflow leaves an active state.
    r = client.WorkflowExecutionService.GetWorkflowStatus(
        workflow_id=r["workflow_id"]).result()
    while r["state"] in ("QUEUED", "INITIALIZING", "RUNNING"):
        time.sleep(1)
        r = client.WorkflowExecutionService.GetWorkflowStatus(
            workflow_id=r["workflow_id"]).result()
    logging.info("State is %s", r["state"])
    s = client.WorkflowExecutionService.GetWorkflowLog(
        workflow_id=r["workflow_id"]).result()
    logging.info("Workflow log:\n" + s["workflow_log"]["stderr"])
    # Some servers return a spurious null "fields" key; drop it.
    if "fields" in s["outputs"] and s["outputs"]["fields"] is None:
        del s["outputs"]["fields"]
    json.dump(s["outputs"], sys.stdout, indent=4)
    if r["state"] == "COMPLETE":
        return 0
    else:
        return 1
def main(argv=None):
    """Command-line client for a GA4GH WES v1 service.

    Parses options, dispatches query subcommands (--list, --log, --get,
    --info), or submits a workflow run (with local-file attachments when
    the workflow is a file:// reference) and optionally waits for it.

    :param argv: argument list; defaults to ``sys.argv[1:]`` at call time.
    :return: process exit code (0 on success, 1 on error or failure).
    """
    if argv is None:
        # Bind sys.argv at call time, not import time.
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Workflow Execution Service")
    parser.add_argument("--host", type=str,
                        default=os.environ.get("WES_API_HOST"),
                        help="Example: '--host=localhost:8080'. "
                             "Defaults to WES_API_HOST.")
    parser.add_argument("--auth", type=str,
                        default=os.environ.get("WES_API_AUTH"),
                        help="Defaults to WES_API_AUTH.")
    parser.add_argument("--proto", type=str,
                        default=os.environ.get("WES_API_PROTO", "https"),
                        help="Options: [http, https]. "
                             "Defaults to WES_API_PROTO (https).")
    parser.add_argument("--quiet", action="store_true", default=False)
    parser.add_argument("--outdir", type=str)
    parser.add_argument("--page", type=str, default=None)
    parser.add_argument("--page-size", type=int, default=None)
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--run", action="store_true", default=False)
    exgroup.add_argument("--get", type=str, default=None,
                         help="Specify a <workflow-id>. "
                              "Example: '--get=<workflow-id>'")
    exgroup.add_argument("--log", type=str, default=None,
                         help="Specify a <workflow-id>. "
                              "Example: '--log=<workflow-id>'")
    exgroup.add_argument("--list", action="store_true", default=False)
    exgroup.add_argument("--info", action="store_true", default=False)
    exgroup.add_argument("--version", action="store_true", default=False)
    exgroup = parser.add_mutually_exclusive_group()
    exgroup.add_argument("--wait", action="store_true", default=True,
                         dest="wait")
    exgroup.add_argument("--no-wait", action="store_false", default=True,
                         dest="wait")
    parser.add_argument("workflow_url", type=str, nargs="?", default=None)
    parser.add_argument("job_order", type=str, nargs="?", default=None)
    args = parser.parse_args(argv)

    if args.version:
        pkg = pkg_resources.require("wes_service")
        print(u"%s %s" % (sys.argv[0], pkg[0].version))
        exit(0)

    # Authenticate every request via an Authorization header.
    http_client = RequestsClient()
    split = urlparse.urlsplit("%s://%s/" % (args.proto, args.host))
    http_client.set_api_key(
        split.hostname, args.auth,
        param_name="Authorization", param_in="header")
    client = SwaggerClient.from_url(
        "%s://%s/ga4gh/wes/v1/swagger.json" % (args.proto, args.host),
        http_client=http_client, config={"use_models": False})

    # Query-only subcommands: perform the call, dump the result, return.
    if args.list:
        response = client.WorkflowExecutionService.ListRuns(
            page_token=args.page, page_size=args.page_size)
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.log:
        response = client.WorkflowExecutionService.GetRunLog(
            workflow_id=args.log)
        sys.stdout.write(response.result()["workflow_log"]["stderr"])
        return 0
    if args.get:
        response = client.WorkflowExecutionService.GetRunLog(
            workflow_id=args.get)
        json.dump(response.result(), sys.stdout, indent=4)
        return 0
    if args.info:
        response = client.WorkflowExecutionService.GetServiceInfo()
        json.dump(response.result(), sys.stdout, indent=4)
        return 0

    if not args.workflow_url:
        parser.print_help()
        return 1

    # Infer the workflow type from the file suffix.
    if args.workflow_url.lower().endswith('wdl'):
        wf_type = 'WDL'
    elif args.workflow_url.lower().endswith('cwl'):
        wf_type = 'CWL'
    elif args.workflow_url.lower().endswith('py'):
        wf_type = 'PY'
    else:
        # BUG FIX: wf_type was previously left unbound for any other
        # suffix, raising NameError when building the request parts;
        # fail fast with a clear message instead.
        logging.error(
            "Unrecognized workflow type for '%s' "
            "(expected .wdl, .cwl, or .py)", args.workflow_url)
        return 1

    if not args.job_order:
        logging.error("Missing job order")
        return 1

    # Resolve job order references ("path"/"location" treated as @id)
    # relative to the job order file.
    loader = schema_salad.ref_resolver.Loader({
        "location": {"@type": "@id"},
        "path": {"@type": "@id"}
    })
    input_dict, _ = loader.resolve_ref(args.job_order, checklinks=False)
    basedir = os.path.dirname(args.job_order)

    def fixpaths(d):
        """Make sure all paths have a schema."""
        if isinstance(d, dict):
            if "path" in d:
                if ":" not in d["path"]:
                    local_path = os.path.normpath(
                        os.path.join(os.getcwd(), basedir, d["path"]))
                    d["location"] = urllib.pathname2url(local_path)
                else:
                    d["location"] = d["path"]
                del d["path"]

    visit(input_dict, fixpaths)

    workflow_url = args.workflow_url
    if ":" not in workflow_url:
        workflow_url = "file://" + os.path.abspath(workflow_url)

    if args.quiet:
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    parts = [
        ("workflow_params", json.dumps(input_dict)),
        ("workflow_type", wf_type),
        ("workflow_type_version", "v1.0")
    ]
    # BUG FIX: attachment file handles were opened and never closed; keep
    # them open for the streaming POST, then close in a finally block.
    attachments = []
    try:
        if workflow_url.startswith("file://"):
            # Attach every regular, non-hidden file that sits next to the
            # workflow document.
            rootdir = os.path.dirname(workflow_url[7:])
            for name in os.listdir(rootdir):
                if name.startswith("."):
                    continue
                fn = os.path.join(rootdir, name)
                if os.path.isfile(fn):
                    fh = open(fn, "rb")
                    attachments.append(fh)
                    parts.append(
                        ("workflow_attachment", (fn[len(rootdir) + 1:], fh)))
            parts.append(
                ("workflow_url", os.path.basename(workflow_url[7:])))
        else:
            parts.append(("workflow_url", workflow_url))

        postresult = http_client.session.post(
            "%s://%s/ga4gh/wes/v1/runs" % (args.proto, args.host),
            files=parts,
            headers={"Authorization": args.auth})
    finally:
        for fh in attachments:
            fh.close()

    try:
        r = json.loads(postresult.text)
    except ValueError:
        logging.error("%s", postresult.text)
        exit(1)
    if postresult.status_code != 200:
        logging.error("%s", r)
        exit(1)

    if args.wait:
        logging.info("Workflow run id is %s", r["run_id"])
    else:
        # Fire-and-forget: print the run id and stop.
        sys.stdout.write(r["run_id"] + "\n")
        exit(0)

    # Poll until the run leaves an active state.
    r = client.WorkflowExecutionService.GetRunStatus(
        run_id=r["run_id"]).result()
    while r["state"] in ("QUEUED", "INITIALIZING", "RUNNING"):
        time.sleep(8)
        r = client.WorkflowExecutionService.GetRunStatus(
            run_id=r["run_id"]).result()

    logging.info("State is %s", r["state"])

    s = client.WorkflowExecutionService.GetRunLog(run_id=r["run_id"]).result()

    try:
        # TODO: Only works with Arvados atm -- stderr may be a URL to the
        # raw log rather than the log text itself.
        logging.info(str(s["workflow_log"]["stderr"]))
        logs = requests.get(
            s["workflow_log"]["stderr"],
            headers={"Authorization": args.auth}).text
        logging.info("Workflow log:\n" + logs)
    except (InvalidSchema, MissingSchema):
        # stderr was not a fetchable URL: print it verbatim.
        logging.info("Workflow log:\n" + str(s["workflow_log"]["stderr"]))

    # Some servers return a spurious null "fields" key; drop it.
    if "fields" in s["outputs"] and s["outputs"]["fields"] is None:
        del s["outputs"]["fields"]
    json.dump(s["outputs"], sys.stdout, indent=4)
    if r["state"] == "COMPLETE":
        return 0
    else:
        return 1