def stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds):  # noqa C901
    # only upload objects as files/collections for CWL workflows...
    tool_or_workflow = "tool" if runnable.type != RunnableType.cwl_workflow else "workflow"
    to_posix_lines = runnable.type.is_galaxy_artifact
    job_dict, datasets = PlanemoStagingInterface(ctx, runnable, user_gi, config.version_major).stage(
        tool_or_workflow,
        history_id=history_id,
        job_path=job_path,
        use_path_paste=config.use_path_paste,
        to_posix_lines=to_posix_lines,
    )

    if datasets:
        ctx.vlog("uploaded datasets [%s] for activity, checking history state" % datasets)
        final_state = _wait_for_history(ctx, user_gi, history_id)

        for (dataset, path) in datasets:
            dataset_details = user_gi.histories.show_dataset(
                history_id,
                dataset["id"],
            )
            ctx.vlog("Uploaded dataset for path [%s] with metadata [%s]" % (path, dataset_details))
    else:
        # Mark uploads as ok because nothing to do.
        final_state = "ok"

    ctx.vlog("final state is %s" % final_state)
    if final_state != "ok":
        msg = "Failed to run job final job state is [%s]." % final_state
        summarize_history(ctx, user_gi, history_id)
        raise Exception(msg)

    return job_dict, datasets
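Every variant of stage_in and _execute below blocks on helpers such as _wait_for_history and _wait_for_invocation, which are defined elsewhere in this module. Purely as an illustration of the pattern (the helper name, terminal-state set, and linear backoff are assumptions, not the actual Planemo implementation), a minimal polling loop over bioblend's histories.show_history might look like this:

import time

# Hypothetical stand-in for _wait_for_history; the real helper lives elsewhere
# in this module. Assumes only bioblend's histories.show_history(), which
# returns a dict containing a "state" key.
def _poll_history_state(user_gi, history_id, polling_backoff=0, initial_delay=1):
    delay = initial_delay
    while True:
        state = user_gi.histories.show_history(history_id)["state"]
        if state in ("ok", "error"):  # assumed terminal states
            return state
        time.sleep(delay)
        delay += polling_backoff  # with polling_backoff=0 this polls at a fixed interval

The polling_backoff knob mirrors the keyword later threaded through _execute via kwds.get("polling_backoff", 0).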
def _execute(ctx, config, runnable, job_path, **kwds):
    user_gi = config.user_gi
    admin_gi = config.gi

    history_id = _history_id(user_gi, **kwds)

    galaxy_paths, job_dict, _ = stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds)

    if runnable.type in [RunnableType.galaxy_tool, RunnableType.cwl_tool]:
        response_class = GalaxyToolRunResponse
        tool_id = _verified_tool_id(runnable, user_gi)
        inputs_representation = _inputs_representation(runnable)
        run_tool_payload = dict(
            history_id=history_id,
            tool_id=tool_id,
            inputs=job_dict,
            inputs_representation=inputs_representation,
        )
        ctx.vlog("Post to Galaxy tool API with payload [%s]" % run_tool_payload)
        tool_run_response = user_gi.tools._tool_post(run_tool_payload)

        job = tool_run_response["jobs"][0]
        job_id = job["id"]
        try:
            final_state = _wait_for_job(user_gi, job_id)
        except Exception:
            summarize_history(ctx, user_gi, history_id)
            raise
        if final_state != "ok":
            msg = "Failed to run CWL tool job final job state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(config.log_contents)
            raise Exception(msg)

        ctx.vlog("Final job state was ok, fetching details for job [%s]" % job_id)
        job_info = admin_gi.jobs.show_job(job_id)
        response_kwds = {
            'job_info': job_info,
            'api_run_response': tool_run_response,
        }
        if ctx.verbose:
            summarize_history(ctx, user_gi, history_id)
    elif runnable.type in [RunnableType.galaxy_workflow, RunnableType.cwl_workflow]:
        response_class = GalaxyWorkflowRunResponse
        workflow_id = config.workflow_id(runnable.path)
        ctx.vlog("Found Galaxy workflow ID [%s] for path [%s]" % (workflow_id, runnable.path))
        # TODO: update bioblend to allow inputs_by.
        # invocation = user_gi.workflows.invoke_workflow(
        #     workflow_id,
        #     history_id=history_id,
        #     inputs=job_dict,
        # )
        payload = dict(
            workflow_id=workflow_id,
            history_id=history_id,
            inputs=job_dict,
            inputs_by="name",
            allow_tool_state_corrections=True,
        )
        invocations_url = "%s/%s/invocations" % (
            user_gi._make_url(user_gi.workflows),
            workflow_id,
        )
        invocation = Client._post(user_gi.workflows, payload, url=invocations_url)
        invocation_id = invocation["id"]
        ctx.vlog("Waiting for invocation [%s]" % invocation_id)
        try:
            final_invocation_state = _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id)
        except Exception:
            ctx.vlog("Problem waiting on invocation...")
            summarize_history(ctx, user_gi, history_id)
            raise
        ctx.vlog("Final invocation state is [%s]" % final_invocation_state)
        final_state = _wait_for_history(ctx, user_gi, history_id)
        if final_state != "ok":
            msg = "Failed to run workflow final history state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(config.log_contents)
            raise Exception(msg)

        ctx.vlog("Final history state is 'ok'")
        response_kwds = {
            'workflow_id': workflow_id,
            'invocation_id': invocation_id,
        }
    else:
        raise NotImplementedError()

    run_response = response_class(
        ctx=ctx,
        runnable=runnable,
        user_gi=user_gi,
        history_id=history_id,
        galaxy_paths=galaxy_paths,
        log=config.log_contents,
        **response_kwds
    )
    output_directory = kwds.get("output_directory", None)
    ctx.vlog("collecting outputs from run...")
    run_response.collect_outputs(ctx, output_directory)
    ctx.vlog("collecting outputs complete")
    return run_response
def stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds):

    def upload_func(upload_target):
        if isinstance(upload_target, FileUploadTarget):
            file_path = upload_target.path
            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type="auto",
            )
            name = os.path.basename(file_path)
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            upload_payload["inputs"]["auto_decompress"] = False
            upload_payload["inputs"]["files_0|url_paste"] = "file://%s" % os.path.abspath(file_path)
            upload_payload["inputs"]["files_0|NAME"] = name
            if upload_target.secondary_files:
                upload_payload["inputs"]["files_1|url_paste"] = "file://%s" % os.path.abspath(upload_target.secondary_files)
                upload_payload["inputs"]["files_1|type"] = "upload_dataset"
                upload_payload["inputs"]["files_1|auto_decompress"] = True
                upload_payload["inputs"]["file_count"] = "2"
                upload_payload["inputs"]["force_composite"] = "True"

            ctx.vlog("upload_payload is %s" % upload_payload)
            return user_gi.tools._tool_post(upload_payload, files_attached=False)
        elif isinstance(upload_target, DirectoryUploadTarget):
            tar_path = upload_target.tar_path

            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type="tar",
            )
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            upload_payload["inputs"]["files_0|url_paste"] = "file://%s" % tar_path
            tar_upload_response = user_gi.tools._tool_post(upload_payload, files_attached=False)
            convert_response = user_gi.tools.run_tool(
                tool_id="CONVERTER_tar_to_directory",
                tool_inputs={"input1": {"src": "hda", "id": tar_upload_response["outputs"][0]["id"]}},
                history_id=history_id,
            )
            assert "outputs" in convert_response, convert_response
            return convert_response
        else:
            content = json.dumps(upload_target.object)
            return user_gi.tools.paste_content(
                content,
                history_id,
                file_type="expression.json",
            )

    def create_collection_func(element_identifiers, collection_type):
        payload = {
            "name": "dataset collection",
            "instance_type": "history",
            "history_id": history_id,
            "element_identifiers": element_identifiers,
            "collection_type": collection_type,
            "fields": None if collection_type != "record" else "auto",
        }
        dataset_collections_url = user_gi.url + "/dataset_collections"
        dataset_collection = Client._post(user_gi.histories, payload, url=dataset_collections_url)
        return dataset_collection

    with open(job_path, "r") as f:
        job = yaml.load(f)

    # Figure out what "." should be here instead.
    job_dir = os.path.dirname(job_path)
    job_dict, datasets = galactic_job_json(
        job,
        job_dir,
        upload_func,
        create_collection_func,
        tool_or_workflow="tool" if runnable.type in [RunnableType.cwl_tool, RunnableType.galaxy_tool] else "workflow",
    )

    if datasets:
        final_state = _wait_for_history(ctx, user_gi, history_id)

        for (dataset, path) in datasets:
            dataset_details = user_gi.histories.show_dataset(
                history_id,
                dataset["id"],
            )
            ctx.vlog("Uploaded dataset for path [%s] with metadata [%s]" % (path, dataset_details))
    else:
        # Mark uploads as ok because nothing to do.
        final_state = "ok"

    ctx.vlog("final state is %s" % final_state)
    if final_state != "ok":
        msg = "Failed to run job final job state is [%s]." % final_state
        summarize_history(ctx, user_gi, history_id)
        with open("errored_galaxy.log", "w") as f:
            f.write(config.log_contents)
        raise Exception(msg)

    galaxy_paths = []
    for (dataset, upload_target) in datasets:
        if isinstance(upload_target, FileUploadTarget):
            local_path = upload_target.path
            ctx.vlog("fetching full dataset for %s, %s" % (dataset, local_path))
            dataset_full = user_gi.datasets.show_dataset(dataset["id"])
            galaxy_path = dataset_full["file_name"]
            ctx.vlog("galaxy_path is %s" % galaxy_path)
            job_path = os.path.join(job_dir, local_path)
            galaxy_paths.append((job_path, galaxy_path))

    ctx.vlog("galaxy_paths are %s" % galaxy_paths)
    return galaxy_paths, job_dict, datasets
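The galaxy_paths list returned by this older stage_in pairs each local job path with the server-side file_name Galaxy reports for the uploaded dataset, and the matching _execute above threads it into the run response. Purely as an illustration of that contract (the paths below are made up, not real output), the structure and a typical consumer might look like:

# Hypothetical example of the (job_path, galaxy_path) pairs built above; real
# values come from user_gi.datasets.show_dataset()["file_name"].
galaxy_paths = [
    ("/home/user/job/input1.fastq", "/galaxy/database/files/000/dataset_1.dat"),
    ("/home/user/job/input2.bed", "/galaxy/database/files/000/dataset_2.dat"),
]

# A consumer (for example, output-collection code) could use the mapping to
# translate local job paths back to their Galaxy counterparts.
path_map = dict(galaxy_paths)

def to_galaxy_path(local_path):
    return path_map.get(local_path, local_path)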
def _execute(ctx, config, runnable, job_path, **kwds):
    user_gi = config.user_gi
    admin_gi = config.gi

    history_id = _history_id(user_gi, **kwds)

    job_dict, _ = stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds)

    if runnable.type in [RunnableType.galaxy_tool, RunnableType.cwl_tool]:
        response_class = GalaxyToolRunResponse
        tool_id = _verified_tool_id(runnable, user_gi)
        inputs_representation = _inputs_representation(runnable)
        run_tool_payload = dict(
            history_id=history_id,
            tool_id=tool_id,
            inputs=job_dict,
            inputs_representation=inputs_representation,
        )
        ctx.vlog("Post to Galaxy tool API with payload [%s]" % run_tool_payload)
        tool_run_response = user_gi.tools._tool_post(run_tool_payload)

        job = tool_run_response["jobs"][0]
        job_id = job["id"]
        try:
            final_state = _wait_for_job(user_gi, job_id)
        except Exception:
            summarize_history(ctx, user_gi, history_id)
            raise
        if final_state != "ok":
            msg = "Failed to run CWL tool job final job state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(log_contents_str(config))
            raise Exception(msg)

        ctx.vlog("Final job state was ok, fetching details for job [%s]" % job_id)
        job_info = admin_gi.jobs.show_job(job_id)
        response_kwds = {
            'job_info': job_info,
            'api_run_response': tool_run_response,
        }
        if ctx.verbose:
            summarize_history(ctx, user_gi, history_id)
    elif runnable.type in [RunnableType.galaxy_workflow, RunnableType.cwl_workflow]:
        response_class = GalaxyWorkflowRunResponse
        workflow_id = config.workflow_id(runnable.path)
        ctx.vlog("Found Galaxy workflow ID [%s] for path [%s]" % (workflow_id, runnable.path))
        # TODO: update bioblend to allow inputs_by.
        # invocation = user_gi.workflows.invoke_workflow(
        #     workflow_id,
        #     history_id=history_id,
        #     inputs=job_dict,
        # )
        payload = dict(
            workflow_id=workflow_id,
            history_id=history_id,
            inputs=job_dict,
            inputs_by="name",
            allow_tool_state_corrections=True,
        )
        invocations_url = "%s/%s/invocations" % (
            user_gi._make_url(user_gi.workflows),
            workflow_id,
        )
        invocation = Client._post(user_gi.workflows, payload, url=invocations_url)
        invocation_id = invocation["id"]
        ctx.vlog("Waiting for invocation [%s]" % invocation_id)
        polling_backoff = kwds.get("polling_backoff", 0)
        try:
            final_invocation_state = _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id, polling_backoff)
        except Exception:
            ctx.vlog("Problem waiting on invocation...")
            summarize_history(ctx, user_gi, history_id)
            raise
        ctx.vlog("Final invocation state is [%s]" % final_invocation_state)
        final_state = _wait_for_history(ctx, user_gi, history_id, polling_backoff)
        if final_state != "ok":
            msg = "Failed to run workflow final history state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(log_contents_str(config))
            raise Exception(msg)

        ctx.vlog("Final history state is 'ok'")
        response_kwds = {
            'workflow_id': workflow_id,
            'invocation_id': invocation_id,
        }
    else:
        raise NotImplementedError()

    run_response = response_class(
        ctx=ctx,
        runnable=runnable,
        user_gi=user_gi,
        history_id=history_id,
        log=log_contents_str(config),
        **response_kwds
    )
    output_directory = kwds.get("output_directory", None)
    ctx.vlog("collecting outputs from run...")
    run_response.collect_outputs(ctx, output_directory)
    ctx.vlog("collecting outputs complete")
    return run_response
def stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds):
    files_attached = [False]

    def upload_func(upload_target):

        def _attach_file(upload_payload, uri, index=0):
            uri = path_or_uri_to_uri(uri)
            is_path = uri.startswith("file://")
            if not is_path or config.use_path_paste:
                upload_payload["inputs"]["files_%d|url_paste" % index] = uri
            else:
                files_attached[0] = True
                path = uri[len("file://"):]
                upload_payload["files_%d|file_data" % index] = attach_file(path)

        if isinstance(upload_target, FileUploadTarget):
            file_path = upload_target.path
            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type=upload_target.properties.get('filetype', None) or "auto",
            )
            name = os.path.basename(file_path)
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            upload_payload["inputs"]["auto_decompress"] = False
            _attach_file(upload_payload, file_path)
            upload_payload["inputs"]["files_0|NAME"] = name
            if upload_target.secondary_files:
                _attach_file(upload_payload, upload_target.secondary_files, index=1)
                upload_payload["inputs"]["files_1|type"] = "upload_dataset"
                upload_payload["inputs"]["files_1|auto_decompress"] = True
                upload_payload["inputs"]["file_count"] = "2"
                upload_payload["inputs"]["force_composite"] = "True"

            ctx.vlog("upload_payload is %s" % upload_payload)
            return user_gi.tools._tool_post(upload_payload, files_attached=files_attached[0])
        elif isinstance(upload_target, DirectoryUploadTarget):
            tar_path = upload_target.tar_path

            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type="tar",
            )
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            _attach_file(upload_payload, tar_path)
            tar_upload_response = user_gi.tools._tool_post(upload_payload, files_attached=files_attached[0])
            convert_response = user_gi.tools.run_tool(
                tool_id="CONVERTER_tar_to_directory",
                tool_inputs={"input1": {"src": "hda", "id": tar_upload_response["outputs"][0]["id"]}},
                history_id=history_id,
            )
            assert "outputs" in convert_response, convert_response
            return convert_response
        else:
            content = json.dumps(upload_target.object)
            return user_gi.tools.paste_content(
                content,
                history_id,
                file_type="expression.json",
            )

    def create_collection_func(element_identifiers, collection_type):
        payload = {
            "name": "dataset collection",
            "instance_type": "history",
            "history_id": history_id,
            "element_identifiers": element_identifiers,
            "collection_type": collection_type,
            "fields": None if collection_type != "record" else "auto",
        }
        dataset_collections_url = user_gi.url + "/dataset_collections"
        dataset_collection = Client._post(user_gi.histories, payload, url=dataset_collections_url)
        return dataset_collection

    with open(job_path, "r") as f:
        job = yaml.safe_load(f)

    # Figure out what "." should be here instead.
    job_dir = os.path.dirname(job_path)
    job_dict, datasets = galactic_job_json(
        job,
        job_dir,
        upload_func,
        create_collection_func,
        tool_or_workflow="tool" if runnable.type in [RunnableType.cwl_tool, RunnableType.galaxy_tool] else "workflow",
    )

    if datasets:
        final_state = _wait_for_history(ctx, user_gi, history_id)

        for (dataset, path) in datasets:
            dataset_details = user_gi.histories.show_dataset(
                history_id,
                dataset["id"],
            )
            ctx.vlog("Uploaded dataset for path [%s] with metadata [%s]" % (path, dataset_details))
    else:
        # Mark uploads as ok because nothing to do.
        final_state = "ok"

    ctx.vlog("final state is %s" % final_state)
    if final_state != "ok":
        msg = "Failed to run job final job state is [%s]." % final_state
        summarize_history(ctx, user_gi, history_id)
        with open("errored_galaxy.log", "w") as f:
            f.write(log_contents_str(config))
        raise Exception(msg)

    return job_dict, datasets
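One detail worth calling out in the variant above: files_attached is a one-element list rather than a plain boolean so that the nested _attach_file closure can flip a flag that upload_func later reads when posting the payload. A minimal, self-contained sketch of that idiom follows; the names are hypothetical and this is not Planemo code. Under Python 3 alone, nonlocal would achieve the same effect, but the mutable-list form also worked on Python 2.

# Illustrative only: a one-element list acts as a writable cell shared between
# nested closures, because assigning to a plain local would just rebind it.
def make_uploader():
    files_attached = [False]

    def attach(path, use_path_paste):
        if not use_path_paste:
            files_attached[0] = True  # mutate the shared cell, visible to upload()
        return path

    def upload(path, use_path_paste=False):
        attach(path, use_path_paste)
        return {"files_attached": files_attached[0]}

    return upload

# upload = make_uploader()
# upload("/tmp/data.txt", use_path_paste=True)  -> {'files_attached': False}
# upload("/tmp/data.txt")                       -> {'files_attached': True}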
def _execute(ctx, config, runnable, job_path, **kwds):
    user_gi = config.user_gi
    admin_gi = config.gi

    history_id = _history_id(user_gi, **kwds)

    try:
        job_dict, _ = stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds)
    except Exception:
        ctx.vlog("Problem with staging in data for Galaxy activities...")
        raise

    if runnable.type in [RunnableType.galaxy_tool, RunnableType.cwl_tool]:
        response_class = GalaxyToolRunResponse
        tool_id = _verified_tool_id(runnable, user_gi)
        inputs_representation = _inputs_representation(runnable)
        run_tool_payload = dict(
            history_id=history_id,
            tool_id=tool_id,
            inputs=job_dict,
            inputs_representation=inputs_representation,
        )
        ctx.vlog("Post to Galaxy tool API with payload [%s]" % run_tool_payload)
        tool_run_response = user_gi.tools._post(run_tool_payload)

        job = tool_run_response["jobs"][0]
        job_id = job["id"]
        try:
            final_state = _wait_for_job(user_gi, job_id)
        except Exception:
            summarize_history(ctx, user_gi, history_id)
            raise
        if final_state != "ok":
            msg = "Failed to run CWL tool job final job state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            raise Exception(msg)

        ctx.vlog("Final job state was ok, fetching details for job [%s]" % job_id)
        job_info = admin_gi.jobs.show_job(job_id)
        response_kwds = {
            'job_info': job_info,
            'api_run_response': tool_run_response,
        }
        if ctx.verbose:
            summarize_history(ctx, user_gi, history_id)
    elif runnable.type in [RunnableType.galaxy_workflow, RunnableType.cwl_workflow]:
        response_class = GalaxyWorkflowRunResponse
        workflow_id = config.workflow_id_for_runnable(runnable)
        ctx.vlog("Found Galaxy workflow ID [%s] for URI [%s]" % (workflow_id, runnable.uri))
        # TODO: Use the following when BioBlend 0.14 is released
        # invocation = user_gi.workflows.invoke_workflow(
        #     workflow_id,
        #     inputs=job_dict,
        #     history_id=history_id,
        #     allow_tool_state_corrections=True,
        #     inputs_by="name",
        # )
        payload = dict(
            workflow_id=workflow_id,
            history_id=history_id,
            inputs=job_dict,
            inputs_by="name",
            allow_tool_state_corrections=True,
        )
        invocations_url = "%s/workflows/%s/invocations" % (
            user_gi.url,
            workflow_id,
        )
        invocation = user_gi.workflows._post(payload, url=invocations_url)
        invocation_id = invocation["id"]
        ctx.vlog("Waiting for invocation [%s]" % invocation_id)
        polling_backoff = kwds.get("polling_backoff", 0)
        final_invocation_state = 'new'
        error_message = ""
        try:
            final_invocation_state = _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id, polling_backoff)
            assert final_invocation_state == 'scheduled'
        except Exception:
            ctx.vlog("Problem waiting on invocation...")
            summarize_history(ctx, user_gi, history_id)
            error_message = "Final invocation state is [%s]" % final_invocation_state
        ctx.vlog("Final invocation state is [%s]" % final_invocation_state)
        final_state = _wait_for_history(ctx, user_gi, history_id, polling_backoff)
        if final_state != "ok":
            msg = "Failed to run workflow final history state is [%s]." % final_state
            error_message = msg if not error_message else "%s. %s" % (error_message, msg)
            ctx.vlog(msg)
            summarize_history(ctx, user_gi, history_id)
        else:
            ctx.vlog("Final history state is 'ok'")

        response_kwds = {
            'workflow_id': workflow_id,
            'invocation_id': invocation_id,
            'history_state': final_state,
            'invocation_state': final_invocation_state,
            'error_message': error_message,
        }
    else:
        raise NotImplementedError()

    run_response = response_class(
        ctx=ctx,
        runnable=runnable,
        user_gi=user_gi,
        history_id=history_id,
        log=log_contents_str(config),
        **response_kwds
    )
    output_directory = kwds.get("output_directory", None)
    ctx.vlog("collecting outputs from run...")
    run_response.collect_outputs(ctx, output_directory)
    ctx.vlog("collecting outputs complete")
    return run_response