Example #1
def stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds):  # noqa C901
    # only upload objects as files/collections for CWL workflows...
    tool_or_workflow = "tool" if runnable.type != RunnableType.cwl_workflow else "workflow"
    to_posix_lines = runnable.type.is_galaxy_artifact
    job_dict, datasets = PlanemoStagingInterface(ctx, runnable, user_gi, config.version_major).stage(
        tool_or_workflow,
        history_id=history_id,
        job_path=job_path,
        use_path_paste=config.use_path_paste,
        to_posix_lines=to_posix_lines,
    )

    if datasets:
        ctx.vlog("uploaded datasets [%s] for activity, checking history state" % datasets)
        final_state = _wait_for_history(ctx, user_gi, history_id)

        for (dataset, path) in datasets:
            dataset_details = user_gi.histories.show_dataset(
                history_id,
                dataset["id"],
            )
            ctx.vlog("Uploaded dataset for path [%s] with metadata [%s]" % (path, dataset_details))
    else:
        # Mark uploads as ok because nothing to do.
        final_state = "ok"

    ctx.vlog("final state is %s" % final_state)
    if final_state != "ok":
        msg = "Failed to run job final job state is [%s]." % final_state
        summarize_history(ctx, user_gi, history_id)
        raise Exception(msg)

    return job_dict, datasets
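
Example 1 relies on a history poller, _wait_for_history, that is not reproduced on this page. For context, here is a minimal sketch of what such a poller could look like, built only on BioBlend's public histories.show_history call; the names, terminal-state set, and backoff behavior are assumptions, not Planemo's actual implementation.

import time


def _wait_for_history(ctx, user_gi, history_id, polling_backoff=0):
    # Illustrative sketch only: poll the history until it reaches a
    # terminal state, lengthening the sleep by ``polling_backoff``
    # seconds on each iteration.
    delta = 0.25
    while True:
        state = user_gi.histories.show_history(history_id)["state"]
        if state in ("ok", "error"):
            return state
        time.sleep(delta)
        delta += polling_backoff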
Example #2
def _execute(ctx, config, runnable, job_path, **kwds):
    user_gi = config.user_gi
    admin_gi = config.gi

    history_id = _history_id(user_gi, **kwds)

    galaxy_paths, job_dict, _ = stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds)

    if runnable.type in [RunnableType.galaxy_tool, RunnableType.cwl_tool]:
        response_class = GalaxyToolRunResponse
        tool_id = _verified_tool_id(runnable, user_gi)
        inputs_representation = _inputs_representation(runnable)
        run_tool_payload = dict(
            history_id=history_id,
            tool_id=tool_id,
            inputs=job_dict,
            inputs_representation=inputs_representation,
        )
        ctx.vlog("Post to Galaxy tool API with payload [%s]" % run_tool_payload)
        tool_run_response = user_gi.tools._tool_post(run_tool_payload)

        job = tool_run_response["jobs"][0]
        job_id = job["id"]
        try:
            final_state = _wait_for_job(user_gi, job_id)
        except Exception:
            summarize_history(ctx, user_gi, history_id)
            raise
        if final_state != "ok":
            msg = "Failed to run CWL tool job final job state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(config.log_contents)
            raise Exception(msg)

        ctx.vlog("Final job state was ok, fetching details for job [%s]" % job_id)
        job_info = admin_gi.jobs.show_job(job_id)
        response_kwds = {
            'job_info': job_info,
            'api_run_response': tool_run_response,
        }
        if ctx.verbose:
            summarize_history(ctx, user_gi, history_id)
    elif runnable.type in [RunnableType.galaxy_workflow, RunnableType.cwl_workflow]:
        response_class = GalaxyWorkflowRunResponse
        workflow_id = config.workflow_id(runnable.path)
        ctx.vlog("Found Galaxy workflow ID [%s] for path [%s]" % (workflow_id, runnable.path))
        # TODO: update bioblend to allow inputs_by.
        # invocation = user_gi.workflows.invoke_workflow(
        #    workflow_id,
        #    history_id=history_id,
        #    inputs=job_dict,
        # )
        payload = dict(
            workflow_id=workflow_id,
            history_id=history_id,
            inputs=job_dict,
            inputs_by="name",
            allow_tool_state_corrections=True,
        )
        invocations_url = "%s/%s/invocations" % (
            user_gi._make_url(user_gi.workflows),
            workflow_id,
        )
        invocation = Client._post(user_gi.workflows, payload, url=invocations_url)
        invocation_id = invocation["id"]
        ctx.vlog("Waiting for invocation [%s]" % invocation_id)
        try:
            final_invocation_state = _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id)
        except Exception:
            ctx.vlog("Problem waiting on invocation...")
            summarize_history(ctx, user_gi, history_id)
            raise
        ctx.vlog("Final invocation state is [%s]" % final_invocation_state)
        final_state = _wait_for_history(ctx, user_gi, history_id)
        if final_state != "ok":
            msg = "Failed to run workflow final history state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(config.log_contents)
            raise Exception(msg)
        ctx.vlog("Final history state is 'ok'")
        response_kwds = {
            'workflow_id': workflow_id,
            'invocation_id': invocation_id,
        }
    else:
        raise NotImplementedError()

    run_response = response_class(
        ctx=ctx,
        runnable=runnable,
        user_gi=user_gi,
        history_id=history_id,
        galaxy_paths=galaxy_paths,
        log=config.log_contents,
        **response_kwds
    )
    output_directory = kwds.get("output_directory", None)
    ctx.vlog("collecting outputs from run...")
    run_response.collect_outputs(ctx, output_directory)
    ctx.vlog("collecting outputs complete")
    return run_response
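
Both _execute variants obtain their target history through a _history_id helper that is not shown here. A rough sketch under stated assumptions (the keyword names and default history name are guesses, not Planemo's exact logic): honor an explicitly supplied history, otherwise create a fresh one via BioBlend.

def _history_id(user_gi, **kwds):
    # Hypothetical sketch: reuse a caller-supplied history_id if present,
    # otherwise create a new history through BioBlend's histories client.
    history_id = kwds.get("history_id")
    if history_id is None:
        history_name = kwds.get("history_name") or "Planemo Run History"
        history_id = user_gi.histories.create_history(history_name)["id"]
    return history_id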
Example #3
def stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds):

    def upload_func(upload_target):
        if isinstance(upload_target, FileUploadTarget):
            file_path = upload_target.path
            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type="auto",
            )
            name = os.path.basename(file_path)
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            upload_payload["inputs"]["auto_decompress"] = False
            upload_payload["inputs"]["files_0|url_paste"] = "file://%s" % os.path.abspath(file_path)
            upload_payload["inputs"]["files_0|NAME"] = name
            if upload_target.secondary_files:
                upload_payload["inputs"]["files_1|url_paste"] = "file://%s" % os.path.abspath(upload_target.secondary_files)
                upload_payload["inputs"]["files_1|type"] = "upload_dataset"
                upload_payload["inputs"]["files_1|auto_decompress"] = True
                upload_payload["inputs"]["file_count"] = "2"
                upload_payload["inputs"]["force_composite"] = "True"

            ctx.vlog("upload_payload is %s" % upload_payload)
            return user_gi.tools._tool_post(upload_payload, files_attached=False)
        elif isinstance(upload_target, DirectoryUploadTarget):
            tar_path = upload_target.tar_path

            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type="tar",
            )
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            upload_payload["inputs"]["files_0|url_paste"] = "file://%s" % tar_path
            tar_upload_response = user_gi.tools._tool_post(upload_payload, files_attached=False)
            convert_response = user_gi.tools.run_tool(
                tool_id="CONVERTER_tar_to_directory",
                tool_inputs={"input1": {"src": "hda", "id": tar_upload_response["outputs"][0]["id"]}},
                history_id=history_id,
            )
            assert "outputs" in convert_response, convert_response
            return convert_response
        else:
            content = json.dumps(upload_target.object)
            return user_gi.tools.paste_content(
                content,
                history_id,
                file_type="expression.json",
            )

    def create_collection_func(element_identifiers, collection_type):
        payload = {
            "name": "dataset collection",
            "instance_type": "history",
            "history_id": history_id,
            "element_identifiers": element_identifiers,
            "collection_type": collection_type,
            "fields": None if collection_type != "record" else "auto",
        }
        dataset_collections_url = user_gi.url + "/dataset_collections"
        dataset_collection = Client._post(user_gi.histories, payload, url=dataset_collections_url)
        return dataset_collection

    with open(job_path, "r") as f:
        job = yaml.safe_load(f)  # safe_load avoids arbitrary object construction

    # Figure out what "." should be here instead.
    job_dir = os.path.dirname(job_path)
    job_dict, datasets = galactic_job_json(
        job,
        job_dir,
        upload_func,
        create_collection_func,
        tool_or_workflow="tool" if runnable.type in [RunnableType.cwl_tool, RunnableType.galaxy_tool] else "workflow",
    )

    if datasets:
        final_state = _wait_for_history(ctx, user_gi, history_id)

        for (dataset, path) in datasets:
            dataset_details = user_gi.histories.show_dataset(
                history_id,
                dataset["id"],
            )
            ctx.vlog("Uploaded dataset for path [%s] with metadata [%s]" % (path, dataset_details))
    else:
        # Mark uploads as ok because nothing to do.
        final_state = "ok"

    ctx.vlog("final state is %s" % final_state)
    if final_state != "ok":
        msg = "Failed to run job final job state is [%s]." % final_state
        summarize_history(ctx, user_gi, history_id)
        with open("errored_galaxy.log", "w") as f:
            f.write(config.log_contents)
        raise Exception(msg)

    galaxy_paths = []
    for (dataset, upload_target) in datasets:
        if isinstance(upload_target, FileUploadTarget):
            local_path = upload_target.path
            ctx.vlog("fetching full dataset for %s, %s" % (dataset, local_path))
            dataset_full = user_gi.datasets.show_dataset(dataset["id"])
            galaxy_path = dataset_full["file_name"]
            ctx.vlog("galaxy_path is %s" % galaxy_path)
            job_path = os.path.join(job_dir, local_path)
            galaxy_paths.append((job_path, galaxy_path))

    ctx.vlog("galaxy_paths are %s" % galaxy_paths)
    return galaxy_paths, job_dict, datasets
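
The create_collection_func callback in this example posts Galaxy-style element identifiers to the dataset collections API. A hypothetical call for a paired collection might look like the following; the HDA ids and the uploaded_* response variables are placeholders for illustration, not values from the code above.

forward_hda_id = uploaded_forward["outputs"][0]["id"]  # placeholder source
reverse_hda_id = uploaded_reverse["outputs"][0]["id"]  # placeholder source
element_identifiers = [
    {"name": "forward", "src": "hda", "id": forward_hda_id},
    {"name": "reverse", "src": "hda", "id": reverse_hda_id},
]
collection = create_collection_func(element_identifiers, "paired")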
Example #4
def _execute(ctx, config, runnable, job_path, **kwds):
    user_gi = config.user_gi
    admin_gi = config.gi

    history_id = _history_id(user_gi, **kwds)

    job_dict, _ = stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds)

    if runnable.type in [RunnableType.galaxy_tool, RunnableType.cwl_tool]:
        response_class = GalaxyToolRunResponse
        tool_id = _verified_tool_id(runnable, user_gi)
        inputs_representation = _inputs_representation(runnable)
        run_tool_payload = dict(
            history_id=history_id,
            tool_id=tool_id,
            inputs=job_dict,
            inputs_representation=inputs_representation,
        )
        ctx.vlog("Post to Galaxy tool API with payload [%s]" % run_tool_payload)
        tool_run_response = user_gi.tools._tool_post(run_tool_payload)

        job = tool_run_response["jobs"][0]
        job_id = job["id"]
        try:
            final_state = _wait_for_job(user_gi, job_id)
        except Exception:
            summarize_history(ctx, user_gi, history_id)
            raise
        if final_state != "ok":
            msg = "Failed to run CWL tool job final job state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(log_contents_str(config))
            raise Exception(msg)

        ctx.vlog("Final job state was ok, fetching details for job [%s]" % job_id)
        job_info = admin_gi.jobs.show_job(job_id)
        response_kwds = {
            'job_info': job_info,
            'api_run_response': tool_run_response,
        }
        if ctx.verbose:
            summarize_history(ctx, user_gi, history_id)
    elif runnable.type in [RunnableType.galaxy_workflow, RunnableType.cwl_workflow]:
        response_class = GalaxyWorkflowRunResponse
        workflow_id = config.workflow_id(runnable.path)
        ctx.vlog("Found Galaxy workflow ID [%s] for path [%s]" % (workflow_id, runnable.path))
        # TODO: update bioblend to allow inputs_by.
        # invocation = user_gi.workflows.invoke_workflow(
        #    workflow_id,
        #    history_id=history_id,
        #    inputs=job_dict,
        # )
        payload = dict(
            workflow_id=workflow_id,
            history_id=history_id,
            inputs=job_dict,
            inputs_by="name",
            allow_tool_state_corrections=True,
        )
        invocations_url = "%s/%s/invocations" % (
            user_gi._make_url(user_gi.workflows),
            workflow_id,
        )
        invocation = Client._post(user_gi.workflows, payload, url=invocations_url)
        invocation_id = invocation["id"]
        ctx.vlog("Waiting for invocation [%s]" % invocation_id)
        polling_backoff = kwds.get("polling_backoff", 0)
        try:
            final_invocation_state = _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id, polling_backoff)
        except Exception:
            ctx.vlog("Problem waiting on invocation...")
            summarize_history(ctx, user_gi, history_id)
            raise
        ctx.vlog("Final invocation state is [%s]" % final_invocation_state)
        final_state = _wait_for_history(ctx, user_gi, history_id, polling_backoff)
        if final_state != "ok":
            msg = "Failed to run workflow final history state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            with open("errored_galaxy.log", "w") as f:
                f.write(log_contents_str(config))
            raise Exception(msg)
        ctx.vlog("Final history state is 'ok'")
        response_kwds = {
            'workflow_id': workflow_id,
            'invocation_id': invocation_id,
        }
    else:
        raise NotImplementedError()

    run_response = response_class(
        ctx=ctx,
        runnable=runnable,
        user_gi=user_gi,
        history_id=history_id,
        log=log_contents_str(config),
        **response_kwds
    )
    output_directory = kwds.get("output_directory", None)
    ctx.vlog("collecting outputs from run...")
    run_response.collect_outputs(ctx, output_directory)
    ctx.vlog("collecting outputs complete")
    return run_response
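
Example 4 threads a polling_backoff keyword through to the wait helpers. A sketch of an invocation poller consistent with that signature is below; it is illustrative only, and the terminal-state set is an assumption. It uses BioBlend's public workflows.show_invocation call.

import time


def _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id, polling_backoff=0):
    # Illustrative sketch: poll until the invocation leaves its scheduling
    # states, backing off by ``polling_backoff`` seconds per iteration.
    delta = 0.25
    while True:
        state = user_gi.workflows.show_invocation(workflow_id, invocation_id)["state"]
        if state in ("scheduled", "failed", "cancelled"):
            return state
        time.sleep(delta)
        delta += polling_backoff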
Example #5
def stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds):
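    # A single-element list serves as a mutable cell: the nested _attach_file
    # closure flips it to True when a file is attached (a Python 2 compatible
    # stand-in for nonlocal).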
    files_attached = [False]

    def upload_func(upload_target):

        def _attach_file(upload_payload, uri, index=0):
            uri = path_or_uri_to_uri(uri)
            is_path = uri.startswith("file://")
            if not is_path or config.use_path_paste:
                upload_payload["inputs"]["files_%d|url_paste" % index] = uri
            else:
                files_attached[0] = True
                path = uri[len("file://"):]
                upload_payload["files_%d|file_data" % index] = attach_file(path)

        if isinstance(upload_target, FileUploadTarget):
            file_path = upload_target.path
            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type=upload_target.properties.get('filetype', None) or "auto",
            )
            name = os.path.basename(file_path)
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            upload_payload["inputs"]["auto_decompress"] = False
            _attach_file(upload_payload, file_path)
            upload_payload["inputs"]["files_0|NAME"] = name
            if upload_target.secondary_files:
                _attach_file(upload_payload, upload_target.secondary_files, index=1)
                upload_payload["inputs"]["files_1|type"] = "upload_dataset"
                upload_payload["inputs"]["files_1|auto_decompress"] = True
                upload_payload["inputs"]["file_count"] = "2"
                upload_payload["inputs"]["force_composite"] = "True"

            ctx.vlog("upload_payload is %s" % upload_payload)
            return user_gi.tools._tool_post(upload_payload, files_attached=files_attached[0])
        elif isinstance(upload_target, DirectoryUploadTarget):
            tar_path = upload_target.tar_path

            upload_payload = user_gi.tools._upload_payload(
                history_id,
                file_type="tar",
            )
            upload_payload["inputs"]["files_0|auto_decompress"] = False
            _attach_file(upload_payload, tar_path)
            tar_upload_response = user_gi.tools._tool_post(upload_payload, files_attached=files_attached[0])
            convert_response = user_gi.tools.run_tool(
                tool_id="CONVERTER_tar_to_directory",
                tool_inputs={"input1": {"src": "hda", "id": tar_upload_response["outputs"][0]["id"]}},
                history_id=history_id,
            )
            assert "outputs" in convert_response, convert_response
            return convert_response
        else:
            content = json.dumps(upload_target.object)
            return user_gi.tools.paste_content(
                content,
                history_id,
                file_type="expression.json",
            )

    def create_collection_func(element_identifiers, collection_type):
        payload = {
            "name": "dataset collection",
            "instance_type": "history",
            "history_id": history_id,
            "element_identifiers": element_identifiers,
            "collection_type": collection_type,
            "fields": None if collection_type != "record" else "auto",
        }
        dataset_collections_url = user_gi.url + "/dataset_collections"
        dataset_collection = Client._post(user_gi.histories, payload, url=dataset_collections_url)
        return dataset_collection

    with open(job_path, "r") as f:
        job = yaml.safe_load(f)

    # Figure out what "." should be here instead.
    job_dir = os.path.dirname(job_path)
    job_dict, datasets = galactic_job_json(
        job,
        job_dir,
        upload_func,
        create_collection_func,
        tool_or_workflow="tool" if runnable.type in [RunnableType.cwl_tool, RunnableType.galaxy_tool] else "workflow",
    )

    if datasets:
        final_state = _wait_for_history(ctx, user_gi, history_id)

        for (dataset, path) in datasets:
            dataset_details = user_gi.histories.show_dataset(
                history_id,
                dataset["id"],
            )
            ctx.vlog("Uploaded dataset for path [%s] with metadata [%s]" % (path, dataset_details))
    else:
        # Mark uploads as ok because nothing to do.
        final_state = "ok"

    ctx.vlog("final state is %s" % final_state)
    if final_state != "ok":
        msg = "Failed to run job final job state is [%s]." % final_state
        summarize_history(ctx, user_gi, history_id)
        with open("errored_galaxy.log", "w") as f:
            f.write(log_contents_str(config))
        raise Exception(msg)

    return job_dict, datasets
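
Example 5 normalizes each input with path_or_uri_to_uri before choosing between URL-paste and multipart attachment. That helper comes from galaxy-util; a behaviorally equivalent sketch is below (an assumption about its behavior, not the library's exact code).

import os


def path_or_uri_to_uri(path_or_uri):
    # Treat anything without a URI scheme as a local path and turn it into
    # a file:// URI; pass genuine URIs through untouched.
    if "://" in path_or_uri:
        return path_or_uri
    return "file://%s" % os.path.abspath(path_or_uri)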
Example #6
def _execute(ctx, config, runnable, job_path, **kwds):
    user_gi = config.user_gi
    admin_gi = config.gi

    history_id = _history_id(user_gi, **kwds)

    try:
        job_dict, _ = stage_in(ctx, runnable, config, user_gi, history_id, job_path, **kwds)
    except Exception:
        ctx.vlog("Problem with staging in data for Galaxy activities...")
        raise
    if runnable.type in [RunnableType.galaxy_tool, RunnableType.cwl_tool]:
        response_class = GalaxyToolRunResponse
        tool_id = _verified_tool_id(runnable, user_gi)
        inputs_representation = _inputs_representation(runnable)
        run_tool_payload = dict(
            history_id=history_id,
            tool_id=tool_id,
            inputs=job_dict,
            inputs_representation=inputs_representation,
        )
        ctx.vlog("Post to Galaxy tool API with payload [%s]" % run_tool_payload)
        tool_run_response = user_gi.tools._post(run_tool_payload)

        job = tool_run_response["jobs"][0]
        job_id = job["id"]
        try:
            final_state = _wait_for_job(user_gi, job_id)
        except Exception:
            summarize_history(ctx, user_gi, history_id)
            raise
        if final_state != "ok":
            msg = "Failed to run CWL tool job final job state is [%s]." % final_state
            summarize_history(ctx, user_gi, history_id)
            raise Exception(msg)

        ctx.vlog("Final job state was ok, fetching details for job [%s]" % job_id)
        job_info = admin_gi.jobs.show_job(job_id)
        response_kwds = {
            'job_info': job_info,
            'api_run_response': tool_run_response,
        }
        if ctx.verbose:
            summarize_history(ctx, user_gi, history_id)
    elif runnable.type in [RunnableType.galaxy_workflow, RunnableType.cwl_workflow]:
        response_class = GalaxyWorkflowRunResponse
        workflow_id = config.workflow_id_for_runnable(runnable)
        ctx.vlog("Found Galaxy workflow ID [%s] for URI [%s]" % (workflow_id, runnable.uri))
        # TODO: Use the following when BioBlend 0.14 is released
        # invocation = user_gi.workflows.invoke_workflow(
        #    workflow_id,
        #    inputs=job_dict,
        #    history_id=history_id,
        #    allow_tool_state_corrections=True,
        #    inputs_by="name",
        # )
        payload = dict(
            workflow_id=workflow_id,
            history_id=history_id,
            inputs=job_dict,
            inputs_by="name",
            allow_tool_state_corrections=True,
        )
        invocations_url = "%s/workflows/%s/invocations" % (
            user_gi.url,
            workflow_id,
        )
        invocation = user_gi.workflows._post(payload, url=invocations_url)
        invocation_id = invocation["id"]
        ctx.vlog("Waiting for invocation [%s]" % invocation_id)
        polling_backoff = kwds.get("polling_backoff", 0)
        final_invocation_state = 'new'
        error_message = ""
        try:
            final_invocation_state = _wait_for_invocation(ctx, user_gi, history_id, workflow_id, invocation_id, polling_backoff)
            assert final_invocation_state == 'scheduled'
        except Exception:
            ctx.vlog("Problem waiting on invocation...")
            summarize_history(ctx, user_gi, history_id)
            error_message = "Final invocation state is [%s]" % final_invocation_state
        ctx.vlog("Final invocation state is [%s]" % final_invocation_state)
        final_state = _wait_for_history(ctx, user_gi, history_id, polling_backoff)
        if final_state != "ok":
            msg = "Failed to run workflow final history state is [%s]." % final_state
            error_message = msg if not error_message else "%s. %s" % (error_message, msg)
            ctx.vlog(msg)
            summarize_history(ctx, user_gi, history_id)
        else:
            ctx.vlog("Final history state is 'ok'")
        response_kwds = {
            'workflow_id': workflow_id,
            'invocation_id': invocation_id,
            'history_state': final_state,
            'invocation_state': final_invocation_state,
            'error_message': error_message,
        }
    else:
        raise NotImplementedError()

    run_response = response_class(
        ctx=ctx,
        runnable=runnable,
        user_gi=user_gi,
        history_id=history_id,
        log=log_contents_str(config),
        **response_kwds
    )
    output_directory = kwds.get("output_directory", None)
    ctx.vlog("collecting outputs from run...")
    run_response.collect_outputs(ctx, output_directory)
    ctx.vlog("collecting outputs complete")
    return run_response
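
The TODO in Example 6 notes that once BioBlend 0.14 is available, the hand-rolled POST to the invocations endpoint can be replaced by the client method sketched in its own comment. Spelled out, that call would be:

invocation = user_gi.workflows.invoke_workflow(
    workflow_id,
    inputs=job_dict,
    history_id=history_id,
    allow_tool_state_corrections=True,
    inputs_by="name",
)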