# Example no. 1
def run_workflow(wf, plugin, plugin_args, specfile="localspec"):
    """Run a pydra workflow, pickle its results, and generate a report.

    The workflow is submitted with the requested plugin, its results are
    pickled into a freshly created, timestamped output directory named
    ``out-<specfile basename>-<timestamp>``, and ``gen_report`` is invoked
    inside that directory.  The caller's working directory is always
    restored, even if pickling or report generation fails.

    Parameters
    ----------
    wf : pydra.Workflow
        Workflow to execute; ``wf.inputs`` must provide ``metrics``,
        ``gen_shap`` and ``plot_top_n_shap`` (consumed by ``gen_report``).
    plugin : str
        Name of the pydra execution plugin (e.g. ``"cf"``).
    plugin_args : dict
        Extra keyword arguments forwarded to ``pydra.Submitter``.
    specfile : str, optional
        Spec file name; its basename is embedded in the output directory.

    Returns
    -------
    The value of ``wf.result(return_inputs=True)``.
    """
    import datetime
    import pickle as pk

    cwd = os.getcwd()
    with pydra.Submitter(plugin=plugin, **plugin_args) as sub:
        sub(runnable=wf)
    results = wf.result(return_inputs=True)
    # Running the workflow may change the process cwd; restore it first.
    os.chdir(cwd)

    # Equivalent to utcnow().isoformat() with ":" and "-" stripped, but built
    # from a timezone-aware datetime (datetime.utcnow() is deprecated).
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime(
        "%Y%m%dT%H%M%S.%f")
    result_dir = f"out-{os.path.basename(specfile)}-{timestamp}"
    os.makedirs(result_dir, exist_ok=True)
    os.chdir(result_dir)
    try:
        with open(f"results-{timestamp}.pkl", "wb") as fp:
            pk.dump(results, fp)

        gen_report(
            results,
            prefix=wf.name,
            metrics=wf.inputs.metrics,
            gen_shap=wf.inputs.gen_shap,
            plot_top_n_shap=wf.inputs.plot_top_n_shap,
        )
    finally:
        # Always hand the caller back their original working directory.
        os.chdir(cwd)
    return results
# Example no. 2
# Let's run a simple command `pwd` using pydra

import pydra

# +
cmd = "pwd"
# we should use an executable to pass the command we want to run
shelly = pydra.ShellCommandTask(name="shelly", executable=cmd)

# we can always check the cmdline of our task
# (bare expression: only displays in a notebook, a no-op in a plain script)
shelly.cmdline
# -

# and now let's try to run it:

with pydra.Submitter(plugin="cf") as sub:
    sub(shelly)

# and check the result

shelly.result()

# the result should have `return_code`, `stdout` and `stderr`. If everything goes well `return_code` should be `0`, `stdout` should point to the working directory and `stderr` should be an empty string.

# ### Commands with arguments and inputs
# you can also use longer commands by providing a list:

# +
cmd = ["echo", "hail", "pydra"]
shelly = pydra.ShellCommandTask(name="shelly", executable=cmd)
# fixed typo in the printed label: "cmndline" -> "cmdline"
print("cmdline = ", shelly.cmdline)
# Example no. 3
# NOTE(review): this fragment begins mid-example — `task9`, `t0`, `time`, and
# `add_two_sleep` are defined in an earlier notebook cell not shown here.
task9()
print(f'total time: {time.time() - t0}')
task9.result()
# -

# The total time will depend on the machine you are using, but it could be below `1.1s`, so clearly the tasks are running in parallel!
#
# If we run `Task` that has a `State`, pydra will automatically create a `Submitter` with a default `Worker` that is `cf`, i.e. `ConcurrentFutures`.
#
# We could also create a `Submitter` first, and than use it to run the task:

# +
# split("x") turns the task into a state-ful task: one run per element of x
task10 = add_two_sleep(x=[1, 2, 3, 4]).split("x")

t0 = time.time()
# explicit Submitter with the ConcurrentFutures ("cf") worker
with pydra.Submitter(plugin="cf") as sub:
    task10(submitter=sub)
print(f'total time: {time.time() - t0}')
print(f"results: {task10.result()}")

# -

# or we can provide the name of the plugin:

# +
task11 = add_two_sleep(x=[1, 2, 3, 4]).split("x")

t0 = time.time()
# shorthand: the task builds its own Submitter from the plugin name
task11(plugin="cf")
print(f'total time: {time.time() - t0}')
print(f"results: {task11.result()}")
# Example no. 4
    def _run_pydra(self, image, soft_ver_str):
        """Build and run the three-task testing workflow for one image.

        The workflow is: (1) a ``DockerTask`` that runs the analysis command
        inside *image*, (2) a small Python task that collects the analysis
        output files into a list, and (3) a (possibly containerized)
        ``ShellCommandTask`` that runs each test script against the matching
        output/reference file pair.  The per-test reports are stored in
        ``self.reports[soft_ver_str]``.

        Parameters
        ----------
        image : str
            Docker image in which the analysis command is executed.
        soft_ver_str : str
            Key under which the resulting reports are stored in
            ``self.reports``.
        """
        wf = pydra.Workflow(
            name="wf",
            input_spec=["image"],
            cache_dir=self.working_dir,
        )
        wf.inputs.image = image

        # 1st task - analysis
        param_run = self.params["analysis"]
        cmd_run = [param_run["command"]]
        inp_fields_run = []
        inp_val_run = {}

        # optional analysis script: passed as the first positional argument
        if param_run["script"]:
            script_run = param_run["script"]
            inp_fields_run.append((
                "script",
                attr.ib(type=pydra.specs.File,
                        metadata={
                            "argstr": "",
                            "position": 1,
                            "help_string": "script file",
                            "mandatory": True,
                        }),
            ))
            # fixed: extraneous f-prefix on a literal with no placeholders
            inp_val_run["script"] = script_run

        # translate each declared input into a pydra input-spec field
        output_file_dict = {}
        for ind, inputs in enumerate(param_run["inputs"]):
            inputs = deepcopy(inputs)
            value = inputs.pop("value")
            name = inputs.pop("name", f"inp_{ind}")
            output_file = inputs.pop("output_file", False)
            # default values for metadata
            metadata = {
                "argstr": "",
                "position": ind + 2,
                "help_string": f"inp_{ind}",
                "mandatory": True,
            }
            tp = inputs.pop("type")
            if tp == "File":
                tp = pydra.specs.File
                # File inputs live inside the container's /data bind mount
                metadata["container_path"] = True
            # updating metadata with values provided in parameters file
            metadata.update(inputs)

            field = (name, attr.ib(type=tp, metadata=metadata))
            inp_fields_run.append(field)

            if tp is pydra.specs.File:
                inp_val_run[name] = f"/data/{value}"
                process_path_obj(value, self.data_path)
            else:
                # non-File values that name an output file are redirected to
                # the container's /output_pydra directory and remembered so
                # test specs can refer to them by their original name
                if output_file:
                    output_file_dict[name] = value
                    value = os.path.join("/output_pydra", value)
                inp_val_run[name] = value

        input_spec_run = pydra.specs.SpecInfo(name="Input",
                                              fields=inp_fields_run,
                                              bases=(pydra.specs.DockerSpec, ))

        # output files of the analysis task, one field per expected test file
        out_fields_run = []
        for el in self.params["tests"]:
            if isinstance(el["file"], str):
                if el["file"] in output_file_dict:
                    el["file"] = output_file_dict[el["file"]]
                out_fields_run.append(
                    (f"file_{el['name']}", pydra.specs.File, el["file"]))
            elif isinstance(el["file"], list):
                for ii, file in enumerate(el["file"]):
                    out_fields_run.append(
                        (f"file_{el['name']}_{ii}", pydra.specs.File, file))
            else:
                # fixed: extraneous f-prefix on a literal with no placeholders
                raise Exception(
                    "value for file in params['tests'] has to be a str or a list"
                )

        output_spec_run = pydra.specs.SpecInfo(
            name="Output",
            fields=out_fields_run,
            bases=(pydra.specs.ShellOutSpec, ))

        task_run = pydra.DockerTask(
            name="run",
            executable=cmd_run,
            image=wf.lzin.image,
            input_spec=input_spec_run,
            output_spec=output_spec_run,
            bindings=[(self.data_path, "/data", "ro")],
            **inp_val_run,
        )
        wf.add(task_run)

        # 2nd task - creating list from the 1st task output
        @pydra.mark.task
        @pydra.mark.annotate({"return": {"outfiles": list}})
        def outfiles_list(res):
            """Collect analysis outputs in test order; lists become tuples."""
            out_f = []
            for el in self.params["tests"]:
                if isinstance(el["file"], (tuple, list)):
                    out_f.append(
                        tuple([
                            res[f"file_{el['name']}_{i}"]
                            for i in range(len(el["file"]))
                        ]))
                else:
                    out_f.append(res[f"file_{el['name']}"])
            return out_f

        wf.add(outfiles_list(name="outfiles", res=wf.run.lzout.all_))

        # 3rd task - tests
        input_spec_test = pydra.specs.SpecInfo(
            name="Input",
            fields=[
                (
                    "script_test",
                    attr.ib(type=pydra.specs.File,
                            metadata={
                                "argstr": "",
                                "position": 1,
                                "help_string": "test file",
                                "mandatory": True,
                            }),
                ),
                (
                    "file_out",
                    attr.ib(type=(tuple, pydra.specs.File),
                            metadata={
                                "position": 2,
                                "help_string": "out file",
                                "argstr": "-out",
                                "mandatory": True,
                            }),
                ),
                (
                    "file_ref",
                    attr.ib(type=(tuple, pydra.specs.File),
                            metadata={
                                "position": 3,
                                "argstr": "-ref",
                                "help_string": "out file",
                                "mandatory": True,
                                "container_path": True,
                            }),
                ),
                (
                    "name_test",
                    attr.ib(type=str,
                            metadata={
                                "position": 4,
                                "argstr": "-name",
                                "help_string": "test name",
                                "mandatory": True,
                            }),
                ),
            ],
            bases=(pydra.specs.ShellSpec, ),
        )

        output_spec_test = pydra.specs.SpecInfo(
            name="Output",
            fields=[("reports", pydra.specs.File, "report_*.json")],
            bases=(pydra.specs.ShellOutSpec, ),
        )

        # run the tests in a container only when a test image is configured;
        # otherwise they run on the host against the local reference data
        if self.test_image:
            container_info = ("docker", self.test_image, [(self.data_ref_path,
                                                           "/data_ref", "ro")])
            file_ref_dir = Path("/data_ref")
        else:
            container_info = None
            file_ref_dir = self.data_ref_path

        inp_val_test = {}
        inp_val_test["name_test"] = [el["name"] for el in self.params["tests"]]
        inp_val_test["script_test"] = [
            el["script"] for el in self.params["tests"]
        ]
        inp_val_test["file_ref"] = []
        for el in self.params["tests"]:
            if isinstance(el["file"], str):
                inp_val_test["file_ref"].append(file_ref_dir / el["file"])
            elif isinstance(el["file"], list):
                inp_val_test["file_ref"].append(
                    tuple([file_ref_dir / file for file in el["file"]]))

        # split pairs (script_test, name_test) with (file_out, file_ref) so
        # each test script sees its matching output/reference files
        task_test = pydra.ShellCommandTask(
            name="test",
            executable="python",
            container_info=container_info,
            input_spec=input_spec_test,
            output_spec=output_spec_test,
            file_out=wf.outfiles.lzout.outfiles,
            **inp_val_test,
        ).split((("script_test", "name_test"), ("file_out", "file_ref")))
        wf.add(task_test)

        # setting wf output
        wf.set_output([
            ("outfiles", wf.outfiles.lzout.outfiles),
            ("test_out", wf.test.lzout.stdout),
            ("reports", wf.test.lzout.reports),
        ])
        print(f"\n running pydra workflow for {self.workflow_path} "
              f"in working directory - {self.working_dir}")
        with pydra.Submitter(plugin="cf") as sub:
            sub(wf)
        res = wf.result()
        self.reports[soft_ver_str] = res.output.reports