import os

import pydra

# gen_report is imported elsewhere in the module


def run_workflow(wf, plugin, plugin_args, specfile="localspec"):
    # remember where we started; the results are written in a new directory
    cwd = os.getcwd()
    with pydra.Submitter(plugin=plugin, **plugin_args) as sub:
        sub(runnable=wf)
    results = wf.result(return_inputs=True)
    os.chdir(cwd)

    import pickle as pk
    import datetime

    # build a unique, filesystem-safe output directory name
    timestamp = datetime.datetime.utcnow().isoformat()
    timestamp = timestamp.replace(":", "").replace("-", "")
    result_dir = f"out-{os.path.basename(specfile)}-{timestamp}"
    os.makedirs(result_dir)
    os.chdir(result_dir)

    # persist the raw results, then generate the report
    with open(f"results-{timestamp}.pkl", "wb") as fp:
        pk.dump(results, fp)
    gen_report(
        results,
        prefix=wf.name,
        metrics=wf.inputs.metrics,
        gen_shap=wf.inputs.gen_shap,
        plot_top_n_shap=wf.inputs.plot_top_n_shap,
    )
    os.chdir(cwd)
    return results
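# A minimal usage sketch (not from the original source): `make_workflow` is a
# hypothetical helper standing in for whatever builds the pydra Workflow here;
# run_workflow only requires that the workflow's inputs include `metrics`,
# `gen_shap`, and `plot_top_n_shap`, which gen_report reads.
if __name__ == "__main__":
    wf = make_workflow(specfile="localspec")  # hypothetical constructor
    # "cf" is pydra's ConcurrentFutures worker; n_procs caps parallelism
    results = run_workflow(wf, plugin="cf", plugin_args={"n_procs": 4})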
# Let's run a simple command `pwd` using pydra

import pydra

# +
cmd = "pwd"

# we should use an executable to pass the command we want to run
shelly = pydra.ShellCommandTask(name="shelly", executable=cmd)

# we can always check the cmdline of our task
shelly.cmdline
# -

# and now let's try to run it:
with pydra.Submitter(plugin="cf") as sub:
    sub(shelly)

# and check the result
shelly.result()

# the result should have `return_code`, `stdout` and `stderr`. If everything
# goes well, `return_code` should be `0`, `stdout` should point to the working
# directory, and `stderr` should be an empty string.

# ### Commands with arguments and inputs

# you can also use longer commands by providing a list:

# +
cmd = ["echo", "hail", "pydra"]
shelly = pydra.ShellCommandTask(name="shelly", executable=cmd)
print("cmdline = ", shelly.cmdline)
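# -

# we can run the list-based command the same way; a short sketch (for this
# command, `stdout` should be "hail pydra"):
with pydra.Submitter(plugin="cf") as sub:
    sub(shelly)
shelly.result()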
task9()
print(f'total time: {time.time() - t0}')
task9.result()
# -

# The total time will depend on the machine you are using, but it could be
# below `1.1s`, so clearly the tasks are running in parallel!
#
# If we run a `Task` that has a `State`, pydra will automatically create a
# `Submitter` with a default `Worker` that is `cf`, i.e. `ConcurrentFutures`.
#
# We could also create a `Submitter` first, and then use it to run the task:

# +
task10 = add_two_sleep(x=[1, 2, 3, 4]).split("x")

t0 = time.time()
with pydra.Submitter(plugin="cf") as sub:
    task10(submitter=sub)
print(f'total time: {time.time() - t0}')
print(f"results: {task10.result()}")
# -

# or we can provide the name of the plugin:

# +
task11 = add_two_sleep(x=[1, 2, 3, 4]).split("x")

t0 = time.time()
task11(plugin="cf")
print(f'total time: {time.time() - t0}')
print(f"results: {task11.result()}")
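# -

# For reference, the `add_two_sleep` task used above is not defined in this
# section; a minimal sketch consistent with the timings quoted (the 1s sleep
# is an assumption based on the ~1.1s total for four parallel tasks):
import time

import pydra

@pydra.mark.task
def add_two_sleep(x):
    time.sleep(1)
    return x + 2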
# module-level imports assumed: os, attr, pydra, deepcopy (from copy),
# Path (from pathlib), and the helper process_path_obj
def _run_pydra(self, image, soft_ver_str):
    wf = pydra.Workflow(
        name="wf",
        input_spec=["image"],
        cache_dir=self.working_dir,
    )
    wf.inputs.image = image

    # 1st task - analysis
    param_run = self.params["analysis"]
    cmd_run = [param_run["command"]]
    inp_fields_run = []
    inp_val_run = {}

    if param_run["script"]:
        script_run = param_run["script"]
        inp_fields_run.append((
            "script",
            attr.ib(
                type=pydra.specs.File,
                metadata={
                    "argstr": "",
                    "position": 1,
                    "help_string": "script file",
                    "mandatory": True,
                },
            ),
        ))
        inp_val_run["script"] = script_run

    output_file_dict = {}
    for ind, inputs in enumerate(param_run["inputs"]):
        inputs = deepcopy(inputs)
        value = inputs.pop("value")
        name = inputs.pop("name", f"inp_{ind}")
        output_file = inputs.pop("output_file", False)

        # default values for metadata
        metadata = {
            "argstr": "",
            "position": ind + 2,
            "help_string": f"inp_{ind}",
            "mandatory": True,
        }
        tp = inputs.pop("type")
        if tp == "File":
            tp = pydra.specs.File
            metadata["container_path"] = True
        # updating metadata with values provided in parameters file
        metadata.update(inputs)

        field = (name, attr.ib(type=tp, metadata=metadata))
        inp_fields_run.append(field)

        if tp is pydra.specs.File:
            # file inputs are rebased under the container's /data mount
            inp_val_run[name] = f"/data/{value}"
            process_path_obj(value, self.data_path)
        else:
            if output_file:
                output_file_dict[name] = value
                value = os.path.join("/output_pydra", value)
            inp_val_run[name] = value

    input_spec_run = pydra.specs.SpecInfo(
        name="Input",
        fields=inp_fields_run,
        bases=(pydra.specs.DockerSpec,),
    )

    out_fields_run = []
    for el in self.params["tests"]:
        if isinstance(el["file"], str):
            # a test may refer to an output_file input by name
            if el["file"] in output_file_dict:
                el["file"] = output_file_dict[el["file"]]
            out_fields_run.append(
                (f"file_{el['name']}", pydra.specs.File, el["file"]))
        elif isinstance(el["file"], list):
            for ii, file in enumerate(el["file"]):
                out_fields_run.append(
                    (f"file_{el['name']}_{ii}", pydra.specs.File, file))
        else:
            raise Exception(
                "value for file in params['tests'] has to be a str or a list")

    output_spec_run = pydra.specs.SpecInfo(
        name="Output",
        fields=out_fields_run,
        bases=(pydra.specs.ShellOutSpec,),
    )

    task_run = pydra.DockerTask(
        name="run",
        executable=cmd_run,
        image=wf.lzin.image,
        input_spec=input_spec_run,
        output_spec=output_spec_run,
        bindings=[(self.data_path, "/data", "ro")],
        **inp_val_run,
    )
    wf.add(task_run)

    # 2nd task - creating list from the 1st task output
    @pydra.mark.task
    @pydra.mark.annotate({"return": {"outfiles": list}})
    def outfiles_list(res):
        out_f = []
        for el in self.params["tests"]:
            if isinstance(el["file"], (tuple, list)):
                out_f.append(tuple(
                    res[f"file_{el['name']}_{i}"]
                    for i in range(len(el["file"]))
                ))
            else:
                out_f.append(res[f"file_{el['name']}"])
        return out_f

    wf.add(outfiles_list(name="outfiles", res=wf.run.lzout.all_))

    # 3rd task - tests
    input_spec_test = pydra.specs.SpecInfo(
        name="Input",
        fields=[
            (
                "script_test",
                attr.ib(
                    type=pydra.specs.File,
                    metadata={
                        "argstr": "",
                        "position": 1,
                        "help_string": "test file",
                        "mandatory": True,
                    },
                ),
            ),
            (
                "file_out",
                attr.ib(
                    type=(tuple, pydra.specs.File),
                    metadata={
                        "position": 2,
                        "help_string": "out file",
                        "argstr": "-out",
                        "mandatory": True,
                    },
                ),
            ),
            (
                "file_ref",
                attr.ib(
                    type=(tuple, pydra.specs.File),
                    metadata={
                        "position": 3,
                        "argstr": "-ref",
                        "help_string": "reference file",
                        "mandatory": True,
                        "container_path": True,
                    },
                ),
            ),
            (
                "name_test",
                attr.ib(
                    type=str,
                    metadata={
                        "position": 4,
                        "argstr": "-name",
                        "help_string": "test name",
                        "mandatory": True,
                    },
                ),
            ),
        ],
        bases=(pydra.specs.ShellSpec,),
    )
    output_spec_test = pydra.specs.SpecInfo(
        name="Output",
        fields=[("reports", pydra.specs.File, "report_*.json")],
        bases=(pydra.specs.ShellOutSpec,),
    )

    if self.test_image:
        container_info = ("docker", self.test_image,
                          [(self.data_ref_path, "/data_ref", "ro")])
        file_ref_dir = Path("/data_ref")
    else:
        container_info = None
        file_ref_dir = self.data_ref_path

    inp_val_test = {}
    inp_val_test["name_test"] = [el["name"] for el in self.params["tests"]]
    inp_val_test["script_test"] = [
        el["script"] for el in self.params["tests"]
    ]
    inp_val_test["file_ref"] = []
    for el in self.params["tests"]:
        if isinstance(el["file"], str):
            inp_val_test["file_ref"].append(file_ref_dir / el["file"])
        elif isinstance(el["file"], list):
            inp_val_test["file_ref"].append(
                tuple(file_ref_dir / file for file in el["file"]))

    task_test = pydra.ShellCommandTask(
        name="test",
        executable="python",
        container_info=container_info,
        input_spec=input_spec_test,
        output_spec=output_spec_test,
        file_out=wf.outfiles.lzout.outfiles,
        **inp_val_test,
    ).split((("script_test", "name_test"), ("file_out", "file_ref")))
    wf.add(task_test)

    # setting wf output
    wf.set_output([
        ("outfiles", wf.outfiles.lzout.outfiles),
        ("test_out", wf.test.lzout.stdout),
        ("reports", wf.test.lzout.reports),
    ])

    print(f"\n running pydra workflow for {self.workflow_path} "
          f"in working directory - {self.working_dir}")
    with pydra.Submitter(plugin="cf") as sub:
        sub(wf)
    res = wf.result()
    self.reports[soft_ver_str] = res.output.reports
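# A hypothetical parameters dictionary illustrating the structure
# `_run_pydra` expects for `self.params` (inferred from the key accesses
# above, not copied from a real spec file; the exact type spellings for
# non-File inputs are an assumption):
params = {
    "analysis": {
        "command": "python",
        "script": "workflow/analysis.py",
        "inputs": [
            # File inputs are rebased under /data inside the container
            {"name": "infile", "type": "File", "value": "data.csv"},
            # output_file inputs are rebased under /output_pydra and can be
            # referenced by name in the tests section below
            {"name": "outfile", "type": "str", "value": "result.json",
             "output_file": True},
        ],
    },
    "tests": [
        # "file" may name an output_file input (resolved via
        # output_file_dict) or be a literal path / list of paths
        {"name": "regr", "script": "workflow/check_output.py",
         "file": "outfile"},
    ],
}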