import time
import uuid

import attr
import pydra
from nipype.interfaces.base import (
    Directory,
    File,
    InputMultiPath,
    OutputMultiPath,
    traits,
)
from pydra import ShellCommandTask
from pydra.engine.specs import SpecInfo, ShellSpec, File  # note: this File shadows the nipype File imported above

wf = pydra.Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"])
wf.inputs.cmd1 = "touch"
wf.inputs.cmd2 = "cp"
wf.inputs.args = "newfile.txt"

my_input_spec1 = SpecInfo(
    name="Input",
    fields=[
        (
            "file",
            attr.ib(
                type=str,
                metadata={
                    "output_file_template": "{args}",
                    "help_string": "output file",
                },
            ),
        ),
    ],
    bases=(ShellSpec,),
)
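# The fragment above stops at the input spec. A minimal sketch of how such a spec
# could drive a ShellCommandTask wired to the workflow's lazy inputs and then be
# executed (the task name "shelly1" and the output name "out_file" are illustrative,
# not taken from the original snippet):
shelly1 = ShellCommandTask(
    name="shelly1",
    executable=wf.lzin.cmd1,  # "touch"
    args=wf.lzin.args,        # "newfile.txt"
    input_spec=my_input_spec1,
)
wf.add(shelly1)
# fields with "output_file_template" are exposed as task outputs, so "file" is available lazily
wf.set_output([("out_file", wf.shelly1.lzout.file)])

with pydra.Submitter(plugin="cf") as sub:
    sub(wf)
print(wf.result())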
"landmarkWeights": "/Shared/sinapse/CACHE/20200915_PREDICTHD_base_CACHE/Atlas/20141004_BCD/template_weights_50Lmks.wts", "landmarks": "/Shared/sinapse/CACHE/20200915_PREDICTHD_base_CACHE/Atlas/20141004_BCD/template_landmarks_50Lmks.fcsv", }, "out": { "output_dir": "/localscratch/Users/cjohnson30/output_dir" }, } nest_asyncio.apply() # Create the inputs to the workflow wf = pydra.Workflow(name="wf", input_spec=[ "t1", "templateModel", "llsModel", "landmarkWeights", "landmarks", "output_dir" ], output_spec=["output_dir"]) wf.split("t1", t1=[subject1_json["in"]["t1"], subject2_json["in"]["t1"]]) #t1=["/localscratch/Users/cjohnson30/BCD_Practice/t1w_examples2/sub-066260_ses-21713_run-002_T1w.nii.gz", # "/localscratch/Users/cjohnson30/BCD_Practice/t1w_examples2/sub-066217_ses-29931_run-003_T1w.nii.gz"]) print(wf.inputs.t1) # Set the inputs of Resample resample = BRAINSResample("BRAINSResample").get_task() resample.inputs.inputVolume = wf.lzin.t1 #wf.BRAINSConstellationDetector.lzout.outputResampledVolume resample.inputs.interpolationMode = "Linear" resample.inputs.pixelType = "binary" resample.inputs.referenceVolume = "/localscratch/Users/cjohnson30/resample_refs/t1_average_BRAINSABC.nii.gz" resample.inputs.warpTransform = "/localscratch/Users/cjohnson30/resample_refs/atlas_to_subject.h5"
def gen_workflow(inputs, cache_dir=None, cache_locations=None):
    wf = pydra.Workflow(
        name="ml_wf",
        input_spec=list(inputs.keys()),
        **inputs,
        cache_dir=cache_dir,
        cache_locations=cache_locations,
        audit_flags=AuditFlag.ALL,
        messengers=FileMessenger(),
        messenger_args={"message_dir": os.path.join(os.getcwd(), "messages")},
    )
    wf.split(["clf_info", "permute"])
    wf.add(
        read_file_pdt(
            name="readcsv",
            filename=wf.lzin.filename,
            x_indices=wf.lzin.x_indices,
            target_vars=wf.lzin.target_vars,
            group=wf.lzin.group_var,
        ))
    wf.add(
        gen_splits_pdt(
            name="gensplit",
            n_splits=wf.lzin.n_splits,
            test_size=wf.lzin.test_size,
            X=wf.readcsv.lzout.X,
            Y=wf.readcsv.lzout.Y,
            groups=wf.readcsv.lzout.groups,
        ))
    wf.add(
        train_test_kernel_pdt(
            name="fit_clf",
            X=wf.readcsv.lzout.X,
            y=wf.readcsv.lzout.Y,
            train_test_split=wf.gensplit.lzout.splits,
            split_index=wf.gensplit.lzout.split_indices,
            clf_info=wf.lzin.clf_info,
            permute=wf.lzin.permute,
        ))
    wf.fit_clf.split("split_index")
    wf.add(
        calc_metric_pdt(name="metric",
                        output=wf.fit_clf.lzout.output,
                        metrics=wf.lzin.metrics))
    wf.metric.combine("fit_clf.split_index")
    wf.add(
        get_shap_pdt(
            name="shap",
            X=wf.readcsv.lzout.X,
            permute=wf.lzin.permute,
            model=wf.fit_clf.lzout.model,
            gen_shap=wf.lzin.gen_shap,
            nsamples=wf.lzin.nsamples,
            l1_reg=wf.lzin.l1_reg,
        ))
    wf.shap.combine("fit_clf.split_index")
    wf.set_output([
        ("output", wf.metric.lzout.output),
        ("score", wf.metric.lzout.score),
        ("shaps", wf.shap.lzout.shaps),
        ("feature_names", wf.readcsv.lzout.feature_names),
    ])
    return wf
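# A minimal usage sketch for gen_workflow. All values below are illustrative and
# assumed (including the clf_info format); the keys of `inputs` must cover every
# wf.lzin.* reference used inside the function:
inputs = {
    "filename": "data.csv",
    "x_indices": [0, 1, 2],
    "target_vars": ["label"],
    "group_var": None,
    "n_splits": 5,
    "test_size": 0.2,
    "clf_info": [["sklearn.ensemble", "RandomForestClassifier"]],  # format assumed
    "permute": [False, True],
    "metrics": ["roc_auc_score"],
    "gen_shap": False,
    "nsamples": 100,
    "l1_reg": "aic",
}

wf = gen_workflow(inputs, cache_dir="/tmp/pydra-ml-cache")
with pydra.Submitter(plugin="cf") as sub:
    sub(wf)
print(wf.result())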
help="Name of the experiment being performed") parser.add_argument("iterations", type=int, help="number of iterations") parser.add_argument("delay", type=float, help="sleep delay during " "incrementation") parser.add_argument("--benchmark", action="store_true", help="benchmark results") args_ = parser.parse_args() paths = crawl_dir(os.path.abspath(args_.bb_dir)) wf = pydra.Workflow(name="pydra-incrementation", input_spec=["f", "start", "args", "it"], start=start, args=args_, cache_dir=args_.output_dir, it=0) print("Output directory", wf.output_dir) increment = pydra.mark.task(increment) wf.split("f", f=paths) func_name = "increment{}".format(0) wf.add( increment(name=func_name, filename=wf.lzin.f, start=wf.lzin.start, args=wf.lzin.args,
"formatted according to the BIDS standard.", ) parser.add_argument( "output_dir", help="The directory where the output files " "should be stored. If you are running group level analysis " "this folder should be prepopulated with the results of the" "participant level analysis.", ) args = parser.parse_args() subject_dirs = glob(os.path.join(args.bids_dir, "sub-*")) subjects_to_analyze = [subject_dir.split("-")[-1] for subject_dir in subject_dirs] wf = pydra.Workflow( name="BIDS App Example with Boutiques", input_spec=["infile", "maskfile"] ) T1_files = [ os.path.abspath(T1_file) for subject_label in subjects_to_analyze for T1_file in glob( os.path.join(args.bids_dir, "sub-%s" % subject_label, "anat", "*_T1w.nii*") ) + glob( os.path.join( args.bids_dir, "sub-%s" % subject_label, "ses-*", "anat", "*_T1w.nii*" ) ) ]
    def _run_pydra(self, image, soft_ver_str):
        wf = pydra.Workflow(
            name="wf",
            input_spec=["image"],
            cache_dir=self.working_dir,
        )
        wf.inputs.image = image

        # 1st task - analysis
        param_run = self.params["analysis"]
        cmd_run = [param_run["command"]]
        inp_fields_run = []
        inp_val_run = {}

        if param_run["script"]:
            script_run = param_run["script"]
            inp_fields_run.append((
                "script",
                attr.ib(type=pydra.specs.File,
                        metadata={
                            "argstr": "",
                            "position": 1,
                            "help_string": "script file",
                            "mandatory": True,
                        }),
            ))
            inp_val_run["script"] = script_run

        output_file_dict = {}
        for ind, inputs in enumerate(param_run["inputs"]):
            inputs = deepcopy(inputs)
            value = inputs.pop("value")
            name = inputs.pop("name", f"inp_{ind}")
            output_file = inputs.pop("output_file", False)

            # default values for metadata
            metadata = {
                "argstr": "",
                "position": ind + 2,
                "help_string": f"inp_{ind}",
                "mandatory": True,
            }
            tp = inputs.pop("type")
            if tp == "File":
                tp = pydra.specs.File
                metadata["container_path"] = True
            # updating metadata with values provided in parameters file
            metadata.update(inputs)

            field = (name, attr.ib(type=tp, metadata=metadata))
            inp_fields_run.append(field)

            if tp is pydra.specs.File:
                inp_val_run[name] = f"/data/{value}"
                process_path_obj(value, self.data_path)
            else:
                if output_file:
                    output_file_dict[name] = value
                    value = os.path.join("/output_pydra", value)
                inp_val_run[name] = value

        input_spec_run = pydra.specs.SpecInfo(name="Input",
                                              fields=inp_fields_run,
                                              bases=(pydra.specs.DockerSpec,))

        out_fields_run = []
        for el in self.params["tests"]:
            if isinstance(el["file"], str):
                if el["file"] in output_file_dict:
                    el["file"] = output_file_dict[el["file"]]
                out_fields_run.append(
                    (f"file_{el['name']}", pydra.specs.File, el["file"]))
            elif isinstance(el["file"], list):
                for ii, file in enumerate(el["file"]):
                    out_fields_run.append(
                        (f"file_{el['name']}_{ii}", pydra.specs.File, file))
            else:
                raise Exception(
                    "value for file in params['tests'] has to be a str or a list"
                )

        output_spec_run = pydra.specs.SpecInfo(
            name="Output",
            fields=out_fields_run,
            bases=(pydra.specs.ShellOutSpec,))

        task_run = pydra.DockerTask(
            name="run",
            executable=cmd_run,
            image=wf.lzin.image,
            input_spec=input_spec_run,
            output_spec=output_spec_run,
            bindings=[(self.data_path, "/data", "ro")],
            **inp_val_run,
        )
        wf.add(task_run)

        # 2nd task - creating list from the 1st task output
        @pydra.mark.task
        @pydra.mark.annotate({"return": {"outfiles": list}})
        def outfiles_list(res):
            out_f = []
            for el in self.params["tests"]:
                if isinstance(el["file"], (tuple, list)):
                    out_f.append(
                        tuple([
                            res[f"file_{el['name']}_{i}"]
                            for i in range(len(el["file"]))
                        ]))
                else:
                    out_f.append(res[f"file_{el['name']}"])
            return out_f

        wf.add(outfiles_list(name="outfiles", res=wf.run.lzout.all_))

        # 3rd task - tests
        input_spec_test = pydra.specs.SpecInfo(
            name="Input",
            fields=[
                (
                    "script_test",
                    attr.ib(type=pydra.specs.File,
                            metadata={
                                "argstr": "",
                                "position": 1,
                                "help_string": "test file",
                                "mandatory": True,
                            }),
                ),
                (
                    "file_out",
                    attr.ib(type=(tuple, pydra.specs.File),
                            metadata={
                                "position": 2,
                                "help_string": "out file",
                                "argstr": "-out",
                                "mandatory": True,
                            }),
                ),
                (
                    "file_ref",
                    attr.ib(type=(tuple, pydra.specs.File),
                            metadata={
                                "position": 3,
                                "argstr": "-ref",
                                "help_string": "reference file",
                                "mandatory": True,
                                "container_path": True,
                            }),
                ),
                (
                    "name_test",
                    attr.ib(type=str,
                            metadata={
                                "position": 4,
                                "argstr": "-name",
                                "help_string": "test name",
                                "mandatory": True,
                            }),
                ),
            ],
            bases=(pydra.specs.ShellSpec,),
        )

        output_spec_test = pydra.specs.SpecInfo(
            name="Output",
            fields=[("reports", pydra.specs.File, "report_*.json")],
            bases=(pydra.specs.ShellOutSpec,),
        )

        if self.test_image:
            container_info = ("docker", self.test_image,
                              [(self.data_ref_path, "/data_ref", "ro")])
            file_ref_dir = Path("/data_ref")
        else:
            container_info = None
            file_ref_dir = self.data_ref_path

        inp_val_test = {}
        inp_val_test["name_test"] = [el["name"] for el in self.params["tests"]]
        inp_val_test["script_test"] = [
            el["script"] for el in self.params["tests"]
        ]
        inp_val_test["file_ref"] = []
        for el in self.params["tests"]:
            if isinstance(el["file"], str):
                inp_val_test["file_ref"].append(file_ref_dir / el["file"])
            elif isinstance(el["file"], list):
                inp_val_test["file_ref"].append(
                    tuple([file_ref_dir / file for file in el["file"]]))

        task_test = pydra.ShellCommandTask(
            name="test",
            executable="python",
            container_info=container_info,
            input_spec=input_spec_test,
            output_spec=output_spec_test,
            file_out=wf.outfiles.lzout.outfiles,
            **inp_val_test,
        ).split((("script_test", "name_test"), ("file_out", "file_ref")))
        wf.add(task_test)

        # setting wf output
        wf.set_output([
            ("outfiles", wf.outfiles.lzout.outfiles),
            ("test_out", wf.test.lzout.stdout),
            ("reports", wf.test.lzout.reports),
        ])

        print(f"\n running pydra workflow for {self.workflow_path} "
              f"in working directory - {self.working_dir}")
        with pydra.Submitter(plugin="cf") as sub:
            sub(wf)

        res = wf.result()
        self.reports[soft_ver_str] = res.output.reports
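# For orientation, a hypothetical `self.params` dictionary consistent with the keys
# this method reads (all names and values below are illustrative, not taken from the
# original project):
example_params = {
    "analysis": {
        "command": "python",
        "script": "workflow/run_analysis.py",
        "inputs": [
            {"name": "infile", "type": "File", "value": "T1w.nii.gz"},
            {"name": "outfile", "type": "str", "value": "seg.nii.gz",
             "output_file": True},
        ],
    },
    "tests": [
        {"name": "check_seg", "script": "tests/check_seg.py", "file": "seg.nii.gz"},
    ],
}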
    return a**n


@pydra.mark.task
def mult_var(a, b):
    return a * b


# -

# ## Introduction to Workflow
#
# In order to run multiple tasks within one pipeline, we use another *pydra* class - `Workflow`. The workflow will contain an arbitrary number of tasks that will be treated as a graph.
#
# Let's start with a workflow with a single task that has one input `x`. When we create a `Workflow`, we have to specify `input_spec` that contains all of the workflow inputs:

wf1 = pydra.Workflow(name="wf1", input_spec=["x"], x=3)

# Now, we can add a task and specify that `x` will be taken from the workflow input by using the so-called *Lazy Input*, `x=wf1.lzin.x`. We should also give a `name` to the task we are using in the `Workflow`.

wf1.add(add_two(name="sum", x=wf1.lzin.x))

# Now, we can access the task by using the task name:

wf1.sum

# We also have to specify what the workflow output should be. For this one-task workflow, we simply take the output of `sum` and use a *Lazy Output* to set it to `wf1.output.out`:

wf1.set_output([("out", wf1.sum.lzout.out)])

# We could also use a dictionary to set the output - `wf1.set_output({"out": wf1.sum.lzout.out})`, or a tuple if we set a single element: `wf1.set_output(("out", wf1.sum.lzout.out))`
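# The workflow is not executed until it is submitted. A minimal sketch (not part of the original cell) of running `wf1` with the concurrent-futures plugin and reading the result back:

with pydra.Submitter(plugin="cf") as sub:
    sub(wf1)

wf1.result()  # out == 5, assuming add_two adds 2 to the input x=3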
"output_dir", help="The directory where the output files " "should be stored. If you are running group level analysis " "this folder should be prepopulated with the results of the" "participant level analysis.", ) args = parser.parse_args() subject_dirs = glob(os.path.join(args.bids_dir, "sub-*")) subjects_to_analyze = [ subject_dir.split("-")[-1] for subject_dir in subject_dirs ] wf = pydra.Workflow( name="BIDS App Example with Boutiques", input_spec=["T1_file", "output_dir"], output_dir=args.output_dir, ) @pydra.mark.task def fsl_bet_boutiques(T1_file, output_dir): maskfile = os.path.join( output_dir, (os.path.split(T1_file)[-1].replace("_T1w", "_brain").replace( ".gz", "").replace(".nii", "")), ) fsl_bet = function("zenodo.3267250") ret = fsl_bet( "-v{0}:{0}".format(T1_file.split('sub-')[0]), "-v{0}:{0}".format(output_dir), infile=T1_file,