# Task2 add_task - run an R analysis script, tracking the script itself as an
# executable dependency so the task reruns when the script changes
workflow.add_task(
    "src/analysis.R -o [targets[0]] -d " + args.metadata,  # command
    depends=[TrackedExecutable("src/analysis.R")],  # tracked executable dependency
    targets=args.output,  # output target directory
    args=[args.metadata])  # additional arguments

# Task3 add_task_group - AnADAMA2 example to execute a task on multiple
# input files/dependencies
multiple_input_files = glob(os.path.join(args.output, '*.txt'))  # initializing multiple input files
output_files = [
    os.path.join(args.output, 'data', os.path.basename(files + "_backup"))
    for files in multiple_input_files]

workflow.add_task_group(
    "cp [depends[0]] [targets[0]]",  # command (one task per depends/targets pair)
    # BUG FIX: pass the file list itself, not a list wrapped in another list.
    # add_task_group pairs each element of depends with the matching element of
    # targets, so the extra nesting yielded one bogus list-valued dependency
    # instead of one dependency per output file.
    depends=multiple_input_files,  # input-file dependencies, one per task
    targets=output_files)  # output targets, one per task

# private python function definition
def remove_end_tabs_function(task):
    """Strip trailing whitespace (including tabs) from every line of the
    first dependency and write the cleaned lines to the first target."""
    with open(task.targets[0].name, 'w') as file_handle_out:
        # context-manage the input file too, so the handle is closed
        # deterministically instead of leaking until garbage collection
        with open(task.depends[0].name) as file_handle_in:
            for line in file_handle_in:
                file_handle_out.write(line.rstrip() + "\n")

# Task4 add_task - AnADAMA2 example to usage of python task function
workflow.add_task(
    remove_end_tabs_function,  # calling the python function
    depends=args.input,  # input-file dependency
    targets=args.output + "/data/data.tsv.notabs")  # target output
# NOTE(review): the original chunk was truncated at this call -- the closing
# parenthesis (and possibly further keyword arguments) was not visible; the
# minimal completion above is assumed. Confirm against the full source.
# Build the workflow object. The version string is printed for the "--version"
# option and the description for "--help" when this script is run.
workflow = Workflow(version="0.1", description="A workflow to run KneadData")

# Register the custom command-line options this workflow understands.
workflow.add_argument("kneaddata-db", desc="the kneaddata database",
                      default="/work/code/kneaddata/db/")
workflow.add_argument("input-extension", desc="the input file extension",
                      default="fastq")
workflow.add_argument("threads", desc="number of threads for knead_data to use",
                      default=1)

# Read the options supplied on the command line.
args = workflow.parse_args()

# Collect every input file matching the requested extension, then derive one
# kneaddata-tagged output name per input file.
input_files = workflow.get_input_files(extension=args.input_extension)
output_files = workflow.name_output_files(name=input_files, tag="kneaddata")

# Queue one kneaddata task per input/output pair.
workflow.add_task_group(
    "kneaddata --input [depends[0]] --output [output_folder] --reference-db [kneaddata_db] --threads [threads]",
    depends=input_files,
    targets=output_files,
    output_folder=args.output,
    kneaddata_db=args.kneaddata_db,
    threads=args.threads)

workflow.go()
required=True) args = workflow.parse_args() # get all of the input files input_files = utilities.find_files(args.input, extension=args.input_extension, exit_if_not_found=True) sample_names = utilities.sample_names(input_files, args.input_extension) # for each raw input file, generate an md5sum file md5sum_outputs = [ os.path.join(args.output, output_file_name) + ".md5sum" for output_file_name in sample_names ] workflow.add_task_group("md5sum [depends[0]] > [targets[0]]", depends=input_files, targets=md5sum_outputs) # for each file, verify the checksum md5sum_checks = [ os.path.join(args.output, check_file_name) + ".check" for check_file_name in sample_names ] for in_file, sum_file, check_file in zip(input_files, md5sum_outputs, md5sum_checks): workflow.add_task(verify_checksum, depends=[in_file, sum_file, args.input_metadata], targets=[check_file]) workflow.go()