def process(self):
    """
    Build the cutadapt command line and schedule it on every read file.

    The run is single-end when ``self.in_R2`` is empty and paired-end
    otherwise. In both modes the adaptor file, which is referenced by path
    inside the command, is declared through ``includes``.
    """
    # The overlap option must only be emitted when a value is set: the
    # previous condition was inverted and produced "--overlap None".
    # "== None" is kept instead of "is None": the attribute may be a
    # parameter wrapper with its own equality -- TODO confirm.
    if self.min_overlap == None:
        overlap_opt = ""
    else:
        overlap_opt = " --overlap " + str(self.min_overlap)
    discard_opt = " --discard-untrimmed" if self.discard_untrimmed else ""

    cmd = (
        self.get_exec_path("cutadapt")
        + " --error-rate " + str(self.error_rate)
        + overlap_opt
        + discard_opt
        + " -" + self.adaptor_type + " file:" + self.adaptor_file
    )

    if len(self.in_R2) == 0:
        # Single-end: $1 = reads, $2 = trimmed reads, $3 = stdout, $4 = stderr
        cmd += " --output $2 $1 > $3 2> $4"
        cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(
            cutadapt_fct,
            inputs=[self.in_R1],
            outputs=[self.out_R1, self.stdout, self.stderr],
            includes=[self.adaptor_file]
        )
    else:
        # Paired-end: $1/$2 = reads, $3/$4 = trimmed reads, $5 = stdout, $6 = stderr
        cmd += " --output $3 --paired-output $4 $1 $2 > $5 2> $6"
        cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
        MultiMap(
            cutadapt_fct,
            inputs=[self.in_R1, self.in_R2],
            outputs=[self.out_R1, self.out_R2, self.stdout, self.stderr],
            includes=[self.adaptor_file]
        )
def process(self):
    """
    Schedule bamAreasToFastq.py on each targets/alignment pair.

    When ``self.split_targets`` is set, ``self.process_split_targets()`` is
    run first and the ``repeated_*`` attributes are used instead of the raw
    alignment and read lists.
    """
    if self.split_targets:
        self.process_split_targets()

    # With cmd_format '{EXE} {OUT} {IN}' the outputs take $1..$3 and the
    # inputs start at $4.
    cmd = "".join([
        self.get_exec_path("bamAreasToFastq.py"),
        " --min-overlap " + str(self.min_overlap),
        " --input-targets $4",
        " --input-aln $5",
        "" if len(self.R1) == 0 else " --input-R1 $6",
        "" if len(self.R2) == 0 else " --input-R2 $7",
        " --output-R1 $1",
        " --output-R2 $2",
        " 2> $3",
    ])
    bam2fastq_fct = ShellFunction(cmd, cmd_format='{EXE} {OUT} {IN}')

    if self.split_targets:
        aln_files = self.repeated_aln
    else:
        aln_files = self.aln
    inputs = [self.repeated_targets, aln_files]

    # NOTE(review): "--input-R1 $6" is emitted as soon as R1 is non-empty,
    # but the reads are only passed when both R1 and R2 are non-empty --
    # confirm that a single-sided configuration cannot occur.
    if len(self.R1) > 0 and len(self.R2) > 0:
        if self.split_targets:
            inputs.append(self.repeated_R1)
            inputs.append(self.repeated_R2)
        else:
            inputs.append(self.R1)
            inputs.append(self.R2)

    MultiMap(
        bam2fastq_fct,
        inputs=inputs,
        outputs=[self.out_R1, self.out_R2, self.stderr]
    )
def process(self):
    """
    Combine each R1/R2 pair with flash2, then write one report per sample.

    One flash2 command is created per entry of ``self.prefixes``; the report
    step is mapped over the combined and not-combined R1 outputs.
    """
    # The minimum overlap is emitted once and only when it is set. It used to
    # be added a second time unconditionally, which produced
    # "--min-overlap None" when unset and a duplicated flag otherwise.
    # "== None" is kept instead of "is None": the attribute may be a
    # parameter wrapper with its own equality -- TODO confirm.
    # NOTE(review): the duplicated option may originally have been meant as
    # a different flag (e.g. a maximum overlap) -- confirm with the author.
    if self.min_overlap == None:
        min_overlap_opt = ""
    else:
        min_overlap_opt = " --min-overlap " + str(self.min_overlap)

    # Options that do not depend on the sample are built once.
    common_opts = (
        self.get_exec_path("flash2")
        + " --compress "
        + " --threads " + str(self.nb_threads)
        + min_overlap_opt
        + " --max-mismatch-density " + str(self.mismatch_ratio)
        + " --phred-offset " + str(self.phred_offset)
    )

    # Combine reads
    for idx, curr_prefix in enumerate(self.prefixes):
        flash2 = ShellFunction(
            common_opts
            + " --output-prefix " + os.path.basename(curr_prefix)
            + " --output-directory " + self.output_directory
            + " $1 "
            + " $2 "
            + " 2> $3",
            cmd_format='{EXE} {IN} {OUT}'
        )
        # Only stderr ($3) appears in the command; the other outputs are
        # declared without a placeholder.
        flash2(
            inputs=[self.R1[idx], self.R2[idx]],
            outputs=[
                self.stderr[idx],
                self.out_hist[idx],
                self.out_combined[idx],
                self.out_histogram[idx],
                self.out_not_combined_R1[idx],
                self.out_not_combined_R2[idx]
            ]
        )

    # Write report
    report_fct = PythonFunction(writeReport, cmd_format="{EXE} {IN} {OUT}")
    MultiMap(
        report_fct,
        inputs=[self.out_combined, self.out_not_combined_R1],
        outputs=[self.out_report]
    )
def process(self):
    """
    Map the ``bwaWrapper`` Python function over the R1/R2 read lists.

    The bwa and samtools executable paths and the quoted reference genome are
    written directly into the command format; the reference genome is also
    declared through ``includes``.
    """
    wrapper_format = " ".join([
        '{EXE}',
        self.get_exec_path("bwa"),
        self.get_exec_path("samtools"),
        '{OUT}',
        '"' + self.reference_genome + '"',
        '{IN}',
    ])
    bwamem = PythonFunction(bwaWrapper, cmd_format=wrapper_format)
    MultiMap(
        bwamem,
        inputs=[self.R1, self.R2],
        outputs=[self.aln_files, self.stderr],
        includes=[self.reference_genome]
    )
def process(self):
    """
    Schedule addLociAnnotations.py on every MSI report.

    The command uses one input and two outputs: $1 = MSI report,
    $2 = annotated report, $3 = stderr. The annotations file is written by
    path into the command, so it is declared through ``includes`` rather than
    as a mapped input.
    """
    cmd = (
        self.get_exec_path("addLociAnnotations.py")
        + " --input-loci-annot " + self.annotations_file
        + " --input-report $1 "
        + " --output-report $2"
        + " 2> $3"
    )
    add_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')

    # Passing the annotations file as a first input shifted every
    # placeholder: $1 became the annotations file, "--output-report $2"
    # targeted the MSI report and stderr was sent to the output report.
    # It is a dependency of the command, not a mapped input.
    includes = [self.annotations_file]
    # NOTE(review): info_file is kept as a dependency because the previous
    # code passed it as ``includes``; it is not used by the command itself --
    # confirm that it is still required.
    extra_includes = self.info_file
    if extra_includes is not None:
        if isinstance(extra_includes, list):
            includes.extend(extra_includes)
        else:
            includes.append(extra_includes)

    MultiMap(
        add_fct,
        inputs=[self.msi_files],
        outputs=[self.out_report, self.stderr],
        includes=includes
    )
def process(self):
    """
    Schedule MSIMergeReports.py on each pair of first/second reports.

    Placeholders: $1 and $2 = reports to merge, $3 = merged report,
    $4 = stderr.
    """
    cmd = "".join([
        self.get_exec_path("MSIMergeReports.py"),
        " --inputs-reports $1 $2",
        " --output-report $3",
        " 2> $4",
    ])
    merges_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
    merged_inputs = [self.first_report, self.second_report]
    merged_outputs = [self.out_report, self.stderr]
    MultiMap(merges_fct, inputs=merged_inputs, outputs=merged_outputs)
def process(self):
    """
    Run mSINGS on each alignment, clean its report and aggregate the results.

    Three mapped steps are scheduled:
      1. run_msings.py writes the analysis and a temporary report
         (report path + ".tmp");
      2. sed removes "_analysis" from the temporary report to produce the
         final report;
      3. mSINGSToReport.py aggregates the report and the analysis.
    """
    tmp_report = [curr_path + ".tmp" for curr_path in self.report]

    # Process mSINGS
    msings_cmd = "".join([
        self.get_exec_path("msings_venv"),
        " ",
        self.get_exec_path("run_msings.py"),
        " --java-path " + self.get_exec_path("java"),
        " --java-mem " + str(self.java_mem),
        " --multiplier " + str(self.multiplier),
        " --msi-min-threshold " + str(self.msi_min_threshold),
        " --msi-max-threshold " + str(self.msi_max_threshold),
        " --input-baseline " + self.baseline,
        " --input-genome " + self.genome,
        " --input-intervals " + self.intervals,
        " --input-targets " + self.targets,
        " --input-aln $1 ",
        " --output-analyzer $2 ",
        " --output-report $3 ",
        " 2> $4",
    ])
    msings_fct = ShellFunction(msings_cmd, cmd_format='{EXE} {IN} {OUT}')
    MultiMap(
        msings_fct,
        inputs=[self.aln],
        outputs=[self.analysis, tmp_report, self.msings_stderr],
        includes=[self.genome, self.intervals, self.baseline, self.targets]
    )

    # Remove suffix in samples names
    rename_cmd = "".join([
        self.get_exec_path("sed"),
        " -e 's/_analysis//'",
        " $1",
        " > $2",
        " 2> $3",
    ])
    rename_fct = ShellFunction(rename_cmd, cmd_format='{EXE} {IN} {OUT}')
    MultiMap(
        rename_fct,
        inputs=[tmp_report],
        outputs=[self.report, self.rename_stderr]
    )

    # Aggregate report and analysis
    convert_cmd = "".join([
        self.get_exec_path("mSINGSToReport.py"),
        " --input-report $1 ",
        " --input-analysis $2 ",
        " --output $3 ",
        " 2> $4",
    ])
    convert_fct = ShellFunction(convert_cmd, cmd_format='{EXE} {IN} {OUT}')
    MultiMap(
        convert_fct,
        inputs=[self.report, self.analysis],
        outputs=[self.aggreg_report, self.aggreg_stderr]
    )
def process(self):
    """
    Schedule msiFilter.py on every input report.

    The instability ratio is only passed for the "ratio" consensus method and
    the instability count only for the "count" consensus method.
    Placeholders: $1 = input report, $2 = output report, $3 = stderr.
    """
    cmd_parts = [
        self.get_exec_path("msiFilter.py"),
        " --consensus-method " + str(self.consensus_method),
        " --method-name " + str(self.method_name),
        " --min-voting-loci " + str(self.min_voting_loci),
        " --min-distrib-support " + str(self.min_distrib_support),
        " --undetermined-weight " + str(self.undetermined_weight),
    ]
    if self.locus_weight_is_score:
        cmd_parts.append(" --locus-weight-is-score")
    if self.consensus_method == "ratio":
        cmd_parts.append(" --instability-ratio " + str(self.instability_ratio))
    if self.consensus_method == "count":
        cmd_parts.append(" --instability-count " + str(self.instability_count))
    cmd_parts.extend([
        " --input-reports $1",
        " --output-reports $2",
        " 2> $3",
    ])

    filter_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
    MultiMap(
        filter_fct,
        inputs=[self.in_reports],
        outputs=[self.out_report, self.stderr]
    )
def add_shell_execution(self, source, inputs=[], outputs=[], arguments=[], includes=[], cmd_format=None, map=False, shell=None, collect=False, local=False):
    """
    Wrap a shell command in a ShellFunction, schedule it and trace the call.

    @param source: the shell command given to ShellFunction.
    @param inputs: inputs of the command; must be a list when ``map`` is set.
    @param outputs: outputs of the command; must be a list when ``map`` is set.
    @param arguments: arguments forwarded to the execution.
    @param includes: additional dependencies forwarded to the execution.
    @param cmd_format: command format given to ShellFunction.
    @param map: when true the command is scheduled through MultiMap,
        otherwise the function is called once.
    @param shell: shell given to ShellFunction.
    @param collect: forwarded to MultiMap (map mode only).
    @param local: forwarded to MultiMap (map mode only).
    @raise Exception: in map mode when inputs or outputs is not a list.
    """
    # The list defaults are shared objects: this method never mutates them,
    # it only forwards them.
    shell_function = ShellFunction(source, shell=shell, cmd_format=cmd_format, modules=self.modules)

    if map:
        # A map/multimap abstraction needs list (or file list) inputs and outputs.
        if not (isinstance(inputs, list) and isinstance(outputs, list)):
            error_msg = "add_shell_execution: '" + source + "' map requires a list as inputs and output"
            # error() and not exception(): no exception is being handled
            # here, so exception() would log a meaningless "NoneType: None"
            # traceback.
            logging.getLogger("jflow").error(error_msg)
            raise Exception(error_msg)
        MultiMap(
            shell_function,
            inputs=inputs,
            outputs=outputs,
            includes=includes,
            collect=collect,
            local=local,
            arguments=arguments
        )
    else:
        shell_function(inputs=inputs, outputs=outputs, arguments=arguments, includes=includes)

    self.__write_trace(source, inputs, outputs, arguments, cmd_format, map, "Shell")
def process(self):
    """
    Schedule miamsClassify.py on every evaluated sample.

    The random seed and the classifier parameters are only passed when they
    are set; the instability ratio/count is only passed for the matching
    consensus method. The references file is written by path into the command
    and declared through ``includes``.
    Placeholders: $1 = evaluated sample, $2 = output report, $3 = stderr.
    """
    cmd_parts = [self.get_exec_path("miamsClassify.py")]
    # "== None" / "!= None" are kept as written: the attributes may be
    # parameter wrappers with their own equality -- TODO confirm.
    if not (self.random_seed == None):
        cmd_parts.append(" --random-seed " + str(self.random_seed))
    cmd_parts.append(" --classifier " + self.classifier)
    if self.classifier_params != None:
        cmd_parts.append(" --classifier-params '" + self.classifier_params + "'")
    cmd_parts.extend([
        " --consensus-method " + self.consensus_method,
        " --method-name " + self.method_name,
        " --min-voting-loci " + str(self.min_voting_loci),
        " --min-support-fragments " + str(self.min_support_fragments),
        " --undetermined-weight " + str(self.undetermined_weight),
    ])
    if self.locus_weight_is_score:
        cmd_parts.append(" --locus-weight-is-score")
    if self.consensus_method == "ratio":
        cmd_parts.append(" --instability-ratio " + str(self.instability_ratio))
    if self.consensus_method == "count":
        cmd_parts.append(" --instability-count " + str(self.instability_count))
    cmd_parts.extend([
        " --input-references " + self.references_samples,
        " --input-evaluated $1 ",
        " --output-report $2 ",
        " 2> $3",
    ])

    classifier_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
    MultiMap(
        classifier_fct,
        inputs=[self.evaluated_samples],
        outputs=[self.out_report, self.stderr],
        includes=[self.references_samples]
    )
def add_python_execution(self, function, inputs=[], outputs=[], arguments=[], includes=[], add_path=None, collect=False, local=False, map=False, cmd_format=""):
    """
    Wrap a Python function in a PythonFunction, schedule it and trace the call.

    The "lib" directory located next to the parent directory of the file
    defining the current class is always added to the paths given to
    PythonFunction.

    @param function: the Python function to execute.
    @param inputs: inputs of the function; must be a list when ``map`` is set.
    @param outputs: outputs of the function; must be a list when ``map`` is set.
    @param arguments: arguments forwarded to the execution.
    @param includes: additional dependencies; a single value is wrapped in a
        list.
    @param add_path: optional iterable of extra paths given to PythonFunction.
        It is copied and never modified.
    @param collect: forwarded to MultiMap (map mode only).
    @param local: forwarded to MultiMap (map mode only).
    @param map: when true the function is scheduled through MultiMap,
        otherwise it is called once.
    @param cmd_format: command format; when empty it is built from the
        presence of arguments, inputs and outputs.
    @raise Exception: in map mode when inputs or outputs is not a list.
    """
    workflow_dir = Path(os.path.dirname(inspect.getfile(self.__class__))).parent
    lib_dir = str(workflow_dir) + os.path.sep + "lib"

    # Work on a private copy: the former ``add_path=set()`` default was
    # mutated in place, so library directories accumulated from one call to
    # the next and a set passed by the caller was silently modified.
    paths = set() if add_path is None else set(add_path)
    paths.add(lib_dir)

    if map and not (isinstance(inputs, list) and isinstance(outputs, list)):
        error_msg = "add_python_execution: '" + function.__name__ + "' map requires a list as inputs and output!"
        # error() and not exception(): no exception is being handled here, so
        # exception() would log a meaningless "NoneType: None" traceback.
        logging.getLogger("jflow").error(error_msg)
        raise Exception(error_msg)

    # Command format to build
    if cmd_format == "":
        cmd_format = "{EXE} "
        if len(arguments) > 0:
            cmd_format += " {ARG}"
        if (isinstance(inputs, list) and len(inputs) > 0) or (inputs is not None and inputs != []):
            cmd_format += " {IN}"
        if (isinstance(outputs, list) and len(outputs) > 0) or (outputs is not None and outputs != []):
            cmd_format += " {OUT}"

    py_function = PythonFunction(function, add_path=paths, cmd_format=cmd_format, modules=self.modules)

    new_inputs, includes_in = self.__generate_iolist(inputs, map)
    # The includes derived from the outputs are not used.
    new_outputs, includes_out = self.__generate_iolist(outputs, map)

    if not isinstance(includes, list):
        includes = [includes]
    all_includes = includes + includes_in

    if map:
        MultiMap(
            py_function,
            inputs=new_inputs,
            outputs=new_outputs,
            includes=all_includes,
            collect=collect,
            local=local,
            arguments=arguments
        )
    else:
        py_function(inputs=new_inputs, outputs=new_outputs, arguments=arguments, includes=all_includes)

    self.__write_trace(function.__name__, inputs, outputs, arguments, cmd_format, map, "PythonFunction")
def process(self):
    """
    Run the component; subclasses can override it for a more complex process.

    The attributes listed in ``self.params_order`` are split into plain
    parameters, input files and output files, each group being sorted on
    ``argpos``. The command line is built from the plain parameters, then
    completed and scheduled according to ``self.get_abstraction()``:
    None (single call), 'map' or 'multimap'.

    Raises an Exception for any other abstraction.
    """
    # Dispatch the declared parameters
    options, inputs, outputs = [], [], []
    for param_name in self.params_order:
        value = self.__getattribute__(param_name)
        if not isinstance(value, AbstractParameter):
            continue
        if isinstance(value, AbstractInputFile):
            inputs.append(value)
        elif isinstance(value, AbstractOutputFile):
            outputs.append(value)
        else:
            options.append(value)

    # Order each group by argpos
    by_argpos = attrgetter('argpos')
    options.sort(key=by_argpos)
    inputs.sort(key=by_argpos)
    outputs.sort(key=by_argpos)

    # Executable followed by the plain parameters
    commandline = self.get_exec_path(self.get_command())
    for option in options:
        if isinstance(option, BoolParameter):
            # Boolean parameters are flags: emitted only when truthy
            if option:
                commandline += " %s " % option.cmd_format
        elif option.default:
            commandline += " %s %s " % (option.cmd_format, option.default)

    abstraction = self.get_abstraction()

    # No abstraction: a single call
    if abstraction == None:
        position = 1
        for io_param in inputs + outputs:
            if isinstance(io_param, (InputFile, OutputFile)):
                commandline += ' %s $%s ' % (io_param.cmd_format, position)
                position += 1
            else:
                # input file list or output file list / pattern / ends with:
                # one placeholder per element
                for _ in io_param:
                    commandline += ' %s $%s ' % (io_param.cmd_format, position)
                    position += 1
        function = ShellFunction(commandline, cmd_format='{EXE} {IN} {OUT}', modules=self.modules)
        function(inputs=inputs, outputs=outputs)
        return

    # weaver map abstraction
    if abstraction == 'map':
        if not (len(inputs) == len(outputs) == 1):
            display_error_message(
                "You can only have one type of input and one type of output for the map abstraction"
            )
        map_inputs = inputs
        for io_param in inputs:
            commandline += ' %s $1 ' % io_param.cmd_format
            if isinstance(io_param, ParameterList):
                map_inputs = io_param
        map_outputs = outputs
        for io_param in outputs:
            commandline += ' %s $2 ' % io_param.cmd_format
            if isinstance(io_param, ParameterList):
                map_outputs = io_param
        function = ShellFunction(commandline, cmd_format='{EXE} {IN} {OUT}', modules=self.modules)
        Map(function, inputs=map_inputs, outputs=map_outputs)
        return

    # jflow multimap
    if abstraction == 'multimap':
        for position, io_param in enumerate(inputs + outputs, 1):
            if not isinstance(io_param, ParameterList):
                display_error_message(
                    "Multimap abstraction can be used only with ParameterList"
                )
            commandline += ' %s $%s ' % (io_param.cmd_format, position)
        function = ShellFunction(commandline, cmd_format='{EXE} {IN} {OUT}', modules=self.modules)
        MultiMap(function, inputs=inputs, outputs=outputs)
        return

    # anything other than that will be considered errored
    raise Exception('Unsupported abstraction %s ' % abstraction)
def process(self):
    """
    Map the ``bam_index`` Python function over the input alignments.

    The samtools executable path is written into the command format before
    the inputs and outputs. Outputs: alignment, index and stderr.
    """
    index_format = " ".join(['{EXE}', self.get_exec_path("samtools"), '{IN}', '{OUT}'])
    index_fct = PythonFunction(bam_index, cmd_format=index_format)
    MultiMap(
        index_fct,
        inputs=[self.in_aln],
        outputs=[self.out_aln, self.out_idx, self.stderr]
    )