示例#1
0
    def process(self):
        """
        Build the cutadapt command line and schedule it through MultiMap.

        Single-end mode is used when ``self.in_R2`` is empty, paired-end mode
        otherwise. With the ``{EXE} {IN} {OUT}`` format the ``$n``
        placeholders are numbered over the inputs first, then the outputs
        (single-end: $1 reads, $2 trimmed reads, $3 stdout, $4 stderr).
        """
        # Fix: the option used to be emitted only when min_overlap was None
        # (giving "--overlap None") and was dropped when a value was set.
        # NOTE(review): "!= None" is used instead of "is not None" to stay
        # consistent with the equality-based None checks of the sibling
        # components; presumably the attribute can be a wrapper object -
        # confirm before switching to an identity test.
        overlap_opt = ""
        if self.min_overlap != None:
            overlap_opt = " --overlap " + str(self.min_overlap)

        cmd = self.get_exec_path("cutadapt") + \
            " --error-rate " + str(self.error_rate) + \
            overlap_opt + \
            (" --discard-untrimmed" if self.discard_untrimmed else "") + \
            " -" + self.adaptor_type + " file:" + self.adaptor_file

        if len(self.in_R2) == 0:  # Process single read cutadapt
            cmd += " --output $2" + \
                " $1" + \
                " > $3" + \
                " 2> $4"
            cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
            MultiMap(cutadapt_fct,
                     inputs=[self.in_R1],
                     outputs=[self.out_R1, self.stdout, self.stderr],
                     includes=[self.adaptor_file])
        else:  # Process paired-end cutadapt
            cmd += " --output $3" + \
                " --paired-output $4" + \
                " $1" + \
                " $2" + \
                " > $5" + \
                " 2> $6"
            cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
            MultiMap(
                cutadapt_fct,
                inputs=[self.in_R1, self.in_R2],
                outputs=[self.out_R1, self.out_R2, self.stdout, self.stderr],
                includes=[self.adaptor_file])
示例#2
0
 def process(self):
     """
     Schedule bamAreasToFastq.py through MultiMap.

     The command uses the ``{EXE} {OUT} {IN}`` format: $1-$3 are the
     outputs (R1, R2, stderr) and $4 onwards are the inputs (targets,
     alignments, then the optional read files).
     """
     if self.split_targets:
         self.process_split_targets()

     # Command template
     # NOTE(review): $6 and $7 are added independently for R1 and R2, while
     # the matching inputs below are only added when BOTH are non-empty -
     # confirm that providing a single read file is never expected.
     cmd_parts = [
         self.get_exec_path("bamAreasToFastq.py"),
         " --min-overlap " + str(self.min_overlap),
         " --input-targets $4",
         " --input-aln $5",
         " --input-R1 $6" if len(self.R1) != 0 else "",
         " --input-R2 $7" if len(self.R2) != 0 else "",
         " --output-R1 $1",
         " --output-R2 $2",
         " 2> $3",
     ]
     bam2fastq_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {OUT} {IN}')

     # Inputs: the "repeated_*" variants are used when targets are split
     use_repeated = self.split_targets
     inputs = [self.repeated_targets]
     inputs.append(self.repeated_aln if use_repeated else self.aln)
     if len(self.R1) > 0 and len(self.R2) > 0:
         inputs.append(self.repeated_R1 if use_repeated else self.R1)
         inputs.append(self.repeated_R2 if use_repeated else self.R2)

     MultiMap(
         bam2fastq_fct,
         inputs=inputs,
         outputs=[self.out_R1, self.out_R2, self.stderr],
     )
示例#3
0
文件: flash2.py 项目: bialimed/miams
 def process(self):
     """
     Combine each read pair with flash2, then write the reports.

     One flash2 ShellFunction is created and called per entry of
     ``self.prefixes`` ($1 and $2 are the R1/R2 inputs, $3 is the stderr
     file, the first declared output). The report step is then scheduled
     with MultiMap over the combined and not-combined R1 files.
     """
     # Fix: "--min-overlap" used to be written twice (once unconditionally,
     # once guarded), and the unconditional copy produced
     # "--min-overlap None" when the value was unset. It is now written once
     # and only when a value is set.
     # NOTE(review): the guarded duplicate looks like a copy/paste slip of
     # another optional option (possibly a maximum overlap) - confirm
     # against the component's parameters.
     # NOTE(review): "== None" is kept as an equality test on purpose, like
     # in the sibling components - confirm before using "is None".
     min_overlap_opt = "" if self.min_overlap == None else " --min-overlap " + str(self.min_overlap)

     # Combine reads
     for idx, curr_prefix in enumerate(self.prefixes):
         flash2 = ShellFunction(
             self.get_exec_path("flash2") +
             " --compress " +
             " --threads " + str(self.nb_threads) +
             min_overlap_opt +
             " --max-mismatch-density " + str(self.mismatch_ratio) +
             " --phred-offset " + str(self.phred_offset) +
             " --output-prefix " + os.path.basename(curr_prefix) +
             " --output-directory " + self.output_directory +
             " $1 " +
             " $2 " +
             " 2> $3",
             cmd_format='{EXE} {IN} {OUT}'
         )
         flash2(
             inputs=[self.R1[idx], self.R2[idx]],
             outputs=[
                 self.stderr[idx],
                 self.out_hist[idx],
                 self.out_combined[idx],
                 self.out_histogram[idx],
                 self.out_not_combined_R1[idx],
                 self.out_not_combined_R2[idx],
             ]
         )
     # Write report
     report_fct = PythonFunction(writeReport, cmd_format="{EXE} {IN} {OUT}")
     MultiMap(report_fct, inputs=[self.out_combined, self.out_not_combined_R1], outputs=[self.out_report])
示例#4
0
文件: bwamem.py 项目: bialimed/miams
 def process(self):
     """
     Schedule the bwaWrapper Python function through MultiMap.

     The wrapper command receives, in order: the bwa path, the samtools
     path, the outputs, the quoted reference genome and the inputs.
     """
     bwa_path = self.get_exec_path("bwa")
     samtools_path = self.get_exec_path("samtools")
     quoted_reference = '"' + self.reference_genome + '"'
     wrapper_format = " ".join(
         ['{EXE}', bwa_path, samtools_path, '{OUT}', quoted_reference, '{IN}']
     )
     bwamem = PythonFunction(bwaWrapper, cmd_format=wrapper_format)
     MultiMap(
         bwamem,
         inputs=[self.R1, self.R2],
         outputs=[self.aln_files, self.stderr],
         includes=[self.reference_genome],
     )
示例#5
0
 def process(self):
     """
     Schedule addLociAnnotations.py through MultiMap over the MSI reports.

     Placeholders ({EXE} {IN} {OUT} format): $1 is the input report, $2 the
     annotated output report and $3 the stderr file. The annotation file is
     written verbatim in the command and declared as an include.
     """
     cmd = self.get_exec_path("addLociAnnotations.py") + \
         " --input-loci-annot " + self.annotations_file + \
         " --input-report $1 " + \
         " --output-report $2" + \
         " 2> $3"
     add_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
     # Fix: the annotation file used to be listed as a mapped input, which
     # shifted the placeholders ($1 became the annotation file and $2 the
     # MSI report, i.e. the input report was the --output-report target).
     # It is already inlined in the command, so it is declared as an include
     # instead, like the inlined files of the sibling components.
     # NOTE(review): the previous includes value was the bare attribute
     # "self.info_file", which the command never references - confirm that
     # nothing depends on it.
     MultiMap(add_fct,
              inputs=[self.msi_files],
              outputs=[self.out_report, self.stderr],
              includes=[self.annotations_file])
示例#6
0
 def process(self):
     """
     Schedule MSIMergeReports.py through MultiMap.

     Placeholders ({EXE} {IN} {OUT} format): $1 and $2 are the two input
     reports, $3 the merged report and $4 the stderr file.
     """
     cmd_parts = (
         self.get_exec_path("MSIMergeReports.py"),
         " --inputs-reports $1 $2",
         " --output-report $3",
         " 2> $4",
     )
     merges_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
     report_pairs = [self.first_report, self.second_report]
     merge_outputs = [self.out_report, self.stderr]
     MultiMap(merges_fct, inputs=report_pairs, outputs=merge_outputs)
示例#7
0
    def process(self):
        """
        Chain three MultiMap steps: run mSINGS, clean the sample names in
        its report with sed, then aggregate report and analysis.

        The mSINGS report is first written to "<report path>.tmp"; the sed
        step removes "_analysis" from it and writes the final report.
        """
        io_format = '{EXE} {IN} {OUT}'
        tmp_report = []
        for curr_path in self.report:
            tmp_report.append(curr_path + ".tmp")

        # Step 1: mSINGS ($1 alignments, $2 analysis, $3 temporary report, $4 stderr)
        msings_cmd = "".join([
            self.get_exec_path("msings_venv") + " " + self.get_exec_path("run_msings.py"),
            " --java-path " + self.get_exec_path("java"),
            " --java-mem " + str(self.java_mem),
            " --multiplier " + str(self.multiplier),
            " --msi-min-threshold " + str(self.msi_min_threshold),
            " --msi-max-threshold " + str(self.msi_max_threshold),
            " --input-baseline " + self.baseline,
            " --input-genome " + self.genome,
            " --input-intervals " + self.intervals,
            " --input-targets " + self.targets,
            " --input-aln $1 ",
            " --output-analyzer $2 ",
            " --output-report $3 ",
            " 2> $4",
        ])
        msings_fct = ShellFunction(msings_cmd, cmd_format=io_format)
        MultiMap(
            msings_fct,
            inputs=[self.aln],
            outputs=[self.analysis, tmp_report, self.msings_stderr],
            includes=[self.genome, self.intervals, self.baseline, self.targets],
        )

        # Step 2: remove suffix in samples names ($1 temporary report, $2 report, $3 stderr)
        sed_cmd = "".join([
            self.get_exec_path("sed"),
            " -e 's/_analysis//'",
            " $1",
            " > $2",
            " 2> $3",
        ])
        rename_fct = ShellFunction(sed_cmd, cmd_format=io_format)
        MultiMap(
            rename_fct,
            inputs=[tmp_report],
            outputs=[self.report, self.rename_stderr],
        )

        # Step 3: aggregate report and analysis ($1 report, $2 analysis, $3 output, $4 stderr)
        aggreg_cmd = "".join([
            self.get_exec_path("mSINGSToReport.py"),
            " --input-report $1 ",
            " --input-analysis $2 ",
            " --output $3 ",
            " 2> $4",
        ])
        convert_fct = ShellFunction(aggreg_cmd, cmd_format=io_format)
        MultiMap(
            convert_fct,
            inputs=[self.report, self.analysis],
            outputs=[self.aggreg_report, self.aggreg_stderr],
        )
示例#8
0
 def process(self):
     """
     Schedule msiFilter.py through MultiMap over the input reports.

     ``--instability-ratio`` is only passed for the "ratio" consensus
     method and ``--instability-count`` only for the "count" one.
     Placeholders: $1 input report, $2 output report, $3 stderr.
     """
     cmd_parts = [
         self.get_exec_path("msiFilter.py"),
         " --consensus-method " + str(self.consensus_method),
         " --method-name " + str(self.method_name),
         " --min-voting-loci " + str(self.min_voting_loci),
         " --min-distrib-support " + str(self.min_distrib_support),
         " --undetermined-weight " + str(self.undetermined_weight),
     ]
     # Optional switches
     if self.locus_weight_is_score:
         cmd_parts.append(" --locus-weight-is-score")
     if self.consensus_method == "ratio":
         cmd_parts.append(" --instability-ratio " + str(self.instability_ratio))
     if self.consensus_method == "count":
         cmd_parts.append(" --instability-count " + str(self.instability_count))
     # Files
     cmd_parts.extend([
         " --input-reports $1",
         " --output-reports $2",
         " 2> $3",
     ])
     filter_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
     MultiMap(
         filter_fct,
         inputs=[self.in_reports],
         outputs=[self.out_report, self.stderr],
     )
示例#9
0
    def add_shell_execution(self,
                            source,
                            inputs=[],
                            outputs=[],
                            arguments=[],
                            includes=[],
                            cmd_format=None,
                            map=False,
                            shell=None,
                            collect=False,
                            local=False):
        """
        Wrap a shell command in a ShellFunction and schedule it.

        With ``map`` set, the function is applied through MultiMap and both
        ``inputs`` and ``outputs`` must be lists; otherwise the function is
        called once. The call is finally recorded with ``__write_trace``.

        The list defaults are shared between calls; this method only
        forwards them and never modifies them.

        :param source: command handed to ShellFunction; also used in the
            error message and in the trace.
        :param inputs: inputs forwarded to the function / MultiMap.
        :param outputs: outputs forwarded to the function / MultiMap.
        :param arguments: arguments forwarded to the function / MultiMap.
        :param includes: includes forwarded to the function / MultiMap.
        :param cmd_format: command format forwarded to ShellFunction.
        :param map: when true, schedule through MultiMap.
        :param shell: shell forwarded to ShellFunction.
        :param collect: forwarded to MultiMap (map mode only).
        :param local: forwarded to MultiMap (map mode only).
        :raises Exception: if ``map`` is true and ``inputs`` or ``outputs``
            is not a list.
        """
        shell_function = ShellFunction(source,
                                       shell=shell,
                                       cmd_format=cmd_format,
                                       modules=self.modules)

        if map:
            # Map mode requires list (or list subclass) inputs and outputs
            both_lists = issubclass(inputs.__class__, list) and issubclass(
                outputs.__class__, list)
            if not both_lists:
                # Fix: Logger.exception() is meant to be called from an
                # exception handler; no exception is active here, so the
                # message is logged with error() instead.
                logging.getLogger("jflow").error(
                    "add_shell_execution: '" + source +
                    "' map requires a list as inputs and output")
                raise Exception("add_shell_execution: '" + source +
                                "'  map requires a list as inputs and output")
            MultiMap(shell_function,
                     inputs=inputs,
                     outputs=outputs,
                     includes=includes,
                     collect=collect,
                     local=local,
                     arguments=arguments)
        else:
            shell_function(inputs=inputs,
                           outputs=outputs,
                           arguments=arguments,
                           includes=includes)
        self.__write_trace(source, inputs, outputs, arguments, cmd_format, map,
                           "Shell")
示例#10
0
 def process(self):
     """
     Schedule miamsClassify.py through MultiMap over the evaluated samples.

     Optional options (random seed, classifier parameters, locus weight
     switch, instability ratio/count) are only added when set or when the
     consensus method calls for them. Placeholders: $1 evaluated sample,
     $2 output report, $3 stderr.
     """
     # NOTE(review): the None checks are equality tests ("==" / "!="), not
     # identity tests; kept as is - confirm before changing.
     cmd_parts = [self.get_exec_path("miamsClassify.py")]
     if not (self.random_seed == None):
         cmd_parts.append(" --random-seed " + str(self.random_seed))
     cmd_parts.append(" --classifier " + self.classifier)
     if self.classifier_params != None:
         cmd_parts.append(" --classifier-params '" + self.classifier_params + "'")
     cmd_parts.append(" --consensus-method " + self.consensus_method)
     cmd_parts.append(" --method-name " + self.method_name)
     cmd_parts.append(" --min-voting-loci " + str(self.min_voting_loci))
     cmd_parts.append(" --min-support-fragments " + str(self.min_support_fragments))
     cmd_parts.append(" --undetermined-weight " + str(self.undetermined_weight))
     if self.locus_weight_is_score:
         cmd_parts.append(" --locus-weight-is-score")
     if self.consensus_method == "ratio":
         cmd_parts.append(" --instability-ratio " + str(self.instability_ratio))
     if self.consensus_method == "count":
         cmd_parts.append(" --instability-count " + str(self.instability_count))
     cmd_parts.append(" --input-references " + self.references_samples)
     cmd_parts.append(" --input-evaluated $1 ")
     cmd_parts.append(" --output-report $2 ")
     cmd_parts.append(" 2> $3")

     classifier_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
     MultiMap(
         classifier_fct,
         inputs=[self.evaluated_samples],
         outputs=[self.out_report, self.stderr],
         includes=[self.references_samples],
     )
示例#11
0
    def add_python_execution(self,
                             function,
                             inputs=[],
                             outputs=[],
                             arguments=[],
                             includes=[],
                             add_path=set(),
                             collect=False,
                             local=False,
                             map=False,
                             cmd_format=""):
        """
        Wrap a Python function in a PythonFunction and schedule it.

        The "lib" directory located next to the parent directory of the
        file defining this class is always added to the paths given to
        PythonFunction. When ``cmd_format`` is empty, it is built from
        "{EXE} " plus " {ARG}", " {IN}" and " {OUT}" depending on which of
        arguments / inputs / outputs are provided. With ``map`` set the
        function is applied through MultiMap, otherwise it is called once.
        The call is finally recorded with ``__write_trace``.

        :param function: Python function to wrap; its ``__name__`` is used
            in the error message and in the trace.
        :param inputs: inputs, converted with ``__generate_iolist``.
        :param outputs: outputs, converted with ``__generate_iolist``.
        :param arguments: arguments forwarded to the function / MultiMap.
        :param includes: extra includes; a non-list value is wrapped in a
            list.
        :param add_path: extra paths for PythonFunction; never modified.
        :param collect: forwarded to MultiMap (map mode only).
        :param local: forwarded to MultiMap (map mode only).
        :param map: when true, schedule through MultiMap.
        :param cmd_format: command format; built automatically when empty.
        :raises Exception: if ``map`` is true and ``inputs`` or ``outputs``
            is not a list.
        """
        workflow_dir = Path(os.path.dirname(inspect.getfile(
            self.__class__))).parent
        lib_dir = str(workflow_dir) + os.path.sep + "lib"
        # Fix: work on a copy. The default set is created once and shared by
        # every call, so adding to it in place accumulated lib directories
        # across calls (and modified any set passed by the caller).
        add_path = set(add_path)
        add_path.add(lib_dir)

        if map:
            if not issubclass(inputs.__class__, list) or not issubclass(
                    outputs.__class__, list):
                # Fix: Logger.exception() is meant to be called from an
                # exception handler; no exception is active here, so the
                # message is logged with error() instead.
                logging.getLogger("jflow").error(
                    "add_python_execution: '" + function.__name__ +
                    "' map requires a list as inputs and output!")
                raise Exception("add_python_execution: '" + function.__name__ +
                                "' map requires a list as inputs and output!")
        # Command format to build
        if cmd_format == "":
            cmd_format = "{EXE} "
            if len(arguments) > 0:
                cmd_format += " {ARG}"
            if (isinstance(inputs, list)
                    and len(inputs) > 0) or (inputs is not None
                                             and inputs != []):
                cmd_format += " {IN}"
            if (isinstance(outputs, list)
                    and len(outputs) > 0) or (outputs is not None
                                              and outputs != []):
                cmd_format += " {OUT}"
        py_function = PythonFunction(function,
                                     add_path=add_path,
                                     cmd_format=cmd_format,
                                     modules=self.modules)

        new_inputs, includes_in = self.__generate_iolist(inputs, map)
        # Only the converted outputs are used; the second value is ignored
        new_outputs, _ = self.__generate_iolist(outputs, map)
        if not isinstance(includes, list):
            includes = [includes]
        if map:
            MultiMap(py_function,
                     inputs=new_inputs,
                     outputs=new_outputs,
                     includes=includes + includes_in,
                     collect=collect,
                     local=local,
                     arguments=arguments)
        else:
            py_function(inputs=new_inputs,
                        outputs=new_outputs,
                        arguments=arguments,
                        includes=includes + includes_in)

        self.__write_trace(function.__name__, inputs, outputs, arguments,
                           cmd_format, map, "PythonFunction")
示例#12
0
    def process(self):
        """
        Default run of the component; subclasses can override it for a
        more complex process.

        The command line is built from the attributes listed in
        ``self.params_order``: plain parameters become options, input and
        output files become numbered ``$n`` placeholders. The command is
        then run directly, through Map or through MultiMap depending on
        ``self.get_abstraction()``.

        :raises Exception: if the abstraction is not None, 'map' or
            'multimap'.
        """
        # Dispatch the declared parameters: input files, output files, options
        options = []
        inputs = []
        outputs = []
        for param_name in self.params_order:
            param = self.__getattribute__(param_name)
            if not isinstance(param, AbstractParameter):
                continue
            if isinstance(param, AbstractInputFile):
                inputs.append(param)
            elif isinstance(param, AbstractOutputFile):
                outputs.append(param)
            else:
                options.append(param)

        # Order each group by its argpos
        by_argpos = attrgetter('argpos')
        options.sort(key=by_argpos)
        inputs.sort(key=by_argpos)
        outputs.sort(key=by_argpos)
        commandline = self.get_exec_path(self.get_command())

        # Options: boolean flags are added when true, the others when their
        # default is truthy
        for option in options:
            if isinstance(option, BoolParameter):
                if option:
                    commandline += " %s " % option.cmd_format
            elif option.default:
                commandline += " %s %s " % (option.cmd_format, option.default)

        abstraction = self.get_abstraction()

        if abstraction == None:
            # One placeholder per single file, one per element otherwise
            # (file list / pattern / ends with)
            position = 1
            for io_param in inputs + outputs:
                if isinstance(io_param, (InputFile, OutputFile)):
                    commandline += ' %s $%s ' % (io_param.cmd_format, position)
                    position += 1
                    continue
                for _ in io_param:
                    commandline += ' %s $%s ' % (io_param.cmd_format, position)
                    position += 1
            function = ShellFunction(commandline,
                                     cmd_format='{EXE} {IN} {OUT}',
                                     modules=self.modules)
            function(inputs=inputs, outputs=outputs)
        # weaver map abstraction
        elif abstraction == 'map':
            if len(inputs) != 1 or len(outputs) != 1:
                display_error_message(
                    "You can only have one type of input and one type of output for the map abstraction"
                )

            # A ParameterList replaces the whole inputs/outputs collection
            map_inputs = inputs
            for in_param in inputs:
                commandline += ' %s $1 ' % in_param.cmd_format
                if isinstance(in_param, ParameterList):
                    map_inputs = in_param

            map_outputs = outputs
            for out_param in outputs:
                commandline += ' %s $2 ' % out_param.cmd_format
                if isinstance(out_param, ParameterList):
                    map_outputs = out_param

            function = ShellFunction(commandline,
                                     cmd_format='{EXE} {IN} {OUT}',
                                     modules=self.modules)
            Map(function, inputs=map_inputs, outputs=map_outputs)
        # jflow multimap
        elif abstraction == 'multimap':
            position = 1
            for io_param in inputs + outputs:
                if not isinstance(io_param, ParameterList):
                    display_error_message(
                        "Multimap abstraction can be used only with ParameterList"
                    )
                commandline += ' %s $%s ' % (io_param.cmd_format, position)
                position += 1

            function = ShellFunction(commandline,
                                     cmd_format='{EXE} {IN} {OUT}',
                                     modules=self.modules)
            MultiMap(function, inputs=inputs, outputs=outputs)
        # anything other than that will be considered errored
        else:
            raise Exception('Unsupported abstraction %s ' % abstraction)
示例#13
0
 def process(self):
     """
     Schedule the bam_index Python function through MultiMap.

     The wrapper command receives the samtools path, then the inputs and
     the outputs.
     """
     samtools_path = self.get_exec_path("samtools")
     index_format = " ".join(['{EXE}', samtools_path, '{IN} {OUT}'])
     index_fct = PythonFunction(bam_index, cmd_format=index_format)
     MultiMap(
         index_fct,
         inputs=[self.in_aln],
         outputs=[self.out_aln, self.out_idx, self.stderr],
     )