Exemplo n.º 1
0
    def process(self):
        """
        Build the cutadapt command line and hand it to MultiMap.

        Options are taken from the component attributes error_rate,
        min_overlap, discard_untrimmed, adaptor_type and adaptor_file. The
        single-end layout is used when self.in_R2 is empty, the paired-end
        layout otherwise. In both layouts the standard output and standard
        error of the command are redirected to self.stdout and self.stderr.
        """
        cmd = self.get_exec_path("cutadapt") + \
            " --error-rate " + str(self.error_rate)
        # --overlap is optional and must only be emitted when a value is set.
        # The former test was inverted: it produced "--overlap None" when the
        # value was missing and dropped the option when it was provided.
        # NOTE(review): "!= None" is kept (not "is not None") because the
        # parameter objects may customise equality - confirm.
        if self.min_overlap != None:
            cmd += " --overlap " + str(self.min_overlap)
        if self.discard_untrimmed:
            cmd += " --discard-untrimmed"
        cmd += " -" + self.adaptor_type + " file:" + self.adaptor_file

        if len(self.in_R2) == 0:  # Process single read cutadapt
            # $1: reads ; $2: trimmed reads ; $3: stdout ; $4: stderr
            cmd += " --output $2 $1 > $3 2> $4"
            cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
            MultiMap(
                cutadapt_fct,
                inputs=[self.in_R1],
                outputs=[self.out_R1, self.stdout, self.stderr],
                includes=[self.adaptor_file])
        else:  # Process paired-end cutadapt
            # $1/$2: R1/R2 reads ; $3/$4: trimmed R1/R2 ; $5: stdout ;
            # $6: stderr
            cmd += " --output $3 --paired-output $4 $1 $2 > $5 2> $6"
            cutadapt_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
            MultiMap(
                cutadapt_fct,
                inputs=[self.in_R1, self.in_R2],
                outputs=[self.out_R1, self.out_R2, self.stdout, self.stderr],
                includes=[self.adaptor_file])
Exemplo n.º 2
0
 def process(self):
     """
     Write the list of alignment files, then declare the baseline job.

     The paths in self.aln are written one per line to "aln_list.txt" in
     self.output_directory. That list is given to create_baseline.py with
     the genome, intervals and targets files and, when
     self.status_annotations is set, the annotations file as first input.
     """
     # Alignments list: one path per line
     list_filepath = os.path.join(self.output_directory, "aln_list.txt")
     with open(list_filepath, "w") as writer:
         for aln_path in self.aln:
             writer.write(aln_path + "\n")

     # The optional annotations file takes $1 and shifts every other
     # placeholder by one
     inputs = [self.genome, self.intervals, self.targets, list_filepath]
     annot_opt = ""
     first_idx = 1
     if self.status_annotations != None:
         inputs = [self.status_annotations] + inputs
         annot_opt = " --input-annotations $1"
         first_idx = 2

     # Fixed part of the command
     cmd = self.get_exec_path("msings_venv") + " " + self.get_exec_path("create_baseline.py") + \
         " --java-path " + self.get_exec_path("java") + \
         " --java-mem " + str(self.java_mem) + \
         annot_opt
     # Numbered part: the four remaining inputs, then the two outputs
     numbered_flags = [
         "--input-genome",
         "--input-intervals",
         "--input-targets",
         "--input-list",
         "--output-baseline",
         "2>",
     ]
     for offset, flag in enumerate(numbered_flags):
         cmd += " {} ${}".format(flag, first_idx + offset)

     baseline_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
     baseline_fct(
         inputs=inputs,
         outputs=[self.baseline, self.stderr],
         includes=self.aln)
Exemplo n.º 3
0
 def process(self):
     """
     Write one list of locus reports per sample, then declare one
     gatherLocusRes.py job per sample name.

     Each list is a TSV file created in self.output_directory with the
     columns locus position, locus name and report path. The targets come
     from getAreas(self.targets_design) and the reports from
     two_dim_stack(self.loci_reports, <number of targets>).
     """
     targets = getAreas(self.targets_design)
     reports_in_spl = two_dim_stack(self.loci_reports, len(targets))

     # Create combined lists
     loci_reports_list_in_spl = []
     for spl_idx, spl_reports in enumerate(reports_in_spl):
         list_name = "spl_{}_comb_reports_list.tsv".format(spl_idx)
         list_filepath = os.path.join(self.output_directory, list_name)
         loci_reports_list_in_spl.append(list_filepath)
         with open(list_filepath, "w") as writer:
             writer.write("#Locus_position\tLocus_name\tFilepath\n")
             for target_idx, report_path in enumerate(spl_reports):
                 curr_target = targets[target_idx]
                 # The written position uses start - 1
                 locus_position = "{}:{}-{}".format(
                     curr_target.chrom, curr_target.start - 1, curr_target.end)
                 writer.write("{}\t{}\t{}\n".format(
                     locus_position, curr_target.name, report_path))

     # Set commands ($1: reports list ; $2: output report ; $3: stderr)
     cmd = "".join([
         self.get_exec_path("gatherLocusRes.py"),
         " --method-name '{}'".format(self.result_method),
         " --method-class-name '{}'".format(self.result_class_name),
         " --result-keys 'nb_by_length=nb_by_length'",
         " --input-loci-metrics-list $1",
         " --output-report $2 ",
         " 2> $3",
     ])
     report_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
     for spl_idx, _ in enumerate(self.samples_names):
         report_fct(
             inputs=[loci_reports_list_in_spl[spl_idx]],
             outputs=[self.out_report[spl_idx], self.stderr[spl_idx]],
             includes=[reports_in_spl[spl_idx]])
Exemplo n.º 4
0
 def process(self):
     """
     Declare one flash2 job per prefix, then the report jobs.

     For each entry of self.prefixes the pair self.R1[idx] / self.R2[idx]
     is given to flash2, whose standard error is redirected to
     self.stderr[idx]. The other files produced by the job are declared as
     additional outputs. The reports are then written by writeReport from
     the combined and the not combined R1 files.
     """
     # Combine reads
     for idx, curr_prefix in enumerate(self.prefixes):
         cmd = self.get_exec_path("flash2") + \
             " --compress " + \
             " --threads " + str(self.nb_threads)
         # --min-overlap is emitted once, and only when a value is set. The
         # former command always emitted it (giving "--min-overlap None"
         # when unset) and repeated it a second time when set.
         # NOTE(review): the duplicated conditional option may originally
         # have been meant for another flash2 option - confirm.
         if self.min_overlap != None:
             cmd += " --min-overlap " + str(self.min_overlap)
         cmd += " --max-mismatch-density " + str(self.mismatch_ratio) + \
             " --phred-offset " + str(self.phred_offset) + \
             " --output-prefix " + os.path.basename(curr_prefix) + \
             " --output-directory " + self.output_directory + \
             " $1 " + \
             " $2 " + \
             " 2> $3"
         flash2 = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
         flash2(
             inputs=[self.R1[idx], self.R2[idx]],
             # stderr must stay first: it is the only output referenced by
             # the command ($3)
             outputs=[
                 self.stderr[idx],
                 self.out_hist[idx],
                 self.out_combined[idx],
                 self.out_histogram[idx],
                 self.out_not_combined_R1[idx],
                 self.out_not_combined_R2[idx]
             ]
         )
     # Write report
     report_fct = PythonFunction(writeReport, cmd_format="{EXE} {IN} {OUT}")
     MultiMap(
         report_fct,
         inputs=[self.out_combined, self.out_not_combined_R1],
         outputs=[self.out_report]
     )
Exemplo n.º 5
0
 def process(self):
     """
     Write one list of combination reports per sample, then declare one
     tagMSIAmplSize.py job per sample name.

     Each list is a TSV file created in self.output_directory with the
     columns locus position, locus name and report path. The targets come
     from getAreas(self.targets_design) and the reports from
     two_dim_stack(self.combination_report, <number of targets>).
     """
     targets = getAreas(self.targets_design)
     reports_in_spl = two_dim_stack(self.combination_report, len(targets))

     # Create combined lists
     header = "#Locus_position\tLocus_name\tFilepath\n"
     row_template = "{}\t{}\t{}\n"
     comb_reports_list_in_spl = []
     for spl_idx, spl_reports in enumerate(reports_in_spl):
         list_filepath = os.path.join(
             self.output_directory,
             "spl_{}_comb_reports_list.tsv".format(spl_idx))
         comb_reports_list_in_spl.append(list_filepath)
         with open(list_filepath, "w") as writer:
             writer.write(header)
             for target_idx, report_path in enumerate(spl_reports):
                 locus = targets[target_idx]
                 # The written position uses start - 1
                 locus_position = "{}:{}-{}".format(
                     locus.chrom, locus.start - 1, locus.end)
                 writer.write(row_template.format(
                     locus_position, locus.name, report_path))

     # Set commands
     # $1: reports list ; $2: models ; $3: output report ; $4: stderr
     cmd = "".join([
         self.get_exec_path("tagMSIAmplSize.py"),
         " --min-support " + str(self.min_support),
         " --input-combined-list $1",
         " --input-models $2",
         " --output-report $3 ",
         " 2> $4",
     ])
     report_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
     for spl_idx, _ in enumerate(self.samples_names):
         report_fct(
             inputs=[comb_reports_list_in_spl[spl_idx], self.models],
             outputs=[self.out_report[spl_idx], self.stderr[spl_idx]],
             includes=[reports_in_spl[spl_idx]])
Exemplo n.º 6
0
 def process(self):
     """
     Declare the bamAreasToFastq.py jobs through MultiMap.

     When self.split_targets is set, self.process_split_targets() is called
     first and the "repeated_*" attributes are used in place of self.aln,
     self.R1 and self.R2. The command format puts the outputs before the
     inputs: $1-$3 are the outputs and the inputs start at $4.
     """
     if self.split_targets:
         self.process_split_targets()

     # Exec command
     cmd_parts = [
         self.get_exec_path("bamAreasToFastq.py"),
         " --min-overlap " + str(self.min_overlap),
         " --input-targets $4",
         " --input-aln $5",
     ]
     # NOTE(review): each reads option is added on its own file set, whereas
     # the reads are only appended to the inputs when both sets are
     # provided - confirm the intended behaviour when only one is given.
     if len(self.R1) != 0:
         cmd_parts.append(" --input-R1 $6")
     if len(self.R2) != 0:
         cmd_parts.append(" --input-R2 $7")
     cmd_parts.extend([
         " --output-R1 $1",
         " --output-R2 $2",
         " 2> $3",
     ])
     bam2fastq_fct = ShellFunction(
         "".join(cmd_parts), cmd_format='{EXE} {OUT} {IN}')

     # Inputs: targets, alignments and, if provided, the two reads sets
     inputs = [self.repeated_targets]
     inputs.append(self.repeated_aln if self.split_targets else self.aln)
     if not (len(self.R1) == 0 or len(self.R2) == 0):
         if self.split_targets:
             inputs += [self.repeated_R1, self.repeated_R2]
         else:
             inputs += [self.R1, self.R2]

     MultiMap(
         bam2fastq_fct,
         inputs=inputs,
         outputs=[self.out_R1, self.out_R2, self.stderr],
     )
Exemplo n.º 7
0
 def process(self):
     """
     Declare the addLociAnnotations.py jobs through MultiMap.

     The annotations file path is written directly in the command. The
     numbered placeholders are $1 for the input report (self.msi_files),
     $2 for the output report (self.out_report) and $3 for the standard
     error (self.stderr).
     """
     cmd = self.get_exec_path("addLociAnnotations.py") + \
         " --input-loci-annot " + self.annotations_file + \
         " --input-report $1 " + \
         " --output-report $2" + \
         " 2> $3"
     add_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
     # The annotations file was formerly listed as first input. That shifted
     # every placeholder by one: $1 became the annotations file, $2 (the
     # output report) an input report and $3 the output report. It is
     # already given by path in the command, so it is declared as an
     # included file instead, as the other components do for their fixed
     # files.
     includes = [self.annotations_file]
     if self.info_file != None:
         includes.append(self.info_file)
     MultiMap(add_fct,
              inputs=[self.msi_files],
              outputs=[self.out_report, self.stderr],
              includes=includes)
Exemplo n.º 8
0
 def process(self):
     """
     Declare the MSIMergeReports.py jobs through MultiMap.

     Placeholders: $1 and $2 are the two reports to merge
     (self.first_report and self.second_report), $3 is the merged report
     (self.out_report) and $4 the standard error (self.stderr).
     """
     cmd = " ".join([
         self.get_exec_path("MSIMergeReports.py"),
         "--inputs-reports $1 $2",
         "--output-report $3",
         "2> $4",
     ])
     merges_fct = ShellFunction(cmd, cmd_format='{EXE} {IN} {OUT}')
     reports_to_merge = [self.first_report, self.second_report]
     merged = [self.out_report, self.stderr]
     MultiMap(merges_fct, inputs=reports_to_merge, outputs=merged)
Exemplo n.º 9
0
    def process(self):
        """
        Declare the three chained steps of the mSINGS analysis.

        1. run_msings.py on each alignment: writes the analysis file and a
           temporary report (path of the final report suffixed by ".tmp").
        2. sed on each temporary report: removes "_analysis" and writes the
           final report.
        3. mSINGSToReport.py: aggregates each report with its analysis.
        """
        cmd_format = '{EXE} {IN} {OUT}'
        tmp_report = [curr_path + ".tmp" for curr_path in self.report]

        # Process mSINGS
        # $1: alignment ; $2: analysis ; $3: temporary report ; $4: stderr
        msings_cmd = "".join([
            self.get_exec_path("msings_venv"),
            " ",
            self.get_exec_path("run_msings.py"),
            " --java-path " + self.get_exec_path("java"),
            " --java-mem " + str(self.java_mem),
            " --multiplier " + str(self.multiplier),
            " --msi-min-threshold " + str(self.msi_min_threshold),
            " --msi-max-threshold " + str(self.msi_max_threshold),
            " --input-baseline " + self.baseline,
            " --input-genome " + self.genome,
            " --input-intervals " + self.intervals,
            " --input-targets " + self.targets,
            " --input-aln $1 ",
            " --output-analyzer $2 ",
            " --output-report $3 ",
            " 2> $4",
        ])
        msings_fct = ShellFunction(msings_cmd, cmd_format=cmd_format)
        MultiMap(
            msings_fct,
            inputs=[self.aln],
            outputs=[self.analysis, tmp_report, self.msings_stderr],
            includes=[self.genome, self.intervals, self.baseline, self.targets]
        )

        # Remove suffix in samples names
        # $1: temporary report ; $2: final report ; $3: stderr
        rename_cmd = "".join([
            self.get_exec_path("sed"),
            " -e 's/_analysis//'",
            " $1",
            " > $2",
            " 2> $3",
        ])
        rename_fct = ShellFunction(rename_cmd, cmd_format=cmd_format)
        MultiMap(
            rename_fct,
            inputs=[tmp_report],
            outputs=[self.report, self.rename_stderr]
        )

        # Aggregate report and analysis
        # $1: report ; $2: analysis ; $3: aggregated report ; $4: stderr
        convert_cmd = "".join([
            self.get_exec_path("mSINGSToReport.py"),
            " --input-report $1 ",
            " --input-analysis $2 ",
            " --output $3 ",
            " 2> $4",
        ])
        convert_fct = ShellFunction(convert_cmd, cmd_format=cmd_format)
        MultiMap(
            convert_fct,
            inputs=[self.report, self.analysis],
            outputs=[self.aggreg_report, self.aggreg_stderr]
        )
Exemplo n.º 10
0
 def process(self):
     """
     Declare the msiFilter.py jobs through MultiMap.

     The instability threshold option depends on self.consensus_method:
     --instability-ratio for "ratio", --instability-count for "count".
     Placeholders: $1 is the input report, $2 the output report and $3 the
     standard error.
     """
     cmd_parts = [
         self.get_exec_path("msiFilter.py"),
         " --consensus-method " + str(self.consensus_method),
         " --method-name " + str(self.method_name),
         " --min-voting-loci " + str(self.min_voting_loci),
         " --min-distrib-support " + str(self.min_distrib_support),
         " --undetermined-weight " + str(self.undetermined_weight),
     ]
     if self.locus_weight_is_score:
         cmd_parts.append(" --locus-weight-is-score")
     if self.consensus_method == "ratio":
         cmd_parts.append(" --instability-ratio " + str(self.instability_ratio))
     if self.consensus_method == "count":
         cmd_parts.append(" --instability-count " + str(self.instability_count))
     cmd_parts += [
         " --input-reports $1",
         " --output-reports $2",
         " 2> $3",
     ]
     filter_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
     MultiMap(
         filter_fct,
         inputs=[self.in_reports],
         outputs=[self.out_report, self.stderr])
Exemplo n.º 11
0
    def add_shell_execution(self,
                            source,
                            inputs=None,
                            outputs=None,
                            arguments=None,
                            includes=None,
                            cmd_format=None,
                            map=False,
                            shell=None,
                            collect=False,
                            local=False):
        """
        Wrap a shell command in a ShellFunction, submit it and trace it.

        @param source: command given to ShellFunction.
        @param inputs: inputs of the execution (empty list when omitted).
        @param outputs: outputs of the execution (empty list when omitted).
        @param arguments: arguments of the execution (empty list when
            omitted).
        @param includes: included files of the execution (empty list when
            omitted).
        @param cmd_format: command format given to ShellFunction.
        @param map: when true the execution is submitted through MultiMap,
            which requires inputs and outputs to be lists; otherwise the
            shell function is called once.
        @param shell: shell given to ShellFunction.
        @param collect: forwarded to MultiMap (only used when map is true).
        @param local: forwarded to MultiMap (only used when map is true).
        @raise Exception: when map is true and inputs or outputs is not a
            list.
        """
        # None replaces the former mutable default lists, which were shared
        # between every call
        inputs = [] if inputs is None else inputs
        outputs = [] if outputs is None else outputs
        arguments = [] if arguments is None else arguments
        includes = [] if includes is None else includes

        shell_function = ShellFunction(source,
                                       shell=shell,
                                       cmd_format=cmd_format,
                                       modules=self.modules)

        # if abstraction is map or multimap
        if map:
            # inputs and outputs must be list or filelist
            if not (isinstance(inputs, list) and isinstance(outputs, list)):
                # error() and not exception(): no exception is being
                # handled at this point, so there is no traceback to log
                logging.getLogger("jflow").error(
                    "add_shell_execution: '" + source +
                    "' map requires a list as inputs and output")
                raise Exception("add_shell_execution: '" + source +
                                "'  map requires a list as inputs and output")
            MultiMap(shell_function,
                     inputs=inputs,
                     outputs=outputs,
                     includes=includes,
                     collect=collect,
                     local=local,
                     arguments=arguments)
        else:
            shell_function(inputs=inputs,
                           outputs=outputs,
                           arguments=arguments,
                           includes=includes)
        self.__write_trace(source, inputs, outputs, arguments, cmd_format, map,
                           "Shell")
Exemplo n.º 12
0
 def process(self):
     """
     Declare the miamsClassify.py jobs through MultiMap.

     --random-seed and --classifier-params are only added when the
     corresponding attribute is set. The instability threshold option
     depends on self.consensus_method: --instability-ratio for "ratio",
     --instability-count for "count". Placeholders: $1 is the evaluated
     sample, $2 the output report and $3 the standard error.
     """
     cmd_parts = [self.get_exec_path("miamsClassify.py")]
     if not (self.random_seed == None):
         cmd_parts.append(" --random-seed " + str(self.random_seed))
     cmd_parts.append(" --classifier " + self.classifier)
     if self.classifier_params != None:
         cmd_parts.append(" --classifier-params '" + self.classifier_params + "'")
     cmd_parts.extend([
         " --consensus-method " + self.consensus_method,
         " --method-name " + self.method_name,
         " --min-voting-loci " + str(self.min_voting_loci),
         " --min-support-fragments " + str(self.min_support_fragments),
         " --undetermined-weight " + str(self.undetermined_weight),
     ])
     if self.locus_weight_is_score:
         cmd_parts.append(" --locus-weight-is-score")
     if self.consensus_method == "ratio":
         cmd_parts.append(" --instability-ratio " + str(self.instability_ratio))
     if self.consensus_method == "count":
         cmd_parts.append(" --instability-count " + str(self.instability_count))
     cmd_parts.extend([
         " --input-references " + self.references_samples,
         " --input-evaluated $1 ",
         " --output-report $2 ",
         " 2> $3",
     ])
     classifier_fct = ShellFunction("".join(cmd_parts), cmd_format='{EXE} {IN} {OUT}')
     MultiMap(
         classifier_fct,
         inputs=[self.evaluated_samples],
         outputs=[self.out_report, self.stderr],
         includes=[self.references_samples])
Exemplo n.º 13
0
    def process(self):
        """
        Run the component; subclasses can override it for a more complex
        process.

        The parameters named in self.params_order are split into input
        files, output files and other parameters, each group sorted by
        argpos. The command line starts with the executable path, followed
        by the other parameters, then by the numbered placeholders of the
        files. It is submitted according to self.get_abstraction(): a direct
        call for None, Map for 'map', MultiMap for 'multimap'.

        Raises an Exception for any other abstraction.
        """
        # Dispatch every declared parameter into its group
        options, in_files, out_files = [], [], []
        for param_name in self.params_order:
            param = self.__getattribute__(param_name)
            if not isinstance(param, AbstractParameter):
                continue
            if isinstance(param, AbstractInputFile):
                in_files.append(param)
            elif isinstance(param, AbstractOutputFile):
                out_files.append(param)
            else:
                options.append(param)

        # Order each group using argpos
        by_argpos = attrgetter('argpos')
        options.sort(key=by_argpos)
        in_files.sort(key=by_argpos)
        out_files.sort(key=by_argpos)

        # Executable, then the parameters that are not files
        commandline = self.get_exec_path(self.get_command())
        for option in options:
            if isinstance(option, BoolParameter):
                # A boolean parameter is a flag only written when it is true
                if option:
                    commandline += " %s " % option.cmd_format
            elif option.default:
                commandline += " %s %s " % (option.cmd_format, option.default)

        abstraction = self.get_abstraction()

        # No abstraction: a single call with one placeholder per file
        if abstraction == None:
            position = 1
            for files in in_files + out_files:
                if isinstance(files, (InputFile, OutputFile)):
                    commandline += ' %s $%s ' % (files.cmd_format, position)
                    position += 1
                else:
                    # input file list or output file list / pattern / ends
                    # with: one placeholder per element
                    for _ in files:
                        commandline += ' %s $%s ' % (files.cmd_format, position)
                        position += 1
            function = ShellFunction(commandline,
                                     cmd_format='{EXE} {IN} {OUT}',
                                     modules=self.modules)
            function(inputs=in_files, outputs=out_files)
            return

        # weaver map abstraction
        if abstraction == 'map':
            if len(in_files) != 1 or len(out_files) != 1:
                display_error_message(
                    "You can only have one type of input and one type of output for the map abstraction"
                )

            # A ParameterList replaces the whole group given to Map
            mapped_inputs = in_files
            for files in in_files:
                commandline += ' %s $1 ' % files.cmd_format
                if isinstance(files, ParameterList):
                    mapped_inputs = files

            mapped_outputs = out_files
            for files in out_files:
                commandline += ' %s $2 ' % files.cmd_format
                if isinstance(files, ParameterList):
                    mapped_outputs = files

            function = ShellFunction(commandline,
                                     cmd_format='{EXE} {IN} {OUT}',
                                     modules=self.modules)
            Map(function, inputs=mapped_inputs, outputs=mapped_outputs)
            return

        # jflow multimap
        if abstraction == 'multimap':
            position = 1
            for files in in_files + out_files:
                if not isinstance(files, ParameterList):
                    display_error_message(
                        "Multimap abstraction can be used only with ParameterList"
                    )
                commandline += ' %s $%s ' % (files.cmd_format, position)
                position += 1

            function = ShellFunction(commandline,
                                     cmd_format='{EXE} {IN} {OUT}',
                                     modules=self.modules)
            MultiMap(function, inputs=in_files, outputs=out_files)
            return

        # anything other than that will be considered errored
        raise Exception('Unsupported abstraction %s ' % abstraction)