Exemplo n.º 1
0
    def exec_deseq(self):
        """Run the differential expression stage of **Wedring** with DESeq.

        Assembles an ``Rscript --vanilla`` command line for
        ``diffExprStage.R``, runs it through :class:BioSoft and hands the
        output table path to ``wedr_check_path``. Progress messages are
        printed only when ``self._qt`` is false.

        :raises: :class:WedringError if the program finishes with a return
                 code other than 0 or -1.

        """
        if not self._qt:
            wedr_report("Calculating differential expression with DESeq.")
        result_table = path.join(self._o, "diffexpr.txt")
        log_path = path.join(self._ld, "diffexpr.log")
        script_args = (wedr_which("diffExprStage.R"),
                       self.wedring_diffexpr_parse_options(),
                       self._cnt_table,
                       self._cnd,
                       result_table)
        deseq = BioSoft("Rscript --vanilla %s %s %s %s %s" % script_args,
                        errfile=log_path)
        if not self._qt:
            wedr_report("Command line:\n    %s" % deseq.command)
        deseq.run()
        # Return codes 0 and -1 are both accepted; anything else is an error.
        if deseq.return_code not in (0, -1):
            failure = ("%s exitted with status %d. See log file '%s' for more details." %
                       (deseq.program_name, deseq.return_code, deseq.errfile))
            raise WedringError(141, failure)
        wedr_clean(deseq.errfile)
        # TODO Add verification of the DESeq's output with wedr_check_path()
        #   \_ table (OK), graphics
        wedr_check_path(result_table)
        if not self._qt:
            wedr_report("DESeq - Done!")
Exemplo n.º 2
0
 def exec_mapping_stage(self):
     """Run the mapping stage for this library and post-process the result.

     The steps are: check the index path, run the mapper, locate the
     alignment file, then run SAMtools and (only if SAMtools is available)
     BEDTools. Progress messages are printed only when ``self.quiet`` is
     false.

     :returns: ``self``

     """
     verbose = not self.quiet
     wedr_check_path(self.index + '.*')
     self.exec_mapping()
     # The alignment file location depends on which mapper is configured.
     if self.mapper == "tophat":
         self.aln_file = path.join(self._mapper_out, "accepted_hits.bam")
     else:
         self.aln_file = self._mapper_out + ".sam"
     wedr_check_path(self.aln_file)

     if not wedr_check_program("samtools"):
         return self
     if verbose:
         wedr_report("[%s] Processing aligments with SAMtools." %
                     self._out_pref)
     self.exec_samtools()
     if verbose:
         wedr_report("[%s] SAMtools - Done!." % self._out_pref)

     # BEDTools is only attempted after the SAMtools step has run.
     if not wedr_check_program("bedtools"):
         return self
     if verbose:
         wedr_report("[%s] Calculating mapping coverage with BEDTools." %
                     self._out_pref)
     self.exec_bedtools()
     if verbose:
         wedr_report("[%s] BEDTools - Done!" % self._out_pref)
     return self
Exemplo n.º 3
0
    def parse_args(self, out_dir="./wedr_index", ref_seq=None, index_label=None,
                   cfg_file=None, quiet=False):
        """Check the validity of the :class:IndexBuilder parameters and store
        them on the instance.

        Nothing is checked or stored unless
        ``wedr_check_program("bowtie-build")`` returns a true value.

        :param out_dir: Which output directory? (default: ./wedr_index)
        :type out_dir: str
        :param ref_seq: Which reference sequence(s)? Comma-separated when
                        more than one file is given.
        :type ref_seq: str
        :param index_label: A personalized name to the index.
        :type index_label: str
        :param cfg_file: Which configuration file?
        :type cfg_file: str
        :param quiet: If True don't print anything besides errors.
        :type quiet: bool
        :raises: :class:WedringError if no reference sequence is provided.

        """
        if not wedr_check_program("bowtie-build"):
            return
        if quiet:
            self.quiet = True
        if out_dir != "./wedr_index":
            self.out_dir = out_dir
        if cfg_file is not None:
            if wedr_check_path(cfg_file):
                self.cfg_file = cfg_file
        if ref_seq is None:
            raise WedringError(135, "You must provide the reference sequence file(s).")
        # Validate the argument itself: self.ref_seq is only assigned from it
        # on the next line, so it cannot be the thing being checked here.
        if wedr_check_path(ref_seq.split(',')):
            self.ref_seq = ref_seq
        if index_label is not None:
            self.index = path.join(self.out_dir, index_label)
            self._out_pref = index_label
        elif ',' in self.ref_seq:
            # Several references: name the index after the first non-empty one.
            index_name = [i for i in self.ref_seq.split(',') if i != ''][0]
            self.index = path.join(self.out_dir,
                                   path.split(path.splitext(index_name)[0])[1] +
                                   "_andothers")
            # NOTE(review): unlike the other branches this keeps the directory
            # and extension of the first reference in the prefix -- confirm
            # whether that is intended.
            self._out_pref = index_name
        else:
            # Single reference: use its base name without the extension.
            self._out_pref = path.split(path.splitext(self.ref_seq)[0])[1]
            self.index = path.join(self.out_dir, self._out_pref)
Exemplo n.º 4
0
    def exec_samtools(self):
        """Run the SAMtools steps of the pipeline on ``self.aln_file``.

        When the mapper is "bowtie" the SAM file is first converted to BAM
        (and the SAM file is passed to ``wedr_clean``), then the BAM file is
        sorted. In every case the BAM file is indexed at the end. Progress
        messages are printed only when ``self.quiet`` is false.

        :raises: :class:WedringError if any SAMtools call finishes with a
                 return code other than 0 or -1.

        """
        def run_step(command_line, log_suffix):
            # Run one SAMtools command, fail on a bad status and clean its log.
            step = BioSoft(command=command_line,
                           errfile=path.join(self.log_dir,
                                             self._out_pref + log_suffix))
            if not self.quiet:
                wedr_report("[%s] Command line:\n    %s" % (self._out_pref,
                                                             step.command))
            step.run()
            if step.return_code not in (0, -1):
                failure = ("[%s] %s exitted with status %d. See log file '%s' for more details." %
                           (self._out_pref, step.program_name,
                            step.return_code, step.errfile))
                raise WedringError(141, failure)
            wedr_clean(step.errfile)

        if self.mapper == "bowtie":
            if not self.quiet:
                wedr_report("[%s] Converting SAM file to BAM file." %
                            self._out_pref)
            sorted_prefix = path.splitext(self.aln_file)[0]
            bam_path = sorted_prefix + ".bam"
            run_step("samtools view -bS -o %s %s.sam" %
                     (bam_path, self._mapper_out), "_view.log")
            # The SAM input is no longer needed once the BAM file exists.
            wedr_clean(self.aln_file, force=True)
            self.aln_file = bam_path
            wedr_check_path(self.aln_file)

            if not self.quiet:
                wedr_report("[%s] Sorting BAM file." % self._out_pref)
            run_step("samtools sort %s %s" % (bam_path, sorted_prefix),
                     "_sort.log")
            wedr_check_path(self.aln_file)

        if not self.quiet:
            wedr_report("[%s] Indexing BAM file." % self._out_pref)
        run_step("samtools index %s" % self.aln_file, "_index.log")
        wedr_check_path(self.aln_file + ".bai")
Exemplo n.º 5
0
    def exec_bedtools(self):
        """Run ``bedtools coverage`` on the alignment file.

        The command output goes to a file named after ``self.aln_file`` with
        ".bam" replaced by ".cov". ``self.cov_file`` is set to that path when
        ``wedr_check_path`` returns a true value for it.

        :raises: :class:WedringError if BEDTools finishes with a return code
                 other than 0 or -1.

        """
        coverage_path = self.aln_file.replace(".bam", ".cov")
        log_path = path.join(self.log_dir, self._out_pref + "_coverage.log")
        command_line = ("bedtools coverage -s -abam %s -b %s" %
                        (self.aln_file, self.annot_file))
        coverage = BioSoft(command=command_line, outfile=coverage_path,
                           errfile=log_path)
        if not self.quiet:
            wedr_report("[%s] Command line:\n    %s" % (self._out_pref,
                                                         coverage.command))
        coverage.run()
        if coverage.return_code not in (0, -1):
            failure = ("[%s] %s exitted with status %d. See log file '%s' for more details." %
                       (self._out_pref, coverage.program_name,
                        coverage.return_code, coverage.errfile))
            raise WedringError(141, failure)
        wedr_clean(coverage.errfile)
        if not wedr_check_path(coverage_path):
            return
        self.cov_file = coverage_path
Exemplo n.º 6
0
 def run(self):
     """Execute the steps of the **Wedring** pipeline selected by ``self._wb``.

     The mapping stage is distributed over a process pool of
     ``self._num_threads`` workers; the pool is always shut down before
     the method continues, even when a worker raises.

     """
     # This method will execute according to the self._wb value:
     # The value are set after the command line options --just-indexbuild,
     # --just-map, --just-counttable, --just-de, and the possible values are
     # defined in the globals TTL_PIPELINE, JUST_INDEX, JUST_MAP, JUST_TABLE
     # and JUST_DE, which mean:
     # TTL_PIPELINE -- execute all steps of the pipeline
     # JUST_INDEX -- just execute the indexing stage
     # JUST_MAP -- execute the indexing stage (if needed) and the mapping
     #             stage
     # JUST_TABLE -- just build the count table
     # JUST_DE -- just execute the differential expression stage
     def write_tables():
         # Write the genomic features file and the count table into self._o.
         feats_file = path.join(self._o, "genomic_features.txt")
         tbl_file = path.join(self._o, "count_table.txt")
         if not self._qt:
             wedr_report("Writing genomic features to disk.")
         write_genomic_features_to_file(self._a, feats_file)
         if wedr_check_path(feats_file):
             if not self._qt:
                 wedr_report("Writing genomic features - Done!")
         cov_f = [wedrmast.cov_file for wedrmast in self._wedr_list]
         if not self._qt:
             wedr_report("Writing count table to disk.")
         write_count_table_to_file(self._a, cov_f, self._cnd, tbl_file)
         if wedr_check_path(tbl_file):
             self._cnt_table = tbl_file
             if not self._qt:
                 wedr_report("Writing count table - Done!")

     if self._wb in (TTL_PIPELINE, JUST_INDEX, JUST_MAP):
         self.wedring_indexbuilder_parse_params()
         if self._indexbldr is not None:
             self._indexbldr.run()
             self._i = self._indexbldr.index
         if self._wb != JUST_INDEX:
             wedr_prepare_directory(self._o)
             wedr_prepare_directory(self._ld)
             self.wedring_mapping_parse_params()
             workers = Pool(self._num_threads)
             try:
                 self._wedr_list = workers.map(WedringMast.exec_mapping_stage,
                                               self._wedr_list)
             finally:
                 # map() has either returned every result or raised, so the
                 # workers can be stopped; without this they are leaked.
                 workers.terminate()
                 workers.join()
             if self._wb != JUST_MAP:
                 write_tables()
                 self.exec_deseq()
     elif self._wb == JUST_TABLE:
         self.wedring_mapping_parse_params()
         wedr_prepare_directory(self._o)
         write_tables()
     elif self._wb == JUST_DE:
         wedr_prepare_directory(self._o)
         wedr_prepare_directory(self._ld)
         self.exec_deseq()
Exemplo n.º 7
0
    def parse_args(self, out_dir="./wedr_out", mapper="bowtie", index=None,
                   lib_file=None, lib_mate_1=None, lib_mate_2=None,
                   qual_file=None, q_mate_1=None, q_mate_2=None, annot_file=None,
                   cov_file=None, cfg_file=None, map_label=None, quiet=False,
                   barrier=0):
        """Verify that the **Wedring** parameters are set correctly and derive
        the output names used by the mapping stage.

        When ``self.wedr_barrier`` is 3 only the annotation and coverage
        files are validated; otherwise the mapper, index, library/quality
        files and annotation file are validated and ``self._mapper_out`` /
        ``self._out_pref`` are derived.

        :param out_dir: Which output directory? (default: ./wedr_out)
        :type out_dir: str -- Pathname to an existing (or not) directory.
        :param mapper: Which mapper? (default: bowtie)
        :type mapper: str -- "bowtie" or "tophat"
        :param index: Which BW index?
        :type index: str -- Pathname to *Bowtie*'s index (just the suffix)
        :param lib_file: Which library file(s)?
        :type lib_file: str
        :param lib_mate_1: Which first mate reads file?
        :type lib_mate_1: str
        :param lib_mate_2: Which second mate reads file?
        :type lib_mate_2: str
        :param qual_file: Which quality file(s)?
        :type qual_file: str
        :param q_mate_1: Which quality file(s) for mate 1?
        :type q_mate_1: str
        :param q_mate_2: Which quality file(s) for mate 2?
        :type q_mate_2: str
        :param annot_file: Which annotation file?
        :type annot_file: str
        :param cov_file: Which coverage file? Only read when
                         ``self.wedr_barrier`` is 3.
        :type cov_file: str
        :param cfg_file: Which configuration file?
        :type cfg_file: str
        :param map_label: A personalized name to the mapping.
        :type map_label: str
        :param barrier: When the pipeline will stop? Not read by this
                        method, which consults ``self.wedr_barrier`` instead.
        :type barrier: int
        :param quiet: If True don't print anything besides errors.
        :type quiet: bool
        :raises: :class:WedringError

        """
        def split_list(value):
            # Split a comma-separated list, dropping empty entries.
            return [item for item in value.split(',') if item != '']

        def stem(pathname):
            # Base name of a path without its directory or extension.
            return path.split(path.splitext(pathname)[0])[1]

        def first_entry(value):
            # First element of a comma-separated list.
            return value.split(',', 1)[0]

        if self.wedr_barrier != 3:
            mp = mapper.lower()
            if mp not in ("bowtie", "tophat"):
                raise WedringError(134, "Invalid mapper name: %s" % mapper)
            if wedr_check_program(mp):
                self.mapper = mp
            if quiet:
                self.quiet = True
            if out_dir != "./wedr_out":
                self.out_dir = out_dir
            # The log directory always lives under the requested out_dir.
            self.log_dir = path.join(out_dir, "log")
            if cfg_file is not None:
                if wedr_check_path(cfg_file):
                    self.cfg_file = cfg_file
            if index is None:
                raise WedringError(135, "You must provide Bowtie's BW index.")
            if wedr_check_path(index + ".*"):
                self.index = index

            # _map_mode ends up as 7 (single-end with qualities), 8 (single-end
            # without), 4 (paired-end with qualities) or 5 (paired-end without).
            if lib_file is not None:
                lib_temp = split_list(lib_file)
                if wedr_check_path(lib_temp):
                    self.lib_file = ','.join(lib_temp)
                    self._map_mode = 2
                if qual_file is not None:
                    qual_temp = split_list(qual_file)
                    if wedr_check_path(qual_temp):
                        if len(lib_temp) != len(qual_temp):
                            raise WedringError(140, "Unbalanced number of library and quality files.")
                        # Store the normalised list, as is done for the
                        # library and pair-mate files.
                        self.qual_file = ','.join(qual_temp)
                        self._map_mode += 5
                else:
                    self._map_mode += 6
            elif lib_mate_1 is not None and lib_mate_2 is not None:
                lib_1_temp = split_list(lib_mate_1)
                lib_2_temp = split_list(lib_mate_2)
                if wedr_check_path(lib_1_temp + lib_2_temp):
                    if len(lib_1_temp) != len(lib_2_temp):
                        raise WedringError(140, "Unbalanced number of pair-mate libraries.")
                    if (q_mate_1 is None) ^ (q_mate_2 is None):
                        raise WedringError(135, "You must set both pair-mate quality files.")
                    self.lib_mate_1 = ','.join(lib_1_temp)
                    self.lib_mate_2 = ','.join(lib_2_temp)
                    # Default to "no qualities"; upgraded to 4 below only when
                    # both quality lists validate, so it is no longer
                    # overwritten afterwards.
                    self._map_mode = 5
                    if q_mate_1 is not None and q_mate_2 is not None:
                        qual_1_temp = split_list(q_mate_1)
                        qual_2_temp = split_list(q_mate_2)
                        if wedr_check_path(qual_1_temp + qual_2_temp):
                            if (len(lib_1_temp) != len(qual_1_temp) or
                                    len(lib_1_temp) != len(qual_2_temp)):
                                raise WedringError(140, "Unbalanced number of pair-mate libraries and its qualities.")
                            self.q_mate_1 = ','.join(qual_1_temp)
                            self.q_mate_2 = ','.join(qual_2_temp)
                            self._map_mode = 4
            elif (lib_mate_1 is None) ^ (lib_mate_2 is None):
                raise WedringError(135, "You must set both pair-mate library files.")
            else:
                raise WedringError(135, "You must provide the library file(s).")

            if map_label is not None:
                self._mapper_out = path.join(self.out_dir, map_label)
                self._out_pref = map_label
            elif self._map_mode not in (7, 8):
                # Paired-end: names are built from both mates. The last two
                # characters of each stem are dropped when comparing/joining
                # (presumably a mate suffix such as "_1"/"_2" -- TODO confirm).
                index_name = path.split(self.index)[1]
                if ',' in self.lib_mate_1:
                    map_out_pref = (stem(first_entry(self.lib_mate_1)),
                                    stem(first_entry(self.lib_mate_2)))
                    if map_out_pref[0][:-2] == map_out_pref[1][:-2]:
                        # NOTE(review): this branch keeps the full first stem,
                        # while the single-pair branch below trims it --
                        # confirm which is intended.
                        self._mapper_out = path.join(self.out_dir,
                                                     map_out_pref[0] +
                                                     "_andothers_vs_" +
                                                     index_name)
                        self._out_pref = map_out_pref[0]
                    else:
                        joined = map_out_pref[0][:-2] + '_' + map_out_pref[1][:-2]
                        self._mapper_out = path.join(self.out_dir,
                                                     joined +
                                                     "_andothers_vs_" +
                                                     index_name)
                        self._out_pref = joined + "_andothers_vs_"
                else:
                    map_out_pref = (stem(self.lib_mate_1),
                                    stem(self.lib_mate_2))
                    if map_out_pref[0][:-2] == map_out_pref[1][:-2]:
                        self._mapper_out = path.join(self.out_dir,
                                                     map_out_pref[0][:-2] +
                                                     "_vs_" + index_name)
                        self._out_pref = map_out_pref[0][:-2]
                    else:
                        joined = map_out_pref[0][:-2] + '_' + map_out_pref[1][:-2]
                        self._mapper_out = path.join(self.out_dir,
                                                     joined + "_vs_" +
                                                     index_name)
                        self._out_pref = joined
            else:
                # Single-end: names are built from the (first) library file.
                index_name = path.split(self.index)[1]
                if ',' in self.lib_file:
                    map_out_pref = stem(first_entry(self.lib_file))
                    self._mapper_out = path.join(self.out_dir, map_out_pref +
                                                 "_andothers_vs_" +
                                                 index_name)
                    self._out_pref = map_out_pref + "_andothers_vs_"
                else:
                    map_out_pref = stem(self.lib_file)
                    self._mapper_out = path.join(self.out_dir, map_out_pref +
                                                 "_vs_" + index_name)
                    self._out_pref = map_out_pref

            if annot_file is None:
                raise WedringError(135, "You must provide the annotation file.")
            if wedr_check_path(annot_file):
                self.annot_file = annot_file
        else:
            if annot_file is None:
                raise WedringError(135, "You must provide the annotation file.")
            if wedr_check_path(annot_file):
                self.annot_file = annot_file
            if cov_file is None:
                raise WedringError(135, "You must provide the coverage file.")
            if wedr_check_path(cov_file):
                self.cov_file = cov_file