示例#1
0
    def pca_rnaseq(self, counts_table_file):
        """Run 'pca.R' on *counts_table_file* and register the resulting PDF.

        The step is skipped (with a debug message) when 'pca.R' cannot be
        found, and abandoned (again with a debug message) when the run
        raises or yields no output prefix.

        :param counts_table_file: counts table handed to 'pca.R' as its
            first argument.
        :return: None in every case.
        """
        @program
        def pca(counts_table_file):
            """Bind 'pca.R'; the return value is the output prefix."""
            prefix = unique_filename_in()
            return {
                "arguments": ['pca.R', counts_table_file, prefix, "rpkm"],
                "return_value": prefix,
            }

        if not program_exists('pca.R'):
            self.write_debug("Skipped PCA: pca.R not found.")
            return

        try:
            self.write_log("* PCA")
            job = pca.nonblocking(self.ex, counts_table_file, via=self.via)
            outprefix = job.wait()
        except Exception as err:
            self.write_debug("PCA failed: %s." % str(err))
            return

        if outprefix is None:
            self.write_debug("PCA failed.")
            return

        # The script output is expected at '<outprefix>.pdf'.
        description = set_file_descr('pca.pdf', type='pdf', step='pca', ucsc=0)
        pdf_path = outprefix + '.pdf'
        self.ex.add(pdf_path, description=description)
示例#2
0
 def _run_tool(self, tool_name, args):
     """Run *tool_name* with *args* and raise if it reports a failure.

     :param tool_name: (str) name of the executable; it must be accepted
         by `program_exists`.
     :param args: (list) command-line arguments appended after the
         program name.
     :raises OSError: if the program is not found, if it writes anything
         to stderr, or if it exits with a non-zero status.
     """
     if not program_exists(tool_name):
         raise OSError("Program not found in $PATH: %s" % tool_name)
     proc = subprocess.Popen([tool_name] + list(args),
                             stderr=subprocess.PIPE)
     # stdout is not piped, so communicate() only captures stderr.
     _, stderr = proc.communicate()
     if stderr:
         raise OSError("%s exited with message: %s" % (tool_name, stderr))
     # A tool can fail without printing anything; do not report success
     # in that case.
     if proc.returncode != 0:
         raise OSError("%s exited with status %d"
                       % (tool_name, proc.returncode))
示例#3
0
    def differential_analysis(self, filename):
        """Run 'limma.R' on the counts file and return its output files.

        The analysis is skipped when 'limma.R' is not found, when
        *filename* is None, or when fewer than two conditions are defined
        in `self.conditions`.

        :param filename: (str) counts file passed to 'limma.R', or None.
        :return: list of the entries of `self.ex.working_directory` whose
            name contains the output name returned by the run, or None if
            the analysis was skipped or failed.
        """
        @program
        def run_DE(data_file):
            """Run limma.R on *data_file*."""
            output_file = unique_filename_in()
            arguments = [
                "limma.R", data_file, "-s", "$'\t'", "-o", output_file
            ]
            return {'arguments': arguments, 'return_value': output_file}

        if not program_exists('limma.R'):
            # The program that is checked (and run) is limma.R, so name it.
            self.write_debug("Skipped DE analysis: limma.R not found.")
            return
        if filename is None:
            self.write_log(
                "  Skipped differential analysis: empty counts file.")
            return
        if len(self.conditions) < 2:
            self.write_log(
                "  Skipped differential analysis: less than two groups.")
            return

        self.write_log("* Differential analysis")
        try:
            de_file = run_DE.nonblocking(self.ex, filename,
                                         via=self.via).wait()
        except Exception as err:
            self.write_debug("DE analysis failed with error: %s." %
                             str(err))
            return
        if de_file is None:
            self.write_debug(
                "DE analysis failed (see bein 'program' table).")
            return

        # Only scan the working directory when `de_file` is a usable name:
        # testing `de_file in f` with an exception object would raise.
        output_files = []
        if not isinstance(de_file, Exception):
            output_files = [
                f for f in os.listdir(self.ex.working_directory)
                if de_file in f
            ]
        if not output_files:
            self.write_debug(
                "Skipped differential analysis: `de_file` has value %s." %
                str(de_file))
            return
        self.write_log("  ....done.")
        return output_files
示例#4
0
文件: rnaseq.py 项目: bbcf/bbcflib
    def pca_rnaseq(self,counts_table_file):
        """Run 'pca.R' on *counts_table_file* and register the resulting PDF.

        The step is skipped (with a debug message) when 'pca.R' cannot be
        found, and abandoned (again with a debug message) when the run
        raises or yields no output prefix.

        :param counts_table_file: counts table handed to 'pca.R' as its
            first argument.
        :return: None in every case.
        """
        @program
        def pca(counts_table_file):
            """Bind 'pca.R'; the return value is the output prefix."""
            prefix = unique_filename_in()
            return {
                "arguments": ['pca.R', counts_table_file, prefix, "rpkm"],
                "return_value": prefix,
            }

        if not program_exists('pca.R'):
            self.write_debug("Skipped PCA: pca.R not found.")
            return

        try:
            self.write_log("* PCA")
            job = pca.nonblocking(self.ex, counts_table_file, via=self.via)
            outprefix = job.wait()
        except Exception as err:
            self.write_debug("PCA failed: %s." % str(err))
            return

        if outprefix is None:
            self.write_debug("PCA failed.")
            return

        # The script output is expected at '<outprefix>.pdf'.
        description = set_file_descr('pca.pdf', type='pdf', step='pca', ucsc=0)
        pdf_path = outprefix + '.pdf'
        self.ex.add(pdf_path, description=description)
示例#5
0
文件: rnaseq.py 项目: bbcf/bbcflib
    def differential_analysis(self, filename):
        """Run 'limma.R' on the counts file and return its output files.

        The analysis is skipped when 'limma.R' is not found, when
        *filename* is None, or when fewer than two conditions are defined
        in `self.conditions`.

        :param filename: (str) counts file passed to 'limma.R', or None.
        :return: list of the entries of `self.ex.working_directory` whose
            name contains the output name returned by the run, or None if
            the analysis was skipped or failed.
        """

        @program
        def run_DE(data_file):
            """Run limma.R on *data_file*."""
            output_file = unique_filename_in()
            arguments = ["limma.R", data_file, "-s","$'\t'", "-o",output_file]
            return {'arguments': arguments, 'return_value': output_file}

        if not program_exists('limma.R'):
            # The program that is checked (and run) is limma.R, so name it.
            self.write_debug("Skipped DE analysis: limma.R not found.")
            return
        if filename is None:
            self.write_log("  Skipped differential analysis: empty counts file.")
            return
        if len(self.conditions) < 2:
            self.write_log("  Skipped differential analysis: less than two groups.")
            return

        self.write_log("* Differential analysis")
        try:
            de_file = run_DE.nonblocking(self.ex, filename, via=self.via).wait()
        except Exception as err:
            self.write_debug("DE analysis failed with error: %s." % str(err))
            return
        if de_file is None:
            self.write_debug("DE analysis failed (see bein 'program' table).")
            return

        # Only scan the working directory when `de_file` is a usable name:
        # testing `de_file in f` with an exception object would raise.
        output_files = []
        if not isinstance(de_file, Exception):
            output_files = [f for f in os.listdir(self.ex.working_directory)
                            if de_file in f]
        if not output_files:
            self.write_debug("Skipped differential analysis: `de_file` has value %s." % str(de_file))
            return
        self.write_log("  ....done.")
        return output_files
示例#6
0
    def find_junctions(self,
                       soapsplice_index=None,
                       path_to_soapsplice=None,
                       soapsplice_options=None):
        """
        Retrieve unmapped reads from a precedent mapping and runs SOAPsplice on them.
        Return the names of a .bed track indicating the junctions positions, as well as
        of a bam file of the alignments attesting the junctions.

        :param soapsplice_index: (str) path to the SOAPsplice index.
            Defaults to `self.assembly.index_path`.
        :param path_to_soapsplice: (str) specify the path to the program if it is not in your $PATH.
        :param soapsplice_options: (dict) SOAPsplice options, e.g. {'-m':2}.
            The dict is copied, never modified. Entries found under
            'soapsplice_options' in `self.job.options` override it, and
            '-p' (16) and '-q' (1) are filled in when absent.
        :return: (bed, bam) for the last group that was processed. `bam` is
            None if the SAM-to-BAM conversion of that group failed, and both
            are None if no group had paired-end unmapped reads. A bare None
            is returned when soapsplice is not found or when its run fails.
        """
        @program
        def soapsplice(unmapped_R1,
                       unmapped_R2,
                       index,
                       output=None,
                       path_to_soapsplice=None,
                       options=None):
            """Bind 'soapsplice'. Return a text file containing the list of junctions.

            :param unmapped_R1: (str) path to the fastq file containing the 'left' reads.
            :param unmapped_R2: (str) path to the fastq file containing the 'right' reads.
            :param index: (str) path to the SOAPsplice index.
            :param output: (str) output file name.
            :param path_to_soapsplice: (str) path to the SOAPsplice executable.
                If not specified, the program must be in your $PATH.
            :param options: (dict) SOAPsplice options, given as {opt: value}.
            :rtype: str

            Main options::

            -p: number of threads, <= 20. [1]
            -S: 1: forward strand, 2: reverse strand, 3: both. [3]
            -m: maximum mismatch for one-segment alignment, <= 5. [3]
            -g: maximum indel for one-segment alignment, <= 2. [2]
            -i: length of tail that can be ignored in one-segment alignment. [7]
            -t: longest gap between two segments in two-segment alignment. [500000]
            -a: shortest length of a segment in two-segment alignment. [8]
            -q: input quality type in FASTQ file (0: old Illumina, 1: Sanger). [0]
            -L: maximum distance between paired-end reads. [500000]
            -l: minimum distance between paired-end reads. [50]
            -I: insert length of paired-end reads.
            """
            if not output:
                output = unique_filename_in()
            path_to_soapsplice = path_to_soapsplice or 'soapsplice'
            args = [
                path_to_soapsplice, '-d', index, '-1', unmapped_R1, '-2',
                unmapped_R2, '-o', output, '-f', '2'
            ]
            opts = []
            for k, v in (options or {}).iteritems():
                opts.extend([str(k), str(v)])
            return {"arguments": args + opts, "return_value": output}

        # NOTE(review): this looks up 'soapsplice' even when
        # `path_to_soapsplice` is given -- confirm whether that is intended.
        if not program_exists('soapsplice'):
            self.write_debug("Skipped junctions search: soapsplice not found.")
            return
        self.assembly.set_index_path(intype=3)
        soapsplice_index = soapsplice_index or self.assembly.index_path
        # Work on a private copy: updating the argument in place would leak
        # settings into the caller's dict (or into a shared default) and
        # from one call to the next.
        soapsplice_options = dict(soapsplice_options or {})
        soapsplice_options.update(
            self.job.options.get('soapsplice_options', {}))
        soapsplice_options.setdefault('-p', 16)  # number of threads
        soapsplice_options.setdefault('-q', 1)  # Sanger format
        # Defined up front so the final return is valid even if every group
        # is skipped.
        bed = bam = None
        unmapped_fastq = {}
        for gid, group in self.job.groups.iteritems():
            unmapped_fastq[gid] = []
            for rid, run in group['runs'].iteritems():
                unmapped = self.job.files[gid][rid].get('unmapped_fastq')
                if not unmapped:
                    self.write_log(
                        "No unmapped reads found for group %s, run %d. Skip." %
                        (gid, rid))
                    continue
                elif not isinstance(unmapped, tuple):
                    self.write_log("Pair-end reads required. Skip.")
                    continue
                unmapped_fastq[gid].append(unmapped)
            if len(unmapped_fastq[gid]) == 0:
                continue
            # Concatenate all first mates, then all second mates, of the group.
            R1 = cat(tuple(pair[0] for pair in unmapped_fastq[gid]))
            R2 = cat(tuple(pair[1] for pair in unmapped_fastq[gid]))
            future = soapsplice.nonblocking(
                self.ex,
                R1,
                R2,
                soapsplice_index,
                path_to_soapsplice=path_to_soapsplice,
                options=soapsplice_options,
                via=self.via,
                memory=8,
                threads=soapsplice_options['-p'])
            try:
                template = future.wait()
            except Exception as err:
                self.write_debug("SOAPsplice failed: %s." % str(err))
                return
            if template is None:
                self.write_debug("SOAPsplice failed.")
                return
            junc_file = template + '.junc'
            bed = self.convert_junc_file(junc_file, self.assembly)
            bed_descr = set_file_descr('junctions_%s.bed' % group['name'],
                                       groupId=gid,
                                       type='bed',
                                       step='junctions',
                                       ucsc=1)
            bam_descr = set_file_descr('junctions_%s.bam' % group['name'],
                                       groupId=gid,
                                       type='bam',
                                       step='junctions',
                                       ucsc=0)
            sam = template + '.sam'
            # Forget the previous group's bam so that a failed conversion
            # is not reported with a stale file name.
            bam = None
            try:
                bam = sam_to_bam(self.ex, sam, reheader=self.assembly.name)
                add_and_index_bam(self.ex, bam, description=bam_descr)
                self.ex.add(bam, description=bam_descr)
            except Exception as e:
                self.write_debug(
                    "%s\n(Qualities may be in the wrong format, try with '-q 0'.)"
                    % str(e))
            self.ex.add(bed, description=bed_descr)
        return bed, bam
示例#7
0
文件: rnaseq.py 项目: bbcf/bbcflib
    def find_junctions(self, soapsplice_index=None, path_to_soapsplice=None, soapsplice_options=None):
        """
        Retrieve unmapped reads from a precedent mapping and runs SOAPsplice on them.
        Return the names of a .bed track indicating the junctions positions, as well as
        of a bam file of the alignments attesting the junctions.

        :param soapsplice_index: (str) path to the SOAPsplice index.
            Defaults to `self.assembly.index_path`.
        :param path_to_soapsplice: (str) specify the path to the program if it is not in your $PATH.
        :param soapsplice_options: (dict) SOAPsplice options, e.g. {'-m':2}.
            The dict is copied, never modified. Entries found under
            'soapsplice_options' in `self.job.options` override it, and
            '-p' (16) and '-q' (1) are filled in when absent.
        :return: (bed, bam) for the last group that was processed. `bam` is
            None if the SAM-to-BAM conversion of that group failed, and both
            are None if no group had paired-end unmapped reads. A bare None
            is returned when soapsplice is not found or when its run fails.
        """

        @program
        def soapsplice(unmapped_R1, unmapped_R2, index, output=None, path_to_soapsplice=None, options=None):
            """Bind 'soapsplice'. Return a text file containing the list of junctions.

            :param unmapped_R1: (str) path to the fastq file containing the 'left' reads.
            :param unmapped_R2: (str) path to the fastq file containing the 'right' reads.
            :param index: (str) path to the SOAPsplice index.
            :param output: (str) output file name.
            :param path_to_soapsplice: (str) path to the SOAPsplice executable.
                If not specified, the program must be in your $PATH.
            :param options: (dict) SOAPsplice options, given as {opt: value}.
            :rtype: str

            Main options::

            -p: number of threads, <= 20. [1]
            -S: 1: forward strand, 2: reverse strand, 3: both. [3]
            -m: maximum mismatch for one-segment alignment, <= 5. [3]
            -g: maximum indel for one-segment alignment, <= 2. [2]
            -i: length of tail that can be ignored in one-segment alignment. [7]
            -t: longest gap between two segments in two-segment alignment. [500000]
            -a: shortest length of a segment in two-segment alignment. [8]
            -q: input quality type in FASTQ file (0: old Illumina, 1: Sanger). [0]
            -L: maximum distance between paired-end reads. [500000]
            -l: minimum distance between paired-end reads. [50]
            -I: insert length of paired-end reads.
            """
            if not output:
                output = unique_filename_in()
            path_to_soapsplice = path_to_soapsplice or 'soapsplice'
            args = [path_to_soapsplice,'-d',index,'-1',unmapped_R1,'-2',unmapped_R2,'-o',output,'-f','2']
            opts = []
            for k,v in (options or {}).iteritems():
                opts.extend([str(k),str(v)])
            return {"arguments": args+opts, "return_value": output}

        # NOTE(review): this looks up 'soapsplice' even when
        # `path_to_soapsplice` is given -- confirm whether that is intended.
        if not program_exists('soapsplice'):
            self.write_debug("Skipped junctions search: soapsplice not found.")
            return
        self.assembly.set_index_path(intype=3)
        soapsplice_index = soapsplice_index or self.assembly.index_path
        # Work on a private copy: updating the argument in place would leak
        # settings into the caller's dict (or into a shared default) and
        # from one call to the next.
        soapsplice_options = dict(soapsplice_options or {})
        soapsplice_options.update(self.job.options.get('soapsplice_options',{}))
        soapsplice_options.setdefault('-p',16) # number of threads
        soapsplice_options.setdefault('-q',1)  # Sanger format
        # Defined up front so the final return is valid even if every group
        # is skipped.
        bed = bam = None
        unmapped_fastq = {}
        for gid, group in self.job.groups.iteritems():
            unmapped_fastq[gid] = []
            for rid, run in group['runs'].iteritems():
                unmapped = self.job.files[gid][rid].get('unmapped_fastq')
                if not unmapped:
                    self.write_log("No unmapped reads found for group %s, run %d. Skip." % (gid,rid))
                    continue
                elif not isinstance(unmapped,tuple):
                    self.write_log("Pair-end reads required. Skip.")
                    continue
                unmapped_fastq[gid].append(unmapped)
            if len(unmapped_fastq[gid]) == 0:
                continue
            # Concatenate all first mates, then all second mates, of the group.
            R1 = cat(tuple(pair[0] for pair in unmapped_fastq[gid]))
            R2 = cat(tuple(pair[1] for pair in unmapped_fastq[gid]))
            future = soapsplice.nonblocking(self.ex,R1,R2,soapsplice_index,
                                            path_to_soapsplice=path_to_soapsplice,
                                            options=soapsplice_options,
                                            via=self.via, memory=8, threads=soapsplice_options['-p'])
            try:
                template = future.wait()
            except Exception as err:
                self.write_debug("SOAPsplice failed: %s." % str(err))
                return
            if template is None:
                self.write_debug("SOAPsplice failed.")
                return
            junc_file = template+'.junc'
            bed = self.convert_junc_file(junc_file,self.assembly)
            bed_descr = set_file_descr('junctions_%s.bed' % group['name'],
                                       groupId=gid,type='bed',step='junctions', ucsc=1)
            bam_descr = set_file_descr('junctions_%s.bam' % group['name'],
                                       groupId=gid,type='bam',step='junctions', ucsc=0)
            sam = template+'.sam'
            # Forget the previous group's bam so that a failed conversion
            # is not reported with a stale file name.
            bam = None
            try:
                bam = sam_to_bam(self.ex,sam,reheader=self.assembly.name)
                add_and_index_bam(self.ex, bam, description=bam_descr)
                self.ex.add(bam, description=bam_descr)
            except Exception as e:
                self.write_debug("%s\n(Qualities may be in the wrong format, try with '-q 0'.)" %str(e))
            self.ex.add(bed, description=bed_descr)
        return bed, bam
示例#8
0
文件: bin.py 项目: bbcf/bbcflib
 def _run_tool(self, tool_name, args):
     """Run *tool_name* with *args* and raise if it reports a failure.

     :param tool_name: (str) name of the executable; it must be accepted
         by `program_exists`.
     :param args: (list) command-line arguments appended after the
         program name.
     :raises OSError: if the program is not found, if it writes anything
         to stderr, or if it exits with a non-zero status.
     """
     if not program_exists(tool_name):
         raise OSError("Program not found in $PATH: %s" % tool_name)
     proc = subprocess.Popen([tool_name]+list(args), stderr=subprocess.PIPE)
     # stdout is not piped, so communicate() only captures stderr.
     _, stderr = proc.communicate()
     if stderr:
         raise OSError("%s exited with message: %s" % (tool_name,stderr))
     # A tool can fail without printing anything; do not report success
     # in that case.
     if proc.returncode != 0:
         raise OSError("%s exited with status %d" % (tool_name,proc.returncode))