Пример #1
0
 def getRawCoverageLargeMode(self, chrom, start, end, largeMode=10):
     """Retrieve a sub-sampled array with the genome coverage.

     A per-position array spanning ``[start, end)`` is filled from the
     intervals fetched out of the bigWig file at ``self.path``; every
     ``largeMode``-th position of that array is then returned.

     :param chrom: chromosome name handed to ``BigWigFile.fetch``.
     :param start: first position of the requested window.
     :param end: position one past the last position of the window.
     :param largeMode: stride used to sub-sample the per-position array.
     :return: numpy array with every ``largeMode``-th coverage value;
         positions not covered by any interval are 0.
     """
     out = np.zeros(end - start)
     infile = BigWigFile(self.path)
     try:
         for wig in infile.fetch(chrom, start, end):
             # Clamp the left edge: an interval starting before ``start``
             # would produce a negative index, which numpy interprets as
             # an offset from the end of the array.
             first = max(wig[0] - start, 0)
             last = wig[1] - start
             if last > first:
                 out[first:last] = wig[2]
     finally:
         # Release the file handle even when fetching fails.
         infile.close()
     return out[::largeMode]
Пример #2
0
    def coverage_from_bigwig(self, bigwig_file, stepsize=100):
        """Store in <coverage> one array per genomic region, read from <bigwig_file>.

        *Keyword arguments:*

        - bigwig_file -- path (or URL) to bigwig file
        - stepsize -- used stepsize

        *Output:*

        Nothing is returned. Class variable <coverage> is replaced by a list of numpy arrays,
        one per processed GenomicRegion, holding the values sampled from the bigwig file.

        On OS X, or when <bigwig_file> contains "http", the external ``bigWigSummary`` tool is
        used; regions for which the tool fails or prints unparsable output are skipped, so
        <coverage> may then hold fewer entries than <genomicRegions>. Otherwise ngslib is used.
        """
        if platform == "darwin" or "http" in bigwig_file:
            self.coverage = []
            for gr in self.genomicRegions:
                steps = int(abs(gr.final - gr.initial) / stepsize)
                # NOTE(review): both bounds are shifted left by one stepsize -- confirm this is
                # intended; it is kept as-is to preserve the existing output.
                cmd = ["bigWigSummary", bigwig_file, gr.chrom,
                       str(gr.initial - stepsize), str(gr.final - stepsize), str(steps)]
                try:
                    output = subprocess.check_output(cmd, shell=False, stderr=subprocess.STDOUT)
                    # check_output yields bytes on Python 3; the parsing below needs text.
                    if not isinstance(output, str):
                        output = output.decode()
                    # Bins without data are printed as "n/a" and counted as 0.
                    ds = [0 if "n/a" in x else float(x) for x in output.strip().split()]
                except (subprocess.CalledProcessError, OSError, ValueError):
                    # Best effort: tool missing, non-zero exit or unparsable output.
                    continue
                self.coverage.append(np.array(ds))

        ### Linux platform
        else:
            from ngslib import BigWigFile
            self.coverage = []
            bwf = BigWigFile(bigwig_file)
            try:
                for gr in self.genomicRegions:
                    # The pileup window is widened by half a stepsize on each side and
                    # clamped so that the bounds never drop below 0 / 1.
                    depth = bwf.pileup(gr.chrom,
                                       max(0, int(gr.initial - stepsize / 2)),
                                       max(1, int(gr.final + stepsize / 2)))
                    ds = [depth[d] for d in range(0, gr.final - gr.initial, stepsize)]
                    self.coverage.append(np.array(ds))
            finally:
                # Close the handle even if a region cannot be read.
                bwf.close()
Пример #3
0
    def coverage_from_bigwig(self, bigwig_file, stepsize=100):
        """Store in <coverage> one array per genomic region, read from <bigwig_file>.

        *Keyword arguments:*

        - bigwig_file -- path to bigwig file
        - stepsize -- used stepsize

        *Output:*

        Nothing is returned. Class variable <coverage> is replaced by a list of numpy arrays,
        one per GenomicRegion, holding the values sampled from the bigwig file.

        ngslib is used when it can be imported; otherwise pyBigWig is used as a fallback.
        """
        # Only the import decides which backend is used, so only the import is guarded.
        try:
            from ngslib import BigWigFile
        except ImportError:
            BigWigFile = None
            import pyBigWig

        self.coverage = []

        if BigWigFile is not None:
            bwf = BigWigFile(bigwig_file)
            try:
                for gr in self.genomicRegions:
                    # The pileup window is widened by half a stepsize on each side and
                    # clamped so that the bounds never drop below 0 / 1.
                    depth = bwf.pileup(gr.chrom,
                                       max(0, int(gr.initial - stepsize / 2)),
                                       max(1, int(gr.final + stepsize / 2)))
                    ds = [
                        depth[d] for d in range(0, gr.final - gr.initial, stepsize)
                    ]
                    self.coverage.append(np.array(ds))
            finally:
                # Close the handle even if a region cannot be read.
                bwf.close()
            return

        bwf = pyBigWig.open(bigwig_file)
        try:
            for gr in self.genomicRegions:
                steps = int(len(gr) / stepsize)
                ds = bwf.stats(gr.chrom,
                               gr.initial,
                               gr.final,
                               type="mean",
                               nBins=steps)
                # Falsy bin values (e.g. None) are stored as 0.
                ds = [x if x else 0 for x in ds]
                self.coverage.append(np.array(ds))
        finally:
            bwf.close()
Пример #4
0
    def coverage_from_bigwig(self, bigwig_file, stepsize=100):
        """Store in <coverage> one array per genomic region, read from <bigwig_file>.

        *Keyword arguments:*

        - bigwig_file -- path to bigwig file
        - stepsize -- used stepsize

        *Output:*

        Nothing is returned. Class variable <coverage> is replaced by a list of numpy arrays,
        one per GenomicRegion, holding the values sampled from the bigwig file.

        ngslib is preferred; pyBigWig is used only when ngslib cannot be imported.
        """
        # Keep the guarded section down to the import that selects the backend.
        try:
            from ngslib import BigWigFile
            use_ngslib = True
        except ImportError:
            import pyBigWig
            use_ngslib = False

        self.coverage = []

        if use_ngslib:
            bwf = BigWigFile(bigwig_file)
            try:
                for gr in self.genomicRegions:
                    # Window widened by half a stepsize per side, bounds clamped to >= 0 / >= 1.
                    depth = bwf.pileup(gr.chrom, max(0, int(gr.initial - stepsize / 2)),
                                       max(1, int(gr.final + stepsize / 2)))
                    ds = [depth[d] for d in range(0, gr.final - gr.initial, stepsize)]
                    self.coverage.append(np.array(ds))
            finally:
                # Release the file handle even when reading a region fails.
                bwf.close()
        else:
            bwf = pyBigWig.open(bigwig_file)
            try:
                for gr in self.genomicRegions:
                    steps = int(len(gr) / stepsize)
                    ds = bwf.stats(gr.chrom, gr.initial, gr.final, type="mean", nBins=steps)
                    # Falsy bin values (e.g. None) are stored as 0.
                    ds = [x if x else 0 for x in ds]
                    self.coverage.append(np.array(ds))
            finally:
                bwf.close()
Пример #5
0
class BigWigDatasource(Datasource):
    """
    Datasource that annotates mutations with scores read from a BigWig file.

    A mutation overlapping exactly one stored value receives that value, one overlapping
    several values receives their median, and one overlapping none receives None.
    """
    def __init__(self, src_file, title='', version=None):
        # ngslib is imported lazily so that it is required only when a
        # BigWigDatasource is actually instantiated.
        # This should not run on OS X machines
        from ngslib import BigWigFile

        super(BigWigDatasource, self).__init__(src_file, title=title, version=version)

        self.output_headers = [title + '_score']
        self.bigwig_fh = BigWigFile(src_file)
        # The first chromosome name decides whether the file uses a 'chr' prefix.
        first_chrom = self.bigwig_fh.chroms[0]
        self.has_chr = bool(first_chrom.startswith('chr'))

    def annotate_mutation(self, mutation):
        """Attach the BigWig score for the mutation's range and return the mutation."""
        chrom_name = mutation.chr
        if self.has_chr and not chrom_name.startswith('chr'):
            # Align the mutation's chromosome naming with the file's.
            chrom_name = 'chr' + chrom_name

        # start - 1 because bigwig format is zero-based coords
        range_start = int(mutation.start) - 1
        range_end = int(mutation.end)

        records = self.bigwig_fh.fetch(chrom=chrom_name, start=range_start, stop=range_end)
        values = [record[2] for record in records]

        if len(values) > 1:
            score = np.median(values)
        elif values:
            score = values[0]
        else:
            score = None

        mutation.createAnnotation(self.output_headers[0], score, annotationSource=self.title)
        return mutation

    def close(self):
        """Close the underlying BigWig file handle."""
        self.bigwig_fh.close()
Пример #6
0
    def phastCons46way_score(self, stepsize=100, phastCons46way_dir="/data/phastCons46way/"):
        """Load the phastCons46way bigwig files to fetch the scores as coverage.

        For every genomic region the file ``<chrom>.phastCons46way.bw`` inside
        <phastCons46way_dir> is opened, and for each position of the region the mean over a
        window of <stepsize> values is stored. Regions whose orientation is "-" are stored
        in reversed order.

        *Keyword arguments:*

        - stepsize -- used stepsize (width of the averaging window)
        - phastCons46way_dir -- directory holding the per-chromosome bigwig files

        *Output:*

        Nothing is returned. Class variable <coverage> is replaced by a list of numpy arrays,
        one per GenomicRegion.
        """
        self.coverage = []
        # Floor division keeps the bounds integral on Python 3 as well as Python 2.
        half_step = stepsize // 2
        for gr in self.genomicRegions:
            bwf = BigWigFile(os.path.join(phastCons46way_dir, gr.chrom + ".phastCons46way.bw"))
            try:
                depth = bwf.pileup(gr.chrom, gr.initial - half_step, gr.final + half_step)
                ds = []
                for i in range(0, gr.final - gr.initial):
                    # Mean over the <stepsize> values starting at offset i of the pileup.
                    d = [depth[j] for j in range(i, i + stepsize)]
                    ds.append(sum(d) / len(d))

                if gr.orientation == "-":
                    self.coverage.append(np.array(list(reversed(ds))))
                else:
                    self.coverage.append(np.array(ds))
            finally:
                # Close the per-chromosome file even if reading it fails.
                bwf.close()
Пример #7
0
 def getChromSizesNGSLIB(self):
     """Read the chromosome sizes of the bigWig file at ``self.path``.

     :return: dict pairing, element by element, the first sequence
         returned by ``BigWigFile.chromSizes()`` (keys) with the second
         one (values) -- presumably chromosome names and their lengths.
     """
     infile = BigWigFile(self.path)
     try:
         sizes = infile.chromSizes()
         return dict(zip(sizes[0], sizes[1]))
     finally:
         # Release the file handle even when the sizes cannot be read.
         infile.close()