def getRawCoverageLargeMode(self, chrom, start, end, largeMode=10):
    """Retrieve a down-sampled array with the genome coverage.

    A zero-filled array of length ``end - start`` is populated from the
    intervals returned by ``BigWigFile(self.path).fetch(chrom, start, end)``;
    each interval is read as ``(interval_start, interval_end, value)``.
    Positions not covered by any interval stay 0.

    *Keyword arguments:*

        - chrom -- chromosome name passed to ``fetch``
        - start -- first position of the window (array index 0)
        - end -- end position of the window
        - largeMode -- keep only every ``largeMode``-th position of the array

    *Return:*

        - numpy array holding every ``largeMode``-th value of the window
    """
    out = np.zeros(end - start)
    infile = BigWigFile(self.path)
    try:
        for wig in infile.fetch(chrom, start, end):
            # Clamp to the window: an interval starting before `start` would
            # otherwise yield a negative slice index, which numpy interprets
            # as counting from the end of the array.
            lo = max(wig[0] - start, 0)
            hi = max(wig[1] - start, 0)
            out[lo:hi] = wig[2]
    finally:
        # Release the file handle even if fetching or filling fails.
        infile.close()
    return out[::largeMode]
def coverage_from_bigwig(self, bigwig_file, stepsize=100):
    """Return list of arrays describing the coverage of each genomicRegions from <bigwig_file>.

    Two back ends are used:

        - on macOS (``platform == "darwin"``) or when "http" occurs in
          <bigwig_file>, the external ``bigWigSummary`` command is run once
          per region;
        - otherwise ``ngslib.BigWigFile.pileup`` is used and the pileup is
          sampled every <stepsize> positions.

    *Keyword arguments:*

        - bigwig_file -- path (or URL) to bigwig file
        - stepsize -- used stepsize

    *Output:*

    Class variable <coverage>: a list of numpy arrays, appended in the order of
    the GenomicRegions. With the ``bigWigSummary`` back end, "n/a" values are
    stored as 0 and a region whose command fails is skipped, so the list can
    then be shorter than the number of regions.
    """
    if platform == "darwin" or "http" in bigwig_file:
        self.coverage = []
        for gr in self.genomicRegions:
            steps = int(abs(gr.final - gr.initial) / stepsize)
            # NOTE(review): both coordinates are shifted left by one stepsize
            # here, unlike the ngslib branch below -- confirm this is intended.
            cmd = ["bigWigSummary", bigwig_file, gr.chrom,
                   str(gr.initial - stepsize), str(gr.final - stepsize),
                   str(steps)]
            try:
                # universal_newlines=True yields text on both Python 2 and 3,
                # so the "n/a" test and float() below work on str, not bytes.
                output = subprocess.check_output(cmd, shell=False,
                                                 stderr=subprocess.STDOUT,
                                                 universal_newlines=True)
                ds = [0 if "n/a" in x else float(x)
                      for x in output.strip().split()]
            except (subprocess.CalledProcessError, OSError, ValueError):
                # Best effort: skip regions where bigWigSummary is missing,
                # exits non-zero, or prints something that is not a number.
                continue
            self.coverage.append(np.array(ds))
    ### Linux platform
    else:
        from ngslib import BigWigFile
        self.coverage = []
        bwf = BigWigFile(bigwig_file)
        try:
            for gr in self.genomicRegions:
                depth = bwf.pileup(gr.chrom,
                                   max(0, int(gr.initial - stepsize / 2)),
                                   max(1, int(gr.final + stepsize / 2)))
                ds = [depth[d] for d in range(0, gr.final - gr.initial, stepsize)]
                self.coverage.append(np.array(ds))
        finally:
            # Close the handle even when a pileup call raises.
            bwf.close()
def coverage_from_bigwig(self, bigwig_file, stepsize=100):
    """Return list of arrays describing the coverage of each genomicRegions from <bigwig_file>.

    ``ngslib`` is used when it can be imported; otherwise ``pyBigWig`` is
    used as a fallback.

    *Keyword arguments:*

        - bigwig_file -- path to bigwig file
        - stepsize -- used stepsize

    *Output:*

    Class variable <coverage>: a list where the elements correspond to the
    GenomicRegion. With ngslib each element holds the pileup sampled every
    <stepsize> positions; with pyBigWig each element holds the mean signal of
    ``int(len(region) / stepsize)`` bins, with missing or zero bins stored as 0.
    """
    # Keep the try body minimal: only a missing ngslib should trigger the
    # pyBigWig fallback.
    try:
        from ngslib import BigWigFile
    except ImportError:
        BigWigFile = None

    if BigWigFile is not None:
        self.coverage = []
        bwf = BigWigFile(bigwig_file)
        try:
            for gr in self.genomicRegions:
                depth = bwf.pileup(gr.chrom,
                                   max(0, int(gr.initial - stepsize / 2)),
                                   max(1, int(gr.final + stepsize / 2)))
                ds = [depth[d] for d in range(0, gr.final - gr.initial, stepsize)]
                self.coverage.append(np.array(ds))
        finally:
            # Close the handle even when a pileup call raises.
            bwf.close()
    else:
        import pyBigWig
        self.coverage = []
        bwf = pyBigWig.open(bigwig_file)
        try:
            for gr in self.genomicRegions:
                steps = int(len(gr) / stepsize)
                ds = bwf.stats(gr.chrom, gr.initial, gr.final,
                               type="mean", nBins=steps)
                # Falsy bin values (None or 0) are stored as 0.
                ds = [x if x else 0 for x in ds]
                self.coverage.append(np.array(ds))
        finally:
            bwf.close()
def coverage_from_bigwig(self, bigwig_file, stepsize=100):
    """Return list of arrays describing the coverage of each genomicRegions from <bigwig_file>.

    ``ngslib`` is used when it can be imported; otherwise ``pyBigWig`` is
    used as a fallback.

    *Keyword arguments:*

        - bigwig_file -- path to bigwig file
        - stepsize -- used stepsize

    *Output:*

    Class variable <coverage>: a list where the elements correspond to the
    GenomicRegion. With ngslib each element holds the pileup sampled every
    <stepsize> positions; with pyBigWig each element holds the mean signal of
    ``int(len(region) / stepsize)`` bins, with missing or zero bins stored as 0.
    """
    # Only the import itself is guarded, so that an unrelated failure while
    # reading the file is not mistaken for "ngslib is not installed".
    try:
        from ngslib import BigWigFile
    except ImportError:
        BigWigFile = None

    if BigWigFile is None:
        import pyBigWig
        self.coverage = []
        bwf = pyBigWig.open(bigwig_file)
        try:
            for gr in self.genomicRegions:
                steps = int(len(gr) / stepsize)
                ds = bwf.stats(gr.chrom, gr.initial, gr.final,
                               type="mean", nBins=steps)
                # Falsy bin values (None or 0) are stored as 0.
                ds = [x if x else 0 for x in ds]
                self.coverage.append(np.array(ds))
        finally:
            # Close the handle even when a stats call raises.
            bwf.close()
        return

    self.coverage = []
    bwf = BigWigFile(bigwig_file)
    try:
        for gr in self.genomicRegions:
            depth = bwf.pileup(gr.chrom,
                               max(0, int(gr.initial - stepsize / 2)),
                               max(1, int(gr.final + stepsize / 2)))
            ds = [depth[d] for d in range(0, gr.final - gr.initial, stepsize)]
            self.coverage.append(np.array(ds))
    finally:
        bwf.close()
class BigWigDatasource(Datasource):
    """
    Datasource backed by a BigWig file.

    Each mutation is annotated with a single score named ``<title>_score``.
    When the queried range yields several values (i.e. non SNVs), the median
    of those values is used; when it yields none, the score is None.
    """

    def __init__(self, src_file, title='', version=None):
        """Open ``src_file`` with ngslib and record whether its chromosome names use a 'chr' prefix."""
        # ngslib is imported lazily so that it is only required when a
        # BigWigDatasource is actually instantiated.
        # This should not run on OS X machines.
        from ngslib import BigWigFile

        super(BigWigDatasource, self).__init__(src_file, title=title, version=version)

        self.output_headers = [title + '_score']
        self.bigwig_fh = BigWigFile(src_file)

        # The first chromosome name decides the naming convention of the file.
        first_chrom = self.bigwig_fh.chroms[0]
        self.has_chr = first_chrom.startswith('chr')

    def annotate_mutation(self, mutation):
        """Attach the BigWig score for the mutation's range to ``mutation`` and return it."""
        needs_prefix = self.has_chr and not mutation.chr.startswith('chr')
        chrom_name = ('chr' + mutation.chr) if needs_prefix else mutation.chr

        # BigWig coordinates are zero-based, hence the shift of the start by one.
        zero_based_start = int(mutation.start) - 1
        range_end = int(mutation.end)

        records = self.bigwig_fh.fetch(chrom=chrom_name, start=zero_based_start, stop=range_end)
        values = [record[2] for record in records]

        if len(values) > 1:
            score = np.median(values)
        elif values:
            score = values[0]
        else:
            score = None

        mutation.createAnnotation(self.output_headers[0], score, annotationSource=self.title)
        return mutation

    def close(self):
        """Close the underlying BigWig file handle."""
        self.bigwig_fh.close()
def phastCons46way_score(self, stepsize=100, phastCons46way_dir="/data/phastCons46way/"):
    """Load the phastCons46way bigwig files to fetch the scores as coverage.

    For every region the file ``<chrom>.phastCons46way.bw`` inside
    <phastCons46way_dir> is opened, and for each position of the region the
    mean of a window of <stepsize> pileup values is stored. Regions whose
    orientation is "-" are stored reversed.

    *Keyword arguments:*

        - stepsize -- used stepsize (width of the averaging window)
        - phastCons46way_dir -- directory holding the per-chromosome bigwig files

    *Output:*

    Class variable <coverage>: a list with one numpy array per GenomicRegion.
    """
    self.coverage = []
    # Floor division keeps the coordinates integral on Python 3 as well.
    half_step = stepsize // 2
    for gr in self.genomicRegions:
        bwf = BigWigFile(os.path.join(phastCons46way_dir,
                                      gr.chrom + ".phastCons46way.bw"))
        try:
            depth = bwf.pileup(gr.chrom, gr.initial - half_step, gr.final + half_step)
            ds = []
            for i in range(0, gr.final - gr.initial):
                d = [depth[j] for j in range(i, i + stepsize)]
                ds.append(sum(d) / len(d))

            if gr.orientation == "-":
                self.coverage.append(np.array(list(reversed(ds))))
            else:
                self.coverage.append(np.array(ds))
        finally:
            # One file is opened per region; always release it, even on error.
            bwf.close()
def getChromSizesNGSLIB(self):
    """Return a dict built from the chromosome sizes of the BigWig file at ``self.path``.

    ``BigWigFile.chromSizes()`` is indexed as a pair of parallel sequences;
    element 0 supplies the dict keys and element 1 the dict values.
    """
    infile = BigWigFile(self.path)
    try:
        sizes = infile.chromSizes()
        return dict(zip(sizes[0], sizes[1]))
    finally:
        # Release the file handle even if reading the sizes fails.
        infile.close()