class OccChunk(Chunk):
    """Class for calculating occupancy and occupancy peaks.

    Attributes such as ``mat``, ``bias_mat``, ``occ`` and ``cov`` are created
    by the individual steps; ``process`` runs the steps in the required order.
    """

    def __init__(self, chunk):
        """Copy the coordinates of ``chunk`` and start with no peaks or NFRs."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom
        self.peaks = {}
        self.nfrs = []

    def getFragmentMat(self):
        """Build the fragment matrix for the chunk extended by ``params.flank``."""
        self.mat = FragmentMat2D(self.chrom,
                                 self.start - self.params.flank,
                                 self.end + self.params.flank,
                                 0, self.params.upper)
        self.mat.makeFragmentMat(self.params.bam)

    def makeBiasMat(self):
        """Build the bias matrix; sequence bias is applied only with a fasta."""
        self.bias_mat = BiasMat2D(self.chrom,
                                  self.start - self.params.flank,
                                  self.end + self.params.flank,
                                  0, self.params.upper)
        if self.params.fasta is not None:
            # Floor division keeps the track coordinates integral on
            # Python 3, where "/" would silently produce floats.
            half_upper = self.params.upper // 2
            bias_track = InsertionBiasTrack(
                self.chrom,
                self.start - self.params.window - half_upper,
                self.end + self.params.window + half_upper + 1,
                log=True)
            bias_track.computeBias(self.params.fasta, self.params.chrs,
                                   self.params.pwm)
            self.bias_mat.makeBiasMat(bias_track)

    def calculateOcc(self):
        """calculate occupancy for chunk"""
        self.occ = OccupancyTrack(self.chrom, self.start, self.end)
        self.occ.calculateOccupancyMLE(self.mat, self.bias_mat, self.params)
        self.occ.makeSmoothed(window_len=self.params.window,
                              sd=self.params.flank / 3.0)

    def getCov(self):
        """Get read coverage for regions"""
        self.cov = CoverageTrack(self.chrom, self.start, self.end)
        self.cov.calculateCoverage(self.mat, 0, self.params.upper,
                                   self.params.window)

    def callPeaks(self):
        """Call peaks of occupancy profile.

        A candidate is stored in ``self.peaks`` (keyed by its chunk-relative
        position) only if its lower occupancy bound exceeds
        ``params.min_occ`` and it has at least one read.
        """
        peaks = call_peaks(self.occ.smoothed_vals, sep=self.params.sep,
                           min_signal=self.params.min_occ)
        for peak in peaks:
            tmp = OccPeak(peak + self.start, self)
            if tmp.occ_lower > self.params.min_occ and tmp.reads > 0:
                self.peaks[peak] = tmp

    def getNucDist(self):
        """Get nucleosomal insert distribution.

        Returns:
            Array of length ``params.upper`` holding the sum, over all called
            peaks, of the normalized row sums of the fragment matrix in a
            window of ``params.flank`` on either side of each peak.
        """
        nuc_dist = np.zeros(self.params.upper)
        for occ_peak in self.peaks.values():
            sub = self.mat.get(start=occ_peak.start - self.params.flank,
                               end=occ_peak.start + 1 + self.params.flank)
            sub_sum = np.sum(sub, axis=1)
            sub_sum = sub_sum / float(sum(sub_sum))
            nuc_dist += sub_sum
        return nuc_dist

    def process(self, params):
        """process chunk -- calculate occupancy, get coverage, call peaks"""
        self.params = params
        self.getFragmentMat()
        self.makeBiasMat()
        self.calculateOcc()
        self.getCov()
        self.callPeaks()

    def removeData(self):
        """remove data from chunk-- deletes all attributes"""
        # Snapshot the names first: deleting while iterating the live
        # __dict__ view raises RuntimeError on Python 3.
        names = list(self.__dict__.keys())
        for name in names:
            delattr(self, name)
def getNucSignal(self):
    """Compute the nucleosome signal tracks for this chunk.

    Sets, in order: ``nuc_cov`` (coverage between ``params.lower`` and
    ``params.upper``), ``bias`` (background signal), ``nuc_signal`` (raw
    signal against ``params.vmat``) and ``norm_signal`` (signal normalized
    with the bias track).
    """
    settings = self.params
    region = (self.chrom, self.start, self.end)

    self.nuc_cov = CoverageTrack(*region)
    self.nuc_cov.calculateCoverage(
        self.mat, settings.lower, settings.upper, settings.window)

    self.bias = BiasTrack(*region)
    self.bias.calculateBackgroundSignal(
        self.bias_mat, settings.vmat, self.nuc_cov)

    self.nuc_signal = SignalTrack(*region)
    self.nuc_signal.calculateSignal(self.mat, settings.vmat)

    self.norm_signal = NormSignalTrack(*region)
    self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)
def _covHelper(arg):
    """Computes coverage track for a particular set of bed regions.

    Args:
        arg: a ``(chunk, args)`` tuple; packed into one argument so the
            function can be called with a single positional value.

    Returns:
        The CoverageTrack for the chunk, with values scaled by
        ``args.scale / args.window``.

    Raises:
        Re-raises any exception from the computation after printing the
        offending region and the traceback.
    """
    (chunk, args) = arg
    try:
        # Floor division: genomic offsets must stay integers on Python 3.
        offset = args.window // 2
        mat = FragmentMat2D(chunk.chrom, chunk.start - offset,
                            chunk.end + offset, args.lower, args.upper,
                            args.atac)
        mat.makeFragmentMat(args.bam)
        cov = CoverageTrack(chunk.chrom, chunk.start, chunk.end)
        cov.calculateCoverage(mat, lower=args.lower, upper=args.upper,
                              window_len=args.window)
        cov.vals *= args.scale / float(args.window)
    except Exception:
        print('Caught exception when processing:\n' + chunk.asBed() + "\n")
        traceback.print_exc()
        print()
        # Bare raise keeps the original traceback intact.
        raise
    return cov
def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
    """Compute the background (bias) signal for this track.

    Args:
        mat: bias matrix; must start at least ``vmat.w`` before this track.
        vmat: object providing ``w``, ``lower``, ``upper`` and ``mat``.
        nuc_cov: coverage track whose ``vals`` scale the result.

    Raises:
        Exception: if ``mat`` does not flank the track by at least ``vmat.w``.
    """
    offset = self.start - mat.start - vmat.w
    if offset < 0:
        # Implicit concatenation instead of a backslash continuation inside
        # the literal, which leaked indentation into the message.
        raise Exception("Insufficient flanking region on "
                        "mat to calculate signal")
    self.vmat = vmat
    self.bias_mat = mat
    self.cov = CoverageTrack(self.chrom, self.start, self.end)
    self.cov.calculateCoverage(self.bias_mat, vmat.lower, vmat.upper,
                               vmat.w * 2 + 1)
    self.nuc_cov = nuc_cov.vals
    # Cross-correlate the trimmed bias matrix with the vmat matrix and keep
    # the first row of the 'valid' result.
    self.vals = signal.correlate(
        self.bias_mat.get(vmat.lower, vmat.upper,
                          self.bias_mat.start + offset,
                          self.bias_mat.end - offset),
        vmat.mat, mode='valid')[0]
    # Rescale by observed coverage relative to the bias coverage.
    self.vals = self.vals * self.nuc_cov / self.cov.vals
class BiasTrack(Track):
    """Class for getting Bias Signal Track-- Background model"""

    def __init__(self, chrom, start, end):
        """Create an empty track named "bias" over the given region."""
        Track.__init__(self, chrom, start, end, "bias")

    def calculateBackgroundSignal(self, mat, vmat, nuc_cov):
        """Compute the background (bias) signal for this track.

        Args:
            mat: bias matrix; must start at least ``vmat.w`` before this
                track.
            vmat: object providing ``w``, ``lower``, ``upper`` and ``mat``.
            nuc_cov: coverage track whose ``vals`` scale the result.

        Raises:
            Exception: if ``mat`` does not flank the track by at least
                ``vmat.w``.
        """
        offset = self.start - mat.start - vmat.w
        if offset < 0:
            # Implicit concatenation instead of a backslash continuation
            # inside the literal, which leaked indentation into the message.
            raise Exception("Insufficient flanking region on "
                            "mat to calculate signal")
        self.vmat = vmat
        self.bias_mat = mat
        self.cov = CoverageTrack(self.chrom, self.start, self.end)
        self.cov.calculateCoverage(self.bias_mat, vmat.lower, vmat.upper,
                                   vmat.w * 2 + 1)
        self.nuc_cov = nuc_cov.vals
        # Cross-correlate the trimmed bias matrix with the vmat matrix and
        # keep the first row of the 'valid' result.
        self.vals = signal.correlate(
            self.bias_mat.get(vmat.lower, vmat.upper,
                              self.bias_mat.start + offset,
                              self.bias_mat.end - offset),
            vmat.mat, mode='valid')[0]
        # Rescale by observed coverage relative to the bias coverage.
        self.vals = self.vals * self.nuc_cov / self.cov.vals
def _covHelper(arg):
    """Build the scaled coverage track for one bed region.

    ``arg`` is a ``(chunk, args)`` pair. The fragment matrix is built on the
    chunk padded by half the window on each side; the resulting coverage
    values are multiplied by ``args.scale / args.window``. On failure the
    region and traceback are printed and the exception is re-raised.
    """
    chunk, args = arg
    try:
        half_window = args.window // 2
        fragments = FragmentMat2D(
            chunk.chrom,
            chunk.start - half_window,
            chunk.end + half_window,
            args.lower,
            args.upper,
            args.atac,
        )
        fragments.makeFragmentMat(args.bam)
        coverage = CoverageTrack(chunk.chrom, chunk.start, chunk.end)
        coverage.calculateCoverage(
            fragments,
            lower=args.lower,
            upper=args.upper,
            window_len=args.window,
        )
        coverage.vals *= args.scale / float(args.window)
    except Exception as e:
        region_text = chunk.asBed()
        print('Caught exception when processing:\n' + region_text + "\n")
        traceback.print_exc()
        print()
        raise e
    else:
        return coverage
def getCov(self):
    """Compute read coverage over the chunk for fragment sizes 0..params.upper.

    The result is stored on ``self.cov``.
    """
    track = CoverageTrack(self.chrom, self.start, self.end)
    self.cov = track
    track.calculateCoverage(
        self.mat,
        lower=0,
        upper=self.params.upper,
        window_len=self.params.window,
    )
def getNFR(self):
    """Compute coverage of fragments shorter than ``params.lower``.

    The result (sub-nucleosomal read coverage) is stored on ``self.nfr_cov``.
    """
    track = CoverageTrack(self.chrom, self.start, self.end)
    self.nfr_cov = track
    track.calculateCoverage(
        self.mat,
        lower=0,
        upper=self.params.lower,
        window_len=self.params.window,
    )
class NucChunk(Chunk):
    """Genomic chunk on which nucleosome positions are determined and stored.

    ``process`` drives the full pipeline; the individual steps populate
    attributes (``mat``, ``bias_mat``, ``norm_signal``, ``nuc_collection``...)
    that later steps read.
    """

    def __init__(self, chunk):
        """Take over chromosome and coordinates from ``chunk``."""
        self.start, self.end, self.chrom = chunk.start, chunk.end, chunk.chrom

    def initialize(self, parameters):
        """Attach the parameter object used by all later steps."""
        self.params = parameters

    def getFragmentMat(self):
        """Build the fragment matrix on the chunk padded on both sides."""
        settings = self.params
        # Pad by whichever is larger: the window or just over half of upper.
        pad = max(settings.window, settings.upper // 2 + 1)
        self.mat = FragmentMat2D(
            self.chrom, self.start - pad, self.end + pad,
            0, settings.upper, atac=settings.atac)
        self.mat.makeFragmentMat(settings.bam)

    def makeBiasMat(self):
        """Build the bias matrix, keep a pre-normalization copy, normalize."""
        settings = self.params
        left = self.start - settings.window
        right = self.end + settings.window
        half_upper = settings.upper // 2

        self.bias_mat = BiasMat2D(self.chrom, left, right, 0, settings.upper)
        insertion_bias = InsertionBiasTrack(
            self.chrom, left - half_upper, right + half_upper + 1, log=True)
        if settings.fasta is not None:
            insertion_bias.computeBias(
                settings.fasta, settings.chrs, settings.pwm)
            # NOTE(review): nesting under the fasta guard is inferred from
            # the whitespace-collapsed source -- confirm against upstream.
            self.bias_mat.makeBiasMat(insertion_bias)

        self.bias_mat_prenorm = BiasMat2D(
            self.chrom, left, right, 0, settings.upper)
        self.bias_mat_prenorm.mat = copy(self.bias_mat.mat)
        self.bias_mat.normByInsertDist(settings.fragmentsizes)

    def getNucSignal(self):
        """Compute coverage, bias, raw signal and normalized signal tracks."""
        settings = self.params
        region = (self.chrom, self.start, self.end)

        self.nuc_cov = CoverageTrack(*region)
        self.nuc_cov.calculateCoverage(
            self.mat, settings.lower, settings.upper, settings.window)

        self.bias = BiasTrack(*region)
        self.bias.calculateBackgroundSignal(
            self.bias_mat, settings.vmat, self.nuc_cov)

        self.nuc_signal = SignalTrack(*region)
        self.nuc_signal.calculateSignal(self.mat, settings.vmat)

        self.norm_signal = NormSignalTrack(*region)
        self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)

    def getNFR(self):
        """Compute coverage of fragments shorter than ``params.lower``."""
        track = CoverageTrack(self.chrom, self.start, self.end)
        self.nfr_cov = track
        track.calculateCoverage(
            self.mat, 0, self.params.lower, self.params.window)

    def smoothSignal(self):
        """Gaussian-smooth the normalized signal after clipping negatives."""
        sd = self.params.smooth_sd
        width = 6 * sd + 1
        self.smoothed = Track(
            self.chrom, self.start, self.end, "Smooth Signal")
        self.smoothed.assign_track(copy(self.norm_signal.vals))
        negative = self.smoothed.vals < 0
        self.smoothed.vals[negative] = 0
        self.smoothed.smooth_track(
            width, window="gaussian", sd=sd, mode='same', norm=True)

    def getOcc(self):
        """Read the occupancy track and its lower/upper bound tracks.

        The bound files are located by replacing the last 11 characters of
        ``params.occ_track`` with the bound-specific suffix.
        """
        stem = self.params.occ_track[:-11]
        sources = (
            ("occ", self.params.occ_track),
            ("occ_lower", stem + 'lower_bound.bedgraph.gz'),
            ("occ_upper", stem + 'upper_bound.bedgraph.gz'),
        )
        for attr, path in sources:
            track = Track(self.chrom, self.start, self.end, "Occupancy")
            setattr(self, attr, track)
            track.read_track(path)

    def findAllNucs(self):
        """Call candidate nucleosomes and split them into (non)redundant sets.

        Candidates are peaks of normalized + smoothed signal. Each must pass,
        in order, the read-count, likelihood-ratio and z-score thresholds
        before it is stored in ``nuc_collection``.
        """
        settings = self.params
        self.nuc_collection = {}
        summed = self.norm_signal.vals + self.smoothed.vals
        candidates = call_peaks(
            summed,
            min_signal=0,
            sep=settings.redundant_sep,
            boundary=settings.nonredundant_sep // 2,
            order=settings.redundant_sep // 2)

        for pos in candidates:
            nuc = Nucleosome(pos + self.start, self)
            if not nuc.nuc_cov > settings.min_reads:
                continue
            nuc.getLR(self)
            if not nuc.lr > settings.min_lr:
                continue
            nuc.getZScore(self)
            if not nuc.z >= settings.min_z:
                continue
            nuc.getOcc(self)
            self.nuc_collection[pos] = nuc

        self.sorted_nuc_keys = np.array(sorted(self.nuc_collection))
        z_scores = [self.nuc_collection[key].z
                    for key in self.sorted_nuc_keys]
        self.nonredundant = reduce_peaks(
            self.sorted_nuc_keys, z_scores, settings.nonredundant_sep)
        self.redundant = np.setdiff1d(
            self.sorted_nuc_keys, self.nonredundant)

    def fit(self):
        """Sum the per-nucleosome fitted curves into the ``fitted`` track."""
        size = self.length()
        positions = np.linspace(0, size - 1, size)
        total = np.zeros(size)
        for key in self.sorted_nuc_keys:
            nuc = self.nuc_collection[key]
            nuc.getFuzz(self)
            total += norm(positions, nuc.fuzz ** 2, nuc.weight, nuc.fit_pos)
        self.fitted = Track(
            self.chrom, self.start, self.end, "Fitted Nucleosome Signal")
        self.fitted.assign_track(total)

    def makeInsertionTrack(self):
        """Derive the insertion track from the fragment matrix."""
        self.ins = self.mat.getIns()

    def process(self, params):
        """Run every step needed to call nucleosomes and nfrs, in order."""
        self.initialize(params)
        self.getFragmentMat()
        self.makeBiasMat()
        self.getNucSignal()
        self.getNFR()
        self.smoothSignal()
        # Occupancy is optional; it is read only when a track was supplied.
        if params.occ_track is not None:
            self.getOcc()
        self.findAllNucs()
        self.fit()
        self.makeInsertionTrack()

    def removeData(self):
        """Delete every instance attribute of the chunk."""
        for attr in list(vars(self)):
            delattr(self, attr)
class NucChunk(Chunk):
    """Class for storing and determining collection of nucleosome positions"""

    def __init__(self, chunk):
        """Copy the coordinates of ``chunk``."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom

    def initialize(self, parameters):
        """Attach the parameter object used by all later steps."""
        self.params = parameters

    def getFragmentMat(self):
        """Build the fragment matrix on the chunk padded on both sides."""
        # Floor division keeps the coordinates integral on Python 3.
        flank = max(self.params.window, self.params.upper // 2 + 1)
        self.mat = FragmentMat2D(self.chrom,
                                 self.start - flank,
                                 self.end + flank,
                                 0, self.params.upper,
                                 atac=self.params.atac)
        self.mat.makeFragmentMat(self.params.bam)

    def makeBiasMat(self):
        """Build the bias matrix, keep a pre-normalization copy, normalize."""
        self.bias_mat = BiasMat2D(self.chrom,
                                  self.start - self.params.window,
                                  self.end + self.params.window,
                                  0, self.params.upper)
        # Floor division keeps the coordinates integral on Python 3.
        half_upper = self.params.upper // 2
        bias_track = InsertionBiasTrack(
            self.chrom,
            self.start - self.params.window - half_upper,
            self.end + self.params.window + half_upper + 1,
            log=True)
        if self.params.fasta is not None:
            bias_track.computeBias(self.params.fasta, self.params.chrs,
                                   self.params.pwm)
            # NOTE(review): nesting under the fasta guard is inferred from
            # the whitespace-collapsed source -- confirm against upstream.
            self.bias_mat.makeBiasMat(bias_track)
        self.bias_mat_prenorm = BiasMat2D(self.chrom,
                                          self.start - self.params.window,
                                          self.end + self.params.window,
                                          0, self.params.upper)
        self.bias_mat_prenorm.mat = copy(self.bias_mat.mat)
        self.bias_mat.normByInsertDist(self.params.fragmentsizes)

    def getNucSignal(self):
        """Gets Nucleosome Signal Track"""
        self.nuc_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nuc_cov.calculateCoverage(self.mat, self.params.lower,
                                       self.params.upper, self.params.window)
        self.bias = BiasTrack(self.chrom, self.start, self.end)
        self.bias.calculateBackgroundSignal(self.bias_mat, self.params.vmat,
                                            self.nuc_cov)
        self.nuc_signal = SignalTrack(self.chrom, self.start, self.end)
        self.nuc_signal.calculateSignal(self.mat, self.params.vmat)
        self.norm_signal = NormSignalTrack(self.chrom, self.start, self.end)
        self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)

    def getNFR(self):
        """get number of reads of sub-nucleosomal length"""
        self.nfr_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nfr_cov.calculateCoverage(self.mat, 0, self.params.lower,
                                       self.params.window)

    def smoothSignal(self):
        """Smooth the normalized signal track"""
        window_len = 6 * self.params.smooth_sd + 1
        self.smoothed = Track(self.chrom, self.start, self.end,
                              "Smooth Signal")
        tmp = copy(self.norm_signal.vals)
        self.smoothed.assign_track(tmp)
        # Negative signal is clipped to zero before smoothing.
        self.smoothed.vals[self.smoothed.vals < 0] = 0
        self.smoothed.smooth_track(window_len, window="gaussian",
                                   sd=self.params.smooth_sd, mode='same',
                                   norm=True)

    def getOcc(self):
        """Read the occupancy track and its lower/upper bound tracks.

        The bound files are located by replacing the last 11 characters of
        ``params.occ_track`` with the bound-specific suffix.
        """
        self.occ = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ.read_track(self.params.occ_track)
        lower_file = self.params.occ_track[:-11] + 'lower_bound.bedgraph.gz'
        self.occ_lower = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_lower.read_track(lower_file)
        upper_file = self.params.occ_track[:-11] + 'upper_bound.bedgraph.gz'
        self.occ_upper = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_upper.read_track(upper_file)

    def findAllNucs(self):
        """Find peaks in data.

        Candidates are peaks of normalized + smoothed signal. Each must pass,
        in order, the read-count, likelihood-ratio and z-score thresholds
        before it is stored in ``nuc_collection``.
        """
        self.nuc_collection = {}
        combined = self.norm_signal.vals + self.smoothed.vals
        # find peaks in normalized signal; floor division keeps boundary and
        # order integral on Python 3.
        cands1 = call_peaks(combined, min_signal=0,
                            sep=self.params.redundant_sep,
                            boundary=self.params.nonredundant_sep // 2,
                            order=self.params.redundant_sep // 2)
        for i in cands1:
            nuc = Nucleosome(i + self.start, self)
            if nuc.nuc_cov > self.params.min_reads:
                nuc.getLR(self)
                if nuc.lr > self.params.min_lr:
                    nuc.getZScore(self)
                    if nuc.z >= self.params.min_z:
                        nuc.getOcc(self)
                        self.nuc_collection[i] = nuc
        self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
        # A real list rather than map(): on Python 3 map() is a one-shot
        # iterator with no length or indexing.
        z_scores = [self.nuc_collection[x].z for x in self.sorted_nuc_keys]
        self.nonredundant = reduce_peaks(self.sorted_nuc_keys, z_scores,
                                         self.params.nonredundant_sep)
        self.redundant = np.setdiff1d(self.sorted_nuc_keys,
                                      self.nonredundant)

    def fit(self):
        """Sum the per-nucleosome fitted curves into the ``fitted`` track."""
        x = np.linspace(0, self.length() - 1, self.length())
        fit = np.zeros(self.length())
        for nuc in self.sorted_nuc_keys:
            self.nuc_collection[nuc].getFuzz(self)
            fit += norm(x, self.nuc_collection[nuc].fuzz ** 2,
                        self.nuc_collection[nuc].weight,
                        self.nuc_collection[nuc].fit_pos)
        self.fitted = Track(self.chrom, self.start, self.end,
                            "Fitted Nucleosome Signal")
        self.fitted.assign_track(fit)

    def makeInsertionTrack(self):
        """make insertion track for chunk"""
        self.ins = self.mat.getIns()

    def process(self, params):
        """wrapper to carry out all methods needed to call nucleosomes and nfrs"""
        self.initialize(params)
        self.getFragmentMat()
        self.makeBiasMat()
        self.getNucSignal()
        self.getNFR()
        self.smoothSignal()
        if params.occ_track is not None:
            self.getOcc()
        self.findAllNucs()
        self.fit()
        self.makeInsertionTrack()

    def removeData(self):
        """remove data from chunk-- deletes all attributes"""
        # Snapshot the names first: deleting while iterating the live
        # __dict__ view raises RuntimeError on Python 3.
        names = list(self.__dict__.keys())
        for name in names:
            delattr(self, name)