Пример #1
0
 def test_read_and_get(self):
     """Load the example bedgraph into a Track and spot-check one position."""
     region = self.chunk
     track = Track(region.chrom, region.start, region.end)
     track.read_track("example/example.Scores.bedgraph.gz")
     expected = 1.35994655714
     observed = track.get(pos=706661)
     # Passes when the value read back is within 0.001 of the expected one.
     self.assertTrue(abs(expected - observed) < 0.001)
Пример #2
0
class NFRChunk(Chunk):
    """Class for storing and determining collection of nfr positions

    Copies the coordinates of an existing chunk, loads the occupancy,
    insertion and bias tracks for that interval, and collects the ``NFR``
    candidates found between consecutive nucleosome calls in ``self.nfrs``.
    """

    def __init__(self, chunk):
        """Copy ``chrom``, ``start`` and ``end`` from ``chunk``; start with no NFRs."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom
        self.nfrs = []

    def initialize(self, parameters):
        """Store the parameters object that the other methods read from."""
        self.params = parameters

    def getOcc(self):
        """gets occupancy track-- reads in from bedgraph

        Reads ``params.occ_track`` into ``self.occ`` and the matching
        upper-bound file into ``self.occ_upper``.
        """
        self.occ = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ.read_track(self.params.occ_track)
        #lower_file = self.params.occ_track[:-11] + 'lower_bound.bedgraph.gz'
        #self.occ_lower = Track(self.chrom,self.start,self.end,"Occupancy")
        #self.occ_lower.read_track(lower_file)
        # The last 11 characters of the occupancy path are swapped for the
        # upper-bound suffix (presumably dropping a trailing 'bedgraph.gz').
        upper_file = self.params.occ_track[:-11] + 'upper_bound.bedgraph.gz'
        self.occ_upper = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_upper.read_track(upper_file)

    def getIns(self):
        """gets insertion track

        When ``params.ins_track`` is None the insertions are calculated from
        ``params.bam``; otherwise the track is read from ``params.ins_track``.
        """
        if self.params.ins_track is None:
            self.ins = InsertionTrack(self.chrom, self.start, self.end)
            self.ins.calculateInsertions(self.params.bam)
        else:
            self.ins = Track(self.chrom, self.start, self.end, "Insertion")
            self.ins.read_track(self.params.ins_track)

    def getBias(self):
        """get bias

        Always creates ``self.bias``; the bias is only computed when
        ``params.fasta`` is provided.
        """
        self.bias = InsertionBiasTrack(self.chrom,
                                       self.start,
                                       self.end,
                                       log=True)
        if self.params.fasta is not None:
            self.bias.computeBias(self.params.fasta, self.params.chrs,
                                  self.params.pwm)

    def findNFRs(self):
        """find NFR regions

        Fetches the rows of the ``params.calls`` tabix file that fall in this
        chunk and takes column 1 of each row as an integer position.  For
        every pair of consecutive positions an ``NFR`` candidate is built from
        ``previous + 73`` to ``next - 72``; pairs that leave no room are
        skipped.  Candidates whose ``min_upper`` and ``occ`` are below
        ``params.max_occ_upper`` and ``params.max_occ`` are appended to
        ``self.nfrs``.
        """
        nucs = []
        tbx = pysam.TabixFile(self.params.calls)
        try:
            if self.chrom in tbx.contigs:
                nucs = [int(row[1])
                        for row in tbx.fetch(self.chrom,
                                             self.start,
                                             self.end,
                                             parser=pysam.asTuple())]
        finally:
            # Release the file handle even if fetching or parsing fails.
            tbx.close()
        # Pair each position with its successor; works on Python 2 and 3.
        for prev_pos, next_pos in zip(nucs, nucs[1:]):
            left = prev_pos + 73
            right = next_pos - 72
            if right <= left:
                continue
            candidate = NFR(left, right, self)
            if (candidate.min_upper < self.params.max_occ_upper
                    and candidate.occ < self.params.max_occ):
                self.nfrs.append(candidate)

    def process(self, params):
        """wrapper to carry out all methods needed to call nfrs

        Stores ``params`` and then loads occupancy, insertions and bias
        before searching for NFRs.
        """
        self.initialize(params)
        self.getOcc()
        self.getIns()
        self.getBias()
        self.findNFRs()

    def removeData(self):
        """remove data from chunk-- deletes all attributes"""
        # Iterate over a snapshot of the names: deleting attributes while
        # walking the live dict view raises RuntimeError on Python 3.
        for name in list(self.__dict__):
            delattr(self, name)
Пример #3
0
class NucChunk(Chunk):
    """Chunk of the genome on which nucleosome positions are determined."""

    def __init__(self, chunk):
        """Take over chromosome name and interval bounds from ``chunk``."""
        self.chrom = chunk.chrom
        self.start = chunk.start
        self.end = chunk.end

    def initialize(self, parameters):
        """Attach the parameters object that every later step reads from."""
        self.params = parameters

    def getFragmentMat(self):
        """Create ``self.mat`` for the padded chunk and fill it from the bam."""
        params = self.params
        # Pad both sides by whichever is larger: the window or upper // 2 + 1.
        flank = max(params.window, params.upper // 2 + 1)
        self.mat = FragmentMat2D(self.chrom,
                                 self.start - flank,
                                 self.end + flank,
                                 0,
                                 params.upper,
                                 atac=params.atac)
        self.mat.makeFragmentMat(params.bam)

    def makeBiasMat(self):
        """Create the bias matrix plus a copy taken before normalization.

        The bias matrix is only filled from the sequence bias track when
        ``params.fasta`` is given; in either case a pre-normalization copy is
        stored in ``self.bias_mat_prenorm`` before ``self.bias_mat`` is
        normalized by ``params.fragmentsizes``.
        """
        params = self.params
        half_upper = params.upper // 2
        left = self.start - params.window
        right = self.end + params.window

        self.bias_mat = BiasMat2D(self.chrom, left, right, 0, params.upper)
        bias_track = InsertionBiasTrack(self.chrom,
                                        left - half_upper,
                                        right + half_upper + 1,
                                        log=True)
        if params.fasta is not None:
            bias_track.computeBias(params.fasta, params.chrs, params.pwm)
            self.bias_mat.makeBiasMat(bias_track)

        self.bias_mat_prenorm = BiasMat2D(self.chrom, left, right, 0,
                                          params.upper)
        self.bias_mat_prenorm.mat = copy(self.bias_mat.mat)
        self.bias_mat.normByInsertDist(params.fragmentsizes)

    def getNucSignal(self):
        """Compute coverage, background, raw signal and normalized signal."""
        params = self.params
        where = (self.chrom, self.start, self.end)

        self.nuc_cov = CoverageTrack(*where)
        self.nuc_cov.calculateCoverage(self.mat, params.lower, params.upper,
                                       params.window)

        self.bias = BiasTrack(*where)
        self.bias.calculateBackgroundSignal(self.bias_mat, params.vmat,
                                            self.nuc_cov)

        self.nuc_signal = SignalTrack(*where)
        self.nuc_signal.calculateSignal(self.mat, params.vmat)

        self.norm_signal = NormSignalTrack(*where)
        self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)

    def getNFR(self):
        """Compute coverage for fragment sizes from 0 up to ``params.lower``."""
        params = self.params
        self.nfr_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nfr_cov.calculateCoverage(self.mat, 0, params.lower,
                                       params.window)

    def smoothSignal(self):
        """Smooth the normalized signal track with a gaussian window.

        Negative values are set to zero before smoothing; the window spans
        ``6 * smooth_sd + 1`` positions.
        """
        sd = self.params.smooth_sd
        width = 6 * sd + 1
        self.smoothed = Track(self.chrom, self.start, self.end,
                              "Smooth Signal")
        self.smoothed.assign_track(copy(self.norm_signal.vals))
        below_zero = self.smoothed.vals < 0
        self.smoothed.vals[below_zero] = 0
        self.smoothed.smooth_track(width,
                                   window="gaussian",
                                   sd=sd,
                                   mode='same',
                                   norm=True)

    def getOcc(self):
        """Read the occupancy track and its lower/upper bound tracks.

        The bound file names are built by replacing the last 11 characters
        of ``params.occ_track`` with the respective suffix.
        """
        occ_path = self.params.occ_track
        stem = occ_path[:-11]

        self.occ = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ.read_track(occ_path)

        self.occ_lower = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_lower.read_track(stem + 'lower_bound.bedgraph.gz')

        self.occ_upper = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_upper.read_track(stem + 'upper_bound.bedgraph.gz')

    def findAllNucs(self):
        """Call peaks and keep those passing read, LR and z-score cutoffs.

        Accepted nucleosomes are stored in ``self.nuc_collection`` keyed by
        their offset within the chunk; the sorted keys are then split into
        ``self.nonredundant`` and ``self.redundant``.
        """
        params = self.params
        self.nuc_collection = {}
        # Peaks are called on the sum of the normalized and smoothed signals.
        combined = self.norm_signal.vals + self.smoothed.vals
        candidates = call_peaks(combined,
                                min_signal=0,
                                sep=params.redundant_sep,
                                boundary=params.nonredundant_sep // 2,
                                order=params.redundant_sep // 2)
        for offset in candidates:
            nuc = Nucleosome(offset + self.start, self)
            # Each score is only computed once the previous cutoff is passed.
            if not (nuc.nuc_cov > params.min_reads):
                continue
            nuc.getLR(self)
            if not (nuc.lr > params.min_lr):
                continue
            nuc.getZScore(self)
            if not (nuc.z >= params.min_z):
                continue
            nuc.getOcc(self)
            self.nuc_collection[offset] = nuc

        self.sorted_nuc_keys = np.array(sorted(self.nuc_collection))
        z_scores = [self.nuc_collection[key].z
                    for key in self.sorted_nuc_keys]
        self.nonredundant = reduce_peaks(self.sorted_nuc_keys, z_scores,
                                         params.nonredundant_sep)
        self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)

    def fit(self):
        """Sum one ``norm`` curve per called nucleosome into ``self.fitted``."""
        size = self.length()
        positions = np.linspace(0, size - 1, size)
        total = np.zeros(size)
        for key in self.sorted_nuc_keys:
            nuc = self.nuc_collection[key]
            nuc.getFuzz(self)
            total += norm(positions, nuc.fuzz**2, nuc.weight, nuc.fit_pos)
        self.fitted = Track(self.chrom, self.start, self.end,
                            "Fitted Nucleosome Signal")
        self.fitted.assign_track(total)

    def makeInsertionTrack(self):
        """Store the insertion track obtained from the fragment matrix."""
        self.ins = self.mat.getIns()

    def process(self, params):
        """Run every step needed to call nucleosomes, in order.

        The occupancy tracks are only read when ``params.occ_track`` is set.
        """
        self.initialize(params)
        for step in (self.getFragmentMat, self.makeBiasMat,
                     self.getNucSignal, self.getNFR, self.smoothSignal):
            step()
        if params.occ_track is not None:
            self.getOcc()
        for step in (self.findAllNucs, self.fit, self.makeInsertionTrack):
            step()

    def removeData(self):
        """Delete every instance attribute of the chunk."""
        # Work from a snapshot so the attribute dict is not mutated mid-loop.
        for attr in tuple(self.__dict__):
            delattr(self, attr)
Пример #4
0
 def test_read_and_get(self):
     """Check that a value read from the example bedgraph can be retrieved."""
     bounds = (self.chunk.chrom, self.chunk.start, self.chunk.end)
     track = Track(*bounds)
     track.read_track('example/example.Scores.bedgraph.gz')
     # Difference between the expected value and what the track returns.
     deviation = abs(1.35994655714 - track.get(pos=706661))
     self.assertTrue(deviation < 0.001)
Пример #5
0
class NucChunk(Chunk):
    """Class for storing and determining collection of nucleosome positions

    Integer offsets use floor division and attribute names are snapshotted
    before deletion, so the class behaves the same on Python 2 and Python 3.
    """

    def __init__(self, chunk):
        """Copy ``chrom``, ``start`` and ``end`` from ``chunk``."""
        self.start = chunk.start
        self.end = chunk.end
        self.chrom = chunk.chrom

    def initialize(self, parameters):
        """Store the parameters object that the other methods read from."""
        self.params = parameters

    def getFragmentMat(self):
        """Create ``self.mat`` for the padded chunk and fill it from the bam."""
        # Floor division keeps the padding an integer on Python 3 as well.
        flank = max(self.params.window, self.params.upper // 2 + 1)
        self.mat = FragmentMat2D(self.chrom, self.start - flank,
                                 self.end + flank, 0, self.params.upper,
                                 atac=self.params.atac)
        self.mat.makeFragmentMat(self.params.bam)

    def makeBiasMat(self):
        """Create the bias matrix and a copy taken before normalization.

        The matrix is only filled from the sequence bias track when
        ``params.fasta`` is given.  ``self.bias_mat_prenorm`` receives a copy
        of the matrix before ``self.bias_mat`` is normalized by
        ``params.fragmentsizes``.
        """
        self.bias_mat = BiasMat2D(self.chrom, self.start - self.params.window,
                                  self.end + self.params.window, 0,
                                  self.params.upper)
        # Floor division keeps the track bounds integers on Python 3.
        bias_track = InsertionBiasTrack(
            self.chrom,
            self.start - self.params.window - self.params.upper // 2,
            self.end + self.params.window + self.params.upper // 2 + 1,
            log=True)
        if self.params.fasta is not None:
            bias_track.computeBias(self.params.fasta, self.params.chrs,
                                   self.params.pwm)
            self.bias_mat.makeBiasMat(bias_track)
        self.bias_mat_prenorm = BiasMat2D(self.chrom,
                                          self.start - self.params.window,
                                          self.end + self.params.window, 0,
                                          self.params.upper)
        self.bias_mat_prenorm.mat = copy(self.bias_mat.mat)
        self.bias_mat.normByInsertDist(self.params.fragmentsizes)

    def getNucSignal(self):
        """Gets Nucleosome Signal Track

        Computes, in order, the coverage, the background signal, the raw
        signal and the normalized signal for the chunk.
        """
        self.nuc_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nuc_cov.calculateCoverage(self.mat, self.params.lower,
                                       self.params.upper, self.params.window)
        self.bias = BiasTrack(self.chrom, self.start, self.end)
        self.bias.calculateBackgroundSignal(self.bias_mat, self.params.vmat,
                                            self.nuc_cov)
        self.nuc_signal = SignalTrack(self.chrom, self.start, self.end)
        self.nuc_signal.calculateSignal(self.mat, self.params.vmat)
        self.norm_signal = NormSignalTrack(self.chrom, self.start, self.end)
        self.norm_signal.calculateNormSignal(self.nuc_signal, self.bias)

    def getNFR(self):
        """get number of reads of sub-nucleosomal length"""
        self.nfr_cov = CoverageTrack(self.chrom, self.start, self.end)
        self.nfr_cov.calculateCoverage(self.mat, 0, self.params.lower,
                                       self.params.window)

    def smoothSignal(self):
        """Smooth the normalized signal track

        Negative values are set to zero first; the gaussian window spans
        ``6 * smooth_sd + 1`` positions.
        """
        window_len = 6 * self.params.smooth_sd + 1
        self.smoothed = Track(self.chrom, self.start, self.end,
                              "Smooth Signal")
        tmp = copy(self.norm_signal.vals)
        self.smoothed.assign_track(tmp)
        self.smoothed.vals[self.smoothed.vals < 0] = 0
        self.smoothed.smooth_track(window_len, window="gaussian",
                                   sd=self.params.smooth_sd, mode='same',
                                   norm=True)

    def getOcc(self):
        """gets occupancy track and its lower/upper bound tracks from file

        The bound file names are built by replacing the last 11 characters
        of ``params.occ_track`` with the respective suffix.
        """
        self.occ = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ.read_track(self.params.occ_track)
        lower_file = self.params.occ_track[:-11] + 'lower_bound.bedgraph.gz'
        self.occ_lower = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_lower.read_track(lower_file)
        upper_file = self.params.occ_track[:-11] + 'upper_bound.bedgraph.gz'
        self.occ_upper = Track(self.chrom, self.start, self.end, "Occupancy")
        self.occ_upper.read_track(upper_file)

    def findAllNucs(self):
        """Find peaks in data

        Candidates that pass the read-count, likelihood-ratio and z-score
        cutoffs are stored in ``self.nuc_collection`` keyed by their offset
        within the chunk.  The sorted keys are then split into
        ``self.nonredundant`` and ``self.redundant``.
        """
        self.nuc_collection = {}
        combined = self.norm_signal.vals + self.smoothed.vals
        # find peaks in normalized signal; floor division keeps the boundary
        # and order arguments integers on Python 3
        cands1 = call_peaks(combined, min_signal=0,
                            sep=self.params.redundant_sep,
                            boundary=self.params.nonredundant_sep // 2,
                            order=self.params.redundant_sep // 2)
        for i in cands1:
            nuc = Nucleosome(i + self.start, self)
            if nuc.nuc_cov > self.params.min_reads:
                nuc.getLR(self)
                if nuc.lr > self.params.min_lr:
                    nuc.getZScore(self)
                    if nuc.z >= self.params.min_z:
                        nuc.getOcc(self)
                        self.nuc_collection[i] = nuc
        self.sorted_nuc_keys = np.array(sorted(self.nuc_collection.keys()))
        # A real list (not a lazy ``map`` object) is passed to reduce_peaks.
        z_scores = [self.nuc_collection[x].z for x in self.sorted_nuc_keys]
        self.nonredundant = reduce_peaks(self.sorted_nuc_keys, z_scores,
                                         self.params.nonredundant_sep)
        self.redundant = np.setdiff1d(self.sorted_nuc_keys, self.nonredundant)

    def fit(self):
        """Sum one ``norm`` curve per called nucleosome into ``self.fitted``."""
        x = np.linspace(0, self.length() - 1, self.length())
        fit = np.zeros(self.length())
        for key in self.sorted_nuc_keys:
            nuc = self.nuc_collection[key]
            nuc.getFuzz(self)
            fit += norm(x, nuc.fuzz**2, nuc.weight, nuc.fit_pos)
        self.fitted = Track(self.chrom, self.start, self.end,
                            "Fitted Nucleosome Signal")
        self.fitted.assign_track(fit)

    def makeInsertionTrack(self):
        """make insertion track for chunk"""
        self.ins = self.mat.getIns()

    def process(self, params):
        """wrapper to carry out all methods needed to call nucleosomes

        The occupancy tracks are only read when ``params.occ_track`` is set.
        """
        self.initialize(params)
        self.getFragmentMat()
        self.makeBiasMat()
        self.getNucSignal()
        self.getNFR()
        self.smoothSignal()
        if params.occ_track is not None:
            self.getOcc()
        self.findAllNucs()
        self.fit()
        self.makeInsertionTrack()

    def removeData(self):
        """remove data from chunk-- deletes all attributes"""
        # Iterate over a snapshot of the names: deleting attributes while
        # walking the live dict view raises RuntimeError on Python 3.
        for name in list(self.__dict__):
            delattr(self, name)