示例#1
0
文件: bias.py 项目: wzthu/NucleoATAC
 def computeBias(self, fasta, chromDict, pwm):
     """Score the chunk's sequence against a PWM and store the result in ``self.vals``.

     The chunk is first widened with ``self.slop`` by ``pwm.up`` / ``pwm.down``,
     its sequence is fetched from ``fasta`` and encoded with
     ``seq.seq_to_mat``, and the encoded matrix is cross-correlated
     (``mode='valid'``) with the element-wise log of ``pwm.mat``.  The first
     row of that correlation becomes ``self.vals``.

     Finally ``self.start`` / ``self.end`` are moved back inwards by
     ``pwm.up`` / ``pwm.down`` (presumably undoing the earlier slop --
     confirm against ``slop``'s clipping behaviour).
     """
     # Widen the interval so the PWM window fits around every position.
     self.slop(chromDict, up=pwm.up, down=pwm.down)

     raw_sequence = seq.get_sequence(self, fasta)
     encoded = seq.seq_to_mat(raw_sequence, pwm.nucleotides)

     log_weights = np.log(pwm.mat)
     scores = signal.correlate(encoded, log_weights, mode='valid')
     self.vals = scores[0]

     # Shrink the coordinates again by the PWM flanks.
     self.start += pwm.up
     self.end -= pwm.down
示例#2
0
 def computeBias(self, fasta, chromDict, pwm):
     """Compute a bias track for this chunk from its sequence and a PWM.

     Steps, in order:

     1. ``self.slop`` extends the chunk by ``pwm.up`` and ``pwm.down``.
     2. The sequence is read from ``fasta`` and encoded over
        ``pwm.nucleotides`` via ``seq.seq_to_mat``.
     3. The encoding is cross-correlated (``mode='valid'``) with
        ``np.log(pwm.mat)``; row 0 of the result is stored in ``self.vals``.
     4. ``self.start`` is increased by ``pwm.up`` and ``self.end`` decreased
        by ``pwm.down``.
     """
     self.slop(chromDict, up=pwm.up, down=pwm.down)
     one_hot = seq.seq_to_mat(seq.get_sequence(self, fasta), pwm.nucleotides)
     correlation = signal.correlate(
         one_hot,
         np.log(pwm.mat),
         mode='valid',
     )
     # Only the first row of the 'valid' correlation is kept as the track.
     self.vals = correlation[0]
     self.start += pwm.up
     self.end -= pwm.down
示例#3
0
 def getInsertionSequences(self, fasta, nucleotides=["C", "G", "A", "T"], up=10, down=10):
     """Accumulate sequence content in a window around each position, weighted by ``self.vals``.

     Returns a ``len(nucleotides)`` x ``(up + down + 1)`` matrix.  If
     ``self.vals`` sums to zero the all-zero matrix is returned without
     touching ``fasta``.  Otherwise the sequence for the chunk, padded by
     ``max(up, down)`` on both sides, is fetched and encoded, and for every
     index ``i`` in ``range(self.length())`` the window running from ``up``
     columns before to ``down`` columns after position ``i`` is added,
     scaled by ``self.vals[i]``.

     The ``nucleotides`` default list is only read, never modified.
     """
     width = up + down + 1
     mat = np.zeros((len(nucleotides), width))
     if np.sum(self.vals) == 0:
         return mat

     # Pad by the larger flank so every window stays inside the fetched sequence.
     offset = max(up, down)
     padded_chunk = Chunk(self.chrom, self.start - offset, self.end + offset)
     encoded = seq_to_mat(get_sequence(padded_chunk, fasta), nucleotides)

     for i in range(self.length()):
         left = offset + i - up
         mat += self.vals[i] * encoded[:, left:left + width]
     return mat
示例#4
0
 def getInsertionSequences(self, fasta, nucleotides=["C", "G", "A", "T"], up=10, down=10):
     """Get sequence content at insertions.

     Builds a matrix with one row per entry of ``nucleotides`` and
     ``up + down + 1`` columns.  When ``self.vals`` sums to zero the zero
     matrix is returned immediately.  Otherwise each index ``i`` below
     ``self.length()`` contributes ``self.vals[i]`` times the slice of the
     encoded sequence covering ``up`` columns before through ``down``
     columns after that index.
     """
     n_rows = len(nucleotides)
     n_cols = up + down + 1
     mat = np.zeros((n_rows, n_cols))
     if np.sum(self.vals) == 0:
         return mat

     # The fetched region is extended symmetrically by the larger flank.
     offset = max(up, down)
     region = Chunk(self.chrom, self.start - offset, self.end + offset)
     region_sequence = get_sequence(region, fasta)
     region_mat = seq_to_mat(region_sequence, nucleotides)

     for i in range(self.length()):
         first_col = offset + i - up
         last_col = offset + i + down + 1
         window = region_mat[:, first_col:last_col]
         mat += self.vals[i] * window
     return mat
示例#5
0
 def getStrandedInsertionSequences(self, fasta, nucleotides=["C", "G", "A", "T"], up=10, down=10):
     """Accumulate strand-aware sequence content around each position.

     Returns a ``len(nucleotides)`` x ``(up + down + 1)`` matrix, all zeros
     when ``self.vals`` sums to zero.  Otherwise, for every index ``i`` in
     ``range(self.length())``:

     * ``self.plus[i]`` times the window of the encoded sequence from
       ``up`` columns before to ``down`` columns after ``i`` is added;
     * ``self.minus[i]`` times the left-right flipped window of the encoded
       *complemented* sequence, from ``down`` columns before to ``up``
       columns after ``i``, is added.
     """
     mat = np.zeros((len(nucleotides), up + down + 1))
     if np.sum(self.vals) == 0:
         return mat

     offset = max(up, down)
     padded_chunk = Chunk(self.chrom, self.start - offset, self.end + offset)
     plus_sequence = get_sequence(padded_chunk, fasta)
     complemented = complement(plus_sequence)
     plus_mat = seq_to_mat(plus_sequence, nucleotides)
     minus_mat = seq_to_mat(complemented, nucleotides)

     for i in range(self.length()):
         center = offset + i
         forward_window = plus_mat[:, (center - up):(center + down + 1)]
         # Complement + column flip gives the window as read on the other strand.
         reverse_window = np.fliplr(minus_mat[:, (center - down):(center + up + 1)])
         mat += self.plus[i] * forward_window
         mat += self.minus[i] * reverse_window
     return mat
示例#6
0
 def getStrandedInsertionSequences(self, fasta, nucleotides=["C", "G", "A", "T"], up=10, down=10):
     """Get sequence content at insertions, taking into account strand.

     The result has one row per entry of ``nucleotides`` and
     ``up + down + 1`` columns.  A zero matrix is returned straight away if
     ``self.vals`` sums to zero.  Otherwise two contributions are summed per
     index ``i``: the plain window weighted by ``self.plus[i]``, and the
     column-reversed window of the complemented sequence (with the ``up`` /
     ``down`` flanks swapped) weighted by ``self.minus[i]``.
     """
     n_cols = up + down + 1
     mat = np.zeros((len(nucleotides), n_cols))
     if np.sum(self.vals) == 0:
         return mat

     # Fetch enough flanking sequence for the larger of the two flanks.
     offset = max(up, down)
     region = Chunk(self.chrom, self.start - offset, self.end + offset)
     sequence = get_sequence(region, fasta)
     seq_mat = seq_to_mat(sequence, nucleotides)
     minus_seq_mat = seq_to_mat(complement(sequence), nucleotides)

     for i in range(self.length()):
         plus_start = offset + i - up
         minus_start = offset + i - down
         plus_part = seq_mat[:, plus_start:plus_start + n_cols]
         minus_part = minus_seq_mat[:, minus_start:minus_start + n_cols]
         mat += self.plus[i] * plus_part
         mat += self.minus[i] * np.fliplr(minus_part)
     return mat
示例#7
0
def _nucleotideHelper(arg):
    """Helper function for multiprocessing acquisition of sequence content around sites"""
    (chunks, params) = arg
    mat = np.zeros(params.matsize)
    n = 0.0
    try:
        for chunk in chunks:
            chunk.center()
            chunk.slop(chromDict = params.chrs, up = params.up, down = params.down + params.dinucleotide)
            sequence = seq.get_sequence(chunk, params.fasta)
            submat = seq.seq_to_mat(sequence, params.nucleotides)
            if len(sequence) == (params.up + params.down + 1 + params.dinucleotide):
                mat += submat
                n += 1
    except Exception as e:
        print('Caught exception when processing:\n'+  chunk.asBed()+'\n')
        traceback.print_exc()
        print()
        raise e
    return mat,n
示例#8
0
def _nucleotideHelper(arg):
    """Helper function for multiprocessing acquisition of sequence content around sites"""
    (chunks, params) = arg
    mat = np.zeros(params.matsize)
    n = 0.0
    try:
        for chunk in chunks:
            chunk.center()
            chunk.slop(chromDict=params.chrs,
                       up=params.up,
                       down=params.down + params.dinucleotide)
            sequence = seq.get_sequence(chunk, params.fasta)
            submat = seq.seq_to_mat(sequence, params.nucleotides)
            if len(sequence) == (params.up + params.down + 1 +
                                 params.dinucleotide):
                mat += submat
                n += 1
    except Exception as e:
        print(('Caught exception when processing:\n' + chunk.asBed() + '\n'))
        traceback.print_exc()
        print()
        raise e
    return mat, n