def computeBias(self, fasta, chromDict, pwm):
    """Compute the bias track for this interval from its sequence and a PWM.

    The interval is first passed through ``self.slop`` with the PWM's ``up``
    and ``down`` flanks, the sequence for the resulting interval is converted
    to a matrix, and that matrix is correlated with the log of ``pwm.mat``.
    The first row of the 'valid' correlation is stored in ``self.vals``;
    ``self.start`` and ``self.end`` are then moved inward by the same flanks.

    Args:
        fasta: Passed through to ``seq.get_sequence``.
        chromDict: Passed through to ``self.slop``.
        pwm: Object providing ``up``, ``down``, ``nucleotides`` and ``mat``.
    """
    # presumably slop() widens the interval by the PWM flanks -- confirm
    self.slop(chromDict, up=pwm.up, down=pwm.down)
    onehot = seq.seq_to_mat(seq.get_sequence(self, fasta), pwm.nucleotides)
    log_weights = np.log(pwm.mat)
    scores = signal.correlate(onehot, log_weights, mode='valid')
    self.vals = scores[0]
    # Bring the coordinates back in by the flanks handed to slop() above
    self.start += pwm.up
    self.end -= pwm.down
def computeBias(self, fasta, chromDict, pwm):
    """Compute the bias track for this interval from its sequence and a PWM.

    The interval is first passed through ``self.slop`` with the PWM's ``up``
    and ``down`` flanks, the sequence for the resulting interval is converted
    to a matrix, and that matrix is correlated with the log of ``pwm.mat``.
    The first row of the 'valid' correlation is stored in ``self.vals``;
    ``self.start`` and ``self.end`` are then moved inward by the same flanks.

    Args:
        fasta: Passed through to ``seq.get_sequence``.
        chromDict: Passed through to ``self.slop``.
        pwm: Object providing ``up``, ``down``, ``nucleotides`` and ``mat``.
    """
    # presumably slop() widens the interval by the PWM flanks -- confirm
    self.slop(chromDict, up=pwm.up, down=pwm.down)
    onehot = seq.seq_to_mat(seq.get_sequence(self, fasta), pwm.nucleotides)
    log_weights = np.log(pwm.mat)
    scores = signal.correlate(onehot, log_weights, mode='valid')
    self.vals = scores[0]
    # Bring the coordinates back in by the flanks handed to slop() above
    self.start += pwm.up
    self.end -= pwm.down
def getInsertionSequences(self, fasta, nucleotides=None, up=10, down=10):
    """Get sequence content at insertions.

    For every position ``i`` of this chunk, the window of the sequence
    matrix running from ``up`` columns before to ``down`` columns after that
    position is scaled by ``self.vals[i]`` and added to the result.

    Args:
        fasta: Passed through to ``get_sequence``.
        nucleotides: Passed to ``seq_to_mat``; its length sets the number of
            rows of the result. Defaults to ``["C", "G", "A", "T"]``. A fresh
            list is created on every call so the default is never shared
            between calls.
        up: Number of columns taken before each position.
        down: Number of columns taken after each position.

    Returns:
        Array of shape ``(len(nucleotides), up + down + 1)``. All zeros when
        ``self.vals`` sums to zero (no sequence is fetched in that case).
    """
    if nucleotides is None:
        nucleotides = ["C", "G", "A", "T"]
    mat = np.zeros((len(nucleotides), up + down + 1))
    if np.sum(self.vals) == 0:
        return mat
    # Fetch extra sequence on both sides, sized by the larger flank, so the
    # first window starts at a non-negative column.
    offset = max(up, down)
    seq_chunk = Chunk(self.chrom, self.start - offset, self.end + offset)
    sequence = get_sequence(seq_chunk, fasta)
    # assumes seq_to_mat returns one column per base of `sequence` -- confirm
    seq_mat = seq_to_mat(sequence, nucleotides)
    for i in range(self.length()):
        mat += self.vals[i] * seq_mat[:, (offset + i - up):(offset + i + down + 1)]
    return mat
def getInsertionSequences(self, fasta, nucleotides=None, up=10, down=10):
    """Get sequence content at insertions.

    For every position ``i`` of this chunk, the window of the sequence
    matrix running from ``up`` columns before to ``down`` columns after that
    position is scaled by ``self.vals[i]`` and added to the result.

    Args:
        fasta: Passed through to ``get_sequence``.
        nucleotides: Passed to ``seq_to_mat``; its length sets the number of
            rows of the result. Defaults to ``["C", "G", "A", "T"]``. A fresh
            list is created on every call so the default is never shared
            between calls.
        up: Number of columns taken before each position.
        down: Number of columns taken after each position.

    Returns:
        Array of shape ``(len(nucleotides), up + down + 1)``. All zeros when
        ``self.vals`` sums to zero (no sequence is fetched in that case).
    """
    if nucleotides is None:
        nucleotides = ["C", "G", "A", "T"]
    mat = np.zeros((len(nucleotides), up + down + 1))
    if np.sum(self.vals) == 0:
        return mat
    # Fetch extra sequence on both sides, sized by the larger flank, so the
    # first window starts at a non-negative column.
    offset = max(up, down)
    seq_chunk = Chunk(self.chrom, self.start - offset, self.end + offset)
    sequence = get_sequence(seq_chunk, fasta)
    # assumes seq_to_mat returns one column per base of `sequence` -- confirm
    seq_mat = seq_to_mat(sequence, nucleotides)
    for i in range(self.length()):
        mat += self.vals[i] * seq_mat[:, (offset + i - up):(offset + i + down + 1)]
    return mat
def getStrandedInsertionSequences(self, fasta, nucleotides=None, up=10, down=10):
    """Get sequence content at insertions, taking into account strand.

    For every position ``i`` of this chunk two windows are accumulated:

    * the window of the sequence matrix from ``up`` columns before to
      ``down`` columns after the position, scaled by ``self.plus[i]``;
    * the window of the complemented-sequence matrix from ``down`` columns
      before to ``up`` columns after the position, mirrored left-to-right
      with ``np.fliplr`` and scaled by ``self.minus[i]``.

    Args:
        fasta: Passed through to ``get_sequence``.
        nucleotides: Passed to ``seq_to_mat``; its length sets the number of
            rows of the result. Defaults to ``["C", "G", "A", "T"]``. A fresh
            list is created on every call so the default is never shared
            between calls.
        up: Number of columns before each position (plus-strand orientation).
        down: Number of columns after each position (plus-strand orientation).

    Returns:
        Array of shape ``(len(nucleotides), up + down + 1)``. All zeros when
        ``self.vals`` sums to zero (no sequence is fetched in that case).
    """
    if nucleotides is None:
        nucleotides = ["C", "G", "A", "T"]
    mat = np.zeros((len(nucleotides), up + down + 1))
    # NOTE(review): the early exit tests self.vals while the accumulation
    # below reads self.plus / self.minus -- presumably vals is their sum;
    # confirm against the class that sets these attributes.
    if np.sum(self.vals) == 0:
        return mat
    # Fetch extra sequence on both sides, sized by the larger flank, so both
    # the plus and the (flank-swapped) minus windows start at a column >= 0.
    offset = max(up, down)
    seq_chunk = Chunk(self.chrom, self.start - offset, self.end + offset)
    sequence = get_sequence(seq_chunk, fasta)
    minus_sequence = complement(sequence)
    seq_mat = seq_to_mat(sequence, nucleotides)
    minus_seq_mat = seq_to_mat(minus_sequence, nucleotides)
    for i in range(self.length()):
        mat += self.plus[i] * seq_mat[:, (offset + i - up):(offset + i + down + 1)]
        # Flanks are swapped and the window mirrored for the minus strand
        mat += self.minus[i] * np.fliplr(
            minus_seq_mat[:, (offset + i - down):(offset + i + up + 1)]
        )
    return mat
def getStrandedInsertionSequences(self, fasta, nucleotides=None, up=10, down=10):
    """Get sequence content at insertions, taking into account strand.

    For every position ``i`` of this chunk two windows are accumulated:

    * the window of the sequence matrix from ``up`` columns before to
      ``down`` columns after the position, scaled by ``self.plus[i]``;
    * the window of the complemented-sequence matrix from ``down`` columns
      before to ``up`` columns after the position, mirrored left-to-right
      with ``np.fliplr`` and scaled by ``self.minus[i]``.

    Args:
        fasta: Passed through to ``get_sequence``.
        nucleotides: Passed to ``seq_to_mat``; its length sets the number of
            rows of the result. Defaults to ``["C", "G", "A", "T"]``. A fresh
            list is created on every call so the default is never shared
            between calls.
        up: Number of columns before each position (plus-strand orientation).
        down: Number of columns after each position (plus-strand orientation).

    Returns:
        Array of shape ``(len(nucleotides), up + down + 1)``. All zeros when
        ``self.vals`` sums to zero (no sequence is fetched in that case).
    """
    if nucleotides is None:
        nucleotides = ["C", "G", "A", "T"]
    mat = np.zeros((len(nucleotides), up + down + 1))
    # NOTE(review): the early exit tests self.vals while the accumulation
    # below reads self.plus / self.minus -- presumably vals is their sum;
    # confirm against the class that sets these attributes.
    if np.sum(self.vals) == 0:
        return mat
    # Fetch extra sequence on both sides, sized by the larger flank, so both
    # the plus and the (flank-swapped) minus windows start at a column >= 0.
    offset = max(up, down)
    seq_chunk = Chunk(self.chrom, self.start - offset, self.end + offset)
    sequence = get_sequence(seq_chunk, fasta)
    minus_sequence = complement(sequence)
    seq_mat = seq_to_mat(sequence, nucleotides)
    minus_seq_mat = seq_to_mat(minus_sequence, nucleotides)
    for i in range(self.length()):
        mat += self.plus[i] * seq_mat[:, (offset + i - up):(offset + i + down + 1)]
        # Flanks are swapped and the window mirrored for the minus strand
        mat += self.minus[i] * np.fliplr(
            minus_seq_mat[:, (offset + i - down):(offset + i + up + 1)]
        )
    return mat
def _nucleotideHelper(arg):
    """Helper function for multiprocessing acquisition of sequence content around sites.

    Each chunk is centered, passed through ``slop`` with ``params.up`` and
    ``params.down + params.dinucleotide``, and its sequence is converted to a
    matrix. Only chunks whose sequence has the full expected length
    (``up + down + 1 + dinucleotide``) are added to the running sum.

    Args:
        arg: ``(chunks, params)`` pair. ``chunks`` is an iterable of chunk
            objects (mutated in place by ``center``/``slop``); ``params``
            provides ``matsize``, ``chrs``, ``up``, ``down``,
            ``dinucleotide``, ``fasta`` and ``nucleotides``.

    Returns:
        ``(mat, n)``: the summed matrix and the number of chunks that
        contributed to it, as a float.

    Raises:
        Exception: any error raised while processing is re-raised unchanged
            after a diagnostic message and traceback have been printed.
    """
    (chunks, params) = arg
    mat = np.zeros(params.matsize)
    n = 0.0
    # Bound before the loop so the handler never hits an unbound name when the
    # failure happens before the first chunk is reached (e.g. bad `chunks`).
    chunk = None
    try:
        for chunk in chunks:
            chunk.center()
            chunk.slop(chromDict=params.chrs, up=params.up, down=params.down + params.dinucleotide)
            sequence = seq.get_sequence(chunk, params.fasta)
            submat = seq.seq_to_mat(sequence, params.nucleotides)
            # Sequences of any other length are left out of the sum
            if len(sequence) == (params.up + params.down + 1 + params.dinucleotide):
                mat += submat
                n += 1
    except Exception:
        if chunk is None:
            print('Caught exception before any chunk was processed\n')
        else:
            print('Caught exception when processing:\n' + chunk.asBed() + '\n')
        traceback.print_exc()
        print()
        # Bare raise keeps the original exception and traceback intact
        raise
    return mat, n
def _nucleotideHelper(arg):
    """Helper function for multiprocessing acquisition of sequence content around sites.

    Each chunk is centered, passed through ``slop`` with ``params.up`` and
    ``params.down + params.dinucleotide``, and its sequence is converted to a
    matrix. Only chunks whose sequence has the full expected length
    (``up + down + 1 + dinucleotide``) are added to the running sum.

    Args:
        arg: ``(chunks, params)`` pair. ``chunks`` is an iterable of chunk
            objects (mutated in place by ``center``/``slop``); ``params``
            provides ``matsize``, ``chrs``, ``up``, ``down``,
            ``dinucleotide``, ``fasta`` and ``nucleotides``.

    Returns:
        ``(mat, n)``: the summed matrix and the number of chunks that
        contributed to it, as a float.

    Raises:
        Exception: any error raised while processing is re-raised unchanged
            after a diagnostic message and traceback have been printed.
    """
    (chunks, params) = arg
    mat = np.zeros(params.matsize)
    n = 0.0
    # Bound before the loop so the handler never hits an unbound name when the
    # failure happens before the first chunk is reached (e.g. bad `chunks`).
    chunk = None
    try:
        for chunk in chunks:
            chunk.center()
            chunk.slop(chromDict=params.chrs, up=params.up, down=params.down + params.dinucleotide)
            sequence = seq.get_sequence(chunk, params.fasta)
            submat = seq.seq_to_mat(sequence, params.nucleotides)
            # Sequences of any other length are left out of the sum
            if len(sequence) == (params.up + params.down + 1 + params.dinucleotide):
                mat += submat
                n += 1
    except Exception:
        if chunk is None:
            print('Caught exception before any chunk was processed\n')
        else:
            print('Caught exception when processing:\n' + chunk.asBed() + '\n')
        traceback.print_exc()
        print()
        # Bare raise keeps the original exception and traceback intact
        raise
    return mat, n