def testInsertionQualityJustEnough(self):
    """Both insertions must be reported when merging with q_cutoff=32.

    The second read's insertion carries the quality string 'JBJ'.
    """
    first_read_inserts = {2: ('CCC', 'JJJ')}
    # presumably 'B' decodes to a score just above the cutoff (Phred+33)
    # -- confirm against merge_inserts.
    second_read_inserts = {10: ('GGG', 'JBJ')}

    merged = merge_inserts(first_read_inserts,
                           second_read_inserts,
                           q_cutoff=32)

    expected = {2: 'CCC', 10: 'GGG'}
    self.assertEqual(expected, merged)
def testNone(self):
    """Merging two missing insertion maps (None, None) must give {}."""
    no_inserts_1 = None
    no_inserts_2 = None

    merged = merge_inserts(no_inserts_1, no_inserts_2)

    expected = {}
    self.assertEqual(expected, merged)
def testConflictingInsertionsTooCloseInQuality(self):
    """Disagreeing bases at the same position become 'N' with minimum_q_delta=20.

    Both reads report an insertion at position 2; they differ in the middle
    base ('C' with quality 'J' versus 'T' with quality 'A').
    """
    first_read_inserts = {2: ('CCC', 'JJJ')}
    second_read_inserts = {2: ('CTC', 'JAJ')}

    merged = merge_inserts(first_read_inserts,
                           second_read_inserts,
                           minimum_q_delta=20)

    expected = {2: 'CNC'}
    self.assertEqual(expected, merged)
def testInsertionQualityTooLowReverse(self):
    """Only the first read's insertion must survive merging with q_cutoff=32.

    The second read's insertion carries the quality string 'JAJ' and is
    expected to be dropped.
    """
    first_read_inserts = {2: ('CCC', 'JJJ')}
    # presumably 'A' decodes to a score that does not exceed the cutoff
    # (Phred+33) -- confirm against merge_inserts.
    second_read_inserts = {10: ('GGG', 'JAJ')}

    merged = merge_inserts(first_read_inserts,
                           second_read_inserts,
                           q_cutoff=32)

    expected = {2: 'CCC'}
    self.assertEqual(expected, merged)
def testConflictingInsertions(self):
    """With default settings, the first read's insertion wins a conflict.

    Both reads report an insertion at position 2; they differ in the middle
    base ('C' with quality 'J' versus 'T' with quality 'A').
    """
    first_read_inserts = {2: ('CCC', 'JJJ')}
    second_read_inserts = {2: ('CTC', 'JAJ')}

    merged = merge_inserts(first_read_inserts, second_read_inserts)

    expected = {2: 'CCC'}
    self.assertEqual(expected, merged)
def testIdenticalInsertions(self):
    """The same insertion reported by both reads must appear once."""
    first_read_inserts = {2: ('CCC', 'JJJ')}
    second_read_inserts = {2: ('CCC', 'JJJ')}

    merged = merge_inserts(first_read_inserts, second_read_inserts)

    expected = {2: 'CCC'}
    self.assertEqual(expected, merged)
def testSeparateInsertions(self):
    """Insertions at different positions (2 and 10) must both be kept."""
    first_read_inserts = {2: ('CCC', 'JJJ')}
    second_read_inserts = {10: ('GGG', 'JJJ')}

    merged = merge_inserts(first_read_inserts, second_read_inserts)

    expected = {2: 'CCC', 10: 'GGG'}
    self.assertEqual(expected, merged)
def merge_reads(quality_cutoff, read_pair):
    """ Combine the two reads of a pair into one merged sequence.

    The pair is skipped (None is returned) when the two reads name different
    references, or when no read is left after dropping missing and unmapped
    ones.

    @param quality_cutoff: minimum quality score for a base to be counted;
        handed on to merge_pairs and merge_inserts
    @param read_pair: a sequence of two sequences, each with fields from a
        SAM file record; the second one may be empty or None
    @return: (rname, mseq, merged_inserts, qual1, qual2) or None to skip the
        pair
    """
    first, second = read_pair
    if second and first[2] != second[2]:
        # The reads disagree about the reference name: skip the pair.
        return None

    rname = None
    mapped = []
    for record in read_pair:
        if not record:
            continue
        # Only the eleven leading fields are used; optional ones are ignored.
        core_fields = record[:11]
        (_qname, flag, rname, refpos_str, _mapq, cigar,
         _rnext, _pnext, _tlen, seq, qual) = core_fields
        if is_unmapped_read(flag):
            continue
        mapped.append((cigar, seq, qual, int(refpos_str)))

    if not mapped:
        return None

    # pos - 1: presumably converts the 1-based SAM position into a 0-based
    # offset for apply_cigar -- confirm against apply_cigar.
    cigar, seq, qual, pos = mapped[0]
    seq1, qual1, ins1 = apply_cigar(cigar, seq, qual, pos - 1)

    if len(mapped) > 1:
        cigar, seq, qual, pos = mapped[1]
        seq2, qual2, ins2 = apply_cigar(cigar, seq, qual, pos - 1)
    else:
        # Only one usable read: merge it with an empty mate.
        seq2 = qual2 = ''
        ins2 = None

    mseq = merge_pairs(seq1, seq2, qual1, qual2, q_cutoff=quality_cutoff)
    merged_inserts = merge_inserts(ins1, ins2, quality_cutoff)
    return rname, mseq, merged_inserts, qual1, qual2
def merge_reads(quality_cutoff, read_pair):
    """ Build the merged sequence and merged insertions for a read pair.

    Returns None, meaning "skip this pair", when the two reads name
    different references or when neither read is present and mapped.

    @param quality_cutoff: minimum quality score for a base to be counted;
        handed on to merge_pairs and merge_inserts
    @param read_pair: a sequence of two sequences, each with fields from a
        SAM file record; the second one may be empty or None
    @return: (rname, mseq, merged_inserts, qual1, qual2) or None to skip the
        pair
    """
    def align(entry):
        # Run apply_cigar on one of the entries collected below.
        return apply_cigar(entry['cigar'],
                           entry['seq'],
                           entry['qual'],
                           entry['start'])

    read1, read2 = read_pair
    if read2 and read1[2] != read2[2]:
        # Reference names differ between the two reads, ignore the pair.
        return None

    rname = None
    usable = []
    for sam_fields in read_pair:
        if sam_fields:
            # Optional fields after the first eleven are ignored.
            (_qname, flag, rname, refpos_str, _mapq, cigar,
             _rnext, _pnext, _tlen, seq, qual) = sam_fields[:11]
            if not is_unmapped_read(flag):
                # start is the reported position minus one; presumably a
                # 0-based offset -- confirm against apply_cigar.
                usable.append(dict(cigar=cigar,
                                   seq=seq,
                                   qual=qual,
                                   start=int(refpos_str) - 1))

    if not usable:
        return None

    seq1, qual1, ins1 = align(usable[0])
    if len(usable) == 1:
        # A lone read is merged against an empty mate.
        seq2, qual2, ins2 = '', '', None
    else:
        seq2, qual2, ins2 = align(usable[1])

    mseq = merge_pairs(seq1, seq2, qual1, qual2, q_cutoff=quality_cutoff)
    merged_inserts = merge_inserts(ins1, ins2, quality_cutoff)
    return rname, mseq, merged_inserts, qual1, qual2