def _soft_clip_leading_ops(decoded_ops, num_clip):
    """Return a copy of decoded_ops with its leading ops soft-clipped.

    Walks decoded_ops (one single-character CIGAR op per element) from
    the front and replaces ops with 'S' until num_clip read bases
    aligned via 'M', 'I', '=' or 'X' have been consumed.  'D' ops are
    always copied unchanged and never count towards num_clip; any other
    op met before the quota is used up is replaced by 'S' without
    counting.  A num_clip of zero or less leaves the ops untouched.
    """
    clipped_ops = []
    for op in decoded_ops:
        if num_clip <= 0 or op == 'D':
            clipped_ops.append(op)
        else:
            # replace everything else with S
            clipped_ops.append('S')
            if op in 'MI=X':
                num_clip -= 1
    return clipped_ops


def clip_primer_overlap_bases(alignedread, primer_overlap_peak_pos, primer_len):
    """Soft-clip the bases of alignedread that overlap a primer.

    Only the CIGAR of the read is rewritten (in place); nothing is
    returned.

    Args:
        alignedread: read object providing is_reverse, pos, aend, cigar
            and rlen (presumably a pysam AlignedRead -- confirm against
            the caller).
        primer_overlap_peak_pos: position of the primer peak the read
            overlaps with.
        primer_len: primer length.

    For a reverse read the number of bases to clip is
    primer_overlap_peak_pos + primer_len - alignedread.pos + 1 and
    clipping starts at the first CIGAR op.  For a forward read it is
    alignedread.aend - primer_overlap_peak_pos + primer_len + 1 and
    clipping starts at the last CIGAR op.  If that number is zero or
    negative the CIGAR ops are left unchanged (previously a negative
    value caused the whole read to be soft-clipped).

    Raises:
        AssertionError: if the read length implied by the new CIGAR
            differs from alignedread.rlen.
    """

    if alignedread.is_reverse:
        num_clip = primer_overlap_peak_pos + primer_len - alignedread.pos + 1
        LOG.debug("rv read starting at %d overlapping with fw peak start %d+%d by %d: %s" % (
            alignedread.pos, primer_overlap_peak_pos, primer_len, num_clip, alignedread))
    else:
        num_clip = alignedread.aend - primer_overlap_peak_pos + primer_len + 1
        LOG.debug("fw read ending at %d overlapping with rv peak end %d-%d by %d: %s" % (
            alignedread.aend, primer_overlap_peak_pos, primer_len, num_clip, alignedread))

    # NOTE(review): 'D' ops inside the clipped stretch are kept and
    # non-aligned ops (e.g. 'H', 'N', 'P') are turned into 'S', exactly
    # as before -- confirm this is intended.
    decoded_ops = list(cigar.decoded_ops(alignedread.cigar))
    if alignedread.is_reverse:
        new_cigar_decoded = _soft_clip_leading_ops(decoded_ops, num_clip)
    else:
        # Clip from the end of the CIGAR: reverse, clip from the front,
        # then restore the original order (avoids repeated insert(0, ...)).
        decoded_ops.reverse()
        new_cigar_decoded = _soft_clip_leading_ops(decoded_ops, num_clip)
        new_cigar_decoded.reverse()

    new_cigar = cigar.parse(
        cigar.cigar_from_decoded_ops(new_cigar_decoded))

    # According to spec http://samtools.sourceforge.net/SAM1.pdf:
    # "Sum of lengths of the M/I/S/=/X operations shall equal the
    # length of SEQ"
    cigar_len = sum(1 for op in new_cigar_decoded if op in 'MISX=')
    assert cigar_len == alignedread.rlen, (
        "read length derived from new cigar (%s -> %d) mismatches rlen=%d for %s read: %s" % (
            new_cigar, cigar_len, alignedread.rlen,
            "reverse" if alignedread.is_reverse else "forward",
            alignedread))

    alignedread.cigar = new_cigar
def print_alignedread_info(alignedread):
    """Debugging only: print position, CIGAR and sequence info of a read.

    Args:
        alignedread: read object providing pos, aend, alen, cigar, seq
            and query (presumably a pysam AlignedRead -- confirm against
            the caller).

    Prints one line each for the positions, the raw CIGAR, the decoded
    CIGAR, the CIGAR-derived aligned length (-1 if cigar.aligned_length
    raises ValueError), the read sequence and its aligned part.
    """
    # Each print takes one parenthesised expression, so it behaves the
    # same as a Python 2 print statement and as a Python 3 print() call.
    print("- pos, aend-1, alen = %d %d %s" % (
        alignedread.pos, alignedread.aend-1, alignedread.alen))
    # One-element tuples keep a tuple-valued attribute from being
    # unpacked as several format arguments.
    print("- cigar = %s" % (alignedread.cigar,))
    print("- decoded cigar = %s" % (
        ''.join(cigar.decoded_ops(alignedread.cigar)),))
    try:
        alnlen = cigar.aligned_length(alignedread.cigar)
    except ValueError:
        alnlen = -1
    print("- cigar aln-len = %d" % (alnlen,))
    # seq is the read sequence, query only its aligned part; the
    # "read sequence" line used to print query as well.
    # NOTE(review): relies on pysam AlignedRead seq/query semantics.
    print("- read sequence = %s" % (alignedread.seq,))
    print("- aligned parts = %s" % (alignedread.query,))