def cigar2reflen(cigar):
    """Return the reference length spanned by an alignment's cigar.

    ``cigar`` may be a cigar string or an already parsed cigar struct; it
    is normalised through ``chimeras2.cigar_to_list`` into
    ``(length, op)`` pairs. The lengths of all operations listed in
    ``Cigar.ref_consuming_ops`` are added up; every other operation
    (e.g. soft clips and insertions in the examples below) contributes
    nothing.

    >>> cigar2reflen("10M")
    10
    >>> cigar2reflen("10M1S")
    10
    >>> cigar2reflen("1S10M")
    10
    >>> cigar2reflen("1S10M1S")
    10
    >>> cigar2reflen("1S5M1D5M1S")
    11
    >>> cigar2reflen("1S5M1I5M1S")
    10
    """
    operations = chimeras2.cigar_to_list(cigar)
    # Cigar.ref_consuming_ops is presumably 'MDN=X' (per an earlier note
    # in this function) -- confirm against the Cigar class.
    return sum(
        length
        for (length, kind) in operations
        if kind in Cigar.ref_consuming_ops
    )
def cigar2querylen(cigar):
    """Return the number of query bases covered by the alignment.

    ``cigar`` is normalised through ``chimeras2.cigar_to_list`` into
    ``(length, op)`` pairs and the lengths of all 'M', 'I', '=' and 'X'
    operations are summed.

    Soft clips ('S') are deliberately not counted here, so the result is
    the length of the aligned part of the query, not of the full read
    sequence. ``query_aln_seq`` relies on exactly that when it checks its
    trimmed slice.

    Raises AssertionError if an operation type is an int instead of a
    letter (which is what a pysam-style cigar would look like).
    """
    counted_ops = ('M', 'I', '=', 'X')  # FIXME check SAM spec for more
    total = 0
    for length, kind in chimeras2.cigar_to_list(cigar):
        assert not isinstance(kind, int), (
            "Do not understand cigar {}. Might be from pysam?".format(kind))
        if kind not in counted_ops:
            continue
        total += length
    return total
def query_aln_seq(queryseq, cigar):
    """Infer aligned bit of query sequence from cigar

    The soft-clipped ('S') bases at either end of ``queryseq`` are cut
    off and the remaining slice is returned. Hard clips ('H') may sit
    outside the soft clips (e.g. "2H2S4M2S2H"); hard-clipped bases are
    not part of the stored sequence, so those operations are stepped over
    without moving the slice boundaries. This assumes
    ``chimeras2.cigar_to_list`` reports them as 'H' operations.

    Args:
        queryseq: the read sequence as stored, i.e. including soft-clipped
            bases.
        cigar: cigar string or struct understood by
            ``chimeras2.cigar_to_list``.

    Returns:
        The part of ``queryseq`` between the leading and trailing soft
        clips.

    Raises:
        AssertionError: if clipping leaves nothing, or if the slice length
            disagrees with ``cigar2querylen`` for the same cigar.

    >>> query_aln_seq("ACGTACGT", "8M")
    'ACGTACGT'
    >>> query_aln_seq("ACGTACGT", "1M7S")
    'A'
    >>> query_aln_seq("ACGTACGT", "6S2M")
    'GT'
    >>> query_aln_seq("ACGTACGT", "6S2M")
    'GT'
    >>> query_aln_seq("ACGTACGT", "2S4M2S")
    'GTAC'
    >>> query_aln_seq("ACGTACGT", "2S1M2I1M2S")
    'GTAC'
    """
    # NOTE(review): a second, equivalent definition of query_aln_seq
    # follows this one in the file and is the one that ends up bound to
    # the name; one of the two should be removed.
    cig = chimeras2.cigar_to_list(cigar)

    # Walk in from the left: skip hard clips (no bases in queryseq),
    # accumulate soft clips, stop at the first aligned operation.
    start = 0
    for (op_len, op_type) in cig:
        if op_type == 'H':
            continue
        if op_type != 'S':
            break
        start += op_len

    # Same walk from the right end.
    end = len(queryseq)
    for (op_len, op_type) in reversed(cig):
        if op_type == 'H':
            continue
        if op_type != 'S':
            break
        end -= op_len

    assert end > 0 and start < end
    # Sanity check: what is left must match the aligned query length
    # implied by the cigar (cigar2querylen does not count clips).
    assert cigar2querylen(cig) == end - start, ("{} != {}".format(
        cigar2querylen(cig), end - start))
    return queryseq[start:end]
def query_aln_seq(queryseq, cigar):
    """Infer aligned bit of query sequence from cigar

    Trims the soft-clipped ('S') bases from both ends of ``queryseq`` and
    returns the rest. Hard-clip ('H') operations, which may appear
    outside the soft clips (e.g. "2H2S4M2S2H"), are skipped while looking
    for soft clips: hard-clipped bases are absent from the stored
    sequence, so they must not shift the slice and must not stop the
    scan. This assumes ``chimeras2.cigar_to_list`` reports them as 'H'
    operations.

    Args:
        queryseq: the read sequence as stored, i.e. including soft-clipped
            bases.
        cigar: cigar string or struct understood by
            ``chimeras2.cigar_to_list``.

    Returns:
        The slice of ``queryseq`` between the leading and trailing soft
        clips.

    Raises:
        AssertionError: if clipping leaves nothing, or if the slice length
            disagrees with ``cigar2querylen`` for the same cigar.

    >>> query_aln_seq("ACGTACGT", "8M")
    'ACGTACGT'
    >>> query_aln_seq("ACGTACGT", "1M7S")
    'A'
    >>> query_aln_seq("ACGTACGT", "6S2M")
    'GT'
    >>> query_aln_seq("ACGTACGT", "6S2M")
    'GT'
    >>> query_aln_seq("ACGTACGT", "2S4M2S")
    'GTAC'
    >>> query_aln_seq("ACGTACGT", "2S1M2I1M2S")
    'GTAC'
    """
    # NOTE(review): this repeats the query_aln_seq definition directly
    # above it in the file and shadows it; one of the two should go.
    cig = chimeras2.cigar_to_list(cigar)

    # Leading clips: hard clips carry no bases in queryseq, soft clips
    # do; the first operation of any other type ends the clipped prefix.
    start = 0
    for (op_len, op_type) in cig:
        if op_type == 'H':
            continue
        if op_type != 'S':
            break
        start += op_len

    # Trailing clips, scanned from the right end.
    end = len(queryseq)
    for (op_len, op_type) in reversed(cig):
        if op_type == 'H':
            continue
        if op_type != 'S':
            break
        end -= op_len

    assert end > 0 and start < end
    # The remaining slice has to agree with the aligned query length the
    # cigar implies (cigar2querylen ignores clip operations).
    aligned_len = cigar2querylen(cig)
    assert aligned_len == end - start, ("{} != {}".format(
        aligned_len, end - start))
    return queryseq[start:end]