示例#1
0
def cigar2reflen(cigar):
    """Return the number of reference positions covered by a CIGAR.

    The CIGAR (struct or string) is normalised with
    ``chimeras2.cigar_to_list`` into ``(length, operation)`` pairs, and the
    lengths of all operations listed in ``Cigar.ref_consuming_ops`` are
    added up.

    >>> cigar2reflen("10M")
    10
    >>> cigar2reflen("10M1S")
    10
    >>> cigar2reflen("1S10M")
    10
    >>> cigar2reflen("1S10M1S")
    10
    >>> cigar2reflen("1S5M1D5M1S")
    11
    >>> cigar2reflen("1S5M1I5M1S")
    10
    """
    parsed_ops = chimeras2.cigar_to_list(cigar)

    # NOTE(review): Cigar.ref_consuming_ops is presumably 'MDN=X' (the
    # reference-consuming operations) -- confirm against its definition.
    return sum(
        length
        for (length, kind) in parsed_ops
        if kind in Cigar.ref_consuming_ops
    )
示例#2
0
def cigar2reflen(cigar):
    """Return the number of reference positions covered by a CIGAR.

    The CIGAR (struct or string) is normalised with
    ``chimeras2.cigar_to_list`` into ``(length, operation)`` pairs, and the
    lengths of all operations listed in ``Cigar.ref_consuming_ops`` are
    added up.

    >>> cigar2reflen("10M")
    10
    >>> cigar2reflen("10M1S")
    10
    >>> cigar2reflen("1S10M")
    10
    >>> cigar2reflen("1S10M1S")
    10
    >>> cigar2reflen("1S5M1D5M1S")
    11
    >>> cigar2reflen("1S5M1I5M1S")
    10
    """
    parsed_ops = chimeras2.cigar_to_list(cigar)

    # NOTE(review): Cigar.ref_consuming_ops is presumably 'MDN=X' (the
    # reference-consuming operations) -- confirm against its definition.
    return sum(
        length
        for (length, kind) in parsed_ops
        if kind in Cigar.ref_consuming_ops
    )
示例#3
0
def cigar2querylen(cigar):
    """Return the number of query bases covered by a CIGAR.

    The CIGAR is normalised with ``chimeras2.cigar_to_list`` into
    ``(length, operation)`` pairs.  Only the operations 'M', 'I', '=' and
    'X' contribute to the result; every other operation (soft clips
    included) is ignored.

    Raises AssertionError when an operation is given as an integer code
    instead of a letter.
    """
    # FIXME check SAM spec for more
    counted_ops = ('M', 'I', '=', 'X')

    total = 0
    for (length, kind) in chimeras2.cigar_to_list(cigar):
        # Integer operation codes (as opposed to letters) are not supported.
        assert not isinstance(kind, int), (
            "Do not understand cigar {}. Might be from pysam?".format(kind))
        if kind not in counted_ops:
            continue
        total += length
    return total
示例#4
0
def cigar2querylen(cigar):
    """Return the number of query bases covered by a CIGAR.

    The CIGAR is normalised with ``chimeras2.cigar_to_list`` into
    ``(length, operation)`` pairs.  Only the operations 'M', 'I', '=' and
    'X' contribute to the result; every other operation (soft clips
    included) is ignored.

    Raises AssertionError when an operation is given as an integer code
    instead of a letter.
    """
    # FIXME check SAM spec for more
    counted_ops = ('M', 'I', '=', 'X')

    total = 0
    for (length, kind) in chimeras2.cigar_to_list(cigar):
        # Integer operation codes (as opposed to letters) are not supported.
        assert not isinstance(kind, int), (
            "Do not understand cigar {}. Might be from pysam?".format(kind))
        if kind not in counted_ops:
            continue
        total += length
    return total
示例#5
0
def query_aln_seq(queryseq, cigar):
    """Return the part of the query sequence that is not soft-clipped.

    Leading and trailing 'S' operations of the CIGAR are summed up and the
    corresponding bases are trimmed from the start and end of ``queryseq``.

    Raises AssertionError when nothing is left after trimming, or when the
    trimmed length disagrees with ``cigar2querylen`` for the same CIGAR.

    >>> query_aln_seq("ACGTACGT", "8M")
    'ACGTACGT'
    >>> query_aln_seq("ACGTACGT", "1M7S")
    'A'
    >>> query_aln_seq("ACGTACGT", "6S2M")
    'GT'
    >>> query_aln_seq("ACGTACGT", "2S4M2S")
    'GTAC'
    >>> query_aln_seq("ACGTACGT", "2S1M2I1M2S")
    'GTAC'
    """

    def _clipped_prefix(op_list):
        # Total length of the run of 'S' operations at the front of op_list.
        clipped = 0
        for (length, kind) in op_list:
            if kind != 'S':
                break
            clipped += length
        return clipped

    ops = chimeras2.cigar_to_list(cigar)

    first = _clipped_prefix(ops)
    # Trailing clips are the leading clips of the reversed operation list.
    last = len(queryseq) - _clipped_prefix(ops[::-1])

    assert last > 0 and first < last
    assert cigar2querylen(ops) == last - first, (
        "{} != {}".format(cigar2querylen(ops), last - first))

    return queryseq[first:last]
示例#6
0
def query_aln_seq(queryseq, cigar):
    """Return the part of the query sequence that is not soft-clipped.

    Leading and trailing 'S' operations of the CIGAR are summed up and the
    corresponding bases are trimmed from the start and end of ``queryseq``.

    Raises AssertionError when nothing is left after trimming, or when the
    trimmed length disagrees with ``cigar2querylen`` for the same CIGAR.

    >>> query_aln_seq("ACGTACGT", "8M")
    'ACGTACGT'
    >>> query_aln_seq("ACGTACGT", "1M7S")
    'A'
    >>> query_aln_seq("ACGTACGT", "6S2M")
    'GT'
    >>> query_aln_seq("ACGTACGT", "2S4M2S")
    'GTAC'
    >>> query_aln_seq("ACGTACGT", "2S1M2I1M2S")
    'GTAC'
    """

    def _clipped_prefix(op_list):
        # Total length of the run of 'S' operations at the front of op_list.
        clipped = 0
        for (length, kind) in op_list:
            if kind != 'S':
                break
            clipped += length
        return clipped

    ops = chimeras2.cigar_to_list(cigar)

    first = _clipped_prefix(ops)
    # Trailing clips are the leading clips of the reversed operation list.
    last = len(queryseq) - _clipped_prefix(ops[::-1])

    assert last > 0 and first < last
    assert cigar2querylen(ops) == last - first, (
        "{} != {}".format(cigar2querylen(ops), last - first))

    return queryseq[first:last]