Exemplo n.º 1
0
def test_sanity(reads, n_frag, len_frag):
    '''Run all sanity checks on reads that are supposed to be good.

    The checks are test_integrity, test_coh and test_fragment_assignment, in
    that order; evaluation stops at the first one returning a falsy value.

    Returns True only if every check passes, False otherwise.
    '''
    # Deferred calls, so that a failing check prevents the later ones from running
    checks = (
        lambda: test_integrity(reads),
        lambda: test_coh(reads),
        lambda: test_fragment_assignment(reads, n_frag, len_frag),
    )
    return all(check() for check in checks)
Exemplo n.º 2
0
def trim_primers(reads, frag_pos, include_tests=False):
    '''Trim inner primers from a read pair, in place.

    Every read is clipped so that its alignment starts no earlier than
    frag_pos[0] and ends (read.pos plus the lengths of the reference-consuming
    CIGAR blocks) no later than frag_pos[1]. pos, seq, qual and cigar of the
    clipped reads are updated; if any read was touched, mpos and isize of both
    mates are recomputed from the new coordinates.

    Parameters:
    - reads: the two mates, indexable as reads[0] and reads[1]. Each read must
      expose pos, cigar (list of (op, length) pairs), seq, qual, rlen,
      is_reverse, mpos and isize.
    - frag_pos are the coordinates of the fragment trimmed of inner primers
    - include_tests: if True, call test_integrity(reads) on entry and on exit
      and drop into the ipdb debugger whenever it returns a truthy value.

    Returns:
    - True if trimming left one read with an empty CIGAR (the pair is to be
      trashed; reads handled before that point may already be modified),
      False otherwise.

    Only CIGAR ops 0 (consumes read and reference), 1 (read only) and
    2 (reference only) are handled; any other op is skipped without being
    removed from the working list.
    NOTE(review): presumably reads with other ops are filtered out upstream --
    confirm.
    '''
    # Note: this function is robust against fuzzy ends, i.e. it works also if the
    # insert reads into the adapters and crap like that.
    # Because we accept short inserts, the primer can also be at the end

    # NOTE(review): the debugger is entered when test_integrity is truthy,
    # whereas test_sanity treats a truthy test_integrity as "passed" -- confirm
    # which convention test_integrity follows.
    if include_tests:
        if test_integrity(reads):
            # Parenthesised single argument: same output on Python 2 and 3
            print('trim_primers (entry):')
            import ipdb
            ipdb.set_trace()

    tampered = False
    for read in reads:
        # FWD primer
        if read.pos < frag_pos[0]:
            tampered = True
            ref_pos = read.pos
            read_pos = 0
            # Reversed copy: the leading blocks can be dropped with pop(-1)
            cigar = read.cigar[::-1]
            for (bt, bl) in read.cigar:
                if bt == 0:
                    # Strictly greater, because we dump the CIGAR ending at the
                    # right position. In the corner case, ref_pos == frag_pos[0]
                    # and we add the whole block (and do not move read_pos).
                    if ref_pos + bl > frag_pos[0]:
                        cigar[-1] = (bt, ref_pos + bl - frag_pos[0])
                        read_pos += frag_pos[0] - ref_pos
                        ref_pos = frag_pos[0]
                        break
                    cigar.pop(-1)
                    read_pos += bl
                    ref_pos += bl
                elif bt == 1:
                    cigar.pop(-1)
                    read_pos += bl
                elif bt == 2:
                    # Starting with a deletion is not allowed
                    cigar.pop(-1)
                    ref_pos += bl
                    if ref_pos > frag_pos[0]:
                        break
            cigar = cigar[::-1]

            # If you cut away everything, trash
            if not cigar:
                return True

            # Fetch both seq and qual before assigning either.
            # NOTE(review): presumably these are pysam reads, where assigning
            # seq resets qual -- confirm before reordering.
            seq = read.seq
            qual = read.qual
            read.pos = ref_pos
            read.seq = seq[read_pos:]
            read.qual = qual[read_pos:]
            read.cigar = cigar

        # REV primer
        # End of the alignment on the reference (one past the last covered base)
        ref_pos = read.pos + sum(bl for (bt, bl) in read.cigar if bt in (0, 2))
        if ref_pos > frag_pos[1]:
            tampered = True
            read_pos = read.rlen
            # Copy, so the read's own CIGAR list is not modified through an
            # alias while it is being trimmed
            cigar = list(read.cigar)
            for (bt, bl) in read.cigar[::-1]:
                if bt == 0:
                    # Strictly less, because we dump the CIGAR starting at the
                    # right position. In the corner case, ref_pos == frag_pos[1]
                    # and we add the whole block (and do not move read_pos).
                    if ref_pos - bl < frag_pos[1]:
                        cigar[-1] = (bt, frag_pos[1] - (ref_pos - bl))
                        read_pos -= ref_pos - frag_pos[1]
                        break
                    cigar.pop(-1)
                    read_pos -= bl
                    ref_pos -= bl
                elif bt == 1:
                    cigar.pop(-1)
                    read_pos -= bl
                elif bt == 2:
                    # Ending with a deletion is not allowed
                    cigar.pop(-1)
                    ref_pos -= bl
                    if ref_pos < frag_pos[1]:
                        break

            # If you cut away everything, trash
            if not cigar:
                return True

            # Same order as above: read seq and qual first, then assign
            seq = read.seq
            qual = read.qual
            read.seq = seq[:read_pos]
            read.qual = qual[:read_pos]
            read.cigar = cigar

    # Fix mate pair
    if tampered:
        # is_reverse is used as a 0/1 index: the forward mate is reads[0]
        # unless reads[0] is the reverse one
        i_fwd = reads[0].is_reverse
        i_rev = not i_fwd
        reads[i_fwd].mpos = reads[i_rev].pos
        reads[i_rev].mpos = reads[i_fwd].pos
        # Insert size: from the start of the forward mate to the end of the
        # reverse mate on the reference
        isize = reads[i_rev].pos + sum(bl for bt, bl in reads[i_rev].cigar
                                       if bt in (0, 2)) - reads[i_fwd].pos
        reads[i_fwd].isize = isize
        reads[i_rev].isize = -isize

    if include_tests:
        if test_integrity(reads):
            print('trim_primers (exit):')
            import ipdb
            ipdb.set_trace()

    return False