Exemplo n.º 1
0
def genrefblks(readseq, chrom, start, stop, strand, cigar, nreads):
    """Yield reference-anchored events decoded from one alignment's CIGAR.

    The CIGAR string is split by the module-level ``cigar_pattern`` into
    ``(count, op)`` pairs, which are walked while tracking a reference
    cursor (starting at ``start``) and a read cursor (starting at 0).

    Parameters:
        readseq: read sequence; reverse-complemented first when ``strand``
            is ``'-'``.
        chrom: not used by this function (kept for interface compatibility).
        start, stop: reference span of the alignment.  ``stop - 1`` is
            reported as an end position, so the span is presumably
            half-open -- TODO confirm against callers.
        strand: ``'+'`` or ``'-'``; any value other than ``'+'`` is treated
            as minus when assigning the 5'/3' ends.
        cigar: CIGAR string.
        nreads: passed through unchanged as the second item of every tuple.

    Yields:
        ('M', nreads, refpos, seq)  -- matched segment after end trimming
        ('D', nreads, refpos, length) -- deletion inside the trimmed span
        ('I', nreads, refpos, length) -- insertion inside the trimmed span
        ('E', nreads, count, op, readseq) -- emitted right before the
            ValueError for an unsupported operation
        ('5', nreads, pos) and ('3', nreads, pos) -- read end positions,
            extended by leading/trailing soft clips

    Raises:
        ValueError: if no CIGAR operation can be parsed from ``cigar`` or
            an operation other than M/S/N/D/I/H is encountered.
    """
    refpos = start
    readpos = 0
    if strand == '-':
        readseq = reverse_complement(readseq)

    # Trim limits are taken from the span as given, i.e. before start/stop
    # are widened by soft clipping below.
    tleftlim, trightlim = start + ENDTRIM, stop - ENDTRIM
    qleftlim, qrightlim = ENDTRIM, len(readseq) - ENDTRIM

    cigarcommands = cigar_pattern.findall(cigar)
    if not cigarcommands:
        # Previously this surfaced as an opaque IndexError on the lookups
        # below (e.g. for the '*' placeholder CIGAR).
        raise ValueError('no CIGAR operations found in %r' % (cigar,))

    if cigarcommands[0][1] == 'S': # shift start site for the first soft clipping
        start -= int(cigarcommands[0][0])
    # The length check keeps a lone 'S' operation from being counted twice.
    if len(cigarcommands) > 1 and cigarcommands[-1][1] == 'S': # last soft clipping
        stop += int(cigarcommands[-1][0])

    for num, cmd in cigarcommands:
        num = int(num)
        if cmd == 'M': # match: consumes both read and reference
            mleft = max(qleftlim, readpos)
            mright = min(qrightlim, readpos + num)
            if mleft < mright:
                seq = readseq[mleft:mright]
                yield ('M', nreads, max(refpos, tleftlim), seq)
            refpos += num
            readpos += num
        elif cmd == 'S': # soft clip: consumes read only
            readpos += num
        elif cmd == 'N': # skip: consumes reference only
            refpos += num
        elif cmd == 'D': # deletion: consumes reference only
            if tleftlim <= refpos < trightlim:
                yield ('D', nreads, refpos, num)
            refpos += num
        elif cmd == 'I': # insertion: consumes read only
            # On the minus strand the insertion is anchored one base earlier.
            ppos = (refpos if strand == '+' else (refpos-1))
            if tleftlim <= ppos < trightlim:
                yield ('I', nreads, ppos, num)
            readpos += num
        elif cmd == 'H': # hard clipping: consumes nothing
            pass
        else:
            # Emit the diagnostic record first so consumers can log it,
            # then abort the generator.
            yield ('E', nreads, num, cmd, readseq)
            raise ValueError('unsupported CIGAR operation %r in %r'
                             % (cmd, cigar))

    if strand == '+':
        fivep, threep = start, stop-1
    else:
        fivep, threep = stop-1, start

    yield ('5', nreads, fivep)
    yield ('3', nreads, threep)
Exemplo n.º 2
0
    def get_refseq(self, chrom, start, stop):
        """Return the upper-cased reference sequence for a region.

        Strand is detected automatically from the sign of ``start``: a
        non-negative ``start`` fetches ``(start, stop)`` as given.  Otherwise
        the coordinates are taken to be a negated, swapped pair, so
        ``(-stop, -start)`` is fetched and the result is reverse-complemented.
        """
        is_forward = start >= 0
        if not is_forward:
            # Undo the negation to recover the forward-strand interval.
            start, stop = -stop, -start

        fetched = self.seqs.get(chrom, start, stop).upper()
        if is_forward:
            return fetched
        return sequtils.reverse_complement(fetched)