Python EmbossStandalone примеры использования

Язык программирования: Python

Пространство имен/Пакет: pythonlib

Класс/Тип: EmbossStandalone

Примеров на hotexamples.com: 4

Python EmbossStandalone - 4 примера найдено. Это лучшие примеры Python кода для pythonlib.EmbossStandalone, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

needle(4)

Основные методы

needle (4)

Пример #1

Показать файл

Файл: variation.py Проект: ozagordi/old-svn-sandbox

def align_consensus(cons_file_1, cons_file_2):
    """Align two consensus files with needle and classify each column.

    Runs the ``needle`` executable on the two input files, asking it to
    write its output to ``map_cons.needle`` in the current directory, then
    reads the first alignment from that file and compares the two aligned
    sequences (upper-cased) column by column.

    Args:
        cons_file_1: path of the first sequence file passed to needle.
        cons_file_2: path of the second sequence file passed to needle.

    Returns:
        A list with one integer per alignment column: ``1`` when the first
        sequence has a gap, ``2`` when the second sequence has a gap, and
        ``0`` when neither has one.

    Raises:
        AssertionError: if a column holds a gap in both sequences.
    """
    from pythonlib import EmbossStandalone
    from pythonlib.MarkxIO import Markx10Iterator

    needle_exe = 'needle'

    out_file = 'map_cons.needle'
    EmbossStandalone.needle(needle_exe,
                            cons_file_1,
                            cons_file_2,
                            out=out_file,
                            aglobal3='False')

    # Pull both sequences out while the file is still open, so the handle
    # can be closed deterministically instead of leaking.
    with open(out_file) as handle:
        alignment = next(Markx10Iterator(handle))
        cons_1 = alignment.get_seq_by_num(0).tostring().upper()
        cons_2 = alignment.get_seq_by_num(1).tostring().upper()

    # Named gap_map rather than map so the builtin is not shadowed.
    gap_map = []
    for base_1, base_2 in zip(cons_1, cons_2):
        assert not (base_1 == base_2 and base_2 == '-'), 'not two gaps'
        if base_1 == '-':
            gap_map.append(1)
        elif base_2 == '-':
            gap_map.append(2)
        else:
            gap_map.append(0)

    return gap_map

Пример #2

Показать файл

        length += float(len_seq)
        length2 += float(len_seq * len_seq)
        #        readdict[read.] = [seq,len_seq]
        n += 1.

# Mean of the values accumulated in `length` over the `n` counted items.
meanlr = length / n
# Sample standard deviation from the running sum (`length`) and the running
# sum of squares (`length2`); the denominator n*n - n equals n*(n - 1).
# NOTE(review): this divides by zero when n is 0 or 1 -- confirm that the
# input always holds at least two reads.
stdlr = math.sqrt((n * length2 - length * length) / (n * n - n))
# The interval is not symmetric around the mean: the upper bound lies one
# standard deviation further out than the lower one.
# NOTE(review): confirm the extra `1 +` on the upper bound is intended.
allowed_length = [meanlr - acclength * stdlr, meanlr + (1 + acclength) * stdlr]
print >> sys.stderr, 'Allowed interval for length is', allowed_length

# Run needle only when its output file is missing; an existing
# 'tmp_align_f.needle' is left in place.
if not os.path.isfile('tmp_align_f.needle'):
    print >> sys.stderr, 'needle forward'
    EmbossStandalone.needle(needle_exe,
                            options.ref,
                            f_fasta_forward_filename,
                            out='tmp_align_f.needle',
                            gapopen=6.0,
                            gapext=3.0,
                            aglobal3='False')
"""
else:
    print >>sys.stderr, 'The alignment file tmp_align_f.needle is already present'
    statinfo = os.stat('tmp_align_f.needle')
    age_sec = time.time() - statinfo.st_mtime
    if age_sec > 3600:
        print >>sys.stderr, 'Warning: it was modified more than an hour ago'
    age = time.gmtime(age_sec)
    
    print >>sys.stderr, 'If you want to run the alignment again, remove it'
    print >>sys.stderr, "using existing 'tmp_align_f.needle'..."
"""

Пример #3

Показать файл

def _project_on_reference(refseq, read):
    """Return ``[sequence, align_start]`` for one aligned reference/read pair.

    ``refseq`` and ``read`` are the two rows of a pairwise alignment, with
    ``-`` marking gaps.  Only the columns where both rows have started and
    neither has ended are considered.  Within them, a gap in the read is
    filled with the reference character, and a column where the reference
    has a gap (an insertion in the read) is dropped.
    """
    q_align_start = len(read) - len(read.lstrip('-'))
    q_align_end = len(read.rstrip('-'))

    m_align_start = len(refseq) - len(refseq.lstrip('-'))
    m_align_end = len(refseq.rstrip('-'))

    align_start = max(m_align_start, q_align_start)
    align_end = min(m_align_end, q_align_end)

    # NOTE(review): align_end is already one past the last shared column, so
    # the `+ 1` also takes in the following column (where at least one row
    # is in its trailing gaps).  Kept unchanged to preserve the existing
    # output; confirm whether this is intended.
    this_read = []
    for ref_char, read_char in zip(refseq[align_start:align_end + 1],
                                   read[align_start:align_end + 1]):
        if read_char == '-':
            this_read.append(ref_char)
        elif ref_char != '-':
            this_read.append(read_char)
        # else: the reference has a gap here, so the column is skipped
    return [''.join(this_read), align_start]


def align_reads(filename):
    """Align reads to the reference in both orientations, keep the best.

    The reads in ``filename`` are written out twice: as read
    (``tmp_reads_f.fas``) and reverse-complemented (``tmp_reads_r.fas``).
    Each file is aligned with needle against ``ref_genome`` (a name defined
    outside this function), unless ``tmp_align_f.needle`` or
    ``tmp_align_r.needle`` already exists, in which case the existing file
    is reused.  The two alignment files are then read in lockstep and, for
    every read, the orientation with the higher ``sw_score`` is kept.

    Args:
        filename: path of a FASTA file with the reads.

    Returns:
        A dict mapping each read id to ``[sequence, align_start]``, where
        ``sequence`` is the upper-cased read with in-dels discarded (gaps
        filled from the reference, insertions dropped) and ``align_start``
        is the 0-based alignment column where both rows have started.

    Raises:
        AssertionError: if the forward and reverse alignments read in the
            same step do not refer to the same read id.
    """
    from pythonlib import EmbossStandalone
    from pythonlib.MarkxIO import Markx10Iterator

    needle_exe = 'needle'

    aligned_reads = {}

    with open(filename) as f_fasta:
        seqlist = list(SeqIO.parse(f_fasta, 'fasta'))
    countreads = len(seqlist)

    # forward...
    f_fasta_forward_filename = 'tmp_reads_f.fas'
    with open(f_fasta_forward_filename, 'w') as f_fasta_forward:
        SeqIO.write(seqlist, f_fasta_forward, 'fasta')

    # ...and reverse (the records are modified in place)
    for seq in seqlist:
        seq.seq = seq.seq.reverse_complement()
    f_fasta_reverse_filename = 'tmp_reads_r.fas'
    with open(f_fasta_reverse_filename, 'w') as f_fasta_reverse:
        SeqIO.write(seqlist, f_fasta_reverse, 'fasta')

    print >> sys.stderr, 'Found', countreads, 'reads'

    if not os.path.isfile('tmp_align_f.needle'):
        print >> sys.stderr, 'needle forward'
        EmbossStandalone.needle(needle_exe,
                                ref_genome,
                                f_fasta_forward_filename,
                                out='tmp_align_f.needle',
                                gapopen=6.0,
                                gapext=3.0,
                                aglobal3='False',
                                adesshow3='True')

    if not os.path.isfile('tmp_align_r.needle'):
        print >> sys.stderr, 'needle backward'
        EmbossStandalone.needle(needle_exe,
                                ref_genome,
                                f_fasta_reverse_filename,
                                out='tmp_align_r.needle',
                                gapopen=6.0,
                                gapext=3.0,
                                aglobal3='False',
                                adesshow3='True')

    # Nested `with` blocks so both handles are closed even if parsing fails.
    with open('tmp_align_f.needle') as f_forward:
        with open('tmp_align_r.needle') as f_reverse:
            forwardaligniter = Markx10Iterator(f_forward)
            reversealigniter = Markx10Iterator(f_reverse)

            while True:
                # Only the end of either iterator stops the loop; any other
                # error (e.g. a parsing failure) now propagates instead of
                # silently truncating the result.
                try:
                    f_align = next(forwardaligniter)
                    r_align = next(reversealigniter)
                except StopIteration:
                    break

                # The iterators may also signal exhaustion by returning None.
                if f_align is None or r_align is None:
                    break

                this_id = f_align.get_all_seqs()[1].id
                assert this_id == r_align.get_all_seqs()[1].id, \
                    'same seq back and forward'

                # Keep the orientation with the strictly higher score; ties
                # go to the reverse alignment.
                if float(f_align._annotations['sw_score']) > float(
                        r_align._annotations['sw_score']):
                    best = f_align
                else:
                    best = r_align

                refseq = best.get_seq_by_num(0).tostring().upper()
                tmp = best.get_seq_by_num(1).tostring().upper()

                aligned_reads[this_id] = _project_on_reference(refseq, tmp)

    return aligned_reads

Пример #4

Показать файл

def align_to_ref(al_exe, ref_file, reads_file, gen_length):
    """
    Calls water standalone program to align reads to reference genome
    """
    from pythonlib import EmbossStandalone
    import MyAlignIO
    import time

    max_read_length = 300
    format = 'markx10'
    align_file = '%s.needle' % reads_file.rstrip('.fas')
    out_reads = {}
    cov_prof = [0] * (2 * gen_length + max_read_length)

    if not os.path.isfile(align_file):
        print 'Aligning reads via Needleman-Wunsch algorithm'
        EmbossStandalone.needle(al_exe,
                                ref_file,
                                reads_file,
                                out=align_file,
                                gapopen=go_default,
                                gapext=ge_default,
                                aglobal3='False')
    else:
        print 'The alignment file', align_file, 'is already present'
        statinfo = os.stat(align_file)
        age_sec = time.time() - statinfo.st_mtime
        if age_sec > 3600:
            print 'Warning: it was modified more than an hour ago'
        age = time.gmtime(age_sec)

        print 'If you want to run the alignment again, remove it'

    assert os.path.isfile(align_file), 'File %s not found' % align_file
    handle = open(align_file, 'rU')
    print 'Parsing alignment output'

    for alin in MyAlignIO.parse(handle, format):
        assert len(alin.get_all_seqs()) == 2, "Should be pairwise!"
        alength = int(alin.get_alignment_length())
        #        print 'Alignment is', alength, 'bases long'

        record = iter(alin)

        # These are the information of the query sequence, i.e. the reference
        query_rec = record.next()
        assert query_rec.name == 'query', 'This should be the query'
        qstart = int(query_rec.annotations['al_start'])
        qstop = int(query_rec.annotations['al_stop'])

        gaps_query = 0
        qst = query_rec.seq.tostring()

        qls = list(qst)
        for c in qst.strip('-'):
            if c == '-':
                gaps_query = gaps_query + 1

        # These are for the matching sequences, i.e. the reads
        match_rec = record.next()
        assert match_rec.name == 'match', 'This should be the match'

        mst = match_rec.seq.tostring()

        mls = list(mst)

        for c in mls:
            if c != '-':
                mstart = mls.index(c) + 1
                break
        mstop = len(mst.rstrip('-'))

        # counts the gaps in the read (no flanking gaps)
        gaps_match = 0
        for c in mst.strip('-'):
            if c == '-':
                gaps_match = gaps_match + 1
        match_length = len(mst.strip('-'))
        if gaps_query + gaps_match > round(tolerance * match_length):
            # print 'too many indels,', (gaps_query + gaps_match)
            continue

        out_reads[match_rec.id] = [None, None, None, None, []]
        out_reads[
            match_rec.id][0] = qstart  # is this really useful at this time?
        out_reads[
            match_rec.id][1] = qstop  # is this really useful at this time?
        out_reads[match_rec.id][2] = mstart  # this is
        out_reads[match_rec.id][3] = mstop  # this too

        for i in range(mstart, mstop + 1):
            try:
                cov_prof[i] = cov_prof[i] + 1
            except IndexError:
                print 'out of coverage', i
        this_q = qls[mstart - 1:mstop]
        this_m = list(mst.strip('-'))

        assert len(this_q) == len(this_m), 'Length must be the same %d %d' % (
            len(this_q), len(this_m))

        amb_calls = 0

        # There are three possibilities: insertions, deletions, no in-dels
        for i in range(len(this_m)):

            if this_m[i] == '-' and this_q[i] != '-':
                out_reads[match_rec.id][4].append('-')

            if this_m[i] != '-' and this_q[i] == '-':
                pass

            if this_m[i] != '-' and this_q[i] != '-':
                out_reads[match_rec.id][4].append(this_m[i])

            # This should never happen
            if this_m[i] == '-' and this_q[i] == '-':
                print 'Should this happen?'
                sys.exit()

            if this_m[i] == 'N':
                amb_calls = amb_calls + 1
                if verbose:
                    print >> sys.stderr, 'Found an N in', match_rec.id

        if amb_calls > amb_thresh:
            if verbose:
                print 'Read', match_rec.id, 'has too many Ns'
            del out_reads[match_rec.id]
    cp = open('./%s.covprof' % reads_file.rstrip('_reads.fas'), 'w')
    for i in range(1, gen_length):
        cp.write('%i\t%i\n' % (i, cov_prof[i]))
    cp.close()

    return out_reads