Пример #1
0
def format_oligos(oligos, options):
    """Build display-ready rows for a collection of oligos.

    Args:
        oligos: iterable of dicts; each is read for the keys 'id', 'seq'
            and 'size'.
        options: object providing mono_conc, diva_conc, dntp_conc and
            oligo_conc, which are forwarded to TmDeltaG.calTm.

    Returns:
        A list of (id, seq, size, GC, Tm) tuples. seq is upper-cased; GC and
        Tm are strings formatted with one decimal place.
    """
    oligo_list = []
    for oligo in oligos:
        # 'oligo_id' rather than 'id' so the builtin is not shadowed.
        oligo_id = oligo['id']
        seq = oligo['seq'].upper()
        size = oligo['size']
        gc_content = chilli.cal_GC_content(seq, size)
        tm = TmDeltaG.calTm(
            seq,
            Seq.complement(seq),
            mono_conc=options.mono_conc,
            diva_conc=options.diva_conc,
            dntp_conc=options.dntp_conc,
            oligo_conc=options.oligo_conc,
        )
        oligo_list.append(
            (oligo_id, seq, size, '%.1f' % gc_content, '%.1f' % tm))

    return oligo_list
Пример #2
0
def tab_out(amp_list, oligos, options, start_time, session_dir):
    """Write the amplicons as a tab-separated table to options.outfile.

    A header line is written first, followed by one row per entry of
    amp_list. amp_list is sorted in place (descending, keyed on elements 1
    and 2 of each entry) before the rows are emitted. The parameters oligos,
    start_time and session_dir are not used here.
    """
    sink = options.outfile
    sink.write(
        "AmpID\tFpID\tRpID\tHitID\tPPC\tSize\tAmpGC\tFpTm\tRpTm\tFpDg\tRpDg\tBindingStart\tBindingStop\tAmpSeq\n"
    )

    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for serial, (_ave_tm, ppc_value, _entry_len, amp) in enumerate(amp_list, 1):
        # The size stored in the amplicon dict takes precedence over the
        # third element of the entry.
        size = amp['size']

        # Forward ("plus") primer.
        fwd_id = amp['pid']
        fwd_3_end = amp['f3_pos']
        fwd_query = amp['p_qseq']
        fwd_aligned = amp['p_aseq']
        fwd_tail = amp['p_tail']
        fwd_tm = amp['p_Tm']
        fwd_dg = amp['p_DeltaG']
        binding_start = fwd_3_end - len(fwd_aligned) + 1

        # Reverse ("minus") primer.
        rev_id = amp['mid']
        rev_3_end = amp['r3_pos']
        rev_query = amp['m_qseq']
        rev_aligned = amp['m_aseq']
        rev_tail = amp['m_tail']
        rev_tm = amp['m_Tm']
        rev_dg = amp['m_DeltaG']
        binding_stop = rev_3_end + len(rev_aligned)

        inner_seq = amp['mid_seq']
        hit_id = amp['real_hid']

        full_seq = fwd_tail + fwd_query + inner_seq + rev_query + rev_tail
        # The unary plus mirrors the original call; it is a no-op for numbers.
        gc_percent = chilli.cal_GC_content(full_seq, +size)

        # PPC gets a leading minus when both ends come from the same primer.
        ppc_text = ('-%.1f' if fwd_id == rev_id else '%.1f') % ppc_value

        row = (serial, fwd_id, rev_id, hit_id, ppc_text, size, gc_percent,
               fwd_tm, rev_tm, fwd_dg, rev_dg, binding_start, binding_stop,
               full_seq)
        sink.write(
            "%d\t%s\t%s\t%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%d\t%d\t%s\n"
            % row)
Пример #3
0
def tab_out(amp_list, oligos, options, start_time, session_dir):
    """Write the amplicons as a tab-separated table to options.outfile.

    Args:
        amp_list: list of 4-element entries (ave_Tm, ppc, amp_len, amp), where
            amp is a dict describing one amplicon. The list is sorted in
            place.
        oligos: unused.
        options: object whose outfile attribute has a write() method.
        start_time: unused.
        session_dir: unused.

    Output columns: AmpID, FpID, RpID, HitID, PPC, Size, AmpGC, FpTm, RpTm,
    FpDg, RpDg, BindingStart, BindingStop, AmpSeq.
    """
    options.outfile.write(
        "AmpID\tFpID\tRpID\tHitID\tPPC\tSize\tAmpGC\tFpTm\tRpTm\tFpDg\tRpDg\tBindingStart\tBindingStop\tAmpSeq\n"
    )

    # Descending order on elements 1 and 2 of each entry.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_Tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        # The size recorded in the amplicon dict is the one reported.
        amp_len = amp['size']

        # p for plus, m for minus primer (naming kept for historical reasons)
        p_qid = amp['pid']
        f_3_pos = amp['f3_pos']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        m_qid = amp['mid']
        r_3_pos = amp['r3_pos']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        mid_seq = amp['mid_seq']
        real_hid = amp['real_hid']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        # PPC is shown with a leading minus when both ends of the amplicon
        # come from the same primer.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        options.outfile.write(
            "%d\t%s\t%s\t%s\t%s\t%d\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%d\t%d\t%s\n"
            % (sn, p_qid, m_qid, real_hid, ppc, amp_len, amp_GC, p_Tm, m_Tm,
               p_DeltaG, m_DeltaG, p_sb, m_se, amp_seq))
Пример #4
0
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    """Write the human-readable primer report to options.outfile.

    The report is made of the header from print_head, a summary table with
    one row per amplicon, a detail section per amplicon, and the footer from
    print_foot. The joined text is UTF-8 encoded before being written. When
    options.amplicon is true, the amplicon sequences are also written in
    FASTA form to '<options.outfile.name>.fa'.

    Args:
        amp_list: list of 4-element entries (ave_Tm, ppc, amp_len, amp), where
            amp is a dict describing one amplicon. Sorted in place.
        oligos: sequence of dicts, each with an 'id' key.
        options: object providing outfile, database and amplicon.
        start_time: forwarded to print_foot.
        session_dir: unused.
    """
    linesep = os.linesep

    out = print_head([], options)

    ID_list = [oligo['id'] for oligo in oligos]

    query_line = textwrap.fill('Query = %s' % ('; '.join(ID_list)), 80)
    out.append(query_line)
    out.append('        %s primer sequences' % (len(oligos)))

    out.append(linesep)

    db_names = ', '.join([os.path.basename(db) for db in options.database])
    out.append('Database = %s' % textwrap.fill(db_names, 80))
    out.append(linesep)

    out.append('Reports Beginning'.ljust(80, '.'))
    out.append(linesep * 2)

    amp_num = len(amp_list)
    if amp_num > 1:
        out.append('Distribution of %s potential PCR amplicons predicted by MFEprimer-2.0 on the query primers' % amp_num)
    else:
        out.append('Distribution of %s potential PCR amplicon predicted by MFEprimer-2.0 on the query primers' % amp_num)

    out.append(linesep)
    out.append('[Sorted by average Tm in descending order]')
    out.append('FP '.rjust(69) + 'RP '.rjust(8) + 'FP '.rjust(8) +
               'RP '.rjust(8) + 'FP '.rjust(8) + 'RP '.rjust(8))
    # Per the original note, the delta sign takes two character positions,
    # hence the narrower rjust widths on those columns.
    out.append('Size'.rjust(53) + 'PPC '.rjust(8) + 'Tm '.rjust(8) +
               'Tm '.rjust(8) + ('%sG' % u'\u0394').rjust(7) +
               ('%sG' % u'\u0394').rjust(7) +
               ('3\'%sG' % u'\u0394').rjust(8) +
               ('3\'%sG' % u'\u0394').rjust(7))
    out.append('Primers producing potential PCR products:'.ljust(42) +
               '(bp)'.rjust(11) + '(%) '.rjust(8) + u'\u2103'.rjust(6) +
               u'\u2103'.rjust(7) + '(kcal/mol)'.center(22) +
               '(kcal/mol)'.center(14))
    out.append(linesep)

    detail_line = []
    fa_file = []
    # NOTE(review): the report labels the ordering "average Tm", but the sort
    # key is elements 1 and 2 of each entry -- confirm against the code that
    # builds amp_list.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_Tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        hid = amp['real_hid']
        desc = '%s: %s' % (sn, hid)

        # The size recorded in the amplicon dict is the one reported.
        amp_len = amp['size']

        # Forward ("plus") primer.
        p_qid = amp['pid']
        f_len = amp['plen']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        # Reverse ("minus") primer.
        m_qid = amp['mid']
        r_len = amp['mlen']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        amp_graphic = amp['amp_graphic']
        mid_seq = amp['mid_seq']
        hdesc = amp['hdesc']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        if len(desc) > 42:
            desc = desc[:42] + '...'

        # PPC is shown with a leading minus when both ends of the amplicon
        # come from the same primer.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        out.append(
            desc.ljust(42) + str(amp_len).rjust(11) + ppc.rjust(8) +
            ('%.1f' % p_Tm).rjust(8) + ('%.1f' % m_Tm).rjust(8) +
            ('%.1f' % p_DeltaG).rjust(8) + ('%.1f' % m_DeltaG).rjust(8) +
            ('%.1f' % p_3_DeltaG).rjust(8) + ('%.1f' % m_3_DeltaG).rjust(8))

        if not hdesc:
            detail_line.append('%s: %s + %s ==> %s%s' % (sn, p_qid, m_qid, hid, linesep))
            fa_desc = '>%s %s + %s %s' % (sn, p_qid, m_qid, hid)
        else:
            detail_line.append('%s: %s + %s ==> %s %s%s' % (sn, p_qid, m_qid, hid, hdesc, linesep))
            fa_desc = '>%s %s %s %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        detail_line.append('  ' + 'PPC = %s%%, Size = %s bp, GC content = %.1f%%' % (ppc, amp_len, amp_GC))
        detail_line.append('  ' + 'FP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' % (p_Tm, u'\u2103', u'\u0394', p_DeltaG, u'\u0394', p_3_DeltaG))
        detail_line.append('  ' + 'RP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' % (m_Tm, u'\u2103', u'\u0394', m_DeltaG, u'\u0394', m_3_DeltaG))
        detail_line.append('  ' + 'Binding sites: %s(%s/%s) ... %s(%s/%s)' % (p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len))
        detail_line.append(linesep)
        detail_line.append(amp_graphic + linesep)
        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_file.append(fa_desc)
        fa_file.append(fa_seq)
        detail_line.append(fa_desc + linesep + fa_seq + linesep)

    out.append(linesep)
    out.append('Details for the primers binding to the DNA template')
    out.append('[Sorted by average Tm in descending order]' + linesep)
    out.extend(detail_line)

    out.append(linesep * 2)
    out = print_foot(out, options, start_time)

    out = os.linesep.join(out)
    options.outfile.write(out.encode('utf-8'))

    if options.amplicon:
        # Built outside the try so the error message can always name the file.
        out_file = options.outfile.name + '.fa'
        try:
            fh = open(out_file, 'w')
        except (IOError, OSError):
            # Nothing was opened, so report the problem and skip the write
            # instead of touching an unbound file handle.
            msg = 'Error: can not open %s for write' % out_file
            print2stderr(msg)
        else:
            # The context manager closes the file even if the write fails.
            with fh:
                fh.write(os.linesep.join(fa_file))
Пример #5
0
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    """Write the human-readable primer report to options.outfile.

    The report is made of the header from print_head, a summary table with
    one row per amplicon, a detail section per amplicon, and the footer from
    print_foot. The joined text is UTF-8 encoded before being written. When
    options.amplicon is true, the amplicon sequences are also written in
    FASTA form to '<options.outfile.name>.fa'.

    Args:
        amp_list: list of 4-element entries (ave_Tm, ppc, amp_len, amp), where
            amp is a dict describing one amplicon. Sorted in place.
        oligos: sequence of dicts, each with an 'id' key.
        options: object providing outfile, database and amplicon.
        start_time: forwarded to print_foot.
        session_dir: unused.
    """
    linesep = os.linesep

    out = print_head([], options)

    ID_list = [oligo['id'] for oligo in oligos]

    query_line = textwrap.fill('Query = %s' % ('; '.join(ID_list)), 80)
    out.append(query_line)
    out.append('        %s primer sequences' % (len(oligos)))

    out.append(linesep)

    out.append('Database = %s' % textwrap.fill(
        ', '.join([os.path.basename(db) for db in options.database]), 80))
    out.append(linesep)

    out.append('Reports Beginning'.ljust(80, '.'))
    out.append(linesep * 2)

    amp_num = len(amp_list)
    if amp_num > 1:
        out.append(
            'Distribution of %s potential PCR amplicons predicted by MFEprimer-2.0 on the query primers'
            % amp_num)
    else:
        out.append(
            'Distribution of %s potential PCR amplicon predicted by MFEprimer-2.0 on the query primers'
            % amp_num)

    out.append(linesep)
    out.append('[Sorted by average Tm in descending order]')
    out.append('FP '.rjust(69) + 'RP '.rjust(8) + 'FP '.rjust(8) +
               'RP '.rjust(8) + 'FP '.rjust(8) + 'RP '.rjust(8))
    # Per the original note, the delta sign takes two character positions,
    # hence the narrower rjust widths on those columns.
    out.append('Size'.rjust(53) + 'PPC '.rjust(8) + 'Tm '.rjust(8) +
               'Tm '.rjust(8) + ('%sG' % u'\u0394').rjust(7) +
               ('%sG' % u'\u0394').rjust(7) + ('3\'%sG' % u'\u0394').rjust(8) +
               ('3\'%sG' % u'\u0394').rjust(7))
    out.append('Primers producing potential PCR products:'.ljust(42) +
               '(bp)'.rjust(11) + '(%) '.rjust(8) + u'\u2103'.rjust(6) +
               u'\u2103'.rjust(7) + '(kcal/mol)'.center(22) +
               '(kcal/mol)'.center(14))
    out.append(linesep)

    detail_line = []
    fa_file = []
    # NOTE(review): the report labels the ordering "average Tm", but the sort
    # key is elements 1 and 2 of each entry -- confirm against the code that
    # builds amp_list.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_Tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        hid = amp['real_hid']
        desc = '%s: %s' % (sn, hid)

        # The size recorded in the amplicon dict is the one reported.
        amp_len = amp['size']

        # Forward ("plus") primer.
        p_qid = amp['pid']
        f_len = amp['plen']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        # Reverse ("minus") primer.
        m_qid = amp['mid']
        r_len = amp['mlen']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        amp_graphic = amp['amp_graphic']
        mid_seq = amp['mid_seq']
        hdesc = amp['hdesc']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        if len(desc) > 42:
            desc = desc[:42] + '...'

        # PPC is shown with a leading minus when both ends of the amplicon
        # come from the same primer.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        out.append(
            desc.ljust(42) + (str(amp_len)).rjust(11) + ppc.rjust(8) +
            ('%.1f' % p_Tm).rjust(8) + ('%.1f' % m_Tm).rjust(8) +
            ('%.1f' % p_DeltaG).rjust(8) + ('%.1f' % m_DeltaG).rjust(8) +
            ('%.1f' % p_3_DeltaG).rjust(8) + ('%.1f' % m_3_DeltaG).rjust(8))

        if not hdesc:
            detail_line.append('%s: %s + %s ==> %s%s' %
                               (sn, p_qid, m_qid, hid, linesep))
            fa_desc = '>%s %s + %s %s' % (sn, p_qid, m_qid, hid)
        else:
            detail_line.append('%s: %s + %s ==> %s %s%s' %
                               (sn, p_qid, m_qid, hid, hdesc, linesep))
            fa_desc = '>%s %s %s %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        detail_line.append('  ' +
                           'PPC = %s%%, Size = %s bp, GC content = %.1f%%' %
                           (ppc, amp_len, amp_GC))
        detail_line.append(
            '  ' +
            'FP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)'
            % (p_Tm, u'\u2103', u'\u0394', p_DeltaG, u'\u0394', p_3_DeltaG))
        detail_line.append(
            '  ' +
            'RP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)'
            % (m_Tm, u'\u2103', u'\u0394', m_DeltaG, u'\u0394', m_3_DeltaG))
        detail_line.append(
            '  ' + 'Binding sites: %s(%s/%s) ... %s(%s/%s)' %
            (p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len))
        detail_line.append(linesep)
        detail_line.append(amp_graphic + linesep)
        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_file.append(fa_desc)
        fa_file.append(fa_seq)
        detail_line.append(fa_desc + linesep + fa_seq + linesep)

    out.append(linesep)
    out.append('Details for the primers binding to the DNA template')
    out.append('[Sorted by average Tm in descending order]' + linesep)
    out.extend(detail_line)

    out.append(linesep * 2)
    out = print_foot(out, options, start_time)

    out = os.linesep.join(out)
    options.outfile.write(out.encode('utf-8'))

    if options.amplicon:
        # Built outside the try so the error message can always name the file.
        out_file = options.outfile.name + '.fa'
        try:
            fh = open(out_file, 'w')
        except (IOError, OSError):
            # Nothing was opened, so report the problem and skip the write
            # instead of touching an unbound file handle.
            msg = 'Error: can not open %s for write' % out_file
            print2stderr(msg)
        else:
            # The context manager closes the file even if the write fails.
            with fh:
                fh.write(os.linesep.join(fa_file))
Пример #6
0
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    """Collect per-amplicon report rows and value histograms.

    Unlike a writer, this variant returns the data instead of emitting it.

    Args:
        amp_list: list of 4-element entries (ave_Tm, ppc, amp_len, amp), where
            amp is a dict describing one amplicon. Sorted in place.
        oligos: unused.
        options: unused.
        start_time: unused.
        session_dir: unused.

    Returns:
        A tuple (detail, size_dict, tm_dict, dg_dict):
          * detail: list of 23-element tuples
            (sn, acc, p_qid, m_qid, amp_len, ppc, p_Tm, m_Tm, p_DeltaG,
             m_DeltaG, p_3_DeltaG, m_3_DeltaG, amp_GC, p_sb, len(p_aseq),
             f_len, m_se, len(m_aseq), r_len, amp_graphic, fa_seq, hid,
             hdesc); Tm, DeltaG, GC and PPC values are pre-formatted strings
            with one decimal place.
          * size_dict: amplicon size -> number of amplicons of that size.
          * tm_dict: formatted Tm string -> occurrences (both primers).
          * dg_dict: formatted DeltaG string -> occurrences (both primers).
    """
    linesep = os.linesep
    detail = []

    size_dict = {}
    dg_dict = {}
    tm_dict = {}

    # Descending order on elements 1 and 2 of each entry.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_Tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        hid = amp['real_hid']
        # Use the fourth '|'-separated field of the hit id when it exists;
        # otherwise fall back to the whole id.
        try:
            acc = hid.split('|')[3]
        except (AttributeError, IndexError):
            acc = hid

        # The size recorded in the amplicon dict is the one reported.
        amp_len = amp['size']

        # Forward ("plus") primer.
        p_qid = amp['pid']
        f_len = amp['plen']
        pseq = amp['pseq']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = '%.1f' % amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = '%.1f' % amp['p_Tm']
        p_DeltaG = '%.1f' % amp['p_DeltaG']
        p_sb = f_3_pos - len(pseq) + 1

        # Reverse ("minus") primer.
        m_qid = amp['mid']
        r_len = amp['mlen']
        mseq = amp['mseq']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = '%.1f' % amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = '%.1f' % amp['m_Tm']
        m_DeltaG = '%.1f' % amp['m_DeltaG']
        m_se = r_3_pos + len(mseq)

        amp_graphic = amp['amp_graphic']
        mid_seq = amp['mid_seq']
        hdesc = amp['hdesc']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = '%.1f' % chilli.cal_GC_content(amp_seq, amp_len)

        # PPC is shown with a leading minus when both ends of the amplicon
        # come from the same primer.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        if not hdesc:
            fa_desc = '>Amp_%s %s + %s ==> %s' % (sn, p_qid, m_qid, hid)
        else:
            fa_desc = '>Amp_%s %s + %s ==> %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_seq = fa_desc + linesep + fa_seq + linesep

        # Histograms: one count per amplicon size, and one count per primer
        # for the formatted Tm and DeltaG values.
        size_dict[amp_len] = size_dict.get(amp_len, 0) + 1
        for tm_value in (p_Tm, m_Tm):
            tm_dict[tm_value] = tm_dict.get(tm_value, 0) + 1
        for dg_value in (p_DeltaG, m_DeltaG):
            dg_dict[dg_value] = dg_dict.get(dg_value, 0) + 1

        detail.append((sn, acc, p_qid, m_qid, amp_len, ppc, p_Tm, m_Tm,
                       p_DeltaG, m_DeltaG, p_3_DeltaG, m_3_DeltaG, amp_GC,
                       p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len,
                       amp_graphic, fa_seq, hid, hdesc))

    return detail, size_dict, tm_dict, dg_dict