Пример #1
0
def format_out(groups, amp_num, fa_amp_file_name, session_dir):
    '''Format output

    Reads the amplicon FASTA file ``fa_amp_file_name`` inside
    ``session_dir``, then, for every group in ``groups``, writes the
    group's sequences with write_amplicons() and (for groups with two or
    more members) runs run_t_coffee() on the written file.

    groups            -- iterable of groups; each group is a sequence of
                         (amp_id, strand) pairs
    amp_num           -- not used by this function; kept so existing
                         callers keep working
    fa_amp_file_name  -- FASTA file name, relative to session_dir
    session_dir       -- directory holding the FASTA file and the files
                         produced for each group

    Returns a list of (group_number, members, result_file) tuples where
    group_number starts at 1, members is a string such as
    "id1 (strand), id2 (strand)" and result_file is the base name of the
    file chosen for the group.

    Raises KeyError if a group names an amp_id that is not in the FASTA
    file.
    '''
    fa_path = os.path.join(session_dir, fa_amp_file_name)

    seq_dict = {}
    fa_handle = open(fa_path)
    try:
        # Consume the parser while the handle is still open, in case
        # FastaFormatParser.parse reads the file lazily.
        for record in FastaFormatParser.parse(fa_handle):
            # Key on the first whitespace-delimited token of the id.
            seq_id = record['id'].split()[0]
            desc = record['desc']
            # presumably wraps the sequence at 80 columns -- confirm in chilli
            seq = chilli.print_seq(record['seq'], 80)
            seq_dict[seq_id] = '>%s %s%s%s%s' % (
                seq_id, desc, os.linesep, seq, os.linesep)
    finally:
        # The original never closed this handle.
        fa_handle.close()

    group_list = []
    for group_sn, group in enumerate(groups):
        seq_list = [seq_dict[amp_id] for amp_id, strand in group]

        file_name = write_amplicons(session_dir, seq_list)
        file_name = os.path.join(session_dir, file_name)

        # A group with fewer than two members is not passed to
        # run_t_coffee; its freshly written file is reported directly.
        if len(group) < 2:
            t_coffee_result = file_name
        else:
            t_coffee_result = run_t_coffee(file_name)

        t_coffee_result = os.path.basename(t_coffee_result)

        members = ', '.join(
            ['%s (%s)' % (amp_id, strand) for amp_id, strand in group])
        group_list.append((group_sn + 1, members, t_coffee_result))

    return group_list
Пример #2
0
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    '''Format output in primer task

    Builds the plain-text report -- header from print_head(), query and
    database summary, one summary row per amplicon, a detail section per
    amplicon, footer from print_foot() -- and writes it, UTF-8 encoded,
    to ``options.outfile``.  When ``options.amplicon`` is true the
    amplicon sequences are also written in FASTA form to
    ``options.outfile.name + '.fa'``.

    amp_list    -- list of (ave_Tm, ppc, amp_len, amp) rows, where amp is
                   a dict describing one amplicon; sorted IN PLACE
    oligos      -- sequence of dicts, each with an 'id' key
    options     -- object providing .database, .outfile and .amplicon
    start_time  -- passed through to print_foot()
    session_dir -- not used by this function; kept for interface
                   compatibility

    Returns None.
    '''
    linesep = os.linesep
    delta = u'\u0394'
    celsius = u'\u2103'

    out = []
    out = print_head(out, options)

    ID_list = [oligo['id'] for oligo in oligos]

    query_line = textwrap.fill('Query = %s' % ('; '.join(ID_list)), 80)
    out.append(query_line)
    out.append('        %s primer sequences' % (len(oligos)))

    out.append(linesep)

    db_names = ', '.join([os.path.basename(db) for db in options.database])
    out.append('Database = %s' % textwrap.fill(db_names, 80))
    out.append(linesep)

    out.append('Reports Beginning'.ljust(80, '.'))
    out.append(linesep * 2)

    amp_num = len(amp_list)
    if amp_num > 1:
        out.append('Distribution of %s potential PCR amplicons predicted by MFEprimer-2.0 on the query primers' % amp_num)
    else:
        out.append('Distribution of %s potential PCR amplicon predicted by MFEprimer-2.0 on the query primers' % amp_num)

    out.append(linesep)
    out.append('[Sorted by average Tm in descending order]')
    out.append('FP '.rjust(69) + 'RP '.rjust(8) + 'FP '.rjust(8)
               + 'RP '.rjust(8) + 'FP '.rjust(8) + 'RP '.rjust(8))
    # Δ takes two characters position
    out.append('Size'.rjust(53) + 'PPC '.rjust(8) + 'Tm '.rjust(8)
               + 'Tm '.rjust(8) + ('%sG' % delta).rjust(7)
               + ('%sG' % delta).rjust(7) + ('3\'%sG' % delta).rjust(8)
               + ('3\'%sG' % delta).rjust(7))
    out.append('Primers producing potential PCR products:'.ljust(42)
               + '(bp)'.rjust(11) + '(%) '.rjust(8) + celsius.rjust(6)
               + celsius.rjust(7) + '(kcal/mol)'.center(22)
               + '(kcal/mol)'.center(14))
    out.append(linesep)

    detail_line = []
    fa_file = []
    sn = 0
    # NOTE(review): rows unpack as (ave_Tm, ppc, amp_len, amp), so this key
    # orders by ppc and then amp_len, while the report text says "Sorted by
    # average Tm" -- confirm which ordering is intended.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for _ave_Tm, ppc, _row_len, amp in amp_list:
        sn = sn + 1
        hid = amp['real_hid']
        desc = '%s: %s' % (sn, hid)

        # The size stored in the amplicon dict is what gets reported,
        # not the third element of the row.
        amp_len = amp['size']

        # Forward ("p") primer fields.
        p_qid = amp['pid']
        f_len = amp['plen']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        # Reverse ("m") primer fields.
        m_qid = amp['mid']
        r_len = amp['mlen']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        amp_graphic = amp['amp_graphic']
        mid_seq = amp['mid_seq']
        hdesc = amp['hdesc']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        # NOTE(review): the truncated text is 45 characters, so ljust(42)
        # below does not pad it and the row is 3 columns wider than rows
        # with short ids -- confirm whether that is acceptable.
        if len(desc) > 42:
            desc = desc[:42] + '...'

        # When both ends come from the same primer id the PPC is shown
        # with a leading minus sign.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        out.append(desc.ljust(42) + (str(amp_len)).rjust(11) + ppc.rjust(8)
                   + ('%.1f' % p_Tm).rjust(8) + ('%.1f' % m_Tm).rjust(8)
                   + ('%.1f' % p_DeltaG).rjust(8)
                   + ('%.1f' % m_DeltaG).rjust(8)
                   + ('%.1f' % p_3_DeltaG).rjust(8)
                   + ('%.1f' % m_3_DeltaG).rjust(8))

        # NOTE(review): the two fa_desc formats differ (only the first has
        # the '+' between the primer ids); left as-is because downstream
        # consumers of the FASTA header are not visible here.
        if not hdesc:
            detail_line.append('%s: %s + %s ==> %s%s' % (sn, p_qid, m_qid, hid, linesep))
            fa_desc = '>%s %s + %s %s' % (sn, p_qid, m_qid, hid)
        else:
            detail_line.append('%s: %s + %s ==> %s %s%s' % (sn, p_qid, m_qid, hid, hdesc, linesep))
            fa_desc = '>%s %s %s %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        detail_line.append('  ' + 'PPC = %s%%, Size = %s bp, GC content = %.1f%%' % (ppc, amp_len, amp_GC))
        detail_line.append('  ' + 'FP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' % (p_Tm, celsius, delta, p_DeltaG, delta, p_3_DeltaG))
        detail_line.append('  ' + 'RP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' % (m_Tm, celsius, delta, m_DeltaG, delta, m_3_DeltaG))
        detail_line.append('  ' + 'Binding sites: %s(%s/%s) ... %s(%s/%s)' % (p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len))
        detail_line.append(linesep)
        detail_line.append(amp_graphic + linesep)
        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_file.append(fa_desc)
        fa_file.append(fa_seq)
        detail_line.append(fa_desc + linesep + fa_seq + linesep)

    out.append(linesep)
    out.append('Details for the primers binding to the DNA template')
    out.append('[Sorted by average Tm in descending order]' + linesep)
    out.extend(detail_line)

    out.append(linesep * 2)
    out = print_foot(out, options, start_time)

    out = os.linesep.join(out)
    options.outfile.write(out.encode('utf-8'))

    if options.amplicon:
        out_file = options.outfile.name + '.fa'
        try:
            fh = open(out_file, 'w')
        except IOError:
            msg = 'Error: can not open %s for write' % out_file
            print2stderr(msg)
            # The original fell through to fh.write() with fh unbound.
            # Whether print2stderr() itself exits is not visible here, so
            # stop explicitly.
            return

        try:
            fh.write(os.linesep.join(fa_file))
        finally:
            fh.close()
Пример #3
0
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    '''Format output in primer task

    Assembles the text report (print_head() header, query/database
    summary, a summary table with one row per amplicon, a detail section
    per amplicon, print_foot() footer) and writes it as UTF-8 to
    ``options.outfile``.  If ``options.amplicon`` is true, the amplicon
    sequences are additionally written as FASTA to
    ``options.outfile.name + '.fa'``.

    amp_list    -- list of (ave_Tm, ppc, amp_len, amp) rows, amp being a
                   dict describing one amplicon; the list is sorted IN
                   PLACE
    oligos      -- sequence of dicts that each carry an 'id' key
    options     -- object exposing .database, .outfile and .amplicon
    start_time  -- forwarded to print_foot()
    session_dir -- unused here; retained so callers need not change

    Returns None.
    '''
    linesep = os.linesep
    delta = u'\u0394'
    celsius = u'\u2103'

    out = []

    out = print_head(out, options)

    ID_list = [oligo['id'] for oligo in oligos]

    query_line = textwrap.fill('Query = %s' % ('; '.join(ID_list)), 80)
    out.append(query_line)
    out.append('        %s primer sequences' % (len(oligos)))

    out.append(linesep)

    out.append('Database = %s' % textwrap.fill(
        ', '.join([os.path.basename(db) for db in options.database]), 80))
    out.append(linesep)

    out.append('Reports Beginning'.ljust(80, '.'))
    out.append(linesep * 2)

    amp_num = len(amp_list)
    if amp_num > 1:
        out.append(
            'Distribution of %s potential PCR amplicons predicted by MFEprimer-2.0 on the query primers'
            % amp_num)
    else:
        out.append(
            'Distribution of %s potential PCR amplicon predicted by MFEprimer-2.0 on the query primers'
            % amp_num)

    out.append(linesep)
    out.append('[Sorted by average Tm in descending order]')
    out.append('FP '.rjust(69) + 'RP '.rjust(8) + 'FP '.rjust(8) +
               'RP '.rjust(8) + 'FP '.rjust(8) + 'RP '.rjust(8))
    # Δ takes two characters position
    out.append('Size'.rjust(53) + 'PPC '.rjust(8) + 'Tm '.rjust(8) +
               'Tm '.rjust(8) + ('%sG' % delta).rjust(7) +
               ('%sG' % delta).rjust(7) + ('3\'%sG' % delta).rjust(8) +
               ('3\'%sG' % delta).rjust(7))
    out.append('Primers producing potential PCR products:'.ljust(42) +
               '(bp)'.rjust(11) + '(%) '.rjust(8) + celsius.rjust(6) +
               celsius.rjust(7) + '(kcal/mol)'.center(22) +
               '(kcal/mol)'.center(14))
    out.append(linesep)

    detail_line = []
    fa_file = []
    sn = 0
    # NOTE(review): each row unpacks as (ave_Tm, ppc, amp_len, amp), so
    # itemgetter(1, 2) sorts on ppc then amp_len even though the report
    # announces "Sorted by average Tm" -- confirm the intended key.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for _ave_Tm, ppc, _row_len, amp in amp_list:
        sn = sn + 1
        hid = amp['real_hid']
        desc = '%s: %s' % (sn, hid)

        # Reported size comes from the amplicon dict, not from the row.
        amp_len = amp['size']

        # Forward ("p") primer fields.
        p_qid = amp['pid']
        f_len = amp['plen']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        # Reverse ("m") primer fields.
        m_qid = amp['mid']
        r_len = amp['mlen']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        amp_graphic = amp['amp_graphic']
        mid_seq = amp['mid_seq']
        hdesc = amp['hdesc']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        # NOTE(review): after truncation desc is 45 characters long, so
        # ljust(42) below cannot pad it and such rows run 3 columns wide
        # -- confirm whether this is intended.
        if len(desc) > 42:
            desc = desc[:42] + '...'

        # Same primer id on both ends: PPC is printed with a leading '-'.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        out.append(
            desc.ljust(42) + (str(amp_len)).rjust(11) + ppc.rjust(8) +
            ('%.1f' % p_Tm).rjust(8) + ('%.1f' % m_Tm).rjust(8) +
            ('%.1f' % p_DeltaG).rjust(8) + ('%.1f' % m_DeltaG).rjust(8) +
            ('%.1f' % p_3_DeltaG).rjust(8) + ('%.1f' % m_3_DeltaG).rjust(8))

        # NOTE(review): only the first fa_desc format has '+' between the
        # primer ids; kept unchanged because FASTA-header consumers are
        # not visible from here.
        if not hdesc:
            detail_line.append('%s: %s + %s ==> %s%s' %
                               (sn, p_qid, m_qid, hid, linesep))
            fa_desc = '>%s %s + %s %s' % (sn, p_qid, m_qid, hid)
        else:
            detail_line.append('%s: %s + %s ==> %s %s%s' %
                               (sn, p_qid, m_qid, hid, hdesc, linesep))
            fa_desc = '>%s %s %s %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        detail_line.append('  ' +
                           'PPC = %s%%, Size = %s bp, GC content = %.1f%%' %
                           (ppc, amp_len, amp_GC))
        detail_line.append(
            '  ' +
            'FP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)'
            % (p_Tm, celsius, delta, p_DeltaG, delta, p_3_DeltaG))
        detail_line.append(
            '  ' +
            'RP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)'
            % (m_Tm, celsius, delta, m_DeltaG, delta, m_3_DeltaG))
        detail_line.append(
            '  ' + 'Binding sites: %s(%s/%s) ... %s(%s/%s)' %
            (p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len))
        detail_line.append(linesep)
        detail_line.append(amp_graphic + linesep)
        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_file.append(fa_desc)
        fa_file.append(fa_seq)
        detail_line.append(fa_desc + linesep + fa_seq + linesep)

    out.append(linesep)
    out.append('Details for the primers binding to the DNA template')
    out.append('[Sorted by average Tm in descending order]' + linesep)
    out.extend(detail_line)

    out.append(linesep * 2)
    out = print_foot(out, options, start_time)

    out = os.linesep.join(out)
    options.outfile.write(out.encode('utf-8'))

    if options.amplicon:
        out_file = options.outfile.name + '.fa'
        try:
            fh = open(out_file, 'w')
        except IOError:
            msg = 'Error: can not open %s for write' % out_file
            print2stderr(msg)
            # Previously control continued to fh.write() with fh unbound.
            # It is not visible here whether print2stderr() exits, so
            # return explicitly.
            return

        try:
            fh.write(os.linesep.join(fa_file))
        finally:
            fh.close()
Пример #4
0
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    '''Format output in primer task

    Turns every amplicon in ``amp_list`` into one flat tuple of
    display-ready values and tallies how often each size, Tm and DeltaG
    value occurs.

    amp_list    -- list of (ave_Tm, ppc, amp_len, amp) rows, where amp is
                   a dict describing one amplicon; sorted IN PLACE
    oligos, options, start_time, session_dir
                -- not used by this function; kept so the signature stays
                   compatible with callers

    Returns (detail, size_dict, tm_dict, dg_dict):
      detail    -- list with one tuple per amplicon:
                   (sn, acc, p_qid, m_qid, amp_len, ppc, p_Tm, m_Tm,
                    p_DeltaG, m_DeltaG, p_3_DeltaG, m_3_DeltaG, amp_GC,
                    p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len,
                    amp_graphic, fa_seq, hid, hdesc)
                   Tm, DeltaG, PPC and GC values are '%.1f' strings.
      size_dict -- amplicon size -> number of amplicons with that size
      tm_dict   -- formatted Tm string -> count over both primers
      dg_dict   -- formatted DeltaG string -> count over both primers
    '''
    linesep = os.linesep
    detail = []

    size_dict = {}
    dg_dict = {}
    tm_dict = {}
    sn = 0
    # NOTE(review): rows unpack as (ave_Tm, ppc, amp_len, amp), so this key
    # orders by ppc and then amp_len -- confirm that is the intended order.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for _ave_Tm, ppc, _row_len, amp in amp_list:
        sn = sn + 1
        hid = amp['real_hid']
        # Use the 4th '|'-separated field of the hit id when it exists,
        # otherwise fall back to the whole id.
        try:
            acc = hid.split('|')[3]
        except (IndexError, AttributeError):
            acc = hid

        # The size stored in the amplicon dict is what gets reported,
        # not the third element of the row.
        amp_len = amp['size']

        # Forward ("p") primer fields; numeric values are pre-formatted.
        p_qid = amp['pid']
        f_len = amp['plen']
        pseq = amp['pseq']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = '%.1f' % amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = '%.1f' % amp['p_Tm']
        p_DeltaG = '%.1f' % amp['p_DeltaG']
        p_sb = f_3_pos - len(pseq) + 1

        # Reverse ("m") primer fields.
        m_qid = amp['mid']
        r_len = amp['mlen']
        mseq = amp['mseq']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = '%.1f' % amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = '%.1f' % amp['m_Tm']
        m_DeltaG = '%.1f' % amp['m_DeltaG']
        m_se = r_3_pos + len(mseq)

        amp_graphic = amp['amp_graphic']
        mid_seq = amp['mid_seq']
        hdesc = amp['hdesc']

        amp_seq = p_tail + p_qseq + mid_seq + m_qseq + m_tail
        amp_GC = '%.1f' % chilli.cal_GC_content(amp_seq, amp_len)

        # Same primer id on both ends: PPC is given a leading '-'.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        if not hdesc:
            fa_desc = '>Amp_%s %s + %s ==> %s' % (sn, p_qid, m_qid, hid)
        else:
            fa_desc = '>Amp_%s %s + %s ==> %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_seq = fa_desc + linesep + fa_seq + linesep

        size_dict[amp_len] = size_dict.get(amp_len, 0) + 1

        # Both primers feed the same Tm / DeltaG histograms.
        tm_dict[p_Tm] = tm_dict.get(p_Tm, 0) + 1
        tm_dict[m_Tm] = tm_dict.get(m_Tm, 0) + 1

        dg_dict[p_DeltaG] = dg_dict.get(p_DeltaG, 0) + 1
        dg_dict[m_DeltaG] = dg_dict.get(m_DeltaG, 0) + 1

        detail.append((sn, acc, p_qid, m_qid, amp_len, ppc, p_Tm, m_Tm,
                       p_DeltaG, m_DeltaG, p_3_DeltaG, m_3_DeltaG, amp_GC,
                       p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len,
                       amp_graphic, fa_seq, hid, hdesc))

    return detail, size_dict, tm_dict, dg_dict