def format_out(groups, amp_num, fa_amp_file_name, session_dir):
    '''Build one summary row per amplicon group.

    The amplicon FASTA file ``fa_amp_file_name`` inside ``session_dir`` is
    parsed into ``{amplicon id: FASTA text}``.  For every group the member
    sequences are written out through ``write_amplicons``; groups with two
    or more members are additionally passed through ``run_t_coffee``.

    Parameters:
        groups: iterable of groups; each group is a sized iterable of
            ``(amp_id, strand)`` pairs.
        amp_num: not used by this function (kept for the callers).
        fa_amp_file_name: name of the FASTA file, relative to session_dir.
        session_dir: directory holding the FASTA file and the outputs.

    Returns:
        A list of ``(group number starting at 1, 'id (strand), ...',
        result file base name)`` tuples.

    Raises:
        KeyError: if a group refers to an id that is absent from the file.
    '''
    seq_dict = {}
    fa_path = os.path.join(session_dir, fa_amp_file_name)
    # Consume the parser inside the ``with`` block so the handle is closed
    # deterministically, even if the parser reads the file lazily.
    with open(fa_path) as fa_handle:
        for record in FastaFormatParser.parse(fa_handle):
            # Only the first whitespace-delimited token of the id is the key.
            amp_id = record['id'].split()[0]
            wrapped_seq = chilli.print_seq(record['seq'], 80)
            seq_dict[amp_id] = '>%s %s%s%s%s' % (
                amp_id, record['desc'], os.linesep, wrapped_seq, os.linesep)

    group_list = []
    for group_sn, group in enumerate(groups, 1):
        seq_list = [seq_dict[amp_id] for amp_id, strand in group]
        file_name = write_amplicons(session_dir, seq_list)
        file_name = os.path.join(session_dir, file_name)

        if len(group) < 2:
            # Nothing to align: report the amplicon file itself.
            t_coffee_result = file_name
        else:
            t_coffee_result = run_t_coffee(file_name)
        t_coffee_result = os.path.basename(t_coffee_result)

        members = ', '.join(
            ['%s (%s)' % (amp_id, strand) for amp_id, strand in group])
        group_list.append((group_sn, members, t_coffee_result))

    return group_list
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    '''Format output in primer task.

    Builds the plain-text report (header, query/database summary, one
    summary row per amplicon, a detail section, footer), writes it UTF-8
    encoded to ``options.outfile`` and, when ``options.amplicon`` is true,
    also writes the amplicon sequences to ``<options.outfile.name>.fa``.

    Parameters:
        amp_list: list of 4-item sequences ``(ave_Tm, ppc, amp_len, amp)``
            where ``amp`` is a dict providing the keys read below
            ('real_hid', 'size', 'pid', 'plen', 'f3_pos', 'p_3_DeltaG',
            'p_qseq', 'p_aseq', 'p_tail', 'p_Tm', 'p_DeltaG', 'mid',
            'mlen', 'r3_pos', 'm_3_DeltaG', 'm_qseq', 'm_aseq', 'm_tail',
            'm_Tm', 'm_DeltaG', 'amp_graphic', 'mid_seq', 'hdesc').
            The list is sorted IN PLACE.
        oligos: sequence of dicts, each with an 'id' entry.
        options: object exposing ``database`` (iterable of paths),
            ``outfile`` (writable object with a ``name``) and ``amplicon``.
        start_time: forwarded to ``print_foot``.
        session_dir: not used by this function.

    Returns:
        None.
    '''
    linesep = os.linesep
    out = print_head([], options)

    # ---- query / database summary ------------------------------------
    ID_list = [oligo['id'] for oligo in oligos]
    out.append(textwrap.fill('Query = %s' % ('; '.join(ID_list)), 80))
    out.append(' %s primer sequences' % (len(oligos)))
    out.append(linesep)

    db_names = ', '.join([os.path.basename(db) for db in options.database])
    out.append('Database = %s' % textwrap.fill(db_names, 80))
    out.append(linesep)

    out.append('Reports Beginning'.ljust(80, '.'))
    out.append(linesep * 2)

    # ---- table caption and column headers ----------------------------
    amp_num = len(amp_list)
    if amp_num > 1:
        out.append('Distribution of %s potential PCR amplicons predicted by MFEprimer-2.0 on the query primers' % amp_num)
    else:
        out.append('Distribution of %s potential PCR amplicon predicted by MFEprimer-2.0 on the query primers' % amp_num)

    out.append(linesep)
    out.append('[Sorted by average Tm in descending order]')
    out.append('FP '.rjust(69) + 'RP '.rjust(8) + 'FP '.rjust(8)
               + 'RP '.rjust(8) + 'FP '.rjust(8) + 'RP '.rjust(8))
    # Original author's note: the Delta glyph (U+0394) takes two character
    # positions, which is why some of the widths below are 7 instead of 8.
    out.append('Size'.rjust(53) + 'PPC '.rjust(8) + 'Tm '.rjust(8)
               + 'Tm '.rjust(8)
               + ('%sG' % u'\u0394').rjust(7)
               + ('%sG' % u'\u0394').rjust(7)
               + ('3\'%sG' % u'\u0394').rjust(8)
               + ('3\'%sG' % u'\u0394').rjust(7))
    out.append('Primers producing potential PCR products:'.ljust(42)
               + '(bp)'.rjust(11) + '(%) '.rjust(8)
               + u'\u2103'.rjust(6) + u'\u2103'.rjust(7)
               + '(kcal/mol)'.center(22) + '(kcal/mol)'.center(14))
    out.append(linesep)

    detail_line = []
    fa_file = []

    # NOTE(review): the key is items 1 and 2 of each entry (unpacked below
    # as ppc and amp_len), while the caption says "average Tm" - confirm
    # which ordering is intended.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        hid = amp['real_hid']
        hdesc = amp['hdesc']
        desc = '%s: %s' % (sn, hid)
        amp_len = amp['size']  # the dict value is used, not the tuple's

        # Forward ("p") primer.
        p_qid = amp['pid']
        f_len = amp['plen']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        # Reverse ("m") primer.
        m_qid = amp['mid']
        r_len = amp['mlen']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        amp_graphic = amp['amp_graphic']
        amp_seq = p_tail + p_qseq + amp['mid_seq'] + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        if len(desc) > 42:
            desc = desc[:42] + '...'

        # PPC is shown with a leading minus when both primers are the same
        # oligo.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        out.append(desc.ljust(42) + (str(amp_len)).rjust(11) + ppc.rjust(8)
                   + ('%.1f' % p_Tm).rjust(8) + ('%.1f' % m_Tm).rjust(8)
                   + ('%.1f' % p_DeltaG).rjust(8)
                   + ('%.1f' % m_DeltaG).rjust(8)
                   + ('%.1f' % p_3_DeltaG).rjust(8)
                   + ('%.1f' % m_3_DeltaG).rjust(8))

        if not hdesc:
            detail_line.append('%s: %s + %s ==> %s%s' % (sn, p_qid, m_qid, hid, linesep))
            fa_desc = '>%s %s + %s %s' % (sn, p_qid, m_qid, hid)
        else:
            detail_line.append('%s: %s + %s ==> %s %s%s' % (sn, p_qid, m_qid, hid, hdesc, linesep))
            # NOTE(review): unlike the branch above, this header has no
            # '+' between the primer ids - left as is, confirm intent.
            fa_desc = '>%s %s %s %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        detail_line.append(' ' + 'PPC = %s%%, Size = %s bp, GC content = %.1f%%' % (ppc, amp_len, amp_GC))
        detail_line.append(' ' + 'FP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' % (p_Tm, u'\u2103', u'\u0394', p_DeltaG, u'\u0394', p_3_DeltaG))
        detail_line.append(' ' + 'RP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' % (m_Tm, u'\u2103', u'\u0394', m_DeltaG, u'\u0394', m_3_DeltaG))
        detail_line.append(' ' + 'Binding sites: %s(%s/%s) ... %s(%s/%s)' % (p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len))
        detail_line.append(linesep)
        detail_line.append(amp_graphic + linesep)

        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_file.append(fa_desc)
        fa_file.append(fa_seq)
        detail_line.append(fa_desc + linesep + fa_seq + linesep)

    # ---- detail section and footer -----------------------------------
    out.append(linesep)
    out.append('Details for the primers binding to the DNA template')
    out.append('[Sorted by average Tm in descending order]' + linesep)
    out.extend(detail_line)
    out.append(linesep * 2)

    out = print_foot(out, options, start_time)
    out = os.linesep.join(out)
    options.outfile.write(out.encode('utf-8'))

    if options.amplicon:
        out_file = options.outfile.name + '.fa'
        try:
            fh = open(out_file, 'w')
        except (IOError, OSError):
            # Report the failure and skip the FASTA export instead of
            # falling through to a write on an unbound handle.
            msg = 'Error: can not open %s for write' % out_file
            print2stderr(msg)
        else:
            with fh:
                fh.write(os.linesep.join(fa_file))
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    '''Format output in primer task.

    Assembles the text report for a primer run: the header produced by
    ``print_head``, a query/database summary, a table with one row per
    amplicon, a per-amplicon detail section and the footer produced by
    ``print_foot``.  The report is written UTF-8 encoded to
    ``options.outfile``; when ``options.amplicon`` is true the amplicon
    sequences are also written to ``<options.outfile.name>.fa``.

    Parameters:
        amp_list: list of 4-item sequences ``(ave_Tm, ppc, amp_len, amp)``;
            ``amp`` is a dict with the keys looked up in the loop below.
            The list is sorted IN PLACE.
        oligos: sequence of dicts, each with an 'id' entry.
        options: object exposing ``database`` (iterable of paths),
            ``outfile`` (writable object with a ``name``) and ``amplicon``.
        start_time: forwarded to ``print_foot``.
        session_dir: not used by this function.

    Returns:
        None.
    '''
    linesep = os.linesep
    out = print_head([], options)

    # Query and database summary.
    ID_list = [oligo['id'] for oligo in oligos]
    query_line = textwrap.fill('Query = %s' % ('; '.join(ID_list)), 80)
    out.append(query_line)
    out.append(' %s primer sequences' % (len(oligos)))
    out.append(linesep)
    out.append('Database = %s' % textwrap.fill(
        ', '.join([os.path.basename(db) for db in options.database]), 80))
    out.append(linesep)
    out.append('Reports Beginning'.ljust(80, '.'))
    out.append(linesep * 2)

    # Table caption (singular/plural wording) and column headers.
    amp_num = len(amp_list)
    if amp_num > 1:
        out.append(
            'Distribution of %s potential PCR amplicons predicted by MFEprimer-2.0 on the query primers'
            % amp_num)
    else:
        out.append(
            'Distribution of %s potential PCR amplicon predicted by MFEprimer-2.0 on the query primers'
            % amp_num)
    out.append(linesep)
    out.append('[Sorted by average Tm in descending order]')
    out.append('FP '.rjust(69) + 'RP '.rjust(8) + 'FP '.rjust(8) +
               'RP '.rjust(8) + 'FP '.rjust(8) + 'RP '.rjust(8))
    # Original author's note: the Delta glyph (U+0394) takes two character
    # positions, which is why some of the widths below are 7 instead of 8.
    out.append('Size'.rjust(53) + 'PPC '.rjust(8) + 'Tm '.rjust(8) +
               'Tm '.rjust(8) +
               ('%sG' % u'\u0394').rjust(7) +
               ('%sG' % u'\u0394').rjust(7) +
               ('3\'%sG' % u'\u0394').rjust(8) +
               ('3\'%sG' % u'\u0394').rjust(7))
    out.append('Primers producing potential PCR products:'.ljust(42) +
               '(bp)'.rjust(11) + '(%) '.rjust(8) +
               u'\u2103'.rjust(6) + u'\u2103'.rjust(7) +
               '(kcal/mol)'.center(22) + '(kcal/mol)'.center(14))
    out.append(linesep)

    detail_line = []
    fa_file = []

    # NOTE(review): the key is items 1 and 2 of each entry (unpacked below
    # as ppc and amp_len), while the caption says "average Tm" - confirm
    # which ordering is intended.
    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        hid = amp['real_hid']
        hdesc = amp['hdesc']
        desc = '%s: %s' % (sn, hid)
        amp_len = amp['size']  # the dict value is used, not the tuple's

        # Forward ("p") primer values.
        p_qid = amp['pid']
        f_len = amp['plen']
        f_3_pos = amp['f3_pos']
        p_3_DeltaG = amp['p_3_DeltaG']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_Tm = amp['p_Tm']
        p_DeltaG = amp['p_DeltaG']
        p_sb = f_3_pos - len(p_aseq) + 1

        # Reverse ("m") primer values.
        m_qid = amp['mid']
        r_len = amp['mlen']
        r_3_pos = amp['r3_pos']
        m_3_DeltaG = amp['m_3_DeltaG']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_Tm = amp['m_Tm']
        m_DeltaG = amp['m_DeltaG']
        m_se = r_3_pos + len(m_aseq)

        amp_graphic = amp['amp_graphic']
        amp_seq = p_tail + p_qseq + amp['mid_seq'] + m_qseq + m_tail
        amp_GC = chilli.cal_GC_content(amp_seq, amp_len)

        if len(desc) > 42:
            desc = desc[:42] + '...'

        # PPC is shown with a leading minus when both primers are the same
        # oligo.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        out.append(
            desc.ljust(42) + (str(amp_len)).rjust(11) + ppc.rjust(8) +
            ('%.1f' % p_Tm).rjust(8) + ('%.1f' % m_Tm).rjust(8) +
            ('%.1f' % p_DeltaG).rjust(8) + ('%.1f' % m_DeltaG).rjust(8) +
            ('%.1f' % p_3_DeltaG).rjust(8) + ('%.1f' % m_3_DeltaG).rjust(8))

        if not hdesc:
            detail_line.append('%s: %s + %s ==> %s%s' %
                               (sn, p_qid, m_qid, hid, linesep))
            fa_desc = '>%s %s + %s %s' % (sn, p_qid, m_qid, hid)
        else:
            detail_line.append('%s: %s + %s ==> %s %s%s' %
                               (sn, p_qid, m_qid, hid, hdesc, linesep))
            # NOTE(review): unlike the branch above, this header has no
            # '+' between the primer ids - left as is, confirm intent.
            fa_desc = '>%s %s %s %s %s' % (sn, p_qid, m_qid, hid, hdesc)

        detail_line.append(
            ' ' + 'PPC = %s%%, Size = %s bp, GC content = %.1f%%' %
            (ppc, amp_len, amp_GC))
        detail_line.append(
            ' ' + 'FP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' %
            (p_Tm, u'\u2103', u'\u0394', p_DeltaG, u'\u0394', p_3_DeltaG))
        detail_line.append(
            ' ' + 'RP: Tm = %.1f (%s), %sG = %.1f (kcal/mol), 3\'%sG = %.1f (kcal/mol)' %
            (m_Tm, u'\u2103', u'\u0394', m_DeltaG, u'\u0394', m_3_DeltaG))
        detail_line.append(
            ' ' + 'Binding sites: %s(%s/%s) ... %s(%s/%s)' %
            (p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len))
        detail_line.append(linesep)
        detail_line.append(amp_graphic + linesep)

        fa_seq = chilli.print_seq(amp_seq, 80)
        fa_file.append(fa_desc)
        fa_file.append(fa_seq)
        detail_line.append(fa_desc + linesep + fa_seq + linesep)

    # Detail section and footer.
    out.append(linesep)
    out.append('Details for the primers binding to the DNA template')
    out.append('[Sorted by average Tm in descending order]' + linesep)
    out.extend(detail_line)
    out.append(linesep * 2)

    out = print_foot(out, options, start_time)
    out = os.linesep.join(out)
    options.outfile.write(out.encode('utf-8'))

    if options.amplicon:
        out_file = options.outfile.name + '.fa'
        try:
            fh = open(out_file, 'w')
        except (IOError, OSError):
            # Report the failure and skip the FASTA export instead of
            # falling through to a write on an unbound handle.
            msg = 'Error: can not open %s for write' % out_file
            print2stderr(msg)
        else:
            with fh:
                fh.write(os.linesep.join(fa_file))
def format_output_primer(amp_list, oligos, options, start_time, session_dir):
    '''Format output in primer task.

    Turns every amplicon into a flat tuple of display-ready values and
    counts how often each amplicon size, formatted Tm and formatted DeltaG
    occurs.

    Parameters:
        amp_list: list of 4-item sequences ``(ave_Tm, ppc, amp_len, amp)``;
            ``amp`` is a dict with the keys looked up in the loop below.
            The list is sorted IN PLACE (descending, on items 1 and 2).
        oligos, options, start_time, session_dir: not used by this
            function; accepted for interface compatibility.

    Returns:
        ``(detail, size_dict, tm_dict, dg_dict)`` where

        * ``detail`` is a list of tuples ``(sn, acc, p_qid, m_qid, amp_len,
          ppc, p_Tm, m_Tm, p_DeltaG, m_DeltaG, p_3_DeltaG, m_3_DeltaG,
          amp_GC, p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len,
          amp_graphic, fa_seq, hid, hdesc)``; Tm, DeltaG, GC and ppc are
          strings formatted with one decimal;
        * ``size_dict`` maps amplicon size to its number of occurrences;
        * ``tm_dict`` maps a formatted Tm to its count over both primers;
        * ``dg_dict`` maps a formatted DeltaG to its count over both
          primers.
    '''
    linesep = os.linesep
    detail = []
    size_dict = {}
    dg_dict = {}
    tm_dict = {}

    amp_list.sort(key=itemgetter(1, 2), reverse=True)
    for sn, (_ave_tm, ppc, _amp_len, amp) in enumerate(amp_list, 1):
        hid = amp['real_hid']
        # Use the 4th '|'-separated field of the hit id when present;
        # otherwise (too few fields, or hid is not a string) fall back to
        # the id itself.
        try:
            acc = hid.split('|')[3]
        except (AttributeError, IndexError):
            acc = hid

        amp_len = amp['size']  # the dict value is used, not the tuple's
        hdesc = amp['hdesc']
        amp_graphic = amp['amp_graphic']

        # Forward ("p") primer values.
        p_qid = amp['pid']
        f_len = amp['plen']
        p_qseq = amp['p_qseq']
        p_aseq = amp['p_aseq']
        p_tail = amp['p_tail']
        p_3_DeltaG = '%.1f' % amp['p_3_DeltaG']
        p_Tm = '%.1f' % amp['p_Tm']
        p_DeltaG = '%.1f' % amp['p_DeltaG']
        p_sb = amp['f3_pos'] - len(amp['pseq']) + 1

        # Reverse ("m") primer values.
        m_qid = amp['mid']
        r_len = amp['mlen']
        m_qseq = amp['m_qseq']
        m_aseq = amp['m_aseq']
        m_tail = amp['m_tail']
        m_3_DeltaG = '%.1f' % amp['m_3_DeltaG']
        m_Tm = '%.1f' % amp['m_Tm']
        m_DeltaG = '%.1f' % amp['m_DeltaG']
        m_se = amp['r3_pos'] + len(amp['mseq'])

        amp_seq = p_tail + p_qseq + amp['mid_seq'] + m_qseq + m_tail
        amp_GC = '%.1f' % chilli.cal_GC_content(amp_seq, amp_len)

        # PPC is shown with a leading minus when both primers are the same
        # oligo.
        if p_qid == m_qid:
            ppc = '-%.1f' % ppc
        else:
            ppc = '%.1f' % ppc

        if not hdesc:
            fa_desc = '>Amp_%s %s + %s ==> %s' % (sn, p_qid, m_qid, hid)
        else:
            fa_desc = '>Amp_%s %s + %s ==> %s %s' % (sn, p_qid, m_qid, hid, hdesc)
        fa_seq = fa_desc + linesep + chilli.print_seq(amp_seq, 80) + linesep

        # Occurrence counters; both primers feed the Tm/DeltaG histograms.
        size_dict[amp_len] = size_dict.get(amp_len, 0) + 1
        for tm_value in (p_Tm, m_Tm):
            tm_dict[tm_value] = tm_dict.get(tm_value, 0) + 1
        for dg_value in (p_DeltaG, m_DeltaG):
            dg_dict[dg_value] = dg_dict.get(dg_value, 0) + 1

        detail.append((sn, acc, p_qid, m_qid, amp_len, ppc, p_Tm, m_Tm,
                       p_DeltaG, m_DeltaG, p_3_DeltaG, m_3_DeltaG, amp_GC,
                       p_sb, len(p_aseq), f_len, m_se, len(m_aseq), r_len,
                       amp_graphic, fa_seq, hid, hdesc))

    return detail, size_dict, tm_dict, dg_dict