def main(alignment_file, edlevel_step, confint_p, spec_ids_str, permut_n, select_sites):
    """Count A-site differences between two aligned species per edlevel threshold.

    ``spec_ids_str`` holds two comma-separated species ids: the first is the
    reference, the second the comparison.  Alignments lacking either species
    are skipped.  For every column where the reference letter is 'A' and the
    comparison letter is not a gap, the counters of every object in the
    dictionary returned by ``edlevel_dict_init(edlevel_step)`` are updated:

    * ``n_A`` always, plus ``A2G`` (comparison is 'G') or ``A2CT``
      (comparison is 'C' or 'T');
    * ``n_A_ed`` for thresholds strictly below the ``edlevel`` of the
      reference's ``edinfo_dict`` entry at that position (1-based key), when
      such an entry exists and passes ``select_sites`` ('syn' keeps entries
      whose ``codon_change`` is 'syn', 'nsyn' keeps the others, any other
      value keeps all);
    * ``A_ed2G`` / ``A_ed2CT`` for the same thresholds, unless the comparison
      species also has an ``edinfo_dict`` entry at that position.

    Finally a tab-separated header is printed and each threshold object's
    ``p_and_confint_print(confint_p, spec_ids_str, permut_n)`` is called in
    ascending threshold order.
    """
    spec_ids_list = spec_ids_str.split(',')
    edlevel_dict = edlevel_dict_init(edlevel_step)

    for align_obj in align_parse(alignment_file):
        present = align_obj.species_list
        if spec_ids_list[0] not in present or spec_ids_list[1] not in present:
            continue
        ref_id, cmp_id = spec_ids_list[0], spec_ids_list[1]
        ref_seq = align_obj.align_dict[ref_id]

        for pos in range(len(ref_seq)):
            if ref_seq[pos] != 'A':
                continue
            cmp_let = align_obj.align_dict[cmp_id][pos]
            if cmp_let == "-":
                continue
            to_g = cmp_let == 'G'
            to_ct = cmp_let in ('C', 'T')

            # Every threshold sees every non-gap reference 'A'.
            for stats in edlevel_dict.values():
                stats.n_A += 1
                if to_g:
                    stats.A2G += 1
                elif to_ct:
                    stats.A2CT += 1

            # edinfo_dict is keyed by 1-based position.
            ref_site = align_obj.edinfo_dict[ref_id].get(pos + 1)
            if not ref_site:
                continue
            if select_sites == 'syn' and ref_site.codon_change != 'syn':
                continue
            if select_sites == 'nsyn' and ref_site.codon_change == 'syn':
                continue

            edlevel = ref_site.edlevel
            passing = [edlevel_dict[thr] for thr in edlevel_dict if thr < edlevel]
            for stats in passing:
                stats.n_A_ed += 1

            # Positions with an entry in the comparison species too do not
            # contribute to the substitution counters.
            if align_obj.edinfo_dict[cmp_id].get(pos + 1):
                continue
            if to_g:
                for stats in passing:
                    stats.A_ed2G += 1
            elif to_ct:
                for stats in passing:
                    stats.A_ed2CT += 1

    print("spec_id\tedlevel_thr\tp_Aed2G\tp_A2G\tp_A_ed2CT\tp_A2CT\trf_G\trf_G_low\trf_G_high\trf_CT\trf_CT_low\trf_CT_high")
    for thr in sorted(edlevel_dict.keys()):
        try:
            edlevel_dict[thr].p_and_confint_print(confint_p, spec_ids_str, permut_n)
        except ZeroDivisionError:
            # Thresholds whose report divides by zero are skipped silently.
            pass
def build_subst_mat(alignment_file, outgr_str, spec_str, flag, orf_crd_table):
    """Build an outgroup -> species substitution matrix and print summary counts.

    Parameters
    ----------
    alignment_file : passed unchanged to ``align_parse``.
    outgr_str : comma-separated outgroup species ids (the first two are
        checked for presence in each alignment).
    spec_str : comma-separated ids; ``specs[0]`` is the species being scored,
        ``specs[1]`` must agree with the outgroup letter for a site to count.
    flag : "all" tallies every retained site; "syn" / "nsyn" keep only sites
        where putting 'A' versus 'G' at the site's codon position gives the
        same / a different amino acid.  Any other value tallies every site
        after the codon lookup.
    orf_crd_table : passed to ``make_orf_crd_dict``; the resulting dict maps a
        sequence id to the (start, end) range of positions that is scanned.

    A position with an ``edinfo_dict`` entry (1-based key) is represented by
    the letter 'E'.  Sites with a gap in the outgroup letter or in either
    species are skipped, as are sites where the outgroup letter differs from
    the second species.

    NOTE(review): ``subst_mat`` is incremented before the ``flag`` filter, so
    the returned matrix is the same for every ``flag``; only the printed
    counters are filtered.  Confirm this is intended.

    Ratios whose denominator is zero are printed as ``nan`` instead of raising
    ZeroDivisionError, so the matrix is always printed and returned.

    Returns the matrix created by ``subst_mat_init()`` after updating it.
    """
    outgroup_ref = outgr_str.split(',')
    specs = spec_str.split(',')
    orf_crd_dict = make_orf_crd_dict(orf_crd_table)
    subst_mat = subst_mat_init()

    # Order matters: it is the column order of the printed header and '*' row.
    counter_names = (
        'n_E', 'n_A', 'n_G', 'n_Y',
        'n_E_anc', 'n_A_anc', 'n_G_anc', 'n_Y_anc',
        'n_E2G', 'n_A2G', 'n_E2Y', 'n_A2Y',
        'n_G2E', 'n_G2A', 'n_Y2E', 'n_Y2A',
    )
    counts = dict.fromkeys(counter_names, 0)
    pyrimidines = ('C', 'T')

    def site_letter(align_obj, species, pos):
        # Alignment letter, replaced by 'E' when the position has an edinfo entry.
        letter = align_obj.align_dict[species][pos]
        if align_obj.edinfo_dict[species].get(pos + 1):
            letter = 'E'
        return letter

    def tally(anc, cur):
        # Update the current-state, ancestral-state and transition counters.
        if cur == 'A':
            counts['n_A'] += 1
            if anc == 'G':
                counts['n_G2A'] += 1
            elif anc in pyrimidines:
                counts['n_Y2A'] += 1
        elif cur == 'E':
            counts['n_E'] += 1
            if anc == 'G':
                counts['n_G2E'] += 1
            elif anc in pyrimidines:
                counts['n_Y2E'] += 1
        elif cur == 'G':
            counts['n_G'] += 1
        elif cur in pyrimidines:
            counts['n_Y'] += 1

        if anc == 'A':
            counts['n_A_anc'] += 1
            if cur == 'G':
                counts['n_A2G'] += 1
            elif cur in pyrimidines:
                counts['n_A2Y'] += 1
        elif anc == 'E':
            counts['n_E_anc'] += 1
            if cur == 'G':
                counts['n_E2G'] += 1
            elif cur in pyrimidines:
                counts['n_E2Y'] += 1
        elif anc == 'G':
            counts['n_G_anc'] += 1
        elif anc in pyrimidines:
            counts['n_Y_anc'] += 1

    def ratio(num, den):
        # Float division that yields nan instead of raising on a zero denominator.
        if den == 0:
            return float('nan')
        return float(num) / den

    for align_obj in align_parse(alignment_file):
        present = align_obj.species_list
        if specs[0] not in present:
            continue
        if specs[1] not in present:
            continue
        if (outgroup_ref[0] not in present) and (outgroup_ref[1] not in present):
            continue
        outgroup = [spec_id for spec_id in outgroup_ref if spec_id in present]
        # First key of the per-species sequence info selects the ORF range.
        seq_id = list(align_obj.seqinfo_dict[specs[0]].keys())[0]
        orf_crds = orf_crd_dict[seq_id]

        for i in range(orf_crds[0], orf_crds[1]):
            if len(outgroup) == 1:
                outgroup_let = site_letter(align_obj, outgroup[0], i)
            elif len(outgroup) == 2:
                outgroup_let = anc_letter(
                    site_letter(align_obj, outgroup[0], i),
                    site_letter(align_obj, outgroup[1], i))
            if outgroup_let == '-':
                continue

            let1 = site_letter(align_obj, specs[0], i)
            let2 = site_letter(align_obj, specs[1], i)
            if (let1 == '-') or (let2 == '-'):
                continue
            if outgroup_let != let2:
                continue

            subst_mat[outgroup_let][let1] += 1

            if flag != "all":
                codon, aacid, shift = get_codon(align_obj, specs[0], orf_crds[0], i)
                codon_a = codon[:]
                codon_a[shift] = 'A'
                codon_g = codon[:]
                codon_g[shift] = 'G'
                aacid_a = str(Seq(''.join(codon_a)).translate())
                aacid_g = str(Seq(''.join(codon_g)).translate())
                synonymous = aacid_a == aacid_g
                if flag == "syn" and not synonymous:
                    continue
                if flag == "nsyn" and synonymous:
                    continue

            tally(outgroup_let, let1)

    print("@\tn_E\tn_A\tn_G\tn_Y\tn_E_anc\tn_A_anc\tn_G_anc\tn_Y_anc\tn_E2G\tn_A2G\tn_E2Y\tn_A2Y\tn_G2E\tn_G2A\tn_Y2E\tn_Y2A")
    print(' '.join(['*'] + [str(counts[name]) for name in counter_names]))

    p_e2g = ratio(counts['n_E2G'], counts['n_E_anc'])
    p_a2g = ratio(counts['n_A2G'], counts['n_A_anc'])
    p_e2y = ratio(counts['n_E2Y'], counts['n_E_anc'])
    p_a2y = ratio(counts['n_A2Y'], counts['n_A_anc'])
    p_g2e = ratio(counts['n_G2E'], counts['n_G_anc'])
    p_g2a = ratio(counts['n_G2A'], counts['n_G_anc'])
    p_y2e = ratio(counts['n_Y2E'], counts['n_Y_anc'])
    p_y2a = ratio(counts['n_Y2A'], counts['n_Y_anc'])

    # One tuple per output line, in the original print order.
    report = [
        (p_e2g, p_a2g, ratio(p_e2g, p_a2g)),
        (p_e2y, p_a2y, ratio(p_e2y, p_a2y)),
        (ratio(ratio(p_e2g, p_a2g), ratio(p_e2y, p_a2y)),),
        (p_g2e, p_g2a, ratio(p_g2e, p_g2a)),
        (p_y2e, p_y2a, ratio(p_y2e, p_y2a)),
        (ratio(ratio(p_g2e, p_g2a), ratio(p_y2e, p_y2a)),),
        (p_a2g,),
        (p_a2y,),
        (p_g2a,),
        (p_y2a,),
    ]
    for row in report:
        print(' '.join(str(value) for value in row))

    subst_mat_print(subst_mat)
    return subst_mat
def build_subst_mat(alignment_file, outgr_str, spec_str, select_sites, orf_crd_table):
    """Build an outgroup -> species substitution matrix, optionally syn/nsyn only.

    ``outgr_str`` and ``spec_str`` are comma-separated species ids.  An
    alignment is used when both ``spec_str`` species and at least one of the
    first two outgroup ids are present.  At each alignment column the letter
    of a species is its alignment character, or the value returned by
    ``codon_change_A_or_E(select_sites, entry.codon_change)`` when the species
    has an ``edinfo_dict`` entry at that position (1-based key).  With two
    outgroups present the outgroup letter is ``anc_letter(let1, let2)``.

    Columns with a gap in the outgroup letter or in either species are
    skipped, as are columns where the outgroup letter differs from the second
    species.  For the remaining columns:

    * ``select_sites == "all"``: ``subst_mat[outgroup][species1]`` is
      incremented;
    * otherwise only columns inside the ORF range from ``orf_crd_table`` are
      considered, and the cell is incremented when the codon of the first
      species and the same codon carrying the outgroup letter translate to
      the same amino acid ("syn") or to different ones ("nsyn").

    The matrix is printed with ``subst_mat_print`` and returned.
    """
    outgroup_ref = outgr_str.split(',')
    specs = spec_str.split(',')
    subst_mat = subst_mat_init()
    orf_crd_dict = make_orf_crd_dict(orf_crd_table)

    def observed_letter(aln, species, pos):
        # Alignment character, overridden for positions with an edinfo entry.
        letter = aln.align_dict[species][pos]
        entry = aln.edinfo_dict[species].get(pos + 1)
        if entry:
            letter = codon_change_A_or_E(select_sites, entry.codon_change)
        return letter

    for aln in align_parse(alignment_file):
        present = aln.species_list
        if specs[0] not in present or specs[1] not in present:
            continue
        if outgroup_ref[0] not in present and outgroup_ref[1] not in present:
            continue

        seq_id = list(aln.seqinfo_dict[specs[0]].keys())[0]
        orf_crds = orf_crd_dict[seq_id]
        outgroup = [spec_id for spec_id in outgroup_ref if spec_id in present]
        focal_seq = aln.align_dict[specs[0]]

        for i in range(len(focal_seq)):
            if len(outgroup) == 1:
                outgroup_let = observed_letter(aln, outgroup[0], i)
            elif len(outgroup) == 2:
                outgroup_let = anc_letter(
                    observed_letter(aln, outgroup[0], i),
                    observed_letter(aln, outgroup[1], i))
            if outgroup_let == '-':
                continue

            let1 = observed_letter(aln, specs[0], i)
            let2 = observed_letter(aln, specs[1], i)
            if let1 == '-' or let2 == '-':
                continue
            if outgroup_let != let2:
                continue

            if select_sites == "all":
                subst_mat[outgroup_let][let1] += 1
                continue

            # syn / nsyn classification is only defined inside the ORF.
            if not (orf_crds[0] <= i < orf_crds[1]):
                continue

            codon_pos = (i - orf_crds[0]) % 3
            # Same value as orf_start + ((i - orf_start) // 3) * 3.
            codon_start = i - codon_pos
            codon = list(aln.align_dict[specs[0]][codon_start:codon_start + 3])
            for offset in range(3):
                # Positions with an edinfo entry are read as 'A' in the codon.
                if aln.edinfo_dict[specs[0]].get(codon_start + offset + 1):
                    codon[offset] = 'A'

            codon_anc = codon[:]
            codon_anc[codon_pos] = 'A' if outgroup_let == 'E' else outgroup_let

            aacid = str(Seq(''.join(codon)).translate())
            aacid_anc = str(Seq(''.join(codon_anc)).translate())
            same_aacid = aacid == aacid_anc

            if select_sites == "syn" and same_aacid:
                subst_mat[outgroup_let][let1] += 1
            if select_sites == "nsyn" and not same_aacid:
                subst_mat[outgroup_let][let1] += 1

    subst_mat_print(subst_mat)
    return subst_mat
def main(alignment_file, edlevel_step, confint_p, spec_ids_str, permut_n, orf_crd_table, nucl_list):
    """Count synonymous / non-synonymous substitutions at A sites inside ORFs.

    ``spec_ids_str`` holds two comma-separated species ids (reference first).
    For each alignment containing both, the ORF range of the reference
    sequence (looked up in ``make_orf_crd_dict(orf_crd_table)``) is scanned.
    A position is considered when the reference letter is 'A' or the
    reference has an ``edinfo_dict`` entry there (1-based key), the comparison
    letter is not a gap, the comparison has no ``edinfo_dict`` entry there,
    and the comparison letter is in ``nucl_list``.

    The reference codon (positions with an edinfo entry read as 'A') is
    translated with and without the comparison letter substituted in:

    * reference positions with an edinfo entry update ``dS_e`` / ``dN_e`` on
      the threshold object whose key is the lower bound of the bin
      ``[edlevels[j-1], edlevels[j])`` containing the entry's ``edlevel``;
    * other positions update ``dS`` / ``dN`` on every threshold object.

    No header line is printed.  Each threshold object's
    ``p_and_confint_print(confint_p, spec_ids_str, permut_n)`` is called in
    ascending key order; ZeroDivisionError from that call is ignored.
    """
    nucl_list = list(nucl_list)
    spec_ids_list = spec_ids_str.split(',')
    edlevel_dict = edlevel_dict_init(edlevel_step)
    orf_crd_dict = make_orf_crd_dict(orf_crd_table)
    # The key set never changes below, so sort it once instead of per site.
    edlevels = sorted(edlevel_dict.keys())

    for align_obj in align_parse(alignment_file):
        if (spec_ids_list[0] not in align_obj.species_list) or (spec_ids_list[1] not in align_obj.species_list):
            continue
        ref_id, cmp_id = spec_ids_list[0], spec_ids_list[1]
        seq_id = list(align_obj.seqinfo_dict[ref_id].keys())[0]
        orf_crds = orf_crd_dict[seq_id]
        ref_seq = align_obj.align_dict[ref_id]
        cmp_seq = align_obj.align_dict[cmp_id]
        ref_edits = align_obj.edinfo_dict[ref_id]
        cmp_edits = align_obj.edinfo_dict[cmp_id]

        for i in range(orf_crds[0], orf_crds[1]):
            ref_site = ref_edits.get(i + 1)
            if ref_seq[i] != 'A' and not ref_site:
                continue
            cmp_let = cmp_seq[i]
            if cmp_let == "-":
                continue
            if cmp_edits.get(i + 1):
                continue
            if cmp_let not in nucl_list:
                continue

            # Explicit floor division keeps the codon start an integer.
            codon_start = orf_crds[0] + ((i - orf_crds[0]) // 3) * 3
            shift = (i - orf_crds[0]) % 3
            codon = list(ref_seq[codon_start:codon_start + 3])
            for j in range(3):
                if ref_edits.get(codon_start + j + 1):
                    codon[j] = 'A'
            codon_subst = codon[:]
            codon_subst[shift] = cmp_let

            aacid = str(Seq(''.join(codon)).translate())
            aacid_subst = str(Seq(''.join(codon_subst)).translate())
            synonymous = aacid == aacid_subst

            if ref_site:
                edlevel = ref_site.edlevel
                for j in range(1, len(edlevels)):
                    if edlevels[j - 1] <= edlevel < edlevels[j]:
                        stats = edlevel_dict[edlevels[j - 1]]
                        if synonymous:
                            stats.dS_e += 1
                        else:
                            stats.dN_e += 1
            else:
                for stats in edlevel_dict.values():
                    if synonymous:
                        stats.dS += 1
                    else:
                        stats.dN += 1

    for thr in edlevels:
        try:
            edlevel_dict[thr].p_and_confint_print(confint_p, spec_ids_str, permut_n)
        except ZeroDivisionError:
            # Thresholds whose report divides by zero are skipped silently.
            pass
def build_subst_mat(alignment_file, confint_p, outgr_str, spec_str, select_sites):
    """Build an outgroup -> species substitution matrix over whole alignments.

    ``outgr_str`` and ``spec_str`` are comma-separated species ids.  An
    alignment is used when both ``spec_str`` species and at least one of the
    first two outgroup ids are present.  At each column the letter of a
    species is its alignment character, or the value returned by
    ``codon_change_A_or_E(select_sites, entry.codon_change)`` when the species
    has an ``edinfo_dict`` entry at that position (1-based key).  With two
    outgroups present the outgroup letter is ``anc_letter(let1, let2)``.

    Columns with a gap in the outgroup letter or in either species are
    skipped.  When the outgroup letter equals the second species' letter,
    ``subst_mat[outgroup_letter][first_species_letter]`` is incremented.

    ``confint_p`` is accepted but not used by this function.

    The matrix is printed with ``subst_mat_print`` and returned.
    """
    outgroup_ref = outgr_str.split(',')
    specs = spec_str.split(',')
    subst_mat = subst_mat_init()

    def observed_letter(aln, species, pos):
        # Alignment character, overridden for positions with an edinfo entry.
        letter = aln.align_dict[species][pos]
        entry = aln.edinfo_dict[species].get(pos + 1)
        if entry:
            letter = codon_change_A_or_E(select_sites, entry.codon_change)
        return letter

    for aln in align_parse(alignment_file):
        present = aln.species_list
        if specs[0] not in present or specs[1] not in present:
            continue
        if outgroup_ref[0] not in present and outgroup_ref[1] not in present:
            continue

        outgroup = [spec_id for spec_id in outgroup_ref if spec_id in present]
        n_columns = len(aln.align_dict[specs[0]])

        for i in range(n_columns):
            if len(outgroup) == 1:
                outgroup_let = observed_letter(aln, outgroup[0], i)
            elif len(outgroup) == 2:
                outgroup_let = anc_letter(
                    observed_letter(aln, outgroup[0], i),
                    observed_letter(aln, outgroup[1], i))
            if outgroup_let == '-':
                continue

            let1 = observed_letter(aln, specs[0], i)
            let2 = observed_letter(aln, specs[1], i)
            if let1 == '-' or let2 == '-':
                continue
            if outgroup_let != let2:
                continue
            subst_mat[outgroup_let][let1] += 1

    subst_mat_print(subst_mat)
    return subst_mat
def main(alignment_file, edlevel_step, confint_p, spec_ids_str, permut_n, syn_nsyn_esites, syn_nsyn_asites, orf_crd_table):
    """Count substitutions at E sites and at plain A sites inside ORFs.

    ``spec_ids_str`` holds two comma-separated species ids (reference first).
    For each alignment containing both, the ORF range of the reference
    sequence is scanned.  A position is used when the comparison letter is not
    a gap and the reference letter is 'A' or the reference has an
    ``edinfo_dict`` entry there (1-based key).  A comparison position with an
    ``edinfo_dict`` entry is read as 'A'.

    Reference positions with an edinfo entry ("E sites"):
        filtered by ``syn_nsyn_esites`` ('syn' keeps entries whose
        ``codon_change`` is 'syn', 'nsyn' keeps the others, anything else
        keeps all); for every threshold ``<=`` the entry's ``edlevel`` the
        counters ``n_E`` and ``E2G`` (comparison 'G') or ``E2Y`` (comparison
        'C'/'T') are incremented.

    Other reference 'A' positions:
        the site is synonymous when the amino acid returned by ``get_codon``
        equals the translation of the codon with 'G' at the site.
        ``syn_nsyn_asites`` 'syn' / 'nsyn' keep only matching sites and
        increment ``n_AG`` and ``n_AY`` on every threshold; 'all' increments
        only ``n_AG``.  ``A2G`` / ``A2Y`` are then incremented on every
        threshold according to the comparison letter.

    NOTE(review): in 'all' mode ``n_AY`` is never incremented here; confirm
    that ``p_and_confint_print`` accounts for that via its fourth argument.

    Finally a header is printed and each threshold object's
    ``p_and_confint_print(confint_p, spec_ids_str, permut_n, syn_nsyn_asites)``
    is called in ascending key order; ZeroDivisionError from it is ignored.
    """
    spec_ids_list = spec_ids_str.split(',')
    edlevel_dict = edlevel_dict_init(edlevel_step)
    orf_crd_dict = make_orf_crd_dict(orf_crd_table)

    for align_obj in align_parse(alignment_file):
        if (spec_ids_list[0] not in align_obj.species_list) or (
                spec_ids_list[1] not in align_obj.species_list):
            continue
        ref_id, cmp_id = spec_ids_list[0], spec_ids_list[1]
        seq_id = list(align_obj.seqinfo_dict[ref_id].keys())[0]
        orf_crds = orf_crd_dict[seq_id]
        ref_seq = align_obj.align_dict[ref_id]
        cmp_seq = align_obj.align_dict[cmp_id]
        ref_edits = align_obj.edinfo_dict[ref_id]
        cmp_edits = align_obj.edinfo_dict[cmp_id]

        for i in range(orf_crds[0], orf_crds[1]):
            let_2 = cmp_seq[i]
            if let_2 == "-":
                continue
            ed_site = ref_edits.get(i + 1)
            if ref_seq[i] != "A" and not ed_site:
                continue
            if cmp_edits.get(i + 1):
                let_2 = 'A'

            if ed_site:
                if syn_nsyn_esites == 'syn' and ed_site.codon_change != 'syn':
                    continue
                if syn_nsyn_esites == 'nsyn' and ed_site.codon_change == 'syn':
                    continue
                edlevel = ed_site.edlevel
                for thr, stats in edlevel_dict.items():
                    if thr <= edlevel:
                        stats.n_E += 1
                        if let_2 == 'G':
                            stats.E2G += 1
                        elif let_2 in ('C', 'T'):
                            stats.E2Y += 1
                continue

            # Plain 'A' site: classify by whether A->G would change the amino acid.
            codon, aacid, shift = get_codon(align_obj, ref_id, orf_crds[0], i)
            codon_g = codon[:]
            codon_g[shift] = 'G'
            syn = aacid == str(Seq(''.join(codon_g)).translate())

            if syn_nsyn_asites == 'syn':
                if not syn:
                    continue
                for stats in edlevel_dict.values():
                    stats.n_AG += 1
                    stats.n_AY += 1
            elif syn_nsyn_asites == 'nsyn':
                if syn:
                    continue
                for stats in edlevel_dict.values():
                    stats.n_AG += 1
                    stats.n_AY += 1
            elif syn_nsyn_asites == 'all':
                for stats in edlevel_dict.values():
                    stats.n_AG += 1

            if let_2 == 'G':
                for stats in edlevel_dict.values():
                    stats.A2G += 1
            elif let_2 in ('C', 'T'):
                for stats in edlevel_dict.values():
                    stats.A2Y += 1

    print("spec_id\tedlevel_thr\tp_E2G\tp_A2G\tp_E2Y\tp_A2Y\trf_G\trf_G_low\trf_G_high\trf_Y\trf_Y_low\trf_Y_high")
    for thr in sorted(edlevel_dict.keys()):
        try:
            edlevel_dict[thr].p_and_confint_print(confint_p, spec_ids_str, permut_n, syn_nsyn_asites)
        except ZeroDivisionError:
            # Thresholds whose report divides by zero are skipped silently.
            pass
def main(alignment_file, edlevel_step, confint_p, spec_ids_str, permut_n, orf_crd_table):
    """Count E(nsyn)->G and A(syn)->G substitutions and collect codon usage.

    ``spec_ids_str`` holds two comma-separated species ids (reference first).
    For each alignment containing both, the ORF range of the reference
    sequence is scanned.  A position is used when the comparison letter is not
    a gap and the reference letter is 'A' or the reference has an
    ``edinfo_dict`` entry there (1-based key).  A comparison position with an
    ``edinfo_dict`` entry is read as 'A'.

    Reference positions with an edinfo entry:
        entries whose ``codon_change`` is 'syn' are skipped; otherwise, for
        every threshold ``<=`` the entry's ``edlevel``, ``n_E_nsyn`` is
        incremented, and ``E_nsyn2G`` too when the comparison letter is 'G'.

    Other reference 'A' positions:
        only comparison letters 'G' and 'A' are kept.  The codon returned by
        ``get_codon`` is added to a codon-usage count; if putting 'G' at the
        site leaves the amino acid unchanged, ``n_A_syn`` is incremented on
        every threshold, and ``A_syn2G`` too when the comparison letter is 'G'.

    After all alignments, ``alpha_count(codon_usage)`` supplies
    ``alpha_nsyn`` / ``alpha_syn``, which are stored on every threshold
    object.  A header is printed and each threshold object's
    ``p_and_confint_print(confint_p, spec_ids_str, permut_n)`` is called in
    ascending key order; ZeroDivisionError from that call is ignored.
    """
    spec_ids_list = spec_ids_str.split(',')
    edlevel_dict = edlevel_dict_init(edlevel_step)
    orf_crd_dict = make_orf_crd_dict(orf_crd_table)
    codon_usage_dict = dict()

    for align_obj in align_parse(alignment_file):
        if (spec_ids_list[0] not in align_obj.species_list) or (spec_ids_list[1] not in align_obj.species_list):
            continue
        ref_id, cmp_id = spec_ids_list[0], spec_ids_list[1]
        seq_id = list(align_obj.seqinfo_dict[ref_id].keys())[0]
        orf_crds = orf_crd_dict[seq_id]
        ref_seq = align_obj.align_dict[ref_id]
        cmp_seq = align_obj.align_dict[cmp_id]
        ref_edits = align_obj.edinfo_dict[ref_id]
        cmp_edits = align_obj.edinfo_dict[cmp_id]

        for i in range(orf_crds[0], orf_crds[1]):
            let_2 = cmp_seq[i]
            if let_2 == "-":
                continue
            ed_site = ref_edits.get(i + 1)
            if ref_seq[i] != "A" and not ed_site:
                continue
            if cmp_edits.get(i + 1):
                let_2 = 'A'

            if ed_site:
                if ed_site.codon_change == 'syn':
                    continue
                edlevel = ed_site.edlevel
                for thr, stats in edlevel_dict.items():
                    if thr <= edlevel:
                        stats.n_E_nsyn += 1
                        if let_2 == 'G':
                            stats.E_nsyn2G += 1
                continue

            if let_2 != 'G' and let_2 != 'A':
                continue

            codon, aacid, shift = get_codon(align_obj, ref_id, orf_crds[0], i)
            # Codon usage is recorded before the synonymy filter below.
            str_codon = ''.join(codon)
            codon_usage_dict[str_codon] = codon_usage_dict.get(str_codon, 0) + 1

            codon_g = codon[:]
            codon_g[shift] = 'G'
            if aacid != str(Seq(''.join(codon_g)).translate()):
                continue

            for stats in edlevel_dict.values():
                stats.n_A_syn += 1
                if let_2 == 'G':
                    stats.A_syn2G += 1

    alpha_nsyn, alpha_syn = alpha_count(codon_usage_dict)
    thresholds = sorted(edlevel_dict.keys())
    for thr in thresholds:
        edlevel_dict[thr].alpha_nsyn = alpha_nsyn
        edlevel_dict[thr].alpha_syn = alpha_syn

    print("spec_id\tedlevel_thr\tE_nsyn2G\tn_E_nsyn\tA_syn2G\tn_A_syn\talpha_nsyn\talpha_syn\tcomb_coeff\tmidvalue_dnds\tdnds_low\tdnds_high")
    for thr in thresholds:
        try:
            edlevel_dict[thr].p_and_confint_print(confint_p, spec_ids_str, permut_n)
        except ZeroDivisionError:
            # Thresholds whose report divides by zero are skipped silently.
            pass
def main(alignment_file, edlevel_step, confint_p, spec_ids_str, permut_n, orf_crd_table, nucl_list):
    """Count syn / nsyn site totals and substitutions for E and A sites by bin.

    ``spec_ids_str`` holds two comma-separated species ids (reference first).
    The module-level name ``aminoacids`` is rebound to ``list(aminoacids)``.
    For each alignment containing both species, the ORF range of the
    reference sequence is scanned.  A position is used when the reference
    letter is 'A' or the reference has an ``edinfo_dict`` entry there
    (1-based key) and the comparison letter is not a gap.  A comparison
    position with an ``edinfo_dict`` entry is read as 'A'.

    Site totals: the site is synonymous when every nucleotide in
    ``nucl_list`` placed at the site leaves the amino acid returned by
    ``get_codon`` unchanged.  Synonymous sites whose amino acid is not in
    ``aminoacids`` are skipped entirely.  Reference positions with an edinfo
    entry increment ``total_S_E`` / ``total_N_E`` on the object keyed by the
    lower bound of the bin ``[edlevels[j-1], edlevels[j])`` containing the
    entry's ``edlevel``; other positions increment ``total_S_A`` /
    ``total_N_A`` on every object except the one with the largest key.

    Substitutions: when the comparison letter is not 'A' and is in
    ``nucl_list``, the codon with that letter substituted is translated and
    ``dS_e`` / ``dN_e`` (edinfo positions) or ``dS`` / ``dN`` (others) are
    incremented on the same targets.

    Finally ``dnds_a`` is computed from the lowest-key object and passed to
    every object's ``p_and_confint_print(confint_p, spec_ids_str, permut_n,
    dnds_a)`` in ascending key order.
    """
    global aminoacids
    nucl_list = list(nucl_list)
    aminoacids = list(aminoacids)
    spec_ids_list = spec_ids_str.split(',')
    edlevel_dict = edlevel_dict_init(edlevel_step)
    edlevels = sorted(edlevel_dict.keys())
    orf_crd_dict = make_orf_crd_dict(orf_crd_table)

    # Every key except the largest one is the lower bound of a bin.
    open_bins = [edlevel_dict[key] for key in edlevels[:-1]]

    def bins_containing(edlevel):
        # Objects keyed by the lower bound of each bin that contains edlevel.
        return [edlevel_dict[edlevels[j - 1]]
                for j in range(1, len(edlevels))
                if edlevels[j - 1] <= edlevel < edlevels[j]]

    def bump(targets, attr):
        # Add one to the named counter on each target object.
        for stats in targets:
            setattr(stats, attr, getattr(stats, attr) + 1)

    for align_obj in align_parse(alignment_file):
        if (spec_ids_list[0] not in align_obj.species_list) or (spec_ids_list[1] not in align_obj.species_list):
            continue
        ref_id, cmp_id = spec_ids_list[0], spec_ids_list[1]
        seq_id = list(align_obj.seqinfo_dict[ref_id].keys())[0]
        orf_crds = orf_crd_dict[seq_id]
        ref_seq = align_obj.align_dict[ref_id]
        cmp_seq = align_obj.align_dict[cmp_id]
        ref_edits = align_obj.edinfo_dict[ref_id]
        cmp_edits = align_obj.edinfo_dict[cmp_id]

        for i in range(orf_crds[0], orf_crds[1]):
            ed_site = ref_edits.get(i + 1)
            if ref_seq[i] != 'A' and not ed_site:
                continue
            if cmp_seq[i] == "-":
                continue
            let2 = 'A' if cmp_edits.get(i + 1) else cmp_seq[i]

            codon, aacid, shift = get_codon(align_obj, ref_id, orf_crds[0], i)

            def translated_with(nucl):
                # Amino acid of the codon with nucl placed at the site.
                variant = codon[:]
                variant[shift] = nucl
                return str(Seq(''.join(variant)).translate())

            syn = all(aacid == translated_with(nucl) for nucl in nucl_list)
            if syn and aacid not in aminoacids:
                continue

            if ed_site:
                targets = bins_containing(ed_site.edlevel)
                bump(targets, 'total_S_E' if syn else 'total_N_E')
            else:
                targets = open_bins
                bump(targets, 'total_S_A' if syn else 'total_N_A')

            # The reference is always treated as 'A' here, so an 'A' in the
            # comparison species means no substitution.
            if let2 == 'A':
                continue
            if let2 not in nucl_list:
                continue

            syn = aacid == translated_with(let2)
            if ed_site:
                bump(targets, 'dS_e' if syn else 'dN_e')
            else:
                bump(targets, 'dS' if syn else 'dN')

    s = edlevel_dict[edlevels[0]]
    # NOTE(review): raises ZeroDivisionError when total_N_A, total_S_A or dS
    # of the lowest-key object is zero; confirm whether this should be guarded.
    dnds_a = (float(s.dN) / s.total_N_A) / (float(s.dS) / s.total_S_A)
    for thr in edlevels:
        edlevel_dict[thr].p_and_confint_print(confint_p, spec_ids_str, permut_n, dnds_a)