def run_evol_py(tree, alg, branch_model, site_models, workir='data/evol_output', tool_dir="ete3_apps/bin"):
    """Run the requested evolutionary models on a tree and render the result.

    Builds an ``EvolTree`` from ``tree``, links it to the alignment ``alg``,
    runs ``branch_model`` (when truthy) and every entry of ``site_models``,
    then renders the tree to ``<workir>/tree_evol_result.png``.

    Args:
        tree: Tree input handed straight to ``EvolTree``.
        alg: Alignment handed to ``link_to_alignment``.
        branch_model: Branch model to run; skipped when falsy. It is
            converted with ``str`` before use.
        site_models: Iterable of site model names; also passed as the
            ``histfaces`` argument of ``render``.
        workir: Directory assigned to ``tree.workdir`` and used as the
            parent of the rendered image.
        tool_dir: Path, relative to the parent directory of ``ete3_path``,
            passed to ``EvolTree`` as ``binpath``.

    Returns:
        The ``EvolTree`` instance the models were run on.

    Side effects:
        Rebinds the module globals ``evol_output_dir`` and
        ``final_evol_tree`` and prints progress to stdout.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)

    # The bundled binaries are looked up next to the directory holding ete3_path.
    ete3_parent = os.path.split(ete3_path)[0]
    evol_tree = EvolTree(tree, binpath=os.path.join(ete3_parent, tool_dir))
    evol_tree.link_to_alignment(alg)
    evol_tree.workdir = workir

    # Branch model (optional).
    if branch_model:
        branch_name = str(branch_model)
        evol_tree.run_model(branch_name)
        print(evol_tree.get_evol_model(branch_name))

    # Site models.
    for site_model in site_models:
        evol_tree.run_model(site_model)

    # Publish the output locations through the module globals.
    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'
    evol_tree.render(final_evol_tree, layout=evol_clean_layout, histfaces=site_models)
    return evol_tree
def count_omega(align_file, gene_name):
    """Fit the free-branch ('fb') model for one gene and record the result.

    The tree is read from the module-level ``tree_file``; the fitted model's
    string form is printed, written to the module-level ``temp`` path, and
    ``write_in_table`` is then called with ``gene_name``.

    Args:
        align_file: Alignment handed to ``link_to_alignment``.
        gene_name: Gene label; printed and forwarded to ``write_in_table``.
    """
    print(gene_name)

    evol_tree = EvolTree(tree_file)
    evol_tree.link_to_alignment(align_file)

    # Free-branch ratio model.
    model_name = 'fb'
    evol_tree.run_model(model_name)
    free_branch_fit = evol_tree.get_evol_model(model_name)
    print(free_branch_fit)

    with open(temp, 'w') as handle:
        handle.write(str(free_branch_fit))

    # Called once the temp file has been closed.
    write_in_table(gene_name)
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run the branch-site test (bsA vs bsA1, plus M0) for one marked branch.

    All models are run inside a directory dedicated to ``mark_id``
    (``<output_dir>/<mark_id>``). The work directory is assigned *before* the
    first model is run so that the M0 output does not end up in the
    EvolTree default work directory, which every sub-process would share.

    Args:
        mark_id: Node id of the branch to mark as foreground (``#1``).
        aln_file: Alignment handed to ``link_to_alignment``.
        tree_file: Tree handed to ``EvolTree`` (``format=0``).
        sleep: Delay budget; the call sleeps ``sleep / args.threads``
            seconds (rounded to 2 decimals) before starting.

    Returns:
        ``[mark_id, ps, rx, p_bsA, label]`` where ``ps`` is the result of
        comparing bsA against bsA1, ``rx`` of comparing bsA1 against M0,
        ``p_bsA`` is ``classes['proportions'][2]`` of the bsA model, and
        ``label`` is ``'positive selection'``, ``'relaxation'`` or
        ``'no signal'``.

    Raises:
        OSError: from ``os.makedirs`` when the run directory already exists.
    """
    logger.info('sub-process: {0}'.format(str(mark_id)))
    # NOTE(review): presumably staggers the start of parallel workers - confirm.
    time.sleep(round(sleep / args.threads, 2))

    run_dir = os.path.join(output_dir, str(mark_id))
    os.makedirs(run_dir)

    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    # Must be set before any run_model call, otherwise the model is written
    # to the default work directory instead of run_dir.
    tree.workdir = run_dir
    tree.run_model('M0')

    tree.mark_tree([mark_id], marks=['#1'])
    alt_name = 'bsA.' + str(mark_id)
    null_name = 'bsA1.' + str(mark_id)
    tree.run_model(alt_name)
    tree.run_model(null_name)

    ps = tree.get_most_likely(alt_name, null_name)
    rx = tree.get_most_likely(null_name, 'M0')

    bsA = tree.get_evol_model(alt_name)
    # NOTE(review): index 2 is presumably site class 2a - confirm against ete3.
    p_bsA = bsA.classes['proportions'][2]
    wfrg2a = bsA.classes['foreground w'][2]

    if ps < 0.05 and float(wfrg2a) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, p_bsA, label]
print ('running model M1') tree.run_model ('M1') print ('running model M2') tree.run_model ('M2') print ('\n\n comparison of models M1 and M2, p-value: ' + str(tree.get_most_likely ('M2','M1'))) #tree.show() print ('by default the hist represented is this one:') tree.show (histfaces=['M2']) print ('but we can choose between many others...') model2 = tree.get_evol_model ('M2') col2 = {'NS' : 'black', 'RX' : 'black', 'RX+': 'black', 'CN' : 'black', 'CN+': 'black', 'PS' : 'black', 'PS+': 'black'} model2.set_histface (up=False, kind='curve', colors=col2, ylim=[0,4], hlines = [2.5, 1.0, 4.0, 0.5], header = 'Many lines, error boxes, background black', hlines_col=['orange', 'yellow', 'red', 'cyan'], errors=True) tree.show(histfaces=['M2']) model2.set_histface (up=False, kind='stick', hlines = [1.0,0.3], hlines_col=['black','grey']) tree.show(histfaces=['M2']) col = {'NS' : 'grey', 'RX' : 'black',
continue else: evotree.link_to_alignment(subfasta) workdirname = './codeml_' + "__".join(closest_seq_ids) evotree.workdir = workdirname list_of_tempdirs.append(workdirname) # mark the foreground branch foreground_leafnode = evotree & seqid # print (seqid) # print(foreground_leafnode.node_id) # print (evotree.write()) evotree.mark_tree([foreground_leafnode.node_id], ['#1']) # print (evotree.write()) evotree.run_model('b_free.run') b_free_fit = evotree.get_evol_model('b_free.run') out_branches_dict = b_free_fit.branches for b in out_branches_dict: if out_branches_dict[b]["mark"] == " #1": # check if there are at least 1 synonymous substitutions expected on this branch... otherwise not very meaningful to estimate omega (it will be very high). if out_branches_dict[b]["S"] * out_branches_dict[b][ "dS"] >= 1.0: omega = out_branches_dict[b]["w"] else: omega = "NA" break omega_list.append(omega) numeric_omegas = [float(x) for x in omega_list if not x == "NA"] try: avg_omega = sum(numeric_omegas) / float(len(numeric_omegas)) except ZeroDivisionError:
LRT between b_free and M0 (that is one or two rates of omega value) p-value ofthis comparison is:''') print (tree.get_most_likely ('b_free.137', 'M0')) input (''' Now test if foreground rate is significantly different of 1. (b_free with significantly better likelihood than b_neut) if significantly different, and higher than one, we will be under positive selection, if different and lower than 1 we will be under negative selection. And finally if models are not significantly different we should accept null hypothesis that omega value on marked branches is equal to 1, what would be a signal of relaxation. p-value for difference in rates between marked branches and the rest:''') print (tree.get_most_likely ('b_free.137', 'M0')) print ('p-value representing significance that omega is different of 1:') print (tree.get_most_likely ('b_free.137', 'b_neut.137')) print ('value of omega in marked branch (frg branch):') b_free = tree.get_evol_model ('b_free.137') print (b_free.branches[1]['w']) print ('and value of omega for background: ') print (b_free.branches[2]['w']) print ('we will now run 2 branch models over this tree, one letting the omega \nvalue of foreground species to be free, and the other fixing it at one.\n') print ("The End.")
raw_input ('\n alignment loaded, hit some key to see.\n') tree.show() print ''' we will run free-ratio model that is one of models available through function run_model: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ''' print tree.run_model.__doc__ +'\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++' tree.run_model ('fb.example') raw_input ('free-ratio model runned, all results are store in a Model object.') fb = tree.get_evol_model('fb.example') print 'Have a look to the parameters used to run this model on codeml: ' print fb.get_ctrl_string() raw_input ('hit some key...') print 'Have a look to run message of codeml: ' print fb.run raw_input ('hit some key...') print 'Have a look to log likelihood value of this model, and number of parameters:' print 'lnL: %s and np: %s' % (fb.lnL, fb.np) raw_input ('hit some key...') raw_input ('finally have a look to two layouts available to display free-ratio:')
raw_input('''Now we can do comparisons... Compare first if we have one or 2 rates of evolution among phylogeny. LRT between b_free and M0 (that is one or two rates of omega value) p-value ofthis comparison is:''') print tree.get_most_likely('b_free.137', 'M0') raw_input(''' Now test if foreground rate is significantly different of 1. (b_free with significantly better likelihood than b_neut) if significantly different, and higher than one, we will be under positive selection, if different and lower than 1 we will be under negative selection. And finally if models are not significantly different we should accept null hypothesis that omega value on marked branches is equal to 1, what would be a signal of relaxation. p-value for difference in rates between marked branches and the rest:''') print tree.get_most_likely('b_free.137', 'M0') print 'p-value representing significance that omega is different of 1:' print tree.get_most_likely('b_free.137', 'b_neut.137') print 'value of omega in marked branch (frg branch):' b_free = tree.get_evol_model('b_free.137') print b_free.branches[1]['w'] print 'and value of omega for background: ' print b_free.branches[2]['w'] print 'we will now run 2 branch models over this tree, one letting the omega \nvalue of foreground species to be free, and the other fixing it at one.\n' print "The End."
# Branch-site test for one leaf: mark the leaf as foreground, run bsA and
# bsA1, report the two comparisons, then clear every mark again.
# NOTE(review): `leaf` and `tree` are bound outside this fragment; `leaf` is
# presumably the variable of an enclosing per-leaf loop - re-indent to match
# that loop when splicing this back in.
print('\n---------\nNow working with leaf ' + leaf.name)
tree.mark_tree([leaf.node_id], marks=['#1'])
print(tree.write())

# to organize a bit, we name model with the name of the marked node
# any character after the dot, in model name, is not taken into account
# for computation. (have a look in /tmp/ete3.../bsA.. directory)
print('running model bsA and bsA1')
tree.run_model('bsA.' + leaf.name)
tree.run_model('bsA1.' + leaf.name)

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.' + leaf.name, 'bsA1.' + leaf.name)
rx = tree.get_most_likely('bsA1.' + leaf.name, 'M0')
print(str(ps))
print('p-value of relaxation for sites on this branch is: ')
print(str(rx))

model = tree.get_evol_model("bsA." + leaf.name)
if ps < 0.05 and float(model.classes['foreground w'][2]) > 1:
    print('we have positive selection on sites on this branch')
    tree.show(histfaces=['bsA.' + leaf.name])
elif rx < 0.05 and ps >= 0.05:
    print('we have relaxation on sites on this branch')
else:
    print('no signal detected on this branch, best fit for M0')

print('\nclean tree, remove marks')
# mark_tree needs a real list of ids: on Python 3 a map() object is a
# one-shot iterator without .index(), so build the list once and reuse it.
descendant_ids = [node.node_id for node in tree.get_descendants()]
tree.mark_tree(descendant_ids, marks=[''] * len(descendant_ids), verbose=True)

# nothing working yet to get which sites are under positive selection/relaxation,
# have to look at the main outfile or rst outfile
print('The End.')
06 Feb 2011 use slr to compute evolutionary rates """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete3 import EvolTree tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ("data/S_example/alignment_S_measuring_evol.fasta") tree.run_model ('SLR') slr = tree.get_evol_model ('SLR') slr.set_histface (up=False, kind='curve',errors=True, hlines = [1.0,0.3], hlines_col=['black','grey']) tree.show (histfaces=['SLR'])
# Python 2 syntax. Fit `model` from three starting omega values and keep the
# best log likelihood seen in best_lnL[model].
for initial_omega in [0.2, 0.7, 1.2]:
    # For bsA1 the starting omega is always replaced by 1.0.
    if model == 'bsA1':
        initial_omega = 1.0

    model_specifications = (model + '.' + branch_estimation + '_' +
                            str(initial_omega) + 'w')
    # Text shared by the two progress messages below.
    run_label = (model + ' on ' + alignment_name +
                 ' using starting branch length option ' +
                 branch_estimation + ' and initial omega: ' +
                 str(initial_omega) + 'w')
    print ('Testing model ' + run_label)

    run_options = {'fix_blength': starting_branch_length_option,
                   'omega': initial_omega}
    if model == 'XX':
        run_options['NSsites'] = 22
        run_options['ncatG'] = 3
    tree.run_model(model_specifications, **run_options)

    current_model = tree.get_evol_model(model_specifications)
    if model == 'XX':
        # Workaround kept from the original: tag the model as branch-site
        # and reload its out file so that the results are parsed.
        current_model.properties['typ'] = 'branch-site'
        current_model._load(model_specifications + '/out')

    print ('The fitting of model ' + run_label +
           ', the likelihood was: ' + str(current_model.lnL))

    if current_model.lnL > best_lnL[model]:
        best_lnL[model] = current_model.lnL
# Branch-site test for one leaf: mark the leaf as foreground, run bsA and
# bsA1, report the two comparisons, then clear every mark again.
# NOTE(review): `leaf` and `tree` are bound outside this fragment; `leaf` is
# presumably the variable of an enclosing per-leaf loop - re-indent to match
# that loop when splicing this back in.
print('\n---------\nNow working with leaf ' + leaf.name)
tree.mark_tree([leaf.node_id], marks=['#1'])
print(tree.write())

# to organize a bit, we name model with the name of the marked node
# any character after the dot, in model name, is not taken into account
# for computation. (have a look in /tmp/ete3.../bsA.. directory)
print('running model bsA and bsA1')
tree.run_model('bsA.' + leaf.name)
tree.run_model('bsA1.' + leaf.name)

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.' + leaf.name, 'bsA1.' + leaf.name)
rx = tree.get_most_likely('bsA1.' + leaf.name, 'M0')
print(str(ps))
print('p-value of relaxation for sites on this branch is: ')
print(str(rx))

model = tree.get_evol_model("bsA." + leaf.name)
if ps < 0.05 and float(model.classes['foreground w'][2]) > 1:
    print('we have positive selection on sites on this branch')
    tree.show(histfaces=['bsA.' + leaf.name])
elif rx < 0.05 and ps >= 0.05:
    print('we have relaxation on sites on this branch')
else:
    print('no signal detected on this branch, best fit for M0')

print('\nclean tree, remove marks')
# mark_tree needs a real list of ids: on Python 3 a map() object is a
# one-shot iterator without .index(), so build the list once and reuse it.
descendant_ids = [node.node_id for node in tree.get_descendants()]
tree.mark_tree(descendant_ids, marks=[''] * len(descendant_ids), verbose=True)

# nothing working yet to get which sites are under positive selection/relaxation,
# have to look at the main outfile or rst outfile
# Branch-site test with the Chimaeriformes leaves marked as foreground.
# NOTE(review): the source lost its indentation; only the mark_tree call is
# taken to be inside the loop (the model names below do not depend on
# `leaf`) - confirm against the original script.
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])
#tree.run_model("bsA." + chimaeriformes)
#tree.mark_tree([leaf.node_id], marks = ["#1"])

print("Running")
print(tree.write())
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(str(ps))
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')
print(str(rx))

model = tree.get_evol_model("bsA.Chimaeriformes")
if ps < 0.05 and float(model.classes['foreground w'][2]) > 1:
    print('we have positive selection on sites on this branch')
    tree.show(histfaces=['bsA1.Chimaeriformes'])
elif rx < 0.05 and ps >= 0.05:
    print('we have relaxation on sites on this branch')
else:
    print('no signal detected on this branch, best fit for M0')
#tree.show(histfaces=['bsA1.'])

# Dump every model that has been run on the tree.
for models in tree._models:
    print(tree.get_evol_model(models))

# Use the public pickle module rather than the private _pickle accelerator.
from pickle import dump
#out = open('my_tree.pik', 'w')
# Interactive walkthrough: run the free-ratio model as 'fb.example', then
# display its control string, run message, log likelihood and parameter
# count, pausing for input between steps.
print(
    " we will run free-ratio model that is one of models available through "
    "function run_model: "
    "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ "
)
print(
    tree.run_model.__doc__
    + "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)

tree.run_model("fb.example")
input("free-ratio model runned, all results are store in a Model object.")

# The fitted model is looked up by the name it was run under.
fb = tree.get_evol_model("fb.example")

print("Have a look to the parameters used to run this model on codeml: ")
print(fb.get_ctrl_string())
input("hit some key...")

print("Have a look to run message of codeml: ")
print(fb.run)
input("hit some key...")

print("Have a look to log likelihood value of this model, and number of parameters:")
print("lnL: {0} and np: {1}".format(fb.lnL, fb.np))
input("hit some key...")

input("finally have a look to two layouts available to display free-ratio:")