__licence__ = "GPLv3"
__version__ = "0.0"

from ete2 import EvolTree, TreeStyle, faces

# Load the example tree and attach its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

print(tree)

print("\n Running free-ratio model with calculation of ancestral sequences...")

tree.run_model("fb_anc")
# tree.link_to_evol_model('/tmp/ete2-codeml/fb_anc/out', 'fb_anc')

# Tree style used for drawing: aligned faces laid out as a table, with
# light grey guiding lines.
I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"

# Visit every node (root included) in node_id order; for each internal node
# add its sequence and a "node_id" label to the aligned footer of the style.
for n in sorted(tree.get_descendants() + [tree], key=lambda node: node.node_id):
    if not n.is_leaf():
        anc_face = faces.SequenceFace(n.sequence, "aa", fsize=10, bg_colors={})
        I.aligned_foot.add_face(anc_face, 1)
        I.aligned_foot.add_face(
            faces.TextFace("node_id: #%d " % (n.node_id), fsize=8), 0
        )

print("display result of bs_anc model, with ancestral amino acid sequences.")
def run_branch_test(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the branch-site test (bsA against bsA1) on each branch of a tree.

    The tree is linked to a nucleotide FASTA alignment, model "M0" is run
    once, and then every descendant of the root is marked in turn as the
    "#1" branch while models "bsA.<node_id>" and "bsA1.<node_id>" are run.
    For each branch a likelihood-ratio test (chi-square, 1 degree of freedom)
    is computed from the two log-likelihoods.

    Args:
        cluster_name: label of the cluster; used in progress messages and as
            the name of the working sub-folder.
        treefile: tree handed to ``EvolTree``.
        alignment: FASTA alignment handed to ``link_to_alignment``.
        folder_temp: parent folder in which ``<cluster_name>`` is created and
            used as ``tree.workdir``.
        folder_plots: unused at the moment (rendering is disabled); kept so
            the signature stays stable.

    Returns:
        ``[result_entry, ps_sites]`` for the last branch that was recorded,
        or ``[]`` when the tree has no descendants. ``result_entry`` is
        ``[cluster_name, node_id, omega, proportion, p_value, status,
        n_sites_over_0.50, n_sites_over_0.95, leaf_names]`` and ``ps_sites``
        maps a 0-based site index to ``[aa, p2]`` taken from the BEB table
        of the bsA model.

    NOTE(review): only the last recorded branch is returned, and every
    "Positive" branch after the first one is skipped entirely. That looks
    unintended (an ``append`` was probably meant), but the return shape is
    kept so existing callers continue to work -- confirm before changing.
    """
    from ete2 import EvolTree
    import os
    from collections import defaultdict
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Create the temporary working folder for this cluster.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # Run M0 as the null model for the relaxation comparison below.
    tree.run_model("M0")

    # Arguments used to blank the marks after each branch; they do not change
    # between iterations, so they are built once.
    all_node_ids = [n.node_id for n in tree.get_descendants()]
    blank_marks = [''] * len(all_node_ids)

    positive_recorded = False
    output_list = []

    for node in tree.iter_descendants():
        # Mark the branch under analysis.
        tree.mark_tree([node.node_id], marks=["#1"])

        # The node id doubles as the model-name suffix. Per the ETE notes,
        # anything after the dot in a model name is ignored for computation
        # and only serves to keep the per-branch runs apart.
        temp_leaf_name = str(node.node_id)
        leaf_names = ",".join(node.get_leaf_names())
        print("Processing: " + cluster_name + " " + temp_leaf_name + " " + leaf_names)

        alt_name = "bsA." + temp_leaf_name
        null_name = "bsA1." + temp_leaf_name
        tree.run_model(alt_name)
        tree.run_model(null_name)

        # Remove the marks as soon as the models have run. Previously this
        # happened at the end of the loop body and was skipped by `continue`,
        # leaving a stale mark that contaminated the following branches.
        tree.mark_tree(all_node_ids, marks=blank_marks, verbose=False)

        bsA = tree.get_evol_model(alt_name)
        bsA1 = tree.get_evol_model(null_name)

        # Collect sites whose BEB 'p2' value exceeds 0.50 and count those
        # above 0.95.
        beb = bsA.sites['BEB']
        ps_sites = defaultdict()
        total_sites = 0
        sites_over_95 = 0
        for position, (residue, posterior) in enumerate(zip(beb['aa'], beb['p2'])):
            probability = float(posterior)
            if probability > 0.50:
                ps_sites[position] = [residue, posterior]
                total_sites += 1
            if probability > 0.95:
                sites_over_95 += 1

        rx = float(tree.get_most_likely(null_name, "M0"))

        # LRT statistic: an alternative model that fits worse than its null
        # is no evidence at all, so clamp at zero instead of taking the
        # absolute value (which would report a spurious significant result).
        lrt_value = max(0.0, 2 * (bsA.lnL - bsA1.lnL))
        # sf() is 1 - cdf() without the cancellation error for tiny p-values.
        ps = chi2.sf(lrt_value, 1)

        omega_value = float(bsA.classes['foreground w'][2])
        proportion_sites = float(bsA.classes['proportions'][2])

        if ps < 0.05 and omega_value > 1:
            test_status = "Positive"
            if positive_recorded:
                # Historical behaviour: only the first positive branch is
                # recorded (see NOTE in the docstring).
                continue
            positive_recorded = True
        elif rx < 0.05 and ps >= 0.05:
            test_status = "Relaxed"
        else:
            test_status = None

        result_entry = [cluster_name, node.node_id, omega_value, proportion_sites,
                        ps, test_status, total_sites, sites_over_95, leaf_names]
        output_list = [result_entry, ps_sites]

    return output_list
def run_site_tests(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the site test M1 (null) against M2 (alternative) for one cluster.

    The tree is linked to a nucleotide FASTA alignment, both models are run
    in ``<folder_temp>/<cluster_name>``, and a likelihood-ratio test
    (chi-square, 2 degrees of freedom) is computed from their
    log-likelihoods. When the test is significant (p < 0.05) and the omega
    of the third site class is above 1, the tree is rendered to
    ``<folder_plots>/<cluster_name>.svg`` with the M2 histogram face.

    Args:
        cluster_name: label of the cluster; used in messages and file names.
        treefile: tree handed to ``EvolTree``.
        alignment: FASTA alignment handed to ``link_to_alignment``.
        folder_temp: parent folder of the per-cluster working directory.
        folder_plots: folder that receives the SVG plot.

    Returns:
        ``[result_entry, ps_sites]`` where ``result_entry`` is
        ``[cluster_name, omega, proportion, p_value, status,
        n_sites_over_0.50, n_sites_over_0.95]`` (``status`` is "Positive" or
        ``None``) and ``ps_sites`` maps a 0-based site index to ``[aa, p2]``
        taken from the BEB table of M2.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Create the temporary working folder for this cluster.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # M1 is the null model, M2 the alternative.
    tree.run_model("M1")
    tree.run_model("M2")
    model1 = tree.get_evol_model("M1")
    model2 = tree.get_evol_model("M2")

    # Collect sites whose BEB 'p2' value exceeds 0.50 and count those above
    # 0.95.
    beb = model2.sites["BEB"]
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for position, (residue, posterior) in enumerate(zip(beb["aa"], beb["p2"])):
        probability = float(posterior)
        if probability > 0.50:
            ps_sites[position] = [residue, posterior]
            total_sites += 1
        if probability > 0.95:
            sites_over_95 += 1

    # LRT statistic: an alternative model that fits worse than its null is no
    # evidence at all, so clamp at zero instead of taking the absolute value
    # (which would report a spurious significant result).
    lrt_value = max(0.0, 2 * (model2.lnL - model1.lnL))
    # sf() is 1 - cdf() without the cancellation error for tiny p-values.
    pval = chi2.sf(lrt_value, 2)

    omega_value = float(model2.classes["w"][2])
    proportion_sites = float(model2.classes["proportions"][2])

    test_status = None
    if pval < 0.05 and omega_value > 1:
        # Evidence of positive selection: save an SVG plot of the cluster.
        test_status = "Positive"
        col2 = {"NS": "black", "RX": "black", "RX+": "black", "CN": "black",
                "CN+": "black", "PS": "black", "PS+": "black"}
        model2.set_histface(
            up=False,
            kind="curve",
            colors=col2,
            ylim=[0, 4],
            hlines=[2.5, 1.0, 4.0, 0.5],
            hlines_col=["orange", "yellow", "red", "cyan"],
            errors=True,
        )
        plot_file = folder_plots + "/" + cluster_name
        tree.render(plot_file + ".svg", layout=evol_clean_layout, histfaces=["M2"])

    result_entry = [cluster_name, omega_value, proportion_sites, pval,
                    test_status, total_sites, sites_over_95]
    return [result_entry, ps_sites]
__licence__ = "GPLv3"
__version__ = "0.0"

from ete2 import EvolTree

# Load the example tree and attach its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

# Pause until the user is ready to start the computation.
raw_input(
    '\n tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n'
)

print('running model M1')
tree.run_model('M1')
print('running model M2')
tree.run_model('M2')

# Likelihood-ratio comparison of the two site models.
print(
    '\n\n comparison of models M1 and M2, p-value: '
    + str(tree.get_most_likely('M2', 'M1'))
)

#tree.show()
print('by default the hist represented is this one:')

tree.show(histfaces=['M2'])

print('but we can choose between many others...')

model2 = tree.get_evol_model('M2')
node.img_style ['bgcolor'] = '#ffaa00' tree.show() print '''now running branch-site models C and D that represents the addition of one class of sites in on specific branch. These models must be compared to null models M1 and M3. if branch-site models are detected to be significantly better, than, one class of site is evolving at different rate in the marked clade. ''' # TODO: re-enable model M3 print 'running branch-site C...' tree.run_model ('bsC.137') #print 'running branch-site D...' #tree.run_model ('bsD.137') print 'running M1 (all branches have the save value of omega)...' tree.run_model ('M1') #print 'running M3 (all branches have the save value of omega)...' #tree.run_model ('M3') print '''p-value that, in marked clade, we have one class of site specifically evolving at a different rate:''' print tree.get_most_likely ('bsC.137', 'M1') #print 'p-value representing significance that omega is different of 1:' #print tree.get_most_likely ('bsD.137', 'M3') print 'The End.'
def run_site_tests(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the site test M1 (null) against M2 (alternative) for one cluster.

    The tree is linked to a nucleotide FASTA alignment, both models are run
    in ``<folder_temp>/<cluster_name>``, and a likelihood-ratio test
    (chi-square, 2 degrees of freedom) is computed from their
    log-likelihoods. When the test is significant (p < 0.05) and the omega
    of the third site class is above 1, the tree is rendered to
    ``<folder_plots>/<cluster_name>.svg`` with the M2 histogram face.

    Args:
        cluster_name: label of the cluster; used in messages and file names.
        treefile: tree handed to ``EvolTree``.
        alignment: FASTA alignment handed to ``link_to_alignment``.
        folder_temp: parent folder of the per-cluster working directory.
        folder_plots: folder that receives the SVG plot.

    Returns:
        ``[result_entry, ps_sites]`` where ``result_entry`` is
        ``[cluster_name, omega, proportion, p_value, status,
        n_sites_over_0.50, n_sites_over_0.95]`` (``status`` is "Positive" or
        ``None``) and ``ps_sites`` maps a 0-based site index to ``[aa, p2]``
        taken from the BEB table of M2.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Create the temporary working folder for this cluster.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # M1 is the null model, M2 the alternative.
    tree.run_model("M1")
    tree.run_model("M2")
    model1 = tree.get_evol_model("M1")
    model2 = tree.get_evol_model("M2")

    # Collect sites whose BEB 'p2' value exceeds 0.50 and count those above
    # 0.95.
    beb = model2.sites['BEB']
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for position, (residue, posterior) in enumerate(zip(beb['aa'], beb['p2'])):
        probability = float(posterior)
        if probability > 0.50:
            ps_sites[position] = [residue, posterior]
            total_sites += 1
        if probability > 0.95:
            sites_over_95 += 1

    # LRT statistic: an alternative model that fits worse than its null is no
    # evidence at all, so clamp at zero instead of taking the absolute value
    # (which would report a spurious significant result).
    lrt_value = max(0.0, 2 * (model2.lnL - model1.lnL))
    # sf() is 1 - cdf() without the cancellation error for tiny p-values.
    pval = chi2.sf(lrt_value, 2)

    omega_value = float(model2.classes['w'][2])
    proportion_sites = float(model2.classes['proportions'][2])

    test_status = None
    if pval < 0.05 and omega_value > 1:
        # Evidence of positive selection: save an SVG plot of the cluster.
        test_status = "Positive"
        col2 = {
            'NS': 'black',
            'RX': 'black',
            'RX+': 'black',
            'CN': 'black',
            'CN+': 'black',
            'PS': 'black',
            'PS+': 'black',
        }
        model2.set_histface(
            up=False,
            kind='curve',
            colors=col2,
            ylim=[0, 4],
            hlines=[2.5, 1.0, 4.0, 0.5],
            hlines_col=['orange', 'yellow', 'red', 'cyan'],
            errors=True,
        )
        plot_file = folder_plots + "/" + cluster_name
        tree.render(plot_file + ".svg", layout=evol_clean_layout, histfaces=['M2'])

    result_entry = [
        cluster_name, omega_value, proportion_sites, pval, test_status,
        total_sites, sites_over_95,
    ]
    return [result_entry, ps_sites]
# display marked branches in orange for node in tree.traverse (): if not hasattr (node, 'mark'): continue if node.mark == '': continue node.img_style = NodeStyle () node.img_style ['bgcolor'] = '#ffaa00' tree.show() print '''now running branch models free branch models, 2 groups of branches, one with Gorilla and chimp, the other with the rest of the phylogeny ''' print 'running branch free...' tree.run_model ('b_free.137') print 'running branch neut...' tree.run_model ('b_neut.137') print 'running M0 (all branches have the save value of omega)...' tree.run_model ('M0') raw_input ('''Now we can do comparisons... Compare first if we have one or 2 rates of evolution among phylogeny. LRT between b_free and M0 (that is one or two rates of omega value) p-value ofthis comparison is:''') print tree.get_most_likely ('b_free.137', 'M0') raw_input (''' Now test if foreground rate is significantly different of 1. (b_free with significantly better likelihood than b_neut) if significantly different, and higher than one, we will be under
__licence__ = "GPLv3"
__version__ = "0.0"

from ete2 import EvolTree

# Load the example tree and attach its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

# Pause until the user is ready to start the computation.
raw_input(
    '\n tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n'
)

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0')

# Every node/leaf carries two identifiers, node_id and paml_id. Marking is
# done by node_id, together with the mark to apply, as shown in the loop.
for leaf in tree:
    leaf.node_id  # bare attribute access kept from the original; no effect
    print('\n---------\nNow working with leaf ' + leaf.name)
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print(tree.write())
    # Models are named after the marked node to keep runs organised: any
    # character after the dot in a model name is ignored for the
    # computation (have a look in /tmp/ete2.../bsA.. directory).
    print('running model bsA and bsA1')
    tree.run_model('bsA.' + leaf.name)
    tree.run_model('bsA1.' + leaf.name)
print('Now, it is necessary to link this tree to an alignment:')

tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

raw_input('\n alignment loaded, hit some key to see.\n')

tree.show()

# Introduce the free-ratio model and print run_model's own documentation.
print(''' we will run free-ratio model that is one of models available through function run_model: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ''')
print(tree.run_model.__doc__ + '\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

tree.run_model('fb.example')

raw_input('free-ratio model runned, all results are store in a Model object.')

# Fetch the model object holding the results of the run above.
fb = tree.get_evol_model('fb.example')

print('Have a look to the parameters used to run this model on codeml: ')
print(fb.get_ctrl_string())
raw_input('hit some key...')

print('Have a look to run message of codeml: ')
print(fb.run)
raw_input('hit some key...')

print('Have a look to log likelihood value of this model, and number of parameters:')