def run_evol_py(tree, alg, branch_model, site_models, workir='data/evol_output', tool_dir="ete3_apps/bin"):
    """Run branch and site models on a tree and render the result to a PNG.

    An ``EvolTree`` is built from ``tree`` with ``binpath`` pointing at
    ``tool_dir`` inside the parent directory of ``ete3_path``, linked to the
    alignment ``alg`` and given ``workir`` as its working directory.
    ``branch_model`` is run (and its model object printed) only when it is
    truthy; every entry of ``site_models`` is then run in order.

    Side effects: rebinds the module globals ``evol_output_dir`` and
    ``final_evol_tree`` and renders ``<workir>/tree_evol_result.png``.

    :return: the ``EvolTree`` after all models have been run.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)

    apps_dir = os.path.join(os.path.dirname(ete3_path), tool_dir)
    tree = EvolTree(tree, binpath=apps_dir)
    tree.link_to_alignment(alg)
    tree.workdir = workir

    # Branch model (optional).
    if branch_model:
        branch_model = str(branch_model)
        tree.run_model(branch_model)
        print(tree.get_evol_model(branch_model))

    # Site models.
    for name in site_models:
        tree.run_model(name)

    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'
    tree.render(final_evol_tree, layout=evol_clean_layout, histfaces=site_models)
    return tree
def run(self, pamlsrc, output_folder, model='M1'):
    """Run a PAML codeml model through ETE.

    The default model is M1, which the original authors found best for
    their orthology inference; `M0`, `M2` and `M3` can be used as well.
    The tree is read from ``temptree.nw`` and linked to
    ``self.alignmentfile``; ``self.workdir`` is used as working directory.

    :param pamlsrc: Path to the codeml binary (found in the paml `/bin`).
    :param output_folder: Name appended to the model as ``<model>.<output_folder>``.
    :param model: The model to be used. (Default value = 'M1')
    """
    run_label = '.'.join((model, output_folder))

    evol_tree = EvolTree('temptree.nw')
    evol_tree.link_to_alignment(self.alignmentfile)
    evol_tree.workdir = self.workdir
    evol_tree.execpath = pamlsrc  # location of the codeml binary

    evol_tree.run_model(run_label)
    self.ete3paml_log.info('Codeml is generating data in %s.', run_label)
def main(self):
    """Run ``self.defaultmodel`` with codeml on the configured tree and alignment."""
    print("Running model %s paml on input." % str(self.defaultmodel))

    evol_tree = EvolTree(self.tree)
    evol_tree.link_to_alignment(self.alignment)
    evol_tree.workdir = self.workdir
    evol_tree.execpath = self.pamlpath  # location of the codeml binary
    evol_tree.run_model(self.defaultmodel)
def count_omega(align_file, gene_name):
    """Run the free-branch ('fb') model for one gene and record the result.

    The tree comes from the module-level ``tree_file``; the string form of
    the fitted model is written to the module-level path ``temp`` before
    ``write_in_table(gene_name)`` is called.
    """
    print(gene_name)

    evol_tree = EvolTree(tree_file)
    evol_tree.link_to_alignment(align_file)

    # Free branch ratio model.
    evol_tree.run_model('fb')
    fb_model = evol_tree.get_evol_model('fb')
    print(fb_model)

    with open(temp, 'w') as handle:
        handle.write(str(fb_model))

    write_in_table(gene_name)
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Run a codeml model for one gene using ETE3's integration with PAML.

    The species tree in ``data/initial-data/species_tree.nw`` is pruned to
    the organisms (from ``data/initial-data/organisms.csv``, passed through
    ``formatlist``) whose names occur in the gene's CDS alignment, and the
    model ``"<model>.<gene>"`` is then run on the pruned tree.

    :param gene: Gene name used to locate the alignment and to label the run.
    :param paml_path: Path to the codeml binary (assigned to ``tree.execpath``).
    :param workdir: Working directory handed to the ``EvolTree``.
    :param model: The codeml model to run. (Default value = 'M1')
    """
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/' + gene
                      + '_aligned_cds_nucl.fasta')

    # Species tree used as the base topology.
    species_tree = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)
    organismslist = formatlist(list(orgsfile[0]))

    # Read the alignment as text; the context manager closes the handle even
    # when reading fails.
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Some species may be missing from the alignment: keep only the branches
    # whose organism name occurs in it.
    branches2keep = [org for org in organismslist if org in alignment_str]

    # prune() edits the tree in place and returns None, so its return value
    # must not be handed to EvolTree; pass the pruned tree on as newick text.
    species_tree.prune(branches2keep, preserve_branch_length=True)
    tree = EvolTree(species_tree.write(format=1), format=1)

    tree.link_to_alignment(alignment_path)
    tree.workdir = workdir
    tree.execpath = paml_path  # location of the codeml binary
    tree.run_model(model + '.' + gene)
def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger): tree = EvolTree(treeFile) os.mkdir(outDir + "paml_site/") tree.workdir = outDir + "paml_site/" tree.link_to_alignment(alnFile, "Fasta") logger.info("PAML codeml") dModelRun = {} for model in lModels: if model in ["M0", "M1", "M2", "M7", "M8"]: logger.info("Running {:s}".format(model)) dModelRun[model] = tree.run_model(model) if "M1" and "M2" in dModelRun: p12 = tree.get_most_likely("M2", "M1") logger.info("LRT of M1 vs M2 = {}".format(p12)) if "M7" and "M8" in dModelRun: p78 = tree.get_most_likely("M8", "M7") logger.info("LRT of M7 vs M8 = {}".format(p78)) """
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Fit M0, bsA and bsA1 for one marked branch and classify the outcome.

    The branch ``mark_id`` is marked ``#1`` as foreground.  The result is
    ``[mark_id, ps, rx, p_bsA, label]`` where ``ps`` is
    ``get_most_likely(bsA, bsA1)``, ``rx`` is ``get_most_likely(bsA1, M0)``,
    ``p_bsA`` is the third entry of the bsA class proportions and ``label``
    is 'positive selection', 'relaxation' or 'no signal'.

    Relies on the module-level ``logger``, ``args`` and ``output_dir``.
    """
    tag = str(mark_id)
    alt_name = 'bsA.' + tag
    null_name = 'bsA1.' + tag

    logger.info('sub-process: {0}'.format(tag))
    # Stagger the start of the sub-processes.
    time.sleep(round(sleep / args.threads, 2))

    run_dir = os.path.join(output_dir, tag)
    os.makedirs(run_dir)

    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    # NOTE(review): M0 is run before `workdir` is assigned, so it does not use
    # `run_dir` — confirm this is intended when several sub-processes run.
    tree.run_model('M0')
    tree.workdir = run_dir
    tree.mark_tree([mark_id], marks=['#1'])
    tree.run_model(alt_name)
    tree.run_model(null_name)

    ps = tree.get_most_likely(alt_name, null_name)
    rx = tree.get_most_likely(null_name, 'M0')
    alt_fit = tree.get_evol_model(alt_name)
    p_bsA = alt_fit.classes['proportions'][2]
    wfrg2a = alt_fit.classes['foreground w'][2]

    if ps < 0.05 and float(wfrg2a) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, p_bsA, label]
# Branch-site model C on the marked clade, compared against null model M1.
# TODO: re-enable model M3 (and with it the bsD.137 run and the bsD.137 vs M3
# comparison, which are currently disabled).
print("""now running branch-site models C and D that represents the addition of one class of sites in on specific branch. These models must be compared to null models M1 and M3. if branch-site models are detected to be significantly better, than, one class of site is evolving at different rate in the marked clade. """)

for _banner, _model_name in (
    ("running branch-site C...", "bsC.137"),
    ("running M1 (all branches have the save value of omega)...", "M1"),
):
    print(_banner)
    tree.run_model(_model_name)

print("""p-value that, in marked clade, we have one class of site specifically evolving at a different rate:""")
print(tree.get_most_likely("bsC.137", "M1"))
import sys
import os
import subprocess
import argparse

from ete3 import EvolTree

# Load tree and alignment; codeml output goes to the current directory.
tree = EvolTree(
    "tree.nw",
    binpath="/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin",
)
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()
print(tree)

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0', keep=True)

# Mark every leaf under the common ancestor of the two accessions as
# foreground (#1).
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])

print("Running")
print(tree.write())
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(str(ps))
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')
# Python 2/3 compatibility: keep the raw_input() calls working on Python 3.
try:
    raw_input
except NameError:
    raw_input = input

print('Now, it is necessary to link this tree to an alignment:')
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')
raw_input('\n alignment loaded, hit some key to see.\n')
tree.show()

print(''' we will run free-ratio model that is one of models available through function run_model: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ''')
print(tree.run_model.__doc__ + '\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

# Results of the run are stored in a model object retrievable by name.
tree.run_model('fb.example')
raw_input('free-ratio model runned, all results are store in a Model object.')
fb = tree.get_evol_model('fb.example')

print('Have a look to the parameters used to run this model on codeml: ')
print(fb.get_ctrl_string())
raw_input('hit some key...')
print('Have a look to run message of codeml: ')
print(fb.run)
raw_input('hit some key...')
print('Have a look to log likelihood value of this model, and number of parameters:')
# NOTE(review): `node` is bound above this excerpt — presumably by a loop over
# the tree's nodes; confirm the indentation of the next two statements against
# the full script.
node.img_style = NodeStyle()
node.img_style['bgcolor'] = '#ffaa00'
tree.show()
print('''now running branch-site models C and D that represents the addition of one class of sites in on specific branch. These models must be compared to null models M1 and M3. if branch-site models are detected to be significantly better, than, one class of site is evolving at different rate in the marked clade. ''')
# TODO: re-enable model M3
# Alternative model: branch-site C on the marked clade.
print('running branch-site C...')
tree.run_model('bsC.137')
#print ('running branch-site D...')
#tree.run_model ('bsD.137')
# Null model the branch-site C fit is compared against.
print('running M1 (all branches have the save value of omega)...')
tree.run_model('M1')
#print ('running M3 (all branches have the save value of omega)...')
#tree.run_model ('M3')
print('''p-value that, in marked clade, we have one class of site specifically evolving at a different rate:''')
print(tree.get_most_likely('bsC.137', 'M1'))
#print ('p-value representing significance that omega is different of 1:')
#print (tree.get_most_likely ('bsD.137', 'M3'))
print('The End.')
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree, TreeStyle, faces

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')
print(tree)

print('\n Running free-ratio model with calculation of ancestral sequences...')
tree.run_model('fb_anc')
# A previous run can be linked instead of recomputed:
# tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc')

# Tree style with the aligned faces drawn as a table and light guiding lines.
I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"

# One footer row per internal node (ordered by node_id): its id and its
# sequence as stored on the node.
for n in sorted(tree.get_descendants() + [tree], key=lambda x: x.node_id):
    if not n.is_leaf():
        anc_face = faces.SequenceFace(n.sequence, 'aa', fsize=10, bg_colors={})
        I.aligned_foot.add_face(anc_face, 1)
        I.aligned_foot.add_face(
            faces.TextFace('node_id: #%d ' % (n.node_id), fsize=8), 0)

print('display result of bs_anc model, with ancestral amino acid sequences.')
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import TreeStyle
from ete3 import EvolTree
from ete3 import faces

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')
# print() with a single argument behaves the same on Python 2 and Python 3.
print(tree)

print('\n Running free-ratio model with calculation of ancestral sequences...')
tree.run_model('fb_anc')
#tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc')

# Tree style with the aligned faces drawn as a table and light guiding lines.
I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"

# One footer row per internal node (ordered by node_id): its id and its
# sequence as stored on the node.
for n in sorted(tree.get_descendants() + [tree], key=lambda x: x.node_id):
    if n.is_leaf():
        continue
    anc_face = faces.SequenceFace(n.sequence, 'aa', fsize=10, bg_colors={})
    I.aligned_foot.add_face(anc_face, 1)
    I.aligned_foot.add_face(faces.TextFace('node_id: #%d ' % (n.node_id), fsize=8), 0)
# display marked branches in orange for node in tree.traverse(): if not hasattr(node, 'mark'): continue if node.mark == '': continue node.img_style = NodeStyle() node.img_style['bgcolor'] = '#ffaa00' tree.show() print '''now running branch models free branch models, 2 groups of branches, one with Gorilla and chimp, the other with the rest of the phylogeny ''' print 'running branch free...' tree.run_model('b_free.137') print 'running branch neut...' tree.run_model('b_neut.137') print 'running M0 (all branches have the save value of omega)...' tree.run_model('M0') raw_input('''Now we can do comparisons... Compare first if we have one or 2 rates of evolution among phylogeny. LRT between b_free and M0 (that is one or two rates of omega value) p-value ofthis comparison is:''') print tree.get_most_likely('b_free.137', 'M0') raw_input(''' Now test if foreground rate is significantly different of 1. (b_free with significantly better likelihood than b_neut) if significantly different, and higher than one, we will be under
# NOTE(review): `node` is bound above this excerpt — presumably by a loop over
# the tree's nodes; confirm the indentation of the next statement against the
# full script.
node.img_style ['bgcolor'] = '#ffaa00'
tree.show()

# print() with a single argument behaves the same on Python 2 and Python 3.
print('''now running branch-site models C and D that represents the addition of one class of sites in on specific branch. These models must be compared to null models M1 and M3. if branch-site models are detected to be significantly better, than, one class of site is evolving at different rate in the marked clade. ''')

# TODO: re-enable model M3
print('running branch-site C...')
tree.run_model('bsC.137')
#print('running branch-site D...')
#tree.run_model('bsD.137')
print('running M1 (all branches have the save value of omega)...')
tree.run_model('M1')
#print('running M3 (all branches have the save value of omega)...')
#tree.run_model('M3')

print('''p-value that, in marked clade, we have one class of site specifically evolving at a different rate:''')
print(tree.get_most_likely('bsC.137', 'M1'))
#print('p-value representing significance that omega is different of 1:')
#print(tree.get_most_likely('bsD.137', 'M3'))
print('The End.')
from ete3 import EvolTree

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')
print(tree)

# On Python 2 make input() behave like raw_input(); Python 3 has no raw_input.
try:
    input = raw_input
except NameError:
    pass

input('\n tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n')

# Fit the two site models that are compared below.
print('running model M1')
tree.run_model('M1')
print('running model M2')
tree.run_model('M2')

print(
    '\n\n comparison of models M1 and M2, p-value: '
    + str(tree.get_most_likely('M2', 'M1'))
)

print('by default the hist represented is this one:')
tree.show(histfaces=['M2'])

print('but we can choose between many others...')
model2 = tree.get_evol_model('M2')
# display marked branches in orange for node in tree.traverse (): if not hasattr (node, 'mark'): continue if node.mark == '': continue node.img_style = NodeStyle () node.img_style ['bgcolor'] = '#ffaa00' tree.show() print ('''now running branch models free branch models, 2 groups of branches, one with Gorilla and chimp, the other with the rest of the phylogeny ''') print ('running branch free...') tree.run_model ('b_free.137') print ('running branch neut...') tree.run_model ('b_neut.137') print ('running M0 (all branches have the save value of omega)...') tree.run_model ('M0') input ('''Now we can do comparisons... Compare first if we have one or 2 rates of evolution among phylogeny. LRT between b_free and M0 (that is one or two rates of omega value) p-value ofthis comparison is:''') print (tree.get_most_likely ('b_free.137', 'M0')) input (''' Now test if foreground rate is significantly different of 1. (b_free with significantly better likelihood than b_neut) if significantly different, and higher than one, we will be under
# NOTE(review): this excerpt starts inside an enclosing scope and ends in the
# middle of a statement; the nesting shown here is reconstructed — confirm it
# against the full file.
# Translate the numeric branch-length option into the label used in run names.
# NOTE(review): `branch_estimation` is not assigned here for any value other
# than 1 or -1.
if starting_branch_length_option == 1:
    branch_estimation = 'bl'
elif starting_branch_length_option == -1:
    branch_estimation = 'random'
# One run per starting omega value.
for initial_omega in [0.2, 0.7, 1.2]:
    # For 'bsA1' the starting omega is forced to 1.0 on every iteration, so
    # all three iterations use the same run name.
    if model == 'bsA1':
        initial_omega = 1.0
    # Run name: "<model>.<branch_estimation>_<omega>w".
    model_specifications = model + '.' + branch_estimation + '_' + \
        str(initial_omega) + 'w'
    print 'Testing model ' + model + ' on ' + alignment_name + \
        ' using starting branch length option ' + \
        branch_estimation + ' and initial omega: ' + \
        str(initial_omega) + 'w'
    if model == 'XX':
        tree.run_model(model_specifications, \
                       fix_blength=starting_branch_length_option, \
                       omega=initial_omega, NSsites=22, ncatG=3)
        # Here's the garbage I wrote to make sure that it parses the out files correctly
        # (the model is re-typed as 'branch-site' and its output file reloaded)
        tree.get_evol_model(
            model_specifications).properties['typ'] = 'branch-site'
        tree.get_evol_model(model_specifications)._load(
            model_specifications + '/out')
    else:
        tree.run_model(model_specifications, \
                       fix_blength=starting_branch_length_option, \
                       omega=initial_omega)
    current_model = tree.get_evol_model(model_specifications)
    print 'The fitting of model ' + model + ' on ' + alignment_name + \
        ' using starting branch length option ' + \
from ete3 import EvolTree
from string import ascii_letters

# CREATE TREE
# Read the alignment; the context manager closes the file handle.
with open("./whales.fasta", "r") as fasta_file:
    fasta_lines = fasta_file.readlines()

# Every taxon (FASTA header without '>') gets a one-letter label.
taxa = [l.replace('>', '').strip() for l in fasta_lines if l.startswith('>')]
taxa_map = {t: ascii_letters[i] for i, t in enumerate(taxa)}

# Ladder-shaped newick over the labels: ((((a,b),c),d),...);
taxa_string = (
    '(' * (len(taxa) - 1)
    + '%s,%s)' % (ascii_letters[0], ascii_letters[1])
    + ''.join(',%s)' % t for t in ascii_letters[2:len(taxa)])
    + ';'
)

# Relabel header lines only.  A global str.replace() over the whole text
# would also rewrite a taxon name that merely contains another one (e.g.
# "whale1" inside "whale10") and could touch sequence data.
align = ''.join(
    '>%s\n' % taxa_map[line.replace('>', '').strip()]
    if line.startswith('>') else line
    for line in fasta_lines
)

tree = EvolTree(taxa_string)
tree.link_to_alignment(align)

print(tree.run_model.__doc__)
tree.run_model("fb")
omega_list.append("NA") continue else: evotree.link_to_alignment(subfasta) workdirname = './codeml_' + "__".join(closest_seq_ids) evotree.workdir = workdirname list_of_tempdirs.append(workdirname) # mark the foreground branch foreground_leafnode = evotree & seqid # print (seqid) # print(foreground_leafnode.node_id) # print (evotree.write()) evotree.mark_tree([foreground_leafnode.node_id], ['#1']) # print (evotree.write()) evotree.run_model('b_free.run') b_free_fit = evotree.get_evol_model('b_free.run') out_branches_dict = b_free_fit.branches for b in out_branches_dict: if out_branches_dict[b]["mark"] == " #1": # check if there are at least 1 synonymous substitutions expected on this branch... otherwise not very meaningful to estimate omega (it will be very high). if out_branches_dict[b]["S"] * out_branches_dict[b][ "dS"] >= 1.0: omega = out_branches_dict[b]["w"] else: omega = "NA" break omega_list.append(omega) numeric_omegas = [float(x) for x in omega_list if not x == "NA"] try: avg_omega = sum(numeric_omegas) / float(len(numeric_omegas))
06 Feb 2011 use slr to compute evolutionary rates """ __author__ = "Francois-Jose Serra" __email__ = "*****@*****.**" __licence__ = "GPLv3" __version__ = "0.0" from ete3 import EvolTree tree = EvolTree ("data/S_example/measuring_S_tree.nw") tree.link_to_alignment ("data/S_example/alignment_S_measuring_evol.fasta") tree.run_model ('SLR') slr = tree.get_evol_model ('SLR') slr.set_histface (up=False, kind='curve',errors=True, hlines = [1.0,0.3], hlines_col=['black','grey']) tree.show (histfaces=['SLR'])
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree

# Python 2/3 compatibility: keep the raw_input() call working on Python 3.
try:
    raw_input
except NameError:
    raw_input = input

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')
# print() with a single argument behaves the same on Python 2 and Python 3.
print(tree)
raw_input('\n tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0')

# each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify
# the node_id of the nodes we want to mark, and the kind of mark in this way:
for leaf in tree:
    print('\n---------\nNow working with leaf ' + leaf.name)
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print(tree.write())
    # to organize a bit, we name model with the name of the marked node
    # any character after the dot, in model name, is not taken into account
    # for computation. (have a look in /tmp/ete3.../bsA.. directory)
    print('running model bsA and bsA1')
    tree.run_model('bsA.' + leaf.name)
    tree.run_model('bsA1.' + leaf.name)
# Pause, then display the tree with its linked alignment.
input('\n alignment loaded, hit some key to see.\n')
tree.show()

print(""" we will run free-ratio model that is one of models available through function run_model: +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ """)
print(tree.run_model.__doc__
      + '\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

# Results of the run are stored in a model object retrievable by name.
tree.run_model('fb.example')
input('free-ratio model runned, all results are store in a Model object.')
fb = tree.get_evol_model('fb.example')

print('Have a look to the parameters used to run this model on codeml: ')
print(fb.get_ctrl_string())
input('hit some key...')

print('Have a look to run message of codeml: ')
print(fb.run)
input('hit some key...')

print('Have a look to log likelihood value of this model, and number of parameters:')