def run_evol_py(tree, alg, branch_model, site_models, workir='data/evol_output', tool_dir="ete3_apps/bin"):
    """Run branch and site evolutionary models on a tree and render the result.

    Builds an ``EvolTree`` from ``tree``, links it to the alignment ``alg``,
    runs the optional branch model followed by every site model, and renders
    the tree to ``<workir>/tree_evol_result.png``.

    Side effects: prints its arguments (and the fitted branch model, if any)
    and sets the module-level globals ``evol_output_dir`` and
    ``final_evol_tree``.

    :param tree: Tree source handed to ``EvolTree``.
    :param alg: Alignment handed to ``link_to_alignment``.
    :param branch_model: Branch model name; skipped when falsy.
    :param site_models: Iterable of site model names; also used as the
        ``histfaces`` argument when rendering.
    :param workir: Working directory assigned to the tree and used as the
        output directory for the rendered image.
    :param tool_dir: Directory, relative to the directory containing
        ``ete3_path``, passed to ``EvolTree`` as ``binpath``.
    :returns: The ``EvolTree`` after all models have been run.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)

    binaries_dir = os.path.join(os.path.dirname(ete3_path), tool_dir)
    evol_tree = EvolTree(tree, binpath=binaries_dir)
    evol_tree.link_to_alignment(alg)
    evol_tree.workdir = workir

    # Branch model (optional).
    if branch_model:
        branch_model = str(branch_model)
        evol_tree.run_model(branch_model)
        print(evol_tree.get_evol_model(branch_model))

    # Site models.
    for site_model in site_models:
        evol_tree.run_model(site_model)

    # Publish the output locations through the module-level globals.
    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'
    evol_tree.render(final_evol_tree, layout=evol_clean_layout, histfaces=site_models)
    return evol_tree
def run(self, pamlsrc, output_folder, model='M1'):
    """Run a PAML (codeml) model through ETE.

    The tree is read from ``temptree.nw`` and linked to
    ``self.alignmentfile``; results are produced under ``self.workdir``.
    The default model is M1 as it is best for orthology inference in our
    case. You can also use models `M2`, `M0`, `M3`. Ensure that you have the
    correct path to your codeml binary. It should be in the paml `/bin`.

    :param pamlsrc: Path to the codeml binary.
    :param output_folder: The name of the output folder; it is appended to
        the model name as ``<model>.<output_folder>``.
    :param model: The model to be used. (Default value = 'M1')
    """
    # Newick tree and alignment.
    evol_tree = EvolTree('temptree.nw')
    evol_tree.link_to_alignment(self.alignmentfile)

    # Where to work and which codeml binary to use.
    evol_tree.workdir = self.workdir
    evol_tree.execpath = pamlsrc

    # Run the requested model (M1, M2, M3, or M0).
    run_name = model + '.' + output_folder
    evol_tree.run_model(run_name)
    self.ete3paml_log.info('Codeml is generating data in %s.' % run_name)
def main(self):
    """Run the default codeml model on the configured tree and alignment.

    Reads ``self.tree``, ``self.alignment``, ``self.workdir``,
    ``self.pamlpath`` and ``self.defaultmodel``.
    """
    print("Running model %s paml on input." % str(self.defaultmodel))

    # Load the newick tree and attach the alignment.
    evol_tree = EvolTree(self.tree)
    evol_tree.link_to_alignment(self.alignment)

    # Working directory and location of the codeml binary.
    evol_tree.workdir = self.workdir
    evol_tree.execpath = self.pamlpath

    # Run the codeml model.
    evol_tree.run_model(self.defaultmodel)
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Use ETE3's integration with PAML.

    Loads the species tree, keeps only the organisms whose name occurs in the
    gene's CDS alignment, and runs the codeml model ``<model>.<gene>`` on the
    pruned tree.

    :param gene: Gene name; used to locate the alignment under
        ``data/clustal-output/<gene>_Aligned/`` and as the model-name suffix.
    :param paml_path: Path assigned to the tree's ``execpath`` (the codeml
        binary).
    :param workdir: Working directory assigned to the tree.
    :param model: The codeml model to run. (Default value = 'M1')
    """
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/' + gene
                      + '_aligned_cds_nucl.fasta')

    # Species tree and the list of organisms that may appear in it.
    species_tree = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)
    organismslist = formatlist(list(orgsfile[0]))

    # Read the alignment as one string; the context manager closes the file
    # even if reading fails.
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Some species may be absent from the alignment; keep only those whose
    # name occurs in it.
    # TODO: log the organisms that are missing from the alignment.
    branches2keep = [organism for organism in organismslist
                     if organism in alignment_str]

    # prune() modifies the tree in place and returns None, so its return
    # value must not be used as the pruned tree.
    species_tree.prune(branches2keep, preserve_branch_length=True)

    # Build the EvolTree from the pruned tree's newick string.
    tree = EvolTree(species_tree.write())
    tree.link_to_alignment(alignment_path)
    tree.workdir = workdir

    # Set the binpath of the codeml binary and run the model.
    tree.execpath = paml_path
    tree.run_model(model + '.' + gene)
def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger):
    """Run codeml site models with ETE3 and log likelihood-ratio tests.

    Creates ``<outDir>paml_site/`` as the working directory, runs every
    requested model among M0, M1, M2, M7 and M8, and logs the result of
    ``get_most_likely`` for M2 vs M1 and M8 vs M7 when both models of a pair
    were run.

    :param alnFile: Alignment linked to the tree (passed with format "Fasta").
    :param treeFile: Tree source handed to ``EvolTree``.
    :param lModels: Iterable of model names; names outside the supported set
        are ignored.
    :param pamlParams: Not used by the visible part of this function.
    :param outDir: Output directory prefix; ``"paml_site/"`` is appended to
        it by plain concatenation.
    :param baseName: Not used by the visible part of this function.
    :param logger: Logger used for progress and result messages.
    :raises OSError: If ``<outDir>paml_site/`` cannot be created (for
        example because it already exists).
    """
    tree = EvolTree(treeFile)
    site_dir = outDir + "paml_site/"
    os.mkdir(site_dir)
    tree.workdir = site_dir
    tree.link_to_alignment(alnFile, "Fasta")
    logger.info("PAML codeml")

    supported_models = ("M0", "M1", "M2", "M7", "M8")
    dModelRun = {}
    for model in lModels:
        if model in supported_models:
            logger.info("Running {:s}".format(model))
            dModelRun[model] = tree.run_model(model)

    # Both models of a pair must have been run before comparing them.
    # (`"M1" and "M2" in d` would only test for "M2".)
    if "M1" in dModelRun and "M2" in dModelRun:
        p12 = tree.get_most_likely("M2", "M1")
        logger.info("LRT of M1 vs M2 = {}".format(p12))
    if "M7" in dModelRun and "M8" in dModelRun:
        p78 = tree.get_most_likely("M8", "M7")
        logger.info("LRT of M7 vs M8 = {}".format(p78))
def main():
    """
    main function

    Loads the tree from ``WRKDIR``, links the pre-computed codeml outputs
    (fb, M1, M2, M7, M8) and the alignment to it, prints the results of the
    M2-vs-M1 and M8-vs-M7 likelihood-ratio tests, and shows the tree with the
    M2 histogram face.
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)

    # Site models: each output lives in paml/<model>/<model>.out.
    for model in ('M1', 'M2', 'M7', 'M8'):
        tree.link_to_evol_model(
            WRKDIR + 'paml/{0}/{0}.out'.format(model), model)
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    # The two spaces after the colon reproduce the output of the original
    # Python 2 `print label,` / `print value` pair.
    print('pv of LRT M2 vs M1:  %s' % (tree.get_most_likely('M2', 'M1'),))
    print('pv of LRT M8 vs M7:  %s' % (tree.get_most_likely('M8', 'M7'),))

    tree.show(histfaces=['M2'])
    print('The End.')
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run the codeml branch-site test for one marked branch.

    Runs M0, then marks ``mark_id`` as foreground (``#1``) and runs the
    ``bsA`` and ``bsA1`` models, all inside a directory private to this
    branch (``<output_dir>/<mark_id>``).

    :param mark_id: Node id of the branch to mark as foreground.
    :param aln_file: Alignment linked to the tree.
    :param tree_file: Tree source handed to ``EvolTree`` (format 0).
    :param sleep: Delay in seconds, divided by ``args.threads``, waited
        before starting.
    :returns: ``[mark_id, ps, rx, p_bsA, label]`` where ``ps`` is the result
        of comparing bsA with bsA1, ``rx`` the result of comparing bsA1 with
        M0, ``p_bsA`` the third entry of the bsA class proportions, and
        ``label`` one of ``'positive selection'``, ``'relaxation'`` or
        ``'no signal'``.
    :raises OSError: If the run directory already exists.
    """
    logger.info('sub-process: {0}'.format(str(mark_id)))
    time.sleep(round(sleep / args.threads, 2))

    run_dir = os.path.join(output_dir, str(mark_id))
    os.makedirs(run_dir)

    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    # Set the working directory before running any model: otherwise M0 is
    # written to EvolTree's default working directory, which every
    # sub-process would share.
    tree.workdir = run_dir
    tree.run_model('M0')

    tree.mark_tree([mark_id], marks=['#1'])
    bsa_name = 'bsA.' + str(mark_id)
    bsa1_name = 'bsA1.' + str(mark_id)
    tree.run_model(bsa_name)
    tree.run_model(bsa1_name)

    ps = tree.get_most_likely(bsa_name, bsa1_name)
    rx = tree.get_most_likely(bsa1_name, 'M0')
    bsA = tree.get_evol_model(bsa_name)
    p_bsA = bsA.classes['proportions'][2]
    wfrg2a = bsA.classes['foreground w'][2]

    if ps < 0.05 and float(wfrg2a) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, p_bsA, label]
def main(args):
    """Build an EvolTree from the parsed arguments and run the requested steps.

    Depending on which attributes of ``args`` are set, this links an
    alignment, loads previously computed models, runs new models, and
    compares models.

    :param args: Parsed command-line arguments. The attributes read are
        ``Tree``, ``BinPath``, ``MSA``, ``LoadedModels``, ``Models``,
        ``Compare`` and ``TreeStruct``.
    """
    if args.BinPath:
        tree = EvolTree(args.Tree, binpath=args.BinPath)
    else:
        tree = EvolTree(args.Tree)

    if args.MSA:
        # Test the real suffix: `args.MSA[:-3]` is everything *except* the
        # last three characters, so it never detected a ".phy" file.
        if args.MSA.endswith(".phy"):
            # NOTE(review): `alg_format` is the keyword documented for
            # EvolTree.link_to_alignment; the original passed `format=`.
            tree.link_to_alignment(args.MSA, alg_format="phylip")
        else:
            tree.link_to_alignment(args.MSA)

    print(tree)
    tree.workdir = os.getcwd()

    if args.LoadedModels:
        load_model(args.LoadedModels, tree)
        compare_models(models=args.LoadedModels, tree=tree, args=args)

    if args.Models:
        # Use the same attribute that the guard checks (was `args.models`).
        run_models(args.Models, tree)

    if args.Compare:
        if args.TreeStruct:
            tree_structure = parse_structure_file(args.TreeStruct)
            compare_models(models=args.Compare, tree=tree,
                           tree_structure=tree_structure)
        else:
            compare_models(models=args.Compare, tree=tree, args=args)
def main():
    """
    main function

    Loads the tree from ``WRKDIR``, links the pre-computed codeml outputs
    (fb, M1, M2, M7, M8) and the alignment to it, prints the results of the
    M2-vs-M1 and M8-vs-M7 likelihood-ratio tests, and shows the tree with the
    M2 histogram face.
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)

    # Site models: each output lives in paml/<model>/<model>.out.
    for site_model in ('M1', 'M2', 'M7', 'M8'):
        model_out = WRKDIR + 'paml/{0}/{0}.out'.format(site_model)
        tree.link_to_evol_model(model_out, site_model)
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')

    # Python 2 print statements are a syntax error on Python 3; a
    # single-argument print(...) works on both. The two spaces after the
    # colon keep the output identical to the original `print label,` followed
    # by `print value`.
    pv_m2_m1 = tree.get_most_likely('M2', 'M1')
    print('pv of LRT M2 vs M1:  %s' % (pv_m2_m1,))
    pv_m8_m7 = tree.get_most_likely('M8', 'M7')
    print('pv of LRT M8 vs M7:  %s' % (pv_m8_m7,))

    tree.show(histfaces=['M2'])
    print('The End.')
closest_seq_ids = [seqid] for d in idxes_of_3_smallest: closest_seq_ids.append(seqids_of_other_species[d]) # ete3 has codeml handling implemented!! No need for own functions. subtree = t.copy() subtree.prune(closest_seq_ids, preserve_branch_length=True) subtree.unroot() evotree = EvolTree(subtree.write()) subfasta = make_clean_fasta(closest_seq_ids, seqdatadict) if not subfasta: omega_list.append("NA") continue else: evotree.link_to_alignment(subfasta) workdirname = './codeml_' + "__".join(closest_seq_ids) evotree.workdir = workdirname list_of_tempdirs.append(workdirname) # mark the foreground branch foreground_leafnode = evotree & seqid # print (seqid) # print(foreground_leafnode.node_id) # print (evotree.write()) evotree.mark_tree([foreground_leafnode.node_id], ['#1']) # print (evotree.write()) evotree.run_model('b_free.run') b_free_fit = evotree.get_evol_model('b_free.run') out_branches_dict = b_free_fit.branches for b in out_branches_dict: if out_branches_dict[b]["mark"] == " #1": # check if there are at least 1 synonymous substitutions expected on this branch... otherwise not very meaningful to estimate omega (it will be very high).
out_tree_name = os.path.splitext(out_tree_name)[0] out_tree_name = out_tree_name + '_' + gene_name + '.tre' # If there is a new alignment, prune the tree down to the taxa that remain in # the new alignment and write a new tree because EvolTree is shit and can't # use the pruned tree saved in memory if empty_seq_count >= 1: if len(taxa_in_alignment) >= 1: tree.prune(taxa_in_alignment, preserve_branch_length=True) tree.unroot() tree.write(outfile=out_tree_name, format=0) tree = EvolTree(out_tree_name) tree.link_to_alignment(alignment_file) tree.workdir = os.getcwd() # Record list of all node_ids in the tree for later retrieving omega from a # background branch in the b_free model list_of_node_ids = [] for node in tree.traverse('postorder'): list_of_node_ids.append(node.node_id) test_taxa = [] with open(test_taxa_file, 'r') as test_taxa_list: for taxon in test_taxa_list: taxon = taxon.rstrip() test_taxa.append(taxon) marked_taxon_ids = [] # Mark test taxa