def max_entropy_thinning(paths, input_alignment, taxa_number, output_alignment):
    """
    Run the external max-entropy thinning tool and collect its pruned alignment.

    The run directory ``paths.me_thinning_runs_dir`` is reset via
    ``util.make_path_clean`` first. The executable named by
    ``common.genesis_max_entropy`` is then called with the input alignment,
    the taxa number and an output prefix inside the run directory. The file
    ``<prefix>_pruned_alignment.fasta`` is finally moved to
    ``output_alignment``.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    runs_dir = paths.me_thinning_runs_dir
    util.make_path_clean(runs_dir)
    out_prefix = os.path.join(runs_dir, "me_thinning")

    argv = [
        common.genesis_max_entropy,
        input_alignment,
        str(taxa_number),
        out_prefix,
    ]
    # Echo the exact command line before running it.
    print(" ".join(argv))
    subprocess.check_call(argv)

    shutil.move(out_prefix + "_pruned_alignment.fasta", output_alignment)
def clade_compression_thinning(paths, input_tree_filename, input_alignment, taxa_number, output_alignment):
    """
    Run the external clade-compression thinning tool and collect its pruned alignment.

    The run directory ``paths.cc_thinning_runs_dir`` is reset via
    ``util.make_path_clean`` first. The executable named by
    ``common.genesis_clade_compression`` is then called with the input tree,
    the input alignment, the taxa number, the constant ``10000`` and an
    output prefix inside the run directory. The file
    ``<prefix>_pruned_alignment.fasta`` is finally moved to
    ``output_alignment``.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    runs_dir = paths.cc_thinning_runs_dir
    util.make_path_clean(runs_dir)
    out_prefix = os.path.join(runs_dir, "cc_thinning")

    argv = [
        common.genesis_clade_compression,
        input_tree_filename,
        input_alignment,
        str(taxa_number),
        # NOTE(review): hard-coded fourth tool argument; its meaning is not
        # visible from this file -- confirm against the tool's usage text.
        "10000",
        out_prefix,
    ]
    # Echo the exact command line before running it.
    print(" ".join(argv))
    subprocess.check_call(argv)

    shutil.move(out_prefix + "_pruned_alignment.fasta", output_alignment)
def launch_pargenes(alignment, model, output_dir, seed, rand_trees, pars_trees, bs_trees, cores):
    """
    Set up a clean output directory and submit a ParGenes run for one alignment.

    Steps performed:
      * ``output_dir`` is reset with ``util.make_path_clean``.
      * ``<output_dir>/alignments`` is created and ``alignment`` is linked
        into it under the name ``common.pargenes_ali_name``.
      * ``<output_dir>/raxml_options.txt`` is written with the ``--model``,
        ``--blmin`` and ``--precision`` options.
      * The ParGenes command line is printed and handed to
        ``launcher.submit`` together with ``cores``.

    Args:
        alignment: path of the alignment to link into the alignments directory.
        model: model string written after ``--model`` in the options file.
        output_dir: directory that receives all files of this run.
        seed: value passed as ``--seed``.
        rand_trees: value passed as ``-s``.
        pars_trees: value passed as ``-p``.
        bs_trees: value passed as ``-b``.
        cores: value passed as ``-c`` and forwarded to the launcher.
    """
    util.make_path_clean(output_dir)

    # ParGenes receives a directory (-a), so the alignment gets its own folder.
    ali_dir = os.path.join(output_dir, "alignments")
    util.mkdirp(ali_dir)
    ali_link = os.path.join(ali_dir, common.pargenes_ali_name)

    options_path = os.path.join(output_dir, "raxml_options.txt")
    with open(options_path, "w") as options_file:
        options_file.writelines([
            "--model " + model + " ",
            "--blmin " + common.raxml_min_bl + " ",
            "--precision " + str(common.raxml_precision) + " ",
        ])
    relative_symlink(alignment, ali_link)

    submit_prefix = os.path.join(output_dir, "pargenes")
    argv = [
        "python", common.pargenes,
        "-a", ali_dir,
        "-o", os.path.join(output_dir, "pargenes_output"),
        "-r", options_path,
        "--seed", str(seed),
        "-s", str(rand_trees),
        "-p", str(pars_trees),
        "-b", str(bs_trees),
        "-c", str(cores),
        "--core-assignment", "low",
    ]
    print(" ".join(argv))

    # The launcher's debug flag is always off for this job.
    launcher.submit(submit_prefix, argv, cores, False)
# tree = paths.raxml_best_tree modelfile = paths.raxml_best_model epa_out_dir = paths.epa_runs_dir hmmer_out_dir = paths.hmmer_runs_dir try: util.expect_file_exists( paths.raxml_credible_ml_trees ) except Exception as e: print("ERROR: Must run iqtree_tests stage of pipeline first") raise e # ================================================================ # start by ensuring we have the outgroups aligned against the ref # ================================================================ util.make_path_clean( epa_out_dir ) # if outgroup is included in the alignment, separate the two from the alignment currently seen as final if paths.dataset_has_outgroups: ref_msa = paths.alignment query_msa = paths.outgroups_file # ref_msa, query_msa = placement.split_alignment_outgroups( paths.alignment, common.outgroup_spec, epa_out_dir ) else: ref_msa = paths.alignment # create outgroup alignment using hmmer util.make_path_clean( hmmer_out_dir ) #create the hmm profile hmm_profile = placement.launch_hmmbuild( ref_msa, hmmer_out_dir ) # align outgroups against it
paths = common.Paths(sys.argv) # lay some stones in the wrong path try: util.expect_dir_exists(paths.epa_rooting_dir) except Exception as e: print("ERROR: Must run placement stage of pipeline first") raise e # get a separate workdir for this task runs_dir = paths.wuhan_placement_runs_dir # also a separate results dir result_dir = paths.wuhan_placement_dir util.make_path_clean(runs_dir) util.make_path_clean(result_dir) # get the wuhan sequence out of the master raw file into its own file in # the runs dir wuhan_fasta = os.path.join(runs_dir, "sequence.fasta") placement.extract_sequence(paths.raw_sequences, "EPI_ISL_406801", wuhan_fasta) ref_msa = paths.alignment # check if there already is a hmmprofile (should be the case for *msan runs) hmm_profile = os.path.join(paths.hmmer_runs_dir, "reference.hmm") # build it if it doesn't exist if not os.path.isfile(hmm_profile): hmm_profile = placement.launch_hmmbuild(ref_msa, paths.hmmer_runs_dir)
def evaluate_all_trees(paths):
    """
    This will evaluate all given trees with the given model description.
    For each tree, the model and branch lengths will be optimized while fixing the topology.
    This contrasts iqtree_tests, as a model optimization is performed for each tree separately.

    Three comparisons are produced, each written as a small CSV file with
    '#'-prefixed comment lines and a column header:

      * ``paths.raxml_iqtree_ll_all``: likelihoods read from
        ``paths.raxml_all_ml_trees_ll`` next to ``iqtree_eval`` results under
        ``common.subst_model``.
      * ``paths.gamma_ll_all``: ``raxmlng_eval_all`` ('GTR+FO+G'),
        ``iqtree_eval`` ('GTR+FO+G') and ``raxml_eval_all`` ('GTRGAMMAX').
      * ``paths.gamma_median_ll_all``: ``raxmlng_eval_all`` ('GTR+FO+GA') and
        ``iqtree_eval`` ('GTR+FO+G' with an extra ``True`` argument).

    The trees in ``paths.raxml_all_ml_trees`` (one per line) are first split
    into separate newick files, since ``iqtree_eval`` is called once per tree
    file through a multiprocessing pool.

    Rows are paired with ``zip``, so if the likelihood lists differ in length
    the output is silently truncated to the shortest one.
    """
    iqtree_eval_dir_model = os.path.join(paths.runs_dir, 'iqtree_eval_model')
    util.mkdirp(iqtree_eval_dir_model)
    iqtree_eval_dir_gamma = os.path.join(paths.runs_dir, 'iqtree_eval_gamma')
    util.mkdirp(iqtree_eval_dir_gamma)
    # Unlike the other directories, this one is wiped rather than just created.
    raxml_eval_dir_gamma = os.path.join(paths.runs_dir, 'raxml_eval_gamma')
    util.make_path_clean(raxml_eval_dir_gamma)
    raxmlng_eval_dir_gamma = os.path.join(paths.runs_dir, 'raxmlng_eval_gamma')
    util.mkdirp(raxmlng_eval_dir_gamma)
    iqtree_eval_dir_gamma_median = os.path.join(paths.runs_dir, 'iqtree_eval_gamma_median')
    util.mkdirp(iqtree_eval_dir_gamma_median)
    raxmlng_eval_dir_gamma_median = os.path.join(paths.runs_dir, 'raxmlng_eval_gamma_median')
    util.mkdirp(raxmlng_eval_dir_gamma_median)

    print('Comparing LLHs for model %s' % common.subst_model)

    # Progress messages end without a newline, so flush them explicitly;
    # otherwise they only show up once the matching 'done' is printed.
    print('Loading RAxML-ng LLHs... ', end='', flush=True)
    # The likelihood is the first space-separated field of each line.
    with open(paths.raxml_all_ml_trees_ll) as reader:
        raxmlng_lls = [float(line.split(' ')[0]) for line in reader]
    print('done.')

    print('Evaluating trees with iqtree (including model & brlen optimization)... ', end='', flush=True)
    tree_files = []
    with open(paths.raxml_all_ml_trees) as trees_file:
        for i, tree_str in enumerate(trees_file):
            # Write a separate newick file for this tree
            tree_file_name = os.path.join(iqtree_eval_dir_model, 'tree_%d.newick' % i)
            with open(tree_file_name, 'w') as tree_file:
                tree_file.write(tree_str)
            tree_files.append(tree_file_name)

    # One pool serves all three iqtree batches. The context manager makes
    # sure the worker processes are released even if an evaluation fails.
    with mp.Pool(common.available_cores) as pool:
        # Evaluate trees with model & brlen optimization
        iqtree_lls = pool.starmap(
            iqtree_eval,
            [(paths.alignment, common.subst_model, tree_file_name, tree_file_name)
             for tree_file_name in tree_files])
        print('done')

        with open(paths.raxml_iqtree_ll_all, "w") as writer:
            writer.write('# this file contains the likelihood of all ML trees at the end of the raxml-ng run as\n')
            writer.write('# well as the likelihood as evaluated by iqtree (with model & brlen optimization under\n')
            writer.write('# fixed tree topology\n')
            writer.write('raxmlng,iqtree\n')
            for raxmlng_ll, iqtree_ll in zip(raxmlng_lls, iqtree_lls):
                writer.write('%.3f,%.3f\n' % (raxmlng_ll, iqtree_ll))

        # Evaluate using GTR+GAMMA Model
        print('Comparing LLHs for GTR+GAMMA model')

        print('Evaluating trees with RAxML-ng (including model & brlen optimization)... ', end='', flush=True)
        raxmlng_lls = raxmlng_eval_all(
            paths.alignment, 'GTR+FO+G', paths.raxml_all_ml_trees,
            os.path.join(raxmlng_eval_dir_gamma, 'eval'))
        print('done')

        print('Evaluating trees with iqtree (including model & brlen optimization)... ', end='', flush=True)
        # Same tree files as before; the last argument is the tree file path
        # with the model directory swapped for the gamma directory.
        iqtree_lls = pool.starmap(
            iqtree_eval,
            [(paths.alignment, 'GTR+FO+G', tree_file_name,
              tree_file_name.replace(iqtree_eval_dir_model, iqtree_eval_dir_gamma))
             for tree_file_name in tree_files])
        print('done')

        print('Evaluating trees with RAxML (including model & brlen optimization)... ', end='', flush=True)
        raxml_lls = raxml_eval_all(
            paths.alignment, 'GTRGAMMAX', paths.raxml_all_ml_trees,
            os.path.join(raxml_eval_dir_gamma, 'eval'))
        print('done')

        with open(paths.gamma_ll_all, "w") as writer:
            writer.write('# this file contains the likelihood of all ML trees optimized and evaluated\n')
            writer.write('# under GTR+F0+G and fixed tree topology\n')
            writer.write('raxmlng,iqtree,raxml\n')
            for raxmlng_ll, iqtree_ll, raxml_ll in zip(raxmlng_lls, iqtree_lls, raxml_lls):
                writer.write('%.3f,%.3f,%.3f\n' % (raxmlng_ll, iqtree_ll, raxml_ll))

        # Evaluate using GTR+GAMMA Model with median rates
        print('Comparing LLHs for GTR+GAMMA model with median rates')

        print('Evaluating trees with RAxML-ng (including model & brlen optimization)... ', end='', flush=True)
        raxmlng_lls = raxmlng_eval_all(
            paths.alignment, 'GTR+FO+GA', paths.raxml_all_ml_trees,
            os.path.join(raxmlng_eval_dir_gamma_median, 'eval'))
        print('done')

        print('Evaluating trees with iqtree (including model & brlen optimization)... ', end='', flush=True)
        # iqtree keeps the 'GTR+FO+G' model string here; the trailing True is
        # the extra argument that distinguishes this batch from the previous one.
        iqtree_lls = pool.starmap(
            iqtree_eval,
            [(paths.alignment, 'GTR+FO+G', tree_file_name,
              tree_file_name.replace(iqtree_eval_dir_model, iqtree_eval_dir_gamma_median),
              True)
             for tree_file_name in tree_files])
        print('done')

    with open(paths.gamma_median_ll_all, "w") as writer:
        writer.write('# this file contains the likelihood of all ML trees optimized and evaluated\n')
        writer.write('# under GTR+F0+GA and fixed tree topology\n')
        writer.write('raxmlng,iqtree\n')
        for raxmlng_ll, iqtree_ll in zip(raxmlng_lls, iqtree_lls):
            writer.write('%.3f,%.3f\n' % (raxmlng_ll, iqtree_ll))