def load_parameters():
    """Load and validate the run configuration listed in ``input_file``.

    ``input_file`` (a module-level name) is read as plain text:

    * line 1 -- path of the cDNA alignment, parsed as FASTA;
    * line 2 -- path of the tree file;
    * remaining non-blank lines -- names of nodes the run is restricted to.

    The sequence ids of the alignment are compared with the names obtained
    by iterating the tree (its leaves, in ete3).  When they differ and the
    alignment has fewer ids, the tree is trimmed with ``trim_tree`` and the
    returned file is used from then on; any other mismatch is fatal.

    Returns:
        tuple: ``(run_list, aln_file, tree_file, descendant_dict)``.
        ``run_list`` holds the ``node_id`` of every descendant of the common
        ancestor of the selected nodes (or of the root when none were
        listed).  ``descendant_dict`` maps the ``node_id`` of every
        descendant in the tree to ``str(node)``.

    Every validation failure is logged through ``logger`` and ends the
    process with ``sys.exit(1)``.
    """
    descendant_dict = defaultdict()

    with open(input_file, 'r') as f:
        all_lines = f.readlines()
    # Both the alignment and the tree path are mandatory; without this guard
    # a short file would surface as a bare IndexError further down.
    if len(all_lines) < 2:
        logger.error('Invalid input file: {0}'.format(input_file))
        sys.exit(1)

    # --- alignment -------------------------------------------------------
    aln_file = all_lines[0].strip()
    if not os.path.exists(aln_file):
        logger.error('Invalid cDNA alignment file: {0}'.format(aln_file))
        sys.exit(1)
    logger.info('Input cDNA alignment file: {0}'.format(aln_file))
    seq_id_list = [
        str(seq_record.id) for seq_record in AlignIO.read(aln_file, 'fasta')
    ]
    seq_ids = set(seq_id_list)

    # --- tree ------------------------------------------------------------
    tree_file = all_lines[1].strip()
    if not os.path.exists(tree_file):
        logger.error('Invalid tree file: {0}'.format(tree_file))
        sys.exit(1)
    logger.info('Input tree file: {0}'.format(tree_file))
    tree_names = {str(node.name) for node in Tree(tree_file, format=0)}

    if seq_ids != tree_names:
        if len(seq_ids) < len(tree_names):
            logger.warning('Sequences is less than tree nodes.')
            logger.info('Trim input tree file.')
            tree_file = trim_tree(tree_file, seq_id_list)
        else:
            logger.error('Sequences is falsely greater than tree nodes.')
            sys.exit(1)

    t = EvolTree(tree_file, format=1)
    for descendant in t.iter_descendants():
        descendant_dict[descendant.node_id] = str(descendant)
    root = t.get_tree_root()

    # --- optional node selection -----------------------------------------
    stripped_lines = (each_line.strip() for each_line in all_lines[2:])
    select_nodes = [s for s in stripped_lines if s]

    if select_nodes:
        logger.info('Input nodes: {0}'.format(', '.join(select_nodes)))
        for node in select_nodes:
            if node not in t:
                logger.error('Error node: {0}'.format(node))
                sys.exit(1)
        # ete3's check_monophyly returns a tuple
        # (is_monophyletic, clade_type, offending_leaves); a non-empty tuple
        # is always truthy, so the boolean has to be taken from its first
        # element for this check to have any effect.
        monophyly = t.check_monophyly(values=select_nodes, target_attr='name')
        if isinstance(monophyly, tuple):
            is_monophyletic = monophyly[0]
        else:
            is_monophyletic = bool(monophyly)
        if not is_monophyletic:
            logger.error('Some nodes are not monophyletic.')
            sys.exit(1)
        common_ancestor = t.get_common_ancestor(select_nodes)
    else:
        common_ancestor = root
        logger.info('No specific node')

    run_list = [s.node_id for s in common_ancestor.iter_descendants()]
    logger.info('These node ids will be checked: {0}'.format(str(run_list)))
    return run_list, aln_file, tree_file, descendant_dict
# Restrict the tree to the taxa that are present in the alignment, keeping
# the original branch lengths.
tree.prune(taxa_in_alignment, preserve_branch_length=True)

# Read the test taxa, one name per line.  Blank lines (e.g. a trailing
# newline at the end of the file) are skipped: they would otherwise be looked
# up as the empty name, which may fail or match an unnamed node.
with open(test_taxa_file, 'r') as test_taxa_list:
    test_taxa = [
        taxon
        for taxon in (line.rstrip() for line in test_taxa_list)
        if taxon
    ]

nodes_to_mark = set()  # set since we want it to be all unique ids

# Mark the test taxa themselves.  `tree & name` is the ete3 notation for
# finding a node by name; the node_id is stored rather than the label.
for taxon in test_taxa:
    nodes_to_mark.add((tree & taxon).node_id)

# Mark the internal nodes that are common ancestors of test taxa.  The common
# ancestor of any group of taxa is also the common ancestor of some pair
# within that group, so visiting all pairs yields exactly the same node set
# as visiting every subset of size >= 2, in O(n^2) instead of O(2^n) lookups.
for pair in combinations(test_taxa, 2):
    nodes_to_mark.add(tree.get_common_ancestor(*pair).node_id)

# TODO change the names of the nodes
for mark_id in nodes_to_mark:
    test_node = tree.search_nodes(node_id=mark_id)[0]
    test_node.name += '{test}'

tree.write(outfile=out_tree_name, format=1)
import argparse
import os
import subprocess
import sys

from ete3 import EvolTree

# Branch-site test script: fit M0, then the bsA / bsA1 models with the leaves
# under the chosen common ancestor marked as foreground, and compare them.

# NOTE: binpath points at a machine-specific conda environment.
tree = EvolTree(
    "tree.nw",
    binpath="/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin",
)
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()
print(tree)

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0', keep=True)

# Mark every leaf found under the common ancestor of the two reference
# accessions, one mark_tree call per leaf.
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
foreground_ids = [leaf.node_id for leaf in chimaeriformes]
for foreground_id in foreground_ids:
    tree.mark_tree([foreground_id], marks=["#1"])

print("Running")
print(tree.write())

# Model names have the form "<model>.<label>"; both runs share one label.
alt_model = 'bsA.Chimaeriformes'
null_model = "bsA1.Chimaeriformes"
tree.run_model(alt_model)
tree.run_model(null_model)

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely(alt_model, null_model)
print(ps)
rx = tree.get_most_likely(null_model, 'M0')