def load_parameters():
    """Load and validate the run settings listed in ``input_file``.

    The module-level ``input_file`` is read as plain text: line 1 is the
    path of a FASTA cDNA alignment, line 2 is the path of a tree file, and
    every further non-blank line is the name of a node of interest.

    The sequence ids of the alignment are compared with the node names
    obtained by iterating the tree (in ete3 that iteration yields the
    leaves).  When they differ and there are fewer sequences than names,
    the tree file is replaced by the one returned from ``trim_tree``; any
    other mismatch is fatal.

    Returns:
        A 4-tuple ``(run_list, aln_file, tree_file, descendant_dict)``.
        ``run_list`` holds the ``node_id`` of every descendant of the
        common ancestor of the selected nodes (or of the root when no node
        was selected).  ``aln_file`` and ``tree_file`` are the paths that
        were actually used.  ``descendant_dict`` maps the ``node_id`` of
        every descendant of the root to ``str()`` of that node.

    Raises:
        SystemExit: with status 1, after logging the reason, when the
            input file has fewer than two lines, a referenced file does
            not exist, the ids cannot be reconciled, a selected node is
            not in the tree, or the selected nodes are not monophyletic.
    """
    # Only the text is needed, so release the file handle straight away.
    with open(input_file, 'r') as f:
        all_lines = f.readlines()

    # Lines 1 and 2 are mandatory.  Without this guard a short file would
    # surface as a bare IndexError instead of a logged error like every
    # other invalid-input case in this function.
    if len(all_lines) < 2:
        logger.error('Invalid input file: {0}'.format(input_file))
        sys.exit(1)

    aln_file = all_lines[0].strip()
    if not os.path.exists(aln_file):
        logger.error('Invalid cDNA alignment file: {0}'.format(aln_file))
        sys.exit(1)
    logger.info('Input cDNA alignment file: {0}'.format(aln_file))
    # The ordered list is handed to trim_tree; the set is for comparison.
    seq_id_list = [str(seq_record.id)
                   for seq_record in AlignIO.read(aln_file, 'fasta')]
    seq_ids = set(seq_id_list)

    tree_file = all_lines[1].strip()
    if not os.path.exists(tree_file):
        logger.error('Invalid tree file: {0}'.format(tree_file))
        sys.exit(1)
    logger.info('Input tree file: {0}'.format(tree_file))
    node_names = {str(node.name) for node in Tree(tree_file, format=0)}

    if seq_ids != node_names:
        if len(seq_ids) < len(node_names):
            logger.warning('Sequences is less than tree nodes.')
            logger.info('Trim input tree file.')
            tree_file = trim_tree(tree_file, seq_id_list)
        else:
            logger.error('Sequences is falsely greater than tree nodes.')
            sys.exit(1)

    t = EvolTree(tree_file, format=1)
    # Kept as a defaultdict so the returned object has the same type as
    # before; with no default factory it behaves like a plain dict.
    descendant_dict = defaultdict()
    for descendant in t.iter_descendants():
        descendant_dict[descendant.node_id] = str(descendant)

    # Every non-blank line after the first two names a node of interest.
    select_nodes = [name
                    for name in (line.strip() for line in all_lines[2:])
                    if name]
    if select_nodes:
        nodes_line = ', '.join(select_nodes)
        logger.info('Input nodes: {0}'.format(nodes_line))
        for node in select_nodes:
            if node not in t:
                logger.error('Error node: {0}'.format(node))
                sys.exit(1)
        if not t.check_monophyly(values=select_nodes, target_attr='name'):
            logger.error('Some nodes are not monophyletic.')
            sys.exit(1)
        common_ancestor = t.get_common_ancestor(select_nodes)
    else:
        common_ancestor = t.get_tree_root()
        logger.info('No specific node')

    run_list = [node.node_id for node in common_ancestor.iter_descendants()]
    logger.info('These node ids will be checked: {0}'.format(
        str(run_list)))
    return run_list, aln_file, tree_file, descendant_dict
Exemplo n.º 2
0
        tree.prune(taxa_in_alignment, preserve_branch_length=True)

# Read the taxa to be tested, one name per line.  Blank lines (for example
# a trailing empty line) are skipped: an empty name would otherwise be
# looked up in the tree as if it were a real label.
# NOTE(review): in ete3 an empty name may match an unnamed internal node
# instead of failing - confirm against the ete3 version in use.
test_taxa = []
with open(test_taxa_file, 'r') as test_taxa_list:
    for line in test_taxa_list:
        taxon = line.rstrip()
        if taxon:
            test_taxa.append(taxon)

nodes_to_mark = set()  # set since we want it to be all unique ids

# Mark the test taxa themselves.
for taxon in test_taxa:
    taxon_node = tree & taxon  # ete3 notation for finding a node within a tree
    nodes_to_mark.add(taxon_node.node_id)

# Mark the internal nodes joining the test taxa.  The common ancestor of
# any group of taxa is also the common ancestor of some pair inside that
# group, so visiting every pair yields the same node set as visiting every
# subset of size >= 2, without the exponential number of subsets.
for pair in combinations(test_taxa, 2):
    common_node = tree.get_common_ancestor(*pair)
    nodes_to_mark.add(common_node.node_id)

# TODO: change the names of the nodes
# Tag every collected node by appending '{test}' to its name.
for mark_id in nodes_to_mark:
    test_node = tree.search_nodes(node_id=mark_id)[0]
    test_node.name += '{test}'

tree.write(outfile=out_tree_name, format=1)
Exemplo n.º 3
0
import sys, os, subprocess
import argparse
from ete3 import EvolTree

# Fixed inputs for this run: where the ete3 helper binaries live and the
# model names, each written '<model>.<label>'.
BINARIES_DIR = "/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin"
ALTERNATIVE_MODEL = 'bsA.Chimaeriformes'
NULL_MODEL = "bsA1.Chimaeriformes"

# Load the tree, attach the PHYLIP alignment and write results to the
# current working directory.
tree = EvolTree("tree.nw", binpath=BINARIES_DIR)
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()

print(tree)

# Baseline run, used in the last comparison below.
print('running model M0, for comparison with branch-site models...')
tree.run_model('M0', keep=True)

# Clade of interest: everything below the common ancestor of two accessions.
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")

# Iterating the clade node yields its leaves; flag each of them with the
# '#1' mark, passing one mark per node id in a single call.
foreground_ids = [tip.node_id for tip in chimaeriformes]
tree.mark_tree(foreground_ids, marks=["#1"] * len(foreground_ids))

print("Running")
print(tree.write())

# Run both branch-site models on the marked tree.
tree.run_model(ALTERNATIVE_MODEL)
tree.run_model(NULL_MODEL)

# Compare the two branch-site runs, then the bsA1 run against M0.
print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely(ALTERNATIVE_MODEL, NULL_MODEL)
print(str(ps))
rx = tree.get_most_likely(NULL_MODEL, 'M0')