Exemplo n.º 1
0
# Load the tip names that define the target class: one name per line of
# `testclassfile`, collected into a set.
with open(testclassfile) as tf:
    target_tips = set(tf.read().splitlines())

# Each non-empty entry of `lines` is split on tabs. As used below, field 0 is
# stored as `tree`, field 1 as `hog`, and field 2 is handed to EvolTree.
for line in lines:
    if line == "":
        continue
    else:
        fields = line.split("\t")
        hog = fields[1]
        tree = fields[0]
        try:
            t = EvolTree(fields[2])
        except:
            # NOTE(review): a bare except skips the line on *any* failure and
            # also swallows KeyboardInterrupt/SystemExit; consider narrowing.
            continue
        for node in t.traverse():
            # Original author's note: UGLY!
            # node_in_class is defined elsewhere; presumably it reports whether
            # this node belongs to the target class given target_tips -- confirm.
            istarget = node_in_class(node, t, target_tips)
            brstat = node.dist
            nname = node.name
            # Nodes with an empty name are labelled by joining their leaf
            # names with "-".
            if nname == "":
                nname = "-".join(node.get_leaf_names())
            # node.up.name raises AttributeError when node.up has no `name`
            # (presumably node.up is None at the tree root -- confirm); that
            # case is labelled "root".
            try:
                pname = node.up.name
            except AttributeError:
                pname = "root"
            # A parent with an empty name gets the same leaf-name label.
            if pname == "":
                pname = "-".join(node.up.get_leaf_names())
            # Branch label in the form "<parent>:<child>".
            brname = pname + ":" + nname
            print(hog,
                  tree,
Exemplo n.º 2
0
# Build the example tree and attach the alignment it will be analysed with.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)
print('Tree and alignment loaded.')

# Wait for the user before marking the foreground branches.
input('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.')

# Node identifiers to mark, each paired with the same '#1' label.
marks = ['1', '3', '7']
foreground_labels = ['#1'] * len(marks)
tree.mark_tree(marks, foreground_labels)
print(tree.write())

print('we can easily colorize marked branches')
# Give every node carrying a non-empty mark an orange background, then open
# the tree viewer.
for node in tree.traverse():
    if getattr(node, 'mark', '') == '':
        continue
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print('''now running branch models
free branch models, 2 groups of branches, one with Gorilla and
chimp, the other with the rest of the phylogeny
''')

# Announce and run the free-branch model, then announce the neutral one.
print('running branch free...')
tree.run_model('b_free.137')
print('running branch neut...')
Exemplo n.º 3
0
# `tree` is created before this excerpt; attach the example alignment to it.
alignment_path = 'data/S_example/alignment_S_measuring_evol.fasta'
tree.link_to_alignment(alignment_path)

print(tree)
print('Tree and alignment loaded.')

# Wait for the user before marking the foreground branches.
prompt = 'Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.'
input(prompt)

# NOTE(review): this list mixes str and int identifiers; it is kept exactly as
# found -- confirm which type mark_tree expects.
marks = ['1', 3, '7']
tree.mark_tree(marks, ['#1', '#1', '#1'])

newick_text = tree.write()
print(newick_text)

# Paint every node that carries a non-empty mark in orange, then open the
# tree viewer.
for node in tree.traverse():
    if hasattr(node, 'mark') and node.mark != '':
        node.img_style = NodeStyle()
        node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print('''now running branch-site models C and D that represents
the addition of one class of sites in on specific branch.
These models must be compared to null models M1 and M3.
if branch-site models are detected to be significantly better,
than, one class of site is evolving at different rate in the marked
clade.
''')
def load_parameters():
    """Load the alignment, tree and node selection listed in ``input_file``.

    ``input_file`` (a module-level name) is read as text. Its first line is
    the path of a FASTA cDNA alignment, its second line the path of a tree
    file, and every further non-blank line is a node name that restricts the
    analysis.

    If the alignment ids and the tree's leaf names differ, the tree is handed
    to ``trim_tree`` when the alignment has fewer ids (presumably producing a
    pruned tree file whose path is returned -- confirm against ``trim_tree``);
    otherwise the run is aborted.

    Returns:
        tuple: ``(run_list, aln_file, tree_file, descendant_dict)``.
        ``run_list`` holds the ``node_id`` of every descendant of the common
        ancestor of the selected nodes, or of the root when none are selected.
        ``aln_file`` and ``tree_file`` are the paths actually used
        (``tree_file`` is the value returned by ``trim_tree`` if trimming
        happened). ``descendant_dict`` maps each descendant's ``node_id`` to
        ``str(node)``.

    Raises:
        SystemExit: with status 1, after logging the reason, when the input
            file has fewer than two lines, a listed file does not exist, the
            alignment has ids the tree lacks, a selected node is not found in
            the tree, or the selected nodes are not monophyletic.
    """
    # Read everything up front so the handle is released before the
    # alignment and the trees are parsed.
    with open(input_file, 'r') as f:
        all_lines = f.readlines()
    if len(all_lines) < 2:
        # Previously this surfaced as a bare IndexError further down.
        logger.error(
            'Input file must list an alignment file and a tree file: '
            '{0}'.format(input_file))
        sys.exit(1)

    aln_file = all_lines[0].strip()
    if not os.path.exists(aln_file):
        logger.error('Invalid cDNA alignment file: {0}'.format(aln_file))
        sys.exit(1)
    logger.info('Input cDNA alignment file: {0}'.format(aln_file))
    # The ordered list is what trim_tree receives; the set is only used to
    # compare against the tree.
    seq_id_list = [
        str(seq_record.id) for seq_record in AlignIO.read(aln_file, 'fasta')
    ]
    seq_ids = set(seq_id_list)

    tree_file = all_lines[1].strip()
    if not os.path.exists(tree_file):
        logger.error('Invalid tree file: {0}'.format(tree_file))
        sys.exit(1)
    logger.info('Input tree file: {0}'.format(tree_file))
    tmp_t = Tree(tree_file, format=0)
    # Iterating an ete3 tree visits its leaves.
    leaf_names = {str(node.name) for node in tmp_t}
    if seq_ids != leaf_names:
        if len(seq_ids) < len(leaf_names):
            logger.warning('Sequences is less than tree nodes.')
            logger.info('Trim input tree file.')
            tree_file = trim_tree(tree_file, seq_id_list)
        else:
            logger.error('Sequences is falsely greater than tree nodes.')
            sys.exit(1)

    t = EvolTree(tree_file, format=1)
    # Kept as a factory-less defaultdict so the returned type is unchanged
    # for callers; it behaves like a plain dict.
    descendant_dict = defaultdict()
    for descendant in t.iter_descendants():
        descendant_dict[descendant.node_id] = str(descendant)

    # Remaining non-blank lines name the nodes the user wants to focus on.
    select_nodes = [
        name for name in (line.strip() for line in all_lines[2:]) if name
    ]
    if select_nodes:
        logger.info('Input nodes: {0}'.format(', '.join(select_nodes)))
        for node in select_nodes:
            if node not in t:
                logger.error('Error node: {0}'.format(node))
                sys.exit(1)
        # ete3's check_monophyly returns a tuple whose first item is the
        # boolean verdict; testing the tuple itself is always truthy, which
        # made this check a no-op. A plain bool is still accepted.
        monophyly = t.check_monophyly(values=select_nodes, target_attr='name')
        if isinstance(monophyly, tuple):
            monophyly = monophyly[0]
        if not monophyly:
            logger.error('Some nodes are not monophyletic.')
            sys.exit(1)
        common_ancestor = t.get_common_ancestor(select_nodes)
    else:
        common_ancestor = t.get_tree_root()
        logger.info('No specific node')

    run_list = [node.node_id for node in common_ancestor.iter_descendants()]
    logger.info('These node ids will be checked: {0}'.format(
        str(run_list)))
    return run_list, aln_file, tree_file, descendant_dict
Exemplo n.º 5
0
from ete3 import EvolTree
import sys

# Command line: <input tree> <output tree>.
treepath = sys.argv[1]
treeout = sys.argv[2]

t = EvolTree(treepath)
ratites = {
    'aptHaa', 'aptRow', 'aptOwe', 'strCam',
    'droNov', 'casCas', 'rheAme', 'rhePen',
}

# Tag ratite leaves, and internal nodes whose leaves are all ratites.
for node in t.traverse():
    if node.is_leaf():
        if node.name in ratites:
            node.add_features(mark="{RatiteLeaf}")
        continue
    # Internal node: tag it only when every leaf below it is a ratite.
    leaf_names = set(node.get_leaf_names())
    if leaf_names.issubset(ratites):
        node.add_features(mark="{RatiteInternal}")

t.write(outfile=treeout)
Exemplo n.º 6
0
# Use the pruned tree saved in memory: when empty sequences were counted and
# the alignment still lists taxa, prune to those taxa, unroot, write the
# result out and reload it as an EvolTree.
if empty_seq_count >= 1 and len(taxa_in_alignment) >= 1:
    tree.prune(taxa_in_alignment, preserve_branch_length=True)
    tree.unroot()
    tree.write(outfile=out_tree_name, format=0)
    tree = EvolTree(out_tree_name)

tree.link_to_alignment(alignment_file)
tree.workdir = os.getcwd()

# All node_ids in postorder, recorded for later retrieval of omega from a
# background branch in the b_free model.
list_of_node_ids = [node.node_id for node in tree.traverse('postorder')]

# One taxon label per line of the file, with trailing whitespace removed.
with open(test_taxa_file, 'r') as test_taxa_list:
    test_taxa = [taxon.rstrip() for taxon in test_taxa_list]

marked_taxon_ids = []
# Mark each test taxon. Per the original author's note, mark_tree takes
# node_ids rather than labels, so each label is resolved to its node first.
for taxon in test_taxa:
    # `tree & label` is ete3 notation for finding a node within a tree.
    taxon_node = tree & taxon
    marked_taxon_id = taxon_node.node_id
    tree.mark_tree([marked_taxon_id])
# Tip names that define the target class: one name per line of
# `testclassfile`, collected into a set.
with open(testclassfile) as tf:
    target_tips = set(tf.read().splitlines())

# Each non-empty entry of `lines` is split on tabs: field 0 is stored as
# `tree`, field 1 as `hog`, and field 2 is handed to EvolTree. One
# comma-separated row is printed per node of every tree that can be built.
for line in lines:
    if line == "":
        continue
    fields = line.split("\t")
    hog = fields[1]
    tree = fields[0]
    try:
        t = EvolTree(fields[2])
    except Exception:
        # Best effort: skip rows whose tree cannot be built. Unlike the
        # former bare `except:`, this no longer swallows
        # KeyboardInterrupt or SystemExit.
        continue
    for node in t.traverse():
        # node_in_class is defined elsewhere; presumably it reports whether
        # this node belongs to the target class -- confirm against it.
        istarget = node_in_class(node, t, target_tips)
        brstat = node.dist
        # Nodes with an empty name are labelled by joining their leaf names
        # with "-".
        nname = node.name
        if nname == "":
            nname = "-".join(node.get_leaf_names())
        # A node whose parent has no `name` (node.up is None) is reported
        # with parent "root".
        try:
            pname = node.up.name
        except AttributeError:
            pname = "root"
        if pname == "":
            pname = "-".join(node.up.get_leaf_names())
        # Branch label in the form "<parent>:<child>".
        brname = pname + ":" + nname
        print(hog, tree, pname, nname, brname, brstat, istarget, sep=",")