Exemplo n.º 1
0
def parse_tree_tips(tree_dir):
    """Collect tip names from every tree-related file in ``tree_dir``.

    Two kinds of file are read (the directory is not searched recursively):

    * ``*.tree`` -- loaded with ``bt.loadNewick``; every leaf whose name does
      not contain ``"inserted"`` is recorded.
    * ``*.txt`` -- tab-separated text whose second column is a
      comma-separated list of tip names.

    Lines of a ``.txt`` file that have no second column (for example a
    trailing blank line) are skipped rather than raising ``IndexError``.

    Args:
        tree_dir: Directory to scan.

    Returns:
        A ``(tips, tip_to_tree)`` tuple. ``tips`` lists the tip names in the
        order they were encountered; ``tip_to_tree`` maps each tip name to
        the part of its file's name that precedes the first ``"."``.
    """
    tips = []
    tip_to_tree = {}

    for fn in os.listdir(tree_dir):
        # Everything before the first dot identifies the tree.
        tree_name = fn.split(".")[0]
        path = os.path.join(tree_dir, fn)

        if fn.endswith(".tree"):
            tree = bt.loadNewick(path, absoluteTime=False)
            for k in tree.Objects:
                if k.branchType == 'leaf' and "inserted" not in k.name:
                    tips.append(k.name)
                    tip_to_tree[k.name] = tree_name

        elif fn.endswith(".txt"):
            with open(path) as f:
                for line in f:
                    columns = line.strip("\n").split("\t")
                    if len(columns) < 2:
                        # Blank or malformed line: no tip column to read.
                        continue
                    tip_list = columns[1].split(",")
                    tips.extend(tip_list)
                    for tip in tip_list:
                        tip_to_tree[tip] = tree_name

    return tips, tip_to_tree
Exemplo n.º 2
0
def find_tallest_tree(input_dir):
    """Return the largest tree height among multi-taxon trees under ``input_dir``.

    Every ``*.tree`` file found by walking ``input_dir`` recursively is
    scanned for a `` Dimensions NTax=<n>;`` header line. Only files that
    declare more than one taxon are loaded with ``bt.loadNewick`` and
    contribute their ``treeHeight``.

    Args:
        input_dir: Root directory to search.

    Returns:
        The maximum ``treeHeight`` seen, or ``0`` when no file qualifies
        (no ``.tree`` files, or none with more than one taxon).
    """
    tree_heights = []

    for root, _dirs, filenames in os.walk(input_dir):
        for fn in filenames:
            if not fn.endswith(".tree"):
                continue

            tree_file = os.path.join(root, fn)

            # Read the taxon count from the header so that single-taxon
            # files are never handed to the tree parser.
            num_taxa = 0
            with open(tree_file, "r") as handle:
                for line in handle:
                    if line.startswith(" Dimensions NTax="):
                        num_taxa = int(
                            line.rstrip("\n").rstrip(";").split("=")[1])

            if num_taxa > 1:
                tree = bt.loadNewick(tree_file, absoluteTime=False)
                tree_heights.append(tree.treeHeight)

    # default=0 keeps callers working when nothing qualified, instead of
    # failing with IndexError on an empty list.
    return max(tree_heights, default=0)
Exemplo n.º 3
0
def relabel_tips(treeFile, outTree):
    """Append sampling time and type to each tip label and write a Newick file.

    The tree in ``treeFile`` is loaded with ``bt.loadNewick``. For every
    leaf, ``numName`` is rewritten as ``<numName>_<time>_<type>``, where
    ``time`` and ``type`` are read from the leaf's ``traits`` and fall back
    to ``'Unknown'`` when the trait is absent. Each new label is printed.
    The relabelled tree is serialised with ``toString``, single quotes are
    removed, and the result is written to ``outTree``.

    Args:
        treeFile: Path of the Newick tree to read.
        outTree: Path of the Newick file to write (overwritten if present).
    """
    myTree = bt.loadNewick(treeFile, absoluteTime=False)
    # NOTE: absolute time is not set here; if it is needed, call
    # myTree.setAbsoluteTime(<time of last sampled tip>) at this point.
    myTree.traverse_tree()  ## required to set heights
    myTree.treeStats()  ## report stats about tree

    for k in myTree.Objects:  ## iterate over a flat list of branches
        if k.branchType != 'leaf':
            continue

        # Tips without the trait are labelled 'Unknown'.
        tip_type = k.traits.get('type', 'Unknown')
        tip_time = k.traits.get('time', 'Unknown')

        # str() on both parts so that non-string trait values cannot break
        # the concatenation.
        k.numName = '_'.join([k.numName, str(tip_time), str(tip_type)])
        print(k.numName)

    newick = myTree.toString(traits=[], numName=True, nexus=False)
    newick = newick.replace('\'', '')

    # Context manager guarantees the handle is closed even if write() fails.
    with open(outTree, "w") as tfile:
        tfile.write(newick)
Exemplo n.º 4
0
 def test_newick(self):
     """Check the node count and maximum height of ./tests/data/zika.nwk."""
     expected_num_nodes = 564
     expected_height = 0.0058

     tree = bt.loadNewick('./tests/data/zika.nwk')

     # Number of objects in the loaded tree.
     count_message = 'Newick tree does not contain correct number of nodes. Expected: {}. Observed: {}'
     assert len(tree.Objects) == expected_num_nodes, count_message.format(expected_num_nodes, len(tree.Objects))

     # Largest height over all objects, compared at 4 decimal places.
     max_height = round(max(branch.height for branch in tree.Objects), 4)
     height_message = 'Newick tree height is not correct. Expected: {}. Observed: {}'
     assert max_height == expected_height, height_message.format(expected_height, max_height)
Exemplo n.º 5
0
def find_tallest_tree(input_dir):
    """Return the largest tree height among all ``*.tree`` files under ``input_dir``.

    ``input_dir`` is walked recursively; every file whose name ends in
    ``.tree`` is loaded with ``bt.loadNewick`` and contributes its
    ``treeHeight``.

    Args:
        input_dir: Root directory to search.

    Returns:
        The maximum ``treeHeight`` seen, or ``0`` when no ``.tree`` file
        exists under ``input_dir``.
    """
    tree_heights = []

    for root, _dirs, filenames in os.walk(input_dir):
        for fn in filenames:
            if fn.endswith(".tree"):
                tree = bt.loadNewick(os.path.join(root, fn),
                                     absoluteTime=False)
                tree_heights.append(tree.treeHeight)

    # default=0 avoids an IndexError/ValueError on a directory without trees.
    return max(tree_heights, default=0)
Exemplo n.º 6
0
def relabel_tips(treeFile, outTree):
    """Clean up tip labels and write the tree to ``outTree`` as Newick.

    The tree in ``treeFile`` is loaded with ``bt.loadNewick``. For every
    leaf, ``numName`` is rewritten by replacing ``'|'`` with ``'_'``,
    appending ``'_Il'`` and then removing every occurrence of
    ``'2020_EPI_ISL_'``. Each new label is printed. The relabelled tree is
    serialised with ``toString``, single quotes are removed, and the result
    is written to ``outTree``.

    Args:
        treeFile: Path of the Newick tree to read.
        outTree: Path of the Newick file to write (overwritten if present).
    """
    myTree = bt.loadNewick(treeFile, absoluteTime=False)
    # NOTE: absolute time is not set here; if it is needed, call
    # myTree.setAbsoluteTime(<time of last sampled tip>) at this point.
    myTree.traverse_tree()  ## required to set heights
    myTree.treeStats()  ## report stats about tree

    for k in myTree.Objects:  ## iterate over a flat list of branches
        if k.branchType != 'leaf':
            continue

        relabelled = k.numName.replace('|', '_') + '_Il'
        k.numName = relabelled.replace('2020_EPI_ISL_', '')
        print(k.numName)

    newick = myTree.toString(traits=[], numName=True, nexus=False)
    newick = newick.replace('\'', '')

    # Context manager guarantees the handle is closed even if write() fails.
    with open(outTree, "w") as tfile:
        tfile.write(newick)
Exemplo n.º 7
0
def make_all_of_the_trees(input_dir,
                          tree_name_stem,
                          taxon_dict,
                          query_dict,
                          desired_fields,
                          custom_tip_labels,
                          graphic_dict,
                          min_uk_taxa=3):
    """Draw a scaled figure for every indexed tree file in ``input_dir``.

    For each index returned by ``sort_trees_index`` the file
    ``<input_dir>/<tree_name_stem>_<index>.tree`` is inspected. Files whose
    `` Dimensions NTax=`` header declares one taxon or fewer are ignored.
    Remaining trees are loaded, given an extra root node above the original
    root, and then either drawn with ``make_scaled_tree`` (fewer than 1000
    tips) or recorded as too large.

    Args:
        input_dir: Directory holding the tree files.
        tree_name_stem: Prefix shared by the tree file names.
        taxon_dict: Passed through to ``summarise_node_table`` and
            ``make_scaled_tree``.
        query_dict: Passed to ``find_colour_dict`` and ``make_scaled_tree``.
        desired_fields: Passed through to ``make_scaled_tree``.
        custom_tip_labels: Passed through to ``make_scaled_tree``.
        graphic_dict: Mapping of trait to colour scheme; one colour dict is
            built per entry.
        min_uk_taxa: Not used by this function.

    Returns:
        ``(too_tall_trees, overall_tree_count, colour_dict_dict,
        overall_df_dict)``: indices of trees with 1000 or more tips, the
        number of trees drawn, the per-trait colour dicts, and the per-tree
        node-table summaries keyed by tree name.
    """
    tallest_height = find_tallest_tree(input_dir)

    too_tall_trees = []
    overall_tree_count = 0
    overall_df_dict = defaultdict(dict)
    colour_dict_dict = defaultdict(dict)

    tree_indices = sort_trees_index(input_dir)

    # One colour lookup per trait, shared by every tree drawn below.
    for trait, colour_scheme in graphic_dict.items():
        colour_dict_dict[trait] = find_colour_dict(query_dict, trait,
                                                   colour_scheme)

    for tree_index in tree_indices:
        treename = f"{tree_name_stem}_{tree_index}"
        tree_path = input_dir + "/" + treename + ".tree"

        # Taxon count comes from the file header; the last matching line wins.
        num_taxa = 0
        with open(tree_path, "r") as handle:
            for line in handle:
                if line.startswith(" Dimensions NTax="):
                    num_taxa = int(
                        line.rstrip("\n").rstrip(";").split("=")[1])

        if num_taxa <= 1:
            continue

        tree = bt.loadNewick(tree_path, absoluteTime=False)

        # Make root line: hang the original root beneath a new node.
        original_root = tree.root
        stem = bt.node()
        stem.children.append(original_root)
        original_root.parent = stem
        original_root.length = 0.000015
        stem.height = 0
        stem.y = original_root.y
        tree.root = stem
        tree.Objects.append(stem)

        tips = [k.name for k in tree.Objects if k.branchType == 'leaf']

        if len(tips) >= 1000:
            too_tall_trees.append(tree_index)
            continue

        overall_df_dict[treename] = summarise_node_table(
            input_dir, treename, taxon_dict)
        overall_tree_count += 1

        make_scaled_tree(tree, treename, input_dir, len(tips),
                         colour_dict_dict, desired_fields, tallest_height,
                         taxon_dict, query_dict, custom_tip_labels,
                         graphic_dict)

    return too_tall_trees, overall_tree_count, colour_dict_dict, overall_df_dict
Exemplo n.º 8
0
def make_all_of_the_trees(input_dir, taxon_dict, query_id_dict, query_dict, desired_fields, min_uk_taxa=3):
    """Draw one figure per colour trait for every indexed tree in ``input_dir``.

    For each index returned by ``sort_trees_index`` the file
    ``<input_dir>/tree_<index>.tree`` is inspected. Files whose
    `` Dimensions NTax=`` header declares one taxon or fewer are ignored.
    Remaining trees are loaded, given an extra root node above the original
    root, and then either drawn once per trait with
    ``make_scaled_tree_without_legend`` (fewer than 1000 tips) or recorded
    as too large.

    Args:
        input_dir: Directory holding the tree files.
        taxon_dict: Passed through to the drawing function.
        query_id_dict: Passed through to the drawing function.
        query_dict: Passed to ``find_colour_dict`` and the drawing function.
        desired_fields: Traits to colour by; ``["adm1"]`` is used when this
            equals ``[]``.
        min_uk_taxa: Not used by this function.

    Returns:
        ``(too_tall_trees, overall_tree_count, colour_dict_dict)``: indices
        of trees with 1000 or more tips, the number of trees drawn, and the
        most recent colour dict built for each trait.
    """
    tallest_height = find_tallest_tree(input_dir)

    too_tall_trees = []
    overall_tree_count = 0
    colour_dict_dict = defaultdict(dict)

    tree_indices = sort_trees_index(input_dir)

    for tree_index in tree_indices:
        treename = "tree_" + str(tree_index)
        tree_path = input_dir + "/" + treename + ".tree"

        # Taxon count comes from the file header; the last matching line wins.
        num_taxa = 0
        with open(tree_path, "r") as handle:
            for line in handle:
                if line.startswith(" Dimensions NTax="):
                    num_taxa = int(
                        line.rstrip("\n").rstrip(";").split("=")[1])

        if num_taxa <= 1:
            continue

        tree = bt.loadNewick(tree_path, absoluteTime=False)

        # Hang the original root beneath a new node.
        original_root = tree.root
        stem = bt.node()
        stem.children.append(original_root)
        original_root.parent = stem
        original_root.length = 2.0
        stem.height = 0
        stem.y = original_root.y
        tree.root = stem
        tree.Objects.append(stem)

        tips = [k.name for k in tree.Objects if k.branchType == 'leaf']

        if len(tips) >= 1000:
            too_tall_trees.append(tree_index)
            continue

        overall_tree_count += 1

        colour_by = ["adm1"] if desired_fields == [] else desired_fields

        for trait in colour_by:
            colour_dict = find_colour_dict(query_dict, trait)
            colour_dict_dict[trait] = colour_dict
            make_scaled_tree_without_legend(
                tree, treename, input_dir, len(tips), colour_dict, trait,
                tallest_height, tree_index, taxon_dict, query_id_dict,
                query_dict)

    return too_tall_trees, overall_tree_count, colour_dict_dict
Exemplo n.º 9
0
def make_all_of_the_trees(input_dir,
                          outdir,
                          tree_name_stem,
                          taxon_dict,
                          query_dict,
                          colour_fields,
                          label_fields,
                          min_uk_taxa=3):
    """Draw a scaled figure for every indexed tree file in ``input_dir``.

    For each index returned by ``sort_trees_index`` the file
    ``<input_dir>/<tree_name_stem>_<index>.tree`` is loaded, given an extra
    root node above the original root, and then either drawn with
    ``make_scaled_tree`` (fewer than 1000 tips) or recorded as too large.

    Args:
        input_dir: Directory holding the tree files.
        outdir: Passed through to ``make_scaled_tree``.
        tree_name_stem: Prefix shared by the tree file names.
        taxon_dict: Passed through to ``summarise_node_table`` and
            ``make_scaled_tree``.
        query_dict: Passed to ``find_colour_dict`` and ``make_scaled_tree``.
        colour_fields: Traits for which a colour dict is built.
        label_fields: Passed through to ``make_scaled_tree``.
        min_uk_taxa: Not used by this function.

    Returns:
        ``(too_tall_trees, overall_tree_count, overall_df_dict,
        colour_dict_dict)``: indices of trees with 1000 or more tips, the
        number of trees drawn, the node-table summaries keyed by
        ``tree_<index>``, and the per-trait colour dicts.
    """
    tallest_height = find_tallest_tree(input_dir)

    too_tall_trees = []
    overall_tree_count = 0
    overall_df_dict = defaultdict(dict)
    colour_dict_dict = defaultdict(dict)

    tree_indices = sort_trees_index(input_dir)

    # One colour lookup per trait, shared by every tree drawn below.
    for trait in colour_fields:
        colour_dict_dict[trait] = find_colour_dict(query_dict, trait)

    for tree_number in tree_indices:
        treename = f"tree_{tree_number}"
        nodefile = f"{tree_name_stem}_{tree_number}"
        tree_path = input_dir + "/" + nodefile + ".tree"

        tree = bt.loadNewick(tree_path, absoluteTime=False)

        # Hang the original root beneath a new node.
        original_root = tree.root
        stem = bt.node()
        stem.children.append(original_root)
        original_root.parent = stem
        original_root.length = 0.000015
        stem.height = 0
        stem.y = original_root.y
        tree.root = stem
        tree.Objects.append(stem)

        tips = [k.name for k in tree.Objects if k.branchType == 'leaf']

        if len(tips) >= 1000:
            too_tall_trees.append(tree_number)
            continue

        overall_df_dict[treename] = summarise_node_table(
            input_dir, nodefile, taxon_dict)
        overall_tree_count += 1

        make_scaled_tree(tree, nodefile, input_dir, outdir, len(tips),
                         colour_dict_dict, colour_fields, label_fields,
                         tallest_height, tree_number, taxon_dict, query_dict)

    return too_tall_trees, overall_tree_count, overall_df_dict, colour_dict_dict
Exemplo n.º 10
0
@author: david
"""

from Bio import Phylo
import baltic as bt
import re

# Input/output file names for this run. Both are hard-coded; edit them to
# process a different file.
# NOTE(review): the list below looks like the set of sub-range suffixes the
# file names can take -- confirm.
# '1-2936','2937-4936', '4937-6870','6871-8473','8474-19706','19707-20428'
in_tree = 'sars-like-CoVs-sub_19707-20428.tre'
out_tree = 'sars-like-CoVs-sub_19707-20428.nexus'

# Basic conversion with Phylo module doesn't work here because we need BEAST-style NEXUS files
#Phylo.convert(in_tree, 'newick', out_tree, 'nexus')

# Load the Newick tree with baltic.
myTree = bt.loadNewick(in_tree, absoluteTime=False)
#myTree.setAbsoluteTime(2019.00) # need to set this to time of last sampled tip
myTree.traverse_tree()  ## required to set heights
myTree.treeStats()  ## report stats about tree

# Collect the numName of every leaf, in the order leaves appear in
# myTree.Objects.
names = []
for idx, k in enumerate(
        myTree.Objects):  ## iterate over a flat list of branches
    if k.branchType == 'leaf':
        curr_name = k.numName
        names.append(curr_name)

#print(names)

# NOTE(review): presumably a date suffix appended to taxon names further
# down the script -- its use is not visible here; confirm.
date_str = '_2020.00'

# Write taxa names
Exemplo n.º 11
0
                        help="Pruned tree including only taxa in list")
    args = parser.parse_args()

    timetree = args.timetree
    tmrcas = args.tmrcas
    output = args.output

    # path = "/Users/anderson/GLab Dropbox/Anderson Brito/past&future/PhD/works/phylog/species_trees/viral_sppTrees/rhv04_virevol1/trees/ba/run0_host/"
    # timetree = path + "host_tree.tree"
    # tmrcas = path + 'tmrcas.txt'
    # output = path + 'new_host_tree.nexus'

    all_traits = ['node_name', 'height_95%_HPD']

    # load tree
    tree = bt.loadNewick(timetree)  #, tip_regex='_([0-9\-]+)$')
    # print(tree)

    # tmrca dataframe
    df = pd.read_csv(tmrcas, encoding='utf-8', sep='\t', dtype='str')
    # print(df)

    # df['members'] = df['members'].apply(lambda x: ', '.join(sorted(x.split(','))))
    # print(df['members'].to_list())

    print('Starting tree file processing...')
    # transfer supporting value from a newick tree
    for k in sorted(tree.Objects, key=lambda q: q.height
                    ):  ## iterate over branches from most recent to oldest
        if k.branchType == 'node':  ## can only sort nodes
            terminals = ", ".join(sorted([leaf for leaf in k.leaves]))