def main(folder=None, seed=None):
    """Drop leaves from the gene trees of one folder and save the survivors.

    Every ``g_trees*.trees`` newick file found under ``args.sd/folder`` is
    streamed tree by tree.  For each tree a list of leaves to remove is
    obtained, and the tree is kept (after pruning) only when more than three
    leaves would remain.  The kept trees are written, in newick format, to
    ``args.sd/folder/args.o``.

    The way the leaves are chosen depends on the module-level ``args.mk``:

    * ``"random"``: ``remove_taxa_prov`` is called with ``args.pr``.
    * ``"byindividual"``: one value per leaf label is drawn once with
      ``truncated_normal`` (from the first tree that has leaves) and the same
      label -> value table is handed to ``remove_taxa_tagprobs`` for every
      tree.

    Any other value only prints a notice; the (empty) tree list is still
    written.

    Args:
        folder: Name of the sub-folder of ``args.sd`` to process.
        seed: Seed given to ``numpy.random.RandomState``.
    """
    # The format operands must be applied to the string, not to the value
    # returned by print(); the old form fails when print is a function.
    print("Folder %s, seed %s" % (folder, seed))
    r = numpy.random.RandomState(seed)
    gene_trees = TreeList()
    taxa = dendropy.TaxonNamespace()
    treefiles = glob.glob(args.sd + "/" + folder + "/g_trees*.trees")
    tree_yielder = Tree.yield_from_files(
        files=treefiles,
        schema="newick",
        rooting="default-rooted",
        preserve_underscores=True,
        taxon_namespace=taxa,
    )

    # Modify gene trees
    if args.mk == "random":
        for gtree in tree_yielder:
            onodes = gtree.leaf_nodes()
            nodes = remove_taxa_prov(r, onodes, args.pr)
            # Tree with missing leaves.  When 3 leaves or less would be left
            # the whole tree counts as missing and is skipped.
            if len(nodes) < len(onodes) - 3:
                gtree.prune_taxa(nodes, update_bipartitions=False, suppress_unifurcations=True)
                gene_trees.append(gtree)
    elif args.mk == "byindividual":
        tagProbs = None
        for gtree in tree_yielder:
            onodes = gtree.leaf_nodes()
            if not tagProbs:
                # One value for each leaf, stored under the leaf's taxon label.
                probs = truncated_normal(
                    r,
                    n=len(onodes),
                    mean=args.pr,
                    sd=args.ist,
                    min=args.itmin,
                    max=args.itmax,
                )
                tagProbs = {}
                for leafi, leaf in enumerate(onodes):
                    tagProbs[leaf.taxon.label] = probs[leafi]
            nodes = remove_taxa_tagprobs(r, onodes, tagProbs)
            # Same keep/skip rule as in the "random" mode.
            if len(nodes) < len(onodes) - 3:
                gtree.prune_taxa(nodes, update_bipartitions=False, suppress_unifurcations=True)
                gene_trees.append(gtree)
    else:
        print("Yet unsupported option")

    # Write gene trees
    gene_trees.write(path=args.sd + "/" + folder + "/" + args.o, schema="newick")
def main(args): if len(args) < 2: print """USAGE: %s [tree_file] [outgroups] [-mrca -mrca-dummy (optional)] [output name (optional)] [-igerr (optional)] -- tree_file: a path to the newick tree file -- outgroups: a list of outgroups, separated by comma. The script goes through the list of outgroups. If the outgroup is found in the tree, the tree is rooted at that outgroup. Otherwise, the next outgroup in the list is used. Each element in the comma-delimited list is itself a + delimited list of taxa. By default the script makes sure that this list of taxa are monophyletic in the tree and roots the tree at the node leading to the clade represented by outgroups given in the + delimited list. Alternatively, you can specify -m which will result in mid-point rooting. Example: HUMAN,ANOCA,STRCA+TINMA first tries to root at HUMAN, if not present, tries to use ANOCA, if not present, tries to root at parent of STRCA and TINMA which need to be monophyletic. If not monophyletic, roots at STRCA. -- (optional) -mrca: using this option the mono-phyletic requirement is relaxed and always the mrca of the + delimited list of outgroups is used. -- (optional) -mrca-dummy: is like -mrca, but also adds a dummy taxon as outgroup to the root. """ % args[ 0 ] sys.exit(1) treeName = args[1] outgroups = [x.replace("_", " ") for x in args[2].split(",")] use_mrca = True if len(args) > 3 and (args[3] == "-mrca" or args[3] == "-mrca-dummy") else False add_dummy = True if len(args) > 3 and (args[3] == "-mrca-dummy") else False resultsFile = ( args[4] if len(args) > 4 else ("%s.rooted" % treeName[:-9] if treeName.endswith("unrooted") else "%s.rooted" % treeName) ) ignore = True if len(args) > 5 and args[5] == "-igerr" else False print "Reading input trees %s ..." 
% treeName, trees = dendropy.TreeList.get_from_path(treeName, "newick", rooted=True) print "%d tree(s) found" % len(trees) i = 0 outtrees = TreeList() for tree in trees: i += 1 print ".", oldroot = tree.seed_node # print "Tree %d:" %i if outgroups[0] == "-m": print "Midpoint rooting ... " tree.reroot_at_midpoint(update_splits=False) else: mrca = None for outgroup in outgroups: outs = outgroup.split("+") outns = [] for out in outs: n = tree.find_node_with_taxon_label(out) if n is None: print "outgroup not found %s," % out, continue outns.append(n.taxon) if len(outns) != 0: # Find an ingroup and root the tree there for n in tree.leaf_iter(): if n.taxon not in outns: ingroup = n break # print "rerooting at ingroup %s" %ingroup.taxon.label """reroot at an ingroup, so that outgroups form monophyletic groups, if possible""" if ingroup.edge.length is not None: tree.reroot_at_edge( ingroup.edge, update_splits=True, length1=ingroup.edge.length / 2, length2=ingroup.edge.length / 2, ) else: tree.reroot_at_edge(ingroup.edge, update_splits=True) mrca = tree.mrca(taxa=outns) break if mrca is None: if ignore: print >> sys.stderr, "Outgroups not found: %s" % outgroups continue else: raise KeyError("Outgroups not found %d: %s" % (i, outgroups)) # print mrca.leaf_nodes() # if not mono-phyletic, then use the first if not use_mrca and len(mrca.leaf_nodes()) != len(outns): print >> sys.stderr, "selected set is not monophyletic. Using %s instead. " % outns[0] mrca = tree.find_node_with_taxon_label(outns[0].label) if mrca.parent_node is None: print >> sys.stderr, "Already rooted at the root." 
# print "rerooting on %s" % [s.label for s in outns] # tree.reroot_at_midpoint() elif mrca.edge.length is not None: # print "rerooting at %s" %mrca.as_newick_string() if ingroup.edge.length is not None: tree.reroot_at_edge( mrca.edge, update_splits=False, length1=mrca.edge.length / 2, length2=mrca.edge.length / 2 ) else: tree.reroot_at_edge(mrca.edge, update_splits=False) else: tree.reroot_at_edge(mrca.edge, update_splits=False) if add_dummy: dummy = tree.seed_node.new_child(taxon=Taxon(label="outgroup"), edge_length=1) tree.reroot_at_edge(dummy.edge, update_splits=False) outtrees.append(tree) """This is to fix internal node labels when treated as support values""" while oldroot.parent_node != tree.seed_node and oldroot.parent_node != None: oldroot.label = oldroot.parent_node.label oldroot = oldroot.parent_node if len(oldroot.sister_nodes()) > 0: oldroot.label = oldroot.sister_nodes()[0].label # tree.reroot_at_midpoint(update_splits=False) print >> sys.stderr, "writing results to %s" % resultsFile outtrees.write(open(resultsFile, "w"), "newick", edge_lengths=True, internal_labels=True, write_rooting=False)
#! /usr/bin/env python
from dendropy import TreeList
from sys import argv

# Newick -> nexus converter: the first command-line argument names the newick
# input file, the second one the nexus file to create.
trees = TreeList()
newick_path = argv[1]
trees.read(path=newick_path, schema="newick")
nexus_path = argv[2]
trees.write(path=nexus_path, schema="nexus")
@author: smirarab ''' import dendropy import sys import os import copy import os.path from dendropy import TreeList if __name__ == '__main__': if len(sys.argv) < 4: print "USAGE: count [output|-] treefile*" sys.exit(1) count= int(sys.argv[1]) out=open(sys.argv[2],'w') if sys.argv[2] != "-" else sys.stdout c={} trees = None for treeName in sys.argv[3:]: a = dendropy.TreeList.get_from_path(treeName, 'nexus',rooted=True, tree_offset=200) if trees: trees.append(a) else: trees = a import random samples = TreeList(random.sample(trees,count)) samples.write(out,'newick',write_rooting=False) if out != sys.stdout: out.close()
def main(args):
    """Reroot every tree of a newick file and write the rerooted trees.

    ``args`` has the layout of ``sys.argv``:

    * ``args[1]``: path of the newick tree file.
    * ``args[2]``: comma-separated list of outgroup candidates; underscores
      are replaced by spaces and each candidate is itself a ``+``-separated
      list of taxon labels.  When the first candidate is ``-m`` every tree is
      midpoint rooted instead.
    * ``args[3]`` (optional): ``-mrca`` or ``-mrca-dummy``.
    * ``args[4]`` (optional): output path; by default ``<tree file>.rooted``.
    * ``args[5]`` (optional): ``-igerr``; trees without any outgroup are then
      echoed unchanged to stdout.

    Trees in which none of the outgroups is found are reported on stderr and
    left out of the output file.  Progress messages go to stderr.
    """
    usage = (
        'USAGE: %s [tree_file] [outgroups] [-mrca -mrca-dummy (optional)] '
        '[output name (optional)] [-igerr (optional)] '
        '-- tree_file: a path to the newick tree file '
        '-- outgroups: a list of outgroups, separated by comma. '
        'The script goes through the list of outgroups. '
        'If the outgroup is found in the tree, the tree is rooted at that outgroup. '
        'Otherwise, the next outgroup in the list is used. '
        'Each element in the comma-delimited list is itself a + delimited list of taxa. '
        'By default the script makes sure that this list of taxa are monophyletic in the tree '
        'and roots the tree at the node leading to the clade represented by outgroups given in the + delimited list. '
        'Alternatively, you can specify -m which will result in mid-point rooting. '
        'Example: HUMAN,ANOCA,STRCA+TINMA first tries to root at HUMAN, if not present, '
        'tries to use ANOCA, if not present, tries to root at parent of STRCA and TINMA '
        'which need to be monophyletic. If not monophyletic, roots at STRCA. '
        '-- (optional) -mrca: using this option the mono-phyletic requirement is relaxed '
        'and always the mrca of the + delimited list of outgroups is used. '
        '-- (optional) -mrca-dummy: is like -mrca, but also adds a dummy taxon as outgroup to the root. '
    )
    # args[2] (the outgroups) is read unconditionally below, so the tree file
    # alone is not enough: fewer than three entries is a usage error.
    if len(args) < 3:
        print(usage % args[0])
        sys.exit(1)

    treeName = args[1]
    outgroups = [x.replace("_", " ") for x in args[2].split(",")]
    use_mrca = len(args) > 3 and args[3] in ("-mrca", "-mrca-dummy")
    add_dummy = len(args) > 3 and args[3] == "-mrca-dummy"
    if len(args) > 4:
        resultsFile = args[4]
    elif treeName.endswith("unrooted"):
        # Cuts "unrooted" together with the single character in front of it.
        resultsFile = "%s.rooted" % treeName[:-9]
    else:
        resultsFile = "%s.rooted" % treeName
    ignore = len(args) > 5 and args[5] == "-igerr"

    # Messages are written directly to the stream so the code is valid on
    # Python 2 and 3.  A message that used to be printed with a trailing
    # comma carries the separating space itself.
    log = sys.stderr.write
    log("Reading input trees %s ... " % treeName)
    trees = dendropy.TreeList.get_from_path(treeName, "newick")
    log("%d tree(s) found\n" % len(trees))

    outtrees = TreeList()
    for tree in trees:
        log(". ")
        oldroot = tree.seed_node
        if outgroups[0] == "-m":
            log("Midpoint rooting ... \n")
            # DendroPy 4 has no update_splits keyword (it became
            # update_bipartitions, default False), so it is not passed.
            tree.reroot_at_midpoint()
        else:
            mrca = None
            # Reset per tree so that a leaf of a previously processed tree
            # (or an unbound name) is never used for this one.
            ingroup = None
            for outgroup in outgroups:
                outs = outgroup.split("+")
                outns = []
                for out in outs:
                    n = tree.find_node_with_taxon_label(out)
                    if n is None:
                        log("outgroup not found %s, " % out)
                        continue
                    outns.append(n.taxon)
                if len(outns) != 0:
                    # Find an ingroup and root the tree there
                    for n in tree.leaf_node_iter():
                        if n.taxon not in outns:
                            ingroup = n
                            break
                    # reroot at an ingroup, so that outgroups form
                    # monophyletic groups, if possible.  Nothing to do when
                    # every leaf of the tree is an outgroup.
                    if ingroup is None:
                        pass
                    elif ingroup.edge.length is not None:
                        tree.reroot_at_edge(
                            ingroup.edge,
                            length1=ingroup.edge.length / 2,
                            length2=ingroup.edge.length / 2,
                        )
                    else:
                        tree.reroot_at_edge(ingroup.edge)
                    mrca = tree.mrca(taxa=outns)
                    break
            if mrca is None:
                log("Outgroups not found: %s\n" % outgroups)
                if ignore:
                    sys.stdout.write(tree.as_string(schema="newick"))
                continue
            # if not mono-phyletic, then use the first
            if not use_mrca and len(mrca.leaf_nodes()) != len(outns):
                log("selected set is not monophyletic. Using %s instead. \n" % outns[0])
                mrca = tree.find_node_with_taxon_label(outns[0].label)
            if mrca.parent_node is None:
                log("Already rooted at the root.\n")
            elif mrca.edge.length is not None:
                if ingroup is not None and ingroup.edge.length is not None:
                    tree.reroot_at_edge(
                        mrca.edge,
                        length1=mrca.edge.length / 2,
                        length2=mrca.edge.length / 2,
                    )
                else:
                    tree.reroot_at_edge(mrca.edge)
            else:
                tree.reroot_at_edge(mrca.edge)
            if add_dummy:
                dummy = tree.seed_node.new_child(taxon=Taxon(label="outgroup"), edge_length=1)
                tree.reroot_at_edge(dummy.edge)
        outtrees.append(tree)
        # This is to fix internal node labels when treated as support values:
        # walk from the old root towards the new root, shifting each label
        # one node down.
        while oldroot.parent_node != tree.seed_node and oldroot.parent_node is not None:
            oldroot.label = oldroot.parent_node.label
            oldroot = oldroot.parent_node
        sisters = oldroot.sister_nodes()
        if len(sisters) > 0:
            oldroot.label = sisters[0].label

    log("writing results to %s\n" % resultsFile)
    outtrees.write(path=resultsFile, schema='newick', suppress_rooting=True)
#!/opt/local/bin/python

### Imports ###

import dendropy
from dendropy import TreeList,Tree
import sys
import argparse
from os import walk
import glob

### Main ###

### Argparse
# Two mandatory string options: -i (input newick file) and -o (output file).
parser = argparse.ArgumentParser(
    prog="strictunroot.py",
    description="Reads a newick trees and reroots it with a basal trifurcation",
)
parser.add_argument("-i", help="Input newick tree name", type=str, required=True)
parser.add_argument("-o", help="Output file name", type=str, required=True)
args = parser.parse_args()

###Main
# Load all input trees, collapse the basal bifurcation of each one and collect
# them in a fresh list that is then written without rooting annotations.
itrees = TreeList.get(
    path=args.i,
    schema="newick",
    rooting="default-rooted",
    preserve_underscores=True,
)
otrees = TreeList()
for tree in itrees:
    tree.collapse_basal_bifurcation()
    otrees.append(tree)
otrees.write(
    path=args.o,
    schema="newick",
    unquoted_underscores=True,
    suppress_rooting=True,
)
print("Done!")
from os import walk
import glob

### Main ###

### Argparse
# Command-line interface: -i (input newick file) and -o (output file) are both
# mandatory string options.
parser = argparse.ArgumentParser(
    description="Reads a newick trees and reroots it with a basal trifurcation",
    prog="strictunroot.py")
parser.add_argument("-i",
                    required=True,
                    type=str,
                    help="Input newick tree name")
parser.add_argument("-o", required=True, type=str, help="Output file name")
args = parser.parse_args()

###Main
# Read every tree of the input file, keeping underscores in the labels.
itrees = TreeList.get(path=args.i,
                      schema="newick",
                      rooting="default-rooted",
                      preserve_underscores=True)
otrees = TreeList()
for tree in itrees:
    # Collapse the bifurcation at the base of the tree before collecting it
    # (the "basal trifurcation" the description string refers to).
    tree.collapse_basal_bifurcation()
    otrees.append(tree)
# Write the processed trees without rooting annotations and with unquoted
# underscores in the labels.
otrees.write(path=args.o,
             schema="newick",
             unquoted_underscores=True,
             suppress_rooting=True)
print("Done!")