def get_example_tree():
    """Build the example tree and the TreeStyle used to draw it.

    Four clades of a fixed 10-leaf topology get their own background
    colour, every branch length is set to 0, and a TreeStyle is prepared
    with the module's ``layout`` function, hidden leaf names, mode "c"
    and a root opening factor of 1.

    Returns:
        A ``(tree, tree_style)`` tuple.
    """
    # (leaves that define the clade, background colour), in the order the
    # styles are applied.  The last clade is nested inside the second one.
    clade_colors = (
        (("a1", "a2", "a3"), "LightSteelBlue"),
        (("b1", "b2", "b3", "b4"), "Moccasin"),
        (("c1", "c2", "c3"), "DarkSeaGreen"),
        (("b3", "b4"), "Khaki"),
    )

    # One NodeStyle per clade, created before the tree is loaded.
    styles = []
    for _leaves, color in clade_colors:
        style = NodeStyle()
        style["bgcolor"] = color
        styles.append(style)

    t = Tree("((((a1,a2),a3), ((b1,b2),(b3,b4))), ((c1,c2),c3));")
    for node in t.traverse():
        node.dist = 0

    # Paint each clade through the common ancestor of its leaves.
    for (leaves, _color), style in zip(clade_colors, styles):
        clade_root = t.get_common_ancestor(*leaves)
        clade_root.set_style(style)

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.mode = "c"
    ts.root_opening_factor = 1
    return t, ts
import random
from ete_dev import Tree

# Creates a normal tree
t = Tree(
    '((H:0.3,I:0.1):0.5, A:1, (B:0.4,(C:0.5,(J:1.3, (F:1.2, D:0.1):0.5):0.5):0.5):0.5);'
)
print(t)

# Let's locate some nodes using the get common ancestor method
ancestor = t.get_common_ancestor("J", "F", "C")
# the search_nodes method (I take only the first match)
A = t.search_nodes(name="A")[0]
# and using the shortcut for finding nodes by name
C = t & "C"
H = t & "H"
I = t & "I"

# Let's now add some custom features to our nodes. add_features can be
# used to add many features at the same time.
C.add_features(vowel=False, confidence=1.0)
A.add_features(vowel=True, confidence=0.5)
ancestor.add_features(nodetype="internal")
# Or, using the oneliner notation
(t & "H").add_features(vowel=False, confidence=0.2)

# But we can automate this. (Note that it will overwrite the previous
# values.)  traverse() visits every node, not only the leaves, so the name
# is compared against whole vowels: a substring test such as
# `name in "AEIOU"` would also accept an empty or multi-letter name.
VOWELS = ("A", "E", "I", "O", "U")
for leaf in t.traverse():
    leaf.add_features(vowel=leaf.name in VOWELS, confidence=random.random())

# Now we use this information to analyze the tree.
print("This tree has %d vowel nodes" % len(t.search_nodes(vowel=True)))
def main(argv):
    """Compare a set of source trees against a reference tree.

    Parses ``argv``, loads the reference tree (``-r``), optionally renames
    its leaves from another attribute and roots it, then does the same for
    every source tree and calls ``compare`` against the reference.  One
    result row is produced per source tree: rows are written tab-delimited
    to the ``-o`` file when it is given, otherwise they are printed as a
    fixed-width table on stdout.

    Args:
        argv: command line arguments handed to ``ArgumentParser.parse_args``.

    Exits with status 1 when both positional source trees and
    ``--source_file`` are supplied.
    """
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument("source_trees", metavar='source_trees', type=str,
                            nargs="*", help='a list of source tree files')
    input_args.add_argument("--source_file", dest="source_file", type=str,
                            help="""path to a file containing many source trees, one per line""")
    input_args.add_argument("-r", dest="reftree", type=str, required=True,
                            help="""Reference tree""")
    input_args.add_argument("--ref_tree_attr", dest="ref_tree_attr", type=str,
                            default="name",
                            help=("attribute in ref tree used as leaf name"))
    input_args.add_argument("--src_tree_attr", dest="src_tree_attr", type=str,
                            default="name",
                            help=("attribute in source tree used as leaf name"))
    input_args.add_argument("--min_support_ref", type=float, default=0.0,
                            help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument("--min_support_src", type=float, default=0.0,
                            help=("min support for branches to be considered from the source tree"))

    output_args = parser.add_argument_group("OUTPUT OPTIONS")
    output_args.add_argument("-o", dest="output", type=str,
                             help="""Path to the tab delimited report file""")

    # NOTE(review): --expand_polytomies, --min_support and --collateral are
    # parsed but never read below.
    opt_args = parser.add_argument_group("DISTANCE OPTIONS")
    opt_args.add_argument("--outgroup", dest="outgroup", nargs="+",
                          help="""outgroup used to root reference and source trees before distance computation""")
    opt_args.add_argument("--expand_polytomies", dest="polytomies",
                          action="store_true",
                          help="""expand politomies if necessary""")
    opt_args.add_argument("--unrooted", dest="unrooted", action="store_true",
                          help="""compare trees as unrooted""")
    opt_args.add_argument("--min_support", dest="min_support", type=float,
                          default=0.0,
                          help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")
    opt_args.add_argument("--extract_species", action="store_true",
                          help="When used, leaf names in the reference and source trees are assumed to represent species."
                          " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
                          " it can be automatically extracted by providing a Perl regular expression that extract a "
                          " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
                          " events. ")
    opt_args.add_argument("--sp_regexp", type=str,
                          help=("Specifies a Perl regular expression to automatically extract species names"
                                " from the name string in source trees. If not used, leaf names are assumed to represent species names."
                                " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."))
    opt_args.add_argument("--collateral", action='store_true', help=(""))

    args = parser.parse_args(argv)
    print(__DESCRIPTION__)

    if args.source_file and args.source_trees:
        # Name the options that actually exist on this command line.
        sys.stderr.write('The use of --source_file and source_trees at the '
                         'same time is not supported.\n')
        sys.exit(1)

    if args.source_file:
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees

    def _root(tree):
        """Root *tree* at --outgroup; does nothing when it was not given."""
        if not args.outgroup:
            return
        if len(args.outgroup) > 1:
            out = tree.get_common_ancestor(args.outgroup)
        else:
            out = tree.search_nodes(name=args.outgroup[0])[0]
        tree.set_outgroup(out)

    # The species-name extractor does not depend on the tree being read, so
    # the pattern is compiled once instead of once per source tree.
    get_sp_name = None
    if args.extract_species:
        if args.sp_regexp:
            sp_matcher = re.compile(args.sp_regexp)

            def get_sp_name(name):
                return re.search(sp_matcher, name).groups()[0]
        else:
            def get_sp_name(name):
                return name

    ref_tree = Tree(args.reftree)
    if args.ref_tree_attr:
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                # Show the offending leaf before getattr() fails on it.
                print(lf)
            lf.name = getattr(lf, args.ref_tree_attr)
    _root(ref_tree)

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF',
              'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist")
    # Needed by every print_table() call, whether or not -o is used.
    COL_WIDTHS = [20, 20] + [9] * 10
    ref_fname = os.path.basename(args.reftree)

    OUT = open(args.output, "w") if args.output else None
    try:
        if OUT is not None:
            OUT.write('# ' + ctime() + '\n')
            OUT.write('# ' + ' '.join(sys.argv) + '\n')
            # Column titles carry newlines for table wrapping; flatten them
            # so the file header stays on a single tab-delimited line.
            OUT.write('#' + '\t'.join(h.replace('\n', ' ') for h in HEADER)
                      + '\n')
        else:
            print('# ' + ctime())
            print('# ' + ' '.join(sys.argv))
            print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

        for counter, tfile in enumerate(source_trees):
            if args.source_file:
                # Items from tree_iterator unpack as (seed id, tree); only
                # the tree is used here.
                _seedid, tfile = tfile

            if args.extract_species:
                tt = PhyloTree(tfile, sp_naming_function=get_sp_name)
            else:
                tt = Tree(tfile)

            if args.src_tree_attr:
                for lf in tt.iter_leaves():
                    lf._origname = lf.name
                    lf.name = getattr(lf, args.src_tree_attr)
            _root(tt)

            if args.source_trees:
                fname = os.path.basename(tfile)
            else:
                fname = '%05d' % counter

            r = tt.compare(ref_tree,
                           ref_tree_attr=args.ref_tree_attr,
                           source_tree_attr=args.src_tree_attr,
                           min_support_ref=args.min_support_ref,
                           min_support_source=args.min_support_src,
                           unrooted=args.unrooted,
                           has_duplications=args.extract_species)

            row = [istr(value) for value in (
                fname[-30:], ref_fname[-30:], r['effective_tree_size'],
                r['norm_rf'], r['rf'], r['max_rf'],
                r["source_edges_in_ref"], r["ref_edges_in_source"],
                r['source_subtrees'], r['treeko_dist'])]

            if OUT is not None:
                # Rows follow the header into the report file.
                OUT.write('\t'.join(str(cell) for cell in row) + '\n')
            else:
                print_table([row], fix_col_width=COL_WIDTHS, wrap_style='cut')
    finally:
        # Close the report even if a tree fails to load or compare.
        if OUT is not None:
            OUT.close()
# Print the tree as loaded; three branches hang from its root node.
print t
#          /-A
#         |
#         |          /-H
#---------|---------|
#         |          \-F
#         |
#         |          /-B
#          \--------|
#                   |          /-E
#                    \--------|
#                              \-D
#
# Let's define the ancestor of E and D as the tree outgroup. Of
# course, the definition of an outgroup will depend on user criteria.
ancestor = t.get_common_ancestor("E","D")
t.set_outgroup(ancestor)
print "Tree rooteda at E and D's ancestor is more basal that the others."
print t
#
#                    /-B
#          /--------|
#         |         |          /-A
#         |          \--------|
#         |                   |          /-H
#---------|                    \--------|
#         |                              \-F
#         |
#         |          /-E
#          \--------|
#                    \-D
# Load a small tree with branch lengths and show it.
tree = Tree("((H:1,I:1):0.5, A:1, (B:1,(C:1,D:1):0.5):0.5);")
print "this is the original tree:"
print tree
#                    /-H
#          /--------|
#         |          \-I
#         |
#---------|--A
#         |
#         |          /-B
#          \--------|
#                   |          /-C
#                    \--------|
#                              \-D
# Finds the first common ancestor between C and D.
ancestor = tree.get_common_ancestor("D", "C")
print "The ancestor of C and D is:"
print ancestor
#          /-C
#---------|
#          \-D
# You can use more than two nodes in the search
ancestor = tree.get_common_ancestor("B", "C", "D")
print "The ancestor of B, C and D is:"
print ancestor
#          /-B
#---------|
#         |          /-C
#          \--------|
#                    \-D
# Finds the first sister branch of the ancestor node. Because
import random
from ete_dev import Tree

# Creates a normal tree
t = Tree(
    '((H:0.3,I:0.1):0.5, A:1, (B:0.4,(C:0.5,(J:1.3, (F:1.2, D:0.1):0.5):0.5):0.5):0.5);'
)
print(t)

# Let's locate some nodes using the get common ancestor method
ancestor = t.get_common_ancestor("J", "F", "C")
# the search_nodes method (I take only the first match)
A = t.search_nodes(name="A")[0]
# and using the shortcut for finding nodes by name
C = t & "C"
H = t & "H"
I = t & "I"

# Let's now add some custom features to our nodes. add_features can be
# used to add many features at the same time.
C.add_features(vowel=False, confidence=1.0)
A.add_features(vowel=True, confidence=0.5)
ancestor.add_features(nodetype="internal")
# Or, using the oneliner notation
(t & "H").add_features(vowel=False, confidence=0.2)

# But we can automate this. (Note that it will overwrite the previous
# values.)  traverse() visits every node, not only the leaves, so the name
# is compared against whole vowels: a substring test such as
# `name in "AEIOU"` would also accept an empty or multi-letter name and
# make the count below disagree with the leaf listing.
VOWELS = ("A", "E", "I", "O", "U")
for leaf in t.traverse():
    leaf.add_features(vowel=leaf.name in VOWELS, confidence=random.random())

# Now we use this information to analyze the tree.
print("This tree has %d vowel nodes" % len(t.search_nodes(vowel=True)))
print("Which are %s" % ([leaf.name for leaf in t.iter_leaves() if leaf.vowel],))

# But features may refer to any kind of data, not only simple
# Load a small tree with branch lengths and show it.
tree = Tree('((H:1,I:1):0.5, A:1, (B:1,(C:1,D:1):0.5):0.5);')
print "this is the original tree:"
print tree
#                    /-H
#          /--------|
#         |          \-I
#         |
#---------|--A
#         |
#         |          /-B
#          \--------|
#                   |          /-C
#                    \--------|
#                              \-D
# Finds the first common ancestor between C and D.
ancestor = tree.get_common_ancestor("D", "C")
print "The ancestor of C and D is:"
print ancestor
#          /-C
#---------|
#          \-D
# You can use more than two nodes in the search
ancestor = tree.get_common_ancestor("B", "C", "D")
print "The ancestor of B, C and D is:"
print ancestor
#          /-B
#---------|
#         |          /-C
#          \--------|
#                    \-D
# Finds the first sister branch of the ancestor node. Because
def main(argv):
    """Compare a set of source trees against a reference tree.

    Parses ``argv``, loads the reference tree (``-r``), optionally renames
    its leaves from another attribute and roots it, then does the same for
    every source tree and calls ``compare`` against the reference.  One
    result row is produced per source tree: rows are written tab-delimited
    to the ``-o`` file when it is given, otherwise they are printed as a
    fixed-width table on stdout.

    Args:
        argv: command line arguments handed to ``ArgumentParser.parse_args``.

    Exits with status 1 when both positional source trees and
    ``--source_file`` are supplied.
    """
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument("source_trees", metavar='source_trees', type=str,
                            nargs="*", help='a list of source tree files')
    input_args.add_argument("--source_file", dest="source_file", type=str,
                            help="""path to a file containing many source trees, one per line""")
    input_args.add_argument("-r", dest="reftree", type=str, required=True,
                            help="""Reference tree""")
    input_args.add_argument("--ref_tree_attr", dest="ref_tree_attr", type=str,
                            default="name",
                            help=("attribute in ref tree used as leaf name"))
    input_args.add_argument("--src_tree_attr", dest="src_tree_attr", type=str,
                            default="name",
                            help=("attribute in source tree used as leaf name"))
    input_args.add_argument("--min_support_ref", type=float, default=0.0,
                            help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument("--min_support_src", type=float, default=0.0,
                            help=("min support for branches to be considered from the source tree"))

    output_args = parser.add_argument_group("OUTPUT OPTIONS")
    output_args.add_argument("-o", dest="output", type=str,
                             help="""Path to the tab delimited report file""")

    # NOTE(review): --expand_polytomies, --min_support and --collateral are
    # parsed but never read below.
    opt_args = parser.add_argument_group("DISTANCE OPTIONS")
    opt_args.add_argument("--outgroup", dest="outgroup", nargs="+",
                          help="""outgroup used to root reference and source trees before distance computation""")
    opt_args.add_argument("--expand_polytomies", dest="polytomies",
                          action="store_true",
                          help="""expand politomies if necessary""")
    opt_args.add_argument("--unrooted", dest="unrooted", action="store_true",
                          help="""compare trees as unrooted""")
    opt_args.add_argument("--min_support", dest="min_support", type=float,
                          default=0.0,
                          help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")
    opt_args.add_argument("--extract_species", action="store_true",
                          help="When used, leaf names in the reference and source trees are assumed to represent species."
                          " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
                          " it can be automatically extracted by providing a Perl regular expression that extract a "
                          " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
                          " events. ")
    opt_args.add_argument("--sp_regexp", type=str,
                          help=("Specifies a Perl regular expression to automatically extract species names"
                                " from the name string in source trees. If not used, leaf names are assumed to represent species names."
                                " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."))
    opt_args.add_argument("--collateral", action='store_true', help=(""))

    args = parser.parse_args(argv)
    print(__DESCRIPTION__)

    if args.source_file and args.source_trees:
        # Name the options that actually exist on this command line.
        sys.stderr.write('The use of --source_file and source_trees at the '
                         'same time is not supported.\n')
        sys.exit(1)

    if args.source_file:
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees

    def _root(tree):
        """Root *tree* at --outgroup; does nothing when it was not given."""
        if not args.outgroup:
            return
        if len(args.outgroup) > 1:
            out = tree.get_common_ancestor(args.outgroup)
        else:
            out = tree.search_nodes(name=args.outgroup[0])[0]
        tree.set_outgroup(out)

    # The species-name extractor does not depend on the tree being read, so
    # the pattern is compiled once instead of once per source tree.
    get_sp_name = None
    if args.extract_species:
        if args.sp_regexp:
            sp_matcher = re.compile(args.sp_regexp)

            def get_sp_name(name):
                return re.search(sp_matcher, name).groups()[0]
        else:
            def get_sp_name(name):
                return name

    ref_tree = Tree(args.reftree)
    if args.ref_tree_attr:
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                # Show the offending leaf before getattr() fails on it.
                print(lf)
            lf.name = getattr(lf, args.ref_tree_attr)
    _root(ref_tree)

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF',
              'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist")
    # Needed by every print_table() call, whether or not -o is used.
    COL_WIDTHS = [20, 20] + [9] * 10
    ref_fname = os.path.basename(args.reftree)

    OUT = open(args.output, "w") if args.output else None
    try:
        if OUT is not None:
            OUT.write('# ' + ctime() + '\n')
            OUT.write('# ' + ' '.join(sys.argv) + '\n')
            # Column titles carry newlines for table wrapping; flatten them
            # so the file header stays on a single tab-delimited line.
            OUT.write('#' + '\t'.join(h.replace('\n', ' ') for h in HEADER)
                      + '\n')
        else:
            print('# ' + ctime())
            print('# ' + ' '.join(sys.argv))
            print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

        for counter, tfile in enumerate(source_trees):
            if args.source_file:
                # Items from tree_iterator unpack as (seed id, tree); only
                # the tree is used here.
                _seedid, tfile = tfile

            if args.extract_species:
                tt = PhyloTree(tfile, sp_naming_function=get_sp_name)
            else:
                tt = Tree(tfile)

            if args.src_tree_attr:
                for lf in tt.iter_leaves():
                    lf._origname = lf.name
                    lf.name = getattr(lf, args.src_tree_attr)
            _root(tt)

            if args.source_trees:
                fname = os.path.basename(tfile)
            else:
                fname = '%05d' % counter

            r = tt.compare(ref_tree,
                           ref_tree_attr=args.ref_tree_attr,
                           source_tree_attr=args.src_tree_attr,
                           min_support_ref=args.min_support_ref,
                           min_support_source=args.min_support_src,
                           unrooted=args.unrooted,
                           has_duplications=args.extract_species)

            row = [istr(value) for value in (
                fname[-30:], ref_fname[-30:], r['effective_tree_size'],
                r['norm_rf'], r['rf'], r['max_rf'],
                r["source_edges_in_ref"], r["ref_edges_in_source"],
                r['source_subtrees'], r['treeko_dist'])]

            if OUT is not None:
                # Rows follow the header into the report file.
                OUT.write('\t'.join(str(cell) for cell in row) + '\n')
            else:
                print_table([row], fix_col_width=COL_WIDTHS, wrap_style='cut')
    finally:
        # Close the report even if a tree fails to load or compare.
        if OUT is not None:
            OUT.close()
# Set dashed blue lines in all leaves nst1 = NodeStyle() nst1["bgcolor"] = "LightSteelBlue" nst2 = NodeStyle() nst2["bgcolor"] = "Moccasin" nst3 = NodeStyle() nst3["bgcolor"] = "DarkSeaGreen" nst4 = NodeStyle() nst4["bgcolor"] = "Khaki" t = Tree("((((a1,a2),a3), ((b1,b2),(b3,b4))), ((c1,c2),c3));") n1 = t.get_common_ancestor("a1", "a2", "a3") n1.set_style(nst1) n2 = t.get_common_ancestor("b1", "b2", "b3", "b4") n2.set_style(nst2) n3 = t.get_common_ancestor("c1", "c2", "c3") n3.set_style(nst3) n4 = t.get_common_ancestor("b3", "b4") n4.set_style(nst4) ts = TreeStyle() ts.layout_fn = layout ts.show_leaf_name = False ts.mode = "c" t.render("node_background.png", w=400, tree_style=ts) t.show(tree_style=ts)
# | | /-L # | \--------| #---------| \-M # | # | /-B # | /--------| # | | | /-J # | | \--------| # \--------| \-K # | # | /-E # \--------| # \-D # # Each main branch of the tree is independently rooted. node1 = t.get_common_ancestor("A","H") node2 = t.get_common_ancestor("B","D") node1.set_outgroup("H") node2.set_outgroup("E") print "Tree after rooting each node independently:" print t # # /-F # | # /--------| /-L # | | /--------| # | | | \-M # | \--------| # /--------| | /-A # | | \--------| # | | \-C
# Print the tree as loaded; three branches hang from its root node.
print t
#          /-A
#         |
#         |          /-H
#---------|---------|
#         |          \-F
#         |
#         |          /-B
#          \--------|
#                   |          /-E
#                    \--------|
#                              \-D
#
# Let's define the ancestor of E and D as the tree outgroup. Of
# course, the definition of an outgroup will depend on user criteria.
ancestor = t.get_common_ancestor("E", "D")
t.set_outgroup(ancestor)
print "Tree rooteda at E and D's ancestor is more basal that the others."
print t
#
#                    /-B
#          /--------|
#         |         |          /-A
#         |          \--------|
#         |                   |          /-H
#---------|                    \--------|
#         |                              \-F
#         |
#         |          /-E
#          \--------|
#                    \-D