def plot_data_tree(args, plotdir, plotname, glsfnames, glslabels, leg_title=None, title=None):
    """Build a tree for all genes in the given germline sets and render it as a circular svg.

    The gene list and germline sets come from get_gene_sets(glsfnames, glslabels); the
    tree file is produced by make_tree() in <plotdir>/workdir. Every node gets a branch
    length of 1 and is styled according to getdatastatus(). The image is written to
    <plotdir>/<plotname>.svg.

    <leg_title> and <title> are accepted but not used in this function.

    Raises:
        Exception: if any gene in the input list is missing from the leaves of the tree.
    """
    all_genes, gl_sets = get_gene_sets(glsfnames, glslabels)
    print_data_results(gl_sets)

    newick_path = make_tree(all_genes, plotdir + '/workdir', use_cache=args.use_cache)
    with open(newick_path) as newick_file:
        newick = newick_file.read().strip()

    tree = ete3.ClusterTree(newick)
    leaves_seen = set()  # used below to check that every input gene made it into the tree
    for tnode in tree.traverse():
        tnode.dist = 1
        set_node_style(tnode, getdatastatus(gl_sets, tnode, pair=False), data=True)
        if tnode.is_leaf():
            leaves_seen.add(tnode.name)

    missing = set(all_genes) - leaves_seen
    if len(missing) > 0:
        raise Exception('missing genes from final tree: %s' % ' '.join(missing))

    style = ete3.TreeStyle()
    style.show_scale = False
    style.mode = 'c'
    style.show_leaf_name = True

    svg_path = plotdir + '/' + plotname + '.svg'
    tree.render(svg_path, h=750, tree_style=style)
def draw_tree(plotdir, plotname, treestr, gl_sets, all_genes, gene_categories, ref_label=None, arc_start=None, arc_span=None):
    """Style the tree described by <treestr> and render it to <plotdir>/<plotname>.svg.

    Besides its parameters, this reads the module-level <args> (leaf_names, title,
    title_color) and <scolors>, and calls write_legend(plotdir) before rendering.

    Args:
        plotdir: output directory (also passed to write_legend()).
        plotname: base name of the image file, without suffix.
        treestr: tree string handed to ete3.ClusterTree.
        gl_sets: only its length is used here (passed on to set_node_style()).
        all_genes: every gene that must appear as a leaf of the tree.
        gene_categories: passed through to getstatus().
        ref_label: if None, leaf names are shortened with shorten_name(), and nodes for
            which set_distance_to_zero() is true get dist 1e-9 rather than 0.
        arc_start, arc_span: currently unused (the circular-mode code is commented out).

    Raises:
        Exception: if any gene in <all_genes> is not among the leaves of the tree.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            # data crashes sometimes with float division by zero if you set it to 0., but simulation
            # sometimes gets screwed up for some other reason (that I don't understand) if it's 1e-9
            node.dist = 0. if ref_label is not None else 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)

    missing_genes = set(all_genes) - node_names
    if missing_genes:
        # report the genes that are actually missing (not the ones that were found)
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in etree.traverse():
            if node.is_leaf():
                node.name = shorten_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False
    if not args.leaf_names:
        tstyle.show_leaf_name = False

    # tstyle.mode = 'c'
    # if arc_start is not None:
    #     tstyle.arc_start = arc_start
    # if arc_span is not None:
    #     tstyle.arc_span = arc_span

    write_legend(plotdir)
    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize, bold=True), column=0)
        if args.title_color is not None:
            # <title_color> is either a key in <scolors> or is used directly as the color
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width * fsize, height=fsize, bgcolor=tcol, fgcolor=None), column=1)

    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print(' %s' % imagefname)  # single parenthesized argument: prints the same under python 2 and 3
    etree.render(imagefname, tree_style=tstyle)
def draw_tree(plotdir, plotname, treestr, gl_sets, all_genes, gene_categories, ref_label=None, arc_start=None, arc_span=None):
    """Style the tree described by <treestr> and render it (in up to three variants) under <plotdir>.

    Besides its parameters, this reads the module-level <args> (param_dirs, glslabels,
    region, title, title_color) and <scolors>. Files written, where <imagefname> is
    <plotdir>/<plotname>.svg:
      - utils.insert_before_suffix('-leaf-names', imagefname): rendered before leaf names are switched off
      - imagefname: rendered with show_leaf_name = False
      - utils.insert_before_suffix('-gene-counts', imagefname): only if args.param_dirs is set;
        node names are replaced by their count strings
    write_legend(plotdir) is called only when there is more than one label in args.glslabels.

    Args:
        plotdir: output directory.
        plotname: base name of the image file, without suffix.
        treestr: tree string handed to ete3.ClusterTree.
        gl_sets: only its length is used here (passed on to set_node_style()).
        all_genes: every gene that must appear as a leaf of the tree.
        gene_categories: passed through to getstatus().
        ref_label: if None, leaf names are shortened with utils.shorten_gene_name(), and nodes
            for which set_distance_to_zero() is true get dist 1e-9 rather than 0.
        arc_start, arc_span: unused in this function.

    Raises:
        Exception: if any gene in <all_genes> is not among the leaves of the tree.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            # data crashes sometimes with float division by zero if you set it to 0., but simulation
            # sometimes gets screwed up for some other reason (that I don't understand) if it's 1e-9
            node.dist = 0. if ref_label is not None else 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)

    missing_genes = set(all_genes) - node_names
    if missing_genes:
        # report the genes that are actually missing (not the ones that were found)
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))

    if args.param_dirs is not None:
        countfo = OrderedDict()
        for label, pdir in zip(args.glslabels, args.param_dirs):  # it would be cleaner to do this somewhere else
            if pdir == 'None':  # not the best way to do this
                continue
            countfo[label] = utils.read_overall_gene_probs(pdir, normalize=True)[args.region]
        # one entry per label: the value times 100 with two decimals, or '-' if the node's name isn't in that label's counts
        for node in etree.traverse():
            node.countstr = ' '.join([('%.2f' % (100 * cfo[node.name])) if node.name in cfo else '-' for cfo in countfo.values()])

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in etree.traverse():
            if node.is_leaf():
                node.name = utils.shorten_gene_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    if len(args.glslabels) > 1:
        write_legend(plotdir)
    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize, bold=True), column=0)
        if args.title_color is not None:
            # <title_color> is either a key in <scolors> or is used directly as the color
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width * fsize, height=fsize, bgcolor=tcol, fgcolor=None), column=1)

    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print(' %s' % imagefname)  # single parenthesized argument: prints the same under python 2 and 3
    etree.render(utils.insert_before_suffix('-leaf-names', imagefname), tree_style=tstyle)
    tstyle.show_leaf_name = False
    etree.render(imagefname, tree_style=tstyle)

    # NOTE all the node names are screwed up after this, so you'll have to fix them if you add another step
    if args.param_dirs is not None:
        for node in etree.traverse():
            node.name = node.countstr
        tstyle.show_leaf_name = True
        etree.render(utils.insert_before_suffix('-gene-counts', imagefname), tree_style=tstyle)
def plot_gls_gen_tree(args, plotdir, plotname, glsfnames, glslabels, leg_title=None, title=None):
    """Build a tree for the 'sim' and 'inf' germline sets and render it as a circular svg.

    The gene list and germline sets come from get_gene_sets() with ref_label='sim'; the
    tree file is produced by make_tree() in <plotdir>/workdir. Every node gets a branch
    length of 1 and is styled according to getstatus(); leaves whose status is a key in
    the module-level <faces> also get a copy of that face in column 0. The image is
    written to <plotdir>/<plotname>.svg with leaf names hidden.

    <leg_title> and <title> are accepted but not used in this function.

    Raises:
        AssertionError: if <glslabels> is not exactly ['sim', 'inf'].
        Exception: if any gene in the input list is missing from the leaves of the tree.
    """
    assert glslabels == ['sim', 'inf']  # otherwise stuff needs to be updated

    all_genes, gl_sets = get_gene_sets(glsfnames, glslabels, ref_label='sim')
    print_results(gl_sets)

    treefname = make_tree(all_genes, plotdir + '/workdir', use_cache=args.use_cache)
    with open(treefname) as treefile:
        treestr = treefile.read().strip()
    # treestr = "(A:0.7,B:0.7):0.3;"
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        node.dist = 1
        status = getstatus(gl_sets, node)
        set_node_style(node, status)
        if node.is_leaf():
            if status in faces:
                # deep copy so each leaf gets its own face object rather than sharing the one in <faces>
                node.add_face(copy.deepcopy(faces[status]), column=0)
            node_names.add(node.name)

    missing_genes = set(all_genes) - node_names
    if missing_genes:
        # report the genes that are actually missing (not the ones that were found)
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))

    tstyle = ete3.TreeStyle()
    tstyle.show_leaf_name = False
    tstyle.mode = 'c'
    tstyle.show_scale = False
    etree.render(plotdir + '/' + plotname + '.svg', h=750, tree_style=tstyle)
def write_legend(plotdir):
    """Render a standalone legend image to <plotdir>/legend.svg.

    The legend is drawn entirely with faces added to the title of an empty
    ete3.ClusterTree's TreeStyle. Reads the module-level <args> (legends, glslabels,
    glsfnames, ref_label, leafheight, legend_title, novel_dot_size) as well as
    <used_colors>, <used_faces>, <simu_colors> and <scolors>.

    Raises:
        Exception: from get_leg_name() if the status is 'both' or 'all' and the number
            of entries in args.glsfnames is neither 2 nor 3.
    """
    def get_leg_name(status):
        """Return the legend text to display for <status>."""
        if args.legends is not None and status in args.glslabels:  # user-supplied legend text for this label
            lname = args.legends[args.glslabels.index(status)]
        elif status == 'both':
            if len(args.glsfnames) == 2:
                lname = 'both'
            elif len(args.glsfnames) == 3:
                lname = 'two'
            else:
                raise Exception('wtf %d' % len(args.glsfnames))
        elif status == 'all':
            if len(args.glsfnames) == 2:
                lname = 'both'
            elif len(args.glsfnames) == 3:
                lname = 'all three'
            else:
                raise Exception('wtf %d' % len(args.glsfnames))
        else:  # anything else is displayed as-is
            lname = status
        return lname

    def add_stuff(status, leg_name, color):
        """Record <color> (and the face for <status>, if one was used) under <leg_name>."""
        legfo[leg_name] = color
        if status in used_faces:
            facefo[leg_name] = used_faces[status]

    # legfo: legend name --> color, facefo: legend name --> entry from <used_faces>
    legfo, facefo = {}, {}
    if args.ref_label is not None:
        for status, color in simu_colors.items():
            add_stuff(status, status, color)
    else:
        added_two_method_color = False  # only the first '-&-' status is added under the 'both' legend name
        for status, color in used_colors.items():
            if '-&-' in status:
                for substatus in status.split('-&-'):  # arg, have to handle cases where the single one isn't in there
                    if get_leg_name(substatus) not in legfo:
                        add_stuff(substatus, get_leg_name(substatus), scolors[substatus])
                if not added_two_method_color:
                    leg_name = get_leg_name('both')
                    added_two_method_color = True
                else:
                    continue
            else:
                leg_name = get_leg_name(status)
            add_stuff(status, leg_name, color)

    # figure out the order we want 'em in: alphabetical, with the 'both' and 'all' entries moved to the end
    lnames = sorted(legfo.keys())
    for status in ['both', 'all']:
        if get_leg_name(status) in lnames:
            lnames.remove(get_leg_name(status))
            lnames.append(get_leg_name(status))

    etree = ete3.ClusterTree()  # '(a);')
    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False
    # tstyle.show_leaf_name = False
    # for node in etree.traverse():
    #     print node.name
    #     node.add_face(ete3.CircleFace(args.novel_dot_size, scolors['novel']), column=1) #, position='float') # if args.leaf_names else 'branch')

    # title columns: a spacer on the left, then the colored swatch, then the text
    dummy_column = 0
    pic_column = 1
    text_column = 2
    leg_title_height = 1.5 * args.leafheight  # if args.legend_title is not None else 0.75 * args.leafheight

    for icol in range(text_column + 1):  # add a top border
        tstyle.title.add_face(ete3.RectFace(0.9 * args.leafheight, 0.9 * args.leafheight, fgcolor=None, bgcolor=None), column=icol)

    tstyle.title.add_face(ete3.TextFace(' ', fsize=leg_title_height), column=dummy_column)  # adds a left border

    if args.legend_title is not None:
        tstyle.title.add_face(ete3.TextFace('', fsize=leg_title_height), column=pic_column)  # keeps the first legend entry from getting added on this line
        tstyle.title.add_face(ete3.TextFace(args.legend_title, fsize=leg_title_height, fgcolor='black', bold=True), column=text_column)  # the legend title itself

    # one row per legend entry: swatch in <pic_column>, name in <text_column>
    for leg_name in lnames:
        color = legfo[leg_name]
        size_factor = 2.
        if leg_name in facefo:  # entries with a face get a two-color stacked bar: 80% <color>, 20% the face's entry
            tstyle.title.add_face(ete3.StackedBarFace([80., 20.], width=size_factor * args.leafheight, height=size_factor * args.leafheight, colors=[color, facefo[leg_name]], line_color='black'), column=pic_column)
        else:
            tstyle.title.add_face(ete3.RectFace(size_factor * args.leafheight, size_factor * args.leafheight, fgcolor='black', bgcolor=color), column=pic_column)  # looks like maybe they reversed fg/bg kwarg names
        tstyle.title.add_face(ete3.TextFace(' ' + leg_name, fsize=args.leafheight, fgcolor='black'), column=text_column)

    # final entry: a dot in the 'novel' color, labeled 'novel allele'
    tstyle.title.add_face(ete3.CircleFace(1.5 * args.novel_dot_size, scolors['novel']), column=pic_column)
    tstyle.title.add_face(ete3.TextFace('novel allele', fsize=args.leafheight), column=text_column)

    etree.render(plotdir + '/legend.svg', tree_style=tstyle)
def create_tree(
    # Base
    newick=None,
    name=None,
    format=0,
    dist=1.0,
    support=1.0,
    quoted_node_names=False,
    # ClusterTree
    text_array=None,
    fdist=None,
    # PhyloTree
    alignment=None,
    alg_format='fasta',
    sp_naming_function=None,
    # PhyloxmlTree
    phyloxml_clade=None,
    phyloxml_phylogeny=None,
    # Constructor
    node_prefix="y",
    into=ete3.Tree,
    prune=None,
    force_bifuraction=True,
    # Keywords
    tree_kws=dict(),
    bifurcation_kws=dict(recursive=True),
):
    """
    Construct a tree of type `into` and optionally prune, bifurcate and name its nodes.

    Parameters
    ----------
    newick, format, quoted_node_names
        Forwarded to the ete3 constructor (see the per-class calls below for which
        arguments each class receives).
    name, dist, support
        Set as attributes on the constructed tree object.
    text_array, fdist
        Used only for `ete3.ClusterTree`. A `pd.DataFrame` `text_array` is first
        converted with `dataframe_to_matrixstring`.
    alignment, alg_format, sp_naming_function
        Used only for `ete3.PhyloTree`.
    phyloxml_clade, phyloxml_phylogeny
        Used only for `ete3.PhyloxmlTree`.
    node_prefix
        If not None, the tree is passed through `name_tree_nodes` with this prefix.
    into
        One of `ete3.Tree`, `ete3.ClusterTree`, `ete3.PhyloTree`, `ete3.PhyloxmlTree`
        or `skbio.TreeNode`. For `skbio.TreeNode` the tree is built as an `ete3.Tree`
        and converted at the end with `ete_to_skbio`.
    prune
        If not None, passed to `tree.prune`.
    force_bifuraction
        If True, call `tree.resolve_polytomy(**bifurcation_kws)` when the tree has
        fewer internal nodes than (number of leaves - 1).
    tree_kws, bifurcation_kws
        Extra keyword arguments for the constructor and for `resolve_polytomy`.
        The default dicts are only unpacked, never mutated.

    Returns
    -------
    The constructed tree, or the result of `ete_to_skbio` when `into` is
    `skbio.TreeNode`.

    Raises
    ------
    ValueError
        If `into` is not one of the supported tree classes.

    Notes
    -----
    Next: Convert to NetworkX
    """
    # skbio trees are built as a plain ete3.Tree and converted just before returning
    convert_to_skbio = into is skbio.TreeNode
    if convert_to_skbio:
        into = ete3.Tree

    # ete3 construction
    if into is ete3.Tree:
        tree = ete3.Tree(newick=newick, format=format, quoted_node_names=quoted_node_names, **tree_kws)
    elif into is ete3.ClusterTree:
        if isinstance(text_array, pd.DataFrame):
            text_array = dataframe_to_matrixstring(text_array)
        tree = ete3.ClusterTree(newick=newick, text_array=text_array, fdist=fdist, **tree_kws)
    elif into is ete3.PhyloTree:
        tree = ete3.PhyloTree(newick=newick, alignment=alignment, alg_format=alg_format, sp_naming_function=sp_naming_function, format=format, **tree_kws)
    elif into is ete3.PhyloxmlTree:
        tree = ete3.PhyloxmlTree(phyloxml_clade=phyloxml_clade, phyloxml_phylogeny=phyloxml_phylogeny, **tree_kws)
    else:
        # Previously this fell through and failed later with an unbound `tree`
        raise ValueError("Unsupported tree type for `into`: {!r}".format(into))

    # Set base attributes
    tree.name = name
    tree.dist = dist
    tree.support = support

    # Prune
    if prune is not None:
        tree.prune(prune)

    # Bifurcation
    if force_bifuraction:
        n_leaves = 0
        n_internal_nodes = 0
        for node in tree.traverse():
            if node.is_leaf():
                n_leaves += 1
            else:
                n_internal_nodes += 1
        # Fewer than (leaves - 1) internal nodes means at least one node has more than two children
        if n_internal_nodes < (n_leaves - 1):
            tree.resolve_polytomy(**bifurcation_kws)

    # Node prefix
    if node_prefix is not None:
        tree = name_tree_nodes(tree, node_prefix=node_prefix)

    # skbio
    if convert_to_skbio:
        return ete_to_skbio(tree, node_prefix=None)
    return tree
#!/usr/bin/env python3 import numpy as np import ete3 from ete3 import ClusterTree, ProfileFace, ArrayTable, TreeStyle, AttrFace, CircleFace, TextFace from ete3.treeview.faces import add_face_to_node # tree = ete3.PhyloTree("all_species.tre", sp_naming_function=lambda node: node.name) data = ArrayTable("one_result.array") ct = ete3.ClusterTree("all_species.tre", data) data_max = np.max(ct.arraytable.matrix, axis=0) data_min = np.min(ct.arraytable.matrix, axis=0) data_median = np.median(ct.arraytable.matrix, axis=0) def mylayout(node): if node.is_leaf(): if node.name == "Danio_rerio": # import pudb; pudb.set_trace() pass # profile_face = ProfileFace(data_max[1], data_min[1], 0.0, 100, 14, "heatmap") profile_face = ProfileFace(data_max[0], data_min[0], 0.0, 100, 14, "heatmap") ete3.treeview.faces.add_face_to_node(profile_face, node, 0, aligned=True) # profile_face = ProfileFace(data_max[0], data_min[0], data_median[0], 100, 14, "heatmap") # ete3.treeview.faces.add_face_to_node(profile_face, node, 1, aligned=True)