def add_t(node):
    """Attach a small black "-" marker and a blank spacer to *node*.

    Both faces use font size 4 and are added with ``position="float"``:
    the marker in column 0, the spacer in column 1.
    """
    marker = TextFace("-")
    marker.fsize = 4
    marker.background.color = "black"
    # Zero out all four margins of the marker.
    for side in ("right", "top", "left", "bottom"):
        setattr(marker, "margin_" + side, 0)
    marker.border.width = 1

    spacer = TextFace(" ")
    spacer.fsize = 4

    for column, face in enumerate((marker, spacer)):
        node.add_face(face, column=column, position="float")
def styleFace(val):
    """Return a ``TextFace`` for *val* rotated by 270 degrees.

    The face uses font size 6, a bottom margin of 5 and a right margin of 10.
    """
    face = TextFace(val)
    face.fsize = 6
    face.rotation = 270
    face.margin_right = 10
    face.margin_bottom = 5
    return face
def my_layout(node):
    """Add the node's name as a rotated text label to the right of its branch.

    The label is a tight-text ``TextFace`` (font size 6, rotated by -90)
    placed in column 0 at position ``"branch-right"``.
    """
    label = TextFace(node.name, tight_text=True)
    label.fsize = 6
    label.rotation = -90
    label.margin_left = label.margin_right = 5
    label.margin_top, label.margin_bottom = 0, 15
    add_face_to_node(label, node, column=0, position="branch-right")
def format_nodes(node, node_style, sus_clades, t):
    """Build the support-value face for *node* and flag suspicious clades.

    A node with support >= 70 gets a bold label. If, in addition, its
    leaves belong to more than one 'Higher Taxonomy' group (looked up in the
    module-level ``metadata``) and the clade holds fewer than half of the
    leaves of *t*, the node is drawn as a large red sphere on a silver
    background and counted as suspicious. A node with lower support only
    gets a smaller label.

    Returns:
        A ``(face, sus_clades)`` tuple: the support ``TextFace`` and the
        (possibly incremented) count of suspicious clades.
    """
    # NOTE(review): the block nesting was reconstructed from whitespace-collapsed
    # source; confirm that the small-font branch pairs with the support test.
    support_face = TextFace(f'{int(node.support)}', fsize=8)

    well_supported = node.support >= 70
    if not well_supported:
        support_face.fsize = 7
        return support_face, sus_clades

    support_face.bold = True

    leaf_names = node.get_leaf_names()
    taxons = set()
    if len(leaf_names) > 1:
        for leaf_name in leaf_names:
            # Paralogs carry a '..' suffix; potential orthologs are
            # identified by the text before the first underscore.
            separator = '..' if '..' in leaf_name else '_'
            organism = leaf_name.split(separator)[0]
            taxons.add(metadata[organism]['Higher Taxonomy'])

    if len(taxons) > 1 and (len(node) < (len(t) / 2)):
        highlight = (
            ('shape', 'sphere'),
            ('size', 12),
            ('fgcolor', 'red'),
            ('bgcolor', 'Silver'),
        )
        for key, value in highlight:
            node_style[key] = value
        sus_clades += 1

    return support_face, sus_clades
time_end = trackResult[r, 2] parent_id = trackResult[r, 3] time_duration = np.abs(time_begin-time_end) # for root if parent_id == 0: # Add name to root for the first iteration root.add_feature("name", str(cell_id)) # change the branch length root.add_feature("dist", time_duration) #change node style root.set_style(ns_root) # set node name to face nameFace = TextFace(root.name) nameFace.fgcolor = "white" nameFace.fsize = 15 # nameFace.border.width = 1 nameFace.background.color = "green" node_cur.add_face(nameFace, column=1, position="branch-bottom") else: # for child #### search the parent node by parent_id node_cur = root.search_nodes(name=str(parent_id)) # there should be only one parent node if len(node_cur) == 1: #### set child with its id node_cur = node_cur[0].add_child(name=str(cell_id)) #### set duration node_cur.add_feature("dist", time_duration) # set node style
def plot_phylum_counts(NOG_id, rank='phylum', colapse_low_species_counts=4, remove_unlassified=True):
    '''Build a taxonomy tree annotated with per-taxon counts for one NOG.

    Steps:
      1. Load the per-leaf genome counts, the reference phylogeny and the
         taxon labels for *rank* from the ``eggnog`` MySQL database.
      2. Keep only the leaves with more than *colapse_low_species_counts*
         genomes (optionally dropping labels containing 'unclassified').
      3. Decorate every remaining leaf with aligned columns: rank, number
         of genomes, number of genomes with the NOG, percentage (as text
         and as a stacked bar), plus the scientific name.
      4. Label internal nodes whose rank is phylum/superkingdom/class/
         subphylum (or whose name is 'FCB group').

    Notes carried over from the original author (not implemented in this
    function): merge eukaryotes into 5 main clades, merge viruses as a
    single clade; beware of no-rank groups and no-rank species.

    Args:
        NOG_id: identifier passed to ``get_NOG_taxonomy`` and shown in the
            column header.
        rank: taxonomic rank; used to select the database tables and the
            phylogeny row.
        colapse_low_species_counts: leaves with this many genomes or fewer
            are removed from the tree.
        remove_unlassified: when true, leaves whose label contains
            'unclassified' are removed.

    Returns:
        ``(tree, tss)``: the pruned, decorated ete3 tree and its TreeStyle.

    Raises:
        KeyError: if the ``SQLPSW`` environment variable is not set, or a
            leaf is missing from the count/label tables.
    '''
    import MySQLdb
    import os
    from chlamdb.biosqldb import manipulate_biosqldb
    from ete3 import NCBITaxa, Tree, TextFace, TreeStyle, StackedBarFace

    def _cell(text, **face_kwargs):
        """Return the small white-backed text face used for table cells."""
        face = TextFace(' %s ' % text, **face_kwargs)
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 0
        face.margin_bottom = 1
        face.fsize = 7
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    ncbi = NCBITaxa()

    sqlpsw = os.environ['SQLPSW']
    conn = MySQLdb.connect(
        host="localhost",  # your host, usually localhost
        user="******",  # your username
        passwd=sqlpsw,  # your password
        db="eggnog")  # name of the data base
    try:
        cursor = conn.cursor()

        # Table names cannot be bound as query parameters, so *rank* is
        # still interpolated here; callers must pass a trusted rank name.
        cursor.execute('select * from eggnog.leaf2n_genomes_%s' % rank)
        leaf_taxon2n_species = manipulate_biosqldb.to_dict(cursor.fetchall())

        leaf_taxon2n_species_with_domain = get_NOG_taxonomy(NOG_id, rank)

        # The value comparison, however, can be a bound parameter.
        cursor.execute('select phylogeny from eggnog.phylogeny where rank=%s', (rank,))
        tree = Tree(cursor.fetchall()[0][0], format=1)

        cursor.execute('select * from eggnog.taxid2label_%s' % rank)
        taxon_id2scientific_name_and_rank = manipulate_biosqldb.to_dict(
            cursor.fetchall())
    finally:
        # Everything needed has been fetched; do not leak the connection.
        conn.close()

    # Tree node names are strings, so normalise the label keys to strings.
    taxon_id2scientific_name_and_rank = {
        str(k): v
        for k, v in taxon_id2scientific_name_and_rank.items()
    }

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "blue"

    # Select the leaves to keep, then prune everything else.
    keep = []
    for lf in tree.iter_leaves():
        if remove_unlassified:
            label = taxon_id2scientific_name_and_rank[str(lf.name)][0]
            if 'unclassified' in label:
                continue
        n_genomes = int(leaf_taxon2n_species[lf.name])
        if n_genomes > colapse_low_species_counts:
            keep.append(lf.name)
    print('number of leaaves:', len(keep))
    tree.prune(keep)

    # NOTE(review): headers occupy aligned columns 0-3 while the matching
    # leaf values are added to aligned columns 1-4 below -- confirm that
    # this offset is intended.
    header_list = ['Rank', 'N genomes', 'N with %s' % NOG_id, 'Percentage']
    for col, header in enumerate(header_list):
        header_face = TextFace('%s' % (header))
        header_face.margin_top = 0
        header_face.margin_right = 1
        header_face.margin_left = 20
        header_face.margin_bottom = 1
        header_face.rotation = 270
        header_face.hz_align = 2
        header_face.vt_align = 2
        header_face.inner_background.color = "white"
        header_face.opacity = 1.
        tss.aligned_header.add_face(header_face, col)

    for lf in tree.iter_leaves():
        n_genomes = int(leaf_taxon2n_species[lf.name])
        if n_genomes <= colapse_low_species_counts:
            continue

        # Column 2: number of genomes.
        lf.add_face(_cell(str(leaf_taxon2n_species[lf.name])), 2, position="aligned")

        # Column 3: number of genomes with the domain (0 when absent).
        try:
            n_with_domain = leaf_taxon2n_species_with_domain[lf.name]
        except (KeyError, TypeError):
            n_with_domain = 0
        lf.add_face(_cell(str(n_with_domain)), 3, position="aligned")

        # Column 1: rank of the leaf taxon ('-' when it cannot be resolved).
        ranks = ncbi.get_rank([lf.name])
        try:
            r = ranks[max(ranks.keys())]
        except (KeyError, TypeError, ValueError):
            r = '-'
        lf.add_face(_cell(r, fgcolor='red'), 1, position="aligned")

        # Column 4/5: percentage of genomes with the domain, text and bar.
        try:
            percentage = (float(leaf_taxon2n_species_with_domain[lf.name]) /
                          float(leaf_taxon2n_species[lf.name])) * 100
        except (KeyError, TypeError, ValueError, ZeroDivisionError):
            percentage = 0
        lf.add_face(_cell(str(round(percentage, 2))), 4, position="aligned")

        bar = StackedBarFace([percentage, 100 - percentage],
                             width=100,
                             height=10,
                             colors=["#7fc97f", "white"])
        bar.rotation = 0
        bar.inner_border.color = "grey"
        bar.inner_border.width = 0
        bar.margin_right = 15
        bar.margin_left = 0
        lf.add_face(bar, 5, position="aligned")

        # Column 0 (next to the branch): scientific name. The leaf itself
        # is renamed to "<name> (<taxon id>)" after the lookup.
        scientific_name = taxon_id2scientific_name_and_rank[str(lf.name)][0]
        name_face = TextFace('%s' % scientific_name, fgcolor="black", fsize=9)
        lf.name = " %s (%s)" % (scientific_name, str(lf.name))
        name_face.margin_right = 10
        lf.add_face(name_face, 0)

    tss.show_leaf_name = False

    # Label internal nodes of interest. Nodes without a label entry
    # (including the leaves renamed above) are skipped explicitly rather
    # than relying on a rank left over from a previous iteration.
    labelled_ranks = ('phylum', 'superkingdom', 'class', 'subphylum')
    for node in tree.traverse("postorder"):
        entry = taxon_id2scientific_name_and_rank.get(str(node.name))
        if entry is None:
            continue
        try:
            node_label, node_rank = entry[0], entry[1]
        except (IndexError, TypeError):
            continue
        if node_rank in labelled_ranks or node_label in ('FCB group',):
            node.add_face(TextFace("%s" % node_label), column=0, position="branch-top")

    return tree, tss
time_end = trackResult[r, 2] parent_id = trackResult[r, 3] time_duration = np.abs(time_begin - time_end) # for root if parent_id == 0: # Add name to root for the first iteration root.add_feature("name", str(cell_id)) # change the branch length root.add_feature("dist", time_duration) #change node style root.set_style(ns_root) # set node name to face nameFace = TextFace(root.name) nameFace.fgcolor = "white" nameFace.fsize = 15 # nameFace.border.width = 1 nameFace.background.color = "green" node_cur.add_face(nameFace, column=1, position="branch-bottom") else: # for child #### search the parent node by parent_id node_cur = root.search_nodes(name=str(parent_id)) # there should be only one parent node if len(node_cur) == 1: #### set child with its id node_cur = node_cur[0].add_child(name=str(cell_id)) #### set duration node_cur.add_feature("dist", time_duration) # set node style
def _heat_tree_header_face(text):
    """Return a rotated, white-backed column header face."""
    face = TextFace(text)
    face.rotation = -25
    face.margin_top = 1
    face.margin_right = 1
    face.margin_left = 20
    face.margin_bottom = 1
    face.inner_background.color = "white"
    face.opacity = 1.
    return face


def _heat_tree_value_face(text, margin_right=0):
    """Return the small white-backed text face used for numeric cells."""
    face = TextFace(' %s ' % text)
    face.margin_top = 1
    face.margin_right = margin_right
    face.margin_left = 0
    face.margin_bottom = 1
    face.fsize = 7
    face.inner_background.color = "white"
    face.opacity = 1.
    return face


def _heat_tree_bar_face(filled, rest, color, margin_left, margin_right):
    """Return a 100x9 two-segment bar: *filled* in *color*, *rest* in white."""
    bar = StackedBarFace([filled, rest],
                         width=100,
                         height=9,
                         colors=[color, 'white'])
    bar.rotation = 0
    bar.inner_border.color = "black"
    bar.inner_border.width = 0
    bar.margin_left = margin_left
    bar.margin_right = margin_right
    return bar


def _add_heat_tree_headers(tss, with_checkm):
    """Add the aligned column headers (value columns get blank headers)."""
    header = tss.aligned_header
    header.add_face(_heat_tree_header_face('Size (Mbp)'), 3)
    header.add_face(_heat_tree_header_face('GC (%)'), 5)
    blank = TextFace('')
    header.add_face(blank, 2)
    header.add_face(blank, 4)
    header.add_face(_heat_tree_header_face('Non coding (%)'), 7)
    header.add_face(TextFace(''), 6)
    if with_checkm:
        header.add_face(_heat_tree_header_face('Completeness (%)'), 9)
        header.add_face(TextFace(''), 8)
        header.add_face(_heat_tree_header_face('Contamination (%)'), 11)
        header.add_face(TextFace(''), 10)


def plot_heat_tree(tree_file, biodb="chlamydia_04_16", exclude_outgroup=False, bw_scale=True):
    """Decorate a genome tree with size, GC, non-coding and checkm columns.

    Each leaf (named by taxon id) gets aligned columns showing genome size
    (Mbp), GC content and non-coding percentage, each as a number and as a
    stacked bar. When the ``custom_tables.checkm_<biodb>`` table can be
    read, completeness and contamination columns are added as well. The
    genome description is shown next to the leaf in italics.

    Args:
        tree_file: either a string accepted by ``Tree(...)`` (the tree is
            then midpoint-rooted on a best-effort basis) or an existing
            ``Tree`` instance, used as-is.
        biodb: name of the biosql database to read genome statistics from.
        exclude_outgroup: when true, the first leaf is left out of the
            scale computation and only renamed to its description.
        bw_scale: when true, bars use one fixed colour per column; when
            false, bar colours follow a YlGnBu colour map of the value.

    Returns:
        ``(t1, tss)``: the decorated tree and its TreeStyle.

    Raises:
        IOError: if *tree_file* is neither a string nor a ``Tree``.
    """
    from chlamdb.biosqldb import manipulate_biosqldb
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl

    server, db = manipulate_biosqldb.load_db(biodb)

    sql_biodatabase_id = 'select biodatabase_id from biodatabase where name="%s"' % biodb
    db_id = server.adaptor.execute_and_fetchall(sql_biodatabase_id, )[0][0]

    if isinstance(tree_file, str):
        t1 = Tree(tree_file)
        try:
            # Root on the midpoint outgroup.
            t1.set_outgroup(t1.get_midpoint_outgroup())
        except Exception:
            # Best effort: keep the tree as loaded if rooting fails.
            pass
    elif isinstance(tree_file, Tree):
        t1 = tree_file
    else:
        # The original built this exception without raising it, which led
        # to a confusing NameError on ``t1`` further down.
        raise IOError('Unkown tree format')

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    sql2 = 'select t2.taxon_id, t1.GC from genomes_info_%s as t1 inner join bioentry as t2 ' \
           ' on t1.accession=t2.accession where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";' % (biodb, db_id)
    sql3 = 'select t2.taxon_id, t1.genome_size from genomes_info_%s as t1 ' \
           ' inner join bioentry as t2 on t1.accession=t2.accession ' \
           ' where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";' % (biodb, db_id)
    sql4 = 'select t2.taxon_id,percent_non_coding from genomes_info_%s as t1 ' \
           ' inner join bioentry as t2 on t1.accession=t2.accession ' \
           ' where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";' % (biodb, db_id)

    sql_checkm_completeness = 'select taxon_id, completeness from custom_tables.checkm_%s;' % biodb
    sql_checkm_contamination = 'select taxon_id,contamination from custom_tables.checkm_%s;' % biodb

    # The checkm columns are optional: if either query fails, both are
    # disabled together.
    try:
        taxon_id2completeness = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql_checkm_completeness))
        taxon_id2contamination = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql_checkm_contamination))
    except Exception:
        taxon_id2completeness = False
        taxon_id2contamination = False

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, filter_names=True)
    taxon2gc = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql2, ))
    taxon2genome_size = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql3, ))
    taxon2coding_density = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql4, ))

    my_taxons = [lf.name for lf in t1.iter_leaves()]
    if exclude_outgroup:
        # The first leaf is treated as the outgroup and left out of the scales.
        my_taxons.remove(str(my_taxons[0]))

    genome_sizes = [float(taxon2genome_size[i]) for i in my_taxons]
    gc_list = [float(taxon2gc[i]) for i in my_taxons]
    fraction_list = [float(taxon2coding_density[i]) for i in my_taxons]

    max_genome_size = max(genome_sizes)
    max_gc = max(gc_list)
    # Unlike the two maxima above, this one spans every taxon returned by
    # the query, not only the leaves of the tree (as in the original code).
    max_coding_density = max(taxon2coding_density.values())

    cmap = cm.YlGnBu
    m1 = cm.ScalarMappable(
        norm=mpl.colors.Normalize(vmin=min(genome_sizes) - 100000,
                                  vmax=max(genome_sizes)),
        cmap=cmap)
    m2 = cm.ScalarMappable(
        norm=mpl.colors.Normalize(vmin=min(gc_list), vmax=max(gc_list)),
        cmap=cmap)
    m3 = cm.ScalarMappable(
        norm=mpl.colors.Normalize(vmin=min(fraction_list),
                                  vmax=max(fraction_list)),
        cmap=cmap)

    # The max() calls above guarantee at least one scaled leaf, so the
    # headers can be added once up front instead of on the first iteration.
    _add_heat_tree_headers(tss, bool(taxon_id2completeness))

    for i, lf in enumerate(t1.iter_leaves()):
        if exclude_outgroup and i == 0:
            lf.name = taxon2description[lf.name]
            continue

        taxon = lf.name
        genome_size = float(taxon2genome_size[taxon])
        description = taxon2description[taxon]
        # This genome always uses the fixed colours, as does every genome
        # when bw_scale is set.
        fixed_colors = bw_scale or description == 'Rhabdochlamydia helveticae T3358'

        # Columns 2/3: genome size in Mbp, bar relative to the largest genome.
        lf.add_face(
            _heat_tree_value_face(str(round(genome_size / 1000000.0, 2)), margin_right=1),
            2, position="aligned")
        fraction_biggest = (genome_size / max_genome_size) * 100
        col = '#fc8d59' if fixed_colors else rgb2hex(m1.to_rgba(genome_size))
        lf.add_face(
            _heat_tree_bar_face(fraction_biggest, 100 - fraction_biggest, col,
                                margin_left=0, margin_right=15),
            3, position="aligned")

        # Columns 4/5: GC content, bar relative to the highest GC.
        gc = float(taxon2gc[taxon])
        fraction_biggest = (gc / max_gc) * 100
        col = '#91bfdb' if fixed_colors else rgb2hex(m2.to_rgba(gc))
        lf.add_face(
            _heat_tree_bar_face(fraction_biggest, 100 - fraction_biggest, col,
                                margin_left=0, margin_right=15),
            5, position="aligned")
        lf.add_face(_heat_tree_value_face(str(round(gc, 2))), 4, position="aligned")

        # Columns 6/7: non-coding percentage, bar relative to the maximum.
        col = '#99d594' if fixed_colors else rgb2hex(
            m3.to_rgba(float(taxon2coding_density[taxon])))
        lf.add_face(
            _heat_tree_value_face(str(float(taxon2coding_density[taxon]))),
            6, position="aligned")
        fraction = (float(taxon2coding_density[taxon]) / max_coding_density) * 100
        fraction_rest = ((max_coding_density - taxon2coding_density[taxon]) /
                         float(max_coding_density)) * 100
        lf.add_face(
            _heat_tree_bar_face(fraction, fraction_rest, col,
                                margin_left=5, margin_right=1),
            7, position="aligned")

        if taxon_id2completeness:
            # Columns 8/9: checkm completeness (already a percentage).
            fraction = float(taxon_id2completeness[taxon])
            lf.add_face(_heat_tree_value_face(str(fraction)), 8, position="aligned")
            lf.add_face(
                _heat_tree_bar_face(fraction, 100 - fraction, "#d7191c",
                                    margin_left=5, margin_right=1),
                9, position="aligned")

            # Columns 10/11: checkm contamination (already a percentage).
            fraction = float(taxon_id2contamination[taxon])
            lf.add_face(_heat_tree_value_face(str(fraction)), 10, position="aligned")
            lf.add_face(
                _heat_tree_bar_face(fraction, 100 - fraction, "black",
                                    margin_left=5, margin_right=1),
                11, position="aligned")

        # Column 0: genome description next to the leaf.
        name_face = TextFace(description,
                             fgcolor="black",
                             fsize=9,
                             fstyle='italic')
        name_face.margin_right = 30
        lf.add_face(name_face, 0)

    # Nodes with support below 1 get a visible black dot; all other nodes
    # are hidden (size 0).
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss