def traitTree(traits, mapper, outDir):
    """Draw the tree with a continuous trait painted onto its internal nodes.

    Leaves are labelled with their name in the aligned column. Every internal
    node gets a floating box showing the trait value truncated to an integer,
    filled with the colour returned by ``mapper`` for that value.

    The function relies on names defined outside of it: ``tree``,
    ``tree_style``, ``nstyle`` and ``nstyle_L``.

    :param traits: mapping from a node's ``ND`` attribute to its trait value
    :param mapper: callable invoked as ``mapper(value, gamma=0.8, scaleMax=255)``
        and expected to return an [R, G, B] triple (set up for wl2RGB)
    :param outDir: directory in which ``cont_trait.pdf`` is written
    """

    def rgb2hex(r, g, b):
        # Format three integer channels as an HTML-style "#rrggbb" string.
        return "#{:02x}{:02x}{:02x}".format(r, g, b)

    for n in tree.traverse():
        if n.is_leaf():
            n.set_style(nstyle_L)
            n.add_face(TextFace(str(n.name)), column=0, position="aligned")
        else:
            n.set_style(nstyle)
            # Label with the rounded-down continuous trait value
            # (str(n.ND) would label with the node ID instead).
            nd = TextFace(str(int(traits[n.ND])))
            rgb = [int(val)
                   for val in mapper(traits[n.ND], gamma=0.8, scaleMax=255)]
            nd.background.color = rgb2hex(*rgb)
            nd.margin_right = 2
            nd.margin_top = 1
            nd.margin_left = 2
            nd.margin_bottom = 1
            nd.border.width = 1
            n.add_face(nd, column=0, position="float")
            n.add_face(TextFace(" "), column=0, position="branch-bottom")

    outFile = outDir + "/cont_trait.pdf"
    tree.render(outFile, tree_style=tree_style)
    # Report the written file on stderr; sys.stderr.write behaves the same on
    # Python 2 and Python 3, unlike the `print >>` statement.
    sys.stderr.write(outFile + "\n")
def draw_tree(tree):
    """Return a decorated copy of ``tree``.

    The copy is made with the "newick-extended" method and the ``i`` and
    ``Condition`` attributes of ``tree`` are re-attached to it. Leaves get
    their name as an aligned face; internal nodes get a floating box showing
    ``i`` on a background looked up in ``condi_color_dic`` by ``Condition``.
    When the outer ``add_transition`` flag is set, nodes that carry a
    ``Transition`` attribute are outlined with a thicker red border.
    """
    decorated = tree.copy("newick-extended")
    for feature in ("i", "Condition"):
        decorated.add_feature(feature, getattr(tree, feature))

    for node in decorated.traverse():
        node.set_style(nstyle)

        if node.is_leaf():
            node.add_face(TextFace(str(node.name)), column=0,
                          position="aligned")
            continue

        label = TextFace(str(node.i))
        label.background.color = condi_color_dic[str(node.Condition)]
        label.margin_top = 1
        label.margin_bottom = 1
        label.margin_left = 2
        label.margin_right = 2
        label.border.width = 1
        if add_transition and hasattr(node, "Transition"):
            label.border.color = "red"
            label.border.width = 2
        node.add_face(label, column=0, position="float")
        node.add_face(TextFace(" "), column=0, position="branch-bottom")

    return decorated
def custom_layout(node):
    """Layout function: attach name, image, habitat and support faces.

    Internal nodes are drawn as small squares with their name above the
    branch and their support value below it. Leaves get their name twice
    (aligned and next to the branch) and, when the leaf is present in both
    ``tip2info`` and ``image_checker``, an image loaded from a URL plus a
    habitat label.
    """
    if not node.is_leaf():
        node.img_style['size'] = 4
        node.img_style['shape'] = 'square'
        if node.name:
            inner_name = TextFace(node.name, fgcolor='grey', fsize=10)
            inner_name.margin_bottom = 2
            add_face_to_node(inner_name, node, column=0,
                             position='branch-top')
        if node.support:
            support = TextFace(node.support, fgcolor='indianred', fsize=10)
            add_face_to_node(support, node, column=0,
                             position='branch-bottom')
        return

    add_face_to_node(TextFace(node.name, fgcolor='olive', fsize=14),
                     node, column=2, position='aligned')
    add_face_to_node(TextFace(node.name, fgcolor='#333333', fsize=11),
                     node, column=2, position='branch-right')
    node.img_style['size'] = 0

    has_info = node.name in tip2info
    if has_info and node.name in image_checker:
        info = tip2info[node.name]

        # Image fetched from the URL stored in the first info field.
        picture = ImgFace(info[0], is_url=True)
        add_face_to_node(picture, node, column=4, position='branch-right')

        habitat = TextFace(info[2], fsize=11, fgcolor='white')
        habitat.background.color = 'steelblue'
        for side in ('left', 'top', 'right', 'bottom'):
            setattr(habitat, 'margin_' + side, 3)
        add_face_to_node(habitat, node, column=3, position='aligned')
def add_text_face(self, taxon2text, header_name, color_scale=False):
    """Append an aligned text column to every leaf of ``self.tree``.

    A header is added through ``self._add_header``. Leaves whose name is
    missing from ``taxon2text`` are reported on stdout and shown as '-'.
    With ``color_scale`` set, each present cell is filled with the colour
    that ``get_categorical_color_scale`` assigns to its text.
    ``self.column_count`` is incremented once all leaves are processed.
    """
    from metagenlab_libs.colors import get_categorical_color_scale

    if color_scale:
        value2color = get_categorical_color_scale(taxon2text.values())

    self._add_header(header_name)

    # add column
    for leaf in self.tree.iter_leaves():
        if lf_missing(leaf, taxon2text) if False else leaf.name not in taxon2text:
            print(leaf.name, "not in", taxon2text)
            cell = TextFace('-')
        else:
            text = taxon2text[leaf.name]
            cell = TextFace('%s' % text)
            if color_scale:
                cell.background.color = value2color[text]

        cell.margin_top = 1
        cell.margin_bottom = 1
        cell.margin_left = 10
        cell.margin_right = 10
        cell.opacity = 1.
        if self.rotate:
            cell.rotation = 270
        leaf.add_face(cell, self.column_count, position="aligned")

    self.column_count += 1
def custom_layout(node):
    """Layout function: attach name, habitat and support faces to ``node``.

    Internal nodes are drawn as small squares with their name above the
    branch and their support value below it. Leaves get their name twice
    (aligned and next to the branch) and, when listed in ``tip2info``, a
    habitat label. Image faces are deliberately not used here because
    loading images from URLs was found to be very slow.
    """
    if not node.is_leaf():
        node.img_style['size'] = 4
        node.img_style['shape'] = 'square'
        if node.name:
            inner_name = TextFace(node.name, fgcolor='grey', fsize=10)
            inner_name.margin_bottom = 2
            add_face_to_node(inner_name, node, column=0,
                             position='branch-top')
        if node.support:
            support = TextFace(node.support, fgcolor='indianred', fsize=10)
            add_face_to_node(support, node, column=0,
                             position='branch-bottom')
        return

    add_face_to_node(TextFace(node.name, fgcolor='olive', fsize=14),
                     node, column=2, position='aligned')
    add_face_to_node(TextFace(node.name, fgcolor='#333333', fsize=11),
                     node, column=2, position='branch-right')
    node.img_style['size'] = 0

    if node.name not in tip2info:
        return

    habitat = TextFace(tip2info[node.name][2], fsize=11, fgcolor='white')
    habitat.background.color = 'steelblue'
    for side in ('left', 'top', 'right', 'bottom'):
        setattr(habitat, 'margin_' + side, 3)
    add_face_to_node(habitat, node, column=3, position='aligned')
def my_layout(node):
    """Layout function: add the node name as a small, rotated text face.

    The label is placed at "branch-right" in column 0, rotated by -90 with a
    font size of 6.
    """
    label = TextFace(node.name, tight_text=True)
    label.rotation = -90
    label.fsize = 6
    label.margin_top = 0
    label.margin_bottom = 15
    label.margin_left = 5
    label.margin_right = 5
    add_face_to_node(label, node, column=0, position="branch-right")
def testTrees(scenarios, outDir):
    """Draw one test tree per scenario and write each to a PDF in ``outDir``.

    This is a modified version of the test routine in pcoc_num_tree.py,
    wrapped in a loop over the scenarios.

    Relies on names defined outside this function: ``args`` (``args.tree``,
    ``args.float``), ``init_tree``, ``tree_style``, ``nstyle`` and
    ``nstyle_L``.

    :param scenarios: mapping from cutoff to a scenario string made of
        "/"-separated events, each event a ","-separated list of node IDs;
        the first ID of an event is coloured red ("T"), the remaining ones
        orange ("C")
    :param outDir: directory that receives ``<cutoff>.pdf`` (dots in the
        cutoff replaced by underscores)
    """
    for cutoff in sorted(scenarios.keys()):
        # A fresh tree per scenario keeps faces from stacking up.
        tree = init_tree(args.tree)
        # Not mucking with additive trees yet: ultrametrize the tree and
        # normalize it to length 1.
        tree.convert_to_ultrametric(tree_length=1)

        manual_mode_nodes = {"T": [], "C": []}
        for event in scenarios[cutoff].strip().split("/"):
            # A real list is required: map() returns a non-subscriptable
            # iterator on Python 3.
            node_ids = [int(field) for field in event.split(",")]
            manual_mode_nodes["T"].append(node_ids[0])
            manual_mode_nodes["C"].extend(node_ids[1:])

        for n in tree.traverse():
            if n.is_leaf():
                n.set_style(nstyle_L)
                n.add_face(TextFace(str(n.name)), column=0,
                           position="aligned")
            else:
                n.set_style(nstyle)
                nd = TextFace(str(n.ND))
                if n.ND in manual_mode_nodes["T"]:
                    nd.background.color = "red"
                elif n.ND in manual_mode_nodes["C"]:
                    nd.background.color = "orange"
                else:
                    nd.background.color = "white"
                nd.margin_right = 2
                nd.margin_top = 1
                nd.margin_left = 2
                nd.margin_bottom = 1
                nd.border.width = 1
                n.add_face(nd, column=0, position="float")
                n.add_face(TextFace(" "), column=0,
                           position="branch-bottom")

        stem = str(cutoff).replace('.', '_')
        if args.float:
            # If --float is set, limit the number of digits in the filename.
            stem = stem[:min(args.float, len(str(cutoff)))]
        # Prepend the output path to the filename.
        outFile = outDir + '/' + stem + ".pdf"

        tree.render(outFile, tree_style=tree_style)
        # sys.stderr.write works on both Python 2 and Python 3.
        sys.stderr.write(outFile + "\n")
def add_t(node):
    """Attach a small black "-" marker and a blank spacer to ``node``.

    Both faces use font size 4 and the "float" position: the marker goes in
    column 0 and the spacer in column 1.
    """
    marker = TextFace("-")
    marker.fsize = 4
    marker.background.color = "black"
    for side in ("right", "top", "left", "bottom"):
        setattr(marker, "margin_" + side, 0)
    marker.border.width = 1

    spacer = TextFace(" ")
    spacer.fsize = 4

    node.add_face(marker, column=0, position="float")
    node.add_face(spacer, column=1, position="float")
def add_continuous_colorscale_legend(self, title, min_val, max_val, scale):
    """Add a legend for a continuous colour scale to ``self.tss.legend``.

    The title goes in column 0. When ``min_val`` and ``max_val`` differ, two
    colour swatches (max first, then min) are stacked in column 1 with their
    captions in column 2. Otherwise a single swatch is put in column 0 with
    its caption in column 1. Swatch colours come from
    ``scale[0].to_rgba(...)`` converted with ``rgb2hex``.
    """
    legend = self.tss.legend

    def caption(text):
        # Caption text sized relative to the tree's text scale.
        return TextFace(text, fsize=4 * self.text_scale)

    def swatch(value):
        # Blank face whose inner background shows the colour of `value`.
        face = TextFace(" " * int(self.text_scale), fsize=4 * self.text_scale)
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 10
        face.margin_bottom = 1
        face.inner_background.color = rgb2hex(scale[0].to_rgba(float(value)))
        return face

    legend.add_face(caption(f"{title}"), column=0)

    if min_val == max_val:
        legend.add_face(swatch(min_val), column=0)
        legend.add_face(caption(f"{max_val} % Id"), column=1)
        return

    max_swatch = swatch(max_val)
    min_swatch = swatch(min_val)
    legend.add_face(max_swatch, column=1)
    legend.add_face(caption(f"{max_val} % (max)"), column=2)
    legend.add_face(min_swatch, column=1)
    legend.add_face(caption(f"{min_val} % (min)"), column=2)
def _add_header(self, header_name, column_add=0):
    """Add a rotated header cell to the aligned header of ``self.tss``.

    The cell is placed at column ``self.column_count - 1 + column_add``.
    """
    header = TextFace(f'{header_name}')
    header.rotation = 270
    header.hz_align = 2
    header.vt_align = 2
    header.margin_top = 1
    header.margin_bottom = 1
    header.margin_right = 1
    header.margin_left = 20
    header.inner_background.color = "white"
    header.opacity = 1.

    # add header
    target_column = self.column_count - 1 + column_add
    self.tss.aligned_header.add_face(header, target_column)
def add_heatmap(self, taxon2value, header_name, continuous_scale=False, show_text=False):
    """Append an aligned heatmap column to every leaf of ``self.tree``.

    A header is added through ``self._add_header``. Leaves missing from
    ``taxon2value`` receive an empty, unstyled cell. Present leaves receive a
    bordered cell that shows the value when ``show_text`` is set and is
    filled from ``get_continuous_scale`` when ``continuous_scale`` is set.
    ``self.column_count`` is incremented once all leaves are processed.
    """
    from metagenlab_libs.colors import get_continuous_scale

    self._add_header(header_name)

    if continuous_scale:
        color_scale = get_continuous_scale(taxon2value.values())

    for leaf in self.tree.iter_leaves():
        if leaf.name in taxon2value:
            value = taxon2value[leaf.name]
            cell = TextFace('%s' % value) if show_text else TextFace(' ')
            cell.margin_top = 2
            cell.margin_bottom = 2
            cell.margin_left = 3
            cell.margin_right = 3
            cell.hz_align = 1
            cell.vt_align = 1
            cell.border.width = 3
            cell.border.color = "#ffffff"
            if continuous_scale:
                cell.background.color = rgb2hex(
                    color_scale[0].to_rgba(float(value)))
            cell.opacity = 1.
        else:
            cell = TextFace('')

        if self.rotate:
            cell.rotation = 270
        leaf.add_face(cell, self.column_count, position="aligned")

    self.column_count += 1
def add_heatmap(self, taxon2value, header_name, scale_type="continuous", palette=False):
    """Append an aligned colour-swatch column plus its legend.

    ``scale_type`` selects the colour scale: "continuous" (legend titled
    "Closest hit identity") or "categorical" (legend titled "MLST"); any
    other value raises ``IOError``. Leaves missing from ``taxon2value`` get
    an uncoloured blank cell. ``header_name`` and ``palette`` are accepted
    but not used by this method. ``self.column_count`` is incremented once
    all leaves are processed.
    """
    from metagenlab_libs.colors import get_categorical_color_scale
    from metagenlab_libs.colors import get_continuous_scale

    is_continuous = scale_type == "continuous"
    is_categorical = scale_type == "categorical"

    if is_continuous:
        scale = get_continuous_scale(taxon2value.values())
        lowest = min(taxon2value.values())
        highest = max(taxon2value.values())
        self.add_continuous_colorscale_legend("Closest hit identity",
                                              lowest, highest, scale)
    elif is_categorical:
        scale = get_categorical_color_scale(taxon2value.values())
        self.add_categorical_colorscale_legend("MLST", scale)
    else:
        raise IOError("unknown type")

    for leaf in self.tree.iter_leaves():
        cell = TextFace(" " * int(self.text_scale))
        if leaf.name in taxon2value:
            value = taxon2value[leaf.name]
            if is_categorical:
                cell.inner_background.color = scale[value]
            if is_continuous:
                cell.inner_background.color = rgb2hex(
                    scale[0].to_rgba(float(value)))

        cell.margin_top = 0
        cell.margin_bottom = 0
        cell.margin_right = 0
        cell.margin_left = 10
        cell.opacity = 1.
        if self.rotate:
            cell.rotation = 270
        leaf.add_face(cell, self.column_count, position="aligned")

    self.column_count += 1
def testTrees(scenarios, tree_style):
    """Draw one test tree per scenario under ``args.output``/test_trees.

    This is a modified version of the test routine in pcoc_num_tree.py,
    wrapped in a loop over the scenarios.

    Relies on names defined outside this function: ``args`` (``args.tree``,
    ``args.output``), ``init_tree``, ``nstyle`` and ``nstyle_L``.

    :param scenarios: mapping from cutoff to a scenario string made of
        "/"-separated events, each event a ","-separated list of node IDs;
        the first ID of an event is coloured red ("T"), the remaining ones
        orange ("C")
    :param tree_style: tree style passed to ``tree.render``
    """
    for cutoff in sorted(scenarios.keys()):
        # Re-initialising keeps attributes from stacking up in the same tree.
        tree = init_tree(args.tree)

        manual_mode_nodes = {"T": [], "C": []}
        for event in scenarios[cutoff].strip().split("/"):
            # A real list is required: map() returns a non-subscriptable
            # iterator on Python 3.
            node_ids = [int(field) for field in event.split(",")]
            manual_mode_nodes["T"].append(node_ids[0])
            manual_mode_nodes["C"].extend(node_ids[1:])

        for n in tree.traverse():
            if n.is_leaf():
                n.set_style(nstyle_L)
                n.add_face(TextFace(str(n.name)), column=0,
                           position="aligned")
            else:
                n.set_style(nstyle)
                nd = TextFace(str(n.ND))
                if n.ND in manual_mode_nodes["T"]:
                    nd.background.color = "red"
                elif n.ND in manual_mode_nodes["C"]:
                    nd.background.color = "orange"
                else:
                    nd.background.color = "white"
                nd.margin_right = 2
                nd.margin_top = 1
                nd.margin_left = 2
                nd.margin_bottom = 1
                nd.border.width = 1
                n.add_face(nd, column=0, position="float")
                n.add_face(TextFace(" "), column=0,
                           position="branch-bottom")

        outfile = (args.output + "/test_trees/"
                   + str(cutoff).replace('.', '_') + ".pdf")
        tree.render(outfile, tree_style=tree_style)
        # sys.stderr.write works on both Python 2 and Python 3.
        sys.stderr.write(outfile + "\n")
def add_categorical_colorscale_legend(self, title, scale):
    """Add a legend for a categorical colour scale to ``self.tss.legend``.

    The title goes in column 0. Each key of ``scale`` contributes a colour
    swatch followed by its caption, laid out in consecutive column pairs
    starting at column 1. Once the next pair would start beyond column 16, a
    blank face is added to column 0 and the layout restarts at column 1.
    """
    legend = self.tss.legend

    legend.add_face(TextFace(f"{title}", fsize=4 * self.text_scale), column=0)

    swatch_col = 1
    for value in scale:
        swatch = TextFace(" " * int(self.text_scale),
                          fsize=4 * self.text_scale)
        swatch.margin_top = 1
        swatch.margin_bottom = 1
        swatch.margin_right = 1
        swatch.margin_left = 10
        swatch.inner_background.color = scale[value]

        legend.add_face(swatch, column=swatch_col)
        legend.add_face(TextFace(f"{value}", fsize=4 * self.text_scale),
                        column=swatch_col + 1)

        swatch_col += 2
        if swatch_col > 16:
            legend.add_face(TextFace(" ", fsize=4 * self.text_scale),
                            column=0)
            swatch_col = 1
def nodeLayoutFunc(node):
    """Layout function: draw effect-size bars for taxa in ``taxidsToKeep``.

    For a kept taxon the node receives its taxon name, one coloured bar per
    entry of ``ranges`` (bar width proportional to the absolute effect size,
    colour encoding sign and significance) and an "N=<NumSpecies>" box in
    column 1. All nodes have their node marker size set to 0.

    Relies on names from the enclosing scope: ``taxidsToKeep``, ``ncbiTaxa``,
    ``ranges``, ``df``, ``var``, ``baseFontSize``, ``barScale``,
    ``significanceLevel`` and ``faces``.
    """
    taxid = int(node.name)

    if taxid in taxidsToKeep:
        # There has to be an easier way to look up names...
        taxGroupName = ncbiTaxa.get_taxid_translator([taxid])[taxid]

        row = None
        rangeRows = None

        print(len(ranges))

        if (len(ranges) == 1):
            row = df[(df['ExplanatoryVar'] == var) &
                     (df['TaxGroup'] == taxid) &
                     (df['Range'] == ranges[0])]
            assert (len(row) == len(ranges))
        elif len(ranges) > 1:
            # Range 0 holds the overall row for this taxon; the per-range
            # rows are selected separately.
            row = df[(df['ExplanatoryVar'] == var) &
                     (df['TaxGroup'] == taxid) &
                     (df['Range'] == 0)]
            assert (len(row) == 1)
            rangeRows = df[(df['ExplanatoryVar'] == var) &
                           (df['TaxGroup'] == taxid) &
                           (df['Range'].isin(set(ranges)))]
        else:
            assert (False)

        overallPval = float(row['Pvalue'].values[0])

        name = TextFace("%s" % taxGroupName, fsize=baseFontSize * 2.5)
        name.tight_text = True
        name.margin_left = 20
        name.margin_right = 0
        name.margin_top = 40
        name.margin_bottom = 12
        faces.add_face_to_node(name, node, column=0)

        # For each range to be included in this plot, add a bar
        for rangeId in ranges:
            rowForThisRange = None

            if len(ranges) == 1:
                rowForThisRange = row
            else:
                rowForThisRange = rangeRows[rangeRows['Range'] == rangeId]

            assert (len(rowForThisRange) == 1)

            # Extract p-value and "effect-size" (signed R^2)
            effectSize = float(rowForThisRange['EffectSize'].values[0])
            pval = float(rowForThisRange['Pvalue'].values[0])

            # Set bar-graph color and significance markers
            barColor = ""
            significanceMarker = ""
            if (pval < significanceLevel):
                # U+2731 (heavy asterisk); a unicode literal replaces
                # unichr(), which does not exist on Python 3.
                significanceMarker = " %s" % u"\u2731"

                if effectSize < 0:
                    barColor = "#1133ff"
                else:
                    barColor = "#ff3311"
            else:  # not significant
                if effectSize < 0:
                    barColor = "#b0b0f0"
                else:
                    barColor = "#ccb090"

            # Add the minus sign if needed
            signChar = ""
            if effectSize < 0:
                # U+2212 minus sign (more legible than a hyphen...)
                signChar = u"\u2212"

            v = RectFace(width=abs(effectSize) * barScale,
                         height=baseFontSize * 3.5,
                         fgcolor=barColor,
                         bgcolor=barColor,
                         label={
                             "text": "%s%.2g %s" % (signChar,
                                                    abs(effectSize),
                                                    significanceMarker),
                             "fontsize": baseFontSize * 1.8,
                             "color": "black"
                         })
            v.margin_top = 1
            v.margin_left = 30
            v.margin_right = 8
            v.margin_bottom = 12
            faces.add_face_to_node(v, node, column=0)

        details = TextFace("N=%d" % row['NumSpecies'],
                           fsize=baseFontSize * 1.5)
        details.background.color = "#dfdfdf"
        details.margin_left = 6
        details.margin_right = 20
        faces.add_face_to_node(details, node, column=1)

    nstyle = NodeStyle()
    nstyle["size"] = 0
    node.set_style(nstyle)
def add_simple_barplot(self, taxon2value, header_name, color=False, show_values=False, substract_min=False, highlight_cutoff=False, highlight_reverse=False, max_value=False):
    """Append an aligned horizontal bar plot column to every leaf.

    :param taxon2value: mapping from leaf name to a numeric value; leaves
        missing from it are plotted as 0. The mapping is not modified.
    :param header_name: text passed to ``self._add_header``
    :param color: bar colour; falls back to
        ``self._get_default_barplot_color()`` when falsy
    :param show_values: also add a text column with the value to the left of
        the bar (the plot then occupies two columns)
    :param substract_min: plot ``value - min(values)`` instead of the raw
        value; displayed text and highlighting still use the value with the
        minimum added back
    :param highlight_cutoff: when truthy, bars whose value is below the
        cutoff are drawn grey
    :param highlight_reverse: with ``highlight_cutoff``, grey out bars above
        the cutoff instead
    :param max_value: value corresponding to a full bar; defaults to the
        largest plotted value

    ``self.column_count`` is advanced by the number of columns used.
    """
    barplot_column = 1 if show_values else 0
    self._add_header(header_name, column_add=barplot_column)

    values_lists = [float(i) for i in taxon2value.values()]
    min_value = min(values_lists) if values_lists else 0

    if substract_min:
        values_lists = [i - min_value for i in values_lists]
        # Shift into a local mapping so the caller's dict stays untouched and
        # repeated calls do not subtract the minimum again.
        plotted = {taxon: raw - min_value
                   for taxon, raw in taxon2value.items()}
    else:
        plotted = taxon2value

    if not color:
        color = self._get_default_barplot_color()

    # Full-bar reference, computed once instead of once per leaf.
    if max_value:
        bar_max = max_value
    else:
        bar_max = max(values_lists) if values_lists else 0

    for lf in self.tree.iter_leaves():
        try:
            value = plotted[lf.name]
        except KeyError:
            value = 0

        real_value = value + min_value if substract_min else value

        if show_values:
            if isinstance(real_value, float):
                a = TextFace(" %s " % str(round(real_value, 2)))
            else:
                a = TextFace(" %s " % str(real_value))
            a.margin_top = 1
            a.margin_right = 2
            a.margin_left = 5
            a.margin_bottom = 1
            if self.rotate:
                a.rotation = 270
            lf.add_face(a, self.column_count, position="aligned")

        # A zero reference (all values 0 or all equal after subtracting the
        # minimum) yields an empty bar rather than a ZeroDivisionError.
        if bar_max:
            fraction_biggest = (float(value) / bar_max) * 100
        else:
            fraction_biggest = 0
        fraction_rest = 100 - fraction_biggest

        if not highlight_cutoff:
            lcolor = color
        elif highlight_reverse:
            lcolor = "grey" if real_value > highlight_cutoff else color
        else:
            lcolor = "grey" if real_value < highlight_cutoff else color

        b = StackedBarFace([fraction_biggest, fraction_rest],
                           width=100, height=15, colors=[lcolor, 'white'])
        b.rotation = 0
        b.inner_border.color = "grey"
        b.inner_border.width = 0
        b.margin_right = 15
        b.margin_left = 0
        if self.rotate:
            b.rotation = 270
        lf.add_face(b, self.column_count + barplot_column, position="aligned")

    self.column_count += (1 + barplot_column)
# Build the tree from the first element of `t` and prepare its TreeStyle and
# legend. NOTE(review): this fragment relies on `t` and `outbreak_color`
# being defined earlier, outside the visible code.
t = Tree(t[0])

# Tree-wide rendering options: hide leaf names and branch lengths, show
# branch support values.
ts = TreeStyle()
ts.show_leaf_name = False
ts.show_branch_length = False
ts.show_branch_support = True
ts.scale = 2
#ts.min_leaf_separation = 3
ts.branch_vertical_margin = 12
# NOTE(review): 4 presumably selects a legend corner — confirm against the
# ete3 TreeStyle documentation.
ts.legend_position = 4
#ts.legend.add_face(CircleFace(3, "red"), column=0)

# "Outbreak" label in the outbreak colour. It is built here, but the call
# that would add it to the legend is commented out below.
mark = TextFace("Outbreak", fsize=10, fgcolor=outbreak_color)
mark.margin_top = 10
mark.margin_right = 10
mark.margin_left = 5
mark.margin_bottom = 10
#ts.legend.add_face(mark, column=1)

# Boxed "X" marker on a beige background; this is the only face added to the
# legend in this fragment.
mark2 = TextFace("X", fsize=10, fgcolor="black")
# Set some attributes
mark2.margin_top = 0
mark2.margin_right = 1
mark2.margin_left = 1
mark2.margin_bottom = 0
mark2.opacity = 1  # from 0 to 1
mark2.border.width = 1
mark2.background.color = "#F5F5DC"
ts.legend.add_face(mark2, column=0)
def plot_tree_barplot(tree_file, taxon2mlst, header_list):
    '''
    Decorate a midpoint-rooted tree with an aligned MLST column.

    Each leaf gets a colour swatch (aligned column 0) and the MLST label
    (aligned column 1). Leaves whose name cannot be converted to ``int`` or
    is missing from ``taxon2mlst`` are shown as a grey " na " cell next to a
    white swatch. The leaf name is added in italics next to the branch.
    Nodes with support below 1 are drawn as black dots of size 6; all other
    nodes get size 0.

    :param tree_file: an ete3 ``Tree`` instance or anything accepted by the
        ``Tree`` constructor
    :param taxon2mlst: mapping from integer taxon id to MLST label
    :param header_list: accepted for interface compatibility; not used
    :return: tuple ``(tree, tree_style)``
    '''
    mlst_list = list(set(taxon2mlst.values()))
    mlst2color = dict(zip(mlst_list, get_spaced_colors(len(mlst_list))))
    mlst2color['-'] = 'white'

    if isinstance(tree_file, Tree):
        t1 = tree_file
    else:
        t1 = Tree(tree_file)

    # Calculate the midpoint node and set it as tree outgroup
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    for i, lf in enumerate(t1.iter_leaves()):
        if i == 0:
            # The header is added once, above aligned column 1.
            header = TextFace('MLST')
            header.margin_top = 1
            header.margin_right = 2
            header.margin_left = 2
            header.margin_bottom = 1
            header.rotation = 90
            header.inner_background.color = "white"
            header.opacity = 1.
            header.hz_align = 2
            header.vt_align = 2
            tss.aligned_header.add_face(header, 1)

        try:
            mlst = taxon2mlst[int(lf.name)]
            swatch_color = mlst2color[mlst]
        except (KeyError, ValueError, TypeError):
            # Non-numeric leaf name or taxon without MLST information.
            n = TextFace(' na ')
            n.inner_background.color = "grey"
            m = TextFace(' ')
            m.inner_background.color = "white"
        else:
            n = TextFace(' %s ' % mlst)
            n.inner_background.color = 'white'
            m = TextFace(' ')
            m.inner_background.color = swatch_color

        n.opacity = 1.
        n.margin_top = 2
        n.margin_right = 2
        n.margin_left = 0
        n.margin_bottom = 2

        m.margin_top = 2
        m.margin_right = 0
        m.margin_left = 2
        m.margin_bottom = 2

        lf.add_face(m, 0, position="aligned")
        lf.add_face(n, 1, position="aligned")

        name_face = TextFace(lf.name, fgcolor="black", fsize=12,
                             fstyle='italic')
        lf.add_face(name_face, 0)

    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
def plot_heat_tree_V1(taxid2n, tree_file, genes, taxid2st=False, leaf_label_conversion_dico=False):
    '''
    Plot a heatmap next to a midpoint-rooted tree.

    For every leaf, the values found in ``taxid2n`` are drawn as one aligned
    cell per column: values greater than 0 are shaded on a blue scale
    normalised between 0.8 and 1, other values are left white. Column
    headers are taken from ``genes`` and added once, while processing the
    first leaf.

    TODO: add an option to print the value inside each cell.

    Parameters
    ----------
    taxid2n: dict
        Maps ``str(leaf name)`` to the list of values of that row. Leaves
        without an entry get the single-value row ``[0]``.
    tree_file: str
        Tree passed to the ete3 ``Tree`` constructor (Newick).
    genes: list
        Header labels, indexed by column.
    taxid2st: dict, optional
        Maps leaf name to a string appended to the leaf name in parentheses.
    leaf_label_conversion_dico: optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    tuple ``(tree, number_of_leaves, tree_style)``
    '''
    # To operate with numbers efficiently
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl

    t1 = Tree(tree_file)
    tss = TreeStyle()

    # Calculate the midpoint node and set it as tree outgroup
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)

    norm = mpl.colors.Normalize(vmin=0.8, vmax=1)
    m2 = cm.ScalarMappable(norm=norm, cmap=cm.Blues)

    leaf_number = 0
    for lf in t1.iter_leaves():
        leaf_number += 1
        lf.branch_vertical_margin = 0

        try:
            data = taxid2n[str(lf.name)]
        except (KeyError, TypeError):
            data = [0]

        # TypeError covers the default taxid2st=False (not subscriptable).
        try:
            st = taxid2st[lf.name]
        except (KeyError, TypeError):
            st = False

        # NOTE(review): `accession2description` is not a parameter of this
        # function, so it must exist at module level; confirm, or check
        # whether `leaf_label_conversion_dico` was meant here.
        if accession2description:
            try:
                lf.name = accession2description[lf.name]
            except (KeyError, TypeError):
                pass

        if st:
            lf.name = lf.name + ' (' + st + ')'

        for col, value in enumerate(data):
            if leaf_number == 1:
                # Column headers are added only once, with the first leaf.
                header = TextFace('%s' % (genes[col]), fsize=6)
                header.vt_align = 2
                header.hz_align = 2
                header.rotation = 270
                header.margin_top = 0
                header.margin_right = 0
                header.margin_left = 4
                header.margin_bottom = 0
                header.inner_background.color = "white"
                header.opacity = 1.
                tss.aligned_header.add_face(header, col)

            cell = TextFace(' ')
            cell.margin_top = 0
            cell.margin_right = 0
            cell.margin_left = 0
            cell.margin_bottom = 0
            if value > 0:
                cell.inner_background.color = rgb2hex(
                    m2.to_rgba(float(value)))
            else:
                cell.inner_background.color = "white"
            cell.opacity = 1.
            lf.add_face(cell, col, position="aligned")

    return t1, leaf_number, tss
def plot_heat_tree(tree_file, biodb="chlamydia_04_16", exclude_outgroup=False, bw_scale=True):
    """Decorate a tree with genome statistics fetched from the database.

    For every leaf the following aligned columns are added: genome size
    (text in column 2, bar in 3), GC content (text 4, bar 5) and non-coding
    percentage (text 6, bar 7). When the checkm tables can be read,
    completeness (text 8, bar 9) and contamination (text 10, bar 11) are
    added as well. The genome description is added next to the branch.
    Nodes with support below 1 are drawn as black dots of size 6; all other
    nodes get size 0.

    :param tree_file: path of a tree file (it is then midpoint-rooted when
        possible) or an ete3 ``Tree`` instance (used as is)
    :param biodb: database name; also used to build the names of the
        ``genomes_info_<biodb>`` and ``custom_tables.checkm_<biodb>`` tables
    :param exclude_outgroup: when true, the first leaf is excluded from the
        scale computation and only relabelled with its description
    :param bw_scale: when true, bars use one fixed colour per column instead
        of a colour scale
    :return: tuple ``(tree, tree_style)``
    :raises IOError: if ``tree_file`` is neither a string nor a ``Tree``
    """
    from chlamdb.biosqldb import manipulate_biosqldb
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl

    # This genome always uses the fixed column colours, even when a colour
    # scale is requested.
    fixed_color_genome = 'Rhabdochlamydia helveticae T3358'

    def header_face(text):
        # Slanted column header for the aligned header row.
        face = TextFace(text)
        face.rotation = -25
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 20
        face.margin_bottom = 1
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    def value_face(text, margin_right=0):
        # Small white text cell showing a numeric value.
        face = TextFace(text)
        face.margin_top = 1
        face.margin_right = margin_right
        face.margin_left = 0
        face.margin_bottom = 1
        face.fsize = 7
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    def bar_face(fractions, color, margin_left, margin_right):
        # Two-part stacked bar: coloured part plus white remainder.
        face = StackedBarFace(fractions, width=100, height=9,
                              colors=[color, 'white'])
        face.rotation = 0
        face.inner_border.color = "black"
        face.inner_border.width = 0
        face.margin_left = margin_left
        face.margin_right = margin_right
        return face

    def bar_color(description, mappable, raw_value, fixed):
        # Fixed colour in bw mode or for the special genome, otherwise the
        # colour the scale assigns to the value.
        if bw_scale or description == fixed_color_genome:
            return fixed
        return rgb2hex(mappable.to_rgba(float(raw_value)))

    server, db = manipulate_biosqldb.load_db(biodb)

    sql_biodatabase_id = 'select biodatabase_id from biodatabase where name="%s"' % biodb
    db_id = server.adaptor.execute_and_fetchall(sql_biodatabase_id, )[0][0]

    if isinstance(tree_file, str):
        t1 = Tree(tree_file)
        try:
            # Best effort: keep the tree as loaded if midpoint rooting fails.
            R = t1.get_midpoint_outgroup()
            t1.set_outgroup(R)
        except Exception:
            pass
    elif isinstance(tree_file, Tree):
        t1 = tree_file
    else:
        raise IOError('Unknown tree format')

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    # The doubled percent signs survive this formatting step as "%%".
    sql2 = ('select t2.taxon_id, t1.GC from genomes_info_%s as t1 inner join bioentry as t2 '
            ' on t1.accession=t2.accession where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";') % (biodb, db_id)
    sql3 = ('select t2.taxon_id, t1.genome_size from genomes_info_%s as t1 '
            ' inner join bioentry as t2 on t1.accession=t2.accession '
            ' where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";') % (biodb, db_id)
    sql4 = ('select t2.taxon_id,percent_non_coding from genomes_info_%s as t1 '
            ' inner join bioentry as t2 on t1.accession=t2.accession '
            ' where t2.biodatabase_id=%s and t1.description not like "%%%%plasmid%%%%";') % (biodb, db_id)

    sql_checkm_completeness = 'select taxon_id, completeness from custom_tables.checkm_%s;' % biodb
    sql_checkm_contamination = 'select taxon_id,contamination from custom_tables.checkm_%s;' % biodb

    try:
        taxon_id2completeness = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql_checkm_completeness))
        taxon_id2contamination = manipulate_biosqldb.to_dict(
            server.adaptor.execute_and_fetchall(sql_checkm_contamination))
    except Exception:
        # If the checkm queries fail, the completeness and contamination
        # columns are left out.
        taxon_id2completeness = False
        taxon_id2contamination = False

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, filter_names=True)
    taxon2gc = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql2, ))
    taxon2genome_size = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql3, ))
    taxon2coding_density = manipulate_biosqldb.to_dict(
        server.adaptor.execute_and_fetchall(sql4, ))

    my_taxons = [lf.name for lf in t1.iter_leaves()]

    if exclude_outgroup:
        # The first leaf is treated as the outgroup and kept out of the
        # scale computation.
        excluded = str(list(t1.iter_leaves())[0].name)
        my_taxons.remove(excluded)

    genome_sizes = [float(taxon2genome_size[i]) for i in my_taxons]
    gc_list = [float(taxon2gc[i]) for i in my_taxons]
    fraction_list = [float(taxon2coding_density[i]) for i in my_taxons]

    max_genome_size = max(genome_sizes)
    max_gc = max(gc_list)
    # The non-coding bar is scaled against every genome in the table, not
    # only the leaves of this tree; computed once instead of per leaf.
    max_non_coding = max(taxon2coding_density.values())

    cmap = cm.YlGnBu
    norm = mpl.colors.Normalize(vmin=min(genome_sizes) - 100000,
                                vmax=max(genome_sizes))
    m1 = cm.ScalarMappable(norm=norm, cmap=cmap)
    norm = mpl.colors.Normalize(vmin=min(gc_list), vmax=max(gc_list))
    m2 = cm.ScalarMappable(norm=norm, cmap=cmap)
    norm = mpl.colors.Normalize(vmin=min(fraction_list),
                                vmax=max(fraction_list))
    m3 = cm.ScalarMappable(norm=norm, cmap=cmap)

    for i, lf in enumerate(t1.iter_leaves()):
        if i == 0:
            # Column headers are added once, with the first leaf.
            tss.aligned_header.add_face(header_face('Size (Mbp)'), 3)
            tss.aligned_header.add_face(header_face('GC (%)'), 5)
            blank = TextFace('')
            tss.aligned_header.add_face(blank, 2)
            tss.aligned_header.add_face(blank, 4)
            tss.aligned_header.add_face(header_face('Non coding (%)'), 7)
            tss.aligned_header.add_face(TextFace(''), 6)

            if taxon_id2completeness:
                tss.aligned_header.add_face(
                    header_face('Completeness (%)'), 9)
                tss.aligned_header.add_face(TextFace(''), 8)
                tss.aligned_header.add_face(
                    header_face('Contamination (%)'), 11)
                tss.aligned_header.add_face(TextFace(''), 10)

        if exclude_outgroup and i == 0:
            # The outgroup is relabelled but gets no statistics columns.
            lf.name = taxon2description[lf.name]
            continue

        description = taxon2description[lf.name]

        # Genome size: text (column 2) and bar (column 3).
        size_text = ' %s ' % str(
            round(taxon2genome_size[lf.name] / float(1000000), 2))
        lf.add_face(value_face(size_text, margin_right=1), 2,
                    position="aligned")

        fraction_biggest = (float(taxon2genome_size[lf.name])
                            / max_genome_size) * 100
        fraction_rest = 100 - fraction_biggest
        col = bar_color(description, m1, taxon2genome_size[lf.name],
                        '#fc8d59')
        lf.add_face(bar_face([fraction_biggest, fraction_rest], col,
                             margin_left=0, margin_right=15),
                    3, position="aligned")

        # GC content: bar (column 5) and text (column 4).
        fraction_biggest = (float(taxon2gc[lf.name]) / max_gc) * 100
        fraction_rest = 100 - fraction_biggest
        col = bar_color(description, m2, taxon2gc[lf.name], '#91bfdb')
        lf.add_face(bar_face([fraction_biggest, fraction_rest], col,
                             margin_left=0, margin_right=15),
                    5, position="aligned")
        gc_text = ' %s ' % str(round(float(taxon2gc[lf.name]), 2))
        lf.add_face(value_face(gc_text), 4, position="aligned")

        # Non-coding percentage: text (column 6) and bar (column 7).
        col = bar_color(description, m3, taxon2coding_density[lf.name],
                        '#99d594')
        non_coding_text = ' %s ' % str(float(taxon2coding_density[lf.name]))
        lf.add_face(value_face(non_coding_text), 6, position="aligned")

        fraction = (float(taxon2coding_density[lf.name])
                    / max_non_coding) * 100
        fraction_rest = ((max_non_coding - taxon2coding_density[lf.name])
                         / float(max_non_coding)) * 100
        lf.add_face(bar_face([fraction, fraction_rest], col,
                             margin_left=5, margin_right=1),
                    7, position="aligned")

        if taxon_id2completeness:
            # Completeness: text (column 8) and bar (column 9).
            completeness_text = ' %s ' % str(
                float(taxon_id2completeness[lf.name]))
            lf.add_face(value_face(completeness_text), 8,
                        position="aligned")
            fraction = float(taxon_id2completeness[lf.name])
            fraction_rest = 100 - fraction
            lf.add_face(bar_face([fraction, fraction_rest], "#d7191c",
                                 margin_left=5, margin_right=1),
                        9, position="aligned")

            # Contamination: text (column 10) and bar (column 11).
            contamination_text = ' %s ' % str(
                float(taxon_id2contamination[lf.name]))
            lf.add_face(value_face(contamination_text), 10,
                        position="aligned")
            fraction = float(taxon_id2contamination[lf.name])
            fraction_rest = 100 - fraction
            lf.add_face(bar_face([fraction, fraction_rest], "black",
                                 margin_left=5, margin_right=1),
                        11, position="aligned")

        # The leaf keeps its taxon id as name; the description is shown as
        # a separate face.
        n = TextFace(description, fgcolor="black", fsize=9, fstyle='italic')
        n.margin_right = 30
        lf.add_face(n, 0)

    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
def plot_tree_stacked_barplot(
        tree_file,
        taxon2value_list_barplot=False,
        header_list=False,  # headers of the stacked barplot columns
        taxon2set2value_heatmap=False,
        taxon2label=False,
        header_list2=False,  # headers of the heatmap (count) columns
        biodb=False,
        column_scale=True,
        general_max=False,
        header_list3=False,
        set2taxon2value_list_simple_barplot=False,
        set2taxon2value_list_simple_barplot_counts=True,
        rotate=False,
        taxon2description=False):
    '''
    Decorate a midpoint-rooted tree with aligned label, stacked-barplot,
    simple-barplot and heatmap columns.

    :param tree_file: anything accepted by ``Tree(...)``.
    :param taxon2value_list_barplot: leaf name -> list of lists,
        ``[[bar1_part1, bar1_part2, ...], [bar2_part1, bar2_part2]]``; the
        values of each inner list are converted to percentages of its sum.
        The leaf name is tried as-is first, then as ``int``.
    :param header_list: header texts of the stacked barplot columns.
    :param taxon2set2value_heatmap: column name -> {leaf name: value}; the
        leaf name is tried as ``str`` first, then as ``round(float(name), 2)``;
        missing values count as 0 and are drawn as empty cells.
    :param taxon2label: leaf name -> label shown in the first aligned column.
    :param header_list2: column names (and header texts) of the heatmap.
    :param biodb: if set, leaf descriptions are loaded from that database and
        replace ``taxon2description``.
    :param column_scale: when true (and ``header_list2`` is given) each
        heatmap column is coloured with its own min/max colour scale; when
        false the heatmap cells are left uncoloured.
    :param general_max: accepted for backward compatibility; not used.
    :param header_list3: set names (and header texts) of the simple barplots.
    :param set2taxon2value_list_simple_barplot: set name -> {leaf name: value}.
    :param set2taxon2value_list_simple_barplot_counts: also print the raw
        value in a text column next to each simple barplot.
    :param rotate: rotate the leaf-level faces by 270 degrees.
    :param taxon2description: leaf name -> text drawn next to the leaf; when
        no mapping is available the leaf name itself is shown.
    :return: ``(tree, tree_style)``
    '''

    def _header_face(text, margin_top=1):
        # Rotated, bottom/right-aligned text face used for column headers.
        face = TextFace(text)
        face.margin_top = margin_top
        face.margin_right = 1
        face.margin_left = 20
        face.margin_bottom = 1
        face.rotation = 270
        face.hz_align = 2
        face.vt_align = 2
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    if biodb:
        from chlamdb.biosqldb import manipulate_biosqldb
        server, db = manipulate_biosqldb.load_db(biodb)
        taxon2description = manipulate_biosqldb.taxon_id2genome_description(
            server, biodb, filter_names=True)

    t1 = Tree(tree_file)

    # Root the tree on its midpoint.
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)

    # Two palettes for the stacked barplots; every third column uses colors2.
    colors2 = [
        "red", "#FFFF00", "#58FA58", "#819FF7", "#F781F3", "#2E2E2E",
        "#F7F8E0", 'black'
    ]
    colors = [
        "#7fc97f", "#386cb0", "#fdc086", "#ffffb3", "#fdb462", "#f0027f",
        "#F7F8E0", 'black'
    ]

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    # column name -> [ScalarMappable, max value]. Always defined so that the
    # heatmap code below can test for a scale instead of hitting an unbound
    # name when column_scale is false.
    column2scale = {}
    if column_scale and header_list2:
        import matplotlib.cm as cm
        from matplotlib.colors import rgb2hex
        import matplotlib as mpl
        for col_n, column in enumerate(header_list2):
            values = taxon2set2value_heatmap[column].values()
            lowest = min(values)
            highest = max(values)
            if lowest == highest:
                # Constant column: stretch the range so the single value does
                # not sit at the very end of the colour map.
                min_val = 0
                max_val = 1.5 * highest
            else:
                min_val = lowest
                max_val = highest
            norm = mpl.colors.Normalize(vmin=min_val, vmax=max_val)
            # The first four columns use one colour map, the others another.
            cmap = cm.OrRd if col_n < 4 else cm.YlGnBu
            m = cm.ScalarMappable(norm=norm, cmap=cmap)
            column2scale[column] = [m, float(max_val)]

    # Largest value of each simple-barplot set; computed once instead of once
    # per leaf.
    draw_simple_barplots = bool(set2taxon2value_list_simple_barplot
                                and header_list3)
    set2max = {}
    if draw_simple_barplots:
        for one_set in header_list3:
            set2max[one_set] = max(
                float(v)
                for v in set2taxon2value_list_simple_barplot[one_set].values())
    simple_bar_colors = [
        '#fc8d59', '#91bfdb', '#99d594', '#c51b7d', '#f1a340', '#999999'
    ]

    for leaf_index, lf in enumerate(t1.iter_leaves()):

        # ---- column headers: added once, while handling the first leaf ----
        if leaf_index == 0:
            col_add = 1
            if taxon2label:
                tss.aligned_header.add_face(_header_face(' '), 0)

            if header_list:
                for col, header in enumerate(header_list):
                    tss.aligned_header.add_face(
                        _header_face('%s' % header, margin_top=0),
                        col + col_add)
                col_add += len(header_list)

            if header_list3:
                col_tmp = 0
                for header in header_list3:
                    n = _header_face('%s' % header)
                    if set2taxon2value_list_simple_barplot_counts:
                        # Each set takes a value column plus a bar column.
                        if col_tmp == 0:
                            col_tmp += 1
                        tss.aligned_header.add_face(n, col_tmp + 1 + col_add)
                        n = TextFace(' ')
                        tss.aligned_header.add_face(n, col_tmp + col_add)
                        col_tmp += 2
                    else:
                        tss.aligned_header.add_face(n, col_tmp + col_add)
                        col_tmp += 1
                col_add += col_tmp

            if header_list2:
                for col, header in enumerate(header_list2):
                    tss.aligned_header.add_face(
                        _header_face('%s' % header), col + col_add)
                col_add += len(header_list2)

        # ---- leaf label ----
        if taxon2label:
            try:
                label = taxon2label[lf.name]
            except KeyError:
                try:
                    label = taxon2label[int(lf.name)]
                except (KeyError, ValueError):
                    label = '-'
            n = TextFace('%s' % label)
            n.margin_top = 1
            n.margin_right = 1
            n.margin_left = 20
            n.margin_bottom = 1
            n.inner_background.color = "white"
            n.opacity = 1.
            if rotate:
                n.rotation = 270
            lf.add_face(n, 1, position="aligned")
        # Leaf-level data columns start at 2 whether or not a label is drawn.
        col_add = 2

        # ---- stacked barplots ----
        if taxon2value_list_barplot:
            try:
                val_list_of_lists = taxon2value_list_barplot[lf.name]
            except KeyError:
                val_list_of_lists = taxon2value_list_barplot[int(lf.name)]
            for col, value_list in enumerate(val_list_of_lists):
                total = float(sum(value_list))
                if total:
                    percentages = [(part / total) * 100
                                   for part in value_list]
                else:
                    # Nothing to distribute: draw an empty bar rather than
                    # dividing by zero.
                    percentages = [0.0 for _ in value_list]
                col_list = colors2 if col % 3 == 0 else colors
                b = StackedBarFace(percentages,
                                   width=150,
                                   height=18,
                                   colors=col_list[0:len(percentages)])
                b.rotation = 0
                b.inner_border.color = "white"
                b.inner_border.width = 0
                b.margin_right = 5
                b.margin_left = 5
                if rotate:
                    b.rotation = 270
                lf.add_face(b, col + col_add, position="aligned")
            col_add += len(val_list_of_lists)

        # ---- simple barplots (value relative to the largest of the set) ----
        if draw_simple_barplots:
            col = 0
            for set_index, one_set in enumerate(header_list3):
                color = simple_bar_colors[set_index % len(simple_bar_colors)]
                value = set2taxon2value_list_simple_barplot[one_set][lf.name]

                if set2taxon2value_list_simple_barplot_counts:
                    if isinstance(value, float):
                        a = TextFace(" %s " % str(round(value, 2)))
                    else:
                        a = TextFace(" %s " % str(value))
                    a.margin_top = 1
                    a.margin_right = 2
                    a.margin_left = 5
                    a.margin_bottom = 1
                    if rotate:
                        a.rotation = 270
                    lf.add_face(a, col + col_add, position="aligned")

                biggest = set2max[one_set]
                if biggest:
                    fraction_biggest = (float(value) / biggest) * 100
                else:
                    fraction_biggest = 0.0
                fraction_rest = 100 - fraction_biggest

                b = StackedBarFace([fraction_biggest, fraction_rest],
                                   width=100,
                                   height=15,
                                   colors=[color, 'white'])
                b.rotation = 0
                b.inner_border.color = "grey"
                b.inner_border.width = 0
                b.margin_right = 15
                b.margin_left = 0
                if rotate:
                    b.rotation = 270
                if set2taxon2value_list_simple_barplot_counts:
                    if col == 0:
                        col += 1
                    lf.add_face(b, col + 1 + col_add, position="aligned")
                    col += 2
                else:
                    lf.add_face(b, col + col_add, position="aligned")
                    col += 1
            col_add += col

        # ---- heatmap ----
        if taxon2set2value_heatmap and header_list2:
            for col2, col_name in enumerate(header_list2):
                try:
                    value = taxon2set2value_heatmap[col_name][str(lf.name)]
                except KeyError:
                    try:
                        value = taxon2set2value_heatmap[col_name][round(
                            float(lf.name), 2)]
                    except (KeyError, ValueError):
                        value = 0
                if col_name == 'duplicates':
                    # Diagnostic trace kept from the original implementation.
                    print('dupli', lf.name, value)

                if int(value) > 0:
                    # Pad so that 1-, 2- and 3-digit counts get similar widths.
                    if 10 <= int(value) < 100:
                        n = TextFace('%4i' % value)
                    elif int(value) >= 100:
                        n = TextFace('%3i' % value)
                    else:
                        n = TextFace('%5i' % value)
                    n.margin_top = 1
                    n.margin_right = 2
                    n.margin_left = 5
                    n.margin_bottom = 1
                    n.hz_align = 1
                    n.vt_align = 1
                    if rotate:
                        n.rotation = 270
                    scale = column2scale.get(col_name)
                    if scale is not None:
                        # rgb2hex is bound whenever a scale exists (same
                        # branch as the scale construction above).
                        n.inner_background.color = rgb2hex(
                            scale[0].to_rgba(float(value)))
                        if float(value) > scale[1]:
                            n.fgcolor = 'white'
                    n.opacity = 1.
                else:
                    n = TextFace('')
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 5
                    n.margin_bottom = 1
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    if rotate:
                        n.rotation = 270
                lf.add_face(n, col2 + col_add, position="aligned")

        # ---- leaf description (falls back to the leaf name) ----
        if taxon2description:
            description = taxon2description[lf.name]
        else:
            description = lf.name
        n = TextFace(description, fgcolor="black", fsize=12, fstyle='italic')
        lf.add_face(n, 0)

    # Draw a black dot on nodes with support < 1; hide the others.
    for n in t1.traverse():
        nstyle = NodeStyle()
        if n.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        n.set_style(nstyle)

    return t1, tss
# NOTE(review): this is a fragment; `nstyle`, `MESSAGE`, `condi_color_dic` and
# `add_transition` are defined outside the visible code.
nstyle["size"] = 1

# Global rendering options of the figure: leaf names and branch lengths are
# not drawn, guiding lines are.
MESSAGE("Setting tree style")
tree_style = TreeStyle()
tree_style.show_leaf_name = False
tree_style.show_branch_length = False
tree_style.draw_guiding_lines = True
tree_style.complete_branch_lines_when_necessary = True
tree_style.legend_position = 1

# One legend entry per condition, in sorted key order, each with the
# condition's colour as background.
MESSAGE("Setting legend with condition numbers and colors")
for condi_i in sorted(condi_color_dic.keys()):
    tf = TextFace("Condition " + condi_i)
    tf.background.color = condi_color_dic[condi_i]
    tf.margin_right = 2
    tf.margin_top = 1
    tf.margin_left = 2
    tf.margin_bottom = 1
    tf.border.width = 1
    tree_style.legend.add_face(tf, column=1)

# Extra legend entry for transitions: white box with a thick red border.
# NOTE(review): the face is configured but not added to the legend within the
# visible lines - presumably that happens right after; confirm.
if add_transition:
    MESSAGE("Setting transition style")
    tf = TextFace("Transition -> x")
    tf.background.color = "white"
    tf.margin_right = 2
    tf.margin_top = 1
    tf.margin_left = 2
    tf.margin_bottom = 1
    tf.border.color = "red"
    tf.border.width = 2
def _env_gene_color(env_name):
    """Return the motif colour for a neighbouring gene, based on its name.

    Keyword groups are tested in a fixed order and a later match overrides an
    earlier one; a name containing 'hypothetical' (case-sensitive) always
    ends up grey. Names matching nothing are orange.
    """
    tnps = ('iscr', 'transpos', 'tnp', 'insertion', '-like')
    ints = ('inti', 'integrase', 'xerc', 'xerd')
    mobiles = ('secretion', 'mobiliza', 'moba', 'mobb', 'mobc', 'mobl',
               'plasmid', 'relaxase', 'conjugation', 'type iv')
    res = ('lactam', 'aminoglyco', 'fluoroquinolo', 'tetracyclin', 'macrolid',
           'carbapenem')

    lowered = env_name.lower()
    color = 'orange'
    for keywords, group_color in ((tnps, 'violet'), (ints, 'yellow'),
                                  (mobiles, 'green'), (res, 'red')):
        if any(keyword in lowered for keyword in keywords):
            color = group_color
    if 'hypothetical' in env_name:
        color = 'grey'
    return color


def visualize_phylogeny(gene_dict, context_file):
    """Load the '.unique.tree' phylogeny and decorate its leaves with the
    genetic environment of each gene.

    The tree file name is derived from ``context_file[0]`` by replacing
    '.fna' with '.unique.tree'. Leaf names are expected to look like
    ``<prefix>__<gene id>``; the gene id is the key into ``gene_dict``.

    For every entry of ``gene_dict`` a list of SeqMotifFace motifs is built
    (the annotated gene in red, its neighbours coloured by keyword, plus a
    small arrow giving each gene's orientation) and stored back into the
    entry under the key 'motifs'. Each leaf then gets a background colour
    (plasmid / other), the motif face, a percent-identity box and, when
    ``args.compressed`` is set, a cluster-size box.

    :param gene_dict: gene id -> dict with at least 'organism', 'assembly',
        'perc_id', 'start', 'stop', 'name', 'frame', 'length' and
        'env_genes' ('cluster_size' too when ``args.compressed`` is set).
        Modified in place: a 'motifs' list is added to every entry.
    :param context_file: sequence whose first item is the '.fna' file name.
    :return: the annotated tree.
    """
    # Read in tree and assign additional information to each leaf
    t = Tree(context_file[0].replace('.fna', '.unique.tree'))
    for node in t.iter_leaves():
        gene_id = node.name.split('__')[1]
        node.add_features(organism=gene_dict[gene_id]['organism'])
        node.add_features(assembly=gene_dict[gene_id]['assembly'])
        node.add_features(pident=gene_dict[gene_id]['perc_id'])
        if args.compressed == True:
            node.add_features(cluster_size=gene_dict[gene_id]['cluster_size'])

    print('decorating the tree...')

    # Build the motifs once per gene. They only depend on gene_dict, so there
    # is no need to rebuild them for every leaf of the tree.
    for key, value in gene_dict.items():
        # Start/end of the annotated gene, ordered so that start <= end
        gene_start = value['start']
        gene_end = value['stop']
        if gene_start > gene_end:
            gene_start, gene_end = gene_end, gene_start

        gene_motif = [gene_start, gene_end, '()', 2, 10, 'red', 'red',
                      'arial|10|black|' + str(value['name'])]
        # Small arrow after (forward frame) or before (reverse frame) the gene
        if not str(value['frame']).startswith('-'):
            ori_motif = [gene_end, gene_end + 10, '>', 2, 10, 'red', 'red',
                         None]
        else:
            ori_motif = [gene_start - 10, gene_start, '<', 2, 10, 'red',
                         'red', None]
        motifs = [gene_motif, ori_motif]

        for env_gene in value['env_genes'].values():
            color = _env_gene_color(env_gene['env_name'])
            motif = [env_gene['env_start'], env_gene['env_stop'], '()', 2, 10,
                     color, color,
                     'arial|10|black|' + str(env_gene['env_name'])]

            # Number of positions shared by the half-open intervals
            # [gene_start, gene_end) and [env_start, env_stop); same result
            # as intersecting the two position sets, without building them.
            total_overlap = float(max(
                0,
                min(int(gene_motif[1]), int(motif[1]))
                - max(int(gene_motif[0]), int(motif[0]))))
            overlap_perc = total_overlap / int(value['length']) * 100

            # A neighbour overlapping the annotated gene by more than 70% is
            # considered to be that gene and is not drawn a second time.
            if overlap_perc <= 70.0:
                motifs.append(motif)

                # Additional motif to show gene orientation
                if env_gene['env_strand'] == '+':
                    ori_motif = [env_gene['env_stop'],
                                 env_gene['env_stop'] + 10, '>', 2, 10, color,
                                 color, None]
                else:
                    ori_motif = [env_gene['env_start'] - 10,
                                 env_gene['env_start'], '<', 2, 10, color,
                                 color, None]
                motifs.append(ori_motif)

        value['motifs'] = motifs

    # Leaf background styles
    nst_plasmid = NodeStyle()
    nst_plasmid['bgcolor'] = 'DarkSeaGreen'
    nst_other = NodeStyle()
    # Set the style's colour; rebinding the name to a plain string would
    # leave non-plasmid leaves without a valid NodeStyle.
    nst_other['bgcolor'] = 'AntiqueWhite'

    # Now annotate the tree with the motifs
    for node in t.iter_leaves():
        if 'plasmid' in node.organism:
            node.set_style(nst_plasmid)
        else:
            node.set_style(nst_other)

        seqFace = SeqMotifFace(
            seq=None,
            motifs=gene_dict[node.name.split('__')[1]]['motifs'],
            seq_format='blank',
            gap_format='line')
        # Attach to the leaf at hand instead of searching the tree by name.
        node.add_face(seqFace, 1, 'aligned')

        # Box showing gene percent id, coloured by identity range
        similarity = TextFace(node.pident, fsize=8)
        similarity.margin_top = 2
        similarity.margin_bottom = 2
        similarity.margin_left = 2
        similarity.margin_right = 2
        if node.pident <= 90.0:
            similarity.background.color = 'DarkGoldenrod'
        elif 90.0 < node.pident <= 95.0:
            similarity.background.color = 'ForestGreen'
        elif 95.0 <= node.pident:
            similarity.background.color = 'YellowGreen'
        node.add_face(similarity, column=2, position='aligned')

        # Box showing cluster size
        if args.compressed == True:
            clust_box = TextFace(node.cluster_size, fsize=8)
            clust_box.margin_top = 2
            clust_box.margin_bottom = 2
            clust_box.margin_left = 2
            clust_box.margin_right = 2
            node.add_face(clust_box, column=3, position='aligned')

    # Return the annotated tree
    return t
def plot_ete_tree(tree_file,
                  ordered_queries,
                  leaf_id2protein_id2identity,
                  leaf_id2mlst,
                  leaf_id2spa,
                  leaf_id2meca,
                  show_identity_values=True,
                  leaf_id2description=False):
    '''
    Draw a midpoint-rooted, ladderized tree with aligned MLST, spa and mecA
    typing columns followed by one identity column per query protein.

    :param tree_file: anything accepted by ``Tree(...)``.
    :param ordered_queries: protein ids, in the order of the identity columns.
    :param leaf_id2protein_id2identity: leaf name -> {protein id: identity};
        leaves missing from the mapping get grey ' na ' cells, proteins
        missing for a leaf get a white ' - ' cell.
    :param leaf_id2mlst: leaf name -> MLST; each distinct value gets its own
        colour swatch.
    :param leaf_id2spa: leaf name -> spa type.
    :param leaf_id2meca: leaf name -> mecA call ('Perfect' is drawn red,
        'Strict' orange, anything else white).
    :param show_identity_values: print identity values inside the coloured
        cells; otherwise only small coloured boxes are drawn.
    :param leaf_id2description: accepted for backward compatibility; not used.
    :return: ``(tree, tree_style)``
    '''

    def _set_margins(face, top, right, bottom, left):
        face.margin_top = top
        face.margin_right = right
        face.margin_left = left
        face.margin_bottom = bottom

    def _header_face(text, margin=2, **text_kwargs):
        # Rotated column header on a white background.
        face = TextFace(text, **text_kwargs)
        _set_margins(face, margin, margin, margin, margin)
        face.rotation = 270
        face.vt_align = 2
        face.hz_align = 2
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    def _cell_face(text, background, margin=2):
        # Table cell with a uniform margin and the given background colour.
        face = TextFace(text)
        face.opacity = 1.
        _set_margins(face, margin, margin, margin, margin)
        face.inner_background.color = background
        return face

    mlst_list = list(set(leaf_id2mlst.values()))
    mlst2color = dict(zip(mlst_list, get_spaced_colors(len(mlst_list))))
    mlst2color['-'] = 'white'

    t1 = Tree(tree_file)
    tss = TreeStyle()
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    # Headers are added once, while handling the first leaf.
    head = True
    # Columns 0-3 hold the MLST swatch, MLST, spa and mecA; identity columns
    # start at column_add.
    column_add = 4
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0

        # --- MLST: colour swatch (column 0) + value (column 1) ---
        if head:
            tss.aligned_header.add_face(_header_face(' MLST '), 1)
        # The lookup must use the leaf_id2mlst argument, not an unrelated
        # (undefined) global.
        if lf.name in leaf_id2mlst:
            n = TextFace(' %s ' % leaf_id2mlst[lf.name])
            n.inner_background.color = 'white'
            m = TextFace(' ')
            m.inner_background.color = mlst2color[leaf_id2mlst[lf.name]]
        else:
            n = TextFace(' na ')
            n.inner_background.color = "grey"
            m = TextFace(' ')
            m.inner_background.color = "white"
        n.opacity = 1.
        _set_margins(n, top=2, right=2, bottom=2, left=0)
        _set_margins(m, top=2, right=0, bottom=2, left=20)
        lf.add_face(m, 0, position="aligned")
        lf.add_face(n, 1, position="aligned")

        # --- spa typing ---
        if head:
            tss.aligned_header.add_face(_header_face(' spa '), column_add - 2)
        if lf.name in leaf_id2spa:
            n = _cell_face(' %s ' % leaf_id2spa[lf.name], "white")
        else:
            n = _cell_face(' na ', "grey")
        lf.add_face(n, column_add - 2, position="aligned")

        # --- mecA typing ---
        if head:
            tss.aligned_header.add_face(_header_face(' mecA '),
                                        column_add - 1)
        if lf.name in leaf_id2meca:
            meca = leaf_id2meca[lf.name]
            if meca == 'Perfect':
                background = "red"
            elif meca == 'Strict':
                background = "orange"
            else:
                background = "white"
            n = _cell_face(' %s ' % meca, background)
        else:
            n = _cell_face(' na ', "grey")
        lf.add_face(n, column_add - 1, position="aligned")

        # --- one column per query protein ---
        for column, protein_id in enumerate(ordered_queries):
            # draw labels at the top of each column
            if head:
                if show_identity_values:
                    n = _header_face(' %s ' % str(protein_id))
                else:
                    # Compact header matching the small coloured boxes.
                    n = _header_face(' %s ' % str(protein_id), margin=0,
                                     fsize=6)
                tss.aligned_header.add_face(n, column + column_add)

            # draw column content
            if lf.name not in leaf_id2protein_id2identity:
                n = _cell_face(' %s ' % str(' na '), "grey")
            elif protein_id in leaf_id2protein_id2identity[lf.name]:
                identity_value = float(
                    leaf_id2protein_id2identity[lf.name][protein_id])
                color = rgb2hex(m_blue.to_rgba(identity_value))
                if show_identity_values:
                    # Report identity values in coloured boxes; 100 is
                    # printed without decimals to keep the box width similar.
                    if str(identity_value) in ('100.00', '100.0'):
                        n = _cell_face(" %s " % '100', color)
                    else:
                        n = _cell_face("%.2f" % round(identity_value, 2),
                                       color)
                    # White text on dark cells
                    if identity_value > 95:
                        n.fgcolor = "white"
                else:
                    # Coloured box without text
                    n = _cell_face(' ', color, margin=0)
            else:
                n = _cell_face(' %s ' % str(' - '), "white")
            lf.add_face(n, column + column_add, position="aligned")

        # end of first leaf: turn off header
        head = False

    # Bootstrap supports: blue dot on nodes with support < 0.9
    for n in t1.traverse():
        nstyle = NodeStyle()
        if n.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        n.set_style(nstyle)

    return t1, tss
def add_simple_barplot(self,
                       taxon2value,
                       header_name,
                       color=False,
                       show_values=False,
                       substract_min=False,
                       max_value=False):
    '''
    Add one horizontal barplot column (optionally preceded by a column with
    the numeric value) to every leaf of ``self.tree``.

    Each bar is drawn relative to ``max_value`` when given, otherwise to the
    largest value of ``taxon2value``. Leaves without an entry in
    ``taxon2value`` are drawn with value 0. ``self.column_count`` is advanced
    by the number of columns added (1, or 2 with ``show_values``).

    :param taxon2value: leaf name -> numeric value. Not modified.
    :param header_name: text passed to ``self._add_header``.
    :param color: bar colour; defaults to
        ``self._get_default_barplot_color()``.
    :param show_values: also print the value (floats rounded to 2 decimals)
        in a text column left of the bar.
    :param substract_min: subtract the smallest value from all values first.
    :param max_value: value corresponding to a full bar.
    :raises ValueError: if ``taxon2value`` is empty.
    '''
    print("scale factor", self.text_scale)

    # The value column, when shown, takes one extra header slot.
    self._add_header(header_name, column_add=1 if show_values else 0)

    values_lists = [float(i) for i in taxon2value.values()]
    min_value = min(values_lists)
    if substract_min:
        values_lists = [i - min_value for i in values_lists]
        # Work on a shifted copy so the caller's dictionary is left untouched.
        taxon2value = {
            taxon: value - min_value
            for taxon, value in taxon2value.items()
        }

    if not color:
        color = self._get_default_barplot_color()

    # Value corresponding to a full bar; the same for every leaf.
    full_bar_value = max_value if max_value else max(values_lists)

    barplot_column = 1 if show_values else 0
    for lf in self.tree.iter_leaves():
        try:
            value = taxon2value[lf.name]
        except KeyError:
            value = 0

        if show_values:
            if isinstance(value, float):
                a = TextFace(" %s " % str(round(value, 2)))
            else:
                a = TextFace(" %s " % str(value))
            a.margin_top = 1
            a.margin_right = 2
            a.margin_left = 5
            a.margin_bottom = 1
            if self.rotate:
                a.rotation = 270
            lf.add_face(a, self.column_count, position="aligned")

        if full_bar_value:
            fraction_biggest = (float(value) / full_bar_value) * 100
        else:
            # All values are 0 (e.g. identical values with substract_min):
            # draw empty bars instead of dividing by zero.
            fraction_biggest = 0.0
        fraction_rest = 100 - fraction_biggest

        b = StackedBarFace([fraction_biggest, fraction_rest],
                           width=100 * (self.text_scale / 3),
                           height=18,
                           colors=[color, 'white'])
        b.rotation = 0
        b.margin_right = 10
        b.margin_left = 10
        b.hz_align = 2
        b.vt_align = 2
        b.rotable = False
        if self.rotate:
            b.rotation = 270
        lf.add_face(b, self.column_count + barplot_column, position="aligned")

    self.column_count += (1 + barplot_column)
def plot_phylum_counts(NOG_id,
                       rank='phylum',
                       colapse_low_species_counts=4,
                       remove_unlassified=True):
    '''
    Plot, on the reference phylogeny stored for ``rank``, how many genomes of
    each leaf taxon carry ``NOG_id``.

    1. load the per-taxon genome counts, the phylogeny and the taxon labels
       for ``rank`` from the ``eggnog`` MySQL database (password read from
       the ``SQLPSW`` environment variable)
    2. get the per-taxon counts for ``NOG_id`` from ``get_NOG_taxonomy``
    3. prune leaves with too few genomes (and, optionally, leaves whose label
       contains 'unclassified')
    4. add aligned columns: rank, number of genomes, number of genomes with
       the NOG, percentage (text + bar)

    :param NOG_id: identifier passed to ``get_NOG_taxonomy`` and shown in the
        column header.
    :param rank: taxonomic rank; selects the tables and phylogeny to use.
    :param colapse_low_species_counts: leaves need strictly more genomes than
        this to be kept.
    :param remove_unlassified: drop leaves whose label contains
        'unclassified'.
    :return: ``(tree, tree_style)``
    '''
    import MySQLdb
    import os
    from chlamdb.biosqldb import manipulate_biosqldb
    from ete3 import NCBITaxa, Tree, TextFace, TreeStyle, StackedBarFace

    def _small_cell(text, **text_kwargs):
        # Small text face on a white background used for the table cells.
        face = TextFace(text, **text_kwargs)
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 0
        face.margin_bottom = 1
        face.fsize = 7
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    ncbi = NCBITaxa()

    sqlpsw = os.environ['SQLPSW']
    conn = MySQLdb.connect(
        host="localhost",  # your host, usually localhost
        user="******",  # your username
        passwd=sqlpsw,  # your password
        db="eggnog")  # name of the data base
    try:
        cursor = conn.cursor()

        # NOTE: table names cannot be bound as query parameters, so `rank` is
        # still interpolated here; it must come from a trusted source.
        sql = 'select * from eggnog.leaf2n_genomes_%s' % rank
        cursor.execute(sql, )
        leaf_taxon2n_species = manipulate_biosqldb.to_dict(cursor.fetchall())

        leaf_taxon2n_species_with_domain = get_NOG_taxonomy(NOG_id, rank)

        # The rank *value* is bound by the driver rather than spliced in.
        cursor.execute(
            'select phylogeny from eggnog.phylogeny where rank=%s', (rank, ))
        tree = Tree(cursor.fetchall()[0][0], format=1)

        sql = 'select * from eggnog.taxid2label_%s' % rank
        cursor.execute(sql, )
        taxon_id2scientific_name_and_rank = manipulate_biosqldb.to_dict(
            cursor.fetchall())
    finally:
        # Everything needed has been fetched; do not leak the connection.
        conn.close()

    taxon_id2scientific_name_and_rank = {
        str(k): v
        for k, v in taxon_id2scientific_name_and_rank.items()
    }

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "blue"

    # Select the leaves to keep, then prune everything else.
    keep = []
    for lf in tree.iter_leaves():
        if remove_unlassified:
            label = taxon_id2scientific_name_and_rank[str(lf.name)][0]
            if 'unclassified' in label:
                continue
        n_genomes = int(leaf_taxon2n_species[lf.name])
        if n_genomes > colapse_low_species_counts:
            keep.append(lf.name)
    print('number of leaaves:', len(keep))

    tree.prune(keep)

    header_list = ['Rank', 'N genomes', 'N with %s' % NOG_id, 'Percentage']
    for col, header in enumerate(header_list):
        n = TextFace('%s' % (header))
        n.margin_top = 0
        n.margin_right = 1
        n.margin_left = 20
        n.margin_bottom = 1
        n.rotation = 270
        n.hz_align = 2
        n.vt_align = 2
        n.inner_background.color = "white"
        n.opacity = 1.
        tss.aligned_header.add_face(n, col)

    for lf in tree.iter_leaves():
        n_genomes = int(leaf_taxon2n_species[lf.name])
        if n_genomes <= colapse_low_species_counts:
            continue

        # rank of the leaf taxon ('-' when NCBI returns nothing)
        ranks = ncbi.get_rank([lf.name])
        r = ranks[max(ranks)] if ranks else '-'
        lf.add_face(_small_cell(' %s ' % r, fgcolor='red'),
                    1,
                    position="aligned")

        # n genomes
        lf.add_face(_small_cell(' %s ' % str(leaf_taxon2n_species[lf.name])),
                    2,
                    position="aligned")

        # n genomes with the NOG, and the corresponding percentage; a taxon
        # without any hit is reported as 0.
        try:
            n_with_domain = leaf_taxon2n_species_with_domain[lf.name]
        except KeyError:
            domain_text = ' 0 '
            percentage = 0
        else:
            domain_text = ' %s ' % str(n_with_domain)
            try:
                percentage = (float(n_with_domain) /
                              float(leaf_taxon2n_species[lf.name])) * 100
            except (TypeError, ValueError, ZeroDivisionError):
                percentage = 0
        lf.add_face(_small_cell(domain_text), 3, position="aligned")
        lf.add_face(_small_cell(' %s ' % str(round(percentage, 2))),
                    4,
                    position="aligned")

        b = StackedBarFace([percentage, 100 - percentage],
                           width=100,
                           height=10,
                           colors=["#7fc97f", "white"])
        b.rotation = 0
        b.inner_border.color = "grey"
        b.inner_border.width = 0
        b.margin_right = 15
        b.margin_left = 0
        lf.add_face(b, 5, position="aligned")

        # Scientific name next to the leaf; the leaf itself is renamed to
        # "<scientific name> (<taxon id>)".
        scientific_name = taxon_id2scientific_name_and_rank[str(lf.name)][0]
        n = TextFace('%s' % scientific_name, fgcolor="black", fsize=9)
        lf.name = " %s (%s)" % (scientific_name, str(lf.name))
        n.margin_right = 10
        lf.add_face(n, 0)

    tss.show_leaf_name = False

    # Label nodes of major ranks (and the 'FCB group'). Nodes without a label
    # entry - including the leaves renamed above - are skipped explicitly
    # rather than through a swallowed exception.
    for node in tree.traverse("postorder"):
        try:
            entry = taxon_id2scientific_name_and_rank[str(node.name)]
        except KeyError:
            continue
        scientific_name = entry[0]
        node_rank = entry[1]
        if node_rank in ['phylum', 'superkingdom', 'class', 'subphylum'
                         ] or scientific_name in ['FCB group']:
            hola = TextFace("%s" % (scientific_name))
            node.add_face(hola, column=0, position="branch-top")

    return tree, tss
def plot_tree_text_metadata(tree_file, header2taxon2text, ordered_header_list,
                            biodb):
    '''
    Draw a midpoint-rooted tree with one aligned text column per header.

    :param tree_file: anything accepted by ``Tree(...)``.
    :param header2taxon2text: header -> {int(leaf name): text}.
    :param ordered_header_list: headers, in column order.
    :param biodb: database used to look up the description shown next to
        each leaf.
    :return: ``(tree, tree_style)``
    '''
    from chlamdb.biosqldb import manipulate_biosqldb

    def _apply(face, margins, background="white"):
        # margins are given as (top, right, left, bottom)
        face.margin_top, face.margin_right, face.margin_left, \
            face.margin_bottom = margins
        face.inner_background.color = background
        face.opacity = 1.
        return face

    server, db = manipulate_biosqldb.load_db(biodb)

    t1 = Tree(tree_file)

    taxon2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, filter_names=True)

    # Root the tree on its midpoint
    t1.set_outgroup(t1.get_midpoint_outgroup())

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    headers_done = False
    for leaf in t1.iter_leaves():
        # The rotated column headers are added only once, with the first leaf
        if not headers_done:
            for column, header in enumerate(ordered_header_list):
                title = _apply(TextFace('%s' % (header)), (0, 1, 20, 1))
                title.rotation = 270
                title.hz_align = 2
                title.vt_align = 2
                tss.aligned_header.add_face(title, column)
            headers_done = True

        # One text cell per header; cells start at aligned column 1
        for column, header in enumerate(ordered_header_list):
            text = header2taxon2text[header][int(leaf.name)]
            cell = _apply(TextFace('%s' % text), (1, 1, 5, 1))
            leaf.add_face(cell, column + 1, position="aligned")

        # Show the genome description instead of the taxon id
        description = TextFace(taxon2description[leaf.name],
                               fgcolor="black",
                               fsize=12,
                               fstyle='italic')
        leaf.add_face(description, 0)

    # Black dot on nodes with support < 1, nothing visible on the others
    for tree_node in t1.traverse():
        weak = tree_node.support < 1
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "black" if weak else "red"
        nstyle["size"] = 6 if weak else 0
        tree_node.set_style(nstyle)

    return t1, tss
def custom_layout(node):
    """Layout function: decorate one node of the tree.

    Leaves are expected to be named ``<species>|<sequence name>|<taxid>.<info>``.
    For a leaf the function adds, as aligned columns, the sequence name
    (column 2), one label per matching clade of the species' NCBI lineage
    (column 3), the trailing info (column 4; the whole third field in red
    when its taxid is not part of the lineage) and the sequence itself
    (column 5), plus the species name next to the branch, coloured for a few
    known genera. Internal nodes are drawn as small squares with their name
    above and their support below the branch.
    """
    # (NCBI taxid, label, text colour); every clade found in the lineage adds
    # its own face, in this order.
    clade_labels = (
        (7742, 'vertebrata', "red"),
        (6040, 'porifera', "green"),
        (6073, 'cnidario', "orange"),
        (33317, 'protostomia', "blue"),
        (10197, 'Ctenophora', "indigo"),
        (10226, 'Ctenophora', "sienna"),
        (6157, 'Platyhelminthes', "olive"),
        (7735, 'Cephalochordata', "skyblue"),
    )
    # (name prefix, text colour); the first matching prefix wins.
    genus_colours = (
        ("H**o", "red"),
        ("Spongilla", "green"),
        ("Sycon", "green"),
        ("Amphimedon", "green"),
        ("Oscarella", "green"),
        ("Gallus", "red"),
        ("Branchiostoma", "red"),
        ("Trichoplax", "orange"),
        ("Nematostella", "orange"),
        ("Hydra", "orange"),
        ("Drosophila", "blue"),
        ("Crassostrea", "blue"),
    )

    ncbi = connect_ncbitaxa()

    # ---- internal nodes ----
    if not node.is_leaf():
        node.img_style['size'] = 3
        node.img_style['shape'] = 'square'
        if node.name:
            label = TextFace(node.name, fgcolor='grey', fsize=10)
            label.margin_bottom = 1
            add_face_to_node(label, node, column=0, position='branch-top')
        if node.support:
            support = TextFace(node.support, fgcolor='indianred', fsize=8)
            support.margin_bottom = 1
            add_face_to_node(support, node, column=0,
                             position='branch-bottom')
        return

    # ---- leaves ----
    full_name = node.name
    if not full_name:
        sys.stderr.write(
            "Name of node is null or empty when creating custom layout.\n")
        return

    fields = full_name.split('|')
    seq_name = fields[1]
    other_info = fields[2]

    seq_label = TextFace(seq_name, fgcolor='brown', fsize=11)
    seq_label.margin_top = 0
    seq_label.margin_bottom = 0
    seq_label.margin_left = 5
    add_face_to_node(seq_label, node, column=2, position='aligned')

    node_name = fields[0]
    if not node_name.strip():
        sys.stderr.write(
            "Node name is null or empty when creating custom layout.\n")
        return

    # Species name -> taxid -> lineage
    taxid = ncbi.get_name_translator([node_name])[node_name]
    lin = ncbi.get_lineage(int(taxid[0]))

    for clade_taxid, clade_text, clade_colour in clade_labels:
        if clade_taxid in lin:
            clade_face = TextFace(clade_text, fsize=11, fgcolor=clade_colour)
            clade_face.margin_left = 5
            clade_face.background.color = "Linen"
            add_face_to_node(clade_face, node, column=3, position='aligned')

    # Third field: "<taxid>.<info>"; the taxid is compared as an integer when
    # it can be converted, as raw text otherwise.
    tax, seqs_info = other_info.split('.', 1)
    try:
        tax = int(tax)
    except:  # noqa: E722 - same catch-all as before, tax is left unchanged
        pass
    if tax in lin:
        info_text, info_colour = seqs_info, 'grey'
    else:
        info_text, info_colour = other_info, 'red'
    info_face = TextFace(info_text, fgcolor=info_colour, fsize=11)
    info_face.margin_top = 0
    info_face.margin_bottom = 0
    info_face.margin_left = 5
    add_face_to_node(info_face, node, column=4, position='aligned')

    seqFace = SeqMotifFace(node.sequence, gap_format="blank")
    add_face_to_node(seqFace, node, column=5, position="aligned")

    node.img_style['size'] = 0

    # Species name next to the branch, coloured for the known genera
    genus_colour = None
    for prefix, colour in genus_colours:
        if node_name.startswith(prefix):
            genus_colour = colour
            break
    if genus_colour is not None:
        name_face = TextFace(node_name, fsize=11, fgcolor=genus_colour)
    else:
        name_face = TextFace(node_name, fgcolor='#333333', fsize=11)
        name_face.margin_top = 0
        name_face.margin_bottom = 0
    add_face_to_node(name_face, node, column=0, position='branch-right')
    return
def plot_tree_barplot(tree_file,
                      taxon2value_list_barplot,
                      header_list,
                      taxon2set2value_heatmap=False,
                      header_list2=False,
                      column_scale=True,
                      general_max=False,
                      barplot2percentage=False,
                      taxon2mlst=False):
    '''
    Annotate a tree with one "value + stacked bar" column pair per barplot
    header, optionally preceded by an MLST column and followed by heatmap
    columns.

    The tree is re-rooted on its midpoint outgroup (this also happens when an
    existing Tree instance is passed in).

    :param tree_file: a Tree instance (used as is) or any input accepted by
        the Tree constructor
    :param taxon2value_list_barplot: mapping leaf name -> sequence of values,
        indexed in the same order as header_list
    :param header_list: headers of the barplot columns
    :param taxon2set2value_heatmap: optional mapping heatmap column name ->
        {leaf name (str, or its int form): value}
    :param header_list2: names/headers of the heatmap columns
    :param column_scale: when true and header_list2 is given, one OrRd colour
        scale is built per heatmap column from that column's min/max
    :param general_max: when set, used as the bar maximum of every barplot
        column instead of the per-column maximum
    :param barplot2percentage: False, or a list of bool (one per header);
        columns flagged True get a fixed 0-100 range
    :param taxon2mlst: optional mapping int(leaf name) -> MLST label
    :return: tuple (tree, TreeStyle)
    '''
    import matplotlib as mpl
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex

    def _set_margins(face, top, right, left, bottom):
        # Small helper: every face below sets its four margins explicitly.
        face.margin_top = top
        face.margin_right = right
        face.margin_left = left
        face.margin_bottom = bottom

    if taxon2mlst:
        mlst_list = list(set(taxon2mlst.values()))
        mlst2color = dict(zip(mlst_list, get_spaced_colors(len(mlst_list))))
        mlst2color['-'] = 'white'

    t1 = tree_file if isinstance(tree_file, Tree) else Tree(tree_file)

    # Root the tree on its midpoint outgroup.
    t1.set_outgroup(t1.get_midpoint_outgroup())

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    # NOTE(review): column2scale only exists when column_scale is true; a
    # heatmap requested with column_scale=False fails with NameError further
    # down (unchanged from the previous behaviour) -- confirm intended use.
    if column_scale and header_list2:
        column2scale = {}
        for column in header_list2:
            values = taxon2set2value_heatmap[column].values()
            norm = mpl.colors.Normalize(vmin=min(values), vmax=max(values))
            column2scale[column] = cm.ScalarMappable(norm=norm, cmap=cm.OrRd)

    cmap = cm.YlGnBu
    value_rows = taxon2value_list_barplot.values()

    # One colour scale and one bar maximum per barplot column.
    scale_list = []
    max_value_list = []
    for idx, _header in enumerate(header_list):
        data = [float(row[idx]) for row in value_rows]
        if barplot2percentage is not False and barplot2percentage[idx] is True:
            min_value, max_value = 0, 100
        else:
            min_value, max_value = min(data), max(data)
        norm = mpl.colors.Normalize(vmin=min_value, vmax=max_value)
        scale_list.append(cm.ScalarMappable(norm=norm, cmap=cmap))
        if not general_max:
            max_value_list.append(float(max_value))
        else:
            max_value_list.append(general_max)

    for leaf_index, lf in enumerate(t1.iter_leaves()):
        # Column headers are attached once, while visiting the first leaf.
        if leaf_index == 0:
            col_add = 0
            if taxon2mlst:
                # Rebinds the local name only; the caller's list is untouched.
                header_list = ['MLST'] + header_list
            for header in header_list:
                # Empty header above the bar column.
                n = TextFace(' ')
                _set_margins(n, 1, 2, 2, 1)
                n.rotation = 90
                n.inner_background.color = "white"
                n.opacity = 1.
                n.hz_align = 2
                n.vt_align = 2
                tss.aligned_header.add_face(n, col_add + 1)
                # Header label above the value column.
                n = TextFace('%s' % header)
                _set_margins(n, 1, 2, 2, 2)
                n.rotation = 270
                n.inner_background.color = "white"
                n.opacity = 1.
                n.hz_align = 2
                n.vt_align = 1
                tss.aligned_header.add_face(n, col_add)
                col_add += 2
            if header_list2:
                for col, header in enumerate(header_list2):
                    n = TextFace('%s' % header)
                    _set_margins(n, 1, 20, 2, 1)
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    tss.aligned_header.add_face(n, col + col_add)

        if taxon2mlst:
            try:
                mlst = taxon2mlst[int(lf.name)]
                n = TextFace(' %s ' % mlst)
                n.inner_background.color = 'white'
                m = TextFace(' ')
                m.inner_background.color = mlst2color[mlst]
            except (KeyError, ValueError, TypeError):
                # Leaf name is not an integer or has no MLST entry.
                n = TextFace(' na ')
                n.inner_background.color = "grey"
                m = TextFace(' ')
                m.inner_background.color = "white"
            n.opacity = 1.
            _set_margins(n, 2, 2, 0, 2)
            _set_margins(m, 2, 0, 2, 2)
            lf.add_face(m, 0, position="aligned")
            lf.add_face(n, 1, position="aligned")
            col_add = 2
        else:
            col_add = 0

        try:
            val_list = taxon2value_list_barplot[lf.name]
        except KeyError:
            # header_list carries the extra 'MLST' entry when taxon2mlst is set.
            n_columns = len(header_list) - 1 if taxon2mlst else len(header_list)
            val_list = ['na'] * n_columns

        for col, value in enumerate(val_list):
            # Show the value itself.
            n = TextFace(' %s ' % str(value))
            _set_margins(n, 1, 5, 10, 1)
            n.inner_background.color = "white"
            n.opacity = 1.
            lf.add_face(n, col_add, position="aligned")

            # Show the bar. The value is converted to float once so that the
            # colour and the bar length are derived from the same number;
            # non-numeric placeholders such as 'na' give a white, empty bar.
            try:
                color = rgb2hex(scale_list[col].to_rgba(float(value)))
            except (IndexError, TypeError, ValueError):
                color = 'white'
            try:
                numeric = float(value)
                bar_max = max_value_list[col]
                percentage = (numeric / bar_max) * 100
                maximum_bar = ((bar_max - numeric) / bar_max) * 100
            except (IndexError, TypeError, ValueError, ZeroDivisionError):
                percentage = 0
                maximum_bar = 0
            b = StackedBarFace([percentage, maximum_bar],
                               width=100,
                               height=10,
                               colors=[color, "white"])
            b.rotation = 0
            b.inner_border.color = "grey"
            b.inner_border.width = 0
            b.margin_right = 15
            b.margin_left = 0
            lf.add_face(b, col_add + 1, position="aligned")
            col_add += 2

        if taxon2set2value_heatmap:
            for col, col_name in enumerate(header_list2):
                # Leaf names may be stored as str or as int keys.
                try:
                    value = taxon2set2value_heatmap[col_name][lf.name]
                except (KeyError, TypeError):
                    try:
                        value = taxon2set2value_heatmap[col_name][int(lf.name)]
                    except (KeyError, TypeError, ValueError):
                        value = 0

                if int(value) > 0:
                    n = TextFace(' %i ' % int(value))
                    _set_margins(n, 1, 1, 20, 1)
                    n.fgcolor = "white"
                    n.inner_background.color = rgb2hex(
                        column2scale[col_name].to_rgba(float(value)))
                    n.opacity = 1.
                else:
                    # Zero / missing: empty white cell.
                    n = TextFace(' ')
                    _set_margins(n, 1, 1, 20, 1)
                    n.inner_background.color = "white"
                    n.opacity = 1.
                lf.add_face(n, col + col_add, position="aligned")

        n = TextFace(lf.name, fgcolor="black", fsize=12, fstyle='italic')
        lf.add_face(n, 0)

    # Nodes with support below 1 get a visible black marker; others get none.
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
def custom_layout(self,node):
    """Layout function: attach faces and styles to ``node``.

    Leaves get an aligned name face, one aligned column per entry of
    ``self._tip2headers`` (bar chart for int/float data, text otherwise, or a
    "No data available" placeholder when the tip has no entry in
    ``self._tip2info``) and, when the tip is listed in ``self._img_chk_list``,
    an image face in the next free column.

    Internal nodes get a square marker and, depending on
    ``self._custom_options`` / ``self._node2label``, name, label and support
    faces; a node whose ``hide`` feature equals 1 is drawn collapsed.

    Finally, style-related node features (``bsize``, ``shape``, ``bgcolor``,
    ``fgcolor``, ``bh_bgcolor``, ``bh_size``, ``lh_color``, ``lh_width``) are
    copied into ``node.img_style`` for every node.
    """
    def _pad(face, margin):
        # All faces built here use the same margin on their four sides.
        face.margin_top = margin
        face.margin_right = margin
        face.margin_left = margin
        face.margin_bottom = margin

    if node.is_leaf():
        aligned_name_face = TextFace(node.name, fgcolor='olive', fsize=12)
        _pad(aligned_name_face, 5)
        aligned_name_face.hz_align = 0  # 0 = left, 1 = center, 2 = right
        add_face_to_node(aligned_name_face, node, column=2, position='aligned')
        node.img_style['size'] = 0

        # Extra categorical / numeric data, one aligned column per header,
        # starting at column 3.
        column_no = 3
        if node.name in self._tip2info:
            for headerIndex, dataheader in enumerate(self._tip2headers):
                extra_data = self._tip2info[node.name][headerIndex]
                cell_color = self._tip2color[node.name][headerIndex]
                if isinstance(extra_data, (int, float)):
                    extra_face = BarChartFace([extra_data], width=100, height=25,
                                              colors=[cell_color],
                                              labels=[dataheader],
                                              min_value=0.0,
                                              max_value=self._tip_max)
                else:
                    extra_face = TextFace(extra_data, fsize=11, fgcolor='black')
                    extra_face.background.color = cell_color
                _pad(extra_face, 5)
                add_face_to_node(extra_face, node, column=column_no, position='aligned')
                column_no += 1
        else:
            for _dataheader in self._tip2headers:
                extra_face = TextFace("No data available", fsize=10, fgcolor='black')
                _pad(extra_face, 5)
                add_face_to_node(extra_face, node, column=column_no, position='aligned')
                column_no += 1

        # The image goes in the first column after the data columns.
        image_col_no = column_no
        if node.name in self._img_chk_list:
            if self._img_data_dic[node.name] is not None:
                img_face = ImgFace(self._img_data_dic[node.name], is_url=True)
                _pad(img_face, 10)
            else:
                # Fallback picture; image_path is a name defined outside this
                # function.
                img_path = os.path.join("file://"+image_path, "ina.jpg")
                img_face = ImgFace(img_path, is_url=True)
            add_face_to_node(img_face, node, column=image_col_no, position='aligned')
    else:
        # Internal node.
        node.img_style['size'] = 4
        node.img_style['shape'] = 'square'

        if node.name and self._custom_options["draw_internal"]:
            name_face = TextFace(node.name, fgcolor='grey', fsize=10)
            _pad(name_face, 4)
            add_face_to_node(name_face, node, column=0, position='branch-top')

        if node.name in self._node2label:
            label_face = TextFace(self._node2label[node.name], fgcolor='DarkGreen', fsize=10)
            _pad(label_face, 4)
            add_face_to_node(label_face, node, column=0, position="branch-top")

        if node.support and self._custom_options["draw_support"]:
            support_face = TextFace(node.support, fgcolor='indianred', fsize=10)
            _pad(support_face, 4)
            add_face_to_node(support_face, node, column=0, position='branch-bottom')

        if hasattr(node, "hide") and int(node.hide) == 1:
            node.img_style["draw_descendants"] = False
            collapsed_face = faces.TextFace(" %s collapsed leaves." % len(node),
                                            fsize=10, fgcolor="#444", ftype="Arial")
            faces.add_face_to_node(collapsed_face, node, 0)
        else:
            node.img_style["draw_descendants"] = True

    # Parse node features and convert them into styles. This must be done
    # here, since the current ete version does not allow modifying the style
    # outside the layout function.
    if hasattr(node, "bsize"):
        node.img_style["size"] = int(node.bsize)
    if hasattr(node, "shape"):
        node.img_style["shape"] = node.shape
    if hasattr(node, "bgcolor"):
        node.img_style["bgcolor"] = node.bgcolor
    if hasattr(node, "fgcolor"):
        node.img_style["fgcolor"] = node.fgcolor

    if hasattr(node, "bh_bgcolor"):
        node.img_style["bgcolor"] = node.bh_bgcolor
    if hasattr(node, "bh_size"):
        # Coerced like bsize above: features may arrive as strings.
        # NOTE(review): ETE is expected to require int sizes/widths -- confirm
        # against the ETE version in use.
        node.img_style["size"] = int(node.bh_size)

    if hasattr(node, "lh_color"):
        node.img_style['hz_line_color'] = node.lh_color
        node.img_style["vt_line_color"] = node.lh_color

    if hasattr(node, "lh_width"):
        line_width = int(node.lh_width)
        node.img_style['hz_line_width'] = line_width
        node.img_style['vt_line_width'] = line_width

    # When both line features are present, they are pushed down to every
    # descendant as well.
    if hasattr(node, "lh_width") and hasattr(node, "lh_color"):
        line_width = int(node.lh_width)
        for n in node.iter_descendants():
            n.img_style['hz_line_color'] = node.lh_color
            n.img_style["vt_line_color"] = node.lh_color
            n.img_style['hz_line_width'] = line_width
            n.img_style['vt_line_width'] = line_width
manual_mode_nodes["T"].append(l_e[0]) manual_mode_nodes["C"].extend(l_e[1:]) for n in tree.traverse(): if n.is_leaf(): n.set_style(nstyle_L) n.add_face(TextFace(str(n.name)), column=0, position="aligned") else: n.set_style(nstyle) nd = TextFace(str(n.ND)) if manual_mode_nodes: if n.ND in manual_mode_nodes["T"]: nd.background.color = "red" elif n.ND in manual_mode_nodes["C"]: nd.background.color = "orange" else: nd.background.color = "white" else: nd.background.color = "white" nd.margin_right = 2 nd.margin_top = 1 nd.margin_left = 2 nd.margin_bottom = 1 nd.border.width = 1 n.add_face(nd, column=0, position="float") n.add_face(TextFace(" "), column=0, position="branch-bottom") tree.render(args.output, tree_style=tree_style) print args.output
def plot_heatmap_tree_locus(biodb,
                            tree_file,
                            taxid2count,
                            taxid2identity=False,
                            taxid2locus=False,
                            reference_taxon=False,
                            n_paralogs_barplot=False):
    '''
    Plot a tree with, for each leaf, the number of homologs and optionally:

    - a barplot of that count relative to the largest count in the tree
    - the identity of the closest homolog (coloured on a 0-100 OrRd scale)
    - the locus tag of the closest homolog

    Leaves missing from taxid2count are added to it with a count of 0 (the
    dictionary passed by the caller is modified). Leaf names are replaced by
    the genome description looked up in the database, and the tree is rooted
    on its midpoint outgroup.

    :param biodb: database name passed to manipulate_biosqldb
    :param tree_file: input accepted by the Tree constructor
    :param taxid2count: mapping str(leaf name) -> number of homologs
    :param taxid2identity: optional mapping str(leaf name) -> identity value
    :param taxid2locus: optional mapping str(leaf name) -> locus tag
    :param reference_taxon: leaf highlighted as the reference (no identity
        value, dark red identity cell, red locus tag)
    :param n_paralogs_barplot: when true, add the barplot column
    :return: tuple (tree, number of leaves, TreeStyle)
    '''
    from chlamdb.biosqldb import manipulate_biosqldb

    server, db = manipulate_biosqldb.load_db(biodb)
    taxid2organism = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb, True)

    t1 = Tree(tree_file)
    ts = TreeStyle()
    ts.draw_guiding_lines = True
    ts.guiding_lines_color = "gray"

    # Root the tree on its midpoint outgroup.
    t1.set_outgroup(t1.get_midpoint_outgroup())

    # Leaves without a count are reported as 0.
    for lf in t1.iter_leaves():
        taxid2count.setdefault(str(lf.name), 0)
    max_count = max([taxid2count[str(lf.name)] for lf in t1.iter_leaves()])

    if taxid2identity:
        # Built once: the identity colour scale is the same for every leaf.
        import matplotlib as mpl
        import matplotlib.cm as cm
        from matplotlib.colors import rgb2hex
        identity_scale = cm.ScalarMappable(
            norm=mpl.colors.Normalize(vmin=0, vmax=100), cmap=cm.OrRd)

    def _header_face(title):
        # Rotated column header shown above the aligned columns.
        face = TextFace(title)
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 20
        face.margin_bottom = 1
        face.inner_background.color = "white"
        face.opacity = 1.
        face.rotation = -25
        return face

    leaf_number = 0
    for i, lf in enumerate(t1.iter_leaves()):
        taxon = str(lf.name)
        is_reference = taxon == str(reference_taxon)

        # Top leaf: add the headers.
        if i == 0:
            ts.aligned_header.add_face(_header_face('Number of homologs'), 1)
            if taxid2identity:
                ts.aligned_header.add_face(_header_face('Protein identity'), 2)
            if taxid2locus:
                ts.aligned_header.add_face(_header_face('Locus tag'), 3)

        leaf_number += 1
        lf.branch_vertical_margin = 0

        # Column 0: number of homologs.
        count = taxid2count[taxon]
        n = TextFace(' %s ' % str(count))
        n.margin_top = 2
        n.margin_right = 2
        n.margin_left = 20
        n.margin_bottom = 2
        n.inner_background.color = "white"
        n.opacity = 1.
        lf.add_face(n, 0, position="aligned")
        col_index = 0

        # Optionally show the count as a barplot relative to the maximum.
        if n_paralogs_barplot:
            col_index += 1
            # All counts can be 0; avoid dividing by zero in that case.
            percent = (float(count) / max_count) * 100 if max_count else 0.0
            n = StackedBarFace([percent, 100 - percent],
                               width=150,
                               height=18,
                               colors=['#6699ff', 'white'],
                               line_color='white')
            n.rotation = 0
            n.inner_border.color = "white"
            n.inner_border.width = 0
            n.margin_right = 15
            n.margin_left = 0
            lf.add_face(n, 1, position="aligned")

        # Optionally add a column with the identity.
        if taxid2identity:
            try:
                identity = round(taxid2identity[taxon], 2)
                if identity != 100:
                    value = "%.2f" % identity
                else:
                    value = "%.1f" % identity
            except (KeyError, TypeError, ValueError):
                value = '-'
            if is_reference:
                value = ' '
            n = TextFace(' %s ' % value)
            n.margin_top = 2
            n.margin_right = 2
            n.margin_left = 20
            n.margin_bottom = 2
            # Colour only real numbers ('-' = no data, blank = reference).
            if not value.isspace() and value != '-':
                n.inner_background.color = rgb2hex(
                    identity_scale.to_rgba(float(value)))
                if float(value) > 82:
                    # Dark background: switch to white text.
                    n.fgcolor = 'white'
            n.opacity = 1.
            if is_reference:
                n.inner_background.color = '#800000'
            lf.add_face(n, col_index + 1, position="aligned")

        # Optionally add a column with the locus name.
        if taxid2locus:
            try:
                value = str(taxid2locus[taxon])
            except KeyError:
                value = '-'
            n = TextFace(' %s ' % value)
            n.margin_top = 2
            n.margin_right = 2
            n.margin_left = 2
            n.margin_bottom = 2
            if is_reference:
                n.fgcolor = '#ff0000'
            n.inner_background.color = "white"
            n.opacity = 1.
            lf.add_face(n, col_index + 2, position="aligned")

        lf.name = taxid2organism[str(lf.name)]

    return t1, leaf_number, ts