def treelegendtext(cluster, color):
    """Build the legend label for one cluster.

    Returns a ``TextFace`` showing ``cluster`` with one space of padding on
    each side, in 30 pt bold, drawn on a ``color`` background.
    """
    label = TextFace(" %s " % cluster)
    label.background.color = color
    label.fstyle = 'Bold'
    label.fsize = 30
    label.hz_align = False
    return label
def treelegendtext(whattoprint, color):
    """Return a bold 30 pt ``TextFace`` for a legend entry.

    ``whattoprint`` is formatted with ``%s`` and padded with a single space
    on both sides; ``color`` becomes the background colour of the face.
    """
    padded = " %s " % whattoprint
    face = TextFace(padded)
    face.hz_align = False
    face.fsize, face.fstyle = 30, 'Bold'
    face.background.color = color
    return face
def make_tree(treefile, image_file, clone_info):
    """Render a Newick tree with a week legend and per-clone node styling.

    Parameters:
        treefile: Newick source handed to ``Tree(..., format=1)``.
        image_file: output path passed to ``Tree.render``.
        clone_info: mapping from the first space-separated token of a node
            name to a sequence whose item 0 is a 1-based index into the
            colour list and whose item 1 is a number; ``item // 10 + 5`` is
            passed to ``style_node`` as its third argument.

    Node names are cleaned in place: single quotes are removed and ``.``
    becomes ``,``. Named internal nodes get their name drawn on top of the
    branch.
    """
    colour_list = ['MidnightBlue', 'RoyalBlue', 'LightSkyBlue', 'Aquamarine',
                   'SpringGreen', 'GreenYellow', 'Gold', 'DarkOrange']
    weeks = ['6', '14', '53', '92', '144']

    t = Tree(treefile, format=1)

    ts = TreeStyle()
    # One legend row per week; zip() stops at the shorter list, so the legend
    # always matches the number of weeks actually defined.
    for colour, week in zip(colour_list, weeks):
        ts.legend.add_face(CircleFace(20, colour), column=0)
        ts.legend.add_face(TextFace('week' + week), column=1)
    ts.legend_position = 2
    ts.show_leaf_name = True
    ts.branch_vertical_margin = 15
    ts.rotation = 90

    for node in t.traverse():
        node.name = node.name.replace("'", "").replace(".", ",")
        name = node.name.split(' ')[0]
        print(name)
        if name in clone_info:
            info = clone_info[name]
            # Floor division keeps the Python 2 integer result on Python 3.
            style_node(node,
                       colour_list[int(info[0]) - 1],
                       int(info[1]) // 10 + 5)
        if not node.is_leaf() and node.name != 'NoName':
            f = TextFace(node.name)
            f.margin_top = 2.5
            f.margin_bottom = 2.5
            f.margin_right = 2.5
            f.margin_left = 2.5
            node.add_face(f, column=0, position="branch-top")

    t.render(image_file, tree_style=ts)
def render_annotate(newick_path, output_path):
    """Render the annotated tree, showing internal node names.

    The output_path should end in .PNG, .PDF or .SVG: this will determine
    the format.

    To aid in testing, if output_path is None, the tree is shown rather than
    rendered.

    Node names are cleaned in place (single quotes removed, ``+`` replaced
    by ``,``). The same tree style is used for both showing and rendering.
    """
    tree = Tree(newick_path, format=1)

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.branch_vertical_margin = 15

    for node in tree.traverse():
        node.name = node.name.replace("'", "").replace("+", ",")
        if not node.is_leaf() and node.name != "NoName":
            f = TextFace(node.name)
            f.margin_top = 5
            f.margin_bottom = 5
            f.margin_right = 10
            f.margin_left = 10
            node.add_face(f, column=0, position="branch-top")

    if output_path is None:
        tree.show(tree_style=ts)
    else:
        # Pass the style here too; without it the rendered file ignored the
        # settings configured above.
        tree.render(output_path, tree_style=ts)
def draw_tree(tree, file):
    """Root ``tree`` at its midpoint, decorate it and render it to ``file``.

    Parameters:
        tree: the tree to draw; it is re-rooted and modified in place.
        file: output path passed to ``tree.render``. The text before its
            first ``.`` is used as the figure title.

    The legend image is read from ``~/Perl/Modules/TreeLegend.png``.
    """
    root = tree.get_midpoint_outgroup()
    try:
        tree.set_outgroup(root)
    except Exception:
        # Best effort: if the midpoint cannot be used as outgroup, keep the
        # current rooting. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        pass
    root = tree.get_tree_root()
    root.dist = 0
    add_sig(tree)

    ts = TreeStyle()
    ts.branch_vertical_margin = 1
    ts.show_leaf_name = False

    leg_file = path.join(path.expanduser('~'), 'Perl', 'Modules', 'TreeLegend.png')
    leg_face = ImgFace(img_file=leg_file)
    leg_face.margin_left, leg_face.margin_top = 5, 5
    ts.legend.add_face(leg_face, column=1)
    ts.legend_position = 1

    title_face = TextFace(text=file.split('.')[0])
    title_face.margin_left, title_face.margin_top = 10, 5
    ts.title.add_face(title_face, column=1)

    (ts.margin_left, ts.margin_right) = (5, 5)
    tree.render(file, tree_style=ts, w=6000, units='mm')
def rotation_layout(node):
    """Layout function: decorate each leaf with three branch-right faces.

    Leaves get the fixed labels "third" (column 8) and "second" (column 2),
    plus their own name (column 0) drawn at a random rotation between 0 and
    360 with 1-pixel outer and inner borders. Internal nodes are untouched.
    """
    if not node.is_leaf():
        return

    name_face = TextFace(node.name, tight_text=True)
    name_face.rotation = randint(0, 360)

    placements = (
        (TextFace("third"), 8),
        (TextFace("second"), 2),
        (name_face, 0),
    )
    for face, col in placements:
        add_face_to_node(face, node, column=col, position="branch-right")

    name_face.border.width = 1
    name_face.inner_border.width = 1
def drawTree(nwfile, outfile):
    """Render the Newick tree ``nwfile`` to ``outfile`` with a title.

    The tree style hides default leaf names, uses ``my_layout`` as layout
    function and carries a white 18 pt "Phylogenetic Tree" title.
    """
    from ete2 import Tree, TreeStyle, TextFace

    style = TreeStyle()
    style.show_leaf_name = False
    style.layout_fn = my_layout
    style.branch_vertical_margin = 12.75
    style.orientation = 1

    heading = TextFace("Phylogenetic Tree", fsize=18, fgcolor="white")
    heading.margin_top = 15
    style.title.add_face(heading, column=1)

    Tree(nwfile).render(outfile, tree_style=style)
def ly_tax_labels(node):
    """Layout function: add aligned clade labels to leaf nodes.

    For every name in ``TRACKED_CLADES`` found in the leaf's
    ``named_lineage`` (if it has one), a white-on-colour label is added in
    the next free aligned column, starting at ``LABEL_START_COL``. The
    remaining columns up to ``len(TRACKED_CLADES)`` are filled with empty
    faces. Internal nodes are left untouched.
    """
    if not node.is_leaf():
        return

    col = LABEL_START_COL
    # Leaves without lineage annotation simply match no tracked clade.
    lineage = getattr(node, "named_lineage", ())
    for tname in TRACKED_CLADES:
        if tname in lineage:
            linF = TextFace(tname, fsize=10, fgcolor='white')
            linF.margin_left = 3
            linF.margin_right = 2
            linF.background.color = lin2color[tname]
            add_face_to_node(linF, node, col, position='aligned')
            col += 1

    # Pad the unused columns with empty faces.
    for pad_col in range(col, len(TRACKED_CLADES)):
        add_face_to_node(TextFace('', fsize=10, fgcolor='slategrey'),
                         node, pad_col, position='aligned')
def prettifyTree(ete_tree, leaf_font_size = 32, branch_support_size = 20, show_bootstraps = True, title=None, ts = None):
    '''
    Perform standardized functions to make the ETE trees easier to read:
    - Make the branch support bigger
    - Make the leaf fonts bigger
    - Turn off elongating branches for visualization purposes (i.e. make sure the real branch lengths are represented)
    - Change both to standard font (Times)
    - Standardize the tree's width (calculate based on the maximum length from the root to a tip)
    - (optional) add title to tree

    ete_tree is modified in place (faces are added and the root distance is
    set to 0). If ts is None a new TreeStyle is created, otherwise the one
    passed in is updated. Returns the tuple (ete_tree, ts).
    '''
    for node in ete_tree.traverse():
        if node.is_leaf():
            # Make text faces with name = the existing node name but with big font.
            # A side effect of this is that we also get the annotations lined up
            F = faces.TextFace(node.name, ftype="Times", fsize=leaf_font_size)
            node.add_face(F, 0, position="aligned")
        elif show_bootstraps:
            # Make branch support bigger
            F = faces.TextFace(node._support, ftype="Times", fsize=branch_support_size)
            node.add_face(F, 0, position="branch-top")

    # correct the long root node bug (fixed in next release)
    ete_tree.dist = 0

    # Optionally create a new TreeStyle if we are not passing in an old one.
    if ts is None:
        ts = TreeStyle()

    # This "fixes" the dashed line issue but makes the tree look terrible.
    # There may be no way around this (and it's likely other tree viewers do the same thing
    # but just don't tell you).
    #ts.optimal_scale_level = "full"

    # We made these bigger so lets turn off the default ones too.
    ts.show_branch_support = False
    ts.show_leaf_name = False

    if title is not None:
        ts.title.clear()
        # Use faces.TextFace like the rest of this function, so the title
        # does not depend on a separate bare TextFace import.
        title_face = faces.TextFace(title)
        title_face.hz_align = True
        title_face.fsize = 52
        ts.title.add_face(title_face, 0)

    return ete_tree, ts
def layout(node):
    """Layout function: label leaves and colour internal branches by ``B``.

    Every node gets a zero-size marker and 2-pixel branch lines.

    Leaves: ``node.orig_name`` is split on ``"__"``; the second field (with
    underscores turned into spaces) is shown as an italic aligned label and
    the first field as a grey code next to the branch. Leaf branches are
    green.

    Internal nodes: the branch colour is chosen from ``float(node.B)``:
    >= 90 green, >= 70 blue, >= 50 darkblue, >= 30 pink, >= 10 red,
    otherwise yellow. Nodes whose ``B`` is missing or not numeric keep the
    default line colour instead of raising.
    """
    node.img_style["size"] = 0
    node.img_style['hz_line_width'] = 2
    node.img_style['vt_line_width'] = 2

    if node.is_leaf():
        # parse names
        fields = node.orig_name.split("__")
        name = fields[1].replace('_', ' ')
        code = "%s" % fields[0].strip()

        # Species name
        nF = TextFace(name, fsize=12, fgcolor='#444', fstyle='italic')
        add_face_to_node(nF, node, column=0, position='aligned')

        # Species code
        cF = TextFace(code, fsize=12, fgcolor='grey')
        cF.margin_left = 4
        cF.margin_right = 4
        add_face_to_node(cF, node, column=1, position='branch-right')

        # Leaf node styling
        node.img_style['hz_line_color'] = "green"
        node.img_style['vt_line_color'] = "green"
        return

    # L90: green, L70: blue, L50: dark blue, L30: pink and L10: red. For the species
    # tree discordance test we collapse all branches below L90.
    try:
        B = float(node.B)
    except (AttributeError, TypeError, ValueError):
        # No usable B on this node: leave the branch colour alone.
        return

    if B >= 90:
        color = "green"
    elif B >= 70:
        color = "blue"
    elif B >= 50:
        color = "darkblue"
    elif B >= 30:
        color = "pink"
    elif B >= 10:
        color = "red"
    else:
        color = "yellow"
    node.img_style['hz_line_color'] = color
    node.img_style['vt_line_color'] = color
def add_faces(cur, field, leaf, label_info, colours, bg_colour, outfile):
    """Attach one coloured text label per entry of ``label_info`` to ``leaf``.

    Parameters:
        cur, field: accepted for interface compatibility; not used here.
        leaf: tree node receiving the faces (position "branch-right").
        label_info: sequence of label strings. The first gets a trailing
            ``:``, the middle ones a trailing ``,`` and the last is left
            unchanged. The sequence itself is NOT modified, so it can be
            reused for several leaves.
        colours: foreground colour for each label, indexed like
            ``label_info``.
        bg_colour: background colour shared by all labels.
        outfile: output file name. When it contains ``.svg``, labels are
            padded with trailing spaces and given a wider left margin.

    Labels are placed in columns ``x - y + 1`` where ``y`` grows by 3 at
    every index ``x > 1`` that is a multiple of 3.
    """
    is_svg = '.svg' in outfile
    last_index = len(label_info) - 1
    y = 0
    for x, raw_text in enumerate(label_info):
        text = raw_text
        if x == 0:
            text += ':'
        elif x < last_index:
            text += ','
        if is_svg:
            # Floor division keeps an int on Python 3 as well.
            padding = 1 + len(text) // 5
            text += ' ' * padding

        label = TextFace(text)
        label.margin_left = 20 if is_svg else 5
        label.fgcolor = colours[x]
        label.background.color = bg_colour

        if x > 1 and x % 3 == 0:
            y += 3
        leaf.add_face(label, column=x - y + 1, position="branch-right")
def render_tree_image(self, filename):
    """Render this object's tree to ``filename`` as a rotated image.

    The tree is parsed from ``self.newick_string()``. Every descendant node
    gets a red "0" or "1" label above its branch (alternating, starting
    with "0" in iteration order) and a size-15 node marker; node names are
    drawn in blue by the layout function.
    """
    def my_layout(node):
        blue_name = AttrFace("name", fsize=10, fgcolor = "#0000FF")
        faces.add_face_to_node(blue_name, node, column=0, position="branch-right")

    t = Tree("%s;" % self.newick_string(), format = 1)

    for index, descendant in enumerate(t.iter_descendants()):
        bit_face = TextFace(str(index % 2))
        bit_face.fgcolor = "#FF0000"

        marker = NodeStyle()
        marker['size'] = 15
        marker['fgcolor'] = "#333"
        descendant.set_style(marker)
        descendant.add_face(bit_face, column = 0, position = "branch-top")

    style = TreeStyle()
    style.rotation = 90
    style.show_leaf_name = False
    style.layout_fn = my_layout
    style.margin_left = 0
    style.branch_vertical_margin = 50
    style.show_scale = False

    t.render(filename, tree_style = style, w = 2000)
def MODZ_ALL(DIST_DIR, SCORE, TREE, OUT_DIR, OUTGROUPS):
    """Flag outlier taxa per gene from RAxML distances and draw marked trees.

    Parameters:
        DIST_DIR: directory holding ``RAxML_distances.<gene>...`` files and
            ``<gene>.phy`` alignments.
        SCORE: threshold (converted with ``float``); a species whose ratio
            exceeds it is reported as an outlier for that gene.
        TREE: directory holding ``RAxML_result.<gene>.constrained.tre``.
        OUT_DIR: directory receiving ``outlier_taxa.<gene>.txt``,
            ``<taxon>_seqids.txt`` (both opened in append mode) and
            ``<gene>.tre.pdf``.
        OUTGROUPS: text file whose lines start with ``Outgroup1``,
            ``Outgroup2`` or ``Outgroup3``; taxa are read from the third
            whitespace-separated token onwards.

    Relies on the module-level names ``MAD``, ``nstyle``, ``RED`` and ``ts``.
    """
    # Read the outgroup lists up front so the file handle is closed promptly
    # and a missing file is reported before any heavy work starts.
    OUTGROUP_LISTS = {"Outgroup1": [], "Outgroup2": [], "Outgroup3": []}
    with open(OUTGROUPS, 'r') as OUTGROUP_FILE:
        for LINE in OUTGROUP_FILE:
            TOKENS = LINE.split()
            if TOKENS and TOKENS[0] in OUTGROUP_LISTS:
                OUTGROUP_LISTS[TOKENS[0]].extend(TOKENS[2:])
    OUTGROUP1 = OUTGROUP_LISTS["Outgroup1"]
    OUTGROUP2 = OUTGROUP_LISTS["Outgroup2"]
    OUTGROUP3 = OUTGROUP_LISTS["Outgroup3"]

    #######################################
    # Load all distances into a data matrix
    #######################################
    t0 = time()
    print('************************** Loading data from distance files.')
    DATA = {}
    for DIST_FILE in glob('%s/RAxML_distances.*' % DIST_DIR):
        GENE = os.path.basename(DIST_FILE).split('.')[1]
        # Each line is "<seq1> <seq2> \t <distance>": split on space-tab-space
        # and take the last item as the distance. The file is parsed once and
        # the rows are reused after the median is known.
        ROWS = []
        with open(DIST_FILE, 'r') as INPUT:
            for LINE in INPUT:
                FIELDS = LINE.split(' \t ')
                ROWS.append((FIELDS[0], float(FIELDS[-1].strip('\n'))))
        JUST_MEDIAN = np.median([DATUM for _, DATUM in ROWS])
        for PAIR, DATUM in ROWS:
            RATIO = DATUM / JUST_MEDIAN
            # Sort the two sequence names, drop the seq-id after the triple
            # underscore, and join the species with a triple underscore.
            NAMES = sorted(PAIR.split(' '))
            SPECIES1 = NAMES[0].split("___")[0]
            SPECIES2 = NAMES[1].split("___")[0]
            KEY = "___".join([SPECIES1, SPECIES2])
            DATA.setdefault(KEY, {})[GENE] = RATIO
    t1 = time()
    print('Loading took %f seconds' % (t1 - t0))

    #######################################
    # LOAD SEQ IDS
    #######################################
    SEQ_ARR = {}
    for SEQ_FILE in glob('%s/*.phy' % DIST_DIR):
        GENE = os.path.basename(SEQ_FILE).split('.')[0]
        # Mode was 'r ' (trailing space), which Python 3 rejects.
        with open(SEQ_FILE, 'r') as SEQ_DATA:
            for LINE in SEQ_DATA:
                if LINE and LINE[0].isalpha():
                    SPECIES = LINE.split('___')[0]
                    SEQ_ID = LINE.split('___')[1].split(' ')[0]
                    SEQ_ARR.setdefault(GENE, {})[SPECIES] = SEQ_ID

    #######################################
    # Generate Modified Z-Scores For Each Distance
    #######################################
    # Create a new matrix by copying the data and dividing each datum by 0.1 * median -
    # This centers the distribution on 10 for every distance distribution, which will allow
    # normalizing the data (by taking the log) without shifting the distribution into negative
    # values which will cause problems for a Z-Score calculation.
    t0 = time()
    print("************************** Calculating Z-Scores")
    DATA_NORMALIZED = copy.deepcopy(DATA)
    for GENES_DATA in DATA_NORMALIZED.values():
        DEMON = 0.1 * np.median(list(GENES_DATA.values()))
        for GENE in list(GENES_DATA):
            GENES_DATA[GENE] = np.log(GENES_DATA[GENE] / DEMON)

    # NOTE(review): DATA_MODZ is computed but the per-gene collection below
    # reads DATA_NORMALIZED, exactly as in the original code - confirm which
    # matrix is intended.
    DATA_MODZ = copy.deepcopy(DATA_NORMALIZED)
    for GENES_DATA in DATA_MODZ.values():
        VALUES_LIST = list(GENES_DATA.values())
        MEDIAN = np.median(VALUES_LIST)
        MAD_GENES_DATA = MAD(VALUES_LIST)
        if MAD_GENES_DATA == 0:
            for GENE in list(GENES_DATA):
                if GENES_DATA[GENE] >= 2.31:
                    GENES_DATA[GENE] = GENES_DATA[GENE] + 1.2
        else:
            for GENE in list(GENES_DATA):
                GENES_DATA[GENE] = (0.6745 * (GENES_DATA[GENE] - MEDIAN)) / MAD_GENES_DATA
    t1 = time()
    print('Calculating took %f seconds' % (t1 - t0))

    #######################################
    # Collect values by Gene
    #######################################
    # Reorganise the matrix by gene rather than by species pair:
    # GENE_DATA[gene][species][species___species] = value, with each pair
    # registered under both of its species.
    GENE_DATA = {}
    for SPECIES___SPECIES, GENES_DATA in DATA_NORMALIZED.items():
        PAIR_SPECIES = SPECIES___SPECIES.split('___')
        SPECIES_1 = PAIR_SPECIES[0]
        SPECIES_2 = PAIR_SPECIES[1]
        for GENE, DATUM in GENES_DATA.items():
            BY_SPECIES = GENE_DATA.setdefault(GENE, {})
            BY_SPECIES.setdefault(SPECIES_1, {})[SPECIES___SPECIES] = DATUM
            BY_SPECIES.setdefault(SPECIES_2, {})[SPECIES___SPECIES] = DATUM

    #######################################
    # COUNT OUTLIERS AND WRITE FILES
    #######################################
    t0 = time()
    print("************************** Writing Output")
    for GENE, VALUES in GENE_DATA.items():
        BADSPECIES = {}
        TOTAL_SPECIES = len(VALUES)
        # n * (n - 1) is always even, so floor division is exact.
        TOTAL_COMPARISONS = ((TOTAL_SPECIES - 1) * TOTAL_SPECIES) // 2
        for SPECIES, DISTANCES in VALUES.items():
            SPECIES_COUNT = 0
            for DATUM in DISTANCES.values():
                SPECIES_COUNT = float(SPECIES_COUNT) + float(DATUM)
            RATIO = float(SPECIES_COUNT) / float(TOTAL_COMPARISONS) * 1000
            if RATIO > float(SCORE):
                BADSPECIES[SPECIES] = RATIO

        for TAXON in BADSPECIES:
            with open('%s/outlier_taxa.%s.txt' % (OUT_DIR, GENE), 'a') as OUT_OUT:
                OUT_OUT.write("%s\n" % TAXON)
            with open('%s/%s_seqids.txt' % (OUT_DIR, TAXON), 'a') as OUT_IDS:
                TAXON_SEQ = SEQ_ARR[GENE][TAXON]
                OUT_IDS.write("%s\n" % TAXON_SEQ)

        #######################################
        # GENERATE TREES
        #######################################
        # Make a species -> seq-id map of everything present in the tree.
        TREE_LIST = {}
        T = Tree("%s/RAxML_result.%s.constrained.tre" % (TREE, GENE))
        for LEAF in T:
            LEAF_FIELDS = LEAF.name.split("___")
            TREE_LIST[LEAF_FIELDS[0]] = LEAF_FIELDS[1]

        # Root the tree using the first outgroup list that has at least one
        # taxon present; if none has, use the midpoint method instead.
        NEW_OUTGROUP = []
        for CANDIDATES in (OUTGROUP1, OUTGROUP2, OUTGROUP3):
            NEW_OUTGROUP = ["___".join([SPECIES, SEQID])
                            for SPECIES, SEQID in TREE_LIST.items()
                            if SPECIES in CANDIDATES]
            if NEW_OUTGROUP:
                break

        if len(NEW_OUTGROUP) > 1:
            ANCESTOR = T.get_common_ancestor(NEW_OUTGROUP)
            T.set_outgroup(ANCESTOR)
        elif len(NEW_OUTGROUP) == 1:
            T.set_outgroup(NEW_OUTGROUP[0])
        else:
            print("%s: No outgroup taxa present. Rooting at midpoint instead. This may break a monophyletic group." % GENE)
            R = T.get_midpoint_outgroup()
            T.set_outgroup(R)

        # Write a new tree file with the long branches and their scores marked
        for CLADE in T.traverse():
            CLADE.set_style(nstyle)
        for LEAF in T:
            SPECIES = LEAF.name.split('___')[0]
            if SPECIES in BADSPECIES:
                LEAF.img_style = RED
                LEAF.add_face(TextFace("\t%.2f" % BADSPECIES[SPECIES]), column=1, position="branch-right")
        T.render('%s/%s.tre.pdf' % (OUT_DIR, GENE), tree_style=ts)
    t1 = time()
    print('Writing took %f seconds' % (t1 - t0))
def showTree(delimitation, scale = 500, render = False, fout = "", form = "svg", show_support = False):
    """Show or render the tree of a species delimitation.

    delimitation: species_setting class. Its ``root`` is the tree drawn and
    its ``active_nodes`` are highlighted: each active node gets a red
    vertical line, and all of its descendants get red lines.

    scale is the number of pixels per branch length unit. When render is
    true the tree is written to ``fout + "." + form``; otherwise it is shown
    interactively. With show_support, nodes having a ``bs`` attribute get it
    printed above their branch ("0" for 0.0, two decimals otherwise).
    """
    def _line_style(fgcolor, vt_color, hz_color):
        # Solid 2-pixel lines with an invisible node marker.
        made = NodeStyle()
        made["fgcolor"] = fgcolor
        made["vt_line_color"] = vt_color
        made["hz_line_color"] = hz_color
        made["vt_line_width"] = 2
        made["hz_line_width"] = 2
        made["vt_line_type"] = 0
        made["hz_line_type"] = 0
        made["size"] = 0
        return made

    tree = delimitation.root
    plain = _line_style("#000000", "#0000aa", "#0000aa")
    active = _line_style("#000000", "#ff0000", "#0000aa")
    below_active = _line_style("#0f0f0f", "#ff0000", "#ff0000")

    # Reset the whole tree to the plain style and drop existing faces.
    tree.clear_face()
    for member in tree.get_descendants():
        member.set_style(plain)
        member.img_style["size"] = 0
        member.clear_face()
    tree.set_style(plain)
    tree.img_style["size"] = 0

    for chosen in delimitation.active_nodes:
        chosen.set_style(active)
        chosen.img_style["size"] = 0
        for des in chosen.get_descendants():
            des.set_style(below_active)
            des.img_style["size"] = 0

    for member in delimitation.root.traverse(strategy='preorder'):
        if not (show_support and hasattr(member, "bs")):
            continue
        if member.bs == 0.0:
            support_text = "0"
        else:
            support_text = "{0:.2f}".format(member.bs)
        member.add_face(TextFace(support_text, fsize = 8), column=0, position = "branch-top")

    ts = TreeStyle()
    ts.scale = scale
    ts.branch_vertical_margin = 7

    if not render:
        tree.show(tree_style=ts)
    else:
        tree.render(fout+"."+form, tree_style=ts)
def draw_tree_regions(clusterrunid, t, ts, cur, greyout=3):
    '''
    Draw the neighborhoods around each of the genes in a gene tree given the cluster and run IDs and the tree (t)

    clusterrunid is the run ID to use to identify homologous clusters and ts is the TreeStyle object associated
    with the ETE tree t

    cur is a SQLite cursor object for the database

    The arrows are grayed out if less than "greyout" genes appear in a given cluster.

    Returns the tuple (t, ts). If no gene of the tree is found in the database, a warning is
    written to stderr and t and ts are returned without any neighborhoods added.
    '''
    from collections import Counter

    # DEPRECATED
    t, tblastnadded = removeLeadingDashes(t)

    unsanitized = [unsanitizeGeneId(genename) for genename in t.get_leaf_names()]

    # Create a list of SeqFeature objects for the neighbors of each gene in the tree
    # If passed a TBLASTN hit it will create seq objects for every gene surrounding the TBLASTN hit and
    # for the TBLASTN hit itself.
    #
    # Nothing is added if we can't find that ID in the database or the ID is badly formatted.
    seqfeatures = {}
    for genename in unsanitized:
        sys.stderr.write("Getting gene neighborhoods for gene %s...\n" %(genename) )
        features_for_genename = makeSeqFeaturesForGeneNeighbors(genename, clusterrunid, cur)
        if len(features_for_genename) > 0:
            seqfeatures[genename] = features_for_genename
        else:
            # Try TBLASTN and if that doesn't work, just give up.
            try:
                features_for_tblastn = makeSeqObjectsForTblastnNeighbors(genename, clusterrunid, cur)
                seqfeatures[genename] = features_for_tblastn
            except ValueError:
                sys.stderr.write("WARNING: Unable to find entries for gene or TBLASTN hit %s in database\n" %(genename) )

    # Don't bother trying the rest if nothing matches at all.
    if not seqfeatures:
        sys.stderr.write("WARNING: No genes in input tree had entries in the database so no neighborhoods will be drawn\n")
        return t, ts

    # Get a list of clusters containing these genes
    allclusters = []
    for gene in seqfeatures:
        for feature in seqfeatures[gene]:
            allclusters.append(feature.qualifiers["cluster_id"])
    uniqueclusters = set(allclusters)
    # Count every cluster once instead of calling list.count() per cluster.
    clustercounts = Counter(allclusters)

    # Get clusters that have enough members to bother trying to color them (as determined by
    # the greyout keyword)
    multipleclusters = [c for c in uniqueclusters if clustercounts[c] >= greyout]

    # Don't die if nothing has enough clusters...
    if len(multipleclusters) > 0:
        getcolor = colormap(multipleclusters)
    else:
        getcolor = {}

    # also add in grey (0.5,0.5,0.5 in RGB) for all others
    singleclusters = [c for c in uniqueclusters if clustercounts[c] < greyout]
    getcolor.update([(sc, (0.5,0.5,0.5)) for sc in singleclusters])

    # generate the region images for any leaf that has them, and map onto the tree
    # we will want to know the max width to make the figures
    widths = []
    for genelocs in seqfeatures.values():
        start, end = regionlength(genelocs)
        widths.append(abs(end - start))
    maxwidth = max(widths)

    for leaf in t.iter_leaves():
        newname = unsanitizeGeneId(leaf.name)
        # Not all genes necessarily are in the database and we don't want to crash if that happens.
        # Instead, Just don't print a neighborhood for them.
        try:
            genelocs = seqfeatures[newname]
        except KeyError:
            continue
        sys.stderr.write("Making region drawing for gene ID %s...\n" %(newname))
        imgfileloc = make_region_drawing(genelocs, getcolor, newname, maxwidth)
        imageFace = faces.ImgFace(imgfileloc)
        leaf.add_face(imageFace, column=2, position = 'aligned')
        if newname in tblastnadded:
            leaf.add_face(TextFace("TBlastN added", fsize=30), column=3, position = 'aligned')

    # add legend for clusters
    ts = treelegend(ts, getcolor, greyout)

    return t, ts
applyNodeStyle(tE5B, "root", LINEWIDTH, "White", POINTSIZE, "Black") applyNodeStyle(tE5B, " Bacteria", LINEWIDTH, "Silver", POINTSIZE, "Black") applyNodeStyle(tE5B, " Eukaryota", LINEWIDTH, "Gainsboro", POINTSIZE, "Black") applyNodeStyle(tE5B, " Viruses", LINEWIDTH, "DarkGrey", POINTSIZE, "Black") applyNodeStyle(tE5C, "root", LINEWIDTH, "White", POINTSIZE, "Black") applyNodeStyle(tE5C, " Bacteria", LINEWIDTH, "Silver", POINTSIZE, "Black") applyNodeStyle(tE5C, " Eukaryota", LINEWIDTH, "Gainsboro", POINTSIZE, "Black") applyNodeStyle(tE5C, " Viruses", LINEWIDTH, "DarkGrey", POINTSIZE, "Black") FONTSIZE = 24 for leaf in tB50square.iter_leaves(): T = TextFace(' ' + leaf.name, fsize=(FONTSIZE), fgcolor='Black') leaf.add_face(T, 0, position="aligned") #square_style.show_branch_length = True; #tB50square.show(tree_style = square_style) tB50 = copy.deepcopy(tB50square) for leaf in tE5A.iter_leaves(): T = TextFace(' ' + leaf.name, fsize=(FONTSIZE), fgcolor='Black') leaf.add_face(T, 0, position="aligned") for leaf in tE5B.iter_leaves(): T = TextFace(' ' + leaf.name, fsize=(FONTSIZE), fgcolor='Black') leaf.add_face(T, 0, position="aligned") for leaf in tE5C.iter_leaves():
def _add_count_labels(tree, count, control_colour, infect_colour,
                      control_col, infect_col, name_col):
    """Add control count, infect count and name faces to every leaf of ``tree``."""
    for n in tree.iter_leaves():
        (control, infect) = count[n.name]
        # A trailing space helps readability in a few cases, but overall
        # stretches the graph.
        T = TextFace(str(control) + ' ', fsize=FONTSIZE, fgcolor=control_colour)
        n.add_face(T, control_col, position="aligned")
        T = TextFace(str(infect) + ' ', fsize=FONTSIZE, fgcolor=infect_colour)
        n.add_face(T, infect_col, position="aligned")
        # Surrounding spaces keep the name from looking too crowded.
        T = TextFace(" " + n.name + " ", fsize=(FONTSIZE + 2), fgcolor='Black')
        n.add_face(T, name_col, position="aligned")


def makeGraphsToFile(t, filenameStem, outputpath, count):
    """Render four circular PNG versions of ``t`` with per-leaf counts.

    Parameters:
        t: the tree to draw. It is styled and labelled in place for the
            first colour image.
        filenameStem: base file name; ``_color_v1.png``, ``_color_v2.png``,
            ``_grey_v1.png`` and ``_grey_v2.png`` are appended to it.
        outputpath: prefix concatenated directly in front of filenameStem.
        count: mapping from leaf name to a ``(control, infect)`` pair. A
            leaf name missing from it raises ``KeyError``.

    The two versions of each colour scheme differ only in the column order
    of the count and name labels. Uses the module-level LINEWIDTH,
    POINTSIZE, FONTSIZE, WIDTH and DPI settings.
    """
    t_back = copy.deepcopy(t)

    ### COLOR
    # All Nodes
    applyNodeStyle(t, "root", LINEWIDTH, "White", POINTSIZE, "Black")
    # Other Kingdoms
    applyNodeStyle(t, "uk_Prokaryota", LINEWIDTH, "lightgreen", POINTSIZE, "Black")
    applyNodeStyle(t, "k_Fungi", LINEWIDTH, "wheat", POINTSIZE, "Black")
    # Stramenopiles
    applyNodeStyle(t, "o_Peronosporales", LINEWIDTH, "goldenrod", POINTSIZE, "Black")
    applyNodeStyle(t, "o_Saprolegniales", LINEWIDTH, "goldenrod", POINTSIZE, "Black")

    # Copy before any label is added so the alternate ordering starts clean.
    t2 = copy.deepcopy(t)

    _add_count_labels(t, count, 'MediumBlue', 'FireBrick', 0, 1, 2)

    circular_style = TreeStyle()
    circular_style.mode = "c"  # draw tree in circular mode
    circular_style.scale = 31  # length of 1 level transition in tree
    circular_style.show_scale = False
    circular_style.show_leaf_name = False

    t.render(outputpath + filenameStem + "_color_v1.png", tree_style=circular_style, w=WIDTH, dpi=DPI)

    ### COLOR -- Alternate ordering of text labels, using copied t2
    _add_count_labels(t2, count, 'MediumBlue', 'FireBrick', 1, 2, 0)
    t2.render(outputpath + filenameStem + "_color_v2.png", tree_style=circular_style, w=WIDTH, dpi=DPI)

    ### GREYSCALE -- start again from the unstyled backup
    t = copy.deepcopy(t_back)

    # All Nodes
    applyNodeStyle(t, "root", LINEWIDTH, "White", POINTSIZE, "Black")
    # Other Kingdoms (uk_Prokaryota is already covered by the all-nodes style)
    applyNodeStyle(t, "k_Fungi", LINEWIDTH, "Silver", POINTSIZE, "Black")
    applyNodeStyle(t, "f_Retroviridae", LINEWIDTH, "GainsBoro", POINTSIZE, "Black")
    # Stramenopiles
    applyNodeStyle(t, "o_Peronosporales", LINEWIDTH, "DarkGrey", POINTSIZE, "Black")
    applyNodeStyle(t, "o_Saprolegniales", LINEWIDTH, "DarkGrey", POINTSIZE, "Black")

    t2 = copy.deepcopy(t)

    # NOTE(review): the original author reported a failure here for a leaf
    # ("citrobacter") missing from count - confirm that count covers every leaf.
    _add_count_labels(t, count, 'Black', 'DimGray', 1, 2, 0)
    t.render(outputpath + filenameStem + "_grey_v1.png", tree_style=circular_style, w=WIDTH, dpi=DPI)

    ### GREY -- Alternate ordering of text labels, using copied t2
    _add_count_labels(t2, count, 'Black', 'DimGray', 0, 1, 2)
    t2.render(outputpath + filenameStem + "_grey_v2.png", tree_style=circular_style, w=WIDTH, dpi=DPI)
def plot_blast_result(tree_file, blast_result_file_list, id2description, id2mlst, check_overlap, ordered_queries, fasta_file2accession, id_cutoff=80, reference_accession='-', accession2hit_filter=False, show_identity_values=True):
    '''
    Staph aureus PVL project with Laure Jaton.

    Script to display a phylogeny and the conservation of virulence factors side by side.
    Requires the MLST results, the full set of tblastn results (virulence factors vs chromosomes),
    and a mapping between the genome accessions and the names to display in the phylogeny.
    Patient "molis" identifiers are replaced by "CHUV n" (NOTE(review): translated from a
    partly garbled French sentence - confirm).

    The figure is written to "profile.svg".

    :param tree_file: phylogeny in newick format, with identifiers matching all the dictionaries used
    :param blast_result_file_list: tblastn results, virulence factors vs chromosome (best blast only)
    :param id2description: genome identifiers used in the tree and the matching description (i.e S aureus Newman)
    :param id2mlst: tree identifiers to S. aureus ST type
    :param check_overlap: passed through to blast_utils.remove_blast_redundancy
    :param ordered_queries: query names in the column order to draw; queries without any hit above
        id_cutoff among the tree leaves are dropped
    :param fasta_file2accession: leaf name to genome accession
    :param id_cutoff: identity threshold for a hit to be counted and drawn
    :param reference_accession: leaf name that is always drawn with the blue scale
    :param accession2hit_filter: optional mapping accession -> queries to draw with the green scale
    :param show_identity_values: print identity values in the cells instead of plain coloured cells
    :return: None
    '''
    import blast_utils

    # Failures expected when a leaf/query has no usable blast record.
    lookup_errors = (KeyError, IndexError, TypeError, ValueError)

    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)

    # Count hits above the cutoff per query and drop the ones below it.
    queries_count = {}
    for query in queries:
        queries_count[query] = 0
        for one_blast in blast2data:
            if query in blast2data[one_blast]:
                if float(blast2data[one_blast][query][0]) > id_cutoff:
                    queries_count[query] += 1
                else:
                    del blast2data[one_blast][query]
    print(queries_count)

    # Iterate over a copy: removing from the list being iterated skipped the
    # element following every removed query.
    for query in list(queries):
        print("Hit counts: %s\t%s" % (query, queries_count[query]))
        if queries_count[query] == 0:
            queries.remove(query)
    print('delete columns with no matches ok')

    t1 = Tree(tree_file)
    tss = TreeStyle()

    # Root at the midpoint node
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    # Keep only the queries with at least one hit among the tree leaves.
    ordered_queries_filtered = []
    for query in ordered_queries:
        hit_count = 0
        for lf2 in t1.iter_leaves():
            try:
                accession = fasta_file2accession[lf2.name]
                tmpidentity = blast2data[accession][query][0]
                if float(tmpidentity) > float(id_cutoff):
                    hit_count += 1
            except lookup_errors:
                continue
        if hit_count > 0:
            ordered_queries_filtered.append(query)

    head = True
    print('drawing tree')
    print('n initial queries: %s n kept: %s' % (len(ordered_queries), len(ordered_queries_filtered)))
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        accession = fasta_file2accession[lf.name]
        for col, value in enumerate(ordered_queries_filtered):
            if head:
                # First leaf only: add the rotated gene name to the header.
                if show_identity_values:
                    n = TextFace(' %s ' % str(value))
                    header_margin = 2
                else:
                    n = TextFace(' %s ' % str(value), fsize=6)
                    header_margin = 0
                n.margin_top = header_margin
                n.margin_right = header_margin
                n.margin_left = header_margin
                n.margin_bottom = header_margin
                n.rotation = 270
                n.vt_align = 2
                n.hz_align = 2
                n.inner_background.color = "white"
                n.opacity = 1.
                tss.aligned_header.add_face(n, col)

            try:
                identity_value = blast2data[accession][value][0]
                # Green scale only for non-reference leaves whose hit is
                # listed in the filter; blue scale for everything else.
                use_green = (lf.name != reference_accession
                             and accession2hit_filter
                             and accession in accession2hit_filter
                             and value in accession2hit_filter[accession])
                scale_map = m_green if use_green else m_blue
                color = rgb2hex(scale_map.to_rgba(float(identity_value)))
            except lookup_errors:
                identity_value = 0
                color = "white"

            if show_identity_values:
                if float(identity_value) >= float(id_cutoff):
                    if str(identity_value) == '100.00' or str(identity_value) == '100.0':
                        identity_value = '100'
                        n = TextFace("%s " % identity_value)
                    else:
                        n = TextFace("%.2f" % round(float(identity_value), 2))
                    if float(identity_value) > 95:
                        n.fgcolor = "white"
                        n.opacity = 1.
                else:
                    identity_value = '-'
                    n = TextFace(' %s ' % str(identity_value))
                    n.opacity = 1.
                n.margin_top = 2
                n.margin_right = 2
                n.margin_left = 2
                n.margin_bottom = 2
                n.inner_background.color = color
                lf.add_face(n, col, position="aligned")
            else:
                if float(identity_value) >= float(id_cutoff):
                    # don't show identity values
                    n = TextFace(' ')
                    n.margin_top = 0
                    n.margin_right = 0
                    n.margin_left = 0
                    n.margin_bottom = 0
                    n.inner_background.color = color
                    lf.add_face(n, col, position="aligned")

        try:
            accession = fasta_file2accession[lf.name]
            lf.name = ' %s (%s)' % (id2description[accession], id2mlst[lf.name])
        except KeyError:
            print('-------- %s' % (id2description,))
            lf.name = ' %s (%s)' % (lf.name, id2mlst[lf.name])
        head = False

    # Mark weakly supported nodes with a blue dot; hide the others.
    for n in t1.traverse():
        nstyle = NodeStyle()
        if n.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        n.set_style(nstyle)

    print('rendering tree')
    t1.render("profile.svg", dpi=1000, h=400, tree_style=tss)
def main(argv):
    """Colour the leaf labels of a tree and annotate them with expression data.

    Command line: ``ColourTree.py -t <treefile> -o <outfile> [-d <database>]``.

    Each leaf gets a coloured text label.  Leaves whose name contains one of
    the markers ``KRAUS``, ``MOEL``, ``UNC`` or ``WILD`` get a fixed colour;
    any other leaf is looked up in the ``Species``/``Sequences`` tables, is
    renamed to ``<genus initial>_<species>_<original name>`` and coloured by
    species.  Up to four expression labels (text from the ``normalized``
    table, background colour from ``get_colour`` applied to the ``vsd``
    table) are then added next to the leaf, and the tree is handed to
    ``draw_tree``.

    Args:
        argv: command-line arguments without the program name.

    Exits with status 2 (after printing the usage line) when the options
    cannot be parsed, when no option is given, or when no tree file is given.
    """
    import os  # local import: only needed to derive the default output name

    treefilename = ''
    outfilename = ''
    database = 'test'
    usage = 'ColourTree.py -t <treefile> -o <outfile>'

    try:
        opts, args = getopt.getopt(argv, "ht:o:d:", ["tree=", "out=", "db="])
        if not opts:
            raise getopt.GetoptError('no opts')
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "-h":
            print(usage)
            sys.exit()
        elif opt in ("-t", "--tree"):
            treefilename = arg
        elif opt in ("-o", "--out"):
            outfilename = arg
        elif opt in ("-d", "--db"):
            database = arg

    # Without a tree file there is nothing to draw; fail with the usage line
    # instead of letting Tree('') raise a parser error.
    if not treefilename:
        print(usage)
        sys.exit(2)

    tree = Tree(treefilename)
    if not outfilename:
        # Swap the extension, whatever it is.  str.replace(".nwk", ".pdf")
        # returned the input path unchanged for any other extension.
        outfilename = os.path.splitext(treefilename)[0] + ".pdf"

    # NOTE(review): `mdb` is presumably MySQLdb (the "%s" paramstyle matches)
    # -- confirm against the file's imports.
    con = mdb.connect('localhost', 'root', '', database)
    with con:
        cur = con.cursor()
        for leaf in tree:
            name = leaf.name
            if 'KRAUS' in name:
                color = 'Green'
            elif 'MOEL' in name:
                color = 'Red'
            elif 'UNC' in name:
                color = 'Orange'
            elif 'WILD' in name:
                color = 'MediumBlue'
            else:
                # Query parameters go in a sequence: a bare string is only
                # accepted by some driver versions.
                cur.execute(
                    "Select Species.Genus, Species.Species FROM Species, Sequences WHERE Species.abbreviation = Sequences.species AND Sequences.seqid = %s",
                    (name,))
                try:
                    (genus, species) = cur.fetchone()
                    leaf.name = '_'.join((genus[0], species, leaf.name))
                except TypeError as e:
                    # fetchone() returned None (no such sequence): report the
                    # error and leave this leaf unlabelled.
                    print(e)
                    continue
                if 'kraussiana' in leaf.name:
                    color = 'LightGreen'
                elif 'willdenowii' in leaf.name:
                    color = 'SteelBlue'
                else:
                    color = 'Black'

            label = TextFace(leaf.name, fgcolor=color, fsize=16)
            leaf.add_face(label, column=0, position="branch-right")
            # Blank spacer column between the name and the expression labels.
            leaf.add_face(TextFace(' '), column=1, position="branch-right")

            # The expression tables are keyed by the name without its last
            # two '_'-separated fields.
            name = '_'.join(name.split('_')[:-2])
            try:
                cur.execute(
                    "SELECT vsd.leaf1, vsd.leaf2, vsd.leaf3, vsd.leaf4 FROM vsd, orfs WHERE vsd.gene_id = orfs.gene_id AND orfs.seqid = %s",
                    (name,))
                vsd = cur.fetchone()
                cur.execute(
                    "SELECT normalized.leaf1, normalized.leaf2, normalized.leaf3, normalized.leaf4 FROM normalized, orfs WHERE normalized.gene_id = orfs.gene_id AND orfs.seqid = %s",
                    (name,))
                normalized = cur.fetchone()
                for x in range(4):
                    if vsd[x] == 'none':
                        continue
                    expression_label = TextFace(' %s ' % normalized[x], fsize=16)
                    expression_label.background.color = get_colour(vsd[x])
                    expression_label.border.width = 1
                    # This isn't working right : (
                    expression_label.margin_left = 1
                    expression_label.margin_right = 1
                    expression_label.margin_top = 2
                    expression_label.margin_bottom = 1
                    leaf.add_face(expression_label, column=x + 2,
                                  position="branch-right")
            except TypeError:
                # Best effort: a leaf without expression rows (fetchone()
                # returned None) simply gets no expression labels.
                continue

    draw_tree(tree, outfilename)
def taxo_msa(outfile='taxo_msa.svg', taxids=[], annotation='', msa=[], title='', width=2000):
    """Render an MSA next to the NCBI taxonomy tree of its sequences.

    The topology for ``taxids`` is fetched through ``NCBITaxa``; an extra
    leaf named ``annotation`` is added to carry ``annotation`` (or a blank
    line of the alignment's width).  Every other leaf shows the sequence and
    the record id of the alignment row that belongs to its taxid.

    Args:
        outfile: path of the rendered image.
        taxids: taxids in the same order as the records in ``msa``.
        annotation: string drawn on the ``annotation`` leaf; when empty, a
            run of spaces as long as ``msa[0].seq`` is drawn instead.
        msa: alignment records; each must expose ``.seq`` and ``.id``.
        title: title drawn above the tree.
        width: image width passed to ``render``.

    Raises:
        ValueError: (from the layout callback, at render time) when a leaf's
            taxid is not present in ``taxids``.

    The list defaults are kept for interface compatibility; neither list is
    ever mutated here.
    """
    ncbi = NCBITaxa()
    taxids = [int(taxid) for taxid in taxids]
    t = ncbi.get_topology(taxids, intermediate_nodes=False)
    a = t.add_child(name='annotation')
    a.add_feature('sci_name', 'annotation')
    t.sort_descendants(attr='sci_name')
    ts = TreeStyle()

    # Position of the FIRST occurrence of each taxid, matching list.index().
    # Built once: the layout callback runs for every node, and a linear
    # taxids.index() scan per leaf made rendering quadratic.
    first_position = {}
    for position, taxid in enumerate(taxids):
        first_position.setdefault(taxid, position)

    def record_for(node):
        # Alignment row that belongs to the taxid stored in the leaf name.
        taxid = int(node.name)
        if taxid not in first_position:
            raise ValueError("%d is not in list" % taxid)
        return msa[first_position[taxid]]

    def layout(node):
        # Higher taxonomic ranks are labelled on top of their branch.
        if getattr(node, "rank", None) in ('order', 'class', 'phylum', 'kingdom'):
            rank_face = AttrFace("sci_name", fsize=7, fgcolor="indianred")
            node.add_face(rank_face, column=0, position="branch-top")

        if not node.is_leaf():
            return

        sciname_face = AttrFace("sci_name", fsize=9, fgcolor="steelblue")
        node.add_face(sciname_face, column=0, position="branch-right")

        if node.name == 'annotation':
            if annotation:
                s = annotation
            else:
                s = ' ' * len(msa[0].seq)
            label = ' ' + 'SEQ_ID'
        else:
            record = record_for(node)
            s = str(record.seq)
            label = ' ' + record.id

        seqFace = SeqMotifFace(
            s, [[0, len(s), "seq", 10, 10, None, None, None]], scale_factor=1)
        add_face_to_node(seqFace, node, 0, position="aligned")
        add_face_to_node(TextFace(label), node, column=1, position="aligned")

    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.title.add_face(TextFace(title, fsize=20), column=0)
    t.render(outfile, w=width, dpi=300, tree_style=ts)
def main():
    """Build the example H2A alignment, taxonomy figure and conservation plots.

    Hard-coded pipeline:

    1. Read ``int_data/seqs_rs.csv`` (per-sequence table) and
       ``int_data/fasta_dict.p`` (pickled sequences keyed by gi string).
    2. Keep complete canonical_H2A / H2A.X sequences with standard residues,
       restrict them to the chosen taxonomic clades, count identical
       sequences per taxid (``ident``) and per family (``degen``), drop
       sequences without outside support, and keep one sequence per genus.
    3. Relabel the surviving records, align them with ``muscle_aln`` and
       write ``int_data/example_msa.fasta``,
       ``int_data/example_msa_annot.fasta`` and ``example_h2a.html``.
    4. Trim the alignment to the histone core, render it next to the NCBI
       taxonomy as ``example_motifs_H2A.svg`` and draw the conservation
       profiles with ``plot_prof4seq``.

    ``ncbi`` is not defined in this function; it is read from the enclosing
    (module) scope.
    """

    def say(*parts):
        # Same output as the old ``print a, b`` statement: the items are
        # converted with str() and joined by single spaces.
        print(" ".join(str(part) for part in parts))

    def sort_desc(frame, columns):
        # DataFrame.sort() no longer exists in pandas >= 0.20; sort_values()
        # is available from 0.17.  Use whichever this pandas provides.
        sorter = getattr(frame, 'sort_values', None) or frame.sort
        return sorter(columns, ascending=False)

    title = ''

    # 1. Getting data
    ########################################################
    df = pd.read_csv('int_data/seqs_rs.csv')  # Histone types info
    # NOTE: pickle must only be used on trusted, locally produced files.
    with open("int_data/fasta_dict.p", "rb") as handle:
        fasta_dict = pickle.load(handle)  # Sequences

    # 2. Filtering - filter initial dataset by type, variant and other parameters
    ########################################################
    # 2.1. Narrow by variant/type
    title += 'H2A'
    f_df = df[((df['hist_var'] == 'canonical_H2A') | (df['hist_var'] == 'H2A.X'))
              & (df['partial'] == False)
              & (df['non_st_aa'] == False)]
    say("Number of seqs after narrowing by hist type/var:", len(f_df))

    # 2.2. Filter by list of taxonomy clades - restrict sequences to certain
    # taxonomic clades
    title += ' across cellular organisms'
    # 131567 - cellular organisms, 7215 4930 Drosophila and yeast,
    # 9443 - primates, 33682 - euglenozoa, 6656 - arthropods, 4751 - fungi,
    # 5782 - dictostelium
    parent_nodes = [131567]
    # This is akin manual removal of bad species
    del_nodes = [5782, 5690]
    say("Selecting taxonomic subset for taxids: ", parent_nodes)
    say("while removing taxonomic subset for taxids: ", del_nodes)
    taxids = set(parent_nodes)
    for i in parent_nodes:
        taxids.update(ncbi.get_descendant_taxa(i, intermediate_nodes=True))
    for i in del_nodes:
        taxids = taxids.difference(set([i]))
        taxids = taxids.difference(
            set(ncbi.get_descendant_taxa(i, intermediate_nodes=True)))
    # .copy(): columns are added below, so work on an independent frame
    # rather than on a slice of df.
    f_df = f_df[f_df['taxid'].isin(taxids)].copy()
    say("Number of seq after taxonomic subset: ", len(f_df))

    # 2.3.0 Marking number of identical sequence within each species and
    # subspecies.  This will simplify further analysis of sequence filtering
    # on similarity.  We know that all refseqs are duplicated for instance.
    ident = dict()
    for taxid in set(f_df['taxid']):
        gis = list(f_df[f_df['taxid'] == taxid]['gi'])
        if len(gis) > 1:
            # this is to limit exec time: only cluster when there is
            # something to compare
            res = cluster_seq_support({gi: fasta_dict[str(gi)] for gi in gis},
                                      ident_thresh=1.00)
            ident.update(res)
        else:
            ident[gis[0]] = 1
    # ident - number of identical sequences for current species/subspecies.
    f_df['ident'] = [ident.get(k, 1) for k in f_df['gi']]
    say("Identity of sequence inside each taxid determined")

    # 2.3.1. Calculate number of similar seqs for every seq in tax group.
    # A sequence supported by two or more entries is considered good.  The
    # degen column shows how many similar sequences are found for a given
    # sequence in its taxonomic clade (the tree is traversed by family).
    degen = dict()
    full_topology = ncbi.get_topology(list(f_df['taxid']),
                                      intermediate_nodes=True)
    for family in full_topology.search_nodes(rank='family'):
        nodeset = [int(k.name) for k in family.traverse()]
        gis = list(f_df[f_df['taxid'].isin(nodeset)]['gi'])
        res = cluster_seq_support({gi: fasta_dict[str(gi)] for gi in gis},
                                  ident_thresh=1.00)
        degen.update(res)
    f_df['degen'] = [degen.get(k, 1) for k in f_df['gi']]

    # 2.3.2. Remove seqs that do not have support outside their species
    # if they are not curated or RefSeq NP.
    # Sorted so that RefSeq records get priority on removing duplicates.
    f_df = sort_desc(f_df, ['RefSeq', 'degen'])
    f_df = f_df[(f_df['degen'] > f_df['ident'])
                | (f_df['curated'] == True)
                | (f_df['RefSeq'] == 2)]
    say("After removing mined seqs with no support in neighboring species: ",
        len(f_df))

    # 2.3.3. Shuffle sequences, so that upon further selection, RefSeq and
    # high degeneracy get priority.
    f_df = sort_desc(f_df, ['RefSeq', 'degen'])

    # 2.4 Take one best representative per specific taxonomic rank (e.g. genus)
    # Common ranks: superorder-order-suborder-infraorder-parvorder-
    # superfamily-family-subfamily-genus-species-subspecies
    pruningrank = 'genus'
    say("Pruning taxonomy by ", pruningrank)
    title += ' , one seq. per %s' % pruningrank
    seqtaxids = list(f_df['taxid'])  # old list
    grouped_taxids = group_taxids(seqtaxids, rank=pruningrank)
    # Best representative: curated first, then RefSeq NP (RefSeq 2 means NP,
    # 1 means XP), otherwise the one with the largest degeneracy.
    new_gis = list()
    for tids in grouped_taxids:
        t_df = f_df[f_df['taxid'].isin(tids)]
        curated_gis = t_df.loc[t_df.curated == True, 'gi']
        np_gis = t_df.loc[t_df.RefSeq == 2, 'gi']
        if len(curated_gis) > 0:
            new_gis.append(curated_gis.values[0])
        elif len(np_gis) > 0:
            new_gis.append(np_gis.values[0])
        else:
            t_df = sort_desc(t_df, ['degen', 'RefSeq'])
            new_gis.append(t_df['gi'].iloc[0])
    f_df = f_df[f_df['gi'].isin(new_gis)]
    say("After pruning taxonomy we have: ", len(f_df))

    # 2.5. Check seq for sanity - needs to be checked!  (A length-based QC
    # step using check_hist_length / check_hist_core_length used to live
    # here; it is disabled.)

    # Per-gi lookups, built once from the final table.  setdefault keeps the
    # first row of a gi, like the ``.values[0]`` lookups this replaces, which
    # rescanned the whole frame for every key.
    gi2variant = {}
    gi2taxid = {}
    for gi, variant, taxid in zip(f_df['gi'], f_df['hist_var'], f_df['taxid']):
        gi2variant.setdefault(gi, variant)
        gi2taxid.setdefault(gi, taxid)

    # 3. Make a list of seq with good ids and descriptions
    ##############################################
    kept_gis = set(f_df['gi'])
    f_fasta_dict = {
        key: value
        for (key, value) in fasta_dict.items() if int(key) in kept_gis
    }
    print(len(f_fasta_dict))
    taxid2name = ncbi.get_taxid_translator(list(f_df['taxid']))
    # Relabel sequences gi => type and organism
    f_fasta_dict = {
        key: SeqRecord(
            id=key,
            description=gi2variant[int(key)] + ' ' + taxid2name[gi2taxid[int(key)]],
            seq=value.seq)
        for (key, value) in f_fasta_dict.items()
    }

    # 4. Make MSA
    #################
    msa = muscle_aln(f_fasta_dict.values(), gapopen=float(-20))
    AlignIO.write(msa, "int_data/example_msa.fasta", "fasta")
    msa_annot = MultipleSeqAlignment([
        SeqRecord(Seq(''.join(get_hist_ss_in_aln_as_string(msa)).replace(' ', '-')),
                  id='annotation',
                  name='')
    ])
    msa_annot.extend(msa)
    AlignIO.write(msa_annot, "int_data/example_msa_annot.fasta", "fasta")
    for record in msa:
        record.description = f_fasta_dict[record.id].description.replace(
            'canonical', 'ca')
    msa.sort(key=lambda x: x.description)

    # 5. Visualize MSA
    aln2html(msa,
             'example_h2a.html',
             features=get_hist_ss_in_aln_for_html(msa, 'H2A', 0),
             title="canonical H2A alignment",
             description=True,
             field1w=10,
             field2w=35)

    # 6. Trim alignment - this is optional
    # 6.1. Trim gaps (disabled): msa_tr=trim_aln_gaps(msa,threshold=0.8)
    # 6.2. Trim to histone core sequence
    msa_tr = trim_hist_aln_to_core(msa)

    # 7. Visualize MSA with ete2.
    # When several gis share a taxid the last one wins, as before.
    taxid2gi = {}
    for gi in f_df['gi']:
        taxid2gi[gi2taxid[gi]] = gi
    msa_dict = {i.id: i.seq for i in msa_tr}
    t = ncbi.get_topology(list(f_df['taxid']), intermediate_nodes=False)
    a = t.add_child(name='annotation')
    a.add_feature('sci_name', 'annotation')
    t.sort_descendants(attr='sci_name')
    ts = TreeStyle()

    def layout(node):
        # Higher taxonomic ranks are labelled on top of their branch.
        if getattr(node, "rank", None) in ('order', 'class', 'phylum', 'kingdom'):
            rank_face = AttrFace("sci_name", fsize=7, fgcolor="indianred")
            node.add_face(rank_face, column=0, position="branch-top")

        if not node.is_leaf():
            return

        sciname_face = AttrFace("sci_name", fsize=9, fgcolor="steelblue")
        node.add_face(sciname_face, column=0, position="branch-right")

        if node.name == 'annotation':
            # Header row: secondary-structure string plus column titles.
            s = get_hist_ss_in_aln_as_string(msa_tr)
            columns = (' ' + 'NCBI_GI' + ' ',
                       ' ' + 'NCBI_TAXID' + ' ',
                       ' ' + 'Variant' + ' ')
        else:
            gi = taxid2gi[int(node.name)]
            s = str(msa_dict[str(gi)])
            columns = (' ' + str(gi) + ' ',
                       ' ' + str(int(node.name)) + ' ',
                       ' ' + str(gi2variant[gi]) + ' ')

        seqFace = SeqMotifFace(
            s, [[0, len(s), "seq", 10, 10, None, None, None]], scale_factor=1)
        add_face_to_node(seqFace, node, 0, position="aligned")
        for column, text in enumerate(columns, 1):
            add_face_to_node(TextFace(text), node, column=column,
                             position="aligned")

    ts.layout_fn = layout
    ts.show_leaf_name = False
    ts.title.add_face(TextFace(title, fsize=20), column=0)
    t.render("example_motifs_H2A.svg", w=6000, dpi=300, tree_style=ts)

    # 10. Conservation
    #############################
    features = get_hist_ss_in_aln_for_shade(msa_tr, below=True)
    cn = add_consensus(msa_tr, threshold=0.5)[-2:-1]
    # Below are three methods that we find useful.
    plot_prof4seq('example_cons_ent_unw',
                  [log(20) + float(x) for x in cons_prof(msa_tr, f=0, c=0)],
                  cn, features, axis='conservation')
    plot_prof4seq('example_cons_ent_unw_norm',
                  [log(20) + float(x)
                   for x in cons_prof(msa_tr, f=0, c=0, norm="T")],
                  cn, features, axis='conservation')
    plot_prof4seq('example_cons_sofp_unw_renorm1',
                  [float(x) for x in cons_prof(msa_tr, f=0, c=2, m=1)],
                  cn, features, axis='conservation')
    plot_prof4seq('example_cons_sofp_unw',
                  [float(x) for x in cons_prof(msa_tr, f=0, c=2, m=0)],
                  cn, features, axis='conservation')
    plot_prof4seq('example_cons_sofp_psic_renorm1',
                  [float(x) for x in cons_prof(msa_tr, f=2, c=2, m=1)],
                  cn, features, axis='conservation')
n.set_style(nstyle) # Create an independent node style for each leaf, which # specifies the colour given in the locations.csv file for n in t.get_leaves(): name = n.get_leaf_names()[0] print name nstyle = NodeStyle() try: nstyle["fgcolor"] = colours[samples[name]['Hospitals']] #Hospitals']] except KeyError: nstyle["fgcolor"] = "grey" nstyle["size"] = 10 n.set_style(nstyle) try: n.add_face(TextFace(samples[name]['Locations']), column=0, position="branch-right") except: pass try: n.add_face(TextFace(samples[name]['SamplingPeriod']), column=1, position="branch-right") except: pass t.render(file_name=sys.argv[3], tree_style=ts)
def main():
    """Plot a tree annotated with labels and colours from an info table.

    Reads the options from ``parse_args()``, loads the tree (as a
    ``ClusterTree`` when a data matrix is given), optionally re-roots and
    ladderizes it, reads the info table with ``readtable`` and then, as
    requested by the options: adds colour-tag and text-label columns next to
    the leaves, colours leaf nodes, branches and node backgrounds, colours
    branches by support value, attaches the data-matrix profile, renders the
    tree to ``args.output`` and optionally opens the interactive viewer.
    """
    args = parse_args()

    if args.data:
        print("\nReading tree from " + args.tree + " and data matrix from " + args.data)
        tree = ClusterTree(args.tree, text_array=args.data)
    else:
        print("\nReading tree from " + args.tree)
        tree = Tree(args.tree)

    if args.midpoint:
        R = tree.get_midpoint_outgroup()
        tree.set_outgroup(R)
        print("- Applying midpoint rooting")
    elif args.outgroup:
        tree.set_outgroup(tree & args.outgroup)
        print("- Rooting using outgroup " + args.outgroup)

    if not args.no_ladderize:
        tree.ladderize()
        print("- Ladderizing tree")

    table, column_list, column_values = readtable(args, tree.get_leaf_names())

    # Last palette built for tags/labels/nodes.  Initialised so that
    # --print_colour_dict cannot hit an undefined name when no palette was
    # ever needed.
    colour_dict = None

    labels = []
    if args.labels:
        print("\nThese labels will be printed next to each strain:")
        for label in args.labels:
            if label in column_list:
                labels.append(label)
                print(" " + label)
            else:
                print("WARNING: specified label " + label + " was not found in the columns of the info file provided, " + args.info)

    # set node styles
    # start by setting all to no shapes, black labels
    for n in tree.traverse():
        nstyle = NodeStyle()
        nstyle["fgcolor"] = "black"
        nstyle["size"] = 0
        n.set_style(nstyle)

    # add colour tags next to nodes
    colour_tags = []
    if args.colour_tags:
        print("\nThese columns will be used to generate colour tags:")
        for label in args.colour_tags:
            if label in column_list:
                colour_tags.append(label)
                print(" " + label)
            else:
                print("\tWARNING: specified label for colour tagging, " + label + ", was not found in the columns of the info file provided, " + args.info)
        for i, label in enumerate(colour_tags):
            colour_dict = getColourPalette(column_values[label], args, label)
            print("- Adding colour tag for " + label)
            for node in tree.get_leaves():
                # empty padding face in front of the tag columns
                this_face = Face()
                this_face.margin_left = args.padding
                node.add_face(this_face, column=0, position="aligned")
                if node.name in table:
                    this_label = table[node.name][label]
                    this_colour = colour_dict[this_label]
                else:
                    this_colour = "white"
                this_face = Face()
                this_face.background.color = this_colour
                this_face.margin_right = args.margin_right
                this_face.margin_left = args.margin_left
                this_face.margin_top = args.margin_top
                this_face.margin_bottom = args.margin_bottom
                this_face.border.width = args.border_width
                this_face.border.color = "white"
                node.add_face(this_face, column=i + 1, position="aligned")
        print("")

    # add labels as columns
    for i, label in enumerate(labels):
        print("- Adding label " + label)
        if label == args.colour_nodes_by:
            print(" also colouring nodes by these values")
        colour_dict = getColourPalette(column_values[label], args, label)
        for node in tree.get_leaves():
            if node.name in table:
                this_label = table[node.name][label]
                this_colour = colour_dict[this_label]
            else:
                this_label = ""
                this_colour = "black"
            this_face = TextFace(text=this_label, fsize=args.font_size)
            if args.tags:
                this_face.background.color = this_colour
            elif label == args.colour_nodes_by:
                this_face.fgcolor = this_colour
            this_face.margin_right = args.padding
            if i == 0:
                this_face.margin_left = args.padding
            node.add_face(this_face, column=i + len(colour_tags) + 1, position="aligned")
            # set leaves to coloured circles
            node.img_style["size"] = args.node_size
            if label == args.colour_nodes_by:
                node.img_style["fgcolor"] = this_colour

    if args.colour_branches_by or args.colour_backgrounds_by or args.branch_support_colour:
        if args.colour_branches_by:
            print("- Colouring branches by label " + args.colour_branches_by)
            colour_dict_br = getColourPalette(column_values[args.colour_branches_by], args, args.colour_branches_by)
        if args.colour_backgrounds_by:
            print("- Colouring node backgrounds by label " + args.colour_backgrounds_by)
            colour_dict_bg = getColourPalette(column_values[args.colour_backgrounds_by], args, args.colour_backgrounds_by)

        support_colours = {}
        if args.branch_support_colour:
            print("- Colouring branches by support values")
            if args.branch_support_cutoff:
                # two-colour scheme: red below the cutoff, black from it upwards
                for i in range(0, args.branch_support_cutoff):
                    support_colours[i] = "#FF0000"
                for i in range(args.branch_support_cutoff, 101):
                    support_colours[i] = "#000000"
            else:
                # red -> black ramp, extracted from R using
                # rgb( colorRamp(c("red", "black"))(seq(0, 1, length = 100)), max = 255)
                # Keyed by integer percentage; the 0-1 scale is mapped onto
                # these keys by support_key() below.
                support_colours = {
                    0: "#FF0000", 1: "#FF0000", 2: "#FC0000", 3: "#F90000", 4: "#F70000",
                    5: "#F40000", 6: "#F20000", 7: "#EF0000", 8: "#EC0000", 9: "#EA0000",
                    10: "#E70000", 11: "#E50000", 12: "#E20000", 13: "#E00000", 14: "#DD0000",
                    15: "#DA0000", 16: "#D80000", 17: "#D50000", 18: "#D30000", 19: "#D00000",
                    20: "#CE0000", 21: "#CB0000", 22: "#C80000", 23: "#C60000", 24: "#C30000",
                    25: "#C10000", 26: "#BE0000", 27: "#BC0000", 28: "#B90000", 29: "#B60000",
                    30: "#B40000", 31: "#B10000", 32: "#AF0000", 33: "#AC0000", 34: "#AA0000",
                    35: "#A70000", 36: "#A40000", 37: "#A20000", 38: "#9F0000", 39: "#9D0000",
                    40: "#9A0000", 41: "#970000", 42: "#950000", 43: "#920000", 44: "#900000",
                    45: "#8D0000", 46: "#8B0000", 47: "#880000", 48: "#850000", 49: "#830000",
                    50: "#800000", 51: "#7E0000", 52: "#7B0000", 53: "#790000", 54: "#760000",
                    55: "#730000", 56: "#710000", 57: "#6E0000", 58: "#6C0000", 59: "#690000",
                    60: "#670000", 61: "#640000", 62: "#610000", 63: "#5F0000", 64: "#5C0000",
                    65: "#5A0000", 66: "#570000", 67: "#540000", 68: "#520000", 69: "#4F0000",
                    70: "#4D0000", 71: "#4A0000", 72: "#480000", 73: "#450000", 74: "#420000",
                    75: "#400000", 76: "#3D0000", 77: "#3B0000", 78: "#380000", 79: "#360000",
                    80: "#330000", 81: "#300000", 82: "#2E0000", 83: "#2B0000", 84: "#290000",
                    85: "#260000", 86: "#240000", 87: "#210000", 88: "#1E0000", 89: "#1C0000",
                    90: "#190000", 91: "#170000", 92: "#140000", 93: "#120000", 94: "#0F0000",
                    95: "#0C0000", 96: "#0A0000", 97: "#070000", 98: "#050000", 99: "#020000",
                    100: "#000000",
                }

        def support_key(node):
            # Key into support_colours for this node's support value.
            # Cutoff and percent modes use the integer part of the support,
            # as before.  On the 0-1 scale the support is converted to a
            # percentage first: int() truncated every value below 1 to 0,
            # so all such branches came out pure red.
            if args.branch_support_cutoff or args.branch_support_percent:
                return int(node.support)
            return int(round(node.support * 100))

        def descendant_labels(leaves, column):
            # Distinct values of `column` among `leaves`, in first-seen
            # order; leaves missing from the table count as "none".
            found = []
            for leaf in leaves:
                if leaf.name in table:
                    value = table[leaf.name][column]
                    if value not in found:
                        found.append(value)
                elif "none" not in found:
                    found.append("none")
            return found

        # NOTE(review): set_style() below replaces the style object of every
        # node, which looks like it discards the leaf size/colour assigned
        # in the label loop above -- confirm whether that is intended.
        for node in tree.traverse():
            nstyle = NodeStyle()
            nstyle["size"] = 0
            if node.name in table:
                row = table[node.name]
                if args.colour_branches_by:
                    branch_colour = colour_dict_br[row[args.colour_branches_by]]
                    nstyle["vt_line_color"] = branch_colour  # set branch colour
                    nstyle["hz_line_color"] = branch_colour
                if args.colour_backgrounds_by:
                    # The guard used to test args.colour_branches_by, so leaf
                    # backgrounds were never set unless branch colouring was
                    # enabled as well.
                    column = args.colour_backgrounds_by
                    if column in row and row[column] != "none":
                        nstyle["bgcolor"] = colour_dict_bg[row[column]]  # set background colour
                node.set_style(nstyle)
            else:
                # internal node (or a leaf that is missing from the table)
                descendants = node.get_leaves()
                descendant_labels_br = []
                descendant_labels_bg = []
                if args.colour_branches_by:
                    descendant_labels_br = descendant_labels(descendants, args.colour_branches_by)
                if args.colour_backgrounds_by:
                    descendant_labels_bg = descendant_labels(descendants, args.colour_backgrounds_by)

                # A clade is coloured only when all its leaves agree.
                if len(descendant_labels_br) == 1 and descendant_labels_br[0] != "none":
                    this_colour = colour_dict_br[descendant_labels_br[0]]
                    nstyle["vt_line_color"] = this_colour  # set branch colour
                    nstyle["hz_line_color"] = this_colour
                elif args.branch_support_colour and not node.is_leaf():
                    key = support_key(node)
                    if key in support_colours:
                        nstyle["vt_line_color"] = support_colours[key]  # take colour from support value
                        nstyle["hz_line_color"] = support_colours[key]
                    else:
                        print(" WARNING support values don't make sense. Note scale is assumed to be 0-1 unless using the --branch_support_percent flag.")
                if len(descendant_labels_bg) == 1 and descendant_labels_bg[0] != "none":
                    this_colour = colour_dict_bg[descendant_labels_bg[0]]
                    nstyle["bgcolor"] = this_colour  # set background colour
                node.set_style(nstyle)

    # Colour nodes by a column that is not also printed as a label (printed
    # labels were handled in the label loop above).
    if args.colour_nodes_by:
        if args.colour_nodes_by not in labels:
            print("- Colouring nodes by label " + args.colour_nodes_by)
            colour_dict = getColourPalette(column_values[args.colour_nodes_by], args, args.colour_nodes_by)
            for node in tree.get_leaves():
                if node.name in table:
                    this_label = table[node.name][args.colour_nodes_by]
                    this_colour = colour_dict[this_label]
                    if this_colour != "None":
                        node.img_style["fgcolor"] = this_colour
                        node.img_style["size"] = args.node_size

    # set tree style
    ts = TreeStyle()
    ts.show_leaf_name = bool(args.show_leaf_names)
    if args.length_scale:
        ts.scale = args.length_scale
    if args.branch_padding:
        ts.branch_vertical_margin = args.branch_padding
    if args.branch_support_print:
        ts.show_branch_support = True
    if args.fan:
        ts.mode = "c"
        print("\nPrinting circular tree (--fan)")
    else:
        print("\nPrinting rectangular tree, to switch to circular use --fan")
    if args.title:
        title = TextFace(args.title, fsize=20)
        title.margin_left = 20
        title.margin_top = 20
        ts.title.add_face(title, column=1)
    if args.no_guiding_lines:
        ts.draw_guiding_lines = False

    if args.data:
        print("\nPrinting data matrix as " + args.data_type + " with range (" + str(args.mindata) + "->" + str(args.maxdata) + ";" + str(args.centervalue) + "), height " + str(args.data_height) + ", width " + str(args.data_width))
        profileFace = ProfileFace(min_v=args.mindata, max_v=args.maxdata, center_v=args.centervalue, width=args.data_width, height=args.data_height, style=args.data_type)

        def mylayout(node):
            if node.is_leaf():
                add_face_to_node(profileFace, node, 0, aligned=True)

        ts.layout_fn = mylayout

    # set root branch length to zero
    tree.dist = 0

    # render tree
    tree.render(args.output, w=args.width, dpi=300, units="mm", tree_style=ts)
    print("\n FINISHED! Tree plot printed to file " + args.output)
    print("")

    if args.print_colour_dict:
        if colour_dict is not None:
            print(colour_dict)
        if args.colour_branches_by:
            print(colour_dict_br)
        if args.colour_backgrounds_by:
            print(colour_dict_bg)

    if args.interactive:
        print("\nEntering interactive mode...")
        tree.show(tree_style=ts)
def draw_tree(tree, conf, outfile):
    """Render ``tree`` to ``outfile`` and to ``outfile + '.svg'`` / ``outfile + '.pdf'``.

    The drawing is assembled from several ETE layout functions: basic node
    styling, leaf species/gene names, branch supports, coloured labels for a
    fixed list of tracked clades and, when at least one node carries a
    ``sequence`` attribute, a block view of the alignment.

    :param tree: tree object to draw (traversed, optionally re-rooted, rendered).
    :param conf: not used by this function; kept for interface compatibility.
    :param outfile: path of the first image; the SVG and PDF copies are written
        next to it by appending the extension.
    :return: None. If ``ete2`` cannot be imported the error is printed and the
        function returns without drawing anything.
    """
    try:
        from ete2 import (add_face_to_node, AttrFace, TextFace, TreeStyle,
                          RectFace, CircleFace, SequenceFace, random_color,
                          SeqMotifFace)
    except ImportError as e:
        print(e)
        return

    def ly_basic(node):
        """Hide node symbols, except for marked internal nodes."""
        if node.is_leaf():
            node.img_style['size'] = 0
        else:
            node.img_style['size'] = 0
            node.img_style['shape'] = 'square'
            if len(MIXED_RES) > 1 and hasattr(node, "tree_seqtype"):
                if node.tree_seqtype == "nt":
                    node.img_style["bgcolor"] = "#CFE6CA"
                    ntF = TextFace("nt", fsize=6, fgcolor='#444',
                                   ftype='Helvetica')
                    add_face_to_node(ntF, node, 10, position="branch-bottom")
            if len(NPR_TREES) > 1 and hasattr(node, "tree_type"):
                node.img_style['size'] = 4
                node.img_style['fgcolor'] = "steelblue"

        node.img_style['hz_line_width'] = 1
        node.img_style['vt_line_width'] = 1

    def ly_leaf_names(node):
        """Write the species (and gene name, when present) next to each leaf."""
        if node.is_leaf():
            spF = TextFace(node.species, fsize=10, fgcolor='#444444',
                           fstyle='italic', ftype='Helvetica')
            add_face_to_node(spF, node, column=0, position='branch-right')
            if hasattr(node, 'genename'):
                geneF = TextFace(" (%s)" % node.genename, fsize=8,
                                 fgcolor='#777777', ftype='Helvetica')
                add_face_to_node(geneF, node, column=1,
                                 position='branch-right')

    def ly_supports(node):
        """Print the support value on top of every non-root internal branch."""
        if not node.is_leaf() and node.up:
            supFace = TextFace("%0.2g" % (node.support), fsize=7,
                               fgcolor='indianred')
            add_face_to_node(supFace, node, column=0, position='branch-top')

    def ly_tax_labels(node):
        """Add one coloured label per tracked clade found in the leaf lineage."""
        if not node.is_leaf():
            return
        col = LABEL_START_COL
        for tname in TRACKED_CLADES:
            if hasattr(node, "named_lineage") and tname in node.named_lineage:
                linF = TextFace(tname, fsize=10, fgcolor='white')
                linF.margin_left = 3
                linF.margin_right = 2
                linF.background.color = lin2color[tname]
                add_face_to_node(linF, node, col, position='aligned')
                col += 1

        # Fill the remaining columns (up to len(TRACKED_CLADES)) with empty
        # faces, exactly as the original padding loop did.
        for pad_col in range(col, len(TRACKED_CLADES)):
            add_face_to_node(TextFace('', fsize=10, fgcolor='slategrey'),
                             node, pad_col, position='aligned')

    def ly_block_alg(node):
        """Draw the leaf sequence as blocks: one motif per run of non-gap sites."""
        if node.is_leaf() and 'sequence' in node.features:
            motifs = []
            last_lt = None  # start index of the ungapped run currently open
            for c, lt in enumerate(node.sequence):
                if lt != '-':
                    if last_lt is None:
                        last_lt = c
                    if c + 1 == len(node.sequence):
                        # the sequence ends inside a run: close it here
                        start, end = last_lt, c
                        motifs.append([start, end, "()", 0, 12,
                                       "slategrey", "slategrey", None])
                        last_lt = None
                elif lt == '-':
                    if last_lt is not None:
                        start, end = last_lt, c - 1
                        motifs.append([start, end, "()", 0, 12,
                                       "grey", "slategrey", None])
                        last_lt = None

            seqFace = SeqMotifFace(node.sequence, motifs,
                                   intermotif_format="line",
                                   seqtail_format="line",
                                   scale_factor=ALG_SCALE)
            add_face_to_node(seqFace, node, ALG_START_COL, aligned=True)

    TRACKED_CLADES = ["Eukaryota", "Viridiplantae", "Fungi",
                      "Alveolata", "Metazoa", "Stramenopiles", "Rhodophyta",
                      "Amoebozoa", "Crypthophyta", "Bacteria",
                      "Alphaproteobacteria", "Betaproteobacteria",
                      "Cyanobacteria", "Gammaproteobacteria"]
    # ["Opisthokonta", "Apicomplexa"]

    colors = random_color(num=len(TRACKED_CLADES), s=0.45)
    lin2color = {ln: colors[i] for i, ln in enumerate(TRACKED_CLADES)}

    LABEL_START_COL = 10
    ALG_START_COL = 40
    ts = TreeStyle()
    ts.draw_aligned_faces_as_table = False
    ts.draw_guiding_lines = False
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.scale = 160

    ts.layout_fn = [ly_basic, ly_leaf_names, ly_supports, ly_tax_labels]

    # Collect what kinds of nodes/sequences the tree contains; the layout
    # functions above read these through their closure.
    MIXED_RES = set()
    MAX_SEQ_LEN = 0
    NPR_TREES = []
    for n in tree.traverse():
        if hasattr(n, "tree_seqtype"):
            MIXED_RES.add(n.tree_seqtype)
        if hasattr(n, "tree_type"):
            NPR_TREES.append(n.tree_type)
        seq = getattr(n, "sequence", "")
        MAX_SEQ_LEN = max(len(seq), MAX_SEQ_LEN)

    if MAX_SEQ_LEN:
        # ALG_SCALE only exists when ly_block_alg is actually registered.
        ALG_SCALE = min(1, 1000. / MAX_SEQ_LEN)
        ts.layout_fn.append(ly_block_alg)

    if len(NPR_TREES) > 1:
        rF = RectFace(4, 4, "steelblue", "steelblue")
        rF.margin_right = 10
        rF.margin_left = 10
        ts.legend.add_face(rF, 0)
        ts.legend.add_face(TextFace(" NPR node"), 1)
        ts.legend_position = 3

    if len(MIXED_RES) > 1:
        rF = RectFace(20, 20, "#CFE6CA", "#CFE6CA")
        rF.margin_right = 10
        rF.margin_left = 10
        ts.legend.add_face(rF, 0)
        ts.legend.add_face(TextFace(" Nucleotide based alignment"), 1)
        ts.legend_position = 3

    # Best-effort annotation and rooting: a failure must not prevent rendering,
    # but it is reported instead of being silently discarded.
    # NOTE(review): ``out`` is not assigned anywhere in this function (both
    # candidate assignments below are commented out). Unless a module-level
    # ``out`` exists, ``set_outgroup`` raises NameError and the tree is
    # rendered with its original rooting.
    try:
        tree.set_species_naming_function(spname)
        annotate_tree_with_ncbi(tree)
        a = tree.search_nodes(species='Dictyostelium discoideum')[0]
        b = tree.search_nodes(species='Chondrus crispus')[0]
        #out = tree.get_common_ancestor([a, b])
        #out = tree.search_nodes(species='Haemophilus parahaemolyticus')[0].up
        tree.set_outgroup(out)
        tree.swap_children()
    except Exception as err:
        print("WARNING: tree annotation/rooting step skipped: %s" % (err,))

    tree.render(outfile, tree_style=ts, w=170, units='mm', dpi=150)
    tree.render(outfile + '.svg', tree_style=ts, w=170, units='mm', dpi=150)
    tree.render(outfile + '.pdf', tree_style=ts, w=170, units='mm', dpi=150)
def main(argv):
    """Plot a taxonomic profile as a tree with abundance bubbles and export it.

    Command-line options (``argv`` is the argument list without the program
    name):

    * ``-h`` / ``--Help``: print usage and exit (status 2, as before).
    * ``-i`` / ``--InputFile``: profile file to read.
    * ``-t`` / ``--Title``: title drawn above the tree (default ``'Title'``).
    * ``-l`` / ``--LabelLeaves``: show leaf names.
    * ``-n`` / ``--LabelInternalNodes``: print node names on branches.
    * ``-o`` / ``--OutFile``: image file written by ``Tree.render``.
    * ``-w`` / ``--Width``: image width in mm (default 750).
    * ``-x`` / ``--OutFileXML``: PhyloXML file to write.

    Input lines starting with ``#`` or ``@`` and blank lines are ignored. On
    every other line the 4th whitespace-separated column is read as a
    ``|``-separated taxonomy path and the last column as the weight of the
    path's final name.

    :param argv: list of command-line arguments.
    :raises SystemExit: on ``-h``/``--Help`` or on an unparsable option list.
    """
    input_file = ''
    title = 'Title'
    label_internal_nodes = False
    label_leaves = False
    out_file = ''
    width = 750
    out_file_xml = ''
    try:
        # "h" takes no argument so that a bare -h / --Help prints the usage
        # text instead of failing with "option requires argument".
        # NOTE(review): the long flag options LabelLeaves/LabelInternalNodes
        # still require a (ignored) value; left untouched for compatibility.
        opts, args = getopt.getopt(
            argv, "hi:t:lno:w:x:",
            ["Help", "InputFile=", "Title=", "LabelLeaves=",
             "LabelInternalNodes=", "OutFile=", "Width=", "OutFileXML="])
    except getopt.GetoptError:
        print('Unknown option, call using: ./PlotTree.py -i <InputCAMIFile> -t <Title> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -o <OutFile.png> -x <Outfile.xml> -w <Width>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--Help"):
            print('./PlotTree.py -i <InputCAMIFile> -t <Title> -l <LabelLeavesFlag> -n <LabelInternalNodesFlag> -o <OutFile> -x <OutFile.xml> -w <Width>')
            sys.exit(2)
        elif opt in ("-i", "--InputFile"):
            input_file = arg
        elif opt in ("-t", "--Title"):
            title = arg
        elif opt in ("-l", "--LabelLeaves"):
            label_leaves = True
        elif opt in ("-n", "--LabelInternalNodes"):
            label_internal_nodes = True
        elif opt in ("-o", "--OutFile"):
            out_file = arg
        elif opt in ("-w", "--Width"):
            width = int(arg)
        elif opt in ("-x", "--OutFileXML"):
            out_file_xml = arg

    # Read the common kmer profile
    with open(input_file, 'r') as fid:
        raw_lines = fid.readlines()

    ckm_tax_paths = []
    ckm_name_to_perc = dict()
    for line in raw_lines:
        # Put placeholders in for missing names: "||" -> "|NONAME|".
        # Replacing one occurrence at a time also handles runs such as "|||".
        while "||" in line:
            line = line.replace("||", "|NONAME|", 1)

        # Don't parse comments, header lines or blank/whitespace-only lines
        if not line.strip() or line[0] in '#@':
            continue
        columns = line.split()
        tax_path = columns[3]  # the taxonomy path
        ckm_tax_paths.append(tax_path)
        ckm_name_to_perc[tax_path.split("|")[-1]] = columns[-1]  # the weight

    # Create the tree
    t = Tree()
    names_to_nodes = dict()
    for tax_path in ckm_tax_paths:
        split_tax_path = tax_path.split("|")
        leaf_name = split_tax_path[-1]
        if len(split_tax_path) == 1:
            # A single name is a superkingdom: connect directly to the root
            names_to_nodes[leaf_name] = t.add_child(name=leaf_name)
        elif split_tax_path[-2] in names_to_nodes:
            # The parent is already in the tree
            parent = names_to_nodes[split_tax_path[-2]]
            names_to_nodes[leaf_name] = parent.add_child(name=leaf_name)
        else:
            # Otherwise walk up the path, skipping NONAME placeholders, and
            # attach to the first named ancestor
            j = 2
            while split_tax_path[-j] == "NONAME":
                j = j + 1
            parent = names_to_nodes[split_tax_path[-j]]
            names_to_nodes[leaf_name] = parent.add_child(name=leaf_name)

    # Show the tree
    #print t.get_ascii(show_internal=True)

    def layout(node):
        """Draw a bubble whose radius grows with the square root of the weight."""
        if node.name in ckm_name_to_perc:
            ckm_perc = float(ckm_name_to_perc[node.name])
        else:
            ckm_perc = 0
        F = CircleFace(radius=3.14 * math.sqrt(ckm_perc), color="RoyalBlue",
                       style="sphere")
        F.border.width = None
        F.opacity = 0.6
        faces.add_face_to_node(F, node, 0, position="branch-right")
        if label_internal_nodes:
            faces.add_face_to_node(TextFace(node.name, fsize=7), node, 0,
                                   position="branch-top")

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "r"
    ts.show_leaf_name = label_leaves
    ts.min_leaf_separation = 50
    ts.title.add_face(TextFace(title, fsize=20), column=0)

    # Export the tree to a png image
    t.render(out_file, w=width, units="mm", tree_style=ts)

    # Export the xml file
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    phylo.phyloxml_phylogeny.set_name(title)
    project.add_phylogeny(phylo)
    with open(out_file_xml, 'w') as xml_out:
        project.export(xml_out)
def layout(node):
    """Layout function: add taxonomy labels and, for leaves, a sequence/motif face.

    Reads several names that are not defined in this function: ``seqreclist``
    (records exposing ``.seq``, ``.features`` and ``.id``), ``taxids`` (a
    sequence searched with ``.index(int(node.name))`` to find the matching
    record), ``get_color`` and ``annotation``.

    - Nodes with a truthy ``rank`` in order/class/phylum/kingdom get their
      ``sci_name`` printed on top of the branch.
    - Every leaf gets its ``sci_name`` to the right of the branch.
    - Every leaf not named ``'annotation'`` gets an aligned ``SeqMotifFace``
      built from the record's ``domain`` and ``motif`` features, followed by
      the record id in aligned column 1.

    :param node: tree node being laid out; leaf names other than
        ``'annotation'`` must be convertible with ``int()``.
    """
    # print node.rank
    # print node.sci_name
    if getattr(node, "rank", None):
        if (node.rank in ['order', 'class', 'phylum', 'kingdom']):
            rank_face = AttrFace("sci_name", fsize=7, fgcolor="indianred")
            node.add_face(rank_face, column=0, position="branch-top")
    if node.is_leaf():
        sciname_face = AttrFace("sci_name", fsize=9, fgcolor="steelblue")
        node.add_face(sciname_face, column=0, position="branch-right")
    if node.is_leaf() and not node.name == 'annotation':
        # Here we add faces and need to play with the SeqMotifFace.
        seq = str(seqreclist[taxids.index(int(node.name))].seq)
        motifs = []  #[[0,len(seq), "seq", 10, 10, None, None, None]]
        for f in seqreclist[taxids.index(int(node.name))].features:
            if f.type == 'domain':
                # Domains are drawn as "[]" boxes labelled with the feature name.
                motifs.append([f.location.start, f.location.end, "[]", None, 10, "blue", f.qualifiers.get('color', get_color(f.qualifiers['name'])).lower(), "arial|8|black|%s" % f.qualifiers['name']])
            if f.type == 'motif':
                # Overlaps have to be resolved here: a motif is only drawn
                # where no previously added "seq" motif already covers it.
                s = f.location.start
                e = f.location.end
                flag = True  # stays True while no earlier "seq" motif overlaps [s, e]
                overlappedm = []
                for m in motifs:
                    if m[2] == 'seq' and m[0] < e and m[1] > s:
                        # We have an overlap; the preceding motif always stays on top.
                        flag = False
                        overlappedm.append(m)
                if not flag:
                    # Several overlaps may exist: scan every position of the
                    # new motif and emit one segment per uncovered stretch.
                    sflag = False  # a segment start (ts) has been found
                    eflag = False  # the matching segment end (te) has been found
                    for x in range(s, e + 1):
                        if not sflag:
                            # Check whether a segment can start at x, i.e. x is
                            # not inside any overlapped motif.
                            overlap = False
                            for m in overlappedm:
                                if x >= m[0] and x < m[1]:
                                    overlap = True
                            if not overlap:
                                ts = x
                                sflag = True
                        # Check whether it is time to end the open segment:
                        # x is the start of an overlapped motif or the last
                        # position of the new motif.
                        if sflag and not eflag:
                            overlap = False
                            for m in overlappedm:
                                if x == m[0]:
                                    overlap = True
                            if overlap or x == e:
                                te = x
                                eflag = True
                        if sflag and eflag:
                            # Emit the finished segment and reset for the next one.
                            motifs.append([ts, te, "seq", 10, 10, "black", f.qualifiers.get('color', get_color(f.qualifiers['name'])).lower(), None])
                            sflag = False
                            eflag = False
                if flag:
                    # No overlap at all: draw the motif over its full extent.
                    motifs.append([f.location.start, f.location.end, "seq", 10, 10, "black", f.qualifiers.get('color', get_color(f.qualifiers['name'])).lower(), None])
        seqFace = SeqMotifFace(seq, motifs, scale_factor=1, seq_format="[]")
        seqFace.overlaping_motif_opacity = 1.0
        # seqFace.fg=aafgcolors
        # seqFace.bg=aabgcolors_gray
        add_face_to_node(seqFace, node, 0, position="aligned")
        # gi=taxid2gi[int(node.name)]
        add_face_to_node(TextFace(' ' + seqreclist[taxids.index(int(node.name))].id + ' '), node, column=1, position="aligned")
        # add_face_to_node(TextFace(' '+str(int(node.name))+' '),node,column=2, position = "aligned")
        # add_face_to_node(TextFace(' '+str(gi2variant[gi])+' '),node,column=3, position = "aligned")
    # We currently disable annotation.
    # NOTE(review): ``s`` is computed below but not used in the visible body;
    # the code that consumed it appears to be missing - confirm against the
    # full file.
    if node.is_leaf() and node.name == 'annotation':
        if (annotation):
            s = annotation
            # get_hist_ss_in_aln_as_string(msa_tr)
        else:
            # Blank string as long as the longest sequence in seqreclist.
            s = ' ' * max(map(lambda x: len(x.seq), seqreclist))
def run(args):
    """Print or draw every tree yielded by ``args.src_tree_iterator``.

    In text mode (``args.text_mode``) each tree is printed as ASCII art and the
    function returns. Otherwise a ``TreeStyle`` is configured from ``args``,
    the face definitions in the module-level ``FACES`` list are applied to
    every node, and each tree is either rendered to ``t<index>.<args.image>``
    or shown interactively.

    When ``args.raxml`` is set, trees are read as text and branch annotations
    of the form ``:<length>[<support>]`` are rewritten to NHX support tags
    before parsing.

    :param args: parsed command-line namespace. ``args.height`` and
        ``args.width`` are normalised to ``None`` when falsy.
    :return: None
    """
    # ``re`` must be bound before the text-mode branch: an ``import re`` placed
    # later in this body makes ``re`` a function-local name, so using it
    # earlier raised UnboundLocalError in text mode with --raxml.
    import re

    global FACES

    def load_newick(tfile):
        """Return what the tree constructor should receive for ``tfile``."""
        if not args.raxml:
            return tfile
        with open(tfile) as handle:
            return re.sub(r":(\d+\.\d+)\[(\d+)\]",
                          r":\1[&&NHX:support=\2]", handle.read())

    if args.text_mode:
        # Only ``Tree`` is imported here, as in the original text-mode branch.
        from ete2 import Tree
        for tfile in args.src_tree_iterator:
            t = Tree(load_newick(tfile))
            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    import random
    import colorsys
    from collections import defaultdict
    from ete2 import (Tree, PhyloTree, TextFace, RectFace, faces, TreeStyle,
                      add_face_to_node, random_color)

    def resolve_color(spec, face_conf, node, f2color, f2last_seed):
        """Expand an ``auto(<attr>)`` colour spec into a per-value colour.

        Any other spec (including a falsy one) is returned unchanged. With an
        empty ``auto()`` the face's own value attribute is used as the key.
        """
        if not spec:
            return spec
        auto_m = re.search(r"auto\(([^)]*)\)", spec)
        if not auto_m:
            return spec
        target_attr = auto_m.groups()[0].strip()
        color_keyattr = (target_attr or face_conf["value"]).lstrip('@')
        color_bin = getattr(node, color_keyattr, None)

        last_seed = f2last_seed.setdefault(color_keyattr, random.random())
        seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
        f2last_seed[color_keyattr] = seed

        return f2color.setdefault(color_bin, random_color(h=seed))

    if args.face:
        FACES = parse_faces(args.face)
    else:
        FACES = []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A user face that already prints the name replaces the default leaf name.
    for f in FACES:
        if f["value"] == "@name":
            ts.show_leaf_name = False
            break

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces(
            ['value:@sci_name, size:10, fstyle:italic',
             'value:@taxid, color:grey, size:6, format:" - %s"',
             'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
             'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
             ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' % args.alg_type]
        ))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']
        ))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' % bubble,
                 ]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True

    if args.show_branch_length:
        ts.show_branch_length = True

    if args.force_topology:
        ts.force_topology = True

    ts.layout_fn = lambda x: None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        t = PhyloTree(load_newick(tfile))

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        if args.heatmap:
            DEFAULT_COLOR_SATURATION = 0.3
            BASE_LIGHTNESS = 0.7

            def gradient_color(value, max_value, saturation=0.5, hue=0.1):
                """Return a hex colour that gets darker as ``value`` nears ``max_value``."""
                def rgb2hex(rgb):
                    return '#%02x%02x%02x' % rgb

                def hls2hex(h, l, s):
                    return rgb2hex(tuple(map(lambda x: int(x * 255),
                                             colorsys.hls_to_rgb(h, l, s))))

                lightness = 1 - (value * BASE_LIGHTNESS) / max_value
                return hls2hex(hue, lightness, DEFAULT_COLOR_SATURATION)

            heatmap_data = {}
            max_value, min_value = None, None
            with open(args.heatmap) as heatmap_file:
                for line in heatmap_file:
                    # '#COLNAMES' and other '#' lines, and blank lines, carry no data
                    if line.startswith('#') or not line.strip():
                        continue
                    fields = line.split('\t')
                    name = fields[0].strip()
                    # Empty cells (including an empty last cell that only
                    # holds the newline) are missing values.
                    values = [float(x) if x.strip() else None
                              for x in fields[1:]]
                    # Missing cells must not take part in the min/max: a None
                    # extreme made the abs() computations below fail.
                    present = [v for v in values if v is not None]
                    if present:
                        maxv = max(present)
                        minv = min(present)
                        if max_value is None or maxv > max_value:
                            max_value = maxv
                        if min_value is None or minv < min_value:
                            min_value = minv
                    heatmap_data[name] = values

            heatmap_center_value = 0
            heatmap_color_center = "white"
            heatmap_color_up = 0.3
            heatmap_color_down = 0.7
            heatmap_color_missing = "black"

            if max_value is None:
                # No numeric cell at all: nothing will be colour-graded.
                max_value = min_value = heatmap_center_value

            heatmap_max_value = abs(heatmap_center_value - max_value)
            heatmap_min_value = abs(heatmap_center_value - min_value)

            if heatmap_center_value <= min_value:
                heatmap_max_value = heatmap_min_value + heatmap_max_value
            else:
                heatmap_max_value = max(heatmap_min_value, heatmap_max_value)

        # scale the tree
        if not args.height:
            args.height = None
        if not args.width:
            args.width = None

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            if len(node.children) == 1:
                # make single-child (unary) nodes visible
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            ftype_pos = defaultdict(int)  # next free column per face position

            for f in FACES:
                if not (f['nodetype'] == 'any' or
                        (f['nodetype'] == 'leaf' and node.is_leaf()) or
                        (f['nodetype'] == 'internal' and not node.is_leaf())):
                    continue

                # if node passes face filters
                if not node_matcher(node, f["filters"]):
                    continue

                if f["value"].startswith("@"):
                    fvalue = getattr(node, f["value"][1:], None)
                else:
                    fvalue = f["value"]

                # only generate a face if the node's attribute has content
                if fvalue is None:
                    continue

                fsize = f["size"]
                # Foreground first, then background: keeps the order of the
                # random draws used for automatic colours.
                fcolor = resolve_color(f['color'], f, node,
                                       f2color, f2last_seed)
                fbgcolor = resolve_color(f["bgcolor"], f, node,
                                         f2color, f2last_seed)

                # Reset so an unrecognised ftype cannot re-add the face built
                # for a previous definition (or hit an unbound name).
                F = None

                if f["ftype"] == "text":
                    if f.get("format", None):
                        fvalue = f["format"] % fvalue

                    F = TextFace(fvalue,
                                 fsize=fsize,
                                 fgcolor=fcolor or "black",
                                 fstyle=f.get('fstyle', None))

                elif f["ftype"] == "fullseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                           seqtail_format="seq",
                                           height=fsize)
                elif f["ftype"] == "compactseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="compactseq",
                                           seqtail_format="compactseq",
                                           height=fsize)
                elif f["ftype"] == "blockseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="blockseq",
                                           seqtail_format="blockseq",
                                           height=fsize,
                                           fgcolor=fcolor or "slategrey",
                                           bgcolor=fbgcolor or "slategrey",
                                           scale_factor=1.0)
                    # already applied to the blocks, not to the face background
                    fbgcolor = None
                elif f["ftype"] == "bubble":
                    try:
                        v = float(fvalue)
                    except ValueError:
                        rad = fsize
                    else:
                        rad = fsize * v
                    F = faces.CircleFace(radius=rad, style="sphere",
                                         color=fcolor or "steelblue")

                elif f["ftype"] == "heatmap":
                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                    else:
                        col = f["column"]

                    for i, value in enumerate(heatmap_data.get(node.name, [])):
                        ftype_pos[f["pos"]] += 1

                        if value is None:
                            color = heatmap_color_missing
                        elif value > heatmap_center_value:
                            color = gradient_color(
                                abs(heatmap_center_value - value),
                                heatmap_max_value, hue=heatmap_color_up)
                        elif value < heatmap_center_value:
                            color = gradient_color(
                                abs(heatmap_center_value - value),
                                heatmap_max_value, hue=heatmap_color_down)
                        else:
                            color = heatmap_color_center
                        node.add_face(RectFace(20, 20, color, color),
                                      position="aligned", column=col + i)
                    # The heatmap cells were added directly; no single face.
                    F = None

                elif f["ftype"] == "profile":
                    # internal profiles?
                    F = None
                elif f["ftype"] == "barchart":
                    F = None
                elif f["ftype"] == "piechart":
                    F = None

                # Add the Face
                if F:
                    F.opacity = f['opacity'] or 1.0

                    # Set face general attributes
                    if fbgcolor:
                        F.background.color = fbgcolor

                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                        ftype_pos[f["pos"]] += 1
                    else:
                        col = f["column"]
                    node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" % (tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height,
                     units=args.size_units)
        else:
            t.show(None, tree_style=ts)
def plot_blast_result(tree_file, blast_result_file_list, id2description, id2mlst):
    '''
    Staph aureus PVL project (with Laure Jaton).

    Display a phylogeny side by side with the conservation of virulence
    factors. Requires the MLST results, the complete set of tblastn results
    (virulence factors vs chromosomes) and a correspondence between genome
    accessions and the names to show in the phylogeny. Patient identifiers
    are replaced by "CHUV n" in the descriptions.

    The tree is midpoint-rooted and ladderized, each leaf gets one aligned
    cell per query gene showing the identity value (or '-'), and the result
    is written to "saureus_tree.svg" and "new_tree.nw".

    :param tree_file: phylogeny in newick format whose leaf identifiers match the keys of all dictionaries used here
    :param blast_result_file_list: tblastn result files, virulence factors vs chromosome (best hit only)
    :param id2description: genome identifier used in the tree -> description to display (e.g. S aureus Newman)
    :param id2mlst: tree identifier -> S. aureus ST type
    :return: None
    '''

    # blast2data[<column 1>][<column 0>] = [float(column 2), int(column 8), int(column 9)]
    # NOTE(review): presumably BLAST tabular output, i.e. subject id, query id,
    # % identity and hit start/end on the subject - confirm against the files.
    blast2data = {}
    queries = []  # query names in order of first appearance
    for one_blast_file in blast_result_file_list:
        with open(one_blast_file, 'r') as f:
            for line in f:
                line = line.split('\t')
                if line[1] not in blast2data:
                    blast2data[line[1]] = {}
                    blast2data[line[1]][line[0]] = [float(line[2]), int(line[8]), int(line[9])]
                else:
                    blast2data[line[1]][line[0]] = [float(line[2]), int(line[8]), int(line[9])]
                if line[0] not in queries:
                    queries.append(line[0])
    print blast2data
    print queries

    # For every genome, compare each pair of genes: when the first coordinate
    # of one hit falls inside the other hit's range, the hit with the lower
    # identity is dropped.
    for one_blast in blast2data.keys():
        for ref_gene in blast2data[one_blast].keys():
            for query_gene in blast2data[one_blast].keys():
                overlap = False
                if ref_gene == query_gene:
                    continue
                # Debug output for one hard-coded genome/gene pair.
                if one_blast == 'NC_002745' and ref_gene == 'selm':
                    print 'target:', ref_gene, blast2data[one_blast][ref_gene]
                    print query_gene, blast2data[one_blast][query_gene]
                # check if position is overlapping
                try:
                    sorted_coordinates = sorted(blast2data[one_blast][ref_gene][1:3])
                    if blast2data[one_blast][query_gene][1] <= sorted_coordinates[1] and blast2data[one_blast][query_gene][1] >= sorted_coordinates[0]:
                        print 'Overlaping locations!'
                        print one_blast, ref_gene, blast2data[one_blast][ref_gene]
                        print one_blast, query_gene, blast2data[one_blast][query_gene]
                        overlap = True
                    sorted_coordinates = sorted(blast2data[one_blast][query_gene][1:3])
                    if blast2data[one_blast][ref_gene][1] <= sorted_coordinates[1] and blast2data[one_blast][ref_gene][1] >= sorted_coordinates[0]:
                        print 'Overlapping locations!'
                        print one_blast, ref_gene, blast2data[one_blast][ref_gene]
                        print one_blast, query_gene, blast2data[one_blast][query_gene]
                        overlap = True
                    if overlap:
                        # Keep the hit with the higher identity.
                        if blast2data[one_blast][ref_gene][0] > blast2data[one_blast][query_gene][0]:
                            del blast2data[one_blast][query_gene]
                            print 'removing', query_gene
                        else:
                            del blast2data[one_blast][ref_gene]
                            print 'removing', ref_gene
                            # the reference gene is gone: stop comparing against it
                            break
                except KeyError:
                    # One of the two genes was already deleted in an earlier iteration.
                    print 'colocation already resolved:', query_gene, ref_gene

    # The string statement below is disabled code (gene filtering), kept as found.
    ''' rm_genes = ['selv','spsmA1','psmB1','psmB2','ses','set','sel','selX','sek','sel2','LukF', 'LukM', 'hly', 'hld' , 'hlgA', 'hlgB', 'hlgC', 'selq1', 'sec3', 'sek2', 'seq2', 'lukD', 'lukE', 'eta', 'etb', 'sec', 'tst'] #rm_genes = ['icaR','icaA','icaB','icaC','icaD', 'sdrF', 'sdrH'] for gene in rm_genes: queries.pop(queries.index(gene)) '''
    #queries = ['selv']
    t1 = Tree(tree_file)
    #t.populate(8)
    # Calculate the midpoint node and set it as tree outgroup
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()
    #t2=t1
    #for lf in t2.iter_leaves():
    #    try:
    #        lf.name = ' %s (%s)' % (id2description[lf.name], id2mlst[lf.name])
    #    except:
    #        #lf.name = ' %s (%s)' % (lf.name, lf.name)
    #
    #        a = TextFace(' %s (%s)' % (lf.name, id2mlst[lf.name]))
    #        a.fgcolor = "red"
    #        lf.name = a
    #t2.render("test.svg", dpi=800, h=400)
    #import sys
    #sys.exit()

    for lf in t1.iter_leaves():
        #lf.add_face(AttrFace("name", fsize=20), 0, position="branch-right")
        lf.branch_vertical_margin = 0
        #data = [random.randint(0,2) for x in xrange(3)]
        for col, value in enumerate(queries):
            # The leaf with this hard-coded identifier additionally receives
            # the gene names as header cells.
            if lf.name == "1505183575":
                'first row, print gene names'
                #print 'ok!'
                n = TextFace(' %s ' % str(value))
                n.margin_top = 4
                n.margin_right = 4
                n.margin_left = 4
                n.margin_bottom = 4
                n.inner_background.color = "white"
                n.opacity = 1.
                lf.add_face(n, col, position="aligned")
            # Identity cell: the value (one decimal, or '100') on a coloured
            # background when identity > 70, otherwise '-' on white. A missing
            # genome/gene/description (KeyError) also gives '-'.
            # NOTE(review): ``m``, ``m2`` and ``rgb2hex`` are not defined in this
            # function; presumably module-level colour mappers, ``m`` for
            # 'CHUV' genomes and ``m2`` for the others - confirm.
            try:
                identity_value = blast2data[lf.name][value][0]
                if 'CHUV' in id2description[lf.name]:
                    if float(identity_value) > 70:
                        if str(identity_value) == '100.00' or str(identity_value) == '100.0':
                            identity_value = '100'
                        else:
                            identity_value = str(round(float(identity_value), 1))
                        n = TextFace(' %s ' % str(identity_value))
                        n.margin_top = 4
                        n.margin_right = 4
                        n.margin_left = 4
                        n.margin_bottom = 4
                        n.inner_background.color = rgb2hex(m.to_rgba(float(identity_value)))
                        # white text once identity exceeds 92
                        if float(identity_value) > 92:
                            n.fgcolor = "white"
                        n.opacity = 1.
                        lf.add_face(n, col, position="aligned")
                    else:
                        identity_value = '-'
                        n = TextFace(' %s ' % str(identity_value))
                        n.margin_top = 2
                        n.margin_right = 2
                        n.margin_left = 2
                        n.margin_bottom = 2
                        n.inner_background.color = "white"
                        n.opacity = 1.
                        lf.add_face(n, col, position="aligned")
                else:
                    if float(identity_value) > 70:
                        if str(identity_value) == '100.00' or str(identity_value) == '100.0':
                            identity_value = '100'
                        else:
                            identity_value = str(round(float(identity_value), 1))
                        n = TextFace(' %s ' % str(identity_value))
                        n.margin_top = 2
                        n.margin_right = 2
                        n.margin_left = 2
                        n.margin_bottom = 2
                        n.inner_background.color = rgb2hex(m2.to_rgba(float(identity_value)))
                        if float(identity_value) > 92:
                            n.fgcolor = "white"
                        n.opacity = 1.
                        lf.add_face(n, col, position="aligned")
                    else:
                        identity_value = '-'
                        n = TextFace(' %s ' % str(identity_value))
                        n.margin_top = 2
                        n.margin_right = 2
                        n.margin_left = 2
                        n.margin_bottom = 2
                        n.inner_background.color = "white"
                        n.opacity = 1.
                        lf.add_face(n, col, position="aligned")
            except KeyError:
                identity_value = '-'
                n = TextFace(' %s ' % str(identity_value))
                n.margin_top = 2
                n.margin_right = 2
                n.margin_left = 2
                n.margin_bottom = 2
                n.inner_background.color = "white"
                n.opacity = 1.
                lf.add_face(n, col, position="aligned")
        # Relabel the leaf as " <description> (<ST>)".
        # NOTE(review): the bare ``except`` fallback looks up id2mlst[lf.name]
        # again, so it can raise from inside the handler, and it assigns a
        # TextFace object (not a string) to ``lf.name`` - confirm intent.
        try:
            lf.name = ' %s (%s)' % (id2description[lf.name], id2mlst[lf.name])
        except:
            #lf.name = ' %s (%s)' % (lf.name, lf.name)
            a = TextFace(' %s (%s)' % (lf.name, id2mlst[lf.name]))
            a.fgcolor = "red"
            lf.name = a
            #.add_face(a, 0, position="aligned")
    # add bootstrap support
    #for n in t1.traverse():
    #    if n.is_leaf():
    #        continue
    #    n.add_face(TextFace(str(t1.support)), column=0, position = "branch-bottom")
    #ts = TreeStyle()
    #ts.show_branch_support = True
    # , tree_style=ts
    t1.render("saureus_tree.svg", dpi=800, h=400)
    t1.write(format=0, outfile="new_tree.nw")
def main(): args = parse_args() if args.data: print "\nReading tree from " + args.tree + " and data matrix from " + args.data tree = ClusterTree(args.tree, text_array=args.data) else: print "\nReading tree from " + args.tree tree = Tree(args.tree) if args.midpoint: R = tree.get_midpoint_outgroup() tree.set_outgroup(R) print "- Applying midpoint rooting" elif args.outgroup: tree.set_outgroup( tree&args.outgroup ) print "- Rooting using outgroup " + args.outgroup if not args.no_ladderize: tree.ladderize() print "- Ladderizing tree" table, column_list, column_values = readtable(args, tree.get_leaf_names()) labels = [] if args.labels: print "\nThese labels will be printed next to each strain:" for label in args.labels: if label in column_list: labels.append(label) print " " + label else: print "WARNING: specified label " + label + " was not found in the columns of the info file provided, " + args.info # set node styles # start by setting all to no shapes, black labels for n in tree.traverse(): nstyle = NodeStyle() nstyle["fgcolor"] = "black" nstyle["size"] = 0 n.set_style(nstyle) # add colour tags next to nodes if args.colour_tags: colour_tags = [] print "\nThese columns will be used to generate colour tags:" for label in args.colour_tags: if label in column_list: colour_tags.append(label) print " " + label else: print "\tWARNING: specified label for colour tagging, " + label + ", was not found in the columns of the info file provided, " + args.info for i in range(0,len(colour_tags)): label = colour_tags[i] colour_dict = getColourPalette(column_values[label],args,label) print "- Adding colour tag for " + label for node in tree.get_leaves(): this_face = Face() this_face.margin_left = args.padding node.add_face(this_face, column=0, position = "aligned") if node.name in table: this_label = table[node.name][label] this_colour = colour_dict[this_label] else: this_colour = "white" this_face = Face() this_face.background.color = this_colour this_face.margin_right = 
args.margin_right this_face.margin_left = args.margin_left this_face.margin_top = args.margin_top this_face.margin_bottom = args.margin_bottom this_face.border.width = args.border_width this_face.border.color="white" node.add_face(this_face, column=i+1, position = "aligned") print else: colour_tags = [] # add labels as columns for i in range(0,len(labels)): label = labels[i] print "- Adding label " + label if label == args.colour_nodes_by: print " also colouring nodes by these values" colour_dict = getColourPalette(column_values[label],args,label) for node in tree.get_leaves(): if node.name in table: this_label = table[node.name][label] this_colour = colour_dict[this_label] else: this_label = "" this_colour = "black" this_face = TextFace(text=this_label, fsize = args.font_size) if args.tags: this_face.background.color = this_colour elif label == args.colour_nodes_by: this_face.fgcolor = this_colour this_face.margin_right = args.padding if i == 0: this_face.margin_left = args.padding node.add_face(this_face, column=i+len(colour_tags)+1, position = "aligned") # set leaves to coloured circles node.img_style["size"] = args.node_size if label == args.colour_nodes_by: node.img_style["fgcolor"] = this_colour if args.colour_branches_by or args.colour_backgrounds_by or args.branch_support_colour: if args.colour_branches_by: print "- Colouring branches by label " + args.colour_branches_by colour_dict_br = getColourPalette(column_values[args.colour_branches_by],args,args.colour_branches_by) if args.colour_backgrounds_by: print "- Colouring node backgrounds by label " + args.colour_backgrounds_by colour_dict_bg = getColourPalette(column_values[args.colour_backgrounds_by],args,args.colour_backgrounds_by) if args.branch_support_colour: print "- Colouring branches by support values" # colours extracted from R using rgb( colorRamp(c("white","red", "black"))(seq(0, 1, length = 100)), max = 255) # support_colours = {0.0:"#FFFFFF",0.01:"#FFFFFF", 0.02:"#FFF9F9", 0.03:"#FFF4F4", 
0.04:"#FFEFEF", 0.05:"#FFEAEA", 0.06:"#FFE5E5", 0.07:"#FFE0E0", 0.08:"#FFDADA", 0.09:"#FFD5D5", 0.1:"#FFD0D0", 0.11:"#FFCBCB", 0.12:"#FFC6C6", 0.13:"#FFC1C1", 0.14:"#FFBCBC", 0.15:"#FFB6B6", 0.16:"#FFB1B1", 0.17:"#FFACAC", 0.18:"#FFA7A7", 0.19:"#FFA2A2", 0.2:"#FF9D9D", 0.21:"#FF9797", 0.22:"#FF9292", 0.23:"#FF8D8D", 0.24:"#FF8888", 0.25:"#FF8383", 0.26:"#FF7E7E", 0.27:"#FF7979", 0.28:"#FF7373", 0.29:"#FF6E6E", 0.3:"#FF6969", 0.31:"#FF6464", 0.32:"#FF5F5F", 0.33:"#FF5A5A", 0.34:"#FF5454", 0.35:"#FF4F4F", 0.36:"#FF4A4A", 0.37:"#FF4545", 0.38:"#FF4040", 0.39:"#FF3B3B", 0.4:"#FF3636", 0.41:"#FF3030", 0.42:"#FF2B2B", 0.43:"#FF2626", 0.44:"#FF2121", 0.45:"#FF1C1C", 0.46:"#FF1717", 0.47:"#FF1212", 0.48:"#FF0C0C", 0.49:"#FF0707", 0.5:"#FF0202", 0.51:"#FC0000", 0.52:"#F70000", 0.53:"#F20000", 0.54:"#EC0000", 0.55:"#E70000", 0.56:"#E20000", 0.57:"#DD0000", 0.58:"#D80000", 0.59:"#D30000", 0.6:"#CE0000", 0.61:"#C80000", 0.62:"#C30000", 0.63:"#BE0000", 0.64:"#B90000", 0.65:"#B40000", 0.66:"#AF0000", 0.67:"#A90000", 0.68:"#A40000", 0.69:"#9F0000", 0.7:"#9A0000", 0.71:"#950000", 0.72:"#900000", 0.73:"#8B0000", 0.74:"#850000", 0.75:"#800000", 0.76:"#7B0000", 0.77:"#760000", 0.78:"#710000", 0.79:"#6C0000", 0.8:"#670000", 0.81:"#610000", 0.82:"#5C0000", 0.83:"#570000", 0.84:"#520000", 0.85:"#4D0000", 0.86:"#480000", 0.87:"#420000", 0.88:"#3D0000", 0.89:"#380000", 0.9:"#330000", 0.91:"#2E0000", 0.92:"#290000", 0.93:"#240000", 0.94:"#1E0000", 0.95:"#190000", 0.96:"#140000", 0.97:"#0F0000", 0.98:"#0A0000", 0.99:"#050000", 1:"#000000"} # rgb( colorRamp(c("red", "black"))(seq(0, 1, length = 100)), max = 255)) support_colours = {} if args.branch_support_cutoff: for i in range(0,args.branch_support_cutoff): support_colours[i] = "#FF0000" for i in range(args.branch_support_cutoff,101): support_colours[i] = "#000000" else: if args.branch_support_percent: support_colours = 
{0:"#FF0000",1:"#FF0000",2:"#FC0000",3:"#F90000",4:"#F70000",5:"#F40000",6:"#F20000",7:"#EF0000",8:"#EC0000",9:"#EA0000",10:"#E70000",11:"#E50000",12:"#E20000",13:"#E00000",14:"#DD0000",15:"#DA0000",16:"#D80000",17:"#D50000",18:"#D30000",19:"#D00000",20:"#CE0000",21:"#CB0000",22:"#C80000",23:"#C60000",24:"#C30000",25:"#C10000",26:"#BE0000",27:"#BC0000",28:"#B90000",29:"#B60000",30:"#B40000",31:"#B10000",32:"#AF0000",33:"#AC0000",34:"#AA0000",35:"#A70000",36:"#A40000",37:"#A20000",38:"#9F0000",39:"#9D0000",40:"#9A0000",41:"#970000",42:"#950000",43:"#920000",44:"#900000",45:"#8D0000",46:"#8B0000",47:"#880000",48:"#850000",49:"#830000",50:"#800000",51:"#7E0000",52:"#7B0000",53:"#790000",54:"#760000",55:"#730000",56:"#710000",57:"#6E0000",58:"#6C0000",59:"#690000",60:"#670000",61:"#640000",62:"#610000",63:"#5F0000",64:"#5C0000",65:"#5A0000",66:"#570000",67:"#540000",68:"#520000",69:"#4F0000",70:"#4D0000",71:"#4A0000",72:"#480000",73:"#450000",74:"#420000",75:"#400000",76:"#3D0000",77:"#3B0000",78:"#380000",79:"#360000",80:"#330000",81:"#300000",82:"#2E0000",83:"#2B0000",84:"#290000",85:"#260000",86:"#240000",87:"#210000",88:"#1E0000",89:"#1C0000",90:"#190000",91:"#170000",92:"#140000",93:"#120000",94:"#0F0000",95:"#0C0000",96:"#0A0000",97:"#070000",98:"#050000",99:"#020000",100:"#000000"} else: support_colours = {0.0:"#FF0000", 0.01:"#FF0000", 0.02:"#FC0000", 0.03:"#F90000", 0.04:"#F70000", 0.05:"#F40000", 0.06:"#F20000", 0.07:"#EF0000", 0.08:"#EC0000", 0.09:"#EA0000", 0.1:"#E70000", 0.11:"#E50000", 0.12:"#E20000", 0.13:"#E00000", 0.14:"#DD0000", 0.15:"#DA0000", 0.16:"#D80000", 0.17:"#D50000", 0.18:"#D30000", 0.19:"#D00000", 0.2:"#CE0000", 0.21:"#CB0000", 0.22:"#C80000", 0.23:"#C60000", 0.24:"#C30000", 0.25:"#C10000", 0.26:"#BE0000", 0.27:"#BC0000", 0.28:"#B90000", 0.29:"#B60000", 0.3:"#B40000", 0.31:"#B10000", 0.32:"#AF0000", 0.33:"#AC0000", 0.34:"#AA0000", 0.35:"#A70000", 0.36:"#A40000", 0.37:"#A20000", 0.38:"#9F0000", 0.39:"#9D0000", 0.4:"#9A0000", 0.41:"#970000", 
0.42:"#950000", 0.43:"#920000", 0.44:"#900000", 0.45:"#8D0000", 0.46:"#8B0000", 0.47:"#880000", 0.48:"#850000", 0.49:"#830000", 0.5:"#800000", 0.51:"#7E0000", 0.52:"#7B0000", 0.53:"#790000", 0.54:"#760000", 0.55:"#730000", 0.56:"#710000", 0.57:"#6E0000", 0.58:"#6C0000", 0.59:"#690000", 0.6:"#670000", 0.61:"#640000", 0.62:"#610000", 0.63:"#5F0000", 0.64:"#5C0000", 0.65:"#5A0000", 0.66:"#570000", 0.67:"#540000", 0.68:"#520000", 0.69:"#4F0000", 0.7:"#4D0000", 0.71:"#4A0000", 0.72:"#480000", 0.73:"#450000", 0.74:"#420000", 0.75:"#400000", 0.76:"#3D0000", 0.77:"#3B0000", 0.78:"#380000", 0.79:"#360000", 0.8:"#330000", 0.81:"#300000", 0.82:"#2E0000", 0.83:"#2B0000", 0.84:"#290000", 0.85:"#260000", 0.86:"#240000", 0.87:"#210000", 0.88:"#1E0000", 0.89:"#1C0000", 0.9:"#190000", 0.91:"#170000", 0.92:"#140000", 0.93:"#120000", 0.94:"#0F0000", 0.95:"#0C0000", 0.96:"#0A0000", 0.97:"#070000", 0.98:"#050000", 0.99:"#020000", 1.0:"#000000"} for node in tree.traverse(): nstyle = NodeStyle() nstyle["size"] = 0 if node.name in table: #print "Colouring individual " + node.name if args.colour_branches_by: nstyle["vt_line_color"] = colour_dict_br[table[node.name][args.colour_branches_by]] # set branch colour nstyle["hz_line_color"] = colour_dict_br[table[node.name][args.colour_branches_by]] if args.colour_backgrounds_by: if args.colour_branches_by in table[node.name]: if table[node.name][args.colour_branches_by] != "none": nstyle["bgcolor"] = colour_dict_bg[table[node.name][args.colour_backgrounds_by]] # set background colour node.set_style(nstyle) else: # internal node descendants = node.get_leaves() descendant_labels_br = [] descendant_labels_bg = [] for d in descendants: if args.colour_branches_by: if d.name in table: this_label_br = table[d.name][args.colour_branches_by] if this_label_br not in descendant_labels_br: descendant_labels_br.append(this_label_br) elif "none" not in descendant_labels_br: descendant_labels_br.append("none") if args.colour_backgrounds_by: if d.name in table: 
this_label_bg = table[d.name][args.colour_backgrounds_by] if this_label_bg not in descendant_labels_bg: descendant_labels_bg.append(this_label_bg) elif "none" not in descendant_labels_bg: descendant_labels_bg.append("none") # nstyle = NodeStyle() # nstyle["size"] = 0 if len(descendant_labels_br) == 1 and descendant_labels_br[0] != "none": this_colour = colour_dict_br[descendant_labels_br[0]] nstyle["vt_line_color"] = this_colour # set branch colour nstyle["hz_line_color"] = this_colour elif args.branch_support_colour and not node.is_leaf(): if int(node.support) in support_colours: nstyle["vt_line_color"] = support_colours[int(node.support)] # take colour from support value nstyle["hz_line_color"] = support_colours[int(node.support)] else: print " WARNING support values don't make sense. Note scale is assumed to be 0-1 unless using the --branch_support_percent flag." if len(descendant_labels_bg) == 1 and descendant_labels_bg[0] != "none": this_colour = colour_dict_bg[descendant_labels_bg[0]] nstyle["bgcolor"] = this_colour # set background colour node.set_style(nstyle) if args.colour_nodes_by: if args.colour_nodes_by not in labels: print "- Colouring nodes by label " + args.colour_nodes_by colour_dict = getColourPalette(column_values[args.colour_nodes_by],args,args.colour_nodes_by) for node in tree.get_leaves(): if node.name in table: this_label = table[node.name][args.colour_nodes_by] this_colour = colour_dict[this_label] if this_colour != "None": node.img_style["fgcolor"] = this_colour node.img_style["size"] = args.node_size for node in tree.traverse(): node.img_style["hz_line_width"] = args.branch_thickness node.img_style["vt_line_width"] = args.branch_thickness # set tree style ts = TreeStyle() if args.show_leaf_names: ts.show_leaf_name = True else: ts.show_leaf_name = False if args.length_scale: ts.scale = args.length_scale if args.branch_padding: ts.branch_vertical_margin = args.branch_padding if args.branch_support_print: ts.show_branch_support = True if 
args.fan: ts.mode = "c" print "\nPrinting circular tree (--fan)" else: print "\nPrinting rectangular tree, to switch to circular use --fan" if args.title: title = TextFace(args.title, fsize=20) title.margin_left = 20 title.margin_top = 20 ts.title.add_face(title, column=1) if args.no_guiding_lines: ts.draw_guiding_lines = False if args.data: print "\nPrinting data matrix as " + args.data_type + " with range (" + str(args.mindata) + "->" + str(args.maxdata) + ";" + str(args.centervalue) + "), height " + str(args.data_height) + ", width " + str(args.data_width) profileFace = ProfileFace(min_v=args.mindata, max_v=args.maxdata, center_v=args.centervalue, width=args.data_width, height=args.data_height, style=args.data_type) def mylayout(node): if node.is_leaf(): add_face_to_node(profileFace, node, 0, aligned=True) ts.layout_fn = mylayout # set root branch length to zero tree.dist=0 # render tree tree.render(args.output, w=args.width, dpi=300, units="mm", tree_style=ts) print "\n FINISHED! Tree plot printed to file " + args.output print if args.print_colour_dict: print colour_dict if args.colour_branches_by: print colour_dict_br if args.colour_backgrounds_by: print colour_dict_bg if args.interactive: print "\nEntering interactive mode..." tree.show(tree_style=ts)
def MakePlot(x, org_names, ckm30, ckm50, outgroup, outfile, outfilexml, sum_x):
    """Build a neighbour-joining tree from two matrices and render it with bubbles.

    The two matrices are column-normalised by their diagonals, averaged into a
    distance matrix, and turned into a neighbour-joining tree. Hypothetical
    internal nodes are then inserted along the branches leading to each
    organism, and every node with a positive entry in ``x`` is drawn with a
    circle whose radius scales with the square root of that entry.

    Parameters:
        x: flat sequence of abundances. Entry ``i + k * len(org_names)`` belongs
            to organism ``i``; block ``k == 0`` is the organism itself and
            blocks ``k == 1 .. 9`` belong to the nine cutoffs, so at least
            ``10 * len(org_names)`` entries are read. It is normalised to sum
            to one before use.
        org_names: list of organism names, one per matrix row. Duplicated
            names are renamed IN PLACE (``name_1``, ``name_2``, ...).
        ckm30, ckm50: square 2-D numpy arrays indexed ``[i, j]``
            (presumably common-kmer matrices for two k-mer sizes -- confirm
            against the caller).
        outgroup: name of the leaf to root the tree on; a warning is printed
            and no outgroup is set when it is not among the names.
        outfile: path handed to ``Tree.render`` for the image.
        outfilexml: path the PhyloXML export is written to.
        sum_x: value whose first 8 characters (as ``str``) appear in the legend.

    Raises:
        ValueError: from ``insert_node`` when a node is asked to be inserted
            further along a branch than the branch is long.
    """
    # Make sure names are unique.
    # NOTE: ``names`` is the same list object as ``org_names``; the renaming
    # below therefore also changes the caller's list, and the rest of the
    # function relies on both names referring to the renamed labels.
    names = org_names
    for name in names:
        if names.count(name) > 1:
            temp_name = name
            i = 1
            # Don't change the last one, just to make sure we don't conflict
            # with the outgroup
            for dummy in range(0, names.count(name) - 1):
                names[names.index(temp_name)] = temp_name + "_" + str(i)
                i = i + 1

    # Normalize the x vector. The total is computed once, and the result is a
    # real list: a lazy ``map`` would see the rebound ``x`` inside its own
    # lambda and could be neither sliced nor indexed further down.
    total = sum(x)
    x = [y / total for y in x]

    # Scale every column by the reciprocal of its diagonal entry.
    ckm30_norm = np.multiply(ckm30, 1 / np.diag(ckm30))
    ckm50_norm = np.multiply(ckm50, 1 / np.diag(ckm50))
    num_rows = ckm30_norm.shape[0]

    # Lower-triangular distance matrix (row i holds columns 0..i): the mean
    # over both matrices of one minus the symmetrised normalised value.
    matrix = [
        [
            .5 * (1 - .5 * ckm30_norm[i, j] - .5 * ckm30_norm[j, i])
            + .5 * (1 - .5 * ckm50_norm[i, j] - .5 * ckm50_norm[j, i])
            for j in range(i + 1)
        ]
        for i in range(num_rows)
    ]

    # Make the list of distances (ave of the two ckm matrices)
    ckm_ave_train = .5 * ckm30_norm + .5 * ckm50_norm
    ckm_ave_train_dist = dict()
    for i, org_name in enumerate(org_names):
        ckm_ave_train_dist[org_name] = [
            .5 * ckm_ave_train[i, j] + .5 * ckm_ave_train[j, i]
            for j in range(len(org_names))
        ]

    # Construct the tree. Note I could use RapidNJ here, but a few tests have
    # shown that the trees that RapidNJ creates are rubbish.
    dm = _DistanceMatrix(names, matrix)
    constructor = DistanceTreeConstructor()
    tree = constructor.nj(dm)
    t = Tree(tree.format('newick'), format=1)

    # Now I will put internal nodes in a certain phylogenetic distance between
    # the root and a given node.

    def insert_node(t, name_to_insert, insert_above, dist_along):
        """Insert a node ``dist_along`` below the parent of ``insert_above``."""
        insert_at_node = t.search_nodes(name=insert_above)[0]
        parent = (t & insert_above).up
        orig_branch_length = t.get_distance(insert_at_node, parent)
        if orig_branch_length < dist_along:
            raise ValueError(
                "error: dist_along larger than orig_branch_length in PlotPackage.py"
            )
        # Split the branch: the new node takes the first ``dist_along`` of it
        # and the detached subtree hangs below with the remainder.
        removed_node = insert_at_node.detach()
        removed_node.dist = orig_branch_length - dist_along
        added_node = parent.add_child(name=name_to_insert, dist=dist_along)
        added_node.add_child(removed_node)

    def insert_hyp_node(t, leaf_name, percent, ckm_ave_train_dist, org_names):
        """Insert a node some % along a branch.

        Takes into account the ckm distances and nodes already created in the
        NJ tree (and what distance their descendants are from everyone else).
        """
        dists = [abs(y - percent) for y in ckm_ave_train_dist[leaf_name]]
        # Selecting every organism within 0.05 of the given percent is
        # currently disabled, so this list stays empty:
        # for i in range(len(dists)):
        #     if dists[i] <= .05:
        #         nearby_indicies.append(i)
        nearby_indicies = list()
        nearby_names = list()
        if not nearby_indicies:
            # If there are no nearby indicies, add the closest organism to the
            # given percent
            nearby_names.append(org_names[dists.index(min(dists))])
        else:
            # Look names up by the stored index, not by list position.
            for nearby_index in nearby_indicies:
                nearby_names.append(org_names[nearby_index])
        mean_dist = np.mean([
            ckm_ave_train_dist[leaf_name][org_names.index(y)]
            for y in nearby_names
        ])
        nearby_names.append(leaf_name)
        LCA = t.get_common_ancestor(nearby_names)
        LCA_to_leaf_dist = t.get_distance(LCA, leaf_name)
        # divide the dist to the right/left of the LCA node by the number of
        # percentage points in there
        if LCA.name == t.name:
            percent_dist = percent * LCA_to_leaf_dist
            if mean_dist <= percent:
                child_node = (t & leaf_name)
            else:
                # This means "go up from root" in the direction of the
                # nearest guy
                child_node = (t & nearby_names[0])
            ancestor_node = (t & child_node.name).up
        elif mean_dist <= percent:
            percent_dist = t.get_distance(LCA) + abs(percent - mean_dist) * (
                LCA_to_leaf_dist) / (1 - mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        else:
            percent_dist = t.get_distance(LCA) - abs(percent - mean_dist) * (
                t.get_distance(LCA)) / (mean_dist)
            child_node = (t & leaf_name)
            ancestor_node = (t & child_node.name).up
        # Climb towards the root until the ancestor is no further from the
        # root than the target distance.
        while t.get_distance(t.name, ancestor_node) > percent_dist:
            child_node = ancestor_node
            ancestor_node = (t & child_node.name).up
        insert_node(t, leaf_name + "_" + str(percent), child_node.name,
                    percent_dist - t.get_distance(t.name, ancestor_node))

    # Set outgroup
    if outgroup in names:
        t.set_outgroup(t & outgroup)
    else:
        print("WARNING: the chosen outgroup " + outgroup +
              " is not in the given taxonomy: ")
        print(names)
        print(
            "Proceeding without setting an outgroup. This may cause results to be uninterpretable."
        )

    # Insert hypothetical nodes
    hyp_node_names = dict()
    cutoffs = [.9, .8, .7, .6, .5, .4, .3, .2, .1]
    cutoffs = [
        -.5141 * (val**3) + 1.0932 * (val**2) + 0.3824 * val for val in cutoffs
    ]
    for i in range(len(org_names)):
        # Every len(org_names)-th entry starting at i: organism i itself,
        # followed by its value at each cutoff.
        xi = x[i:len(x):len(org_names)]
        for j in range(1, len(cutoffs) + 1):
            if xi[j] > 0:
                insert_hyp_node(t, org_names[i], cutoffs[j - 1],
                                ckm_ave_train_dist, org_names)
                # Keep base name, cutoff and cutoff index explicitly,
                # in case there are "_" in the file names
                hyp_node_names[org_names[i] + "_" + str(cutoffs[j - 1])] = [
                    org_names[i], cutoffs[j - 1], j - 1
                ]

    size_factor = 250
    font_size = 55

    # Now put the bubbles on the nodes
    def layout(node):
        """Style one node: thick branches, plus a bubble when it has a value."""
        node_style = NodeStyle()
        node_style["hz_line_width"] = 10
        node_style["vt_line_width"] = 10
        node.set_style(node_style)
        if node.is_leaf():
            if node.name in org_names:
                # make reconstructed bubble
                size = x[org_names.index(node.name)]
                F = CircleFace(radius=size_factor * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                # Denote that this was a training organism
                nameFace = AttrFace("name", fsize=font_size, fgcolor='black')
                faces.add_face_to_node(nameFace, node, 0,
                                       position="branch-right")
        elif node.name in hyp_node_names:
            # Otherwise it's a hypothetical node, just use recon x
            node_base_name, _cutoff, idx = hyp_node_names[node.name]
            if node_base_name in org_names:
                size = x[org_names.index(node_base_name) +
                         (idx + 1) * len(org_names)]
                F = CircleFace(radius=size_factor * math.sqrt(size),
                               color="RoyalBlue",
                               style="sphere")
                F.border.width = None
                F.opacity = 0.6
                faces.add_face_to_node(F, node, 0, position="branch-right")
                # This is if I want the names of the hypothetical nodes to be
                # printed as well
                # nameFace = AttrFace("name", fsize=font_size, fgcolor='black')
                # faces.add_face_to_node(nameFace, node, 0, position="branch-right")

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "r"
    # ts.mode = "c"
    ts.scale = 2 * 1000
    ts.show_leaf_name = False
    ts.min_leaf_separation = 50
    F = CircleFace(radius=.87 * size_factor, color="RoyalBlue", style="sphere")
    F.border.width = None
    F.opacity = 0.6
    ts.legend.add_face(F, 0)
    ts.legend.add_face(
        TextFace("  Inferred relative abundance",
                 fsize=1.5 * font_size,
                 fgcolor="Blue"), 1)
    ts.legend.add_face(
        TextFace("  Total absolute abundance depicted " + str(sum_x)[0:8],
                 fsize=1.5 * font_size,
                 fgcolor="Black"), 1)
    ts.legend_position = 4
    # t.show(tree_style=ts)
    t.render(outfile, w=550, units="mm", tree_style=ts)

    # Render the XML file; the handle is closed even if the export fails.
    project = Phyloxml()
    phylo = phyloxml.PhyloxmlTree(newick=t.write(format=0, features=[]))
    project.add_phylogeny(phylo)
    with open(outfilexml, 'w') as xml_handle:
        project.export(xml_handle)
def ly_supports(node):
    """Layout function that prints a node's support value above its branch.

    Leaves and nodes without a parent are left untouched; every other node
    gets its ``support`` formatted with ``%0.2g`` in a small indianred label
    at the ``branch-top`` position of column 0.
    """
    # Nothing to annotate on leaves or on a parentless node.
    if node.is_leaf() or not node.up:
        return
    support_label = "%0.2g" % (node.support)
    support_face = TextFace(support_label, fsize=7, fgcolor='indianred')
    add_face_to_node(support_face, node, column=0, position='branch-top')
# Reconstruct the proto-form for the current cognate set over the guide tree.
cons = get_consensus(
    multi,
    cognateGuideTree,
    recon_alg="sankoff_parsimony",
    gaps=True,
    classes=False,
    rep_weights=rep_weights,
    local="gap",
)

# Aggregate the parsimony value: add the cheapest entry of the guide tree's
# Sankoff table at every reconstructed position.
cognateParsimony = 0.0
for i in range(len(cognateGuideTree.reconstructed)):
    positionCosts = cognateGuideTree.sankoffTable[i].values()
    cognateParsimony = cognateParsimony + min(positionCosts)
familyParsimony += cognateParsimony

print("".join([
    "Reconstructed proto-", familyName,
    " word for concept ", str(conceptID - 3),
    ":\t", cons,
    "\twith average parsimony ", str(cognateParsimony / len(cognateLangs)),
]))

# Print out the reconstruction steps in a tree visualization.
if graphicalOutput:
    outputTree = Tree()
    rootForm = str("".join(cognateGuideTree.reconstructed))
    outputTree.add_face(TextFace(rootForm), column=0, position="branch-right")

    def copyChildrenIntoOutput(treeNode, outputTreeNode):
        """Mirror the subtree below treeNode underneath outputTreeNode.

        Tips are named with their reconstructed form plus the looked-up
        cognate name; inner nodes carry the reconstructed form as a text face.
        """
        for child in treeNode.Children:
            outputChild = outputTreeNode.add_child()
            childForm = str("".join(child.reconstructed))
            if child.isTip():
                tipLabel = cognateNameTable[int(child.Name)]
                outputChild.name = childForm + " (" + tipLabel + ")"
            else:
                outputChild.add_face(TextFace(childForm), column=0,
                                     position="branch-right")
            copyChildrenIntoOutput(child, outputChild)

    copyChildrenIntoOutput(cognateGuideTree, outputTree)
    imagePath = "".join([
        "output/", phylName, "/", familyName, "/",
        str(conceptID - 3), ".", cons.replace("-", ""), ".png",
    ])
    outputTree.render(imagePath)

#print("\nDetermining and counting sound changes at the edges of the guide tree, and cascading them to the supertrees:")
# Give every guide-tree node a recon_changes dict if it does not have one yet.
for node in cognateGuideTree.postorder():
    if not hasattr(node, "recon_changes"):
        node.recon_changes = {}
# Record the index of the last header column in spaciators.
spaciators.add(len(column_header) - 1)
# Map every header name to its column index (later duplicates win).
header2column = dict((name, i) for i, name in enumerate(column_header))

# Rectangular tree with the aligned faces laid out as a table.
ts = TreeStyle()
ts.mode = 'r'
ts.draw_guiding_lines = False
ts.show_leaf_name = False
ts.force_topology = False
ts.layout_fn = layout
ts.tree_width = 800
ts.draw_aligned_faces_as_table = True

# One header face per column: a rotated coloured label for named columns,
# a thin white rectangle for unnamed ones.
for i, name in enumerate(column_header):
    if name:
        headerF = TextFace(str(name), fgcolor=column_color[i], fsize=40)
        headerF.rotation = -85
    else:
        headerF = RectFace(300, 5, "white", "white")
    ts.aligned_header.add_face(headerF, i)

#tree_files = sys.argv[1:]
for treefile in args.tree_files:
    output = treefile + '.png'
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('rendering %s' % output)
    try:
        # Close the file deterministically instead of leaking the handle.
        with open(treefile) as tree_handle:
            newick = tree_handle.read().replace('|', ',')
        t = Tree(newick)
    except Exception as e:
        # Deliberate best effort: report the unreadable tree and move on.
        print('%s %s' % (e, treefile))
    else:
        t.set_outgroup(t.get_midpoint_outgroup())