import ete3

# Example tree exported from phyphy with .extract_absrel_tree(). Every node
# (tips, internal nodes and the root) carries the NHX feature `Selected`.
mytree = "(0564_7:0.00708844[&&NHX:Selected=0],(((((0564_11:0.00527268[&&NHX:Selected=0],0564_4:0.00714182[&&NHX:Selected=0])Node20:0.0022574[&&NHX:Selected=0],(0564_1:0.00583239[&&NHX:Selected=0],(0564_21:0.00121537[&&NHX:Selected=0],0564_5:0.00266921[&&NHX:Selected=0])Node25:0.000797211[&&NHX:Selected=0])Node23:0.00142056[&&NHX:Selected=0])Node19:0.0019147[&&NHX:Selected=0],0564_17:0.00605582[&&NHX:Selected=0])Node18:0.00100178[&&NHX:Selected=0],((0564_13:0.0053066[&&NHX:Selected=0],(0564_15:0.00346989[&&NHX:Selected=0])Node32:0.000752206[&&NHX:Selected=0])Node30:0.00188243[&&NHX:Selected=0],((0564_22:0.00686981[&&NHX:Selected=0],0564_6:0.00581523[&&NHX:Selected=0])Node36:0.00125905[&&NHX:Selected=0],0564_3:0.00791919[&&NHX:Selected=1])Node35:0.0174886[&&NHX:Selected=1])Node29:0.0010489[&&NHX:Selected=0])Node17:0.00156911[&&NHX:Selected=0],0564_9:0.00551506[&&NHX:Selected=0])Node16:0.000783733[&&NHX:Selected=0],(((0557_24:0.00078793[&&NHX:Selected=0],0557_4:0.000787896[&&NHX:Selected=0],0557_2:0.000399166[&&NHX:Selected=0])Node9:0.00206483[&&NHX:Selected=0],0557_12:0.00267531[&&NHX:Selected=0])Node8:0.00118205[&&NHX:Selected=0],((0557_21:0[&&NHX:Selected=0],0557_6:0.000391941[&&NHX:Selected=0],0557_9:0.000402021[&&NHX:Selected=0],0557_11:0.00156985[&&NHX:Selected=0],0557_13:0.000401742[&&NHX:Selected=0],0557_26:0.00079377[&&NHX:Selected=0],(0557_5:0.00117641[&&NHX:Selected=0],0557_7:0[&&NHX:Selected=0])Node53:0.000391973[&&NHX:Selected=0])Node6:0.00118062[&&NHX:Selected=0],0557_25:0.00220372[&&NHX:Selected=0])Node7:0.00103489[&&NHX:Selected=0])Separator:0.00822051[&&NHX:Selected=1])[&&NHX:Selected=0];"

# Load the newick string into ete3 (format=1 is passed explicitly).
t = ete3.Tree(mytree, format=1)

# Tree style that prints both leaf names and branch lengths.
ts = ete3.TreeStyle()
ts.show_leaf_name = True
ts.show_branch_length = True

# Two node styles: red branch lines for selected branches, black for the rest.
style_selected = ete3.NodeStyle()
style_notselected = ete3.NodeStyle()
for _line_key in ("vt_line_color", "hz_line_color"):
    style_selected[_line_key] = "red"
    style_notselected[_line_key] = "black"

# Features parsed from the tree string are strings, so the lookup table is
# keyed by "1" / "0" rather than by integers. Nodes whose `Selected` value is
# anything else keep their default style.
_style_for_flag = {"1": style_selected, "0": style_notselected}
for node in t.traverse("preorder"):
    _chosen = _style_for_flag.get(node.Selected)
    if _chosen is not None:
        node.set_style(_chosen)
def _make_tree_figure(
        tree, fig, colors, orders, root_name, scale=None,
        branch_vert_margin=None, fontsize=12, show_names=True,
        name_field='seq_id', rename_function=None, color_node_labels=False,
        label_colors=None, tree_orientation=0, min_order_fraction=0.1,
        show_root_name=False, chain=None,
        # linked_alignment=None,
        alignment_fontsize=11, alignment_height=50, alignment_width=50,
        compact_alignment=False, scale_factor=1, linewidth=1,
        show_scale=False, ladderize=True, delete_nodes=None):
    """Style a tree with ete3 and render it to ``fig``.

    Args:
        tree: passed unchanged to ``ete3.Tree``.
        fig: passed unchanged to ``Tree.render`` as the output target.
        colors: when ``orders`` is given, indexed by order value
            (``colors[order]``); otherwise queried with
            ``colors.get(node.name, '#000000')``.
        orders: optional mapping from leaf name to an order value. For every
            node, the smallest order value whose share of the node's leaves
            is at least ``min_order_fraction`` picks the branch colour.
        root_name: name of the node used as outgroup (skipped when ``None``).
        scale: optional value for ``TreeStyle.scale`` (converted with ``int``).
        branch_vert_margin: optional ``TreeStyle.branch_vertical_margin``.
        fontsize, rename_function, color_node_labels, label_colors: forwarded
            to ``_build_node_text_face``.
        show_names: ``True`` to label every node, or a collection of node
            names to label. A single string is treated as one name.
        show_root_name: also label the node called ``root_name``.
        tree_orientation: value for ``TreeStyle.orientation``.
        linewidth: branch line width (converted with ``float``).
        show_scale: value for ``TreeStyle.show_scale``.
        ladderize: ladderize the tree before rendering.
        delete_nodes: a node name or list of node names to delete.
        name_field, chain, alignment_fontsize, alignment_height,
        alignment_width, compact_alignment, scale_factor: accepted for
            interface compatibility; not used by this function while
            linked-alignment rendering is disabled.
    """
    if delete_nodes is None:
        delete_nodes = []
    elif isinstance(delete_nodes, tuple(STR_TYPES)):
        delete_nodes = [delete_nodes]

    # Normalise ``show_names`` into "label everything" or a private set of
    # names, so the caller's list is never mutated and the default ``True``
    # combined with ``show_root_name=True`` no longer fails on ``.append``.
    show_all_names = show_names is True
    if show_all_names or not show_names:
        names_to_show = set()
    elif isinstance(show_names, tuple(STR_TYPES)):
        names_to_show = set([show_names])
    else:
        names_to_show = set(show_names)
    if show_root_name is True:
        names_to_show.add(root_name)

    # Linked-alignment rendering is disabled (its parameter and the PhyloTree
    # construction are commented out), so a plain tree is always built.
    t = ete3.Tree(tree)
    if root_name is not None:
        t.set_outgroup(t & root_name)

    # Style the nodes. Iterate over a snapshot because nodes may be deleted
    # from the tree inside the loop.
    for node in list(t.traverse()):
        if node.name in delete_nodes:
            node.delete()
            continue

        # Black is the fallback when no order reaches ``min_order_fraction``;
        # without it ``color`` would be unbound or left over from the
        # previously visited node.
        color = '#000000'
        if orders is not None:
            leaves = node.get_leaf_names()
            order_count = Counter(orders[leaf] for leaf in leaves)
            n_leaves = float(len(leaves))
            for order in sorted(order_count):
                if order_count[order] / n_leaves >= min_order_fraction:
                    color = colors[order]
                    break
        else:
            color = colors.get(node.name, '#000000')

        style = ete3.NodeStyle()
        style['size'] = 0
        style['vt_line_width'] = float(linewidth)
        style['hz_line_width'] = float(linewidth)
        style['vt_line_color'] = color
        style['hz_line_color'] = color
        style['vt_line_type'] = 0
        style['hz_line_type'] = 0

        if show_all_names or node.name in names_to_show:
            tf = _build_node_text_face(node, color_node_labels, color,
                                       label_colors, fontsize,
                                       rename_function)
            node.add_face(tf, column=0)
        node.set_style(style)

    t.dist = 0
    ts = ete3.TreeStyle()
    # if linked_alignment is not None:
    #     ts.layout_fn = _phyloalignment_layout_function
    ts.orientation = tree_orientation
    # Names are drawn through the text faces added above.
    ts.show_leaf_name = False
    if scale is not None:
        ts.scale = int(scale)
    if branch_vert_margin is not None:
        ts.branch_vertical_margin = float(branch_vert_margin)
    ts.show_scale = show_scale
    if ladderize:
        t.ladderize()
    t.render(fig, tree_style=ts)
'#ffee33', '#e9debb', '#ffcdf3', '#ffffff' ] pal2 = ['#a0a0a0', '#e9debb'] * 4 + ['#a0a0a0' ] + ['#a0a0a0', '#e9debb'] * 4 pal2 = ['#a0a0a0', '#e9debb'] * 8 bgcol = iter(pal16[2:]) bgcol = iter(pal2) nodestyles = {} for node in tree.iter_leaves(): binom, spp, common = mapname(node.name) if (binom, spp) in nodestyles: nodestyles[(binom, spp)][1].append(node) continue col = next(bgcol) s = ete3.NodeStyle() s["bgcolor"] = col #s["hz_line_color"] = s["vt_line_color"] = col #s["hz_line_width"] = s["vt_line_width"] = 1 nodestyles[(binom, spp)] = (s, [node]) import copy labelnodes = {} for (binom, spp), (ns, nodelist) in nodestyles.iteritems(): if len(nodelist) == 1: anc = nodelist[0] else: anc = tree.get_common_ancestor(*nodelist) #labelnodes[nodelist[len(nodelist)/2]] = mapname(nodelist[0].name) labelnodes[nodelist[0]] = mapname(nodelist[0].name)
'd1.names.nwk', 'd2_EDITED.names.nwk', 'e1.names.nwk', 'e2.names.nwk' ] #newick_dir = '/Volumes/page_lab/users/lsteitz/1000_Genomes_Male_Figures/Phylogenetic_Trees/' #newicks = ['tree.all.nwk'] yvarfile = open( '/Volumes/page_lab/users/lsteitz/1000_Genomes_Male_Figures/Phylogenetic_Trees/1000_Ys_variant_men.txt', 'r') yvars = {} for line in yvarfile: if not line.startswith('#'): data = line.split() yvars[data[0]] = data[1] varstyle = ete3.NodeStyle() varstyle['fgcolor'] = 'red' for newick in newicks: t = ete3.Tree('%s%s' % (tree_dir, newick), format=1) #Remove nodes not in my data good_leaves = [] for leaf in t.iter_leaves(): if leaf.name.split('_')[0] in yvars: good_leaves.append(leaf.name) t.prune(good_leaves) # for node in t.search_nodes(): # nodename = node.name.split('_')[0] # if node.is_leaf():
def return_treestyle_with_columns(cmapvector):
    '''
    Build an ete3 TreeStyle that draws one coloured rectangle per metadata
    column next to every leaf, plus a rotated header with the column names.

    cmapvector is unpacked into three items:
      d_seq_color     -- indexed by leaf name, yields one colour per column
      d_seq_label     -- indexed by leaf name, yields one label per column
                         (only the first 10 characters are drawn)
      width_and_names -- sequence of (rectangle width, column name) pairs;
                         the names are needed again to print the header in
                         order

    Returns the configured ete3.TreeStyle; nothing is rendered here.
    '''
    d_seq_color, d_seq_label, width_and_names = cmapvector
    rect_width = [x[0] for x in width_and_names]
    column_names = [x[1] for x in width_and_names]
    label_font_size = 7

    def tree_profile_layout(node):
        # Prepare the table and other node information (local function so it
        # can see the colour/label tables above).
        # Nodes without a `submission_org_code` feature (or with an empty
        # one) fall back to the default colour instead of raising.
        org_code = getattr(node, "submission_org_code", None) or ""
        this_color = "darkred" if "NORW" in org_code else "#080816"

        style = node.img_style
        style['hz_line_type'] = style['vt_line_type'] = 0  # 0=solid, 1=dashed, 2=dotted
        style['hz_line_width'] = style['vt_line_width'] = 4
        style['hz_line_color'] = style['vt_line_color'] = this_color

        if not node.is_leaf():
            style['size'] = 0
            return

        style['size'] = 2
        style['shape'] = "sphere"
        style['fgcolor'] = this_color
        # The aligned leaf name is "column 0", thus traits go to column + 1.
        ete3.add_face_to_node(
            ete3.AttrFace("name", fsize=label_font_size, text_suffix=" "),
            node, 0, position="aligned")
        cells = zip(d_seq_color[node.name], d_seq_label[node.name], rect_width)
        for column, (rgb_val, lab, wdt) in enumerate(cells):
            label = {
                "text": lab[:10],
                "color": "Black",
                "fontsize": label_font_size - 1,
            }
            # Odd columns hold the coloured cell, even columns a thin white
            # spacer between neighbouring cells.
            ete3.add_face_to_node(
                ete3.RectFace(wdt, 12, fgcolor=rgb_val, bgcolor=rgb_val,
                              label=label),
                node, 2 * column + 1, position="aligned")
            ete3.add_face_to_node(
                ete3.RectFace(2, 12, fgcolor="#ffffff", bgcolor="#ffffff",
                              label=""),
                node, 2 * column + 2, position="aligned")

    ts = ete3.TreeStyle()
    ts.draw_guiding_lines = True  # dotted line between tip and name
    ts.guiding_lines_color = "#f4f4f4"  # "#bdb76d"
    ts.guiding_lines_type = 2  # 0=solid, 1=dashed, 2=dotted
    ts.layout_fn = tree_profile_layout
    ts.branch_vertical_margin = 0
    ts.min_leaf_separation = 1  # min separation, in pixels, between two adjacent branches
    ts.scale = 2000000  # 2e6 pixels per branch length unit
    ts.show_scale = False
    # NOTE(review): the previous code assigned a stray local
    # `show_branch_length = True` (no `ts.` prefix), which had no effect on
    # the style. The setting is left untouched here to keep the output as
    # before; set `ts.show_branch_length = True` if lengths were intended.
    ts.show_leaf_name = False  # we handle this in the layout function
    ## STILL dont know how to do it
    #ts.legend.add_face(CircleFace(10, "red"), column=0)
    #ts.legend.add_face(TextFace("0.5 support"), column=1)
    #ts.legend_position = 3 # TopLeft corner if 1, TopRight if 2, BottomLeft if 3, BottomRight if 4

    # Header faces sit on the odd columns, above the coloured cells
    # (column 0 holds the tip labels).
    for col, label in enumerate(column_names):
        labelFace = ete3.TextFace(label, fsize=9, fgcolor="DimGray")  # fsize controls interval between columns
        labelFace.rotation = 270
        labelFace.vt_align = 1  # 0 top, 1 center, 2 bottom
        labelFace.hz_align = 1  # 0 left, 1 center, 2 right
        ts.aligned_header.add_face(labelFace, 2 * col + 1)
    return ts
def Main():
    """Render the sample tree with an aligned metadata table next to it.

    Reads the module-level settings ``sensitivePath``, ``metadataPath``,
    ``distancePath``, ``treePath`` and ``outputFile``. The leaf named
    "Reference" receives the column headers; every other leaf receives one
    row: optional sensitive metadata, sample ID, a green box if the sample is
    new, a black box per plasmid Inc type present, MLST / resistance /
    plasmid-match columns and finally its row of the distance matrix.
    The figure is written with ``Tree.render(outputFile, ...)``.
    """

    def square(color):
        # Filled 30x30 box used for the "New?" and Inc presence columns.
        face = e.RectFace(30, 30, color, color)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        # Was `ht_align`, which is not a Face attribute; the horizontal
        # counterpart of `vt_align` is `hz_align`.
        face.hz_align = 1
        return face

    use_sensitive = len(sensitivePath) > 0
    if use_sensitive:
        sensitive_meta_data = SensitiveMetadata()  # sensitive metadata @ chris
        sensitive_columns = list(sensitive_meta_data.get_columns())
    else:
        sensitive_columns = []

    metadata = ParseWorkflowResults(metadataPath)

    # Store the distance matrix as rowname -> list<string>.
    distanceDict = {}
    for row in read(distancePath):
        cells = row.split("\t")
        distanceDict[cells[0]] = cells[1:]
    # The first row of the matrix supplies the distance column headers.
    first_row = next(iter(distanceDict), None)
    distance_header = distanceDict[first_row] if first_row is not None else []

    #region step5: tree construction
    t = e.Tree("".join(read(treePath)))
    t.set_outgroup(t & "Reference")

    # set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches

    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # Find the plasmid origins: Inc type -> IDs of the samples carrying it.
    plasmidIncs = {}
    for key in metadata:
        sample = metadata[key]
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" in inc.lower():
                    carriers = plasmidIncs.setdefault(inc, [])
                    if sample.ID not in carriers:
                        carriers.append(sample.ID)
    inc_names = list(plasmidIncs)

    for n in t.traverse():  # loop through the nodes of the tree
        if not n.is_leaf():
            continue

        if n.name == "Reference":
            # The reference branch carries the column headers, in the same
            # order in which the sample rows are filled in below.
            headers = list(sensitive_columns)
            headers += ["SampleID", "New?"]
            headers += inc_names
            headers += ["MLSTScheme", "Sequence Type", "Carbapenamases",
                        "Plasmid Best Match", "Best Match Identity"]
            headers += distance_header
            reference = t & "Reference"
            for column, title in enumerate(headers):
                reference.add_face(addFace(title), column, "aligned")
            continue

        # Not the reference branch: populate with metadata. The record must
        # be resolved before anything reads it; previously the sensitive
        # block used `mData` before it was assigned for this leaf.
        sample_key = n.name.replace(".fa", "")
        mData = metadata[sample_key] if sample_key in metadata else metadata["na"]

        index = 0
        if sensitive_columns:
            # tree uses bcids like BC18A021A_S12
            # while sens meta-data uses BC18A021A
            # trim the "_S.*" if present
            bcid = str(mData.ID)
            cut = bcid.find("_S")
            if cut != -1:
                bcid = bcid[:cut]
            for sensitive_data_column in sensitive_columns:
                sens_col_val = sensitive_meta_data.get_value(
                    bcid=bcid, column_name=sensitive_data_column)
                n.add_face(addFace(sens_col_val), index, "aligned")
                index += 1

        n.add_face(addFace(mData.ID), index, "aligned")
        index += 1

        # "New?" column: the column is always consumed so the following
        # columns stay aligned with the headers.
        if mData.new == True:
            n.add_face(square("green"), index, "aligned")
        index += 1

        # Presence/absence of every Inc type for this sample.
        for offset, inc in enumerate(inc_names):
            if sample_key in plasmidIncs[inc]:
                n.add_face(square("black"), index + offset, "aligned")
        index += len(inc_names)

        for value in (mData.MLSTSpecies, mData.SequenceType,
                      mData.CarbapenemResistanceGenes, mData.plasmidBestMatch,
                      mData.plasmididentity):
            n.add_face(addFace(value), index, "aligned")
            index += 1

        # Distance matrix row, only if the leaf appears in the matrix.
        if n.name in distanceDict:
            row_cells = list(distanceDict[n.name])[:len(distance_header)]
            for offset, cell in enumerate(row_cells):
                n.add_face(addFace(cell), index + offset, "aligned")

    # save it as a png, pdf, svg or a phyloxml
    t.render(outputFile, w=5000, units="mm", tree_style=ts)
def print_tree(tree, outfile="tree_gain_loss_domains_20160211.pdf"):
    """Annotate ``tree`` with domain gain/loss counts and render it.

    Every node is expected to expose ``domains``, ``gained_domains`` and
    ``lost_domains`` (only their lengths are used). Leaves show their domain
    count in an aligned column; internal nodes show the count and their name
    below the branch. Gains (green, "+N") and losses (red, "-N") are drawn
    above the branch.

    Args:
        tree: ete3 tree whose nodes carry the attributes listed above.
        outfile: path handed to ``tree.render``; defaults to the file name
            that used to be hard-coded.
    """
    # set node style (one shared style for all nodes)
    nstyle = ete3.NodeStyle()
    nstyle["shape"] = "sphere"
    nstyle["size"] = 0
    nstyle["fgcolor"] = "darkred"
    nstyle["vt_line_width"] = 7
    nstyle["hz_line_width"] = 7

    for node in tree.traverse():
        n_domains = len(node.domains)
        if node.is_leaf():
            node.add_face(ete3.TextFace(n_domains, fsize=30),
                          column=1, position="aligned")
        else:
            node.add_face(ete3.TextFace(str(n_domains) + " ", fsize=25),
                          column=0, position="branch-bottom")
            node.add_face(ete3.TextFace(node.name + " ", fsize=25),
                          column=0, position="branch-bottom")
        node.add_face(ete3.TextFace("+" + str(len(node.gained_domains)),
                                    fgcolor="green", fsize=25),
                      column=0, position="branch-top")
        node.add_face(ete3.TextFace("-" + str(len(node.lost_domains)),
                                    fgcolor="red", fsize=25),
                      column=0, position="branch-top")
        node.set_style(nstyle)

    # Background colours for the two highlighted species.
    highlight = {
        "Orussus_abietinus": "lightblue",
        "Athalia_rosae": "lightgreen",
    }

    def layout(node):
        # Only leaves get an aligned name face, so nothing is built for
        # internal nodes.
        if not node.is_leaf():
            return
        name_face = ete3.AttrFace("name", fsize=30)
        name_face.margin_right = 10
        if node.name in highlight:
            name_face.background.color = highlight[node.name]
        ete3.faces.add_face_to_node(name_face, node, 0, position="aligned")

    ts = ete3.TreeStyle()
    ts.show_leaf_name = False  # names are drawn by `layout`
    ts.draw_guiding_lines = True
    # ts.guiding_lines_type = 0
    ts.extra_branch_line_color = "black"
    ts.extra_branch_line_type = 1
    ts.show_scale = False
    ts.layout_fn = layout
    ts.optimal_scale_level = "mid"
    tree.render(outfile, tree_style=ts, w=199, units="mm")
def main(treefile, outfile=None, cladelistfile=None, datafile=None,
         prune=False, log_dist=False, values=None, quoted_node_names=False,
         startmatch=False):
    """Draw a tree, highlighting listed clades and optionally a data heatmap.

    Args:
        treefile: newick input handed to ``ete3.Tree`` (``format=1``).
        outfile: if given, the tree is rendered to this file; otherwise it is
            shown interactively.
        cladelistfile: optional text file; the first tab-separated field of
            every line not starting with ``#`` is a clade name.
        datafile: optional tab-separated table (first column is the index)
            read with pandas; requires ``values``.
        prune: prune the tree to the listed clades instead of highlighting
            them.
        log_dist: replace each branch length by a signed log10 of itself; the
            original value is kept in the ``orig_dist`` feature.
        values: names of the ``datafile`` columns to display.
        quoted_node_names: forwarded to ``ete3.Tree``.
        startmatch: match clades by name prefix instead of exact name.

    Side effects:
        Sets the module-level ``tot_clades`` counter and prints it.
    """
    global tot_clades
    tot_clades = 0

    tree = ete3.Tree(treefile, format=1, quoted_node_names=quoted_node_names)

    if cladelistfile:
        with open(cladelistfile) as f:
            cladelist = {line.rstrip().split('\t')[0]
                         for line in f if not line.startswith('#')}
    else:
        cladelist = set()

    if datafile:
        assert values, "`values` is required when `datafile` is given"
        data = pd.read_csv(datafile, sep='\t', index_col=0, header=0)
        assert all(val in data.columns for val in values), \
            "every name in `values` must be a column of `datafile`"
        value_face = {
            val: ete3.AttrFace(val, formatter=' %.2f ', fgcolor='grey',
                               fsize=8)
            for val in values
        }
        max_val = data[values].max().max()
        min_val = data[values].min().min()
        average_val = data[values].mean().mean()

    alt_col = ['#7FD09C', '#4cbdad']
    ns = [ete3.NodeStyle(bgcolor=col) for col in alt_col]
    default_ns = ete3.NodeStyle(size=0)
    labelface = ete3.AttrFace('name')

    clade_prefixes = tuple(cladelist)

    def test(node):
        # Nothing is highlighted when pruning.
        if prune:
            return False
        if startmatch:
            return node.name.startswith(clade_prefixes)
        return node.name in cladelist

    # Style chosen for each highlighted node, keyed by id(node). Remembering
    # the choice keeps colours and the clade count stable if the layout is
    # run again for the same node, e.g. on a redraw of the interactive view.
    clade_style = {}

    def mylayout(node):
        global tot_clades
        # Transform each branch length only once; `orig_dist` marks nodes
        # that were already converted.
        if log_dist and not hasattr(node, 'orig_dist'):
            node.add_feature('orig_dist', node.dist)
            if node.dist > 0:
                node.dist = log10(node.dist)
            elif node.dist < 0:
                node.dist = -log10(-node.dist)
            else:
                node.dist = 0

        if test(node):
            key = id(node)
            if key not in clade_style:
                # Same order as rotating the style list after each use.
                clade_style[key] = ns[(-len(clade_style)) % len(ns)]
                tot_clades += 1
            node.set_style(clade_style[key])
            if not node.is_leaf():
                ete3.add_face_to_node(labelface, node, column=0,
                                      position='branch-right')
        else:
            node.set_style(default_ns)

        if datafile and node.is_leaf():
            node.add_feature('profile',
                             [data[val][node.name] for val in values])
            node.add_feature('deviation', [0.0] * len(values))
            heatface = ete3.ProfileFace(max_val, min_val, average_val,
                                        width=20 * len(values), height=20,
                                        style='heatmap')
            ete3.add_face_to_node(heatface, node, column=1, aligned=True)
            # Numeric values follow the heatmap, one aligned column each.
            for i, val in enumerate(values, start=2):
                node.add_feature(val, data[val][node.name])
                ete3.add_face_to_node(value_face[val], node, column=i,
                                      aligned=True)

    if prune:
        tree.prune(cladelist, preserve_branch_length=True)
    tree.dist = 0

    if outfile:
        tree.render(outfile, mylayout)
    else:
        tree.show(layout=mylayout)

    # Print summary
    print("Found %d clades" % tot_clades)
def main_fun(dr, ftree, fplot, fst, fld, typef, branch_scale, branch_support,
             show_legend, legend_box_size, max_legend_stack, legend_font_size,
             img_height, img_dpi, show, typecoldict):
    """Plot a circular tree whose leaves are coloured by sequence type.

    Args:
        dr: directory that ``ftree``, ``fst`` and ``fld`` are joined to.
        ftree: tree file name (loaded with ``ete3.Tree``).
        fplot: output image path; when ``None`` it is derived from the tree
            path by replacing ``.nwk`` with ``.png``. Must end in ``png``.
        fst: table of genomes and their sequence types, read with
            ``utils.readcsv``.
        fld: optional CSV read with pandas; the columns ``accession``,
            ``location`` and ``date`` are used.
        typef: fraction of the type table size; types with fewer isolates
            than ``len(typedata) * typef`` are pooled under type ``'O'``.
        branch_scale: value for ``TreeStyle.scale``; also scales the size of
            the location/month boxes.
        branch_support: value for ``TreeStyle.show_branch_support``.
        show_legend: add a type/colour legend to the tree style.
        legend_box_size, max_legend_stack, legend_font_size: legend layout.
        img_height, img_dpi: forwarded to ``Tree.render``.
        show: selects between rendering to ``fplot`` and ``Tree.show``.
            NOTE(review): the test is ``show is not None``, so any non-None
            value, including ``False``, renders to file -- confirm intended.
        typecoldict: optional string ``"type,color,type,color,..."`` that
            replaces the automatically assigned type colours.

    Raises:
        FileNotFoundError: if the tree file or the location file is missing.
        ValueError: if the output path does not end in ``png``.
    """
    ## paths
    ftree = os.path.join(dr, ftree)
    if fplot is None:
        fplot = ftree.replace('.nwk', '.png')
    fst = os.path.join(dr, fst)
    ## checks
    # tree file is present
    if not os.path.exists(ftree):
        raise FileNotFoundError('tree file %s must be present.' % ftree)
    # should you proceed if the output path already exists
    if not utils.outcheck(fplot):
        return
    # plot file has png suffix
    if fplot.split('.')[-1] != 'png':
        raise ValueError('output file must have suffix "png".')
    # if info file (for locations) is provided, it is a valid file path
    if fld:
        click.echo("Location file provided.")
        fld = os.path.join(dr, fld)
        if not os.path.exists(fld):
            raise FileNotFoundError("couldn't find the file %s." % fld)
    else:
        click.echo(
            "No location file! Annotation will only be for sequence types")
    # load tree
    t = ete3.Tree(ftree)
    # list of leaves
    leaves = t.get_leaf_names()
    ## create treestyle
    ts = ete3.TreeStyle()
    ts.show_branch_support = branch_support
    ts.mode = "c"
    ts.scale = branch_scale
    ### types #####################################
    # table of genomes and their sequence types
    typedata = utils.readcsv(fst)
    # threshold for a type to be shown explicitly in the figure
    th = len(typedata) * typef
    # dict of sequence type with isolates
    type_isols = utils.split_data(data=typedata, ix=1, cixs=0)
    # empty list of types to be removed
    rmkeys = []
    # empty list of such minor isolates
    minors = []
    # for every type and its isolates
    for k, v in type_isols.items():
        # if the type is unknown
        if k == 'U':
            # skip
            continue
        # if no. of isolates for the types are less than the above threshold
        if len(v) < th:
            # minor isolates
            minors.extend(v)
            # excluded type
            rmkeys.append(k)
    # type isolate dict with low representation types excluded
    type_isols = {k: v for k, v in type_isols.items() if k not in rmkeys}
    # and added back as minors under type 'O'thers
    type_isols['O'] = minors
    # modified table of genome and types
    typedata = [[i, k] for k, v in type_isols.items() for i in v]
    # dict of isolate and its type if the isolate is present on the tree
    isol_type = {i[0]: i[1] for i in typedata if i[0] in leaves}
    # color representation of types
    isol_type_color, type_color = colbyinfo(infodict=isol_type,
                                            sorting_func=typesortingfunc)
    # if a color dict was explicitly provided
    if typecoldict is not None:
        # even positions are type names, odd positions their colors
        tcl = typecoldict.split(',')
        type_color = {tcl[x]: tcl[x + 1] for x in range(0, len(tcl), 2)}
        isol_type_color = {k: type_color[v] for k, v in isol_type.items()}
    # isolates that currently carry the color of type 'U' / 'O' are switched
    # to white / grey before the type colors themselves are overwritten
    # (only values of existing keys are replaced while iterating)
    for k, v in isol_type_color.items():
        if v == type_color['U']:
            isol_type_color[k] = 'white'
        if 'O' in type_color.keys() and v == type_color['O']:
            isol_type_color[k] = 'grey'
    type_color['U'] = 'white'
    type_color['O'] = 'grey'
    ###############################################
    # basic tree style with type annotation
    for n in t.traverse():
        # if branch support is less than 0.5, delete the branch
        if n.support < 0.5:
            n.delete()
            continue
        # every remaining branch is drawn with the same fixed length
        n.dist = 0.1
        ns = ete3.NodeStyle()
        if n.is_leaf():
            ns['size'] = 10
            # leaves without a known type get a grey background
            if n.name in isol_type_color.keys():
                ns['bgcolor'] = isol_type_color[n.name]
            else:
                ns['bgcolor'] = 'grey'
        else:
            ns['size'] = 0
        n.set_style(ns)
    # If mapping is available, then use it to color leaves and branches
    if fld is not None:
        dmap = pandas.read_csv(fld)
        nrow = len(dmap)
        head = dmap.columns
        # colors for locations
        isol_loc = {
            dmap.at[x, 'accession']: dmap.at[x, 'location']
            for x in range(nrow)
        }
        isol_loc_color, loc_color = colbyinfo(infodict=isol_loc)
        # colors for months: the first two '-'-separated fields of the date,
        # kept only for dates with exactly two '-'
        isol_month = {
            dmap.at[x, 'accession']: '-'.join(dmap.at[x, 'date'].split('-')[:2])
            for x in range(nrow) if dmap.at[x, 'date'].count('-') == 2
        }
        # months
        months = sorted(list(set(isol_month.values())))
        # dict of month names and corresponding key (1-based rank in sorted order)
        month_key = {}
        for x, i in enumerate(months):
            month_key[i] = x + 1
        # replace months with key in the above
        isol_mkey = {k: month_key[v] for k, v in isol_month.items()}
        months = sorted(list(set(isol_mkey.values())))
        nm = len(months)
        # one shade of the 'Blues' palette per distinct month
        month_colors = seaborn.color_palette('Blues', n_colors=nm)
        isol_month_color = {}
        for k, v in isol_mkey.items():
            x = months.index(v)
            c = month_colors[x]
            isol_month_color[k] = matplotlib.colors.to_hex(c)
        boxsize = 10 * branch_scale / 100
        for n in t.traverse():
            # nodes without a location entry are left untouched
            if n.name not in isol_loc_color.keys():
                continue
            if n.is_leaf():
                # aligned column 2: location box
                rct1 = ete3.RectFace(width=boxsize,
                                     height=boxsize,
                                     fgcolor='',
                                     bgcolor=isol_loc_color[n.name])
                n.add_face(rct1, column=2, position='aligned')
                # aligned column 3: month box, when a month is known
                if n.name in isol_month_color.keys():
                    rct2 = ete3.RectFace(width=boxsize,
                                         height=boxsize,
                                         fgcolor='',
                                         bgcolor=isol_month_color[n.name])
                    n.add_face(rct2, column=3, position='aligned')
            else:
                n.img_style['size'] = 0
    ### legend ##################################
    if show_legend:
        ts.legend_position = 3
        stack_size = 0
        colx = 0
        for k, v in type_color.items():
            rct = ete3.RectFace(legend_box_size, legend_box_size, '', v)
            rct.margin_left = 10
            rct.margin_right = 10
            txt = ete3.TextFace(k, fsize=legend_font_size)
            txt.margin_left = 10
            txt.margin_right = 10
            # start a new pair of legend columns once the stack is full
            if stack_size > max_legend_stack:
                stack_size = 0
                colx += 2
            # first entry of a stack gets extra space on top
            if stack_size == 0:
                rct.margin_top = 20
            ts.legend.add_face(rct, column=colx)
            ts.legend.add_face(txt, column=colx + 1)
            stack_size += legend_box_size
    ###############################################
    ## output
    if show is not None:
        t.render(fplot, tree_style=ts, units='px', h=img_height, dpi=img_dpi)
        # NOTE(review): `start` is not defined in this function; presumably a
        # module-level start time -- verify.
        click.echo("{}: Tree plotting complete. Output was saved in {}".format(
            utils.timer(start), fplot))
    else:
        t.show(tree_style=ts)
    ################
def simulate(args):
    """Simulation subprogram.

    Simulates a Galton–Watson process, with mutation probabilities according
    to a user defined motif model e.g. S5F.

    ``args`` is the parsed command-line namespace. The attributes read here
    are ``seed``, ``mutability``, ``substitution``, ``lambda0``, ``lambda_``,
    ``sequence``, ``sequence2``, ``frame``, ``verbose``, ``n``, ``N``, ``T``,
    ``outbase``, ``plotAA`` and ``idlabel``. ``args.lambda0`` and
    ``args.sequence`` are modified in place (default rate, upper-casing,
    frame truncation, and concatenation with ``args.sequence2``).

    Files written, all prefixed with ``args.outbase``: the simulated
    sequences as FASTA (one file, or two for a sequence pair), a stats TSV,
    the lineage tree SVG, the collapsed tree (``.p`` and SVG) and the
    colormap TSV.

    Raises:
        Exception: if a sequence pair is given with more than two lambda0.
        RuntimeError: if no acceptable tree is produced within 1000 attempts.
    """
    random.seed(a=args.seed)
    mutation_model = mm.MutationModel(args.mutability, args.substitution)
    if args.lambda0 is None:
        args.lambda0 = [max([1, int(0.01 * len(args.sequence))])]
    args.sequence = args.sequence.upper()
    if args.sequence2 is not None:
        # Use the same mutation rate on both sequences
        if len(args.lambda0) == 1:
            args.lambda0 = [args.lambda0[0], args.lambda0[0]]
        elif len(args.lambda0) != 2:
            raise Exception("Only one or two lambda0 can be defined for a two "
                            "sequence simulation.")
        # Require both sequences to be in frame 1:
        if args.frame is not None and args.frame != 1:
            if args.verbose:
                print("Warning: When simulating with two sequences they are "
                      "truncated to be beginning at frame 1.")
            # Drop the leading out-of-frame bases, then keep whole codons.
            offset = args.frame - 1
            length1 = 3 * ((len(args.sequence) - offset) // 3)
            length2 = 3 * ((len(args.sequence2) - offset) // 3)
            args.sequence = args.sequence[offset:offset + length1]
            args.sequence2 = args.sequence2[offset:offset + length2]
        # Extract the bounds between sequence 1 and 2:
        seq_bounds = (
            (0, len(args.sequence)),
            (len(args.sequence), len(args.sequence) + len(args.sequence2)),
        )
        # Merge the two sequences to simplify future dealing with the pair:
        args.sequence += args.sequence2
    else:
        seq_bounds = None

    trials = 1000
    # this loop makes us resimulate if size too small, or backmutation
    for _ in range(trials):
        try:
            tree = mutation_model.simulate(
                args.sequence,
                seq_bounds=seq_bounds,
                progeny=lambda seq: args.lambda_,
                lambda0=args.lambda0,
                n=args.n,
                N=args.N,
                T=args.T,
                frame=args.frame,
                verbose=args.verbose,
            )
            # this will fail if backmutations
            collapsed_tree = bp.CollapsedTree(tree=tree)
            tree.ladderize()
            uniques = sum(node.abundance > 0
                          for node in collapsed_tree.tree.traverse())
            if uniques < 2:
                raise RuntimeError(f"collapsed tree contains {uniques} "
                                   "sampled sequences")
            break
        except RuntimeError as e:
            print(f"{e}, trying again")
    else:
        # Reached only when every attempt failed. The old check
        # `trial == trials - 1` also fired when the final attempt succeeded.
        raise RuntimeError(f"{trials} attempts exceeded")

    # In the case of a sequence pair print them to separate files:
    if args.sequence2 is not None:
        (start1, end1), (start2, end2) = seq_bounds
        with open(args.outbase + ".simulation_seq1.fasta", "w") as fh1, \
                open(args.outbase + ".simulation_seq2.fasta", "w") as fh2:
            fh1.write(">root\n")
            fh1.write(args.sequence[start1:end1] + "\n")
            fh2.write(">root\n")
            fh2.write(args.sequence[start2:end2] + "\n")
            for leaf in tree.iter_leaves():
                if leaf.abundance != 0:
                    fh1.write(">" + leaf.name + "\n")
                    fh1.write(leaf.sequence[start1:end1] + "\n")
                    fh2.write(">" + leaf.name + "\n")
                    fh2.write(leaf.sequence[start2:end2] + "\n")
    else:
        with open(args.outbase + ".simulation.fasta", "w") as f:
            f.write(">root\n")
            f.write(args.sequence + "\n")
            for leaf in tree.iter_leaves():
                if leaf.abundance != 0:
                    f.write(">" + leaf.name + "\n")
                    f.write(leaf.sequence + "\n")

    # some observable simulation stats to write
    observed = [node for node in collapsed_tree.tree.traverse()
                if node.abundance]
    abundance, distance_from_root, degree = zip(*[(
        node.abundance,
        utils.hamming_distance(node.sequence, args.sequence),
        # number of other observed genotypes at Hamming distance 1
        sum(
            utils.hamming_distance(node.sequence, node2.sequence) == 1
            for node2 in observed if node2 is not node),
    ) for node in observed])
    stats = pd.DataFrame({
        "genotype abundance": abundance,
        "Hamming distance to root genotype": distance_from_root,
        "Hamming neighbor genotypes": degree,
    })
    stats.to_csv(args.outbase + ".simulation.stats.tsv",
                 sep="\t",
                 index=False)

    print(f"{sum(leaf.abundance for leaf in collapsed_tree.tree.traverse())}"
          " simulated observed sequences")

    # render the full lineage tree
    ts = ete3.TreeStyle()
    ts.rotation = 90
    ts.show_leaf_name = False
    ts.show_scale = False

    colors = {}
    # Build a fresh collection rather than using `-=` on ete3.SVG_COLORS,
    # which edited ete3's own colour set in place. Sorting makes the colour
    # order independent of set iteration order.
    palette = sorted(set(ete3.SVG_COLORS) - {"black", "white", "gray"})
    palette = itertools.cycle(palette)  # <-- circular iterator

    colors[tree.sequence] = "gray"

    for n in tree.traverse():
        nstyle = ete3.NodeStyle()
        nstyle["size"] = 10
        # One colour per distinct (amino acid or nucleotide) sequence.
        color_key = n.AAseq if args.plotAA else n.sequence
        if color_key not in colors:
            colors[color_key] = next(palette)
        nstyle["fgcolor"] = colors[color_key]
        n.set_style(nstyle)

    # this makes the rendered branch lengths correspond to time
    for node in tree.iter_descendants():
        node.dist = node.time - node.up.time
    tree.render(args.outbase + ".simulation.lineage_tree.svg", tree_style=ts)

    # render collapsed tree
    # create an id-wise colormap
    # NOTE: node.name can be a set
    # NOTE(review): the lookup below is by nucleotide sequence, while
    # `colors` is keyed by AAseq when args.plotAA is set -- confirm that this
    # combination is supported.
    colormap = {
        node.name: colors[node.sequence]
        for node in collapsed_tree.tree.traverse()
    }
    collapsed_tree.write(args.outbase + ".simulation.collapsed_tree.p")
    collapsed_tree.render(
        args.outbase + ".simulation.collapsed_tree.svg",
        idlabel=args.idlabel,
        colormap=colormap,
        frame=args.frame,
    )
    # print colormap to file
    with open(args.outbase + ".simulation.collapsed_tree.colormap.tsv",
              "w") as f:
        for name, color in colormap.items():
            f.write((name if isinstance(name, str) else ",".join(name)) +
                    "\t" + color + "\n")
def main(term, outbase=None, outfmt=None, nhx=False, show_img=False, recurs=0):
    """Fetch the phylogenetic trees found for ``term`` and output each one.

    Args:
        term: search term handed to ``get_wiki_tree``.
        outbase: base name of the output files; ``-<tree index>`` and the
            format extension are appended.
        outfmt: collection of output formats. ``'nwk'`` writes newick (to
            file if ``outbase`` is set, else to stdout), ``'ascii'`` prints
            an ASCII drawing, and ``'png'``/``'jpg'``/``'svg'``/``'pdf'``
            render images (only when ``outbase`` is set). When no format is
            given, every tree is shown interactively.
        nhx: include the ``support``, ``info``, ``link`` and ``img``
            features in the newick output.
        show_img: add the node image (``img`` feature) next to each node.
        recurs: forwarded to ``build_tree``.
    """
    graphics_fmts = set(outfmt).intersection(('png', 'jpg', 'svg', 'pdf')) \
                    if outfmt is not None else set()

    # The layout is needed for image output and for the interactive view,
    # which is used whenever no output format is given. It is defined only
    # in those cases, so the text outputs still work when PyQt is not
    # installed. (Previously it was skipped when `outbase` was set without
    # `outfmt`, and `tree.show` then failed on the missing name.)
    if graphics_fmts or not outfmt:
        if show_img:
            #async def?
            def add_img(node):
                nodeimg = getattr(node, 'img', None)
                if nodeimg:
                    # Protocol-relative URL: make it absolute.
                    if nodeimg.startswith('//'):
                        nodeimg = 'https:' + nodeimg
                    #await ?
                    ete3.add_face_to_node(ete3.ImgFace(
                        nodeimg,
                        width=int(node.imgwidth),
                        height=int(node.imgheight),
                        is_url=True),
                                          node,
                                          column=1,
                                          position='branch-right')
        else:

            def add_img(node):
                pass

        ns = ete3.NodeStyle(size=0)
        dashed_branch = ete3.NodeStyle(size=0, hz_line_type=1)

        def mylayout(node):
            node.set_style(ns)
            if not node.is_leaf():
                # Internal nodes: name above the branch, info lines below.
                ete3.add_face_to_node(ete3.TextFace(node.name),
                                      node,
                                      column=0,
                                      position='branch-top')
                ete3.add_face_to_node(ete3.TextFace('\n'.join(
                    getattr(node, 'info', []))),
                                      node,
                                      column=0,
                                      position='branch-bottom')
            # Weakly supported branches are drawn dashed.
            if node.support <= 0.5:
                node.set_style(dashed_branch)
            add_img(node)

    treesoups = get_wiki_tree(term)
    logger.info("Found %d phylogenetic trees", len(treesoups))

    if outfmt:
        if outbase:
            outbase += '-%d'

        outputfuncs = []
        if 'nwk' in outfmt:
            features = ['support', 'info', 'link', 'img'] if nhx else None

            def write_newick(tree, i):
                # format 8: all names
                outfile = (outbase % i + '.nwk') if outbase else None
                txt = tree.write(outfile=outfile,
                                 format=8,
                                 format_root_node=True,
                                 features=features)
                if txt is not None:
                    print(txt)

            outputfuncs.append(write_newick)

        if 'ascii' in outfmt:
            # Always to stdout
            def write_ascii(tree, i):
                print(tree.get_ascii())

            outputfuncs.append(write_ascii)

        if graphics_fmts and outbase:

            def write_images(tree, i):
                for fmt in graphics_fmts:
                    tree.render((outbase % i) + '.' + fmt, mylayout)

            outputfuncs.append(write_images)

        def outputs(tree, i):
            for outfunc in outputfuncs:
                outfunc(tree, i)
    else:

        def outputs(tree, i):
            tree.show(mylayout, name=('Tree n°%d: %s' % (i, tree.name)))

    for i, treesoup in enumerate(treesoups):
        # build_tree is expected to return exactly one tree here.
        tree, = build_tree(treesoup, recurs)
        outputs(tree, i)
'../rooted_partitions-with_named_branches.treefile', format=1) final_transfers = { '003575': [ 'm83 = LCA[GCA000012905_ABA79637, GCA001459775_CUU43052]: Transfer, Mapping --> n262, Recipient --> n297' ], '004119': [ 'm16 = LCA[GCA000019945_ACB79599, GCA900100665_SDG33339]: Transfer, Mapping --> n134, Recipient --> n293' ] } genome_table = pd.read_table('../selected_genomes.tab', index_col=31) genome_table.index = [ index.replace('_', '').split('.')[0] for index in genome_table.index ] reticulation_style = ete3.NodeStyle() donor_style = ete3.NodeStyle() recipient_style = ete3.NodeStyle() #reticulation_style["fgcolor"] = "#0f0f0f" #reticulation_style["size"] = 0 reticulation_style["vt_line_color"] = "#ff0000" reticulation_style["hz_line_color"] = "#ff0000" reticulation_style["vt_line_width"] = 5 reticulation_style["hz_line_width"] = 5 reticulation_style["vt_line_type"] = 1 reticulation_style["hz_line_type"] = 0 donor_style['bgcolor'] = 'LightSteelBlue' recipient_style['bgcolor'] = 'DarkSeaGreen'
def run_FastTree(self, ctx, params):
    """
    Method for Tree building of either DNA or PROTEIN sequences
    **
    **        input_type: MSA
    **        output_type: Tree
    :param params: instance of type "FastTree_Params" (FastTree Input
       Params) -> structure: parameter "workspace_name" of type
       "workspace_name" (** The workspace object refs are of form: ** **
       objects = ws.get_objects([{'ref':
       params['workspace_id']+'/'+params['obj_name']}]) ** ** "ref" means
       the entire name combining the workspace id and the object name **
       "id" is a numerical identifier of the workspace or object, and
       should just be used for workspace ** "name" is a string identifier
       of a workspace or object.  This is received from Narrative.),
       parameter "desc" of String, parameter "input_ref" of type
       "data_obj_ref", parameter "output_name" of type "data_obj_name",
       parameter "species_tree_flag" of Long, parameter "intree_ref" of
       type "data_obj_ref", parameter "fastest" of Long, parameter
       "pseudo" of Long, parameter "gtr" of Long, parameter "wag" of
       Long, parameter "noml" of Long, parameter "nome" of Long,
       parameter "cat" of Long, parameter "nocat" of Long, parameter
       "gamma" of Long
    :returns: instance of type "FastTree_Output" (FastTree Output) ->
       structure: parameter "report_name" of type "data_obj_name",
       parameter "report_ref" of type "data_obj_ref", parameter
       "output_ref" of type "data_obj_ref"
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN run_FastTree

    # Overview of the steps below:
    #   1. validate params and fetch the input MSA (and optional start tree)
    #   2. write the MSA as single-line FASTA with newick-safe ids
    #   3. run FastTree, feeding the FASTA on stdin, newick to a file
    #   4. save the KBaseTrees.Tree object
    #   5. render PNG/PDF/HTML with ete3, upload the files, create the report
    # Raises ValueError for missing params, unsupported object types,
    # workspace/shock failures and a non-zero FastTree exit status.

    #### local helpers (kept inside the method body)
    ##
    def _is_set(key):
        # optional numeric flag: present and neither None nor 0
        return params.get(key) not in (None, 0)

    def _has_intree():
        return params.get('intree_ref') not in (None, '')

    def _newick_safe(text):
        # take care of characters that will mess up newick and/or fasttree:
        # whitespace becomes '_', structural characters become '%' followed
        # by their two-digit hex code (e.g. ':' -> '%3a', '|' -> '%7c')
        text = re.sub(r'\s', '_', text)
        return re.sub(r'[/\\()\[\]:;|]',
                      lambda m: '%' + format(ord(m.group(0)), '02x'),
                      text)

    def _provenance():
        # load the method provenance from the context object and add the
        # input data object references
        provenance = ctx['provenance'] if 'provenance' in ctx else [{}]
        provenance[0]['input_ws_objects'] = [params['input_ref']]
        if _has_intree():
            provenance[0]['input_ws_objects'].append(params['intree_ref'])
        provenance[0]['service'] = 'kb_fasttree'
        provenance[0]['method'] = 'run_FastTree'
        return provenance

    def _to_shock(file_path, what, **extra):
        # upload one file (or packed directory) and return the shock record
        upload_args = {'file_path': file_path, 'make_handle': 0}
        upload_args.update(extra)
        try:
            return dfu.file_to_shock(upload_args)
        except Exception as e:
            raise ValueError('error uploading ' + what +
                             ' file to shock: ' + str(e))

    # init
    #
    console = []
    invalid_msgs = []
    self.log(console, 'Running run_FastTree with params=')
    self.log(console, "\n" + pformat(params))
    report = ''

    #### do some basic checks
    #
    for required in ('workspace_name', 'input_ref', 'output_name'):
        if required not in params:
            raise ValueError(required + ' parameter is required')

    #### Get the input_ref MSA object
    ##
    try:
        ws = workspaceService(self.workspaceURL, token=ctx['token'])
        objects = ws.get_objects([{'ref': params['input_ref']}])
        data = objects[0]['data']
        info = objects[0]['info']
        input_name = info[1]
        input_type_name = info[2].split('.')[1].split('-')[0]
    except Exception as e:
        raise ValueError(
            'Unable to fetch input_ref object from workspace: ' + str(e))
        #to get the full stack trace: traceback.format_exc()

    if input_type_name != 'MSA':
        raise ValueError('Cannot yet handle input_name type of: ' +
                         input_type_name)

    MSA_in = data
    if 'row_order' in MSA_in:
        row_order = MSA_in['row_order']
    else:
        row_order = sorted(MSA_in['alignment'].keys())

    if 'default_row_labels' in MSA_in:
        default_row_labels = MSA_in['default_row_labels']
    else:
        default_row_labels = dict((row_id, row_id) for row_id in row_order)

    if len(row_order) < 2:
        self.log(invalid_msgs,
                 "must have multiple records in MSA: " + params['input_ref'])

    # export features to FASTA file
    # (written by hand: SeqIO makes multiline sequences, FastTree doesn't
    # like them)
    input_MSA_file_path = os.path.join(self.scratch, input_name + ".fasta")
    self.log(console, 'writing fasta file: ' + input_MSA_file_path)
    new_ids = dict()
    records = []
    for row_id in row_order:
        row_id_disp = _newick_safe(row_id)
        new_ids[row_id] = row_id_disp
        records.extend(['>' + row_id_disp, MSA_in['alignment'][row_id]])
    with open(input_MSA_file_path, 'w') as input_MSA_file_handle:
        input_MSA_file_handle.write("\n".join(records) + "\n")

    # Determine whether nuc or protein sequences
    #
    NUC_MSA_pattern = re.compile(
        r"^[\.\-_ACGTUXNRYSWKMBDHVacgtuxnryswkmbdhv \t\n]+$")
    all_seqs_nuc = all(NUC_MSA_pattern.match(MSA_in['alignment'][row_id])
                       for row_id in row_order)

    # Get start tree (if any)
    #
    if _has_intree():
        try:
            ws = workspaceService(self.workspaceURL, token=ctx['token'])
            objects = ws.get_objects([{'ref': params['intree_ref']}])
            data = objects[0]['data']
            info = objects[0]['info']
            intree_name = info[1]
            intree_type_name = info[2].split('.')[1].split('-')[0]
        except Exception as e:
            raise ValueError(
                'Unable to fetch intree_ref object from workspace: ' +
                str(e))
            #to get the full stack trace: traceback.format_exc()

        if intree_type_name != 'Tree':
            raise ValueError('Cannot yet handle intree type of: ' +
                             intree_type_name)

        intree_newick_file_path = os.path.join(self.scratch,
                                               intree_name + ".newick")
        self.log(console,
                 'writing intree file: ' + intree_newick_file_path)
        with open(intree_newick_file_path, 'w') as intree_file_handle:
            intree_file_handle.write(data['tree'])

    # validate input data: on invalid input save a failure report and stop
    #
    if len(invalid_msgs) > 0:
        self.log(console, "SETTING PROVENANCE")  # DEBUG
        provenance = _provenance()

        report += "FAILURE\n\n" + "\n".join(invalid_msgs) + "\n"
        reportObj = {'objects_created': [], 'text_message': report}
        reportName = 'fasttree_report_' + str(uuid.uuid4())
        report_obj_info = ws.save_objects({
            'workspace': params['workspace_name'],
            'objects': [{
                'type': 'KBaseReport.Report',
                'data': reportObj,
                'name': reportName,
                'meta': {},
                'hidden': 1,
                'provenance': provenance
            }]
        })[0]

        self.log(console, "BUILDING RETURN OBJECT")
        returnVal = {
            'report_name': reportName,
            'report_ref': str(report_obj_info[6]) + '/' +
                          str(report_obj_info[0]) + '/' +
                          str(report_obj_info[4]),
            'output_ref': None
        }
        self.log(console, "run_FastTree DONE")
        return [returnVal]

    ### Construct the command
    #
    # check for necessary files
    if not os.path.isfile(self.FASTTREE_bin):
        raise ValueError("no such file '" + self.FASTTREE_bin + "'")
    if not os.path.isfile(input_MSA_file_path):
        raise ValueError("no such file '" + input_MSA_file_path + "'")
    if not os.path.getsize(input_MSA_file_path) > 0:
        raise ValueError("empty file '" + input_MSA_file_path + "'")

    # set the output path
    timestamp = int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000)
    output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_newick_file_path = os.path.join(
        output_dir, params['output_name'] + '.newick')

    # options
    fasttree_cmd = [self.FASTTREE_bin, '-nopr']
    if _is_set('fastest'):
        fasttree_cmd.append('-fastest')
    if _is_set('pseudo'):
        fasttree_cmd.append('-pseudo')
    if _has_intree():
        fasttree_cmd.extend(['-intree', intree_newick_file_path])
    if all_seqs_nuc and _is_set('gtr'):
        fasttree_cmd.append('-gtr')
    if not all_seqs_nuc and _is_set('wag'):
        fasttree_cmd.append('-wag')
    if _is_set('noml'):
        fasttree_cmd.append('-noml')
    if _is_set('nome'):
        fasttree_cmd.append('-nome')
    if _is_set('nocat'):
        fasttree_cmd.append('-nocat')
    elif (not all_seqs_nuc and params.get('cat') is not None
          and params['cat'] > 0):
        fasttree_cmd.extend(['-cat', str(params['cat'])])
    if _is_set('gamma'):
        fasttree_cmd.append('-gamma')
    if all_seqs_nuc:
        fasttree_cmd.append('-nt')

    # Run FASTTREE
    #
    # The MSA is connected to the process' stdin and the newick output file
    # to its stdout directly, instead of going through a shell redirect:
    # no shell parses the user-supplied output name, and communicate()
    # drains stderr while waiting so a full pipe cannot block the child.
    self.log(console, 'RUNNING FASTTREE:')
    self.log(console, '    ' + ' '.join(fasttree_cmd) +
             ' > ' + output_newick_file_path)
    env = os.environ.copy()
    with open(input_MSA_file_path, 'r') as input_MSA_file_handle:
        with open(output_newick_file_path, 'w') as newick_out_handle:
            p = subprocess.Popen(fasttree_cmd,
                                 cwd=self.scratch,
                                 stdin=input_MSA_file_handle,
                                 stdout=newick_out_handle,
                                 stderr=subprocess.PIPE,
                                 env=env,
                                 universal_newlines=True)
            _, fasttree_messages = p.communicate()
    for line in (fasttree_messages or '').splitlines():
        self.log(console, line)

    self.log(console, 'return code: ' + str(p.returncode))
    if p.returncode != 0:
        raise ValueError('Error running FASTTREE, return code: ' +
                         str(p.returncode) + '\n\n' + '\n'.join(console))

    # Check that FASTREE produced output
    #
    if not os.path.isfile(output_newick_file_path):
        raise ValueError("failed to create FASTTREE output: " +
                         output_newick_file_path)
    elif not os.path.getsize(output_newick_file_path) > 0:
        raise ValueError("created empty file for FASTTREE output: " +
                         output_newick_file_path)

    self.log(console, "SETTING PROVENANCE")  # DEBUG
    provenance = _provenance()

    # Upload results
    # (invalid input already returned above, so this point is only reached
    # with valid data)
    #
    self.log(console, "UPLOADING RESULTS")  # DEBUG

    tree_name = params['output_name']
    tree_description = params.get('desc') or ''
    tree_type = 'GeneTree'
    if _is_set('species_tree_flag'):
        tree_type = 'SpeciesTree'

    with open(output_newick_file_path, 'r') as output_newick_file_handle:
        output_newick_buf = output_newick_file_handle.read()
    output_newick_buf = output_newick_buf.rstrip()
    if not output_newick_buf.endswith(';'):
        output_newick_buf += ';'
    self.log(console, "\nNEWICK:\n" + output_newick_buf + "\n")

    # Build output_Tree structure, extracting info from the MSA
    #
    output_Tree = {
        'name': tree_name,
        'description': tree_description,
        'type': tree_type,
        'tree': output_newick_buf
    }
    default_node_labels = dict()
    if default_row_labels:
        leaf_list = []
        for row_id in default_row_labels.keys():
            # labels are keyed by the newick-safe id used in the tree
            new_row_id = new_ids[row_id]
            default_node_labels[new_row_id] = default_row_labels[row_id]
            leaf_list.append(new_row_id)
        output_Tree['default_node_labels'] = default_node_labels
        output_Tree['leaf_list'] = leaf_list
    for refs_key in ('ws_refs', 'kb_refs'):
        if MSA_in.get(refs_key) is not None:
            output_Tree[refs_key] = MSA_in[refs_key]

    # Store output_Tree
    #
    try:
        new_obj_info = ws.save_objects({
            'workspace': params['workspace_name'],
            'objects': [{
                'type': 'KBaseTrees.Tree',
                'data': output_Tree,
                'name': params['output_name'],
                'meta': {},
                'provenance': provenance
            }]
        })[0]
    except Exception as e:
        raise ValueError('Unable to save tree ' + params['output_name'] +
                         ' object to workspace ' +
                         str(params['workspace_name']) + ': ' + str(e))
        #to get the full stack trace: traceback.format_exc()

    self.log(console, "BUILDING REPORT")  # DEBUG

    # Upload newick and newick labels
    #
    newick_labels_file = params['output_name'] + '-labels.newick'
    output_newick_labels_file_path = os.path.join(output_dir,
                                                  newick_labels_file)
    mod_newick_buf = output_newick_buf
    for row_id in new_ids:
        new_id = new_ids[row_id]
        label = _newick_safe(default_node_labels.get(new_id, new_id))
        # ids are matched literally; a leaf id is preceded by '(' or ','
        # and followed by ':'
        leaf_pattern = '([(,])' + re.escape(new_id) + ':'
        mod_newick_buf = re.sub(
            leaf_pattern,
            lambda m, label=label: m.group(1) + label + ':',
            mod_newick_buf)
    mod_newick_buf = re.sub('_', ' ', mod_newick_buf)

    with open(output_newick_labels_file_path, 'w') as labels_file_handle:
        labels_file_handle.write(mod_newick_buf)

    # upload
    dfu = DFUClient(self.callbackURL)
    newick_upload_ret = _to_shock(output_newick_file_path, 'newick')
    newick_labels_upload_ret = _to_shock(output_newick_labels_file_path,
                                         'newick labels')

    # Create html with tree image
    #
    timestamp = int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
        * 1000)
    html_output_dir = os.path.join(self.scratch,
                                   'output_html.' + str(timestamp))
    if not os.path.exists(html_output_dir):
        os.makedirs(html_output_dir)
    html_file = params['output_name'] + '.html'
    png_file = params['output_name'] + '.png'
    pdf_file = params['output_name'] + '.pdf'
    output_html_file_path = os.path.join(html_output_dir, html_file)
    output_png_file_path = os.path.join(html_output_dir, png_file)
    output_pdf_file_path = os.path.join(output_dir, pdf_file)

    # init ETE3 objects
    t = ete3.Tree(mod_newick_buf)
    ts = ete3.TreeStyle()

    # customize
    ts.show_leaf_name = True
    ts.show_branch_length = False
    ts.show_branch_support = True
    ts.branch_vertical_margin = 5  # pixels between adjacent branches
    ts.title.add_face(
        ete3.TextFace(params['output_name'] + ": " + tree_description,
                      fsize=10),
        column=0)

    line_settings = {
        "vt_line_color": "#606060",
        "hz_line_color": "#606060",
        "vt_line_width": 2,
        "hz_line_width": 2,
        "vt_line_type": 0,  # 0 solid, 1 dashed, 2 dotted
        "hz_line_type": 0,
    }
    leaf_style = ete3.NodeStyle()
    leaf_style["fgcolor"] = "#ffffff"  # for node balls
    leaf_style["size"] = 2  # for node balls (we're using it to add space)
    for style_key, style_val in line_settings.items():
        leaf_style[style_key] = style_val

    for n in t.traverse():
        if n.is_leaf():
            style = leaf_style
        else:
            # internal nodes: ball size reflects branch support
            style = ete3.NodeStyle()
            style["fgcolor"] = "#606060"  # for node balls
            for style_key, style_val in line_settings.items():
                style[style_key] = style_val
            if n.support > 0.95:
                style["size"] = 6
            elif n.support > 0.90:
                style["size"] = 5
            elif n.support > 0.80:
                style["size"] = 4
            else:
                style["size"] = 2
        n.set_style(style)

    # save images
    dpi = 300
    img_units = "in"
    img_pix_width = 1200
    img_in_width = round(float(img_pix_width) / float(dpi), 1)
    img_html_width = img_pix_width // 2
    t.render(output_png_file_path, w=img_in_width, units=img_units,
             dpi=dpi, tree_style=ts)
    t.render(output_pdf_file_path, w=img_in_width, units=img_units,
             tree_style=ts)  # dpi irrelevant

    # make html
    html_report_lines = [
        '<html>',
        '<head><title>KBase FastTree-2: ' + params['output_name'] +
        '</title></head>',
        '<body bgcolor="white">',
        '<img width=' + str(img_html_width) + ' src="' + png_file + '">',
        '</body>',
        '</html>',
    ]
    with open(output_html_file_path, 'w') as html_handle:
        html_handle.write("\n".join(html_report_lines))

    # upload images and html
    png_upload_ret = _to_shock(output_png_file_path, 'png')
    pdf_upload_ret = _to_shock(output_pdf_file_path, 'pdf')
    html_upload_ret = _to_shock(html_output_dir, 'html', pack='zip')

    # Create report obj
    #
    reportName = 'fasttree_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [{
            'ref': str(params['workspace_name']) + '/' +
                   str(params['output_name']),
            'description': params['output_name'] + ' Tree'
        }],
        'message': '',
        'direct_html': '',
        'direct_html_link_index': 0,
        'file_links': [{
            'shock_id': newick_upload_ret['shock_id'],
            'name': params['output_name'] + '.newick',
            'label': params['output_name'] + ' NEWICK'
        }, {
            'shock_id': newick_labels_upload_ret['shock_id'],
            'name': params['output_name'] + '-labels.newick',
            'label': params['output_name'] + ' NEWICK (with labels)'
        }, {
            'shock_id': png_upload_ret['shock_id'],
            'name': params['output_name'] + '.png',
            'label': params['output_name'] + ' PNG'
        }, {
            'shock_id': pdf_upload_ret['shock_id'],
            'name': params['output_name'] + '.pdf',
            'label': params['output_name'] + ' PDF'
        }],
        'html_links': [{
            'shock_id': html_upload_ret['shock_id'],
            'name': html_file,
            'label': params['output_name'] + ' HTML'
        }],
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    SERVICE_VER = 'release'
    reportClient = KBaseReport(self.callbackURL, token=ctx['token'],
                               service_ver=SERVICE_VER)
    report_info = reportClient.create_extended_report(reportObj)

    # Done
    #
    self.log(console, "BUILDING RETURN OBJECT")
    returnVal = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref'],
        'output_ref': str(new_obj_info[6]) + '/' + str(new_obj_info[0]) +
                      '/' + str(new_obj_info[4])
    }
    self.log(console, "run_FastTree DONE")
    #END run_FastTree

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method run_FastTree return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]
def ete3_pdf_tree_output(self):
    """Render the tree as ``pdftree_<outfname>.pdf`` with cluster colouring.

    Reads `self.ori_tree_string` (tree text given to `ete3.Tree`),
    `self.clusterid_to_taxa`, `self.taxon_to_clusterid` and `self.outfname`.
    Leaves whose name is in `self.taxon_to_clusterid` get their branch and
    bold label drawn in the cluster colour; other leaves get an italic
    label. A 'Cluster-ID' column is added through `self.generate_heatmap`.
    """
    output_tree = ete3.Tree(self.ori_tree_string)
    output_tree.ladderize()

    # treestyle
    ts = ete3.TreeStyle()
    ts.show_leaf_name = False

    # generate color scheme for taxon_to_clusterid
    # (materialised as a list: dict views cannot be indexed on Python 3)
    cluster_ids = list(self.clusterid_to_taxa)
    if len(cluster_ids) > 1:
        clusterid_to_color = self.generate_color_scheme(cluster_ids)
    else:
        # zero or one cluster: no scheme needed, use a single fixed colour
        clusterid_to_color = dict((cid, '#ff2929') for cid in cluster_ids)

    # Scale bar: a tenth of the root-to-farthest-leaf distance. Best effort
    # only; the default scale bar is kept when it cannot be computed.
    try:
        ts.scale_length = float('{:.3f}'.format(
            output_tree.get_farthest_leaf()[-1] / 10))
    except Exception:
        pass

    for node in output_tree.traverse(strategy='levelorder'):
        ns = ete3.NodeStyle()
        ns["size"] = 0  # no node shape

        if not node.is_leaf():
            node.set_style(ns)
            continue

        taxon = node.name
        if taxon in self.taxon_to_clusterid:
            # color branch and label by cluster
            cluster_color = clusterid_to_color[self.taxon_to_clusterid[taxon]]
            ns["hz_line_color"] = cluster_color
            taxon_name = ete3.TextFace(taxon, ftype='Arial', fsize=2,
                                       bold=True, fgcolor=cluster_color)
        else:
            taxon_name = ete3.TextFace(taxon, ftype='Arial', fsize=2,
                                       fstyle="italic")

        node.set_style(ns)
        # write taxon names aligned to the right
        taxon_name.margin_left = 2
        node.add_face(taxon_name, column=0, position='branch-right')

    heatmap_headers = ['Cluster-ID']
    output_tree = self.generate_heatmap(output_tree, self.taxon_to_clusterid,
                                        clusterid_to_color)

    # heatmap header
    for lh_index, legend_header in enumerate(heatmap_headers):
        header_face = ete3.TextFace(legend_header, ftype='Arial', fsize=2)
        header_face.hz_align = 1
        header_face.vt_align = 1
        header_face.margin_left = 5
        header_face.margin_right = 5
        ts.aligned_header.add_face(header_face, lh_index)

    # render as pdf
    output_tree.render('pdftree_{}.pdf'.format(self.outfname), tree_style=ts)
def evolve_tree(newick,
                dup_mut=0.00043,
                del_mut=0.0,
                dup_rev=0.00086,
                del_rev=0.0):
    """Simulate a two-state CNV ('reference'/'mutation') along a tree.

    The input is deep-copied and left untouched. On the copy every node gets
    a `cnv_state` feature: the root is 'reference'; every other node starts
    from its parent's state and is then evolved along its branch.

    Branch length is converted to generations (`node.dist * gen_per_snp`).
    For each whole generation one random draw may flip the state, with
    probability `dup_mut` (reference -> mutation) or `dup_rev`
    (mutation -> reference). The fractional remainder `f` of a generation
    gets one more draw, flipping with probability ``1 - (1 - rate) ** f``.

    Args:
        newick: tree object (despite the name, not a newick string); needs
            `traverse()` yielding parents before children, and nodes with
            `is_root()`, `add_feature()`, `up` and `dist`.
        dup_mut: per-generation probability of reference -> mutation.
        del_mut: accepted for interface compatibility; currently unused.
        dup_rev: per-generation probability of mutation -> reference.
        del_rev: accepted for interface compatibility; currently unused.

    Returns:
        The annotated copy of the tree.
    """
    t = copy.deepcopy(newick)

    # Generations per unit of branch length/SNP; .0076 is SNPs per year,
    # 25 is years per generation.
    gen_per_snp = 1 / (25 * .0076)

    # NOTE: the original carried a disabled block that highlighted mutated
    # nodes with a red ete3.NodeStyle and a text face; it never ran, so the
    # style object it needed is no longer created here.
    for node in t.traverse():
        if node.is_root():
            node.add_feature('cnv_state', 'reference')
            continue

        node.add_feature('cnv_state', node.up.cnv_state)

        # Generational mutation including fractional generations.
        total_gens = node.dist * gen_per_snp
        gens = int(total_gens)
        frac_gen = total_gens - gens  # fractional remainder of a generation

        for _ in range(gens):
            rand_draw = random.random()
            if node.cnv_state == 'reference':
                if rand_draw < dup_mut:
                    node.cnv_state = 'mutation'
            elif rand_draw < dup_rev:
                node.cnv_state = 'reference'

        # One extra draw for the partial generation: the state flips unless
        # it "survives", which has probability (1 - rate) ** frac_gen.
        rand_draw = random.random()
        if node.cnv_state == 'reference':
            if rand_draw >= (1 - dup_mut) ** frac_gen:
                node.cnv_state = 'mutation'
        elif rand_draw >= (1 - dup_rev) ** frac_gen:
            node.cnv_state = 'reference'

    return t
def tree_draw(tree_file,
              tree_name=None,
              order_vector_file=None,
              cell_colors_file=None,
              clustering_colors_file=None,
              clustering_sizes_file=None,
              intermediate_node_sizes_file=None,
              intermediate_node_labels_file=None,
              leaf_labels_file=None,
              legend_file=None,
              duplicate_file=None,
              tree_scale='linear',
              tree_rotation=True,
              font_size=7,
              font_legend=7,
              node_size=3,
              scale_rate=None,
              distance_factor=1,
              y_scale=True):
    """Load a newick tree and build a styled ete3 tree plus its TreeStyle.

    Node styles are collected in a dict keyed by node name and applied at
    the end, so nodes sharing a name (e.g. unnamed internal nodes) share one
    style entry.

    Args:
        tree_file: newick input for `ete3.Tree(..., format=1)`.
        tree_name: optional title drawn above the tree.
        order_vector_file: optional file read by `utils.get_leaf_order`;
            children of a node are swapped when its first leaf is ordered
            after its last leaf.
        cell_colors_file: optional file read by `utils.get_cells_colors`
            (node name -> foreground colour).
        clustering_colors_file: optional file passed to `color_clustering`.
        clustering_sizes_file: optional file passed to `size_clustering`.
        intermediate_node_sizes_file: optional file read by
            `utils.get_bootsrtap_size` (node name -> node size).
        intermediate_node_labels_file: accepted but not used here.
        leaf_labels_file: optional file read by `utils.get_cells_labels`
            (node name -> label); replaces the default leaf names.
        legend_file: optional file read by `utils.get_legend`
            (legend text -> colour).
        duplicate_file: optional file read by `utils.get_dup_labels`
            (node name -> colour of the '*' tag).
        tree_scale: 'linear' keeps branch lengths; 'log' replaces them with
            log10-compressed distances from the root.
        tree_rotation: rotate the drawing by 90 degrees when true.
        font_size: font size of the leaf labels.
        font_legend: font size of the legend text.
        node_size: size of leaf nodes.
        scale_rate: multiplier applied to every branch length; defaults to
            `ts.scale` (1).
        distance_factor: multiplier applied to the root distance before the
            log transform.
        y_scale: when true, configure `ts.y_axis` to show a scale.

    Returns:
        (t, ts): the ete3 tree and the TreeStyle to render it with.
    """
    t = ete3.Tree(newick=tree_file, format=1)
    ts = ete3.TreeStyle()
    if tree_rotation:
        ts.rotation = 90
    ts.show_leaf_name = True
    ts.show_scale = False
    ts.scale = 1
    if tree_name:
        ts.title.add_face(ete3.TextFace(tree_name, fsize=20), column=0)

    # initialize all nodes and branches
    styles = {}
    for n in t.traverse():
        node_style = ete3.NodeStyle()
        node_style['fgcolor'] = 'black'
        node_style["vt_line_width"] = 2
        node_style["hz_line_width"] = 1
        styles[n.name] = {'style': node_style}

    # calculate the scale for the tree (log, linear and right size)
    root = t.get_tree_root()
    last_leaf = root.get_farthest_leaf()
    # NOTE(review): `y_axis` is not part of stock ete3's TreeStyle as far as
    # can be told from here; presumably a customised ete3 is in use.
    ts.y_axis['scale_min_value'] = root.dist
    ts.y_axis['scale_max_value'] = last_leaf[1]

    for n in t.traverse():
        if tree_scale == 'log':
            if n == root:
                styles[n.name]['dist'] = 0
            else:
                # Branch length = log10 of the distance to the root minus
                # what the ancestors' (already transformed) branches cover.
                father_path = 0
                for ancestor in n.get_ancestors():
                    father_path += styles[ancestor.name]['dist']
                dist = math.log10(n.get_distance(root) * distance_factor
                                  + 1) - father_path
                if dist < 0:
                    dist = 0
                styles[n.name]['dist'] = dist
        elif tree_scale == 'linear':
            styles[n.name]['dist'] = n.dist

    # leaf styles and update distance
    if not scale_rate:
        scale_rate = ts.scale
    for n in t.traverse():
        if 'dist' in styles[n.name]:
            n.dist = styles[n.name]['dist'] * scale_rate
        if not n.is_leaf():
            styles[n.name]['style']["size"] = 0
        else:
            styles[n.name]['style']["size"] = node_size

    # add bootstrap values to the branches (size of the node)
    if intermediate_node_sizes_file:
        bootsrtap_sizes = utils.get_bootsrtap_size(
            intermediate_node_sizes_file)
        for branch, size in bootsrtap_sizes.items():
            styles[branch]['style']["size"] = size
            styles[branch]['style']['fgcolor'] = 'black'

    # add colors to the leafs
    # Always bound, because the leaf-label step below consults it even when
    # no colour file was given.
    cells_colors = {}
    if cell_colors_file:
        cells_colors = utils.get_cells_colors(cell_colors_file)
        for name, color in cells_colors.items():
            styles[name]['style']['fgcolor'] = color

    # reorder the tree by pre-proses if possible
    if order_vector_file:
        leaf_order = utils.get_leaf_order(order_vector_file)
        for n in t.traverse('postorder'):
            if n.get_descendants():
                # a: first leaf below n, b: last node below n
                a = ''
                for leaf in n.get_descendants(strategy='postorder'):
                    if leaf.is_leaf():
                        if not a:
                            a = leaf
                b = n.get_descendants(strategy='preorder')[-1]
                if a.is_leaf() and b.is_leaf():
                    if leaf_order[a.name] > leaf_order[b.name]:
                        left, right = n.children
                        n.children = [right, left]

    # add width to branches
    if clustering_sizes_file:
        t, styles = size_clustering(t, styles, clustering_sizes_file)

    # add colors to branches
    if clustering_colors_file:
        t, ts, styles = color_clustering(t, ts, styles,
                                         clustering_colors_file)

    # add new leaf labels
    if leaf_labels_file:
        cells_labels = utils.get_cells_labels(leaf_labels_file)
        ts.show_leaf_name = False
        for name, label in cells_labels.items():
            nodes = t.search_nodes(name=name)
            assert len(nodes) == 1, nodes
            node = nodes[0]
            if name in cells_colors:
                name_face = ete3.faces.TextFace(label, fsize=font_size,
                                                fgcolor=cells_colors[name])
            else:
                name_face = ete3.faces.TextFace(label, fsize=font_size)
            name_face.margin_left = 3
            node.add_face(name_face, 0, "aligned")

    # add duplicate tags to nodes
    if duplicate_file:
        dup_labels = utils.get_dup_labels(duplicate_file)
        for name, color in dup_labels.items():
            node = node_check(name, t)
            if not node:
                continue
            dup_face = ete3.faces.TextFace('*', fsize=10, fgcolor=color)
            dup_face.margin_left = 5
            node.add_face(dup_face, column=1)

    # add y-scale to the picture
    if y_scale:
        ts.y_axis['show'] = True
        ts.y_axis['scale_type'] = tree_scale
        ts.y_axis['scale_length'] = int(root.get_farthest_leaf()[1]
                                        - root.dist + 10)

    # add legend to the tree
    if legend_file:
        legend = utils.get_legend(legend_file)
        for mark in list(legend.keys()):
            ts.legend.add_face(ete3.faces.CircleFace(2, legend[mark]),
                               column=0)
            legend_txt = ete3.faces.TextFace(mark, fsize=font_legend)
            legend_txt.margin_left = 5
            ts.legend.add_face(legend_txt, column=1)
        ts.legend_position = 4

    # set all the styles
    for n in t.traverse():
        n.set_style(styles[n.name]['style'])

    return t, ts
def _make_box_face(colour):
    """Return a 30x30 filled square used as a marker cell in the aligned table.

    ``colour`` is passed to ``e.RectFace`` as both the border and the fill
    colour.  The margins and alignment are the ones every marker in the table
    shares.
    """
    face = e.RectFace(30, 30, colour, colour)
    face.border.margin = 5
    face.margin_right = 5
    face.margin_left = 5
    face.vt_align = 1
    # NOTE: this used to assign ``ht_align``.  Assuming ``e`` is ete3, faces
    # read their horizontal alignment from ``hz_align``, so the old assignment
    # had no effect on the rendering.
    face.hz_align = 1
    return face


def Main():
    """Render the tree with an aligned metadata table next to its leaves.

    Reads the workflow metadata, the tab-separated distance matrix and the
    tree text through the module-level helpers and paths (``metadataPath``,
    ``distancePath``, ``treePath``), roots the tree on the node named
    ``"Reference"``, attaches one aligned face per table cell to every leaf
    and finally renders the result to ``outputFile``.

    Table layout, left to right:

    * one column per entry of ``sensitive_meta_data.get_columns()``
    * ``SampleID``
    * ``New?`` (green square when the sample's ``new`` flag equals ``True``)
    * one column per collected plasmid "inc" replicon (black square when the
      sample is listed for that replicon)
    * ``MLSTScheme``, ``Sequence Type``, ``Carbapenamases``
    * the distance matrix, one column per cell of its first row

    The ``"Reference"`` leaf carries the column titles; every other leaf
    carries the values of the sample it is named after (with ``".fa"``
    removed from the leaf name).  If the distance input yields no rows, the
    distance columns are simply omitted.
    """
    sensitive_meta_data = SensitiveMetadata()
    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)
    treeFile = "".join(read(treePath))

    # Distance matrix stored as {row name: [cell, ...]}; cells stay strings.
    distanceDict = {}
    for line in distance:
        rowName, *cells = line.split("\t")
        distanceDict[rowName] = cells
    # The first row of the matrix supplies the titles (and the number) of the
    # distance columns.  An empty matrix yields no distance columns at all.
    distanceHeader = next(iter(distanceDict.values()), [])

    # Build the tree and root it on the reference node.
    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")

    # Tree style.
    ts = e.TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_length = True
    ts.scale = 2000  # pixels per branch length unit
    ts.branch_vertical_margin = 15  # pixels between branches

    # One node style shared by every node of the tree.
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # Collect the plasmid origins: {inc name: [IDs of samples carrying it]}.
    # Insertion order of this dict defines the order of the inc columns.
    plasmidIncs = {}
    for sample in metadata.values():
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" not in inc.lower():
                    continue
                sampleIds = plasmidIncs.setdefault(inc, [])
                if sample.ID not in sampleIds:
                    sampleIds.append(sample.ID)

    for n in t.traverse():
        if not n.is_leaf():
            continue

        index = 0  # next free column of the aligned table
        if n.name == "Reference":
            # Header row: the reference leaf carries the column titles.
            reference = t & "Reference"
            for column in sensitive_meta_data.get_columns():
                reference.add_face(addFace(column), index, "aligned")
                index += 1
            for title in ("SampleID", "New?"):
                reference.add_face(addFace(title), index, "aligned")
                index += 1
            for offset, inc in enumerate(plasmidIncs):
                reference.add_face(addFace(inc), index + offset, "aligned")
            index += len(plasmidIncs)
            for title in ("MLSTScheme", "Sequence Type", "Carbapenamases"):
                reference.add_face(addFace(title), index, "aligned")
                index += 1
            for offset, cell in enumerate(distanceHeader):
                reference.add_face(addFace(cell), index + offset, "aligned")
        else:
            # Sample row: fill the columns from the sample's metadata.
            sampleName = n.name.replace(".fa", "")
            mData = metadata[sampleName]

            for column in sensitive_meta_data.get_columns():
                value = sensitive_meta_data.get_value(
                    bcid=mData.ID, column_name=column)
                n.add_face(addFace(value), index, "aligned")
                index += 1

            n.add_face(addFace(mData.ID), index, "aligned")
            index += 1

            # Kept as an explicit equality test: the type of ``new`` is not
            # visible here, so a plain truthiness test could change results.
            if mData.new == True:  # noqa: E712
                n.add_face(_make_box_face("green"), index, "aligned")
            # The "New?" column exists for every sample, flagged or not.
            index += 1

            # Presence/absence of each inc; absent ones leave the cell empty.
            for offset, sampleIds in enumerate(plasmidIncs.values()):
                if sampleName in sampleIds:
                    n.add_face(
                        _make_box_face("black"), index + offset, "aligned")
            index += len(plasmidIncs)

            for value in (mData.MLSTSpecies, mData.SequenceType,
                          mData.CarbapenemResistanceGenes):
                n.add_face(addFace(value), index, "aligned")
                index += 1

            # Distance row of this leaf, looked up by the unmodified leaf
            # name; as many cells as the header row has columns.
            if distanceHeader:
                distanceRow = distanceDict[n.name]
                for offset in range(len(distanceHeader)):
                    n.add_face(
                        addFace(distanceRow[offset]), index + offset,
                        "aligned")

    # Output format follows the extension of ``outputFile``.
    t.render(outputFile, w=5000, units="mm", tree_style=ts)