def _newick_source(tfile, raxml):
    """Return the value to hand to a tree constructor for ``tfile``.

    When ``raxml`` is false the path is returned untouched.  Otherwise the
    file is read and every ``:<length>[<support>]`` branch annotation is
    rewritten as ``:<length>[&&NHX:support=<support>]``; the rewritten text
    is returned.
    """
    if not raxml:
        return tfile
    import re
    with open(tfile) as handle:
        newick = handle.read()
    return re.sub(r":(\d+\.\d+)\[(\d+)\]", r":\1[&&NHX:support=\2]", newick)


def _load_heatmap(path):
    """Parse a tab-separated heatmap matrix.

    Lines starting with ``#`` and blank lines are skipped.  The first field
    of every other line is the row name; each remaining field becomes a
    float, or ``None`` when the field is empty.

    Returns ``(data, max_value, min_value)`` where ``data`` maps row name to
    its list of values and the extrema are taken over the non-missing values
    only (both are ``None`` if the file holds no numeric value).
    """
    data = {}
    max_value = min_value = None
    with open(path) as handle:
        for line in handle:
            if line.startswith('#') or not line.strip():
                continue
            fields = line.split('\t')
            name = fields[0].strip()
            # Strip before testing: the last field carries the newline.
            values = [float(x) if x.strip() else None for x in fields[1:]]
            present = [v for v in values if v is not None]
            if present:
                if max_value is None or max(present) > max_value:
                    max_value = max(present)
                if min_value is None or min(present) < min_value:
                    min_value = min(present)
            data[name] = values
    return data, max_value, min_value


def _heatmap_scale(max_value, min_value, center):
    """Return the distance from ``center`` that maps to the darkest colour."""
    if max_value is None or min_value is None:
        # No numeric data: the scale is never used to colour a cell.
        return 0
    span_up = abs(center - max_value)
    span_down = abs(center - min_value)
    if center <= min_value:
        return span_down + span_up
    return max(span_down, span_up)


def _gradient_color(value, max_value, hue):
    """Return a ``#rrggbb`` colour whose lightness decreases with ``value``."""
    import colorsys
    base_lightness = 0.7
    saturation = 0.3
    lightness = 1 - (value * base_lightness) / max_value
    rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
    return '#%02x%02x%02x' % tuple(int(channel * 255) for channel in rgb)


def _auto_color(spec, face, node, palette, last_seeds):
    """Resolve an ``auto(<attr>)`` colour spec for ``node``.

    ``spec`` is returned unchanged when it is empty or does not contain
    ``auto(...)``.  Otherwise the node attribute named inside the
    parentheses (or the face's own value when the parentheses are empty)
    selects a colour bin; ``palette`` remembers the colour chosen for each
    bin and ``last_seeds`` the last hue seed used per attribute.
    """
    if not spec:
        return spec
    import random
    import re
    from ete2 import random_color
    auto_m = re.search(r"auto\(([^)]*)\)", spec)
    if not auto_m:
        return spec
    key_attr = (auto_m.group(1).strip() or face["value"]).lstrip('@')
    color_bin = getattr(node, key_attr, None)
    last_seed = last_seeds.setdefault(key_attr, random.random())
    seed = last_seed + 0.10 + random.uniform(0.1, 0.2)
    last_seeds[key_attr] = seed
    return palette.setdefault(color_bin, random_color(h=seed))


def run(args):
    """Print or draw every tree yielded by ``args.src_tree_iterator``.

    In text mode (``args.text_mode``) each tree is printed as ASCII art and
    nothing else happens.  Otherwise a ``TreeStyle`` is built from the
    options, the face descriptions in the module-level ``FACES`` list are
    attached to the matching nodes, and each tree is either rendered to
    ``t<index>.<args.image>`` or shown interactively.

    :param args: option namespace.  Attributes read here: ``text_mode``,
        ``src_tree_iterator``, ``raxml``, ``show_internal_names``,
        ``show_attributes``, ``face``, ``mode``, ``tree_width``, ``as_ncbi``,
        ``alg``, ``alg_type``, ``alg_format``, ``heatmap``, ``bubbles``,
        ``branch_separation``, ``show_support``, ``show_branch_length``,
        ``force_topology``, ``height``, ``width``, ``image`` and
        ``size_units``.  ``height`` and ``width`` are reset to ``None`` when
        falsy.
    :return: None
    """
    if args.text_mode:
        from ete2 import Tree
        for tfile in args.src_tree_iterator:
            t = Tree(_newick_source(tfile, args.raxml))
            print(t.get_ascii(show_internal=args.show_internal_names,
                              attributes=args.show_attributes))
        return

    from collections import defaultdict
    from ete2 import PhyloTree, TextFace, RectFace, faces, TreeStyle

    global FACES
    FACES = parse_faces(args.face) if args.face else []

    # VISUALIZATION
    ts = TreeStyle()
    ts.mode = args.mode
    ts.show_leaf_name = True
    ts.tree_width = args.tree_width

    # A user-defined face already draws the name: do not draw it twice.
    if any(f["value"] == "@name" for f in FACES):
        ts.show_leaf_name = False

    if args.as_ncbi:
        ts.show_leaf_name = False
        FACES.extend(parse_faces([
            'value:@sci_name, size:10, fstyle:italic',
            'value:@taxid, color:grey, size:6, format:" - %s"',
            'value:@sci_name, color:steelblue, size:7, pos:b-top, nodetype:internal',
            'value:@rank, color:indianred, size:6, pos:b-bottom, nodetype:internal',
        ]))

    if args.alg:
        FACES.extend(parse_faces(
            ['value:@sequence, size:10, pos:aligned, ftype:%s' % args.alg_type]))

    if args.heatmap:
        FACES.extend(parse_faces(
            ['value:@name, size:10, pos:aligned, ftype:heatmap']))

    if args.bubbles:
        for bubble in args.bubbles:
            FACES.extend(parse_faces(
                ['value:@%s, pos:float, ftype:bubble, opacity:0.4' % bubble]))

    ts.branch_vertical_margin = args.branch_separation
    if args.show_support:
        ts.show_branch_support = True
    if args.show_branch_length:
        ts.show_branch_length = True
    if args.force_topology:
        ts.force_topology = True
    ts.layout_fn = lambda x: None

    # Heatmap settings.  The matrix does not depend on the tree, so it is
    # parsed once instead of once per tree.
    heatmap_center_value = 0
    heatmap_color_center = "white"
    heatmap_color_up = 0.3
    heatmap_color_down = 0.7
    heatmap_color_missing = "black"
    heatmap_data = {}
    heatmap_max_value = 0
    if args.heatmap:
        heatmap_data, max_value, min_value = _load_heatmap(args.heatmap)
        heatmap_max_value = _heatmap_scale(max_value, min_value,
                                           heatmap_center_value)

    # scale the tree
    if not args.height:
        args.height = None
    if not args.width:
        args.width = None

    for tindex, tfile in enumerate(args.src_tree_iterator):
        t = PhyloTree(_newick_source(tfile, args.raxml))

        if args.alg:
            t.link_to_alignment(args.alg, alg_format=args.alg_format)

        f2color = {}
        f2last_seed = {}
        for node in t.traverse():
            node.img_style['size'] = 0
            if len(node.children) == 1:
                node.img_style['size'] = 2
                node.img_style['shape'] = "square"
                node.img_style['fgcolor'] = "steelblue"

            # Next free column per face position, for faces without an
            # explicit column.
            ftype_pos = defaultdict(int)

            for f in FACES:
                nodetype = f['nodetype']
                applies = (nodetype == 'any' or
                           (nodetype == 'leaf' and node.is_leaf()) or
                           (nodetype == 'internal' and not node.is_leaf()))
                if not applies:
                    continue

                # if node passes face filters
                if not node_matcher(node, f["filters"]):
                    continue

                if f["value"].startswith("@"):
                    fvalue = getattr(node, f["value"][1:], None)
                else:
                    fvalue = f["value"]

                # if node's attribute has content, generate face
                if fvalue is None:
                    continue

                fsize = f["size"]
                fcolor = _auto_color(f['color'], f, node,
                                     f2color, f2last_seed)
                fbgcolor = _auto_color(f["bgcolor"], f, node,
                                       f2color, f2last_seed)

                # Reset for every face so an unsupported ftype cannot re-add
                # the face built in a previous iteration.
                F = None
                ftype = f["ftype"]
                if ftype == "text":
                    if f.get("format", None):
                        fvalue = f["format"] % fvalue
                    F = TextFace(fvalue,
                                 fsize=fsize,
                                 fgcolor=fcolor or "black",
                                 fstyle=f.get('fstyle', None))

                elif ftype == "fullseq":
                    F = faces.SeqMotifFace(seq=fvalue, seq_format="seq",
                                           seqtail_format="seq",
                                           height=fsize)

                elif ftype == "compactseq":
                    F = faces.SeqMotifFace(seq=fvalue,
                                           seq_format="compactseq",
                                           seqtail_format="compactseq",
                                           height=fsize)

                elif ftype == "blockseq":
                    F = faces.SeqMotifFace(seq=fvalue,
                                           seq_format="blockseq",
                                           seqtail_format="blockseq",
                                           height=fsize,
                                           fgcolor=fcolor or "slategrey",
                                           bgcolor=fbgcolor or "slategrey",
                                           scale_factor=1.0)
                    # The background was consumed by the motif itself.
                    fbgcolor = None

                elif ftype == "bubble":
                    try:
                        rad = fsize * float(fvalue)
                    except (TypeError, ValueError):
                        # Non-numeric attribute: fall back to the plain size.
                        rad = fsize
                    F = faces.CircleFace(radius=rad, style="sphere",
                                         color=fcolor or "steelblue")

                elif ftype == "heatmap":
                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                    else:
                        col = f["column"]

                    row = heatmap_data.get(node.name, [])
                    for i, value in enumerate(row):
                        ftype_pos[f["pos"]] += 1

                        if value is None:
                            color = heatmap_color_missing
                        elif value > heatmap_center_value:
                            color = _gradient_color(
                                abs(heatmap_center_value - value),
                                heatmap_max_value, heatmap_color_up)
                        elif value < heatmap_center_value:
                            color = _gradient_color(
                                abs(heatmap_center_value - value),
                                heatmap_max_value, heatmap_color_down)
                        else:
                            color = heatmap_color_center
                        node.add_face(RectFace(20, 20, color, color),
                                      position="aligned", column=col + i)
                    # Heatmap cells are added directly above; there is no
                    # single face left to add below.

                # "profile", "barchart" and "piechart" produce no face.

                # Add the Face
                if F:
                    F.opacity = f['opacity'] or 1.0

                    # Set face general attributes
                    if fbgcolor:
                        F.background.color = fbgcolor

                    if not f['column']:
                        col = ftype_pos[f["pos"]]
                        ftype_pos[f["pos"]] += 1
                    else:
                        col = f["column"]
                    node.add_face(F, column=col, position=f["pos"])

        if args.image:
            t.render("t%d.%s" % (tindex, args.image),
                     tree_style=ts, w=args.width, h=args.height,
                     units=args.size_units)
        else:
            t.show(None, tree_style=ts)
def _conservation_face(text, margin, background, fgcolor=None):
    """Build one opaque text cell with equal margins on all four sides."""
    face = TextFace(text)
    face.margin_top = margin
    face.margin_right = margin
    face.margin_left = margin
    face.margin_bottom = margin
    face.inner_background.color = background
    if fgcolor is not None:
        face.fgcolor = fgcolor
    face.opacity = 1.
    return face


def _identity_label(identity_value):
    """Format an identity for display: '100', or the value rounded to 0.1."""
    if str(identity_value) in ('100.00', '100.0'):
        return '100'
    return str(round(float(identity_value), 1))


def plot_blast_result(tree_file, blast_result_file_list, id2description,
                      id2mlst, output_file="test.svg"):
    '''
    Staph aureus PVL project with Laure Jaton.

    Draw a phylogeny with the conservation of virulence factors next to it.
    Requires MLST results, the full set of tblastn results (virulence
    factors vs chromosomes) and a mapping between the genome accessions and
    the names to display in the phylogeny.  Patient molis identifiers are
    replaced by "CHUV n".

    Every BLAST file is read as tab-separated lines using columns 0 (query),
    1 (subject), 2 (identity), 8 and 9 (hit coordinates).  Within one
    subject, when the start of one hit falls inside the range of another,
    the hit with the lower identity is dropped.  Queries left without any
    hit get no column.  Each leaf receives one aligned cell per remaining
    query (sorted by name): the identity when it is above 70, otherwise
    "-".  The first leaf also carries the query names as a header row.

    :param tree_file: newick phylogeny whose leaf names match the keys of
        all the dictionaries used here
    :param blast_result_file_list: tblastn results, virulence factors vs
        chromosome (best hit only)
    :param id2description: genome identifier used in the tree -> description
        (e.g. "S aureus Newman"); descriptions containing "nord" use the
        ``m`` colour map and wider margins, the others use ``m2``
    :param id2mlst: tree identifier -> S. aureus ST type
    :param output_file: path of the rendered image (default "test.svg")
    :return: None
    '''
    blast2data = {}
    queries = []
    for one_blast_file in blast_result_file_list:
        with open(one_blast_file, 'r') as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank / trailing empty lines.
                    continue
                fields = line.split('\t')
                query, subject = fields[0], fields[1]
                blast2data.setdefault(subject, {})[query] = [
                    float(fields[2]), int(fields[8]), int(fields[9])]
                if query not in queries:
                    queries.append(query)
    print(blast2data)
    print(queries)

    # Resolve hits that are located at the same place on one subject.
    for one_blast in list(blast2data):
        hits = blast2data[one_blast]
        # Snapshots of the keys: entries are deleted while looping, and a
        # stale key then raises the KeyError handled below.
        for ref_gene in list(hits):
            for query_gene in list(hits):
                overlap = False
                if ref_gene == query_gene:
                    continue
                try:
                    # Debug trace kept from the original; it now sits inside
                    # the try so a removed gene cannot abort the run.
                    if one_blast == 'NC_002745' and ref_gene == 'selm':
                        print('target: %s %s' % (ref_gene, hits[ref_gene]))
                        print('%s %s' % (query_gene, hits[query_gene]))
                    # check if position is overlapping
                    ref_start, ref_end = sorted(hits[ref_gene][1:3])
                    if ref_start <= hits[query_gene][1] <= ref_end:
                        print('Overlaping locations!')
                        print('%s %s %s' % (one_blast, ref_gene,
                                            hits[ref_gene]))
                        print('%s %s %s' % (one_blast, query_gene,
                                            hits[query_gene]))
                        overlap = True
                    query_start, query_end = sorted(hits[query_gene][1:3])
                    if query_start <= hits[ref_gene][1] <= query_end:
                        print('Overlapping locations!')
                        print('%s %s %s' % (one_blast, ref_gene,
                                            hits[ref_gene]))
                        print('%s %s %s' % (one_blast, query_gene,
                                            hits[query_gene]))
                        overlap = True
                    if overlap:
                        if hits[ref_gene][0] > hits[query_gene][0]:
                            del hits[query_gene]
                            print('removing %s' % query_gene)
                        else:
                            del hits[ref_gene]
                            print('removing %s' % ref_gene)
                            # NOTE(review): the source formatting was lost;
                            # the break is assumed to belong to this branch
                            # (the reference hit is gone) - confirm.
                            break
                except KeyError:
                    print('colocation already resolved: %s %s'
                          % (query_gene, ref_gene))

    # Drop the queries that no longer have a hit in any subject.
    queries = [query for query in queries
               if any(query in hits for hits in blast2data.values())]

    t1 = Tree(tree_file)
    # Root the tree on its midpoint.
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    sorted_queries = sorted(queries)
    head = True
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        for col, value in enumerate(sorted_queries):
            print('%s %s' % (lf.name, value))
            if head:
                # first row, print gene names
                lf.add_face(_conservation_face(' %s ' % str(value), 4,
                                               "white"),
                            col, position="aligned")
            try:
                identity_value = blast2data[lf.name][value][0]
                is_nord = 'nord' in id2description[lf.name]
            except KeyError:
                # No hit, or leaf without description: empty cell.
                cell = _conservation_face(' - ', 2, "white")
            else:
                if float(identity_value) > 70:
                    margin, mapper = (4, m) if is_nord else (2, m2)
                    label = _identity_label(identity_value)
                    shown = float(label)
                    cell = _conservation_face(
                        ' %s ' % label, margin,
                        rgb2hex(mapper.to_rgba(shown)),
                        # Dark backgrounds need a light font.
                        fgcolor="white" if shown > 92 else None)
                else:
                    cell = _conservation_face(' - ', 2, "white")
            lf.add_face(cell, col, position="aligned")

        try:
            lf.name = ' %s (%s)' % (id2description[lf.name],
                                    id2mlst[lf.name])
        except KeyError:
            # Fall back to the raw leaf name.  The label stays a string (the
            # previous code stored a TextFace object in ``lf.name``) and a
            # missing MLST entry is shown as '-'.
            lf.name = ' %s (%s)' % (lf.name, id2mlst.get(lf.name, '-'))
        head = False

    t1.render(output_file, dpi=800, h=400)
def _set_face_margins(face, margin):
    """Give ``face`` the same margin on all four sides."""
    face.margin_top = margin
    face.margin_right = margin
    face.margin_left = margin
    face.margin_bottom = margin


def plot_blast_result(tree_file, blast_result_file_list, id2description,
                      id2mlst, check_overlap, ordered_queries,
                      fasta_file2accession, id_cutoff=80,
                      reference_accession='-', accession2hit_filter=False,
                      show_identity_values=True, output_file="profile.svg"):
    '''
    Staph aureus PVL project with Laure Jaton.

    Draw a phylogeny with the conservation of virulence factors next to it.
    Requires MLST results, the full set of tblastn results (virulence
    factors vs chromosomes) and a mapping between the genome accessions and
    the names to display in the phylogeny.  Patient molis identifiers are
    replaced by "CHUV n".

    Hits whose identity is not above ``id_cutoff`` are discarded.  One
    aligned column is drawn for every entry of ``ordered_queries`` that
    keeps at least one hit among the leaves of the tree; the column titles
    go into the aligned header of the tree style.

    :param tree_file: newick phylogeny whose leaf names match the keys of
        all the dictionaries used here
    :param blast_result_file_list: tblastn results, virulence factors vs
        chromosome (best hit only)
    :param id2description: genome accession -> description
        (e.g. "S aureus Newman")
    :param id2mlst: tree identifier -> S. aureus ST type
    :param check_overlap: forwarded to
        ``blast_utils.remove_blast_redundancy``
    :param ordered_queries: query names in the order the columns are drawn
    :param fasta_file2accession: leaf name -> genome accession
    :param id_cutoff: identity threshold; converted with ``float``
    :param reference_accession: leaf name of the reference taxon, which is
        always drawn with the blue scale
    :param accession2hit_filter: optional mapping accession -> queries; the
        listed hits are drawn with the green scale instead of the blue one
    :param show_identity_values: write the identity in each cell; when
        false only coloured cells are drawn for hits reaching the cutoff
    :param output_file: path of the rendered image (default "profile.svg")
    :return: None
    '''
    import blast_utils

    # One numeric cutoff for every comparison below; the raw value used to
    # be compared directly in one place and through float() elsewhere.
    cutoff = float(id_cutoff)

    blast2data, queries = blast_utils.remove_blast_redundancy(
        blast_result_file_list, check_overlap)

    queries_count = {}
    for query in queries:
        queries_count[query] = 0
        for one_blast in blast2data:
            if query in blast2data[one_blast]:
                if float(blast2data[one_blast][query][0]) > cutoff:
                    queries_count[query] += 1
                else:
                    del blast2data[one_blast][query]

    print(queries_count)
    for query in queries:
        print("Hit counts: %s\t%s" % (query, queries_count[query]))
    # Rebuild the list in place: popping while iterating skipped the element
    # that followed every removed query.
    queries[:] = [query for query in queries if queries_count[query] != 0]
    print('delete columns with no matches ok')

    t1 = Tree(tree_file)
    tss = TreeStyle()
    # Root the tree on its midpoint.
    R = t1.get_midpoint_outgroup()
    t1.set_outgroup(R)
    t1.ladderize()

    # Keep the queries having at least one hit among the leaves of the tree.
    leaf_accessions = [fasta_file2accession[lf2.name]
                       for lf2 in t1.iter_leaves()
                       if lf2.name in fasta_file2accession]
    ordered_queries_filtered = []
    for query in ordered_queries:
        for accession in leaf_accessions:
            try:
                identity = float(blast2data[accession][query][0])
            except (KeyError, TypeError, ValueError):
                # No usable hit for this genome.
                continue
            if identity > cutoff:
                ordered_queries_filtered.append(query)
                break

    head = True
    print('drawing tree')
    print('n initial queries: %s n kept: %s'
          % (len(ordered_queries), len(ordered_queries_filtered)))
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0
        accession = fasta_file2accession[lf.name]
        for col, value in enumerate(ordered_queries_filtered):
            if head:
                # first row, print gene names
                if show_identity_values:
                    header = TextFace(' %s ' % str(value))
                    _set_face_margins(header, 2)
                else:
                    header = TextFace(' %s ' % str(value), fsize=6)
                    _set_face_margins(header, 0)
                header.rotation = 270
                header.vt_align = 2
                header.hz_align = 2
                header.inner_background.color = "white"
                header.opacity = 1.
                tss.aligned_header.add_face(header, col)

            try:
                identity_value = blast2data[accession][value][0]
                # Green scale only for the hits listed in the filter, and
                # never for the reference taxon; blue scale otherwise.
                use_green = (lf.name != reference_accession
                             and accession2hit_filter
                             and accession in accession2hit_filter
                             and value in accession2hit_filter[accession])
                mapper = m_green if use_green else m_blue
                color = rgb2hex(mapper.to_rgba(float(identity_value)))
            except (KeyError, TypeError, ValueError):
                # No usable hit: empty white cell.
                identity_value = 0
                color = "white"

            if show_identity_values:
                if float(identity_value) >= cutoff:
                    if str(identity_value) in ('100.00', '100.0'):
                        identity_value = '100'
                        cell = TextFace("%s " % identity_value)
                    else:
                        cell = TextFace(
                            "%.2f" % round(float(identity_value), 2))
                    if float(identity_value) > 95:
                        # Dark backgrounds need a light font.
                        cell.fgcolor = "white"
                else:
                    identity_value = '-'
                    cell = TextFace(' %s ' % str(identity_value))
                cell.opacity = 1.
                _set_face_margins(cell, 2)
                cell.inner_background.color = color
                lf.add_face(cell, col, position="aligned")
            elif float(identity_value) >= cutoff:
                # don't show identity values
                # NOTE(review): the source formatting was lost; the cell is
                # assumed to be added only for hits reaching the cutoff -
                # confirm.
                cell = TextFace(' ')
                _set_face_margins(cell, 0)
                cell.inner_background.color = color
                lf.add_face(cell, col, position="aligned")

        try:
            accession = fasta_file2accession[lf.name]
            lf.name = ' %s (%s)' % (id2description[accession],
                                    id2mlst[lf.name])
        except KeyError:
            print('-------- %s' % (id2description,))
            lf.name = ' %s (%s)' % (lf.name, id2mlst[lf.name])
        head = False

    # Mark the nodes whose support is below 0.9 with a blue dot.
    for n in t1.traverse():
        nstyle = NodeStyle()
        if n.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        n.set_style(nstyle)

    print('rendering tree')
    t1.render(output_file, dpi=1000, h=400, tree_style=tss)