def _get_T(file):
    """Parse a DOT-style edge list from an open file and return it as a Tree.

    The first and last lines of the file are discarded (presumably the
    ``digraph ... {`` header and the closing ``}`` -- confirm against the
    files this is fed). All tabs and spaces are stripped from the remaining
    lines, and every line containing ``->`` is read as ``parent->child`` with
    an optional ``[...]`` attribute suffix on the child.

    Each node that appears as the child of some edge is renamed to
    ``name + label``, where ``label`` is the attribute suffix of the *last*
    edge pointing at it ('' when that edge had none). Every edge is given a
    distance of 1.

    Args:
        file: a readable text file object positioned at the start of the
            DOT content.

    Returns:
        The object returned by ``Tree.from_parent_child_table``.

    Raises:
        ValueError: if a line chains several edges (``a->b->c``), which this
            parser does not support.
    """
    # Slicing drops the wrapper lines without mutating the list twice.
    lines = file.read().split('\n')[1:-1]
    lines = [line.replace('\t', '').replace(' ', '') for line in lines]

    edges = []
    edge_labels = {}  # key is child node label. val is edge label
    for line in lines:
        if '->' not in line:
            continue
        # Split only on the first arrow / first bracket so that label text
        # containing '->' or '[' no longer breaks tuple unpacking.
        parent, rest = line.split('->', 1)
        child, bracket, attrs = rest.partition('[')
        if '->' in child:
            raise ValueError('chained edges are not supported: %r' % line)
        edges.append((parent, child))
        edge_labels[child] = bracket + attrs

    # Labels are applied only after all lines are read, so a parent picks up
    # the label of the edge that leads to it even if that edge appears later.
    parent_child_table = [
        (parent + edge_labels.get(parent, ''), child + edge_labels[child], 1)
        for parent, child in edges
    ]
    return Tree.from_parent_child_table(parent_child_table)
def Get_Clonal_IDs(self):
    """Build a phylogeny from the clone IDs in ``Tumour_Evolution`` and show it.

    The keys of the module-level ``Tumour_Evolution`` mapping are used as
    clone IDs. The root entry ``"P-0:0"`` is dropped. IDs without a comma
    are treated as main branches and linked to the root ``"P"`` under the
    part of the ID before the first ``-``. IDs with commas
    (``head,g1,...,last-time``) are expanded into a chain of edges
    ``head -> head,g1 -> ... -> full id``.

    The de-duplicated, sorted edge list is passed to
    ``Tree.from_parent_child_table`` and the resulting tree is displayed
    with leaf names shown. Progress is printed along the way.

    Returns:
        None.

    Raises:
        ValueError: if ``"P-0:0"`` is not a key of ``Tumour_Evolution``.
        IndexError: if the last component of a comma-separated ID has no
            ``-`` separator.
    """
    # list(...) gives a private, mutable copy on both Python 2 and 3.
    clone_ids = list(Tumour_Evolution)
    clone_ids.remove("P-0:0")
    print("ALL, IDs %s" % (clone_ids,))

    # Partition in a single pass. The previous code removed items from the
    # list it was iterating over, which skipped every element following a
    # removed one and left main-branch IDs among the descendants.
    main_branches = []
    descendants = []
    for clone_id in clone_ids:
        if "," in clone_id:
            descendants.append(clone_id)
        else:
            # Everything before the first '-' is the branch name; a fixed
            # three-character slice truncated names such as "PC10".
            main_branches.append(clone_id.split("-", 1)[0])
    print("Main, branches  %s" % (main_branches,))

    phylogeny = [("P", branch) for branch in main_branches]
    print("ph  %s" % (phylogeny,))
    print("ids  %s" % (descendants,))

    for clone in descendants:
        generations = clone.split(",")
        head = generations[0]
        last_and_hour = generations[-1].split("-")
        last = last_and_hour[0]
        stamp = last_and_hour[1]
        # Positional slice: removing by value could delete the wrong entry
        # when two components happen to be equal.
        middle = generations[1:-1]
        if middle:
            print(clone)
        # Walk down the lineage, attaching each generation to the node built
        # so far. Attaching every generation to `head` left deeper parents
        # (e.g. 'PC1,3,14') without any incoming edge.
        carry = head
        for element in middle:
            child = carry + "," + element
            phylogeny.append((carry, child))
            carry = child
        phylogeny.append((carry, carry + "," + last + "-" + stamp))
    print(phylogeny)

    # Shared ancestors are emitted once per descendant; drop the duplicates.
    p = sorted(set(phylogeny))
    print("p %s" % (p,))

    t = Tree.from_parent_child_table(p)
    ts = TreeStyle()
    ts.show_leaf_name = True
    t.show(tree_style=ts)
def convert(s):
    """Read the .dot file named *s* and return its edges as a Tree.

    Each line has ``]`` and ``;`` removed and is split on whitespace. A line
    that contains ``->`` or ``--`` as a separate token contributes the edge
    ``(first token, third token)``. All other lines are ignored.

    Args:
        s: path of the .dot file to read.

    Returns:
        The object returned by ``Tree.from_parent_child_table`` for the
        collected (parent, child) pairs.

    Raises:
        IndexError: if an edge line has fewer than three tokens.
    """
    nwlist = []
    # The context manager closes the file even if parsing raises.
    with open(s) as dot_file:
        for line in dot_file:
            tokens = line.replace(']', '').replace(';', '').split()
            # for GT with no branch lengths
            if '->' in tokens or '--' in tokens:
                nwlist.append((tokens[0], tokens[2]))
    return Tree.from_parent_child_table(nwlist)
from ete3 import Tree, TreeStyle, TextFace #t = Tree( "((5),0,2,3);", format=1 ) #t = Tree( "(3,5);", format =1 ) t = Tree("(A:1,(B:1,(E:1,D:1):0.5):0.5);") t = Tree("( ( A,B,(5) ) );", format=1) t = Tree.from_parent_child_table([("A", "B"), ("A", "C"), ("C", "D"), ("C", "E")]) t = Tree.from_parent_child_table([('P', 'PC1'), ('PC1', 'PC1,2-26:4402'), ('PC1', 'PC1,3'), ('PC1,3', 'PC1,3,5-26:6743'), ('PC1', 'PC1,14'), ('PC1,3,14', 'PC1,3,14,4-27:124'), ('PC1', 'PC1,9'), ('PC1,9', 'PC1,9,7-27:675'), ('PC1', 'PC1,0-26:4132')]) t = Tree.from_parent_child_table([("P", "PC0-23:7392", 100), ("P", "PC1-25:6592", 7.5), ("PC1-25:6592", "PC1,1-26:7008", 6), ("PC1-25:6592", "PC1,2-26:7008", 6)]) ts = TreeStyle() ts.show_leaf_name = True #ts.title.add_face(TextFace("Hello ETE", fsize=20), column=0) t.show(tree_style=ts)
def Surviving_Phylogenetic_Tree(self):
    """Build the phylogeny of the clones in ``Tumour_Evolution`` and show it.

    The keys of the module-level ``Tumour_Evolution`` mapping are used as
    clone IDs of the form ``PC<n>[,<m>...]-<year>:<rest>``. After dropping
    the root entry ``"P-0:0"`` the IDs are grouped by depth (number of
    ``,<m>`` components). Depth-0 clones are attached to the root ``"P"``
    with length ``100 - year``. Every deeper clone is attached to each
    clone of the previous depth that shares its lineage prefix, with length
    ``abs(year of its depth-0 ancestor - its own year)``. A clone whose
    previous depth contains no matching parent is left out of the tree.

    The edge list is passed to ``Tree.from_parent_child_table`` and the
    result is displayed as a half-circle with leaf names shown.

    Returns:
        None.

    Raises:
        ValueError: if ``"P-0:0"`` is missing, or if some ID does not fit
            the expected pattern at any depth (the previous implementation
            looped forever in that case).
        IndexError: if there are no clones besides the root.
        KeyError: if a clone is attached whose depth-0 ancestor is not
            itself among the clone IDs.
    """
    top_pattern = 'PC[0-9]+'
    branch = ',[0-9]+'
    top_re = re.compile(top_pattern)

    def year_of(clone):
        # "<lineage>-<year>:<rest>" -> int(<year>)
        return int(clone.split("-", 1)[1].split(":")[0])

    # list(...) gives a private, mutable copy on both Python 2 and 3.
    clone_ids = list(Tumour_Evolution)
    clone_ids.remove("P-0:0")

    # --- Group the IDs by depth -------------------------------------------
    # An ID can only match at depth d if it contains at least d commas, so
    # once the depth exceeds the largest comma count nothing left can ever
    # match and the loop must stop instead of spinning forever.
    max_depth = max([cid.count(",") for cid in clone_ids] or [0])
    levels = []
    level_pattern = top_pattern
    while clone_ids:
        if len(levels) > max_depth:
            raise ValueError(
                "clone IDs do not match 'PC<n>[,<m>...]-...': %r"
                % (clone_ids,))
        level_re = re.compile(level_pattern + '-.*')
        matches = [
            m.group(0) for cid in clone_ids for m in [level_re.search(cid)]
            if m
        ]
        for matched in matches:
            clone_ids.remove(matched)
        levels.append(matches)
        level_pattern += branch

    # --- Depth 0: attach the main branches to the root --------------------
    parents = natsorted(levels.pop(0), key=lambda y: y.lower())
    top_year = {}  # "PC<n>" -> year of that main branch
    phylogeny = []
    for clone in parents:
        year = year_of(clone)
        top_year[top_re.search(clone).group(0)] = year
        phylogeny.append(("P", clone, 100 - year))  # Year length

    # --- Deeper levels: attach each clone to its parent(s) ----------------
    prefix_pattern = top_pattern
    for level in levels:
        level = natsorted(level, key=lambda y: y.lower())
        prefix_re = re.compile(prefix_pattern)
        # Lineage prefix of every candidate parent, computed once per level.
        parent_prefixes = [
            (parent, prefix_re.search(parent).group(0)) for parent in parents
        ]
        for clone in level:
            clone_prefix = prefix_re.search(clone).group(0)
            for parent, parent_prefix in parent_prefixes:
                if clone_prefix == parent_prefix:
                    # Length is measured from the depth-0 ancestor's year,
                    # not from the direct parent's.
                    ancestor = top_re.search(clone).group(0)
                    phylogeny.append(
                        (parent, clone,
                         abs(top_year[ancestor] - year_of(clone))))
        prefix_pattern += branch
        parents = level

    # --- Display ----------------------------------------------------------
    t = Tree.from_parent_child_table(phylogeny)
    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.mode = "c"
    ts.arc_start = -180  # 0 degrees = 3 o'clock
    ts.arc_span = 180
    t.show(tree_style=ts)