def parsimony_up2(subtree, nodelist, parent, siblings):
    """Parsimony part: up direction -> from root to leafs.

    Combines the first tag set of ``parent`` with the first tag set of every
    sibling through ``get_intersect_or_union``, appends the result to this
    node's tag list and then recurses into every child of ``subtree``.

    Arguments:
        subtree  - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
        parent   - nodelist element of the parent node
        siblings - list of nodelist elements of the sibling nodes
    """
    element = find_element_in_nodelist(subtree.name, nodelist)
    # parent[4] could look like [['0', '1'], ['1']] or [['1']];
    # only the first entry of each tag list takes part in the combination.
    both_tags = [parent[4][0]]
    both_tags.extend(sibling[4][0] for sibling in siblings)
    # get intersection or union and add it as a new tag
    element[4].append(get_intersect_or_union(both_tags))
    # go on with children
    if not subtree.is_terminal():
        children = [
            find_element_in_nodelist(clade.name, nodelist)
            for clade in subtree.clades
        ]
        for i, clade in enumerate(subtree.clades):
            # siblings of child i = every collected child record except its own;
            # deep-copied (as before) so the callee works on a private snapshot
            sublist = deepcopy(children[:i] + children[i + 1:])
            parsimony_up2(clade, nodelist, element, sublist)
    return
def parsimony_up(subtree, nodelist, parent, siblings):
    """Parsimony part: up direction -> from root to leafs.

    Appends to this node's tag list the mean over all values stored in the
    tag lists of ``parent`` and of every sibling, then recurses into every
    child of ``subtree``.

    Arguments:
        subtree  - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
        parent   - nodelist element of the parent node
        siblings - list of nodelist elements of the sibling nodes
    """
    # parent[4] could look like [0, 1] or [1]
    siblings_tags = list(parent[4])
    for sibling in siblings:
        siblings_tags.extend(sibling[4])
    element = find_element_in_nodelist(subtree.name, nodelist)
    # calculate and add mean
    element[4].append(sum(siblings_tags) / len(siblings_tags))
    # go on with children
    if not subtree.is_terminal():
        children = [
            find_element_in_nodelist(clade.name, nodelist)
            for clade in subtree.clades
        ]
        # Visit EVERY child: the previous bound of len(clades) - 1 skipped the
        # last child, so its subtree never received an up-pass value.
        for i, clade in enumerate(subtree.clades):
            # siblings of child i = every collected child record except its own;
            # deep-copied (as before) so the callee works on a private snapshot
            sublist = deepcopy(children[:i] + children[i + 1:])
            parsimony_up(clade, nodelist, element, sublist)
    return
def sankoff_parsimony(tree, nodelist):
    """Using rpy2 for forwarding to R code.

    Writes the tree to two buffer files, runs the castor R script against
    those files through rpy2 and stores values from the R result as final
    tags (index 3) in ``nodelist``.

    Arguments:
        tree     - tree object; written with ``Phylo.write`` before and after
                   ``prepare_tree`` is applied to ``tree.clade``
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
    """
    # ---- cache tree for R script ---
    # NOTE(review): the suffix is drawn from only 1001 values, so parallel
    # runs could pick the same buffer file names - confirm this is acceptable.
    random_name = str(randint(0, 1000))
    path_simulated_tree = 'code/bufferfiles/simulated_tree' + random_name + '.tre'
    path_tagged_tree = 'code/bufferfiles/tagged_tree' + random_name + '.tre'
    Phylo.write(tree, path_simulated_tree, 'newick')
    prepare_tree(tree.clade, nodelist)
    Phylo.write(tree, path_tagged_tree, 'newick')
    # -------- R code --------
    path = "code/utilities/castor_parsimony_simulation.R"
    # the context manager closes the script file (it was left open before)
    with open(path, "r") as script_file:
        code = script_file.read()
    print("---------------- prepare R script ----------------")
    # point the script at the buffer files written above
    code = code.replace("data/subtree/Eukaryota.tre", path_simulated_tree)
    code = code.replace("code/bufferfiles/tagged_tree.tre", path_tagged_tree)
    rpy2.robjects.r(code)
    # assume that...
    likelihoods = rpy2.robjects.globalenv['likelihoods'][0]
    # The rows in this matrix will be in the order in which tips and
    # nodes are indexed in the tree, i.e. the rows 1,..,Ntips store the probabilities for
    # tips, while rows (Ntips+1),..,(Ntips+Nnodes) store the probabilities for nodes.
    leaf_nodes = rpy2.robjects.globalenv['state_ids']
    number_of_tips = rpy2.robjects.globalenv['number_of_tips']
    internal_nodes = rpy2.robjects.globalenv['internal_nodes']
    third = len(likelihoods) // 3
    tip_index = 0
    node_index = 0
    # only the indices in the last third of `likelihoods` are read
    for i in range(2 * third, 3 * third):
        if tip_index < number_of_tips[0]:
            element = find_element_in_nodelist(leaf_nodes[tip_index], nodelist)
            if element[3] == '':  # if unknown
                # set finaltag:
                element[3] = likelihoods[i]
            tip_index += 1
        else:
            element = find_element_in_nodelist(internal_nodes[node_index], nodelist)
            # set finaltag:
            element[3] = likelihoods[i]
            node_index += 1
    return
def my_parsimony(tree_clade, nodelist):
    """Mean based parsimony.

    Runs the three passes in order: ``parsimony_down`` on the root clade,
    ``parsimony_up`` on every child of the root (each child receives the
    root record as parent and the other children as siblings), and finally
    ``parsimony_final`` on the root clade.

    Arguments:
        tree_clade - root clade of the tree (uses ``name`` and ``clades``)
        nodelist   - list of node records handed on to the three passes
    """
    # down:
    parsimony_down(tree_clade, nodelist)
    # up:
    parent = find_element_in_nodelist(tree_clade.name, nodelist)
    children = [
        find_element_in_nodelist(clade.name, nodelist)
        for clade in tree_clade.clades
    ]
    for i, clade in enumerate(tree_clade.clades):
        # siblings of child i = every collected child record except its own;
        # deep-copied (as before) so the callee works on a private snapshot
        sublist = deepcopy(children[:i] + children[i + 1:])
        parsimony_up(clade, nodelist, parent, sublist)
    # final:
    parsimony_final(tree_clade, nodelist)
    return
def parsimony_down(subtree, nodelist):
    """Parsimony part: down direction -> from leafs to root.

    Collects the first tag set of every child (tagging untagged children
    recursively first), combines them with ``get_intersect_or_union`` and
    appends the result to this node's tag list.

    Arguments:
        subtree  - clade being processed (uses ``name`` and ``clades``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
    """
    collected = []
    for branch in subtree.clades:
        record = find_element_in_nodelist(branch.name, nodelist)
        if record[4] == []:
            # child carries no tag yet -> tag it before reading its first entry
            parsimony_down(branch, nodelist)
        collected.append(record[4][0])
    node = find_element_in_nodelist(subtree.name, nodelist)
    # intersection or union of the children's tag sets becomes the new tag
    combined = get_intersect_or_union(collected)
    node[4].append(combined)
    return
def fitch_parsimony(tree_clade, nodelist, version):
    """Parsimony implemented from [COO98] - changed for multifurcating trees.

    Runs ``parsimony_down`` on the root clade, then
    ``Fitch_Versions.parsimony_up`` on every child of the root (each child
    receives the root record as parent and the other children as siblings),
    and finally ``parsimony_final`` on the root clade.

    Arguments:
        tree_clade - root clade of the tree (uses ``name`` and ``clades``)
        nodelist   - list of node records handed on to the three passes
        version    - passed through unchanged to ``Fitch_Versions.parsimony_up``
    """
    # down:
    parsimony_down(tree_clade, nodelist)
    # up:
    parent = find_element_in_nodelist(tree_clade.name, nodelist)
    children = [
        find_element_in_nodelist(clade.name, nodelist)
        for clade in tree_clade.clades
    ]
    for i, clade in enumerate(tree_clade.clades):
        # siblings of child i = every collected child record except its own;
        # deep-copied (as before) so the callee works on a private snapshot
        sublist = deepcopy(children[:i] + children[i + 1:])
        # ToDo: decide which one, next ToDo: a second parsimony down?
        Fitch_Versions.parsimony_up(clade, nodelist, parent, sublist, version)
    # final:
    parsimony_final(tree_clade, nodelist)
    return
def parsimony_down(subtree, nodelist):
    """Parsimony part: down direction -> from leafs to root.

    Sums the first tag value of every child (tagging untagged children
    recursively first) and appends the mean over all children to this
    node's tag list. For leaf children the first tag entry is replaced in
    place by a number before it is added: 0.5 when it equals ``[0, 1]``,
    otherwise its own first item.

    Arguments:
        subtree  - clade being processed (uses ``name`` and ``clades``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
    """
    total = 0
    for branch in subtree.clades:
        record = find_element_in_nodelist(branch.name, nodelist)
        if record[4] == []:
            # child carries no tag yet -> tag it first
            parsimony_down(branch, nodelist)
        if branch.is_terminal():
            first = record[4][0]
            # [0, 1] marks an unknown leaf -> 0.5; otherwise unwrap the single value
            record[4][0] = 0.5 if first == [0, 1] else first[0]
        total = total + record[4][0]
        # else: +0 for 'P'
    node = find_element_in_nodelist(subtree.name, nodelist)
    # calculate and add mean
    node[4].append(total / len(subtree.clades))
    return
def prepare_tree(subtree, nodelist):
    """Tag all leafs.

    Renames every terminal clade: to the empty string when the first tag set
    of its record holds more than one entry, otherwise to the string form of
    that single entry. Then descends into all child clades.

    Arguments:
        subtree  - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
    """
    if subtree.is_terminal():
        record = find_element_in_nodelist(subtree.name, nodelist)
        first_tags = record[4][0]
        subtree.name = '' if len(first_tags) > 1 else str(first_tags[0])
    for branch in subtree.clades:
        prepare_tree(branch, nodelist)
    return
def prepare_tree(subtree):
    """Tag all leafs.

    Appends a ``$``-separated tag to the name of every clade: the original
    tag for terminal clades that have one, otherwise the final tag wrapped
    in parentheses. Then descends into all child clades.

    Arguments:
        subtree - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)

    Reads the module-level ``nodelist``, whose records are indexed as
        [0] ott_id, [1] originaltag, [2] finaltag
    """
    record = find_element_in_nodelist(subtree.name, nodelist)
    has_original = subtree.is_terminal() and not (
        record[1] == '' or record[1] == 'NA')
    if has_original:
        subtree.name = subtree.name + '$' + record[1]
    else:
        # internal node, or leaf without a usable original tag
        subtree.name = subtree.name + '$(' + record[2] + ')'
    for branch in subtree.clades:
        prepare_tree(branch)
    return
def prepare_tree(subtree):
    """Tag all leafs.

    Renames every terminal clade to the string form of its original tag, or
    to the empty string when that tag equals 'NA'. Then descends into all
    child clades.

    Arguments:
        subtree - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)

    Reads the module-level ``nodelist``, whose records are indexed as
        [0] id, [1] originaltag, [2] finaltag, [3] depth, [4] heights, [5] nr_children
    """
    global nodelist
    if subtree.is_terminal():
        record = find_element_in_nodelist(subtree.name, nodelist)
        original_tag = record[1]
        subtree.name = '' if original_tag == 'NA' else str(original_tag)
    for branch in subtree.clades:
        prepare_tree(branch)
    return
def parsimony_final(subtree, nodelist):
    """Parsimony final part: combine multiple tags of node to one final tag.

    A terminal node whose first tag value is not 0.5 keeps that value as its
    final tag. Every other node gets the mean of its tag list, rounded to two
    decimals and stored as a string. Then descends into all child clades.

    Arguments:
        subtree  - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
    """
    record = find_element_in_nodelist(subtree.name, nodelist)
    is_leaf = subtree.is_terminal()
    if not is_leaf or record[4][0] == 0.5:
        # calculate mean
        average = sum(record[4]) / len(record[4])
        record[3] = str(round(average, 2))
    else:
        record[3] = record[4][0]
    # go on with children
    if not is_leaf:
        for branch in subtree.clades:
            parsimony_final(branch, nodelist)
    return
def parsimony_final(subtree, nodelist):
    """Parsimony final part: combine multiple tags of node to one final tag.

    A terminal node whose first tag set holds exactly one entry keeps that
    entry as its final tag. Every other node gets the result of
    ``get_intersect_or_union`` over its tag list, rendered as a string with
    the tags joined by '&'. Then descends into all child clades.

    Arguments:
        subtree  - clade being processed (uses ``name``, ``clades``, ``is_terminal()``)
        nodelist - list of node records, indexed as
                   [0] id, [1] depth, [2] originaltag, [3] finaltag, [4] calc[taglist]
    """
    record = find_element_in_nodelist(subtree.name, nodelist)
    is_leaf = subtree.is_terminal()
    if is_leaf and len(record[4][0]) == 1:
        record[3] = record[4][0][0]
    else:
        # get intersection or union, then add it as the final tag
        combined = get_intersect_or_union(record[4])
        record[3] = "&".join([str(tag) for tag in combined])
    # go on with children
    if not is_leaf:
        for branch in subtree.clades:
            parsimony_final(branch, nodelist)
    return
def sankoff_parsimony(run_id):
    """Using rpy2 for forwarding to R code.

    Reloads the module-level ``nodelist`` via ``read_nodelist``, caches the
    tagged tree to a buffer file, runs the R script found at ``r_path``
    through rpy2 and stores values from the R result as final tags
    (index 2) in ``nodelist``.

    Arguments:
        run_id - when >= 0, ``randomly_change_nodelist`` is called first and a
                 run-specific buffer file name (containing ``run_id``) is used;
                 otherwise the default buffer file is used

    Works on the module-level ``nodelist``, whose records are indexed as
        [0] id, [1] originaltag, [2] finaltag, [3] depth, [4] heights, [5] nr_children

    Returns:
        the module-level ``nodelist`` after the final tags have been set
    """
    global nodelist
    read_nodelist()
    tagged_tree_path = 'code/bufferfiles/tagged_tree.tre'
    if run_id >= 0:
        randomly_change_nodelist()
        tagged_tree_path = './code/bufferfiles/tagged_tree' + str(
            run_id) + '.tre'
    # ---- cache tree for R script ---
    cache_tree(tagged_tree_path)
    # -------- R code --------
    # the context manager closes the script file (it was left open before)
    with open(r_path, "r") as script_file:
        code = script_file.read()
    if run_id >= 0:
        print(
            colored("---------------- prepare R script ----------------",
                    "green"))
        # point the script at the run-specific buffer file
        code = code.replace("code/bufferfiles/tagged_tree.tre",
                            tagged_tree_path)
    print(colored("---------------- run castor ----------------", "green"))
    rpy2.robjects.r(code)
    # assume that...
    likelihoods = rpy2.robjects.globalenv['likelihoods'][0]
    # The rows in this matrix will be in the order in which tips and
    # nodes are indexed in the tree, i.e. the rows 1,..,Ntips store the probabilities for
    # tips, while rows (Ntips+1),..,(Ntips+Nnodes) store the probabilities for nodes.
    leaf_nodes = rpy2.robjects.globalenv['state_ids']
    number_of_tips = rpy2.robjects.globalenv['number_of_tips']
    internal_nodes = rpy2.robjects.globalenv['internal_nodes']
    print(
        colored("---------------- save likelihoods ----------------",
                "green"))
    third = len(likelihoods) // 3
    tip_index = 0
    node_index = 0
    # only the indices in the last third of `likelihoods` are read
    for i in range(2 * third, 3 * third):
        if tip_index < number_of_tips[0]:
            element = find_element_in_nodelist(leaf_nodes[tip_index], nodelist)
            if element[2] == '':  # if unknown
                # set finaltag:
                element[2] = likelihoods[i]
            tip_index += 1
        else:
            element = find_element_in_nodelist(internal_nodes[node_index],
                                               nodelist)
            # set finaltag:
            element[2] = likelihoods[i]
            node_index += 1
    return nodelist