def cut_long_internal_branches(curroot, cutoff, min_taxa=4):
    """Cut long internal branches and return the resulting subtrees.

    The tree is scanned from ``curroot``; after every modification (kink
    removal or cut) the scan is restarted, because the running traversal
    is no longer valid once the tree has changed.

    Args:
        curroot: root node of the tree to cut. The tree is modified in place.
        cutoff: branch-length threshold. A non-root internal node whose
            ``length`` is greater than this value is cut off.
        min_taxa: minimum ``count_taxa`` value required for the two children
            of a doubly-cut node, and for the residual tree, to be kept.
            Defaults to 4, the value that used to be hard-coded.

    Returns:
        A list of subtree root nodes in the order they were cut, followed by
        the residual tree when it still has at least ``min_taxa`` taxa.
        A node that is cut off whole (the ``else`` branch below) is appended
        without a size check, so callers should filter by size themselves.
    """
    subtrees = []  # store all subtrees after cutting
    going = True
    while going:
        going = False  # only keep going if the tree changed during last round
        for node in curroot.iternodes():  # walk through nodes
            if node.istip or node == curroot:
                continue
            if node.nchildren == 1:
                # single-child node: remove the kink, then restart the scan
                node, curroot = remove_kink(node, curroot)
                going = True
                break
            if node.length > cutoff:
                print(node.length)
                child0, child1 = node.children[0], node.children[1]
                if (not child0.istip and not child1.istip
                        and child0.length + child1.length > cutoff):
                    # the two child branches together are also too long:
                    # keep the children as separate subtrees instead of node
                    print(child0.length + child1.length)
                    for child in (child0, child1):
                        if count_taxa(child) >= min_taxa:
                            subtrees.append(child)
                else:
                    subtrees.append(node)
                node = node.prune()
                if len(curroot.leaves()) > 2:  # no kink if only two left
                    node, curroot = remove_kink(node, curroot)
                    going = True
                break
    if count_taxa(curroot) >= min_taxa:
        subtrees.append(curroot)  # write out the residue after cutting
    return subtrees
# Example #2
# 0
def prune(score_tuple,node,root,pp_trees):
    """Prune one side of the tree at ``node`` and record the removed piece.

    "Prune front" (``score_tuple[0] > score_tuple[1]``): ``node`` is appended
    to ``pp_trees`` and pruned; if pruning returned a parent and ``root``
    still has at least 3 leaves, ``tree_utils.remove_kink`` is applied.
    Returns ``(root, node == root)``.

    "Prune back" (otherwise): ``node`` is detached from its parent (unless it
    is the root), the old ``root`` is appended to ``pp_trees``, and ``node``
    becomes the new root, passed through ``tree_utils.remove_kink`` when it
    has at least 3 leaves. Returns ``(new_root, False)``; the False flag
    means the original root was cut off and the caller is not done yet.
    """
    front_score = score_tuple[0]
    back_score = score_tuple[1]

    if front_score > back_score:
        print("prune front")
        pp_trees.append(node)
        parent = node.prune()
        if parent != None and len(root.leaves()) >= 3:
            parent, root = tree_utils.remove_kink(parent, root)
        return root, node == root

    # prune back
    if node != root:
        parent = node.parent  # parent--node<
        parent.remove_child(node)
        if parent.parent != None:
            parent, root = tree_utils.remove_kink(parent, root)
    node.prune()
    print("prune back")
    pp_trees.append(root)
    if len(node.leaves()) < 3:
        return node, False
    _, new_root = tree_utils.remove_kink(node, node)
    return new_root, False  # original root was cut off, not done yet
def main(inDIR, file_ending, branch_len_cutoff, min_taxa, outDIR):
    """Cut long internal branches and write the subtrees as .subtree files.

    For every file in ``inDIR`` whose name ends with ``file_ending``, the
    first line is parsed as a newick tree. Trees with fewer than
    ``min_taxa`` taxa are skipped. Otherwise the tree is cut with
    ``cut_long_internal_branches`` and each resulting subtree with at least
    ``min_taxa`` taxa is written to
    ``outDIR/<name before first '.'>_<n>.subtree``.

    Args:
        inDIR: input directory (a trailing "/" is added when missing).
        file_ending: suffix selecting the tree files to process.
        branch_len_cutoff: branch-length cutoff, converted with ``float``.
        min_taxa: minimum number of taxa to write, converted with ``int``.
        outDIR: output directory (a trailing "/" is added when missing).

    Raises:
        AssertionError: if no file in ``inDIR`` ends with ``file_ending``.
    """
    if inDIR[-1] != "/": inDIR += "/"
    if outDIR[-1] != "/": outDIR += "/"
    min_taxa = int(min_taxa)
    cutoff = float(branch_len_cutoff)
    filecount = 0
    print("cutting branches longer than %s" % cutoff)
    for i in os.listdir(inDIR):
        if not i.endswith(file_ending):
            continue
        print(i)
        filecount += 1
        with open(inDIR + i, "r") as infile:  # only 1 tree in each file
            intree = newick3.parse(infile.readline())
        try:
            # the original .tre, used only to report its tip count
            with open(inDIR + i[:i.find(".tre")] + ".tre", "r") as infile:
                raw_tree_size = len(
                    get_front_labels(newick3.parse(infile.readline())))
        except Exception:
            # did not refine this round. Use the .tre.tt.mm tree.
            # Best-effort fallback, but no longer swallows
            # KeyboardInterrupt/SystemExit like a bare except would.
            raw_tree_size = len(get_front_labels(intree))
        num_taxa = count_taxa(intree)
        if num_taxa < min_taxa:
            print("Tree has %s less than %s taxa" % (num_taxa, min_taxa))
            continue
        print(".tre: %s tips; %s: %s tips" % (
            raw_tree_size, file_ending, len(get_front_labels(intree))))
        subtrees = cut_long_internal_branches(intree, cutoff)
        if len(subtrees) == 0:
            print("No tree with at least %s taxa" % min_taxa)
            continue
        count = 0
        sizes = []  # tip counts of the written subtrees, as strings
        for subtree in subtrees:
            if count_taxa(subtree) < min_taxa:
                continue
            if subtree.nchildren == 2:  # fix bifurcating roots from cutting
                temp, subtree = remove_kink(subtree, subtree)
            count += 1
            outname = outDIR + i.split(".")[0] + "_" + str(count) + ".subtree"
            with open(outname, "w") as outfile:
                outfile.write(newick3.tostring(subtree) + ";\n")
            sizes.append(str(len(subtree.leaves())))
        # every size is followed by ", " to keep the log line format unchanged
        outsizes = "".join(size + ", " for size in sizes)
        print("%s tree(s) wirtten. Sizes: %s" % (count, outsizes))
    assert filecount > 0, "No file end with " + file_ending + " in " + inDIR
def mask_monophyletic_tips(curroot,ignore=None):
    """Prune tip sisters that share a name field with the visited tip.

    For each tip, the second "_"-separated field of ``get_name(label)`` is
    compared with that of every sister; a tip sister with the same field is
    pruned and the visited tip is kept. Passes repeat until one makes no
    change, the tree drops below 4 leaves, or ``curroot`` becomes None.

    Args:
        curroot: root node of the tree; the tree is modified in place.
        ignore: unused; kept only for interface compatibility. The default
            is None rather than a shared mutable list.

    Returns:
        The (possibly replaced) root node.

    NOTE(review): after a prune only the sister loop is left, so the outer
    traversal continues over a tree that has just been modified. The
    repeated passes appear to be what makes the result converge -- confirm
    before restructuring.
    """
    going = True
    while going and curroot is not None and len(curroot.leaves()) >= 4:
        going = False
        for node in curroot.iternodes():  # walk through nodes
            if not node.istip:
                continue  # only look at tips
            name = get_name(node.label).split("_")[1]
            for sister in node.get_sisters():
                if sister.istip and name == get_name(sister.label).split("_")[1]:  # mask
                    node = sister.prune()  # node now refers to what prune() returned
                    if len(curroot.leaves()) >= 4:
                        # a 2-child root or a 1-child internal node is a kink
                        if (node == curroot and node.nchildren == 2) or (node != curroot and node.nchildren == 1):
                            node, curroot = remove_kink(node, curroot)
                    going = True
                    break
    return curroot
def mask_paraphyletic_tips(curroot,ignore=None):
    """Prune tip sisters of a tip's parent that share the tip's name field.

    For each tip that is neither the root nor a direct child of the root,
    the second "_"-separated field of ``get_name(label)`` is compared with
    that of every sister of the tip's parent; a tip with the same field is
    pruned and the visited tip is kept. Passes repeat until one makes no
    change, the tree drops below 4 leaves, or ``curroot`` becomes None.

    Args:
        curroot: root node of the tree; the tree is modified in place.
        ignore: unused; kept only for interface compatibility. The default
            is None rather than a shared mutable list.

    Returns:
        The (possibly replaced) root node.

    NOTE(review): after a prune only the inner loop is left, so the outer
    traversal continues over a tree that has just been modified. The
    repeated passes appear to be what makes the result converge -- confirm
    before restructuring.
    """
    going = True
    while going and curroot is not None and len(curroot.leaves()) >= 4:
        going = False
        for node in curroot.iternodes():  # walk through nodes
            if not node.istip:
                continue  # only look at tips
            name = get_name(node.label).split("_")[1]
            parent = node.parent
            if node == curroot or parent == curroot or parent is None:
                continue  # no paraphyletic tips for the root
            for para in parent.get_sisters():
                if para.istip and name == get_name(para.label).split("_")[1]:  # mask
                    node = para.prune()  # node now refers to what prune() returned
                    if len(curroot.leaves()) >= 4:
                        # a 2-child root or a 1-child internal node is a kink
                        if (node == curroot and node.nchildren == 2) or (node != curroot and node.nchildren == 1):
                            node, curroot = remove_kink(node, curroot)
                    going = True
                    break
    return curroot
def main(inDIR,file_ending,branch_len_cutoff,min_taxa,outDIR,log):
    """Cut long internal branches and write the subtrees as .subtree files.

    For every file in ``inDIR`` whose name ends with ``file_ending``, the
    first line is read as a tree string. Trees with fewer than ``min_taxa``
    leaves are skipped. Otherwise the tree is cut with
    ``cut_long_internal_branches`` and each resulting subtree with at least
    ``min_taxa`` leaves is written to
    ``outDIR/<name before first '.'>_<n>.subtree``.

    Args:
        inDIR: input directory (a trailing "/" is added when missing).
        file_ending: suffix selecting the tree files to process.
        branch_len_cutoff: branch-length cutoff, converted with ``float``.
        min_taxa: minimum number of leaves to write, converted with ``int``.
        outDIR: output directory.
        log: unused here; kept for interface compatibility.
    """
    if inDIR[-1] != "/": inDIR += "/"
    min_taxa = int(min_taxa)
    cutoff = float(branch_len_cutoff)
    print("cutting branches longer than",cutoff)
    for i in os.listdir(inDIR):
        if not i.endswith(file_ending):
            continue
        with open(inDIR+i,"r") as infile:  # only 1 tree in each file
            intree = tree_reader.read_tree_string(infile.readline())
        num_taxa = len(intree.leaves())
        if num_taxa < min_taxa:
            print("Tree has",num_taxa,"less than", min_taxa,"taxa")
            continue
        subtrees = cut_long_internal_branches(intree,cutoff,min_taxa)
        if len(subtrees) == 0:
            print("No tree with at least", min_taxa, "taxa")
            continue
        count = 0
        sizes = []  # leaf counts of the written subtrees, as strings
        for subtree in subtrees:
            if len(subtree.leaves()) < min_taxa:
                continue
            if len(subtree.children) == 2:  # fix bifurcating roots from cutting
                temp,subtree = remove_kink(subtree,subtree)
            count += 1
            outname = outDIR+"/"+i.split(".")[0]+"_"+str(count)+".subtree"
            print(outname)
            with open(outname,"w") as outfile:
                outfile.write(subtree.get_newick_repr(True)+";\n")
            sizes.append(str(len(subtree.leaves())))
        # every size is followed by ", " to keep the log line format unchanged
        outsizes = "".join(size+", " for size in sizes)
        print(count,"tree(s) written. Sizes:",outsizes)
def mask_monophyletic_tips(curroot, unamb_chrDICT):
    """Mask sister tips that carry the same name, keeping one of each pair.

    For every tip, each tip sister whose ``get_name(label)`` equals the
    tip's is a masking candidate. The visited tip is kept only when its
    ``unamb_chrDICT`` value is strictly greater than the sister's;
    otherwise (ties included) the visited tip itself is pruned. Passes
    repeat until one makes no change or fewer than 4 leaves remain.

    ``unamb_chrDICT`` is indexed by tip label and its values are compared
    with ``>``. The tree is modified in place; the (possibly replaced)
    root is returned.
    """
    changed = True
    while changed and len(curroot.leaves()) >= 4:
        changed = False
        for tip in curroot.iternodes():  # walk through nodes
            if not tip.istip:
                continue  # only look at tips
            for sib in tip.get_sisters():
                if not (sib.istip
                        and get_name(tip.label) == get_name(sib.label)):
                    continue
                # masking: drop whichever of the pair scores lower
                if unamb_chrDICT[tip.label] > unamb_chrDICT[sib.label]:
                    anchor = sib.prune()
                else:
                    anchor = tip.prune()
                if len(curroot.leaves()) >= 4:
                    # a 2-child root or a 1-child internal node is a kink
                    root_kink = anchor == curroot and anchor.nchildren == 2
                    if root_kink or (anchor != curroot
                                     and anchor.nchildren == 1):
                        anchor, curroot = remove_kink(anchor, curroot)
                changed = True
                break
    return curroot
def mask_paraphyletic_tips(curroot, unamb_chrDICT):
    """Mask same-named tips that sit one level up from a tip.

    For every tip that is neither the root nor a direct child of the root,
    each tip among the sisters of the tip's parent whose ``get_name(label)``
    equals the tip's is a masking candidate. The visited tip is kept only
    when its ``unamb_chrDICT`` value is strictly greater than the other
    tip's; otherwise (ties included) the visited tip itself is pruned.
    Passes repeat until one makes no change or fewer than 4 leaves remain.

    ``unamb_chrDICT`` is indexed by tip label and its values are compared
    with ``>``. The tree is modified in place; the (possibly replaced)
    root is returned.
    """
    changed = True
    while changed and len(curroot.leaves()) >= 4:
        changed = False
        for tip in curroot.iternodes():  # walk through nodes
            if not tip.istip:
                continue  # only look at tips
            tip_parent = tip.parent
            if tip == curroot or tip_parent == curroot:
                continue  # no paraphyletic tips for the root
            for aunt in tip_parent.get_sisters():
                if not (aunt.istip
                        and get_name(tip.label) == get_name(aunt.label)):
                    continue
                # masking: drop whichever of the pair scores lower
                if unamb_chrDICT[tip.label] > unamb_chrDICT[aunt.label]:
                    anchor = aunt.prune()
                else:
                    anchor = tip.prune()
                if len(curroot.leaves()) >= 4:
                    # a 2-child root or a 1-child internal node is a kink
                    root_kink = anchor == curroot and anchor.nchildren == 2
                    if root_kink or (anchor != curroot
                                     and anchor.nchildren == 1):
                        anchor, curroot = remove_kink(anchor, curroot)
                changed = True
                break
    return curroot