示例#1
0
def generateCoalescentTrees(choice, num, fout, length):
    """Simulate gene trees inside a fixed eight-taxon species tree.

    A species tree over taxa A-H is built from a Newick template whose branch
    lengths are multiples of ``length``.  ``num`` gene trees are then drawn
    with DendroPy's contained-coalescent simulator (one gene per species) and
    written to ``fout`` in Newick format.

    Args:
        choice: 1 selects the ladder-shaped topology
            ``(((((((A,B),C),D),E),F),G),H)``; 2 selects the balanced
            topology ``(((A,B),(C,D)),((E,F),(G,H)))``.
        num: Number of gene trees to simulate.
        fout: Path of the Newick output file.
        length: Base branch length; anything accepted by ``float()``.

    Raises:
        ValueError: If ``choice`` is neither 1 nor 2.
    """
    unit = float(length)

    if choice == 1:
        # NOTE(review): with these multipliers the tips are not equidistant
        # from the root (e.g. H hangs 1 unit below its parent while G hangs
        # 1 + 6 units below that same node) -- confirm this is intended.
        sp_tree_str = (
            "((((((((A:%f,B:%f):%f,C:%f):%f,D:%f):%f,E:%f):%f,F:%f):%f,G:%f):%f,H:%f):%f);"
            % (unit, unit, unit,
               unit, 2 * unit,
               unit, 3 * unit,
               unit, 4 * unit,
               unit, 5 * unit,
               unit, 6 * unit,
               unit, 7 * unit)
        )
    elif choice == 2:
        sp_tree_str = (
            "(((A:%f,B:%f):%f,(C:%f,D:%f):%f):%f,((E:%f,F:%f):%f,(G:%f,H:%f):%f):%f);"
            % (unit, unit, unit,
               unit, unit, 2 * unit,
               4 * unit,
               unit, unit, 2 * unit,
               unit, unit, unit,
               4 * unit)
        )
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail early with a clear message instead.
        raise ValueError("choice must be 1 or 2, got %r" % (choice,))

    sp_tree = dendropy.Tree.get_from_string(sp_tree_str, "newick")
    gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
        containing_taxon_namespace=sp_tree.taxon_namespace,
        num_contained=1)
    gene_tree_list = TreeList()

    for _ in range(num):
        # Exactly one simulation per requested tree (the earlier version ran
        # a second simulation each iteration and threw the result away).
        gene_tree = dendropy.simulate.treesim.contained_coalescent_tree(
            containing_tree=sp_tree,
            gene_to_containing_taxon_map=gene_to_species_map)
        # Keep only the first whitespace-delimited token of each leaf label
        # (presumably the containing species name -- confirm against the
        # labels produced by create_contained_taxon_mapping).
        for leaf in gene_tree.leaf_nodes():
            leaf.taxon.label = leaf.taxon.label.split()[0]
        gene_tree_list.append(gene_tree)

    gene_tree_list.write_to_path(fout, 'newick')
示例#2
0
def generateCoalescentTrees(choice, num, fout, length):
    """Simulate gene trees inside a fixed eight-taxon species tree.

    A rooted species tree over taxa A-H is built from a Newick template,
    ``num`` gene trees are drawn with DendroPy's contained-coalescent
    simulator (one gene per species), and the result is written to ``fout``
    in Newick format.

    Args:
        choice: Template selector; must be 1 or 2.
        num: Number of gene trees to simulate.
        fout: Path of the Newick output file.
        length: Value substituted into the template; anything accepted by
            ``float()``.

    Raises:
        ValueError: If ``choice`` is neither 1 nor 2.
    """
    unit = float(length)

    # NOTE(review): ``%d`` truncates the float to an integer, and because the
    # value follows ')' without a ':' it appears to be read as an
    # internal-node label rather than a branch length -- confirm whether
    # ``:%f`` branch lengths were intended.
    if choice == 1:
        sp_tree_str = """\
        [&R]  ((((((((A,B)%d,C)%d,D)%d,E)%d,F)%d,G)%d,H)%d);
        """ % ((unit,) * 7)
    elif choice == 2:
        # NOTE(review): this template is identical to the one for choice 1;
        # confirm whether a different topology was meant here.
        sp_tree_str = """\
        [&R]  ((((((((A,B)%d,C)%d,D)%d,E)%d,F)%d,G)%d,H)%d);
        """ % ((unit,) * 7)
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail early with a clear message instead.
        raise ValueError("choice must be 1 or 2, got %r" % (choice,))

    sp_tree = dendropy.Tree.get_from_string(sp_tree_str, "newick")
    gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
        containing_taxon_namespace=sp_tree.taxon_namespace,
        num_contained=1)
    gene_tree_list = TreeList()

    for _ in range(num):
        # Exactly one simulation per requested tree (the earlier version ran
        # a second simulation each iteration and threw the result away).
        gene_tree = treesim.contained_coalescent_tree(
            containing_tree=sp_tree,
            gene_to_containing_taxon_map=gene_to_species_map)
        # Keep only the first whitespace-delimited token of each leaf label
        # (presumably the containing species name -- confirm against the
        # labels produced by create_contained_taxon_mapping).
        for leaf in gene_tree.leaf_nodes():
            leaf.taxon.label = leaf.taxon.label.split()[0]
        gene_tree_list.append(gene_tree)

    gene_tree_list.write_to_path(fout, 'newick')
                redundant_count += 1
                break
        else:
            tree_list.append(tree)
    return tree_list, redundant_count


if __name__ == '__main__':
    # Interactive inputs, prompted for in this fixed order.
    mle_path = raw_input("File with Maximum Likelihood tree: ")
    mcmc_trees = raw_input("File with MCMC trees: ")
    burnin = int(raw_input("Burnin: "))
    outfile = raw_input("Name of outfile: ")

    # One TaxonSet is shared by the MLE tree and the unique_trees() call.
    taxa = dendropy.TaxonSet()
    mle_tree = dendropy.Tree.get_from_path(mle_path, 'nexus', taxon_set=taxa)

    # The MLE tree seeds the list of unique topologies.
    uts = [mle_tree]
    uts, redundant_count = unique_trees(
        uts, mcmc_trees, 'nexus', burnin, taxonset=taxa)

    print("\nNumber of redundant trees: %d" % redundant_count)
    print("Number of unique trees: %d\n" % len(uts))

    # Write the collected trees as Newick without edge lengths.
    unique_tree_list = TreeList(uts)
    unique_tree_list.write_to_path(
        outfile, 'newick', suppress_edge_lengths=True)
示例#4
0
# Command-line arguments: species-tree file, number of gene trees, output path.
fin = sys.argv[1]
num = int(sys.argv[2])
fout = sys.argv[3]

# Read the whole species-tree file in one go; the context manager closes the
# handle even if reading raises.
with open(fin, "r") as f:
    sp_tree_str = f.read()

# Prepend the "[&R]" token (presumably so the Newick reader treats the tree
# as rooted -- confirm against the DendroPy Newick schema documentation).
sp_tree_str = "[&R] " + sp_tree_str

sp_tree = dendropy.Tree.get_from_string(sp_tree_str,
                                        "newick",
                                        preserve_underscores=True)
gene_to_species_map = dendropy.TaxonNamespaceMapping.create_contained_taxon_mapping(
    containing_taxon_namespace=sp_tree.taxon_namespace, num_contained=1)
gene_tree_list = TreeList()

for _ in range(num):
    gene_tree = treesim.contained_coalescent_tree(
        containing_tree=sp_tree,
        gene_to_containing_taxon_map=gene_to_species_map)
    # Keep only the first whitespace-delimited token of each leaf label.
    for t in gene_tree.leaf_nodes():
        t.taxon.label = t.taxon.label.split()[0]
    gene_tree_list.append(gene_tree)

gene_tree_list.write_to_path(fout, 'newick')
    	for ut in tree_list:
    	    sd = treecalc.symmetric_difference(tree,ut)
            #print sd ## error check
            if sd == 0:
            	redundant_count +=1
                break
        else:
            tree_list.append(tree)
    return tree_list, redundant_count


if __name__ == '__main__':
    # Interactive inputs, prompted for in this fixed order.
    mle_path = raw_input("File with Maximum Likelihood tree: ")
    mcmc_trees = raw_input("File with MCMC trees: ")
    burnin = int(raw_input("Burnin: "))
    outfile = raw_input("Name of outfile: ")

    # One TaxonSet is shared by the MLE tree and the unique_trees() call.
    taxa = dendropy.TaxonSet()
    mle_tree = dendropy.Tree.get_from_path(mle_path, 'nexus', taxon_set=taxa)

    # The MLE tree seeds the list of unique topologies.
    uts = [mle_tree]
    uts, redundant_count = unique_trees(
        uts, mcmc_trees, 'nexus', burnin, taxonset=taxa)

    print("\nNumber of redundant trees: %d" % redundant_count)
    print("Number of unique trees: %d\n" % len(uts))

    # Write the collected trees as Newick without edge lengths.
    unique_tree_list = TreeList(uts)
    unique_tree_list.write_to_path(
        outfile, 'newick', suppress_edge_lengths=True)
	
    	    
示例#6
0
from argparse import ArgumentParser
import os

from dendropy import TreeList, TaxonNamespace
from dendropy.simulate import treesim

# Number of trees generated for each simulation model.
NUM_REPLICATES = 100


def _birth_death_trees(death_rate, num_tips):
    """Return NUM_REPLICATES birth-death trees (birth rate 1.0) as a TreeList."""
    return TreeList([
        treesim.birth_death_tree(birth_rate=1.0,
                                 death_rate=death_rate,
                                 num_extant_tips=num_tips,
                                 repeat_until_success=True)
        for _ in range(NUM_REPLICATES)
    ])


# The text is passed as ``description``: the first positional parameter of
# ArgumentParser is ``prog``, which would put this sentence into the usage
# line in place of the program name.
parser = ArgumentParser(
    description='Generate trees of a given size with different algos')
parser.add_argument('-n', type=int, help='Tree size', default=100)
# -d is required: without it args.d is None and os.path.isdir(None) fails
# with a TypeError instead of a usable error message.
parser.add_argument('-d', type=str, help='Output directory', required=True)
args = parser.parse_args()

# Create the output directory (including missing parents) and work inside it
# so the files below are written there.
if not os.path.isdir(args.d):
    os.makedirs(args.d)
os.chdir(args.d)

# Birth-death trees at two death rates; the file-name suffix appears to be
# the birth/death ratio (1.0/0.5 = 2, 1.0/0.2 = 5).
bd2 = _birth_death_trees(0.5, args.n)
bd2.write_to_path('birth_death2.nwk', schema='newick')
bd5 = _birth_death_trees(0.2, args.n)
bd5.write_to_path('birth_death5.nwk', schema='newick')

# Kingman coalescent trees over taxa labelled T1..Tn.
taxa = TaxonNamespace(['T{}'.format(x) for x in range(1, args.n + 1)])
king = TreeList(
    [treesim.pure_kingman_tree(taxon_namespace=taxa)
     for _ in range(NUM_REPLICATES)])
king.write_to_path('kingman.nwk', schema='newick')