def testGSA(self):
    """Test that the pure-birth process produces the correct number of tips with GSA.

    For every requested tip count from 2 to 19, a tree is simulated with
    ``death_rate=0.0`` and ``gsa_ntax`` set to four times the tip count; the
    resulting tree must pass its own validity check and have exactly the
    requested number of leaves.
    """
    _RNG = RepeatedRandom()
    for num_leaves in range(2, 20):
        t = treesim.birth_death(birth_rate=1.0,
                                death_rate=0.0,
                                ntax=num_leaves,
                                gsa_ntax=4 * num_leaves,
                                rng=_RNG)
        self.assertTrue(t._debug_tree_is_valid())
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(num_leaves, len(t.leaf_nodes()))
def testBDTree(self):
    """Test that the birth-death process produces the correct number of tips.

    For every requested tip count from 2 to 19, a tree is simulated with
    ``birth_rate=1.0`` and ``death_rate=0.2``; the resulting tree must pass
    its own validity check and have exactly the requested number of leaves.
    """
    _RNG = RepeatedRandom()
    for num_leaves in range(2, 20):
        t = treesim.birth_death(birth_rate=1.0,
                                death_rate=0.2,
                                ntax=num_leaves,
                                rng=_RNG)
        self.assertTrue(t._debug_tree_is_valid())
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(num_leaves, len(t.leaf_nodes()))
def getTree(self, size, birthParam):
    """Simulate a pure-birth tree over the first ``size`` entries of ``self.fullTaxonSet``.

    :param size: number of leading entries of ``self.fullTaxonSet`` to use.
    :param birthParam: birth rate handed to ``treesim.birth_death``; the
        death rate is fixed at 0.
    :return: the simulated tree.
    """
    selected_taxa = dendropy.TaxonSet(self.fullTaxonSet[0:size])
    return treesim.birth_death(
        birth_rate=birthParam,
        death_rate=0,
        taxon_set=selected_taxa,
    )
def main(args=None):
    """Simulate a tree for a list of taxa and write it to standard output.

    :param args: mapping of parsed command-line options. The keys read here
        are ``--birth_rate``, ``--death_rate``, ``--birth_rate_sd``,
        ``--death_rate_sd``, ``--outfmt``, ``--star``, ``<genome_list>`` and
        ``<comm_file>``. The four rate entries are converted to ``float``
        in place.
    :raises TypeError: if ``args`` is ``None``.
    :raises ValueError: if ``--outfmt`` is not a supported format, or if
        neither ``<genome_list>`` nor ``<comm_file>`` is provided.
    """
    if args is None:
        raise TypeError('main() requires a mapping of parsed arguments, got None')

    for param in ('--birth_rate', '--death_rate',
                  '--birth_rate_sd', '--death_rate_sd'):
        args[param] = float(args[param])

    # Validate the output format before doing any work, and with a real
    # exception rather than an assert (asserts are stripped under -O).
    outfmt = args['--outfmt'].lower()
    psbl_fmts = ['newick', 'nexus']
    if outfmt not in psbl_fmts:
        raise ValueError('output file format not recognized.' +
                         ' Possible formats: {}'.format(', '.join(psbl_fmts)))

    # loading taxon list
    if args['<genome_list>'] is not None:
        genome_entries = Utils.parseGenomeList(args['<genome_list>'],
                                               check_exists=False)
        taxa = [entry[0] for entry in genome_entries]
    elif args['<comm_file>'] is not None:
        comm = CommTable.from_csv(args['<comm_file>'], sep='\t')
        taxa = comm.get_unique_taxon_names()
    else:
        # Previously this fell through and failed later on an unbound `taxa`.
        raise ValueError('no taxon source provided: '
                         'give either <genome_list> or <comm_file>')

    # init dendropy taxon namespace
    taxa = dendropy.TaxonNamespace(taxa, label='taxa')

    # simulating tree
    if args['--star']:
        tree = star_tree(taxon_set=taxa)
    else:
        # NOTE(review): only the number of taxa is passed here, not the
        # namespace itself -- confirm tip labels end up as intended.
        tree = birth_death(args['--birth_rate'],
                           args['--death_rate'],
                           birth_rate_sd=args['--birth_rate_sd'],
                           death_rate_sd=args['--death_rate_sd'],
                           num_extant_tips=len(taxa))

    # writing tree
    tree.write_to_stream(sys.stdout, outfmt)
def testGSABD(self):
    """Test that the birth-death process produces the correct number of tips with GSA.

    For every requested tip count from 2 to 14, a tree is simulated with
    ``birth_rate=1.0``, ``death_rate=0.2`` and ``gsa_ntax`` set to three
    times the tip count; the resulting tree must pass its own validity
    check and have exactly the requested number of leaves.
    """
    _RNG = RepeatedRandom()
    for num_leaves in range(2, 15):
        _LOG.debug("Generating tree with %d leaves" % num_leaves)
        t = treesim.birth_death(birth_rate=1.0,
                                death_rate=0.2,
                                ntax=num_leaves,
                                gsa_ntax=3 * num_leaves,
                                rng=_RNG)
        self.assertTrue(t._debug_tree_is_valid())
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(num_leaves, len(t.leaf_nodes()))
def testYule(self):
    """Test that the pure-birth process produces the correct number of tips.

    For every requested tip count from 2 to 19, a tree is simulated with
    ``birth_rate=1.0`` and ``death_rate=0.0``; the resulting tree must pass
    its own validity check and have exactly the requested number of leaves.
    """
    _RNG = RepeatedRandom()
    for num_leaves in range(2, 20):
        t = treesim.birth_death(birth_rate=1.0,
                                death_rate=0.0,
                                ntax=num_leaves,
                                rng=_RNG)
        self.assertTrue(t._debug_tree_is_valid())
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(num_leaves, len(t.leaf_nodes()))
def get_random_tree(branch_lengths_function):
    """Return a newick string for one simulated birth-death tree.

    The simulation parameters come from the enclosing scope
    (``birth_rate``, ``death_rate``, ``n_tips_per_tree``).

    :param branch_lengths_function: unused by this generation method.
    :return: the newick text, written with ``suppress_rooting=True`` and
        stripped of surrounding whitespace.
    """
    simulated = treesim.birth_death(
        birth_rate=birth_rate,
        death_rate=death_rate,
        ntax=n_tips_per_tree,
    )
    newick_text = simulated.as_string('newick', suppress_rooting=True)
    return newick_text.strip()
def generate(mean, sd, num_periods):
    """Grow a tree through ``num_periods`` successive birth-death periods.

    Each period draws a birth rate and a death rate from
    ``random.gauss(mean, sd)`` and a duration from ``random.randint(1, 5)``,
    then continues the simulation on the tree from the previous period.
    Taxa are assigned only once, after the final period.

    :param mean: mean of the Gaussian the rates are drawn from.
    :param sd: standard deviation of that Gaussian.
    :param num_periods: number of simulation periods to run.
    :return: the resulting tree.
    """
    current = dendropy.Tree()
    for _ in range(num_periods):
        # Draw in the same order as the simulation arguments: birth, death, time.
        period_birth = random.gauss(mean, sd)
        period_death = random.gauss(mean, sd)
        period_length = random.randint(1, 5)
        current = treesim.birth_death(
            birth_rate=period_birth,
            death_rate=period_death,
            max_time=period_length,
            tree=current,
            assign_taxa=False,
            repeat_until_success=True,
        )
    current.randomly_assign_taxa(create_required_taxa=True)
    return current
def testGSABD(self):
    """Test that the birth-death process produces the correct number of tips with GSA.

    For every requested tip count from 2 to 14, a tree is simulated with
    ``birth_rate=1.0``, ``death_rate=0.2`` and ``gsa_ntax`` set to three
    times the tip count; the resulting tree must pass its own validity
    check and have exactly the requested number of leaves.
    """
    _RNG = RepeatedRandom()
    for num_leaves in range(2, 15):
        _LOG.debug("Generating tree with %d leaves" % num_leaves)
        t = treesim.birth_death(birth_rate=1.0,
                                death_rate=0.2,
                                ntax=num_leaves,
                                gsa_ntax=3 * num_leaves,
                                rng=_RNG)
        self.assertTrue(t._debug_tree_is_valid())
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(num_leaves, len(t.leaf_nodes()))
def generate(mean, sd, num_periods):
    """Build a tree by chaining ``num_periods`` birth-death simulations.

    Every period uses freshly drawn rates (``random.gauss(mean, sd)`` for
    both birth and death) and a freshly drawn duration
    (``random.randint(1, 5)``), and extends the tree produced by the
    previous period. Taxa are assigned once, after the last period.

    :param mean: mean of the Gaussian used for both rates.
    :param sd: standard deviation of that Gaussian.
    :param num_periods: how many periods to simulate.
    :return: the resulting tree.
    """
    growing_tree = dendropy.Tree()
    remaining = num_periods
    while remaining > 0:
        # Keyword arguments are evaluated left to right, so the draw order
        # is birth rate, death rate, then duration.
        growing_tree = treesim.birth_death(birth_rate=random.gauss(mean, sd),
                                           death_rate=random.gauss(mean, sd),
                                           max_time=random.randint(1, 5),
                                           tree=growing_tree,
                                           assign_taxa=False,
                                           repeat_until_success=True)
        remaining -= 1
    growing_tree.randomly_assign_taxa(create_required_taxa=True)
    return growing_tree
def generate(birth_rates, death_rates):
    """Grow a tree through one birth-death period per rate pair.

    Period *k* uses ``birth_rates[k]`` and ``death_rates[k]`` together with
    a duration drawn from ``random.randint(1, 8)``, and continues from the
    tree left by the previous period. The intermediate tree is printed in
    newick form after each period, and taxa are assigned once at the end.

    :param birth_rates: per-period birth rates.
    :param death_rates: per-period death rates; must be the same length as
        ``birth_rates`` (checked with an assertion).
    :return: the resulting tree.
    """
    assert len(birth_rates) == len(death_rates)
    current = dendropy.Tree()
    for period_birth, period_death in zip(birth_rates, death_rates):
        current = treesim.birth_death(
            period_birth,
            period_death,
            max_time=random.randint(1, 8),
            tree=current,
            assign_taxa=False,
            repeat_until_success=True,
        )
        # NOTE(review): assumed to be a per-period progress print -- confirm
        # against the original layout.
        print(current.as_string('newick'))
    current.randomly_assign_taxa(create_required_taxa=True)
    return current
def generate(birth_rates, death_rates):
    """Chain one birth-death simulation per (birth, death) rate pair.

    Each period takes the matching entries of ``birth_rates`` and
    ``death_rates``, a duration drawn from ``random.randint(1, 8)``, and the
    tree produced by the previous period. The tree is printed as newick
    after every period; taxa are assigned once after the last one.

    :param birth_rates: per-period birth rates.
    :param death_rates: per-period death rates; an assertion requires the
        same length as ``birth_rates``.
    :return: the resulting tree.
    """
    assert len(birth_rates) == len(death_rates)
    growing_tree = dendropy.Tree()
    for rate_pair in zip(birth_rates, death_rates):
        period_birth, period_death = rate_pair
        period_length = random.randint(1, 8)
        growing_tree = treesim.birth_death(period_birth,
                                           period_death,
                                           max_time=period_length,
                                           tree=growing_tree,
                                           assign_taxa=False,
                                           repeat_until_success=True)
        # NOTE(review): assumed to be a per-period progress print -- confirm
        # against the original layout.
        print(growing_tree.as_string('newick'))
    growing_tree.randomly_assign_taxa(create_required_taxa=True)
    return growing_tree
#print(mid_random.as_newick_string()) mid_random.add_child(t2.seed_node, edge_length=l / 2) t2.seed_node.taxon = Taxon(label=childname) #print(mid_random.as_newick_string()) # 0 1 2 3 4 5 6 7 nodes = ["25", "70", "27", "181", "58", "105", "103", "82"] levels = [.8, .25, .3, .15, .13, .1, 0.05, .10] trs = [ treesim.birth_death(birth_rate=100.0 / l, death_rate=100.0 / l, birth_rate_sd=50, death_rate_sd=50, taxon_set=dendropy.TaxonSet([ n + "-" + str(y) for y in xrange(1, int(math.sqrt(700 * l))) ]), max_time=0.1) for (n, l) in zip(nodes, levels) ] print("simulated") graft(trs[5], [trs[6]], [nodes[6]]) graft(trs[4], [trs[5]], [nodes[5]]) graft(trs[2], [trs[3], trs[4]], [nodes[3], nodes[4]]) graft(trs[1], [trs[7]], [nodes[7]]) graft(trs[0], [trs[2], trs[1]], [nodes[2], nodes[1]]) tree = trs[0]
def do_sim(birth_rate, death_rate, num_leaves, rng=None):
    """Simulate a tree and sequence data, infer a tree with PAUP, and report recovery.

    Steps, all carried out in a fresh temporary directory:

    1. simulate a model tree with ``treesim.birth_death``;
    2. run ``seq-gen`` on that tree to produce a data set;
    3. run ``paup`` on the data set to infer a tree;
    4. compare the splits of the model tree against the inferred tree.

    The temporary files and directory are removed only when every step
    succeeds; on failure the process exits via ``sys.exit`` and they are
    left in place.

    :param birth_rate: birth rate for the model tree.
    :param death_rate: death rate for the model tree.
    :param num_leaves: number of tips of the model tree.
    :param rng: optional random number generator; passed to the tree
        simulation and, when given, used to draw the seq-gen seed.
    :return: a sorted list of ``(depth, edge_length, recovered)`` tuples, one
        per internal non-root node of the model tree, where ``recovered`` is
        1 if the node's split is present in the inferred tree and 0 if not.
    """
    import subprocess

    temp_dir = tempfile.mkdtemp()
    model_tree = treesim.birth_death(birth_rate=birth_rate,
                                     death_rate=death_rate,
                                     ntax=num_leaves,
                                     rng=rng)

    ############################################################################
    # Calling seq-gen
    mtf = os.path.join(temp_dir, 'simtree')
    # Single-argument form prints the same text under Python 2 and Python 3.
    print("temp_dir = %s" % temp_dir)
    # The with-block guarantees the file is flushed and closed before seq-gen
    # is invoked, even if the write raises.
    with open(mtf, 'w') as treefile_obj:
        treefile_obj.write("%s;\n" % str(model_tree))

    command_line = ['seq-gen',
                    '-mHKY',
                    '-on',
                    ]
    if os.environ.get('TREE_INF_TEST_RAND_NUMBER_SEED'):
        # NOTE(review): `seed` is not defined inside this function; presumably
        # a module-level value derived from the same environment variable --
        # confirm, otherwise this branch raises NameError.
        sg_seed = seed
    elif rng is None:
        sg_seed = random.randint(0, 100000)
    else:
        sg_seed = rng.randint(0, 100000)
    command_line.append('-z%d' % sg_seed)
    command_line.append('simtree')

    seq_gen_proc = subprocess.Popen(command_line,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    cwd=temp_dir)
    dataset = seq_gen_proc.communicate()[0]

    # seq-gen does not exit with an error code when it fails, so empty output
    # is treated as failure as well.
    if seq_gen_proc.returncode != 0 or len(dataset) == 0:
        sys.exit('seq-gen failed!\n')

    sd = os.path.join(temp_dir, 'simdata.nex')
    # Closed (and therefore flushed) before PAUP is invoked.
    with open(sd, 'w') as d:
        d.write(dataset)

    ############################################################################
    # PAUP
    pcf = os.path.join(temp_dir, 'execute_paup.nex')
    with open(pcf, 'w') as pc:
        pc.write('''execute simdata.nex ;
hsearch nomultrees ;
savetree file=inferred.tre format = NEXUS;
quit;
''')

    paup_proc = subprocess.Popen(['paup', '-n', pcf],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 cwd=temp_dir)
    (o, e) = paup_proc.communicate()

    paup_output = os.path.join(temp_dir, 'inferred.tre')
    # A non-zero exit status or a missing output file both count as failure.
    if paup_proc.returncode != 0 or not os.path.exists(paup_output):
        sys.exit(e)

    # read true tree with the inferred tree (because it is nexus)
    inf_tree_list = TreeList.get_from_path(paup_output,
                                           "NEXUS",
                                           taxon_set=model_tree.taxon_set)
    assert len(inf_tree_list) == 1
    inferred_tree = inf_tree_list[0]

    # determine which splits were missed
    treesplit.encode_splits(inferred_tree)
    treesplit.encode_splits(model_tree)
    missing = model_tree.find_missing_splits(inferred_tree)

    # sort the nodes of the true tree by depth and ask whether or not they
    # were recovered
    node_depth_TF_list = []
    # Postorder visits children first, so a child's depth is always set
    # before its parent reads it.
    for node in model_tree.postorder_node_iter():
        children = node.child_nodes()
        if children and node.parent_node:
            first_child = children[0]
            node.depth = first_child.depth + first_child.edge.length
            recovered = 0 if node.edge.split_bitmask in missing else 1
            node_depth_TF_list.append((node.depth, node.edge.length, recovered))
        else:
            # Leaves and the root are given depth 0.0 and are not reported.
            node.depth = 0.0

    node_depth_TF_list.sort()

    os.remove(pcf)
    os.remove(paup_output)
    os.remove(sd)
    os.remove(mtf)
    os.rmdir(temp_dir)

    return node_depth_TF_list
birth_rate = float(sys.argv[2]) assert birth_rate > 0.0 death_rate = float(sys.argv[3]) assert birth_rate >= 0.0 rng = random.Random() if os.environ.get('TREE_INF_TEST_RAND_NUMBER_SEED'): seed = int(os.environ.get('TREE_INF_TEST_RAND_NUMBER_SEED')) else: import time seed = time.time() rng.seed(seed) t = treesim.birth_death(birth_rate=birth_rate, death_rate=death_rate, ntax=num_leaves, rng=rng) treefile_obj = open('simtree', 'w') treefile_obj.write("%s;\n" % str(t)) # CLOSING THE FILE IS IMPORTANT! This flushes buffers, assuring that the data # will be written to the filesystem before seq-gen is invoked. treefile_obj.close() import subprocess command_line = ['seq-gen', '-mHKY', '-on', ] if os.environ.get('TREE_INF_TEST_RAND_NUMBER_SEED'):
def generate_trees_bdN(birth=0.57721, death=0.130357, N=49, rang=314):
    """Simulate ``rang`` birth-death trees that share one taxon set.

    :param birth: birth rate passed to ``treesim.birth_death``.
    :param death: death rate passed to ``treesim.birth_death``.
    :param N: number of tips requested for each tree (``ntax``).
    :param rang: number of trees to simulate.
    :return: a plain ``list`` of the simulated trees.
    """
    from dendropy import TaxonSet, treesim

    # One taxon set is created up front and handed to every simulation.
    taxa = TaxonSet()
    return [
        treesim.birth_death(birth,
                            death,
                            ntax=N,
                            taxon_set=taxa,
                            repeat_until_success=True)
        for _ in range(rang)
    ]