def sample_dlcoal(stree, n, duprate, lossrate, namefunc=lambda x: x,
                  remove_single=True, name_internal="n", minsize=0):
    """
    Sample a gene tree from the DLCoal model.

    A locus tree is drawn with birthdeath.sample_birth_death_gene_tree and
    a coalescent tree is then drawn inside it with sample_multicoal_tree.

    Arguments:
    stree         -- tree handed to the birth-death sampler
                     (presumably the species tree -- confirm with caller)
    n             -- forwarded unchanged to sample_multicoal_tree
                     (presumably population size(s) -- confirm)
    duprate       -- forwarded to the birth-death sampler
    lossrate      -- forwarded to the birth-death sampler
    namefunc      -- forwarded to sample_multicoal_tree
    remove_single -- if true, single-child nodes are removed from the
                     simulated coalescent tree and its reconciliation is
                     subset to match
    name_internal -- if true-ish, passed to rename_nodes for both the
                     coalescent tree and the locus tree
    minsize       -- locus trees are redrawn until one has at least this
                     many leaves

    Returns a pair (coal_tree, extra), where extra is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".
    """
    # Redraw the locus tree until it has at least `minsize` leaves.
    # NOTE: there is no retry limit, so this loops for as long as the
    # sampler keeps returning trees that are too small.
    big_enough = False
    while not big_enough:
        locus_tree, locus_recon, locus_events = (
            birthdeath.sample_birth_death_gene_tree(stree, duprate, lossrate))
        big_enough = len(locus_tree.leaves()) >= minsize

    if len(locus_tree.nodes) > 1:
        # Simulate coalescence.  For every duplication node, one of its
        # first two children (index 0 or 1, chosen uniformly) is marked as
        # the daughter.
        daughters = set(
            node.children[random.randint(0, 1)]
            for node in locus_tree
            if locus_events[node] == "dup")

        coal_tree, coal_recon = sample_multicoal_tree(
            locus_tree, n, daughters=daughters, namefunc=namefunc)

        # Clean up the coalescent tree.
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)
    else:
        # Total extinction: the coalescent tree is just a root, mapped to
        # the root of the locus tree.
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()

    if name_internal:
        for tree in (coal_tree, locus_tree):
            rename_nodes(tree, name_internal)

    # Everything produced along the way is returned beside the gene tree.
    extra = {
        "locus_tree": locus_tree,
        "locus_recon": locus_recon,
        "locus_events": locus_events,
        "coal_tree": coal_tree,
        "coal_recon": coal_recon,
        "daughters": daughters,
    }
    return coal_tree, extra
def sample_dlcoal(stree, n, duprate, lossrate, leaf_counts=None,
                  namefunc=lambda x: x, remove_single=True,
                  name_internal="n", minsize=0, reject=False):
    """
    Sample a gene tree from the DLCoal model.

    A locus tree is drawn with birthdeath.sample_birth_death_gene_tree and
    a coalescent tree is then drawn inside it with sample_multilocus_tree
    (or sample_multilocus_tree_reject when `reject` is true).

    Arguments:
    stree         -- tree handed to the birth-death sampler
                     (presumably the species tree -- confirm with caller)
    n             -- either a dict or any other value.  A dict is indexed
                     by the name of the node each locus-tree node is
                     reconciled to and re-keyed by locus-tree node name;
                     any other value is forwarded unchanged.
                     (presumably population size(s) -- confirm)
    duprate       -- forwarded to the birth-death sampler
    lossrate      -- forwarded to the birth-death sampler
    leaf_counts   -- either a dict or any other value (default None).  A
                     dict is indexed by the name of the node each
                     locus-tree leaf is reconciled to and re-keyed by leaf
                     name; any other value is forwarded unchanged.
    namefunc      -- forwarded to the coalescent sampler
    remove_single -- if true, single-child nodes are removed from the
                     simulated coalescent tree and its reconciliation is
                     subset to match
    name_internal -- if true-ish, passed to dlcoal.rename_nodes for both
                     the coalescent tree and the locus tree
    minsize       -- locus trees are redrawn until one has at least this
                     many leaves
    reject        -- if true, use sample_multilocus_tree_reject (slow
                     rejection sampling, intended for testing)

    Returns a pair (coal_tree, extra), where extra is a dict with the keys
    "locus_tree", "locus_recon", "locus_events", "coal_tree", "coal_recon"
    and "daughters".

    Raises KeyError if `n` or `leaf_counts` is a dict that lacks an entry
    for a name it is looked up with.
    """
    # generate the locus tree, retrying until it has enough leaves
    # NOTE: there is no retry limit.
    while True:
        # TODO: does this take a namefunc?
        locus_tree, locus_recon, locus_events = \
            birthdeath.sample_birth_death_gene_tree(
                stree, duprate, lossrate)
        if len(locus_tree.leaves()) >= minsize:
            break

    # if n is a dict, update it with gene names from locus tree
    if isinstance(n, dict):
        n2 = {}
        # .items() rather than the Python-2-only .iteritems(), so this
        # branch runs on both Python 2 and Python 3.
        for node, snode in locus_recon.items():
            n2[node.name] = n[snode.name]
    else:
        n2 = n

    # if leaf_counts is a dict, update it with gene names from locus tree
    # TODO: how to handle copy number polymorphism?
    if isinstance(leaf_counts, dict):
        leaf_counts2 = {}
        for node in locus_tree.leaves():
            snode = locus_recon[node]
            leaf_counts2[node.name] = leaf_counts[snode.name]
    else:
        leaf_counts2 = leaf_counts

    if len(locus_tree.nodes) <= 1:
        # total extinction: the coalescent tree is just a root, mapped to
        # the root of the locus tree
        coal_tree = treelib.Tree()
        coal_tree.make_root()
        coal_recon = {coal_tree.root: locus_tree.root}
        daughters = set()
    else:
        # simulate coalescence

        # choose daughter duplications: for every duplication node, one of
        # its first two children (index 0 or 1) is picked uniformly
        daughters = set()
        for node in locus_tree:
            if locus_events[node] == "dup":
                daughters.add(node.children[random.randint(0, 1)])

        if reject:
            # use slow rejection sampling (for testing)
            coal_tree, coal_recon = sample_multilocus_tree_reject(
                locus_tree, n2, leaf_counts=leaf_counts2,
                daughters=daughters, namefunc=namefunc)
        else:
            coal_tree, coal_recon = sample_multilocus_tree(
                locus_tree, n2, leaf_counts=leaf_counts2,
                daughters=daughters, namefunc=namefunc)

        # clean up coal tree
        if remove_single:
            treelib.remove_single_children(coal_tree)
            phylo.subset_recon(coal_tree, coal_recon)

    if name_internal:
        dlcoal.rename_nodes(coal_tree, name_internal)
        dlcoal.rename_nodes(locus_tree, name_internal)

    # store extra information
    extra = {"locus_tree": locus_tree,
             "locus_recon": locus_recon,
             "locus_events": locus_events,
             "coal_tree": coal_tree,
             "coal_recon": coal_recon,
             "daughters": daughters}
    return coal_tree, extra