def smc_apply_spr(tree, spr):
    """
    Apply an SPR operation to a local tree.

    The tree is modified in place and nothing is returned.  The parent of
    the recombining node (the "broken" node) is spliced out of its current
    position and reused as the new coalescence node.

    tree -- local tree; must support ``tree[name]`` lookup and have a
            ``root`` attribute.  Nodes are expected to carry ``parent``,
            ``children``, ``dist`` and a ``data`` dict with an "age" entry.
    spr  -- mapping with the keys "recomb_node" and "coal_node" (keys used
            to look nodes up in tree) and "coal_time" (age given to the
            new coalescence node)

    Raises AssertionError if the recombining node has no parent.
    """
    recomb = tree[spr["recomb_node"]]
    coal = tree[spr["coal_node"]]
    broken = recomb.parent

    # Check before touching any attribute of broken; otherwise a root
    # recomb node fails with an unrelated AttributeError.
    assert broken is not None, "recomb node must have a parent"
    broken_dist = broken.dist

    # remove recomb subtree from its parent
    broken.children.remove(recomb)

    # adjust recoal if coal branch and broken are the same
    if coal == broken:
        coal = broken.children[0]

    # remove broken from tree: its remaining child takes its place and
    # absorbs its branch length
    broken_child = broken.children[0]
    broken_child.parent = broken.parent
    if broken.parent:
        util.replace(broken.parent.children, broken, broken_child)
    broken_child.dist += broken_dist

    # reuse broken node as new coal node
    new_node = broken
    new_node.data["age"] = spr["coal_time"]
    new_node.children = [recomb, coal]
    new_node.parent = coal.parent
    if new_node.parent:
        new_node.dist = new_node.parent.data["age"] - new_node.data["age"]
    else:
        # no parent above the new node, so it has no branch length
        new_node.dist = 0.0

    # branch lengths below the new node follow from the age differences
    recomb.parent = new_node
    recomb.dist = new_node.data["age"] - recomb.data["age"]
    coal.parent = new_node
    coal.dist = new_node.data["age"] - coal.data["age"]

    # make the parent point at new_node instead of coal
    if new_node.parent:
        if coal in new_node.parent.children:
            util.replace(new_node.parent.children, coal, new_node)
        else:
            assert new_node in new_node.parent.children

    # change root: climb until a node without a parent is reached
    while tree.root.parent is not None:
        tree.root = tree.root.parent
def sample_dsmc_sprs(k, popsize, rho, recombmap=None, start=0.0, end=0.0,
                     times=None, times2=None, init_tree=None, names=None,
                     make_names=True):
    """
    Sample ARG using Discrete Sequentially Markovian Coalescent (SMC)

    This is a generator.  The first item yielded is the initial local
    tree; every later item is a tuple
    ``(pos, (rleaves, recomb_time), (cleaves, coal_time))`` describing one
    SPR, where rleaves / cleaves are the leaf names below the recombining
    node and the chosen coalescence node.

    k          -- chromosomes (forwarded to sample_tree)
    popsize    -- effective population size (haploid); either a single
                  value or a sequence indexed by time interval
    rho        -- recombination rate (recombinations / site / generation)
    recombmap  -- map for variable recombination rate
    start      -- starting chromosome coordinate
    end        -- ending chromosome coordinate
    times      -- discrete time points (required)
    times2     -- second time grid (required); used to discretize a
                  sampled initial tree and to measure interval lengths
                  when sampling the coalescence time
    init_tree  -- tree to start from; sampled with sample_tree when None
    names      -- names to use for leaves (default: None)
    make_names -- make names using strings (default: True)
    """
    assert times is not None
    assert times2 is not None

    ntimes = len(times) - 1
    # length of each interval between consecutive time points
    time_steps = [later - earlier
                  for earlier, later in zip(times[:-1], times[1:])]
    # times2 = get_coal_times(times)

    popsizes = (popsize if hasattr(popsize, "__len__")
                else [popsize] * len(time_steps))

    # the initial tree is always the first item produced
    if init_tree is None:
        init_tree = sample_tree(k, popsizes, times, start=start, end=end,
                                names=names, make_names=make_names)
        argweaver.discretize_arg(init_tree, times2)
    yield init_tree

    # all SPRs below are applied to a copy of the initial tree
    pos = start
    tree = init_tree.copy()
    while True:
        # advance to the next recombination point
        total_len = sum(node.get_dist() for node in tree)
        pos += int(sample_next_recomb(total_len, rho, pos=pos,
                                      recombmap=recombmap, minlen=1))
        if pos >= end - 1:
            return

        root_age_index = times.index(tree.root.age)

        # choose the recombination time interval, weighting each interval
        # up to the root by (lineage count) * (interval length)
        states = set(argweaver.iter_coal_states(tree, times))
        nbranches, _nrecombs, _ncoals = argweaver.get_nlineages_recomb_coal(
            tree, times)
        weights = []
        for i in range(root_age_index + 1):
            weights.append(nbranches[i] * time_steps[i])
        recomb_time_index = stats.sample(weights)
        recomb_time = times[recomb_time_index]

        # choose the recombining branch among non-root states at that time
        candidates = [state for state in states
                      if state[1] == recomb_time_index
                      and state[0] != tree.root.name]
        recomb_node = tree[random.sample(candidates, 1)[0][0]]

        # choose coal time: step through the intervals until a
        # coalescence is drawn or the last interval is reached
        j = recomb_time_index
        prev_kj = nbranches[max(j - 1, 0)]
        while j < ntimes - 1:
            kj = nbranches[j]
            # discount the recombining branch when it is itself a state at
            # j and its parent is older than times[j]
            if ((recomb_node.name, j) in states
                    and recomb_node.parents[0].age > times[j]):
                kj -= 1
            assert kj > 0, (j, root_age_index, states)

            A = (times2[2 * j + 1] - times2[2 * j]) * kj
            if j > recomb_time_index:
                A += (times2[2 * j] - times2[2 * j - 1]) * prev_kj

            p_coal = 1.0 - exp(-A / float(popsizes[j]))
            if random.random() < p_coal:
                break
            prev_kj = kj
            j += 1
        coal_time_index = j
        coal_time = times[j]

        # choose coal node
        # since coal points collapse, exclude parent node, but allow sibling
        # Names ruled out: the recombining node and, transitively, the
        # children of every visited node whose age equals coal_time.
        exclude = []
        stack = [recomb_node]
        while stack:
            node = stack.pop()
            exclude.append(node.name)
            if node.age == coal_time:
                stack.extend(node.children)
        parent = recomb_node.parents[0]
        exclude2 = (parent.name, times.index(parent.age))
        candidates = [state for state in states
                      if state[1] == coal_time_index
                      and state[0] not in exclude
                      and state != exclude2]
        coal_node = tree[random.sample(candidates, 1)[0][0]]

        # report the SPR before the local tree is modified
        rleaves = list(tree.leaf_names(recomb_node))
        cleaves = list(tree.leaf_names(coal_node))
        yield pos, (rleaves, recomb_time), (cleaves, coal_time)

        # apply SPR to local tree
        broken = recomb_node.parents[0]
        recoal = tree.new_node(age=coal_time,
                               children=[recomb_node, coal_node])

        # hook the new coalescence node in above coal_node
        recomb_node.parents[0] = recoal
        broken.children.remove(recomb_node)
        if not coal_node.parents:
            coal_node.parents.append(recoal)
        else:
            old_parent = coal_node.parents[0]
            recoal.parents.append(old_parent)
            util.replace(old_parent.children, coal_node, recoal)
            coal_node.parents[0] = recoal

        # splice out the broken node; its remaining child takes its place
        survivor = broken.children[0]
        if broken.parents:
            grandparent = broken.parents[0]
            survivor.parents[0] = grandparent
            util.replace(grandparent.children, broken, survivor)
        else:
            survivor.parents.remove(broken)

        del tree.nodes[broken.name]
        tree.set_root()
def sample_dsmc_sprs(
        k, popsize, rho, recombmap=None, start=0.0, end=0.0, times=None,
        times2=None, init_tree=None, names=None, make_names=True):
    """
    Sample ARG using Discrete Sequentially Markovian Coalescent (SMC)

    Generator: yields the initial local tree first, then one tuple
    ``(pos, (rleaves, recomb_time), (cleaves, coal_time))`` per sampled
    SPR.  rleaves and cleaves are lists of the leaf names found below the
    recombining node and below the chosen coalescence node.

    k          -- chromosomes (forwarded to sample_tree)
    popsize    -- effective population size (haploid); a single value, or
                  a sequence indexed by time interval
    rho        -- recombination rate (recombinations / site / generation)
    recombmap  -- map for variable recombination rate
    start      -- starting chromosome coordinate
    end        -- ending chromosome coordinate
    times      -- discrete time points (required)
    times2     -- second time grid (required); passed to
                  argweaver.discretize_arg for a sampled initial tree and
                  read when computing coalescence probabilities
    init_tree  -- initial tree; when None one is drawn with sample_tree
    names      -- names to use for leaves (default: None)
    make_names -- make names using strings (default: True)
    """
    assert times is not None
    assert times2 is not None

    def gather_excluded(node, age, found):
        # Record node, then descend into its children only when the node
        # sits exactly at the given age.
        found.append(node.name)
        if node.age == age:
            for child in node.children:
                gather_excluded(child, age, found)
        return found

    ntimes = len(times) - 1
    time_steps = []
    for idx in range(1, ntimes + 1):
        time_steps.append(times[idx] - times[idx - 1])
    # times2 = get_coal_times(times)

    # a scalar population size is repeated once per time step
    if not hasattr(popsize, "__len__"):
        popsizes = [popsize] * len(time_steps)
    else:
        popsizes = popsize

    # yield initial tree first
    if init_tree is None:
        init_tree = sample_tree(
            k, popsizes, times,
            start=start, end=end, names=names, make_names=make_names)
        argweaver.discretize_arg(init_tree, times2)
    yield init_tree

    # sample SPRs
    pos = start
    tree = init_tree.copy()
    while True:
        # sample next recomb point
        tree_length = sum(branch.get_dist() for branch in tree)
        block_length = int(sample_next_recomb(
            tree_length, rho, pos=pos, recombmap=recombmap, minlen=1))
        pos += block_length
        if pos >= end - 1:
            break

        root_age_index = times.index(tree.root.age)

        # choose time interval for recombination
        states = set(argweaver.iter_coal_states(tree, times))
        lineage_counts = argweaver.get_nlineages_recomb_coal(tree, times)
        nbranches, _, _ = lineage_counts
        probs = [nbranches[t] * time_steps[t]
                 for t in range(root_age_index + 1)]
        recomb_time_index = stats.sample(probs)
        recomb_time = times[recomb_time_index]

        # choose branch for recombination (never the root branch)
        recomb_choices = [
            s for s in states
            if s[1] == recomb_time_index and s[0] != tree.root.name]
        picked = random.sample(recomb_choices, 1)[0]
        recomb_node = tree[picked[0]]

        # choose coal time
        t_idx = recomb_time_index
        lineages_before = nbranches[max(t_idx - 1, 0)]
        while t_idx < ntimes - 1:
            lineages = nbranches[t_idx]
            if ((recomb_node.name, t_idx) in states and
                    recomb_node.parents[0].age > times[t_idx]):
                lineages -= 1
            assert lineages > 0, (t_idx, root_age_index, states)

            rate = (times2[2 * t_idx + 1] - times2[2 * t_idx]) * lineages
            if t_idx > recomb_time_index:
                rate += ((times2[2 * t_idx] - times2[2 * t_idx - 1])
                         * lineages_before)

            coal_prob = 1.0 - exp(-rate / float(popsizes[t_idx]))
            if random.random() < coal_prob:
                break
            t_idx += 1
            lineages_before = lineages
        coal_time_index = t_idx
        coal_time = times[t_idx]

        # choose coal node
        # since coal points collapse, exclude parent node, but allow sibling
        exclude = gather_excluded(recomb_node, coal_time, [])
        recomb_parent = recomb_node.parents[0]
        exclude2 = (recomb_parent.name, times.index(recomb_parent.age))
        coal_choices = [
            s for s in states
            if s[1] == coal_time_index
            and s[0] not in exclude
            and s != exclude2]
        picked = random.sample(coal_choices, 1)[0]
        coal_node = tree[picked[0]]

        # yield SPR
        rleaves = list(tree.leaf_names(recomb_node))
        cleaves = list(tree.leaf_names(coal_node))
        spr = (pos, (rleaves, recomb_time), (cleaves, coal_time))
        yield spr

        # apply SPR to local tree
        broken = recomb_node.parents[0]
        recoal = tree.new_node(
            age=coal_time, children=[recomb_node, coal_node])

        # add recoal node to tree
        recomb_node.parents[0] = recoal
        broken.children.remove(recomb_node)
        if coal_node.parents:
            above_coal = coal_node.parents[0]
            recoal.parents.append(above_coal)
            util.replace(above_coal.children, coal_node, recoal)
            coal_node.parents[0] = recoal
        else:
            coal_node.parents.append(recoal)

        # remove broken node
        remaining = broken.children[0]
        if not broken.parents:
            remaining.parents.remove(broken)
        else:
            above_broken = broken.parents[0]
            remaining.parents[0] = above_broken
            util.replace(above_broken.children, broken, remaining)

        del tree.nodes[broken.name]
        tree.set_root()