def get_two_allele_distribution(N_big, N_small, f0, f1, f_subsample):
    """
    Compute a subsampled two-allele distribution conditional on dimorphism.

    Assumes small genic selection.
    Assumes small mutation.
    The mutational bias does not affect the distribution.
    A transition matrix is built over the N_big + 1 population states.
    Each of the two boundary states moves with probability 1 to its
    neighboring state, and every interior row is filled with binomial
    probabilities whose success probability comes from
    wrightfisher.genic_diallelic.
    The stationary distribution of that matrix is passed through
    f_subsample, and the first and last entries of the result are
    dropped before renormalizing.
    @param N_big: total number of alleles in the population
    @param N_small: number of alleles sampled from the population
    @param f0: fitness of allele 0
    @param f1: fitness of allele 1
    @param f_subsample: subsampling function, called as
    f_subsample(v, N_small) where v is the stationary distribution
    @return: distribution over all non-fixed population states
    @raise ValueError: if the vector returned by
    MatrixUtil.get_stationary_distribution is not a left eigenvector
    of the transition matrix with eigenvalue 1
    """
    # construct a transition matrix
    nstates = N_big + 1
    P = np.zeros((nstates, nstates))
    # A fixed population always steps to the adjacent state,
    # so selection plays no role in the two boundary rows.
    P[0, 1] = 1.0
    P[N_big, N_big - 1] = 1.0
    for i in range(1, N_big):
        # Only the first returned probability is needed; it is the
        # binomial success probability for the next generation.
        p0, _ = wrightfisher.genic_diallelic(f0, f1, i, N_big - i)
        for j in range(nstates):
            logp = StatsUtil.binomial_log_pmf(j, N_big, p0)
            P[i, j] = math.exp(logp)
    # find the stationary distribution
    v = MatrixUtil.get_stationary_distribution(P)
    # NOTE(review): presumably this raises if v is not a valid
    # probability vector -- confirm against MatrixUtil.
    MatrixUtil.assert_distribution(v)
    if not np.allclose(v, np.dot(v, P)):
        raise ValueError('expected a left eigenvector with eigenvalue 1')
    # return the stationary distribution conditional on dimorphism
    distn = f_subsample(v, N_small)
    polymorphic = distn[1:-1]
    return polymorphic / np.sum(polymorphic)
def get_transition_matrix_slow(N_diploid, k, mutation, fit):
    """
    Build a transition matrix over fixed and two-allele population states.

    Mutation probabilities are away from a fixed state.
    Rows for fixed states are filled from the mutation matrix.
    Rows for states in which exactly two alleles are present are filled
    with binomial probabilities of the next-generation allele count.
    NOTE(review): the code uses the allele index i directly as the row
    index of the state in which allele i is fixed, so it assumes that
    gen_states yields those k fixed states first, in allele order --
    confirm against gen_states.
    @param N_diploid: diploid population size
    @param k: number of alleles e.g. 4 for A,C,G,T
    @param mutation: k by k matrix of per-generation mutation probabilities
    @param fit: sequence of k fitness values
    @return: a transition matrix
    """
    N = N_diploid * 2
    states = [tuple(s) for s in gen_states(N, k)]
    nstates = len(states)
    s_to_i = dict((s, i) for i, s in enumerate(states))
    P = np.zeros((nstates, nstates))

    def pair_state_index(a, a_count, b, b_count):
        # Index of the state holding a_count copies of allele a and
        # b_count copies of allele b, with every other allele absent.
        state = [0] * k
        state[a] = a_count
        state[b] = b_count
        return s_to_i[tuple(state)]

    # Add rows corresponding to transitions from population states
    # for which an allele is currently fixed in the population.
    for i in range(k):
        P[i, i] = mutation[i, i]
        for j in range(k):
            if i != j:
                # A single copy of allele j appears by mutation.
                P[i, pair_state_index(i, N - 1, j, 1)] = mutation[i, j]
    # Add rows corresponding to transitions from polymorphic population states.
    for i, j in combinations(range(k), 2):
        # Selection coefficient of allele j relative to allele i.
        # It depends only on the allele pair, so compute it once here.
        # The float() conversion keeps integer fitness values from being
        # floor-divided when true division is not in effect.
        s = 1 - float(fit[j]) / fit[i]
        # Indices of the states with h copies of allele i and N - h copies
        # of allele j, for h = 1 .. N-1; shared by source and sink states.
        pair_indices = [
                pair_state_index(i, h, j, N - h) for h in range(1, N)]
        for h, index in enumerate(pair_indices, 1):
            # Compute each child probability of having allele i.
            pi, _ = wrightfisher.genic_diallelic(1.0, 1.0 - s, h, N - h)
            # Add entries corresponding to fixation of an allele.
            P[index, i] = math.exp(StatsUtil.binomial_log_pmf(N, N, pi))
            P[index, j] = math.exp(StatsUtil.binomial_log_pmf(0, N, pi))
            # Add entries corresponding to transitions to polymorphic states.
            for hsink, sink_index in enumerate(pair_indices, 1):
                logp = StatsUtil.binomial_log_pmf(hsink, N, pi)
                P[index, sink_index] = math.exp(logp)
    return P
def get_transition_matrix_slow(N_diploid, k, mutation, fit):
    """
    Build a transition matrix over fixed and two-allele population states.

    Mutation probabilities are away from a fixed state.
    Rows for fixed states are filled from the mutation matrix.
    Rows for states in which exactly two alleles are present are filled
    with binomial probabilities of the next-generation allele count.
    NOTE(review): the code uses the allele index i directly as the row
    index of the state in which allele i is fixed, so it assumes that
    gen_states yields those k fixed states first, in allele order --
    confirm against gen_states.
    @param N_diploid: diploid population size
    @param k: number of alleles e.g. 4 for A,C,G,T
    @param mutation: k by k matrix of per-generation mutation probabilities
    @param fit: sequence of k fitness values
    @return: a transition matrix
    """
    N = N_diploid * 2
    states = [tuple(s) for s in gen_states(N, k)]
    nstates = len(states)
    s_to_i = dict((s, i) for i, s in enumerate(states))
    P = np.zeros((nstates, nstates))

    def pair_state_index(a, a_count, b, b_count):
        # Index of the state holding a_count copies of allele a and
        # b_count copies of allele b, with every other allele absent.
        state = [0] * k
        state[a] = a_count
        state[b] = b_count
        return s_to_i[tuple(state)]

    # Add rows corresponding to transitions from population states
    # for which an allele is currently fixed in the population.
    for i in range(k):
        P[i, i] = mutation[i, i]
        for j in range(k):
            if i != j:
                # A single copy of allele j appears by mutation.
                P[i, pair_state_index(i, N - 1, j, 1)] = mutation[i, j]
    # Add rows corresponding to transitions from polymorphic population states.
    for i, j in combinations(range(k), 2):
        # Selection coefficient of allele j relative to allele i.
        # It depends only on the allele pair, so compute it once here.
        # The float() conversion keeps integer fitness values from being
        # floor-divided when true division is not in effect.
        s = 1 - float(fit[j]) / fit[i]
        # Indices of the states with h copies of allele i and N - h copies
        # of allele j, for h = 1 .. N-1; shared by source and sink states.
        pair_indices = [
                pair_state_index(i, h, j, N - h) for h in range(1, N)]
        for h, index in enumerate(pair_indices, 1):
            # Compute each child probability of having allele i.
            pi, _ = wrightfisher.genic_diallelic(1.0, 1.0 - s, h, N - h)
            # Add entries corresponding to fixation of an allele.
            P[index, i] = math.exp(StatsUtil.binomial_log_pmf(N, N, pi))
            P[index, j] = math.exp(StatsUtil.binomial_log_pmf(0, N, pi))
            # Add entries corresponding to transitions to polymorphic states.
            for hsink, sink_index in enumerate(pair_indices, 1):
                logp = StatsUtil.binomial_log_pmf(hsink, N, pi)
                P[index, sink_index] = math.exp(logp)
    return P