def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute expected hitting times.
    Theta is 4*N*mu, and the units of time are 4*N*mu generations.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives time for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # The identity matrix depends only on the size of the state space.
    I = np.eye(nstates)
    # The hitting time is computed from state AB to state ab.
    i = 0
    j = 3
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stationary distribution
            v = MatrixUtil.get_stationary_distribution(P)
            # Compute the fundamental matrix Z = inv(I - (P - P_inf)) - P_inf
            # where every row of P_inf is the stationary distribution v.
            # Broadcasting v across rows avoids constructing P_inf.
            Z = linalg.inv(I - (P - v)) - v
            hitting_time_generations = (Z[j, j] - Z[i, j]) / v[j]
            # rescale from generations to the time units of the docstring
            hitting_time = hitting_time_generations * theta
            row.append(hitting_time)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute expected hitting times.
    Theta is 4*N*mu, and the units of time are 4*N*mu generations.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives time for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # The identity matrix depends only on the size of the state space.
    I = np.eye(nstates)
    # The hitting time is computed from state AB to state ab.
    i = 0
    j = 3
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stationary distribution
            v = MatrixUtil.get_stationary_distribution(P)
            # Compute the fundamental matrix Z = inv(I - (P - P_inf)) - P_inf
            # where every row of P_inf is the stationary distribution v.
            # Broadcasting v across rows avoids constructing P_inf.
            Z = linalg.inv(I - (P - v)) - v
            hitting_time_generations = (Z[j, j] - Z[i, j]) / v[j]
            # rescale from generations to the time units of the docstring
            hitting_time = hitting_time_generations * theta
            row.append(hitting_time)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the expected number of returns to AB before reaching ab.
    The chain is reduced to its first k states by a stochastic complement,
    self transitions are conditioned away, and ab is made absorbing.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the value for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # These identity matrices depend only on the state space size.
    I_rest = np.eye(nstates - k)
    I_transient = np.eye(k - 1)
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement on the first k states
            X = linalg.solve(I_rest - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # condition on not looping:
            # zero the diagonal and renormalize each row
            np.fill_diagonal(H, 0)
            row_sums = np.sum(H, axis=1)
            H /= row_sums[:, np.newaxis]
            # let ab (the last of the k states) be an absorbing state
            # and compute the expected number of returns to AB
            Q = H[:k - 1, :k - 1]
            N = linalg.inv(I_transient - Q)
            # N[0, 0] counts visits to AB including the initial one
            row.append(N[0, 0] - 1)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the expected number of returns to AB before reaching ab.
    The chain is reduced to its first k states by a stochastic complement,
    self transitions are conditioned away, and ab is made absorbing.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the value for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # These identity matrices depend only on the state space size.
    I_rest = np.eye(nstates - k)
    I_transient = np.eye(k - 1)
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement on the first k states
            X = linalg.solve(I_rest - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # condition on not looping:
            # zero the diagonal and renormalize each row
            np.fill_diagonal(H, 0)
            row_sums = np.sum(H, axis=1)
            H /= row_sums[:, np.newaxis]
            # let ab (the last of the k states) be an absorbing state
            # and compute the expected number of returns to AB
            Q = H[:k - 1, :k - 1]
            N = linalg.inv(I_transient - Q)
            # N[0, 0] counts visits to AB including the initial one
            row.append(N[0, 0] - 1)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute a conditional transition probability between two states.
    The chain is reduced to its first k states by a stochastic complement,
    and the reported value is the probability of moving from state 0
    to state 3 given that the reduced chain leaves state 0.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the value for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # This identity matrix depends only on the state space size.
    I_rest = np.eye(nstates - k)
    # source and target states of the conditional transition
    i = 0
    j = 3
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement on the first k states
            X = linalg.solve(I_rest - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # compute a conditional transition probability
            # by discarding the self transition mass of state i
            row.append(H[i, j] / (1 - H[i, i]))
        arr.append(row)
    return arr
def get_p2(N_diploid, theta, Nr, Ns):
    """
    Compute the probability of compensatory substitution pathway type 2.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: p_t2
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # under Python 2 division rules.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    P = np.dot(MR_prob, S_prob)
    # reduce the chain to its first k states (stochastic complement)
    X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
    H = P[:k, :k] + np.dot(P[:k, k:], X)
    # probability of moving from state 0 to state 3
    # given that the reduced chain leaves state 0
    p_direct = H[0, 3] / (1 - H[0, 0])
    # The following line is Equation (1) of the Nasrallah manuscript.
    p_t2 = (2 * p_direct) / (1 + p_direct)
    return p_t2
def get_p2(N_diploid, theta, Nr, Ns):
    """
    Compute the probability of compensatory substitution pathway type 2.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: p_t2
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # under Python 2 division rules.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    P = np.dot(MR_prob, S_prob)
    # reduce the chain to its first k states (stochastic complement)
    X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
    H = P[:k, :k] + np.dot(P[:k, k:], X)
    # probability of moving from state 0 to state 3
    # given that the reduced chain leaves state 0
    p_direct = H[0, 3] / (1 - H[0, 0])
    # The following line is Equation (1) of the Nasrallah manuscript.
    p_t2 = (2 * p_direct) / (1 + p_direct)
    return p_t2
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer;
    it is meant as a contrast to forward simulation.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: (t1, v1), (t2, v2)
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # under Python 2 division rules.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    # combine mutation, recombination, and selection
    P = np.dot(MR_prob, S_prob)
    # delegate the per-pathway summaries to the wfbckcompens helpers
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer;
    it is meant as a contrast to forward simulation.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: (t1, v1), (t2, v2)
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # under Python 2 division rules.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    # combine mutation, recombination, and selection
    P = np.dot(MR_prob, S_prob)
    # delegate the per-pathway summaries to the wfbckcompens helpers
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
def get_plot_array(N_diploid, theta, Nr_values, Ns_values):
    """
    Compute simulated t statistics comparing the two pathway wait times.
    For each parameter combination, wait times of type 1 and type 2 events
    are sampled from normal distributions and compared.
    The result is random because it uses np.random without a fixed seed.
    The outer loop runs over Nr_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one Nr.
    @param N_diploid: diploid population size
    @param theta: mutation rate
    @param Nr_values: recombination rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the statistic for Nr_values[i] and Ns_values[j]
    """
    # define the haplotypes
    AB, Ab, aB, ab = 0, 1, 2, 3
    # initialize the state space
    N_hap = 2 * N_diploid
    k = 4
    M = multinomstate.get_sorted_states(N_hap, k)
    nstates = M.shape[0]
    # compute the inverse map
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # under Python 2 division rules.
    mu = theta / 2.0
    # Precompute the mutation matrix
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    # This identity matrix depends only on the state space size.
    I_rest = np.eye(nstates - k)
    # number of simulated wait times per parameter combination
    nsamples = 500
    arr = []
    for Nr in Nr_values:
        # precompute a recombination probability matrix
        R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
        # precompute the product of mutation and recombination.
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # expectation and variance of the wait time for each pathway
            t1, v1 = wfbckcompens.get_type_1_info(P)
            t2, v2 = wfbckcompens.get_type_2_info(P)
            # Get the probability of a direct transition from AB to ab
            # using the stochastic complement on the first k states.
            X = linalg.solve(I_rest - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            p_direct = H[AB, ab] / (1 - H[AB, AB])
            # The following line is Equation (1) of the Nasrallah manuscript.
            p_t2 = (2 * p_direct) / (1 + p_direct)
            p_t1 = 1 - p_t2
            # Just do a simulation,
            # assuming that the wait times are normally distributed.
            n1 = np.random.binomial(nsamples, p_t1)
            n2 = nsamples - n1
            # NOTE: if n1 or n2 is zero then the corresponding sample is
            # empty and np.mean of it is nan, so the statistic is nan.
            X1 = np.random.normal(t1, math.sqrt(v1), n1)
            X2 = np.random.normal(t2, math.sqrt(v2), n2)
            X_pooled = np.hstack((X1, X2))
            x = np.mean(X1) - np.mean(X2)
            s_pooled = math.sqrt(np.var(X_pooled) / nsamples)
            t_statistic = x / s_pooled
            row.append(t_statistic)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the probability that ab is entered directly from AB.
    The chain is reduced to its first k states by a stochastic complement.
    The ab state is then split into two absorbing states,
    where the extra state abx receives only the transitions from AB.
    The reported value is the probability of absorption in abx from AB.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the value for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # These identity matrices depend only on the state space size.
    I_rest = np.eye(nstates - k)
    I_transient = np.eye(k - 1)
    # break the last state into two states
    AB, Ab, aB, ab, abx = 0, 1, 2, 3, 4
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement on the first k states
            X = linalg.solve(I_rest - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # embed H into a matrix with one extra state
            J = np.zeros((k + 1, k + 1))
            J[:k, :k] = H
            # force ab and abx to be absorbing states
            J[ab, :] = 0
            J[abx, :] = 0
            J[ab, ab] = 1
            J[abx, abx] = 1
            # connect AB to the new ab state
            J[AB, abx] = J[AB, ab]
            J[AB, ab] = 0
            # Now transition matrix J is in "canonical form"
            # because all of the absorbing states are at the end.
            # Row i of B is the distribution over (ab, abx)
            # of the absorbing state reached from transient state i.
            B = linalg.solve(
                    I_transient - J[:k - 1, :k - 1],
                    J[:k - 1, k - 1:])
            # probability of absorption in abx starting from AB
            row.append(B[AB, 1])
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the probability that ab is entered directly from AB.
    The chain is reduced to its first k states by a stochastic complement.
    The ab state is then split into two absorbing states,
    where the extra state abx receives only the transitions from AB.
    The reported value is the probability of absorption in abx from AB.
    The outer loop runs over theta_values and the inner loop runs over
    Ns_values, so each row of the result corresponds to one theta.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the value for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # These identity matrices depend only on the state space size.
    I_rest = np.eye(nstates - k)
    I_transient = np.eye(k - 1)
    # break the last state into two states
    AB, Ab, aB, ab, abx = 0, 1, 2, 3, 4
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # under Python 2 division rules.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement on the first k states
            X = linalg.solve(I_rest - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # embed H into a matrix with one extra state
            J = np.zeros((k + 1, k + 1))
            J[:k, :k] = H
            # force ab and abx to be absorbing states
            J[ab, :] = 0
            J[abx, :] = 0
            J[ab, ab] = 1
            J[abx, abx] = 1
            # connect AB to the new ab state
            J[AB, abx] = J[AB, ab]
            J[AB, ab] = 0
            # Now transition matrix J is in "canonical form"
            # because all of the absorbing states are at the end.
            # Row i of B is the distribution over (ab, abx)
            # of the absorbing state reached from transient state i.
            B = linalg.solve(
                    I_transient - J[:k - 1, :k - 1],
                    J[:k - 1, k - 1:])
            # probability of absorption in abx starting from AB
            row.append(B[AB, 1])
        arr.append(row)
    return arr