def main(args):
    """
    Print the rate matrix of the k=3 mutation plus Moran drift process
    together with the eigenvector of its transpose that belongs to the
    eigenvalue of smallest absolute value.
    @param args: object with attributes alpha (the mutation rate matrix
    is multiplied by this number) and N (population size)
    """
    alpha = args.alpha
    N = args.N
    k = 3
    print('alpha: %s' % (alpha,))
    print('N: %s' % (N,))
    print('k: %s' % (k,))
    print('')
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    R_mut = wrightcore.create_mutation_abc(M, T)
    R_drift = wrightcore.create_moran_drift_rate_k3(M, T)
    Q = alpha * R_mut + R_drift
    # Eigenvectors of the transpose of Q are left eigenvectors of Q.
    # The decomposition is computed once and reused for printing below.
    eig_result = scipy.linalg.eig(Q.T)
    W, V = eig_result
    # Pick out the correct eigenvector.
    # Use argmin on the abs eigenvalues instead of min over
    # (abs eigenvalue, eigenvector) pairs, because tuple comparison
    # falls through to comparing the eigenvector arrays whenever two
    # abs eigenvalues tie (e.g. a complex conjugate pair),
    # and that comparison raises ValueError.
    v = V[:, np.argmin(np.abs(W))]
    print('rate matrix:')
    print(Q)
    print('')
    print('transpose of rate matrix:')
    print(Q.T)
    print('')
    print('eigendecomposition of transpose of rate matrix as integers:')
    print(eig_result)
    print('')
    print('transpose of rate matrix in mathematica notation:')
    print(MatrixUtil.m_to_mathematica_string(Q.T.astype(int)))
    print('')
    print('abs eigenvector corresponding to smallest abs eigenvalue:')
    print(np.abs(v))
    print('')
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute expected hitting times.
    Theta is 4*N*mu, and the units of time are 4*N*mu generations.
    The hitting time is from the state at index 0 of the sorted state
    list to the state at index 3 (called AB and ab in this code).
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives time for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # by Python 2 integer division.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stationary distribution
            v = MatrixUtil.get_stationary_distribution(P)
            # Compute the fundamental matrix
            # Z = inv(I - (P - P_inf)) - P_inf
            # where every row of the limiting matrix P_inf is v.
            # Subtracting the 1d array v broadcasts it over the rows,
            # so P_inf itself never has to be constructed.
            Z = linalg.inv(np.eye(nstates) - (P - v)) - v
            # compute the hitting time from state AB to state ab.
            i = 0
            j = 3
            hitting_time_generations = (Z[j, j] - Z[i, j]) / v[j]
            # convert from generations to the scaled units of time
            hitting_time = hitting_time_generations * theta
            row.append(hitting_time)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute expected hitting times.
    Theta is 4*N*mu, and the units of time are 4*N*mu generations.
    The hitting time is from the state at index 0 of the sorted state
    list to the state at index 3 (called AB and ab in this code).
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives time for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # by Python 2 integer division.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stationary distribution
            v = MatrixUtil.get_stationary_distribution(P)
            # Compute the fundamental matrix
            # Z = inv(I - (P - P_inf)) - P_inf
            # where every row of the limiting matrix P_inf is v.
            # Subtracting the 1d array v broadcasts it over the rows,
            # so P_inf itself never has to be constructed.
            Z = linalg.inv(np.eye(nstates) - (P - v)) - v
            # compute the hitting time from state AB to state ab.
            i = 0
            j = 3
            hitting_time_generations = (Z[j, j] - Z[i, j]) / v[j]
            # convert from generations to the scaled units of time
            hitting_time = hitting_time_generations * theta
            row.append(hitting_time)
        arr.append(row)
    return arr
def do_full_simplex_then_collapse(mutrate, popsize):
    """
    Compute a stationary distribution over the full 4-haplotype state
    space, then merge the two middle haplotypes into a single category.
    The full and the collapsed distributions are both printed,
    one state and its probability per line.
    @param mutrate: the mutation rate matrix is multiplied by this number
    @param popsize: population size
    @return: M_collapsed, T_collapsed, v_collapsed
    """
    nhaps = 4
    states = np.array(
            list(multinomstate.gen_states(popsize, nhaps)), dtype=int)
    state_to_index = multinomstate.get_inverse_map(states)
    # joint site pair mutation rate matrix
    mut_rate = mutrate * wrightcore.create_mutation(states, state_to_index)
    # joint site pair neutral drift matrix, in log space
    log_drift = wrightcore.create_neutral_drift(
            wrightcore.get_lmcs(states),
            wrightcore.create_selection_neutral(states),
            states)
    # drift and mutation transition matrices
    drift_probs = np.exp(log_drift)
    mut_probs = scipy.linalg.expm(mut_rate)
    # The composite per-generation transition matrix is their product;
    # its stationary distribution is found by solving a linear system.
    v = MatrixUtil.get_stationary_distribution(
            np.dot(mut_probs, drift_probs))
    for state, value in zip(states, v):
        print('%s %s' % (state, value))
    # collapse the two middle states
    nmerged = nhaps - 1
    nstates_collapsed = multinomstate.get_nstates(popsize, nmerged)
    M_collapsed = np.array(
            list(multinomstate.gen_states(popsize, nmerged)), dtype=int)
    T_collapsed = multinomstate.get_inverse_map(M_collapsed)
    v_collapsed = np.zeros(nstates_collapsed)
    for i, bigstate in enumerate(states):
        AB, Ab, aB, ab = bigstate.tolist()
        # the Ab and aB counts are pooled into one middle count
        v_collapsed[T_collapsed[AB, Ab + aB, ab]] += v[i]
    for state, value in zip(M_collapsed, v_collapsed):
        print('%s %s' % (state, value))
    return M_collapsed, T_collapsed, v_collapsed
def do_full_simplex_then_collapse(mutrate, popsize):
    """
    Compute a stationary distribution over the full 4-haplotype state
    space, then merge the two middle haplotypes into a single category.
    The full and the collapsed distributions are both printed,
    one state and its probability per line.
    @param mutrate: the mutation rate matrix is multiplied by this number
    @param popsize: population size
    @return: M_collapsed, T_collapsed, v_collapsed
    """
    nhaps = 4
    states = np.array(
            list(multinomstate.gen_states(popsize, nhaps)), dtype=int)
    state_to_index = multinomstate.get_inverse_map(states)
    # joint site pair mutation rate matrix
    mut_rate = mutrate * wrightcore.create_mutation(states, state_to_index)
    # joint site pair neutral drift matrix, in log space
    log_drift = wrightcore.create_neutral_drift(
            wrightcore.get_lmcs(states),
            wrightcore.create_selection_neutral(states),
            states)
    # drift and mutation transition matrices
    drift_probs = np.exp(log_drift)
    mut_probs = scipy.linalg.expm(mut_rate)
    # The composite per-generation transition matrix is their product;
    # its stationary distribution is found by solving a linear system.
    v = MatrixUtil.get_stationary_distribution(
            np.dot(mut_probs, drift_probs))
    for state, value in zip(states, v):
        print('%s %s' % (state, value))
    # collapse the two middle states
    nmerged = nhaps - 1
    nstates_collapsed = multinomstate.get_nstates(popsize, nmerged)
    M_collapsed = np.array(
            list(multinomstate.gen_states(popsize, nmerged)), dtype=int)
    T_collapsed = multinomstate.get_inverse_map(M_collapsed)
    v_collapsed = np.zeros(nstates_collapsed)
    for i, bigstate in enumerate(states):
        AB, Ab, aB, ab = bigstate.tolist()
        # the Ab and aB counts are pooled into one middle count
        v_collapsed[T_collapsed[AB, Ab + aB, ab]] += v[i]
    for state, value in zip(M_collapsed, v_collapsed):
        print('%s %s' % (state, value))
    return M_collapsed, T_collapsed, v_collapsed
def main():
    """
    Print conditional moments for the collapsed diamond process.
    For each value of the middle count of the k=3 state, print the total
    probability of the states with that middle count and the first two
    conditional moments and the variance of the fraction of the
    remaining individuals that are in the first category.
    """
    # use standard notation
    Nmu = 1.0
    N = 120
    mu = Nmu / float(N)
    print('N*mu: %s' % (Nmu,))
    print('N: %s' % (N,))
    print('')
    # the rate matrix is multiplied by this scaling factor
    m_factor = mu
    # use the moran drift
    distn_helper = moran_distn_helper
    # get properties of the collapsed diamond process
    k = 3
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    R_mut = m_factor * wrightcore.create_mutation_collapsed(M, T)
    v = distn_helper(M, T, R_mut)
    for Ab_aB in range(N + 1):
        nremaining = N - Ab_aB
        # probabilities of the states that share this middle count,
        # indexed by the count AB of the first category
        slice_probs = [
                v[T[AB, Ab_aB, nremaining - AB]]
                for AB in range(nremaining + 1)]
        # compute the volume for normalization
        volume = 0.0
        for prob in slice_probs:
            volume += prob
        # print some info
        print('X_1 + X_4 = %s / %s' % (Ab_aB, N))
        print('probability = %s' % (volume,))
        print('Y = X_2 / (1 - (X_1 + X_4)) = X_2 / (X_2 + X_3)')
        if nremaining:
            # compute the conditional moments
            m1 = 0.0
            m2 = 0.0
            for AB, prob in enumerate(slice_probs):
                p = prob / volume
                x = AB / float(nremaining)
                m1 += x * p
                m2 += x * x * p
            # print some info
            print('conditional E(Y) = %s' % (m1,))
            print('conditional E(Y^2) = %s' % (m2,))
            print('conditional V(Y) = %s' % (m2 - m1 * m1,))
        else:
            print('conditional distribution of Y is undefined')
        print('')
def main():
    """
    Print conditional moments for the collapsed diamond process.
    For each value of the middle count of the k=3 state, print the total
    probability of the states with that middle count and the first two
    conditional moments and the variance of the fraction of the
    remaining individuals that are in the first category.
    """
    # use standard notation
    Nmu = 1.0
    N = 120
    mu = Nmu / float(N)
    print('N*mu: %s' % (Nmu,))
    print('N: %s' % (N,))
    print('')
    # the rate matrix is multiplied by this scaling factor
    m_factor = mu
    # use the moran drift
    distn_helper = moran_distn_helper
    # get properties of the collapsed diamond process
    k = 3
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    R_mut = m_factor * wrightcore.create_mutation_collapsed(M, T)
    v = distn_helper(M, T, R_mut)
    for Ab_aB in range(N + 1):
        nremaining = N - Ab_aB
        # probabilities of the states that share this middle count,
        # indexed by the count AB of the first category
        slice_probs = [
                v[T[AB, Ab_aB, nremaining - AB]]
                for AB in range(nremaining + 1)]
        # compute the volume for normalization
        volume = 0.0
        for prob in slice_probs:
            volume += prob
        # print some info
        print('X_1 + X_4 = %s / %s' % (Ab_aB, N))
        print('probability = %s' % (volume,))
        print('Y = X_2 / (1 - (X_1 + X_4)) = X_2 / (X_2 + X_3)')
        if nremaining:
            # compute the conditional moments
            m1 = 0.0
            m2 = 0.0
            for AB, prob in enumerate(slice_probs):
                p = prob / volume
                x = AB / float(nremaining)
                m1 += x * p
                m2 += x * x * p
            # print some info
            print('conditional E(Y) = %s' % (m1,))
            print('conditional E(Y^2) = %s' % (m2,))
            print('conditional V(Y) = %s' % (m2 - m1 * m1,))
        else:
            print('conditional distribution of Y is undefined')
        print('')
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the expected number of returns to AB before reaching ab.
    The process is restricted to the first k states of the sorted state
    list by a stochastic complement and conditioned on not looping.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the expected number of returns
    for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # by Python 2 integer division.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement
            X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # condition on not looping:
            # drop the self transitions and renormalize each row
            np.fill_diagonal(H, 0)
            v = np.sum(H, axis=1)
            H /= v[:, np.newaxis]
            # let ab be an absorbing state
            # and compute the expected number of returns to AB
            Q = H[:3, :3]
            I = np.eye(3)
            N = linalg.inv(I - Q)
            # N[0, 0] counts the visits to AB including the initial one
            row.append(N[0, 0] - 1)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the expected number of returns to AB before reaching ab.
    The process is restricted to the first k states of the sorted state
    list by a stochastic complement and conditioned on not looping.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the expected number of returns
    for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # by Python 2 integer division.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement
            X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # condition on not looping:
            # drop the self transitions and renormalize each row
            np.fill_diagonal(H, 0)
            v = np.sum(H, axis=1)
            H /= v[:, np.newaxis]
            # let ab be an absorbing state
            # and compute the expected number of returns to AB
            Q = H[:3, :3]
            I = np.eye(3)
            N = linalg.inv(I - Q)
            # N[0, 0] counts the visits to AB including the initial one
            row.append(N[0, 0] - 1)
        arr.append(row)
    return arr
def get_full_simplex(m_factor, N, distn_helper):
    """
    Compute a distribution over the full 4-haplotype state space.
    The distn_helper function taken as an argument is expected
    to be either moran_distn_helper or wright_distn_helper.
    NOTE(review): an earlier note here said that this uses the non-moran
    formulation of drift, but no drift is computed in this function;
    presumably the drift model is whatever distn_helper implements.
    @param m_factor: the mutation rate matrix is multiplied by this number
    @param N: population size
    @param distn_helper: a function (M, T, R_mut) -> v
    @return: M, T, v
    """
    nhaps = 4
    states = np.array(list(multinomstate.gen_states(N, nhaps)), dtype=int)
    state_to_index = multinomstate.get_inverse_map(states)
    scaled_mut = m_factor * wrightcore.create_mutation(
            states, state_to_index)
    distn = distn_helper(states, state_to_index, scaled_mut)
    return states, state_to_index, distn
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute a conditional transition probability from AB to ab.
    The process is restricted to the first k states of the sorted state
    list by a stochastic complement, and the transition probability
    from index 0 to index 3 is conditioned on leaving index 0.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the probability
    for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # by Python 2 integer division.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement
            X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # compute a conditional transition probability
            i = 0
            j = 3
            row.append(H[i, j] / (1 - H[i, i]))
        arr.append(row)
    return arr
def collapse_diamond(N, M, v):
    """
    Merge the two middle haplotypes of a 4-haplotype distribution.
    The probability of each state (AB, Ab, aB, ab) is added to the
    probability of the collapsed state (AB, Ab + aB, ab).
    @param N: population size
    @param M: index to state vector
    @param v: a distribution over a 3-simplex
    @return: a distribution over a 2-simplex
    """
    ncategories = 4 - 1
    collapsed = np.zeros(multinomstate.get_nstates(N, ncategories))
    small_states = np.array(
            list(multinomstate.gen_states(N, ncategories)), dtype=int)
    small_index = multinomstate.get_inverse_map(small_states)
    for i, bigstate in enumerate(M):
        AB, Ab, aB, ab = bigstate.tolist()
        collapsed[small_index[AB, Ab + aB, ab]] += v[i]
    return collapsed
def collapse_diamond(N, M, v):
    """
    Merge the two middle haplotypes of a 4-haplotype distribution.
    The probability of each state (AB, Ab, aB, ab) is added to the
    probability of the collapsed state (AB, Ab + aB, ab).
    @param N: population size
    @param M: index to state vector
    @param v: a distribution over a 3-simplex
    @return: a distribution over a 2-simplex
    """
    ncategories = 4 - 1
    collapsed = np.zeros(multinomstate.get_nstates(N, ncategories))
    small_states = np.array(
            list(multinomstate.gen_states(N, ncategories)), dtype=int)
    small_index = multinomstate.get_inverse_map(small_states)
    for i, bigstate in enumerate(M):
        AB, Ab, aB, ab = bigstate.tolist()
        collapsed[small_index[AB, Ab + aB, ab]] += v[i]
    return collapsed
def collapse_side(N, M, v):
    """
    Merge two pairs of haplotypes of a 4-haplotype distribution.
    The probability of each state (AB, Ab, aB, ab) is added to the
    probability of the collapsed state (AB + Ab, aB + ab).
    @param N: population size
    @param M: index to state vector
    @param v: a distribution over a 3-simplex
    @return: a distribution over a 1-simplex
    """
    ncategories = 4 - 2
    collapsed = np.zeros(multinomstate.get_nstates(N, ncategories))
    small_states = np.array(
            list(multinomstate.gen_states(N, ncategories)), dtype=int)
    small_index = multinomstate.get_inverse_map(small_states)
    for i, bigstate in enumerate(M):
        AB, Ab, aB, ab = bigstate.tolist()
        collapsed[small_index[AB + Ab, aB + ab]] += v[i]
    return collapsed
def collapse_side(N, M, v):
    """
    Merge two pairs of haplotypes of a 4-haplotype distribution.
    The probability of each state (AB, Ab, aB, ab) is added to the
    probability of the collapsed state (AB + Ab, aB + ab).
    @param N: population size
    @param M: index to state vector
    @param v: a distribution over a 3-simplex
    @return: a distribution over a 1-simplex
    """
    ncategories = 4 - 2
    collapsed = np.zeros(multinomstate.get_nstates(N, ncategories))
    small_states = np.array(
            list(multinomstate.gen_states(N, ncategories)), dtype=int)
    small_index = multinomstate.get_inverse_map(small_states)
    for i, bigstate in enumerate(M):
        AB, Ab, aB, ab = bigstate.tolist()
        collapsed[small_index[AB + Ab, aB + ab]] += v[i]
    return collapsed
def get_p2(N_diploid, theta, Nr, Ns):
    """
    Compute the probability of compensatory substitution pathway type 2.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: p_t2
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # by Python 2 integer division.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    P = np.dot(MR_prob, S_prob)
    # Restrict the process to the first k states of the sorted state
    # list using the stochastic complement.
    X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
    H = P[:k, :k] + np.dot(P[:k, k:], X)
    # transition probability from index 0 to index 3,
    # conditional on leaving index 0
    p_direct = H[0, 3] / (1 - H[0, 0])
    # The following line is Equation (1) of the Nasrallah manuscript.
    p_t2 = (2 * p_direct) / (1 + p_direct)
    return p_t2
def get_p2(N_diploid, theta, Nr, Ns):
    """
    Compute the probability of compensatory substitution pathway type 2.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: p_t2
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # by Python 2 integer division.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    P = np.dot(MR_prob, S_prob)
    # Restrict the process to the first k states of the sorted state
    # list using the stochastic complement.
    X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
    H = P[:k, :k] + np.dot(P[:k, k:], X)
    # transition probability from index 0 to index 3,
    # conditional on leaving index 0
    p_direct = H[0, 3] / (1 - H[0, 0])
    # The following line is Equation (1) of the Nasrallah manuscript.
    p_t2 = (2 * p_direct) / (1 + p_direct)
    return p_t2
def do_collapsed_simplex(scaled_mut, N):
    """
    Compute a stationary distribution of the collapsed k=3 process.
    The per-generation transition matrix is the matrix exponential
    of the sum of the scaled mutation and Moran drift rate matrices.
    @param scaled_mut: the raw mutation rate matrix is multiplied
    by scaled_mut / N
    @param N: population size
    @return: M, T, v
    """
    k = 3
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    # Create the joint site pair mutation rate matrix.
    # This is scaled so that there are about popsize mutations per generation.
    R_mut_raw = wrightcore.create_mutation_collapsed(M, T)
    R_mut = (scaled_mut / float(N)) * R_mut_raw
    # The raw drift matrix is scaled so that there are about N*N
    # replacements per generation.
    generation_rate = 1.0
    R_drift_raw = wrightcore.create_moran_drift_rate_k3(M, T)
    R_drift = (generation_rate / float(N)) * R_drift_raw
    #FIXME: you should get the stationary distn directly from the rate matrix
    P = scipy.linalg.expm(R_mut + R_drift)
    v = MatrixUtil.get_stationary_distribution(P)
    return M, T, v
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer;
    it is meant as a contrast to forward simulation.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: (t1, v1), (t2, v2)
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # by Python 2 integer division.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    P = np.dot(MR_prob, S_prob)
    # get the per-pathway summaries from the transition matrix
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer;
    it is meant as a contrast to forward simulation.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: (t1, v1), (t2, v2)
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # by Python 2 integer division.
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    P = np.dot(MR_prob, S_prob)
    # get the per-pathway summaries from the transition matrix
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
def get_plot_array(N_diploid, theta, Nr_values, Ns_values):
    """
    Compute a simulated t statistic comparing the two pathway types.
    For each parameter combination the waiting times of the two
    substitution pathway types are sampled from normal distributions,
    so the returned values are random.
    @param N_diploid: diploid population size
    @param theta: mutation rate
    @param Nr_values: recombination rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the t statistic
    for Nr_values[i] and Ns_values[j]
    """
    # define the haplotypes
    AB, Ab, aB, ab = 0, 1, 2, 3
    # initialize the state space
    N_hap = 2 * N_diploid
    k = 4
    M = multinomstate.get_sorted_states(N_hap, k)
    nstates = M.shape[0]
    # compute the inverse map
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # by Python 2 integer division.
    mu = theta / 2.0
    # Precompute the mutation matrix
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    arr = []
    for Nr in Nr_values:
        # precompute a recombination probability matrix
        R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
        # precompute the product of mutation and recombination.
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # expectation and variance for each pathway type
            t1, v1 = wfbckcompens.get_type_1_info(P)
            t2, v2 = wfbckcompens.get_type_2_info(P)
            # Use the stochastic complement to get the probability
            # of a direct transition from AB to ab.
            # This is not itself the probability of type 2.
            X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            p_direct = H[AB, ab] / (1 - H[AB, AB])
            # The following line is Equation (1) of the Nasrallah manuscript.
            p_t2 = (2 * p_direct) / (1 + p_direct)
            p_t1 = 1 - p_t2
            # Just do a simulation,
            # assuming that the wait times are normally distributed.
            nsamples = 500
            n1 = np.random.binomial(nsamples, p_t1)
            n2 = nsamples - n1
            X1 = np.random.normal(t1, math.sqrt(v1), n1)
            X2 = np.random.normal(t2, math.sqrt(v2), n2)
            X_pooled = np.hstack((X1, X2))
            x = np.mean(X1) - np.mean(X2)
            s_pooled = math.sqrt(np.var(X_pooled) / nsamples)
            t_statistic = x / s_pooled
            row.append(t_statistic)
        arr.append(row)
    return arr
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute the probability that ab is first reached directly from AB.
    The process is restricted to the first k states of the sorted state
    list by a stochastic complement, and the ab state is then split into
    two absorbing states according to whether it was entered from AB.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the probability
    for theta_values[i] and Ns_values[j]
    """
    # set up the state space
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid) ** 2))
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not truncated
        # by Python 2 integer division.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # Compute the stochastic complement.
            # H keeps its self transition probabilities;
            # it is not conditioned on a change of state.
            X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # break the last state into two states
            AB, Ab, aB, ab, abx = 0, 1, 2, 3, 4
            J = np.zeros((k + 1, k + 1))
            J[:k, :k] = H
            # force ab and abx to be absorbing states
            J[ab] = np.zeros(k + 1)
            J[abx] = np.zeros(k + 1)
            J[ab, ab] = 1
            J[abx, abx] = 1
            # connect AB to the new ab state
            J[AB, abx] = J[AB, ab]
            J[AB, ab] = 0
            # Now transition matrix J is in "canonical form"
            # because all of the absorbing states are at the end.
            B = linalg.solve(
                    np.eye(k - 1) - J[:k - 1, :k - 1],
                    J[:k - 1, k - 1:])
            # From the absorbing state distribution take the probability
            # of absorption in abx when starting from AB.
            row.append(B[0, 1])
        arr.append(row)
    return arr
def get_collapsed_diag_process_distn(m_factor, N, distn_helper):
    """
    Compute a distribution for the collapsed diagonal process (k=2).
    @param m_factor: the mutation rate matrix is multiplied by this number
    @param N: population size
    @param distn_helper: a function (M, T, R_mut) -> v
    @return: the value returned by distn_helper
    """
    ncategories = 2
    states = np.array(
            list(multinomstate.gen_states(N, ncategories)), dtype=int)
    state_to_index = multinomstate.get_inverse_map(states)
    scaled_mut = m_factor * wrightcore.create_mutation_collapsed_diag(
            states, state_to_index)
    return distn_helper(states, state_to_index, scaled_mut)
def main(args):
    """
    Show the stationary distribution of the k=4 process as a 3d volume.
    The distribution of the mutation plus Moran drift process is mapped
    onto a cubic grid, colored by probability, and displayed in a
    GLViewWidget.
    @param args: object with attributes alpha (the mutation rate matrix
    is multiplied by this number) and N (population size)
    """
    alpha = args.alpha
    N = args.N
    k = 4
    print('alpha: %s' % (alpha,))
    print('N: %s' % (N,))
    print('k: %s' % (k,))
    print('')
    M = np.array(list(multinomstate.gen_states(N, k)), dtype=int)
    T = multinomstate.get_inverse_map(M)
    R_mut = wrightcore.create_mutation(M, T)
    R_drift = wrightcore.create_moran_drift_rate_k4(M, T)
    Q = alpha * R_mut + R_drift
    P = scipy.linalg.expm(Q)
    v = MatrixUtil.get_stationary_distribution(P)
    # Define the volumetric data using the stationary distribution.
    max_prob = np.max(v)
    side = N + 1
    d2 = np.zeros((side, side, side, 4), dtype=float)
    # Each state is mapped to grid coordinates that are sums of pairs
    # of its last three components, so x + y + z is always even.
    U = np.array([
        [0, 0, 0],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 0],
        ], dtype=int)
    for p, state in zip(v, M):
        x, y, z = np.dot(state, U).tolist()
        # r, g, b, alpha
        intensity = 255 * (p / max_prob)
        d2[x, y, z] = np.array([intensity, 0, 0, intensity], dtype=float)
    # Fill the empty states, which are the cells with odd x + y + z,
    # with the average of their in-range axis neighbors.
    axis_steps = (
            (-1, 0, 0), (1, 0, 0),
            (0, -1, 0), (0, 1, 0),
            (0, 0, -1), (0, 0, 1),
            )
    for x in range(side):
        for y in range(side):
            for z in range(side):
                if (x + y + z) % 2 != 1:
                    continue
                p_accum = np.zeros(4, dtype=float)
                n_accum = 0
                for dx, dy, dz in axis_steps:
                    nx, ny, nz = x + dx, y + dy, z + dz
                    if 0 <= nx <= N and 0 <= ny <= N and 0 <= nz <= N:
                        p_accum += d2[nx, ny, nz]
                        n_accum += 1
                d2[x, y, z] = p_accum / n_accum
    # Do things that the example application does.
    app = QtGui.QApplication([])
    w = gl.GLViewWidget()
    w.opts['distance'] = 2 * N
    w.show()
    # Do some more things that the example application does.
    vol = gl.GLVolumeItem(d2, sliceDensity=1, smooth=True)
    half = -0.5 * N
    vol.translate(half, half, half)
    w.addItem(vol)
    if sys.flags.interactive != 1:
        app.exec_()
def main(args): alpha = args.alpha N = args.N k = 4 print 'alpha:', alpha print 'N:', N print 'k:', k print M = np.array(list(multinomstate.gen_states(N, k)), dtype=int) T = multinomstate.get_inverse_map(M) R_mut = wrightcore.create_mutation(M, T) R_drift = wrightcore.create_moran_drift_rate_k4(M, T) Q = alpha * R_mut + R_drift P = scipy.linalg.expm(Q) v = MatrixUtil.get_stationary_distribution(P) # # Define the volumetric data using the stationary distribution. max_prob = np.max(v) d2 = np.zeros((N+1, N+1, N+1, 4), dtype=float) U = np.array([ [0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0], ], dtype=int) for p, state in zip(v, M): x, y, z = np.dot(state, U).tolist() # r, g, b, alpha d2[x, y, z] = np.array([ 255 * (p / max_prob), 0, 0, 255 * (p / max_prob), #100, ], dtype=float) #d2[x, y, z, 0] = 255 * (p / max_prob) #d2[x, y, z, 1] = 0 #d2[x, y, z, 2] = 0 #d2[x, y, z, 3] = 100 # fill the empty states for x in range(N+1): for y in range(N+1): for z in range(N+1): if (x + y + z) % 2 == 1: p_accum = np.zeros(4, dtype=float) n_accum = 0 for dx in (-1, 1): if 0 <= x+dx <= N: p_accum += d2[x+dx, y, z] n_accum += 1 for dy in (-1, 1): if 0 <= y+dy <= N: p_accum += d2[x, y+dy, z] n_accum += 1 for dz in (-1, 1): if 0 <= z+dz <= N: p_accum += d2[x, y, z+dz] n_accum += 1 d2[x, y, z] = p_accum / n_accum # # Do things that the example application does. app = QtGui.QApplication([]) w = gl.GLViewWidget() w.opts['distance'] = 2*N w.show() # # a visual grid or something #g = gl.GLGridItem() #g.scale(10, 10, 1) #w.addItem(g) # # Do some more things that the example application does. vol = gl.GLVolumeItem(d2, sliceDensity=1, smooth=True) #vol.translate(-5,-5,-10) vol.translate(-0.5*N, -0.5*N, -0.5*N) w.addItem(vol) # # add an axis thingy #ax = gl.GLAxisItem() #w.addItem(ax) if sys.flags.interactive != 1: app.exec_()
def main(): # use standard notation Nmu = 1.0 N = 20 mu = Nmu / float(N) print 'N*mu:', Nmu print 'N:', N print k = 4 M = np.array(list(multinomstate.gen_states(N, k)), dtype=int) T = multinomstate.get_inverse_map(M) nstates = len(M) #R_mut = m_factor * wrightcore.create_mutation_collapsed(M, T) #v = distn_helper(M, T, R_mut) # get the approximations alpha = 2*N*mu approx_1a = get_beta_approx(N+1, alpha) approx_2a = get_beta_approx(N+1, 2*alpha) d4_reduction, d4_nstates = get_d4_reduction(M, T) # for the initial guess all logs of ratios of probs are zero x0 = np.zeros(d4_nstates - 1) # precompute some design matrices X_side = get_design_matrix_side(M) X_diag = get_design_matrix_diag(M) print 'number of variables:', d4_nstates - 1 print f_errors = functools.partial( eval_f, M, T, d4_reduction, d4_nstates, approx_1a, approx_2a, X_side, X_diag, ) g_errors = functools.partial(eval_grad, f_errors) f = functools.partial(apply_sum_of_squares, f_errors) g = functools.partial(eval_grad, f) h = functools.partial(eval_hess, f) g_reverse = functools.partial(eval_grad_reverse_mode, f) """ result = scipy.optimize.leastsq( f_errors, x0, Dfun=g_errors, full_output=1, ) """ """ result = scipy.optimize.fmin_ncg( f, x0, fprime=g, fhess=h, avextol=1e-6, full_output=True, ) """ result = scipy.optimize.fmin_bfgs( f, x0, #fprime=g, fprime=g_reverse, full_output=True, ) print result xopt = result[0] v = unpack_distribution(nstates, d4_reduction, d4_nstates, xopt) # print some variances check_variance(M, T, v)
def main(args): alpha = args.alpha N = args.N k = 4 print 'alpha:', alpha print 'N:', N print 'k:', k print print 'defining the state vectors...' M = np.array(list(gen_states_for_induction(N)), dtype=int) #M = np.array(list(multinomstate.gen_states(N, k)), dtype=int) print 'M.shape:', M.shape print 'M:' print M print if args.dense: print 'defining the state vector inverse map...' T = multinomstate.get_inverse_map(M) print 'T.shape:', T.shape print 'constructing dense mutation rate matrix...' R_mut = wrightcore.create_mutation(M, T) print 'constructing dense drift rate matrix...' R_drift = wrightcore.create_moran_drift_rate_k4(M, T) Q = alpha * R_mut + R_drift # pick out the correct eigenvector print 'taking eigendecomposition of dense rate matrix...' W, V = scipy.linalg.eig(Q.T) w, v = min(zip(np.abs(W), V.T)) if args.eigvals: print 'eigenvalues:' print W # get integer approximations of eigenvalues d = collections.defaultdict(int) for raw_eigval in W: int_eigval = int(np.round(raw_eigval.real)) d[int_eigval] += 1 arr = [] for int_eigval in reversed(sorted(d)): s = '%d^%d' % (-int_eigval, d[int_eigval]) arr.append(s) print ' '.join(arr) else: print 'rate matrix:' print Q print print 'transpose of rate matrix:' print Q.T print print 'eigendecomposition of transpose of rate matrix as integers:' print (W, V) print print 'rate matrix in mathematica notation:' print MatrixUtil.m_to_mathematica_string(Q.astype(int)) print print 'abs eigenvector corresponding to smallest abs eigenvalue:' print np.abs(v) print if args.sparse or args.shift_invert: print 'defining the state vector inverse dict...' T = multinomstate.get_inverse_dict(M) print 'sys.getsizeof(T):', sys.getsizeof(T) print 'constructing sparse coo mutation+drift rate matrix...' R_coo = create_coo_moran(M, T, alpha) print 'converting to sparse csr transpose rate matrix...' RT_csr = scipy.sparse.csr_matrix(R_coo.T) if args.shift_invert: print 'compute an eigenpair using shift-invert mode...' 
W, V = scipy.sparse.linalg.eigs(RT_csr, k=1, sigma=1) else: print 'compute an eigenpair using "small magnitude" mode...' W, V = scipy.sparse.linalg.eigs(RT_csr, k=1, which='SM') #print 'dense form of sparsely constructed matrix:' #print RT_csr.todense() #print print 'sparse eigenvalues:' print W print print 'sparse stationary distribution eigenvector:' print V[:, 0] print v = abs(V[:, 0]) v /= np.sum(v) autosave_filename = 'full-moran-autosave.txt' print 'writing the stationary distn to', autosave_filename, '...' with open(autosave_filename, 'w') as fout: for p, (X, Y, Z, W) in zip(v, M): print >> fout, X, Y, Z, W, p
def main(args): alpha = args.alpha N = args.N k = 4 print 'alpha:', alpha print 'N:', N print 'k:', k print print 'defining the state vectors...' M = np.array(list(gen_states_for_induction(N)), dtype=int) #M = np.array(list(multinomstate.gen_states(N, k)), dtype=int) print 'M.shape:', M.shape print 'M:' print M print if args.dense: print 'defining the state vector inverse map...' T = multinomstate.get_inverse_map(M) print 'T.shape:', T.shape print 'constructing dense mutation rate matrix...' R_mut = wrightcore.create_mutation(M, T) print 'constructing dense drift rate matrix...' R_drift = wrightcore.create_moran_drift_rate_k4(M, T) Q = alpha * R_mut + R_drift # pick out the correct eigenvector print 'taking eigendecomposition of dense rate matrix...' W, V = scipy.linalg.eig(Q.T) w, v = min(zip(np.abs(W), V.T)) if args.eigvals: print 'eigenvalues:' print W # get integer approximations of eigenvalues d = collections.defaultdict(int) for raw_eigval in W: int_eigval = int(np.round(raw_eigval.real)) d[int_eigval] += 1 arr = [] for int_eigval in reversed(sorted(d)): s = '%d^%d' % (-int_eigval, d[int_eigval]) arr.append(s) print ' '.join(arr) else: print 'rate matrix:' print Q print print 'transpose of rate matrix:' print Q.T print print 'eigendecomposition of transpose of rate matrix as integers:' print(W, V) print print 'rate matrix in mathematica notation:' print MatrixUtil.m_to_mathematica_string(Q.astype(int)) print print 'abs eigenvector corresponding to smallest abs eigenvalue:' print np.abs(v) print if args.sparse or args.shift_invert: print 'defining the state vector inverse dict...' T = multinomstate.get_inverse_dict(M) print 'sys.getsizeof(T):', sys.getsizeof(T) print 'constructing sparse coo mutation+drift rate matrix...' R_coo = create_coo_moran(M, T, alpha) print 'converting to sparse csr transpose rate matrix...' RT_csr = scipy.sparse.csr_matrix(R_coo.T) if args.shift_invert: print 'compute an eigenpair using shift-invert mode...' 
W, V = scipy.sparse.linalg.eigs(RT_csr, k=1, sigma=1) else: print 'compute an eigenpair using "small magnitude" mode...' W, V = scipy.sparse.linalg.eigs(RT_csr, k=1, which='SM') #print 'dense form of sparsely constructed matrix:' #print RT_csr.todense() #print print 'sparse eigenvalues:' print W print print 'sparse stationary distribution eigenvector:' print V[:, 0] print v = abs(V[:, 0]) v /= np.sum(v) autosave_filename = 'full-moran-autosave.txt' print 'writing the stationary distn to', autosave_filename, '...' with open(autosave_filename, 'w') as fout: for p, (X, Y, Z, W) in zip(v, M): print >> fout, X, Y, Z, W, p
def get_plot_array(N_diploid, Nr, theta_values, Ns_values):
    """
    Compute absorption probabilities for a collapsed four-state chain.
    For each pair of parameter values, the one-generation transition
    matrix is reduced to its first four states by stochastic
    complementation.  The four states are labeled AB, Ab, aB, ab.
    The ab state is then made absorbing, and the direct AB to ab
    transition is redirected to an extra absorbing state.
    The reported value is the probability that the reduced chain
    started at AB is absorbed in that extra state,
    as opposed to reaching ab through Ab or aB.
    @param N_diploid: diploid population size
    @param Nr: recombination rate
    @param theta_values: mutation rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the probability for
    theta_values[i] and Ns_values[j]
    """
    # set up the state space
    # NOTE(review): the slicing below assumes that get_sorted_states
    # puts the states AB, Ab, aB, ab first -- confirm.
    k = 4
    M = multinomstate.get_sorted_states(2*N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    nstates = M.shape[0]
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # precompute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2*N_diploid)**2))
    # indices of the states in the reduced chain;
    # abx is the extra absorbing state appended after ab
    AB, ab, abx = 0, 3, 4
    arr = []
    for theta in theta_values:
        # Compute the expected number of mutation events per generation.
        # Divide by a float so that an integer theta is not floored.
        mu = theta / 2.0
        # Precompute the mutation matrix
        # and the product of mutation and recombination.
        M_prob = linalg.expm(mu * M_rate / float(2*2*N_diploid))
        MR_prob = np.dot(M_prob, R_prob)
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            # compute the stochastic complement on the first k states
            X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
            H = P[:k, :k] + np.dot(P[:k, k:], X)
            # break the last state into two states
            J = np.zeros((k+1, k+1))
            J[:k, :k] = H
            # force ab and abx to be absorbing states
            J[[ab, abx], :] = 0
            J[ab, ab] = 1
            J[abx, abx] = 1
            # connect AB to the new abx state instead of to ab
            J[AB, abx] = J[AB, ab]
            J[AB, ab] = 0
            # Now transition matrix J is in "canonical form"
            # because all of the absorbing states are at the end.
            # Solve for the absorption probabilities from each
            # transient state into the absorbing states (ab, abx).
            B = linalg.solve(np.eye(k-1) - J[:k-1, :k-1], J[:k-1, k-1:])
            # record the probability of absorption in abx starting from AB
            row.append(B[AB, 1])
        arr.append(row)
    return arr