Пример #1
0
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer; it is meant as a contrast
    to forward simulation.
    The one-generation transition matrix is assembled as the matrix product
    mutation * recombination * selection and is then passed to the
    wfbckcompens helpers, which return a (t, v) pair for each pathway.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: (t1, v1), (t2, v2)
    """
    # set up the state space over k categories and 2*N_diploid chromosomes
    k = 4
    M = multinomstate.get_sorted_states(2 * N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2 * N_diploid)**2))
    # compute the expected number of mutation events per generation;
    # divide by a float so that an integer theta is not truncated
    # under Python 2 integer division
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2 * 2 * N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    # create_genic presumably returns log probabilities, hence the exp
    # (TODO confirm against wfengine)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    # full transition matrix: mutation, then recombination, then selection
    P = np.dot(MR_prob, S_prob)
    # summarize each pathway from the full transition matrix
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
Пример #2
0
def get_backward_info(N_diploid, theta, Nr, Ns):
    """
    Compute expectations and variances for the two substitution pathways.
    Here backward is somewhat of a misnomer; it is meant as a contrast
    to forward simulation.
    The one-generation transition matrix is the matrix product
    mutation * recombination * selection; it is handed to the
    wfbckcompens helpers, which return a (t, v) pair for each pathway.
    @param N_diploid: diploid population size
    @param theta: a mutation rate
    @param Nr: a recombination rate
    @param Ns: a selection value
    @return: (t1, v1), (t2, v2)
    """
    # set up the state space over k categories and 2*N_diploid chromosomes
    k = 4
    M = multinomstate.get_sorted_states(2*N_diploid, k)
    T = multinomstate.get_inverse_map(M)
    lmcs = wfengine.get_lmcs(M)
    # compute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # compute a recombination probability matrix
    R_prob = linalg.expm(Nr * R_rate / float((2*N_diploid)**2))
    # compute the expected number of mutation events per generation;
    # the float divisor keeps an integer theta from being truncated
    # under Python 2 integer division
    mu = theta / 2.0
    # compute the mutation matrix
    # and the product of mutation and recombination.
    M_prob = linalg.expm(mu * M_rate / float(2*2*N_diploid))
    MR_prob = np.dot(M_prob, R_prob)
    # compute the selection coefficient
    s = Ns / float(N_diploid)
    lps = wfcompens.create_selection(s, M)
    # create_genic presumably returns log probabilities, hence the exp
    # (TODO confirm against wfengine)
    S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
    # full transition matrix: mutation, then recombination, then selection
    P = np.dot(MR_prob, S_prob)
    # summarize each pathway from the full transition matrix
    t1, v1 = wfbckcompens.get_type_1_info(P)
    t2, v2 = wfbckcompens.get_type_2_info(P)
    return (t1, v1), (t2, v2)
Пример #3
0
def _get_type_2_probability(P, nstates, k):
    """
    Estimate the probability of the type 2 pathway from a transition matrix.
    @param P: transition matrix over all nstates states
    @param nstates: number of states
    @param k: number of leading states kept in the reduced matrix
    @return: p_t2
    """
    # Reduce P onto its first k states by folding in the paths that pass
    # through the remaining states (the original author calls this
    # the stochastic complement).
    X = linalg.solve(np.eye(nstates - k) - P[k:, k:], P[k:, :k])
    H = P[:k, :k] + np.dot(P[:k, k:], X)
    # NOTE(review): the original author marked this step as "wrong" and
    # described the quantity as the probability of a direct transition.
    # Indices 0 and 3 are presumably the AB and ab states; confirm that
    # the first k states of P really are the fixed states.
    p_direct = H[0, 3] / (1 - H[0, 0])
    # The following line is Equation (1) of the Nasrallah manuscript.
    return (2*p_direct) / (1 + p_direct)


def _simulate_t_statistic(t1, v1, t2, v2, p_t1, nsamples=500):
    """
    Simulate a t-like statistic for the difference between two wait times.
    The wait times are assumed to be normally distributed.
    @param t1: expected wait time of the type 1 pathway
    @param v1: variance of the wait time of the type 1 pathway
    @param t2: expected wait time of the type 2 pathway
    @param v2: variance of the wait time of the type 2 pathway
    @param p_t1: probability of the type 1 pathway
    @param nsamples: total number of simulated wait times
    @return: difference of sample means over a pooled standard error
    """
    # Split the samples between the pathways at random.
    n1 = np.random.binomial(nsamples, p_t1)
    n2 = nsamples - n1
    X1 = np.random.normal(t1, math.sqrt(v1), n1)
    X2 = np.random.normal(t2, math.sqrt(v2), n2)
    X_pooled = np.hstack((X1, X2))
    # If either pathway receives zero samples then its mean is nan,
    # and so is the returned statistic.
    x = np.mean(X1) - np.mean(X2)
    s_pooled = math.sqrt(np.var(X_pooled) / nsamples)
    return x / s_pooled


def get_plot_array(N_diploid, theta, Nr_values, Ns_values):
    """
    Build a grid of simulated t statistics comparing the two pathways.
    For each recombination rate and selection value the transition matrix
    is assembled as the product mutation * recombination * selection,
    the wait time mean and variance of each pathway are obtained from
    wfbckcompens, and a t-like statistic is simulated from them.
    The result is random because the simulation draws from np.random.
    @param N_diploid: diploid population size
    @param theta: mutation rate
    @param Nr_values: recombination rates
    @param Ns_values: selection values
    @return: arr[i][j] gives the t statistic
    for Nr_values[i] and Ns_values[j]
    """
    # initialize the state space;
    # the k = 4 haplotypes are ordered AB, Ab, aB, ab
    N_hap = 2 * N_diploid
    k = 4
    M = multinomstate.get_sorted_states(N_hap, k)
    nstates = M.shape[0]
    # compute the inverse map
    T = multinomstate.get_inverse_map(M)
    #
    lmcs = wfengine.get_lmcs(M)
    # precompute rate matrices
    R_rate = wfcompens.create_recomb(M, T)
    M_rate = wfcompens.create_mutation(M, T)
    # Compute the expected number of mutation events per generation.
    # Divide by a float so that an integer theta is not truncated
    # under Python 2 integer division.
    mu = theta / 2.0
    # Precompute the mutation matrix
    M_prob = linalg.expm(mu * M_rate / float(2*2*N_diploid))
    #
    arr = []
    for Nr in Nr_values:
        # precompute a recombination probability matrix
        R_prob = linalg.expm(Nr * R_rate / float((2*N_diploid)**2))
        # precompute the product of mutation and recombination.
        MR_prob = np.dot(M_prob, R_prob)
        #
        row = []
        for Ns in Ns_values:
            s = Ns / float(N_diploid)
            lps = wfcompens.create_selection(s, M)
            S_prob = np.exp(wfengine.create_genic(lmcs, lps, M))
            P = np.dot(MR_prob, S_prob)
            #
            t1, v1 = wfbckcompens.get_type_1_info(P)
            t2, v2 = wfbckcompens.get_type_2_info(P)
            # An earlier, disabled approach conditioned the chain on
            # first hitting the ab fixed state and solved for the
            # hitting time from AB directly; it has been removed.
            p_t2 = _get_type_2_probability(P, nstates, k)
            p_t1 = 1 - p_t2
            # A closed form pooled variance
            # (expectation of variance plus variance of expectation)
            # was also tried and disabled in favor of simulation.
            row.append(_simulate_t_statistic(t1, v1, t2, v2, p_t1))
        arr.append(row)
    return arr