Exemplo n.º 1
0
def uniform_motifs_accept_reject(n,
                                 L,
                                 desired_ic,
                                 num_motifs,
                                 epsilon=0.1,
                                 beta=None,
                                 verbose=False):
    """Draw ``num_motifs`` motifs by repeated accept/reject sampling.

    ``beta``, the count probabilities and the count sampler are computed
    once here and handed to every ``uniform_motif_accept_reject`` call.

    Args:
        n: forwarded to the helpers; presumably the number of sites per
            motif (TODO confirm against the helpers).
        L: multiplies the per-column correction; forwarded unchanged.
        desired_ic: target information content for each motif.
        num_motifs: how many motifs to draw.
        epsilon: tolerance forwarded to ``uniform_motif_accept_reject``.
        beta: optional pre-computed tilting parameter; solved for when
            ``None``.
        verbose: forwarded to the helpers.

    Returns:
        A list with one ``uniform_motif_accept_reject`` result per motif.
    """
    if beta is None:
        # The target handed to the beta solver is the desired IC shifted
        # upwards by a correction of 3 / (2 * ln(2) * n) per column.
        per_column_correction = 3 / (2 * log(2) * n)
        shifted_target_ic = desired_ic + L * per_column_correction
        beta = find_beta_for_mean_motif_ic(n,
                                           L,
                                           shifted_target_ic,
                                           verbose=verbose)
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    # Everything that stays the same from one draw to the next.
    shared_kwargs = dict(epsilon=epsilon,
                         beta=beta,
                         ps=ps,
                         count_sampler=count_sampler,
                         verbose=verbose)
    motifs = []
    for _ in trange(num_motifs):
        motifs.append(
            uniform_motif_accept_reject(n, L, desired_ic, **shared_kwargs))
    return motifs
Exemplo n.º 2
0
def uniform_motif_accept_reject(n,
                                L,
                                desired_ic,
                                epsilon=0.1,
                                beta=None,
                                ps=None,
                                count_sampler=None,
                                verbose=False):
    """Draw one motif by accept/reject sampling on its information content.

    Candidate motifs are built column by column from ``count_sampler`` and
    kept only if ``inrange(motif, desired_ic, epsilon)`` holds.  Each kept
    candidate is then accepted with probability
    ``1 / exp(beta * motif_ic(motif) - beta * (desired_ic - epsilon))``.

    Args:
        n: forwarded to the helpers; presumably the number of sites per
            motif (TODO confirm against the helpers).
        L: number of columns sampled for each candidate motif.
        desired_ic: target information content.
        epsilon: tolerance passed to ``inrange``.
        beta: optional tilting parameter; solved for when ``None``.
        ps: optional count probabilities; derived from ``beta`` when
            ``None``.
        count_sampler: optional zero-argument callable returning a count;
            built from ``ps`` when ``None``.
        verbose: when true, print diagnostics (all output is gated on it).

    Returns:
        The accepted motif as a list of strings.
    """
    # print(...) with one pre-formatted string gives the same output under
    # Python 2's print statement and Python 3's print function.
    if verbose:
        print("uniform motif accept reject: %s %s %s %s" %
              (n, L, desired_ic, beta))
    correction_per_col = 3 / (2 * log(2) * n)
    desired_ic_for_beta = desired_ic + L * correction_per_col
    # If we reach the upper limit, things break down, so short-circuit with
    # columns drawn from the fixed count (0, 0, 0, n).
    # NOTE(review): this is an exact float comparison; targets beyond 2 * L
    # are not caught here -- confirm whether `>=` was intended.
    if desired_ic_for_beta == 2 * L:
        cols = [sample_col_from_count((0, 0, 0, n)) for _ in range(L)]
        return ["".join(site) for site in transpose(cols)]
    if beta is None:
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
        if verbose:
            print("beta: %s" % (beta,))
    if ps is None:
        ps = count_ps_from_beta(n, beta)
    if count_sampler is None:
        count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def rQ_raw():
        """Sample one unfiltered candidate motif."""
        # Draw all counts first, then all columns, so the order of random
        # draws stays fixed.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def rQ():
        """Sample candidates until one passes the ``inrange`` test."""
        return sample_until(lambda M: inrange(M, desired_ic, epsilon),
                            rQ_raw,
                            1,
                            progress_bar=False)[0]

    Imin = desired_ic - epsilon
    Imax = desired_ic + epsilon
    # log_M shifts the exponent so that it is zero at the lower edge Imin.
    log_M = -beta * Imin
    if verbose:
        print("Imin, Imax, log_M: %s %s %s" % (Imin, Imax, log_M))

    def AR(motif):
        """Return the acceptance probability for an in-range candidate."""
        return 1.0 / exp(beta * motif_ic(motif) + log_M)

    trials = 0
    while True:
        trials += 1
        motif = rQ()
        r = random.random()
        # Evaluated once and reused for the test and the progress report.
        accept_prob = AR(motif)
        if r < accept_prob:
            return motif
        if verbose and trials % 100 == 0:
            print("%s %s" % (trials, accept_prob))
Exemplo n.º 3
0
def uniform_motif_with_ic_imh_ref(n,
                                  L,
                                  desired_ic,
                                  epsilon=0.1,
                                  iterations=None,
                                  verbose=False,
                                  num_chains=8):
    """Run ``mh`` with an independent proposal, targeting an IC window.

    The target log-density is 0 for motifs whose information content is
    within ``epsilon`` of ``desired_ic`` and ``-10.0**100`` otherwise.

    Args:
        n: forwarded to the helpers; presumably the number of sites per
            motif (TODO confirm against the helpers).
        L: number of columns sampled per proposed motif.
        desired_ic: centre of the accepted IC window.
        epsilon: half-width of the accepted IC window (strict inequality).
        iterations: if truthy, run a single chain of this length;
            otherwise run ``num_chains`` chains until the Gelman-Rubin
            statistic falls below 1.1.
        verbose: accepted but not used by this function.
        num_chains: number of chains in the Gelman-Rubin mode.

    Returns:
        The result of one ``mh`` call when ``iterations`` is truthy,
        otherwise a list of ``num_chains`` accumulated chains.
    """
    per_column_correction = 3 / (2 * log(2) * n)
    target_ic_for_beta = desired_ic + L * per_column_correction
    beta = find_beta_for_mean_motif_ic(n, L, target_ic_for_beta)
    count_probs = count_ps_from_beta(n, beta)
    draw_count = inverse_cdf_sampler(enumerate_counts(n), count_probs)

    def Q(motif):
        # Independent proposal: the current state is ignored.  Counts are
        # all drawn before any column is sampled.
        drawn_counts = [draw_count() for _ in range(L)]
        columns = [sample_col_from_count(c) for c in drawn_counts]
        return ["".join(site) for site in transpose(columns)]

    def log_dQ(motif_p, motif):
        # Depends only on the first argument.
        return beta * motif_ic(motif_p)

    def log_f(motif):
        if abs(motif_ic(motif) - desired_ic) < epsilon:
            return 0
        return -10.0**100

    def inside_window(candidate):
        return log_f(candidate) > -1

    def fresh_proposal():
        return Q(None)

    def run_segment(start, length):
        # One mh run with the settings shared by both modes.
        return mh(log_f,
                  proposal=Q,
                  dprop=log_dQ,
                  x0=start,
                  iterations=length,
                  use_log=True,
                  verbose=False)

    if iterations:
        start = sample_until(inside_window, fresh_proposal, 1)[0]
        return run_segment(start, iterations)

    # Gelman-Rubin mode: extend every chain, check convergence, and on
    # failure continue from each chain's last state with a doubled length.
    starts = sample_until(inside_window, fresh_proposal, num_chains)
    segment_length = 100
    chains = [[] for _ in range(num_chains)]
    while True:
        for chain, start in zip(chains, starts):
            chain.extend(run_segment(start, segment_length))
        R_hat, _neff = gelman_rubin(mmap(motif_ic, chains))
        if R_hat < 1.1:
            return chains
        starts = [chain[-1] for chain in chains]
        segment_length *= 2
Exemplo n.º 4
0
def uniform_motif_with_ic_imh(n,
                              L,
                              desired_ic,
                              epsilon=0.1,
                              iterations=None,
                              verbose=False,
                              beta=None,
                              num_chains=8):
    """Run one ``mh`` chain with an independent proposal on an IC window.

    The target log-density is 0 for motifs whose information content is
    within ``epsilon`` of ``desired_ic`` and ``-10.0**100`` otherwise.

    Args:
        n: forwarded to the helpers; presumably the number of sites per
            motif (TODO confirm against the helpers).
        L: number of columns sampled per proposed motif.
        desired_ic: centre of the accepted IC window.
        epsilon: half-width of the accepted IC window (strict inequality).
        iterations: chain length.  When ``None`` or 0 it is chosen
            automatically as ``int(10 / ar)``, where ``ar`` is the value
            returned by pilot ``mh`` runs with ``return_ar=True``
            (presumably the acceptance rate).  Previously a
            caller-supplied value was silently overwritten.
        verbose: accepted but not used by this function.
        beta: optional tilting parameter; solved for when ``None``.
        num_chains: accepted but not used by this function.

    Returns:
        Whatever ``mh`` returns for the final run.
    """
    if beta is None:
        correction_per_col = 3 / (2 * log(2) * n)
        desired_ic_for_beta = desired_ic + L * correction_per_col
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def Q(motif):
        # Independent proposal: the current state is ignored.  Counts are
        # all drawn before any column is sampled.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def log_dQ(motif_p, motif):
        # Depends only on the first argument.
        return beta * motif_ic(motif_p)

    def log_f(motif):
        in_range = abs(motif_ic(motif) - desired_ic) < epsilon
        return 0 if in_range else -10.0**100

    x0 = sample_until(lambda x: log_f(x) > -1, lambda: Q(None), 1)[0]
    if not iterations:
        # No length requested: run pilots of doubling length until the
        # reported rate is non-zero, then size the real run from it.
        ar = 0
        pilot_iterations = 100
        while ar == 0:
            ar = mh(log_f,
                    proposal=Q,
                    dprop=log_dQ,
                    x0=x0,
                    iterations=pilot_iterations,
                    use_log=True,
                    verbose=False,
                    return_ar=True)
            pilot_iterations *= 2
        iterations = int(1.0 / ar * 10)
    chain = mh(log_f,
               proposal=Q,
               dprop=log_dQ,
               x0=x0,
               iterations=iterations,
               use_log=True,
               verbose=False)
    return chain
Exemplo n.º 5
0
def uniform_motif_imh_tv(n, L, desired_ic, beta=None, epsilon=None, tv=0.01):
    """Run uniform IMH to within total variation bound ``tv``.

    The proposal only returns motifs whose information content is within
    ``epsilon`` of ``desired_ic``.  The chain length is the smallest
    integer ``k`` with ``(1 - alpha) ** k <= tv``, where
    ``alpha = exp(-2 * beta * epsilon)``.

    Args:
        n: forwarded to the helpers; presumably the number of sites per
            motif (TODO confirm against the helpers).
        L: number of columns sampled per proposed motif.
        desired_ic: centre of the accepted IC window.
        beta: optional tilting parameter; solved for when ``None``.
        epsilon: half-width of the IC window; defaults to
            ``1 / (2 * beta)`` when ``None``.
        tv: bound used to size the chain.

    Returns:
        Whatever ``mh`` returns for the run.
    """
    # print(...) with one pre-formatted string gives the same output under
    # Python 2's print statement and Python 3's print function.
    if beta is None:
        # The IC correction is only needed when solving for beta.
        correction_per_col = 3 / (2 * log(2) * n)
        desired_ic_for_beta = desired_ic + L * correction_per_col
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
    if epsilon is None:
        epsilon = 1.0 / (2 * beta)
        print("maximally efficient epsilon: %s" % (epsilon,))
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def Qp(motif):
        # Unfiltered independent proposal; the current state is ignored.
        # Counts are all drawn before any column is sampled.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def Q(motif):
        # Proposal restricted to the IC window by resampling.
        return sample_until(lambda m: abs(motif_ic(m) - desired_ic) < epsilon,
                            lambda: Qp(None), 1)[0]

    def log_dQ(motif_p, motif):
        # Depends only on the first argument.
        return beta * motif_ic(motif_p)

    def log_f(motif):
        in_range = abs(motif_ic(motif) - desired_ic) < epsilon
        return 0 if in_range else -10.0**100

    alpha = exp(-2 * beta * epsilon)
    # Both logarithms are negative for 0 < tv < 1 and 0 < alpha < 1.
    iterations = int(ceil(log(tv) / log(1 - alpha)))
    print("iterations: %s" % (iterations,))
    # Start the chain from a state inside the IC window.
    x0 = sample_until(lambda x: log_f(x) > -1, lambda: Q(None), 1)[0]
    chain = mh(log_f,
               proposal=Q,
               dprop=log_dQ,
               x0=x0,
               iterations=iterations,
               use_log=True,
               verbose=False)
    return chain