Exemplo n.º 1
0
def uniform_motif(N,
                  L,
                  desired_ic,
                  epsilon=0.1,
                  beta=None,
                  ps=None,
                  count_sampler=None,
                  verbose=False):
    """Sample one motif by accept/reject around a target information content.

    Proposals are built by drawing ``L`` column counts from
    ``count_sampler``, turning each into a column with
    ``sample_col_from_count`` and transposing the columns into site
    strings.  ``sample_until`` keeps only proposals for which
    ``inrange(motif, desired_ic, epsilon)`` holds, and each surviving
    proposal is accepted with probability
    ``1 / exp(beta * motif_ic(motif) - beta * (desired_ic - epsilon))``.
    Presumably this cancels an ``exp(beta * IC)`` weighting of the
    proposal so that accepted motifs are uniform over the IC window --
    confirm against ``count_ps_from_beta``.

    Args:
        N: number of sites (passed to the count/beta helpers).
        L: number of columns in the motif.
        desired_ic: centre of the accepted IC window.
        epsilon: half-width of the accepted IC window.
        beta: tilt parameter; solved for with
            ``find_beta_for_mean_motif_ic`` when ``None``.
        ps: count probabilities; derived from ``beta`` when ``None`` and
            only needed if ``count_sampler`` has to be built here.
        count_sampler: zero-argument callable returning one column count;
            built from ``enumerate_counts(N)`` and ``ps`` when ``None``.
        verbose: print progress diagnostics.

    Returns:
        A list of site strings, one per row of the transposed columns.
    """
    if verbose:
        print("uniform motif accept reject: %s %s %s %s" %
              (N, L, desired_ic, beta))
    # Per-column correction added before solving for beta.  The constant 3
    # matches (A - 1) with A = 4 as used by maxent_motif in this file.
    correction_per_col = 3 / (2 * log(2) * N)
    desired_ic_for_beta = desired_ic + L * correction_per_col
    # At or beyond the 2 * L upper limit the beta search breaks down, so
    # return fully conserved columns directly.  The comparison is ``>=``
    # rather than ``==``: an exact float match is essentially never hit,
    # and any target above the limit is just as unreachable as the limit.
    if desired_ic_for_beta >= 2 * L:
        cols = [sample_col_from_count((0, 0, 0, N)) for _ in range(L)]
        return ["".join(site) for site in transpose(cols)]
    if beta is None:
        beta = find_beta_for_mean_motif_ic(N, L, desired_ic_for_beta)
        if verbose:
            print("beta: %s" % (beta,))
    if count_sampler is None:
        # ps is only needed to build the sampler, so skip it when a ready
        # sampler was handed in.
        if ps is None:
            ps = count_ps_from_beta(N, beta)
        count_sampler = inverse_cdf_sampler(enumerate_counts(N), ps)

    def propose_unfiltered():
        # Draw every count first, then every column, so the order in
        # which the helpers are called stays fixed.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def propose():
        return sample_until(lambda M: inrange(M, desired_ic, epsilon),
                            propose_unfiltered,
                            1,
                            progress_bar=False)[0]

    Imin = desired_ic - epsilon
    Imax = desired_ic + epsilon
    # log of the envelope constant: normalises the ratio at IC == Imin.
    log_M = -beta * Imin
    if verbose:
        print("Imin, Imax, log_M: %s %s %s" % (Imin, Imax, log_M))

    def acceptance_ratio(motif):
        return 1.0 / exp(beta * motif_ic(motif) + log_M)

    trials = 0
    while True:
        trials += 1
        motif = propose()
        r = random.random()
        ratio = acceptance_ratio(motif)
        if r < ratio:
            return motif
        if verbose and trials % 100 == 0:
            print("%s %s" % (trials, ratio))
Exemplo n.º 2
0
def uniform_motifs(N,
                   L,
                   desired_ic,
                   num_motifs,
                   epsilon=0.1,
                   beta=None,
                   verbose=False):
    """Collect ``num_motifs`` motifs from repeated ``uniform_motif`` calls.

    ``beta``, the count probabilities and the count sampler are computed
    once here and handed to every ``uniform_motif`` call.

    Args:
        N: number of sites.
        L: number of columns per motif.
        desired_ic: target information content passed to ``uniform_motif``.
        num_motifs: how many motifs to draw.
        epsilon: IC tolerance forwarded to ``uniform_motif``.
        beta: tilt parameter; solved for when ``None``.
        verbose: forwarded to the helpers.

    Returns:
        A list with one ``uniform_motif`` result per requested motif.
    """
    if beta is None:
        # Raise the target by L times the per-column correction before
        # solving for beta.
        per_column = 3 / (2 * log(2) * N)
        target_ic = desired_ic + L * per_column
        beta = find_beta_for_mean_motif_ic(N,
                                           L,
                                           target_ic,
                                           verbose=verbose)
    ps = count_ps_from_beta(N, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(N), ps)

    def draw_one():
        return uniform_motif(N,
                             L,
                             desired_ic,
                             epsilon=epsilon,
                             beta=beta,
                             ps=ps,
                             count_sampler=count_sampler,
                             verbose=verbose)

    motifs = []
    for _ in trange(num_motifs):
        motifs.append(draw_one())
    return motifs
Exemplo n.º 3
0
def maxent_motif(N,
                 L,
                 desired_ic,
                 tolerance=10**-10,
                 beta=None,
                 verbose=False,
                 A=4):
    """Sample one motif from the max-ent distribution with mean desired_ic.

    Args:
        N: number of sites.
        L: number of columns.
        desired_ic: target mean information content.
        tolerance: forwarded to ``find_beta_for_mean_motif_ic``.
        beta: tilt parameter; solved for when ``None``.
        verbose: print/forward progress information.
        A: alphabet size.

    Returns:
        A list of site strings built from the transposed sampled columns.
    """
    if beta is None:
        if verbose:
            print("finding beta")
        # Shift the target upwards so that, once motif_ic applies its
        # first-order correction, the requested IC is what comes out.
        per_column = (A - 1) / (2 * log(2) * N)
        target_ic = desired_ic + L * per_column
        beta = find_beta_for_mean_motif_ic(N,
                                           L,
                                           target_ic,
                                           tolerance=tolerance,
                                           verbose=verbose,
                                           A=A)
    ps = count_ps_from_beta(N, beta, A=A)
    draw_count = inverse_cdf_sampler(enumerate_counts(N, A), ps)
    # Two separate passes: all counts first, then all columns.
    sampled_counts = [draw_count() for _ in range(L)]
    columns = [sample_col_from_count(c, A=A) for c in sampled_counts]
    return ["".join(row) for row in transpose(columns)]
Exemplo n.º 4
0
def maxent_motifs(N, L, desired_ic, num_motifs, tolerance=10**-10, A=4, beta=None, countses=None,
                  entropies=None, log_cols=None, verbose=False):
    """Sample ``num_motifs`` motifs from the max-ent distribution.

    The count enumeration, per-count entropies and ``log_cols`` table can
    be passed in to avoid recomputing them; whatever is missing is built
    here and then shared across all sampled motifs.

    Args:
        N: number of sites.
        L: number of columns per motif.
        desired_ic: target mean information content.
        num_motifs: how many motifs to draw.
        tolerance: forwarded to ``find_beta_for_mean_motif_ic``.
        A: alphabet size.
        beta: tilt parameter; solved for when ``None``.
        countses: enumeration of column counts (``enumerate_counts(N, A)``).
        entropies: array of ``entropy_from_counts`` over ``countses``.
        log_cols: array of ``log_counts_to_cols`` over ``countses``.
        verbose: emit log messages and progress bars.

    Returns:
        A list of motifs, each a list of site strings.
    """
    def with_progress(iterable):
        # Progress bars only in verbose mode, for every pass alike.
        return tqdm(iterable) if verbose else iterable

    ### computational
    if countses is None:
        logger("countses", verbose)
        countses = enumerate_counts(N, A, verbose=verbose)
    if entropies is None:
        logger("entropies", verbose)
        # Explicit list: np.array over a lazy map object would not yield
        # a numeric array.
        entropies = np.array([entropy_from_counts(counts)
                              for counts in with_progress(countses)])
    if log_cols is None:
        logger("log_cols", verbose)
        log_cols = np.array([log_counts_to_cols(counts, A=A)
                             for counts in with_progress(countses)])
    if beta is None:
        # Raise the target by L times the per-column correction before
        # solving for beta.
        correction_per_col = (A-1)/(2*log(2)*N)
        desired_ic += L * correction_per_col
        beta = find_beta_for_mean_motif_ic(N,L,desired_ic,tolerance=tolerance,verbose=verbose, A=A,
                                           countses=countses, entropies=entropies, log_cols=log_cols)
        logger("beta: %s" % beta, verbose)
    logger("computing count ps from beta", verbose)
    ps = count_ps_from_beta(N,beta, A=A, verbose=verbose,
                            log_cols=log_cols, entropies=entropies)
    count_sampler = inverse_cdf_sampler(countses, ps)

    def sample():
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count, A=A) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    counter = trange if verbose else range
    if verbose:
        print("sampling")
    return [sample() for _ in counter(num_motifs)]
Exemplo n.º 5
0
def uniform_motifs(N,L,desired_ic,num_motifs,epsilon=0.1,beta=None,verbose=False):
    """Return a list of ``num_motifs`` results from ``uniform_motif``.

    beta (solved for when None), the count probabilities and the count
    sampler are prepared once and reused for every motif drawn.
    """
    if beta is None:
        # Target for the beta solver: desired_ic raised by L per-column
        # corrections.
        per_column = 3/(2*log(2)*N)
        target_ic = desired_ic + L * per_column
        beta = find_beta_for_mean_motif_ic(N,L,target_ic,verbose=verbose)
    ps = count_ps_from_beta(N,beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(N),ps)
    shared = dict(epsilon=epsilon,beta=beta,ps=ps,
                  count_sampler=count_sampler,verbose=verbose)
    collected = []
    for _ in trange(num_motifs):
        collected.append(uniform_motif(N,L,desired_ic,**shared))
    return collected
Exemplo n.º 6
0
def maxent_motifs(N,
                  L,
                  desired_ic,
                  num_motifs,
                  tolerance=10**-10,
                  A=4,
                  beta=None,
                  countses=None,
                  entropies=None,
                  log_cols=None,
                  verbose=False):
    """Sample ``num_motifs`` motifs from the max-ent distribution.

    The count enumeration, per-count entropies and ``log_cols`` table can
    be passed in to avoid recomputing them; whatever is missing is built
    here and then shared across all sampled motifs.

    Args:
        N: number of sites.
        L: number of columns per motif.
        desired_ic: target mean information content.
        num_motifs: how many motifs to draw.
        tolerance: forwarded to ``find_beta_for_mean_motif_ic``.
        A: alphabet size.
        beta: tilt parameter; solved for when ``None``.
        countses: enumeration of column counts (``enumerate_counts(N, A)``).
        entropies: array of ``entropy_from_counts`` over ``countses``.
        log_cols: array of ``log_counts_to_cols`` over ``countses``.
        verbose: emit log messages and progress bars.

    Returns:
        A list of motifs, each a list of site strings.
    """

    def with_progress(iterable):
        # Progress bars only in verbose mode, for every pass alike.
        return tqdm(iterable) if verbose else iterable

    ### computational
    if countses is None:
        logger("countses", verbose)
        countses = enumerate_counts(N, A, verbose=verbose)
    if entropies is None:
        logger("entropies", verbose)
        # Explicit list: np.array over a lazy map object would not yield
        # a numeric array.
        entropies = np.array([
            entropy_from_counts(counts) for counts in with_progress(countses)
        ])
    if log_cols is None:
        logger("log_cols", verbose)
        log_cols = np.array([
            log_counts_to_cols(counts, A=A)
            for counts in with_progress(countses)
        ])
    if beta is None:
        # Raise the target by L times the per-column correction before
        # solving for beta.
        correction_per_col = (A - 1) / (2 * log(2) * N)
        desired_ic += L * correction_per_col
        beta = find_beta_for_mean_motif_ic(N,
                                           L,
                                           desired_ic,
                                           tolerance=tolerance,
                                           verbose=verbose,
                                           A=A,
                                           countses=countses,
                                           entropies=entropies,
                                           log_cols=log_cols)
        logger("beta: %s" % beta, verbose)
    logger("computing count ps from beta", verbose)
    ps = count_ps_from_beta(N,
                            beta,
                            A=A,
                            verbose=verbose,
                            log_cols=log_cols,
                            entropies=entropies)
    count_sampler = inverse_cdf_sampler(countses, ps)

    def sample():
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count, A=A) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    counter = trange if verbose else range
    if verbose:
        print("sampling")
    return [sample() for _ in counter(num_motifs)]
Exemplo n.º 7
0
def maxent_motif(N,L,desired_ic,tolerance=10**-10,beta=None,verbose=False, A=4):
    """Sample one motif from the max-ent distribution with mean desired_ic.

    Returns a list of site strings obtained by transposing L sampled
    columns.  beta is solved for when it is not supplied.
    """
    if beta is None:
        if verbose:
            print("finding beta")
        # The target is adjusted upwards so that when motif_ic is called
        # with its first-order correction, the desired IC is recovered.
        per_column = (A-1)/(2*log(2)*N)
        target_ic = desired_ic + L * per_column
        beta = find_beta_for_mean_motif_ic(N,L,target_ic,tolerance=tolerance,verbose=verbose, A=A)
    ps = count_ps_from_beta(N, beta, A=A)
    draw_count = inverse_cdf_sampler(enumerate_counts(N, A), ps)
    # Counts are all drawn before any column is sampled.
    sampled_counts = [draw_count() for _ in range(L)]
    columns = [sample_col_from_count(c, A=A) for c in sampled_counts]
    return ["".join(row) for row in transpose(columns)]
Exemplo n.º 8
0
def uniform_motif(N,L,desired_ic,epsilon=0.1,beta=None,ps=None,count_sampler=None,verbose=False):
    """Sample one motif by accept/reject around a target information content.

    Proposals are built by drawing L column counts from count_sampler,
    turning each into a column with sample_col_from_count and transposing
    the columns into site strings.  sample_until keeps only proposals for
    which inrange(motif, desired_ic, epsilon) holds, and each surviving
    proposal is accepted with probability
    1 / exp(beta * motif_ic(motif) - beta * (desired_ic - epsilon)).
    Presumably this cancels an exp(beta * IC) weighting of the proposal so
    that accepted motifs are uniform over the IC window -- confirm against
    count_ps_from_beta.

    beta, ps and count_sampler are each derived here when passed as None;
    ps is only needed when the sampler has to be built.  Returns a list of
    site strings.
    """
    if verbose:
        print("uniform motif accept reject: %s %s %s %s" % (N,L,desired_ic,beta))
    # Per-column correction added before solving for beta.  The constant 3
    # matches (A - 1) with A = 4 as used by maxent_motif in this file.
    correction_per_col = 3/(2*log(2)*N)
    desired_ic_for_beta = desired_ic + L * correction_per_col
    # At or beyond the 2*L upper limit the beta search breaks down, so
    # return fully conserved columns directly.  The comparison is >= rather
    # than ==: an exact float match is essentially never hit, and any
    # target above the limit is just as unreachable as the limit itself.
    if desired_ic_for_beta >= 2*L:
        cols = [sample_col_from_count((0,0,0,N)) for _ in range(L)]
        return ["".join(site) for site in transpose(cols)]
    if beta is None:
        beta = find_beta_for_mean_motif_ic(N,L,desired_ic_for_beta)
        if verbose:
            print("beta: %s" % (beta,))
    if count_sampler is None:
        # ps is only needed to build the sampler, so skip it when a ready
        # sampler was handed in.
        if ps is None:
            ps = count_ps_from_beta(N,beta)
        count_sampler = inverse_cdf_sampler(enumerate_counts(N),ps)
    def propose_unfiltered():
        # Draw every count first, then every column, so the order in
        # which the helpers are called stays fixed.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]
    def propose():
        return sample_until(lambda M:inrange(M,desired_ic,epsilon),propose_unfiltered,1,progress_bar=False)[0]
    Imin = desired_ic - epsilon
    Imax = desired_ic + epsilon
    # log of the envelope constant: normalises the ratio at IC == Imin.
    log_M = -beta*Imin
    if verbose:
        print("Imin, Imax, log_M: %s %s %s" % (Imin, Imax, log_M))
    def acceptance_ratio(motif):
        return 1.0/exp(beta*motif_ic(motif) + log_M)
    trials = 0
    while True:
        trials += 1
        motif = propose()
        r = random.random()
        ratio = acceptance_ratio(motif)
        if r < ratio:
            return motif
        if verbose and trials % 100 == 0:
            print("%s %s" % (trials, ratio))