示例#1
0
def stoch_logistic_reg(f,
                       xmin,
                       xmax,
                       ymax,
                       desired_ic,
                       stop_sd=0.01,
                       debug=False):
    ymin = 0

    def sigmoid(params, x):
        x0, k, ymax = params
        y = ymax / (1 + np.exp(-k * (x - x0))) + ymin
        return y

    def residuals(params, x, y):
        return y - sigmoid(params, x)

    xs = list(np.linspace(xmin, xmax, 10))
    ys = map(f, xs)
    params = (2, 1, desired_ic * 2)
    while True:
        p_guess = params
        params, cov, infodict, mesg, ier = leastsq(residuals,
                                                   p_guess,
                                                   args=(xs, ys),
                                                   full_output=1)
        try:
            x = secant_interval(lambda x: sigmoid(params, x) - desired_ic,
                                xmin, xmax)
        except:
            print "failed secant interval"
            print params, xs, ys
            raise Exception()
        y = f(x)
        xs.append(x)
        ys.append(y)
        if sd(xs[-3:]) < stop_sd:
            break
        if debug:
            plt.scatter(xs, ys)
            plt.plot(*pl(lambda x: sigmoid(params, x),
                         np.linspace(xmin, xmax, 1000)))
            plt.plot(*pl(lambda x: desired_ic, np.linspace(xmin, xmax, 1000)))
            plt.plot([x, x], [0, 2 * L])
            plt.show()
    # end with one more round of interpolation
    params, cov, infodict, mesg, ier = leastsq(residuals,
                                               p_guess,
                                               args=(xs, ys),
                                               full_output=1)
    x = secant_interval(lambda x: sigmoid(params, x) - desired_ic, xmin, xmax)
    return x, (xs, ys)
示例#2
0
def excess_mi_experiment(filename=None):
    """Do artificial motifs with linear BEMs show the same patterns of excess MI as biological motifs? (Yes)

    Samples motifs from the match/mismatch (Sella-Hirsch) model with the
    effective population size Ne tuned so the expected IC hits desired_ic,
    then compares them against MaxEnt motifs of matching information
    content via boxplots (optionally saved to `filename`).
    """
    n = 10           # sites per motif
    L = 10           # motif width
    G = 1000         # genome size
    desired_ic = 10  # target motif information content (bits)
    replicates = 1000
    # Mean IC for each mismatch configuration eps.
    ics = np.array(
        [mean_ic_from_eps(eps, n, L) for eps in enumerate_eps(n, L)])

    def mean_ic(N):
        # Expected IC under the Sella-Hirsch stationary distribution at
        # effective population size N.
        ps = sella_hirsch_predictions(n, L, G, N)
        return ics.dot(ps)

    # Tune Ne so the model's expected IC equals desired_ic.
    Ne = secant_interval(lambda N: mean_ic(N) - desired_ic,
                         0,
                         2000,
                         tolerance=0.1,
                         verbose=True)  # ~= 1525
    ps = sella_hirsch_predictions(n, L, G, Ne)
    sh_sampler = inverse_cdf_sampler(list(enumerate_eps(n, L)), ps)
    sh_motifs = [
        sample_motif_from_mismatches(sh_sampler(), L)
        for i in trange(replicates)
    ]
    sh_mean_ic = mean(map(
        motif_ic, sh_motifs))  # may undershoot desired due to approximation
    # Match the MaxEnt ensemble to the realized (not the desired) mean IC.
    maxent_motifs = maxent_sample_motifs_with_ic(n, L, sh_mean_ic, replicates)
    plt.suptitle(
        "Motif Statistics for Match/Mismatch Model vs. MaxEnt Ensembles (n=10,L=10,G=1000)"
    )
    all_boxplot_comparisons([sh_motifs, maxent_motifs],
                            labels=["M/MM", "MaxEnt"],
                            plot_titles="IC Gini MI".split(),
                            filename=filename)
def find_beta_for_mean_col_ic(n,
                              desired_ic_per_col,
                              tolerance=10**-10,
                              verbose=False):
    """find beta such that entropy*exp(-beta*entropy)/Z = des_ent"""
    if verbose:
        print "enumerating countses"
    countses = enumerate_counts(n)
    if verbose:
        print "enumerating entropies"
    entropies = np.array(map(entropy_from_counts, countses))
    #cols = np.array(map(countses_to_cols, countses))
    if verbose:
        print "enumerating cols"
    #cols = np.exp(np.array(map(log_counts_to_cols, countses)))
    iterator = tqdm(countses) if verbose else countses
    log_cols = np.array(map(log_counts_to_cols, iterator))

    def f(beta):
        phats = cols * (np.exp(-beta * entropies))
        return 2 - entropies.dot(phats) / np.sum(phats) - desired_ic_per_col

    def f2(beta):
        log_phats = np_log_normalize(log_cols + -beta * entropies)
        expected_entropy = np.exp(log_phats).dot(entropies)
        return 2 - expected_entropy - desired_ic_per_col

    ub = 1000
    while f2(ub) < 0:
        ub *= 2
        print "raising upper bound to:", ub
    return secant_interval(f2, 0, ub, verbose=verbose, tolerance=tolerance)
def find_beta_for_mean_col_ic_ref(n, desired_ic_per_col, tolerance=10**-10):
    """Reference solver: find beta with mean column IC == desired_ic_per_col,
    where IC(beta) = 2 - mean_col_ent(n, beta)."""
    def objective(beta):
        # Positive once the achieved IC exceeds the target.
        return (2 - mean_col_ent(n, beta)) - desired_ic_per_col

    ub = 1000  # hackish starting bound, upped in order to deal with CRP
    while objective(ub) < 0:
        ub *= 2
    return secant_interval(objective, -10, ub, verbose=False,
                           tolerance=tolerance)
def find_beta_for_mean_col_ic_ref2(n, desired_ic_per_col, tolerance=10**-10):
    """find beta such that entropy*exp(-beta*entropy)/Z = des_ent"""
    counts = enumerate_counts(n)
    entropies = np.array(map(entropy_from_counts, counts))
    cols = np.array(map(counts_to_cols, counts))

    def ic_gap(beta):
        # Boltzmann-weighted mean entropy at this beta; root of the gap
        # between achieved and desired IC is the answer.
        weights = cols * np.exp(-beta * entropies)
        mean_entropy = entropies.dot(weights) / np.sum(weights)
        return 2 - mean_entropy - desired_ic_per_col

    ub = 1000
    return secant_interval(ic_gap, -10, ub, verbose=False,
                           tolerance=tolerance)
def log_ZS_sophisticated((matrix, mu, Ne)):
    """Approximate log(ZS) = L*log(4) + log E[(1+exp(ep-mu))^-nu] via a
    Laplace (Gaussian) approximation about the mode of the integrand,
    treating the energy ep of a random site as normal with the matrix's
    column-sum mean and sd."""
    L = len(matrix)
    nu = Ne - 1
    # Mean and sd of the energy of a uniformly random site.
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    # Derivative of the log integrand; its root is the integrand's mode.
    dfde = lambda ep: -nu*exp(ep-mu)/(1+exp(ep-mu)) - (ep-mat_mu)/mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        # Bracket padded by 20 beyond the achievable energy range.
        mode = secant_interval(dfde,ep_min - 20, ep_max + 20)
    except:
        print (matrix, mu, Ne)
        raise Exception
    # Curvature (second derivative of log integrand) at the mode gives the
    # width of the approximating Gaussian.
    kappa = -nu*(exp(mu-mode)/(1+exp(mu-mode))**2) - 1/mat_sigma**2
    sigma_approx = sqrt(-1/kappa)
    integrand = lambda ep:dnorm(ep, mat_mu, mat_sigma) * (1+exp(ep-mu))**-nu
    # Ratio of peak heights estimates the integral (Laplace approximation).
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
示例#7
0
def log_ZS_sophisticated((matrix, mu, Ne)):
    """Approximate log(ZS) = L*log(4) + log E[(1+exp(ep-mu))^-nu] via a
    Laplace (Gaussian) approximation about the mode of the integrand,
    treating the energy ep of a random site as normal with the matrix's
    column-sum mean and sd."""
    L = len(matrix)
    nu = Ne - 1
    # Mean and sd of the energy of a uniformly random site.
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    # Derivative of the log integrand; its root is the integrand's mode.
    dfde = lambda ep: -nu * exp(ep - mu) / (1 + exp(ep - mu)) - (
        ep - mat_mu) / mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        # Bracket padded by 20 beyond the achievable energy range.
        mode = secant_interval(dfde, ep_min - 20, ep_max + 20)
    except:
        print(matrix, mu, Ne)
        raise Exception
    # Curvature (second derivative of log integrand) at the mode gives the
    # width of the approximating Gaussian.
    kappa = -nu * (exp(mu - mode) / (1 + exp(mu - mode))**2) - 1 / mat_sigma**2
    sigma_approx = sqrt(-1 / kappa)
    integrand = lambda ep: dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu)
                                                           )**-nu
    # Ratio of peak heights estimates the integral (Laplace approximation).
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
示例#8
0
        exp(
            log_fhat((matrix, mu, Ne), [site]) + log(1.0 / 4**L) -
            log_psfm_prob(site)) for site in sites)
    ZS = 4**L * mean_ZS
    return log(ZS)


def log_ZS_importance2((matrix, mu, Ne), trials=1000):
    # NOTE(review): apparently unfinished -- `lamb` is computed but never
    # used and the function falls through returning None; `trials` is also
    # unused.  Left as-is pending clarification of intent.
    y = mu - log(Ne)

    def expectation(lamb):
        # Expected site energy under the Boltzmann-tilted psfm at
        # inverse temperature lamb.
        psfm = [normalize([exp(-lamb * ep) for ep in row]) for row in matrix]
        return sum(ep * p for row, ps in zip(matrix, psfm)
                   for ep, p in zip(row, ps))

    # Solve expectation(lamb) == y = mu - log(Ne).
    lamb = secant_interval(lambda x: expectation(x) - y, -10, 10)


def log_ZM_importance((matrix, mu, Ne), N, trials=1000):
    log_ZS = log_ZS_importance((matrix, mu, Ne), trials=trials)
    return N * log_ZS


def log_ZS_empirical((matrix, mu, Ne), trials=1000):
    # NOTE(review): this body looks like two estimators pasted together.
    # The first half computes log_Zs by naive Monte Carlo over uniform
    # random sites, but that value is discarded; only the importance-
    # sampling estimate below is returned.  Confirm which was intended.
    L = len(matrix)
    acc = 0
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += 1.0 / (1 + exp(ep - mu))**(Ne - 1)
    est_mean = acc / trials
    # Dead store: log_Zs is never returned -- TODO confirm intent.
    log_Zs = L * log(4) + log(est_mean)
    L = len(matrix)  # redundant reassignment of L
    # Importance sampling with a uniform psfm as the proposal distribution.
    psfm = [[0.25]*4 for _ in range(L)]
    log_psfm = [[log(p) for p in row] for row in psfm]
    log_psfm_prob = lambda site:score_seq(log_psfm, site)
    sites = [sample_from_psfm(psfm) for _ in xrange(trials)]
    mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site]) + log(1.0/4**L) - log_psfm_prob(site))
                  for site in sites)
    ZS = 4**L * mean_ZS
    return log(ZS)

def log_ZS_importance2((matrix, mu, Ne), trials=1000):
    # NOTE(review): apparently unfinished -- `lamb` is computed but never
    # used and the function falls through returning None; `trials` is also
    # unused.  Left as-is pending clarification of intent.
    y = mu - log(Ne)
    def expectation(lamb):
        # Expected site energy under the Boltzmann-tilted psfm at
        # inverse temperature lamb.
        psfm = [normalize([exp(-lamb*ep) for ep in row]) for row in matrix]
        return sum(ep*p for row, ps in zip(matrix, psfm) for ep,p in zip(row,ps))
    # Solve expectation(lamb) == y = mu - log(Ne).
    lamb = secant_interval(lambda x:expectation(x)-y,-10,10)
        
        
def log_ZM_importance((matrix, mu, Ne), N, trials=1000):
    log_ZS = log_ZS_importance((matrix, mu, Ne), trials=trials)
    return N * log_ZS
    
def log_ZS_empirical((matrix, mu, Ne), trials=1000):
    L = len(matrix)
    acc = 0
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += 1.0/(1+exp(ep-mu))**(Ne-1)
    est_mean = acc / trials
    log_Zs = L*log(4) + log(est_mean)
    return log_Zs
示例#10
0
def predict_modal_energy(site_mu, site_sigma, mu, Ne):
    """Return the modal binding energy: the root of d(log P(ep))/d(ep)
    in [-50, 50], combining a Fermi selection term with a Gaussian prior
    of mean site_mu and sd site_sigma."""
    nu = Ne - 1

    def dlogPe_de(ep):
        selection_term = -nu * exp(ep - mu) / (1 + exp(ep - mu))
        prior_term = -(ep - site_mu) / site_sigma**2
        return selection_term + prior_term

    return secant_interval(dlogPe_de, -50, 50)