# Imports assumed by this module (project-local helpers such as
# random_motif, sample_code, pw_prob_sites, subst, inverse_cdf_sample,
# mh, mutate_motif, motif_ic, logsum, etc. are defined elsewhere in this
# codebase):
import random
from math import exp, log

import numpy as np
import matplotlib.pyplot as plt
from numpy import mean, polyfit, poly1d
from tqdm import trange


def estremo_gibbs(iterations=50000, verbose=False, every=1000, sigma=1,
                  mu=-10, Ne=5):
    nu = Ne - 1
    L = 10
    N = 20
    code, motif = (sample_code(L=10, sigma=1),
                   random_motif(length=L, num_sites=N))

    def log_f((code, motif)):
        eps = map(lambda x: -log(x), pw_prob_sites(motif, code))
        return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in eps)

    chain = [(code, motif[:])]
    if verbose:
        print log_f((code, motif))
    for iteration in trange(iterations):
        # Systematic scan over the motif: resample each base of each site
        # from its conditional distribution (including the current base,
        # so the update is a proper Gibbs step).
        for i in range(N):
            for j in range(L):
                site = motif[i]  # re-read: earlier j updates modify motif[i]
                bps = "ACGT"
                log_ps = []
                for bp in bps:
                    site_p = subst(site, bp, j)
                    log_ps.append(log_f((code, [site_p])))
                # Subtract the max before exponentiating, for numerical
                # stability; the relative weights are unchanged.
                log_ps = [p - max(log_ps) for p in log_ps]
                bp = inverse_cdf_sample(bps, map(exp, log_ps),
                                        normalized=False)
                motif[i] = subst(site, bp, j)
        # Scan over the code: perturb each pairwise weight with a batch of
        # Gaussian proposals and sample among the resulting codes.
        for k in range(L - 1):
            for b1 in "ACGT":
                for b2 in "ACGT":
                    dws = [random.gauss(0, 0.1) for _ in range(10)]
                    code_ps = [[d.copy() for d in code] for _ in range(10)]
                    for code_p, dw in zip(code_ps, dws):
                        code_p[k][b1, b2] += dw
                    log_ps = [log_f((code_p, motif)) for code_p in code_ps]
                    log_ps = [p - max(log_ps) for p in log_ps]
                    code = inverse_cdf_sample(code_ps, map(exp, log_ps),
                                              normalized=False)
        if verbose and iteration % every == 0:
            print log_f((code, motif))
        chain.append((code, motif[:]))
    return chain
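# A minimal usage sketch for estremo_gibbs (hypothetical; the iteration
# count is deliberately tiny, and motif_ic is assumed available as in the
# rest of this module):
def demo_estremo_gibbs():
    chain = estremo_gibbs(iterations=100)
    final_code, final_motif = chain[-1]
    print "final motif IC:", motif_ic(final_motif)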
def validation_plot(L=10, N=50, ref_trials=1000):
    check_points = np.linspace(0, 10, 10)
    # ics_ref = sorted([motif_ic(random_motif(L, N)) for i in range(ref_trials)])
    ics_ref = sorted([motif_ic(random_motif(L, N), correct=True)
                      for i in trange(ref_trials)])
    plt.plot(ics_ref, 1 - np.linspace(0, 1, len(ics_ref)),
             label="Empirical Complementary CDF", marker='o', linestyle='')
    plt.plot(check_points,
             [exp(ic_log_pvalue(N, L, ic, method="MC")) for ic in check_points],
             label="Importance Sampling Estimate")
    plt.plot(check_points,
             [exp(ic_log_pvalue(N, L, ic, method="UB")) for ic in check_points],
             label="Analytic Upper Bound")
    plt.plot(check_points,
             [exp(ic_log_pvalue(N, L, ic, method="analytic")) for ic in check_points],
             label="Analytic P-value")
    plt.semilogy()
    plt.legend()
    plt.xlabel("Information Content (bits)")
    plt.ylabel("P-value")
    plt.xlim(0, 1.2)
    plt.show()
def log_ZM_empirical_ref3(theta, N, trials=1000):
    L = len(theta[0])
    lfhs = [log_fhat(theta, random_motif(L, 1)) for _ in xrange(trials)]
    log_avg = logsum(lfhs) - log(trials)
    log_ZS = L * log(4) + log_avg
    log_ZM = N * log_ZS
    return log_ZM
def best_ic_motif(L, n, trials):
    best_ic = 0
    best_motif = None
    for i in trange(trials):
        motif = random_motif(L, n)
        cur_ic = motif_ic(motif, correct=False)
        if cur_ic > best_ic:
            best_ic = cur_ic
            best_motif = motif
    return best_motif
def match_ic_mi(N, L, des_ic, des_mi, iterations=50000, take_stock=None,
                eta=0.01, alpha=1, beta=0):
    if take_stock is None:
        take_stock = int((N * L) * log(N * L))
    x = random_motif(L, N)
    xs = [None] * iterations
    ics = [0.0] * iterations
    mis = [0.0] * iterations
    alphas = [0.0] * iterations
    betas = [0.0] * iterations
    ic = motif_ic(x)
    mi = total_motif_mi(x)
    accepts = 0
    for i in xrange(iterations):
        # if i == iterations/2:
        #     eta *= 0.1
        xp = mutate_motif(x)
        icp = motif_ic(xp)
        mip = total_motif_mi(xp)
        log_y = alpha * ic + beta * mi
        log_yp = alpha * icp + beta * mip
        if log(random.random()) < log_yp - log_y:
            accepts += 1
            x = xp
            ic = icp
            mi = mip
        ics[i] = ic
        mis[i] = mi
        xs[i] = x
        # print sum(site.count("A") for site in x)
        alphas[i] = alpha
        betas[i] = beta
        if i > 0 and i % take_stock == 0:
            mean_ic = mean(ics[i - take_stock:i])
            mean_mi = mean(mis[i - take_stock:i])
            if i < iterations / 10:
                # Early phase: stochastic-approximation updates of the
                # multipliers with a decaying step size.
                alpha += eta * (des_ic - mean_ic) * exp(-i / (10 * float(iterations)))
                beta += eta * (des_mi - mean_mi) * exp(-i / (10 * float(iterations)))
            else:
                # Later phase: extrapolate alpha and beta from the observed
                # (statistic, multiplier) history by linear regression.
                alpha = poly1d(polyfit(ics[:i], alphas[:i], 1))(des_ic)
                beta = poly1d(polyfit(mis[:i], betas[:i], 1))(des_mi)
            fmt_string = ("mean ic: % 1.2f, mean mi: % 1.2f, "
                          "alpha: % 1.2f, beta: % 1.2f"
                          % (mean_ic, mean_mi, alpha, beta))
            print i, "AR:", accepts / (i + 1.0), fmt_string
    return xs, ics, mis, alphas, betas
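# A hypothetical usage sketch for match_ic_mi: run the adaptive sampler
# toward target IC and MI values (the targets, chain length, and burn-in
# fraction below are arbitrary choices):
def demo_match_ic_mi():
    xs, ics, mis, alphas, betas = match_ic_mi(N=50, L=10, des_ic=8.0,
                                              des_mi=0.5, iterations=10000)
    print "mean IC over second half of chain:", mean(ics[len(ics) // 2:])
    return xs[len(xs) // 2:]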
def sample_pw_motif_mh(code, N, Ne, mu, iterations=50000):
    nu = Ne - 1

    def log_f(motif):
        eps = map(lambda x: -log(x), pw_prob_sites(motif, code))
        return sum(log(1 / (1 + exp(ep - mu)) ** nu) for ep in eps)

    prop = mutate_motif
    L = len(code) + 1
    x0 = random_motif(L, N)
    return mh(log_f, prop, x0, cache=True, use_log=True, iterations=iterations)
def sella_hirsch_mh_sampling(n=16, L=16, G=1000, N=100, sigma=1,
                             iterations=50000):
    Zb = compute_Zb(n, L, sigma, G)
    nu = N - 1

    def fitness(motif):
        eps = [sigma * sum(b != "A" for b in site) for site in motif]
        fg = sum(exp(-sigma * ep) for ep in eps)
        return fg / (fg + Zb)

    def log_p(motif):
        return nu * log(fitness(motif))

    def proposal(motif):
        p = 4.0 / (n * L)
        return mutate_motif_p(motif, p)

    x0 = random_motif(L, n)  # (site length, number of sites), as elsewhere
    chain = mh(log_p, proposal, x0, use_log=True, iterations=iterations)
    return chain
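# Note on the exponent nu = N - 1 above: under the Sella-Hirsch
# weak-mutation framework, a haploid population of size N has stationary
# distribution proportional to fitness(x)**(N - 1), so running
# Metropolis-Hastings on log_p = nu * log(fitness) samples the
# mutation-selection-drift equilibrium directly.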
def sella_hirsch_mh_penalize_mu(Ne=5, n=16, L=16, G=5 * 10 ** 6, sigma=1,
                                alpha=0.01, init="random", matrix=None,
                                x0=None, iterations=50000, p=None):
    print "p:", p
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    if x0 is None:
        if init == "random":
            x0 = (random_motif(L, n), random.gauss(0, 1))
        elif init == "ringer":
            x0 = (ringer_motif(matrix, n), random.gauss(0, 1))
        elif init == "anti_ringer":
            x0 = (anti_ringer_motif(matrix, n), random.gauss(0, 1))
        else:
            x0 = init
    if p is None:
        p = 1.0 / (n * L)
    nu = Ne - 1

    def log_f((motif, mu)):
        return nu * log_fitness_penalize_mu(matrix, motif, mu, alpha)

    def prop((motif, mu)):
        motif_p = mutate_motif_p(motif, p)  # probability of mutation per basepair
        mu_p = mu + random.gauss(0, 0.1)
        return motif_p, mu_p

    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations)
    return matrix, chain
def sella_hirsch_mh(Ne=5, n=16, L=16, sigma=1, mu=0, init="random",
                    matrix=None, x0=None, iterations=50000, p=None):
    print "p:", p
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    else:
        L = len(matrix)
    if x0 is None:
        if init == "random":
            x0 = random_motif(L, n)
        elif init == "ringer":
            x0 = ringer_motif(matrix, n)
        elif init == "anti_ringer":
            x0 = anti_ringer_motif(matrix, n)
        else:
            x0 = init
    if p is None:
        p = 1.0 / (n * L)
    nu = Ne - 1

    def log_f(motif):
        return nu * log_fitness(matrix, motif, mu)

    def prop(motif):
        return mutate_motif_p(motif, p)  # probability of mutation per basepair

    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations)
    return matrix, chain
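# A minimal usage sketch for sella_hirsch_mh (assumes, as elsewhere here,
# that mh returns the list of sampled states; discarding the first half of
# the chain as burn-in is an arbitrary choice):
def demo_sella_hirsch_mh():
    matrix, chain = sella_hirsch_mh(Ne=5, n=16, L=16, sigma=1, mu=0)
    ics = [motif_ic(motif) for motif in chain[len(chain) // 2:]]
    print "mean IC after burn-in:", mean(ics)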
def estremo(iterations=50000, verbose=False, every=1, sigma=1, mu=-10, Ne=5):
    nu = Ne - 1

    def log_f((code, motif)):
        eps = map(lambda x: -log(x), pw_prob_sites(motif, code))
        return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in eps)

    def prop((code, motif)):
        code_p = [d.copy() for d in code]
        i = random.randrange(len(code))
        b1, b2 = random.choice("ACGT"), random.choice("ACGT")
        code_p[i][(b1, b2)] += random.gauss(0, sigma)
        motif_p = mutate_motif(motif)
        return (code_p, motif_p)

    x0 = (sample_code(L=10, sigma=1), random_motif(length=10, num_sites=20))
    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations,
               verbose=verbose, every=every)
    return chain
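# A usage sketch for estremo (hypothetical parameter values; the chain
# holds (code, motif) pairs, so downstream analysis can unpack either
# component):
def demo_estremo():
    chain = estremo(iterations=1000)
    final_code, final_motif = chain[-1]
    print "final motif IC:", motif_ic(final_motif)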
def random_genotype(n, L, linear_sigma, pairwise_sigma, copies):
    motif = random_motif(L, n)
    pwm = sample_matrix(L, linear_sigma)
    pairwise_weights = [[[random.gauss(0, pairwise_sigma) for i in range(4)]
                         for j in range(4)]
                        for k in range(L - 1)]
    return motif, copies, (pwm, pairwise_weights)
def log_ZM_empirical_ref2(theta, N, trials=1000):
    L = len(theta[0])
    lfhs = [log_fhat(theta, random_motif(L, N)) for _ in xrange(trials)]
    return N * L * log(4) + logsum(lfhs) - log(trials)
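# Derivation of the estimator above: Z_M = sum over all 4^(N*L) motifs m
# of fhat(m) = 4^(N*L) * E[fhat(m)] for m drawn uniformly at random, so a
# simple Monte Carlo estimate in log space is
#   log Z_M ~= N*L*log(4) + logsum(lfhs) - log(trials),
# where logsum is assumed to compute log(sum(exp(...))) stably.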
def motif_mh(L, n, desired_ic, mu=1.0, iterations=50000):
    # Sample motifs with log-density mu * IC(motif); mu is a hypothetical
    # tilting parameter that must be tuned so the chain's mean IC matches
    # desired_ic (cf. match_ic_mi above).
    x0 = random_motif(L, n)

    def logf(motif):
        return mu * motif_ic(motif, correct=False)

    return mh(logf, mutate_motif, x0, use_log=True, iterations=iterations)
def init_species():
    return random_motif(n, L)  # assumes module-level globals n and L