def sample_entropic_prior(K, eta=10**-3):
    """Draw a vector on K outcomes whose target entropy is chosen at random.

    A target ``hf`` is drawn as ``random.random() * log2(K)``.  Fresh
    ``simplex_sample(K)`` starting points are then passed to ``flow_to_h``
    (with the given ``eta``) until it returns something other than ``None``;
    that result is returned.
    """
    entropy_ceiling = log2(K)
    hf = random.random() * entropy_ceiling
    while True:
        # flow_to_h signals failure with None; retry from a new start point.
        result = flow_to_h(hf, simplex_sample(K), eta=eta)
        if result is not None:
            return result
def hdist(desired_ent, n):
    """Return distribution on n outcomes with entropy <= desired_ent (bits).

    Rejection sampling: ``simplex_sample(n)`` is redrawn until ``h`` of the
    draw no longer exceeds ``desired_ent``.

    At least one sample is always drawn.  The previous version seeded the
    loop with ``ent = n`` and therefore returned an unbound variable
    whenever ``desired_ent >= n``.

    NOTE(review): the loop does not terminate if no draw can satisfy the
    bound (e.g. a negative ``desired_ent``) -- confirm callers never do this.
    """
    ps = simplex_sample(n)
    while h(ps) > desired_ent:
        ps = simplex_sample(n)
    return ps
def rvector(beta, K=4):
    """Rejection-sample a vector produced by ``simplex_sample(K)``.

    A candidate is returned as soon as a fresh ``random.random()`` draw is
    below ``exp(-beta * h(candidate))``; otherwise a new candidate is drawn.
    """
    while True:
        candidate = simplex_sample(K)
        uniform_draw = random.random()
        if uniform_draw < exp(-beta * h(candidate)):
            return candidate
def rvector(beta, K=4):
    """Keep drawing ``simplex_sample(K)`` until one draw is accepted.

    Acceptance test: a uniform ``random.random()`` value must be strictly
    less than ``exp(-beta * h(proposal))``.  The accepted proposal is
    returned.
    """
    while True:
        proposal = simplex_sample(K)
        draw = random.random()
        weight = exp(-beta * h(proposal))
        if not draw < weight:
            # Rejected: try again with a new proposal.
            continue
        return proposal
def fourier_check3():
    """Print the L1 error of rebuilding a random vector from its Fourier coefficients.

    A vector ``ps`` of length 4**L (L = 2) is drawn with ``simplex_sample``.
    Its coefficients ``fourier(ps)`` are used to weight the rows of
    ``transpose(V)``, where ``V`` is the eigenvector matrix returned by
    ``np.linalg.eig`` for ``norm_laplacian(L)``.  ``L1(ps, reconstruction)``
    is printed.

    The Laplacian is built once; the earlier version built it twice and
    left one copy unused.
    """
    L = 2
    K = int(4**L)
    ps = np.array(simplex_sample(K))
    # Only the eigenvectors are needed here; the eigenvalues are discarded.
    _, V = np.linalg.eig(norm_laplacian(L))
    ps_hat = fourier(ps)
    reconstruction = sum(ph*np.array(v) for ph, v in zip(ps_hat, transpose(V)))
    print(L1(ps, reconstruction))
def sample_measure_sanity(n, sigma):
    """Rejection-sample ``simplex_sample(n)`` against ``exp(log_measure(., sigma))``.

    For each candidate, ``p = exp(log_measure(candidate, sigma))`` is
    computed.  The candidate is returned when a ``random.random()`` draw is
    below ``p / 2``.

    Raises:
        Exception: if ``p`` exceeds 1 for any candidate.
    """
    while True:
        candidate = simplex_sample(n)
        density = exp(log_measure(candidate, sigma))
        if density > 1:
            raise Exception("p > 1")
        accepted = random.random() < density / 2
        if accepted:
            return candidate
def plot_flattened_transport(n):
    """Plot n projected transport trajectories on a 3-D axis.

    Draws the closed triangle through (1,0,0), (0,1,0), (0,0,1) first.
    Then, n times, it takes ``p = simplex_sample(3)``, maps
    ``project_to_simplex`` over ``circular_transport(p, q)`` and plots the
    result.  ``q`` is ``project_to_simplex`` applied to (1, 1, 1).
    """
    figure = plt.figure()
    axes = figure.add_subplot(111, projection='3d')

    # Closed outline: the first vertex is repeated to close the triangle.
    outline = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]]
    out_x, out_y, out_z = transpose(outline)
    axes.plot(out_x, out_y, out_z)

    q = project_to_simplex(np.array([1.0, 1.0, 1.0]))
    for _ in range(n):
        start = simplex_sample(3)
        path = [project_to_simplex(point)
                for point in circular_transport(start, q)]
        axes.plot(*transpose(path))
def grad_descent(num_cols, iterations=100, eta=1):
    """Iterate ``p += eta * grad(p) * min(p)`` and record every iterate.

    The start point is ``np.array(simplex_sample(num_cols))``.

    Returns:
        A list of ``iterations + 1`` array copies: the start point followed
        by the state after each update.
    """
    current = np.array(simplex_sample(num_cols))
    history = [np.copy(current)]
    for _ in xrange(iterations):
        # The step is scaled by the smallest coordinate of the current point.
        step = grad(current) * min(current)
        current += eta * step
        history.append(np.copy(current))
    return history
def test_measure_sanity(sigma=1, N=3, test_trials=100, trials=1000):
    """Scatter ``measure`` against ``measure_sanity`` on random points.

    For ``test_trials`` draws of ``simplex_sample(N)``, both
    ``measure(xs, sigma)`` and ``measure_sanity(xs, sigma, trials)`` are
    evaluated.  The pairs are scattered with a reference line from (0, 0)
    to (1, 1), the ``pearsonr`` result is printed, and the figure is shown.
    """
    direct_values = []
    sanity_values = []
    for _ in trange(test_trials):
        point = simplex_sample(N)
        direct_values.append(measure(point, sigma))
        sanity_values.append(measure_sanity(point, sigma, trials))

    plt.scatter(direct_values, sanity_values)
    plt.plot([0, 1], [0, 1])
    print(pearsonr(direct_values, sanity_values))
    plt.show()
def fourier_check2(): """verify identity: F[L[p]] = Lambdas*ps_hat""" L = 2 K = int(4**L) ps = np.array(simplex_sample(K)) Lap = norm_laplacian(L) lambdas, V = np.linalg.eig(norm_laplacian(L)) ans1 = fourier(Lap.dot(ps)) ps_hat = fourier(ps) ans2 = np.diag(lambdas).dot(ps_hat) print "L1 error:",L1(ans1,ans2)
def minimize_dHdt_test(): L = 4 K = int(4**L) for i in range(10): ps = np.array(simplex_sample(K)) print "marginalizing" qs = qs_from_psfm(marginalize(ps)) print "sampling wtih given entropy" rs = sample_with_given_entropy(K,h(qs),tol_factor=10**-6) print "minimizing" qsp = minimize_dHdt(qs) rsp = minimize_dHdt(rs) print "qs:",dHdt(qs),dHdt(qsp) print "rs:",dHdt(rs),dHdt(rsp)
def what_is_fourier_independence():
    """Compare Fourier coefficients of ``ps`` and ``qs_from_psfm(marginalize(ps))``.

    For 100 random vectors ``ps`` of length 4**L (L = 2), both transforms
    are computed.  Each round prints whether the summed absolute
    coefficients of ``ps`` are at least those of ``qs``.  After the loop
    the coefficient differences are plotted.

    Returns:
        ``(ps_hats, qs_hats)``: the two lists of transforms, in draw order.
    """
    L = 2
    K = int(4**L)
    ps_hats = []
    qs_hats = []
    diffs = []
    for _ in range(100):
        ps = np.array(simplex_sample(K))
        qs = qs_from_psfm(marginalize(ps))
        ps_hat = fourier(ps)
        qs_hat = fourier(qs)
        dominates = sum(abs(ps_hat)) >= sum(abs(qs_hat))
        print(dominates)  # always true!
        ps_hats.append(ps_hat)
        qs_hats.append(qs_hat)
        diffs.append(ps_hat - qs_hat)
    plt.plot(transpose(diffs))
    # coefficients 0,3,4,5,9,10,11 are the same, so must control
    # columnwise probabilities (8-1 = 7 df)
    return ps_hats, qs_hats
def sample_qs(L, req_entropy, col_tol=0.001):
    """Given L and required entropy, sample an independent distribution
    from the set of psfms meeting those criteria.  Return a full "qs"
    vector of length 4^L (as produced by ``qs_from_psfm``).

    The total entropy is first split across the L columns by scaling a
    ``simplex_sample(L)`` draw by ``req_entropy``.  The split is redrawn
    until every column receives at most 2 bits.  Each column is then drawn
    with ``sample_with_given_entropy(4, column_entropy)``.

    Args:
        L: number of columns.
        req_entropy: total entropy to distribute over the columns.
        col_tol: currently unused; kept for interface compatibility.

    Raises:
        ValueError: if ``req_entropy > 2 * L``.  The column entropies sum
            to ``req_entropy``, so none of the splits could have every
            column at or below 2 bits, and the rejection loop would never
            terminate.
    """
    if req_entropy > 2 * L:
        raise ValueError(
            "req_entropy exceeds the 2 bits/column maximum for L columns")

    # first decide how much entropy in each column
    print("assigning entropies to columns")
    while True:
        col_ents = np.array(simplex_sample(L)) * req_entropy
        if all(col_ents <= 2):  # bits/base
            break

    print("assigning column:")
    cols = [sample_with_given_entropy(4, col_ents[j]) for j in tqdm(range(L))]
    return qs_from_psfm(cols)
def entropy_hessian_experiment():
    """Plot how ``dHdt`` changes under small steps along ``entropic_isocontour`` vectors.

    Three vectors of length 4**L (L = 3) are examined:
      * ``ps``: a ``simplex_sample`` draw (red),
      * ``qs``: ``qs_from_psfm(marginalize(ps))`` (green),
      * ``rs``: ``sample_with_given_entropy(K, h(qs), tol_factor=10**-2)`` (blue).
    For each, ``dHdt`` is evaluated at the vector itself (dashed line,
    repeated K-2 times) and at ``normalize1(vector + bv * eps)`` for every
    ``bv`` returned by ``entropic_isocontour(vector)`` (solid line).
    """
    L = 3
    K = int(4**L)
    eps = 10**-6

    ps = np.array(simplex_sample(K))
    qs = qs_from_psfm(marginalize(ps))
    rs = sample_with_given_entropy(K, h(qs), tol_factor=10**-2)
    vectors = [ps, qs, rs]

    # Stage 1: direction vectors for all three, in the order ps, qs, rs.
    directions = [entropic_isocontour(vec) for vec in vectors]

    # Stage 2: dHdt at each base point, then at each perturbed point.
    base_rates = []
    perturbed_rates = []
    for vec, bvs in zip(vectors, directions):
        base_rates.append(dHdt(vec))
        perturbed_rates.append(
            [dHdt(normalize1(vec + bv*eps)) for bv in bvs])

    # Stage 3: one solid and one dashed curve per vector.
    for colour, base, perturbed in zip(['r', 'g', 'b'],
                                       base_rates, perturbed_rates):
        plt.plot(perturbed, color=colour)
        plt.plot([base]*(K-2), color=colour, linestyle='--')
    plt.show()
def baum_welch(obs,L): """Given sequence and bs length L, approximate MLE parameters for emission probabilities,transition rate a01 (background->site). TODO: non-uniform background frequencies""" states = range(L+1) a01 = random.random() start_p = make_start_p(a01) trans_p = make_trans_p(a01) emit_p = [simplex_sample(4) for state in states] hidden_states = [random.choice(states) for ob in obs] iterations = 0 while True: # compute hidden states, given probs prob,hidden_states_new = viterbi(obs, states, start_p, trans_p, emit_p) # compute probs, given hidden states # first compute a01 a01_new = estimate_a01(hidden_states_new) start_p_new = make_start_p(a01_new) trans_p_new = make_trans_p(a01_new) emit_p_new = estimate_emit_p(obs,hidden_states_new,states) if (start_p_new == start_p and trans_p_new == trans_p and emit_p_new == emit_p and hidden_states_new == hidden_states): break else: print iterations,a01,l2(start_p,start_p_new), print l2(concat(trans_p),concat(trans_p_new)), print l2((hidden_states),hidden_states_new) a01 = a01_new start_p = start_p_new trans_p = trans_p_new emit_p = emit_p_new hidden_states = hidden_states_new iterations += 1 return start_p,trans_p,emit_p,hidden_states
def simplex_integrate(f, n, sigma, trials=1000):
    """Average ``f(x) * logit_measure(x, sigma) / fac(n - 1)`` over random draws.

    ``x`` is drawn ``trials`` times with ``simplex_sample(n)``; the terms
    are handed to ``mean`` as a generator and its result is returned.
    """
    def weighted_value(point):
        return f(point) * logit_measure(point, sigma) / fac(n - 1)

    return mean(weighted_value(simplex_sample(n)) for _ in xrange(trials))
def measure_test3_integrate_to_one(sigma=1, N=3, trials=1000):
    """check to see that measure integrates to 1

    Returns the ``mean`` of ``measure(simplex_sample(N), sigma)`` over
    ``trials`` draws (iterated with ``trange``).

    ``sigma`` is now passed to ``measure``.  Previously the argument was
    ignored and 1 was always used, so only the default value was ever
    checked.
    """
    return mean(measure(simplex_sample(N), sigma) for _ in trange(trials))
def fourier_check1(): L = 2 K = int(4**L) ps = np.array(simplex_sample(K)) print "L1 error:",L1(ps,inv_fourier(fourier(ps)))
def sample_measure2(n, sigma):
    """Run ``mh`` targeting ``measure2(., sigma)`` and return its chain.

    The start state and every proposal are fresh ``simplex_sample(n)``
    draws; the proposal ignores the current state.
    """
    def target(xs):
        return measure2(xs, sigma)

    def independent_proposal(xs):
        # The current state is deliberately unused.
        return simplex_sample(n)

    start = simplex_sample(n)
    return mh(target, independent_proposal, start)
def kronecker_test(L=2):
    """Return the L1 distance between ``qs_from_psfm`` and ``qs_from_psfm_spec``.

    Both are evaluated on ``marginalize(ps)``, where ``ps`` is a random
    vector of length 4**L from ``simplex_sample``.
    """
    K = int(4**L)
    joint = np.array(simplex_sample(K))
    psfm = marginalize(joint)
    reference = qs_from_psfm(psfm)
    candidate = qs_from_psfm_spec(psfm)
    return L1(reference, candidate)
def mh_sample(K, iterations=50000):
    """Run ``mh`` with target ``1 / h(p)**K`` from a random start.

    Proposals come from ``propose(p, sigma=1)``; the start state is
    ``np.array(simplex_sample(K))``.  Returns whatever ``mh`` returns.
    """
    def target(p):
        return 1/h(p)**K

    def step(p):
        return propose(p, sigma=1)

    start = np.array(simplex_sample(K))
    return mh(target, step, start, iterations=iterations)
def random_simplex(L):
    """Return a list of L independent ``simplex_sample(4)`` draws."""
    columns = []
    for _ in range(L):
        columns.append(simplex_sample(4))
    return columns
def sample_ps(K):
    """Return one ``simplex_sample(K)`` draw as a numpy array."""
    draw = simplex_sample(K)
    return np.array(draw)
def sample(num_cols):
    """Return a ``simplex_sample`` draw of length 4**num_cols as a numpy array."""
    size = 4**num_cols
    draw = simplex_sample(size)
    return np.array(draw)
def random_stochastic_matrix(K):
    """Return a K-row numpy array whose rows are ``simplex_sample(K)`` draws."""
    rows = []
    for _ in range(K):
        rows.append(simplex_sample(K))
    return np.array(rows)
def random_simplex_vector(K):
    """Wrap a single ``simplex_sample(K)`` draw in a numpy array."""
    vector = simplex_sample(K)
    return np.array(vector)