Exemplo n.º 1
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by acceptance-rejection on the weight phat(ep).

    Candidates are drawn with random_site(L) and scored with
    score_seq(matrix, site); a candidate with score ep is accepted with
    probability phat(ep) / p_max, where
    phat(ep) = (1 / (1 + exp(ep - mu)))**(Ne - 1).

    Args:
        matrix: sequence of rows of numeric entries; len(matrix) is used as
            the site length and the row-wise min/max sums bound the score.
        mu: offset inside the logistic term of phat.
        Ne: phat is raised to the power Ne - 1.
        N: number of accepted sites to return.

    Returns:
        A list of N accepted sites, as produced by random_site.
    """
    nu = Ne - 1
    L = len(matrix)
    ep_min, ep_max = sum(map(min,matrix)), sum(map(max,matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    phat = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1 # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    def mean_ep(lamb):
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])
    # NOTE(review): the tilted PSFM (mean score matched to `mode`) is computed
    # here but the loop below still proposes with random_site(L); presumably
    # the tilted proposal was meant to replace it -- TODO confirm.
    lamb = bisect_interval(lambda l:mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log,row) for row in tilted_psfm]
    # The original divided by an undefined name `pmode`.  phat is monotone in
    # ep, so its largest value on [ep_min, ep_max] is at one of the endpoints;
    # using that as the envelope keeps the acceptance ratio <= 1 (assuming
    # score_seq stays within the row-wise min/max sums).
    p_max = max(phat(ep_min), phat(ep_max))
    motif = []
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep)/p_max:
            motif.append(site)
    return motif
Exemplo n.º 2
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by acceptance-rejection on the weight phat(ep).

    Candidates are drawn with random_site(L) and scored with
    score_seq(matrix, site); a candidate with score ep is accepted with
    probability phat(ep) / p_max, where
    phat(ep) = (1 / (1 + exp(ep - mu)))**(Ne - 1).

    Args:
        matrix: sequence of rows of numeric entries; len(matrix) is used as
            the site length and the row-wise min/max sums bound the score.
        mu: offset inside the logistic term of phat.
        Ne: phat is raised to the power Ne - 1.
        N: number of accepted sites to return.

    Returns:
        A list of N accepted sites, as produced by random_site.
    """
    nu = Ne - 1
    L = len(matrix)
    ep_min, ep_max = sum(map(min, matrix)), sum(map(max, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    phat = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1  # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit

    def mean_ep(lamb):
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([
            ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
            for (ep, p) in zip(mat_row, psfm_row)
        ])

    # NOTE(review): the tilted PSFM (mean score matched to `mode`) is computed
    # here but the loop below still proposes with random_site(L); presumably
    # the tilted proposal was meant to replace it -- TODO confirm.
    lamb = bisect_interval(lambda l: mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log, row) for row in tilted_psfm]

    # The original divided by an undefined name `pmode`.  phat is monotone in
    # ep, so its largest value on [ep_min, ep_max] is at one of the endpoints;
    # using that as the envelope keeps the acceptance ratio <= 1 (assuming
    # score_seq stays within the row-wise min/max sums).
    p_max = max(phat(ep_min), phat(ep_max))
    motif = []
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep) / p_max:
            motif.append(site)
    return motif
Exemplo n.º 3
0
def main(G=5000000,iterations=50000,init_matrix=None,init_mu=None,verbose=True):
    """Test case for FD-inference"""
    print "generating genome"
    genome = random_site(G)
    print "generating eps"
    eps = score_genome_np(TRUE_ENERGY_MATRIX,genome)
    min_mu,max_mu = -40,0
    mu = bisect_interval(lambda mu:np.sum(fd_solve_np(eps,mu))-q,min_mu,max_mu,verbose=True,tolerance=1e-1)
    print "computing ps"
    true_ps = fd_solve_np(eps,mu)
    print "true q:",np.sum(true_ps)
    print "generating chip dataset"
    mapped_reads = np.array(map_reads_np(chip_ps_np(true_ps,MEAN_FRAGMENT_LENGTH,NUM_CELLS_ORIGINAL),G))
    print "finished chip dataset"
    if init_matrix is None:
        init_matrix = random_energy_matrix(w)
    if init_mu is None:
        init_mu = -20#random.random()*40 - 20
    init_scores = score_genome_np(init_matrix,genome)
    init_state = ((init_matrix,init_mu),init_scores)
    logf = lambda state:complete_log_likelihood(state,mapped_reads)
    print "true mu:",mu
    print "true log_likelihood:",logf(((TRUE_ENERGY_MATRIX,mu),eps))
    rprop = lambda state:complete_rprop(state,genome)
    print "hitting mh loop"
    matrix_chain = mh(logf,proposal=rprop,x0=init_state,dprop=log_dprop,capture_state=capture_state,verbose=verbose,use_log=True,iterations=iterations,modulus=100)
    return matrix_chain,genome,mapped_reads
Exemplo n.º 4
0
def bisect_interval_noisy(f, epsilon=0.01, sigma=None, debug=False):
    """find zero of stochastic function f using linear regression"""
    print "in bisect"
    xmin = 1
    xmax = 2
    xs = [xmin, xmax]
    print xmin, xmax
    print f(1)
    ys = map(f, xs)
    print ys
    print "ys[-1]:", ys[-1]
    while ys[-1] < 0:
        xmax += 1
        xs.append(xmax)
        y = f(xmax)
        ys.append(y)
    xs2 = [x + xs[-1] for x in xs]
    ys2 = map(f, xs2)
    xs = xs + xs2
    ys = ys + ys2
    #xs = list(np.linspace(lb,ub,10))
    #ys = map(f,xs)
    print "xs,ys:", xs, ys
    i = 1
    while sd(xs[-3:]) > epsilon:
        print "starting round", i
        i += 1
        ### select xp
        # m = (y2-y1)/float(x2-x1)
        # xp = -y1/m + x1
        # yp = f(xp)
        if sigma is None:
            print "interpolating on:", xs, ys
            r = kde_interpolate(xs, ys, sigma=sd(xs) / 3.0)
        else:
            r = kde_interpolate(xs, ys, sigma=sigma)
        try:
            xp = bisect_interval(r, min(xs), max(xs))
            print "selected xp:", xp
        except:
            "secant regression failed!"
            Exception()
        if debug:
            plt.scatter(xs, ys)
            plt.plot(*pl(r, np.linspace(min(xs), max(xs), 1000)))
            plt.plot([xp, xp], [-10, 10])
            plt.plot([min(xs), max(xs)], [0, 0])
            plt.show()
        yp = f(xp)
        ### end select xp
        print "xp,yp:", xp, yp
        xs.append(xp)
        ys.append(yp)
        #js = sorted_indices(xs)
        #xs = rslice(xs,js)
        #ys = rslice(ys,js)
        #assert xs == sorted(xs)
    return xp, (xs, ys)
Exemplo n.º 5
0
def predict_ic(matrix, mu, Ne, N=100):
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min, matrix)), sum(map(max,
                                                       matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1) * dnorm(
        ep, 0, site_sigma) * (ep_min <= ep <= ep_max)
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep) / dmode:
            eps.append(ep)
    #return eps
    des_mean_ep = mean(eps)
    des_mean_ep_analytic = integrate.quad(lambda ep: ep * density(ep), ep_min,
                                          ep_max)

    # print "des_means:", des_mean_ep, des_mean_ep_analytic
    # print "min ep: %s max_ep: %s des_mean_ep: %s" % (ep_min, ep_max, des_mean_ep)
    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([
                ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                for (ep, p) in zip(mat_row, psfm_row)
            ])
        except:
            print matrix, lamb
            raise Exception

    try:
        lamb = bisect_interval(lambda l: mean_ep(l) - des_mean_ep, -20, 20)
    except:
        print matrix, mu, Ne
        raise Exception
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
Exemplo n.º 6
0
def spoof_pmotifs(motif, num_motifs=10, trials=1):
    """Generate num_motifs pmotif samples whose IC matches that of motif.

    The parameter p is chosen by evaluating the (negated) mean IC
    difference on 100 points of [0, 0.75], smoothing those values with
    kde_regress and bisecting the smoothed curve for its zero.

    Args:
        motif: sequence of equal-length sites; len(motif) and
            len(motif[0]) are passed to pmotif as n and L.
        num_motifs: number of motifs to return.
        trials: number of pmotif draws averaged per evaluated p.

    Returns:
        A list of num_motifs motifs, each produced by pmotif(n, L, p).
    """
    n = len(motif)
    L = len(motif[0])
    des_ic = motif_ic(motif)

    def f(p):
        # a list (not a generator) so that mean works even if it needs len()
        return -mean(
            [motif_ic(pmotif(n, L, p)) - des_ic for _ in range(trials)])

    lb = 0
    ub = 0.75
    xs = np.linspace(lb, ub, 100)
    ys = map(f, xs)
    fhat = kde_regress(xs, ys)
    p = bisect_interval(fhat, lb, ub, verbose=False, tolerance=10**-3)
    # the original read `or _ in xrange(...)`, which yields at most one motif
    return [pmotif(n, L, p) for _ in xrange(num_motifs)]
Exemplo n.º 7
0
def spoof_motif(motif, T):
    """Return predict_stat for motif at the Ne whose predicted IC matches it.

    Ne is found by bisection on [0.01, 5] so that predict_stat, evaluated
    with mean_ic_from_rho as its statistic, equals motif_ic(motif); the
    final value is predict_stat evaluated at that Ne with the caller's T.
    """
    n = len(motif)
    L = len(motif[0])
    bio_ic = motif_ic(motif)
    sigma = 2 * mean(map(sd, make_pssm(motif)))  # XXX REVISIT THIS ISSUE

    def mean_ic_stat(rho):
        return mean_ic_from_rho(rho, n, L)

    def ic_gap(Ne):
        predicted_ic = predict_stat(n, L, sigma, Ne, G=5 * 10**6,
                                    T=mean_ic_stat)
        return predicted_ic - bio_ic

    Ne = bisect_interval(ic_gap, 0.01, 5)
    # NOTE(review): T is passed positionally here but by keyword (after G)
    # above -- confirm the fifth positional parameter of predict_stat is T.
    return predict_stat(n, L, sigma, Ne, T)
Exemplo n.º 8
0
def predict_ic_from_theta(theta, L):
    """Sample a matrix for theta and prepare importance-sampling terms.

    theta is unpacked as (sigma, mu, Ne).  A matrix is drawn with
    sample_matrix(L, sigma), a PSFM is tilted so that its mean score equals
    des_ep, 100 sites are sampled from it, and for each site the log target
    weight (log_ps) and the log proposal score (log_qs) are computed.

    NOTE(review): the visible definition ends after computing log_ps and
    log_qs without returning anything; the remainder of the function is
    presumably missing -- TODO restore the final computation/return.
    """
    sigma, mu, Ne = theta
    nu = Ne - 1
    ep_star = mu - log(Ne - 1)
    matrix = sample_matrix(L, sigma)
    ep_min = sum(map(min, matrix))
    des_ep = max(ep_star, ep_min + 1)
    def f(lamb):
        psfm = psfm_from_matrix(matrix, lamb)
        return sum([sum(ep*p for ep,p in zip(eps, ps)) for eps, ps in zip(matrix, psfm)]) - des_ep
    # Solve for lamb first and build the PSFM from it: the original read
    # `psfm` at this scope before it existed (it was only a local of f).
    lamb = bisect_interval(f,-20,20)
    psfm = psfm_from_matrix(matrix, lamb)
    log_psfm = [[log(p) for p in ps] for ps in psfm]
    sites = ([sample_from_psfm(psfm) for i in range(100)])
    log_ps = [-nu*log(1+exp(score_seq(matrix, site) - mu)) for site in sites]
    log_qs = [score_seq(log_psfm, site) for site in sites]
Exemplo n.º 9
0
def predict_ic(matrix, mu, Ne, N=100):
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min,matrix)), sum(map(max,matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1) * dnorm(ep,0,site_sigma)*(ep_min <= ep <= ep_max)
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep)/dmode:
            eps.append(ep)
    #return eps
    des_mean_ep = mean(eps)
    des_mean_ep_analytic = integrate.quad(lambda ep:ep*density(ep), ep_min, ep_max)
    # print "des_means:", des_mean_ep, des_mean_ep_analytic
    # print "min ep: %s max_ep: %s des_mean_ep: %s" % (ep_min, ep_max, des_mean_ep)
    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                        for (ep, p) in zip(mat_row, psfm_row)])
        except:
            print matrix, lamb
            raise Exception
    try:
        lamb = bisect_interval(lambda l:mean_ep(l) - des_mean_ep, -20, 20)
    except:
        print matrix, mu, Ne
        raise Exception
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
Exemplo n.º 10
0
def metropolis_pb(ks,q,verbose=False,mu_offset=0,iterations=50000):
    """Run mh over states for ks with an rstate/dstate proposal.

    mu is the root (on [-50, 50]) of sum(fd(ep, mu)) - q over
    eps = -log(k), shifted by mu_offset.  Proposals are drawn with
    rstate(eps, mu) independently of the current state, and their density
    is supplied to mh through dstate.
    """
    G = len(ks)
    eps = [-log(k) for k in ks]

    def occupancy_excess(candidate_mu):
        return sum(fd(ep, candidate_mu) for ep in eps) - q

    mu = bisect_interval(occupancy_excess, -50, 50) + mu_offset

    def weight(ss):
        occupancy_term = falling_fac(q, sum(ss))
        return (occupancy_term * product(k**s for k, s in zip(ks, ss)))

    def proposal(ss):
        # the current state is ignored: every proposal is a fresh draw
        return rstate(eps, mu)

    def dprop(ss):
        return dstate(ss, eps, mu)

    start = proposal([0] * G)
    return mh(weight, proposal, start, dprop=dprop, verbose=verbose,
              iterations=iterations)
def Ne_from_motif(bio_motif,interp_rounds,iterations=50000):
    """Given a motif, return Ne that matches mean IC"""
    bio_ic = motif_ic(bio_motif)
    n = len(bio_motif)
    L = len(bio_motif[0])
    matrix = [[-ep for ep in row] for row in  make_pssm(bio_motif)]
    print len(matrix)
    def f(Ne,iterations=iterations):
        print "Ne",Ne
        _,chain = sella_hirsch_mh(matrix=matrix,n=n,Ne=Ne,iterations=iterations,init='ringer')
        return mean(map(motif_ic,chain[iterations/2:])) - bio_ic
    # lo,hi = 1,5
    # data = []
    # for _ in xrange(interp_rounds):
    #     guess = (lo + hi)/2.0
    #     y = f(guess)
    #     print lo,hi,guess,y
    #     data.append((guess,y))
    #     if y > 0:
    #         hi = guess
    #     else:
    #         lo = guess
    # return data
    Ne_min = 1
    Ne_max = 5
    while f(Ne_max) < 0:
        print "increasing Ne max"
        Ne_max *= 2
    xs, ys= transpose([(Ne,f(Ne)) for Ne in np.linspace(Ne_min,Ne_max,interp_rounds)])
    # now find an interpolant.  We desire smallest sigma of gaussian
    # interpolant such that function has at most one inflection point
    interp_sigmas = np.linspace(0.01,1,100)
    interps = [gaussian_interp(xs,ys,sigma=s) for s in interp_sigmas]
    for i,(sigma, interp) in enumerate(zip(interp_sigmas,interps)):
        print i,sigma
        if num_inflection_points(map(interp,np.linspace(Ne_min,Ne_max,100))) == 1:
            "found 1 inflection point"
            break
    print sigma
    Ne = bisect_interval(interp,Ne_min,Ne_max)
    return Ne
Exemplo n.º 12
0
def metropolis_uniform(ks,q,verbose=False,mu_offset=0,iterations=50000):
    """Run mh over states for ks with a local add/remove proposal.

    Each proposal copies the current state, clears one set position with
    probability sum(ss) / q, and then sets one position chosen uniformly
    from the unset positions of the current state plus a "none" option.
    """
    G = len(ks)
    eps = [-log(k) for k in ks]

    def occupancy_excess(candidate_mu):
        return sum(fd(ep, candidate_mu) for ep in eps) - q

    # solved as in the original; mu is not used by the proposal below
    mu = bisect_interval(occupancy_excess, -50, 50) + mu_offset

    def weight(ss):
        occupancy_term = falling_fac(q, sum(ss))
        return (occupancy_term * product(k**s for k, s in zip(ks, ss)))

    def proposal(ss):
        removal_prob = sum(ss) / float(q)
        remove_one = random.random() < removal_prob
        ss_new = ss[:]
        if remove_one:
            occupied = [i for (i, s) in enumerate(ss) if s]
            pos = random.choice(occupied)
            ss_new[pos] = 0
        # -1 stands for "add nothing"; candidates come from the old state
        candidates = [-1]
        candidates.extend(i for (i, s) in enumerate(ss) if not s)
        new_pos = random.choice(candidates)
        if new_pos >= 0:
            ss_new[new_pos] = 1
        return ss_new

    start = proposal([0] * len(ks))
    return mh(weight, proposal, start, verbose=verbose, iterations=iterations)
Exemplo n.º 13
0
def predict_ic_from_theta(theta, L):
    """Sample a matrix for theta and prepare importance-sampling terms.

    theta is unpacked as (sigma, mu, Ne).  A matrix is drawn with
    sample_matrix(L, sigma), a PSFM is tilted so that its mean score equals
    des_ep, 100 sites are sampled from it, and for each site the log target
    weight (log_ps) and the log proposal score (log_qs) are computed.

    NOTE(review): the visible definition ends after computing log_ps and
    log_qs without returning anything; the remainder of the function is
    presumably missing -- TODO restore the final computation/return.
    """
    sigma, mu, Ne = theta
    nu = Ne - 1
    ep_star = mu - log(Ne - 1)
    matrix = sample_matrix(L, sigma)
    ep_min = sum(map(min, matrix))
    des_ep = max(ep_star, ep_min + 1)

    def f(lamb):
        psfm = psfm_from_matrix(matrix, lamb)
        return sum([
            sum(ep * p for ep, p in zip(eps, ps))
            for eps, ps in zip(matrix, psfm)
        ]) - des_ep

    # Solve for lamb first and build the PSFM from it: the original read
    # `psfm` at this scope before it existed (it was only a local of f).
    lamb = bisect_interval(f, -20, 20)
    psfm = psfm_from_matrix(matrix, lamb)
    log_psfm = [[log(p) for p in ps] for ps in psfm]
    sites = ([sample_from_psfm(psfm) for i in range(100)])
    log_ps = [
        -nu * log(1 + exp(score_seq(matrix, site) - mu)) for site in sites
    ]
    log_qs = [score_seq(log_psfm, site) for site in sites]
Exemplo n.º 14
0
def solve_mu_for_copy_num(L, sigma, G, copy_num):
    """Return the mu in [-100, 100] at which total_occupancy equals copy_num."""

    def occupancy_gap(mu):
        return total_occupancy(L, sigma, G, mu) - copy_num

    return bisect_interval(occupancy_gap, -100, 100)
Exemplo n.º 15
0
def find_alpha(K,entropy,tol_factor=0.01):
    """Return the alpha for which expected_entropy(K, alpha) equals entropy.

    The root is bracketed between 10**-10 and 1/(log2(K) - entropy).
    tol_factor is accepted but not used.
    """
    def entropy_gap(alpha):
        return expected_entropy(K,alpha)-entropy

    upper = 1/(log2(K)-entropy)
    return bisect_interval(entropy_gap,10**-10,upper)
Exemplo n.º 16
0
def find_beta_for_mean_col_ic(n, desired_ic_per_col, tolerance=10**-2):
    """Return the beta at which 2 - mean_col_ent(n, beta) hits the target.

    The search interval is [-10, 100] for n < 100 and [-10, 1000]
    otherwise; tolerance is forwarded to bisect_interval.
    """

    def ic_gap(beta):
        ic_at_beta = 2 - mean_col_ent(n, beta)
        return ic_at_beta - desired_ic_per_col

    # hackish, upped in order to deal with CRP
    if n < 100:
        ub = 100
    else:
        ub = 1000
    return bisect_interval(ic_gap, -10, ub, verbose=False, tolerance=tolerance)
def mu_from(G,sigma,L,copy_num):
    """Return the mu in [-500, 500] at which copy_num_from equals copy_num."""
    def copy_num_gap(mu):
        return copy_num_from(G,sigma,L,mu) - copy_num
    return bisect_interval(copy_num_gap,-500,500)
def mu_from(G, sigma, L, copy_num):
    """Return the mu in [-500, 500] at which copy_num_from equals copy_num."""

    def copy_num_gap(mu):
        return copy_num_from(G, sigma, L, mu) - copy_num

    return bisect_interval(copy_num_gap, -500, 500)