def likelihood(X):
    """Return the BIC of a uniform model divided by the BIC of one component fit.

    ``X`` is indexed as a 2-D array: column 0 is read as the position,
    column 1 as the forward-strand weight and column 2 as the
    reverse-strand weight of each row.

    The ``component_bidir`` parameters are plug-in estimates taken
    straight from the data (middle row for the centre, weighted spread
    around that centre, inverse half-distance between the two strand
    means for the rate); no iterative fitting happens here.

    Returns ``U_BIC / EMG_BIC``; the uniform model is charged one
    parameter and the component model three.
    """
    n_rows = X.shape[0]
    positions = X[:, 0]
    forward = X[:, 1]
    reverse = X[:, 2]

    # Floor division: the midpoint is used as an array index, and true
    # division would hand numpy a float.
    mu = X[n_rows // 2, 0]

    N_reverse = np.sum(reverse)
    N_forward = np.sum(forward)
    N = N_reverse + N_forward
    pi = N_forward / float(N)

    mean_forward = np.average(positions, weights=forward)
    mean_reverse = np.average(positions, weights=reverse)
    lam = 1.0 / (0.5 * (mean_forward - mean_reverse))
    if lam < 0:
        # A negative rate is not usable; fall back to 1.
        lam = 1

    # Weighted standard deviation about mu (the name ``var`` is historical).
    var = math.sqrt(np.sum((positions - mu) ** 2 * (forward + reverse)) / N)

    # Uniform density over the covered positions: last position minus
    # first position.  The previous expression, X[0, -1] - X[0, 0],
    # subtracted a position from the first row's reverse count.
    vl = 1.0 / (X[-1, 0] - X[0, 0])
    U_ll = LOG(vl * pi) * N_forward + LOG(vl * (1 - pi)) * N_reverse
    U_BIC = -2 * U_ll + 1 * math.log(N_forward + N_reverse)

    EMG = component_bidir(mu, var, lam, 1.0, pi, None, foot_print=0)
    LL = sum([LOG(EMG.pdf(x, 1)) * y for x, y in zip(positions, forward)])
    LL += sum([LOG(EMG.pdf(x, -1)) * y for x, y in zip(positions, reverse)])
    EMG_BIC = -2 * LL + 3 * math.log(N)

    return U_BIC / EMG_BIC
def bayes_factor(X, std=10, lam=0.1, step_size=1, norm_to_max=True):
    """Score every row of ``X`` with a uniform / component log-likelihood ratio.

    ``X`` is indexed as a 2-D array: column 0 is the position, columns 1
    and 2 are the weights used with strand ``1`` and strand ``-1``.

    For each row ``i`` a window is grown in both directions until the
    position differs from ``X[i, 0]`` by at least
    ``3 * std + 1.0 / lam``.  Rows whose window runs off either end of
    ``X`` are skipped, so the result can be shorter than ``X``.

    Parameters
    ----------
    std, lam : passed to ``model.component_bidir`` and used for the
        window half-width.
    step_size : accepted for interface compatibility; the scan always
        advances one row at a time.
    norm_to_max : when true, divide every score by the largest score.

    Returns a list holding ``M2 / M1`` per scored row (``1`` when the
    uniform term ``M2`` is exactly zero).

    NOTE(review): this file defines ``bayes_factor`` a second time
    further down; that later definition is the one bound at import.
    """
    n_rows = X.shape[0]
    win = 3 * std + 1.0 / lam
    KS = []
    i = 0
    while i < n_rows:
        j, k = i, i
        while j < n_rows and (X[j, 0] - X[i, 0]) < win:
            j += 1
        while k >= 0 and (X[i, 0] - X[k, 0]) < win:
            k -= 1
        if j < n_rows and k >= 0:
            # Build the component only for windows that are actually scored.
            EMG = model.component_bidir(X[i, 0], std, lam, 1.0, 0.5, None)
            xs, fwd, rev = X[k:j, 0], X[k:j, 1], X[k:j, 2]
            M1 = sum([LOG(EMG.pdf(x, 1)) * y for x, y in zip(xs, fwd)])
            M1 += sum([LOG(EMG.pdf(x, -1)) * y for x, y in zip(xs, rev)])
            # 1.0 forces float division even for integer positions.
            log_l = LOG(1.0 / (X[-1, 0] - X[0, 0]))
            M2 = (sum([log_l * y for y in fwd])
                  + sum([log_l * y for y in rev]))
            if M2 == 0:
                KS.append(1)
            else:
                KS.append(M2 / M1)
        i += 1
    if norm_to_max and KS:
        # Take the maximum once instead of once per element.
        peak = max(KS)
        KS = [score / peak for score in KS]
    return KS
def draw_posterior(ax, ax_cbar):
    """Plot two simulated data sets on ``ax``, coloured by ``posterior``.

    Two data sets are produced with ``sim.runOne`` (centres -40 and 40)
    and drawn as mirrored bar charts: column 1 upwards, column 2
    downwards.  Bar colours come from ``posterior(x, rvs, Type=...)``
    pushed through a 'PuOr' colour map normalised to [0, 1]; ``Type`` is
    0 for the first data set and 3 for the second, which are the list
    positions of the two ``component_bidir`` entries.

    ``ax_cbar`` receives the matching vertical colour bar.
    """
    shared = dict(s=1, l=10, we=0.5, wl=0.25, wr=0.25, pie=0.5, pil=0.1,
                  pir=0.9, N=3000, SHOW=False, bins=200, noise=False,
                  foot_print=1)
    left_locus = sim.runOne(mu=-40, lr=40, ll=-200, **shared)
    right_locus = sim.runOne(mu=40, lr=200, ll=-40, **shared)

    components = [
        model.component_bidir(-40, 1, 0.1, 0.5, 0.5, None, foot_print=1),
        model.component_elongation(-40, 200, 0.25, 1.0, None, None, None, None),
        model.component_elongation(-200, -40, 0.25, 0, None, None, None, None),
        model.component_bidir(40, 1, 0.1, 0.5, 0.5, None, foot_print=1),
        model.component_elongation(40, 200, 0.25, 1.0, None, None, None, None),
        model.component_elongation(-200, 40, 0.25, 0, None, None, None, None),
    ]

    # Colour scale shared by the bars and the colour bar.
    scale = mpl.colors.Normalize(vmin=0, vmax=1)
    palette = plt.get_cmap('PuOr')
    mapper = cm.ScalarMappable(norm=scale, cmap=palette)

    colorbar = mpl.colorbar.ColorbarBase(ax_cbar, cmap=palette, norm=scale,
                                         orientation='vertical')
    colorbar.set_label(r'$p(k=paused|\hat{\theta})$')
    ax_cbar.yaxis.tick_right()
    ax_cbar.set_yticklabels(["0", "", "", "", "", "0.5", "", "", "", "", "1"])
    twin = ax_cbar.twinx()
    twin.set_yticks([])
    twin.yaxis.set_label_position("left")

    lowest, highest = 0, 0
    for locus, component_index in zip((left_locus, right_locus), (0, 3)):
        n_bins = locus.shape[0]
        bar_width = (locus[-1, 0] - locus[0, 0]) / n_bins
        grid = np.linspace(locus[0, 0], locus[-1, 0], n_bins)
        highest = max(highest, max(locus[:, 1]))
        lowest = min(lowest, min(-locus[:, 2]))
        shades = [
            mapper.to_rgba(posterior(x, components, Type=component_index))
            for x in grid
        ]
        ax.bar(locus[:, 0], locus[:, 1], width=bar_width, color=shades,
               edgecolor=shades, alpha=0.3)
        ax.bar(locus[:, 0], -locus[:, 2], width=bar_width, color=shades,
               edgecolor=shades, alpha=0.3)

    ax.set_ylim(lowest, highest)
    # Show magnitudes on both halves of the mirrored axis.
    ax.set_yticklabels(
        [""] + [str(abs(int(tick))) for tick in ax.get_yticks()[1:]])
    ax.set_ylabel("Read Coverage")
    ax.set_xlabel("Relative Genomic Position")
    ax.grid()
def mu_by_pi(ax, X, res):
    """Draw a ``res`` x ``res`` heat map of log10(-log-likelihood) on ``ax``.

    Rows sweep ``mu`` over [-8, 8] and columns sweep ``pi`` over [0, 1];
    every cell evaluates ``log_likelihood(X, rv)`` for a
    ``model.component_bidir`` built with that pair (sigma 5, lambda 1/5,
    weight 1, foot_print 4).

    ``math.log`` raises ``ValueError`` if a log-likelihood is not
    negative.
    """
    mu_lo, mu_hi = -8, 8
    pi_lo, pi_hi = 0, 1

    A = np.zeros((res, res))
    for i, mu in enumerate(np.linspace(mu_lo, mu_hi, res)):
        for j, pi in enumerate(np.linspace(pi_lo, pi_hi, res)):
            rv = model.component_bidir(mu, 5, 1.0 / 5, 1.0, pi, None,
                                       foot_print=4)
            ll = -(log_likelihood(X, rv))
            A[i, j] = math.log(ll, 10)

    ax.imshow(A, cmap=plt.cm.jet_r, vmin=A.min(), vmax=A.max(), aspect=0.85)

    # One label per existing tick.  Each axis is sized from its OWN tick
    # list; the y labels used to be sized from the x ticks.
    n_xticks = len(ax.get_xticklabels())
    n_yticks = len(ax.get_yticklabels())
    ax.set_xticklabels(
        [str(x)[:4] for x in np.linspace(pi_lo, pi_hi, n_xticks)],
        rotation=45)
    ax.set_yticklabels(
        [str(x)[:4] for x in np.linspace(mu_lo, mu_hi, n_yticks)],
        rotation=45)
    ax.set_ylabel(r'$\mu$', fontsize=20)
    ax.set_xlabel(r'$\pi$', fontsize=20)
def fp_by_sigma(ax, X, res):
    """Draw a ``res`` x ``res`` heat map of log10(|log-likelihood|) on ``ax``.

    Rows sweep ``foot_print`` over [0, 8] and columns sweep sigma over
    [1, 10]; every cell evaluates ``log_likelihood(X, rv)`` for a
    ``model.component_bidir`` centred at 0 with lambda 0.1, weight 1 and
    pi 0.5.
    """
    fp_lo, fp_hi = 0, 8
    si_lo, si_hi = 1, 10

    A = np.zeros((res, res))
    for i, fp in enumerate(np.linspace(fp_lo, fp_hi, res)):
        for j, si in enumerate(np.linspace(si_lo, si_hi, res)):
            rv = model.component_bidir(0, si, 0.1, 1.0, 0.5, None,
                                       foot_print=fp)
            ll = abs(log_likelihood(X, rv))
            A[i, j] = math.log(ll, 10)

    ax.imshow(A, cmap=plt.cm.jet_r, vmin=A.min(), vmax=A.max(), aspect=0.85)

    # Labels are generated from the same bounds that were sampled above.
    # The x labels used to run 1..7 although sigma is swept over 1..10,
    # and the y labels were sized from the x tick list.
    n_xticks = len(ax.get_xticklabels())
    n_yticks = len(ax.get_yticklabels())
    ax.set_xticklabels(
        [str(x)[:4] for x in np.linspace(si_lo, si_hi, n_xticks)],
        rotation=45)
    ax.set_yticklabels(
        [str(x)[:4] for x in np.linspace(fp_lo, fp_hi, n_yticks)],
        rotation=45)
    ax.set_xlabel(r'$\sigma$', fontsize=20)
    ax.set_ylabel(r'$fp$', fontsize=20)
def draw(X, save_path="/Users/joazofeifa/Lab/Article_drafts/EMG_paper/images/example_gene_fig.svg"):
    """Plot ``X`` with a hand-specified six-component model overlaid.

    ``X`` is indexed as a 2-D array: column 0 position, column 1 drawn
    upwards, column 2 drawn downwards.  The input is copied before it is
    rescaled, so the caller's array is left untouched and the function
    can be called repeatedly on the same data.

    Bars are coloured by ``posterior(x, rvs, Type=1)`` through a 'PuOr'
    colour map; the dashed black curves are the sum of the component
    densities on each strand.

    Parameters
    ----------
    X : array-like, at least three columns.
    save_path : where the figure is written before it is shown.  The
        default is the path that used to be hard-coded.
    """
    # Work on a private float copy: the in-place division below would
    # otherwise rescale the caller's data on every call.
    X = np.array(X, dtype=float)

    norm = mpl.colors.Normalize(vmin=0, vmax=1)
    cmap = plt.get_cmap('PuOr')
    m = cm.ScalarMappable(norm=norm, cmap=cmap)

    # Hand-picked parameters, one entry per bidirectional component.
    means = (97, 23)
    sigmas = (1.5, 1)
    lambdas = (0.3, 0.5)
    fps = (1.5, 0.9)
    pis = (0.2, 0.4)
    wps = (0.2, 0.1)
    wlfs = (0.05, 0.01)
    wrfs = (0.15, 0.)
    lfs = (X[-1, 0], 95)
    lrs = (25, X[0, 0])

    rvs = [
        model.component_bidir(means[i], sigmas[i], lambdas[i], wps[i],
                              pis[i], None, foot_print=fps[i])
        for i in range(2)
    ]
    rvs += [
        model.component_elongation(lrs[i], means[i], wlfs[i], 0,
                                   None, None, None, 0)
        for i in range(2)
    ]
    rvs += [
        model.component_elongation(means[i], lfs[i], wrfs[i], 1.0,
                                   None, None, None, 0)
        for i in range(2)
    ]
    for rv in rvs:
        # Parenthesised form prints the same under Python 2 and 3.
        print(rv)

    F = plt.figure(figsize=(15, 10))
    ax = F.add_subplot(1, 1, 1)

    # NOTE(review): the 700000 offset is unexplained in this block; it
    # is kept as-is.
    N = np.sum(X[:, 1:]) + 700000
    X[:, 1] /= N
    X[:, 2] /= N

    colorsf = [m.to_rgba(posterior(x, rvs, Type=1)) for x in X[:, 0]]
    xs = np.linspace(X[0, 0], X[-1, 0], 1000)
    ax.bar(X[:, 0], X[:, 1], color=colorsf, edgecolor=colorsf, alpha=1)
    ax.bar(X[:, 0], -X[:, 2], color=colorsf, edgecolor=colorsf, alpha=1)
    ax.plot(xs, [sum([rv.pdf(x, 1) for rv in rvs]) for x in xs],
            linewidth=3., linestyle="--", color="black")
    ax.plot(xs, [sum([-rv.pdf(x, -1) for rv in rvs]) for x in xs],
            linewidth=3., linestyle="--", color="black")

    ax_cbar = F.add_axes([0.84, 0.2, 0.01, 0.6])
    cb1 = mpl.colorbar.ColorbarBase(ax_cbar, cmap=cmap, norm=norm,
                                    orientation='vertical')
    cb1.set_label("\n"+r'$p(k=paused|\hat{\theta})$', fontsize=20)
    ax_cbar.yaxis.tick_right()
    ax_cbar.set_yticklabels(["0", "", "", "", "", "0.5", "", "", "", "", "1"])
    ax2 = ax_cbar.twinx()
    ax2.set_yticks([])
    ax2.yaxis.set_label_position("left")

    ax.grid()
    # Show magnitudes on both halves of the mirrored axis.
    ax.set_yticklabels(
        [""] + [str(abs(tick)) for tick in ax.get_yticks()[1:]])
    ax.set_ylabel("Density")
    ax.set_xlabel("Relative Genomic Position")
    plt.savefig(save_path)
    plt.show()
def compute_ll(X, i, j, mu, si, l, pi, SHOW=False, foot_print=0):
    """Return the weighted log-likelihood of rows ``i..j-1`` of ``X``.

    A ``component_bidir(mu, si, l, 1.0, pi, None, foot_print=...)`` is
    evaluated at every position ``X[k, 0]``; the strand ``1`` density is
    weighted by ``X[k, 1]`` and the strand ``-1`` density by ``X[k, 2]``.

    With ``SHOW`` set, the normalised data and both density curves are
    also plotted with matplotlib.

    NOTE(review): this file defines ``compute_ll`` a second time further
    down; that later definition is the one bound at import.
    """
    EMG = component_bidir(mu, si, l, 1.0, pi, None, foot_print=foot_print)
    LL = sum([LOG(EMG.pdf(X[k, 0], 1)) * X[k, 1] for k in range(i, j)])
    LL += sum([LOG(EMG.pdf(X[k, 0], -1)) * X[k, 2] for k in range(i, j)])

    if SHOW:
        plt.bar(X[i:j, 0], X[i:j, 1] / np.sum(X[i:j, 1]))
        plt.bar(X[i:j, 0], -X[i:j, 2] / np.sum(X[i:j, 2]))
        # j is an exclusive bound and may equal len(X); clamp it so the
        # right edge of the curve never indexes past the last row.
        right = min(j, X.shape[0] - 1)
        xs = np.linspace(X[i, 0], X[right, 0], 100)
        plt.plot(xs, [EMG.pdf(x, 1) for x in xs])
        plt.plot(xs, [-EMG.pdf(x, -1) for x in xs])
        plt.show()

    return LL
def mu_by_lambda(ax, X, res):
    """Draw a ``res`` x ``res`` heat map of log10(|log-likelihood|) on ``ax``.

    Rows sweep ``mu`` over [-4, 4]; columns sweep ``1 / lambda`` over
    [1, 10].  Every cell evaluates ``log_likelihood(X, rv)`` for a
    ``model.component_bidir`` with sigma 7.3, weight 1, pi 0.5 and
    foot_print 4.

    NOTE(review): the columns are reversed before plotting while the x
    tick labels still ascend from 1 to 10 - confirm the intended
    orientation.
    """
    mu_lo, mu_hi = -4, 4
    inv_lo, inv_hi = 1, 10

    A = np.zeros((res, res))
    for i, mu in enumerate(np.linspace(mu_lo, mu_hi, res)):
        for j, inv_lam in enumerate(np.linspace(inv_lo, inv_hi, res)):
            rv = model.component_bidir(mu, 7.3, 1.0 / inv_lam, 1.0, 0.5,
                                       None, foot_print=4)
            ll = abs(log_likelihood(X, rv))
            A[i, j] = math.log(ll, 10)
    A = A[:, ::-1]

    ax.imshow(A, cmap=plt.cm.jet_r, vmin=A.min(), vmax=A.max(), aspect=0.85)

    # Each axis is sized from its OWN tick list; the y labels used to be
    # sized from the x ticks.
    n_xticks = len(ax.get_xticklabels())
    n_yticks = len(ax.get_yticklabels())
    ax.set_xticklabels(
        [str(x)[:4] for x in np.linspace(inv_lo, inv_hi, n_xticks)],
        rotation=45)
    ax.set_yticklabels(
        [str(x)[:4] for x in np.linspace(mu_lo, mu_hi, n_yticks)],
        rotation=45)
    ax.set_xlabel(r'$1/\lambda$', fontsize=20)
    ax.set_ylabel(r'$\mu$', fontsize=20)
def compute_ll(X, i, j, mu, si, l, pi, SHOW=False, foot_print=0):
    """Return the weighted log-likelihood of rows ``i..j-1`` of ``X``.

    A ``component_bidir(mu, si, l, 1.0, pi, None, foot_print=...)`` is
    evaluated at every position ``X[k, 0]``; the strand ``1`` density is
    weighted by ``X[k, 1]`` and the strand ``-1`` density by ``X[k, 2]``.

    With ``SHOW`` set, the normalised data are plotted together with the
    component density (blue) and a flat reference at ``pi / span`` and
    ``-(1 - pi) / span`` (red), where ``span`` is the position range of
    the slice.
    """
    EMG = component_bidir(mu, si, l, 1.0, pi, None, foot_print=foot_print)
    LL = sum([LOG(EMG.pdf(X[k, 0], 1)) * X[k, 1] for k in range(i, j)])
    LL += sum([LOG(EMG.pdf(X[k, 0], -1)) * X[k, 2] for k in range(i, j)])

    if SHOW:
        # Separate name for the position range so the rate parameter
        # ``l`` is not overwritten.
        span = float(max(X[i:j, 0]) - min(X[i:j, 0]))
        w = span / float(len(X[i:j, 0]))
        plt.bar(X[i:j, 0], X[i:j, 1] / np.sum(X[i:j, 1]), width=w)
        plt.bar(X[i:j, 0], -X[i:j, 2] / np.sum(X[i:j, 2]), width=w)
        # j is an exclusive bound and may equal len(X); clamp it so the
        # right edge of the curve never indexes past the last row.
        right = min(j, X.shape[0] - 1)
        xs = np.linspace(X[i, 0], X[right, 0], 100)
        plt.plot(xs, [EMG.pdf(x, 1) for x in xs], lw=2, color="blue")
        plt.plot(xs, [-EMG.pdf(x, -1) for x in xs], lw=2, color="blue")
        # Uses the ``pi`` argument; the previous spelling ``PI`` is not a
        # name this function defines.
        plt.plot(xs, [pi / span for x in xs], lw=2, color="red")
        plt.plot(xs, [-(1 - pi) / span for x in xs], lw=2, color="red")
        plt.show()

    return LL
def draw_sim_one(ax, mu, si, l, w, pi):
    """Draw one simulated data set plus uniform noise and a model curve.

    Data come from ``sim.runOne(mu=mu, ..., l=1.0 / l, ...)``.  Two
    independent uniform samples on [-135, 145] are histogrammed, scaled
    by 0.05 and drawn on top as noise.  The black curve is the sum of a
    ``component_bidir`` centred at 0 with rate ``l`` and two
    ``component_elongation`` segments.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    mu : centre passed to the simulator.
    l : rate of the plotted component; the simulator receives ``1.0 / l``.
    si, w, pi : accepted for interface compatibility; not read by the
        current implementation.
    """
    X = sim.runOne(mu=mu, s=2, l=1.0 / l, lr=100, ll=-100, we=0.5, wl=0.25,
                   wr=0.25, pie=0.5, pil=0.1, pir=0.9, N=300, SHOW=False,
                   bins=200, noise=False, foot_print=1)

    noise = np.random.uniform(-135, 145, 50)
    noise2 = np.random.uniform(-135, 145, 50)
    # ``density=True`` replaces the removed ``normed`` keyword; the two
    # agree for the equal-width bins used here.
    counts, edges = np.histogram(noise, bins=200, density=True)
    counts2, edges2 = np.histogram(noise2, bins=200, density=True)
    counts *= 0.05
    counts2 *= 0.05

    rvs = [model.component_bidir(0, 2, l, 0.5, 0.5, None, foot_print=1)]
    rvs += [model.component_elongation(0, 100, 0.25, 1.0,
                                       None, None, None, None)]
    rvs += [model.component_elongation(-100, 0, 0.25, 0.0,
                                       None, None, None, None)]

    X[:, 1:] /= np.sum(X[:, 1:])
    # Local name so the ``w`` argument is not overwritten.
    bar_width = (X[-1, 0] - X[0, 0]) / X.shape[0]
    ax.bar(X[:, 0], X[:, 1], width=bar_width, color="blue",
           edgecolor="blue", alpha=0.3)
    ax.bar(X[:, 0], -X[:, 2], width=bar_width, color="red",
           edgecolor="red", alpha=0.3)
    ax.bar(edges[1:], counts, width=bar_width, color="blue",
           edgecolor="blue", alpha=0.3)
    ax.bar(edges2[1:], -counts2, width=bar_width, color="red",
           edgecolor="red", alpha=0.3)

    xs = np.linspace(X[0, 0], X[-1, 0], 1000)
    ysf = [sum([rv.pdf(x, 1) for rv in rvs]) for x in xs]
    ysr = [sum([-rv.pdf(x, -1) for rv in rvs]) for x in xs]
    ax.plot(xs, ysf, linewidth=2.5, linestyle="-", color="black")
    ax.plot(xs, ysr, linewidth=2.5, linestyle="-", color="black",
            label="Model")

    # 12 ticks: blank end labels plus 10 magnitude labels in between.
    ax.set_yticks(np.linspace(min(ysr) - 0.01, max(ysf) + 0.01, 12))
    ax.set_yticklabels(
        [""]
        + [str(abs(x))[:5]
           for x in np.linspace(min(ysr) + 0.01, max(ysf) - 0.01, 10)]
        + [""])
    ax.set_ylim(min(ysr) - 0.01, max(ysf) + 0.01)
    ax.grid()
    ax.set_ylabel("Density")
    ax.set_xticklabels([str(int(10 * x)) for x in ax.get_xticks()])
    ax.set_xlabel("Relative Genomic Coordinate")
    ax.legend()
def bayes_factor(X, std=10, lam=0.1, step_size=1, norm_to_max=True):
    """Score every row of ``X`` with a uniform / component log-likelihood ratio.

    ``X`` is indexed as a 2-D array: column 0 is the position, columns 1
    and 2 are the weights used with strand ``1`` and strand ``-1``.

    For each row ``i`` a window is grown in both directions until the
    position differs from ``X[i, 0]`` by at least
    ``3 * std + 1.0 / lam``.  Rows whose window runs off either end of
    ``X`` are skipped, so the result can be shorter than ``X``.

    Parameters
    ----------
    std, lam : passed to ``model.component_bidir`` and used for the
        window half-width.
    step_size : accepted for interface compatibility; the scan always
        advances one row at a time.
    norm_to_max : when true, divide every score by the largest score.

    Returns a list holding ``M2 / M1`` per scored row (``1`` when the
    uniform term ``M2`` is exactly zero).

    NOTE(review): an earlier definition of ``bayes_factor`` exists in
    this file; being the later one, this definition is the one in effect.
    """
    n_rows = X.shape[0]
    win = 3 * std + 1.0 / lam
    KS = []
    i = 0
    while i < n_rows:
        j, k = i, i
        while j < n_rows and (X[j, 0] - X[i, 0]) < win:
            j += 1
        while k >= 0 and (X[i, 0] - X[k, 0]) < win:
            k -= 1
        if j < n_rows and k >= 0:
            # Build the component only for windows that are actually scored.
            EMG = model.component_bidir(X[i, 0], std, lam, 1.0, 0.5, None)
            xs, fwd, rev = X[k:j, 0], X[k:j, 1], X[k:j, 2]
            M1 = sum([LOG(EMG.pdf(x, 1)) * y for x, y in zip(xs, fwd)])
            M1 += sum([LOG(EMG.pdf(x, -1)) * y for x, y in zip(xs, rev)])
            # 1.0 forces float division even for integer positions.
            log_l = LOG(1.0 / (X[-1, 0] - X[0, 0]))
            M2 = (sum([log_l * y for y in fwd])
                  + sum([log_l * y for y in rev]))
            if M2 == 0:
                KS.append(1)
            else:
                KS.append(M2 / M1)
        i += 1
    if norm_to_max and KS:
        # Take the maximum once instead of once per element.
        peak = max(KS)
        KS = [score / peak for score in KS]
    return KS
def run(FILE):
    """Parse a result file into ``(G, D)``.

    The parser understands three kinds of line:

    * a line containing ``#Data`` switches to data mode; every following
      line is split on tabs and appended to ``D`` as a list of floats;
    * any other line containing ``#`` is a header: the integer between
      the first character and the first comma becomes a key ``t`` of
      ``G`` with a fresh empty list (a header also ends data mode);
    * remaining lines are components for the current header.  The text
      after ``": "`` is split on commas; lines starting with ``U`` give
      a ``component_elongation`` (4 fields), all others a
      ``component_bidir`` (5 fields).

    Blank lines are ignored.

    Returns
    -------
    (dict, numpy.ndarray) : ``G`` maps header ids to component lists and
        ``D`` is ``np.array`` of the collected data rows.

    Raises
    ------
    ValueError : if a component line appears before any header.
    """
    G = {}
    D = []
    # Both were previously read before assignment when the file did not
    # start with a "#Data" line.
    collect = False
    t = None
    with open(FILE) as FH:
        for line in FH:
            if not line.strip():
                continue
            if "#Data" in line:
                collect = True
            elif "#" in line:
                # Header line; it also terminates a data section.
                collect = False
                t = int(line[1:].split(",")[0])
                G[t] = []
            elif collect:
                D.append([float(x) for x in line.strip("\n").split("\t")])
            else:
                if t is None:
                    raise ValueError(
                        "component line found before any header: %r" % line)
                fields = line.strip("\n").split(": ")[1].split(",")
                if line.startswith("U"):
                    a, b, w, pi = fields
                    G[t].append(component_elongation(
                        float(a), float(b), float(w), float(pi),
                        None, "noise", None, None))
                else:
                    mu, si, l, w, pi = fields
                    G[t].append(component_bidir(
                        float(mu), float(si), float(l), float(w),
                        float(pi), None))
    return G, np.array(D)
def window(X, win=10, max_positions=5000):
    """Slide a window over ``X`` and return model / null log-likelihood ratios.

    ``X`` is indexed as a 2-D array: column 0 position, column 1 forward
    weight, column 2 reverse weight.  For each centre row ``i`` the
    window is ``X[j:k]``, where ``j`` is the first row not more than
    ``win`` below the centre and ``k`` the first row at least ``win``
    above it.  Centres whose upper bound runs off the end of ``X`` are
    skipped, so the result can be shorter than ``X``.

    The null model is a strand-aware uniform density on ``[a, b]``; the
    alternative sums a ``MODEL.component_bidir`` at the centre and a
    ``MODEL.component_elongation`` over the window.

    Parameters
    ----------
    X : 2-D array as described above.
    win : half-width of the window, in position units.
    max_positions : scan at most this many leading rows.  The default
        keeps the previous hard-coded limit of 5000.

    Returns a list with one ``model / null`` ratio per scored centre.
    """
    n_rows = X.shape[0]
    # Never walk past the data: the old fixed bound of 5000 indexed
    # X[i, 0] out of range whenever X had fewer rows than that.
    limit = min(max_positions, n_rows)

    scores = []
    i, j, k = 0, 0, 0
    while i < limit:
        while j < n_rows and (X[j, 0] - X[i, 0]) < -win:
            j += 1
        while k < n_rows and (X[k, 0] - X[i, 0]) < win:
            k += 1
        if j < n_rows and k < n_rows:
            a, b = X[j, 0], X[k, 0]
            N_forward, N_reverse = np.sum(X[j:k, 1]), np.sum(X[j:k, 2])
            N = N_forward + N_reverse
            # Add-one smoothing; float literals avoid integer division.
            pi = (N_forward + 1.0) / (N + 2.0)
            null = (math.log(pi / (b - a)) * N_forward
                    + math.log((1 - pi) / (b - a)) * N_reverse)
            center = X[i, 0]
            rvs = [
                MODEL.component_bidir(center, 1.0, 0.5, 0.9, pi, None),
                MODEL.component_elongation(a, b, 0.1, pi,
                                           None, None, None, None),
            ]
            # Named ``alt`` so the local does not shadow a ``model`` module.
            alt = sum([
                math.log(sum([rv.pdf(X[u, 0], 1) for rv in rvs])) * X[u, 1]
                for u in range(j, k)
            ])
            alt += sum([
                math.log(sum([rv.pdf(X[u, 0], -1) for rv in rvs])) * X[u, 2]
                for u in range(j, k)
            ])
            scores.append(alt / null)
        i += 1
    return scores