示例#1
0
def likelihood(X):
	"""Return the ratio of the uniform-model BIC to the EMG-model BIC for X.

	X is indexed as a 2-D array: column 0 is the value handed to the
	component pdf, column 1 weights the strand-1 (forward) terms and
	column 2 weights the strand-(-1) (reverse) terms.

	The EMG component is initialised from simple moment estimates: the
	centre is the column-0 value of the middle row, the spread is the
	weighted standard deviation around that centre, and the rate comes
	from the distance between the forward and reverse weighted means.

	LOG and component_bidir are expected to be defined elsewhere in the
	module.
	"""
	n_rows = X.shape[0]
	# Floor division keeps the row index an integer under true division.
	mu = X[n_rows // 2, 0]
	N_forward = np.sum(X[:, 1])
	N_reverse = np.sum(X[:, 2])
	# Work with a float total so integer counts cannot floor-divide to 0.
	N = float(N_reverse + N_forward)
	pi = N_forward / N

	mean_forward = np.average(X[:, 0], weights=X[:, 1])
	mean_reverse = np.average(X[:, 0], weights=X[:, 2])
	lam = 1. / (0.5 * (mean_forward - mean_reverse))
	if lam < 0:
		# A negative rate is meaningless; fall back to a unit rate.
		lam = 1

	# Weighted standard deviation of column 0 around the chosen centre.
	sigma = math.sqrt(np.sum((X[:, 0] - mu) ** 2 * (X[:, 1] + X[:, 2])) / N)

	# Uniform density over the span of column 0 (last row minus first row),
	# the same span the sibling bayes_factor uses. The previous expression
	# X[0,-1] - X[0,0] subtracted a position from a strand count.
	vl = 1.0 / (X[-1, 0] - X[0, 0])
	U_ll = LOG(vl * pi) * N_forward + LOG(vl * (1 - pi)) * N_reverse
	# BIC with one free parameter for the uniform model.
	U_BIC = -2 * U_ll + 1 * math.log(N)

	EMG = component_bidir(mu, sigma, lam, 1.0, pi, None, foot_print=0)
	LL = sum(LOG(EMG.pdf(X[k, 0], 1)) * X[k, 1] for k in range(n_rows))
	LL += sum(LOG(EMG.pdf(X[k, 0], -1)) * X[k, 2] for k in range(n_rows))
	# BIC with three free parameters for the EMG model.
	EMG_BIC = -2 * LL + 3 * math.log(N)

	return U_BIC / EMG_BIC
示例#2
0
def bayes_factor(X, std=10, lam=0.1,step_size=1, norm_to_max=True):
	"""Score every row of X by the ratio of uniform to EMG log-likelihood.

	For each row i a window of half-width ``3*std + 1/lam`` (measured in
	column-0 units) is grown around it. Rows whose window runs off either
	end of X are skipped. For the remaining rows the score is ``M2 / M1``:
	M1 is the weighted log-likelihood of the window under a
	``model.component_bidir`` centred on row i, and M2 is the weighted
	log-likelihood under a uniform density over the whole column-0 span.
	A window whose M2 is exactly 0 scores 1.

	Parameters:
		X: 2-D array indexed as column 0 = position, column 1 = strand-1
			weights, column 2 = strand-(-1) weights.
		std, lam: spread and rate handed to the component; they also set
			the window size.
		step_size: accepted for interface compatibility but not used; the
			scan always advances one row at a time.
		norm_to_max: when true, divide every score by the largest score.

	Returns:
		A list of scores, one per row whose window fitted. The list is
		empty when no window fits (including empty X).
	"""
	n_rows = X.shape[0]
	win = (3*std + 1.0 / lam)
	KS = list()
	for i in range(n_rows):
		j, k = i, i
		while j < n_rows and (X[j,0] - X[i,0]) < win:
			j += 1
		while k >= 0 and (X[i,0] - X[k,0]) < win:
			k -= 1
		if j >= n_rows or k < 0:
			# The window is truncated by an edge of X; skip this row.
			continue
		EMG = model.component_bidir(X[i,0], std, lam, 1.0, 0.5, None)
		positions, forward, reverse = X[k:j,0], X[k:j,1], X[k:j,2]
		M1 = sum(LOG(EMG.pdf(x, 1))*y for x, y in zip(positions, forward))
		M1 += sum(LOG(EMG.pdf(x, -1))*y for x, y in zip(positions, reverse))
		# 1.0 keeps this a true division even for integer positions.
		log_uniform = LOG(1.0 / (X[-1,0] - X[0,0]))
		M2 = sum(log_uniform*y for y in forward) + sum(log_uniform*y for y in reverse)
		if M2 == 0:
			KS.append(1)
		else:
			KS.append(M2/M1)
	if norm_to_max and KS:
		# Guarded so an empty score list no longer makes max() raise.
		peak = max(KS)
		KS = [score / peak for score in KS]
	return KS
示例#3
0
文件: fit_many.py 项目: dmalmer/EMG
def draw_posterior(ax,ax_cbar):
	"""Draw two simulated loci on ax, coloured by a component posterior.

	Two data sets are simulated with sim.runOne (centres -40 and 40) and
	drawn as mirrored bar charts: column 1 upwards, column 2 downwards.
	Every bar is coloured by posterior(x, rvs, Type=...) through the
	'PuOr' colormap, with Type 0 for the first data set and Type 3 for
	the second. ax_cbar receives the matching vertical colorbar.
	"""
	# Simulation settings shared by both loci; only mu, lr and ll differ.
	shared = dict(s=1, l=10, we=0.5, wl=0.25, wr=0.25, pie=0.5, pil=0.1,
				pir=0.9, N=3000, SHOW=False, bins=200, noise=False, foot_print=1)
	X1 = sim.runOne(mu=-40, lr=40, ll=-200, **shared)
	X2 = sim.runOne(mu=40, lr=200, ll=-40, **shared)

	# Indices 0 and 3 are the two bidirectional components referenced below.
	rvs = [
		model.component_bidir(-40, 1, 0.1, 0.5, 0.5, None, foot_print=1),
		model.component_elongation(-40, 200, 0.25, 1.0, None, None, None, None),
		model.component_elongation(-200, -40, 0.25, 0, None, None, None, None),
		model.component_bidir(40, 1, 0.1, 0.5, 0.5, None, foot_print=1),
		model.component_elongation(40, 200, 0.25, 1.0, None, None, None, None),
		model.component_elongation(-200, 40, 0.25, 0, None, None, None, None),
	]

	norm = mpl.colors.Normalize(vmin=0, vmax=1)
	cmap = plt.get_cmap('PuOr')
	mappable = cm.ScalarMappable(norm=norm, cmap=cmap)

	colorbar = mpl.colorbar.ColorbarBase(ax_cbar, cmap=cmap, norm=norm,
				orientation='vertical')
	colorbar.set_label(r'$p(k=paused|\hat{\theta})$')
	ax_cbar.yaxis.tick_right()
	# Eleven labels: only 0, 0.5 and 1 are shown.
	ax_cbar.set_yticklabels(["0"] + [""] * 4 + ["0.5"] + [""] * 4 + ["1"])

	twin = ax_cbar.twinx()
	twin.set_yticks([])
	twin.yaxis.set_label_position("left")

	lowest, highest = 0, 0
	for X, component_index in zip((X1, X2), (0, 3)):
		n_rows = X.shape[0]
		bar_width = (X[-1,0] - X[0,0]) / n_rows
		grid = np.linspace(X[0,0], X[-1,0], n_rows)
		highest = max(highest, max(X[:,1]))
		lowest = min(lowest, min(-X[:,2]))
		colors = [mappable.to_rgba(posterior(x, rvs, Type=component_index)) for x in grid]

		ax.bar(X[:,0], X[:,1], width=bar_width, color=colors, edgecolor=colors, alpha=0.3)
		ax.bar(X[:,0], -X[:,2], width=bar_width, color=colors, edgecolor=colors, alpha=0.3)
	ax.set_ylim(lowest, highest)
	# Show magnitudes on both halves of the mirrored axis; blank the first tick.
	magnitudes = [str(abs(int(tick))) for tick in ax.get_yticks()[1:]]
	ax.set_yticklabels([""] + magnitudes)
	ax.set_ylabel("Read Coverage")

	ax.set_xlabel("Relative Genomic Position")
	ax.grid()
示例#4
0
def mu_by_pi(ax,X,res):
	"""Draw a res-by-res heatmap of log10(-log-likelihood) over (mu, pi).

	Rows sweep mu over [-8, 8] and columns sweep pi over [0, 1]. Each cell
	evaluates log_likelihood(X, rv) for a model.component_bidir built with
	that (mu, pi) pair. The image is drawn on ax with the reversed jet
	colormap, scaled to the matrix's own minimum and maximum.
	"""
	mu_lo, mu_hi = -8, 8
	pi_lo, pi_hi = 0, 1
	A = np.zeros((res, res))
	for i, mu in enumerate(np.linspace(mu_lo, mu_hi, res)):
		for j, pi in enumerate(np.linspace(pi_lo, pi_hi, res)):
			rv = model.component_bidir(mu, 5, 1.0/5, 1.0, pi, None, foot_print=4)
			ll = -(log_likelihood(X, rv))
			A[i,j] = math.log(ll, 10)
	ax.imshow(A, cmap=plt.cm.jet_r, vmin=A.min(), vmax=A.max(), aspect=0.85)
	# Each axis is relabelled with as many values as that axis has ticks.
	# The y labels used to be sized from the x-axis tick count.
	n_xticks = len(ax.get_xticklabels())
	n_yticks = len(ax.get_yticklabels())
	ax.set_xticklabels([str(x)[:4] for x in np.linspace(pi_lo, pi_hi, n_xticks)], rotation=45)
	ax.set_yticklabels([str(x)[:4] for x in np.linspace(mu_lo, mu_hi, n_yticks)], rotation=45)
	ax.set_ylabel(r'$\mu$',fontsize=20)
	ax.set_xlabel(r'$\pi$',fontsize=20)
示例#5
0
def fp_by_sigma(ax,X,res):
	"""Draw a res-by-res heatmap of log10(|log-likelihood|) over (fp, sigma).

	Rows sweep the foot_print argument over [0, 8] and columns sweep sigma
	over [1, 10]. Each cell evaluates log_likelihood(X, rv) for a
	model.component_bidir built with that pair. The image is drawn on ax
	with the reversed jet colormap.
	"""
	fp_lo, fp_hi = 0, 8
	si_lo, si_hi = 1, 10
	A = np.zeros((res, res))
	for i, fp in enumerate(np.linspace(fp_lo, fp_hi, res)):
		for j, si in enumerate(np.linspace(si_lo, si_hi, res)):
			rv = model.component_bidir(0, si, 0.1, 1.0, 0.5, None, foot_print=fp)
			ll = abs(log_likelihood(X, rv))
			A[i,j] = math.log(ll, 10)
	ax.imshow(A, cmap=plt.cm.jet_r, vmin=A.min(), vmax=A.max(), aspect=0.85)

	# Labels are derived from the same bounds as the sweep; the sigma axis
	# used to be labelled 1..7 although the sweep runs 1..10. The y labels
	# are now sized from the y-axis tick count instead of the x-axis one.
	n_xticks = len(ax.get_xticklabels())
	n_yticks = len(ax.get_yticklabels())
	ax.set_xticklabels([str(x)[:4] for x in np.linspace(si_lo, si_hi, n_xticks)], rotation=45)
	ax.set_yticklabels([str(x)[:4] for x in np.linspace(fp_lo, fp_hi, n_yticks)], rotation=45)
	ax.set_xlabel(r'$\sigma$',fontsize=20)
	ax.set_ylabel(r'$fp$',fontsize=20)
示例#6
0
文件: Figure_1.py 项目: dmalmer/EMG
def draw(X):
	"""Plot X against a hard-coded two-locus mixture and save the figure.

	Six components are built from fixed parameters (two bidirectional
	components plus a reverse and a forward elongation component for
	each) and printed. X is drawn as mirrored bars (column 1 upwards,
	column 2 downwards) coloured by posterior(x, rvs, Type=1), with the
	summed component densities overlaid as dashed lines. The figure is
	written to a fixed SVG path and then shown.

	Side effect: columns 1 and 2 of X are divided in place by the total
	of columns 1.. plus 700000, so the caller's array is modified.
	"""
	norm = mpl.colors.Normalize(vmin=0, vmax= 1)
	cmap = plt.get_cmap('PuOr' )
	m = cm.ScalarMappable(norm=norm, cmap=cmap)

	# Per-locus parameters; index 0 and 1 are the two loci.
	means 	= (97, 23)
	sigmas 	= (1.5,1)
	lambdas = (0.3,0.5)
	fps 	= (1.5,0.9)
	pis 	= (0.2,0.4)
	wps 	= (0.2,0.1)
	wlfs 	= (0.05, 0.01)
	wrfs 	= (0.15,0.)
	lfs 	= (X[-1,0],95)
	lrs 	= ( 25,X[0,0])
	rvs 	= [model.component_bidir(means[i], sigmas[i], lambdas[i], wps[i],pis[i] , None,foot_print=fps[i]) for i in range(2) ]
	rvs 	+=[model.component_elongation(lrs[i],means[i], wlfs[i], 0, None, None, None, 0  ) for i in range(2)]
	rvs 	+=[model.component_elongation(means[i], lfs[i],wrfs[i], 1.0, None, None, None, 0  ) for i in range(2)]
	for rv in rvs:
		# The parenthesised form prints the same text on Python 2 and is
		# valid syntax on Python 3, unlike the bare print statement.
		print(rv)
	F 	= plt.figure(figsize=(15,10))
	ax 	= F.add_subplot(1,1,1)
	# Normalising constant: total weight plus a fixed offset of 700000.
	N 	= np.sum(X[:,1:])+700000
	X[:,1]/=N
	X[:,2]/=N
	colorsf 	= [ m.to_rgba(posterior(x,rvs, Type=1 )) for x in X[:,0] ]
	xs 	= np.linspace(X[0,0],X[-1,0],1000)
	ax.bar(X[:,0], X[:,1], color=colorsf, edgecolor=colorsf, alpha=1)
	ax.bar(X[:,0], -X[:,2], color=colorsf, edgecolor=colorsf, alpha=1)
	ax.plot(xs, [sum([rv.pdf(x,1) for rv in rvs]) for x in xs],linewidth=3.,linestyle="--",color="black")
	ax.plot(xs, [sum([-rv.pdf(x,-1) for rv in rvs]) for x in xs],linewidth=3.,linestyle="--",color="black")
	ax_cbar = F.add_axes([0.84,0.2,0.01,0.6])
	cb1 = mpl.colorbar.ColorbarBase(ax_cbar, cmap=cmap,
	                                norm=norm,
	                                orientation='vertical')
	cb1.set_label("\n"+r'$p(k=paused|\hat{\theta})$', fontsize=20)
	ax_cbar.yaxis.tick_right()
	ax_cbar.set_yticklabels(["0", "", "", "","",  "0.5", "","", "", "",  "1"]  )

	ax2 = ax_cbar.twinx()
	ax2.set_yticks([])
	ax2.yaxis.set_label_position("left")
	ax.grid()
	# Show magnitudes on both halves of the mirrored axis; blank the first tick.
	ax.set_yticklabels([""]+[str(abs((i ) )) for i in ax.get_yticks()[1:] ] )
	ax.set_ylabel("Density")

	ax.set_xlabel("Relative Genomic Position")

	plt.savefig("/Users/joazofeifa/Lab/Article_drafts/EMG_paper/images/example_gene_fig.svg")
	plt.show()
示例#7
0
def compute_ll(X, i,j, mu,si, l, pi, SHOW=False, foot_print=0):
	"""Return the weighted log-likelihood of rows i..j-1 of X under one component.

	A component_bidir is built from (mu, si, l, pi, foot_print). For each
	row k in range(i, j) the log of its strand-1 pdf at X[k,0] is weighted
	by X[k,1], and the log of its strand-(-1) pdf by X[k,2]. The two sums
	are added and returned.

	With SHOW set, the window is also plotted: normalised bars for both
	columns and the two component density curves.
	"""
	EMG = component_bidir(mu, si, l, 1.0, pi, None, foot_print=foot_print)
	rows = range(i, j)
	LL = sum(LOG(EMG.pdf(X[k,0], 1))*X[k,1] for k in rows)
	LL += sum(LOG(EMG.pdf(X[k,0], -1))*X[k,2] for k in rows)
	if SHOW:
		plt.bar(X[i:j,0], X[i:j,1]/np.sum(X[i:j,1]))
		plt.bar(X[i:j,0], -X[i:j,2]/np.sum(X[i:j,2]))
		# j is an exclusive end, so the last row of the window is j-1.
		# Reading X[j,0] raised IndexError whenever j == len(X).
		xs = np.linspace(X[i,0], X[j-1,0], 100)
		plt.plot(xs, [EMG.pdf(x, 1) for x in xs])
		plt.plot(xs, [-EMG.pdf(x, -1) for x in xs])

		plt.show()
	return LL
示例#8
0
def mu_by_lambda(ax,X,res):
	"""Draw a res-by-res heatmap of log10(|log-likelihood|) over (mu, 1/lambda).

	Rows sweep mu over [-4, 4]. Columns sweep 1/lambda over [1, 10], and
	the component receives the reciprocal as its rate. Each cell evaluates
	log_likelihood(X, rv) for a model.component_bidir built with that
	pair. The column order is reversed before the image is drawn on ax.
	"""
	mu_lo, mu_hi = -4, 4
	inv_lo, inv_hi = 1, 10
	A = np.zeros((res, res))
	for i, mu in enumerate(np.linspace(mu_lo, mu_hi, res)):
		for j, inv_lam in enumerate(np.linspace(inv_lo, inv_hi, res)):
			rv = model.component_bidir(mu, 7.3, 1.0 / inv_lam, 1.0, 0.5, None, foot_print=4)
			ll = abs(log_likelihood(X, rv))
			A[i,j] = math.log(ll, 10)
	# NOTE(review): after this reversal column 0 holds 1/lambda = 10, yet
	# the x labels below still ascend from 1 to 10 - confirm which
	# orientation is intended.
	A = A[:,::-1]
	ax.imshow(A, cmap=plt.cm.jet_r, vmin=A.min(), vmax=A.max(), aspect=0.85)
	# Each axis is relabelled with as many values as that axis has ticks.
	# The y labels used to be sized from the x-axis tick count.
	n_xticks = len(ax.get_xticklabels())
	n_yticks = len(ax.get_yticklabels())
	ax.set_xticklabels([str(x)[:4] for x in np.linspace(inv_lo, inv_hi, n_xticks)], rotation=45)
	ax.set_yticklabels([str(x)[:4] for x in np.linspace(mu_lo, mu_hi, n_yticks)], rotation=45)
	ax.set_xlabel(r'$1/\lambda$',fontsize=20)
	ax.set_ylabel(r'$\mu$',fontsize=20)
示例#9
0
def compute_ll(X, i,j, mu,si, l, pi, SHOW=False, foot_print=0):
	"""Return the weighted log-likelihood of rows i..j-1 of X under one component.

	A component_bidir is built from (mu, si, l, pi, foot_print). For each
	row k in range(i, j) the log of its strand-1 pdf at X[k,0] is weighted
	by X[k,1], and the log of its strand-(-1) pdf by X[k,2]. The two sums
	are added and returned.

	With SHOW set, the window is also plotted: normalised bars for both
	columns, the component density curves in blue, and in red the flat
	lines pi/span and -(1-pi)/span, where span is the column-0 extent of
	the window.
	"""
	EMG = component_bidir(mu, si, l, 1.0, pi, None, foot_print=foot_print)
	rows = range(i, j)
	LL = sum(LOG(EMG.pdf(X[k,0], 1))*X[k,1] for k in rows)
	LL += sum(LOG(EMG.pdf(X[k,0], -1))*X[k,2] for k in rows)
	if SHOW:
		positions = X[i:j,0]
		# Kept separate from the rate parameter l, which this used to overwrite.
		span = float(max(positions) - min(positions))
		bar_width = span / float(len(positions))
		plt.bar(positions, X[i:j,1]/np.sum(X[i:j,1]), width=bar_width)
		plt.bar(positions, -X[i:j,2]/np.sum(X[i:j,2]), width=bar_width)
		# j is an exclusive end, so the last row of the window is j-1.
		# Reading X[j,0] raised IndexError whenever j == len(X).
		xs = np.linspace(X[i,0], X[j-1,0], 100)
		plt.plot(xs, [EMG.pdf(x, 1) for x in xs], lw=2, color="blue")
		plt.plot(xs, [-EMG.pdf(x, -1) for x in xs], lw=2, color="blue")
		# Uses the pi parameter; the previous name PI is not defined in
		# this function.
		plt.plot(xs, [pi / span for x in xs], lw=2, color="red")
		plt.plot(xs, [-(1-pi) / span for x in xs], lw=2, color="red")

		plt.show()

	return LL
示例#10
0
文件: fit_many.py 项目: dmalmer/EMG
def draw_sim_one(ax, mu,si, l, w, pi):
	"""Draw one simulated locus with uniform noise and the model overlay on ax.

	Data are simulated with sim.runOne using centre mu and rate argument
	1/l. Two independent uniform noise samples are histogrammed and
	scaled by 0.05. The data and the noise are drawn as mirrored bars
	(blue upwards, red downwards), and the summed densities of one
	bidirectional and two elongation components are drawn in black.

	The si and pi arguments are not used, and the incoming w is ignored
	because the bar width is recomputed from the data. The overlay
	components are centred at 0 regardless of mu.
	"""
	X 	= sim.runOne(mu=mu, s=2, l=1.0/l, lr=100, ll=-100, we=0.5,wl=0.25, wr=0.25, pie=0.5, pil=0.1,
				pir=0.9, N=300, SHOW=False , bins=200, noise=False, foot_print = 1 )
	noise 	= np.random.uniform(-135,145, 50)
	noise2 	= np.random.uniform(-135,145, 50)

	# density=True replaces the removed normed=1 keyword; for the
	# equal-width bins used here the result is the same.
	counts,edges 	= np.histogram(noise, bins=200, density=True)
	counts2,edges2 	= np.histogram(noise2, bins=200, density=True)

	counts*=0.05
	counts2*=0.05

	rvs 	= [model.component_bidir( 0, 2, l, 0.5,0.5 , None,foot_print=1 )]
	rvs 	+=[model.component_elongation( 0, 100,  0.25,1.0 ,None , None, None, None)]
	rvs 	+=[model.component_elongation( -100, 0,  0.25,0.0 , None , None, None, None  )]

	# Normalise both strand columns by their combined total.
	X[:,1:]/=np.sum(X[:,1:])
	bar_width 	= (X[-1,0] - X[0,0]) / X.shape[0]
	ax.bar(X[:,0], X[:,1], width=bar_width, color="blue", edgecolor="blue",alpha=0.3)
	ax.bar(X[:,0], -X[:,2], width=bar_width, color="red", edgecolor="red",alpha=0.3)
	ax.bar(edges[1:], counts,width=bar_width, color="blue", edgecolor="blue",alpha=0.3)
	ax.bar(edges2[1:], -counts2, width=bar_width, color="red", edgecolor="red",alpha=0.3)

	xs 	= np.linspace(X[0,0], X[-1,0], 1000)
	ysf = [sum([rv.pdf(x,1) for rv in rvs ]) for x in xs]
	ysr = [sum([-rv.pdf(x,-1) for rv in  rvs ]) for x in xs]
	ax.plot(xs,ysf,linewidth=2.5,linestyle="-", color="black")
	ax.plot(xs,ysr,linewidth=2.5,linestyle="-", color="black", label="Model")
	# Twelve ticks: the two outermost are blank, the ten inner ones show magnitudes.
	ax.set_yticks(np.linspace(min(ysr)-0.01, max(ysf)+0.01, 12))
	ax.set_yticklabels([""]+ [str(abs(x))[:5] for x in np.linspace(min(ysr)+0.01, max(ysf)-0.01, 10)] + [""] )
	ax.set_ylim(min(ysr)-0.01, max(ysf)+0.01)
	ax.grid()
	ax.set_ylabel("Density")
	# x tick values are shown multiplied by 10.
	ax.set_xticklabels([str(int(10*x)) for x in ax.get_xticks() ])
	ax.set_xlabel("Relative Genomic Coordinate")
	ax.legend()
示例#11
0
def bayes_factor(X, std=10, lam=0.1, step_size=1, norm_to_max=True):
    """Score every row of X by the ratio of uniform to EMG log-likelihood.

    For each row i a window of half-width ``3*std + 1/lam`` (measured in
    column-0 units) is grown around it. Rows whose window runs off either
    end of X are skipped. For the remaining rows the score is ``M2 / M1``:
    M1 is the weighted log-likelihood of the window under a
    ``model.component_bidir`` centred on row i, and M2 is the weighted
    log-likelihood under a uniform density over the whole column-0 span.
    A window whose M2 is exactly 0 scores 1.

    Parameters:
        X: 2-D array indexed as column 0 = position, column 1 = strand-1
            weights, column 2 = strand-(-1) weights.
        std, lam: spread and rate handed to the component; they also set
            the window size.
        step_size: accepted for interface compatibility but not used; the
            scan always advances one row at a time.
        norm_to_max: when true, divide every score by the largest score.

    Returns:
        A list of scores, one per row whose window fitted. The list is
        empty when no window fits (including empty X).
    """
    n_rows = X.shape[0]
    win = (3 * std + 1.0 / lam)
    KS = list()
    for i in range(n_rows):
        j, k = i, i
        while j < n_rows and (X[j, 0] - X[i, 0]) < win:
            j += 1
        while k >= 0 and (X[i, 0] - X[k, 0]) < win:
            k -= 1
        if j >= n_rows or k < 0:
            # The window is truncated by an edge of X; skip this row.
            continue
        EMG = model.component_bidir(X[i, 0], std, lam, 1.0, 0.5, None)
        positions, forward, reverse = X[k:j, 0], X[k:j, 1], X[k:j, 2]
        M1 = sum(LOG(EMG.pdf(x, 1)) * y for x, y in zip(positions, forward))
        M1 += sum(LOG(EMG.pdf(x, -1)) * y for x, y in zip(positions, reverse))
        # 1.0 keeps this a true division even for integer positions.
        log_uniform = LOG(1.0 / (X[-1, 0] - X[0, 0]))
        M2 = (sum(log_uniform * y for y in forward) +
              sum(log_uniform * y for y in reverse))
        if M2 == 0:
            KS.append(1)
        else:
            KS.append(M2 / M1)
    if norm_to_max and KS:
        # Guarded so an empty score list no longer makes max() raise.
        peak = max(KS)
        KS = [score / peak for score in KS]
    return KS
示例#12
0
def run(FILE):
	"""Parse a fit-result file into per-model component lists and a data array.

	The file is read line by line:
	  * a line containing "#Data" starts the data section;
	  * any other line containing "#" starts a model whose integer key is
	    the text between the first character and the first comma, and
	    ends the data section;
	  * inside the data section every line is a tab-separated row of floats;
	  * every other line is a component: "...: a,b,w,pi" when it starts
	    with "U" (built with component_elongation), otherwise
	    "...: mu,si,l,w,pi" (built with component_bidir).
	Blank lines are skipped.

	Returns:
		(G, D): G maps each model key to its list of components, and D is
		the data rows as a numpy array.

	Raises:
		ValueError: if a component line appears before any model header,
			or a field cannot be converted to a number.
	"""
	G = {}
	D = list()
	# Initialised up front: a file that does not open with "#Data" used to
	# fail on an unbound local.
	collect = False
	t = None
	with open(FILE) as FH:
		for line in FH:
			if not line.strip():
				continue
			if "#Data" in line:
				collect = True
			elif "#" in line:
				# A model header; it also terminates the data section.
				collect = False
				t = int(line[1:].split(",")[0])
				G[t] = list()
			elif collect:
				D.append([float(x) for x in line.strip("\n").split("\t")])
			else:
				if t is None:
					raise ValueError("component line before any model header: %r" % line)
				fields = line.strip("\n").split(": ")[1].split(",")
				if line.startswith("U"):
					a, b, w, pi = fields
					G[t].append(component_elongation(float(a), float(b), float(w), float(pi), None, "noise", None, None))
				else:
					mu, si, l, w, pi = fields
					G[t].append(component_bidir(float(mu), float(si), float(l), float(w), float(pi), None))
	return G, np.array(D)
示例#13
0
def window(X, win=10, max_rows=5000):
	"""Score rows of X by the ratio of model to null log-likelihood in a window.

	For each row i, j is advanced to the first row no more than win
	below X[i,0], and k to the first row at least win above it. When both
	fall inside X, the rows j..k-1 are scored as ``model / null``:
	  * null is the log-likelihood of a strand-split uniform density over
	    [X[j,0], X[k,0]];
	  * model is the weighted log-likelihood of a two-component mixture (a
	    MODEL.component_bidir centred on X[i,0] plus a
	    MODEL.component_elongation over the same interval).
	Rows whose window runs past the end of X contribute no score.

	Parameters:
		X: 2-D array indexed as column 0 = position, column 1 = strand-1
			weights, column 2 = strand-(-1) weights.
		win: half-width of the window in column-0 units.
		max_rows: scan at most this many leading rows (5000 by default,
			the previously hard-coded limit); None scans every row.

	Returns:
		A list with one score per row whose window fitted.
	"""
	n_rows = X.shape[0]
	# Never scan past the last row: the fixed 5000-iteration loop indexed
	# X[i,0] out of range for any shorter non-empty input.
	limit = n_rows if max_rows is None else min(max_rows, n_rows)
	i, j, k = 0, 0, 0
	scores = list()
	while i < limit:
		while j < n_rows and (X[j,0]-X[i,0]) < -win:
			j += 1
		while k < n_rows and (X[k,0]-X[i,0]) < win:
			k += 1
		if j < n_rows and k < n_rows:
			a, b = X[j,0], X[k,0]
			N_forward, N_reverse = np.sum(X[j:k,1]), np.sum(X[j:k,2])
			N = N_forward + N_reverse
			# One pseudo-count per strand keeps pi strictly inside (0, 1).
			pi = (N_forward+1) / (N+2)

			null_ll = math.log(pi / (b-a))*N_forward + math.log((1-pi) / (b-a))*N_reverse
			rvs = [MODEL.component_bidir(X[i,0], 1.0, 0.5, 0.9, pi, None),
				MODEL.component_elongation(a, b, 0.1, pi, None, None, None, None)]
			model_ll = sum(math.log(sum(rv.pdf(X[u,0], 1) for rv in rvs))*X[u,1] for u in range(j, k))
			model_ll += sum(math.log(sum(rv.pdf(X[u,0], -1) for rv in rvs))*X[u,2] for u in range(j, k))
			scores.append(model_ll/null_ll)
		i += 1
	return scores