Пример #1
0
def _write_strands(FHW, header, X):
	"""Write one record to FHW: a header line, then a forward and a reverse section.

	The forward section pairs column 0 of X with column 1; the reverse
	section pairs column 0 with column 2. Each pair is written as "x,value".
	"""
	FHW.write(header + "\n")
	for label, col in (("~forward", 1), ("~reverse", 2)):
		FHW.write(label + "\n")
		for i in range(X.shape[0]):
			FHW.write(str(X[i,0]) + "," + str(X[i,col]) + "\n")


def run(FILE):
	"""Write a two-record track file to FILE.

	Record 1 is the array returned by load.grab_specific_region for
	chr1:6229860-6303055 (bins=1000). Record 2 is the array returned by
	simulate.runOne, with its first column shifted by 6303055 and then
	multiplied by 100; its header uses the name "chrN" and the first/last
	values of that column as start/stop.

	Each record is a "#name,start,stop" header followed by a "~forward"
	and a "~reverse" section (see _write_strands).

	Also prints the min/max of record 1's first column and the start/stop
	of record 2.
	"""
	# `with` guarantees the handle is closed even if loading or simulating raises.
	with open(FILE, "w") as FHW:
		X = load.grab_specific_region("chr1",6229860,6303055, SHOW=False, bins=1000 )
		# "%s %s" formatting prints the same text under Python 2 and Python 3.
		print("%s %s" % (min(X[:,0]), max(X[:,0])))
		_write_strands(FHW, "#chr1,6229860,6303055", X)

		X = simulate.runOne(mu=0, s=0.1, l=3, lr=100, ll=-50, we=0.5,wl=0.25, wr=0.25, pie=0.5, pil=0.1, pir=0.9,
			N=1000, SHOW=False, bins=1000, noise=True )
		# Order matters: shift first, then scale (both in place on column 0).
		X[:,0]+=6303055
		X[:,0]*=100.
		st, sp = X[0,0], X[-1,0]
		print("%s %s" % (st, sp))
		_write_strands(FHW, "#chrN,"+str(st) + "," + str(sp), X)
Пример #2
0
	coverage_scores 	= window(X,std=std,lam=lam,step_size=1)
	bayes_ks 			= bayes_factor(X,std=std,lam=lam,step_size=1)
	hybrid 				= center(coverage_scores, bayes_ks)
	starts 				= find_peaks([(x,y) for x,y in zip(np.linspace(X[0,0], X[-1,0], len(hybrid)), hybrid)])
	return coverage_scores, bayes_ks, hybrid, starts
def sample(X, k, std=1, lam=0.1):
	"""Pick up to k distinct EM start positions, favouring the front of the candidate list.

	Candidates come from compute_possible_EM_starts(X, std=std, lam=lam);
	only its fourth return value (`starts`) is used. On each round an index
	is drawn as geometric(0.8) - 1, so index 0 is chosen with probability
	0.8; the chosen candidate is removed so it cannot be picked twice.

	Parameters:
		X   -- data passed through to compute_possible_EM_starts
		k   -- number of starts requested
		std -- forwarded to compute_possible_EM_starts
		lam -- forwarded to compute_possible_EM_starts

	Returns:
		A list holding element [0] of each chosen candidate. It has fewer
		than k entries when there are fewer than k candidates.
	"""
	_, _, _, starts = compute_possible_EM_starts(X,std=std, lam=lam)
	# Work on a private copy so removals never touch the caller's object.
	starts = list(starts)
	keeps = list()
	for _ in range(k):
		if not starts:
			# Candidates exhausted: return what we have instead of raising.
			break
		# The geometric draw is unbounded; clamp it to the last valid index
		# so a large draw (or a short list) cannot raise IndexError.
		j = min(np.random.geometric(0.8)-1, len(starts)-1)
		keeps.append(starts[j][0])
		del starts[j]
	return keeps


	

# Script entry point: load one chr1 region, compute candidate EM starts,
# plot them, then fit an EMGU model to the same data and plot the fit.
if __name__=="__main__":
	X 	= load.grab_specific_region("chr1",6229860,6303055, SHOW=True, bins=300 )
	# Rescale column 0 by 1/100, then shift it so the first row sits at 0.
	X[:,0]/=100.
	X[:,0]-=X[0,0]

	coverage_scores, bayes_ks, hybrid, starts 	= compute_possible_EM_starts(X,std=1,lam=0.1)
	draw(X, coverage_scores, bayes_ks, hybrid,starts)
	# NOTE(review): K=3 is presumably the number of mixture components and
	# max_it the iteration cap -- confirm against model.EMGU.
	clf = model.EMGU(noise=True, K=3,noise_max=0.01,
		moveUniformSupport=5,
		max_it=50,seed=True)
	clf.fit(X)
	clf.draw(X)
	
	
Пример #3
0
	ax.set_xlabel("Relative Genomic Position")

	plt.savefig("/Users/joazofeifa/Lab/Article_drafts/EMG_paper/images/example_gene_fig.svg")
	plt.show()

def write_out(X, OUT=""):
	"""Write columns 0, 1 and 2 of X to the file OUT, one "a,b,c" line per row.

	Parameters:
		X   -- 2-D array-like exposing .shape and [i, j] indexing
		OUT -- path of the file to create or overwrite; the default empty
		       string is not a valid path, so callers must supply one

	Values are rendered with str(), which is the format load_IN reads back.
	"""
	# `with` closes the handle even if a write fails part-way through.
	with open(OUT, "w") as FHW:
		for i in range(X.shape[0]):
			FHW.write("%s,%s,%s\n" % (X[i,0], X[i,1], X[i,2]))
def load_IN(FILE):
	"""Read a three-column comma-separated file into a NumPy array.

	Each non-blank line must hold exactly three comma-separated numbers;
	they become one row of floats. Blank lines (for example a trailing
	empty line at the end of the file) are skipped.

	Parameters:
		FILE -- path of the file to read

	Returns:
		np.array built from the list of (x, y, r) float tuples.

	Raises:
		ValueError if a non-blank line does not have exactly three fields
		or a field is not a valid float.
	"""
	L 	= list()
	with open(FILE) as FH:
		for line in FH:
			line = line.strip()
			if not line:
				# Nothing to parse; unpacking three fields here would raise.
				continue
			x,y,r 	= line.split(",")
			L.append((float(x), float(y), float(r)))
	return np.array(L)

# Script entry point: optionally regenerate the cached CSV for one chr1
# region, then load that CSV and plot it.
if __name__=="__main__":
	# Flip WRITE to True to rebuild OUT from the BedGraph files; with False
	# the script only reads the existing OUT file.
	WRITE 	= False
	OUT 	= "/Users/joazofeifa/Lab/Article_drafts/EMG_paper/files/Example_Gene.csv"
	if WRITE:
		X 		=  load.grab_specific_region("chr1",8012007, 8033978, 
			pos_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.pos.BedGraph", 
			neg_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.neg.BedGraph",
			SHOW 	=False, bins=300)
		# Shift column 0 so the first row sits at 0, then rescale by 1/100.
		X[:,0]-=X[0,0]
		X[:,0]/=100.
		write_out(X, OUT=OUT)
	# Always plot from the file so the figure reflects exactly what is on disk.
	X 		= load_IN(OUT)
	draw(X)
Пример #4
0

# Script entry point: load one chr1 region from the DMSO2_3 BedGraph pair,
# normalise its first column, and hand it to run_MM.
if __name__ == "__main__":
	IN  = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/"
	# Region coordinate notes (start-end, some prefixed with the chromosome).
	# The last entry is the region loaded below.
	#chr1:87,691,254-87,695,004
	#88,319,575-88320266
	#92,308,146-92,315,100
	#62,182,362-62,198,443
	#8,246,915-8,255,824
	#chr1:3,233,790-3,239,961
	#chr1:1,163,801-1,175,755
	#chr1:1,240,585-1,248,496
	#1,243,262-1,251,173
	#chr1:1,090,956-1,114,133
	#chr1:1,087,608-1,108,057
	X 	= load.grab_specific_region("chr1",1087608,1108057, SHOW=False, bins=100, 
		pos_file=IN+"DMSO2_3.pos.BedGraph", neg_file=IN+"DMSO2_3.neg.BedGraph" )
	# Shift column 0 so its minimum is 0, then divide it by `scale`.
	X[:,0]-=min(X[:,0])
	scale = 100
	window = 1000
	# NOTE(review): scale is an int; this in-place division assumes X has a
	# float dtype -- confirm what load.grab_specific_region returns.
	X[:,0]/=scale
	run_MM(X, window=window, scale=scale )









Пример #5
0
	N 		= sum(Y)
	XS 		= sum([ X[i]*Y[i] for i in range(len(X))]) 
	mean 	= XS/N
	var 	= sum([pow(X[i] - mean,2)*Y[i] for i in range(len(X))]) / N
	X2 		= sum([pow(X[i],2)*Y[i] for i in range(len(X))])
	print var, (X2 - 2*mean*XS + pow(mean,2)*N) /N

	
# Script entry point: load one chr1 region from the DMSO2_3 BedGraph pair,
# normalise its first column, and hand it to run_MM.
if __name__ == "__main__":
	IN  = "/Users/joazofeifa/Lab/gro_seq_files/HCT116/bed_graph_files/"
	# Region coordinate notes (start-end, some prefixed with the chromosome).
	# The chr1:162,105,107-162,113,041 entry is the region loaded below.
	#chr1:87,691,254-87,695,004
	#88,319,575-88320266
	#92,308,146-92,315,100
	#62,182,362-62,198,443
	#8,246,915-8,255,824
	#chr1:3,233,790-3,239,961
	#chr1:1,013,872-1,017,272
	#934,235-937,997
	#1,206,352-1,213,240
	#836,632-843,542
	#1,091,333-1,096,157
	#chr1:162,105,107-162,113,041
	#25,681-33,615
	X 	= load.grab_specific_region("chr1",162105107,162113041, SHOW=False, bins=500, 
		pos_file=IN+"DMSO2_3.pos.BedGraph", neg_file=IN+"DMSO2_3.neg.BedGraph" )
	# Shift column 0 so its minimum is 0, then divide it by `scale`.
	X[:,0]-=min(X[:,0])
	scale = 100
	window = 500
	# NOTE(review): scale is an int; this in-place division assumes X has a
	# float dtype -- confirm what load.grab_specific_region returns.
	X[:,0]/=scale
	run_MM(X, window=window, scale=scale )
	
Пример #6
0
# Script entry point: optionally dump one chr2 region to a tab-separated
# scratch file, then read that file back into a NumPy array X.
if __name__ == "__main__":
	#==================================
	#testing MAP-EM procedure

	# Alternative data source (simulated instead of loaded), kept for reference:
	# X 	= simulate.runOne(mu=0, s=1, l=10, lr=100, ll=-50, we=0.5,wl=0.25, wr=0.25, pie=0.5, pil=0.1, pir=0.9, 
	# 	N=1000, SHOW=False, bins=300, noise=False, foot_print=10 )
	# Region coordinate notes:
	# chr1:20,984,647-20,991,448
	#chr1:836,835-843,549
	#chr1:539,399-542,484
	#chr3:15,684,556-15,692,636
	#chr2:10,420,826-10,462,048
	# Flip WRITE to True to rebuild the scratch file from the BedGraph files.
	WRITE 	= False
	if WRITE:
		# Indentation here is tabs only; the original mixed tab+space+tab
		# with tab+tab inside this branch, which Python 3 rejects (TabError).
		X 		=  load.grab_specific_region("chr2",10420826, 10433237,
				pos_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.pos.BedGraph",
				neg_file="/Users/joazofeifa//Lab/gro_seq_files/HCT116/bed_graph_files/DMSO2_3.neg.BedGraph",
				SHOW 	=False, bins=1000)
		# Shift column 0 so the first row sits at 0, then rescale by 1/100.
		X[:,0]-=X[0,0]
		X[:,0]/=100.
		# `with` closes the handle even if a write fails part-way through.
		with open("/Users/joazofeifa/test.bed", "w") as FHW:
			for i in range(X.shape[0]):
				FHW.write(str(X[i,0]) + "\t" + str(X[i,1]) + "\t" + str(X[i,2]) + "\n")
	X 	= list()

	# Always rebuild X from the file so later steps see exactly what is on disk.
	with open("/Users/joazofeifa/test.bed") as FH:
		for line in FH:
			x,y,z 	= [float(x) for x in line.strip("\n").split("\t")]
			X.append([x,y,z])
	X 	= np.array(X)
Пример #7
0
def sample(X, k, std=1, lam=0.1):
    """Pick up to k distinct EM start positions, favouring the front of the candidate list.

    Candidates come from compute_possible_EM_starts(X, std=std, lam=lam);
    only its fourth return value (`starts`) is used. On each round an index
    is drawn as geometric(0.8) - 1, so index 0 is chosen with probability
    0.8; the chosen candidate is removed so it cannot be picked twice.

    Parameters:
        X   -- data passed through to compute_possible_EM_starts
        k   -- number of starts requested
        std -- forwarded to compute_possible_EM_starts
        lam -- forwarded to compute_possible_EM_starts

    Returns:
        A list holding element [0] of each chosen candidate. It has fewer
        than k entries when there are fewer than k candidates.
    """
    _, _, _, starts = compute_possible_EM_starts(X, std=std, lam=lam)
    # Work on a private copy so removals never touch the caller's object.
    starts = list(starts)
    keeps = list()
    for _ in range(k):
        if not starts:
            # Candidates exhausted: return what we have instead of raising.
            break
        # The geometric draw is unbounded; clamp it to the last valid index
        # so a large draw (or a short list) cannot raise IndexError.
        j = min(np.random.geometric(0.8) - 1, len(starts) - 1)
        keeps.append(starts[j][0])
        del starts[j]
    return keeps


# Script entry point: load one chr1 region, compute candidate EM starts,
# plot them, then fit an EMGU model to the same data.
if __name__ == "__main__":
    X = load.grab_specific_region("chr1",
                                  6229860,
                                  6303055,
                                  SHOW=True,
                                  bins=300)
    # Rescale column 0 by 1/100, then shift it so the first row sits at 0.
    X[:, 0] /= 100.
    X[:, 0] -= X[0, 0]

    coverage_scores, bayes_ks, hybrid, starts = compute_possible_EM_starts(
        X, std=1, lam=0.1)
    draw(X, coverage_scores, bayes_ks, hybrid, starts)
    # NOTE(review): K=3 is presumably the number of mixture components and
    # max_it the iteration cap -- confirm against model.EMGU.
    clf = model.EMGU(noise=True,
                     K=3,
                     noise_max=0.01,
                     moveUniformSupport=5,
                     max_it=50,
                     seed=True)
    clf.fit(X)
Пример #8
0
	#chr1:87,691,254-87,695,004
	#88,319,575-88320266
	#92,308,146-92,315,100
	#62,182,362-62,198,443
	#8,246,915-8,255,824
	#chr1:3,233,790-3,239,961
	#chr1:1,013,872-1,017,272
	#934,235-937,997
	#1,206,352-1,213,240
	#836,632-843,542
	#1,091,333-1,096,157
	#chr1:162,105,107-162,113,041
	#25,681-33,615
	#chr1:760,940-764,973
	#899,808-905,675
	#chr1:4,763,739-4,766,290
	#chr1:1,140,801-1,143,549
	#:1,200,396-1,202,629
	#:936,603-947,066
	#1,246,727-1,252,981

	#1,137,847-1,145,798
	print (1252981 - 1246727)/500.0
	X 	= load.grab_specific_region("chr1",1246727,1252981, SHOW=False, bins=500, 
		pos_file=IN+"DMSO2_3.pos.BedGraph", neg_file=IN+"DMSO2_3.neg.BedGraph" )
	X[:,0]-=min(X[:,0])
	scale = 100
	window = 500
	X[:,0]/=scale
	run_MM(X, window=window, scale=scale )