예제 #1
0
def gccontentforonechr(fastarep,chrsizepath,species,GCoutrep,resolution):
	"""Build one GC-content vector covering every chromosome of a species.

	For each chromosome name in ``utils.dictchr[species]`` the file
	``fastarep + name + ".fa"`` is read, its first line is discarded
	(presumably the FASTA header), and the remaining lines are joined into
	one sequence string. The sequence is cut into consecutive windows of
	``resolution`` characters and ``gcpercent`` of each window is stored in
	the next free slot of a single vector shared by all chromosomes.

	Args:
		fastarep: Path prefix of the per-chromosome ``.fa`` files; it is
			concatenated directly with the chromosome name.
		chrsizepath: Path handed to ``utils.loadchrsizedict``.
		species: Key into ``utils.dictchr`` selecting the chromosome names.
		GCoutrep: Path prefix of the two output files.
		resolution: Window size in sequence characters; converted with
			``int()``.

	Returns:
		None. The vector is written to
		``GCoutrep + str(resolution) + "pbGCvec.mat"`` (under the key
		``'mat'``) and to ``GCoutrep + str(resolution) + "pbGCvec.csv"``.
	"""
	resolution=int(resolution)
	sizedict=utils.loadchrsizedict(chrsizepath,resolution)
	# Total number of bins over all chromosomes, as reported by the helper.
	mat=np.zeros(sizedict["HicUsedTotalSize"])
	k=0 # next free slot in mat; keeps increasing from one chromosome to the next
	for chrname in utils.dictchr[species]:
		print(chrname)
		# The context manager closes the file even if reading fails.
		with open(fastarep+chrname+".fa","r") as fasta:
			fasta.readline() # skip the first line
			# One read + one replace instead of growing a string line by line.
			seq=fasta.read().replace("\n","")
		seqlen=len(seq)
		if resolution>seqlen:
			# Sequence shorter than one window: it gets a single bin.
			mat[k]=gcpercent(seq)
			k+=1
		# A window is kept only while its end is strictly below seqlen, so the
		# trailing partial window (and a last window ending exactly at seqlen)
		# is not written.
		# NOTE(review): confirm this matches the per-chromosome bin count that
		# utils.loadchrsizedict uses to compute "HicUsedTotalSize".
		for start in range(0,seqlen-resolution,resolution):
			mat[k]=gcpercent(seq[start:start+resolution])
			k+=1
	# savemat expects a dict mapping variable names to arrays.
	scpio.savemat(GCoutrep+str(resolution)+"pbGCvec.mat",{'mat':mat})
	utils.savematrixasfilelist3(mat,GCoutrep+str(resolution)+"pbGCvec.csv")
예제 #2
0
def transformHiClistofpatterninDensity(setlist,TEtype,nameout):
	"""Count ``TEtype`` in every entry of ``setlist`` and save the counts.

	Args:
		setlist: Sequence whose entries each provide a ``count`` method.
		TEtype: Value passed to ``count`` for every entry.
		nameout: Output path handed to ``utils.savematrixasfilelist3``.

	Returns:
		None. The list of counts, in the order of ``setlist``, is written
		through ``utils.savematrixasfilelist3``.
	"""
	densities=[entry.count(TEtype) for entry in setlist]
	utils.savematrixasfilelist3(densities,nameout)
예제 #3
0
# Load the matrix region selected by beginfend/endfend from matrixfilename.
basemat=convert.loadmatrixselected(matrixfilename,beginfend,endfend)

# Matrix filtering: drop the rows and columns flagged by get_outliers.
print("FILTERING")
pos_out=HiCutils.get_outliers(basemat)
# presumably pos_out is a boolean mask (True = outlier bin), since it is
# negated with ~ to select what is kept -- TODO confirm
basematfilter=basemat[np.ix_(~pos_out, ~pos_out)]
basematfilter=np.copy(basematfilter)
# Debug aid: uncomment to restrict the run to the first 1000 bins.
#basematfilter=basematfilter[0:1000,0:1000]
print(len(basemat),len(basematfilter))
# Each HDF5 file is opened in a with-block so it is closed even when the
# write raises.
with h5py.File(repositoryout+"inputmat.hdf5", "w") as fh5:
	fh5['data'] = basemat
with h5py.File(repositoryout+"inputmat_filtered.hdf5", "w") as fh5:
	fh5['data']=basematfilter
# Save the mask so the removed bins can be identified later.
utils.savematrixasfilelist3(pos_out,repositoryout+"filteredbin.txt")

# Run the requested operation on the filtered matrix; any other value of
# Operation does nothing further.
if Operation=="Boost":
	print("Boost Hic")
	boosted=BoostHiC(basematfilter)
	#save
	with h5py.File(repositoryout+"boostedmat.hdf5", "w") as fh5:
		fh5['data']=boosted
elif Operation=="Sample":
	print("SAMPLING")
	Sample(basematfilter,repositoryout)