def gccontentforonechr(fastarep, chrsizepath, species, GCoutrep, resolution):
    """Build a per-window ``gcpercent`` vector over the chromosomes of *species*.

    For every chromosome name in ``utils.dictchr[species]`` the file
    ``fastarep + name + ".fa"`` is read: its first line (the FASTA header) is
    skipped and the remaining lines are concatenated into one sequence.  The
    sequence is cut into consecutive windows of ``resolution`` characters and
    ``gcpercent`` of each window is stored in the next free slot of a single
    vector shared by all chromosomes, in ``utils.dictchr[species]`` order.

    The vector is then written twice, as
    ``GCoutrep + str(resolution) + "pbGCvec.mat"`` (under the key ``"mat"``)
    and as ``GCoutrep + str(resolution) + "pbGCvec.csv"``.

    Args:
        fastarep: Path prefix of the per-chromosome FASTA files; it is
            concatenated as-is, so it must end with a path separator.
        chrsizepath: Passed to ``utils.loadchrsizedict`` to obtain the total
            vector length (key ``"HicUsedTotalSize"``).
        species: Key into ``utils.dictchr`` selecting the chromosome names.
        GCoutrep: Path prefix of the two output files (concatenated as-is).
        resolution: Window size in sequence characters; converted with
            ``int()``.

    Raises:
        ValueError: If ``resolution`` is not strictly positive.
    """
    resolution = int(resolution)
    if resolution <= 0:
        # A non-positive window size can never advance through the sequence.
        raise ValueError("resolution must be a positive integer")

    sizedict = utils.loadchrsizedict(chrsizepath, resolution)
    # Only the total size is used here; the original comment also mentions
    # "HicChrBegin<chr>"-style keys — presumably per-chromosome offsets
    # (TODO confirm against utils.loadchrsizedict).
    mat = np.zeros(sizedict["HicUsedTotalSize"])

    k = 0  # next free slot in mat, carried across chromosomes
    for chrname in utils.dictchr[species]:
        print(chrname)
        # The context manager closes the file even if reading fails.
        with open(fastarep + chrname + ".fa", "r") as fasta:
            fasta.readline()  # discard the header line
            # join() keeps the concatenation linear in the sequence length.
            seq = "".join(line.replace("\n", "") for line in fasta)

        seqlen = len(seq)
        if resolution > seqlen:
            # Sequence shorter than one window: score it as a single bin.
            mat[k] = gcpercent(seq)
            k += 1
        # NOTE(review): a window is scored only when it ends strictly before
        # the end of the sequence.  For a sequence longer than `resolution`
        # the final (possibly partial) window is therefore skipped, and a
        # sequence of exactly `resolution` characters yields no value at all.
        # Kept as in the original; confirm against the bin count produced by
        # utils.loadchrsizedict.
        for end in range(resolution, seqlen, resolution):
            mat[k] = gcpercent(seq[end - resolution:end])
            k += 1

    outprefix = GCoutrep + str(resolution) + "pbGCvec"
    # savemat takes a dict mapping MATLAB variable names to arrays.
    scpio.savemat(outprefix + ".mat", {"mat": mat})
    utils.savematrixasfilelist3(mat, outprefix + ".csv")
def transformHiClistofpatterninDensity(setlist, TEtype, nameout):
    """Count *TEtype* in every entry of *setlist* and save the counts.

    Each entry of ``setlist`` must provide a ``count`` method (e.g. a list or
    a string).  The resulting list holds one count per entry, in the same
    order, and is handed to ``utils.savematrixasfilelist3`` together with
    ``nameout``.  Nothing is returned.

    Args:
        setlist: Sequence of countable entries.
        TEtype: Value whose occurrences are counted in each entry.
        nameout: Output name forwarded to ``utils.savematrixasfilelist3``.
    """
    counts = [entry.count(TEtype) for entry in setlist]
    utils.savematrixasfilelist3(counts, nameout)
# Load the selected part of the input matrix, drop the bins flagged as
# outliers, save both versions, then run the requested operation.
# Relies on names defined earlier in the script: matrixfilename, beginfend,
# endfend, repositoryout (output path prefix, concatenated as-is) and
# Operation.
basemat = convert.loadmatrixselected(matrixfilename, beginfend, endfend)

# Matrix filtering.
print("FILTERING")
# pos_out is presumably a boolean mask over bins (True = outlier) — it is
# negated with `~` and used to index both axes; TODO confirm in HiCutils.
pos_out = HiCutils.get_outliers(basemat)
# Keep only the rows and columns of the bins that are not flagged.
basematfilter = basemat[np.ix_(~pos_out, ~pos_out)]
basematfilter = np.copy(basematfilter)
# Debugging aid: uncomment to work on a 1000x1000 corner only.
#basematfilter=basematfilter[0:1000,0:1000]
print(len(basemat), len(basematfilter))

# Each HDF5 file is written inside a context manager so that it is closed
# even if storing the dataset fails.
with h5py.File(repositoryout + "inputmat.hdf5", "w") as fh5:
    fh5['data'] = basemat
with h5py.File(repositoryout + "inputmat_filtered.hdf5", "w") as fh5:
    fh5['data'] = basematfilter
# Save the outlier flags next to the matrices.
utils.savematrixasfilelist3(pos_out, repositoryout + "filteredbin.txt")

# Any other value of Operation ends the run after the files above are written.
if Operation == "Boost":
    print("Boost Hic")
    boosted = BoostHiC(basematfilter)
    # Save the boosted matrix.
    with h5py.File(repositoryout + "boostedmat.hdf5", "w") as fh5:
        fh5['data'] = boosted
elif Operation == "Sample":
    print("SAMPLING")
    Sample(basematfilter, repositoryout)