# Example #1
# 0
def updateMap(path, db, motifChrom="chr17", window=100):
    """Update per-motif mapability scores in the database from bedGraph files.

    Every file matching ``*<motifChrom>.bedGraph`` under ``path`` is treated
    as one experiment; the experiment name is the part of the file name that
    precedes ``motifChrom``. If the companion ``.bb`` file next to the
    bedGraph file does not exist yet, it is created with
    ``countBed.compressBed4``; it is then loaded with
    ``countBed.getBinBedCoord``. For each document in the collection
    ``db["hg19" + motifChrom]`` the field ``map.<expName>`` is set to the
    first element returned by ``countBed.queryHist`` for the document's
    ``genomic_region`` widened by ``window`` on both sides.

    Args:
        path: Directory searched for bedGraph files.
        db: Object indexable by collection name; the selected collection
            must provide ``find`` and ``update`` (presumably a PyMongo
            database -- confirm against the caller).
        motifChrom: Chromosome name used in the file pattern, the collection
            name and the histogram lookup.
        window: Amount subtracted from the region start and added to the
            region end before querying.

    Returns:
        Always 0.
    """
    mcollection = db["hg19" + motifChrom]
    pattern = os.path.join(path, "*" + motifChrom + ".bedGraph")
    for infile in glob.glob(pattern):
        mappath, mapfilename = os.path.split(infile)
        expName = mapfilename.split(motifChrom)[0]
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('updating %s %s' % (expName, motifChrom))

        bbFile = os.path.join(mappath, expName + motifChrom + '.bb')
        if not os.path.isfile(bbFile):
            # The text bedGraph is only needed to produce the .bb file, so
            # it is opened here and closed again before the database loop.
            with open(infile, 'rt') as bedGraphFile:
                countBed.compressBed4(bedGraphFile, expName, bbFile)
        coordDict, valuesDict = countBed.getBinBedCoord(bbFile, expName)

        # The histogram depends only on loop-invariant values; build it
        # lazily so nothing is computed when the collection is empty.
        hist = None
        for doc in mcollection.find():
            region = doc["genomic_region"]
            motifStart, motifEnd = region["start"], region["end"]
            if hist is None:
                hist = countBed.buildBedHist(motifChrom, coordDict,
                                             valuesDict, expName)
            xs, xvals, sums = hist
            avg = countBed.queryHist(xs, xvals, sums,
                                     motifStart - window,
                                     motifEnd + window)[0]
            # NOTE(review): if this is PyMongo, ``Collection.update`` is
            # deprecated since 3.0 and removed in 4.0 (``update_one`` is the
            # replacement) -- confirm the installed version before changing.
            mcollection.update({"_id": doc["_id"]},
                               {"$set": {"map." + expName: avg}},
                               upsert=True)
    return 0
# Example #2
# 0
def updateMap(path, tfName, window):
    """Update mapability scores of one factor's motifs from bedGraph files.

    Every ``*.bedGraph`` file under ``path`` is treated as one experiment,
    named after the part of the file name before the first ``.``. The file
    is parsed with ``countBed.getBed4Coord`` and, for every document of
    ``mcollection`` whose ``tf_name`` equals ``tfName``, the value returned
    by ``countBed.queryHist`` for the motif region widened by ``window`` is
    stored under ``motif_mapability_info.score.<expName>``.

    ``mcollection`` is not a parameter; it must exist as a module-level
    name providing ``find`` and ``update``.

    Args:
        path: Directory searched for bedGraph files.
        tfName: Value matched against the ``tf_name`` field of documents.
        window: Amount subtracted from the region start and added to the
            region end before querying.

    Returns:
        Always 0.
    """
    for infile in glob.glob(os.path.join(path, "*.bedGraph")):
        mapfilename = os.path.split(infile)[1]
        expName = mapfilename.split('.')[0]
        # Dotted path: set only this experiment's score. Assigning a whole
        # {"score": {expName: avg}} sub-document would replace
        # ``motif_mapability_info`` and drop the scores written for
        # previously processed bedGraph files.
        scoreField = "motif_mapability_info.score." + expName
        with open(infile, 'rt') as bedGraphFile:
            bedGraph = csv.reader(bedGraphFile, delimiter='\t')
            coordDict, valuesDict = countBed.getBed4Coord(bedGraph, expName)
            # One histogram per chromosome, built on first use.
            histByChrom = {}
            for doc in mcollection.find({"tf_name": tfName}):
                region = doc["motif_genomic_regions_info"]
                motifChrom = region["chr"]
                motifStart, motifEnd = region["start"], region["end"]
                if motifChrom not in histByChrom:
                    histByChrom[motifChrom] = countBed.buildBedHist(
                        motifChrom, coordDict, valuesDict, expName)
                xs, xvals, sums = histByChrom[motifChrom]
                avg, _size = countBed.queryHist(xs, xvals, sums,
                                                motifStart - window,
                                                motifEnd + window)

                if avg != 0:
                    # Pre-formatted so the output is the same under the
                    # Python 2 print statement and the Python 3 function.
                    print('%s %s %s' % (motifChrom, motifStart, motifEnd))
                    print('%s' % (avg,))
                # NOTE(review): if this is PyMongo, ``Collection.update`` is
                # deprecated since 3.0 and removed in 4.0 -- confirm the
                # installed version before switching to ``update_one``.
                mcollection.update({"_id": doc["_id"]},
                                   {"$set": {scoreField: avg}},
                                   upsert=True)
    return 0
# Example #3
# 0
def updateMap(inpath,infile,outpath,motifChrom,seqlen,window=100):
    """Write per-motif mapability scores for one chromosome to gzip files.

    ``infile`` (a bz2-compressed bedGraph file inside ``inpath``) is one
    experiment; its name is the part of the file name before ``motifChrom``.
    If ``<inpath>/<expName><motifChrom>.bb`` does not exist it is created
    with ``countBed.compressBed4``; it is then loaded with
    ``countBed.getBinBedCoord``. For every
    ``<outpath>/bedMotifs/*<motifChrom>.bed.gz`` file, one score per input
    row (columns 2 and 3 are read as integer start and end) is written, one
    per line, to
    ``<outpath>/mapMotifs/<tfname><motifChrom>map<seqlen>.txt.gz``.

    Args:
        inpath: Directory containing ``infile``; the ``.bb`` file is
            stored here as well.
        infile: Name of the bz2-compressed bedGraph file.
        outpath: Directory that must already contain ``bedMotifs``;
            ``mapMotifs`` is created inside it when missing.
        motifChrom: Chromosome name used in file names and the histogram
            lookup.
        seqlen: Value embedded (via ``str``) in the output file names.
        window: Amount subtracted from the region start and added to the
            region end before querying.

    Returns:
        Always 0.

    Raises:
        SystemExit: With status 1 when ``<outpath>/bedMotifs`` is not a
            directory.
    """
    motifdir = os.path.join(outpath, "bedMotifs")
    mapdir = os.path.join(outpath, "mapMotifs")
    if not os.path.isdir(motifdir):
        print("Error: path-to-motif-bed-files invalid, please specify a valid outpath to store all calculated scores.")
        # Non-zero status so that calling scripts can detect the failure;
        # a bare sys.exit() would report success.
        sys.exit(1)
    if not os.path.isdir(mapdir):
        os.mkdir(mapdir)

    mapinfile = os.path.join(inpath, infile)  # better in per chromosome bedGraph files
    mapfilename = os.path.split(mapinfile)[1]
    expName = mapfilename.split(motifChrom)[0]

    # A single pre-formatted argument prints the same text whether ``print``
    # is the Python 2 statement or the Python 3 function.
    print('updating %s %s' % (expName, motifChrom))

    bbFile = os.path.join(inpath, expName + motifChrom + '.bb')
    if not os.path.isfile(bbFile):
        # The compressed bedGraph is only needed to produce the .bb file.
        with bz2.BZ2File(mapinfile, 'r') as bedGraphFile:
            countBed.compressBed4(bedGraphFile, expName, bbFile)
    coordDict, valuesDict = countBed.getBinBedCoord(bbFile, expName)

    # The histogram depends only on loop-invariant values; it is built on
    # the first motif row and reused for every following row and file.
    hist = None
    for bedfile in glob.glob(os.path.join(motifdir, "*" + motifChrom + ".bed.gz")):
        tfname = os.path.split(bedfile)[1].split(motifChrom)[0]
        outname = os.path.join(
            mapdir, tfname + motifChrom + 'map' + str(seqlen) + '.txt.gz')
        # Context managers close both gzip handles even when a row fails to
        # parse; previously the input handle was never closed.
        # NOTE(review): modes 'r'/'w' are binary for gzip.open on Python 3,
        # where csv expects text -- use 'rt'/'wt' if this is ever ported.
        with gzip.open(bedfile, 'r') as gcoordsfile:
            gcoords = csv.reader(gcoordsfile, delimiter='\t')
            with gzip.open(outname, 'w') as mapfile:
                writer = csv.writer(mapfile, delimiter='\t')
                for fields in gcoords:
                    motifStart, motifEnd = int(fields[1]), int(fields[2])
                    if hist is None:
                        hist = countBed.buildBedHist(motifChrom, coordDict,
                                                     valuesDict, expName)
                    xs, xvals, sums = hist
                    avg = countBed.queryHist(xs, xvals, sums,
                                             motifStart - window,
                                             motifEnd + window)[0]
                    writer.writerow([avg])
    return 0