def updateMap(path, db, motifChrom="chr17", window=100):
    """Write per-motif mapability averages from bedGraph tracks into MongoDB.

    Every file in ``path`` whose name matches ``*<motifChrom>.bedGraph`` is
    treated as one experiment.  For each document in the collection
    ``db["hg19" + motifChrom]`` the track is queried over the motif's
    ``genomic_region`` extended by ``window`` on both sides, and the first
    element of the query result is stored under ``map.<experiment name>``.

    Args:
        path: Directory that holds the per-chromosome bedGraph files.
        db: Mapping-like database handle; indexed with the collection name.
        motifChrom: Chromosome label used in both the file-name pattern and
            the collection name.
        window: Number of positions added to each side of the motif region.

    Returns:
        Always 0.
    """
    collection = db["hg19" + motifChrom]
    track_pattern = os.path.join(path, "*" + motifChrom + ".bedGraph")

    for track_path in glob.glob(track_pattern):
        track_dir, track_name = os.path.split(track_path)
        # The experiment name is whatever precedes the chromosome label.
        exp_name = track_name.split(motifChrom)[0]
        print("updating %s %s" % (exp_name, motifChrom))

        bb_path = os.path.join(track_dir, exp_name + motifChrom + '.bb')
        with open(track_path, 'rt') as track_file:
            # The .bb file acts as a cache: it is only generated when missing.
            if not os.path.isfile(bb_path):
                countBed.compressBed4(track_file, exp_name, bb_path)

        coords, values = countBed.getBinBedCoord(bb_path, exp_name)

        # Built on first use so nothing is computed for an empty collection.
        hist = None
        score_field = "map." + exp_name
        for doc in collection.find():
            region = doc["genomic_region"]
            start = region["start"]
            end = region["end"]
            if hist is None:
                hist = countBed.buildBedHist(motifChrom, coords, values,
                                             exp_name)
            xs, xvals, sums = hist
            result = countBed.queryHist(xs, xvals, sums,
                                        start - window, end + window)
            collection.update({"_id": doc["_id"]},
                              {"$set": {score_field: result[0]}},
                              upsert=True)
    return 0
def updateMap(path, tfName, window):
    """Update mapability scores of one TF's motifs from bedGraph files.

    Every ``*.bedGraph`` file in ``path`` is one experiment, named after the
    part of the file name before the first ``.``.  For each document with
    ``tf_name == tfName`` the track is queried over the motif region extended
    by ``window`` on both sides, and the resulting average is stored under
    ``motif_mapability_info.score.<experiment name>``.

    The score is written with a dotted field path so that scores from
    several experiments accumulate; setting the whole
    ``motif_mapability_info`` sub-document would discard the scores written
    for previously processed files.

    NOTE(review): relies on a module-level ``mcollection`` that is not
    defined in this function -- confirm it is set before calling.

    Args:
        path: Directory that holds the bedGraph files.
        tfName: Value matched against the ``tf_name`` field of the documents.
        window: Number of positions added to each side of the motif region.

    Returns:
        Always 0.
    """
    for infile in glob.glob(os.path.join(path, "*.bedGraph")):
        expName = os.path.basename(infile).split('.')[0]
        score_field = "motif_mapability_info.score." + expName
        with open(infile, 'rt') as bedGraphFile:
            bedGraph = csv.reader(bedGraphFile, delimiter='\t')
            coordDict, valuesDict = countBed.getBed4Coord(bedGraph, expName)
            # One histogram per chromosome, built the first time it is needed.
            histByChrom = {}
            for motif in mcollection.find({"tf_name": tfName}):
                region = motif["motif_genomic_regions_info"]
                motifChrom = region["chr"]
                motifStart = region["start"]
                motifEnd = region["end"]
                if motifChrom not in histByChrom:
                    histByChrom[motifChrom] = countBed.buildBedHist(
                        motifChrom, coordDict, valuesDict, expName)
                xs, xvals, sums = histByChrom[motifChrom]
                avg = countBed.queryHist(xs, xvals, sums,
                                         motifStart - window,
                                         motifEnd + window)[0]
                if avg != 0:
                    print("%s %s %s" % (motifChrom, motifStart, motifEnd))
                    print(avg)
                mcollection.update({"_id": motif["_id"]},
                                   {"$set": {score_field: avg}},
                                   upsert=True)
    return 0
def updateMap(inpath, infile, outpath, motifChrom, seqlen, window=100):
    """Write per-motif mapability averages for one chromosome to text files.

    Reads the bz2-compressed track ``inpath/infile`` and, for every
    ``*<motifChrom>.bed.gz`` file in ``outpath/bedMotifs``, writes one
    gzip-compressed, tab-delimited file to ``outpath/mapMotifs`` named
    ``<tf><motifChrom>map<seqlen>.txt.gz``.  Each output row holds a single
    value: the first element of the query result over the motif region
    (columns 2 and 3 of the bed row) extended by ``window`` on both sides.

    Args:
        inpath: Directory of the track file; the ``.bb`` cache is also
            created here.
        infile: File name of the bz2-compressed per-chromosome track.
        outpath: Directory containing ``bedMotifs``; ``mapMotifs`` is created
            inside it when missing.
        motifChrom: Chromosome label used to derive names and match files.
        seqlen: Value embedded in the output file names.
        window: Number of positions added to each side of the motif region.

    Returns:
        Always 0.

    Raises:
        SystemExit: With status 1 when ``outpath/bedMotifs`` is not a
            directory.
    """
    motifdir = os.path.join(outpath, "bedMotifs")
    mapdir = os.path.join(outpath, "mapMotifs")
    if not os.path.isdir(motifdir):
        print("Error: path-to-motif-bed-files invalid, please specify a valid outpath to store all calculated scores.")
        # Exit non-zero so calling scripts can detect the failure.
        sys.exit(1)
    if not os.path.isdir(mapdir):
        os.mkdir(mapdir)

    mapinfile = os.path.join(inpath, infile)
    # The experiment name is whatever precedes the chromosome label.
    expName = os.path.basename(mapinfile).split(motifChrom)[0]
    print("updating %s %s" % (expName, motifChrom))

    bbFile = os.path.join(inpath, expName + motifChrom + '.bb')
    with bz2.BZ2File(mapinfile, 'r') as bedGraphFile:
        # The .bb file acts as a cache: it is only generated when missing.
        if not os.path.isfile(bbFile):
            countBed.compressBed4(bedGraphFile, expName, bbFile)
    coordDict, valuesDict = countBed.getBinBedCoord(bbFile, expName)

    # Built on first use and shared by all motif files of this chromosome.
    hist = None
    for bedfile in glob.glob(os.path.join(motifdir,
                                          "*" + motifChrom + ".bed.gz")):
        tfname = os.path.basename(bedfile).split(motifChrom)[0]
        outname = os.path.join(
            mapdir, tfname + motifChrom + 'map' + str(seqlen) + '.txt.gz')
        # Context managers close both gzip handles even if a row fails.
        with gzip.open(bedfile, 'r') as gcoordsfile:
            with gzip.open(outname, 'w') as mapfile:
                writer = csv.writer(mapfile, delimiter='\t')
                for fields in csv.reader(gcoordsfile, delimiter='\t'):
                    motifStart, motifEnd = int(fields[1]), int(fields[2])
                    if hist is None:
                        hist = countBed.buildBedHist(
                            motifChrom, coordDict, valuesDict, expName)
                    xs, xvals, sums = hist
                    avg = countBed.queryHist(xs, xvals, sums,
                                             motifStart - window,
                                             motifEnd + window)[0]
                    writer.writerow([avg])
    return 0