def updateCount(path, tfName):
    """Update the DNase accessibility score of every stored motif of a TF.

    Every ``*.wig`` file directly inside *path* is read as tab-delimited
    text (the files are expected to be in discontinuous variableStep wiggle
    format) and handed to ``countWig.getCoord``.  For each document in
    ``mcollection`` whose ``tf_name`` equals *tfName*, the first element of
    the ``countWig.queryHist`` result for the motif's genomic interval is
    written to ``ct_info -> accessibility_score -> "Dnase"`` and the
    document is saved back to the collection.

    NOTE(review): the score is stored under the fixed key ``"Dnase"``, so
    when *path* holds several wig files each processed file overwrites the
    score written for the previous one -- confirm this is intended.

    Args:
        path: Directory searched (non-recursively) for ``*.wig`` files.
        tfName: Value matched against the ``tf_name`` field of the stored
            motif documents.

    Returns:
        Always 0.
    """
    ##depends on the data type and source
    expName = "Dnase"  ##or add an expName parser line

    for infile in glob.glob(os.path.join(path, "*.wig")):
        wigfilename = os.path.basename(infile)
        # Cell-type name: the text after the last 'EncodeUwDnase' and before
        # the first 'Aln' in the file name.
        ctName = wigfilename.split('EncodeUwDnase')[-1].split('Aln')[0]

        # The file stays open until every motif has been scored for this
        # wig file (in case the getCoord results still read from it), and
        # is then closed even if an exception is raised.
        with open(infile, 'rt') as wigFile:
            wig = csv.reader(wigFile, delimiter='\t')
            coordDict, valuesDict = countWig.getCoord(wig, ctName)

            # Per-file cache: buildHist runs once per chromosome and its
            # result is reused for every motif on that chromosome.
            arrayDict = {}
            for motifDoc in mcollection.find({"tf_name": tfName}):
                region = motifDoc["motif_genomic_regions_info"]
                motifChrom = region["chr"]
                motifStart = region["start"]
                motifEnd = region["end"]

                if motifChrom not in arrayDict:
                    arrayDict[motifChrom] = countWig.buildHist(
                        motifChrom, coordDict, valuesDict, ctName)
                xs, xvals, sums = arrayDict[motifChrom]
                count = countWig.queryHist(
                    xs, xvals, sums, motifStart, motifEnd)[0]

                motifDoc["ct_info"]["accessibility_score"][expName] = count
                # NOTE(review): assuming mcollection is a PyMongo collection,
                # save() is deprecated in PyMongo 3 and removed in PyMongo 4
                # (replace_one is the documented successor) -- confirm the
                # driver version before upgrading.
                mcollection.save(motifDoc)
    return 0
def getCount(path, tfName):
    """Score the stored motifs of a TF against every wig file in a directory.

    Despite its name this function does not return the counts: it performs
    the same steps as ``updateCount``.  For each ``*.wig`` file directly
    inside *path* and each document in ``mcollection`` whose ``tf_name``
    equals *tfName*, the first element of the ``countWig.queryHist`` result
    for the motif's genomic interval is stored under
    ``ct_info -> accessibility_score -> "Dnase"`` and the document is saved.

    NOTE(review): the fixed ``"Dnase"`` key means each processed wig file
    overwrites the score written for the previous one -- confirm intended.

    Args:
        path: Directory searched (non-recursively) for ``*.wig`` files.
        tfName: Value matched against the ``tf_name`` field of the stored
            motif documents.

    Returns:
        Always 0.
    """
    ##depends on the data type and source
    methodName = "Dnase"

    for infile in glob.glob(os.path.join(path, "*.wig")):
        wigfilename = os.path.basename(infile)
        # Cell-type name: what follows the last 'EncodeUwDnase' and precedes
        # the first 'Aln' in the file name.
        ctName = wigfilename.split('EncodeUwDnase')[-1].split('Aln')[0]

        # Context manager closes the wig file once this file's motifs are
        # all processed (or on error); it was previously left open.
        with open(infile, 'rt') as wigFile:
            wig = csv.reader(wigFile, delimiter='\t')
            coordDict, valuesDict = countWig.getCoord(wig, ctName)

            # Histogram data built lazily, once per chromosome per wig file.
            arrayDict = {}
            for motifDoc in mcollection.find({"tf_name": tfName}):
                region = motifDoc["motif_genomic_regions_info"]
                motifChrom, motifStart, motifEnd = (
                    region["chr"], region["start"], region["end"])

                if motifChrom not in arrayDict:
                    arrayDict[motifChrom] = countWig.buildHist(
                        motifChrom, coordDict, valuesDict, ctName)
                xs, xvals, sums = arrayDict[motifChrom]
                count = countWig.queryHist(
                    xs, xvals, sums, motifStart, motifEnd)[0]

                motifDoc["ct_info"]["accessibility_score"][methodName] = count
                mcollection.save(motifDoc)
    return 0