def _writeContent(self, fn, header, resDictKey, coreCls):
    """Write the local (per-region) result table for resDictKey to file fn,
    using coreCls (e.g. an HtmlCore-like class) to produce the markup."""
    keys = self._getKeys(resDictKey)
    core = coreCls()
    core.begin()
    core.bigHeader(header)
    core.header('Local result table for ' + resDictKey)
    numRegions = len(self._results.getAllRegionKeys())
    if numRegions > MAX_LOCAL_RESULTS_IN_TABLE:
        # Bugfix: the original referenced an undefined name 'numUserBins' here,
        # raising NameError whenever this branch was taken.
        core.line('Local results were not printed because of the large number of bins: ' \
                  + str(numRegions) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE))
    else:
        core.tableHeader([str(coreCls().textWithHelp(baseText, helpText))
                          for baseText, helpText in
                          ([('Region', '')] +
                           [self._results.getLabelHelpPair(key) for key in keys])])
        for regionKey in self._results.getAllRegionKeys():
            if self._results[regionKey].get(resDictKey) is None:
                # No result computed for this region: fill the row with blanks.
                core.tableLine([str(regionKey)] + [None] * len(keys))
            else:
                core.tableLine([str(regionKey)] +
                               [strWithStdFormatting(self._results[regionKey][resDictKey].get(key))
                                for key in keys])
        core.tableFooter()
    core.end()
    ensurePathExists(fn)
    # Close the output file deterministically instead of leaking the handle.
    with open(fn, 'w') as outFile:
        outFile.write(str(core))
def createMapType(mapId, genome, rowTrackName, colTrackName, col2GeneListFn, galaxyId, countType):
    """Set up the on-disk directory, lookup shelves and base-track-name files
    for a Google-Maps-style map identified by mapId."""
    commonDir = '/'.join([STATIC_PATH, 'maps', 'common'])
    mapIdDir = '/'.join([commonDir, mapId])
    ensurePathExists(mapIdDir + '/test')
    # Case-insensitive name-lookup shelves for rows and columns.
    for trackName, shelfBaseName in [(rowTrackName, 'rowLowerCaseName2Name.shelf'),
                                     (colTrackName, 'colLowerCaseName2Name.shelf')]:
        makeLowercaseName2NameShelfFromTnSubTypes(
            genome, trackName, '/'.join([mapIdDir, shelfBaseName]))
    # Record the base track names for later lookup.
    for baseFileName, trackName in [('rowBaseTrackName.txt', rowTrackName),
                                    ('colBaseTrackName.txt', colTrackName)]:
        baseTrackNameFile = open('/'.join([mapIdDir, baseFileName]), 'w')
        baseTrackNameFile.write(trackName + '\n')
        baseTrackNameFile.close()
    if col2GeneListFn != 'None':
        shutil.copy(col2GeneListFn, '/'.join([mapIdDir, 'col2GeneList.shelf']))
    createShelvesBehindRankedGeneLists(galaxyId, mapId, countType)
def execute(cls, choices, galaxyFn=None, username=''):
    """Extract every track of the chosen GSuite into the selected file format,
    composing a new GSuite that points at the extracted files."""
    basisTrackNameAsList = choices.basisTrack.split(':')
    extractionOptions = dict(GalaxyInterface.getTrackExtractionOptions(
        choices.genome, basisTrackNameAsList))
    if extractionOptions:
        extractionFormat = extractionOptions[choices.extFormatLbl]
    else:
        extractionFormat = None

    inGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    outGSuite = GSuite()
    for inTrack in inGSuite.allTracks():
        outFileName = inTrack.title + '.' + extractionFormat
        outGalaxyFn = ExternalTrackManager.createGalaxyFilesFn(galaxyFn, outFileName)
        ensurePathExists(outGalaxyFn)
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn, extraFileName=outFileName)
        GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
            choices.genome, basisTrackNameAsList, inTrack.suffix, inTrack.path,
            True, choices.extFormatLbl, outGalaxyFn)
        outGSuite.addTrack(GSuiteTrack(uri,
                                       title=inTrack.title,
                                       fileFormat=inTrack.fileFormat,
                                       trackType=inTrack.trackType,
                                       genome=choices.genome,
                                       attributes=inTrack.attributes))
    GSuiteComposer.composeToFile(outGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    """Collapse a (category) track into non-overlapping category segments,
    running the analysis once per chromosome and composing the result as BED."""
    # Hoisted out of the per-chromosome loop (they were re-executed each
    # iteration) and removed the unused 'start = time.time()' leftover.
    from gold.origdata.TrackGenomeElementSource import TrackViewGenomeElementSource
    from gold.origdata.BedComposer import CategoryBedComposer

    genome = choices[0]
    trackName = choices[1].split(':')
    outFn = galaxyFn
    if choices[5] == 'Write to Standardised file':
        # Redirect the output into the standardised track hierarchy.
        outFn = createOrigPath(genome, choices[-1].split(':'), 'collapsed_result.bedgraph')
        ensurePathExists(outFn[:outFn.rfind('/') + 1])
    combineMethod = choices[2]
    category = choices[3] if choices[3] else ''
    numSamples = choices[4] if choices[4] else '1'
    analysisDef = 'dummy [combineMethod=%s] %s [numSamples=%s] -> ConvertToNonOverlappingCategorySegmentsPythonStat' % \
        (combineMethod, '[category=%s]' % category if category != '' else '', numSamples)  #'Python'
    for regSpec in GenomeInfo.getChrList(genome):
        res = GalaxyInterface.runManual([trackName], analysisDef, regSpec, '*', genome,
                                        username=username, printResults=False,
                                        printHtmlWarningMsgs=False)
        for resDict in res.values():
            trackView = resDict['Result']
            tvGeSource = TrackViewGenomeElementSource(genome, trackView, trackName)
            # NOTE(review): each chromosome overwrites outFn in turn, so only
            # the last chromosome's result survives -- confirm this is intended.
            CategoryBedComposer(tvGeSource).composeToFile(outFn)
def _getUriForDownloadedAndUncompressedTrackPossiblyCached(
        self, gSuiteTrack, galaxyFn, uncomprSuffix, extraFileName=None):
    # Return a Galaxy URI for the uncompressed contents of gSuiteTrack,
    # downloading and uncompressing it unless a valid cached copy exists.
    from gold.gsuite.GSuiteTrackCache import GSUITE_TRACK_CACHE
    cache = GSUITE_TRACK_CACHE
    if cache.isCached(gSuiteTrack):
        cachedUri = cache.getCachedGalaxyUri(gSuiteTrack)
        # Only trust the cache entry if the file it points to still exists.
        if os.path.exists(GSuiteTrack(cachedUri).path):
            return cache.getCachedGalaxyUri(gSuiteTrack)
    if extraFileName:
        from quick.application.ExternalTrackManager import ExternalTrackManager
        outGalaxyFn = ExternalTrackManager.createGalaxyFilesFn(
            galaxyFn, extraFileName)
        ensurePathExists(outGalaxyFn)
        if extraFileName.endswith('.' + uncomprSuffix):
            # The file name already carries the uncompressed suffix.
            uri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn, extraFileName=extraFileName)
        else:
            uri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn,
                extraFileName=extraFileName,
                suffix=uncomprSuffix)
    else:
        outGalaxyFn = galaxyFn
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=outGalaxyFn,
                                            suffix=uncomprSuffix)
    # Download (and uncompress, if needed) into outGalaxyFn.
    uncompressorAndDownloader = GSuiteTrackUncompressorAndDownloader()
    uncompressorAndDownloader.visit(gSuiteTrack, outGalaxyFn)
    if cache.shouldBeCached(gSuiteTrack):
        cache.cache(gSuiteTrack, uri)
    return uri
def composeToFile(gSuite, outFileName):
    """Compose gSuite into the file outFileName; returns the result of the
    common composition routine (whether composition succeeded)."""
    # Removed the leftover debug print of the output filename.
    ensurePathExists(outFileName)
    with open(outFileName, 'w') as out:
        ok = _composeCommon(gSuite, out)
    return ok
def execute(cls, choices, galaxyFn=None, username=''):
    """For each segment of the chosen track, compute the distance to the nearest
    segment in the same track, composing the result as a GTrack file."""
    from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
    from gold.origdata.GtrackComposer import StdGtrackComposer

    genome = choices[0]
    if choices[1] == 'track':
        trackName = choices[2].split(':')
    else:
        trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome, choices[2].split(':'))

    outFn = galaxyFn
    if choices[4] == 'Write to Standardised file':
        # Redirect the output into the standardised track hierarchy.
        outFn = createOrigPath(genome, choices[-1].split(':'), 'collapsed_result.bedgraph')
        ensurePathExists(outFn[:outFn.rfind('/') + 1])

    threshold = choices[3]
    analysisDef = 'dummy [threshold=%s] -> ForEachSegmentDistToNearestInSameTrackStat' % threshold  #'Python'
    res = GalaxyInterface.runManual([trackName], analysisDef, '*', '*', genome,
                                    username=username, printResults=False,
                                    printHtmlWarningMsgs=False)
    tvGeSource = TrackViewListGenomeElementSource(
        genome, [x['Result'] for x in res.values()], trackName)
    StdGtrackComposer(tvGeSource).composeToFile(outFn)
def writeRegionListToBedFile(regList, fn):
    """Write regList (objects with .chr/.start/.end) to fn as a 3-column BED
    file. A None regList produces an empty file."""
    from quick.util.CommonFunctions import ensurePathExists
    ensurePathExists(fn)
    # Context manager so the handle is closed even if a write fails
    # (the original leaked it on error).
    with open(fn, 'w') as f:
        # Tolerate a None region list, consistent with the sibling variant of
        # this function elsewhere in the file.
        if regList is not None:
            for reg in regList:
                f.write('\t'.join([reg.chr, str(reg.start), str(reg.end)]) + os.linesep)
def generateSynGSuite(cls, dataOut, galaxyFn, genome):
    # Generate one synthetic BED track per parameter tuple in dataOut and
    # compose them into a GSuite written to the 'synthetic GSuite' output file.
    outGSuite = GSuite()
    g = SimulationPointIter()
    newData = ''
    chrNum = 0
    for chr in dataOut:
        # chr is a parameter tuple; fields 0-5 are forwarded to the simulator.
        # fileName = 'syn-chr' + 'iInterR-' + str(chr[0]) + 'st-' + str(chr[1]) + 'end-' + str(
        #     chr[2]) + 'iInterR-' + str(chr[3]) + 'iIntraR-' + str(chr[4]) + 'prob-' + str(chr[5]) + '--' + str(
        #     chrNum)
        fileName = 'syn-' + str(chr[0]) + ',' + str(chr[1]) + ',' + str(chr[2]) + ',' + str(chr[3]) +',' + str(chr[4]) +',' + str(chr[5])
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                            extraFileName=fileName,
                                            suffix='bed')
        gSuiteTrack = GSuiteTrack(uri)
        outFn = gSuiteTrack.path
        ensurePathExists(outFn)
        g.createChrTrack(genome, chr[0], PointIter, outFn, chr[3], chr[4], chr[5], chr[1], chr[2])
        # Accumulate the contents of every generated file.
        with open(outFn, 'r') as outputFile:
            newData += ''.join(outputFile.readlines())
        chrNum += 1
        if chrNum == len(dataOut):
            # NOTE(review): on the final iteration the accumulated data of ALL
            # tracks is written into the LAST track's file, overwriting it --
            # confirm this combined-output behaviour is intended.
            with open(outFn, 'w') as outputFile:
                outputFile.write(newData)
        # ''.join on a str is a no-op; title equals fileName.
        outGSuite.addTrack(GSuiteTrack(uri, title=''.join(fileName), genome=genome))
    GSuiteComposer.composeToFile(outGSuite, cls.extraGalaxyFn['synthetic GSuite'])
def _commonWriteContent(self, fn, header, coreCls, headerPrefix, numElements):
    """Write a result table (or a too-many-bins notice, for HTML output) to fn
    using the coreCls markup class."""
    core = coreCls()
    core.begin()
    # Styled info box holding the headers.
    core.styleInfoBegin(styleClass="infomessagesmall",
                        style='padding: 5px; margin-bottom: 10px; ' +
                              'background-image: none; background-color: #FFFC8C; ')
    core.header(headerPrefix)
    core.smallHeader(header)
    core.styleInfoEnd()
    tooManyBinsForHtml = isinstance(core, HtmlCore) and numElements > MAX_LOCAL_RESULTS_IN_TABLE
    if tooManyBinsForHtml:
        core.line('Local results were not printed because of the large number of bins: ' \
                  + str(numElements) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE))
    else:
        self._writeTable(core, coreCls)
    core.end()
    ensurePathExists(fn)
    with open(fn, 'w') as outFile:
        outFile.write(str(core))
def _writeContent(self, resDictKey, fn):
    # Compose all local (per-bin) track-view results into a single BED or
    # GTrack file at fn, depending on the track format.
    ensurePathExists(fn)
    arbitraryTV = self._results.getArbitraryLocalResult()['Result']
    assert isinstance(arbitraryTV, TrackView)
    #assert arbitraryTV.trackFormat.getFormatName() in [ 'Valued segments', 'Segments'], arbitraryTV.trackFormat.getFormatName()
    genome = arbitraryTV.genomeAnchor.genome
    #print 'GENOME: ',genome
    from gold.util.CommonFunctions import getClassName
    # NOTE(review): debug print left in; consider removing.
    print type([ self._results[key]['Result'] for key in sorted(self._results.keys()) ][0]), getClassName([ self._results[key]['Result'] for key in sorted(self._results.keys()) ][0])
    # NOTE(review): the track name below is hard-coded to one specific private
    # track -- presumably a debugging leftover; confirm before relying on it.
    tvGeSource = TrackViewListGenomeElementSource(
        genome,
        [ self._results[key]['Result'] for key in sorted(self._results.keys()) ],
        'Private:GK:test1:wgEncodeUchicagoTfbsK562EfosControlPk'.split(':'))
    if arbitraryTV.trackFormat.getFormatName() in ['Segments']:
        BedComposer(tvGeSource).composeToFile(fn)
    else:
        # All other formats (e.g. valued segments) go to GTrack.
        StdGtrackComposer(tvGeSource).composeToFile(fn)
def execute(cls, choices, galaxyFn=None, username=''):
    """Collapse a (category) track into non-overlapping category segments,
    running the analysis once per chromosome and composing the result as BED."""
    # Hoisted out of the per-chromosome loop (they were re-executed each
    # iteration) and removed the unused 'start = time.time()' leftover.
    from gold.origdata.TrackGenomeElementSource import TrackViewGenomeElementSource
    from gold.origdata.BedComposer import CategoryBedComposer

    genome = choices[0]
    trackName = choices[1].split(':')
    outFn = galaxyFn
    if choices[5] == 'Write to Standardised file':
        # Redirect the output into the standardised track hierarchy.
        outFn = createOrigPath(genome, choices[-1].split(':'), 'collapsed_result.bedgraph')
        ensurePathExists(outFn[:outFn.rfind('/')+1])
    combineMethod = choices[2]
    category = choices[3] if choices[3] else ''
    numSamples = choices[4] if choices[4] else '1'
    analysisDef = 'dummy [combineMethod=%s] %s [numSamples=%s] -> ConvertToNonOverlappingCategorySegmentsPythonStat' % \
        (combineMethod, '[category=%s]' % category if category != '' else '', numSamples)  #'Python'
    for regSpec in GenomeInfo.getChrList(genome):
        res = GalaxyInterface.runManual([trackName], analysisDef, regSpec, '*', genome,
                                        username=username, printResults=False,
                                        printHtmlWarningMsgs=False)
        for resDict in res.values():
            tvGeSource = TrackViewGenomeElementSource(genome, resDict['Result'], trackName)
            # NOTE(review): each chromosome overwrites outFn in turn, so only
            # the last chromosome's result survives -- confirm this is intended.
            CategoryBedComposer(tvGeSource).composeToFile(outFn)
def generate(cls, valIter, valIterLen, maxValue, path, fnPrefix): "Assumes valIter gives values between 0 and maxValue" assert valIterLen > 0 chainsFn, startsFn = cls._getFileNames(path, fnPrefix) ensurePathExists(chainsFn) chains = memmap(chainsFn, 'int32', 'w+', shape=valIterLen) starts = memmap(startsFn, 'int32', 'w+', shape=maxValue) curPositions = zeros(maxValue, 'int32') - 1 starts[:] = curPositions #os.chmod(chainsFn, S_IRWXU|S_IRWXG|S_IRXO) #os.chmod(startsFn, S_IRWXU|S_IRWXG|S_IRXO) valIterIndex = 0 for val in valIter: if val == None: pass elif curPositions[val] < 0: starts[val] = curPositions[val] = valIterIndex else: chains[curPositions[val]] = valIterIndex curPositions[val] = valIterIndex valIterIndex += 1 if valIterIndex % 10e6 == 0: print '.', #for index in curPositions: # if index >= 0: # chains[ index ] = -1 chains[curPositions[curPositions >= 0]] = -1 chains.flush() starts.flush() return SameValueIndexChains(chains, starts)
def _writeContent(self, fn, header, resDictKey, coreCls):
    """Write the local (per-region) result table for resDictKey to file fn,
    using coreCls (e.g. an HtmlCore-like class) to produce the markup."""
    keys = self._getKeys(resDictKey)
    core = coreCls()
    core.begin()
    core.bigHeader(header)
    core.header('Local result table for ' + resDictKey)
    numRegions = len(self._results.getAllRegionKeys())
    if numRegions > MAX_LOCAL_RESULTS_IN_TABLE:
        # Bugfix: the original referenced an undefined name 'numUserBins' here,
        # raising NameError whenever this branch was taken.
        core.line('Local results were not printed because of the large number of bins: ' \
                  + str(numRegions) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE))
    else:
        core.tableHeader([ str( coreCls().textWithHelp(baseText, helpText) )
                           for baseText, helpText in
                           ([('Region','')] +
                            [self._results.getLabelHelpPair(key) for key in keys]) ])
        for regionKey in self._results.getAllRegionKeys():
            if self._results[regionKey].get(resDictKey) is None:
                # No result computed for this region: fill the row with blanks.
                core.tableLine([str(regionKey)] + [None]*len(keys))
            else:
                core.tableLine([str(regionKey)] +
                               [ strWithStdFormatting( self._results[regionKey][resDictKey].get(key) )
                                 for key in keys])
        core.tableFooter()
    core.end()
    ensurePathExists(fn)
    # Close the output file deterministically instead of leaking the handle.
    with open(fn, 'w') as outFile:
        outFile.write(str(core))
def _uncompressTemporaryFile(self, gSuiteTrack, tmpFileName, outFileName):
    # Move the downloaded temporary file into place, gunzipping it first if the
    # track's suffix indicates a supported compression format.
    import subprocess
    import os, sys, shutil
    for compSuffix in COMPRESSION_SUFFIXES:
        reduceLen = len(compSuffix) + 1  # length of '.<suffix>' to strip
        suffix = gSuiteTrack.suffix
        if suffix and (suffix == compSuffix or suffix.lower().endswith('.' + compSuffix)):
            if compSuffix == 'gz':
                # gunzip replaces tmpFileName with the uncompressed file
                # (same path minus the '.gz' ending).
                subprocess.check_call(['gunzip', tmpFileName], stderr=sys.stdout)
                unzippedFileName = tmpFileName[:-reduceLen]
                ensurePathExists(outFileName)
                #os.rename(unzippedFileName, outFileName)
                # shutil.move works across filesystems, unlike os.rename.
                shutil.move(unzippedFileName, outFileName)
            else:
                # COMPRESSION_SUFFIXES should only contain formats handled above.
                raise ShouldNotOccurError
            break
    else:
        # Not compressed: move the temporary file into place as-is.
        #os.rename(tmpFileName, outFileName)
        shutil.move(tmpFileName, outFileName)
    # Make the result world-readable/writable modulo the process umask
    # (read-then-restore dance, since umask() has no pure getter).
    currentUmask = os.umask(0)
    os.umask(currentUmask)
    os.chmod(outFileName, 0o666 - currentUmask)
def _writeContent(self, resDictKey, fn):
    # Render the plot for resDictKey into the raster file fn via R, delegating
    # the actual plotting commands to the subclass hook _customRExecution.
    from gold.application.RSetup import r
    ensurePathExists(fn)
    silenceRWarnings()
    bmpFn = fn #+ '.png'
    # r.png(filename=bmpFn, units='px', pointsize=self.POINT_SIZE, res=72)
    width, height = self.getPlotDimensions(resDictKey)
    # pdf test:
    # self.LINE_HEIGHT = self.POINT_SIZE
    # width, height = self.getPlotDimensions(resDictKey)
    # r.pdf(bmpFn, height=height*1.0/72, width=width*1.0/72, pointsize=self.POINT_SIZE)
    if any(x > 800 for x in [width, height]):
        # Large plots: recompute dimensions with a tighter line height and use
        # r.bitmap for the output device instead of r.png.
        self.LINE_HEIGHT = self.POINT_SIZE
        width, height = self.getPlotDimensions(resDictKey)
        if self.HIGH_DEF_COLORS:
            picType = 'png16m'
        else:
            picType = 'png256'
        r.bitmap(bmpFn, height=height, width=width, units='px', type=picType, pointsize=self.POINT_SIZE)
    else:
        r.png(filename=bmpFn, height=height, width=width, units='px', pointsize=self.POINT_SIZE, res=72)
    if resDictKey is not None:
        # Use the result label as the x-axis label.
        xlab = self._results.getLabelHelpPair(resDictKey)[0]
    else:
        xlab = None
    main = self._header
    # Subclass hook that issues the actual R plotting commands.
    self._customRExecution(resDictKey, xlab, main)
    #r.hist( , )
    from gold.application.RSetup import r
    # Close the R graphics device so the file is flushed to disk.
    r('dev.off()')
def __iter__(self):
    # Yield one GSuiteTrack per file in the archive, writing each file's
    # contents out under the run's Galaxy directory first.
    for archivedFileInfo in self._archive:
        # galaxyFn = self._titleToGalaxyFnDict.get(archivedFileInfo.title)
        # if not galaxyFn:
        #     raise ShouldNotOccurError('Galaxy filename not found for file with title: ' + archivedFile.title)
        # Preserve the archive's directory structure in the file name if requested.
        extraFileName = os.sep.join((archivedFileInfo.directories if self._storeHierarchy else []) +\
                                    [archivedFileInfo.baseFileName])
        if self._storeHierarchy:
            # Expose each directory level as a 'dir_level_N' track attribute.
            attributeList = OrderedDict([('dir_level_%s' % (i+1), directory) \
                                         for i,directory in enumerate(archivedFileInfo.directories)])
        else:
            attributeList = OrderedDict()
        uri = GalaxyGSuiteTrack.generateURI(self._galaxyFn, extraFileName=extraFileName)
        gSuiteTrack = GSuiteTrack(uri, title=archivedFileInfo.baseFileName, attributes=attributeList)
        outFn = gSuiteTrack.path
        ensurePathExists(outFn)
        # Copy the archived file's bytes to the track's on-disk location.
        with open(outFn, 'w') as outFile:
            inFile = self._archive.openFile(archivedFileInfo.path)
            outFile.write(inFile.read())
            inFile.close()
        yield gSuiteTrack
def _writeContent(self, resDictKey, fn):
    """Dump the local results for resDictKey to fn as a wiggle (wiggle_0) track."""
    ensurePathExists(fn)
    outF = open(fn, 'w')
    trackLine = 'track type=wiggle_0 name=' + self._results.getStatClassName() + '_' + resDictKey
    outF.write(trackLine + os.linesep)
    for bin in self._results.getAllRegionKeys():
        # Missing values are emitted as 'nan'.
        value = str(self._results[bin].get(resDictKey)).replace('None', 'nan')
        fields = [bin.chr, bin.start, bin.end, value]
        outF.write('\t'.join([str(x) for x in fields]) + os.linesep)
    outF.close()
def _writeRawData(self, resDictKey, fn):
    """Write the raw X and Y data series for resDictKey to fn, one
    comma-separated series per line."""
    ensurePathExists(fn)
    rawData = self._getRawData(resDictKey, False)
    # Bugfix: the original never closed the file, so the writes could stay
    # unflushed until interpreter exit.
    with open(fn, 'w') as outF:
        outF.write('Xs: ' + ','.join([str(x) for x in rawData[0]]) + os.linesep)
        outF.write('Ys: ' + ','.join([str(x) for x in rawData[1]]) + os.linesep)
def __getitem__(self, memoPath):
    # Lazily open the on-disk memoization dict for memoPath, caching the
    # handle in self._memoDicts.
    if not memoPath in self._memoDicts:
        memoFn = self._createPickleFn(memoPath)
        ensurePathExists( memoFn )
        try:
            self._memoDicts[memoPath] = SafeDiskMirroredDict(memoFn)
        except Exception, e:
            # Log once per message so a corrupt memo file does not spam the
            # log, then propagate the error to the caller.
            logMessageOnce("Exception when accessing memo file '%s': %s" % (memoFn, str(e)))
            raise
    # NOTE(review): no value is returned in the visible code -- presumably a
    # 'return self._memoDicts[memoPath]' follows outside this excerpt; confirm.
def writeChr(cls, genome, chr, trackName, elTupleIter):
    """Write all elements of elTupleIter for one chromosome to the
    standardised original-track file for trackName."""
    outFn = createOrigPath(genome, trackName, chr + cls._getEnding())
    ensurePathExists(outFn)
    outF = open(outFn, 'w')
    cls._writeHeader(outF, chr)
    # Removed the unused 'numPrintedDots' counter (leftover progress-dot code).
    for el in elTupleIter:
        cls._writeEl(outF, el, chr)
    outF.close()
def writeTrackData(choices, genome, tn):
    # Simulate a category BED track: for each experiment, sample binding-site
    # positions with a configurable bias towards a 'high-probability' pool, and
    # write all sampled segments to one collected-path file.
    from gold.util.RandomUtil import random
    from gold.util.CommonFunctions import createCollectedPath
    from quick.util.CommonFunctions import ensurePathExists
    trackFn = createCollectedPath(genome, tn, 'simulatedTracks.category.bed')
    ensurePathExists(trackFn)
    trackFile = open(trackFn, 'w')
    #determinePossibilities
    numPossiblePositions = int(choices[2])
    spacingBetweenPositions = 1e3
    # Candidate positions are evenly spaced; index 0 is skipped.
    possiblePositions = [ i * spacingBetweenPositions for i in range(1, int(numPossiblePositions)) ]
    numHighProbPositions = int(choices[3])
    # The first numHighProbPositions positions form the high-probability pool.
    highProbPossiblePositions = possiblePositions[0:numHighProbPositions]
    lowProbPossiblePositions = possiblePositions[numHighProbPositions:]
    largestPossiblePosition = possiblePositions[-1]
    print 'largestPossiblePosition: ', largestPossiblePosition / 1e6, 'M'
    assert largestPossiblePosition < 1.5e8 #just due to hardcoded analysis region below..
    sizePerPosition = 591 #empirical across all VDR binding sites..
    print 'Total BpCoverage: ', len(possiblePositions) * sizePerPosition
    #make samples
    numExperiments = int(choices[0])
    proportionFromHighProbPositions = float(choices[4])
    fixedNumFromHighProbPositions = int(choices[5])
    #numPositionsPerExperiment = [3000]*9
    numPositionsPerExperiment = [ int(x) for x in choices[1].split(',') ] #[3073, 7118, 5290, 3059, 4051, 1021, 200, 610, 573]
    for experimentIndex in range(numExperiments):
        #sampledPositions = random.sample(possiblePositions, numPositionsPerExperiment[experimentIndex])
        # High-probability draw count: a proportion of the experiment's total
        # plus a fixed extra count; the remainder comes from the low-prob pool.
        numHighProbSamples = int(numPositionsPerExperiment[experimentIndex] * proportionFromHighProbPositions ) + fixedNumFromHighProbPositions
        numLowProbSamples = numPositionsPerExperiment[ experimentIndex] - numHighProbSamples
        print 'numHighProbSamples: %i, out of numHighProbPossiblePositions: %i' % ( numHighProbSamples, len(highProbPossiblePositions))
        sampledPositions = random.sample(highProbPossiblePositions, numHighProbSamples ) \
                           + random.sample(lowProbPossiblePositions, numLowProbSamples )
        sampledSegments = [(position, position + sizePerPosition) for position in sampledPositions]
        for seg in sampledSegments:
            # Category column 'T<i>' marks which experiment a segment belongs to.
            trackFile.write('\t'.join([ 'chr1', '%i' % seg[0], '%i' % seg[1], 'T%i' % experimentIndex ]) + '\n')
    trackFile.close()
def _writeContent(self, resDictKey, fn):
    """Dump the local results for resDictKey to fn as a bedGraph track."""
    ensurePathExists(fn)
    outF = open(fn, 'w')
    trackLine = 'track type=bedGraph name=' + self._results.getStatClassName() + '_' + resDictKey
    if resDictKey.lower() in ['pval', 'p-value']:
        # Pin the display scale to [0, 1] for p-value tracks.
        trackLine += ' viewLimits=0:1 autoScale=off'
    outF.write(trackLine + os.linesep)
    for bin in self._results.getAllRegionKeys():
        # Missing values are emitted as 'nan'.
        value = str(self._results[bin].get(resDictKey)).replace('None', 'nan')
        fields = [bin.chr, bin.start, bin.end, value]
        outF.write('\t'.join([str(x) for x in fields]) + os.linesep)
    outF.close()
def writeChr(cls, genome, chr, trackName, elTupleIter):
    """Write all elements of elTupleIter for one chromosome to the
    standardised original-track file for trackName."""
    outFn = createOrigPath(genome, trackName, chr+cls._getEnding() )
    ensurePathExists(outFn)
    outF = open(outFn,'w')
    cls._writeHeader(outF, chr)
    # Removed the unused 'numPrintedDots' counter (leftover progress-dot code).
    for el in elTupleIter:
        cls._writeEl(outF, el, chr)
    outF.close()
def _writeTestFile(self, case):
    """Create the test input file for the given case and return its path."""
    fn = createOrigPath(self.GENOME,
                        self.TRACK_NAME_PREFIX + case.trackName,
                        'testfile' + case.suffix)
    ensurePathExists(fn)
    with open(fn, 'w') as testfile:
        testfile.write('\n'.join(case.headerLines + case.lines))
    return fn
def generateGenomeAnnotations(cls, abbrv):
    """Standardise the collected genome-annotation GFF for genome 'abbrv',
    doing nothing if no collected file exists."""
    fnSource = cls.getCollectedPathGFF(abbrv)
    if not os.path.exists(fnSource):
        return
    from quick.extra.StandardizeTrackFiles import SplitFileToSubDirs
    # Split the collected GFF into per-subtype subdirectories.
    SplitFileToSubDirs.parseFiles(abbrv,
                                  GenomeInfo.getGenomeAnnotationsTrackName(abbrv),
                                  direction='coll_to_std',
                                  suffix='.gff',
                                  catSuffix='.category.gff',
                                  subTypeCol='2',
                                  depth='1',
                                  numHeaderLines='0')
    fnDest = cls.getStandardizedPathGFF(abbrv)
    ensurePathExists(fnDest)
    shutil.copyfile(fnSource, fnDest)
def __getitem__(self, memoPath):
    # Lazily open the on-disk memoization dict for memoPath, caching the
    # handle in self._memoDicts.
    if not memoPath in self._memoDicts:
        memoFn = self._createPickleFn(memoPath)
        ensurePathExists(memoFn)
        try:
            self._memoDicts[memoPath] = SafeDiskMirroredDict(memoFn)
        except Exception, e:
            # Log once per message so a corrupt memo file does not spam the
            # log, then propagate the error to the caller.
            logMessageOnce("Exception when accessing memo file '%s': %s" % (memoFn, str(e)))
            raise
    # NOTE(review): no value is returned in the visible code -- presumably a
    # 'return self._memoDicts[memoPath]' follows outside this excerpt; confirm.
def execute(cls, choices, galaxyFn=None , username=''):
    '''Is called when execute-button is pushed by web-user.
    Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
    If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
    choices is a list of selections made by web-user in each options box.
    '''
    #print 'Executing... with choices %s'%str(choices)
    abbrv = choices[0]
    name = choices[1]
    #Should test that the genome is not in hyperbrowser.
    gi = GenomeInfo(abbrv)
    if gi.hasOrigFiles():
        # Refuse to overwrite an already-installed genome.
        sys.stderr.write( "Genome "+abbrv+ " is already in the Genomic HyperBrowser. Remove the old first.")
    else:
        gi.fullName = name
        if choices[2] == 'URL':
            # Download the sequence from one or more whitespace-separated URLs.
            urls = choices[3].split()
            gi.sourceUrls = urls
            for url in urls:
                try:
                    GenomeImporter.downloadGenomeSequence(abbrv, url)
                except InvalidFormatError:
                    # Abort silently; downloadGenomeSequence reports to stderr.
                    return
        else:
            # Sequence uploaded through Galaxy history: copy it into place.
            basePath = os.sep.join([NONSTANDARD_DATA_PATH, abbrv] + GenomeInfo.getSequenceTrackName(abbrv))
            fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[4].split(':'))
            fnDest = basePath+'/'+abbrv+'Sequence.fasta'
            ensurePathExists(fnDest)
            copyfile(fnSource, fnDest)
        chrs=GenomeImporter.extractChromosomesFromGenome(abbrv)
        gi.sourceChrNames = chrs
        gi.installedBy = username
        gi.genomeBuildSource = choices[5]
        gi.genomeBuildName = choices[6]
        gi.species = choices[7]
        gi.speciesTaxonomyUrl = choices[8]
        gi.assemblyDetails = choices[9]
        # Access list: split on newlines/commas/spaces and keep entries that
        # look like e-mail addresses ('@' past the first character).
        gi.privateAccessList = [v.strip() for v in choices[10].replace(os.linesep, ' ').replace(',', ' ').split(' ') if v.find('@')>0]
        gi.isPrivate = (choices[11] != 'All')
        gi.isExperimental = (choices[12] != 'All')
        gi.ucscClade = choices[13]
        gi.ucscGenome = choices[14]
        gi.ucscAssembly = choices[15]
        # Summarise the installation on the Galaxy results page.
        galaxyFile=open(galaxyFn, "w")
        galaxyFile.write( 'Genome abbreviation: ' + abbrv + os.linesep)
        galaxyFile.write( 'Genome full name: ' + name + os.linesep)
        galaxyFile.write( 'Track name: ' + ':'.join(GenomeInfo.getSequenceTrackName(abbrv)) + os.linesep)
        galaxyFile.write( 'Temp chromosome names: ' + ' || '.join(chrs) + os.linesep)
        #GenomeImporter.saveTempInfo(abbrv, name, chrs)
        #print 'Chromosomes: '+chrs
        gi.store()
def writeRegionListToBedFile(regList, fn):
    """Write regList (objects with .chr/.start/.end) to fn as a 3-column BED
    file. A None regList produces an empty file."""
    from quick.util.CommonFunctions import ensurePathExists
    ensurePathExists(fn)
    # 'is not None' instead of '!= None' (identity check avoids __eq__
    # surprises), and a context manager so the handle is closed even if a
    # write fails (the original leaked it on error).
    with open(fn, 'w') as f:
        if regList is not None:
            for reg in regList:
                f.write('\t'.join([reg.chr, str(reg.start), str(reg.end)]) + os.linesep)
def downloadTrack(self, genome, trackName):
    """Download a UCSC track: either its primary table, or its BED data written
    into the standardised original-track directory."""
    if trackName[-1]=='primaryTable':
        self.getTableData(genome, trackName)
    else:
        bedString, metaData = self.getBedData(genome, trackName)
        #fileName = 'fromUcsc.bed' if trackName[-1][-3:] == 'bed' else 'fromUcsc.'+trackName[-1]
        fileName = 'fromUcsc.'+trackName[-1]
        fn = createOrigPath(genome, trackName, fileName)
        ensurePathExists(fn)
        # Close the file deterministically (the original leaked the handle).
        with open(fn, 'w') as outFile:
            outFile.write(bedString)
def __init__(self):
    """Initialise the cache: create the URI-prefix file and the cache shelve on
    first use, then load the configured URI prefixes into a Trie."""
    if not os.path.exists(self.URI_PREFIXES_FN):
        ensurePathExists(self.URI_PREFIXES_FN)
        # Create an empty prefix file; close the handle instead of leaking it.
        open(self.URI_PREFIXES_FN, 'w').close()
    if not os.path.exists(self.CACHE_SHELVE_FN):
        ensurePathExists(self.CACHE_SHELVE_FN)
        cache = self._openShelve('c')
        cache.close()
    # Read the prefixes with a context manager (original leaked this handle too).
    with open(self.URI_PREFIXES_FN, 'r') as prefixFile:
        prefixList = [line.strip() for line in prefixFile]
    self._uriPrefixes = Trie(prefixList)
def renameCollTrack(genome, oldTn, newTn): oldPath = createCollectedPath(genome, oldTn) if not os.path.exists(oldPath): print '(TN did not exist in collTracks..)' else: print '(renaming track in collTracks..)' newPath = createCollectedPath(genome, newTn) if not ONLY_SIMULATION: assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath ensurePathExists(newPath) shutil.move(oldPath, newPath) else: print 'Would move %s to %s' % (oldPath, newPath)
def renameStdTrack(genome, oldTn, newTn): oldPath = createOrigPath(genome, oldTn) assert os.path.exists(oldPath), 'ERROR: TN did not exist in stdTracks: ' + oldPath print '(renaming track in stdTracks..)' newPath = createOrigPath(genome, newTn) if not ONLY_SIMULATION: assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath ensurePathExists(newPath) print 'Moving from %s to %s' % (oldPath, newPath) shutil.move(oldPath, newPath) else: print 'Would move %s to %s' % (oldPath, newPath)
def renameCollTrack(genome, oldTn, newTn): oldPath = createCollectedPath(genome, oldTn) if not os.path.exists(oldPath): print '(TN did not exist in collTracks..)' else: print '(renaming track in collTracks..)' newPath = createCollectedPath(genome, newTn) if not ONLY_SIMULATION: assert not os.path.exists( newPath), 'ERROR: Target path already exists: ' + newPath ensurePathExists(newPath) shutil.move(oldPath, newPath) else: print 'Would move %s to %s' % (oldPath, newPath)
def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn,
                                                 fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                                 globalCoords=False, asOriginal=False,
                                                 allowOverlaps=False, ignoreEmpty=True):
    """Extract trackName once per region into temporary files and collect them
    all as members of the zip archive zipFn."""
    ensurePathExists(zipFn)
    zipFile = ZipFile(zipFn, 'w')
    try:
        for region in regionList:
            # The region string becomes the member file name; ':' is not path-safe.
            fn = os.path.dirname(zipFn) + os.sep + str(region).replace(':', '_')
            okFn = cls.extract(trackName, [region], fn, fileFormatName=fileFormatName,
                               globalCoords=globalCoords, addSuffix=True,
                               asOriginal=asOriginal, allowOverlaps=allowOverlaps,
                               ignoreEmpty=ignoreEmpty)
            if okFn:
                zipFile.write(okFn, os.path.basename(okFn))
                os.remove(okFn)
    finally:
        # Close the archive even if an extraction fails
        # (the original leaked it on error, leaving a corrupt zip).
        zipFile.close()
def renameProcTrack(genome, oldTn, newTn): for allowOverlaps in [False, True]: oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps) if not os.path.exists(oldPath): print 'Warning: TN did not exist as preproc ' + ('with overlaps' if allowOverlaps else ' without overlaps') else: print '(renaming TN in preproc ' + ('with overlaps' if allowOverlaps else ' without overlaps') + ')' newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps) if not ONLY_SIMULATION: assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath ensurePathExists(newPath) shutil.move(oldPath, newPath) else: print 'Would move %s to %s' % (oldPath, newPath)
def convertSumstat(cls, inFn, outFn, rsidDict, shouldLogTransform, valueFilter=None):
    """ Converts a sumstat primary track to a valued point track. The sumstat
    track must be formatted as defined in the summary statistic file definition
    of Bulik-Sullivan et al., (2015). See specification at
    https://github.com/bulik/ldsc/wiki/Summary-Statistics-File-Format

    :param inFn: path to original track
    :param outFn: path to new track
    :param rsidDict: dictionary for mapping of rsids to reference genome
    :param valueFilter: upper threshold for p/z values in original track
    """
    from math import log
    ensurePathExists(outFn)
    inFile = open(inFn, 'r')
    outFile = open(outFn, 'w')
    # Find columns
    inFileLines = [x.strip().split('\t') for x in inFile.readlines()]
    colNames = [col.upper() for col in inFileLines[0]]
    # Prefer the p-value column; fall back to the z-score column.
    valueColNum = colNames.index(cls.P) if cls.P in colNames else colNames.index(cls.Z)
    idColNum = colNames.index(cls.SNP)
    # Track header information
    outFile.write('##track type: valued points\n')
    outFile.write("##1-indexed: False\n")
    outFile.write('###' + '\t'.join(cls.GTRACK_COLS) + '\n')
    # Convert each line
    for cols in inFileLines[1:]:
        rsid = cols[idColNum]
        value = cols[valueColNum]
        if not valueFilter or float(value) <= valueFilter:
            if shouldLogTransform:
                try:
                    """Convert values to -log(pval), the values of GWAS Catalog SNPs"""
                    value = str(-log(float(value)))
                except ValueError:
                    # Narrowed from a bare 'except:' (which also swallowed
                    # KeyboardInterrupt/SystemExit). Both a non-numeric value
                    # and log(0.0) raise ValueError.
                    """For SNPs with reported p-value of 0.000, assume high significance"""
                    value = str(-log(0.0005))
            seq, pos = RsidMapper.getPosition(rsid, rsidDict)
            if seq and pos:
                outFile.write('\t'.join([seq, pos, rsid, value]) + '\n')
    inFile.close()
    outFile.close()
def _downloadFiles(cls, url, basePath, fileType, allowedUnpackedSuffixes): #if os.path.exists(basePath): # sys.stderr.write("Genome sequence path already exists: %s. Exiting..." % basePath) # return fn = basePath +"/" if not url.split('.')[-1].lower() in allowedUnpackedSuffixes + ['tar','tgz','gz','zip']: urlinfo = str(urllib2.urlopen(url).info()) if urlinfo.find('filename=') >0: fn+= urlinfo.split('filename=')[-1].replace(';','\n').split('\n')[0].strip() else: sys.stderr.write("Not a supported file format. File must end with: %s tar tgz tar.gz zip gz") \ % ' '.join(allowedUnpackedSuffixes) raise InvalidFormatError else: fn+=url.split("/")[-1] ensurePathExists(fn) urllib.urlretrieve(url, fn) if any(url.lower().endswith(x) for x in allowedUnpackedSuffixes): print fileType + " file" elif any(url.lower().endswith(x) for x in [".tar",".tgz",".tar.gz"]): print "tar file" te=tarfile.open(fn) te.extractall(path=basePath) te.close() os.remove(fn) elif url.lower().endswith(".zip"): print "zip file" sourceZip = zipfile.ZipFile(fn, 'r') sourceZip.extractall(path=basePath) sourceZip.close() os.remove(fn) elif url.lower().endswith(".gz"): print "gz file" f = gzip.open(fn, 'rb') retfn=fn[0:fn.rfind(".")]#Renames file except last part, ".gz"? resfile=open(retfn, "wb") for i in f: resfile.write(i) resfile.close() os.remove(fn)
def _writeRawData(self, resDictKey, fn):
    # Write the raw data behind resDictKey to fn: numeric sequences one value
    # per line (tuples of arrays as comma-separated rows), anything else via str().
    ensurePathExists(fn)
    outF = open(fn,'w')
    rawData = self._getRawData(resDictKey, False)
    # Skip writing entirely if the data exceeds the configured point limit
    # (note: the file is still created/truncated, just left empty).
    if self.maxRawDataPoints is None or len(rawData) <= self.maxRawDataPoints:
        if type(rawData) in [list, tuple, numpy.ndarray] and len(rawData)>0 and type(rawData[0]) in [int,float,numpy.int32,numpy.float,numpy.float32, numpy.float64, numpy.float128, numpy.ndarray]:
            if type(rawData) == tuple:
                # Tuple of arrays: one comma-separated line per array.
                for npArr in rawData:
                    print>>outF, ','.join([str(x) for x in npArr])
            else:
                # Flat numeric sequence: one value per line.
                outF.write( os.linesep.join([str(x) for x in rawData]) )
        else:
            # Non-numeric or empty data: fall back to its string representation.
            outF.write( str(rawData) )
    outF.close()
def renameStdTrack(genome, oldTn, newTn): oldPath = createOrigPath(genome, oldTn) assert os.path.exists( oldPath), 'ERROR: TN did not exist in stdTracks: ' + oldPath print '(renaming track in stdTracks..)' newPath = createOrigPath(genome, newTn) if not ONLY_SIMULATION: assert not os.path.exists( newPath), 'ERROR: Target path already exists: ' + newPath ensurePathExists(newPath) print 'Moving from %s to %s' % (oldPath, newPath) shutil.move(oldPath, newPath) else: print 'Would move %s to %s' % (oldPath, newPath)
def extractOneTrackManyToRegionFilesInOneZipFile(cls, trackName, regionList, zipFn,
                                                 fileFormatName=DEFAULT_FILE_FORMAT_NAME,
                                                 globalCoords=False, asOriginal=False,
                                                 allowOverlaps=False, ignoreEmpty=True):
    """Extract trackName once per region into temporary files and collect them
    all as members of the zip archive zipFn."""
    ensurePathExists(zipFn)
    zipFile = ZipFile(zipFn, 'w')
    try:
        for region in regionList:
            # The region string becomes the member file name; ':' is not path-safe.
            fn = os.path.dirname(zipFn) + os.sep + str(region).replace(':', '_')
            okFn = cls.extract(trackName, [region], fn, fileFormatName=fileFormatName,
                               globalCoords=globalCoords, addSuffix=True,
                               asOriginal=asOriginal, allowOverlaps=allowOverlaps,
                               ignoreEmpty=ignoreEmpty)
            if okFn:
                zipFile.write(okFn, os.path.basename(okFn))
                os.remove(okFn)
    finally:
        # Close the archive even if an extraction fails
        # (the original leaked it on error, leaving a corrupt zip).
        zipFile.close()
def printLinkToCallGraph(self, id, galaxyFn, prune=True): statsFile = GalaxyRunSpecificFile(id + ['pstats.dump'], galaxyFn) dotFile = GalaxyRunSpecificFile(id + ['callGraph.dot'], galaxyFn) pngFile = GalaxyRunSpecificFile(id + ['callGraph.png'], galaxyFn) ensurePathExists(statsFile.getDiskPath()) self._stats.dump_stats(statsFile.getDiskPath()) stats = OverheadStats(statsFile.getDiskPath()) stats.writeDotGraph(dotFile.getDiskPath(), prune=prune) stats.renderGraph(dotFile.getDiskPath(), pngFile.getDiskPath()) print str(HtmlCore().link( 'Call graph based on profiling (id=%s)' % ':'.join(id), pngFile.getURL()))
def getHiCFileFromTargetBins(targetBins, galaxyFn):
    """Write targetBins as a BED-like file ('PEI_regions.bed') and return the run-specific file object.

    Each entry of targetBins is indexable as (chrom, start, end); one
    tab-separated line is written per entry.
    """
    from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
    from quick.util.CommonFunctions import ensurePathExists
    import os

    bedFile = GalaxyRunSpecificFile(['PEI_regions.bed'], galaxyFn)
    outPath = bedFile.getDiskPath()
    ensurePathExists(outPath)
    out = open(outPath, 'w')
    for reg in targetBins:
        line = '\t'.join([reg[0], str(reg[1]), str(reg[2])])
        out.write(line + os.linesep)
    out.close()
    return bedFile
def liftOverGTrack(cls, inFn, outFn, rsidDict):
    """
    Liftover for primary point tracks.

    The tracks must have a column 'id', with the rsid of the SNPs in each
    row. In addition, 'seqid' and 'start' is needed in the original tracks,
    as these columns will be the only ones modified for each track element.

    :param inFn: path to original track
    :param outFn: path to new track
    :param rsidDict: dictionary for mapping of rsids to reference genome
    """
    ensurePathExists(outFn)

    rsidCol = 0
    seqCol = 0
    startCol = 0

    # Context managers guarantee both handles are closed even if a line fails to parse
    with open(inFn, 'r') as inFile:
        with open(outFn, 'w') as outFile:
            # Iterate lazily instead of readlines() to avoid loading the whole track into memory
            for line in inFile:
                if line.startswith('###'):
                    # Column-definition header: locate the columns we will rewrite
                    cols = line[3:].strip().split('\t')
                    rsidCol = cols.index(cls.RSID)
                    seqCol = cols.index(cls.SEQID)
                    startCol = cols.index(cls.POS)
                if line.startswith("##1-indexed:"):
                    """
                    The rsID-mapping is based on the dbSNP positions, which
                    are 0-indexed. We need to make sure this attribute is
                    correctly set in our tracks.
                    """
                    outFile.write("##1-indexed: False\n")
                elif line.startswith('#'):
                    # All other header lines (including the '###' column line) pass through unchanged
                    outFile.write(line)
                else:
                    cols = line.strip().split('\t')
                    rsid = cols[rsidCol]
                    seq, pos = RsidMapper.getPosition(rsid, rsidDict)
                    # Rows whose rsid cannot be mapped are silently dropped
                    if seq and pos:
                        cols[seqCol] = str(seq)
                        # BUGFIX: cast pos to str as well (mirrors str(seq));
                        # a non-string position would make '\t'.join() raise TypeError
                        cols[startCol] = str(pos)
                        outFile.write('\t'.join(cols) + '\n')
def downloadGenomeSequence(cls, abbrv, url):
    """Download the genome sequence for genome 'abbrv' from 'url' and unpack it into its base path.

    Supported file types: fa/fasta (kept as-is), tar/tgz/tar.gz, zip, and gz
    (archives are extracted into the base path and the downloaded archive
    removed).  If the URL has no recognized extension, the filename is taken
    from a 'filename=' HTTP header if present; otherwise InvalidFormatError
    is raised.  Does nothing if the base path already exists.
    """
    basePath = cls.getBasePath(abbrv)
    if os.path.exists(basePath):
        # Never overwrite an already downloaded genome
        sys.stderr.write("Genome sequence path already exists: %s. Exiting..." % basePath)
        return
    fn = basePath +"/"
    if not url.split('.')[-1].lower() in ['fa','fasta','tar','tgz','gz','zip']:
        # No recognizable extension in the URL: try the 'filename=' header instead
        urlinfo = str(urllib2.urlopen(url).info())
        if urlinfo.find('filename=') >0:
            fn+= urlinfo.split('filename=')[-1].replace(';','\n').split('\n')[0].strip()
        else:
            sys.stderr.write("Not a supported file format. File must end with: fa fasta tar tgz tar.gz zip gz")
            raise InvalidFormatError
    else:
        fn+=url.split("/")[-1]
    ensurePathExists(fn)
    urllib.urlretrieve(url, fn)
    # NOTE(review): '|' below is bitwise-or on bools; it works, but 'or' is the idiomatic form
    if url.lower().endswith(".fa") | url.lower().endswith(".fasta"):
        print "fasta file"
    elif url.lower().endswith(".tar") | url.lower().endswith(".tgz") | url.lower().endswith(".tar.gz"):
        print "tar file"
        te=tarfile.open(fn)
        te.extractall(path=basePath)
        te.close()
        os.remove(fn)
    elif url.lower().endswith(".zip"):
        print "zip file"
        sourceZip = zipfile.ZipFile(fn, 'r')
        sourceZip.extractall(path=basePath)
        sourceZip.close()
        os.remove(fn)
    elif url.lower().endswith(".gz"):
        print "gz file"
        f = gzip.open(fn, 'rb')
        retfn=fn[0:fn.rfind(".")] #Strips the last extension (".gz"?) to name the decompressed file
        resfile=open(retfn, "wb")
        for i in f:
            resfile.write(i)
        resfile.close()
        # NOTE(review): the gzip handle 'f' is never closed here - confirm and fix upstream
        os.remove(fn)
def getFileFromTargetBins(targetBins, galaxyFn, subDirId=None):
    """Write targetBins as 'intersected_regions.bed' (optionally under a subdirectory) and return the file object.

    Each entry of targetBins is indexable as ('chrom:start-end', tfs); one
    tab-separated line 'chrom<TAB>start<TAB>end<TAB>tfs' is written per entry.
    """
    from quick.util.CommonFunctions import ensurePathExists

    pathParts = [subDirId] if subDirId else []
    bedFile = GalaxyRunSpecificFile(pathParts + ['intersected_regions.bed'], galaxyFn)
    bedFn = bedFile.getDiskPath()
    ensurePathExists(bedFn)

    out = open(bedFn, 'w')
    for entry in targetBins:
        posParts = entry[0].split(':')
        chrom = posParts[0]
        startEnd = posParts[1].split('-')
        line = '\t'.join([chrom, str(startEnd[0]), str(startEnd[1]), entry[1]])
        out.write(line + os.linesep)
    out.close()
    return bedFile
def renameProcTrack(genome, oldTn, newTn):
    """Rename (move) a preprocessed track directory, for both the with-overlaps and without-overlaps variants.

    Missing variants only produce a warning.  Honors the module-level
    ONLY_SIMULATION flag: when set, only prints what would be moved.
    """
    for allowOverlaps in [False, True]:
        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print 'Warning: TN did not exist as preproc ' + (
                'with overlaps' if allowOverlaps else ' without overlaps')
        else:
            print '(renaming TN in preproc ' + ('with overlaps' if allowOverlaps
                                                else ' without overlaps') + ')'
            newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
            if not ONLY_SIMULATION:
                # Refuse to clobber an existing target
                assert not os.path.exists(
                    newPath), 'ERROR: Target path already exists: ' + newPath
                ensurePathExists(newPath)
                shutil.move(oldPath, newPath)
            else:
                print 'Would move %s to %s' % (oldPath, newPath)
def execute(choices, galaxyFn=None, username=''):
    '''Is called when execute-button is pushed by web-user.
    Should print output as HTML to standard out, which will be directed to a results page in Galaxy history.
    If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn.gtr
    If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files).
    choices is a list of selections made by web-user in each options box.

    Moves the genome selected in choices[0] to a '.trash' subfolder under
    every data root that may hold per-genome data (data is not deleted
    outright, so the move is reversible by hand).
    '''
    print 'Executing... starting to remove ' + choices[0] + os.linesep
    # All data roots, including the preprocessed dirs with/without overlaps
    paths = [NONSTANDARD_DATA_PATH, ORIG_DATA_PATH, PARSING_ERROR_DATA_PATH, NMER_CHAIN_DATA_PATH] +\
            [createDirPath('', '', allowOverlaps=x) for x in [False, True]]
    for p in paths:
        genome = choices[0]
        origPath = os.sep.join([ p, genome ])
        trashPath = os.sep.join([ p, ".trash", genome ])
        if os.path.exists(origPath):
            print 'Moving ' + genome + ' to .trash in folder: ' + p + os.linesep
            ensurePathExists(trashPath)
            shutil.move(origPath, trashPath)
def _writeContent(self, fn, resDictKey, header, coreCls):
    """Write the matrix result for resDictKey as a sortable table to the file at fn.

    coreCls is the markup builder class (e.g. HtmlCore).  To keep memory
    usage bounded for large matrices, the accumulated markup is flushed to
    the file and the builder reset after every row.

    Raises SilentError if no matrix is available for resDictKey.
    """
    ensurePathExists(fn)
    outFile = open(fn,'w')
    core = coreCls()
    core.begin()
    core.bigHeader(header)
    core.header(self.HEADER)
    matrixDict = self._getRawData(resDictKey)
    # copy() each part, since rows/columns may be reordered in place below
    matrix, rownames, colnames, rowOrder, colOrder = [copy(matrixDict.get(x)) for x in \
        [self.MATRIX_VALUE_KEY,'Rows','Cols', 'RowOrder', 'ColOrder']]
    if matrix is None:
        raise SilentError
    # Optional explicit row/column orderings (index arrays)
    if rowOrder is not None:
        rownames = rownames[rowOrder]
        matrix = matrix[rowOrder]
    if colOrder is not None:
        colnames = colnames[colOrder]
        matrix = matrix[:,colOrder]
    core.tableHeader([''] + [ str( coreCls().textWithHelp(baseText, helpText) ) for baseText, helpText in \
        [self._results.getLabelHelpPair(col) for col in colnames] ], sortable=True)
    for i,row in enumerate(matrix):
        # NOTE(review): the inner comprehension reuses 'i' (leaks over the row index
        # in Python 2); harmless here since rownames[i] is evaluated before it
        core.tableLine([ str( coreCls().textWithHelp(*self._results.getLabelHelpPair(rownames[i])) ) ] +\
                       [ strWithStdFormatting( row[i] ) for i in xrange(len(row)) ])
        # In order for the memory usage and handling time not to explode for large tables
        outFile.write( str(core) )
        core = coreCls()
    core.tableFooter()
    core.end()
    outFile.write( str(core) )
    outFile.close()
def execute(cls, choices, galaxyFn=None, username=''):
    """Run the ForEachSegmentDistToNearestInSameTrackStat analysis on the
    chosen track and compose the collapsed result as a GTrack file."""
    from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
    from gold.origdata.GtrackComposer import StdGtrackComposer

    genome = choices[0]
    galaxyTnParts = choices[2].split(':')
    if choices[1] == 'Track':
        trackName = galaxyTnParts
    else:
        trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, galaxyTnParts)

    # Default target is the Galaxy history file; optionally redirect to a standardised path
    if choices[4] == 'Write to Standardised file':
        outFn = createOrigPath(genome, choices[-1].split(':'), 'collapsed_result.bedgraph')
        ensurePathExists(outFn[:outFn.rfind('/')+1])
    else:
        outFn = galaxyFn

    analysisDef = 'dummy [threshold=%s] -> ForEachSegmentDistToNearestInSameTrackStat' % choices[3]
    res = GalaxyInterface.runManual([trackName], analysisDef, '*', '*', genome, username=username, \
                                    printResults=False, printHtmlWarningMsgs=False)

    resultTrackViews = [x['Result'] for x in res.values()]
    tvGeSource = TrackViewListGenomeElementSource(genome, resultTrackViews, trackName)
    StdGtrackComposer(tvGeSource).composeToFile(outFn)
def parseFileIntoPointTrack(cls, inFn, outFn, ldDict, rsidDict):
    """
    Loops through a primary track and creates a new linked point track for
    the given track elements. The primary track must have the column header
    'snps', whose column elements are rsids.

    :param inFn: Path to original track
    :param outFn: Path to new linked point track (LD graph)
    :param ldDict: Master LD dictionary
    :param rsidDict: Dictionary mapping rsids to reference genome positions
    :return: None
    """
    from quick.util.CommonFunctions import ensurePathExists
    ensurePathExists(outFn)

    # Context managers close the handles even if parsing/formatting raises.
    # DOCFIX: the old docstring documented a nonexistent 'edgeDir' parameter
    # and omitted 'rsidDict'.
    with open(inFn, 'r') as inFile:
        rsids = cls.getUniqueRsids(inFile)

    expansionDict = CreateLDTrack.getExpansionDict(rsids, ldDict)

    with open(outFn, 'w') as outFile:
        outFile.write(CreateLDTrack.formatPointTrack(expansionDict, rsidDict, rsids))
def _writeContent(self, fn, header):
    """Write the local (per-bin) results as a plain tab-separated text file at fn.

    One line per region, with the region key followed by the formatted
    result for each result-dict key.  If there are more bins than
    MAX_LOCAL_RESULTS_IN_TABLE, only an explanatory message is written.
    (The 'header' parameter is unused but kept for interface compatibility.)
    """
    text = ''
    numRegions = len(self._results.getAllRegionKeys())
    if numRegions > MAX_LOCAL_RESULTS_IN_TABLE:
        # BUGFIX: this branch referenced the undefined name 'numUserBins',
        # raising NameError whenever the limit was exceeded; use the actual
        # region count that was compared against the limit.
        text += 'Local results were not printed because of the large number of bins: ' \
                + str(numRegions) + ' > ' + str(MAX_LOCAL_RESULTS_IN_TABLE)
    else:
        for regionKey in self._results.getAllRegionKeys():
            text += '\t'.join([str(regionKey)] +
                              [strWithStdFormatting(self._results[regionKey].get(resDictKey))
                               for resDictKey in self._results.getResDictKeys()]) + os.linesep
    ensurePathExists(fn)
    open(fn, 'w').write(text)
def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
    """Validate the bounding regions and store them in the bounding-region shelve file.

    :param boundingRegionTuples: iterable of objects with .region (a GenomeRegion)
        and .elCount; must be grouped per chromosome and sorted within each
    :param genomeElementChrList: chromosomes (sequences) that actually contain track elements
    :param sparse: True for track types where element indexes are tracked per
        chromosome (with global bin indexes filled in by a second pass); False
        for dense tracks, where each region's length must equal its element count

    Raises InvalidFormatError for ungrouped/unsorted/overlapping/adjoining regions,
    non-positive lengths, wrong element counts, or data chromosomes lacking
    bounding regions.  Blocks until the shelve file is visible on disk.
    """
    assert sparse in [False, True]
    tempContents = OrderedDict()
    genomeElementChrs = set(genomeElementChrList)
    lastRegion = None
    chrStartIdxs = OrderedDict()
    chrEndIdxs = OrderedDict()
    totElCount = 0
    totBinCount = 0
    for br in boundingRegionTuples:
        if lastRegion is None or br.region.chr != lastRegion.chr:
            # First region of a new chromosome: each chromosome may appear only once
            if br.region.chr in tempContents:
                raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)
            lastRegion = None
            tempContents[br.region.chr] = OrderedDict() #sorteddict()
            if sparse:
                chrStartIdxs[br.region.chr] = totElCount
        else:
            # Same chromosome as the previous region: enforce sortedness,
            # no overlap, and at least a one-position gap between regions
            if br.region < lastRegion:
                raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
            if lastRegion.overlaps(br.region):
                raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
            if lastRegion.end == br.region.start:
                raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))
        if len(br.region) < 1:
            raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)
        if not sparse and len(br.region) != br.elCount:
            raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))
        # Dense tracks get per-region element index ranges here;
        # sparse tracks get per-chromosome ranges in the second pass below
        startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
        totElCount += br.elCount
        if sparse:
            chrEndIdxs[br.region.chr] = totElCount
        tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)
        lastRegion = br.region
    if sparse:
        # Second pass (sparse only): fill in per-chromosome element index ranges
        # and global bin index ranges for every stored BoundingRegionInfo
        totBinCount = 0
        for chr in tempContents:
            chrLen = GenomeInfo.getChrLen(self._genome, chr)
            numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
            for key in tempContents[chr].keys():
                startBinIdx = totBinCount
                endBinIdx = totBinCount + numBinsInChr
                brInfo = tempContents[chr][key]
                if chr in genomeElementChrs:
                    tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                startBinIdx, endBinIdx)
                else:
                    # A chromosome without track elements must not claim element indexes
                    if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                        raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                    tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)
            # Bin indexes only advance for chromosomes that contain elements
            if chr in genomeElementChrs:
                totBinCount += numBinsInChr
    if len(genomeElementChrs - set(tempContents.keys())) > 0:
        raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))
    ensurePathExists(self._fn)
    # Freeze each per-chromosome dict into the immutable holder stored in the shelve
    for chr in tempContents:
        brInfoDict = tempContents[chr]
        tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))
    brShelve = safeshelve.open(self._fn)
    brShelve.update(tempContents)
    brShelve.close()
    # The shelve may appear on disk asynchronously (e.g. on network file
    # systems) - poll until it is visible so later readers do not race
    while not self.fileExists():
        from gold.application.LogSetup import logMessage
        logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
        import time
        time.sleep(0.2)
def getDiskPath(self, ensurePath=False):
    """Return the disk path for this static file id, optionally creating its parent directories."""
    path = os.sep.join([STATIC_PATH] + self._id)
    if ensurePath:
        ensurePathExists(path)
    return path
def composeToFile(self, fn, ignoreEmpty=False, **kwArgs):
    """Compose the output to the file at path fn and return whether composition succeeded.

    :param fn: target file path (parent directories are created if missing)
    :param ignoreEmpty: forwarded to the common composing routine
    """
    ensurePathExists(fn)
    outFile = open(fn, 'w')
    # BUGFIX: close the file even if _composeCommon raises (the old code
    # leaked the handle on exception)
    try:
        return self._composeCommon(outFile, ignoreEmpty, **kwArgs)
    finally:
        outFile.close()
def _writeResultObject(self, resDictKey, fn):
    """Serialize the stored plot result object to fn via R's dput, if one exists."""
    if self._plotResultObject is None:
        return
    ensurePathExists(fn)
    from gold.application.RSetup import r
    r('function(x, fn) {dput(x, fn)}')(self._plotResultObject, fn)
def getTableData(self, genome, trackName):
    """Download the UCSC table output for trackName and store it under the collected data path."""
    url = self._makeUrlstreng(self._sessionId, 'hgta_outputType', trackName[-1])
    webObject, paramForm = self._getWebPageAndForm(url)
    response = mechanize.urlopen(paramForm.click('hgta_doTopSubmit'))
    outFn = createCollectedPath(genome, trackName, 'fromUcsc.' + trackName[-1])
    ensurePathExists(outFn)
    open(outFn, 'w').write(response.read())
import os, shutil import sys from quick.util.CommonFunctions import ensurePathExists, getUniqueWebPath from quick.aux.CustomFuncCatalog import makeLowercaseName2NameShelfFromTnSubTypes, createShelvesBehindRankedGeneLists from config.Config import STATIC_PATH if len(sys.argv) != 8: print 'Usage: python createGoogleMapType.sh mapId genome rowTrackName colTrackName col2GeneListFn galaxyId countType' sys.exit(0) mapId, genome, rowTrackName, colTrackName, col2GeneListFn, galaxyId, countType = [sys.argv[x] for x in [1,2,3,4,5,6,7]] googleMapsCommonDir = '/'.join([STATIC_PATH, 'maps', 'common']) googleMapsMapIdDir = '/'.join([googleMapsCommonDir, mapId]) ensurePathExists(googleMapsMapIdDir + '/test') makeLowercaseName2NameShelfFromTnSubTypes(genome, rowTrackName, '/'.join([googleMapsMapIdDir, 'rowLowerCaseName2Name.shelf'])) makeLowercaseName2NameShelfFromTnSubTypes(genome, colTrackName, '/'.join([googleMapsMapIdDir, 'colLowerCaseName2Name.shelf'])) rowBaseTrackNameFile = open('/'.join([googleMapsMapIdDir, 'rowBaseTrackName.txt']), 'w') colBaseTrackNameFile = open('/'.join([googleMapsMapIdDir, 'colBaseTrackName.txt']), 'w') rowBaseTrackNameFile.write(rowTrackName + '\n') colBaseTrackNameFile.write(colTrackName + '\n') rowBaseTrackNameFile.close() colBaseTrackNameFile.close() if col2GeneListFn != 'None': shutil.copy(col2GeneListFn, '/'.join([googleMapsMapIdDir, 'col2GeneList.shelf']))
def _writeRawData(self, resDictKey, fn):
    """Write the raw data via the base presenter, then append the stored return value for resDictKey, if any."""
    GraphicsPresenter._writeRawData(self, resDictKey, fn)
    if self._returnDict.get(resDictKey) is not None:
        ensurePathExists(fn)
        # BUGFIX: use a context manager so the append handle is closed
        # deterministically (the old code relied on garbage collection)
        with open(fn, 'a') as outF:
            outF.write(os.linesep + 'Return: ' + str(self._returnDict[resDictKey]))