def testComposeLocalUrlGenomeAttributesNonAscii(self):
    # Non-ASCII characters in genome names, attribute values, track names and
    # custom header values must come out percent-encoded in the composed text
    # (u'ø' is expected as '%C3%B8').
    gSuite = GSuite()
    gSuite.addTrack(
        GSuiteTrack('galaxy:/12345abc', genome=u'hg18ø',
                    attributes=OrderedDict([('one', 'yes')])))
    gSuite.addTrack(
        GSuiteTrack('file:/path/to/file2', genome='hg19',
                    attributes=OrderedDict([('two', u'nø')])))
    gSuite.addTrack(
        GSuiteTrack(HbGSuiteTrack.generateURI(trackName=[u'track', u'nøme']),
                    genome='hg38',
                    attributes=OrderedDict([('two', 'yes')])))
    gSuite.setCustomHeader('my header', u'bø!')

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: local\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: multiple\n'
        '##my header: b%C3%B8!\n'
        '###uri\ttitle\tfile_format\tgenome\tone\ttwo\n'
        'galaxy:/12345abc\t12345abc\tunknown\thg18%C3%B8\tyes\t.\n'
        'file:/path/to/file2\tfile2\tunknown\thg19\t.\tn%C3%B8\n'
        'hb:/track/n%C3%B8me\tn%C3%B8me\tpreprocessed\thg38\t.\tyes\n'
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(targetOutput, output)
def generateSynGSuite(cls, dataOut, galaxyFn, genome):
    """
    Create synthetic point tracks for every entry of dataOut and store them
    as a single-track GSuite.

    Each entry of dataOut is indexed at positions 0-5; those six values name
    the generated file and are handed on to SimulationPointIter.createChrTrack.
    The content generated for all entries is accumulated and, once the last
    entry has been processed, written into the last entry's file, which is
    then the only track added to the GSuite.

    The GSuite is composed to cls.extraGalaxyFn['synthetic GSuite'].
    """
    outGSuite = GSuite()
    pointSimulator = SimulationPointIter()
    accumulatedContent = ''

    for entryNum, entry in enumerate(dataOut, 1):
        fileName = 'syn-' + ','.join([str(entry[idx]) for idx in range(6)])
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                            extraFileName=fileName,
                                            suffix='bed')
        outFn = GSuiteTrack(uri).path
        ensurePathExists(outFn)

        pointSimulator.createChrTrack(genome, entry[0], PointIter, outFn,
                                      entry[3], entry[4], entry[5],
                                      entry[1], entry[2])

        with open(outFn, 'r') as generatedFile:
            accumulatedContent += generatedFile.read()

        if entryNum != len(dataOut):
            continue

        # Last entry: its file receives the content of all entries.
        with open(outFn, 'w') as mergedFile:
            mergedFile.write(accumulatedContent)
        outGSuite.addTrack(GSuiteTrack(uri, title=fileName, genome=genome))

    GSuiteComposer.composeToFile(outGSuite,
                                 cls.extraGalaxyFn['synthetic GSuite'])
def testQuoteInURLRemote(self):
    # For every remote scheme, check that URIs round-trip both when the parts
    # are supplied already quoted (doQuote=False) and when generateURI is
    # asked to quote them (doQuote=True).
    remoteSchemes = [('ftp', FtpGSuiteTrack),
                     ('http', HttpGSuiteTrack),
                     ('https', HttpsGSuiteTrack),
                     ('rsync', RsyncGSuiteTrack)]

    for scheme, cls in remoteSchemes:
        # Case 1: parts are already percent-encoded.
        uri = cls.generateURI(
            netloc='server.com%7C',
            path='/path/to/file_with%20%3B%22%5B',
            suffix='bed%7C%2F',
            query='search=%2Aab&track=My%3Atrack+name%3B%22%5B%3Ahierarchy',
            doQuote=False)
        track = GSuiteTrack(uri)

        expectedUri = scheme + '://server.com%7C/path/to/file_with%20%3B%22%5B;bed%7C%2F?search=%2Aab&track=My%3Atrack+name%3B%22%5B%3Ahierarchy'

        self.assertEqual(expectedUri, uri)
        self.assertEqual(scheme, track.scheme)
        self.assertEqual(expectedUri, track.uri)
        self.assertEqual('server.com|', track.netloc)
        self.assertEqual('/path/to/file_with ;"[', track.path)
        self.assertEqual('hierarchy', track.title)
        self.assertEqual('bed|/', track.suffix)
        self.assertEqual('search=*ab&track=My:track name;"[:hierarchy',
                         track.query)
        self.assertEqual(['My', 'track name;"[', 'hierarchy'],
                         track.trackName)

        # Case 2: raw parts, quoted by generateURI.
        uri = cls.generateURI(
            netloc='server.com|',
            path='/path/to/file_with ;"[',
            suffix='btrack|/',
            trackName=['My', 'track name;"[', 'hierarchy'],
            doQuote=True)
        track = GSuiteTrack(uri)

        expectedUri = scheme + '://server.com%7C/path/to/file_with%20%3B%22%5B;btrack%7C%2F?track=My%3Atrack+name%3B%22%5B%3Ahierarchy'

        self.assertEqual(expectedUri, uri)
        self.assertEqual(scheme, track.scheme)
        self.assertEqual(expectedUri, track.uri)
        self.assertEqual('server.com|', track.netloc)
        self.assertEqual('/path/to/file_with ;"[', track.path)
        self.assertEqual('hierarchy', track.title)
        self.assertEqual('btrack|/', track.suffix)
        self.assertEqual('track=My:track name;"[:hierarchy', track.query)
        self.assertEqual(
            ['My', 'track name;"[', 'hierarchy'], track.trackName
        )  # Temporarily, should be fixed if remote BTracks are supported
def testSimpleTitleDuplicate(self):
    # Four tracks whose URIs all end in the same file name: the expected
    # titles carry a running " (n)" suffix from the second track onwards.
    uris = (
        'ftp://server.somewhere.com/path/to/file1.bed',
        'ftp://server.somewhere.com/other/path/to/file1.bed',
        'ftp://server.somewhere.com/third/path/to/file1.bed',
        'ftp://server.somewhere.com/last/path/to/file1.bed',
    )

    gSuite = GSuite()
    for uri in uris:
        gSuite.addTrack(GSuiteTrack(uri))

    expectedTitles = ['file1.bed', 'file1.bed (2)',
                      'file1.bed (3)', 'file1.bed (4)']
    self.assertEqual(expectedTitles, gSuite.allTrackTitles())
def setUp(self):
    # Run the shared fixture setup first, then create three tracks (remote
    # ftp, hb and galaxy) that all use genome 'hg18'.
    GSuiteTestWithMockEncodingFuncs.setUp(self)

    ftpAttributes = OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')])
    self.track1 = GSuiteTrack(
        'ftp://server.somewhere.com/path/to/file1.bed',
        title='Track1',
        genome='hg18',
        attributes=ftpAttributes)

    self.track2 = GSuiteTrack(
        'hb:/track/name/hierarchy',
        title='Track2',
        genome='hg18')

    galaxyAttributes = OrderedDict([('view', 'signal'), ('antibody', 'cMyb')])
    self.track3 = GSuiteTrack(
        'galaxy:/ad123dd12fg;btrack',
        title='Track3',
        genome='hg18',
        attributes=galaxyAttributes)
def _getUriForDownloadedAndUncompressedTrackPossiblyCached(
        self, gSuiteTrack, galaxyFn, uncomprSuffix, extraFileName=None):
    """
    Return a galaxy URI for a downloaded and uncompressed copy of gSuiteTrack.

    If the track cache already holds a URI for the track and the file it
    points to still exists, that URI is returned and nothing is downloaded.
    Otherwise the track is fetched via GSuiteTrackUncompressorAndDownloader
    and, if the cache wants it, registered in the cache.

    :param gSuiteTrack: the track to download.
    :param galaxyFn: Galaxy dataset file name used as base for the new URI.
    :param uncomprSuffix: suffix of the uncompressed file; may be None when
        it could not be determined.
    :param extraFileName: optional file name inside the Galaxy dataset's
        extra files; when given, the download target is created from it.
    :return: the galaxy URI of the local copy.
    """
    from gold.gsuite.GSuiteTrackCache import GSUITE_TRACK_CACHE

    cache = GSUITE_TRACK_CACHE

    if cache.isCached(gSuiteTrack):
        cachedUri = cache.getCachedGalaxyUri(gSuiteTrack)
        # Only trust the cache entry while the file behind it still exists.
        if os.path.exists(GSuiteTrack(cachedUri).path):
            return cachedUri

    if extraFileName:
        from quick.application.ExternalTrackManager import ExternalTrackManager

        outGalaxyFn = ExternalTrackManager.createGalaxyFilesFn(
            galaxyFn, extraFileName)
        ensurePathExists(outGalaxyFn)

        # Guard against uncomprSuffix being None: concatenating it to '.'
        # would raise a TypeError. In that case fall through to the branch
        # that passes the suffix on unchanged, as the no-extraFileName path
        # below already does.
        suffixAlreadyInFileName = (
            uncomprSuffix is not None and
            extraFileName.endswith('.' + uncomprSuffix))

        if suffixAlreadyInFileName:
            uri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn, extraFileName=extraFileName)
        else:
            uri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=galaxyFn, extraFileName=extraFileName,
                suffix=uncomprSuffix)
    else:
        outGalaxyFn = galaxyFn
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=outGalaxyFn,
                                            suffix=uncomprSuffix)

    uncompressorAndDownloader = GSuiteTrackUncompressorAndDownloader()
    uncompressorAndDownloader.visit(gSuiteTrack, outGalaxyFn)

    if cache.shouldBeCached(gSuiteTrack):
        cache.cache(gSuiteTrack, uri)

    return uri
def visitGalaxyGSuiteTrack(self, gSuiteTrack):
    """
    Preprocess a galaxy GSuite track and return a new track with an hb URI.

    The Galaxy track name is built from the track's path and suffix. Its name
    component is the base name of the URI without suffix when the track has
    an extra file name, and the track title otherwise. The returned track
    keeps title, genome, attributes and comment of the input; its track type
    is the lower-cased trackFormatName from TrackInfo.
    """
    self.genericVisit(gSuiteTrack)

    from quick.application.ExternalTrackManager import ExternalTrackManager
    from gold.description.TrackInfo import TrackInfo

    baseFileName = gSuiteTrack.title
    if gSuiteTrack.hasExtraFileName():
        baseFileName = os.path.basename(gSuiteTrack.uriWithoutSuffix)

    galaxyTN = ExternalTrackManager.constructGalaxyTnFromSuitedFn(
        gSuiteTrack.path,
        fileEnding=gSuiteTrack.suffix,
        name=baseFileName)

    trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
        gSuiteTrack.genome,
        galaxyTN,
        printErrors=False,
        printProgress=False,
        renameExistingTracksIfNeeded=False)

    trackInfo = TrackInfo(gSuiteTrack.genome, trackName)
    trackType = trackInfo.trackFormatName.lower()

    hbUri = HbGSuiteTrack.generateURI(trackName=trackName)

    return GSuiteTrack(hbUri,
                       title=gSuiteTrack.title,
                       trackType=trackType,
                       genome=gSuiteTrack.genome,
                       attributes=gSuiteTrack.attributes,
                       comment=gSuiteTrack.comment)
def testCreateGalaxyGSuiteTrack(self):
    # Create a galaxy track with extra file name, track name and suffix, and
    # verify every URI component and every metadata member.
    uri = GalaxyGSuiteTrack.generateURI(
        galaxyFn='/path/to/dataset_12345.dat',
        extraFileName='specific_file',
        trackName=['trackname'],
        suffix='btrack',
        doQuote=False)
    track = GSuiteTrack(uri,
                        title='MyTrack',
                        fileFormat='preprocessed',
                        trackType='points',
                        genome='mm9',
                        attributes=OrderedDict([('extra', 'yes')]))

    expectedUri = 'galaxy:/9085014203344132088/specific_file;btrack?track=trackname'

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expectedUri, uri)
    self.assertEqual('galaxy', track.scheme)
    self.assertEqual(expectedUri, track.uri)
    self.assertEqual(None, track.netloc)
    self.assertEqual(
        '/path/to/dataset_9085014203344132088_files/specific_file',
        track.path)
    self.assertEqual('track=trackname', track.query)
    self.assertEqual('btrack', track.suffix)
    self.assertEqual('MyTrack', track.title)
    self.assertEqual(['trackname'], track.trackName)
    self.assertEqual('local', track.location)
    self.assertEqual('preprocessed', track.fileFormat)
    self.assertEqual('points', track.trackType)
    self.assertEqual('mm9', track.genome)
    self.assertEqual(OrderedDict([('extra', 'yes')]), track.attributes)
def testCreateFileGSuiteTrack(self):
    # Create a file track with suffix and track name, and verify every URI
    # component and every metadata member. No fileFormat is passed in, yet
    # 'preprocessed' is expected for the 'btrack' suffix.
    uri = FileGSuiteTrack.generateURI(path='/path/to/file',
                                      suffix='btrack',
                                      trackName=['trackname'],
                                      doQuote=False)
    track = GSuiteTrack(uri,
                        title='MyTrack',
                        trackType='segments',
                        genome='unknown',
                        attributes=OrderedDict([('extra', 'no')]))

    expectedUri = 'file:/path/to/file;btrack?track=trackname'

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expectedUri, uri)
    self.assertEqual('file', track.scheme)
    self.assertEqual(expectedUri, track.uri)
    self.assertEqual(None, track.netloc)
    self.assertEqual('/path/to/file', track.path)
    self.assertEqual('track=trackname', track.query)
    self.assertEqual('btrack', track.suffix)
    self.assertEqual('MyTrack', track.title)
    self.assertEqual(['trackname'], track.trackName)
    self.assertEqual('local', track.location)
    self.assertEqual('preprocessed', track.fileFormat)
    self.assertEqual('segments', track.trackType)
    self.assertEqual('unknown', track.genome)
    self.assertEqual(OrderedDict([('extra', 'no')]), track.attributes)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Compose a GSuite file at galaxyFn from the selected Galaxy history
    elements.

    Every entry of choices.history whose value is not None becomes one galaxy
    track; its file name, title and suffix are extracted from the
    colon-separated Galaxy track name. The genome is only set on the tracks
    when choices.selectGenome is 'Yes'.
    """
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from quick.application.ExternalTrackManager import ExternalTrackManager

    if choices.selectGenome == 'Yes':
        genome = choices.genome
    else:
        genome = None

    # Collect the selected entries up front, before any track is built.
    selectedTrackNames = [histTN
                          for _, histTN in choices.history.iteritems()
                          if histTN is not None]

    gSuite = GSuite()
    for histTN in selectedTrackNames:
        galaxyTrackName = histTN.split(':')

        histGalaxyFn = ExternalTrackManager.extractFnFromGalaxyTN(
            galaxyTrackName)
        histName = ExternalTrackManager.extractNameFromHistoryTN(
            galaxyTrackName)
        histSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
            galaxyTrackName)

        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=histGalaxyFn,
                                            suffix=histSuffix)
        gSuite.addTrack(GSuiteTrack(uri, title=histName, genome=genome))

    GSuiteComposer.composeToFile(gSuite, galaxyFn)
def __iter__(self):
    """
    Extract every file of the archive into Galaxy storage and yield one
    GSuiteTrack per extracted file.

    When self._storeHierarchy is set, the archive's directory structure is
    kept in the extra file name and each directory level is also recorded as
    a 'dir_level_<n>' attribute (1-based) on the track.
    """
    for archivedFileInfo in self._archive:
        if self._storeHierarchy:
            directories = archivedFileInfo.directories
            attributeList = OrderedDict(
                [('dir_level_%s' % (i + 1), directory)
                 for i, directory in enumerate(directories)])
        else:
            directories = []
            attributeList = OrderedDict()

        extraFileName = os.sep.join(
            directories + [archivedFileInfo.baseFileName])

        uri = GalaxyGSuiteTrack.generateURI(self._galaxyFn,
                                            extraFileName=extraFileName)
        gSuiteTrack = GSuiteTrack(uri,
                                  title=archivedFileInfo.baseFileName,
                                  attributes=attributeList)

        outFn = gSuiteTrack.path
        ensurePathExists(outFn)

        with open(outFn, 'w') as outFile:
            inFile = self._archive.openFile(archivedFileInfo.path)
            # Close the archive member even if reading or writing fails;
            # previously an exception here leaked the open handle.
            try:
                outFile.write(inFile.read())
            finally:
                inFile.close()

        yield gSuiteTrack
def _createPreprocessedGsuiteTrack(self, gSuiteTrack, baseFileName, doEncodeId, urlPrefix=None):
    """
    Preprocess gSuiteTrack and return a new track with an hb URI.

    The Galaxy track name is built from the track's path and suffix, with
    baseFileName as its name. doEncodeId and urlPrefix are passed on
    unchanged to ExternalTrackManager.getPreProcessedTrackFromGalaxyTN. The
    returned track keeps title, genome, attributes and comment of the input;
    its track type is the lower-cased trackFormatName from TrackInfo.
    """
    from quick.application.ExternalTrackManager import ExternalTrackManager as ETM
    from gold.description.TrackInfo import TrackInfo

    self.genericVisit(gSuiteTrack)

    galaxyTN = ETM.constructGalaxyTnFromSuitedFn(
        gSuiteTrack.path,
        fileEnding=gSuiteTrack.suffix,
        name=baseFileName)

    trackName = ETM.getPreProcessedTrackFromGalaxyTN(
        gSuiteTrack.genome,
        galaxyTN,
        printErrors=False,
        printProgress=False,
        renameExistingTracksIfNeeded=False,
        doEncodeId=doEncodeId,
        urlPrefix=urlPrefix)

    trackInfo = TrackInfo(gSuiteTrack.genome, trackName)
    trackType = trackInfo.trackFormatName.lower()

    hbUri = HbGSuiteTrack.generateURI(trackName=trackName)

    return GSuiteTrack(hbUri,
                       title=gSuiteTrack.title,
                       trackType=trackType,
                       genome=gSuiteTrack.genome,
                       attributes=gSuiteTrack.attributes,
                       comment=gSuiteTrack.comment)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Extract every track of the selected GSuite into a new file under
    galaxyFn and compose a GSuite of the extracted files at galaxyFn.

    The output file of each track is named '<title>.<extractionFormat>',
    where the extraction format is looked up from the extraction options of
    choices.basisTrack using choices.extFormatLbl. The output tracks keep
    title, file format, track type and attributes of the input tracks; the
    genome is set to choices.genome.
    """
    basisTrackNameAsList = choices.basisTrack.split(':')

    extractionOptions = dict(
        GalaxyInterface.getTrackExtractionOptions(choices.genome,
                                                  basisTrackNameAsList))
    if extractionOptions:
        extractionFormat = extractionOptions[choices.extFormatLbl]
    else:
        # NOTE(review): with no extraction options the format stays None and
        # the file-name concatenation below fails for the first track.
        extractionFormat = None

    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    outGSuite = GSuite()

    for gsTrack in inputGSuite.allTracks():
        newTrackFileName = gsTrack.title + '.' + extractionFormat

        outGalaxyFn = ExternalTrackManager.createGalaxyFilesFn(
            galaxyFn, newTrackFileName)
        ensurePathExists(outGalaxyFn)

        uri = GalaxyGSuiteTrack.generateURI(galaxyFn,
                                            extraFileName=newTrackFileName)

        GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
            choices.genome, basisTrackNameAsList, gsTrack.suffix,
            gsTrack.path, True, choices.extFormatLbl, outGalaxyFn)

        extractedTrack = GSuiteTrack(uri,
                                     title=gsTrack.title,
                                     fileFormat=gsTrack.fileFormat,
                                     trackType=gsTrack.trackType,
                                     genome=choices.genome,
                                     attributes=gsTrack.attributes)
        outGSuite.addTrack(extractedTrack)

    GSuiteComposer.composeToFile(outGSuite, galaxyFn)
def _parseTrackLine(trackLine, colNames, headerVars):
    """
    Parse one tab-separated track line of a GSuite file into a GSuiteTrack.

    Standard columns (ALL_STD_COL_SPECS) are taken from the line when
    present; otherwise the corresponding header variable is used, unless the
    header says MULTIPLE. All remaining columns become custom attributes,
    where a value of '.' means "not set" and is left out.

    :param trackLine: the raw track line, without trailing newline.
    :param colNames: column names from the column specification line; the
        first one must be URI_COL.
    :param headerVars: mapping of header names to header values.
    :return: the parsed GSuiteTrack.
    :raises InvalidFormatError: if the number of columns does not match the
        column specification, or if GSuiteTrack rejects the values.
    """
    from copy import copy

    colVals = trackLine.split('\t')

    if len(colVals) != len(colNames):
        raise InvalidFormatError(
            'The number of columns in track line: %s '
            'is not equal to the number of columns in the '
            'column specification line (%s != %s)'
            % (repr(trackLine), len(colVals), len(colNames)))

    # Work on a copy: the helper below pops consumed entries from both lists.
    remainingColNames = copy(colNames)

    assert colNames[0] == URI_COL

    kwArgs = {}
    for colSpec in ALL_STD_COL_SPECS:
        val = _popValueFromColValsAndNamesIfPresent(colVals,
                                                    remainingColNames,
                                                    colSpec.colName)
        if val is not None:
            kwArgs[colSpec.memberName] = val
        elif colSpec.headerName in headerVars:
            headerVal = headerVars[colSpec.headerName]
            if headerVal != MULTIPLE:
                kwArgs[colSpec.memberName] = headerVal

    # Build the full mapping first and then filter into a new dict, instead
    # of deleting entries from the dict while iterating over it.
    allAttributes = OrderedDict(zip(remainingColNames, colVals))
    kwArgs['attributes'] = OrderedDict(
        (key, val) for key, val in allAttributes.items() if val != '.')

    try:
        track = GSuiteTrack(**kwArgs)
    except InvalidFormatError as e:
        errorMsg = 'Error in track line %s:\n' % repr(trackLine) + e.message
        raise InvalidFormatError(errorMsg)

    return track
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Is called when the execute button is pushed by the web user.

    Extracts every track of the selected GSuite over the global bins of
    choices.genome into a file under the hidden 'storage' history element,
    and composes a GSuite of the extracted files to the 'primary' history
    element. Progress is reported through a ProgressViewer writing to
    galaxyFn, updated once per track.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
    from quick.application.UserBinSource import GlobalBinSource
    from quick.extra.ProgressViewer import ProgressViewer
    from quick.extra.TrackExtractor import TrackExtractor

    genome = choices.genome
    fullGenomeBins = GlobalBinSource(genome)
    gSuite = getGSuiteFromGalaxyTN(choices.gsuite)

    progressSteps = [(cls.PROGRESS_PROCESS_DESCRIPTION, len(gSuite))]
    progressViewer = ProgressViewer(progressSteps, galaxyFn)

    outGSuite = GSuite()

    storageName = getGSuiteHistoryOutputName('storage',
                                             datasetInfo=choices.gsuite)
    hiddenStorageFn = cls.extraGalaxyFn[storageName]

    # Passed to _getUniqueFileName for every track.
    fileNameSet = set()

    for track in gSuite.allTracks():
        fileName = cls._getUniqueFileName(fileNameSet, track.trackName)
        title = track.title
        attributes = track.attributes
        fi = cls._getFileFormatInfo(choices, gSuite, genome, track)

        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=hiddenStorageFn,
                                            extraFileName=fileName,
                                            suffix=fi.suffix)
        gSuiteTrack = GSuiteTrack(uri,
                                  title=title,
                                  genome=genome,
                                  attributes=attributes)

        TrackExtractor.extractOneTrackManyRegsToOneFile(
            track.trackName,
            fullGenomeBins,
            gSuiteTrack.path,
            fileFormatName=fi.fileFormatName,
            globalCoords=True,
            asOriginal=fi.asOriginal,
            allowOverlaps=fi.allowOverlaps)

        outGSuite.addTrack(gSuiteTrack)
        progressViewer.update()

    primaryName = getGSuiteHistoryOutputName('primary',
                                             datasetInfo=choices.gsuite)
    primaryFn = cls.extraGalaxyFn[primaryName]
    GSuiteComposer.composeToFile(outGSuite, primaryFn)
def getTitleAndSuffixWithCompressionSuffixesRemoved(gSuiteTrack):
    """
    Return (title, suffix) of gSuiteTrack with a compression suffix removed.

    The track must be local or remote and of preprocessed, primary or unknown
    file format (checked via GSuiteRequirements). Only the first matching
    entry of COMPRESSION_SUFFIXES is handled:

    - suffix equals the compression suffix (case-insensitively): the new
      suffix is taken from the path with the compression suffix cut off, or,
      for SearchQueryForSuffixGSuiteTrack instances, from the URI with the
      compression suffix removed; otherwise it is None;
    - suffix ends with '.<compression suffix>': that ending is cut off.

    If nothing matches, or the track has no suffix, title and suffix are
    returned unchanged.
    """
    requirements = GSuiteRequirements(
        allowedLocations=[LOCAL, REMOTE],
        allowedFileFormats=[PREPROCESSED, PRIMARY, UNKNOWN])
    requirements.check(gSuiteTrack)

    title = gSuiteTrack.title
    suffix = gSuiteTrack.suffix
    path = gSuiteTrack.path

    if not suffix:
        return title, suffix

    # The suffix is only reassigned right before leaving the loop, so its
    # lower-cased form can be computed once.
    loweredSuffix = suffix.lower()

    for compSuffix in COMPRESSION_SUFFIXES:
        dottedCompSuffix = '.' + compSuffix
        cutLen = len(dottedCompSuffix)

        if loweredSuffix == compSuffix:
            if path.endswith(dottedCompSuffix):
                suffix = getFileSuffix(path[:-cutLen])
            elif isinstance(gSuiteTrack, SearchQueryForSuffixGSuiteTrack):
                strippedUri = gSuiteTrack.uri.replace(dottedCompSuffix, '')
                suffix = GSuiteTrack(strippedUri, title='').suffix
            else:
                suffix = None  # Impossible to find uncompressed suffix
        elif loweredSuffix.endswith(dottedCompSuffix):
            suffix = suffix[:-cutLen]  # e.g. suffix = 'bed.gz' -> 'bed'
        else:
            continue

        title = getTitleWithSuffixReplaced(title, '')
        break

    return title, suffix
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Compose a GSuite at galaxyFn in which every track of the selected GSuite
    is referenced through a file URI built from its path.

    The suffix is only put into the URI when it differs from the suffix that
    getFileSuffix derives from the path. Title, track type, genome and
    attributes are copied from the input tracks.
    """
    from quick.util.CommonFunctions import getFileSuffix
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import registerGSuiteTrackClass, GSuiteTrack, FileGSuiteTrack

    registerGSuiteTrackClass(FileGSuiteTrack)

    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    outGSuite = GSuite()

    for track in inputGSuite.allTracks():
        path = track.path

        if track.suffix != getFileSuffix(path):
            suffix = track.suffix
        else:
            suffix = ''

        fileUri = FileGSuiteTrack.generateURI(path=path, suffix=suffix)
        outGSuite.addTrack(
            GSuiteTrack(fileUri,
                        title=track.title,
                        trackType=track.trackType,
                        genome=track.genome,
                        attributes=track.attributes))

    GSuiteComposer.composeToFile(outGSuite, galaxyFn)
def testCreateRemoteGSuiteTrack(self):
    # For every remote scheme, create a track with suffix and track name and
    # verify every URI component and every metadata member.
    remoteSchemes = [('ftp', FtpGSuiteTrack),
                     ('http', HttpGSuiteTrack),
                     ('https', HttpsGSuiteTrack),
                     ('rsync', RsyncGSuiteTrack)]

    for scheme, cls in remoteSchemes:
        uri = cls.generateURI(netloc='server.com',
                              path='/path/to/file',
                              suffix='btrack',
                              trackName=['trackname'],
                              doQuote=False)
        track = GSuiteTrack(uri,
                            title='MyTrack',
                            fileFormat='preprocessed',
                            trackType='segments',
                            genome='TestGenome',
                            attributes=OrderedDict([('extra', 'yes')]))

        expectedUri = scheme + '://server.com/path/to/file;btrack?track=trackname'

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(expectedUri, uri)
        self.assertEqual(scheme, track.scheme)
        self.assertEqual(expectedUri, track.uri)
        self.assertEqual('server.com', track.netloc)
        self.assertEqual('/path/to/file', track.path)
        self.assertEqual('track=trackname', track.query)
        self.assertEqual('btrack', track.suffix)
        self.assertEqual('MyTrack', track.title)
        self.assertEqual(['trackname'], track.trackName)
        self.assertEqual('remote', track.location)
        self.assertEqual('preprocessed', track.fileFormat)
        self.assertEqual('segments', track.trackType)
        self.assertEqual('TestGenome', track.genome)
        self.assertEqual(OrderedDict([('extra', 'yes')]), track.attributes)
def testIncorrectTrackType(self):
    # 'segment' must be rejected as track type, both when passed to the
    # constructor and when assigned to an existing track.
    uri = FileGSuiteTrack.generateURI(path='/path/to/file')

    with self.assertRaises(InvalidFormatError):
        GSuiteTrack(uri, trackType='segment')

    track = GSuiteTrack(uri)
    with self.assertRaises(InvalidFormatError):
        track.trackType = 'segment'
def testIncorrectFileFormatGalaxy(self):
    # 'doc' must be rejected as file format of a galaxy track, both when
    # passed to the constructor and when assigned to an existing track.
    uri = GalaxyGSuiteTrack.generateURI(galaxyFn='/path/to/file')

    with self.assertRaises(InvalidFormatError):
        GSuiteTrack(uri, fileFormat='doc')

    track = GSuiteTrack(uri)
    with self.assertRaises(InvalidFormatError):
        track.fileFormat = 'doc'
def execute(cls, choices, galaxyFn=None, username=''): """ Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than 'html', the output should be written to the file with path galaxyFn. If needed, StaticFile can be used to get a path where additional files can be put (cls, e.g. generated image files). choices is a list of selections made by web-user in each options box. Mandatory unless isRedirectTool() returns True. """ #TODO: add functionality for single gtrack within-track randomization print 'Executing...' inputGsuite = getGSuiteFromGalaxyTN(choices.gs) outputGSuite = GSuite() genome = inputGsuite.genome ts = factory.getFlatTracksTS(genome, choices.gs) randIndex = 0 bins = GlobalBinSource(genome) if choices.randType == TsRandAlgReg.BETWEEN_TRACKS_CATEGORY and \ choices.category not in [None, 'None']: ts = ts.getSplittedByCategoryTS(choices.category) randomizedTs = TrackStructureV2() for subTsKey, subTs in ts.items(): tvProvider = cls.createTrackViewProvider( choices, subTs, bins, genome) randomizedTs[subTsKey] = getRandomizedVersionOfTs( subTs, tvProvider, randIndex) randomizedTs = randomizedTs.getFlattenedTS() else: tvProvider = cls.createTrackViewProvider(choices, ts, bins, genome) randomizedTs = getRandomizedVersionOfTs(ts, tvProvider, randIndex) for singleTrackTs in randomizedTs.getLeafNodes(): uri = GalaxyGSuiteTrack.generateURI( galaxyFn=galaxyFn, extraFileName=os.path.sep.join(singleTrackTs.track.trackName) + '.randomized', suffix='bed') title = singleTrackTs.metadata.pop('title') gSuiteTrack = GSuiteTrack(uri, title=title + '.randomized', fileFormat='primary', trackType='segments', genome=genome, attributes=singleTrackTs.metadata) outputGSuite.addTrack(gSuiteTrack) singleTrackTs.metadata['trackFilePath'] = gSuiteTrack.path spec = AnalysisSpec(TsWriterStat) res = doAnalysis(spec, bins, randomizedTs) 
GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
def testComposeRemoteOnlyUrl(self):
    # Two remote tracks given by URI only: the titles are expected to default
    # to the file names and only the uri and title columns are written.
    gSuite = GSuite()
    gSuite.addTrack(GSuiteTrack('ftp://server.somewhere.com/path/to/file1'))
    gSuite.addTrack(GSuiteTrack('http://server.other.com/path/to/file2'))

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: remote\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: unknown\n'
        '###uri\ttitle\n'
        'ftp://server.somewhere.com/path/to/file1\tfile1\n'
        'http://server.other.com/path/to/file2\tfile2\n'
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(targetOutput, output)
def testQuoteInURLHb(self):
    # Track name parts containing special characters must be percent-encoded
    # in the hb URI and decoded again in track.trackName.
    uri = HbGSuiteTrack.generateURI(
        trackName=['My', 'track name;"[', 'hierarchy'],
        doQuote=True)
    track = GSuiteTrack(uri)

    # NOTE(review): the original asserted track.uri twice with the same
    # expected value; the first check was possibly meant for the generated
    # `uri` itself -- confirm before adding that assertion.
    self.assertEqual('hb:/My/track%20name%3B%22%5B/hierarchy', track.uri)
    self.assertEqual('hb', track.scheme)
    self.assertEqual(['My', 'track name;"[', 'hierarchy'], track.trackName)
    self.assertEqual('hierarchy', track.title)
def testComposeUrlTitleLocationTrackType(self):
    # One remote and one local track with different track types: location and
    # track type headers are expected to say 'multiple', and a track_type
    # column is written.
    gSuite = GSuite()
    gSuite.addTrack(
        GSuiteTrack('ftp://server.somewhere.com/path/to/file1',
                    title='Track1', trackType='points'))
    gSuite.addTrack(
        GSuiteTrack('file:/path/to/file2', trackType='segments'))

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: multiple\n'
        '##file format: unknown\n'
        '##track type: multiple\n'
        '##genome: unknown\n'
        '###uri\ttitle\ttrack_type\n'
        'ftp://server.somewhere.com/path/to/file1\tTrack1\tpoints\n'
        'file:/path/to/file2\tfile2\tsegments\n'
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(targetOutput, output)
def testComposeRemoteUrlGenomeFileFormat(self):
    # Two remote tracks sharing genome 'hg18': the genome goes into the
    # header only, while a file_format column is written per track.
    gSuite = GSuite()
    gSuite.addTrack(
        GSuiteTrack('ftp://server.somewhere.com/path/to/file1.bed',
                    genome='hg18'))
    gSuite.addTrack(
        GSuiteTrack('http://server.other.com/path/to/file2',
                    genome='hg18'))

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: remote\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '###uri\ttitle\tfile_format\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tfile1.bed\tprimary\n'
        'http://server.other.com/path/to/file2\tfile2\tunknown\n'
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(targetOutput, output)
def testComposeLocalUrlGenomeAttributes(self):
    # Two local tracks with different genomes and disjoint attributes: a
    # genome column is written and a missing attribute is expected as '.'.
    gSuite = GSuite()
    gSuite.addTrack(
        GSuiteTrack('galaxy:/12345abc', genome='hg18',
                    attributes=OrderedDict([('one', 'yes')])))
    gSuite.addTrack(
        GSuiteTrack('file:/path/to/file2', genome='hg19',
                    attributes=OrderedDict([('two', 'no')])))

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: local\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: multiple\n'
        '###uri\ttitle\tgenome\tone\ttwo\n'
        'galaxy:/12345abc\t12345abc\thg18\tyes\t.\n'
        'file:/path/to/file2\tfile2\thg19\t.\tno\n'
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(targetOutput, output)
def testFullCompose(self):
    # Compose a GSuite mixing all URI schemes, duplicate titles, partially
    # overlapping attributes, a common genome and a custom header.
    gSuite = GSuite()
    gSuite.addTrack(
        GSuiteTrack('ftp://server.somewhere.com/path/to/file1.bed',
                    title='Track',
                    attributes=OrderedDict([('cell', 'k562'),
                                            ('antibody', 'cMyb')])))
    gSuite.addTrack(
        GSuiteTrack('http://server.other.com/path/to/file2.bed',
                    title='Track2',
                    attributes=OrderedDict([('cell', 'GM12878'),
                                            ('antibody', 'cMyc')])))
    gSuite.addTrack(
        GSuiteTrack('https://server.other.com/path/to/file3.bed',
                    attributes=OrderedDict([('cell', 'GM12878'),
                                            ('antibody', 'cMyb')])))
    gSuite.addTrack(
        GSuiteTrack('rsync://server.other.com/path/to/file4;wig',
                    title='Track4',
                    attributes=OrderedDict([('cell', 'NHFL')])))
    gSuite.addTrack(GSuiteTrack('hb:/track/name/hierarchy', title='Track'))
    gSuite.addTrack(
        GSuiteTrack('galaxy:/ad123dd12fg;btrack?track=track:name',
                    title='Track',
                    attributes=OrderedDict([('cell', 'k562'),
                                            ('antibody', 'cMyb')])))
    gSuite.addTrack(
        GSuiteTrack('file:/path/to/file.btrack?track=track:name',
                    title='Track name7',
                    attributes=OrderedDict([('antibody', 'cMyb'),
                                            ('extra', 'yes')])))

    gSuite.setGenomeOfAllTracks('hg18')
    gSuite.setCustomHeader('My header', 'Some value')

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: multiple\n'
        '##file format: multiple\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '##my header: Some value\n'
        '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tTrack\tprimary\tk562\tcMyb\t.\n'
        'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
        'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
        'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
        'hb:/track/name/hierarchy\tTrack (2)\tpreprocessed\t.\t.\t.\n'
        'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack (3)\tpreprocessed\tk562\tcMyb\t.\n'
        'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
    )

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(targetOutput, output)
def visitRemoteGSuiteTrack(self, gSuiteTrack, galaxyFn, colHierarchyList):
    """
    Download a remote track into Galaxy storage and return a galaxy track
    pointing at the uncompressed local copy.

    The local file is placed in a directory hierarchy made from the track's
    values for the columns in colHierarchyList. A column name that matches a
    non-deprecated entry of OPTIONAL_STD_COL_SPECS is read through that
    entry's member name; any other name is read as is. The returned track
    uses the title with compression suffixes removed and copies file format,
    track type, genome and attributes from the input track.
    """
    import os

    self.genericVisit(gSuiteTrack, galaxyFn, colHierarchyList)

    uncomprTitle, uncomprSuffix = \
        getTitleAndSuffixWithCompressionSuffixesRemoved(gSuiteTrack)

    # File name from a track built from the URI alone, i.e. without the
    # explicit title of gSuiteTrack.
    uriOnlyTrack = GSuiteTrack(gSuiteTrack.uri)
    rawFileName = getTitleWithCompressionSuffixesRemoved(uriOnlyTrack)

    duplicateIdx = getDuplicateIdx(uncomprTitle)
    rawFileName = renameBaseFileNameWithDuplicateIdx(rawFileName,
                                                     duplicateIdx)

    # Later specs override earlier ones with the same column name.
    memberNameByColName = {}
    for colSpec in OPTIONAL_STD_COL_SPECS:
        if colSpec.deprecated:
            continue
        memberNameByColName[colSpec.colName] = colSpec.memberName

    hierarchyParts = []
    for colName in colHierarchyList:
        memberName = memberNameByColName.get(colName, colName)
        hierarchyParts.append(getattr(gSuiteTrack, memberName))

    extraFileName = os.path.sep.join(hierarchyParts + [rawFileName])

    uri = self._getUriForDownloadedAndUncompressedTrackPossiblyCached(
        gSuiteTrack, galaxyFn, uncomprSuffix, extraFileName)

    return GSuiteTrack(uri,
                       title=uncomprTitle,
                       fileFormat=gSuiteTrack.fileFormat,
                       trackType=gSuiteTrack.trackType,
                       genome=gSuiteTrack.genome,
                       attributes=gSuiteTrack.attributes)
def testQuoteInURLFile(self):
    # Special characters in path and track name must be percent-encoded in
    # the file URI and decoded again in the parsed track members.
    uri = FileGSuiteTrack.generateURI(path='/path/to/file_with ;"[',
                                      suffix='btrack',
                                      trackName=['My', 'track ;"[', 'name'],
                                      doQuote=True)
    track = GSuiteTrack(uri, fileFormat='preprocessed', trackType='segments')

    expectedUri = ('file:/path/to/file_with%20%3B%22%5B;btrack?'
                   'track=My%3Atrack+%3B%22%5B%3Aname')

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(expectedUri, uri)
    self.assertEqual('file', track.scheme)
    self.assertEqual(expectedUri, track.uri)
    self.assertEqual('/path/to/file_with ;"[', track.path)
    self.assertEqual('name', track.title)
    self.assertEqual('track=My:track ;"[:name', track.query)
    self.assertEqual(['My', 'track ;"[', 'name'], track.trackName)
    self.assertEqual('btrack', track.suffix)
def testCreateGSuiteTrackDefaults(self):
    # A track created from a bare URI: check the parsed URI components and
    # the default value of every optional member.
    track = GSuiteTrack('ftp://server/path/to/file')

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('ftp', track.scheme)
    self.assertEqual('ftp://server/path/to/file', track.uri)
    self.assertEqual('server', track.netloc)
    self.assertEqual('/path/to/file', track.path)
    self.assertEqual(None, track.query)
    self.assertEqual(None, track.suffix)
    self.assertEqual('file', track.title)
    self.assertEqual(None, track.trackName)
    self.assertEqual('remote', track.location)
    self.assertEqual('unknown', track.fileFormat)
    self.assertEqual('unknown', track.trackType)
    self.assertEqual('unknown', track.genome)
    self.assertEqual(OrderedDict(), track.attributes)