def run_on_extracted_variables(cls, ts, analysisBins, choices_numberOfTimesToRandomize, choices_randAlg,
                               choices_randType, galaxyFn, genome):
    """Randomize ``ts``, register the randomized tracks and compose a GSuite.

    For every leaf node of the randomized track structure a
    ``FileGSuiteTrack`` is added to the output GSuite, and the leaf's
    metadata is extended with ``trackFilePath`` and ``randomization_run``.
    ``TsWriterStat`` is then run over ``analysisBins`` on the randomized
    structure, and the GSuite is composed to ``galaxyFn``.

    Raises:
        AssertionError: if ``choices_numberOfTimesToRandomize`` is not 1, or
            if ``galaxyFn`` is the empty string.
    """
    # For now, since ts probably needs to be unique each time..
    assert choices_numberOfTimesToRandomize == 1
    # Validate the output target before doing any work, so that a bad
    # galaxyFn does not surface only after the randomization has been run.
    assert galaxyFn != ""

    outputGSuite = GSuite()
    for i in range(int(choices_numberOfTimesToRandomize)):
        print("TF leaf nodes")
        print(ts.getLeafNodes())
        # The trailing False and None arguments are temporary.
        randTvProvider = cls._createTrackViewProvider(ts, analysisBins, genome, choices_randAlg,
                                                      choices_randType, False, None)
        randomizedTs = getRandomizedVersionOfTs(ts, randTvProvider)

        # output files
        print("Leaf nodes")
        print(randomizedTs.getLeafNodes())
        for j, singleTrackTs in enumerate(randomizedTs.getLeafNodes()):
            # NOTE(review): the output directory is a hard-coded developer
            # home path; it is kept as-is here because callers may rely on it.
            outPath = '/home/ivargry/outfile_' + str(randint(0,999999999)) + '_' + str(j) + ".bed"
            uri = FileGSuiteTrack.generateURI(path=outPath, suffix='bed', doQuote=False)
            print("URI: " + uri)

            # 'title' is removed from the metadata so that it is not also
            # passed on as a plain attribute.
            title = singleTrackTs.metadata.pop('title')
            gSuiteTrack = FileGSuiteTrack(uri, title=title + '.randomized', fileFormat='primary',
                                          trackType='segments', genome=genome,
                                          attributes=singleTrackTs.metadata)
            outputGSuite.addTrack(gSuiteTrack)
            singleTrackTs.metadata['trackFilePath'] = gSuiteTrack.path
            singleTrackTs.metadata['randomization_run'] = i

        # The result object is not used here; the call is made for its effect.
        doAnalysis(AnalysisSpec(TsWriterStat), analysisBins, randomizedTs)

    GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    """Re-express every track of the selected GSuite with a file URI.

    Each input track is copied (title, track type, genome, attributes) to a
    new track whose URI is generated from the input track's path, and the
    resulting GSuite is composed to ``galaxyFn``.
    """
    from quick.util.CommonFunctions import getFileSuffix
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import registerGSuiteTrackClass, GSuiteTrack, FileGSuiteTrack

    registerGSuiteTrackClass(FileGSuiteTrack)

    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    fileGSuite = GSuite()

    for srcTrack in inputGSuite.allTracks():
        trackPath = srcTrack.path
        # An explicit suffix is only passed when it differs from the suffix
        # that getFileSuffix() derives from the path.
        if srcTrack.suffix != getFileSuffix(trackPath):
            explicitSuffix = srcTrack.suffix
        else:
            explicitSuffix = ''
        fileUri = FileGSuiteTrack.generateURI(path=trackPath, suffix=explicitSuffix)
        fileGSuite.addTrack(
            GSuiteTrack(fileUri,
                        title=srcTrack.title,
                        trackType=srcTrack.trackType,
                        genome=srcTrack.genome,
                        attributes=srcTrack.attributes))

    GSuiteComposer.composeToFile(fileGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    """Extract each GSuite track to the chosen format and compose a new GSuite.

    For every track in the selected GSuite, an extra Galaxy file named
    ``<title>.<extractionFormat>`` is created and filled through
    ``GalaxyInterface.parseExtFormatAndExtractTrackManyBins``; the output
    GSuite references those files and is composed to ``galaxyFn``.
    """
    basisTrackName = choices.basisTrack.split(':')
    optionsByLabel = dict(
        GalaxyInterface.getTrackExtractionOptions(choices.genome, basisTrackName))
    # With no extraction options available the format is left as None.
    if optionsByLabel:
        extractionFormat = optionsByLabel[choices.extFormatLbl]
    else:
        extractionFormat = None

    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    extractedGSuite = GSuite()

    for gsTrack in inputGSuite.allTracks():
        extraFileName = gsTrack.title + '.' + extractionFormat
        extractedFn = ExternalTrackManager.createGalaxyFilesFn(galaxyFn, extraFileName)
        ensurePathExists(extractedFn)
        extractedUri = GalaxyGSuiteTrack.generateURI(galaxyFn, extraFileName=extraFileName)

        GalaxyInterface.parseExtFormatAndExtractTrackManyBins(
            choices.genome, basisTrackName, gsTrack.suffix, gsTrack.path,
            True, choices.extFormatLbl, extractedFn)

        extractedTrack = GSuiteTrack(extractedUri,
                                     title=gsTrack.title,
                                     fileFormat=gsTrack.fileFormat,
                                     trackType=gsTrack.trackType,
                                     genome=choices.genome,
                                     attributes=gsTrack.attributes)
        extractedGSuite.addTrack(extractedTrack)

    GSuiteComposer.composeToFile(extractedGSuite, galaxyFn)
def generateSynGSuite(cls, dataOut, galaxyFn, genome):
    """Create synthetic track files from ``dataOut`` and compose a GSuite.

    For each entry of ``dataOut`` a file is generated through
    ``SimulationPointIter.createChrTrack`` and its contents are appended to
    an accumulated string. When the last entry has been processed, the
    accumulated contents overwrite that last file, and a track pointing to
    it is added to the GSuite, which is composed to
    ``cls.extraGalaxyFn['synthetic GSuite']``.

    NOTE(review): the nesting below was reconstructed from
    whitespace-collapsed source; in particular, it is assumed that
    ``addTrack`` belongs inside the "last entry" branch -- confirm.
    """
    outGSuite = GSuite()
    g = SimulationPointIter()
    # Concatenated contents of all files generated so far.
    newData = ''
    # Number of dataOut entries processed so far.
    chrNum = 0
    for chr in dataOut:
        # fileName = 'syn-chr' + 'iInterR-' + str(chr[0]) + 'st-' + str(chr[1]) + 'end-' + str(
        #     chr[2]) + 'iInterR-' + str(chr[3]) + 'iIntraR-' + str(chr[4]) + 'prob-' + str(chr[5]) + '--' + str(
        #     chrNum)
        # The file name is built from the first six fields of the entry.
        fileName = 'syn-' + str(chr[0]) + ',' + str(chr[1]) + ',' + str(chr[2]) + ',' + str(chr[3]) +',' + str(chr[4]) +',' + str(chr[5])
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn, extraFileName=fileName, suffix='bed')
        # The track object is only used to resolve the URI to a file path.
        gSuiteTrack = GSuiteTrack(uri)
        outFn = gSuiteTrack.path
        ensurePathExists(outFn)
        g.createChrTrack(genome, chr[0], PointIter, outFn, chr[3], chr[4], chr[5], chr[1], chr[2])
        with open(outFn, 'r') as outputFile:
            newData += ''.join(outputFile.readlines())
        chrNum += 1
        # On the final entry, replace the last file with the merged contents.
        if chrNum == len(dataOut):
            with open(outFn, 'w') as outputFile:
                outputFile.write(newData)
            outGSuite.addTrack(GSuiteTrack(uri, title=''.join(fileName), genome=genome))
    GSuiteComposer.composeToFile(outGSuite, cls.extraGalaxyFn['synthetic GSuite'])
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Reads the selected GSuite and, for the first cls.MAX_NUM_OF_TRACKS
    tracks, replaces either the title (when the chosen attribute is
    TITLE_COL) or the chosen attribute with the value of the corresponding
    'selectAttribute<N>' choice. All tracks are written to a new GSuite at
    galaxyFn.
    '''
    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    attrName = choices.attrName
    editedGSuite = GSuite()

    for index, track in enumerate(inputGSuite.allTracks()):
        if index < cls.MAX_NUM_OF_TRACKS:
            # The value for track number `index` is read from the
            # odd-numbered choice box 2 * index + 1.
            newValue = getattr(choices, 'selectAttribute%s' % (index * 2 + 1))
            if attrName != TITLE_COL:
                track.setAttribute(attrName, newValue)
            else:
                track.title = newValue
        # Tracks beyond the limit are passed through unchanged.
        editedGSuite.addTrack(track)

    # Creates the new GSuite
    GSuiteComposer.composeToFile(editedGSuite, galaxyFn)
def addResultsToInputGSuite(gsuite, results, attrNames, outputGSuiteFN):
    '''
    Store analysis results as metadata columns and write a new GSuite.

    `results` is looked up by track title. Each name in `attrNames` is first
    passed through _updateAttrNameWithIndexIfDuplicate together with the
    GSuite. With a single attribute name the result may be a plain value or
    a list/tuple (whose first element is used); with several names the
    result must be a list/tuple with one value per name. Falsy result values
    are skipped, i.e. no attribute is set for them.
    '''
    assert isinstance(attrNames, (list, tuple)), 'attrNames must be of type list or tuple: %s' % str(attrNames)

    uniqueAttrNames = [_updateAttrNameWithIndexIfDuplicate(gsuite, name)
                       for name in attrNames]
    singleAttr = len(uniqueAttrNames) == 1

    annotatedGSuite = GSuite()
    for gsTrack in gsuite.allTracks():
        trackResult = results[gsTrack.title]

        if singleAttr:
            # A sequence result contributes only its first element.
            if isinstance(trackResult, (list, tuple)):
                value = trackResult[0]
            else:
                value = trackResult
            if value:
                gsTrack.setAttribute(uniqueAttrNames[0], str(value))
        else:
            assert isinstance(trackResult, (list, tuple)), 'Expected multiple results per track. Attribute names %s' % str(attrNames)
            for pos, value in enumerate(trackResult):
                if not value:
                    continue
                gsTrack.setAttribute(uniqueAttrNames[pos], str(value))

        annotatedGSuite.addTrack(gsTrack)

    GSuiteComposer.composeToFile(annotatedGSuite, outputGSuiteFN)
def execute(cls, choices, galaxyFn=None, username=''):
    """Build a GSuite from the selected history elements.

    Every entry of ``choices.history`` whose value is not None becomes one
    track with a Galaxy URI; the genome is only set when
    ``choices.selectGenome`` is 'Yes'. The GSuite is composed to
    ``galaxyFn``.
    """
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from quick.application.ExternalTrackManager import ExternalTrackManager

    if choices.selectGenome == 'Yes':
        genome = choices.genome
    else:
        genome = None

    # Collect the selected values up front; unselected entries are None.
    selectedValues = [val for _, val in choices.history.iteritems()
                      if val is not None]

    historyGSuite = GSuite()
    for selectedValue in selectedValues:
        galaxyTN = selectedValue.split(':')
        histFn = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTN)
        histTitle = ExternalTrackManager.extractNameFromHistoryTN(galaxyTN)
        histSuffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(galaxyTN)

        histUri = GalaxyGSuiteTrack.generateURI(galaxyFn=histFn, suffix=histSuffix)
        historyGSuite.addTrack(GSuiteTrack(histUri, title=histTitle, genome=genome))

    GSuiteComposer.composeToFile(historyGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Extracts every track of the selected GSuite over the whole genome into
    files stored under the hidden 'storage' history element, reporting
    progress to galaxyFn, and composes a GSuite referring to those files to
    the 'primary' history element.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
    from quick.application.UserBinSource import GlobalBinSource
    from quick.extra.ProgressViewer import ProgressViewer
    from quick.extra.TrackExtractor import TrackExtractor

    genome = choices.genome
    wholeGenomeBins = GlobalBinSource(genome)

    inputGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    progress = ProgressViewer(
        [(cls.PROGRESS_PROCESS_DESCRIPTION, len(inputGSuite))], galaxyFn)

    extractedGSuite = GSuite()
    storageFn = cls.extraGalaxyFn[
        getGSuiteHistoryOutputName('storage', datasetInfo=choices.gsuite)]

    # Names handed out so far, passed to _getUniqueFileName for each track.
    usedFileNames = set()

    for track in inputGSuite.allTracks():
        uniqueName = cls._getUniqueFileName(usedFileNames, track.trackName)
        trackTitle = track.title
        trackAttributes = track.attributes
        formatInfo = cls._getFileFormatInfo(choices, inputGSuite, genome, track)

        targetUri = GalaxyGSuiteTrack.generateURI(galaxyFn=storageFn,
                                                  extraFileName=uniqueName,
                                                  suffix=formatInfo.suffix)
        extractedTrack = GSuiteTrack(targetUri, title=trackTitle, genome=genome,
                                     attributes=trackAttributes)

        TrackExtractor.extractOneTrackManyRegsToOneFile(
            track.trackName, wholeGenomeBins, extractedTrack.path,
            fileFormatName=formatInfo.fileFormatName,
            globalCoords=True,
            asOriginal=formatInfo.asOriginal,
            allowOverlaps=formatInfo.allowOverlaps)

        extractedGSuite.addTrack(extractedTrack)
        progress.update()

    primaryFn = cls.extraGalaxyFn[
        getGSuiteHistoryOutputName('primary', datasetInfo=choices.gsuite)]
    GSuiteComposer.composeToFile(extractedGSuite, primaryFn)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Run when the web user pushes the execute button.

    Builds a flat track structure from the selected GSuite, randomizes it
    (per category sub-structure when between-track randomization within a
    category is chosen and a category is given), registers one '.randomized'
    bed track per leaf node in the output GSuite, runs TsWriterStat over the
    global bins and composes the output GSuite to galaxyFn.
    """
    # TODO: add functionality for single gtrack within-track randomization
    print('Executing...')

    inputGsuite = getGSuiteFromGalaxyTN(choices.gs)
    outputGSuite = GSuite()
    genome = inputGsuite.genome
    ts = factory.getFlatTracksTS(genome, choices.gs)
    randIndex = 0
    bins = GlobalBinSource(genome)

    perCategory = (choices.randType == TsRandAlgReg.BETWEEN_TRACKS_CATEGORY
                   and choices.category not in [None, 'None'])

    if not perCategory:
        tvProvider = cls.createTrackViewProvider(choices, ts, bins, genome)
        randomizedTs = getRandomizedVersionOfTs(ts, tvProvider, randIndex)
    else:
        # Randomize each category sub-structure on its own, then flatten.
        ts = ts.getSplittedByCategoryTS(choices.category)
        perCategoryTs = TrackStructureV2()
        for categoryKey, categoryTs in ts.items():
            tvProvider = cls.createTrackViewProvider(choices, categoryTs, bins, genome)
            perCategoryTs[categoryKey] = getRandomizedVersionOfTs(
                categoryTs, tvProvider, randIndex)
        randomizedTs = perCategoryTs.getFlattenedTS()

    for leafTs in randomizedTs.getLeafNodes():
        extraFileName = os.path.sep.join(leafTs.track.trackName) + '.randomized'
        leafUri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                extraFileName=extraFileName,
                                                suffix='bed')
        # 'title' is popped so it is not repeated among the attributes.
        leafTitle = leafTs.metadata.pop('title')
        randomizedTrack = GSuiteTrack(leafUri,
                                      title=leafTitle + '.randomized',
                                      fileFormat='primary',
                                      trackType='segments',
                                      genome=genome,
                                      attributes=leafTs.metadata)
        outputGSuite.addTrack(randomizedTrack)
        leafTs.metadata['trackFilePath'] = randomizedTrack.path

    writerSpec = AnalysisSpec(TsWriterStat)
    res = doAnalysis(writerSpec, bins, randomizedTs)

    GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
def testComposeLocalUrlGenomeAttributesNonAscii(self):
    # Non-ASCII characters in genome, attribute values, track names and
    # custom headers are expected to be percent-encoded in the output.
    gSuite = GSuite()

    galaxyTrack = GSuiteTrack('galaxy:/12345abc', genome=u'hg18ø',
                              attributes=OrderedDict([('one', 'yes')]))
    gSuite.addTrack(galaxyTrack)

    fileTrack = GSuiteTrack('file:/path/to/file2', genome='hg19',
                            attributes=OrderedDict([('two', u'nø')]))
    gSuite.addTrack(fileTrack)

    hbUri = HbGSuiteTrack.generateURI(trackName=[u'track', u'nøme'])
    hbTrack = GSuiteTrack(hbUri, genome='hg38',
                          attributes=OrderedDict([('two', 'yes')]))
    gSuite.addTrack(hbTrack)

    gSuite.setCustomHeader('my header', u'bø!')

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: local\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: multiple\n'
        '##my header: b%C3%B8!\n'
        '###uri\ttitle\tfile_format\tgenome\tone\ttwo\n'
        'galaxy:/12345abc\t12345abc\tunknown\thg18%C3%B8\tyes\t.\n'
        'file:/path/to/file2\tfile2\tunknown\thg19\t.\tn%C3%B8\n'
        'hb:/track/n%C3%B8me\tn%C3%B8me\tpreprocessed\thg38\t.\tyes\n'
    )

    self.assertEquals(targetOutput, output)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Exports the selected tracks of the input GSuite to separate history
    elements (looked up in cls.extraGalaxyFn by title). A track is either
    converted with the composer for choices.outputFormat or copied as-is,
    and a GSuite referring to the exported files is composed to galaxyFn.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack

    inGSuite = getGSuiteFromGalaxyTN(choices.gsuite)
    exportedGSuite = GSuite()
    newSuffix = cls._getNewSuffixIfAny(choices)

    for track in cls._getSelectedTracks(choices, inGSuite):
        exportTitle, exportSuffix = cls._getExportTrackTitleAndSuffix(track, newSuffix)
        exportFn = cls.extraGalaxyFn[exportTitle]

        if choices.changeFormat != cls.OUTPUT_FORMAT_CONVERT:
            # No conversion requested: copy the track file unchanged.
            shutil.copy(track.path, exportFn)
        else:
            from gold.origdata.FileFormatComposer import getComposerClsFromFileFormatName
            geSource = track.getGenomeElementSource(printWarnings=False)
            composerCls = getComposerClsFromFileFormatName(choices.outputFormat)
            composerCls(geSource).composeToFile(exportFn)

        exportUri = GalaxyGSuiteTrack.generateURI(galaxyFn=exportFn, suffix=exportSuffix)
        exportedGSuite.addTrack(
            GSuiteTrack(exportUri, title=exportTitle, genome=track.genome,
                        attributes=track.attributes))

    GSuiteComposer.composeToFile(exportedGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Reads the chosen archive, extracts its members as tracks under the
    hidden 'storage' history element (reporting progress to galaxyFn),
    writes the storage HTML page and composes the resulting GSuite to the
    'primary' history element.
    '''
    from gold.gsuite.GSuiteArchiver import ArchiveToGalaxyGSuiteTrackIterator, \
        convertArchiveToGSuite
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from quick.gsuite.GSuiteHbIntegration import \
        writeGSuiteHiddenTrackStorageHtml
    from quick.extra.ProgressViewer import ProgressViewer
    from quick.util.debug import DebugUtil
    # DebugUtil.insertBreakPoint(username=username, currentUser='******')

    archive = cls._getArchiveReader(choices)
    # Count the archive members by iterating over the archive once.
    numMembers = sum(1 for _ in archive)

    desc = cls._getOutputHistoryDescription(choices)
    progress = ProgressViewer([('Extract tracks', numMembers)], galaxyFn)

    keepHierarchy = choices.storeHierarchy == 'Yes'
    storageFn = cls.extraGalaxyFn[getGSuiteHistoryOutputName('storage', desc)]

    trackIter = ArchiveToGalaxyGSuiteTrackIterator(archive, storageFn, keepHierarchy)
    writeGSuiteHiddenTrackStorageHtml(storageFn)

    archiveGSuite = convertArchiveToGSuite(trackIter, progress)

    primaryFn = cls.extraGalaxyFn[getGSuiteHistoryOutputName('primary', desc)]
    GSuiteComposer.composeToFile(archiveGSuite, primaryFn)
def testEmptyCompose(self):
    # A GSuite without tracks is expected to compose to the four header
    # lines only, all with the value 'unknown'.
    output = GSuiteComposer.composeToString(GSuite())

    targetOutput = (
        '##location: unknown\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: unknown\n'
    )

    self.assertEquals(targetOutput, output)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Splits the chosen track on the attribute choices.attr through
    createGalaxyGSuiteBySplittingInputFileOnAttribute, using galaxyFn as
    the storage location, and composes the resulting GSuite to galaxyFn.
    '''
    genome = choices.genome
    geSource = etm.getGESourceFromGalaxyOrVirtualTN(choices.track, genome)

    # galaxyFn doubles as the hidden storage location
    # (previously: cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE]).
    splitGSuite = createGalaxyGSuiteBySplittingInputFileOnAttribute(
        galaxyFn,
        geSource,
        genome,
        cls._getComposerCls(choices),
        choices.attr)

    GSuiteComposer.composeToFile(splitGSuite, galaxyFn)
def testFullCompose(self):
    # Seven tracks covering the ftp, http, https, rsync, hb, galaxy and
    # file URI schemes, a shared genome and a custom header.
    trackSpecs = [
        ('ftp://server.somewhere.com/path/to/file1.bed',
         dict(title='Track',
              attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))),
        ('http://server.other.com/path/to/file2.bed',
         dict(title='Track2',
              attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyc')]))),
        ('https://server.other.com/path/to/file3.bed',
         dict(attributes=OrderedDict([('cell', 'GM12878'), ('antibody', 'cMyb')]))),
        ('rsync://server.other.com/path/to/file4;wig',
         dict(title='Track4',
              attributes=OrderedDict([('cell', 'NHFL')]))),
        ('hb:/track/name/hierarchy',
         dict(title='Track')),
        ('galaxy:/ad123dd12fg;btrack?track=track:name',
         dict(title='Track',
              attributes=OrderedDict([('cell', 'k562'), ('antibody', 'cMyb')]))),
        ('file:/path/to/file.btrack?track=track:name',
         dict(title='Track name7',
              attributes=OrderedDict([('antibody', 'cMyb'), ('extra', 'yes')]))),
    ]

    gSuite = GSuite()
    for uri, trackKwArgs in trackSpecs:
        gSuite.addTrack(GSuiteTrack(uri, **trackKwArgs))

    gSuite.setGenomeOfAllTracks('hg18')
    gSuite.setCustomHeader('My header', 'Some value')

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: multiple\n'
        '##file format: multiple\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '##my header: Some value\n'
        '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tTrack\tprimary\tk562\tcMyb\t.\n'
        'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
        'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
        'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
        'hb:/track/name/hierarchy\tTrack (2)\tpreprocessed\t.\t.\t.\n'
        'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack (3)\tpreprocessed\tk562\tcMyb\t.\n'
        'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
    )

    self.assertEquals(targetOutput, output)
def execute(cls, choices, galaxyFn=None, username=''):
    """Create one k-mer track per input line and compose a GSuite of them.

    Every (stripped) line of ``choices.seqs`` is passed to
    ``GalaxyInterface.createNmerTrack``; once all tracks are created, a
    GSuite with one HyperBrowser track per k-mer is composed to
    ``cls.extraGalaxyFn['Kmers GSuite']``.
    """
    nmers = [line.strip() for line in choices.seqs.splitlines()]

    # First pass: create all tracks and record their track names,
    # e.g. ['Sequence', 'K-mers', '7-mers', 'agagaga'].
    nmerTrackNames = []
    for nmer in nmers:
        GalaxyInterface.createNmerTrack(choices.genome, nmer)
        lengthLabel = str(len(nmer)) + '-mers'
        nmerTrackNames.append(['Sequence', 'K-mers', lengthLabel, nmer])

    # Second pass: describe each created track in the output GSuite.
    kmerGSuite = GSuite()
    for trackName in nmerTrackNames:
        trackInfo = TrackInfo(choices.genome, trackName)
        trackType = trackInfo.trackFormatName.lower()
        hbUri = HbGSuiteTrack.generateURI(trackName=trackName)
        title = ' '.join(['Nmer track', trackName[-1]])
        kmerGSuite.addTrack(
            GSuiteTrack(hbUri, title=title, trackType=trackType,
                        genome=choices.genome))

    GSuiteComposer.composeToFile(kmerGSuite, cls.extraGalaxyFn['Kmers GSuite'])
def execute(choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Composes to galaxyFn the GSuite returned by getSubtracksAsGSuite for the
    chosen genome and parent track. An empty parent track selection is
    passed on as an empty track name list.
    '''
    from quick.gsuite.GSuiteHbIntegration import getSubtracksAsGSuite
    import gold.gsuite.GSuiteComposer as GSuiteComposer

    genome = choices.genome

    # ''.split(':') gives [''], which is normalised to the empty list.
    nameParts = choices.parentTrack.split(':')
    parentTrack = [] if nameParts == [''] else nameParts

    subtrackGSuite = getSubtracksAsGSuite(genome, parentTrack, username)
    GSuiteComposer.composeToFile(subtrackGSuite, galaxyFn)
def execute(choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Builds a GSuite with one Galaxy-URI track for every entry of
    choices.history whose value is not None, setting the genome only when
    choices.selectGenome is 'Yes', and composes it to galaxyFn.
    '''
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GalaxyGSuiteTrack, GSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from quick.application.ExternalTrackManager import ExternalTrackManager

    genome = None
    if choices.selectGenome == 'Yes':
        genome = choices.genome

    # Keys of the history elements that were actually selected.
    selectedIds = []
    for histId, histValue in choices.history.iteritems():
        if histValue is not None:
            selectedIds.append(histId)

    composedGSuite = GSuite()
    for histId in selectedIds:
        trackNameParts = choices.history[histId].split(':')
        fn = ExternalTrackManager.extractFnFromGalaxyTN(trackNameParts)
        name = ExternalTrackManager.extractNameFromHistoryTN(trackNameParts)
        suffix = ExternalTrackManager.extractFileSuffixFromGalaxyTN(trackNameParts)

        trackUri = GalaxyGSuiteTrack.generateURI(galaxyFn=fn, suffix=suffix)
        composedGSuite.addTrack(GSuiteTrack(trackUri, title=name, genome=genome))

    GSuiteComposer.composeToFile(composedGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Groups the genome elements of the chosen categorical track by their
    value, composes one file per category to cls.extraGalaxyFn[category]
    and composes a GSuite referring to those files to
    cls.extraGalaxyFn['GSuite from categorical'].
    '''
    genome = choices.genome
    outputType = choices.outputType
    catTrack = choices.catTrack.split(':')

    geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(catTrack, genome)

    # Group the genome elements on their category value.
    elementsByCategory = defaultdict(list)
    for ge in geSource:
        elementsByCategory[ge.val].append(ge)

    # Write one file per category.
    for category, elements in elementsByCategory.iteritems():
        wrappedSource = ListGESourceWrapper(geSource, elements)
        composer = cls.getComposer(wrappedSource, outputType)
        composer.composeToFile(cls.extraGalaxyFn[category])

    # Resolve all per-category file names before building the GSuite.
    categoryFiles = OrderedDict(
        [(category, cls.extraGalaxyFn[category])
         for category in elementsByCategory.keys()])

    categoryGSuite = GSuite()
    for category, categoryFn in categoryFiles.iteritems():
        categoryUri = GalaxyGSuiteTrack.generateURI(galaxyFn=categoryFn, suffix=outputType)
        categoryGSuite.addTrack(GSuiteTrack(categoryUri, title=category, genome=genome))

    GSuiteComposer.composeToFile(
        categoryGSuite, cls.extraGalaxyFn['GSuite from categorical'])

    print('Execution done!')
def execute(cls, choices, galaxyFn=None, username=''):
    """Re-express the tracks of the selected GSuite with file URIs.

    The genome of all tracks is first set to ``choices.genome`` when it
    differs from the GSuite genome. Each track name is then converted to a
    Galaxy track name from which the file path and suffix of the new file
    URI are taken. The resulting GSuite is composed to ``galaxyFn``.
    """
    inGSuite = getGSuiteFromGalaxyTN(choices.gsuite)

    selectedGenome = choices.genome
    if selectedGenome != inGSuite.genome:
        inGSuite.setGenomeOfAllTracks(choices.genome)

    registerGSuiteTrackClass(FileGSuiteTrack)

    fileGSuite = GSuite()
    for track in inGSuite.allTracks():
        galaxyTN = ETM.createGalaxyTnFromExternalTn(track.trackName)
        filePath = ETM.extractFnFromGalaxyTN(galaxyTN)
        fileSuffix = ETM.extractFileSuffixFromGalaxyTN(galaxyTN)
        fileUri = FileGSuiteTrack.generateURI(path=filePath, suffix=fileSuffix)

        fileGSuite.addTrack(
            GSuiteTrack(fileUri,
                        title=track.title,
                        trackType=track.trackType,
                        genome=track.genome,
                        attributes=track.attributes))

    GSuiteComposer.composeToFile(fileGSuite, galaxyFn)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run when the web user pushes the execute button.

    Preprocesses all tracks of the GSuite selected in choices.history,
    reporting progress to galaxyFn. The GSuite of tracks that could not be
    preprocessed is composed to the 'nopreprocessed' history element, and
    the GSuite of preprocessed tracks to the 'preprocessed' element.
    '''
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.gsuite.GSuitePreprocessor import GSuitePreprocessor
    from quick.extra.ProgressViewer import ProgressViewer
    from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN

    inGSuite = getGSuiteFromGalaxyTN(choices.history)

    # Apply the selected genome when it differs from the GSuite genome.
    selectedGenome = choices.genome
    if selectedGenome != inGSuite.genome:
        inGSuite.setGenomeOfAllTracks(choices.genome)

    progress = ProgressViewer(
        [('Preprocess tracks', inGSuite.numTracks())], galaxyFn)

    preprocessor = GSuitePreprocessor()
    outGSuite, errorGSuite = \
        preprocessor.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites(inGSuite, progress)

    failedFn = cls.extraGalaxyFn[
        getGSuiteHistoryOutputName('nopreprocessed', datasetInfo=choices.history)]
    GSuiteComposer.composeToFile(errorGSuite, failedFn)

    succeededFn = cls.extraGalaxyFn[
        getGSuiteHistoryOutputName('preprocessed', datasetInfo=choices.history)]
    GSuiteComposer.composeToFile(outGSuite, succeededFn)
def testComposeRemoteOnlyUrl(self):
    # Two remote tracks given by URI only; titles are expected to be taken
    # from the last path element.
    gSuite = GSuite()
    for remoteUri in ('ftp://server.somewhere.com/path/to/file1',
                      'http://server.other.com/path/to/file2'):
        gSuite.addTrack(GSuiteTrack(remoteUri))

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: remote\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: unknown\n'
        '###uri\ttitle\n'
        'ftp://server.somewhere.com/path/to/file1\tfile1\n'
        'http://server.other.com/path/to/file2\tfile2\n'
    )

    self.assertEquals(targetOutput, output)
def testParseAndCompose(self):
    # Round trip: parsing the contents and composing the parsed GSuite is
    # expected to reproduce the input exactly.
    inputContents = (
        '##location: multiple\n'
        '##file format: multiple\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '###uri\ttitle\tfile_format\tcell\tantibody\textra\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tTrack1\tprimary\tk562\tcMyb\t.\n'
        'http://server.other.com/path/to/file2.bed\tTrack2\tprimary\tGM12878\tcMyc\t.\n'
        'https://server.other.com/path/to/file3.bed\tfile3.bed\tprimary\tGM12878\tcMyb\t.\n'
        'rsync://server.other.com/path/to/file4;wig\tTrack4\tprimary\tNHFL\t.\t.\n'
        'hb:/track/name/hierarchy\tTrack5\tpreprocessed\t.\t.\t.\n'
        'galaxy:/ad123dd12fg;btrack?track=track%3Aname\tTrack6\tpreprocessed\tk562\tcMyb\t.\n'
        'file:/path/to/file.btrack?track=track%3Aname\tTrack name7\tpreprocessed\t.\tcMyb\tyes\n'
    )

    inputLines = inputContents.split('\n')
    parsedGSuite = GSuiteParser.parseLines(inputLines)
    outputContents = GSuiteComposer.composeToString(parsedGSuite)

    self.assertEquals(inputContents, outputContents)
def testComposeUrlTitleLocationTrackType(self):
    # One remote and one local track with different track types.
    remoteTrack = GSuiteTrack('ftp://server.somewhere.com/path/to/file1',
                              title='Track1', trackType='points')
    localTrack = GSuiteTrack('file:/path/to/file2', trackType='segments')

    gSuite = GSuite()
    gSuite.addTrack(remoteTrack)
    gSuite.addTrack(localTrack)

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: multiple\n'
        '##file format: unknown\n'
        '##track type: multiple\n'
        '##genome: unknown\n'
        '###uri\ttitle\ttrack_type\n'
        'ftp://server.somewhere.com/path/to/file1\tTrack1\tpoints\n'
        'file:/path/to/file2\tfile2\tsegments\n'
    )

    self.assertEquals(targetOutput, output)
def testComposeRemoteUrlGenomeFileFormat(self):
    # Two remote tracks sharing a genome; only the first has a file suffix.
    bedTrack = GSuiteTrack('ftp://server.somewhere.com/path/to/file1.bed',
                           genome='hg18')
    noSuffixTrack = GSuiteTrack('http://server.other.com/path/to/file2',
                                genome='hg18')

    gSuite = GSuite()
    gSuite.addTrack(bedTrack)
    gSuite.addTrack(noSuffixTrack)

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: remote\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: hg18\n'
        '###uri\ttitle\tfile_format\n'
        'ftp://server.somewhere.com/path/to/file1.bed\tfile1.bed\tprimary\n'
        'http://server.other.com/path/to/file2\tfile2\tunknown\n'
    )

    self.assertEquals(targetOutput, output)
def testComposeLocalUrlGenomeAttributes(self):
    # Two local tracks with different genomes and disjoint attributes.
    galaxyTrack = GSuiteTrack('galaxy:/12345abc', genome='hg18',
                              attributes=OrderedDict([('one', 'yes')]))
    fileTrack = GSuiteTrack('file:/path/to/file2', genome='hg19',
                            attributes=OrderedDict([('two', 'no')]))

    gSuite = GSuite()
    gSuite.addTrack(galaxyTrack)
    gSuite.addTrack(fileTrack)

    output = GSuiteComposer.composeToString(gSuite)

    targetOutput = (
        '##location: local\n'
        '##file format: unknown\n'
        '##track type: unknown\n'
        '##genome: multiple\n'
        '###uri\ttitle\tgenome\tone\ttwo\n'
        'galaxy:/12345abc\t12345abc\thg18\tyes\t.\n'
        'file:/path/to/file2\tfile2\thg19\t.\tno\n'
    )

    self.assertEquals(targetOutput, output)
def execute(cls, choices, galaxyFn=None, username=''):
    """Search for remote tracks and compose the resulting GSuite(s).

    Returns immediately when ``choices.source`` is empty. Otherwise a remote
    GSuite is fetched through ``TrackGlobalSearchModule`` according to
    ``choices.outputType``. When ``choices.downloadAndPreprocess`` is 'Yes'
    (and the source is not HyperBrowser), the tracks are downloaded and
    preprocessed, and five GSuites are composed to separate history
    elements: 'remote', 'nodownload', 'primary', 'nopreprocessed' and
    'preprocessed'. Otherwise only the remote GSuite is composed to
    ``galaxyFn``.

    NOTE(review): ``remoteGSuite`` is only assigned inside the
    non-HyperBrowser branch and only for the four handled output types (the
    elif chain has no else); on other paths the final ``composeToFile``
    would hit an unbound name -- confirm whether such paths can occur.
    NOTE(review): the nesting below was reconstructed from
    whitespace-collapsed source -- confirm against the original file.
    """
    if not choices.source:
        return
    # The part of the source label before any '[' is used as source name.
    source = choices.source.split('[')[0].strip()
    fileTypes = []
    gsm = TrackGlobalSearchModule(cls.useSqlite)
    # The sub-category is used to name the output history elements.
    desc = choices.subCategory
    if choices.source.find('HyperBrowser') == -1:
        #items = gsm.getItems(choices.search,choices.subCategory)
        # # allFileTypes = [x.split('[')[0] for x,selected in prevChoices.filetype.iteritems()]
        # # fileTypes = [x.split('[')[0] for x,selected in prevChoices.filetype.iteritems() if selected]
        ##allDataTypes = [x.split('[')[0].strip() for x,selected in choices.dataType.iteritems()]
        #dataTypes = [x.split('[')[0].strip() for x,selected in choices.dataType.iteritems() if selected]
        dataTypes = [choices.dataType.split('[')[0].strip()]
        ##Was made to speed up so that there will be no filetype comparisons,
        ##but deactivated for now since there is hardcoded filtering in
        ##prevChoices.fileType
        #if len(allFileTypes) == len(fileTypes):
        #    fileTypes = []
        if 'all tracks' in choices.outputType:
            remoteGSuite = gsm.getGSuite(choices.search, choices.subCategory, source, dataTypes, filterFileSuffix=True)
        elif choices.outputType == 'Select tracks manually':
            remoteGSuite = gsm.getGSuite(choices.search,choices.subCategory,source,dataTypes,\
                                         filterFileSuffix = True,selectedFileIDs = choices.results)
        elif choices.outputType == 'Select 10 random tracks':
            remoteGSuite = gsm.getRandomGSuite(choices.search,choices.subCategory,source,dataTypes,\
                                               filterFileSuffix = True,count = 10)
        elif choices.outputType == 'Select 50 random tracks':
            remoteGSuite = gsm.getRandomGSuite(choices.search,choices.subCategory,source,dataTypes,\
                                               filterFileSuffix = True,count = 50)
    if choices.downloadAndPreprocess == 'Yes' and choices.source.find(
            'HyperBrowser') == -1:
        trackCount = remoteGSuite.numTracks()
        # Two progress stages, both initially sized to the remote track count.
        progressViewer = \
            ProgressViewer([('Download tracks', trackCount), ('Preprocess tracks', trackCount)], galaxyFn)
        #from gold.gsuite.GSuiteDownloader import GSuiteMultipleGalaxyFnDownloader
        #gSuiteDownloader = GSuiteMultipleGalaxyFnDownloader()
        #localGSuite, errorLocalGSuite = \
        #    gSuiteDownloader.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites\
        #        (remoteGSuite, progressViewer, cls.extraGalaxyFn)
        from gold.gsuite.GSuiteDownloader import GSuiteSingleGalaxyFnDownloader
        from quick.gsuite.GSuiteHbIntegration import \
            writeGSuiteHiddenTrackStorageHtml
        gSuiteDownloader = GSuiteSingleGalaxyFnDownloader()
        hiddenStorageFn = cls.extraGalaxyFn[getGSuiteHistoryOutputName(
            'storage', desc)]
        localGSuite, errorLocalGSuite = \
            gSuiteDownloader.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites \
                (remoteGSuite, progressViewer, hiddenStorageFn, [])
        writeGSuiteHiddenTrackStorageHtml(hiddenStorageFn)
        # The preprocess stage is resized to the number of downloaded tracks.
        progressViewer.updateProgressObjectElementCount(
            'Preprocess tracks', localGSuite.numTracks())
        gSuitePreprocessor = GSuitePreprocessor()
        preProcessedGSuite, errorPreProcessGSuite = \
            gSuitePreprocessor.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites\
                (localGSuite, progressViewer)
        #preProcessedGSuite, errorPreProcessGSuite = localGSuite.preProcessAllLocalTracksAndReturnOutputAndErrorGSuites(progressViewer)
        GSuiteComposer.composeToFile(
            remoteGSuite,
            cls.extraGalaxyFn[getGSuiteHistoryOutputName('remote', desc)])
        GSuiteComposer.composeToFile(
            errorLocalGSuite, cls.extraGalaxyFn[getGSuiteHistoryOutputName(
                'nodownload', desc)])
        GSuiteComposer.composeToFile(
            localGSuite,
            cls.extraGalaxyFn[getGSuiteHistoryOutputName('primary', desc)])
        GSuiteComposer.composeToFile(
            errorPreProcessGSuite, cls.extraGalaxyFn[getGSuiteHistoryOutputName(
                'nopreprocessed', desc)])
        GSuiteComposer.composeToFile(
            preProcessedGSuite, cls.extraGalaxyFn[getGSuiteHistoryOutputName(
                'preprocessed', desc)])
    else:
        GSuiteComposer.composeToFile(remoteGSuite, galaxyFn)
def __str__(self):
    '''
    Returns the string produced by GSuiteComposer.composeToString for this
    object.
    '''
    # Imported locally, as in the rest of the file, rather than at module
    # level.
    from gold.gsuite.GSuiteComposer import composeToString
    return composeToString(self)
class MultiTrackIntersectTool(GeneralGuiTool, GenomeMixin):
    '''
    GUI tool that intersects every track of a preprocessed GSuite with a
    single filtering track and writes the resulting tracks as new GSuites.
    '''
    GSUITE_FILE_OPTIONS_BOX_KEYS = ['gSuite']
    ALLOW_UNKNOWN_GENOME = False
    ALLOW_GENOME_OVERRIDE = False
    ALLOW_MULTIPLE_GENOMES = False
    WHAT_GENOME_IS_USED_FOR = 'the output GSuite file'  # Other common possibility: 'the analysis'

    FROM_HISTORY_TEXT = 'From history'
    FROM_HYPERBROWSER_TEXT = 'From HyperBrowser repository'

    WITH_OVERLAPS = 'Allow multiple overlapping points/segments within the same track'
    NO_OVERLAPS = 'Merge any overlapping points/segments within the same track'

    GSUITE_ALLOWED_FILE_FORMATS = [GSuiteConstants.PREPROCESSED]
    GSUITE_ALLOWED_LOCATIONS = [GSuiteConstants.LOCAL]
    GSUITE_ALLOWED_TRACK_TYPES = [GSuiteConstants.POINTS,
                                  GSuiteConstants.VALUED_POINTS,
                                  GSuiteConstants.SEGMENTS,
                                  GSuiteConstants.VALUED_SEGMENTS]
    GSUITE_DISALLOWED_GENOMES = [GSuiteConstants.UNKNOWN,
                                 GSuiteConstants.MULTIPLE]

    OUTPUT_TRACKS_SUFFIX = 'bed'
    GSUITE_OUTPUT_LOCATION = GSuiteConstants.LOCAL
    GSUITE_OUTPUT_FILE_FORMAT = GSuiteConstants.PREPROCESSED
    GSUITE_OUTPUT_TRACK_TYPE = GSuiteConstants.SEGMENTS

    OUTPUT_GSUITE_DESCRIPTION = ', intersected'
    PROGRESS_INTERSECT_MSG = 'Intersect tracks'
    PROGRESS_PREPROCESS_MSG = 'Preprocess tracks'

    @staticmethod
    def getToolName():
        '''
        Specifies a header of the tool, which is displayed at the top of the
        page.
        '''
        return "Intersect preprocessed tracks in GSuite with a single track"

    @classmethod
    def getInputBoxNames(cls):
        '''
        Specifies a list of headers for the input boxes, and implicitly also
        the number of input boxes to display on the page. The returned list
        can have two syntaxes:

            1) A list of strings denoting the headers for the input boxes in
               numerical order.
            2) A list of tuples of strings, where each tuple has two items: a
               header and a key.

        The contents of each input box must be defined by the function
        getOptionsBoxK, where K is either a number in the range of 1 to the
        number of boxes (case 1), or the specified key (case 2).

        Note: the key has to be camelCase (e.g. "firstKey")

        This tool uses syntax 2, with the genome selection boxes inserted
        right after the GSuite box.
        '''
        return [('Select GSuite file from history:', 'gSuite')] + \
            cls.getInputBoxNamesForGenomeSelection() + \
            [('Select source of filtering track:', 'trackSource'),
             ('Select track from history:', 'trackHistory'),
             ('Select track:', 'track'),
             ('Overlap handling:', 'withOverlaps')]

    #@staticmethod
    #def getInputBoxOrder():
    #    '''
    #    Specifies the order in which the input boxes should be displayed, as a
    #    list. The input boxes are specified by index (starting with 1) or by
    #    key. If None, the order of the input boxes is in the order specified by
    #    getInputBoxNames.
    #    '''
    #    return None

    @classmethod
    def getOptionsBoxGSuite(cls):  # Alternatively: getOptionsBox2()
        '''
        Content of the first input box ('gSuite'): the history selection
        element obtained from getHistorySelectionElement('gsuite'). Being the
        first box, it takes no prevChoices argument.
        '''
        return cls.getHistorySelectionElement('gsuite')

    @classmethod
    def getOptionsBoxTrackSource(cls, prevChoices):
        '''
        Lets the user choose whether the filtering track comes from the
        history or from the HyperBrowser repository.
        '''
        return [cls.FROM_HISTORY_TEXT, cls.FROM_HYPERBROWSER_TEXT]

    @classmethod
    def getOptionsBoxTrackHistory(cls, prevChoices):
        '''
        History selection of the filtering track. Returns None (no value)
        unless the 'From history' source was chosen.
        '''
        if prevChoices.trackSource == cls.FROM_HISTORY_TEXT:
            from gold.application.DataTypes import \
                getSupportedFileSuffixesForPointsAndSegments
            return cls.getHistorySelectionElement(
                *getSupportedFileSuffixesForPointsAndSegments())

    @classmethod
    def getOptionsBoxTrack(cls, prevChoices):
        '''
        Track selection element for the filtering track. Returns None (no
        value) unless the 'From HyperBrowser repository' source was chosen.
        '''
        if prevChoices.trackSource == cls.FROM_HYPERBROWSER_TEXT:
            return cls.TRACK_SELECT_ELEMENT

    @classmethod
    def getOptionsBoxWithOverlaps(cls, prevChoices):
        '''
        Overlap handling options. Returns None (no value) until a filtering
        track has been selected in either of the two track boxes.
        '''
        if prevChoices.trackHistory or prevChoices.track:
            return [cls.NO_OVERLAPS, cls.WITH_OVERLAPS]

    #@staticmethod
    #def getInfoForOptionsBoxKey(prevChoices):
    #    '''
    #    If not None, defines the string content of an clickable info box beside
    #    the corresponding input box. HTML is allowed.
    #    '''
    #    return None

    #@staticmethod
    #def getDemoSelections():
    #    return ['testChoice1','..']

    @classmethod
    def getExtraHistElements(cls, choices):
        '''
        Declares the extra history elements written by execute(): four GSuite
        outputs ('nointersect', 'primary', 'nopreprocessed', 'preprocessed')
        and one hidden 'storage' element.
        '''
        desc = cls.OUTPUT_GSUITE_DESCRIPTION

        def outputName(outputKind):
            return getGSuiteHistoryOutputName(
                outputKind, description=desc, datasetInfo=choices.gSuite)

        histElements = [HistElement(outputName(outputKind),
                                    GSuiteConstants.GSUITE_SUFFIX)
                        for outputKind in ('nointersect', 'primary',
                                           'nopreprocessed', 'preprocessed')]
        histElements.append(HistElement(outputName('storage'),
                                        GSuiteConstants.GSUITE_STORAGE_SUFFIX,
                                        hidden=True))
        return histElements

    @classmethod
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user.

        For every track in the selected GSuite, writes a 'bed' file with the
        part of the track that intersects the filtering track, then
        preprocesses the non-empty results. Four GSuites are written to the
        extra history elements: tracks with empty results ('nointersect'),
        the result files ('primary'), the preprocessed results
        ('preprocessed') and the tracks that failed preprocessing
        ('nopreprocessed').

        choices: selections made by the web-user in each options box.
        galaxyFn: path of the main history output; used for the progress view.
        username: passed on to GalaxyInterface.runManual.
        '''
        import gold.gsuite.GSuiteComposer as GSuiteComposer
        from gold.gsuite.GSuite import GSuite
        from gold.gsuite.GSuiteTrack import GSuiteTrack, HbGSuiteTrack
        from gold.origdata.TrackGenomeElementSource import TrackViewListGenomeElementSource
        from gold.origdata.FileFormatComposer import getComposerClsFromFileSuffix
        from gold.origdata.GenomeElementSource import GenomeElementSource
        from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from quick.application.GalaxyInterface import GalaxyInterface
        from quick.application.UserBinSource import UserBinSource
        from quick.extra.TrackExtractor import TrackExtractor

        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gSuite)
        mergeOverlaps = choices.withOverlaps == cls.NO_OVERLAPS
        fromHistory = choices.trackSource == cls.FROM_HISTORY_TEXT

        if mergeOverlaps:
            # The statistic-based intersection needs the filter as a track name.
            if fromHistory:
                filterTrackName = \
                    ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                        genome, choices.trackHistory)
            else:
                filterTrackName = choices.track.split(':')
        else:
            # The extraction-based variant uses the filter track as bins.
            if fromHistory:
                regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(
                    choices.trackHistory)
                binSpec = ExternalTrackManager.extractFnFromGalaxyTN(
                    choices.trackHistory)
            else:
                regSpec = 'track'
                binSpec = choices.track
            userBinSource = UserBinSource(regSpec, binSpec, genome)

        desc = cls.OUTPUT_GSUITE_DESCRIPTION

        def extraFn(outputKind):
            return cls.extraGalaxyFn[getGSuiteHistoryOutputName(
                outputKind, description=desc, datasetInfo=choices.gSuite)]

        emptyFn = extraFn('nointersect')
        primaryFn = extraFn('primary')
        errorFn = extraFn('nopreprocessed')
        preprocessedFn = extraFn('preprocessed')
        hiddenStorageFn = extraFn('storage')

        analysisDef = '-> TrackIntersectionStat'
        # analysisDef = '-> TrackIntersectionWithValStat'

        numTracks = gSuite.numTracks()
        progressViewer = ProgressViewer(
            [(cls.PROGRESS_INTERSECT_MSG, numTracks),
             (cls.PROGRESS_PREPROCESS_MSG, numTracks)], galaxyFn)

        emptyGSuite = GSuite()
        primaryGSuite = GSuite()
        newSuffix = cls.OUTPUT_TRACKS_SUFFIX

        for track in gSuite.allTracks():
            extraFileName = os.path.sep.join(track.trackName)
            extraFileName = changeSuffixIfPresent(extraFileName,
                                                  newSuffix=newSuffix)
            title = getTitleWithSuffixReplaced(track.title, newSuffix)

            primaryTrackUri = GalaxyGSuiteTrack.generateURI(
                galaxyFn=hiddenStorageFn, extraFileName=extraFileName,
                suffix=newSuffix if not extraFileName.endswith(newSuffix) else '')
            primaryTrack = GSuiteTrack(primaryTrackUri, title=title,
                                       genome=track.genome,
                                       attributes=track.attributes)

            if mergeOverlaps:
                res = GalaxyInterface.runManual(
                    [track.trackName, filterTrackName], analysisDef,
                    '*', '*', genome=genome, galaxyFn=galaxyFn,
                    username=username)
                trackViewList = [res[key]['Result']
                                 for key in sorted(res.keys())]
                tvGeSource = TrackViewListGenomeElementSource(genome,
                                                              trackViewList)
                composerCls = getComposerClsFromFileSuffix(newSuffix)
                composerCls(tvGeSource).composeToFile(primaryTrack.path)
            else:
                TrackExtractor.extractOneTrackManyRegsToOneFile(
                    track.trackName, userBinSource, primaryTrack.path,
                    fileFormatName=newSuffix, globalCoords=True,
                    asOriginal=False, allowOverlaps=True)

            # Temporary hack until better solution for empty result tracks
            # have been implemented
            geSource = GenomeElementSource(primaryTrack.path, genome=genome,
                                           suffix=newSuffix)
            try:
                geSource.parseFirstDataLine()
                primaryGSuite.addTrack(primaryTrack)
            except Exception as e:  # Most likely empty file
                primaryTrack.comment = e.message
                emptyGSuite.addTrack(primaryTrack)
                # One track less to preprocess.
                numTracks -= 1
                progressViewer.updateProgressObjectElementCount(
                    cls.PROGRESS_PREPROCESS_MSG, numTracks)

            # One element of the 'Intersect tracks' phase is done.
            # NOTE(review): assumes ProgressViewer.update() advances the
            # current phase by one element - confirm.
            progressViewer.update()

        gSuitePreprocessor = GSuitePreprocessor()
        preprocessedGSuite, errorGSuite = \
            gSuitePreprocessor.visitAllGSuiteTracksAndReturnOutputAndErrorGSuites(
                primaryGSuite, progressViewer)

        GSuiteComposer.composeToFile(emptyGSuite, emptyFn)
        GSuiteComposer.composeToFile(primaryGSuite, primaryFn)
        GSuiteComposer.composeToFile(preprocessedGSuite, preprocessedFn)
        GSuiteComposer.composeToFile(errorGSuite, errorFn)
        writeGSuiteHiddenTrackStorageHtml(hiddenStorageFn)
def printGSuite(cls, choices, cols, rows, colListString, outFile):
    '''
    Writes one GTrack file per disease/trait found in rows and a GSuite
    listing those files.

    cols: column names of the rows; must contain cls.DISEASE_COLUMN_NAME,
        cls.CHR_COLUMN_NAME, cls.START_COLUMN_NAME and cls.VAL_COLUMN_NAME.
        All remaining columns are stored as extra GTrack columns.
    rows: sequences of cell values, indexed in the same order as cols.
    outFile: path the resulting GSuite is composed to.
    choices, colListString: not used by this method.

    If cls.DATABASE_GENOME differs from cls.OUTPUT_GENOME, positions are
    lifted over to the output genome. Positions that do not map to exactly
    one location are reported on standard out and left out of the track.
    '''
    from quick.extra.ProgressViewer import ProgressViewer
    from gold.gsuite.GSuite import GSuite
    from gold.gsuite.GSuiteTrack import GSuiteTrack, GalaxyGSuiteTrack
    import gold.gsuite.GSuiteComposer as GSuiteComposer
    from gold.origdata.GtrackGenomeElementSource import GtrackGenomeElementSource
    from gold.origdata.GtrackComposer import ExtendedGtrackComposer
    from gold.origdata.GESourceWrapper import ListGESourceWrapper
    from gold.origdata.GenomeElement import GenomeElement
    from collections import defaultdict
    from unidecode import unidecode
    from pyliftover import LiftOver

    # Chromosome names used in the input that differ from the 'chr' + name
    # convention of the output.
    chromAliases = {'chr23': 'chrX', 'chr24': 'chrY', 'chrMT': 'chrM'}

    gSuite = GSuite()

    diseaseColIndex = cols.index(cls.DISEASE_COLUMN_NAME)
    chrColIndex = cols.index(cls.CHR_COLUMN_NAME)
    startColIndex = cols.index(cls.START_COLUMN_NAME)
    valColIndex = cols.index(cls.VAL_COLUMN_NAME)

    # Every column that is not one of the four core columns becomes an
    # extra column, in its original order.
    coreIndexes = set([diseaseColIndex, chrColIndex,
                       startColIndex, valColIndex])
    extraIndexes = [index for index in range(len(cols))
                    if index not in coreIndexes]
    orderedExtraKeys = [cls._fixColNameForGTrack(cols[index])
                        for index in extraIndexes]

    diseaseToRowsDict = defaultdict(list)
    for row in rows:
        disease = row[diseaseColIndex]
        if isinstance(disease, unicode):
            disease = unidecode(disease).replace('\x00', '')
        diseaseToRowsDict[disease].append(row)

    progressViewer = ProgressViewer(
        [('Create GWAS tracks for diseases/traits', len(diseaseToRowsDict))],
        cls.extraGalaxyFn[cls.HISTORY_PROGRESS_TITLE])

    # The lift-over object depends only on the two genomes, so it is built
    # once instead of once per disease.
    shouldLiftOver = cls.DATABASE_GENOME != cls.OUTPUT_GENOME
    liftOver = None
    if shouldLiftOver and diseaseToRowsDict:
        liftOver = LiftOver(cls.DATABASE_GENOME, cls.OUTPUT_GENOME)

    for disease in sorted(diseaseToRowsDict.keys()):
        uri = GalaxyGSuiteTrack.generateURI(
            galaxyFn=cls.extraGalaxyFn[cls.HISTORY_HIDDEN_TRACK_STORAGE],
            extraFileName=disease.replace('/', '_') + '.gtrack')
        gSuiteTrack = GSuiteTrack(uri, title=disease,
                                  genome=cls.OUTPUT_GENOME)
        gSuite.addTrack(gSuiteTrack)

        geList = []
        for row in diseaseToRowsDict[disease]:
            extra = {}
            for col, index in zip(orderedExtraKeys, extraIndexes):
                cell = row[index].strip()
                if isinstance(cell, unicode):
                    cell = unidecode(cell)
                # '.' stands in for empty cells.
                extra[col] = cell if cell != '' else '.'

            chrom = 'chr' + row[chrColIndex]
            chrom = chromAliases.get(chrom, chrom)
            start = int(row[startColIndex])

            if shouldLiftOver:
                newPosList = liftOver.convert_coordinate(chrom, start)
                if newPosList is None or len(newPosList) != 1:
                    print('SNP with position %s on chromosome %s ' % (start, chrom) +
                          'could not be lifted over from reference genome ' +
                          '%s to %s (for disease/trait "%s")' %
                          (cls.DATABASE_GENOME, cls.OUTPUT_GENOME, disease))
                    # Coordinates of the database genome must not end up in
                    # a track of the output genome.
                    continue
                chrom, start = newPosList[0][0:2]

            geList.append(GenomeElement(chr=chrom, start=start,
                                        val=row[valColIndex],
                                        orderedExtraKeys=orderedExtraKeys,
                                        extra=extra))

        geSource = GtrackGenomeElementSource(cls.GTRACK_BLUEPRINT_PATH)
        wrappedGeSource = ListGESourceWrapper(geSource, geList)
        composer = ExtendedGtrackComposer(wrappedGeSource)
        composer.composeToFile(gSuiteTrack.path)

        progressViewer.update()

    GSuiteComposer.composeToFile(gSuite, outFile)