def run_on_extracted_variables(cls, ts, analysisBins, choices_numberOfTimesToRandomize, choices_randAlg,
                               choices_randType, galaxyFn, genome):
    """
    Randomize the tracks of a track structure and describe them in a GSuite.

    A randomized version of ``ts`` is produced via the track view provider
    returned by ``cls._createTrackViewProvider``. Each leaf node of the
    randomized structure is registered as a ``FileGSuiteTrack`` in an output
    GSuite, the randomized structure is analysed with ``TsWriterStat`` and the
    GSuite is composed to ``galaxyFn``.

    Only one randomization run is supported (asserted), and ``galaxyFn`` must
    not be the empty string (asserted).
    """
    # For now, since ts probably needs to be unique each time..
    assert choices_numberOfTimesToRandomize == 1

    outputGSuite = GSuite()
    for runIndex in range(0, int(choices_numberOfTimesToRandomize)):
        print("TF leaf nodes")
        print(ts.getLeafNodes())

        # The trailing False and None arguments are temporary (original author's note).
        tvProvider = cls._createTrackViewProvider(ts, analysisBins, genome, choices_randAlg,
                                                  choices_randType, False, None)
        randomizedTs = getRandomizedVersionOfTs(ts, tvProvider)

        # output files
        print("Leaf nodes")
        print(randomizedTs.getLeafNodes())
        for leafIndex, leafTs in enumerate(randomizedTs.getLeafNodes()):
            # NOTE(review): the output directory is hard-coded to a user's home folder.
            bedPath = '/home/ivargry/outfile_' + str(randint(0,999999999)) + '_' + str(leafIndex) + ".bed"
            uri = FileGSuiteTrack.generateURI(path=bedPath, suffix='bed', doQuote=False)
            print("URI: " + uri)

            title = leafTs.metadata.pop('title')
            outputTrack = FileGSuiteTrack(uri,
                                          title=title + '.randomized',
                                          fileFormat='primary',
                                          trackType='segments',
                                          genome=genome,
                                          attributes=leafTs.metadata)
            outputGSuite.addTrack(outputTrack)

            # Set after the GSuite track has been created, as in the original flow.
            leafTs.metadata['trackFilePath'] = outputTrack.path
            leafTs.metadata['randomization_run'] = runIndex

        writerSpec = AnalysisSpec(TsWriterStat)
        doAnalysis(writerSpec, analysisBins, randomizedTs)

    assert galaxyFn != ""
    GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
def trackOverlapValuesCorrelation(cls, analysisBins, gSuite):
    """
    Correlate every unordered pair of tracks in a GSuite.

    Each track is represented as a vector with values at positions that are
    present in both tracks of the pair. The two value lists (``'X'`` and
    ``'Y'`` of the global result, or two empty lists when either key is
    missing) are handed to ``cls.updateCorrDict``.

    Returns a tuple ``(corrDict, labels)`` where ``labels`` holds the track
    titles in GSuite index order.
    """
    corrDict = cls.createDistDict([cls.CORR_PEARSON, cls.CORR_SPEARMAN])
    overlapSpec = AnalysisSpec(OverlappingValsListStat)
    numTracks = gSuite.numTracks()

    labels = []
    for firstIdx in range(0, numTracks):
        firstGsTrack = gSuite.getTrackFromIndex(firstIdx)
        labels.append(firstGsTrack.title)

        # Only pairs with secondIdx > firstIdx are analysed, so each pair is visited once.
        for secondIdx in range(firstIdx + 1, numTracks):
            secondGsTrack = gSuite.getTrackFromIndex(secondIdx)
            trackPair = [Track(firstGsTrack.trackName), Track(secondGsTrack.trackName)]
            pairResult = doAnalysis(overlapSpec, analysisBins, trackPair).getGlobalResult()

            if 'X' in pairResult and 'Y' in pairResult:
                firstValues = pairResult['X']
                secondValues = pairResult['Y']
            else:
                firstValues = []
                secondValues = []
            cls.updateCorrDict(corrDict, firstValues, secondValues)

    return corrDict, labels
def __init__(self, choices, galaxyFn=None, username=''):
    """
    Run an MCFDR-based hypothesis test on the first two tracks of a GSuite.

    The analysis is built from the ``'$MCFDRv3$'`` template wrapped in
    ``CollectionBinnedHypothesisWrapperStat``; the raw statistic depends on
    whether ``choices.question`` equals ``"question 8"``. The analysis runs on
    the global bins of ``choices.genome`` and the result object is printed to
    standard out.

    choices holds the selections made by the web-user in each options box.
    """
    specString = REPLACE_TEMPLATES['$MCFDRv3$'] + ' -> CollectionBinnedHypothesisWrapperStat'
    analysisSpec = AnalysisDefHandler(specString)
    analysisSpec.setChoice('MCFDR sampling depth', choices.mcfdrDepth)
    analysisSpec.addParameter('assumptions', 'PermutedSegsAndIntersegsTrack_')

    isQuestionEight = choices.question == "question 8"
    rawStatisticName = 'MultitrackRawBinnedOverlapV2Stat' if isQuestionEight \
        else 'MultitrackRawSingleBinV2Stat'
    analysisSpec.addParameter('rawStatistic', rawStatisticName)

    # analysisSpec.addParameter('pairwiseStatistic', choices.stat)
    analysisSpec.addParameter('summaryFunc', choices.summaryFunc)
    analysisSpec.addParameter('tail', 'right-tail')
    analysisSpec.addParameter('localBinSize', choices.binSize)
    analysisSpec.addParameter('question', choices.question)

    globalBins = GlobalBinSource(choices.genome)
    inputGSuite = getGSuiteFromGalaxyTN(choices.tracks)

    # Track objects are created for every GSuite track, but only the first two are analysed.
    allTracks = [Track(gsTrack.trackName) for gsTrack in inputGSuite.allTracks()]
    firstTwoTracks = allTracks[0:2]

    results = doAnalysis(analysisSpec, globalBins, firstTwoTracks)
    print(results)
def getGSuiteRipleysKData(self, bpWindow=1000, analysisBins=None):
    """
    Run ``RipleysKStat`` on every track of ``self._gSuite``.

    bpWindow: window size, passed to the statistic as a string parameter.
    analysisBins: bin source handed unchanged to ``doAnalysis``.

    Returns an OrderedDict: track title -> the ``'Result'`` entry of the
    track's global result, in GSuite track order.
    """
    ripleysSpec = AnalysisSpec(RipleysKStat)
    ripleysSpec.addParameter('bpWindow', str(bpWindow))

    titleToResult = OrderedDict()
    for gsTrack in self._gSuite.allTracks():
        trackResult = doAnalysis(ripleysSpec, analysisBins, [Track(gsTrack.trackName)])
        globalResult = trackResult.getGlobalResult()
        titleToResult[gsTrack.title] = globalResult['Result']
    return titleToResult
def handleSameTrack(cls, trackName, regSpec, binSpec, genome, galaxyFn):
    """
    Run ``RawOverlapToSelfStat`` on a single track and return its global result.

    The bins come from ``GalaxyInterface._getUserBinSource(regSpec, binSpec,
    genome)``. ``galaxyFn`` is accepted but not used by this method.
    """
    selfOverlapSpec = AnalysisSpec(RawOverlapToSelfStat)
    userBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome)
    analysisResult = doAnalysis(selfOverlapSpec, userBins, [Track(trackName)])
    return analysisResult.getGlobalResult()
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Randomize the tracks of the selected GSuite and compose a new GSuite.

    Is called when the execute-button is pushed by the web-user. When
    between-track randomization within a category is selected (and a category
    is given), the track structure is split by category, each part is
    randomized on its own and the parts are flattened again; otherwise the
    whole structure is randomized in one go. Every randomized leaf node is
    registered in the output GSuite, the randomized structure is analysed with
    ``TsWriterStat`` and the GSuite is composed to ``galaxyFn``.

    choices holds the selections made by the web-user in each options box.
    """
    # TODO: add functionality for single gtrack within-track randomization
    print('Executing...')

    sourceGSuite = getGSuiteFromGalaxyTN(choices.gs)
    outputGSuite = GSuite()
    genome = sourceGSuite.genome
    ts = factory.getFlatTracksTS(genome, choices.gs)
    randIndex = 0
    bins = GlobalBinSource(genome)

    splitByCategory = (choices.randType == TsRandAlgReg.BETWEEN_TRACKS_CATEGORY and
                       choices.category not in [None, 'None'])
    if not splitByCategory:
        wholeProvider = cls.createTrackViewProvider(choices, ts, bins, genome)
        randomizedTs = getRandomizedVersionOfTs(ts, wholeProvider, randIndex)
    else:
        ts = ts.getSplittedByCategoryTS(choices.category)
        perCategoryTs = TrackStructureV2()
        for categoryKey, categoryTs in ts.items():
            categoryProvider = cls.createTrackViewProvider(choices, categoryTs, bins, genome)
            perCategoryTs[categoryKey] = getRandomizedVersionOfTs(categoryTs, categoryProvider, randIndex)
        randomizedTs = perCategoryTs.getFlattenedTS()

    for leafTs in randomizedTs.getLeafNodes():
        extraFileName = os.path.sep.join(leafTs.track.trackName) + '.randomized'
        uri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                            extraFileName=extraFileName,
                                            suffix='bed')
        title = leafTs.metadata.pop('title')
        randomizedTrack = GSuiteTrack(uri,
                                      title=title + '.randomized',
                                      fileFormat='primary',
                                      trackType='segments',
                                      genome=genome,
                                      attributes=leafTs.metadata)
        outputGSuite.addTrack(randomizedTrack)
        # Set after the GSuite track has been created, as in the original flow.
        leafTs.metadata['trackFilePath'] = randomizedTrack.path

    writerSpec = AnalysisSpec(TsWriterStat)
    doAnalysis(writerSpec, bins, randomizedTs)

    GSuiteComposer.composeToFile(outputGSuite, galaxyFn)
def _runMultipleSingleValStatsCommon(trackStructure, stats, analysisBins, stat):
    """
    Run several single-value statistics through one combining statistic.

    trackStructure: passed unchanged to ``doAnalysis``.
    stats: a list of statistic classes, or a ^-separated string of statistic
        class names.
    analysisBins: bin source passed unchanged to ``doAnalysis``.
    stat: the statistic class the analysis spec is built from; it receives
        the statistics through its ``rawStatistics`` parameter.

    Returns the ``'Result'`` entry of the global result.

    Raises AssertionError if ``stats`` is None or is neither a string nor a
    list.
    """
    assert stats is not None, 'stats argument not defined'
    # isinstance (instead of an exact type() match) also accepts unicode strings
    # and subclasses, which is what the basestring check below already expects.
    assert isinstance(stats, (basestring, list)), \
        'stats argument must be a list of statistics or ^-separated string of statistic names'

    additionalAnalysisSpec = AnalysisSpec(stat)
    if isinstance(stats, basestring):
        statsParam = stats
    else:
        # use ^ separator to add additional stat classes.
        statsParam = "^".join([x.__name__ for x in stats])
    additionalAnalysisSpec.addParameter('rawStatistics', statsParam)

    return doAnalysis(additionalAnalysisSpec, analysisBins, trackStructure).getGlobalResult()["Result"]
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Run hypothesis tests of a query track against each track of a reference GSuite.

    A track structure is built with one entry per reference track title. Each
    entry holds a "real" pair (query, reference) and a "rand" pair (query,
    randomized reference). The structure is analysed with the spec returned by
    ``cls._prepareAnalysisWithHypothesisTests`` and one line per track title is
    printed to standard out.

    When ``DebugConfig.USE_PROFILING`` is set, the analysis is run under the
    profiler instead, and profiling statistics are printed as well.
    """
    # DebugUtil.insertBreakPoint()
    cls._setDebugModeIfSelected(choices)
    choices_queryTrack = choices.queryTrack
    choices_gsuite = choices.gsuite
    genome = choices.genome
    # queryTrackNameAsList = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.queryTrack,
    #                                                                              printErrors=False,
    #                                                                              printProgress=False)
    # gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    analysisBins = UserBinMixin.getUserBinSource(choices)
    import quick.gsuite.GuiBasedTsFactory as factory
    queryTS = factory.getSingleTrackTS(genome, choices_queryTrack)
    refTS = factory.getFlatTracksTS(genome, choices_gsuite)
    ts = TrackStructureV2()
    # NOTE(review): this first realTS is rebound inside the loop below before it
    # is read anywhere in this method; it looks like a leftover - confirm.
    realTS = TrackStructureV2()
    realTS["query"] = queryTS
    realTS["reference"] = refTS
    # The query track is not randomized: the "rand" pair reuses the real query TS.
    randQueryTS = queryTS
    randTvProvider = cls.createTrackViewProvider(choices, refTS, analysisBins, genome)
    localAnalysis = randTvProvider.supportsLocalAnalysis()
    randRefTS = getRandomizedVersionOfTs(refTS, randTvProvider)
    # The titles of the randomized reference tracks are used as keys into both
    # refTS and randRefTS below.
    hypothesisKeyList = [sts.metadata["title"] for sts in randRefTS.values()]
    for hypothesisKey in hypothesisKeyList:
        realTS = TrackStructureV2()
        realTS["query"] = queryTS
        realTS["reference"] = refTS[hypothesisKey]
        randTS = TrackStructureV2()
        randTS["query"] = randQueryTS
        randTS["reference"] = randRefTS[hypothesisKey]
        hypothesisTS = TrackStructureV2()
        hypothesisTS["real"] = realTS
        hypothesisTS["rand"] = randTS
        ts[hypothesisKey] = hypothesisTS
    analysisSpec = cls._prepareAnalysisWithHypothesisTests(choices, localAnalysis)
    if DebugConfig.USE_PROFILING:
        from gold.util.Profiler import Profiler
        profiler = Profiler()
        # The profiled statement is a code string evaluated against locals():
        # it refers to the local names resDict, analysisSpec, analysisBins and ts,
        # so these must not be renamed. The result is passed back through the
        # mutable resDict.
        resDict = {}
        profiler.run('resDict[0] = doAnalysis(analysisSpec, analysisBins, ts)', globals(), locals())
        res = resDict[0]
        result = res.getGlobalResult()['Result']
        profiler.printStats()
        if DebugConfig.USE_CALLGRAPH and galaxyFn:
            profiler.printLinkToCallGraph(['profile_AnalysisDefJob'], galaxyFn)
    else:
        result = doAnalysis(analysisSpec, analysisBins, ts).getGlobalResult()["Result"]
    # One output line per track title; <br> is used as the line break.
    for trackTitle, res in result.iteritems():
        print '{}: {}<br>'.format(trackTitle, repr(res.getResult()))
def printDiseaseRsIDs(cls, analysisBins, gSuite, core):
    """
    Print the unique points of every track in a GSuite, track by track.

    For each track, a small header with the track title is written to
    ``core``, followed by one line per element of the ``'Result'`` entry of
    the ``UniquePointTrackStat`` global result (when that entry exists).
    """
    uniquePointSpec = AnalysisSpec(UniquePointTrackStat)
    for gsTrack in gSuite.allTracks():
        trackResult = doAnalysis(uniquePointSpec, analysisBins, [Track(gsTrack.trackName)])
        core.smallHeader('<br><br><br>' + gsTrack.title + '<br><br><br>')

        if 'Result' not in trackResult.getGlobalResult():
            continue
        for snp in trackResult.getGlobalResult()['Result']:
            core.line(snp)
def getDistancesWithin(cls, analysisBins, gSuite, galaxyFn, htmlCore):
    """
    Finds all distances between points on the same tracks, for all tracks in
    a GSuite.

    The ``PointGapsStat`` result of every track is accumulated through
    ``cls.addDistances``; the collected distances are then summarized with
    ``cls.printStats`` and plotted with ``cls.plotDistances``.
    """
    gapSpec = AnalysisSpec(PointGapsStat)
    collectedDistances = []
    for gsTrack in gSuite.allTracks():
        trackResult = doAnalysis(gapSpec, analysisBins, [Track(gsTrack.trackName)])
        cls.addDistances(collectedDistances, trackResult.getGlobalResult()['Result'])

    cls.printStats(collectedDistances, 'distance', htmlCore)
    cls.plotDistances(collectedDistances, galaxyFn, 'within', htmlCore)
def printUniqueRsIDs(cls, analysisBins, gSuite, core):
    """
    Print the union of the unique points of all tracks in a GSuite.

    The ``'Result'`` entries of the ``UniquePointTrackStat`` global results
    are merged into one set, which is written to ``core`` line by line after a
    small header stating the number of tracks.
    """
    uniquePointSpec = AnalysisSpec(UniquePointTrackStat)
    uniqueRsIDs = set()
    for gsTrack in gSuite.allTracks():
        trackResult = doAnalysis(uniquePointSpec, analysisBins, [Track(gsTrack.trackName)])
        if 'Result' not in trackResult.getGlobalResult():
            continue
        uniqueRsIDs.update(trackResult.getGlobalResult()['Result'])

    core.smallHeader('Printing snps from GSuite of ' + str(gSuite.numTracks()) + ' tracks<br>')
    for rsid in uniqueRsIDs:
        core.line(rsid)
def run(self):
    """
    Run ``SummarizedInteractionWithOtherTracksStat`` on every ordered pair of
    GSuite tracks whose titles differ.

    The ``'Result'`` of each pair is collected in an OrderedDict keyed by
    ``(title1, title2)``.

    NOTE(review): in the visible code the collected dict is neither returned
    nor stored on ``self`` - confirm whether a trailing statement is missing.
    """
    tracks = [gsTrack.trackName for gsTrack in self._gsuite.allTracks()]
    trackTitles = self._gsuite.allTrackTitles()
    results = OrderedDict()

    interactionSpec = AnalysisSpec(SummarizedInteractionWithOtherTracksStat)
    interactionSpec.addParameter('rawStatistic', self._rawStatistic)
    interactionSpec.addParameter('summaryFunc', self._summaryFunction)
    interactionSpec.addParameter('reverse', self._reversed)

    for firstTitle, firstName in zip(trackTitles, tracks):
        for secondTitle, secondName in zip(trackTitles, tracks):
            # Pairs are skipped on equal titles, not on equal positions.
            if firstTitle != secondTitle:
                trackPair = [Track(firstName), Track(secondName)]
                pairResult = doAnalysis(interactionSpec, self._analysisBins, trackPair)
                globalResult = pairResult.getGlobalResult()
                # if 'Result' in resultDict:
                results[(firstTitle, secondTitle)] = globalResult['Result']
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Cluster the tracks of a GSuite, using a linked point track.

    For every unordered pair of GSuite tracks, ``ExpandTrackAndMatchStat`` is
    run on the pair together with the linked point track selected in
    ``choices.ldTrack``, and the global result is fed to
    ``cls.updateDistDict``. The distance plots are then printed together with
    the elapsed time, and the HTML is written to standard out.
    """
    import time
    # NOTE(review): time.clock() is kept as in the original; it was removed in
    # Python 3.8, which matters if this file is ever ported.
    start = time.clock()

    # HTML settings
    from gold.result.HtmlCore import HtmlCore
    htmlCore = HtmlCore()
    htmlCore.divBegin(style=cls.HTML_STYLE)

    # Set debug environment
    cls._setDebugModeIfSelected(choices)

    # Print tool information
    cls.htmlClusterTitle(cls.getToolName(), htmlCore)
    cls.htmlClusterSubtext(choices.distanceMeasure, cls.CLUSTER_LIST,
                           choices.linkageCriterion, htmlCore)
    htmlCore.line('Threshold of r<sup>2</sup>: ' + choices.rSquare)

    # Analysis environment
    gSuite = getGSuiteFromGalaxyTN(choices.gSuite)
    analysisBins = GlobalBinSource(gSuite.genome)
    matchSpec = AnalysisSpec(ExpandTrackAndMatchStat)
    galaxyTN = choices.ldTrack.split(":")
    linkedTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(gSuite.genome, galaxyTN)
    linkedPointTrack = Track(linkedTrackName)

    # Find distance/correlation matrix
    labels = []
    distDict = cls.createDistDict(cls.CLUSTER_LIST)
    numTracks = gSuite.numTracks()
    for firstIdx in range(0, numTracks):
        firstGsTrack = gSuite.getTrackFromIndex(firstIdx)
        labels.append(firstGsTrack.title)
        for secondIdx in range(firstIdx + 1, numTracks):
            secondGsTrack = gSuite.getTrackFromIndex(secondIdx)
            analysisTracks = [Track(firstGsTrack.trackName),
                              Track(secondGsTrack.trackName),
                              linkedPointTrack]
            pairCount = doAnalysis(matchSpec, analysisBins, analysisTracks).getGlobalResult()
            cls.updateDistDict(distDict, pairCount)

    # Cluster and print plots
    cls.printDistPlots(distDict, labels, choices.distanceMeasure,
                       choices.linkageCriterion, galaxyFn, htmlCore)
    cls.htmlClusterTime(str(time.clock() - start), htmlCore)
    htmlCore.divEnd()
    print(htmlCore)
def runMultipleSingleValStatsOnTracks(gsuite, stats, analysisBins, queryTrack=None):
    '''
    Run several single-value statistics on every track of a GSuite.

    gsuite: The gsuite of tracks
    stats: List of statistic classes, or a ^-separated string of statistic
        class names
    analysisBins: BinSource object
    queryTrack: should be defined if there are stats that need to run on two
        tracks (e.g. overlap)

    Returns an OrderedDict: Track title -> OrderedDict: Stat name -> single value

    Raises AssertionError if stats is None or is neither a string nor a list.
    '''
    assert stats is not None, 'stats argument not defined'
    # isinstance (instead of an exact type() match) also accepts unicode strings
    # and subclasses, which is what the basestring check below already expects.
    assert isinstance(stats, (basestring, list)), \
        'stats argument must be a list of statistics or ^-separated string of statistic names'

    resultsDict = OrderedDict()

    from quick.statistic.GenericResultsCombinerStat import GenericResultsCombinerStat
    additionalAnalysisSpec = AnalysisSpec(GenericResultsCombinerStat)
    if isinstance(stats, basestring):
        statsParam = stats
    else:
        # use ^ separator to add additional stat classes.
        statsParam = "^".join([x.__name__ for x in stats])
    additionalAnalysisSpec.addParameter('rawStatistics', statsParam)

    naturalNames = CommonConstants.STATISTIC_CLASS_NAME_TO_NATURAL_NAME_DICT
    for refTrack in gsuite.allTracks():
        if refTrack.title not in resultsDict:
            resultsDict[refTrack.title] = OrderedDict()

        tracks = [Track(refTrack.trackName)]
        if queryTrack:
            tracks.append(queryTrack)

        additionalResult = doAnalysis(additionalAnalysisSpec, analysisBins, tracks).getGlobalResult()
        for statClassName, res in additionalResult.iteritems():
            # Fall back to the class name when no natural name is registered.
            statPrettyName = naturalNames[statClassName] if statClassName in naturalNames \
                else statClassName
            resultsDict[refTrack.title][statPrettyName] = res

    return resultsDict
def microBinDistance(cls, gSuite, analysisBins, choices):
    """
    Compute pairwise distance counts between tracks, where each micro bin
    represents a feature.

    ``PointCountPerMicroBinV2Stat`` is run on every track with the micro bin
    size taken from ``choices.microBin``. For every unordered pair of tracks
    the following counts are passed to ``cls.updateDistDict``:

        a: bins that are non-zero in both tracks
        b: bins that are non-zero only in the first track
        c: bins that are non-zero only in the second track
        d: the remaining bins

    Tracks whose global result has no ``'Result'`` entry are left out of both
    the comparison and the returned labels, so that labels and bins always
    stay aligned.

    Returns a tuple ``(distDict, labels)``.
    """
    bins = []
    labels = []
    distDict = cls.createDistDict(cls.CORR_DISTLIST)
    analysisSpec = AnalysisSpec(PointCountPerMicroBinV2Stat)
    analysisSpec.addParameter('microBin', int(choices.microBin))

    # Get bins:
    for gSuiteTrack in gSuite.allTracks():
        res = doAnalysis(analysisSpec, analysisBins, [Track(gSuiteTrack.trackName)]).getGlobalResult()
        if 'Result' in res:
            # Record label and bin vector together; recording the label
            # unconditionally would shift labels relative to bins and make the
            # pair loop below index past the end of bins.
            bins.append(res['Result'])
            labels.append(gSuiteTrack.title)

    size = len(bins)
    for i in range(0, size):
        for j in range(i + 1, size):
            bin1 = bins[i]
            bin2 = bins[j]

            # Create numpy masks
            nonzero_intersect = nonzero((bin1 != 0) & (bin2 != 0))
            nonzero_snps1 = nonzero(bin1)
            nonzero_snps2 = nonzero(bin2)

            # Use masks to find a, b, c, d
            count = {'a': 0, 'b': 0, 'c': 0, 'd': 0}
            count['a'] = len(bin1[nonzero_intersect])
            count['b'] = len(bin1[nonzero_snps1]) - count['a']
            count['c'] = len(bin2[nonzero_snps2]) - count['a']
            count['d'] = len(bin1) - count['a'] - count['b'] - count['c']

            cls.updateDistDict(distDict, count)

    return distDict, labels
def getSNPFrequencyStats(cls, bins, gSuite, galaxyFn, htmlCore):
    """
    Report per-track SNP counts for a GSuite as statistics, plots and a chart.

    ``UniquePointTrackStat`` is run on every track. The number of
    observations per track and the set of all observed ids are collected, a
    point graph and a histogram are written as run-specific PDF files, and
    totals, plots and an interactive column chart are added to ``htmlCore``.

    NOTE(review): a title is recorded for every track, but a count only for
    tracks whose result has a 'Result' entry, so the two lists can differ in
    length - confirm this cannot happen in practice.
    """
    uniquePointSpec = AnalysisSpec(UniquePointTrackStat)
    uniqueIds = set()
    countsPerTrack = []
    trackLabels = []
    for gsTrack in gSuite.allTracks():
        analysisTrack = Track(gsTrack.trackName)
        trackLabels.append(gsTrack.title)
        trackResult = doAnalysis(uniquePointSpec, bins, [analysisTrack])
        if 'Result' not in trackResult.getGlobalResult():
            continue
        observations = trackResult.getGlobalResult()['Result']
        countsPerTrack.append(len(observations))
        uniqueIds.update(observations)

    frequencyPlotFile = GalaxyRunSpecificFile(['snpfrequencies.pdf'], galaxyFn)
    MatplotlibPlots.pointGraphY(countsPerTrack, frequencyPlotFile,
                                ylabel='SNP counts', xticks=trackLabels)

    histogramPlotFile = GalaxyRunSpecificFile(['snpfreqhistogram.pdf'], galaxyFn)
    MatplotlibPlots.histogramRugPlot(countsPerTrack, 10, histogramPlotFile, 'SNP counts')

    totalSNPCount = sum(countsPerTrack)
    cls.printStats(countsPerTrack, 'track', htmlCore)

    # The difference between the summed counts and the number of unique ids is
    # reported as overlap.
    htmlCore.line('Total number of SNPs: ' + str(totalSNPCount))
    htmlCore.line('Unique SNPs: ' + str(len(uniqueIds)))
    htmlCore.line('Overlapping rsIDs: ' + str(totalSNPCount - len(uniqueIds)))

    htmlCore.divider(True)
    htmlCore.header('Graph of SNP frequencies in GSuite tracks')
    htmlCore.line(frequencyPlotFile.getEmbeddedImage())
    htmlCore.link('PDF of SNP frequency graph', frequencyPlotFile.getURL())

    htmlCore.divider(True)
    htmlCore.header('Histogram of SNP frequencies in GSuite tracks')
    htmlCore.line(histogramPlotFile.getEmbeddedImage())
    htmlCore.link('PDF of SNP frequency histogram', histogramPlotFile.getURL())

    cls.getInteractiveColumnChartWithLabels(countsPerTrack, trackLabels, htmlCore)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Create a linked point track from the unique points of a whole GSuite.

    The ``'Result'`` entries of ``UniquePointTrackStat`` for all tracks are
    merged into one set, which is passed to ``cls.createLinkedPointTrack``
    together with ``choices.isUndirected`` (as a string), ``galaxyFn`` and
    ``choices.rsquare`` (as a float).
    """
    from gold.description.AnalysisDefHandler import AnalysisSpec
    from quick.statistic.UniquePointTrackStat import UniquePointTrackStat
    from quick.application.UserBinSource import GlobalBinSource

    cls._setDebugModeIfSelected(choices)

    # Analysis environment
    gSuite = getGSuiteFromGalaxyTN(choices.gSuite)
    globalBins = GlobalBinSource(gSuite.genome)
    uniquePointSpec = AnalysisSpec(UniquePointTrackStat)

    # Get unique rsids for whole GSuite
    allRsids = set()
    for gsTrack in gSuite.allTracks():
        analysisTrack = Track(gsTrack.trackName)
        globalResult = doAnalysis(uniquePointSpec, globalBins, [analysisTrack]).getGlobalResult()
        if 'Result' not in globalResult:
            continue
        allRsids.update(globalResult['Result'])

    # Create linked point track
    cls.createLinkedPointTrack(allRsids, str(choices.isUndirected), galaxyFn,
                               float(choices.rsquare))
def directVectorDistance(cls, gSuite, analysisBins):
    """
    Compute pairwise distance counts between tracks, where each base pair
    represents its own feature.

    ``DistanceMetricsFoundationStat`` is run on every unordered pair of GSuite
    tracks and its global result is passed to ``cls.updateDistDict``.

    Returns a tuple ``(distDict, labels)`` where ``labels`` holds the track
    titles in GSuite index order.
    """
    foundationSpec = AnalysisSpec(DistanceMetricsFoundationStat)
    labels = []
    numTracks = gSuite.numTracks()
    distDict = cls.createDistDict(cls.CORR_DISTLIST)

    for firstIdx in range(0, numTracks):
        firstGsTrack = gSuite.getTrackFromIndex(firstIdx)
        labels.append(firstGsTrack.title)
        for secondIdx in range(firstIdx + 1, numTracks):
            secondGsTrack = gSuite.getTrackFromIndex(secondIdx)
            trackPair = [Track(firstGsTrack.trackName), Track(secondGsTrack.trackName)]
            pairCount = doAnalysis(foundationSpec, analysisBins, trackPair).getGlobalResult()
            cls.updateDistDict(distDict, pairCount)

    return distDict, labels
def getOverlapMatrix(cls, analysisBins, analysisSpec, gSuite):
    """
    Build a matrix with the result of ``analysisSpec`` for every ordered pair
    of GSuite tracks.

    Row ``i`` holds the ``'Result'`` of track ``i`` against each other track;
    the cell where a track meets itself holds the placeholder ``'.'``.

    Returns a tuple ``(overlapMatrix, labels)`` where ``labels`` holds the
    track titles in row order.
    """
    overlapMatrix = []
    labels = []
    for rowGsTrack in gSuite.allTracks():
        labels.append(rowGsTrack.title)
        row = []
        overlapMatrix.append(row)
        rowTrack = Track(rowGsTrack.trackName)

        for columnGsTrack in gSuite.allTracks():
            if rowGsTrack == columnGsTrack:
                # This value must be removed before correlating the values.
                row.append('.')
                continue
            columnTrack = Track(columnGsTrack.trackName)
            cellResult = doAnalysis(analysisSpec, analysisBins, [rowTrack, columnTrack]).getGlobalResult()
            row.append(cellResult['Result'])

    return overlapMatrix, labels
def generateTracksAndLabels(cls, gSuite, analysisBins):
    """
    Used in bipartite matching along with rSquare graph.

    ``UniquePointTrackStat`` is run on every track of the GSuite. For each
    track whose global result has a ``'Result'`` entry, that entry and the
    track title are recorded.

    Returns a tuple ``(tracks, labels)`` of two lists.
    """
    from gold.application.HBAPI import doAnalysis
    from gold.description.AnalysisDefHandler import AnalysisSpec
    from quick.statistic.UniquePointTrackStat import UniquePointTrackStat
    from gold.track.Track import Track

    uniquePointSpec = AnalysisSpec(UniquePointTrackStat)
    rsidLists = []
    titles = []
    for gsTrack in gSuite.allTracks():
        analysisTrack = Track(gsTrack.trackName)
        globalResult = doAnalysis(uniquePointSpec, analysisBins, [analysisTrack]).getGlobalResult()
        if 'Result' not in globalResult:
            continue
        rsidLists.append(globalResult['Result'])
        titles.append(gsTrack.title)

    return rsidLists, titles
def getDistancesBetween(cls, analysisBins, gSuite, galaxyFn, htmlCore):
    """
    Computes distances for one pair of tracks in a GSuite at a time.

    Finds the smallest distance to a point in the other track for all points
    in each track. These distances might be asymmetric, i.e. the smallest
    distance from a point in track1 to the closest point in track2 could be
    different from the smallest distance from that point in track2 to the
    closest point in track1.

    ``NearestPointDistsStat`` is run on every ordered pair of tracks with
    different track names; the results are accumulated through
    ``cls.addDistances``, summarized with ``cls.printStats`` and plotted with
    ``cls.plotDistances``.
    """
    distances = []
    analysisSpec = AnalysisSpec(NearestPointDistsStat)

    # Materialize the tracks: they are iterated in two nested loops, and if
    # allTracks() hands out a one-shot iterator the inner loop would exhaust it
    # and only the first track would ever be paired with the others.
    allTracks = list(gSuite.allTracks())

    for gSuiteTrack in allTracks:
        track = Track(gSuiteTrack.trackName)
        for gSuiteTrack2 in allTracks:
            if gSuiteTrack.trackName != gSuiteTrack2.trackName:
                track2 = Track(gSuiteTrack2.trackName)
                result = doAnalysis(analysisSpec, analysisBins, [track, track2])
                cls.addDistances(distances, result.getGlobalResult()['Result'])

    cls.printStats(distances, 'distance', htmlCore)
    htmlCore.line('Asymmetries related to alternating shortest distance: ' +
                  str(cls.countAsymmetries(distances)))
    cls.plotDistances(distances, galaxyFn, 'between', htmlCore)
def execute(cls, choices, galaxyFn=None, username=''): cls._setDebugModeIfSelected(choices) genome = choices.genome genomicRegions = choices.genomicRegions genomicRegionsTracks = choices.genomicRegionsTracks sourceTfs = choices.sourceTfs sourceTfsDetails = choices.sourceTfsDetails tfTracks = choices.tfTracks # Get Genomic Region track name: if genomicRegions == cls.REGIONS_FROM_HISTORY: galaxyTN = genomicRegionsTracks.split(':') genElementTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN( genome, galaxyTN) #queryGSuite = getGSuiteFromGalaxyTN(genomicRegionsTracks) #queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()] elif genomicRegions == 'Hyperbrowser repository': selectedGenRegTrack = TfbsTrackNameMappings.getTfbsTrackNameMappings( genome)[genomicRegionsTracks] if isinstance(selectedGenRegTrack, dict): genElementTrackName = selectedGenRegTrack.values() else: genElementTrackName = selectedGenRegTrack elif genomicRegions == 'Hyperbrowser repository (cell-type-specific)': genElementTrackName = ['Private', 'Antonio' ] + genomicRegionsTracks.split(':') else: return # Get TF track names: if isinstance(tfTracks, dict): selectedTfTracks = [ key for key, val in tfTracks.iteritems() if val == 'True' ] else: selectedTfTracks = [tfTracks] queryTrackTitle = '--'.join(genElementTrackName) trackTitles = [queryTrackTitle] tracks = [Track(genElementTrackName, trackTitle=queryTrackTitle)] for i in selectedTfTracks: if sourceTfs == 'Hyperbrowser repository': tfTrackName = TfTrackNameMappings.getTfTrackNameMappings( genome)[sourceTfsDetails] + [i] tracks.append( Track(tfTrackName, trackTitle=tfTrackName[len(tfTrackName) - 1])) trackTitles.append(tfTrackName[len(tfTrackName) - 1]) else: tfTrackName = i.split(':') queryGSuite = getGSuiteFromGalaxyTN(sourceTfsDetails) for x in queryGSuite.allTracks(): selectedTrackNames = (':'.join(x.trackName)) if i == selectedTrackNames: tracks.append(Track(x.trackName, x.title)) 
trackTitles.append(x.trackName[-1]) # queryGSuite = getGSuiteFromGalaxyTN(sourceTfsDetails) # tfTrackName = [x.trackName for x in queryGSuite.allTracks()] + [i] # tracks += [Track(x.trackName, x.title) for x in queryGSuite.allTracks()] # trackTitles += tfTrackName # print tfTrackName # print tracks # print trackTitles trackTitlesForStat = trackTitles trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(trackTitles) ##first statistic for Q2 resultsForStatistics = OrderedDict() similarityFunc = [ #GSuiteStatUtils.T7_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP, GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP ] for similarityStatClassName in similarityFunc: regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices) analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome) mcfdrDepth = AnalysisDefHandler( REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0] analysisDefString = REPLACE_TEMPLATES[ '$MCFDR$'] + ' -> GSuiteSimilarityToQueryTrackRankingsAndPValuesWrapperStat' analysisSpec = AnalysisDefHandler(analysisDefString) analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth) analysisSpec.addParameter('assumptions', 'PermutedSegsAndIntersegsTrack_') analysisSpec.addParameter( 'rawStatistic', GSuiteStatUtils. PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]) analysisSpec.addParameter( 'pairwiseStatistic', GSuiteStatUtils. 
PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName] ) #needed for call of non randomized stat for assertion analysisSpec.addParameter('tail', 'more') analysisSpec.addParameter('trackTitles', trackTitles) #that need to be string analysisSpec.addParameter('queryTracksNum', str(len(tracks))) results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult() if not similarityStatClassName in resultsForStatistics: resultsForStatistics[similarityStatClassName] = {} resultsForStatistics[similarityStatClassName] = results keyTitle = [ #'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure)', 'Ratio of observed to expected overlap (Forbes similarity measure)' ] # 'Normalized Forbes coefficient: ratio of observed to expected overlap normalized in relation to the reference GSuite', # 'Forbes coefficient: ratio of observed to expected overlap' keyTitle = [ #GSuiteStatUtils.T7_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP, GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP ] resultDict = AllTfsOfRegions.countStatistics(similarityFunc, choices, genome, tracks, trackTitlesForStat) resultDictShow = AllTfsOfRegions.countStatisticResults( resultDict, keyTitle, trackTitlesForStat) # print resultsForStatistics '''selectedTrackNames = [] if sourceTfs == 'History (user-defined)': if selectedTfTracks.split(":")[1] == "gsuite": gSuite = getGSuiteFromGalaxyTN(selectedTfTracks) for track in gSuite.allTracks(): selectedTrackNames.append(track.trackName) else: galaxyTN = selectedTfTracks.split(':') gRegTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, galaxyTN) selectedTrackNames.append(gRegTrackName) else:''' tfNameList = [] #Intersection between TF Tracks and selected region (Table 1): n = 0 allTargetBins = [] alltfNames = [] table1 = [] for i in selectedTfTracks: n = n + 1 #newGalaxyFn = galaxyFn.split(".")[0] + str(n) + "." 
+ "dat" if sourceTfs == 'Hyperbrowser repository': tfTrackName = TfTrackNameMappings.getTfTrackNameMappings( genome)[sourceTfsDetails] + [i] else: tfTrackName = i.split(':') tfTrackName.pop(0) #tfIntersection.expandReferenceTrack(upFlankSize, downFlankSize) tfIntersection = TrackIntersection(genome, genElementTrackName, tfTrackName, galaxyFn, str(n)) regFileNamer = tfIntersection.getIntersectedRegionsStaticFileWithContent( ) targetBins = tfIntersection.getIntersectedReferenceBins() #regSpec, targetBins = UserBinSelector.getRegsAndBinsSpec(choices) tfHits = [i] * len(targetBins) fixedTargetBins = [str(a).split(" ")[0] for a in targetBins] extendedTargetBins = [ list(a) for a in zip(fixedTargetBins, tfHits) ] allTargetBins = allTargetBins + extendedTargetBins tfName = i alltfNames = alltfNames + [tfName] # Save output table: tfNameList.append(tfName) line = [tfName] + [len(targetBins)] + [ regFileNamer.getLink('Download bed-file') ] + [ regFileNamer.getLoadToHistoryLink('Send bed-file to History') ] table1 = table1 + [line] # Computing totals: fullCase = ','.join(alltfNames) firstColumn = [item[0] for item in allTargetBins] uniqueAllTargetBins = list(set(firstColumn)) # Group TFs by bound region: d1 = defaultdict(list) for k, v in allTargetBins: d1[k].append(v) allTFTargetBins = dict((k, ','.join(v)) for k, v in d1.iteritems()) allTFTargetList = [] fullCaseTFTargetList = [] for key, value in allTFTargetBins.iteritems(): allTFTargetList = allTFTargetList + [[key, value]] if value == fullCase: fullCaseTFTargetList = fullCaseTFTargetList + [[key, value]] analysis3 = TrackIntersection.getFileFromTargetBins( allTFTargetList, galaxyFn, str(3)) analysis4 = TrackIntersection.getFileFromTargetBins( fullCaseTFTargetList, galaxyFn, str(4)) # Print output to table: title = 'TF targets and co-occupancy of ' + genElementTrackName[ -1] + ' genomic regions' htmlCore = HtmlCore() pf = plotFunction(tableId='resultsTable') htmlCore.begin() htmlCore.header(title) 
htmlCore.divBegin('resultsDiv') htmlCore.line(pf.createButton(bText='Show/Hide more results')) # htmlCore.tableHeader(['Transcription Factor', 'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure) -- Similarity to genomic regions track', 'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure) -- p-value','Ratio of observed to expected overlap (Forbes similarity measure) -- Similarity to genomic regions track', 'Ratio of observed to expected overlap (Forbes similarity measure) -- p-value', 'Number of TF-Target Track Regions', 'File of TF Target Regions', 'File of TF Target Regions', 'Number of TF-co-occupied Regions', 'File of TF co-occupied Regions', 'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'], sortable=True, tableId='resultsTable') #previous ordering # htmlCore.tableHeader(['Transcription Factor', 'Normalized Forbes index --overlap score', # 'Normalized Forbes index --p-value', # 'Forbes index --overlap score', 'Forbes index --p-value', # 'Number of TF-Target Track Regions', 'File of TF Target Regions', # 'File of TF Target Regions', 'Number of target track regions occupied by this TF', # 'File of TF co-occupied Regions', 'File of TF co-occupied Regions', # 'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'], # sortable=True, tableId='resultsTable') htmlCore.tableHeader( [ 'Transcription Factor', 'Number of TF-Target Track Regions', 'File of TF Track Regions', 'Number of target track regions occupied by this TF', 'File of TF Target Regions', 'Forbes index --overlap score', 'Forbes index --p-value', #'Normalized Forbes index --overlap score', 'Normalized Forbes index --p-value', 'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs' ], sortable=True, tableId='resultsTable') # Adding co-occupancy results to table: n = 1000 genRegionNumElements = [ int(x) for x in getTrackRelevantInfo.getNumberElements( genome, 
genElementTrackName) ] for key0, it0 in resultsForStatistics.iteritems(): for el in tfNameList: if el not in it0: resultsForStatistics[key0][el] = [None, None] resultsPlotDict = {} resultPlotCat = [] resultsPlot = [] resultsForStatisticsProper = {} for key0, it0 in resultsForStatistics.iteritems(): if not key0 in resultsPlotDict: resultsPlotDict[key0] = {} resultsPlotPart = [] for key1, it1 in it0.iteritems(): resultsPlotPart.append(it1[0]) if not key1 in resultsForStatisticsProper: resultsForStatisticsProper[key1] = [] if not key1 in resultsPlotDict[key0]: resultsPlotDict[key0][key1] = None for el in it1: resultsForStatisticsProper[key1].append(el) resultsPlotDict[key0][key1] = it1[0] resultPlotCat.append(tfNameList) resultPlotCat.append(tfNameList) #resultPlotCatPart = tfNameList # print resultPlotCatPart for key0, it0 in resultsPlotDict.iteritems(): resultsPlotPart = [] for el in tfNameList: if el in it0: resultsPlotPart.append(it0[el]) else: resultsPlotPart.append(None) resultsPlot.append(resultsPlotPart) for i in table1: thisCaseTFTargetList = [] for key, value in allTFTargetList: if i[0] in value and ',' in value: thisCaseTFTargetList = thisCaseTFTargetList + [[ key, value ]] n = n + 1 thisAnalysis = TrackIntersection.getFileFromTargetBins( thisCaseTFTargetList, galaxyFn, str(n)) thisCaseCoCountsList = [] thing = [x[1] for x in thisCaseTFTargetList] for k in list(set(thing)): thisCount = thing.count(k) thisCaseCoCountsList = thisCaseCoCountsList + \ [[k, thisCount, 100*float(thisCount)/float(sum(genRegionNumElements)), 100*float(thisCount)/float(len(thisCaseTFTargetList))]] thisCaseCoCountsList.sort(key=lambda x: x[2], reverse=True) n = n + 1 thisCoCountsAnalysis = TrackIntersection.getOccupancySummaryFile( thisCaseCoCountsList, galaxyFn, str(n)) thisLine = [len(thisCaseTFTargetList)] + \ [thisAnalysis.getLink('Download file')] + [thisAnalysis.getLoadToHistoryLink('Send file to History')] + \ [thisCoCountsAnalysis.getLink('Download file')] + 
[thisCoCountsAnalysis.getLoadToHistoryLink('Send file to History')] newLineI = [] tfName = i[0] newLineI.append(tfName) for el in resultsForStatisticsProper[tfName]: newLineI.append(el) for elN in range(1, len(i)): newLineI.append(i[elN]) # htmlCore.tableLine(i + thisLine) # htmlCore.tableHeader(['Transcription Factor', 'Normalized Forbes index --overlap score', # 'Normalized Forbes index --p-value', # 'Forbes index --overlap score', 'Forbes index --p-value', # 'Number of TF-Target Track Regions', 'File of TF Target Regions', # 'File of TF Target Regions', 'Number of target track regions occupied by this TF', # 'File of TF co-occupied Regions', 'File of TF co-occupied Regions', # 'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'], # sortable=True, tableId='resultsTable') # htmlCore.tableHeader(['Transcription Factor', 'Number of TF-Target Track Regions', 'File of TF Track Regions', # 'Number of target track regions occupied by this TF', 'File of TF Target Regions', # 'Forbes index --overlap score', 'Forbes index --p-value', # 'Normalized Forbes index --overlap score', 'Normalized Forbes index --p-value', # 'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs'], # sortable=True, tableId='resultsTable') tl = newLineI + thisLine # previous ordering tl - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 # actual ordering - 0, 5, 7, 8, 7, 3, 4, 1, 2, 9, 11 #ordering = [0, 5, 7, 8, 10, 3, 4, 1, 2, 10, 12] ordering = [0, 3, 5, 6, 8, 1, 2, 8, 10] #1, 2, => delete eoList = [] for eo in ordering: eoList.append(tl[eo]) htmlCore.tableLine(eoList) totalCoOccupancyTargetList = [] n = 2000 for key, value in allTFTargetList: n = n + 1 if ',' in value: totalCoOccupancyTargetList = totalCoOccupancyTargetList + [[ key, value ]] #newGalaxyFn = galaxyFn.split(".")[0] + str(n) + "." 
+ "dat" totalCoOccupancyAnalysis = TrackIntersection.getFileFromTargetBins( totalCoOccupancyTargetList, galaxyFn, str(n)) #line = ['Total reported regions'] + [len(allTargetBins)] + [''] + [''] + [''] + [''] + [''] #line = ['Full co-occupancy of ' + fullCase] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + [len(fullCaseTFTargetList)] + [analysis4.getLink('Download file')] + [analysis4.getLoadToHistoryLink('Send file to History')] + ['-'] + ['-'] line = ['Full co-occupancy of ' + fullCase] + \ ['-'] + \ ['-'] + \ [len(fullCaseTFTargetList)] + \ ['-'] + \ ['-'] + \ ['-'] + \ [analysis4.getLoadToHistoryLink('Send file to History')] + \ ['-'] htmlCore.tableLine(line) #line = ['Total unique regions'] + ['-'] + ['-'] + ['-'] + ['-'] + [len(allTFTargetList)] + [analysis3.getLink('Download bed-file')] + [analysis3.getLoadToHistoryLink('Send bed-file to History')] + [len(totalCoOccupancyTargetList)] + [totalCoOccupancyAnalysis.getLink('Download file')] + [totalCoOccupancyAnalysis.getLoadToHistoryLink('Send file to History')] + ['-'] + ['-'] line = ['Total unique regions'] + \ [len(allTFTargetList)] + \ ['-'] + \ [len(totalCoOccupancyTargetList)] + \ [analysis3.getLoadToHistoryLink('Send bed-file to History')] + \ ['-'] +\ ['-'] + \ [totalCoOccupancyAnalysis.getLoadToHistoryLink('Send file to History')] + \ ['-'] htmlCore.tableLine(line) htmlCore.tableFooter() htmlCore.divEnd() # htmlCore.line(pf.hideColumns(indexList=[2, 4])) # sumRes = 0 for r in resultsPlot[0]: if r != None: sumRes += r if sumRes != 0: vg = visualizationGraphs() result = vg.drawColumnCharts( [resultsPlot[0]], height=300, categories=resultPlotCat, legend=False, addOptions='width: 90%; float:left; margin: 0 4%;', #titleText=['Overlap between TFs and genomic region using normalized Forbes', 'Overlap between TFs and genomic region using Forbes'], titleText=[ 'Overlap between TFs and genomic region using Forbes' ], xAxisRotation=90, xAxisTitle='TF', yAxisTitle='value') htmlCore.line(result) for key0, 
it0 in resultDictShow.iteritems(): htmlCore.divBegin('resultsDiv' + str(key0)) htmlCore.header(key0) htmlCore.tableHeader(it0[0], sortable=True, tableId='resultsTable' + str(key0)) for elN in range(1, len(it0)): htmlCore.tableLine(it0[elN]) htmlCore.tableFooter() htmlCore.divEnd() htmlCore.hideToggle(styleClass='debug') htmlCore.end() print htmlCore
def countStatistics(similarityFunc, choices, genome, tracks, trackTitles):
    """Run the MCFDR-wrapped similarity ranking for every group of tracks.

    The first entry of ``tracks`` and ``trackTitles`` is skipped (presumably
    it is the query/region track -- confirm against the caller).  For each
    remaining distinct title, taken in order, a group is formed from that
    title and all titles after it.  Every group with more than one member is
    analysed once per entry of ``similarityFunc``.

    :param similarityFunc: iterable of keys into
        ``GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING``.
    :param choices: tool choices, handed to
        ``UserBinMixin.getRegsAndBinsSpec``.
    :param genome: genome handed to the user bin source.
    :param tracks: track objects, parallel to ``trackTitles``.
    :param trackTitles: titles of ``tracks``.
    :return: ``OrderedDict`` mapping each distinct title (first one excluded)
        to ``{similarity key: global result}``.  Titles whose group has a
        single member map to an empty dict.
    """
    candidateTracks = tracks[1:]
    candidateTitles = trackTitles[1:]

    resultsForStatistics = OrderedDict()
    # title -> that title followed by every title that comes after it
    titlesFrom = OrderedDict()
    for position, title in enumerate(candidateTitles):
        if title not in titlesFrom:
            titlesFrom[title] = candidateTitles[position:]
            resultsForStatistics[title] = {}

    for leadTitle, groupTitles in titlesFrom.items():
        if len(groupTitles) <= 1:
            continue

        # Titles are resolved back to tracks by first occurrence, exactly as
        # before; duplicate titles therefore resolve to the same track.
        trackCollection = [candidateTracks[candidateTitles.index(groupTitle)]
                           for groupTitle in groupTitles]
        # Must be a single string when handed to the analysis spec.
        joinedTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(groupTitles)

        for similarityStatClassName in similarityFunc:
            regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
            analysisBins = GalaxyInterface._getUserBinSource(
                regSpec, binSpec, genome=genome)

            mcfdrDepth = AnalysisDefHandler(
                REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
            analysisDefString = REPLACE_TEMPLATES['$MCFDR$'] + \
                ' -> GSuiteSimilarityToQueryTrackRankingsAndPValuesWrapperStat'
            statClass = GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[
                similarityStatClassName]

            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack_')
            analysisSpec.addParameter('rawStatistic', statClass)
            # Needed for the call of the non-randomized stat (assertion).
            analysisSpec.addParameter('pairwiseStatistic', statClass)
            analysisSpec.addParameter('tail', 'more')
            analysisSpec.addParameter('trackTitles', joinedTitles)
            analysisSpec.addParameter('queryTracksNum',
                                      str(len(trackCollection)))

            resultsForStatistics[leadTitle][similarityStatClassName] = \
                doAnalysis(analysisSpec, analysisBins,
                           trackCollection).getGlobalResult()

    return resultsForStatistics
def execute(choices, galaxyFn=None, username=''):
    """Screen two track collections against each other and print an HTML page.

    Every track of the first (three-level) GSuite dict is analysed against
    every track of the second (two-level) GSuite dict, for each pair of
    entries whose ``'genome'`` values are equal.  With ``choices.type ==
    'basic'`` the analysis is run through ``GalaxyInterface.runManual`` and
    post-processed by ``processResult``; otherwise
    ``NumT2SegsTouchedByT1SegsStat`` is run over a ``GlobalBinSource``.

    :param choices: tool choices; reads ``gSuiteFirst``, ``gSuiteSecond``,
        ``intraOverlap``, ``type`` and ``statistic``.
    :param galaxyFn: passed through to ``GalaxyInterface.runManual``.
    :param username: unused.
    """
    toolCls = ScreenTwoTrackCollectionsAgainstEachOther2LevelDepth

    firstGSuite = toolCls.returnGSuiteDict3LevelDept(
        choices.gSuiteFirst.split(':'))
    secondGSuite = toolCls.returnGSuiteDict2LevelDept(
        choices.gSuiteSecond.split(':'))

    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

    if choices.intraOverlap == toolCls.MERGE_INTRA_OVERLAPS:
        analysisDef = 'dummy -> RawOverlapStat'
    else:
        analysisDef = 'dummy [withOverlaps=yes] -> RawOverlapAllowSingleTrackOverlapsStat'

    isBasic = choices.type == 'basic'
    results = []

    if isBasic:
        for elFG in firstGSuite:
            for elSG in secondGSuite:
                if elFG['genome'] != elSG['genome']:
                    continue
                targetTrackGenome = elFG['genome']
                perFolder1 = []
                for folder1 in elFG['dataFolderValue0']:
                    perTarget = []
                    for targetTrackDetail in folder1['dataFolderValue1']:
                        perRef = []
                        for el in elSG['dataFolderValue0']:
                            result = GalaxyInterface.runManual(
                                [targetTrackDetail['trackPath'],
                                 el['trackPath']],
                                analysisDef,
                                regSpec,
                                binSpec,
                                elFG['genome'].split('-')[0],
                                galaxyFn,
                                printRunDescription=False,
                                printResults=False)
                            perRef.append({
                                'refTrackName':
                                    el['trackName'].replace(targetTrackGenome, ''),
                                'data':
                                    processResult(result.getGlobalResult())
                            })
                        perTarget.append({
                            'folderName2': targetTrackDetail['folderName2'],
                            'targetTrackName': targetTrackDetail['trackName'],
                            'dataFolderValue2': perRef
                        })
                    perFolder1.append({
                        'folderName1': folder1['folderName1'],
                        'dataFolderValue1': perTarget
                    })
                results.append({
                    'genome': targetTrackGenome,
                    'dataFolderValue0': perFolder1
                })
    else:
        from quick.statistic.NumT2SegsTouchedByT1SegsStat import NumT2SegsTouchedByT1SegsStat
        for elFG in firstGSuite:
            for elSG in secondGSuite:
                if elFG['genome'] != elSG['genome']:
                    continue
                # NOTE(review): analysisSpec is only bound for this one
                # statistic; any other value would reach doAnalysis without
                # a spec -- confirm the option list guarantees this value.
                if choices.statistic == 'Number of touched segments':
                    analysisSpec = AnalysisSpec(NumT2SegsTouchedByT1SegsStat)
                analysisBins = GlobalBinSource(elFG['genome'].split('-')[0])
                targetTrackGenome = elFG['genome']
                perFolder1 = []
                for folder1 in elFG['dataFolderValue0']:
                    perTarget = []
                    for targetTrackDetail in folder1['dataFolderValue1']:
                        perRef = []
                        for el in elSG['dataFolderValue0']:
                            res = doAnalysis(
                                analysisSpec, analysisBins,
                                [PlainTrack(targetTrackDetail['trackPath']),
                                 PlainTrack(el['trackPath'])])
                            resultDict = res.getGlobalResult()
                            perRef.append({
                                'refTrackName':
                                    el['trackName'].replace(targetTrackGenome, ''),
                                'data': [resultDict['Result']]
                            })
                        perTarget.append({
                            'folderName2': targetTrackDetail['folderName2'],
                            'targetTrackName': targetTrackDetail['trackName'],
                            'dataFolderValue2': perRef
                        })
                    perFolder1.append({
                        'folderName1': folder1['folderName1'],
                        'dataFolderValue1': perTarget
                    })
                results.append({
                    'genome': targetTrackGenome,
                    'dataFolderValue0': perFolder1
                })

    if isBasic:
        stat = choices.statistic
        statIndex = toolCls.STAT_LIST_INDEX.index(stat)
    else:
        stat = '0'
        statIndex = 0

    htmlCore = HtmlCore()
    htmlCore.begin()

    # The '.hidden' rule used to end with '{' instead of '}', which left the
    # rule unterminated and swallowed the '.visible' rule that follows it.
    htmlCore.line(""" <style type="text/css"> .hidden { display: none; } .visible { display: block; } </style> """)

    def addIfNew(seen, value):
        # Order-preserving "unique" append.
        if value not in seen:
            seen.append(value)

    folderValue0Unique = []
    folderValue1Unique = []
    folderValue2Unique = []
    targetTrackFeatureTitles = []
    for genomeEntry in results:
        addIfNew(folderValue0Unique, genomeEntry['genome'])
        for folder1Entry in genomeEntry['dataFolderValue0']:
            addIfNew(folderValue1Unique, folder1Entry['folderName1'])
            for targetEntry in folder1Entry['dataFolderValue1']:
                addIfNew(folderValue2Unique, targetEntry['folderName2'])
                for refEntry in targetEntry['dataFolderValue2']:
                    addIfNew(targetTrackFeatureTitles,
                             refEntry['refTrackName'])

    # The same list object is deliberately passed twice below.
    targetTrackNameList = targetTrackFeatureTitles

    htmlCore.line('Statistic: ' + stat)
    htmlCore.line(
        addJS3levelOptionList(folderValue1Unique, folderValue2Unique,
                              targetTrackFeatureTitles, targetTrackNameList,
                              folderValue0Unique))

    htmlCore.divBegin('results')
    htmlCore.paragraph(
        preporcessResults3(results, folderValue1Unique, folderValue2Unique,
                           targetTrackFeatureTitles, folderValue0Unique,
                           statIndex))
    htmlCore.divEnd()

    htmlCore.hideToggle(styleClass='debug')
    htmlCore.end()

    print(htmlCore)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Print an HTML table with the per-bin enrichment of the GSuite tracks.

    For every user bin the summarized (average or maximum, depending on
    choices.summaryFunc) representation of the tracks is computed with
    SummarizedWrapperStat.  When choices.analysisName is cls.Q3, a per-bin
    p-value from GSuiteBinEnrichmentPValWrapperStat is added as a second
    column.

    choices is the set of selections made by the web-user; galaxyFn is
    passed on to the table helper; username is unused.
    '''
    cls._setDebugModeIfSelected(choices)

    analysisName = choices.analysisName
    summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT
    statTxt = "Maximum" if summaryFunc == "max" else "Average"
    if analysisName == cls.Q2:
        statDesc = 'number of <b>base pairs covered by segments</b>'
    else:
        statDesc = 'number of <b>segments</b> per base'

    core = HtmlCore()
    core.begin()
    core.header("Enrichment of GSuite tracks across regions")
    core.divBegin(divClass='resultsExplanation')
    core.paragraph(
        'The following is a list of all regions (bins) and the <b>' +
        statTxt.lower() + '</b> ' + statDesc +
        ' across the tracks within each region.')
    core.divEnd()

    if analysisName == cls.Q3:
        # Compute p-value per bin
        analysisSpec = AnalysisSpec(GSuiteBinEnrichmentPValWrapperStat)
        analysisSpec.addParameter('rawStatistic', 'BinSizeStat')
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]
        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
        # Only referenced by a disabled alternative for "globalSource";
        # the import is kept in case loading the module matters.
        from quick.statistic.GenericRelativeToGlobalStat import GenericRelativeToGlobalStatUnsplittable
        analysisSpec.addParameter("globalSource", 'userbins')
        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, choices.genome)
        pValResults = doAnalysis(analysisSpec, analysisBins, tracks)

    if analysisName == cls.Q2:
        countStat = 'ProportionCountStat'
    else:
        countStat = 'ProportionElementCountStat'

    analysisSpec = AnalysisSpec(SummarizedWrapperStat)
    analysisSpec.addParameter('rawStatistic', 'SummarizedWrapperStat')
    analysisSpec.addParameter('pairwiseStatistic', countStat)
    analysisSpec.addParameter('summaryFunc', summaryFunc)

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    tracks = [Track(x.trackName) for x in gsuite.allTracks()]
    regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(
        regSpec, binSpec, choices.genome)
    results = doAnalysis(analysisSpec, analysisBins, tracks)

    # bin -> value, (value, p-value) for Q3, or a placeholder string
    prettyResults = {}
    for key, val in results.iteritems():
        if "Result" not in val.keys():
            prettyResults[key] = "No result"
        elif analysisName == cls.Q3:
            prettyResults[key] = (val["Result"], pValResults[key]["Result"])
        else:
            prettyResults[key] = val["Result"]

    # An unused lookup of the first result key used to sit here; it raised
    # IndexError whenever the analysis produced no bins.

    columnNames = ['Bin', 'Representation within the bin']
    if analysisName == cls.Q3:
        columnNames.append('p-value')

    if analysisName == cls.Q1:
        shortQuestion = cls.Q1_SHORT
    elif analysisName == cls.Q2:
        shortQuestion = cls.Q2_SHORT
    else:  # Q3
        shortQuestion = cls.Q3_SHORT

    visibleRows = 20
    makeTableExpandable = len(prettyResults) > visibleRows

    core.divBegin()
    addTableWithTabularAndGsuiteImportButtons(
        core, choices, galaxyFn, shortQuestion,
        tableDict=prettyResults,
        columnNames=columnNames,
        sortable=True,
        presorted=0,
        expandable=makeTableExpandable)
    core.divEnd()
    core.end()

    print(str(core))
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Print an HTML table with the per-bin co-occurrence of the GSuite tracks.

    For every user bin the pairwise similarity (choices.similarityFunc, or
    the observed/expected-overlap ratio when empty) is summarized over all
    track pairs with MultitrackSummarizedInteractionWrapperStat.  When
    choices.analysisName is cls.Q2, an MCFDR run for the highest ranking bin
    is performed first and its score and p-value are reported above the
    table.

    choices is the set of selections made by the web-user; galaxyFn is
    passed on to the table helper; username is unused.
    '''
    cls._setDebugModeIfSelected(choices)

    similarityStatClassName = choices.similarityFunc if choices.similarityFunc \
        else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
    summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT
    pairwiseStatName = GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    tracks = [Track(x.trackName) for x in gsuite.allTracks()]

    statTxt = "Maximum" if summaryFunc == "max" else "Average"
    isQ2 = choices.analysisName == cls.Q2

    if isQ2:
        # First compute the p-value by running the statistic through a
        # wrapper stat that computes the max per bin.
        mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
            AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
        analysisDefString = REPLACE_TEMPLATES['$MCFDRv3$'] + ' -> CollectionBinnedHypothesisWrapperStat'
        analysisSpec = AnalysisDefHandler(analysisDefString)
        analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
        analysisSpec.addParameter("rawStatistic", "GenericMaxBinValueStat")
        analysisSpec.addParameter('perBinStatistic', 'MultitrackSummarizedInteractionV2Stat')
        analysisSpec.addParameter('pairwiseStatistic', 'ObservedVsExpectedStat')
        analysisSpec.addParameter('summaryFunc', summaryFunc)
        analysisSpec.addParameter('tail', 'right-tail')
        analysisSpec.addParameter('assumptions', 'RandomGenomeLocationTrack')
        analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)
        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, choices.genome)
        mcfdrResult = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult()
        # '%' binds tighter than '+': only the last literal is formatted,
        # and it carries both placeholders.
        resultsTxt = "The highest ranking bin based on the " + statTxt.lower() + \
            " of the Forbes similarity measure for pairs of tracks within each bin had a score of <b>%.3f</b> with p-value <b>%.6f</b>" \
            % (mcfdrResult["TSMC_GenericMaxBinValueStat"], mcfdrResult['P-value'])

    # Stat question 7
    core = HtmlCore()
    core.begin()

    analysisSpec = AnalysisSpec(MultitrackSummarizedInteractionWrapperStat)
    analysisSpec.addParameter('pairwiseStatistic', pairwiseStatName)
    analysisSpec.addParameter('summaryFunc', summaryFunc)
    analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    tracks = [Track(x.trackName) for x in gsuite.allTracks()]
    regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, choices.genome)
    results = doAnalysis(analysisSpec, analysisBins, tracks)

    prettyResults = OrderedDict()
    for key, val in results.iteritems():
        if "Result" in val.keys():
            prettyResults[key] = val["Result"]
        else:
            prettyResults[key] = "No result"

    core.header(statTxt + " co-occurence between pairs of tracks within each bin")
    if isQ2:
        core.paragraph(resultsTxt)
    core.divBegin(divClass='resultsExplanation')
    core.paragraph('The following is a list of all bins and the <b>' + statTxt.lower() +
                   '</b> co-occurrence of tracks within each bin.')
    core.divEnd()

    visibleRows = 20
    makeTableExpandable = len(prettyResults) > visibleRows
    columnNames = ['Bin', 'Co-occurrence within the bin']
    if choices.analysisName == cls.Q1:
        shortQuestion = cls.Q1_SHORT
    else:
        shortQuestion = cls.Q2_SHORT

    # Open the div that the divEnd() after the table closes; without it a
    # stray closing tag was emitted.
    core.divBegin()
    addTableWithTabularAndGsuiteImportButtons(
        core, choices, galaxyFn, shortQuestion,
        tableDict=prettyResults,
        columnNames=columnNames,
        sortable=True,
        presorted=0,
        expandable=makeTableExpandable,
        visibleRows=visibleRows)
    core.divEnd()
    core.end()

    print(str(core))
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Compare every track of a query GSuite with every track of a reference
    GSuite and print an HTML results page.

    GSuiteVsGSuiteWrapperStat is run once over the user bins.  The page
    reports the pair with the highest raw base-pair overlap and the pair
    with the highest similarity score, each followed by links to a table
    and to a heatmap.  If building a heatmap section raises, a short
    message is printed in its place.

    choices is the set of selections made by the web-user; galaxyFn is used
    to resolve the run-specific output directory; username is unused.
    '''
    cls._setDebugModeIfSelected(choices)

    genome = choices.genome
    queryGSuite = getGSuiteFromGalaxyTN(choices.queryGSuite)
    refGSuite = getGSuiteFromGalaxyTN(choices.refGSuite)
    if choices.similarityFunc:
        similarityStatClassNameKey = choices.similarityFunc
    else:
        similarityStatClassNameKey = GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP

    # The returned values are not used below; the call is kept as it was
    # (NOTE(review): drop it if it is confirmed to be side-effect free).
    isPointsVsSegments, pointsGSuite, segGSuite = \
        cls.isPointsVsSegmentsAnalysis(queryGSuite, refGSuite)

    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome)

    queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]
    refTrackList = [Track(x.trackName, x.title) for x in refGSuite.allTracks()]

    queryTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
        [quote(x.title, safe='') for x in queryGSuite.allTracks()])
    refTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
        [quote(x.title, safe='') for x in refGSuite.allTracks()])

    analysisSpec = AnalysisSpec(GSuiteVsGSuiteWrapperStat)
    analysisSpec.addParameter('queryTracksNum', str(len(queryTrackList)))
    analysisSpec.addParameter('refTracksNum', str(len(refTrackList)))
    analysisSpec.addParameter('queryTrackTitleList', queryTrackTitles)
    analysisSpec.addParameter('refTrackTitleList', refTrackTitles)
    analysisSpec.addParameter(
        'similarityStatClassName',
        GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassNameKey])
    if choices.removeZeroRow:
        analysisSpec.addParameter('removeZeroRow', choices.removeZeroRow)
    if choices.removeZeroCol:
        analysisSpec.addParameter('removeZeroColumn', choices.removeZeroCol)

    resultsObj = doAnalysis(analysisSpec, analysisBins, queryTrackList + refTrackList)
    results = resultsObj.getGlobalResult()

    rawOverlapTableData = results[RAW_OVERLAP_TABLE_RESULT_KEY]
    maxRawOverlap, maxROt1, maxROt2 = rawOverlapTableData.getMaxElement()
    similarityScoreTableData = results[SIMILARITY_SCORE_TABLE_RESULT_KEY]
    maxSimScore, maxSSt1, maxSSt2 = similarityScoreTableData.getMaxElement()

    baseDir = GalaxyRunSpecificFile([], galaxyFn=galaxyFn).getDiskPath()
    heatmapPresenter = HeatmapFromTableDataPresenter(
        resultsObj, baseDir=baseDir,
        header='Overlapping base-pairs between the tracks of the two suites',
        printDimensions=False)
    tablePresenter = MatrixGlobalValueFromTableDataPresenter(
        resultsObj, baseDir=baseDir,
        header='Table of overlapping base-pairs between the tracks of the two suites')

    core = HtmlCore()

    def emitTableLinks(resultKey):
        # Prompt on the left, table links on the right.
        core.divBegin(divId='raw-table-result', divClass='result-div')
        core.divBegin(divId='raw-table-result', divClass='result-div-left')
        core.line('''Follow the links to view the results in an HTML table or raw tabular form:''')
        core.divEnd()
        core.divBegin(divId='raw-table-result', divClass='result-div-right')
        core.line(tablePresenter.getReference(resultKey))
        core.divEnd()
        core.divEnd()

    def emitHeatmapLinks(resultKey, headerText, failureText):
        # Same layout for the heatmap; falls back to a message on failure.
        try:
            core.header(headerText)
            core.divBegin(divId='raw-table-result', divClass='result-div-heatmap')
            core.divBegin(divId='raw-table-result', divClass='result-div-left')
            core.line('''Follow the links to view the heatmap in the desired format:''')
            core.divEnd()
            core.divBegin(divId='raw-table-result', divClass='result-div-right')
            core.line(heatmapPresenter.getReference(resultKey))
            core.divEnd()
            core.divEnd()
        except Exception:
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            core.line(failureText)
            # Close the two divs still open when getReference() failed.
            core.divEnd()
            core.divEnd()

    core.begin()
    core.divBegin(divId='results-page')
    core.divBegin(divId='svs-res-main-div', divClass='svs-res-main')

    # --- raw base-pair overlap ---
    core.divBegin(divId='raw-overlap-div', divClass='results-section')
    core.divBegin(divId='raw-overlap-table', divClass='svs-table-div')
    core.header('Base-pair overlaps between the tracks of the two GSuites')
    core.paragraph("""From the tracks in the two GSuites the highest base-pair overlap <b>(%s bps)</b> is observed for the pair of <b>'%s'</b> and <b>'%s'</b>."""
                   % (maxRawOverlap, maxROt1, maxROt2))
    emitTableLinks(RAW_OVERLAP_TABLE_RESULT_KEY)
    core.divEnd()  # raw-overlap-table

    core.divBegin(divId='raw-overlap-heatmap', divClass='svs-heatmap-div')
    emitHeatmapLinks(RAW_OVERLAP_TABLE_RESULT_KEY,
                     'Heatmap of base-pair overlaps',
                     'Heatmap for the base-pair overlaps could not be created.')
    core.divEnd()  # raw-overlap-heatmap
    core.divEnd()  # raw-overlap-div

    # --- similarity score ---
    core.divBegin(divId='sim-score-div', divClass='results-section')
    core.divBegin(divId='sim-score-table', divClass='svs-table-div')
    # Report the measure actually used: choices.similarityFunc is empty
    # when the default measure was applied.
    core.header('Similarity score between the tracks of the two GSuites measured by %s'
                % similarityStatClassNameKey)
    core.paragraph("""From the tracks in the two GSuites the highest similarity score <b>(%s)</b> is observed for the pair of <b>'%s'</b> and <b>'%s'</b>."""
                   % (maxSimScore, maxSSt1, maxSSt2))
    emitTableLinks(SIMILARITY_SCORE_TABLE_RESULT_KEY)
    core.divEnd()  # sim-score-table

    core.divBegin(divId='sim-score-heatmap', divClass='svs-heatmap-div')
    emitHeatmapLinks(SIMILARITY_SCORE_TABLE_RESULT_KEY,
                     'Heatmap of similarity scores',
                     'Heatmap for the similarity score could not be created.')
    core.divEnd()  # sim-score-heatmap
    core.divEnd()  # sim-score-div

    core.divEnd()  # svs-res-main-div
    core.divEnd()  # results-page (was previously left unclosed)
    core.end()

    print(str(core))
def execute(cls, choices, galaxyFn=None, username=''):
    """Draw a box plot of observed-vs-expected overlap per metadata category.

    The query track (``choices.query``) is compared with the tracks of the
    GSuite (``choices.gsuite``), split by the metadata column ``choices.cat``.
    ``SummarizedInteractionPerTsCatV2Stat`` is run with the
    ``'minLqMedUqMax'`` summary over ``UserBinSource('chr1', '*')`` and the
    per-category result is rendered with ``drawBoxPlotChart``.  The HTML is
    printed to standard out.
    """
    genome = choices.genome
    categoryColumn = choices.cat

    # Track structure: one query track versus the reference tracks grouped
    # by the selected metadata column.
    trackStructure = TrackStructureV2()
    queryTS = factory.getSingleTrackTS(genome, choices.query)
    flatRefTS = factory.getFlatTracksTS(genome, choices.gsuite)
    trackStructure['query'] = queryTS
    trackStructure['reference'] = flatRefTS.getSplittedByCategoryTS(categoryColumn)

    summaryMode = 'minLqMedUqMax'
    analysisSpec = AnalysisSpec(SummarizedInteractionPerTsCatV2Stat)
    analysisSpec.addParameter('pairwiseStatistic', ObservedVsExpectedStat.__name__)
    analysisSpec.addParameter('summaryFunc', summaryMode)

    analysisBins = UserBinSource('chr1', '*', genome=genome)
    perCategory = doAnalysis(analysisSpec, analysisBins,
                             trackStructure).getGlobalResult()['Result']

    htmlCore = HtmlCore()
    htmlCore.begin()

    if summaryMode == 'minAndMax':
        # Tabular rendering: one "min-max" cell per category.
        htmlCore.tableHeader(['Track', 'min-max'], sortable=False, tableId='tab1')
        for category, categoryResult in perCategory.iteritems():
            lowText = str("%.2f" % categoryResult.getResult()[0])
            highText = str("%.2f" % categoryResult.getResult()[1])
            htmlCore.tableLine([category, lowText + '-' + highText])
        htmlCore.tableFooter()
    elif summaryMode == 'minLqMedUqMax':
        # Box plot rendering: one box per category.
        categories = []
        boxData = []
        for category, categoryResult in perCategory.iteritems():
            categories.append(category)
            boxData.append(list(categoryResult.getResult()))

        from quick.webtools.restricted.visualization.visualizationGraphs import \
            visualizationGraphs
        chartMarkup = visualizationGraphs().drawBoxPlotChart(
            boxData, categories=categories, seriesName=categoryColumn)
        htmlCore.line(chartMarkup)

    htmlCore.end()

    print(htmlCore)
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Run the selected single-value analysis on every track of a GSuite and
    print the results as an HTML page.

    For each track the analysis result is stored twice: as an attribute on
    the GSuite track (lower-cased attribute name, stringified value) and as
    a row in the result table. The table is rendered with import buttons for
    a tabular file and for the input GSuite extended with the results.

    choices -- GUI selections; genome, gsuite and analysis are read, and the
               region/bin specification is taken via UserBinMixin.
    galaxyFn -- path of the Galaxy output file; used to locate the
                run-specific table and GSuite files.
    username -- not used by this method.
    '''
    DebugMixin._setDebugModeIfSelected(choices)

    genome = choices.genome
    gSuite = getGSuiteFromGalaxyTN(choices.gsuite)

    # NOTE(review): fullCategory is not used further down; the call is kept
    # so that the executed statements stay the same.
    fullCategory = AnalysisManager.combineMainAndSubCategories(
        'Descriptive statistics', 'Basic')

    gsuiteTracks = list(gSuite.allTracks())
    analysisName = choices.analysis
    selectedAnalysis = \
        cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING[choices.analysis]

    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec,
                                                     genome=genome)

    tableDict = OrderedDict()
    for track in gsuiteTracks:
        row = OrderedDict()
        tableDict[track.title] = row

        resultDict = doAnalysis(selectedAnalysis, analysisBins,
                                [track]).getGlobalResult()

        # A plain 'Result' entry is reported under the analysis name itself;
        # otherwise every entry gets its own "<analysis>:<key>" column.
        if 'Result' in resultDict:
            entries = [(analysisName, resultDict['Result'])]
        else:
            entries = [(analysisName + ':' + attrName, attrVal)
                       for attrName, attrVal in resultDict.iteritems()]

        for columnName, value in entries:
            track.setAttribute(columnName.lower(), str(value))
            row[columnName] = strWithNatLangFormatting(value)

    core = HtmlCore()
    core.begin()
    core.header('Results: ' + analysisName)

    def _produceTable(core, tableDict=None, tableId=None):
        # Callback handed to tableWithImportButtons to render the table.
        return core.tableFromDictOfDicts(tableDict,
                                         firstColName='Track title',
                                         tableId=tableId,
                                         expandable=True,
                                         visibleRows=20,
                                         presorted=0)

    tableId = 'results_table'
    tableFile = GalaxyRunSpecificFile([tableId, 'table.tsv'], galaxyFn)
    gsuiteFile = GalaxyRunSpecificFile(
        [tableId, 'input_with_results.gsuite'], galaxyFn)

    # The tracks now carry the result attributes, so the composed GSuite is
    # the input suite extended with the results.
    GSuiteComposer.composeToFile(gSuite, gsuiteFile.getDiskPath())

    core.tableWithImportButtons(
        tabularFile=True,
        tabularFn=tableFile.getDiskPath(),
        tabularHistElementName='Raw results: ' + analysisName,
        gsuiteFile=True,
        gsuiteFn=gsuiteFile.getDiskPath(),
        gsuiteHistElementName=getGSuiteHistoryOutputName(
            'result', ', ' + analysisName, choices.gsuite),
        produceTableCallbackFunc=_produceTable,
        tableDict=tableDict,
        tableId=tableId)
    core.end()

    print(core)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Compare a query track against the tracks of a GSuite and print the
    answer to the selected analysis question (Q1, Q2 or Q3) as HTML.

    The analysis specification is built by ``prepareQ1`` / ``prepareQ2`` /
    ``prepareQ3`` and the page by the matching ``generateQ*`` method; any
    question other than Q1 and Q2 is handled as Q3.

    :param choices: Object holding all current GUI selections
    :param galaxyFn: Path of the Galaxy output file, forwarded to the Q1 and
        Q2 output generators
    :param username: Not used by this method
    """
    cls._setDebugModeIfSelected(choices)

    genome = choices.genome
    queryTrackNameAsList = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
        genome, choices.queryTrack, printErrors=False, printProgress=False)

    # The intensity track is optional.
    intensityTrackNameAsList = None
    if choices.intensityTrack:
        intensityTrackNameAsList = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome, choices.intensityTrack, printErrors=False,
            printProgress=False)

    analysisQuestion = choices.analysisQName
    similarityStatClassName = (
        choices.similarityFunc or
        GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP)
    summaryFunc = choices.summaryFunc or 'average'
    if choices.reversed:
        reverse = 'Yes'
    else:
        reverse = 'No'

    # A randomization strategy is only defined for Q2 and Q3.
    if analysisQuestion in [cls.Q2, cls.Q3]:
        if choices.isBasic:
            randStrat = 'PermutedSegsAndIntersegsTrack_'
        else:
            randStrat = GSuiteStatUtils.PAIRWISE_RAND_CLS_MAPPING[choices.randStrat]

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec,
                                                     genome=genome)

    # The query track comes first, followed by every track of the suite.
    queryTrack = Track(queryTrackNameAsList)
    suiteTracks = [Track(gsTrack.trackName, trackTitle=gsTrack.title)
                   for gsTrack in gsuite.allTracks()]
    tracks = [queryTrack] + suiteTracks

    queryTrackTitle = prettyPrintTrackName(queryTrack.trackName).replace('/', '_')
    quotedTitles = [quote(queryTrackTitle)]
    quotedTitles += [quote(gsTrack.title, safe='')
                     for gsTrack in gsuite.allTracks()]
    trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(quotedTitles)

    additionalResultsDict = OrderedDict()
    additionalAttributesDict = OrderedDict()
    if analysisQuestion in [cls.Q1, cls.Q2]:
        additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict(
            choices.additionalAttributes, gsuite)
        # Extra per-track statistics shown next to the main result.
        additionalResultsDict = runMultipleSingleValStatsOnTracks(
            gsuite,
            [SingleValueOverlapStat, CountStat, CountElementStat],
            analysisBins,
            queryTrack=queryTrack)

    core = HtmlCore()

    # Step 1: build the analysis specification for the selected question.
    if analysisQuestion == cls.Q1:
        analysisSpec = cls.prepareQ1(reverse, similarityStatClassName,
                                     trackTitles)
    elif analysisQuestion == cls.Q2:
        analysisSpec = cls.prepareQ2(choices, similarityStatClassName,
                                     trackTitles, randStrat,
                                     intensityTrackNameAsList)
    else:  # Q3
        analysisSpec = cls.prepareQ3(choices, similarityStatClassName,
                                     summaryFunc, randStrat)

    # Step 2: run it.
    results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult()

    # Step 3: render the page for the selected question.
    if analysisQuestion == cls.Q1:
        gsPerTrackResultsModel = GSuitePerTrackResultModel(
            results, ['Similarity to query track'],
            additionalResultsDict=additionalResultsDict,
            additionalAttributesDict=additionalAttributesDict)
        # A lead attribute other than the title column is passed through.
        leadArgs = ()
        if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
            leadArgs = (choices.leadAttribute,)
        gsPerTrackResults = \
            gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(*leadArgs)
        core = cls.generateQ1output(additionalResultsDict, analysisQuestion,
                                    choices, galaxyFn, gsPerTrackResults,
                                    queryTrackTitle, gsuite, results,
                                    similarityStatClassName)
    elif analysisQuestion == cls.Q2:
        core = cls.generateQ2Output(additionalAttributesDict,
                                    additionalResultsDict, analysisQuestion,
                                    choices, galaxyFn, queryTrackTitle,
                                    gsuite, results, similarityStatClassName)
    else:  # Q3
        core = cls.generateQ3output(analysisQuestion, queryTrackTitle,
                                    results, similarityStatClassName)

    print(str(core))
def execute(cls, choices, galaxyFn=None, username=''):
    '''
    Is called when the execute button is pushed by the web user. Prints the
    answer to the selected analysis question as HTML to standard out.

    Three branches are active:
      * Q1 ranks the tracks of the GSuite by their similarity to the rest of
        the suite and reports the top-ranked track, a table and a plot.
      * Q3 does the same with an MCFDR-based p-value per track.
      * anything else (labelled Q4 below) tests the collective similarity of
        the whole suite and reports one observed value and one p-value.

    choices -- selections made by the web user in each options box.
    galaxyFn -- path of the Galaxy output file; forwarded to the table
                helper in the Q1 and Q3 branches.
    username -- not used by the active code.
    '''
    import numpy
    # Make numpy raise on every floating-point error category.
    numpy.seterr(all='raise')

    cls._setDebugModeIfSelected(choices)

    # DebugUtil.insertBreakPoint(username=username, currentUser='******')
    genome = choices.genome
    analysisQuestion = choices.analysisName
    # Fall back to defaults when the optional selections are empty.
    similaryStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
    summaryFunc = choices.summaryFunc if choices.summaryFunc else 'average'
    reverse = 'Yes' if choices.reversed else 'No'

    gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
    analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                     binSpec,
                                                     genome=genome)
    tracks = [
        Track(x.trackName, trackTitle=x.title) for x in gsuite.allTracks()
    ]
    # Track titles are URL-quoted and joined into one parameter string.
    trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
        [quote(x.title, safe='') for x in gsuite.allTracks()])

    additionalResultsDict = OrderedDict()
    additionalAttributesDict = OrderedDict()
    if analysisQuestion in [cls.Q1, cls.Q2, cls.Q3]:
        additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict(
            choices.additionalAttributes, gsuite)
        #additional analysis
        stats = [CountStat, CountElementStat]
        additionalResultsDict = runMultipleSingleValStatsOnTracks(
            gsuite, stats, analysisBins, queryTrack=None)

    if analysisQuestion == cls.Q1:
        analysisSpec = AnalysisSpec(
            GSuiteRepresentativenessOfTracksRankingsWrapperStat)
        analysisSpec.addParameter(
            'pairwiseStatistic', GSuiteStatUtils.
            PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
        analysisSpec.addParameter(
            'summaryFunc',
            GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
        analysisSpec.addParameter('reverse', reverse)
        analysisSpec.addParameter('ascending', 'No')
        analysisSpec.addParameter('trackTitles', trackTitles)
        analysisSpec.addParameter('queryTracksNum', len(tracks))
        results = doAnalysis(analysisSpec, analysisBins,
                             tracks).getGlobalResult()

        gsPerTrackResultsModel = GSuitePerTrackResultModel(
            results,
            ['Similarity to rest of tracks in suite (%s)' % summaryFunc],
            additionalResultsDict=additionalResultsDict,
            additionalAttributesDict=additionalAttributesDict)
        # A lead attribute other than the title column is passed through.
        if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
            columnTitles, decoratedResultsDict = \
                gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
        else:
            columnTitles, decoratedResultsDict = \
                gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header(analysisQuestion)
        # The first key of the result dict is reported as the top track;
        # presumably the results arrive ranked - TODO confirm.
        # NOTE(review): raises IndexError when results is empty.
        topTrackTitle = results.keys()[0]
        core.paragraph(''' The track "%s" is the most representative track of the GSuite with %s %s similarity to the rest of the tracks as measured by "%s" track similarity measure. 
''' % (topTrackTitle, results[topTrackTitle], summaryFunc,
        similaryStatClassName))

        addTableWithTabularAndGsuiteImportButtons(
            core,
            choices,
            galaxyFn,
            cls.Q1_SHORT,
            decoratedResultsDict,
            columnTitles,
            gsuite=gsuite,
            results=results,
            gsuiteAppendAttrs=['similarity_score'],
            sortable=True)

        # plot
        columnInd = 0
        if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
            columnInd = 1
        res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
            results,
            additionalResultsDict,
            'Similarity to rest of tracks in suite (%s)' % summaryFunc,
            columnInd=columnInd)
        core.line(res)
        core.divEnd()
        core.divEnd()
        core.end()

    # NOTE(review): a Q2 branch ("most atypical track", ranking with
    # ascending='Yes') used to sit here and is disabled. As the code stands,
    # a Q2 selection reaches the final else branch below - confirm that this
    # is intended.
    elif analysisQuestion == cls.Q3:
        # Use the selected MCFDR depth, or the first option of the template.
        mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
            AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]

        analysisDefString = REPLACE_TEMPLATES[
            '$MCFDRv3$'] + ' -> GSuiteRepresentativenessOfTracksRankingsAndPValuesWrapperStat'
        analysisSpec = AnalysisDefHandler(analysisDefString)
        analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
        analysisSpec.addParameter('assumptions',
                                  'PermutedSegsAndIntersegsTrack')
        analysisSpec.addParameter(
            'rawStatistic',
            SummarizedInteractionWithOtherTracksV2Stat.__name__)
        analysisSpec.addParameter(
            'pairwiseStatistic', GSuiteStatUtils.
            PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
        analysisSpec.addParameter(
            'summaryFunc',
            GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
        analysisSpec.addParameter('tail', 'right-tail')
        analysisSpec.addParameter('trackTitles', trackTitles)
        results = doAnalysis(analysisSpec, analysisBins,
                             tracks).getGlobalResult()
        core = HtmlCore()

        gsPerTrackResultsModel = GSuitePerTrackResultModel(
            results, [
                'Similarity to rest of tracks in suite (%s)' % summaryFunc,
                'P-value'
            ],
            additionalResultsDict=additionalResultsDict,
            additionalAttributesDict=additionalAttributesDict)
        if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
            columnTitles, decoratedResultsDict = \
                gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
        else:
            columnTitles, decoratedResultsDict = \
                gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header(analysisQuestion)
        # Each result value is indexed as [0] = similarity, [1] = p-value in
        # the paragraph below.
        # NOTE(review): raises IndexError when results is empty.
        topTrackTitle = results.keys()[0]
        core.paragraph(''' The track "%s" has the lowest P-value of %s corresponding to %s %s similarity to the rest of the tracks as measured by "%s" track similarity measure. 
''' % (topTrackTitle,
        strWithNatLangFormatting(results[topTrackTitle][1]),
        strWithNatLangFormatting(results[topTrackTitle][0]), summaryFunc,
        similaryStatClassName))
        # core.tableFromDictionary(results, columnNames=['Track title', 'Similarity to rest of tracks in suite (' + summaryFunc+')', 'P-value'], sortable=False)

        addTableWithTabularAndGsuiteImportButtons(
            core,
            choices,
            galaxyFn,
            cls.Q3_SHORT,
            decoratedResultsDict,
            columnTitles,
            gsuite=gsuite,
            results=results,
            gsuiteAppendAttrs=['similarity_score', 'p_value'],
            sortable=True)

        core.divEnd()
        core.divEnd()
        core.end()
    else:  # Q4
        mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
            AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
        analysisDefString = REPLACE_TEMPLATES[
            '$MCFDRv3$'] + ' -> CollectionSimilarityHypothesisWrapperStat'
        analysisSpec = AnalysisDefHandler(analysisDefString)
        analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
        analysisSpec.addParameter('assumptions',
                                  'PermutedSegsAndIntersegsTrack')
        analysisSpec.addParameter('rawStatistic',
                                  'MultitrackSummarizedInteractionV2Stat')
        analysisSpec.addParameter(
            'pairwiseStatistic', GSuiteStatUtils.
            PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
        analysisSpec.addParameter(
            'summaryFunc',
            GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
        analysisSpec.addParameter('multitrackSummaryFunc',
                                  'avg')  # should it be a choice?
        analysisSpec.addParameter('tail', 'right-tail')
        results = doAnalysis(analysisSpec, analysisBins,
                             tracks).getGlobalResult()
        pval = results['P-value']
        observed = results['TSMC_MultitrackSummarizedInteractionV2Stat']
        # p < 0.01 -> 'strong', p < 0.05 -> 'weak', otherwise 'no'.
        significanceLevel = 'strong' if pval < 0.01 else (
            'weak' if pval < 0.05 else 'no')
        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divClass='results-section')
        core.header(analysisQuestion)
        core.paragraph(''' The tracks in the suite show %s significance in their collective similarity (average similarity of a track to the rest) of %s and corresponding p-value of %s, as measured by "%s" track similarity measure. ''' % (significanceLevel, strWithNatLangFormatting(observed),
                       strWithNatLangFormatting(pval),
                       similaryStatClassName))
        core.divEnd()
        core.divEnd()
        core.end()

    # Every branch above leaves its finished page in core.
    print str(core)
def execute(cls, choices, galaxyFn=None, username=''):
    """
    Report, for every track of the second GSuite, how strongly it is hit by
    the tracks of the first GSuite, and print the report as HTML.

    Every track of ``choices.gSuiteFirst`` (target) is overlapped with every
    track of ``choices.gSuiteSecond`` (reference). Per reference track the
    page lists the number of target tracks with at least one overlap, the
    summed overlap count, the ``CountStat`` result of the reference track
    itself, two ratios of those numbers and the percentage of target tracks
    with an overlap. The same columns are then drawn as column charts.

    Compared with the earlier version: the x-axis title is spelled
    'Region ID', an empty result table no longer raises ``IndexError`` (the
    charts are skipped instead), and the counting loops use built-ins.

    :param choices: GUI selections (two GSuites plus region/bin settings).
    :param galaxyFn: Galaxy output file path, forwarded to the analysis runs.
    :param username: not used by this method.
    """
    cls._setDebugModeIfSelected(choices)

    targetGSuite = getGSuiteFromGalaxyTN(choices.gSuiteFirst)
    refGSuite = getGSuiteFromGalaxyTN(choices.gSuiteSecond)

    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

    analysisDef = 'dummy -> RawOverlapStat'

    # Raw overlap result for every (target, reference) pair, keyed by the
    # track titles.
    results = OrderedDict()
    for targetTrack in targetGSuite.allTracks():
        targetTrackName = targetTrack.title
        for refTrack in refGSuite.allTracks():
            refTrackName = refTrack.title
            if targetTrack.trackName == refTrack.trackName:
                # A track paired with itself is handled by a dedicated helper.
                result = DetermineSuiteTracksCoincidingWithAnotherSuite.handleSameTrack(
                    targetTrack.trackName, regSpec, binSpec,
                    targetGSuite.genome, galaxyFn)
            else:
                result = GalaxyInterface.runManual(
                    [targetTrack.trackName, refTrack.trackName],
                    analysisDef,
                    regSpec,
                    binSpec,
                    targetGSuite.genome,
                    galaxyFn,
                    printRunDescription=False,
                    printResults=False,
                    printProgress=False).getGlobalResult()
            if targetTrackName not in results:
                results[targetTrackName] = OrderedDict()
            results[targetTrackName][refTrackName] = result

    statIndex = STAT_LIST_INDEX[STAT_OVERLAP_COUNT_BPS]
    title = ''

    # One row per target track, one column per reference track. The header
    # is rebuilt for every row, so the order of the last row is the one used.
    processedResults = []
    headerColumn = []
    for targetTrackName in targetGSuite.allTrackTitles():
        resultRowDict = processRawResults(results[targetTrackName])
        resultColumn = []
        headerColumn = []
        for refTrackName, statList in resultRowDict.iteritems():
            resultColumn.append(statList[statIndex])
            headerColumn.append(refTrackName)
        processedResults.append(resultColumn)

    outputTable = dict((idx, {'id': refTitle})
                       for idx, refTitle in enumerate(headerColumn))

    # Transpose so that each entry holds the values of one reference track
    # across all target tracks.
    transposedProcessedResults = [list(x) for x in zip(*processedResults)]

    for idx, valuesPerTarget in enumerate(transposedProcessedResults):
        row = outputTable[idx]
        row['sumSecondgSuite'] = sum(valuesPerTarget)
        row['numSecondgSuite'] = sum(1 for val in valuesPerTarget if val >= 1)
        row['numSecondgSuitePercentage'] = \
            float(row['numSecondgSuite']) / float(targetGSuite.numTracks()) * 100

    from gold.statistic.CountSegmentStat import CountSegmentStat
    from gold.statistic.CountPointStat import CountPointStat
    from gold.description.TrackInfo import TrackInfo
    from gold.statistic.CountStat import CountStat

    # NOTE(review): rows are matched to reference tracks by position; this
    # assumes headerColumn follows the order of refGSuite.allTracks() -
    # confirm.
    for idx, refTrack in enumerate(refGSuite.allTracks()):
        # NOTE(review): formatName is not used by the active code; the lookup
        # is kept in case constructing TrackInfo matters - confirm and remove.
        formatName = TrackInfo(refTrack.genome,
                               refTrack.trackName).trackFormatName
        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, refTrack.genome)
        resultDict = doAnalysis(AnalysisSpec(CountStat), analysisBins,
                                [PlainTrack(refTrack.trackName)]).getGlobalResult()

        row = outputTable[idx]
        if len(resultDict) == 0:
            row['numPairBpSecondgSuite'] = None
            row['numFreqBpSecondgSuite'] = None
            row['numFreqUniqueBpSecondgSuite'] = None
            continue

        coverage = resultDict['Result']
        total = row['sumSecondgSuite']
        row['numPairBpSecondgSuite'] = coverage
        # Both ratios are left as None when their denominator is zero.
        row['numFreqBpSecondgSuite'] = \
            float(total) / float(coverage) if coverage != 0 else None
        row['numFreqUniqueBpSecondgSuite'] = \
            float(coverage) / float(total) if total != 0 else None

    outputTableLine = [[
        item['id'], item['numSecondgSuite'], item['sumSecondgSuite'],
        item['numPairBpSecondgSuite'], item['numFreqBpSecondgSuite'],
        item['numFreqUniqueBpSecondgSuite'],
        item['numSecondgSuitePercentage']
    ] for key, item in outputTable.iteritems()]

    import operator
    # Most frequently hit reference tracks first.
    outputTableLineSort = sorted(outputTableLine,
                                 key=operator.itemgetter(1),
                                 reverse=True)

    tableHeader = [
        'Region ID ', 'Number of cases with at least one event ',
        'Total number of events', 'Genome coverage (unique bp)',
        'Number of events per unique bp', 'Number of unique bp per event',
        'Percentage of cases with at least one event'
    ]

    htmlCore = HtmlCore()
    htmlCore.begin()
    htmlCore.line(
        "<b>Identification of genomic elements with high event recurrence</b> "
    )
    htmlCore.header(title)
    htmlCore.divBegin('resultsDiv')
    htmlCore.tableHeader(tableHeader,
                         sortable=True,
                         tableId='resultsTable')
    for line in outputTableLineSort:
        htmlCore.tableLine(line)
    htmlCore.tableFooter()
    htmlCore.divEnd()

    # Charts need at least one row; with an empty table they are skipped
    # instead of failing on the first-row lookup.
    if outputTableLineSort:
        # One data series per value column (every column except the id).
        regionIds = [line[0] for line in outputTableLineSort]
        plotRes = [[line[colInd] for line in outputTableLineSort]
                   for colInd in range(1, len(outputTableLineSort[0]))]
        plotXAxis = [list(regionIds) for _ in plotRes]

        htmlCore.divBegin('plot', style='padding-top:20px;margin-top:20px;')
        vg = visualizationGraphs()
        res = vg.drawColumnCharts(
            plotRes,
            titleText=tableHeader[1:],
            categories=plotXAxis,
            height=500,
            xAxisRotation=270,
            xAxisTitle='Region ID',
            yAxisTitle='Number of cases with at least one event',
            marginTop=30,
            addTable=True,
            sortableAccordingToTable=True,
            legend=False)
        htmlCore.line(res)
        htmlCore.divEnd()

    htmlCore.hideToggle(styleClass='debug')
    htmlCore.end()

    print(htmlCore)
def _getTrackStats(self, trackName, analysisBins):
    """
    Run ``SegmentTrackOverviewStat`` on a single track.

    :param trackName: track name handed to ``Track``.
    :param analysisBins: bins passed unchanged to ``doAnalysis``.
    :return: the object returned by ``doAnalysis`` (the complete result, not
        only its global part).
    """
    return doAnalysis(AnalysisSpec(SegmentTrackOverviewStat),
                      analysisBins,
                      [Track(trackName)])
#WORKING AS OF April 5, 2015 from gold.application.HBAPI import doAnalysis, GlobalBinSource, AnalysisSpec, PlainTrack #from gold.application.HBAPI import RegionIter, AnalysisDefHandler, GenomeRegion from quick.statistic.AvgSegLenStat import AvgSegLenStat analysisSpec = AnalysisSpec(AvgSegLenStat) analysisSpec.addParameter("withOverlaps", "yes") analysisBins = GlobalBinSource('hg18') tracks = [PlainTrack(['Genes and gene subsets', 'Genes', 'Refseq'])] result = doAnalysis(analysisSpec, analysisBins, tracks) resultDict = result.getGlobalResult() print "Avg gene length: ", resultDict['Result']
def run(self):
    """
    Run ``CountStat`` for the query track over the reference regions and
    remember which reference bins contain at least one element.

    The reference (``self._referenceTrackFn``) is either a string, used as
    the bin specification with region specification 'file', or a list of
    track-name parts, joined with ':' and used with region specification
    'track'.

    When the result exposes exactly one result key, the result object is
    stored in ``self._result`` and the bins whose value for that key is
    greater than zero in ``self._intersectedReferenceBins``. Otherwise
    neither attribute is touched.

    :raises AssertionError: if ``self._referenceTrackFn`` is None.
    :raises TypeError: if ``self._referenceTrackFn`` is neither a string nor
        a list. Previously this case failed later with an unbound local
        variable.
    """
    assert self._referenceTrackFn is not None

    if isinstance(self._referenceTrackFn, basestring):
        regSpec, binSpec = 'file', self._referenceTrackFn
    elif isinstance(self._referenceTrackFn, list):
        regSpec, binSpec = 'track', ':'.join(self._referenceTrackFn)
    else:
        raise TypeError(
            'Reference must be a file name (string) or a track name (list), '
            'got: %s' % type(self._referenceTrackFn).__name__)

    from gold.description.TrackInfo import TrackInfo
    # NOTE(review): formatName is not used by the active code; the lookup is
    # kept in case constructing TrackInfo matters - confirm and remove.
    formatName = TrackInfo(self._genome,
                           self._queryTrackName).trackFormatName

    from gold.statistic.CountStat import CountStat
    from gold.application.HBAPI import PlainTrack
    from gold.application.HBAPI import doAnalysis
    from gold.description.AnalysisDefHandler import AnalysisSpec

    analysisBins = GalaxyInterface._getUserBinSource(
        regSpec, binSpec, self._genome)
    res = doAnalysis(AnalysisSpec(CountStat), analysisBins,
                     [PlainTrack(self._queryTrackName)])

    resDictKeys = res.getResDictKeys()
    if len(resDictKeys) == 1:
        resDictKey = resDictKeys[0]
        # Keep only the reference bins with a positive count.
        targetBins = [
            region for region in res.keys() if res[region][resDictKey] > 0
        ]
        self._result = res
        self._intersectedReferenceBins = targetBins