Exemplo n.º 1
0
 def retrieveFeatureTrack(self, genome, galaxyFn, regionTrackName, featureTrack):
     
     regionTrackName = regionTrackName.split(':')
     regionFileName = ExternalTrackManager.extractFnFromGalaxyTN(regionTrackName)
     
     bins = GalaxyInterface._getUserBinSource('gtrack', regionFileName, genome, featureTrack.split(':'))
     
     return self._extractor.extract(featureTrack.split(':'), bins, galaxyFn, 'gtrack')
Exemplo n.º 2
0
    def handleSameTrack(cls, trackName, regSpec, binSpec, genome, galaxyFn):

        analysisSpec = AnalysisSpec(RawOverlapToSelfStat)
        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, genome)

        return doAnalysis(analysisSpec, analysisBins,
                          [Track(trackName)]).getGlobalResult()
Exemplo n.º 3
0
 def handleRegionClustering(self, genome, tracks, clusterMethod, extra_option):
     region_cluster_track = self.getHistoryTrackDef('track1')
     print region_cluster_track
     region_ref_track = self.params.get('reftrack1')
     if region_cluster_track[0] == 'galaxy' :
         file_type = region_cluster_track[1]
         track_path = region_cluster_track[2]
         userBinSource = GalaxyInterface._getUserBinSource('bed', track_path, genome)
         validFeature = SplittedRegionsAsFeaturesCatalog.getValidAnalyses(genome,region_ref_track,[])
         analysisDef = validFeature[0]
         result = AnalysisDefJob(analysisDef, region_ref_track, [], userBinSource).run()
         print [result[localKey][validFeature[1]] for localKey in sorted(result.keys())]
Exemplo n.º 4
0
 def _constructBins(regSpec, binSpec, genome, trackNames):
     # Construct and check bins
     try:
         from quick.application.GalaxyInterface import GalaxyInterface
         userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome, trackNames)
         return [None, userBinSource]
     except Exception, e:
         results = Results([], [], '')
         results.addError(InvalidRunSpecException('Error in specification of analysis region or binsize: ' + str(e)))
         logMessage('Error in specification of analysis region (' + regSpec +') or binsize: (' + binSpec + ')')
         if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
             raise
         return [results, None]
Exemplo n.º 5
0
    def _constructBins(regSpec, binSpec, genome, trackName1, trackName2):
        #Construct and check bins
        try:
            #userBinSource= UserBinSource(regSpec, binSpec)
            from quick.application.GalaxyInterface import GalaxyInterface
#            from config.Config import DEFAULT_GENOME
            userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome, trackName1, trackName2)
            return [None, userBinSource]
        except Exception, e:
            #results = Results(trackName1, trackName2, statClassName)
            results = Results([],[],'')
            results.addError(InvalidRunSpecException('Error in specification of analysis region or binsize: ' + str(e)))
            logMessage('Error in specification of analysis region (' + regSpec +') or binsize: (' + binSpec + ')')
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise
            return [results, None]
Exemplo n.º 6
0
    def computeDistance(self, genome, track1, track2, feature): #direct distance between track1, track2
        '''
        track1 and track2 are two lists like : ['Sequence','Repeating elements','LINE']
        feature specifies how the distance between track1 and track2 is defined 
        '''
        validFeature = DirectDistanceCatalog.getValidAnalyses(genome, track1, track2)[feature]
        analysisDef = validFeature[0] #'bla bla -> PropFreqOfTr1VsTr2Stat' #or any other statistic from the HB collection
        if self.params.get("compare_in") == "Chromosomes" :
            regSpec = "__chrs__"
            binSpec = self.params.get("Chromosomes")
        elif self.params.get("compare_in") == "Chromosome arms" :
            regSpec = "__chrArms__"
            binSpec = self.params.get("Chromosome_arms")
        elif self.params.get("compare_in") == "Cytobands" :
            regSpec = "__chrBands__"
            binSpec = self.params.get("Cytobands")
        else :
            regSpec = self.params.get("region")
            binSpec = self.params.get("binsize")
        #regSpec = 'chr1' #could also be e.g. 'chr1' for the whole chromosome or '*' for the whole genome
        #binSpec = '10m' #could also be e.g.'100', '1k' or '*' for whole regions/chromosomes as bins 
        #genome = 'hg18' # path /../../..../genome
        #allRepeats = GalaxyInterface.getSubTrackNames(genome,['Sequence','Repeating elements'],False)
        #GalaxyInterface.run(trackName1, trackName2, question, regSpec, binSpec, genome='hg18')
        userBinSource = GalaxyInterface._getUserBinSource(regSpec,binSpec,genome)
        
        result = AnalysisDefJob(analysisDef, track1, track2, userBinSource).run()
        #result er av klassen Results..
        #from gold.result.Results import Results

        mainResultDict = result.getGlobalResult()
        #from PropFreqOfTr1VsTr2Stat:...
        #self._result = {'Track1Prop':ratio,'CountTrack1':c1, 'CountTrack2':c2,'Variance':variance}

        #mainValueOfInterest = mainResultDict['Variance']
        return mainResultDict[validFeature[1]]
    def execute(choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''
        genome = choices.genome
        targetTrack = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome,
            choices.targetTrack,
            printErrors=False,
            printProgress=False)

        refGSuite = getGSuiteFromGalaxyTN(choices.refTrackCollection)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)

        results = TrackReportCommon.getOverlapResultsForTrackVsCollection(
            genome, targetTrack, refGSuite, analysisBins=analysisBins)
        processedResults = TrackReportCommon.processRawResults(results)

        targetTrackTitle = prettyPrintTrackName(targetTrack)
        title = 'Screening of track ' + targetTrackTitle
        sortedProcessedResultsTupleList = sorted(
            processedResults.iteritems(),
            key=lambda x: x[1][STAT_LIST_INDEX[STAT_FACTOR_OBSERVED_VS_EXPECTED
                                               ]],
            reverse=True)
        refTrackNames = [x[0] for x in sortedProcessedResultsTupleList]
        refTrackNames = [
            x.replace('\'', '').replace('"', '') for x in refTrackNames
        ]
        plotData = [x[1] for x in sortedProcessedResultsTupleList]
        #         plotData = zip(*plotData) #invert
        plotData = normalizeMatrixData(plotData)

        printVals = tuple([str(targetTrackTitle)]) + tuple(
            [str(x[0]) for x in sortedProcessedResultsTupleList[0:3]])

        htmlCore = HtmlCore()
        htmlCore.begin()
        htmlCore.header(title)

        if choices.bmQid and choices.bmQid not in ['None']:
            htmlCore.append(
                str(
                    quick.gsuite.GSuiteHbIntegration.
                    getAnalysisQuestionInfoHtml(choices.bmQid)))

        htmlCore.divBegin('resultsDiv')
        htmlCore.paragraph('''
            The query track <b>%s</b> overlaps most strongly (is most highly enriched) with the tracks <b>%s</b>, <b>%s</b> and <b>%s</b> 
            from the selected collection. See below for a full (ranked) table of overlap and enrichment.
        ''' % printVals)

        htmlCore.paragraph('''
        The coverage of the query track is %s bps.
        ''' % strWithNatLangFormatting(
            TrackReportCommon.getQueryTrackCoverageFromRawOverlapResults(
                results)))

        htmlCore.tableHeader(TrackReportCommon.HEADER_ROW,
                             sortable=True,
                             tableId='resultsTable')
        for refTrackName, refTrackResults in sortedProcessedResultsTupleList:
            line = [refTrackName
                    ] + [strWithNatLangFormatting(x) for x in refTrackResults]
            htmlCore.tableLine(line)
        htmlCore.tableFooter()
        htmlCore.divEnd()
        '''

        addColumnPlotToHtmlCore(htmlCore, 
                                refTrackNames,  
                                TrackReportCommon.HEADER_ROW[1:], 
                                'stat', 'Results plot (data is normalized for better visual comparison) ', 
                                plotData, xAxisRotation = 315)
        '''
        addPlotToHtmlCore(
            htmlCore,
            refTrackNames,
            TrackReportCommon.HEADER_ROW[1:],
            'stat',
            'Results plot (data is normalized for better visual comparison) ',
            plotData,
            xAxisRotation=315)

        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()

        print htmlCore
Exemplo n.º 8
0
    def execute(cls, choices, galaxyFn=None, username=''):
        cls._setDebugModeIfSelected(choices)

        genome = choices.genome
        genomicRegions = choices.genomicRegions
        genomicRegionsTracks = choices.genomicRegionsTracks
        sourceTfs = choices.sourceTfs
        sourceTfsDetails = choices.sourceTfsDetails
        tfTracks = choices.tfTracks

        # Get Genomic Region track name:
        if genomicRegions == cls.REGIONS_FROM_HISTORY:
            galaxyTN = genomicRegionsTracks.split(':')
            genElementTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
                genome, galaxyTN)

            #queryGSuite = getGSuiteFromGalaxyTN(genomicRegionsTracks)
            #queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]

        elif genomicRegions == 'Hyperbrowser repository':
            selectedGenRegTrack = TfbsTrackNameMappings.getTfbsTrackNameMappings(
                genome)[genomicRegionsTracks]
            if isinstance(selectedGenRegTrack, dict):
                genElementTrackName = selectedGenRegTrack.values()
            else:
                genElementTrackName = selectedGenRegTrack
        elif genomicRegions == 'Hyperbrowser repository (cell-type-specific)':
            genElementTrackName = ['Private', 'Antonio'
                                   ] + genomicRegionsTracks.split(':')
        else:
            return

        # Get TF track names:
        if isinstance(tfTracks, dict):
            selectedTfTracks = [
                key for key, val in tfTracks.iteritems() if val == 'True'
            ]
        else:
            selectedTfTracks = [tfTracks]

        queryTrackTitle = '--'.join(genElementTrackName)

        trackTitles = [queryTrackTitle]
        tracks = [Track(genElementTrackName, trackTitle=queryTrackTitle)]

        for i in selectedTfTracks:
            if sourceTfs == 'Hyperbrowser repository':
                tfTrackName = TfTrackNameMappings.getTfTrackNameMappings(
                    genome)[sourceTfsDetails] + [i]
                tracks.append(
                    Track(tfTrackName,
                          trackTitle=tfTrackName[len(tfTrackName) - 1]))
                trackTitles.append(tfTrackName[len(tfTrackName) - 1])

            else:
                tfTrackName = i.split(':')

                queryGSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)

                for x in queryGSuite.allTracks():
                    selectedTrackNames = (':'.join(x.trackName))
                    if i == selectedTrackNames:
                        tracks.append(Track(x.trackName, x.title))
                        trackTitles.append(x.trackName[-1])

                # queryGSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
                # tfTrackName = [x.trackName for x in queryGSuite.allTracks()] + [i]
                # tracks += [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]
                # trackTitles += tfTrackName

        # print tfTrackName
        # print tracks
        # print trackTitles

        trackTitlesForStat = trackTitles

        trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(trackTitles)

        ##first statistic for Q2
        resultsForStatistics = OrderedDict()

        similarityFunc = [  #GSuiteStatUtils.T7_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP,
            GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        ]

        for similarityStatClassName in similarityFunc:
            regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
            analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                             binSpec,
                                                             genome=genome)

            mcfdrDepth = AnalysisDefHandler(
                REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDR$'] + ' -> GSuiteSimilarityToQueryTrackRankingsAndPValuesWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack_')
            analysisSpec.addParameter(
                'rawStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName])
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]
            )  #needed for call of non randomized stat for assertion
            analysisSpec.addParameter('tail', 'more')
            analysisSpec.addParameter('trackTitles',
                                      trackTitles)  #that need to be string
            analysisSpec.addParameter('queryTracksNum', str(len(tracks)))

            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()

            if not similarityStatClassName in resultsForStatistics:
                resultsForStatistics[similarityStatClassName] = {}

            resultsForStatistics[similarityStatClassName] = results

        keyTitle = [
            #'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure)',
            'Ratio of observed to expected overlap (Forbes similarity measure)'
        ]

        # 'Normalized Forbes coefficient: ratio of observed to expected overlap normalized in relation to the reference GSuite',
        # 'Forbes coefficient: ratio of observed to expected overlap'

        keyTitle = [
            #GSuiteStatUtils.T7_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP,
            GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        ]

        resultDict = AllTfsOfRegions.countStatistics(similarityFunc, choices,
                                                     genome, tracks,
                                                     trackTitlesForStat)

        resultDictShow = AllTfsOfRegions.countStatisticResults(
            resultDict, keyTitle, trackTitlesForStat)

        #         print resultsForStatistics
        '''selectedTrackNames = []
        if sourceTfs == 'History (user-defined)':
            if selectedTfTracks.split(":")[1] == "gsuite":
                gSuite = getGSuiteFromGalaxyTN(selectedTfTracks)
                for track in gSuite.allTracks():
                    selectedTrackNames.append(track.trackName)
            else:
                galaxyTN = selectedTfTracks.split(':')
                gRegTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, galaxyTN)
                selectedTrackNames.append(gRegTrackName)
        else:'''

        tfNameList = []

        #Intersection between TF Tracks and selected region (Table 1):
        n = 0
        allTargetBins = []
        alltfNames = []
        table1 = []
        for i in selectedTfTracks:
            n = n + 1
            #newGalaxyFn = galaxyFn.split(".")[0] + str(n) + "." + "dat"

            if sourceTfs == 'Hyperbrowser repository':
                tfTrackName = TfTrackNameMappings.getTfTrackNameMappings(
                    genome)[sourceTfsDetails] + [i]
            else:
                tfTrackName = i.split(':')
                tfTrackName.pop(0)
            #tfIntersection.expandReferenceTrack(upFlankSize, downFlankSize)
            tfIntersection = TrackIntersection(genome, genElementTrackName,
                                               tfTrackName, galaxyFn, str(n))

            regFileNamer = tfIntersection.getIntersectedRegionsStaticFileWithContent(
            )
            targetBins = tfIntersection.getIntersectedReferenceBins()

            #regSpec, targetBins = UserBinSelector.getRegsAndBinsSpec(choices)

            tfHits = [i] * len(targetBins)
            fixedTargetBins = [str(a).split(" ")[0] for a in targetBins]
            extendedTargetBins = [
                list(a) for a in zip(fixedTargetBins, tfHits)
            ]
            allTargetBins = allTargetBins + extendedTargetBins
            tfName = i
            alltfNames = alltfNames + [tfName]

            # Save output table:
            tfNameList.append(tfName)
            line = [tfName] + [len(targetBins)] + [
                regFileNamer.getLink('Download bed-file')
            ] + [
                regFileNamer.getLoadToHistoryLink('Send bed-file to History')
            ]
            table1 = table1 + [line]

        # Computing totals:
        fullCase = ','.join(alltfNames)
        firstColumn = [item[0] for item in allTargetBins]
        uniqueAllTargetBins = list(set(firstColumn))

        # Group TFs by bound region:
        d1 = defaultdict(list)
        for k, v in allTargetBins:
            d1[k].append(v)
        allTFTargetBins = dict((k, ','.join(v)) for k, v in d1.iteritems())

        allTFTargetList = []
        fullCaseTFTargetList = []
        for key, value in allTFTargetBins.iteritems():
            allTFTargetList = allTFTargetList + [[key, value]]
            if value == fullCase:
                fullCaseTFTargetList = fullCaseTFTargetList + [[key, value]]

        analysis3 = TrackIntersection.getFileFromTargetBins(
            allTFTargetList, galaxyFn, str(3))
        analysis4 = TrackIntersection.getFileFromTargetBins(
            fullCaseTFTargetList, galaxyFn, str(4))

        # Print output to table:
        title = 'TF targets and co-occupancy of ' + genElementTrackName[
            -1] + ' genomic regions'
        htmlCore = HtmlCore()

        pf = plotFunction(tableId='resultsTable')

        htmlCore.begin()
        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')

        htmlCore.line(pf.createButton(bText='Show/Hide more results'))

        # htmlCore.tableHeader(['Transcription Factor', 'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure) -- Similarity to genomic regions track', 'Normalized ratio of observed to expected overlap (normalized Forbes similarity measure) -- p-value','Ratio of observed to expected overlap (Forbes similarity measure) -- Similarity to genomic regions track', 'Ratio of observed to expected overlap (Forbes similarity measure) -- p-value', 'Number of TF-Target Track Regions', 'File of TF Target Regions', 'File of TF Target Regions', 'Number of TF-co-occupied Regions', 'File of TF co-occupied Regions', 'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'], sortable=True, tableId='resultsTable')

        #previous ordering
        # htmlCore.tableHeader(['Transcription Factor', 'Normalized Forbes index --overlap score',
        #                       'Normalized Forbes index --p-value',
        #                       'Forbes index --overlap score', 'Forbes index --p-value',
        #                       'Number of TF-Target Track Regions', 'File of TF Target Regions',
        #                       'File of TF Target Regions', 'Number of target track regions occupied by this TF',
        #                       'File of TF co-occupied Regions', 'File of TF co-occupied Regions',
        #                       'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'],
        #                      sortable=True, tableId='resultsTable')

        htmlCore.tableHeader(
            [
                'Transcription Factor',
                'Number of TF-Target Track Regions',
                'File of TF Track Regions',
                'Number of target track regions occupied by this TF',
                'File of TF Target Regions',
                'Forbes index --overlap score',
                'Forbes index --p-value',
                #'Normalized Forbes index --overlap score', 'Normalized Forbes index --p-value',
                'File of TF co-occupied Regions',
                'Rank of TF co-occupancy motifs'
            ],
            sortable=True,
            tableId='resultsTable')

        # Adding co-occupancy results to table:
        n = 1000
        genRegionNumElements = [
            int(x) for x in getTrackRelevantInfo.getNumberElements(
                genome, genElementTrackName)
        ]

        for key0, it0 in resultsForStatistics.iteritems():
            for el in tfNameList:
                if el not in it0:
                    resultsForStatistics[key0][el] = [None, None]

        resultsPlotDict = {}
        resultPlotCat = []
        resultsPlot = []

        resultsForStatisticsProper = {}
        for key0, it0 in resultsForStatistics.iteritems():
            if not key0 in resultsPlotDict:
                resultsPlotDict[key0] = {}
            resultsPlotPart = []
            for key1, it1 in it0.iteritems():
                resultsPlotPart.append(it1[0])
                if not key1 in resultsForStatisticsProper:
                    resultsForStatisticsProper[key1] = []
                if not key1 in resultsPlotDict[key0]:
                    resultsPlotDict[key0][key1] = None
                for el in it1:
                    resultsForStatisticsProper[key1].append(el)
                resultsPlotDict[key0][key1] = it1[0]

        resultPlotCat.append(tfNameList)
        resultPlotCat.append(tfNameList)

        #resultPlotCatPart = tfNameList

        #         print resultPlotCatPart

        for key0, it0 in resultsPlotDict.iteritems():
            resultsPlotPart = []
            for el in tfNameList:
                if el in it0:
                    resultsPlotPart.append(it0[el])
                else:
                    resultsPlotPart.append(None)
            resultsPlot.append(resultsPlotPart)

        for i in table1:
            thisCaseTFTargetList = []
            for key, value in allTFTargetList:
                if i[0] in value and ',' in value:
                    thisCaseTFTargetList = thisCaseTFTargetList + [[
                        key, value
                    ]]
            n = n + 1

            thisAnalysis = TrackIntersection.getFileFromTargetBins(
                thisCaseTFTargetList, galaxyFn, str(n))

            thisCaseCoCountsList = []
            thing = [x[1] for x in thisCaseTFTargetList]
            for k in list(set(thing)):
                thisCount = thing.count(k)
                thisCaseCoCountsList = thisCaseCoCountsList +  \
                                       [[k, thisCount, 100*float(thisCount)/float(sum(genRegionNumElements)), 100*float(thisCount)/float(len(thisCaseTFTargetList))]]
            thisCaseCoCountsList.sort(key=lambda x: x[2], reverse=True)
            n = n + 1

            thisCoCountsAnalysis = TrackIntersection.getOccupancySummaryFile(
                thisCaseCoCountsList, galaxyFn, str(n))

            thisLine = [len(thisCaseTFTargetList)] + \
            [thisAnalysis.getLink('Download file')] + [thisAnalysis.getLoadToHistoryLink('Send file to History')] + \
            [thisCoCountsAnalysis.getLink('Download file')] + [thisCoCountsAnalysis.getLoadToHistoryLink('Send file to History')]

            newLineI = []
            tfName = i[0]
            newLineI.append(tfName)

            for el in resultsForStatisticsProper[tfName]:
                newLineI.append(el)

            for elN in range(1, len(i)):
                newLineI.append(i[elN])

#             htmlCore.tableLine(i + thisLine)

# htmlCore.tableHeader(['Transcription Factor', 'Normalized Forbes index --overlap score',
#                       'Normalized Forbes index --p-value',
#                       'Forbes index --overlap score', 'Forbes index --p-value',
#                       'Number of TF-Target Track Regions', 'File of TF Target Regions',
#                       'File of TF Target Regions', 'Number of target track regions occupied by this TF',
#                       'File of TF co-occupied Regions', 'File of TF co-occupied Regions',
#                       'Rank of TF co-occupancy motifs', 'Rank of TF co-occupancy motifs'],
#                      sortable=True, tableId='resultsTable')

# htmlCore.tableHeader(['Transcription Factor', 'Number of TF-Target Track Regions', 'File of TF Track Regions',
#                      'Number of target track regions occupied by this TF', 'File of TF Target Regions',
#                      'Forbes index --overlap score', 'Forbes index --p-value',
#                      'Normalized Forbes index --overlap score', 'Normalized Forbes index --p-value',
#                      'File of TF co-occupied Regions', 'Rank of TF co-occupancy motifs'],
#                     sortable=True, tableId='resultsTable')

            tl = newLineI + thisLine
            # previous ordering tl - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
            # actual ordering - 0, 5, 7, 8, 7, 3, 4, 1, 2, 9, 11

            #ordering  = [0, 5, 7, 8, 10, 3, 4, 1, 2, 10, 12]
            ordering = [0, 3, 5, 6, 8, 1, 2, 8, 10]

            #1, 2, => delete

            eoList = []
            for eo in ordering:
                eoList.append(tl[eo])

            htmlCore.tableLine(eoList)

        totalCoOccupancyTargetList = []
        n = 2000
        for key, value in allTFTargetList:
            n = n + 1
            if ',' in value:
                totalCoOccupancyTargetList = totalCoOccupancyTargetList + [[
                    key, value
                ]]
        #newGalaxyFn = galaxyFn.split(".")[0] + str(n) + "." + "dat"
        totalCoOccupancyAnalysis = TrackIntersection.getFileFromTargetBins(
            totalCoOccupancyTargetList, galaxyFn, str(n))
        #line = ['Total reported regions'] + [len(allTargetBins)] + [''] + [''] + [''] + [''] + ['']

        #line = ['Full co-occupancy of ' + fullCase] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + ['-'] + [len(fullCaseTFTargetList)] + [analysis4.getLink('Download file')] + [analysis4.getLoadToHistoryLink('Send file to History')] + ['-'] + ['-']

        line = ['Full co-occupancy of ' + fullCase] + \
               ['-'] + \
               ['-'] + \
               [len(fullCaseTFTargetList)] + \
               ['-'] + \
               ['-'] + \
               ['-'] + \
               [analysis4.getLoadToHistoryLink('Send file to History')] + \
               ['-']

        htmlCore.tableLine(line)
        #line = ['Total unique regions'] + ['-'] + ['-'] + ['-'] + ['-']  + [len(allTFTargetList)] + [analysis3.getLink('Download bed-file')] + [analysis3.getLoadToHistoryLink('Send bed-file to History')] + [len(totalCoOccupancyTargetList)] + [totalCoOccupancyAnalysis.getLink('Download file')] + [totalCoOccupancyAnalysis.getLoadToHistoryLink('Send file to History')] + ['-'] + ['-']

        line = ['Total unique regions'] + \
               [len(allTFTargetList)] + \
               ['-'] + \
               [len(totalCoOccupancyTargetList)] + \
               [analysis3.getLoadToHistoryLink('Send bed-file to History')] + \
               ['-'] +\
               ['-'] + \
               [totalCoOccupancyAnalysis.getLoadToHistoryLink('Send file to History')] + \
               ['-']

        htmlCore.tableLine(line)

        htmlCore.tableFooter()
        htmlCore.divEnd()

        # htmlCore.line(pf.hideColumns(indexList=[2, 4]))
        #

        sumRes = 0
        for r in resultsPlot[0]:
            if r != None:
                sumRes += r

        if sumRes != 0:
            vg = visualizationGraphs()
            result = vg.drawColumnCharts(
                [resultsPlot[0]],
                height=300,
                categories=resultPlotCat,
                legend=False,
                addOptions='width: 90%; float:left; margin: 0 4%;',
                #titleText=['Overlap between TFs and genomic region using normalized Forbes', 'Overlap between TFs and genomic region using Forbes'],
                titleText=[
                    'Overlap between TFs and genomic region using Forbes'
                ],
                xAxisRotation=90,
                xAxisTitle='TF',
                yAxisTitle='value')

            htmlCore.line(result)

        for key0, it0 in resultDictShow.iteritems():
            htmlCore.divBegin('resultsDiv' + str(key0))
            htmlCore.header(key0)
            htmlCore.tableHeader(it0[0],
                                 sortable=True,
                                 tableId='resultsTable' + str(key0))

            for elN in range(1, len(it0)):
                htmlCore.tableLine(it0[elN])

            htmlCore.tableFooter()
            htmlCore.divEnd()

        htmlCore.hideToggle(styleClass='debug')

        htmlCore.end()
        print htmlCore
Exemplo n.º 9
0
    def execute(cls, choices, galaxyFn=None, username=''):
        cls._setDebugModeIfSelected(choices)

        targetGSuite = getGSuiteFromGalaxyTN(choices.gSuiteFirst)
        refGSuite = getGSuiteFromGalaxyTN(choices.gSuiteSecond)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)

        analysisDef = 'dummy -> RawOverlapStat'
        # analysisDef = 'dummy [withOverlaps=yes] -> RawOverlapAllowSingleTrackOverlapsStat'
        results = OrderedDict()

        for targetTrack in targetGSuite.allTracks():
            targetTrackName = targetTrack.title
            for refTrack in refGSuite.allTracks():
                refTrackName = refTrack.title
                if targetTrack.trackName == refTrack.trackName:
                    # print targetTrack.title
                    # print targetTrack.trackName
                    result = DetermineSuiteTracksCoincidingWithAnotherSuite.handleSameTrack(
                        targetTrack.trackName, regSpec, binSpec,
                        targetGSuite.genome, galaxyFn)
                else:
                    result = GalaxyInterface.runManual(
                        [targetTrack.trackName, refTrack.trackName],
                        analysisDef,
                        regSpec,
                        binSpec,
                        targetGSuite.genome,
                        galaxyFn,
                        printRunDescription=False,
                        printResults=False,
                        printProgress=False).getGlobalResult()
                if targetTrackName not in results:
                    results[targetTrackName] = OrderedDict()
                results[targetTrackName][refTrackName] = result

        stat = STAT_OVERLAP_COUNT_BPS
        statIndex = STAT_LIST_INDEX[stat]
        title = ''

        processedResults = []
        headerColumn = []
        for targetTrackName in targetGSuite.allTrackTitles():
            resultRowDict = processRawResults(results[targetTrackName])
            resultColumn = []
            headerColumn = []
            for refTrackName, statList in resultRowDict.iteritems():
                resultColumn.append(statList[statIndex])
                headerColumn.append(refTrackName)
            processedResults.append(resultColumn)

        outputTable = {}
        for elN in range(0, len(headerColumn)):
            outputTable[elN] = {}
            outputTable[elN]['id'] = headerColumn[elN]

        transposedProcessedResults = [list(x) for x in zip(*processedResults)]

        # second question sumSecondgSuite
        # first question numSecondgSuite
        # fifth question numSecondgSuitePercentage
        for i in range(0, len(transposedProcessedResults)):
            outputTable[i]['sumSecondgSuite'] = sum(
                transposedProcessedResults[i])
            if not 'numSecondgSuite' in outputTable[i]:
                outputTable[i]['numSecondgSuite'] = 0
            for j in range(0, len(transposedProcessedResults[i])):
                if transposedProcessedResults[i][j] >= 1:
                    outputTable[i]['numSecondgSuite'] += 1
                else:
                    outputTable[i]['numSecondgSuite'] += 0
            outputTable[i]['numSecondgSuitePercentage'] = float(
                outputTable[i]['numSecondgSuite']) / float(
                    targetGSuite.numTracks()) * 100

        from gold.statistic.CountSegmentStat import CountSegmentStat
        from gold.statistic.CountPointStat import CountPointStat
        from gold.description.TrackInfo import TrackInfo
        from gold.statistic.CountStat import CountStat

        # third question numPairBpSecondgSuite
        # fourth question numFreqBpSecondgSuite
        i = 0
        for refTrack in refGSuite.allTracks():
            formatName = TrackInfo(refTrack.genome,
                                   refTrack.trackName).trackFormatName
            analysisDef = CountStat
            analysisBins = GalaxyInterface._getUserBinSource(
                regSpec, binSpec, refTrack.genome)
            results = doAnalysis(AnalysisSpec(analysisDef), analysisBins,
                                 [PlainTrack(refTrack.trackName)])
            resultDict = results.getGlobalResult()
            if len(resultDict) == 0:
                outputTable[i]['numPairBpSecondgSuite'] = None
                outputTable[i]['numFreqBpSecondgSuite'] = None
                outputTable[i]['numFreqUniqueBpSecondgSuite'] = None
            else:
                outputTable[i]['numPairBpSecondgSuite'] = resultDict['Result']

                if outputTable[i]['numPairBpSecondgSuite'] != 0:
                    outputTable[i]['numFreqBpSecondgSuite'] = float(
                        outputTable[i]['sumSecondgSuite']) / float(
                            outputTable[i]['numPairBpSecondgSuite'])
                else:
                    outputTable[i]['numFreqBpSecondgSuite'] = None

                if outputTable[i]['sumSecondgSuite'] != 0:
                    outputTable[i]['numFreqUniqueBpSecondgSuite'] = float(
                        outputTable[i]['numPairBpSecondgSuite']) / float(
                            outputTable[i]['sumSecondgSuite'])
                else:
                    outputTable[i]['numFreqUniqueBpSecondgSuite'] = None

            i += 1

        # sortTable
        outputTableLine = []
        for key, item in outputTable.iteritems():
            line = [
                item['id'], item['numSecondgSuite'], item['sumSecondgSuite'],
                item['numPairBpSecondgSuite'], item['numFreqBpSecondgSuite'],
                item['numFreqUniqueBpSecondgSuite'],
                item['numSecondgSuitePercentage']
            ]
            outputTableLine.append(line)

        import operator
        outputTableLineSort = sorted(outputTableLine,
                                     key=operator.itemgetter(1),
                                     reverse=True)

        tableHeader = [
            'Region ID ', 'Number of cases with at least one event ',
            'Total number of events', 'Genome coverage (unique bp)',
            'Number of events per unique bp', 'Number of unique bp per event',
            'Percentage of cases with at least one event'
        ]
        htmlCore = HtmlCore()

        htmlCore.begin()

        htmlCore.line(
            "<b>Identification of genomic elements with high event recurrence</b> "
        )

        htmlCore.header(title)
        htmlCore.divBegin('resultsDiv')
        htmlCore.tableHeader(tableHeader,
                             sortable=True,
                             tableId='resultsTable')

        for line in outputTableLineSort:
            htmlCore.tableLine(line)

        plotRes = []
        plotXAxis = []
        for lineInx in range(1, len(outputTableLineSort[0])):
            plotResPart = []
            plotXAxisPart = []
            for lineInxO in range(0, len(outputTableLineSort)):
                # if outputTableLineSort[lineInxO][lineInx]!=0 and
                # if outputTableLineSort[lineInxO][lineInx]!=None:
                plotResPart.append(outputTableLineSort[lineInxO][lineInx])
                plotXAxisPart.append(outputTableLineSort[lineInxO][0])
            plotRes.append(plotResPart)
            plotXAxis.append(plotXAxisPart)

        htmlCore.tableFooter()
        htmlCore.divEnd()

        htmlCore.divBegin('plot', style='padding-top:20px;margin-top:20px;')

        vg = visualizationGraphs()
        res = vg.drawColumnCharts(
            plotRes,
            titleText=tableHeader[1:],
            categories=plotXAxis,
            height=500,
            xAxisRotation=270,
            xAxisTitle='Ragion ID',
            yAxisTitle='Number of cases with at least one event',
            marginTop=30,
            addTable=True,
            sortableAccordingToTable=True,
            legend=False)
        htmlCore.line(res)
        htmlCore.divEnd()

        htmlCore.hideToggle(styleClass='debug')
        htmlCore.end()

        print htmlCore
    def executeSelfFeature(cls, genome, tracks, track_names, clusterMethod,
                           extra_option, feature, distanceType, kmeans_alg,
                           galaxyFn, regSpec, binSpec):

        from proto.RSetup import r
        #regSpec, binSpec = 'bed', '/usit/invitro/data/galaxy/galaxy-dist-hg-dev/./database/files/017/dataset_17084.dat'
        silenceRWarnings()

        jobFile = open(galaxyFn, 'w')
        #         print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg,regSpec, binSpec]])), '<br><br>'
        batchRun = GalaxyRunSpecificFile(['batch_run_job.txt'], galaxyFn)
        print >> jobFile, '<h3>Results for the "similarity of positional distribution along the genome" way of clustering<h3/><br/><br/>'
        with open(batchRun.getDiskPath(ensurePath=True), 'w') as batchFile:
            print >> batchFile, '$clusterBySelfFeature', (genome, '$'.join([
                ':'.join(t) for t in tracks
            ]), ':'.join(track_names), clusterMethod, extra_option, feature,
                                                          distanceType,
                                                          kmeans_alg, regSpec,
                                                          binSpec)
        print >> jobFile, batchRun.getLink(
            'View batch script line for this analysis<br/>')
        #print>>jobFile, 'Batch script syntax for this analysis:<br>$clusterBySelfFeature', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec), '<br><br>'
        #print>>jobFile, 'signature of method clusterBySelfFeature:<br>', 'clusterBySelfFeature(genome, tracksStr, track_namesStr, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec):<br><br><br>'
        prettyTrackNames = [
            v[-1].replace('RoadMap_', '').replace('.H3K4me1', '')
            for v in tracks
        ]
        #prettyTrackNames = [prettyPrintTrackName(v, shortVersion=True) for v in tracks]
        f_matrix = cls.construct_feature_matrix(genome, tracks, feature,
                                                regSpec, binSpec)
        #print>>jobFile, 'dir f_matrix: ', dir(f_matrix), regSpec, binSpec
        userBinSource = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, genome)
        binNames = [
            str(bin)
            for binIndex, bin in enumerate(sorted(list(userBinSource)))
        ]
        if len(binNames) != f_matrix.shape[1]:
            binNames = ['Microbin' + str(i) for i in range(f_matrix.shape[1])]
        r.assign('bin_names', binNames)
        r.assign('track_names', prettyTrackNames
                 )  #use as track names, will be shown in clustering figure
        r.assign('f_matrix', f_matrix)
        r.assign('distanceType', distanceType)
        r('row.names(f_matrix) <- track_names')
        r('colnames(f_matrix) <- bin_names')

        if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
            #print 'galaxyFn: ', galaxyFn
            figure = GalaxyRunSpecificFile(
                ['cluster_tracks_result_figure.pdf'], galaxyFn)
            figurepath = figure.getDiskPath(ensurePath=True)
            r('d <- dist(f_matrix, method=distanceType)')
            distTable = r('d')
            distMatrix = GalaxyRunSpecificFile(['distance_matrix_result.txt'],
                                               galaxyFn)
            distMatrixPath = distMatrix.getDiskPath(True)
            open(distMatrixPath, 'w').write(str(distTable))
            print >> jobFile, distMatrix.getLink(
                'View the distance matrix for this analysis <br>')
            #with open(distMatrixPath,'w') as distObj:
            #    #distTable = d_matrix.tolist()
            #    core = HtmlCore()
            #    core.tableHeader(['']+track_names,firstRow=True)
            #    rowSize = len(track_names)
            #    index=0
            #    while index<len(distTable):
            #        core.tableLine([track_names[index % rowSize]]+[str(v) for v in distTable[index:index+rowSize]])
            #    #for index, row in enumerate(distTable):
            #    #    core.tableLine([track_names[index]]+[str(v) for v in row])
            #    core.tableFooter()
            #    print>>distObj, str(core)
            #print>>jobFile, distMatrix.getLink('View the distance matrix for this analysis <br>')

            if True:  #f_matrix.shape[1] <= 100:
                r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'],
                                                       galaxyFn)
                #', '.join([str(v) for v in row])
                r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
                r('dput(f_matrix, f_matrix_fn)')
                #r_f_matrixFile.writeTextToFile(', '.join(cls.getFlattenedMatrix(f_matrix)) + '\n\nTrack names: '+', '.join(prettyTrackNames)+'\n\nNumber of tracks: '+str(len(prettyTrackNames))+'\n\nbins: +)
                #r_f_matrixFile.writeTextToFile()
                #r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d))
                print >> jobFile, r_f_matrixFile.getLink(
                    'Access the R-representation of the Feature_matrix (text-file)'
                ), '<br/>'

            cls._clusterAndPlotDendrogram(figurepath, extra_option, 'd',
                                          'f_matrix', prettyTrackNames)
            print >> jobFile, figure.getLink(
                'View the clustering tree (dendrogram) for this analysis<br>')

            if True:  #f_matrix.shape[1] <= 100:
                #heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
                #baseDir = os.path.dirname(heatmap.getDiskPath(True))

                resDict = Results([], [], '')
                resDict.setGlobalResult({
                    'result': {
                        'Matrix': f_matrix,
                        'Rows': np.array(track_names),
                        'Cols': np.array(binNames),
                        'Significance': None,
                        'RowClust': r('hr'),
                        'ColClust': None
                    }
                })
                header = 'View the resulting heatmap plot <br>'

                baseDir = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()
                heatPresenter = HeatmapFromNumpyPresenter(
                    resDict, baseDir, header, printDimensions=False)
                print >> jobFile, heatPresenter.getReference('result')

                #heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
                #heatmap_path = heatmap.getDiskPath(True)
                #r.pdf(heatmap_path)
                ##cm.colors(256)
                #r.library("gplots")
                #r('heatmap(f_matrix, col=redgreen(75), distfun=function(c) dist(c, method=distanceType), hclustfun=function(c) hclust(c, method=extra_option, members=NULL),Colv=NA, scale="none", xlab="", ylab="", cexRow=0.5, cexCol=0.5, margin=c(8,10))')#Features cluster tracks
                #r('dev.off()')
                ##print>>jobFile, r('dimnames(f_matrix)')
                #print>>jobFile, heatmap.getLink('View the resulting heatmap plot <br>')
            else:
                print >> jobFile, 'Heatmap not generated due to large size ', f_matrix.shape
        elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
            textFile = GalaxyRunSpecificFile(
                ['result_of_kmeans_clustering.txt'], galaxyFn)
            textFilePath = textFile.getDiskPath(True)
            extra_option = int(extra_option)
            r.assign('kmeans_alg', kmeans_alg)
            r.assign('extra_option', extra_option)

            r(
                'hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)'
            )  #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here
            r('hr$height <- hr$height/max(hr$height)*10')
            kmeans_output = open(textFilePath, 'w')
            clusterSizes = r('hr$size')  #size of every cluster
            withinSS = r('hr$withinss')
            clusters = r('hr$cluster')
            for index1 in range(
                    extra_option
            ):  #extra_option actually the number of clusters
                #trackInCluster = [k for k,val in clusters.items() if val == index1]
                trackInCluster = [
                    k + 1 for k, val in enumerate(clusters)
                    if val == index1 + 1
                ]  #IS THIS CORRECT, I.E. SAME AS ABOVE??

                print >> kmeans_output, 'Cluster %i(%s objects) : ' % (
                    index1 + 1, str(clusterSizes[index1]))
                for name in trackInCluster:
                    print >> kmeans_output, name, '(This result may be a bit shaky afters some changes in rpy access)'

                print >> kmeans_output, 'Sum of square error for this cluster is : ' + str(
                    withinSS[index1]) + '\n'

            kmeans_output.close()
            print >> jobFile, textFile.getLink(
                'Detailed result of kmeans clustering <br>')

        #cls.print_data(f_matrix, jobFile)
        '''
Exemplo n.º 11
0
    def countStatistics(similarityFunc, choices, genome, tracks, trackTitles):

        trackList = tracks[1:]

        resultsForStatistics = OrderedDict()

        llDict = OrderedDict()
        trackT = trackTitles[1:]
        i = 0
        for tt1 in trackT:
            if not tt1 in llDict:
                llDict[tt1] = []
                resultsForStatistics[tt1] = {}
                for tt2 in range(i, len(trackT)):
                    llDict[tt1].append(trackT[tt2])
            i += 1
#
#         print 'llDict=' + str(llDict)
#         print 'trackT=' + str(trackT)
#         print 'trackList=' + str(trackList)

        for key0, it0 in llDict.iteritems():
            if len(it0) > 1:
                trackCollection = []
                for it1 in it0:

                    trackNumber = trackT.index(it1)
                    trackCollection.append(trackList[trackNumber])

                trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(it0)

                #                 print str(key0) + '- trackCollection: ' + str(trackCollection) + ' trackTitles: ' + str(trackTitles)

                for similarityStatClassName in similarityFunc:
                    regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
                    analysisBins = GalaxyInterface._getUserBinSource(
                        regSpec, binSpec, genome=genome)

                    mcfdrDepth = AnalysisDefHandler(
                        REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText(
                        ).values()[0][0]
                    analysisDefString = REPLACE_TEMPLATES[
                        '$MCFDR$'] + ' -> GSuiteSimilarityToQueryTrackRankingsAndPValuesWrapperStat'
                    analysisSpec = AnalysisDefHandler(analysisDefString)
                    analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
                    analysisSpec.addParameter(
                        'assumptions', 'PermutedSegsAndIntersegsTrack_')
                    analysisSpec.addParameter(
                        'rawStatistic',
                        GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[
                            similarityStatClassName])
                    analysisSpec.addParameter(
                        'pairwiseStatistic',
                        GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[
                            similarityStatClassName]
                    )  #needed for call of non randomized stat for assertion
                    analysisSpec.addParameter('tail', 'more')
                    analysisSpec.addParameter(
                        'trackTitles', trackTitles)  #that need to be string

                    #i added that one later

                    analysisSpec.addParameter('queryTracksNum',
                                              str(len(trackCollection)))
                    results = doAnalysis(analysisSpec, analysisBins,
                                         trackCollection).getGlobalResult()

                    if not similarityStatClassName in resultsForStatistics[
                            key0]:
                        resultsForStatistics[key0][
                            similarityStatClassName] = {}

                    resultsForStatistics[key0][
                        similarityStatClassName] = results

        return resultsForStatistics
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results
        page in Galaxy history. If getOutputFormat is anything else than HTML,
        the output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        import warnings
        #warnings.simplefilter('error')

        cls._setDebugModeIfSelected(choices)

        similarityStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP

        summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT

        pairwiseStatName = GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName]

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        statTxt = "Average"
        if(summaryFunc == "max"): statTxt = "Maximum"


        if choices.analysisName == cls.Q2:


            mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]

            # First compute pvalue by running the statistic through a wrapper stat that computes the max per bin
            #from quick.statistic.CollectionBinnedHypothesisWrapperStat import CollectionBinnedHypothesisWrapperStat
            #analysisSpec = AnalysisSpec(CollectionBinnedHypothesisWrapperStat)

            analysisDefString = REPLACE_TEMPLATES['$MCFDRv3$'] + ' -> CollectionBinnedHypothesisWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)

            analysisSpec.addParameter("rawStatistic", "GenericMaxBinValueStat")
            # analysisSpec.addParameter('perBinStatistic', 'SummarizedStat')
            analysisSpec.addParameter('perBinStatistic', 'MultitrackSummarizedInteractionV2Stat')
            # analysisSpec.addParameter('mcSamplerClass', 'NaiveMCSamplingV2Stat')
            analysisSpec.addParameter('pairwiseStatistic', 'ObservedVsExpectedStat')
            analysisSpec.addParameter('summaryFunc', summaryFunc)
            # analysisSpec.addParameter('evaluatorFunc','evaluatePvalueAndNullDistribution')
            analysisSpec.addParameter('tail', 'right-tail')
            analysisSpec.addParameter('assumptions', 'RandomGenomeLocationTrack')
            #analysisSpec.addParameter('maxSamples', 10)
            analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)




            regSpec, binSpec = cls.getRegsAndBinsSpec(choices)

            analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                             binSpec,
                                                             choices.genome)

            results = doAnalysis(analysisSpec, analysisBins, tracks)
            results = results.getGlobalResult()
            resultsTxt = "The highest ranking bin based on the " + statTxt.lower() + " of the Forbes similarity measure for pairs of tracks within each bin had a score of <b>%.3f</b> with p-value <b>%.6f</b>" % (results["TSMC_GenericMaxBinValueStat"], results['P-value'])



        # Stat question 7
        core = HtmlCore()
        core.begin()
        analysisSpec = AnalysisSpec(MultitrackSummarizedInteractionWrapperStat)
        #analysisSpec.addParameter('pairwiseStatistic', 'ObservedVsExpectedStat')
        analysisSpec.addParameter('pairwiseStatistic', GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassName])
        analysisSpec.addParameter('summaryFunc', summaryFunc)
        analysisSpec.addParameter('multitrackSummaryFunc', summaryFunc)
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)

        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         choices.genome)
        results = doAnalysis(analysisSpec, analysisBins, tracks)
        #print '<br>results: ', results, '<br><br>'





        prettyResults = OrderedDict()
        for key, val in results.iteritems():
            if "Result" in val.keys():
                prettyResults[key] = val["Result"]
            else:
                prettyResults[key] = "No result"

        core.header(statTxt + " co-occurence between pairs of tracks within each bin")

        if choices.analysisName == cls.Q2:
            core.paragraph(resultsTxt)

        core.divBegin(divClass='resultsExplanation')
        core.paragraph('The following is a list of all bins and the <b>' + statTxt.lower() + '</b> co-occurrence of tracks within each bin.')
        core.divEnd()


        """
        core.paragraph('''
            Suite data is coinciding the most in bin %s
        ''' % ('test'))
        """

        visibleRows = 20
        makeTableExpandable = len(prettyResults) > visibleRows
        columnNames = ['Bin', 'Co-occurrence within the bin']
        if choices.analysisName == cls.Q1:
            shortQuestion = cls.Q1_SHORT
        else:
            shortQuestion = cls.Q2_SHORT

        addTableWithTabularAndGsuiteImportButtons(
            core, choices, galaxyFn, shortQuestion, tableDict=prettyResults,
            columnNames=columnNames, sortable=True, presorted=0,
            expandable=makeTableExpandable, visibleRows=visibleRows)

        core.divEnd()
        core.end()

        print str(core)
Exemplo n.º 13
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results
        page in Galaxy history. If getOutputFormat is anything else than HTML,
        the output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        cls._setDebugModeIfSelected(choices)

        # First compute pvalue by running the statistic through a wrapper stat that computes the max per bin
        """
        from quick.statistic.RandomizationManagerV3Stat import RandomizationManagerV3Stat
        from quick.statistic.CollectionBinnedHypothesisWrapperStat import CollectionBinnedHypothesisWrapperStat
        analysisSpec = AnalysisSpec(CollectionBinnedHypothesisWrapperStat)
        analysisSpec.addParameter("rawStatistic", "GenericMaxBinValueStat")
        analysisSpec.addParameter('perBinStatistic', 'SummarizedStat')
        analysisSpec.addParameter('mcSamplerClass', 'NaiveMCSamplingV2Stat')
        analysisSpec.addParameter('pairwiseStatistic', 'ProportionCountStat')
        analysisSpec.addParameter('summaryFunc', choices.summaryFunc)
        analysisSpec.addParameter('evaluatorFunc','evaluatePvalueAndNullDistribution')
        analysisSpec.addParameter('tail', 'right-tail')
        analysisSpec.addParameter('assumptions', 'RandomGenomeLocationTrack')
        analysisSpec.addParameter('maxSamples', 10)

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)

        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         choices.genome)
        results = doAnalysis(analysisSpec, analysisBins, tracks)

        print "<p>Max stat results:</p>"

        print results.getGlobalResult()
        """
        # Stat question 4
        summaryFunc = choices.summaryFunc if choices.summaryFunc else cls.SUMMARY_FUNC_DEFAULT
        statTxt = "Average"
        if (summaryFunc == "max"): statTxt = "Maximum"

        statDesc = 'number of <b>segments</b> per base'
        if choices.analysisName == cls.Q2:
            statDesc = 'number of <b>base pairs covered by segments</b>'

        core = HtmlCore()
        core.begin()
        core.header("Enrichment of GSuite tracks across regions")
        core.divBegin(divClass='resultsExplanation')
        core.paragraph(
            'The following is a list of all regions (bins) and the <b>' +
            statTxt.lower() + '</b> ' + statDesc +
            ' across the tracks within each region.')
        core.divEnd()

        if choices.analysisName == cls.Q3:

            # Compute p-value per bin
            analysisSpec = AnalysisSpec(GSuiteBinEnrichmentPValWrapperStat)
            analysisSpec.addParameter('rawStatistic', 'BinSizeStat')
            #analysisSpec.addParameter('pairwiseStatistic', 'ProportionElementCountStat')
            #analysisSpec.addParameter('pairwiseStatistic', 'ProportionElementCountStat')
            #analysisSpec.addParameter('summaryFunc', summaryFunc)
            gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
            tracks = [Track(x.trackName) for x in gsuite.allTracks()]
            regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
            from quick.statistic.GenericRelativeToGlobalStat import GenericRelativeToGlobalStatUnsplittable
            #analysisSpec.addParameter("globalSource", GenericRelativeToGlobalStatUnsplittable.getGlobalSource('test', choices.genome, False))
            analysisSpec.addParameter("globalSource", 'userbins')
            analysisBins = GalaxyInterface._getUserBinSource(
                regSpec, binSpec, choices.genome)
            results_pval = doAnalysis(analysisSpec, analysisBins, tracks)

        #print results_pval

        analysisSpec = AnalysisSpec(SummarizedWrapperStat)
        analysisSpec.addParameter('rawStatistic', 'SummarizedWrapperStat')

        countStat = 'ProportionElementCountStat'
        if choices.analysisName == cls.Q2:
            countStat = 'ProportionCountStat'

        # analysisSpec.addParameter('pairwiseStatistic', 'ProportionCountStat')
        analysisSpec.addParameter('pairwiseStatistic', countStat)
        analysisSpec.addParameter('summaryFunc', summaryFunc)
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        tracks = [Track(x.trackName) for x in gsuite.allTracks()]

        regSpec, binSpec = cls.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, choices.genome)
        results = doAnalysis(analysisSpec, analysisBins, tracks)

        prettyResults = {}
        #print results

        for key, val in results.iteritems():
            if "Result" in val.keys():

                if choices.analysisName == cls.Q3:
                    prettyResults[key] = (val["Result"],
                                          results_pval[key]["Result"])
                else:
                    prettyResults[key] = (val["Result"])
            else:
                prettyResults[key] = "No result"

        topTrackTitle = results.keys()[0]
        """
        core.paragraph('''
            Suite data is coinciding the most in bin %s
        ''' % ('test'))
        """

        columnNames = ['Bin', 'Representation within the bin']
        if choices.analysisName == cls.Q3:
            columnNames.append('p-value')

        core.divBegin()
        if choices.analysisName == cls.Q1:
            shortQuestion = cls.Q1_SHORT
        elif choices.analysisName == cls.Q2:
            shortQuestion = cls.Q2_SHORT
        else:  # Q3
            shortQuestion = cls.Q3_SHORT

        visibleRows = 20
        makeTableExpandable = len(prettyResults) > visibleRows

        addTableWithTabularAndGsuiteImportButtons(
            core,
            choices,
            galaxyFn,
            shortQuestion,
            tableDict=prettyResults,
            columnNames=columnNames,
            sortable=True,
            presorted=0,
            expandable=makeTableExpandable)

        core.divEnd()
        core.end()

        print str(core)
Exemplo n.º 14
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        :param choices:  Dict holding all current selections
        :param galaxyFn:
        :param username:
        """

        cls._setDebugModeIfSelected(choices)
        genome = choices.genome
        queryTrackNameAsList = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.queryTrack,
                                                                                     printErrors=False,
                                                                                     printProgress=False)
        if choices.intensityTrack:
            intensityTrackNameAsList = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices.intensityTrack,
                                                                                     printErrors=False,
                                                                                     printProgress=False)
        else:
            intensityTrackNameAsList = None
        analysisQuestion = choices.analysisQName
        similarityStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        summaryFunc = choices.summaryFunc if choices.summaryFunc else 'average'
        reverse = 'Yes' if choices.reversed else 'No'
        if analysisQuestion in [cls.Q2, cls.Q3]:
            randStrat = 'PermutedSegsAndIntersegsTrack_' if choices.isBasic else GSuiteStatUtils.PAIRWISE_RAND_CLS_MAPPING[choices.randStrat]

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome)
        queryTrack = Track(queryTrackNameAsList)
        tracks = [queryTrack] + [Track(x.trackName, trackTitle=x.title) for x in gsuite.allTracks()]
        queryTrackTitle = prettyPrintTrackName(queryTrack.trackName).replace('/', '_')
        trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(queryTrackTitle)] + [quote(x.title, safe='') for x in gsuite.allTracks()])
        additionalResultsDict = OrderedDict()
        additionalAttributesDict = OrderedDict()
        if analysisQuestion in [cls.Q1, cls.Q2]:
            additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict(choices.additionalAttributes, gsuite)
            # additional analysis
            stats = [SingleValueOverlapStat, CountStat,
                     CountElementStat]  # + [CountSegmentsOverlappingWithT2Stat] #takes long time
            additionalResultsDict = runMultipleSingleValStatsOnTracks(gsuite, stats, analysisBins,
                                                                      queryTrack=queryTrack)

        core = HtmlCore()
        if analysisQuestion == cls.Q1:
            analysisSpec = cls.prepareQ1(reverse, similarityStatClassName, trackTitles)

            results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult()

            gsPerTrackResultsModel = GSuitePerTrackResultModel(results, ['Similarity to query track'],
                                                               additionalResultsDict=additionalResultsDict,
                                                               additionalAttributesDict=additionalAttributesDict)
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                gsPerTrackResults = gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
            else:
                gsPerTrackResults = gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

            core = cls.generateQ1output(additionalResultsDict, analysisQuestion, choices, galaxyFn, gsPerTrackResults,
                                        queryTrackTitle, gsuite, results, similarityStatClassName)
        elif analysisQuestion == cls.Q2:
            analysisSpec = cls.prepareQ2(choices, similarityStatClassName, trackTitles, randStrat,
                                         intensityTrackNameAsList)

            results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult()

            core = cls.generateQ2Output(additionalAttributesDict, additionalResultsDict,
                                        analysisQuestion, choices, galaxyFn, queryTrackTitle,
                                        gsuite, results, similarityStatClassName)
        else:  # Q3
            analysisSpec = cls.prepareQ3(choices, similarityStatClassName, summaryFunc, randStrat)
            results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult()
            core = cls.generateQ3output(analysisQuestion, queryTrackTitle, results, similarityStatClassName)

        print str(core)
Exemplo n.º 15
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''
        DebugMixin._setDebugModeIfSelected(choices)
        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        #         fullCategory = AnalysisManager.combineMainAndSubCategories(choices.analysisCategory, 'Basic')
        fullCategory = AnalysisManager.combineMainAndSubCategories(
            'Descriptive statistics', 'Basic')
        tracks = list(gSuite.allTracks())
        analysisName = choices.analysis
        # selectedAnalysis = GSuiteSingleValueAnalysisPerTrackTool \
        #     ._resolveAnalysisFromName(gSuite.genome, fullCategory, tracks[0].trackName, analysisName)

        selectedAnalysis = cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING[
            choices.analysis]

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)
        # paramName, paramValues = selectedAnalysis.getFirstOptionKeyAndValues()
        # if paramName and paramValues:
        #     if len(paramValues) == 1:
        #         selectedAnalysis.addParameter(paramName, paramValues[0])
        #     else:
        #         selectedAnalysis.addParameter(paramName, choices.paramOne)

        tableDict = OrderedDict()

        for track in tracks:
            tableDict[track.title] = OrderedDict()
            result = doAnalysis(selectedAnalysis, analysisBins, [track])
            resultDict = result.getGlobalResult()
            if 'Result' in resultDict:
                track.setAttribute(analysisName.lower(),
                                   str(resultDict['Result']))
                tableDict[
                    track.title][analysisName] = strWithNatLangFormatting(
                        resultDict['Result'])
            else:
                for attrName, attrVal in resultDict.iteritems():
                    attrNameExtended = analysisName + ':' + attrName
                    track.setAttribute(attrNameExtended.lower(), str(attrVal))
                    tableDict[track.title][
                        attrNameExtended] = strWithNatLangFormatting(attrVal)
                    # assert isinstance(resultDict['Result'], (int, basestring, float)), type(resultDict['Result'])

        core = HtmlCore()
        core.begin()
        core.header('Results: ' + analysisName)

        def _produceTable(core, tableDict=None, tableId=None):
            return core.tableFromDictOfDicts(tableDict,
                                             firstColName='Track title',
                                             tableId=tableId,
                                             expandable=True,
                                             visibleRows=20,
                                             presorted=0)

        tableId = 'results_table'
        tableFile = GalaxyRunSpecificFile([tableId, 'table.tsv'], galaxyFn)
        tabularHistElementName = 'Raw results: ' + analysisName

        gsuiteFile = GalaxyRunSpecificFile(
            [tableId, 'input_with_results.gsuite'], galaxyFn)
        GSuiteComposer.composeToFile(gSuite, gsuiteFile.getDiskPath())
        gsuiteHistElementName = \
            getGSuiteHistoryOutputName('result', ', ' + analysisName, choices.gsuite)

        core.tableWithImportButtons(
            tabularFile=True,
            tabularFn=tableFile.getDiskPath(),
            tabularHistElementName=tabularHistElementName,
            gsuiteFile=True,
            gsuiteFn=gsuiteFile.getDiskPath(),
            gsuiteHistElementName=gsuiteHistElementName,
            produceTableCallbackFunc=_produceTable,
            tableDict=tableDict,
            tableId=tableId)
        core.end()
        print core
Exemplo n.º 16
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''
        import numpy
        numpy.seterr(all='raise')
        cls._setDebugModeIfSelected(choices)
        # DebugUtil.insertBreakPoint(username=username, currentUser='******')
        genome = choices.genome
        analysisQuestion = choices.analysisName
        similaryStatClassName = choices.similarityFunc if choices.similarityFunc else GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP
        summaryFunc = choices.summaryFunc if choices.summaryFunc else 'average'
        reverse = 'Yes' if choices.reversed else 'No'

        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)
        tracks = [
            Track(x.trackName, trackTitle=x.title) for x in gsuite.allTracks()
        ]
        trackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in gsuite.allTracks()])

        additionalResultsDict = OrderedDict()
        additionalAttributesDict = OrderedDict()
        if analysisQuestion in [cls.Q1, cls.Q2, cls.Q3]:
            additionalAttributesDict = cls.getSelectedAttributesForEachTrackDict(
                choices.additionalAttributes, gsuite)
            #additional analysis
            stats = [CountStat, CountElementStat]
            additionalResultsDict = runMultipleSingleValStatsOnTracks(
                gsuite, stats, analysisBins, queryTrack=None)

        if analysisQuestion == cls.Q1:
            analysisSpec = AnalysisSpec(
                GSuiteRepresentativenessOfTracksRankingsWrapperStat)
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
            analysisSpec.addParameter(
                'summaryFunc',
                GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
            analysisSpec.addParameter('reverse', reverse)
            analysisSpec.addParameter('ascending', 'No')
            analysisSpec.addParameter('trackTitles', trackTitles)
            analysisSpec.addParameter('queryTracksNum', len(tracks))
            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()

            gsPerTrackResultsModel = GSuitePerTrackResultModel(
                results,
                ['Similarity to rest of tracks in suite (%s)' % summaryFunc],
                additionalResultsDict=additionalResultsDict,
                additionalAttributesDict=additionalAttributesDict)
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
            else:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

            core = HtmlCore()
            core.begin()
            core.divBegin(divId='results-page')
            core.divBegin(divClass='results-section')
            core.header(analysisQuestion)
            topTrackTitle = results.keys()[0]
            core.paragraph('''
                The track "%s" is the most representative track of the GSuite with %s %s similarity to the rest of the tracks
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle, results[topTrackTitle], summaryFunc,
                   similaryStatClassName))

            addTableWithTabularAndGsuiteImportButtons(
                core,
                choices,
                galaxyFn,
                cls.Q1_SHORT,
                decoratedResultsDict,
                columnTitles,
                gsuite=gsuite,
                results=results,
                gsuiteAppendAttrs=['similarity_score'],
                sortable=True)

            # plot
            columnInd = 0
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                columnInd = 1
            res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
                results,
                additionalResultsDict,
                'Similarity to rest of tracks in suite (%s)' % summaryFunc,
                columnInd=columnInd)
            core.line(res)
            core.divEnd()
            core.divEnd()
            core.end()

        # elif analysisQuestion == cls.Q2:
        #     analysisSpec = AnalysisSpec(GSuiteRepresentativenessOfTracksRankingsWrapperStat)
        #     analysisSpec.addParameter('pairwiseStatistic', GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
        #     analysisSpec.addParameter('summaryFunc', GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
        #     analysisSpec.addParameter('reverse', reverse)
        #     analysisSpec.addParameter('ascending', 'Yes')
        #     analysisSpec.addParameter('trackTitles', trackTitles)
        #     results = doAnalysis(analysisSpec, analysisBins, tracks).getGlobalResult()
        #
        #     gsPerTrackResultsModel = GSuitePerTrackResultModel(
        #         results, ['Similarity to rest of tracks in suite (%s)' % summaryFunc],
        #         additionalResultsDict=additionalResultsDict,
        #         additionalAttributesDict=additionalAttributesDict)
        #     if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
        #         columnTitles, decoratedResultsDict = \
        #             gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
        #     else:
        #         columnTitles, decoratedResultsDict = \
        #             gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()
        #
        #     core = HtmlCore()
        #     core.begin()
        #     core.divBegin(divId='results-page')
        #     core.divBegin(divClass='results-section')
        #     core.header(analysisQuestion)
        #     topTrackTitle = results.keys()[0]
        #     core.paragraph('''
        #         The track "%s" is the most atypical track of the GSuite with %s %s similarity to the rest of the tracks
        #         as measured by the "%s" track similarity measure.
        #     ''' % (topTrackTitle, strWithNatLangFormatting(results[topTrackTitle]), summaryFunc, similaryStatClassName))
        #     # core.tableFromDictionary(results, columnNames=['Track title', 'Similarity to rest of tracks in suite (' + summaryFunc+')'], sortable=False)
        #
        #     from quick.util import CommonFunctions
        #     rawDataURIList = CommonFunctions.getHyperlinksForRawTableData(
        #         dataDict=decoratedResultsDict, colNames=columnTitles,
        #         tableId="resultsTable", galaxyFn=galaxyFn)
        #     core.tableFromDictionary(decoratedResultsDict, columnNames=columnTitles, sortable=True,
        #                              tableId='resultsTable', addInstruction=True,
        #                              addRawDataSelectBox=True, rawDataURIList=rawDataURIList)
        #     # core.tableFromDictionary(decoratedResultsDict, columnNames=columnTitles, sortable=True, tableId='resultsTable')
        #
        #     columnInd = 0
        #     if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
        #         columnInd = 1
        #     res = GSuiteTracksCoincidingWithQueryTrackTool.drawPlot(
        #         results, additionalResultsDict,
        #         'Similarity to rest of tracks in suite (%s)' % summaryFunc,
        #         columnInd=columnInd)
        #     core.line(res)
        #     core.divEnd()
        #     core.divEnd()
        #     core.end()
        #
        #     if choices.addResults == 'Yes':
        #         GSuiteStatUtils.addResultsToInputGSuite(
        #             gsuite, results, ['Similarity_score'],
        #             cls.extraGalaxyFn[GSUITE_EXPANDED_WITH_RESULT_COLUMNS_FILENAME])
        elif analysisQuestion == cls.Q3:

            mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
            AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]

            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDRv3$'] + ' -> GSuiteRepresentativenessOfTracksRankingsAndPValuesWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack')
            analysisSpec.addParameter(
                'rawStatistic',
                SummarizedInteractionWithOtherTracksV2Stat.__name__)
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
            analysisSpec.addParameter(
                'summaryFunc',
                GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
            analysisSpec.addParameter('tail', 'right-tail')
            analysisSpec.addParameter('trackTitles', trackTitles)
            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()
            core = HtmlCore()

            gsPerTrackResultsModel = GSuitePerTrackResultModel(
                results, [
                    'Similarity to rest of tracks in suite (%s)' % summaryFunc,
                    'P-value'
                ],
                additionalResultsDict=additionalResultsDict,
                additionalAttributesDict=additionalAttributesDict)
            if choices.leadAttribute and choices.leadAttribute != GSuiteConstants.TITLE_COL:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict(choices.leadAttribute)
            else:
                columnTitles, decoratedResultsDict = \
                    gsPerTrackResultsModel.generateColumnTitlesAndResultsDict()

            core.begin()
            core.divBegin(divId='results-page')
            core.divBegin(divClass='results-section')
            core.header(analysisQuestion)
            topTrackTitle = results.keys()[0]
            core.paragraph('''
                The track "%s" has the lowest P-value of %s corresponding to %s %s similarity to the rest of the tracks
                as measured by "%s" track similarity measure.
            ''' % (topTrackTitle,
                   strWithNatLangFormatting(results[topTrackTitle][1]),
                   strWithNatLangFormatting(results[topTrackTitle][0]),
                   summaryFunc, similaryStatClassName))
            # core.tableFromDictionary(results, columnNames=['Track title', 'Similarity to rest of tracks in suite (' + summaryFunc+')', 'P-value'], sortable=False)

            addTableWithTabularAndGsuiteImportButtons(
                core,
                choices,
                galaxyFn,
                cls.Q3_SHORT,
                decoratedResultsDict,
                columnTitles,
                gsuite=gsuite,
                results=results,
                gsuiteAppendAttrs=['similarity_score', 'p_value'],
                sortable=True)

            core.divEnd()
            core.divEnd()
            core.end()
        else:  # Q4
            mcfdrDepth = choices.mcfdrDepth if choices.mcfdrDepth else \
                AnalysisDefHandler(REPLACE_TEMPLATES['$MCFDR$']).getOptionsAsText().values()[0][0]
            analysisDefString = REPLACE_TEMPLATES[
                '$MCFDRv3$'] + ' -> CollectionSimilarityHypothesisWrapperStat'
            analysisSpec = AnalysisDefHandler(analysisDefString)
            analysisSpec.setChoice('MCFDR sampling depth', mcfdrDepth)
            analysisSpec.addParameter('assumptions',
                                      'PermutedSegsAndIntersegsTrack')
            analysisSpec.addParameter('rawStatistic',
                                      'MultitrackSummarizedInteractionV2Stat')
            analysisSpec.addParameter(
                'pairwiseStatistic', GSuiteStatUtils.
                PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similaryStatClassName])
            analysisSpec.addParameter(
                'summaryFunc',
                GSuiteStatUtils.SUMMARY_FUNCTIONS_MAPPER[summaryFunc])
            analysisSpec.addParameter('multitrackSummaryFunc',
                                      'avg')  # should it be a choice?
            analysisSpec.addParameter('tail', 'right-tail')
            results = doAnalysis(analysisSpec, analysisBins,
                                 tracks).getGlobalResult()
            pval = results['P-value']
            observed = results['TSMC_MultitrackSummarizedInteractionV2Stat']
            significanceLevel = 'strong' if pval < 0.01 else (
                'weak' if pval < 0.05 else 'no')
            core = HtmlCore()
            core.begin()
            core.divBegin(divId='results-page')
            core.divBegin(divClass='results-section')
            core.header(analysisQuestion)
            core.paragraph('''
                The tracks in the suite show %s significance in their collective similarity
                (average similarity of a track to the rest) of %s
                and corresponding p-value of %s,
                as measured by "%s" track similarity measure.
            ''' % (significanceLevel, strWithNatLangFormatting(observed),
                   strWithNatLangFormatting(pval), similaryStatClassName))
            core.divEnd()
            core.divEnd()
            core.end()

        print str(core)
Exemplo n.º 17
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        cls._setDebugModeIfSelected(choices)
        genome = choices.genome
        queryGSuite = getGSuiteFromGalaxyTN(choices.queryGSuite)
        refGSuite = getGSuiteFromGalaxyTN(choices.refGSuite)
        if choices.similarityFunc:
            similarityStatClassNameKey = choices.similarityFunc
        else:
            similarityStatClassNameKey = GSuiteStatUtils.T5_RATIO_OF_OBSERVED_TO_EXPECTED_OVERLAP

        isPointsVsSegments, pointsGSuite, segGSuite = cls.isPointsVsSegmentsAnalysis(queryGSuite, refGSuite)

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome=genome)

        queryTrackList = [Track(x.trackName, x.title) for x in queryGSuite.allTracks()]
        refTrackList = [Track(x.trackName, x.title) for x in refGSuite.allTracks()]

        queryTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in queryGSuite.allTracks()])
        refTrackTitles = CommonConstants.TRACK_TITLES_SEPARATOR.join(
            [quote(x.title, safe='') for x in refGSuite.allTracks()])

        analysisSpec = AnalysisSpec(GSuiteVsGSuiteWrapperStat)
        analysisSpec.addParameter('queryTracksNum', str(len(queryTrackList)))
        analysisSpec.addParameter('refTracksNum', str(len(refTrackList)))
        analysisSpec.addParameter('queryTrackTitleList', queryTrackTitles)
        analysisSpec.addParameter('refTrackTitleList', refTrackTitles)
        analysisSpec.addParameter('similarityStatClassName',
                                  GSuiteStatUtils.PAIRWISE_STAT_LABEL_TO_CLASS_MAPPING[similarityStatClassNameKey])
        if choices.removeZeroRow:
            analysisSpec.addParameter('removeZeroRow', choices.removeZeroRow)
        if choices.removeZeroCol:
            analysisSpec.addParameter('removeZeroColumn', choices.removeZeroCol)
        resultsObj = doAnalysis(analysisSpec, analysisBins, queryTrackList + refTrackList)
        results = resultsObj.getGlobalResult()

        # baseDir = GalaxyRunSpecificFile([RAW_OVERLAP_TABLE_RESULT_KEY], galaxyFn).getDiskPath()
        # rawOverlapHeatmapPresenter = HeatmapFromDictOfDictsPresenter(resultsObj, baseDir,
        #                                                              'Overlapping base-pair of tracks from the two suites',
        #                                                              printDimensions=False)

        rawOverlapTableData = results[RAW_OVERLAP_TABLE_RESULT_KEY]
        maxRawOverlap, maxROt1, maxROt2 = rawOverlapTableData.getMaxElement()
        similarityScoreTableData = results[SIMILARITY_SCORE_TABLE_RESULT_KEY]
        maxSimScore, maxSSt1, maxSSt2 = similarityScoreTableData.getMaxElement()

        baseDir = GalaxyRunSpecificFile([], galaxyFn=galaxyFn).getDiskPath()
        heatmapPresenter = HeatmapFromTableDataPresenter(resultsObj, baseDir=baseDir,
                                            header='Overlapping base-pairs between the tracks of the two suites',
                                            printDimensions=False)
        tablePresenter = MatrixGlobalValueFromTableDataPresenter(resultsObj, baseDir=baseDir,
                                            header='Table of overlapping base-pairs between the tracks of the two suites')

        core = HtmlCore()
        core.begin()
        core.divBegin(divId='results-page')
        core.divBegin(divId='svs-res-main-div', divClass='svs-res-main')    
        core.divBegin(divId='raw-overlap-div', divClass='results-section')
        core.divBegin(divId='raw-overlap-table', divClass='svs-table-div')
        core.header('Base-pair overlaps between the tracks of the two GSuites')
        core.paragraph("""From the tracks in the two GSuites the highest base-pair overlap <b>(%s bps)</b>
        is observed for the pair of <b>'%s'</b> and <b>'%s'</b>.""" % (maxRawOverlap, maxROt1, maxROt2))
        
        core.divBegin(divId='raw-table-result', divClass='result-div')
        core.divBegin(divId='raw-table-result', divClass='result-div-left')
        core.line('''Follow the links to view the results in an HTML table
        or raw tabular form:''')
        core.divEnd()
        core.divBegin(divId='raw-table-result', divClass='result-div-right')
        core.line(tablePresenter.getReference(RAW_OVERLAP_TABLE_RESULT_KEY))
        core.divEnd()#rawoverlap table
        core.divEnd()
        core.divEnd()
        core.divBegin(divId='raw-overlap-heatmap', divClass='svs-heatmap-div')
        try:
            core.header('Heatmap of base-pair overlaps')
            core.divBegin(divId='raw-table-result', divClass='result-div-heatmap')
            core.divBegin(divId='raw-table-result', divClass='result-div-left')
            core.line('''Follow the links to view the heatmap in the desired format:''')
            core.divEnd()
            core.divBegin(divId='raw-table-result', divClass='result-div-right')
            core.line(heatmapPresenter.getReference(RAW_OVERLAP_TABLE_RESULT_KEY))
            core.divEnd()
            core.divEnd()
        except:
            core.line('Heatmap for the base-pair overlaps could not be created.')
            core.divEnd()
            core.divEnd()
        core.divEnd()#rawoverlap heatmap
        core.divEnd()#rawoverlap
        

        core.divBegin(divId='sim-score-div', divClass='results-section')
        core.divBegin(divId='sim-score-table', divClass='svs-table-div')
        core.header('Similarity score between the tracks of the two GSuites measured by %s' % choices.similarityFunc)
        core.paragraph("""From the tracks in the two GSuites the highest similarity score <b>(%s)</b>
        is observed for the pair of <b>'%s'</b> and <b>'%s'</b>.""" % (maxSimScore, maxSSt1, maxSSt2))
        core.divBegin(divId='raw-table-result', divClass='result-div')
        core.divBegin(divId='raw-table-result', divClass='result-div-left')
        core.line("""Follow the links to view the results in an HTML table or raw tabular form:""")
        core.divEnd()
        core.divBegin(divId='raw-table-result', divClass='result-div-right')
        core.line(tablePresenter.getReference(SIMILARITY_SCORE_TABLE_RESULT_KEY))
        core.divEnd()
        core.divEnd()
        core.divEnd()#simscore table
        core.divBegin(divId='sim-score-heatmap', divClass='svs-heatmap-div')
        try:
            core.header('Heatmap of similarity scores')
            core.divBegin(divId='raw-table-result', divClass='result-div-heatmap')
            core.divBegin(divId='raw-table-result', divClass='result-div-left')
            core.line('''Follow the links to view the heatmap in the desired format:''')
            core.divEnd()
            core.divBegin(divId='raw-table-result', divClass='result-div-right')
            core.line(heatmapPresenter.getReference(SIMILARITY_SCORE_TABLE_RESULT_KEY))
            core.divEnd()
            core.divEnd()
        except:
            core.line('Heatmap for the similarity score could not be created.')
            core.divEnd()
            core.divEnd()
        core.divEnd()#simscore heatmap
        core.divEnd()#simscore
        core.divEnd()#results
        # core.paragraph(
        #     '''Table displaying the number of base-pairs overlapping between the tracks in the two suites:''')
        # core.tableFromDictOfDicts(rawOverlapTableData, firstColName='Track title')
        # # core.paragraph(rawOverlapHeatmapPresenter.getReference(resDictKey=RAW_OVERLAP_TABLE_RESULT_KEY))
        # core.paragraph(
        #     '''Table displaying the similarity score for the tracks in the two suites as measured by %s:''' % similarityStatClassNameKey)
        # core.tableFromDictOfDicts(similarityScoreTableData, firstColName='Track title')
        #
        core.divEnd()
        core.end()

        print str(core)
    def run(self):
        assert self._referenceTrackFn is not None

        if (isinstance(self._referenceTrackFn, basestring)):
            regSpec, binSpec = 'file', self._referenceTrackFn
        elif (type(self._referenceTrackFn) == list):
            regSpec, binSpec = 'track', ':'.join(self._referenceTrackFn)

        trackName1 = self._queryTrackName
        trackName2 = None

        from gold.description.TrackInfo import TrackInfo

        formatName = TrackInfo(self._genome, trackName1).trackFormatName
        #        formatConv = ''
        #        if 'segments' in formatName:
        #            formatConv = '[tf1:=SegmentToStartPointFormatConverter:]'

        #        analysisDef = formatConv + '-> CountPointStat'

        from gold.statistic.CountStat import CountStat
        #analysisDef = '-> CountSegmentStat' if 'segments' in formatName else '-> CountPointStat'
        analysisDef = CountStat

        # print '_referenceTrackFn' + str(self._referenceTrackFn)
        # print '_queryTrackName' + str(self._queryTrackName)
        #
        # print 'trackName1' + str(trackName1)
        # print 'trackName2' + str(trackName2)

        #analysisDef = CountStat

        #print '<div class="debug">'
        #trackName1, trackName2, analysisDef = GalaxyInterface._cleanUpAnalysisDef(trackName1, trackName2, analysisDef)
        #trackName1, trackName2 = GalaxyInterface._cleanUpTracks([trackName1, trackName2], genome, realPreProc=True)
        #
        #userBinSource, fullRunArgs = GalaxyInterface._prepareRun(trackName1, trackName2, analysisDef, regSpec, binSpec, self._genome)
        #res = AnalysisDefJob(analysisDef, trackName1, trackName2, userBinSource, **fullRunArgs).run()

        #if it is not a gSuite
        #res = GalaxyInterface.runManual([trackName1, trackName2], analysisDef, regSpec, binSpec, self._genome, printResults=False, printHtmlWarningMsgs=False)

        #if gSuite
        from gold.application.HBAPI import PlainTrack
        from gold.application.HBAPI import doAnalysis
        from gold.description.AnalysisDefHandler import AnalysisSpec

        analysisBins = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, self._genome)
        res = doAnalysis(AnalysisSpec(analysisDef), analysisBins,
                         [PlainTrack(self._queryTrackName)])
        #print 'ccc'
        #resultDict = res.getGlobalResult()

        resDictKeys = res.getResDictKeys()

        if len(resDictKeys) == 1:
            #assert len(resDictKeys)==1, resDictKeys
            resDictKey = resDictKeys[0]
            targetBins = [
                bin for bin in res.keys() if res[bin][resDictKey] > 0
            ]
            self._result = res
            self._intersectedReferenceBins = targetBins