def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands,
                                 ids, edges, weights, extras, region):
        """Create randomized arrays by sampling by distance to a reference track.

        Loads the track view of ``self._trackNameIntensity`` for ``region``
        and hands it, together with all the array arguments (passed through
        unchanged), to
        ``_createRandomizedNumpyArraysFromDistanceToReference``.

        Raises:
            InvalidRunSpecException: if the reference track view holds no
                values in ``region``, or if the reference track is dense.
        """
        # self._trackNameIntensity holds the reference track name; the
        # attribute name is kept for naming convenience wrt. inheritance.
        referenceTV = PlainTrack(self._trackNameIntensity).getTrackView(region)

        if len(referenceTV.valsAsNumpyArray()) == 0:
            # Note the trailing space after "region": without it the region
            # string is glued directly onto the preceding word.
            raise InvalidRunSpecException(
                'Error: No reference data available for sampling randomized locations in region '
                + str(region)
                + '. Please check that the reference track was created with the same main track that is being randomized in this analysis.')

        if referenceTV.trackFormat.isDense():
            raise InvalidRunSpecException(
                'Error: Cannot sample by distance to reference if reference is a dense track'
            )

        return self._createRandomizedNumpyArraysFromDistanceToReference(
            binLen, starts, ends, vals, strands, ids, edges, weights,
            extras, referenceTV)
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges,
                                     weights, extras, region):
        """Create randomized arrays by sampling from a binary universe track.

        Fetches the track view of ``self._trackNameUniverse`` for ``region``
        and forwards it, along with the untouched array arguments, to
        ``_createRandomizedNumpyArraysFromBinaryUniverse``.

        Raises:
            InvalidRunSpecException: if the universe track is dense.
        """
        universeView = PlainTrack(self._trackNameUniverse).getTrackView(region)

        if not universeView.trackFormat.isDense():
            return self._createRandomizedNumpyArraysFromBinaryUniverse(
                binLen, starts, ends, vals, strands, ids, edges, weights, extras, universeView)

        raise InvalidRunSpecException('Error: Universe needs to be a binary (non-dense) track')
Пример #3
0
 def _constructBins(regSpec, binSpec, genome, trackNames):
     """Construct and check the analysis bins for a batch run.

     Returns a two-element list: ``[None, userBinSource]`` when the bin
     source could be built, otherwise ``[results, None]`` where ``results``
     is a ``Results`` object carrying an ``InvalidRunSpecException``.

     If ``DebugConfig.PASS_ON_BATCH_EXCEPTIONS`` is set, the original
     exception is re-raised (after being logged) instead of being returned.
     """
     try:
         from quick.application.GalaxyInterface import GalaxyInterface
         binSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome, trackNames)
     except Exception as err:
         errorResults = Results([], [], '')
         errorResults.addError(
             InvalidRunSpecException(
                 'Error in specification of analysis region or binsize: ' + str(err)))
         logMessage('Error in specification of analysis region (' + regSpec +
                    ') or binsize: (' + binSpec + ')')
         if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
             raise
         return [errorResults, None]
     else:
         return [None, binSource]
Пример #4
0
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands,
                                     ids, edges, weights, extras, region):
        """Create randomized arrays by sampling from a binary intensity track.

        Fetches the track view of ``self._trackNameIntensity`` for ``region``
        and forwards it, along with the untouched array arguments, to
        ``_createRandomizedNumpyArraysFromBinaryIntensity``.

        Raises:
            InvalidRunSpecException: if the intensity track is dense.
        """
        # The attribute is called _trackNameIntensity for naming convenience
        # wrt. inheritance.
        intensityView = PlainTrack(self._trackNameIntensity).getTrackView(region)

        if not intensityView.trackFormat.isDense():
            return self._createRandomizedNumpyArraysFromBinaryIntensity(
                binLen, starts, ends, vals, strands, ids, edges, weights,
                extras, intensityView)

        raise InvalidRunSpecException(
            'Error: Intensity needs to be a binary (non-dense) track')
Пример #5
0
    def _inferTrackName(trackName, genome, fullAccess):
        if len(trackName) == 0 or \
                len(trackName) == 1 and trackName[0].lower() in ['blank', 'none', 'dummy', '_', ' ', '']:
            return None

        # trackName = rawTN.replace('_',' ').split(':')
        # trackName = rawTN.split(':')
        #
        # trackName = convertTNstrToTNListFormat(rawTN)

        if ProcTrackOptions.isValidTrack(genome, trackName, fullAccess):
            return trackName
        else:
            raise InvalidRunSpecException('Error in trackname specification. \'' +\
                                          ':'.join(trackName) + '\' does not match any tracknames. ' +\
                                          'This may be because of limited user permissions.')
Пример #6
0
    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands,
                                     ids, edges, weights, extras, region):
        """Create randomized arrays by sampling from an intensity function track.

        Loads the track view of ``self._trackNameIntensity`` for ``region``
        and hands it, together with all the array arguments (passed through
        unchanged), to ``_createRandomizedNumpyArraysFromIntensityFunction``.

        Raises:
            InvalidRunSpecException: if the intensity track view holds no
                values in ``region``.
            NotImplementedError: if the intensity track is not dense.
            AssertionError: if the dense intensity track is not valued as
                'number'.
        """
        intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(region)

        if len(intensityTV.valsAsNumpyArray()) == 0:
            # Note the trailing space after "region": without it the region
            # string is glued directly onto the preceding word.
            raise InvalidRunSpecException(
                'Error: No intensity data available for sampling randomized locations in region '
                + str(region)
                + '. Please check that the intensity track was created with the same main track that is being randomized in this analysis.')

        # Only dense (function-like) intensity tracks are supported here.
        if not intensityTV.trackFormat.isDense():
            raise NotImplementedError

        assert intensityTV.trackFormat.isValued('number')
        return self._createRandomizedNumpyArraysFromIntensityFunction(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras,
            intensityTV)
Пример #7
0
    def parseBatchLine(batchLine, genome, fullAccess):
        """Parse one line of a batch specification.

        The line is split on ``BATCH_COL_SEPARATOR`` and must have exactly
        6 columns. Column 1 is the region specification, column 2 the
        (URL-quoted) bin specification, and columns 3 and 4 the two
        colon-separated, URL-quoted track names.

        Returns:
            * ``None`` for blank lines and lines starting with '#'.
            * ``(results, None, None, None, None)`` when the column count is
              wrong, with ``results`` carrying an InvalidRunSpecException.
            * the ``BatchContents`` object with ``errorResult`` set when
              cleaning or validating the track names fails.

        NOTE(review): when the track names are valid, the code shown here
        falls off the end of the function (implicit ``None``); this excerpt
        looks truncated -- confirm against the full source.

        Raises:
            InvalidRunSpecException, IdenticalTrackNamesError: re-raised from
                track name handling when
                ``DebugConfig.PASS_ON_BATCH_EXCEPTIONS`` is set.
        """
        # Test for blank lines first: indexing batchLine[0] on an empty
        # string would raise IndexError.
        if batchLine.strip() == '' or batchLine[0] == '#':
            return

        from urllib import unquote

        # Split and check number of columns
        cols = batchLine.strip().split(BATCH_COL_SEPARATOR)
        if len(cols) != 6:
            results = Results(['N/A'], ['N/A'], 'N/A')
            results.addError(InvalidRunSpecException('Error in batch specification. 6 columns are required, while '\
                                        + str(len(cols)) + ' are given: ' + batchLine))
            return results, None, None, None, None

        bc = BatchContents()

        bc.regSpec = cols[1]
        bc.binSpec = unquote(cols[2])

        # A bin specification that is a Galaxy track name is replaced by the
        # file name extracted from it.
        from quick.application.ExternalTrackManager import ExternalTrackManager
        if ExternalTrackManager.isGalaxyTrack(bc.binSpec.split(':')):
            bc.binSpec = ExternalTrackManager.extractFnFromGalaxyTN(
                bc.binSpec.split(':'))

        try:
            from quick.application.GalaxyInterface import GalaxyInterface
            bc.trackName1 = [unquote(x) for x in cols[3].split(':')]
            bc.trackName2 = [unquote(x) for x in cols[4].split(':')]
            bc.cleanedTrackName1, bc.cleanedTrackName2 = GalaxyInterface._cleanUpTracks(
                [bc.trackName1, bc.trackName2], genome, realPreProc=True)

            bc.cleanedTrackName1 = BatchRunner._inferTrackName(
                bc.cleanedTrackName1, genome, fullAccess)
            bc.cleanedTrackName2 = BatchRunner._inferTrackName(
                bc.cleanedTrackName2, genome, fullAccess)

        except (InvalidRunSpecException, IdenticalTrackNamesError) as e:
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise
            bc.errorResult = Results(['N/A'], ['N/A'], 'N/A')
            bc.errorResult.addError(e)
            return bc
Пример #8
0
    def parseBatchLine(batchLine, genome, fullAccess):
        """Parse one line of a batch specification.

        The line is split on ``BATCH_COL_SEPARATOR`` and must have exactly
        6 columns. Column 1 is the region specification, column 2 the
        (URL-quoted) bin specification, columns 3 and 4 the two
        colon-separated, URL-quoted track names, and column 5 the statistic
        class with its parameters. A 'trackNameIntensity' parameter, if
        present, is appended as a third track name.

        Returns:
            * ``None`` for blank lines and lines starting with '#'.
            * ``(results, None, None, None, None)`` when the column count is
              wrong, with ``results`` carrying an InvalidRunSpecException.
            * the ``BatchContents`` object with ``errorResult`` set when
              validating the track names fails.

        NOTE(review): when the track names are valid, the code shown here
        falls off the end of the function (implicit ``None``); this excerpt
        looks truncated -- confirm against the full source.

        Raises:
            InvalidRunSpecException, IdenticalTrackNamesError: re-raised from
                track name validation when
                ``DebugConfig.PASS_ON_BATCH_EXCEPTIONS`` is set.
        """
        # Test for blank lines first: indexing batchLine[0] on an empty
        # string would raise IndexError.
        if batchLine.strip() == '' or batchLine[0] == '#':
            return

        from urllib import unquote

        # Split and check number of columns
        cols = batchLine.strip().split(BATCH_COL_SEPARATOR)
        if len(cols) != 6:
            results = Results(['N/A'], ['N/A'], 'N/A')
            results.addError(InvalidRunSpecException('Error in batch specification. 6 columns are required, while '\
                                        + str(len(cols)) + ' are given: ' + batchLine))
            return results, None, None, None, None

        bc = BatchContents()

        bc.regSpec = cols[1]
        bc.binSpec = unquote(cols[2])

        # A bin specification that is a Galaxy track name is replaced by the
        # file name extracted from it.
        from quick.application.ExternalTrackManager import ExternalTrackManager
        if ExternalTrackManager.isGalaxyTrack(bc.binSpec.split(':')):
            bc.binSpec = ExternalTrackManager.extractFnFromGalaxyTN(bc.binSpec.split(':'))

        bc.statClassName, bc.paramDict = BatchRunner._parseClassAndParams(cols[5])

        bc.trackNames = [[unquote(x) for x in cols[i].split(':')] for i in [3, 4]]
        if 'trackNameIntensity' in bc.paramDict:
            bc.trackNames.append(convertTNstrToTNListFormat(bc.paramDict['trackNameIntensity'], doUnquoting=True))

        from quick.application.GalaxyInterface import GalaxyInterface

        partlyCleanedTrackNames = GalaxyInterface._cleanUpTracks(bc.trackNames, genome, realPreProc=True)

        try:
            bc.cleanedTrackNames = BatchRunner._inferTrackNames(partlyCleanedTrackNames, genome, fullAccess)

        except (InvalidRunSpecException, IdenticalTrackNamesError) as e:
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise
            bc.errorResult = Results(['N/A'], ['N/A'], 'N/A')
            bc.errorResult.addError(e)
            return bc
Пример #9
0
    def execute(choices, galaxyFn=None, username=''):
        """Run a GWAS enrichment or hypothesis-testing analysis and print an HTML report.

        For every disease (leaf track) under the selected GWAS track, and for
        every selected reference track, a result is computed with
        ``directGetEnrichment`` or ``directGetTestResults``. Progress and the
        final result tables are printed to standard output as HTML.

        ``choices`` is an indexable sequence of tool options, read as follows:
            [0]  genome
            [1]  'Prepared catalogues' or 'Custom track'
            [2]  name of the prepared catalogue, or the colon-separated
                 custom track name
            [3]  flank size (appended to the track name for prepared
                 catalogues; must be 'SNPs' for a custom track)
            [4]  reference track source (e.g. 'H3K4me3', 'DHS')
            [5]  normalization; must be 'CoverageDepth'
            [6]  analysis type: 'Enrichment' or 'Testing'
            [7]  kernel type
            [8]  kernel parameter
            [9]  null model description (only read for 'Testing')
            [10] whether to include links to detailed results
            [11] Monte Carlo depth, passed on as ``mcDepth``
            [12] 'yes'/'no': include local results
            [13] reference track selection type
            [14] single reference track name, or a range such as "15-18"
            [15] 'yes'/'no': use cache
            [16] 'yes'/'no': print progress
            [-1] optional Galaxy track name of a prior coloring file

        Raises:
            InvalidRunSpecException: if an option value is not recognized.
            AssertionError: on the option combinations asserted below.
        """
        from time import time
        startTime = time()

        def _yesNoToBool(option):
            # Translate a 'yes'/'no' option value into a boolean.
            if option == 'yes':
                return True
            if option == 'no':
                return False
            raise InvalidRunSpecException('Did not understand option: %s' %
                                          (option,))

        print(HtmlCore().begin())
        print('<pre>')
        genome = choices[0]
        flankSize = choices[3]

        # Base track names of the prepared GWAS catalogues.
        preparedCatalogues = {
            'GiulioNewGwas': 'Private:GK:NewGwasBase',
            'GiulioAllGwas': 'Private:GK:AllGiulioGwasSnpsAsOf9feb13',
            'GiulioMay13Gwas': 'Private:GK:Gwas:GiulioMay13',
            'SmallTest': 'Private:GK:Gwas',
        }

        if choices[1] == 'Prepared catalogues':
            if choices[2] not in preparedCatalogues:
                raise InvalidRunSpecException(
                    'Did not understand option: %s' % (choices[2],))
            gwasTnBase = preparedCatalogues[choices[2]].split(':')
            gwasTnBase += [flankSize]
        elif choices[1] == 'Custom track':
            gwasTnBase = choices[2].split(':')
            assert flankSize == 'SNPs'
        else:
            assert False, choices[1]

        referenceTrackSource = choices[4]
        normalization = choices[5]
        assert normalization == 'CoverageDepth'

        analysisType = choices[6]
        if analysisType == 'Enrichment':
            ResultClass = EnrichmentGwasResults
        elif analysisType == 'Testing':
            ResultClass = HypothesisTestingGwasResults
            # Maps the user-facing null model description to the
            # randomization track specification.
            nullmodelMapping = {
                'Sample disease regions uniformly':
                    'PermutedSegsAndSampledIntersegsTrack_',
                'Sample disease regions with preserved inter-region spacings':
                    'PermutedSegsAndIntersegsTrack_',
                'Sample disease regions with preserved distance to nearest exon':
                    'SegsSampledByDistanceToReferenceTrack_,trackNameIntensity=Genes and gene subsets^Exons^Ensembl exons',
            }
            nullmodel = nullmodelMapping[choices[9]]
        else:
            raise InvalidRunSpecException(
                'Did not understand option: %s' % (analysisType,))

        kernelType = choices[7]
        kernelParam = choices[8]

        if choices[10] == 'Include links to full underlying results':
            includeDetailedResults = True
        elif choices[10] == 'Only produce main result values':
            includeDetailedResults = False
        else:
            # Report the option that was actually checked (choices[10]).
            raise InvalidRunSpecException('Did not understand option: %s' %
                                          (choices[10],))

        mcDepth = choices[11]

        includeLocalResults = _yesNoToBool(choices[12])
        useCache = _yesNoToBool(choices[15])
        printProgress = _yesNoToBool(choices[16])

        from quick.application.GalaxyInterface import GalaxyInterface
        print('Progress: ')
        disRes = MultiGwasResults()
        from gold.application.HyperBrowserCLI import getSubTrackLeafTerms
        from quick.application.ProcTrackOptions import ProcTrackOptions

        allDiseases = getSubTrackLeafTerms(genome,
                                           gwasTnBase,
                                           username=username)
        if len(allDiseases) == 0:
            # No sub-tracks: gwasTnBase must itself be a valid track. Its
            # last part is then used as the single disease and the remaining
            # parts as the base.
            fullAccess = GalaxyInterface.userHasFullAccess(username)
            assert ProcTrackOptions.isValidTrack(
                genome, gwasTnBase,
                fullAccess), 'Genome: %s, TN: %s, Access: %s' % (
                    genome, gwasTnBase, fullAccess)
            allDiseases = gwasTnBase[-1:]
            gwasTnBase = gwasTnBase[:-1]

        # Reference track source -> (base track name, coverage function track).
        referenceSources = {
            'H3K4me3': (
                'Private:GK:Psych:H3K4me3',
                'Private^GK^Psych^H3K4me3CoverageTrack'),
            'DHS': (
                'Private:GK:Psych:DHSs',
                'Private^GK^Psych^DHSCoverageTrack'),
            'Chromatin state 1-AP': (
                'Private:Anders:Chromatin State Segmentation:1_Active_Promoter',
                'Private^GWAS^Chromatin^CoverageFunctionTracks^1_Active_PromoterV2'),
            'Chromatin state 4-SE': (
                'Private:Anders:Chromatin State Segmentation:4_Strong_Enhancer',
                'Private^GWAS^Chromatin^CoverageFunctionTracks^4_Strong_Enhancer'),
            'Chromatin state 5-SE': (
                'Private:Anders:Chromatin State Segmentation:5_Strong_Enhancer',
                'Private^GWAS^Chromatin^CoverageFunctionTracks^5_Strong_Enhancer'),
        }

        for disease in allDiseases:
            gwasTn = ':'.join(gwasTnBase + [disease])

            res = ResultClass(gwasId=disease, verbose=True, galaxyFn=galaxyFn)

            if referenceTrackSource not in referenceSources:
                raise InvalidRunSpecException(
                    'Did not understand option: %s' % (referenceTrackSource,))
            refTrackBase, refTrackCoverageFunction = \
                referenceSources[referenceTrackSource]

            refTrackSelectType = choices[13]

            allReferenceTracks = [
                x[0] for x in GalaxyInterface.getSubTrackNames(
                    genome,
                    refTrackBase.split(':'),
                    deep=False,
                    username=username) if not x[0] == '-- All subtypes --'
            ]
            if refTrackSelectType == 'Use all reference tracks':
                referenceSubTypes = allReferenceTracks
            elif refTrackSelectType == 'Select single reference track':
                referenceSubTypes = [choices[14]]
                assert referenceSubTypes[0] in allReferenceTracks
            elif refTrackSelectType == 'Select a range among all reference tracks':
                try:
                    # The range is inclusive at both ends, e.g. "15-18".
                    firstRefTrack, lastRefTrack = choices[14].split('-')
                    referenceSubTypes = allReferenceTracks[
                        int(firstRefTrack):int(lastRefTrack) + 1]
                    print('Analyzing %s among a total of %s reference tracks' % (
                        choices[14], len(allReferenceTracks)))
                except Exception:
                    print('Range format should be e.g. "15-18".')
                    raise
            else:
                raise InvalidRunSpecException(
                    'Did not understand option: %s' % (refTrackSelectType,))

            for referenceSubType in referenceSubTypes:
                if analysisType == 'Enrichment':
                    res[referenceSubType] = directGetEnrichment(
                        gwasTn, referenceSubType, refTrackBase, kernelType,
                        kernelParam, useCache, printProgress)
                elif analysisType == 'Testing':
                    res[referenceSubType] = directGetTestResults(
                        gwasTn, referenceSubType, refTrackBase, kernelType,
                        kernelParam, refTrackCoverageFunction, nullmodel,
                        mcDepth, useCache, printProgress)
                else:
                    raise InvalidRunSpecException(
                        'Did not understand option: %s' % (analysisType,))

            disRes[disease] = res

        print('Total run time (excluding figure generation): %i seconds.' % (
            time() - startTime))
        print('</pre>')

        print('<h1>Results</h1>')
        if len(allDiseases) > 1:
            # The heatmap is best-effort: a failure is reported in the output
            # but does not abort the run.
            try:
                heatMapLink = disRes.getLinkToClusteredHeatmap(
                    'Heatmap', galaxyFn)
                print('<h3>Heatmap</h3> %s' % (heatMapLink,))
            except Exception:
                print('<p>Creation of heatmap failed</p>')
        tableOutput = disRes.getHtmlResultsTable(includeDetailedResults)
        print('<h3>Results table</h3> %s' % (tableOutput,))

        if choices[-1]:
            print('<h3>Prior coloring table</h3>')
            # NOTE(review): ExternalTrackManager is not imported inside this
            # function; presumably it is imported at module level -- confirm.
            colorFn = ExternalTrackManager.extractFnFromGalaxyTN(
                choices[-1].split(':'))
            print(disRes.getColoredSortedReferencesTable(colorFn))

        if includeLocalResults:
            print('<h3>Local results</h3>')
            print(disRes.getLinksToAllLocalHtmlResultsTables(galaxyFn))