Exemplo n.º 1
0
    def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
        """
        If the requested standardized track directory is missing but a real
        sibling directory exists, symlink the requested name to that sibling
        and update the stored TrackInfo to use the requested track name.

        Checks both preprocessed directory trees (allowOverlaps False/True).
        NOTE(review): written as a classmethod body ('cls'); the decorator is
        not visible in this chunk.
        """
        oldTrackName = None
        # Apply the same check/fix to both the non-overlapping and the
        # overlapping preprocessed directory trees.
        for allowOverlaps in [False, True]:
            parentDir = createDirPath(stdTrackName[:-1],
                                      genome,
                                      allowOverlaps=allowOverlaps)
            if os.path.exists(parentDir):
                dirContents = os.listdir(parentDir)
                # Only real subdirectories count as rename candidates;
                # existing symlinks are excluded.
                realDirs = [
                    x for x in dirContents
                    if os.path.isdir(os.path.join(parentDir, x))
                    and not os.path.islink(os.path.join(parentDir, x))
                ]

                reqDirName = stdTrackName[-1]
                reqDirPath = os.path.join(parentDir, reqDirName)

                from gold.application.LogSetup import logMessage
                logMessage('Checking ' + reqDirPath)

                if os.path.islink(reqDirPath) and not os.path.isdir(
                        os.readlink(reqDirPath)):
                    # This is to fix a bug that ended in the symlink pointing to a file
                    os.remove(reqDirPath)
                    logMessage('Removed ' + reqDirPath)

                if realDirs and reqDirName not in dirContents:
                    # Point the requested name at the first real directory found.
                    oldTrackName = stdTrackName[:-1] + [realDirs[0]]
                    os.symlink(realDirs[0], reqDirPath)

        if oldTrackName is not None:
            # Re-register the metadata under the requested (new) track name.
            ti = TrackInfo(genome, oldTrackName)
            ti.trackName = stdTrackName
            ti.store()
 def _computeBinomialTail(cls, x, size, prob, tail):
     """
     P-value for observing x successes in 'size' Bernoulli trials with
     success probability 'prob', for tail in ('less', 'more', 'different').

     Uses a normal approximation when both prob*size and (1-prob)*size reach
     cls.MIN_SUCCESSES_FOR_NORM_APPROXIMATION. Returns None when x exceeds
     cls.MAX_SUCCESSES_FOR_BINOMIAL_RUNTIME (exact computation too costly)
     or when the tail value is unsupported (logged as a warning).
     """
     from gold.application.RSetup import r
     x, size, prob = int(x), int(size), float(prob)
     # Fix: 'pval' was previously unbound (NameError at the final return)
     # whenever the tail argument was unsupported; default to None instead.
     pval = None
     if prob*size >= cls.MIN_SUCCESSES_FOR_NORM_APPROXIMATION <= (1-prob)*size:
         mean = size * prob
         sd = (size*prob*(1-prob))**0.5
         lessPval = r.pnorm(x,mean,sd)
         if tail=='less':
             pval = lessPval
         elif tail=='more':
             pval = 1 - lessPval
         elif tail=='different':
             pval = min(1, 2*min( lessPval, 1-lessPval))
         else:
             from gold.application.LogSetup import logMessage, logging
             logMessage('Unsupported tail (%s) encountered in _computeBinomialTail.'%tail, level=logging.WARN)
     elif x > cls.MAX_SUCCESSES_FOR_BINOMIAL_RUNTIME:
         return None
     else:
         if tail=='less':
             pval = r.pbinom(x,size,prob)
         elif tail=='more':
             pval = 1 - r.pbinom(x-1,size,prob)
         elif tail=='different':
             pval = min(1,2*min( r.pbinom(x,size,prob), 1 - r.pbinom(x-1,size,prob)))
         else:
             # Fix: the exact branch previously fell through silently for an
             # unsupported tail; log it like the approximation branch does.
             from gold.application.LogSetup import logMessage, logging
             logMessage('Unsupported tail (%s) encountered in _computeBinomialTail.'%tail, level=logging.WARN)
     return pval
Exemplo n.º 3
0
    def __init__(self, region, track, track2=None, *args, **kwArgs):
        """
        Store region/track(s) and forward all keyword arguments to _init.

        Raises ShouldNotOccurError if the optional 'isExperimental' kwarg has
        a value other than 'false'/'true' (case-insensitive).
        """
        from config.Config import IS_EXPERIMENTAL_INSTALLATION
        if 'isExperimental' in kwArgs:
            x = kwArgs['isExperimental'].lower()
            if x not in ['false', 'true']:  # fix: idiomatic 'not in'
                logMessage('isExperimental has value other than false/true',
                           level=logging.WARN)
                raise ShouldNotOccurError(
                    'isExperimental has value other than false/true.')
            if x == 'true':
                # Experimental statistics may only run on experimental installations.
                assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION
        #else:
        #    assert IS_EXPERIMENTAL_INSTALLATION

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region = region
        self._track = track
        # Leave _track2 unset (rather than None) when no second track is given.
        if track2 not in [None, []]:
            self._track2 = track2
        self._kwArgs = kwArgs
        self._init(**kwArgs)

        self._trace('__init__')
Exemplo n.º 4
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """
        Galaxy tool entry point (Python 2): build a bp-level Venn diagram HTML
        page from the selected tracks and write it to galaxyFn.

        NOTE(review): this chunk is truncated -- the triple-quoted string
        opened on the last line below has no visible terminator.
        """
        print choices
        logMessage('CreateBpsVennDIagram choices input: ' + repr(choices))
        debugstring = 'i execute\n'

        genome, trackNames = CreateBpsVennDIagram.getTrackNamesFromFormParameters(
            choices)
        trackNameStrings = [':'.join(tn) for tn in trackNames]
        print trackNameStrings

        geSourceList, trackNamesWithoutPath = CreateBpsVennDIagram.getGeSourceList(
            genome, trackNames)

        # Make input similar, if it is many files or one category.bed file.
        # turn into a categoryBedLIst
        if len(trackNames) == 1:  # assume input is one category.bed file
            categoryBedList, categoryNames = CreateBpsVennDIagram.getCategoryBedList(
                geSourceList[0])
        else:
            categoryBedList = CreateBpsVennDIagram.collapseToCategoryBedList(
                geSourceList, trackNamesWithoutPath)
            categoryNames = trackNamesWithoutPath

        # print categoryBedList
        # return
        # make cat selection list, all are considerd in the from this tool. To be used in subsequent methoods that also can be called from other tools where this come into play.
        labelToPrime = CreateBpsVennDIagram.getPrimeList()
        counter = 0
        catInfo = OrderedDict()
        for c in categoryNames:
            # Single category.bed input: all categories map to the one track.
            thisTrackName = trackNameStrings[0]
            if len(trackNames) > 1:
                thisTrackName = trackNameStrings[counter]
            debugstring += str(thisTrackName) + '\n'
            catInfo[c] = {
                'label': labelToPrime.keys()[counter],
                'prime': labelToPrime.values()[counter],
                'selection': 'in',
                'fullTrackName': thisTrackName
            }
            # catInfo[c] = {'label':labelToPrime.keys()[counter], 'prime':labelToPrime.values()[counter], 'selection':'in', 'fullTrackName':thisTrackName}
            counter = counter + 1

        # collapse to startorstop and state lists
        posDict, catDict = CreateBpsVennDIagram.getPosCatDictsFromCategoryBedList(
            categoryBedList, catInfo)

        # iterate list and get stateBPCounter and stateRegions
        stateBPCounter, stateRegions, thisdebugstring = CreateBpsVennDIagram.getStateCount(
            posDict, catDict)

        debugstring += 'stateBPCounter: ' + str(stateBPCounter) + '\n'

        utfil = open(galaxyFn, 'w')
        utfil.write(
            CreateBpsVennDIagram.getHtmlString(catInfo, stateBPCounter,
                                               genome))
        utfil.close()
        # Turn the stateBPCounter into the object used by javascript
        # NOTE(review): the string below is cut off by the chunk boundary.
        '''
 def _combineResults(self):
     """Pearson correlation over accumulated (x, y) child results; 2.5 flags no data."""
     if not self._childResults:
         return 2.5
     logMessage(repr(self._childResults))
     xs, ys = zip(*self._childResults)
     return self._pearsonr(xs, ys)
 def _computeBinomialPval(cls, x, size, prob, tail):
     """
     P-value for x successes out of 'size' trials with probability 'prob',
     for tail in ('less', 'more', 'different').

     A normal approximation is used when both prob*size and (1-prob)*size are
     at least cls.MIN_SUCCESSES_FOR_NORM_APPROXIMATION. Returns None when x
     exceeds cls.MAX_SUCCESSES_FOR_BINOMIAL_RUNTIME or the tail value is
     unsupported (logged as a warning).
     """
     from proto.RSetup import r
     x, size, prob = int(x), int(size), float(prob)
     # Fix: 'pval' could be unbound (NameError at the final return) for an
     # unsupported tail value; default to None so callers get "no result".
     pval = None
     if prob * size >= cls.MIN_SUCCESSES_FOR_NORM_APPROXIMATION <= (
             1 - prob) * size:
         mean = size * prob
         sd = (size * prob * (1 - prob))**0.5
         lessPval = r.pnorm(x, mean, sd)
         if tail == 'less':
             pval = lessPval
         elif tail == 'more':
             pval = 1 - lessPval
         elif tail == 'different':
             pval = min(1, 2 * min(lessPval, 1 - lessPval))
         else:
             from gold.application.LogSetup import logMessage, logging
             logMessage(
                 'Unsupported tail (%s) encountered in _computeBinomialTail.'
                 % tail,
                 level=logging.WARN)
     elif x > cls.MAX_SUCCESSES_FOR_BINOMIAL_RUNTIME:
         return None
     else:
         if tail == 'less':
             pval = r.pbinom(x, size, prob)
         elif tail == 'more':
             pval = 1 - r.pbinom(x - 1, size, prob)
         elif tail == 'different':
             pval = min(
                 1, 2 * min(r.pbinom(x, size, prob),
                            1 - r.pbinom(x - 1, size, prob)))
         else:
             # Fix: the exact branch previously fell through silently for an
             # unsupported tail; log a warning like the approximation branch.
             from gold.application.LogSetup import logMessage, logging
             logMessage(
                 'Unsupported tail (%s) encountered in _computeBinomialTail.'
                 % tail,
                 level=logging.WARN)
     return pval
    def __init__(self, region, trackStructure, *args, **kwArgs):
        """Validate optional flags, store region/track structure, and run _init."""
        from config.Config import IS_EXPERIMENTAL_INSTALLATION  # @UnresolvedImport

        if 'isExperimental' in kwArgs:
            flag = kwArgs['isExperimental'].lower()
            if flag not in ('false', 'true'):
                logMessage('isExperimental has value other than false/true',
                           level=logging.WARN)
                raise ShouldNotOccurError(
                    'isExperimental has value other than false/true.')
            if flag == 'true':
                assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region, self._trackStructure = region, trackStructure

        #TODO:boris 20150924, Code for checking if query and reference (track and track2) are the same track.
        #We should decide if we will allow this in the future.

        self._kwArgs = kwArgs
        self._init(**kwArgs)
        self._trace('__init__')
Exemplo n.º 8
0
 def getValuesFromBedFile(cls, genome, fn, colorPattern=(1,0,0)):
     resDict = defaultdict(list)
     valDict = defaultdict(list)
     lineTab = []
     if type(fn) == type(None):
         return resDict
     elif isinstance(fn, basestring):
         lineTab = open(fn,'r').read().split('\n')
     else:
         lineTab = fn.returnComposed().split('\n')
     
     valueList = []
     for line in lineTab:
         lineTab = line.split('\t')
         try:
             chrom = lineTab[0]
             valDict[chrom]+=[float(lineTab[3])]
         except:
             logMessage(line)
     
     maxVal = max(max(valDict.values()))
     for chrom in GenomeInfo.getChrList(genome):
         if valDict.has_key(chrom):
             try:
                 resDict[chrom]+= [tuple([255 - (int(val*255/maxVal)*v) for v in colorPattern]) for val in valDict[chrom]]
             except:
                 logMessage ('Ny rundeeee:  '+ str([v for v in valDict[chrom][:10]])+ ':   '+str(maxVal))
                            
     print 'count', len(valDict.values())
     return resDict, maxVal
Exemplo n.º 9
0
    def _determineStatClass(self, flushMemoized=True):
        """
        Try each candidate statistic class against a minimal bin source and
        let incompatibility exceptions weed out invalid ones (Python 2
        'except E, e' syntax). Exceptions are re-raised when
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList) == 0:
            # if self._reversed:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: ' + self._analysisLine)
        
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage('Checking validity of stat class "{}" for analysisDef "{}".'.format(statClass.__name__, self.getDefAfterChoices()))

            trackA, trackB = self._track, self._track2
            if trackA is None:
                continue

            # A minimal trial run; failures below mark the class as invalid.
            try:
                StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                        **self.getAllChoices(filterByActivation=True)).run(False, flushMemoized=flushMemoized)

            except IncompatibleTracksError, e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
            except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG,
                                 messagePrefix='Warning: error in _determineStatClass for stat: %s' % statClass.__name__)
                if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                    raise
Exemplo n.º 10
0
    def __init__(self, region, trackStructure, *args, **kwArgs):
        """Validate the experimental flag and assumptions, store state, run _init."""
        from config.Config import IS_EXPERIMENTAL_INSTALLATION  # @UnresolvedImport

        if 'isExperimental' in kwArgs:
            flagValue = kwArgs['isExperimental'].lower()
            if flagValue in ('false', 'true'):
                if flagValue == 'true':
                    assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION
            else:
                logMessage('isExperimental has value other than false/true',
                           level=logging.WARN)
                raise ShouldNotOccurError(
                    'isExperimental has value other than false/true.')

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region = region
        self._trackStructure = trackStructure

        #TODO:boris 20150924, Code for checking if query and reference (track and track2) are the same track.
        #We should decide if we will allow this in the future.

        #TODO: This should probably instead happen in the default _init method, so that when this is
        # overridden, one needs to explicitly store kwArgs if desired.
        #As it is now, parameters will be handled explicitly in _init while still becoming part of self_kwArgs
        self._kwArgs = kwArgs

        self._init(**kwArgs)

        self._trace('__init__')
Exemplo n.º 11
0
 def __init__(self, region, track, track2, rawStatistic, randTrackClass=None, assumptions=None, tails=None, numResamplings=2000, randomSeed=None, **kwArgs):
     """
     Set up a Monte Carlo randomization test: resolve tails, random seed,
     randomized-track classes (from 'assumptions' or 'randTrackClass'), the
     raw statistic, and the number of resamplings.
     """
     # Backwards compatibility: accept 'tail' (more/less/different) in place
     # of 'tails' (right-tail/left-tail/two-tail).
     if tails==None:
         if 'tail' in kwArgs:
             tailTranslator = {'more':'right-tail', 'less':'left-tail', 'different':'two-tail'}
             tails = tailTranslator[kwArgs['tail']]
             if DebugConfig.VERBOSE:
                 logMessage('Argument tail provided instead of tails to RandomizationManagerStatUnsplittable', level=logging.DEBUG)
         else:
             tails = 'right-tail' # or 'two-tail'?
             logMessage('No tails argument provided to RandomizationManagerStatUnsplittable', level=logging.DEBUG)
     
     if track2 is None:
         self._track2 = None #to allow track2 to be passed on as None to rawStatistics without error. For use by single-track MC-tests..
         
     from gold.util.RandomUtil import getManualSeed, setManualSeed
     # Only the first explicit seed wins; 'Random' means no fixed seed.
     if randomSeed is not None and randomSeed != 'Random' and getManualSeed() is None:
         setManualSeed(int(randomSeed))
     
     if 'mcSetupScheme' in kwArgs:
         kwArgs = copy(kwArgs) #to not edit original dict..
         if kwArgs['mcSetupScheme'] != 'custom':
             assert not 'maxSamples' in kwArgs #check that specific values are not redundantly set
         #
     Statistic.__init__(self, region, track, track2, rawStatistic=rawStatistic, randTrackClass=randTrackClass, assumptions=assumptions, tails=tails, numResamplings=numResamplings, randomSeed=randomSeed, **kwArgs)
     #if type(rawStatistic) is str:
     #    from gold.statistic.AllStatistics import STAT_CLASS_DICT
     #    rawStatistic = STAT_CLASS_DICT[rawStatistic]
     
     # Exactly one of randTrackClass / assumptions must be provided.
     assert (randTrackClass is None) ^ (assumptions is None) # xor
     if assumptions is not None:
         assert assumptions.count('_') == 1, assumptions
         randTrackClass1, randTrackClass2 = assumptions.split('_')
     else:
         randTrackClass1 = None
         randTrackClass2 = randTrackClass
     
     # Resolve class names (strings) to classes; 'None'/'' mean no class.
     self._randTrackClass1, self._randTrackClass2 = \
         [ ( globals()[clsDef] if clsDef not in ['None',''] else None ) \
             if isinstance(clsDef, basestring) else clsDef for clsDef in [randTrackClass1, randTrackClass2]]
     
     assert not (randTrackClass1 is None and randTrackClass2 is None)
     for cls in [self._randTrackClass1, self._randTrackClass2]:
         assert cls in [None, PermutedSegsAndSampledIntersegsTrack, \
                        PermutedSegsAndIntersegsTrack, RandomGenomeLocationTrack, SegsSampledByIntensityTrack, ShuffledMarksTrack, SegsSampledByDistanceToReferenceTrack, PointsSampledFromBinaryIntensityTrack]
         
     #print self._randTrackClass1, self._randTrackClass2
     self._rawStatistic = self.getRawStatisticClass(rawStatistic)
     
     #self._randTrackList = []
     self._tails = tails
     # 'minimal' runs (validity checks) use a single resampling only.
     # NOTE(review): self._kwArgs is assumed to be set by Statistic.__init__
     # above -- confirm against the base class.
     if kwArgs.get('minimal') == True:
         self._numResamplings = 1
         self._kwArgs['maxSamples'] = 1
     else:
         self._numResamplings = int(numResamplings)
     CompBinManager.ALLOW_COMP_BIN_SPLITTING = False
     self._randResults = []
     self._observation = None
     #to load r libraries for McFdr:
     McFdr._initMcFdr()
Exemplo n.º 12
0
    def computeStep(self):
        """Advance one computation step: memo-load, recurse into children, compute."""
        self._trace('computeStep')

        if not self.hasResult():
            self._loadMemoizedResult()
        if self.hasResult():
            return

        # Let every unfinished child advance before attempting to compute.
        for child in self._children:
            if not child.hasResult():
                child.computeStep()
        if not all(child.hasResult() for child in self._children):
            return

        self._trace('_compute')
        # _compute may either return the result or set self._result directly.
        computed = None
        with StatisticExceptionHandling(**self._kwArgs):
            computed = self._compute()
            if DebugConfig.VERBOSE:
                logMessage('Result of statistic %s in region %s: %s' % (getClassName(self), self._region, computed))

        if not self.hasResult():
            # Respect a result that _compute set directly.
            self._result = computed

        self._storeMemoizedResult()
Exemplo n.º 13
0
    def __init__(self, region, track, track2=None, *args, **kwArgs):
        """
        Store region/track(s) and forward keyword arguments to _init.

        Raises ShouldNotOccurError for an invalid 'isExperimental' kwarg and
        IdenticalTrackNamesError when both tracks have the same track name.
        """
        from config.Config import IS_EXPERIMENTAL_INSTALLATION
        if 'isExperimental' in kwArgs:
            x = kwArgs['isExperimental'].lower()
            if x not in ['false', 'true']:  # fix: idiomatic 'not in'
                logMessage('isExperimental has value other than false/true', level=logging.WARN)
                raise ShouldNotOccurError('isExperimental has value other than false/true.')
            if x == 'true':
                # Consistent with the other __init__ variants: include the
                # value in the assertion message for easier debugging.
                assert IS_EXPERIMENTAL_INSTALLATION, IS_EXPERIMENTAL_INSTALLATION
        #else:
        #    assert IS_EXPERIMENTAL_INSTALLATION

        if 'assumptions' in kwArgs:
            self._checkAssumptions(kwArgs['assumptions'])

        self._region = region
        self._track = track
        if track2 not in [None, []]:
            if track.trackName == track2.trackName:
                #if not kwArgs.get('allowIdenticalTracks') in [True,'True']: #Does not work, as all kwArgs are not sent further down in createChildren, meaning that a base statistic like RawDataStat would not find allowIdenticalTracks and throw exception..
                #if not IS_EXPERIMENTAL_INSTALLATION: #does not work either, as results in: gold.util.CustomExceptions.IncompatibleTracksError: Track 'Unmarked segments (Sample tracks)'was created, but not touched by statistic
                from gold.util.CustomExceptions import IdenticalTrackNamesError
                raise IdenticalTrackNamesError("Track names are identical. Track name = " + ':'.join(track.trackName))
            self._track2 = track2
        self._kwArgs = kwArgs

        self._init(**kwArgs)

        self._trace('__init__')
Exemplo n.º 14
0
    def computeStep(self):
        """
        Advance the current child one step; once it has a result, record it
        and move on to the next bin's child.

        NOTE(review): the outer 'try' below has no visible 'except' clause --
        the remainder of this method (likely a StopIteration handler for
        self._bins.next()) is truncated in this chunk.
        """
#        if self.hasResult():
#            return

        self._trace('computeStep')
        try:
            try:
                if not self._curChild.hasResult():
                    self._curChild.computeStep()
                if not self._curChild.hasResult():
                    return
                nextRes = self._curChild.getResult()

                if DebugConfig.VERBOSE:
                    logMessage('Result of statistic %s in region %s: %s' % (getClassName(self._curChild), self._curChild._region, nextRes))

            # except NoneResultError, e:
            except (CentromerError, NoneResultError), e:
                # A child that failed with an expected error contributes None.
                nextRes = None
                if DebugConfig.VERBOSE:
                    logException(e, level=logging.DEBUG)
                if DebugConfig.PASS_ON_NONERESULT_EXCEPTIONS:
                    raise

            self._childResults.append(nextRes)
            tempRefHolderChild = self._curChild # To avoid children of this _curChild to be collected in the next line.
                                                # It will live long enough for createChildren to be called on new _curChild
            self._curChild.afterComputeCleanup() # In case a global analysis was run without prior local analyses
            self._curChild = None #first sets curchild to None to free memory even when self._bins.next() raises StopIteration..
            self._curChild = self._getChildObject(self._bins.next())

            self._curChild.createChildren()
Exemplo n.º 15
0
 def isValidForListing(self):
     """True if the def has listable text and a resolvable stat; otherwise logs (if verbose) and implicitly returns None."""
     if self._analysisParts:
         return self.getStat(flushMemoized=False) is not None
     if DebugConfig.VERBOSE:
         logMessage('Analysisdef "{}" does not have any text available for listing. '.format(self.getDef()) +
                    'Skipping...')
Exemplo n.º 16
0
 def _logAssumptionReduction(self, removedAssumptions):
     """Log each removed assumption, select it, and re-resolve the stat class."""
     for removed in removedAssumptions:
         logMessage('Assumption "' + str(removed) + '" was removed from analysisDef: ' + self.getDef())
         self.setChoice(self.ASSUMP_LABEL_KEY, removed)
         self._determineStatClass()
Exemplo n.º 17
0
def getOrigFn(genome, trackName, suffix, fileTree='standardized'):
    """Return the single matching original filename, or None if not exactly one."""
    fns = getOrigFns(genome, trackName, suffix, fileTree=fileTree)
    if len(fns) == 1:
        return fns[0]

    if IS_EXPERIMENTAL_INSTALLATION:
        from gold.application.LogSetup import logMessage, logging
        logMessage('getOrigFn - Cannot decide among zero or several filenames: %s' % fns, logging.WARNING)
    return None
Exemplo n.º 18
0
 def globalAnalysisEnded(self):
     if not self._printProgress:
         return
     if self._startGlobalTime is None:
         logMessage(
             'Called globalAnalysisEnded without globalAnalysisStarted being called before.',
             level=logging.WARN)
     print "\nglobal analysis took %f seconds" % (time.time() -
                                                  self._startGlobalTime)
Exemplo n.º 19
0
 def _logAssumptionReduction(self, removedAssumptions):
     """For every dropped assumption: log it, set it as choice, recompute the stat class."""
     for assumption in removedAssumptions:
         message = 'Assumption "' + str(assumption) + '" was removed from analysisDef: ' + self.getDef()
         logMessage(message)
         self.setChoice(self.ASSUMP_LABEL_KEY, assumption)
         self._determineStatClass()
 def __init__(self, region, track, track2, rawStatistic, randTrackClass=None, assumptions=None, tails=None, numResamplings=2000, randomSeed=None, **kwArgs):
     """
     Set up a Monte Carlo randomization test (older variant): resolve tails,
     random seed, randomized-track classes, the raw statistic, and the
     number of resamplings.
     """
     #print 'TEMP RM:',kwArgs
     # Backwards compatibility: accept 'tail' (more/less/different) in place
     # of 'tails' (right-tail/left-tail/two-tail).
     if tails==None:
         if 'tail' in kwArgs:
             tailTranslator = {'more':'right-tail', 'less':'left-tail', 'different':'two-tail'}
             tails = tailTranslator[kwArgs['tail']]
             if DebugConfig.VERBOSE:
                 logMessage('Argument tail provided instead of tails to RandomizationManagerStatUnsplittable', level=logging.DEBUG)
         else:
             tails = 'right-tail' # or 'two-tail'?
             logMessage('No tails argument provided to RandomizationManagerStatUnsplittable', level=logging.DEBUG)
     
     if track2 is None:
         self._track2 = None #to allow track2 to be passed on as None to rawStatistics without error. For use by single-track MC-tests..
         
     from gold.util.RandomUtil import getManualSeed, setManualSeed
     # Only the first explicit seed wins; 'Random' means no fixed seed.
     if randomSeed is not None and randomSeed != 'Random' and getManualSeed() is None:
         setManualSeed(int(randomSeed))
         
     Statistic.__init__(self, region, track, track2, rawStatistic=rawStatistic, randTrackClass=randTrackClass, assumptions=assumptions, tails=tails, numResamplings=numResamplings, randomSeed=randomSeed, **kwArgs)
     #if type(rawStatistic) is str:
     #    from gold.statistic.AllStatistics import STAT_CLASS_DICT
     #    rawStatistic = STAT_CLASS_DICT[rawStatistic]
     
     # Exactly one of randTrackClass / assumptions must be provided.
     assert (randTrackClass is None) ^ (assumptions is None) # xor
     if assumptions is not None:
         assert assumptions.count('_') == 1, assumptions
         randTrackClass1, randTrackClass2 = assumptions.split('_')
     else:
         randTrackClass1 = None
         randTrackClass2 = randTrackClass
     
     # Resolve class names (strings) to classes; 'None'/'' mean no class.
     self._randTrackClass1, self._randTrackClass2 = \
         [ ( globals()[clsDef] if clsDef not in ['None',''] else None ) \
             if type(clsDef) is str else clsDef for clsDef in [randTrackClass1, randTrackClass2] ]
     
     assert not (randTrackClass1 is None and randTrackClass2 is None)
     for cls in [self._randTrackClass1, self._randTrackClass2]:
         assert cls in [None, PermutedSegsAndSampledIntersegsTrack, \
                        PermutedSegsAndIntersegsTrack, RandomGenomeLocationTrack, SegsSampledByIntensityTrack, ShuffledMarksTrack]
         
     #print self._randTrackClass1, self._randTrackClass2
     self._rawStatistic = self.getRawStatisticClass(rawStatistic)
     
     #self._randTrackList = []
     self._tails = tails
     # 'minimal' runs (validity checks) use a single resampling only.
     # NOTE(review): self._kwArgs is assumed to be set by Statistic.__init__
     # above -- confirm against the base class.
     if kwArgs.get('minimal') == True:
         self._numResamplings = 1
         self._kwArgs['maxSamples'] = 1
     else:
         self._numResamplings = int(numResamplings)
     CompBinManager.ALLOW_COMP_BIN_SPLITTING = False
     self._randResults = []
     self._observation = None
     #to load r libraries for McFdr:
     McFdr._initMcFdr()
Exemplo n.º 21
0
 def _handleMissingStat(self):
     """Log an error and raise when no valid statistic could be resolved for the run."""
     from gold.application.LogSetup import logMessage, logging
     from gold.description.RunDescription import RunDescription
     import gold.description.Analysis as AnalysisModule

     message = 'Started run with invalid statistic... Def: ' + self._analysisDef
     logMessage(message, level=logging.ERROR)
     raise Exception(message)
Exemplo n.º 22
0
 def _handleMissingStat(self):
     """Abort the run: log the invalid-statistic error and raise it."""
     from gold.application.LogSetup import logMessage, logging
     from gold.description.RunDescription import RunDescription
     import gold.description.Analysis as AnalysisModule

     errorText = 'Started run with invalid statistic... Def: ' + self._analysisDef
     logMessage(errorText, level=logging.ERROR)
     raise Exception(errorText)
Exemplo n.º 23
0
    def _determineStatClass(self):
        """
        Try each candidate statistic class against a minimal bin source and
        let incompatibility exceptions weed out invalid ones (Python 2
        'except E, e' syntax). Exceptions are re-raised when
        DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS is set.
        """
        assert( hasattr(self, '_track') )
        assert( hasattr(self, '_track2') )
        dummyGESource = MinimalBinSource(self._genome)

        if len(self._statClassList)==0:
            #logging.getLogger(HB_LOGGER).warning('Stat class list is empty, for analysisDef: ' + self._analysisLine)
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine, level = logging.WARNING)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)
        
        for statClass in self._statClassList:
            if DebugConfig.VERBOSE:
                logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')
#                print statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')'

            #for reversed, trackA, trackB in [(False, self._track, self._track2), (True, self._track2, self._track) ]:
            for trackA, trackB in [[self._track, self._track2]]:
                if trackA == None:
                    continue

                try:
                    # Minimal trial run; failures below mark the class invalid.
                    StatJob(dummyGESource, trackA, trackB, statClass, minimal=True, **self.getChoices()).run(False)
                    #In order not to mess up integration tests
                    initSeed()
                    # A track left untouched by the statistic also counts as
                    # incompatible.
                    for track in [trackA, trackB]:
                        if track is not None and track.formatConverters is None:
                            raise IncompatibleTracksError('Track ' + prettyPrintTrackName(track.trackName) +\
                                                          'was created, but not touched by statistic')
                    
                except IncompatibleTracksError, e:
                    if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                        raise
                    if DebugConfig.VERBOSE:
                        logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                    #if VERBOSE:
                    #    print 'Incompatible tracks: ', \
                    #          statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                    #    print 'Incompatible: ', e
                except (AssertionError, IncompatibleAssumptionsError, IdenticalTrackNamesError), e:
                    if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                        raise
                    if DebugConfig.VERBOSE:
                        logException(e, message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
                    #if VERBOSE:
                    #    print 'Warning: exception in getStat: ', \
                    #        statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                    #    traceback.print_exc(file=sys.stdout)
                except OSError, e:
                    # Only OSErrors mentioning 'withOverlaps' are tolerated.
                    if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                        raise
                    elif not 'withOverlaps' in str(e):
                        raise
Exemplo n.º 24
0
    def storeResult(cls, stat):
        """Memoize stat._result to disk unless it was itself loaded from disk."""
        if stat.resultLoadedFromDisk():
            return
        if not cls._shouldUseDiskMemoization(stat, store=True):
            return

        memoPath = cls._createMemoPath(stat)
        key = cls._createMemoKey(stat)
        if DebugConfig.VERBOSE:
            logMessage('Storing result "{}" in "{}"'.format(
                stat._result, memoPath))
        cls.memoDataCollection[memoPath][key] = stat._result
Exemplo n.º 25
0
 def getNullModel(self):
     """Return the null-model text; swap T1/T2 labels when tracks are reversed."""
     text = self.getChoiceText(self.ASSUMP_LABEL_KEY)
     if not self._reversed:
         return text

     if re.search('[^ ,.]T[12][^ ,.]', text):
         logMessage('found instance of T1/T2 in null-model that may not refer to tracks as assumed in getNullModel')
     # 'tempT2' is the swap placeholder, so it must not already occur.
     assert 'tempT2' not in text
     return text.replace('T1', 'tempT2').replace('T2', 'T1').replace('tempT2', 'T2')
Exemplo n.º 26
0
 def _getH0andH1Text(self, coreCls):
     """Render an 'H0 vs H1' description block, or None (logged) if either is missing."""
     h0 = self._results._analysis.getH0()
     h1 = self._results._analysis.getH1()
     if h0 is None or h1 is None:
         logMessage('Did not find H0 or H1. Their values: ' + str(h0) + ' and ' + str(h1))
         return None

     core = coreCls()
     core.descriptionLine('H0', h0, indent=True)
     core.line('vs')
     core.descriptionLine('H1', h1, indent=True)
     return str(core)
Exemplo n.º 27
0
 def _constructBins(regSpec, binSpec, genome, trackNames):
     """
     Build the user bin source for the given region/bin specification.

     Returns [None, userBinSource] on success, or [results, None] -- where
     results holds an InvalidRunSpecException -- on failure (Python 2
     'except Exception, e' syntax). Re-raises when
     DebugConfig.PASS_ON_BATCH_EXCEPTIONS is set.
     """
     # Construct and check bins
     try:
         from quick.application.GalaxyInterface import GalaxyInterface
         userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome, trackNames)
         return [None, userBinSource]
     except Exception, e:
         results = Results([], [], '')
         results.addError(InvalidRunSpecException('Error in specification of analysis region or binsize: ' + str(e)))
         logMessage('Error in specification of analysis region (' + regSpec +') or binsize: (' + binSpec + ')')
         if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
             raise
         return [results, None]
 def getNullModel(self):
     """Null-model text for this analysis; T1/T2 labels swapped when reversed."""
     nullModelText = self.getChoiceText(self.ASSUMP_LABEL_KEY)
     if not self.reversed:
         return nullModelText

     if re.search('[^ ,.]T[12][^ ,.]', nullModelText):
         logMessage('Found instance of T1/T2 in null-model that may not refer to '
                    'tracks as assumed in getNullModel')
     # 'tempT2' serves as the swap placeholder; it must not already occur.
     assert 'tempT2' not in nullModelText
     return (nullModelText.replace('T1', 'tempT2')
                          .replace('T2', 'T1')
                          .replace('tempT2', 'T2'))
Exemplo n.º 29
0
 def _getH0andH1Text(self, coreCls):
     """Build the 'H0 vs H1' text; None (with a log entry) if either is unset."""
     h0, h1 = self._results._h0, self._results._h1
     if h0 is None or h1 is None:
         logMessage('Did not find H0 or H1. Their values: ' + str(h0) +
                    ' and ' + str(h1))
         return None

     core = coreCls()
     core.descriptionLine('H0', h0, indent=True)
     core.line('vs')
     core.descriptionLine('H1', h1, indent=True)
     return str(core)
Exemplo n.º 30
0
 def getH1(self):
     """Look up the H1 text matching the current tail choice; None if absent."""
     tailChoice = self.getChoice(self.TAIL_KEY)
     if tailChoice is None:
         return None

     h1 = self.getChoice(self.H1_KEY + '_' + tailChoice)
     if h1 is None:
         logMessage('Could not find H1, probably mismatch between tail-choice and corresponding H1-option in analysisDef '+\
                    '(tail choice: %s, options: %s)' % (self.getChoice('tail'), self.getAllOptionsAsKeys() ) )
     return h1
def monitor_load():
    """Poll the task-queue load every 5 seconds and submit a Titan job when overloaded.

    Runs forever; intended for a daemon thread. Submissions are rate-limited by
    JOB_SUBMISSION_WAIT_PERIOD so repeated high-load polls do not flood the scheduler.
    """
    taskQueueManager = TaskQueueManagerFactory.getTaskQueueManager()
    # Earliest time at which the next job submission is allowed (rate limiting).
    nextPossibleJobRequestTime = time.time()
    while True:        
        loadAverage = taskQueueManager.getLoadAverage()
        #logging.getLogger(PARALLEL_LOGGER).debug("load average is %f", loadAverage)
        #logMessage("load average is %f" % loadAverage, level=5, logger=PARALLEL_LOGGER)
        if loadAverage > LOAD_THRESHOLD:
            if time.time() > nextPossibleJobRequestTime:
                #logging.getLogger(PARALLEL_LOGGER).debug("load over threshold, submitting titan job")
                logMessage("load over threshold, submitting titan job", level=5, logger=PARALLEL_LOGGER)
                nextPossibleJobRequestTime = time.time() + JOB_SUBMISSION_WAIT_PERIOD
                TitanJobScript.submitJob()
        time.sleep(5)
def getOrigFns(genome, trackName, suffix, fileTree='standardized'):
    """Return full paths of the original files for a track.

    Only plain files directly inside the track's original-data directory are
    returned, filtered to those ending with ``suffix`` and excluding
    hidden/temporary files ('.', '_' or '#' prefix; '~' or '#' suffix).
    Returns an empty list when the directory does not exist.
    """
    assert fileTree in ['standardized', 'collected', 'parsing error']
    from gold.application.LogSetup import logMessage, logging

    path = getOrigPathForFileTree(genome, trackName, fileTree)

    if not os.path.exists(path):
        if IS_EXPERIMENTAL_INSTALLATION:
            logMessage('getOrigFn - Path does not exist: ' + path,
                       logging.WARNING)
        return []

    origFns = []
    for fn in os.listdir(path):
        # Skip files with the wrong suffix and hidden/temporary/backup files.
        if not fn.endswith(suffix) or fn[0] in ['.', '_', '#'] or fn[-1] in ['~', '#']:
            continue
        fullFn = path + os.sep + fn  # built once instead of twice per entry
        if os.path.isfile(fullFn):
            origFns.append(fullFn)
    return origFns
def getPreviewFile(trackNameTuple):
    """Fetch a preview of a public-dataset file via the local ZMQ request socket.

    Returns a NamedTemporaryFile holding the preview text, or None for folder
    entries or empty file names. NOTE(review): the caller must keep a reference
    to the returned object — the temp file is deleted when it is garbage
    collected. Also note that trackNameTuple is mutated in place below.
    """
    logMessage('trackNameTuple :=  ' + repr(trackNameTuple))
    # NOTE(review): find(...) > 0 would miss a ',FOLDER' marker at index 0 —
    # presumably names never start with the marker; confirm.
    if trackNameTuple[-1].find(',FOLDER') > 0:
        return None
    context = zmq.Context()
    socket = context.socket(zmq.REQ)
    socket.connect("tcp://localhost:5559")

    # Strip any trailing ',...' qualifier from the last name component (mutates arg).
    trackNameTuple[-1] = trackNameTuple[-1].split(',')[0]
    datasetId = getDatasetId(trackNameTuple[1])
    subtype = trackNameTuple[2]
    fileName = '/'.join(trackNameTuple[3:]).replace(',FOLDER',
                                                    '').split(',')[0]
    if fileName == '':
        return None
    subList = [subtype, fileName]
    paramlist = [
        'params:=' + '<#>'.join([datasetId, repr(subList)]),
        'operation:=GetFilePreviewFromPublicDataset',
        'class:=dataStorageServicePub'
    ]
    socket.send(messageSep.join(paramlist))
    filePreview = socket.recv_unicode().encode('ascii', 'ignore')
    #startIndex, endIndex = filePreview.find('<Preview>')+9,  filePreview.rfind('</Preview>')
    #filePreview = filePreview[startIndex:endIndex]
    from tempfile import NamedTemporaryFile
    # Local name shadows the ``tempfile`` module name.
    tempfile = NamedTemporaryFile()
    tempfile.write(filePreview)
    logMessage('fileName :=  ' + fileName)
    logMessage('NamedTemporaryFile :=  ' + tempfile.name)
    logMessage('FilePreview :=  ' + filePreview)
    return tempfile
Exemplo n.º 34
0
    def getH1(self):
        """Look up the H1 text matching the selected tail choice; None if unavailable."""
        tailChoice = self.getChoice(self.TAIL_KEY)
        if tailChoice is None:
            return None

        # H1 options are stored per tail choice, e.g. 'H1_more' or 'H1_less'.
        H1 = self.getChoice(self.H1_KEY + '_' + tailChoice)
        if H1 is None:
            logMessage('Could not find H1, probably mismatch between tail-choice and corresponding H1-option in analysisDef '
                       '(tail choice: %s, options: %s)' % (self.getChoice('tail'), self.getAllOptionsAsKeys()))

        return H1
Exemplo n.º 35
0
def getPreviewFile(trackNameTuple, userName, pwd):
    """Fetch a preview of a public-dataset file via the local ZMQ request socket.

    NOTE(review): userName and pwd are accepted but never used in this body —
    confirm whether callers rely on the signature only.
    Returns a NamedTemporaryFile holding the preview text, or None for folder
    entries or empty file names. The caller must keep a reference to the
    returned object — the temp file is deleted when it is garbage collected.
    """
    logMessage('trackNameTuple :=  '+ repr(trackNameTuple))
    # NOTE(review): find(...) > 0 would miss a ',FOLDER' marker at index 0 —
    # presumably names never start with the marker; confirm.
    if trackNameTuple[-1].find(',FOLDER')>0:
        return None
    context = zmq.Context()
    socket = context.socket(zmq.REQ)
    socket.connect("tcp://localhost:5559")
    
    # Strip any trailing ',...' qualifier from the last name component (mutates arg).
    trackNameTuple[-1] = trackNameTuple[-1].split(',')[0]
    datasetId = getDatasetId(trackNameTuple[1])
    subtype = trackNameTuple[2]
    fileName = '/'.join(trackNameTuple[3:]).replace(',FOLDER','').split(',')[0]
    if fileName =='':
        return None
    subList = [subtype, fileName]
    paramlist = ['params:='+'<#>'.join([ datasetId, repr(subList)]), 'operation:=GetFilePreviewFromPublicDataset','class:=dataStorageServicePub']
    socket.send(messageSep.join(paramlist))
    filePreview = socket.recv_unicode().encode('ascii','ignore')
    #startIndex, endIndex = filePreview.find('<Preview>')+9,  filePreview.rfind('</Preview>')
    #filePreview = filePreview[startIndex:endIndex]
    from tempfile import NamedTemporaryFile
    # Local name shadows the ``tempfile`` module name.
    tempfile = NamedTemporaryFile()
    tempfile.write(filePreview)
    logMessage('fileName :=  '+fileName)
    logMessage('NamedTemporaryFile :=  '+tempfile.name)
    logMessage('FilePreview :=  '+ filePreview)
    return tempfile

#[email protected]
Exemplo n.º 36
0
def monitor_load():
    """Continuously watch queue load; submit a Titan job when it exceeds the threshold."""
    queueManager = TaskQueueManagerFactory.getTaskQueueManager()
    # Submissions are throttled: no new job is requested before this timestamp.
    earliestNextSubmission = time.time()
    while True:
        currentLoad = queueManager.getLoadAverage()
        if currentLoad > LOAD_THRESHOLD and time.time() > earliestNextSubmission:
            logMessage("load over threshold, submitting titan job", level=5, logger=PARALLEL_LOGGER)
            earliestNextSubmission = time.time() + JOB_SUBMISSION_WAIT_PERIOD
            TitanJobScript.submitJob()
        time.sleep(5)
Exemplo n.º 37
0
    def loadResult(cls, stat):
        """Load a memoized result for ``stat`` from the disk memo store, if present.

        Does nothing when disk memoization is disabled for this statistic or
        when no stored result exists for its memo key.
        """
        if not cls._shouldUseDiskMemoization(stat, store=False):
            return

        memoPath = cls._createMemoPath(stat)
        key = cls._createMemoKey(stat)

        memoDict = cls.memoDataCollection[memoPath]
        if key in memoDict:
            # Reuse the dict fetched above instead of a second collection lookup.
            res = memoDict[key]

            if DebugConfig.VERBOSE:
                logMessage('Loading result "{}" in "{}"'.format(
                    res, memoPath))

            # The original called setMemoizedResult twice with the same value;
            # a single call is sufficient.
            stat.setMemoizedResult(res)
 def isCompatibleWith(self, sourceFormat, exceptionList=None):
     """Check whether this format requirement is compatible with ``sourceFormat``.

     A required attribute matches when it is unspecified (None) or equal to
     the source format's value. Attributes named in ``exceptionList``
     (without their leading underscore) are ignored.
     """
     # Avoid the shared mutable default-argument pitfall; behaves like [] when omitted.
     if exceptionList is None:
         exceptionList = []
     assert (not isinstance(sourceFormat, TrackFormatReq))
     attrList = [
         attr for attr in self._getAttributes(includeReqExtensions=False)
         if attr[1:] not in exceptionList
     ]
     # Pair each requirement value (self) with the source format's value.
     pairedAttrs = [[getattr(obj, attr) for obj in [self, sourceFormat]]
                    for attr in attrList]
     if DebugConfig.VERBOSE:
         logMessage(
             "Checking track format compatibility. Paired attributes: " +
             ', '.join("{}: {}".format(x, y)
                       for x, y in zip(attrList, pairedAttrs)))
     # Compatible iff every requirement is unset (None) or equals the source value.
     res = (not False in [s is None or s == sf for s, sf in pairedAttrs])
     return res
Exemplo n.º 39
0
    def computeStep(self):
        """Perform one step of lazy, memoized statistic computation.

        First tries a memoized result; if none exists, advances each unfinished
        child one step. Only when all children have results is this statistic's
        own _compute run (wrapped in the project's exception handling), after
        which the result is stored.
        """
        self._trace('computeStep')

        if not self.hasResult():
            self._loadMemoized()
        if self.hasResult():
            return
        # Advance unfinished children; our own computation waits until all are done.
        for child in self._children:
            if not child.hasResult():
                child.computeStep()

        if not all([child.hasResult() for child in self._children]):
            return

        self._trace('_compute')
        #The method _compute may either return the result, or set the result variable directly:
        res = None
        with StatisticExceptionHandling(**self._kwArgs):
            res = self._compute()
            # import os
            # import psutil
            # process = psutil.Process(os.getpid())
            # print process.memory_info().rss
            if DebugConfig.VERBOSE:
                logMessage('Result of statistic %s in region %s: %s' %
                           (getClassName(self), self._region, res))

        #try:
        #    self._trace('_compute')
        #    #The method _compute may either return the result, or set the result variable directly:
        #    res = self._compute()
        #
        #except (TooLargeBinError, TooSmallBinError, CentromerError),e:
        #    logException(e)
        #    raise
        #except (ZeroDivisionError, FloatingPointError, TypeError, ValueError),e:
        #    #print 'Error: ', e.__class__.__name__, e
        #    res = None
        #    if DebugConfig.VERBOSE or e.__class__ in [TypeError, ValueError]:
        #        logException(e, message='kwArgs: ' + str(self._kwArgs))
        #    if DebugConfig.PASS_ON_COMPUTE_EXCEPTIONS:
        #        raise

        if not self.hasResult():
            #Only set _result if this was not set directly by the previous call to _compute
            self._result = res

        self._storeResult()
def inferValType(valList, shapeOffset=0):
    """Infer the track value-type name from an array's dtype and shape.

    :param valList: None, a list/tuple, a numpy array, or a SmartMemmap.
    :param shapeOffset: number of leading axes to ignore when inspecting the
        shape (e.g. 1 when a bin/window axis precedes the value axes).
    :returns: False when valList is None, else a value-type name such as
        'number', 'mean_sd', 'population', 'tc', 'char' or 'category'
        ('_vector' variants for 2D data), or 'unsupported list' for an empty
        second axis.
    :raises ShouldNotOccurError: for unrecognized shape/dtype combinations.
    """
    if valList is None:
        return False
    elif type(valList) in [list, tuple]:
        return 'number'
    elif isinstance(valList, numpy.ndarray) or isinstance(
            valList, SmartMemmap):
        # (n, 2) float128 arrays encode mean/standard-deviation pairs.
        if len(valList.shape) == 2 + shapeOffset and valList.shape[
                1 +
                shapeOffset] == 2 and valList.dtype == numpy.dtype('float128'):
            return 'mean_sd'
        elif any(valList.dtype == numpy.dtype(x)
                 for x in ['float32', 'float64', 'float128']):
            if len(valList.shape) == 1 + shapeOffset:
                return 'number'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'population'
        if any(valList.dtype == numpy.dtype(x) for x in ['int32', 'int64']):
            if len(valList.shape) == 1 + shapeOffset:
                return 'number (integer)'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'population'
        elif any(valList.dtype == numpy.dtype(x) for x in ['int8', 'bool8']):
            if len(valList.shape) == 1 + shapeOffset:
                return 'tc'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'tc_vector'
        elif valList.dtype == numpy.dtype('S1'):
            if len(valList.shape) == 1 + shapeOffset:
                return 'char'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'char_vector'
        elif _dtypeIsStringLongerThanOne(valList.dtype):
            if len(valList.shape) == 1 + shapeOffset:
                return 'category'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'category_vector'

        # Guard the index so 1D arrays of unrecognized dtype fall through to
        # the intended ShouldNotOccurError below instead of an IndexError.
        if len(valList.shape) > 1 + shapeOffset and \
                valList.shape[1 + shapeOffset] == 0:
            return 'unsupported list'

        logMessage('Shape or dtype not recognized: ' + str(valList.shape) +
                   ' and ' + str(valList.dtype))
        raise ShouldNotOccurError()

    else:
        logMessage('Type of valList not recognized: ' + str(type(valList)))
        raise ShouldNotOccurError()
Exemplo n.º 41
0
    def _constructBins(regSpec, binSpec, genome, trackName1, trackName2):
        """Construct the user bin source for the given region/binsize specification.

        Returns [None, userBinSource] on success, or [Results-with-error, None]
        when the specification is invalid (re-raised instead if
        DebugConfig.PASS_ON_BATCH_EXCEPTIONS is set).
        """
        #Construct and check bins
        try:
            #userBinSource= UserBinSource(regSpec, binSpec)
            from quick.application.GalaxyInterface import GalaxyInterface
#            from config.Config import DEFAULT_GENOME
            userBinSource = GalaxyInterface._getUserBinSource(regSpec, binSpec, genome, trackName1, trackName2)
            return [None, userBinSource]
        except Exception, e:
            # Any failure in bin construction is reported through an empty
            # Results object carrying an InvalidRunSpecException.
            #results = Results(trackName1, trackName2, statClassName)
            results = Results([],[],'')
            results.addError(InvalidRunSpecException('Error in specification of analysis region or binsize: ' + str(e)))
            logMessage('Error in specification of analysis region (' + regSpec +') or binsize: (' + binSpec + ')')
            if DebugConfig.PASS_ON_BATCH_EXCEPTIONS:
                raise
            return [results, None]
Exemplo n.º 42
0
 def getRevEngBatchLine(trackName1, trackName2, cleanedTrackName1, cleanedTrackName2, analysisDef, \
                        regSpec, binSpec, genome, manualSeed, **kwArgs):
     """Reverse-engineer a batch-runner line reproducing an interactive analysis.

     Builds a BATCH_COL_SEPARATOR-joined line of genome, region spec, bin
     spec, the (percent-quoted) track names and the statistic invocation
     with its chosen parameters. On any failure a warning string is
     returned instead (and the exception is logged).
     """
     #analysisDef is assumed to be unquoted
     
     #if this is to work, must check explicitly against special keywords  in regSpec (or check that regSpec is a valid region that is to have region..)...
     #if not genome in regSpec:
     #    regSpec = genome+':'+regSpec
     try:
         if DebugConfig.VERBOSE:
             logMessage('getting RevEngBatchLine:')
         #analysisDef =analysisDef.replace('%20PointCountInSegsPvalStat%2C','') #REMOVE
         #print 'NOWAG: ',analysisDef
         
         analysis = Analysis(analysisDef, genome, cleanedTrackName1, cleanedTrackName2, **kwArgs)
         stat = analysis.getStat()
         if stat is None:
             return 'No corr batch line, as no valid statistic was found.. '
         #print 'CAME HERE'
         statClassName = stat.__name__
         #fixme: Add space, but this is not checked in batchrunner...
         # Render chosen options as 'key=value', replacing a random seed choice
         # by the explicit manualSeed so the batch line is reproducible, and
         # dropping hypothesis-text options that are not statistic parameters.
         params = ','.join(['='.join([choicePair[0], str(manualSeed)]) \
                              if (manualSeed is not None and choicePair[0] == 'randomSeed' and choicePair[1] == 'Random')
                                 else '='.join(choicePair) \
                             for choicePair in analysis.getChoices().items() \
                              if choicePair[0] not in ['H0','H1_more','H1_less','H1_different','H1_ha1','H1_ha2','H1_ha3','H1_ha4','H1_ha5'] ])
         statText = statClassName + '(' + params + ')'
         #return BATCH_COL_SEPARATOR.join([regSpec, binSpec, \
         #                 (':'.join(trackName1)).replace(' ','_'),\
         #                 (':'.join(trackName2)).replace(' ','_') if trackName2 is not None else 'None',\
         #                 statText])
         #assert unquote(regSpec) == regSpec
         assert unquote(binSpec) == binSpec #To assure that unquote can be safely applied to binSpec without any consequences (we don't want to always quote, but still want the possibility to use quoted history track names)
         batchElements = [genome, regSpec, binSpec, \
                          (':'.join([quote(x, safe='') for x in trackName1])),\
                          (':'.join([quote(x, safe='') for x in trackName2])) if trackName2 is not None else 'None',\
                          statText]
         #batchElements = [el.replace(BATCH_COL_SEPARATOR, '\\' + BATCH_COL_SEPARATOR) for el in batchElements]
         #batchElements = [quote(el, safe='') for el in batchElements]
         return BATCH_COL_SEPARATOR.join(batchElements)
         
     except Exception,e:
         #raise
         logException(e,logging.WARNING,'Could not generate corresponding batch line: ')
         #if DebugConfig.VERBOSE:
         logMessage('analysisDef, genome, trackName1, trackName2: \n' +
                    str([analysisDef, genome, trackName1, trackName2]) )
         return 'Warning: Could not generate corresponding batch line.' 
Exemplo n.º 43
0
    def _trace(self, methodName):
        """Log a debug trace line for ``methodName`` if enabled in TRACE_STAT.

        Assigns each statistic instance a per-class sequential id (stored on
        the Statistic class in ``objAddresses``) on first use, then logs
        'ClassName(id).methodName', optionally followed by the region and the
        track name(s) depending on the TRACE_STAT flags.
        """
        
        if TRACE_STAT[methodName]:
            # Lazily create the shared per-class instance counter.
            if not hasattr(Statistic, 'objAddresses'):
                Statistic.objAddresses = {}

            if not hasattr(self, '_traceId'):
                if not self.__class__.__name__ in Statistic.objAddresses:
                    Statistic.objAddresses[self.__class__.__name__] = 0
                self._traceId = str(Statistic.objAddresses[self.__class__.__name__])
                Statistic.objAddresses[self.__class__.__name__] += 1
            
            logMessage(  self.__class__.__name__ + '(' + self._traceId + ').' + methodName \
                  + ( (' (' + str(self._region) +')') if TRACE_STAT['printRegions'] else '') \
                  + ( ( ' (' + str(self._track.trackName) \
                  + (',' + str(self._track2.trackName) if hasattr(self,'_track2') else '') \
                  + ')' ) if TRACE_STAT['printTrackNames'] else '') )
def downloadFirstFtpFile(localFile, FtpAddress):
    """Download the first file listed on an FTP server to ``localFile``.

    ``FtpAddress`` is parsed as 'ftp://user:pwd@server:port' (slashes are
    stripped before splitting, so the scheme prefix is optional). Nothing is
    written when the server's listing is empty.
    """
    # 'ftp://user:pwd@server:port' -> ['user', 'pwd@server', 'port'] after
    # removing slashes and splitting on ':' (element 0 is the scheme).
    user, pwdServ, port = FtpAddress.replace('/', '').split(':')[1:]
    pwd, server = pwdServ.split('@')
    logMessage('localFile:  ' + localFile)
    ftp_h = FTP()
    ftp_h.connect(server, port)
    ftp_h.login(user, pwd)
    try:
        filenames = []
        ftp_h.retrlines('NLST', filenames.append)
        if filenames:
            # Only the first listed file is fetched, as the function name implies.
            # 'with' guarantees the local file is closed even if the transfer fails.
            with open(localFile, 'wb') as outFile:
                ftp_h.retrbinary('RETR ' + filenames[0], outFile.write)
    finally:
        # Close the control connection even when listing/transfer raises.
        ftp_h.close()
Exemplo n.º 45
0
def inferValType(valList, shapeOffset=0):
    """Infer the track value-type name from an array's dtype and shape.

    :param valList: None, a list/tuple, a numpy array, or a SmartMemmap.
    :param shapeOffset: number of leading axes to ignore when inspecting the
        shape (e.g. 1 when a bin/window axis precedes the value axes).
    :returns: False when valList is None, else a value-type name such as
        'number', 'mean_sd', 'population', 'tc', 'char' or 'category'
        ('_vector' variants for 2D data).
    :raises ShouldNotOccurError: for unrecognized shape/dtype combinations.
    """
    if valList is None:
        return False
    elif type(valList) in [list,tuple]:
        return 'number'
    elif isinstance(valList, numpy.ndarray) or isinstance(valList, SmartMemmap):    
        # (n, 2) float128 arrays encode mean/standard-deviation pairs.
        if len(valList.shape) == 2 + shapeOffset and valList.shape[1 + shapeOffset] == 2 and valList.dtype == numpy.dtype('float128'):
            return 'mean_sd'
        elif any(valList.dtype == numpy.dtype(x) for x in ['float32', 'float64', 'float128']):
            if len( valList.shape ) == 1 + shapeOffset:
                return 'number'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'population'
        if any(valList.dtype == numpy.dtype(x) for x in ['int32', 'int64']):
            if len( valList.shape ) == 1 + shapeOffset:
                return 'number (integer)'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'population'
        elif any(valList.dtype == numpy.dtype(x) for x in ['int8', 'bool8']):
            if len( valList.shape ) == 1 + shapeOffset:
                return 'tc'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'tc_vector'
        elif valList.dtype == numpy.dtype('S1'):
            if len( valList.shape ) == 1 + shapeOffset:
                return 'char'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'char_vector'
        elif _dtypeIsStringLongerThanOne(valList.dtype):
            if len( valList.shape ) == 1 + shapeOffset:
                return 'category'
            elif valList.shape[1 + shapeOffset] >= 2:
                return 'category_vector'
        
        # NOTE(review): for a 1D array of unrecognized dtype this shape index
        # raises IndexError before the intended logging below — confirm.
        if valList.shape[1 + shapeOffset] == 0:
            return 'unsupported list'
                
        logMessage('Shape or dtype not recognized: ' + str(valList.shape) + ' and ' + str(valList.dtype) )
        raise ShouldNotOccurError()
        
    else:
        logMessage('Type of valList not recognized: ' + str(type(valList)))
        raise ShouldNotOccurError()
Exemplo n.º 46
0
    def _parseDef(self, id):
        """Split an analysisDef string into text parts, option clauses and statistics.

        Populates self._analysisParts (plain-text fragments, AnalysisOption
        objects for '[...]' clauses, and whitespace runs, in order) and
        self._statClassList (the statistic classes named after '->', filtered
        to those present in STAT_CLASS_DICT).
        NOTE(review): the parameter name ``id`` shadows the builtin.
        """
        self._analysisParts = []
        self._statClassList = []

#                           ([^-[]* #pure text - not '['
        #print 'NOWAG id:', id
        parts = re.findall('''
                            # Match pure text (part[0]):
                           ( (?: [^-[]* (?:-(?!>))? )* #1. pure text - not '[' or '-',
                                                       #2. separated by a possible '-' that is not before a '>'
                                                       #1 and 2 is repeated as long as necessary
                           [^-[\s]+) #should not end with whitespace,
                                     #as this may belong to the '->'-expression
                           # Match option clause (part[1])            
                           |( \[ [^[\]]* \] ) #Matches an expression inside brackets '[]'
                           # Match specification of statistic classes (part[2])
                           |( \s? \-> \s? .* )
                           # Match any additional whitespace (part[3])
                           |(\s*)
                           ''', id, flags=re.VERBOSE)
        
        from gold.statistic.AllStatistics import STAT_CLASS_DICT
        # Each findall tuple has exactly one non-empty slot per match.
        for part in parts:
            if part[0] != '':
                self._analysisParts.append(part[0])
            if part[1] != '':
                self._analysisParts.append(AnalysisOption(part[1]))
            if part[2] != '':
                statNames = part[2].replace('->','').replace(' ','').split(',')
                #self._statClassList = statNames                
                self._statClassList = [STAT_CLASS_DICT[statName] for statName in statNames \
                                       if STAT_CLASS_DICT.get(statName) is not None]
                if len(self._statClassList)==0:
                    if len(statNames)==0:
                        logMessage('No statistic found when parsing analysisDef: ' + self._analysisLine)
                    else:
                        logMessage('Specified statistics not found in STAT_CLASS_DICT. Statistics:%s, and keys in STAT_CLASS_DICT: %s' % (str(statNames), str(STAT_CLASS_DICT)) )
            if part[3] != '':
                self._analysisParts.append(part[3])
Exemplo n.º 47
0
                        logException(e, message='(Error in _determineStatClass, with statClass %s)' % statClass.__name__)
                        #if VERBOSE:
                        #    print 'Warning: exception in getStat: ', \
                        #        statClass.__name__ + ': ' + e.__class__.__name__ + ': ' + str(e)
                        #    traceback.print_exc(file=sys.stdout)
                else:
                    #self._reversed = reversed
                    #self._conversionsUsed = len(trackA.conversionsUsed) > 0 or \
                    #    ((trackB is not None) and len(trackB.conversionsUsed) > 0)
                    ##self._validStatClass = functools.partial(statClass, **self.getChoices())
                    #functools.update_wrapper(self._validStatClass, statClass)
                    validStatClass = wrapClass(statClass, keywords=self.getChoices() )
                    #self.setConverters( self._track.formatConverters, self._track2.formatConverters if self._track2 is not None else None)
                    #self._updateOptions()
                    if DebugConfig.VERBOSE:
                        logMessage(statClass.__name__ + ': OK')
#                        print statClass.__name__ + ': OK'
                    return validStatClass
        
        return None
    
    def _appendConverterOptions(self, track, labelKey):
        if track is None:
            return
        
        if self.getChoice(labelKey) is not None:
            assert(self.getChoice(labelKey) == getClassName(track.formatConverters[0]))
            return
        
        labelPair = (labelKey, '_Treat ' + prettyPrintTrackName(track.trackName) + ' as')
        choicePairs = [ ( getClassName(fc), fc.getOutputDescription(TrackInfo(self._genome, track.trackName).trackFormatName) ) \
            if time.time() > nextPossibleJobRequestTime:
                #logging.getLogger(PARALLEL_LOGGER).debug("load over threshold, submitting titan job")
                logMessage("load over threshold, submitting titan job", level=5, logger=PARALLEL_LOGGER)
                nextPossibleJobRequestTime = time.time() + JOB_SUBMISSION_WAIT_PERIOD
                TitanJobScript.submitJob()
        time.sleep(5)

# --- Task-queue server bootstrap: executed at module run time ---

class MyManager(BaseManager):
    """Multiprocessing manager exposing the task queue to remote clients."""
    pass

pid = os.getpid()

pidFileName = GALAXY_BASE_DIR + "/taskQueue.pid"

# Record this server's PID so external tooling can locate/stop the process.
with open(pidFileName, "w") as f:
    f.write(str(pid))

# Remote clients obtain the shared task queue and may request shutdown.
MyManager.register("TaskQueueReferent", TaskQueueManagerFactory.getTaskQueueManager)
MyManager.register("shutdown", shutdown)

# Background daemon thread that watches load and submits Titan jobs.
loadThread = threading.Thread(target=monitor_load)
loadThread.daemon = True
loadThread.start()

# Listen on all interfaces at PP_MANAGER_PORT; blocks in serve_forever below.
manager = MyManager(address=("", PP_MANAGER_PORT), authkey=PP_PASSPHRASE)
server = manager.get_server()

#logging.getLogger(PARALLEL_LOGGER).debug("Task queue started, serving forever...")
logMessage("Task queue started, serving forever...", level=5, logger=PARALLEL_LOGGER)
server.serve_forever()
Exemplo n.º 49
0
    def submit(self, func, args=(), depfuncs=(), modules=(),
            callback=None, callbackargs=(), group='default', globals=None, restrictions=[]):
        """Submits function to the execution queue

            func - function to be executed
            args - tuple with arguments of the 'func'
            depfuncs - tuple with functions which might be called from 'func'
            modules - tuple with module names to import
            callback - callback function which will be called with argument
                    list equal to callbackargs+(result,)
                    as soon as calculation is done
            callbackargs - additional arguments for callback function
            group - job group, is used when wait(group) is called to wait for
            jobs in a given group to finish
            globals - dictionary from which all modules, functions and classes
            will be imported, for instance: globals=globals()
            restrictions - forwarded to the Job created for a new group
                    (NOTE: Python 2 code; uses types.StringType/ClassType etc.)
        """

        # perform some checks for frequent mistakes
        if self._exiting:
            raise DestroyedServerError("Cannot submit jobs: server"\
                    " instance has been destroyed")

        if not isinstance(args, tuple):
            raise TypeError("args argument must be a tuple")

        if not isinstance(depfuncs, tuple):
            raise TypeError("depfuncs argument must be a tuple")

        if not isinstance(modules, tuple):
            raise TypeError("modules argument must be a tuple")

        if not isinstance(callbackargs, tuple):
            raise TypeError("callbackargs argument must be a tuple")

        if globals is not None and not isinstance(globals, dict):
            raise TypeError("globals argument must be a dictionary")

        for module in modules:
            if not isinstance(module, types.StringType):
                raise TypeError("modules argument must be a list of strings")

        tid = self.__gentid()

        # When a globals dict is supplied, auto-discover modules and add all
        # functions/classes from it as dependencies of the task.
        if globals:
            modules += tuple(self.__find_modules("", globals))
            modules = tuple(set(modules))
            self.logger.debug("Task %i will autoimport next modules: %s" %
                    (tid, str(modules)))
            for object1 in globals.values():
                if isinstance(object1, types.FunctionType) \
                        or isinstance(object1, types.ClassType):
                    depfuncs += (object1, )

        task = _Task(self, tid, callback, callbackargs, group)

        self.__waittasks_lock.acquire()
        self.__waittasks.append(task)
        self.__waittasks_lock.release()

        # if the function is a method of a class add self to the arguments list
        if isinstance(func, types.MethodType) and func.im_self is not None:
            args = (func.im_self, ) + args

        # if there is an instance of a user defined class in the arguments add
        # whole class to dependencies
        for arg in args:
            # Checks for both classic or new class instances
            if isinstance(arg, types.InstanceType) \
                    or str(type(arg))[:6] == "<class":
                # do not include source for imported modules
                if ppcommon.is_not_imported(arg, modules):
                    depfuncs += tuple(ppcommon.get_class_hierarchy(arg.__class__))

        # if there is a function in the arguments add this
        # function to dependencies
        for arg in args:
            if isinstance(arg, types.FunctionType):
                depfuncs += (arg, )
                
                
        #Add task id
        args += (tid, )

        # Serialize the function (with dependencies) and its arguments for transfer.
        sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
        sargs = pickle.dumps(args, self.__pickle_proto)

        # One Job per group; the first submission for a group claims a worker.
        with self.__jobs_lock:
            if task.group not in self.__jobs:                                        
                job = Job(task.group, restrictions)
                job.worker = self.getFreeWorker()
                self.__jobs[task.group] = job
                self.__queue.append(job)
                
            self.__jobs[task.group].addTask((task, sfunc, sargs))
        
        #logging.getLogger(PARALLEL_LOGGER).debug("task %i submitted in group %s" , tid, group)
        logMessage("task %i submitted in group %s" % (tid, group), level = 5, logger=PARALLEL_LOGGER)
        self.__scheduler()
        return task
    def validateAndPossiblyResetLocalResults(cls, stats):
        """Apply the McFdr sequential Monte Carlo stopping rule to ``stats``.

        A statistic is considered determined when its number of extreme samples
        reaches mThreshold, its sample count reaches maxSamples, its result is
        invalid, or (depending on fdrCriterion) its FDR-adjusted p-value falls
        below fdrThreshold. Results of undetermined statistics are reset and
        their sample counts increased so sampling continues.

        Returns the number of statistics that are still undetermined.
        NOTE: Python 2 code — ``range`` returns a list and is mutated below.
        """
        #return 0#to short-circuit this functionality as it is currently in development
        #return McFdr.dummyStub(stats)
        if len(stats)==0:
            return 0
        #else:
            #print 'LEN: ',len(stats)
        # Thresholds are read from the first statistic's kwArgs (assumed shared).
        mt = stats[0]._kwArgs.get('mThreshold')
        ft = stats[0]._kwArgs.get('fdrThreshold')
        ms = stats[0]._kwArgs.get('maxSamples')
        fc = stats[0]._kwArgs.get('fdrCriterion')
        M_THRESHOLD = int(mt) if mt is not None else 20
        FDR_THRESHOLD = float(ft) if ft is not None else 0.1
        if ms is None:
            MAX_SAMPLES = 50000
        elif type(ms) is int:
            MAX_SAMPLES = ms
        elif ms.lower() == 'unlimited':
            MAX_SAMPLES = None
        else:
            MAX_SAMPLES = int(ms)
        
        #print 'M_THRESHOLD:%i, FDR_THRESHOLD:%.2f, MAX_SAMPLES:%s' % (M_THRESHOLD,FDR_THRESHOLD,str(MAX_SAMPLES))
        
        
        #print 'min samples:%i, samples per chunk:%i' % (stats[0]._numResamplings, NUM_SAMPLES_PER_CHUNK)
        
        assert fc in [None, 'individual','simultaneous'], 'fdrCriterion:'+str(fc)
        individualFdr = (fc == 'individual')
        #print 'FDR criterion: %s' % fc
        if fc is None:
            logMessage('Warning: empty fdrCriterion, using simultaneous')
        #USE_MC_FDR = True #if false, use only standard sequential MC, without checking q-values
        
        from gold.application.RSetup import r
        import numpy
        
        #print '<pre>'
        #pvals = [x.getResult()[RandomizationManagerStatUnsplittable.PVAL_KEY] for x in stats]
        # Pre-sized lists (py2 range returns a list) filled per statistic below.
        pvals = range(len(stats))
        allMs = range(len(stats))
        allNumSamples = range(len(stats))
        isInValid = range(len(stats))
        for i,x in enumerate(stats):
            # NOTE(review): bare except — any failure to fetch a result marks
            # the statistic invalid and thereby "determined".
            try:
                pvals[i] = x.getResult()[RandomizationManagerStatUnsplittable.PVAL_KEY]
                allMs[i] = x.getResult()[RandomizationManagerStatUnsplittable.M_KEY]
                allNumSamples[i] = x.getResult()[RandomizationManagerStatUnsplittable.NUM_SAMPLES_KEY]
                isInValid[i] = False
            except:
                pvals[i] = None
                allMs[i] = None
                allNumSamples[i] = None
                isInValid[i] = True
        
        
        #print 'P: ',pvals
        #print 'Stats: ',stats
        #print 'LEN: ',len(stats)
        fdrVals = McFdr.adjustPvalues(pvals, verbose=False)
        
        #if not type(fdrVals) in (list,tuple):
        #    fdrVals = [fdrVals]
        #print 'FDR: ', fdrVals
        
        #allMs = [x.getResult()[RandomizationManagerStatUnsplittable.M_KEY] for x in stats] #maybe just access stat object directly to get this..
        #allMs = range(len(stats))
        #for i,x in enumerate(stats):
        #    try:
        #        allMs[i] = x.getResult()[RandomizationManagerStatUnsplittable.M_KEY]
        #    except:
        #        allMs[i] = None
        
        #determinedByM = [M_THRESHOLD is not None and m is not None and m>=M_THRESHOLD for m in allMs]
        determinedByM = [M_THRESHOLD is not None and m>=M_THRESHOLD for m in allMs]
        determinedByFdr = [FDR_THRESHOLD is not None and not numpy.isnan(f) and f<FDR_THRESHOLD for f in fdrVals]
        determinedByMaxSamples = [MAX_SAMPLES is not None and n>=MAX_SAMPLES for n in allNumSamples]
        statIndividuallyDetermined = list(any(x) for x in zip(determinedByM,determinedByMaxSamples,isInValid)) #determined by anything except FDR, as the latter is not necessarily handled on a per test level..
        statDeterminedByAnyMeans = list(any(x) for x in zip(statIndividuallyDetermined, determinedByFdr)) #determined individually or by FDR
        assert len(stats) == len(pvals) == len(fdrVals) == len(allMs) == len(determinedByM) == len(determinedByFdr) == len(statIndividuallyDetermined)
        
        #print '</pre>'
        #print allMs
        #print fdrVals
        
        #ndIndexes = [i for i in range(len(statDetermined)) if not statDetermined[i]]
        #print 'INDEXES: ' + ','.join([str(x) for x in ndIndexes]), '<br>'
        #print 'M-VALUES: ' + ','.join([str(allMs[x]) for x in ndIndexes]), '<br>'
        #print 'P-VALUES: ' + ','.join([str(pvals[x]) for x in ndIndexes]), '<br>'
        #print 'FDR-VALUES: ' + ','.join([str(fdrVals[x]) for x in ndIndexes]), '<br>'

        # Undetermined statistics get their cached result cleared and their
        # resampling budget increased, so the next pass draws more samples.
        for i in range(len(statIndividuallyDetermined)):
            determined = statIndividuallyDetermined[i] or (individualFdr and determinedByFdr[i])
            if not determined:
                if hasattr(stats[i], '_result'):
                    del stats[i]._result
                else:
                    print 'no _result to delete at index %i in stats: '%i #, stats
                    print 'obj details: ',stats[i]._region
                stats[i]._numResamplings += cls.NUM_SAMPLES_PER_CHUNK #get number from mcFdr..
        #return all(statDeterminedByAnyMeans)
        #returns number of not determined stats..
        return sum((1 if not determined else 0) for determined in statDeterminedByAnyMeans)
Exemplo n.º 51
0
    def _run_local(self, task, sfunc, sargs, worker, job):
        """Runs a job locally"""

        if self._exiting:
            return
        self.logger.info("Task %i started",  task.tid)

        start_time = time.time()

        
        sresult = None
        while sresult == None:
            try:
                worker.t.csend(sfunc)
                worker.t.send(sargs)
            except:
                if self._exiting:
                    return
                if SHOW_EXPECTED_EXCEPTIONS:
                    self.logger.debug("Exception sending task in _run_local (possibly expected)", exc_info=True)
                    
                #print "exception in run_local for group %s for worker %s when fetching result for tid %s: (thread is %s)" % (job.group, worker, task.tid, threading.current_thread())
                raise
            while sresult == None:
                try:
                    sresult = worker.t.receive(timeout=10)
                except pptransport.TimeoutException: #Can happen because of R crashing...
                    if worker.is_alive():
                        continue
                    else:
                        print "Worker appears to have crashed for task %d, reinserting task..."
                        
                        worker.stop()
                        job.worker = self.getFreeWorker()
                        self.insert(sfunc, sargs, job, task)
                        self.__scheduler()
                        return
                except:
                    if self._exiting:
                        return
                    if SHOW_EXPECTED_EXCEPTIONS:
                        self.logger.debug("Exception receiving result in _run_local (possibly expected)", exc_info=True)
                    raise

        task.finalize(sresult)

        # remove the job from the waiting list
        if self.__waittasks:
            with self.__waittasks_lock:
                self.__waittasks.remove(task)
        
        job.taskFinished()
        worker.free()
        
        self.__add_to_active_tasks(-1)
            
        if not self._exiting:
            self.__stat_add_time("local", time.time()-start_time)
        #self.logger.debug("Task %i ended",  task.tid)
        #logging.getLogger(PARALLEL_LOGGER).debug("Task %i ended", task.tid)
        logMessage("Task %i ended" % task.tid, level = 5, logger = PARALLEL_LOGGER)
        self.__scheduler()
    def validateAndPossiblyResetGlobalResult(cls, stat):
        mt = stat._kwArgs.get('mThreshold')
        ft = stat._kwArgs.get('fdrThreshold')
        ms = stat._kwArgs.get('maxSamples')
        fc = stat._kwArgs.get('fdrCriterion')
        M_THRESHOLD = int(mt) if mt is not None else 20
        FDR_THRESHOLD = float(ft) if ft is not None else 0.1
        if ms is None:
            MAX_SAMPLES = 50000
        elif type(ms) is int:
            MAX_SAMPLES = ms
        elif ms.lower() == 'unlimited':
            MAX_SAMPLES = None
        else:
            MAX_SAMPLES = int(ms)
        
        #print 'M_THRESHOLD:%i, FDR_THRESHOLD:%.2f, MAX_SAMPLES:%s' % (M_THRESHOLD,FDR_THRESHOLD,str(MAX_SAMPLES))
        
        
        #print 'min samples:%i, samples per chunk:%i' % (stats[0]._numResamplings, NUM_SAMPLES_PER_CHUNK)
        
        assert fc in [None, 'individual','simultaneous'], 'fdrCriterion:'+str(fc)
        individualFdr = (fc == 'individual')
        #print 'FDR criterion: %s' % fc
        if fc is None:
            logMessage('Warning: empty fdrCriterion, using simultaneous')
        #USE_MC_FDR = True #if false, use only standard sequential MC, without checking q-values
        
        from gold.application.RSetup import r
        import numpy
        
        #print '<pre>'
        #pvals = [x.getResult()[RandomizationManagerStatUnsplittable.PVAL_KEY] for x in stats]
        #pvals = range(len(stats))
        #allMs = range(len(stats))
        #allNumSamples = range(len(stats))
        #isInValid = range(len(stats))
        #for i,x in enumerate(stats):
        try:
            pval = stat.getResult()[RandomizationManagerStatUnsplittable.PVAL_KEY]
            mVal = stat.getResult()[RandomizationManagerStatUnsplittable.M_KEY]
            numSamples = stat.getResult()[RandomizationManagerStatUnsplittable.NUM_SAMPLES_KEY]
            isInValid = False
        except:
            pval= None
            mVal = None
            numSamples = None
            isInValid = True
            #raise
    
        #print type(pval)
        determinedByM = M_THRESHOLD is not None and mVal>=M_THRESHOLD
        determinedByFdr = FDR_THRESHOLD is not None and pval is not None and not numpy.isnan(pval) and pval<FDR_THRESHOLD
        determinedByMaxSamples = MAX_SAMPLES is not None and numSamples>=MAX_SAMPLES
        #print 'TEMP statdet1: ',determinedByM, determinedByFdr, determinedByMaxSamples
        statDetermined = any([determinedByM, determinedByFdr, determinedByMaxSamples,isInValid])
        #determined by anything except FDR, as the latter is not necessarily handled on a per test level..
        #statDeterminedByAnyMeans = list(any(x) for x in zip(statIndividuallyDetermined, determinedByFdr)) #determined individually or by FDR
        #assert len(stats) == len(pvals) == len(fdrVals) == len(allMs) == len(determinedByM) == len(determinedByFdr) == len(statIndividuallyDetermined)

        if not statDetermined:
            if hasattr(stat, '_result'):
                del stat._result
            else:
                print 'no _result to delete at global level'
                #print 'obj details: ',stats._region
            stat._numResamplings += cls.NUM_SAMPLES_PER_CHUNK #get number from mcFdr..
        #return all(statDeterminedByAnyMeans)
        #returns number of not determined stats..
        #print 'TEMP statdet: ',statDetermined
        return (1 if not statDetermined else 0) 
Exemplo n.º 53
0
 def globalAnalysisEnded(self):
     if not self._printProgress:
         return
     if self._startGlobalTime is None:
         logMessage('Called globalAnalysisEnded without globalAnalysisStarted being called before.', level=logging.WARN)
     print "\nglobal analysis took %f seconds" % (time.time() - self._startGlobalTime)
Exemplo n.º 54
0
    def _generateAnswerText(self, coreCls):
        """Build the HTML 'answer' section summarizing an analysis run.

        Assembles, using the HtmlCore-like factory *coreCls*, a sequence of
        collapsible boxes: the restated question, a 'simplistic answer'
        (yes/maybe/no from the global p-value, or significant-bin counts when
        only local p-values exist), a 'precise answer' (p-values, FDR counts,
        null model, test statistic), and links to the run description and the
        full results table. Returns the assembled HTML as a string.

        :param coreCls: class used to create HTML-building core objects
        :returns: HTML fragment as str
        """
        # With no global p-value, presentation switches to a per-bin
        # FDR-based summary throughout the method.
        onlyLocalPvals = self._results.hasOnlyLocalPvals()
        globalPval = self._results.getGlobalResult().get(self._results.getPvalKey()) if not onlyLocalPvals else None

        localPvalsUrl = getRelativeUrlFromWebPath(os.sep.join([self._baseDir, 'table.html'])) #problematic dependency towards fn in tablePresenter..
        #Change to something like this when StaticFile has been used throughout presenters..
        #from quick.util.StaticFile import GalaxyRunSpecificFile
        #GalaxyRunSpecificFile([], '', self._galaxyFn)
        
        # Box 1: restate the question asked.
        core = coreCls()
        core.styleInfoBegin(styleClass="infomessagesmall answerbox question")
        core.header('You asked:')
        core.line(str(coreCls().highlight(self._getHeader())))
        core.styleInfoEnd()
        
        #Simplistic answer
        core.styleInfoBegin(styleClass="infomessagesmall answerbox simplisticanswer")
        core.header(str(coreCls().link('Simplistic answer:', '#', \
                                        args='''onclick="return toggle('simplistic_answer_expl')"''')))
        
        # Collapsible explanation of what the simplistic answer means.
        core.styleInfoBegin(styleId="simplistic_answer_expl", styleClass="infomessagesmall explanation")
        if onlyLocalPvals :
            core.line('''
Under "simplistic answer" you will find a simple statement on whether there were any findings for the local analysis. The number of significant bins at 10% false discovery rate (FDR) is provided.<br>
<br>
It is not possible to draw a decisive conclusion based on a p-value, so the statements are only meant as simple indications.<br>
                  ''')
        else:
            core.line('''
Under "simplistic answer" you will find a yes/maybe/no-conclusion answer to the question asked, based on a simple thresholding scheme on the p-value:<br>
"yes" if p-value < 0.01<br>
"maybe" if  0.01 < p-value < 0.1<br>
"no conclusion" if p-value > 0.1<br>
<br>
It is not possible to draw a decisive conclusion based on a p-value, so the statements are only meant as simple indications.<br>                  
                  ''')
            
        core.styleInfoEnd()
        
        if onlyLocalPvals:
            # Phrase the conclusion from the count of FDR-significant bins.
            numSign, numTested, numIgnored = self._results.getFdrSignBins()
            if numSign == numTested and numSign != 0:
                simplisticPhrase = 'Yes - the data suggests this for all bins'
            elif numSign>0:
                simplisticPhrase = 'Yes - the data suggests this at least in some bins'
                numSign, numTested, numIgnored = self._results.getFdrSignBins()
                simplisticPhrase += ' (%i significant bins out of %i, at %i%% FDR' % (numSign, numTested, self._results.FDR_THRESHOLD*100)
            else:
                simplisticPhrase = 'No support from data for this conclusion in any bin'
            
                
            core.line(str(coreCls().highlight(simplisticPhrase)))
        else:
            assert globalPval is not None
            
            # For two-sided tests, note whether the observed statistic was
            # above or below its expected value.
            directionality = ''
            if self._results._analysis.isTwoSidedTest():
                tsValue, expTsValue = self._getTestStatisticAndExpectedValues()
                if tsValue is not None and expTsValue is not None and tsValue!=expTsValue:
                    directionality = '(higher) ' if tsValue > expTsValue else '(lower) '
                    
            # Simple thresholding scheme on the global p-value.
            if globalPval < 0.01:
                simplisticPhrase = 'Yes %s- the data suggests this' % directionality
            elif globalPval < 0.1:
                simplisticPhrase = 'Maybe %s- weak evidence' % directionality
            else:
                simplisticPhrase = 'No support from data for this conclusion'
                
            core.line(str(coreCls().highlight(simplisticPhrase + ' (p-value: ' + strWithStdFormatting(globalPval) + ')' )))
        core.styleInfoEnd()
        
        #Precise answer
        core.styleInfoBegin(styleClass="infomessagesmall answerbox preciseanswer")
        core.header(str(coreCls().link('Precise answer:', '#', \
                                       args='''onclick="return toggle('precise_answer_expl')"''')))
        
        core.styleInfoBegin(styleId="precise_answer_expl", styleClass="infomessagesmall explanation")
        if onlyLocalPvals :
            core.line('''
Significance testing evaluates a <b>null hypothesis (H0)</b> versus an <b>alternative hypothesis (H1)</b>. Low <b>p-values</b> are evidence against H0. The testing involves comparing the observed value of a  <b>test statistic</b> to the distribution of the test statistic under a <b>null model</b>. The testing was performed in each local bin, with a list of FDR-corrected p-values per bin provided.                  
                  ''')
        else:
            core.line('''
Significance testing evaluates a <b>null hypothesis (H0)</b> versus an <b>alternative hypothesis (H1)</b>. Low <b>p-values</b> are evidence against H0. The testing involves comparing the observed value of a  <b>test statistic</b> to the distribution of the test statistic under a <b>null model</b>. 
                  ''')

        core.styleInfoEnd()
        
        EffectSizeText = 'Please note that both the effect size and the p-value should be considered in order to assess the practical significance of a result.'
        
        FDR_text = '* False Discovery Rate: The expected proportion of false positive results among the significant bins is no more than %i%%.' \
                    % (self._results.FDR_THRESHOLD*100)
        
        if onlyLocalPvals:
            # Local-only results: significant-bin counts plus a link to the
            # per-bin p-value table; no global p-value is available.
            numSign, numTested, numIgnored = self._results.getFdrSignBins()
            
            core.line(str(coreCls().highlight('%i significant bins out of %i, at %i' \
                                              % (numSign, numTested, self._results.FDR_THRESHOLD*100) + '% FDR*')))
            core.line('')
            localPvalsLink = str(coreCls().link('collection of FDR-corrected p-values per bin', localPvalsUrl))
            notComputeLink = str(coreCls().link('Not able to compute', '#', \
                                               args='''onclick="return toggle('no_global_pval_expl')"'''))
            core.line('A ' + localPvalsLink + ' was computed. ' + notComputeLink + ' a global p-value for this analysis.')
            core.styleInfoBegin(styleId="no_global_pval_expl", styleClass="infomessagesmall explanation")
            core.line('(Explanation to appear in box)')
            core.styleInfoEnd()
            
            if numIgnored > 0:
                core.line('')
                core.line('%s bin%s excluded due to lack of data.' % (numIgnored, 's' if numIgnored > 1 else ''))
                
            core.line('')
            core.line(EffectSizeText)
            core.line('')
            core.line(FDR_text)

            h0h1Text = self._getH0andH1Text(coreCls)
            if h0h1Text is not None:
                core.divider(withSpacing=True)
                core.line('In each bin, the test of')
                core.append(h0h1Text)
                core.line('was performed.')
        else:
            # Global result available: report the global p-value, optionally
            # followed by the per-bin FDR summary when multiple bins exist.
            h0h1Text = self._getH0andH1Text(coreCls)
            if h0h1Text is not None:
                core.line('The p-value is %s for the test' % strWithStdFormatting(globalPval) )
                core.append(h0h1Text)
            else:
                core.line('The p-value is %s.' % strWithStdFormatting(globalPval) )
                core.line('')
            core.line('Low p-values are evidence against H0.')

            numSign, numTested, numIgnored = self._results.getFdrSignBins()
            if numTested+numIgnored > 1:                
                localPvalsLink = str(coreCls().link('each bin separately', localPvalsUrl))
                excludeText = ' (%i bin%s excluded from FDR-analysis due to lacking p-values).' \
                              % (numIgnored, 's' if numIgnored>1 else '.') if numIgnored>0 else ''
                core.line('')
                core.line('The test was also performed for ' + localPvalsLink + \
                          ', resulting in %i significant bins out of %i, at %i%% FDR*' % (numSign, numTested, self._results.FDR_THRESHOLD*100) +\
                          excludeText)

            core.line('')
            core.line(EffectSizeText)
            core.line('')
            core.line(FDR_text)
        
        # Describe the null model (preservation/randomization rules), if any.
        nullModel = self._results._analysis.getNullModel()
        if nullModel is not None:
            core.divider(withSpacing=True)
            core.line('P-values were computed under the %s defined by the following preservation and randomization rules:' \
                      % str(coreCls().highlight('null model')))
            core.paragraph(nullModel, indent=True)

        testStatistic = self._results.getTestStatisticText()
        if testStatistic != None:
            #pick out relevant part:
            # Strip a leading 'Test statistic:'-style prefix, if present.
            mo = re.search('^[tT]est.[sS]tatistic ?:? ?',testStatistic)
            if mo!= None:
                testStatistic = testStatistic[mo.end():]            
                #if len(testStatistic)>0 and testStatistic[0]=='(':
                    #testStatistic = testStatistic[1:]
                #if len(testStatistic)>0 and testStatistic[-1]==')':
                    #testStatistic = testStatistic[:-1]
            
            tsValue, expTsValue = self._getTestStatisticAndExpectedValues()
            core.divider(withSpacing=True)
            core.line('The %s used is:' % str(coreCls().highlight('test statistic')))
            core.paragraph(testStatistic, indent=True)
            
            if tsValue is not None:
                if expTsValue is not None:
                    core.line('The value of the test statistic is %s, which is %s the expected value: %s.' \
                                % (strWithStdFormatting(tsValue), \
                                   (str(coreCls().emphasize('higher')) + ' than' if tsValue > expTsValue else \
                                    (str(coreCls().emphasize('lower')) + ' than' if tsValue < expTsValue else \
                                     str(coreCls().emphasize('equal')) + ' to')), \
                                   strWithStdFormatting(expTsValue)))
                else:
                    core.line('The value of the test statistic is %s.' % (strWithStdFormatting(tsValue)))
            
        #temporary solution, as lacking objects needed to construct note-link directly..
        # Scrape the note-link out of the run description HTML, if present.
        noteText = ''
        if self._results._runDescription is not None:
            #mo = re.search('<note.*note>', self._results._runDescription)
            mo = re.search('<a href[^>]*/notes/[^>]*>[^<]*</a>', self._results._runDescription)
            if mo is not None:
                noteLink = mo.string[mo.start():mo.end()]
                noteText = ' See ' + noteLink + ' for a more complete description of the test.'
        
        if noteText == '':
            logMessage('Note-link not found in runDescription, and thus omitted from results')
            
        core.divider(withSpacing=True)
        
        # Closing links: run description toggle and full results table toggle.
        runDescLink = str(coreCls().link('run description', '#', \
                                         args='''onclick="return toggle('run_description')"'''))
        core.line('The p-values may be subject to further parameter choices, which are listed in the %s.' %\
                  (runDescLink) + noteText)
        core.divider(withSpacing=True)
        resultsLink = str(coreCls().link('See full details', '#', \
                                         args='''onclick="return toggle('results_box')"'''))
        core.line(resultsLink + ' of the results in table form.')
        core.styleInfoEnd()
        
        return str(core)
Exemplo n.º 55
0
    def getRunDescription(trackName1, trackName2, trackNameIntensity, analysisDef, ubSource, revEngBatchLine, \
                          urlForTrackAutoSelection, manualSeed, **kwArgs):
        """Build the HTML 'run description' report for an analysis run.

        Sections produced, in order: genome, each involved track, the
        analysis and its options, null/alternative hypotheses, analysis
        regions, the solution (statistic description), timestamp, optional
        autoselection URL and batch-run line, and references.

        :param trackName1: first track name as a list of parts, or None/[]
        :param trackName2: second track name as a list of parts, or None/[]
        :param trackNameIntensity: intensity track name, or None/[]
        :param analysisDef: analysis definition string
        :param ubSource: user-bin source; provides .genome and iterates regions
        :param revEngBatchLine: reconstructed batch-run line, or None/''
        :param urlForTrackAutoSelection: autoselection URL, or None/''
        :param manualSeed: manually chosen random seed, or None
        :returns: the complete report as an HTML string
        """
        genome = ubSource.genome
        core = HtmlCore()

        analysis = Analysis(analysisDef, genome, trackName1, trackName2, **kwArgs)
        
        core.header('GENOME')
        core.append(GenomeInfo(genome).mainInfo(printEmpty=False))
        core.divider()
                
        # Format conversion choices only apply to the two main tracks.
        formatChoices = analysis.getFormatConverterChoicesAsText().items()
        tr1FormatChoice, tr2FormatChoice = formatChoices if len(formatChoices) == 2 else (None, None) 
        
        # One section per present track (history tracks are labeled as such).
        first = True
        for tn,label,formatChoice in zip([trackName1,trackName2,trackNameIntensity], \
                                         ['TRACK 1','TRACK 2','INTENSITY TRACK'], \
                                         [tr1FormatChoice,tr2FormatChoice,None]):
            if tn in [None, []]:
                continue
            
            if not first:
                core.divider()

            core.header(label)
            trackInfo = TrackInfo(genome, tn)
            trackText = ''
            if ExternalTrackManager.isHistoryTrack(tn):
                assert len(tn)>=4, 'Length of external track name < 4: %s' % str(tn)
                core.descriptionLine('Name', ExternalTrackManager.extractNameFromHistoryTN(tn) + ' (from history)' + os.linesep)
            else:
                core.descriptionLine('Name', ':'.join(tn) + os.linesep)
            core.append(trackInfo.mainInfo(printEmpty=False))

            if formatChoice is not None:
                core.descriptionLine('Treated as', formatChoice[1])
            
            first = False
        
        core.divider()
        core.header('ANALYSIS')
        core.paragraph( ''.join(str(analysis).split(':')[1:]) )

        # OPTIONS header is only emitted if at least one choice exists.
        first = True
        for label,choice in analysis.getInterfaceChoicesAsText().items():
            if first:
                core.divider()
                core.header('OPTIONS')
            
            # Show the actual seed used instead of the 'Random' placeholder.
            if manualSeed is not None and label == 'Random seed' and choice == 'Random':
                choice = str(manualSeed)
                
            core.descriptionLine(label, choice)
            first = False
            
        h0 = analysis.getH0()
        if h0 is not None:
            core.divider()
            core.header('NULL HYPOTHESIS')
            core.paragraph(h0)
            
        h1 = analysis.getH1()
        if h1 is not None:
            core.divider()
            core.header('ALTERNATIVE HYPOTHESIS')
            core.paragraph(h1)
            
        core.divider()
        core.header('ANALYSIS REGIONS')
        if hasattr(ubSource, 'description'):
            core.paragraph(ubSource.description)
            
        core.divider()
        core.header('SOLUTION')

        statClass = analysis.getStat()
        #One alternative is to put getDescription in MagicStatFactory-hierarchy as class-method, and get real class behind partial-object.
        #if isinstance(statClass, functools.partial):
            #statClass = statClass.func
        #core.paragraph( statClass.getDescription() )

        #Chosen alternative is to Instantiate an object, which will automatically give object of real class..
        #and then use the following two lines, which will get class in Statistic-hierarchy instead of MagicStatFactory-hierarchy ..
        try:
            # Need at least one region to instantiate the statistic object.
            reg = ubSource.__iter__().next()
        except:
            core.paragraph('Solution not relevant, as there are no specified analysis regions..')
        else:
            track1, track2 = analysis.getTracks()
            if statClass is None:
                core.paragraph('Solution not available, due to currently invalid analysis')
                logMessage('Solution not available, with params: ' + str([trackName1, trackName2, analysisDef]), level=logging.WARN )
            else:
                statObj = statClass(reg,track1, track2)
                statDescr = statObj.getDescription()
                # Rewrite <note>...</note> markers into links to static note pages.
                replPat = '<a href=' + os.sep.join([STATIC_REL_PATH,'notes','stats','']) + r'\1>note</a>'
                statDescr = re.sub('<note>(.*)</note>', replPat, statDescr)
        
                core.paragraph( statDescr )

        core.divider()
        core.header('TIME OF ANALYSIS')
        core.paragraph('Analysis initiated at time: ' + str( datetime.datetime.now() ) )
        
        if urlForTrackAutoSelection not in [None, '']:
            core.divider()
            core.header('URL FOR TRACK AUTOSELECTION')
            #urlOptions = '&'.join(['track1=' + quote(':'.join(trackName1)), 'track2=' + quote(':'.join(trackName2))])
            #core.paragraph(URL_PREFIX + '/hyper?' + urlOptions)
            core.styleInfoBegin(styleClass='break-word')
            core.paragraph(urlForTrackAutoSelection)
            core.styleInfoEnd()
            
        if revEngBatchLine not in [None, '']:
            core.divider()
            core.header('CORRESPONDING BATCH-RUN LINE')
            #if any(ExternalTrackManager.isRedirectOrExternalTrack(tn) for tn in [trackName1, trackName2]):
                #core.paragraph('Batch-run line not available with tracks from history')
            #else:
            core.styleInfoBegin(styleClass='break-word')
            core.paragraph(revEngBatchLine)
            core.styleInfoEnd()

        core.divider()
        core.header('REFERENCES')
        core.paragraph('The HyperBrowser system is described in:<br>"Sandve et al., <a href="http://genomebiology.com/2010/11/12/R121/">The Genomic HyperBrowser: inferential genomics at the sequence level</a>, Genome Biol. 2010;11(12):R121')
        from gold.statistic.RandomizationManagerStat import RandomizationManagerStat
        if statClass is not None and RandomizationManagerStat.getMcSamplingScheme(statClass.keywords) == 'MCFDR':
            core.paragraph('The p-values of this analysis were computed using the MCFDR scheme for Monte Carlo based p-value computation'+\
                           ', described in:<br>Sandve et al., <a href="http://bioinformatics.oxfordjournals.org/content/early/2011/10/13/bioinformatics.btr568.long">Sequential Monte Carlo multiple testing</a>, Bioinformatics 2011')
        
#        description = \
#'''
#Run descriptions will be introduced in the next version of HB. <br>
#Below is an example run description, which is a static text unconnected to your choices. The purpose is to get feedback from you on what this should look like:<br>
#Track1 (refseg:genes): Unmarked points (converted from unmarked segments, taking midpoints)<br>
#Track2 (DNA melting:meltmap): Function<br>
#Bins: Chr1, divided into bins of 10 megabases<br>
#Question: Are track1-points occurring with different frequency inside track2-segment than outside?<br>
#Analysis:<br>
#The main result is a p-value resulting from a statistical test connected to the question.<br>
#The null-hypothesis assumes that the track1-points are randomly distributed according to a poisson-distribution, with the same number of points as in the original data. Track2-segment are assumed fixed as they are in the original data. This can be answered by a binomial test. The alternative hypothesis is then that the count of points inside segments has resulted from a different distribution of points, where the points are then either distributed more or less inside segments versus outside. See the note on this question in the user guide for further info.<br>
#'''
        return str(core)
    def storeBoundingRegions(self, boundingRegionTuples, genomeElementChrList, sparse):
        """Validate and persist bounding regions to the shelve file at self._fn.

        :param boundingRegionTuples: iterable of objects with .region (a
            genome region) and .elCount (number of track elements inside it);
            must be grouped by chromosome and sorted within each chromosome
        :param genomeElementChrList: chromosomes (sequences) that actually
            contain track data; each must be covered by bounding regions
        :param sparse: if True, element index ranges are stored per chromosome
            and global bin index ranges are computed; if False (dense), each
            region's length must equal its element count
        :raises InvalidFormatError: on ungrouped, unsorted, overlapping,
            adjoining, empty or count-inconsistent bounding regions
        """
        assert sparse in [False, True]

        tempContents = OrderedDict()

        genomeElementChrs = set(genomeElementChrList)    
        lastRegion = None
        chrStartIdxs = OrderedDict()
        chrEndIdxs = OrderedDict()
        totElCount = 0
        totBinCount = 0
        
        for br in boundingRegionTuples:
            # First region of a new chromosome: all regions of a chromosome
            # must appear contiguously in the input.
            if lastRegion is None or br.region.chr != lastRegion.chr:
                if br.region.chr in tempContents:
                    raise InvalidFormatError("Error: bounding region (%s) is not grouped with previous bounding regions of the same chromosome (sequence)." % br.region)
                
                lastRegion = None
                tempContents[br.region.chr] = OrderedDict() #sorteddict()
                if sparse:
                    chrStartIdxs[br.region.chr] = totElCount
            else:
                # Within a chromosome: regions must be sorted, non-overlapping,
                # and separated by at least one position gap.
                if br.region < lastRegion:
                    raise InvalidFormatError("Error: bounding regions in the same chromosome (sequence) are unsorted: %s > %s." % (lastRegion, br.region))
                if lastRegion.overlaps(br.region):
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' overlap." % (lastRegion, br.region))
                if lastRegion.end == br.region.start:
                    raise InvalidFormatError("Error: bounding regions '%s' and '%s' are adjoining (there is no gap between them)." % (lastRegion, br.region))
            
            if len(br.region) < 1:
                raise InvalidFormatError("Error: bounding region '%s' does not have positive length." % br.region)
                
            if not sparse and len(br.region) != br.elCount:
                raise InvalidFormatError("Error: track type representation is dense, but the length of bounding region '%s' is not equal to the element count: %s != %s" % (br.region, len(br.region), br.elCount))
            
            # Dense tracks get a per-region element index range now; sparse
            # tracks get per-chromosome ranges, filled in during the second pass.
            startIdx, endIdx = (totElCount, totElCount + br.elCount) if not sparse else (None, None)
            totElCount += br.elCount
            if sparse:
                chrEndIdxs[br.region.chr] = totElCount
            
            tempContents[br.region.chr][br.region.start] = BoundingRegionInfo(br.region.start, br.region.end, startIdx, endIdx, 0, 0)
            
            lastRegion = br.region
        
        if sparse:
            # Second pass (sparse only): assign per-chromosome element index
            # ranges and global bin index ranges to every region info.
            totBinCount = 0
            for chr in tempContents:
                chrLen = GenomeInfo.getChrLen(self._genome, chr)
                numBinsInChr = CompBinManager.getNumOfBins(GenomeRegion(start=0, end=chrLen))
                for key in tempContents[chr].keys():
                    startBinIdx = totBinCount
                    endBinIdx = totBinCount + numBinsInChr
                    brInfo = tempContents[chr][key]
                    
                    if chr in genomeElementChrs:
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, \
                                                                    chrStartIdxs[chr], chrEndIdxs[chr], \
                                                                    startBinIdx, endBinIdx)
                    else:
                        # Chromosomes without track data must not claim elements.
                        if chrEndIdxs[chr] - chrStartIdxs[chr] > 0:
                            raise InvalidFormatError("Error: bounding region '%s' has incorrect element count: %s > 0" % (GenomeRegion(chr=chr, start=brInfo.start, end=brInfo.end), chrEndIdxs[chr] - chrStartIdxs[chr]))
                        tempContents[chr][key] = BoundingRegionInfo(brInfo.start, brInfo.end, 0, 0, 0, 0)
                
                if chr in genomeElementChrs:
                    totBinCount += numBinsInChr
        
        # Every chromosome with data must have bounding regions.
        if len(genomeElementChrs - set(tempContents.keys())) > 0:
            raise InvalidFormatError('Error: some chromosomes (sequences) contains data, but has no bounding regions: %s' % ', '.join(genomeElementChrs - set(tempContents.keys())))
        
        ensurePathExists(self._fn)
        
        # Freeze each chromosome's dict into an immutable holder before storing.
        for chr in tempContents:
            brInfoDict = tempContents[chr]
            tempContents[chr] = BrInfoHolder(tuple(brInfoDict.keys()), tuple(brInfoDict.values()))
        
        brShelve = safeshelve.open(self._fn)
        brShelve.update(tempContents)
        brShelve.close()
        
        # Busy-wait until the shelve file becomes visible on disk, logging
        # each poll — presumably guards against delayed file-system visibility
        # (TODO confirm why this is needed).
        while not self.fileExists():
            from gold.application.LogSetup import logMessage
            logMessage("Bounding region shelve file '%s' has yet to be created" % self._fn)
            import time
            time.sleep(0.2)
 def __init__(self, category, numSamples):
     """Initialize an empty sample counter for the given category.

     :param category: label identifying what is being counted
     :param numSamples: number of samples this counter relates to
     """
     self._category = category
     self._numSamples = numSamples
     self._count = 0
     logMessage('kategori = %s' % self._category)
     self.valueDict = {}
Exemplo n.º 58
0
    def storePickledResults(self):
        try:
            from cPickle import dump
            pickleStaticFile = GalaxyRunSpecificFile(['results.pickle'],self._galaxyFn)
            #print 'TEMP1: PATH: ',pickleStaticFile.getDiskPath(True)
            from copy import copy
            pickleList = [copy(res) for res in self._resultsList]
            for res in pickleList:
                res._analysis=None
            dump(pickleList, pickleStaticFile.getFile())
            #dump(self._resultsList, pickleStaticFile.getFile())
        except Exception, e:
            logException(e, message='Not able to pickle results object')
        except:
            logMessage('Exception object not subclassing Exception encountered',level=logging.ERROR)
    
class ResultsViewer(object):
    def __new__(self, results, baseDir):
        #print 'TEMP1 ', results.values()
        presCollectionType = results.getPresCollectionType()
        #print 'presCollectionType: ',presCollectionType 
        if presCollectionType == 'standard':
            return StandardResultsViewer.__new__(StandardResultsViewer, results, baseDir)
        elif presCollectionType == 'distribution':
            return DistributionResultsViewer.__new__(DistributionResultsViewer, results, baseDir)
        elif presCollectionType == 'dictofdicts':
            return DictOfDictsResultsViewer.__new__(DictOfDictsResultsViewer, results, baseDir)
        elif presCollectionType == 'matrix':
            return MatrixResultsViewer.__new__(MatrixResultsViewer, results, baseDir)
        elif presCollectionType == 'scatter':