Пример #1
0
    def __init__(self, geSource):
        """
        Set up the manager around a genome element source.

        geSource is stored in decorated form (via _decorateGESource), while
        the track format is derived from the undecorated source as passed in.
        """
        self._geSource = self._decorateGESource(geSource)
        self._boundingRegionsAndGEsCorrespond = None

        # Build the track format once and reuse it for both category flags
        # instead of constructing an identical instance twice.
        trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
        self._areValsCategorical = trackFormat.getValTypeName() == 'Category'
        self._areEdgeWeightsCategorical = trackFormat.getWeightTypeName() == 'Category'
        self._valCategories = set()
        self._edgeWeightCategories = set()

        # Statistics containers, all starting out empty.
        self._numElements = OrderedDefaultDict(int)
        self._maxStrLens = OrderedDefaultDict(partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
        self._maxNumEdges = OrderedDefaultDict(int)

        self._hasCalculatedStats = False
Пример #2
0
    def validateAndReturnErrors(cls, choices):
        '''
        Validate the selected input parameters.

        An error text is returned when the track check fails, when a track is
        selected but no attribute is available for it, or when the format of
        the selected track is dense. The GUI then shows this text to the user
        (if not empty) and greys out the execute button (even if the text is
        empty). If all parameters are valid, None is returned, which enables
        the execute button.
        '''
        trackError = cls._checkTrack(choices,
                                     trackChoiceIndex='track',
                                     genomeChoiceIndex='genome')
        if trackError:
            return trackError

        if choices.track and not choices.attr:
            unsupportedAttrs = ', '.join(cls.UNSUPPORTED_ATTRS)
            return ('You have chosen a track with no attributes (columns) supported for splitting. '
                    'Attributes that do not support splitting are: ') + unsupportedAttrs

        selectedFormat = TrackFormat.createInstanceFromGeSource(
            etm.getGESourceFromGalaxyOrVirtualTN(choices.track, choices.genome))

        if not selectedFormat.isDense():
            return None

        formatLine = 'The track format of the selected track file is: %s' % selectedFormat.getFormatName()
        return formatLine + ' This tool only supports track types Points, Segments, or variations of these.'
Пример #3
0
    def _composeContents(self,
                         out,
                         hbColumns,
                         columns,
                         geSource,
                         onlyNonDefault=True,
                         singleDataLine=False):
        """
        Write header lines, the column specification line, bounding region
        lines and data lines to out.

        The track format is taken from self._geSource, while the elements
        written are iterated from the geSource argument. With singleDataLine
        set, writing stops after the first bounding region and at most one
        data line.
        """
        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        out.write(self._composeHeaderLines(onlyNonDefault))
        out.write(self._composeColSpecLine(columns))

        regionsWithElements = iterateOverBRTuplesWithContainedGEs(
            geSource, onlyYieldTwoGEs=singleDataLine)

        for boundingRegion, elements in regionsWithElements:
            if boundingRegion is not None:
                out.write(self._composeBoundingRegionLine(boundingRegion))

            keptElements = self._removeStartElementIfApplicable(
                trackFormat, elements)
            for lineNo, element in enumerate(keptElements, 1):
                # The "last line" flag compares against the unfiltered list.
                isLast = lineNo == len(elements)
                out.write(
                    self._composeDataLine(element, hbColumns, lineNo, isLast))

                if singleDataLine:
                    break

            if singleDataLine:
                break
Пример #4
0
    def _getBasicTrackFormat(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """
        Return (genome, trackName, basicTrackFormat) for the chosen track.

        For Galaxy tracks the format name is read from the track's genome
        element source; a Warning raised while doing so yields an empty
        string as format. For other tracks the name is 'Points' for nmer
        tracks and the stored TrackInfo format name otherwise.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        tn = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if not ExternalTrackManager.isGalaxyTrack(tn):
            if GalaxyInterface.isNmerTrackName(genome, tn):
                formatName = 'Points'
            else:
                formatName = TrackInfo(genome, tn).trackFormatName
            return genome, tn, GeneralGuiTool._convertToBasicTrackFormat(formatName)

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            tn, genome)
        try:
            fullFormatName = TrackFormat.createInstanceFromGeSource(
                geSource).getFormatName()
            basicFormat = GeneralGuiTool._convertToBasicTrackFormat(
                fullFormatName)
        except Warning:
            return genome, tn, ''
        return genome, tn, basicFormat
    def getOptionsBoxOutputFormat(cls, prevChoices):
        """
        List the file format names that every selected track can be
        converted to.

        Returns None unless format conversion has been chosen. Returns an
        empty list when the common formats cannot be determined, e.g. when
        reading the GSuite or one of the tracks fails, or when no track is
        selected.
        """
        if prevChoices.changeFormat != cls.OUTPUT_FORMAT_CONVERT:
            return None

        try:
            # Explicit import so reduce resolves regardless of whether it is
            # available as a builtin.
            from functools import reduce
            from gold.origdata.GenomeElementSource import GenomeElementSource
            from gold.origdata.FileFormatComposer import findMatchingFileFormatComposers
            from gold.track.TrackFormat import TrackFormat

            gSuite = getGSuiteFromGalaxyTN(prevChoices.gsuite)
            selectedTracks = cls._getSelectedTracks(prevChoices, gSuite)

            allGeSources = [
                GenomeElementSource(track.path,
                                    genome=track.genome,
                                    printWarnings=False,
                                    suffix=track.suffix)
                for track in selectedTracks
            ]
            matchingComposersForAllSelectedTracks = [
                findMatchingFileFormatComposers(
                    TrackFormat.createInstanceFromGeSource(geSource))
                for geSource in allGeSources
            ]

            # Only composers matching every selected track are offered.
            commonComposers = reduce(
                set.intersection,
                map(set, matchingComposersForAllSelectedTracks))
            return [
                composer.fileFormatName for composer in commonComposers
            ]
        except Exception:
            # Best effort for the GUI: any ordinary failure means "no options".
            # SystemExit and KeyboardInterrupt are deliberately not swallowed.
            return []
Пример #6
0
 def __init__(self, geSource):
     """
     Initialise the base manager, remember the track format of geSource and
     create empty statistics containers.

     Each container is a defaultdict whose missing keys are filled with an
     OrderedDefaultDict using the given inner default factory.
     """
     GESourceManager.__init__(self, geSource)
     self._tf = TrackFormat.createInstanceFromGeSource(geSource)

     def nestedDefaults(innerFactory):
         # defaultdict -> OrderedDefaultDict -> innerFactory()
         return defaultdict(partial(OrderedDefaultDict, innerFactory))

     self._numElements = nestedDefaults(int)
     self._valCategories = nestedDefaults(set)
     self._edgeWeightCategories = nestedDefaults(set)
     self._maxStrLens = nestedDefaults(
         partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
     self._maxNumEdges = nestedDefaults(int)
 def getOptionsBoxConversion(prevChoices):
     """
     List the possible conversions for the selected history element.

     Each entry has the form
     '<source format> -> <target format> (track type: <type>)'. Composers
     producing the source's own file format are left out. Returns None when
     no history element is selected and an empty list when the options
     cannot be determined.
     """
     if not prevChoices.history:
         return None

     try:
         geSource = UniversalConverterTool._getGESource(prevChoices)
         matchingComposers = findMatchingFileFormatComposers(
             TrackFormat.createInstanceFromGeSource(geSource))
         # The source format name does not change per composer; look it up once.
         sourceFormatName = geSource.getFileFormatName()
         return ['%s -> %s (track type: %s)' %
                 (sourceFormatName, composerInfo.fileFormatName, composerInfo.trackFormatName)
                 for composerInfo in matchingComposers
                 if sourceFormatName != composerInfo.fileFormatName]
     except Exception:
         # Best effort for the GUI: any ordinary failure means "no options".
         # SystemExit and KeyboardInterrupt are deliberately not swallowed.
         return []
 def getOptionsBoxConversion(prevChoices):
     """
     List the possible conversions for the selected history element.

     Each entry has the form
     '<source format> -> <target format> (track type: <type>)'. Composers
     producing the source's own file format are left out. Returns None when
     no history element is selected and an empty list when the options
     cannot be determined.
     """
     if not prevChoices.history:
         return None

     try:
         geSource = UniversalConverterTool._getGESource(prevChoices)
         matchingComposers = findMatchingFileFormatComposers(
             TrackFormat.createInstanceFromGeSource(geSource))
         # The source format name does not change per composer; look it up once.
         sourceFormatName = geSource.getFileFormatName()
         return ['%s -> %s (track type: %s)' %
                 (sourceFormatName, composerInfo.fileFormatName, composerInfo.trackFormatName)
                 for composerInfo in matchingComposers
                 if sourceFormatName != composerInfo.fileFormatName]
     except Exception:
         # Best effort for the GUI: any ordinary failure means "no options".
         # SystemExit and KeyboardInterrupt are deliberately not swallowed.
         return []
 def _getGESourceManagerFromGESource(self, geSource):
     """
     Pick the manager class that fits the track format of geSource.

     Sources without a dense representation get a GESourceManager. Dense
     sources get a SkipExtraPassDenseGESourceManager when their value type
     is 'Number', 'Number (integer)' or 'Case-control'; any other dense
     value type raises NotSupportedError.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

     if not trackFormat.reprIsDense():
         return GESourceManager(geSource)

     supportedDenseValTypes = ('Number', 'Number (integer)', 'Case-control')
     if trackFormat.getValTypeName() not in supportedDenseValTypes:
         raise NotSupportedError

     return SkipExtraPassDenseGESourceManager(geSource)
Пример #10
0
 def __new__(self, geSource, brRegionList):
     """
     Create an instance of the concrete manager class matching geSource.

     Sources without a dense representation use the sparse manager; dense
     sources with value type 'Number' use the number-function manager; any
     other dense value type raises NotSupportedError.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

     if not trackFormat.reprIsDense():
         managerCls = SparseOneChrSortedNoOverlapsGESourceManager
     elif trackFormat.getValTypeName() == 'Number':
         managerCls = NumberFunctionOneChrSortedNoOverlapsGESourceManager
     else:
         raise NotSupportedError

     return managerCls.__new__(managerCls, geSource, brRegionList)
Пример #11
0
 def getOptionsBoxFormat(prevChoices):
     """
     List the format options for the selected track.

     The first entry keeps the current file format ("no conversion"); the
     remaining entries describe conversions to every matching composer
     whose file format differs from the current one. Returns None when no
     track is selected.
     """
     if not prevChoices.track:
         return None

     geSource = etm.getGESourceFromGalaxyOrVirtualTN(
         prevChoices.track, prevChoices.genome)
     tf = TrackFormat.createInstanceFromGeSource(geSource)
     matchingComposers = findMatchingFileFormatComposers(tf)

     options = [geSource.getFileFormatName() +
                ' (no conversion, track type: %s)' % tf.getFormatName()]

     for composerInfo in matchingComposers:
         if geSource.getFileFormatName() == composerInfo.fileFormatName:
             continue
         options.append('%s -> %s (track type: %s)' %
                        (geSource.getFileFormatName(),
                         composerInfo.fileFormatName,
                         composerInfo.trackFormatName))

     return options
Пример #12
0
 def _composeContents(self, out, hbColumns, columns, geSource, onlyNonDefault=True, singleDataLine=False):
     """
     Write header lines, the column specification line, bounding region
     lines and data lines to out.

     The track format is taken from self._geSource, while the elements
     written are iterated from the geSource argument. With singleDataLine
     set, writing stops after the first bounding region and at most one
     data line.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     out.write(self._composeHeaderLines(onlyNonDefault))
     out.write(self._composeColSpecLine(columns))

     regionsWithElements = iterateOverBRTuplesWithContainedGEs(
         geSource, onlyAddTwoGEs=singleDataLine)

     for boundingRegion, elements in regionsWithElements:
         if boundingRegion is not None:
             out.write(self._composeBoundingRegionLine(boundingRegion))

         keptElements = self._removeStartElementIfApplicable(trackFormat, elements)
         for lineNo, element in enumerate(keptElements, 1):
             # The "last line" flag compares against the unfiltered list.
             isLast = lineNo == len(elements)
             out.write(self._composeDataLine(element, hbColumns, lineNo, isLast))

             if singleDataLine:
                 break

         if singleDataLine:
             break
 def _allGESources(self, trackName):
     """
     Yield one wrapped genome element source per region.

     All regions are used when preprocessing; otherwise only the first
     region is. Sources with a dense representation are wrapped with a
     bounding region whose element count is the region length; other
     sources get an element counter starting at zero.

     NOTE(review): the trackName argument is not used here; the callback
     receives self._trackName instead. Confirm that this is intended.
     """
     if self._preProcess:
         regions = self._regionList
     else:
         regions = [self._regionList[0]]

     for region in regions:
         self._status = "Trying to create custom track geSource for region: {}".format(region)
         geSource = self._getGeSourceCallBackFunc(
             self._genome, self._trackName, region, **self._callBackArgs)

         if TrackFormat.createInstanceFromGeSource(geSource).reprIsDense():
             denseRegions = [BoundingRegionTuple(region, len(region))]
             yield BrTuplesGESourceWrapper(geSource, denseRegions)
         else:
             countedRegions = [BoundingRegionTuple(region, 0)]
             yield GEBoundingRegionElementCounter(geSource, countedRegions)
Пример #14
0
 def _calcTrackStatistics(self, chr, allowOverlaps):
     """
     Update the track statistics with all elements of chr, unless chr is
     already present in the element counts for this allowOverlaps setting.
     """
     if chr in self._numElements[allowOverlaps]:
         return

     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)

     if not (trackFormat.isDense() and trackFormat.isInterval()):
         for element in self._getGEBuckets(allowOverlaps)[chr]:
             self._updateTrackStatistics(element, chr, allowOverlaps)
         return

     # In order to handle the first element of each bounding region for
     # genome partitions and step functions correctly, the elements are
     # walked one bounding region at a time.
     elements = self._getGEBuckets(allowOverlaps)[chr]
     regionStart = 0
     for boundingRegion in self._getBRBuckets(allowOverlaps)[chr]:
         regionEnd = regionStart + boundingRegion.elCount
         for pos, element in enumerate(elements[regionStart:regionEnd]):
             self._updateTrackStatistics(
                 element, chr, allowOverlaps,
                 firstElInPartitionBoundingRegion=(pos == 0))
         regionStart = regionEnd
Пример #15
0
    def _getValueTypeName(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """
        Return the lower-cased value type name of the chosen track.

        Galaxy tracks are inspected through their genome element source,
        nmer tracks give an empty string, and all other tracks use the mark
        type stored in TrackInfo.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        tn = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if ExternalTrackManager.isGalaxyTrack(tn):
            geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
                tn, genome)
            trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
            return trackFormat.getValTypeName().lower()

        if GalaxyInterface.isNmerTrackName(genome, tn):
            return ''

        return TrackInfo(genome, tn).markType.lower()
Пример #16
0
    def testSorting(self):
        geSourceTest = self._commonSetup()
        
        for caseName in geSourceTest.cases:
            if not caseName.startswith('gtrack'):
                continue
                
            if 'no_sort' in caseName:
                print 'Test case skipped: ' + caseName
                continue
                
            print caseName
            print
            
            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print
            
            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))
            
            
            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()
            
            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass
            
            self.assertEquals(sorted(case.boundingRegionsAssertList), [br for br in sortedGeSource.getBoundingRegionTuples()])
    def _compose(self, out):
        """
        Print the track to out: a 'track type=wiggle_0' line followed by
        declaration lines and data lines for each non-empty bounding region.

        Fixed step output prints only the value per element. Variable step
        output prints the 1-based start and the value, and emits a new
        declaration line whenever the chromosome or span changes.
        """
        source = self._geSource

        trackLine = 'track type=wiggle_0'
        if source.getTrackName() is not None:
            trackLine += ' name=%s' % ':'.join(source.getTrackName()).replace(' ', '_')
        print >>out, trackLine

        trackFormat = TrackFormat.createInstanceFromGeSource(source)
        span = source.getFixedLength()
        step = source.getFixedGapSize() + span

        isFixedStep = (trackFormat.reprIsDense() or step > 1 or (step == 1 and span != 1))

        for boundingRegion, elements in iterateOverBRTuplesWithContainedGEs(source):
            if len(elements) == 0:
                continue

            if isFixedStep:
                self._composeFixedStepDeclarationLine(out, boundingRegion.region, step, span)
            else:
                curChr, curSpan = self._composeVariableStepDeclarationLine(out, elements[0])

            for pos, element in enumerate(elements):
                # The first element is left out for dense interval formats
                # when the source reports adding a start element.
                if pos == 0 and trackFormat.isDense() and trackFormat.isInterval() and \
                        source.addsStartElementToDenseIntervals():
                    continue

                formattedVal = self._commonFormatNumberVal(element.val)

                if isFixedStep:
                    columns = [formattedVal]
                else:
                    if element.chr != curChr or self._getVariableSpan(element) != curSpan:
                        curChr, curSpan = self._composeVariableStepDeclarationLine(out, element)
                    columns = [str(element.start + 1), formattedVal]

                print >>out, '\t'.join([str(col) for col in columns])
Пример #18
0
    def validateAndReturnErrors(choices):
        '''
        Validate the selected input parameters.

        An error text is returned when no genome or no categorical track is
        selected, or when the value type of the selected track is not
        'Category'. The GUI then shows this text to the user (if not empty)
        and greys out the execute button (even if the text is empty). If all
        parameters are valid, None is returned, which enables the execute
        button.
        '''
        if not choices.genome:
            return 'Please select genome'

        if not choices.catTrack:
            return 'Please select categorical track from history'

        trackName = choices.catTrack.split(':')
        trackFormat = TrackFormat.createInstanceFromGeSource(
            ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
                trackName, choices.genome))

        if trackFormat.getValTypeName() == 'Category':
            return None

        return 'Please select <b>categorical</b> track from history, current is of type ' + \
               trackFormat.getValTypeName()
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """
        Yield the GESourceManagers to preprocess for trackName.

        When overlaps are not allowed and the collector reports the overlap
        rule for allowOverlaps=True as finalized, a single manager created
        from the track is yielded. Otherwise one manager is yielded per
        genome element source that should be preprocessed. With overlaps
        allowed, the generator ends at the first source that is dense or has
        no overlapping elements.

        self._status is updated before each step.
        """
        trackNameStr = ':'.join(trackName)
        self._status = "Trying to create GESourceManager (trackName: {}, allowOverlaps: {})".format(
            trackNameStr, allowOverlaps)
        collector = PreProcMetaDataCollector(self._genome, trackName)

        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
            self._status = ('Trying to prepare preprocessing for track "%s"' % trackNameStr) + \
                           (' (allowOverlaps: %s)' % allowOverlaps)
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    return

            statusParts = ['Trying to prepare preprocessing for track "%s"' % trackNameStr]
            statusParts.append(
                ' (filename: "%s")' % geSource.getFileName() if geSource.hasOrigFile() else '')
            statusParts.append(' (allowOverlaps: %s)' % allowOverlaps)
            self._status = statusParts[0] + statusParts[1] + statusParts[2]

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
Пример #20
0
 def _init(self):
     """
     Determine whether every value of the source lies in the range 0..1000.

     The flag stays False unless the value type is 'Number (integer)'.
     """
     self._allValsAreBedVals = False

     valTypeName = TrackFormat.createInstanceFromGeSource(self._geSource).getValTypeName()
     if valTypeName != 'Number (integer)':
         return

     self._allValsAreBedVals = all(
         0 <= element.val <= 1000 for element in self._geSource)
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print

            sortedContents = sortGtrackFileAndReturnContents(
                testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))

            reprIsDense = TrackFormat.createInstanceFromGeSource(
                sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList),
                                  [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass

            self.assertEquals(
                sorted(case.boundingRegionsAssertList),
                [br for br in sortedGeSource.getBoundingRegionTuples()])

    def runTest(self):
        """Do nothing; the method exists only as an empty placeholder."""
        return None
Пример #22
0
 def __new__(self, geSource):
     """
     Create an instance of the dense or sparse standard manager, depending
     on whether the track format of geSource has a dense representation.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

     if trackFormat.reprIsDense():
         managerCls = DenseStdGESourceManager
     else:
         managerCls = SparseStdGESourceManager

     return managerCls.__new__(managerCls, geSource)
Пример #23
0
 def _init(self):
     """
     Determine whether every value of the source lies in the range 0..1000.

     The flag stays False unless the value type is 'Number (integer)'.
     """
     self._allValsAreBedVals = False

     valTypeName = TrackFormat.createInstanceFromGeSource(self._geSource).getValTypeName()
     if valTypeName != 'Number (integer)':
         return

     self._allValsAreBedVals = all(
         0 <= element.val <= 1000 for element in self._geSource)