示例#1
0
 def __init__(self, trackName, trackTitle=None):
     """Set up a track with fresh helper objects and a neutral format requirement.

     trackName  -- stored unchanged in self.trackName
     trackTitle -- optional title, stored in self.trackTitle (default: None)
     """
     self.trackName, self.trackTitle = trackName, trackTitle

     # Every instance gets its own source and view loader objects.
     trackSource = TrackSource()
     viewLoader = TrackViewLoader()
     self._trackSource, self._trackViewLoader = trackSource, viewLoader

     # The format requirement starts out neutral.
     self._trackFormatReq = NeutralTrackFormatReq()

     # Neither format converters nor a track id have been determined yet.
     self.formatConverters = self._trackId = None
示例#2
0
    def __init__(self, origTrack, origRegion, randIndex, **kwArgs):
        """Set up a randomized variant of origTrack.

        origTrack  -- the track being randomized; its trackName, extended with
                      ['Randomized', str(randIndex)], names this track
        origRegion -- stored in self._origRegion
        randIndex  -- index of this randomization (used in the track name)
        kwArgs     -- only 'minimal' is read here: self._minimal is true when
                      that key is present and its value compares equal to True
        """
        from gold.formatconversion.FormatConverter import TrivialFormatConverter

        self._origTrack, self._origRegion = origTrack, origRegion
        self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]

        self._trackFormatReq = NeutralTrackFormatReq()
        self._cachedTV = None
        self._minimal = (kwArgs.get('minimal') == True)

        # Both attributes below are set to allow construction of uniqueID
        self.formatConverters = [TrivialFormatConverter]
        self._trackId = None
示例#3
0
    def __init__(self, origTrack, randIndex, **kwArgs):
        """Set up a randomized variant of origTrack and run the _init() hook.

        origTrack -- the track being randomized; it is wrapped in an
                     OrigTrackWrapper, and its trackName/trackTitle are used
                     to name this track
        randIndex -- index of this randomization (used in the track name)
        kwArgs    -- 'minimal' is read here (self._minimal is true when the
                     key is present and its value compares equal to True);
                     all keyword arguments are forwarded to self._init()
        """
        # The wrapper is created first and receives this object as its
        # trackRandomizer.
        wrappedTrack = OrigTrackWrapper(origTrack, trackRandomizer=self)
        self._origTrack = wrappedTrack

        self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]
        self.trackTitle = origTrack.trackTitle

        self._trackFormatReq = NeutralTrackFormatReq()
        self._cachedTV = None
        self._minimal = (kwArgs.get('minimal') == True)

        # Both attributes below are set to allow construction of uniqueID
        self.formatConverters = [TrivialFormatConverter]
        self._trackId = None

        # Hook called last, once all attributes above are in place.
        self._init(origTrack, randIndex, **kwArgs)
    def __init__(self, origTrack, origRegion, randIndex, **kwArgs):
        """Set up a randomized variant of origTrack.

        origTrack  -- the track being randomized; its trackName, extended with
                      ['Randomized', str(randIndex)], names this track
        origRegion -- stored in self._origRegion
        randIndex  -- index of this randomization (used in the track name)
        kwArgs     -- only 'minimal' is read here: self._minimal is true when
                      that key is present and its value compares equal to True
        """
        from gold.formatconversion.FormatConverter import TrivialFormatConverter

        self._origTrack, self._origRegion = origTrack, origRegion
        self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]

        self._trackFormatReq = NeutralTrackFormatReq()
        self._cachedTV = None
        self._minimal = (kwArgs.get('minimal') == True)

        # TODO: Is the formatConverters hack below required anymore?
        # Both attributes are set to allow construction of uniqueID.
        self.formatConverters = [TrivialFormatConverter]
        self._trackId = None

        # Nothing has been read through this track yet.
        self._hasBeenRead = False
示例#5
0
 def __init__(self, trackName):
     """Set up a track with fresh helper objects and a neutral format requirement.

     trackName -- stored unchanged in self.trackName
     """
     self.trackName = trackName

     # Every instance gets its own source and view loader objects.
     trackSource = TrackSource()
     viewLoader = TrackViewLoader()
     self._trackSource, self._trackViewLoader = trackSource, viewLoader

     # The format requirement starts out neutral.
     self._trackFormatReq = NeutralTrackFormatReq()

     # Neither format converters nor a track id have been determined yet.
     self.formatConverters = self._trackId = None
 def __init__(self, trackView, ignoreTrackFormat=False):
     """Wrap a ready-made track view under a generated dummy track name.

     trackView         -- stored in self._tv
     ignoreTrackFormat -- stored in self._ignoreTrackFormat (default: False)

     The track name is ['dummy<N>'], where N is the current value of the
     class-level counter SampleTrackWithConverters.trackNo; the counter is
     incremented by one for every instance created.
     """
     self._tv = trackView
     self._ignoreTrackFormat = ignoreTrackFormat

     # Take the current counter value for the name, then advance the counter.
     currentNo = SampleTrackWithConverters.trackNo
     self.trackName = ['dummy' + str(currentNo)]
     SampleTrackWithConverters.trackNo += 1

     self.formatConverters = None
     self._trackFormatReq = NeutralTrackFormatReq()
    def __init__(self,
                 region,
                 track,
                 trackFormatReq=NeutralTrackFormatReq(),
                 **kwArgs):
        """Initialise for a single region and add trackFormatReq to the track.

        region         -- must not be iterable (according to isIter)
        track          -- forwarded to the parent __init__
        trackFormatReq -- forwarded to the parent __init__ and afterwards
                          added to self._track via addFormatReq(); the default
                          object is created once, when the function is defined
        kwArgs         -- forwarded unchanged to the parent __init__

        Raises SplittableStatNotAvailableError when isIter(region) is true.
        """
        regionIsIterable = isIter(region)
        if regionIsIterable:
            raise SplittableStatNotAvailableError()

        # NOTE(review): super(self.__class__, self) is resolved relative to
        # the runtime class of self. If this class is ever subclassed, the
        # lookup would end up at this same __init__ again. Naming the class
        # explicitly would avoid that -- confirm there are no subclasses.
        parentInit = super(self.__class__, self).__init__
        parentInit(region, track, trackFormatReq=trackFormatReq, **kwArgs)

        # self._track is presumably set by the parent __init__ -- TODO confirm
        self._track.addFormatReq(trackFormatReq)
class RandomizedTrack(Track):
    """Abstract track that serves randomized versions of an original track.

    getTrackView() fetches the original track view through RawDataStat, hands
    its numpy arrays to _createRandomizedNumpyArrays() (which subclasses must
    implement) and wraps the returned arrays in a new TrackView.
    """
    IS_MEMOIZABLE = False

    def __new__(cls, *args, **kwArgs):
        # Allocate directly through object.__new__; the constructor arguments
        # are not inspected here.
        return object.__new__(cls)

    def __init__(self, origTrack, origRegion, randIndex, **kwArgs):
        """Remember the original track and region and derive the track name.

        The name is origTrack.trackName extended with
        ['Randomized', str(randIndex)]. kwArgs is accepted but not used here.
        """
        self._origTrack, self._origRegion = origTrack, origRegion
        self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]
        self._trackFormatReq = NeutralTrackFormatReq()
        self._cachedTV = None

    def _checkTrackFormat(self, origTV):
        """Hook called with the original track view; does nothing here."""
        pass

    def getTrackView(self, region):
        """Build and return a randomized TrackView for region.

        region has to equal the original region or, when the original region
        is iterable, be contained in it. The original view must disallow
        overlaps, use 'crop' border handling and be anchored at region; all of
        this is checked with assert statements.
        """
        assert (region in self._origRegion) if isIter(self._origRegion) \
            else (self._origRegion == region)

        # The view is rebuilt on every call; self._cachedTV merely holds the
        # most recently built one.
        origTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()
        self._checkTrackFormat(origTV)

        assert(not origTV.allowOverlaps)
        assert(origTV.borderHandling == 'crop')
        assert region == origTV.genomeAnchor

        anchor = origTV.genomeAnchor
        randomized = self._createRandomizedNumpyArrays(
            len(anchor),
            origTV.startsAsNumpyArray(),
            origTV.endsAsNumpyArray(),
            origTV.valsAsNumpyArray(),
            origTV.strandsAsNumpyArray(),
            origTV.idsAsNumpyArray(),
            origTV.edgesAsNumpyArray(),
            origTV.weightsAsNumpyArray(),
            origTV.allExtrasAsDictOfNumpyArrays(),
            origTV.trackFormat,
            region)
        starts, ends, vals, strands, ids, edges, weights, extras = randomized

        # NOTE(review): getClassName is not used in this method.
        from gold.util.CommonFunctions import getClassName

        # The start of the genome anchor is added to the returned starts and
        # ends (when present) before they are handed to TrackView.
        shiftedStarts = None if starts is None else starts + anchor.start
        shiftedEnds = None if ends is None else ends + anchor.start

        self._cachedTV = TrackView(anchor, shiftedStarts, shiftedEnds,
                                   vals, strands, ids, edges, weights,
                                   origTV.borderHandling, origTV.allowOverlaps,
                                   extraLists=extras)

        assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), \
            'Incompatible track-format: ' + str(self._trackFormatReq) + \
            ' VS ' + str(self._cachedTV.trackFormat)
        return self._cachedTV

    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, origTrackFormat, region):
        """Return (starts, ends, vals, strands, ids, edges, weights, extras).

        Must be overridden; this base implementation raises AbstractClassError.
        """
        raise AbstractClassError
示例#9
0
class RandomizedTrack(Track, TrackRandomizer):
    """Abstract randomized track built around an OrigTrackWrapper.

    Subclasses implement _getRandTrackView() and may override _init() for
    extra set-up. WORKS_WITH_MINIMAL tells whether the subclass can produce a
    randomized view for a "minimal" run; when it cannot, getTrackView() returns
    the view of the wrapped original track instead.
    """
    __metaclass__ = ABCMeta

    IS_MEMOIZABLE = False
    WORKS_WITH_MINIMAL = True

    def __new__(cls, *args, **kwArgs):
        # Allocate directly through object.__new__; the constructor arguments
        # are not inspected here.
        return object.__new__(cls)

    def __init__(self, origTrack, randIndex, **kwArgs):
        """Set up a randomized variant of origTrack and run the _init() hook.

        'minimal' is the only keyword argument read here (self._minimal is
        true when the key is present and its value compares equal to True);
        all keyword arguments are forwarded to self._init().
        """
        # The wrapper is created first and receives this object as its
        # trackRandomizer.
        wrappedTrack = OrigTrackWrapper(origTrack, trackRandomizer=self)
        self._origTrack = wrappedTrack

        self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]
        self.trackTitle = origTrack.trackTitle

        self._trackFormatReq = NeutralTrackFormatReq()
        self._cachedTV = None
        self._minimal = (kwArgs.get('minimal') == True)

        # Both attributes below are set to allow construction of uniqueID
        self.formatConverters = [TrivialFormatConverter]
        self._trackId = None

        self._init(origTrack, randIndex, **kwArgs)

    def _init(self, origTrack, randIndex, **kwArgs):
        """Hook called at the end of __init__; does nothing by default."""
        pass

    def getTrackView(self, region):
        """Return the randomized track view for region.

        In a minimal run of a class that does not support it, the view of the
        wrapped original track is returned unchanged. Otherwise the result of
        _getRandTrackView() is returned after asserting that its track format
        is compatible with the current format requirement.
        """
        canRandomize = self.WORKS_WITH_MINIMAL or not self._minimal
        if not canRandomize:
            return self._origTrack.getTrackView(region)

        randTV = self._getRandTrackView(region)

        assert self._trackFormatReq.isCompatibleWith(randTV.trackFormat), \
            'Incompatible track-format: ' + str(self._trackFormatReq) + \
            ' VS ' + str(randTV.trackFormat)

        return randTV

    @abstractmethod
    def _getRandTrackView(self, region):
        """Return the randomized track view for region (subclass hook)."""
        pass
示例#10
0
    def getTrackView(self, region):
        """Return the original track view for region after validating it.

        Raises NotSupportedError when the track randomizer does not support
        the track format or the allowOverlaps setting of the original view.
        Asserts that the view uses 'crop' border handling.
        """
        # To make sure that the origTrack is only read once across randomizations
        origTv = RawDataStat(region, self._origTrack, NeutralTrackFormatReq()).getResult()
        randomizer = self._trackRandomizer

        trackFormat = origTv.trackFormat
        if not randomizer.supportsTrackFormat(trackFormat):
            formatMsg = ('The original track "{}" has format "{}", '
                         'which is not supported by "{}".')
            raise NotSupportedError(formatMsg.format(
                prettyPrintTrackName(self.trackName),
                str(trackFormat),
                randomizer.getDescription()))

        if not randomizer.supportsOverlapMode(origTv.allowOverlaps):
            overlapMsg = ('The original track "{}" has "allowOverlaps={}", '
                          'which is not supported by "{}".')
            raise NotSupportedError(overlapMsg.format(
                prettyPrintTrackName(self.trackName),
                origTv.allowOverlaps,
                randomizer.getDescription()))

        assert origTv.borderHandling == 'crop'

        return origTv
示例#11
0
class Track(object):
    """A named track whose data is loaded per region as a track view.

    The track carries a format requirement (self._trackFormatReq) that callers
    narrow down through addFormatReq(), and a list of format converters of
    which the first one is used to convert the loaded track view.
    """
    IS_MEMOIZABLE = True

    def __new__(cls, trackName):
        # An empty or missing track name yields no track object at all.
        if trackName is None or trackName == []:
            return None

        # Virtual tracks are represented by VirtualMinimalTrack instances.
        if ExternalTrackManager.isVirtualTrack(trackName):
            return VirtualMinimalTrack.__new__(VirtualMinimalTrack)

        return object.__new__(cls)

    def __init__(self, trackName):
        """Store trackName and create fresh helpers and a neutral format requirement."""
        self.trackName = trackName

        trackSource = TrackSource()
        viewLoader = TrackViewLoader()
        self._trackSource, self._trackViewLoader = trackSource, viewLoader

        self._trackFormatReq = NeutralTrackFormatReq()

        # Neither format converters nor a track id have been determined yet.
        self.formatConverters = self._trackId = None

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Load the unconverted track view of this track for region."""
        source = self._trackSource
        trackData = source.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        loader = self._trackViewLoader
        return loader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        """Return the track view for region, converted to the required format.

        The format requirement must already define allowOverlaps and
        borderHandling (asserted). When no converters have been set, they are
        looked up from the loaded format and the format requirement.

        Raises IncompatibleTracksError when there is no converter at all, or
        when the first converter cannot handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert(allowOverlaps is not None)
        assert(borderHandling is not None)

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        origFormat = origTrackView.trackFormat

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origFormat, self._trackFormatReq)

        if self.formatConverters == []:
            valSuffix = '(' + origFormat._val + ')' if origFormat._val else ''
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: ' +
                str(origFormat) + valSuffix +
                ' does not satisfy ' + str(self._trackFormatReq))

        # Only the first converter in the list is ever used.
        converter = self.formatConverters[0]
        if not converter.canHandle(origFormat, self._trackFormatReq):
            raise IncompatibleTracksError(
                getClassName(converter) +
                ' does not support conversion from ' + str(origFormat) +
                ' to ' + str(self._trackFormatReq))

        return converter.convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        """Merge requestedTrackFormat into the current format requirement.

        Raises IncompatibleTracksError when the merge result is None; the
        stored requirement has been replaced by None at that point.
        """
        prevFormatReq = self._trackFormatReq
        mergedReq = TrackFormatReq.merge(prevFormatReq, requestedTrackFormat)
        self._trackFormatReq = mergedReq

        if mergedReq is None:
            raise IncompatibleTracksError(
                str(prevFormatReq) + ' is incompatible with additional ' +
                str(requestedTrackFormat))

    def setFormatConverter(self, converterClassName):
        """Select the converter named converterClassName; None leaves it unset.

        May only be called while no converters are set (asserted).
        """
        assert( self.formatConverters is None )
        if converterClassName is None:
            return
        self.formatConverters = [getFormatConverterByName(converterClassName)]

    def getUniqueKey(self, genome):
        """Return a hash identifying this track with its converter and format settings.

        Requires exactly one format converter and a format requirement with
        allowOverlaps and borderHandling defined (both asserted). The track id
        is looked up through TrackInfo on first use and then kept.
        """
        assert self.formatConverters is not None and len(self.formatConverters) == 1, \
            'FC: ' + str(self.formatConverters)
        assert None not in [self._trackFormatReq.allowOverlaps(),
                            self._trackFormatReq.borderHandling()]

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        converter = self.formatConverters[0]
        keyItems = (tuple(self.trackName),
                    self._trackId,
                    getClassName(converter),
                    converter.VERSION,
                    self._trackFormatReq.allowOverlaps(),
                    self._trackFormatReq.borderHandling())
        return hash(keyItems)
class RandomizedTrack(Track):
    """Abstract track that serves randomized versions of an original track.

    Subclasses implement _createRandomizedNumpyArrays(). WORKS_WITH_MINIMAL
    tells whether the subclass can produce a randomized view for a "minimal"
    run; when it cannot, getTrackView() returns the original track's view.
    """
    IS_MEMOIZABLE = False
    WORKS_WITH_MINIMAL = True

    def __new__(cls, *args, **kwArgs):
        # Allocate directly through object.__new__; the constructor arguments
        # are not inspected here.
        return object.__new__(cls)

    def __init__(self, origTrack, origRegion, randIndex, **kwArgs):
        """Set up a randomized variant of origTrack.

        The track name is origTrack.trackName extended with
        ['Randomized', str(randIndex)]. 'minimal' is the only keyword argument
        read here: self._minimal is true when the key is present and its value
        compares equal to True.
        """
        from gold.formatconversion.FormatConverter import TrivialFormatConverter

        self._origTrack, self._origRegion = origTrack, origRegion
        self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]

        self._trackFormatReq = NeutralTrackFormatReq()
        self._cachedTV = None
        self._minimal = (kwArgs.get('minimal') == True)

        # TODO: Is the formatConverters hack below required anymore?
        # Both attributes are set to allow construction of uniqueID.
        self.formatConverters = [TrivialFormatConverter]
        self._trackId = None

        self._hasBeenRead = False

    def _checkTrackFormat(self, origTV):
        """Hook called with the original track view; does nothing here."""
        pass

    def getTrackView(self, region):
        """Return the track view for region and flag the track as read.

        With DebugConfig.USE_SLOW_DEFENSIVE_ASSERTS enabled, region is asserted
        to equal the original region or, when the original region is iterable,
        to be contained in it.
        """
        if DebugConfig.USE_SLOW_DEFENSIVE_ASSERTS:
            assert (region in self._origRegion) if isIter(self._origRegion) \
                else (self._origRegion == region)

        # In a minimal run of a class that does not support it, the view of
        # the original track is used as it is.
        useOriginal = self._minimal and not self.WORKS_WITH_MINIMAL
        fetchView = self._origTrack.getTrackView if useOriginal else self._getTrackView
        tv = fetchView(region)

        self._hasBeenRead = True
        return tv

    def _getTrackView(self, region):
        """Build and return a randomized TrackView for region.

        The original view must disallow overlaps, use 'crop' border handling
        and be anchored at region (all asserted).
        """
        from gold.util.RandomUtil import random  # To initialize random generators if not done previously

        # The view is rebuilt on every call; self._cachedTV merely holds the
        # most recently built one.
        origTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()

        self._checkTrackFormat(origTV)
        assert(not origTV.allowOverlaps)
        assert(origTV.borderHandling == 'crop')
        assert region == origTV.genomeAnchor

        randomized = self._createRandomizedNumpyArrays(
            len(origTV.genomeAnchor),
            origTV.startsAsNumpyArray(),
            origTV.endsAsNumpyArray(),
            origTV.valsAsNumpyArray(),
            origTV.strandsAsNumpyArray(),
            origTV.idsAsNumpyArray(),
            origTV.edgesAsNumpyArray(),
            origTV.weightsAsNumpyArray(),
            origTV.allExtrasAsDictOfNumpyArrays(),
            region)
        starts, ends, vals, strands, ids, edges, weights, extras = randomized

        restored = self._undoTrackViewChanges(starts, ends, vals, strands, ids,
                                              edges, weights, extras, origTV)
        starts, ends, vals, strands, ids, edges, weights, extras = restored

        # NOTE(review): getClassName is not used in this method.
        from gold.util.CommonFunctions import getClassName

        self._cachedTV = TrackView(origTV.genomeAnchor,
                                   starts, ends, vals, strands, ids, edges, weights,
                                   origTV.borderHandling, origTV.allowOverlaps,
                                   extraLists=extras)

        assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), \
            'Incompatible track-format: ' + str(self._trackFormatReq) + \
            ' VS ' + str(self._cachedTV.trackFormat)
        return self._cachedTV

    def _undoTrackViewChanges(self, starts, ends, vals, strands, ids, edges, weights, extras, origTV):
        """Adapt the randomized arrays to the format of origTV.

        For points the ends are dropped; for partitions and step functions a
        leading 0 is prepended to the ends and the starts are dropped. The
        start of the genome anchor is then added, in place, to whichever of
        starts/ends remain. Returns the eight values in the order received.
        """
        origFormat = origTV.trackFormat

        if origFormat.isPoints():
            ends = None
        elif origFormat.isPartitionOrStepFunction():
            starts, ends = None, numpy.append([0], ends)

        if starts is not None:
            starts += origTV.genomeAnchor.start

        if ends is not None:
            ends += origTV.genomeAnchor.start

        return starts, ends, vals, strands, ids, edges, weights, extras

    def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
        """Return (starts, ends, vals, strands, ids, edges, weights, extras).

        Must be overridden; this base implementation raises AbstractClassError.
        """
        raise AbstractClassError
示例#13
0
class Track(object):
    """A named (and optionally titled) track loaded per region as a track view.

    The track carries a format requirement (self._trackFormatReq) that callers
    narrow down through addFormatReq(), and a list of format converters of
    which the first one is used to convert the loaded track view.
    """
    IS_MEMOIZABLE = True

    def __new__(cls, trackName, trackTitle=None):
        # An empty or missing track name yields no track object at all.
        if trackName is None or trackName == []:
            return None

        # Virtual tracks are represented by VirtualMinimalTrack instances.
        if ExternalTrackManager.isVirtualTrack(trackName):
            return VirtualMinimalTrack.__new__(VirtualMinimalTrack)

        return object.__new__(cls)

    def __init__(self, trackName, trackTitle=None):
        """Store name and title; create fresh helpers and a neutral format requirement."""
        self.trackName, self.trackTitle = trackName, trackTitle

        trackSource = TrackSource()
        viewLoader = TrackViewLoader()
        self._trackSource, self._trackViewLoader = trackSource, viewLoader

        self._trackFormatReq = NeutralTrackFormatReq()

        # Converters, track id and randomization index are all unset at first.
        self.formatConverters = self._trackId = self._randIndex = None

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Load the unconverted track view of this track for region."""
        source = self._trackSource
        trackData = source.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        loader = self._trackViewLoader
        return loader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        """Return the track view for region, converted to the required format.

        The format requirement must already define allowOverlaps and
        borderHandling (asserted). When no converters have been set, they are
        looked up from the loaded format and the format requirement.

        Raises IncompatibleTracksError when there is no converter at all, or
        when the first converter cannot handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert(allowOverlaps is not None)
        assert(borderHandling is not None)

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        origFormat = origTrackView.trackFormat

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origFormat, self._trackFormatReq)

        if self.formatConverters == []:
            valSuffix = '(' + origFormat._val + ')' if origFormat._val else ''
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: ' +
                str(origFormat) + valSuffix +
                ' does not satisfy ' + str(self._trackFormatReq))

        # Only the first converter in the list is ever used.
        converter = self.formatConverters[0]
        if not converter.canHandle(origFormat, self._trackFormatReq):
            raise IncompatibleTracksError(
                getClassName(converter) +
                ' does not support conversion from ' + str(origFormat) +
                ' to ' + str(self._trackFormatReq))

        return converter.convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        """Merge requestedTrackFormat into the current format requirement.

        Raises IncompatibleTracksError when the merge result is None; the
        stored requirement has been replaced by None at that point.
        """
        prevFormatReq = self._trackFormatReq
        mergedReq = TrackFormatReq.merge(prevFormatReq, requestedTrackFormat)
        self._trackFormatReq = mergedReq

        if mergedReq is None:
            raise IncompatibleTracksError(
                str(prevFormatReq) + ' is incompatible with additional ' +
                str(requestedTrackFormat))

    def setFormatConverter(self, converterClassName):
        """Select the converter named converterClassName; None leaves it unset.

        May only be called while no converters are set (asserted).
        """
        assert( self.formatConverters is None )
        if converterClassName is None:
            return
        self.formatConverters = [getFormatConverterByName(converterClassName)]

    def getUniqueKey(self, genome):
        """Return a hash identifying this track with its converter and format settings.

        The track id is looked up through TrackInfo on first use and then
        kept. Every component that is falsy (a missing converter list, but
        also e.g. allowOverlaps being False) enters the hash as ''.
        """
        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        if self.formatConverters:
            converter = self.formatConverters[0]
            converterName = getClassName(converter)
            converterVersion = converter.VERSION
        else:
            converterName = converterVersion = ''

        formatReq = self._trackFormatReq
        keyItems = (tuple(self.trackName),
                    self._trackId or '',
                    converterName,
                    converterVersion,
                    formatReq.allowOverlaps() or '',
                    formatReq.borderHandling() or '')
        return hash(keyItems)

    def resetTrackSource(self):
        """Replace the track source with a newly created TrackSource."""
        self._trackSource = TrackSource()

    def setRandIndex(self, randIndex):
        """Do nothing; used only by TsBasedRandomTrack."""
        pass
示例#14
0
class Track(object):
    """A named (and optionally titled) track loaded per region as a track view.

    The track carries a format requirement (self._trackFormatReq) that callers
    narrow down through addFormatReq(), a list of format converters of which
    the first one is used to convert the loaded track view, and a flag telling
    whether getTrackView() has loaded data.
    """
    IS_MEMOIZABLE = True

    def __new__(cls, trackName, trackTitle=None):
        # An empty or missing track name yields no track object at all.
        if trackName is None or trackName == []:
            return None

        # Virtual tracks are represented by VirtualMinimalTrack instances.
        if ExternalTrackManager.isVirtualTrack(trackName):
            return VirtualMinimalTrack.__new__(VirtualMinimalTrack)

        return object.__new__(cls)

    def __init__(self, trackName, trackTitle=None):
        """Store name and title; create fresh helpers and a neutral format requirement."""
        self.trackName, self.trackTitle = trackName, trackTitle

        trackSource = TrackSource()
        viewLoader = TrackViewLoader()
        self._trackSource, self._trackViewLoader = trackSource, viewLoader

        self._trackFormatReq = NeutralTrackFormatReq()

        # Neither format converters nor a track id have been determined yet.
        self.formatConverters = self._trackId = None

        # Nothing has been read through this track yet.
        self._hasBeenRead = False

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Load the unconverted track view of this track for region."""
        source = self._trackSource
        trackData = source.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        loader = self._trackViewLoader
        return loader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        """Return the track view for region, converted to the required format.

        The format requirement must already define allowOverlaps and
        borderHandling (asserted). When no converters have been set, they are
        looked up from the loaded format and the format requirement. The track
        is flagged as read once the raw view is loaded and the converters are
        known, i.e. before the compatibility checks below can raise.

        Raises IncompatibleTracksError when there is no converter at all, or
        when the first converter cannot handle the conversion.
        """
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert(allowOverlaps is not None)
        assert(borderHandling is not None)

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        origFormat = origTrackView.trackFormat

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origFormat, self._trackFormatReq)

        self._hasBeenRead = True

        if self.formatConverters == []:
            valSuffix = '(' + origFormat._val + ')' if origFormat._val else ''
            raise IncompatibleTracksError(
                prettyPrintTrackName(self.trackName) + ' with format: ' +
                str(origFormat) + valSuffix +
                ' does not satisfy ' + str(self._trackFormatReq))

        # Only the first converter in the list is ever used.
        converter = self.formatConverters[0]
        if not converter.canHandle(origFormat, self._trackFormatReq):
            raise IncompatibleTracksError(
                getClassName(converter) +
                ' does not support conversion from ' + str(origFormat) +
                ' to ' + str(self._trackFormatReq))

        return converter.convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        """Merge requestedTrackFormat into the current format requirement.

        Raises IncompatibleTracksError when the merge result is None; the
        stored requirement has been replaced by None at that point.
        """
        prevFormatReq = self._trackFormatReq
        mergedReq = TrackFormatReq.merge(prevFormatReq, requestedTrackFormat)
        self._trackFormatReq = mergedReq

        if mergedReq is None:
            raise IncompatibleTracksError(
                str(prevFormatReq) + ' is incompatible with additional ' +
                str(requestedTrackFormat))

    # TODO: track.formatConverters needs a complete overhaul. It is currently used:
    # 1) to link tracks with possible choices for track conversion
    # 2) to store the choice of format converter made by the user
    # 3) to manage the default choice of format converters [=always the first item in the list]
    # 4) to hold the currently selected format converter class until needed by getTrackView
    def setFormatConverter(self, converterClassName):
        """Select the converter named converterClassName; None leaves it unset.

        May only be called while no converters are set (asserted).
        """
        assert( self.formatConverters is None )
        if converterClassName is None:
            return
        self.formatConverters = [getFormatConverterByName(converterClassName)]

    def hasBeenFlaggedAsRead(self):
        """Return the flag that getTrackView() sets once data has been loaded."""
        return self._hasBeenRead

    def getUniqueKey(self, genome):
        """Return a hash identifying this track with its format settings and converter.

        Hashed items, in order: the track name as a tuple, the track id, then
        (when a format requirement exists) allowOverlaps if it is not None and
        borderHandling, and finally class name and VERSION of the first format
        converter ('' for both when no converters are set). With
        DebugConfig.VERBOSE enabled the items are also logged.
        """
        keyItems = [tuple(self.trackName), self._getTrackId(genome)]

        formatReq = self._trackFormatReq
        if formatReq is not None:
            allowOverlaps = formatReq.allowOverlaps()
            if allowOverlaps is not None:
                keyItems.append(allowOverlaps)
            keyItems.append(formatReq.borderHandling())

        # TODO: Move away from fixed VERSION, as these in practice are never updated. Also for statistics.
        if self.formatConverters:
            converter = self.formatConverters[0]
            keyItems.append(getClassName(converter))
            keyItems.append(converter.VERSION)
        else:
            keyItems.extend(['', ''])

        from config.DebugConfig import DebugConfig
        if DebugConfig.VERBOSE:
            from gold.application.LogSetup import logMessage
            itemsAsText = ', '.join(str(item) for item in keyItems)
            logMessage('Unique key items for track "{}": '.format(self.trackName) +
                       itemsAsText)

        return hash(tuple(keyItems))

    def _getTrackId(self, genome):
        """Return the track id ('' when it is falsy), looking it up on first use."""
        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id
        return self._trackId or ''

    def resetTrackSource(self):
        """Replace the track source with a newly created TrackSource."""
        self._trackSource = TrackSource()
 def __init__(self, origTrack, origRegion, randIndex, **kwArgs):
     """Remember the original track and region and derive the track name.

     origTrack  -- the track being randomized; its trackName, extended with
                   ['Randomized', str(randIndex)], names this track
     origRegion -- stored in self._origRegion
     randIndex  -- index of this randomization (used in the track name)
     kwArgs     -- accepted but not used here
     """
     self._origTrack, self._origRegion = origTrack, origRegion
     self.trackName = origTrack.trackName + ['Randomized', str(randIndex)]

     # The format requirement starts out neutral and no view is cached yet.
     self._trackFormatReq = NeutralTrackFormatReq()
     self._cachedTV = None