Пример #1
0
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=None):
     """
     Build a TrackView for `region` from the arrays found in `trackData`.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}
     region : see GenomeRegion
     borderHandling, allowOverlaps : forwarded unchanged to the TrackView constructor
     trackName : not used by this function; accepted for call compatibility

     Raises IOError when the track format is not densely represented and
     trackData lacks a 'leftIndex' or 'rightIndex' entry.
     """
     brShelve = trackData.boundingRegionShelve
     brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

     # Build the exclusion set once instead of once per tested name.
     nonExtraNames = set(RESERVED_PREFIXES).union(['leftIndex', 'rightIndex'])
     extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraNames]

     reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in RESERVED_PREFIXES]
     extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in extraArrayNames]
     trackFormat = TrackFormat(*(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]))

     if trackFormat.reprIsDense():
         # Slice bounds are positions; with bounding-region info they are
         # taken relative to brInfo.start.
         if brInfo is None:
             leftIndex = region.start
             rightIndex = region.end
         else:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
     else:
         # Slice bounds are looked up in the 'leftIndex'/'rightIndex' arrays
         # using the bin numbers of the first and last position of the region.
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end - 1)

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     slicedReservedArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in reservedArrays]
     slicedExtraArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in extraArrays]

     argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
     tv = TrackView(*argList)

     if not trackFormat.reprIsDense():
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
Пример #2
0
class TrackView(object):
    def _handlePointsAndPartitions(self):
        if self.trackFormat.isDense() and not self.trackFormat.reprIsDense():
            self._startList = self._endList[:-1]
            self._endList = self._endList[1:]
            if self._valList != None:
                self._valList = self._valList[1:]
            if self._strandList != None:
                self._strandList = self._strandList[1:]
            if self._idList != None:
                self._idList = self._idList[1:]
            if self._edgesList != None:
                self._edgesList = self._edgesList[1:]
            if self._weightsList != None:
                self._weightsList = self._weightsList[1:]
            for key, extraList in self._extraLists.items():
                if extraList != None:
                    self._extraLists[key] = extraList[1:]
        if not self.trackFormat.isDense() and not self.trackFormat.isInterval():
            self._endList = VirtualPointEnd(self._startList)

    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList,
                 weightsList, borderHandling, allowOverlaps, extraLists=None):
        '''
        Store the element lists for the region `genomeAnchor` and derive the track format.

        genomeAnchor : region object; a copy obtained through getCopy() is stored
        startList, endList, valList, edgesList : at least one must not be None
        strandList, idList, weightsList : optional lists (may be None)
        borderHandling : only 'crop' is accepted
        allowOverlaps : stored as-is; consulted when counting and iterating elements
        extraLists : mapping of name -> list, of which a shallow copy is stored;
            defaults to an empty OrderedDict

        After the format-specific adjustments of _handlePointsAndPartitions,
        every non-None list must have the same number of elements.
        '''
        # Identity tests are used on purpose: "!= None" on a numpy array is an
        # elementwise comparison and cannot serve as a single truth value.
        assert any(candidate is not None for candidate in (startList, endList, valList, edgesList))
        assert borderHandling in ['crop']

        # A fresh mapping per call avoids sharing one default object between instances.
        if extraLists is None:
            extraLists = OrderedDict()

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors of the track element whose backing list is absent are
        # replaced by noneFunc.
        accessorByList = (('_startList', 'start'), ('_endList', 'end'), ('_valList', 'val'),
                          ('_strandList', 'strand'), ('_idList', 'id'), ('_edgesList', 'edges'),
                          ('_weightsList', 'weights'))
        for listAttr, accessorName in accessorByList:
            if getattr(self, listAttr) is None:
                setattr(self._trackElement, accessorName, noneFunc)

        self._updateNumListElements()

        allLists = [self._startList, self._endList, self._valList, self._strandList,
                    self._idList, self._edgesList, self._weightsList] + list(self._extraLists.values())
        for i, curList in enumerate(allLists):
            assert curList is None or len(curList) == self._numListElements, \
                'List (%s): ' % i + str(curList) + ' (expected %s elements, found %s)' % (self._numListElements, len(curList))

    def __iter__(self):
        self._trackElement._index = -1
        return self

    def _updateNumListElements(self):
        ""
        self._numListElements = self._computeNumListElements()

        if self.allowOverlaps and self._numListElements > 0:
            self._numIterElements = self._computeNumIterElements()
        else:
            self._numIterElements = self._numListElements

    def _computeNumListElements(self):
        for list in [self._startList, self._endList, self._valList, self._edgesList]:
            if list is not None:
                return len(list)
        raise ShouldNotOccurError

    def _computeNumIterElements(self):
        for list in [self._startList, self._endList, self._valList, self._edgesList]:
            if list is not None:
                if isinstance(list, numpy.ndarray):
                    return len(self._removeBlindPassengersFromNumpyArray(list))
                else:
                    return sum(1 for x in self)
        raise ShouldNotOccurError

    def __len__(self):
        ""
        return self._bpSize()

    def getNumElements(self):
        return self._numIterElements

    def _bpSize(self):
        return len(self.genomeAnchor)

    def next(self):
        self._trackElement._index += 1

        #To remove any blind passengers - segments entirely in front of genomeanchor,
        # but sorted after a larger segment crossing the border
        if self.allowOverlaps and not self.trackFormat.reprIsDense():
            while self._trackElement._index < self._numListElements and self._endList[self._trackElement._index] <= self.genomeAnchor.start: #self._trackElement.end() <= 0:
                self._trackElement._index += 1

        if self._trackElement._index < self._numListElements:
            return self._trackElement
        else:
            raise StopIteration

    def _findLeftIndex(self):
        leftIndex = 0
        #remove track elements entirely to the left of the anchor
        while leftIndex < len(self._endList) and self._endList[leftIndex] <= self.genomeAnchor.start:
            leftIndex += 1
        return leftIndex

    def _findRightIndex(self):
        rightIndex = self._numListElements
        while rightIndex > 0 and self._startList[rightIndex-1] >= self.genomeAnchor.end:
            rightIndex -= 1
        return rightIndex

    def sliceElementsAccordingToGenomeAnchor(self):
        assert( not self.trackFormat.reprIsDense() )
        self._doScatteredSlicing()

    def _doScatteredSlicing(self):
        leftIndex = self._findLeftIndex()
        rightIndex = self._findRightIndex()

        if self._bpSize() == 0:
            rightIndex = leftIndex

        self._startList = self._startList[leftIndex:rightIndex]
        self._endList = self._endList[leftIndex:rightIndex]

        if self._valList != None:
            self._valList = self._valList[leftIndex:rightIndex]
        if self._strandList != None:
            self._strandList = self._strandList[leftIndex:rightIndex]
        if self._idList != None:
            self._idList = self._idList[leftIndex:rightIndex]
        if self._edgesList != None:
            self._edgesList = self._edgesList[leftIndex:rightIndex]
        if self._weightsList != None:
            self._weightsList = self._weightsList[leftIndex:rightIndex]
        for key, extraList in self._extraLists.items():
            self._extraLists[key] = extraList[leftIndex:rightIndex]
        self._updateNumListElements()

    def _doDenseSlicing(self, i, j):
        if self._valList != None:
            self._valList = self._valList[i:j]
        if self._strandList != None:
            self._strandList = self._strandList[i:j]
        if self._idList != None:
            self._idList = self._idList[i:j]
        if self._edgesList != None:
            self._edgesList = self._edgesList[i:j]
        if self._weightsList != None:
            self._weightsList = self._weightsList[i:j]
        for key, extraList in self._extraLists.items():
            self._extraLists[key] = extraList[i:j]
        self._updateNumListElements()

    def __getslice__(self, i, j):
        '''
        Return a new TrackView covering the sub-region [i:j] of this view,
        with i and j given relative to the start of the genome anchor.

        The new view is built from this view's lists and reuses this view's
        trackFormat. A non-negative j yields an end of anchor start + j,
        capped at the current anchor end; a negative j moves the end back by
        that amount. The lists are then sliced densely or scattered depending
        on the representation.
        '''
        slicedTV = TrackView(
            self.genomeAnchor, self._startList, self._endList,
            self._valList, self._strandList, self._idList,
            self._edgesList, self._weightsList,
            self.borderHandling, self.allowOverlaps,
            extraLists=self._extraLists)
        slicedTV.trackFormat = self.trackFormat

        slicedTV.genomeAnchor.start += i
        if j >= 0:
            try:
                cappedEnd = min(self.genomeAnchor.end, self.genomeAnchor.start + j)
                slicedTV.genomeAnchor.end = cappedEnd
            except FloatingPointError:  # Caused by trackView[:] with self.genomeAnchor.start > 0
                slicedTV.genomeAnchor.end = self.genomeAnchor.end
        elif j < 0:
            slicedTV.genomeAnchor.end += j

        if not self.trackFormat.reprIsDense():
            slicedTV._doScatteredSlicing()
        else:
            slicedTV._doDenseSlicing(i, j)
        return slicedTV

    def _getBpLevelModificationArray(self, indexes, vals):
        bpLevelMod = numpy.bincount(indexes, vals)
        origLen = len(bpLevelMod)
        bpLevelMod.resize(self._bpSize()+1)
        bpLevelMod[origLen:] = 0
        return bpLevelMod

    def _commonGetBpLevelArray(self, vals):
        if self.trackFormat.reprIsDense():
            if self.allowOverlaps:
                raise ShouldNotOccurError()
            return vals
        else:
            bpLevelArray = numpy.zeros(self._bpSize()+1)
            numElements = self.getNumElements()
            if numElements > 0:
                bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
                bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
                bpLevelArray = bpLevelArray.cumsum()
            return bpLevelArray[:-1]

    def getBinaryBpLevelArray(self):
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='bool8')

    def getCoverageBpLevelArray(self):
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='int32')

    def getValueBpLevelArray(self, voidValue=0):
        '''
        Creates a bp-level function of any valued track. In case of scattered tracks,
        uncovered aras are filled with voidValue (which would typically be set to 0 or numpy.nan).
        In the case of overlapping regions, the values are added.'''

        assert self.trackFormat.isValued('number'), self.trackFormat
        vals = self.valsAsNumpyArray()
        bpLevelArray = numpy.array(self._commonGetBpLevelArray(vals), dtype=vals.dtype)
        if voidValue != 0:
            bpLevelArray[~self.getBinaryBpLevelArray()] = voidValue
        return bpLevelArray

    def _removeBlindPassengersFromNumpyArray(self, numpyArray):
        '''
        To remove any blind passengers - segments entirely in front of genomeanchor,
        but sorted after a larger segment crossing the border.
        '''
        if self.allowOverlaps and len(numpyArray) > 0:
            numpyArray = numpyArray[numpy.where(self._endList > self.genomeAnchor.start)]
        return numpyArray

    def _commonAsNumpyArray(self, numpyArray, numpyArrayModMethod, name):
        assert(self.borderHandling in ['crop'])
        if numpyArray is None:
            return None

        numpyArray = self._removeBlindPassengersFromNumpyArray(numpyArray)

        if numpyArrayModMethod is not None:
            return numpyArrayModMethod(numpyArray)
        else:
            return numpyArray

    def startsAsNumpyArray(self):
        return self._commonAsNumpyArray(self._startList, self._startListModMethod, 'starts')

    def _startListModMethod(self, startList):
        return numpy.maximum(startList - self.genomeAnchor.start, \
                             numpy.zeros(len(startList), dtype='int32'))

    def endsAsNumpyArray(self):
        return self._commonAsNumpyArray(self._endList, self._endListModMethod, 'ends')

    def _endListModMethod(self, endList):
        return numpy.minimum(endList - self.genomeAnchor.start, \
                             numpy.zeros(len(endList), dtype='int32') + len(self.genomeAnchor))

    def valsAsNumpyArray(self):
        return self._commonAsNumpyArray(self._valList, None, 'vals')

    def strandsAsNumpyArray(self):
        return self._commonAsNumpyArray(self._strandList, None, 'strands')

    def idsAsNumpyArray(self):
        return self._commonAsNumpyArray(self._idList, None, 'ids')

    def edgesAsNumpyArray(self):
        return self._commonAsNumpyArray(self._edgesList, None, 'edges')

    def weightsAsNumpyArray(self):
        return self._commonAsNumpyArray(self._weightsList, None, 'weights')

    def extrasAsNumpyArray(self, key):
        assert self.hasExtra(key)
        from functools import partial
        return self._commonAsNumpyArray(self._extraLists[key], None, 'extras')

    def allExtrasAsDictOfNumpyArrays(self):
        return OrderedDict([(key,self.extrasAsNumpyArray(key)) for key in self._extraLists])

    def hasExtra(self, key):
        return key in self._extraLists