def _compute(self):
        """Return the segments that have a neighbouring segment within ``self.threshold``.

        For every segment of the child track the distance to its nearest
        neighbour (the previous or the next segment in the track's order) is
        computed; overlapping neighbours count as distance 0.  Segments whose
        nearest distance is at most ``self.threshold`` are kept, and that
        distance becomes the value of the segment (int32).

        Assumes the child TrackView delivers its segments ordered by position
        -- TODO confirm against the track format required by this statistic.

        Returns:
            A TrackView on the same genome anchor holding the selected
            segments with their nearest-neighbour distance as value.
        """
        tv = self._children[0].getResult()

        starts = tv.startsAsNumpyArray()
        ends = tv.endsAsNumpyArray()

        # Gap between each segment and the following one.
        dists = starts[1:] - ends[:-1]
        if len(dists) == 0:
            # Zero or one segment: no segment has a neighbour, so nothing is
            # selected.  The coordinate arrays are cut to length 0 so that they
            # match the empty value array (TrackView rejects mismatched lengths).
            return TrackView(genomeAnchor=tv.genomeAnchor, startList=starts[:0], endList=ends[:0],
                             valList=numpy.array([], dtype='int32'),
                             strandList=None, idList=None, edgesList=None, weightsList=None,
                             borderHandling=tv.borderHandling, allowOverlaps=tv.allowOverlaps)
        dists[dists < 0] = 0  # overlapping neighbours are at distance 0

        # nearest[i] is the smaller of the gaps on either side of segment i.
        # The first and the last segment only have a single neighbour.
        nearest = numpy.empty(len(starts), dtype=dists.dtype)
        nearest[0] = dists[0]
        nearest[-1] = dists[-1]
        nearest[1:-1] = numpy.minimum(dists[:-1], dists[1:])

        selector = nearest <= self.threshold
        return TrackView(genomeAnchor=tv.genomeAnchor, startList=starts[selector], endList=ends[selector],
                         valList=nearest[selector].astype('int32'),
                         strandList=None, idList=None, edgesList=None, weightsList=None,
                         borderHandling=tv.borderHandling, allowOverlaps=tv.allowOverlaps)
Exemplo n.º 2
0
 def __init__(self, vals=True, strands=True, anchor=None, valDType='float64'):
    """Build a test TrackView on 'TestGenome'/'chr21' that carries values and strands only.

    Args:
        vals: value list, or True (the default); forwarded to
            ``self._createList`` together with a random value list.
            Presumably True means "use the random list" -- confirm in
            ``_createList``.
        strands: strand list, or True; handled like ``vals``.
        anchor: ``[start, end]`` of the genome region.  When omitted the
            region starts at 10 and is as long as ``vals``, which is why an
            explicit ``vals`` list is then required.
        valDType: dtype name forwarded to ``self._createList`` for the values.
    """
    # Identity tests keep the checks valid when `anchor` is a numpy array,
    # where `anchor != None` would be evaluated elementwise.
    assert vals != True or anchor is not None

    if anchor is None:
        numElements = len(vals)
        anchor = [10, 10 + numElements]
    else:
        numElements = anchor[1] - anchor[0]

    vals = self._createList(vals, getRandValList(numElements), valDType)
    strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

    TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), None, None,
                       vals, strands, None, None, None, 'crop', False)
    def _compute(self):
        """Build a segment track from the stretches flagged with cover status 3.

        The start and end arrays of both child tracks are handed to
        ``self._findAllStartAndEndEvents``; every event whose cumulative cover
        status equals 3 becomes a result segment that runs for the length of
        that event.  Status 3 presumably means "covered by both tracks" --
        confirm in ``_findAllStartAndEndEvents``.

        Returns:
            A TrackView without values, on the genome anchor and border
            handling of the first child, with overlaps disallowed.
        """
        firstTV = self._children[0].getResult()
        secondTV = self._children[1].getResult()

        firstStarts = firstTV.startsAsNumpyArray()
        firstEnds = firstTV.endsAsNumpyArray()
        secondStarts = secondTV.startsAsNumpyArray()
        secondEnds = secondTV.endsAsNumpyArray()

        events, eventLengths, coverStatus = self._findAllStartAndEndEvents(
            firstStarts, firstEnds, secondStarts, secondEnds)

        # The length array is masked with the status array minus its final entry.
        resultStarts = events[coverStatus == 3]
        resultLengths = eventLengths[coverStatus[:-1] == 3]
        resultEnds = resultStarts + resultLengths

        return TrackView(genomeAnchor=firstTV.genomeAnchor,
                         startList=resultStarts, endList=resultEnds,
                         valList=None, strandList=None, idList=None,
                         edgesList=None, weightsList=None,
                         borderHandling=firstTV.borderHandling,
                         allowOverlaps=False)
Exemplo n.º 4
0
 def _createTrackView(self, starts, ends, vals, strands, ids, edges, weights, extras, sourceRegion, allowOverlaps, sliceFull=False):
     """Create a 'crop' TrackView on ``self.genome``/``self.chr`` from plain sequences.

     Every column that is not None is converted to an array (values as
     float64); None columns are passed through unchanged.  ``extras`` is a
     mapping of extra column name to sequence, or None for no extra columns.
     ``sourceRegion`` supplies the region start and end at index 0 and 1.
     When ``sliceFull`` is true the new TrackView is additionally sliced
     according to its genome anchor.
     """
     def _toArray(values, **arrayArgs):
         # None marks an absent column and must reach TrackView untouched.
         if values is None:
             return None
         return array(values, **arrayArgs)

     genomeAnchor = GenomeRegion(genome=self.genome, chr=self.chr,
                                 start=sourceRegion[0], end=sourceRegion[1])

     startArr = _toArray(starts)
     endArr = _toArray(ends)
     valArr = _toArray(vals, dtype='float64')
     strandArr = _toArray(strands)
     idArr = _toArray(ids)
     edgeArr = _toArray(edges)
     weightArr = _toArray(weights)

     if extras is None:
         extraLists = OrderedDict()
     else:
         extraLists = OrderedDict([(key, array(extra)) for key, extra in extras.iteritems()])

     tv = TrackView(genomeAnchor, startArr, endArr, valArr, strandArr, idArr, edgeArr, weightArr,
                    'crop', allowOverlaps, extraLists=extraLists)
     if sliceFull:
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
 def _compute(self):
     """Turn the segments of the child track into a step function of coverage depth.

     Every segment start adds 1 and every segment end subtracts 1 at its
     position.  The sorted border positions split the range from 0 to
     ``chrEndPos`` (chromosome length minus one, as reported by
     ``GenomeInfo.getChrLen``) into consecutive pieces, and each piece gets
     the running sum of those contributions as its value.

     Returns:
         A TrackView with the pieces as segments and the running sum as
         values, on the genome anchor of the child track, overlaps disallowed.
     """
     tv = self._children[0].getResult()
     starts, ends = tv.startsAsNumpyArray(), tv.endsAsNumpyArray()
     chrEndPos = GenomeInfo.getChrLen(tv.genomeAnchor.genome, tv.genomeAnchor.chr) - 1

     # Net change in coverage at every border position.
     borderDict = defaultdict(int)
     for segStart, segEnd in zip(starts, ends):
         borderDict[segStart] += 1
         borderDict[segEnd] -= 1
     sortedPos = sorted(borderDict)

     if not sortedPos:
         # No segments at all: a single zero-valued piece, which is what the
         # general case below produces in front of the first border.
         startList, endList, valList = [0], [chrEndPos], [0]
     else:
         # Start border: unless a segment starts at 0, prepend a zero-valued
         # piece running from 0 up to the first border.
         if sortedPos[0] == 0:
             startList, endList, valList = sortedPos, sortedPos[1:], []
         else:
             startList, endList, valList = [0] + sortedPos, sortedPos, [0]

         # End border: extend to the chromosome end, or drop the last start
         # when the final border already is the end.
         if not endList or endList[-1] < chrEndPos:
             endList = endList + [chrEndPos]
         else:
             startList = startList[:-1]

         # Step-function values: running sum of the border contributions.
         accVal = 0
         for pos in sortedPos:
             accVal += borderDict[pos]
             valList.append(accVal)

         # A border exactly at the chromosome end starts no piece of its own.
         if sortedPos[-1] == chrEndPos:
             valList.pop()

     return TrackView(genomeAnchor=tv.genomeAnchor, startList=np.array(startList), endList=np.array(endList),
                      valList=np.array(valList), strandList=None, idList=None, edgesList=None,
                      weightsList=None, borderHandling=tv.borderHandling, allowOverlaps=False)
    def _compute(self):
        """Build segments for the stretches with cover status 3, valued from the first track.

        The stretches are derived from the start/end events of both child
        tracks via ``self._findAllStartAndEndEvents``.  Each result segment
        takes the value (as float) of the first segment of track 1, searched
        from the last match onwards, that fully contains it.  Status 3
        presumably means "covered by both tracks" -- confirm in
        ``_findAllStartAndEndEvents``.

        Raises:
            AssertionError: if some result segment is not contained in any
                segment of the first track.
        """
        firstTV = self._children[0].getResult()
        secondTV = self._children[1].getResult()

        firstStarts = firstTV.startsAsNumpyArray()
        firstEnds = firstTV.endsAsNumpyArray()
        firstVals = firstTV.valsAsNumpyArray()
        secondStarts = secondTV.startsAsNumpyArray()
        secondEnds = secondTV.endsAsNumpyArray()

        events, eventLengths, coverStatus = self._findAllStartAndEndEvents(
            firstStarts, firstEnds, secondStarts, secondEnds)

        inBoth = coverStatus[:-1] == 3
        allResultStarts = events[inBoth]
        allResultEnds = allResultStarts + eventLengths[inBoth]

        valList = []
        cursor = 0  # index in track 1 where the search for the next container resumes
        numFirst = len(firstStarts)
        for resStart, resEnd in zip(allResultStarts, allResultEnds):
            candidate = cursor
            while candidate < numFirst:
                if resStart >= firstStarts[candidate] and resEnd <= firstEnds[candidate]:
                    valList.append(float(firstVals[candidate]))
                    cursor = candidate
                    break
                candidate += 1

        assert len(valList) == len(allResultStarts), valList

        return TrackView(genomeAnchor=firstTV.genomeAnchor,
                         startList=allResultStarts,
                         endList=allResultEnds,
                         valList=array(valList),
                         strandList=None,
                         idList=None,
                         edgesList=None,
                         weightsList=None,
                         borderHandling=firstTV.borderHandling,
                         allowOverlaps=False)
Exemplo n.º 7
0
 def _compute(self):
     """Count the element starts of the child track per micro-bin.

     ``self._region`` is divided into consecutive bins of ``self.microBin``
     positions (the last bin may be shorter).  The result has one segment per
     bin whose value is the number of start positions falling into that bin.
     The start positions are presumably relative to the region start --
     TODO confirm.

     Returns:
         A TrackView of the bins with their counts as values, on the genome
         anchor of the child track, overlaps disallowed.
     """
     tv = self._children[0].getResult()
     starts = tv.startsAsNumpyArray()
     regionLen = len(self._region)

     # Explicit floor division: the bin number must stay an integer even when
     # true division is in effect, since bincount only accepts integers.
     binArray = starts // self.microBin
     binCounts = np.bincount(binArray)

     # bincount stops at the highest occupied bin; pad with empty trailing bins.
     numMicroBins = int(math.ceil(float(regionLen) / self.microBin))
     binCounts = np.concatenate(
         [binCounts,
          np.zeros(numMicroBins - len(binCounts), dtype='int')])

     startList = [i * self.microBin for i in range(len(binCounts))]
     endList = [min(binStart + self.microBin, regionLen) for binStart in startList]

     # Sanity checks: the bins must tile the region exactly.
     expectedStarts = list(range(0, regionLen, self.microBin))
     assert startList == expectedStarts, (startList, expectedStarts)
     assert endList == startList[1:] + [regionLen]

     return TrackView(genomeAnchor=tv.genomeAnchor, startList=np.array(startList), endList=np.array(endList),
                      valList=binCounts, strandList=None, idList=None, edgesList=None, weightsList=None,
                      borderHandling=tv.borderHandling, allowOverlaps=False)
    def _getTrackView(self, region):
        """Return a randomized TrackView for ``region`` and remember it in ``self._cachedTV``.

        The original track data is fetched through ``RawDataStat``, checked
        with ``self._checkTrackFormat``, randomized by
        ``self._createRandomizedNumpyArrays`` and post-processed by
        ``self._undoTrackViewChanges`` before being wrapped in a new TrackView
        that keeps the original anchor, border handling and overlap setting.

        Raises:
            AssertionError: if the original TrackView allows overlaps, does
                not use 'crop' border handling, is anchored to another region,
                or if the randomized track does not fit ``self._trackFormatReq``.
        """
        # Imported for its side effect only.
        from gold.util.RandomUtil import random  # To initialize random generators if not done previously

        origTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()

        self._checkTrackFormat(origTV)
        assert not origTV.allowOverlaps
        assert origTV.borderHandling == 'crop'
        assert region == origTV.genomeAnchor

        regionLength = len(origTV.genomeAnchor)
        origStarts = origTV.startsAsNumpyArray()
        origEnds = origTV.endsAsNumpyArray()
        origVals = origTV.valsAsNumpyArray()
        origStrands = origTV.strandsAsNumpyArray()
        origIds = origTV.idsAsNumpyArray()
        origEdges = origTV.edgesAsNumpyArray()
        origWeights = origTV.weightsAsNumpyArray()
        origExtras = origTV.allExtrasAsDictOfNumpyArrays()

        starts, ends, vals, strands, ids, edges, weights, extras = \
            self._createRandomizedNumpyArrays(regionLength, origStarts, origEnds, origVals, origStrands,
                                              origIds, origEdges, origWeights, origExtras, region)

        starts, ends, vals, strands, ids, edges, weights, extras = \
            self._undoTrackViewChanges(starts, ends, vals, strands, ids, edges, weights, extras, origTV)

        from gold.util.CommonFunctions import getClassName
        randomizedTV = TrackView(origTV.genomeAnchor, starts, ends, vals, strands, ids, edges, weights,
                                 origTV.borderHandling, origTV.allowOverlaps, extraLists=extras)
        self._cachedTV = randomizedTV

        assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), 'Incompatible track-format: '\
               + str(self._trackFormatReq) + ' VS ' + str(self._cachedTV.trackFormat)
        return self._cachedTV
Exemplo n.º 9
0
    def _getRandTrackView(self, region):
        """Return a randomized version of the original track's view of ``region``.

        The arrays of the original TrackView are passed through
        ``self._createRandomizedNumpyArrays`` and then through
        ``self._undoTrackViewChanges``; the outcome is wrapped in a new
        TrackView with the original anchor, border handling and overlap setting.
        """
        origTV = self._origTrack.getTrackView(region)

        regionLength = len(origTV.genomeAnchor)
        origStarts = origTV.startsAsNumpyArray()
        origEnds = origTV.endsAsNumpyArray()
        origVals = origTV.valsAsNumpyArray()
        origStrands = origTV.strandsAsNumpyArray()
        origIds = origTV.idsAsNumpyArray()
        origEdges = origTV.edgesAsNumpyArray()
        origWeights = origTV.weightsAsNumpyArray()
        origExtras = origTV.allExtrasAsDictOfNumpyArrays()

        starts, ends, vals, strands, ids, edges, weights, extras = \
            self._createRandomizedNumpyArrays(regionLength, origStarts, origEnds, origVals, origStrands,
                                              origIds, origEdges, origWeights, origExtras, region)

        starts, ends, vals, strands, ids, edges, weights, extras = \
            self._undoTrackViewChanges(starts, ends, vals, strands, ids, edges, weights, extras, origTV)

        return TrackView(origTV.genomeAnchor, starts, ends, vals, strands, ids, edges, weights,
                         origTV.borderHandling, origTV.allowOverlaps, extraLists=extras)
 def _createTrackView(self, starts, ends, vals, strands, ids, edges, weights, extras, sourceRegion, allowOverlaps, sliceFull=False):
     """Wrap plain sequences in a 'crop' TrackView anchored on ``self.genome``/``self.chr``.

     Columns given as None stay None; all others are converted to arrays,
     the values explicitly as float64.  ``extras`` maps extra column names to
     sequences (None means no extra columns).  The anchor spans
     ``sourceRegion[0]`` to ``sourceRegion[1]``.  With ``sliceFull`` set, the
     TrackView is sliced according to its genome anchor before it is returned.
     """
     genomeAnchor = GenomeRegion(genome=self.genome, chr=self.chr,
                                 start=sourceRegion[0], end=sourceRegion[1])

     # (column, dtype) in the positional order expected by TrackView;
     # dtype None lets the array constructor pick the type.
     columnSpecs = [(starts, None), (ends, None), (vals, 'float64'), (strands, None),
                    (ids, None), (edges, None), (weights, None)]
     columns = []
     for column, dtype in columnSpecs:
         if column is None:
             columns.append(None)
         else:
             columns.append(array(column, dtype=dtype))

     extraLists = OrderedDict()
     if extras is not None:
         extraLists = OrderedDict([(key, array(extra)) for key, extra in extras.iteritems()])

     tv = TrackView(genomeAnchor, *(columns + ['crop', allowOverlaps]), extraLists=extraLists)
     if sliceFull:
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
Exemplo n.º 11
0
    def _compute(self):
        """Keep only the segments of the child track that overlap no other segment.

        A segment is kept when it starts at or after the end of every earlier
        segment and the following segment starts at or after its own end.
        Assumes the segments arrive sorted by start position -- TODO confirm.
        An empty track yields an empty result.

        Returns:
            A TrackView with the kept segments and their values, on the genome
            anchor of the child track, overlaps disallowed.
        """
        tv = self._children[0].getResult()
        starts, ends, vals = tv.startsAsNumpyArray(), tv.endsAsNumpyArray(), tv.valsAsNumpyArray()
        # Plain lists for the element-wise scan below.
        startL, endL = list(starts), list(ends)
        numSegments = len(startL)

        keptIndexes = []
        overlapTreshold = 0  # largest end position seen so far
        index = 0
        while index < numSegments:
            start = startL[index]
            end = endL[index]
            isLast = index == numSegments - 1
            # The last segment has no successor that could overlap it.
            if start >= overlapTreshold and (isLast or startL[index + 1] >= end):
                keptIndexes.append(index)
            overlapTreshold = max(overlapTreshold, end)
            index += 1

        # Indexing the source arrays keeps their dtype, also for an empty result.
        keptIndexes = np.array(keptIndexes, dtype=int)
        return TrackView(genomeAnchor=tv.genomeAnchor, startList=starts[keptIndexes], endList=ends[keptIndexes],
                         valList=vals[keptIndexes], strandList=None, idList=None, edgesList=None,
                         weightsList=None, borderHandling=tv.borderHandling, allowOverlaps=False)
Exemplo n.º 12
0
    def getOneTrackViewFromPool(self, origTrack, randIndex):
        """Return the randomized TrackView number ``randIndex`` for ``origTrack``.

        The random track set for ``randIndex`` is computed on first use.
        Parameters for which no data is stored for this track are passed to
        the TrackView as None.

        Args:
            origTrack: a track of the structure this pool was built from.
            randIndex: key of the random track set to read from.

        Raises:
            AssertionError: if ``origTrack`` is not known to this pool.
        """
        trackId = origTrack.getUniqueKey(self._region.genome)
        assert trackId in self._trackIdToIndexDict, \
            'given track should be in the original TrackStructure that was used to make this pool'
        trackIndex = self._trackIdToIndexDict[trackId]

        if randIndex not in self._randomTrackSets['starts']:
            self._computeRandomTrackSet(randIndex)

        origTV = origTrack.getTrackView(self._region)

        for tvParam in self._randomTrackSets:
            try:
                self._randomTrackSets[tvParam][randIndex][trackIndex]
            except KeyError:  # if the parameter does not exist, set it to None
                # setdefault keeps entries already stored for other tracks
                # under this randIndex instead of replacing the whole dict.
                self._randomTrackSets[tvParam].setdefault(randIndex, {})[trackIndex] = None

        def _stored(tvParam):
            return self._randomTrackSets[tvParam][randIndex][trackIndex]

        return TrackView(
            genomeAnchor=origTV.genomeAnchor,
            startList=_stored('starts'),
            endList=_stored('ends'),
            valList=_stored('vals'),
            strandList=_stored('strands'),
            idList=_stored('ids'),
            edgesList=_stored('edges'),
            weightsList=_stored('weights'),
            borderHandling=origTV.borderHandling,
            allowOverlaps=origTV.allowOverlaps)
Exemplo n.º 13
0
    def _compute(self):
        raise  #not finished implementing..

        tv = self._children[0].getResult()
        vals = tv.valsAsNumpyArray()
        numMicroBins = int(math.ceil(float(len(self._region)) / self.microBin))
        miBinBorders = range(0, len(self._region),
                             self.microBin) + [len(self._region)]
        miBins = []
        #go from borders 0,10,20 to paired bin intervals 0,10, 10,20, 20,30 ...
        for i, b in enumerate(miBinBorders):
            miBins.append(b)
            if i != 0 and i != len(miBinBorders) - 1:
                miBins.append(b)
        accVals = vals.add.reduceat(vals, miBins)[::2]
        binCounts = accVals
        #print 'temp1: ', len(binCounts)
        #Fix asserts here..
        assert [i * self.microBin for i in xrange(len(binCounts))
                ] == range(0, len(self._region), self.microBin), ([
                    i * self.microBin for i in xrange(len(binCounts))
                ], range(0, len(self._region), self.microBin))
        startList = [i * self.microBin for i in xrange(len(binCounts))]
        assert [
            min((i + 1) * self.microBin, len(self._region))
            for i in xrange(len(binCounts))
        ] == startList[1:] + [len(self._region)]
        endList = [
            min((i + 1) * self.microBin, len(self._region))
            for i in xrange(len(binCounts))
        ]
        #print ','.join([str(x) for x in binCounts])
        return TrackView(genomeAnchor=tv.genomeAnchor, startList=np.array(startList), endList=np.array(endList), valList=binCounts, \
                          strandList=None, idList=None, edgesList=None, weightsList=None, borderHandling=tv.borderHandling, allowOverlaps=False)
Exemplo n.º 14
0
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
     """
     Build the TrackView of ``region`` from preprocessed track arrays.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}
     region : see GenomeRegion
     borderHandling, allowOverlaps : forwarded unchanged to TrackView
     trackName : not used by this function; kept for call compatibility

     Arrays named in RESERVED_PREFIXES are passed to TrackView in the
     iteration order of RESERVED_PREFIXES; all other arrays except
     'leftIndex' and 'rightIndex' become extra lists.  Dense tracks are
     sliced directly by position (relative to the bounding region when one
     is found); other tracks are sliced via the 'leftIndex'/'rightIndex'
     arrays looked up per bin, and the TrackView is then sliced according
     to its genome anchor.

     Raises IOError when a non-dense track lacks the 'leftIndex' or
     'rightIndex' array.
     """
     brShelve = trackData.boundingRegionShelve
     brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

     # Names that are not extra columns; built once instead of per array name.
     nonExtraNames = set(RESERVED_PREFIXES.keys())
     nonExtraNames.update(['leftIndex', 'rightIndex'])
     extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraNames]

     reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in RESERVED_PREFIXES]
     extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in extraArrayNames]
     trackFormat = TrackFormat( *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]) )

     isDense = trackFormat.reprIsDense()
     if isDense:
         # Dense arrays hold one entry per position, so positions index them directly.
         if brInfo is None:
             leftIndex = region.start
             rightIndex = region.end
         else:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
     else:
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end-1)

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     slicedReservedArrays = [(arr[leftIndex:rightIndex] if arr is not None else None) for arr in reservedArrays]
     slicedExtraArrays = [(arr[leftIndex:rightIndex] if arr is not None else None) for arr in extraArrays]

     argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
     tv = TrackView( *(argList) )

     if not isDense:
         # The index slice is bin-based; trim the elements to the exact region.
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
Exemplo n.º 15
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Build a minimal placeholder TrackView whose columns match the track's format.

        The prefix list and the value/weight data types and dimensions are read
        from the genome element source of ``self.trackName``.  For every prefix
        a one-element array (two elements when the format is both dense and
        interval) filled with fixed placeholder content is created, and the
        arrays are wrapped in a TrackView on ``region``.

        Args:
            region: region to anchor the TrackView to; must have length 1.
            borderHandling: forwarded unchanged to TrackView.
            allowOverlaps: forwarded to TrackView; must be false when the
                format is dense or the source reports no overlapping elements.

        Raises:
            AssertionError: if ``len(region)`` is not 1.
            IncompatibleTracksError: if ``allowOverlaps`` is requested for a
                track that is dense or has no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gold.track.CommonMemmapFunctions import findEmptyVal
        from gold.track.TrackView import TrackView
        import numpy as np

        # Format description of the track as reported by its genome element source.
        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixList = geSource.getPrefixList()
        valDataType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightDataType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        # Columns stay None unless the corresponding prefix is present.
        startList, endList, valList, strandList, idList, edgesList, weightsList = [None]*7
        extraLists=OrderedDict()

        tf = TrackFormat.createInstanceFromPrefixList(prefixList, valDataType, valDim, \
                                                      weightDataType, weightDim)
        if allowOverlaps and (tf.isDense() or geSource.hasNoOverlappingElements()):
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '\
                                          + str(tf) + ' does not satisfy ' + str(self._trackFormatReq))

        # Dense interval formats get two placeholder elements, all others one.
        denseAndInterval = tf.isDense() and tf.isInterval()
        numEls = 2 if denseAndInterval else 1

        # A bare 'S' string type is replaced by the fixed-width 'S2' for the arrays below.
        if valDataType == 'S':
            valDataType = 'S2'
        if weightDataType == 'S':
            weightDataType = 'S2'

        for prefix in prefixList:
            if prefix == 'start':
                startList = np.array([-1], dtype='int32')
            elif prefix == 'end':
                if denseAndInterval:
                    endList = np.array([0, 1], dtype='int32')
                else:
                    endList = np.array([0], dtype='int32')
            elif prefix == 'val':
                # numEls rows of valDim empty values; one-dimensional when valDim is 1.
                valList = np.array([findEmptyVal(valDataType)] * valDim * numEls, \
                                   dtype=valDataType).reshape((numEls, valDim) if valDim > 1 else numEls)
            elif prefix == 'strand':
                strandList = np.array([1] * numEls, dtype='int8')
            elif prefix == 'id':
                idList = np.array([''] * numEls, dtype='S1')
            elif prefix == 'edges':
                edgesList = np.array([['']] * numEls, dtype='S1')
            elif prefix == 'weights':
                # One (empty) edge per element, carrying weightDim empty weights.
                weightsList = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * numEls, \
                                       dtype=weightDataType).reshape((numEls, 1, weightDim) if weightDim > 1 else (numEls, 1))
            else:
                # Any other prefix is treated as an extra column of empty strings.
                extraLists[prefix] = np.array([''] * numEls, dtype='S1')

        return TrackView(region, startList, endList, valList, strandList, idList, edgesList, weightsList, borderHandling, allowOverlaps, extraLists)
Exemplo n.º 16
0
    def __init__(self,
                 vals=True,
                 strands=True,
                 anchor=None,
                 valDType='float64'):
        """Set up a test TrackView on 'TestGenome'/'chr21' carrying values and strands only.

        ``vals`` and ``strands`` are each handed to ``self._createList``
        together with a freshly generated random list of the right length;
        ``valDType`` is the dtype name used for the values.  Without
        ``anchor`` the region starts at position 10 and is as long as
        ``vals``, so an explicit ``vals`` list is required in that case.
        """
        assert vals != True or anchor is not None

        if anchor is not None:
            numElements = anchor[1] - anchor[0]
        else:
            numElements = len(vals)
            anchor = [10, 10 + numElements]

        # The random lists are generated in the same order as before
        # (values first, then strands).
        randomVals = getRandValList(numElements)
        vals = self._createList(vals, randomVals, valDType)
        randomStrands = getRandStrandList(numElements)
        strands = self._createList(strands, randomStrands, 'bool8')

        region = GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1])
        TrackView.__init__(self, region, None, None, vals, strands,
                           None, None, None, 'crop', False)
Exemplo n.º 17
0
def runIntegrationTest():
    """Run AccuracyStat on two hand-made segment TrackViews and print the results.

    The regions are read from a hard-coded BED file for genome 'hg18'.  The
    full result is printed first, followed by the "cc" value extracted from
    every result element.
    """
    firstTrack = Track(['melting'])
    secondTrack = Track(['melting'])
    regionIter = GenomeElementSource('Z:\\new_hb\\2sSegs.bed', 'hg18')

    # Two small segment tracks on the first 50 positions of chrM.
    genomeAnchor = GenomeRegion(genome='hg18', chr='chrM', start=0, end=50)
    firstStarts, firstEnds = [2, 16, 23, 40], [9, 20, 26, 45]
    trackView = TrackView(genomeAnchor, firstStarts, firstEnds, None, 4, None)
    secondStarts, secondEnds = [4, 8, 22], [6, 16, 24]
    trackView2 = TrackView(genomeAnchor, secondStarts, secondEnds, None, 3, None)

    data = StatRunner.run(regionIter, firstTrack, secondTrack, AccuracyStat,
                          trackView, trackView2)
    print(data)

    param = "cc"
    for el in data:
        extractor = SingleValExtractor(el, param)
        print(extractor.getVal())
Exemplo n.º 18
0
 def getTrackView(self, trackBinIndex):
     """Return the segments stored for ``trackBinIndex`` as a 'crop' TrackView.

     Start positions and lengths are read from the data frame view of the
     given index; the ends are their sum.  The TrackView is anchored to the
     bin of the matching track-bin pair, has no other columns, and uses the
     overlap setting of this object.
     """
     binPair = self._trackBinIndexer.getTrackBinPairForTrackBinIndex(trackBinIndex)
     storageView = self._getDataFrameView(trackBinIndex)

     segStarts = storageView.getArray(START_KEY)
     segLengths = storageView.getArray(LENGTH_KEY)
     segEnds = segStarts + segLengths

     return TrackView(binPair.bin, segStarts, segEnds,
                      None, None, None, None, None,
                      borderHandling='crop',
                      allowOverlaps=self.allowOverlaps)
    def _compute(self):
        """Render the intervals between the points of a bin as tab-separated text lines.

        The first child supplies the bin size, the second the TrackView whose
        start positions are used.  Consecutive points (plus the bin borders 0
        and ``binSize - 1`` where no point sits on them) delimit intervals;
        each interval becomes one line
        ``<chr>\\t<start>\\t<end>\\t<value>\\t<strand>`` with coordinates
        shifted by the start of the genome anchor.  Values number the
        intervals upwards, or downwards when the strand value is 0.

        Returns:
            The lines joined by newlines; an empty string for an empty track.

        Raises:
            InvalidFormatError: if the elements of the bin carry more than one
                distinct strand value.
        """
        binSize = self._children[0].getResult()
        tv = self._children[1].getResult()
        starts = list(tv.startsAsNumpyArray())
        ends = starts[:]
        vals = []
        strandType = None
        if len(starts) > 0:
            # Each point ends one interval and starts the next; add the bin
            # borders unless a point already sits on them.
            if starts[0] > 0:
                starts.insert(0, 0)
            else:
                del ends[0]

            if len(ends) == 0 or ends[-1] < binSize - 1:
                ends.append(binSize - 1)
            else:
                del starts[-1]

            strands = tv.strandsAsNumpyArray()

            # Identity test: `!= None` on a numpy array compares elementwise.
            if strands is not None:
                strands = set(strands)
                if len(strands) > 1:
                    raise InvalidFormatError(
                        'All strands within a bin must be of same sort: error at %s'
                        % (tv.genomeAnchor))
                strandType = strands.pop()

            vals = range(len(starts) -
                         1, -1, -1) if strandType == 0 else range(len(starts))

            starts = np.array(starts) + tv.genomeAnchor.start
            ends = np.array(ends) + tv.genomeAnchor.start

        strTemplate = self._region.chr + '\t%s\t%s\t%s\t' + getStringFromStrand(
            strandType)
        # starts, ends and vals always have the same length at this point.
        return '\n'.join([
            strTemplate % (str(segStart), str(segEnd), str(val))
            for segStart, segEnd, val in zip(starts, ends, vals)
        ])
 def testInit(self):
     """Check which column combinations the TrackView constructor accepts.

     Empty or shortened columns (including a single extra list) must raise
     AssertionError; leaving out columns by passing None must be accepted.
     """
     starts = [1, 11, 21]
     ends = [9, 19, 29]
     values = [5.2, -5, 0]
     strands = [False, True, False]
     ids = ['a1', 'b2', 'c3']
     edges = [['b2','c3'], ['a1',''], ['','']]
     weights = [[0.2,0.3], [-0.1,nan], [nan,nan]]
     extras = OrderedDict([('extra1', ['A','B','C']), ('extra2', ['1.0','2.0','3.0'])])

     genomeAnchor = GenomeRegion(self.genome, self.chr, 0, 100)

     columns = [starts, ends, values, strands, ids, edges, weights]
     tail = ['crop', False]

     def _withColumn(position, replacement):
         # Full positional argument list with exactly one column exchanged.
         changed = list(columns)
         changed[position] = replacement
         return changed + tail

     # One column at a time emptied, then an empty extra list.
     for position in range(len(columns)):
         self.assertRaises(AssertionError, TrackView, genomeAnchor,
                           *_withColumn(position, []), extraLists=extras)
     self.assertRaises(AssertionError, TrackView, genomeAnchor,
                       *(columns + tail), extraLists=OrderedDict([('extra1', [])]))

     # Valid constructions with columns left out.
     t = TrackView(genomeAnchor, None, [0] + ends, [nan] + values, [True] + strands,
                   [''] + ids, [['','']] + edges, [[nan,nan]] + weights, 'crop', False,
                   extraLists=OrderedDict([(x,[''] + y) for x,y in extras.items()]))
     t = TrackView(genomeAnchor, *_withColumn(1, None), extraLists=extras)
     t = TrackView(genomeAnchor, *_withColumn(2, None), extraLists=extras)
     t = TrackView(genomeAnchor, *_withColumn(3, None), extraLists=extras)
     t = TrackView(genomeAnchor, starts, ends, values, strands, None, None, None,
                   'crop', False, extraLists=extras)
     t = TrackView(genomeAnchor, starts, ends, values, strands, ids, None, None,
                   'crop', False, extraLists=extras)
     t = TrackView(genomeAnchor, *_withColumn(6, None), extraLists=extras)
     t = TrackView(genomeAnchor, *(columns + tail))

     # One column at a time shortened by its last element, then a short extra list.
     for position in range(len(columns)):
         self.assertRaises(AssertionError, TrackView, genomeAnchor,
                           *_withColumn(position, columns[position][0:-1]), extraLists=extras)
     self.assertRaises(AssertionError, TrackView, genomeAnchor,
                       *(columns + tail), extraLists={'cat': extras['extra1'][0:-1]})
    def _compute(self):
        """Combine the category values of overlapping segments into a step-wise track.

        All distinct start and end positions of the child track become the
        borders of the result segments.  While walking the borders in order,
        the combine method selected by ``self._combineMethod``
        ('mostCommonCat' or 'freqOfCat') is told about every segment that
        starts or ends, and the value it reports at a border is stored for the
        result segment ending at that border.

        Assumes the child track's segments are sorted by start position, as
        ``vals`` is consumed in track order for the start events
        -- TODO confirm.

        Returns:
            A TrackView whose segments run between consecutive borders
            (shifted by the start of the genome anchor) with the combined
            values, in the data type given by the combine method.

        Raises:
            ValueError: if ``self._combineMethod`` names no known method.
        """
        tv = self._children[0].getResult()
        starts, ends, vals = tv.startsAsNumpyArray(), tv.endsAsNumpyArray(
        ), tv.valsAsNumpyArray()
        # Values reordered so that endVals[k] belongs to the k-th smallest end.
        endVals = vals[np.argsort(ends)]

        # position -> [number of segments starting here, number ending here]
        borderDict = dict()
        for index, startPos in enumerate(starts):
            borderDict.setdefault(startPos, [0, 0])[0] += 1
            borderDict.setdefault(ends[index], [0, 0])[1] += 1
        uniquePoints = sorted(borderDict.keys())
        borderArray = [borderDict[v] for v in uniquePoints]
        del starts, ends  # delete unneccessary memory

        if self._combineMethod == 'mostCommonCat':
            combineMethod = MostCommonCategory()
        elif self._combineMethod == 'freqOfCat':
            combineMethod = FrequencyOfCategory(self._category,
                                                self._numSamples)
        else:
            # Fail with a clear message instead of an unbound-variable error below.
            raise ValueError('Unknown combine method: %r' % (self._combineMethod,))

        if len(uniquePoints) > 0:
            newVals = [combineMethod.getEmptyElement() for _ in uniquePoints]

            # accStart/accEnd count the start/end events consumed so far;
            # numVals is the number of segments currently open.
            accStart = numVals = accEnd = 0
            for uniqueIndex, uniqueRow in enumerate(borderArray):
                # Value of the stretch that ends at this border.
                newVals[uniqueIndex] = combineMethod.getCombinedValueForRegion(
                    numVals)

                newAccStart = accStart + uniqueRow[0]
                newAccEnd = accEnd + uniqueRow[1]

                while accStart < newAccStart:
                    combineMethod.updateForRegionStart(vals[accStart])
                    accStart += 1

                while accEnd < newAccEnd:
                    combineMethod.updateForRegionEnd(endVals[accEnd])
                    accEnd += 1

                numVals += uniqueRow[0] - uniqueRow[1]
        else:
            newVals = [combineMethod.getEmptyElement()]

        # The first stored value precedes the first border and is dropped.
        segBorders = np.array(uniquePoints) + tv.genomeAnchor.start
        return TrackView(genomeAnchor=tv.genomeAnchor, startList=segBorders[:-1], endList=segBorders[1:],
                         valList=np.array(newVals[1:], dtype=combineMethod.getDataType()),
                         strandList=None, idList=None, edgesList=None, weightsList=None,
                         borderHandling=tv.borderHandling, allowOverlaps=tv.allowOverlaps)
    def convert(cls, tv):
        """Return a new TrackView built from ``tv`` with a virtual start list.

        The start list is replaced by ``cls._virtualListClass`` constructed
        from the start, end and strand lists of ``tv``; the end list of the
        new view is ``None``. When there are elements, all lists are reordered
        by ascending virtual start position.

        The lists held by ``tv`` are not modified: the extra lists are
        reordered in a copy of ``tv._extraLists`` rather than in the dict
        owned by ``tv``.

        Args:
            tv: the TrackView to convert.

        Returns:
            The converted TrackView, sliced with ``[:]``.
        """
        startList = cls._virtualListClass(tv._startList, tv._endList,
                                          tv._strandList)
        valList = tv._valList
        strandList = tv._strandList
        idList = tv._idList
        edgesList = tv._edgesList
        weightsList = tv._weightsList
        extraLists = tv._extraLists

        if len(startList) > 0:
            sortIndexes = numpy.argsort(startList)

            def _reordered(elementList):
                # Optional lists may be None; those are passed through.
                if elementList is None:
                    return None
                return elementList[sortIndexes]

            startList = startList[sortIndexes]
            valList = _reordered(valList)
            strandList = _reordered(strandList)
            idList = _reordered(idList)
            edgesList = _reordered(edgesList)
            weightsList = _reordered(weightsList)

            # Work on a shallow copy so that the dict owned by `tv` keeps its
            # original ordering (assigning into tv._extraLists directly would
            # silently reorder the source track view).
            # Assumes tv._extraLists is a dict/OrderedDict -- TODO confirm.
            extraLists = extraLists.copy()
            for key in extraLists:
                extraLists[key] = _reordered(extraLists[key])

        newTv = TrackView(tv.genomeAnchor,
                          startList,
                          None,
                          valList,
                          strandList,
                          idList,
                          edgesList,
                          weightsList,
                          tv.borderHandling,
                          tv.allowOverlaps,
                          extraLists=extraLists)
        return newTv[:]
Exemplo n.º 23
0
   def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False,
                extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
      """Build a TrackView for tests from explicit and/or random content.

      Each of starts, ends, vals, strands, ids, edges, weights and extras is
      either a bool or explicit content. Booleans are resolved by
      ``_createList`` / ``_createExtraLists`` against randomly generated
      lists (presumably True selects the random list and False yields None --
      see those helpers).

      Args:
          segments: optional iterable of (start, end) pairs; when given it
              replaces ``starts`` and ``ends``.
          anchor: [start, end] of the genome region; defaults to [10, 1000].
              Explicit start/end coordinates are shifted by ``anchor[0]``.
          numElements: number of elements; required when no explicit content
              is given, otherwise taken from the first explicit list.
          valDType: dtype used for the value list.
          borderHandling, allowOverlaps: passed on to ``TrackView.__init__``.
      """
      # Explicit content for only one of starts/ends switches the other off.
      if not isinstance(starts, bool) and ends == True:
          ends = False
      if not isinstance(ends, bool) and starts == True:
          starts = False

      assert not (starts == False and ends == False)
      assert segments != False and segments != True
      assert starts is not None and ends is not None and vals is not None and strands is not None
      assert segments is None or (starts == True and ends == True)
      assert not (isIter(weights) and not isIter(edges))

      # Either some explicit content determines the element count, or
      # numElements must be given.
      assert (any(type(x) not in [bool, type(None)]
                  for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras])
              and numElements is None) or numElements is not None

      if anchor is None:
          anchor = [10, 1000]

      if segments is not None:
          starts = []
          ends = []
          for seg in segments:
              starts.append(seg[0])
              ends.append(seg[1])

      if isIter(edges):
          # Pad edges (and weights) so every element has the same number.
          maxNumEdges = self._findMaxNumEls(edges)
          edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
          if isIter(weights):
              weights = self._appendEmptyToEnd(weights, numpy.nan, maxNumEdges)

      # All explicit lists must have the same length; the first one found
      # defines numElements when it was not given.
      contentLists = [starts, ends, vals, strands, ids, edges, weights]
      if isinstance(extras, dict):
          contentLists += list(extras.values())
      for content in contentLists:
          if isinstance(content, bool):
              continue
          if numElements is None:
              numElements = len(content)
          assert len(content) == numElements

      # Shift explicit coordinates by the anchor start.
      # NOTE: done in place, so lists passed in by the caller are modified.
      for coordList in [starts, ends]:
          if not isinstance(coordList, bool):
              for j in range(len(coordList)):
                  coordList[j] += anchor[0]

      randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
      starts = self._createList(starts, randSegmentLists[0], 'int32')
      ends = self._createList(ends, randSegmentLists[1], 'int32')

      vals = self._createList(vals, getRandValList(numElements, valDType), valDType)
      strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

      randIds, randEdges, randWeights = getRandGraphLists(numElements)
      ids = self._createList(ids, randIds, randIds.dtype)
      edges = self._createList(edges, randEdges, randEdges.dtype)
      weights = self._createList(weights, randWeights, 'float64')

      # NOTE(review): reshaping a 1-D array to (n, 0) looks like it can only
      # succeed for an empty array -- confirm intent.
      if weights is not None and len(weights.shape) == 1:
          weights = weights.reshape(weights.shape + (0,))

      extras = self._createExtraLists(extras, 'S', numElements)

      # Identity checks are required here: at this point the lists may be
      # NumPy arrays, for which '== None' / '!= None' compare elementwise.
      if starts is None:
          # NOTE(review): explicit ends were shifted by anchor[0] above, yet
          # the first end is compared against 0 -- confirm intent.
          if ends[0] != 0:
              ends = numpy.append([anchor[0]], ends)
              if vals is not None:
                  vals = numpy.append([nan], vals)
              if strands is not None:
                  strands = numpy.append([True], strands)
          if ends[-1] != anchor[1]:
              ends[-1] = anchor[1]

      TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals,
                         strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)
Exemplo n.º 24
0
 def __iter__(self):
     """Yield one TrackView covering ``self._region``.

     The value list is 0, 1, ..., len(region) - 1 as int32.
     """
     region = self._region
     positions = numpy.arange(len(region), dtype='int32')
     yield TrackView(region, valList=positions, allowOverlaps=False)
Exemplo n.º 25
0
    def _compute(self):
        """Report the most common value(s) for each region between borders.

        All distinct start and end positions of the child's segments are used
        as border points. For every region between two consecutive border
        points that is covered by at least one segment, the value written is
        the most frequent value(s) among the covering segments, joined by
        ';' and followed by '(maxCount/numCovering)'.

        Returns:
            A TrackView whose segments are the regions between consecutive
            border points (shifted by ``tv.genomeAnchor.start``).
        """
        tv = self._children[0].getResult()
        nElements = tv.getNumElements()
        starts, ends, vals = (tv.startsAsNumpyArray(), tv.endsAsNumpyArray(),
                              tv.valsAsNumpyArray())
        # Necessary to find the correct values for end events.
        sortedToOriginalEndIndices = np.argsort(ends)

        # np.unique replaces the removed np.unique1d; with return_inverse it
        # returns (sorted unique values, index of each input in that array).
        uniqueSortedPositions, uniquePosIndices = np.unique(
            np.concatenate((starts, ends)), return_inverse=True)
        if uniqueSortedPositions.size > 1:
            # Number of segments starting (column 0) and ending (column 1)
            # at each unique position.
            posEventArray = np.zeros((uniqueSortedPositions.size, 2),
                                     dtype='int32')

            # starts
            indices, counts = self._countDuplicates(
                uniquePosIndices[:nElements])
            posEventArray[indices, 0] = counts
            # ends
            indices, counts = self._countDuplicates(
                uniquePosIndices[nElements:])
            posEventArray[indices, 1] = counts
            # Delete unnecessary arrays to free memory.
            del starts, ends, indices, counts, uniquePosIndices

            # NOTE(review): newVals uses vals' dtype; if that is a fixed-width
            # string dtype the formatted text below may be truncated -- confirm.
            newVals = np.zeros(uniqueSortedPositions.size - 1,
                               dtype=vals.dtype)

            uniqueVals = np.unique(vals)
            # Current number of open segments for each unique value.
            uniqueValCounts = np.zeros(uniqueVals.size, dtype='int32')

            # NumPy record array for converting from category values to the
            # corresponding indices in the uniqueValCounts array.
            recDType = np.dtype({
                'names': [str(x) for x in uniqueVals],
                'formats': ['int32'] * uniqueVals.size
            })
            uniqueValCountsIndices = np.array([tuple(range(len(uniqueVals)))],
                                              dtype=recDType)

            accStart = 0
            accEnd = 0

            for posEventIndex in xrange(len(posEventArray)):
                startEvents, endEvents = posEventArray[posEventIndex]
                # Counts here still describe the region ENDING at this
                # position, hence the write to index posEventIndex - 1.
                numVals = uniqueValCounts.sum()
                if numVals > 0:
                    maxCount = uniqueValCounts.max()
                    newVals[posEventIndex - 1] = ';'.join([
                        str(x) for x in uniqueVals[np.where(
                            uniqueValCounts == maxCount)]
                    ]) + '(%i/%i)' % (maxCount, numVals)

                # Segments starting at this position: increase their counts.
                uniqueEventVals, counts = self._countDuplicates(
                    vals[accStart:accStart + startEvents])
                if uniqueEventVals.size > 0:
                    uniqueValCounts[uniqueValCountsIndices[uniqueEventVals].
                                    view('int32')] += counts

                # Segments ending at this position: decrease their counts.
                uniqueEventVals, counts = self._countDuplicates(
                    vals[sortedToOriginalEndIndices[accEnd:accEnd +
                                                    endEvents]])
                if uniqueEventVals.size > 0:
                    uniqueValCounts[uniqueValCountsIndices[uniqueEventVals].
                                    view('int32')] -= counts

                accStart += startEvents
                accEnd += endEvents

        else:
            newVals = np.array([], dtype=vals.dtype)

        segBorders = uniqueSortedPositions + tv.genomeAnchor.start
        return TrackView(genomeAnchor=tv.genomeAnchor,
                         startList=segBorders[:-1],
                         endList=segBorders[1:],
                         valList=newVals,
                         strandList=None,
                         idList=None,
                         edgesList=None,
                         weightsList=None,
                         borderHandling=tv.borderHandling,
                         allowOverlaps=tv.allowOverlaps)
Exemplo n.º 26
0
    def loadTrackView(trackData,
                      region,
                      borderHandling,
                      allowOverlaps,
                      trackName=[]):
        """
        Build a TrackView for `region` from the arrays in `trackData`.

        trackData : see TrackSource.getTrackData {'id' : smartmemmap}
        region : see GenomeRegion
        borderHandling, allowOverlaps : passed on to TrackView
        trackName : not used by this function (never read or modified)

        Arrays named in RESERVED_PREFIXES are passed to TrackView
        positionally; every other array except 'leftIndex' and 'rightIndex'
        is passed as an extra list. For a dense representation the arrays are
        sliced by position (relative to the bounding region, if any);
        otherwise they are sliced by the bin indices stored in 'leftIndex' /
        'rightIndex' and then trimmed to the region.

        Raises IOError if a non-dense track has no 'leftIndex' or
        'rightIndex' array.
        """
        brShelve = trackData.boundingRegionShelve
        brInfo = brShelve.getBoundingRegionInfo(
            region) if brShelve is not None else None

        # Built once (not per element) and without list + dict-view
        # concatenation, which only works on Python 2.
        nonExtraNames = set(RESERVED_PREFIXES)
        nonExtraNames.update(('leftIndex', 'rightIndex'))
        extraArrayNames = [
            arrayName for arrayName in trackData
            if arrayName not in nonExtraNames
        ]

        reservedArrays = [
            TrackViewLoader._getArray(trackData, arrayName, brInfo)
            for arrayName in RESERVED_PREFIXES
        ]
        extraArrays = [
            TrackViewLoader._getArray(trackData, arrayName, brInfo)
            for arrayName in extraArrayNames
        ]
        trackFormat = TrackFormat(
            *(reservedArrays +
              [OrderedDict(zip(extraArrayNames, extraArrays))]))

        if trackFormat.reprIsDense():
            # One array entry per position: slice directly by coordinates.
            if brInfo is None:
                leftIndex = region.start
                rightIndex = region.end
            else:
                leftIndex = region.start - brInfo.start
                rightIndex = region.end - brInfo.start
        else:
            # Look up the element indices via the bins containing the first
            # and last position of the region.
            leftBin = CompBinManager.getBinNumber(region.start)
            rightBin = CompBinManager.getBinNumber(region.end - 1)

            if trackData.get('leftIndex') is None or trackData.get(
                    'rightIndex') is None:
                raise IOError('Preprocessed track not found. TrackData: ' +
                              ', '.join(trackData.keys()))

            leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex',
                                                  brInfo, leftBin)
            rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex',
                                                   brInfo, rightBin)

        def _sliced(arrays):
            # Missing (None) arrays are passed through unchanged.
            return [(array[leftIndex:rightIndex] if array is not None else None)
                    for array in arrays]

        slicedReservedArrays = _sliced(reservedArrays)
        slicedExtraArrays = _sliced(extraArrays)

        argList = [region] + slicedReservedArrays + [
            borderHandling, allowOverlaps
        ] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
        tv = TrackView(*argList)

        if not trackFormat.reprIsDense():
            tv.sliceElementsAccordingToGenomeAnchor()
        return tv
Exemplo n.º 27
0
    def __init__(self,
                 segments=None,
                 starts=True,
                 ends=True,
                 vals=False,
                 strands=False,
                 ids=False,
                 edges=False,
                 weights=False,
                 extras=False,
                 anchor=None,
                 numElements=None,
                 valDType='float64',
                 borderHandling='crop',
                 allowOverlaps=False):
        """Build a TrackView for tests from explicit and/or random content.

        Each of starts, ends, vals, strands, ids, edges, weights and extras
        is either a bool or explicit content. Booleans are resolved by
        ``_createList`` / ``_createExtraLists`` against randomly generated
        lists (presumably True selects the random list and False yields
        None -- see those helpers).

        Args:
            segments: optional iterable of (start, end) pairs; when given it
                replaces ``starts`` and ``ends``.
            anchor: [start, end] of the genome region; defaults to
                [10, 1000]. Explicit start/end coordinates are shifted by
                ``anchor[0]``.
            numElements: number of elements; required when no explicit
                content is given, otherwise taken from the first explicit
                list.
            valDType: dtype used for the value list.
            borderHandling, allowOverlaps: passed on to
                ``TrackView.__init__``.
        """
        # Explicit content for only one of starts/ends switches the other off.
        if not isinstance(starts, bool) and ends == True:
            ends = False
        if not isinstance(ends, bool) and starts == True:
            starts = False

        assert not (starts == False and ends == False)
        assert segments != False and segments != True
        assert starts is not None and ends is not None and vals is not None and strands is not None
        assert segments is None or (starts == True and ends == True)
        assert not (isIter(weights) and not isIter(edges))

        # Either some explicit content determines the element count, or
        # numElements must be given.
        explicitContentGiven = any(
            type(x) not in [bool, type(None)] for x in
            [segments, starts, ends, vals, strands, ids, edges, weights, extras])
        assert (explicitContentGiven and numElements is None) \
            or numElements is not None

        if anchor is None:
            anchor = [10, 1000]

        if segments is not None:
            starts = []
            ends = []
            for seg in segments:
                starts.append(seg[0])
                ends.append(seg[1])

        if isIter(edges):
            # Pad edges (and weights) so every element has the same number.
            maxNumEdges = self._findMaxNumEls(edges)
            edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
            if isIter(weights):
                weights = self._appendEmptyToEnd(weights, numpy.nan,
                                                 maxNumEdges)

        # All explicit lists must have the same length; the first one found
        # defines numElements when it was not given.
        contentLists = [starts, ends, vals, strands, ids, edges, weights]
        if isinstance(extras, dict):
            contentLists += list(extras.values())
        for content in contentLists:
            if isinstance(content, bool):
                continue
            if numElements is None:
                numElements = len(content)
            assert len(content) == numElements

        # Shift explicit coordinates by the anchor start.
        # NOTE: done in place, so lists passed in by the caller are modified.
        for coordList in [starts, ends]:
            if not isinstance(coordList, bool):
                for j in range(len(coordList)):
                    coordList[j] += anchor[0]

        randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
        starts = self._createList(starts, randSegmentLists[0], 'int32')
        ends = self._createList(ends, randSegmentLists[1], 'int32')

        vals = self._createList(vals, getRandValList(numElements, valDType),
                                valDType)
        strands = self._createList(strands, getRandStrandList(numElements),
                                   'bool8')

        randIds, randEdges, randWeights = getRandGraphLists(numElements)
        ids = self._createList(ids, randIds, randIds.dtype)
        edges = self._createList(edges, randEdges, randEdges.dtype)
        weights = self._createList(weights, randWeights, 'float64')

        # NOTE(review): reshaping a 1-D array to (n, 0) looks like it can
        # only succeed for an empty array -- confirm intent.
        if weights is not None and len(weights.shape) == 1:
            weights = weights.reshape(weights.shape + (0, ))

        extras = self._createExtraLists(extras, 'S', numElements)

        if starts is None:
            # NOTE(review): explicit ends were shifted by anchor[0] above, yet
            # the first end is compared against 0 -- confirm intent.
            if ends[0] != 0:
                ends = numpy.append([anchor[0]], ends)
                if vals is not None:
                    vals = numpy.append([nan], vals)
                if strands is not None:
                    strands = numpy.append([True], strands)
            if ends[-1] != anchor[1]:
                ends[-1] = anchor[1]

        TrackView.__init__(self,
                           GenomeRegion('TestGenome', 'chr21', anchor[0],
                                        anchor[1]),
                           starts,
                           ends,
                           vals,
                           strands,
                           ids,
                           edges,
                           weights,
                           borderHandling,
                           allowOverlaps,
                           extraLists=extras)