示例#1
0
    def _createTrackView(self,
                         starts,
                         ends,
                         vals,
                         strands,
                         ids,
                         edges,
                         weights,
                         extras,
                         sourceRegion,
                         allowOverlaps,
                         sliceFull=False):
        """Build a TrackView anchored at ``sourceRegion`` from plain sequences.

        Each per-element argument (starts, ends, vals, strands, ids, edges,
        weights) is converted with ``array`` unless it is None, in which case
        None is passed through. ``vals`` is converted with dtype 'float64'.
        ``extras`` is either None or a mapping of name -> sequence; it is
        handed to the view as an OrderedDict of arrays (empty when None).

        ``sourceRegion`` is indexed as ``[0]`` (start) and ``[1]`` (end); the
        genome and chromosome come from ``self.genome`` and ``self.chr``.
        The view is created with border handling 'crop'. When ``sliceFull``
        is true, ``sliceElementsAccordingToGenomeAnchor()`` is called on the
        view before it is returned.
        """
        def toArrayOrNone(seq, **arrayArgs):
            # None marks an absent column and must reach TrackView unchanged.
            if seq is None:
                return None
            return array(seq, **arrayArgs)

        region = GenomeRegion(genome=self.genome,
                              chr=self.chr,
                              start=sourceRegion[0],
                              end=sourceRegion[1])

        startArr = toArrayOrNone(starts)
        endArr = toArrayOrNone(ends)
        valArr = toArrayOrNone(vals, dtype='float64')
        strandArr = toArrayOrNone(strands)
        idArr = toArrayOrNone(ids)
        edgeArr = toArrayOrNone(edges)
        weightArr = toArrayOrNone(weights)

        if extras is None:
            extraArrs = OrderedDict()
        else:
            extraArrs = OrderedDict(
                [(name, array(column)) for name, column in extras.iteritems()])

        view = TrackView(region, startArr, endArr, valArr, strandArr, idArr,
                         edgeArr, weightArr, 'crop', allowOverlaps,
                         extraLists=extraArrs)
        if sliceFull:
            view.sliceElementsAccordingToGenomeAnchor()
        return view
示例#2
0
 def __init__(self, vals=True, strands=True, anchor=None, valDType='float64'):
     """Create a test TrackView on 'TestGenome'/'chr21' that carries only values and strands.

     vals, strands: either an explicit sequence or a flag; both are handed to
         self._createList together with a random list of matching length
         (presumably True selects the random list -- TODO confirm against
         _createList).
     anchor: [start, end] pair for the genome region. When None, vals must be
         an explicit sequence and the anchor becomes [10, 10 + len(vals)].
     valDType: dtype passed to self._createList for the values.

     Starts, ends, ids, edges and weights are passed to TrackView as None;
     border handling is 'crop' and overlaps are not allowed.
     """
     # Identity tests instead of ==/!=: an equality test against a numpy array
     # is element-wise and cannot be used as a truth value.
     assert vals is not True or anchor is not None

     if anchor is None:
         numElements = len(vals)
         anchor = [10, 10 + numElements]
     else:
         numElements = anchor[1] - anchor[0]

     vals = self._createList(vals, getRandValList(numElements), valDType)
     strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

     TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), None, None,
                        vals, strands, None, None, None, 'crop', False)
示例#3
0
 def _createTrackView(self, starts, ends, vals, strands, ids, edges, weights, extras, sourceRegion, allowOverlaps, sliceFull=False):
     """Assemble a TrackView over sourceRegion from optional column sequences.

     Columns that are None stay None; all others are wrapped with ``array``
     (``vals`` with dtype 'float64'). ``extras`` may be None or a mapping of
     name -> sequence and is passed on as an OrderedDict of arrays. The view
     uses border handling 'crop'; if sliceFull is true,
     sliceElementsAccordingToGenomeAnchor() is called before returning it.
     """
     anchorRegion = GenomeRegion(genome=self.genome, chr=self.chr,
                                 start=sourceRegion[0], end=sourceRegion[1])

     # Convert the columns in positional order; None means "column absent".
     startCol = None if starts is None else array(starts)
     endCol = None if ends is None else array(ends)
     valCol = None if vals is None else array(vals, dtype='float64')
     strandCol = None if strands is None else array(strands)
     idCol = None if ids is None else array(ids)
     edgeCol = None if edges is None else array(edges)
     weightCol = None if weights is None else array(weights)

     extraCols = OrderedDict()
     if extras is not None:
         extraCols = OrderedDict([(label, array(data)) for label, data in extras.iteritems()])

     trackView = TrackView(anchorRegion, startCol, endCol, valCol, strandCol,
                           idCol, edgeCol, weightCol, 'crop', allowOverlaps,
                           extraLists=extraCols)
     if sliceFull:
         trackView.sliceElementsAccordingToGenomeAnchor()
     return trackView
示例#4
0
 def getTrackView(self, region):
     """Return a randomized TrackView for region, derived from the original track.

     The original track's view for region is fetched (after registering this
     object's format requirement on it), checked with _checkTrackFormat and
     asserted to be non-overlapping, 'crop'-bordered and anchored exactly at
     region. Its columns are passed to _createRandomizedNumpyArrays; the
     returned starts and ends are shifted by the anchor's start before the
     new TrackView is built. The result is stored in self._cachedTV and
     returned.
     """
     if Config.USE_SLOW_DEFENSIVE_ASSERTS:
         # region must equal the original region, or be a member of it when
         # the original region is iterable.
         assert (isIter(self._origRegion) and region in self._origRegion) or \
                 (not isIter(self._origRegion) and self._origRegion == region)

     self._origTrack.addFormatReq(self._trackFormatReq)
     sourceTV = self._origTrack.getTrackView(region)
     self._checkTrackFormat(sourceTV)
     assert not sourceTV.allowOverlaps
     assert sourceTV.borderHandling == 'crop'
     assert region == sourceTV.genomeAnchor

     randomizerArgs = (len(sourceTV.genomeAnchor),
                       sourceTV.startsAsNumpyArray(),
                       sourceTV.endsAsNumpyArray(),
                       sourceTV.valsAsNumpyArray(),
                       sourceTV.strandsAsNumpyArray(),
                       sourceTV.idsAsNumpyArray(),
                       sourceTV.edgesAsNumpyArray(),
                       sourceTV.weightsAsNumpyArray(),
                       sourceTV.allExtrasAsDictOfNumpyArrays(),
                       sourceTV.trackFormat,
                       region)
     starts, ends, vals, strands, ids, edges, weights, extras = \
         self._createRandomizedNumpyArrays(*randomizerArgs)

     from gtrackcore.util.CommonFunctions import getClassName

     # The randomized coordinates are offset by the anchor start before use.
     if starts is not None:
         shiftedStarts = starts + sourceTV.genomeAnchor.start
     else:
         shiftedStarts = None
     if ends is not None:
         shiftedEnds = ends + sourceTV.genomeAnchor.start
     else:
         shiftedEnds = None

     self._cachedTV = TrackView(sourceTV.genomeAnchor, shiftedStarts, shiftedEnds,
                                vals, strands, ids, edges, weights,
                                sourceTV.borderHandling, sourceTV.allowOverlaps,
                                extraLists=extras)
     assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), \
         'Incompatible track-format: ' + str(self._trackFormatReq) + ' VS ' + str(self._cachedTV.trackFormat)
     return self._cachedTV
示例#5
0
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
     """
     Load the part of a preprocessed track that falls inside region as a TrackView.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}
     region : see GenomeRegion
     borderHandling, allowOverlaps : passed on to TrackView unchanged
     trackName : not used by this function

     Arrays named in RESERVED_PREFIXES are loaded as the reserved columns;
     every other array except 'leftIndex' and 'rightIndex' is loaded as an
     extra column. For dense representations the slice bounds are the region
     coordinates (relative to the bounding region start when bounding-region
     info exists); otherwise they are looked up in the 'leftIndex' and
     'rightIndex' arrays for the bins containing region.start and
     region.end - 1, and the view is afterwards sliced to the genome anchor.

     Raises IOError when a non-dense track has no 'leftIndex'/'rightIndex'.
     """
     brShelve = trackData.boundingRegionShelve
     brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

     # Build the exclusion set once: the original rebuilt a keys()+list
     # concatenation for every array name, which also fails on Python 3 where
     # keys() is a view.
     nonExtraNames = set(RESERVED_PREFIXES)
     nonExtraNames.update(('leftIndex', 'rightIndex'))
     extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraNames]

     reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in RESERVED_PREFIXES]
     extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in extraArrayNames]
     trackFormat = TrackFormat( *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]) )

     if trackFormat.reprIsDense():
         if brInfo is None:
             leftIndex = region.start
             rightIndex = region.end
         else:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
     else:
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end-1)

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     # 'arr' rather than 'array': on Python 2 a list-comprehension variable
     # leaks into the function scope and would shadow any imported 'array'.
     slicedReservedArrays = [(arr[leftIndex:rightIndex] if arr is not None else None) for arr in reservedArrays]
     slicedExtraArrays = [(arr[leftIndex:rightIndex] if arr is not None else None) for arr in extraArrays]

     argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
     tv = TrackView( *(argList) )

     if not trackFormat.reprIsDense():
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
示例#6
0
    def __init__(self,
                 vals=True,
                 strands=True,
                 anchor=None,
                 valDType='float64'):
        """Create a test TrackView on 'TestGenome'/'chr21' with values and strands only.

        vals, strands: either an explicit sequence or a flag; both are handed
            to self._createList together with a random list of matching
            length (presumably True selects the random list -- TODO confirm
            against _createList).
        anchor: [start, end] pair for the genome region. When None, vals must
            be an explicit sequence and the anchor becomes
            [10, 10 + len(vals)].
        valDType: dtype passed to self._createList for the values.

        Starts, ends, ids, edges and weights are passed to TrackView as None;
        border handling is 'crop' and overlaps are not allowed.
        """
        # Identity tests instead of ==/!=: an equality test against a numpy
        # array is element-wise and cannot be used as a truth value.
        assert vals is not True or anchor is not None

        if anchor is None:
            numElements = len(vals)
            anchor = [10, 10 + numElements]
        else:
            numElements = anchor[1] - anchor[0]

        vals = self._createList(vals, getRandValList(numElements), valDType)
        strands = self._createList(strands, getRandStrandList(numElements),
                                   'bool8')

        TrackView.__init__(
            self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]),
            None, None, vals, strands, None, None, None, 'crop', False)
示例#7
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """Build a placeholder TrackView for a single-position region.

        The column layout is taken from the track's GenomeElementSource: one
        array is created per prefix in its prefix list, filled with fixed
        placeholder values (start -1, end 0 or [0, 1], strand 1, empty
        strings, or findEmptyVal of the value/weight dtype). Tracks that are
        both dense and interval get two elements per column, all others one.

        Raises IncompatibleTracksError when allowOverlaps is set but the
        format is dense or the source reports no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            self.trackName, region.genome)
        prefixes = source.getPrefixList()
        valType = source.getValDataType()
        valDim = source.getValDim()
        weightType = source.getEdgeWeightDataType()
        weightDim = source.getEdgeWeightDim()

        startArr = endArr = valArr = strandArr = None
        idArr = edgesArr = weightsArr = None
        extraArrs = OrderedDict()

        fmt = TrackFormat.createInstanceFromPrefixList(
            prefixes, valType, valDim, weightType, weightDim)
        if allowOverlaps and (fmt.isDense() or source.hasNoOverlappingElements()):
            message = prettyPrintTrackName(self.trackName) + ' with format: ' \
                + str(fmt) + ' does not satisfy ' + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

        isDenseInterval = fmt.isDense() and fmt.isInterval()
        if isDenseInterval:
            count = 2
        else:
            count = 1

        # A bare string dtype is widened to two characters before allocation.
        if valType == 'S':
            valType = 'S2'
        if weightType == 'S':
            weightType = 'S2'

        for prefix in prefixes:
            if prefix == 'start':
                startArr = np.array([-1], dtype='int32')
            elif prefix == 'end':
                endPositions = [0, 1] if isDenseInterval else [0]
                endArr = np.array(endPositions, dtype='int32')
            elif prefix == 'val':
                flatVals = np.array([findEmptyVal(valType)] * valDim * count,
                                    dtype=valType)
                valShape = (count, valDim) if valDim > 1 else count
                valArr = flatVals.reshape(valShape)
            elif prefix == 'strand':
                strandArr = np.array([1] * count, dtype='int8')
            elif prefix == 'id':
                idArr = np.array([''] * count, dtype='S1')
            elif prefix == 'edges':
                edgesArr = np.array([['']] * count, dtype='S1')
            elif prefix == 'weights':
                flatWeights = np.array(
                    [[[findEmptyVal(weightType)]]] * weightDim * count,
                    dtype=weightType)
                if weightDim > 1:
                    weightShape = (count, 1, weightDim)
                else:
                    weightShape = (count, 1)
                weightsArr = flatWeights.reshape(weightShape)
            else:
                # Any prefix that is not reserved becomes an extra column.
                extraArrs[prefix] = np.array([''] * count, dtype='S1')

        return TrackView(region, startArr, endArr, valArr, strandArr, idArr,
                         edgesArr, weightsArr, borderHandling, allowOverlaps,
                         extraArrs)
示例#8
0
   def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False,
                extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
       """Create a test TrackView on 'TestGenome'/'chr21' from explicit and/or random columns.

       segments: optional iterable of (start, end) pairs; when given it
           replaces starts and ends, which must then be left at True.
       starts, ends, vals, strands, ids, edges, weights: either an explicit
           sequence or a bool flag. Each is handed to self._createList
           together with randomly generated data (presumably True selects the
           random data and False yields None -- TODO confirm against
           _createList). If only one of starts/ends is given explicitly, the
           other is switched off.
       extras: passed to self._createExtraLists; when it is a dict its values
           take part in the length check.
       anchor: [start, end] of the genome region, default [10, 1000].
       numElements: number of elements; required when no explicit sequence is
           given, otherwise taken from the first explicit sequence.
       valDType, borderHandling, allowOverlaps: passed on as given.

       Note: explicitly given starts/ends lists are shifted by anchor[0] in
       place, i.e. the caller's lists are modified.
       """
       if type(starts) != bool and ends == True:
           ends = False
       if type(ends) != bool and starts == True:
           starts = False

       # None checks use identity: == / != against a numpy array is
       # element-wise and cannot be used as a truth value.
       assert not (starts == False and ends == False)
       assert segments != False and segments != True
       assert starts is not None and ends is not None and vals is not None and strands is not None
       assert segments is None or (starts == True and ends == True)
       assert not (isIter(weights) and not isIter(edges))

       assert (any(type(x) not in [bool, type(None)] for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras]) and numElements is None) \
              or numElements is not None

       if anchor is None:
           anchor = [10, 1000]

       if segments is not None:
           starts = []
           ends = []
           for seg in segments:
               starts.append(seg[0])
               ends.append(seg[1])

       if isIter(edges):
           # Pad every edge list (and weight list) to the same length.
           maxNumEdges = self._findMaxNumEls(edges)
           edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
           if isIter(weights):
               weights = self._appendEmptyToEnd(weights, numpy.nan, maxNumEdges)

       # All explicit columns must agree on the number of elements.
       extraColumns = list(extras.values()) if isinstance(extras, dict) else []
       for column in [starts, ends, vals, strands, ids, edges, weights] + extraColumns:
           if type(column) != bool and numElements is None:
               numElements = len(column)
           assert type(column) == bool or len(column) == numElements

       # Explicit coordinates are relative to the anchor start.
       for coordList in [starts, ends]:
           if type(coordList) != bool:
               for j in range(len(coordList)):
                   coordList[j] += anchor[0]

       randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
       starts = self._createList(starts, randSegmentLists[0], 'int32')
       ends = self._createList(ends, randSegmentLists[1], 'int32')

       vals = self._createList(vals, getRandValList(numElements, valDType), valDType)
       strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

       randIds, randEdges, randWeights = getRandGraphLists(numElements)
       ids = self._createList(ids, randIds, randIds.dtype)
       edges = self._createList(edges, randEdges, randEdges.dtype)
       weights = self._createList(weights, randWeights, 'float64')

       if weights is not None and len(weights.shape) == 1:
           weights = weights.reshape(weights.shape + (0,))

       extras = self._createExtraLists(extras, 'S', numElements)

       if starts is None:
           # Without starts, a leading element ending at the anchor start is
           # prepended and the last end is forced to the anchor end.
           if ends[0] != 0:
               ends = numpy.append([anchor[0]], ends)
               if vals is not None:
                   vals = numpy.append([nan], vals)
               if strands is not None:
                   strands = numpy.append([True], strands)
           if ends[-1] != anchor[1]:
               ends[-1] = anchor[1]

       TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals,
                          strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)
示例#9
0
    def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False,
                 extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
        """Create a test TrackView on 'TestGenome'/'chr21' from explicit and/or random columns.

        segments: optional iterable of (start, end) pairs; when given it
            replaces starts and ends, which must then be left at True.
        starts, ends, vals, strands, ids, edges, weights: either an explicit
            sequence or a bool flag. Each is handed to self._createList
            together with randomly generated data (presumably True selects
            the random data and False yields None -- TODO confirm against
            _createList). If only one of starts/ends is given explicitly, the
            other is switched off.
        extras: passed to self._createExtraLists; when it is a dict its
            values take part in the length check.
        anchor: [start, end] of the genome region, default [10, 1000].
        numElements: number of elements; required when no explicit sequence
            is given, otherwise taken from the first explicit sequence.
        valDType, borderHandling, allowOverlaps: passed on as given.

        Note: explicitly given starts/ends lists are shifted by anchor[0] in
        place, i.e. the caller's lists are modified.
        """
        if type(starts) != bool and ends == True:
            ends = False
        if type(ends) != bool and starts == True:
            starts = False

        # None checks use identity: == / != against a numpy array is
        # element-wise and cannot be used as a truth value.
        assert not (starts == False and ends == False)
        assert segments != False and segments != True
        assert starts is not None and ends is not None and vals is not None and strands is not None
        assert segments is None or (starts == True and ends == True)
        assert not (isIter(weights) and not isIter(edges))

        assert (any(type(x) not in [bool, type(None)] for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras]) and numElements is None) \
               or numElements is not None

        if anchor is None:
            anchor = [10, 1000]

        if segments is not None:
            starts = []
            ends = []
            for seg in segments:
                starts.append(seg[0])
                ends.append(seg[1])

        if isIter(edges):
            # Pad every edge list (and weight list) to the same length.
            maxNumEdges = self._findMaxNumEls(edges)
            edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
            if isIter(weights):
                weights = self._appendEmptyToEnd(weights, numpy.nan,
                                                 maxNumEdges)

        # All explicit columns must agree on the number of elements.
        extraColumns = list(extras.values()) if isinstance(extras, dict) else []
        for column in [starts, ends, vals, strands, ids, edges, weights] + extraColumns:
            if type(column) != bool and numElements is None:
                numElements = len(column)
            assert type(column) == bool or len(column) == numElements

        # Explicit coordinates are relative to the anchor start.
        for coordList in [starts, ends]:
            if type(coordList) != bool:
                for j in range(len(coordList)):
                    coordList[j] += anchor[0]

        randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
        starts = self._createList(starts, randSegmentLists[0], 'int32')
        ends = self._createList(ends, randSegmentLists[1], 'int32')

        vals = self._createList(vals, getRandValList(numElements, valDType),
                                valDType)
        strands = self._createList(strands, getRandStrandList(numElements),
                                   'bool8')

        randIds, randEdges, randWeights = getRandGraphLists(numElements)
        ids = self._createList(ids, randIds, randIds.dtype)
        edges = self._createList(edges, randEdges, randEdges.dtype)
        weights = self._createList(weights, randWeights, 'float64')

        if weights is not None and len(weights.shape) == 1:
            weights = weights.reshape(weights.shape + (0, ))

        extras = self._createExtraLists(extras, 'S', numElements)

        if starts is None:
            # Without starts, a leading element ending at the anchor start is
            # prepended and the last end is forced to the anchor end.
            if ends[0] != 0:
                ends = numpy.append([anchor[0]], ends)
                if vals is not None:
                    vals = numpy.append([nan], vals)
                if strands is not None:
                    strands = numpy.append([True], strands)
            if ends[-1] != anchor[1]:
                ends[-1] = anchor[1]

        TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals,
                           strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)
示例#10
0
    def testInit(self):
        """Exercise the TrackView constructor with malformed and valid column sets.

        Expected to raise AssertionError: any single column (or the only
        extra list) replaced by an empty list, and any single column (or an
        extra list) that is one element shorter than the others.
        Expected to construct without error: the seven core columns with
        various subsets set to None, and the full set without extra lists.
        """
        starts = [1, 11, 21]
        ends = [9, 19, 29]
        values = [5.2, -5, 0]
        strands = [False, True, False]
        ids = ['a1', 'b2', 'c3']
        edges = [['b2', 'c3'], ['a1', ''], ['', '']]
        weights = [[0.2, 0.3], [-0.1, nan], [nan, nan]]
        extras = OrderedDict([('extra1', ['A', 'B', 'C']),
                              ('extra2', ['1.0', '2.0', '3.0'])])

        genomeAnchor = GenomeRegion(self.genome, self.chr, 0, 100)

        # Positional columns in constructor order, followed by the fixed
        # borderHandling / allowOverlaps arguments.
        columns = [starts, ends, values, strands, ids, edges, weights]
        fixedArgs = ['crop', False]

        # An empty list in any one column must be rejected.
        for pos in range(len(columns)):
            emptied = columns[:pos] + [[]] + columns[pos + 1:]
            self.assertRaises(AssertionError, TrackView, genomeAnchor,
                              *(emptied + fixedArgs), extraLists=extras)
        self.assertRaises(AssertionError, TrackView, genomeAnchor,
                          *(columns + fixedArgs),
                          extraLists=OrderedDict([('extra1', [])]))

        # Without starts, every other column carries one leading element.
        paddedExtras = OrderedDict([(name, [''] + column)
                                    for name, column in extras.items()])
        TrackView(genomeAnchor, None, [0] + ends, [nan] + values,
                  [True] + strands, [''] + ids, [['', '']] + edges,
                  [[nan, nan]] + weights, 'crop', False,
                  extraLists=paddedExtras)

        # Valid combinations of absent (None) columns, by column position.
        for absent in [(1,), (2,), (3,), (4, 5, 6), (5, 6), (6,)]:
            partial = [None if pos in absent else column
                       for pos, column in enumerate(columns)]
            TrackView(genomeAnchor, *(partial + fixedArgs), extraLists=extras)
        TrackView(genomeAnchor, *(columns + fixedArgs))

        # A column one element shorter than the rest must be rejected.
        for pos in range(len(columns)):
            shortened = columns[:pos] + [columns[pos][0:-1]] + columns[pos + 1:]
            self.assertRaises(AssertionError, TrackView, genomeAnchor,
                              *(shortened + fixedArgs), extraLists=extras)
        self.assertRaises(AssertionError, TrackView, genomeAnchor,
                          *(columns + fixedArgs),
                          extraLists={'cat': extras['extra1'][0:-1]})