def _createTrackView(self, starts, ends, vals, strands, ids, edges, weights, extras, sourceRegion, allowOverlaps, sliceFull=False):
    """
    Build a TrackView over sourceRegion from plain sequences.

    Each element list that is not None is converted with array() (vals with
    dtype 'float64'); a list that is None is passed on as None. extras is a
    mapping from name to list, or None for no extra lists. sourceRegion is
    indexed as [start, end] and is combined with self.genome and self.chr
    into the genome anchor. Border handling is always 'crop'.

    If sliceFull is true, sliceElementsAccordingToGenomeAnchor() is called
    on the view before it is returned.
    """
    def asArrayOrNone(seq, dtype=None):
        # None means "this track has no such column" and is kept as None.
        if seq is None:
            return None
        if dtype is None:
            return array(seq)
        return array(seq, dtype=dtype)

    genomeAnchor = GenomeRegion(genome=self.genome, chr=self.chr,
                                start=sourceRegion[0], end=sourceRegion[1])

    startArray = asArrayOrNone(starts)
    endArray = asArrayOrNone(ends)
    valArray = asArrayOrNone(vals, dtype='float64')
    strandArray = asArrayOrNone(strands)
    idArray = asArrayOrNone(ids)
    edgeArray = asArrayOrNone(edges)
    weightArray = asArrayOrNone(weights)

    extraArrays = OrderedDict()
    if extras is not None:
        for key, extra in extras.iteritems():
            extraArrays[key] = array(extra)

    tv = TrackView(genomeAnchor, startArray, endArray, valArray, strandArray,
                   idArray, edgeArray, weightArray, 'crop', allowOverlaps,
                   extraLists=extraArrays)
    if sliceFull:
        tv.sliceElementsAccordingToGenomeAnchor()
    return tv
def __init__(self, vals=True, strands=True, anchor=None, valDType='float64'):
    """
    Create a TrackView on 'TestGenome'/'chr21' that has only values and
    strands (starts, ends, ids, edges and weights are passed as None).

    vals, strands : handed to self._createList together with a random list
                    of numElements items and a dtype (valDType for vals,
                    'bool8' for strands). Presumably True selects the random
                    list and a sequence is used as given -- confirm against
                    _createList.
    anchor        : [start, end] of the region. When None, vals must be a
                    sequence and the region becomes [10, 10 + len(vals)];
                    otherwise the number of elements is end - start.
    valDType      : dtype forwarded to _createList for vals.

    Raises AssertionError when vals is True and no anchor is given, since
    the number of elements cannot be determined.
    """
    # Identity checks on purpose: `vals != True` and `anchor == None` are
    # evaluated element-wise when the argument is a numpy array, and the
    # resulting array cannot be used as a truth value.
    assert vals is not True or anchor is not None

    if anchor is None:
        numElements = len(vals)
        anchor = [10, 10 + numElements]
    else:
        numElements = anchor[1] - anchor[0]

    vals = self._createList(vals, getRandValList(numElements), valDType)
    strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

    TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]),
                       None, None, vals, strands, None, None, None, 'crop', False)
def getTrackView(self, region):
    """
    Return a randomized TrackView for region.

    The track view of the original track is fetched for region, checked
    (no overlaps allowed, 'crop' border handling, anchored exactly at
    region) and handed column by column to
    self._createRandomizedNumpyArrays. The returned starts and ends are
    offset by the anchor start before the new TrackView is built.

    The result is stored in self._cachedTV, but it is recomputed on every
    call (no cache lookup is performed).
    """
    if Config.USE_SLOW_DEFENSIVE_ASSERTS:
        # region must be (one of) the region(s) this track was created for.
        if isIter(self._origRegion):
            assert region in self._origRegion
        else:
            assert self._origRegion == region

    self._origTrack.addFormatReq(self._trackFormatReq)
    sourceTV = self._origTrack.getTrackView(region)
    self._checkTrackFormat(sourceTV)

    assert not sourceTV.allowOverlaps
    assert sourceTV.borderHandling == 'crop'
    assert region == sourceTV.genomeAnchor

    randomized = self._createRandomizedNumpyArrays(
        len(sourceTV.genomeAnchor),
        sourceTV.startsAsNumpyArray(),
        sourceTV.endsAsNumpyArray(),
        sourceTV.valsAsNumpyArray(),
        sourceTV.strandsAsNumpyArray(),
        sourceTV.idsAsNumpyArray(),
        sourceTV.edgesAsNumpyArray(),
        sourceTV.weightsAsNumpyArray(),
        sourceTV.allExtrasAsDictOfNumpyArrays(),
        sourceTV.trackFormat,
        region)
    starts, ends, vals, strands, ids, edges, weights, extras = randomized

    # Not referenced below; retained in case the import has side effects.
    from gtrackcore.util.CommonFunctions import getClassName

    # Add the anchor start to the randomized coordinates (new arrays are
    # created; the randomized ones are not modified in place).
    if starts is not None:
        starts = starts + sourceTV.genomeAnchor.start
    if ends is not None:
        ends = ends + sourceTV.genomeAnchor.start

    self._cachedTV = TrackView(sourceTV.genomeAnchor, starts, ends, vals, strands,
                               ids, edges, weights, sourceTV.borderHandling,
                               sourceTV.allowOverlaps, extraLists=extras)

    assert self._trackFormatReq.isCompatibleWith(self._cachedTV.trackFormat), \
        'Incompatible track-format: ' + str(self._trackFormatReq) + \
        ' VS ' + str(self._cachedTV.trackFormat)
    return self._cachedTV
def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
    """
    Load the part of a preprocessed track that covers region as a TrackView.

    trackData      : see TrackSource.getTrackData {'id' : smartmemmap}
    region         : see GenomeRegion
    borderHandling : passed unchanged to TrackView
    allowOverlaps  : passed unchanged to TrackView
    trackName      : not used by this function; kept for interface
                     compatibility

    Arrays named in RESERVED_PREFIXES are the core columns; every other
    array in trackData except 'leftIndex' and 'rightIndex' is an extra
    column. For dense representations the slice bounds are the region
    coordinates (relative to the bounding region start when bounding-region
    info is available). Otherwise they are read from the 'leftIndex' and
    'rightIndex' arrays at the bins of region.start and region.end - 1, and
    the resulting view is additionally sliced to the genome anchor.

    Raises IOError when a non-dense track lacks 'leftIndex' or 'rightIndex'.
    """
    brShelve = trackData.boundingRegionShelve
    brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

    # Built once instead of once per array name; also avoids concatenating
    # keys() with a list, which only works where keys() returns a list.
    nonExtraNames = set(RESERVED_PREFIXES.keys())
    nonExtraNames.update(['leftIndex', 'rightIndex'])
    extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraNames]

    reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo)
                      for arrayName in RESERVED_PREFIXES]
    extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo)
                   for arrayName in extraArrayNames]
    trackFormat = TrackFormat(*(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]))

    if trackFormat.reprIsDense():
        if brInfo is None:
            leftIndex = region.start
            rightIndex = region.end
        else:
            leftIndex = region.start - brInfo.start
            rightIndex = region.end - brInfo.start
    else:
        leftBin = CompBinManager.getBinNumber(region.start)
        # region.end - 1: bin of the last position covered by the region.
        rightBin = CompBinManager.getBinNumber(region.end - 1)

        if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
            raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

        leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
        rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

    slicedReservedArrays = [(arr[leftIndex:rightIndex] if arr is not None else None)
                            for arr in reservedArrays]
    slicedExtraArrays = [(arr[leftIndex:rightIndex] if arr is not None else None)
                         for arr in extraArrays]

    argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + \
              [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
    tv = TrackView(*argList)

    if not trackFormat.reprIsDense():
        tv.sliceElementsAccordingToGenomeAnchor()
    return tv
def __init__(self, vals=True, strands=True, anchor=None, valDType='float64'):
    """
    Create a TrackView on 'TestGenome'/'chr21' carrying only values and
    strands; starts, ends, ids, edges and weights are passed as None.

    vals, strands : forwarded to self._createList along with a random list
                    of numElements items and a dtype (valDType for vals,
                    'bool8' for strands). Presumably True means "use the
                    random list" -- verify in _createList.
    anchor        : [start, end] of the region. If None, the region is
                    [10, 10 + len(vals)], so vals must then be a sequence;
                    otherwise the element count is end - start.
    valDType      : dtype forwarded to _createList for vals.

    Raises AssertionError if vals is True while anchor is None, because the
    element count would be unknown.
    """
    # Use identity rather than ==/!=: with a numpy array as argument,
    # `vals != True` or `anchor == None` yields an element-wise array whose
    # truth value is ambiguous.
    assert vals is not True or anchor is not None

    if anchor is None:
        numElements = len(vals)
        anchor = [10, 10 + numElements]
    else:
        numElements = anchor[1] - anchor[0]

    vals = self._createList(vals, getRandValList(numElements), valDType)
    strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

    TrackView.__init__(
        self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]),
        None, None, vals, strands, None, None, None, 'crop', False)
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """
    Build a minimal TrackView for region from the format of the external
    track's genome element source, without reading any track elements.

    For every prefix reported by the source a small numpy array is created
    with one element (two when the format is both dense and interval),
    filled with fixed values: -1 for starts, 0 (or 0, 1) for ends,
    findEmptyVal() for values and weights, 1 for strands and empty strings
    for ids, edges and any other (extra) prefix.

    Requires len(region) == 1. Raises IncompatibleTracksError if
    allowOverlaps is requested for a format that is dense or for a source
    reporting no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
    from gtrackcore.track.core.TrackView import TrackView
    import numpy as np

    geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
        self.trackName, region.genome)
    prefixList = geSource.getPrefixList()
    valDataType = geSource.getValDataType()
    valDim = geSource.getValDim()
    weightDataType = geSource.getEdgeWeightDataType()
    weightDim = geSource.getEdgeWeightDim()

    tf = TrackFormat.createInstanceFromPrefixList(prefixList, valDataType, valDim,
                                                  weightDataType, weightDim)
    if allowOverlaps and (tf.isDense() or geSource.hasNoOverlappingElements()):
        raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                      + str(tf) + ' does not satisfy ' + str(self._trackFormatReq))

    denseAndInterval = tf.isDense() and tf.isInterval()
    numEls = 2 if denseAndInterval else 1

    # The track format above is built from the unmodified type codes; the
    # arrays below use 'S2' in place of a bare 'S'.
    if valDataType == 'S':
        valDataType = 'S2'
    if weightDataType == 'S':
        weightDataType = 'S2'

    # Columns not named in prefixList stay None.
    core = dict.fromkeys(['start', 'end', 'val', 'strand', 'id', 'edges', 'weights'])
    extraLists = OrderedDict()

    for prefix in prefixList:
        if prefix == 'start':
            core['start'] = np.array([-1], dtype='int32')
        elif prefix == 'end':
            endCoords = [0, 1] if denseAndInterval else [0]
            core['end'] = np.array(endCoords, dtype='int32')
        elif prefix == 'val':
            emptyVals = np.array([findEmptyVal(valDataType)] * valDim * numEls,
                                 dtype=valDataType)
            valShape = (numEls, valDim) if valDim > 1 else numEls
            core['val'] = emptyVals.reshape(valShape)
        elif prefix == 'strand':
            core['strand'] = np.array([1] * numEls, dtype='int8')
        elif prefix == 'id':
            core['id'] = np.array([''] * numEls, dtype='S1')
        elif prefix == 'edges':
            core['edges'] = np.array([['']] * numEls, dtype='S1')
        elif prefix == 'weights':
            emptyWeights = np.array([[[findEmptyVal(weightDataType)]]] * weightDim * numEls,
                                    dtype=weightDataType)
            weightShape = (numEls, 1, weightDim) if weightDim > 1 else (numEls, 1)
            core['weights'] = emptyWeights.reshape(weightShape)
        else:
            extraLists[prefix] = np.array([''] * numEls, dtype='S1')

    return TrackView(region, core['start'], core['end'], core['val'], core['strand'],
                     core['id'], core['edges'], core['weights'],
                     borderHandling, allowOverlaps, extraLists)
def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False,
             extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
    """
    Create a sample TrackView on 'TestGenome'/'chr21' for tests.

    segments       : optional iterable of (start, end) pairs; may only be
                     given when starts and ends are both left as True.
    starts, ends, vals, strands, ids, edges, weights :
                     each is either a bool or a sequence of element values.
                     Every argument is forwarded to self._createList along
                     with randomly generated data (presumably True selects
                     the random data and False yields None -- confirm in
                     _createList). Giving only one of starts/ends as a
                     sequence switches the other one off.
    extras         : False, or a dict of name -> sequence; forwarded to
                     self._createExtraLists.
    anchor         : [start, end] of the region, default [10, 1000].
                     Supplied starts/ends are relative to anchor[0].
    numElements    : element count; required when no sequence is given,
                     otherwise taken from the first sequence. All sequences
                     must have this length.
    valDType       : dtype used for vals.
    borderHandling, allowOverlaps : passed unchanged to TrackView.

    Raises AssertionError for inconsistent argument combinations.
    """
    # All comparisons against True/False/None use identity. With ==/!=, a
    # numpy array argument is compared element-wise and the result cannot
    # be used as a truth value.
    if type(starts) != bool and ends is True:
        ends = False
    if type(ends) != bool and starts is True:
        starts = False

    assert not (starts is False and ends is False)
    assert segments is not False and segments is not True
    assert starts is not None and ends is not None and vals is not None and strands is not None
    assert segments is None or (starts is True and ends is True)
    assert not (isIter(weights) and not isIter(edges))
    # Either the element count is given, or some argument carries real data.
    assert numElements is not None or any(
        type(x) not in [bool, type(None)]
        for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras])

    if anchor is None:
        anchor = [10, 1000]

    if segments is not None:
        starts = []
        ends = []
        for seg in segments:
            starts.append(seg[0])
            ends.append(seg[1])

    if isIter(edges):
        maxNumEdges = self._findMaxNumEls(edges)
        edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
        # weights can only be iterable if edges is (asserted above), so
        # maxNumEdges is always available here.
        if isIter(weights):
            weights = self._appendEmptyToEnd(weights, numpy.nan, maxNumEdges)

    extraValues = list(extras.values()) if isinstance(extras, dict) else []
    for elList in [starts, ends, vals, strands, ids, edges, weights] + extraValues:
        if type(elList) != bool and numElements is None:
            numElements = len(elList)
        assert type(elList) == bool or len(elList) == numElements

    # Shift supplied coordinates by the anchor start. New lists are built so
    # that the caller's sequences are not modified.
    if type(starts) != bool:
        starts = [coord + anchor[0] for coord in starts]
    if type(ends) != bool:
        ends = [coord + anchor[0] for coord in ends]

    randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
    starts = self._createList(starts, randSegmentLists[0], 'int32')
    ends = self._createList(ends, randSegmentLists[1], 'int32')

    vals = self._createList(vals, getRandValList(numElements, valDType), valDType)
    strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

    randIds, randEdges, randWeights = getRandGraphLists(numElements)
    ids = self._createList(ids, randIds, randIds.dtype)
    edges = self._createList(edges, randEdges, randEdges.dtype)
    weights = self._createList(weights, randWeights, 'float64')

    # NOTE(review): reshaping a 1-D array of n items to (n, 0) can only
    # succeed for n == 0 -- presumably this handles empty weights; confirm.
    if weights is not None and len(weights.shape) == 1:
        weights = weights.reshape(weights.shape + (0,))

    extras = self._createExtraLists(extras, 'S', numElements)

    # From here on starts/vals/strands are arrays or None, so `== None`
    # would be an element-wise comparison; identity is required.
    if starts is None:
        if ends[0] != 0:
            # Prepend one element to ends, and to vals/strands so that the
            # columns keep the same length.
            ends = numpy.append([anchor[0]], ends)
            if vals is not None:
                vals = numpy.append([nan], vals)
            if strands is not None:
                strands = numpy.append([True], strands)
        if ends[-1] != anchor[1]:
            ends[-1] = anchor[1]

    TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals,
                       strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)
def __init__(self, segments=None, starts=True, ends=True, vals=True, strands=False, ids=False, edges=False, weights=False,
             extras=False, anchor=None, numElements=None, valDType='float64', borderHandling='crop', allowOverlaps=False):
    """
    Construct a sample TrackView on 'TestGenome'/'chr21' for use in tests.

    segments       : optional iterable of (start, end) pairs. Only allowed
                     while starts and ends both keep their default True.
    starts, ends, vals, strands, ids, edges, weights :
                     a bool or a sequence with one entry per element. Each
                     is passed to self._createList together with random
                     data (presumably True picks the random data and False
                     gives None -- verify in _createList). If exactly one of
                     starts/ends is a sequence, the other is switched off.
    extras         : False, or a dict mapping names to sequences; passed to
                     self._createExtraLists.
    anchor         : [start, end] of the region; [10, 1000] if omitted.
                     Supplied starts/ends are offsets from anchor[0].
    numElements    : number of elements. Mandatory if no sequence is
                     supplied; otherwise derived from the first sequence,
                     and every sequence must match it.
    valDType       : dtype for vals.
    borderHandling, allowOverlaps : forwarded unchanged to TrackView.

    Raises AssertionError when the arguments are inconsistent.
    """
    # Identity comparisons throughout: == / != against True, False or None
    # is element-wise for numpy arrays, and such a result has no single
    # truth value.
    if type(starts) != bool and ends is True:
        ends = False
    if type(ends) != bool and starts is True:
        starts = False

    assert not (starts is False and ends is False)
    assert segments is not False and segments is not True
    assert starts is not None and ends is not None and vals is not None and strands is not None
    assert segments is None or (starts is True and ends is True)
    assert not (isIter(weights) and not isIter(edges))
    # The element count must be explicit unless some argument holds data.
    assert numElements is not None or any(
        type(x) not in [bool, type(None)]
        for x in [segments, starts, ends, vals, strands, ids, edges, weights, extras])

    if anchor is None:
        anchor = [10, 1000]

    if segments is not None:
        starts = []
        ends = []
        for seg in segments:
            starts.append(seg[0])
            ends.append(seg[1])

    if isIter(edges):
        maxNumEdges = self._findMaxNumEls(edges)
        edges = self._appendEmptyToEnd(edges, '', maxNumEdges)
        # Iterable weights imply iterable edges (asserted above), hence
        # maxNumEdges is defined whenever it is needed.
        if isIter(weights):
            weights = self._appendEmptyToEnd(weights, numpy.nan, maxNumEdges)

    extraValues = list(extras.values()) if isinstance(extras, dict) else []
    for elList in [starts, ends, vals, strands, ids, edges, weights] + extraValues:
        if type(elList) != bool and numElements is None:
            numElements = len(elList)
        assert type(elList) == bool or len(elList) == numElements

    # Offset supplied coordinates by the anchor start, building new lists
    # so the caller's own sequences stay untouched.
    if type(starts) != bool:
        starts = [coord + anchor[0] for coord in starts]
    if type(ends) != bool:
        ends = [coord + anchor[0] for coord in ends]

    randSegmentLists = getRandSegments(numElements, anchor[0], anchor[1])
    starts = self._createList(starts, randSegmentLists[0], 'int32')
    ends = self._createList(ends, randSegmentLists[1], 'int32')

    vals = self._createList(vals, getRandValList(numElements, valDType), valDType)
    strands = self._createList(strands, getRandStrandList(numElements), 'bool8')

    randIds, randEdges, randWeights = getRandGraphLists(numElements)
    ids = self._createList(ids, randIds, randIds.dtype)
    edges = self._createList(edges, randEdges, randEdges.dtype)
    weights = self._createList(weights, randWeights, 'float64')

    # NOTE(review): a 1-D array of n items can be reshaped to (n, 0) only
    # when n == 0 -- this looks like special handling of empty weights;
    # confirm.
    if weights is not None and len(weights.shape) == 1:
        weights = weights.reshape(weights.shape + (0, ))

    extras = self._createExtraLists(extras, 'S', numElements)

    # starts/vals/strands are now arrays or None; comparing them with
    # `== None` would be element-wise, so identity is used.
    if starts is None:
        if ends[0] != 0:
            # One element is prepended to ends, and likewise to vals and
            # strands to keep all columns equally long.
            ends = numpy.append([anchor[0]], ends)
            if vals is not None:
                vals = numpy.append([nan], vals)
            if strands is not None:
                strands = numpy.append([True], strands)
        if ends[-1] != anchor[1]:
            ends[-1] = anchor[1]

    TrackView.__init__(self, GenomeRegion('TestGenome', 'chr21', anchor[0], anchor[1]), starts, ends, vals,
                       strands, ids, edges, weights, borderHandling, allowOverlaps, extraLists=extras)
def testInit(self):
    """
    Check which argument combinations the TrackView constructor accepts.

    With three-element lists as the baseline, an AssertionError is expected
    when any single list (or an extra list) is empty or one element short.
    Leaving columns out by passing None, or omitting extraLists, must be
    accepted.
    """
    starts = [1, 11, 21]
    ends = [9, 19, 29]
    values = [5.2, -5, 0]
    strands = [False, True, False]
    ids = ['a1', 'b2', 'c3']
    edges = [['b2', 'c3'], ['a1', ''], ['', '']]
    weights = [[0.2, 0.3], [-0.1, nan], [nan, nan]]
    extras = OrderedDict([('extra1', ['A', 'B', 'C']), ('extra2', ['1.0', '2.0', '3.0'])])
    genomeAnchor = GenomeRegion(self.genome, self.chr, 0, 100)

    # Positional order of the list arguments of TrackView.
    coreLists = [starts, ends, values, strands, ids, edges, weights]

    def withReplaced(pos, replacement):
        changed = list(coreLists)
        changed[pos] = replacement
        return changed

    def assertRejected(lists, extraLists):
        self.assertRaises(AssertionError, TrackView, genomeAnchor,
                          *(lists + ['crop', False]), extraLists=extraLists)

    def build(lists, **kwArgs):
        return TrackView(genomeAnchor, *(lists + ['crop', False]), **kwArgs)

    # An empty list in any single position is rejected.
    for pos in range(len(coreLists)):
        assertRejected(withReplaced(pos, []), extras)
    assertRejected(coreLists, OrderedDict([('extra1', [])]))

    # Without starts, every column carries one additional leading element.
    build([None, [0] + ends, [nan] + values, [True] + strands, [''] + ids,
           [['', '']] + edges, [[nan, nan]] + weights],
          extraLists=OrderedDict([(x, [''] + y) for x, y in extras.items()]))

    # Columns may be left out by passing None.
    build([starts, None, values, strands, ids, edges, weights], extraLists=extras)
    build([starts, ends, None, strands, ids, edges, weights], extraLists=extras)
    build([starts, ends, values, None, ids, edges, weights], extraLists=extras)
    build([starts, ends, values, strands, None, None, None], extraLists=extras)
    build([starts, ends, values, strands, ids, None, None], extraLists=extras)
    build([starts, ends, values, strands, ids, edges, None], extraLists=extras)
    build(list(coreLists))

    # A list that is one element shorter than the others is rejected.
    for pos in range(len(coreLists)):
        assertRejected(withReplaced(pos, coreLists[pos][0:-1]), extras)
    assertRejected(coreLists, {'cat': extras['extra1'][0:-1]})