def getTrackView(self, region):
    """Return the track view of ``region``, converted to the requested format.

    The raw track view is fetched using the overlap and border handling
    settings of the current track format requirement. The list of format
    converters is looked up the first time it is needed and stored in
    ``self.formatConverters``; only the first converter of that list is used.

    Raises:
        AssertionError: if the format requirement does not define
            ``allowOverlaps`` or ``borderHandling``.
        IncompatibleTracksError: if the converter list is empty, or if the
            first converter cannot handle the conversion.
    """
    allowOverlaps = self._trackFormatReq.allowOverlaps()
    borderHandling = self._trackFormatReq.borderHandling()
    assert allowOverlaps is not None
    assert borderHandling is not None

    rawTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
    rawFormat = rawTrackView.trackFormat

    if self.formatConverters is None:
        self.formatConverters = getFormatConverters(rawFormat, self._trackFormatReq)

    if self.formatConverters == []:
        message = prettyPrintTrackName(self.trackName) + ' with format: ' + str(rawFormat)
        if rawFormat._val:
            message = message + ('(' + rawFormat._val + ')')
        message = message + ' does not satisfy ' + str(self._trackFormatReq)
        raise IncompatibleTracksError(message)

    converter = self.formatConverters[0]
    if converter.canHandle(rawFormat, self._trackFormatReq):
        return converter.convert(rawTrackView)

    raise IncompatibleTracksError(getClassName(converter)
                                  + ' does not support conversion from ' + str(rawFormat)
                                  + ' to ' + str(self._trackFormatReq))
def addFormatReq(self, requestedTrackFormat):
    """Merge an additional format requirement into the current one.

    The result of ``TrackFormatReq.merge`` replaces ``self._trackFormatReq``.

    Raises:
        IncompatibleTracksError: if the merge yields ``None``. Note that the
            stored requirement has already been overwritten with ``None``
            at that point.
    """
    previousReq = self._trackFormatReq
    mergedReq = TrackFormatReq.merge(previousReq, requestedTrackFormat)
    self._trackFormatReq = mergedReq

    if mergedReq is not None:
        return

    raise IncompatibleTracksError(str(previousReq )
                                  + ' is incompatible with additional '
                                  + str(requestedTrackFormat))
def _compute(self):
    """Return a dict mapping each category value to its coverage.

    For dense representations the coverage of a category is the number of
    elements carrying that value. Otherwise it is the summed element length
    (end - start) of the category, minus the parts where an element overlaps
    the elements preceding it, so that overlapping stretches count once.

    NOTE(review): the overlap correction relies on a running maximum of the
    end positions, which presumably assumes elements sorted by start --
    confirm against the producer of the raw data.

    Raises:
        IncompatibleTracksError: if the raw data has no value array.
    """
    trackData = self._children[0].getResult()
    ends = trackData.endsAsNumpyArray()
    starts = trackData.startsAsNumpyArray()
    categories = trackData.valsAsNumpyArray()

    if categories is None:
        raise IncompatibleTracksError()

    coverageByCategory = {}
    for category in numpy.unique(categories):
        inCategory = (categories == category)

        if trackData.trackFormat.reprIsDense():
            coverageByCategory[category] = inCategory.sum()
            continue

        segStarts = starts[inCategory]
        segEnds = ends[inCategory]
        summedLengths = segEnds.sum() - segStarts.sum()

        # Furthest end reached by all elements before each element 1..n-1.
        previousMaxEnds = numpy.maximum.accumulate(segEnds)[:-1]
        # How far the earlier elements reach past this element's start ...
        reachPastStart = previousMaxEnds - segStarts[1:]
        # ... and past its end; the difference of the positive parts is the
        # stretch of this element already covered by earlier ones.
        reachPastEnd = previousMaxEnds - segEnds[1:]
        overlap = reachPastStart[reachPastStart > 0].sum() - reachPastEnd[
            reachPastEnd > 0].sum()

        coverageByCategory[category] = summedLengths - overlap

    return coverageByCategory
def getTrackView(self, region):
    """Return the part of the held track view covering ``region``, converted.

    The held track view ``self._tv`` is sliced with the region bounds taken
    relative to the start of the track view's genome anchor, and the slice
    is passed through the first format converter. The converter list is
    looked up the first time it is needed and stored in
    ``self.formatConverters``.

    Raises:
        IncompatibleTracksError: if the converter list is empty, or if the
            first converter cannot handle the conversion.
    """
    if self.formatConverters is None:
        self.formatConverters = getFormatConverters(self._tv.trackFormat,
                                                    self._trackFormatReq)

    if self.formatConverters == []:
        heldFormat = self._tv.trackFormat
        message = 'Track with format: ' + str(heldFormat)
        if heldFormat._val:
            message = message + ('(' + heldFormat._val + ')')
        message = message + ' does not satisfy ' + str(self._trackFormatReq)
        raise IncompatibleTracksError(message)

    converter = self.formatConverters[0]
    if not converter.canHandle(self._tv.trackFormat, self._trackFormatReq):
        raise IncompatibleTracksError(getClassName(converter)
                                      + ' does not support conversion from '
                                      + str(self._tv.trackFormat)
                                      + ' to ' + str(self._trackFormatReq))

    # Translate genome coordinates into offsets within the held track view.
    anchorStart = self._tv.genomeAnchor.start
    regionView = self._tv[region.start - anchorStart:region.end - anchorStart]
    return converter.convert(regionView)
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """Build a tiny placeholder TrackView that matches the track's format.

    The genome element source of the track is only asked for its prefix
    list and data types. For every prefix a one-element array (two elements
    when the format is both dense and interval) is filled with a fixed or
    "empty" value, and the arrays are wrapped in a ``TrackView``.

    Raises:
        AssertionError: if ``len(region)`` is not 1.
        IncompatibleTracksError: if overlaps are requested while the format
            is dense or the source reports no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gold.track.CommonMemmapFunctions import findEmptyVal
    from gold.track.TrackView import TrackView
    import numpy as np

    source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
    prefixes = source.getPrefixList()
    valType = source.getValDataType()
    valDim = source.getValDim()
    weightType = source.getEdgeWeightDataType()
    weightDim = source.getEdgeWeightDim()

    trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                           weightType, weightDim)
    if allowOverlaps and (trackFormat.isDense() or source.hasNoOverlappingElements()):
        raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '
                                      + str(trackFormat) + ' does not satisfy '
                                      + str(self._trackFormatReq))

    denseAndInterval = trackFormat.isDense() and trackFormat.isInterval()
    elementCount = 2 if denseAndInterval else 1

    # A bare string type code is replaced by a two-character string dtype.
    if valType == 'S':
        valType = 'S2'
    if weightType == 'S':
        weightType = 'S2'

    starts = ends = vals = strands = ids = edges = weights = None
    extras = OrderedDict()

    for prefix in prefixes:
        if prefix == 'start':
            starts = np.array([-1], dtype='int32')
        elif prefix == 'end':
            ends = np.array([0, 1] if denseAndInterval else [0], dtype='int32')
        elif prefix == 'val':
            valShape = (elementCount, valDim) if valDim > 1 else elementCount
            vals = np.array([findEmptyVal(valType)] * valDim * elementCount,
                            dtype=valType).reshape(valShape)
        elif prefix == 'strand':
            strands = np.array([1] * elementCount, dtype='int8')
        elif prefix == 'id':
            ids = np.array([''] * elementCount, dtype='S1')
        elif prefix == 'edges':
            edges = np.array([['']] * elementCount, dtype='S1')
        elif prefix == 'weights':
            weightShape = (elementCount, 1, weightDim) if weightDim > 1 else (elementCount, 1)
            weights = np.array([[[findEmptyVal(weightType)]]] * weightDim * elementCount,
                               dtype=weightType).reshape(weightShape)
        else:
            # Any other prefix becomes an extra column of empty strings.
            extras[prefix] = np.array([''] * elementCount, dtype='S1')

    return TrackView(region, starts, ends, vals, strands, ids, edges, weights,
                     borderHandling, allowOverlaps, extras)
def _checkTrackFormat(self, origTV):
    """Reject track views whose format is dense or interval.

    Raises:
        IncompatibleTracksError: if the track format of ``origTV`` is dense
            or is an interval format.
    """
    # Interval formats are rejected unconditionally: segments are not
    # currently supported at all (end list is never returned).
    trackFormat = origTV.trackFormat
    if trackFormat.isDense():
        raise IncompatibleTracksError()
    if trackFormat.isInterval():
        raise IncompatibleTracksError()
def _compute(self):
    """Return, per category value, the number of distinct start positions.

    The result is an ``OrderedDict`` whose keys follow the order returned by
    ``numpy.unique`` on the value array.

    Raises:
        IncompatibleTracksError: if the raw data has no value array.
    """
    trackData = self._children[0].getResult()
    starts = trackData.startsAsNumpyArray()
    categories = trackData.valsAsNumpyArray()

    if categories is None:
        raise IncompatibleTracksError()

    return OrderedDict(
        (category, len(numpy.unique(starts[categories == category])))
        for category in numpy.unique(categories)
    )
def _validateAllTracksRead(self):
    """Check that every track of the statistic has been flagged as read.

    A track that was not flagged as read is accepted only when another track
    of the statistic has the same unique key.

    Raises:
        ShouldNotOccurError: if the statistic has no result yet.
        IncompatibleTracksError: if a track was not flagged as read and no
            other track shares its unique key.
    """
    if not self.hasResult():
        raise ShouldNotOccurError("At this stage, statistic should either have result, "
                                  "or exception should have been raised")

    tracks = self.getAllTracks()

    # Keep the key list index-aligned with ``tracks``. Missing tracks get a
    # ``None`` placeholder instead of being dereferenced, in line with the
    # ``track is None`` guard in the loop below.
    trackUniqueKeys = [
        Track(tr.trackName).getUniqueKey(self.getGenome()) if tr is not None else None
        for tr in tracks
    ]

    for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(trackUniqueKeys))):
        track = tracks[trackIndex]
        if track is None or track.hasBeenFlaggedAsRead():
            continue

        uniqueKeyForRestTracks = set(trackUniqueKeys[i] for i in restTrackIndexes)

        # If several tracks are the same, memory memoization will only result
        # in one RawDataStat being created, for one Track object. This is a
        # wanted optimization. In other cases, something is probably wrong if
        # a track has not been touched. However, this rule may be revisited
        # when track structure functionality is implemented.
        if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
            raise IncompatibleTracksError(
                'Track ' + prettyPrintTrackName(track.trackName) +
                ' was created, but not touched by statistic')
def _determineStatClass(self):
    """Trial-run every candidate statistic class on a minimal bin source.

    For each class in ``self._statClassList`` a minimal ``StatJob`` is built
    and run on ``MinimalBinSource(self._genome)``. Afterwards every track
    whose ``formatConverters`` is still ``None`` (i.e. that was not touched)
    must share its unique key with another track, otherwise the class is
    rejected with ``IncompatibleTracksError``. Rejections are logged when
    ``DebugConfig.VERBOSE`` is set and re-raised only when
    ``DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS`` is set.

    NOTE(review): nothing is returned or stored for a class that passes the
    trial run in the code visible here -- confirm against the rest of the
    file.

    Raises:
        AssertionError: if ``_track`` or ``_track2`` is not set.
        ShouldNotOccurError: if the class list is empty and
            ``DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS`` is set.
    """
    assert hasattr(self, '_track')
    assert hasattr(self, '_track2')

    dummyGESource = MinimalBinSource(self._genome)

    if len(self._statClassList) == 0:
        if self._reversed:
            logMessage('Stat class list is empty, for analysisDef: ' + self._analysisLine,
                       level=logging.WARNING)
        if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
            raise ShouldNotOccurError('Stat class list is empty. Analysisdef: '+self._analysisLine)

    for statClass in self._statClassList:
        if DebugConfig.VERBOSE:
            logMessage(statClass.__name__ + ': Trying (' + self.getDefAfterChoices() + ')')

        trackA, trackB = self._track, self._track2
        if trackA is None:
            continue

        try:
            # The hackiest of all hacks!
            # TODO: reimplement together with TrackStructure
            job = StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                          **self.getChoices(filterByActivation=True))
            stat = job._getSingleResult(dummyGESource[0])[-1]
            tracks = stat._tracks if hasattr(stat, '_tracks') else [trackA, trackB]

            # Keep the key list index-aligned with ``tracks``: a missing
            # track (e.g. no second track) gets a ``None`` placeholder.
            # Dropping it would shift the following keys and make the
            # index lookups below fail or compare the wrong keys.
            trackUniqueKeys = [
                Track(tr.trackName).getUniqueKey(self._genome) if tr is not None else None
                for tr in tracks
            ]

            StatJob(dummyGESource, trackA, trackB, statClass, minimal=True,
                    **self.getChoices(filterByActivation=True)).run(False)

            # In order not to mess up integration tests
            initSeed()

            for trackIndex, restTrackIndexes in allElementsVersusRest(xrange(len(tracks))):
                track = tracks[trackIndex]
                if track is None or track.formatConverters is not None:
                    continue

                uniqueKeyForRestTracks = set(trackUniqueKeys[i] for i in restTrackIndexes)

                # If several tracks are the same, memory memoization will only result
                # in one RawDataStat being created, for one Track object. This is a
                # wanted optimization. In other cases, something is probably wrong if
                # a track has not been touched. However, this rule may be revisited
                # when track structure functionality is implemented.
                if trackUniqueKeys[trackIndex] not in uniqueKeyForRestTracks:
                    raise IncompatibleTracksError(
                        'Track ' + prettyPrintTrackName(track.trackName) +
                        ' was created, but not touched by statistic')

        except (IncompatibleTracksError, AssertionError,
                IncompatibleAssumptionsError, IdenticalTrackNamesError) as e:
            # Any of these means the candidate class does not fit the tracks.
            if DebugConfig.VERBOSE:
                logException(e, level=logging.DEBUG,
                             message='(Warning: error in _determineStatClass for stat: %s)' % statClass.__name__)
            if DebugConfig.PASS_ON_VALIDSTAT_EXCEPTIONS:
                raise
def addFormatReq(self, requestedTrackFormat):
    """Verify that a requested format is compatible with the held track view.

    Nothing is stored. The request is skipped when track format checking is
    disabled (``self._ignoreTrackFormat``) or when no format is requested.

    Raises:
        IncompatibleTracksError: if ``requestedTrackFormat`` is not
            compatible with the track format of ``self._tv``.
    """
    # Identity test against None: ``!=`` would invoke a user-defined
    # ``__ne__`` on the request object.
    if self._ignoreTrackFormat or requestedTrackFormat is None:
        return

    if not requestedTrackFormat.isCompatibleWith(self._tv.trackFormat):
        raise IncompatibleTracksError(str(requestedTrackFormat) + ' not compatible with '
                                      + str(self._tv.trackFormat))
def _checkTrackFormat(self, origTV):
    """Reject track views with a dense track format.

    Raises:
        IncompatibleTracksError: if the track format of ``origTV`` is dense.
    """
    trackFormat = origTV.trackFormat
    if not trackFormat.isDense():
        return
    raise IncompatibleTracksError()
def supportsTrackFormat(cls, origTrackFormat):
    """Raise if the given object carries a dense track format.

    Returns ``None`` when the format is accepted; no boolean is returned.

    NOTE(review): despite the parameter name, the argument must expose a
    ``trackFormat`` attribute (it is not used as a track format itself) --
    confirm against the callers.

    Raises:
        IncompatibleTracksError: if ``origTrackFormat.trackFormat`` is dense.
    """
    trackFormat = origTrackFormat.trackFormat
    if not trackFormat.isDense():
        return
    raise IncompatibleTracksError()
def _checkTrackFormat(self, origTV):
    """Require the track format of ``origTV`` to be valued.

    Raises:
        IncompatibleTracksError: if the track format is not valued; the
            string form of the format is used as the error message.
    """
    if origTV.trackFormat.isValued():
        return
    raise IncompatibleTracksError(str(origTV.trackFormat))