def _getTrackView(self, region):
    """Build a randomized TrackView for ``region`` and store it in ``self._cachedTV``.

    The original track is loaded for ``region`` through ``RawDataStat``. Its
    numpy arrays are passed to ``self._createRandomizedNumpyArrays`` and the
    result is post-processed by ``self._undoTrackViewChanges``. The final
    arrays are wrapped in a new ``TrackView`` that reuses the genome anchor,
    border handling and overlap setting of the original track view.

    The track view is rebuilt on every call; ``self._cachedTV`` is only
    written here, never consulted.

    Raises AssertionError if the original track view allows overlaps, does
    not use 'crop' border handling, is not anchored at ``region``, or if the
    resulting track format is incompatible with ``self._trackFormatReq``.
    """
    # Imported for its side effect: to initialize random generators if not
    # done previously.
    from gold.util.RandomUtil import random

    sourceTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()

    self._checkTrackFormat(sourceTV)
    assert not sourceTV.allowOverlaps
    assert sourceTV.borderHandling == 'crop'
    assert region == sourceTV.genomeAnchor

    anchor = sourceTV.genomeAnchor

    randomized = self._createRandomizedNumpyArrays(
        len(anchor),
        sourceTV.startsAsNumpyArray(),
        sourceTV.endsAsNumpyArray(),
        sourceTV.valsAsNumpyArray(),
        sourceTV.strandsAsNumpyArray(),
        sourceTV.idsAsNumpyArray(),
        sourceTV.edgesAsNumpyArray(),
        sourceTV.weightsAsNumpyArray(),
        sourceTV.allExtrasAsDictOfNumpyArrays(),
        region)
    starts, ends, vals, strands, ids, edges, weights, extras = randomized

    restored = self._undoTrackViewChanges(
        starts, ends, vals, strands, ids, edges, weights, extras, sourceTV)
    starts, ends, vals, strands, ids, edges, weights, extras = restored

    # NOTE(review): getClassName is not used below; the import is kept so
    # that import-time behaviour stays exactly as before.
    from gold.util.CommonFunctions import getClassName

    randomizedTV = TrackView(
        anchor, starts, ends, vals, strands, ids, edges, weights,
        sourceTV.borderHandling, sourceTV.allowOverlaps, extraLists=extras)
    self._cachedTV = randomizedTV

    assert self._trackFormatReq.isCompatibleWith(randomizedTV.trackFormat), \
        'Incompatible track-format: ' + str(self._trackFormatReq) + \
        ' VS ' + str(randomizedTV.trackFormat)
    return randomizedTV
def getTrackView(self, region):
    """Return the original track's TrackView for a randomly relocated region.

    A region of the same length as ``region`` is drawn at random from the
    chromosome arm containing ``region``, outside of the part excluded around
    ``region`` (``region`` adjusted by the independency buffer -- presumably
    padded on both sides; confirm against ``GenomeRegion.extend``). The
    original track is then loaded for that relocated region.

    When two source regions remain (one on each side), one of them is picked
    with probability proportional to ``len(source) - len(region)``.

    Raises:
        CentromerError: if ``region`` is not contained in exactly one
            chromosome arm.
        TooLargeBinError: if no source region is at least
            ``MIN_SOURCE_TO_SAMPLE_SIZE_RATIO`` times as long as ``region``.
        AssertionError: if ``region`` differs from ``self._origRegion``, if
            the exclusion does not leave one or two source regions, or if the
            loaded track view is anchored at ``region`` itself.
    """
    assert self._origRegion == region

    containingArms = GenomeInfo.getContainingChrArms(region)
    if len(containingArms) != 1:
        raise CentromerError
    chrArm = containingArms[0]

    # Named bufferSize rather than "buffer" to avoid shadowing the Python 2
    # builtin of that name.
    bufferSize = self._getIndepencyBufferSize(region)
    excludedReg = copy(region).extend(-bufferSize).extend(bufferSize)
    sourceRegs = chrArm.exclude(excludedReg)
    assert len(sourceRegs) in [1, 2]

    regionLen = len(region)
    minSourceLen = self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO * regionLen
    if not any(len(sourceReg) >= minSourceLen for sourceReg in sourceRegs):
        raise TooLargeBinError(
            'Source region lengths of ' + str([len(x) for x in sourceRegs]) +
            ' are too small compared to region length of ' + str(regionLen) +
            ' according to MIN_SOURCE_TO_SAMPLE_SIZE_RATIO: ' +
            str(self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO))

    if len(sourceRegs) == 1:
        sourceReg = sourceRegs[0]
    else:
        # Room left for placing the region inside each of the two sources.
        firstSlack = len(sourceRegs[0]) - regionLen
        secondSlack = len(sourceRegs[1]) - regionLen
        # Force true division: with plain "/" on two ints, Python 2 (without
        # "from __future__ import division") floors the ratio to an integer,
        # so the choice below would no longer be proportional to the slack.
        firstSourceProportion = float(firstSlack) / (firstSlack + secondSlack)
        if random.random() < firstSourceProportion:
            sourceReg = sourceRegs[0]
        else:
            sourceReg = sourceRegs[1]

    randOffset = random.randint(0, len(sourceReg) - regionLen)
    start = sourceReg.start + randOffset
    end = start + regionLen
    randRegion = GenomeRegion(region.genome, region.chr, start, end)

    tv = RawDataStat(randRegion, self._origTrack, self._trackFormatReq).getResult()
    # The relocated region must never coincide with the requested one.
    assert region != tv.genomeAnchor
    return tv
def getTrackView(self, region):
    """Build a randomized TrackView for ``region`` and store it in ``self._cachedTV``.

    ``region`` must equal ``self._origRegion`` or, when ``self._origRegion``
    is iterable (as reported by ``isIter``), be contained in it.

    The original track is loaded for ``region`` through ``RawDataStat`` and
    its numpy arrays, track format and the region are handed to
    ``self._createRandomizedNumpyArrays``. The start of the genome anchor is
    added to the returned start and end arrays (when they are not None)
    before everything is wrapped in a new ``TrackView`` that reuses the
    anchor, border handling and overlap setting of the original track view.

    The track view is rebuilt on every call; ``self._cachedTV`` is only
    written here, never consulted.

    Raises AssertionError if the region check fails, if the original track
    view allows overlaps, does not use 'crop' border handling, is not
    anchored at ``region``, or if the resulting track format is incompatible
    with ``self._trackFormatReq``.
    """
    assert (region in self._origRegion) if isIter(self._origRegion) \
        else (self._origRegion == region)

    sourceTV = RawDataStat(region, self._origTrack, self._trackFormatReq).getResult()

    self._checkTrackFormat(sourceTV)
    assert not sourceTV.allowOverlaps
    assert sourceTV.borderHandling == 'crop'
    assert region == sourceTV.genomeAnchor

    anchor = sourceTV.genomeAnchor

    randomized = self._createRandomizedNumpyArrays(
        len(anchor),
        sourceTV.startsAsNumpyArray(),
        sourceTV.endsAsNumpyArray(),
        sourceTV.valsAsNumpyArray(),
        sourceTV.strandsAsNumpyArray(),
        sourceTV.idsAsNumpyArray(),
        sourceTV.edgesAsNumpyArray(),
        sourceTV.weightsAsNumpyArray(),
        sourceTV.allExtrasAsDictOfNumpyArrays(),
        sourceTV.trackFormat,
        region)
    starts, ends, vals, strands, ids, edges, weights, extras = randomized

    # NOTE(review): getClassName is not used below; the import is kept so
    # that import-time behaviour stays exactly as before.
    from gold.util.CommonFunctions import getClassName

    # Add the anchor start to the coordinates. New arrays are created on
    # purpose (no in-place "+="), so the arrays returned by the randomizer
    # are left untouched.
    if starts is not None:
        starts = starts + anchor.start
    if ends is not None:
        ends = ends + anchor.start

    randomizedTV = TrackView(
        anchor, starts, ends, vals, strands, ids, edges, weights,
        sourceTV.borderHandling, sourceTV.allowOverlaps, extraLists=extras)
    self._cachedTV = randomizedTV

    assert self._trackFormatReq.isCompatibleWith(randomizedTV.trackFormat), \
        'Incompatible track-format: ' + str(self._trackFormatReq) + \
        ' VS ' + str(randomizedTV.trackFormat)
    return randomizedTV
def getTrackView(self, region):
    """Return the original track's TrackView for ``region`` after support checks.

    The original track is loaded with a ``NeutralTrackFormatReq`` and is
    returned unchanged once ``self._trackRandomizer`` has confirmed that it
    supports both the track format and the ``allowOverlaps`` setting of the
    loaded track view.

    Raises:
        NotSupportedError: if the randomizer does not support the track
            format or the overlap mode of the original track.
        AssertionError: if the loaded track view does not use 'crop' border
            handling.
    """
    # To make sure that the origTrack is only read once across randomizations
    neutralFormatReq = NeutralTrackFormatReq()
    loadedTv = RawDataStat(region, self._origTrack, neutralFormatReq).getResult()

    randomizer = self._trackRandomizer

    if not randomizer.supportsTrackFormat(loadedTv.trackFormat):
        formatMsg = 'The original track "{}" has format "{}", which is not supported by "{}".'
        raise NotSupportedError(formatMsg.format(
            prettyPrintTrackName(self.trackName),
            str(loadedTv.trackFormat),
            randomizer.getDescription()))

    if not randomizer.supportsOverlapMode(loadedTv.allowOverlaps):
        overlapMsg = 'The original track "{}" has "allowOverlaps={}", which is not supported by "{}".'
        raise NotSupportedError(overlapMsg.format(
            prettyPrintTrackName(self.trackName),
            loadedTv.allowOverlaps,
            randomizer.getDescription()))

    assert loadedTv.borderHandling == 'crop'
    return loadedTv