def getTrackView(self, region):
    """Return the track view of a randomly placed region of the same length.

    The replacement region is drawn from the chromosome arm containing
    ``region``, after excluding ``region`` extended by the independency
    buffer (presumably on both sides -- confirm against
    ``GenomeRegion.extend``).

    Raises:
        CentromerError: if ``region`` is not contained in exactly one
            chromosome arm.
        TooLargeBinError: if no source region is at least
            ``MIN_SOURCE_TO_SAMPLE_SIZE_RATIO`` times as long as ``region``,
            or if neither of two source regions can hold ``region``.
    """
    assert self._origRegion == region

    allChrArmRegs = GenomeInfo.getContainingChrArms(region)
    if len(allChrArmRegs) != 1:
        raise CentromerError
    chrArm = allChrArmRegs[0]

    # Named bufferSize to avoid shadowing the Python 2 builtin ``buffer``.
    bufferSize = self._getIndepencyBufferSize(region)
    sourceRegs = chrArm.exclude(
        copy(region).extend(-bufferSize).extend(bufferSize))
    assert len(sourceRegs) in [1, 2]

    regionLen = len(region)
    sourceLens = [len(x) for x in sourceRegs]
    tooSmallMsg = ('Source region lengths of ' + str(sourceLens) +
                   ' are too small compared to region length of ' +
                   str(regionLen) +
                   ' according to MIN_SOURCE_TO_SAMPLE_SIZE_RATIO: ' +
                   str(self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO))

    minSourceLen = self.MIN_SOURCE_TO_SAMPLE_SIZE_RATIO * regionLen
    if not any(sourceLen >= minSourceLen for sourceLen in sourceLens):
        raise TooLargeBinError(tooSmallMsg)

    if len(sourceRegs) == 1:
        sourceReg = sourceRegs[0]
    else:
        # Weight each source region by its number of valid placements
        # (offsets 0..len-regionLen inclusive, matching randint below).
        # Clamping at zero keeps a too-short source region from getting a
        # negative weight, and float() avoids Python 2 integer division
        # truncating the proportion to 0 or 1.
        placements = [max(0, sourceLen - regionLen + 1)
                      for sourceLen in sourceLens]
        totalPlacements = sum(placements)
        if totalPlacements == 0:
            raise TooLargeBinError(tooSmallMsg)
        firstSourceProportion = float(placements[0]) / totalPlacements
        if random.random() < firstSourceProportion:
            sourceReg = sourceRegs[0]
        else:
            sourceReg = sourceRegs[1]

    randOffset = random.randint(0, len(sourceReg) - regionLen)
    start = sourceReg.start + randOffset
    end = start + regionLen
    randRegion = GenomeRegion(region.genome, region.chr, start, end)

    rawData = RawDataStat(randRegion, self._origTrack, self._trackFormatReq)
    tv = rawData.getResult()
    # The sampled region comes from outside the excluded area, so it must
    # differ from the original region.
    assert region != tv.genomeAnchor
    return tv
def _createChildren(self):
    """Register one RawDataStat child per track.

    The first track is requested with ``val='number'`` and the second with
    ``dense=False``; both are requested with ``allowOverlaps=False``.
    """
    numberReq = TrackFormatReq(allowOverlaps=False, val='number')
    self._addChild(RawDataStat(self._region, self._track, numberReq))

    nonDenseReq = TrackFormatReq(allowOverlaps=False, dense=False)
    self._addChild(RawDataStat(self._region, self._track2, nonDenseReq))
def _createChildren(self):
    """Register a RawDataStat child for each of the two tracks.

    Both tracks are requested with ``val='number'`` and ``dense=True``.
    """
    for track in (self._track, self._track2):
        # A fresh format request is built for every child.
        formatReq = TrackFormatReq(val='number', dense=True)
        self._addChild(RawDataStat(self._region, track, formatReq))
def _createChildren(self):
    """Register one RawDataStat child per track.

    The first track is requested with ``dense=True, val='char'`` and the
    second with ``val='category'``.
    """
    charReq = TrackFormatReq(dense=True, val='char')
    self._addChild(RawDataStat(self._region, self._track, charReq))

    categoryReq = TrackFormatReq(val='category')
    self._addChild(RawDataStat(self._region, self._track2, categoryReq))
def _createChildren(self):
    """Register one RawDataStat child per track.

    The first track is requested with ``interval=True`` and the value type
    held in ``self._markReq``; the second with ``dense=False, interval=True``.
    """
    markedReq = TrackFormatReq(interval=True, val=self._markReq)
    self._addChild(RawDataStat(self._region, self._track, markedReq))

    segmentReq = TrackFormatReq(dense=False, interval=True)
    self._addChild(RawDataStat(self._region, self._track2, segmentReq))
def getTrackView(self, region):
    """Build, store and return a randomized TrackView for ``region``.

    The original track data for ``region`` is fetched, passed through
    ``self._createRandomizedNumpyArrays`` and wrapped in a new TrackView on
    the same genome anchor. The result is stored in ``self._cachedTV``; the
    stored value is not consulted here, so every call recomputes it.
    """
    origRegion = self._origRegion
    # ``region`` must be the original region, or one of them when the
    # original is an iterable of regions.
    assert (region in origRegion) if isIter(origRegion) \
        else (origRegion == region)

    origTV = RawDataStat(region, self._origTrack,
                         self._trackFormatReq).getResult()

    self._checkTrackFormat(origTV)
    assert not origTV.allowOverlaps
    assert origTV.borderHandling == 'crop'
    anchor = origTV.genomeAnchor
    assert region == anchor

    randomized = self._createRandomizedNumpyArrays(
        len(anchor),
        origTV.startsAsNumpyArray(),
        origTV.endsAsNumpyArray(),
        origTV.valsAsNumpyArray(),
        origTV.strandsAsNumpyArray(),
        origTV.idsAsNumpyArray(),
        origTV.edgesAsNumpyArray(),
        origTV.weightsAsNumpyArray(),
        origTV.allExtrasAsDictOfNumpyArrays(),
        origTV.trackFormat,
        region)
    starts, ends, vals, strands, ids, edges, weights, extras = randomized

    # NOTE(review): getClassName is not used below; the import is kept only
    # so that importing behaviour stays unchanged.
    from gold.util.CommonFunctions import getClassName

    # The anchor start is added to the randomized coordinates before they
    # are handed to TrackView; missing arrays stay None.
    shiftedStarts = None if starts is None else starts + anchor.start
    shiftedEnds = None if ends is None else ends + anchor.start

    self._cachedTV = TrackView(
        anchor, shiftedStarts, shiftedEnds, vals, strands, ids, edges,
        weights, origTV.borderHandling, origTV.allowOverlaps,
        extraLists=extras)

    randomizedFormat = self._cachedTV.trackFormat
    assert self._trackFormatReq.isCompatibleWith(randomizedFormat), \
        'Incompatible track-format: ' + str(self._trackFormatReq) + \
        ' VS ' + str(self._cachedTV.trackFormat)

    return self._cachedTV
def _createChildren(self):
    """Register one RawDataStat child per track.

    Both tracks are requested with ``dense=False, val='category'`` and
    ``allowOverlaps=True``; the first with ``interval=False`` and the second
    with ``interval=True``.
    """
    pointReq = TrackFormatReq(dense=False, interval=False,
                              val='category', allowOverlaps=True)
    self._addChild(RawDataStat(self._region, self._track, pointReq))

    segmentReq = TrackFormatReq(dense=False, interval=True,
                                val='category', allowOverlaps=True)
    self._addChild(RawDataStat(self._region, self._track2, segmentReq))
def _getTrackView(self, region):
    """Build, store and return a randomized TrackView for ``region``.

    The original track data is fetched, randomized by
    ``self._createRandomizedNumpyArrays``, post-processed by
    ``self._undoTrackViewChanges`` and wrapped in a new TrackView on the
    same genome anchor. The result is stored in ``self._cachedTV``; the
    stored value is not consulted here, so every call recomputes it.
    """
    # Imported for its side effect: initializes the random generators if
    # that has not been done previously.
    from gold.util.RandomUtil import random

    origTV = RawDataStat(region, self._origTrack,
                         self._trackFormatReq).getResult()

    self._checkTrackFormat(origTV)
    assert not origTV.allowOverlaps
    assert origTV.borderHandling == 'crop'
    anchor = origTV.genomeAnchor
    assert region == anchor

    randomized = self._createRandomizedNumpyArrays(
        len(anchor),
        origTV.startsAsNumpyArray(),
        origTV.endsAsNumpyArray(),
        origTV.valsAsNumpyArray(),
        origTV.strandsAsNumpyArray(),
        origTV.idsAsNumpyArray(),
        origTV.edgesAsNumpyArray(),
        origTV.weightsAsNumpyArray(),
        origTV.allExtrasAsDictOfNumpyArrays(),
        region)
    starts, ends, vals, strands, ids, edges, weights, extras = randomized

    restored = self._undoTrackViewChanges(
        starts, ends, vals, strands, ids, edges, weights, extras, origTV)
    starts, ends, vals, strands, ids, edges, weights, extras = restored

    # NOTE(review): getClassName is not used below; the import is kept only
    # so that importing behaviour stays unchanged.
    from gold.util.CommonFunctions import getClassName

    self._cachedTV = TrackView(
        anchor, starts, ends, vals, strands, ids, edges, weights,
        origTV.borderHandling, origTV.allowOverlaps, extraLists=extras)

    randomizedFormat = self._cachedTV.trackFormat
    assert self._trackFormatReq.isCompatibleWith(randomizedFormat), \
        'Incompatible track-format: ' + str(self._trackFormatReq) + \
        ' VS ' + str(self._cachedTV.trackFormat)

    return self._cachedTV
def _createChildren(self):
    """Register a RawDataStat child for each of the two tracks.

    Both tracks are requested with ``dense=False, interval=True``.
    """
    # interval=False is supported through the faster PointCountPerSegStat.
    for track in (self._track, self._track2):
        segmentReq = TrackFormatReq(dense=False, interval=True)
        self._addChild(RawDataStat(self._region, track, segmentReq))
def _createChildren(self):
    """Register one RawDataStat child per track.

    The first track is requested with ``interval=True, dense=False`` and the
    second with ``dense=True, val='number', interval=False``. Both stats are
    constructed before either is registered.
    """
    pendingChildren = [
        RawDataStat(self._region, self._track,
                    TrackFormatReq(interval=True, dense=False)),
        RawDataStat(self._region, self._track2,
                    TrackFormatReq(dense=True, val='number', interval=False)),
    ]
    for childStat in pendingChildren:
        self._addChild(childStat)
def _createChildren(self):
    """Register raw-data children for both tracks plus a bin-size child.

    Both tracks are requested with ``dense=False``. The value returned by
    ``_addChild`` for the BinSizeStat on the second track is stored in
    ``self._binSizeStat``.
    """
    for track in (self._track, self._track2):
        nonDenseReq = TrackFormatReq(dense=False)
        self._addChild(RawDataStat(self._region, track, nonDenseReq))

    binSizeChild = BinSizeStat(self._region, self._track2)
    self._binSizeStat = self._addChild(binSizeChild)
def _createChildren(self):
    """Register a RawDataStat child for each of the two tracks.

    Both tracks are requested with
    ``dense=False, interval=False, val='number'``.
    """
    for track in (self._track, self._track2):
        valuedPointReq = TrackFormatReq(dense=False, interval=False,
                                        val='number')
        self._addChild(RawDataStat(self._region, track, valuedPointReq))
def _createChildren(self):
    """Register raw-data children for both tracks plus a bin-size child.

    The first track is requested with ``dense=False, interval=False`` and
    the second with ``dense=False, interval=True``. The value returned by
    ``_addChild`` for the BinSizeStat on the second track is stored in
    ``self._binSizeStat``.
    """
    pointReq = TrackFormatReq(dense=False, interval=False)
    self._addChild(RawDataStat(self._region, self._track, pointReq))

    segmentReq = TrackFormatReq(dense=False, interval=True)
    self._addChild(RawDataStat(self._region, self._track2, segmentReq))

    binSizeChild = BinSizeStat(self._region, self._track2)
    self._binSizeStat = self._addChild(binSizeChild)
def _createChildren(self):
    """Register the two raw-data children and keep references to them.

    ``self._graphNodeIdStat`` receives the child for the first track
    (``interval=True, id=True, dense=True``) and ``self._segmentsStat`` the
    child for the second track
    (``interval=False, dense=False, allowOverlaps=None``).
    """
    graphNodeReq = TrackFormatReq(interval=True, id=True, dense=True)
    graphNodeChild = RawDataStat(self._region, self._track, graphNodeReq)
    self._graphNodeIdStat = self._addChild(graphNodeChild)

    # An earlier variant of this request did not pass allowOverlaps.
    segmentsReq = TrackFormatReq(interval=False, dense=False,
                                 allowOverlaps=None)
    segmentsChild = RawDataStat(self._region, self._track2, segmentsReq)
    self._segmentsStat = self._addChild(segmentsChild)
def _createChildren(self):
    """Register a RawDataStat child for each of the two tracks.

    Both tracks are requested with
    ``dense=False, interval=True, allowOverlaps=False``.
    """
    # The allowOverlaps setting is somewhat arbitrary for now.
    for track in (self._track, self._track2):
        segmentReq = TrackFormatReq(dense=False, interval=True,
                                    allowOverlaps=False)
        self._addChild(RawDataStat(self._region, track, segmentReq))
def _createChildren(self):
    """Register one RawDataStat child per track.

    The first track is requested with ``val='number'`` and the second with
    ``interval=True``. For each request, ``allowOverlaps`` is obtained from a
    separate call to ``self._configuredToAllowOverlaps(strict=False)``.
    """
    firstAllowOverlaps = self._configuredToAllowOverlaps(strict=False)
    numberReq = TrackFormatReq(val='number',
                               allowOverlaps=firstAllowOverlaps)
    self._addChild(RawDataStat(self._region, self._track, numberReq))

    secondAllowOverlaps = self._configuredToAllowOverlaps(strict=False)
    segmentReq = TrackFormatReq(interval=True,
                                allowOverlaps=secondAllowOverlaps)
    self._addChild(RawDataStat(self._region, self._track2, segmentReq))
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The track is requested with
    ``interval=True, val='category', allowOverlaps=True``.
    """
    categorySegmentReq = TrackFormatReq(interval=True, val='category',
                                        allowOverlaps=True)
    childStat = RawDataStat(self._region, self._track, categorySegmentReq)
    self._addChild(childStat)
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The track is requested with ``dense=False, interval=False``;
    ``allowOverlaps`` is True exactly when ``self._withOverlaps`` equals
    ``'yes'``.
    """
    overlapsAllowed = (self._withOverlaps == 'yes')
    pointReq = TrackFormatReq(dense=False, interval=False,
                              allowOverlaps=overlapsAllowed)
    self._addChild(RawDataStat(self._region, self._track, pointReq))
def _createChildren(self):
    """Register a raw-data child per track plus one bin-size child.

    Every track in ``self._tracks`` is requested with ``dense=False`` and
    ``self._kwArgs`` is forwarded to RawDataStat. The value returned by
    ``_addChild`` for the BinSizeStat on the first track is stored in
    ``self._binSizeStat``.
    """
    for currentTrack in self._tracks:
        nonDenseReq = TrackFormatReq(dense=False)
        rawChild = RawDataStat(self._region, currentTrack, nonDenseReq,
                               **self._kwArgs)
        self._addChild(rawChild)

    binSizeChild = BinSizeStat(self._region, self._tracks[0])
    self._binSizeStat = self._addChild(binSizeChild)
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The only format requirement is ``allowOverlaps``, taken from
    ``self._configuredToAllowOverlaps(strict=False)``.
    """
    overlapsAllowed = self._configuredToAllowOverlaps(strict=False)
    formatReq = TrackFormatReq(allowOverlaps=overlapsAllowed)
    self._addChild(RawDataStat(self._region, self._track, formatReq))
def _createChildren(self):
    """Register a raw-data child and a coded-events child.

    The first child is a RawDataStat on the first track with ``dense=False``.
    The second is a RawOverlapSortedStartEndCodedEventsStat over both tracks,
    to which ``self._kwArgs`` is forwarded.
    """
    nonDenseReq = TrackFormatReq(dense=False)
    self._addChild(RawDataStat(self._region, self._track, nonDenseReq))

    codedEventsChild = RawOverlapSortedStartEndCodedEventsStat(
        self._region, self._track, self._track2, **self._kwArgs)
    self._addChild(codedEventsChild)
def _createChildren(self):
    """Register raw-data children and raw-statistic children.

    Children are added in this order:
      1. a RawDataStat for each of the two tracks
         (``dense=False, val='tc'``);
      2. ``self._rawStatistic`` on the two tracks themselves;
      3. ``self._rawStatistic`` for every combination of the sub-tracks
         ``'0'`` and ``'1'`` of the first and second track.

    The keyword arguments forwarded to ``self._rawStatistic`` are a copy of
    ``self._kwArgs`` without the ``'rawStatistic'`` entry.

    Raises:
        IncompatibleTracksError: if the directory given by ``createDirPath``
            does not exist for one of the sub-tracks.
    """
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    for parentTrack in (self._track, self._track2):
        tcReq = TrackFormatReq(dense=False, val='tc')
        self._addChild(RawDataStat(self._region, parentTrack, tcReq))

    self._addChild(
        self._rawStatistic(self._region, self._track, self._track2, **kwArgs))

    subtypePairs = [(first, second)
                    for first in ['0', '1']
                    for second in ['0', '1']]
    for subtype1, subtype2 in subtypePairs:
        tn1 = self._track.trackName + [subtype1]
        tn2 = self._track2.trackName + [subtype2]

        bothDirsExist = (
            os.path.exists(createDirPath(tn1, self.getGenome())) and
            os.path.exists(createDirPath(tn2, self.getGenome())))
        if not bothDirsExist:
            raise IncompatibleTracksError

        # The sub-tracks take over the format converters of their parents.
        track1 = Track(tn1)
        track1.formatConverters = self._track.formatConverters
        track2 = Track(tn2)
        track2.formatConverters = self._track2.formatConverters

        self._addChild(
            self._rawStatistic(self._region, track1, track2, **kwArgs))
def getTrackView(self, region):
    """Return the original track view for ``region`` after support checks.

    Raises:
        NotSupportedError: if ``self._trackRandomizer`` does not support the
            track format or the overlap mode of the original track view.
    """
    # Fetched via RawDataStat to make sure that the origTrack is only read
    # once across randomizations.
    origTv = RawDataStat(region, self._origTrack,
                         NeutralTrackFormatReq()).getResult()
    randomizer = self._trackRandomizer

    if not randomizer.supportsTrackFormat(origTv.trackFormat):
        formatMsg = ('The original track "{}" has format "{}", '
                     'which is not supported by "{}".')
        raise NotSupportedError(formatMsg.format(
            prettyPrintTrackName(self.trackName),
            str(origTv.trackFormat),
            randomizer.getDescription()))

    if not randomizer.supportsOverlapMode(origTv.allowOverlaps):
        overlapMsg = ('The original track "{}" has "allowOverlaps={}", '
                      'which is not supported by "{}".')
        raise NotSupportedError(overlapMsg.format(
            prettyPrintTrackName(self.trackName),
            origTv.allowOverlaps,
            randomizer.getDescription()))

    assert origTv.borderHandling == 'crop'
    return origTv
def _compute(self):
    """Return sum((O - E)^2 / E) over fixed-width bins of element starts.

    The start positions of both tracks are assigned to bins of ``binWidth``
    base pairs. ``O`` is the number of starts per bin (both tracks pooled)
    and ``E`` is the total number of starts divided by the number of bins.

    NOTE(review): the binned range is twice the last end position of the
    first track, as in the previous implementation. An empty first track,
    or a start in the second track beyond that range, raises IndexError --
    confirm the intended range.
    """
    # The bin grid and the bin assignment must use the same width. The
    # previous code used an undefined name for the grid and a hard-coded
    # 10000 for the assignment.
    binWidth = 10000

    tvs = [
        RawDataStat(self._region, track, self._getTrackFormatReq()).getResult()
        for track in [self._track, self._track2]
    ]
    tvStarts = [np.array(tv.startsAsNumpyArray(), dtype='int64')
                for tv in tvs]
    tvEnds = [np.array(tv.endsAsNumpyArray(), dtype='int64') for tv in tvs]

    binSize = tvEnds[0][-1] * 2
    bins = np.arange(0, binSize, binWidth)

    # float() keeps the expected count from being truncated by Python 2
    # integer division.
    numStarts = sum(len(starts) for starts in tvStarts)
    expected = float(numStarts) / len(bins)

    # One counter per bin (1-D, so a bin index addresses a single counter).
    observed = np.zeros(len(bins))
    for starts in tvStarts:
        binPositions = np.floor_divide(starts, binWidth)
        # Unbuffered add, so several starts in one bin are all counted.
        np.add.at(observed, binPositions, 1)

    return np.sum(np.power(observed - expected, 2) / expected)
def _createChildren(self):
    """Register the four children and keep references to three of them.

    Added in order:
      * ``self._valBpArrayStat``: ValueBpLevelArrayRawDataStat on the first
        track with ``voidValue=0``;
      * an unreferenced FormatSpecStat on the first track
        (``allowOverlaps=False, val='number'``);
      * ``self._segmentBpArrayStat``: BpLevelArrayRawDataStat on the second
        track with ``bpDepthType='binary'``;
      * ``self._segStat``: RawDataStat on the second track
        (``allowOverlaps=False, dense=False, interval=True``).
    """
    valueArrayChild = ValueBpLevelArrayRawDataStat(
        self._region, self._track, voidValue=0)
    self._valBpArrayStat = self._addChild(valueArrayChild)

    numberReq = TrackFormatReq(allowOverlaps=False, val='number')
    self._addChild(FormatSpecStat(self._region, self._track, numberReq))

    segmentArrayChild = BpLevelArrayRawDataStat(
        self._region, self._track2, bpDepthType='binary')
    self._segmentBpArrayStat = self._addChild(segmentArrayChild)

    segmentReq = TrackFormatReq(allowOverlaps=False, dense=False,
                                interval=True)
    segmentChild = RawDataStat(self._region, self._track2, segmentReq)
    self._segStat = self._addChild(segmentChild)
def _createChildren(self):
    """Register a single RawDataStat child requested with ``interval=True``."""
    intervalReq = TrackFormatReq(interval=True)
    childStat = RawDataStat(self._region, self._track, intervalReq)
    self._addChild(childStat)
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The track is requested with ``interval=False, val='category'``.
    """
    categoryReq = TrackFormatReq(interval=False, val='category')
    childStat = RawDataStat(self._region, self._track, categoryReq)
    self._addChild(childStat)
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The track is requested with ``dense=False`` and ``interval`` set to
    ``self.INTERVALS``; ``allowOverlaps`` is True exactly when
    ``self._withOverlaps`` equals ``'yes'``.
    """
    overlapsAllowed = (self._withOverlaps == 'yes')
    formatReq = TrackFormatReq(dense=False, interval=self.INTERVALS,
                               allowOverlaps=overlapsAllowed)
    self._addChild(RawDataStat(self._region, self._track, formatReq))
def _createChildren(self):
    """Register a raw-data child and a bin-size child for the track.

    The RawDataStat is requested with ``interval=False, dense=False``.
    """
    pointReq = TrackFormatReq(interval=False, dense=False)
    children = (
        RawDataStat(self._region, self._track, pointReq),
        BinSizeStat(self._region, self._track),
    )
    for childStat in children:
        self._addChild(childStat)
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The track is requested with
    ``dense=True, interval=False, allowOverlaps=False``.
    """
    denseReq = TrackFormatReq(dense=True, interval=False,
                              allowOverlaps=False)
    childStat = RawDataStat(self._region, self._track, denseReq)
    self._addChild(childStat)
def _createChildren(self):
    """Register a point-count child and a raw-data child for the track.

    The CountPointStat is added first; the RawDataStat is requested with
    ``dense=False, val='mean_sd'``.
    """
    pointCountChild = CountPointStat(self._region, self._track)
    self._addChild(pointCountChild)

    meanSdReq = TrackFormatReq(dense=False, val='mean_sd')
    self._addChild(RawDataStat(self._region, self._track, meanSdReq))
def _createChildren(self):
    """Register a single RawDataStat child for the track.

    The track is requested with ``interval=True``; ``allowOverlaps`` is True
    exactly when ``self._withOverlaps`` equals ``'yes'``.
    """
    overlapsAllowed = (self._withOverlaps == 'yes')
    intervalReq = TrackFormatReq(interval=True,
                                 allowOverlaps=overlapsAllowed)
    self._addChild(RawDataStat(self._region, self._track, intervalReq))