def extractToFile(self, fn, outTrackName): append = False for region in GlobalBinSource(self._genome): print 'Creating segmentation for chr: ',region.chr trackView = PlainTrack(self._inTrackName).getTrackView(region) teSource = FunctionCategorizerWrapper(trackView, self._categorizerMethod, minSegLen=self._minSegLen) teSource.trackFormat = TrackFormat.createInstanceFromPrefixList(['start','end','val']) TrackExtractor._extract(teSource, outTrackName, region, fn, append=append, globalCoords=True, addSuffix=True) append = True
def testExtra(self):
    """Check extra-column queries on a TrackFormat built from a prefix list.

    The format is built from 'start' plus the prefixes 'a', 'b' and 'c';
    the test asserts that 'a' is reported as an extra, 'd' is not, that the
    extra names are ['a', 'b', 'c'], and that a requirement asking for
    extra=['a', 'b'] is not compatible with the format.
    """
    prefixes = ['start', 'a', 'b', 'c']
    tf = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'float64', 1)

    hasA = tf.hasExtra(specificExtra='a')
    self.assertTrue(hasA)
    hasD = tf.hasExtra(specificExtra='d')
    self.assertFalse(hasD)

    self.assertEqual(['a', 'b', 'c'], tf.getExtraNames())

    requirement = TrackFormatReq(interval=False, extra=['a', 'b'])
    self.assertFalse(requirement.isCompatibleWith(tf))
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """Build a TrackView of placeholder arrays matching the source's format.

    The region must have length one (asserted). For every prefix reported by
    the genome element source a small numpy array is created, holding one
    element (two when the format is both dense and interval). Prefixes other
    than start/end/val/strand/id/edges/weights end up in the ordered
    dict of extra columns.

    Raises IncompatibleTracksError when overlaps are requested but the format
    is dense or the source reports having no overlapping elements.
    """
    assert len(region) == 1

    from collections import OrderedDict
    from gold.track.CommonMemmapFunctions import findEmptyVal
    from gold.track.TrackView import TrackView
    import numpy as np

    source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
    prefixes = source.getPrefixList()
    valType = source.getValDataType()
    valDim = source.getValDim()
    weightType = source.getEdgeWeightDataType()
    weightDim = source.getEdgeWeightDim()

    # The format is derived from the data types exactly as the source
    # reports them, i.e. before the 'S' -> 'S2' substitution further down.
    trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                           weightType, weightDim)

    if allowOverlaps:
        if trackFormat.isDense() or source.hasNoOverlappingElements():
            message = prettyPrintTrackName(self.trackName) + ' with format: ' \
                + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

    isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
    if isDenseInterval:
        elCount = 2
    else:
        elCount = 1

    # A bare 'S' dtype is replaced by a two-character string dtype.
    if valType == 'S':
        valType = 'S2'
    if weightType == 'S':
        weightType = 'S2'

    starts = ends = vals = strands = ids = edges = weights = None
    extras = OrderedDict()

    for name in prefixes:
        if name == 'start':
            starts = np.array([-1], dtype='int32')
        elif name == 'end':
            endValues = [0, 1] if isDenseInterval else [0]
            ends = np.array(endValues, dtype='int32')
        elif name == 'val':
            valShape = (elCount, valDim) if valDim > 1 else elCount
            flatVals = np.array([findEmptyVal(valType)] * valDim * elCount, dtype=valType)
            vals = flatVals.reshape(valShape)
        elif name == 'strand':
            strands = np.array([1] * elCount, dtype='int8')
        elif name == 'id':
            ids = np.array([''] * elCount, dtype='S1')
        elif name == 'edges':
            edges = np.array([['']] * elCount, dtype='S1')
        elif name == 'weights':
            weightShape = (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1)
            rawWeights = np.array([[[findEmptyVal(weightType)]]] * weightDim * elCount,
                                  dtype=weightType)
            weights = rawWeights.reshape(weightShape)
        else:
            # Any unrecognized prefix is treated as an extra column.
            extras[name] = np.array([''] * elCount, dtype='S1')

    return TrackView(region, starts, ends, vals, strands, ids, edges, weights,
                     borderHandling, allowOverlaps, extras)
def _getRawTrackView(self, region, borderHandling, allowOverlaps):
    """Build a TrackView of placeholder arrays matching the source's format.

    The region must span exactly [0, 1) (asserted). For every prefix reported
    by the genome element source a small numpy array is created, holding one
    element (two when the format is both dense and interval). Prefixes other
    than start/end/val/strand/id/edges/weights end up in the ordered
    dict of extra columns.

    Raises IncompatibleTracksError when overlaps are requested but the format
    is dense or the source reports having no overlapping elements.
    """
    assert region.start == 0 and region.end == 1

    from collections import OrderedDict
    from gold.track.CommonMemmapFunctions import findEmptyVal
    from gold.track.TrackView import TrackView
    import numpy as np

    source = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
    prefixes = source.getPrefixList()
    valType = source.getValDataType()
    valDim = source.getValDim()
    weightType = source.getEdgeWeightDataType()
    weightDim = source.getEdgeWeightDim()

    # The format is derived from the data types exactly as the source
    # reports them, i.e. before the 'S' -> 'S2' substitution further down.
    trackFormat = TrackFormat.createInstanceFromPrefixList(prefixes, valType, valDim,
                                                           weightType, weightDim)

    if allowOverlaps:
        if trackFormat.isDense() or source.hasNoOverlappingElements():
            message = prettyPrintTrackName(self.trackName) + ' with format: ' \
                + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

    isDenseInterval = trackFormat.isDense() and trackFormat.isInterval()
    if isDenseInterval:
        elCount = 2
    else:
        elCount = 1

    # A bare 'S' dtype is replaced by a two-character string dtype.
    if valType == 'S':
        valType = 'S2'
    if weightType == 'S':
        weightType = 'S2'

    starts = ends = vals = strands = ids = edges = weights = None
    extras = OrderedDict()

    for name in prefixes:
        if name == 'start':
            starts = np.array([-1], dtype='int32')
        elif name == 'end':
            endValues = [0, 1] if isDenseInterval else [0]
            ends = np.array(endValues, dtype='int32')
        elif name == 'val':
            valShape = (elCount, valDim) if valDim > 1 else elCount
            flatVals = np.array([findEmptyVal(valType)] * valDim * elCount, dtype=valType)
            vals = flatVals.reshape(valShape)
        elif name == 'strand':
            strands = np.array([1] * elCount, dtype='int8')
        elif name == 'id':
            ids = np.array([''] * elCount, dtype='S1')
        elif name == 'edges':
            edges = np.array([['']] * elCount, dtype='S1')
        elif name == 'weights':
            weightShape = (elCount, 1, weightDim) if weightDim > 1 else (elCount, 1)
            rawWeights = np.array([[[findEmptyVal(weightType)]]] * weightDim * elCount,
                                  dtype=weightType)
            weights = rawWeights.reshape(weightShape)
        else:
            # Any unrecognized prefix is treated as an extra column.
            extras[name] = np.array([''] * elCount, dtype='S1')

    return TrackView(region, starts, ends, vals, strands, ids, edges, weights,
                     borderHandling, allowOverlaps, extras)
def testExtra(self):
    """Check extra-column queries on a TrackFormat built from a prefix list.

    The format is built from 'start' plus the prefixes 'a', 'b' and 'c';
    the test asserts that 'a' is reported as an extra, 'd' is not, that the
    extra names are ['a', 'b', 'c'], and that a requirement asking for
    extra=['a', 'b'] is not compatible with the format.
    """
    prefixes = ['start', 'a', 'b', 'c']
    tf = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'float64', 1)

    hasA = tf.hasExtra(specificExtra='a')
    self.assertTrue(hasA)
    hasD = tf.hasExtra(specificExtra='d')
    self.assertFalse(hasD)

    self.assertEqual(['a', 'b', 'c'], tf.getExtraNames())

    requirement = TrackFormatReq(interval=False, extra=['a', 'b'])
    self.assertFalse(requirement.isCompatibleWith(tf))
def testCompatibilityWithExceptions(self):
    """Check TrackFormatReq.isCompatibleWith when an exception list is passed.

    A format built from ['start', 'val'] is checked against requirements
    with interval=True and strand=True. The 'number' requirement is expected
    to be compatible only when both 'interval' and 'hasStrand' are listed as
    exceptions; the 'tc' requirement is expected to stay incompatible even
    with both exceptions.
    """
    tf = TrackFormat.createInstanceFromPrefixList(['start', 'val'], 'float64', 1, 'float64', 1)

    def makeReq(valType):
        # A fresh requirement object is built for every single assertion.
        return TrackFormatReq(interval=True, strand=True, val=valType)

    self.assertFalse(makeReq('number').isCompatibleWith(tf))
    self.assertFalse(makeReq('number').isCompatibleWith(tf, ['interval']))
    self.assertTrue(makeReq('number').isCompatibleWith(tf, ['interval', 'hasStrand']))
    self.assertFalse(makeReq('tc').isCompatibleWith(tf, ['interval', 'hasStrand']))
def testWeightTypes(self):
    """Check weight-type queries on a linked TrackFormat.

    The format is built from ['id', 'edges', 'weights'] with weight data
    type 'S8' and weight dimension 3. The test expects it to be weighted
    as 'category_vector' (not 'number'), to be named 'Vector of categories'
    / 'Linked base pairs', and to be incompatible with a linked requirement
    asking for weights='number'.
    """
    prefixes = ['id', 'edges', 'weights']
    tf = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'S8', 3)

    isCategoryVector = tf.isWeighted(specificWeightType='category_vector')
    self.assertTrue(isCategoryVector)
    isNumber = tf.isWeighted(specificWeightType='number')
    self.assertFalse(isNumber)

    self.assertEqual('Vector of categories', tf.getWeightTypeName())
    self.assertEqual('Linked base pairs', tf.getFormatName())

    requirement = TrackFormatReq(linked=True, weights='number')
    self.assertFalse(requirement.isCompatibleWith(tf))
def testValTypes(self):
    """Check value-type queries on a valued-points TrackFormat.

    The format is built from ['start', 'val'] with value data type
    'float128' and value dimension 2. The test expects it to be valued as
    'mean_sd' (not 'number'), to be named 'Mean and std.dev.' /
    'Valued points', and to be incompatible with a requirement asking for
    val='tc'.
    """
    prefixes = ['start', 'val']
    tf = TrackFormat.createInstanceFromPrefixList(prefixes, 'float128', 2, 'float64', 1)

    isMeanSd = tf.isValued(specificValType='mean_sd')
    self.assertTrue(isMeanSd)
    isNumber = tf.isValued(specificValType='number')
    self.assertFalse(isNumber)

    self.assertEqual('Mean and std.dev.', tf.getValTypeName())
    self.assertEqual('Valued points', tf.getFormatName())

    requirement = TrackFormatReq(interval=False, val='tc')
    self.assertFalse(requirement.isCompatibleWith(tf))
def testWeightTypes(self):
    """Check weight-type queries on a linked TrackFormat.

    The format is built from ['id', 'edges', 'weights'] with weight data
    type 'S8' and weight dimension 3. The test expects it to be weighted
    as 'category_vector' (not 'number'), to be named 'Vector of categories'
    / 'Linked base pairs', and to be incompatible with a linked requirement
    asking for weights='number'.
    """
    prefixes = ['id', 'edges', 'weights']
    tf = TrackFormat.createInstanceFromPrefixList(prefixes, 'float64', 1, 'S8', 3)

    isCategoryVector = tf.isWeighted(specificWeightType='category_vector')
    self.assertTrue(isCategoryVector)
    isNumber = tf.isWeighted(specificWeightType='number')
    self.assertFalse(isNumber)

    self.assertEqual('Vector of categories', tf.getWeightTypeName())
    self.assertEqual('Linked base pairs', tf.getFormatName())

    requirement = TrackFormatReq(linked=True, weights='number')
    self.assertFalse(requirement.isCompatibleWith(tf))
def __iter__(self):
    """Return a shallow copy of this object, set up as an iterator.

    The copy gets a fresh iterator over its genome element source stored in
    `_geIter`; the original object is left untouched.

    Raises NotSupportedError when the source's prefix list lacks 'start' or
    'end'; the message includes the format name of the source.
    """
    iterator = copy(self)

    # Does not support function, partitions and points:
    requiredPresent = [name in iterator._geSource.getPrefixList()
                       for name in ['start', 'end']]
    if not all(requiredPresent):
        geSource = iterator._geSource
        currentFormat = TrackFormat.createInstanceFromPrefixList(
            geSource.getPrefixList(),
            geSource.getValDataType(),
            geSource.getValDim(),
            geSource.getEdgeWeightDataType(),
            geSource.getEdgeWeightDim())
        raise NotSupportedError('Binning file must be segments. Current file format: ' +
                                currentFormat.getFormatName())

    iterator._geIter = iterator._geSource.__iter__()
    return iterator
def __iter__(self):
    """Return a shallow copy of this object, set up as an iterator.

    The copy gets a fresh iterator over its genome element source stored in
    `_geIter`; the original object is left untouched.

    Raises NotSupportedError when the source's prefix list lacks 'start' or
    'end'; the message includes the format name of the source.
    """
    iterator = copy(self)

    # Does not support function, partitions and points:
    requiredPresent = [name in iterator._geSource.getPrefixList()
                       for name in ['start', 'end']]
    if not all(requiredPresent):
        geSource = iterator._geSource
        currentFormat = TrackFormat.createInstanceFromPrefixList(
            geSource.getPrefixList(),
            geSource.getValDataType(),
            geSource.getValDim(),
            geSource.getEdgeWeightDataType(),
            geSource.getEdgeWeightDim())
        raise NotSupportedError('Binning file must be segments. Current file format: ' +
                                currentFormat.getFormatName())

    iterator._geIter = iterator._geSource.__iter__()
    return iterator
def extractToFile(self, fn, outTrackName): append = False for region in GlobalBinSource(self._genome): print 'Creating segmentation for chr: ', region.chr trackView = PlainTrack(self._inTrackName).getTrackView(region) teSource = FunctionCategorizerWrapper(trackView, self._categorizerMethod, minSegLen=self._minSegLen) teSource.trackFormat = TrackFormat.createInstanceFromPrefixList( ['start', 'end', 'val']) TrackExtractor._extract(teSource, outTrackName, region, fn, append=append, globalCoords=True, addSuffix=True) append = True
def getOptionsBoxFormat(prevChoices):
    """List the file format names of all composers matching a segment format.

    A TrackFormat is built from the prefixes ['start', 'end'] and passed to
    findMatchingFileFormatComposers; the `fileFormatName` of every returned
    composer is collected, in order. `prevChoices` is not used here.
    """
    segmentFormat = TrackFormat.createInstanceFromPrefixList(['start', 'end'])

    formatNames = []
    for matchingComposer in findMatchingFileFormatComposers(segmentFormat):
        formatNames.append(matchingComposer.fileFormatName)
    return formatNames
def getTrackFormat(self):
    """Return a TrackFormat built from this object's stored format attributes.

    Uses the prefix list, value data type and dimension, and weight data
    type and dimension held on the instance.
    """
    formatArgs = (self._prefixList,
                  self._valDataType,
                  self._valDim,
                  self._weightDataType,
                  self._weightDim)
    return TrackFormat.createInstanceFromPrefixList(*formatArgs)
def getTrackFormat(self):
    """Return a TrackFormat built from the temporary track info.

    Uses the prefix list, value data type and dimension, and weight data
    type and dimension stored on self._tempTrackInfo.
    """
    info = self._tempTrackInfo
    formatArgs = (info.prefixList,
                  info.valDataType,
                  info.valDim,
                  info.weightDataType,
                  info.weightDim)
    return TrackFormat.createInstanceFromPrefixList(*formatArgs)