示例#1
0
    def __init__(self, geSource):
        """
        Store the decorated GE source and reset all collected statistics.

        geSource: the genome element source. It is passed through
        _decorateGESource() before being stored, while the track format is
        derived from the undecorated source.
        """
        self._geSource = self._decorateGESource(geSource)
        self._boundingRegionsAndGEsCorrespond = None

        # Build the track format once and reuse it for both category checks.
        trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
        self._areValsCategorical = trackFormat.getValTypeName() == 'Category'
        self._areEdgeWeightsCategorical = trackFormat.getWeightTypeName() == 'Category'
        self._valCategories = set()
        self._edgeWeightCategories = set()

        self._numElements = OrderedDefaultDict(int)
        self._maxStrLens = OrderedDefaultDict(partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
        self._maxNumEdges = OrderedDefaultDict(int)

        self._hasCalculatedStats = False
示例#2
0
    def __init__(self, geSource):
        """
        Store the decorated GE source and reset all collected statistics.

        geSource: the genome element source. It is passed through
        _decorateGESource() before being stored, while the track format is
        derived from the undecorated source.
        """
        self._geSource = self._decorateGESource(geSource)
        self._boundingRegionsAndGEsCorresponds = None

        # Build the track format once and reuse it for both category checks.
        trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
        self._areValsCategorical = trackFormat.getValTypeName() == 'Category'
        self._areEdgeWeightsCategorical = trackFormat.getWeightTypeName() == 'Category'
        self._valCategories = set()
        self._edgeWeightCategories = set()

        self._numElements = OrderedDefaultDict(int)
        self._maxStrLens = OrderedDefaultDict(partial(self._initMaxStrLens, self._getMaxStrLensKeys()))
        self._maxNumEdges = OrderedDefaultDict(int)

        self._hasCalculatedStats = False
示例#3
0
    def _composeContents(self, out, hbColumns, columns, geSource,
                         onlyNonDefault=True, singleDataLine=False):
        """
        Write header lines, the column specification line and data lines to out.

        Bounding region lines are written for every bounding region that is
        not None. With singleDataLine=True, writing stops after the first
        data line of the first bounding region tuple.
        """
        # NOTE(review): the track format comes from self._geSource while the
        # elements are read from the geSource argument - confirm this is intended.
        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        out.write(self._composeHeaderLines(onlyNonDefault))
        out.write(self._composeColSpecLine(columns))

        brTuples = iterateOverBRTuplesWithContainedGEs(
            geSource, onlyYieldTwoGEs=singleDataLine)
        for boundingRegion, elements in brTuples:
            if boundingRegion is not None:
                out.write(self._composeBoundingRegionLine(boundingRegion))

            dataElements = self._removeStartElementIfApplicable(trackFormat, elements)
            for lineNum, element in enumerate(dataElements, 1):
                # NOTE(review): compared against the unfiltered list length;
                # verify this still marks the last line when a start element
                # has been removed.
                isLastLine = (lineNum == len(elements))
                out.write(self._composeDataLine(element, hbColumns, lineNum, isLastLine))

                if singleDataLine:
                    break

            if singleDataLine:
                break
示例#4
0
 def loadTrackView(trackData, region, borderHandling, allowOverlaps, trackName=[]):
     """
     Build a TrackView covering region from preprocessed track data.

     trackData : see TrackSource.getTrackData {'id' : smartmemmap}
     region : see GenomeRegion
     borderHandling, allowOverlaps : passed on unchanged to TrackView
     trackName : not used by this function

     Raises IOError if the representation is not dense and 'leftIndex' or
     'rightIndex' is missing from trackData.
     """
     brShelve = trackData.boundingRegionShelve
     brInfo = brShelve.getBoundingRegionInfo(region) if brShelve is not None else None

     # Build the names to exclude once, instead of concatenating a new list
     # for every array name in trackData.
     nonExtraNames = set(RESERVED_PREFIXES)
     nonExtraNames.update(['leftIndex', 'rightIndex'])
     extraArrayNames = [arrayName for arrayName in trackData if arrayName not in nonExtraNames]

     reservedArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in RESERVED_PREFIXES]
     extraArrays = [TrackViewLoader._getArray(trackData, arrayName, brInfo) for arrayName in extraArrayNames]
     trackFormat = TrackFormat( *(reservedArrays + [OrderedDict(zip(extraArrayNames, extraArrays))]) )

     if trackFormat.reprIsDense():
         # Dense representation: indexes are offsets relative to the start of
         # the bounding region, if there is one.
         if brInfo is None:
             leftIndex = region.start
             rightIndex = region.end
         else:
             leftIndex = region.start - brInfo.start
             rightIndex = region.end - brInfo.start
     else:
         # Otherwise the slice borders are looked up in the index arrays, by bin.
         leftBin = CompBinManager.getBinNumber(region.start)
         rightBin = CompBinManager.getBinNumber(region.end-1)

         if trackData.get('leftIndex') is None or trackData.get('rightIndex') is None:
             raise IOError('Preprocessed track not found. TrackData: ' + ', '.join(trackData.keys()))

         leftIndex = TrackViewLoader._getArray(trackData, 'leftIndex', brInfo, leftBin)
         rightIndex = TrackViewLoader._getArray(trackData, 'rightIndex', brInfo, rightBin)

     slicedReservedArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in reservedArrays]
     slicedExtraArrays = [(array[leftIndex:rightIndex] if array is not None else None) for array in extraArrays]

     argList = [region] + slicedReservedArrays + [borderHandling, allowOverlaps] + [OrderedDict(zip(extraArrayNames, slicedExtraArrays))]
     tv = TrackView( *(argList) )

     if not trackFormat.reprIsDense():
         tv.sliceElementsAccordingToGenomeAnchor()
     return tv
示例#5
0
    def testExtra(self):
        """Check extra-column detection, extra names and requirement matching."""
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            ["start", "a", "b", "c"], "float64", 1, "float64", 1
        )

        # "a" is among the prefixes, "d" is not.
        self.assertTrue(trackFormat.hasExtra(specificExtra="a"))
        self.assertFalse(trackFormat.hasExtra(specificExtra="d"))
        self.assertEqual(["a", "b", "c"], trackFormat.getExtraNames())

        formatReq = TrackFormatReq(interval=False, extra=["a", "b"])
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
示例#6
0
    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList, \
                 weightsList, borderHandling, allowOverlaps, extraLists=OrderedDict()):
        """
        Create a view on the track element lists, anchored at genomeAnchor.

        At least one of startList, endList, valList and edgesList must be
        given (not None). borderHandling must be 'crop'. A copy of
        genomeAnchor and a shallow copy of extraLists are stored.

        After the lists have been adjusted by _handlePointsAndPartitions(),
        every list that is not None must contain exactly
        self._numListElements elements.
        """
        # Identity checks: '!=' on a numpy array compares element-wise and the
        # result cannot reliably be used as a truth value.
        assert startList is not None or endList is not None or \
               valList is not None or edgesList is not None
        assert borderHandling in ['crop']

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors of missing lists are replaced by noneFunc.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        # 'curList' instead of 'list', so that the builtin is not shadowed.
        allLists = [self._startList, self._endList, self._valList, self._strandList,
                    self._idList, self._edgesList, self._weightsList] \
                   + [extraList for extraList in self._extraLists.values()]
        for i, curList in enumerate(allLists):
            assert curList is None or len(curList) == self._numListElements, \
                'List (%s): ' % i + str(curList) + ' (expected %s elements, found %s)' % (self._numListElements, len(curList))
示例#7
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Build a placeholder TrackView for a region with len(region) == 1.

        Prefixes and data types are read from the GE source of self.trackName.
        Each list named by a prefix is filled with dummy or empty values: one
        element, or two for formats that are both dense and interval.

        Raises IncompatibleTracksError if allowOverlaps is set and the format
        is dense or the source reports having no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(self.trackName, region.genome)
        prefixList = geSource.getPrefixList()
        valDataType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightDataType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        startList = endList = valList = strandList = None
        idList = edgesList = weightsList = None
        extraLists = OrderedDict()

        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixList, valDataType, valDim, weightDataType, weightDim)
        if allowOverlaps and (trackFormat.isDense() or geSource.hasNoOverlappingElements()):
            message = prettyPrintTrackName(self.trackName) + ' with format: ' \
                      + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

        denseAndInterval = trackFormat.isDense() and trackFormat.isInterval()
        numElements = 2 if denseAndInterval else 1

        # A bare 'S' type is replaced by 'S2' before the arrays are created.
        if valDataType == 'S':
            valDataType = 'S2'
        if weightDataType == 'S':
            weightDataType = 'S2'

        for prefix in prefixList:
            if prefix == 'start':
                startList = np.array([-1], dtype='int32')
            elif prefix == 'end':
                endList = np.array([0, 1] if denseAndInterval else [0], dtype='int32')
            elif prefix == 'val':
                emptyVals = [findEmptyVal(valDataType)] * valDim * numElements
                valShape = (numElements, valDim) if valDim > 1 else numElements
                valList = np.array(emptyVals, dtype=valDataType).reshape(valShape)
            elif prefix == 'strand':
                strandList = np.array([1] * numElements, dtype='int8')
            elif prefix == 'id':
                idList = np.array([''] * numElements, dtype='S1')
            elif prefix == 'edges':
                edgesList = np.array([['']] * numElements, dtype='S1')
            elif prefix == 'weights':
                emptyWeights = [[[findEmptyVal(weightDataType)]]] * weightDim * numElements
                weightShape = (numElements, 1, weightDim) if weightDim > 1 else (numElements, 1)
                weightsList = np.array(emptyWeights, dtype=weightDataType).reshape(weightShape)
            else:
                # Any other prefix is handled as an extra column.
                extraLists[prefix] = np.array([''] * numElements, dtype='S1')

        return TrackView(region, startList, endList, valList, strandList, idList,
                         edgesList, weightsList, borderHandling, allowOverlaps, extraLists)
示例#8
0
    def testCompatibilityWithExceptions(self):
        """Check isCompatibleWith() with and without a list of excepted properties."""
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            ["start", "val"], "float64", 1, "float64", 1
        )

        def makeReq(valType):
            # A fresh requirement object for every assertion.
            return TrackFormatReq(interval=True, strand=True, val=valType)

        self.assertFalse(makeReq("number").isCompatibleWith(trackFormat))
        self.assertFalse(makeReq("number").isCompatibleWith(trackFormat, ["interval"]))
        self.assertTrue(makeReq("number").isCompatibleWith(trackFormat, ["interval", "hasStrand"]))
        self.assertFalse(makeReq("tc").isCompatibleWith(trackFormat, ["interval", "hasStrand"]))
示例#9
0
    def testExtra(self):
        """Check extra-column detection, extra names and requirement matching."""
        prefixes = ['start', 'a', 'b', 'c']
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixes, 'float64', 1, 'float64', 1)

        # 'a' is among the prefixes, 'd' is not.
        self.assertTrue(trackFormat.hasExtra(specificExtra='a'))
        self.assertFalse(trackFormat.hasExtra(specificExtra='d'))
        self.assertEqual(['a', 'b', 'c'], trackFormat.getExtraNames())

        formatReq = TrackFormatReq(interval=False, extra=['a', 'b'])
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
示例#10
0
 def _getGESourceManagerFromGESource(self, geSource):
     """
     Return a RegionBasedGESourceManager for geSource over self._regionList.

     Raises NotSupportedError for dense representations whose value type is
     not 'Number', 'Number (integer)' or 'Case-control'.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

     if not trackFormat.reprIsDense():
         return RegionBasedGESourceManager(geSource, self._regionList,
                                           calcStatsInExtraPass=True,
                                           countElsInBoundingRegions=True)

     supportedValTypes = ['Number', 'Number (integer)', 'Case-control']
     if trackFormat.getValTypeName() not in supportedValTypes:
         raise NotSupportedError

     return RegionBasedGESourceManager(geSource, self._regionList,
                                       calcStatsInExtraPass=False,
                                       countElsInBoundingRegions=False)
示例#11
0
    def testValTypes(self):
        """Check value type detection for a float128 value of dimension 2."""
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            ["start", "val"], "float128", 2, "float64", 1
        )

        self.assertTrue(trackFormat.isValued(specificValType="mean_sd"))
        self.assertFalse(trackFormat.isValued(specificValType="number"))

        self.assertEqual("Mean and std.dev.", trackFormat.getValTypeName())
        self.assertEqual("Valued points", trackFormat.getFormatName())

        # A requirement for val="tc" must not match this format.
        formatReq = TrackFormatReq(interval=False, val="tc")
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
示例#12
0
    def testWeightTypes(self):
        """Check edge weight type detection for an S8 weight of dimension 3."""
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            ["id", "edges", "weights"], "float64", 1, "S8", 3
        )

        self.assertTrue(trackFormat.isWeighted(specificWeightType="category_vector"))
        self.assertFalse(trackFormat.isWeighted(specificWeightType="number"))

        self.assertEqual("Vector of categories", trackFormat.getWeightTypeName())
        self.assertEqual("Linked base pairs", trackFormat.getFormatName())

        # A requirement for weights="number" must not match this format.
        formatReq = TrackFormatReq(linked=True, weights="number")
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
示例#13
0
    def testWeightTypes(self):
        """Check edge weight type detection for an S8 weight of dimension 3."""
        prefixes = ['id', 'edges', 'weights']
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixes, 'float64', 1, 'S8', 3)

        self.assertTrue(
            trackFormat.isWeighted(specificWeightType='category_vector'))
        self.assertFalse(trackFormat.isWeighted(specificWeightType='number'))

        self.assertEqual('Vector of categories',
                         trackFormat.getWeightTypeName())
        self.assertEqual('Linked base pairs', trackFormat.getFormatName())

        # A requirement for weights='number' must not match this format.
        formatReq = TrackFormatReq(linked=True, weights='number')
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
示例#14
0
    def testValTypes(self):
        """Check value type detection for a float128 value of dimension 2."""
        prefixes = ['start', 'val']
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixes, 'float128', 2, 'float64', 1)

        self.assertTrue(trackFormat.isValued(specificValType='mean_sd'))
        self.assertFalse(trackFormat.isValued(specificValType='number'))

        self.assertEqual('Mean and std.dev.', trackFormat.getValTypeName())
        self.assertEqual('Valued points', trackFormat.getFormatName())

        # A requirement for val='tc' must not match this format.
        formatReq = TrackFormatReq(interval=False, val='tc')
        self.assertFalse(formatReq.isCompatibleWith(trackFormat))
示例#15
0
 def _getGESourceManagerFromGESource(self, geSource):
     """
     Return a RegionBasedGESourceManager for geSource over self._regionList.

     Raises NotSupportedError for dense representations whose value type is
     not 'Number', 'Number (integer)' or 'Case-control'.
     """
     trackFormat = TrackFormat.createInstanceFromGeSource(geSource)

     # Non-dense representations: stats in an extra pass, elements counted.
     if not trackFormat.reprIsDense():
         return RegionBasedGESourceManager(
             geSource, self._regionList,
             calcStatsInExtraPass=True, countElsInBoundingRegions=True)

     valTypeName = trackFormat.getValTypeName()
     if valTypeName not in ['Number', 'Number (integer)', 'Case-control']:
         raise NotSupportedError

     return RegionBasedGESourceManager(
         geSource, self._regionList,
         calcStatsInExtraPass=False, countElsInBoundingRegions=False)
示例#16
0
    def testCompatibilityWithExceptions(self):
        """Check isCompatibleWith() with and without a list of excepted properties."""
        trackFormat = TrackFormat.createInstanceFromPrefixList(
            ['start', 'val'], 'float64', 1, 'float64', 1)

        def makeReq(valType):
            # A fresh requirement object for every assertion.
            return TrackFormatReq(interval=True, strand=True, val=valType)

        exceptions = ['interval', 'hasStrand']

        self.assertFalse(makeReq('number').isCompatibleWith(trackFormat))
        self.assertFalse(
            makeReq('number').isCompatibleWith(trackFormat, ['interval']))
        self.assertTrue(
            makeReq('number').isCompatibleWith(trackFormat, exceptions))
        self.assertFalse(
            makeReq('tc').isCompatibleWith(trackFormat, exceptions))
示例#17
0
    def __iter__(self):
        """
        Return a copy of this object, prepared for iterating over the GE source.

        Raises NotSupportedError unless both 'start' and 'end' are in the
        prefix list of the GE source.
        """
        self = copy(self)

        # Does not support function, partitions and points:
        hasSegmentAttrs = [attr in self._geSource.getPrefixList()
                           for attr in ['start', 'end']]
        if not all(hasSegmentAttrs):
            trackFormat = TrackFormat.createInstanceFromPrefixList(
                self._geSource.getPrefixList(),
                self._geSource.getValDataType(),
                self._geSource.getValDim(),
                self._geSource.getEdgeWeightDataType(),
                self._geSource.getEdgeWeightDim())
            raise NotSupportedError(
                'Binning file must be segments. Current file format: ' +
                trackFormat.getFormatName())

        self._geIter = self._geSource.__iter__()
        return self
示例#18
0
 def testFormats(self):
     """
     Check TrackFormat properties for combinations of present and absent lists.

     A list is present when it is [] and absent when it is None. Only
     combinations with at least one of start, end, val and edges present
     are checked.
     """
     for startList in [None, []]:
         for endList in [None, []]:
             for valList in [None, []]:
                 for strandList in [None, []]:
                     for idList, edgesList, weightsList in [(None, None, None),
                                                            ([], None, None),
                                                            ([], [], None),
                                                            ([], [], [])]:
                         for extraLists in [None, {'a': [], 'b': []}]:
                             if [] not in [startList, endList, valList, edgesList]:
                                 continue

                             trackFormat = TrackFormat(startList, endList, valList, strandList,
                                                       idList, edgesList, weightsList, extraLists)
                             hasExtra = extraLists is not None
                             extraNames = extraLists.keys() if extraLists is not None else []
                             self._assertTrackFormat(trackFormat, startList == [], endList == [],
                                                     valList == [], strandList == [], idList == [],
                                                     edgesList == [], weightsList == [],
                                                     hasExtra=hasExtra, extra=extraNames)
示例#19
0
 def _composeContents(self, out, hbColumns, columns, geSource, onlyNonDefault=True, singleDataLine=False):
     """
     Write header lines, the column specification line and data lines to out.

     Bounding region lines are written for every bounding region that is
     not None. With singleDataLine=True, writing stops after the first
     data line of the first bounding region tuple.
     """
     # NOTE(review): the track format comes from self._geSource while the
     # elements are read from the geSource argument - confirm this is intended.
     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     out.write(self._composeHeaderLines(onlyNonDefault))
     out.write(self._composeColSpecLine(columns))

     brTuples = iterateOverBRTuplesWithContainedGEs(geSource, onlyYieldTwoGEs=singleDataLine)
     for boundingRegion, elements in brTuples:
         if boundingRegion is not None:
             out.write(self._composeBoundingRegionLine(boundingRegion))

         dataElements = self._removeStartElementIfApplicable(trackFormat, elements)
         for lineNum, element in enumerate(dataElements, 1):
             # NOTE(review): compared against the unfiltered list length;
             # verify this still marks the last line when a start element
             # has been removed.
             isLastLine = (lineNum == len(elements))
             out.write(self._composeDataLine(element, hbColumns, lineNum, isLastLine))

             if singleDataLine:
                 break

         if singleDataLine:
             break
示例#20
0
    def __iter__(self):
        """
        Return a copy of this object, prepared for iterating over the GE source.

        Raises NotSupportedError unless both 'start' and 'end' are in the
        prefix list of the GE source.
        """
        self = copy(self)

        # Does not support function, partitions and points:
        requiredAttrs = ['start', 'end']
        attrIsPresent = [
            attr in self._geSource.getPrefixList() for attr in requiredAttrs
        ]
        if False in attrIsPresent:
            currentFormat = TrackFormat.createInstanceFromPrefixList(
                self._geSource.getPrefixList(),
                self._geSource.getValDataType(),
                self._geSource.getValDim(),
                self._geSource.getEdgeWeightDataType(),
                self._geSource.getEdgeWeightDim())
            message = 'Binning file must be segments. Current file format: ' + \
                      currentFormat.getFormatName()
            raise NotSupportedError(message)

        self._geIter = self._geSource.__iter__()
        return self
示例#21
0
    def testSorting(self):
        geSourceTest = self._commonSetup()

        for caseName in geSourceTest.cases:
            if not caseName.startswith("gtrack"):
                continue

            if "no_sort" in caseName:
                print "Test case skipped: " + caseName
                continue

            print caseName
            print

            case = geSourceTest.cases[caseName]
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print

            sortedContents = sortGtrackFileAndReturnContents(testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(
                sourceClass(
                    "sortedFile.gtrack",
                    case.genome,
                    forPreProcessor=forPreProcessor,
                    printWarnings=False,
                    strToUseInsteadOfFn=sortedContents,
                )
            )

            reprIsDense = TrackFormat.createInstanceFromGeSource(sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList), [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass

            self.assertEquals(
                sorted(case.boundingRegionsAssertList), [br for br in sortedGeSource.getBoundingRegionTuples()]
            )
示例#22
0
    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList, \
                 weightsList, borderHandling, allowOverlaps, extraLists=OrderedDict()):
        """
        Create a view on the track element lists, anchored at genomeAnchor.

        At least one of startList, endList, valList and edgesList must be
        given (not None). borderHandling must be 'crop'. A copy of
        genomeAnchor and a shallow copy of extraLists are stored.

        After the lists have been adjusted by _handlePointsAndPartitions(),
        every list that is not None must contain exactly
        self._numListElements elements.
        """
        assert not (startList is None and endList is None and
                    valList is None and edgesList is None)
        assert borderHandling in ['crop']

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList,
                                       idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors of missing lists are replaced by noneFunc. The list
        # attributes follow the naming pattern '_<name>List'.
        for attrName in ('start', 'end', 'val', 'strand', 'id', 'edges', 'weights'):
            if getattr(self, '_%sList' % attrName) is None:
                setattr(self._trackElement, attrName, noneFunc)

        self._updateNumListElements()

        reservedLists = [self._startList, self._endList, self._valList, self._strandList,
                         self._idList, self._edgesList, self._weightsList]
        allLists = reservedLists + list(self._extraLists.values())
        for index, curList in enumerate(allLists):
            assert curList is None or len(curList) == self._numListElements, \
                'List (%s): ' % index + str(curList) + \
                ' (expected %s elements, found %s)' % (self._numListElements, len(curList))
示例#23
0
    def _compose(self, out):
        """
        Print the GE source to out as a 'wiggle_0' track.

        A track line is printed first, with the track name if there is one.
        Bounding regions without elements are skipped. Each remaining one
        gets a fixedStep or variableStep declaration line followed by one
        line per element.
        """
        trackName = self._geSource.getTrackName()
        name = None
        if trackName is not None:
            name = ':'.join(self._geSource.getTrackName()).replace(' ', '_')

        nameField = (' name=%s' % name) if name is not None else ''
        print >> out, 'track type=wiggle_0' + nameField

        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        span = self._geSource.getFixedLength()
        step = self._geSource.getFixedGapSize() + span

        isFixedStep = (trackFormat.reprIsDense() or step > 1
                       or (step == 1 and span != 1))

        for brt, elements in iterateOverBRTuplesWithContainedGEs(self._geSource):
            if len(elements) == 0:
                continue

            if isFixedStep:
                self._composeFixedStepDeclarationLine(out, brt.region, step, span)
            else:
                curChr, curSpan = \
                    self._composeVariableStepDeclarationLine(out, elements[0])

            for index, element in enumerate(elements):
                # Skip the start element that the source adds to dense intervals.
                skipElement = (index == 0 and trackFormat.isDense()
                               and trackFormat.isInterval()
                               and self._geSource.addsStartElementToDenseIntervals())
                if skipElement:
                    continue

                val = self._commonFormatNumberVal(element.val)

                if isFixedStep:
                    cols = [val]
                else:
                    # A new declaration line is needed when chr or span changes.
                    if element.chr != curChr or self._getVariableSpan(element) != curSpan:
                        curChr, curSpan = \
                            self._composeVariableStepDeclarationLine(out, element)
                    cols = [str(element.start + 1), val]

                print >> out, '\t'.join([str(col) for col in cols])
示例#24
0
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """
        Generate the GE source managers to use for preprocessing trackName.

        If allowOverlaps is False and the collector reports the overlap rule
        as finalized, a single manager created from the track is yielded.
        Otherwise one manager is yielded per GE source that should be
        preprocessed. With allowOverlaps == True, generation stops at the
        first source that is dense or has no overlapping elements.
        self._status is updated before each manager is considered.
        """
        collector = PreProcMetaDataCollector(self._genome, trackName)

        if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
            trackInfo = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            overlapInfo = ' (allowOverlaps: %s)' % allowOverlaps
            self._status = trackInfo + overlapInfo
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    return

            trackInfo = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            if geSource.hasOrigFile():
                fileInfo = ' (filename: "%s")' % geSource.getFileName()
            else:
                fileInfo = ''
            overlapInfo = ' (allowOverlaps: %s)' % allowOverlaps
            self._status = trackInfo + fileInfo + overlapInfo

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
示例#25
0
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """
        Generate the GE source managers to use for preprocessing trackName.

        If allowOverlaps is False and the collector reports the overlap rule
        as finalized, a single manager created from the track is yielded.
        Otherwise one manager is yielded per GE source that should be
        preprocessed. With allowOverlaps == True, generation stops at the
        first source that is dense or has no overlapping elements.
        self._status is updated before each manager is considered.
        """
        collector = PreProcMetaDataCollector(self._genome, trackName)
        overlapRuleFinalized = (allowOverlaps == False
                                and collector.overlapRuleHasBeenFinalized(True))

        if overlapRuleFinalized:
            statusText = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            statusText = statusText + (' (allowOverlaps: %s)' % allowOverlaps)
            self._status = statusText
            yield self._getGESourceManagerFromTrack(trackName)
        else:
            for geSource in self._allGESources(trackName):
                if allowOverlaps == True:
                    trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                    if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                        return

                statusText = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
                # The file name is only reported for sources with an original file.
                if geSource.hasOrigFile():
                    statusText = statusText + (' (filename: "%s")' % geSource.getFileName())
                else:
                    statusText = statusText + ''
                statusText = statusText + (' (allowOverlaps: %s)' % allowOverlaps)
                self._status = statusText

                shouldPreProcess = PreProcessUtils.shouldPreProcessGESource(
                    trackName, geSource, allowOverlaps)
                if shouldPreProcess:
                    yield self._getGESourceManagerFromGESource(geSource)
示例#26
0
    def _compose(self, out):
        """
        Print the GE source to out as a 'wiggle_0' track.

        A track line is printed first, with the track name if there is one.
        Bounding regions without elements are skipped. Each remaining one
        gets a fixedStep or variableStep declaration line followed by one
        line per element.
        """
        trackName = self._geSource.getTrackName()
        name = None
        if trackName is not None:
            name = ':'.join(self._geSource.getTrackName()).replace(' ','_')

        nameField = (' name=%s' % name) if name is not None else ''
        print >>out, 'track type=wiggle_0' + nameField

        trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
        span = self._geSource.getFixedLength()
        step = self._geSource.getFixedGapSize() + span

        isFixedStep = (trackFormat.reprIsDense() or step > 1 or (step == 1 and span != 1))

        for brt, elements in iterateOverBRTuplesWithContainedGEs(self._geSource):
            if len(elements) == 0:
                continue

            if isFixedStep:
                self._composeFixedStepDeclarationLine(out, brt.region, step, span)
            else:
                curChr, curSpan = self._composeVariableStepDeclarationLine(out, elements[0])

            for index, element in enumerate(elements):
                # Skip the start element that the source adds to dense intervals.
                skipElement = (index == 0 and trackFormat.isDense() and trackFormat.isInterval()
                               and self._geSource.addsStartElementToDenseIntervals())
                if skipElement:
                    continue

                val = self._commonFormatNumberVal(element.val)

                if isFixedStep:
                    cols = [val]
                else:
                    # A new declaration line is needed when chr or span changes.
                    if element.chr != curChr or self._getVariableSpan(element) != curSpan:
                        curChr, curSpan = self._composeVariableStepDeclarationLine(out, element)
                    cols = [str(element.start+1), val]

                print >>out, '\t'.join([str(col) for col in cols])
示例#27
0
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        """
        Build a placeholder TrackView for a region with len(region) == 1.

        Prefixes and data types are read from the GE source of self.trackName.
        Each list named by a prefix is filled with dummy or empty values: one
        element, or two for formats that are both dense and interval.

        Raises IncompatibleTracksError if allowOverlaps is set and the format
        is dense or the source reports having no overlapping elements.
        """
        assert len(region) == 1

        from collections import OrderedDict
        from gtrackcore.track.memmap.CommonMemmapFunctions import findEmptyVal
        from gtrackcore.track.core.TrackView import TrackView
        import numpy as np

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            self.trackName, region.genome)
        prefixList = geSource.getPrefixList()
        valDataType = geSource.getValDataType()
        valDim = geSource.getValDim()
        weightDataType = geSource.getEdgeWeightDataType()
        weightDim = geSource.getEdgeWeightDim()

        startList = endList = valList = strandList = None
        idList = edgesList = weightsList = None
        extraLists = OrderedDict()

        trackFormat = TrackFormat.createInstanceFromPrefixList(
            prefixList, valDataType, valDim, weightDataType, weightDim)
        if allowOverlaps and (trackFormat.isDense()
                              or geSource.hasNoOverlappingElements()):
            message = prettyPrintTrackName(self.trackName) + ' with format: ' \
                      + str(trackFormat) + ' does not satisfy ' + str(self._trackFormatReq)
            raise IncompatibleTracksError(message)

        denseAndInterval = trackFormat.isDense() and trackFormat.isInterval()
        numElements = 2 if denseAndInterval else 1

        # A bare 'S' type is replaced by 'S2' before the arrays are created.
        if valDataType == 'S':
            valDataType = 'S2'
        if weightDataType == 'S':
            weightDataType = 'S2'

        for prefix in prefixList:
            if prefix == 'start':
                startList = np.array([-1], dtype='int32')
            elif prefix == 'end':
                endValues = [0, 1] if denseAndInterval else [0]
                endList = np.array(endValues, dtype='int32')
            elif prefix == 'val':
                emptyVals = [findEmptyVal(valDataType)] * valDim * numElements
                valShape = (numElements, valDim) if valDim > 1 else numElements
                valList = np.array(emptyVals, dtype=valDataType).reshape(valShape)
            elif prefix == 'strand':
                strandList = np.array([1] * numElements, dtype='int8')
            elif prefix == 'id':
                idList = np.array([''] * numElements, dtype='S1')
            elif prefix == 'edges':
                edgesList = np.array([['']] * numElements, dtype='S1')
            elif prefix == 'weights':
                emptyWeights = [[[findEmptyVal(weightDataType)]]] * weightDim * numElements
                if weightDim > 1:
                    weightShape = (numElements, 1, weightDim)
                else:
                    weightShape = (numElements, 1)
                weightsList = np.array(emptyWeights, dtype=weightDataType).reshape(weightShape)
            else:
                # Any other prefix is handled as an extra column.
                extraLists[prefix] = np.array([''] * numElements, dtype='S1')

        return TrackView(region, startList, endList, valList, strandList,
                         idList, edgesList, weightsList, borderHandling,
                         allowOverlaps, extraLists)
示例#28
0
 def _init(self):
     """
     Set self._allValsAreBedVals.

     The flag is True only if the value type of the GE source is
     'Number (integer)' and every element value lies in [0, 1000].
     """
     self._allValsAreBedVals = False

     valTypeName = TrackFormat.createInstanceFromGeSource(self._geSource).getValTypeName()
     if valTypeName != 'Number (integer)':
         return

     # Iterates over the whole GE source, unless a value is out of range.
     self._allValsAreBedVals = all((0 <= element.val <= 1000) for element in self._geSource)
 def getTrackFormat(self):
     """Return a TrackFormat built from the stored prefix list, data types and dimensions."""
     formatArgs = (self._prefixList,
                   self._valDataType,
                   self._valDim,
                   self._weightDataType,
                   self._weightDim)
     return TrackFormat.createInstanceFromPrefixList(*formatArgs)
示例#30
0
 def _assertIsCompatibleWith(self, tfReq, reqList):
     """
     Check tfReq.isCompatibleWith() against an expectation derived from reqList.

     For every combination of lists with at least one of start, end and val
     present, a TrackFormat is created. It is expected to be compatible only
     if every entry of reqList is None or equals the corresponding property
     of the format.
     """
     for startList in [None, []]:
         for endList in [None, []]:
             for valList in [None, []]:
                 for strandList in [None, []]:
                     for idList, edgesList, weightsList in [(None, None, None),
                                                            ([], None, None),
                                                            ([], [], None),
                                                            ([], [], [])]:
                         for extraLists in [None, {'a': [], 'b': []}]:
                             if [] not in [startList, endList, valList]:
                                 continue

                             tf = TrackFormat(startList, endList, valList, strandList,
                                              idList, edgesList, weightsList, extraLists)
                             # Empty names and an empty extra list are represented by False.
                             propList = [
                                 tf.isDense(), tf.isValued(), tf.isInterval(), tf.isLinked(),
                                 tf.hasStrand(), tf.hasId(), tf.isWeighted(), tf.hasExtra(),
                                 tf.getValTypeName() if tf.getValTypeName() != '' else False,
                                 tf.getWeightTypeName() if tf.getWeightTypeName() != '' else False,
                                 tf.getExtraNames() if tf.getExtraNames() != [] else False,
                             ]
                             matches = [(req == None or req == prop)
                                        for req, prop in zip(reqList, propList)]
                             isCompatible = False not in matches
                             self.assertEqual(isCompatible, tfReq.isCompatibleWith(tf))
示例#31
0
 def _init(self):
     """
     Set self._allValsAreBedVals.

     The flag is True only if the value type of the GE source is
     'Number (integer)' and every element value lies in [0, 1000].
     """
     self._allValsAreBedVals = False

     trackFormat = TrackFormat.createInstanceFromGeSource(self._geSource)
     hasIntegerVals = trackFormat.getValTypeName() == 'Number (integer)'
     if hasIntegerVals:
         # Iterates over the whole GE source, unless a value is out of range.
         valsInRange = ((0 <= element.val <= 1000) for element in self._geSource)
         self._allValsAreBedVals = all(valsInRange)
示例#32
0
 def getTrackFormat(self):
     """Return a TrackFormat built from the stored prefix list, data types and dimensions."""
     createFormat = TrackFormat.createInstanceFromPrefixList
     return createFormat(self._prefixList,
                         self._valDataType, self._valDim,
                         self._weightDataType, self._weightDim)
示例#33
0
class TrackView(object):
    """View of a track's element lists relative to a genome anchor.

    The view holds parallel lists (starts, ends, values, strands, ids, edges,
    weights and any number of named "extra" lists) together with a copy of the
    genome anchor they are viewed against.  Any list may be None when the
    track does not carry that kind of information.  Iterating the view yields
    the same ``TrackElement`` object repeatedly, repositioned on each step.
    """

    def _handlePointsAndPartitions(self):
        """Fill in the implicit coordinate list for formats that omit one.

        * Format is dense but its representation is not: only ends are
          stored.  Starts become the ends shifted by one position
          (``ends[:-1]``) and the first entry is dropped from every other
          list so that all lists stay aligned.
        * Format is neither dense nor interval: the end list is replaced by a
          ``VirtualPointEnd`` wrapping the start list.
        """
        if self.trackFormat.isDense() and not self.trackFormat.reprIsDense():
            self._startList = self._endList[:-1]
            self._endList = self._endList[1:]
            # Identity checks on purpose: the lists may be numpy arrays, for
            # which "!= None" is an element-wise comparison.
            if self._valList is not None:
                self._valList = self._valList[1:]
            if self._strandList is not None:
                self._strandList = self._strandList[1:]
            if self._idList is not None:
                self._idList = self._idList[1:]
            if self._edgesList is not None:
                self._edgesList = self._edgesList[1:]
            if self._weightsList is not None:
                self._weightsList = self._weightsList[1:]
            for key, extraList in list(self._extraLists.items()):
                if extraList is not None:
                    self._extraLists[key] = extraList[1:]
        if not self.trackFormat.isDense() and not self.trackFormat.isInterval():
            self._endList = VirtualPointEnd(self._startList)

    def __init__(self, genomeAnchor, startList, endList, valList, strandList, idList, edgesList, \
                 weightsList, borderHandling, allowOverlaps, extraLists=None):
        """Create a view over the given element lists.

        genomeAnchor   -- region the view is anchored to; a copy is stored.
        startList .. weightsList -- parallel element lists, each may be None.
                          At least one of startList, endList, valList and
                          edgesList must be given.
        borderHandling -- only 'crop' is accepted.
        allowOverlaps  -- whether elements may overlap; enables the
                          "blind passenger" filtering during iteration.
        extraLists     -- optional mapping name -> list of extra columns.
                          Defaults to an empty OrderedDict; a shallow copy is
                          kept on the view.
        """
        # A fresh mapping per call instead of one shared mutable default.
        if extraLists is None:
            extraLists = OrderedDict()

        assert startList is not None or endList is not None \
            or valList is not None or edgesList is not None
        assert borderHandling in ['crop']

        self.genomeAnchor = genomeAnchor.getCopy()
        self.trackFormat = TrackFormat(startList, endList, valList, strandList, idList, edgesList, weightsList, extraLists)
        self.borderHandling = borderHandling
        self.allowOverlaps = allowOverlaps

        self._trackElement = TrackElement(self)

        self._startList = startList
        self._endList = endList
        self._valList = valList
        self._strandList = strandList
        self._idList = idList
        self._edgesList = edgesList
        self._weightsList = weightsList
        self._extraLists = copy(extraLists)

        self._handlePointsAndPartitions()

        # Accessors of the shared track element that have no backing list are
        # replaced by noneFunc.
        if self._startList is None:
            self._trackElement.start = noneFunc
        if self._endList is None:
            self._trackElement.end = noneFunc
        if self._valList is None:
            self._trackElement.val = noneFunc
        if self._strandList is None:
            self._trackElement.strand = noneFunc
        if self._idList is None:
            self._trackElement.id = noneFunc
        if self._edgesList is None:
            self._trackElement.edges = noneFunc
        if self._weightsList is None:
            self._trackElement.weights = noneFunc

        self._updateNumListElements()

        # Every list that is present must have the same number of elements.
        allLists = [self._startList, self._endList, self._valList, self._strandList,
                    self._idList, self._edgesList, self._weightsList] \
            + list(self._extraLists.values())
        for i, curList in enumerate(allLists):
            assert curList is None or len(curList) == self._numListElements, 'List (%s): ' % i + str(curList) + ' (expected %s elements, found %s)' % (self._numListElements, len(curList))

    def __iter__(self):
        """Restart iteration and return the view itself as the iterator."""
        self._trackElement._index = -1
        return self

    def _updateNumListElements(self):
        """Recompute the cached list length and iteration length.

        ``_numListElements`` is the raw length of the element lists.
        ``_numIterElements`` equals it unless overlaps are allowed and the
        lists are non-empty, in which case it is computed separately.
        """
        self._numListElements = self._computeNumListElements()

        if self.allowOverlaps and self._numListElements > 0:
            self._numIterElements = self._computeNumIterElements()
        else:
            self._numIterElements = self._numListElements

    def _computeNumListElements(self):
        """Return the length of the first available list among
        starts, ends, values and edges.

        Raises ShouldNotOccurError when all four are None.
        """
        for curList in [self._startList, self._endList, self._valList, self._edgesList]:
            if curList is not None:
                return len(curList)
        raise ShouldNotOccurError

    def _computeNumIterElements(self):
        """Return the number of elements that iteration will yield.

        For a numpy array the blind passengers are filtered out and the
        remainder is counted; otherwise the view itself is iterated (which
        resets the iteration position).

        Raises ShouldNotOccurError when starts, ends, values and edges are
        all None.
        """
        for curList in [self._startList, self._endList, self._valList, self._edgesList]:
            if curList is not None:
                if isinstance(curList, numpy.ndarray):
                    return len(self._removeBlindPassengersFromNumpyArray(curList))
                else:
                    return sum(1 for x in self)
        raise ShouldNotOccurError

    def __len__(self):
        """Return the length of the genome anchor (not the element count)."""
        return self._bpSize()

    def getNumElements(self):
        """Return the cached number of elements that iteration yields."""
        return self._numIterElements

    def _bpSize(self):
        """Return ``len(self.genomeAnchor)``."""
        return len(self.genomeAnchor)

    def next(self):
        """Advance to the next element and return the shared TrackElement.

        Raises StopIteration when the lists are exhausted.
        """
        self._trackElement._index += 1

        #To remove any blind passengers - segments entirely in front of genomeanchor,
        # but sorted after a larger segment crossing the border
        if self.allowOverlaps and not self.trackFormat.reprIsDense():
            while self._trackElement._index < self._numListElements \
                    and self._endList[self._trackElement._index] <= self.genomeAnchor.start:
                self._trackElement._index += 1

        if self._trackElement._index < self._numListElements:
            return self._trackElement
        else:
            raise StopIteration

    # Python 3 spelling of the iterator protocol; harmless under Python 2.
    __next__ = next

    def _findLeftIndex(self):
        """Return the index of the first element whose end lies after the
        anchor start."""
        leftIndex = 0
        #remove track elements entirely to the left of the anchor
        while leftIndex < len(self._endList) and self._endList[leftIndex] <= self.genomeAnchor.start:
            leftIndex += 1
        return leftIndex

    def _findRightIndex(self):
        """Return one past the index of the last element whose start lies
        before the anchor end."""
        rightIndex = self._numListElements
        while rightIndex > 0 and self._startList[rightIndex-1] >= self.genomeAnchor.end:
            rightIndex -= 1
        return rightIndex

    def sliceElementsAccordingToGenomeAnchor(self):
        """Trim the element lists to the genome anchor.

        Only valid when the representation is not dense.
        """
        assert( not self.trackFormat.reprIsDense() )
        self._doScatteredSlicing()

    def _doScatteredSlicing(self):
        """Cut all lists down to the elements between the left and right
        index computed from the genome anchor.

        An anchor of zero length yields empty lists.  Extra lists that are
        None are left as None.
        """
        leftIndex = self._findLeftIndex()
        rightIndex = self._findRightIndex()

        if self._bpSize() == 0:
            rightIndex = leftIndex

        self._startList = self._startList[leftIndex:rightIndex]
        self._endList = self._endList[leftIndex:rightIndex]

        if self._valList is not None:
            self._valList = self._valList[leftIndex:rightIndex]
        if self._strandList is not None:
            self._strandList = self._strandList[leftIndex:rightIndex]
        if self._idList is not None:
            self._idList = self._idList[leftIndex:rightIndex]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[leftIndex:rightIndex]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[leftIndex:rightIndex]
        for key, extraList in list(self._extraLists.items()):
            # None extras are accepted by __init__, so tolerate them here too.
            if extraList is not None:
                self._extraLists[key] = extraList[leftIndex:rightIndex]
        self._updateNumListElements()

    def _doDenseSlicing(self, i, j):
        """Slice the value, strand, id, edges, weights and extra lists to
        ``[i:j]``.  Start and end lists are not touched.

        Extra lists that are None are left as None.
        """
        if self._valList is not None:
            self._valList = self._valList[i:j]
        if self._strandList is not None:
            self._strandList = self._strandList[i:j]
        if self._idList is not None:
            self._idList = self._idList[i:j]
        if self._edgesList is not None:
            self._edgesList = self._edgesList[i:j]
        if self._weightsList is not None:
            self._weightsList = self._weightsList[i:j]
        for key, extraList in list(self._extraLists.items()):
            # None extras are accepted by __init__, so tolerate them here too.
            if extraList is not None:
                self._extraLists[key] = extraList[i:j]
        self._updateNumListElements()

    def __getslice__(self, i, j):
        """Return a new TrackView restricted to positions ``[i:j]`` of this
        view (Python 2 slice hook).

        The new view's anchor start is moved forward by ``i``.  For
        non-negative ``j`` its end becomes ``anchor.start + j`` capped at the
        current anchor end; for negative ``j`` the end is moved back by
        ``|j|``.  The element lists are then sliced to match.
        """
        slicedTV = TrackView(self.genomeAnchor, self._startList, self._endList, \
                             self._valList, self._strandList, self._idList, \
                             self._edgesList, self._weightsList, \
                             self.borderHandling, self.allowOverlaps, \
                             extraLists=self._extraLists)
        # The constructor derives a format from the (already normalised)
        # lists; the original format is the authoritative one.
        slicedTV.trackFormat = self.trackFormat

        slicedTV.genomeAnchor.start += i
        if j >= 0:
            try:
                slicedTV.genomeAnchor.end = min(self.genomeAnchor.end, self.genomeAnchor.start + j)
            except FloatingPointError: # Caused by trackView[:] with self.genomeAnchor.start > 0
                slicedTV.genomeAnchor.end = self.genomeAnchor.end
        else:
            slicedTV.genomeAnchor.end += j

        if self.trackFormat.reprIsDense():
            slicedTV._doDenseSlicing(i,j)
        else:
            slicedTV._doScatteredSlicing()
        return slicedTV

    def _getBpLevelModificationArray(self, indexes, vals):
        """Return an array of length ``bpSize + 1`` holding, per position,
        the sum of the ``vals`` whose index equals that position."""
        bpLevelMod = numpy.bincount(indexes, vals)
        origLen = len(bpLevelMod)
        # In-place resize to the full anchor length (plus one slot for ends
        # that coincide with the anchor end); clear whatever was appended.
        bpLevelMod.resize(self._bpSize()+1)
        bpLevelMod[origLen:] = 0
        return bpLevelMod

    def _commonGetBpLevelArray(self, vals):
        """Spread per-element ``vals`` out to one entry per base pair.

        With a dense representation ``vals`` is returned unchanged
        (overlaps are not supported there and raise ShouldNotOccurError).
        Otherwise each value is added at its element's start and subtracted
        at its end, and the running sum gives the per-position total.
        """
        if self.trackFormat.reprIsDense():
            if self.allowOverlaps:
                raise ShouldNotOccurError()
            return vals
        else:
            bpLevelArray = numpy.zeros(self._bpSize()+1)
            numElements = self.getNumElements()
            if numElements > 0:
                bpLevelArray += self._getBpLevelModificationArray(self.startsAsNumpyArray(), vals)
                bpLevelArray -= self._getBpLevelModificationArray(self.endsAsNumpyArray(), vals)
                bpLevelArray = bpLevelArray.cumsum()
            return bpLevelArray[:-1]

    def getBinaryBpLevelArray(self):
        """Return a boolean per-position array built from a weight of one
        per element (True where the summed weight is non-zero)."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        # Builtin bool is the portable spelling of the boolean dtype.
        # NOTE(review): the former 'bool8' alias is believed to be gone from
        # recent NumPy releases -- confirm against the NumPy version in use.
        return numpy.array(self._commonGetBpLevelArray(vals), dtype=bool)

    def getCoverageBpLevelArray(self):
        """Return an int32 per-position array of summed element weights,
        using a weight of one per element."""
        vals = numpy.ones(self.getNumElements(), dtype='int32')
        return numpy.array(self._commonGetBpLevelArray(vals), dtype='int32')

    def getValueBpLevelArray(self, voidValue=0):
        '''
        Creates a bp-level function of any valued track. In case of scattered tracks,
        uncovered areas are filled with voidValue (which would typically be set to 0 or numpy.nan).
        In the case of overlapping regions, the values are added.'''

        assert self.trackFormat.isValued('number'), self.trackFormat
        vals = self.valsAsNumpyArray()
        bpLevelArray = numpy.array(self._commonGetBpLevelArray(vals), dtype=vals.dtype)
        if voidValue != 0:
            bpLevelArray[~self.getBinaryBpLevelArray()] = voidValue
        return bpLevelArray

    def _removeBlindPassengersFromNumpyArray(self, numpyArray):
        '''
        To remove any blind passengers - segments entirely in front of genomeanchor,
        but sorted after a larger segment crossing the border.
        '''
        if self.allowOverlaps and len(numpyArray) > 0:
            numpyArray = numpyArray[numpy.where(self._endList > self.genomeAnchor.start)]
        return numpyArray

    def _commonAsNumpyArray(self, numpyArray, numpyArrayModMethod, name):
        """Return ``numpyArray`` with blind passengers removed and, if given,
        ``numpyArrayModMethod`` applied.  Returns None for a None input.

        ``name`` is accepted for the callers' benefit but not used here.
        """
        assert(self.borderHandling in ['crop'])
        if numpyArray is None:
            return None

        numpyArray = self._removeBlindPassengersFromNumpyArray(numpyArray)

        if numpyArrayModMethod is not None:
            return numpyArrayModMethod(numpyArray)
        else:
            return numpyArray

    def startsAsNumpyArray(self):
        """Return element starts relative to the anchor start, clipped at 0."""
        return self._commonAsNumpyArray(self._startList, self._startListModMethod, 'starts')

    def _startListModMethod(self, startList):
        """Shift starts by the anchor start and clip negative results to 0."""
        return numpy.maximum(startList - self.genomeAnchor.start, \
                             numpy.zeros(len(startList), dtype='int32'))

    def endsAsNumpyArray(self):
        """Return element ends relative to the anchor start, clipped at the
        anchor length."""
        return self._commonAsNumpyArray(self._endList, self._endListModMethod, 'ends')

    def _endListModMethod(self, endList):
        """Shift ends by the anchor start and clip them to the anchor length."""
        return numpy.minimum(endList - self.genomeAnchor.start, \
                             numpy.zeros(len(endList), dtype='int32') + len(self.genomeAnchor))

    def valsAsNumpyArray(self):
        """Return the value list (blind passengers removed), or None."""
        return self._commonAsNumpyArray(self._valList, None, 'vals')

    def strandsAsNumpyArray(self):
        """Return the strand list (blind passengers removed), or None."""
        return self._commonAsNumpyArray(self._strandList, None, 'strands')

    def idsAsNumpyArray(self):
        """Return the id list (blind passengers removed), or None."""
        return self._commonAsNumpyArray(self._idList, None, 'ids')

    def edgesAsNumpyArray(self):
        """Return the edges list (blind passengers removed), or None."""
        return self._commonAsNumpyArray(self._edgesList, None, 'edges')

    def weightsAsNumpyArray(self):
        """Return the weights list (blind passengers removed), or None."""
        return self._commonAsNumpyArray(self._weightsList, None, 'weights')

    def extrasAsNumpyArray(self, key):
        """Return the extra list stored under ``key`` (blind passengers
        removed).  ``key`` must be present."""
        assert self.hasExtra(key)
        return self._commonAsNumpyArray(self._extraLists[key], None, 'extras')

    def allExtrasAsDictOfNumpyArrays(self):
        """Return an OrderedDict mapping every extra name to its array, in
        the stored order."""
        return OrderedDict([(key,self.extrasAsNumpyArray(key)) for key in self._extraLists])

    def hasExtra(self, key):
        """Return True when an extra list is stored under ``key``."""
        return key in self._extraLists
示例#34
0
            testFn = self._writeTestFile(case)
            print open(testFn).read()
            print

            sortedContents = sortGtrackFileAndReturnContents(
                testFn, case.genome)
            print sortedContents

            sourceClass = GenomeElementSource if case.sourceClass is None else case.sourceClass
            forPreProcessor = True if case.sourceClass is None else False
            sortedGeSource = GEDependentAttributesHolder(sourceClass('sortedFile.gtrack', case.genome, \
                                                                     forPreProcessor=forPreProcessor, \
                                                                     printWarnings=False, \
                                                                     strToUseInsteadOfFn=sortedContents))

            reprIsDense = TrackFormat.createInstanceFromGeSource(
                sortedGeSource).reprIsDense()

            if not reprIsDense:
                self.assertEquals(sorted(case.assertElementList),
                                  [ge for ge in sortedGeSource])
            else:
                for ge in sortedGeSource:
                    pass

            self.assertEquals(
                sorted(case.boundingRegionsAssertList),
                [br for br in sortedGeSource.getBoundingRegionTuples()])

    def runTest(self):
        """Do nothing and return None.

        NOTE(review): presumably defined so the test class can be
        instantiated without naming a specific test method -- confirm.
        """
        return None
示例#35
0
 def _assertIsCompatibleWith(self, tfReq, reqList):
     """Check ``tfReq.isCompatibleWith`` against every TrackFormat variant.

     Builds a TrackFormat for each combination of present/absent start, end,
     val, strand, id/edges/weights and extra lists (skipping combinations
     where none of start, end and val is present) and asserts that
     ``tfReq.isCompatibleWith(tf)`` equals the expectation derived from
     ``reqList``.

     tfReq   -- the requirement object under test.
     reqList -- expected properties, positionally matching the property list
                built below; a None entry means "no requirement".
     """
     from itertools import product

     presence = [None, []]
     # id, edges and weights are only tried in these nested combinations.
     linkOptions = [(None, None, None), ([], None, None), ([], [], None), ([], [], [])]
     extraOptions = [None, {"a": [], "b": []}]

     # product() varies the right-most option fastest, i.e. the same order
     # as the equivalent nested for-loops.
     for start, end, val, strand, (idList, edges, weights), extra in product(
             presence, presence, presence, presence, linkOptions, extraOptions):
         if [] not in [start, end, val]:
             continue

         tf = TrackFormat(start, end, val, strand, idList, edges, weights, extra)
         propList = [
             tf.isDense(),
             tf.isValued(),
             tf.isInterval(),
             tf.isLinked(),
             tf.hasStrand(),
             tf.hasId(),
             tf.isWeighted(),
             tf.hasExtra(),
         ]
         # An "empty" type name / name list is represented as False.
         for prop, emptyValue in ((tf.getValTypeName(), ""),
                                  (tf.getWeightTypeName(), ""),
                                  (tf.getExtraNames(), [])):
             propList.append(prop if prop != emptyValue else False)

         isCompatible = all(
             req is None or req == prop
             for req, prop in zip(reqList, propList)
         )
         self.assertEqual(isCompatible, tfReq.isCompatibleWith(tf))