Exemplo n.º 1
0
 def checkUndirectedEdges(genome, trackName, allowOverlaps):
     collector = TrackInfoDataCollector(genome, trackName)
     if not (collector.getTrackFormat().isLinked() and collector.hasUndirectedEdges()):
         return
     
     complementEdgeWeightDict = {}
     
     for chr in collector.getPreProcessedChrs(allowOverlaps):
         trackSource = TrackSource()
         trackData = trackSource.getTrackData(trackName, genome, chr, allowOverlaps)
         
         ids = trackData['id']
         edges = trackData['edges']
         weights = trackData.get('weights')
         
         for i, id in enumerate(ids):
             edgesAttr = edges[i][edges[i] != '']
             weightsAttr = weights[i][edges[i] != ''] if weights is not None else None
             PreProcessUtils._adjustComplementaryEdgeWeightDict(complementEdgeWeightDict, id, edgesAttr, weightsAttr)
     
     if len(complementEdgeWeightDict) != 0:
             unmatchedPairs = []
             for toId in complementEdgeWeightDict:
                 for fromId in complementEdgeWeightDict[toId]:
                     unmatchedPairs.append((fromId, toId, complementEdgeWeightDict[toId][fromId]))
             raise InvalidFormatError("Error: All edges are not undirected. The following edges specifications " +\
                                      "are not matched by an opposite edge with equal weight:" + os.linesep +\
                                      os.linesep.join(["from '%s' to '%s'" % (fromId, toId) + \
                                                       (" with weight '%s'" % weight  if weight != '' else '') \
                                                       for fromId, toId, weight in unmatchedPairs]))
Exemplo n.º 2
0
    def _getNumTrackElements(self, trackName, chr, allowOverlaps):
        trackSource = TrackSource()
        trackData = trackSource.getTrackData(trackName, self.GENOME, chr,
                                             allowOverlaps)
        brShelve = trackData.boundingRegionShelve
        if brShelve:
            chrBoundingRegions = brShelve.getAllBoundingRegionsForChr(chr)
            numTrackElements = sum(
                self._getTrackView(trackName, br,
                                   allowOverlaps).getNumElements()
                for br in chrBoundingRegions)
            assert numTrackElements == brShelve.getTotalElementCountForChr(chr)
        else:  # chr-specific folders have not been merged
            numTrackElements = self._getNumTrackElementsFromMemmap(
                trackName, chr, allowOverlaps)

        return numTrackElements
Exemplo n.º 3
0
 def checkIfEdgeIdsExist(genome, trackName, allowOverlaps):
     collector = PreProcMetaDataCollector(genome, trackName)
     if not collector.getTrackFormat().isLinked():
         return
     
     uniqueIds = numpy.array([], dtype='S')
     uniqueEdgeIds = numpy.array([], dtype='S')
     
     for chr in collector.getPreProcessedChrs(allowOverlaps):
         trackSource = TrackSource()
         trackData = trackSource.getTrackData(trackName, genome, chr, allowOverlaps)
         uniqueIds = numpy.unique(numpy.concatenate((uniqueIds, trackData['id'][:])))
         uniqueEdgeIds = numpy.unique(numpy.concatenate((uniqueEdgeIds, trackData['edges'][:].flatten())))
     
     uniqueIds = uniqueIds[uniqueIds != '']
     uniqueEdgeIds = uniqueEdgeIds[uniqueEdgeIds != '']
     
     unmatchedIds = set(uniqueEdgeIds) - set(uniqueIds)
     if len(unmatchedIds) > 0:
         raise InvalidFormatError("Error: the following ids specified in the 'edges' column do not exist in the dataset: " + ', '.join(sorted(unmatchedIds)))
Exemplo n.º 4
0
 def checkIfEdgeIdsExist(genome, trackName, allowOverlaps):
     collector = TrackInfoDataCollector(genome, trackName)
     if not collector.getTrackFormat().isLinked():
         return
     
     uniqueIds = numpy.array([], dtype='S')
     uniqueEdgeIds = numpy.array([], dtype='S')
     
     for chr in collector.getPreProcessedChrs(allowOverlaps):
         trackSource = TrackSource()
         trackData = trackSource.getTrackData(trackName, genome, chr, allowOverlaps)
         uniqueIds = numpy.unique(numpy.concatenate((uniqueIds, trackData['id'][:])))
         uniqueEdgeIds = numpy.unique(numpy.concatenate((uniqueEdgeIds, trackData['edges'][:].flatten())))
     
     uniqueIds = uniqueIds[uniqueIds != '']
     uniqueEdgeIds = uniqueEdgeIds[uniqueEdgeIds != '']
     
     unmatchedIds = set(uniqueEdgeIds) - set(uniqueIds)
     if len(unmatchedIds) > 0:
         raise InvalidFormatError("Error: the following ids specified in the 'edges' column do not exist in the dataset: " + ', '.join(sorted(unmatchedIds)))
Exemplo n.º 5
0
class Track(object):
    IS_MEMOIZABLE = True
    def __new__(cls, trackName):
        if trackName == [] or trackName is None:
            return None
        else:
            if ExternalTrackManager.isVirtualTrack(trackName):
                return VirtualMinimalTrack.__new__(VirtualMinimalTrack)
            else:
                return object.__new__(cls)
    
    def __init__(self, trackName):
        self.trackName = trackName
        self._trackSource = TrackSource()
        self._trackViewLoader = TrackViewLoader()
        self._trackFormatReq = NeutralTrackFormatReq()
        self.formatConverters = None
        self._trackId = None
        
    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        trackData = self._trackSource.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        return self._trackViewLoader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)
    
    def getTrackView(self, region):
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert(allowOverlaps is not None) 
        assert(borderHandling is not None) 
        
        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)
        
        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origTrackView.trackFormat, self._trackFormatReq)
        
        if self.formatConverters == []:
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '\
                                          + str(origTrackView.trackFormat) +
                                          ('(' + origTrackView.trackFormat._val + ')' if origTrackView.trackFormat._val else '') + \
                                          ' does not satisfy ' + str(self._trackFormatReq))
        
        if not self.formatConverters[0].canHandle(origTrackView.trackFormat, self._trackFormatReq):
            raise IncompatibleTracksError(getClassName(self.formatConverters[0]) +\
                                          ' does not support conversion from ' + str(origTrackView.trackFormat) + \
                                          ' to ' + str(self._trackFormatReq))
        return self.formatConverters[0].convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        prevFormatReq = self._trackFormatReq
        self._trackFormatReq = TrackFormatReq.merge(self._trackFormatReq, requestedTrackFormat)
        if self._trackFormatReq is None:
            raise IncompatibleTracksError(str(prevFormatReq ) + \
                                          ' is incompatible with additional ' + str(requestedTrackFormat))
    
    def setFormatConverter(self, converterClassName):
        assert( self.formatConverters is None )
        if converterClassName is not None:        
            self.formatConverters = [getFormatConverterByName(converterClassName)]
    
    def getUniqueKey(self, genome):
        assert self.formatConverters is not None and len(self.formatConverters) == 1, 'FC: '+str(self.formatConverters)
        assert( not None in [self._trackFormatReq.allowOverlaps(), \
                             self._trackFormatReq.borderHandling()] )
        
        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id
            
        return hash((tuple(self.trackName), self._trackId, getClassName(self.formatConverters[0]), \
                     self.formatConverters[0].VERSION, self._trackFormatReq.allowOverlaps(), \
                     self._trackFormatReq.borderHandling()))
Exemplo n.º 6
0
class Track(object):
    IS_MEMOIZABLE = True
    
    def __new__(cls, trackName, trackTitle=None):
        if trackName == [] or trackName is None:
            return None
        else:
            if ExternalTrackManager.isVirtualTrack(trackName):
                return VirtualMinimalTrack.__new__(VirtualMinimalTrack)
            else:
                return object.__new__(cls)

    def __init__(self, trackName, trackTitle=None):
        self.trackName = trackName
        self.trackTitle = trackTitle
        self._trackSource = TrackSource()
        self._trackViewLoader = TrackViewLoader()
        self._trackFormatReq = NeutralTrackFormatReq()
        self.formatConverters = None
        self._trackId = None
        self._randIndex = None

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        trackData = self._trackSource.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        return self._trackViewLoader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert(allowOverlaps is not None)
        assert(borderHandling is not None)

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origTrackView.trackFormat, self._trackFormatReq)

        if self.formatConverters == []:
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '\
                                          + str(origTrackView.trackFormat) +
                                          ('(' + origTrackView.trackFormat._val + ')' if origTrackView.trackFormat._val else '') + \
                                          ' does not satisfy ' + str(self._trackFormatReq))

        if not self.formatConverters[0].canHandle(origTrackView.trackFormat, self._trackFormatReq):
            raise IncompatibleTracksError(getClassName(self.formatConverters[0]) +\
                                          ' does not support conversion from ' + str(origTrackView.trackFormat) + \
                                          ' to ' + str(self._trackFormatReq))
        return self.formatConverters[0].convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        prevFormatReq = self._trackFormatReq
        self._trackFormatReq = TrackFormatReq.merge(self._trackFormatReq, requestedTrackFormat)
        if self._trackFormatReq is None:
            raise IncompatibleTracksError(str(prevFormatReq ) + \
                                          ' is incompatible with additional ' + str(requestedTrackFormat))

    def setFormatConverter(self, converterClassName):
        assert( self.formatConverters is None )
        if converterClassName is not None:
            self.formatConverters = [getFormatConverterByName(converterClassName)]

    def getUniqueKey(self, genome):
        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        return hash((tuple(self.trackName),
                     self._trackId if self._trackId else '',
                     getClassName(self.formatConverters[0]) if self.formatConverters else '',
                     self.formatConverters[0].VERSION if self.formatConverters else '',
                     self._trackFormatReq.allowOverlaps() if self._trackFormatReq.allowOverlaps() else '',
                     self._trackFormatReq.borderHandling() if self._trackFormatReq.borderHandling() else ''))

    def resetTrackSource(self):
        self._trackSource = TrackSource()

    def setRandIndex(self, randIndex):
        pass #used only by TsBasedRandomTrack
Exemplo n.º 7
0
class Track(object):
    IS_MEMOIZABLE = True

    def __new__(cls, trackName, trackTitle=None):
        if trackName == [] or trackName is None:
            return None
        else:
            if ExternalTrackManager.isVirtualTrack(trackName):
                return VirtualMinimalTrack.__new__(VirtualMinimalTrack)
            else:
                return object.__new__(cls)

    def __init__(self, trackName, trackTitle=None):
        self.trackName = trackName
        self.trackTitle = trackTitle
        self._trackSource = TrackSource()
        self._trackViewLoader = TrackViewLoader()
        self._trackFormatReq = NeutralTrackFormatReq()
        self.formatConverters = None
        self._trackId = None
        self._hasBeenRead = False

    def _getRawTrackView(self, region, borderHandling, allowOverlaps):
        trackData = self._trackSource.getTrackData(self.trackName, region.genome, region.chr, allowOverlaps)
        return self._trackViewLoader.loadTrackView(trackData, region, borderHandling, allowOverlaps, self.trackName)

    def getTrackView(self, region):
        allowOverlaps = self._trackFormatReq.allowOverlaps()
        borderHandling = self._trackFormatReq.borderHandling()
        assert(allowOverlaps is not None)
        assert(borderHandling is not None)

        origTrackView = self._getRawTrackView(region, borderHandling, allowOverlaps)

        if self.formatConverters is None:
            self.formatConverters = getFormatConverters(origTrackView.trackFormat, self._trackFormatReq)

        self._hasBeenRead = True

        if self.formatConverters == []:
            raise IncompatibleTracksError(prettyPrintTrackName(self.trackName) + ' with format: '\
                                          + str(origTrackView.trackFormat) +
                                          ('(' + origTrackView.trackFormat._val + ')' if origTrackView.trackFormat._val else '') + \
                                          ' does not satisfy ' + str(self._trackFormatReq))

        if not self.formatConverters[0].canHandle(origTrackView.trackFormat, self._trackFormatReq):
            raise IncompatibleTracksError(getClassName(self.formatConverters[0]) +\
                                          ' does not support conversion from ' + str(origTrackView.trackFormat) + \
                                          ' to ' + str(self._trackFormatReq))
        return self.formatConverters[0].convert(origTrackView)

    def addFormatReq(self, requestedTrackFormat):
        prevFormatReq = self._trackFormatReq
        self._trackFormatReq = TrackFormatReq.merge(self._trackFormatReq, requestedTrackFormat)
        if self._trackFormatReq is None:
            raise IncompatibleTracksError(str(prevFormatReq ) + \
                                          ' is incompatible with additional ' + str(requestedTrackFormat))

    # TODO: track.formatConverters needs a complete overhaul. It is currently used:
    # 1) to link tracks with possible choices for track conversion
    # 2) to store the choice of format converter made by the user
    # 3) to manage the default choice of format converters [=always the first item in the list]
    # 4) to hold the currently selected format converter class until needed by getTrackView
    def setFormatConverter(self, converterClassName):
        assert( self.formatConverters is None )
        if converterClassName is not None:
            self.formatConverters = [getFormatConverterByName(converterClassName)]

    def hasBeenFlaggedAsRead(self):
        return self._hasBeenRead

    def getUniqueKey(self, genome):
        itemsToBeHashed = [tuple(self.trackName)]
        itemsToBeHashed.append(self._getTrackId(genome))

        if self._trackFormatReq is not None:
            if self._trackFormatReq.allowOverlaps() is not None:
                itemsToBeHashed.append(self._trackFormatReq.allowOverlaps())
            itemsToBeHashed.append(self._trackFormatReq.borderHandling())

        itemsToBeHashed.append(getClassName(self.formatConverters[0]) if self.formatConverters else '')
        # TODO: Move away from fixed VERSION, as these in practice are never updated. Also for statistics.
        itemsToBeHashed.append(self.formatConverters[0].VERSION if self.formatConverters else '')

        from config.DebugConfig import DebugConfig
        if DebugConfig.VERBOSE:
            from gold.application.LogSetup import logMessage
            logMessage('Unique key items for track "{}": '.format(self.trackName) +
                       ', '.join(str(_) for _ in itemsToBeHashed))

        return hash(tuple(itemsToBeHashed))

    def _getTrackId(self, genome):
        if not self._trackId:
            trackInfo = TrackInfo(genome, self.trackName)
            self._trackId = trackInfo.id
        return self._trackId if self._trackId else ''

    def resetTrackSource(self):
        self._trackSource = TrackSource()