Exemplo n.º 1
0
 def removeOutdatedPreProcessedFiles(genome, trackName, allowOverlaps, mode):
     collector = PreProcMetaDataCollector(genome, trackName)
     if PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps) and not \
         collector.hasRemovedPreProcFiles(allowOverlaps):
             dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
             
             assert dirPath.startswith(Config.PROCESSED_DATA_PATH), \
                 "Processed data path '%s' does not start with '%s'" % \
                 (dirPath, Config.PROCESSED_DATA_PATH)
             if mode == 'Real':
                 print 'Removing outdated preprocessed data: ', dirPath
                 for fn in os.listdir(dirPath):
                     fullFn = os.path.join(dirPath, fn)
                     if os.path.isfile(fullFn):
                         os.unlink(fullFn)
                     if os.path.isdir(fullFn):
                         if PreProcessUtils._isOldTypeChromDirectory(fullFn, genome):
                             shutil.rmtree(fullFn)
             else:
                 print 'Would now have removed outdated preprocessed data if real run: ', dirPath
             
             collector.updateRemovedPreProcFilesFlag(allowOverlaps, True)
     
     if mode == 'Real':
         ti = TrackInfo(genome, trackName)
         ti.resetTimeOfPreProcessing()
Exemplo n.º 2
0
 def _calcAndStoreSubTrackCount(self, trackName):
     """
     Compute the subTrackCount for trackName and store it in its TrackInfo.

     The count is the sum of the subTrackCount values read from the
     TrackInfo of each subtype returned by ProcTrackOptions.getSubtypes()
     (falsy values such as None or 0 contribute nothing), plus one if the
     TrackInfo of trackName itself is valid.

     trackName is extended with '+ [subTrackName]', so it is expected to be
     a list.
     """
     info = TrackInfo(self._genome, trackName)

     subNames = ProcTrackOptions.getSubtypes(self._genome, trackName, True)
     subCounts = (TrackInfo(self._genome, trackName + [name]).subTrackCount
                  for name in subNames)
     total = sum(count for count in subCounts if count)

     if info.isValid():
         total += 1

     info.subTrackCount = total
     info.store()
Exemplo n.º 3
0
def modifyTnRecord(genome, oldTn, newTn, verbose):
    trackInfo = TrackInfo(genome, oldTn)
    assert trackInfo.trackName == oldTn
    assert trackInfo.timeOfPreProcessing is not None, 'ERROR: trackInfo-object not complete for TN (is this track preprocessed?): ' + str(oldTn)
    #if trackInfo.timeOfPreProcessing is None:
        #print 'WARNING: timeOfPreProcessing is None for: ',oldTn
        
    trackInfo.trackName = newTn
    if not ONLY_SIMULATION:
        trackInfo.store()
        if verbose:
            print '(Storing track-info with new tn: %s)' % str(newTn)
    else:
        if verbose:
            print 'Would now store track-info with new tn: %s' % str(newTn)
Exemplo n.º 4
0
 def constructId(geSource):
     """
     Return the id for the track represented by geSource.

     Without an original file, the id reported by geSource itself is used.
     Otherwise the id is built by TrackInfo.constructIdFromPath() from the
     genome, the original path, the geSource version and
     PreProcessTracksJob.VERSION. The original path is the file name itself
     for external sources and its directory for all others.
     """
     # Imported inside the function, as in the original code.
     from gtrackcore_memmap.preprocess.PreProcessTracksJob import PreProcessTracksJob

     if not geSource.hasOrigFile():
         return geSource.getId()

     if geSource.isExternal():
         origPath = geSource.getFileName()
     else:
         origPath = os.path.dirname(geSource.getFileName())

     return TrackInfo.constructIdFromPath(
         geSource.getGenome(),
         origPath,
         geSource.getVersion(),
         PreProcessTracksJob.VERSION)
Exemplo n.º 5
0
 def shouldPreProcessGESource(trackName, geSource, allowOverlaps):
     """
     Decide whether the track behind geSource needs to be preprocessed.

     "Valid files exist" means that PreProcessUtils.preProcFilesExist() is
     true and the stored TrackInfo is valid.

     Without an original file: preprocess only if no valid files exist and
     the source is not external.
     With an original file: preprocess unless valid files exist, the id
     built by PreProcessUtils.constructId() equals the stored id, and
     geSource.getVersion() equals the stored preProcVersion.
     """
     genome = geSource.getGenome()
     storedInfo = TrackInfo(genome, trackName)

     filesExist = PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps)
     validFilesExist = filesExist and storedInfo.isValid()

     if not geSource.hasOrigFile():
         return not (validFilesExist or geSource.isExternal())

     # Evaluated unconditionally, as in the original, before the final test.
     idMatches = PreProcessUtils.constructId(geSource) == storedInfo.id
     upToDate = idMatches and geSource.getVersion() == storedInfo.preProcVersion

     return not validFilesExist or not upToDate
    def finalize(self, username, printMsg):
        ti = TrackInfo(self._genome, self._trackName)
        
        ti.fileType = self._fileSuffix
        trackFormat = self.getTrackFormat()
        ti.trackFormatName = trackFormat.getFormatName()
        ti.markType = trackFormat.getValTypeName()
        ti.weightType = trackFormat.getWeightTypeName()
        ti.undirectedEdges = self._undirectedEdges
        ti.preProcVersion = self._preProcVersion

        ti.origElCount = self._numElements[True]
        ti.clusteredElCount = self._numElements[False]
        
        if trackFormat.isDense() and trackFormat.isInterval():
            ti.origElCount -= len(self._boundingRegionTuples[True])
            ti.clusteredElCount -= len(self._boundingRegionTuples[False])

        if True in self._valCategories:
            ti.numValCategories = len(self._valCategories[True])
        
        if False in self._valCategories:
            ti.numClusteredValCategories = len(self._valCategories[False])

        if True in self._edgeWeightCategories:
            ti.numEdgeWeightCategories = len(self._edgeWeightCategories[True])
        
        ti.id = self._id
        ti.timeOfPreProcessing = datetime.datetime.now()
    
        ti.lastUpdatedBy = username
        if ti.hbContact == '':
            ti.hbContact = username
        
        ti.store()
        
        if printMsg:
            print "Finished preprocessing track '%s'." % ':'.join(self._trackName)
            print
        
        self.removeEntry()
Exemplo n.º 7
0
 def _calcAndStoreSubTrackCount(self, trackName):
     """
     Store a subTrackCount of 1 for trackName if its TrackInfo is valid.

     Nothing is changed or stored when the TrackInfo is not valid.
     """
     info = TrackInfo(self._genome, trackName)
     if not info.isValid():
         return

     info.subTrackCount = 1
     info.store()