def preProcFilesExist(cls, genome, trackName, allowOverlaps):
    """Tell whether preprocessed files exist for a track and overlap rule.

    The answer stored in the track's PreProcMetaDataCollector is returned
    when it is not None. Otherwise it is worked out from
    cls.mergedPreProcFilesExist, falling back to
    cls.oldTypePreProcFilesExist, and written back to the collector
    together with the 'merged' result.

    Args:
        genome: genome identifier, forwarded unchanged to the helpers.
        trackName: track name, forwarded unchanged to the helpers.
        allowOverlaps: overlap rule the query applies to.

    Returns:
        The stored flag, or True if merged files exist, or otherwise
        whatever cls.oldTypePreProcFilesExist returns.
    """
    metaData = PreProcMetaDataCollector(genome, trackName)
    storedFlag = metaData.preProcFilesExist(allowOverlaps)
    if storedFlag is not None:
        # Already known: no lookup and no write-back needed.
        return storedFlag

    merged = cls.mergedPreProcFilesExist(genome, trackName, allowOverlaps)
    # The old-type lookup is only consulted when no merged files were found.
    filesExist = True if merged else cls.oldTypePreProcFilesExist(
        genome, trackName, allowOverlaps)

    # Store the outcome so the next query can take the early return above.
    metaData.updatePreProcFilesExistFlag(allowOverlaps, filesExist, merged)
    return filesExist
def preProcFilesExist(genome, trackName, allowOverlaps):
    """Tell whether preprocessed files exist for a track and overlap rule.

    The answer stored in the track's PreProcMetaDataCollector is returned
    when it is not None. Otherwise the answer is derived from what is on
    disk and written back to the collector before being returned.

    Args:
        genome: genome identifier.
        trackName: track name.
        allowOverlaps: overlap rule the query applies to.

    Returns:
        The stored flag; or True when the bounding region shelve file
        exists; or the result of PreProcessUtils._hasOldTypeChromSubDirs
        when the track directory exists; or False.
    """
    metaData = PreProcMetaDataCollector(genome, trackName)
    storedFlag = metaData.preProcFilesExist(allowOverlaps)
    if storedFlag is not None:
        return storedFlag

    dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    if BoundingRegionShelve(genome, trackName, allowOverlaps).fileExists():
        # The shelve file alone counts as proof. An alternative check that
        # scanned dirPath for 'start'/'end'/'val'/'edges' files was left
        # commented out at this point in the original code.
        filesExist = True
    elif os.path.exists(dirPath):
        filesExist = PreProcessUtils._hasOldTypeChromSubDirs(dirPath, genome)
    else:
        filesExist = False

    # Store the outcome so the next query can take the early return above.
    metaData.updatePreProcFilesExistFlag(allowOverlaps, filesExist)
    return filesExist
def process(self):
    """Preprocess and finalize every track returned by self._allTrackNames().

    For each track, and for each overlap rule (True, then False):

    1. Every genome element source manager from self._allGESourceManagers
       is preprocessed through a PreProcessGeSourceJob, if
       self._shouldPreProcess() says so. Outdated preprocessed files are
       removed first, and the collector's dirty status is updated from the
       job.
    2. If at least one source manager was seen, finalizing is requested and
       the collector is dirty, the overlap rule output is finalized. In
       'Real' mode with chromosome folder merging enabled, that means
       merging and removing the chromosome folders. The edge checks then
       run and the overlap rule is marked as finalized.

    After both overlap rules, the track itself is finalized when the
    collector is dirty; otherwise its collector entry is removed.

    Any exception raised while handling a track removes that track's
    collector entry. Depending on DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS
    it is then either handed to raise_from wrapped in PreprocessWarning
    (for NotSupportedError) or PreprocessError (for everything else), or
    printed via self._printExceptionMsg, after which the loop continues
    with the next track.

    Raises:
        AssertionError: if self._genome is None or a track name equals [''].
    """
    assert self._genome is not None, 'Error: genome must be specified when preprocessing tracks.'

    # NOTE(review): not read anywhere in the visible part of this method;
    # kept because it is presumably consumed further down (e.g. returned).
    atLeastOneFinalized = False

    for trackName in self._allTrackNames():
        assert trackName != ['']

        # Overlap rules for which the "processing track" message was printed.
        overlapRulesProcessedForTrackName = []
        # NOTE(review): the collector is built from the track name *before*
        # any renaming below - confirm that this is intended.
        collector = PreProcMetaDataCollector(self._genome, trackName)

        try:
            trackName = self._renameTrackNameIfIllegal(trackName)

            for allowOverlaps in (True, False):
                anyGeSourceManagers = False

                for geSourceManager in self._allGESourceManagers(
                        trackName, allowOverlaps):
                    anyGeSourceManagers = True

                    # PreProcess if needed
                    if self._shouldPreProcess():
                        PreProcessUtils.removeOutdatedPreProcessedFiles(
                            self._genome, trackName, allowOverlaps, self._mode)

                        # Print the message once per overlap rule, not once
                        # per source manager.
                        if (self._shouldPrintProcessMessages()
                                and allowOverlaps not in overlapRulesProcessedForTrackName):
                            self._printProcessTrackMessage(trackName, allowOverlaps)
                            overlapRulesProcessedForTrackName.append(allowOverlaps)

                        self._status = 'Trying to preprocess geSource...'
                        geSourceJob = PreProcessGeSourceJob(
                            trackName, geSourceManager, allowOverlaps, self._mode)
                        anyWarnings = geSourceJob.process()

                        if (self._raiseIfAnyWarnings and anyWarnings
                                and trackName not in self._warningTrackNames):
                            self._warningTrackNames.append(trackName)

                        collector.updatePreProcDirtyStatus(
                            geSourceJob.hasModifiedData())

                # Finalize overlapRule output if needed
                if (anyGeSourceManagers and self._shouldFinalize()
                        and collector.preProcIsDirty()):
                    # NOTE(review): the original indentation was lost; the
                    # merge/remove steps are taken to be the only statements
                    # guarded by the mode check - confirm.
                    if self._mode == 'Real' and self._shouldMergeChrFolders():
                        self._status = 'Trying to combine chromosome vectors into combined vectors.'
                        PreProcessUtils.createBoundingRegionShelve(
                            self._genome, trackName, allowOverlaps)
                        ChrMemmapFolderMerger.merge(
                            self._genome, trackName, allowOverlaps)

                        self._status = 'Trying to remove chromosome folders'
                        PreProcessUtils.removeChrMemmapFolders(
                            self._genome, trackName, allowOverlaps)
                        collector.updatePreProcFilesExistFlag(
                            allowOverlaps, preProcFilesExist=True, merged=True)

                    self._status = 'Trying to check whether 3D data is correct'
                    PreProcessUtils.checkIfEdgeIdsExist(
                        self._genome, trackName, allowOverlaps)
                    # NOTE(review): checkUndirectedEdges is called twice in
                    # the original code. Kept as-is because its side effects
                    # are not visible here; it looks redundant - confirm.
                    PreProcessUtils.checkUndirectedEdges(
                        self._genome, trackName, allowOverlaps)
                    PreProcessUtils.checkUndirectedEdges(
                        self._genome, trackName, allowOverlaps)
                    collector.markOverlapRuleAsFinalized(allowOverlaps)

            # Finalize track if needed
            if self._shouldFinalize():
                if collector.preProcIsDirty():
                    self._status = 'Trying to finalize.'
                    collector.finalize(self._username,
                                       self._shouldPrintProcessMessages())
                    atLeastOneFinalized = True
                else:
                    # Nothing was modified for this track: drop its entry.
                    collector.removeEntry()

        # 'except X as e' is valid on Python 2.6+ and Python 3, whereas the
        # former 'except X, e' spelling is a SyntaxError on Python 3.
        except NotSupportedError as e:
            collector.removeEntry()
            if DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS:
                raise_from(
                    PreprocessWarning(
                        self._addContextToExceptionMsg(e, trackName)), e)
            else:
                self._printExceptionMsg(e, trackName, Error=False)
        except Exception as e:
            collector.removeEntry()
            if DebugConfig.PASS_ON_PREPROCESS_EXCEPTIONS:
                raise_from(
                    PreprocessError(
                        self._addContextToExceptionMsg(e, trackName)), e)
            else:
                self._printExceptionMsg(e, trackName, Error=True)