def _createPreProcFiles(self):
    """Record finalization metadata for the track and, in 'Real' mode, write its elements.

    Metadata read from the GE source and from the GE source manager is
    handed to a PreProcMetaDataCollector first. Nothing more is done when
    the manager reports zero elements. In any mode other than 'Real' the
    GE source is iterated once without writing anything. Otherwise every
    element is passed to an OutputManager, the manager's chromosomes are
    flagged as preprocessed and the output is closed.
    """
    sourceManager = self._geSourceManager
    geSource = sourceManager.getGESource()
    genome = geSource.genome

    collector = PreProcMetaDataCollector(genome, self._trackName)

    # Gathered in the positional order expected by updateMetaDataForFinalization.
    finalizationArgs = (
        geSource.getFileSuffix(),
        geSource.getPrefixList(),
        geSource.getValDataType(),
        geSource.getValDim(),
        geSource.getEdgeWeightDataType(),
        geSource.getEdgeWeightDim(),
        geSource.hasUndirectedEdges(),
        geSource.getVersion(),
        PreProcessUtils.constructId(geSource),
        sourceManager.getNumElements(),
        sourceManager.getBoundingRegionTuples(),
        sourceManager.getValCategories(),
        sourceManager.getEdgeWeightCategories(),
        self._allowOverlaps,
    )
    collector.updateMetaDataForFinalization(*finalizationArgs)

    if sourceManager.getNumElements() == 0:
        return

    if self._mode != 'Real':
        # Run through the whole source, discarding the elements.
        for _unused in geSource:
            pass
        return

    output = OutputManager(genome, self._trackName, self._allowOverlaps, sourceManager)

    # Slice sources are written slice by slice, all others element by element.
    if geSource.isSliceSource():
        writeFunc = output.writeRawSlice
    else:
        writeFunc = output.writeElement

    for element in geSource:
        writeFunc(element)

    collector.flagChrsAsPreProcessed(self._allowOverlaps, sourceManager.getAllChrs())
    output.close()
def _allGESourceManagers(self, trackName, allowOverlaps):
    """Yield the GE source managers to preprocess for a track under one overlap rule.

    When allowOverlaps is False and the collector answers True to
    overlapRuleHasBeenFinalized(True), exactly one manager, obtained from
    the track itself, is yielded. Otherwise the GE sources of the track
    are visited in turn and a manager is yielded for each one that
    PreProcessUtils.shouldPreProcessGESource accepts. With allowOverlaps
    True the generator ends at the first source that is dense or has no
    overlapping elements. self._status is updated before each candidate.
    """
    collector = PreProcMetaDataCollector(self._genome, trackName)

    if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
        trackInfo = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
        overlapInfo = ' (allowOverlaps: %s)' % allowOverlaps
        self._status = trackInfo + overlapInfo
        yield self._getGESourceManagerFromTrack(trackName)
        return

    for geSource in self._allGESources(trackName):
        if allowOverlaps == True:
            trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
            if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                # Ends the whole generator, not just this source.
                return

        trackInfo = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
        if geSource.hasOrigFile():
            fileInfo = ' (filename: "%s")' % geSource.getFileName()
        else:
            fileInfo = ''
        overlapInfo = ' (allowOverlaps: %s)' % allowOverlaps
        self._status = trackInfo + fileInfo + overlapInfo

        if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
            yield self._getGESourceManagerFromGESource(geSource)
def _allGESourceManagers(self, trackName, allowOverlaps):
    """Generate GE source managers for a track under the given overlap rule.

    If allowOverlaps is False and the collector's
    overlapRuleHasBeenFinalized(True) is true, one manager built from the
    track is yielded and the generator finishes. In every other case each
    GE source of the track is examined: when allowOverlaps is True, a
    source that is dense or has no overlapping elements terminates the
    generator; otherwise a manager is yielded for the source if
    PreProcessUtils.shouldPreProcessGESource says so. self._status is set
    to a progress message before each candidate is handled.
    """
    collector = PreProcMetaDataCollector(self._genome, trackName)

    if allowOverlaps == False and collector.overlapRuleHasBeenFinalized(True):
        statusHead = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
        statusTail = ' (allowOverlaps: %s)' % allowOverlaps
        self._status = statusHead + statusTail
        yield self._getGESourceManagerFromTrack(trackName)
        return

    for geSource in self._allGESources(trackName):
        if allowOverlaps == True:
            trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
            if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                # Stops iteration over all remaining sources as well.
                return

        statusHead = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
        if geSource.hasOrigFile():
            statusFile = ' (filename: "%s")' % geSource.getFileName()
        else:
            statusFile = ''
        statusTail = ' (allowOverlaps: %s)' % allowOverlaps
        self._status = statusHead + statusFile + statusTail

        if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
            yield self._getGESourceManagerFromGESource(geSource)
def process(self):
    """Preprocess and finalize every track returned by self._allTrackNames().

    Each track is handled for both overlap rules, allowOverlaps True and
    then False:

    1. For every GE source manager from self._allGESourceManagers, and
       only if self._shouldPreProcess() holds, outdated preprocessed
       files are removed, a PreProcessGeSourceJob is run, tracks with
       warnings are recorded in self._warningTrackNames (when
       self._raiseIfAnyWarnings is set) and the collector's dirty status
       is updated.
    2. If at least one manager was seen, finalization is requested and
       the collector is dirty, the overlap rule is finalized: in 'Real'
       mode with chromosome folder merging enabled the chromosome vectors
       are combined and the chromosome folders removed; then the edge
       checks are run and the rule is marked as finalized.

    After both rules, if finalization is requested, the collector is
    finalized when dirty and its entry is removed otherwise.

    Any exception raised while handling a track removes the collector
    entry. It is re-raised if self.PASS_ON_EXCEPTIONS is set; otherwise
    it is reported via self._printExceptionMsg (Error=False for
    NotSupportedError, Error=True for everything else) and processing
    moves on to the next track. self._status is updated along the way
    with a description of the current step.

    Raises:
        AssertionError: if self._genome is None or a track name equals [''].
    """
    assert self._genome is not None, 'Error: genome must be specified when preprocessing tracks.'

    # NOTE(review): set below but never read or returned within this method.
    atLeastOneFinalized = False

    for trackName in self._allTrackNames():
        assert trackName != ['']
        overlapRulesProcessedForTrackName = []
        # Created from the track name as received, before the renaming below.
        collector = PreProcMetaDataCollector(self._genome, trackName)

        try:
            trackName = self._renameTrackNameIfIllegal(trackName)

            for allowOverlaps in [True, False]:
                anyGeSourceManagers = False

                for geSourceManager in self._allGESourceManagers(trackName, allowOverlaps):
                    anyGeSourceManagers = True

                    # PreProcess if needed
                    if not self._shouldPreProcess():
                        continue

                    PreProcessUtils.removeOutdatedPreProcessedFiles(
                        self._genome, trackName, allowOverlaps, self._mode)

                    # Print the track message at most once per overlap rule.
                    if self._shouldPrintProcessMessages() and \
                            allowOverlaps not in overlapRulesProcessedForTrackName:
                        self._printProcessTrackMessage(trackName, allowOverlaps)
                        overlapRulesProcessedForTrackName.append(allowOverlaps)

                    self._status = 'Trying to preprocess geSource...'
                    geSourceJob = PreProcessGeSourceJob(
                        trackName, geSourceManager, allowOverlaps, self._mode)
                    anyWarnings = geSourceJob.process()

                    if self._raiseIfAnyWarnings and anyWarnings and \
                            trackName not in self._warningTrackNames:
                        self._warningTrackNames.append(trackName)

                    collector.updatePreProcDirtyStatus(geSourceJob.hasModifiedData())

                # Finalize overlapRule output if needed
                if not (anyGeSourceManagers and self._shouldFinalize()
                        and collector.preProcIsDirty()):
                    continue

                if self._mode == 'Real' and self._shouldMergeChrFolders():
                    self._status = 'Trying to combine chromosome vectors into combined vectors.'
                    PreProcessUtils.createBoundingRegionShelve(
                        self._genome, trackName, allowOverlaps)
                    ChrMemmapFolderMerger.merge(self._genome, trackName, allowOverlaps)

                    self._status = 'Trying to remove chromosome folders'
                    PreProcessUtils.removeChrMemmapFolders(
                        self._genome, trackName, allowOverlaps)

                self._status = 'Trying to check whether 3D data is correct'
                PreProcessUtils.checkIfEdgeIdsExist(self._genome, trackName, allowOverlaps)
                PreProcessUtils.checkUndirectedEdges(self._genome, trackName, allowOverlaps)
                collector.markOverlapRuleAsFinalized(allowOverlaps)

            # Finalize track if needed
            if self._shouldFinalize():
                if collector.preProcIsDirty():
                    self._status = 'Trying to finalize.'
                    collector.finalize(self._username, self._shouldPrintProcessMessages())
                    atLeastOneFinalized = True
                else:
                    collector.removeEntry()

        except NotSupportedError as e:
            collector.removeEntry()
            if self.PASS_ON_EXCEPTIONS:
                raise
            self._printExceptionMsg(e, trackName, Error=False)
        except Exception as e:
            collector.removeEntry()
            if self.PASS_ON_EXCEPTIONS:
                raise
            self._printExceptionMsg(e, trackName, Error=True)
def process(self):
    """Run preprocessing and finalization for all tracks from self._allTrackNames().

    For each track the two overlap rules are processed in the order
    allowOverlaps True, then False:

    1. Every GE source manager produced by self._allGESourceManagers is
       preprocessed when self._shouldPreProcess() holds: outdated
       preprocessed files are removed, a PreProcessGeSourceJob is run,
       the track is added to self._warningTrackNames if the job reported
       warnings and self._raiseIfAnyWarnings is set, and the collector's
       dirty status is updated.
    2. When at least one manager was produced, finalization is requested
       and the collector is dirty, the overlap rule output is finalized:
       in 'Real' mode with chromosome folder merging enabled, chromosome
       vectors are combined and chromosome folders removed; afterwards
       the edge checks run and the rule is marked as finalized.

    Once both rules are done and finalization is requested, a dirty
    collector is finalized; a clean one has its entry removed.

    An exception raised while handling a track removes the collector
    entry. With self.PASS_ON_EXCEPTIONS set it is re-raised; otherwise it
    is reported through self._printExceptionMsg (Error=False for
    NotSupportedError, Error=True otherwise) and the loop continues with
    the next track. self._status tracks the step currently attempted.

    Raises:
        AssertionError: if self._genome is None or a track name equals [''].
    """
    assert self._genome is not None, 'Error: genome must be specified when preprocessing tracks.'

    # NOTE(review): assigned below but never read or returned within this method.
    atLeastOneFinalized = False

    for trackName in self._allTrackNames():
        assert trackName != ['']
        overlapRulesProcessedForTrackName = []
        # Built from the track name as received, before the renaming below.
        collector = PreProcMetaDataCollector(self._genome, trackName)

        try:
            trackName = self._renameTrackNameIfIllegal(trackName)

            for allowOverlaps in [True, False]:
                anyGeSourceManagers = False

                for geSourceManager in self._allGESourceManagers(trackName, allowOverlaps):
                    anyGeSourceManagers = True

                    # PreProcess if needed
                    if not self._shouldPreProcess():
                        continue

                    PreProcessUtils.removeOutdatedPreProcessedFiles(
                        self._genome, trackName, allowOverlaps, self._mode)

                    # The track message is printed at most once per overlap rule.
                    if self._shouldPrintProcessMessages() and \
                            allowOverlaps not in overlapRulesProcessedForTrackName:
                        self._printProcessTrackMessage(trackName, allowOverlaps)
                        overlapRulesProcessedForTrackName.append(allowOverlaps)

                    self._status = 'Trying to preprocess geSource...'
                    geSourceJob = PreProcessGeSourceJob(
                        trackName, geSourceManager, allowOverlaps, self._mode)
                    anyWarnings = geSourceJob.process()

                    if self._raiseIfAnyWarnings and anyWarnings and \
                            trackName not in self._warningTrackNames:
                        self._warningTrackNames.append(trackName)

                    collector.updatePreProcDirtyStatus(geSourceJob.hasModifiedData())

                # Finalize overlapRule output if needed
                if not (anyGeSourceManagers and self._shouldFinalize()
                        and collector.preProcIsDirty()):
                    continue

                if self._mode == 'Real' and self._shouldMergeChrFolders():
                    self._status = 'Trying to combine chromosome vectors into combined vectors.'
                    PreProcessUtils.createBoundingRegionShelve(
                        self._genome, trackName, allowOverlaps)
                    ChrMemmapFolderMerger.merge(self._genome, trackName, allowOverlaps)

                    self._status = 'Trying to remove chromosome folders'
                    PreProcessUtils.removeChrMemmapFolders(
                        self._genome, trackName, allowOverlaps)

                self._status = 'Trying to check whether 3D data is correct'
                PreProcessUtils.checkIfEdgeIdsExist(self._genome, trackName, allowOverlaps)
                PreProcessUtils.checkUndirectedEdges(self._genome, trackName, allowOverlaps)
                collector.markOverlapRuleAsFinalized(allowOverlaps)

            # Finalize track if needed
            if self._shouldFinalize():
                if collector.preProcIsDirty():
                    self._status = 'Trying to finalize.'
                    collector.finalize(self._username, self._shouldPrintProcessMessages())
                    atLeastOneFinalized = True
                else:
                    collector.removeEntry()

        except NotSupportedError as e:
            collector.removeEntry()
            if self.PASS_ON_EXCEPTIONS:
                raise
            self._printExceptionMsg(e, trackName, Error=False)
        except Exception as e:
            collector.removeEntry()
            if self.PASS_ON_EXCEPTIONS:
                raise
            self._printExceptionMsg(e, trackName, Error=True)