def main(self):

    # This looks scary but it's not. Python uses reference
    # counting and has a secondary, optional garbage collector for
    # collecting garbage cycles. Unfortunately when a cyclic GC
    # happens when a thread is calling cPickle.dumps, the
    # interpreter crashes sometimes. See Bug 19704. Since we
    # don't leak garbage cycles, disabling the cyclic GC is
    # essentially harmless.
    gc.disable()

    parseOptions()
    self._algorithm = self._algorithmByName(options.algorithm)
    self._setupLogging()
    random.seed(42)

    logging.info("h5py version: %s" % h5py.version.version)
    logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
    logging.info("ConsensusCore version: %s" %
                 (consensusCoreVersion() or "ConsensusCore unavailable"))
    logging.info("Starting.")

    atexit.register(self._cleanup)
    if options.doProfiling:
        self._makeTemporaryDirectory()

    with AlignmentSet(options.inputFilename) as peekFile:
        if not peekFile.isCmpH5 and not peekFile.hasPbi:
            logging.warn("'fancyChunking' not yet available for BAM "
                         "files without accompanying .pbi files, "
                         "disabling")
            options.fancyChunking = False
        logging.info("Peeking at file %s" % options.inputFilename)
        logging.info("Input data: numAlnHits=%d" % len(peekFile))
        resolveOptions(peekFile)
        self._loadReference(peekFile)
        self._checkFileCompatibility(peekFile)
        self._configureAlgorithm(options, peekFile)
        options.disableHdf5ChunkCache = True
        #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
        #if options.disableHdf5ChunkCache:
        #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
        #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

    if options.dumpEvidence:
        self._setupEvidenceDumpDirectory(options.evidenceDirectory)

    self._launchSlaves()
    self._readCmpH5Input()

    monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
    monitoringThread.start()

    try:
        if options.doProfiling:
            cProfile.runctx("self._mainLoop()",
                            globals=globals(),
                            locals=locals(),
                            filename=os.path.join(options.temporaryDirectory,
                                                  "profile-main.out"))
        elif options.doDebugging:
            if not options.threaded:
                die("Debugging only works with -T (threaded) mode")
            logging.info("PID: %d", os.getpid())
            import ipdb
            with ipdb.launch_ipdb_on_exception():
                self._mainLoop()
        else:
            self._mainLoop()
    except:
        why = traceback.format_exc()
        self.abortWork(why)

    monitoringThread.join()

    if self._aborting:
        logging.error("Aborting")
        return -1
    else:
        logging.info("Finished.")

    if options.doProfiling:
        self._printProfiles()

    # close h5 file.
    self._inCmpH5.close()
    return 0
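# --- Illustrative sketch (not part of the original module) -----------------
# The comment at the top of main() above relies on the fact that
# gc.disable() only switches off Python's *cyclic* collector; ordinary
# reference counting keeps reclaiming non-cyclic objects as usual, so the
# program does not leak as long as it creates no reference cycles.
import gc

gc.disable()
assert not gc.isenabled()   # the cyclic collector is off
data = [1, 2, 3]
del data                    # reclaimed immediately by reference counting
gc.enable()                 # re-enable if cycles could be created later
# ----------------------------------------------------------------------------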
def main(self):

    # This looks scary but it's not. Python uses reference
    # counting and has a secondary, optional garbage collector for
    # collecting garbage cycles. Unfortunately when a cyclic GC
    # happens when a thread is calling cPickle.dumps, the
    # interpreter crashes sometimes. See Bug 19704. Since we
    # don't leak garbage cycles, disabling the cyclic GC is
    # essentially harmless.
    gc.disable()

    self._algorithm = self._algorithmByName(options.algorithm)
    self._setupLogging()
    random.seed(42)

    logging.info("h5py version: %s" % h5py.version.version)
    logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
    logging.info("ConsensusCore version: %s" %
                 (consensusCoreVersion() or "ConsensusCore unavailable"))
    logging.info("ConsensusCore2 version: %s" %
                 (consensusCore2Version() or "ConsensusCore2 unavailable"))
    logging.info("Starting.")

    atexit.register(self._cleanup)
    if options.doProfiling:
        self._makeTemporaryDirectory()

    with AlignmentSet(options.inputFilename) as peekFile:
        if options.algorithm == "arrow" and peekFile.isCmpH5:
            die("Arrow does not support CmpH5 files")
        if not peekFile.isCmpH5 and not peekFile.hasPbi:
            die("Genomic Consensus only works with cmp.h5 files and BAM "
                "files with accompanying .pbi files")
        logging.info("Peeking at file %s" % options.inputFilename)
        logging.info("Input data: numAlnHits=%d" % len(peekFile))
        resolveOptions(peekFile)
        self._loadReference(peekFile)
        self._checkFileCompatibility(peekFile)
        self._configureAlgorithm(options, peekFile)
        options.disableHdf5ChunkCache = True
        #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
        #if options.disableHdf5ChunkCache:
        #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
        #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

    if options.dumpEvidence:
        self._setupEvidenceDumpDirectory(options.evidenceDirectory)

    self._launchSlaves()
    self._readAlignmentInput()

    monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
    monitoringThread.start()

    try:
        if options.doProfiling:
            cProfile.runctx("self._mainLoop()",
                            globals=globals(),
                            locals=locals(),
                            filename=os.path.join(options.temporaryDirectory,
                                                  "profile-main.out"))
        elif options.debug:
            if not options.threaded:
                die("Debugging only works with -T (threaded) mode")
            logging.info("PID: %d", os.getpid())
            import ipdb
            with ipdb.launch_ipdb_on_exception():
                self._mainLoop()
        else:
            self._mainLoop()
    except:
        why = traceback.format_exc()
        self.abortWork(why)

    monitoringThread.join()

    if self._aborting:
        logging.error("Aborting")
        return -1
    else:
        logging.info("Finished.")

    if options.doProfiling:
        self._printProfiles()

    # Close the input alignment file (cmp.h5 or BAM).
    self._inAlnFile.close()
    return 0
def main(self):

    # This looks scary but it's not. Python uses reference
    # counting and has a secondary, optional garbage collector for
    # collecting garbage cycles. Unfortunately when a cyclic GC
    # happens when a thread is calling cPickle.dumps, the
    # interpreter crashes sometimes. See Bug 19704. Since we
    # don't leak garbage cycles, disabling the cyclic GC is
    # essentially harmless.
    gc.disable()

    parseOptions()
    self._algorithm = self._algorithmByName(options.algorithm)
    self._setupLogging()
    random.seed(42)

    logging.info("h5py version: %s" % h5py.version.version)
    logging.info("hdf5 version: %s" % h5py.version.hdf5_version)
    logging.info("ConsensusCore version: %s" %
                 (consensusCoreVersion() or "ConsensusCore unavailable"))
    logging.info("Starting.")

    atexit.register(self._cleanup)
    if options.doProfiling:
        self._makeTemporaryDirectory()

    if options.usingBam:
        logging.warn("'fancyChunking' not yet available for BAM, disabling")
        options.fancyChunking = False

        # Peek at the bam file to build tables
        with BamReader(options.inputFilename) as peekCmpH5:
            logging.info("Peeking at BAM file %s" % options.inputFilename)
            logging.info("Input BAM data: numAlnHits=%d" % len(peekCmpH5))
            resolveOptions(peekCmpH5)
            self._loadReference(peekCmpH5)
            self._checkFileCompatibility(peekCmpH5)
            self._configureAlgorithm(options, peekCmpH5)
    else:
        # We need to peek at the cmp.h5 file to build the
        # refGroupId<->refGroupFullName mapping, and to determine
        # whether the selected algorithm parameters (Quiver) are
        # compatible with the data. But we then have to close the
        # file, and let the "real" open happen after the fork.
        with CmpH5Reader(options.inputFilename) as peekCmpH5:
            logging.info("Peeking at CmpH5 file %s" % options.inputFilename)
            logging.info("Input CmpH5 data: numAlnHits=%d" % len(peekCmpH5))
            resolveOptions(peekCmpH5)
            self._loadReference(peekCmpH5)
            self._checkFileCompatibility(peekCmpH5)
            self._configureAlgorithm(options, peekCmpH5)
            options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekCmpH5)
            if options.disableHdf5ChunkCache:
                logging.info("Will disable HDF5 chunk cache (large number of datasets)")
        logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())

    if options.dumpEvidence:
        self._setupEvidenceDumpDirectory(options.evidenceDirectory)

    self._launchSlaves()
    self._readCmpH5Input()

    monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
    monitoringThread.start()

    try:
        if options.doProfiling:
            cProfile.runctx("self._mainLoop()",
                            globals=globals(),
                            locals=locals(),
                            filename=os.path.join(options.temporaryDirectory,
                                                  "profile-main.out"))
        elif options.doDebugging:
            logging.info("PID: %d", os.getpid())
            try:
                import ipdb as pdb
            except:
                import pdb
            return pdb.runeval("self._mainLoop()", globals(), locals())
        else:
            self._mainLoop()
    except:
        why = traceback.format_exc()
        self.abortWork(why)

    monitoringThread.join()

    if self._aborting:
        logging.error("Aborting")
        return -1
    else:
        logging.info("Finished.")

    if options.doProfiling:
        self._printProfiles()

    # close h5 file.
    self._inCmpH5.close()
    return 0
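# --- Illustrative sketch (hypothetical helpers, not the project's API) -----
# The "peek at the cmp.h5, then close it and let the real open happen after
# the fork" comment above describes a common pattern: the parent process
# opens the input only long enough to read the metadata it needs to plan the
# work, closes it, and each worker opens its own handle after the fork so no
# open file handle is shared across processes.
import multiprocessing
import os

def _worker(path, start, end):
    # Each child re-opens the file itself; no handle crosses the fork.
    with open(path, "rb") as f:
        f.seek(start)
        chunk = f.read(end - start)
    print("worker %d read %d bytes" % (os.getpid(), len(chunk)))

def run(path, nWorkers=4):
    with open(path, "rb") as peek:          # parent-side peek
        peek.seek(0, 2)
        size = peek.tell()                  # metadata used to plan the chunks
    # peek is closed here, before any fork happens
    step = max(1, size // nWorkers)
    procs = [multiprocessing.Process(
                 target=_worker,
                 args=(path, i * step,
                       size if i == nWorkers - 1 else (i + 1) * step))
             for i in range(nWorkers)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
# ----------------------------------------------------------------------------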
def main(self):

    # This looks scary but it's not. Python uses reference
    # counting and has a secondary, optional garbage collector for
    # collecting garbage cycles. Unfortunately when a cyclic GC
    # happens when a thread is calling cPickle.dumps, the
    # interpreter crashes sometimes. See Bug 19704. Since we
    # don't leak garbage cycles, disabling the cyclic GC is
    # essentially harmless.
    gc.disable()

    random.seed(42)

    if options.pdb or options.pdbAtStartup:
        print("Process ID: %d" % os.getpid(), file=sys.stderr)
        try:
            import ipdb
        except ImportError:
            die("Debugging options require 'ipdb' package installed.")
        if not options.threaded:
            die("Debugging only works with -T (threaded) mode")

    if options.pdbAtStartup:
        ipdb.set_trace()

    logging.info("ConsensusCore version: %s" %
                 (consensusCoreVersion() or "ConsensusCore unavailable"))
    logging.info("ConsensusCore2 version: %s" %
                 (consensusCore2Version() or "ConsensusCore2 unavailable"))
    logging.info("Starting.")

    atexit.register(self._cleanup)
    if options.doProfiling:
        self._makeTemporaryDirectory()

    with AlignmentSet(options.inputFilename) as peekFile:
        if options.algorithm == "arrow" and peekFile.isCmpH5:
            die("Arrow does not support CmpH5 files")
        if not peekFile.isCmpH5 and not peekFile.hasPbi:
            die("Genomic Consensus only works with cmp.h5 files and BAM "
                "files with accompanying .pbi files")
        logging.info("Peeking at file %s" % options.inputFilename)
        logging.info("Input data: numAlnHits=%d" % len(peekFile))
        resolveOptions(peekFile)
        self._loadReference(peekFile)
        self._checkFileCompatibility(peekFile)
        self._algorithm = self._algorithmByName(options.algorithm, peekFile)
        self._configureAlgorithm(options, peekFile)
        options.disableHdf5ChunkCache = True
        #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
        #if options.disableHdf5ChunkCache:
        #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")

    self._launchSlaves()
    self._readAlignmentInput()

    monitoringThread = threading.Thread(target=monitorSlaves, args=(self,))
    monitoringThread.start()

    try:
        if options.doProfiling:
            cProfile.runctx("self._mainLoop()",
                            globals=globals(),
                            locals=locals(),
                            filename=os.path.join(options.temporaryDirectory,
                                                  "profile-main.out"))
        elif options.pdb:
            with ipdb.launch_ipdb_on_exception():
                self._mainLoop()
        else:
            self._mainLoop()
    except BaseException as exc:
        msg = 'options={}'.format(pprint.pformat(vars(options)))
        logging.exception(msg)
        self.abortWork(repr(exc))

    monitoringThread.join()

    if self._aborting:
        logging.error("Aborting")
        return -1
    else:
        logging.info("Finished.")

    if options.doProfiling:
        self._printProfiles()

    # Close the input alignment file (cmp.h5 or BAM).
    self._inAlnFile.close()
    return 0
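# --- Usage sketch (hypothetical file path) ---------------------------------
# The cProfile.runctx(..., filename=...) calls above dump binary profile
# data; the standard-library pstats module can read the dump afterwards,
# e.g. to list the 20 most expensive calls by cumulative time.
import pstats

stats = pstats.Stats("profile-main.out")        # written under temporaryDirectory
stats.sort_stats("cumulative").print_stats(20)
# ----------------------------------------------------------------------------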