def test_createReferenceWindows(self):
    """createReferenceWindows over this single-contig BAM should yield exactly
    one window spanning the whole H. pylori reference contig."""
    bam_path = os.path.join(big_data_dir, "Hpyl_1_5000.bam")
    aln_set = AlignmentSet(bam_path, referenceFastaFname=None)
    windows = ReferenceUtils.createReferenceWindows(aln_set.referenceInfoTable)
    # One contig in the alignment set -> one full-length window.
    self.assertEqual(len(windows), 1)
    window = windows[0]
    self.assertEqual(window.refId, 0)
    self.assertEqual(window.refName, 'gi|12057207|gb|AE001439.1|')
    self.assertEqual(window.start, 0)
    self.assertEqual(window.end, 1643831)
def test_createReferenceWindows(self):
    """createReferenceWindows over this single-contig BAM should yield exactly
    one window spanning the whole H. pylori reference contig."""
    # Fixed: removed the stray space between the method name and its
    # parameter list (PEP 8 E211), which is inconsistent with the rest
    # of the file.
    bamFile = os.path.join(big_data_dir, "Hpyl_1_5000.bam")
    ds = AlignmentSet(bamFile, referenceFastaFname=None)
    refInfoTable = ds.referenceInfoTable
    windows = ReferenceUtils.createReferenceWindows(refInfoTable)
    # One contig in the alignment set -> one full-length window.
    self.assertEqual(len(windows), 1)
    w = windows[0]
    self.assertEqual(w.refId, 0)
    self.assertEqual(w.refName, 'gi|12057207|gb|AE001439.1|')
    self.assertEqual(w.start, 0)
    self.assertEqual(w.end, 1643831)
def _mainLoop(self):
    """
    Main loop.

    First launch the worker and writer processes.

    Then we loop over ReferenceGroups in the cmp.h5. For each contig we will:
    1. Load the sequence into the main memory of the parent process
    2. Chunk up the contig and submit the chunk descriptions to the work queue

    Finally, wait for the writer process to finish.

    Returns 0 on completion (used as the process exit status).
    """

    # This looks scary but it's not. Python uses reference
    # counting and has a secondary, optional garbage collector for
    # collecting garbage cycles. Unfortunately when a cyclic GC
    # happens when a thread is calling cPickle.dumps, the
    # interpreter crashes sometimes. See Bug 19704. Since we
    # don't leak garbage cycles, disabling the cyclic GC is
    # essentially harmless.
    #gc.disable()

    self.loadSharedAlignmentSet(self.args.alignment_set)

    # Resolve the windows that will be visited.
    if self.args.referenceWindowsAsString is not None:
        self.referenceWindows = []
        for s in self.args.referenceWindowsAsString.split(","):
            try:
                win = ReferenceUtils.parseReferenceWindow(
                    s, self.alignments.referenceInfo)
                self.referenceWindows.append(win)
            # Fixed: bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt; catch only real errors.
            except Exception:
                if self.args.skipUnrecognizedContigs:
                    continue
                else:
                    # Fixed: replaced Python-2-only `raise Exception, "..."`
                    # statement syntax with the call form, which is valid on
                    # both Python 2 and 3.
                    raise Exception("Unrecognized contig!")
    elif self.args.referenceWindowsFromAlignment:
        self.referenceWindows = ReferenceUtils.referenceWindowsFromAlignment(
            self.alignments, self.alignments.referenceInfo)
        refNames = set([rw.refName for rw in self.referenceWindows])
        # limit output to contigs that overlap with reference windows
        self.refInfo = [r for r in self.refInfo if r.Name in refNames]
    else:
        self.referenceWindows = ReferenceUtils.createReferenceWindows(
            self.refInfo)

    # Load reference and IpdModel
    self.loadReferenceAndModel(self.args.reference)

    # Spawn workers
    self._launchSlaveProcesses()

    logging.info('Generating kinetics summary for [%s]' % self.args.alignment_set)

    #self.referenceMap = self.alignments['/RefGroup'].asDict('RefInfoID', 'ID')
    #self.alnInfo = self.alignments['/AlnInfo'].asRecArray()

    # Main loop -- enumerate fixed-stride chunks of every reference window
    # and enqueue each with a unique counter for the worker processes.
    self.workChunkCounter = 0

    # Iterate over references
    for window in self.referenceWindows:
        logging.info('Processing window/contig: %s' % (window,))
        for chunk in ReferenceUtils.enumerateChunks(self.args.referenceStride,
                                                    window):
            self._workQueue.put((self.workChunkCounter, chunk))
            self.workChunkCounter += 1

    # Shutdown worker threads with None sentinels
    for i in xrange(self.args.numWorkers):
        self._workQueue.put(None)

    for w in self._workers:
        w.join()

    # Join on the result queue and the resultsCollector process.
    # This ensures all the results are written before shutdown.
    self.monitoringThread.join()
    self._resultsQueue.join()
    self._resultCollectorProcess.join()

    logging.info("ipdSummary.py finished. Exiting.")
    self.alignments.close()
    return 0
def _mainLoop(self):
    """
    Main loop.

    First launch the worker and writer processes.

    Then we loop over ReferenceGroups in the cmp.h5. For each contig we will:
    1. Load the sequence into the main memory of the parent process
    2. Chunk up the contig and submit the chunk descriptions to the work queue

    Finally, wait for the writer process to finish.

    Returns 0 on completion (used as the process exit status).
    """

    # This looks scary but it's not. Python uses reference
    # counting and has a secondary, optional garbage collector for
    # collecting garbage cycles. Unfortunately when a cyclic GC
    # happens when a thread is calling cPickle.dumps, the
    # interpreter crashes sometimes. See Bug 19704. Since we
    # don't leak garbage cycles, disabling the cyclic GC is
    # essentially harmless.
    #gc.disable()

    self.loadSharedAlignmentSet(self.args.alignment_set)

    # Resolve the windows that will be visited.
    if self.args.referenceWindowsAsString is not None:
        self.referenceWindows = []
        for s in self.args.referenceWindowsAsString.split(","):
            try:
                win = ReferenceUtils.parseReferenceWindow(
                    s, self.alignments.referenceInfo)
                self.referenceWindows.append(win)
            # Fixed: bare `except:` also swallowed SystemExit and
            # KeyboardInterrupt; catch only real errors.
            except Exception:
                if self.args.skipUnrecognizedContigs:
                    continue
                else:
                    raise Exception("Unrecognized contig!")
    elif self.args.referenceWindowsFromAlignment:
        self.referenceWindows = ReferenceUtils.referenceWindowsFromAlignment(
            self.alignments, self.alignments.referenceInfo)
        refNames = set([rw.refName for rw in self.referenceWindows])
        # limit output to contigs that overlap with reference windows
        self.refInfo = [r for r in self.refInfo if r.Name in refNames]
    else:
        self.referenceWindows = ReferenceUtils.createReferenceWindows(
            self.refInfo)

    # Load reference and IpdModel; the model file is chosen from the
    # alignment chemistry unless overridden on the command line.
    ipdModelFilename = basic.getIpdModelFilename(
        self.args.ipdModel,
        ReferenceUtils.loadAlignmentChemistry(self.alignments),
        self.args.paramsPath)
    self.loadReferenceAndModel(self.args.reference, ipdModelFilename)

    # Spawn workers
    self._launchSlaveProcesses()

    logging.info('Generating kinetics summary for [%s]' % self.args.alignment_set)

    #self.referenceMap = self.alignments['/RefGroup'].asDict('RefInfoID', 'ID')
    #self.alnInfo = self.alignments['/AlnInfo'].asRecArray()

    # Main loop -- enumerate fixed-stride chunks of every reference window
    # and enqueue each with a unique counter for the worker processes.
    self.workChunkCounter = 0

    # Iterate over references
    for window in self.referenceWindows:
        logging.info('Processing window/contig: %s' % (window, ))
        for chunk in ReferenceUtils.enumerateChunks(
                self.args.referenceStride, window):
            self._workQueue.put((self.workChunkCounter, chunk))
            self.workChunkCounter += 1

    # Shutdown worker threads with None sentinels
    for i in xrange(self.args.numWorkers):
        self._workQueue.put(None)

    for w in self._workers:
        w.join()

    # Join on the result queue and the resultsCollector process.
    # This ensures all the results are written before shutdown.
    self.monitoringThread.join()
    self._resultsQueue.join()
    self._resultCollectorProcess.join()

    logging.info("ipdSummary.py finished. Exiting.")
    self.alignments.close()
    return 0