def __init__(self, rapi_plugin_id, paired=True):
    """Load an aligner plugin and prepare an empty read batch.

    :param rapi_plugin_id: identifier handed to ``pyrapi.load_aligner``.
    :param paired: when true the read batch is created for two reads per
        fragment, otherwise for one.
    """
    plugin = pyrapi.load_aligner(rapi_plugin_id)
    self._plugin = plugin

    # The plugin is initialised with the raw options object held by the
    # HiRapiOpts wrapper.
    self._opts = HiRapiOpts(plugin)
    plugin.init(self._opts._rapi_opts)

    if paired:
        reads_per_fragment = 2
    else:
        reads_per_fragment = 1
    self._batch = plugin.read_batch(reads_per_fragment)

    # Neither an aligner nor a reference exists at construction time.
    self._aligner = None
    self._ref = None
    self._qoffset = self.Qenc_Sanger
def __init__(self, rapi_plugin_id, paired=True, opts=None):
    """Load an aligner plugin and prepare an empty read batch.

    :param rapi_plugin_id: identifier handed to ``pyrapi.load_aligner``.
    :param paired: when true the read batch is created for two reads per
        fragment, otherwise for one.
    :param opts: optional options wrapper exposing a ``_rapi_opts``
        attribute. When ``None`` a fresh ``HiRapiOpts`` is created for the
        loaded plugin.
    """
    self._plugin = pyrapi.load_aligner(rapi_plugin_id)

    # Compare against None explicitly: a caller-supplied options object
    # that happens to evaluate as false must not be silently discarded in
    # favour of defaults.
    if opts is not None:
        self._opts = opts
    else:
        self._opts = HiRapiOpts(self._plugin)
    self._plugin.init(self._opts._rapi_opts)

    self._batch = self._plugin.read_batch(2 if paired else 1)

    # Neither an aligner nor a reference exists at construction time.
    self._aligner = None
    self._ref = None
    self._qoffset = self.Qenc_Sanger
def main(argv=None):
    """Align reads with the ``rapi_bwa`` plugin and print SAM to stdout.

    The input is consumed in batches: each batch is filled from the input
    generator, aligned against the reference, and printed one fragment at a
    time. The SAM header is printed once before the first batch.

    :param argv: argument list forwarded unchanged to ``parse_args``.
    """
    start_time = time.time()

    batch_size = 100000  # n reads
    # NOTE(review): presumably guards against a batch limit that would fall
    # in the middle of a read pair -- confirm.
    assert batch_size % 2 == 0

    options = parse_args(argv)

    plugin = pyrapi.load_aligner('rapi_bwa')
    opts = plugin.opts()
    # Left disabled, as in the original:
    #opts.share_ref_mem = True
    plugin.init(opts)
    # NOTE(review): n_threads is assigned after plugin.init(opts); the same
    # opts object is later passed to plugin.aligner(). Confirm that init()
    # does not need the thread count.
    opts.n_threads = options.nthreads

    _log.info("Using the %s aligner plugin, aligner version %s, plugin version %s",
              plugin.aligner_name(), plugin.aligner_version(), plugin.plugin_version())
    _log.info("Number of threads: %s", opts.n_threads)

    pe = not options.se
    _log.info("Working in %s mode", 'paired-end' if pe else 'single-end')
    _log.info("Reading data in %s format", options.format)

    _log.info("Loading reference %s", options.ref)
    ref = plugin.ref(options.ref)
    try:
        _log.info("Reference loaded")

        batch = plugin.read_batch(2 if pe else 1)
        # Allocate space for reads up front. The size is derived from
        # batch_size instead of repeating the literal, and floor division
        # keeps the argument an integer.
        batch.reserve(batch_size // batch.n_reads_per_frag)

        aligner = plugin.aligner(opts)

        # Print the SAM header. The parenthesised single-argument form is
        # also valid as a Python 2 print statement.
        print(plugin.format_sam_hdr(ref))

        input_generator = create_input(options)

        def _load_batch(batch_number):
            """Refill ``batch`` from the input; return True if it got any reads."""
            batch.clear()
            _log.info('loading batch %s', batch_number)
            for reads in input_generator:
                read1 = reads[0]
                batch.append(read1['id'], read1['seq'], read1['q'], plugin.QENC_SANGER)
                if pe:
                    read2 = reads[1]
                    batch.append(read2['id'], read2['seq'], read2['q'], plugin.QENC_SANGER)
                # Checked only after the whole fragment has been appended,
                # so a pair is never split across two batches.
                if len(batch) >= batch_size:
                    break
            return len(batch) != 0

        def _process_batch():
            """Align the current batch and print one SAM record per fragment."""
            _log.info("aligning...")
            aligner.align_reads(ref, batch)
            _log.info("finished aligning.  Printing output")
            for idx in xrange(batch.n_fragments):
                print(plugin.format_sam_from_batch(batch, idx))

        batch_count = 0
        while True:
            batch_count += 1
            _log.info("Batch %s", batch_count)
            # An empty batch means the input generator is exhausted.
            if not _load_batch(batch_count):
                break
            _process_batch()
    finally:
        # Release the reference even when alignment or output fails.
        ref.unload()

    end_time = time.time()
    _log.info("Total runtime: %0.3f seconds", end_time - start_time)