def _thread_worker(aln_reader, chunk_feeder, platform,
                   results_queue, error_queue):
    """
    Worker loop, intended to run in parallel with other workers.

    Keeps requesting regions from chunk_feeder.get_chunk() until it
    returns None. Regions for which aln_reader.get_alignments() returns
    nothing are skipped. For every other region a profile is built and
    flattened, and the tuple
    (ctg_id, region start, sequence, aln_errors)
    is put on results_queue.

    Nothing is returned. Any exception is logged together with its
    traceback and then put on error_queue instead of being re-raised.
    """
    try:
        while True:
            region = chunk_feeder.get_chunk()
            if region is None:
                #the feeder has no more work for us
                break

            ctg_id = region.ctg_id
            reg_start = region.start
            reg_end = region.end

            region_aln = aln_reader.get_alignments(ctg_id, reg_start, reg_end)
            if len(region_aln) > 0:
                region_aln = aln_reader.trim_and_transpose(region_aln,
                                                           reg_start, reg_end)
                #the second value returned here is not needed
                region_aln, _mean_cov = get_uniform_alignments(region_aln)

                profile, aln_errors = _contig_profile(region_aln, platform)
                sequence = _flatten_profile(profile)
                results_queue.put((ctg_id, reg_start, sequence, aln_errors))

    except Exception as e:
        logger.error("Thread exception")
        logger.error(traceback.format_exc())
        error_queue.put(e)
def _thread_worker(aln_reader, chunk_feeder, contigs_info, err_mode,
                   results_queue, error_queue,
                   bubbles_file, bubbles_file_lock):
    """
    Will run in parallel

    Keeps requesting contig regions from chunk_feeder.get_chunk() until
    it returns None. Regions without alignments are skipped. For every
    other region the bubbles are computed, appended to the file at path
    bubbles_file (while holding bubbles_file_lock), and the tuple
    (ctg_id, number of bubbles, num_long_bubbles, num_empty,
     num_long_branch, aln_errors, mean_cov)
    is put on results_queue.

    contigs_info is accepted but not used by this function.

    Nothing is returned. Any exception is logged together with its
    traceback and then put on error_queue instead of being re-raised.
    """
    try:
        while True:
            ctg_region = chunk_feeder.get_chunk()
            if ctg_region is None:
                break

            ctg_aln = aln_reader.get_alignments(ctg_region.ctg_id,
                                                ctg_region.start,
                                                ctg_region.end)
            ctg_id = ctg_region.ctg_id
            if len(ctg_aln) == 0:
                continue

            ref_seq = aln_reader.get_region_sequence(ctg_region.ctg_id,
                                                     ctg_region.start,
                                                     ctg_region.end)

            #since we are working with contig chunks,
            #transform alignment coordinates
            ctg_aln = aln_reader.trim_and_transpose(ctg_aln, ctg_region.start,
                                                    ctg_region.end)
            ctg_aln, mean_cov = get_uniform_alignments(ctg_aln)

            profile, aln_errors = _compute_profile(ctg_aln, ref_seq)
            partition, num_long_bubbles = _get_partition(profile, err_mode)
            ctg_bubbles = _get_bubble_seqs(ctg_aln, profile, partition, ctg_id)

            #when the coverage reported for the uniform alignments is above
            #90% of the configured maximum, report the reader's median depth
            #for the region instead
            #NOTE(review): presumably because the alignment set is capped at
            #max_read_coverage - confirm
            coverage_cap = 0.9 * cfg.vals["max_read_coverage"]
            if mean_cov > coverage_cap:
                mean_cov = aln_reader.get_median_depth(ctg_region.ctg_id,
                                                       ctg_region.start,
                                                       ctg_region.end)

            ctg_bubbles, num_empty = _postprocess_bubbles(ctg_bubbles)
            ctg_bubbles, num_long_branch = _split_long_bubbles(ctg_bubbles)

            #transform coordinates back
            for b in ctg_bubbles:
                b.position += ctg_region.start

            #close the file before releasing the lock, so the data is
            #written out before another worker can append
            with bubbles_file_lock:
                with open(bubbles_file, "a") as bubbles_out:
                    _output_bubbles(ctg_bubbles, bubbles_out)

            results_queue.put((ctg_id, len(ctg_bubbles), num_long_bubbles,
                               num_empty, num_long_branch, aln_errors,
                               mean_cov))

            #drop the large per-region objects before the next iteration
            del profile
            del ctg_bubbles

    except Exception as e:
        logger.error("Thread exception")
        logger.error(traceback.format_exc())
        error_queue.put(e)
def _contig_profile(alignment, platform, genome_len):
    """
    Computes alignment profile

    Builds a list of genome_len Profile objects, one per target position,
    from the alignments kept by get_uniform_alignments(). Returns a tuple
    (profile, aln_errors), where aln_errors holds the err_rate of every
    alignment that was used, in iteration order.

    platform is accepted but not used by this function; no filtering by
    error rate is done here.
    """
    #only the best uniform alignments contribute to the profile
    uniform_alns = get_uniform_alignments(alignment, genome_len)

    profile = [Profile() for _ in range(genome_len)]
    aln_errors = []

    for aln in uniform_alns:
        aln_errors.append(aln.err_rate)

        #after gap shifting it is possible that
        #two gaps are aligned against each other
        shifted_qry = shift_gaps(aln.trg_seq, aln.qry_seq)
        shifted_trg = shift_gaps(shifted_qry, aln.trg_seq)

        pos = aln.trg_start
        for trg_nuc, qry_nuc in zip(shifted_trg, shifted_qry):
            trg_is_gap = trg_nuc == "-"

            #a gap in the target does not advance along the target, so it
            #is attributed to the previous position (the decrement is
            #undone by the increment at the end of the iteration)
            if trg_is_gap:
                pos -= 1
            #positions running past the end wrap around once
            if pos >= genome_len:
                pos -= genome_len

            column = profile[pos]
            if not trg_is_gap or qry_nuc == "-":
                #NOTE(review): this branch is also taken for gap-vs-gap
                #columns, in which case nucl is set to "-" at the
                #previous position - confirm this is intended
                column.nucl = trg_nuc
                column.matches[qry_nuc] += 1
            else:
                #insertions are accumulated per read
                column.insertions[aln.qry_id] += qry_nuc

            pos += 1

    return profile, aln_errors
def _thread_worker(aln_reader, contigs_info, err_mode,
                   results_queue, error_queue,
                   bubbles_file_handle, bubbles_file_lock):
    """
    Will run in parallel

    Calls aln_reader.init_reading(), then takes (ctg_id, alignments)
    chunks from aln_reader.get_chunk() until is_eof() is true or a chunk
    with ctg_id None is returned. For every chunk the bubbles are
    computed, the tuple
    (ctg_id, number of bubbles, num_long_bubbles, num_empty,
     num_long_branch, aln_errors, mean_cov)
    is put on results_queue, and the bubbles are written to
    bubbles_file_handle while holding bubbles_file_lock.

    Once init_reading() has succeeded, aln_reader.stop_reading() is
    always called, including when processing fails.

    Nothing is returned. Any exception is put on error_queue instead of
    being re-raised.
    """
    try:
        aln_reader.init_reading()
        try:
            while not aln_reader.is_eof():
                ctg_id, ctg_aln = aln_reader.get_chunk()
                if ctg_id is None:
                    break

                ctg_info = contigs_info[ctg_id]

                #get top uniform alignments
                ctg_aln = get_uniform_alignments(ctg_aln, ctg_info.length)

                profile, aln_errors = _compute_profile(ctg_aln, err_mode,
                                                       ctg_info.length)
                partition, num_long_bubbles = _get_partition(profile, err_mode)
                ctg_bubbles = _get_bubble_seqs(ctg_aln, err_mode, profile,
                                               partition, ctg_info)

                #average number of branches per bubble, taken before
                #postprocessing; the +1 in the denominator keeps this
                #defined when there are no bubbles at all
                mean_cov = (sum(len(b.branches) for b in ctg_bubbles) //
                            (len(ctg_bubbles) + 1))

                ctg_bubbles, num_empty, num_long_branch = \
                    _postprocess_bubbles(ctg_bubbles)
                results_queue.put((ctg_id, len(ctg_bubbles), num_long_bubbles,
                                   num_empty, num_long_branch, aln_errors,
                                   mean_cov))

                with bubbles_file_lock:
                    _output_bubbles(ctg_bubbles, bubbles_file_handle)

                #drop the large per-contig objects before the next chunk
                del profile
                del ctg_bubbles
        finally:
            #release the reader even if processing a chunk failed;
            #nested in the outer try so that a failure here is
            #reported through error_queue as well
            aln_reader.stop_reading()

    except Exception as e:
        error_queue.put(e)