def _contig_profile(alignment, platform, genome_len): """ Computes alignment profile """ max_aln_err = cfg.vals["err_modes"][platform]["max_aln_error"] aln_errors = [] profile = [Profile() for _ in xrange(genome_len)] for aln in alignment: #if aln.err_rate > max_aln_err: continue aln_errors.append(aln.err_rate) #after gap shifting it is possible that #two gaps are aligned against each other qry_seq = shift_gaps(aln.trg_seq, aln.qry_seq) trg_seq = shift_gaps(qry_seq, aln.trg_seq) trg_pos = aln.trg_start for trg_nuc, qry_nuc in izip(trg_seq, qry_seq): if trg_nuc == "-": trg_pos -= 1 if trg_pos >= genome_len: trg_pos -= genome_len prof_elem = profile[trg_pos] if trg_nuc == "-" and qry_nuc != "-": prof_elem.insertions[aln.qry_id] += qry_nuc else: prof_elem.nucl = trg_nuc prof_elem.matches[qry_nuc] += 1 trg_pos += 1 return profile, aln_errors
def _contig_profile(alignment, platform, genome_len): """ Computes alignment profile """ #max_aln_err = config.vals["err_modes"][platform]["max_aln_error"] aln_errors = [] profile = [Profile() for _ in xrange(genome_len)] for aln in alignment: #if aln.err_rate > max_aln_err: continue aln_errors.append(aln.err_rate) qry_seq = shift_gaps(aln.trg_seq, aln.qry_seq) trg_seq = shift_gaps(qry_seq, aln.trg_seq) #qry_seq = aln.qry_seq #trg_seq = aln.trg_seq trg_pos = aln.trg_start for trg_nuc, qry_nuc in izip(trg_seq, qry_seq): if trg_nuc == "-": trg_pos -= 1 if trg_pos >= genome_len: trg_pos -= genome_len prof_elem = profile[trg_pos] if trg_nuc == "-": prof_elem.insertions[qry_nuc] += 1 else: prof_elem.nucl = trg_nuc prof_elem.matches[qry_nuc] += 1 trg_pos += 1 return profile, aln_errors
def _contig_profile(alignment, platform): """ Computes alignment profile """ if not alignment: return [] genome_len = alignment[0].trg_len aln_errors = [] profile = [Profile() for _ in range(genome_len)] #max_aln_err = cfg.vals["err_modes"][platform]["max_aln_error"] for aln in alignment: #if aln.err_rate > max_aln_err: continue aln_errors.append(aln.err_rate) #after gap shifting it is possible that #two gaps are aligned against each other qry_seq = shift_gaps(aln.trg_seq, aln.qry_seq) trg_seq = shift_gaps(qry_seq, aln.trg_seq) trg_pos = aln.trg_start for trg_nuc, qry_nuc in zip(trg_seq, qry_seq): if trg_nuc == "-": trg_pos -= 1 if trg_pos >= genome_len: trg_pos -= genome_len #total += 1 prof_elem = profile[trg_pos] if trg_nuc == "-" and qry_nuc != "-": prof_elem.insertions[aln.qry_id] += qry_nuc else: prof_elem.nucl = trg_nuc prof_elem.matches[qry_nuc] += 1 trg_pos += 1 #print "len", genome_len, "median coverage", cov_threshold #print "total bases: ", total, "discarded bases: ", discarded #print "filtered", float(discarded) / total #print "" return profile, aln_errors
def _compute_profile(alignment, platform, genome_len):
    """
    Computes alignment profile

    Builds one ProfileInfo per target position and fills in per-position
    coverage, insertion, deletion and mismatch counters from the given
    alignments. Alignments whose error rate is above the platform's
    "max_aln_error" setting, or whose query sequence is shorter than the
    "min_polish_aln_len" setting, are ignored.

    Returns a (profile, aln_errors) tuple, where aln_errors holds the
    err_rate of every alignment that was not ignored.
    """
    error_cutoff = cfg.vals["err_modes"][platform]["max_aln_error"]
    length_cutoff = cfg.vals["min_polish_aln_len"]

    kept_errors = []
    columns = [ProfileInfo() for _ in range(genome_len)]

    for record in alignment:
        #too noisy
        if record.err_rate > error_cutoff:
            continue
        #too short
        if len(record.qry_seq) < length_cutoff:
            continue

        kept_errors.append(record.err_rate)

        read_aln = shift_gaps(record.trg_seq, record.qry_seq)
        ref_aln = shift_gaps(read_aln, record.trg_seq)

        cursor = record.trg_start
        for ref_base, read_base in zip(ref_aln, read_aln):
            ref_is_gap = ref_base == "-"

            #a reference gap belongs to the position before the cursor
            slot = cursor - 1 if ref_is_gap else cursor
            #coordinates running past the end wrap around to the start
            if slot >= genome_len:
                slot -= genome_len

            cell = columns[slot]
            if ref_is_gap:
                cell.num_inserts += 1
            else:
                cell.nucl = ref_base
                cell.coverage += 1
                if read_base == "-":
                    cell.num_deletions += 1
                elif ref_base != read_base:
                    cell.num_missmatch += 1

            cursor = slot + 1

    return columns, kept_errors
def _compute_profile(alignment, ref_sequence): """ Computes alignment profile """ if len(alignment) == 0: raise Exception("No alignmemnts!") genome_len = alignment[0].trg_len #max_aln_err = cfg.vals["err_modes"][platform]["max_aln_error"] min_aln_len = cfg.vals["min_polish_aln_len"] aln_errors = [] #filtered = 0 profile = [ProfileInfo() for _ in range(genome_len)] for i in range(genome_len): profile[i].nucl = ref_sequence[i] for aln in alignment: #if aln.err_rate > max_aln_err or len(aln.qry_seq) < min_aln_len: if len(aln.qry_seq) < min_aln_len: #filtered += 1 continue aln_errors.append(aln.err_rate) qry_seq = shift_gaps(aln.trg_seq, aln.qry_seq) trg_seq = shift_gaps(qry_seq, aln.trg_seq) trg_pos = aln.trg_start for trg_nuc, qry_nuc in zip(trg_seq, qry_seq): if trg_nuc == "-": trg_pos -= 1 #if trg_pos >= genome_len: # trg_pos -= genome_len prof_elem = profile[trg_pos] if trg_nuc == "-": prof_elem.insertions[aln.qry_id] += qry_nuc #prof_elem.num_inserts += 1 else: #prof_elem.nucl = trg_nuc prof_elem.coverage += 1 if qry_nuc == "-": prof_elem.num_deletions += 1 elif trg_nuc != qry_nuc: prof_elem.num_missmatch += 1 trg_pos += 1 for i in range(genome_len): for ins_read, ins_str in profile[i].insertions.items(): profile[i].propagated_ins += 1 span = len(ins_str) for j in range(max(0, i - span), i): profile[j].propagated_ins += 1 for j in range(i + 1, min(i + span + 1, genome_len)): profile[j].propagated_ins += 1 #logger.debug("Filtered: {0} out of {1}".format(filtered, len(alignment))) return profile, aln_errors