def _match(insert_match, offset, insert_match_size, prob):  # pylint: disable=unused-argument
    """Confirm an insert match by checking both reads for adapter sequence.

    This is a closure: it reads ``self``, ``seq1``, ``seq2``, ``len1`` and
    ``len2`` from the enclosing scope.

    Args:
        insert_match: The insert match under evaluation; it is passed
            through unchanged as the first element of the result.
        offset: Compared against ``self.min_adapter_overlap`` and used as
            an upper bound on the adapter length that is checked.
        insert_match_size: Index in each read at which the candidate
            adapter sequence starts.
        prob: Unused.

    Returns:
        ``(insert_match, None, None)`` when ``offset`` is smaller than
        ``self.min_adapter_overlap``; ``None`` when the adapter check
        fails; otherwise ``(insert_match, Match, Match)`` with one
        ``Match`` per read.
    """
    if offset < self.min_adapter_overlap:
        # The reads overlap almost completely, so the overhang is too
        # short for a confident adapter match. Handing back only the
        # insert match tells the caller that error correction is still
        # possible even though there is no adapter to trim.
        return (insert_match, None, None)

    # TODO: this is very sensitive to the exact correct choice of
    # adapter. For example, if you specify GATCGGAA... and the correct
    # adapter is AGATCGGAA..., the prefixes will not match exactly and
    # the alignment will fail. We need to use a comparison that is a bit
    # more forgiving.
    read1_cmp = compare_prefixes(
        seq1[insert_match_size:],
        self.adapter1,
        wildcard_ref=self.adapter_wildcards,
        wildcard_query=self.read_wildcards,
    )
    read2_cmp = compare_prefixes(
        seq2[insert_match_size:],
        self.adapter2,
        wildcard_ref=self.adapter_wildcards,
        wildcard_query=self.read_wildcards,
    )

    # In a compare_prefixes result, index 4 holds the number of matches
    # and index 5 the number of errors.
    checked_len = min(offset, self.adapter1_len, self.adapter2_len)
    mismatch_limit = round(checked_len * self.max_adapter_mismatch_frac)
    # Reject only when *both* reads exceed the mismatch limit.
    if read1_cmp[5] > mismatch_limit and read2_cmp[5] > mismatch_limit:
        return None

    read1_prob = self.match_probability(read1_cmp[4], checked_len)
    read2_prob = self.match_probability(read2_cmp[4], checked_len)
    # The combined-probability filter is applied only when the checked
    # adapter length is above the configured cutoff.
    if (
        checked_len > self.adapter_check_cutoff
        and read1_prob * read2_prob > self.adapter_max_rmp
    ):
        return None

    read1_adapter_len = min(self.adapter1_len, len1 - insert_match_size)
    read2_adapter_len = min(self.adapter2_len, len2 - insert_match_size)

    # Both returned matches report the counts of whichever comparison has
    # the lower probability (ties go to read 2).
    if read1_prob < read2_prob:
        best_cmp = read1_cmp
    else:
        best_cmp = read2_cmp
    n_matches, n_mismatches = best_cmp[4:6]

    read1_result = Match(
        0, read1_adapter_len, insert_match_size, len1, n_matches, n_mismatches
    )
    read2_result = Match(
        0, read2_adapter_len, insert_match_size, len2, n_matches, n_mismatches
    )
    return (insert_match, read1_result, read2_result)
def _adapter_match(insert_seq, adapter_seq, adapter_len):
    """Compare the portion of a read past the insert against an adapter.

    This is a closure: it reads ``self``, ``_insert_match_size`` and
    ``_offset`` from the enclosing scope.

    Args:
        insert_seq: Read sequence; only the part starting at index
            ``_insert_match_size`` is compared.
        adapter_seq: Adapter sequence the read tail is compared against.
        adapter_len: Upper bound on the adapter length taken into account
            (presumably ``len(adapter_seq)`` -- confirm against caller).

    Returns:
        A 3-tuple ``(comparison, effective_len, mismatch_limit)``:
        the raw ``compare_prefixes`` result, ``min(_offset, adapter_len)``,
        and that length scaled by ``self.max_adapter_mismatch_frac`` and
        rounded.
    """
    overhang = insert_seq[_insert_match_size:]
    comparison = compare_prefixes(
        overhang,
        adapter_seq,
        wildcard_ref=self.adapter_wildcards,
        wildcard_query=self.read_wildcards,
    )
    # The adapter cannot extend further than the offset allows.
    effective_len = min(_offset, adapter_len)
    mismatch_limit = round(effective_len * self.max_adapter_mismatch_frac)
    return comparison, effective_len, mismatch_limit
def compare_suffixes(suffix_ref, suffix_query, wildcard_ref=False,
                     wildcard_query=False):
    """Check whether one string is a suffix of the other, allowing mismatches.

    Used to find an anchored 3' adapter when no indels are allowed. Both
    inputs are reversed and handed to ``compare_prefixes``; the compared
    length it reports is then converted back into end-anchored
    coordinates.

    Args:
        suffix_ref, suffix_query: The suffixes to compare.
        wildcard_ref, wildcard_query: Whether wildcards are valid in
            either of the suffixes.

    Returns:
        A 6-tuple ``(ref_start, ref_stop, query_start, query_stop,
        matches, errors)`` where each stop is the length of the
        corresponding input, each start is that length minus the compared
        length, and ``matches``/``errors`` are passed through from
        ``compare_prefixes``.
    """
    reversed_ref = suffix_ref[::-1]
    reversed_query = suffix_query[::-1]
    _, compared_len, _, _, n_matches, n_errors = compare_prefixes(
        reversed_ref, reversed_query, wildcard_ref, wildcard_query)

    ref_stop = len(reversed_ref)
    query_stop = len(reversed_query)
    # The compared region is anchored at the end of each input, so its
    # start is the end position minus the compared length.
    return (
        ref_stop - compared_len,
        ref_stop,
        query_stop - compared_len,
        query_stop,
        n_matches,
        n_errors,
    )