def define_adapter_presence_substitutions_only(self, adapter, max_substitutions):
    """Flag the read as containing the adapter, allowing substitutions only.

    Sets ``self.adapter_present`` to True when ``adapter`` occurs in
    ``self.sequence_line`` either exactly, or as a window of the same length
    as the adapter that ``ApproxMatch.approx_substitute`` accepts for the
    given ``max_substitutions``. When no match is found the attribute is
    left untouched (it is never set to False here).

    Args:
        adapter: Adapter sequence to look for in the read.
        max_substitutions: Substitution budget forwarded to
            ``ApproxMatch.approx_substitute``; also used as the Levenshtein
            distance of the pre-filter.

    Returns:
        None
    """
    # Straightforward case: the adapter is present verbatim in the read.
    if adapter in self.sequence_line:
        self.adapter_present = True
        return

    # Preliminary (hopefully faster) filter. Two sequences at most N
    # substitutions apart are also at most N edits apart (the converse does
    # not hold), so a read with no match within that Levenshtein distance
    # cannot contain a substitution-only match either.
    # The distance is passed by keyword: in later fuzzysearch releases the
    # third positional parameter is ``max_substitutions``, not the
    # Levenshtein distance intended here.
    if not fuzzysearch.find_near_matches(
        adapter, self.sequence_line, max_l_dist=max_substitutions
    ):
        return

    # The Levenshtein hit may involve insertions or deletions, so confirm it
    # with the substitution-only comparison on every window of the read that
    # has the same length as the adapter. The last window starts
    # len(adapter) characters before the end of the read.
    adapter_length = len(adapter)
    for start in range(len(self.sequence_line) - adapter_length + 1):
        window = self.sequence_line[start:start + adapter_length]
        if ApproxMatch.approx_substitute(adapter, window, max_substitutions):
            # One confirmed match is enough; stop scanning.
            self.adapter_present = True
            return
def assign_to_unique_sample(self, sequenced_barcode):
    """Map a sequenced barcode to the single sample it approximately matches.

    Every expected barcode (the keys of ``self.expected``) is compared with
    ``sequenced_barcode`` through ``ApproxMatch.approx_substitute`` using a
    threshold of 1.

    Args:
        sequenced_barcode: Barcode to resolve.

    Returns:
        The value stored in ``self.expected`` for the only expected barcode
        that matched, or False when no expected barcode or more than one
        expected barcode matched.
    """
    # Gather every candidate first: uniqueness can only be judged once all
    # expected barcodes have been compared.
    candidates = [
        barcode
        for barcode in self.expected
        if ApproxMatch.approx_substitute(barcode, sequenced_barcode, 1)
    ]

    # Ambiguous (several candidates) or unknown (none): no sample assigned.
    if len(candidates) != 1:
        return False

    return self.expected[candidates[0]]