def test_list_built(self):
    """Check the table, transform and counting helpers of a BWT built from a list of ints."""
    index = BWT([1, 2])

    first_row = index.table[0]
    self.assertEqual(first_row, [BWT.END, 2, 1])

    transformed = index.transform()
    self.assertEqual(transformed, [1, 2, BWT.END])

    # The input contains exactly one occurrence of the token 1.
    self.assertEqual(index.C(1), 1)

    # Exactly one occurrence of 1 is counted within the last single character.
    self.assertEqual(index.F(1, 1), 1)
def test_alignment(self):
    """Look up the pattern "ana" in a BWT built from the characters of "banana"."""
    index = BWT(list("banana"))
    # Debugging aid: uncomment to inspect the table.
    # index.print_table()

    # A plain str is accepted as the pattern because it supports random
    # access and slicing.
    pattern = "ana"

    self.assertEqual(index.L(pattern), 3)
    self.assertEqual(index.U(pattern), 4)

    # Each reported position is the index of the last token of a match.
    found = sorted(index.get_start_indices(pattern))
    self.assertEqual(found, [3, 5])
class MarkovModel:
    """Look up what follows a given context in a token sequence indexed by a BWT."""

    def __init__(self, string):
        """Keep a reference to ``string`` and build a ``BWT`` over it."""
        self.string = string
        self.bwt = BWT(string)

    def get_n_tokens(self, context, n):
        """
        Pick one occurrence of ``context`` at random and return up to ``n``
        of the tokens that follow it in the source string.

        Returns None when the BWT reports no occurrence of ``context``.
        """
        positions = self.bwt.get_start_indices(context)
        if positions != []:
            return self.get_n_gram_at_index(n, choice(positions))
        return None

    def get_n_gram_at_index(self, n, index):
        """Return the slice of the source string holding up to ``n`` items after ``index``."""
        first = index + 1
        # NOTE(review): the upper bound is capped at len(self.string) - 1, so
        # the final element of self.string is never part of the result.
        # Confirm whether that exclusion is intended.
        stop = min(first + n, len(self.string) - 1)
        return self.string[first:stop]

    def get_all_possible_n_grams(self, context, n):
        """Return a generator over the n-gram following every occurrence of ``context``."""
        positions = self.bwt.get_start_indices(context)
        return (self.get_n_gram_at_index(n, position) for position in positions)
class ApproximateMatcher:
    """Find approximate occurrences of a pattern in a target text via exact seed matches."""

    def __init__(self, target):
        """Append '$' to ``target`` and build a ``BWT`` over the resulting text."""
        terminated = target + '$'
        self._text = terminated
        self._bwt = BWT(terminated)

    def get_matches(self, pattern, d):
        """
        Return the indices in the target at which ``pattern`` matches with up
        to ``d`` mismatches.

        Seeds taken from ``pattern`` are matched exactly through the BWT;
        every exact hit is registered as a candidate, and the candidates are
        then filtered against the stored text.
        """
        # The checker produces the seeds and collects the candidates.
        checker = SeedChecker(pattern, d)

        for kmer, kmer_offset in checker.enumerate():
            # Exact occurrences of this seed, looked up in the BWT.
            hits = self._bwt.get_matches(kmer)
            # Each exact seed hit yields candidate approximate matches.
            checker.add_candidates(hits, kmer_offset)

        # Only candidates that pass the checker's verification against the
        # text are returned.
        return checker.filter_candidates(self._text)
class ApproximateMatcher:
    """Approximate pattern matcher that looks up exact seed hits in a BWT of the target."""

    def __init__(self, target):
        """Store ``target`` with "$" appended and build a ``BWT`` over that text."""
        text = target + "$"
        self._text = text
        self._bwt = BWT(text)

    def get_matches(self, pattern, d):
        """
        Return the indices in the target at which ``pattern`` matches with up
        to ``d`` mismatches.

        The work is delegated to a ``SeedChecker``: it enumerates seeds of
        ``pattern``, receives the exact BWT hits for each seed as candidates,
        and finally filters those candidates against the stored text.
        """
        checker = SeedChecker(pattern, d)

        # One pass per seed k-mer of the pattern.
        for seed_kmer, position_in_pattern in checker.enumerate():
            exact_hits = self._bwt.get_matches(seed_kmer)
            checker.add_candidates(exact_hits, position_in_pattern)

        # The checker verifies the candidates and the survivors are returned.
        return checker.filter_candidates(self._text)
def __init__(self, string):
    """Keep a reference to ``string`` and build a ``BWT`` over it."""
    self.string = string
    # The BWT is built from the same object that was just stored.
    self.bwt = BWT(string)
def __init__(self, target):
    """Append '$' to ``target`` and build a ``BWT`` over the resulting text."""
    terminated = target + '$'
    self._text = terminated
    self._bwt = BWT(terminated)
def __init__(self, target):
    """Store ``target`` with "$" appended and build a ``BWT`` over that text."""
    text = target + "$"
    self._text = text
    self._bwt = BWT(text)