def test_dna_search(self):
    # see: http://stackoverflow.com/questions/19725127/
    # The multi-line literal is split on whitespace and re-joined so the
    # searched text is one contiguous string without line breaks.
    dna_text = ''.join('''\
GACTAGCACTGTAGGGATAACAATTTCACACAGGTGGACAATTACATTGAAAATCACAGATTGGT
CACACACACATTGGACATACATAGAAACACACACACATACATTAGATACGAACATAGAAACACAC
ATTAGACGCGTACATAGACACAAACACATTGACAGGCAGTTCAGATGATGACGCCCGACTGATAC
TCGCGTAGTCGTGGGAGGCAAGGCACACAGGGGATAGG
'''.split())
    query = 'TGCACTGTAGGGATAACAAT'

    found = list(fnm_levenshtein_lp(query, dna_text, max_l_dist=2))

    self.assertTrue(len(found) > 0)
    self.assertIn(Match(start=3, end=24, dist=1), found)
def test_dna_search(self):
    # see: http://stackoverflow.com/questions/19725127/
    # Whitespace (including the line breaks) is stripped from the literal
    # by split()/join(), leaving a single uninterrupted string to search.
    genome = ''.join('''\
GACTAGCACTGTAGGGATAACAATTTCACACAGGTGGACAATTACATTGAAAATCACAGATTGGT
CACACACACATTGGACATACATAGAAACACACACACATACATTAGATACGAACATAGAAACACAC
ATTAGACGCGTACATAGACACAAACACATTGACAGGCAGTTCAGATGATGACGCCCGACTGATAC
TCGCGTAGTCGTGGGAGGCAAGGCACACAGGGGATAGG
'''.split())
    probe = 'TGCACTGTAGGGATAACAAT'

    results = list(fnm_levenshtein_lp(probe, genome, max_l_dist=2))

    self.assertTrue(len(results) > 0)
    self.assertIn(Match(start=3, end=24, dist=1), results)
def test_missing_second_item(self):
    # The pattern 'bde' is 'bcde' (sequence[1:5]) with its second item
    # removed, so the expected match has a distance of 1.
    haystack = 'abcdefg'
    needle = 'bde'

    found = list(fnm_levenshtein_lp(needle, haystack, max_l_dist=1))

    expected = Match(start=1, end=5, dist=1, matched='bcde')
    self.assertIn(expected, found)
def search(self, subsequence, sequence, max_l_dist):
    """Run fnm_levenshtein_lp and consolidate its overlapping matches."""
    raw_matches = fnm_levenshtein_lp(subsequence, sequence, max_l_dist)
    return consolidate_overlapping_matches(raw_matches)
def test_double_first_item(self):
    # The first pattern item ('d') appears twice in a row in the sequence;
    # the exact occurrence at [4:7] must still be reported with dist=0.
    haystack = 'abcddefg'
    needle = 'def'

    found = list(fnm_levenshtein_lp(needle, haystack, max_l_dist=1))

    exact = Match(start=4, end=7, dist=0, matched=needle)
    self.assertIn(exact, found)
def test_match_identical_sequence(self):
    # Searching a string for itself with no edits allowed must yield exactly
    # one match covering the whole string.
    word = 'PATTERN'

    found = list(fnm_levenshtein_lp(word, word, max_l_dist=0))

    whole_string = Match(start=0, end=len(word), dist=0, matched=word)
    self.assertEqual(found, [whole_string])
def test_empty_subsequence_exeption(self):
    # An empty pattern is expected to raise ValueError. list() drains the
    # result so an error raised lazily during iteration is still caught.
    empty_pattern = ''
    haystack = 'TEXT'
    with self.assertRaises(ValueError):
        list(fnm_levenshtein_lp(empty_pattern, haystack, max_l_dist=0))
def test_empty_sequence(self):
    # Searching an empty sequence must produce no matches.
    found = list(fnm_levenshtein_lp('PATTERN', '', max_l_dist=0))
    self.assertEqual(found, [])
def search(self, subsequence, sequence, max_l_dist):
    """Return the best match from each group of fnm_levenshtein_lp results."""
    all_matches = fnm_levenshtein_lp(subsequence, sequence, max_l_dist)
    grouped = group_matches(all_matches)
    return list(map(get_best_match_in_group, grouped))
def test_missing_second_item(self):
    # The pattern 'bde' is 'bcde' (sequence[1:5]) with its second item
    # removed, so the expected match has a distance of 1.
    haystack = 'abcdefg'
    needle = 'bde'

    found = list(fnm_levenshtein_lp(needle, haystack, max_l_dist=1))

    expected = Match(start=1, end=5, dist=1)
    self.assertIn(expected, found)
def search(self, subsequence, sequence, max_l_dist):
    """Run fnm_levenshtein_lp and consolidate its overlapping matches."""
    raw_matches = fnm_levenshtein_lp(subsequence, sequence, max_l_dist)
    return consolidate_overlapping_matches(raw_matches)
def test_double_first_item(self):
    # The first pattern item ('d') appears twice in a row in the sequence;
    # the exact occurrence at [4:7] must still be reported with dist=0.
    haystack = 'abcddefg'
    needle = 'def'

    found = list(fnm_levenshtein_lp(needle, haystack, max_l_dist=1))

    exact = Match(start=4, end=7, dist=0)
    self.assertIn(exact, found)
def test_match_identical_sequence(self):
    # Searching a string for itself with no edits allowed must yield exactly
    # one match covering the whole string.
    word = 'PATTERN'

    found = list(fnm_levenshtein_lp(word, word, max_l_dist=0))

    whole_string = Match(start=0, end=len(word), dist=0)
    self.assertEqual(found, [whole_string])
def test_empty_subsequence_exeption(self):
    # An empty pattern is expected to raise ValueError. list() drains the
    # result so an error raised lazily during iteration is still caught.
    empty_pattern = ''
    haystack = 'TEXT'
    with self.assertRaises(ValueError):
        list(fnm_levenshtein_lp(empty_pattern, haystack, max_l_dist=0))
def test_empty_sequence(self):
    # Searching an empty sequence must produce no matches.
    found = list(fnm_levenshtein_lp('PATTERN', '', max_l_dist=0))
    self.assertEqual(found, [])